Skip to content
Snippets Groups Projects
Commit fb13c0ac authored by Lihouck Flavien's avatar Lihouck Flavien
Browse files

Created a first config file

parent e4c26ffe
No related branches found
No related tags found
No related merge requests found
......@@ -8,3 +8,4 @@ data/
.#*
*.fasta
MTool
log/
\ No newline at end of file
......@@ -21,11 +21,17 @@ def parse_file(filename, x_scale):
i, s, e, m = map(float, line.split(',')[1:])
except:
i,s,e,m = [0, 0, 0, 0]
for x in range(1, len(line.split(','))-1):
print("Missing values replaced by 0, 0, 0, 0")
identity.append(i)
similarity.append(s)
equivalence.append(e)
mismatches.append(m)
identity.append(i)
similarity.append(s)
equivalence.append(e)
mismatches.append(m)
cnt+=1
return identity, similarity, equivalence, mismatches
......
DATA = "../../data"
DATASET = expand("{data}/msa", data=DATA)
TOOL = ["abpoa", "kalign", "kalign3", "mafft", "muscle", "spoa"]
THRESHOLD = ["30", "40", "50", "60", "70", "80"]
REGION = ["100", "200", "500", "1000", "2000"]
DEPTH = ["10", "20", "50", "100", "150"]
ALIGN = ["muscle", "mafft"]
GRAPH_TYPES = ["identity", "similarity", "equivalence", "mismatches"]
configfile: "./workflow/default_config.yaml"
DATASET = expand("{data}/{format}", data=config["data"], format=config["format"])
GRAPH_TYPES = ["identity", "similarity", "equivalence", "mismatches"]
STAT_HEADER= "threshold, percent_identity, percent_similarity, total_equivalence, total_mismatches\n "
rule all:
input:
expand("consensus/stats/{region}/aln_all_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.txt", region=REGION, depth=DEPTH, threshold=THRESHOLD, value=THRESHOLD)
expand("consensus/{region}/{depth}/{align}_meta_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.fasta", region=config["region"], depth=config["depth"], threshold=config["threshold_1"], value=config["threshold_2"], align="muscle")
message: "Meta consensus can be found in ./consensus/[region]/[depth]/"
rule MTool:
input:
"msa_handler/MSA.cpp", "msa_handler/MSA.h", "msa_handler/main.cpp", "msa_handler/Makefile"
log: "logs/MTool.log"
output:
"msa_handler/MTool"
shell:
......@@ -27,16 +28,20 @@ rule create_consensus_per_tool:
rules.MTool.output, data=expand("{dataset}/MSA_{tool}_r{region}_d{depth}.fasta", allow_missing=True, dataset=DATASET)
output:
"consensus/{region}/{depth}/individual_consensus/{threshold}/consensus_{tool}_{region}_{depth}.fasta"
log: "logs/consensus_per_tool_{tool}_r{region}_d{depth}_t{threshold}.log"
shell:
"./msa_handler/MTool {input.data} {output} {wildcards.threshold}"
rule create_consensus:
input:
rules.MTool.output, "MTool_over_folder.py", expand("{dataset}/MSA_{tool}_r{region}_d{depth}.fasta", tool=TOOL, allow_missing=True, dataset=DATASET)
rules.MTool.output, "MTool_over_folder.py", expand("{dataset}/MSA_{tool}_r{region}_d{depth}.fasta", tool=config["tool"], allow_missing=True, dataset=DATASET)
params: data=DATASET
log: "logs/consensus_r{region}_d{depth}_t{threshold}.log"
output:
"consensus/{region}/{depth}/test_consensus_{region}_{depth}_{threshold}.fasta"
message: "Consensus region: {wildcards.region} , depth: {wildcards.depth} , threshold: {wildcards.threshold}\n"
shell:
"./MTool_over_folder.py -dir {DATASET} -o {output} -region {wildcards.region} -depth {wildcards.depth} -threshold {wildcards.threshold}"
"./MTool_over_folder.py -dir {params.data} -o {output} -region {wildcards.region} -depth {wildcards.depth} -threshold {wildcards.threshold}"
rule align_mafft:
input:
......@@ -45,8 +50,10 @@ rule align_mafft:
"consensus/{region}/{depth}/mafft_consensus_{region}_{depth}_{threshold}.fasta"
conda:
"envs/mafft.yaml"
log: "logs/align_mafft_r{region}_d{depth}_t{threshold}.log"
message: "Alignment with mafft region: {wildcards.region} , depth: {wildcards.depth} , threshold: {threshold}\n"
shell:
"mafft --auto {input} > {output} 2>/dev/null"
"mafft --auto {input} > {output}"
rule align_muscle:
input:
......@@ -55,59 +62,81 @@ rule align_muscle:
"consensus/{region}/{depth}/muscle_consensus_{region}_{depth}_{threshold}.fasta"
conda:
"envs/muscle.yaml"
log: "logs/align_muscle_r{region}_d{depth}_t{threshold}.log"
message: "Alignment muscle region: {wildcards.region} , depth: {wildcards.depth} , threshold: {wildcards.threshold}\n"
shell:
"muscle -align {input} -output {output} > /dev/null 2>/dev/null"
rule final_consensus:
rule meta_consensus:
input:
muscle=rules.align_muscle.output
output:
"consensus/{region}/{depth}/{align}_final_consensus_{region}_{depth}_t1{threshold}_t2{value}.fasta"
"consensus/{region}/{depth}/{align}_meta_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.fasta"
log: "logs/meta_consensus_{align}_r{region}_d{depth}_t1_{threshold}_t2_{value}.log"
message: "Meta-consensus region: {wildcards.region} , depth: {wildcards.depth} , consensus threshold: {wildcards.threshold} , meta consensus threshold: {wildcards.value}\n"
shell:
"./msa_handler/MTool {input.muscle} {output} {wildcards.value}"
# This part handle comparison to a reference and creates plots in order to better visualize the efficiency of the consensus strategie used for both the preliminary consensus and the meta-consensus. #
rule exonerate_ref_result:
input:
files=expand(rules.final_consensus.output, align="muscle", allow_missing=True) ,
ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=DATA, allow_missing=True),
consensus_per_tool=expand(rules.create_consensus_per_tool.output, tool=TOOL, allow_missing=True)
output: "consensus/stats/{region}/aln_all_consensus_r{region}_d{depth}_t1{threshold}_t2_{value}.txt"
resources: format="\"%pi, %ps, %et, %em\n\""
files=expand(rules.meta_consensus.output, align="muscle", allow_missing=True) ,
ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=config["data"], allow_missing=True),
consensus_per_tool=expand(rules.create_consensus_per_tool.output, tool=config["tool"], allow_missing=True)
output: "consensus/stats/{region}/aln_all_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.txt"
log: "logs/exonerate_ref_result_r{region}_d{depth}_t1_{threshold}_t2{value}.log"
params: format="\"%pi, %ps, %et, %em\n\""
conda: "envs/exonerate.yaml"
shell: "./exonerate_stats.sh {output} {input.ref} {input.files} {input.consensus_per_tool}"
# "echo {STAT_HEADER} >> {output} \n for FILE in {input.files}; do exonerate --bestn 1 -Q dna -E -m a:g --showalignment false --showsugar false --showvulgar false --showcigar false --ryo {resources.format} --verbose 0 -q {input.ref} -t $FILE >> {output} 2>/dev/null; done" #
# "echo {STAT_HEADER} >> {output} \n for FILE in {input.files}; do exonerate --bestn 1 -Q dna -E -m a:g --showalignment false --showsugar false --showvulgar false --showcigar false --ryo {params.format} --verbose 0 -q {input.ref} -t $FILE >> {output} 2>/dev/null; done" #
rule plottable_exonerate_meta_t2:
input: files=expand(rules.final_consensus.output, align="muscle", value=THRESHOLD, allow_missing=True) , ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=DATA, allow_missing=True)
rule plottable_exonerate_meta:
input: files=expand(rules.meta_consensus.output, align="muscle", value=config["threshold_2"], allow_missing=True) , ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=config["data"], allow_missing=True)
output: "consensus/plot/{region}/{depth}/{threshold}/aln_stats_meta.txt"
log: "logs/plot_exonerate_meta_r{region}_d{depth}_t{threshold}.log"
conda: "envs/exonerate.yaml"
shell: "./exonerate_stats.sh {output} {input.ref} {input.files}"
rule plottable_exonerate_tools_t2:
rule plottable_exonerate_tools:
input:
ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=DATA, allow_missing=True),
consensus_per_tool=expand(rules.create_consensus_per_tool.output, value=THRESHOLD, allow_missing=True)
ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=config["data"], allow_missing=True),
consensus_per_tool=expand(rules.create_consensus_per_tool.output, value=config["threshold_2"], allow_missing=True)
output: "consensus/plot/{region}/{depth}/{threshold}/aln_stats_{tool}.txt"
log: "logs/plot_exonerate_tools_{tool}_r{region}_d{depth}_t{threshold}.log"
conda: "envs/exonerate.yaml"
shell: "./exonerate_stats.sh {output} {input.ref} {input.consensus_per_tool}"
rule plot_depth:
input:
expand(rules.plottable_exonerate_meta.output, depth=config["depth"], allow_missing=True),
params: target_folder="consensus/plot/{region}/graph"
log: "logs/plot_depth_r{region}_t{threshold}.log"
output: expand("consensus/plot/{region}/graph/graph_r{region}_t{threshold}_{graph}.png", graph=GRAPH_TYPES, depth=config["depth"], allow_missing=True)
shell: "./plot_results.py {params.target_folder}/graph_r{wildcards.region}_t{wildcards.threshold} {input}"
rule plot_t1:
input:
expand(rules.plottable_exonerate_meta_t2.output, threshold=THRESHOLD, allow_missing=True),
expand(rules.plottable_exonerate_meta.output, threshold=config["threshold_1"], allow_missing=True)
params: target_folder="consensus/plot/{region}/{depth}"
log: "logs/plot_t1_r{region}_d{depth}.log"
output: expand("consensus/plot/{region}/{depth}/graph_r{region}_d{depth}_{graph}.png", graph=GRAPH_TYPES, allow_missing=True)
shell: "./plot_results.py {params.target_folder}/graph_r{wildcards.region}_d{wildcards.depth} {input}"
rule plot_t2:
input: expand(rules.plottable_exonerate_tools_t2.output, tool=TOOL, allow_missing=True),
rules.plottable_exonerate_meta_t2.output
input: expand(rules.plottable_exonerate_tools.output, tool=config["tool"], allow_missing=True),
rules.plottable_exonerate_meta.output
params: target_folder="consensus/plot/{region}/{depth}/{threshold}/"
log: "logs/plot_t2_r{region}_d{depth}_t_{threshold}.log"
output: expand("consensus/plot/{region}/{depth}/{threshold}/graph_r{region}_d{depth}_t1_{threshold}_{graph}.png", graph=GRAPH_TYPES, allow_missing=True)
shell: "./plot_results.py {params.target_folder}graph_r{wildcards.region}_d{wildcards.depth}_t1_{wildcards.threshold} {input}"
rule make_plots:
input: expand(rules.plot_t2.output, region=REGION, depth=DEPTH, threshold=THRESHOLD),
expand(rules.plot_t1.output, region=REGION, depth=DEPTH)
\ No newline at end of file
input: expand(rules.plot_t2.output, region=config["region"], depth=config["depth"], threshold=config["threshold_1"]),
expand(rules.plot_t1.output, region=config["region"], depth=config["depth"]),
expand(rules.plot_depth.output, region=config["region"], threshold=config["threshold_1"])
\ No newline at end of file
data: "../../data"
format: "msa"
tool: ["abpoa", "kalign", "kalign3", "mafft", "muscle", "spoa"]
threshold_1: ["30", "40", "50", "60", "70", "80"]
threshold_2 : ["30", "40", "50", "60", "70", "80"]
region: ["100", "200", "500", "1000", "2000"]
depth: ["10", "20", "50", "100", "150"]
align: ["muscle", "mafft"]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment