Created a first config file

fb13c0ac · Lihouck Flavien · e4c26ffe · fb13c0ac · fb13c0ac · fb13c0ac
Commit fb13c0ac authored May 17, 2022 by Lihouck Flavien
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ data/
 .#*
 *.fasta
 MTool
+log/
\ No newline at end of file
--- a/src/plot_results.py
+++ b/src/plot_results.py
@@ -21,11 +21,17 @@ def parse_file(filename, x_scale):
                i, s, e, m = map(float, line.split(',')[1:])
            except:
                i,s,e,m = [0, 0, 0, 0]
+                for x in range(1, len(line.split(','))-1):
                    print("Missing values replaced by 0, 0, 0, 0")
                    identity.append(i)
                    similarity.append(s)
                    equivalence.append(e)
                    mismatches.append(m)
+                    
+            identity.append(i)
+            similarity.append(s)
+            equivalence.append(e)
+            mismatches.append(m)
            cnt+=1
    return identity, similarity, equivalence, mismatches


--- a/src/workflow/Snakefile
+++ b/src/workflow/Snakefile
-DATA = "../../data"
-DATASET = expand("{data}/msa", data=DATA)
-TOOL = ["abpoa", "kalign", "kalign3", "mafft", "muscle", "spoa"]
-THRESHOLD = ["30", "40", "50", "60", "70", "80"]
-REGION = ["100", "200", "500", "1000", "2000"]
-DEPTH = ["10", "20", "50", "100", "150"]
-ALIGN = ["muscle", "mafft"]
-GRAPH_TYPES = ["identity", "similarity", "equivalence", "mismatches"]
+configfile: "./workflow/default_config.yaml"

+DATASET = expand("{data}/{format}", data=config["data"], format=config["format"])
+GRAPH_TYPES = ["identity", "similarity", "equivalence", "mismatches"]
 STAT_HEADER= "threshold, percent_identity, percent_similarity, total_equivalence, total_mismatches\n "

+
+
 rule all:
     input:
-          expand("consensus/stats/{region}/aln_all_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.txt", region=REGION, depth=DEPTH, threshold=THRESHOLD, value=THRESHOLD)
+          expand("consensus/{region}/{depth}/{align}_meta_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.fasta", region=config["region"], depth=config["depth"], threshold=config["threshold_1"], value=config["threshold_2"], align="muscle")
+     message: "Meta consensus can be found in ./consensus/[region]/[depth]/"
+
+


 rule MTool:
     input:
          "msa_handler/MSA.cpp", "msa_handler/MSA.h", "msa_handler/main.cpp", "msa_handler/Makefile"
+     log: "logs/MTool.log"
     output:
          "msa_handler/MTool"
     shell:
@@ -27,16 +28,20 @@ rule create_consensus_per_tool:
          rules.MTool.output, data=expand("{dataset}/MSA_{tool}_r{region}_d{depth}.fasta", allow_missing=True, dataset=DATASET)
     output:
          "consensus/{region}/{depth}/individual_consensus/{threshold}/consensus_{tool}_{region}_{depth}.fasta"
+     log: "logs/consensus_per_tool_{tool}_r{region}_d{depth}_t{threshold}.log"
     shell:
          "./msa_handler/MTool {input.data} {output} {wildcards.threshold}"

 rule create_consensus:
     input:
-          rules.MTool.output, "MTool_over_folder.py", expand("{dataset}/MSA_{tool}_r{region}_d{depth}.fasta", tool=TOOL, allow_missing=True, dataset=DATASET)
+          rules.MTool.output, "MTool_over_folder.py", expand("{dataset}/MSA_{tool}_r{region}_d{depth}.fasta", tool=config["tool"], allow_missing=True, dataset=DATASET)
+     params: data=DATASET
+     log: "logs/consensus_r{region}_d{depth}_t{threshold}.log"
     output:
          "consensus/{region}/{depth}/test_consensus_{region}_{depth}_{threshold}.fasta"
+     message: "Consensus region: {wildcards.region} , depth: {wildcards.depth} , threshold: {wildcards.threshold}\n"  
     shell:
-          "./MTool_over_folder.py -dir {DATASET} -o {output} -region {wildcards.region} -depth {wildcards.depth} -threshold {wildcards.threshold}"
+          "./MTool_over_folder.py -dir {params.data} -o {output} -region {wildcards.region} -depth {wildcards.depth} -threshold {wildcards.threshold}"

 rule align_mafft:
     input:
@@ -45,8 +50,10 @@ rule align_mafft:
          "consensus/{region}/{depth}/mafft_consensus_{region}_{depth}_{threshold}.fasta"
     conda:
          "envs/mafft.yaml"
+     log: "logs/align_mafft_r{region}_d{depth}_t{threshold}.log"
+     message: "Alignment with mafft region: {wildcards.region} , depth: {wildcards.depth} , threshold: {threshold}\n"
     shell:
-          "mafft --auto {input} > {output} 2>/dev/null"
+          "mafft --auto {input} > {output}"

 rule align_muscle:
     input:
@@ -55,59 +62,81 @@ rule align_muscle:
          "consensus/{region}/{depth}/muscle_consensus_{region}_{depth}_{threshold}.fasta"
     conda:
          "envs/muscle.yaml"
+     log: "logs/align_muscle_r{region}_d{depth}_t{threshold}.log"
+     message: "Alignment muscle region: {wildcards.region} , depth: {wildcards.depth} , threshold: {wildcards.threshold}\n"
     shell:
          "muscle -align {input} -output {output} > /dev/null 2>/dev/null"

-rule final_consensus:
+rule meta_consensus:
     input:
          muscle=rules.align_muscle.output
     output:
-          "consensus/{region}/{depth}/{align}_final_consensus_{region}_{depth}_t1{threshold}_t2{value}.fasta"
+          "consensus/{region}/{depth}/{align}_meta_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.fasta"
+     log: "logs/meta_consensus_{align}_r{region}_d{depth}_t1_{threshold}_t2_{value}.log"
+     message: "Meta-consensus region: {wildcards.region} , depth: {wildcards.depth} , consensus threshold: {wildcards.threshold} , meta consensus threshold: {wildcards.value}\n"
     shell:
          "./msa_handler/MTool {input.muscle} {output} {wildcards.value}"

+
+# This part handle comparison to a reference and creates plots in order to better visualize the efficiency of the consensus strategie used for both the preliminary consensus and the meta-consensus. #
+
 rule exonerate_ref_result:
     input:
-          files=expand(rules.final_consensus.output, align="muscle", allow_missing=True) ,
-          ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=DATA, allow_missing=True),
-          consensus_per_tool=expand(rules.create_consensus_per_tool.output, tool=TOOL, allow_missing=True)
-     output: "consensus/stats/{region}/aln_all_consensus_r{region}_d{depth}_t1{threshold}_t2_{value}.txt"
-     resources: format="\"%pi, %ps, %et, %em\n\""
+          files=expand(rules.meta_consensus.output, align="muscle", allow_missing=True) ,
+          ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=config["data"], allow_missing=True),
+          consensus_per_tool=expand(rules.create_consensus_per_tool.output, tool=config["tool"], allow_missing=True)
+     output: "consensus/stats/{region}/aln_all_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.txt"
+     log: "logs/exonerate_ref_result_r{region}_d{depth}_t1_{threshold}_t2{value}.log"
+     params: format="\"%pi, %ps, %et, %em\n\""
     conda: "envs/exonerate.yaml"
     shell: "./exonerate_stats.sh {output} {input.ref} {input.files} {input.consensus_per_tool}"
-#          "echo {STAT_HEADER} >> {output} \n for FILE in {input.files}; do exonerate --bestn 1 -Q dna -E -m a:g --showalignment false --showsugar false --showvulgar false --showcigar false  --ryo {resources.format} --verbose 0 -q {input.ref} -t $FILE >> {output}  2>/dev/null; done" #
+#          "echo {STAT_HEADER} >> {output} \n for FILE in {input.files}; do exonerate --bestn 1 -Q dna -E -m a:g --showalignment false --showsugar false --showvulgar false --showcigar false  --ryo {params.format} --verbose 0 -q {input.ref} -t $FILE >> {output}  2>/dev/null; done" #


-rule plottable_exonerate_meta_t2:
-     input: files=expand(rules.final_consensus.output, align="muscle", value=THRESHOLD, allow_missing=True) , ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=DATA, allow_missing=True)
+rule plottable_exonerate_meta:
+     input: files=expand(rules.meta_consensus.output, align="muscle", value=config["threshold_2"], allow_missing=True) , ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=config["data"], allow_missing=True)
     output: "consensus/plot/{region}/{depth}/{threshold}/aln_stats_meta.txt"
+     log: "logs/plot_exonerate_meta_r{region}_d{depth}_t{threshold}.log"
     conda: "envs/exonerate.yaml"
     shell: "./exonerate_stats.sh {output} {input.ref} {input.files}"

-rule plottable_exonerate_tools_t2:
+rule plottable_exonerate_tools:
     input:
-          ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=DATA, allow_missing=True),
-          consensus_per_tool=expand(rules.create_consensus_per_tool.output, value=THRESHOLD, allow_missing=True)
+          ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=config["data"], allow_missing=True),
+          consensus_per_tool=expand(rules.create_consensus_per_tool.output, value=config["threshold_2"], allow_missing=True)
     output: "consensus/plot/{region}/{depth}/{threshold}/aln_stats_{tool}.txt"
+     log: "logs/plot_exonerate_tools_{tool}_r{region}_d{depth}_t{threshold}.log"
     conda: "envs/exonerate.yaml"
     shell: "./exonerate_stats.sh {output} {input.ref} {input.consensus_per_tool}"


+rule plot_depth:
+     input: 
+     	     expand(rules.plottable_exonerate_meta.output, depth=config["depth"], allow_missing=True),
+     params: target_folder="consensus/plot/{region}/graph"
+     log: "logs/plot_depth_r{region}_t{threshold}.log"
+     output: expand("consensus/plot/{region}/graph/graph_r{region}_t{threshold}_{graph}.png", graph=GRAPH_TYPES, depth=config["depth"],  allow_missing=True)
+     shell: "./plot_results.py {params.target_folder}/graph_r{wildcards.region}_t{wildcards.threshold} {input}"
+
 rule plot_t1:
     input: 
-     	     expand(rules.plottable_exonerate_meta_t2.output, threshold=THRESHOLD, allow_missing=True),
+     	     expand(rules.plottable_exonerate_meta.output, threshold=config["threshold_1"], allow_missing=True)
     params: target_folder="consensus/plot/{region}/{depth}"
+     log: "logs/plot_t1_r{region}_d{depth}.log"
     output: expand("consensus/plot/{region}/{depth}/graph_r{region}_d{depth}_{graph}.png", graph=GRAPH_TYPES, allow_missing=True)
     shell: "./plot_results.py {params.target_folder}/graph_r{wildcards.region}_d{wildcards.depth} {input}"
     

 rule plot_t2:
-     input: expand(rules.plottable_exonerate_tools_t2.output, tool=TOOL, allow_missing=True),
-     	     rules.plottable_exonerate_meta_t2.output
+     input: expand(rules.plottable_exonerate_tools.output, tool=config["tool"], allow_missing=True),
+     	     rules.plottable_exonerate_meta.output
     params: target_folder="consensus/plot/{region}/{depth}/{threshold}/"
+     log: "logs/plot_t2_r{region}_d{depth}_t_{threshold}.log"
     output: expand("consensus/plot/{region}/{depth}/{threshold}/graph_r{region}_d{depth}_t1_{threshold}_{graph}.png", graph=GRAPH_TYPES, allow_missing=True)
     shell: "./plot_results.py {params.target_folder}graph_r{wildcards.region}_d{wildcards.depth}_t1_{wildcards.threshold}  {input}"

+
 rule make_plots:
-     input: expand(rules.plot_t2.output, region=REGION, depth=DEPTH, threshold=THRESHOLD),
-     	    expand(rules.plot_t1.output, region=REGION, depth=DEPTH)
\ No newline at end of file
+     input: expand(rules.plot_t2.output, region=config["region"], depth=config["depth"], threshold=config["threshold_1"]),
+     	    expand(rules.plot_t1.output, region=config["region"], depth=config["depth"]),
+	    expand(rules.plot_depth.output, region=config["region"], threshold=config["threshold_1"])
\ No newline at end of file
--- a/src/workflow/default_config.yaml
+++ b/src/workflow/default_config.yaml
+data: "../../data"
+format: "msa"
+tool:  ["abpoa", "kalign", "kalign3", "mafft", "muscle", "spoa"]
+threshold_1: ["30", "40", "50", "60", "70", "80"]
+threshold_2 : ["30", "40", "50", "60", "70", "80"]
+region: ["100", "200", "500", "1000", "2000"]
+depth: ["10", "20", "50", "100", "150"]
+align: ["muscle", "mafft"]