Ajout d'environnements pour certains outils et modification du pipeline pour tests massifs

ff3f96ca · Lihouck Flavien · b3231ae0 · ff3f96ca · ff3f96ca · ff3f96ca
Commit ff3f96ca authored 3 years ago by Lihouck Flavien
--- a/src/MTool_over_folder.py
+++ b/src/MTool_over_folder.py
@@ -14,9 +14,9 @@ def main() :
    parser.add_argument("-directory", help="target directory", required=True)
    parser.add_argument("-output", help="output name")
    parser.add_argument("-region", help="MSA region")
-    parser.add_argument("-dimension", help="MSA dimension")
+    parser.add_argument("-depth", help="MSA depth")
    parser.add_argument("-threshold", help="consensus threshold")
-    parser.add_argument("--ploidity", help="expected ploidity")
+    parser.add_argument("-ploidity", help="expected ploidity")
    args = parser.parse_args()
    if args.output:
        output = args.output
@@ -24,8 +24,8 @@ def main() :
        output = "default.txt"
    if args.region:
        pattern += "r"+args.region+"_"
-    if args.dimension:
+    if args.depth:
-        pattern += "d"+args.dimension+"."
+        pattern += "d"+args.depth+"."
    for filename in os.listdir(args.directory) :
        command = ["./msa_handler/MTool", args.directory+"/"+filename, output]
        if args.threshold:

--- a/src/exonerate_stats.sh
+++ b/src/exonerate_stats.sh
+#!/bin/bash
+printf "output_file, percent_identity, percent_similarity, total_equivalence, total_mismatches\n" > $1
+for FILE in $@ ;
+do
+  if [ $FILE != $1 ] && [ $FILE != $2 ] ;
+  then
+  printf " %q :" $FILE  >> $1
+    exonerate --bestn 1 -Q dna -E -m a:g --showalignment false --showsugar false --showvulgar false --showcigar false  --ryo "%pi, %ps, %et, %em\n"  --verbose 0 -q $2 -t $FILE >> $1 2>/dev/null ;
+  fi;
+done
--- a/src/split_fasta.sh
+++ b/src/split_fasta.sh
--- a/src/workflow/Snakefile
+++ b/src/workflow/Snakefile
 DATA = "../../data"
 DATASET = expand("{data}/msa", data=DATA)
 TOOL = ["abpoa", "kalign", "kalign3", "mafft", "muscle", "spoa"]
-THRESHOLD = ["50", "60", "70", "80", "90"]
+THRESHOLD = ["30", "40", "50", "60", "70", "80"]
 REGION = ["100", "200", "500", "1000", "2000"]
 DEPTH = ["10", "20", "50", "100", "150"]
 ALIGN = ["muscle", "mafft"]
+STAT_HEADER= "threshold, percent_identity, percent_similarity, total_equivalence, total_mismatches\n "
 rule all:
-     input: expand("consensus/aln_all_consensus_r{region}_d{depth}.txt", region=REGION, depth=DEPTH)
+     input:
+          expand("consensus/stats/{region}/aln_all_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.txt", region=REGION, depth=DEPTH, threshold=THRESHOLD, value=THRESHOLD)
 rule MTool:
-     input: "msa_handler/MSA.cpp", "msa_handler/MSA.h", "msa_handler/main.cpp", "msa_handler/Makefile"
+     input:
-     output: "msa_handler/MTool"
+          "msa_handler/MSA.cpp", "msa_handler/MSA.h", "msa_handler/main.cpp", "msa_handler/Makefile"
-     shell: "cd msa_handler && make"
+     output:
+          "msa_handler/MTool"
+     shell:
+          "cd msa_handler && make"
+# Build one consensus FASTA per aligner: runs the MTool binary on a single
+# MSA_{tool}_r{region}_d{depth}.fasta file, passing the {threshold} wildcard
+# as MTool's third argument.
+rule create_consensus_per_tool:
+     input:
+          # rules.MTool.output is listed so the binary is built before this rule runs.
+          # allow_missing=True expands only `dataset`; tool/region/depth stay wildcards.
+          rules.MTool.output, data=expand("{dataset}/MSA_{tool}_r{region}_d{depth}.fasta", allow_missing=True, dataset=DATASET)
+     output:
+          "consensus/{region}/{depth}/individual_consensus/{threshold}/consensus_{tool}_{region}_{depth}.fasta"
+     shell:
+          # Only the named `data` input is passed to MTool, not the binary itself.
+          "./msa_handler/MTool {input.data} {output} {wildcards.threshold}"
 rule create_consensus:
-     input: rules.MTool.output, "MTool_over_folder.py", expand("{dataset}/MSA_{tool}_r{region}_d{depth}.fasta", tool=TOOL, allow_missing=True, dataset=DATASET)
+     input:
-     output: "consensus/{region}/{depth}/test_consensus_{region}_{depth}_{threshold}.fasta"
+          rules.MTool.output, "MTool_over_folder.py", expand("{dataset}/MSA_{tool}_r{region}_d{depth}.fasta", tool=TOOL, allow_missing=True, dataset=DATASET)
-     shell: "./MTool_over_folder.py -dir {DATASET} -o {output} -region {wildcards.region} -depth {wildcards.depth} -threshold {wildcards.threshold}"
+     output:
+          "consensus/{region}/{depth}/test_consensus_{region}_{depth}_{threshold}.fasta"
+     shell:
+          "./MTool_over_folder.py -dir {DATASET} -o {output} -region {wildcards.region} -depth {wildcards.depth} -threshold {wildcards.threshold}"
 rule align_mafft:
-     input: rules.create_consensus.output
+     input:
-     output: "consensus/{region}/{depth}/mafft_consensus_{region}_{depth}_{threshold}.fasta"
+          rules.create_consensus.output
-     shell: "mafft --auto {input} > {output}"
+     output:
+          "consensus/{region}/{depth}/mafft_consensus_{region}_{depth}_{threshold}.fasta"
+     conda:
+          "envs/mafft.yaml"
+     shell:
+          "mafft --auto {input} > {output} 2>/dev/null"
 rule align_muscle:
-     input: rules.create_consensus.output
+     input:
-     output: "consensus/{region}/{depth}/muscle_consensus_{region}_{depth}_{threshold}.fasta"
+          rules.create_consensus.output
-     shell: "muscle -align {input} -output {output} > /dev/null 2> /dev/null"
+     output:
+          "consensus/{region}/{depth}/muscle_consensus_{region}_{depth}_{threshold}.fasta"
+     conda:
+          "envs/muscle.yaml"
+     shell:
+          "muscle -align {input} -output {output} > /dev/null 2>/dev/null"
 rule final_consensus:
-     input: rules.align_mafft.output, rules.align_muscle.output
+     input:
-     output: "consensus/{region}/{depth}/{align}_final_consensus_{region}_{depth}_{threshold}.fasta"
+          muscle=rules.align_muscle.output
-     shell: "./msa_handler/MTool consensus/{wildcards.region}/{wildcards.depth}/{wildcards.align}_consensus_{wildcards.region}_{wildcards.depth}_{wildcards.threshold}.fasta consensus/{wildcards.region}/{wildcards.depth}/{wildcards.align}_final_consensus_{wildcards.region}_{wildcards.depth}_{wildcards.threshold}.fasta {wildcards.threshold} 2"
+     output:
+          "consensus/{region}/{depth}/{align}_final_consensus_{region}_{depth}_t1{threshold}_t2{value}.fasta"
+     shell:
+          "./msa_handler/MTool {input.muscle} {output} {wildcards.value}"
 rule exonerate_ref_result:
-     input:  files=expand("consensus/{region}/{depth}/muscle_final_consensus_{region}_{depth}_{threshold}.fasta", threshold=THRESHOLD, allow_missing=True) , ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=DATA, allow_missing=True)
+     input:
-     output: "consensus/aln_all_consensus_r{region}_d{depth}.txt"
+          files=expand(rules.final_consensus.output, align="muscle", allow_missing=True) ,
+          ref=expand("{data}/seq_selectes_region/region_seq_r{region}.fasta", data=DATA, allow_missing=True),
+          consensus_per_tool=expand(rules.create_consensus_per_tool.output, tool=TOOL, allow_missing=True)
+     output: "consensus/stats/{region}/aln_all_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.txt"
+     resources: format="\"%pi, %ps, %et, %em\n\""
     conda: "envs/exonerate.yaml"
-     shell: "for FILE in {input.files}; do exonerate --bestn 1 -Q dna -E -m a:g {input.ref} $FILE >> {output}; done 2>/dev/null"
+     shell: "./exonerate_stats.sh {output} {input.ref} {input.files} {input.consensus_per_tool}"
+#          "echo {STAT_HEADER} >> {output} \n for FILE in {input.files}; do exonerate --bestn 1 -Q dna -E -m a:g --showalignment false --showsugar false --showvulgar false --showcigar false  --ryo {resources.format} --verbose 0 -q {input.ref} -t $FILE >> {output}  2>/dev/null; done" #
--- a/src/workflow/envs/mafft.yaml
+++ b/src/workflow/envs/mafft.yaml
+# Conda environment providing mafft for the workflow's `conda:` directive.
+name: mafft
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_gnu
+  - libgcc-ng=11.2.0=h1d223b6_16
+  - libgomp=11.2.0=h1d223b6_16
+  - mafft=7.505=hec16e2b_0
+# The exported `prefix:` entry was dropped: it was an absolute path on the
+# author's machine and is not needed to recreate the environment elsewhere.
--- a/src/workflow/envs/muscle.yaml
+++ b/src/workflow/envs/muscle.yaml
+# Conda environment providing muscle for the workflow's `conda:` directive.
+name: muscle
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_gnu
+  - libgcc-ng=11.2.0=h1d223b6_16
+  - libgomp=11.2.0=h1d223b6_16
+  - libstdcxx-ng=11.2.0=he4da1e4_16
+  - muscle=5.1=h9f5acd7_1
+# The exported `prefix:` entry was dropped: it was an absolute path on the
+# author's machine and is not needed to recreate the environment elsewhere.