Result of the consensus of consensus

8ed197cf · Rohmer Coralie · 08241ced · 8ed197cf · 8ed197cf
Commit 8ed197cf authored Jan 28, 2022 by Rohmer Coralie
--- a/Snakefile
+++ b/Snakefile
@@ -114,7 +114,7 @@ rule all :
        expand(EXP + "/" + EXP_NAME + "/results/" + EXP_NAME + "_data_align_t{threshold}.csv", data_set=DATA_SETS, threshold=THRESHOLDS),
        expand('{data_set}/results/' + EXP_NAME + '_graph_{attribute}.pdf', data_set=DATA_SETS, attribute=ATTRIBUTES_DATA),
        expand(EXP + '/'+EXP_NAME + '/results/' + EXP_NAME + '_graph_{attribute}.pdf', data_set=DATA_SETS, attribute=ATTRIBUTES_DATA),
-        expand("{data_set}/seq_consensus/t{threshold}/r{region_size}/align_consensus_consensus_ref_d{depth}.txt", data_set=DATA_SETS, threshold=THRESHOLDS,region_size=REGION_SIZES, depth=DEPTHS)
+        expand(EXP + "/" + EXP_NAME + "/results/" + EXP_NAME + "_consensus_consensus_data_align_t{threshold}.csv", threshold=THRESHOLDS)

 #-------------------------------------------------------------------------------
 # Data set preparation
@@ -502,7 +502,7 @@ rule separate_consensus :
    input :
        "{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus.fasta"
    output :
-        "{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus_d{depth}.fasta"
+        "{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus_r{region_size}_d{depth}.fasta"
    message:
        "Separate consensus for {wildcards.data_set} (Threshold={wildcards.threshold}, Region size={wildcards.region_size} & Depth={wildcards.depth})"
    log:
@@ -514,10 +514,10 @@ rule separate_consensus :

 rule consensus_msa :
    input :
-        "{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus_d{depth}.fasta"
+        "{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus_r{region_size}_d{depth}.fasta"
    output :
        time = os.path.join('{data_set}','time','consensus_msa_t{threshold}_r{region_size}_d{depth}'),
-        out = os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_msa_d{depth}.fasta')
+        out = os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','msa_consensus_r{region_size}_d{depth}.fasta')
    message:
        "Consensus msa for {wildcards.data_set} (Threshold={wildcards.threshold}, Region size={wildcards.region_size} & Depth={wildcards.depth})"
    log:
@@ -527,15 +527,16 @@ rule consensus_msa :
    shell:
        './src/run_MSA.sh "muscle -in {input} -out {output.out}" {input} {output.out} {output.time} {log} 1'

+
 rule consensus_consensus:
    input :
-        os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_msa_d{depth}.fasta')
+        expand('{{data_set}}/seq_consensus/t{{threshold}}/r{{region_size}}/msa_consensus_r{{region_size}}_d{depth}.fasta',depth=DEPTHS)
    output :
-        os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_consensus_d{depth}.fasta')
+        "{data_set}/seq_consensus/t{threshold}/r{region_size}/consensus_consensus_r{region_size}.fasta"
    message:
-        "Consensus consensus for {wildcards.data_set} (Threshold={wildcards.threshold}, Region size={wildcards.region_size} & Depth={wildcards.depth})"
+       "Consensus consensus for {wildcards.data_set} (Threshold={wildcards.threshold} & Region size={wildcards.region_size})"
    log:
-        "{data_set}/logs/16_consensus_consensus_t{threshold}_r{region_size}_d{depth}.log"
+        "{data_set}/logs/16_consensus_consensus_t{threshold}_r{region_size}.log"
    conda:
        "env_conda/python3.yaml"
    shell:
@@ -545,14 +546,14 @@ rule consensus_consensus:

 rule alignment_consensus_consensus_ref :
    input :
-        consensus=os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_consensus_d{depth}.fasta'),
+        consensus=os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_consensus_r{region_size}.fasta'),
        region="{data_set}/seq_selectes_region/region_seq_r{region_size}.fasta"
    output :
-        "{data_set}/seq_consensus/t{threshold}/r{region_size}/align_consensus_consensus_ref_d{depth}.txt",
+        "{data_set}/seq_consensus/t{threshold}/r{region_size}/align_consensus_consensus_ref_r{region_size}.txt",
    message:
-        "Alignment_consensus_consensus_ref for {wildcards.data_set} (Threshold={wildcards.threshold}, Region size={wildcards.region_size} & Depth={wildcards.depth})"
+        "Alignment_consensus_consensus_ref for {wildcards.data_set} (Threshold={wildcards.threshold} & Region size={wildcards.region_size})"
    log:
-        "{data_set}/logs/17_alignment_consensus_consensus_ref_t{threshold}_r{region_size}_d{depth}.log"
+        "{data_set}/logs/17_alignment_consensus_consensus_ref_t{threshold}_r{region_size}.log"
    conda:
        "env_conda/exonerate.yaml"
    shell :
@@ -564,3 +565,35 @@ rule alignment_consensus_consensus_ref :
        '   echo "ERROR: No sequences" >>{log};'
        '   touch {output};'
        'fi'
+
+rule consensus_consensus_data_formatting :  # For one data set and threshold, feed the consensus-of-consensus alignment reports of every region size to data_formatting.py to produce one CSV.
+    input :
+        expand("{{data_set}}/seq_consensus/t{{threshold}}/r{region_size}/align_consensus_consensus_ref_r{region_size}.txt", region_size=REGION_SIZES)  # only region_size is expanded: the file name has no depth part, and an unused depth keyword could repeat each path
+    output :
+        "{data_set}/results/"+EXP_NAME+"_consensus_consensus_data_align_t{threshold}.csv"
+    message:
+        "Consensus consensus data formatting for {wildcards.data_set} (Threshold={wildcards.threshold})"
+    log:
+        "{data_set}/logs/18_consensus_consensus_data_formatting_t{threshold}.log"
+    conda:
+        "env_conda/python3.yaml"
+    shell :
+        'ORDER="./src/data_formatting.py -in {input}";'
+        'echo "ORDER: $ORDER" >{log};'
+        '$ORDER >{output} 2>>{log}'  # no -t option is passed; the script's stdout becomes the CSV and its stderr is appended to the log
+
+rule consensus_consensus_region_mean:  # Run region_mean.py on the per-data-set consensus-of-consensus CSVs of one threshold to write the experiment-level CSV.
+    input:
+        expand("{data_set}/results/" + EXP_NAME + "_consensus_consensus_data_align_t{{threshold}}.csv", data_set=DATA_SETS)  # one CSV per data set; threshold stays a wildcard
+    output:
+        EXP + "/" + EXP_NAME + "/results/" + EXP_NAME + "_consensus_consensus_data_align_t{threshold}.csv"
+    log:
+        EXP + "/" + EXP_NAME + "/logs/19_consensus_consensus_region_mean_t{threshold}.log"
+    conda:
+        "env_conda/python3.yaml"
+    message:
+        "Consensus consensus region mean for " + EXP + "/" + EXP_NAME + " (threshold={wildcards.threshold})"
+    shell:
+        'ORDER="./src/region_mean.py -in {input} -out {output} -t {wildcards.threshold}";'
+        'echo "ORDER: $ORDER" >{log};'
+        '$ORDER 2>>{log}'  # the command line is logged first, then the script's stderr is appended
--- a/src/data_formatting.py
+++ b/src/data_formatting.py
@@ -48,11 +48,11 @@ else:
 		except:
 			end_files = 1

+TIME=True
 try:
    file_time=[sys.argv[sys.argv.index("-t")+1]][0]
 except:
-	print("ERROR: The name of the input time file is missing.\n")
-	use()
+	TIME=False

 #Main
 data = {}
@@ -102,6 +102,8 @@ for file in files_alignement:
 		data[MSA][read_size][nb_read]=[nb_ambiguity,r_ambiguity,nb_identity,r_identity,
 		error,r_error,match,r_match,size_seq]

+if (TIME == True):
+	pass
 	file_read = open(file_time, "r")
 	for line in file_read.readlines():
 		if ( not re.search("^MSA", line) ):
@@ -123,7 +125,11 @@ for line in file_read.readlines():

 #output
 sep=","
-print("MSA","region_size","depth","number_Ambiguity","percentage_Ambiguity","number_Identity","percentage_Identity","number_Error","percentage_Error","number_Match","percentage_Match","size","time","elapsed","memory",sep=sep)
+print("MSA","region_size","depth","number_Ambiguity","percentage_Ambiguity","number_Identity","percentage_Identity","number_Error","percentage_Error","number_Match","percentage_Match","size",sep=sep,end="")
+if (TIME == True):
+	print(sep+"time","elapsed","memory",sep=sep)
+else:
+	print("")
 for MSA in data:
 	for read_size in data[MSA]:
 		for nb_read in data[MSA][read_size]: