Skip to content
Snippets Groups Projects
Commit 8ed197cf authored by Rohmer Coralie
Browse files

Result of the consensus of consensus

parent 08241ced
Branches
No related tags found
No related merge requests found
......@@ -114,7 +114,7 @@ rule all :
expand(EXP + "/" + EXP_NAME + "/results/" + EXP_NAME + "_data_align_t{threshold}.csv", data_set=DATA_SETS, threshold=THRESHOLDS),
expand('{data_set}/results/' + EXP_NAME + '_graph_{attribute}.pdf', data_set=DATA_SETS, attribute=ATTRIBUTES_DATA),
expand(EXP + '/'+EXP_NAME + '/results/' + EXP_NAME + '_graph_{attribute}.pdf', data_set=DATA_SETS, attribute=ATTRIBUTES_DATA),
expand("{data_set}/seq_consensus/t{threshold}/r{region_size}/align_consensus_consensus_ref_d{depth}.txt", data_set=DATA_SETS, threshold=THRESHOLDS,region_size=REGION_SIZES, depth=DEPTHS)
expand(EXP + "/" + EXP_NAME + "/results/" + EXP_NAME + "_consensus_consensus_data_align_t{threshold}.csv", threshold=THRESHOLDS)
#-------------------------------------------------------------------------------
# Data set preparation
......@@ -502,7 +502,7 @@ rule separate_consensus :
input :
"{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus.fasta"
output :
"{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus_d{depth}.fasta"
"{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus_r{region_size}_d{depth}.fasta"
message:
"Separate consensus for {wildcards.data_set} (Threshold={wildcards.threshold}, Region size={wildcards.region_size} & Depth={wildcards.depth})"
log:
......@@ -514,10 +514,10 @@ rule separate_consensus :
rule consensus_msa :
input :
"{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus_d{depth}.fasta"
"{data_set}/seq_consensus/t{threshold}/r{region_size}/seq_consensus_r{region_size}_d{depth}.fasta"
output :
time = os.path.join('{data_set}','time','consensus_msa_t{threshold}_r{region_size}_d{depth}'),
out = os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_msa_d{depth}.fasta')
out = os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','msa_consensus_r{region_size}_d{depth}.fasta')
message:
"Consensus msa for {wildcards.data_set} (Threshold={wildcards.threshold}, Region size={wildcards.region_size} & Depth={wildcards.depth})"
log:
......@@ -527,15 +527,16 @@ rule consensus_msa :
shell:
'./src/run_MSA.sh "muscle -in {input} -out {output.out}" {input} {output.out} {output.time} {log} 1'
rule consensus_consensus:
input :
os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_msa_d{depth}.fasta')
expand('{{data_set}}/seq_consensus/t{{threshold}}/r{{region_size}}/msa_consensus_r{{region_size}}_d{depth}.fasta',depth=DEPTHS)
output :
os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_consensus_d{depth}.fasta')
"{data_set}/seq_consensus/t{threshold}/r{region_size}/consensus_consensus_r{region_size}.fasta"
message:
"Consensus consensus for {wildcards.data_set} (Threshold={wildcards.threshold}, Region size={wildcards.region_size} & Depth={wildcards.depth})"
"Consensus consensus for {wildcards.data_set} (Threshold={wildcards.threshold} & Region size={wildcards.region_size})"
log:
"{data_set}/logs/16_consensus_consensus_t{threshold}_r{region_size}_d{depth}.log"
"{data_set}/logs/16_consensus_consensus_t{threshold}_r{region_size}.log"
conda:
"env_conda/python3.yaml"
shell:
......@@ -545,14 +546,14 @@ rule consensus_consensus:
rule alignment_consensus_consensus_ref :
input :
consensus=os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_consensus_d{depth}.fasta'),
consensus=os.path.join('{data_set}','seq_consensus','t{threshold}','r{region_size}','consensus_consensus_r{region_size}.fasta'),
region="{data_set}/seq_selectes_region/region_seq_r{region_size}.fasta"
output :
"{data_set}/seq_consensus/t{threshold}/r{region_size}/align_consensus_consensus_ref_d{depth}.txt",
"{data_set}/seq_consensus/t{threshold}/r{region_size}/align_consensus_consensus_ref_r{region_size}.txt",
message:
"Alignment_consensus_consensus_ref for {wildcards.data_set} (Threshold={wildcards.threshold}, Region size={wildcards.region_size} & Depth={wildcards.depth})"
"Alignment_consensus_consensus_ref for {wildcards.data_set} (Threshold={wildcards.threshold} & Region size={wildcards.region_size})"
log:
"{data_set}/logs/17_alignment_consensus_consensus_ref_t{threshold}_r{region_size}_d{depth}.log"
"{data_set}/logs/17_alignment_consensus_consensus_ref_t{threshold}_r{region_size}.log"
conda:
"env_conda/exonerate.yaml"
shell :
......@@ -564,3 +565,35 @@ rule alignment_consensus_consensus_ref :
' echo "ERROR: No sequences" >>{log};'
' touch {output};'
'fi'
# For one data set and one threshold, collect the alignment reports of the
# consensus-of-consensus against the reference (one per region size) and run
# ./src/data_formatting.py on them; the script's stdout becomes the CSV output.
rule consensus_consensus_data_formatting :
    input :
        # {data_set} and {threshold} stay wildcards (double braces); only
        # region_size is expanded. The alignment files no longer depend on
        # depth, so no depth key is passed to expand(): an unused key can
        # repeat every path once per depth on Snakemake versions that do not
        # prune unused expansion keys.
        expand("{{data_set}}/seq_consensus/t{{threshold}}/r{region_size}/align_consensus_consensus_ref_r{region_size}.txt" , region_size=REGION_SIZES),
    output :
        "{data_set}/results/"+EXP_NAME+"_consensus_consensus_data_align_t{threshold}.csv"
    message:
        "Consensus consensus data formatting for {wildcards.data_set} (Threshold={wildcards.threshold})"
    log:
        "{data_set}/logs/18_consensus_consensus_data_formatting_t{threshold}.log"
    conda:
        "env_conda/python3.yaml"
    shell :
        # The command line is first written to the log (truncating it), then
        # executed; stdout goes to the CSV, stderr is appended to the log.
        'ORDER="./src/data_formatting.py -in {input}";'
        'echo "ORDER: $ORDER" >{log};'
        '$ORDER >{output} 2>>{log}'
# For one threshold, hand the consensus-of-consensus CSV of every data set to
# ./src/region_mean.py, which writes the experiment-level CSV under
# EXP/EXP_NAME/results.
rule consensus_consensus_region_mean:
    input :
        # {threshold} stays a wildcard (double braces); data_set is expanded
        # over all configured data sets.
        expand(
            "{data_set}/results/" + EXP_NAME + "_consensus_consensus_data_align_t{{threshold}}.csv",
            data_set=DATA_SETS,
        )
    output :
        "/".join([EXP, EXP_NAME, "results", EXP_NAME + "_consensus_consensus_data_align_t{threshold}.csv"])
    message:
        "Consensus consensus region mean for " + "/".join([EXP, EXP_NAME]) + " (threshold={wildcards.threshold})"
    log:
        "/".join([EXP, EXP_NAME, "logs", "19_consensus_consensus_region_mean_t{threshold}.log"])
    conda:
        "env_conda/python3.yaml"
    shell :
        # The command line is first written to the log (truncating it), then
        # executed with stderr appended to the same log.
        'ORDER="./src/region_mean.py -in {input} -out {output} -t {wildcards.threshold}";'
        'echo "ORDER: $ORDER" >{log};'
        '$ORDER 2>>{log}'
......@@ -48,11 +48,11 @@ else:
except:
end_files = 1
TIME=True
try:
file_time=[sys.argv[sys.argv.index("-t")+1]][0]
except:
print("ERROR: The name of the input time file is missing.\n")
use()
TIME=False
#Main
data = {}
......@@ -102,6 +102,8 @@ for file in files_alignement:
data[MSA][read_size][nb_read]=[nb_ambiguity,r_ambiguity,nb_identity,r_identity,
error,r_error,match,r_match,size_seq]
if (TIME == True):
pass
file_read = open(file_time, "r")
for line in file_read.readlines():
if ( not re.search("^MSA", line) ):
......@@ -123,7 +125,11 @@ for line in file_read.readlines():
#output
sep=","
print("MSA","region_size","depth","number_Ambiguity","percentage_Ambiguity","number_Identity","percentage_Identity","number_Error","percentage_Error","number_Match","percentage_Match","size","time","elapsed","memory",sep=sep)
print("MSA","region_size","depth","number_Ambiguity","percentage_Ambiguity","number_Identity","percentage_Identity","number_Error","percentage_Error","number_Match","percentage_Match","size",sep=sep,end="")
if (TIME == True):
print(sep+"time","elapsed","memory",sep=sep)
else:
print("")
for MSA in data:
for read_size in data[MSA]:
for nb_read in data[MSA][read_size]:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment