Skip to content
Snippets Groups Projects
Commit 856cd914 authored by Rohmer Coralie's avatar Rohmer Coralie
Browse files

new algo to determine the consensus

parent 107eecb6
No related branches found
No related tags found
No related merge requests found
...@@ -91,17 +91,19 @@ else: ...@@ -91,17 +91,19 @@ else:
ATTRIBUTES_DATA = ["nbIdentity","rIdentity","nbError","rError","nbMatch","rMatch","time","memory"] ATTRIBUTES_DATA = ["number_Ambiguity","percentage_Ambiguity","number_Identity","percentage_Identity","number_Error","percentage_Error","number_Match","percentage_Match","size","time","elapsed","memory"]
onstart: onstart:
print("Workflow Start") print("Workflow Start")
onsuccess: onsuccess:
os.system('grep -r "ERROR" ' + EXP + '/'+EXP_NAME + '/*/logs ' + EXP + '/'+EXP_NAME + '/logs >' +EXP + '/'+ EXP_NAME + '/final_log') os.system('grep -r "ERROR" ' + EXP + '/'+EXP_NAME + '/*/logs ' + EXP + '/'+EXP_NAME + '/logs >' +EXP + '/'+ EXP_NAME + '/final_log')
os.system('if [ `ls|grep ".dnd"|wc -l` -ne 0 ]; then rm *.dnd;fi')
print("Workflow finished, no error") print("Workflow finished, no error")
onerror: onerror:
os.system('grep -r "ERROR" ' + EXP + '/'+EXP_NAME + '/*/logs ' + EXP + '/'+EXP_NAME + '/logs >' + EXP + '/'+EXP_NAME + '/final_log') os.system('grep -r "ERROR" ' + EXP + '/'+EXP_NAME + '/*/logs ' + EXP + '/'+EXP_NAME + '/logs >' + EXP + '/'+EXP_NAME + '/final_log')
os.system('if [ `ls|grep ".dnd"|wc -l` -ne 0 ]; then rm *.dnd;fi')
print("An error has occurred. Please look in log files: " + EXP + '/'+EXP_NAME + "/final_log") print("An error has occurred. Please look in log files: " + EXP + '/'+EXP_NAME + "/final_log")
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
......
...@@ -109,21 +109,21 @@ for line in file_read.readlines(): ...@@ -109,21 +109,21 @@ for line in file_read.readlines():
msa = tab_line[0] msa = tab_line[0]
read_size = tab_line[1] read_size = tab_line[1]
nb_read = tab_line[2] nb_read = tab_line[2]
user = float(tab_line[3]) #user = float(tab_line[3])
system = float(tab_line[4]) #system = float(tab_line[4])
time = float(tab_line[5]) time = float(tab_line[5])
elapsed = float(tab_line[6]) elapsed = float(tab_line[6])
cpu = float(tab_line[7]) #cpu = float(tab_line[7])
memory = float(tab_line[8]) memory = float(tab_line[8])
if msa in data: if msa in data:
if read_size in data[msa]: if read_size in data[msa]:
if nb_read in data[msa][read_size]: if nb_read in data[msa][read_size]:
data[msa][read_size][nb_read] += [user,system,time,elapsed,cpu,memory] data[msa][read_size][nb_read] += [time,elapsed,memory]
#output #output
sep="," sep=","
print("MSA","region_size","depth","nbAmbiguity","rAmbiguity","nbIdentity","rIdentity","nbError","rError","nbMatch","rMatch","size","user","system","time","elapsed","cpu","memory",sep=sep) print("MSA","region_size","depth","number_Ambiguity","percentage_Ambiguity","number_Identity","percentage_Identity","number_Error","percentage_Error","number_Match","percentage_Match","size","time","elapsed","memory",sep=sep)
for MSA in data: for MSA in data:
for read_size in data[MSA]: for read_size in data[MSA]:
for nb_read in data[MSA][read_size]: for nb_read in data[MSA][read_size]:
......
...@@ -39,32 +39,59 @@ def consensus_iupac(alignment,threshold): ...@@ -39,32 +39,59 @@ def consensus_iupac(alignment,threshold):
frequency_letters={} frequency_letters={}
#Calculates the frequency of each letter #Calculates the frequency of each letter
order_frequencys=[]
order_letters=[]
for letter in letters: for letter in letters:
pass pass
frequency_letter=len(re.findall(letter,nucleotides))/nb_reads frequency_letter=100*len(re.findall(letter,nucleotides))/nb_reads
frequency_letters[letter] = frequency_letter frequency_letters[letter] = frequency_letter
#Gap closure if order_frequencys == []:
if ("-" not in frequency_letters): order_frequencys.append(frequency_letter)
frequency_letters["-"] = 0 order_letters.append(letter)
if (frequency_letters["-"] < 0.50):
frequency_sum = 1 - frequency_letters["-"]
del frequency_letters["-"]
letters_threshold =[]
for letter in frequency_letters:
if ( (frequency_letters[letter]/frequency_sum) > threshold):
letters_threshold = letters_threshold + [letter]
if (len(letters_threshold) >= 1 ):
if (len(letters_threshold) == 1 ):
seq+=letters_threshold[0]
else: else:
if (len(letters_threshold) > 1): len_order_frequencys=len(order_frequencys)
letters_threshold.sort() i=0
id_iupac = "".join(letters_threshold) find=0
while (i<len_order_frequencys and find==0):
if(frequency_letter>order_frequencys[i]):
order_frequencys.append(order_frequencys[len_order_frequencys-1])
order_letters.append(order_letters[len_order_frequencys-1])
len_order_frequencys+=1
j=len_order_frequencys-2
while (j>i):
order_frequencys[j]=order_frequencys[j-1]
order_letters[j]=order_letters[j-1]
j-=1
order_frequencys[i]=frequency_letter
order_letters[i]=letter
find=1
i+=1
if i>=len_order_frequencys:
order_frequencys.append(frequency_letter)
order_letters.append(letter)
len_order_frequencys=len(order_frequencys)
#Gap closure
if ("-" != order_letters[0]):
conserved_nucs=[order_letters[0]]
total_frequency=order_frequencys[0];
i=1
while(total_frequency < threshold*100 and i<len_order_frequencys):
if(order_letters[i]!="-" and order_frequencys[i]!= 0):
total_frequency+=order_frequencys[i]
conserved_nucs=conserved_nucs + [order_letters[i]]
j=i
while (j+1<len_order_frequencys and order_frequencys[i]==order_frequencys[j+1]):
total_frequency+=order_frequencys[j+1]
conserved_nucs=conserved_nucs + [order_letters[j+1]]
j+=1
i=j
i+=1
conserved_nucs.sort()
if (len(conserved_nucs) > 1):
id_iupac = "".join(conserved_nucs)
seq+=nuc_to_iupac[id_iupac] seq+=nuc_to_iupac[id_iupac]
else: else:
seq += "N" seq += conserved_nucs[0]
return seq return seq
#Creation of the consensus sequence #Creation of the consensus sequence
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment