Skip to content
Snippets Groups Projects
Commit 01148588 authored by coralie's avatar coralie
Browse files

nuc vs iupac case details

parent 090c985e
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,7 @@
from Bio import SearchIO
import sys,re,os
from collections import Counter
import tool_consensus
import tool_consensus as tc
"""
Project "The Limits of Multiple Aligners"
......@@ -18,9 +18,10 @@ def use():
print("\nScript:\tdata_formating.py",
"Objectif: \n",
"To Run:",
"./SNP_or_Error.py -in <file1> <file2>...",
" ./diploid_metric.py -in <file1> <file2>... -o <output_file>",
"Arguments: ",
" -required: -in : file alignement (exonerate).",sep="\n")
" -required: -in : file alignement (exonerate)",
" -o : name of the output file",sep="\n")
sys.exit()
#Argument testing
......@@ -73,7 +74,6 @@ for file in files_alignement:
occurence_similarity = Counter(similarity)
nb_ambiguity_seq_consensus=0
nb_ambiguity_ref=0
for i in "RYSWKMBDHVN":
if (i in occurence_seq):
nb_ambiguity_seq_consensus += occurence_seq[i]
......@@ -85,8 +85,12 @@ for file in files_alignement:
identical_iupac=0
iupac_vs_iupac=0
iupac_vs_nuc=0
iupac_vs_nucIupac=0
iupac_vs_nucNoIupac=0
iupac_vs_gap=0
nuc_vs_iupac=0
nucIupac_vs_iupac=0
nucNoIupac_vs_iupac=0
gap_vs_iupac=0
for i in range(0,len(seq)):
if(seq[i] in "RYSWKMBDHVN"):
......@@ -100,12 +104,20 @@ for file in files_alignement:
iupac_vs_gap+=1
else:
iupac_vs_nuc+=1
if(tc.iupac_to_nuc[seq[i]].find(hsp.hit[i]) == -1):
iupac_vs_nucNoIupac+=1
else:
iupac_vs_nucIupac+=1
else:
if(hsp.hit[i] in "RYSWKMBDHVN"):
if(seq[i]=="-"):
gap_vs_iupac+=1
else:
nuc_vs_iupac+=1
if(tc.iupac_to_nuc[hsp.hit[i]].find(seq[i]) == -1):
nucNoIupac_vs_iupac+=1
else:
nucIupac_vs_iupac+=1
if (MSA not in data):
data[MSA]={}
......@@ -119,13 +131,13 @@ for file in files_alignement:
if (nb_read not in depths):
depths.append(nb_read)
data[MSA][read_size][nb_read]=[nb_ambiguity_ref,nb_ambiguity_seq_consensus,identical_iupac,iupac_vs_iupac,iupac_vs_nuc,iupac_vs_gap,nuc_vs_iupac,gap_vs_iupac]
data[MSA][read_size][nb_read]=[nb_ambiguity_ref,nb_ambiguity_seq_consensus,identical_iupac,iupac_vs_iupac,iupac_vs_nuc,iupac_vs_nucNoIupac,iupac_vs_nucIupac,iupac_vs_gap,nuc_vs_iupac,nucNoIupac_vs_iupac,nucIupac_vs_iupac,gap_vs_iupac]
depths.sort()
read_sizes.sort()
#output
sep=","
output=open(output,"w")
output.write("MSA,region_size,depth,nb_iupac_ref,nb_iupac_seq_consensus,identical_iupac,iupac_vs_iupac,sc_iupac_vs_r_nuc,sc_iupac_vs_r_gap,sc_nuc_vs_r_iupac,sc_gap_vs_r_iupac\n")
output.write("MSA,region_size,depth,nb_iupac_ref,nb_iupac_seq_consensus,identical_iupac,iupac_vs_iupac,sc_iupac_vs_r_nuc,sc_iupac_vs_r_nucNoIupac,sc_iupac_vs_r_nucIupac,sc_iupac_vs_r_gap,sc_nuc_vs_r_iupac,sc_nucNoIupac_vs_r_iupac,sc_nucIupac_vs_r_iupac,sc_gap_vs_r_iupac\n")
for MSA in data:
for read_size in read_sizes:
if(read_size in data[MSA]):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment