Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
msa-limit
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Rohmer Coralie
msa-limit
Commits
856cd914
Commit
856cd914
authored
3 years ago
by
Rohmer Coralie
Browse files
Options
Downloads
Patches
Plain Diff
new algo to determine the consensus
parent
107eecb6
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
Snakefile
+3
-1
3 additions, 1 deletion
Snakefile
src/data_formatting.py
+5
-5
5 additions, 5 deletions
src/data_formatting.py
src/tool_consensus.py
+48
-21
48 additions, 21 deletions
src/tool_consensus.py
with
56 additions
and
27 deletions
Snakefile
+
3
−
1
View file @
856cd914
...
@@ -91,17 +91,19 @@ else:
...
@@ -91,17 +91,19 @@ else:
ATTRIBUTES_DATA = ["n
bIdentity","r
Identity","n
b
Error","
r
Error","n
b
Match","
rMatch","time
","memory"]
ATTRIBUTES_DATA = ["n
umber_Ambiguity","percentage_Ambiguity","number_Identity","percentage_
Identity","n
umber_
Error","
percentage_
Error","n
umber_
Match","
percentage_Match","size","time","elapsed
","memory"]
onstart:
onstart:
print("Workflow Start")
print("Workflow Start")
onsuccess:
onsuccess:
os.system('grep -r "ERROR" ' + EXP + '/'+EXP_NAME + '/*/logs ' + EXP + '/'+EXP_NAME + '/logs >' +EXP + '/'+ EXP_NAME + '/final_log')
os.system('grep -r "ERROR" ' + EXP + '/'+EXP_NAME + '/*/logs ' + EXP + '/'+EXP_NAME + '/logs >' +EXP + '/'+ EXP_NAME + '/final_log')
os.system('if [ `ls|grep ".dnd"|wc -l` -ne 0 ]; then rm *.dnd;fi')
print("Workflow finished, no error")
print("Workflow finished, no error")
onerror:
onerror:
os.system('grep -r "ERROR" ' + EXP + '/'+EXP_NAME + '/*/logs ' + EXP + '/'+EXP_NAME + '/logs >' + EXP + '/'+EXP_NAME + '/final_log')
os.system('grep -r "ERROR" ' + EXP + '/'+EXP_NAME + '/*/logs ' + EXP + '/'+EXP_NAME + '/logs >' + EXP + '/'+EXP_NAME + '/final_log')
os.system('if [ `ls|grep ".dnd"|wc -l` -ne 0 ]; then rm *.dnd;fi')
print("An error has occurred. Please look in log files: " + EXP + '/'+EXP_NAME + "/final_log")
print("An error has occurred. Please look in log files: " + EXP + '/'+EXP_NAME + "/final_log")
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
...
...
This diff is collapsed.
Click to expand it.
src/data_formatting.py
+
5
−
5
View file @
856cd914
...
@@ -109,21 +109,21 @@ for line in file_read.readlines():
...
@@ -109,21 +109,21 @@ for line in file_read.readlines():
msa
=
tab_line
[
0
]
msa
=
tab_line
[
0
]
read_size
=
tab_line
[
1
]
read_size
=
tab_line
[
1
]
nb_read
=
tab_line
[
2
]
nb_read
=
tab_line
[
2
]
user
=
float
(
tab_line
[
3
])
#
user = float(tab_line[3])
system
=
float
(
tab_line
[
4
])
#
system = float(tab_line[4])
time
=
float
(
tab_line
[
5
])
time
=
float
(
tab_line
[
5
])
elapsed
=
float
(
tab_line
[
6
])
elapsed
=
float
(
tab_line
[
6
])
cpu
=
float
(
tab_line
[
7
])
#
cpu = float(tab_line[7])
memory
=
float
(
tab_line
[
8
])
memory
=
float
(
tab_line
[
8
])
if
msa
in
data
:
if
msa
in
data
:
if
read_size
in
data
[
msa
]:
if
read_size
in
data
[
msa
]:
if
nb_read
in
data
[
msa
][
read_size
]:
if
nb_read
in
data
[
msa
][
read_size
]:
data
[
msa
][
read_size
][
nb_read
]
+=
[
user
,
system
,
time
,
elapsed
,
cpu
,
memory
]
data
[
msa
][
read_size
][
nb_read
]
+=
[
time
,
elapsed
,
memory
]
#output
#output
sep
=
"
,
"
sep
=
"
,
"
print
(
"
MSA
"
,
"
region_size
"
,
"
depth
"
,
"
n
b
Ambiguity
"
,
"
r
Ambiguity
"
,
"
n
b
Identity
"
,
"
r
Identity
"
,
"
n
b
Error
"
,
"
r
Error
"
,
"
n
b
Match
"
,
"
r
Match
"
,
"
size
"
,
"
user
"
,
"
system
"
,
"
time
"
,
"
elapsed
"
,
"
cpu
"
,
"
memory
"
,
sep
=
sep
)
print
(
"
MSA
"
,
"
region_size
"
,
"
depth
"
,
"
n
umber_
Ambiguity
"
,
"
percentage_
Ambiguity
"
,
"
n
umber_
Identity
"
,
"
percentage_
Identity
"
,
"
n
umber_
Error
"
,
"
percentage_
Error
"
,
"
n
umber_
Match
"
,
"
percentage_
Match
"
,
"
size
"
,
"
time
"
,
"
elapsed
"
,
"
memory
"
,
sep
=
sep
)
for
MSA
in
data
:
for
MSA
in
data
:
for
read_size
in
data
[
MSA
]:
for
read_size
in
data
[
MSA
]:
for
nb_read
in
data
[
MSA
][
read_size
]:
for
nb_read
in
data
[
MSA
][
read_size
]:
...
...
This diff is collapsed.
Click to expand it.
src/tool_consensus.py
+
48
−
21
View file @
856cd914
...
@@ -39,32 +39,59 @@ def consensus_iupac(alignment,threshold):
...
@@ -39,32 +39,59 @@ def consensus_iupac(alignment,threshold):
frequency_letters
=
{}
frequency_letters
=
{}
#Calculates the frequency of each letter
#Calculates the frequency of each letter
order_frequencys
=
[]
order_letters
=
[]
for
letter
in
letters
:
for
letter
in
letters
:
pass
pass
frequency_letter
=
len
(
re
.
findall
(
letter
,
nucleotides
))
/
nb_reads
frequency_letter
=
100
*
len
(
re
.
findall
(
letter
,
nucleotides
))
/
nb_reads
frequency_letters
[
letter
]
=
frequency_letter
frequency_letters
[
letter
]
=
frequency_letter
#Gap closure
if
order_frequencys
==
[]:
if
(
"
-
"
not
in
frequency_letters
):
order_frequencys
.
append
(
frequency_letter
)
frequency_letters
[
"
-
"
]
=
0
order_letters
.
append
(
letter
)
if
(
frequency_letters
[
"
-
"
]
<
0.50
):
frequency_sum
=
1
-
frequency_letters
[
"
-
"
]
del
frequency_letters
[
"
-
"
]
letters_threshold
=
[]
for
letter
in
frequency_letters
:
if
(
(
frequency_letters
[
letter
]
/
frequency_sum
)
>
threshold
):
letters_threshold
=
letters_threshold
+
[
letter
]
if
(
len
(
letters_threshold
)
>=
1
):
if
(
len
(
letters_threshold
)
==
1
):
seq
+=
letters_threshold
[
0
]
else
:
else
:
if
(
len
(
letters_threshold
)
>
1
):
len_order_frequencys
=
len
(
order_frequencys
)
letters_threshold
.
sort
()
i
=
0
id_iupac
=
""
.
join
(
letters_threshold
)
find
=
0
while
(
i
<
len_order_frequencys
and
find
==
0
):
if
(
frequency_letter
>
order_frequencys
[
i
]):
order_frequencys
.
append
(
order_frequencys
[
len_order_frequencys
-
1
])
order_letters
.
append
(
order_letters
[
len_order_frequencys
-
1
])
len_order_frequencys
+=
1
j
=
len_order_frequencys
-
2
while
(
j
>
i
):
order_frequencys
[
j
]
=
order_frequencys
[
j
-
1
]
order_letters
[
j
]
=
order_letters
[
j
-
1
]
j
-=
1
order_frequencys
[
i
]
=
frequency_letter
order_letters
[
i
]
=
letter
find
=
1
i
+=
1
if
i
>=
len_order_frequencys
:
order_frequencys
.
append
(
frequency_letter
)
order_letters
.
append
(
letter
)
len_order_frequencys
=
len
(
order_frequencys
)
#Gap closure
if
(
"
-
"
!=
order_letters
[
0
]):
conserved_nucs
=
[
order_letters
[
0
]]
total_frequency
=
order_frequencys
[
0
];
i
=
1
while
(
total_frequency
<
threshold
*
100
and
i
<
len_order_frequencys
):
if
(
order_letters
[
i
]
!=
"
-
"
and
order_frequencys
[
i
]
!=
0
):
total_frequency
+=
order_frequencys
[
i
]
conserved_nucs
=
conserved_nucs
+
[
order_letters
[
i
]]
j
=
i
while
(
j
+
1
<
len_order_frequencys
and
order_frequencys
[
i
]
==
order_frequencys
[
j
+
1
]):
total_frequency
+=
order_frequencys
[
j
+
1
]
conserved_nucs
=
conserved_nucs
+
[
order_letters
[
j
+
1
]]
j
+=
1
i
=
j
i
+=
1
conserved_nucs
.
sort
()
if
(
len
(
conserved_nucs
)
>
1
):
id_iupac
=
""
.
join
(
conserved_nucs
)
seq
+=
nuc_to_iupac
[
id_iupac
]
seq
+=
nuc_to_iupac
[
id_iupac
]
else
:
else
:
seq
+=
"
N
"
seq
+=
conserved_nucs
[
0
]
return
seq
return
seq
#Creation of the consensus sequence
#Creation of the consensus sequence
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment