Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
msa-limit-data
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Rohmer Coralie
msa-limit-data
Commits
4d9d8404
Commit
4d9d8404
authored
3 years ago
by
Rohmer Coralie
Browse files
Options
Downloads
Patches
Plain Diff
ready: ecoli hifi illumina yeast simulated
parent
151b3efb
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
Snakefile
+306
-289
306 additions, 289 deletions
Snakefile
src/exonerate_to_fasta.pl
+6
-0
6 additions, 0 deletions
src/exonerate_to_fasta.pl
src/snakemake_launcher.sh
+1
-1
1 addition, 1 deletion
src/snakemake_launcher.sh
with
313 additions
and
290 deletions
Snakefile
+
306
−
289
View file @
4d9d8404
...
@@ -30,7 +30,7 @@ READS_ILLUMINA_BMB="ERR1308675.fastq"
...
@@ -30,7 +30,7 @@ READS_ILLUMINA_BMB="ERR1308675.fastq"
REF_CCN="ccnscafold.fa"
REF_CCN="ccnscafold.fa"
REF_BMB="bmbscafold.fa"
REF_BMB="bmbscafold.fa"
ID=["CCN_yeast","BMB_yeast","diploid_yeast","ecoli_hifi","ecoli_illumina"]
ID=["CCN_yeast","BMB_yeast","diploid_yeast","ecoli_hifi","ecoli_illumina"]
CONFIGS=["","_clustal","_tcoffee"]
CONFIGS=[""
]#
,"_clustal","_tcoffee"]
READS_HUMAN="reads.fastq"
READS_HUMAN="reads.fastq"
REF_HUMAN="chm13.draft_v1.1.fasta"
REF_HUMAN="chm13.draft_v1.1.fasta"
...
@@ -93,260 +93,6 @@ rule align_human_reads_ref :
...
@@ -93,260 +93,6 @@ rule align_human_reads_ref :
'minimap2 -cax map-ont -t '+THREAD+' {input.ref} {input.reads} >{output} 2>{log}'
'minimap2 -cax map-ont -t '+THREAD+' {input.ref} {input.reads} >{output} 2>{log}'
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Ecoli hifi data
#-------------------------------------------------------------------------------
rule download_hifi_reads_ecoli :
output :
os.path.join('ecoli_hifi_data',READS_HIFI)
message :
"Download hifi reads for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','1_download_hifi.log')
conda:
"env_conda/fastq-dump.yaml"
shell:
'fastq-dump --gzip SRR11434954 >{log} 2>&1;'
'mv ' + READS_HIFI + ' {output}'
rule download_nanopore_reads_ecoli_hifi :
output :
os.path.join('ecoli_hifi_data',READS_NANOPORE_ECOLI_HIFI + '.gz')
message :
"Download nanopore reads for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','2_download_nanopore.log')
conda:
"env_conda/fastq-dump.yaml"
shell:
'fastq-dump --gzip SRR12801740 >{log} 2>&1;'
'mv ' + READS_NANOPORE_ECOLI_HIFI + '.gz {output}'
rule gunzip_nanopore_reads_ecoli_hifi :
input :
os.path.join('ecoli_hifi_data',READS_NANOPORE_ECOLI_HIFI + '.gz')
output :
os.path.join('ecoli_hifi_data',READS_NANOPORE_ECOLI_HIFI)
message :
"Decompress nanopore reads for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','3_gunzip_nanopore_reads.log')
conda:
"env_conda/gzip.yaml"
shell:
'gunzip {input} >{log} 2>&1'
rule select_hifi_reads_ecoli :
input :
os.path.join('ecoli_hifi_data',READS_HIFI)
output:
os.path.join('ecoli_hifi_data','reads_hifi.fastq')
message :
"Select hifi reads for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','4_select_hifi_reads.log')
conda:
"env_conda/seqkit.yaml"
shell:
'set +o pipefail;'
'seqkit sort -l -r {input} 2>{log}|head -100000 >{output} 2>{log}'
rule assembly_hifi_reads_ecoli :
input :
os.path.join('ecoli_hifi_data','reads_hifi.fastq')
output:
os.path.join('ecoli_hifi_data','assembly_hifi','assembly.fasta')
message :
"Assembly hifi reads for ecoli hifi data with flye"
log:
os.path.join('logs','ecoli_hifi_data','5_assembly_hifi_reads.log')
conda:
"env_conda/flye.yaml"
shell:
'flye --pacbio-hifi {input} --genome-size 5m --threads ' + THREAD + ' -i 2 -o ecoli_hifi_data/assembly_hifi >{log} 2>&1'
rule extract_first_contig_hifi_reads :
input :
os.path.join('ecoli_hifi_data','assembly_hifi','assembly.fasta')
output:
os.path.join('ecoli_hifi_data','ref_ecoli_hifi.fasta')
message :
"Extract first contig assembly for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','6_extract_first_contig_hifi_reads.log')
shell:
'set +o pipefail;'
'cat {input}|tr "\\n" "@"|sed "s/@>/\\n>/g"|sed "s/@/\\n/;s/@//g"|head -2 >{output} 2>{log}'
rule configuration_file_ecoli_hifi_data:
input :
ref=os.path.join('ecoli_hifi_data','ref_ecoli_hifi.fasta'),
reads=os.path.join('ecoli_hifi_data',READS_NANOPORE_ECOLI_HIFI)
output :
ref=os.path.join('before_configuration_files','config_ecoli_hifi.yaml')
message :
"Configuration file for ecoli hifi"
log:
os.path.join('logs','ecoli_hifi_data','7_configuration_ecoli_hifi.log')
shell:
'PATH_DATA=`pwd`;'
'echo -e "N: ecoli_hifi" >{output};'
'echo -e "I: $PATH_DATA/{input.ref}" >>{output};'
'echo -e "R: $PATH_DATA/{input.reads}" >>{output}'
#-------------------------------------------------------------------------------
# Ecoli illumina data
#-------------------------------------------------------------------------------
rule download_illumina_reads_ecoli :
output :
os.path.join('ecoli_illumina_data',READS_ILLUMINA+ '.gz')
message :
"Download illumina reads for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','1_download_illumina.log')
conda:
"env_conda/fastq-dump.yaml"
shell:
'fastq-dump --gzip --split-spot --clip SRR8333590 >{log} 2>&1;'
'mv ' + READS_ILLUMINA + '.gz {output}'
rule gunzip_illumina_reads_ecoli_illumina:
input :
os.path.join('ecoli_illumina_data',READS_ILLUMINA + '.gz')
output :
os.path.join('ecoli_illumina_data',READS_ILLUMINA)
message :
"Decompress illumina reads for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','2_gunzip_illumina_reads.log')
conda:
"env_conda/gzip.yaml"
shell:
'gunzip {input} >{log} 2>&1'
rule download_nanopore_reads_ecoli_illumina :
output :
os.path.join('ecoli_illumina_data',READS_NANOPORE_ECOLI_ILLUMINA + '.gz')
message :
"Download nanopore reads for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','3_download_nanopore.log')
conda:
"env_conda/fastq-dump.yaml"
shell:
'fastq-dump --gzip SRR8335315 >{log} 2>&1;'
'mv ' + READS_NANOPORE_ECOLI_ILLUMINA + '.gz {output}'
rule gunzip_nanopore_reads_ecoli_illumina :
input :
os.path.join('ecoli_illumina_data',READS_NANOPORE_ECOLI_ILLUMINA + '.gz')
output :
os.path.join('ecoli_illumina_data',READS_NANOPORE_ECOLI_ILLUMINA)
message :
"Decompress nanopore reads for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','4_gunzip_nanopore_reads.log')
conda:
"env_conda/gzip.yaml"
shell:
'gunzip {input} >{log} 2>&1'
rule download_ref_ecoli :
output :
os.path.join('ecoli_illumina_data',REF_ECOLI + '.gz')
message :
"Download ref ecoli for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','5_download_ref_ecoli.log')
shell:
'wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/358/385/GCF_004358385.1_ASM435838v1/GCF_004358385.1_ASM435838v1_genomic.fna.gz -o {log};'
'mv ' + REF_ECOLI + '.gz {output}'
rule gunzip_ref_ecoli :
input :
os.path.join('ecoli_illumina_data',REF_ECOLI+ '.gz')
output :
os.path.join('ecoli_illumina_data',REF_ECOLI)
message :
"Decompress ref ecoli for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','6_gunzip_ref_ecoli.log')
conda:
"env_conda/gzip.yaml"
shell:
'gunzip {input} >{log} 2>&1'
rule assembly_illumina_reads_ecoli :
input :
os.path.join('ecoli_illumina_data',READS_ILLUMINA)
output:
os.path.join('ecoli_illumina_data','assembly_illumina','contigs.fasta')
message :
"Assembly illumina reads for ecoli illumina data with spades"
log:
os.path.join('logs','ecoli_illumina_data','7_assembly_illumina_reads.log')
conda:
"env_conda/spades.yaml"
shell:
'spades.py -s {input} -o ecoli_illumina_data/assembly_illumina --only-assembler -t '+THREAD+' >{log} 2>&1'
rule extract_first_chromosome_ref :
input :
os.path.join('ecoli_illumina_data',REF_ECOLI)
output:
os.path.join('ecoli_illumina_data','ref_ecoli.fasta')
message :
"Extract first chromosome ref for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','8_extract_first_chromosome_ref.log')
shell:
'head -67952 {input} >{output} 2>{log}'
rule align_illumina_contig_ref :
input :
contigs=os.path.join('ecoli_illumina_data','assembly_illumina','contigs.fasta'),
ref=os.path.join('ecoli_illumina_data','ref_ecoli.fasta')
output:
os.path.join('ecoli_illumina_data','aln_contigs_illumina_ref_ecoli.sam')
message :
"Align illumina contigs with ref for ecoli illumina data with minimap"
log:
os.path.join('logs','ecoli_illumina_data','9_align_illumina_contig_ref.log')
conda:
"env_conda/minimap2.yaml"
shell:
'minimap2 -cax asm5 -t '+THREAD+' {input.ref} {input.contigs} >{output} 2>{log}'
rule reads_map_region_ecoli_illumina :
input :
os.path.join('ecoli_illumina_data','aln_contigs_illumina_ref_ecoli.sam'),
output :
os.path.join('ecoli_illumina_data','ref_illumina_ecoli.fasta')
message:
"Reads map region for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','10_reads_map_region.log')
conda:
"env_conda/perl.yaml"
shell :
'DEB=4845200;END=5080000;'
'./src/reads_map_region.pl -s $DEB -e $END {input} {output} >{log} 2>&1'
rule configuration_file_ecoli_illumina_data:
input :
ref=os.path.join('ecoli_illumina_data','ref_illumina_ecoli.fasta'),
reads=os.path.join('ecoli_illumina_data',READS_NANOPORE_ECOLI_ILLUMINA)
output :
ref=os.path.join('before_configuration_files','config_ecoli_illumina.yaml')
message :
"Configuration file for ecoli illumina"
log:
os.path.join('logs','ecoli_illumina_data','11_configuration_ecoli_illumina.log')
shell:
'PATH_DATA=`pwd`;'
'echo -e "N: ecoli_illumina" >{output};'
'echo -e "I: $PATH_DATA/{input.ref}" >>{output};'
'echo -e "R: $PATH_DATA/{input.reads}" >>{output}'
#-------------------------------------------------------------------------------
# Yeast data
# Yeast data
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
rule download_CCN_yeast_reads :
rule download_CCN_yeast_reads :
...
@@ -463,7 +209,7 @@ rule assembly_illumina_reads_CCN :
...
@@ -463,7 +209,7 @@ rule assembly_illumina_reads_CCN :
illumina=os.path.join('yeast_data',READS_ILLUMINA_CCN + '.gz'),
illumina=os.path.join('yeast_data',READS_ILLUMINA_CCN + '.gz'),
nanopore=os.path.join('yeast_data',READS_CCN)
nanopore=os.path.join('yeast_data',READS_CCN)
output:
output:
os.path.join('yeast_data','assembly_illumina_
CCN
','contigs.fasta')
os.path.join('yeast_data','assembly_illumina_
ccn
','contigs.fasta')
message :
message :
"Assembly illumina reads for CCN yeast data with spades"
"Assembly illumina reads for CCN yeast data with spades"
log:
log:
...
@@ -471,14 +217,14 @@ rule assembly_illumina_reads_CCN :
...
@@ -471,14 +217,14 @@ rule assembly_illumina_reads_CCN :
conda:
conda:
"env_conda/spades.yaml"
"env_conda/spades.yaml"
shell:
shell:
'spades.py --12 {input.illumina} --nanopore {input.nanopore} -o yeast_data/assembly_illumina_
CCN
--only-assembler -t '+THREAD+' >{log} 2>&1'
'spades.py --12 {input.illumina} --nanopore {input.nanopore} -o yeast_data/assembly_illumina_
ccn
--only-assembler -t '+THREAD+' >{log} 2>&1'
rule assembly_illumina_reads_BMB :
rule assembly_illumina_reads_BMB :
input :
input :
illumina=os.path.join('yeast_data',READS_ILLUMINA_BMB + '.gz'),
illumina=os.path.join('yeast_data',READS_ILLUMINA_BMB + '.gz'),
nanopore=os.path.join('yeast_data',READS_BMB)
nanopore=os.path.join('yeast_data',READS_BMB)
output:
output:
os.path.join('yeast_data','assembly_illumina_
BMB
','contigs.fasta')
os.path.join('yeast_data','assembly_illumina_
bmb
','contigs.fasta')
message :
message :
"Assembly illumina reads for BMB yeast data with spades"
"Assembly illumina reads for BMB yeast data with spades"
log:
log:
...
@@ -486,44 +232,47 @@ rule assembly_illumina_reads_BMB :
...
@@ -486,44 +232,47 @@ rule assembly_illumina_reads_BMB :
conda:
conda:
"env_conda/spades.yaml"
"env_conda/spades.yaml"
shell:
shell:
'spades.py --12 {input.illumina} --nanopore {input.nanopore} -o yeast_data/assembly_illumina_
BMB
--only-assembler -t '+THREAD+' >{log} 2>&1'
'spades.py --12 {input.illumina} --nanopore {input.nanopore} -o yeast_data/assembly_illumina_
bmb
--only-assembler -t '+THREAD+' >{log} 2>&1'
rule reduced_ref_
CCN
:
rule reduced_ref_
bmb
:
input :
input :
os.path.join('yeast_data','assembly_illumina_
CCN
','contigs.fasta')
os.path.join('yeast_data','assembly_illumina_
bmb
','contigs.fasta')
output :
output :
os.path.join('yeast_data',"reduced_
CCN
.fasta")
os.path.join('yeast_data',"reduced_
assembly_bmb
.fasta")
message :
message :
"Reduced reference for
CCN
yeast"
"Reduced reference for
bmb
yeast"
log:
log:
os.path.join('logs','yeast_data','11_reduced_
CCN
_ref.log')
os.path.join('logs','yeast_data','11_reduced_
bmb
_ref.log')
shell:
shell:
'set +o pipefail;'
'set +o pipefail;'
'head -42442 {input} |tail -11976 >{output} 2>>{log}'
'LINE=`cat {input} |grep -n ">"|head -2| cut -d: -f1|tail -1`;'
'head -$(($LINE-1)) {input} >{output} 2>>{log}'
rule reduced_ref_
BMB
:
rule reduced_ref_
ccn
:
input :
input :
os.path.join('yeast_data','assembly_illumina_
BMB
','contigs.fasta')
os.path.join('yeast_data','assembly_illumina_
ccn
','contigs.fasta')
output :
output :
os.path.join('yeast_data',"reduced_
BMB
.fasta")
os.path.join('yeast_data',"reduced_
assembly_ccn
.fasta")
message :
message :
"Reduced reference for
BMB
yeast"
"Reduced reference for
ccn
yeast"
log:
log:
os.path.join('logs','yeast_data','12_reduced_
BMB
_ref.log')
os.path.join('logs','yeast_data','12_reduced_
ccn
_ref.log')
shell:
shell:
'set +o pipefail;'
'set +o pipefail;'
'head -15288 {input} >{output} 2>>{log}'
'LINE_6=`cat {input} |grep -n ">"|head -6| cut -d: -f1|tail -1`;'
'LINE_5=`cat {input} |grep -n ">"|head -5| cut -d: -f1|tail -1`;'
'head -$(($LINE_6 - 1)) {input} | tail -$(($LINE_6 - $LINE_5)) >{output} 2>>{log}'
rule align_ref_BMB_ref_CCN :
rule align_ref_BMB_ref_CCN :
input :
input :
ccn=os.path.join('yeast_data',"reduced_
CCN
.fasta"),
ccn=os.path.join('yeast_data',"reduced_
assembly_ccn
.fasta"),
bmb=os.path.join('yeast_data',"reduced_
BMB
.fasta")
bmb=os.path.join('yeast_data',"reduced_
assembly_bmb
.fasta")
output:
output:
os.path.join('yeast_data','aln_bmb_ccn
_reduit
.sam')
os.path.join('yeast_data','aln_
reduced_assembly_
bmb_ccn.sam')
message :
message :
"Align BMB ref with CCN ref with minimap"
"Align BMB ref with CCN ref with minimap"
log:
log:
os.path.join('logs','yeast_data','13_align_ref_
BMB
_ref_
CCN
.log')
os.path.join('logs','yeast_data','13_align_ref_
bmb
_ref_
ccn
.log')
conda:
conda:
"env_conda/minimap2.yaml"
"env_conda/minimap2.yaml"
shell:
shell:
...
@@ -531,7 +280,7 @@ rule align_ref_BMB_ref_CCN :
...
@@ -531,7 +280,7 @@ rule align_ref_BMB_ref_CCN :
rule reads_map_region_yeast :
rule reads_map_region_yeast :
input :
input :
os.path.join('yeast_data','aln_bmb_ccn
_reduit
.sam')
os.path.join('yeast_data','aln_
reduced_assembly_
bmb_ccn.sam')
output :
output :
os.path.join('yeast_data','ref_ccn.fasta')
os.path.join('yeast_data','ref_ccn.fasta')
message:
message:
...
@@ -541,12 +290,12 @@ rule reads_map_region_yeast :
...
@@ -541,12 +290,12 @@ rule reads_map_region_yeast :
conda:
conda:
"env_conda/perl.yaml"
"env_conda/perl.yaml"
shell :
shell :
'DEB=1
5870
0;END=
453600
;'
'DEB=1
35615
0;END=
1791934
;'
'./src/reads_map_region.pl -s $DEB -e $END {input} {output} >{log} 2>&1'
'./src/reads_map_region.pl -s $DEB -e $END {input} {output} >{log} 2>&1'
rule region_seq_yeast :
rule region_seq_yeast :
input :
input :
os.path.join('yeast_data',"reduced_
BMB
.fasta")
os.path.join('yeast_data',"reduced_
assembly_bmb
.fasta")
output :
output :
start=os.path.join('yeast_data','start_region.txt'),
start=os.path.join('yeast_data','start_region.txt'),
ref=os.path.join('yeast_data','ref_bmb.fasta')
ref=os.path.join('yeast_data','ref_bmb.fasta')
...
@@ -555,7 +304,7 @@ rule region_seq_yeast :
...
@@ -555,7 +304,7 @@ rule region_seq_yeast :
log:
log:
os.path.join('logs','yeast_data','15_region_seq_yeast.log')
os.path.join('logs','yeast_data','15_region_seq_yeast.log')
shell :
shell :
'DEB=1
5870
0;END=
453600
;'
'DEB=1
35615
0;END=
1791934
;'
'echo $DEB >{output.start};'
'echo $DEB >{output.start};'
'SIZE=$(($END-$DEB));'
'SIZE=$(($END-$DEB));'
'./src/region_seq.sh {output.start} {input} {output.ref} $SIZE >{log} 2>&1'
'./src/region_seq.sh {output.start} {input} {output.ref} $SIZE >{log} 2>&1'
...
@@ -573,8 +322,8 @@ rule configuration_file_CCN_yeast_data:
...
@@ -573,8 +322,8 @@ rule configuration_file_CCN_yeast_data:
shell:
shell:
'PATH_DATA=`pwd`;'
'PATH_DATA=`pwd`;'
'echo -e "N: CCN_yeast" >{output};'
'echo -e "N: CCN_yeast" >{output};'
'echo -e "I: $PATH_DATA/{input.re
f
}" >>{output};'
'echo -e "I: $PATH_DATA/{input.re
ads
}" >>{output};'
'echo -e "R: $PATH_DATA/{input.re
ads
}" >>{output}'
'echo -e "R: $PATH_DATA/{input.re
f
}" >>{output}'
rule configuration_file_BMB_yeast_data:
rule configuration_file_BMB_yeast_data:
input :
input :
...
@@ -589,8 +338,8 @@ rule configuration_file_BMB_yeast_data:
...
@@ -589,8 +338,8 @@ rule configuration_file_BMB_yeast_data:
shell:
shell:
'PATH_DATA=`pwd`;'
'PATH_DATA=`pwd`;'
'echo -e "N: BMB_yeast" >{output};'
'echo -e "N: BMB_yeast" >{output};'
'echo -e "I: $PATH_DATA/{input.re
f
}" >>{output};'
'echo -e "I: $PATH_DATA/{input.re
ads
}" >>{output};'
'echo -e "R: $PATH_DATA/{input.re
ads
}" >>{output}'
'echo -e "R: $PATH_DATA/{input.re
f
}" >>{output}'
rule diploid_reads:
rule diploid_reads:
input :
input :
...
@@ -624,7 +373,7 @@ rule align_ref_BMB_ref_CCN_exonerate :
...
@@ -624,7 +373,7 @@ rule align_ref_BMB_ref_CCN_exonerate :
ccn=os.path.join('yeast_data',"ref_ccn.fasta"),
ccn=os.path.join('yeast_data',"ref_ccn.fasta"),
bmb=os.path.join('yeast_data',"ref_bmb.fasta")
bmb=os.path.join('yeast_data',"ref_bmb.fasta")
output:
output:
os.path.join('yeast_data','aln_refs_bmb_ccn.
fasta
')
os.path.join('yeast_data','aln_refs_bmb_ccn.
txt
')
message :
message :
"Align BMB ref with CCN ref with exonerate"
"Align BMB ref with CCN ref with exonerate"
log:
log:
...
@@ -632,7 +381,19 @@ rule align_ref_BMB_ref_CCN_exonerate :
...
@@ -632,7 +381,19 @@ rule align_ref_BMB_ref_CCN_exonerate :
conda:
conda:
"env_conda/exonerate.yaml"
"env_conda/exonerate.yaml"
shell:
shell:
'exonerate -m a:l --bigseq yes --dnahspthreshold 120 {input.bmb} {input.ccn} | ./src/exonerate_to_fasta.pl >{output} 2>{log}'
'exonerate -m a:g --bigseq yes -Q dna -E --bestn 1 --dnahspthreshold 120 {input.bmb} {input.ccn} >{output} 2>{log}'
rule exonerate_to_fasta :
input :
os.path.join('yeast_data','aln_refs_bmb_ccn.txt')
output:
os.path.join('yeast_data','aln_refs_bmb_ccn.fasta')
message :
"Exonerate to fasta"
log:
os.path.join('logs','yeast_data','20_bis_exonerate_to_fasta.log')
shell:
'./src/exonerate_to_fasta.pl <{input} >{output} 2>{log}'
rule ref_diploide :
rule ref_diploide :
input :
input :
...
@@ -661,8 +422,264 @@ rule configuration_file_diploid_yeast_data:
...
@@ -661,8 +422,264 @@ rule configuration_file_diploid_yeast_data:
shell:
shell:
'PATH_DATA=`pwd`;'
'PATH_DATA=`pwd`;'
'echo -e "N: diploid_yeast" >{output};'
'echo -e "N: diploid_yeast" >{output};'
'echo -e "I: $PATH_DATA/{input.ref}" >>{output};'
'echo -e "I: $PATH_DATA/{input.reads}" >>{output};'
'echo -e "R: $PATH_DATA/{input.reads}" >>{output}'
'echo -e "R: $PATH_DATA/{input.ref}" >>{output}'
#-------------------------------------------------------------------------------
# Ecoli hifi data
#-------------------------------------------------------------------------------
rule download_hifi_reads_ecoli :
output :
os.path.join('ecoli_hifi_data',READS_HIFI)
message :
"Download hifi reads for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','1_download_hifi.log')
conda:
"env_conda/fastq-dump.yaml"
shell:
'fastq-dump --gzip SRR11434954 >{log} 2>&1;'
'mv ' + READS_HIFI + ' {output}'
rule download_nanopore_reads_ecoli_hifi :
output :
os.path.join('ecoli_hifi_data',READS_NANOPORE_ECOLI_HIFI + '.gz')
message :
"Download nanopore reads for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','2_download_nanopore.log')
conda:
"env_conda/fastq-dump.yaml"
shell:
'fastq-dump --gzip SRR12801740 >{log} 2>&1;'
'mv ' + READS_NANOPORE_ECOLI_HIFI + '.gz {output}'
rule gunzip_nanopore_reads_ecoli_hifi :
input :
os.path.join('ecoli_hifi_data',READS_NANOPORE_ECOLI_HIFI + '.gz')
output :
os.path.join('ecoli_hifi_data',READS_NANOPORE_ECOLI_HIFI)
message :
"Decompress nanopore reads for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','3_gunzip_nanopore_reads.log')
conda:
"env_conda/gzip.yaml"
shell:
'gunzip {input} >{log} 2>&1'
rule select_hifi_reads_ecoli :
input :
os.path.join('ecoli_hifi_data',READS_HIFI)
output:
os.path.join('ecoli_hifi_data','reads_hifi.fastq')
message :
"Select hifi reads for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','4_select_hifi_reads.log')
conda:
"env_conda/seqkit.yaml"
shell:
'set +o pipefail;'
'seqkit sort -l -r {input} 2>{log}|head -100000 >{output} 2>{log}'
rule assembly_hifi_reads_ecoli :
input :
os.path.join('ecoli_hifi_data','reads_hifi.fastq')
output:
os.path.join('ecoli_hifi_data','assembly_hifi','assembly.fasta')
message :
"Assembly hifi reads for ecoli hifi data with flye"
log:
os.path.join('logs','ecoli_hifi_data','5_assembly_hifi_reads.log')
conda:
"env_conda/flye.yaml"
shell:
'flye --pacbio-hifi {input} --genome-size 5m --threads ' + THREAD + ' -i 2 -o ecoli_hifi_data/assembly_hifi >{log} 2>&1'
rule extract_first_contig_hifi_reads :
input :
os.path.join('ecoli_hifi_data','assembly_hifi','assembly.fasta')
output:
os.path.join('ecoli_hifi_data','ref_ecoli_hifi.fasta')
message :
"Extract first contig assembly for ecoli hifi data"
log:
os.path.join('logs','ecoli_hifi_data','6_extract_first_contig_hifi_reads.log')
shell:
'set +o pipefail;'
'cat {input}|tr "\\n" "@"|sed "s/@>/\\n>/g"|sed "s/@/\\n/;s/@//g"|head -2 >{output} 2>{log}'
rule configuration_file_ecoli_hifi_data:
input :
ref=os.path.join('ecoli_hifi_data','ref_ecoli_hifi.fasta'),
reads=os.path.join('ecoli_hifi_data',READS_NANOPORE_ECOLI_HIFI)
output :
ref=os.path.join('before_configuration_files','config_ecoli_hifi.yaml')
message :
"Configuration file for ecoli hifi"
log:
os.path.join('logs','ecoli_hifi_data','7_configuration_ecoli_hifi.log')
shell:
'PATH_DATA=`pwd`;'
'echo -e "N: ecoli_hifi" >{output};'
'echo -e "I: $PATH_DATA/{input.reads}" >>{output};'
'echo -e "R: $PATH_DATA/{input.ref}" >>{output}'
#-------------------------------------------------------------------------------
# Ecoli illumina data
#-------------------------------------------------------------------------------
rule download_illumina_reads_ecoli :
output :
os.path.join('ecoli_illumina_data',READS_ILLUMINA+ '.gz')
message :
"Download illumina reads for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','1_download_illumina.log')
conda:
"env_conda/fastq-dump.yaml"
shell:
'fastq-dump --gzip --split-spot --clip SRR8333590 >{log} 2>&1;'
'mv ' + READS_ILLUMINA + '.gz {output}'
rule gunzip_illumina_reads_ecoli_illumina:
input :
os.path.join('ecoli_illumina_data',READS_ILLUMINA + '.gz')
output :
os.path.join('ecoli_illumina_data',READS_ILLUMINA)
message :
"Decompress illumina reads for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','2_gunzip_illumina_reads.log')
conda:
"env_conda/gzip.yaml"
shell:
'gunzip {input} >{log} 2>&1'
rule download_nanopore_reads_ecoli_illumina :
output :
os.path.join('ecoli_illumina_data',READS_NANOPORE_ECOLI_ILLUMINA + '.gz')
message :
"Download nanopore reads for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','3_download_nanopore.log')
conda:
"env_conda/fastq-dump.yaml"
shell:
'fastq-dump --gzip SRR8335315 >{log} 2>&1;'
'mv ' + READS_NANOPORE_ECOLI_ILLUMINA + '.gz {output}'
rule gunzip_nanopore_reads_ecoli_illumina :
input :
os.path.join('ecoli_illumina_data',READS_NANOPORE_ECOLI_ILLUMINA + '.gz')
output :
os.path.join('ecoli_illumina_data',READS_NANOPORE_ECOLI_ILLUMINA)
message :
"Decompress nanopore reads for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','4_gunzip_nanopore_reads.log')
conda:
"env_conda/gzip.yaml"
shell:
'gunzip {input} >{log} 2>&1'
rule download_ref_ecoli :
output :
os.path.join('ecoli_illumina_data',REF_ECOLI + '.gz')
message :
"Download ref ecoli for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','5_download_ref_ecoli.log')
shell:
'wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/004/358/385/GCF_004358385.1_ASM435838v1/GCF_004358385.1_ASM435838v1_genomic.fna.gz -o {log};'
'mv ' + REF_ECOLI + '.gz {output}'
rule gunzip_ref_ecoli :
input :
os.path.join('ecoli_illumina_data',REF_ECOLI+ '.gz')
output :
os.path.join('ecoli_illumina_data',REF_ECOLI)
message :
"Decompress ref ecoli for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','6_gunzip_ref_ecoli.log')
conda:
"env_conda/gzip.yaml"
shell:
'gunzip {input} >{log} 2>&1'
rule assembly_illumina_reads_ecoli :
input :
os.path.join('ecoli_illumina_data',READS_ILLUMINA)
output:
os.path.join('ecoli_illumina_data','assembly_illumina','contigs.fasta')
message :
"Assembly illumina reads for ecoli illumina data with spades"
log:
os.path.join('logs','ecoli_illumina_data','7_assembly_illumina_reads.log')
conda:
"env_conda/spades.yaml"
shell:
'spades.py -s {input} -o ecoli_illumina_data/assembly_illumina --only-assembler -t '+THREAD+' >{log} 2>&1'
rule extract_first_chromosome_ref :
input :
os.path.join('ecoli_illumina_data',REF_ECOLI)
output:
os.path.join('ecoli_illumina_data','ref_ecoli.fasta')
message :
"Extract first chromosome ref for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','8_extract_first_chromosome_ref.log')
shell:
'head -67952 {input} >{output} 2>{log}'
rule align_illumina_contig_ref :
input :
contigs=os.path.join('ecoli_illumina_data','assembly_illumina','contigs.fasta'),
ref=os.path.join('ecoli_illumina_data','ref_ecoli.fasta')
output:
os.path.join('ecoli_illumina_data','aln_contigs_illumina_ref_ecoli.sam')
message :
"Align illumina contigs with ref for ecoli illumina data with minimap"
log:
os.path.join('logs','ecoli_illumina_data','9_align_illumina_contig_ref.log')
conda:
"env_conda/minimap2.yaml"
shell:
'minimap2 -cax asm5 -t '+THREAD+' {input.ref} {input.contigs} >{output} 2>{log}'
rule reads_map_region_ecoli_illumina :
input :
os.path.join('ecoli_illumina_data','aln_contigs_illumina_ref_ecoli.sam'),
output :
os.path.join('ecoli_illumina_data','ref_illumina_ecoli.fasta')
message:
"Reads map region for ecoli illumina data"
log:
os.path.join('logs','ecoli_illumina_data','10_reads_map_region.log')
conda:
"env_conda/perl.yaml"
shell :
'DEB=4845200;END=5080000;'
'./src/reads_map_region.pl -s $DEB -e $END {input} {output} >{log} 2>&1'
rule configuration_file_ecoli_illumina_data:
input :
ref=os.path.join('ecoli_illumina_data','ref_illumina_ecoli.fasta'),
reads=os.path.join('ecoli_illumina_data',READS_NANOPORE_ECOLI_ILLUMINA)
output :
ref=os.path.join('before_configuration_files','config_ecoli_illumina.yaml')
message :
"Configuration file for ecoli illumina"
log:
os.path.join('logs','ecoli_illumina_data','11_configuration_ecoli_illumina.log')
shell:
'PATH_DATA=`pwd`;'
'echo -e "N: ecoli_illumina" >{output};'
'echo -e "I: $PATH_DATA/{input.reads}" >>{output};'
'echo -e "R: $PATH_DATA/{input.ref}" >>{output}'
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Simulated data
# Simulated data
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
...
@@ -792,8 +809,8 @@ rule configuration_file_simulated_data:
...
@@ -792,8 +809,8 @@ rule configuration_file_simulated_data:
'PATH_DATA=`pwd`;'
'PATH_DATA=`pwd`;'
'NAME_SIMULATED=`echo {input.reads}|cut -d"/" -f 2|sed "s/_0001.fastq//"`;'
'NAME_SIMULATED=`echo {input.reads}|cut -d"/" -f 2|sed "s/_0001.fastq//"`;'
'echo -e "N: $NAME_SIMULATED" >{output};'
'echo -e "N: $NAME_SIMULATED" >{output};'
'echo -e "I: $PATH_DATA/{input.re
f
}" >>{output};'
'echo -e "I: $PATH_DATA/{input.re
ads
}" >>{output};'
'echo -e "R: $PATH_DATA/{input.re
ads
}" >>{output}'
'echo -e "R: $PATH_DATA/{input.re
f
}" >>{output}'
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Configuration files
# Configuration files
...
...
This diff is collapsed.
Click to expand it.
src/exonerate_to_fasta.pl
+
6
−
0
View file @
4d9d8404
...
@@ -38,3 +38,9 @@ while (<>){
...
@@ -38,3 +38,9 @@ while (<>){
}
}
}
}
}
}
if
(
$nb_align
==
1
){
print
("
>
$name_query
\n
");
print
("
$seq_1
\n
");
print
("
>
$name_target
\n
");
print
("
$seq_2
\n
");
}
This diff is collapsed.
Click to expand it.
src/snakemake_launcher.sh
+
1
−
1
View file @
4d9d8404
...
@@ -13,4 +13,4 @@ else
...
@@ -13,4 +13,4 @@ else
CORES
=
$1
CORES
=
$1
fi
fi
echo
"Launch Snakemake"
echo
"Launch Snakemake"
snakemake
-c
$CORES
-np
--use-conda
--rerun-incomplete
--keep-going
snakemake
-c
$CORES
--use-conda
--rerun-incomplete
--keep-going
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment