Skip to content
Snippets Groups Projects
Commit ee47cc40 authored by Lihouck Flavien's avatar Lihouck Flavien
Browse files

Fixed the file for pipeline launcher/installer, everything should work now....

Fixed the file for pipeline launcher/installer, everything should work now. Added concat strategy for joining regions
parent 822a6f77
No related branches found
No related tags found
No related merge requests found
......@@ -2,26 +2,32 @@
import subprocess
import os
import sys , argparse
import sys
import argparse
config_file = "workflow/config.yaml"
def create_config() :
pattern = ""
parser = argparse.ArgumentParser(description="Creates the config file then runs the Meta-consensus pipeline")
parser.add_argument("-input", help="Reads file", required=True)
parser.add_argument("-output", help="Target directory for the pipeline results", required=True)
parser.add_argument("-reference", help="Reference for alignment and statistics")
parser.add_argument("-list", help="A list of regions to work on (format: [r1, r2, ...] or [rStart_End, ...]) (default: no region)")
parser.add_argument("-size", help="The desired region size (default: 2000)", default='2000')
parser.add_argument("-tools", help="The list of tools to use in the meta-consensus (default: ['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle'])", default="['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle']")
parser.add_argument("-consensus_threshold", help="Threshold(s) used for the MSA consensus step (default: [70])", default="[70]")
parser.add_argument("-metaconsensus_threshold", help="Threshold(s) used for the Meta-consensus result (default: [60])", default="[60]")
parser.add_argument("-depth", help="The depth used in the process (default: [60])", default="[60]")
parser.add_argument("-plot", help="Analyse the meta-consensus and MSA consensus quality (requires reference)", action='store_true')
parser.add_argument("-region_overlap", help="The size of the overlap between regions", default="50")
parser.add_argument("-cores", help="The amount of cores to use in the pipeline run (default 1)", default=1)
args = parser.parse_args()
required = parser.add_argument_group("Required arguments")
basic = parser.add_argument_group("Standard arguments")
advanced = parser.add_argument_group("Advanced arguments")
required.add_argument( "-input", "-i", help="Reads file", required=True)
required.add_argument("-output", "-o", help="Target directory for the pipeline results", required=True)
basic.add_argument("-reference", "-r", help="Reference for alignment and statistics")
advanced.add_argument("-list", help="A list of regions to work on (format: [r1, r2, ...] or [rStart_End, ...]) (default: no region)")
basic.add_argument("-size", "-s", help="The desired region size (default: 2000)", default='2000')
basic.add_argument("-tools", "-t", help="The list of tools to use in the meta-consensus (default: ['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle'])", default="['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle']")
basic.add_argument("-consensus_threshold", "-ct", help="Threshold(s) used for the MSA consensus step (default: [70])", default="[70]")
basic.add_argument("-metaconsensus_threshold", "-mt", help="Threshold(s) used for the Meta-consensus result (default: [60])", default="[60]")
basic.add_argument("-depth", "-d", help="The depth used in the process (default: [60])", default="[60]")
#parser.add_argument("-plot", help="Analyse the meta-consensus and MSA consensus quality (requires reference)", action='store_true')
advanced.add_argument("-region_overlap", help="The size of the overlap between regions", default="50")
advanced.add_argument("-cores", help="The amount of cores to use in the pipeline run (default 1)", default=1)
args = parser.parse_args(['-h'])
with open(config_file, 'w') as c_file:
c_file.write('read_file: ' + args.input + "\n")
......@@ -56,8 +62,8 @@ def create_config() :
c_file.write("threshold_2: "+ args.metaconsensus_threshold +"\n")
c_file.write("depth: "+args.depth +" \n")
plot = None
if args.plot and args.reference:
plot = 'plot'
if args.reference:
plot = 'plot'
return {"core": str(args.cores), "plot" : plot}
......@@ -77,8 +83,6 @@ def config_conda():
def main():
config_conda()
data = create_config()
if not data['plot']:
data['plot'] = ""
result = subprocess.run(["./workflow/scripts/snakemake_launcher.sh",os.path.join("workflow","config.yaml"), data["core"] , data['plot']])
if __name__ == "__main__":
......
#!/usr/bin/env python3
import sys
DEFAULT_DISTANCE = 10
def main():
if len(sys.argv) > 3:
distance = int(sys.argv[3])
else:
distance = DEFAULT_DISTANCE
with open(sys.argv[1], 'r') as f:
lines = [line.replace("\n","") for line in f][12:-2]
line_cnt = 1
problems = []
while line_cnt < len(lines):
char_cnt = 8
while char_cnt < len(lines[line_cnt])-9:
if lines[line_cnt][char_cnt] != "|":
print((char_cnt, lines[line_cnt-1][char_cnt], lines[line_cnt][char_cnt], lines[line_cnt+1][char_cnt]))
problems.append((line_cnt, char_cnt-8))
char_cnt += distance
else:
char_cnt += 1
line_cnt += 4
region = []
with open(sys.argv[2], 'w') as output:
pb_cnt = 1
for pb in problems:
x, y = pb
target = lines[x-1].split(":")
query = lines[x+1].split(":")
target[1] = target[1].strip(" ")
query[1] = query[1].strip(" ")
value1 = int(target[0])
value2 = int(query[0])
start = y - distance if y - distance > 0 else 0
end = y + distance if y + distance < len(target[1]) else len(target[1])-1
output.write("Error "+str(pb_cnt) + ":\n" + str(value1 + start)+ ": " + target[1][start:end] + "\n")
output.write(str(value1+start)+": "+lines[x][start+8:end+8]+"`\n")
output.write(str(value2 + start) + ": " + query[1][start:end] + "\n\n")
pb_cnt += 1
if __name__ == "__main__":
main()
\ No newline at end of file
......@@ -51,10 +51,10 @@ def get_length(file):
return len(l)
def cut_region_from_length(length):
start = 2
start = 1
region = []
if config["region_size"] == 0:
return ["2_"+str(length -1)]
return ["1_"+str(length -1)]
while start+config["region_size"] < int(length):
region.append(str(start)+"_"+str( min(start+config["region_size"] , int(length)-2)))
start += config["region_size"] - REGION_HEAD_OVERLAP
......@@ -74,7 +74,9 @@ if not 'region' in config:
rule all:
input:
expand("{output_folder}/meta_consensus/{region}/meta_consensus_{align}_r{region}_d{depth}_t1_{threshold}_t2_{value}.fasta", region=config["region"], depth=config["depth"], threshold=config["threshold_1"], value=config["threshold_2"], align="muscle", output_folder=config["output_folder"])
# expand("{output_folder}/meta_consensus/{region}/meta_consensus_{align}_r{region}_d{depth}_t1_{threshold}_t2_{value}.fasta", region=config["region"], depth=config["depth"], threshold=config["threshold_1"], value=config["threshold_2"], align="muscle", output_folder=config["output_folder"])
expand(os.path.join("{output_folder}","meta_consensus","meta_consensus_d{depth}_t1_{threshold}_t2_{value}.fasta"), depth=config["depth"], threshold=config["threshold_1"], value=config["threshold_2"], align="muscle", output_folder=config["output_folder"])
message: "Meta consensus can be found in {input}"
......@@ -154,7 +156,7 @@ else :
output :
os.path.join(config["data_out"],'alignment','aln_reads_on_read.sam')
message :
"Minimap for {config[data_out]}"
"Minimap for {config['read_file']}"
log: os.path.join(config["output_folder"],'logs','2_alignment_reads_on_anchor.log')
conda:
"envs/minimap2.yaml"
......@@ -172,6 +174,7 @@ else :
rule get_regions_for_read :
input : sam={rules.alignment_reads_on_anchor.output}
output: os.path.join(config["data_out"], 'regions.txt')
message: "Getting the regions where the reads are aligned on the anchor"
conda: "envs/samtools.yaml"
params: depth=max(config["depth"])
shell: "./workflow/scripts/prepare_region_from_read.sh {input.sam} {output} {params.depth}"
......@@ -424,6 +427,14 @@ rule meta_consensus:
shell:
"./src/msa_handler/MTool {input.muscle} {output} {wildcards.value}"
rule concat_region:
input: expand(rules.meta_consensus.output, region=config["region"], align="muscle" , allow_missing=True)
output: os.path.join("{output_folder}", "meta_consensus","meta_consensus_d{depth}_t1_{threshold}_t2_{value}.fasta")
shell:
"./workflow/scripts/concat_regions.py {output} {input}"
# --------------------------------------------------------#
# This part handle comparison to a reference and creates plots in order to better visualize the efficiency of the consensus strategy used for both the preliminary consensus and the meta-consensus. #
if "ref_file" in config:
......@@ -451,7 +462,7 @@ if "ref_file" in config:
params: format="\"%pi, %ps, %et, %em\n\""
conda: "envs/exonerate.yaml"
shell: "./workflow/scripts/exonerate_stats.sh {output} {input.ref} {input.files} {input.consensus_per_tool}"
# "echo {STAT_HEADER} >> {output} \n for FILE in {input.files}; do exonerate --bestn 1 -Q dna -E -m a:g --showalignment false --showsugar false --showvulgar false --showcigar false --ryo {params.format} --verbose 0 -q {input.ref} -t $FILE >> {output} 2>/dev/null; done" #
# "echo {STAT_HEADER} >> {output} \n for FILE in {input.files}; do exonerate --bestn 1 -Q dna -E -m a:g --showsugar false --showvulgar false --showcigar false --ryo {params.format} --verbose 0
rule exonerate_stats_meta:
input: files=expand(rules.meta_consensus.output, align="muscle", value=config["threshold_2"], allow_missing=True) , ref=expand("{ref}/region_references/region_seq_r{region}.fasta", data_in=config["data_in"], ref=config["data_out"], allow_missing=True)
......@@ -497,8 +508,8 @@ if "ref_file" in config:
shell: "./workflow/scripts/plot_results.py {wildcards.plot_folder}/plot/{wildcards.region}/{wildcards.depth}/graph_r{wildcards.region}_d{wildcards.depth} '[{config[threshold_2]}]' {input} 2> {log} "
rule plot_t2:
input: expand(rules.exonerate_stats_tools.output, tool=config["tool"], output_folder=config["consensus_folder"], allow_missing=True),
expand(rules.exonerate_stats_meta.output, output_folder=config["consensus_folder"], allow_missing=True)
input: expand(rules.exonerate_stats_tools.output, tool=config["tool"], output_folder=config["output_folder"], allow_missing=True),
expand(rules.exonerate_stats_meta.output, output_folder=config["output_folder"], allow_missing=True)
params: target_folder="{plot_folder}/plot/{region}/{depth}/{threshold}/"
log: "{plot_folder}/logs/plot_t2_r{region}_d{depth}_t_{threshold}.log"
output: expand(os.path.join("{plot_folder}","plot","{region}","{depth}","{threshold}","graph_r{region}_d{depth}_t1_{threshold}_{graph}.png"), graph=GRAPH_TYPES, allow_missing=True)
......
read_file: ../Lambda_phage/lambda_1000x_5p.fa
output_folder: lambda_config
read_file: ../Lambda_phage/lambda_1000x_10p.fa
output_folder: Lambda_10
ref_file: ../Lambda_phage/lambda_virus.fa
region_size: 2000
region_overlap: 50
region_size: 4000
region_overlap: 0
tool: ['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle']
threshold_1: [70]
threshold_2: [60]
depth: [60]
depth: 30
#!/usr/bin/env python3
import sys
def main():
args = sys.argv
with open(args[1], "w") as output:
with open(args[2], 'r') as file:
lines = file.readlines()
output.write(">meta_consensus\n"+ lines[1][:-1])
file_cnt = 3
while file_cnt < len(args):
with open(args[file_cnt], "r") as file:
lines = file.readlines()
output.write(lines[1][1:-1])
file_cnt += 1
output.write('\n')
if __name__ == "__main__" :
main()
......@@ -14,7 +14,7 @@ else
INPUT_REF=$2
OUTPUT=$3
REGION_SIZE=$4
let "START_REGION= $INPUT_START-1"
let "START_REGION= $INPUT_START"
let "END_REGION = $REGION_SIZE"
cat $INPUT_REF |grep ">" >$OUTPUT
cat $INPUT_REF |grep -v ">"|tr -d "\n"|cut -c $START_REGION-$END_REGION >>$OUTPUT
......
......@@ -2,7 +2,7 @@
source ./workflow/scripts/config_conda.sh
if [[ ! -d .conda_snakemake ]]; then
echo "Create a conda env for snakemake"
conda env create -p .conda_snakemake -f src/workflow/envs/snakemake.yaml >/dev/null
conda env create -p .conda_snakemake -f workflow/envs/snakemake.yaml >/dev/null
fi
CURRENT_PATH=`pwd`
conda activate $CURRENT_PATH/.conda_snakemake
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment