Skip to content
Snippets Groups Projects
Commit 9eaa0830 authored by Lihouck Flavien's avatar Lihouck Flavien
Browse files

Ajout des scripts de lancement de Pipeline de Coralie, et adaptations de ceux-çi a ce pipeline

parent f0e4b35c
No related branches found
No related tags found
No related merge requests found
Showing
with 123 additions and 15 deletions
#!/usr/bin/env python3
import subprocess
import os
import sys , argparse
config_file = "workflow/config.yaml"
def create_config() :
pattern = ""
parser = argparse.ArgumentParser(description="Creates the config file for Meta-consensus pipeline")
parser.add_argument("-input", help="Reads file", required=True)
parser.add_argument("-output", help="Target directory for the pipeline results", required=True)
parser.add_argument("-reference", help="Reference for alignment and statistics")
parser.add_argument("-size", help="The size for cutting region (default: 2000)", default='2000')
parser.add_argument("-list", help="A list of regions to work on (format: [r1, r2, ...] or [rStart_End, ...]) (default: no region)")
parser.add_argument("-tools", help="The list of tools to use in the meta-consensus (default: ['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle'])", default="['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle']")
parser.add_argument("-consensus_threshold", help="Threshold(s) used for the MSA consensus step (default: [70])", default="[70]")
parser.add_argument("-metaconsensus_threshold", help="Threshold(s) used for the Meta-consensus result (default: [60])", default="[60]")
parser.add_argument("-depth", help="The depth used in the process (default: [60])", default="[60]")
parser.add_argument("-plot", help="Analyse the meta-consensus and MSA consensus quality (requires reference)")
parser.add_argument("-region_overlap", help="The size of the overlap between regions", default="50")
parser.add_argument("-cores", help="The amount of cores to use in the pipeline run (default 1)", default=1)
args = parser.parse_args()
with open(config_file, 'w') as c_file:
c_file.write('read_file: ' + args.input + "\n")
c_file.write("output_folder: "+args.output + "\n")
if args.reference:
c_file.write("ref_file: "+args.reference+"\n")
region_param = False
if args.list:
r = args.list
r = r.replace("[", "")
r = r.replace("]","")
regions = r.split(",")
actual_regions = "["
last_elem = None
for elem in regions:
if '_' in elem:
actual_regions+= elem +","
elif args.size != 0:
actual_regions+= elem+"_"+str(int(elem)+int(args.size))+","
elif last_elem:
actual_regions+= last_elem+'_'+elem+","
last_elem = None
else:
last_elem = elem
print(actual_regions)
c_file.write("region: "+actual_regions[:-1]+"]\n")
else:
c_file.write("region_size: " + args.size+"\n")
c_file.write("region_overlap: "+ args.region_overlap+"\n")
c_file.write("tool: "+ args.tools+ "\n")
c_file.write("threshold_1: "+ args.consensus_threshold +"\n")
c_file.write("threshold_2: "+ args.metaconsensus_threshold +"\n")
c_file.write("depth: "+args.depth +" \n")
plot = None
if args.plot and args.reference:
plot = 'plot'
return {"core": str(args.cores), "plot" : plot}
def config_conda():
try:
open("workflow/scripts/config_conda.sh")
except:
result = subprocess.run("./workflow/scripts/create_config_conda.sh")
try:
open("workflow/scripts/config_conda.sh")
except:
print("Unable to launch the pipeline.")
sys.exit()
def main():
config_conda()
data = create_config()
if not data['plot']:
data['plot'] = ""
result = subprocess.run(["./workflow/scripts/snakemake_launcher.sh",os.path.join("workflow","config.yaml"), data["core"] , data['plot']])
if __name__ == "__main__":
main()
\ No newline at end of file
......@@ -69,7 +69,7 @@ string MSA::consensus(int threshold, int ploidity) {
}
vector<char> conserved_nuc;
int stored_nuc_cnt(0);
// int stored_nuc_cnt(0);
int current_score(0);
while ( current_score < threshold * current_weight * (int) text.size()) {
int pos = max_element(scores, scores+alphabet.size()) - scores;
......
......@@ -3,7 +3,7 @@ from scripts.extract_longest_read import extract_longest_read
configfile: "./workflow/config.yaml"
#configfile: "./workflow/config.yaml"
# ------------------------------------ #
......@@ -84,12 +84,12 @@ rule all:
rule MTool:
input:
"msa_handler/MSA.cpp", "msa_handler/MSA.h", "msa_handler/main.cpp", "msa_handler/Makefile"
"src/msa_handler/MSA.cpp", "src/msa_handler/MSA.h", "src/msa_handler/main.cpp", "src/msa_handler/Makefile"
log: "logs/MTool.log"
output:
"msa_handler/MTool"
"src/msa_handler/MTool"
shell:
"cd msa_handler && make MTool"
"cd src/msa_handler && make MTool"
# -------------------------------------------------------- #
# Aligns the reads on a given reference with minimap2 #
......@@ -125,7 +125,7 @@ if "ref_file" in config:
message:
"Reads map region for (Region start= {wildcards.start} end= {wildcards.end})"
log:
os.path.join('config[output_folder]','logs','3_reads_map_region_r{start}_{end}.log')
os.path.join(config['output_folder'],'logs','3_reads_map_region_r{start}_{end}.log')
conda:
"envs/perl.yaml"
shell :
......@@ -190,7 +190,7 @@ else :
output: os.path.join(config['data_out'], "read_map_region",'read_r{start}_{end, \d+}.fasta')
conda: "envs/perl.yaml"
log:
os.path.join('config[output_folder]','logs','3_reads_map_region_r{start}_{end}.log')
os.path.join(config['output_folder'],'logs','3_reads_map_region_r{start}_{end}.log')
shell :
'ORDER="./workflow/scripts/reads_map_region.pl -s {wildcards.start} -e {wildcards.end} -c 100 {input.sam} {output}";'
'echo "ORDER: $ORDER" >{log};'
......@@ -214,7 +214,7 @@ rule select_depth :
message:
"Select depth for (Region size={wildcards.region} & Depth={wildcards.depth})"
log:
os.path.join('config[output_folder]','logs','5_select_number_reads_r{region}_d{depth}.log')
os.path.join(config['output_folder'],'logs','5_select_number_reads_r{region}_d{depth}.log')
shell :
'ORDER="./workflow/scripts/selected_nb_read.sh {input} {wildcards.depth} {output}";'
'echo "ORDER: $ORDER" >{log};'
......@@ -368,7 +368,7 @@ rule create_consensus_per_tool:
os.path.join("{output_folder}","{region}","{depth}","{threshold}","consensus_{tool}_{region}_{depth}.fasta")
log: "{output_folder}/logs/consensus_per_tool_{tool}_r{region}_d{depth}_t{threshold}.log"
message: "Consensus for tools"
shell: "(./msa_handler/MTool {input.data} {output} {wildcards.threshold} )" # >{log} 2>&1"
shell: "(./src/msa_handler/MTool {input.data} {output} {wildcards.threshold} )" # >{log} 2>&1"
rule create_consensus:
input: expand(rules.create_consensus_per_tool.output, tool=config["tool"], allow_missing=True, dataset=DATASET)
......@@ -422,7 +422,7 @@ rule meta_consensus:
log: "{output_folder}/logs/meta_consensus_{align}_r{region}_d{depth}_t1_{threshold}_t2_{value}.log"
message: "Meta-consensus region: {wildcards.region} , depth: {wildcards.depth} , consensus threshold: {wildcards.threshold} , meta consensus threshold: {wildcards.value}\n"
shell:
"./msa_handler/MTool {input.muscle} {output} {wildcards.value}"
"./src/msa_handler/MTool {input.muscle} {output} {wildcards.value}"
# --------------------------------------------------------#
# This part handle comparison to a reference and creates plots in order to better visualize the efficiency of the consensus strategy used for both the preliminary consensus and the meta-consensus. #
......@@ -431,11 +431,11 @@ if "ref_file" in config:
input :
ref =os.path.join(config["data_in"],config["ref_file"])
output :
os.path.join("config[output_folder]","region_references","region_seq_r{start}_{end}.fasta")
os.path.join(config['output_folder'], "data", "region_references","region_seq_r{start}_{end}.fasta")
message:
"Region's seq for (Region {wildcards.start} - {wildcards.end})"
log:
"config[output_folder]/logs/6_region_seq_r{start}_{end}.log"
os.path.join(config['output_folder'],"logs","6_region_seq_r{start}_{end}.log")
shell :
'ORDER="./workflow/scripts/region_seq.sh {wildcards.start} {input.ref} {output} {wildcards.end}";'
'echo "ORDER: $ORDER" >{log};'
......@@ -444,7 +444,7 @@ if "ref_file" in config:
rule exonerate_ref_result:
input:
files=expand(rules.meta_consensus.output, align="muscle", allow_missing=True) ,
ref=expand("{ref}/region_seq_r{region}.fasta", data_in=config["data_in"], ref=config["data_out"], allow_missing=True),
ref=expand("{ref}/region_references/region_seq_r{region}.fasta", data_in=config["data_in"], ref=config["data_out"], allow_missing=True),
consensus_per_tool=expand(rules.create_consensus_per_tool.output, tool=config["tool"], allow_missing=True)
output: os.path.join("{output_folder}","stats","{region}","aln_all_consensus_r{region}_d{depth}_t1_{threshold}_t2_{value}.txt")
log: "{output_folder}/logs/exonerate_ref_result_r{region}_d{depth}_t1_{threshold}_t2{value}.log"
......@@ -454,7 +454,7 @@ if "ref_file" in config:
# "echo {STAT_HEADER} >> {output} \n for FILE in {input.files}; do exonerate --bestn 1 -Q dna -E -m a:g --showalignment false --showsugar false --showvulgar false --showcigar false --ryo {params.format} --verbose 0 -q {input.ref} -t $FILE >> {output} 2>/dev/null; done" #
rule exonerate_stats_meta:
input: files=expand(rules.meta_consensus.output, align="muscle", value=config["threshold_2"], allow_missing=True) , ref=expand("{ref}/region_seq_r{region}.fasta", data_in=config["data_in"], ref=config["data_out"], allow_missing=True)
input: files=expand(rules.meta_consensus.output, align="muscle", value=config["threshold_2"], allow_missing=True) , ref=expand("{ref}/region_references/region_seq_r{region}.fasta", data_in=config["data_in"], ref=config["data_out"], allow_missing=True)
output: os.path.join("{output_folder}","plot","{region}","{depth}","{threshold}","aln_stats_meta.txt")
log: "{output_folder}/logs/plot_exonerate_meta_r{region}_d{depth}_t{threshold}.log"
conda: "envs/exonerate.yaml"
......@@ -463,7 +463,7 @@ if "ref_file" in config:
rule exonerate_stats_tools:
input:
ref=expand("{ref}/region_seq_r{region}.fasta", data_in=config["data_in"], ref=config["data_out"], allow_missing=True),
ref=expand("{ref}/region_references/region_seq_r{region}.fasta", data_in=config["data_in"], ref=config["data_out"], allow_missing=True),
consensus_per_tool=expand(rules.create_consensus_per_tool.output, value=config["threshold_2"], allow_missing=True)
output: os.path.join("{output_folder}","plot","{region}","{depth}","{threshold}","aln_stats_{tool}.txt")
log: "{output_folder}/logs/plot_exonerate_tools_{tool}_r{region}_d{depth}_t{threshold}.log"
......
read_file: ../Lambda_phage/lambda_1000x_5p.fa
output_folder: lambda_config
ref_file: ../Lambda_phage/lambda_virus.fa
region_size: 2000
region_overlap: 50
tool: ['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle']
threshold_1: [70]
threshold_2: [60]
depth: [60]
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
File moved
#data_in: "../../Lambda_phage"
read_file: "../../Lambda_phage/lambda_1000x_5p.fa"
ref_file: "../../Lambda_phage/lambda_virus.fa"
data_in: "../../Lambda_phage"
read_file: "lambda_1000x_5p.fa"
ref_file: "lambda_virus.fa"
output_folder: "../lambda_5"
region_size : 5000
#data_out: "lambda/data"
#tool: [ "kalign", "kalign3", "mafft", "muscle"]
#threshold_1: ["50","60","70"]
#threshold_2 : ["25","30","35","40", "45"]
#tool: [ "kalign", "kalign3", "mafft", "muscle", "abpoa", "spoa"]
#threshold_1: ["50","60","70", "80", "90" ,"100"]
#threshold_2 : ["50","60","70", "80", "90","100"]
#region: ["2_10002"]
#length: '40500'
#depth: ["80"]
#region_size: 10000
#align: ["muscle"]if not "threshold_1" in config:
# config["threshold_1"] = ["70"]
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment