Skip to content
Snippets Groups Projects
Commit e765ad48 authored by Lihouck Flavien's avatar Lihouck Flavien
Browse files

Fixed default values

parent 1500fc37
Branches
No related tags found
No related merge requests found
......@@ -11,23 +11,25 @@ config_file = "workflow/config.yaml"
def create_config() :
pattern = ""
parser = argparse.ArgumentParser(description="Creates the config file then runs the Meta-consensus pipeline")
parser = argparse.ArgumentParser(description="Creates the config file then runs the Meta-consensus pipeline", add_help=False)
required = parser.add_argument_group("Required arguments")
basic = parser.add_argument_group("Standard arguments")
advanced = parser.add_argument_group("Advanced arguments")
basic.add_argument("-h", "--help", action="help", help="show this help message and exit")
required.add_argument( "-input", "-i", help="Reads file", required=True)
required.add_argument("-output", "-o", help="Target directory for the pipeline results", required=True)
basic.add_argument("-reference", "-r", help="Reference for alignment and statistics")
advanced.add_argument("-list", help="A list of regions to work on (format: [r1, r2, ...] or [rStart_End, ...]) (default: no region)")
basic.add_argument("-size", "-s", help="The desired region size (default: 2000)", default='2000')
basic.add_argument("-size", "-s", help="The desired region size (default: maximum)", default='0')
basic.add_argument("-tools", "-t", help="The list of tools to use in the meta-consensus (default: ['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle'])", default="['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle']")
basic.add_argument("-consensus_threshold", "-ct", help="Threshold(s) used for the MSA consensus step (default: [70])", default="[70]")
basic.add_argument("-metaconsensus_threshold", "-mt", help="Threshold(s) used for the Meta-consensus result (default: [60])", default="[60]")
basic.add_argument("-depth", "-d", help="The depth used in the process (default: [60])", default="[60]")
basic.add_argument("-depth", "-d", help="The depth used in the process (default: max)", default="0")
#parser.add_argument("-plot", help="Analyse the meta-consensus and MSA consensus quality (requires reference)", action='store_true')
advanced.add_argument("-region_overlap", help="The size of the overlap between regions", default="50")
advanced.add_argument("-cores", help="The amount of cores to use in the pipeline run (default 1)", default=1)
args = parser.parse_args(['-h'])
args = parser.parse_args()
with open(config_file, 'w') as c_file:
c_file.write('read_file: ' + args.input + "\n")
......
......@@ -15,7 +15,7 @@ def main():
line_cnt = 1
problems = []
while line_cnt < len(lines):
char_cnt = 8
char_cnt = 9
while char_cnt < len(lines[line_cnt])-9:
if lines[line_cnt][char_cnt] != "|":
print((char_cnt, lines[line_cnt-1][char_cnt], lines[line_cnt][char_cnt], lines[line_cnt+1][char_cnt]))
......@@ -38,7 +38,7 @@ def main():
start = y - distance if y - distance > 0 else 0
end = y + distance if y + distance < len(target[1]) else len(target[1])-1
output.write("Error "+str(pb_cnt) + ":\n" + str(value1 + start)+ ": " + target[1][start:end] + "\n")
output.write(str(value1+start)+": "+lines[x][start+8:end+8]+"`\n")
output.write(str(value1+start)+": "+lines[x][start+9:end+9]+"\n")
output.write(str(value2 + start) + ": " + query[1][start:end] + "\n\n")
pb_cnt += 1
......
import os, sys
import time
from scripts.extract_longest_read import extract_longest_read
#configfile: "./workflow/config.yaml"
# ------------------------------------ #
# Value tests and initialization #
if not "region_size" in config:
......@@ -13,9 +13,6 @@ if not "region_size" in config:
else :
config["region_size"] = int(config["region_size"])
if not "depth" in config:
config["depth"] = "80"
if not "data_out" in config:
config["data_out"] = os.path.join(config["output_folder"],"data")
if not "consensus_folder" in config:
......@@ -37,6 +34,8 @@ if not "data_in" in config:
config["data_in"] = ""
HIGH_DEPTH = 60
HIGH_LENGTH = 5000
REGION_HEAD_OVERLAP = int(config["region_overlap"]) if "region_overlap" in config else 20
......@@ -60,16 +59,47 @@ def cut_region_from_length(length):
start += config["region_size"] - REGION_HEAD_OVERLAP
return region
def get_depth(file):
out = subprocess.run(["wc", "-l", file], capture_output=True, text=True)
return int(int(out.stdout.split(" ")[0]) /2)
def eprint(*args, **kwargs):
print(*args, file=sys.stderr, **kwargs)
if config["depth"] == 0:
depth_check = get_depth(config["read_file"])
else:
depth_check = config['depth']
if int(depth_check) > HIGH_DEPTH:
eprint("\nWarning: a high depth may cause the Multiple Sequence Alignment phase to be extremely SLOW")
if "muscle" in config["tool"]:
eprint("\t- muscle may be extremely SLOW for high depth")
if "abpoa" in config["tool"]:
eprint("\t- abpoa may require a lot of memory to run, consider disabling it or lowering the depth if you encounter problems allocating memory")
if "spoa" in config["tool"]:
eprint("\t- spoa may require a lot of memory to run, consider disabling it or lowering the depth if you encounter problems allocating memory")
time.sleep(1.7)
if not 'region' in config:
if not 'length' in config:
if 'ref_file' in config:
config['length'] = get_length(os.path.join(config["data_in"],config["ref_file"]))
else:
config['length'] = len(extract_longest_read(os.path.join(config["data_in"], config["read_file"]), 1)[0])
anchor = extract_longest_read(os.path.join(config["data_in"], config["read_file"]), 1)[0]
config['length'] = len(anchor)
config['region']= cut_region_from_length(config['length'])
if 'length' in config and int(config['length']) > HIGH_LENGTH:
eprint("\nWarning: a high region length may cause the Multiple Sequence Alignment phase to be extremely SLOW")
if "muscle" in config["tool"]:
eprint("\t- muscle may be extremely SLOW for high length")
if "abpoa" in config["tool"]:
eprint("\t- abpoa may require a lot of memory to run, consider disabling it or working on smaller regions if you encounter problems allocating memory")
if "spoa" in config["tool"]:
eprint("\t- spoa may require a lot of memory to run, consider disabling it or working on smaller regions if you encounter problems allocating memory")
time.sleep(2)
# Generates the meta-consensus from given reads #
rule all:
......@@ -518,12 +548,25 @@ if "ref_file" in config:
rule comparison_ref_metaconsensus:
input: query=rules.concat_region.output, target=config["ref_file"]
output: os.path.join("{output_folder}", "mismatches", "alignment_meta_ref_d{depth}_t1_{threshold}_t2_{value}.fasta")
conda: "envs/exonerate.yaml"
shell: "exonerate --bestn 1 -Q dna -E -m a:g --showvulgar false --showsugar false --showcigar false --showalignment true {input.target} {input.query} > {output} 2>/dev/null"
rule extract_mismatches:
input: rules.comparison_ref_metaconsensus.output
output: os.path.join("{output_folder}", "mismatches", "mismatches_d{depth}_t1_{threshold}_t2_{value}.fasta")
shell: "./src/util/exonerate_parser.py {input} {output} >/dev/null"
# --------------------------------------------------------#
rule plot:
input: expand(rules.plot_t2.output, region=config["region"], depth=config["depth"], threshold=config["threshold_1"], plot_folder=config["output_folder"]),
expand(rules.plot_t1.output, region=config["region"], depth=config["depth"] , plot_folder=config["output_folder"]),
expand(rules.plot_depth.output, region=config["region"], threshold=config["threshold_1"], plot_folder=config["output_folder"]),
expand(rules.plot_depth_all.output, region=config["region"], threshold=config["threshold_1"], plot_folder=config["output_folder"])
expand(rules.plot_depth_all.output, region=config["region"], threshold=config["threshold_1"], plot_folder=config["output_folder"]),
expand(rules.extract_mismatches.output , threshold=config["threshold_1"], value=config["threshold_2"], output_folder=config["output_folder"], depth=config["depth"])
message: "Created all plots"
else :
......
read_file: ../Lambda_phage/lambda_1000x_10p.fa
output_folder: Lambda_10
ref_file: ../Lambda_phage/lambda_virus.fa
region_size: 4000
region_overlap: 0
region_size: 0
region_overlap: 50
tool: ['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle']
threshold_1: [70]
threshold_2: [60]
depth: 30
depth: 0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment