Skip to content
Snippets Groups Projects
Commit 7d666158 authored by Lihouck Flavien's avatar Lihouck Flavien
Browse files

Bugfixes

parent 970d980c
Branches
No related tags found
No related merge requests found
......@@ -11,6 +11,10 @@
using namespace std;
int main(int argc, char** argv){
if (argc == 1) {
cerr << "Usage :\n MTool input.fasta [output_name [threshold]]" << endl;
exit(-1);
}
if (argc > 1) {
MSA msa_fasta = MSA_cumulative();
msa_fasta.parser_fasta(argv[1]);
......
......@@ -6,19 +6,33 @@ DATASET = expand("{data}/{format}", data=config["data_out"], format=config["form
GRAPH_TYPES = ["identity", "similarity", "equivalence", "mismatches","sequence_length", "aligned_length"]
STAT_HEADER= "threshold, percent_identity, percent_similarity, total_equivalence, total_mismatches, target length, alignment length\n "
REGION_SIZE = 20000
REGION_SIZE = 0
REGION_HEAD_OVERLAP = 200
def get_length(file):
l = ""
with open(file, 'r') as f:
f.readline()
l = f.readline()
return len(l)
def cut_region_from_length(length):
start = 2
region = []
if REGION_SIZE == 0:
return ["2_"+str(length -1)]
while start < int(length):
region.append(str(start)+"_"+str( min(start+REGION_SIZE, int(length))))
start += REGION_SIZE - REGION_HEAD_OVERLAP
return region
if not "region" in config:
config["region"] = cut_region_from_length(config["length"])
if not 'region' in config:
config['length']= get_length(os.path.join(config["data_in"],config["ref_file"]))
config['region']= cut_region_from_length(config['length'])
# Generates the meta-consensus from given reads #
......@@ -39,7 +53,7 @@ rule MTool:
output:
"msa_handler/MTool"
shell:
"cd msa_handler && make MTool2"
"cd msa_handler && make MTool2 2>{log} 1>&2"
# -------------------------------------------------------- #
# Aligns the reads on a given reference with minimap2 #
......@@ -62,6 +76,9 @@ if "ref_file" in config:
'echo "ORDER: $ORDER" >{log};'
'$ORDER >{output} 2>> {log}'
# config['length'] = get_length('os.path.join(config["data_in"],config["ref_file"])'))
# config['region'] = cut_region_from_length(config['length'])
# Cuts the reads into smaller regions to reduce the overall size and lower the computational cost, and creates fasta to build MSA#
rule reads_map_region:
......@@ -111,7 +128,12 @@ else:
'echo "ORDER: $ORDER" >{log};'
'$ORDER >{output} 2>> {log}'
# Select the region to keep from the anchoring #
# config['length'] = get_length(config['os.path.join(config["data_in"],config["ref_file"])'))
# config['region'] = cut_region_from_length(config['length'])
rule get_regions_for_read :
input : sam={rules.alignment_reads_on_anchor.output}
......
data_in: "../../Lambda_phage"
read_file: "lambda_1000x_10p.fa"
#ref_file: "lambda_virus.fa"
ref_file: "lambda_virus.fa"
data_out: "lambda/data"
format: "msa"
tool: [ "kalign", "kalign3", "mafft", "muscle"]
threshold_1: ["50", "60"]
threshold_2 : ["50", "60"]
tool: [ "kalign", "kalign3", "mafft", "muscle", "abpoa"]
threshold_1: ["70"]
threshold_2 : ["45", "50", "55", "60", "65", "70"]
#region: ["100_1100","1100_2100", "2100_4100", "4100_6100"]
length: '40500'
depth: ["15","20","30"]
#length: '40500'
depth: ["30"]
align: ["muscle"]
output_folder: "lambda/consensus"
ref_folder: "lambda/data"
\ No newline at end of file
import sys
"""
A script used to extract the largest regions where the depth is higher than a specified depth
inputs : the input (samtools depth output format) path, the output file path and the target depth
output : a file with regions written in this format [ 'start1_end1', 'start2_end2' ... ]
"""
def main(args):
start_r = 0
......@@ -14,7 +20,7 @@ def main(args):
start_r = pos
else:
if start_r != 0:
regions.append((start_r, pos))
regions.append((start_r, pos - 1))
start_r = 0
with open(args[2], 'w') as out:
out.write('[')
......@@ -23,10 +29,8 @@ def main(args):
out.write(']')
if __name__ == "__main__":
if len(sys.argv) == 4:
main(sys.argv)
else:
sys.exit("Usage:\ndelimit_regions input output min_depth\n")
sys.exit("Usage:\ndelimit_regions.py input output min_depth\n")
#!/bin/bash
# This script takes a read pile in the sam format, and creates regions where the depth is at least equal to a minimum
# input: $1 A read pile in sam format
# $2 The path for the final output
# $3 The minimum depth
# This script requires samtools to function
INPUT=$1
OUTPUT=$2
MIN=$3
......
#!/bin/bash
# This script splits a region from a reference (or anchor read)
# input $1 (int) the start of the region
# $2 (str) the path to the reference (fasta) to cut
# $3 (str) the path for the output file (fasta)
# $4 (int) the size of the region
if [ "$#" -lt 3 ]
then
echo 'USAGE: ./seq_region.sh {input_start} {input_ref} {output} {region_size}'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment