Skip to content
Snippets Groups Projects
Commit cc326edb authored by Rohmer Coralie's avatar Rohmer Coralie
Browse files

renaming frame -> start_position

parent 6b067bc6
No related branches found
No related tags found
No related merge requests found
......@@ -3,7 +3,7 @@ import os
configfile: "config.yaml"
EXP = "experiments"
PREFIX_FRAME="frame_"
PREFIX_START_POSITION="start_position_"
#Script usage
def usage():
print("\nScript:\tsnakemake",
......@@ -13,8 +13,8 @@ def usage():
"Arguments: ",
" -required: i : nanoport long reads",
" r : reference sequences. (1 or 2)",
" -optional: o : number of frames to be tested",
" b : beginning(s) position of frame(s) (replacing nbr).",
" -optional: o : number of start positions to be tested",
" b : start position(s) (replacing -o).",
" d : depth(s).",
" s : sizes of regions",
" t : threshold for sequence consensus",
......@@ -70,12 +70,12 @@ try:
beginings = config['B']
except:
try:
nb_frame = int(config['O'])
nb_start_position = int(config['O'])
except:
nb_frame = 1
nb_start_position = 1
beginings=[]
first_begining = (genome_size_min - int(REGION_SIZES[-1]))/(nb_frame+1)
for i in range(nb_frame):
first_begining = (genome_size_min - int(REGION_SIZES[-1]))/(nb_start_position+1)
for i in range(nb_start_position):
beginings.append(int((i+1)*first_begining))
DATA_SETS=[]
......@@ -84,9 +84,9 @@ i=0
if type(beginings) != int: # equal : if len(beginings) > 1
for start in beginings:
i += 1
DATA_SETS.append(os.path.join(EXP,EXP_NAME,PREFIX_FRAME + str(start)))
DATA_SETS.append(os.path.join(EXP,EXP_NAME,PREFIX_START_POSITION + str(start)))
else:
DATA_SETS.append(os.path.join(EXP,EXP_NAME,PREFIX_FRAME + str(beginings)))
DATA_SETS.append(os.path.join(EXP,EXP_NAME,PREFIX_START_POSITION + str(beginings)))
beginings = [int(beginings)]
......@@ -127,7 +127,7 @@ rule data_set_preparation :
output :
os.path.join(EXP,EXP_NAME,'data','reads.fasta'),
os.path.join(EXP,EXP_NAME,'data','ref.fasta'),
expand(os.path.join('{data_set}','frame_start','frame_start.txt') , data_set = DATA_SETS)
expand(os.path.join('{data_set}','start_position.txt') , data_set = DATA_SETS)
message :
"Data set preparation for "+EXP + '/'+ EXP_NAME
log:
......@@ -135,7 +135,7 @@ rule data_set_preparation :
conda:
"env_conda/python3.yaml"
shell:
'ORDER="./src/data_set_preparation.py -i {input} -p '+PREFIX_FRAME+' -n '+ EXP + '/'+EXP_NAME + ' -b ' + ' '.join(map(str,beginings)) + '";'
'ORDER="./src/data_set_preparation.py -i {input} -p '+PREFIX_START_POSITION+' -n '+ EXP + '/'+EXP_NAME + ' -b ' + ' '.join(map(str,beginings)) + '";'
'echo "ORDER: $ORDER" >{log};'
'$ORDER 2>&1 >>{log}'
......@@ -165,7 +165,7 @@ rule alignment_reads_on_ref :
rule reads_map_region :
input :
aln = os.path.join(EXP,EXP_NAME,'alignement','aln_reads_on_ref.sam'),
start = os.path.join('{data_set}','frame_start','frame_start.txt')
start = os.path.join('{data_set}','start_position.txt')
output :
os.path.join('{data_set}','read_map_region','reads_r{region_size}.fasta')
message:
......@@ -372,7 +372,7 @@ rule abpoa_correction :
rule region_seq :
input :
ref = EXP + '/'+ EXP_NAME + '/data/ref.fasta',
start = "{data_set}/frame_start/frame_start.txt"
start = "{data_set}/start_position.txt"
output :
"{data_set}/seq_selectes_region/region_seq_r{region_size}.fasta"
message:
......
......@@ -24,9 +24,9 @@ def usage():
" name of the experiment",
" -o <int> ",
" default: 10",
" number of frames to be tested",
" number of start positions to be tested",
" -b <int>,<int>,...",
" beginning(s) position of frame(s) (replacing -o)",
" start position(s) (replacing -o)",
" -d <int>,<int>,...",
" default: 10,20,50",
" sequencing depth(s) (number of reads)",
......@@ -108,11 +108,11 @@ def summary():
exp_names=re.sub('\n', r' ', exp_names)
if os.path.exists("results_mean"):
result = subprocess.run("rm -r results_mean",shell=True)
if os.path.exists("results_all_frames"):
result = subprocess.run("rm -r results_all_frames",shell=True)
if os.path.exists("results_all_start_positions"):
result = subprocess.run("rm -r results_all_start_positions",shell=True)
result = subprocess.run("./src/total_data_format.py -n " + exp_names,shell=True)
result = subprocess.run("./src/total_data_format.py -m -n " + exp_names,shell=True)
print("See folders: results_mean & results_all_frames")
print("See folders: results_mean & results_all_start_positions")
else:
print("No experiment has been launched yet")
......@@ -253,8 +253,8 @@ else:
lines.append("B: [" + beginings + "]")
except:
try:
nb_frame = sys.argv[sys.argv.index("-o")+1]
lines.append("O: [" + nb_frame + "]")
nb_start_position = sys.argv[sys.argv.index("-o")+1]
lines.append("O: [" + nb_start_position + "]")
except:
pass
cores=""
......
......@@ -10,7 +10,7 @@ except:
try:
prefix = sys.argv[sys.argv.index("-p")+1]
except:
sys.stderr.write("ERROR: The prefix for frame is missing.\n")
sys.stderr.write("ERROR: The prefix for start position is missing.\n")
try:
exp_name = sys.argv[sys.argv.index("-n")+1]
......@@ -47,5 +47,5 @@ os.system(order)
for start in starts:
order = "echo " + start + " >" + exp_name + "/" + prefix + start + "/frame_start/frame_start.txt"
order = "echo " + start + " >" + exp_name + "/" + prefix + start + "/start_position.txt"
os.system(order)
......@@ -4,7 +4,7 @@ import subprocess
EXP = "experiments"
ATTRIBUTES_TO_DISPLAY=["percentage_Identity","percentage_Error","percentage_Match"]
ATTRIBUTES_TO_DISPLAY_THRESHOLD_INDEPENDANT=["time","memory"]
PREFIX="frame_"
PREFIX="start_position_"
RESULT_FOLDER="results"
NAME_DATA_FILE="data_align_t"
NAME_META_CONSENSUS="meta_consensus_"
......@@ -49,7 +49,7 @@ except:
files={}
i=0
result = subprocess.run("if [ ! -d results_mean ]; then mkdir results_mean;fi",shell=True)
result = subprocess.run("if [ ! -d results_all_frames ]; then mkdir results_all_frames;fi",shell=True)
result = subprocess.run("if [ ! -d results_all_start_positions ]; then mkdir results_all_start_positions;fi",shell=True)
#-----------------------------------------------------------------------------
# Retrieve what is needed to read the files
#-----------------------------------------------------------------------------
......@@ -146,10 +146,10 @@ for threshold in files :
if attribute in ATTRIBUTES_TO_DISPLAY_THRESHOLD_INDEPENDANT:
pass
output_mean=open("results_mean/data_" + add_name_file_output + "mean_"+ attribute + ".csv","w")
output_all=open("results_all_frames/data_" + add_name_file_output + "all_frame_"+ attribute + ".csv","w")
output_all=open("results_all_start_positions/data_" + add_name_file_output + "all_start_position_"+ attribute + ".csv","w")
else:
output_mean=open("results_mean/data_" + add_name_file_output + "mean_"+ attribute + "_" + threshold + ".csv","w")
output_all=open("results_all_frames/data_" + add_name_file_output + "all_frame_"+ attribute + "_" + threshold + ".csv","w")
output_all=open("results_all_start_positions/data_" + add_name_file_output + "all_start_position_"+ attribute + "_" + threshold + ".csv","w")
output_mean.write(",,")
output_all.write(",,")
for exp_name in data["order"]:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment