Skip to content
Snippets Groups Projects
Commit f0e4b35c authored by Lihouck Flavien's avatar Lihouck Flavien
Browse files

Added config creation script, fixed consensus

parent cdee9e88
No related branches found
No related tags found
No related merge requests found
#include "MSA.h"
#include <iostream>
#include <algorithm>
using namespace std;
......@@ -7,11 +8,14 @@ using namespace std;
char MSA::basis_to_IUPAC(vector<char> basis) {
int index(0);
int value(0);
if (basis.size() < 1) {
return '-';
}
for (auto it = std::begin(basis); it != std::end(basis); ++it) {
index = index | alphabet_IUPAC.find(*it);
value = alphabet_IUPAC.find(*it);
index = value >= 0 ? index | value : index ;
// cout << "Index :" << index << endl;
}
return alphabet_IUPAC[index];
}
......@@ -31,11 +35,12 @@ vector<char> MSA::IUPAC_to_basis(char code) {
void MSA::add_sequence(const string& str){
text.push_back(str);
length=str.size();
length = max(length, (int) str.size());
++lines;
}
string MSA::consensus(int threshold, int ploidity) {
string consensus_seq("");
int score_index(0);
int current_weight(1);
......@@ -43,6 +48,7 @@ string MSA::consensus(int threshold, int ploidity) {
int scores[alphabet.size()]={0};
for(int j(0);j<(int)text.size();++j){
score_index = alphabet.find(text[j][i]);
if (score_index != -1) {
scores[score_index] += current_weight;
......@@ -62,23 +68,14 @@ string MSA::consensus(int threshold, int ploidity) {
}
}
vector<char> conserved_nuc(ploidity, '.');
vector<char> conserved_nuc;
int stored_nuc_cnt(0);
int min_score(100);
int max_score(0);
int current_score;
for(int k(0); k < (int)alphabet.size(); ++k) {
if ( ( current_score= (scores[k]*(100))/(int)text.size())> (threshold)*current_weight) {
if (current_score > max_score) {
max_score = current_score;
conserved_nuc[1] = conserved_nuc[0];
conserved_nuc[0] = alphabet[k];
}
else if (current_score > min_score) {
min_score = current_score;
conserved_nuc[1] = alphabet[k];
}
}
int current_score(0);
while ( current_score < threshold * current_weight * (int) text.size()) {
int pos = max_element(scores, scores+alphabet.size()) - scores;
current_score += (scores[pos]*(100));
conserved_nuc.push_back(alphabet[pos]);
scores[pos] = 0;
}
char nuc = basis_to_IUPAC(conserved_nuc);
if (nuc != '-' && nuc != '.')
......
......@@ -24,7 +24,7 @@ class MSA
char basis_to_IUPAC(vector<char> basis);
vector<char> IUPAC_to_basis(char code);
void parser_fasta(string file);
string consensus(int similarity_threshold, int ploidity);
string consensus(int threshold, int ploidity);
void add_sequence(const string& str);
......
#include "MSA_cumulative.h"
#include <algorithm>
string MSA_cumulative::consensus(int threshold, int ploidity) {
string consensus_seq("");
int size_alpha = int(alphabet.size());
int current_weight(1);
for (int i(0); i < length; ++i) {
int size_bl = int(text.size());
int scores[size_alpha] = {0};
int depth_index = 0;
......@@ -40,21 +44,23 @@ string MSA_cumulative::consensus(int threshold, int ploidity) {
}
else {
vector<char> conserved_nuc;
while (threshold*current_weight > (highest_score*100)/size_bl) {
int total_score(0);
while (conserved_nuc.size() < 6 && threshold*current_weight > (total_score*100)/size_bl) {
alpha_index = 0;
while (alpha_index < size_alpha && scores[alpha_index] <= highest_score) {
////cerr << "alpha_index " << alpha_index << "scores :" << scores[alpha_index] << endl;
while (alpha_index < size_alpha && scores[alpha_index] < highest_score) {
alpha_index++;
}
highest_score = scores[alpha_index];
total_score += scores[alpha_index];
conserved_nuc.insert(conserved_nuc.begin(), alphabet[alpha_index]);
for (int k(0); k < size_alpha; k++)
scores[k] += highest_score;
/*for (int k(0); k < size_alpha; k++)
scores[k] += highest_score;*/
scores[alpha_index] = 0;
auto it = max_element(scores, scores+size_alpha);
highest_score = *it;
}
char nuc = basis_to_IUPAC(conserved_nuc);
//cerr << "char at pos " << i << " is " << nuc << endl;
if (nuc != '-' && nuc != '.')
consensus_seq += nuc;
}
......
......@@ -6,7 +6,7 @@
class MSA_cumulative : public MSA
{
public:
string consensus(int similarity_threshold, int ploidity);
string consensus(int threshold, int ploidity);
};
#endif
......@@ -16,7 +16,7 @@ int main(int argc, char** argv){
exit(-1);
}
if (argc > 1) {
MSA msa_fasta = MSA_cumulative();
MSA_cumulative msa_fasta = MSA_cumulative();
msa_fasta.parser_fasta(argv[1]);
ofstream output;
string filename;
......
This diff is collapsed.
......@@ -4,17 +4,56 @@ import subprocess
import os
import sys , argparse
config_file = "../config.yaml"
def main() :
pattern = ""
parser=argparse.ArgumentParser()
parser = argparse.ArgumentParser(description="Creates the config file for Meta-consensus pipeline")
parser.add_argument("-input", help="Reads file", required=True)
parser.add_argument("-output", help="Target directory for the pipeline results", required=True)
parser.add_argument("-reference", help="Reference for alignment and statistics")
parser.add_argument("-size", help="The size for cutting region (default: maximum size)", default='0')
parser.add_argument("-list", help="A list of regions to work on (format: [r1, r2, ...] or [rStart_End]) (default: no region)")
parser.add_argument("-tools", help="The list of tools to use in the meta-consensus (default: ['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle'])", default="['abpoa', 'spoa', 'kalign2', 'kalign3', 'mafft', 'muscle']")
parser.add_argument("-consensus_threshold", help="Threshold(s) used for the MSA consensus step (default: [70])", default="[70]")
parser.add_argument("-metaconsensus_threshold", help="Threshold(s) used for the Meta-consensus result (default: [60])", default="[60]")
parser.add_argument("-depth", help="The depth used in the process (default: [80])", default="[80]")
parser.add_argument("-plot", help="Analyse the meta-consensus and MSA consensus quality (requires reference)")
parser.add_argument("-region_overlap", help="The size of the overlap between regions", default="50")
args = parser.parse_args()
with open(config_file, 'w') as c_file:
c_file.write('read_file: ' + args.input + "\n")
c_file.write("output_folder: "+args.output + "\n")
if args.reference:
c_file.write("ref_file: "+args.reference+"\n")
region_param = False
if args.list:
r = args.list
r = r.replace("[", "")
r = r.replace("]","")
regions = r.split(",")
actual_regions = "["
last_elem = None
for elem in regions:
if '_' in elem:
actual_regions+= elem +","
elif args.size != 0:
actual_regions+= elem+"_"+str(int(elem)+int(args.size))+","
elif last_elem:
actual_regions+= last_elem+'_'+elem+","
last_elem = None
else:
last_elem = elem
print(actual_regions)
c_file.write("region: "+actual_regions[:-1]+"]\n")
else:
c_file.write("size: " + args.size+"\n")
c_file.write("region_overlap: "+ args.region_overlap+"\n")
c_file.write("tool: "+ args.tools+ "\n")
c_file.write("threshold_1: "+ args.consensus_threshold +"\n")
c_file.write("threshold_2: "+ args.metaconsensus_threshold +"\n")
c_file.write("depth: "+args.depth +" \n")
if __name__ == "__main__":
......
#!/bin/bash
source ./src/config_conda.sh
if [[ ! -d .conda_snakemake ]]; then
echo "Create a conda env for snakemake"
conda env create -p .conda_snakemake -f env_conda/snakemake.yaml >/dev/null
fi
CURRENT_PATH=`pwd`
conda activate $CURRENT_PATH/.conda_snakemake
CONFIG_EXPERIMENT=$1
if [[ $2 == "" ]]; then
CORES="24"
else
CORES=$2
fi
echo "Launch Snakemake with $CONFIG_EXPERIMENT"
snakemake --configfile $CONFIG_EXPERIMENT -c$CORES --use-conda --rerun-incomplete --keep-going
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment