Select Git revision
MSA.cpp 2.52 KiB
#include "MSA.h"
#include <iostream>
#include <algorithm>
using namespace std;
char MSA::basis_to_IUPAC(vector<char> basis) {
int index(0);
int value(0);
if (basis.size() < 1) {
return '-';
}
for (auto it = std::begin(basis); it != std::end(basis); ++it) {
value = alphabet_IUPAC.find(*it);
index = value >= 0 ? index | value : index ;
// cout << "Index :" << index << endl;
}
return alphabet_IUPAC[index];
}
vector<char> MSA::IUPAC_to_basis(char code) {
vector<char> matching_basis;
int index(1);
while (index < 32) {
if ( (index & alphabet_IUPAC.find(code)) != 0) {
matching_basis.push_back(alphabet_IUPAC[index]);
}
index *= 2;
}
return matching_basis;
}
void MSA::add_sequence(const string& str){
text.push_back(str);
length = max(length, (int) str.size());
++lines;
}
string MSA::consensus(int threshold, int ploidity) {
string consensus_seq("");
int score_index(0);
int current_weight(1);
for(int i(0);i<length;++i){
int scores[alphabet.size()]={0};
for(int j(0);j<(int)text.size();++j){
score_index = alphabet.find(text[j][i]);
if (score_index != -1) {
scores[score_index] += current_weight;
}
else {
vector<char> multiple_basis = IUPAC_to_basis(text[j][i]);
int size = multiple_basis.size();
if (current_weight % size != 0) {
current_weight *= size;
for( int k(0); k <(int)alphabet.size();++k) {
scores[k] *= size;
}
}
for (auto it = begin(multiple_basis); it != end(multiple_basis); ++it) {
scores[alphabet.find(*it)] += (current_weight / size);
}
}
}
vector<char> conserved_nuc;
// int stored_nuc_cnt(0);
int current_score(0);
while ( current_score < threshold * current_weight * (int) text.size()) {
int pos = max_element(scores, scores+alphabet.size()) - scores;
current_score += (scores[pos]*(100));
conserved_nuc.push_back(alphabet[pos]);
scores[pos] = 0;
}
char nuc = basis_to_IUPAC(conserved_nuc);
if (nuc != '-' && nuc != '.')
consensus_seq += nuc;
}
return consensus_seq;
}
void MSA::parser_fasta(string file){
ifstream flux(file.c_str());
string seq("");
if(flux){
srand(time(NULL));
string ligne("");
while(getline(flux, ligne)){
if (ligne[0] == '>'){
name.push_back(ligne.substr(1, ligne.size()-1));
if (seq != ""){
add_sequence(seq);
}
seq = "";
}
else {
seq += ligne;
}
}
add_sequence(seq);
}
else{
cout << "ERROR: Unable to open the file.." << endl;
exit(0);
}
}