Select Git revision
Forked from
Polito Guillermo / tofork
Source project has a limited visibility.
MSA.cpp 2.52 KiB
#include "MSA.h"
#include <iostream>
#include <algorithm>
using namespace std;
char MSA::basis_to_IUPAC(vector<char> basis) {
int index(0);
int value(0);
if (basis.size() < 1) {
return '-';
}
for (auto it = std::begin(basis); it != std::end(basis); ++it) {
value = alphabet_IUPAC.find(*it);
index = value >= 0 ? index | value : index ;
// cout << "Index :" << index << endl;
}
return alphabet_IUPAC[index];
}
vector<char> MSA::IUPAC_to_basis(char code) {
vector<char> matching_basis;
int index(1);
while (index < 32) {
if ( (index & alphabet_IUPAC.find(code)) != 0) {
matching_basis.push_back(alphabet_IUPAC[index]);
}
index *= 2;
}
return matching_basis;
}
void MSA::add_sequence(const string& str){
text.push_back(str);
length = max(length, (int) str.size());
++lines;
}
string MSA::consensus(int threshold, int ploidity) {
string consensus_seq("");
int score_index(0);
int current_weight(1);
for(int i(0);i<length;++i){
int scores[alphabet.size()]={0};
for(int j(0);j<(int)text.size();++j){
score_index = alphabet.find(text[j][i]);
if (score_index != -1) {
scores[score_index] += current_weight;
}
else {
vector<char> multiple_basis = IUPAC_to_basis(text[j][i]);
int size = multiple_basis.size();
if (current_weight % size != 0) {
current_weight *= size;
for( int k(0); k <(int)alphabet.size();++k) {
scores[k] *= size;
}
}
for (auto it = begin(multiple_basis); it != end(multiple_basis); ++it) {
scores[alphabet.find(*it)] += (current_weight / size);
}
}
}