Skip to content
Snippets Groups Projects
Select Git revision
  • main
1 result

MSA.cpp

Blame
  • MSA.cpp 2.52 KiB
    #include "MSA.h"
    #include <iostream>
    #include <algorithm>
    
    
    using namespace std;
    
    
    char MSA::basis_to_IUPAC(vector<char> basis) {
      int index(0);
      int value(0);
      if (basis.size() < 1) {
        return '-';
      }
      for (auto it = std::begin(basis); it != std::end(basis); ++it) {
        value = alphabet_IUPAC.find(*it);
        index = value >= 0 ? index | value : index ;
       // cout << "Index :"  << index << endl;
      }
      return alphabet_IUPAC[index];
    }
    
    vector<char> MSA::IUPAC_to_basis(char code) {
      vector<char> matching_basis;
      int index(1);
      while (index < 32) {
        if ( (index & alphabet_IUPAC.find(code)) != 0) {
          matching_basis.push_back(alphabet_IUPAC[index]);
        }
        index *= 2;
      }
      return matching_basis;
    }
     
    
    void MSA::add_sequence(const string& str){
    	text.push_back(str);
    	length = max(length, (int) str.size());
    	++lines;
    }
    
    string MSA::consensus(int threshold, int ploidity) {
    
    	string consensus_seq("");
    	int score_index(0);
    	int current_weight(1);
    	for(int i(0);i<length;++i){
    		int scores[alphabet.size()]={0};
    		
    		for(int j(0);j<(int)text.size();++j){
    
    		  score_index = alphabet.find(text[j][i]);
    		  if (score_index != -1) {
    		    scores[score_index] += current_weight;
    		  }
    		  else {
    		    vector<char> multiple_basis = IUPAC_to_basis(text[j][i]);
    		    int size = multiple_basis.size();
    		    if (current_weight % size != 0) {
    		      current_weight *= size;
    		      for( int k(0); k <(int)alphabet.size();++k) {
    			scores[k] *= size;
    		      }
    		    }
    		    for (auto it = begin(multiple_basis); it != end(multiple_basis); ++it) {
    		      scores[alphabet.find(*it)] += (current_weight / size);
    		    }
    		  }
    		
    		}
    		vector<char> conserved_nuc;
    	   // int stored_nuc_cnt(0);
    		int current_score(0);
    		while ( current_score < threshold * current_weight * (int) text.size()) {
    		    int pos = max_element(scores, scores+alphabet.size()) - scores;
    		    current_score += (scores[pos]*(100));
    		    conserved_nuc.push_back(alphabet[pos]);
    		    scores[pos] = 0;
    		}
    		char nuc = basis_to_IUPAC(conserved_nuc);
    		if (nuc != '-' && nuc != '.') 
    		  consensus_seq += nuc;
    	}
    	return consensus_seq;
    }
    
    
    void MSA::parser_fasta(string file){
    	ifstream flux(file.c_str());
    	string seq("");
    	if(flux){
    		srand(time(NULL));
    		string ligne("");
    		while(getline(flux, ligne)){
    			if (ligne[0] == '>'){
    				name.push_back(ligne.substr(1, ligne.size()-1));
    				if (seq != ""){
    					add_sequence(seq);
    				}
    				seq = "";
    			}
    			else {
    				seq += ligne;
    			}
    		}
    		add_sequence(seq);
    	}
    	else{
    		cout << "ERROR: Unable to open the file.." << endl;
    		exit(0);
    	}
    }