45 #include "../SiteTools.h"
46 #include "../Alphabet/AlphabetTools.h"
47 #include "../SequenceTools.h"
93 const SiteSelection& selection)
98 for (
unsigned int i = 0; i < selection.size(); i++)
118 map<int, double> freq;
124 for (map<int, double>::iterator it = freq.begin(); it != freq.end(); it++)
126 if (it->second > max && it->first != -1)
135 for (map<int, double>::iterator it = freq.begin(); it != freq.end(); it++)
137 if (it->second > max)
144 consensus.push_back(cons);
160 int* element = &sites(j, i);
162 *element = unknownCode;
177 int* element = &sites(j, i);
242 noGapCont->
addSite(*site,
false);
286 map<int, double> freq;
288 if (freq[-1] <= maxFreqGaps)
299 map<int, double> freq;
301 if (freq[-1] > maxFreqGaps)
313 vector<string> seqNames = sites.getSequencesNames();
316 for (
unsigned int i = 0; i < sites.getNumberOfSites(); i++)
318 const Site* site = &sites.getSite(i);
320 noStopCont->
addSite(*site,
false);
332 throw AlphabetException(
"SiteContainerTools::resolveDottedAlignment. Alignment alphabet should of class 'DefaultAlphabet'.", dottedAln.getAlphabet());
335 size_t n = dottedAln.getNumberOfSequences();
337 throw Exception(
"SiteContainerTools::resolveDottedAlignment. Input alignment contains no sequence.");
340 for (
size_t i = 0; i < n; ++i)
342 const Sequence* seq = &dottedAln.getSequence(i);
344 for (
unsigned int j = 0; isRef && j < seq->
size(); ++j)
355 throw Exception(
"SiteContainerTools::resolveDottedAlignment. No reference sequence was found in the input alignment.");
361 size_t m = dottedAln.getNumberOfSites();
363 for (
unsigned int i = 0; i < m; ++i)
365 string resolved = refSeq->
getChar(i);
366 const Site* site = &dottedAln.getSite(i);
368 for (
unsigned int j = 0; j < n; j++)
375 resolvedSite.addElement(state);
395 map<size_t, size_t> tln;
398 unsigned int count = 0;
399 for (
size_t i = 0; i < seq.
size(); i++)
414 map<size_t, size_t> tln;
417 unsigned int count = 0;
418 for (
size_t i = 0; i < seq.
size(); i++)
434 if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType())
436 map<size_t, size_t> tln;
437 if (seq1.size() == 0)
439 unsigned int count1 = 0;
440 unsigned int count2 = 0;
441 if (seq2.size() == 0)
443 int state1 = seq1[count1];
444 int state2 = seq2[count2];
451 if (count1 < seq1.size())
452 state1 = seq1[count1];
459 if (count2 < seq2.size())
460 state2 = seq2[count2];
464 if (state1 != state2)
466 tln[count1 + 1] = count2 + 1;
467 if (count1 == seq1.size() - 1)
471 if (count2 == seq2.size() - 1)
473 state1 = seq1[++count1];
477 if (count1 < seq1.size())
478 state1 = seq1[count1];
489 state1 = seq1[++count1];
490 state2 = seq2[++count2];
503 map<size_t, size_t> tln;
518 tln[count1] = (state2 == -1 ? 0 : count2);
533 if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType())
535 if (seq1.getAlphabet()->getAlphabetType() != s.getAlphabet()->getAlphabetType())
538 auto_ptr<Sequence> s1(seq1.clone());
540 auto_ptr<Sequence> s2(seq2.clone());
546 double choice1, choice2, choice3, mx;
548 for (
size_t i = 0; i <= s1->size(); i++)
550 m(i, 0) =
static_cast<double>(i) * gap;
552 for (
size_t j = 0; j <= s2->size(); j++)
554 m(0, j) =
static_cast<double>(j) * gap;
556 for (
size_t i = 1; i <= s1->size(); i++)
558 for (
size_t j = 1; j <= s2->size(); j++)
560 choice1 = m(i - 1, j - 1) +
static_cast<double>(s.getIndex((*s1)[i - 1], (*s2)[j - 1]));
561 choice2 = m(i - 1, j) + gap;
562 choice3 = m(i, j - 1) + gap;
563 mx = choice1; px =
'd';
566 mx = choice2; px =
'u';
570 mx = choice3; px =
'l';
573 p(i - 1, j - 1) = px;
579 size_t i = s1->size(), j = s2->size();
581 while (i > 0 && j > 0)
586 a1.push_front((*s1)[i - 1]);
587 a2.push_front((*s2)[j - 1]);
593 a1.push_front((*s1)[i - 1]);
600 a2.push_front((*s2)[j - 1]);
606 a1.push_front((*s1)[i - 1]);
613 a2.push_front((*s2)[j - 1]);
616 s1->setContent(vector<int>(a1.begin(), a1.end()));
617 s2->setContent(vector<int>(a2.begin(), a2.end()));
634 if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType())
636 if (seq1.getAlphabet()->getAlphabetType() != s.getAlphabet()->getAlphabetType())
639 auto_ptr<Sequence> s1(seq1.clone());
641 auto_ptr<Sequence> s2(seq2.clone());
650 double choice1, choice2, choice3, mx;
653 for (
size_t i = 0; i <= s1->size(); i++)
657 for (
size_t j = 0; j <= s2->size(); j++)
661 for (
size_t i = 1; i <= s1->size(); i++)
663 m(i, 0) = h(i, 0) = opening +
static_cast<double>(i) * extending;
665 for (
size_t j = 1; j <= s2->size(); j++)
667 m(0, j) = v(0, j) = opening +
static_cast<double>(j) * extending;
670 for (
size_t i = 1; i <= s1->size(); i++)
672 for (
size_t j = 1; j <= s2->size(); j++)
674 choice1 = m(i - 1, j - 1) + s.getIndex((*s1)[i - 1], (*s2)[j - 1]);
675 choice2 = h(i - 1, j - 1) + opening + extending;
676 choice3 = v(i - 1, j - 1) + opening + extending;
688 choice1 = m(i, j - 1) + opening + extending;
689 choice2 = h(i, j - 1) + extending;
697 choice1 = m(i - 1, j) + opening + extending;
698 choice2 = v(i - 1, j) + extending;
707 if (v(i, j) > m(i, j))
709 if (h(i, j) > m(i, j))
711 p(i - 1, j - 1) = px;
717 size_t i = s1->size(), j = s2->size();
719 while (i > 0 && j > 0)
724 a1.push_front((*s1)[i - 1]);
725 a2.push_front((*s2)[j - 1]);
731 a1.push_front((*s1)[i - 1]);
738 a2.push_front((*s2)[j - 1]);
744 a1.push_front((*s1)[i - 1]);
751 a2.push_front((*s2)[j - 1]);
754 s1->setContent(vector<int>(a1.begin(), a1.end()));
755 s2->setContent(vector<int>(a2.begin(), a2.end()));
767 for (
size_t i = 0; i < nbSites; i++)
772 index->push_back(pos);
795 if (seq1.size() != seq2.size())
797 if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType())
800 const Alphabet* alpha = seq1.getAlphabet();
803 for (
size_t i = 0; i < seq1.size(); i++)
815 if (gapOption == SIMILARITY_ALL)
818 if (x == y && !alpha->
isGap(x) && !alpha->
isGap(y))
821 else if (gapOption == SIMILARITY_NODOUBLEGAP)
830 else if (gapOption == SIMILARITY_NOGAP)
840 throw Exception(
"SiteContainerTools::computeSimilarity. Invalid gap option: " + gapOption);
842 double r = (t == 0 ? 0. :
static_cast<double>(s) / static_cast<double>(t));
843 return dist ? 1 - r : r;
852 string pairwiseGapOption = gapOption;
854 if (gapOption == SIMILARITY_NOFULLGAP)
868 pairwiseGapOption = SIMILARITY_ALL;
875 for (
size_t i = 0; i < n; i++)
877 (*mat)(i, i) = dist ? 0. : 1.;
879 for (
size_t j = i + 1; j < n; j++)
882 (*mat)(i, j) = (*mat)(j, i) = computeSimilarity(*seq1, *seq2, dist, pairwiseGapOption, unresolvedAsGap);
894 if (seqCont1.getAlphabet()->getAlphabetType() != seqCont2.getAlphabet()->getAlphabetType())
898 vector<string> seqNames1 = seqCont1.getSequencesNames();
899 vector<string> seqNames2 = seqCont2.getSequencesNames();
902 if (seqNames1 == seqNames2)
904 seqCont2bis = &seqCont2;
911 seqCont2bis = seqCont2ter;
915 if (leavePositionAsIs)
924 int offset =
static_cast<int>(seqCont1.getNumberOfSites());
943 unsigned int pos = 0;
947 positions(i, j) = pos;
960 throw Exception(
"SiteContainerTools::getColumnScores. The two input alignments must have the same number of sequences!");
967 if (positions1(j, i) > 0) {
969 whichPos = positions1(j, i);
982 if (positions2(whichSeq, j) == whichPos) {
988 throw Exception(
"SiteContainerTools::getColumnScores(). Position " +
TextTools::toString(whichPos) +
" of sequence " +
TextTools::toString(whichSeq) +
" not found in reference alignment. Please make sure the two indexes are built from the same data!");
993 test = (positions1(j, i) == positions2(j, i2));
995 scores[i] = test ? 1 : 0;
1005 throw Exception(
"SiteContainerTools::getColumnScores. The two input alignments must have the same number of sequences!");
1009 size_t countAlignable = 0;
1010 size_t countAligned = 0;
1013 size_t whichPos = positions1(j, i);
1014 if (whichPos == 0) {
1022 if (positions2(j, k) == whichPos) {
1028 throw Exception(
"SiteContainerTools::getColumnScores(). Position " +
TextTools::toString(whichPos) +
" of sequence " +
TextTools::toString(j) +
" not found in reference alignment. Please make sure the two indexes are built from the same data!");
1033 size_t whichPos2 = positions1(k, i);
1034 if (whichPos2 == 0) {
1040 if (positions2(k, i2) == whichPos2)
1044 scores[i] = countAlignable == 0 ? na :
static_cast<double>(countAligned) / static_cast<double>(countAlignable);