71 if (end >= sequence.size())
74 throw Exception (
"SequenceTools::subseq : Invalid interval");
77 vector<int> temp(sequence.getContent());
80 temp.erase(temp.begin() + end + 1, temp.end());
81 temp.erase(temp.begin(), temp.begin() + begin);
84 return new BasicSequence(sequence.getName(), temp, sequence.getComments(), sequence.getAlphabet());
92 if ((seq1.getAlphabet()->getAlphabetType()) != (seq2.getAlphabet()->getAlphabetType()))
93 throw AlphabetMismatchException(
"SequenceTools::concatenate : Sequence's alphabets don't match ", seq1.getAlphabet(), seq2.getAlphabet());
96 if (seq1.getName() != seq2.getName())
97 throw Exception (
"SequenceTools::concatenate : Sequence's names don't match");
100 vector<int> sequence = seq1.getContent();
101 vector<int> sequence2 = seq2.getContent();
102 sequence.insert(sequence.end(), sequence2.begin(), sequence2.end());
103 return new BasicSequence(seq1.getName(), sequence, seq1.getComments(), seq1.getAlphabet());
112 if (seq.getAlphabet()->getAlphabetType() ==
"DNA alphabet")
116 else if (seq.getAlphabet()->getAlphabetType() ==
"RNA alphabet")
122 throw AlphabetException(
"SequenceTools::complement: Sequence must be nucleic.", seq.getAlphabet());
124 for (
size_t i = 0; i < seq.size(); i++)
126 seq.setElement(i, NAR->
translate(seq.getValue(i)));
137 if (sequence.getAlphabet()->getAlphabetType() ==
"DNA alphabet")
141 else if (sequence.getAlphabet()->getAlphabetType() ==
"RNA alphabet")
147 throw AlphabetException (
"SequenceTools::getComplement: Sequence must be nucleic.", sequence.getAlphabet());
158 if (sequence.getAlphabet()->getAlphabetType() !=
"DNA alphabet")
160 throw AlphabetException (
"SequenceTools::transcript : Sequence must be DNA", sequence.getAlphabet());
163 return _transc.translate(sequence);
171 if (sequence.getAlphabet()->getAlphabetType() !=
"RNA alphabet")
173 throw AlphabetException (
"SequenceTools::reverseTranscript : Sequence must be RNA", sequence.getAlphabet());
176 return _transc.reverse(sequence);
183 size_t seq_size = seq.
size();
184 unsigned int tmp_state = 0;
186 for (
size_t i = 0; i < seq_size / 2; i++)
188 j = seq_size - 1 - i;
226 size_t seq_size = seq.
size();
229 for (
size_t i = 0; i < seq_size / 2; i++)
231 j = seq_size - 1 - i;
247 if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType())
249 if (seq1.size() != seq2.size())
251 int gap = seq1.getAlphabet()->getGapCharacterCode();
254 for (
size_t i = 0; i < seq1.size(); i++)
256 int x = seq1.getValue(i);
257 int y = seq2.getValue(i);
260 if (x != gap && y != gap)
274 return static_cast<double>(id) / static_cast<double>(tot) * 100.;
283 for (
size_t i = 0; i < seq.
size(); i++)
285 if (!alpha->
isGap(seq[i]))
297 for (
size_t i = 0; i < seq.
size(); i++)
311 for (
size_t i = 0; i < seq.
size(); i++)
325 for (
size_t i = 0; i < seq.
size(); i++)
327 if (!alpha->
isGap(seq[i]))
328 content.push_back(seq[i]);
340 for (
size_t i = seq.
size(); i > 0; --i)
342 if (alpha->
isGap(seq[i - 1]))
353 throw Exception(
"SequenceTools::getSequenceWithoutStops. Input sequence should have a codon alphabet.");
355 for (
size_t i = 0; i < seq.size(); i++)
357 if (!calpha->
isStop(seq[i]))
358 content.push_back(seq[i]);
371 throw Exception(
"SequenceTools::removeStops. Input sequence should have a codon alphabet.");
372 for (
size_t i = seq.size(); i > 0; --i)
374 if (calpha->
isStop(seq[i - 1]))
375 seq.deleteElement(i - 1);
385 throw Exception(
"SequenceTools::replaceStopsWithGaps. Input sequence should have a codon alphabet.");
387 for (
size_t i = 0; i < seq.size(); ++i)
389 if (calpha->
isStop(seq[i]))
390 seq.setElement(i, gap);
399 if (seq1.size() != seq2.size())
401 size_t n = seq1.size();
402 const Alphabet* alpha = seq1.getAlphabet();
403 unsigned int r = alpha->
getSize();
408 for (
size_t i = 0; i < n; i++)
415 array(static_cast<size_t>(x), static_cast<size_t>(y))++;
420 double sb2 = 0, nij, nji;
421 for (
unsigned int i = 1; i < r; ++i)
423 for (
unsigned int j = 0; j < i; ++j)
427 if (nij != 0 || nji != 0)
429 sb2 += pow(nij - nji, 2) / (nij + nji);
449 vector< vector< int > > states(seq.
size());
450 list<Sequence*> t_hap;
452 unsigned int hap_count = 1;
454 for (
size_t i = 0; i < seq.
size(); i++)
456 vector<int> st = alpha->
getAlias(seq[i]);
461 if (st.size() <= level)
467 states[i] = vector<int>(1, seq[i]);
472 for (
size_t i = 0; i < states.size(); i++)
474 for (list<Sequence*>::iterator it = t_hap.begin(); it != t_hap.end(); it++)
476 for (
unsigned int j = 0; j < states[i].size(); j++)
479 if (j < states[i].size() - 1)
483 t_hap.insert(it, tmp_seq);
487 (**it).addElement(states[i][j]);
492 for (list<Sequence*>::reverse_iterator it = t_hap.rbegin(); it != t_hap.rend(); it++)
502 if (s1.getAlphabet()->getAlphabetType() != s2.getAlphabet()->getAlphabetType())
504 throw AlphabetMismatchException(
"SequenceTools::combineSequences(const Sequence& s1, const Sequence& s2): s1 and s2 don't have same Alphabet.", s1.getAlphabet(), s2.getAlphabet());
506 const Alphabet* alpha = s1.getAlphabet();
510 for (
size_t i = 0; i < length; i++)
513 st.push_back(s1.getValue(i));
515 st.push_back(s2.getValue(i));
527 const Alphabet* alpha = s.getAlphabet();
528 if (name.size() == 0)
529 name = s.
getName() +
"_haplotype";
531 if (s.size() != h.size())
533 for (
unsigned int i = 0; i < s.size(); ++i)
536 vector<int> nucs = alpha->
getAlias(s.getValue(i));
539 remove(nucs.begin(), nucs.end(), h.getValue(i));
540 nucs = vector<int>(nucs.begin(), nucs.end() - 1);
544 nucs = vector<int>(nucs.begin(), nucs.end());
562 if (seq.getAlphabet()->getAlphabetType() !=
"DNA alphabet")
564 throw AlphabetException (
"SequenceTools::transcript : Sequence must be DNA", seq.getAlphabet());
567 if ((ph < 1) || (ph > 3))
570 size_t s = seq.size();
571 size_t n = (s - ph + 3) / 3;
573 vector<int> content(n);
575 int tir = seq.getAlphabet()->getGapCharacterCode();
578 for (
size_t i = 0; i < n; i++)
583 content[i] = _RNY.getRNY(tir, seq[0], seq[1], *seq.getAlphabet());
587 content[i] = _RNY.getRNY(seq[j - 1], seq[j], tir, *seq.getAlphabet());
589 content[i] = _RNY.getRNY(seq[j - 1], seq[j], seq[j + 1], *seq.getAlphabet());
595 seq.getComments(), &_RNY);
604 if (seq.getAlphabet()->getAlphabetType() !=
"DNA alphabet")
606 throw AlphabetException (
"SequenceTools::transcript : Sequence must be DNA", seq.getAlphabet());
609 size_t n = seq.size();
611 vector<int> content(n);
613 int tir = seq.getAlphabet()->getGapCharacterCode();
617 content[0] = _RNY.getRNY(tir, seq[0], seq[1], *seq.getAlphabet());
619 for (
unsigned int i = 1; i < n - 1; i++)
621 content[i] = _RNY.getRNY(seq[i - 1], seq[i], seq[i + 1],
625 content[n - 1] = _RNY.getRNY(seq[n - 2], seq[n - 1], tir, *seq.getAlphabet());
630 seq.getComments(), &_RNY);
643 throw AlphabetException(
"SequenceTools::getCDS. Sequence is not a codon sequence.");
647 for (i = 0; i < sequence.
size() && !alphabet->
isInit(sequence[i]); ++i)
649 for (
unsigned int j = 0; includeInit ? j < i : j <= i; ++j)
657 for (i = 0; i < sequence.
size() && !alphabet->
isStop(sequence[i]); ++i)
659 for (
unsigned int j = includeStop ? i + 1 : i; j < sequence.
size(); ++j)
672 for (
size_t seqi = 0; seqi < seq.
size() - motif.
size() + 1; seqi++)
675 for (
size_t moti = 0; moti < motif.
size(); moti++)