// bpp-seq 2.1.0
// All Classes Namespaces Files Functions Variables Friends Pages
// SequenceTools.h
// Go to the documentation of this file.
1 //
2 // File: SequenceTools.h
3 // Authors: Guillaume Deuchst
4 // Julien Dutheil
5 // Sylvain Gaillard
6 // Created on: Tue Aug 21 2003
7 //
8 
9 /*
10  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
11 
12  This software is a computer program whose purpose is to provide classes
13  for sequences analysis.
14 
15  This software is governed by the CeCILL license under French law and
16  abiding by the rules of distribution of free software. You can use,
17  modify and/ or redistribute the software under the terms of the CeCILL
18  license as circulated by CEA, CNRS and INRIA at the following URL
19  "http://www.cecill.info".
20 
21  As a counterpart to the access to the source code and rights to copy,
22  modify and redistribute granted by the license, users are provided only
23  with a limited warranty and the software's author, the holder of the
24  economic rights, and the successive licensors have only limited
25  liability.
26 
27  In this respect, the user's attention is drawn to the risks associated
28  with loading, using, modifying and/or developing or reproducing the
29  software by the user in light of its specific status of free software,
30  that may mean that it is complicated to manipulate, and that also
31  therefore means that it is reserved for developers and experienced
32  professionals having in-depth computer knowledge. Users are therefore
33  encouraged to load and test the software's suitability as regards their
34  requirements in conditions enabling the security of their systems and/or
35  data to be ensured and, more generally, to use and operate it in the
36  same conditions as regards security.
37 
38  The fact that you are presently reading this means that you have had
39  knowledge of the CeCILL license and that you accept its terms.
40  */
41 
42 #ifndef _SEQUENCETOOLS_H_
43 #define _SEQUENCETOOLS_H_
44 
45 #include "Alphabet/Alphabet.h"
46 #include "Alphabet/DNA.h"
47 #include "Alphabet/RNA.h"
48 #include "Alphabet/RNY.h"
49 #include "Sequence.h"
50 #include "SymbolListTools.h"
52 #include <Bpp/Exceptions.h>
55 
56 // From the STL:
57 #include <string>
58 #include <map>
59 #include <vector>
60 #include <algorithm>
61 
62 namespace bpp
63 {
67 class BowkerTest :
68  public StatTest
69 {
70 private:
71  double pvalue_;
72  double stat_;
73 
74 public:
76  stat_(0.) {}
77 
78  virtual ~BowkerTest() {}
79 
80  BowkerTest* clone() const { return new BowkerTest(*this); }
81 
82 public:
83  std::string getName() const { return "Bowker's test for homogeneity."; }
84  double getStatistic() const { return stat_; }
85  double getPValue() const { return pvalue_; }
86 
87  void setStatistic(double stat) { stat_ = stat; }
88  void setPValue(double pvalue) { pvalue_ = pvalue; }
89 };
90 
97  public SymbolListTools
98 {
99 private:
100  static DNA _DNA;
101  static RNA _RNA;
102  static RNY _RNY;
106 
107 public:
109  virtual ~SequenceTools() {}
110 
111 public:
121  static Sequence* subseq(const Sequence& sequence, size_t begin, size_t end) throw (IndexOutOfBoundsException, Exception);
122 
135  static Sequence* concatenate(const Sequence& seq1, const Sequence& seq2)
137 
146  static Sequence& complement(Sequence& seq) throw (AlphabetException);
147 
156  static Sequence* getComplement(const Sequence& sequence) throw (AlphabetException);
157 
168  static Sequence* transcript(const Sequence& sequence) throw (AlphabetException);
169 
180  static Sequence* reverseTranscript(const Sequence& sequence) throw (AlphabetException);
181 
192  static Sequence& invert(Sequence& seq);
193 
204  static Sequence* getInvert(const Sequence& sequence);
205 
216  static Sequence& invertComplement(Sequence& seq);
217 
227  static double getPercentIdentity(const Sequence& seq1, const Sequence& seq2, bool ignoreGaps = false) throw (AlphabetMismatchException, SequenceNotAlignedException);
228 
234  static size_t getNumberOfSites(const Sequence& seq);
235 
241  static size_t getNumberOfCompleteSites(const Sequence& seq);
242 
250  static size_t getNumberOfUnresolvedSites(const Sequence& seq);
251 
258  static void removeGaps(Sequence& seq);
259 
269  static Sequence* getSequenceWithoutGaps(const Sequence& seq);
270 
278  static void removeStops(Sequence& seq) throw (Exception);
279 
290  static Sequence* getSequenceWithoutStops(const Sequence& seq) throw (Exception);
291 
299  static void replaceStopsWithGaps(Sequence& seq) throw (Exception);
300 
316  static BowkerTest* bowkerTest(const Sequence& seq1, const Sequence& seq2) throw (SequenceNotAlignedException);
317 
330  static void getPutativeHaplotypes(const Sequence& seq, std::vector<Sequence*>& hap, unsigned int level = 2);
331 
338  static Sequence* combineSequences(const Sequence& s1, const Sequence& s2) throw (AlphabetMismatchException);
339 
365  static Sequence* subtractHaplotype(const Sequence& s, const Sequence& h, std::string name = "", unsigned int level = 1) throw (SequenceNotAlignedException);
366 
381  static Sequence* RNYslice(const Sequence& sequence, int ph) throw (AlphabetException);
382  static Sequence* RNYslice(const Sequence& sequence) throw (AlphabetException);
383 
393  static void getCDS(Sequence& sequence, bool checkInit, bool checkStop, bool includeInit = true, bool includeStop = true);
394 
405  static size_t findFirstOf(const Sequence& seq, const Sequence& motif, bool strict = true);
406 };
407 } // end of namespace bpp.
408 
409 #endif // _SEQUENCETOOLS_H_
410