bpp-seq  2.1.0
 All Classes Namespaces Files Functions Variables Friends Pages
SequenceContainerTools.h
Go to the documentation of this file.
1 //
2 // File: SequenceContainerTools.h
3 // Created by: Julien Dutheil
4 // Sylvain Gaillard
5 // Created on: Sat Oct 4 09:18:34 2003
6 //
7 
8 /*
9 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11 This software is a computer program whose purpose is to provide classes
12 for sequences analysis.
13 
14 This software is governed by the CeCILL license under French law and
15 abiding by the rules of distribution of free software. You can use,
16 modify and/ or redistribute the software under the terms of the CeCILL
17 license as circulated by CEA, CNRS and INRIA at the following URL
18 "http://www.cecill.info".
19 
20 As a counterpart to the access to the source code and rights to copy,
21 modify and redistribute granted by the license, users are provided only
22 with a limited warranty and the software's author, the holder of the
23 economic rights, and the successive licensors have only limited
24 liability.
25 
26 In this respect, the user's attention is drawn to the risks associated
27 with loading, using, modifying and/or developing or reproducing the
28 software by the user in light of its specific status of free software,
29 that may mean that it is complicated to manipulate, and that also
30 therefore means that it is reserved for developers and experienced
31 professionals having in-depth computer knowledge. Users are therefore
32 encouraged to load and test the software's suitability as regards their
33 requirements in conditions enabling the security of their systems and/or
34 data to be ensured and, more generally, to use and operate it in the
35 same conditions as regards security.
36 
37 The fact that you are presently reading this means that you have had
38 knowledge of the CeCILL license and that you accept its terms.
39 */
40 
41 #ifndef _SEQUENCECONTAINERTOOLS_H_
42 #define _SEQUENCECONTAINERTOOLS_H_
43 
44 // From the STL:
45 #include <string>
46 #include <vector>
47 #include <map>
48 #include <memory>
49 
50 #include "SequenceContainer.h"
52 
53 namespace bpp
54 {
55 
/**
 * @brief A list of size_t values designating sequences in a container.
 *
 * Used as the 'selection' argument of getSelectedSequences() and
 * keepOnlySelectedSequences() declared in this file.
 * NOTE(review): the values are presumably zero-based sequence positions in an
 * OrderedSequenceContainer -- confirm against the implementation.
 */
56 typedef std::vector<size_t> SequenceSelection;
57 
62 {
63 
64  public:
67 
68  public:
/**
 * @brief Create a sequence container for a given alphabet and size.
 *
 * @param alphabet Alphabet passed to the new container.
 * @param size     Requested size; presumably the number of sequences to
 *                 create -- confirm against the implementation.
 * @return A raw pointer to a SequenceContainer.
 *         NOTE(review): ownership presumably passes to the caller, who would
 *         have to delete it -- the definition is not visible here, confirm.
 */
80  static SequenceContainer* createContainerOfSpecifiedSize(const Alphabet* alphabet, size_t size);
81 
95  const Alphabet* alphabet,
96  const std::vector<std::string>& seqNames)
97  throw (Exception);
98 
/**
 * @brief Add every sequence of one container to another, converting the
 *        sequence type on the way.
 *
 * For each index i below input.getNumberOfSequences(), a Seq object is built
 * from input.getSequence(i) and passed to output.addSequence(). The function
 * never clears 'output'; it only calls addSequence() on it.
 *
 * The temporary Seq is an automatic object: no heap allocation is needed, and
 * the code no longer depends on std::auto_ptr (removed from the standard
 * library in C++17).
 *
 * @tparam ContFrom Input container type; must provide getNumberOfSequences()
 *                  and getSequence(size_t).
 * @tparam ContTo   Output container type; must provide addSequence() callable
 *                  with a Seq.
 * @tparam Seq      Sequence type to create; must be constructible from the
 *                  value returned by ContFrom::getSequence().
 * @param input  The container to read from.
 * @param output The container to which the converted sequences are added.
 */
template<class ContFrom, class ContTo, class Seq>
static void convertContainer(const ContFrom& input, ContTo& output)
{
  // The bound is re-read on every iteration, exactly as before.
  for (size_t i = 0; i < input.getNumberOfSequences(); ++i)
  {
    Seq seq(input.getSequence(i));
    output.addSequence(seq);
  }
}
118 
/**
 * @brief Select sequences of an ordered container using a SequenceSelection.
 *
 * @param sequences  The container to read from.
 * @param selection  A SequenceSelection (std::vector<size_t>); presumably the
 *                   positions of the sequences to retrieve -- confirm.
 * @param outputCont The container receiving the result; presumably the
 *                   selected sequences are added to it -- confirm.
 * @throw Exception  Declared by the exception specification; the triggering
 *                   conditions are defined in the implementation file.
 *
 * NOTE(review): dynamic exception specifications are deprecated since C++11
 * and ill-formed in C++17; removing this one requires the same change on the
 * out-of-line definition.
 */
134  static void getSelectedSequences(const OrderedSequenceContainer& sequences, const SequenceSelection& selection, SequenceContainer& outputCont) throw (Exception);
135 
/**
 * @brief Select sequences of a container using a list of names.
 *
 * @param sequences  The container to read from.
 * @param selection  The names used for the selection.
 * @param outputCont The container receiving the result; presumably the
 *                   selected sequences are added to it -- confirm.
 * @param strict     Defaults to true. NOTE(review): presumably decides whether
 *                   a name missing from 'sequences' raises an exception or is
 *                   skipped -- confirm against the implementation.
 * @throw Exception  Declared by the exception specification; the triggering
 *                   conditions are defined in the implementation file.
 *
 * NOTE(review): dynamic exception specifications are deprecated since C++11
 * and ill-formed in C++17; removing this one requires the same change on the
 * out-of-line definition.
 */
153  static void getSelectedSequences(const SequenceContainer& sequences, const std::vector<std::string>& selection, SequenceContainer& outputCont, bool strict = true) throw (Exception);
154 
/**
 * @brief Restrict an ordered container to a selection of its sequences.
 *
 * @param sequences The container to modify (taken by non-const reference).
 * @param selection A SequenceSelection (std::vector<size_t>); presumably the
 *                  positions of the sequences to keep, every other sequence
 *                  being removed -- confirm against the implementation.
 */
167  static void keepOnlySelectedSequences(OrderedSequenceContainer& sequences, const SequenceSelection& selection);
168 
/**
 * @brief Check whether the sequences of a container have the same length.
 *
 * @param sequences The container to inspect (not modified: const reference).
 * @return Presumably true when all sequences share one length.
 *         NOTE(review): the result for an empty container is defined by the
 *         implementation, which is not visible here -- confirm.
 */
175  static bool sequencesHaveTheSameLength(const SequenceContainer& sequences);
176 
/**
 * @brief Compute counts over the content of a container.
 *
 * @param sequences The container to inspect (not modified: const reference).
 * @param [unnamed] Output map from int to int.
 *                  NOTE(review): presumably maps each state (integer code) to
 *                  its number of occurrences; whether existing entries are
 *                  reset or accumulated is defined by the implementation --
 *                  confirm.
 */
190  static void getCounts(const SequenceContainer& sequences, std::map<int, int>&);
191 
/**
 * @brief Compute frequencies over the content of a container.
 *
 * @param sequences   The container to inspect (not modified: const reference).
 * @param f           Output map from int to double; presumably state (integer
 *                    code) to frequency -- confirm.
 * @param pseudoCount Defaults to 0. NOTE(review): presumably a value added to
 *                    each count before normalization -- confirm against the
 *                    implementation.
 */
205  static void getFrequencies(const SequenceContainer& sequences, std::map<int, double>& f, double pseudoCount = 0);
206 
214  static void append(SequenceContainer& seqCont1, const SequenceContainer& seqCont2, bool checkNames = true)
215  throw (Exception)
216  {
217  std::vector<std::string> seqNames = seqCont2.getSequencesNames();
218  for (size_t i = 0; i < seqNames.size(); i++)
219  seqCont1.addSequence(seqCont2.getSequence(seqNames[i]), checkNames);
220  }
228  static void append(SequenceContainer& seqCont1, const OrderedSequenceContainer& seqCont2, bool checkNames=true)
229  throw (Exception)
230  {
231  for (size_t i = 0; i < seqCont2.getNumberOfSequences(); i++)
232  seqCont1.addSequence(seqCont2.getSequence(i), checkNames);
233  }
234 
249  static void merge(const SequenceContainer& seqCont1, const SequenceContainer& seqCont2, SequenceContainer& outputCont)
250  throw (Exception)
251  {
252  if (seqCont1.getAlphabet()->getAlphabetType() != seqCont2.getAlphabet()->getAlphabetType())
253  throw AlphabetMismatchException("SequenceContainerTools::merge.", seqCont1.getAlphabet(), seqCont2.getAlphabet());
254 
255  std::vector<std::string> seqNames = seqCont1.getSequencesNames();
256  for (size_t i = 0; i < seqNames.size(); i++)
257  {
258  BasicSequence tmp = seqCont1.getSequence(seqNames[i]);
259  tmp.append(seqCont2.getContent(seqNames[i]));
260  outputCont.addSequence(tmp, false);
261  }
262  }
263 
272  static void convertAlphabet(const SequenceContainer& seqCont, SequenceContainer& outputCont)
273  throw (Exception)
274  {
275  std::vector<std::string> seqNames = seqCont.getSequencesNames();
276  bool checkNames = outputCont.getNumberOfSequences() > 0;
277  for (size_t i = 0; i < seqNames.size(); i++)
278  {
279  BasicSequence seq(seqNames[i], seqCont.toString(seqNames[i]), outputCont.getAlphabet());
280  outputCont.addSequence(seq, checkNames);
281  }
282  }
283 
/**
 * @brief Build a container from one codon position of the input sequences.
 *
 * @param sequences The container to read from (not modified: const reference).
 * @param pos       The codon position to extract.
 *                  NOTE(review): presumably 0, 1 or 2 -- confirm the accepted
 *                  range in the implementation.
 * @return A raw pointer to a SequenceContainer.
 *         NOTE(review): ownership presumably passes to the caller, who would
 *         have to delete it -- confirm.
 * @throw AlphabetException Declared by the exception specification;
 *         presumably raised when the input alphabet is not suitable for codon
 *         extraction -- confirm.
 *
 * NOTE(review): dynamic exception specifications are deprecated since C++11
 * and ill-formed in C++17; removing this one requires the same change on the
 * out-of-line definition.
 */
292  static SequenceContainer* getCodonPosition(const SequenceContainer& sequences, size_t pos) throw (AlphabetException);
293 
294 };
295 
296 } //end of namespace bpp.
297 
298 #endif //_SEQUENCECONTAINERTOOLS_H_
299