bpp-phyl  2.4.0
CategorySubstitutionRegister.h
Go to the documentation of this file.
1 //
2 // File: SubstitutionRegister.h
3 // Created by: Julien Dutheil
4 // Created on: Mon Dec 6 16:32 2010
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004, 2005, 2006)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 
41 #ifndef _CATEGORY_SUBSTITUTIONREGISTER_H_
42 #define _CATEGORY_SUBSTITUTIONREGISTER_H_
43 
44 #include "SubstitutionRegister.h"
45 
46 namespace bpp
47 {
62  {
63  protected:
64  bool within_;
65  size_t nbCategories_;
66  mutable std::map<size_t, size_t> categories_;
67  std::vector<std::string> categoryNames_;
68  std::vector< std::vector<size_t> > index_;
69  std::vector< std::vector<size_t> > revIndex_;
70 
72 
73  public:
80  CategorySubstitutionRegister(const SubstitutionModel* model, bool within = false) :
81  AbstractSubstitutionRegister(model,"Category"),
82  within_(within),
83  nbCategories_(0),
84  categories_(),
85  categoryNames_(),
86  index_(),
87  revIndex_(),
88  stationarity_()
89  {}
90 
91  protected:
92  template<class T>
93  void setAlphabetCategories(const std::map<int, T>& categories)
94  {
95  //We need to convert alphabet states into model states.
96  std::map<size_t, T> modelCategories;
97  for (typename std::map<int, T>::const_iterator it = categories.begin(); it != categories.end(); ++it)
98  {
99  std::vector<size_t> states = model_->getModelStates(it->first);
100  for (size_t i = 0; i < states.size(); ++i) {
101  modelCategories[states[i]] = it->second;
102  }
103  }
104  //Then we forward the model categories:
105  setModelCategories<T>(modelCategories);
106  }
107 
108  template<class T>
109  void setModelCategories(const std::map<size_t, T>& categories)
110  {
111  // First index categories:
112  nbCategories_ = 0;
113  std::map<T, size_t> cats;
114  for (typename std::map<size_t, T>::const_iterator it = categories.begin(); it != categories.end(); ++it)
115  {
116  if (cats.find(it->second) == cats.end())
117  {
118  ++nbCategories_;
119  cats[it->second] = nbCategories_;
120  }
121  }
122 
123  // Now creates categories:
124  categories_.clear();
125  categoryNames_.resize(nbCategories_);
126  for (size_t i = 0; i < model_->getNumberOfStates(); ++i)
127  {
128  typename std::map<size_t, T>::const_iterator it = categories.find(i);
129  if (it != categories.end())
130  {
131  categories_[i] = cats[it->second];
132  categoryNames_[cats[it->second] - 1] += model_->getStateMap().getStateDescription(i);
133  }
134  else
135  {
136  categories_[i] = 0;
137  }
138  }
139 
140  size_t count = 1;
141  index_.resize(nbCategories_);
142  for (size_t i = 0; i < index_.size(); ++i)
143  {
144  index_[i].resize(nbCategories_);
145  for (size_t j = 0; j < index_.size(); ++j)
146  {
147  if (j != i)
148  {
149  index_[i][j] = count++;
150  std::vector<size_t> pos(2);
151  pos[0] = i;
152  pos[1] = j;
153  revIndex_.push_back(pos);
154  }
155  }
156  }
157  if (within_)
158  {
159  for (size_t i = 0; i < index_.size(); ++i)
160  {
161  index_[i][i] = count++;
162  std::vector<size_t> pos(2);
163  pos[0] = i;
164  pos[1] = i;
165  revIndex_.push_back(pos);
166  }
167  }
168  }
169 
170  public:
171  virtual size_t getCategory(size_t state) const
172  {
173  return categories_[state];
174  }
175 
176  virtual size_t getCategoryFrom(size_t type) const
177  {
178  if (type <= nbCategories_ * (nbCategories_ - 1))
179  {
180  return revIndex_[type - 1][0] + 1;
181  }
182  else
183  {
184  if (within_)
185  return revIndex_[type - 1][0] + 1;
186  else
187  throw Exception("CategorySubstitutionRegister::getCategoryFrom. Bad substitution type.");
188  }
189  }
190 
191  virtual size_t getCategoryTo(size_t type) const
192  {
193  if (type <= nbCategories_ * (nbCategories_ - 1))
194  {
195  return revIndex_[type - 1][1] + 1;
196  }
197  else
198  {
199  if (within_)
200  return revIndex_[type - 1][1] + 1;
201  else
202  throw Exception("CategorySubstitutionRegister::getCategoryTo. Bad substitution type.");
203  }
204  }
205 
206  virtual std::string getCategoryName(size_t category) const
207  {
208  return categoryNames_[category - 1];
209  }
210 
211  virtual bool allowWithin() const { return within_; }
212 
213  bool isStationary() const
214  {
215  return stationarity_;
216  }
217 
218  void setStationarity(bool stat)
219  {
220  stationarity_=stat;
221  }
222 
223  size_t getNumberOfCategories() const { return nbCategories_; }
224 
225  size_t getNumberOfSubstitutionTypes() const { return nbCategories_ * (nbCategories_ - 1) + (within_ ? nbCategories_ : 0); }
226 
227  virtual size_t getType(size_t fromState, size_t toState) const
228  {
229  size_t fromCat = categories_[fromState];
230  size_t toCat = categories_[toState];
231  if (fromCat > 0 && toCat > 0)
232  return index_[fromCat - 1][toCat - 1];
233  else
234  return 0;
235  }
236 
237  std::string getTypeName(size_t type) const
238  {
239  return getCategoryName(getCategoryFrom(type)) + "->" + getCategoryName(getCategoryTo(type));
240  }
241  };
242 
243 
253  {
254  public:
255  ComprehensiveSubstitutionRegister(const SubstitutionModel* model, bool within = false) :
256  CategorySubstitutionRegister(model, within)
257  {
258  std::map<int, int> categories;
259  for (int i = 0; i < static_cast<int>(model->getAlphabet()->getSize()); ++i)
260  {
261  categories[i] = i;
262  }
263  setAlphabetCategories<int>(categories);
264  }
265 
267  };
268 
279  {
280  public:
281  GCSubstitutionRegister(const NucleotideSubstitutionModel* model, bool within = false) :
282  CategorySubstitutionRegister(model, within)
283  {
284  std::map<int, int> categories;
285  categories[0] = 1;
286  categories[1] = 2;
287  categories[2] = 2;
288  categories[3] = 1;
289  setAlphabetCategories<int>(categories);
290  }
291 
292  GCSubstitutionRegister* clone() const { return new GCSubstitutionRegister(*this); }
293  };
294 
295 
311  {
312  private:
314 
315  public:
316  GCSynonymousSubstitutionRegister(const CodonSubstitutionModel* model, bool within = false) :
317  CategorySubstitutionRegister(model, within),
318  code_(model->getGeneticCode())
319  {
320  const CodonAlphabet* pCA = dynamic_cast<const CodonAlphabet*>(code_->getSourceAlphabet());
321 
322  std::map<int, int> categories;
323  for (int i = 0; i < static_cast<int>(pCA->getSize()); i++)
324  {
325  int n = pCA->getThirdPosition(i);
326  switch (n)
327  {
328  case 0:
329  case 3:
330  categories[i] = 1;
331  break;
332  case 1:
333  case 2:
334  categories[i] = 2;
335  break;
336  }
337  }
338  setAlphabetCategories<int>(categories);
339  }
340 
341  GCSynonymousSubstitutionRegister(const SubstitutionModel* model, const GeneticCode& gencod, bool within = false) :
342  CategorySubstitutionRegister(model, within),
343  code_(&gencod)
344  {
345  const CodonAlphabet* pCA = dynamic_cast<const CodonAlphabet*>(code_->getSourceAlphabet());
346 
347  std::map<int, int> categories;
348  for (int i = 0; i < static_cast<int>(pCA->getSize()); i++)
349  {
350  int n = pCA->getThirdPosition(i);
351  switch (n)
352  {
353  case 0:
354  case 3:
355  categories[i] = 1;
356  break;
357  case 1:
358  case 2:
359  categories[i] = 2;
360  break;
361  }
362  }
363  setAlphabetCategories<int>(categories);
364  }
365 
368  code_(reg.code_)
369  {}
370 
372  {
374  code_ = reg.code_;
375  return *this;
376  }
377 
379 
380  public:
381  size_t getNumberOfSubstitutionTypes() const { return 2; }
382 
383  size_t getType(size_t fromState, size_t toState) const
384  {
385  int x = model_->getAlphabetStateAsInt(fromState);
386  int y = model_->getAlphabetStateAsInt(toState);
387  const CodonAlphabet* pCA = dynamic_cast<const CodonAlphabet*>(code_->getSourceAlphabet());
388  if (code_->isStop(x) || code_->isStop( y) || !code_->areSynonymous(x, y))
389  return 0;
390 
391  // only substitutions between 3rd positions
392 
393  if ((pCA->getFirstPosition(x) != pCA->getFirstPosition(y)) ||
394  (pCA->getSecondPosition(x) != pCA->getSecondPosition(y)))
395  return 0;
396 
397  size_t fromCat = categories_[fromState];
398  size_t toCat = categories_[toState];
399 
400  if (fromCat > 0 && toCat > 0)
401  return index_[fromCat - 1][toCat - 1];
402  else
403  return 0;
404  }
405 
406  std::string getTypeName (size_t type) const
407  {
408  if (type == 0)
409  {
410  return "no AT<->GC substitution or non-synonymous substitution";
411  }
412  else if (type == 1)
413  {
414  return "AT->GC synonymous";
415  }
416  else if (type == 2)
417  {
418  return "GC->AT synonymous";
419  }
420  else
421  {
422  throw Exception("GCSynonymousSubstitutionRegister::getTypeName. Bad substitution type.");
423  }
424  }
425  };
426 
427 
428 } // end of namespace bpp.
429 
430 #endif // _CATEGORY_SUBSTITUTIONREGISTER_H_
GCSynonymousSubstitutionRegister(const GCSynonymousSubstitutionRegister &reg)
virtual size_t getCategoryTo(size_t type) const
Interface for all substitution models.
GCSynonymousSubstitutionRegister & operator=(const GCSynonymousSubstitutionRegister &reg)
Distinguishes AT<->GC from GC<->AT.
size_t getType(size_t fromState, size_t toState) const
Get the substitution type far a given pair of model states.
int getThirdPosition(int codon) const
virtual size_t getType(size_t fromState, size_t toState) const
Get the substitution type far a given pair of model states.
virtual unsigned int getSize() const =0
std::size_t count(const std::string &s, const std::string &pattern)
GCSubstitutionRegister(const NucleotideSubstitutionModel *model, bool within=false)
const CodonAlphabet * getSourceAlphabet() const
int getFirstPosition(int codon) const
void setAlphabetCategories(const std::map< int, T > &categories)
GCSubstitutionRegister * clone() const
int getSecondPosition(int codon) const
virtual std::vector< size_t > getModelStates(int code) const =0
Get the state in the model corresponding to a particular state in the alphabet.
The CategorySubstitutionRegisters.
GCSynonymousSubstitutionRegister(const CodonSubstitutionModel *model, bool within=false)
ComprehensiveSubstitutionRegister(const SubstitutionModel *model, bool within=false)
std::string getTypeName(size_t type) const
Get the name of a given substitution type.
GCSynonymousSubstitutionRegister(const SubstitutionModel *model, const GeneticCode &gencod, bool within=false)
virtual int getAlphabetStateAsInt(size_t index) const =0
virtual size_t getCategory(size_t state) const
virtual const Alphabet * getAlphabet() const =0
void setModelCategories(const std::map< size_t, T > &categories)
virtual size_t getCategoryFrom(size_t type) const
std::vector< std::vector< size_t > > revIndex_
AbstractSubstitutionRegister & operator=(const AbstractSubstitutionRegister &asr)
Specialisation interface for nucleotide substitution model.
bool areSynonymous(int i, int j) const
std::string getTypeName(size_t type) const
Get the name of a given substitution type.
virtual std::string getStateDescription(size_t index) const =0
virtual bool isStop(int state) const =0
virtual const StateMap & getStateMap() const =0
Distinguishes all types of substitutions.
ComprehensiveSubstitutionRegister * clone() const
virtual std::string getCategoryName(size_t category) const
std::vector< std::vector< size_t > > index_
virtual size_t getNumberOfStates() const =0
Get the number of states.
unsigned int getSize() const
Distinguishes AT->GC vs GC->AT inside synonymous substitutions on third codon position.
GCSynonymousSubstitutionRegister * clone() const
CategorySubstitutionRegister(const SubstitutionModel *model, bool within=false)
Build a new substitution register with categories. This class is meant to be inherited.