bpp-phyl  2.1.0
 All Classes Namespaces Files Functions Variables Friends Pages
NonHomogeneousSequenceSimulator.cpp
Go to the documentation of this file.
1 //
2 // File: NonHomogeneousSequenceSimulator.cpp
3 // (previously SequenceSimulator.cpp, then HomogeneousSequenceSimulator.cpp)
4 // Created by: Julien Dutheil
5 // Bastien Boussau
6 // Created on: Wed Feb 4 16:30:51 2004
7 //
8 
9 /*
10  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
11 
12  This software is a computer program whose purpose is to provide classes
13  for phylogenetic data analysis.
14 
15  This software is governed by the CeCILL license under French law and
16  abiding by the rules of distribution of free software. You can use,
17  modify and/ or redistribute the software under the terms of the CeCILL
18  license as circulated by CEA, CNRS and INRIA at the following URL
19  "http://www.cecill.info".
20 
21  As a counterpart to the access to the source code and rights to copy,
22  modify and redistribute granted by the license, users are provided only
23  with a limited warranty and the software's author, the holder of the
24  economic rights, and the successive licensors have only limited
25  liability.
26 
27  In this respect, the user's attention is drawn to the risks associated
28  with loading, using, modifying and/or developing or reproducing the
29  software by the user in light of its specific status of free software,
30  that may mean that it is complicated to manipulate, and that also
31  therefore means that it is reserved for developers and experienced
32  professionals having in-depth computer knowledge. Users are therefore
33  encouraged to load and test the software's suitability as regards their
34  requirements in conditions enabling the security of their systems and/or
35  data to be ensured and, more generally, to use and operate it in the
36  same conditions as regards security.
37 
38  The fact that you are presently reading this means that you have had
39  knowledge of the CeCILL license and that you accept its terms.
40  */
41 
43 #include "../Model/SubstitutionModelSetTools.h"
44 
48 
49 // From SeqLib:
51 
52 using namespace bpp;
53 using namespace std;
54 
55 /******************************************************************************/
56 
58  const SubstitutionModelSet* modelSet,
59  const DiscreteDistribution* rate,
60  const Tree* tree) throw (Exception) :
61  modelSet_(modelSet),
62  alphabet_(modelSet_->getAlphabet()),
63  rate_(rate),
64  templateTree_(tree),
65  tree_(*tree),
66  ownModelSet_(false),
67  leaves_(tree_.getLeaves()),
68  seqNames_(),
69  nbNodes_(),
70  nbClasses_(rate_->getNumberOfCategories()),
71  nbStates_(modelSet_->getNumberOfStates()),
72  continuousRates_(false)
73 {
74  if (!modelSet->isFullySetUpFor(*tree))
75  throw Exception("NonHomogeneousSequenceSimulator(constructor). Model set is not fully specified.");
76  init();
77 }
78 
79 /******************************************************************************/
80 
82  const SubstitutionModel* model,
83  const DiscreteDistribution* rate,
84  const Tree* tree) :
85  modelSet_(0),
86  alphabet_(model->getAlphabet()),
87  rate_(rate),
88  templateTree_(tree),
89  tree_(*tree),
90  ownModelSet_(true),
91  leaves_(tree_.getLeaves()),
92  seqNames_(),
93  nbNodes_(),
94  nbClasses_(rate_->getNumberOfCategories()),
95  nbStates_(model->getNumberOfStates()),
96  continuousRates_(false)
97 {
98  FixedFrequenciesSet* fSet = new FixedFrequenciesSet(model->getAlphabet(), model->getFrequencies());
99  fSet->setNamespace("anc.");
100  modelSet_ = SubstitutionModelSetTools::createHomogeneousModelSet(dynamic_cast<SubstitutionModel*>(model->clone()), fSet, templateTree_);
101  init();
102 }
103 
104 /******************************************************************************/
106 {
107  seqNames_.resize(leaves_.size());
108  for (size_t i = 0; i < seqNames_.size(); i++)
109  {
110  seqNames_[i] = leaves_[i]->getName();
111  }
112  // Initialize cumulative pxy:
113  vector<SNode*> nodes = tree_.getNodes();
114  nodes.pop_back(); // remove root
115  nbNodes_ = nodes.size();
116 
117  for (size_t i = 0; i < nodes.size(); i++)
118  {
119  SNode* node = nodes[i];
120  node->getInfos().model = modelSet_->getModelForNode(node->getId());
121  double d = node->getDistanceToFather();
122  VVVdouble* cumpxy_node_ = &node->getInfos().cumpxy;
123  cumpxy_node_->resize(nbClasses_);
124  for (size_t c = 0; c < nbClasses_; c++)
125  {
126  VVdouble* cumpxy_node_c_ = &(*cumpxy_node_)[c];
127  cumpxy_node_c_->resize(nbStates_);
128  RowMatrix<double> P = node->getInfos().model->getPij_t(d * rate_->getCategory(c));
129  for (size_t x = 0; x < nbStates_; x++)
130  {
131  Vdouble* cumpxy_node_c_x_ = &(*cumpxy_node_c_)[x];
132  cumpxy_node_c_x_->resize(nbStates_);
133  (*cumpxy_node_c_x_)[0] = P(x, 0);
134  for (size_t y = 1; y < nbStates_; y++)
135  {
136  (*cumpxy_node_c_x_)[y] = (*cumpxy_node_c_x_)[y - 1] + P(x, y);
137  }
138  }
139  }
140  }
141 }
142 
143 /******************************************************************************/
145 {
146  // Draw an initial state randomly according to equilibrum frequencies:
147  int initialState = 0;
149  double cumprob = 0;
150  vector<double> freqs = modelSet_->getRootFrequencies();
151  for (size_t i = 0; i < nbStates_; i++)
152  {
153  cumprob += freqs[i];
154  if (r <= cumprob)
155  {
156  initialState = static_cast<int>(i);
157  break;
158  }
159  }
160  return simulate(initialState);
161 }
162 
163 /******************************************************************************/
165 {
166  if (continuousRates_)
167  {
168  // Draw a random rate:
169  double rate = rate_->randC();
170  // Make this state evolve:
171  return simulate(initialState, rate);
172  }
173  else
174  {
175  // Draw a random rate:
176  size_t rateClass = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(rate_->getNumberOfCategories())));
177  // Make this state evolve:
178  return simulate(initialState, rateClass);
179  }
180 }
181 
182 /******************************************************************************/
183 Site* NonHomogeneousSequenceSimulator::simulate(int initialState, size_t rateClass) const
184 {
185  // Launch recursion:
186  SNode* root = tree_.getRootNode();
187  root->getInfos().state = initialState;
188  for (size_t i = 0; i < root->getNumberOfSons(); i++)
189  {
190  evolveInternal(root->getSon(i), rateClass);
191  }
192  // Now create a Site object:
193  Vint site(leaves_.size());
194  for (size_t i = 0; i < leaves_.size(); i++)
195  {
196  site[i] = leaves_[i]->getInfos().model->getAlphabetChar(leaves_[i]->getInfos().state);
197  }
198  return new Site(site, alphabet_);
199 }
200 
201 /******************************************************************************/
202 Site* NonHomogeneousSequenceSimulator::simulate(int initialState, double rate) const
203 {
204  // Launch recursion:
205  SNode* root = tree_.getRootNode();
206  root->getInfos().state = initialState;
207  for (size_t i = 0; i < root->getNumberOfSons(); i++)
208  {
209  evolveInternal(root->getSon(i), rate);
210  }
211  // Now create a Site object:
212  Vint site(leaves_.size());
213  for (size_t i = 0; i < leaves_.size(); i++)
214  {
215  site[i] = leaves_[i]->getInfos().model->getAlphabetChar(leaves_[i]->getInfos().state);
216  }
217  return new Site(site, alphabet_);
218 }
219 
220 /******************************************************************************/
222 {
223  // Draw an initial state randomly according to equilibrum frequencies:
224  int initialState = 0;
226  double cumprob = 0;
227  vector<double> freqs = modelSet_->getRootFrequencies();
228  for (size_t i = 0; i < nbStates_; i++)
229  {
230  cumprob += freqs[i];
231  if (r <= cumprob)
232  {
233  initialState = (int)i;
234  break;
235  }
236  }
237  // Make this state evolve:
238  return simulate(initialState, rate);
239 }
240 
241 /******************************************************************************/
243 {
244  Vint initialStates(numberOfSites, 0);
245  for (size_t j = 0; j < numberOfSites; j++)
246  {
248  double cumprob = 0;
249  vector<double> freqs = modelSet_->getRootFrequencies();
250  for (size_t i = 0; i < nbStates_; i++)
251  {
252  cumprob += freqs[i];
253  if (r <= cumprob)
254  {
255  initialStates[j] = (int)i;
256  break;
257  }
258  }
259  }
260  if (continuousRates_)
261  {
264  for (size_t j = 0; j < numberOfSites; j++)
265  {
266  Site* site = simulate();
267  site->setPosition(static_cast<int>(j));
268  sites->addSite(*site);
269  delete site;
270  }
271  return sites;
272  }
273  else
274  {
275  // More efficient to do site this way:
276  // Draw random rates:
277  vector<size_t> rateClasses(numberOfSites);
278  size_t nCat = rate_->getNumberOfCategories();
279  for (size_t j = 0; j < numberOfSites; j++)
280  {
281  rateClasses[j] = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(nCat)));
282  }
283  // Make these states evolve:
284  SiteContainer* sites = multipleEvolve(initialStates, rateClasses);
285  return sites;
286  }
287 }
288 
289 /******************************************************************************/
291 {
292  // Draw an initial state randomly according to equilibrum frequencies:
293  int initialState = 0;
295  double cumprob = 0;
296  vector<double> freqs = modelSet_->getRootFrequencies();
297  for (size_t i = 0; i < nbStates_; i++)
298  {
299  cumprob += freqs[i];
300  if (r <= cumprob)
301  {
302  initialState = static_cast<int>(i);
303  break;
304  }
305  }
306 
307  return dSimulate(initialState);
308 }
309 
310 /******************************************************************************/
312 {
313  // Draw a random rate:
314  if (continuousRates_)
315  {
316  double rate = rate_->randC();
317  return dSimulate(initialState, rate);
318  }
319  else
320  {
321  size_t rateClass = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(rate_->getNumberOfCategories())));
322  return dSimulate(initialState, rateClass);
323  // NB: this is more efficient than dSimulate(initialState, rDist_->rand())
324  }
325 }
326 
327 /******************************************************************************/
329 {
330  // Make this state evolve:
332  dEvolve(initialState, rate, *hssr);
333  return hssr;
334 }
335 
336 /******************************************************************************/
337 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate(int initialState, size_t rateClass) const
338 {
339  return dSimulate(initialState, rate_->getCategory(rateClass));
340 }
341 
342 /******************************************************************************/
344 {
345  // Draw an initial state randomly according to equilibrum frequencies:
346  int initialState = 0;
348  double cumprob = 0;
349  vector<double> freqs = modelSet_->getRootFrequencies();
350  for (size_t i = 0; i < nbStates_; i++)
351  {
352  cumprob += freqs[i];
353  if (r <= cumprob)
354  {
355  initialState = static_cast<int>(i);
356  break;
357  }
358  }
359  return dSimulate(initialState, rate);
360 }
361 
362 /******************************************************************************/
363 int NonHomogeneousSequenceSimulator::evolve(const SNode* node, int initialState, size_t rateClass) const
364 {
365  const Vdouble* cumpxy_node_c_x_ = &node->getInfos().cumpxy[rateClass][initialState];
367  for (int y = 0; y < static_cast<int>(nbStates_); y++)
368  {
369  if (rand < (*cumpxy_node_c_x_)[y]) return y;
370  }
371  cerr << "DEBUG: This message should never happen! (HomogeneousSequenceSimulator::evolve)" << endl;
372  cout << " rand = " << rand << endl;
373  return -1;
374 }
375 
376 /******************************************************************************/
377 int NonHomogeneousSequenceSimulator::evolve(const SNode* node, int initialState, double rate) const
378 {
379  double cumpxy = 0;
381  double l = rate * node->getDistanceToFather();
382  const SubstitutionModel* model = node->getInfos().model;
383  for (int y = 0; y < static_cast<int>(nbStates_); y++)
384  {
385  cumpxy += model->Pij_t(initialState, y, l);
386  if (rand < cumpxy) return y;
387  }
388  cerr << "DEBUG: This message should never happen! (NonHomogeneousSequenceSimulator::evolve)" << endl;
389  cout << " rand = " << rand << endl;
390  cout << "cumpxy = " << cumpxy << endl;
391  MatrixTools::print(model->getPij_t(l));
392  return -1;
393 }
394 
395 /******************************************************************************/
396 void NonHomogeneousSequenceSimulator::multipleEvolve(const SNode* node, const Vint& initialStates, const vector<size_t>& rateClasses, Vint& finalStates) const
397 {
398  const VVVdouble* cumpxy_node_ = &node->getInfos().cumpxy;
399  for (size_t i = 0; i < initialStates.size(); i++)
400  {
401  const Vdouble* cumpxy_node_c_x_ = &(*cumpxy_node_)[rateClasses[i]][initialStates[i]];
403  for (size_t y = 0; y < nbStates_; y++)
404  {
405  if (rand < (*cumpxy_node_c_x_)[y])
406  {
407  finalStates[i] = (int)y;
408  break;
409  }
410  }
411  }
412 }
413 
414 /******************************************************************************/
415 void NonHomogeneousSequenceSimulator::evolveInternal(SNode* node, size_t rateClass) const
416 {
417  if (!node->hasFather())
418  {
419  cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl;
420  return;
421  }
422  node->getInfos().state = evolve(node, node->getFather()->getInfos().state, rateClass);
423  for (size_t i = 0; i < node->getNumberOfSons(); i++)
424  {
425  evolveInternal(node->getSon(i), rateClass);
426  }
427 }
428 
429 /******************************************************************************/
431 {
432  if (!node->hasFather())
433  {
434  cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl;
435  return;
436  }
437  node->getInfos().state = evolve(node, node->getFather()->getInfos().state, rate);
438  for (size_t i = 0; i < node->getNumberOfSons(); i++)
439  {
440  evolveInternal(node->getSon(i), rate);
441  }
442 }
443 
444 /******************************************************************************/
445 void NonHomogeneousSequenceSimulator::multipleEvolveInternal(SNode* node, const vector<size_t>& rateClasses) const
446 {
447  if (!node->hasFather())
448  {
449  cerr << "DEBUG: NonHomogeneousSequenceSimulator::multipleEvolveInternal. Forbidden call of method on root node." << endl;
450  return;
451  }
452  const vector<int>* initialStates = &node->getFather()->getInfos().states;
453  size_t n = initialStates->size();
454  node->getInfos().states.resize(n); // allocation.
455  multipleEvolve(node, node->getFather()->getInfos().states, rateClasses, node->getInfos().states);
456  for (size_t i = 0; i < node->getNumberOfSons(); i++)
457  {
458  multipleEvolveInternal(node->getSon(i), rateClasses);
459  }
460 }
461 
462 /******************************************************************************/
463 SiteContainer* NonHomogeneousSequenceSimulator::multipleEvolve(const Vint& initialStates, const vector<size_t>& rateClasses) const
464 {
465  // Launch recursion:
466  SNode* root = tree_.getRootNode();
467  root->getInfos().states = initialStates;
468  for (size_t i = 0; i < root->getNumberOfSons(); i++)
469  {
470  multipleEvolveInternal(root->getSon(i), rateClasses);
471  }
472  // Now create a SiteContainer object:
474  size_t n = leaves_.size();
475  size_t nbSites = initialStates.size();
476  const SubstitutionModel* model = 0;
477  for (size_t i = 0; i < n; i++)
478  {
479  vector<int> content(nbSites);
480  vector<int>* states = &leaves_[i]->getInfos().states;
481  model = leaves_[i]->getInfos().model;
482  for (size_t j = 0; j < nbSites; j++)
483  {
484  content[j] = model->getAlphabetChar((*states)[j]);
485  }
486  sites->addSequence(BasicSequence(leaves_[i]->getName(), content, alphabet_), false);
487  }
488  return sites;
489 }
490 
491 /******************************************************************************/
492 void NonHomogeneousSequenceSimulator::dEvolve(int initialState, double rate, RASiteSimulationResult& rassr) const
493 {
494  // Launch recursion:
495  SNode* root = tree_.getRootNode();
496  root->getInfos().state = initialState;
497  for (size_t i = 0; i < root->getNumberOfSons(); i++)
498  {
499  dEvolveInternal(root->getSon(i), rate, rassr);
500  }
501 }
502 
503 /******************************************************************************/
505 {
506  if (!node->hasFather())
507  {
508  cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl;
509  return;
510  }
511  SimpleMutationProcess process(node->getInfos().model);
512  MutationPath mp = process.detailedEvolve(node->getFather()->getInfos().state, node->getDistanceToFather() * rate);
513  node->getInfos().state = mp.getFinalState();
514 
515  // Now append infos in rassr:
516  rassr.addNode(node->getId(), mp);
517 
518  // Now jump to son nodes:
519  for (size_t i = 0; i < node->getNumberOfSons(); i++)
520  {
521  dEvolveInternal(node->getSon(i), rate, rassr);
522  }
523 }
524 
525 /******************************************************************************/
526