bpp-phyl  2.4.0
AbstractNonHomogeneousTreeLikelihood.cpp
Go to the documentation of this file.
1 //
2 // File: AbstractNonHomogeneousTreeLikelihood.cpp
3 // Created by: Julien Dutheil
4 // Created on: Tue Oct 09 16:07 2007
5 // From file: AbstractHomogeneousTreeLikelihood.cpp
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for phylogenetic data analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  encouraged to load and test the software's suitability as regards their
31  requirements in conditions enabling the security of their systems and/or
32  data to be ensured and, more generally, to use and operate it in the
33  same conditions as regards security.
34 
35  The fact that you are presently reading this means that you have had
36  knowledge of the CeCILL license and that you accept its terms.
37 */
38 
40 #include "../PatternTools.h"
41 
42 //From SeqLib:
43 #include <Bpp/Seq/SiteTools.h>
45 
46 #include <Bpp/Text/TextTools.h>
48 
49 using namespace bpp;
50 
51 // From the STL:
52 #include <iostream>
53 
54 using namespace std;
55 
56 /******************************************************************************/
57 
59  const Tree& tree,
60  SubstitutionModelSet* modelSet,
61  DiscreteDistribution* rDist,
62  bool verbose,
63  bool reparametrizeRoot) :
65  modelSet_(0),
66  brLenParameters_(),
67  pxy_(),
68  dpxy_(),
69  d2pxy_(),
70  rootFreqs_(),
71  nodes_(),
72  idToNode_(),
73  nbSites_(),
74  nbDistinctSites_(),
75  nbClasses_(),
76  nbStates_(),
77  nbNodes_(),
78  verbose_(),
79  minimumBrLen_(),
80  maximumBrLen_(),
81  brLenConstraint_(),
82  reparametrizeRoot_(reparametrizeRoot),
83  root1_(),
84  root2_()
85 {
86  init_(tree, modelSet, rDist, verbose);
87 }
88 
89 /******************************************************************************/
90 
94  modelSet_(lik.modelSet_),
96  pxy_(lik.pxy_),
97  dpxy_(lik.dpxy_),
98  d2pxy_(lik.d2pxy_),
100  nodes_(),
101  idToNode_(),
102  nbSites_(lik.nbSites_),
104  nbClasses_(lik.nbClasses_),
105  nbStates_(lik.nbStates_),
106  nbNodes_(lik.nbNodes_),
107  verbose_(lik.verbose_),
110  brLenConstraint_(dynamic_cast<Constraint*>(lik.brLenConstraint_->clone())),
112  root1_(lik.root1_),
113  root2_(lik.root2_)
114 {
115  nodes_ = tree_->getNodes();
116  nodes_.pop_back(); //Remove the root node (the last added!).
117  //Rebuild nodes index:
118  for (unsigned int i = 0; i < nodes_.size(); i++)
119  {
120  const Node* node = nodes_[i];
121  idToNode_[node->getId()] = node;
122  }
123 }
124 
125 /******************************************************************************/
126 
129 {
131  modelSet_ = lik.modelSet_;
133  pxy_ = lik.pxy_;
134  dpxy_ = lik.dpxy_;
135  d2pxy_ = lik.d2pxy_;
136  rootFreqs_ = lik.rootFreqs_;
137  nodes_ = tree_->getNodes();
138  nodes_.pop_back(); //Remove the root node (the last added!).
139  nbSites_ = lik.nbSites_;
141  nbClasses_ = lik.nbClasses_;
142  nbStates_ = lik.nbStates_;
143  nbNodes_ = lik.nbNodes_;
144  verbose_ = lik.verbose_;
147  if (brLenConstraint_.get()) brLenConstraint_.release();
148  brLenConstraint_.reset(lik.brLenConstraint_->clone());
150  root1_ = lik.root1_;
151  root2_ = lik.root2_;
152  //Rebuild nodes index:
153  for( unsigned int i = 0; i < nodes_.size(); i++)
154  {
155  const Node * node = nodes_[i];
156  idToNode_[node->getId()] = node;
157  }
158  return *this;
159 }
160 
161 /******************************************************************************/
162 
164  const Tree& tree,
165  SubstitutionModelSet* modelSet,
166  DiscreteDistribution* rDist,
167  bool verbose)
168 {
169  TreeTools::checkIds(tree, true);
170  tree_ = new TreeTemplate<Node>(tree);
171  root1_ = tree_->getRootNode()->getSon(0)->getId();
172  root2_ = tree_->getRootNode()->getSon(1)->getId();
173  nodes_ = tree_->getNodes();
174  nodes_.pop_back(); //Remove the root node (the last added!).
175  nbNodes_ = nodes_.size();
176  //Build nodes index:
177  for (unsigned int i = 0; i < nodes_.size(); i++)
178  {
179  const Node * node = nodes_[i];
180  idToNode_[node->getId()] = node;
181  }
183 
184  verbose_ = verbose;
185 
186  minimumBrLen_ = 0.000001;
187  maximumBrLen_ = 10000;
189  setSubstitutionModelSet(modelSet);
190 }
191 
192 /******************************************************************************/
193 
195 {
196  //Check:
197  if (data_)
198  {
199  if (modelSet->getAlphabet()->getAlphabetType() != data_->getAlphabet()->getAlphabetType())
200  throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::setSubstitutionModelSet(). Model alphabet do not match existing data.");
201  }
202 
203  modelSet_ = modelSet;
204 
205  if (data_)
206  {
207  if (modelSet->getNumberOfStates() != modelSet_->getNumberOfStates())
208  setData(*data_); //Have to reinitialize the whole data structure.
209  }
210 
211  nbStates_ = modelSet->getNumberOfStates();
212 
213  //Allocate transition probabilities arrays:
214  for (unsigned int l = 0; l < nbNodes_; l++)
215  {
216  //For each son node,i
217  Node* son = nodes_[l];
218 
219  VVVdouble* pxy__son = & pxy_[son->getId()];
220  pxy__son->resize(nbClasses_);
221  for (unsigned int c = 0; c < nbClasses_; c++)
222  {
223  VVdouble * pxy__son_c = & (* pxy__son)[c];
224  pxy__son_c->resize(nbStates_);
225  for(unsigned int x = 0; x < nbStates_; x++)
226  {
227  (*pxy__son_c)[x].resize(nbStates_);
228  }
229  }
230 
231  VVVdouble* dpxy__son = & dpxy_[son->getId()];
232  dpxy__son->resize(nbClasses_);
233  for (unsigned int c = 0; c < nbClasses_; c++)
234  {
235  VVdouble * dpxy__son_c = & (* dpxy__son)[c];
236  dpxy__son_c->resize(nbStates_);
237  for(unsigned int x = 0; x < nbStates_; x++)
238  {
239  (* dpxy__son_c)[x].resize(nbStates_);
240  }
241  }
242 
243  VVVdouble* d2pxy__son = & d2pxy_[son->getId()];
244  d2pxy__son->resize(nbClasses_);
245  for (unsigned int c = 0; c < nbClasses_; c++)
246  {
247  VVdouble * d2pxy__son_c = & (* d2pxy__son)[c];
248  d2pxy__son_c->resize(nbStates_);
249  for(unsigned int x = 0; x < nbStates_; x++)
250  {
251  (* d2pxy__son_c)[x].resize(nbStates_);
252  }
253  }
254  }
255 
256  //We have to reset parameters. If the instance is not initialized, this will be done by the initialize method.
257  if (initialized_)
258  {
259  initParameters();
262  }
263 }
264 
265 /******************************************************************************/
266 
268 {
269  if (initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Object is already initialized.");
270  if (!data_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Data are no set.");
271  initParameters();
272  initialized_ = true;
275 }
276 
277 /******************************************************************************/
278 
280 {
281  if (!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getBranchLengthsParameters(). Object is not initialized.");
283 }
284 
285 /******************************************************************************/
286 
288 {
289  if(!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getSubstitutionModelParameters(). Object is not initialized.");
291 }
292 
293 /******************************************************************************/
294 
296 {
297  // Reset parameters:
299 
300  // Branch lengths:
303 
304  // Substitution model:
306 
307  // Rate distribution:
309 }
310 
311 /******************************************************************************/
312 
314 {
315  if (!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::applyParameters(). Object not initialized.");
316  //Apply branch lengths:
317  for (unsigned int i = 0; i < nbNodes_; i++)
318  {
319  int id = nodes_[i]->getId();
320  if (reparametrizeRoot_ && id == root1_)
321  {
322  const Parameter* rootBrLen = &getParameter("BrLenRoot");
323  const Parameter* rootPos = &getParameter("RootPosition");
324  nodes_[i]->setDistanceToFather(rootBrLen->getValue() * rootPos->getValue());
325  }
326  else if (reparametrizeRoot_ && id == root2_)
327  {
328  const Parameter* rootBrLen = &getParameter("BrLenRoot");
329  const Parameter* rootPos = &getParameter("RootPosition");
330  nodes_[i]->setDistanceToFather(rootBrLen->getValue() * (1. - rootPos->getValue()));
331  }
332  else
333  {
334  const Parameter* brLen = &getParameter(string("BrLen") + TextTools::toString(i));
335  if (brLen) nodes_[i]->setDistanceToFather(brLen->getValue());
336  }
337  }
338  //Apply substitution model parameters:
339 
341  //Apply rate distribution parameters:
343 }
344 
345 /******************************************************************************/
346 
348 {
350  double l1 = 0, l2 = 0;
351  for (unsigned int i = 0; i < nbNodes_; i++)
352  {
353  double d = minimumBrLen_;
354  if (!nodes_[i]->hasDistanceToFather())
355  {
356  if (verbose)
357  ApplicationTools::displayWarning("Missing branch length " + TextTools::toString(i) + ". Value is set to " + TextTools::toString(minimumBrLen_));
358  nodes_[i]->setDistanceToFather(minimumBrLen_);
359  }
360  else
361  {
362  d = nodes_[i]->getDistanceToFather();
363  if (d < minimumBrLen_)
364  {
365  if (verbose)
366  ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too small: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(minimumBrLen_));
367  nodes_[i]->setDistanceToFather(minimumBrLen_);
368  d = minimumBrLen_;
369  }
370  if (d > maximumBrLen_)
371  {
372  if (verbose)
373  ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too big: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(maximumBrLen_));
374  nodes_[i]->setDistanceToFather(maximumBrLen_);
375  d = maximumBrLen_;
376  }
377  }
378  if (reparametrizeRoot_ && nodes_[i]->getId() == root1_)
379  l1 = d;
380  else if (reparametrizeRoot_ && nodes_[i]->getId() == root2_)
381  l2 = d;
382  else
383  {
384  brLenParameters_.addParameter(Parameter("BrLen" + TextTools::toString(i), d, brLenConstraint_->clone(), true)); //Attach constraint to avoid clonage problems!
385  }
386  }
387  if (reparametrizeRoot_) {
388  brLenParameters_.addParameter(Parameter("BrLenRoot", l1 + l2, brLenConstraint_->clone(), true));
389  brLenParameters_.addParameter(Parameter("RootPosition", l1 / (l1 + l2), &Parameter::PROP_CONSTRAINT_EX));
390  }
391 }
392 
393 /*******************************************************************************/
394 
396 {
397  for(unsigned int l = 0; l < nbNodes_; l++)
398  {
399  //For each node,
400  Node * node = nodes_[l];
402  }
404 }
405 
406 /*******************************************************************************/
407 
409 {
410  const TransitionModel* model = modelSet_->getModelForNode(node->getId());
411  double l = node->getDistanceToFather();
412 
413  //Computes all pxy and pyx once for all:
414  VVVdouble * pxy__node = & pxy_[node->getId()];
415  for(unsigned int c = 0; c < nbClasses_; c++)
416  {
417  VVdouble * pxy__node_c = & (* pxy__node)[c];
419  for(unsigned int x = 0; x < nbStates_; x++)
420  {
421  Vdouble * pxy__node_c_x = & (* pxy__node_c)[x];
422  for(unsigned int y = 0; y < nbStates_; y++)
423  {
424  (* pxy__node_c_x)[y] = Q(x, y);
425  }
426  }
427  }
428 
430  {
431  //Computes all dpxy/dt once for all:
432  VVVdouble * dpxy__node = & dpxy_[node->getId()];
433 
434  for(unsigned int c = 0; c < nbClasses_; c++)
435  {
436  VVdouble * dpxy__node_c = & (* dpxy__node)[c];
437  double rc = rateDistribution_->getCategory(c);
438 
439  RowMatrix<double> dQ = model->getdPij_dt(l * rc);
440 
441  for(unsigned int x = 0; x < nbStates_; x++)
442  {
443  Vdouble * dpxy__node_c_x = & (* dpxy__node_c)[x];
444  for(unsigned int y = 0; y < nbStates_; y++)
445  (* dpxy__node_c_x)[y] = rc * dQ(x, y);
446  }
447  }
448  }
449 
451  {
452  //Computes all d2pxy/dt2 once for all:
453  VVVdouble * d2pxy__node = & d2pxy_[node->getId()];
454  for(unsigned int c = 0; c < nbClasses_; c++)
455  {
456  VVdouble * d2pxy__node_c = & (* d2pxy__node)[c];
457  double rc = rateDistribution_->getCategory(c);
458  RowMatrix<double> d2Q = model->getd2Pij_dt2(l * rc);
459  for(unsigned int x = 0; x < nbStates_; x++)
460  {
461  Vdouble * d2pxy__node_c_x = & (* d2pxy__node_c)[x];
462  for(unsigned int y = 0; y < nbStates_; y++)
463  {
464  (* d2pxy__node_c_x)[y] = rc * rc * d2Q(x, y);
465  }
466  }
467  }
468  }
469 }
470 
471 /*******************************************************************************/
472 
const ParameterList & getIndependentParameters() const
virtual bool matchParametersValues(const ParameterList &parameters)=0
virtual void setData(const SiteContainer &sites)=0
Set the dataset for which the likelihood must be evaluated.
Substitution models manager for non-homogeneous / non-reversible models of evolution.
AbstractNonHomogeneousTreeLikelihood(const Tree &tree, SubstitutionModelSet *modelSet, DiscreteDistribution *rDist, bool verbose=true, bool reparametrizeRoot=true)
Partial implementation for branch non-homogeneous models of the TreeLikelihood interface.
static void displayWarning(const std::string &text)
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
Definition: Node.h:283
std::vector< Node * > nodes_
Pointer toward all nodes in the tree.
size_t getNumberOfStates() const
Get the number of states associated to this model set.
virtual size_t getNumberOfCategories() const =0
Partial implementation of the DiscreteRatesAcrossSitesTreeLikelihood interface.
virtual void reset()
virtual const Matrix< double > & getd2Pij_dt2(double t) const =0
virtual void computeAllTransitionProbabilities()
Fill the pxy_, dpxy_ and d2pxy_ arrays for all nodes.
virtual double getValue() const
ParameterList getBranchLengthsParameters() const
Get the branch lengths parameters.
static const IntervalConstraint PROP_CONSTRAINT_EX
virtual void applyParameters()
All parameters are stored in a parameter list. This function apply these parameters to the substituti...
STL namespace.
const TransitionModel * getModelForNode(int nodeId) const
Get the model associated to a particular node id.
ParameterList getSubstitutionModelParameters() const
Get the parameters associated to substitution model(s).
The phylogenetic tree class.
Interface for phylogenetic tree objects.
Definition: Tree.h:148
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
virtual const Matrix< double > & getPij_t(double t) const =0
virtual void fireParameterChanged(const ParameterList &parameters)
virtual const Matrix< double > & getdPij_dt(double t) const =0
const Alphabet * getAlphabet() const
AbstractNonHomogeneousTreeLikelihood & operator=(const AbstractNonHomogeneousTreeLikelihood &lik)
Assignation operator.
static bool checkIds(const Tree &tree, bool throwException)
Check if the ids are uniques.
Definition: TreeTools.cpp:778
virtual void initParameters()
This builds the parameters list from all parametrizable objects, i.e. substitution model...
virtual double getCategory(size_t categoryIndex) const =0
std::vector< double > Vdouble
bool matchParametersValues(const ParameterList &parameters)
virtual void computeTransitionProbabilitiesForNode(const Node *node)
Fill the pxy_, dpxy_ and d2pxy_ arrays for one node.
virtual const ParameterList & getIndependentParameters() const =0
std::map< int, const Node * > idToNode_
An index linking nodes to their id, for faster access than the getNode() method.
AbstractDiscreteRatesAcrossSitesTreeLikelihood & operator=(const AbstractDiscreteRatesAcrossSitesTreeLikelihood &tl)
The phylogenetic node class.
Definition: Node.h:90
virtual ParameterList getCommonParametersWith(const ParameterList &params) const
virtual void addParameter(const Parameter &param)
std::vector< double > getRootFrequencies() const
const ParameterList & getParameters() const
const Parameter & getParameter(const std::string &name) const
virtual const Alphabet * getAlphabet() const =0
std::string toString(T t)
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble
void init_(const Tree &tree, SubstitutionModelSet *modelSet, DiscreteDistribution *rDist, bool verbose)
Method called by constructor.
virtual void addParameters_(const ParameterList &parameters)
NonHomogeneousTreeLikelihood * clone() const =0
virtual std::string getAlphabetType() const =0