bpp-phyl  2.1.0
Bpp/Phyl/Likelihood/AbstractNonHomogeneousTreeLikelihood.cpp
Go to the documentation of this file.
00001 //
00002 // File: AbstractNonHomogeneousTreeLikelihood.cpp
00003 // Created by: Julien Dutheil
00004 // Created on: Tue Oct 09 16:07 2007
00005 // From file: AbstractHomogeneousTreeLikelihood.cpp
00006 //
00007 
00008 /*
00009   Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
00010 
00011   This software is a computer program whose purpose is to provide classes
00012   for phylogenetic data analysis.
00013 
00014   This software is governed by the CeCILL  license under French law and
00015   abiding by the rules of distribution of free software.  You can  use, 
00016   modify and/ or redistribute the software under the terms of the CeCILL
00017   license as circulated by CEA, CNRS and INRIA at the following URL
00018   "http://www.cecill.info". 
00019 
00020   As a counterpart to the access to the source code and  rights to copy,
00021   modify and redistribute granted by the license, users are provided only
00022   with a limited warranty  and the software's author,  the holder of the
00023   economic rights,  and the successive licensors  have only  limited
00024   liability. 
00025 
00026   In this respect, the user's attention is drawn to the risks associated
00027   with loading,  using,  modifying and/or developing or reproducing the
00028   software by the user in light of its specific status of free software,
00029   that may mean  that it is complicated to manipulate,  and  that  also
00030   encouraged to load and test the software's suitability as regards their
00031   requirements in conditions enabling the security of their systems and/or 
00032   data to be ensured and,  more generally, to use and operate it in the 
00033   same conditions as regards security. 
00034 
00035   The fact that you are presently reading this means that you have had
00036   knowledge of the CeCILL license and that you accept its terms.
00037 */
00038 
00039 #include "AbstractNonHomogeneousTreeLikelihood.h"
00040 #include "../PatternTools.h"
00041 
00042 //From SeqLib:
00043 #include <Bpp/Seq/SiteTools.h>
00044 #include <Bpp/Seq/Container/SequenceContainerTools.h>
00045 
00046 #include <Bpp/Text/TextTools.h>
00047 #include <Bpp/App/ApplicationTools.h>
00048 
00049 using namespace bpp;
00050 
00051 // From the STL:
00052 #include <iostream>
00053 
00054 using namespace std;
00055 
00056 /******************************************************************************/
00057 
00058 AbstractNonHomogeneousTreeLikelihood::AbstractNonHomogeneousTreeLikelihood(
00059                                                                            const Tree& tree,
00060                                                                            SubstitutionModelSet* modelSet,
00061                                                                            DiscreteDistribution* rDist,
00062                                                                            bool verbose,
00063                                                                            bool reparametrizeRoot)
00064   throw (Exception) :
00065   AbstractDiscreteRatesAcrossSitesTreeLikelihood(rDist, verbose),
00066   modelSet_(0),
00067   brLenParameters_(),
00068   pxy_(),
00069   dpxy_(),
00070   d2pxy_(),
00071   rootFreqs_(),
00072   nodes_(),
00073   idToNode_(),
00074   nbSites_(),
00075   nbDistinctSites_(),
00076   nbClasses_(),
00077   nbStates_(),
00078   nbNodes_(),
00079   verbose_(),
00080   minimumBrLen_(),
00081   maximumBrLen_(),
00082   brLenConstraint_(0),
00083   reparametrizeRoot_(reparametrizeRoot),
00084   root1_(),
00085   root2_()
00086 {
00087   init_(tree, modelSet, rDist, verbose);
00088 }
00089 
00090 /******************************************************************************/
00091 
00092 AbstractNonHomogeneousTreeLikelihood::AbstractNonHomogeneousTreeLikelihood(
00093                                                                            const AbstractNonHomogeneousTreeLikelihood& lik) :
00094   AbstractDiscreteRatesAcrossSitesTreeLikelihood(lik),
00095   modelSet_(lik.modelSet_),
00096   brLenParameters_(lik.brLenParameters_),
00097   pxy_(lik.pxy_),
00098   dpxy_(lik.dpxy_),
00099   d2pxy_(lik.d2pxy_),
00100   rootFreqs_(lik.rootFreqs_),
00101   nodes_(),
00102   idToNode_(),
00103   nbSites_(lik.nbSites_),
00104   nbDistinctSites_(lik.nbDistinctSites_),
00105   nbClasses_(lik.nbClasses_),
00106   nbStates_(lik.nbStates_),
00107   nbNodes_(lik.nbNodes_),
00108   verbose_(lik.verbose_),
00109   minimumBrLen_(lik.minimumBrLen_),
00110   maximumBrLen_(lik.maximumBrLen_),
00111   brLenConstraint_(dynamic_cast<Constraint*>(lik.brLenConstraint_->clone())),
00112   reparametrizeRoot_(lik.reparametrizeRoot_),
00113   root1_(lik.root1_),
00114   root2_(lik.root2_)
00115 { 
00116   nodes_ = tree_->getNodes();
00117   nodes_.pop_back(); //Remove the root node (the last added!).  
00118   //Rebuild nodes index:
00119   for (unsigned int i = 0; i < nodes_.size(); i++)
00120     {
00121       const Node* node = nodes_[i];
00122       idToNode_[node->getId()] = node;
00123     }
00124 }
00125 
00126 /******************************************************************************/
00127 
00128 AbstractNonHomogeneousTreeLikelihood& AbstractNonHomogeneousTreeLikelihood::operator=(
00129                                                                                       const AbstractNonHomogeneousTreeLikelihood& lik)
00130 {
00131   AbstractDiscreteRatesAcrossSitesTreeLikelihood::operator=(lik);
00132   modelSet_          = lik.modelSet_;
00133   brLenParameters_   = lik.brLenParameters_;
00134   pxy_               = lik.pxy_;
00135   dpxy_              = lik.dpxy_;
00136   d2pxy_             = lik.d2pxy_;
00137   rootFreqs_         = lik.rootFreqs_;
00138   nodes_             = tree_->getNodes();
00139   nodes_.pop_back(); //Remove the root node (the last added!).  
00140   nbSites_           = lik.nbSites_;
00141   nbDistinctSites_   = lik.nbDistinctSites_;
00142   nbClasses_         = lik.nbClasses_;
00143   nbStates_          = lik.nbStates_;
00144   nbNodes_           = lik.nbNodes_;
00145   verbose_           = lik.verbose_;
00146   minimumBrLen_      = lik.minimumBrLen_;
00147   maximumBrLen_      = lik.maximumBrLen_;
00148   if (brLenConstraint_.get()) brLenConstraint_.release();
00149   brLenConstraint_.reset(lik.brLenConstraint_->clone());
00150   reparametrizeRoot_ = lik.reparametrizeRoot_;
00151   root1_             = lik.root1_;
00152   root2_             = lik.root2_;
00153   //Rebuild nodes index:
00154   for( unsigned int i = 0; i < nodes_.size(); i++)
00155     {
00156       const Node * node = nodes_[i];
00157       idToNode_[node->getId()] = node;
00158     }
00159   return *this;
00160 }
00161 
00162 /******************************************************************************/
00163 
00164 void AbstractNonHomogeneousTreeLikelihood::init_(
00165                                                  const Tree& tree,
00166                                                  SubstitutionModelSet* modelSet,
00167                                                  DiscreteDistribution* rDist,
00168                                                  bool verbose) throw (Exception)
00169 {
00170   TreeTools::checkIds(tree, true);
00171   tree_ = new TreeTemplate<Node>(tree);
00172   root1_ = tree_->getRootNode()->getSon(0)->getId();
00173   root2_ = tree_->getRootNode()->getSon(1)->getId();
00174   nodes_ = tree_->getNodes();
00175   nodes_.pop_back(); //Remove the root node (the last added!).  
00176   nbNodes_ = nodes_.size();
00177   //Build nodes index:
00178   for (unsigned int i = 0; i < nodes_.size(); i++)
00179     {
00180       const Node * node = nodes_[i];
00181       idToNode_[node->getId()] = node;
00182     }
00183   nbClasses_ = rateDistribution_->getNumberOfCategories();
00184 
00185   verbose_ = verbose;
00186 
00187   minimumBrLen_ = 0.000001;
00188   maximumBrLen_ = 10000;
00189   brLenConstraint_.reset(new IntervalConstraint(minimumBrLen_, maximumBrLen_, true, true));
00190   setSubstitutionModelSet(modelSet);
00191 }
00192 
00193 /******************************************************************************/
00194 
00195 void AbstractNonHomogeneousTreeLikelihood::setSubstitutionModelSet(SubstitutionModelSet* modelSet) throw (Exception)
00196 {
00197   //Check:
00198   if (data_)
00199     {
00200       if (modelSet->getAlphabet()->getAlphabetType() != data_->getAlphabet()->getAlphabetType())
00201         throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::setSubstitutionModelSet(). Model alphabet do not match existing data.");
00202     }
00203 
00204   modelSet_ = modelSet;
00205   
00206   if (data_)
00207     {
00208       if (modelSet->getNumberOfStates() != modelSet_->getNumberOfStates())
00209         setData(*data_); //Have to reinitialize the whole data structure.
00210     }
00211   
00212   nbStates_ = modelSet->getNumberOfStates();
00213 
00214   //Allocate transition probabilities arrays:
00215   for (unsigned int l = 0; l < nbNodes_; l++)
00216     {
00217       //For each son node,i
00218       Node* son = nodes_[l];
00219 
00220       VVVdouble* pxy__son = & pxy_[son->getId()];
00221       pxy__son->resize(nbClasses_);
00222       for (unsigned int c = 0; c < nbClasses_; c++)
00223         {
00224           VVdouble * pxy__son_c = & (* pxy__son)[c];
00225           pxy__son_c->resize(nbStates_);
00226           for(unsigned int x = 0; x < nbStates_; x++)
00227             {
00228               (*pxy__son_c)[x].resize(nbStates_);
00229             }
00230         }
00231   
00232       VVVdouble* dpxy__son = & dpxy_[son->getId()];
00233       dpxy__son->resize(nbClasses_);
00234       for (unsigned int c = 0; c < nbClasses_; c++)
00235         {
00236           VVdouble * dpxy__son_c = & (* dpxy__son)[c];
00237           dpxy__son_c->resize(nbStates_);
00238           for(unsigned int x = 0; x < nbStates_; x++)
00239             {
00240               (* dpxy__son_c)[x].resize(nbStates_);
00241             }
00242         }
00243       
00244       VVVdouble* d2pxy__son = & d2pxy_[son->getId()];
00245       d2pxy__son->resize(nbClasses_);
00246       for (unsigned int c = 0; c < nbClasses_; c++)
00247         {
00248           VVdouble * d2pxy__son_c = & (* d2pxy__son)[c];
00249           d2pxy__son_c->resize(nbStates_);
00250           for(unsigned int x = 0; x < nbStates_; x++)
00251             {
00252               (* d2pxy__son_c)[x].resize(nbStates_);
00253             }
00254         }
00255     }
00256 
00257   //We have to reset parameters. If the instance is not initialized, this will be done by the initialize method.
00258   if (initialized_) 
00259     {
00260       initParameters();
00261       computeAllTransitionProbabilities();
00262       fireParameterChanged(getParameters());
00263     }
00264 }
00265 
00266 /******************************************************************************/
00267 
00268 void AbstractNonHomogeneousTreeLikelihood::initialize() throw (Exception)
00269 {
00270   if (initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Object is already initialized.");
00271   if (!data_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Data are no set.");
00272   initParameters();
00273   initialized_ = true;
00274   computeAllTransitionProbabilities();
00275   fireParameterChanged(getParameters());
00276 }
00277 
00278 /******************************************************************************/
00279 
00280 ParameterList AbstractNonHomogeneousTreeLikelihood::getBranchLengthsParameters() const
00281 {
00282   if (!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getBranchLengthsParameters(). Object is not initialized.");
00283   return brLenParameters_.getCommonParametersWith(getParameters());
00284 }
00285 
00286 /******************************************************************************/
00287 
00288 ParameterList AbstractNonHomogeneousTreeLikelihood::getSubstitutionModelParameters() const
00289 {
00290   if(!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getSubstitutionModelParameters(). Object is not initialized.");
00291   return modelSet_->getParameters().getCommonParametersWith(getParameters());
00292 }
00293 
00294 /******************************************************************************/
00295 
00296 void AbstractNonHomogeneousTreeLikelihood::initParameters()
00297 {
00298   // Reset parameters:
00299   resetParameters_();
00300   
00301   // Branch lengths:
00302   initBranchLengthsParameters();
00303   addParameters_(brLenParameters_);
00304   
00305   // Substitution model:
00306   addParameters_(modelSet_->getIndependentParameters());
00307   
00308   // Rate distribution:
00309   addParameters_(rateDistribution_->getIndependentParameters());
00310 }
00311 
00312 /******************************************************************************/
00313 
00314 void AbstractNonHomogeneousTreeLikelihood::applyParameters() throw (Exception)
00315 {
00316   if (!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::applyParameters(). Object not initialized.");
00317   //Apply branch lengths:
00318   for (unsigned int i = 0; i < nbNodes_; i++)
00319     {
00320       int id = nodes_[i]->getId();
00321       if (reparametrizeRoot_ && id == root1_)
00322         {
00323           const Parameter* rootBrLen = &getParameter("BrLenRoot");
00324           const Parameter* rootPos = &getParameter("RootPosition");
00325           nodes_[i]->setDistanceToFather(rootBrLen->getValue() * rootPos->getValue());
00326         }
00327       else if (reparametrizeRoot_ && id == root2_)
00328         {
00329           const Parameter* rootBrLen = &getParameter("BrLenRoot");
00330           const Parameter* rootPos = &getParameter("RootPosition");
00331           nodes_[i]->setDistanceToFather(rootBrLen->getValue() * (1. - rootPos->getValue()));
00332         }
00333       else
00334         {
00335           const Parameter* brLen = &getParameter(string("BrLen") + TextTools::toString(i));
00336           if (brLen) nodes_[i]->setDistanceToFather(brLen->getValue());
00337         }
00338     }
00339   //Apply substitution model parameters:
00340 
00341   modelSet_->matchParametersValues(getParameters());
00342   //Apply rate distribution parameters:
00343   rateDistribution_->matchParametersValues(getParameters());
00344 }
00345 
00346 /******************************************************************************/
00347 
00348 void AbstractNonHomogeneousTreeLikelihood::initBranchLengthsParameters()
00349 {
00350   brLenParameters_.reset();
00351   double l1 = 0, l2 = 0;
00352   for (unsigned int i = 0; i < nbNodes_; i++)
00353     {
00354       double d = minimumBrLen_;
00355       if (!nodes_[i]->hasDistanceToFather())
00356         {
00357           ApplicationTools::displayWarning("Missing branch length " + TextTools::toString(i) + ". Value is set to " + TextTools::toString(minimumBrLen_));
00358           nodes_[i]->setDistanceToFather(minimumBrLen_);
00359         }
00360       else
00361         {
00362           d = nodes_[i]->getDistanceToFather();
00363           if (d < minimumBrLen_)
00364             {
00365               ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too small: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(minimumBrLen_));
00366               nodes_[i]->setDistanceToFather(minimumBrLen_);
00367               d = minimumBrLen_;
00368             }
00369           if (d > maximumBrLen_)
00370             {
00371               ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too big: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(maximumBrLen_));
00372               nodes_[i]->setDistanceToFather(maximumBrLen_);
00373               d = maximumBrLen_;
00374             }
00375         }
00376       if (reparametrizeRoot_ && nodes_[i]->getId() == root1_)
00377         l1 = d;
00378       else if (reparametrizeRoot_ && nodes_[i]->getId() == root2_)
00379         l2 = d;
00380       else
00381         {
00382           brLenParameters_.addParameter(Parameter("BrLen" + TextTools::toString(i), d, brLenConstraint_->clone(), true)); //Attach constraint to avoid clonage problems!
00383         }
00384     }
00385   if (reparametrizeRoot_) {
00386     brLenParameters_.addParameter(Parameter("BrLenRoot", l1 + l2, brLenConstraint_->clone(), true));
00387     brLenParameters_.addParameter(Parameter("RootPosition", l1 / (l1 + l2), &Parameter::PROP_CONSTRAINT_EX));
00388   }
00389 }
00390 
00391 /*******************************************************************************/
00392 
00393 void AbstractNonHomogeneousTreeLikelihood::computeAllTransitionProbabilities()
00394 {
00395   for(unsigned int l = 0; l < nbNodes_; l++)
00396     {
00397       //For each node,
00398       Node * node = nodes_[l];
00399       computeTransitionProbabilitiesForNode(node);
00400     }
00401   rootFreqs_ = modelSet_->getRootFrequencies();
00402 }
00403 
00404 /*******************************************************************************/
00405 
00406 void AbstractNonHomogeneousTreeLikelihood::computeTransitionProbabilitiesForNode(const Node* node)
00407 {
00408   const SubstitutionModel* model = modelSet_->getModelForNode(node->getId());
00409   double l = node->getDistanceToFather(); 
00410 
00411   //Computes all pxy and pyx once for all:
00412   VVVdouble * pxy__node = & pxy_[node->getId()];
00413   for(unsigned int c = 0; c < nbClasses_; c++)
00414     {
00415       VVdouble * pxy__node_c = & (* pxy__node)[c];
00416       RowMatrix<double> Q = model->getPij_t(l * rateDistribution_->getCategory(c));
00417       for(unsigned int x = 0; x < nbStates_; x++)
00418         {
00419           Vdouble * pxy__node_c_x = & (* pxy__node_c)[x];
00420           for(unsigned int y = 0; y < nbStates_; y++)
00421             {
00422               (* pxy__node_c_x)[y] = Q(x, y);
00423             }
00424         }
00425     }
00426   
00427   if(computeFirstOrderDerivatives_)
00428     {
00429       //Computes all dpxy/dt once for all:
00430       VVVdouble * dpxy__node = & dpxy_[node->getId()];
00431 
00432       for(unsigned int c = 0; c < nbClasses_; c++)
00433         {
00434           VVdouble * dpxy__node_c = & (* dpxy__node)[c];
00435           double rc = rateDistribution_->getCategory(c);
00436 
00437           RowMatrix<double> dQ = model->getdPij_dt(l * rc);  
00438 
00439           for(unsigned int x = 0; x < nbStates_; x++)
00440             {
00441               Vdouble * dpxy__node_c_x = & (* dpxy__node_c)[x];
00442               for(unsigned int y = 0; y < nbStates_; y++)
00443                 (* dpxy__node_c_x)[y] = rc * dQ(x, y); 
00444             }
00445         }
00446     }
00447       
00448   if(computeSecondOrderDerivatives_)
00449     {
00450       //Computes all d2pxy/dt2 once for all:
00451       VVVdouble * d2pxy__node = & d2pxy_[node->getId()];
00452       for(unsigned int c = 0; c < nbClasses_; c++)
00453         {
00454           VVdouble * d2pxy__node_c = & (* d2pxy__node)[c];
00455           double rc =  rateDistribution_->getCategory(c);
00456           RowMatrix<double> d2Q = model->getd2Pij_dt2(l * rc);
00457           for(unsigned int x = 0; x < nbStates_; x++)
00458             {
00459               Vdouble * d2pxy__node_c_x = & (* d2pxy__node_c)[x];
00460               for(unsigned int y = 0; y < nbStates_; y++)
00461                 {
00462                   (* d2pxy__node_c_x)[y] = rc * rc * d2Q(x, y);
00463                 }
00464             }
00465         }
00466     }
00467 }
00468 
00469 /*******************************************************************************/
00470 
 All Classes Namespaces Files Functions Variables Typedefs Friends