|
bpp-phyl
2.1.0
|
00001 // 00002 // File: AbstractNonHomogeneousTreeLikelihood.cpp 00003 // Created by: Julien Dutheil 00004 // Created on: Tue Oct 09 16:07 2007 00005 // From file: AbstractHomogeneousTreeLikelihood.cpp 00006 // 00007 00008 /* 00009 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) 00010 00011 This software is a computer program whose purpose is to provide classes 00012 for phylogenetic data analysis. 00013 00014 This software is governed by the CeCILL license under French law and 00015 abiding by the rules of distribution of free software. You can use, 00016 modify and/ or redistribute the software under the terms of the CeCILL 00017 license as circulated by CEA, CNRS and INRIA at the following URL 00018 "http://www.cecill.info". 00019 00020 As a counterpart to the access to the source code and rights to copy, 00021 modify and redistribute granted by the license, users are provided only 00022 with a limited warranty and the software's author, the holder of the 00023 economic rights, and the successive licensors have only limited 00024 liability. 00025 00026 In this respect, the user's attention is drawn to the risks associated 00027 with loading, using, modifying and/or developing or reproducing the 00028 software by the user in light of its specific status of free software, 00029 that may mean that it is complicated to manipulate, and that also 00030 encouraged to load and test the software's suitability as regards their 00031 requirements in conditions enabling the security of their systems and/or 00032 data to be ensured and, more generally, to use and operate it in the 00033 same conditions as regards security. 00034 00035 The fact that you are presently reading this means that you have had 00036 knowledge of the CeCILL license and that you accept its terms. 00037 */ 00038 00039 #include "AbstractNonHomogeneousTreeLikelihood.h" 00040 #include "../PatternTools.h" 00041 00042 //From SeqLib: 00043 #include <Bpp/Seq/SiteTools.h> 00044 #include <Bpp/Seq/Container/SequenceContainerTools.h> 00045 00046 #include <Bpp/Text/TextTools.h> 00047 #include <Bpp/App/ApplicationTools.h> 00048 00049 using namespace bpp; 00050 00051 // From the STL: 00052 #include <iostream> 00053 00054 using namespace std; 00055 00056 /******************************************************************************/ 00057 00058 AbstractNonHomogeneousTreeLikelihood::AbstractNonHomogeneousTreeLikelihood( 00059 const Tree& tree, 00060 SubstitutionModelSet* modelSet, 00061 DiscreteDistribution* rDist, 00062 bool verbose, 00063 bool reparametrizeRoot) 00064 throw (Exception) : 00065 AbstractDiscreteRatesAcrossSitesTreeLikelihood(rDist, verbose), 00066 modelSet_(0), 00067 brLenParameters_(), 00068 pxy_(), 00069 dpxy_(), 00070 d2pxy_(), 00071 rootFreqs_(), 00072 nodes_(), 00073 idToNode_(), 00074 nbSites_(), 00075 nbDistinctSites_(), 00076 nbClasses_(), 00077 nbStates_(), 00078 nbNodes_(), 00079 verbose_(), 00080 minimumBrLen_(), 00081 maximumBrLen_(), 00082 brLenConstraint_(0), 00083 reparametrizeRoot_(reparametrizeRoot), 00084 root1_(), 00085 root2_() 00086 { 00087 init_(tree, modelSet, rDist, verbose); 00088 } 00089 00090 /******************************************************************************/ 00091 00092 AbstractNonHomogeneousTreeLikelihood::AbstractNonHomogeneousTreeLikelihood( 00093 const AbstractNonHomogeneousTreeLikelihood& lik) : 00094 AbstractDiscreteRatesAcrossSitesTreeLikelihood(lik), 00095 modelSet_(lik.modelSet_), 00096 brLenParameters_(lik.brLenParameters_), 00097 pxy_(lik.pxy_), 00098 dpxy_(lik.dpxy_), 00099 d2pxy_(lik.d2pxy_), 00100 rootFreqs_(lik.rootFreqs_), 00101 nodes_(), 00102 idToNode_(), 00103 nbSites_(lik.nbSites_), 00104 nbDistinctSites_(lik.nbDistinctSites_), 00105 nbClasses_(lik.nbClasses_), 00106 nbStates_(lik.nbStates_), 00107 nbNodes_(lik.nbNodes_), 00108 verbose_(lik.verbose_), 00109 minimumBrLen_(lik.minimumBrLen_), 00110 maximumBrLen_(lik.maximumBrLen_), 00111 brLenConstraint_(dynamic_cast<Constraint*>(lik.brLenConstraint_->clone())), 00112 reparametrizeRoot_(lik.reparametrizeRoot_), 00113 root1_(lik.root1_), 00114 root2_(lik.root2_) 00115 { 00116 nodes_ = tree_->getNodes(); 00117 nodes_.pop_back(); //Remove the root node (the last added!). 00118 //Rebuild nodes index: 00119 for (unsigned int i = 0; i < nodes_.size(); i++) 00120 { 00121 const Node* node = nodes_[i]; 00122 idToNode_[node->getId()] = node; 00123 } 00124 } 00125 00126 /******************************************************************************/ 00127 00128 AbstractNonHomogeneousTreeLikelihood& AbstractNonHomogeneousTreeLikelihood::operator=( 00129 const AbstractNonHomogeneousTreeLikelihood& lik) 00130 { 00131 AbstractDiscreteRatesAcrossSitesTreeLikelihood::operator=(lik); 00132 modelSet_ = lik.modelSet_; 00133 brLenParameters_ = lik.brLenParameters_; 00134 pxy_ = lik.pxy_; 00135 dpxy_ = lik.dpxy_; 00136 d2pxy_ = lik.d2pxy_; 00137 rootFreqs_ = lik.rootFreqs_; 00138 nodes_ = tree_->getNodes(); 00139 nodes_.pop_back(); //Remove the root node (the last added!). 00140 nbSites_ = lik.nbSites_; 00141 nbDistinctSites_ = lik.nbDistinctSites_; 00142 nbClasses_ = lik.nbClasses_; 00143 nbStates_ = lik.nbStates_; 00144 nbNodes_ = lik.nbNodes_; 00145 verbose_ = lik.verbose_; 00146 minimumBrLen_ = lik.minimumBrLen_; 00147 maximumBrLen_ = lik.maximumBrLen_; 00148 if (brLenConstraint_.get()) brLenConstraint_.release(); 00149 brLenConstraint_.reset(lik.brLenConstraint_->clone()); 00150 reparametrizeRoot_ = lik.reparametrizeRoot_; 00151 root1_ = lik.root1_; 00152 root2_ = lik.root2_; 00153 //Rebuild nodes index: 00154 for( unsigned int i = 0; i < nodes_.size(); i++) 00155 { 00156 const Node * node = nodes_[i]; 00157 idToNode_[node->getId()] = node; 00158 } 00159 return *this; 00160 } 00161 00162 /******************************************************************************/ 00163 00164 void AbstractNonHomogeneousTreeLikelihood::init_( 00165 const Tree& tree, 00166 SubstitutionModelSet* modelSet, 00167 DiscreteDistribution* rDist, 00168 bool verbose) throw (Exception) 00169 { 00170 TreeTools::checkIds(tree, true); 00171 tree_ = new TreeTemplate<Node>(tree); 00172 root1_ = tree_->getRootNode()->getSon(0)->getId(); 00173 root2_ = tree_->getRootNode()->getSon(1)->getId(); 00174 nodes_ = tree_->getNodes(); 00175 nodes_.pop_back(); //Remove the root node (the last added!). 00176 nbNodes_ = nodes_.size(); 00177 //Build nodes index: 00178 for (unsigned int i = 0; i < nodes_.size(); i++) 00179 { 00180 const Node * node = nodes_[i]; 00181 idToNode_[node->getId()] = node; 00182 } 00183 nbClasses_ = rateDistribution_->getNumberOfCategories(); 00184 00185 verbose_ = verbose; 00186 00187 minimumBrLen_ = 0.000001; 00188 maximumBrLen_ = 10000; 00189 brLenConstraint_.reset(new IntervalConstraint(minimumBrLen_, maximumBrLen_, true, true)); 00190 setSubstitutionModelSet(modelSet); 00191 } 00192 00193 /******************************************************************************/ 00194 00195 void AbstractNonHomogeneousTreeLikelihood::setSubstitutionModelSet(SubstitutionModelSet* modelSet) throw (Exception) 00196 { 00197 //Check: 00198 if (data_) 00199 { 00200 if (modelSet->getAlphabet()->getAlphabetType() != data_->getAlphabet()->getAlphabetType()) 00201 throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::setSubstitutionModelSet(). Model alphabet do not match existing data."); 00202 } 00203 00204 modelSet_ = modelSet; 00205 00206 if (data_) 00207 { 00208 if (modelSet->getNumberOfStates() != modelSet_->getNumberOfStates()) 00209 setData(*data_); //Have to reinitialize the whole data structure. 00210 } 00211 00212 nbStates_ = modelSet->getNumberOfStates(); 00213 00214 //Allocate transition probabilities arrays: 00215 for (unsigned int l = 0; l < nbNodes_; l++) 00216 { 00217 //For each son node,i 00218 Node* son = nodes_[l]; 00219 00220 VVVdouble* pxy__son = & pxy_[son->getId()]; 00221 pxy__son->resize(nbClasses_); 00222 for (unsigned int c = 0; c < nbClasses_; c++) 00223 { 00224 VVdouble * pxy__son_c = & (* pxy__son)[c]; 00225 pxy__son_c->resize(nbStates_); 00226 for(unsigned int x = 0; x < nbStates_; x++) 00227 { 00228 (*pxy__son_c)[x].resize(nbStates_); 00229 } 00230 } 00231 00232 VVVdouble* dpxy__son = & dpxy_[son->getId()]; 00233 dpxy__son->resize(nbClasses_); 00234 for (unsigned int c = 0; c < nbClasses_; c++) 00235 { 00236 VVdouble * dpxy__son_c = & (* dpxy__son)[c]; 00237 dpxy__son_c->resize(nbStates_); 00238 for(unsigned int x = 0; x < nbStates_; x++) 00239 { 00240 (* dpxy__son_c)[x].resize(nbStates_); 00241 } 00242 } 00243 00244 VVVdouble* d2pxy__son = & d2pxy_[son->getId()]; 00245 d2pxy__son->resize(nbClasses_); 00246 for (unsigned int c = 0; c < nbClasses_; c++) 00247 { 00248 VVdouble * d2pxy__son_c = & (* d2pxy__son)[c]; 00249 d2pxy__son_c->resize(nbStates_); 00250 for(unsigned int x = 0; x < nbStates_; x++) 00251 { 00252 (* d2pxy__son_c)[x].resize(nbStates_); 00253 } 00254 } 00255 } 00256 00257 //We have to reset parameters. If the instance is not initialized, this will be done by the initialize method. 00258 if (initialized_) 00259 { 00260 initParameters(); 00261 computeAllTransitionProbabilities(); 00262 fireParameterChanged(getParameters()); 00263 } 00264 } 00265 00266 /******************************************************************************/ 00267 00268 void AbstractNonHomogeneousTreeLikelihood::initialize() throw (Exception) 00269 { 00270 if (initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Object is already initialized."); 00271 if (!data_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Data are no set."); 00272 initParameters(); 00273 initialized_ = true; 00274 computeAllTransitionProbabilities(); 00275 fireParameterChanged(getParameters()); 00276 } 00277 00278 /******************************************************************************/ 00279 00280 ParameterList AbstractNonHomogeneousTreeLikelihood::getBranchLengthsParameters() const 00281 { 00282 if (!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getBranchLengthsParameters(). Object is not initialized."); 00283 return brLenParameters_.getCommonParametersWith(getParameters()); 00284 } 00285 00286 /******************************************************************************/ 00287 00288 ParameterList AbstractNonHomogeneousTreeLikelihood::getSubstitutionModelParameters() const 00289 { 00290 if(!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getSubstitutionModelParameters(). Object is not initialized."); 00291 return modelSet_->getParameters().getCommonParametersWith(getParameters()); 00292 } 00293 00294 /******************************************************************************/ 00295 00296 void AbstractNonHomogeneousTreeLikelihood::initParameters() 00297 { 00298 // Reset parameters: 00299 resetParameters_(); 00300 00301 // Branch lengths: 00302 initBranchLengthsParameters(); 00303 addParameters_(brLenParameters_); 00304 00305 // Substitution model: 00306 addParameters_(modelSet_->getIndependentParameters()); 00307 00308 // Rate distribution: 00309 addParameters_(rateDistribution_->getIndependentParameters()); 00310 } 00311 00312 /******************************************************************************/ 00313 00314 void AbstractNonHomogeneousTreeLikelihood::applyParameters() throw (Exception) 00315 { 00316 if (!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::applyParameters(). Object not initialized."); 00317 //Apply branch lengths: 00318 for (unsigned int i = 0; i < nbNodes_; i++) 00319 { 00320 int id = nodes_[i]->getId(); 00321 if (reparametrizeRoot_ && id == root1_) 00322 { 00323 const Parameter* rootBrLen = &getParameter("BrLenRoot"); 00324 const Parameter* rootPos = &getParameter("RootPosition"); 00325 nodes_[i]->setDistanceToFather(rootBrLen->getValue() * rootPos->getValue()); 00326 } 00327 else if (reparametrizeRoot_ && id == root2_) 00328 { 00329 const Parameter* rootBrLen = &getParameter("BrLenRoot"); 00330 const Parameter* rootPos = &getParameter("RootPosition"); 00331 nodes_[i]->setDistanceToFather(rootBrLen->getValue() * (1. - rootPos->getValue())); 00332 } 00333 else 00334 { 00335 const Parameter* brLen = &getParameter(string("BrLen") + TextTools::toString(i)); 00336 if (brLen) nodes_[i]->setDistanceToFather(brLen->getValue()); 00337 } 00338 } 00339 //Apply substitution model parameters: 00340 00341 modelSet_->matchParametersValues(getParameters()); 00342 //Apply rate distribution parameters: 00343 rateDistribution_->matchParametersValues(getParameters()); 00344 } 00345 00346 /******************************************************************************/ 00347 00348 void AbstractNonHomogeneousTreeLikelihood::initBranchLengthsParameters() 00349 { 00350 brLenParameters_.reset(); 00351 double l1 = 0, l2 = 0; 00352 for (unsigned int i = 0; i < nbNodes_; i++) 00353 { 00354 double d = minimumBrLen_; 00355 if (!nodes_[i]->hasDistanceToFather()) 00356 { 00357 ApplicationTools::displayWarning("Missing branch length " + TextTools::toString(i) + ". Value is set to " + TextTools::toString(minimumBrLen_)); 00358 nodes_[i]->setDistanceToFather(minimumBrLen_); 00359 } 00360 else 00361 { 00362 d = nodes_[i]->getDistanceToFather(); 00363 if (d < minimumBrLen_) 00364 { 00365 ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too small: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(minimumBrLen_)); 00366 nodes_[i]->setDistanceToFather(minimumBrLen_); 00367 d = minimumBrLen_; 00368 } 00369 if (d > maximumBrLen_) 00370 { 00371 ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too big: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(maximumBrLen_)); 00372 nodes_[i]->setDistanceToFather(maximumBrLen_); 00373 d = maximumBrLen_; 00374 } 00375 } 00376 if (reparametrizeRoot_ && nodes_[i]->getId() == root1_) 00377 l1 = d; 00378 else if (reparametrizeRoot_ && nodes_[i]->getId() == root2_) 00379 l2 = d; 00380 else 00381 { 00382 brLenParameters_.addParameter(Parameter("BrLen" + TextTools::toString(i), d, brLenConstraint_->clone(), true)); //Attach constraint to avoid clonage problems! 00383 } 00384 } 00385 if (reparametrizeRoot_) { 00386 brLenParameters_.addParameter(Parameter("BrLenRoot", l1 + l2, brLenConstraint_->clone(), true)); 00387 brLenParameters_.addParameter(Parameter("RootPosition", l1 / (l1 + l2), &Parameter::PROP_CONSTRAINT_EX)); 00388 } 00389 } 00390 00391 /*******************************************************************************/ 00392 00393 void AbstractNonHomogeneousTreeLikelihood::computeAllTransitionProbabilities() 00394 { 00395 for(unsigned int l = 0; l < nbNodes_; l++) 00396 { 00397 //For each node, 00398 Node * node = nodes_[l]; 00399 computeTransitionProbabilitiesForNode(node); 00400 } 00401 rootFreqs_ = modelSet_->getRootFrequencies(); 00402 } 00403 00404 /*******************************************************************************/ 00405 00406 void AbstractNonHomogeneousTreeLikelihood::computeTransitionProbabilitiesForNode(const Node* node) 00407 { 00408 const SubstitutionModel* model = modelSet_->getModelForNode(node->getId()); 00409 double l = node->getDistanceToFather(); 00410 00411 //Computes all pxy and pyx once for all: 00412 VVVdouble * pxy__node = & pxy_[node->getId()]; 00413 for(unsigned int c = 0; c < nbClasses_; c++) 00414 { 00415 VVdouble * pxy__node_c = & (* pxy__node)[c]; 00416 RowMatrix<double> Q = model->getPij_t(l * rateDistribution_->getCategory(c)); 00417 for(unsigned int x = 0; x < nbStates_; x++) 00418 { 00419 Vdouble * pxy__node_c_x = & (* pxy__node_c)[x]; 00420 for(unsigned int y = 0; y < nbStates_; y++) 00421 { 00422 (* pxy__node_c_x)[y] = Q(x, y); 00423 } 00424 } 00425 } 00426 00427 if(computeFirstOrderDerivatives_) 00428 { 00429 //Computes all dpxy/dt once for all: 00430 VVVdouble * dpxy__node = & dpxy_[node->getId()]; 00431 00432 for(unsigned int c = 0; c < nbClasses_; c++) 00433 { 00434 VVdouble * dpxy__node_c = & (* dpxy__node)[c]; 00435 double rc = rateDistribution_->getCategory(c); 00436 00437 RowMatrix<double> dQ = model->getdPij_dt(l * rc); 00438 00439 for(unsigned int x = 0; x < nbStates_; x++) 00440 { 00441 Vdouble * dpxy__node_c_x = & (* dpxy__node_c)[x]; 00442 for(unsigned int y = 0; y < nbStates_; y++) 00443 (* dpxy__node_c_x)[y] = rc * dQ(x, y); 00444 } 00445 } 00446 } 00447 00448 if(computeSecondOrderDerivatives_) 00449 { 00450 //Computes all d2pxy/dt2 once for all: 00451 VVVdouble * d2pxy__node = & d2pxy_[node->getId()]; 00452 for(unsigned int c = 0; c < nbClasses_; c++) 00453 { 00454 VVdouble * d2pxy__node_c = & (* d2pxy__node)[c]; 00455 double rc = rateDistribution_->getCategory(c); 00456 RowMatrix<double> d2Q = model->getd2Pij_dt2(l * rc); 00457 for(unsigned int x = 0; x < nbStates_; x++) 00458 { 00459 Vdouble * d2pxy__node_c_x = & (* d2pxy__node_c)[x]; 00460 for(unsigned int y = 0; y < nbStates_; y++) 00461 { 00462 (* d2pxy__node_c_x)[y] = rc * rc * d2Q(x, y); 00463 } 00464 } 00465 } 00466 } 00467 } 00468 00469 /*******************************************************************************/ 00470