|
bpp-phyl
2.1.0
|
00001 // 00002 // File: DistanceEstimation.h 00003 // Created by: Julien Dutheil 00004 // Vincent Ranwez 00005 // Created on: Wed jun 08 10:39 2005 00006 // 00007 00008 /* 00009 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) 00010 00011 This software is a computer program whose purpose is to provide classes 00012 for phylogenetic data analysis. 00013 00014 This software is governed by the CeCILL license under French law and 00015 abiding by the rules of distribution of free software. You can use, 00016 modify and/ or redistribute the software under the terms of the CeCILL 00017 license as circulated by CEA, CNRS and INRIA at the following URL 00018 "http://www.cecill.info". 00019 00020 As a counterpart to the access to the source code and rights to copy, 00021 modify and redistribute granted by the license, users are provided only 00022 with a limited warranty and the software's author, the holder of the 00023 economic rights, and the successive licensors have only limited 00024 liability. 00025 00026 In this respect, the user's attention is drawn to the risks associated 00027 with loading, using, modifying and/or developing or reproducing the 00028 software by the user in light of its specific status of free software, 00029 that may mean that it is complicated to manipulate, and that also 00030 therefore means that it is reserved for developers and experienced 00031 professionals having in-depth computer knowledge. Users are therefore 00032 encouraged to load and test the software's suitability as regards their 00033 requirements in conditions enabling the security of their systems and/or 00034 data to be ensured and, more generally, to use and operate it in the 00035 same conditions as regards security. 00036 00037 The fact that you are presently reading this means that you have had 00038 knowledge of the CeCILL license and that you accept its terms. 00039 */ 00040 00041 #ifndef _DISTANCEESTIMATION_H_ 00042 #define _DISTANCEESTIMATION_H_ 00043 00044 #include "../Model/SubstitutionModel.h" 00045 #include "../Likelihood/AbstractTreeLikelihood.h" 00046 #include "../Likelihood/DRHomogeneousTreeLikelihood.h" 00047 #include "../Likelihood/PseudoNewtonOptimizer.h" 00048 00049 #include <Bpp/Clonable.h> 00050 #include <Bpp/Numeric/ParameterList.h> 00051 #include <Bpp/Numeric/Prob/DiscreteDistribution.h> 00052 #include <Bpp/Numeric/Function/Optimizer.h> 00053 #include <Bpp/Numeric/Function/SimpleMultiDimensions.h> 00054 #include <Bpp/Numeric/Function/MetaOptimizer.h> 00055 00056 // From SeqLib: 00057 #include <Bpp/Seq/Container/SiteContainer.h> 00058 00059 namespace bpp 00060 { 00061 00065 class TwoTreeLikelihood: 00066 public AbstractDiscreteRatesAcrossSitesTreeLikelihood 00067 { 00068 private: 00069 SiteContainer* shrunkData_; 00070 std::vector<std::string> seqnames_; 00071 SubstitutionModel* model_; 00072 ParameterList brLenParameters_; 00073 00074 mutable VVVdouble pxy_; 00075 00076 mutable VVVdouble dpxy_; 00077 00078 mutable VVVdouble d2pxy_; 00079 00091 std::vector<size_t> rootPatternLinks_; 00092 00096 std::vector<unsigned int> rootWeights_; 00097 00098 //some values we'll need: 00099 size_t nbSites_, //the number of sites in the container 00100 nbClasses_, //the number of rate classes 00101 nbStates_, //the number of states in the alphabet 00102 nbDistinctSites_; //the number of distinct sites in the container 00103 00104 mutable VVVdouble rootLikelihoods_; 00105 mutable VVdouble rootLikelihoodsS_; 00106 mutable Vdouble rootLikelihoodsSR_; 00107 mutable Vdouble dLikelihoods_; 00108 mutable Vdouble d2Likelihoods_; 00109 mutable VVdouble leafLikelihoods1_, leafLikelihoods2_; 00110 00111 double minimumBrLen_; 00112 Constraint* brLenConstraint_; 00113 double brLen_; 00114 00115 public: 00116 TwoTreeLikelihood( 00117 const std::string& seq1, const std::string& seq2, 00118 const SiteContainer& data, 00119 SubstitutionModel* model, 00120 DiscreteDistribution* rDist, 00121 bool verbose) throw (Exception); 00122 00123 TwoTreeLikelihood(const TwoTreeLikelihood& lik); 00124 00125 TwoTreeLikelihood& operator=(const TwoTreeLikelihood& lik); 00126 00127 TwoTreeLikelihood* clone() const { return new TwoTreeLikelihood(*this); } 00128 00129 virtual ~TwoTreeLikelihood(); 00130 00131 public: 00132 00140 TreeLikelihoodData* getLikelihoodData() throw (NotImplementedException) 00141 { 00142 throw NotImplementedException("TwoTreeLikelihood::getLikelihoodData."); 00143 } 00144 const TreeLikelihoodData* getLikelihoodData() const throw (NotImplementedException) 00145 { 00146 throw NotImplementedException("TwoTreeLikelihood::getLikelihoodData."); 00147 } 00148 double getLikelihood() const; 00149 double getLogLikelihood() const; 00150 double getLikelihoodForASite (size_t site) const; 00151 double getLogLikelihoodForASite(size_t site) const; 00152 ParameterList getBranchLengthsParameters() const; 00153 ParameterList getSubstitutionModelParameters() const; 00154 SubstitutionModel* getSubstitutionModel(int nodeId, size_t siteIndex) throw (NodeNotFoundException) { return model_; } 00155 const SubstitutionModel* getSubstitutionModel(int nodeId, size_t siteIndex) const throw (NodeNotFoundException) { return model_; } 00156 const std::vector<double>& getRootFrequencies(size_t siteIndex) const { return model_->getFrequencies(); } 00157 size_t getSiteIndex(size_t site) const throw (IndexOutOfBoundsException) { return rootPatternLinks_[site]; } 00161 VVVdouble getTransitionProbabilitiesPerRateClass(int nodeId, size_t siteIndex) const { return pxy_; } 00162 void setData(const SiteContainer& sites) throw (Exception) {} 00163 void initialize() throw(Exception); 00171 double getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const; 00172 double getLogLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const; 00173 double getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const; 00174 double getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const; 00182 const SubstitutionModel* getSubstitutionModel() const { return model_; } 00183 00189 SubstitutionModel* getSubstitutionModel() { return model_; } 00190 00191 ConstBranchModelIterator* getNewBranchModelIterator(int nodeId) const throw (NotImplementedException) 00192 { 00193 throw NotImplementedException("TwoTreeLikelihood::getNewBranchSiteModelIterator. This class does not (yet) provide support for partition models."); 00194 } 00195 00196 ConstSiteModelIterator* getNewSiteModelIterator(size_t siteIndex) const throw (NotImplementedException) 00197 { 00198 throw NotImplementedException("TwoTreeLikelihood::getNewSiteModelIterator. This class is for inner use only and does not provide site model iterators."); 00199 } 00200 00201 00202 00216 void setParameters(const ParameterList& parameters) throw (ParameterNotFoundException, ConstraintException); 00217 double getValue() const throw(Exception); 00218 00224 double getFirstOrderDerivative(const std::string& variable) const throw (Exception); 00232 double getSecondOrderDerivative(const std::string& variable) const throw (Exception); 00233 double getSecondOrderDerivative(const std::string& variable1, const std::string& variable2) const throw (Exception) { return 0; } // Not implemented for now. 00236 virtual void initBranchLengthsParameters(); 00237 00238 virtual void setMinimumBranchLength(double minimum) 00239 { 00240 minimumBrLen_ = minimum; 00241 if (brLenConstraint_) delete brLenConstraint_; 00242 brLenConstraint_ = new IntervalConstraint(1, minimumBrLen_, true); 00243 initBranchLengthsParameters(); 00244 } 00245 00246 virtual double getMinimumBranchLength() const { return minimumBrLen_; } 00247 00248 protected: 00249 00264 virtual void initTreeLikelihoods(const SequenceContainer & sequences) throw (Exception); 00265 00266 void fireParameterChanged(const ParameterList & params); 00267 virtual void computeTreeLikelihood(); 00268 virtual void computeTreeDLikelihood(); 00269 virtual void computeTreeD2Likelihood(); 00274 virtual void initParameters(); 00275 00282 virtual void applyParameters() throw (Exception); 00283 00284 }; 00285 00298 class DistanceEstimation: 00299 public virtual Clonable 00300 { 00301 private: 00302 auto_ptr<SubstitutionModel> model_; 00303 auto_ptr<DiscreteDistribution> rateDist_; 00304 const SiteContainer* sites_; 00305 DistanceMatrix* dist_; 00306 Optimizer* optimizer_; 00307 MetaOptimizer* defaultOptimizer_; 00308 size_t verbose_; 00309 ParameterList parameters_; 00310 00311 public: 00312 00327 DistanceEstimation( 00328 SubstitutionModel* model, 00329 DiscreteDistribution* rateDist, 00330 size_t verbose = 1) : 00331 model_(model), 00332 rateDist_(rateDist), 00333 sites_(0), 00334 dist_(0), 00335 optimizer_(0), 00336 defaultOptimizer_(0), 00337 verbose_(verbose), 00338 parameters_() 00339 { 00340 init_(); 00341 } 00342 00360 DistanceEstimation( 00361 SubstitutionModel* model, 00362 DiscreteDistribution* rateDist, 00363 const SiteContainer* sites, 00364 size_t verbose = 1, 00365 bool computeMat = true) : 00366 model_(model), 00367 rateDist_(rateDist), 00368 sites_(sites), 00369 dist_(0), 00370 optimizer_(0), 00371 defaultOptimizer_(0), 00372 verbose_(verbose), 00373 parameters_() 00374 { 00375 init_(); 00376 if(computeMat) computeMatrix(); 00377 } 00378 00386 DistanceEstimation(const DistanceEstimation& distanceEstimation): 00387 model_(distanceEstimation.model_->clone()), 00388 rateDist_(distanceEstimation.rateDist_->clone()), 00389 sites_(distanceEstimation.sites_), 00390 dist_(0), 00391 optimizer_(dynamic_cast<Optimizer *>(distanceEstimation.optimizer_->clone())), 00392 defaultOptimizer_(dynamic_cast<MetaOptimizer *>(defaultOptimizer_->clone())), 00393 verbose_(distanceEstimation.verbose_), 00394 parameters_(distanceEstimation.parameters_) 00395 { 00396 if(distanceEstimation.dist_ != 0) 00397 dist_ = new DistanceMatrix(*distanceEstimation.dist_); 00398 else 00399 dist_ = 0; 00400 } 00401 00410 DistanceEstimation& operator=(const DistanceEstimation& distanceEstimation) 00411 { 00412 model_.reset(distanceEstimation.model_->clone()); 00413 rateDist_.reset(distanceEstimation.rateDist_->clone()); 00414 sites_ = distanceEstimation.sites_; 00415 if (distanceEstimation.dist_ != 0) 00416 dist_ = new DistanceMatrix(*distanceEstimation.dist_); 00417 else 00418 dist_ = 0; 00419 optimizer_ = dynamic_cast<Optimizer *>(distanceEstimation.optimizer_->clone()); 00420 // _defaultOptimizer has already been initialized since the default constructor has been called. 00421 verbose_ = distanceEstimation.verbose_; 00422 parameters_ = distanceEstimation.parameters_; 00423 return *this; 00424 } 00425 00426 virtual ~DistanceEstimation() 00427 { 00428 if (dist_) delete dist_; 00429 delete defaultOptimizer_; 00430 delete optimizer_; 00431 } 00432 00433 #ifndef NO_VIRTUAL_COV 00434 DistanceEstimation* 00435 #else 00436 Clonable* 00437 #endif 00438 clone() const { return new DistanceEstimation(*this); } 00439 00440 private: 00441 void init_() 00442 { 00443 MetaOptimizerInfos* desc = new MetaOptimizerInfos(); 00444 std::vector<std::string> name; 00445 name.push_back("BrLen"); 00446 desc->addOptimizer("Branch length", new PseudoNewtonOptimizer(0), name, 2, MetaOptimizerInfos::IT_TYPE_FULL); 00447 ParameterList tmp = model_->getParameters(); 00448 tmp.addParameters(rateDist_->getParameters()); 00449 desc->addOptimizer("substitution model and rate distribution", new SimpleMultiDimensions(0), tmp.getParameterNames(), 0, MetaOptimizerInfos::IT_TYPE_STEP); 00450 defaultOptimizer_ = new MetaOptimizer(0, desc); 00451 defaultOptimizer_->setMessageHandler(0); 00452 defaultOptimizer_->setProfiler(0); 00453 defaultOptimizer_->getStopCondition()->setTolerance(0.0001); 00454 optimizer_ = dynamic_cast<Optimizer*>(defaultOptimizer_->clone()); 00455 } 00456 00457 public: 00458 00467 void computeMatrix() throw (NullPointerException); 00468 00474 DistanceMatrix* getMatrix() const { return dist_ == 0 ? 0 : new DistanceMatrix(*dist_); } 00475 00476 bool hasSubstitutionModel() const { return model_.get(); } 00477 00478 const SubstitutionModel& getSubstitutionModel() const throw (Exception) { 00479 if (hasSubstitutionModel()) 00480 return *model_; 00481 else 00482 throw Exception("DistanceEstimation::getSubstitutionModel(). No model assciated to this instance."); 00483 } 00484 00485 void resetSubstitutionModel(SubstitutionModel* model = 0) { model_.reset(model); } 00486 00487 bool hasRateDistribution() const { return rateDist_.get(); } 00488 00489 const DiscreteDistribution& getRateDistribution() const throw (Exception) { 00490 if (hasRateDistribution()) 00491 return *rateDist_; 00492 else 00493 throw Exception("DistanceEstimation::getRateDistribution(). No rate distribution assciated to this instance."); 00494 } 00495 00496 void resetRateDistribution(DiscreteDistribution* rateDist = 0) { rateDist_.reset(rateDist); } 00497 00498 void setData(const SiteContainer* sites) { sites_ = sites; } 00499 const SiteContainer* getData() const { return sites_; } 00500 void resetData() { sites_ = 0; } 00501 00502 void setOptimizer(const Optimizer * optimizer) 00503 { 00504 if (optimizer_) delete optimizer_; 00505 optimizer_ = dynamic_cast<Optimizer *>(optimizer->clone()); 00506 } 00507 const Optimizer* getOptimizer() const { return optimizer_; } 00508 Optimizer* getOptimizer() { return optimizer_; } 00509 void resetOptimizer() { optimizer_ = dynamic_cast<Optimizer*>(defaultOptimizer_->clone()); } 00510 00518 void setAdditionalParameters(const ParameterList& parameters) 00519 { 00520 parameters_ = parameters; 00521 } 00522 00526 void resetAdditionalParameters() 00527 { 00528 parameters_.reset(); 00529 } 00530 00534 void setVerbose(size_t verbose) { verbose_ = verbose; } 00538 size_t getVerbose() const { return verbose_; } 00539 }; 00540 00541 } //end of namespace bpp. 00542 00543 #endif //_DISTANCEESTIMATION_H_ 00544