bpp-phyl  2.1.0
Bpp/Phyl/Distance/DistanceEstimation.h
Go to the documentation of this file.
00001 //
00002 // File: DistanceEstimation.h
00003 // Created by: Julien Dutheil
00004 //             Vincent Ranwez
00005 // Created on: Wed jun 08 10:39 2005
00006 //
00007 
00008 /*
00009 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
00010 
00011 This software is a computer program whose purpose is to provide classes
00012 for phylogenetic data analysis.
00013 
00014 This software is governed by the CeCILL  license under French law and
00015 abiding by the rules of distribution of free software.  You can  use, 
00016 modify and/ or redistribute the software under the terms of the CeCILL
00017 license as circulated by CEA, CNRS and INRIA at the following URL
00018 "http://www.cecill.info". 
00019 
00020 As a counterpart to the access to the source code and  rights to copy,
00021 modify and redistribute granted by the license, users are provided only
00022 with a limited warranty  and the software's author,  the holder of the
00023 economic rights,  and the successive licensors  have only  limited
00024 liability. 
00025 
00026 In this respect, the user's attention is drawn to the risks associated
00027 with loading,  using,  modifying and/or developing or reproducing the
00028 software by the user in light of its specific status of free software,
00029 that may mean  that it is complicated to manipulate,  and  that  also
00030 therefore means  that it is reserved for developers  and  experienced
00031 professionals having in-depth computer knowledge. Users are therefore
00032 encouraged to load and test the software's suitability as regards their
00033 requirements in conditions enabling the security of their systems and/or 
00034 data to be ensured and,  more generally, to use and operate it in the 
00035 same conditions as regards security. 
00036 
00037 The fact that you are presently reading this means that you have had
00038 knowledge of the CeCILL license and that you accept its terms.
00039 */
00040 
00041 #ifndef _DISTANCEESTIMATION_H_
00042 #define _DISTANCEESTIMATION_H_
00043 
00044 #include "../Model/SubstitutionModel.h"
00045 #include "../Likelihood/AbstractTreeLikelihood.h"
00046 #include "../Likelihood/DRHomogeneousTreeLikelihood.h"
00047 #include "../Likelihood/PseudoNewtonOptimizer.h"
00048 
00049 #include <Bpp/Clonable.h>
00050 #include <Bpp/Numeric/ParameterList.h>
00051 #include <Bpp/Numeric/Prob/DiscreteDistribution.h>
00052 #include <Bpp/Numeric/Function/Optimizer.h>
00053 #include <Bpp/Numeric/Function/SimpleMultiDimensions.h>
00054 #include <Bpp/Numeric/Function/MetaOptimizer.h>
00055 
00056 // From SeqLib:
00057 #include <Bpp/Seq/Container/SiteContainer.h>
00058 
00059 namespace bpp
00060 {
00061 
00065 class TwoTreeLikelihood:
00066   public AbstractDiscreteRatesAcrossSitesTreeLikelihood  
00067 {
00068   private:
00069     SiteContainer* shrunkData_;
00070     std::vector<std::string> seqnames_;
00071     SubstitutionModel* model_;
00072     ParameterList brLenParameters_;
00073     
00074     mutable VVVdouble pxy_;
00075 
00076     mutable VVVdouble dpxy_;
00077 
00078     mutable VVVdouble d2pxy_;
00079 
00091     std::vector<size_t> rootPatternLinks_;
00092 
00096     std::vector<unsigned int> rootWeights_;
00097 
00098     //some values we'll need:
00099     size_t nbSites_,         //the number of sites in the container
00100            nbClasses_,       //the number of rate classes
00101            nbStates_,        //the number of states in the alphabet
00102            nbDistinctSites_; //the number of distinct sites in the container
00103 
00104     mutable VVVdouble rootLikelihoods_;
00105     mutable VVdouble rootLikelihoodsS_;
00106     mutable Vdouble rootLikelihoodsSR_;
00107     mutable Vdouble dLikelihoods_;
00108     mutable Vdouble d2Likelihoods_;
00109     mutable VVdouble leafLikelihoods1_, leafLikelihoods2_;
00110   
00111     double minimumBrLen_;
00112     Constraint* brLenConstraint_;
00113     double brLen_;
00114 
00115   public:
00116     TwoTreeLikelihood(
00117       const std::string& seq1, const std::string& seq2,  
00118       const SiteContainer& data,
00119       SubstitutionModel* model,
00120       DiscreteDistribution* rDist,
00121       bool verbose)  throw (Exception);
00122 
00123     TwoTreeLikelihood(const TwoTreeLikelihood& lik);
00124     
00125     TwoTreeLikelihood& operator=(const TwoTreeLikelihood& lik);
00126 
00127     TwoTreeLikelihood* clone() const { return new TwoTreeLikelihood(*this); } 
00128 
00129     virtual ~TwoTreeLikelihood();
00130 
00131   public:
00132 
00140     TreeLikelihoodData* getLikelihoodData() throw (NotImplementedException)
00141     {
00142       throw NotImplementedException("TwoTreeLikelihood::getLikelihoodData.");
00143     }
00144     const TreeLikelihoodData* getLikelihoodData() const throw (NotImplementedException)
00145     {
00146       throw NotImplementedException("TwoTreeLikelihood::getLikelihoodData.");
00147     }  
00148     double getLikelihood() const;
00149     double getLogLikelihood() const;
00150     double getLikelihoodForASite (size_t site) const;
00151     double getLogLikelihoodForASite(size_t site) const;
00152     ParameterList getBranchLengthsParameters() const;
00153     ParameterList getSubstitutionModelParameters() const;
00154     SubstitutionModel* getSubstitutionModel(int nodeId, size_t siteIndex) throw (NodeNotFoundException) { return model_; }
00155     const SubstitutionModel* getSubstitutionModel(int nodeId, size_t siteIndex) const throw (NodeNotFoundException) { return model_; }
00156     const std::vector<double>& getRootFrequencies(size_t siteIndex) const { return model_->getFrequencies(); }
00157     size_t getSiteIndex(size_t site) const throw (IndexOutOfBoundsException) { return rootPatternLinks_[site]; }
00161     VVVdouble getTransitionProbabilitiesPerRateClass(int nodeId, size_t siteIndex) const { return pxy_; }
00162     void setData(const SiteContainer& sites) throw (Exception) {}
00163     void initialize() throw(Exception);
00171     double getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const;
00172     double getLogLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const;
00173     double getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const;
00174     double getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const;
00182     const SubstitutionModel* getSubstitutionModel() const { return model_; }
00183     
00189     SubstitutionModel* getSubstitutionModel() { return model_; }
00190 
00191     ConstBranchModelIterator* getNewBranchModelIterator(int nodeId) const throw (NotImplementedException)
00192     {
00193       throw NotImplementedException("TwoTreeLikelihood::getNewBranchSiteModelIterator. This class does not (yet) provide support for partition models.");
00194     }
00195 
00196     ConstSiteModelIterator* getNewSiteModelIterator(size_t siteIndex) const throw (NotImplementedException)
00197     {
00198       throw NotImplementedException("TwoTreeLikelihood::getNewSiteModelIterator. This class is for inner use only and does not provide site model iterators.");
00199     }
00200 
00201     
00202 
00216     void setParameters(const ParameterList& parameters) throw (ParameterNotFoundException, ConstraintException);
00217     double getValue() const throw(Exception);
00218     
00224     double getFirstOrderDerivative(const std::string& variable) const throw (Exception);
00232     double getSecondOrderDerivative(const std::string& variable) const throw (Exception);
00233     double getSecondOrderDerivative(const std::string& variable1, const std::string& variable2) const throw (Exception) { return 0; } // Not implemented for now.
00236     virtual void initBranchLengthsParameters();
00237 
00238     virtual void setMinimumBranchLength(double minimum)
00239     {
00240       minimumBrLen_ = minimum;
00241       if (brLenConstraint_) delete brLenConstraint_;
00242       brLenConstraint_ = new IntervalConstraint(1, minimumBrLen_, true);
00243       initBranchLengthsParameters();
00244     }
00245 
00246     virtual double getMinimumBranchLength() const { return minimumBrLen_; }
00247 
00248   protected:
00249     
00264     virtual void initTreeLikelihoods(const SequenceContainer & sequences) throw (Exception);
00265 
00266     void fireParameterChanged(const ParameterList & params);
00267     virtual void computeTreeLikelihood();
00268     virtual void computeTreeDLikelihood();
00269     virtual void computeTreeD2Likelihood();
00274     virtual void initParameters();
00275 
00282     virtual void applyParameters() throw (Exception);  
00283 
00284 };
00285 
00298 class DistanceEstimation:
00299   public virtual Clonable
00300 {
00301   private:
00302     auto_ptr<SubstitutionModel> model_;
00303     auto_ptr<DiscreteDistribution> rateDist_;
00304     const SiteContainer* sites_;
00305     DistanceMatrix* dist_;
00306     Optimizer* optimizer_;
00307     MetaOptimizer* defaultOptimizer_;
00308     size_t verbose_;
00309     ParameterList parameters_;
00310 
00311   public:
00312   
00327     DistanceEstimation(
00328         SubstitutionModel* model,
00329         DiscreteDistribution* rateDist,
00330         size_t verbose = 1) :
00331       model_(model),
00332       rateDist_(rateDist),
00333       sites_(0),
00334       dist_(0),
00335       optimizer_(0),
00336       defaultOptimizer_(0),
00337       verbose_(verbose),
00338       parameters_()
00339     {
00340       init_();
00341     }
00342   
00360     DistanceEstimation(
00361         SubstitutionModel* model,
00362         DiscreteDistribution* rateDist,
00363         const SiteContainer* sites,
00364         size_t verbose = 1,
00365         bool computeMat = true) :
00366       model_(model),
00367       rateDist_(rateDist),
00368       sites_(sites),
00369       dist_(0),
00370       optimizer_(0),
00371       defaultOptimizer_(0),
00372       verbose_(verbose),
00373       parameters_()
00374     {
00375       init_();
00376       if(computeMat) computeMatrix();
00377     }
00378     
00386     DistanceEstimation(const DistanceEstimation& distanceEstimation):
00387       model_(distanceEstimation.model_->clone()),
00388       rateDist_(distanceEstimation.rateDist_->clone()),
00389       sites_(distanceEstimation.sites_),
00390       dist_(0),
00391       optimizer_(dynamic_cast<Optimizer *>(distanceEstimation.optimizer_->clone())),
00392       defaultOptimizer_(dynamic_cast<MetaOptimizer *>(defaultOptimizer_->clone())),
00393       verbose_(distanceEstimation.verbose_),
00394       parameters_(distanceEstimation.parameters_)
00395     {
00396       if(distanceEstimation.dist_ != 0)
00397         dist_ = new DistanceMatrix(*distanceEstimation.dist_);
00398       else
00399         dist_ = 0;
00400     }
00401 
00410     DistanceEstimation& operator=(const DistanceEstimation& distanceEstimation)
00411     {
00412       model_.reset(distanceEstimation.model_->clone());
00413       rateDist_.reset(distanceEstimation.rateDist_->clone());
00414       sites_      = distanceEstimation.sites_;
00415       if (distanceEstimation.dist_ != 0)
00416         dist_     = new DistanceMatrix(*distanceEstimation.dist_);
00417       else
00418         dist_     = 0;
00419       optimizer_  = dynamic_cast<Optimizer *>(distanceEstimation.optimizer_->clone());
00420       // _defaultOptimizer has already been initialized since the default constructor has been called.
00421       verbose_    = distanceEstimation.verbose_;
00422       parameters_ = distanceEstimation.parameters_;
00423       return *this;
00424     }
00425 
00426     virtual ~DistanceEstimation()
00427     {
00428       if (dist_) delete dist_;
00429       delete defaultOptimizer_;
00430       delete optimizer_;
00431     }
00432 
00433 #ifndef NO_VIRTUAL_COV
00434     DistanceEstimation*
00435 #else
00436     Clonable*
00437 #endif
00438     clone() const { return new DistanceEstimation(*this); }
00439     
00440   private:
00441     void init_()
00442     {
00443       MetaOptimizerInfos* desc = new MetaOptimizerInfos();
00444       std::vector<std::string> name;
00445       name.push_back("BrLen");
00446       desc->addOptimizer("Branch length", new PseudoNewtonOptimizer(0), name, 2, MetaOptimizerInfos::IT_TYPE_FULL);
00447       ParameterList tmp = model_->getParameters();
00448       tmp.addParameters(rateDist_->getParameters());
00449       desc->addOptimizer("substitution model and rate distribution", new SimpleMultiDimensions(0), tmp.getParameterNames(), 0, MetaOptimizerInfos::IT_TYPE_STEP);
00450       defaultOptimizer_ = new MetaOptimizer(0, desc);
00451       defaultOptimizer_->setMessageHandler(0);
00452       defaultOptimizer_->setProfiler(0);
00453       defaultOptimizer_->getStopCondition()->setTolerance(0.0001);
00454       optimizer_ = dynamic_cast<Optimizer*>(defaultOptimizer_->clone());
00455     }
00456 
00457   public:
00458 
00467     void computeMatrix() throw (NullPointerException);
00468     
00474     DistanceMatrix* getMatrix() const { return dist_ == 0 ? 0 : new DistanceMatrix(*dist_); }
00475 
00476     bool hasSubstitutionModel() const { return model_.get(); }
00477 
00478     const SubstitutionModel& getSubstitutionModel() const throw (Exception) {
00479       if (hasSubstitutionModel())
00480         return *model_;
00481       else
00482         throw Exception("DistanceEstimation::getSubstitutionModel(). No model assciated to this instance.");
00483     }
00484 
00485     void resetSubstitutionModel(SubstitutionModel* model = 0) { model_.reset(model); }
00486 
00487     bool hasRateDistribution() const { return rateDist_.get(); }
00488 
00489     const DiscreteDistribution& getRateDistribution() const throw (Exception) {
00490       if (hasRateDistribution())
00491         return *rateDist_;
00492       else
00493         throw Exception("DistanceEstimation::getRateDistribution(). No rate distribution assciated to this instance.");
00494     }
00495 
00496     void resetRateDistribution(DiscreteDistribution* rateDist = 0) { rateDist_.reset(rateDist); }
00497 
00498     void setData(const SiteContainer* sites) { sites_ = sites; }
00499     const SiteContainer* getData() const { return sites_; }
00500     void resetData() { sites_ = 0; }
00501     
00502     void setOptimizer(const Optimizer * optimizer)
00503     { 
00504       if (optimizer_) delete optimizer_;
00505       optimizer_ = dynamic_cast<Optimizer *>(optimizer->clone());
00506     }
00507     const Optimizer* getOptimizer() const { return optimizer_; }
00508     Optimizer* getOptimizer() { return optimizer_; }
00509     void resetOptimizer() { optimizer_ = dynamic_cast<Optimizer*>(defaultOptimizer_->clone()); }
00510 
00518     void setAdditionalParameters(const ParameterList& parameters)
00519     {
00520       parameters_ = parameters;
00521     }
00522 
00526     void resetAdditionalParameters()
00527     {
00528       parameters_.reset();
00529     }
00530 
00534     void setVerbose(size_t verbose) { verbose_ = verbose; }
00538     size_t getVerbose() const { return verbose_; }
00539 };
00540 
00541 } //end of namespace bpp.
00542 
00543 #endif //_DISTANCEESTIMATION_H_
00544 
 All Classes Namespaces Files Functions Variables Typedefs Friends