bpp-phyl  2.1.0
Bpp/Phyl/Simulation/NonHomogeneousSequenceSimulator.cpp
Go to the documentation of this file.
00001 //
00002 // File: NonHomogeneousSequenceSimulator.cpp
00003 //       (previously SequenceSimulator.cpp, then HomogeneousSequenceSimulator.cpp)
00004 // Created by: Julien Dutheil
00005 //             Bastien Boussau
00006 // Created on: Wed Feb  4 16:30:51 2004
00007 //
00008 
00009 /*
00010    Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
00011 
00012    This software is a computer program whose purpose is to provide classes
00013    for phylogenetic data analysis.
00014 
00015    This software is governed by the CeCILL  license under French law and
00016    abiding by the rules of distribution of free software.  You can  use,
00017    modify and/ or redistribute the software under the terms of the CeCILL
00018    license as circulated by CEA, CNRS and INRIA at the following URL
00019    "http://www.cecill.info".
00020 
00021    As a counterpart to the access to the source code and  rights to copy,
00022    modify and redistribute granted by the license, users are provided only
00023    with a limited warranty  and the software's author,  the holder of the
00024    economic rights,  and the successive licensors  have only  limited
00025    liability.
00026 
00027    In this respect, the user's attention is drawn to the risks associated
00028    with loading,  using,  modifying and/or developing or reproducing the
00029    software by the user in light of its specific status of free software,
00030    that may mean  that it is complicated to manipulate,  and  that  also
00031    therefore means  that it is reserved for developers  and  experienced
00032    professionals having in-depth computer knowledge. Users are therefore
00033    encouraged to load and test the software's suitability as regards their
00034    requirements in conditions enabling the security of their systems and/or
00035    data to be ensured and,  more generally, to use and operate it in the
00036    same conditions as regards security.
00037 
00038    The fact that you are presently reading this means that you have had
00039    knowledge of the CeCILL license and that you accept its terms.
00040  */
00041 
00042 #include "NonHomogeneousSequenceSimulator.h"
00043 #include "../Model/SubstitutionModelSetTools.h"
00044 
00045 #include <Bpp/App/ApplicationTools.h>
00046 #include <Bpp/Numeric/VectorTools.h>
00047 #include <Bpp/Numeric/Matrix/MatrixTools.h>
00048 
00049 // From SeqLib:
00050 #include <Bpp/Seq/Container/VectorSiteContainer.h>
00051 
00052 using namespace bpp;
00053 using namespace std;
00054 
00055 /******************************************************************************/
00056 
00057 NonHomogeneousSequenceSimulator::NonHomogeneousSequenceSimulator(
00058   const SubstitutionModelSet* modelSet,
00059   const DiscreteDistribution* rate,
00060   const Tree* tree) throw (Exception) :
00061   modelSet_(modelSet),
00062   alphabet_(modelSet_->getAlphabet()),
00063   rate_(rate),
00064   templateTree_(tree),
00065   tree_(*tree),
00066   ownModelSet_(false),
00067   leaves_(tree_.getLeaves()),
00068   seqNames_(),
00069   nbNodes_(),
00070   nbClasses_(rate_->getNumberOfCategories()),
00071   nbStates_(modelSet_->getNumberOfStates()),
00072   continuousRates_(false)
00073 {
00074   if (!modelSet->isFullySetUpFor(*tree))
00075     throw Exception("NonHomogeneousSequenceSimulator(constructor). Model set is not fully specified.");
00076   init();
00077 }
00078 
00079 /******************************************************************************/
00080 
00081 NonHomogeneousSequenceSimulator::NonHomogeneousSequenceSimulator(
00082   const SubstitutionModel* model,
00083   const DiscreteDistribution* rate,
00084   const Tree* tree) :
00085   modelSet_(0),
00086   alphabet_(model->getAlphabet()),
00087   rate_(rate),
00088   templateTree_(tree),
00089   tree_(*tree),
00090   ownModelSet_(true),
00091   leaves_(tree_.getLeaves()),
00092   seqNames_(),
00093   nbNodes_(),
00094   nbClasses_(rate_->getNumberOfCategories()),
00095   nbStates_(model->getNumberOfStates()),
00096   continuousRates_(false)
00097 {
00098   FixedFrequenciesSet* fSet = new FixedFrequenciesSet(model->getAlphabet(), model->getFrequencies());
00099   fSet->setNamespace("anc.");
00100   modelSet_ = SubstitutionModelSetTools::createHomogeneousModelSet(dynamic_cast<SubstitutionModel*>(model->clone()), fSet, templateTree_);
00101   init();
00102 }
00103 
00104 /******************************************************************************/
00105 void NonHomogeneousSequenceSimulator::init()
00106 {
00107   seqNames_.resize(leaves_.size());
00108   for (size_t i = 0; i < seqNames_.size(); i++)
00109   {
00110     seqNames_[i] = leaves_[i]->getName();
00111   }
00112   // Initialize cumulative pxy:
00113   vector<SNode*> nodes = tree_.getNodes();
00114   nodes.pop_back(); // remove root
00115   nbNodes_ = nodes.size();
00116 
00117   for (size_t i = 0; i < nodes.size(); i++)
00118   {
00119     SNode* node = nodes[i];
00120     node->getInfos().model = modelSet_->getModelForNode(node->getId());
00121     double d = node->getDistanceToFather();
00122     VVVdouble* cumpxy_node_ = &node->getInfos().cumpxy;
00123     cumpxy_node_->resize(nbClasses_);
00124     for (size_t c = 0; c < nbClasses_; c++)
00125     {
00126       VVdouble* cumpxy_node_c_ = &(*cumpxy_node_)[c];
00127       cumpxy_node_c_->resize(nbStates_);
00128       RowMatrix<double> P = node->getInfos().model->getPij_t(d * rate_->getCategory(c));
00129       for (size_t x = 0; x < nbStates_; x++)
00130       {
00131         Vdouble* cumpxy_node_c_x_ = &(*cumpxy_node_c_)[x];
00132         cumpxy_node_c_x_->resize(nbStates_);
00133         (*cumpxy_node_c_x_)[0] = P(x, 0);
00134         for (size_t y = 1; y < nbStates_; y++)
00135         {
00136           (*cumpxy_node_c_x_)[y] = (*cumpxy_node_c_x_)[y - 1] + P(x, y);
00137         }
00138       }
00139     }
00140   }
00141 }
00142 
00143 /******************************************************************************/
00144 Site* NonHomogeneousSequenceSimulator::simulate() const
00145 {
00146   // Draw an initial state randomly according to equilibrum frequencies:
00147   int initialState = 0;
00148   double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
00149   double cumprob = 0;
00150   vector<double> freqs = modelSet_->getRootFrequencies();
00151   for (size_t i = 0; i < nbStates_; i++)
00152   {
00153     cumprob += freqs[i];
00154     if (r <= cumprob)
00155     {
00156       initialState = static_cast<int>(i);
00157       break;
00158     }
00159   }
00160   return simulate(initialState);
00161 }
00162 
00163 /******************************************************************************/
00164 Site* NonHomogeneousSequenceSimulator::simulate(int initialState) const
00165 {
00166   if (continuousRates_)
00167   {
00168     // Draw a random rate:
00169     double rate = rate_->randC();
00170     // Make this state evolve:
00171     return simulate(initialState, rate);
00172   }
00173   else
00174   {
00175     // Draw a random rate:
00176     size_t rateClass = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(rate_->getNumberOfCategories())));
00177     // Make this state evolve:
00178     return simulate(initialState, rateClass);
00179   }
00180 }
00181 
00182 /******************************************************************************/
00183 Site* NonHomogeneousSequenceSimulator::simulate(int initialState, size_t rateClass) const
00184 {
00185   // Launch recursion:
00186   SNode* root = tree_.getRootNode();
00187   root->getInfos().state = initialState;
00188   for (size_t i = 0; i < root->getNumberOfSons(); i++)
00189   {
00190     evolveInternal(root->getSon(i), rateClass);
00191   }
00192   // Now create a Site object:
00193   Vint site(leaves_.size());
00194   for (size_t i = 0; i < leaves_.size(); i++)
00195   {
00196     site[i] = leaves_[i]->getInfos().model->getAlphabetChar(leaves_[i]->getInfos().state);
00197   }
00198   return new Site(site, alphabet_);
00199 }
00200 
00201 /******************************************************************************/
00202 Site* NonHomogeneousSequenceSimulator::simulate(int initialState, double rate) const
00203 {
00204   // Launch recursion:
00205   SNode* root = tree_.getRootNode();
00206   root->getInfos().state = initialState;
00207   for (size_t i = 0; i < root->getNumberOfSons(); i++)
00208   {
00209     evolveInternal(root->getSon(i), rate);
00210   }
00211   // Now create a Site object:
00212   Vint site(leaves_.size());
00213   for (size_t i = 0; i < leaves_.size(); i++)
00214   {
00215     site[i] = leaves_[i]->getInfos().model->getAlphabetChar(leaves_[i]->getInfos().state);
00216   }
00217   return new Site(site, alphabet_);
00218 }
00219 
00220 /******************************************************************************/
00221 Site* NonHomogeneousSequenceSimulator::simulate(double rate) const
00222 {
00223   // Draw an initial state randomly according to equilibrum frequencies:
00224   int initialState = 0;
00225   double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
00226   double cumprob = 0;
00227   vector<double> freqs = modelSet_->getRootFrequencies();
00228   for (size_t i = 0; i < nbStates_; i++)
00229   {
00230     cumprob += freqs[i];
00231     if (r <= cumprob)
00232     {
00233       initialState = (int)i;
00234       break;
00235     }
00236   }
00237   // Make this state evolve:
00238   return simulate(initialState, rate);
00239 }
00240 
00241 /******************************************************************************/
00242 SiteContainer* NonHomogeneousSequenceSimulator::simulate(size_t numberOfSites) const
00243 {
00244   Vint initialStates(numberOfSites, 0);
00245   for (size_t j = 0; j < numberOfSites; j++)
00246   {
00247     double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
00248     double cumprob = 0;
00249     vector<double> freqs = modelSet_->getRootFrequencies();
00250     for (size_t i = 0; i < nbStates_; i++)
00251     {
00252       cumprob += freqs[i];
00253       if (r <= cumprob)
00254       {
00255         initialStates[j] = (int)i;
00256         break;
00257       }
00258     }
00259   }
00260   if (continuousRates_)
00261   {
00262     VectorSiteContainer* sites = new VectorSiteContainer(seqNames_.size(), alphabet_);
00263     sites->setSequencesNames(seqNames_);
00264     for (size_t j = 0; j < numberOfSites; j++)
00265     {
00266       Site* site = simulate();
00267       site->setPosition(static_cast<int>(j));
00268       sites->addSite(*site);
00269       delete site;
00270     }
00271     return sites;
00272   }
00273   else
00274   {
00275     // More efficient to do site this way:
00276     // Draw random rates:
00277     vector<size_t> rateClasses(numberOfSites);
00278     size_t nCat = rate_->getNumberOfCategories();
00279     for (size_t j = 0; j < numberOfSites; j++)
00280     {
00281       rateClasses[j] = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(nCat)));
00282     }
00283     // Make these states evolve:
00284     SiteContainer* sites = multipleEvolve(initialStates, rateClasses);
00285     return sites;
00286   }
00287 }
00288 
00289 /******************************************************************************/
00290 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate() const
00291 {
00292   // Draw an initial state randomly according to equilibrum frequencies:
00293   int initialState = 0;
00294   double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
00295   double cumprob = 0;
00296   vector<double> freqs = modelSet_->getRootFrequencies();
00297   for (size_t i = 0; i < nbStates_; i++)
00298   {
00299     cumprob += freqs[i];
00300     if (r <= cumprob)
00301     {
00302       initialState = static_cast<int>(i);
00303       break;
00304     }
00305   }
00306 
00307   return dSimulate(initialState);
00308 }
00309 
00310 /******************************************************************************/
00311 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate(int initialState) const
00312 {
00313   // Draw a random rate:
00314   if (continuousRates_)
00315   {
00316     double rate = rate_->randC();
00317     return dSimulate(initialState, rate);
00318   }
00319   else
00320   {
00321     size_t rateClass = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(rate_->getNumberOfCategories())));
00322     return dSimulate(initialState, rateClass);
00323     // NB: this is more efficient than dSimulate(initialState, rDist_->rand())
00324   }
00325 }
00326 
00327 /******************************************************************************/
00328 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate(int initialState, double rate) const
00329 {
00330   // Make this state evolve:
00331   RASiteSimulationResult* hssr = new RASiteSimulationResult(templateTree_, modelSet_->getAlphabet(), initialState, rate);
00332   dEvolve(initialState, rate, *hssr);
00333   return hssr;
00334 }
00335 
00336 /******************************************************************************/
00337 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate(int initialState, size_t rateClass) const
00338 {
00339   return dSimulate(initialState, rate_->getCategory(rateClass));
00340 }
00341 
00342 /******************************************************************************/
00343 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate(double rate) const
00344 {
00345   // Draw an initial state randomly according to equilibrum frequencies:
00346   int initialState = 0;
00347   double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
00348   double cumprob = 0;
00349   vector<double> freqs = modelSet_->getRootFrequencies();
00350   for (size_t i = 0; i < nbStates_; i++)
00351   {
00352     cumprob += freqs[i];
00353     if (r <= cumprob)
00354     {
00355       initialState = static_cast<int>(i);
00356       break;
00357     }
00358   }
00359   return dSimulate(initialState, rate);
00360 }
00361 
00362 /******************************************************************************/
00363 int NonHomogeneousSequenceSimulator::evolve(const SNode* node, int initialState, size_t rateClass) const
00364 {
00365   const Vdouble* cumpxy_node_c_x_ = &node->getInfos().cumpxy[rateClass][initialState];
00366   double rand = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
00367   for (int y = 0; y < static_cast<int>(nbStates_); y++)
00368   {
00369     if (rand < (*cumpxy_node_c_x_)[y]) return y;
00370   }
00371   cerr << "DEBUG: This message should never happen! (HomogeneousSequenceSimulator::evolve)" << endl;
00372   cout << "   rand = " << rand << endl;
00373   return -1;
00374 }
00375 
00376 /******************************************************************************/
00377 int NonHomogeneousSequenceSimulator::evolve(const SNode* node, int initialState, double rate) const
00378 {
00379   double cumpxy = 0;
00380   double rand = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
00381   double l = rate * node->getDistanceToFather();
00382   const SubstitutionModel* model = node->getInfos().model;
00383   for (int y = 0; y < static_cast<int>(nbStates_); y++)
00384   {
00385     cumpxy += model->Pij_t(initialState, y, l);
00386     if (rand < cumpxy) return y;
00387   }
00388   cerr << "DEBUG: This message should never happen! (NonHomogeneousSequenceSimulator::evolve)" << endl;
00389   cout << "  rand = " << rand << endl;
00390   cout << "cumpxy = " << cumpxy << endl;
00391   MatrixTools::print(model->getPij_t(l));
00392   return -1;
00393 }
00394 
00395 /******************************************************************************/
00396 void NonHomogeneousSequenceSimulator::multipleEvolve(const SNode* node, const Vint& initialStates, const vector<size_t>& rateClasses, Vint& finalStates) const
00397 {
00398   const VVVdouble* cumpxy_node_ = &node->getInfos().cumpxy;
00399   for (size_t i = 0; i < initialStates.size(); i++)
00400   {
00401     const Vdouble* cumpxy_node_c_x_ = &(*cumpxy_node_)[rateClasses[i]][initialStates[i]];
00402     double rand = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
00403     for (size_t y = 0; y < nbStates_; y++)
00404     {
00405       if (rand < (*cumpxy_node_c_x_)[y])
00406       {
00407         finalStates[i] = (int)y;
00408         break;
00409       }
00410     }
00411   }
00412 }
00413 
00414 /******************************************************************************/
00415 void NonHomogeneousSequenceSimulator::evolveInternal(SNode* node, size_t rateClass) const
00416 {
00417   if (!node->hasFather())
00418   {
00419     cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl;
00420     return;
00421   }
00422   node->getInfos().state = evolve(node, node->getFather()->getInfos().state, rateClass);
00423   for (size_t i = 0; i < node->getNumberOfSons(); i++)
00424   {
00425     evolveInternal(node->getSon(i), rateClass);
00426   }
00427 }
00428 
00429 /******************************************************************************/
00430 void NonHomogeneousSequenceSimulator::evolveInternal(SNode* node, double rate) const
00431 {
00432   if (!node->hasFather())
00433   {
00434     cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl;
00435     return;
00436   }
00437   node->getInfos().state = evolve(node, node->getFather()->getInfos().state, rate);
00438   for (size_t i = 0; i < node->getNumberOfSons(); i++)
00439   {
00440     evolveInternal(node->getSon(i), rate);
00441   }
00442 }
00443 
00444 /******************************************************************************/
00445 void NonHomogeneousSequenceSimulator::multipleEvolveInternal(SNode* node, const vector<size_t>& rateClasses) const
00446 {
00447   if (!node->hasFather())
00448   {
00449     cerr << "DEBUG: NonHomogeneousSequenceSimulator::multipleEvolveInternal. Forbidden call of method on root node." << endl;
00450     return;
00451   }
00452   const vector<int>* initialStates = &node->getFather()->getInfos().states;
00453   size_t n = initialStates->size();
00454   node->getInfos().states.resize(n); // allocation.
00455   multipleEvolve(node, node->getFather()->getInfos().states, rateClasses, node->getInfos().states);
00456   for (size_t i = 0; i < node->getNumberOfSons(); i++)
00457   {
00458     multipleEvolveInternal(node->getSon(i), rateClasses);
00459   }
00460 }
00461 
00462 /******************************************************************************/
00463 SiteContainer* NonHomogeneousSequenceSimulator::multipleEvolve(const Vint& initialStates, const vector<size_t>& rateClasses) const
00464 {
00465   // Launch recursion:
00466   SNode* root = tree_.getRootNode();
00467   root->getInfos().states = initialStates;
00468   for (size_t i = 0; i < root->getNumberOfSons(); i++)
00469   {
00470     multipleEvolveInternal(root->getSon(i), rateClasses);
00471   }
00472   // Now create a SiteContainer object:
00473   AlignedSequenceContainer* sites = new AlignedSequenceContainer(alphabet_);
00474   size_t n = leaves_.size();
00475   size_t nbSites = initialStates.size();
00476   const SubstitutionModel* model = 0;
00477   for (size_t i = 0; i < n; i++)
00478   {
00479     vector<int> content(nbSites);
00480     vector<int>* states = &leaves_[i]->getInfos().states;
00481     model = leaves_[i]->getInfos().model;
00482     for (size_t j = 0; j < nbSites; j++)
00483     {
00484       content[j] = model->getAlphabetChar((*states)[j]);
00485     }
00486     sites->addSequence(BasicSequence(leaves_[i]->getName(), content, alphabet_), false);
00487   }
00488   return sites;
00489 }
00490 
00491 /******************************************************************************/
00492 void NonHomogeneousSequenceSimulator::dEvolve(int initialState, double rate, RASiteSimulationResult& rassr) const
00493 {
00494   // Launch recursion:
00495   SNode* root = tree_.getRootNode();
00496   root->getInfos().state = initialState;
00497   for (size_t i = 0; i < root->getNumberOfSons(); i++)
00498   {
00499     dEvolveInternal(root->getSon(i), rate, rassr);
00500   }
00501 }
00502 
00503 /******************************************************************************/
00504 void NonHomogeneousSequenceSimulator::dEvolveInternal(SNode* node, double rate, RASiteSimulationResult& rassr) const
00505 {
00506   if (!node->hasFather())
00507   {
00508     cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl;
00509     return;
00510   }
00511   SimpleMutationProcess process(node->getInfos().model);
00512   MutationPath mp = process.detailedEvolve(node->getFather()->getInfos().state, node->getDistanceToFather() * rate);
00513   node->getInfos().state = mp.getFinalState();
00514 
00515   // Now append infos in rassr:
00516   rassr.addNode(node->getId(), mp);
00517 
00518   // Now jump to son nodes:
00519   for (size_t i = 0; i < node->getNumberOfSons(); i++)
00520   {
00521     dEvolveInternal(node->getSon(i), rate, rassr);
00522   }
00523 }
00524 
00525 /******************************************************************************/
00526 
 All Classes Namespaces Files Functions Variables Typedefs Friends