|
bpp-phyl
2.1.0
|
00001 // 00002 // File: NonHomogeneousSequenceSimulator.cpp 00003 // (previously SequenceSimulator.cpp, then HomogeneousSequenceSimulator.cpp) 00004 // Created by: Julien Dutheil 00005 // Bastien Boussau 00006 // Created on: Wed Feb 4 16:30:51 2004 00007 // 00008 00009 /* 00010 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) 00011 00012 This software is a computer program whose purpose is to provide classes 00013 for phylogenetic data analysis. 00014 00015 This software is governed by the CeCILL license under French law and 00016 abiding by the rules of distribution of free software. You can use, 00017 modify and/ or redistribute the software under the terms of the CeCILL 00018 license as circulated by CEA, CNRS and INRIA at the following URL 00019 "http://www.cecill.info". 00020 00021 As a counterpart to the access to the source code and rights to copy, 00022 modify and redistribute granted by the license, users are provided only 00023 with a limited warranty and the software's author, the holder of the 00024 economic rights, and the successive licensors have only limited 00025 liability. 00026 00027 In this respect, the user's attention is drawn to the risks associated 00028 with loading, using, modifying and/or developing or reproducing the 00029 software by the user in light of its specific status of free software, 00030 that may mean that it is complicated to manipulate, and that also 00031 therefore means that it is reserved for developers and experienced 00032 professionals having in-depth computer knowledge. Users are therefore 00033 encouraged to load and test the software's suitability as regards their 00034 requirements in conditions enabling the security of their systems and/or 00035 data to be ensured and, more generally, to use and operate it in the 00036 same conditions as regards security. 00037 00038 The fact that you are presently reading this means that you have had 00039 knowledge of the CeCILL license and that you accept its terms. 00040 */ 00041 00042 #include "NonHomogeneousSequenceSimulator.h" 00043 #include "../Model/SubstitutionModelSetTools.h" 00044 00045 #include <Bpp/App/ApplicationTools.h> 00046 #include <Bpp/Numeric/VectorTools.h> 00047 #include <Bpp/Numeric/Matrix/MatrixTools.h> 00048 00049 // From SeqLib: 00050 #include <Bpp/Seq/Container/VectorSiteContainer.h> 00051 00052 using namespace bpp; 00053 using namespace std; 00054 00055 /******************************************************************************/ 00056 00057 NonHomogeneousSequenceSimulator::NonHomogeneousSequenceSimulator( 00058 const SubstitutionModelSet* modelSet, 00059 const DiscreteDistribution* rate, 00060 const Tree* tree) throw (Exception) : 00061 modelSet_(modelSet), 00062 alphabet_(modelSet_->getAlphabet()), 00063 rate_(rate), 00064 templateTree_(tree), 00065 tree_(*tree), 00066 ownModelSet_(false), 00067 leaves_(tree_.getLeaves()), 00068 seqNames_(), 00069 nbNodes_(), 00070 nbClasses_(rate_->getNumberOfCategories()), 00071 nbStates_(modelSet_->getNumberOfStates()), 00072 continuousRates_(false) 00073 { 00074 if (!modelSet->isFullySetUpFor(*tree)) 00075 throw Exception("NonHomogeneousSequenceSimulator(constructor). Model set is not fully specified."); 00076 init(); 00077 } 00078 00079 /******************************************************************************/ 00080 00081 NonHomogeneousSequenceSimulator::NonHomogeneousSequenceSimulator( 00082 const SubstitutionModel* model, 00083 const DiscreteDistribution* rate, 00084 const Tree* tree) : 00085 modelSet_(0), 00086 alphabet_(model->getAlphabet()), 00087 rate_(rate), 00088 templateTree_(tree), 00089 tree_(*tree), 00090 ownModelSet_(true), 00091 leaves_(tree_.getLeaves()), 00092 seqNames_(), 00093 nbNodes_(), 00094 nbClasses_(rate_->getNumberOfCategories()), 00095 nbStates_(model->getNumberOfStates()), 00096 continuousRates_(false) 00097 { 00098 FixedFrequenciesSet* fSet = new FixedFrequenciesSet(model->getAlphabet(), model->getFrequencies()); 00099 fSet->setNamespace("anc."); 00100 modelSet_ = SubstitutionModelSetTools::createHomogeneousModelSet(dynamic_cast<SubstitutionModel*>(model->clone()), fSet, templateTree_); 00101 init(); 00102 } 00103 00104 /******************************************************************************/ 00105 void NonHomogeneousSequenceSimulator::init() 00106 { 00107 seqNames_.resize(leaves_.size()); 00108 for (size_t i = 0; i < seqNames_.size(); i++) 00109 { 00110 seqNames_[i] = leaves_[i]->getName(); 00111 } 00112 // Initialize cumulative pxy: 00113 vector<SNode*> nodes = tree_.getNodes(); 00114 nodes.pop_back(); // remove root 00115 nbNodes_ = nodes.size(); 00116 00117 for (size_t i = 0; i < nodes.size(); i++) 00118 { 00119 SNode* node = nodes[i]; 00120 node->getInfos().model = modelSet_->getModelForNode(node->getId()); 00121 double d = node->getDistanceToFather(); 00122 VVVdouble* cumpxy_node_ = &node->getInfos().cumpxy; 00123 cumpxy_node_->resize(nbClasses_); 00124 for (size_t c = 0; c < nbClasses_; c++) 00125 { 00126 VVdouble* cumpxy_node_c_ = &(*cumpxy_node_)[c]; 00127 cumpxy_node_c_->resize(nbStates_); 00128 RowMatrix<double> P = node->getInfos().model->getPij_t(d * rate_->getCategory(c)); 00129 for (size_t x = 0; x < nbStates_; x++) 00130 { 00131 Vdouble* cumpxy_node_c_x_ = &(*cumpxy_node_c_)[x]; 00132 cumpxy_node_c_x_->resize(nbStates_); 00133 (*cumpxy_node_c_x_)[0] = P(x, 0); 00134 for (size_t y = 1; y < nbStates_; y++) 00135 { 00136 (*cumpxy_node_c_x_)[y] = (*cumpxy_node_c_x_)[y - 1] + P(x, y); 00137 } 00138 } 00139 } 00140 } 00141 } 00142 00143 /******************************************************************************/ 00144 Site* NonHomogeneousSequenceSimulator::simulate() const 00145 { 00146 // Draw an initial state randomly according to equilibrum frequencies: 00147 int initialState = 0; 00148 double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.); 00149 double cumprob = 0; 00150 vector<double> freqs = modelSet_->getRootFrequencies(); 00151 for (size_t i = 0; i < nbStates_; i++) 00152 { 00153 cumprob += freqs[i]; 00154 if (r <= cumprob) 00155 { 00156 initialState = static_cast<int>(i); 00157 break; 00158 } 00159 } 00160 return simulate(initialState); 00161 } 00162 00163 /******************************************************************************/ 00164 Site* NonHomogeneousSequenceSimulator::simulate(int initialState) const 00165 { 00166 if (continuousRates_) 00167 { 00168 // Draw a random rate: 00169 double rate = rate_->randC(); 00170 // Make this state evolve: 00171 return simulate(initialState, rate); 00172 } 00173 else 00174 { 00175 // Draw a random rate: 00176 size_t rateClass = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(rate_->getNumberOfCategories()))); 00177 // Make this state evolve: 00178 return simulate(initialState, rateClass); 00179 } 00180 } 00181 00182 /******************************************************************************/ 00183 Site* NonHomogeneousSequenceSimulator::simulate(int initialState, size_t rateClass) const 00184 { 00185 // Launch recursion: 00186 SNode* root = tree_.getRootNode(); 00187 root->getInfos().state = initialState; 00188 for (size_t i = 0; i < root->getNumberOfSons(); i++) 00189 { 00190 evolveInternal(root->getSon(i), rateClass); 00191 } 00192 // Now create a Site object: 00193 Vint site(leaves_.size()); 00194 for (size_t i = 0; i < leaves_.size(); i++) 00195 { 00196 site[i] = leaves_[i]->getInfos().model->getAlphabetChar(leaves_[i]->getInfos().state); 00197 } 00198 return new Site(site, alphabet_); 00199 } 00200 00201 /******************************************************************************/ 00202 Site* NonHomogeneousSequenceSimulator::simulate(int initialState, double rate) const 00203 { 00204 // Launch recursion: 00205 SNode* root = tree_.getRootNode(); 00206 root->getInfos().state = initialState; 00207 for (size_t i = 0; i < root->getNumberOfSons(); i++) 00208 { 00209 evolveInternal(root->getSon(i), rate); 00210 } 00211 // Now create a Site object: 00212 Vint site(leaves_.size()); 00213 for (size_t i = 0; i < leaves_.size(); i++) 00214 { 00215 site[i] = leaves_[i]->getInfos().model->getAlphabetChar(leaves_[i]->getInfos().state); 00216 } 00217 return new Site(site, alphabet_); 00218 } 00219 00220 /******************************************************************************/ 00221 Site* NonHomogeneousSequenceSimulator::simulate(double rate) const 00222 { 00223 // Draw an initial state randomly according to equilibrum frequencies: 00224 int initialState = 0; 00225 double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.); 00226 double cumprob = 0; 00227 vector<double> freqs = modelSet_->getRootFrequencies(); 00228 for (size_t i = 0; i < nbStates_; i++) 00229 { 00230 cumprob += freqs[i]; 00231 if (r <= cumprob) 00232 { 00233 initialState = (int)i; 00234 break; 00235 } 00236 } 00237 // Make this state evolve: 00238 return simulate(initialState, rate); 00239 } 00240 00241 /******************************************************************************/ 00242 SiteContainer* NonHomogeneousSequenceSimulator::simulate(size_t numberOfSites) const 00243 { 00244 Vint initialStates(numberOfSites, 0); 00245 for (size_t j = 0; j < numberOfSites; j++) 00246 { 00247 double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.); 00248 double cumprob = 0; 00249 vector<double> freqs = modelSet_->getRootFrequencies(); 00250 for (size_t i = 0; i < nbStates_; i++) 00251 { 00252 cumprob += freqs[i]; 00253 if (r <= cumprob) 00254 { 00255 initialStates[j] = (int)i; 00256 break; 00257 } 00258 } 00259 } 00260 if (continuousRates_) 00261 { 00262 VectorSiteContainer* sites = new VectorSiteContainer(seqNames_.size(), alphabet_); 00263 sites->setSequencesNames(seqNames_); 00264 for (size_t j = 0; j < numberOfSites; j++) 00265 { 00266 Site* site = simulate(); 00267 site->setPosition(static_cast<int>(j)); 00268 sites->addSite(*site); 00269 delete site; 00270 } 00271 return sites; 00272 } 00273 else 00274 { 00275 // More efficient to do site this way: 00276 // Draw random rates: 00277 vector<size_t> rateClasses(numberOfSites); 00278 size_t nCat = rate_->getNumberOfCategories(); 00279 for (size_t j = 0; j < numberOfSites; j++) 00280 { 00281 rateClasses[j] = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(nCat))); 00282 } 00283 // Make these states evolve: 00284 SiteContainer* sites = multipleEvolve(initialStates, rateClasses); 00285 return sites; 00286 } 00287 } 00288 00289 /******************************************************************************/ 00290 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate() const 00291 { 00292 // Draw an initial state randomly according to equilibrum frequencies: 00293 int initialState = 0; 00294 double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.); 00295 double cumprob = 0; 00296 vector<double> freqs = modelSet_->getRootFrequencies(); 00297 for (size_t i = 0; i < nbStates_; i++) 00298 { 00299 cumprob += freqs[i]; 00300 if (r <= cumprob) 00301 { 00302 initialState = static_cast<int>(i); 00303 break; 00304 } 00305 } 00306 00307 return dSimulate(initialState); 00308 } 00309 00310 /******************************************************************************/ 00311 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate(int initialState) const 00312 { 00313 // Draw a random rate: 00314 if (continuousRates_) 00315 { 00316 double rate = rate_->randC(); 00317 return dSimulate(initialState, rate); 00318 } 00319 else 00320 { 00321 size_t rateClass = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(rate_->getNumberOfCategories()))); 00322 return dSimulate(initialState, rateClass); 00323 // NB: this is more efficient than dSimulate(initialState, rDist_->rand()) 00324 } 00325 } 00326 00327 /******************************************************************************/ 00328 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate(int initialState, double rate) const 00329 { 00330 // Make this state evolve: 00331 RASiteSimulationResult* hssr = new RASiteSimulationResult(templateTree_, modelSet_->getAlphabet(), initialState, rate); 00332 dEvolve(initialState, rate, *hssr); 00333 return hssr; 00334 } 00335 00336 /******************************************************************************/ 00337 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate(int initialState, size_t rateClass) const 00338 { 00339 return dSimulate(initialState, rate_->getCategory(rateClass)); 00340 } 00341 00342 /******************************************************************************/ 00343 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulate(double rate) const 00344 { 00345 // Draw an initial state randomly according to equilibrum frequencies: 00346 int initialState = 0; 00347 double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.); 00348 double cumprob = 0; 00349 vector<double> freqs = modelSet_->getRootFrequencies(); 00350 for (size_t i = 0; i < nbStates_; i++) 00351 { 00352 cumprob += freqs[i]; 00353 if (r <= cumprob) 00354 { 00355 initialState = static_cast<int>(i); 00356 break; 00357 } 00358 } 00359 return dSimulate(initialState, rate); 00360 } 00361 00362 /******************************************************************************/ 00363 int NonHomogeneousSequenceSimulator::evolve(const SNode* node, int initialState, size_t rateClass) const 00364 { 00365 const Vdouble* cumpxy_node_c_x_ = &node->getInfos().cumpxy[rateClass][initialState]; 00366 double rand = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.); 00367 for (int y = 0; y < static_cast<int>(nbStates_); y++) 00368 { 00369 if (rand < (*cumpxy_node_c_x_)[y]) return y; 00370 } 00371 cerr << "DEBUG: This message should never happen! (HomogeneousSequenceSimulator::evolve)" << endl; 00372 cout << " rand = " << rand << endl; 00373 return -1; 00374 } 00375 00376 /******************************************************************************/ 00377 int NonHomogeneousSequenceSimulator::evolve(const SNode* node, int initialState, double rate) const 00378 { 00379 double cumpxy = 0; 00380 double rand = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.); 00381 double l = rate * node->getDistanceToFather(); 00382 const SubstitutionModel* model = node->getInfos().model; 00383 for (int y = 0; y < static_cast<int>(nbStates_); y++) 00384 { 00385 cumpxy += model->Pij_t(initialState, y, l); 00386 if (rand < cumpxy) return y; 00387 } 00388 cerr << "DEBUG: This message should never happen! (NonHomogeneousSequenceSimulator::evolve)" << endl; 00389 cout << " rand = " << rand << endl; 00390 cout << "cumpxy = " << cumpxy << endl; 00391 MatrixTools::print(model->getPij_t(l)); 00392 return -1; 00393 } 00394 00395 /******************************************************************************/ 00396 void NonHomogeneousSequenceSimulator::multipleEvolve(const SNode* node, const Vint& initialStates, const vector<size_t>& rateClasses, Vint& finalStates) const 00397 { 00398 const VVVdouble* cumpxy_node_ = &node->getInfos().cumpxy; 00399 for (size_t i = 0; i < initialStates.size(); i++) 00400 { 00401 const Vdouble* cumpxy_node_c_x_ = &(*cumpxy_node_)[rateClasses[i]][initialStates[i]]; 00402 double rand = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.); 00403 for (size_t y = 0; y < nbStates_; y++) 00404 { 00405 if (rand < (*cumpxy_node_c_x_)[y]) 00406 { 00407 finalStates[i] = (int)y; 00408 break; 00409 } 00410 } 00411 } 00412 } 00413 00414 /******************************************************************************/ 00415 void NonHomogeneousSequenceSimulator::evolveInternal(SNode* node, size_t rateClass) const 00416 { 00417 if (!node->hasFather()) 00418 { 00419 cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl; 00420 return; 00421 } 00422 node->getInfos().state = evolve(node, node->getFather()->getInfos().state, rateClass); 00423 for (size_t i = 0; i < node->getNumberOfSons(); i++) 00424 { 00425 evolveInternal(node->getSon(i), rateClass); 00426 } 00427 } 00428 00429 /******************************************************************************/ 00430 void NonHomogeneousSequenceSimulator::evolveInternal(SNode* node, double rate) const 00431 { 00432 if (!node->hasFather()) 00433 { 00434 cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl; 00435 return; 00436 } 00437 node->getInfos().state = evolve(node, node->getFather()->getInfos().state, rate); 00438 for (size_t i = 0; i < node->getNumberOfSons(); i++) 00439 { 00440 evolveInternal(node->getSon(i), rate); 00441 } 00442 } 00443 00444 /******************************************************************************/ 00445 void NonHomogeneousSequenceSimulator::multipleEvolveInternal(SNode* node, const vector<size_t>& rateClasses) const 00446 { 00447 if (!node->hasFather()) 00448 { 00449 cerr << "DEBUG: NonHomogeneousSequenceSimulator::multipleEvolveInternal. Forbidden call of method on root node." << endl; 00450 return; 00451 } 00452 const vector<int>* initialStates = &node->getFather()->getInfos().states; 00453 size_t n = initialStates->size(); 00454 node->getInfos().states.resize(n); // allocation. 00455 multipleEvolve(node, node->getFather()->getInfos().states, rateClasses, node->getInfos().states); 00456 for (size_t i = 0; i < node->getNumberOfSons(); i++) 00457 { 00458 multipleEvolveInternal(node->getSon(i), rateClasses); 00459 } 00460 } 00461 00462 /******************************************************************************/ 00463 SiteContainer* NonHomogeneousSequenceSimulator::multipleEvolve(const Vint& initialStates, const vector<size_t>& rateClasses) const 00464 { 00465 // Launch recursion: 00466 SNode* root = tree_.getRootNode(); 00467 root->getInfos().states = initialStates; 00468 for (size_t i = 0; i < root->getNumberOfSons(); i++) 00469 { 00470 multipleEvolveInternal(root->getSon(i), rateClasses); 00471 } 00472 // Now create a SiteContainer object: 00473 AlignedSequenceContainer* sites = new AlignedSequenceContainer(alphabet_); 00474 size_t n = leaves_.size(); 00475 size_t nbSites = initialStates.size(); 00476 const SubstitutionModel* model = 0; 00477 for (size_t i = 0; i < n; i++) 00478 { 00479 vector<int> content(nbSites); 00480 vector<int>* states = &leaves_[i]->getInfos().states; 00481 model = leaves_[i]->getInfos().model; 00482 for (size_t j = 0; j < nbSites; j++) 00483 { 00484 content[j] = model->getAlphabetChar((*states)[j]); 00485 } 00486 sites->addSequence(BasicSequence(leaves_[i]->getName(), content, alphabet_), false); 00487 } 00488 return sites; 00489 } 00490 00491 /******************************************************************************/ 00492 void NonHomogeneousSequenceSimulator::dEvolve(int initialState, double rate, RASiteSimulationResult& rassr) const 00493 { 00494 // Launch recursion: 00495 SNode* root = tree_.getRootNode(); 00496 root->getInfos().state = initialState; 00497 for (size_t i = 0; i < root->getNumberOfSons(); i++) 00498 { 00499 dEvolveInternal(root->getSon(i), rate, rassr); 00500 } 00501 } 00502 00503 /******************************************************************************/ 00504 void NonHomogeneousSequenceSimulator::dEvolveInternal(SNode* node, double rate, RASiteSimulationResult& rassr) const 00505 { 00506 if (!node->hasFather()) 00507 { 00508 cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl; 00509 return; 00510 } 00511 SimpleMutationProcess process(node->getInfos().model); 00512 MutationPath mp = process.detailedEvolve(node->getFather()->getInfos().state, node->getDistanceToFather() * rate); 00513 node->getInfos().state = mp.getFinalState(); 00514 00515 // Now append infos in rassr: 00516 rassr.addNode(node->getId(), mp); 00517 00518 // Now jump to son nodes: 00519 for (size_t i = 0; i < node->getNumberOfSons(); i++) 00520 { 00521 dEvolveInternal(node->getSon(i), rate, rassr); 00522 } 00523 } 00524 00525 /******************************************************************************/ 00526