|
bpp-phyl
2.1.0
|
00001 // 00002 // File: MarginalAncestralStateReconstruction.cpp 00003 // Created by: Julien Dutheil 00004 // Created on: Fri Jul 08 13:32 2005 00005 // 00006 00007 /* 00008 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) 00009 00010 This software is a computer program whose purpose is to provide classes 00011 for phylogenetic data analysis. 00012 00013 This software is governed by the CeCILL license under French law and 00014 abiding by the rules of distribution of free software. You can use, 00015 modify and/ or redistribute the software under the terms of the CeCILL 00016 license as circulated by CEA, CNRS and INRIA at the following URL 00017 "http://www.cecill.info". 00018 00019 As a counterpart to the access to the source code and rights to copy, 00020 modify and redistribute granted by the license, users are provided only 00021 with a limited warranty and the software's author, the holder of the 00022 economic rights, and the successive licensors have only limited 00023 liability. 00024 00025 In this respect, the user's attention is drawn to the risks associated 00026 with loading, using, modifying and/or developing or reproducing the 00027 software by the user in light of its specific status of free software, 00028 that may mean that it is complicated to manipulate, and that also 00029 therefore means that it is reserved for developers and experienced 00030 professionals having in-depth computer knowledge. Users are therefore 00031 encouraged to load and test the software's suitability as regards their 00032 requirements in conditions enabling the security of their systems and/or 00033 data to be ensured and, more generally, to use and operate it in the 00034 same conditions as regards security. 00035 00036 The fact that you are presently reading this means that you have had 00037 knowledge of the CeCILL license and that you accept its terms. 00038 */ 00039 00040 #include "MarginalAncestralStateReconstruction.h" 00041 #include <Bpp/Numeric/VectorTools.h> 00042 #include <Bpp/Numeric/Random/RandomTools.h> 00043 00044 using namespace bpp; 00045 using namespace std; 00046 00047 vector<size_t> MarginalAncestralStateReconstruction::getAncestralStatesForNode(int nodeId, VVdouble& probs, bool sample) const 00048 { 00049 vector<size_t> ancestors(nbDistinctSites_); 00050 probs.resize(nbDistinctSites_); 00051 double cumProb = 0; 00052 double r; 00053 if (likelihood_->getTree().isLeaf(nodeId)) 00054 { 00055 VVdouble larray = likelihood_->getLikelihoodData()->getLeafLikelihoods(nodeId); 00056 for (size_t i = 0; i < nbDistinctSites_; i++) 00057 { 00058 Vdouble* probs_i = &probs[i]; 00059 probs_i->resize(nbStates_); 00060 size_t j = VectorTools::whichMax(larray[i]); 00061 ancestors[i] = (int)j; 00062 (*probs_i)[j] = 1.; 00063 } 00064 } 00065 else 00066 { 00067 VVVdouble larray; 00068 00069 likelihood_->computeLikelihoodAtNode(nodeId, larray); 00070 for (size_t i = 0; i < nbDistinctSites_; i++) 00071 { 00072 VVdouble* larray_i = &larray[i]; 00073 Vdouble* probs_i = &probs[i]; 00074 probs_i->resize(nbStates_); 00075 for (size_t c = 0; c < nbClasses_; c++) 00076 { 00077 Vdouble* larray_i_c = &(*larray_i)[c]; 00078 for (size_t x = 0; x < nbStates_; x++) 00079 { 00080 (*probs_i)[x] += (*larray_i_c)[x] * r_[c] / l_[i]; 00081 } 00082 } 00083 if (sample) 00084 { 00085 cumProb = 0; 00086 r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.); 00087 for (size_t j = 0; j < nbStates_; j++) 00088 { 00089 cumProb += (*probs_i)[j]; 00090 if (r <= cumProb) 00091 { 00092 ancestors[i] = (int)j; 00093 break; 00094 } 00095 } 00096 } 00097 else 00098 ancestors[i] = VectorTools::whichMax(*probs_i); 00099 } 00100 } 00101 return ancestors; 00102 } 00103 00104 map<int, vector<size_t> > MarginalAncestralStateReconstruction::getAllAncestralStates() const 00105 { 00106 map<int, vector<size_t> > ancestors; 00107 // Clone the data into a AlignedSequenceContainer for more efficiency: 00108 AlignedSequenceContainer* data = new AlignedSequenceContainer(*likelihood_->getLikelihoodData()->getShrunkData()); 00109 recursiveMarginalAncestralStates(tree_.getRootNode(), ancestors, *data); 00110 delete data; 00111 return ancestors; 00112 } 00113 00114 Sequence* MarginalAncestralStateReconstruction::getAncestralSequenceForNode(int nodeId, VVdouble* probs, bool sample) const 00115 { 00116 string name = tree_.hasNodeName(nodeId) ? tree_.getNodeName(nodeId) : ("" + TextTools::toString(nodeId)); 00117 const vector<size_t>* rootPatternLinks = &likelihood_->getLikelihoodData()->getRootArrayPositions(); 00118 const SubstitutionModel* model = likelihood_->getSubstitutionModel(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states. 00119 vector<size_t> states; 00120 vector<int> allStates(nbSites_); 00121 VVdouble patternedProbs; 00122 if (probs) 00123 { 00124 states = getAncestralStatesForNode(nodeId, patternedProbs, sample); 00125 probs->resize(nbSites_); 00126 for (size_t i = 0; i < nbSites_; i++) 00127 { 00128 allStates[i] = model->getAlphabetChar(states[(*rootPatternLinks)[i]]); 00129 (*probs)[i] = patternedProbs[(*rootPatternLinks)[i]]; 00130 } 00131 } 00132 else 00133 { 00134 states = getAncestralStatesForNode(nodeId, patternedProbs, sample); 00135 for (size_t i = 0; i < nbSites_; i++) 00136 { 00137 allStates[i] = model->getAlphabetChar(states[(*rootPatternLinks)[i]]); 00138 } 00139 } 00140 return new BasicSequence(name, allStates, alphabet_); 00141 } 00142 00143 void MarginalAncestralStateReconstruction::recursiveMarginalAncestralStates( 00144 const Node* node, 00145 map<int, vector<size_t> >& ancestors, 00146 AlignedSequenceContainer& data) const 00147 { 00148 if (node->isLeaf()) 00149 { 00150 vector<int> content = data.getContent(node->getName()); 00151 vector<size_t>* v = &ancestors[node->getId()]; 00152 v->resize(content.size()); 00153 // This is a tricky way to store the real sequence as an ancestral one... 00154 // In case of Markov Modulated models, we consider that the real sequences 00155 // Are all in the first category. 00156 const SubstitutionModel* model = likelihood_->getSubstitutionModel(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states. 00157 for (size_t i = 0; i < content.size(); i++) 00158 { 00159 (*v)[i] = model->getModelStates(content[i])[0]; 00160 } 00161 } 00162 else 00163 { 00164 ancestors[node->getId()] = getAncestralStatesForNode(node->getId()); 00165 for (size_t i = 0; i < node->getNumberOfSons(); i++) 00166 { 00167 recursiveMarginalAncestralStates(node->getSon(i), ancestors, data); 00168 } 00169 } 00170 } 00171 00172 AlignedSequenceContainer* MarginalAncestralStateReconstruction::getAncestralSequences(bool sample) const 00173 { 00174 AlignedSequenceContainer* asc = new AlignedSequenceContainer(alphabet_); 00175 vector<int> ids = tree_.getInnerNodesId(); 00176 for (size_t i = 0; i < ids.size(); i++) 00177 { 00178 Sequence* seq = getAncestralSequenceForNode(ids[i], NULL, sample); 00179 asc->addSequence(*seq); 00180 delete seq; 00181 } 00182 return asc; 00183 } 00184