bpp-phyl  2.1.0
Bpp/Phyl/Likelihood/MarginalAncestralStateReconstruction.cpp
Go to the documentation of this file.
00001 //
00002 // File: MarginalAncestralStateReconstruction.cpp
00003 // Created by: Julien Dutheil
00004 // Created on: Fri Jul 08 13:32 2005
00005 //
00006 
00007 /*
00008    Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
00009 
00010    This software is a computer program whose purpose is to provide classes
00011    for phylogenetic data analysis.
00012 
00013    This software is governed by the CeCILL  license under French law and
00014    abiding by the rules of distribution of free software.  You can  use,
00015    modify and/ or redistribute the software under the terms of the CeCILL
00016    license as circulated by CEA, CNRS and INRIA at the following URL
00017    "http://www.cecill.info".
00018 
00019    As a counterpart to the access to the source code and  rights to copy,
00020    modify and redistribute granted by the license, users are provided only
00021    with a limited warranty  and the software's author,  the holder of the
00022    economic rights,  and the successive licensors  have only  limited
00023    liability.
00024 
00025    In this respect, the user's attention is drawn to the risks associated
00026    with loading,  using,  modifying and/or developing or reproducing the
00027    software by the user in light of its specific status of free software,
00028    that may mean  that it is complicated to manipulate,  and  that  also
00029    therefore means  that it is reserved for developers  and  experienced
00030    professionals having in-depth computer knowledge. Users are therefore
00031    encouraged to load and test the software's suitability as regards their
00032    requirements in conditions enabling the security of their systems and/or
00033    data to be ensured and,  more generally, to use and operate it in the
00034    same conditions as regards security.
00035 
00036    The fact that you are presently reading this means that you have had
00037    knowledge of the CeCILL license and that you accept its terms.
00038  */
00039 
00040 #include "MarginalAncestralStateReconstruction.h"
00041 #include <Bpp/Numeric/VectorTools.h>
00042 #include <Bpp/Numeric/Random/RandomTools.h>
00043 
00044 using namespace bpp;
00045 using namespace std;
00046 
00047 vector<size_t> MarginalAncestralStateReconstruction::getAncestralStatesForNode(int nodeId, VVdouble& probs, bool sample) const
00048 {
00049   vector<size_t> ancestors(nbDistinctSites_);
00050   probs.resize(nbDistinctSites_);
00051   double cumProb = 0;
00052   double r;
00053   if (likelihood_->getTree().isLeaf(nodeId))
00054   {
00055     VVdouble larray = likelihood_->getLikelihoodData()->getLeafLikelihoods(nodeId);
00056     for (size_t i = 0; i < nbDistinctSites_; i++)
00057     {
00058       Vdouble* probs_i = &probs[i];
00059       probs_i->resize(nbStates_);
00060       size_t j = VectorTools::whichMax(larray[i]);
00061       ancestors[i] = (int)j;
00062       (*probs_i)[j] = 1.;
00063     }
00064   }
00065   else
00066   {
00067     VVVdouble larray;
00068 
00069     likelihood_->computeLikelihoodAtNode(nodeId, larray);
00070     for (size_t i = 0; i < nbDistinctSites_; i++)
00071     {
00072       VVdouble* larray_i = &larray[i];
00073       Vdouble* probs_i = &probs[i];
00074       probs_i->resize(nbStates_);
00075       for (size_t c = 0; c < nbClasses_; c++)
00076       {
00077         Vdouble* larray_i_c = &(*larray_i)[c];
00078         for (size_t x = 0; x < nbStates_; x++)
00079         {
00080           (*probs_i)[x] += (*larray_i_c)[x] * r_[c] / l_[i];
00081         }
00082       }
00083       if (sample)
00084       {
00085         cumProb = 0;
00086         r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
00087         for (size_t j = 0; j < nbStates_; j++)
00088         {
00089           cumProb += (*probs_i)[j];
00090           if (r <= cumProb)
00091           {
00092             ancestors[i] = (int)j;
00093             break;
00094           }
00095         }
00096       }
00097       else
00098         ancestors[i] = VectorTools::whichMax(*probs_i);
00099     }
00100   }
00101   return ancestors;
00102 }
00103 
00104 map<int, vector<size_t> > MarginalAncestralStateReconstruction::getAllAncestralStates() const
00105 {
00106   map<int, vector<size_t> > ancestors;
00107   // Clone the data into a AlignedSequenceContainer for more efficiency:
00108   AlignedSequenceContainer* data = new AlignedSequenceContainer(*likelihood_->getLikelihoodData()->getShrunkData());
00109   recursiveMarginalAncestralStates(tree_.getRootNode(), ancestors, *data);
00110   delete data;
00111   return ancestors;
00112 }
00113 
00114 Sequence* MarginalAncestralStateReconstruction::getAncestralSequenceForNode(int nodeId, VVdouble* probs, bool sample) const
00115 {
00116   string name = tree_.hasNodeName(nodeId) ? tree_.getNodeName(nodeId) : ("" + TextTools::toString(nodeId));
00117   const vector<size_t>* rootPatternLinks = &likelihood_->getLikelihoodData()->getRootArrayPositions();
00118   const SubstitutionModel* model = likelihood_->getSubstitutionModel(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states.
00119   vector<size_t> states;
00120   vector<int> allStates(nbSites_);
00121   VVdouble patternedProbs;
00122   if (probs)
00123   {
00124     states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
00125     probs->resize(nbSites_);
00126     for (size_t i = 0; i < nbSites_; i++)
00127     {
00128       allStates[i] = model->getAlphabetChar(states[(*rootPatternLinks)[i]]);
00129       (*probs)[i] = patternedProbs[(*rootPatternLinks)[i]];
00130     }
00131   }
00132   else
00133   {
00134     states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
00135     for (size_t i = 0; i < nbSites_; i++)
00136     {
00137       allStates[i] = model->getAlphabetChar(states[(*rootPatternLinks)[i]]);
00138     }
00139   }
00140   return new BasicSequence(name, allStates, alphabet_);
00141 }
00142 
00143 void MarginalAncestralStateReconstruction::recursiveMarginalAncestralStates(
00144   const Node* node,
00145   map<int, vector<size_t> >& ancestors,
00146   AlignedSequenceContainer& data) const
00147 {
00148   if (node->isLeaf())
00149   {
00150     vector<int> content = data.getContent(node->getName());
00151     vector<size_t>* v = &ancestors[node->getId()];
00152     v->resize(content.size());
00153     // This is a tricky way to store the real sequence as an ancestral one...
00154     // In case of Markov Modulated models, we consider that the real sequences
00155     // Are all in the first category.
00156     const SubstitutionModel* model = likelihood_->getSubstitutionModel(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states.
00157     for (size_t i = 0; i < content.size(); i++)
00158     {
00159       (*v)[i] = model->getModelStates(content[i])[0];
00160     }
00161   }
00162   else
00163   {
00164     ancestors[node->getId()] = getAncestralStatesForNode(node->getId());
00165     for (size_t i = 0; i < node->getNumberOfSons(); i++)
00166     {
00167       recursiveMarginalAncestralStates(node->getSon(i), ancestors, data);
00168     }
00169   }
00170 }
00171 
00172 AlignedSequenceContainer* MarginalAncestralStateReconstruction::getAncestralSequences(bool sample) const
00173 {
00174   AlignedSequenceContainer* asc = new AlignedSequenceContainer(alphabet_);
00175   vector<int> ids = tree_.getInnerNodesId();
00176   for (size_t i = 0; i < ids.size(); i++)
00177   {
00178     Sequence* seq = getAncestralSequenceForNode(ids[i], NULL, sample);
00179     asc->addSequence(*seq);
00180     delete seq;
00181   }
00182   return asc;
00183 }
00184 
 All Classes Namespaces Files Functions Variables Typedefs Friends