bpp-phyl  2.1.0
Bpp/Phyl/TreeTemplateTools.cpp
Go to the documentation of this file.
00001 //
00002 // File: TreeTemplateTools.cpp
00003 // Created by: Julien Dutheil
00004 // Created on: Fri Oct  13 13:00 2006
00005 // From file TreeTools.cpp
00006 // Created on: Wed Aug  6 13:45:28 2003
00007 //
00008 
00009 /*
00010    Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
00011 
00012    This software is a computer program whose purpose is to provide classes
00013    for phylogenetic data analysis.
00014 
00015    This software is governed by the CeCILL  license under French law and
00016    abiding by the rules of distribution of free software.  You can  use,
00017    modify and/ or redistribute the software under the terms of the CeCILL
00018    license as circulated by CEA, CNRS and INRIA at the following URL
00019    "http://www.cecill.info".
00020 
00021    As a counterpart to the access to the source code and  rights to copy,
00022    modify and redistribute granted by the license, users are provided only
00023    with a limited warranty  and the software's author,  the holder of the
00024    economic rights,  and the successive licensors  have only  limited
00025    liability.
00026 
00027    In this respect, the user's attention is drawn to the risks associated
00028    with loading,  using,  modifying and/or developing or reproducing the
00029    software by the user in light of its specific status of free software,
00030    that may mean  that it is complicated to manipulate,  and  that  also
00031    therefore means  that it is reserved for developers  and  experienced
00032    professionals having in-depth computer knowledge. Users are therefore
00033    encouraged to load and test the software's suitability as regards their
00034    requirements in conditions enabling the security of their systems and/or
00035    data to be ensured and,  more generally, to use and operate it in the
00036    same conditions as regards security.
00037 
00038    The fact that you are presently reading this means that you have had
00039    knowledge of the CeCILL license and that you accept its terms.
00040  */
00041 
00042 #include "TreeTemplateTools.h"
00043 #include "TreeTemplate.h"
00044 
00045 #include <Bpp/Numeric/Number.h>
00046 #include <Bpp/BppString.h>
00047 #include <Bpp/Text/StringTokenizer.h>
00048 #include <Bpp/Text/NestedStringTokenizer.h>
00049 #include <Bpp/Text/TextTools.h>
00050 #include <Bpp/Numeric/Random/RandomTools.h>
00051 
00052 using namespace bpp;
00053 
00054 // From the STL:
00055 #include <iostream>
00056 #include <sstream>
00057 #include <limits>
00058 
00059 using namespace std;
00060 
00061 /******************************************************************************/
00062 
00063 bool TreeTemplateTools::isMultifurcating(const Node& node)
00064 {
00065   if (node.getNumberOfSons() > 2)
00066     return true;
00067   else
00068   {
00069     bool b = false;
00070     for (size_t i = 0; i < node.getNumberOfSons(); i++)
00071     {
00072       b = b || isMultifurcating(*node.getSon(i));
00073     }
00074     return b;
00075   }
00076 }
00077 
00078 /******************************************************************************/
00079 
00080 unsigned int TreeTemplateTools::getNumberOfLeaves(const Node& node)
00081 {
00082   unsigned int nbLeaves = 0;
00083   if (node.isLeaf())
00084   {
00085     nbLeaves++;
00086   }
00087   for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++)
00088   {
00089     nbLeaves += getNumberOfLeaves(*node[i]);
00090   }
00091   return nbLeaves;
00092 }
00093 
00094 /******************************************************************************/
00095 
00096 unsigned int TreeTemplateTools::getNumberOfNodes(const Node& node)
00097 {
00098   unsigned int nbNodes = 1;
00099   for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++)
00100   {
00101     nbNodes += getNumberOfNodes(*node[i]);
00102   }
00103   return nbNodes;
00104 }
00105 
00106 /******************************************************************************/
00107 
00108 vector<string> TreeTemplateTools::getLeavesNames(const Node& node)
00109 {
00110   vector<string> names;
00111   if (node.isLeaf())
00112   {
00113     names.push_back(node.getName());
00114   }
00115   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00116   {
00117     vector<string> subNames = getLeavesNames(*node.getSon(i));
00118     for (size_t j = 0; j < subNames.size(); j++)
00119     {
00120       names.push_back(subNames[j]);
00121     }
00122   }
00123   return names;
00124 }
00125 
00126 /******************************************************************************/
00127 
00128 unsigned int TreeTemplateTools::getDepth(const Node& node)
00129 {
00130   unsigned int d = 0;
00131   for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++)
00132   {
00133     unsigned int c = getDepth(*node[i]) + 1;
00134     if (c > d)
00135       d = c;
00136   }
00137   return d;
00138 }
00139 
00140 /******************************************************************************/
00141 
00142 unsigned int TreeTemplateTools::getDepths(const Node& node, map<const Node*, unsigned int>& depths)
00143 {
00144   unsigned int d = 0;
00145   for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++)
00146   {
00147     unsigned int c = getDepths(*node[i], depths) + 1;
00148     if (c > d)
00149       d = c;
00150   }
00151   depths[&node] = d;
00152   return d;
00153 }
00154 
00155 /******************************************************************************/
00156 
00157 double TreeTemplateTools::getHeight(const Node& node)
00158 {
00159   double d = 0;
00160   for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++)
00161   {
00162     const Node* son = node[i];
00163     double dist = son->getDistanceToFather();
00164     double c = getHeight(*son) + dist;
00165     if (c > d)
00166       d = c;
00167   }
00168   return d;
00169 }
00170 
00171 /******************************************************************************/
00172 
00173 double TreeTemplateTools::getHeights(const Node& node, map<const Node*, double>& heights)
00174 {
00175   double d = 0;
00176   for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++)
00177   {
00178     const Node* son = node[i];
00179     double dist = son->getDistanceToFather();
00180     double c = getHeights(*son, heights) + dist;
00181     if (c > d)
00182       d = c;
00183   }
00184   heights[&node] = d;
00185   return d;
00186 }
00187 
00188 /******************************************************************************/
00189 
00190 TreeTemplateTools::Element TreeTemplateTools::getElement(const string& elt) throw (IOException)
00191 {
00192   Element element;
00193   element.length    = ""; // default
00194   element.bootstrap = ""; // default
00195   element.isLeaf    = false; // default
00196 
00197   size_t colonIndex;
00198   bool hasColon = false;
00199   for (colonIndex = elt.size(); colonIndex > 0 && elt[colonIndex] != ')'; colonIndex--)
00200   {
00201     if (elt[colonIndex] == ':')
00202     {
00203       hasColon = true;
00204       break;
00205     }
00206   }
00207   try
00208   {
00209     string elt2;
00210     if (hasColon)
00211     {
00212       // this is an element with length:
00213       elt2 = elt.substr(0, colonIndex);
00214       element.length = TextTools::removeSurroundingWhiteSpaces(elt.substr(colonIndex + 1));
00215     }
00216     else
00217     {
00218       // this is an element without length;
00219       elt2 = elt;
00220     }
00221 
00222     string::size_type lastP = elt2.rfind(')');
00223     string::size_type firstP = elt2.find('(');
00224     if (firstP == string::npos)
00225     {
00226       // This is a leaf:
00227       element.content = elt2;
00228       element.isLeaf = true;
00229     }
00230     else
00231     {
00232       // This is a node:
00233       if (lastP < firstP)
00234         throw IOException("TreeTemplateTools::getElement(). Invalid format: bad closing parenthesis in " + elt2);
00235       element.content = TextTools::removeSurroundingWhiteSpaces(elt2.substr(firstP + 1, lastP - firstP - 1));
00236       string bootstrap = TextTools::removeSurroundingWhiteSpaces(elt2.substr(lastP + 1));
00237       // cout << "ELEMENT: BOOTSTRAP: " << bootstrap << endl;
00238       if (!TextTools::isEmpty(bootstrap))
00239       {
00240         element.bootstrap = bootstrap;
00241       }
00242     }
00243   }
00244   catch (exception e)
00245   {
00246     throw IOException("Bad tree description: " + elt);
00247   }
00248   return element;
00249 }
00250 
00251 /******************************************************************************/
00252 
00253 
00254 Node* TreeTemplateTools::parenthesisToNode(const string& description, bool bootstrap, const string& propertyName, bool withId)
00255 {
00256   //cout << "NODE: " << description << endl;
00257   Element elt = getElement(description);
00258 
00259   // New node:
00260   Node* node = new Node();
00261   if (!TextTools::isEmpty(elt.length))
00262   {
00263     node->setDistanceToFather(TextTools::toDouble(elt.length));
00264     // cout << "NODE: LENGTH: " << * elt.length << endl;
00265   }
00266   if (!TextTools::isEmpty(elt.bootstrap))
00267   {
00268     if (withId)
00269     {
00270       node->setId(TextTools::toInt(elt.bootstrap));
00271     }
00272     else
00273     {
00274       if (bootstrap)
00275       {
00276         node->setBranchProperty(TreeTools::BOOTSTRAP, Number<double>(TextTools::toDouble(elt.bootstrap)));
00277         // cout << "NODE: BOOTSTRAP: " << * elt.bootstrap << endl;
00278       }
00279       else
00280       {
00281         node->setBranchProperty(propertyName, BppString(elt.bootstrap));
00282       }
00283     }
00284   }
00285 
00286   NestedStringTokenizer nt(elt.content, "(", ")", ",");
00287   vector<string> elements;
00288   while (nt.hasMoreToken())
00289   {
00290     elements.push_back(nt.nextToken());
00291   }
00292 
00293   if (elt.isLeaf)
00294   {
00295     //This is a leaf:
00296     string name = TextTools::removeSurroundingWhiteSpaces(elements[0]);
00297     if (withId)
00298     {
00299       StringTokenizer st(name, "_", true, true);
00300       ostringstream realName;
00301       for (int i = 0; i < static_cast<int>(st.numberOfRemainingTokens()) - 1; i++)
00302       {
00303         if (i != 0)
00304         {
00305           realName << "_";
00306         }
00307         realName << st.getToken(i);
00308       }
00309       node->setName(realName.str());
00310       node->setId(TextTools::toInt(st.getToken(st.numberOfRemainingTokens() - 1)));
00311     }
00312     else
00313     {
00314       node->setName(name);
00315     }
00316   }
00317   else
00318   {
00319     // This is a node:
00320     for (size_t i = 0; i < elements.size(); i++)
00321     {
00322       // cout << "NODE: SUBNODE: " << i << ", " << elements[i] << endl;
00323       Node* son = parenthesisToNode(elements[i], bootstrap, propertyName, withId);
00324       node->addSon(son);
00325     }
00326   }
00327   return node;
00328 }
00329 
00330 /******************************************************************************/
00331 
00332 TreeTemplate<Node>* TreeTemplateTools::parenthesisToTree(const string& description, bool bootstrap, const string& propertyName, bool withId) throw (Exception)
00333 {
00334   string::size_type semi = description.rfind(';');
00335   if (semi == string::npos)
00336     throw Exception("TreeTemplateTools::parenthesisToTree(). Bad format: no semi-colon found.");
00337   string content = description.substr(0, semi);
00338   Node* node = parenthesisToNode(content, bootstrap, propertyName, withId);
00339   TreeTemplate<Node>* tree = new TreeTemplate<Node>();
00340   tree->setRootNode(node);
00341   if (!withId)
00342   {
00343     tree->resetNodesId();
00344   }
00345   return tree;
00346 }
00347 
00348 /******************************************************************************/
00349 
00350 string TreeTemplateTools::nodeToParenthesis(const Node& node, bool writeId)
00351 {
00352   ostringstream s;
00353   if (node.isLeaf())
00354   {
00355     s << node.getName();
00356   }
00357   else
00358   {
00359     s << "(";
00360     s << nodeToParenthesis(*node[0], writeId);
00361     for (int i = 1; i < static_cast<int>(node.getNumberOfSons()); i++)
00362     {
00363       s << "," << nodeToParenthesis(*node[i], writeId);
00364     }
00365     s << ")";
00366   }
00367   if (writeId)
00368   {
00369     if (node.isLeaf())
00370       s << "_";
00371     s << node.getId();
00372   }
00373   else
00374   {
00375     if (node.hasBranchProperty(TreeTools::BOOTSTRAP))
00376       s << (dynamic_cast<const Number<double>*>(node.getBranchProperty(TreeTools::BOOTSTRAP))->getValue());
00377   }
00378   if (node.hasDistanceToFather())
00379     s << ":" << node.getDistanceToFather();
00380   return s.str();
00381 }
00382 
00383 /******************************************************************************/
00384 
00385 string TreeTemplateTools::nodeToParenthesis(const Node& node, bool bootstrap, const string& propertyName)
00386 {
00387   ostringstream s;
00388   if (node.isLeaf())
00389   {
00390     s << node.getName();
00391   }
00392   else
00393   {
00394     s << "(";
00395     s << nodeToParenthesis(*node[0], bootstrap, propertyName);
00396     for (int i = 1; i < static_cast<int>(node.getNumberOfSons()); i++)
00397     {
00398       s << "," << nodeToParenthesis(*node[i], bootstrap, propertyName);
00399     }
00400     s << ")";
00401 
00402     if (bootstrap)
00403     {
00404       if (node.hasBranchProperty(TreeTools::BOOTSTRAP))
00405         s << (dynamic_cast<const Number<double>*>(node.getBranchProperty(TreeTools::BOOTSTRAP))->getValue());
00406     }
00407     else
00408     {
00409       if (node.hasBranchProperty(propertyName))
00410       {
00411         const BppString* ppt = dynamic_cast<const BppString*>(node.getBranchProperty(propertyName));
00412         if (ppt) 
00413           s << *ppt;
00414         else
00415           throw Exception("TreeTemplateTools::nodeToParenthesis. Property should be a BppString.");
00416       }
00417     }
00418   }
00419   if (node.hasDistanceToFather())
00420     s << ":" << node.getDistanceToFather();
00421   return s.str();
00422 }
00423 
00424 /******************************************************************************/
00425 
00426 string TreeTemplateTools::treeToParenthesis(const TreeTemplate<Node>& tree, bool writeId)
00427 {
00428   ostringstream s;
00429   s << "(";
00430   const Node* node = tree.getRootNode();
00431   if (node->isLeaf() && node->hasName()) //In case we have a tree like ((A:1.0)); where the root node is an unamed leaf!
00432   {
00433     s << node->getName();
00434     for (size_t i = 0; i < node->getNumberOfSons(); ++i)
00435     {
00436       s << "," << nodeToParenthesis(*node->getSon(i), writeId);
00437     }
00438   }
00439   else
00440   {
00441     s << nodeToParenthesis(*node->getSon(0), writeId);
00442     for (size_t i = 1; i < node->getNumberOfSons(); ++i)
00443     {
00444       s << "," << nodeToParenthesis(*node->getSon(i), writeId);
00445     }
00446   }
00447   s << ")";
00448   if (node->hasDistanceToFather())
00449     s << ":" << node->getDistanceToFather();
00450   s << ";" << endl;
00451   return s.str();
00452 }
00453 
00454 /******************************************************************************/
00455 
00456 string TreeTemplateTools::treeToParenthesis(const TreeTemplate<Node>& tree, bool bootstrap, const string& propertyName)
00457 {
00458   ostringstream s;
00459   s << "(";
00460   const Node* node = tree.getRootNode();
00461   if (node->isLeaf())
00462   {
00463     s << node->getName();
00464     for (size_t i = 0; i < node->getNumberOfSons(); i++)
00465     {
00466       s << "," << nodeToParenthesis(*node->getSon(i), bootstrap, propertyName);
00467     }
00468   }
00469   else
00470   {
00471     s << nodeToParenthesis(*node->getSon(0), bootstrap, propertyName);
00472     for (size_t i = 1; i < node->getNumberOfSons(); i++)
00473     {
00474       s << "," << nodeToParenthesis(*node->getSon(i), bootstrap, propertyName);
00475     }
00476   }
00477   s << ")";
00478   if (bootstrap)
00479   {
00480     if (node->hasBranchProperty(TreeTools::BOOTSTRAP))
00481       s << (dynamic_cast<const Number<double>*>(node->getBranchProperty(TreeTools::BOOTSTRAP))->getValue());
00482   }
00483   else
00484   {
00485     if (node->hasBranchProperty(propertyName)) {
00486       const BppString* ppt =dynamic_cast<const BppString*>(node->getBranchProperty(propertyName));
00487       if (ppt)
00488         s << *ppt;
00489       else
00490         throw Exception("TreeTemplateTools::nodeToParenthesis. Property should be a BppString.");
00491     }
00492   }
00493   s << ";" << endl;
00494   return s.str();
00495 }
00496 
00497 /******************************************************************************/
00498 
00499 Vdouble TreeTemplateTools::getBranchLengths(const Node& node) throw (NodePException)
00500 {
00501   Vdouble brLen(1);
00502   brLen[0] = node.getDistanceToFather();
00503   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00504   {
00505     Vdouble sonBrLen = getBranchLengths(*node.getSon(i));
00506     for (size_t j = 0; j < sonBrLen.size(); j++)
00507     {
00508       brLen.push_back(sonBrLen[j]);
00509     }
00510   }
00511   return brLen;
00512 }
00513 
00514 /******************************************************************************/
00515 
00516 double TreeTemplateTools::getTotalLength(const Node& node, bool includeAncestor) throw (NodePException)
00517 {
00518   if (includeAncestor && !node.hasDistanceToFather())
00519     throw NodePException("TreeTools::getTotalLength(). No branch length.", &node);
00520   double length = includeAncestor ? node.getDistanceToFather() : 0;
00521   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00522   {
00523     length += getTotalLength(*node.getSon(i), true);
00524   }
00525   return length;
00526 }
00527 
00528 /******************************************************************************/
00529 
00530 void TreeTemplateTools::setBranchLengths(Node& node, double brLen)
00531 {
00532   node.setDistanceToFather(brLen);
00533   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00534   {
00535     setBranchLengths(*node.getSon(i), brLen);
00536   }
00537 }
00538 
00539 /******************************************************************************/
00540 
00541 void TreeTemplateTools::deleteBranchLengths(Node& node)
00542 {
00543   node.deleteDistanceToFather();
00544   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00545   {
00546     deleteBranchLengths(*node.getSon(i));
00547   }
00548 }
00549 
00550 /******************************************************************************/
00551 
00552 void TreeTemplateTools::setVoidBranchLengths(Node& node, double brLen)
00553 {
00554   if (!node.hasDistanceToFather())
00555     node.setDistanceToFather(brLen);
00556   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00557   {
00558     setVoidBranchLengths(*node.getSon(i), brLen);
00559   }
00560 }
00561 
00562 /******************************************************************************/
00563 
00564 void TreeTemplateTools::scaleTree(Node& node, double factor) throw (NodePException)
00565 {
00566   if (node.hasFather())
00567   {
00568     node.setDistanceToFather(node.getDistanceToFather() * factor);
00569   }
00570   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00571   {
00572     scaleTree(*node.getSon(i), factor);
00573   }
00574 }
00575 
00576 /******************************************************************************/
00577 
00578 TreeTemplate<Node>* TreeTemplateTools::getRandomTree(vector<string>& leavesNames, bool rooted)
00579 {
00580   if (leavesNames.size() == 0)
00581     return 0;  // No taxa.
00582   // This vector will contain all nodes.
00583   // Start with all leaves, and then group nodes randomly 2 by 2.
00584   // Att the end, contains only the root node of the tree.
00585   vector<Node*> nodes(leavesNames.size());
00586   // Create all leaves nodes:
00587   for (size_t i = 0; i < leavesNames.size(); ++i)
00588   {
00589     nodes[i] = new Node(leavesNames[i]);
00590   }
00591   // Now group all nodes:
00592   while (nodes.size() > (rooted ? 2 : 3))
00593   {
00594     // Select random nodes:
00595     int pos1 = RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(nodes.size()));
00596     Node* node1 = nodes[pos1];
00597     nodes.erase(nodes.begin() + pos1);
00598     int pos2 = RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(nodes.size()));
00599     Node* node2 = nodes[pos2];
00600     nodes.erase(nodes.begin() + pos2);
00601     // Add new node:
00602     Node* parent = new Node();
00603     parent->addSon(node1);
00604     parent->addSon(node2);
00605     nodes.push_back(parent);
00606   }
00607   // Return tree with last node as root node:
00608   Node* root = new Node();
00609   for (size_t i = 0; i < nodes.size(); ++i)
00610   {
00611     root->addSon(nodes[i]);
00612   }
00613   TreeTemplate<Node>* tree = new TreeTemplate<Node>(root);
00614   tree->resetNodesId();
00615   return tree;
00616 }
00617 
00618 /******************************************************************************/
00619 
00620 vector<Node*> TreeTemplateTools::getPathBetweenAnyTwoNodes(Node& node1, Node& node2, bool includeAncestor)
00621 {
00622   vector<Node*> path;
00623   vector<Node*> pathMatrix1;
00624   vector<Node*> pathMatrix2;
00625 
00626   Node* nodeUp = &node1;
00627   while (nodeUp->hasFather())   // while(nodeUp != root)
00628   {
00629     pathMatrix1.push_back(nodeUp);
00630     nodeUp = nodeUp->getFather();
00631   }
00632   pathMatrix1.push_back(nodeUp); // The root.
00633 
00634   nodeUp = &node2;
00635   while (nodeUp->hasFather())
00636   {
00637     pathMatrix2.push_back(nodeUp);
00638     nodeUp = nodeUp->getFather();
00639   }
00640   pathMatrix2.push_back(nodeUp); // The root.
00641   // Must check that the two nodes have the same root!!!
00642 
00643   size_t tmp1 = pathMatrix1.size() - 1;
00644   size_t tmp2 = pathMatrix2.size() - 1;
00645 
00646   while (pathMatrix1[tmp1] == pathMatrix2[tmp2]
00647          and tmp1 > 0
00648          and tmp2 > 0)
00649   {
00650     tmp1--; tmp2--;
00651   }
00652 
00653   for (size_t y = 0; y < tmp1; ++y)
00654   {
00655     path.push_back(pathMatrix1[y]);
00656   }
00657   if (includeAncestor)
00658     path.push_back(pathMatrix1[tmp1]);  // pushing once, the Node that was common to both.
00659   for (size_t j = tmp2; j > 0; --j)
00660   {
00661     path.push_back(pathMatrix2[j - 1]);
00662   }
00663   return path;
00664 }
00665 
00666 /******************************************************************************/
00667 
00668 vector<const Node*> TreeTemplateTools::getPathBetweenAnyTwoNodes(const Node& node1, const Node& node2, bool includeAncestor)
00669 {
00670   vector<const Node*> path;
00671   vector<const Node*> pathMatrix1;
00672   vector<const Node*> pathMatrix2;
00673 
00674   const Node* nodeUp = &node1;
00675   while (nodeUp->hasFather())   // while(nodeUp != root)
00676   {
00677     pathMatrix1.push_back(nodeUp);
00678     nodeUp = nodeUp->getFather();
00679   }
00680   pathMatrix1.push_back(nodeUp); // The root.
00681 
00682   nodeUp = &node2;
00683   while (nodeUp->hasFather())
00684   {
00685     pathMatrix2.push_back(nodeUp);
00686     nodeUp = nodeUp->getFather();
00687   }
00688   pathMatrix2.push_back(nodeUp); // The root.
00689   // Must check that the two nodes have the same root!!!
00690 
00691   size_t tmp1 = pathMatrix1.size() - 1;
00692   size_t tmp2 = pathMatrix2.size() - 1;
00693 
00694   while (pathMatrix1[tmp1] == pathMatrix2[tmp2]
00695          and tmp1 > 0
00696          and tmp2 > 0)
00697   {
00698     tmp1--; tmp2--;
00699   }
00700 
00701   for (size_t y = 0; y < tmp1; ++y)
00702   {
00703     path.push_back(pathMatrix1[y]);
00704   }
00705   if (includeAncestor)
00706     path.push_back(pathMatrix1[tmp1]);  // pushing once, the Node that was common to both.
00707   for (size_t j = tmp2; j > 0; --j)
00708   {
00709     path.push_back(pathMatrix2[j - 1]);
00710   }
00711   return path;
00712 }
00713 
00714 /******************************************************************************/
00715 
00716 double TreeTemplateTools::getDistanceBetweenAnyTwoNodes(const Node& node1, const Node& node2)
00717 {
00718   vector<const Node*> path = getPathBetweenAnyTwoNodes(node1, node2, false);
00719   double d = 0;
00720   for (size_t i = 0; i < path.size(); i++)
00721   {
00722     d += path[i]->getDistanceToFather();
00723   }
00724   return d;
00725 }
00726 
00727 /******************************************************************************/
00728 
00729 void TreeTemplateTools::processDistsInSubtree_(const Node* node, DistanceMatrix& matrix, vector< std::pair<string, double> >& distsToNodeFather)
00730 {
00731   distsToNodeFather.clear();
00732 
00733   // node-is-leaf case
00734   if (node->getNumberOfSons() == 0)
00735   {
00736     distsToNodeFather.push_back(make_pair(node->getName(), node->getDistanceToFather()));
00737     return;
00738   }
00739 
00740   // For all leaves in node's subtree, get leaf-to-node distances.
00741   // Leaves are classified upon node's sons.
00742   map<const Node*, vector< pair<string, double> > > leavesDists;
00743   for (size_t i = 0; i < node->getNumberOfSons(); ++i)
00744   {
00745     const Node* son = node->getSon(i);
00746     processDistsInSubtree_(son, matrix, leavesDists[son]); // recursivity
00747   }
00748   // Write leaf-leaf distances to the distance matrix.
00749   // Only pairs in which the two leaves belong to different
00750   // sons are considered.
00751   for (size_t son1_loc = 0; son1_loc < node->getNumberOfSons(); ++son1_loc)
00752   {
00753     for (size_t son2_loc = 0; son2_loc < son1_loc; ++son2_loc)
00754     {
00755       const Node* son1 = node->getSon(son1_loc);
00756       const Node* son2 = node->getSon(son2_loc);
00757 
00758       for (vector< pair<string, double> >::iterator son1_leaf = leavesDists[son1].begin();
00759            son1_leaf != leavesDists[son1].end();
00760            ++son1_leaf)
00761       {
00762         for (vector< pair<string, double> >::iterator son2_leaf = leavesDists[son2].begin();
00763              son2_leaf != leavesDists[son2].end();
00764              ++son2_leaf)
00765         {
00766           matrix(son1_leaf->first, son2_leaf->first) =
00767             matrix(son2_leaf->first, son1_leaf->first) =
00768               ( son1_leaf->second + son2_leaf->second );
00769         }
00770       }
00771     }
00772   }
00773 
00774   // node-is-root case
00775   if (!node->hasFather())
00776   {
00777     // node-is-root-and-leaf case
00778     if (node->isLeaf() )
00779     {
00780       string root_name = node->getName();
00781       for (vector< pair<string, double> >::iterator other_leaf = leavesDists[node->getSon(0)].begin();
00782            other_leaf != leavesDists[node->getSon(0)].end();
00783            ++other_leaf)
00784       {
00785         matrix(root_name, other_leaf->first) = matrix( other_leaf->first, root_name) = other_leaf->second;
00786       }
00787     }
00788 
00789     return;
00790   }
00791 
00792   // Get distances from node's father to considered leaves
00793   distsToNodeFather.clear();
00794   double nodeToFather = node->getDistanceToFather();
00795   for (map<const Node*, vector<pair<string, double> > >::iterator son = leavesDists.begin(); son != leavesDists.end(); ++son)
00796   {
00797     for (vector< pair<string, double> >::iterator leaf = (son->second).begin(); leaf != (son->second).end(); ++leaf)
00798     {
00799       distsToNodeFather.push_back(make_pair(leaf->first, (leaf->second + nodeToFather)));
00800     }
00801   }
00802 }
00803 
00804 DistanceMatrix* TreeTemplateTools::getDistanceMatrix(const TreeTemplate<Node>& tree)
00805 {
00806   DistanceMatrix* matrix = new DistanceMatrix(tree.getLeavesNames());
00807   vector< pair<string, double> > distsToRoot;
00808   processDistsInSubtree_(tree.getRootNode(), *matrix, distsToRoot);
00809   return matrix;
00810 }
00811 
00812 /******************************************************************************/
00813 
00814 std::vector<const Node*> TreeTemplateTools::getRemainingNeighbors(const Node* node1, const Node* node2, const Node* node3)
00815 {
00816   vector<const Node*> neighbors = node1->getNeighbors();
00817   vector<const Node*> neighbors2;
00818   for (size_t k = 0; k < neighbors.size(); k++)
00819   {
00820     const Node* n = neighbors[k];
00821     if (n != node2 && n != node3)
00822       neighbors2.push_back(n);
00823   }
00824   return neighbors2;
00825 }
00826 
00827 /******************************************************************************/
00828 
00829 void TreeTemplateTools::incrementAllIds(Node* node, int increment)
00830 {
00831   node->setId(node->getId() + increment);
00832   for (size_t i = 0; i < node->getNumberOfSons(); i++)
00833   {
00834     incrementAllIds(node->getSon(i), increment);
00835   }
00836 }
00837 
00838 /******************************************************************************/
00839 
00840 void TreeTemplateTools::getNodePropertyNames(const Node& node, vector<string>& propertyNames)
00841 {
00842   VectorTools::extend(propertyNames, node.getNodePropertyNames());
00843   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00844   {
00845     getNodePropertyNames(*node.getSon(i), propertyNames);
00846   }
00847 }
00848 
00849 void TreeTemplateTools::getNodeProperties(const Node& node, const string& propertyName, map<int, const Clonable*>& properties)
00850 {
00851   if (node.hasNodeProperty(propertyName))
00852     properties[node.getId()] = node.getNodeProperty(propertyName);
00853   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00854   {
00855     getNodeProperties(*node.getSon(i), propertyName, properties);
00856   }
00857 }
00858 
00859 void TreeTemplateTools::getNodeProperties(Node& node, const string& propertyName, map<int, Clonable*>& properties)
00860 {
00861   if (node.hasNodeProperty(propertyName))
00862     properties[node.getId()] = node.getNodeProperty(propertyName);
00863   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00864   {
00865     getNodeProperties(*node.getSon(i), propertyName, properties);
00866   }
00867 }
00868 
00869 /******************************************************************************/
00870 
00871 void TreeTemplateTools::getBranchPropertyNames(const Node& node, vector<string>& propertyNames)
00872 {
00873   VectorTools::extend(propertyNames, node.getBranchPropertyNames());
00874   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00875   {
00876     getBranchPropertyNames(*node.getSon(i), propertyNames);
00877   }
00878 }
00879 
00880 void TreeTemplateTools::getBranchProperties(const Node& node, const string& propertyName, map<int, const Clonable*>& properties)
00881 {
00882   if (node.hasBranchProperty(propertyName))
00883     properties[node.getId()] = node.getBranchProperty(propertyName);
00884   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00885   {
00886     getBranchProperties(*node.getSon(i), propertyName, properties);
00887   }
00888 }
00889 
00890 void TreeTemplateTools::getBranchProperties(Node& node, const string& propertyName, map<int, Clonable*>& properties)
00891 {
00892   if (node.hasBranchProperty(propertyName))
00893     properties[node.getId()] = node.getBranchProperty(propertyName);
00894   for (size_t i = 0; i < node.getNumberOfSons(); i++)
00895   {
00896     getBranchProperties(*node.getSon(i), propertyName, properties);
00897   }
00898 }
00899 
00900 /******************************************************************************/
00901 
00902 bool TreeTemplateTools::haveSameOrderedTopology(const Node& n1, const Node& n2)
00903 {
00904   if (n1.isLeaf() && n2.isLeaf() && n1.getName() != n2.getName())
00905     return false;
00906   size_t nl1 = n1.getNumberOfSons();
00907   size_t nl2 = n2.getNumberOfSons();
00908   if (nl1 != nl2)
00909     return false;
00910 
00911   bool test = true;
00912   for (size_t i = 0; test && i < n1.getNumberOfSons(); ++i)
00913   {
00914     test &= haveSameOrderedTopology(*n1.getSon(i), *n2.getSon(i));
00915   }
00916   return test;
00917 }
00918 
00919 /******************************************************************************/
00920 
00921 TreeTemplateTools::OrderTreeData_ TreeTemplateTools::orderTree_(Node& node, bool downward, bool orderLeaves)
00922 {
00923   OrderTreeData_ otd;
00924 
00925   if (node.isLeaf() && node.hasFather())
00926   {
00927     otd.size = 1;
00928     otd.firstLeaf = node.getName();
00929   }
00930   else
00931   {
00932     vector<size_t> nbSons;
00933     vector<string> firstLeaves;
00934     for (size_t i = 0; i < node.getNumberOfSons(); i++)
00935     {
00936       OrderTreeData_ otdsub = orderTree_(*node.getSon(i), downward, orderLeaves);
00937       if (i == 0)
00938         otd.firstLeaf = otdsub.firstLeaf;
00939       else if (orderLeaves && otdsub.firstLeaf < otd.firstLeaf)
00940         otd.firstLeaf = otdsub.firstLeaf;
00941       nbSons.push_back(otdsub.size);
00942       firstLeaves.push_back(otdsub.firstLeaf);
00943     }
00944     otd.size = VectorTools::sum(nbSons);
00945 
00946     // Now swap nodes:
00947     if (downward)
00948     {
00949       for (size_t i = 0; i < nbSons.size() - 1; ++i)
00950       {
00951         size_t pos;
00952         vector<size_t> index = VectorTools::whichMaxAll(nbSons);
00953         if (index.size() == 1 || !orderLeaves)
00954         {
00955           pos = index[0];
00956         }
00957         else
00958         {
00959           // There are ties to solve:
00960           vector<string> v;
00961           for (size_t j = 0; j < index.size(); ++j)
00962           {
00963             v.push_back(firstLeaves[index[j]]);
00964           }
00965           size_t mx = VectorTools::whichMax(v);
00966           pos = index[mx];
00967         }
00968         if (pos != i)
00969         {
00970           node.swap(i, pos);
00971           nbSons[pos] = nbSons[i];
00972         }
00973         nbSons[i] = 0;
00974       }
00975     }
00976     else
00977     {
00978       for (size_t i = 0; i < nbSons.size() - 1; ++i)
00979       {
00980         size_t pos;
00981         vector<size_t> index = VectorTools::whichMinAll(nbSons);
00982         if (index.size() == 1 || !orderLeaves)
00983         {
00984           pos = index[0];
00985         }
00986         else
00987         {
00988           // There are ties to solve:
00989           vector<string> v;
00990           for (size_t j = 0; j < index.size(); ++j)
00991           {
00992             v.push_back(firstLeaves[index[j]]);
00993           }
00994           size_t mx = VectorTools::whichMin(v);
00995           pos = index[mx];
00996         }
00997         if (pos != i)
00998         {
00999           node.swap(i, pos);
01000           nbSons[pos] = nbSons[i];
01001         }
01002         nbSons[i] = otd.size + 1;
01003       }
01004     }
01005   }
01006   return otd;
01007 }
01008 
01009 /******************************************************************************/
01010 
01011 void TreeTemplateTools::midRoot (TreeTemplate<Node>& tree, const string& criterion)
01012 {
01013   if(not (criterion == "variance" || "sum of squares"))
01014     throw Exception("TreeTemplateTools::reRoot Illegal criterion value '" + criterion + "'");
01015 
01016   if(tree.isRooted())
01017     tree.unroot();
01018   Node* ref_root = tree.getRootNode();
01019    /*
01020    * The bestRoot object records :
01021    * -- the current best branch : .first
01022    * -- the current best value of the criterion : .second["value"]
01023    * -- the best position of the root on the branch : .second["position"]
01024    *      0 is toward the original root, 1 is away from it
01025    */
01026   pair<Node*, map<string, double> > best_root_branch;
01027   best_root_branch.first = ref_root; // nota: the root does not correspond to a branch as it has no father
01028   best_root_branch.second ["position"] = -1;
01029   best_root_branch.second ["score"] = numeric_limits<double>::max();
01030 
01031   // find the best root
01032   TreeTemplateTools::getBestRootInSubtree(tree, criterion, ref_root, best_root_branch);
01033   tree.rootAt(ref_root); // back to the original root
01034 
01035   // reroot
01036   const double pos = best_root_branch.second["position"];
01037   if(pos<1e-6 or pos>1-1e-6)
01038     // The best root position is on a node (this is often the case with the sum of squares criterion)
01039     tree.rootAt(pos<1e-6 ? best_root_branch.first->getFather() : best_root_branch.first);
01040   else
01041     // The best root position is somewhere on a branch (a new Node is created)
01042     {
01043     Node* new_root = new Node();
01044     new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) );
01045 
01046     double root_branch_length = best_root_branch.first->getDistanceToFather();
01047     Node* best_root_father = best_root_branch.first->getFather();
01048 
01049     best_root_father->removeSon(best_root_branch.first);
01050     best_root_father->addSon(new_root);
01051     new_root->addSon(best_root_branch.first);
01052 
01053     new_root->setDistanceToFather(max(pos * root_branch_length, 1e-6));
01054     best_root_branch.first->setDistanceToFather(max((1-pos) * root_branch_length, 1e-6));
01055 
01056     // The two branches leaving the root must have the same branch properties
01057     const vector<string> branch_properties = best_root_branch.first->getBranchPropertyNames();
01058     for(vector<string>::const_iterator p = branch_properties.begin(); p!=branch_properties.end(); ++p)
01059       new_root->setBranchProperty(*p, *best_root_branch.first->getBranchProperty(*p));
01060 
01061     tree.rootAt(new_root);
01062     }
01063 }
01064 
01065 /******************************************************************************/
01066 
01067 double TreeTemplateTools::getRadius (TreeTemplate<Node>& tree)
01068 {
01069   TreeTemplateTools::midRoot(tree, "sum of squares");
01070   Moments_ moments = getSubtreeMoments(tree.getRootNode());
01071   double radius = moments.sum / moments.numberOfLeaves;
01072   return radius;
01073 }
01074 
01075 /******************************************************************************/
01076 
01077 void TreeTemplateTools::getBestRootInSubtree (TreeTemplate<Node>& tree, const string& criterion, Node* node, pair<Node*, map<string, double> >& bestRoot)
01078 {
01079   const vector<Node*> sons = node->getSons(); // copy
01080   tree.rootAt(node);
01081 
01082   // Try to place the root on each branch downward node
01083   for(vector<Node*>::const_iterator son=sons.begin(); son!=sons.end(); ++son)
01084     {
01085     // Compute the moment of the subtree on son's side
01086     Moments_ son_moment = getSubtreeMoments(*son);
01087 
01088     // Compute the moment of the subtree on node's side
01089     tree.rootAt(*son);
01090     Moments_ node_moment = getSubtreeMoments(node);
01091     tree.rootAt(node);
01092 
01093     /*
01094      * Get the position of the root on this branch that
01095      * minimizes the root-to-leaves distances variance.
01096      *
01097      * This variance can be written in the form A x^2 + B x + C
01098      */
01099     double min_criterion_value;
01100     double best_position; // 0 is toward the root, 1 is away from it
01101 
01102     const TreeTemplateTools::Moments_& m1 = node_moment;
01103     const TreeTemplateTools::Moments_& m2 = son_moment;
01104     const double d = (**son).getDistanceToFather();
01105     const double n1 = m1.numberOfLeaves;
01106     const double n2 = m2.numberOfLeaves;
01107 
01108     double A=0, B=0, C=0;
01109     if(criterion == "sum of squares")
01110       {
01111       A = (n1 + n2) * d * d;
01112       B = 2 * d * (m1.sum - m2.sum) - 2 * n2 * d * d;
01113       C = m1.squaresSum + m2.squaresSum
01114           + 2 * m2.sum * d
01115           + n2 * d * d;
01116       }
01117     else if(criterion == "variance")
01118       {
01119       A = 4 * n1 * n2 * d*d;
01120       B = 4 * d * ( n2 * m1.sum - n1 * m2.sum - d * n1 * n2);
01121       C = (n1 + n2) * (m1.squaresSum + m2.squaresSum) + n1 * d * n2 * d
01122           + 2 * n1 * d * m2.sum - 2 * n2 * d * m1.sum
01123           - (m1.sum + m2.sum) * (m1.sum + m2.sum);
01124       }
01125 
01126     if(A < 1e-20)
01127       {
01128       min_criterion_value = numeric_limits<double>::max();
01129       best_position = 0.5;
01130       }
01131     else
01132       {
01133       min_criterion_value = C - B*B / (4*A);
01134       best_position = - B / (2*A);
01135       if(best_position < 0)
01136         {
01137         best_position = 0;
01138         min_criterion_value = C;
01139         }
01140       else if(best_position > 1)
01141         {
01142         best_position = 1;
01143         min_criterion_value = A+B+C;
01144         }
01145       }
01146 
01147     // Is this branch is the best seen, update 'bestRoot'
01148     if(min_criterion_value < bestRoot.second["score"])
01149       {
01150       bestRoot.first = *son;
01151       bestRoot.second["position"] = best_position;
01152       bestRoot.second["score"] = min_criterion_value;
01153       }
01154 
01155     // Recurse
01156     TreeTemplateTools::getBestRootInSubtree(tree, criterion, *son, bestRoot);
01157     }
01158 }
01159 
01160 /******************************************************************************/
01161 
01162 TreeTemplateTools::Moments_ TreeTemplateTools::getSubtreeMoments (const Node* node)
01163 {
01164   TreeTemplateTools::Moments_ moments = {0,0,0};
01165 
01166   if(node->isLeaf())
01167     {
01168     moments.numberOfLeaves = 1;
01169     }
01170   else
01171     {
01172     const size_t nsons = node->getNumberOfSons();
01173     for(size_t i=0; i<nsons; ++i)
01174       {
01175       const Node* son = node->getSon(i);
01176       const TreeTemplateTools::Moments_ son_moments = TreeTemplateTools::getSubtreeMoments(son);
01177       const double d = son->getDistanceToFather();
01178       moments.numberOfLeaves += son_moments.numberOfLeaves;
01179       moments.sum += son_moments.sum + d * son_moments.numberOfLeaves;
01180       moments.squaresSum += son_moments.squaresSum + 2 * d * son_moments.sum + son_moments.numberOfLeaves * d*d;
01181       }
01182     }
01183 
01184   return moments;
01185 }
01186 
01187 /******************************************************************************/
 All Classes Namespaces Files Functions Variables Typedefs Friends