|
bpp-phyl
2.1.0
|
00001 // 00002 // File: TreeTemplateTools.cpp 00003 // Created by: Julien Dutheil 00004 // Created on: Fri Oct 13 13:00 2006 00005 // From file TreeTools.cpp 00006 // Created on: Wed Aug 6 13:45:28 2003 00007 // 00008 00009 /* 00010 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) 00011 00012 This software is a computer program whose purpose is to provide classes 00013 for phylogenetic data analysis. 00014 00015 This software is governed by the CeCILL license under French law and 00016 abiding by the rules of distribution of free software. You can use, 00017 modify and/ or redistribute the software under the terms of the CeCILL 00018 license as circulated by CEA, CNRS and INRIA at the following URL 00019 "http://www.cecill.info". 00020 00021 As a counterpart to the access to the source code and rights to copy, 00022 modify and redistribute granted by the license, users are provided only 00023 with a limited warranty and the software's author, the holder of the 00024 economic rights, and the successive licensors have only limited 00025 liability. 00026 00027 In this respect, the user's attention is drawn to the risks associated 00028 with loading, using, modifying and/or developing or reproducing the 00029 software by the user in light of its specific status of free software, 00030 that may mean that it is complicated to manipulate, and that also 00031 therefore means that it is reserved for developers and experienced 00032 professionals having in-depth computer knowledge. Users are therefore 00033 encouraged to load and test the software's suitability as regards their 00034 requirements in conditions enabling the security of their systems and/or 00035 data to be ensured and, more generally, to use and operate it in the 00036 same conditions as regards security. 00037 00038 The fact that you are presently reading this means that you have had 00039 knowledge of the CeCILL license and that you accept its terms. 00040 */ 00041 00042 #include "TreeTemplateTools.h" 00043 #include "TreeTemplate.h" 00044 00045 #include <Bpp/Numeric/Number.h> 00046 #include <Bpp/BppString.h> 00047 #include <Bpp/Text/StringTokenizer.h> 00048 #include <Bpp/Text/NestedStringTokenizer.h> 00049 #include <Bpp/Text/TextTools.h> 00050 #include <Bpp/Numeric/Random/RandomTools.h> 00051 00052 using namespace bpp; 00053 00054 // From the STL: 00055 #include <iostream> 00056 #include <sstream> 00057 #include <limits> 00058 00059 using namespace std; 00060 00061 /******************************************************************************/ 00062 00063 bool TreeTemplateTools::isMultifurcating(const Node& node) 00064 { 00065 if (node.getNumberOfSons() > 2) 00066 return true; 00067 else 00068 { 00069 bool b = false; 00070 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00071 { 00072 b = b || isMultifurcating(*node.getSon(i)); 00073 } 00074 return b; 00075 } 00076 } 00077 00078 /******************************************************************************/ 00079 00080 unsigned int TreeTemplateTools::getNumberOfLeaves(const Node& node) 00081 { 00082 unsigned int nbLeaves = 0; 00083 if (node.isLeaf()) 00084 { 00085 nbLeaves++; 00086 } 00087 for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++) 00088 { 00089 nbLeaves += getNumberOfLeaves(*node[i]); 00090 } 00091 return nbLeaves; 00092 } 00093 00094 /******************************************************************************/ 00095 00096 unsigned int TreeTemplateTools::getNumberOfNodes(const Node& node) 00097 { 00098 unsigned int nbNodes = 1; 00099 for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++) 00100 { 00101 nbNodes += getNumberOfNodes(*node[i]); 00102 } 00103 return nbNodes; 00104 } 00105 00106 /******************************************************************************/ 00107 00108 vector<string> TreeTemplateTools::getLeavesNames(const Node& node) 00109 { 00110 vector<string> names; 00111 if (node.isLeaf()) 00112 { 00113 names.push_back(node.getName()); 00114 } 00115 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00116 { 00117 vector<string> subNames = getLeavesNames(*node.getSon(i)); 00118 for (size_t j = 0; j < subNames.size(); j++) 00119 { 00120 names.push_back(subNames[j]); 00121 } 00122 } 00123 return names; 00124 } 00125 00126 /******************************************************************************/ 00127 00128 unsigned int TreeTemplateTools::getDepth(const Node& node) 00129 { 00130 unsigned int d = 0; 00131 for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++) 00132 { 00133 unsigned int c = getDepth(*node[i]) + 1; 00134 if (c > d) 00135 d = c; 00136 } 00137 return d; 00138 } 00139 00140 /******************************************************************************/ 00141 00142 unsigned int TreeTemplateTools::getDepths(const Node& node, map<const Node*, unsigned int>& depths) 00143 { 00144 unsigned int d = 0; 00145 for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++) 00146 { 00147 unsigned int c = getDepths(*node[i], depths) + 1; 00148 if (c > d) 00149 d = c; 00150 } 00151 depths[&node] = d; 00152 return d; 00153 } 00154 00155 /******************************************************************************/ 00156 00157 double TreeTemplateTools::getHeight(const Node& node) 00158 { 00159 double d = 0; 00160 for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++) 00161 { 00162 const Node* son = node[i]; 00163 double dist = son->getDistanceToFather(); 00164 double c = getHeight(*son) + dist; 00165 if (c > d) 00166 d = c; 00167 } 00168 return d; 00169 } 00170 00171 /******************************************************************************/ 00172 00173 double TreeTemplateTools::getHeights(const Node& node, map<const Node*, double>& heights) 00174 { 00175 double d = 0; 00176 for (int i = 0; i < static_cast<int>(node.getNumberOfSons()); i++) 00177 { 00178 const Node* son = node[i]; 00179 double dist = son->getDistanceToFather(); 00180 double c = getHeights(*son, heights) + dist; 00181 if (c > d) 00182 d = c; 00183 } 00184 heights[&node] = d; 00185 return d; 00186 } 00187 00188 /******************************************************************************/ 00189 00190 TreeTemplateTools::Element TreeTemplateTools::getElement(const string& elt) throw (IOException) 00191 { 00192 Element element; 00193 element.length = ""; // default 00194 element.bootstrap = ""; // default 00195 element.isLeaf = false; // default 00196 00197 size_t colonIndex; 00198 bool hasColon = false; 00199 for (colonIndex = elt.size(); colonIndex > 0 && elt[colonIndex] != ')'; colonIndex--) 00200 { 00201 if (elt[colonIndex] == ':') 00202 { 00203 hasColon = true; 00204 break; 00205 } 00206 } 00207 try 00208 { 00209 string elt2; 00210 if (hasColon) 00211 { 00212 // this is an element with length: 00213 elt2 = elt.substr(0, colonIndex); 00214 element.length = TextTools::removeSurroundingWhiteSpaces(elt.substr(colonIndex + 1)); 00215 } 00216 else 00217 { 00218 // this is an element without length; 00219 elt2 = elt; 00220 } 00221 00222 string::size_type lastP = elt2.rfind(')'); 00223 string::size_type firstP = elt2.find('('); 00224 if (firstP == string::npos) 00225 { 00226 // This is a leaf: 00227 element.content = elt2; 00228 element.isLeaf = true; 00229 } 00230 else 00231 { 00232 // This is a node: 00233 if (lastP < firstP) 00234 throw IOException("TreeTemplateTools::getElement(). Invalid format: bad closing parenthesis in " + elt2); 00235 element.content = TextTools::removeSurroundingWhiteSpaces(elt2.substr(firstP + 1, lastP - firstP - 1)); 00236 string bootstrap = TextTools::removeSurroundingWhiteSpaces(elt2.substr(lastP + 1)); 00237 // cout << "ELEMENT: BOOTSTRAP: " << bootstrap << endl; 00238 if (!TextTools::isEmpty(bootstrap)) 00239 { 00240 element.bootstrap = bootstrap; 00241 } 00242 } 00243 } 00244 catch (exception e) 00245 { 00246 throw IOException("Bad tree description: " + elt); 00247 } 00248 return element; 00249 } 00250 00251 /******************************************************************************/ 00252 00253 00254 Node* TreeTemplateTools::parenthesisToNode(const string& description, bool bootstrap, const string& propertyName, bool withId) 00255 { 00256 //cout << "NODE: " << description << endl; 00257 Element elt = getElement(description); 00258 00259 // New node: 00260 Node* node = new Node(); 00261 if (!TextTools::isEmpty(elt.length)) 00262 { 00263 node->setDistanceToFather(TextTools::toDouble(elt.length)); 00264 // cout << "NODE: LENGTH: " << * elt.length << endl; 00265 } 00266 if (!TextTools::isEmpty(elt.bootstrap)) 00267 { 00268 if (withId) 00269 { 00270 node->setId(TextTools::toInt(elt.bootstrap)); 00271 } 00272 else 00273 { 00274 if (bootstrap) 00275 { 00276 node->setBranchProperty(TreeTools::BOOTSTRAP, Number<double>(TextTools::toDouble(elt.bootstrap))); 00277 // cout << "NODE: BOOTSTRAP: " << * elt.bootstrap << endl; 00278 } 00279 else 00280 { 00281 node->setBranchProperty(propertyName, BppString(elt.bootstrap)); 00282 } 00283 } 00284 } 00285 00286 NestedStringTokenizer nt(elt.content, "(", ")", ","); 00287 vector<string> elements; 00288 while (nt.hasMoreToken()) 00289 { 00290 elements.push_back(nt.nextToken()); 00291 } 00292 00293 if (elt.isLeaf) 00294 { 00295 //This is a leaf: 00296 string name = TextTools::removeSurroundingWhiteSpaces(elements[0]); 00297 if (withId) 00298 { 00299 StringTokenizer st(name, "_", true, true); 00300 ostringstream realName; 00301 for (int i = 0; i < static_cast<int>(st.numberOfRemainingTokens()) - 1; i++) 00302 { 00303 if (i != 0) 00304 { 00305 realName << "_"; 00306 } 00307 realName << st.getToken(i); 00308 } 00309 node->setName(realName.str()); 00310 node->setId(TextTools::toInt(st.getToken(st.numberOfRemainingTokens() - 1))); 00311 } 00312 else 00313 { 00314 node->setName(name); 00315 } 00316 } 00317 else 00318 { 00319 // This is a node: 00320 for (size_t i = 0; i < elements.size(); i++) 00321 { 00322 // cout << "NODE: SUBNODE: " << i << ", " << elements[i] << endl; 00323 Node* son = parenthesisToNode(elements[i], bootstrap, propertyName, withId); 00324 node->addSon(son); 00325 } 00326 } 00327 return node; 00328 } 00329 00330 /******************************************************************************/ 00331 00332 TreeTemplate<Node>* TreeTemplateTools::parenthesisToTree(const string& description, bool bootstrap, const string& propertyName, bool withId) throw (Exception) 00333 { 00334 string::size_type semi = description.rfind(';'); 00335 if (semi == string::npos) 00336 throw Exception("TreeTemplateTools::parenthesisToTree(). Bad format: no semi-colon found."); 00337 string content = description.substr(0, semi); 00338 Node* node = parenthesisToNode(content, bootstrap, propertyName, withId); 00339 TreeTemplate<Node>* tree = new TreeTemplate<Node>(); 00340 tree->setRootNode(node); 00341 if (!withId) 00342 { 00343 tree->resetNodesId(); 00344 } 00345 return tree; 00346 } 00347 00348 /******************************************************************************/ 00349 00350 string TreeTemplateTools::nodeToParenthesis(const Node& node, bool writeId) 00351 { 00352 ostringstream s; 00353 if (node.isLeaf()) 00354 { 00355 s << node.getName(); 00356 } 00357 else 00358 { 00359 s << "("; 00360 s << nodeToParenthesis(*node[0], writeId); 00361 for (int i = 1; i < static_cast<int>(node.getNumberOfSons()); i++) 00362 { 00363 s << "," << nodeToParenthesis(*node[i], writeId); 00364 } 00365 s << ")"; 00366 } 00367 if (writeId) 00368 { 00369 if (node.isLeaf()) 00370 s << "_"; 00371 s << node.getId(); 00372 } 00373 else 00374 { 00375 if (node.hasBranchProperty(TreeTools::BOOTSTRAP)) 00376 s << (dynamic_cast<const Number<double>*>(node.getBranchProperty(TreeTools::BOOTSTRAP))->getValue()); 00377 } 00378 if (node.hasDistanceToFather()) 00379 s << ":" << node.getDistanceToFather(); 00380 return s.str(); 00381 } 00382 00383 /******************************************************************************/ 00384 00385 string TreeTemplateTools::nodeToParenthesis(const Node& node, bool bootstrap, const string& propertyName) 00386 { 00387 ostringstream s; 00388 if (node.isLeaf()) 00389 { 00390 s << node.getName(); 00391 } 00392 else 00393 { 00394 s << "("; 00395 s << nodeToParenthesis(*node[0], bootstrap, propertyName); 00396 for (int i = 1; i < static_cast<int>(node.getNumberOfSons()); i++) 00397 { 00398 s << "," << nodeToParenthesis(*node[i], bootstrap, propertyName); 00399 } 00400 s << ")"; 00401 00402 if (bootstrap) 00403 { 00404 if (node.hasBranchProperty(TreeTools::BOOTSTRAP)) 00405 s << (dynamic_cast<const Number<double>*>(node.getBranchProperty(TreeTools::BOOTSTRAP))->getValue()); 00406 } 00407 else 00408 { 00409 if (node.hasBranchProperty(propertyName)) 00410 { 00411 const BppString* ppt = dynamic_cast<const BppString*>(node.getBranchProperty(propertyName)); 00412 if (ppt) 00413 s << *ppt; 00414 else 00415 throw Exception("TreeTemplateTools::nodeToParenthesis. Property should be a BppString."); 00416 } 00417 } 00418 } 00419 if (node.hasDistanceToFather()) 00420 s << ":" << node.getDistanceToFather(); 00421 return s.str(); 00422 } 00423 00424 /******************************************************************************/ 00425 00426 string TreeTemplateTools::treeToParenthesis(const TreeTemplate<Node>& tree, bool writeId) 00427 { 00428 ostringstream s; 00429 s << "("; 00430 const Node* node = tree.getRootNode(); 00431 if (node->isLeaf() && node->hasName()) //In case we have a tree like ((A:1.0)); where the root node is an unamed leaf! 00432 { 00433 s << node->getName(); 00434 for (size_t i = 0; i < node->getNumberOfSons(); ++i) 00435 { 00436 s << "," << nodeToParenthesis(*node->getSon(i), writeId); 00437 } 00438 } 00439 else 00440 { 00441 s << nodeToParenthesis(*node->getSon(0), writeId); 00442 for (size_t i = 1; i < node->getNumberOfSons(); ++i) 00443 { 00444 s << "," << nodeToParenthesis(*node->getSon(i), writeId); 00445 } 00446 } 00447 s << ")"; 00448 if (node->hasDistanceToFather()) 00449 s << ":" << node->getDistanceToFather(); 00450 s << ";" << endl; 00451 return s.str(); 00452 } 00453 00454 /******************************************************************************/ 00455 00456 string TreeTemplateTools::treeToParenthesis(const TreeTemplate<Node>& tree, bool bootstrap, const string& propertyName) 00457 { 00458 ostringstream s; 00459 s << "("; 00460 const Node* node = tree.getRootNode(); 00461 if (node->isLeaf()) 00462 { 00463 s << node->getName(); 00464 for (size_t i = 0; i < node->getNumberOfSons(); i++) 00465 { 00466 s << "," << nodeToParenthesis(*node->getSon(i), bootstrap, propertyName); 00467 } 00468 } 00469 else 00470 { 00471 s << nodeToParenthesis(*node->getSon(0), bootstrap, propertyName); 00472 for (size_t i = 1; i < node->getNumberOfSons(); i++) 00473 { 00474 s << "," << nodeToParenthesis(*node->getSon(i), bootstrap, propertyName); 00475 } 00476 } 00477 s << ")"; 00478 if (bootstrap) 00479 { 00480 if (node->hasBranchProperty(TreeTools::BOOTSTRAP)) 00481 s << (dynamic_cast<const Number<double>*>(node->getBranchProperty(TreeTools::BOOTSTRAP))->getValue()); 00482 } 00483 else 00484 { 00485 if (node->hasBranchProperty(propertyName)) { 00486 const BppString* ppt =dynamic_cast<const BppString*>(node->getBranchProperty(propertyName)); 00487 if (ppt) 00488 s << *ppt; 00489 else 00490 throw Exception("TreeTemplateTools::nodeToParenthesis. Property should be a BppString."); 00491 } 00492 } 00493 s << ";" << endl; 00494 return s.str(); 00495 } 00496 00497 /******************************************************************************/ 00498 00499 Vdouble TreeTemplateTools::getBranchLengths(const Node& node) throw (NodePException) 00500 { 00501 Vdouble brLen(1); 00502 brLen[0] = node.getDistanceToFather(); 00503 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00504 { 00505 Vdouble sonBrLen = getBranchLengths(*node.getSon(i)); 00506 for (size_t j = 0; j < sonBrLen.size(); j++) 00507 { 00508 brLen.push_back(sonBrLen[j]); 00509 } 00510 } 00511 return brLen; 00512 } 00513 00514 /******************************************************************************/ 00515 00516 double TreeTemplateTools::getTotalLength(const Node& node, bool includeAncestor) throw (NodePException) 00517 { 00518 if (includeAncestor && !node.hasDistanceToFather()) 00519 throw NodePException("TreeTools::getTotalLength(). No branch length.", &node); 00520 double length = includeAncestor ? node.getDistanceToFather() : 0; 00521 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00522 { 00523 length += getTotalLength(*node.getSon(i), true); 00524 } 00525 return length; 00526 } 00527 00528 /******************************************************************************/ 00529 00530 void TreeTemplateTools::setBranchLengths(Node& node, double brLen) 00531 { 00532 node.setDistanceToFather(brLen); 00533 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00534 { 00535 setBranchLengths(*node.getSon(i), brLen); 00536 } 00537 } 00538 00539 /******************************************************************************/ 00540 00541 void TreeTemplateTools::deleteBranchLengths(Node& node) 00542 { 00543 node.deleteDistanceToFather(); 00544 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00545 { 00546 deleteBranchLengths(*node.getSon(i)); 00547 } 00548 } 00549 00550 /******************************************************************************/ 00551 00552 void TreeTemplateTools::setVoidBranchLengths(Node& node, double brLen) 00553 { 00554 if (!node.hasDistanceToFather()) 00555 node.setDistanceToFather(brLen); 00556 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00557 { 00558 setVoidBranchLengths(*node.getSon(i), brLen); 00559 } 00560 } 00561 00562 /******************************************************************************/ 00563 00564 void TreeTemplateTools::scaleTree(Node& node, double factor) throw (NodePException) 00565 { 00566 if (node.hasFather()) 00567 { 00568 node.setDistanceToFather(node.getDistanceToFather() * factor); 00569 } 00570 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00571 { 00572 scaleTree(*node.getSon(i), factor); 00573 } 00574 } 00575 00576 /******************************************************************************/ 00577 00578 TreeTemplate<Node>* TreeTemplateTools::getRandomTree(vector<string>& leavesNames, bool rooted) 00579 { 00580 if (leavesNames.size() == 0) 00581 return 0; // No taxa. 00582 // This vector will contain all nodes. 00583 // Start with all leaves, and then group nodes randomly 2 by 2. 00584 // Att the end, contains only the root node of the tree. 00585 vector<Node*> nodes(leavesNames.size()); 00586 // Create all leaves nodes: 00587 for (size_t i = 0; i < leavesNames.size(); ++i) 00588 { 00589 nodes[i] = new Node(leavesNames[i]); 00590 } 00591 // Now group all nodes: 00592 while (nodes.size() > (rooted ? 2 : 3)) 00593 { 00594 // Select random nodes: 00595 int pos1 = RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(nodes.size())); 00596 Node* node1 = nodes[pos1]; 00597 nodes.erase(nodes.begin() + pos1); 00598 int pos2 = RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(nodes.size())); 00599 Node* node2 = nodes[pos2]; 00600 nodes.erase(nodes.begin() + pos2); 00601 // Add new node: 00602 Node* parent = new Node(); 00603 parent->addSon(node1); 00604 parent->addSon(node2); 00605 nodes.push_back(parent); 00606 } 00607 // Return tree with last node as root node: 00608 Node* root = new Node(); 00609 for (size_t i = 0; i < nodes.size(); ++i) 00610 { 00611 root->addSon(nodes[i]); 00612 } 00613 TreeTemplate<Node>* tree = new TreeTemplate<Node>(root); 00614 tree->resetNodesId(); 00615 return tree; 00616 } 00617 00618 /******************************************************************************/ 00619 00620 vector<Node*> TreeTemplateTools::getPathBetweenAnyTwoNodes(Node& node1, Node& node2, bool includeAncestor) 00621 { 00622 vector<Node*> path; 00623 vector<Node*> pathMatrix1; 00624 vector<Node*> pathMatrix2; 00625 00626 Node* nodeUp = &node1; 00627 while (nodeUp->hasFather()) // while(nodeUp != root) 00628 { 00629 pathMatrix1.push_back(nodeUp); 00630 nodeUp = nodeUp->getFather(); 00631 } 00632 pathMatrix1.push_back(nodeUp); // The root. 00633 00634 nodeUp = &node2; 00635 while (nodeUp->hasFather()) 00636 { 00637 pathMatrix2.push_back(nodeUp); 00638 nodeUp = nodeUp->getFather(); 00639 } 00640 pathMatrix2.push_back(nodeUp); // The root. 00641 // Must check that the two nodes have the same root!!! 00642 00643 size_t tmp1 = pathMatrix1.size() - 1; 00644 size_t tmp2 = pathMatrix2.size() - 1; 00645 00646 while (pathMatrix1[tmp1] == pathMatrix2[tmp2] 00647 and tmp1 > 0 00648 and tmp2 > 0) 00649 { 00650 tmp1--; tmp2--; 00651 } 00652 00653 for (size_t y = 0; y < tmp1; ++y) 00654 { 00655 path.push_back(pathMatrix1[y]); 00656 } 00657 if (includeAncestor) 00658 path.push_back(pathMatrix1[tmp1]); // pushing once, the Node that was common to both. 00659 for (size_t j = tmp2; j > 0; --j) 00660 { 00661 path.push_back(pathMatrix2[j - 1]); 00662 } 00663 return path; 00664 } 00665 00666 /******************************************************************************/ 00667 00668 vector<const Node*> TreeTemplateTools::getPathBetweenAnyTwoNodes(const Node& node1, const Node& node2, bool includeAncestor) 00669 { 00670 vector<const Node*> path; 00671 vector<const Node*> pathMatrix1; 00672 vector<const Node*> pathMatrix2; 00673 00674 const Node* nodeUp = &node1; 00675 while (nodeUp->hasFather()) // while(nodeUp != root) 00676 { 00677 pathMatrix1.push_back(nodeUp); 00678 nodeUp = nodeUp->getFather(); 00679 } 00680 pathMatrix1.push_back(nodeUp); // The root. 00681 00682 nodeUp = &node2; 00683 while (nodeUp->hasFather()) 00684 { 00685 pathMatrix2.push_back(nodeUp); 00686 nodeUp = nodeUp->getFather(); 00687 } 00688 pathMatrix2.push_back(nodeUp); // The root. 00689 // Must check that the two nodes have the same root!!! 00690 00691 size_t tmp1 = pathMatrix1.size() - 1; 00692 size_t tmp2 = pathMatrix2.size() - 1; 00693 00694 while (pathMatrix1[tmp1] == pathMatrix2[tmp2] 00695 and tmp1 > 0 00696 and tmp2 > 0) 00697 { 00698 tmp1--; tmp2--; 00699 } 00700 00701 for (size_t y = 0; y < tmp1; ++y) 00702 { 00703 path.push_back(pathMatrix1[y]); 00704 } 00705 if (includeAncestor) 00706 path.push_back(pathMatrix1[tmp1]); // pushing once, the Node that was common to both. 00707 for (size_t j = tmp2; j > 0; --j) 00708 { 00709 path.push_back(pathMatrix2[j - 1]); 00710 } 00711 return path; 00712 } 00713 00714 /******************************************************************************/ 00715 00716 double TreeTemplateTools::getDistanceBetweenAnyTwoNodes(const Node& node1, const Node& node2) 00717 { 00718 vector<const Node*> path = getPathBetweenAnyTwoNodes(node1, node2, false); 00719 double d = 0; 00720 for (size_t i = 0; i < path.size(); i++) 00721 { 00722 d += path[i]->getDistanceToFather(); 00723 } 00724 return d; 00725 } 00726 00727 /******************************************************************************/ 00728 00729 void TreeTemplateTools::processDistsInSubtree_(const Node* node, DistanceMatrix& matrix, vector< std::pair<string, double> >& distsToNodeFather) 00730 { 00731 distsToNodeFather.clear(); 00732 00733 // node-is-leaf case 00734 if (node->getNumberOfSons() == 0) 00735 { 00736 distsToNodeFather.push_back(make_pair(node->getName(), node->getDistanceToFather())); 00737 return; 00738 } 00739 00740 // For all leaves in node's subtree, get leaf-to-node distances. 00741 // Leaves are classified upon node's sons. 00742 map<const Node*, vector< pair<string, double> > > leavesDists; 00743 for (size_t i = 0; i < node->getNumberOfSons(); ++i) 00744 { 00745 const Node* son = node->getSon(i); 00746 processDistsInSubtree_(son, matrix, leavesDists[son]); // recursivity 00747 } 00748 // Write leaf-leaf distances to the distance matrix. 00749 // Only pairs in which the two leaves belong to different 00750 // sons are considered. 00751 for (size_t son1_loc = 0; son1_loc < node->getNumberOfSons(); ++son1_loc) 00752 { 00753 for (size_t son2_loc = 0; son2_loc < son1_loc; ++son2_loc) 00754 { 00755 const Node* son1 = node->getSon(son1_loc); 00756 const Node* son2 = node->getSon(son2_loc); 00757 00758 for (vector< pair<string, double> >::iterator son1_leaf = leavesDists[son1].begin(); 00759 son1_leaf != leavesDists[son1].end(); 00760 ++son1_leaf) 00761 { 00762 for (vector< pair<string, double> >::iterator son2_leaf = leavesDists[son2].begin(); 00763 son2_leaf != leavesDists[son2].end(); 00764 ++son2_leaf) 00765 { 00766 matrix(son1_leaf->first, son2_leaf->first) = 00767 matrix(son2_leaf->first, son1_leaf->first) = 00768 ( son1_leaf->second + son2_leaf->second ); 00769 } 00770 } 00771 } 00772 } 00773 00774 // node-is-root case 00775 if (!node->hasFather()) 00776 { 00777 // node-is-root-and-leaf case 00778 if (node->isLeaf() ) 00779 { 00780 string root_name = node->getName(); 00781 for (vector< pair<string, double> >::iterator other_leaf = leavesDists[node->getSon(0)].begin(); 00782 other_leaf != leavesDists[node->getSon(0)].end(); 00783 ++other_leaf) 00784 { 00785 matrix(root_name, other_leaf->first) = matrix( other_leaf->first, root_name) = other_leaf->second; 00786 } 00787 } 00788 00789 return; 00790 } 00791 00792 // Get distances from node's father to considered leaves 00793 distsToNodeFather.clear(); 00794 double nodeToFather = node->getDistanceToFather(); 00795 for (map<const Node*, vector<pair<string, double> > >::iterator son = leavesDists.begin(); son != leavesDists.end(); ++son) 00796 { 00797 for (vector< pair<string, double> >::iterator leaf = (son->second).begin(); leaf != (son->second).end(); ++leaf) 00798 { 00799 distsToNodeFather.push_back(make_pair(leaf->first, (leaf->second + nodeToFather))); 00800 } 00801 } 00802 } 00803 00804 DistanceMatrix* TreeTemplateTools::getDistanceMatrix(const TreeTemplate<Node>& tree) 00805 { 00806 DistanceMatrix* matrix = new DistanceMatrix(tree.getLeavesNames()); 00807 vector< pair<string, double> > distsToRoot; 00808 processDistsInSubtree_(tree.getRootNode(), *matrix, distsToRoot); 00809 return matrix; 00810 } 00811 00812 /******************************************************************************/ 00813 00814 std::vector<const Node*> TreeTemplateTools::getRemainingNeighbors(const Node* node1, const Node* node2, const Node* node3) 00815 { 00816 vector<const Node*> neighbors = node1->getNeighbors(); 00817 vector<const Node*> neighbors2; 00818 for (size_t k = 0; k < neighbors.size(); k++) 00819 { 00820 const Node* n = neighbors[k]; 00821 if (n != node2 && n != node3) 00822 neighbors2.push_back(n); 00823 } 00824 return neighbors2; 00825 } 00826 00827 /******************************************************************************/ 00828 00829 void TreeTemplateTools::incrementAllIds(Node* node, int increment) 00830 { 00831 node->setId(node->getId() + increment); 00832 for (size_t i = 0; i < node->getNumberOfSons(); i++) 00833 { 00834 incrementAllIds(node->getSon(i), increment); 00835 } 00836 } 00837 00838 /******************************************************************************/ 00839 00840 void TreeTemplateTools::getNodePropertyNames(const Node& node, vector<string>& propertyNames) 00841 { 00842 VectorTools::extend(propertyNames, node.getNodePropertyNames()); 00843 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00844 { 00845 getNodePropertyNames(*node.getSon(i), propertyNames); 00846 } 00847 } 00848 00849 void TreeTemplateTools::getNodeProperties(const Node& node, const string& propertyName, map<int, const Clonable*>& properties) 00850 { 00851 if (node.hasNodeProperty(propertyName)) 00852 properties[node.getId()] = node.getNodeProperty(propertyName); 00853 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00854 { 00855 getNodeProperties(*node.getSon(i), propertyName, properties); 00856 } 00857 } 00858 00859 void TreeTemplateTools::getNodeProperties(Node& node, const string& propertyName, map<int, Clonable*>& properties) 00860 { 00861 if (node.hasNodeProperty(propertyName)) 00862 properties[node.getId()] = node.getNodeProperty(propertyName); 00863 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00864 { 00865 getNodeProperties(*node.getSon(i), propertyName, properties); 00866 } 00867 } 00868 00869 /******************************************************************************/ 00870 00871 void TreeTemplateTools::getBranchPropertyNames(const Node& node, vector<string>& propertyNames) 00872 { 00873 VectorTools::extend(propertyNames, node.getBranchPropertyNames()); 00874 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00875 { 00876 getBranchPropertyNames(*node.getSon(i), propertyNames); 00877 } 00878 } 00879 00880 void TreeTemplateTools::getBranchProperties(const Node& node, const string& propertyName, map<int, const Clonable*>& properties) 00881 { 00882 if (node.hasBranchProperty(propertyName)) 00883 properties[node.getId()] = node.getBranchProperty(propertyName); 00884 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00885 { 00886 getBranchProperties(*node.getSon(i), propertyName, properties); 00887 } 00888 } 00889 00890 void TreeTemplateTools::getBranchProperties(Node& node, const string& propertyName, map<int, Clonable*>& properties) 00891 { 00892 if (node.hasBranchProperty(propertyName)) 00893 properties[node.getId()] = node.getBranchProperty(propertyName); 00894 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00895 { 00896 getBranchProperties(*node.getSon(i), propertyName, properties); 00897 } 00898 } 00899 00900 /******************************************************************************/ 00901 00902 bool TreeTemplateTools::haveSameOrderedTopology(const Node& n1, const Node& n2) 00903 { 00904 if (n1.isLeaf() && n2.isLeaf() && n1.getName() != n2.getName()) 00905 return false; 00906 size_t nl1 = n1.getNumberOfSons(); 00907 size_t nl2 = n2.getNumberOfSons(); 00908 if (nl1 != nl2) 00909 return false; 00910 00911 bool test = true; 00912 for (size_t i = 0; test && i < n1.getNumberOfSons(); ++i) 00913 { 00914 test &= haveSameOrderedTopology(*n1.getSon(i), *n2.getSon(i)); 00915 } 00916 return test; 00917 } 00918 00919 /******************************************************************************/ 00920 00921 TreeTemplateTools::OrderTreeData_ TreeTemplateTools::orderTree_(Node& node, bool downward, bool orderLeaves) 00922 { 00923 OrderTreeData_ otd; 00924 00925 if (node.isLeaf() && node.hasFather()) 00926 { 00927 otd.size = 1; 00928 otd.firstLeaf = node.getName(); 00929 } 00930 else 00931 { 00932 vector<size_t> nbSons; 00933 vector<string> firstLeaves; 00934 for (size_t i = 0; i < node.getNumberOfSons(); i++) 00935 { 00936 OrderTreeData_ otdsub = orderTree_(*node.getSon(i), downward, orderLeaves); 00937 if (i == 0) 00938 otd.firstLeaf = otdsub.firstLeaf; 00939 else if (orderLeaves && otdsub.firstLeaf < otd.firstLeaf) 00940 otd.firstLeaf = otdsub.firstLeaf; 00941 nbSons.push_back(otdsub.size); 00942 firstLeaves.push_back(otdsub.firstLeaf); 00943 } 00944 otd.size = VectorTools::sum(nbSons); 00945 00946 // Now swap nodes: 00947 if (downward) 00948 { 00949 for (size_t i = 0; i < nbSons.size() - 1; ++i) 00950 { 00951 size_t pos; 00952 vector<size_t> index = VectorTools::whichMaxAll(nbSons); 00953 if (index.size() == 1 || !orderLeaves) 00954 { 00955 pos = index[0]; 00956 } 00957 else 00958 { 00959 // There are ties to solve: 00960 vector<string> v; 00961 for (size_t j = 0; j < index.size(); ++j) 00962 { 00963 v.push_back(firstLeaves[index[j]]); 00964 } 00965 size_t mx = VectorTools::whichMax(v); 00966 pos = index[mx]; 00967 } 00968 if (pos != i) 00969 { 00970 node.swap(i, pos); 00971 nbSons[pos] = nbSons[i]; 00972 } 00973 nbSons[i] = 0; 00974 } 00975 } 00976 else 00977 { 00978 for (size_t i = 0; i < nbSons.size() - 1; ++i) 00979 { 00980 size_t pos; 00981 vector<size_t> index = VectorTools::whichMinAll(nbSons); 00982 if (index.size() == 1 || !orderLeaves) 00983 { 00984 pos = index[0]; 00985 } 00986 else 00987 { 00988 // There are ties to solve: 00989 vector<string> v; 00990 for (size_t j = 0; j < index.size(); ++j) 00991 { 00992 v.push_back(firstLeaves[index[j]]); 00993 } 00994 size_t mx = VectorTools::whichMin(v); 00995 pos = index[mx]; 00996 } 00997 if (pos != i) 00998 { 00999 node.swap(i, pos); 01000 nbSons[pos] = nbSons[i]; 01001 } 01002 nbSons[i] = otd.size + 1; 01003 } 01004 } 01005 } 01006 return otd; 01007 } 01008 01009 /******************************************************************************/ 01010 01011 void TreeTemplateTools::midRoot (TreeTemplate<Node>& tree, const string& criterion) 01012 { 01013 if(not (criterion == "variance" || "sum of squares")) 01014 throw Exception("TreeTemplateTools::reRoot Illegal criterion value '" + criterion + "'"); 01015 01016 if(tree.isRooted()) 01017 tree.unroot(); 01018 Node* ref_root = tree.getRootNode(); 01019 /* 01020 * The bestRoot object records : 01021 * -- the current best branch : .first 01022 * -- the current best value of the criterion : .second["value"] 01023 * -- the best position of the root on the branch : .second["position"] 01024 * 0 is toward the original root, 1 is away from it 01025 */ 01026 pair<Node*, map<string, double> > best_root_branch; 01027 best_root_branch.first = ref_root; // nota: the root does not correspond to a branch as it has no father 01028 best_root_branch.second ["position"] = -1; 01029 best_root_branch.second ["score"] = numeric_limits<double>::max(); 01030 01031 // find the best root 01032 TreeTemplateTools::getBestRootInSubtree(tree, criterion, ref_root, best_root_branch); 01033 tree.rootAt(ref_root); // back to the original root 01034 01035 // reroot 01036 const double pos = best_root_branch.second["position"]; 01037 if(pos<1e-6 or pos>1-1e-6) 01038 // The best root position is on a node (this is often the case with the sum of squares criterion) 01039 tree.rootAt(pos<1e-6 ? best_root_branch.first->getFather() : best_root_branch.first); 01040 else 01041 // The best root position is somewhere on a branch (a new Node is created) 01042 { 01043 Node* new_root = new Node(); 01044 new_root->setId( TreeTools::getMPNUId(tree, tree.getRootId()) ); 01045 01046 double root_branch_length = best_root_branch.first->getDistanceToFather(); 01047 Node* best_root_father = best_root_branch.first->getFather(); 01048 01049 best_root_father->removeSon(best_root_branch.first); 01050 best_root_father->addSon(new_root); 01051 new_root->addSon(best_root_branch.first); 01052 01053 new_root->setDistanceToFather(max(pos * root_branch_length, 1e-6)); 01054 best_root_branch.first->setDistanceToFather(max((1-pos) * root_branch_length, 1e-6)); 01055 01056 // The two branches leaving the root must have the same branch properties 01057 const vector<string> branch_properties = best_root_branch.first->getBranchPropertyNames(); 01058 for(vector<string>::const_iterator p = branch_properties.begin(); p!=branch_properties.end(); ++p) 01059 new_root->setBranchProperty(*p, *best_root_branch.first->getBranchProperty(*p)); 01060 01061 tree.rootAt(new_root); 01062 } 01063 } 01064 01065 /******************************************************************************/ 01066 01067 double TreeTemplateTools::getRadius (TreeTemplate<Node>& tree) 01068 { 01069 TreeTemplateTools::midRoot(tree, "sum of squares"); 01070 Moments_ moments = getSubtreeMoments(tree.getRootNode()); 01071 double radius = moments.sum / moments.numberOfLeaves; 01072 return radius; 01073 } 01074 01075 /******************************************************************************/ 01076 01077 void TreeTemplateTools::getBestRootInSubtree (TreeTemplate<Node>& tree, const string& criterion, Node* node, pair<Node*, map<string, double> >& bestRoot) 01078 { 01079 const vector<Node*> sons = node->getSons(); // copy 01080 tree.rootAt(node); 01081 01082 // Try to place the root on each branch downward node 01083 for(vector<Node*>::const_iterator son=sons.begin(); son!=sons.end(); ++son) 01084 { 01085 // Compute the moment of the subtree on son's side 01086 Moments_ son_moment = getSubtreeMoments(*son); 01087 01088 // Compute the moment of the subtree on node's side 01089 tree.rootAt(*son); 01090 Moments_ node_moment = getSubtreeMoments(node); 01091 tree.rootAt(node); 01092 01093 /* 01094 * Get the position of the root on this branch that 01095 * minimizes the root-to-leaves distances variance. 01096 * 01097 * This variance can be written in the form A x^2 + B x + C 01098 */ 01099 double min_criterion_value; 01100 double best_position; // 0 is toward the root, 1 is away from it 01101 01102 const TreeTemplateTools::Moments_& m1 = node_moment; 01103 const TreeTemplateTools::Moments_& m2 = son_moment; 01104 const double d = (**son).getDistanceToFather(); 01105 const double n1 = m1.numberOfLeaves; 01106 const double n2 = m2.numberOfLeaves; 01107 01108 double A=0, B=0, C=0; 01109 if(criterion == "sum of squares") 01110 { 01111 A = (n1 + n2) * d * d; 01112 B = 2 * d * (m1.sum - m2.sum) - 2 * n2 * d * d; 01113 C = m1.squaresSum + m2.squaresSum 01114 + 2 * m2.sum * d 01115 + n2 * d * d; 01116 } 01117 else if(criterion == "variance") 01118 { 01119 A = 4 * n1 * n2 * d*d; 01120 B = 4 * d * ( n2 * m1.sum - n1 * m2.sum - d * n1 * n2); 01121 C = (n1 + n2) * (m1.squaresSum + m2.squaresSum) + n1 * d * n2 * d 01122 + 2 * n1 * d * m2.sum - 2 * n2 * d * m1.sum 01123 - (m1.sum + m2.sum) * (m1.sum + m2.sum); 01124 } 01125 01126 if(A < 1e-20) 01127 { 01128 min_criterion_value = numeric_limits<double>::max(); 01129 best_position = 0.5; 01130 } 01131 else 01132 { 01133 min_criterion_value = C - B*B / (4*A); 01134 best_position = - B / (2*A); 01135 if(best_position < 0) 01136 { 01137 best_position = 0; 01138 min_criterion_value = C; 01139 } 01140 else if(best_position > 1) 01141 { 01142 best_position = 1; 01143 min_criterion_value = A+B+C; 01144 } 01145 } 01146 01147 // Is this branch is the best seen, update 'bestRoot' 01148 if(min_criterion_value < bestRoot.second["score"]) 01149 { 01150 bestRoot.first = *son; 01151 bestRoot.second["position"] = best_position; 01152 bestRoot.second["score"] = min_criterion_value; 01153 } 01154 01155 // Recurse 01156 TreeTemplateTools::getBestRootInSubtree(tree, criterion, *son, bestRoot); 01157 } 01158 } 01159 01160 /******************************************************************************/ 01161 01162 TreeTemplateTools::Moments_ TreeTemplateTools::getSubtreeMoments (const Node* node) 01163 { 01164 TreeTemplateTools::Moments_ moments = {0,0,0}; 01165 01166 if(node->isLeaf()) 01167 { 01168 moments.numberOfLeaves = 1; 01169 } 01170 else 01171 { 01172 const size_t nsons = node->getNumberOfSons(); 01173 for(size_t i=0; i<nsons; ++i) 01174 { 01175 const Node* son = node->getSon(i); 01176 const TreeTemplateTools::Moments_ son_moments = TreeTemplateTools::getSubtreeMoments(son); 01177 const double d = son->getDistanceToFather(); 01178 moments.numberOfLeaves += son_moments.numberOfLeaves; 01179 moments.sum += son_moments.sum + d * son_moments.numberOfLeaves; 01180 moments.squaresSum += son_moments.squaresSum + 2 * d * son_moments.sum + son_moments.numberOfLeaves * d*d; 01181 } 01182 } 01183 01184 return moments; 01185 } 01186 01187 /******************************************************************************/