bpp-phyl  2.1.0
 All Classes Namespaces Files Functions Variables Friends Pages
TreeTools.cpp
Go to the documentation of this file.
1 //
2 // File: TreeTools.cpp
3 // Created by: Julien Dutheil
4 // Created on: Wed Aug 6 13:45:28 2003
5 //
6 
7 /*
8  Copyright or © or Copr. CNRS, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "TreeTools.h"
41 #include "Tree.h"
42 #include "BipartitionTools.h"
43 #include "Model/Nucleotide/JCnuc.h"
45 #include "Distance/BioNJ.h"
47 #include "OptimizationTools.h"
48 
49 #include <Bpp/Text/TextTools.h>
51 #include <Bpp/Numeric/Number.h>
52 #include <Bpp/BppString.h>
57 
58 // From SeqLib:
59 #include <Bpp/Seq/Alphabet/DNA.h>
61 
62 using namespace bpp;
63 
64 // From the STL:
65 #include <iostream>
66 #include <sstream>
67 
68 using namespace std;
69 
70 /******************************************************************************/
71 
72 const string TreeTools::BOOTSTRAP = "bootstrap";
73 
74 /******************************************************************************/
75 
76 vector<int> TreeTools::getLeavesId(const Tree& tree, int nodeId) throw (NodeNotFoundException)
77 {
78  vector<int> leaves;
79  getLeavesId(tree, nodeId, leaves);
80  return leaves;
81 }
82 
83 void TreeTools::getLeavesId(const Tree& tree, int nodeId, std::vector<int>& leaves) throw (NodeNotFoundException)
84 {
85  if (!tree.hasNode(nodeId))
86  throw NodeNotFoundException("TreeTools::getLeavesId", nodeId);
87  if (tree.isLeaf(nodeId))
88  {
89  leaves.push_back(nodeId);
90  }
91  vector<int> sons = tree.getSonsId(nodeId);
92  for (size_t i = 0; i < sons.size(); i++)
93  {
94  getLeavesId(tree, sons[i], leaves);
95  }
96 }
97 
98 size_t TreeTools::getNumberOfLeaves(const Tree& tree, int nodeId) throw (NodeNotFoundException)
99 {
100  if (!tree.hasNode(nodeId))
101  throw NodeNotFoundException("TreeTools::getNumberOfLeaves", nodeId);
102 
103  size_t n = 0;
104  if (tree.isLeaf(nodeId))
105  {
106  ++n;
107  }
108  vector<int> sons = tree.getSonsId(nodeId);
109  for (size_t i = 0; i < sons.size(); ++i)
110  {
111  n += getNumberOfLeaves(tree, sons[i]);
112  }
113  return n;
114 }
115 
116 /******************************************************************************/
117 
118 int TreeTools::getLeafId(const Tree& tree, int nodeId, const std::string& name)
119 throw (NodeNotFoundException)
120 {
121  int* id = NULL;
122  searchLeaf(tree, nodeId, name, id);
123  if (id == NULL)
124  throw NodeNotFoundException("TreeTools::getLeafId().", name);
125  else
126  {
127  int i = *id;
128  delete id;
129  return i;
130  }
131 }
132 
133 void TreeTools::searchLeaf(const Tree& tree, int nodeId, const string& name, int*& id)
134 throw (NodeNotFoundException)
135 {
136  if (tree.isLeaf(nodeId))
137  {
138  if (tree.getNodeName(nodeId) == name)
139  {
140  id = new int(nodeId);
141  return;
142  }
143  }
144  vector<int> sons;
145  for (size_t i = 0; i < sons.size(); i++)
146  {
147  searchLeaf(tree, nodeId, name, id);
148  }
149 }
150 
151 /******************************************************************************/
152 
153 vector<int> TreeTools::getNodesId(const Tree& tree, int nodeId) throw (NodeNotFoundException)
154 {
155  vector<int> nodes;
156  getNodesId(tree, nodeId, nodes);
157  return nodes;
158 }
159 
160 void TreeTools::getNodesId(const Tree& tree, int nodeId, vector<int>& nodes) throw (NodeNotFoundException)
161 {
162  if (!tree.hasNode(nodeId))
163  throw NodeNotFoundException("TreeTools::getNodesId", nodeId);
164  vector<int> sons = tree.getSonsId(nodeId);
165  for (size_t i = 0; i < sons.size(); i++)
166  {
167  getNodesId(tree, sons[i], nodes);
168  }
169  nodes.push_back(nodeId);
170 }
171 
172 /******************************************************************************/
173 
174 size_t TreeTools::getDepth(const Tree& tree, int nodeId) throw (NodeNotFoundException)
175 {
176  if (!tree.hasNode(nodeId))
177  throw NodeNotFoundException("TreeTools::getDepth", nodeId);
178  size_t d = 0;
179  vector<int> sons = tree.getSonsId(nodeId);
180  for (size_t i = 0; i < sons.size(); i++)
181  {
182  size_t c = getDepth(tree, sons[i]) + 1;
183  if (c > d)
184  d = c;
185  }
186  return d;
187 }
188 
189 /******************************************************************************/
190 
191 size_t TreeTools::getDepths(const Tree& tree, int nodeId, map<int, size_t>& depths) throw (NodeNotFoundException)
192 {
193  if (!tree.hasNode(nodeId))
194  throw NodeNotFoundException("TreeTools::getDepth", nodeId);
195  size_t d = 0;
196  vector<int> sons = tree.getSonsId(nodeId);
197  for (size_t i = 0; i < sons.size(); i++)
198  {
199  size_t c = getDepths(tree, sons[i], depths) + 1;
200  if (c > d)
201  d = c;
202  }
203  depths[nodeId] = d;
204  return d;
205 }
206 
207 /******************************************************************************/
208 
209 double TreeTools::getHeight(const Tree& tree, int nodeId) throw (NodeNotFoundException, NodeException)
210 {
211  if (!tree.hasNode(nodeId))
212  throw NodeNotFoundException("TreeTools::getHeight", nodeId);
213  double d = 0;
214  vector<int> sons = tree.getSonsId(nodeId);
215  for (size_t i = 0; i < sons.size(); i++)
216  {
217  double dist = 0;
218  if (tree.hasDistanceToFather(sons[i]))
219  dist = tree.getDistanceToFather(sons[i]);
220  else
221  throw NodeException("Node without branch length.", sons[i]);
222  double c = getHeight(tree, sons[i]) + dist;
223  if (c > d)
224  d = c;
225  }
226  return d;
227 }
228 
229 /******************************************************************************/
230 
231 double TreeTools::getHeights(const Tree& tree, int nodeId, map<int, double>& heights) throw (NodeNotFoundException, NodeException)
232 {
233  if (!tree.hasNode(nodeId))
234  throw NodeNotFoundException("TreeTools::getHeight", nodeId);
235  double d = 0;
236  vector<int> sons = tree.getSonsId(nodeId);
237  for (size_t i = 0; i < sons.size(); i++)
238  {
239  double dist = 0;
240  if (tree.hasDistanceToFather(sons[i]))
241  dist = tree.getDistanceToFather(sons[i]);
242  else
243  throw NodeException("Node without branch length.", sons[i]);
244  double c = getHeights(tree, sons[i], heights) + dist;
245  if (c > d)
246  d = c;
247  }
248  heights[nodeId] = d;
249  return d;
250 }
251 
252 /******************************************************************************/
253 
254 string TreeTools::nodeToParenthesis(const Tree& tree, int nodeId, bool writeId) throw (NodeNotFoundException)
255 {
256  if (!tree.hasNode(nodeId))
257  throw NodeNotFoundException("TreeTools::nodeToParenthesis", nodeId);
258  ostringstream s;
259  if (tree.isLeaf(nodeId))
260  {
261  s << tree.getNodeName(nodeId);
262  }
263  else
264  {
265  s << "(";
266  vector<int> sonsId = tree.getSonsId(nodeId);
267  s << nodeToParenthesis(tree, sonsId[0], writeId);
268  for (size_t i = 1; i < sonsId.size(); i++)
269  {
270  s << "," << nodeToParenthesis(tree, sonsId[i], writeId);
271  }
272  s << ")";
273  }
274  if (writeId)
275  {
276  if (tree.isLeaf(nodeId))
277  s << "_";
278  s << nodeId;
279  }
280  else
281  {
282  if (tree.hasBranchProperty(nodeId, BOOTSTRAP))
283  s << (dynamic_cast<const Number<double>*>(tree.getBranchProperty(nodeId, BOOTSTRAP))->getValue());
284  }
285  if (tree.hasDistanceToFather(nodeId))
286  s << ":" << tree.getDistanceToFather(nodeId);
287  return s.str();
288 }
289 
290 /******************************************************************************/
291 
292 string TreeTools::nodeToParenthesis(const Tree& tree, int nodeId, bool bootstrap, const string& propertyName) throw (NodeNotFoundException)
293 {
294  if (!tree.hasNode(nodeId))
295  throw NodeNotFoundException("TreeTools::nodeToParenthesis", nodeId);
296  ostringstream s;
297  if (tree.isLeaf(nodeId))
298  {
299  s << tree.getNodeName(nodeId);
300  }
301  else
302  {
303  s << "(";
304  vector<int> sonsId = tree.getSonsId(nodeId);
305  s << nodeToParenthesis(tree, sonsId[0], bootstrap, propertyName);
306  for (size_t i = 1; i < sonsId.size(); i++)
307  {
308  s << "," << nodeToParenthesis(tree, sonsId[i], bootstrap, propertyName);
309  }
310  s << ")";
311 
312  if (bootstrap)
313  {
314  if (tree.hasBranchProperty(nodeId, BOOTSTRAP))
315  s << (dynamic_cast<const Number<double>*>(tree.getBranchProperty(nodeId, BOOTSTRAP))->getValue());
316  }
317  else
318  {
319  if (tree.hasBranchProperty(nodeId, propertyName))
320  s << *(dynamic_cast<const BppString*>(tree.getBranchProperty(nodeId, propertyName)));
321  }
322  }
323  if (tree.hasDistanceToFather(nodeId))
324  s << ":" << tree.getDistanceToFather(nodeId);
325  return s.str();
326 }
327 
328 /******************************************************************************/
329 
330 string TreeTools::treeToParenthesis(const Tree& tree, bool writeId)
331 {
332  ostringstream s;
333  s << "(";
334  int rootId = tree.getRootId();
335  vector<int> sonsId = tree.getSonsId(rootId);
336  if (tree.isLeaf(rootId))
337  {
338  s << tree.getNodeName(rootId);
339  for (size_t i = 0; i < sonsId.size(); i++)
340  {
341  s << "," << nodeToParenthesis(tree, sonsId[i], writeId);
342  }
343  }
344  else
345  {
346  if (sonsId.size() > 0)
347  {
348  s << nodeToParenthesis(tree, sonsId[0], writeId);
349  for (size_t i = 1; i < sonsId.size(); i++)
350  {
351  s << "," << nodeToParenthesis(tree, sonsId[i], writeId);
352  }
353  }
354  // Otherwise, this is an empty tree!
355  }
356  s << ");" << endl;
357  return s.str();
358 }
359 
360 /******************************************************************************/
361 
362 string TreeTools::treeToParenthesis(const Tree& tree, bool bootstrap, const string& propertyName)
363 {
364  ostringstream s;
365  s << "(";
366  int rootId = tree.getRootId();
367  vector<int> sonsId = tree.getSonsId(rootId);
368  if (tree.isLeaf(rootId))
369  {
370  s << tree.getNodeName(rootId);
371  for (size_t i = 0; i < sonsId.size(); i++)
372  {
373  s << "," << nodeToParenthesis(tree, sonsId[i], bootstrap, propertyName);
374  }
375  }
376  else
377  {
378  s << nodeToParenthesis(tree, sonsId[0], bootstrap, propertyName);
379  for (size_t i = 1; i < sonsId.size(); i++)
380  {
381  s << "," << nodeToParenthesis(tree, sonsId[i], bootstrap, propertyName);
382  }
383  }
384  s << ")";
385  if (bootstrap)
386  {
387  if (tree.hasBranchProperty(rootId, BOOTSTRAP))
388  s << (dynamic_cast<const Number<double>*>(tree.getBranchProperty(rootId, BOOTSTRAP))->getValue());
389  }
390  else
391  {
392  if (tree.hasBranchProperty(rootId, propertyName))
393  s << *(dynamic_cast<const BppString*>(tree.getBranchProperty(rootId, propertyName)));
394  }
395  s << ";" << endl;
396  return s.str();
397 }
398 
399 /******************************************************************************/
400 
401 vector<int> TreeTools::getPathBetweenAnyTwoNodes(const Tree& tree, int nodeId1, int nodeId2, bool includeAncestor)
402 throw (NodeNotFoundException)
403 {
404  if (!tree.hasNode(nodeId1))
405  throw NodeNotFoundException("TreeTools::getPathBetweenAnyTwoNodes", nodeId1);
406  if (!tree.hasNode(nodeId2))
407  throw NodeNotFoundException("TreeTools::getPathBetweenAnyTwoNodes", nodeId2);
408  vector<int> path;
409  vector<int> pathMatrix1;
410  vector<int> pathMatrix2;
411 
412  int nodeUp = nodeId1;
413  while (tree.hasFather(nodeUp))
414  {
415  pathMatrix1.push_back(nodeUp);
416  nodeUp = tree.getFatherId(nodeUp);
417  }
418  pathMatrix1.push_back(nodeUp); // The root.
419 
420  nodeUp = nodeId2;
421  while (tree.hasFather(nodeUp))
422  {
423  pathMatrix2.push_back(nodeUp);
424  nodeUp = tree.getFatherId(nodeUp);
425  }
426  pathMatrix2.push_back(nodeUp); // The root.
427  // Must check that the two nodes have the same root!!!
428 
429  size_t tmp1 = pathMatrix1.size();
430  size_t tmp2 = pathMatrix2.size();
431 
432  while ((tmp1 > 0) && (tmp2 > 0))
433  {
434  if (pathMatrix1[tmp1 - 1] != pathMatrix2[tmp2 - 1])
435  break;
436  tmp1--; tmp2--;
437  }
438  // (tmp1 - 1) and (tmp2 - 1) now point toward the first non-common nodes
439 
440  for (size_t y = 0; y < tmp1; ++y)
441  {
442  path.push_back(pathMatrix1[y]);
443  }
444  if (includeAncestor)
445  path.push_back(pathMatrix1[tmp1]); // pushing once, the Node that was common to both.
446  for (size_t j = tmp2; j > 0; --j)
447  {
448  path.push_back(pathMatrix2[j - 1]);
449  }
450  return path;
451 }
452 
453 /******************************************************************************/
454 
455 double TreeTools::getDistanceBetweenAnyTwoNodes(const Tree& tree, int nodeId1, int nodeId2)
456 {
457  if (!tree.hasNode(nodeId1))
458  throw NodeNotFoundException("TreeTools::getDistanceBetweenAnyTwoNodes", nodeId1);
459  if (!tree.hasNode(nodeId2))
460  throw NodeNotFoundException("TreeTools::getDistanceBetweenAnyTwoNodes", nodeId2);
461  vector<int> path = getPathBetweenAnyTwoNodes(tree, nodeId1, nodeId2, false);
462  double d = 0;
463  for (size_t i = 0; i < path.size(); i++)
464  {
465  d += tree.getDistanceToFather(path[i]);
466  }
467  return d;
468 }
469 
470 /******************************************************************************/
471 
473 {
474  if (!tree.hasNode(nodeId))
475  throw NodeNotFoundException("TreeTools::getBranchLengths", nodeId);
476  Vdouble brLen(1);
477  if (tree.hasDistanceToFather(nodeId))
478  brLen[0] = tree.getDistanceToFather(nodeId);
479  else
480  throw NodeException("TreeTools::getbranchLengths(). No branch length.", nodeId);
481  vector<int> sons = tree.getSonsId(nodeId);
482  for (size_t i = 0; i < sons.size(); i++)
483  {
484  Vdouble sonBrLen = getBranchLengths(tree, sons[i]);
485  for (size_t j = 0; j < sonBrLen.size(); j++)
486  {
487  brLen.push_back(sonBrLen[j]);
488  }
489  }
490  return brLen;
491 }
492 
493 /******************************************************************************/
494 
495 double TreeTools::getTotalLength(const Tree& tree, int nodeId, bool includeAncestor) throw (NodeNotFoundException, NodeException)
496 {
497  if (!tree.hasNode(nodeId))
498  throw NodeNotFoundException("TreeTools::getTotalLength", nodeId);
499  if (includeAncestor && !tree.hasDistanceToFather(nodeId))
500  throw NodeException("TreeTools::getTotalLength(). No branch length.", nodeId);
501  double length = includeAncestor ? tree.getDistanceToFather(nodeId) : 0;
502  vector<int> sons = tree.getSonsId(nodeId);
503  for (size_t i = 0; i < sons.size(); i++)
504  {
505  length += getTotalLength(tree, sons[i], true);
506  }
507  return length;
508 }
509 
510 /******************************************************************************/
511 
512 void TreeTools::setBranchLengths(Tree& tree, int nodeId, double brLen) throw (NodeNotFoundException)
513 {
514  if (!tree.hasNode(nodeId))
515  throw NodeNotFoundException("TreeTools::setBranchLengths", nodeId);
516  vector<int> nodes = getNodesId(tree, nodeId);
517  for (size_t i = 0; i < nodes.size(); i++)
518  {
519  tree.setDistanceToFather(nodes[i], brLen);
520  }
521 }
522 
523 /******************************************************************************/
524 
525 void TreeTools::setVoidBranchLengths(Tree& tree, int nodeId, double brLen) throw (NodeNotFoundException)
526 {
527  if (!tree.hasNode(nodeId))
528  throw NodeNotFoundException("TreeTools::setVoidBranchLengths", nodeId);
529  vector<int> nodes = getNodesId(tree, nodeId);
530  for (size_t i = 0; i < nodes.size(); i++)
531  {
532  if (!tree.hasDistanceToFather(nodes[i]))
533  tree.setDistanceToFather(nodes[i], brLen);
534  }
535 }
536 
537 /******************************************************************************/
538 
539 void TreeTools::scaleTree(Tree& tree, int nodeId, double factor) throw (NodeNotFoundException, NodeException)
540 {
541  if (!tree.hasNode(nodeId))
542  throw NodeNotFoundException("TreeTools::scaleTree", nodeId);
543  vector<int> nodes = getNodesId(tree, nodeId);
544  for (size_t i = 0; i < nodes.size(); i++)
545  {
546  if (tree.hasFather(nodes[i]))
547  {
548  if (!tree.hasDistanceToFather(nodes[i]))
549  throw NodeException("TreeTools::scaleTree(). Branch with no length", nodes[i]);
550  double brLen = tree.getDistanceToFather(nodes[i]) * factor;
551  tree.setDistanceToFather(nodes[i], brLen);
552  }
553  }
554 }
555 
556 /******************************************************************************/
557 
559 {
560  if (!tree.hasNode(nodeId))
561  throw NodeNotFoundException("TreeTools::initBranchLengthsGrafen", nodeId);
562  vector<int> sons = tree.getSonsId(nodeId);
563  vector<size_t> h(sons.size());
564  for (size_t i = 0; i < sons.size(); i++)
565  {
566  h[i] = initBranchLengthsGrafen(tree, sons[i]);
567  }
568  size_t thish = sons.size() == 0 ? 0 : VectorTools::sum<size_t>(h) + sons.size() - 1;
569  for (size_t i = 0; i < sons.size(); i++)
570  {
571  tree.setDistanceToFather(sons[i], (double)(thish - h[i]));
572  }
573  return thish;
574 }
575 
577 {
578  initBranchLengthsGrafen(tree, tree.getRootId());
579 }
580 
581 /******************************************************************************/
582 
584  Tree& tree,
585  int nodeId,
586  double power,
587  double total,
588  double& height,
589  double& heightRaised)
591 {
592  if (!tree.hasNode(nodeId))
593  throw NodeNotFoundException("TreeTools::computeBranchLengthsGrafen", nodeId);
594  vector<int> sons = tree.getSonsId(nodeId);
595  vector<double> hr(sons.size());
596  height = 0;
597  for (size_t i = 0; i < sons.size(); i++)
598  {
599  int son = sons[i];
600  if (tree.hasDistanceToFather(son))
601  {
602  double h;
603  computeBranchLengthsGrafen(tree, sons[i], power, total, h, hr[i]);
604  double d = h + tree.getDistanceToFather(son);
605  if (d > height)
606  height = d;
607  }
608  else
609  throw NodeException ("TreeTools::computeBranchLengthsGrafen. Branch length lacking.", son);
610  }
611  heightRaised = pow(height / total, power) * total;
612  for (size_t i = 0; i < sons.size(); i++)
613  {
614  tree.setDistanceToFather(sons[i], heightRaised - hr[i]);
615  }
616 }
617 
618 void TreeTools::computeBranchLengthsGrafen(Tree& tree, double power, bool init)
619 throw (NodeException)
620 {
621  int rootId = tree.getRootId();
622  if (init)
623  {
624  initBranchLengthsGrafen(tree);
625  }
626  // Scale by total heigth:
627  double totalHeight = getHeight(tree, rootId);
628  double h, hr;
629  computeBranchLengthsGrafen(tree, rootId, power, totalHeight, h, hr);
630 }
631 
632 /******************************************************************************/
633 
634 double TreeTools::convertToClockTree(Tree& tree, int nodeId, bool noneg)
635 {
636  if (!tree.hasNode(nodeId))
637  throw NodeNotFoundException("TreeTools::convertToClockTree", nodeId);
638  vector<int> sons = tree.getSonsId(nodeId);
639  vector<double> h(sons.size());
640  // We compute the mean height:
641  double l = 0;
642  double maxh = -1.;
643  for (size_t i = 0; i < sons.size(); i++)
644  {
645  int son = sons[i];
646  if (tree.hasDistanceToFather(son))
647  {
648  h[i] = convertToClockTree(tree, son);
649  if (h[i] > maxh)
650  maxh = h[i];
651  l += h[i] + tree.getDistanceToFather(son);
652  }
653  else
654  throw NodeException ("TreeTools::convertToClockTree. Branch length lacking.", son);
655  }
656  if (sons.size() > 0)
657  l /= (double)sons.size();
658  if (l < maxh)
659  l = maxh;
660  for (size_t i = 0; i < sons.size(); i++)
661  {
662  tree.setDistanceToFather(sons[i], l - h[i]);
663  }
664  return l;
665 }
666 
667 /******************************************************************************/
668 
669 double TreeTools::convertToClockTree2(Tree& tree, int nodeId)
670 {
671  if (!tree.hasNode(nodeId))
672  throw NodeNotFoundException("TreeTools::convertToClockTree2", nodeId);
673  vector<int> sons = tree.getSonsId(nodeId);
674  vector<double> h(sons.size());
675  // We compute the mean height:
676  double l = 0;
677  double maxh = -1.;
678  for (size_t i = 0; i < sons.size(); i++)
679  {
680  int son = sons[i];
681  if (tree.hasDistanceToFather(son))
682  {
683  h[i] = convertToClockTree2(tree, son);
684  if (h[i] > maxh)
685  maxh = h[i];
686  l += h[i] + tree.getDistanceToFather(son);
687  }
688  else
689  throw NodeException ("TreeTools::convertToClockTree2. Branch length lacking.", son);
690  }
691  if (sons.size() > 0)
692  l /= (double)sons.size();
693  for (size_t i = 0; i < sons.size(); i++)
694  {
695  scaleTree(tree, sons[i], h[i] > 0 ? l / h[i] : 0);
696  }
697  return l;
698 }
699 
700 /******************************************************************************/
701 
703 {
704  vector<string> names = tree.getLeavesNames();
705  DistanceMatrix* mat = new DistanceMatrix(names);
706  for (size_t i = 0; i < names.size(); i++)
707  {
708  (*mat)(i, i) = 0;
709  for (size_t j = 0; j < i; j++)
710  {
711  (*mat)(i, j) = (*mat)(j, i) = getDistanceBetweenAnyTwoNodes(tree, tree.getLeafId(names[i]), tree.getLeafId(names[j]));
712  }
713  }
714  return mat;
715 }
716 
717 /******************************************************************************/
718 
720 {
721  if (tree.isRooted())
722  tree.unroot();
723  DistanceMatrix* dist = getDistanceMatrix(tree);
724  vector<size_t> pos = MatrixTools::whichMax(*dist);
725  double dmid = (*dist)(pos[0], pos[1]) / 2;
726  int id1 = tree.getLeafId(dist->getName(pos[0]));
727  int id2 = tree.getLeafId(dist->getName(pos[1]));
728  int rootId = tree.getRootId();
729  double d1 = getDistanceBetweenAnyTwoNodes(tree, id1, rootId);
730  double d2 = getDistanceBetweenAnyTwoNodes(tree, id2, rootId);
731  int current = d2 > d1 ? id2 : id1;
732  delete dist;
733  double l = tree.getDistanceToFather(current);
734  double c = l;
735  while (c < dmid)
736  {
737  current = tree.getFatherId(current);
738  l = tree.getDistanceToFather(current);
739  c += l;
740  }
741  tree.newOutGroup(current);
742  int brother = tree.getSonsId(tree.getRootId())[1];
743  if (brother == current)
744  brother = tree.getSonsId(tree.getRootId())[0];
745  tree.setDistanceToFather(current, l - (c - dmid));
746  tree.setDistanceToFather(brother, c - dmid);
747 }
748 
749 /******************************************************************************/
750 
751 int TreeTools::getMaxId(const Tree& tree, int id)
752 {
753  int maxId = id;
754  vector<int> sonsId = tree.getSonsId(id);
755  for (size_t i = 0; i < sonsId.size(); i++)
756  {
757  int subMax = getMaxId(tree, sonsId[i]);
758  if (subMax > maxId)
759  maxId = subMax;
760  }
761  return maxId;
762 }
763 
764 /******************************************************************************/
765 
766 int TreeTools::getMPNUId(const Tree& tree, int id)
767 {
768  vector<int> ids = getNodesId(tree, id);
769  sort(ids.begin(), ids.end());
770  // Check if some id is "missing" in the subtree:
771  for (size_t i = 0; i < ids.size(); i++)
772  {
773  if (ids[i] != (int)i)
774  return (int)i;
775  }
776  // Well, all ids are from 0 to n, so send n+1:
777  return (int)ids.size();
778 }
779 
780 /******************************************************************************/
781 
782 bool TreeTools::checkIds(const Tree& tree, bool throwException) throw (Exception)
783 {
784  vector<int> ids = tree.getNodesId();
785  sort(ids.begin(), ids.end());
786  for (size_t i = 1; i < ids.size(); i++)
787  {
788  if (ids[i] == ids[i - 1])
789  {
790  if (throwException)
791  throw Exception("TreeTools::checkIds. This id is used at least twice: " + TextTools::toString(ids[i]));
792  return false;
793  }
794  }
795  return true;
796 }
797 
798 /******************************************************************************/
799 
800 VectorSiteContainer* TreeTools::MRPEncode(const vector<Tree*>& vecTr)
801 {
802  vector<BipartitionList*> vecBipL;
803  for (size_t i = 0; i < vecTr.size(); i++)
804  {
805  vecBipL.push_back(new BipartitionList(*vecTr[i]));
806  }
807 
809 
810  for (size_t i = 0; i < vecTr.size(); i++)
811  {
812  delete vecBipL[i];
813  }
814 
815  return cont;
816 }
817 
818 /******************************************************************************/
819 
820 bool TreeTools::haveSameTopology(const Tree& tr1, const Tree& tr2)
821 {
822  size_t jj, nbbip;
823  BipartitionList* bipL1, * bipL2;
824  vector<size_t> size1, size2;
825 
826  /* compare sets of leaves */
828  return false;
829 
830  /* construct bipartitions */
831  bipL1 = new BipartitionList(tr1, true);
832  bipL1->removeTrivialBipartitions();
834  bipL1->sortByPartitionSize();
835  bipL2 = new BipartitionList(tr2, true);
836  bipL2->removeTrivialBipartitions();
838  bipL2->sortByPartitionSize();
839 
840  /* compare numbers of bipartitions */
841  if (bipL1->getNumberOfBipartitions() != bipL2->getNumberOfBipartitions())
842  return false;
843  nbbip = bipL1->getNumberOfBipartitions();
844 
845  /* compare partition sizes */
846  for (size_t i = 0; i < nbbip; i++)
847  {
848  size1.push_back(bipL1->getPartitionSize(i));
849  size2.push_back(bipL1->getPartitionSize(i));
850  if (size1[i] != size2[i])
851  return false;
852  }
853 
854  /* compare bipartitions */
855  for (size_t i = 0; i < nbbip; i++)
856  {
857  for (jj = 0; jj < nbbip; jj++)
858  {
859  if (size1[i] == size2[jj] && BipartitionTools::areIdentical(*bipL1, i, *bipL2, jj))
860  break;
861  }
862  if (jj == nbbip)
863  return false;
864  }
865 
866  return true;
867 }
868 
869 /******************************************************************************/
870 
871 int TreeTools::robinsonFouldsDistance(const Tree& tr1, const Tree& tr2, bool checkNames, int* missing_in_tr2, int* missing_in_tr1) throw (Exception)
872 {
873  BipartitionList* bipL1, * bipL2;
874  size_t i, j;
875  vector<size_t> size1, size2;
876  vector<bool> bipOK2;
877 
878 
879  if (checkNames && !VectorTools::haveSameElements(tr1.getLeavesNames(), tr2.getLeavesNames()))
880  throw Exception("Distinct leaf sets between trees ");
881 
882  /* prepare things */
883  int missing1 = 0;
884  int missing2 = 0;
885 
886  bipL1 = new BipartitionList(tr1, true);
887  bipL1->removeTrivialBipartitions();
888  bipL1->sortByPartitionSize();
889  bipL2 = new BipartitionList(tr2, true);
890  bipL2->removeTrivialBipartitions();
891  bipL2->sortByPartitionSize();
892 
893 
894  for (i = 0; i < bipL1->getNumberOfBipartitions(); i++)
895  {
896  size1.push_back(bipL1->getPartitionSize(i));
897  }
898  for (i = 0; i < bipL2->getNumberOfBipartitions(); i++)
899  {
900  size2.push_back(bipL2->getPartitionSize(i));
901  }
902 
903  for (i = 0; i < bipL2->getNumberOfBipartitions(); i++)
904  {
905  bipOK2.push_back(false);
906  }
907 
908  /* main loops */
909 
910  for (i = 0; i < bipL1->getNumberOfBipartitions(); i++)
911  {
912  for (j = 0; j < bipL2->getNumberOfBipartitions(); j++)
913  {
914  if (bipOK2[j])
915  continue;
916  if (size1[i] == size2[j] && BipartitionTools::areIdentical(*bipL1, i, *bipL2, j))
917  {
918  bipOK2[j] = true;
919  break;
920  }
921  }
922  if (j == bipL2->getNumberOfBipartitions())
923  missing2++;
924  }
925 
926  missing1 = static_cast<int>(bipL2->getNumberOfBipartitions()) - static_cast<int>(bipL1->getNumberOfBipartitions()) + missing2;
927 
928  if (missing_in_tr1)
929  *missing_in_tr1 = missing1;
930  if (missing_in_tr2)
931  *missing_in_tr2 = missing2;
932  return missing1 + missing2;
933 }
934 
935 /******************************************************************************/
936 
937 BipartitionList* TreeTools::bipartitionOccurrences(const vector<Tree*>& vecTr, vector<size_t>& bipScore)
938 {
939  vector<BipartitionList*> vecBipL;
940  BipartitionList* mergedBipL;
941  vector<size_t> bipSize;
942  size_t nbBip;
943 
944  /* build and merge bipartitions */
945  for (size_t i = 0; i < vecTr.size(); i++)
946  {
947  vecBipL.push_back(new BipartitionList(*vecTr[i]));
948  }
949  mergedBipL = BipartitionTools::mergeBipartitionLists(vecBipL);
950  for (size_t i = 0; i < vecTr.size(); i++)
951  {
952  delete vecBipL[i];
953  }
954 
955  mergedBipL->removeTrivialBipartitions();
956  nbBip = mergedBipL->getNumberOfBipartitions();
957  bipScore.clear();
958  for (size_t i = 0; i < nbBip; i++)
959  {
960  bipSize.push_back(mergedBipL->getPartitionSize(i));
961  bipScore.push_back(1);
962  }
963 
964  /* compare bipartitions */
965  for (size_t i = nbBip; i > 0; i--)
966  {
967  if (bipScore[i - 1] == 0)
968  continue;
969  for (size_t j = i - 1; j > 0; j--)
970  {
971  if (bipScore[j - 1] && bipSize[i - 1] == bipSize[j - 1] && mergedBipL->areIdentical(i - 1, j - 1))
972  {
973  bipScore[i - 1]++;
974  bipScore[j - 1] = 0;
975  }
976  }
977  }
978 
979  /* keep only distinct bipartitions */
980  for (size_t i = nbBip; i > 0; i--)
981  {
982  if (bipScore[i - 1] == 0)
983  {
984  bipScore.erase(bipScore.begin() + i - 1);
985  mergedBipL->deleteBipartition(i - 1);
986  }
987  }
988 
989  /* add terminal branches */
990  mergedBipL->addTrivialBipartitions(false);
991  for (size_t i = 0; i < mergedBipL->getNumberOfElements(); i++)
992  {
993  bipScore.push_back(vecTr.size());
994  }
995 
996  return mergedBipL;
997 }
998 
999 /******************************************************************************/
1000 
1001 TreeTemplate<Node>* TreeTools::thresholdConsensus(const vector<Tree*>& vecTr, double threshold, bool checkNames) throw (Exception)
1002 {
1003  vector<size_t> bipScore;
1004  vector<string> tr0leaves;
1005  BipartitionList* bipL;
1006  double score;
1007 
1008  if (vecTr.size() == 0)
1009  throw Exception("TreeTools::thresholdConsensus. Empty vector passed");
1010 
1011  /* check names */
1012  if (checkNames)
1013  {
1014  tr0leaves = vecTr[0]->getLeavesNames();
1015  for (size_t i = 1; i < vecTr.size(); i++)
1016  {
1017  if (!VectorTools::haveSameElements(vecTr[i]->getLeavesNames(), tr0leaves))
1018  throw Exception("TreeTools::thresholdConsensus. Distinct leaf sets between trees");
1019  }
1020  }
1021 
1022  bipL = bipartitionOccurrences(vecTr, bipScore);
1023 
1024  for (size_t i = bipL->getNumberOfBipartitions(); i > 0; i--)
1025  {
1026  if (bipL->getPartitionSize(i - 1) == 1)
1027  continue;
1028  score = static_cast<int>(bipScore[i - 1]) / static_cast<double>(vecTr.size());
1029  if (score <= threshold && score != 1.)
1030  {
1031  bipL->deleteBipartition(i - 1);
1032  continue;
1033  }
1034  if (score > 0.5)
1035  continue;
1036  for (size_t j = bipL->getNumberOfBipartitions(); j > i; j--)
1037  {
1038  if (!bipL->areCompatible(i - 1, j - 1))
1039  {
1040  bipL->deleteBipartition(i - 1);
1041  break;
1042  }
1043  }
1044  }
1045 
1046  TreeTemplate<Node>* tr = bipL->toTree();
1047  delete bipL;
1048  return tr;
1049 }
1050 
1051 /******************************************************************************/
1052 
1053 TreeTemplate<Node>* TreeTools::fullyResolvedConsensus(const vector<Tree*>& vecTr, bool checkNames)
1054 {
1055  return thresholdConsensus(vecTr, 0., checkNames);
1056 }
1057 
1058 /******************************************************************************/
1059 
1060 TreeTemplate<Node>* TreeTools::majorityConsensus(const vector<Tree*>& vecTr, bool checkNames)
1061 {
1062  return thresholdConsensus(vecTr, 0.5, checkNames);
1063 }
1064 
1065 /******************************************************************************/
1066 
1067 TreeTemplate<Node>* TreeTools::strictConsensus(const vector<Tree*>& vecTr, bool checkNames)
1068 {
1069  return thresholdConsensus(vecTr, 1., checkNames);
1070 }
1071 
1072 /******************************************************************************/
1073 
1074 Tree* TreeTools::MRP(const vector<Tree*>& vecTr)
1075 {
1076  // matrix representation
1077  VectorSiteContainer* sites = TreeTools::MRPEncode(vecTr);
1078 
1079  // starting bioNJ tree
1080  const DNA* alphabet = dynamic_cast<const DNA*>(sites->getAlphabet());
1081  JCnuc* jc = new JCnuc(alphabet);
1082  ConstantDistribution* constRate = new ConstantDistribution(1.);
1083  DistanceEstimation distFunc(jc, constRate, sites, 0, true);
1084  BioNJ bionjTreeBuilder(false, false);
1085  bionjTreeBuilder.setDistanceMatrix(*(distFunc.getMatrix()));
1086  bionjTreeBuilder.computeTree();
1089  TreeTemplate<Node>* startTree = new TreeTemplate<Node>(*bionjTreeBuilder.getTree());
1090 
1091  // MP optimization
1092  DRTreeParsimonyScore* MPScore = new DRTreeParsimonyScore(*startTree, *sites, false);
1093  MPScore = OptimizationTools::optimizeTreeNNI(MPScore, 0);
1094  delete startTree;
1095  Tree* retTree = new TreeTemplate<Node>(MPScore->getTree());
1096  delete MPScore;
1097 
1098  return retTree;
1099 }
1100 
1101 /******************************************************************************/
1102 
1103 void TreeTools::computeBootstrapValues(Tree& tree, const vector<Tree*>& vecTr, bool verbose)
1104 {
1105  vector<int> index;
1106  BipartitionList bpTree(tree, true, &index);
1107  vector<size_t> occurences;
1108  BipartitionList* bpList = bipartitionOccurrences(vecTr, occurences);
1109 
1110  vector< Number<double> > bootstrapValues(bpTree.getNumberOfBipartitions());
1111 
1112  for (size_t i = 0; i < bpTree.getNumberOfBipartitions(); i++)
1113  {
1114  if (verbose)
1116  for (size_t j = 0; j < bpList->getNumberOfBipartitions(); j++)
1117  {
1118  if (BipartitionTools::areIdentical(bpTree, i, *bpList, j))
1119  {
1120  bootstrapValues[i] = (double) occurences[j] * 100. / (double) vecTr.size();
1121  break;
1122  }
1123  }
1124  }
1125 
1126  for (size_t i = 0; i < index.size(); i++)
1127  {
1128  if (!tree.isLeaf(index[i]))
1129  tree.setBranchProperty(index[i], BOOTSTRAP, bootstrapValues[i]);
1130  }
1131 
1132  delete bpList;
1133 }
1134 
1135 /******************************************************************************/
1136 
1137 vector<int> TreeTools::getAncestors(const Tree& tree, int nodeId) throw (NodeNotFoundException)
1138 {
1139  vector<int> ids;
1140  int currentId = nodeId;
1141  while (tree.hasFather(currentId))
1142  {
1143  currentId = tree.getFatherId(currentId);
1144  ids.push_back(currentId);
1145  }
1146  return ids;
1147 }
1148 
1149 /******************************************************************************/
1150 
1151 int TreeTools::getLastCommonAncestor(const Tree& tree, const vector<int>& nodeIds) throw (NodeNotFoundException, Exception)
1152 {
1153  if (nodeIds.size() == 0)
1154  throw Exception("TreeTools::getLastCommonAncestor(). You must provide at least one node id.");
1155  vector< vector<int> > ancestors(nodeIds.size());
1156  for (size_t i = 0; i < nodeIds.size(); i++)
1157  {
1158  ancestors[i] = getAncestors(tree, nodeIds[i]);
1159  ancestors[i].insert(ancestors[i].begin(), nodeIds[i]);
1160  }
1161  int lca = tree.getRootId();
1162  size_t count = 1;
1163  for ( ; ; )
1164  {
1165  if (ancestors[0].size() <= count)
1166  return lca;
1167  int current = ancestors[0][ancestors[0].size() - count - 1];
1168  for (size_t i = 1; i < nodeIds.size(); i++)
1169  {
1170  if (ancestors[i].size() <= count)
1171  return lca;
1172  if (ancestors[i][ancestors[i].size() - count - 1] != current)
1173  return lca;
1174  }
1175  lca = current;
1176  count++;
1177  }
1178  // This line is never reached!
1179  return lca;
1180 }
1181 
1182 /******************************************************************************/
1183 
1185 {
1186  // is the tree rooted?
1187  if (!tree.isRooted())
1188  throw Exception("The tree has to be rooted on the branch of interest to determine the midpoint position of the root");
1189  // is the tree multifurcating?
1190  if (tree.isMultifurcating())
1191  throw Exception("The tree is multifurcated, which is not allowed.");
1192 
1193  double length = 0.;
1194  vector<int> sonsIds = tree.getSonsId(tree.getRootId());
1195  // Length of the branch containing the root:
1196  length = tree.getDistanceToFather(sonsIds.at(0)) + tree.getDistanceToFather(sonsIds.at(1));
1197  // The fraction of the original branch allowing to split its length and to place the root:
1198  double x = bestRootPosition_(tree, sonsIds.at(0), sonsIds.at(1), length);
1199  // The new branch lengths are then computed:
1200  tree.setDistanceToFather(sonsIds.at(0), length * x);
1201  tree.setDistanceToFather(sonsIds.at(1), length * (1 - x));
1202 }
1203 
1204 /******************************************************************************/
1205 
1206 double TreeTools::bestRootPosition_(Tree& tree, int nodeId1, int nodeId2, double length)
1207 {
1208  double x;
1209  Moments_ m1, m2;
1210  double A, B; // C;
1211  // The variance is expressed as a degree 2 polynomial : variance(x) = A * x * x + B * x + C
1212  // The fraction x is then obtained by differentiating this equation.
1213  m1 = statFromNode_(tree, nodeId1);
1214  m2 = statFromNode_(tree, nodeId2);
1215  A = 4 * m1.N * (m2.N * length) * length;
1216  B = 4 * length * (m2.N * m1.sum - m1.N * m2.sum - length * m1.N * m2.N);
1217 // C = (m1.N + m2.N) * (m1.squaredSum + m2.squaredSum) + m1.N * length * m2.N * length +
1218 // 2 * m1.N * length * m2.sum - 2 * m2.N * length * m1.sum -
1219 // (m1.sum + m2.sum) * (m1.sum + m2.sum);
1220 
1221  if (A < 1e-20)
1222  x = 0.5;
1223  else
1224  x = -B / (2 * A);
1225  if (x < 0)
1226  x = 0;
1227  else if (x > 1)
1228  x = 1;
1229 
1230  return x;
1231 }
1232 
1233 /******************************************************************************/
1234 
1236 {
1237  // This function recursively calculates both the sum of the branch lengths and the sum of the squared branch lengths down the node whose ID is rootId.
1238  // If below a particular node there are N leaves, the branch between this node and its father is taken into account N times in the calculation.
1239  Moments_ m;
1240  static Moments_ mtmp;
1241 
1242  if (tree.isLeaf(rootId))
1243  {
1244  m.N = 1;
1245  m.sum = 0.;
1246  m.squaredSum = 0.;
1247  }
1248  else
1249  {
1250  vector<int> sonsId = tree.getSonsId(rootId);
1251  for (size_t i = 0; i < sonsId.size(); i++)
1252  {
1253  mtmp = statFromNode_(tree, sonsId.at(i));
1254  double bLength = tree.getDistanceToFather(sonsId.at(i));
1255  m.N += mtmp.N;
1256  m.sum += mtmp.sum + bLength * mtmp.N;
1257  m.squaredSum += mtmp.squaredSum + 2 * bLength * mtmp.sum + mtmp.N * bLength * bLength;
1258  }
1259  }
1260 
1261  return m;
1262 }
1263