bpp-phyl  2.4.0
SubstitutionMappingTools.cpp
//
// File: SubstitutionMappingTools.cpp
// Created by: Julien Dutheil
// Created on: Wed Apr 5 13:04 2006
//

/*
  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004, 2005, 2006)

  This software is a computer program whose purpose is to provide classes
  for phylogenetic data analysis.

  This software is governed by the CeCILL license under French law and
  abiding by the rules of distribution of free software. You can use,
  modify and/ or redistribute the software under the terms of the CeCILL
  license as circulated by CEA, CNRS and INRIA at the following URL
  "http://www.cecill.info".

  As a counterpart to the access to the source code and rights to copy,
  modify and redistribute granted by the license, users are provided only
  with a limited warranty and the software's author, the holder of the
  economic rights, and the successive licensors have only limited
  liability.

  In this respect, the user's attention is drawn to the risks associated
  with loading, using, modifying and/or developing or reproducing the
  software by the user in light of its specific status of free software,
  that may mean that it is complicated to manipulate, and that also
  therefore means that it is reserved for developers and experienced
  professionals having in-depth computer knowledge. Users are therefore
  encouraged to load and test the software's suitability as regards their
  requirements in conditions enabling the security of their systems and/or
  data to be ensured and, more generally, to use and operate it in the
  same conditions as regards security.

  The fact that you are presently reading this means that you have had
  knowledge of the CeCILL license and that you accept its terms.
*/

#include "SubstitutionMappingTools.h"
#include "UniformizationSubstitutionCount.h"
#include "DecompositionReward.h"
#include "RewardMappingTools.h"
#include "../Likelihood/DRTreeLikelihoodTools.h"
#include "../Likelihood/MarginalAncestralStateReconstruction.h"

#include <Bpp/Text/TextTools.h>
#include <Bpp/App/ApplicationTools.h>
#include <Bpp/Numeric/VectorTools.h>
#include <Bpp/Numeric/Matrix/MatrixTools.h>
#include <Bpp/Numeric/DataTable.h>
#include <Bpp/Seq/AlphabetIndex/UserAlphabetIndex1.h>
#include <Bpp/Seq/Alphabet/CoreWordAlphabet.h>

using namespace bpp;

// From the STL:
#include <iomanip>
#include <cmath>
#include <memory>

using namespace std;
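
/*
 * Overview: the computeSubstitutionVectors* functions below fill a
 * ProbabilisticSubstitutionMapping with, for each branch, site i and
 * substitution type t, an estimate of the number of substitutions,
 * typically the posterior expectation
 *
 *   E[n_t | D_i] = (1 / L(D_i)) * sum_{c,x,y} P(D_i, X_father = x, X_node = y, c) * N_t(x, y; d * r_c)
 *
 * where N_t(x, y; v) is SubstitutionCount::getAllNumbersOfSubstitutions(v, t)(x, y),
 * the expected number of type-t substitutions on a branch of length v,
 * conditional on start state x and end state y.
 *
 * A minimal usage sketch, assuming an initialized DRTreeLikelihood 'drtl', a
 * SubstitutionModel* 'model' and a SubstitutionRegister 'reg' built elsewhere
 * ('drtl', 'model' and 'reg' are placeholder names, not part of this file):
 *
 *   vector<int> ids = drtl.getTree().getNodesId();
 *   unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg.clone()));
 *   unique_ptr<ProbabilisticSubstitutionMapping> mapping(
 *     SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, true));
 *   double n = (*mapping)(mapping->getNodeIndex(ids[0]), 0, 0); // (branch, site, type)
 */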

/******************************************************************************/

ProbabilisticSubstitutionMapping* SubstitutionMappingTools::computeSubstitutionVectors(
  const DRTreeLikelihood& drtl,
  const vector<int>& nodeIds,
  SubstitutionCount& substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer* sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();

  size_t nbSites = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates = sequences->getAlphabet()->getSize();
  size_t nbClasses = rDist->getNumberOfCategories();
  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>* rootPatternLinks
    = &drtl.getLikelihoodData()->getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);

  // Store likelihood for each rate for each site:
  VVVdouble lik;
  drtl.computeLikelihoodAtNode(tree.getRootId(), lik);
  Vdouble Lr(nbDistinctSites, 0);
  Vdouble rcProbs = rDist->getProbabilities();
  Vdouble rcRates = rDist->getCategories();
  for (size_t i = 0; i < nbDistinctSites; i++)
  {
    VVdouble* lik_i = &lik[i];
    for (size_t c = 0; c < nbClasses; c++)
    {
      Vdouble* lik_i_c = &(*lik_i)[c];
      double rc = rDist->getProbability(c);
      for (size_t s = 0; s < nbStates; s++)
      {
        Lr[i] += (*lik_i_c)[s] * rc;
      }
    }
  }
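  // At this point Lr[i] holds the total likelihood of distinct site i
  // (summed over states, averaged over rate classes); it is the
  // normalization term L(D_i) used when copying the results below.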

  // Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];
    if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
      continue;

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Now we've got to compute likelihoods in a smart manner... ;)
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; i++)
    {
      VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i->resize(nbClasses);
      for (size_t c = 0; c < nbClasses; c++)
      {
        Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
        likelihoodsFatherConstantPart_i_c->resize(nbStates);
        double rc = rDist->getProbability(c);
        for (size_t s = 0; s < nbStates; s++)
        {
          // (*likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
          // freq is already accounted in the array
          (*likelihoodsFatherConstantPart_i_c)[s] = rc;
        }
      }
    }
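    // likelihoodsFatherConstantPart[i][c][x] will accumulate the likelihood of
    // everything observed around 'father' except through the branch leading to
    // 'currentNode', given state x at 'father' and rate class c (initialized
    // here with the class probability rc).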

    // First, what will remain constant:
    size_t nbSons = father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; n++)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
          unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition:
            if (first)
            {
              pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
            VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; c++)
            {
              const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
              Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
              VVdouble* pxy_c = &pxy[c];
              for (size_t x = 0; x < nbStates; x++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; y++)
                {
                  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
                }
                (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
              }
            }
          }
        }
      }
    }
    if (father->hasFather())
    {
      const Node* currentSon = father->getFather();
      const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
      // Now iterate over all site partitions:
      unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
        unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
          VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; c++)
          {
            const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
            Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
            VVdouble* pxy_c = &pxy[c];
            for (size_t x = 0; x < nbStates; x++)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; y++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[y];
                likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
              }
              (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; i++)
      {
        vector<double> freqs = drtl.getRootFrequencies(i);
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; c++)
        {
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          for (size_t x = 0; x < nbStates; x++)
          {
            (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
          }
        }
      }
    }
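    // At this point likelihoodsFatherConstantPart combines the contributions
    // of all of father's neighbors except currentNode: the other sons, and
    // either the subtree above father (note the transposed indexing of pxy in
    // that case, as the stored arrays point toward father) or, at the root,
    // the root frequencies.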


    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble* likelihoodsFather_node = &(drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId()));
    unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      substitutionCount.setSubstitutionModel(bmd->getSubstitutionModel());
      // compute all nxy first:
      VVVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVVdouble* nxy_c = &nxy[c];
        double rc = rcRates[c];
        nxy_c->resize(nbTypes);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          VVdouble* nxy_c_t = &(*nxy_c)[t];
          Matrix<double>* nijt = substitutionCount.getAllNumbersOfSubstitutions(d * rc, t + 1);

          nxy_c_t->resize(nbStates);
          for (size_t x = 0; x < nbStates; ++x)
          {
            Vdouble* nxy_c_t_x = &(*nxy_c_t)[x];
            nxy_c_t_x->resize(nbStates);
            for (size_t y = 0; y < nbStates; ++y)
            {
              (*nxy_c_t_x)[y] = (*nijt)(x, y);
            }
          }
          delete nijt;
        }
      }
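      // nxy[c][t][x][y] is the expected number of type-t substitutions on a
      // branch of length d * rc, conditional on the start state x and end
      // state y, as provided by the SubstitutionCount object.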

      // Now loop over sites:
      unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          const VVdouble* pxy_c = &pxy[c];
          VVVdouble* nxy_c = &nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
            const Vdouble* pxy_c_x = &(*pxy_c)[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
                                      * (*pxy_c_x)[y]
                                      * (*likelihoodsFather_node_i_c)[y];

              for (size_t t = 0; t < nbTypes; ++t)
              {
                // Now the vector computation:
                substitutionsForCurrentNode[i][t] += likelihood_cxy * (*nxy_c)[t][x][y];
                //                                   <------------>   <--------------->
                // Posterior probability                  |                  |
                // for site i and rate class c *          |                  |
                // likelihood for this site---------------+                  |
                //                                                           |
                // Substitution function for site i and rate class c---------+
              }
            }
          }
        }
      }
    }
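    // substitutionsForCurrentNode[i][t] now holds the joint expectation
    // sum over c, x, y of P(D_i, x, y, c) * N_t(x, y); the division by Lr[i]
    // below turns it into a posterior expected number of substitutions.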

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; ++i)
    {
      for (size_t t = 0; t < nbTypes; ++t)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t] / Lr[(*rootPatternLinks)[i]];
      }
    }
  }
  if (verbose)
  {
    ApplicationTools::displayTaskDone();
  }

  return substitutions;
}

/******************************************************************************/
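
// The following overload differs from the previous function only in where the
// substitution model comes from: the count is reset, for each branch, with the
// model that the SubstitutionModelSet assigns to that branch (see the
// setSubstitutionModel call in the partition loop below).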

ProbabilisticSubstitutionMapping* SubstitutionMappingTools::computeSubstitutionVectors(
  const DRTreeLikelihood& drtl,
  const SubstitutionModelSet& modelSet,
  const vector<int>& nodeIds,
  SubstitutionCount& substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer* sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();

  size_t nbSites = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates = sequences->getAlphabet()->getSize();
  size_t nbClasses = rDist->getNumberOfCategories();
  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>* rootPatternLinks
    = &drtl.getLikelihoodData()->getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);

  // Store likelihood for each rate for each site:
  VVVdouble lik;
  drtl.computeLikelihoodAtNode(tree.getRootId(), lik);
  Vdouble Lr(nbDistinctSites, 0);
  Vdouble rcProbs = rDist->getProbabilities();
  Vdouble rcRates = rDist->getCategories();
  for (size_t i = 0; i < nbDistinctSites; i++)
  {
    VVdouble* lik_i = &lik[i];
    for (size_t c = 0; c < nbClasses; c++)
    {
      Vdouble* lik_i_c = &(*lik_i)[c];
      double rc = rDist->getProbability(c);
      for (size_t s = 0; s < nbStates; s++)
      {
        Lr[i] += (*lik_i_c)[s] * rc;
      }
    }
  }

  // Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];
    if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
      continue;

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Now we've got to compute likelihoods in a smart manner... ;)
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; i++)
    {
      VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i->resize(nbClasses);
      for (size_t c = 0; c < nbClasses; c++)
      {
        Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
        likelihoodsFatherConstantPart_i_c->resize(nbStates);
        double rc = rDist->getProbability(c);
        for (size_t s = 0; s < nbStates; s++)
        {
          // (*likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
          // freq is already accounted in the array
          (*likelihoodsFatherConstantPart_i_c)[s] = rc;
        }
      }
    }

    // First, what will remain constant:
    size_t nbSons = father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; n++)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
          unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition:
            if (first)
            {
              pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
            VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; c++)
            {
              const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
              Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
              VVdouble* pxy_c = &pxy[c];
              for (size_t x = 0; x < nbStates; x++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; y++)
                {
                  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
                }
                (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
              }
            }
          }
        }
      }
    }
    if (father->hasFather())
    {
      const Node* currentSon = father->getFather();
      const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
      // Now iterate over all site partitions:
      unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
        unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
          VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; c++)
          {
            const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
            Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
            VVdouble* pxy_c = &pxy[c];
            for (size_t x = 0; x < nbStates; x++)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; y++)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[y];
                likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
              }
              (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; i++)
      {
        vector<double> freqs = drtl.getRootFrequencies(i);
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; c++)
        {
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          for (size_t x = 0; x < nbStates; x++)
          {
            (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
          }
        }
      }
    }


    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble* likelihoodsFather_node = &(drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId()));
    unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      substitutionCount.setSubstitutionModel(modelSet.getSubstitutionModelForNode(currentNode->getId()));

      // compute all nxy first:
      VVVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVVdouble* nxy_c = &nxy[c];
        double rc = rcRates[c];
        nxy_c->resize(nbTypes);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          VVdouble* nxy_c_t = &(*nxy_c)[t];
          Matrix<double>* nijt = substitutionCount.getAllNumbersOfSubstitutions(d * rc, t + 1);
          nxy_c_t->resize(nbStates);
          for (size_t x = 0; x < nbStates; ++x)
          {
            Vdouble* nxy_c_t_x = &(*nxy_c_t)[x];
            nxy_c_t_x->resize(nbStates);
            for (size_t y = 0; y < nbStates; ++y)
            {
              (*nxy_c_t_x)[y] = (*nijt)(x, y);
            }
          }
          delete nijt;
        }
      }

      // Now loop over sites:
      unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          const VVdouble* pxy_c = &pxy[c];
          VVVdouble* nxy_c = &nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
            const Vdouble* pxy_c_x = &(*pxy_c)[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
                                      * (*pxy_c_x)[y]
                                      * (*likelihoodsFather_node_i_c)[y];

              for (size_t t = 0; t < nbTypes; ++t)
              {
                // Now the vector computation:
                substitutionsForCurrentNode[i][t] += likelihood_cxy * (*nxy_c)[t][x][y];
                //                                   <------------>   <--------------->
                // Posterior probability                  |                  |
                // for site i and rate class c *          |                  |
                // likelihood for this site---------------+                  |
                //                                                           |
                // Substitution function for site i and rate class c---------+
              }
            }
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; ++i)
    {
      for (size_t t = 0; t < nbTypes; ++t)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t] / Lr[(*rootPatternLinks)[i]];
      }
    }
  }
  if (verbose)
  {
    ApplicationTools::displayTaskDone();
  }

  return substitutions;
}

/**************************************************************************************************/

ProbabilisticSubstitutionMapping* SubstitutionMappingTools::computeSubstitutionVectorsNoAveraging(
  const DRTreeLikelihood& drtl,
  SubstitutionCount& substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsNoAveraging(). Likelihood object is not initialized.");

  // A few variables we'll need:
  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer* sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();

  size_t nbSites = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates = sequences->getAlphabet()->getSize();
  size_t nbClasses = rDist->getNumberOfCategories();
  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>* rootPatternLinks
    = &drtl.getLikelihoodData()->getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);

  Vdouble rcRates = rDist->getCategories();

  // Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);

  for (size_t l = 0; l < nbNodes; ++l)
  {
    // For each node,
    const Node* currentNode = nodes[l];

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Now we've got to compute likelihoods in a smart manner... ;)
    VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
      likelihoodsFatherConstantPart_i->resize(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
        likelihoodsFatherConstantPart_i_c->resize(nbStates);
        double rc = rDist->getProbability(c);
        for (size_t s = 0; s < nbStates; ++s)
        {
          // (*likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
          // freq is already accounted in the array
          (*likelihoodsFatherConstantPart_i_c)[s] = rc;
        }
      }
    }

    // First, what will remain constant:
    size_t nbSons = father->getNumberOfSons();
    for (size_t n = 0; n < nbSons; ++n)
    {
      const Node* currentSon = father->getSon(n);
      if (currentSon->getId() != currentNode->getId())
      {
        const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());

        // Now iterate over all site partitions:
        unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
        VVVdouble pxy;
        bool first;
        while (mit->hasNext())
        {
          TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
          unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
          first = true;
          while (sit->hasNext())
          {
            size_t i = sit->next();
            // We retrieve the transition probabilities for this site partition:
            if (first)
            {
              pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
              first = false;
            }
            const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
            VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
            for (size_t c = 0; c < nbClasses; ++c)
            {
              const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
              Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
              VVdouble* pxy_c = &pxy[c];
              for (size_t x = 0; x < nbStates; ++x)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[x];
                double likelihood = 0.;
                for (size_t y = 0; y < nbStates; ++y)
                {
                  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
                }
                (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
              }
            }
          }
        }
      }
    }
    if (father->hasFather())
    {
      const Node* currentSon = father->getFather();
      const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
      // Now iterate over all site partitions:
      unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
      VVVdouble pxy;
      bool first;
      while (mit->hasNext())
      {
        TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
        unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
        first = true;
        while (sit->hasNext())
        {
          size_t i = sit->next();
          // We retrieve the transition probabilities for this site partition:
          if (first)
          {
            pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
            first = false;
          }
          const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
          VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
          for (size_t c = 0; c < nbClasses; ++c)
          {
            const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
            Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
            VVdouble* pxy_c = &pxy[c];
            for (size_t x = 0; x < nbStates; ++x)
            {
              double likelihood = 0.;
              for (size_t y = 0; y < nbStates; ++y)
              {
                Vdouble* pxy_c_x = &(*pxy_c)[y];
                likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
              }
              (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
            }
          }
        }
      }
    }
    else
    {
      // Account for root frequencies:
      for (size_t i = 0; i < nbDistinctSites; ++i)
      {
        vector<double> freqs = drtl.getRootFrequencies(i);
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
          }
        }
      }
    }

    // Then, we deal with the node of interest.
    // We first average upon 'y' to save computations, and then upon 'x'.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)

    // Iterate over all site partitions:
    const VVVdouble* likelihoodsFather_node = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId());
    unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    VVVdouble pxy;
    bool first;
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      substitutionCount.setSubstitutionModel(bmd->getSubstitutionModel());
      // compute all nxy first:
      VVVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        double rc = rcRates[c];
        VVVdouble* nxy_c = &nxy[c];
        nxy_c->resize(nbTypes);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          VVdouble* nxy_c_t = &(*nxy_c)[t];
          nxy_c_t->resize(nbStates);
          Matrix<double>* nijt = substitutionCount.getAllNumbersOfSubstitutions(d * rc, t + 1);
          for (size_t x = 0; x < nbStates; ++x)
          {
            Vdouble* nxy_c_t_x = &(*nxy_c_t)[x];
            nxy_c_t_x->resize(nbStates);
            for (size_t y = 0; y < nbStates; ++y)
            {
              (*nxy_c_t_x)[y] = (*nijt)(x, y);
            }
          }
          delete nijt;
        }
      }

      // Now loop over sites:
      unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      first = true;
      while (sit->hasNext())
      {
        size_t i = sit->next();
        // We retrieve the transition probabilities and substitution counts for this site partition:
        if (first)
        {
          pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
          first = false;
        }
        const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
        VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
        RowMatrix<double> pairProbabilities(nbStates, nbStates);
        MatrixTools::fill(pairProbabilities, 0.);
        VVVdouble subsCounts(nbStates);
        for (size_t j = 0; j < nbStates; ++j)
        {
          subsCounts[j].resize(nbStates);
          for (size_t k = 0; k < nbStates; ++k)
          {
            subsCounts[j][k].resize(nbTypes);
          }
        }
        for (size_t c = 0; c < nbClasses; ++c)
        {
          const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
          Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
          const VVdouble* pxy_c = &pxy[c];
          VVVdouble* nxy_c = &nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
            const Vdouble* pxy_c_x = &(*pxy_c)[x];
            for (size_t y = 0; y < nbStates; ++y)
            {
              double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
                                      * (*pxy_c_x)[y]
                                      * (*likelihoodsFather_node_i_c)[y];
              pairProbabilities(x, y) += likelihood_cxy; // Sum over all rate classes.
              for (size_t t = 0; t < nbTypes; ++t)
              {
                subsCounts[x][y][t] += likelihood_cxy * (*nxy_c)[t][x][y];
              }
            }
          }
        }
        // Now the vector computation:
        // Here we do not average over all possible pairs of ancestral states;
        // we only consider the pair with maximum likelihood:
        vector<size_t> xy = MatrixTools::whichMax(pairProbabilities);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          substitutionsForCurrentNode[i][t] += subsCounts[xy[0]][xy[1]][t] / pairProbabilities(xy[0], xy[1]);
        }
      }
    }
    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; i++)
    {
      for (size_t t = 0; t < nbTypes; t++)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t];
      }
    }
  }
  if (verbose)
  {
    ApplicationTools::displayTaskDone();
  }
  return substitutions;
}

/**************************************************************************************************/

ProbabilisticSubstitutionMapping* SubstitutionMappingTools::computeSubstitutionVectorsNoAveragingMarginal(
  const DRTreeLikelihood& drtl,
  SubstitutionCount& substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsNoAveragingMarginal(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer* sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();
  const Alphabet* alpha = sequences->getAlphabet();

  size_t nbSites = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates = alpha->getSize();
  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>* rootPatternLinks
    = &drtl.getLikelihoodData()->getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);

  // Compute the whole likelihood of the tree according to the specified model:

  Vdouble rcRates = rDist->getCategories();

  // Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute marginal ancestral states");
  MarginalAncestralStateReconstruction masr(&drtl);
  map<int, vector<size_t> > ancestors = masr.getAllAncestralStates();
  if (verbose)
    ApplicationTools::displayTaskDone();

  // Now we just have to compute the substitution vectors:
  if (verbose)
    ApplicationTools::displayTask("Compute substitution vectors", true);

  for (size_t l = 0; l < nbNodes; l++)
  {
    const Node* currentNode = nodes[l];

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    vector<size_t> nodeStates = ancestors[currentNode->getId()]; // These are not 'true' ancestors ;)
    vector<size_t> fatherStates = ancestors[father->getId()];

    // For each node,
    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Here, we have no likelihood computation to do!

    // Then, we deal with the node of interest.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)
    // Iterate over all site partitions:
    unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      substitutionCount.setSubstitutionModel(bmd->getSubstitutionModel());
      // compute all nxy first:
      VVVdouble nxyt(nbTypes);
      for (size_t t = 0; t < nbTypes; ++t)
      {
        nxyt[t].resize(nbStates);
        Matrix<double>* nxy = substitutionCount.getAllNumbersOfSubstitutions(d, t + 1);
        for (size_t x = 0; x < nbStates; ++x)
        {
          nxyt[t][x].resize(nbStates);
          for (size_t y = 0; y < nbStates; ++y)
          {
            nxyt[t][x][y] = (*nxy)(x, y);
          }
        }
        delete nxy;
      }
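      // nxyt[t][x][y] holds the expected number of type-t substitutions over
      // the full branch length d, conditional on state x at the father and y
      // at the node; no rate-class averaging is performed in this variant.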
      // Now loop over sites:
      unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      while (sit->hasNext())
      {
        size_t i = sit->next();
        size_t fatherState = fatherStates[i];
        size_t nodeState = nodeStates[i];
        if (fatherState >= nbStates || nodeState >= nbStates)
          for (size_t t = 0; t < nbTypes; ++t)
          {
            substitutionsForCurrentNode[i][t] = 0;
          } // To be conservative! Only in case there are generic characters.
        else
          for (size_t t = 0; t < nbTypes; ++t)
          {
            substitutionsForCurrentNode[i][t] = nxyt[t][fatherState][nodeState];
          }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; i++)
    {
      for (size_t t = 0; t < nbTypes; t++)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t];
      }
    }
  }
  if (verbose)
  {
    *ApplicationTools::message << " ";
    ApplicationTools::displayTaskDone();
  }
  return substitutions;
}

/**************************************************************************************************/

ProbabilisticSubstitutionMapping* SubstitutionMappingTools::computeSubstitutionVectorsMarginal(
  const DRTreeLikelihood& drtl,
  SubstitutionCount& substitutionCount,
  bool verbose)
{
  // Preamble:
  if (!drtl.isInitialized())
    throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsMarginal(). Likelihood object is not initialized.");

  // A few variables we'll need:

  const TreeTemplate<Node> tree(drtl.getTree());
  const SiteContainer* sequences = drtl.getData();
  const DiscreteDistribution* rDist = drtl.getRateDistribution();

  size_t nbSites = sequences->getNumberOfSites();
  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
  size_t nbStates = sequences->getAlphabet()->getSize();
  size_t nbClasses = rDist->getNumberOfCategories();
  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
  vector<const Node*> nodes = tree.getNodes();
  const vector<size_t>* rootPatternLinks
    = &drtl.getLikelihoodData()->getRootArrayPositions();
  nodes.pop_back(); // Remove root node.
  size_t nbNodes = nodes.size();

  // We create a new ProbabilisticSubstitutionMapping object:
  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);

  // Compute the whole likelihood of the tree according to the specified model:

  Vdouble rcProbs = rDist->getProbabilities();
  Vdouble rcRates = rDist->getCategories();

  // II) Compute the number of substitutions for each class and each branch in the tree:
  if (verbose)
    ApplicationTools::displayTask("Compute marginal node-pairs likelihoods", true);

  for (size_t l = 0; l < nbNodes; l++)
  {
    const Node* currentNode = nodes[l];

    const Node* father = currentNode->getFather();

    double d = currentNode->getDistanceToFather();

    // For each node,
    if (verbose)
      ApplicationTools::displayGauge(l, nbNodes - 1);
    VVdouble substitutionsForCurrentNode(nbDistinctSites);
    for (size_t i = 0; i < nbDistinctSites; ++i)
    {
      substitutionsForCurrentNode[i].resize(nbTypes);
    }

    // Then, we deal with the node of interest.
    // ('y' is the state at 'node' and 'x' the state at 'father'.)
    VVVdouble probsNode = DRTreeLikelihoodTools::getPosteriorProbabilitiesPerStatePerRate(drtl, currentNode->getId());
    VVVdouble probsFather = DRTreeLikelihoodTools::getPosteriorProbabilitiesPerStatePerRate(drtl, father->getId());

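    // probsFather[i][c][x] and probsNode[i][c][y] are marginal posterior state
    // probabilities; the product taken below approximates the joint posterior
    // of the (father, node) state pair, neglecting the correlation between them.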
    // Iterate over all site partitions:
    unique_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
    while (mit->hasNext())
    {
      TreeLikelihood::ConstBranchModelDescription* bmd = mit->next();
      substitutionCount.setSubstitutionModel(bmd->getSubstitutionModel());
      // compute all nxy first:
      VVVVdouble nxy(nbClasses);
      for (size_t c = 0; c < nbClasses; ++c)
      {
        VVVdouble* nxy_c = &nxy[c];
        double rc = rcRates[c];
        nxy_c->resize(nbTypes);
        for (size_t t = 0; t < nbTypes; ++t)
        {
          VVdouble* nxy_c_t = &(*nxy_c)[t];
          Matrix<double>* nijt = substitutionCount.getAllNumbersOfSubstitutions(d * rc, t + 1);
          nxy_c_t->resize(nbStates);
          for (size_t x = 0; x < nbStates; ++x)
          {
            Vdouble* nxy_c_t_x = &(*nxy_c_t)[x];
            nxy_c_t_x->resize(nbStates);
            for (size_t y = 0; y < nbStates; ++y)
            {
              (*nxy_c_t_x)[y] = (*nijt)(x, y);
            }
          }
          delete nijt;
        }
      }

      // Now loop over sites:
      unique_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
      while (sit->hasNext())
      {
        size_t i = sit->next();
        VVdouble* probsNode_i = &probsNode[i];
        VVdouble* probsFather_i = &probsFather[i];
        for (size_t c = 0; c < nbClasses; ++c)
        {
          Vdouble* probsNode_i_c = &(*probsNode_i)[c];
          Vdouble* probsFather_i_c = &(*probsFather_i)[c];
          VVVdouble* nxy_c = &nxy[c];
          for (size_t x = 0; x < nbStates; ++x)
          {
            for (size_t y = 0; y < nbStates; ++y)
            {
              double prob_cxy = (*probsFather_i_c)[x] * (*probsNode_i_c)[y];
              // Now the vector computation:
              for (size_t t = 0; t < nbTypes; ++t)
              {
                substitutionsForCurrentNode[i][t] += prob_cxy * (*nxy_c)[t][x][y];
                //                                   <------>   <--------------->
                // Posterior probability                 |              |
                // for site i and rate class c *         |              |
                // likelihood for this site--------------+              |
                //                                                      |
                // Substitution function for site i and rate class c----+
              }
            }
          }
        }
      }
    }

    // Now we just have to copy the substitutions into the result vector:
    for (size_t i = 0; i < nbSites; ++i)
    {
      for (size_t t = 0; t < nbTypes; ++t)
      {
        (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t];
      }
    }
  }
  if (verbose)
  {
    *ApplicationTools::message << " ";
    ApplicationTools::displayTaskDone();
  }
  return substitutions;
}

/**************************************************************************************************/

void SubstitutionMappingTools::writeToStream(
  const ProbabilisticSubstitutionMapping& substitutions,
  const SiteContainer& sites,
  size_t type,
  ostream& out)
{
  if (!out)
    throw IOException("SubstitutionMappingTools::writeToStream. Can't write to stream.");
  out << "Branches";
  out << "\tMean";
  for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
  {
    out << "\tSite" << sites.getSite(i).getPosition();
  }
  out << endl;

  for (size_t j = 0; j < substitutions.getNumberOfBranches(); j++)
  {
    out << substitutions.getNode(j)->getId() << "\t" << substitutions.getNode(j)->getDistanceToFather();
    for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
    {
      out << "\t" << substitutions(j, i, type);
    }
    out << endl;
  }
}
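
// The output is a tab-separated table; for two branches and three sites it
// would look like this (values made up for illustration):
//
//   Branches  Mean   Site1  Site2  Site3
//   0         0.152  0.233  0.002  0.741
//   2         0.045  0.129  0.000  0.398
//
// Note that the "Mean" column actually holds the branch length
// (getDistanceToFather()); readFromStream below discards that column.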

/**************************************************************************************************/

void SubstitutionMappingTools::readFromStream(istream& in, ProbabilisticSubstitutionMapping& substitutions, size_t type)
{
  try
  {
    DataTable* data = DataTable::read(in, "\t", true, -1);
    vector<string> ids = data->getColumn(0);
    data->deleteColumn(0); // Remove ids
    data->deleteColumn(0); // Remove means
    // Now parse the table:
    size_t nbSites = data->getNumberOfColumns();
    substitutions.setNumberOfSites(nbSites);
    size_t nbBranches = data->getNumberOfRows();
    for (size_t i = 0; i < nbBranches; i++)
    {
      int id = TextTools::toInt(ids[i]);
      size_t br = substitutions.getNodeIndex(id);
      for (size_t j = 0; j < nbSites; j++)
      {
        substitutions(br, j, type) = TextTools::toDouble((*data)(i, j));
      }
    }
    // Parse the header:
    for (size_t i = 0; i < nbSites; i++)
    {
      string siteTxt = data->getColumnName(i);
      int site = 0;
      if (siteTxt.substr(0, 4) == "Site")
        site = TextTools::to<int>(siteTxt.substr(4));
      else
        site = TextTools::to<int>(siteTxt);
      substitutions.setSitePosition(i, site);
    }

    delete data;
  }
  catch (Exception& e)
  {
    throw IOException(string("Bad input file. ") + e.what());
  }
}

/**************************************************************************************************/

vector<double> SubstitutionMappingTools::computeTotalSubstitutionVectorForSitePerBranch(const SubstitutionMapping& smap, size_t siteIndex)
{
  size_t nbBranches = smap.getNumberOfBranches();
  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
  Vdouble v(nbBranches);
  for (size_t l = 0; l < nbBranches; ++l)
  {
    v[l] = 0;
    for (size_t t = 0; t < nbTypes; ++t)
    {
      v[l] += smap(l, siteIndex, t);
    }
  }
  return v;
}

/**************************************************************************************************/

vector<double> SubstitutionMappingTools::computeTotalSubstitutionVectorForSitePerType(const SubstitutionMapping& smap, size_t siteIndex)
{
  size_t nbBranches = smap.getNumberOfBranches();
  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
  Vdouble v(nbTypes);
  for (size_t t = 0; t < nbTypes; ++t)
  {
    v[t] = 0;
    for (size_t l = 0; l < nbBranches; ++l)
    {
      v[t] += smap(l, siteIndex, t);
    }
  }
  return v;
}

/**************************************************************************************************/

double SubstitutionMappingTools::computeNormForSite(const SubstitutionMapping& smap, size_t siteIndex)
{
  double sumSquare = 0;
  for (size_t l = 0; l < smap.getNumberOfBranches(); ++l)
  {
    double sum = 0;
    for (size_t t = 0; t < smap.getNumberOfSubstitutionTypes(); ++t)
    {
      sum += smap(l, siteIndex, t);
    }
    sumSquare += sum * sum;
  }
  return sqrt(sumSquare);
}

/**************************************************************************************************/

vector<double> SubstitutionMappingTools::computeSumForBranch(const SubstitutionMapping& smap, size_t branchIndex)
{
  size_t nbSites = smap.getNumberOfSites();
  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
  Vdouble v(nbTypes, 0);
  for (size_t i = 0; i < nbSites; ++i)
  {
    for (size_t t = 0; t < nbTypes; ++t)
    {
      v[t] += smap(branchIndex, i, t);
    }
  }
  return v;
}

/**************************************************************************************************/

vector<double> SubstitutionMappingTools::computeSumForSite(const SubstitutionMapping& smap, size_t siteIndex)
{
  size_t nbBranches = smap.getNumberOfBranches();
  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
  Vdouble v(nbTypes, 0);
  for (size_t i = 0; i < nbBranches; ++i)
  {
    for (size_t t = 0; t < nbTypes; ++t)
    {
      v[t] += smap(i, siteIndex, t);
    }
  }
  return v;
}

/**************************************************************************************************/

vector< vector<double> > SubstitutionMappingTools::getCountsPerBranch(
  DRTreeLikelihood& drtl,
  const vector<int>& ids,
  SubstitutionModel* model,
  const SubstitutionRegister& reg,
  double threshold,
  bool verbose)
{
  SubstitutionRegister* reg2 = reg.clone();

  unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg2));

  unique_ptr<ProbabilisticSubstitutionMapping> mapping(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
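
  // For each requested branch id, the loop below accumulates the mapping over
  // sites and types. Any NaN (which can occur on very short branches) discards
  // the whole branch, and, when threshold >= 0, sites whose total count exceeds
  // the threshold are skipped as presumably saturated.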

  vector< vector<double> > counts(ids.size());
  size_t nbSites = mapping->getNumberOfSites();
  size_t nbTypes = mapping->getNumberOfSubstitutionTypes();

  for (size_t k = 0; k < ids.size(); ++k)
  {
    vector<double> countsf(nbTypes, 0);
    vector<double> tmp(nbTypes, 0);
    size_t nbIgnored = 0;
    bool error = false;
    for (size_t i = 0; !error && i < nbSites; ++i)
    {
      double s = 0;
      for (size_t t = 0; t < nbTypes; ++t)
      {
        tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
        error = std::isnan(tmp[t]);
        if (error)
          goto ERROR;
        s += tmp[t];
      }
      if (threshold >= 0)
      {
        if (s <= threshold)
          countsf += tmp;
        else
        {
          nbIgnored++;
        }
      }
      else
      {
        countsf += tmp;
      }
    }

ERROR:
    if (error)
    {
      // We do nothing. This happens for small branches.
      if (verbose)
        ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", counts could not be computed.");
      for (size_t t = 0; t < nbTypes; ++t)
      {
        countsf[t] = 0;
      }
    }
    else
    {
      if (nbIgnored > 0)
      {
        if (verbose)
          ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", " + TextTools::toString(nbIgnored) + " sites (" + TextTools::toString(ceil(static_cast<double>(nbIgnored * 100) / static_cast<double>(nbSites))) + "%) have been ignored because they are presumably saturated.");
      }
    }

    counts[k].resize(countsf.size());
    for (size_t j = 0; j < countsf.size(); ++j)
    {
      counts[k][j] = countsf[j];
    }
  }

  return counts;
}

/**************************************************************************************************/

vector< vector<double> > SubstitutionMappingTools::getCountsPerBranch(
  DRTreeLikelihood& drtl,
  const vector<int>& ids,
  const SubstitutionModelSet& modelSet,
  const SubstitutionRegister& reg,
  double threshold,
  bool verbose)
{
  SubstitutionRegister* reg2 = reg.clone();

  unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(modelSet.getSubstitutionModel(0), reg2));

  unique_ptr<ProbabilisticSubstitutionMapping> mapping(SubstitutionMappingTools::computeSubstitutionVectors(drtl, modelSet, ids, *count, false));

  vector< vector<double> > counts(ids.size());
  size_t nbSites = mapping->getNumberOfSites();
  size_t nbTypes = mapping->getNumberOfSubstitutionTypes();

  for (size_t k = 0; k < ids.size(); ++k)
  {
    vector<double> countsf(nbTypes, 0);
    vector<double> tmp(nbTypes, 0);
    size_t nbIgnored = 0;
    bool error = false;
    for (size_t i = 0; !error && i < nbSites; ++i)
    {
      double s = 0;
      for (size_t t = 0; t < nbTypes; ++t)
      {
        tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
        error = std::isnan(tmp[t]);
        if (error)
          goto ERROR;
        s += tmp[t];
      }
      if (threshold >= 0)
      {
        if (s <= threshold)
          countsf += tmp;
        else
        {
          nbIgnored++;
        }
      }
      else
      {
        countsf += tmp;
      }
    }

ERROR:
    if (error)
    {
      // We do nothing. This happens for small branches.
      if (verbose)
        ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", counts could not be computed.");
      for (size_t t = 0; t < nbTypes; ++t)
      {
        countsf[t] = 0;
      }
    }
    else
    {
      if (nbIgnored > 0)
      {
        if (verbose)
          ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", " + TextTools::toString(nbIgnored) + " sites (" + TextTools::toString(ceil(static_cast<double>(nbIgnored * 100) / static_cast<double>(nbSites))) + "%) have been ignored because they are presumably saturated.");
      }
    }

    counts[k].resize(countsf.size());
    for (size_t j = 0; j < countsf.size(); ++j)
    {
      counts[k][j] = countsf[j];
    }
  }

  return counts;
}

/**************************************************************************************************/

vector< vector<double> > SubstitutionMappingTools::getNormalizationsPerBranch(
  DRTreeLikelihood& drtl,
  const vector<int>& ids,
  const SubstitutionModel* nullModel,
  const SubstitutionRegister& reg,
  bool verbose)
{
  size_t nbTypes = reg.getNumberOfSubstitutionTypes();
  size_t nbStates = nullModel->getAlphabet()->getSize();
  size_t nbSites = drtl.getNumberOfSites();
  vector<int> supportedStates = nullModel->getAlphabetStates();

  // compute the AlphabetIndex for each substitutionType
  vector<UserAlphabetIndex1> usai(nbTypes, UserAlphabetIndex1(nullModel->getAlphabet()));

  for (size_t nbt = 0; nbt < nbTypes; nbt++)
  {
    for (size_t i = 0; i < nbStates; i++)
    {
      usai[nbt].setIndex(supportedStates[i], 0);
    }
  }

  for (size_t i = 0; i < nbStates; i++)
  {
    for (size_t j = 0; j < nbStates; j++)
    {
      if (i != j)
      {
        size_t nbt = reg.getType(i, j);
        if (nbt != 0)
          usai[nbt - 1].setIndex(supportedStates[i], usai[nbt - 1].getIndex(supportedStates[i]) + nullModel->Qij(i, j));
      }
    }
  }
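  // Each usai[t] now maps a state x to the sum of Qij(x, j) over all j such
  // that x -> j is a substitution of type t + 1, i.e. the total rate of type-t
  // changes leaving x under the null model. The expected reward of this index
  // along a branch is therefore the expected number of type-t substitutions,
  // which is used below as the normalization factor.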

  // compute the normalization for each substitutionType
  vector< vector<double> > rewards(ids.size());

  for (size_t k = 0; k < ids.size(); ++k)
  {
    rewards[k].resize(nbTypes);
  }

  for (size_t nbt = 0; nbt < nbTypes; nbt++)
  {
    unique_ptr<Reward> reward(new DecompositionReward(nullModel, &usai[nbt]));

    unique_ptr<ProbabilisticRewardMapping> mapping(RewardMappingTools::computeRewardVectors(drtl, ids, *reward, false));

    for (size_t k = 0; k < ids.size(); ++k)
    {
      double s = 0;
      for (size_t i = 0; i < nbSites; ++i)
      {
        double tmp = (*mapping)(k, i);
        if (std::isnan(tmp))
        {
          if (verbose)
            ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", reward for type " + reg.getTypeName(nbt + 1) + " could not be computed.");
          s = 0;
          break;
        }
        s += tmp;
      }
      rewards[k][nbt] = s;
    }
    reward.reset();
    mapping.reset();
  }
  return rewards;
}

/**************************************************************************************************/

vector< vector<double> > SubstitutionMappingTools::getNormalizationsPerBranch(
  DRTreeLikelihood& drtl,
  const vector<int>& ids,
  const SubstitutionModelSet* nullModelSet,
  const SubstitutionRegister& reg,
  bool verbose)
{
  size_t nbTypes = reg.getNumberOfSubstitutionTypes();
  size_t nbStates = nullModelSet->getAlphabet()->getSize();
  size_t nbSites = drtl.getNumberOfSites();
  size_t nbModels = nullModelSet->getNumberOfModels();

  // compute the AlphabetIndex for each substitutionType
  // compute the normalization for each substitutionType
  vector< vector<double> > rewards(ids.size());

  for (size_t k = 0; k < ids.size(); ++k)
  {
    rewards[k].resize(nbTypes);
  }

  vector<UserAlphabetIndex1> usai(nbTypes, UserAlphabetIndex1(nullModelSet->getAlphabet()));

  for (size_t nbm = 0; nbm < nbModels; nbm++)
  {
    vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));

    if (mids.size() > 0)
    {
      const SubstitutionModel* modn = nullModelSet->getSubstitutionModel(nbm);
      vector<int> supportedStates = modn->getAlphabetStates();

      for (size_t nbt = 0; nbt < nbTypes; nbt++)
      {
        for (size_t i = 0; i < nbStates; i++)
        {
          usai[nbt].setIndex(supportedStates[i], 0);
        }
      }

      for (size_t i = 0; i < nbStates; i++)
      {
        for (size_t j = 0; j < nbStates; j++)
        {
          if (i != j)
          {
            size_t nbt = reg.getType(i, j);
            if (nbt != 0)
              usai[nbt - 1].setIndex(supportedStates[i], usai[nbt - 1].getIndex(supportedStates[i]) + modn->Qij(i, j));
          }
        }
      }

      for (size_t nbt = 0; nbt < nbTypes; nbt++)
      {
        unique_ptr<Reward> reward(new DecompositionReward(nullModelSet->getSubstitutionModel(nbm), &usai[nbt]));

        unique_ptr<ProbabilisticRewardMapping> mapping(RewardMappingTools::computeRewardVectors(drtl, mids, *reward, false));

        for (size_t k = 0; k < mids.size(); k++)
        {
          double s = 0;
          for (size_t i = 0; i < nbSites; ++i)
          {
            double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
            if (std::isnan(tmp))
            {
              if (verbose)
                ApplicationTools::displayWarning("On branch " + TextTools::toString(mids[k]) + ", reward for type " + reg.getTypeName(nbt + 1) + " could not be computed.");
              s = 0;
              break;
            }
            else
              s += tmp;
          }

          rewards[VectorTools::which(ids, mids[k])][nbt] = s;
        }
        reward.reset();
        mapping.reset();
      }
    }
  }

  return rewards;
}
1736 
1737 /**************************************************************************************************/
1738 
1739 void SubstitutionMappingTools::computeCountsPerTypePerBranch(
1740  DRTreeLikelihood& drtl,
1741  const vector<int>& ids,
1742  SubstitutionModel* model,
1743  SubstitutionModel* nullModel,
1744  const SubstitutionRegister& reg,
1745  VVdouble& result,
1746  bool perTime,
1747  bool perWord,
1748  bool verbose)
1749 {
1750  vector< vector<double> > factors;
1751 
1752  result = getCountsPerBranch(drtl, ids, model, reg, -1, verbose);
1753  factors = getNormalizationsPerBranch(drtl, ids, nullModel, reg, verbose);
1754 
1755 
1756  // If the alphabet is a word alphabet and normalization is not per word,
1757  // further divide by the word length:
1758  const CoreWordAlphabet* wAlp = dynamic_cast<const CoreWordAlphabet*>(nullModel->getAlphabet());
1759 
1760  float sizeWord = float((wAlp != NULL && !perWord) ? wAlp->getLength() : 1);
1761 
1762 
1763  size_t nbTypes = result[0].size();
1764 
1765  for (size_t k = 0; k < ids.size(); ++k)
1766  {
1767  for (size_t t = 0; t < nbTypes; ++t)
1768  {
1769  if (factors[k][t] != 0)
1770  result[k][t] /= (factors[k][t] * sizeWord);
1771  }
1772  }
1773 
1774  // Multiply by the lengths of the branches of the input tree
1775 
1776  if (!perTime)
1777  {
1778  const TreeTemplate<Node> tree(drtl.getTree());
1779 
1780  for (size_t k = 0; k < ids.size(); ++k)
1781  {
1782  double l = tree.getNode(ids[k])->getDistanceToFather();
1783  for (size_t t = 0; t < nbTypes; ++t)
1784  {
1785  result[k][t] *= l;
1786  }
1787  }
1788  }
1789 
1790 }
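// Normalization applied above, for branch k and type t:
//
//   result[k][t] = counts[k][t] / (factors[k][t] * sizeWord)
//
// and, when perTime is false, the ratio is further multiplied by the length
// of branch ids[k] taken from the input tree.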
1791 
1792 /**************************************************************************************************/
1793 
1794 void SubstitutionMappingTools::computeCountsPerTypePerBranch(
1795  DRTreeLikelihood& drtl,
1796  const vector<int>& ids,
1797  SubstitutionModelSet* modelSet,
1798  SubstitutionModelSet* nullModelSet,
1799  const SubstitutionRegister& reg,
1800  VVdouble& result,
1801  bool perTime,
1802  bool perWord,
1803  bool verbose)
1804 {
1805  vector< vector<double> > factors;
1806 
1807  result = getCountsPerBranch(drtl, ids, *modelSet, reg, -1, verbose);
1808  factors = getNormalizationsPerBranch(drtl, ids, nullModelSet, reg, verbose);
1809 
1810  size_t nbTypes = result[0].size();
1811 
1812  // If the alphabet is a word alphabet and normalization is not per word,
1813  // further divide by the word length:
1814  const CoreWordAlphabet* wAlp = dynamic_cast<const CoreWordAlphabet*>(nullModelSet->getAlphabet());
1815 
1816  float sizeWord = float((wAlp != NULL && !perWord) ? wAlp->getLength() : 1);
1817 
1818 
1819  for (size_t k = 0; k < ids.size(); ++k)
1820  {
1821  for (size_t t = 0; t < nbTypes; ++t)
1822  {
1823  if (factors[k][t] != 0)
1824  result[k][t] /= (factors[k][t] * sizeWord);
1825  }
1826  }
1827 
1828  // Multiply by the lengths of the branches of the input tree
1829 
1830  if (!perTime)
1831  {
1832  const TreeTemplate<Node> tree(drtl.getTree());
1833 
1834  for (size_t k = 0; k < ids.size(); ++k)
1835  {
1836  double l = tree.getNode(ids[k])->getDistanceToFather();
1837  for (size_t t = 0; t < nbTypes; ++t)
1838  {
1839  result[k][t] *= l;
1840  }
1841  }
1842  }
1843 }
1844 
1845 /**************************************************************************************************/
1846 
1847 void SubstitutionMappingTools::computeCountsPerTypePerBranch(
1848  DRTreeLikelihood& drtl,
1849  const vector<int>& ids,
1850  SubstitutionModel* model,
1851  const SubstitutionRegister& reg,
1852  VVdouble& result,
1853  double threshold,
1854  bool verbose)
1855 {
1856  result = getCountsPerBranch(drtl, ids, model, reg, threshold, verbose);
1857 
1858  const CategorySubstitutionRegister* creg = dynamic_cast<const CategorySubstitutionRegister*>(&reg);
1859 
1860  if ((creg != NULL) && !creg->isStationary())
1861  {
1862  size_t nbTypes = result[0].size();
1863 
1864  for (size_t k = 0; k < ids.size(); ++k)
1865  {
1866  vector<double> freqs = DRTreeLikelihoodTools::getPosteriorStateFrequencies(drtl, ids[k]);
1867  // Compute frequencies for types:
1868  vector<double> freqsTypes(creg->getNumberOfCategories());
1869  for (size_t i = 0; i < freqs.size(); ++i)
1870  {
1871  size_t c = creg->getCategory(i);
1872  freqsTypes[c - 1] += freqs[i];
1873  }
1874 
1875  // We divide the counts by the frequencies and rescale:
1876  double s = VectorTools::sum(result[k]);
1877  for (size_t t = 0; t < nbTypes; ++t)
1878  {
1879  result[k][t] /= freqsTypes[creg->getCategoryFrom(t + 1) - 1];
1880  }
1881 
1882  double s2 = VectorTools::sum(result[k]);
1883  // Scale:
1884  result[k] = (result[k] / s2) * s;
1885  }
1886  }
1887 }
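// For a non-stationary CategorySubstitutionRegister, the correction above is:
//
//   result[k][t] /= freqsTypes[categoryFrom(t + 1) - 1]   (posterior frequency
//                                                          of the source category)
//   result[k]    *= s / s2                                (restore the branch total)
//
// so only the distribution of counts among types changes, not their sum.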
1888 
1889 /**************************************************************************************************/
1890 
1891 void SubstitutionMappingTools::computeCountsPerSitePerBranch(
1892  DRTreeLikelihood& drtl,
1893  const vector<int>& ids,
1894  SubstitutionModel* model,
1895  const SubstitutionRegister& reg,
1896  VVdouble& result)
1897 {
1898  unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg.clone()));
1899  unique_ptr<ProbabilisticSubstitutionMapping> smap(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
1900 
1901  size_t nbSites = smap->getNumberOfSites();
1902  size_t nbBr = ids.size();
1903 
1904  VectorTools::resize2(result, nbSites, nbBr);
1905 
1906  vector<size_t> sdi(nbBr); // map: position in ids -> node index in smap
1907  for (size_t i = 0; i < nbBr; ++i)
1908  {
1909  sdi[i] = smap->getNodeIndex(ids[i]);
1910  }
1911 
1912  for (size_t k = 0; k < nbSites; ++k)
1913  {
1914  Vdouble countsf = SubstitutionMappingTools::computeTotalSubstitutionVectorForSitePerBranch(*smap, k);
1915  Vdouble* resS = &result[k];
1916 
1917  for (size_t i = 0; i < nbBr; ++i)
1918  (*resS)[i] = countsf[sdi[i]];
1919  }
1920 }
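// A typical call chain (sketch; variable and file names are illustrative)
// pairs this function with the output helper defined near the end of this file:
//
//   VVdouble counts;
//   SubstitutionMappingTools::computeCountsPerSitePerBranch(drtl, ids, model, reg, counts);
//   SubstitutionMappingTools::outputPerSitePerBranch("counts.tsv", ids, counts);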
1921 
1922 
1923 /**************************************************************************************************/
1924 
1925 void SubstitutionMappingTools::computeCountsPerSitePerType(
1926  DRTreeLikelihood& drtl,
1927  const vector<int>& ids,
1928  SubstitutionModel* model,
1929  const SubstitutionRegister& reg,
1930  VVdouble& result)
1931 {
1932  unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg.clone()));
1933  unique_ptr<ProbabilisticSubstitutionMapping> smap(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
1934 
1935  size_t nbSites = smap->getNumberOfSites();
1936  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
1937 
1938  VectorTools::resize2(result, nbSites, nbTypes);
1939 
1940  vector<unique_ptr<ProbabilisticRewardMapping> > rmap; // note: declared but unused below
1941 
1942  for (size_t k = 0; k < nbSites; ++k)
1943  {
1944  Vdouble countsf = SubstitutionMappingTools::computeTotalSubstitutionVectorForSitePerType(*smap, k);
1945 
1946  Vdouble* resS = &result[k];
1947 
1948  for (size_t i = 0; i < nbTypes; ++i)
1949  (*resS)[i] = countsf[i];
1950  }
1951 }
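// Corresponding sketch for the per-type table (names illustrative):
//
//   VVdouble counts;
//   SubstitutionMappingTools::computeCountsPerSitePerType(drtl, ids, model, reg, counts);
//   SubstitutionMappingTools::outputPerSitePerType("counts_per_type.tsv", reg, counts);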
1952 
1953 
1954 /**************************************************************************************************/
1955 
1956 void SubstitutionMappingTools::computeCountsPerSitePerType(
1957  DRTreeLikelihood& drtl,
1958  const vector<int>& ids,
1959  SubstitutionModel* model,
1960  SubstitutionModel* nullModel,
1961  const SubstitutionRegister& reg,
1962  VVdouble& result,
1963  bool perTime,
1964  bool perWord)
1965 {
1966 
1967  unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg.clone()));
1968 
1969  unique_ptr<ProbabilisticSubstitutionMapping> smap(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
1970 
1971  size_t nbSites = smap->getNumberOfSites();
1972  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
1973  size_t nbStates = nullModel->getAlphabet()->getSize();
1974  vector<int> supportedStates = nullModel->getAlphabetStates();
1975 
1976  VectorTools::resize2(result, nbSites, nbTypes);
1977 
1978  // compute the AlphabetIndex for each substitutionType
1979  vector<UserAlphabetIndex1 > usai(nbTypes, UserAlphabetIndex1(nullModel->getAlphabet()));
1980 
1981  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1982  for (size_t i = 0; i < nbStates; i++)
1983  usai[nbt].setIndex(supportedStates[i], 0);
1984 
1985  for (size_t i = 0; i < nbStates; i++)
1986  {
1987  for (size_t j = 0; j < nbStates; j++)
1988  {
1989  if (i != j)
1990  {
1991  size_t nbt = reg.getType(i, j);
1992  if (nbt != 0)
1993  usai[nbt - 1].setIndex(supportedStates[i], usai[nbt - 1].getIndex(supportedStates[i]) + nullModel->Qij(i, j));
1994  }
1995  }
1996  }
1997 
1998  // compute the normalization for each site for each substitutionType
1999  vector< vector<double> > rewards(nbSites);
2000 
2001  for (size_t k = 0; k < nbSites; ++k)
2002  {
2003  rewards[k].resize(nbTypes);
2004  }
2005 
2006  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2007  {
2008  unique_ptr<Reward> reward(new DecompositionReward(nullModel, &usai[nbt]));
2009 
2010  unique_ptr<ProbabilisticRewardMapping> mapping(RewardMappingTools::computeRewardVectors(drtl, ids, *reward, false));
2011 
2012  for (size_t i = 0; i < nbSites; ++i)
2013  {
2014  double s = 0;
2015  for (size_t k = 0; k < ids.size(); ++k)
2016  {
2017  double tmp = (*mapping)(mapping->getNodeIndex(ids[k]), i);
2018  if (std::isnan(tmp))
2019  {
2020  s = 0;
2021  break;
2022  }
2023  s += tmp;
2024  }
2025  rewards[i][nbt] = s;
2026  }
2027 
2028  reward.reset();
2029  mapping.reset();
2030  }
2031 
2032  // Compute the total length of the branches concerned:
2033 
2034  double brlen = 0;
2035 
2036  if (!perTime)
2037  {
2038  const TreeTemplate<Node> tree(drtl.getTree());
2039  for (size_t k = 0; k < ids.size(); ++k)
2040  brlen += tree.getNode(ids[k])->getDistanceToFather();
2041  }
2042  else
2043  brlen = 1;
2044 
2045  // If the alphabet is a word alphabet and normalization is not per word,
2046  // further divide by the word length:
2047  const CoreWordAlphabet* wAlp = dynamic_cast<const CoreWordAlphabet*>(nullModel->getAlphabet());
2048 
2049  float sizeWord = float((wAlp != NULL && !perWord) ? wAlp->getLength() : 1);
2050 
2051  // output
2052 
2053  for (size_t k = 0; k < nbSites; ++k)
2054  {
2055  Vdouble countsf = SubstitutionMappingTools::computeTotalSubstitutionVectorForSitePerType(*smap, k);
2056 
2057  Vdouble& resS = result[k];
2058  for (size_t i = 0; i < nbTypes; ++i)
2059  {
2060  resS[i] = countsf[i] / rewards[k][i] * brlen / sizeWord;
2061  }
2062  }
2063 }
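// Summary of the normalization performed above, for site k and type i:
//
//   result[k][i] = countsf[i] / rewards[k][i] * brlen / sizeWord
//
// i.e. the raw count divided by the null-model expectation at that site,
// scaled by the total concerned branch length (brlen = 1 when perTime) and
// by the word length when applicable.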
2064 
2065 
2066 /**************************************************************************************************/
2067 
2068 void SubstitutionMappingTools::computeCountsPerSitePerType(
2069  DRTreeLikelihood& drtl,
2070  const vector<int>& ids,
2071  SubstitutionModelSet* modelSet,
2072  SubstitutionModelSet* nullModelSet,
2073  const SubstitutionRegister& reg,
2074  VVdouble& result,
2075  bool perTime,
2076  bool perWord)
2077 {
2078  unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(modelSet->getSubstitutionModel(0), reg.clone()));
2079 
2080  unique_ptr<ProbabilisticSubstitutionMapping> smap(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
2081 
2082  size_t nbSites = smap->getNumberOfSites();
2083  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2084  size_t nbStates = nullModelSet->getAlphabet()->getSize();
2085  size_t nbModels = nullModelSet->getNumberOfModels();
2086 
2087  VectorTools::resize2(result, nbSites, nbTypes);
2088 
2089  // compute the normalization for each site for each substitutionType
2090  vector< vector<double> > rewards(nbSites);
2091 
2092  for (size_t k = 0; k < nbSites; ++k)
2093  {
2094  rewards[k].resize(nbTypes);
2095  }
2096 
2097  vector<UserAlphabetIndex1 > usai(nbTypes, UserAlphabetIndex1(nullModelSet->getAlphabet()));
2098 
2099  for (size_t nbm = 0; nbm < nbModels; nbm++)
2100  {
2101  vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));
2102 
2103  if (mids.size() > 0)
2104  {
2105  const SubstitutionModel* modn = nullModelSet->getSubstitutionModel(nbm);
2106  vector<int> supportedStates = modn->getAlphabetStates();
2107 
2108  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2109  for (size_t i = 0; i < nbStates; i++)
2110  usai[nbt].setIndex(supportedStates[i], 0);
2111 
2112  for (size_t i = 0; i < nbStates; i++)
2113  {
2114  for (size_t j = 0; j < nbStates; j++)
2115  {
2116  if (i != j)
2117  {
2118  size_t nbt = reg.getType(i, j);
2119  if (nbt != 0)
2120  usai[nbt - 1].setIndex(supportedStates[i], usai[nbt - 1].getIndex(supportedStates[i]) + modn->Qij(i, j));
2121  }
2122  }
2123  }
2124 
2125  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2126  {
2127  unique_ptr<Reward> reward(new DecompositionReward(nullModelSet->getSubstitutionModel(nbm), &usai[nbt]));
2128 
2129  unique_ptr<ProbabilisticRewardMapping> mapping(RewardMappingTools::computeRewardVectors(drtl, mids, *reward, false));
2130 
2131  for (size_t i = 0; i < nbSites; ++i)
2132  {
2133  double s = 0;
2134  for (size_t k = 0; k < mids.size(); k++)
2135  {
2136  double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
2137 
2138  if (std::isnan(tmp))
2139  {
2140  s = 0;
2141  break;
2142  }
2143  else
2144  s += tmp;
2145  }
2146  rewards[i][nbt] += s;
2147  }
2148  reward.reset();
2149  mapping.reset();
2150  }
2151  }
2152  }
2153 
2154  // Compute the total length of the branches concerned:
2155 
2156  double brlen = 0;
2157 
2158  if (!perTime)
2159  {
2160  const TreeTemplate<Node> tree(drtl.getTree());
2161  for (size_t k = 0; k < ids.size(); ++k)
2162  brlen += tree.getNode(ids[k])->getDistanceToFather();
2163  }
2164  else
2165  brlen = 1.;
2166 
2167 
2168  // If the alphabet is a word alphabet and normalization is not per word,
2169  // further divide by the word length:
2170 
2171  const CoreWordAlphabet* wAlp = dynamic_cast<const CoreWordAlphabet*>(nullModelSet->getAlphabet());
2172 
2173  float sizeWord = float((wAlp != NULL && !perWord) ? wAlp->getLength() : 1);
2174 
2175  // output
2176 
2177  for (size_t k = 0; k < nbSites; ++k)
2178  {
2179  Vdouble countsf = SubstitutionMappingTools::computeTotalSubstitutionVectorForSitePerType(*smap, k);
2180 
2181  Vdouble& resS = result[k];
2182 
2183  for (size_t i = 0; i < nbTypes; ++i)
2184  {
2185  resS[i] = countsf[i] / rewards[k][i] * brlen / sizeWord;
2186  }
2187  }
2188 }
2189 
2190 
2191 /**************************************************************************************************/
2192 
2193 void SubstitutionMappingTools::computeCountsPerSitePerBranchPerType(
2194  DRTreeLikelihood& drtl,
2195  const vector<int>& ids,
2196  SubstitutionModel* model,
2197  const SubstitutionRegister& reg,
2198  VVVdouble& result)
2199 {
2200  unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg.clone()));
2201  unique_ptr<ProbabilisticSubstitutionMapping> smap(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
2202 
2203  size_t nbSites = smap->getNumberOfSites();
2204  size_t nbBr = ids.size();
2205  size_t nbTypes = reg.getNumberOfSubstitutionTypes();
2206 
2207  VectorTools::resize3(result, nbSites, nbBr, nbTypes);
2208 
2209  for (size_t i = 0; i < nbTypes; ++i)
2210  {
2211  for (size_t j = 0; j < nbSites; ++j)
2212  {
2213  VVdouble& resS = result[j];
2214 
2215  for (size_t k = 0; k < nbBr; ++k)
2216  {
2217  resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i);
2218  }
2219  }
2220  }
2221 }
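// Sketch for the full per-site / per-branch / per-type decomposition
// (names illustrative; the output helper below writes one file per type):
//
//   VVVdouble counts;
//   SubstitutionMappingTools::computeCountsPerSitePerBranchPerType(drtl, ids, model, reg, counts);
//   SubstitutionMappingTools::outputPerSitePerBranchPerType("counts_", ids, reg, counts);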
2222 
2223 /**************************************************************************************************/
2224 
2225 void SubstitutionMappingTools::computeCountsPerSitePerBranchPerType(
2226  DRTreeLikelihood& drtl,
2227  const vector<int>& ids,
2228  SubstitutionModel* model,
2229  SubstitutionModel* nullModel,
2230  const SubstitutionRegister& reg,
2231  VVVdouble& result,
2232  bool perTime,
2233  bool perWord)
2234 {
2235  unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg.clone()));
2236 
2237  unique_ptr<ProbabilisticSubstitutionMapping> smap(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
2238 
2239  size_t nbSites = smap->getNumberOfSites();
2240  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2241  size_t nbStates = nullModel->getAlphabet()->getSize();
2242  size_t nbBr = ids.size();
2243 
2244  VectorTools::resize3(result, nbSites, nbBr, nbTypes);
2245 
2246  // compute the normalization for each site for each substitutionType
2247  map<int, vector< vector<double> > > rewards;
2248 
2249  for (auto id : ids)
2250  VectorTools::resize2(rewards[id], nbSites, nbTypes);
2251 
2252  vector<int> supportedStates = nullModel->getAlphabetStates();
2253 
2254  // compute the AlphabetIndex for each substitutionType
2255  vector<UserAlphabetIndex1 > usai(nbTypes, UserAlphabetIndex1(nullModel->getAlphabet()));
2256 
2257  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2258  for (size_t i = 0; i < nbStates; i++)
2259  usai[nbt].setIndex(supportedStates[i], 0);
2260 
2261  for (size_t i = 0; i < nbStates; i++)
2262  {
2263  for (size_t j = 0; j < nbStates; j++)
2264  {
2265  if (i != j)
2266  {
2267  size_t nbt = reg.getType(i, j);
2268  if (nbt != 0)
2269  usai[nbt - 1].setIndex(supportedStates[i], usai[nbt - 1].getIndex(supportedStates[i]) + nullModel->Qij(i, j));
2270  }
2271  }
2272  }
2273 
2274  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2275  {
2276  unique_ptr<Reward> reward(new DecompositionReward(nullModel, &usai[nbt]));
2277 
2278  unique_ptr<ProbabilisticRewardMapping> mapping(RewardMappingTools::computeRewardVectors(drtl, ids, *reward, false));
2279 
2280  for (size_t i = 0; i < nbSites; ++i)
2281  {
2282  for (size_t k = 0; k < ids.size(); ++k)
2283  {
2284  double tmp = (*mapping)(mapping->getNodeIndex(ids[k]), i);
2285 
2286  if (std::isnan(tmp))
2287  tmp = 0;
2288 
2289  rewards[ids[k]][i][nbt] = tmp;
2290  }
2291  }
2292 
2293  reward.reset();
2294  mapping.reset();
2295  }
2296 
2297  // If the alphabet is a word alphabet and normalization is not per word,
2298  // further divide by the word length:
2299  const CoreWordAlphabet* wAlp = dynamic_cast<const CoreWordAlphabet*>(nullModel->getAlphabet());
2300 
2301  float sizeWord = float((wAlp != NULL && !perWord) ? wAlp->getLength() : 1);
2302 
2303 
2304  // output
2305  const TreeTemplate<Node>& tree = drtl.getTree();
2306 
2307  for (size_t i = 0; i < nbTypes; ++i)
2308  {
2309  for (size_t j = 0; j < nbSites; ++j)
2310  {
2311  VVdouble& resS = result[j];
2312 
2313  for (size_t k = 0; k < nbBr; ++k)
2314  {
2315  resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i) / rewards[ids[k]][j][i] * (perTime ? 1 : tree.getNode(ids[k])->getDistanceToFather()) / sizeWord;
2316  }
2317  }
2318  }
2319 }
2320 
2321 /**************************************************************************************************/
2322 
2323 void SubstitutionMappingTools::computeCountsPerSitePerBranchPerType(
2324  DRTreeLikelihood& drtl,
2325  const vector<int>& ids,
2326  SubstitutionModelSet* modelSet,
2327  SubstitutionModelSet* nullModelSet,
2328  const SubstitutionRegister& reg,
2329  VVVdouble& result,
2330  bool perTime,
2331  bool perWord)
2332 {
2333  unique_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(modelSet->getSubstitutionModel(0), reg.clone()));
2334 
2335  unique_ptr<ProbabilisticSubstitutionMapping> smap(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
2336 
2337  size_t nbSites = smap->getNumberOfSites();
2338  size_t nbTypes = smap->getNumberOfSubstitutionTypes();
2339  size_t nbStates = nullModelSet->getAlphabet()->getSize();
2340  size_t nbModels = nullModelSet->getNumberOfModels();
2341  size_t nbBr = ids.size();
2342 
2343  VectorTools::resize3(result, nbSites, nbBr, nbTypes);
2344 
2345  // compute the normalization for each site for each substitutionType
2346  map<int, vector< vector<double> > > rewards;
2347 
2348  for (auto id : ids)
2349  VectorTools::resize2(rewards[id], nbSites, nbTypes);
2350 
2351  vector<UserAlphabetIndex1 > usai(nbTypes, UserAlphabetIndex1(nullModelSet->getAlphabet()));
2352 
2353  for (size_t nbm = 0; nbm < nbModels; nbm++)
2354  {
2355  vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));
2356 
2357  if (mids.size() > 0)
2358  {
2359  const SubstitutionModel* modn = nullModelSet->getSubstitutionModel(nbm);
2360  vector<int> supportedStates = modn->getAlphabetStates();
2361 
2362  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2363  for (size_t i = 0; i < nbStates; i++)
2364  usai[nbt].setIndex(supportedStates[i], 0);
2365 
2366  for (size_t i = 0; i < nbStates; i++)
2367  {
2368  for (size_t j = 0; j < nbStates; j++)
2369  {
2370  if (i != j)
2371  {
2372  size_t nbt = reg.getType(i, j);
2373  if (nbt != 0)
2374  usai[nbt - 1].setIndex(supportedStates[i], usai[nbt - 1].getIndex(supportedStates[i]) + modn->Qij(i, j));
2375  }
2376  }
2377  }
2378 
2379  for (size_t nbt = 0; nbt < nbTypes; nbt++)
2380  {
2381  unique_ptr<Reward> reward(new DecompositionReward(nullModelSet->getSubstitutionModel(nbm), &usai[nbt]));
2382 
2383  unique_ptr<ProbabilisticRewardMapping> mapping(RewardMappingTools::computeRewardVectors(drtl, mids, *reward, false));
2384 
2385  for (size_t i = 0; i < nbSites; ++i)
2386  {
2387  for (size_t k = 0; k < mids.size(); k++)
2388  {
2389  double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
2390 
2391  if (std::isnan(tmp))
2392  tmp = 0;
2393 
2394  rewards[mids[k]][i][nbt] = tmp;
2395  }
2396  }
2397 
2398  reward.reset();
2399  mapping.reset();
2400  }
2401  }
2402  }
2403 
2404  // If the alphabet is a word alphabet and normalization is not per word,
2405  // further divide by the word length:
2406  const CoreWordAlphabet* wAlp = dynamic_cast<const CoreWordAlphabet*>(nullModelSet->getAlphabet());
2407 
2408  float sizeWord = float((wAlp != NULL && !perWord) ? wAlp->getLength() : 1);
2409 
2410 
2411  // output
2412  const TreeTemplate<Node>& tree = drtl.getTree();
2413 
2414  for (size_t i = 0; i < nbTypes; ++i)
2415  {
2416  for (size_t j = 0; j < nbSites; ++j)
2417  {
2418  VVdouble& resS = result[j];
2419 
2420  for (size_t k = 0; k < nbBr; ++k)
2421  {
2422  resS[k][i] = (*smap)(smap->getNodeIndex(ids[k]), j, i) / rewards[ids[k]][j][i] * (perTime ? 1 : tree.getNode(ids[k])->getDistanceToFather()) / sizeWord;
2423  }
2424  }
2425  }
2426 }
2427 
2428 
2429 /**************************************************************************************************/
2430 
2431 void SubstitutionMappingTools::outputPerSitePerBranch(
2432  const string& filename,
2433  const vector<int>& ids,
2434  const VVdouble& counts)
2435 {
2436  size_t nbSites = counts.size();
2437  if (nbSites == 0)
2438  return;
2439  size_t nbBr = counts[0].size();
2440 
2441  ofstream file;
2442  file.open(filename.c_str());
2443 
2444  file << "sites";
2445  for (size_t i = 0; i < nbBr; ++i)
2446  {
2447  file << "\t" << ids[i];
2448  }
2449  file << endl;
2450 
2451  for (size_t k = 0; k < nbSites; ++k)
2452  {
2453  const Vdouble& countS = counts[k];
2454  file << k;
2455  for (size_t i = 0; i < nbBr; ++i)
2456  {
2457  file << "\t" << countS[i];
2458  }
2459  file << endl;
2460  }
2461  file.close();
2462 }
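// The file written above is a tab-separated table with one row per site and
// one column per branch id, e.g. (values are placeholders):
//
//   sites  1     2     5
//   0      0.12  0.00  1.30
//   1      0.00  0.45  0.07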
2463 
2464 
2465 /**************************************************************************************************/
2466 
2467 void SubstitutionMappingTools::outputPerSitePerType(
2468  const string& filename,
2469  const SubstitutionRegister& reg,
2470  const VVdouble& counts)
2471 {
2472 
2473  size_t nbSites = counts.size();
2474  if (nbSites == 0)
2475  return;
2476  size_t nbTypes = counts[0].size();
2477 
2478  ofstream file;
2479  file.open(filename.c_str());
2480 
2481  file << "sites";
2482  for (size_t i = 0; i < nbTypes; ++i)
2483  {
2484  file << "\t" << reg.getTypeName(i + 1);
2485  }
2486  file << endl;
2487 
2488  for (size_t k = 0; k < nbSites; ++k)
2489  {
2490  file << k;
2491  const Vdouble& resS = counts[k];
2492  for (size_t i = 0; i < nbTypes; ++i)
2493  {
2494  file << "\t" << resS[i];
2495  }
2496  file << endl;
2497  }
2498  file.close();
2499 }
2500 
2501 
2502 /**************************************************************************************************/
2503 
2504 void SubstitutionMappingTools::outputPerSitePerBranchPerType(
2505  const string& filenamePrefix,
2506  const vector<int>& ids,
2507  const SubstitutionRegister& reg,
2508  const VVVdouble& counts)
2509 {
2510  size_t nbSites = counts.size();
2511  if (nbSites == 0)
2512  return;
2513  size_t nbBr = counts[0].size();
2514  if (nbBr == 0)
2515  return;
2516  size_t nbTypes = counts[0][0].size();
2517 
2518  ofstream file;
2519 
2520  for (size_t i = 0; i < nbTypes; ++i)
2521  {
2522  string name = reg.getTypeName(i + 1);
2523  if (name == "")
2524  name = TextTools::toString(i + 1);
2525 
2526  string path = filenamePrefix + name + string(".count");
2527 
2528  ApplicationTools::displayResult(string("Output counts of type ") + TextTools::toString(i + 1) + string(" to file"), path);
2529  file.open(path.c_str());
2530 
2531  file << "sites";
2532  for (size_t k = 0; k < nbBr; ++k)
2533  {
2534  file << "\t" << ids[k];
2535  }
2536  file << endl;
2537 
2538  for (size_t j = 0; j < nbSites; ++j)
2539  {
2540  const VVdouble& resS=counts[j];
2541 
2542  file << j;
2543  for (size_t k = 0; k < nbBr; ++k)
2544  {
2545  file << "\t" << resS[k][i];
2546  }
2547  file << endl;
2548  }
2549  file.close();
2550  }
2551 }