bpp-phyl  2.4.0
SubstitutionMappingTools.h
Go to the documentation of this file.
1 //
2 // File: SubstitutionMappingTools.h
3 // Created by: Julien Dutheil
4 // Created on: Wed Apr 5 13:04 2006
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004, 2005, 2006)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #ifndef _SUBSTITUTIONMAPPINGTOOLS_H_
41 #define _SUBSTITUTIONMAPPINGTOOLS_H_
42 
44 #include "SubstitutionCount.h"
46 #include "../Likelihood/DRTreeLikelihood.h"
47 
48 namespace bpp
49 {
64  {
65  public:
68 
69  public:
81  const DRTreeLikelihood& drtl,
82  SubstitutionCount& substitutionCount,
83  bool verbose = true)
84  {
// Convenience form without explicit node ids: an empty id list is built and
// handed to the computeSubstitutionVectors overload that takes ids.
// NOTE(review): presumably an empty list means "use all nodes" - confirm in
// the implementation of the overload.
85  std::vector<int> nodeIds;
86  return computeSubstitutionVectors(drtl, nodeIds, substitutionCount, verbose);
87  }
88 
103  const DRTreeLikelihood& drtl,
104  const std::vector<int>& nodeIds,
105  SubstitutionCount& substitutionCount,
106  bool verbose = true);
107 
109  const DRTreeLikelihood& drtl,
110  const SubstitutionModelSet& modelSet,
111  const std::vector<int>& nodeIds,
112  SubstitutionCount& substitutionCount,
113  bool verbose = true);
114 
136  const DRTreeLikelihood& drtl,
137  SubstitutionCount& substitutionCount,
138  bool verbose = true);
139 
140 
160  const DRTreeLikelihood& drtl,
161  SubstitutionCount& substitutionCount,
162  bool verbose = true);
163 
164 
181  const DRTreeLikelihood& drtl,
182  SubstitutionCount& substitutionCount,
183  bool verbose = true);
184 
185 
194  const DRTreeLikelihood& drtl,
195  bool verbose = true)
196  {
// A one-jump substitution count is used as the counting method; it is
// constructed with a 0 argument, unchanged from the original code.
197  OneJumpSubstitutionCount ojsm(0);
// The mapping is computed over every node id of the likelihood's tree.
// The caller's 'verbose' flag is forwarded to the delegate: the previous code
// passed a literal 0 in the bool 'verbose' slot, so the parameter of this
// function was silently ignored.
198  return computeSubstitutionVectors(drtl, drtl.getTree().getNodesId(), ojsm, verbose);
199  }
200 
201 
/**
 * @brief Write the substitutions vectors to a stream.
 *
 * @param substitutions The probabilistic substitution mapping to write.
 * @param sites         Site container supplied together with the mapping
 *                      (NOTE(review): presumably the alignment the mapping
 *                      refers to - confirm).
 * @param type          Index passed as size_t (NOTE(review): presumably the
 *                      substitution type to write - confirm).
 * @param out           Output stream that receives the data.
 */
213  static void writeToStream(
214  const ProbabilisticSubstitutionMapping& substitutions,
215  const SiteContainer& sites,
216  size_t type,
217  std::ostream& out);
218 
219 
/**
 * @brief Read the substitutions vectors from a stream.
 *
 * @param in            Input stream to read from.
 * @param substitutions Mapping object taken by non-const reference
 *                      (NOTE(review): presumably filled with the data read -
 *                      confirm).
 * @param type          Index passed as size_t (NOTE(review): presumably the
 *                      substitution type being read - confirm).
 */
228  static void readFromStream(std::istream& in, ProbabilisticSubstitutionMapping& substitutions, size_t type);
229 
230 
/**
 * @brief Sum all types of substitutions for each branch of a given position
 * (specified by its index).
 *
 * @param smap      The substitution mapping to read.
 * @param siteIndex Index of the position of interest.
 * @return A vector of sums (NOTE(review): presumably one entry per branch -
 *         confirm).
 */
240  static std::vector<double> computeTotalSubstitutionVectorForSitePerBranch(const SubstitutionMapping& smap, size_t siteIndex);
241 
/**
 * @brief Sum substitutions for each type at a given position (specified by
 * its index).
 *
 * @param smap      The substitution mapping to read.
 * @param siteIndex Index of the position of interest.
 * @return A vector of sums (NOTE(review): presumably one entry per
 *         substitution type, summed over branches - confirm).
 */
251  static std::vector<double> computeTotalSubstitutionVectorForSitePerType(const SubstitutionMapping& smap, size_t siteIndex);
252 
/**
 * @brief Compute the norm of a substitution vector for a given position
 * (specified by its index).
 *
 * @param smap      The substitution mapping to read.
 * @param siteIndex Index of the position of interest.
 * @return The norm as a double (NOTE(review): which norm is used is not
 *         visible from this declaration - confirm in the implementation).
 */
264  static double computeNormForSite(const SubstitutionMapping& smap, size_t siteIndex);
265 
/**
 * @brief Sum all substitutions for each type of a given branch (specified by
 * its index).
 *
 * @param smap        The substitution mapping to read.
 * @param branchIndex Index of the branch of interest.
 * @return A vector of sums (NOTE(review): presumably one entry per
 *         substitution type - confirm).
 */
273  static std::vector<double> computeSumForBranch(const SubstitutionMapping& smap, size_t branchIndex);
274 
275 
/**
 * @brief Sum all substitutions for each type of a given site (specified by
 * its index).
 *
 * @param smap      The substitution mapping to read.
 * @param siteIndex Index of the site of interest.
 * @return A vector of sums (NOTE(review): presumably one entry per
 *         substitution type - confirm).
 */
283  static std::vector<double> computeSumForSite(const SubstitutionMapping& smap, size_t siteIndex);
284 
285 
/**
 * @brief Counts per branch, single-model overload.
 *
 * NOTE(review): no description accompanies this declaration; judging by the
 * name it returns substitution counts for each branch - confirm against the
 * implementation, including the layout of the nested vector.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Ids to consider (presumably node ids identifying
 *                  branches - TODO confirm).
 * @param model     Substitution model, passed as a non-const pointer.
 * @param reg       Substitution register.
 * @param threshold Defaults to -1 (NOTE(review): meaning of the threshold and
 *                  of a negative value is not visible here - confirm).
 * @param verbose   Verbosity flag, defaults to true.
 * @return A vector of vectors of doubles.
 */
293  static std::vector< std::vector<double> > getCountsPerBranch(
294  DRTreeLikelihood& drtl,
295  const std::vector<int>& ids,
296  SubstitutionModel* model,
297  const SubstitutionRegister& reg,
298  double threshold = -1,
299  bool verbose = true);
300 
/**
 * @brief Counts per branch, model-set overload.
 *
 * NOTE(review): no description accompanies this declaration; judging by the
 * name it returns substitution counts for each branch, using a set of models
 * rather than a single one - confirm against the implementation.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Ids to consider (presumably node ids identifying
 *                  branches - TODO confirm).
 * @param modelSet  Substitution model set, passed by const reference.
 * @param reg       Substitution register.
 * @param threshold Defaults to -1 (NOTE(review): meaning of the threshold and
 *                  of a negative value is not visible here - confirm).
 * @param verbose   Verbosity flag, defaults to true.
 * @return A vector of vectors of doubles.
 */
301  static std::vector< std::vector<double> > getCountsPerBranch(
302  DRTreeLikelihood& drtl,
303  const std::vector<int>& ids,
304  const SubstitutionModelSet& modelSet,
305  const SubstitutionRegister& reg,
306  double threshold = -1,
307  bool verbose = true);
308 
309 
/**
 * @brief Returns the normalization factors due to the null model on each
 * branch, for each register.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Ids to consider (presumably node ids identifying
 *                  branches - TODO confirm).
 * @param nullModel The null substitution model (pointer to const).
 * @param reg       Substitution register.
 * @param verbose   Verbosity flag, defaults to true.
 * @return A vector of vectors of doubles holding the normalization factors.
 */
322  static std::vector< std::vector<double> > getNormalizationsPerBranch(
323  DRTreeLikelihood& drtl,
324  const std::vector<int>& ids,
325  const SubstitutionModel* nullModel,
326  const SubstitutionRegister& reg,
327  bool verbose = true);
328 
329 
/**
 * @brief Normalization factors per branch, null-model-set overload.
 *
 * NOTE(review): no description accompanies this overload; presumably it
 * returns the normalization factors due to a set of null models on each
 * branch, for each register - confirm against the implementation.
 *
 * @param drtl         Double-recursive tree likelihood object.
 * @param ids          Ids to consider (presumably node ids identifying
 *                     branches - TODO confirm).
 * @param nullModelSet The null substitution model set (pointer to const).
 * @param reg          Substitution register.
 * @param verbose      Verbosity flag, defaults to true.
 * @return A vector of vectors of doubles.
 */
342  static std::vector< std::vector<double> > getNormalizationsPerBranch(
343  DRTreeLikelihood& drtl,
344  const std::vector<int>& ids,
345  const SubstitutionModelSet* nullModelSet,
346  const SubstitutionRegister& reg,
347  bool verbose = true);
348 
349 
/**
 * @brief Returns the counts relative to the frequency of the states in case
 * of non-stationarity.
 *
 * Thin by-value wrapper: a local result is filled by
 * computeCountsPerTypePerBranch (threshold overload) and then returned.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Ids to consider (presumably node ids identifying
 *                  branches - TODO confirm).
 * @param model     Substitution model, passed as a non-const pointer.
 * @param reg       Substitution register.
 * @param threshold Defaults to -1; forwarded unchanged to the delegate.
 * @param verbose   Verbosity flag, defaults to true; forwarded unchanged.
 * @return The vector of vectors filled by the delegate.
 */
367  static std::vector< std::vector<double> > getRelativeCountsPerBranch(
368  DRTreeLikelihood& drtl,
369  const std::vector<int>& ids,
370  SubstitutionModel* model,
371  const SubstitutionRegister& reg,
372  double threshold = -1,
373  bool verbose= true)
374  {
375  std::vector< std::vector<double> > result;
// The delegate writes its output into 'result' (out-parameter).
376  computeCountsPerTypePerBranch(drtl, ids, model, reg, result, threshold, verbose);
377  return result;
378  }
379 
/**
 * @brief Returns the counts normalized by a null model.
 *
 * Thin by-value wrapper: a local result is filled by the
 * computeCountsPerTypePerBranch overload taking a model and a null model,
 * and then returned.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Ids to consider (presumably node ids identifying
 *                  branches - TODO confirm).
 * @param model     Substitution model, passed as a non-const pointer.
 * @param nullModel Null substitution model used for the normalization.
 * @param reg       Substitution register.
 * @param perTime   Forwarded unchanged (NOTE(review): presumably requests
 *                  normalization per unit of time - confirm).
 * @param perWord   Forwarded unchanged (NOTE(review): presumably requests
 *                  normalization per word size - confirm).
 * @param verbose   Verbosity flag, defaults to true; forwarded unchanged.
 * @return The vector of vectors filled by the delegate.
 */
396  static std::vector< std::vector<double> > getNormalizedCountsPerBranch(
397  DRTreeLikelihood& drtl,
398  const std::vector<int>& ids,
399  SubstitutionModel* model,
400  SubstitutionModel* nullModel,
401  const SubstitutionRegister& reg,
402  bool perTime,
403  bool perWord,
404  bool verbose = true)
405  {
406  std::vector< std::vector<double> > result;
// The delegate writes its output into 'result' (out-parameter).
407  computeCountsPerTypePerBranch(drtl, ids, model, nullModel, reg, result, perTime, perWord, verbose);
408  return result;
409  }
410 
/**
 * @brief Returns the counts normalized by a null model set.
 *
 * Thin by-value wrapper: a local result is filled by the
 * computeCountsPerTypePerBranch overload taking a model set and a null model
 * set, and then returned.
 *
 * @param drtl         Double-recursive tree likelihood object.
 * @param ids          Ids to consider (presumably node ids identifying
 *                     branches - TODO confirm).
 * @param modelSet     Substitution model set, passed as a non-const pointer.
 * @param nullModelSet Null substitution model set used for the normalization.
 * @param reg          Substitution register.
 * @param perTime      Forwarded unchanged (NOTE(review): presumably requests
 *                     normalization per unit of time - confirm).
 * @param perWord      Forwarded unchanged (NOTE(review): presumably requests
 *                     normalization per word size - confirm).
 * @param verbose      Verbosity flag, defaults to true; forwarded unchanged.
 * @return The vector of vectors filled by the delegate.
 */
427  static std::vector< std::vector<double> > getNormalizedCountsPerBranch(
428  DRTreeLikelihood& drtl,
429  const std::vector<int>& ids,
430  SubstitutionModelSet* modelSet,
431  SubstitutionModelSet* nullModelSet,
432  const SubstitutionRegister& reg,
433  bool perTime,
434  bool perWord,
435  bool verbose = true)
436  {
437  std::vector< std::vector<double> > result;
// The delegate writes its output into 'result' (out-parameter).
438  computeCountsPerTypePerBranch(drtl, ids, modelSet, nullModelSet, reg, result, perTime, perWord, verbose);
439  return result;
440  }
441 
/**
 * @brief Compute the sum over all types of the counts per site per branch.
 *
 * @param drtl  Double-recursive tree likelihood object.
 * @param ids   Ids to consider (presumably node ids identifying branches -
 *              TODO confirm).
 * @param model Substitution model, passed as a non-const pointer.
 * @param reg   Substitution register.
 * @param array Output argument (non-const reference) receiving the counts
 *              (NOTE(review): index order, site-by-branch or the reverse, is
 *              not visible here - confirm).
 */
464  static void computeCountsPerSitePerBranch(
465  DRTreeLikelihood& drtl,
466  const std::vector<int>& ids,
467  SubstitutionModel* model,
468  const SubstitutionRegister& reg,
469  VVdouble& array);
470 
471 
/**
 * @brief Compute the counts per type per branch.
 *
 * NOTE(review): the one-line description shipped with this listing calls this
 * a "sum over all branches" of the counts per type per branch; given the
 * function name the sum presumably runs over sites - confirm against the
 * implementation.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Ids to consider (presumably node ids identifying
 *                  branches - TODO confirm).
 * @param model     Substitution model, passed as a non-const pointer.
 * @param reg       Substitution register.
 * @param result    Output argument (non-const reference) receiving the counts.
 * @param threshold Defaults to -1 (NOTE(review): meaning of the threshold and
 *                  of a negative value is not visible here - confirm).
 * @param verbose   Verbosity flag, defaults to true.
 */
498  static void computeCountsPerTypePerBranch(
499  DRTreeLikelihood& drtl,
500  const std::vector<int>& ids,
501  SubstitutionModel* model,
502  const SubstitutionRegister& reg,
503  VVdouble& result,
504  double threshold = -1,
505  bool verbose = true);
506 
/**
 * @brief Counts per type per branch, overload taking a model and a null model.
 *
 * NOTE(review): no description accompanies this overload; presumably the
 * counts are normalized using the null model - confirm against the
 * implementation.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Ids to consider (presumably node ids identifying
 *                  branches - TODO confirm).
 * @param model     Substitution model, passed as a non-const pointer.
 * @param nullModel Null substitution model, passed as a non-const pointer.
 * @param reg       Substitution register.
 * @param result    Output argument (non-const reference) receiving the counts.
 * @param perTime   NOTE(review): presumably requests normalization per unit
 *                  of time - confirm.
 * @param perWord   NOTE(review): presumably requests normalization per word
 *                  size - confirm.
 * @param verbose   Verbosity flag, defaults to true.
 */
526  static void computeCountsPerTypePerBranch(
527  DRTreeLikelihood& drtl,
528  const std::vector<int>& ids,
529  SubstitutionModel* model,
530  SubstitutionModel* nullModel,
531  const SubstitutionRegister& reg,
532  VVdouble& result,
533  bool perTime,
534  bool perWord,
535  bool verbose = true);
536 
/**
 * @brief Counts per type per branch, overload taking a model set and a null
 * model set.
 *
 * NOTE(review): no description accompanies this overload; presumably the
 * counts are normalized using the null model set - confirm against the
 * implementation.
 *
 * @param drtl         Double-recursive tree likelihood object.
 * @param ids          Ids to consider (presumably node ids identifying
 *                     branches - TODO confirm).
 * @param modelSet     Substitution model set, passed as a non-const pointer.
 * @param nullModelSet Null substitution model set, passed as a non-const
 *                     pointer.
 * @param reg          Substitution register.
 * @param result       Output argument (non-const reference) receiving the
 *                     counts.
 * @param perTime      NOTE(review): presumably requests normalization per
 *                     unit of time - confirm.
 * @param perWord      NOTE(review): presumably requests normalization per
 *                     word size - confirm.
 * @param verbose      Verbosity flag, defaults to true.
 */
558  static void computeCountsPerTypePerBranch(
559  DRTreeLikelihood& drtl,
560  const std::vector<int>& ids,
561  SubstitutionModelSet* modelSet,
562  SubstitutionModelSet* nullModelSet,
563  const SubstitutionRegister& reg,
564  VVdouble& result,
565  bool perTime,
566  bool perWord,
567  bool verbose = true);
568 
/**
 * @brief Compute the sum over all branches of the counts per type per site.
 *
 * @param drtl   Double-recursive tree likelihood object.
 * @param ids    Ids to consider (presumably node ids identifying branches -
 *               TODO confirm).
 * @param model  Substitution model, passed as a non-const pointer.
 * @param reg    Substitution register.
 * @param result Output argument (non-const reference) receiving the counts
 *               (NOTE(review): index order, site-by-type or the reverse, is
 *               not visible here - confirm).
 */
591  static void computeCountsPerSitePerType(
592  DRTreeLikelihood& drtl,
593  const std::vector<int>& ids,
594  SubstitutionModel* model,
595  const SubstitutionRegister& reg,
596  VVdouble& result);
597 
/**
 * @brief Counts per site per type, overload taking a model and a null model.
 *
 * NOTE(review): no description accompanies this overload; presumably the
 * counts are normalized using the null model - confirm against the
 * implementation.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Ids to consider (presumably node ids identifying
 *                  branches - TODO confirm).
 * @param model     Substitution model, passed as a non-const pointer.
 * @param nullModel Null substitution model, passed as a non-const pointer.
 * @param reg       Substitution register.
 * @param result    Output argument (non-const reference) receiving the counts.
 * @param perTime   NOTE(review): presumably requests normalization per unit
 *                  of time - confirm.
 * @param perWord   NOTE(review): presumably requests normalization per word
 *                  size - confirm.
 */
616  static void computeCountsPerSitePerType(
617  DRTreeLikelihood& drtl,
618  const std::vector<int>& ids,
619  SubstitutionModel* model,
620  SubstitutionModel* nullModel,
621  const SubstitutionRegister& reg,
622  VVdouble& result,
623  bool perTime,
624  bool perWord);
625 
/**
 * @brief Counts per site per type, overload taking a model set and a null
 * model set.
 *
 * NOTE(review): no description accompanies this overload; presumably the
 * counts are normalized using the null model set - confirm against the
 * implementation.
 *
 * @param drtl         Double-recursive tree likelihood object.
 * @param ids          Ids to consider (presumably node ids identifying
 *                     branches - TODO confirm).
 * @param modelSet     Substitution model set, passed as a non-const pointer.
 * @param nullModelSet Null substitution model set, passed as a non-const
 *                     pointer.
 * @param reg          Substitution register.
 * @param result       Output argument (non-const reference) receiving the
 *                     counts.
 * @param perTime      NOTE(review): presumably requests normalization per
 *                     unit of time - confirm.
 * @param perWord      NOTE(review): presumably requests normalization per
 *                     word size - confirm.
 */
644  static void computeCountsPerSitePerType(
645  DRTreeLikelihood& drtl,
646  const std::vector<int>& ids,
647  SubstitutionModelSet* modelSet,
648  SubstitutionModelSet* nullModelSet,
649  const SubstitutionRegister& reg,
650  VVdouble& result,
651  bool perTime,
652  bool perWord);
653 
676  DRTreeLikelihood& drtl,
677  const std::vector<int>& ids,
678  SubstitutionModel* model,
679  const SubstitutionRegister& reg,
680  VVVdouble& result);
681 
700  DRTreeLikelihood& drtl,
701  const std::vector<int>& ids,
702  SubstitutionModel* model,
703  SubstitutionModel* nullModel,
704  const SubstitutionRegister& reg,
705  VVVdouble& result,
706  bool perTime,
707  bool perWord);
708 
727  DRTreeLikelihood& drtl,
728  const std::vector<int>& ids,
729  SubstitutionModelSet* modelSet,
730  SubstitutionModelSet* nullModelSet,
731  const SubstitutionRegister& reg,
732  VVVdouble& result,
733  bool perTime,
734  bool perWord);
735 
736  /*
737  *
738  * @brief Outputs of counts
739  *
740  * @{
741  */
742 
/**
 * @brief Output counts per site per branch.
 *
 * @param filename Name of the destination (NOTE(review): presumably a file
 *                 path that is created or overwritten - confirm).
 * @param ids      Ids associated with the counts (presumably the node ids of
 *                 the branches - TODO confirm).
 * @param counts   The per-site, per-branch counts to output.
 */
746  static void outputPerSitePerBranch(const std::string& filename,
747  const std::vector<int>& ids,
748  const VVdouble& counts);
749 
/**
 * @brief Output counts per site per type.
 *
 * @param filename Name of the destination (NOTE(review): presumably a file
 *                 path that is created or overwritten - confirm).
 * @param reg      Substitution register (NOTE(review): presumably used to
 *                 label the substitution types - confirm).
 * @param counts   The per-site, per-type counts to output.
 */
753  static void outputPerSitePerType(const std::string& filename,
754  const SubstitutionRegister& reg,
755  const VVdouble& counts);
756 
/**
 * @brief Output counts per site per branch per type.
 *
 * @param filenamePrefix Prefix for the destination name(s) (NOTE(review):
 *                       presumably several files share this prefix, e.g. one
 *                       per type - confirm).
 * @param ids            Ids associated with the counts (presumably the node
 *                       ids of the branches - TODO confirm).
 * @param reg            Substitution register (NOTE(review): presumably used
 *                       to label the substitution types - confirm).
 * @param counts         The three-level nested counts to output.
 */
760  static void outputPerSitePerBranchPerType(const std::string& filenamePrefix,
761  const std::vector<int>& ids,
762  const SubstitutionRegister& reg,
763  const VVVdouble& counts);
764 
765 
766  /*
767  *
768  *@}
769  */
770 
771 
777  };
778 } // end of namespace bpp.
779 
780 #endif // _SUBSTITUTIONMAPPINGTOOLS_H_
static ProbabilisticSubstitutionMapping * computeSubstitutionVectorsNoAveraging(const DRTreeLikelihood &drtl, SubstitutionCount &substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
Substitution models manager for non-homogeneous / non-reversible models of evolution.
Interface for all substitution models.
static ProbabilisticSubstitutionMapping * computeOneJumpProbabilityVectors(const DRTreeLikelihood &drtl, bool verbose=true)
This method computes for each site and for each branch the probability that at least one jump occurre...
static std::vector< double > computeSumForBranch(const SubstitutionMapping &smap, size_t branchIndex)
Sum all substitutions for each type of a given branch (specified by its index).
static void computeCountsPerSitePerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg, VVdouble &array)
Compute the sum over all types of the counts per site per branch.
static std::vector< double > computeTotalSubstitutionVectorForSitePerType(const SubstitutionMapping &smap, size_t siteIndex)
Sum all types of substitutions for each type of a given position (specified by its index)...
static double computeNormForSite(const SubstitutionMapping &smap, size_t siteIndex)
Compute the norm of a substitution vector for a given position (specified by its index).
static void computeCountsPerTypePerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg, VVdouble &result, double threshold=-1, bool verbose=true)
Compute the sum over all branches of the counts per type per branch.
Data storage class for probabilistic substitution mappings.
General interface for storing mapping data.
The SubstitutionRegister interface.
virtual std::vector< int > getNodesId() const =0
static ProbabilisticSubstitutionMapping * computeSubstitutionVectorsMarginal(const DRTreeLikelihood &drtl, SubstitutionCount &substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static std::vector< std::vector< double > > getRelativeCountsPerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg, double threshold=-1, bool verbose=true)
Returns the counts relative to the frequency of the states in case of non-stationarity.
static std::vector< double > computeTotalSubstitutionVectorForSitePerBranch(const SubstitutionMapping &smap, size_t siteIndex)
Sum all types of substitutions for each branch of a given position (specified by its index)...
static void computeCountsPerSitePerType(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg, VVdouble &result)
Compute the sum over all branches of the counts per type per site.
static std::vector< std::vector< double > > getNormalizedCountsPerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModelSet *modelSet, SubstitutionModelSet *nullModelSet, const SubstitutionRegister &reg, bool perTime, bool perWord, bool verbose=true)
Returns the counts normalized by a null model set.
static ProbabilisticSubstitutionMapping * computeSubstitutionVectorsNoAveragingMarginal(const DRTreeLikelihood &drtl, SubstitutionCount &substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
Computes the probability that at least one jump occurred on a branch, given the initial and final stat...
virtual const Tree & getTree() const =0
Get the tree (topology and branch lengths).
static void outputPerSitePerType(const std::string &filename, const SubstitutionRegister &reg, const VVdouble &counts)
Output Per Site Per Type.
The SubstitutionsCount interface.
static std::vector< double > computeSumForSite(const SubstitutionMapping &smap, size_t siteIndex)
Sum all substitutions for each type of a given site (specified by its index).
static std::vector< std::vector< double > > getNormalizedCountsPerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, SubstitutionModel *nullModel, const SubstitutionRegister &reg, bool perTime, bool perWord, bool verbose=true)
Returns the counts normalized by a null model.
static void outputPerSitePerBranchPerType(const std::string &filenamePrefix, const std::vector< int > &ids, const SubstitutionRegister &reg, const VVVdouble &counts)
Output Per Site Per Branch Per Type.
static void outputPerSitePerBranch(const std::string &filename, const std::vector< int > &ids, const VVdouble &counts)
Output Per Site Per Branch.
Provide methods to compute substitution mappings.
static std::vector< std::vector< double > > getNormalizationsPerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, const SubstitutionModel *nullModel, const SubstitutionRegister &reg, bool verbose=true)
Returns the normalization factors due to the null model on each branch, for each register.
Interface for double-recursive (DR) implementation of the likelihood computation. ...
static void readFromStream(std::istream &in, ProbabilisticSubstitutionMapping &substitutions, size_t type)
Read the substitutions vectors from a stream.
std::vector< VVdouble > VVVdouble
static std::vector< std::vector< double > > getCountsPerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg, double threshold=-1, bool verbose=true)
std::vector< Vdouble > VVdouble
static void computeCountsPerSitePerBranchPerType(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg, VVVdouble &result)
Compute counts per site per branch per type.
static void writeToStream(const ProbabilisticSubstitutionMapping &substitutions, const SiteContainer &sites, size_t type, std::ostream &out)
Write the substitutions vectors to a stream.
static ProbabilisticSubstitutionMapping * computeSubstitutionVectors(const DRTreeLikelihood &drtl, SubstitutionCount &substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...