bpp-phyl  2.4.0
PhylogeneticsApplicationTools.cpp
Go to the documentation of this file.
1 //
2 // File: PhylogeneticsApplicationTools.cpp
3 // Created by: Julien Dutheil
4 // Created on: Fri Oct 21 16:49 2005
5 // from old file ApplicationTools.cpp created on Sun Dec 14 09:36:26 2003
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for phylogenetic data analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39  */
40 
42 #include "../Model/SubstitutionModel.h"
43 #include "../Model/WrappedModel.h"
44 #include "../Model/Protein/Coala.h"
45 #include "../Model/FrequenciesSet/MvaFrequenciesSet.h"
46 #include "../Likelihood/TreeLikelihood.h"
47 #include "../Mapping/LaplaceSubstitutionCount.h"
48 #include "../Mapping/UniformizationSubstitutionCount.h"
49 #include "../Mapping/DecompositionSubstitutionCount.h"
50 #include "../Mapping/NaiveSubstitutionCount.h"
51 #include "../Mapping/OneJumpSubstitutionCount.h"
52 #include "../OptimizationTools.h"
53 #include "../Tree.h"
54 #include "../Io/BppOTreeReaderFormat.h"
55 #include "../Io/BppOMultiTreeReaderFormat.h"
56 #include "../Io/BppOTreeWriterFormat.h"
57 #include "../Io/BppOMultiTreeWriterFormat.h"
58 #include "../Io/BppOSubstitutionModelFormat.h"
59 #include "../Io/BppOTransitionModelFormat.h"
60 #include "../Io/BppOFrequenciesSetFormat.h"
61 #include "../Io/BppORateDistributionFormat.h"
62 
63 // From bpp-core
66 #include <Bpp/Io/FileTools.h>
67 #include <Bpp/Text/TextTools.h>
70 #include <Bpp/Text/KeyvalTools.h>
75 
76 // From bpp-seq:
80 
81 using namespace bpp;
82 
83 // From the STL:
84 #include <fstream>
85 #include <memory>
86 #include <set>
87 #include <vector>
88 
89 using namespace std;
90 
91 
92 /*************************************************************/
93 /***************** TREES ************************************/
94 /*************************************************************/
95 
96 
97 /******************************************************************************/
98 
100  map<string, string>& params,
101  const string& prefix,
102  const string& suffix,
103  bool suffixIsOptional,
104  bool verbose,
105  int warn)
106 {
107  string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
108  string treeFilePath = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, true, suffix, suffixIsOptional, "none", warn);
109 
110  BppOTreeReaderFormat bppoReader(warn);
111  unique_ptr<ITree> iTree(bppoReader.read(format));
112  if (verbose)
113  {
114  ApplicationTools::displayResult("Input tree file " + suffix, treeFilePath);
115  ApplicationTools::displayResult("Input tree format " + suffix, iTree->getFormatName());
116  }
117  Tree* tree = iTree->read(treeFilePath);
118  return tree;
119 }
120 
121 /******************************************************************************/
122 
124  map<string, string>& params,
125  const string& prefix,
126  const string& suffix,
127  bool suffixIsOptional,
128  bool verbose,
129  int warn)
130 {
131  string format = ApplicationTools::getStringParameter(prefix + "trees.format", params, "Newick", suffix, suffixIsOptional, warn);
132  string treeFilePath = ApplicationTools::getAFilePath(prefix + "trees.file", params, true, true, suffix, suffixIsOptional, "none", warn);
133 
134  BppOMultiTreeReaderFormat bppoReader(warn);
135  unique_ptr<IMultiTree> iTrees(bppoReader.read(format));
136  if (verbose)
137  {
138  ApplicationTools::displayResult("Input trees file " + suffix, treeFilePath);
139  ApplicationTools::displayResult("Input trees format " + suffix, iTrees->getFormatName());
140  }
141  vector<Tree*> trees;
142  iTrees->read(treeFilePath, trees);
143 
144  if (verbose)
145  {
146  ApplicationTools::displayResult("Number of trees in file", trees.size());
147  }
148  return trees;
149 }
150 
151 
152 /*************************************************************/
153 /******* MODELS **********************************************/
154 /*************************************************************/
155 
156 /******************************************************************************/
157 
159  const Alphabet* alphabet,
160  const GeneticCode* gCode,
161  const SiteContainer* data,
162  std::map<std::string, std::string>& params,
163  const string& suffix,
164  bool suffixIsOptional,
165  bool verbose,
166  int warn)
167 {
168  BppOSubstitutionModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, verbose, warn + 1);
169  string modelDescription;
170  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(alphabet);
171  if (ca)
172  {
173  modelDescription = ApplicationTools::getStringParameter("model", params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
174  if (!gCode)
175  throw Exception("PhylogeneticsApplicationTools::getSubstitutionModel(): a GeneticCode instance is required for instanciating a codon model.");
176  bIO.setGeneticCode(gCode);
177  }
178  else if (AlphabetTools::isWordAlphabet(alphabet))
179  modelDescription = ApplicationTools::getStringParameter("model", params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
180  else
181  modelDescription = ApplicationTools::getStringParameter("model", params, "JC69", suffix, suffixIsOptional, warn);
182 
183  SubstitutionModel* model = bIO.read(alphabet, modelDescription, data, true);
184  return model;
185 }
186 
188  const Alphabet* alphabet,
189  const GeneticCode* gCode,
190  const SiteContainer* data,
191  std::map<std::string, std::string>& params,
192  const string& suffix,
193  bool suffixIsOptional,
194  bool verbose,
195  int warn)
196 {
197  BppOTransitionModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, verbose, warn + 1);
198  string modelDescription;
199  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(alphabet);
200  if (ca)
201  {
202  modelDescription = ApplicationTools::getStringParameter("model", params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
203  if (!gCode)
204  throw Exception("PhylogeneticsApplicationTools::getSubstitutionModel(): a GeneticCode instance is required for instanciating a codon model.");
205  bIO.setGeneticCode(gCode);
206  }
207  else if (AlphabetTools::isWordAlphabet(alphabet))
208  modelDescription = ApplicationTools::getStringParameter("model", params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
209  else
210  modelDescription = ApplicationTools::getStringParameter("model", params, "JC69", suffix, suffixIsOptional, warn);
211 
212  TransitionModel* model = bIO.readTransitionModel(alphabet, modelDescription, data, true);
213  return model;
214 }
215 
216 /******************************************************************************/
217 
219  TransitionModel& model,
220  std::map<std::string, std::string>& unparsedParameterValues,
221  size_t modelNumber,
222  const SiteContainer* data,
223  std::map<std::string, std::string>& sharedParams,
224  bool verbose)
225 {
226  string initFreqs = ApplicationTools::getStringParameter(model.getNamespace() + "initFreqs", unparsedParameterValues, "", "", true, 2);
227 
228  if (verbose)
229  ApplicationTools::displayResult("Frequencies Initialization for model", (initFreqs == "") ? "None" : initFreqs);
230 
231  if (initFreqs != "")
232  {
233  if (initFreqs == "observed")
234  {
235  if (!data)
236  throw Exception("Missing data for observed frequencies");
237  unsigned int psi = ApplicationTools::getParameter<unsigned int>(model.getNamespace() + "initFreqs.observedPseudoCount", unparsedParameterValues, 0);
238  model.setFreqFromData(*data, psi);
239  }
240  else if (initFreqs.substr(0, 6) == "values")
241  {
242  // Initialization using the "values" argument
243  map<int, double> frequencies;
244 
245  string rf = initFreqs.substr(6);
246  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
247  int i = 0;
248  while (strtok.hasMoreToken())
249  frequencies[i++] = TextTools::toDouble(strtok.nextToken());
250  model.setFreq(frequencies);
251  }
252  else
253  throw Exception("Unknown initFreqs argument");
254  }
255 
256 
258  for (size_t i = 0; i < pl.size(); ++i)
259  {
260  AutoParameter ap(pl[i]);
262  pl.setParameter(i, ap);
263  }
264 
265  for (size_t i = 0; i < pl.size(); ++i)
266  {
267  const string pName = pl[i].getName();
268  size_t posp = model.getParameterNameWithoutNamespace(pName).rfind(".");
269  string value;
270  bool test1 = (initFreqs == "");
271  bool test2 = (model.getParameterNameWithoutNamespace(pName).substr(posp + 1, 5) != "theta");
272  bool test3 = (unparsedParameterValues.find(pName) != unparsedParameterValues.end());
273 
274  if (test1 || test2 || test3)
275  {
276  if (!test1 && !test2 && test3)
277  ApplicationTools::displayWarning("Warning, initFreqs argument is set and a value is set for parameter " + pName);
278 
279  value = ApplicationTools::getStringParameter(pName, unparsedParameterValues, TextTools::toString(pl[i].getValue()));
280 
281  try
282  {
283  pl[i].setValue(TextTools::toDouble(value));
284  if (verbose)
285  ApplicationTools::displayResult("Parameter found", pName + +"_" + TextTools::toString(modelNumber) + "=" + TextTools::toString(pl[i].getValue()));
286  }
287  catch (Exception& e)
288  {
289  sharedParams[pl[i].getName() + "_" + TextTools::toString(modelNumber)] = value;
290  }
291  }
292  }
293 
294  model.matchParametersValues(pl);
295 }
296 
297 
298 /******************************************************/
299 /**** FREQUENCIES SET *********************************/
300 /******************************************************/
301 
302 /******************************************************************************/
303 
305  const Alphabet* alphabet,
306  const GeneticCode* gCode,
307  const SiteContainer* data,
308  std::map<std::string, std::string>& params,
309  std::map<std::string, std::string>& sharedparams,
310  const std::vector<double>& rateFreqs,
311  const std::string& suffix,
312  bool suffixIsOptional,
313  bool verbose,
314  int warn)
315 {
316  string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", params, "Full(init=observed)", suffix, suffixIsOptional, warn);
317  if (freqDescription == "None")
318  {
319  return 0;
320  }
321  else
322  {
323  map<string, string> unparams;
324 
325  FrequenciesSet* freq = getFrequenciesSet(alphabet, gCode, freqDescription, data, unparams, rateFreqs, verbose, warn + 1);
326  freq->setNamespace("root." + freq->getNamespace());
327 
328  map<string, string>::iterator it;
329  for (it = unparams.begin(); it != unparams.end(); it++)
330  {
331  sharedparams["root." + it->first] = it->second;
332  }
333 
334  if (verbose)
335  ApplicationTools::displayResult("Root frequencies ", freq->getName());
336  return freq;
337  }
338 }
339 
340 /******************************************************************************/
341 
343  const Alphabet* alphabet,
344  const GeneticCode* gCode,
345  const std::string& freqDescription,
346  const SiteContainer* data,
347  std::map<std::string, std::string>& sharedparams,
348  const std::vector<double>& rateFreqs,
349  bool verbose,
350  int warn)
351 {
352  map<string, string> unparsedParameterValues;
354  if (AlphabetTools::isCodonAlphabet(alphabet))
355  {
356  if (!gCode)
357  throw Exception("PhylogeneticsApplicationTools::getFrequenciesSet(): a GeneticCode instance is required for instanciating a codon frequencies set.");
358  bIO.setGeneticCode(gCode);
359  }
360  unique_ptr<FrequenciesSet> pFS(bIO.read(alphabet, freqDescription, data, true));
361 
362  std::map<std::string, std::string> unparsedparam = bIO.getUnparsedArguments();
363 
364  sharedparams.insert(unparsedparam.begin(), unparsedparam.end());
365 
366  // /////// To be changed for input normalization
367  if (rateFreqs.size() > 0)
368  {
369  pFS.reset(new MarkovModulatedFrequenciesSet(pFS.release(), rateFreqs));
370  }
371 
372  return pFS.release();
373 }
374 
375 /******************************************************/
376 /**** SUBSTITUTION MODEL SET **************************/
377 /******************************************************/
378 
379 /******************************************************************************/
380 
382  const Alphabet* alphabet,
383  const GeneticCode* gCode,
384  const SiteContainer* data,
385  std::map<std::string, std::string>& params,
386  const std::string& suffix,
387  bool suffixIsOptional,
388  bool verbose,
389  int warn)
390 {
391  if (!ApplicationTools::parameterExists("nonhomogeneous.number_of_models", params))
392  throw Exception("A value is needed for this parameter: nonhomogeneous.number_of_models .");
393  size_t nbModels = ApplicationTools::getParameter<size_t>("nonhomogeneous.number_of_models", params, 1, suffix, suffixIsOptional, warn);
394  if (nbModels == 0)
395  throw Exception("The number of models can't be 0 !");
396 
397  // bool nomix = true;
398  // for (size_t i = 0; nomix &(i < nbModels); i++)
399  // {
400  // string prefix = "model" + TextTools::toString(i + 1);
401  // string modelDesc;
402  // modelDesc = ApplicationTools::getStringParameter(prefix, params, "", suffix, suffixIsOptional, warn);
403 
404  // if (modelDesc.find("Mixed") != string::npos)
405  // nomix = false;
406  // }
407 
408  SubstitutionModelSet* modelSet, * modelSet1 = 0;
409  modelSet1 = new SubstitutionModelSet(alphabet);
410  setSubstitutionModelSet(*modelSet1, alphabet, gCode, data, params, suffix, suffixIsOptional, verbose, warn);
411 
412  if (modelSet1->hasMixedSubstitutionModel())
413  {
414  modelSet = new MixedSubstitutionModelSet(*modelSet1);
415  completeMixedSubstitutionModelSet(*dynamic_cast<MixedSubstitutionModelSet*>(modelSet), alphabet, data, params, suffix, suffixIsOptional, verbose);
416  }
417  else
418  modelSet = modelSet1;
419 
420  return modelSet;
421 }
422 
423 /******************************************************************************/
424 
426  SubstitutionModelSet& modelSet,
427  const Alphabet* alphabet,
428  const GeneticCode* gCode,
429  const SiteContainer* data,
430  map<string, string>& params,
431  const string& suffix,
432  bool suffixIsOptional,
433  bool verbose,
434  int warn)
435 {
436  modelSet.clear();
437  if (!ApplicationTools::parameterExists("nonhomogeneous.number_of_models", params))
438  throw Exception("You must specify this parameter: 'nonhomogeneous.number_of_models'.");
439  size_t nbModels = ApplicationTools::getParameter<size_t>("nonhomogeneous.number_of_models", params, 1, suffix, suffixIsOptional, warn);
440  if (nbModels == 0)
441  throw Exception("The number of models can't be 0 !");
442 
443  if (verbose)
444  ApplicationTools::displayResult("Number of distinct models", TextTools::toString(nbModels));
445 
446  BppOTransitionModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
447 
448  // ///////////////////////////////////////////
449  // Build a new model set object:
450 
451  vector<double> rateFreqs;
452  string tmpDesc;
453  if (AlphabetTools::isCodonAlphabet(alphabet))
454  {
455  if (!gCode)
456  throw Exception("PhylogeneticsApplicationTools::setSubstitutionModelSet(): a GeneticCode instance is required for instanciating a codon model.");
457  bIO.setGeneticCode(gCode);
458  tmpDesc = ApplicationTools::getStringParameter("model1", params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
459  }
460  else if (AlphabetTools::isWordAlphabet(alphabet))
461  tmpDesc = ApplicationTools::getStringParameter("model1", params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
462  else
463  tmpDesc = ApplicationTools::getStringParameter("model1", params, "JC69", suffix, suffixIsOptional, warn);
464 
465  unique_ptr<TransitionModel> tmp(bIO.readTransitionModel(alphabet, tmpDesc, data, false));
466 
467 
468  if (tmp->getNumberOfStates() != alphabet->getSize())
469  {
470  // Markov-Modulated Markov Model...
471  size_t n = static_cast<size_t>(tmp->getNumberOfStates() / alphabet->getSize());
472  rateFreqs = vector<double>(n, 1. / static_cast<double>(n)); // Equal rates assumed for now, may be changed later (actually, in the most general case,
473  }
474 
475  // ////////////////////////////////////
476  // Deal with root frequencies
477 
478  map<string, string> unparsedParameters;
479 
480  bool stationarity = ApplicationTools::getBooleanParameter("nonhomogeneous.stationarity", params, false, "", true, warn);
481  FrequenciesSet* rootFrequencies = 0;
482  if (!stationarity)
483  {
484  rootFrequencies = getRootFrequenciesSet(alphabet, gCode, data, params, unparsedParameters, rateFreqs, suffix, suffixIsOptional, verbose);
485  stationarity = !rootFrequencies;
486  string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", params, "", suffix, suffixIsOptional, warn);
487  if (freqDescription.substr(0, 10) == "MVAprotein")
488  {
489  if (dynamic_cast<Coala*>(tmp.get()))
490  dynamic_cast<MvaFrequenciesSet*>(rootFrequencies)->initSet(dynamic_cast<CoalaCore*>(tmp.get()));
491  else
492  throw Exception("The MVAprotein frequencies set at the root can only be used if a Coala model is used on branches.");
493  }
494  }
495 
496  ApplicationTools::displayBooleanResult("Stationarity assumed", stationarity);
497 
498  if (!stationarity)
499  modelSet.setRootFrequencies(rootFrequencies);
500 
501 
502  // //////////////////////////////////////
503  // Now parse all models:
504 
505  bIO.setVerbose(true);
506 
507  for (size_t i = 0; i < nbModels; i++)
508  {
509  string prefix = "model" + TextTools::toString(i + 1);
510  string modelDesc;
511  if (AlphabetTools::isCodonAlphabet(alphabet))
512  modelDesc = ApplicationTools::getStringParameter(prefix, params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
513  else if (AlphabetTools::isWordAlphabet(alphabet))
514  modelDesc = ApplicationTools::getStringParameter(prefix, params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
515  else
516  modelDesc = ApplicationTools::getStringParameter(prefix, params, "JC69", suffix, suffixIsOptional, warn);
517 
518  unique_ptr<TransitionModel> model(bIO.readTransitionModel(alphabet, modelDesc, data, false));
519 
520  map<string, string> unparsedModelParameters = bIO.getUnparsedArguments();
521  map<string, string> sharedParameters;
522 
523  setSubstitutionModelParametersInitialValuesWithAliases(
524  *model,
525  unparsedModelParameters, i + 1, data,
526  sharedParameters,
527  verbose);
528 
529  unparsedParameters.insert(sharedParameters.begin(), sharedParameters.end());
530 
531  vector<int> nodesId = ApplicationTools::getVectorParameter<int>(prefix + ".nodes_id", params, ',', ':', "", suffix, suffixIsOptional, warn);
532 
533  if (verbose)
534  ApplicationTools::displayResult("Model" + TextTools::toString(i + 1) + " is associated to", TextTools::toString(nodesId.size()) + " node(s).");
535 
536  modelSet.addModel(model.release(), nodesId);
537  }
538 
539  // Finally check parameter aliasing:
540  string aliasDesc = ApplicationTools::getStringParameter("nonhomogeneous.alias", params, "", suffix, suffixIsOptional, warn);
541  StringTokenizer st(aliasDesc, ",");
542  while (st.hasMoreToken())
543  {
544  string alias = st.nextToken();
545  string::size_type index = alias.find("->");
546  if (index == string::npos)
547  throw Exception("PhylogeneticsApplicationTools::setSubstitutionModelSet. Bad alias syntax, should contain `->' symbol: " + alias);
548  unparsedParameters[alias.substr(0, index)] = alias.substr(index + 2);
549  }
550 
551  // alias unparsedParameters
552 
553  modelSet.aliasParameters(unparsedParameters, verbose);
554 }
555 
556 /******************************************************************************/
558  MixedSubstitutionModelSet& mixedModelSet,
559  const Alphabet* alphabet,
560  const SiteContainer* data,
561  map<string, string>& params,
562  const string& suffix,
563  bool suffixIsOptional,
564  bool verbose,
565  int warn)
566 {
567  // /////////////////////////////////////////
568  // Looks for the allowed paths
569 
570  size_t numd;
571  if (!ApplicationTools::parameterExists("site.number_of_paths", params))
572  numd = 0;
573  else
574  numd = ApplicationTools::getParameter<size_t>("site.number_of_paths", params, 1, suffix, suffixIsOptional, warn);
575 
576  if (verbose)
577  ApplicationTools::displayResult("Number of distinct paths", TextTools::toString(numd));
578 
579  vector<string> vdesc;
580  while (numd)
581  {
582  string desc = ApplicationTools::getStringParameter("site.path" + TextTools::toString(numd), params, "", suffix, suffixIsOptional, warn);
583  if (desc.size() == 0)
584  break;
585  else
586  vdesc.push_back(desc);
587  numd--;
588  }
589 
590  if (vdesc.size() == 0)
591  {
592  mixedModelSet.complete();
593  mixedModelSet.computeHyperNodesProbabilities();
594  return;
595  }
596 
597  for (vector<string>::iterator it(vdesc.begin()); it != vdesc.end(); it++)
598  {
599  mixedModelSet.addEmptyHyperNode();
600  StringTokenizer st(*it, "&");
601  while (st.hasMoreToken())
602  {
603  string submodel = st.nextToken();
604  string::size_type indexo = submodel.find("[");
605  string::size_type indexf = submodel.find("]");
606  if ((indexo == string::npos) | (indexf == string::npos))
607  throw Exception("PhylogeneticsApplicationTools::setMixedSubstitutionModelSet. Bad path syntax, should contain `[]' symbols: " + submodel);
608  size_t num = TextTools::to<size_t>(submodel.substr(5, indexo - 5));
609  string p2 = submodel.substr(indexo + 1, indexf - indexo - 1);
610 
611  const MixedSubstitutionModel* pSM = dynamic_cast<const MixedSubstitutionModel*>(mixedModelSet.getSubstitutionModel(num - 1));
612  if (!pSM)
613  throw BadIntegerException("PhylogeneticsApplicationTools::setMixedSubstitutionModelSet: Wrong model for number", static_cast<int>(num - 1));
614  Vint submodnb = pSM->getSubmodelNumbers(p2);
615 
616  mixedModelSet.addToHyperNode(num - 1, submodnb);
617  }
618 
619  if (!mixedModelSet.getHyperNode(mixedModelSet.getNumberOfHyperNodes() - 1).isComplete())
620  throw Exception("A path should own at least a submodel of each mixed model: " + *it);
621 
622  if (verbose)
623  ApplicationTools::displayResult("Site Path", *it);
624  }
625 
626  // / Checks if the paths are separate
627  if (!mixedModelSet.hasExclusivePaths())
628  throw Exception("All paths must be disjoint.");
629 
630  // / Puts all the remaining models in a new path
631  string st;
632  st = (mixedModelSet.complete()) ? "Yes" : "No";
633 
634  if (verbose)
635  ApplicationTools::displayResult("Site Path Completion", st);
636 
637  mixedModelSet.computeHyperNodesProbabilities();
638 
639  if (!mixedModelSet.getHyperNode(mixedModelSet.getNumberOfHyperNodes() - 1).isComplete())
640  throw Exception("The remaining submodels can not create a complete path.");
641 }
642 
643 
644 /******************************************************/
645 /*** DISTRIBUTIONS ********************************/
646 /******************************************************/
647 
648 
649 /******************************************************************************/
650 
652  const std::string& distDescription,
653  std::map<std::string, std::string>& unparsedParameterValues,
654  bool verbose)
655 {
656  string distName;
658  map<string, string> args;
659  KeyvalTools::parseProcedure(distDescription, distName, args);
660 
661  if (distName == "Dirichlet")
662  {
663  if (args.find("classes") == args.end())
664  throw Exception("Missing argument 'classes' (vector of number of classes) in " + distName
665  + " distribution");
666  if (args.find("alphas") == args.end())
667  throw Exception("Missing argument 'alphas' (vector of Dirichlet shape parameters) in Dirichlet distribution");
668  vector<double> alphas;
669  vector<size_t> classes;
670 
671  string rf = args["alphas"];
672  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
673  while (strtok.hasMoreToken())
674  alphas.push_back(TextTools::toDouble(strtok.nextToken()));
675 
676  rf = args["classes"];
677  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
678  while (strtok2.hasMoreToken())
679  classes.push_back(TextTools::to<size_t>(strtok2.nextToken()));
680 
681  pMDD = new DirichletDiscreteDistribution(classes, alphas);
682  vector<string> v = pMDD->getParameters().getParameterNames();
683 
684  for (size_t i = 0; i < v.size(); i++)
685  {
686  unparsedParameterValues[v[i]] = TextTools::toString(pMDD->getParameterValue(pMDD->getParameterNameWithoutNamespace(v[i])));
687  }
688  }
689  else
690  throw Exception("Unknown multiple distribution name: " + distName);
691 
692  return pMDD;
693 }
694 
695 /******************************************************************************/
696 
698  map<string, string>& params,
699  const string& suffix,
700  bool suffixIsOptional,
701  bool verbose)
702 {
703  string distDescription = ApplicationTools::getStringParameter("rate_distribution", params, "Constant()", suffix, suffixIsOptional);
704 
705  string distName;
706  map<string, string> args;
707  KeyvalTools::parseProcedure(distDescription, distName, args);
708 
709  BppORateDistributionFormat bIO(true);
710  unique_ptr<DiscreteDistribution> rDist(bIO.read(distDescription, true));
711 
712  if (verbose)
713  {
714  ApplicationTools::displayResult("Rate distribution", distName);
715  ApplicationTools::displayResult("Number of classes", TextTools::toString(rDist->getNumberOfCategories()));
716  }
717 
718  return rDist.release();
719 }
720 
721 
722 /*************************************************************/
723 /***** OPTIMIZATORS *****************************************/
724 /*************************************************************/
725 
726 /******************************************************************************/
727 
729  TreeLikelihood* tl,
730  const ParameterList& parameters,
731  std::map<std::string, std::string>& params,
732  const std::string& suffix,
733  bool suffixIsOptional,
734  bool verbose,
735  int warn)
736 {
737  string optimization = ApplicationTools::getStringParameter("optimization", params, "FullD(derivatives=Newton)", suffix, suffixIsOptional, warn);
738  if (optimization == "None")
739  return tl;
740  string optName;
741  map<string, string> optArgs;
742  KeyvalTools::parseProcedure(optimization, optName, optArgs);
743 
744  unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", params, 2, suffix, suffixIsOptional, warn + 1);
745 
746  string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
747  OutputStream* messageHandler =
748  (mhPath == "none") ? 0 :
749  (mhPath == "std") ? ApplicationTools::message.get() :
750  new StlOutputStream(new ofstream(mhPath.c_str(), ios::out));
751  if (verbose)
752  ApplicationTools::displayResult("Message handler", mhPath);
753 
754  string prPath = ApplicationTools::getAFilePath("optimization.profiler", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
755  OutputStream* profiler =
756  (prPath == "none") ? 0 :
757  (prPath == "std") ? ApplicationTools::message.get() :
758  new StlOutputStream(new ofstream(prPath.c_str(), ios::out));
759  if (profiler)
760  profiler->setPrecision(20);
761  if (verbose)
762  ApplicationTools::displayResult("Profiler", prPath);
763 
764  bool scaleFirst = ApplicationTools::getBooleanParameter("optimization.scale_first", params, false, suffix, suffixIsOptional, warn + 1);
765  if (scaleFirst)
766  {
767  // We scale the tree before optimizing each branch length separately:
768  if (verbose)
769  ApplicationTools::displayMessage("Scaling the tree before optimizing each branch length separately.");
770  double tolerance = ApplicationTools::getDoubleParameter("optimization.scale_first.tolerance", params, .0001, suffix, suffixIsOptional, warn + 1);
771  if (verbose)
772  ApplicationTools::displayResult("Scaling tolerance", TextTools::toString(tolerance));
773  unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.scale_first.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, warn + 1);
774  if (verbose)
775  ApplicationTools::displayResult("Scaling max # f eval", TextTools::toString(nbEvalMax));
777  tl,
778  tolerance,
779  nbEvalMax,
780  messageHandler,
781  profiler);
782  if (verbose)
783  ApplicationTools::displayResult("New tree likelihood", -tl->getValue());
784  }
785 
786  // Should I ignore some parameters?
787  ParameterList parametersToEstimate = parameters;
788  vector<string> parNames = parametersToEstimate.getParameterNames();
789 
790  if (params.find("optimization.ignore_parameter") != params.end())
791  throw Exception("optimization.ignore_parameter is deprecated, use optimization.ignore_parameters instead!");
792  string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameters", params, "", suffix, suffixIsOptional, warn + 1);
793  StringTokenizer st(paramListDesc, ",");
794  while (st.hasMoreToken())
795  {
796  try
797  {
798  string param = st.nextToken();
799  if (param == "BrLen")
800  {
801  vector<string> vs = tl->getBranchLengthsParameters().getParameterNames();
802  parametersToEstimate.deleteParameters(vs);
803  if (verbose)
804  ApplicationTools::displayResult("Parameter ignored", string("Branch lengths"));
805  }
806  else if (param == "Ancient")
807  {
809  if (!nhtl)
810  ApplicationTools::displayWarning("The 'Ancient' parameters do not exist in homogeneous models, and will be ignored.");
811  else
812  {
813  vector<string> vs = nhtl->getRootFrequenciesParameters().getParameterNames();
814  parametersToEstimate.deleteParameters(vs);
815  }
816  if (verbose)
817  ApplicationTools::displayResult("Parameter ignored", string("Root frequencies"));
818  }
819  else if (param == "Model")
820  {
821  vector<string> vs;
822  vector<string> vs1 = tl->getSubstitutionModelParameters().getParameterNames();
824  if (nhtl != NULL)
825  {
826  vector<string> vs2 = nhtl->getRootFrequenciesParameters().getParameterNames();
827  VectorTools::diff(vs1, vs2, vs);
828  }
829  else
830  vs = vs1;
831 
832  parametersToEstimate.deleteParameters(vs);
833  if (verbose)
834  ApplicationTools::displayResult("Parameter ignored", string("Model"));
835  }
836  else if (param.find("*") != string::npos)
837  {
838  vector<string> vs = ApplicationTools::matchingParameters(param, parNames);
839 
840  for (vector<string>::iterator it = vs.begin(); it != vs.end(); it++)
841  {
842  parametersToEstimate.deleteParameter(*it);
843  if (verbose)
844  ApplicationTools::displayResult("Parameter ignored", *it);
845  }
846  }
847  else
848  {
849  parametersToEstimate.deleteParameter(param);
850  if (verbose)
851  ApplicationTools::displayResult("Parameter ignored", param);
852  }
853  }
854  catch (ParameterNotFoundException& pnfe)
855  {
856  ApplicationTools::displayWarning("Parameter '" + pnfe.getParameter() + "' not found, and so can't be ignored!");
857  }
858  }
859 
860  // Should I constrain some parameters?
861  vector<string> parToEstNames = parametersToEstimate.getParameterNames();
862 
863  if (params.find("optimization.constrain_parameter") != params.end())
864  throw Exception("optimization.constrain_parameter is deprecated, use optimization.constrain_parameters instead!");
865  paramListDesc = ApplicationTools::getStringParameter("optimization.constrain_parameters", params, "", suffix, suffixIsOptional, warn + 1);
866 
867  string constraint = "";
868  string pc, param = "";
869 
870  StringTokenizer st2(paramListDesc, ",");
871  while (st2.hasMoreToken())
872  {
873  try
874  {
875  pc = st2.nextToken();
876  string::size_type index = pc.find("=");
877  if (index == string::npos)
878  throw Exception("PhylogeneticsApplicationTools::optimizeParamaters. Bad constrain syntax, should contain `=' symbol: " + pc);
879  param = pc.substr(0, index);
880  constraint = pc.substr(index + 1);
881  IntervalConstraint ic(constraint);
882 
883  vector<string> parNames2;
884 
885  if (param == "BrLen")
886  parNames2 = tl->getBranchLengthsParameters().getParameterNames();
887  else if (param == "Ancient")
888  {
890  if (!nhtl)
891  ApplicationTools::displayWarning("The 'Ancient' parameters do not exist in homogeneous models, and will be ignored.");
892  else
893  {
894  parNames2 = nhtl->getRootFrequenciesParameters().getParameterNames();
895  ApplicationTools::displayResult("Parameter ignored", string("Root frequencies"));
896  }
897  }
898  else if (param == "Model")
899  {
900  vector<string> vs1 = tl->getSubstitutionModelParameters().getParameterNames();
902  if (nhtl != NULL)
903  {
904  vector<string> vs2 = nhtl->getRootFrequenciesParameters().getParameterNames();
905  VectorTools::diff(vs1, vs2, parNames2);
906  }
907  else
908  parNames2 = vs1;
909  }
910  else if (param.find("*") != string::npos)
911  parNames2 = ApplicationTools::matchingParameters(param, parToEstNames);
912  else
913  parNames2.push_back(param);
914 
915 
916  for (size_t i = 0; i < parNames2.size(); i++)
917  {
918  Parameter& par = parametersToEstimate.getParameter(parNames2[i]);
919  if (par.hasConstraint())
920  {
921  par.setConstraint(ic & (*par.getConstraint()), true);
922  if (par.getConstraint()->isEmpty())
923  throw Exception("Empty interval for parameter " + parNames[i] + par.getConstraint()->getDescription());
924  }
925  else
926  par.setConstraint(ic.clone(), true);
927 
928  if (verbose)
929  ApplicationTools::displayResult("Parameter constrained " + par.getName(), par.getConstraint()->getDescription());
930  }
931  }
932  catch (ParameterNotFoundException& pnfe)
933  {
934  ApplicationTools::displayWarning("Parameter '" + pnfe.getParameter() + "' not found, and so can't be constrained!");
935  }
936  catch (ConstraintException& pnfe)
937  {
938  throw Exception("Parameter '" + param + "' does not fit the constraint " + constraint);
939  }
940  }
941 
942 
943  // /////
944  // / optimization options
945 
946  unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, warn + 1);
947  if (verbose)
948  ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax));
949 
950  double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", params, .000001, suffix, suffixIsOptional, warn + 1);
951  if (verbose)
952  ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance));
953 
954  // Backing up or restoring?
955  unique_ptr<BackupListener> backupListener;
956  string backupFile = ApplicationTools::getAFilePath("optimization.backup.file", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
957  if (backupFile != "none")
958  {
959  ApplicationTools::displayResult("Parameters will be backup to", backupFile);
960  backupListener.reset(new BackupListener(backupFile));
961  if (FileTools::fileExists(backupFile))
962  {
963  ApplicationTools::displayMessage("A backup file was found! Try to restore parameters from previous run...");
964  ifstream bck(backupFile.c_str(), ios::in);
965  vector<string> lines = FileTools::putStreamIntoVectorOfStrings(bck);
966  double fval = TextTools::toDouble(lines[0].substr(5));
967  ParameterList pl = tl->getParameters();
968  for (size_t l = 1; l < lines.size(); ++l)
969  {
970  if (!TextTools::isEmpty(lines[l]))
971  {
972  StringTokenizer stp(lines[l], "=");
973  if (stp.numberOfRemainingTokens() != 2)
974  {
975  cerr << "Corrupted backup file!!!" << endl;
976  cerr << "at line " << l << ": " << lines[l] << endl;
977  }
978  string pname = stp.nextToken();
979  string pvalue = stp.nextToken();
980  try {
981  size_t p = pl.whichParameterHasName(pname);
982  pl.setParameter(p, AutoParameter(pl[p]));
983  pl[p].setValue(TextTools::toDouble(pvalue));
984  }
985  catch(Exception& e)
986  {
987  }
988  }
989  }
990  bck.close();
991  tl->setParameters(pl);
992  if (abs(tl->getValue() - fval) > 0.000001)
993  ApplicationTools::displayWarning("Warning, incorrect likelihood value after restoring from backup file.");
994  ApplicationTools::displayResult("Restoring log-likelihood", -fval);
995  }
996  }
997 
998  // There it goes...
999  bool optimizeTopo = ApplicationTools::getBooleanParameter("optimization.topology", params, false, suffix, suffixIsOptional, warn + 1);
1000  if (verbose)
1001  ApplicationTools::displayResult("Optimize topology", optimizeTopo ? "yes" : "no");
1002  string nniMethod = ApplicationTools::getStringParameter("optimization.topology.algorithm_nni.method", params, "phyml", suffix, suffixIsOptional, warn + 1);
1003  string nniAlgo;
1004  if (nniMethod == "fast")
1005  {
1006  nniAlgo = NNITopologySearch::FAST;
1007  }
1008  else if (nniMethod == "better")
1009  {
1010  nniAlgo = NNITopologySearch::BETTER;
1011  }
1012  else if (nniMethod == "phyml")
1013  {
1014  nniAlgo = NNITopologySearch::PHYML;
1015  }
1016  else
1017  throw Exception("Unknown NNI algorithm: '" + nniMethod + "'.");
1018 
1019 
1020  string order = ApplicationTools::getStringParameter("derivatives", optArgs, "Newton", "", true, warn + 1);
1021  string optMethodDeriv;
1022  if (order == "Gradient")
1023  {
1025  }
1026  else if (order == "Newton")
1027  {
1028  optMethodDeriv = OptimizationTools::OPTIMIZATION_NEWTON;
1029  }
1030  else if (order == "BFGS")
1031  {
1032  optMethodDeriv = OptimizationTools::OPTIMIZATION_BFGS;
1033  }
1034  else
1035  throw Exception("Unknown derivatives algorithm: '" + order + "'.");
1036  if (verbose)
1037  ApplicationTools::displayResult("Optimization method", optName);
1038  if (verbose)
1039  ApplicationTools::displayResult("Algorithm used for derivable parameters", order);
1040 
1041  // See if we should reparametrize:
1042  bool reparam = ApplicationTools::getBooleanParameter("optimization.reparametrization", params, false, suffix, suffixIsOptional, warn + 1);
1043  if (verbose)
1044  ApplicationTools::displayResult("Reparametrization", (reparam ? "yes" : "no"));
1045 
1046  // See if we should use a molecular clock constraint:
1047  string clock = ApplicationTools::getStringParameter("optimization.clock", params, "None", suffix, suffixIsOptional, warn + 1);
1048  if (clock != "None" && clock != "Global")
1049  throw Exception("Molecular clock option not recognized, should be one of 'Global' or 'None'.");
1050  bool useClock = (clock == "Global");
1051  if (useClock && optimizeTopo)
1052  throw Exception("PhylogeneticsApplicationTools::optimizeParameters. Cannot optimize topology with a molecular clock.");
1053  if (verbose)
1054  ApplicationTools::displayResult("Molecular clock", clock);
1055 
1056  unsigned int n = 0;
1057  if ((optName == "D-Brent") || (optName == "D-BFGS"))
1058  {
1059  // Uses Newton-Brent method or Newton-BFGS method
1060  string optMethodModel;
1061  if (optName == "D-Brent")
1062  optMethodModel = OptimizationTools::OPTIMIZATION_BRENT;
1063  else
1064  optMethodModel = OptimizationTools::OPTIMIZATION_BFGS;
1065 
1066  unsigned int nstep = ApplicationTools::getParameter<unsigned int>("nstep", optArgs, 1, "", true, warn + 1);
1067 
1068  if (optimizeTopo)
1069  {
1070  bool optNumFirst = ApplicationTools::getBooleanParameter("optimization.topology.numfirst", params, true, suffix, suffixIsOptional, warn + 1);
1071  unsigned int topoNbStep = ApplicationTools::getParameter<unsigned int>("optimization.topology.nstep", params, 1, suffix, suffixIsOptional, warn + 1);
1072  double tolBefore = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.before", params, 100, suffix, suffixIsOptional, warn + 1);
1073  double tolDuring = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.during", params, 100, suffix, suffixIsOptional, warn + 1);
1075  dynamic_cast<NNIHomogeneousTreeLikelihood*>(tl), parametersToEstimate,
1076  optNumFirst, tolBefore, tolDuring, nbEvalMax, topoNbStep, messageHandler, profiler,
1077  reparam, optVerbose, optMethodDeriv, nstep, nniAlgo);
1078  }
1079 
1080  if (verbose && nstep > 1)
1081  ApplicationTools::displayResult("# of precision steps", TextTools::toString(nstep));
1082  parametersToEstimate.matchParametersValues(tl->getParameters());
1084  dynamic_cast<DiscreteRatesAcrossSitesTreeLikelihood*>(tl), parametersToEstimate,
1085  backupListener.get(), nstep, tolerance, nbEvalMax, messageHandler, profiler, reparam, optVerbose, optMethodDeriv, optMethodModel);
1086  }
1087  else if (optName == "FullD")
1088  {
1089  // Uses Newton-raphson algorithm with numerical derivatives when required.
1090 
1091  if (optimizeTopo)
1092  {
1093  bool optNumFirst = ApplicationTools::getBooleanParameter("optimization.topology.numfirst", params, true, suffix, suffixIsOptional, warn + 1);
1094  unsigned int topoNbStep = ApplicationTools::getParameter<unsigned int>("optimization.topology.nstep", params, 1, suffix, suffixIsOptional, warn + 1);
1095  double tolBefore = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.before", params, 100, suffix, suffixIsOptional, warn + 1);
1096  double tolDuring = ApplicationTools::getDoubleParameter("optimization.topology.tolerance.during", params, 100, suffix, suffixIsOptional, warn + 1);
1098  dynamic_cast<NNIHomogeneousTreeLikelihood*>(tl), parametersToEstimate,
1099  optNumFirst, tolBefore, tolDuring, nbEvalMax, topoNbStep, messageHandler, profiler,
1100  reparam, optVerbose, optMethodDeriv, nniAlgo);
1101  }
1102 
1103  parametersToEstimate.matchParametersValues(tl->getParameters());
1105  dynamic_cast<DiscreteRatesAcrossSitesTreeLikelihood*>(tl), parametersToEstimate,
1106  backupListener.get(), tolerance, nbEvalMax, messageHandler, profiler, reparam, useClock, optVerbose, optMethodDeriv);
1107  }
1108  else
1109  throw Exception("Unknown optimization method: " + optName);
1110 
1111  string finalMethod = ApplicationTools::getStringParameter("optimization.final", params, "none", suffix, suffixIsOptional, warn + 1);
1112  Optimizer* finalOptimizer = 0;
1113  if (finalMethod == "none")
1114  {}
1115  else if (finalMethod == "simplex")
1116  {
1117  finalOptimizer = new DownhillSimplexMethod(tl);
1118  }
1119  else if (finalMethod == "powell")
1120  {
1121  finalOptimizer = new PowellMultiDimensions(tl);
1122  }
1123  else
1124  throw Exception("Unknown final optimization method: " + finalMethod);
1125 
1126  if (finalOptimizer)
1127  {
1128  parametersToEstimate.matchParametersValues(tl->getParameters());
1129  if (verbose)
1130  ApplicationTools::displayResult("Final optimization step", finalMethod);
1131  finalOptimizer->setProfiler(profiler);
1132  finalOptimizer->setMessageHandler(messageHandler);
1133  finalOptimizer->setMaximumNumberOfEvaluations(nbEvalMax);
1134  finalOptimizer->getStopCondition()->setTolerance(tolerance);
1135  finalOptimizer->setVerbose(verbose);
1137  finalOptimizer->init(parametersToEstimate);
1138  finalOptimizer->optimize();
1139  n += finalOptimizer->getNumberOfEvaluations();
1140  delete finalOptimizer;
1141  }
1142 
1143  if (verbose)
1144  ApplicationTools::displayResult("Performed", TextTools::toString(n) + " function evaluations.");
1145  if (backupFile != "none")
1146  {
1147  string bf=backupFile+".def";
1148  rename(backupFile.c_str(),bf.c_str());
1149  }
1150  return tl;
1151 }
1152 
1153 /******************************************************************************/
1154 
1157  const ParameterList& parameters,
1158  map<string, string>& params,
1159  const string& suffix,
1160  bool suffixIsOptional,
1161  bool verbose,
1162  int warn)
1163 {
1164  string optimization = ApplicationTools::getStringParameter("optimization", params, "FullD(derivatives=Newton)", suffix, suffixIsOptional, warn);
1165  if (optimization == "None")
1166  return;
1167  string optName;
1168  map<string, string> optArgs;
1169  KeyvalTools::parseProcedure(optimization, optName, optArgs);
1170 
1171  unsigned int optVerbose = ApplicationTools::getParameter<unsigned int>("optimization.verbose", params, 2, suffix, suffixIsOptional, warn + 1);
1172 
1173  string mhPath = ApplicationTools::getAFilePath("optimization.message_handler", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
1174  OutputStream* messageHandler =
1175  (mhPath == "none") ? 0 :
1176  (mhPath == "std") ? ApplicationTools::message.get() :
1177  new StlOutputStream(new ofstream(mhPath.c_str(), ios::out));
1178  if (verbose)
1179  ApplicationTools::displayResult("Message handler", mhPath);
1180 
1181  string prPath = ApplicationTools::getAFilePath("optimization.profiler", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
1182  OutputStream* profiler =
1183  (prPath == "none") ? 0 :
1184  (prPath == "std") ? ApplicationTools::message.get() :
1185  new StlOutputStream(new ofstream(prPath.c_str(), ios::out));
1186  if (profiler)
1187  profiler->setPrecision(20);
1188  if (verbose)
1189  ApplicationTools::displayResult("Profiler", prPath);
1190 
1191  ParameterList parametersToEstimate = parameters;
1192 
1193  // Should I ignore some parameters?
1194  if (params.find("optimization.ignore_parameter") != params.end())
1195  throw Exception("optimization.ignore_parameter is deprecated, use optimization.ignore_parameters instead!");
1196  string paramListDesc = ApplicationTools::getStringParameter("optimization.ignore_parameters", params, "", suffix, suffixIsOptional, warn + 1);
1197  StringTokenizer st(paramListDesc, ",");
1198  while (st.hasMoreToken())
1199  {
1200  try
1201  {
1202  string param = st.nextToken();
1203  if (param == "BrLen")
1204  {
1205  vector<string> vs = tl->getBranchLengthsParameters().getParameterNames();
1206  parametersToEstimate.deleteParameters(vs);
1207  if (verbose)
1208  ApplicationTools::displayResult("Parameter ignored", string("Branch lengths"));
1209  }
1210  else if (param == "Ancient")
1211  {
1213  if (!nhtl)
1214  ApplicationTools::displayWarning("The 'Ancient' parameters do not exist in homogeneous models, and will be ignored.");
1215  else
1216  {
1217  vector<string> vs = nhtl->getRootFrequenciesParameters().getParameterNames();
1218  parametersToEstimate.deleteParameters(vs);
1219  }
1220  if (verbose)
1221  ApplicationTools::displayResult("Parameter ignored", string("Root frequencies"));
1222  }
1223  else
1224  {
1225  parametersToEstimate.deleteParameter(param);
1226  if (verbose)
1227  ApplicationTools::displayResult("Parameter ignored", param);
1228  }
1229  }
1230  catch (ParameterNotFoundException& pnfe)
1231  {
1232  ApplicationTools::displayError("Parameter '" + pnfe.getParameter() + "' not found, and so can't be ignored!");
1233  }
1234  }
1235 
1236  unsigned int nbEvalMax = ApplicationTools::getParameter<unsigned int>("optimization.max_number_f_eval", params, 1000000, suffix, suffixIsOptional, warn + 1);
1237  if (verbose)
1238  ApplicationTools::displayResult("Max # ML evaluations", TextTools::toString(nbEvalMax));
1239 
1240  double tolerance = ApplicationTools::getDoubleParameter("optimization.tolerance", params, .000001, suffix, suffixIsOptional, warn + 1);
1241  if (verbose)
1242  ApplicationTools::displayResult("Tolerance", TextTools::toString(tolerance));
1243 
1244  string order = ApplicationTools::getStringParameter("derivatives", optArgs, "Gradient", "", true, warn + 1);
1245  string optMethod, derMethod;
1246  if (order == "Gradient")
1247  {
1249  }
1250  else if (order == "Newton")
1251  {
1253  }
1254  else
1255  throw Exception("Option '" + order + "' is not known for 'optimization.method.derivatives'.");
1256  if (verbose)
1257  ApplicationTools::displayResult("Optimization method", optName);
1258  if (verbose)
1259  ApplicationTools::displayResult("Algorithm used for derivable parameters", order);
1260 
1261  // Backing up or restoring?
1262  unique_ptr<BackupListener> backupListener;
1263  string backupFile = ApplicationTools::getAFilePath("optimization.backup.file", params, false, false, suffix, suffixIsOptional, "none", warn + 1);
1264  if (backupFile != "none")
1265  {
1266  ApplicationTools::displayResult("Parameters will be backup to", backupFile);
1267  backupListener.reset(new BackupListener(backupFile));
1268  if (FileTools::fileExists(backupFile))
1269  {
1270  ApplicationTools::displayMessage("A backup file was found! Try to restore parameters from previous run...");
1271  ifstream bck(backupFile.c_str(), ios::in);
1272  vector<string> lines = FileTools::putStreamIntoVectorOfStrings(bck);
1273  double fval = TextTools::toDouble(lines[0].substr(5));
1274  ParameterList pl = tl->getParameters();
1275  for (size_t l = 1; l < lines.size(); ++l)
1276  {
1277  if (!TextTools::isEmpty(lines[l]))
1278  {
1279  StringTokenizer stp(lines[l], "=");
1280  if (stp.numberOfRemainingTokens() != 2)
1281  {
1282  cerr << "Corrupted backup file!!!" << endl;
1283  cerr << "at line " << l << ": " << lines[l] << endl;
1284  }
1285  string pname = stp.nextToken();
1286  string pvalue = stp.nextToken();
1287  size_t p = pl.whichParameterHasName(pname);
1288  pl.setParameter(p, AutoParameter(pl[p]));
1289  pl[p].setValue(TextTools::toDouble(pvalue));
1290  }
1291  }
1292  bck.close();
1293  tl->setParameters(pl);
1294  if (abs(tl->getValue() - fval) > 0.000001)
1295  throw Exception("Incorrect likelihood value after restoring, from backup file. Remove backup file and start from scratch :s");
1296  ApplicationTools::displayResult("Restoring log-likelihood", -fval);
1297  }
1298  }
1299 
1300  size_t n = 0;
1301  if (optName == "D-Brent")
1302  {
1303  // Uses Newton-Brent method:
1304  unsigned int nstep = ApplicationTools::getParameter<unsigned int>("nstep", optArgs, 1, "", true, warn + 1);
1305  if (verbose && nstep > 1)
1306  ApplicationTools::displayResult("# of precision steps", TextTools::toString(nstep));
1308  tl,
1309  parametersToEstimate,
1310  backupListener.get(),
1311  nstep,
1312  tolerance,
1313  nbEvalMax,
1314  messageHandler,
1315  profiler,
1316  optVerbose,
1317  optMethod);
1318  }
1319  else if (optName == "FullD")
1320  {
1321  // Uses Newton-raphson alogrithm with numerical derivatives when required.
1323  tl,
1324  parametersToEstimate,
1325  backupListener.get(),
1326  tolerance,
1327  nbEvalMax,
1328  messageHandler,
1329  profiler,
1330  optVerbose,
1331  optMethod);
1332  }
1333  else
1334  throw Exception("Unknown optimization method: " + optName);
1335 
1336  string finalMethod = ApplicationTools::getStringParameter("optimization.final", params, "none", suffix, suffixIsOptional, warn + 1);
1337  Optimizer* finalOptimizer = 0;
1338  if (finalMethod == "none")
1339  {}
1340  else if (finalMethod == "simplex")
1341  {
1342  finalOptimizer = new DownhillSimplexMethod(tl);
1343  }
1344  else if (finalMethod == "powell")
1345  {
1346  finalOptimizer = new PowellMultiDimensions(tl);
1347  }
1348  else
1349  throw Exception("Unknown final optimization method: " + finalMethod);
1350 
1351  if (finalOptimizer)
1352  {
1353  parametersToEstimate.matchParametersValues(tl->getParameters());
1354  ApplicationTools::displayResult("Final optimization step", finalMethod);
1355  finalOptimizer->setProfiler(profiler);
1356  finalOptimizer->setMessageHandler(messageHandler);
1357  finalOptimizer->setMaximumNumberOfEvaluations(nbEvalMax);
1358  finalOptimizer->getStopCondition()->setTolerance(tolerance);
1359  finalOptimizer->setVerbose(verbose);
1361  finalOptimizer->init(parametersToEstimate);
1362  finalOptimizer->optimize();
1363  n += finalOptimizer->getNumberOfEvaluations();
1364  delete finalOptimizer;
1365  }
1366 
1367  if (verbose)
1368  ApplicationTools::displayResult("Performed", TextTools::toString(n) + " function evaluations.");
1369  if (backupFile != "none")
1370  {
1371  string bf=backupFile+".def";
1372  rename(backupFile.c_str(),bf.c_str());
1373  }
1374 }
1375 
1376 /******************************************************************************/
1377 
1379 {
1380  for (size_t i = 0; i < pl.size(); ++i)
1381  {
1382  const Constraint* constraint = pl[i].getConstraint();
1383  if (constraint)
1384  {
1385  double value = pl[i].getValue();
1386  if (!constraint->isCorrect(value - 1e-6) || !constraint->isCorrect(value + 1e-6))
1387  {
1388  ApplicationTools::displayWarning("This parameter has a value close to the boundary: " + pl[i].getName() + "(" + TextTools::toString(value) + ").");
1389  }
1390  }
1391  }
1392 }
1393 
1394 /******************************************************************************/
1395 
1397  const TreeTemplate<Node>& tree,
1398  map<string, string>& params,
1399  const string& prefix,
1400  const string& suffix,
1401  bool suffixIsOptional,
1402  bool verbose,
1403  bool checkOnly,
1404  int warn)
1405 {
1406  string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
1407  string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, false, false, suffix, suffixIsOptional, "none", warn);
1408 
1409  BppOTreeWriterFormat bppoWriter(warn);
1410  unique_ptr<OTree> oTree(bppoWriter.read(format));
1411  if (verbose)
1412  {
1413  ApplicationTools::displayResult("Output tree file " + suffix, file);
1414  ApplicationTools::displayResult("Output tree format " + suffix, oTree->getFormatName());
1415  }
1416  if (!checkOnly && file != "none")
1417  oTree->write(tree, file, true);
1418 }
1419 
1420 /******************************************************************************/
1421 
1423  const vector<Tree*>& trees,
1424  map<string, string>& params,
1425  const string& prefix,
1426  const string& suffix,
1427  bool suffixIsOptional,
1428  bool verbose,
1429  bool checkOnly,
1430  int warn)
1431 {
1432  string format = ApplicationTools::getStringParameter(prefix + "trees.format", params, "Newick", suffix, suffixIsOptional, warn);
1433  string file = ApplicationTools::getAFilePath(prefix + "trees.file", params, true, false, suffix, suffixIsOptional, "none", warn);
1434 
1435  BppOMultiTreeWriterFormat bppoWriter(warn);
1436  unique_ptr<OMultiTree> oTrees(bppoWriter.read(format));
1437  if (verbose)
1438  {
1439  ApplicationTools::displayResult("Output trees file " + suffix, file);
1440  ApplicationTools::displayResult("Output trees format " + suffix, oTrees->getFormatName());
1441  }
1442  if (!checkOnly && file != "none")
1443  oTrees->write(trees, file, true);
1444 
1445 }
1446 
1447 /******************************************************************************/
1448 
1449 void PhylogeneticsApplicationTools::printParameters(const TransitionModel* model, OutputStream& out, int warn, bool withAlias)
1450 {
1451  out << "model=";
1452  map<string, string> globalAliases;
1453  vector<string> writtenNames;
1454  BppOSubstitutionModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
1455  bIO.write(*model, out, globalAliases, writtenNames);
1456  out.endLine();
1457 }
1458 
1459 /******************************************************************************/
1460 
1461 void PhylogeneticsApplicationTools::printParameters(const SubstitutionModelSet* modelSet, OutputStream& out, int warn, bool withAlias)
1462 {
1463  (out << "nonhomogeneous=general").endLine();
1464  (out << "nonhomogeneous.number_of_models=" << modelSet->getNumberOfModels()).endLine();
1465 
1466  if (modelSet->isStationary())
1467  (out << "nonhomogeneous.stationarity = yes");
1468 
1469  // Get the parameter links:
1470  map< size_t, vector<string> > modelLinks; // for each model index, stores the list of global parameters.
1471  map< string, set<size_t> > parameterLinks; // for each parameter name, stores the list of model indices, wich should be sorted.
1472  vector<string> writtenNames;
1473 
1474  // Loop over all models:
1475  for (size_t i = 0; i < modelSet->getNumberOfModels(); i++)
1476  {
1477  const TransitionModel* model = modelSet->getModel(i);
1478 
1479  map<string, string> aliases;
1480 
1481  // First get the aliases for this model:
1482 
1483  if (withAlias)
1484  {
1485  ParameterList pl = model->getParameters();
1486 
1487  for (size_t np = 0; np < pl.size(); np++)
1488  {
1489  string nfrom = modelSet->getFrom(pl[np].getName() + "_" + TextTools::toString(i + 1));
1490  if (nfrom != "")
1491  aliases[pl[np].getName()] = nfrom;
1492  }
1493  }
1494 
1495  // Now print it:
1496  writtenNames.clear();
1497  out.endLine() << "model" << (i + 1) << "=";
1498  BppOSubstitutionModelFormat bIOsm(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
1499  bIOsm.write(*model, out, aliases, writtenNames);
1500 
1501  out.endLine();
1502  vector<int> ids = modelSet->getNodesWithModel(i);
1503  out << "model" << (i + 1) << ".nodes_id=" << ids[0];
1504  for (size_t j = 1; j < ids.size(); ++j)
1505  {
1506  out << "," << ids[j];
1507  }
1508  out.endLine();
1509  }
1510 
1511  // First get the aliases for this frequencies set
1512 
1513  if (!modelSet->isStationary())
1514  {
1515 
1516  const FrequenciesSet* pFS = modelSet->getRootFrequenciesSet();
1517 
1518  ParameterList plf = pFS->getParameters();
1519 
1520  map<string, string> aliases;
1521 
1522  if (withAlias)
1523  {
1524  for (size_t np = 0; np < plf.size(); np++)
1525  {
1526  string nfrom = modelSet->getFrom(plf[np].getName());
1527  if (nfrom != "")
1528  aliases[plf[np].getName()] = nfrom;
1529  }
1530  }
1531 
1532  // Root frequencies:
1533  out.endLine();
1534  (out << "# Root frequencies:").endLine();
1535  out << "nonhomogeneous.root_freq=";
1536 
1538  bIO.write(pFS, out, aliases, writtenNames);
1539  }
1540 
1541 }
1542 
1543 /******************************************************************************/
1544 
1546 {
1547  out << "rate_distribution=";
1548  map<string, string> globalAliases;
1549  vector<string> writtenNames;
1551 
1552  bIO->write(*rDist, out, globalAliases, writtenNames);
1553  delete bIO;
1554  out.endLine();
1555 }
1556 
1557 /************************
1558 * Substitution Mapping *
1559 ************************/
1560 
1562  const Alphabet* alphabet,
1563  const SubstitutionModel* model,
1564  map<string, string>& params,
1565  string suffix,
1566  bool verbose,
1567  int warn)
1568 {
1569  SubstitutionCount* substitutionCount = 0;
1570  string nijtOption;
1571  map<string, string> nijtParams;
1572  string nijtText = ApplicationTools::getStringParameter("nijt", params, "Uniformization", suffix, true, warn);
1573  KeyvalTools::parseProcedure(nijtText, nijtOption, nijtParams);
1574 
1575  if (nijtOption == "Laplace")
1576  {
1577  size_t trunc = ApplicationTools::getParameter<size_t>("trunc", nijtParams, 10, suffix, true, warn + 1);
1578  substitutionCount = new LaplaceSubstitutionCount(model, trunc);
1579  }
1580  else if (nijtOption == "Uniformization")
1581  {
1582  string weightOption = ApplicationTools::getStringParameter("weight", nijtParams, "None", "", true, warn + 1);
1583  AlphabetIndex2* weights = SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:");
1584  substitutionCount = new UniformizationSubstitutionCount(model, new TotalSubstitutionRegister(model), weights);
1585  }
1586  else if (nijtOption == "Decomposition")
1587  {
1588  string weightOption = ApplicationTools::getStringParameter("weight", nijtParams, "None", "", true, warn + 1);
1589  AlphabetIndex2* weights = SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:");
1590  const ReversibleSubstitutionModel* revModel = dynamic_cast<const ReversibleSubstitutionModel*>(model);
1591  if (revModel)
1592  substitutionCount = new DecompositionSubstitutionCount(revModel, new TotalSubstitutionRegister(model), weights);
1593  else
1594  throw Exception("Decomposition method can only be used with reversible substitution models.");
1595  }
1596  else if (nijtOption == "Naive")
1597  {
1598  string weightOption = ApplicationTools::getStringParameter("weight", nijtParams, "None", "", true, warn + 1);
1599  AlphabetIndex2* weights = SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:");
1600  substitutionCount = new NaiveSubstitutionCount(model, new TotalSubstitutionRegister(model), false, weights);
1601  }
1602  else if (nijtOption == "Label")
1603  {
1604  substitutionCount = new LabelSubstitutionCount(model);
1605  }
1606  else if (nijtOption == "ProbOneJump")
1607  {
1608  substitutionCount = new OneJumpSubstitutionCount(model);
1609  }
1610  else
1611  {
1612  ApplicationTools::displayError("Invalid option '" + nijtOption + ", in 'nijt' parameter.");
1613  exit(-1);
1614  }
1615  ApplicationTools::displayResult("Substitution count procedure", nijtOption);
1616 
1617  // Send results:
1618  return substitutionCount;
1619 }
1620 
1621 /****************************************************************************/
1622 
1623 
1624 SubstitutionRegister* PhylogeneticsApplicationTools::getSubstitutionRegister(const std::string& regTypeDesc, const SubstitutionModel* model, bool verbose)
1625 {
1626  string regType = "";
1627  map<string, string> regArgs;
1628  KeyvalTools::parseProcedure(regTypeDesc, regType, regArgs);
1629 
1630  SubstitutionRegister* reg = 0;
1631 
1632  if (regType=="Combination")
1633  {
1635 
1636  size_t i = 0;
1637  while (++i)
1638  {
1639  string regDesc = ApplicationTools::getStringParameter("reg" + TextTools::toString(i), regArgs, "", "", false, 1);
1640  if (regDesc=="")
1641  break;
1642 
1643  SubstitutionRegister* sreg=getSubstitutionRegister(regDesc, model);
1644 
1645  vreg->addRegister(sreg);
1646  }
1647 
1648  reg=vreg;
1649  }
1650  else if (regType == "All")
1651  {
1652  reg = new ComprehensiveSubstitutionRegister(model, false);
1653  }
1654  else if (regType == "Total")
1655  {
1656  reg = new TotalSubstitutionRegister(model);
1657  }
1658  else if (regType == "Selected"){
1659  string subsList = ApplicationTools::getStringParameter("substitution.list", regArgs, "All", "", true, false);
1660  reg = new SelectedSubstitutionRegister(model, subsList);
1661  }
1662 
1663 
1664  // Alphabet dependent registers
1665 
1666 
1667  else if (AlphabetTools::isNucleicAlphabet(model->getAlphabet()))
1668  {
1669  const NucleotideSubstitutionModel* nmodel=dynamic_cast<const NucleotideSubstitutionModel*>(model);
1670  if (!nmodel)
1671  {
1672  const WrappedSubstitutionModel* wmodel=dynamic_cast<const WrappedSubstitutionModel*>(model);
1673  if (wmodel)
1674  {
1675  nmodel=dynamic_cast<const NucleotideSubstitutionModel*>(&wmodel->getSubstitutionModel());
1676  }
1677  }
1678 
1679  if (!nmodel)
1680  throw Exception("PhylogeneticsApplicationTools::getSubstitutionRegister : model and alphabet do not fit " + model->getAlphabet()->getAlphabetType() + " vs " + model->getName());
1681 
1682 
1683  if (regType == "GC")
1684  reg = new GCSubstitutionRegister(nmodel, false);
1685  else if (regType == "TsTv")
1686  reg = new TsTvSubstitutionRegister(nmodel);
1687  else if (regType == "SW")
1688  reg = new SWSubstitutionRegister(nmodel);
1689  else
1690  throw Exception("Unsupported substitution categorization: " + regType + " for alphabet " + model->getAlphabet()->getAlphabetType());
1691  }
1692 
1693  else if (AlphabetTools::isCodonAlphabet(model->getAlphabet()))
1694  {
1695  const CodonSubstitutionModel* cmodel=dynamic_cast<const CodonSubstitutionModel*>(model);
1696  if (!cmodel)
1697  {
1698  const WrappedSubstitutionModel* wmodel=dynamic_cast<const WrappedSubstitutionModel*>(model);
1699  if (wmodel)
1700  {
1701  cmodel=dynamic_cast<const CodonSubstitutionModel*>(&wmodel->getSubstitutionModel());
1702  }
1703  }
1704 
1705  if (!cmodel)
1706  throw Exception("PhylogeneticsApplicationTools::getSubstitutionRegister : model and alphabet do not fit " + model->getAlphabet()->getAlphabetType() + " vs " + model->getName());
1707 
1708  if (regType == "IntraAA")
1709  reg = new AAInteriorSubstitutionRegister(cmodel);
1710  else if (regType == "InterAA")
1711  reg = new AAExteriorSubstitutionRegister(cmodel);
1712  else if (regType == "GC")
1713  reg = new GCSynonymousSubstitutionRegister(cmodel);
1714  else if (regType == "TsTv")
1715  reg = new TsTvSubstitutionRegister(cmodel);
1716  else if (regType == "SW")
1717  reg = new SWSubstitutionRegister(cmodel);
1718  else if (regType == "KrKc")
1719  reg = new KrKcSubstitutionRegister(cmodel);
1720  else if (regType == "DnDs")
1721  reg = new DnDsSubstitutionRegister(cmodel, false);
1722  else
1723  throw Exception("Unsupported substitution categorization: " + regType + " for alphabet " + model->getAlphabet()->getAlphabetType());
1724  }
1725 
1726  else if (AlphabetTools::isProteicAlphabet(model->getAlphabet()))
1727  {
1728  const ProteinSubstitutionModel* pmodel=dynamic_cast<const ProteinSubstitutionModel*>(model);
1729  if (!pmodel)
1730  {
1731  const WrappedSubstitutionModel* wmodel=dynamic_cast<const WrappedSubstitutionModel*>(model);
1732  if (wmodel)
1733  {
1734  pmodel=dynamic_cast<const ProteinSubstitutionModel*>(&wmodel->getSubstitutionModel());
1735  }
1736  }
1737 
1738  if (!pmodel)
1739  throw Exception("PhylogeneticsApplicationTools::getSubstitutionRegister : model and alphabet do not fit " + model->getAlphabet()->getAlphabetType() + " vs " + model->getName());
1740 
1741  if (regType == "KrKc")
1742  reg = new KrKcSubstitutionRegister(pmodel);
1743  else
1744  throw Exception("Unsupported substitution categorization: " + regType + " for alphabet " + model->getAlphabet()->getAlphabetType());
1745  }
1746 
1748  if (csr)
1749  csr->setStationarity(ApplicationTools::getBooleanParameter("stationarity", regArgs, true));
1750 
1751  if (verbose)
1752  ApplicationTools::displayResult("Substitution Register", regType);
1753 
1754  return reg;
1755 }
1756 
const std::map< std::string, std::string > & getUnparsedArguments() const
virtual size_t whichParameterHasName(const std::string &name) const
static Tree * getTree(std::map< std::string, std::string > &params, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a Tree object according to options.
std::string getFrom(const std::string &name) const
static unsigned int optimizeNumericalParameters2(DiscreteRatesAcrossSitesTreeLikelihood *tl, const ParameterList &parameters, OptimizationListener *listener=0, double tolerance=0.000001, unsigned int tlEvalMax=1000000, OutputStream *messageHandler=ApplicationTools::message.get(), OutputStream *profiler=ApplicationTools::message.get(), bool reparametrization=false, bool useClock=false, unsigned int verbose=1, const std::string &optMethodDeriv=OPTIMIZATION_NEWTON)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static SubstitutionRegister * getSubstitutionRegister(const std::string &regTypeDesc, const SubstitutionModel *model, bool verbose=true)
Get a Register instance.
virtual bool matchParametersValues(const ParameterList &parameters)=0
static unsigned int optimizeTreeScale(TreeLikelihood *tl, double tolerance=0.000001, unsigned int tlEvalMax=1000000, OutputStream *messageHandler=ApplicationTools::message.get(), OutputStream *profiler=ApplicationTools::message.get(), unsigned int verbose=1)
Optimize the scale of a TreeLikelihood.
Substitution models manager for non-homogeneous / non-reversible models of evolution.
static bool parameterExists(const std::string &parameterName, const std::map< std::string, std::string > &params)
virtual void setFreq(std::map< int, double > &frequencies)=0
Set equilibrium frequencies.
Interface for all substitution models.
static void displayWarning(const std::string &text)
void addRegister(SubstitutionRegister *reg)
virtual std::vector< std::string > getParameterNames() const
Transition model I/O in BppO format.
virtual void setMessageHandler(OutputStream *mh)=0
static unsigned int optimizeNumericalParametersWithGlobalClock(DiscreteRatesAcrossSitesClockTreeLikelihood *cl, const ParameterList &parameters, OptimizationListener *listener=0, unsigned int nstep=1, double tolerance=0.000001, unsigned int tlEvalMax=1000000, OutputStream *messageHandler=ApplicationTools::message.get(), OutputStream *profiler=ApplicationTools::message.get(), unsigned int verbose=1, const std::string &optMethodDeriv=OPTIMIZATION_GRADIENT)
Optimize numerical parameters assuming a global clock (branch heights, substitution model & rate dist...
static void printParameters(const TransitionModel *model, OutputStream &out, int warn=1, bool withAlias=true)
Output a SubstitutionModel description to a file.
static const std::string FAST
Sets a Register based on a vector of Registers. The categories are intersection of categories of thos...
void write(const FrequenciesSet *pfreqset, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
Write a substitution model to a stream.
Analytical substitution count using the eigen decomposition method.
virtual void setParameters(const ParameterList &parameters)=0
static void writeTree(const TreeTemplate< Node > &tree, std::map< std::string, std::string > &params, const std::string &prefix="output.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, bool checkOnly=false, int warn=1)
Write a tree according to options.
Frequencies set I/O in BppO format.
virtual bool hasConstraint() const
static SubstitutionModelSet * getSubstitutionModelSet(const Alphabet *alphabet, const GeneticCode *gcode, const SiteContainer *data, std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Gets a SubstitutionModelSet object according to options.
Distinguishes AT<->GC from GC<->AT.
static std::vector< std::string > matchingParameters(const std::string &pattern, const std::map< std::string, std::string > &params)
virtual ParameterList getBranchLengthsParameters() const =0
Get the branch lengths parameters.
TransitionModel * readTransitionModel(const Alphabet *alphabet, const std::string &modelDescription, const SiteContainer *data=0, bool parseArguments=true)
static void setSubstitutionModelParametersInitialValuesWithAliases(TransitionModel &model, std::map< std::string, std::string > &unparsedParameterValues, size_t modelNumber, const SiteContainer *data, std::map< std::string, std::string > &sharedParams, bool verbose)
Set parameter initial values of a given model in a set according to options.
virtual void setFreqFromData(const SequenceContainer &data, double pseudoCount=0)=0
Set equilibrium frequencies equal to the frequencies estimated from the data.
static unsigned int optimizeNumericalParameters(DiscreteRatesAcrossSitesTreeLikelihood *tl, const ParameterList &parameters, OptimizationListener *listener=0, unsigned int nstep=1, double tolerance=0.000001, unsigned int tlEvalMax=1000000, OutputStream *messageHandler=ApplicationTools::message.get(), OutputStream *profiler=ApplicationTools::message.get(), bool reparametrization=false, unsigned int verbose=1, const std::string &optMethodDeriv=OPTIMIZATION_NEWTON, const std::string &optMethodModel=OPTIMIZATION_BRENT)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
void setRootFrequencies(FrequenciesSet *rootFreqs)
Sets a given FrequenciesSet for root frequencies.
const std::string & nextToken()
virtual bool matchParametersValues(const ParameterList &params, std::vector< size_t > *updatedParameters=0)
virtual double getParameterValue(const std::string &name) const =0
virtual const std::string & getName() const
size_t numberOfRemainingTokens() const
Count conservative and radical amino-acid substitutions.
virtual ParameterList getRootFrequenciesParameters() const =0
virtual void deleteParameters(const std::vector< std::string > &names, bool mustExist=true)
virtual double optimize()=0
virtual std::string getNamespace() const =0
The SubstitutionRegister interface.
The TreeLikelihood interface.
virtual unsigned int getSize() const =0
static void displayResult(const std::string &text, const T &result)
virtual const ParameterList & getParameters() const =0
Specialized interface for protein substitution model.
STL namespace.
static SubstitutionModel * getSubstitutionModel(const Alphabet *alphabet, const GeneticCode *gCode, const SiteContainer *data, std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a SubstitutionModel object according to options.
void write(const TransitionModel &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
Write a substitution model to a stream.
const std::map< std::string, std::string > & getUnparsedArguments() const
The phylogenetic tree class.
static std::string OPTIMIZATION_GRADIENT
static FrequenciesSet * getFrequenciesSet(const Alphabet *alphabet, const GeneticCode *gCode, const std::string &freqDescription, const SiteContainer *data, std::map< std::string, std::string > &sharedParams, const std::vector< double > &rateFreqs, bool verbose=true, int warn=1)
Get A FrequenciesSet object according to options.
double toDouble(const std::string &s, char dec= '.', char scientificNotation= 'e')
static unsigned int optimizeNumericalParametersWithGlobalClock2(DiscreteRatesAcrossSitesClockTreeLikelihood *cl, const ParameterList &parameters, OptimizationListener *listener=0, double tolerance=0.000001, unsigned int tlEvalMax=1000000, OutputStream *messageHandler=ApplicationTools::message.get(), OutputStream *profiler=ApplicationTools::message.get(), unsigned int verbose=1, const std::string &optMethodDeriv=OPTIMIZATION_GRADIENT)
Optimize numerical parameters assuming a global clock (branch heights, substitution model & rate dist...
virtual void deleteParameter(const std::string &name)
const FrequenciesSet * getRootFrequenciesSet() const
IMultiTree * read(const std::string &description)
Read a IMultiTree object from a string.
Interface for phylogenetic tree objects.
Definition: Tree.h:148
Tree I/O in BppO format.
FrequenciesSet to be used with a Markov-modulated substitution model.
OTree * read(const std::string &description)
Read a OTree object from a string.
virtual void setTolerance(double tolerance)=0
const TransitionModel * getModel(size_t i) const
Get one model from the set knowing its index.
static AlphabetIndex2 * getAlphabetIndex2(const Alphabet *alphabet, const std::string &description, const std::string &message="Alphabet distance:", bool verbose=true)
virtual bool isCorrect(double value) const =0
Indexes only intra amino-acid substitutions. Every group represents a substitutions for the same amin...
virtual OutputStream & setPrecision(int digit)=0
static void diff(std::vector< T > &v1, std::vector< T > &v2, std::vector< T > &v3)
virtual const SubstitutionModel & getSubstitutionModel() const =0
Indexes only substitutions between amino-acids. Groups are distinguished by their direction...
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static std::vector< std::string > putStreamIntoVectorOfStrings(std::istream &input)
The CategorySubstitutionRegisters.
static MultipleDiscreteDistribution * getMultipleDistributionDefaultInstance(const std::string &distDescription, std::map< std::string, std::string > &unparsedParameterValues, bool verbose=true)
Build a multi-dimension distribution as a MultipleDiscreteDistribution object with default parameter ...
static std::vector< Tree * > getTrees(std::map< std::string, std::string > &params, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a list ofTree objects according to options.
void clear()
Resets all the information contained in this object.
static std::shared_ptr< OutputStream > warning
static bool isCodonAlphabet(const Alphabet *alphabet)
Interface for reversible substitution models.
virtual void setMessageHandler(OutputStream *mh)
Class inheriting from GeneralSubstitutionRegister, this one uses a special constructor which allows i...
void addToHyperNode(size_t nM, const Vint &vnS, int nH=-1)
ITree * read(const std::string &description)
Read a ITree object from a string.
std::vector< int > Vint
virtual const Constraint * getConstraint() const
static void completeMixedSubstitutionModelSet(MixedSubstitutionModelSet &mixedModelSet, const Alphabet *alphabet, const SiteContainer *data, std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Complete a MixedSubstitutionModelSet object according to options, given this model has already been f...
DiscreteDistribution * read(const std::string &distDescription, bool parseArguments)
static bool isNucleicAlphabet(const Alphabet *alphabet)
virtual std::string getDescription() const =0
virtual std::string getName() const =0
virtual void setConstraint(Constraint *constraint, bool attach=false)
Distinguishes synonymous from non-synonymous substitutions.
Computes the probability that at least one jump occured on a branch, given the initial and final stat...
static std::string OPTIMIZATION_BRENT
Substitution models manager for Mixed Substitution Models. This class inherits from SubstitutionModel...
FrequenciesSet * read(const Alphabet *alphabet, const std::string &freqDescription, const SiteContainer *data, bool parseArguments=true)
Read a frequencies set from a string.
static std::shared_ptr< OutputStream > message
Interface for likelihood computation with a global clock and rate across sites variation.
Rate Distribution I/O in BppO format.
Distinguishes transitions from transversions.
virtual const ParameterList & getIndependentParameters() const =0
void write(const DiscreteDistribution &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
static TransitionModel * getTransitionModel(const Alphabet *alphabet, const GeneticCode *gCode, const SiteContainer *data, std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
static TreeLikelihood * optimizeParameters(TreeLikelihood *tl, const ParameterList &parameters, std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Optimize parameters according to options.
Analytical (weighted) substitution count using the uniformization method.
Labelling substitution count.
virtual const Alphabet * getAlphabet() const =0
The SubstitutionsCount interface.
static void displayMessage(const std::string &text)
virtual unsigned int getNumberOfEvaluations() const =0
virtual OutputStream & endLine()=0
static std::string getAFilePath(const std::string &parameter, const std::map< std::string, std::string > &params, bool isRequired=true, bool mustExist=true, const std::string &suffix="", bool suffixIsOptional=false, const std::string &defaultPath="none", int warn=0)
virtual void setParameter(size_t index, const Parameter &param)
static bool getBooleanParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, bool defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
size_t size() const
virtual void setVerbose(unsigned int v)=0
virtual bool isEmpty() const =0
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const =0
Substitution model I/O in BppO format.
virtual void setMaximumNumberOfEvaluations(unsigned int max)=0
static const std::string BETTER
static const std::string PHYML
void setGeneticCode(const GeneticCode *gCode)
Set the genetic code to use in case a codon frequencies set should be built.
static void setSubstitutionModelSet(SubstitutionModelSet &modelSet, const Alphabet *alphabet, const GeneticCode *gcode, const SiteContainer *data, std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Sets a SubstitutionModelSet object according to options.
Specialisation interface for nucleotide substitution model.
bool hasMoreToken() const
Specialization of the TreeLikelihood interface for the branch non-homogeneous and non-stationary mode...
virtual Vint getSubmodelNumbers(const std::string &desc) const =0
const SubstitutionModel * getSubstitutionModel(size_t i) const
Return a markovian substitution model (or null)
static void displayBooleanResult(const std::string &text, bool result)
virtual std::string getName() const =0
Get the name of the model.
static std::string CONSTRAINTS_AUTO
static bool fileExists(const std::string &filename)
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
static NNIHomogeneousTreeLikelihood * optimizeTreeNNI(NNIHomogeneousTreeLikelihood *tl, const ParameterList &parameters, bool optimizeNumFirst=true, double tolBefore=100, double tolDuring=100, unsigned int tlEvalMax=1000000, unsigned int numStep=1, OutputStream *messageHandler=ApplicationTools::message.get(), OutputStream *profiler=ApplicationTools::message.get(), bool reparametrization=false, unsigned int verbose=1, const std::string &optMethod=OptimizationTools::OPTIMIZATION_NEWTON, unsigned int nStep=1, const std::string &nniMethod=NNITopologySearch::PHYML)
Optimize all parameters from a TreeLikelihood object, including tree topology using Nearest Neighbor ...
SubstitutionModel * read(const Alphabet *alphabet, const std::string &modelDescription, const SiteContainer *data=0, bool parseArguments=true)
Read a substitution model from a string.
Distinguishes substitutions given the link between the changed nucleotides : S for strong (GC) and W ...
Distinguishes all types of substitutions.
void aliasParameters(const std::string &p1, const std::string &p2)
virtual std::string getParameter() const
virtual void setProfiler(OutputStream *profiler)=0
static void checkEstimatedParameters(const ParameterList &pl)
Check if parameter values are close to their definition boundary.
void setGeneticCode(const GeneticCode *gCode)
Set the genetic code to use in case a codon frequencies set should be built.
virtual void setNamespace(const std::string &prefix)=0
void addModel(TransitionModel *model, const std::vector< int > &nodesId)
Add a new model to the set, and set relationships with nodes and params.
bool isEmpty(const std::string &s)
std::string toString(T t)
Naive substitution count.
A frequencies set used to estimate frequencies at the root with the COaLA model. Frequencies at the r...
virtual void init(const ParameterList &params)=0
static std::string OPTIMIZATION_BFGS
static bool isWordAlphabet(const Alphabet *alphabet)
static bool isProteicAlphabet(const Alphabet *alphabet)
Laplace estimate of the substitution count.
static NNIHomogeneousTreeLikelihood * optimizeTreeNNI2(NNIHomogeneousTreeLikelihood *tl, const ParameterList &parameters, bool optimizeNumFirst=true, double tolBefore=100, double tolDuring=100, unsigned int tlEvalMax=1000000, unsigned int numStep=1, OutputStream *messageHandler=ApplicationTools::message.get(), OutputStream *profiler=ApplicationTools::message.get(), bool reparametrization=false, unsigned int verbose=1, const std::string &optMethod=OptimizationTools::OPTIMIZATION_NEWTON, const std::string &nniMethod=NNITopologySearch::PHYML)
Optimize all parameters from a TreeLikelihood object, including tree topology using Nearest Neighbor ...
Distinguishes AT->GC vs GC->AT inside synonymous substitutions on third codon position.
static DiscreteDistribution * getRateDistribution(std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true)
Build a DiscreteDistribution object according to options.
static void displayError(const std::string &text)
static SubstitutionCount * getSubstitutionCount(const Alphabet *alphabet, const SubstitutionModel *model, map< string, string > &params, string suffix="", bool verbose=true, int warn=1)
Get a SubstitutionCount instance.
static FrequenciesSet * getRootFrequenciesSet(const Alphabet *alphabet, const GeneticCode *gCode, const SiteContainer *data, std::map< std::string, std::string > &params, std::map< std::string, std::string > &sharedParams, const std::vector< double > &rateFreqs, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Get A FrequenciesSet object for root frequencies (NH models) according to options.
virtual ParameterList getSubstitutionModelParameters() const =0
Get the parameters associated to substitution model(s).
static std::string OPTIMIZATION_NEWTON
const std::vector< int > & getNodesWithModel(size_t i) const
Get a list of nodes id for which the given model is associated.
IntervalConstraint * clone() const
virtual double getValue() const =0
virtual void setConstraintPolicy(const std::string &constraintPolicy)=0
virtual std::string getAlphabetType() const =0
Tree I/O in BppO format.
virtual OptimizationStopCondition * getStopCondition()=0
OMultiTree * read(const std::string &description)
Read a OMultiTree object from a string.
Interface for Substitution models, defined as a mixture of "simple" substitution models.
static void writeTrees(const std::vector< Tree * > &trees, std::map< std::string, std::string > &params, const std::string &prefix="output.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, bool checkOnly=false, int warn=1)
Write a tree according to options.