bpp-core  2.1.0
 All Classes Namespaces Files Functions Variables Typedefs Friends
MixtureOfDiscreteDistributions.cpp
Go to the documentation of this file.
1 //
2 // File: MixtureOfDiscreteDistributions
3 // Created by: Laurent Guéguen
4 // Created on: mercredi 9 juin 2010, à 23h 09
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for numerical calculus.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 #include "../NumConstants.h"
42 #include "../../Utils/MapTools.h"
43 #include "../../Text/TextTools.h"
44 
45 using namespace bpp;
46 using namespace std;
47 
48 MixtureOfDiscreteDistributions::MixtureOfDiscreteDistributions(const vector<DiscreteDistribution*>& distributions,
49  const vector<double>& probas ) :
50  AbstractParameterAliasable("Mixture."),
51  AbstractDiscreteDistribution(1, "Mixture."),
52  vdd_(),
53  probas_(),
54  vNestedPrefix_()
55 {
56  if (distributions.size() != probas.size())
57  {
58  throw Exception("MixtureOfDiscreteDistributions. Distributions and probabilities vectors must have the same size (" + TextTools::toString(distributions.size()) + " != " + TextTools::toString(probas.size()) + ").");
59  }
60 
61  size_t size = distributions.size();
62  for (size_t i = 0; i < size; i++)
63  {
64  if (distributions[i] == 0)
65  throw Exception("MixtureOfDiscreteDistributions. Null DiscreteDistribution* in argument vector at index " + TextTools::toString(i));
66  }
67 
68  for (size_t i = 0; i < size; i++)
69  {
70  probas_.push_back(probas[i]);
71  }
72 
73  double sum = VectorTools::sum(probas);
74  if (fabs(1. - sum) > precision())
75  throw Exception("MixtureOfDiscreteDistributions. Probabilities must equal 1 (sum =" + TextTools::toString(sum) + ").");
76 
77  double y = 1;
78  for (size_t i = 0; i < size - 1; i++)
79  {
80  addParameter_(new Parameter("Mixture.theta" + TextTools::toString(i + 1), probas[i] / y, &Parameter::PROP_CONSTRAINT_IN));
81  y -= probas[i];
82  }
83 
84 
85  for (size_t i = 0; i < size; i++)
86  {
87  vdd_.push_back(distributions[i]->clone());
88  }
89 
90  // Parameters
91 
92  for (size_t i = 0; i < size; i++)
93  {
94  vNestedPrefix_.push_back(TextTools::toString(i + 1) + "_" + distributions[i]->getNamespace());
95  }
96 
97  for (size_t i = 0; i < size; i++)
98  {
99  vdd_[i]->setNamespace("Mixture." + vNestedPrefix_[i]);
100  }
101 
102  for (size_t i = 0; i < size; i++)
103  {
105  }
106 
108 }
109 
113  vdd_(),
114  probas_(),
115  vNestedPrefix_()
116 {
117  for (size_t i = 0; i < mdd.vdd_.size(); i++)
118  {
119  probas_.push_back(mdd.probas_[i]);
120  vdd_.push_back(mdd.vdd_[i]->clone());
121  vNestedPrefix_.push_back(mdd.vNestedPrefix_[i]);
122  }
123 }
124 
126 {
129  vdd_.clear();
130  probas_.clear();
131  vNestedPrefix_.clear();
132 
133  for (size_t i = 0; i < mdd.vdd_.size(); i++)
134  {
135  probas_.push_back(mdd.probas_[i]);
136  vdd_.push_back(mdd.vdd_[i]->clone());
137  vNestedPrefix_.push_back(mdd.vNestedPrefix_[i]);
138  }
139 
140  return *this;
141 }
142 
144 {
145  for (size_t i = 0; i < vdd_.size(); i++)
146  {
147  delete vdd_[i];
148  }
149 
150  vdd_.clear();
151 }
152 
154 {
155  for (size_t i = 0; i < vdd_.size(); i++)
156  {
157  vdd_[i]->setNumberOfCategories(nbClasses);
158  }
159 
161 }
162 
163 
165 {
167  size_t size = vdd_.size();
168  double x = 1.0;
169  for (size_t i = 0; i < size - 1; i++)
170  {
171  probas_[i] = getParameterValue("theta" + TextTools::toString(i + 1)) * x;
172  x *= 1 - getParameterValue("theta" + TextTools::toString(i + 1));
173  }
174 
175  probas_[size - 1] = x;
176 
177 
178  for (size_t i = 0; i < size; i++)
179  {
180  vdd_[i]->matchParametersValues(parameters);
181  }
182 
184 }
185 
187 {
188  size_t size = vdd_.size();
189  distribution_.clear();
190  // calculation of distribution
191 
192  for (size_t i = 0; i < size; i++)
193  {
194  vector<double> values = vdd_[i]->getCategories();
195  for (size_t j = 0; j < values.size(); j++)
196  {
197  distribution_[values[j]] = 0;
198  }
199  }
200 
201  for (size_t i = 0; i < size; i++)
202  {
203  vector<double> values = vdd_[i]->getCategories();
204  vector<double> probas2 = vdd_[i]->getProbabilities();
205  for (size_t j = 0; j < values.size(); j++)
206  {
207  distribution_[values[j]] += probas2[j] * probas_[i];
208  }
209  }
210 
212 
213  // intMinMax_
214 
215  double uB, lB;
216  uB = -NumConstants::VERY_BIG();
217  lB = NumConstants::VERY_BIG();
218 
219  bool suB = true, slB = true;
220 
221  for (size_t i = 0; i < size; i++)
222  {
223  if (vdd_[i]->getLowerBound() <= lB)
224  {
225  lB = vdd_[i]->getLowerBound();
226  slB = vdd_[i]->strictLowerBound();
227  }
228  if (vdd_[i]->getUpperBound() >= uB)
229  {
230  uB = vdd_[i]->getUpperBound();
231  suB = vdd_[i]->strictUpperBound();
232  }
233  }
234 
235  intMinMax_.setLowerBound(lB, slB);
236  intMinMax_.setUpperBound(uB, suB);
237 
238  // Compute midpoint bounds_:
239  vector<double> values = MapTools::getKeys<double, double, AbstractDiscreteDistribution::Order>(distribution_);
240 
241  bounds_.resize(numberOfCategories_ - 1);
242 
243  // Fill from 0 to numberOfCategories_-1 with midpoints:
244  for (size_t i = 0; i < numberOfCategories_ - 1; i++)
245  {
246  bounds_[i] = (values[i] + values[i + 1]) / 2.;
247  }
248 }
249 
251 {
252  if (median_ != median)
253  {
254  median_ = median;
255  for (size_t i = 0; i < vdd_.size(); i++)
256  {
257  vdd_[i]->setMedian(median);
258  }
260  }
261 }
263 {
264  for (size_t i = 0; i < vdd_.size(); i++)
265  {
266  vdd_[i]->discretize();
267  }
268 
270 }
271 
273 {
274  double s = 0;
275  for (size_t i = 0; i < vdd_.size(); i++)
276  {
277  s += probas_[i] * vdd_[i]->pProb(x);
278  }
279  return s;
280 }
281 
283 {
284  throw Exception("MixtureOfDiscreteDistributions::qProb to difficult to compute: not implemented");
285  return 0;
286 }
287 
289 {
290  double s = 0;
291  for (size_t i = 0; i < vdd_.size(); i++)
292  {
293  s += probas_[i] * vdd_[i]->Expectation(a);
294  }
295  return s;
296 }
297 
299 {
300  for (size_t i = 0; i < vdd_.size(); i++)
301  {
302  vdd_[i]->restrictToConstraint(c);
303  }
304 
306 }
307 
308 /******************************************************************************/
309 
311 {
313  // We also need to update the namespace of the nested distributions:
314  for (size_t i = 0; i < vdd_.size(); i++)
315  {
316  vdd_[i]->setNamespace(prefix + vNestedPrefix_[i]);
317  }
318 }
319