bpp-phyl  2.1.0
Bpp/Phyl/Model/AbstractSubstitutionModel.cpp
Go to the documentation of this file.
00001 //
00002 // File: AbstractSubstitutionModel.cpp
00003 // Created by: Julien Dutheil
00004 // Created on: Tue May 27 10:31:49 2003
00005 //
00006 
00007 /*
00008    Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
00009 
00010    This software is a computer program whose purpose is to provide classes
00011    for phylogenetic data analysis.
00012 
00013    This software is governed by the CeCILL  license under French law and
00014    abiding by the rules of distribution of free software.  You can  use,
00015    modify and/ or redistribute the software under the terms of the CeCILL
00016    license as circulated by CEA, CNRS and INRIA at the following URL
00017    "http://www.cecill.info".
00018 
00019    As a counterpart to the access to the source code and  rights to copy,
00020    modify and redistribute granted by the license, users are provided only
00021    with a limited warranty  and the software's author,  the holder of the
00022    economic rights,  and the successive licensors  have only  limited
00023    liability.
00024 
00025    In this respect, the user's attention is drawn to the risks associated
00026    with loading,  using,  modifying and/or developing or reproducing the
00027    software by the user in light of its specific status of free software,
00028    that may mean  that it is complicated to manipulate,  and  that  also
00029    therefore means  that it is reserved for developers  and  experienced
00030    professionals having in-depth computer knowledge. Users are therefore
00031    encouraged to load and test the software's suitability as regards their
00032    requirements in conditions enabling the security of their systems and/or
00033    data to be ensured and,  more generally, to use and operate it in the
00034    same conditions as regards security.
00035 
00036    The fact that you are presently reading this means that you have had
00037    knowledge of the CeCILL license and that you accept its terms.
00038  */
00039 
00040 #include "AbstractSubstitutionModel.h"
00041 
00042 #include <Bpp/Text/TextTools.h>
00043 #include <Bpp/Numeric/VectorTools.h>
00044 #include <Bpp/Numeric/Matrix/MatrixTools.h>
00045 #include <Bpp/Numeric/Matrix/EigenValue.h>
00046 #include <Bpp/Numeric/NumConstants.h>
00047 
00048 // From SeqLib:
00049 #include <Bpp/Seq/Container/SequenceContainerTools.h>
00050 
00051 using namespace bpp;
00052 using namespace std;
00053 
00054 /******************************************************************************/
00055 
00056 AbstractSubstitutionModel::AbstractSubstitutionModel(const Alphabet* alpha, const std::string& prefix) :
00057   AbstractParameterAliasable(prefix),
00058   alphabet_(alpha),
00059   size_(alpha->getSize()),
00060   rate_(1),
00061   chars_(size_),
00062   generator_(size_, size_),
00063   freq_(size_),
00064   exchangeability_(size_, size_),
00065   pijt_(size_, size_),
00066   dpijt_(size_, size_),
00067   d2pijt_(size_, size_),
00068   eigenDecompose_(true),
00069   eigenValues_(size_),
00070   iEigenValues_(size_),
00071   isDiagonalizable_(false),
00072   rightEigenVectors_(size_, size_),
00073   isNonSingular_(false),
00074   leftEigenVectors_(size_, size_),
00075   vPowGen_(),
00076   tmpMat_(size_, size_)
00077 {
00078   for (size_t i = 0; i < size_; i++)
00079   {
00080     freq_[i] = 1.0 / static_cast<double>(size_);
00081     chars_[i] = static_cast<int>(i);
00082   }
00083 }
00084 
00085 /******************************************************************************/
00086 
00087 void AbstractSubstitutionModel::updateMatrices()
00088 {
00089   // if the object is not an AbstractReversibleSubstitutionModel,
00090   // computes the exchangeability_ Matrix (otherwise the generator_
00091   // has been computed from the exchangeability_)
00092 
00093   if (!dynamic_cast<AbstractReversibleSubstitutionModel*>(this)) {
00094     for (size_t i = 0; i < size_; i++)
00095     {
00096       for (size_t j = 0; j < size_; j++)
00097       {
00098         exchangeability_(i, j) = generator_(i, j) / freq_[j];
00099       }
00100     }
00101   }
00102 
00103   // Compute eigen values and vectors:
00104   if (enableEigenDecomposition())
00105   {
00106     EigenValue<double> ev(generator_);
00107     rightEigenVectors_ = ev.getV();
00108     eigenValues_ = ev.getRealEigenValues();
00109     iEigenValues_ = ev.getImagEigenValues();
00110     try
00111     {
00112       MatrixTools::inv(rightEigenVectors_, leftEigenVectors_);
00113       isNonSingular_ = true;
00114       isDiagonalizable_ = true;
00115       for (size_t i = 0; i < size_ && isDiagonalizable_; i++)
00116       {
00117         if (abs(iEigenValues_[i]) > NumConstants::TINY())
00118           isDiagonalizable_ = false;
00119       }
00120     }
00121     catch (ZeroDivisionException& e)
00122     {
00123       ApplicationTools::displayMessage("Singularity during diagonalization. Taylor series used instead.");
00124 
00125       isNonSingular_ = false;
00126       isDiagonalizable_ = false;
00127       MatrixTools::Taylor(generator_, 30, vPowGen_);
00128     }
00129   }
00130 }
00131 
00132 
00133 /******************************************************************************/
00134 
00135 const Matrix<double>& AbstractSubstitutionModel::getPij_t(double t) const
00136 {
00137   if (t == 0)
00138   {
00139     MatrixTools::getId(size_, pijt_);
00140   }
00141   else if (isNonSingular_)
00142   {
00143     if (isDiagonalizable_)
00144     {
00145       MatrixTools::mult<double>(rightEigenVectors_, VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, pijt_);
00146     }
00147     else
00148     {
00149       std::vector<double> vdia(size_);
00150       std::vector<double> vup(size_ - 1);
00151       std::vector<double> vlo(size_ - 1);
00152       double c = 0, s = 0;
00153       double l = rate_ * t;
00154       for (size_t i = 0; i < size_; i++)
00155       {
00156         vdia[i] = std::exp(eigenValues_[i] * l);
00157         if (iEigenValues_[i] != 0)
00158         {
00159           s = std::sin(iEigenValues_[i] * l);
00160           c = std::cos(iEigenValues_[i] * l);
00161           vdia[i] *= c;
00162           vup[i] = vdia[i] * s;
00163           vlo[i] = -vup[i];
00164           vdia[i + 1] = vdia[i]; // trick to avoid computation
00165           i++;
00166         }
00167         else
00168         {
00169           if (i < size_ - 1)
00170           {
00171             vup[i] = 0;
00172             vlo[i] = 0;
00173           }
00174         }
00175       }
00176       MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, pijt_);
00177     }
00178   }
00179   else
00180   {
00181     MatrixTools::getId(size_, pijt_);
00182     double s = 1.0;
00183     double v = rate_ * t;
00184     size_t m = 0;
00185     while (v > 0.5)    // exp(r*t*A)=(exp(r*t/(2^m) A))^(2^m)
00186     {
00187       m += 1;
00188       v /= 2;
00189     }
00190     for (size_t i = 1; i < vPowGen_.size(); i++)
00191     {
00192       s *= v / static_cast<double>(i);
00193       MatrixTools::add(pijt_, s, vPowGen_[i]);
00194     }
00195     while (m > 0)  // recover the 2^m
00196     {
00197       MatrixTools::mult(pijt_, pijt_, tmpMat_);
00198       MatrixTools::copy(tmpMat_, pijt_);
00199       m--;
00200     }
00201   }
00202   //MatrixTools::print(pijt_);
00203   return pijt_;
00204 }
00205 
00206 const Matrix<double>& AbstractSubstitutionModel::getdPij_dt(double t) const
00207 {
00208   if (isNonSingular_)
00209   {
00210     if (isDiagonalizable_)
00211     {
00212       MatrixTools::mult(rightEigenVectors_, rate_ * eigenValues_ * VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, dpijt_);
00213     }
00214     else
00215     {
00216       std::vector<double> vdia(size_);
00217       std::vector<double> vup(size_ - 1);
00218       std::vector<double> vlo(size_ - 1);
00219       double c, s, e;
00220       double l = rate_ * t;
00221       for (size_t i = 0; i < size_; i++)
00222       {
00223         e = std::exp(eigenValues_[i] * l);
00224         if (iEigenValues_[i] != 0)
00225         {
00226           s = std::sin(iEigenValues_[i] * l);
00227           c = std::cos(iEigenValues_[i] * l);
00228           vdia[i] = rate_ * (eigenValues_[i] * c - iEigenValues_[i] * s) * e;
00229           vup[i] = rate_ * (eigenValues_[i] * s + iEigenValues_[i] * c) * e;
00230           vlo[i] = -vup[i];
00231           vdia[i + 1] = vdia[i]; // trick to avoid computation
00232           i++;
00233         }
00234         else
00235         {
00236           if (i < size_ - 1)
00237           {
00238             vup[i] = 0;
00239             vlo[i] = 0;
00240           }
00241         }
00242       }
00243       MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, dpijt_);
00244     }
00245   }
00246   else
00247   {
00248     MatrixTools::getId(size_, dpijt_);
00249     double s = 1.0;
00250     double v = rate_ * t;
00251     size_t m = 0;
00252     while (v > 0.5)    // r*A*exp(t*r*A)=r*A*(exp(r*t/(2^m) A))^(2^m)
00253     {
00254       m += 1;
00255       v /= 2;
00256     }
00257     for (size_t i = 1; i < vPowGen_.size(); i++)
00258     {
00259       s *= v / static_cast<double>(i);
00260       MatrixTools::add(dpijt_, s, vPowGen_[i]);
00261     }
00262     while (m > 0)  // recover the 2^m
00263     {
00264       MatrixTools::mult(dpijt_, dpijt_, tmpMat_);
00265       MatrixTools::copy(tmpMat_, dpijt_);
00266       m--;
00267     }
00268     MatrixTools::scale(dpijt_, rate_);
00269     MatrixTools::mult(vPowGen_[1], dpijt_, tmpMat_);
00270     MatrixTools::copy(tmpMat_, dpijt_);
00271   }
00272   return dpijt_;
00273 }
00274 
00275 const Matrix<double>& AbstractSubstitutionModel::getd2Pij_dt2(double t) const
00276 {
00277   if (isNonSingular_)
00278   {
00279     if (isDiagonalizable_)
00280     {
00281       MatrixTools::mult(rightEigenVectors_, NumTools::sqr(rate_ * eigenValues_) * VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, d2pijt_);
00282     }
00283     else
00284     {
00285       std::vector<double> vdia(size_);
00286       std::vector<double> vup(size_ - 1);
00287       std::vector<double> vlo(size_ - 1);
00288       double c, s, e;
00289       double l = rate_ * t;
00290       for (size_t i = 0; i < size_; i++)
00291       {
00292         e = std::exp(eigenValues_[i] * l);
00293         if (iEigenValues_[i] != 0)
00294         {
00295           s = std::sin(iEigenValues_[i] * l);
00296           c = std::cos(iEigenValues_[i] * l);
00297           vdia[i] = NumTools::sqr(rate_)
00298                     * ((NumTools::sqr(eigenValues_[i]) - NumTools::sqr(iEigenValues_[i])) * c
00299                        - 2 * eigenValues_[i] * iEigenValues_[i] * s) * e;
00300           vup[i] = NumTools::sqr(rate_)
00301                    * ((NumTools::sqr(eigenValues_[i]) - NumTools::sqr(iEigenValues_[i])) * s
00302                       - 2 * eigenValues_[i] * iEigenValues_[i] * c) * e;
00303           vlo[i] = -vup[i];
00304           vdia[i + 1] = vdia[i]; // trick to avoid computation
00305           i++;
00306         }
00307         else
00308         {
00309           if (i < size_ - 1)
00310           {
00311             vup[i] = 0;
00312             vlo[i] = 0;
00313           }
00314         }
00315       }
00316       MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, d2pijt_);
00317     }
00318   }
00319   else
00320   {
00321     MatrixTools::getId(size_, d2pijt_);
00322     double s = 1.0;
00323     double v = rate_ * t;
00324     size_t m = 0;
00325     while (v > 0.5)    // r^2*A^2*exp(t*r*A)=r^2*A^2*(exp(r*t/(2^m) A))^(2^m)
00326     {
00327       m += 1;
00328       v /= 2;
00329     }
00330     for (size_t i = 1; i < vPowGen_.size(); i++)
00331     {
00332       s *= v / static_cast<double>(i);
00333       MatrixTools::add(d2pijt_, s, vPowGen_[i]);
00334     }
00335     while (m > 0)  // recover the 2^m
00336     {
00337       MatrixTools::mult(d2pijt_, d2pijt_, tmpMat_);
00338       MatrixTools::copy(tmpMat_, d2pijt_);
00339       m--;
00340     }
00341     MatrixTools::scale(d2pijt_, rate_ * rate_);
00342     MatrixTools::mult(vPowGen_[2], d2pijt_, tmpMat_);
00343     MatrixTools::copy(tmpMat_, d2pijt_);
00344   }
00345   return d2pijt_;
00346 }
00347 
00348 /******************************************************************************/
00349 
00350 double AbstractSubstitutionModel::getInitValue(size_t i, int state) const throw (IndexOutOfBoundsException, BadIntException)
00351 {
00352   if (i >= size_)
00353     throw IndexOutOfBoundsException("AbstractSubstitutionModel::getInitValue", i, 0, size_ - 1);
00354   if (state < 0 || !alphabet_->isIntInAlphabet(state))
00355     throw BadIntException(state, "AbstractSubstitutionModel::getInitValue. Character " + alphabet_->intToChar(state) + " is not allowed in model.");
00356   vector<int> states = alphabet_->getAlias(state);
00357   for (size_t j = 0; j < states.size(); j++)
00358   {
00359     if (getAlphabetChar(i) == states[j])
00360       return 1.;
00361   }
00362   return 0.;
00363 }
00364 
00365 /******************************************************************************/
00366 
00367 void AbstractSubstitutionModel::setFreqFromData(const SequenceContainer& data, double pseudoCount)
00368 {
00369   map<int, int> counts;
00370   SequenceContainerTools::getCounts(data, counts);
00371   double t = 0;
00372   map<int, double> freqs;
00373 
00374   for (int i = 0; i < static_cast<int>(size_); i++)
00375   {
00376     t += counts[i] + pseudoCount;
00377   }
00378   for (int i = 0; i < static_cast<int>(size_); i++)
00379   {
00380     freqs[i] = (static_cast<double>(counts[i]) + pseudoCount) / t;
00381   }
00382 
00383   // Re-compute generator and eigen values:
00384   setFreq(freqs);
00385 }
00386 
00387 /******************************************************************************/
00388 
00389 void AbstractSubstitutionModel::setFreq(map<int, double>& freqs)
00390 {
00391   for (int i = 0; i < static_cast<int>(size_); i++)
00392   {
00393     freq_[i] = freqs[i];
00394   }
00395   // Re-compute generator and eigen values:
00396   updateMatrices();
00397 }
00398 
00399 /******************************************************************************/
00400 
00401 double AbstractSubstitutionModel::getScale() const
00402 {
00403   vector<double> v;
00404   MatrixTools::diag(generator_, v);
00405   return -VectorTools::scalar<double, double>(v, freq_);
00406 }
00407 
00408 /******************************************************************************/
00409 
00410 void AbstractSubstitutionModel::setScale(double scale) {
00411   MatrixTools::scale(generator_, scale);
00412 }
00413 
00414 /******************************************************************************/
00415 
00416 double AbstractSubstitutionModel::getRate() const
00417 {
00418   return rate_;
00419 }
00420 
00421 /******************************************************************************/
00422 
00423 void AbstractSubstitutionModel::setRate(double rate)
00424 {
00425   if (rate <= 0)
00426     throw Exception("Bad value for rate: " + TextTools::toString(rate));
00427 
00428   if (hasParameter("rate"))
00429     setParameterValue("rate", rate_);
00430 
00431   rate_ = rate;
00432 }
00433 
00434 void AbstractSubstitutionModel::addRateParameter()
00435 {
00436   addParameter_(new Parameter(getNamespace() + "rate", rate_, &Parameter::R_PLUS_STAR));
00437 }
00438 
00439 /******************************************************************************/
00440 
00441 void AbstractReversibleSubstitutionModel::updateMatrices()
00442 {
00443   RowMatrix<double> Pi;
00444   MatrixTools::diag(freq_, Pi);
00445   MatrixTools::mult(exchangeability_, Pi, generator_); // Diagonal elements of the exchangability matrix will be ignored.
00446   // Compute diagonal elements of the generator:
00447   for (size_t i = 0; i < size_; i++)
00448   {
00449     double lambda = 0;
00450     for (size_t j = 0; j < size_; j++)
00451     {
00452       if (j != i)
00453         lambda += generator_(i, j);
00454     }
00455     generator_(i, i) = -lambda;
00456   }
00457   // Normalization:
00458   double scale = getScale();
00459   MatrixTools::scale(generator_, 1. / scale);
00460 
00461   // Normalize exchangeability matrix too:
00462   MatrixTools::scale(exchangeability_, 1. / scale);
00463   // Compute diagonal elements of the exchangeability matrix:
00464   for (size_t i = 0; i < size_; i++)
00465   {
00466     exchangeability_(i, i) = generator_(i, i) / freq_[i];
00467   }
00468   AbstractSubstitutionModel::updateMatrices();
00469 }
00470 
00471 /******************************************************************************/
00472 
 All Classes Namespaces Files Functions Variables Typedefs Friends