|
bpp-phyl
2.1.0
|
00001 // 00002 // File: AbstractSubstitutionModel.cpp 00003 // Created by: Julien Dutheil 00004 // Created on: Tue May 27 10:31:49 2003 00005 // 00006 00007 /* 00008 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004) 00009 00010 This software is a computer program whose purpose is to provide classes 00011 for phylogenetic data analysis. 00012 00013 This software is governed by the CeCILL license under French law and 00014 abiding by the rules of distribution of free software. You can use, 00015 modify and/ or redistribute the software under the terms of the CeCILL 00016 license as circulated by CEA, CNRS and INRIA at the following URL 00017 "http://www.cecill.info". 00018 00019 As a counterpart to the access to the source code and rights to copy, 00020 modify and redistribute granted by the license, users are provided only 00021 with a limited warranty and the software's author, the holder of the 00022 economic rights, and the successive licensors have only limited 00023 liability. 00024 00025 In this respect, the user's attention is drawn to the risks associated 00026 with loading, using, modifying and/or developing or reproducing the 00027 software by the user in light of its specific status of free software, 00028 that may mean that it is complicated to manipulate, and that also 00029 therefore means that it is reserved for developers and experienced 00030 professionals having in-depth computer knowledge. Users are therefore 00031 encouraged to load and test the software's suitability as regards their 00032 requirements in conditions enabling the security of their systems and/or 00033 data to be ensured and, more generally, to use and operate it in the 00034 same conditions as regards security. 00035 00036 The fact that you are presently reading this means that you have had 00037 knowledge of the CeCILL license and that you accept its terms. 00038 */ 00039 00040 #include "AbstractSubstitutionModel.h" 00041 00042 #include <Bpp/Text/TextTools.h> 00043 #include <Bpp/Numeric/VectorTools.h> 00044 #include <Bpp/Numeric/Matrix/MatrixTools.h> 00045 #include <Bpp/Numeric/Matrix/EigenValue.h> 00046 #include <Bpp/Numeric/NumConstants.h> 00047 00048 // From SeqLib: 00049 #include <Bpp/Seq/Container/SequenceContainerTools.h> 00050 00051 using namespace bpp; 00052 using namespace std; 00053 00054 /******************************************************************************/ 00055 00056 AbstractSubstitutionModel::AbstractSubstitutionModel(const Alphabet* alpha, const std::string& prefix) : 00057 AbstractParameterAliasable(prefix), 00058 alphabet_(alpha), 00059 size_(alpha->getSize()), 00060 rate_(1), 00061 chars_(size_), 00062 generator_(size_, size_), 00063 freq_(size_), 00064 exchangeability_(size_, size_), 00065 pijt_(size_, size_), 00066 dpijt_(size_, size_), 00067 d2pijt_(size_, size_), 00068 eigenDecompose_(true), 00069 eigenValues_(size_), 00070 iEigenValues_(size_), 00071 isDiagonalizable_(false), 00072 rightEigenVectors_(size_, size_), 00073 isNonSingular_(false), 00074 leftEigenVectors_(size_, size_), 00075 vPowGen_(), 00076 tmpMat_(size_, size_) 00077 { 00078 for (size_t i = 0; i < size_; i++) 00079 { 00080 freq_[i] = 1.0 / static_cast<double>(size_); 00081 chars_[i] = static_cast<int>(i); 00082 } 00083 } 00084 00085 /******************************************************************************/ 00086 00087 void AbstractSubstitutionModel::updateMatrices() 00088 { 00089 // if the object is not an AbstractReversibleSubstitutionModel, 00090 // computes the exchangeability_ Matrix (otherwise the generator_ 00091 // has been computed from the exchangeability_) 00092 00093 if (!dynamic_cast<AbstractReversibleSubstitutionModel*>(this)) { 00094 for (size_t i = 0; i < size_; i++) 00095 { 00096 for (size_t j = 0; j < size_; j++) 00097 { 00098 exchangeability_(i, j) = generator_(i, j) / freq_[j]; 00099 } 00100 } 00101 } 00102 00103 // Compute eigen values and vectors: 00104 if (enableEigenDecomposition()) 00105 { 00106 EigenValue<double> ev(generator_); 00107 rightEigenVectors_ = ev.getV(); 00108 eigenValues_ = ev.getRealEigenValues(); 00109 iEigenValues_ = ev.getImagEigenValues(); 00110 try 00111 { 00112 MatrixTools::inv(rightEigenVectors_, leftEigenVectors_); 00113 isNonSingular_ = true; 00114 isDiagonalizable_ = true; 00115 for (size_t i = 0; i < size_ && isDiagonalizable_; i++) 00116 { 00117 if (abs(iEigenValues_[i]) > NumConstants::TINY()) 00118 isDiagonalizable_ = false; 00119 } 00120 } 00121 catch (ZeroDivisionException& e) 00122 { 00123 ApplicationTools::displayMessage("Singularity during diagonalization. Taylor series used instead."); 00124 00125 isNonSingular_ = false; 00126 isDiagonalizable_ = false; 00127 MatrixTools::Taylor(generator_, 30, vPowGen_); 00128 } 00129 } 00130 } 00131 00132 00133 /******************************************************************************/ 00134 00135 const Matrix<double>& AbstractSubstitutionModel::getPij_t(double t) const 00136 { 00137 if (t == 0) 00138 { 00139 MatrixTools::getId(size_, pijt_); 00140 } 00141 else if (isNonSingular_) 00142 { 00143 if (isDiagonalizable_) 00144 { 00145 MatrixTools::mult<double>(rightEigenVectors_, VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, pijt_); 00146 } 00147 else 00148 { 00149 std::vector<double> vdia(size_); 00150 std::vector<double> vup(size_ - 1); 00151 std::vector<double> vlo(size_ - 1); 00152 double c = 0, s = 0; 00153 double l = rate_ * t; 00154 for (size_t i = 0; i < size_; i++) 00155 { 00156 vdia[i] = std::exp(eigenValues_[i] * l); 00157 if (iEigenValues_[i] != 0) 00158 { 00159 s = std::sin(iEigenValues_[i] * l); 00160 c = std::cos(iEigenValues_[i] * l); 00161 vdia[i] *= c; 00162 vup[i] = vdia[i] * s; 00163 vlo[i] = -vup[i]; 00164 vdia[i + 1] = vdia[i]; // trick to avoid computation 00165 i++; 00166 } 00167 else 00168 { 00169 if (i < size_ - 1) 00170 { 00171 vup[i] = 0; 00172 vlo[i] = 0; 00173 } 00174 } 00175 } 00176 MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, pijt_); 00177 } 00178 } 00179 else 00180 { 00181 MatrixTools::getId(size_, pijt_); 00182 double s = 1.0; 00183 double v = rate_ * t; 00184 size_t m = 0; 00185 while (v > 0.5) // exp(r*t*A)=(exp(r*t/(2^m) A))^(2^m) 00186 { 00187 m += 1; 00188 v /= 2; 00189 } 00190 for (size_t i = 1; i < vPowGen_.size(); i++) 00191 { 00192 s *= v / static_cast<double>(i); 00193 MatrixTools::add(pijt_, s, vPowGen_[i]); 00194 } 00195 while (m > 0) // recover the 2^m 00196 { 00197 MatrixTools::mult(pijt_, pijt_, tmpMat_); 00198 MatrixTools::copy(tmpMat_, pijt_); 00199 m--; 00200 } 00201 } 00202 //MatrixTools::print(pijt_); 00203 return pijt_; 00204 } 00205 00206 const Matrix<double>& AbstractSubstitutionModel::getdPij_dt(double t) const 00207 { 00208 if (isNonSingular_) 00209 { 00210 if (isDiagonalizable_) 00211 { 00212 MatrixTools::mult(rightEigenVectors_, rate_ * eigenValues_ * VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, dpijt_); 00213 } 00214 else 00215 { 00216 std::vector<double> vdia(size_); 00217 std::vector<double> vup(size_ - 1); 00218 std::vector<double> vlo(size_ - 1); 00219 double c, s, e; 00220 double l = rate_ * t; 00221 for (size_t i = 0; i < size_; i++) 00222 { 00223 e = std::exp(eigenValues_[i] * l); 00224 if (iEigenValues_[i] != 0) 00225 { 00226 s = std::sin(iEigenValues_[i] * l); 00227 c = std::cos(iEigenValues_[i] * l); 00228 vdia[i] = rate_ * (eigenValues_[i] * c - iEigenValues_[i] * s) * e; 00229 vup[i] = rate_ * (eigenValues_[i] * s + iEigenValues_[i] * c) * e; 00230 vlo[i] = -vup[i]; 00231 vdia[i + 1] = vdia[i]; // trick to avoid computation 00232 i++; 00233 } 00234 else 00235 { 00236 if (i < size_ - 1) 00237 { 00238 vup[i] = 0; 00239 vlo[i] = 0; 00240 } 00241 } 00242 } 00243 MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, dpijt_); 00244 } 00245 } 00246 else 00247 { 00248 MatrixTools::getId(size_, dpijt_); 00249 double s = 1.0; 00250 double v = rate_ * t; 00251 size_t m = 0; 00252 while (v > 0.5) // r*A*exp(t*r*A)=r*A*(exp(r*t/(2^m) A))^(2^m) 00253 { 00254 m += 1; 00255 v /= 2; 00256 } 00257 for (size_t i = 1; i < vPowGen_.size(); i++) 00258 { 00259 s *= v / static_cast<double>(i); 00260 MatrixTools::add(dpijt_, s, vPowGen_[i]); 00261 } 00262 while (m > 0) // recover the 2^m 00263 { 00264 MatrixTools::mult(dpijt_, dpijt_, tmpMat_); 00265 MatrixTools::copy(tmpMat_, dpijt_); 00266 m--; 00267 } 00268 MatrixTools::scale(dpijt_, rate_); 00269 MatrixTools::mult(vPowGen_[1], dpijt_, tmpMat_); 00270 MatrixTools::copy(tmpMat_, dpijt_); 00271 } 00272 return dpijt_; 00273 } 00274 00275 const Matrix<double>& AbstractSubstitutionModel::getd2Pij_dt2(double t) const 00276 { 00277 if (isNonSingular_) 00278 { 00279 if (isDiagonalizable_) 00280 { 00281 MatrixTools::mult(rightEigenVectors_, NumTools::sqr(rate_ * eigenValues_) * VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, d2pijt_); 00282 } 00283 else 00284 { 00285 std::vector<double> vdia(size_); 00286 std::vector<double> vup(size_ - 1); 00287 std::vector<double> vlo(size_ - 1); 00288 double c, s, e; 00289 double l = rate_ * t; 00290 for (size_t i = 0; i < size_; i++) 00291 { 00292 e = std::exp(eigenValues_[i] * l); 00293 if (iEigenValues_[i] != 0) 00294 { 00295 s = std::sin(iEigenValues_[i] * l); 00296 c = std::cos(iEigenValues_[i] * l); 00297 vdia[i] = NumTools::sqr(rate_) 00298 * ((NumTools::sqr(eigenValues_[i]) - NumTools::sqr(iEigenValues_[i])) * c 00299 - 2 * eigenValues_[i] * iEigenValues_[i] * s) * e; 00300 vup[i] = NumTools::sqr(rate_) 00301 * ((NumTools::sqr(eigenValues_[i]) - NumTools::sqr(iEigenValues_[i])) * s 00302 - 2 * eigenValues_[i] * iEigenValues_[i] * c) * e; 00303 vlo[i] = -vup[i]; 00304 vdia[i + 1] = vdia[i]; // trick to avoid computation 00305 i++; 00306 } 00307 else 00308 { 00309 if (i < size_ - 1) 00310 { 00311 vup[i] = 0; 00312 vlo[i] = 0; 00313 } 00314 } 00315 } 00316 MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, d2pijt_); 00317 } 00318 } 00319 else 00320 { 00321 MatrixTools::getId(size_, d2pijt_); 00322 double s = 1.0; 00323 double v = rate_ * t; 00324 size_t m = 0; 00325 while (v > 0.5) // r^2*A^2*exp(t*r*A)=r^2*A^2*(exp(r*t/(2^m) A))^(2^m) 00326 { 00327 m += 1; 00328 v /= 2; 00329 } 00330 for (size_t i = 1; i < vPowGen_.size(); i++) 00331 { 00332 s *= v / static_cast<double>(i); 00333 MatrixTools::add(d2pijt_, s, vPowGen_[i]); 00334 } 00335 while (m > 0) // recover the 2^m 00336 { 00337 MatrixTools::mult(d2pijt_, d2pijt_, tmpMat_); 00338 MatrixTools::copy(tmpMat_, d2pijt_); 00339 m--; 00340 } 00341 MatrixTools::scale(d2pijt_, rate_ * rate_); 00342 MatrixTools::mult(vPowGen_[2], d2pijt_, tmpMat_); 00343 MatrixTools::copy(tmpMat_, d2pijt_); 00344 } 00345 return d2pijt_; 00346 } 00347 00348 /******************************************************************************/ 00349 00350 double AbstractSubstitutionModel::getInitValue(size_t i, int state) const throw (IndexOutOfBoundsException, BadIntException) 00351 { 00352 if (i >= size_) 00353 throw IndexOutOfBoundsException("AbstractSubstitutionModel::getInitValue", i, 0, size_ - 1); 00354 if (state < 0 || !alphabet_->isIntInAlphabet(state)) 00355 throw BadIntException(state, "AbstractSubstitutionModel::getInitValue. Character " + alphabet_->intToChar(state) + " is not allowed in model."); 00356 vector<int> states = alphabet_->getAlias(state); 00357 for (size_t j = 0; j < states.size(); j++) 00358 { 00359 if (getAlphabetChar(i) == states[j]) 00360 return 1.; 00361 } 00362 return 0.; 00363 } 00364 00365 /******************************************************************************/ 00366 00367 void AbstractSubstitutionModel::setFreqFromData(const SequenceContainer& data, double pseudoCount) 00368 { 00369 map<int, int> counts; 00370 SequenceContainerTools::getCounts(data, counts); 00371 double t = 0; 00372 map<int, double> freqs; 00373 00374 for (int i = 0; i < static_cast<int>(size_); i++) 00375 { 00376 t += counts[i] + pseudoCount; 00377 } 00378 for (int i = 0; i < static_cast<int>(size_); i++) 00379 { 00380 freqs[i] = (static_cast<double>(counts[i]) + pseudoCount) / t; 00381 } 00382 00383 // Re-compute generator and eigen values: 00384 setFreq(freqs); 00385 } 00386 00387 /******************************************************************************/ 00388 00389 void AbstractSubstitutionModel::setFreq(map<int, double>& freqs) 00390 { 00391 for (int i = 0; i < static_cast<int>(size_); i++) 00392 { 00393 freq_[i] = freqs[i]; 00394 } 00395 // Re-compute generator and eigen values: 00396 updateMatrices(); 00397 } 00398 00399 /******************************************************************************/ 00400 00401 double AbstractSubstitutionModel::getScale() const 00402 { 00403 vector<double> v; 00404 MatrixTools::diag(generator_, v); 00405 return -VectorTools::scalar<double, double>(v, freq_); 00406 } 00407 00408 /******************************************************************************/ 00409 00410 void AbstractSubstitutionModel::setScale(double scale) { 00411 MatrixTools::scale(generator_, scale); 00412 } 00413 00414 /******************************************************************************/ 00415 00416 double AbstractSubstitutionModel::getRate() const 00417 { 00418 return rate_; 00419 } 00420 00421 /******************************************************************************/ 00422 00423 void AbstractSubstitutionModel::setRate(double rate) 00424 { 00425 if (rate <= 0) 00426 throw Exception("Bad value for rate: " + TextTools::toString(rate)); 00427 00428 if (hasParameter("rate")) 00429 setParameterValue("rate", rate_); 00430 00431 rate_ = rate; 00432 } 00433 00434 void AbstractSubstitutionModel::addRateParameter() 00435 { 00436 addParameter_(new Parameter(getNamespace() + "rate", rate_, &Parameter::R_PLUS_STAR)); 00437 } 00438 00439 /******************************************************************************/ 00440 00441 void AbstractReversibleSubstitutionModel::updateMatrices() 00442 { 00443 RowMatrix<double> Pi; 00444 MatrixTools::diag(freq_, Pi); 00445 MatrixTools::mult(exchangeability_, Pi, generator_); // Diagonal elements of the exchangability matrix will be ignored. 00446 // Compute diagonal elements of the generator: 00447 for (size_t i = 0; i < size_; i++) 00448 { 00449 double lambda = 0; 00450 for (size_t j = 0; j < size_; j++) 00451 { 00452 if (j != i) 00453 lambda += generator_(i, j); 00454 } 00455 generator_(i, i) = -lambda; 00456 } 00457 // Normalization: 00458 double scale = getScale(); 00459 MatrixTools::scale(generator_, 1. / scale); 00460 00461 // Normalize exchangeability matrix too: 00462 MatrixTools::scale(exchangeability_, 1. / scale); 00463 // Compute diagonal elements of the exchangeability matrix: 00464 for (size_t i = 0; i < size_; i++) 00465 { 00466 exchangeability_(i, i) = generator_(i, i) / freq_[i]; 00467 } 00468 AbstractSubstitutionModel::updateMatrices(); 00469 } 00470 00471 /******************************************************************************/ 00472