|
bpp-phyl
2.1.0
|
00001 // 00002 // File: WordFrequenciesSet.cpp 00003 // Created by: Laurent Gueguen 00004 // Created on: lundi 2 avril 2012, à 14h 02 00005 // 00006 00007 /* 00008 Copyright or (c) or Copr. Bio++ Development Team, (November 16, 2004) 00009 00010 This software is a computer program whose purpose is to provide classes 00011 for phylogenetic data analysis. 00012 00013 This software is governed by the CeCILL license under French law and 00014 abiding by the rules of distribution of free software. You can use, 00015 modify and/ or redistribute the software under the terms of the CeCILL 00016 license as circulated by CEA, CNRS and INRIA at the following URL 00017 "http://www.cecill.info". 00018 00019 As a counterpart to the access to the source code and rights to copy, 00020 modify and redistribute granted by the license, users are provided only 00021 with a limited warranty and the software's author, the holder of the 00022 economic rights, and the successive licensors have only limited 00023 liability. 00024 00025 In this respect, the user's attention is drawn to the risks associated 00026 with loading, using, modifying and/or developing or reproducing the 00027 software by the user in light of its specific status of free software, 00028 that may mean that it is complicated to manipulate, and that also 00029 therefore means that it is reserved for developers and experienced 00030 professionals having in-depth computer knowledge. Users are therefore 00031 encouraged to load and test the software's suitability as regards their 00032 requirements in conditions enabling the security of their systems and/or 00033 data to be ensured and, more generally, to use and operate it in the 00034 same conditions as regards security. 00035 00036 The fact that you are presently reading this means that you have had 00037 knowledge of the CeCILL license and that you accept its terms. 00038 */ 00039 00040 #include "WordFrequenciesSet.h" 00041 00042 00043 using namespace bpp; 00044 00045 #include <cmath> 00046 using namespace std; 00047 00048 size_t AbstractWordFrequenciesSet::getSizeFromVector(const std::vector<FrequenciesSet*>& freqVector) 00049 { 00050 size_t s = 1; 00051 size_t l = freqVector.size(); 00052 00053 for (size_t i = 0; i < l; i++) 00054 { 00055 s *= freqVector[i]->getAlphabet()->getSize(); 00056 } 00057 00058 return s; 00059 } 00060 00061 AbstractWordFrequenciesSet::AbstractWordFrequenciesSet(size_t size, const Alphabet* palph, const string& prefix, const string& name) : 00062 AbstractFrequenciesSet(size, palph, prefix, name) 00063 {} 00064 00065 size_t AbstractWordFrequenciesSet::getLength() const 00066 { 00067 return dynamic_cast<const WordAlphabet*>(getAlphabet())->getLength(); 00068 } 00069 00070 AbstractWordFrequenciesSet::~AbstractWordFrequenciesSet() 00071 {} 00072 00073 // /////////////////////////////////////////////////////////////////// 00074 // // WordFromIndependentFrequenciesSet 00075 00076 00077 WordFromIndependentFrequenciesSet::WordFromIndependentFrequenciesSet( 00078 const WordAlphabet* pWA, 00079 const std::vector<FrequenciesSet*>& freqVector, 00080 const string& prefix, const string& name) : 00081 AbstractWordFrequenciesSet(pWA->getSize(), pWA, prefix, name), 00082 vFreq_(), 00083 vNestedPrefix_() 00084 { 00085 size_t sf = getSizeFromVector(freqVector); 00086 if (pWA->getSize() != sf) 00087 throw Exception("WordFromIndependentFrequenciesSet: Size of the frequencies does not match size of the alphabet : " + TextTools::toString(sf) + " vs " + TextTools::toString(pWA->getSize())); 00088 00089 size_t l = freqVector.size(); 00090 00091 for (size_t i = 0; i < l; i++) 00092 { 00093 vFreq_.push_back(freqVector[i]); 00094 vNestedPrefix_.push_back(freqVector[i]->getNamespace()); 00095 vFreq_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + vNestedPrefix_[i]); 00096 addParameters_(vFreq_[i]->getParameters()); 00097 } 00098 00099 updateFrequencies(); 00100 } 00101 00102 WordFromIndependentFrequenciesSet::WordFromIndependentFrequenciesSet(const WordFromIndependentFrequenciesSet& iwfs) : 00103 AbstractWordFrequenciesSet(iwfs), 00104 vFreq_(iwfs.vFreq_.size()), 00105 vNestedPrefix_(iwfs.vNestedPrefix_) 00106 { 00107 for (unsigned i = 0; i < iwfs.vFreq_.size(); i++) 00108 { 00109 vFreq_[i] = iwfs.vFreq_[i]->clone(); 00110 } 00111 updateFrequencies(); 00112 } 00113 00114 WordFromIndependentFrequenciesSet::~WordFromIndependentFrequenciesSet() 00115 { 00116 for (unsigned i = 0; i < vFreq_.size(); i++) 00117 { 00118 delete vFreq_[i]; 00119 } 00120 } 00121 00122 WordFromIndependentFrequenciesSet& WordFromIndependentFrequenciesSet::operator=(const WordFromIndependentFrequenciesSet& iwfs) 00123 { 00124 AbstractWordFrequenciesSet::operator=(iwfs); 00125 vNestedPrefix_ = iwfs.vNestedPrefix_; 00126 00127 //Clean current frequencies first: 00128 for (unsigned i = 0; i < vFreq_.size(); i++) 00129 { 00130 delete vFreq_[i]; 00131 } 00132 00133 vFreq_.resize(iwfs.vFreq_.size()); 00134 for (unsigned i = 0; i < vFreq_.size(); i++) 00135 { 00136 vFreq_[i] = iwfs.vFreq_[i]->clone(); 00137 } 00138 updateFrequencies(); 00139 00140 return *this; 00141 } 00142 00143 void WordFromIndependentFrequenciesSet::fireParameterChanged(const ParameterList& pl) 00144 { 00145 size_t l = vFreq_.size(); 00146 00147 bool f = 0; 00148 for (size_t i = 0; i < l; i++) 00149 { 00150 f |= vFreq_[i]->matchParametersValues(pl); 00151 } 00152 00153 if (f) 00154 updateFrequencies(); 00155 } 00156 00157 void WordFromIndependentFrequenciesSet::updateFrequencies() 00158 { 00159 size_t l = vFreq_.size(); 00160 size_t s = getAlphabet()->getSize(); 00161 vector<double> f[l]; 00162 00163 size_t i, p, t, i2; 00164 00165 for (i = 0; i < l; i++) 00166 { 00167 f[i] = vFreq_[i]->getFrequencies(); 00168 } 00169 00170 for (i = 0; i < s; i++) 00171 { 00172 i2 = i; 00173 getFreq_(i) = 1; 00174 for (p = l; p > 0; p--) 00175 { 00176 t = vFreq_[p - 1]->getAlphabet()->getSize(); 00177 getFreq_(i) *= f[p - 1][i2 % t]; 00178 i2 /= t; 00179 } 00180 } 00181 } 00182 00183 void WordFromIndependentFrequenciesSet::setFrequencies(const vector<double>& frequencies) 00184 { 00185 if (frequencies.size() != getAlphabet()->getSize()) 00186 throw DimensionException("WordFromIndependentFrequenciesSet::setFrequencies", frequencies.size(), getAlphabet()->getSize()); 00187 double sum = 0.0; 00188 size_t size = frequencies.size(); 00189 for (size_t i = 0; i < size; i++) 00190 { 00191 sum += frequencies[i]; 00192 } 00193 if (fabs(1. - sum) > 0.000001) 00194 throw Exception("WordFromIndependentFrequenciesSet::setFrequencies. Frequencies must equal 1 (sum = " + TextTools::toString(sum) + ")."); 00195 00196 size_t d, i, j, s, l = vFreq_.size(); 00197 int k; 00198 vector<double> freq; 00199 00200 d = size; 00201 for (i = 0; i < l; i++) 00202 { 00203 s = vFreq_[i]->getAlphabet()->getSize(); 00204 freq.resize(s); 00205 d /= s; 00206 for (j = 0; j < s; j++) 00207 { 00208 freq[j] = 0; 00209 } 00210 for (k = 0; k < (int)size; k++) 00211 { 00212 freq[(k / d) % s] += frequencies[k]; 00213 } 00214 vFreq_[i]->setFrequencies(freq); 00215 } 00216 00217 for (i = 0; i < l; i++) 00218 { 00219 matchParametersValues(vFreq_[i]->getParameters()); 00220 } 00221 00222 updateFrequencies(); 00223 } 00224 00225 00226 size_t WordFromIndependentFrequenciesSet::getLength() const 00227 { 00228 return vFreq_.size(); 00229 } 00230 00231 void WordFromIndependentFrequenciesSet::setNamespace(const std::string& prefix) 00232 { 00233 AbstractFrequenciesSet::setNamespace(prefix); 00234 for (size_t i = 0; i < vFreq_.size(); i++) 00235 { 00236 vFreq_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + vNestedPrefix_[i]); 00237 } 00238 } 00239 00240 std::string WordFromIndependentFrequenciesSet::getDescription() const 00241 { 00242 string s = getName() +" : " + vFreq_[0]->getName(); 00243 for (size_t i = 1; i < vFreq_.size(); i++) 00244 { 00245 s += " * " + vFreq_[i]->getName(); 00246 } 00247 return s; 00248 } 00249 00250 // /////////////////////////////////////////////////////////////////// 00251 // // WordFromUniqueFrequenciesSet 00252 00253 00254 WordFromUniqueFrequenciesSet::WordFromUniqueFrequenciesSet(const WordAlphabet* pWA, 00255 FrequenciesSet* pabsfreq, 00256 const string& prefix, 00257 const string& name) : 00258 AbstractWordFrequenciesSet(pWA->getSize(), pWA, prefix, name), 00259 pFreq_(pabsfreq), 00260 NestedPrefix_(pabsfreq->getNamespace()), 00261 length_(pWA->getLength()) 00262 { 00263 size_t i; 00264 00265 string st = ""; 00266 for (i = 0; i < length_; i++) 00267 { 00268 st += TextTools::toString(i + 1); 00269 } 00270 00271 pFreq_->setNamespace(prefix+ st + "_" + NestedPrefix_); 00272 addParameters_(pFreq_->getParameters()); 00273 00274 updateFrequencies(); 00275 } 00276 00277 WordFromUniqueFrequenciesSet::WordFromUniqueFrequenciesSet(const WordFromUniqueFrequenciesSet& iwfs) : 00278 AbstractWordFrequenciesSet(iwfs), 00279 pFreq_(iwfs.pFreq_->clone()), 00280 NestedPrefix_(iwfs.NestedPrefix_), 00281 length_(iwfs.length_) 00282 { 00283 updateFrequencies(); 00284 } 00285 00286 00287 WordFromUniqueFrequenciesSet& WordFromUniqueFrequenciesSet::operator=(const WordFromUniqueFrequenciesSet& iwfs) 00288 { 00289 AbstractWordFrequenciesSet::operator=(iwfs); 00290 delete pFreq_; 00291 pFreq_ = iwfs.pFreq_->clone(); 00292 NestedPrefix_ = iwfs.NestedPrefix_; 00293 length_ = iwfs.length_; 00294 00295 updateFrequencies(); 00296 return *this; 00297 } 00298 00299 WordFromUniqueFrequenciesSet::~WordFromUniqueFrequenciesSet() 00300 { 00301 if (pFreq_) 00302 delete pFreq_; 00303 pFreq_ = 0; 00304 } 00305 00306 void WordFromUniqueFrequenciesSet::fireParameterChanged(const ParameterList& pl) 00307 { 00308 if (pFreq_->matchParametersValues(pl)) 00309 updateFrequencies(); 00310 } 00311 00312 void WordFromUniqueFrequenciesSet::updateFrequencies() 00313 { 00314 size_t s = getAlphabet()->getSize(); 00315 vector<double> f; 00316 int letsi = pFreq_->getAlphabet()->getSize(); 00317 00318 size_t i, p, i2; 00319 00320 f = pFreq_->getFrequencies(); 00321 00322 for (i = 0; i < s; i++) 00323 { 00324 i2 = i; 00325 getFreq_(i2) = 1; 00326 for (p = length_; p > 0; p--) 00327 { 00328 getFreq_(i) *= f[i2 % letsi]; 00329 i2 /= letsi; 00330 } 00331 } 00332 } 00333 00334 void WordFromUniqueFrequenciesSet::setFrequencies(const vector<double>& frequencies) 00335 { 00336 if (frequencies.size() != getAlphabet()->getSize()) 00337 throw DimensionException("WordFromUniqueFrequenciesSet::setFrequencies", frequencies.size(), getAlphabet()->getSize()); 00338 double sum = 0.0; 00339 size_t size = frequencies.size(); 00340 for (size_t i = 0; i < size; i++) 00341 { 00342 sum += frequencies[i]; 00343 } 00344 if (fabs(1. - sum) > 0.000001) 00345 throw Exception("WordFromUniqueFrequenciesSet::setFrequencies. Frequencies must equal 1 (sum = " + TextTools::toString(sum) + ")."); 00346 00347 size_t d, i, j; 00348 int k; 00349 vector<double> freq; 00350 00351 size_t letsi = pFreq_->getAlphabet()->getSize(); 00352 freq.resize(letsi); 00353 00354 for (j = 0; j < letsi; j++) 00355 { 00356 freq[j] = 0; 00357 } 00358 00359 d = size; 00360 for (i = 0; i < length_; i++) 00361 { 00362 d /= letsi; 00363 for (k = 0; k < (int)size; k++) 00364 { 00365 freq[(k / d) % letsi] += frequencies[k]; 00366 } 00367 } 00368 for (j = 0; j < letsi; j++) 00369 { 00370 freq[j] /= static_cast<double>(length_); 00371 } 00372 00373 pFreq_->setFrequencies(freq); 00374 matchParametersValues(pFreq_->getParameters()); 00375 updateFrequencies(); 00376 } 00377 00378 00379 void WordFromUniqueFrequenciesSet::setNamespace(const string& prefix) 00380 { 00381 AbstractFrequenciesSet::setNamespace(prefix); 00382 string st = ""; 00383 for (unsigned i = 0; i < length_; i++) 00384 { 00385 st += TextTools::toString(i + 1); 00386 } 00387 pFreq_->setNamespace(prefix + st + "_" + NestedPrefix_); 00388 } 00389 00390 00391 string WordFromUniqueFrequenciesSet::getDescription() const 00392 { 00393 return getName() + " : " + pFreq_->getName() + " * " + TextTools::toString(length_); 00394 } 00395 00396