|
bpp-seq
2.1.0
|
00001 // 00002 // File: GeneticCode.cpp 00003 // Created by: Julien Dutheil 00004 // Created on: Mon Oct 13 15:37:25 2003 00005 // 00006 00007 /* 00008 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) 00009 00010 This software is a computer program whose purpose is to provide classes 00011 for sequences analysis. 00012 00013 This software is governed by the CeCILL license under French law and 00014 abiding by the rules of distribution of free software. You can use, 00015 modify and/ or redistribute the software under the terms of the CeCILL 00016 license as circulated by CEA, CNRS and INRIA at the following URL 00017 "http://www.cecill.info". 00018 00019 As a counterpart to the access to the source code and rights to copy, 00020 modify and redistribute granted by the license, users are provided only 00021 with a limited warranty and the software's author, the holder of the 00022 economic rights, and the successive licensors have only limited 00023 liability. 00024 00025 In this respect, the user's attention is drawn to the risks associated 00026 with loading, using, modifying and/or developing or reproducing the 00027 software by the user in light of its specific status of free software, 00028 that may mean that it is complicated to manipulate, and that also 00029 therefore means that it is reserved for developers and experienced 00030 professionals having in-depth computer knowledge. Users are therefore 00031 encouraged to load and test the software's suitability as regards their 00032 requirements in conditions enabling the security of their systems and/or 00033 data to be ensured and, more generally, to use and operate it in the 00034 same conditions as regards security. 00035 00036 The fact that you are presently reading this means that you have had 00037 knowledge of the CeCILL license and that you accept its terms. 00038 */ 00039 00040 #include "GeneticCode.h" 00041 #include "../SequenceTools.h" 00042 #include "../Alphabet/AlphabetTools.h" 00043 00044 using namespace bpp; 00045 using namespace std; 00046 00047 /**********************************************************************************************/ 00048 00049 StopCodonException::StopCodonException(const std::string& text, const std::string& codon) : 00050 Exception("StopCodonException: " + text + "(" + codon + ")"), 00051 codon_(codon) {} 00052 00053 /**********************************************************************************************/ 00054 00055 int GeneticCode::translate(int state) const throw (BadIntException, Exception) 00056 { 00057 if (isStop(state)) 00058 throw StopCodonException("GeneticCode::translate().", codonAlphabet_.intToChar(state)); 00059 00060 map<int, int>::const_iterator it = tlnTable_.find(state); 00061 if (it == tlnTable_.end()) 00062 throw BadIntException(state, "GeneticCode::translate()."); 00063 00064 return it->second; 00065 } 00066 00067 /**********************************************************************************************/ 00068 00069 std::string GeneticCode::translate(const std::string& state) const throw (BadCharException, Exception) 00070 { 00071 int x = codonAlphabet_.charToInt(state); 00072 return proteicAlphabet_.intToChar(translate(x)); 00073 } 00074 00075 /**********************************************************************************************/ 00076 00077 vector<int> GeneticCode::getSynonymous(int aminoacid) const throw (BadIntException) 00078 { 00079 // test: 00080 proteicAlphabet_.intToChar(aminoacid); 00081 00082 vector<int> synonymes; 00083 for (int i = 0; i < static_cast<int>(codonAlphabet_.getSize()); ++i) 00084 { 00085 try 00086 { 00087 if (translate(i) == aminoacid) 00088 synonymes.push_back(i); 00089 } 00090 catch (StopCodonException) 00091 { } 00092 } 00093 return synonymes; 00094 } 00095 00096 /**********************************************************************************************/ 00097 00098 std::vector<std::string> GeneticCode::getSynonymous(const std::string& aminoacid) const throw (BadCharException) 00099 { 00100 // test: 00101 int aa = proteicAlphabet_.charToInt(aminoacid); 00102 00103 vector<string> synonymes; 00104 for (int i = 0; i < static_cast<int>(codonAlphabet_.getSize()); ++i) 00105 { 00106 try 00107 { 00108 if (translate(i) == aa) 00109 synonymes.push_back(codonAlphabet_.intToChar(i)); 00110 } 00111 catch (StopCodonException) 00112 { } 00113 } 00114 return synonymes; 00115 } 00116 00117 /**********************************************************************************************/ 00118 00119 bool GeneticCode::isFourFoldDegenerated(int val) const 00120 { 00121 if (isStop(val)) 00122 return false; 00123 00124 vector<int> codon = codonAlphabet_.getPositions(val); 00125 int acid = translate(val); 00126 00127 // test all the substitution on third codon position 00128 for (int an = 0; an < 4; an++) 00129 { 00130 if (an == codon[2]) 00131 continue; 00132 vector<int> mutcodon = codon; 00133 mutcodon[2] = an; 00134 int intcodon = codonAlphabet_.getCodon(mutcodon[0], mutcodon[1], mutcodon[2]); 00135 if (isStop(intcodon)) 00136 return false; 00137 int altacid = translate(intcodon); 00138 if (altacid != acid) // if non-synonymous 00139 { 00140 return false; 00141 } 00142 } 00143 00144 return true; 00145 } 00146 00147 /**********************************************************************************************/ 00148 00149 Sequence* GeneticCode::getCodingSequence(const Sequence& sequence, bool lookForInitCodon, bool includeInitCodon) const throw (Exception) 00150 { 00151 size_t initPos = 0; 00152 size_t stopPos = sequence.size(); 00153 if (AlphabetTools::isCodonAlphabet(sequence.getAlphabet())) 00154 { 00155 // Look for AUG(or ATG) codon: 00156 if (lookForInitCodon) 00157 { 00158 for (size_t i = 0; i < sequence.size(); i++) 00159 { 00160 vector<int> pos = codonAlphabet_.getPositions(sequence[i]); 00161 if (pos[0] == 0 && pos[1] == 3 && pos[2] == 2) 00162 { 00163 initPos = includeInitCodon ? i : i + 1; 00164 break; 00165 } 00166 } 00167 } 00168 // Look for stop codon: 00169 for (size_t i = initPos; i < sequence.size(); i++) 00170 { 00171 if (isStop(sequence[i])) 00172 { 00173 stopPos = i; 00174 break; 00175 } 00176 } 00177 } 00178 else if (AlphabetTools::isNucleicAlphabet(sequence.getAlphabet())) 00179 { 00180 // Look for AUG(or ATG) codon: 00181 if (lookForInitCodon) 00182 { 00183 for (size_t i = 0; i < sequence.size() - 2; i++) 00184 { 00185 if (sequence[i] == 0 && sequence[i + 1] == 3 && sequence[i + 2] == 2) 00186 { 00187 initPos = includeInitCodon ? i : i + 3; 00188 break; 00189 } 00190 } 00191 } 00192 // Look for stop codon: 00193 const NucleicAlphabet* nucAlpha = codonAlphabet_.getNucleicAlphabet(); 00194 for (size_t i = initPos; i < sequence.size() - 2; i += 3) 00195 { 00196 string codon = nucAlpha->intToChar(sequence[i]) 00197 + nucAlpha->intToChar(sequence[i + 1]) 00198 + nucAlpha->intToChar(sequence[i + 2]); 00199 if (isStop(codon)) 00200 { 00201 stopPos = i; 00202 break; 00203 } 00204 } 00205 } 00206 else 00207 throw AlphabetMismatchException("Sequence must have alphabet of type nucleic or codon in GeneticCode::getCodingSequence.", 0, sequence.getAlphabet()); 00208 00209 return SequenceTools::subseq(sequence, initPos, stopPos - 1); 00210 } 00211 00212 /**********************************************************************************************/ 00213