bpp-seq  2.1.0
Bpp/Seq/GeneticCode/GeneticCode.cpp
Go to the documentation of this file.
00001 //
00002 // File: GeneticCode.cpp
00003 // Created by: Julien Dutheil
00004 // Created on: Mon Oct 13 15:37:25 2003
00005 //
00006 
00007 /*
00008    Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
00009 
00010    This software is a computer program whose purpose is to provide classes
00011    for sequences analysis.
00012 
00013    This software is governed by the CeCILL  license under French law and
00014    abiding by the rules of distribution of free software.  You can  use,
00015    modify and/ or redistribute the software under the terms of the CeCILL
00016    license as circulated by CEA, CNRS and INRIA at the following URL
00017    "http://www.cecill.info".
00018 
00019    As a counterpart to the access to the source code and  rights to copy,
00020    modify and redistribute granted by the license, users are provided only
00021    with a limited warranty  and the software's author,  the holder of the
00022    economic rights,  and the successive licensors  have only  limited
00023    liability.
00024 
00025    In this respect, the user's attention is drawn to the risks associated
00026    with loading,  using,  modifying and/or developing or reproducing the
00027    software by the user in light of its specific status of free software,
00028    that may mean  that it is complicated to manipulate,  and  that  also
00029    therefore means  that it is reserved for developers  and  experienced
00030    professionals having in-depth computer knowledge. Users are therefore
00031    encouraged to load and test the software's suitability as regards their
00032    requirements in conditions enabling the security of their systems and/or
00033    data to be ensured and,  more generally, to use and operate it in the
00034    same conditions as regards security.
00035 
00036    The fact that you are presently reading this means that you have had
00037    knowledge of the CeCILL license and that you accept its terms.
00038  */
00039 
00040 #include "GeneticCode.h"
00041 #include "../SequenceTools.h"
00042 #include "../Alphabet/AlphabetTools.h"
00043 
00044 using namespace bpp;
00045 using namespace std;
00046 
00047 /**********************************************************************************************/
00048 
00049 StopCodonException::StopCodonException(const std::string& text, const std::string& codon) :
00050   Exception("StopCodonException: " + text + "(" + codon + ")"),
00051   codon_(codon) {}
00052 
00053 /**********************************************************************************************/
00054 
00055 int GeneticCode::translate(int state) const throw (BadIntException, Exception)
00056 {
00057   if (isStop(state))
00058     throw StopCodonException("GeneticCode::translate().", codonAlphabet_.intToChar(state)); 
00059     
00060   map<int, int>::const_iterator it = tlnTable_.find(state);
00061   if (it == tlnTable_.end())
00062     throw BadIntException(state, "GeneticCode::translate().");
00063   
00064   return it->second;
00065 }   
00066 
00067 /**********************************************************************************************/
00068 
00069 std::string GeneticCode::translate(const std::string& state) const throw (BadCharException, Exception)
00070 {
00071   int x = codonAlphabet_.charToInt(state);
00072   return proteicAlphabet_.intToChar(translate(x));
00073 }
00074 
00075 /**********************************************************************************************/
00076 
00077 vector<int> GeneticCode::getSynonymous(int aminoacid) const throw (BadIntException)
00078 {
00079   // test:
00080   proteicAlphabet_.intToChar(aminoacid);
00081 
00082   vector<int> synonymes;
00083   for (int i = 0; i < static_cast<int>(codonAlphabet_.getSize()); ++i)
00084   {
00085     try
00086     {
00087       if (translate(i) == aminoacid)
00088         synonymes.push_back(i);
00089     }
00090     catch (StopCodonException)
00091     { }
00092   }
00093   return synonymes;
00094 }
00095 
00096 /**********************************************************************************************/
00097 
00098 std::vector<std::string> GeneticCode::getSynonymous(const std::string& aminoacid) const throw (BadCharException)
00099 {
00100   // test:
00101   int aa = proteicAlphabet_.charToInt(aminoacid);
00102 
00103   vector<string> synonymes;
00104   for (int i = 0; i < static_cast<int>(codonAlphabet_.getSize()); ++i)
00105   {
00106     try
00107     {
00108       if (translate(i) == aa)
00109         synonymes.push_back(codonAlphabet_.intToChar(i));
00110     }
00111     catch (StopCodonException)
00112     { }
00113   }
00114   return synonymes;
00115 }
00116 
00117 /**********************************************************************************************/
00118 
00119 bool GeneticCode::isFourFoldDegenerated(int val) const
00120 {
00121   if (isStop(val))
00122     return false;
00123 
00124   vector<int> codon = codonAlphabet_.getPositions(val);
00125   int acid = translate(val);
00126 
00127   // test all the substitution on third codon position
00128   for (int an = 0; an < 4; an++)
00129   {
00130     if (an == codon[2])
00131       continue;
00132     vector<int> mutcodon = codon;
00133     mutcodon[2] = an;
00134     int intcodon = codonAlphabet_.getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
00135     if (isStop(intcodon))
00136       return false;
00137     int altacid = translate(intcodon);
00138     if (altacid != acid)   // if non-synonymous
00139     {
00140       return false;
00141     }
00142   }
00143 
00144   return true;
00145 }
00146 
00147 /**********************************************************************************************/
00148 
00149 Sequence* GeneticCode::getCodingSequence(const Sequence& sequence, bool lookForInitCodon, bool includeInitCodon) const throw (Exception)
00150 {
00151   size_t initPos = 0;
00152   size_t stopPos = sequence.size();
00153   if (AlphabetTools::isCodonAlphabet(sequence.getAlphabet()))
00154   {
00155     // Look for AUG(or ATG) codon:
00156     if (lookForInitCodon)
00157     {
00158       for (size_t i = 0; i < sequence.size(); i++)
00159       {
00160         vector<int> pos = codonAlphabet_.getPositions(sequence[i]);
00161         if (pos[0] == 0 && pos[1] == 3 && pos[2] == 2)
00162         {
00163           initPos = includeInitCodon ? i : i + 1;
00164           break;
00165         }
00166       }
00167     }
00168     // Look for stop codon:
00169     for (size_t i = initPos; i < sequence.size(); i++)
00170     {
00171       if (isStop(sequence[i]))
00172       {
00173         stopPos = i;
00174         break;
00175       }
00176     }
00177   }
00178   else if (AlphabetTools::isNucleicAlphabet(sequence.getAlphabet()))
00179   {
00180     // Look for AUG(or ATG) codon:
00181     if (lookForInitCodon)
00182     {
00183       for (size_t i = 0; i < sequence.size() - 2; i++)
00184       {
00185         if (sequence[i] == 0 && sequence[i + 1] == 3 && sequence[i + 2] == 2)
00186         {
00187           initPos = includeInitCodon ? i : i + 3;
00188           break;
00189         }
00190       }
00191     }
00192     // Look for stop codon:
00193     const NucleicAlphabet* nucAlpha = codonAlphabet_.getNucleicAlphabet();
00194     for (size_t i = initPos; i < sequence.size() - 2; i += 3)
00195     {
00196       string codon = nucAlpha->intToChar(sequence[i])
00197                      + nucAlpha->intToChar(sequence[i + 1])
00198                      + nucAlpha->intToChar(sequence[i + 2]);
00199       if (isStop(codon))
00200       {
00201         stopPos = i;
00202         break;
00203       }
00204     }
00205   }
00206   else
00207     throw AlphabetMismatchException("Sequence must have alphabet of type nucleic or codon in GeneticCode::getCodingSequence.", 0, sequence.getAlphabet());
00208 
00209   return SequenceTools::subseq(sequence, initPos, stopPos - 1);
00210 }
00211 
00212 /**********************************************************************************************/
00213 
 All Classes Namespaces Files Functions Variables Typedefs Friends