bpp-seq  2.1.0
Bpp/Seq/Alphabet/WordAlphabet.cpp
Go to the documentation of this file.
00001 //
00002 // File: WordAlphabet.cpp
00003 // Authors: Laurent Gueguen
00004 //          Sylvain Gaillard
00005 // Created on: Sun Dec 28 2008
00006 //
00007 
00008 /*
00009    Copyright or © or Copr. CNRS, (November 17, 2004)
00010 
00011    This software is a computer program whose purpose is to provide classes
00012    for sequences analysis.
00013 
00014    This software is governed by the CeCILL  license under French law and
00015    abiding by the rules of distribution of free software.  You can  use,
00016    modify and/ or redistribute the software under the terms of the CeCILL
00017    license as circulated by CEA, CNRS and INRIA at the following URL
00018    "http://www.cecill.info".
00019 
00020    As a counterpart to the access to the source code and  rights to copy,
00021    modify and redistribute granted by the license, users are provided only
00022    with a limited warranty  and the software's author,  the holder of the
00023    economic rights,  and the successive licensors  have only  limited
00024    liability.
00025 
00026    In this respect, the user's attention is drawn to the risks associated
00027    with loading,  using,  modifying and/or developing or reproducing the
00028    software by the user in light of its specific status of free software,
00029    that may mean  that it is complicated to manipulate,  and  that  also
00030    therefore means  that it is reserved for developers  and  experienced
00031    professionals having in-depth computer knowledge. Users are therefore
00032    encouraged to load and test the software's suitability as regards their
00033    requirements in conditions enabling the security of their systems and/or
00034    data to be ensured and,  more generally, to use and operate it in the
00035    same conditions as regards security.
00036 
00037    The fact that you are presently reading this means that you have had
00038    knowledge of the CeCILL license and that you accept its terms.
00039  */
00040 
00041 #include "WordAlphabet.h"
00042 #include <Bpp/Text/TextTools.h>
00043 
00044 using namespace bpp;
00045 
00046 // From the STL:
00047 #include <iostream>
00048 
00049 using namespace std;
00050 
00051 WordAlphabet::WordAlphabet(const vector<const Alphabet*>& vAlpha) :
00052   AbstractAlphabet(),
00053   vAbsAlph_(vAlpha)
00054 {
00055   build_();
00056 }
00057 
00058 WordAlphabet::WordAlphabet(const Alphabet* pAlpha, unsigned int num) :
00059   AbstractAlphabet(),
00060   vAbsAlph_(0)
00061 {
00062   for (unsigned int i = 0; i < num; i++)
00063   {
00064     vAbsAlph_.push_back(pAlpha);
00065   }
00066 
00067   build_();
00068 }
00069 
00070 void WordAlphabet::build_()
00071 {
00072   unsigned int size = 1;
00073 
00074   for (unsigned int i = 0; i < vAbsAlph_.size(); i++)
00075   {
00076     size *= vAbsAlph_[i]->getSize();
00077   }
00078 
00079   resize(size + 2);
00080 
00081   string s = "";
00082   for (unsigned int i = 0; i < vAbsAlph_.size(); i++)
00083   {
00084     s += "-";
00085   }
00086 
00087   setState(0, AlphabetState(-1, s, "gap"));
00088 
00089   for (unsigned int i = 0; i < size; i++)
00090   {
00091     setState(i + 1, AlphabetState(i, "", ""));
00092   }
00093 
00094   unsigned lr = size;
00095   char c;
00096   for (unsigned int na = 0; na < vAbsAlph_.size(); na++)
00097   {
00098     lr /= vAbsAlph_[na]->getSize();
00099     unsigned int j = 1;
00100     unsigned int i = 0;
00101     while (j <= size)
00102     {
00103       c = vAbsAlph_[na]->intToChar(i)[0];
00104 
00105       for (unsigned int k = 0; k < lr; k++)
00106       {
00107         getStateAt(j).setLetter(getStateAt(j).getLetter() + c);
00108         j++;
00109         // alphabet[j++].letter += c;
00110       }
00111 
00112       if (++i == vAbsAlph_[na]->getSize())
00113         i = 0;
00114     }
00115   }
00116 
00117   s = "";
00118   for (unsigned i = 0; i < vAbsAlph_.size(); i++)
00119   {
00120     s += "N";
00121   }
00122 
00123   setState(size + 1, AlphabetState(size, s, "Unresolved"));
00124   remap();
00125 }
00126 
00127 /******************************************************************************/
00128 
00129 std::string WordAlphabet::getAlphabetType() const
00130 {
00131   string s = "Word alphabet:";
00132   for (unsigned int i = 0; i < vAbsAlph_.size(); i++)
00133   {
00134     s += " " +  vAbsAlph_[i]->getAlphabetType();
00135   }
00136 
00137   return s;
00138 }
00139 
00140 bool WordAlphabet::hasUniqueAlphabet() const
00141 {
00142   string s = vAbsAlph_[0]->getAlphabetType();
00143   for (unsigned int i = 1; i < vAbsAlph_.size(); i++)
00144   {
00145     if (vAbsAlph_[i]->getAlphabetType() != s)
00146       return false;
00147   }
00148   return true;
00149 }
00150 
00151 bool WordAlphabet::containsUnresolved(const std::string& state) const throw (BadCharException)
00152 {
00153   size_t s = vAbsAlph_.size();
00154   if (state.length() != s)
00155     throw BadCharException(state, "WordAlphabet::containsUnresolved", this);
00156 
00157   for (size_t i = 0; i < vAbsAlph_.size(); i++)
00158   {
00159     if (vAbsAlph_[i]->isUnresolved(state.substr(i, 1)))
00160     {
00161       return true;
00162     }
00163   }
00164   return false;
00165 }
00166 
00167 /******************************************************************************/
00168 
00169 bool WordAlphabet::containsGap(const std::string& state) const throw (BadCharException)
00170 {
00171   size_t s = vAbsAlph_.size();
00172   if (state.length() != s)
00173     throw BadCharException(state, "WordAlphabet::containsGap", this);
00174 
00175   for (size_t i = 0; i < vAbsAlph_.size(); i++)
00176   {
00177     if (vAbsAlph_[i]->isGap(state.substr(i, 1)))
00178       return true;
00179   }
00180 
00181   return false;
00182 }
00183 
00184 /******************************************************************************/
00185 
00186 std::string WordAlphabet::getName(const std::string& state) const throw (BadCharException)
00187 {
00188   if (state.size() != vAbsAlph_.size())
00189     throw BadCharException(state, "WordAlphabet::getName", this);
00190   if (containsUnresolved(state))
00191     return getStateAt(getSize() + 1).getName();
00192   if (containsGap(state))
00193     return getStateAt(0).getName();
00194   else
00195     return AbstractAlphabet::getName(state);
00196 }
00197 
00198 /******************************************************************************/
00199 
00200 std::vector<int> WordAlphabet::getAlias(int state) const throw (BadIntException)
00201 {
00202   if (!isIntInAlphabet(state))
00203     throw BadIntException(state, "WordAlphabet::getAlias(int): Specified base unknown.");
00204   vector<int> v;
00205   int i, s = getSize();
00206 
00207   if (state == s)
00208   {
00209     v.resize(s);
00210     for (i = 0; i < s; i++)
00211     {
00212       v[i] = i;
00213     }
00214   }
00215   else
00216   {
00217     v.resize(1); v[0] = state;
00218   }
00219   return v;
00220 }
00221 
00222 /******************************************************************************/
00223 
00224 std::vector<std::string> WordAlphabet::getAlias(const std::string& state) const throw (BadCharException)
00225 {
00226   string locstate = TextTools::toUpper(state);
00227   if (!isCharInAlphabet(locstate))
00228     throw BadCharException(locstate, "WordAlphabet::getAlias(string): Specified base unknown.");
00229   vector<string> v;
00230 
00231   unsigned int i, s = getSize();
00232 
00233   string st = "";
00234   for (i = 0; i < vAbsAlph_.size(); i++)
00235   {
00236     st += "N";
00237   }
00238 
00239   if (locstate == st)
00240   {
00241     v.resize(s);
00242     for (i = 0; i < s; i++)
00243     {
00244       v[i] = intToChar(i);
00245     }
00246   }
00247   else
00248   {
00249     v.resize(1); v[0] = state;
00250   }
00251   return v;
00252 }
00253 
00254 /******************************************************************************/
00255 
00256 int WordAlphabet::getGeneric(const std::vector<int>& states) const throw (BadIntException)
00257 {
00258   return states[0];
00259 }
00260 
00261 /******************************************************************************/
00262 
00263 std::string WordAlphabet::getGeneric(const std::vector<std::string>& states) const throw (BadCharException)
00264 {
00265   return states[0];
00266 }
00267 
00268 /******************************************************************************/
00269 
00270 int WordAlphabet::getWord(const std::vector<int>& vint, size_t pos) const throw (IndexOutOfBoundsException)
00271 {
00272   if (vint.size() < pos + vAbsAlph_.size())
00273     throw IndexOutOfBoundsException("WordAlphabet::getWord", pos, 0, vint.size() - vAbsAlph_.size());
00274 
00275   vector<string> vs;
00276   for (size_t i = 0; i < vAbsAlph_.size(); i++)
00277   {
00278     vs.push_back(vAbsAlph_[i]->intToChar(vint[i + pos]));
00279   }
00280 
00281   return charToInt(getWord(vs)); // This can't throw a BadCharException!
00282 }
00283 
00284 /****************************************************************************************/
00285 
00286 std::string WordAlphabet::getWord(const std::vector<string>& vpos, size_t pos) const throw (IndexOutOfBoundsException, BadCharException)
00287 {
00288   if (vpos.size() < pos + vAbsAlph_.size())
00289     throw IndexOutOfBoundsException("WordAlphabet::getWord", pos, 0, vpos.size() - vAbsAlph_.size());
00290 
00291   string s = "";
00292   for (size_t i = 0; i < vAbsAlph_.size(); i++)
00293   {
00294     s += vpos[pos + i];
00295   }
00296   // test
00297   charToInt(s);
00298   return s;
00299 }
00300 
00301 /****************************************************************************************/
00302 
00303 Sequence* WordAlphabet::translate(const Sequence& sequence, size_t pos) const throw (AlphabetMismatchException, Exception)
00304 {
00305   if ((!hasUniqueAlphabet()) or
00306       (sequence.getAlphabet()->getAlphabetType() != vAbsAlph_[0]->getAlphabetType()))
00307     throw AlphabetMismatchException("No matching alphabets", sequence.getAlphabet(), vAbsAlph_[0]);
00308 
00309   vector<int> v1 = sequence.getContent();
00310   vector<int> v2;
00311 
00312   size_t s = sequence.size();
00313   unsigned int l = getLength();
00314   size_t i = pos;
00315 
00316   while (i + l < s)
00317   {
00318     v2.push_back(getWord(v1, i));
00319     i += l;
00320   }
00321 
00322   return new BasicSequence(sequence.getName(), v2, this);
00323 }
00324 
00325 /****************************************************************************************/
00326 
00327 Sequence* WordAlphabet::reverse(const Sequence& sequence) const throw (AlphabetMismatchException, Exception)
00328 {
00329   if ((!hasUniqueAlphabet()) or
00330       (sequence.getAlphabet()->getAlphabetType() != getAlphabetType()))
00331     throw AlphabetMismatchException("No matching alphabets");
00332 
00333   Sequence* pseq = new BasicSequence(sequence.getName(), "", getNAlphabet(0));
00334 
00335   size_t s = sequence.size();
00336   for (size_t i = 0; i < s; i++)
00337   {
00338     pseq->append(getPositions(sequence[i]));
00339   }
00340 
00341   return pseq;
00342 }
00343 
00344 /****************************************************************************************/
00345 
 All Classes Namespaces Files Functions Variables Typedefs Friends