|
bpp-seq
2.1.0
|
00001 // 00002 // File: SequenceWithQualityTools.h 00003 // Authors: Vincent Cahais 00004 // Sylvain Gaillard 00005 // Created on: 16 Apr 2010 00006 // 00007 00008 /* 00009 Copyright or © or Copr. Bio++ Development Team, (Apr 16, 2010) 00010 00011 This software is a computer program whose purpose is to provide classes 00012 for sequences analysis. 00013 00014 This software is governed by the CeCILL license under French law and 00015 abiding by the rules of distribution of free software. You can use, 00016 modify and/ or redistribute the software under the terms of the CeCILL 00017 license as circulated by CEA, CNRS and INRIA at the following URL 00018 "http://www.cecill.info". 00019 00020 As a counterpart to the access to the source code and rights to copy, 00021 modify and redistribute granted by the license, users are provided only 00022 with a limited warranty and the software's author, the holder of the 00023 economic rights, and the successive licensors have only limited 00024 liability. 00025 00026 In this respect, the user's attention is drawn to the risks associated 00027 with loading, using, modifying and/or developing or reproducing the 00028 software by the user in light of its specific status of free software, 00029 that may mean that it is complicated to manipulate, and that also 00030 therefore means that it is reserved for developers and experienced 00031 professionals having in-depth computer knowledge. Users are therefore 00032 encouraged to load and test the software's suitability as regards their 00033 requirements in conditions enabling the security of their systems and/or 00034 data to be ensured and, more generally, to use and operate it in the 00035 same conditions as regards security. 00036 00037 The fact that you are presently reading this means that you have had 00038 knowledge of the CeCILL license and that you accept its terms. 00039 */ 00040 00041 #include "SequenceWithQualityTools.h" 00042 00043 using namespace bpp; 00044 using namespace std; 00045 00046 DNA SequenceWithQualityTools::_DNA; 00047 RNA SequenceWithQualityTools::_RNA; 00048 NucleicAcidsReplication SequenceWithQualityTools::_DNARep(& _DNA, & _DNA); 00049 NucleicAcidsReplication SequenceWithQualityTools::_RNARep(& _RNA, & _RNA); 00050 NucleicAcidsReplication SequenceWithQualityTools::_transc(& _DNA, & _RNA); 00051 00052 /******************************************************************************/ 00053 00054 SequenceWithQuality * SequenceWithQualityTools::subseq(const SequenceWithQuality & sequence, unsigned int begin, unsigned int end) throw (IndexOutOfBoundsException, Exception) 00055 { 00056 // Checking interval 00057 if (end >= sequence.size()) throw IndexOutOfBoundsException ("SequenceTools::subseq : Invalid upper bound", end, 0, sequence.size()); 00058 if (end < begin) throw Exception ("SequenceTools::subseq : Invalid interval"); 00059 00060 // Copy sequence 00061 vector<int> temp(sequence.getContent()); 00062 vector<int> qualtemp(sequence.getQualities()); 00063 00064 // Truncate sequence 00065 temp.erase(temp.begin() + end + 1, temp.end()); 00066 temp.erase(temp.begin(), temp.begin() + begin); 00067 qualtemp.erase(qualtemp.begin() + end + 1, qualtemp.end()); 00068 qualtemp.erase(qualtemp.begin(), qualtemp.begin() + begin); 00069 00070 // New sequence creation 00071 return new SequenceWithQuality(sequence.getName(), temp, qualtemp, sequence.getComments(), sequence.getAlphabet()); 00072 00073 } 00074 00075 /******************************************************************************/ 00076 00077 SequenceWithQuality* SequenceWithQualityTools::concatenate(const SequenceWithQuality& seqwq1, const SequenceWithQuality& seqwq2) throw (AlphabetMismatchException, Exception) 00078 { 00079 // Sequence's alphabets matching verification 00080 if ((seqwq1.getAlphabet()->getAlphabetType()) != (seqwq2.getAlphabet()->getAlphabetType())) 00081 throw AlphabetMismatchException("SequenceTools::concatenate : Sequence's alphabets don't match ", seqwq1.getAlphabet(), seqwq2.getAlphabet()); 00082 00083 // Sequence's names matching verification 00084 if (seqwq1.getName() != seqwq2.getName()) 00085 throw Exception ("SequenceTools::concatenate : Sequence's names don't match"); 00086 00087 // Concatenate sequences and send result 00088 vector<int> sequence = seqwq1.getContent(); 00089 vector<int> sequence2 = seqwq2.getContent(); 00090 vector<int> qualities = seqwq1.getQualities(); 00091 vector<int> qualities2 = seqwq2.getQualities(); 00092 00093 sequence.insert(sequence.end(), sequence2.begin(), sequence2.end()); 00094 qualities.insert(qualities.end(), qualities2.begin(), qualities2.end()); 00095 return new SequenceWithQuality(seqwq1.getName(), sequence, qualities, seqwq1.getComments(), seqwq1.getAlphabet()); 00096 } 00097 00098 /******************************************************************************/ 00099 00100 SequenceWithQuality* SequenceWithQualityTools::complement(const SequenceWithQuality& sequence) throw (AlphabetException) 00101 { 00102 // Alphabet type checking 00103 NucleicAcidsReplication * NAR; 00104 if (sequence.getAlphabet()->getAlphabetType() == "DNA alphabet") 00105 { 00106 NAR = &_DNARep; 00107 } 00108 else if(sequence.getAlphabet()->getAlphabetType() == "RNA alphabet") 00109 { 00110 NAR = &_RNARep; 00111 } 00112 else 00113 { 00114 throw AlphabetException ("SequenceTools::complement : Sequence must be nucleic.", sequence.getAlphabet()); 00115 } 00116 Sequence * seq = NAR->translate(sequence); 00117 SequenceWithQuality * seqwq = new SequenceWithQuality(sequence.getName(), seq->getContent(), sequence.getQualities(), sequence.getComments(), sequence.getAlphabet()); 00118 delete seq; 00119 return seqwq; 00120 } 00121 00122 /******************************************************************************/ 00123 00124 SequenceWithQuality* SequenceWithQualityTools::transcript(const SequenceWithQuality& sequence) throw (AlphabetException) 00125 { 00126 // Alphabet type checking 00127 if (sequence.getAlphabet()->getAlphabetType() != "DNA alphabet") 00128 { 00129 throw AlphabetException ("SequenceTools::transcript : Sequence must be DNA", sequence.getAlphabet()); 00130 } 00131 Sequence * seq = _transc.translate(sequence); 00132 SequenceWithQuality * seqwq = new SequenceWithQuality(sequence.getName(), seq->getContent(), sequence.getQualities(), sequence.getComments(), sequence.getAlphabet()); 00133 delete seq; 00134 return seqwq; 00135 } 00136 00137 /******************************************************************************/ 00138 00139 SequenceWithQuality* SequenceWithQualityTools::reverseTranscript(const SequenceWithQuality& sequence) throw (AlphabetException) 00140 { 00141 // Alphabet type checking 00142 if (sequence.getAlphabet()->getAlphabetType() != "RNA alphabet") 00143 { 00144 throw AlphabetException ("SequenceTools::reverseTranscript : Sequence must be RNA", sequence.getAlphabet()); 00145 } 00146 00147 Sequence * seq = _transc.reverse(sequence); 00148 SequenceWithQuality * seqwq = new SequenceWithQuality(sequence.getName(), seq->getContent(), sequence.getQualities(), sequence.getComments(), sequence.getAlphabet()); 00149 delete seq; 00150 return seqwq; 00151 } 00152 00153 /******************************************************************************/ 00154 00155 SequenceWithQuality* SequenceWithQualityTools::invert(const SequenceWithQuality& sequence) 00156 { 00157 vector<int> iContent(sequence.getContent().rbegin(),sequence.getContent().rend()); 00158 vector<int> iQualities(sequence.getQualities().rbegin(),sequence.getQualities().rend()); 00159 SequenceWithQuality* iSeq = sequence.clone(); 00160 iSeq->setContent(iContent); 00161 iSeq->setQualities(iQualities); 00162 00163 return iSeq; 00164 } 00165 00166 /******************************************************************************/ 00167 00168 SequenceWithQuality* SequenceWithQualityTools::removeGaps(const SequenceWithQuality& seq) 00169 { 00170 vector<int> content; 00171 vector<int> qualities; 00172 const Alphabet * alpha = seq.getAlphabet(); 00173 for(unsigned int i = 0; i < seq.size(); i++) 00174 { 00175 if(! alpha->isGap(seq[i])) 00176 { 00177 content.push_back(seq[i]); 00178 qualities.push_back(seq.getQualities()[i]); 00179 } 00180 } 00181 SequenceWithQuality * newSeq = dynamic_cast<SequenceWithQuality *>(seq.clone()); 00182 newSeq->setContent(content); 00183 newSeq->setQualities(qualities); 00184 return newSeq; 00185 } 00186 00187 /******************************************************************************/ 00188 00189 SequenceWithQuality& SequenceWithQualityTools::trimLeft(SequenceWithQuality& seq) { 00190 bool badqual = false; 00191 while (badqual) { 00192 } 00193 return seq; 00194 }