|
bpp-seq
2.1.0
|
00001 // 00002 // File: Clustal.cpp 00003 // Created by: Julien Dutheil 00004 // Created on: ? 00005 // 00006 00007 /* 00008 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004) 00009 00010 This software is a computer program whose purpose is to provide classes 00011 for sequences analysis. 00012 00013 This software is governed by the CeCILL license under French law and 00014 abiding by the rules of distribution of free software. You can use, 00015 modify and/ or redistribute the software under the terms of the CeCILL 00016 license as circulated by CEA, CNRS and INRIA at the following URL 00017 "http://www.cecill.info". 00018 00019 As a counterpart to the access to the source code and rights to copy, 00020 modify and redistribute granted by the license, users are provided only 00021 with a limited warranty and the software's author, the holder of the 00022 economic rights, and the successive licensors have only limited 00023 liability. 00024 00025 In this respect, the user's attention is drawn to the risks associated 00026 with loading, using, modifying and/or developing or reproducing the 00027 software by the user in light of its specific status of free software, 00028 that may mean that it is complicated to manipulate, and that also 00029 therefore means that it is reserved for developers and experienced 00030 professionals having in-depth computer knowledge. Users are therefore 00031 encouraged to load and test the software's suitability as regards their 00032 requirements in conditions enabling the security of their systems and/or 00033 data to be ensured and, more generally, to use and operate it in the 00034 same conditions as regards security. 00035 00036 The fact that you are presently reading this means that you have had 00037 knowledge of the CeCILL license and that you accept its terms. 00038 */ 00039 00040 #include "Clustal.h" 00041 #include <Bpp/Text/TextTools.h> 00042 #include <Bpp/Text/StringTokenizer.h> 00043 #include <Bpp/Io/FileTools.h> 00044 00045 using namespace bpp; 00046 00047 // From the STL: 00048 #include <iostream> 00049 #include <iomanip> 00050 using namespace std; 00051 00052 void Clustal::appendAlignmentFromStream(std::istream& input, SiteContainer & sc) const throw (Exception) 00053 { 00054 // Checking the existence of specified file 00055 if (!input) { throw IOException ("Clustal::read : fail to open file"); } 00056 00057 const Alphabet * alpha = sc.getAlphabet(); 00058 vector<BasicSequence> sequences; 00059 00060 string lineRead(""); 00061 00062 Comments comments(1); 00063 comments[0] = FileTools::getNextLine(input); // First line gives file generator. 00064 00065 lineRead = FileTools::getNextLine(input); // This is the first sequence of the first block. 00066 00067 string::size_type beginSeq = 0; 00068 unsigned int count = 0; 00069 for (size_t i = lineRead.size(); i > 0; i--) { 00070 char c = lineRead[i-1]; 00071 if (c == ' ') { 00072 count++; 00073 if (count == nbSpacesBeforeSeq_) { 00074 beginSeq = i - 1 + nbSpacesBeforeSeq_; 00075 break; 00076 } 00077 } 00078 else count = 0; 00079 } 00080 if (beginSeq == 0) throw IOException("Clustal::read. Bad intput file."); 00081 00082 unsigned int countSequences = 0; 00083 00084 //Read first sequences block: 00085 bool test = true; 00086 do { 00087 sequences.push_back(BasicSequence(TextTools::removeSurroundingWhiteSpaces(lineRead.substr(0, beginSeq - nbSpacesBeforeSeq_)), lineRead.substr(beginSeq), alpha)); 00088 getline(input, lineRead, '\n'); 00089 countSequences++; 00090 test = !TextTools::isEmpty(lineRead) && !TextTools::isEmpty(lineRead.substr(0, beginSeq - nbSpacesBeforeSeq_)); 00091 } 00092 while (input && test); 00093 00094 // Read other blocks 00095 lineRead = FileTools::getNextLine(input); // Read first sequence of next block. 00096 while (!TextTools::isEmpty(lineRead)) { 00097 // Read next block: 00098 for (unsigned int i = 0; i < countSequences; ++i) { 00099 // Complete sequences 00100 if (TextTools::isEmpty(lineRead)) 00101 throw IOException("Clustal::read. Bad intput file."); 00102 sequences[i].append(lineRead.substr(beginSeq)); 00103 getline(input, lineRead, '\n'); 00104 } 00105 //At this point, lineRead is the first line after the current block. 00106 lineRead = FileTools::getNextLine(input); 00107 } 00108 00109 for (unsigned int i = 0; i < countSequences; ++i) 00110 sc.addSequence(sequences[i], checkNames_); 00111 sc.setGeneralComments(comments); 00112 } 00113 00114 void Clustal::writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception) 00115 { 00116 output << "CLUSTAL W (1.81) multiple sequence alignment" << endl; 00117 output << endl; 00118 if (sc.getNumberOfSequences() == 0) 00119 return; 00120 00121 vector<string> text; 00122 size_t length = 0; 00123 for (size_t i = 0; i < sc.getNumberOfSequences(); ++i ) { 00124 const Sequence& seq = sc.getSequence(i); 00125 if (seq.getName().size() > length) 00126 length = seq.getName().size(); 00127 text.push_back(sc.getSequence(i).toString()); 00128 } 00129 length += nbSpacesBeforeSeq_; 00130 for (unsigned int j = 0; j < text[0].size(); j += charsByLine_) { 00131 for (unsigned int i = 0; i < sc.getNumberOfSequences(); ++i ) { 00132 output << TextTools::resizeRight(sc.getSequence(i).getName(), length); 00133 output << text[i].substr(j, charsByLine_) << endl; 00134 } 00135 output << endl; 00136 } 00137 } 00138