bpp-seq  2.1.0
Bpp/Seq/Io/Clustal.cpp
Go to the documentation of this file.
00001 //
00002 // File: Clustal.cpp
00003 // Created by: Julien Dutheil
00004 // Created on: ?
00005 //
00006 
00007 /*
00008 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
00009 
00010 This software is a computer program whose purpose is to provide classes
00011 for sequences analysis.
00012 
00013 This software is governed by the CeCILL  license under French law and
00014 abiding by the rules of distribution of free software.  You can  use, 
00015 modify and/ or redistribute the software under the terms of the CeCILL
00016 license as circulated by CEA, CNRS and INRIA at the following URL
00017 "http://www.cecill.info". 
00018 
00019 As a counterpart to the access to the source code and  rights to copy,
00020 modify and redistribute granted by the license, users are provided only
00021 with a limited warranty  and the software's author,  the holder of the
00022 economic rights,  and the successive licensors  have only  limited
00023 liability. 
00024 
00025 In this respect, the user's attention is drawn to the risks associated
00026 with loading,  using,  modifying and/or developing or reproducing the
00027 software by the user in light of its specific status of free software,
00028 that may mean  that it is complicated to manipulate,  and  that  also
00029 therefore means  that it is reserved for developers  and  experienced
00030 professionals having in-depth computer knowledge. Users are therefore
00031 encouraged to load and test the software's suitability as regards their
00032 requirements in conditions enabling the security of their systems and/or 
00033 data to be ensured and,  more generally, to use and operate it in the 
00034 same conditions as regards security. 
00035 
00036 The fact that you are presently reading this means that you have had
00037 knowledge of the CeCILL license and that you accept its terms.
00038 */
00039 
00040 #include "Clustal.h"
00041 #include <Bpp/Text/TextTools.h>
00042 #include <Bpp/Text/StringTokenizer.h>
00043 #include <Bpp/Io/FileTools.h>
00044 
00045 using namespace bpp;
00046 
00047 // From the STL:
00048 #include <iostream>
00049 #include <iomanip>
00050 using namespace std;
00051 
00052 void Clustal::appendAlignmentFromStream(std::istream& input, SiteContainer & sc) const throw (Exception)
00053 {
00054   // Checking the existence of specified file
00055   if (!input) { throw IOException ("Clustal::read : fail to open file"); }
00056 
00057   const Alphabet * alpha = sc.getAlphabet();
00058   vector<BasicSequence> sequences;
00059 
00060   string lineRead("");
00061 
00062   Comments comments(1);
00063   comments[0] = FileTools::getNextLine(input); // First line gives file generator.
00064 
00065   lineRead = FileTools::getNextLine(input); // This is the first sequence of the first block.
00066     
00067   string::size_type beginSeq = 0;
00068   unsigned int count = 0;
00069   for (size_t i = lineRead.size(); i > 0; i--) {
00070     char c = lineRead[i-1];
00071     if (c == ' ') {
00072       count++;
00073       if (count == nbSpacesBeforeSeq_) {
00074         beginSeq = i - 1 + nbSpacesBeforeSeq_;
00075         break;
00076       }
00077     }
00078     else count = 0;
00079   }
00080   if (beginSeq == 0) throw IOException("Clustal::read. Bad intput file.");
00081 
00082   unsigned int countSequences = 0;
00083 
00084   //Read first sequences block:
00085   bool test = true;
00086   do {
00087     sequences.push_back(BasicSequence(TextTools::removeSurroundingWhiteSpaces(lineRead.substr(0, beginSeq - nbSpacesBeforeSeq_)), lineRead.substr(beginSeq), alpha));
00088     getline(input, lineRead, '\n');
00089     countSequences++;
00090     test = !TextTools::isEmpty(lineRead) && !TextTools::isEmpty(lineRead.substr(0, beginSeq - nbSpacesBeforeSeq_));
00091   }
00092   while (input && test);
00093 
00094   // Read other blocks
00095   lineRead = FileTools::getNextLine(input); // Read first sequence of next block.
00096   while (!TextTools::isEmpty(lineRead)) {
00097     // Read next block:
00098     for (unsigned int i = 0; i < countSequences; ++i) {
00099       // Complete sequences
00100       if (TextTools::isEmpty(lineRead))
00101         throw IOException("Clustal::read. Bad intput file.");
00102        sequences[i].append(lineRead.substr(beginSeq));
00103       getline(input, lineRead, '\n');
00104     }
00105     //At this point, lineRead is the first line after the current block.
00106     lineRead = FileTools::getNextLine(input);
00107   }
00108 
00109   for (unsigned int i = 0; i < countSequences; ++i)
00110     sc.addSequence(sequences[i], checkNames_);
00111   sc.setGeneralComments(comments);
00112 }
00113 
00114 void Clustal::writeAlignment(std::ostream& output, const SiteContainer& sc) const throw (Exception)
00115 {
00116   output << "CLUSTAL W (1.81) multiple sequence alignment" << endl;
00117   output << endl;
00118   if (sc.getNumberOfSequences() == 0)
00119     return;
00120 
00121   vector<string> text;
00122   size_t length = 0;
00123   for (size_t i = 0; i < sc.getNumberOfSequences(); ++i ) {
00124     const Sequence& seq = sc.getSequence(i);
00125     if (seq.getName().size() > length)
00126       length = seq.getName().size();
00127     text.push_back(sc.getSequence(i).toString());
00128   }
00129   length += nbSpacesBeforeSeq_;
00130   for (unsigned int j = 0; j < text[0].size(); j += charsByLine_) {
00131     for (unsigned int i = 0; i < sc.getNumberOfSequences(); ++i ) {
00132       output << TextTools::resizeRight(sc.getSequence(i).getName(), length);
00133       output << text[i].substr(j, charsByLine_) << endl;
00134     }
00135     output << endl;
00136   }
00137 }
00138 
 All Classes Namespaces Files Functions Variables Typedefs Friends