bpp-core  2.1.0
 All Classes Namespaces Files Functions Variables Typedefs Friends
TextTools.cpp
Go to the documentation of this file.
1 //
2 // File: TextTools.cpp
3 // Created by: Julien Dutheil
4 // Created on: Fri Aug 8 12:57:50 2003
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide utilitary
11  classes. This file belongs to the Bio++ Project.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "TextTools.h"
41 
42 using namespace bpp;
43 
44 #include <ctype.h>
45 #include <sstream>
46 #include <iomanip>
47 
48 using namespace std;
49 
50 /******************************************************************************/
51 
52 bool TextTools::isEmpty(const std::string& s)
53 {
54  for (unsigned int i = 0; i < s.size(); i++)
55  {
56  char c = s[i];
57  if (c != ' ' && c != '\n' && c != '\t')
58  return false;
59  }
60  return true;
61 }
62 
63 /******************************************************************************/
64 
65 std::string TextTools::toUpper(const std::string& s)
66 {
67  string result = "";
68  for (size_t i = 0; i < s.size(); i++)
69  {
70  result += static_cast<char>(toupper(static_cast<int>(s[i])));
71  }
72  return result;
73 }
74 
75 /******************************************************************************/
76 
77 std::string TextTools::toLower(const std::string& s)
78 {
79  string result = "";
80  for (size_t i = 0; i < s.size(); i++)
81  {
82  result += static_cast<char>(tolower(static_cast<int>(s[i])));
83  }
84  return result;
85 }
86 
87 /******************************************************************************/
88 
90 {
91  return (c == ' ')
92  || (c == '\t')
93  || (c == '\n')
94  || (c == '\r')
95  || (c == '\f');
96 }
97 
98 /******************************************************************************/
99 
100 std::string TextTools::removeWhiteSpaces(const std::string& s)
101 {
102  // Copy sequence
103  string st (s);
104 
105  // For all sequence's characters
106  for (unsigned int i = 0; i < st.size(); i++)
107  {
108  if (isWhiteSpaceCharacter(st[i]))
109  {
110  st.erase(st.begin() + i); // Remove character
111  i--;
112  }
113  }
114 
115  // Send result
116  return st;
117 }
118 
119 /******************************************************************************/
120 
121 std::string TextTools::removeFirstWhiteSpaces(const std::string& s)
122 {
123  // Copy sequence
124  string st (s);
125 
126  while (st.size() > 0 && isWhiteSpaceCharacter(st[0]))
127  {
128  st.erase(st.begin());
129  }
130 
131  // Send result
132  return st;
133 }
134 
135 /******************************************************************************/
136 
137 std::string TextTools::removeLastWhiteSpaces(const std::string& s)
138 {
139  // Copy sequence
140  string st (s);
141 
142  while (st.size() > 0 && isWhiteSpaceCharacter(st[st.size() - 1]))
143  {
144  st.erase(st.end() - 1);
145  }
146 
147  // Send result
148  return st;
149 }
150 
151 /******************************************************************************/
152 
153 std::string TextTools::removeSurroundingWhiteSpaces(const std::string& s)
154 {
155  return removeFirstWhiteSpaces(removeLastWhiteSpaces(s));
156 }
157 
158 /******************************************************************************/
159 
161 {
162  return (c == '\n')
163  || (c == '\r');
164 }
165 
166 /******************************************************************************/
167 
168 std::string TextTools::removeNewLines(const std::string& s)
169 {
170  // Copy string
171  string st (s);
172 
173  // For all string's characters
174  for (unsigned int i = 0; i < st.size(); i++)
175  {
176  if (isNewLineCharacter(st[i]))
177  {
178  st.erase(st.begin() + i); // Remove character
179  i--;
180  }
181  }
182 
183  // Send result
184  return st;
185 }
186 
187 /******************************************************************************/
188 
189 std::string TextTools::removeLastNewLines(const std::string& s)
190 {
191  // Copy string
192  string st (s);
193 
194  while (st.size() > 0 && isNewLineCharacter(st[st.size() - 1]))
195  {
196  st.erase(st.end() - 1);
197  }
198 
199  // Send result
200  return st;
201 }
202 
203 /******************************************************************************/
204 
206 {
207  if (c == '0' || c == '1' || c == '2' || c == '3' || c == '4'
208  || c == '5' || c == '6' || c == '7' || c == '8' || c == '9')
209  return true;
210  else
211  return false;
212 }
213 
214 /******************************************************************************/
215 
216 bool TextTools::isDecimalNumber(const std::string& s, char dec, char scientificNotation)
217 {
218  size_t sepCount = 0;
219  size_t sciCount = 0;
220  size_t i = 0;
221  if (s[0] == '-') i = 1;
222  for (; i < s.size(); ++i)
223  {
224  char c = s[i];
225  if (c == dec)
226  sepCount++;
227  else if (c == scientificNotation) {
228  sciCount++;
229  if (i == s.size() - 1) return false; //Must be sthg after scientific notation.
230  c = s[i + 1];
231  if (c == '-') i++;
232  if (i == s.size() - 1) return false; //Must be sthg after scientific notation.
233  if (sepCount == 0) sepCount = 1; //We do not want any dec in the exponent.
234  } else if (!isDecimalNumber(c))
235  return false;
236  if (sepCount > 1 || sciCount > 1)
237  return false;
238  }
239  return true;
240 }
241 
242 /******************************************************************************/
243 
244 bool TextTools::isDecimalInteger(const std::string& s, char scientificNotation)
245 {
246  size_t sciCount = 0;
247  size_t i = 0;
248  if (s[0] == '-') i = 1;
249  for (; i < s.size(); ++i)
250  {
251  char c = s[i];
252  if (c == scientificNotation) {
253  sciCount++;
254  if (i == s.size() - 1) return false; //Must be sthg after scientific notation.
255  c = s[i + 1];
256  if (c == '-') return false; //Not an integer then!
257  } else if (!isDecimalNumber(c))
258  return false;
259  if (sciCount > 1)
260  return false;
261  }
262  return true;
263 }
264 
265 /******************************************************************************/
266 
267 std::string TextTools::toString(int i)
268 {
269  ostringstream oss;
270  oss << i;
271  return oss.str();
272 }
273 
274 /******************************************************************************/
275 
276 std::string TextTools::toString(char c)
277 {
278  ostringstream oss;
279  oss << c;
280  return oss.str();
281 }
282 
283 /******************************************************************************/
284 
285 std::string TextTools::toString(double d, int precision)
286 {
287  ostringstream oss;
288  oss << setprecision(precision) << d;
289  return oss.str();
290 }
291 
292 /******************************************************************************/
293 
294 int TextTools::toInt(const std::string& s) throw (Exception)
295 {
296  if (!isDecimalInteger(s)) throw Exception("TextTools::toInt(). Invalid number specification: " + s);
297  istringstream iss(s);
298  int i;
299  iss >> i;
300  return i;
301 }
302 
303 /******************************************************************************/
304 
305 double TextTools::toDouble(const std::string& s) throw (Exception)
306 {
307  if (!isDecimalNumber(s)) throw Exception("TextTools::toDouble(). Invalid number specification: " + s);
308  istringstream iss(s);
309  double d;
310  iss >> d;
311  return d;
312 }
313 
314 /******************************************************************************/
315 
316 std::string TextTools::resizeRight(const std::string& s, size_t newSize, char fill)
317 {
318  if (s.size() > newSize)
319  return s.substr(0, newSize);
320  else
321  return s + string(newSize - s.size(), fill);
322 }
323 
324 /******************************************************************************/
325 
326 std::string TextTools::resizeLeft(const std::string& s, size_t newSize, char fill)
327 {
328  if (s.size() > newSize)
329  return s.substr(s.size() - newSize);
330  else
331  return string(newSize - s.size(), fill) + s;
332 }
333 
334 /******************************************************************************/
335 
336 std::vector<std::string> TextTools::split(const std::string& s, size_t n)
337 {
338  vector<string> v;
339  string tmp = s;
340  while (tmp.size() > n)
341  {
342  v.push_back(tmp.substr(0, n));
343  tmp = tmp.substr(n);
344  }
345  v.push_back(tmp);
346  return v;
347 }
348 
349 /******************************************************************************/
350 
351 std::string TextTools::removeSubstrings(const std::string& s, char blockBeginning, char blockEnding)
352 throw (Exception)
353 {
354  string t = "";
355  int blockCount = 0;
356  int begPos = 0;
357  for (unsigned int i = 0; i < s.size(); i++)
358  {
359  char current = s[i];
360  if (current == blockBeginning)
361  {
362  blockCount++;
363  t += s.substr(begPos, i - begPos);
364  }
365  else if (current == blockEnding)
366  {
367  blockCount--;
368  if (blockCount == 0)
369  {
370  begPos = i + 1;
371  }
372  else if (blockCount < 0)
373  throw Exception("TextTools::removeSubstrings(). " +
374  string("Ending block character without corresponding beginning one at position ") + toString((int)i) + ".");
375  }
376  }
377  t += s.substr(begPos);
378  return t;
379 }
380 
381 /******************************************************************************/
382 
383 std::string TextTools::removeSubstrings(const std::string& s, char blockBeginning, char blockEnding, std::vector<string>& exceptionsBeginning, std::vector<string>& exceptionsEnding)
384 throw (Exception)
385 {
386  string t = "";
387  int blockCount = 0;
388  size_t begPos = 0;
389  for (size_t i = 0; i < s.size(); i++)
390  {
391  char current = s[i];
392  if (current == blockBeginning)
393  {
394  bool except = false;
395  for (size_t j = 0; j < exceptionsBeginning.size(); j++)
396  {
397  size_t pos = exceptionsBeginning[j].find(blockBeginning);
398  if (pos != string::npos) {
399  size_t left = i - pos;
400  size_t right = i + exceptionsBeginning[j].length() - pos;
401  if ((right < s.length() - 1) && (hasSubstring (s.substr(left, right), exceptionsBeginning[j])))
402  {
403  except = true;
404  break;
405  }
406  }
407  }
408  if (!except)
409  {
410  blockCount++;
411  t += s.substr(begPos, i - begPos);
412  }
413  }
414  else if ( (current == blockEnding) && (blockCount > 0) )
415  {
416  for (size_t j = 0; j < exceptionsEnding.size(); j++)
417  {
418  size_t pos = exceptionsEnding[j].find(blockEnding);
419  if (pos != string::npos) {
420  size_t left = i - pos;
421  size_t right = i + exceptionsEnding[j].length() - pos;
422  if ((right < s.length() - 1 ) && (hasSubstring (s.substr(left, right), exceptionsEnding[j])))
423  {
424  break;
425  }
426  }
427  }
428  blockCount--;
429  if (blockCount == 0)
430  {
431  begPos = i + 1;
432  }
433  else if (blockCount < 0)
434  throw Exception("TextTools::removeSubstrings(). " +
435  string("Ending block character without corresponding beginning one at position ") + toString((int)i) + ".");
436  }
437  }
438  t += s.substr(begPos);
439  return t;
440 }
441 
442 /******************************************************************************/
443 
444 std::string TextTools::removeChar(const std::string& s, char c)
445 {
446  // Copy sequence
447  string st(s);
448 
449  // For all sequence's characters
450  for (unsigned int i = 0; i < st.size(); i++)
451  {
452  if (st[i] == c)
453  {
454  st.erase(st.begin() + i); // Remove character
455  i--;
456  }
457  }
458 
459  // Send result
460  return st;
461 }
462 
463 /******************************************************************************/
464 
465 unsigned int TextTools::count(const std::string& s, const std::string& pattern)
466 {
467  unsigned int count = 0;
468  string::size_type index = s.find(pattern);
469  while (index != string::npos)
470  {
471  count++;
472  index = s.find(pattern, index + 1);
473  }
474  return count;
475 }
476 
477 /******************************************************************************/
478 
479 bool TextTools::startsWith(const std::string& s, const std::string& pattern)
480 {
481  if (s.size() < pattern.size())
482  return false;
483  return s.substr(0, pattern.size()) == pattern;
484 }
485 
486 /******************************************************************************/
487 
488 bool TextTools::endsWith(const std::string& s, const std::string& pattern)
489 {
490  if (s.size() < pattern.size())
491  return false;
492  return s.substr(s.size() - pattern.size()) == pattern;
493 }
494 
495 /******************************************************************************/
496 
497 bool TextTools::hasSubstring(const std::string& s, const std::string& pattern)
498 {
499  if (s.size() < pattern.size())
500  return false;
501  for (size_t i = 0; i < s.size() - pattern.size() + 1; ++i)
502  {
503  if (s.substr(i, pattern.size()) == pattern)
504  return true;
505  }
506  return false;
507 }
508 
509 /******************************************************************************/
510 
511 void TextTools::replaceAll(std::string& target, const std::string& query, const std::string& replacement)
512 {
513  if (query.empty())
514  return;
515  size_t pos = 0;
516  while (pos != string::npos) {
517  pos = target.find(query, pos);
518  target.replace(pos, query.length(), replacement);
519  pos += replacement.length(); //We prevent recursivity!
520 
521  }
522 }
523 
524 /******************************************************************************/
525