Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

stringutils.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio and University of Montreal 00006 // 00007 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 00038 00039 /* ******************************************************* 00040 * $Id: stringutils.cc,v 1.21 2004/07/21 16:30:51 chrish42 Exp $ 00041 * AUTHORS: Pascal Vincent 00042 * This file is part of the PLearn library. 00043 ******************************************************* */ 00044 00045 #include "stringutils.h" 00046 #include "general.h" 00047 00048 #if USING_MPI 00049 #include <plearn/sys/PLMPI.h> 00050 #endif //USING_MPI 00051 00052 namespace PLearn { 00053 using namespace std; 00054 00055 00056 string left(const string& s, size_t width, char padding) 00057 { 00058 if(s.length()>width) 00059 return s; 00060 else 00061 return s+string(width-s.length(),padding); 00062 } 00063 00064 string right(const string& s, size_t width, char padding) 00065 { 00066 if(s.length()>width) 00067 return s; 00068 else 00069 return string(width-s.length(),padding)+s; 00070 } 00071 00072 string center(const string& s, size_t width, char padding) 00073 { 00074 if(s.length()>width) 00075 return s; 00076 else 00077 return string((width-s.length())/2,padding) + s + 00078 string((width-s.length()+1)/2,padding); 00079 } 00080 00081 // this function handle numbers with exponents (such as 10.2E09) 00082 // as well as Nans. String can have trailing whitespaces on both sides 00083 bool pl_isnumber(const string& str, double* dbl) 00084 { 00085 double d; 00086 string s=removeblanks(str); 00087 char* l; 00088 d = strtod(s.c_str(),&l); 00089 if(s=="")d=MISSING_VALUE; 00090 if(dbl!=NULL)*dbl=d; 00091 return ((unsigned char)(l-s.c_str())==s.length()); 00092 } 00093 00094 // norman: there is no strtof in .NET 00095 #ifndef WIN32 00096 bool pl_isnumber(const string& str, float* dbl) { 00097 float d; 00098 string s=removeblanks(str); 00099 char* l; 00100 d = strtof(s.c_str(),&l); 00101 if(s=="")d=MISSING_VALUE; 00102 if(dbl!=NULL)*dbl=d; 00103 return ((unsigned char)(l-s.c_str())==s.length()); 00104 } 00105 #endif // WIN32 00106 00107 long tolong(const string& s, int base) 00108 { 00109 const char* nptr = s.c_str(); 00110 char* endptr; 00111 long result = strtol(nptr,&endptr,base); 00112 if(endptr==nptr) { // no character to be read 00113 string err = string("in toint string is not an int: ") + s; 00114 PLERROR(err.c_str()); 00115 } 00116 return result; 00117 } 00118 00119 bool tobool(const string& s) 00120 { 00121 if (s=="true" || s=="1") return true; 00122 if (s=="false" || s=="0") return false; 00123 PLERROR("tobool: can't convert string %s into a boolean",s.c_str()); 00124 return false; 00125 } 00126 00127 double todouble(const string& s) 00128 { 00129 const char* nptr = s.c_str(); 00130 char* endptr; 00131 double result = strtod(nptr,&endptr); 00132 if(endptr==nptr) // no character to be read 00133 result = MISSING_VALUE; 00134 return result; 00135 } 00136 00137 string extract_filename(const string& filepath) 00138 { 00139 size_t p = filepath.rfind(slash); 00140 if (p != string::npos) 00141 return filepath.substr(p+1,filepath.length()-(p+1)); 00142 else 00143 return filepath; 00144 } 00145 00146 string extract_directory(const string& filepath) 00147 { 00148 size_t p = filepath.rfind(slash); 00149 if (p != string::npos) 00150 return filepath.substr(0,p+1); 00151 else 00152 { 00153 string dot = "."; 00154 return dot+slash; 00155 } 00156 } 00157 00158 string extract_extension(const string& filepath) 00159 { 00160 string filename = extract_filename(filepath); 00161 size_t p = filename.rfind("."); 00162 if (p != string::npos) 00163 return filename.substr(p,filename.length()-p); 00164 else 00165 return ""; 00166 } 00167 00168 string extract_filename_without_extension(const string& filepath) 00169 { 00170 string filename = extract_filename(filepath); 00171 size_t p = filename.rfind("."); 00172 if (p != string::npos) 00173 return filename.substr(0,p); 00174 else 00175 return filename; 00176 } 00177 00178 string remove_extension(const string& filename) 00179 { 00180 size_t p = filename.rfind("."); 00181 if (p != string::npos) 00182 return filename.substr(0,p); 00183 else 00184 return filename; 00185 } 00186 00187 string* data_filename_2_filenames(const string& filename, int& nb_files) 00188 { 00189 ifstream in(filename.c_str()); 00190 if (!in) 00191 PLERROR("In data_filename_2_filenames: couldn't open file %s", 00192 filename.c_str()); 00193 00194 const int buffersize = 100; 00195 string* filenames = new string[buffersize]; 00196 nb_files = 0; 00197 string fname; 00198 while (getline(in, fname, '\n')) 00199 filenames[nb_files++] = fname; 00200 00201 return filenames; 00202 } 00203 00204 string removeblanks(const string& s) 00205 { 00206 size_t start=0; 00207 size_t end=0; 00208 size_t i; 00209 for(i=0; i<s.length(); i++) 00210 if(s[i]!=' ' && s[i]!='\t' && s[i]!='\n' && s[i]!='\r') 00211 break; 00212 00213 if(i==s.length()) 00214 return string(""); 00215 else 00216 start = i; 00217 00218 for(i=s.length()-1; i>=0; i--) 00219 if(s[i]!=' ' && s[i]!='\t' && s[i]!='\n' && s[i]!='\r') 00220 break; 00221 end = i; 00222 return s.substr(start,end-start+1); 00223 } 00224 00225 string removeallblanks(const string& s) 00226 { 00227 string res; 00228 size_t l = s.length(); 00229 for(size_t i=0; i<l; i++) 00230 { 00231 char c = s[i]; 00232 if(c!=' ' && c!='\t' && c!='\n' && c!='\r') 00233 res += c; 00234 } 00235 return res; 00236 } 00237 00238 string removenewline(const string& s) 00239 { 00240 size_t pos = s.length()-1; 00241 while(pos>=0 && (s[pos]=='\r' || s[pos]=='\n')) 00242 pos--; 00243 return s.substr(0,pos+1); 00244 } 00245 00246 string remove_trailing_slash(const string& s) 00247 { 00248 size_t pos = s.length()-1; 00249 while( s[pos]==slash_char && pos>=0 ) 00250 pos--; 00251 return s.substr(0,pos+1); 00252 } 00253 00254 string append_slash(const string& path) 00255 { 00256 size_t l = path.length(); 00257 if(l>0 && path[l-1]!=slash_char) 00258 return path+slash; 00259 else 00260 return path; 00261 } 00262 00263 string lowerstring(const string& ss) 00264 { 00265 string s(ss); 00266 string::iterator it = s.begin(), end = s.end(); 00267 00268 // for some reason toupper and tolower from ctype.h seem to create 00269 // problems when compiling in optimized mode, so we do this 00270 for (; it != end; ++it) 00271 { 00272 if(*it>='A' && *it<='Z') 00273 *it += 'a'-'A'; 00274 } 00275 return s; 00276 } 00277 00278 string upperstring(const string& ss) 00279 { 00280 string s(ss); 00281 string::iterator it = s.begin(), end = s.end(); 00282 00283 // for some reason toupper and tolower from ctype.h seem to create 00284 // problems when compiling in optimized mode, so we do this 00285 for (; it != end; ++it) 00286 { 00287 if(*it>='a' && *it<='z') 00288 *it -= 'a'-'A'; 00289 } 00290 return s; 00291 } 00292 00293 string pgetline(istream& in) 00294 { 00295 string line; 00296 getline(in,line); 00297 //cout << "output: " << line.length() << endl; 00298 //cout << "line: " << line << endl; 00299 //cout << (int) in.peek() << endl; 00300 return removenewline(line); 00301 } 00302 00303 bool isBlank(const string& s) 00304 { 00305 int l = (int)s.length(); 00306 for(int i=0; i<l; i++) 00307 { 00308 char c = s[i]; 00309 if(c=='#' || c=='\n' || c=='\r') 00310 return true; 00311 else if(c!=' ' && c!='\t') 00312 return false; 00313 } 00314 return true; // empty line 00315 } 00316 00317 00318 bool isParagraphBlank(const string& s) 00319 { 00320 int l = (int)s.length(); 00321 bool in_comment=false; 00322 for(int i=0; i<l; i++) 00323 { 00324 char c = s[i]; 00325 if(c=='#') 00326 in_comment=true; 00327 else if(c=='\n' || c=='\r') 00328 in_comment=false; 00329 else if(c!=' ' && c!='\t' && !in_comment) 00330 return false; 00331 } 00332 return true; // empty line 00333 } 00334 00335 string space_to_underscore(string str) 00336 { 00337 for(size_t i=0; i<str.size(); i++) 00338 { 00339 if(str[i]<=' ') 00340 str[i] = '_'; 00341 } 00342 return str; 00343 } 00344 00345 string underscore_to_space(string str) 00346 { 00347 for(size_t i=0; i<str.size(); i++) 00348 { 00349 if(str[i]=='_') 00350 str[i] = ' '; 00351 } 00352 return str; 00353 } 00354 00355 string backslash_to_slash(string str) 00356 { 00357 for(size_t i=0; i<str.size(); i++) 00358 { 00359 if(str[i]=='\\') 00360 str[i] = '/'; 00361 } 00362 return str; 00363 } 00364 00365 00366 int search_replace(string& text, const string& searchstr, const string& replacestr) 00367 { 00368 int n = 0; 00369 size_t startpos = text.find(searchstr, 0); 00370 while(startpos!=string::npos) 00371 { 00372 text.replace(startpos, searchstr.length(), replacestr); 00373 ++n; 00374 startpos = text.find(searchstr, startpos+replacestr.length()); 00375 } 00376 return n; 00377 } 00378 00379 00380 vector<string> split(const string& s, char delimiter) 00381 { 00382 vector<string> res; 00383 int l = (int)s.length(); 00384 int beg = 0; 00385 int end = 0; 00386 00387 while(end<=l) 00388 { 00389 while(end<l && s[end]!=delimiter) 00390 ++end; 00391 res.push_back(s.substr(beg,end-beg)); 00392 ++end; 00393 beg = end; 00394 } 00395 00396 return res; 00397 } 00398 00399 vector<string> split(const string& s, const string& delimiters, bool keep_delimiters) 00400 { 00401 vector<string> result; 00402 00403 size_t startpos = 0; 00404 size_t endpos = 0; 00405 00406 for(;;) 00407 { 00408 startpos = endpos; 00409 endpos = s.find_first_not_of(delimiters,startpos); 00410 if(endpos==string::npos) 00411 { 00412 if(keep_delimiters) 00413 result.push_back(s.substr(startpos)); 00414 break; 00415 } 00416 if(keep_delimiters && endpos>startpos) 00417 result.push_back(s.substr(startpos,endpos-startpos)); 00418 00419 startpos = endpos; 00420 endpos = s.find_first_of(delimiters,startpos); 00421 if(endpos==string::npos) 00422 { 00423 result.push_back(s.substr(startpos)); 00424 break; 00425 } 00426 result.push_back(s.substr(startpos,endpos-startpos)); 00427 } 00428 00429 return result; 00430 } 00431 00432 /* 00433 int pos = 0; 00434 int startpos = 0; 00435 while(pos<s.length()) 00436 { 00437 startpos = pos; 00438 // while character at position pos is a delimiter 00439 while (pos<s.length() && delimiters.find(s.substr(pos,1))!=string::npos) 00440 pos++; 00441 if(keepdelimiters && pos>startpos) 00442 result.push_back(s.substr(startpos, pos-startpos)); 00443 00444 if(pos==s.length()) 00445 break; 00446 00447 startpos = pos; 00448 // while character at position pos is NOT a delimiter 00449 while (pos<s.length() && delimiters.find(s.substr(pos,1))==string::npos) 00450 pos++; 00451 if(pos>startpos) 00452 result.push_back(s.substr(startpos, pos-startpos)); 00453 } 00454 return result; 00455 00456 */ 00457 00458 00459 void split_on_first(const string& s, 00460 const string& delimiters, string& left, string& right) 00461 { 00462 size_t pos = s.find_first_of(delimiters); 00463 if (pos != string::npos) 00464 { 00465 left = s.substr(0,pos); 00466 right = s.substr(pos+1); 00467 } 00468 else 00469 { 00470 left = s; 00471 right = ""; 00472 } 00473 } 00474 00475 pair<string,string> split_on_first(const string& s, 00476 const string& delimiters) 00477 { 00478 string left, right; 00479 split_on_first(s, delimiters, left, right); 00480 return make_pair(left,right); 00481 } 00482 00483 00484 void remove_comments(string& text, const string& commentstart) 00485 { 00486 size_t startpos=0; 00487 size_t endpos=0; 00488 while(endpos!=string::npos) 00489 { 00490 startpos = text.find(commentstart,startpos); 00491 if(startpos==string::npos) 00492 break; 00493 endpos = text.find_first_of("\n\r",startpos); 00494 text.erase(startpos, endpos-startpos); 00495 } 00496 } 00497 00498 00499 string join(const vector<string>& s, const string& separator) 00500 { 00501 string result; 00502 vector<string>::const_iterator it = s.begin(); 00503 if(it!=s.end()) 00504 { 00505 for(;;) 00506 { 00507 result += *it; 00508 ++it; 00509 if(it==s.end()) 00510 break; 00511 result += separator; 00512 } 00513 } 00514 return result; 00515 } 00516 00517 vector<string> remove(const vector<string> &v, string element) 00518 { 00519 vector<string> res; 00520 for (size_t i=0;i<v.size();i++) 00521 if (v[i]!=element) res.push_back(v[i]); 00522 return res; 00523 } 00524 00525 int findit(const vector<string> &v, string element) 00526 { 00527 for (size_t i=0;i<v.size();i++) 00528 if (v[i]==element) return (int)i; 00529 return -1; 00530 } 00531 00532 vector<string> addprepostfix(const string& prefix, const vector<string>& names, const string& postfix) 00533 { 00534 vector<string> newnames(names.size()); 00535 vector<string>::const_iterator it = names.begin(); 00536 vector<string>::iterator newit = newnames.begin(); 00537 while(it!=names.end()) 00538 { 00539 *newit = prefix + *it + postfix; 00540 ++it; 00541 ++newit; 00542 } 00543 return newnames; 00544 } 00545 00546 string addprepostfix(const string& prefix, const string& text, const string& postfix) 00547 { 00548 size_t startpos = 0; 00549 size_t endpos = 0; 00550 string txt = removenewline(text); 00551 string res; 00552 while(endpos!=string::npos) 00553 { 00554 endpos = txt.find_first_of("\n",startpos); 00555 if(endpos!=string::npos) 00556 res += prefix + txt.substr(startpos, endpos-startpos) + postfix + "\n"; 00557 else 00558 res += prefix + txt.substr(startpos) + postfix + "\n"; 00559 startpos = endpos + 1; 00560 } 00561 return res; 00562 } 00563 00564 vector<string> stringvector(int argc, char** argv) 00565 { 00566 if(argc>0) 00567 { 00568 vector<string> result(argc); 00569 for(int i=0; i<argc; i++) 00570 result[i] = string(argv[i]); 00571 return result; 00572 } 00573 else 00574 return vector<string>(); 00575 } 00576 00577 string get_option(const vector<string> &command_line, 00578 const string& option, const string& default_value) 00579 { 00580 int n=(int)command_line.size(); 00581 for (int i=0;i<n;i++) 00582 if (command_line[i]==option && i+1<n) return command_line[i+1]; 00583 return default_value; 00584 } 00585 00586 bool find(const vector<string> &command_line, const string& option) 00587 { 00588 int n=(int)command_line.size(); 00589 for (int i=0;i<n;i++) 00590 if (command_line[i]==option) return true; 00591 return false; 00592 } 00593 00594 vector<string> getNonBlankLines(const string & in) 00595 { 00596 vector<string> lines; 00597 vector<string> nblines; 00598 00599 char sep[3]={10,13,0}; 00600 lines= split(in,sep); 00601 for(size_t i=0;i<lines.size();i++) 00602 if(!isBlank(lines[i])) 00603 nblines.push_back(lines[i]); 00604 return nblines; 00605 } 00606 00607 00608 ostream& operator<<(ostream& out, const vector<string>& vs) 00609 { 00610 vector<string>::const_iterator it = vs.begin(); 00611 if(it!=vs.end()) 00612 { 00613 out << *it; 00614 ++it; 00615 } 00616 while(it!=vs.end()) 00617 { 00618 out << ", " << *it; 00619 ++it; 00620 } 00621 return out; 00622 } 00623 00624 string tostring(const double& x) 00625 { 00626 ostringstream out; 00627 int ix = (int)x; 00628 if (ix == x) 00629 out << ix; 00630 else { 00631 out.precision(12); 00632 out << x; 00633 } 00634 return out.str(); 00635 } 00636 00637 string tostring(const float& x) 00638 { 00639 ostringstream out; 00640 int ix = (int)x; 00641 if (ix == x) 00642 out << ix; 00643 else { 00644 out.precision(8); 00645 out << x; 00646 } 00647 return out.str(); 00648 } 00649 00650 00651 } // end of namespace PLearn 00652 00653 00654 00655

Generated on Tue Aug 17 16:07:31 2004 for PLearn by doxygen 1.3.7