core/vul/vul_string.cxx
Go to the documentation of this file.
00001 // This is core/vul/vul_string.cxx
00002 #include "vul_string.h"
00003 //:
00004 // \file
00005 
00006 #include <vcl_cassert.h>
00007 #include <vcl_cstdlib.h>
00008 #include <vcl_cstring.h>
00009 #include <vcl_cctype.h>
00010 #include <vcl_algorithm.h>
00011 #include <vcl_sstream.h>
00012 #include <vcl_cmath.h>
00013 #include <vul/vul_reg_exp.h>
00014 #include <vul/vul_sprintf.h>
00015 
00016 #ifndef END_OF_STRING                           // If END_OF_STRING not defined
00017 #define END_OF_STRING (0)
00018 #endif
00019 
00020 // Converts all alphabetical characters to uppercase.
00021 char* vul_string_c_upcase(char* s)  // Convert entire string to upper case
00022 {
00023   char* p = s;                   // Point to beginning of string
00024   while (*p) {                   // While there are still valid characters
00025     if (vcl_islower(*p))         // if this is lower case
00026       *p = (char)vcl_toupper(*p);// convert to uppercase
00027     p++;                         // Advance pointer
00028   }
00029   return s;                      // Return reference to modified string
00030 }
00031 
00032 // Converts all alphabetical characters to lowercase.
00033 char* vul_string_c_downcase(char* s)  // Convert entire string to lower case
00034 {
00035   char* p = s;                   // Point to beginning of string
00036   while (*p) {                   // While there are still valid characters
00037     if (vcl_isupper(*p))         // if this is upper case
00038       *p = (char)vcl_tolower(*p);// convert to lowercase
00039     p++;                         // Advance pointer
00040   }
00041   return s;                      // Return reference to modified string
00042 }
00043 
00044 // Capitalizes all words in a string. A word is defined as
00045 // a sequence of characters separated by non-alphanumerics.
00046 char* vul_string_c_capitalize(char* s)  // Capitalize each word in string
00047 {
00048   char* p = s;                           // Point to beginning of string
00049   while (true) {                         // Infinite loop
00050     for (; *p && !vcl_isalnum(*p); p++) ;// Skip to first alphanumeric
00051     if (*p == END_OF_STRING)             // If end of string
00052       return s;                          // Return string
00053     *p = (char)vcl_toupper(*p);          // Convert character
00054     while (*++p && vcl_isalnum(*p)) ;    // Search for next word
00055   }
00056 }
00057 
// Removes from str every character that appears in rem, in place.
// rem is treated as a set of characters, not as a substring.
// \param str  writable, NUL-terminated string to filter.
// \param rem  NUL-terminated set of characters to drop.
// \return str itself.
char* vul_string_c_trim(char* str, const char* rem)
{
  char* out = str;                          // next position to write a kept character
  for (const char* in = str; *in != '\0'; ++in) {
    const char* r = rem;
    while (*r != '\0' && *r != *in)         // look for *in in the removal set
      ++r;
    if (*r == '\0')                         // not in the set: keep it
      *out++ = *in;                         // out never overtakes in
  }
  *out = '\0';                              // terminate the compacted string
  return str;
}
00075 
// Strips from the front of str every leading character that appears in rem,
// in place.  rem is treated as a set of characters, not as a substring.
// \param str  writable, NUL-terminated string.
// \param rem  NUL-terminated set of characters to strip.
// \return str itself.
char* vul_string_c_left_trim(char* str, const char* rem)
{
  // Find the first character that is NOT a member of rem.
  const char* src = str;
  for (; *src != '\0'; ++src) {
    const char* r = rem;
    while (*r != '\0' && *r != *src)
      ++r;
    if (*r == '\0')                         // *src is not in the set: stop
      break;
  }
  if (src != str) {                         // something was trimmed
    char* dst = str;
    while ((*dst++ = *src++) != '\0') ;     // shift the tail (and its NUL) down
  }
  return str;
}
00094 
00095 // Removes any suffix occurrence of the string rem
00096 // from the first string str, and returns the modified string str.
00097 char* vul_string_c_right_trim(char* str, const char* rem) // Trim suffix from string
00098 {
00099   char* s = str + vcl_strlen(str) - 1;            // last character of str
00100   for (; s >= str; s--) {
00101     register const char* r = rem;
00102     register char t;
00103     register char c = *s;
00104     while ((t=*r++) != END_OF_STRING && t != c) ; // Scan for match
00105     if (t == END_OF_STRING)                       // If no match found
00106       break;
00107   }
00108   *(s+1) = END_OF_STRING;
00109   return str;                                     // Return pointer to string
00110 }
00111 
00112 // Reverses the order of the characters in char*.
00113 char* vul_string_c_reverse(char* c)     // Reverse the order of characters
00114 {
00115   int length = (int)vcl_strlen(c);      // Number of characters in string
00116   char temp;
00117 
00118   for (int i = 0, j = length-1;         // Counting from front and rear
00119        i < j; ++i, --j)                 // until we reach the middle
00120   {
00121     temp = c[i];                        // Save front character
00122     c[i] = c[j];                        // Switch with rear character
00123     c[j] = temp;                        // Copy new rear character
00124   }
00125   return c;
00126 }
00127 
00128 // Reverses the order of the characters in string
00129 vcl_string& vul_string_reverse(vcl_string& s)
00130 {
00131   for (int i=0, j=(int)vcl_strlen(s.c_str())-1; i<j; ++i,--j)
00132   {
00133     char c = s[i]; s[i] = s[j]; s[j] = c;
00134   }
00135   return s;
00136 }
00137 
00138 // In some implementations of <cctype>, toupper and tolower are macros
00139 // instead of functions.  In that case, they cannot be passed as 4th argument
00140 // to std::transform.  Hence it's easier to "inline" std::transform here,
00141 // instead of using it explicitly. - PVr.
00142 
00143 // Converts all alphabetical characters in string s to uppercase.
00144 vcl_string& vul_string_upcase(vcl_string& s)
00145 {
00146   for (vcl_string::iterator i=s.begin(); i != s.end(); ++i)
00147     *i = (char)vcl_toupper(*i);
00148   return s;
00149 }
00150 
00151 // Converts all alphabetical characters in string s to lowercase.
00152 vcl_string& vul_string_downcase(vcl_string& s)
00153 {
00154   for (vcl_string::iterator i=s.begin(); i != s.end(); ++i)
00155     *i = (char)vcl_tolower(*i);
00156   return s;
00157 }
00158 
00159 // Capitalizes all words in string s.
00160 vcl_string& vul_string_capitalize(vcl_string& s)
00161 {
00162   // Word beginnings are defined as the transition from
00163   // non-alphanumeric to alphanumeric, and word endings as the reverse
00164   // transition.
00165   vcl_string::iterator si;
00166   bool in_word = false;
00167   for ( si = s.begin(); si != s.end(); ++si ) {
00168     if ( !in_word && vcl_isalnum( *si ) ) {
00169       *si = (char)vcl_toupper( *si );
00170       in_word = true;
00171     }
00172     else if ( in_word && !vcl_isalnum( *si ) ) {
00173       in_word = false;
00174     }
00175   }
00176   return s;
00177 }
00178 
00179 // Removes any occurrence of the character string rem
00180 // from the string sr, and returns the modified string sr.
00181 vcl_string& vul_string_trim(vcl_string& sr, const char* rem)
00182 {
00183   int l = (int)vcl_strlen(rem);
00184   for (;;) {
00185     vcl_string::size_type loc = sr.find(rem);
00186     if (loc == vcl_string::npos)
00187       break;
00188     sr.erase(loc, l);
00189   }
00190   return sr;
00191 }
00192 
00193 // Removes any prefix occurrence of the character string rem
00194 // from the string sr, and returns the modified string sr.
00195 vcl_string& vul_string_left_trim(vcl_string& sr, const char* rem)
00196 {
00197   int l = (int)vcl_strlen(rem);
00198   if (vcl_strncmp(sr.c_str(), rem, l) == 0)
00199     sr.erase(0, l);
00200   return sr;
00201 }
00202 
00203 // Removes any suffix occurrence of the character string rem
00204 // from the string sr, and returns the modified string sr.
00205 vcl_string& vul_string_right_trim(vcl_string& sr, const char* rem)
00206 {
00207   int l = (int)vcl_strlen(rem);
00208   int lsr = int(sr.length());
00209   if (vcl_strncmp(sr.c_str() + lsr - l, rem, l) == 0)
00210     sr.erase(lsr - l, l);
00211   return sr;
00212 }
00213 
00214 int vul_string_atoi(vcl_string const& s)
00215 {
00216   return vcl_atoi(s.c_str());
00217 }
00218 
00219 double vul_string_atof(vcl_string const& s)
00220 {
00221   return vcl_atof(s.c_str());
00222 }
00223 
00224 
00225 //: Reads a double from a string, with k, kb, M, etc suffix.
00226 // No space is allowed between the number and the suffix.
00227 // k=10^3, kb=2^10, M=10^6, Mb=2^20, G=10^9, Gb=2^30, T=10^12, Tb=2^40
00228 // If parse fails, return 0.0;
00229 double vul_string_atof_withsuffix(vcl_string const& s)
00230 {
00231   vcl_istringstream ss(s);
00232   double d;
00233   ss >> d;
00234   if (!ss) return 0.0;
00235   if (ss.eof()) return d;
00236 
00237   char c='A';
00238   ss >> c;
00239   if (ss.eof()) return d;
00240 
00241   double e=0;
00242   switch (c)
00243   {
00244     case 'k': e=1; break;
00245     case 'M': e=2; break;
00246     case 'G': e=3; break;
00247     case 'T': e=4; break;
00248     default: return 0.0;
00249   }
00250   if (ss.eof()) return d*vcl_pow(10.0,3.0*e);
00251 
00252   c='A';
00253   ss >> c;
00254   if (ss.eof()) return d*vcl_pow(10.0,3.0*e);
00255   if (!ss || c!='i') return 0.0;
00256 
00257   ss >> c;
00258   if (!ss.eof()) return 0.0;
00259 
00260   return d*vcl_pow(2.0,10.0*e);
00261 }
00262 
00263 static bool NotSpace(char a)
00264 {
00265   return !vcl_isspace(a);
00266 }
00267 
00268 template <class IT>
00269 static bool myequals(IT b1, IT e1,
00270                      const char * b2, const char * e2)
00271 {
00272   for (;b1 != e1 && b2 != e2; ++b1, ++b2)
00273     if (vcl_toupper(*b1) != *b2) return false;
00274   return b1 == e1
00275       && b2 == e2;
00276 }
00277 
00278 bool vul_string_to_bool(const vcl_string &str)
00279 {
00280   vcl_string::const_iterator begin = vcl_find_if(str.begin(), str.end(), NotSpace);
00281   const vcl_string::const_reverse_iterator rend(begin);
00282   vcl_string::const_iterator end = vcl_find_if(str.rbegin(), rend, NotSpace).base();
00283   const char *syes = "YES";
00284   const char *strue = "TRUE";
00285   const char *s1 = "1";
00286   const char *son = "ON";
00287   return myequals(begin, end, syes, syes+3)
00288      ||  myequals(begin, end, strue, strue+4)
00289      ||  myequals(begin, end, s1, s1+1)
00290      ||  myequals(begin, end, son, son+2);
00291 }
00292 
00293 
00294 //: Convert a string to a list of ints, using the matlab index format.
00295 // e.g. "0,1,10:14,20:-2:10" results in 0,1,10,11,12,13,14,20,18,16,14,12,10
00296 // No spaces are allowed.
00297 // \return empty on error.
00298 vcl_vector<int> vul_string_to_int_list(vcl_string str)
00299 {
00300   vcl_vector<int> rv;
00301 
00302 
00303 #define REGEXP_INTEGER "\\-?[0123456789]+"
00304 
00305   vul_reg_exp range_regexp("(" REGEXP_INTEGER ")"      // int
00306                            "([:-]" REGEXP_INTEGER ")?" // :int [optional]
00307                            "([:-]" REGEXP_INTEGER ")?" // :int [optional]
00308                           );
00309 
00310 
00311   while (str.length() > 0 && range_regexp.find(str)) {
00312     // the start/end positions (ref from 0) of the
00313     //    current ',' separated token.
00314     vcl_ptrdiff_t start= range_regexp.start(0);
00315     vcl_ptrdiff_t endp = range_regexp.end(0);
00316     if (start != 0)
00317     {
00318       rv.clear();
00319       return rv;
00320     }
00321 
00322 
00323     vcl_string match1 = range_regexp.match(1);
00324     vcl_string match2 = range_regexp.match(2);
00325     vcl_string match3 = range_regexp.match(3);
00326 
00327 
00328     // Remove this match from the front of string.
00329     str.erase(0, endp);
00330     if (str.size() > 1 && str[0] == ',' ) str.erase(0, 1);
00331 
00332     bool matched2 = range_regexp.match(2).size() > 0;
00333     bool matched3 = range_regexp.match(3).size() > 0;
00334 
00335     int s = vul_string_atoi(match1);
00336     int d = 1;
00337     int e = s;
00338     if (matched3) {
00339       // "1:2:10"
00340       d = vul_string_atoi(match2.substr(1));
00341       e = vul_string_atoi(match3.substr(1));
00342     }
00343     else if (matched2)
00344       e = vul_string_atoi(match2.substr(1));
00345 
00346     if (d==0)
00347     {
00348       rv.clear();
00349       return rv;
00350     }
00351 
00352     if (e >= s)
00353     {
00354       if (d < 0) d = -d;
00355       for (int i = s; i <= e; i += d)
00356         rv.push_back(i);
00357     }
00358     else
00359     {
00360       if (d > 0) d = -d;
00361       for (int i = s; i >= e; i += d)
00362         rv.push_back(i);
00363     }
00364   }
00365 
00366   if (!str.empty())
00367     rv.clear();
00368 
00369   return rv;
00370 }
00371 
00372 
00373 //Leave verbatim in to avoid $->LaTeX munging.
00374 
00375 //: Expand any environment variables in the string.
00376 // Expands "foo$VARfoo" to "foobarfoo" when $VAR=bar. If both $VAR and $VARfoo
00377 // exist, an arbitrary choice will be made of which variable to use.
00378 // This problem can be avoided by using the syntax "foo${VAR}foo." "$(VAR)"
00379 // and "$[VAR]" can also be used.
00380 // There are no inbuilt variables like in shell scripting, and variable names
00381 // cannot contain whitespace or "$"s.
00382 // "$$" can be used to insert a literal "$" into the output.
00383 // \returns false if a matching variable could not be found.
00384 bool vul_string_expand_var(vcl_string &str)
00385 {
00386   vcl_string::size_type i = 0; // index to current char.
00387   const vcl_string::size_type npos = vcl_string::npos;
00388 
00389   // If there is a problem, carry on trying to convert rest
00390   bool success=true; //  of string, but remember failure.
00391 
00392   enum {not_in_var, start_var, in_var, in_bracket_var} state = not_in_var;
00393   vcl_string::size_type var_begin = 0;
00394 
00395   vcl_string::size_type bracket_type = npos; //index into open_brackets.
00396   const vcl_string  open_brackets("{([");
00397   const vcl_string close_brackets("})]");
00398 
00399   while (i<str.size())
00400   {
00401     switch (state)
00402     {
00403      case not_in_var: // not currently in a variable
00404       if (str[i] == '$')
00405       {
00406         state = start_var;
00407         var_begin = i;
00408       }
00409       break;
00410      case start_var: // just started a variable
00411       if (str[i] == '$')
00412       {
00413         str.erase(i,1);
00414         state=not_in_var;
00415         continue;
00416       }
00417       else if ((bracket_type = open_brackets.find_first_of(str[i])) != npos)
00418       {
00419         state=in_bracket_var;
00420         break;
00421       }
00422       else // or this is the first letter of the variable, in which case go through
00423         state=in_var;
00424      case in_var:  // in a non-bracketed variable
00425       assert(var_begin+1 < str.size());
00426       assert(i > var_begin);
00427       if (str[i] == '$')
00428       { // no dollars allowed - assume we missed last variable and this is a new one.
00429         success=false;
00430         state = start_var;
00431         var_begin = i;
00432         break;
00433       }
00434       else
00435       {
00436         const char * value= vcl_getenv(str.substr(var_begin+1, i-var_begin).c_str());
00437         if (value)
00438         {
00439           str.replace(var_begin, i+1-var_begin, value);
00440           i = var_begin + vcl_strlen(value);
00441           state=not_in_var;
00442           continue;
00443         }
00444       }
00445       break;
00446      case in_bracket_var:  // in a bracketed variable
00447       if (str[i] == close_brackets[bracket_type])
00448       {
00449         assert(var_begin+2 < str.size());
00450         assert(i > var_begin+1);
00451         state=not_in_var;
00452         if (i==var_begin+2) // empty variable name
00453         {
00454           success=false;
00455           break;
00456         }
00457         else
00458         {
00459           const char * value= vcl_getenv(str.substr(var_begin+2, i-var_begin-2).c_str());
00460           if (value)
00461           {
00462             str.replace(var_begin, i+1-var_begin, value);
00463             i = var_begin + vcl_strlen(value);
00464             continue;
00465           }
00466           else
00467             success=false;
00468         }
00469       }
00470       break;
00471      default: // do nothing (silently ignore invalid state)
00472       break;
00473     }
00474     ++i;
00475   }
00476   return success;
00477 }
00478 
00479 //: replaces instances "find_str" in "full_str" with "replace_str" a given "num_times".
00480 //  \returns true iff at least one replacement took place.
00481 bool vul_string_replace(vcl_string& full_str,
00482                         const vcl_string& find_str,
00483                         const vcl_string& replace_str,
00484                         int num_times)
00485 {
00486   bool rep=false;
00487   for (int i = 0; i<num_times; i++)
00488   {
00489     int loc = int(full_str.find( find_str,0));
00490     if (loc >= 0)
00491     {
00492       full_str.replace( loc, find_str.length(), replace_str );
00493       rep=true;
00494     }
00495     else
00496     {
00497       return rep;
00498     }
00499   }
00500   return rep;
00501 }
00502 
00503 
00504 //: Replace control chars with escaped representations.
00505 // Space and "\n" are preserved, but tabs, CR, etc are escaped.
00506 // This is not aimed and is not suitable for any particular input-validation
00507 // security problem, such as sql-injection.
00508 vcl_string vul_string_escape_ctrl_chars(const vcl_string &in)
00509 {
00510   vcl_string out;
00511 
00512   const static vcl_string special("\t\v\b\r\f\a\\");
00513   const static vcl_string special_tr("tvbrfa\\");
00514 
00515   for (vcl_string::const_iterator it=in.begin(), end=in.end(); it!=end; ++it)
00516   {
00517     if (!vcl_iscntrl(*it) || *it=='\n')
00518       out+=*it;
00519     else
00520     {
00521       vcl_string::size_type i=special.find(*it);
00522       if (i==vcl_string::npos)
00523         out+=vul_sprintf("\\x%02x",static_cast<int>(*it));
00524       else
00525       {
00526         out+='\\';
00527         out+=special_tr[i];
00528       }
00529     }
00530   }
00531   return out;
00532 }