00001 // This is core/vul/vul_reg_exp.h 00002 #ifndef vul_reg_exph 00003 #define vul_reg_exph 00004 //: 00005 // \file 00006 // \brief contains class for pattern matching with regular expressions 00007 // \author Texas Instruments Incorporated. 00008 // 00009 // \verbatim 00010 // Modifications 00011 // PDA (Manchester) 21/03/2001: Tidied up the documentation 00012 // Peter Vanroose 27/05/2001: Corrected the documentation 00013 // Peter Vanroose 07/02/2002: brief doxygen comment placed on single line 00014 // Peter Vanroose 13/06/2002: bug fix: crash in match() when startp==endp==0 00015 // Ian Scott 08/06/2003: Add protect(char) function 00016 // \endverbatim 00017 // 00018 // Original Copyright notice: 00019 // Copyright (C) 1991 Texas Instruments Incorporated. 00020 // 00021 // Permission is granted to any individual or institution to use, copy, modify, 00022 // and distribute this software, provided that this complete copyright and 00023 // permission notice is maintained, intact, in all copies and supporting 00024 // documentation. 00025 // 00026 // Texas Instruments Incorporated provides this software "as is" without 00027 // express or implied warranty. 00028 00029 #include <vcl_string.h> 00030 #include <vcl_cstddef.h> // for vcl_ptrdiff_t 00031 00032 const int vul_reg_exp_nsubexp = 10; 00033 00034 //: Pattern matching with regular expressions. 00035 // A regular expression allows a programmer to specify complex 00036 // patterns that can be searched for and matched against the 00037 // character string of a string object. In its simplest form, a 00038 // regular expression is a sequence of characters used to search for 00039 // exact character matches. However, many times the exact sequence to 00040 // be found is not known, or only a match at the beginning or end of 00041 // a string is desired. This regular expression class implements 00042 // regular expression pattern matching as is found and implemented in 00043 // many UNIX commands and utilities. 00044 // 00045 // Example: The perl code 00046 // \code 00047 // $filename =~ m"([a-z]+)\.cc"; 00048 // print $1; 00049 // \endcode 00050 // is written as follows in C++ 00051 // \code 00052 // vul_reg_exp re("([a-z]+)\\.cc"); 00053 // re.find(filename); 00054 // vcl_cout << re.match(1); 00055 // \endcode 00056 // 00057 // The regular expression class provides a convenient mechanism for 00058 // specifying and manipulating regular expressions. The regular 00059 // expression object allows specification of such patterns by using 00060 // the following regular expression metacharacters: 00061 // 00062 // - ^ Matches at beginning of a line 00063 // - $ Matches at end of a line 00064 // - . Matches any single character 00065 // - [ ] Matches any character(s) inside the brackets 00066 // - [^ ] Matches any character(s) not inside the brackets 00067 // - [ - ] Matches any character in range on either side of a dash 00068 // - * Matches preceding pattern zero or more times 00069 // - + Matches preceding pattern one or more times 00070 // - ? Matches preceding pattern at most once 00071 // - () Saves a matched expression and uses it in a later match 00072 // 00073 // Note that more than one of these metacharacters can be used in a 00074 // single regular expression in order to create complex search 00075 // patterns. For example, the pattern [^ab1-9] says to match any 00076 // character sequence that does not begin with the characters "a", 00077 // "b", or the characters "1" through "9". 00078 // 00079 class vul_reg_exp 00080 { 00081 //: anchor point of start position for n-th matching regular expression 00082 const char* startp[vul_reg_exp_nsubexp]; 00083 //: anchor point of end position for n-th matching regular expression 00084 const char* endp[vul_reg_exp_nsubexp]; 00085 //: Internal use only 00086 char regstart; 00087 //: Internal use only 00088 char reganch; 00089 //: Internal use only 00090 const char* regmust; 00091 //: Internal use only 00092 int regmlen; 00093 char* program; 00094 int progsize; 00095 const char* searchstring; 00096 public: 00097 //: Creates an empty regular expression. 00098 inline vul_reg_exp() : program(0) { clear_bufs(); } 00099 //: Creates a regular expression from string s, and compiles s. 00100 inline vul_reg_exp(char const* s) : program(0) { clear_bufs(); compile(s); } 00101 //: Copy constructor 00102 vul_reg_exp(vul_reg_exp const&); 00103 //: Frees space allocated for regular expression. 00104 inline ~vul_reg_exp() { delete[] this->program; } 00105 //: Compiles char* --> regexp 00106 void compile(char const*); 00107 //: true if regexp in char* arg 00108 bool find(char const*); 00109 //: true if regexp in char* arg 00110 bool find(vcl_string const&); 00111 //: Returns the start index of the last item found. 00112 inline vcl_ptrdiff_t start() const { return this->startp[0] - searchstring; } 00113 //: Returns the end index of the last item found. 00114 inline vcl_ptrdiff_t end() const { return this->endp[0] - searchstring; } 00115 //: Equality operator 00116 bool operator==(vul_reg_exp const&) const; 00117 //: Inequality operator 00118 inline bool operator!=(vul_reg_exp const& r) const { return !operator==(r); } 00119 //: Same regexp and state? 00120 bool deep_equal(vul_reg_exp const&) const; 00121 //: Returns true if a valid RE is compiled and ready for pattern matching. 00122 inline bool is_valid() const { return this->program != 0; } 00123 //: Invalidates regular expression. 00124 inline void set_invalid() { delete[] this->program; this->program = 0; clear_bufs(); } 00125 00126 //: Return start index of nth submatch. 00127 // start(0) is the start of the full match. 00128 inline vcl_ptrdiff_t start(long n) const { return this->startp[n] - searchstring; } 00129 //: Return end index of nth submatch. 00130 // end(0) is the end of the full match. 00131 inline vcl_ptrdiff_t end(long n) const { return this->endp[n] - searchstring; } 00132 //: Return nth submatch as a string. 00133 vcl_string match(int n) const { 00134 return this->endp[n] == this->startp[n] ? vcl_string("") : 00135 vcl_string(this->startp[n], this->endp[n] - this->startp[n]); 00136 } 00137 //: Return an expression that will match precisely c 00138 // The returned string is owned by the function, and 00139 // will be overwritten in subsequent calls. 00140 static const char * protect(char c); 00141 00142 private: 00143 //: private function to clear startp[] and endp[] 00144 void clear_bufs() { for (int n=0; n<vul_reg_exp_nsubexp; ++n) startp[n]=endp[n]=0; } 00145 }; 00146 00147 #endif // vul_reg_exph