00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "regexp.h"
00023
00024 #include "lexer.h"
00025 #include <stdio.h>
00026 #include <stdlib.h>
00027 #include <string.h>
00028
00029 using namespace KJS;
00030
00031 RegExp::RegExp(const UString &p, int f)
00032 : pat(p), flgs(f), m_notEmpty(false), valid(true)
00033 {
00034 nrSubPatterns = 0;
00035
00036
00037
00038
00039 UString intern;
00040 if (p.find('\\') >= 0) {
00041 bool escape = false;
00042 for (int i = 0; i < p.size(); ++i) {
00043 UChar c = p[i];
00044 if (escape) {
00045 escape = false;
00046
00047 if (c == 'u' && i + 4 < p.size()) {
00048 int c0 = p[i+1].unicode();
00049 int c1 = p[i+2].unicode();
00050 int c2 = p[i+3].unicode();
00051 int c3 = p[i+4].unicode();
00052 if (Lexer::isHexDigit(c0) && Lexer::isHexDigit(c1) &&
00053 Lexer::isHexDigit(c2) && Lexer::isHexDigit(c3)) {
00054 c = Lexer::convertUnicode(c0, c1, c2, c3);
00055 intern += UString(&c, 1);
00056 i += 4;
00057 continue;
00058 }
00059 }
00060 intern += UString('\\');
00061 intern += UString(&c, 1);
00062 } else {
00063 if (c == '\\')
00064 escape = true;
00065 else
00066 intern += UString(&c, 1);
00067 }
00068 }
00069 } else {
00070 intern = p;
00071 }
00072
00073 #ifdef HAVE_PCREPOSIX
00074 int pcreflags = 0;
00075 const char *perrormsg;
00076 int errorOffset;
00077
00078 if (flgs & IgnoreCase)
00079 pcreflags |= PCRE_CASELESS;
00080
00081 if (flgs & Multiline)
00082 pcreflags |= PCRE_MULTILINE;
00083
00084 pcregex = pcre_compile(intern.ascii(), pcreflags,
00085 &perrormsg, &errorOffset, NULL);
00086 if (!pcregex) {
00087 #ifndef NDEBUG
00088 fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", perrormsg);
00089 #endif
00090 valid = false;
00091 return;
00092 }
00093
00094 #ifdef PCRE_INFO_CAPTURECOUNT
00095
00096 int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns);
00097 if (rc != 0)
00098 #endif
00099 nrSubPatterns = 0;
00100
00101 #else
00102
00103 int regflags = 0;
00104 #ifdef REG_EXTENDED
00105 regflags |= REG_EXTENDED;
00106 #endif
00107 #ifdef REG_ICASE
00108 if ( f & IgnoreCase )
00109 regflags |= REG_ICASE;
00110 #endif
00111
00112
00113
00114
00115
00116
00117 int errorCode = regcomp(&preg, intern.ascii(), regflags);
00118 if (errorCode != 0) {
00119 #ifndef NDEBUG
00120 char errorMessage[80];
00121 regerror(errorCode, &preg, errorMessage, sizeof errorMessage);
00122 fprintf(stderr, "KJS: regcomp failed with '%s'", errorMessage);
00123 #endif
00124 valid = false;
00125 }
00126 #endif
00127 }
00128
00129 RegExp::~RegExp()
00130 {
00131 #ifdef HAVE_PCREPOSIX
00132 if (pcregex)
00133 pcre_free(pcregex);
00134 #else
00135
00136 regfree(&preg);
00137 #endif
00138 }
00139
00140 UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
00141 {
00142 if (i < 0)
00143 i = 0;
00144 if (ovector)
00145 *ovector = 0L;
00146 int dummyPos;
00147 if (!pos)
00148 pos = &dummyPos;
00149 *pos = -1;
00150 if (i > s.size() || s.isNull())
00151 return UString::null;
00152
00153 #ifdef HAVE_PCREPOSIX
00154 CString buffer(s.cstring());
00155 int bufferSize = buffer.size();
00156 int ovecsize = (nrSubPatterns+1)*3;
00157 if (ovector) *ovector = new int[ovecsize];
00158 if (!pcregex)
00159 return UString::null;
00160
00161 if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i,
00162 m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED) : 0,
00163 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
00164 {
00165
00166 if ((flgs & Global) && m_notEmpty && ovector)
00167 {
00168
00169
00170
00171 #ifdef KJS_VERBOSE
00172 fprintf(stderr, "No match after m_notEmpty. +1 and keep going.\n");
00173 #endif
00174 m_notEmpty = 0;
00175 if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i+1, 0,
00176 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
00177 return UString::null;
00178 }
00179 else
00180 return UString::null;
00181 }
00182
00183
00184
00185 if (!ovector)
00186 return UString::null;
00187 #else
00188 const uint maxMatch = 10;
00189 regmatch_t rmatch[maxMatch];
00190
00191 char *str = strdup(s.ascii());
00192 if (regexec(&preg, str + i, maxMatch, rmatch, 0)) {
00193 free(str);
00194 return UString::null;
00195 }
00196 free(str);
00197
00198 if (!ovector) {
00199 *pos = rmatch[0].rm_so + i;
00200 return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so);
00201 }
00202
00203
00204 nrSubPatterns = 0;
00205 for (uint j = 0; j < maxMatch && rmatch[j].rm_so >= 0 ; j++) {
00206 nrSubPatterns++;
00207
00208
00209 if (m_notEmpty && rmatch[j].rm_so == rmatch[j].rm_eo)
00210 return UString::null;
00211 }
00212
00213 if (nrSubPatterns == 0) nrSubPatterns = 1;
00214
00215 int ovecsize = (nrSubPatterns)*3;
00216 *ovector = new int[ovecsize];
00217 for (uint j = 0; j < nrSubPatterns; j++) {
00218 (*ovector)[2*j] = rmatch[j].rm_so + i;
00219 (*ovector)[2*j+1] = rmatch[j].rm_eo + i;
00220 }
00221 #endif
00222
00223 *pos = (*ovector)[0];
00224 if ( *pos == (*ovector)[1] && (flgs & Global) )
00225 {
00226
00227 m_notEmpty=true;
00228 }
00229 return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
00230 }
00231
00232 #if 0 // unused
00233 bool RegExp::test(const UString &s, int)
00234 {
00235 #ifdef HAVE_PCREPOSIX
00236 int ovector[300];
00237 CString buffer(s.cstring());
00238
00239 if (s.isNull() ||
00240 pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), 0,
00241 0, ovector, 300) == PCRE_ERROR_NOMATCH)
00242 return false;
00243 else
00244 return true;
00245
00246 #else
00247
00248 char *str = strdup(s.ascii());
00249 int r = regexec(&preg, str, 0, 0, 0);
00250 free(str);
00251
00252 return r == 0;
00253 #endif
00254 }
00255 #endif