regexp_object.cpp

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
00005  *  Copyright (C) 2003 Apple Computer, Inc.
00006  *
00007  *  This library is free software; you can redistribute it and/or
00008  *  modify it under the terms of the GNU Lesser General Public
00009  *  License as published by the Free Software Foundation; either
00010  *  version 2 of the License, or (at your option) any later version.
00011  *
00012  *  This library is distributed in the hope that it will be useful,
00013  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  *  Lesser General Public License for more details.
00016  *
00017  *  You should have received a copy of the GNU Lesser General Public
00018  *  License along with this library; if not, write to the Free Software
00019  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00020  *
00021  */
00022 
00023 #include <stdio.h>
00024 
00025 #include "value.h"
00026 #include "object.h"
00027 #include "types.h"
00028 #include "interpreter.h"
00029 #include "operations.h"
00030 #include "internal.h"
00031 #include "regexp.h"
00032 #include "regexp_object.h"
00033 #include "error_object.h"
00034 #include "lookup.h"
00035 
00036 using namespace KJS;
00037 
00038 // ------------------------------ RegExpPrototypeImp ---------------------------
00039 
00040 // ECMA 15.9.4
00041 
00042 const ClassInfo RegExpPrototypeImp::info = {"RegExp", 0, 0, 0};
00043 
00044 RegExpPrototypeImp::RegExpPrototypeImp(ExecState *exec,
00045                                        ObjectPrototypeImp *objProto,
00046                                        FunctionPrototypeImp *funcProto)
00047   : ObjectImp(objProto)
00048 {
00049   Value protect(this);
00050   setInternalValue(String(""));
00051 
00052   // The constructor will be added later in RegExpObject's constructor (?)
00053 
00054   static const Identifier execPropertyName("exec");
00055   putDirect(execPropertyName,
00056         new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Exec,     0, execPropertyName), DontEnum);
00057   static const Identifier testPropertyName("test");
00058   putDirect(testPropertyName,
00059         new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Test,     0, testPropertyName), DontEnum);
00060   putDirect(toStringPropertyName,
00061         new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::ToString, 0, toStringPropertyName), DontEnum);
00062   static const Identifier compilePropertyName("compile");
00063   putDirect(compilePropertyName,
00064             new RegExpProtoFuncImp(exec,funcProto,RegExpProtoFuncImp::Compile,  1, compilePropertyName), DontEnum);
00065 }
00066 
00067 // ------------------------------ RegExpProtoFuncImp ---------------------------
00068 
00069 RegExpProtoFuncImp::RegExpProtoFuncImp(ExecState * /*exec*/, FunctionPrototypeImp *funcProto,
00070                                        int i, int len, const Identifier &_ident)
00071   : InternalFunctionImp(funcProto), id(i)
00072 {
00073   Value protect(this);
00074   putDirect(lengthPropertyName, len, DontDelete|ReadOnly|DontEnum);
00075   ident = _ident;
00076 }
00077 
00078 bool RegExpProtoFuncImp::implementsCall() const
00079 {
00080   return true;
00081 }
00082 
00083 Value RegExpProtoFuncImp::call(ExecState *exec, Object &thisObj, const List &args)
00084 {
00085   if (!thisObj.inherits(&RegExpImp::info)) {
00086     if (thisObj.inherits(&RegExpPrototypeImp::info)) {
00087       switch (id) {
00088         case ToString: return String("//"); // FireFox returns /(?:)/
00089       }
00090     }
00091     Object err = Error::create(exec,TypeError);
00092     exec->setException(err);
00093     return err;
00094   }
00095 
00096   RegExpImp *reimp = static_cast<RegExpImp*>(thisObj.imp());
00097   RegExp *re = reimp->regExp();
00098   String s;
00099   UString str;
00100   switch (id) {
00101   case Exec:      // 15.10.6.2
00102   case Test:
00103   {
00104     s = args[0].toString(exec);
00105     int length = s.value().size();
00106 
00107     // Get values from the last time (in case of /g)
00108     Value lastIndex = thisObj.get(exec,"lastIndex");
00109     int i = lastIndex.isValid() ? lastIndex.toInt32(exec) : 0;
00110     bool globalFlag = thisObj.get(exec,"global").toBoolean(exec);
00111     if (!globalFlag)
00112       i = 0;
00113     if (i < 0 || i > length) {
00114       thisObj.put(exec,"lastIndex", Number(0), DontDelete | DontEnum);
00115       if (id == Test)
00116         return Boolean(false);
00117       else
00118         return Null();
00119     }
00120     RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp().imp());
00121     int **ovector = regExpObj->registerRegexp( re, s.value() );
00122 
00123     str = re->match(s.value(), i, 0L, ovector);
00124     regExpObj->setSubPatterns(re->subPatterns());
00125 
00126     if (id == Test)
00127       return Boolean(!str.isNull());
00128 
00129     if (str.isNull()) // no match
00130     {
00131       if (globalFlag)
00132         thisObj.put(exec,"lastIndex",Number(0), DontDelete | DontEnum);
00133       return Null();
00134     }
00135     else // success
00136     {
00137       if (globalFlag)
00138         thisObj.put(exec,"lastIndex",Number( (*ovector)[1] ), DontDelete | DontEnum);
00139       return regExpObj->arrayOfMatches(exec,str);
00140     }
00141   }
00142   break;
00143   case ToString:
00144     s = thisObj.get(exec,"source").toString(exec);
00145     str = "/";
00146     str += s.value();
00147     str += "/";
00148     if (thisObj.get(exec,"global").toBoolean(exec)) {
00149       str += "g";
00150     }
00151     if (thisObj.get(exec,"ignoreCase").toBoolean(exec)) {
00152       str += "i";
00153     }
00154     if (thisObj.get(exec,"multiline").toBoolean(exec)) {
00155       str += "m";
00156     }
00157     return String(str);
00158   case Compile: {
00159       RegExp* newEngine = RegExpObjectImp::makeEngine(exec, args[0].toString(exec), args[1]);
00160       if (!newEngine)
00161         return exec->exception();
00162       reimp->setRegExp(newEngine);
00163       return Value(reimp);
00164     }
00165   }
00166   
00167 
00168   return Undefined();
00169 }
00170 
00171 // ------------------------------ RegExpImp ------------------------------------
00172 
00173 const ClassInfo RegExpImp::info = {"RegExp", 0, 0, 0};
00174 
00175 RegExpImp::RegExpImp(RegExpPrototypeImp *regexpProto)
00176   : ObjectImp(regexpProto), reg(0L)
00177 {
00178 }
00179 
00180 RegExpImp::~RegExpImp()
00181 {
00182   delete reg;
00183 }
00184 
00185 void RegExpImp::setRegExp(RegExp *r)
00186 {
00187   delete reg;
00188   reg = r;
00189 
00190   Object protect(this);//Protect self from GC (we are allocating a StringImp, and may be new)
00191   putDirect("global", (r->flags() & RegExp::Global) ? BooleanImp::staticTrue : BooleanImp::staticFalse, 
00192             DontDelete | ReadOnly | DontEnum);
00193   putDirect("ignoreCase", (r->flags() & RegExp::IgnoreCase) ? BooleanImp::staticTrue : BooleanImp::staticFalse, 
00194             DontDelete | ReadOnly | DontEnum);
00195   putDirect("multiline", (r->flags() & RegExp::Multiline) ? BooleanImp::staticTrue : BooleanImp::staticFalse, 
00196             DontDelete | ReadOnly | DontEnum);
00197 
00198   putDirect("source", new StringImp(r->pattern()), DontDelete | ReadOnly | DontEnum);
00199   putDirect("lastIndex", NumberImp::zero(), DontDelete | DontEnum);
00200 }
00201 
00202 // ------------------------------ RegExpObjectImp ------------------------------
00203 
00204 RegExpObjectImp::RegExpObjectImp(ExecState * /*exec*/,
00205                                  FunctionPrototypeImp *funcProto,
00206                                  RegExpPrototypeImp *regProto)
00207 
00208   : InternalFunctionImp(funcProto), lastOvector(0L), lastNrSubPatterns(0)
00209 {
00210   Value protect(this);
00211   // ECMA 15.10.5.1 RegExp.prototype
00212   putDirect(prototypePropertyName, regProto, DontEnum|DontDelete|ReadOnly);
00213 
00214   // no. of arguments for constructor
00215   putDirect(lengthPropertyName, NumberImp::two(), ReadOnly|DontDelete|DontEnum);
00216 }
00217 
00218 RegExpObjectImp::~RegExpObjectImp()
00219 {
00220   delete [] lastOvector;
00221 }
00222 
00223 int **RegExpObjectImp::registerRegexp( const RegExp* re, const UString& s )
00224 {
00225   lastString = s;
00226   delete [] lastOvector;
00227   lastOvector = 0;
00228   lastNrSubPatterns = re->subPatterns();
00229   return &lastOvector;
00230 }
00231 
00232 Object RegExpObjectImp::arrayOfMatches(ExecState *exec, const UString &result) const
00233 {
00234   List list;
00235   // The returned array contains 'result' as first item, followed by the list of matches
00236   list.append(String(result));
00237   if ( lastOvector )
00238     for ( unsigned int i = 1 ; i < lastNrSubPatterns + 1 ; ++i )
00239     {
00240       UString substring = lastString.substr( lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] );
00241       list.append(String(substring));
00242     }
00243   Object arr = exec->lexicalInterpreter()->builtinArray().construct(exec, list);
00244   arr.put(exec, "index", Number(lastOvector[0]));
00245   arr.put(exec, "input", String(lastString));
00246   return arr;
00247 }
00248 
00249 Value RegExpObjectImp::get(ExecState *exec, const Identifier &p) const
00250 {
00251   UString s = p.ustring();
00252   if (s[0] == '$' && lastOvector)
00253   {
00254     bool ok;
00255     unsigned long i = s.substr(1).toULong(&ok);
00256     if (ok)
00257     {
00258       if (i < lastNrSubPatterns + 1)
00259       {
00260         UString substring = lastString.substr( lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] );
00261         return String(substring);
00262       }
00263       return String("");
00264     }
00265   }
00266   return InternalFunctionImp::get(exec, p);
00267 }
00268 
00269 bool RegExpObjectImp::implementsConstruct() const
00270 {
00271   return true;
00272 }
00273 
00274 RegExp* RegExpObjectImp::makeEngine(ExecState *exec, const UString &p, const Value &flagsInput)
00275 {
00276   UString flags = flagsInput.type() == UndefinedType ? UString("") : flagsInput.toString(exec);
00277 
00278   // Check for validity of flags
00279   for (int pos = 0; pos < flags.size(); ++pos) {
00280     switch (flags[pos].unicode()) {
00281     case 'g':
00282     case 'i':
00283     case 'm':
00284       break;
00285     default: {
00286         Object err = Error::create(exec, SyntaxError,
00287                     "Invalid regular expression flags");
00288         exec->setException(err);
00289         return 0;
00290       }
00291     }
00292   }
00293 
00294   bool global = (flags.find("g") >= 0);
00295   bool ignoreCase = (flags.find("i") >= 0);
00296   bool multiline = (flags.find("m") >= 0);
00297 
00298   int reflags = RegExp::None;
00299   if (global)
00300       reflags |= RegExp::Global;
00301   if (ignoreCase)
00302       reflags |= RegExp::IgnoreCase;
00303   if (multiline)
00304       reflags |= RegExp::Multiline;
00305 
00306   RegExp *re = new RegExp(p, reflags);
00307   if (!re->isValid()) {
00308     Object err = Error::create(exec, SyntaxError,
00309                                "Invalid regular expression");
00310     exec->setException(err);
00311     delete re;
00312     return 0;
00313   }
00314   return re;
00315 }
00316 
00317 // ECMA 15.10.4
00318 Object RegExpObjectImp::construct(ExecState *exec, const List &args)
00319 {
00320   UString p;
00321   if (args.isEmpty()) {
00322       p = "";
00323   } else {
00324     Value a0 = args[0];
00325     if (a0.isA(ObjectType) && a0.toObject(exec).inherits(&RegExpImp::info)) {
00326       // It's a regexp. Check that no flags were passed.
00327       if (args.size() > 1 && args[1].type() != UndefinedType) {
00328           Object err = Error::create(exec,TypeError);
00329           exec->setException(err);
00330           return err;
00331       }
00332       RegExpImp *rimp = static_cast<RegExpImp*>(Object::dynamicCast(a0).imp());
00333       p = rimp->regExp()->pattern();
00334     } else {
00335       p = a0.toString(exec);
00336     }
00337   }
00338 
00339   RegExp* re = makeEngine(exec, p, args[1]);
00340   if (!re)
00341     return exec->exception().toObject(exec);
00342 
00343   RegExpPrototypeImp *proto = static_cast<RegExpPrototypeImp*>(exec->lexicalInterpreter()->builtinRegExpPrototype().imp());
00344   RegExpImp *dat = new RegExpImp(proto);
00345   Object obj(dat); // protect from GC
00346   dat->setRegExp(re);
00347 
00348   return obj;
00349 }
00350 
00351 bool RegExpObjectImp::implementsCall() const
00352 {
00353   return true;
00354 }
00355 
00356 // ECMA 15.10.3
00357 Value RegExpObjectImp::call(ExecState *exec, Object &/*thisObj*/,
00358                 const List &args)
00359 {
00360   // TODO: handle RegExp argument case (15.10.3.1)
00361 
00362   return construct(exec, args);
00363 }
KDE Home | KDE Accessibility Home | Description of Access Keys