lexer.cpp

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
00005  *
00006  *  This library is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU Library General Public
00008  *  License as published by the Free Software Foundation; either
00009  *  version 2 of the License, or (at your option) any later version.
00010  *
00011  *  This library is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *  Library General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU Library General Public License
00017  *  along with this library; see the file COPYING.LIB.  If not, write to
00018  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  *  Boston, MA 02110-1301, USA.
00020  *
00021  */
00022 
00023 #ifdef HAVE_CONFIG_H
00024 #include <config.h>
00025 #endif
00026 
00027 #include <ctype.h>
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <string.h>
00031 #include <assert.h>
00032 
00033 #include "value.h"
00034 #include "object.h"
00035 #include "types.h"
00036 #include "interpreter.h"
00037 #include "nodes.h"
00038 #include "lexer.h"
00039 #include "identifier.h"
00040 #include "lookup.h"
00041 #include "internal.h"
00042 
00043 // we can't specify the namespace in yacc's C output, so do it here
00044 using namespace KJS;
00045 
// Lexer instance handed out by Lexer::curr(). The Lexer constructor stores
// `this` here, and Lexer::curr() creates an instance on demand while unset.
static Lexer *currLexer = 0;
00047 
00048 #ifndef KDE_USE_FINAL
00049 #include "grammar.h"
00050 #endif
00051 
00052 #include "lexer.lut.h"
00053 
00054 extern YYLTYPE yylloc; // global bison variable holding token info
00055 
00056 // a bridge for yacc from the C world to C++
00057 int kjsyylex()
00058 {
00059   return Lexer::curr()->lex();
00060 }
00061 
00062 Lexer::Lexer()
00063   : yylineno(1),
00064     size8(128), size16(128), restrKeyword(false),
00065     eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
00066     code(0), length(0),
00067 #ifndef KJS_PURE_ECMA
00068     bol(true),
00069 #endif
00070     current(0), next1(0), next2(0), next3(0),
00071     strings(0), numStrings(0), stringsCapacity(0),
00072     identifiers(0), numIdentifiers(0), identifiersCapacity(0)
00073 {
00074   // allocate space for read buffers
00075   buffer8 = new char[size8];
00076   buffer16 = new UChar[size16];
00077   currLexer = this;
00078 }
00079 
00080 Lexer::~Lexer()
00081 {
00082   delete [] buffer8;
00083   delete [] buffer16;
00084 }
00085 
00086 Lexer *Lexer::curr()
00087 {
00088   if (!currLexer) {
00089     // create singleton instance
00090     currLexer = new Lexer();
00091   }
00092   return currLexer;
00093 }
00094 
#ifdef KJS_DEBUG_MEM
// Destroys the current lexer instance (if any) and forgets it, so that a
// later curr() call creates a fresh one.
void Lexer::globalClear()
{
  delete currLexer;
  currLexer = 0;
}
#endif
00102 
00103 void Lexer::setCode(const UChar *c, unsigned int len)
00104 {
00105   yylineno = 1;
00106   restrKeyword = false;
00107   delimited = false;
00108   eatNextIdentifier = false;
00109   stackToken = -1;
00110   lastToken = -1;
00111   foundBad = false;
00112   pos = 0;
00113   code = c;
00114   length = len;
00115   skipLF = false;
00116   skipCR = false;
00117 #ifndef KJS_PURE_ECMA
00118   bol = true;
00119 #endif
00120 
00121   // read first characters
00122   current = (length > 0) ? code[0].uc : 0;
00123   next1 = (length > 1) ? code[1].uc : 0;
00124   next2 = (length > 2) ? code[2].uc : 0;
00125   next3 = (length > 3) ? code[3].uc : 0;
00126 }
00127 
00128 void Lexer::shift(unsigned int p)
00129 {
00130   while (p--) {
00131     pos++;
00132     current = next1;
00133     next1 = next2;
00134     next2 = next3;
00135     next3 = (pos + 3 < length) ? code[pos+3].uc : 0;
00136   }
00137 }
00138 
00139 // called on each new line
00140 void Lexer::nextLine()
00141 {
00142   yylineno++;
00143 #ifndef KJS_PURE_ECMA
00144   bol = true;
00145 #endif
00146 }
00147 
00148 void Lexer::setDone(State s)
00149 {
00150   state = s;
00151   done = true;
00152 }
00153 
00154 int Lexer::lex()
00155 {
00156   int token = 0;
00157   state = Start;
00158   unsigned short stringType = 0; // either single or double quotes
00159   pos8 = pos16 = 0;
00160   done = false;
00161   terminator = false;
00162   skipLF = false;
00163   skipCR = false;
00164 
00165   // did we push a token on the stack previously ?
00166   // (after an automatic semicolon insertion)
00167   if (stackToken >= 0) {
00168     setDone(Other);
00169     token = stackToken;
00170     stackToken = 0;
00171   }
00172 
00173   while (!done) {
00174     if (skipLF && current != '\n') // found \r but not \n afterwards
00175         skipLF = false;
00176     if (skipCR && current != '\r') // found \n but not \r afterwards
00177         skipCR = false;
00178     if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
00179     {
00180         skipLF = false;
00181         skipCR = false;
00182         shift(1);
00183     }
00184 
00185     bool cr = (current == '\r');
00186     bool lf = (current == '\n');
00187     if (cr)
00188       skipLF = true;
00189     else if (lf)
00190       skipCR = true;
00191     bool isLineTerminator = cr || lf;
00192 
00193     switch (state) {
00194     case Start:
00195       if (isWhiteSpace(current)) {
00196         // do nothing
00197       } else if (current == '/' && next1 == '/') {
00198         shift(1);
00199         state = InSingleLineComment;
00200       } else if (current == '/' && next1 == '*') {
00201         shift(1);
00202         state = InMultiLineComment;
00203       } else if (current == 0) {
00204         if (!terminator && !delimited) {
00205           // automatic semicolon insertion if program incomplete
00206           token = ';';
00207           stackToken = 0;
00208           setDone(Other);
00209         } else
00210           setDone(Eof);
00211       } else if (isLineTerminator) {
00212         nextLine();
00213         terminator = true;
00214         if (restrKeyword) {
00215           token = ';';
00216           setDone(Other);
00217         }
00218       } else if (current == '"' || current == '\'') {
00219         state = InString;
00220         stringType = current;
00221       } else if (isIdentLetter(current)) {
00222         record16(current);
00223         state = InIdentifier;
00224       } else if (current == '0') {
00225         record8(current);
00226         state = InNum0;
00227       } else if (isDecimalDigit(current)) {
00228         record8(current);
00229         state = InNum;
00230       } else if (current == '.' && isDecimalDigit(next1)) {
00231         record8(current);
00232         state = InDecimal;
00233 #ifndef KJS_PURE_ECMA
00234         // <!-- marks the beginning of a line comment (for www usage)
00235       } else if (current == '<' && next1 == '!' &&
00236                  next2 == '-' && next3 == '-') {
00237         shift(3);
00238         state = InSingleLineComment;
00239         // same for -->
00240       } else if (bol && current == '-' && next1 == '-' &&  next2 == '>') {
00241         shift(2);
00242         state = InSingleLineComment;
00243 #endif
00244       } else {
00245         token = matchPunctuator(current, next1, next2, next3);
00246         if (token != -1) {
00247           setDone(Other);
00248         } else {
00249           //      cerr << "encountered unknown character" << endl;
00250           setDone(Bad);
00251         }
00252       }
00253       break;
00254     case InString:
00255       if (current == stringType) {
00256         shift(1);
00257         setDone(String);
00258       } else if (current == 0 || isLineTerminator) {
00259         setDone(Bad);
00260       } else if (current == '\\') {
00261         state = InEscapeSequence;
00262       } else {
00263         record16(current);
00264       }
00265       break;
00266     // Escape Sequences inside of strings
00267     case InEscapeSequence:
00268       if (isOctalDigit(current)) {
00269         if (current >= '0' && current <= '3' &&
00270             isOctalDigit(next1) && isOctalDigit(next2)) {
00271           record16(convertOctal(current, next1, next2));
00272           shift(2);
00273           state = InString;
00274         } else if (isOctalDigit(current) && isOctalDigit(next1)) {
00275           record16(convertOctal('0', current, next1));
00276           shift(1);
00277           state = InString;
00278         } else if (isOctalDigit(current)) {
00279           record16(convertOctal('0', '0', current));
00280           state = InString;
00281         } else {
00282           setDone(Bad);
00283         }
00284       } else if (current == 'x')
00285         state = InHexEscape;
00286       else if (current == 'u')
00287         state = InUnicodeEscape;
00288       else {
00289     if (isLineTerminator)
00290       nextLine();
00291         record16(singleEscape(current));
00292         state = InString;
00293       }
00294       break;
00295     case InHexEscape:
00296       if (isHexDigit(current) && isHexDigit(next1)) {
00297         state = InString;
00298         record16(convertHex(current, next1));
00299         shift(1);
00300       } else if (current == stringType) {
00301         record16('x');
00302         shift(1);
00303         setDone(String);
00304       } else {
00305         record16('x');
00306         record16(current);
00307         state = InString;
00308       }
00309       break;
00310     case InUnicodeEscape:
00311       if (isHexDigit(current) && isHexDigit(next1) &&
00312           isHexDigit(next2) && isHexDigit(next3)) {
00313         record16(convertUnicode(current, next1, next2, next3));
00314         shift(3);
00315         state = InString;
00316       } else if (current == stringType) {
00317         record16('u');
00318         shift(1);
00319         setDone(String);
00320       } else {
00321         setDone(Bad);
00322       }
00323       break;
00324     case InSingleLineComment:
00325       if (isLineTerminator) {
00326         nextLine();
00327         terminator = true;
00328         if (restrKeyword) {
00329           token = ';';
00330           setDone(Other);
00331         } else
00332           state = Start;
00333       } else if (current == 0) {
00334         setDone(Eof);
00335       }
00336       break;
00337     case InMultiLineComment:
00338       if (current == 0) {
00339         setDone(Bad);
00340       } else if (isLineTerminator) {
00341         nextLine();
00342       } else if (current == '*' && next1 == '/') {
00343         state = Start;
00344         shift(1);
00345       }
00346       break;
00347     case InIdentifier:
00348       if (isIdentLetter(current) || isDecimalDigit(current)) {
00349         record16(current);
00350         break;
00351       }
00352       setDone(Identifier);
00353       break;
00354     case InNum0:
00355       if (current == 'x' || current == 'X') {
00356         record8(current);
00357         state = InHex;
00358       } else if (current == '.') {
00359         record8(current);
00360         state = InDecimal;
00361       } else if (current == 'e' || current == 'E') {
00362         record8(current);
00363         state = InExponentIndicator;
00364       } else if (isOctalDigit(current)) {
00365         record8(current);
00366         state = InOctal;
00367       } else if (isDecimalDigit(current)) {
00368         record8(current);
00369         state = InDecimal;
00370       } else {
00371         setDone(Number);
00372       }
00373       break;
00374     case InHex:
00375       if (isHexDigit(current)) {
00376         record8(current);
00377       } else {
00378         setDone(Hex);
00379       }
00380       break;
00381     case InOctal:
00382       if (isOctalDigit(current)) {
00383         record8(current);
00384       }
00385       else if (isDecimalDigit(current)) {
00386         record8(current);
00387         state = InDecimal;
00388       } else
00389         setDone(Octal);
00390       break;
00391     case InNum:
00392       if (isDecimalDigit(current)) {
00393         record8(current);
00394       } else if (current == '.') {
00395         record8(current);
00396         state = InDecimal;
00397       } else if (current == 'e' || current == 'E') {
00398         record8(current);
00399         state = InExponentIndicator;
00400       } else
00401         setDone(Number);
00402       break;
00403     case InDecimal:
00404       if (isDecimalDigit(current)) {
00405         record8(current);
00406       } else if (current == 'e' || current == 'E') {
00407         record8(current);
00408         state = InExponentIndicator;
00409       } else
00410         setDone(Number);
00411       break;
00412     case InExponentIndicator:
00413       if (current == '+' || current == '-') {
00414         record8(current);
00415       } else if (isDecimalDigit(current)) {
00416         record8(current);
00417         state = InExponent;
00418       } else
00419         setDone(Bad);
00420       break;
00421     case InExponent:
00422       if (isDecimalDigit(current)) {
00423         record8(current);
00424       } else
00425         setDone(Number);
00426       break;
00427     default:
00428       assert(!"Unhandled state in switch statement");
00429     }
00430 
00431     // move on to the next character
00432     if (!done)
00433       shift(1);
00434 #ifndef KJS_PURE_ECMA
00435     if (state != Start && state != InSingleLineComment)
00436       bol = false;
00437 #endif
00438   }
00439 
00440   // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
00441   if ((state == Number || state == Octal || state == Hex)
00442       && isIdentLetter(current))
00443     state = Bad;
00444 
00445   // terminate string
00446   buffer8[pos8] = '\0';
00447 
00448 #ifdef KJS_DEBUG_LEX
00449   fprintf(stderr, "line: %d ", lineNo());
00450   fprintf(stderr, "yytext (%x): ", buffer8[0]);
00451   fprintf(stderr, "%s ", buffer8);
00452 #endif
00453 
00454   long double dval = 0;
00455   if (state == Number) {
00456     dval = strtod(buffer8, 0L);
00457   } else if (state == Hex) { // scan hex numbers
00458     dval = 0;
00459     if (buffer8[0] == '0' && (buffer8[1] == 'x' || buffer8[1] == 'X')) {
00460       for (const char *p = buffer8+2; *p; p++) {
00461     if (!isHexDigit(*p)) {
00462       dval = 0;
00463       break;
00464     }
00465     dval = dval * 16 + convertHex(*p);
00466       }
00467     }
00468     state = Number;
00469   } else if (state == Octal) {   // scan octal number
00470     dval = 0;
00471     if (buffer8[0] == '0') {
00472       for (const char *p = buffer8+1; *p; p++) {
00473     if (*p < '0' || *p > '7') {
00474       dval = 0;
00475       break;
00476     }
00477     dval = dval * 8 + *p - '0';
00478       }
00479     }
00480     state = Number;
00481   }
00482 
00483 #ifdef KJS_DEBUG_LEX
00484   switch (state) {
00485   case Eof:
00486     printf("(EOF)\n");
00487     break;
00488   case Other:
00489     printf("(Other)\n");
00490     break;
00491   case Identifier:
00492     printf("(Identifier)/(Keyword)\n");
00493     break;
00494   case String:
00495     printf("(String)\n");
00496     break;
00497   case Number:
00498     printf("(Number)\n");
00499     break;
00500   default:
00501     printf("(unknown)");
00502   }
00503 #endif
00504 
00505   if (state != Identifier && eatNextIdentifier)
00506     eatNextIdentifier = false;
00507 
00508   restrKeyword = false;
00509   delimited = false;
00510   kjsyylloc.first_line = yylineno; // ???
00511   kjsyylloc.last_line = yylineno;
00512 
00513   switch (state) {
00514   case Eof:
00515     token = 0;
00516     break;
00517   case Other:
00518     if(token == '}' || token == ';') {
00519       delimited = true;
00520     }
00521     break;
00522   case Identifier:
00523     if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
00524       // Lookup for keyword failed, means this is an identifier
00525       // Apply anonymous-function hack below (eat the identifier)
00526       if (eatNextIdentifier) {
00527         eatNextIdentifier = false;
00528 #ifdef KJS_VERBOSE
00529         UString debugstr(buffer16, pos16); fprintf(stderr,"Anonymous function hack: eating identifier %s\n",debugstr.ascii());
00530 #endif
00531         token = lex();
00532         break;
00533       }
00534       /* TODO: close leak on parse error. same holds true for String */
00535       kjsyylval.ident = makeIdentifier(buffer16, pos16);
00536       token = IDENT;
00537       break;
00538     }
00539 
00540     eatNextIdentifier = false;
00541     // Hack for "f = function somename() { ... }", too hard to get into the grammar
00542     // Same for building an array with function pointers ( 'name', func1, 'name2', func2 )
00543     // There are lots of other uses, we really have to get this into the grammar
00544     if ( token == FUNCTION &&
00545          ( lastToken == '=' || lastToken == ',' || lastToken == '(' ) )
00546             eatNextIdentifier = true;
00547 
00548     if (token == CONTINUE || token == BREAK ||
00549         token == RETURN || token == THROW)
00550       restrKeyword = true;
00551     break;
00552   case String:
00553     kjsyylval.ustr = makeUString(buffer16, pos16);
00554     token = STRING;
00555     break;
00556   case Number:
00557     kjsyylval.dval = dval;
00558     token = NUMBER;
00559     break;
00560   case Bad:
00561     foundBad = true;
00562     return -1;
00563   default:
00564     assert(!"unhandled numeration value in switch");
00565     return -1;
00566   }
00567   lastToken = token;
00568   return token;
00569 }
00570 
00571 bool Lexer::isWhiteSpace(unsigned short c)
00572 {
00573   return (c == ' ' || c == '\t' ||
00574           c == 0x0b || c == 0x0c || c == 0xa0);
00575 }
00576 
00577 bool Lexer::isIdentLetter(unsigned short c)
00578 {
00579   // Allow any character in the Unicode categories
00580   // Uppercase letter (Lu), Lowercase letter (Ll),
00581   // Titlecase letter (Lt)", Modifier letter (Lm),
00582   // Other letter (Lo), or Letter number (Nl).
00583   // Also see: http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
00584   return (c >= 'a' && c <= 'z' ||
00585           c >= 'A' && c <= 'Z' ||
00586           // A with grave - O with diaeresis
00587           c >= 0x00c0 && c <= 0x00d6 ||
00588           // O with stroke - o with diaeresis
00589           c >= 0x00d8 && c <= 0x00f6 ||
00590           // o with stroke - turned h with fishook and tail
00591           c >= 0x00f8 && c <= 0x02af ||
00592           // Greek etc. TODO: not precise
00593           c >= 0x0388 && c <= 0x1ffc ||
00594           c == '$' || c == '_');
00595   /* TODO: use complete category table */
00596 }
00597 
00598 bool Lexer::isDecimalDigit(unsigned short c)
00599 {
00600   return (c >= '0' && c <= '9');
00601 }
00602 
00603 bool Lexer::isHexDigit(unsigned short c)
00604 {
00605   return (c >= '0' && c <= '9' ||
00606           c >= 'a' && c <= 'f' ||
00607           c >= 'A' && c <= 'F');
00608 }
00609 
00610 bool Lexer::isOctalDigit(unsigned short c)
00611 {
00612   return (c >= '0' && c <= '7');
00613 }
00614 
00615 int Lexer::matchPunctuator(unsigned short c1, unsigned short c2,
00616                               unsigned short c3, unsigned short c4)
00617 {
00618   if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
00619     shift(4);
00620     return URSHIFTEQUAL;
00621   } else if (c1 == '=' && c2 == '=' && c3 == '=') {
00622     shift(3);
00623     return STREQ;
00624   } else if (c1 == '!' && c2 == '=' && c3 == '=') {
00625     shift(3);
00626     return STRNEQ;
00627    } else if (c1 == '>' && c2 == '>' && c3 == '>') {
00628     shift(3);
00629     return URSHIFT;
00630   } else if (c1 == '<' && c2 == '<' && c3 == '=') {
00631     shift(3);
00632     return LSHIFTEQUAL;
00633   } else if (c1 == '>' && c2 == '>' && c3 == '=') {
00634     shift(3);
00635     return RSHIFTEQUAL;
00636   } else if (c1 == '<' && c2 == '=') {
00637     shift(2);
00638     return LE;
00639   } else if (c1 == '>' && c2 == '=') {
00640     shift(2);
00641     return GE;
00642   } else if (c1 == '!' && c2 == '=') {
00643     shift(2);
00644     return NE;
00645   } else if (c1 == '+' && c2 == '+') {
00646     shift(2);
00647     if (terminator)
00648       return AUTOPLUSPLUS;
00649     else
00650       return PLUSPLUS;
00651   } else if (c1 == '-' && c2 == '-') {
00652     shift(2);
00653     if (terminator)
00654       return AUTOMINUSMINUS;
00655     else
00656       return MINUSMINUS;
00657   } else if (c1 == '=' && c2 == '=') {
00658     shift(2);
00659     return EQEQ;
00660   } else if (c1 == '+' && c2 == '=') {
00661     shift(2);
00662     return PLUSEQUAL;
00663   } else if (c1 == '-' && c2 == '=') {
00664     shift(2);
00665     return MINUSEQUAL;
00666   } else if (c1 == '*' && c2 == '=') {
00667     shift(2);
00668     return MULTEQUAL;
00669   } else if (c1 == '/' && c2 == '=') {
00670     shift(2);
00671     return DIVEQUAL;
00672   } else if (c1 == '&' && c2 == '=') {
00673     shift(2);
00674     return ANDEQUAL;
00675   } else if (c1 == '^' && c2 == '=') {
00676     shift(2);
00677     return XOREQUAL;
00678   } else if (c1 == '%' && c2 == '=') {
00679     shift(2);
00680     return MODEQUAL;
00681   } else if (c1 == '|' && c2 == '=') {
00682     shift(2);
00683     return OREQUAL;
00684   } else if (c1 == '<' && c2 == '<') {
00685     shift(2);
00686     return LSHIFT;
00687   } else if (c1 == '>' && c2 == '>') {
00688     shift(2);
00689     return RSHIFT;
00690   } else if (c1 == '&' && c2 == '&') {
00691     shift(2);
00692     return AND;
00693   } else if (c1 == '|' && c2 == '|') {
00694     shift(2);
00695     return OR;
00696   }
00697 
00698   switch(c1) {
00699     case '=':
00700     case '>':
00701     case '<':
00702     case ',':
00703     case '!':
00704     case '~':
00705     case '?':
00706     case ':':
00707     case '.':
00708     case '+':
00709     case '-':
00710     case '*':
00711     case '/':
00712     case '&':
00713     case '|':
00714     case '^':
00715     case '%':
00716     case '(':
00717     case ')':
00718     case '{':
00719     case '}':
00720     case '[':
00721     case ']':
00722     case ';':
00723       shift(1);
00724       return static_cast<int>(c1);
00725     default:
00726       return -1;
00727   }
00728 }
00729 
00730 unsigned short Lexer::singleEscape(unsigned short c) const
00731 {
00732   switch(c) {
00733   case 'b':
00734     return 0x08;
00735   case 't':
00736     return 0x09;
00737   case 'n':
00738     return 0x0A;
00739   case 'v':
00740     return 0x0B;
00741   case 'f':
00742     return 0x0C;
00743   case 'r':
00744     return 0x0D;
00745   case '"':
00746     return 0x22;
00747   case '\'':
00748     return 0x27;
00749   case '\\':
00750     return 0x5C;
00751   default:
00752     return c;
00753   }
00754 }
00755 
00756 unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2,
00757                                       unsigned short c3) const
00758 {
00759   return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
00760 }
00761 
00762 unsigned char Lexer::convertHex(unsigned short c)
00763 {
00764   if (c >= '0' && c <= '9')
00765     return (c - '0');
00766   else if (c >= 'a' && c <= 'f')
00767     return (c - 'a' + 10);
00768   else
00769     return (c - 'A' + 10);
00770 }
00771 
00772 unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2)
00773 {
00774   return ((convertHex(c1) << 4) + convertHex(c2));
00775 }
00776 
00777 UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2,
00778                                      unsigned short c3, unsigned short c4)
00779 {
00780   return UChar((convertHex(c1) << 4) + convertHex(c2),
00781                (convertHex(c3) << 4) + convertHex(c4));
00782 }
00783 
00784 void Lexer::record8(unsigned short c)
00785 {
00786   assert(c <= 0xff);
00787 
00788   // enlarge buffer if full
00789   if (pos8 >= size8 - 1) {
00790     char *tmp = new char[2 * size8];
00791     memcpy(tmp, buffer8, size8 * sizeof(char));
00792     delete [] buffer8;
00793     buffer8 = tmp;
00794     size8 *= 2;
00795   }
00796 
00797   buffer8[pos8++] = (char) c;
00798 }
00799 
00800 void Lexer::record16(UChar c)
00801 {
00802   // enlarge buffer if full
00803   if (pos16 >= size16 - 1) {
00804     UChar *tmp = new UChar[2 * size16];
00805     memcpy(tmp, buffer16, size16 * sizeof(UChar));
00806     delete [] buffer16;
00807     buffer16 = tmp;
00808     size16 *= 2;
00809   }
00810 
00811   buffer16[pos16++] = c;
00812 }
00813 
00814 bool Lexer::scanRegExp()
00815 {
00816   pos16 = 0;
00817   bool lastWasEscape = false;
00818   bool inBrackets = false;
00819 
00820   while (1) {
00821     if (current == '\r' || current == '\n' || current == 0)
00822       return false;
00823     else if (current != '/' || lastWasEscape == true || inBrackets == true)
00824     {
00825         // keep track of '[' and ']'
00826         if ( !lastWasEscape ) {
00827           if ( current == '[' && !inBrackets )
00828             inBrackets = true;
00829           if ( current == ']' && inBrackets )
00830             inBrackets = false;
00831         }
00832         record16(current);
00833         lastWasEscape =
00834             !lastWasEscape && (current == '\\');
00835     }
00836     else { // end of regexp
00837       pattern = UString(buffer16, pos16);
00838       pos16 = 0;
00839       shift(1);
00840       break;
00841     }
00842     shift(1);
00843   }
00844 
00845   while (isIdentLetter(current)) {
00846     record16(current);
00847     shift(1);
00848   }
00849   flags = UString(buffer16, pos16);
00850 
00851   return true;
00852 }
00853 
00854 
00855 void Lexer::doneParsing()
00856 {
00857   for (unsigned i = 0; i < numIdentifiers; i++) {
00858     delete identifiers[i];
00859   }
00860   free(identifiers);
00861   identifiers = 0;
00862   numIdentifiers = 0;
00863   identifiersCapacity = 0;
00864 
00865   for (unsigned i = 0; i < numStrings; i++) {
00866     delete strings[i];
00867   }
00868   free(strings);
00869   strings = 0;
00870   numStrings = 0;
00871   stringsCapacity = 0;
00872 }
00873 
00874 const int initialCapacity = 64;
00875 const int growthFactor = 2;
00876 
00877 Identifier *Lexer::makeIdentifier(UChar *buffer, unsigned int pos)
00878 {
00879   if (numIdentifiers == identifiersCapacity) {
00880     identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
00881     identifiers = (KJS::Identifier **)realloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity);
00882   }
00883 
00884   KJS::Identifier *identifier = new KJS::Identifier(buffer, pos);
00885   identifiers[numIdentifiers++] = identifier;
00886   return identifier;
00887 }
00888 
00889 UString *Lexer::makeUString(UChar *buffer, unsigned int pos)
00890 {
00891   if (numStrings == stringsCapacity) {
00892     stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
00893     strings = (UString **)realloc(strings, sizeof(UString *) * stringsCapacity);
00894   }
00895 
00896   UString *string = new UString(buffer, pos);
00897   strings[numStrings++] = string;
00898   return string;
00899 }
KDE Home | KDE Accessibility Home | Description of Access Keys