lexer.h

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
00005  *
00006  *  This library is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU Library General Public
00008  *  License as published by the Free Software Foundation; either
00009  *  version 2 of the License, or (at your option) any later version.
00010  *
00011  *  This library is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *  Library General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU Library General Public License
00017  *  along with this library; see the file COPYING.LIB.  If not, write to
00018  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  *  Boston, MA 02110-1301, USA.
00020  *
00021  */
00022 
00023 #ifndef _KJSLEXER_H_
00024 #define _KJSLEXER_H_
00025 
00026 #include "ustring.h"
00027 
00028 
00029 namespace KJS {
00030 
      // Forward declarations only; neither class is defined in this header.
00031   class Identifier;
00032 
00033   class RegExp;
00034 
      // Declaration of the KJS lexer. Source text is supplied as a buffer of
      // UChar via setCode() and tokens are requested through lex(). Only the
      // three inline accessors below have bodies here; every other member
      // function is defined outside this header.
      //
      // NOTE(review): the class declares a destructor and holds several raw
      // pointers, but declares no copy constructor or assignment operator.
      // Whether copying a Lexer is safe depends on ownership rules that are
      // not visible in this header -- confirm against the implementation.
00035   class Lexer {
00036   public:
00037     Lexer();
00038     ~Lexer();
          // Static accessor returning a Lexer pointer; presumably the lexer
          // instance currently in use -- confirm in the implementation.
00039     static Lexer *curr();
00040 
          // Hands the lexer a buffer `c` of `len` UChar elements to scan.
          // NOTE(review): the pointer is const; whether the buffer is copied
          // or merely referenced (see `code` below) is not shown here.
00041     void setCode(const UChar *c, unsigned int len);
          // Returns an int; presumably the code of the next token -- the
          // meaning of the values is defined outside this header.
00042     int lex();
00043 
          // Reports yylineno + 1 (i.e. the stored counter shifted by one;
          // presumably the counter is zero-based -- confirm).
00044     int lineNo() const { return yylineno + 1; }
00045 
          // Returns the current value of the `terminator` flag.
00046     bool prevTerminator() const { return terminator; }
00047 
          // Scanner states. Note that `Identifier` is an enumerator here, so
          // inside Lexer the unqualified name refers to this enumerator and
          // the class of the same name has to be written KJS::Identifier
          // (as done for makeIdentifier() and `identifiers` below).
00048     enum State { Start,
00049                  Identifier,
00050                  InIdentifier,
00051                  InSingleLineComment,
00052                  InMultiLineComment,
00053                  InNum,
00054                  InNum0,
00055                  InHex,
00056                  InOctal,
00057                  InDecimal,
00058                  InExponentIndicator,
00059                  InExponent,
00060                  Hex,
00061                  Octal,
00062                  Number,
00063                  String,
00064                  Eof,
00065                  InString,
00066                  InEscapeSequence,
00067                  InHexEscape,
00068                  InUnicodeEscape,
00069                  Other,
00070                  Bad };
00071 
          // Returns a bool; presumably true when a regular expression literal
          // was scanned successfully, leaving its parts in the public
          // `pattern` and `flags` members -- confirm in the implementation.
00072     bool scanRegExp();
00073     UString pattern, flags;
          // Returns the current value of the `foundBad` flag.
00074     bool hadError() const { return foundBad; }
00075 
          // Character classification helpers. All are static and take a
          // single 16-bit-or-wider unsigned code unit.
00076     static bool isWhiteSpace(unsigned short c);
00077     static bool isIdentLetter(unsigned short c);
00078     static bool isDecimalDigit(unsigned short c);
00079     static bool isHexDigit(unsigned short c);
00080     static bool isOctalDigit(unsigned short c);
00081 
00082   private:
          // Line counter read by lineNo().
00083     int yylineno;
00084     bool done;
          // Two scratch buffers, one of char and one of UChar, each with its
          // own size and position counters; presumably filled by record8()
          // and record16() respectively -- confirm.
00085     char *buffer8;
00086     UChar *buffer16;
00087     unsigned int size8, size16;
00088     unsigned int pos8, pos16;
          // Flag exposed through prevTerminator().
00089     bool terminator;
00090     bool restrKeyword;
00091     // encountered delimiter like "'" and "}" on last run
00092     bool delimited;
00093     bool skipLF;
00094     bool skipCR;
00095     bool eatNextIdentifier;
00096     int stackToken;
00097     int lastToken;
          // Flag exposed through hadError().
00098     bool foundBad;
00099 
00100     State state;
00101     void setDone(State s);
00102     unsigned int pos;
00103     void shift(unsigned int p);
00104     void nextLine();
00105     int lookupKeyword(const char *);
00106 
          // Takes four consecutive code units; presumably the current
          // character and the three lookahead characters declared below.
00107     int matchPunctuator(unsigned short c1, unsigned short c2,
00108                         unsigned short c3, unsigned short c4);
00109     unsigned short singleEscape(unsigned short c) const;
00110     unsigned short convertOctal(unsigned short c1, unsigned short c2,
00111                                 unsigned short c3) const;
00112   public:
          // Static conversion helpers taking one, two or four code units
          // (presumably hexadecimal digits -- confirm). They are public and
          // static, so they can be called without a Lexer instance.
00113     static unsigned char convertHex(unsigned short c1);
00114     static unsigned char convertHex(unsigned short c1, unsigned short c2);
00115     static UChar convertUnicode(unsigned short c1, unsigned short c2,
00116                                 unsigned short c3, unsigned short c4);
00117 
          // globalClear() is only declared when KJS_DEBUG_MEM is defined.
00118 #ifdef KJS_DEBUG_MEM
00119 
00122     static void globalClear();
00123 #endif
00124 
00125     void doneParsing();
00126 
00127   private:
00128 
00129     void record8(unsigned short c);
00130     void record16(UChar c);
00131 
          // Both factories take a UChar buffer and an unsigned count/position
          // and return a raw pointer. NOTE(review): presumably the results are
          // tracked in `identifiers` / `strings` below -- confirm who frees them.
00132     KJS::Identifier *makeIdentifier(UChar *buffer, unsigned int pos);
00133     UString *makeUString(UChar *buffer, unsigned int pos);
00134 
          // Pointer to const UChar plus a length; presumably the input passed
          // to setCode() -- confirm.
00135     const UChar *code;
00136     unsigned int length;
00137     int yycolumn;
          // `bol` exists only when KJS_PURE_ECMA is not defined, so the data
          // layout of Lexer depends on that macro: every translation unit that
          // includes this header must agree on whether it is defined.
00138 #ifndef KJS_PURE_ECMA
00139     int bol;     // begin of line
00140 #endif
00141 
00142     // current and following unicode characters
00143     unsigned short current, next1, next2, next3;
00144 
          // Array of UString pointers with a used count and a capacity.
00145     UString **strings;
00146     unsigned int numStrings;
00147     unsigned int stringsCapacity;
00148 
          // Array of KJS::Identifier pointers with a used count and a capacity.
          // The KJS:: qualifier is required because of State::Identifier.
00149     KJS::Identifier **identifiers;
00150     unsigned int numIdentifiers;
00151     unsigned int identifiersCapacity;
00152 
00153     // for future extensions
          // LexerPrivate is declared but not defined in this header.
00154     class LexerPrivate;
00155     LexerPrivate *priv;
00156   };
00157 
00158 } // namespace
00159 
00160 #endif
KDE Home | KDE Accessibility Home | Description of Access Keys