From b9aea3fa46f675899e152caf4502e1ccdf9df7b0 Mon Sep 17 00:00:00 2001 From: megacz Date: Fri, 30 Jan 2004 06:59:06 +0000 Subject: [PATCH] 2003/04/23 05:22:31 darcs-hash:20040130065906-2ba56-cd28fed76c13d3d196b61a8891470a37d30e41c9.gz --- src/org/xwt/js/Lexer.java | 491 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 491 insertions(+) create mode 100644 src/org/xwt/js/Lexer.java diff --git a/src/org/xwt/js/Lexer.java b/src/org/xwt/js/Lexer.java new file mode 100644 index 0000000..123491c --- /dev/null +++ b/src/org/xwt/js/Lexer.java @@ -0,0 +1,491 @@ +// This file was derived from org.mozilla.javascript.TokenStream; it +// is covered by the NPL 1.1. + +/** + * The contents of this file are subject to the Netscape Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. 
/**
 * Tokenizer for the JavaScript dialect used by XWT scripts.
 *
 * <p>Derived from org.mozilla.javascript.TokenStream; covered by the
 * NPL 1.1.  Contributor(s): Roger Lawrence, Mike McCabe.
 *
 * <p>{@link #getToken()} returns one token code per call (or -1 at end of
 * input).  Details that do not fit in the code are left in fields:
 * {@link #op} for operator/keyword subtypes, {@link #number} for NUMBER
 * tokens and {@link #string} for NAME and STRING tokens.
 */
class Lexer {

    /** Character source; provides one character of pushback and text capture. */
    SmartReader in;

    /** Subtype of the last token that has one (e.g. ADD for "+=", TRUE for "true"). */
    int op;

    /** Value of the last NUMBER token. */
    Number number;

    /** Text of the last NAME token, or decoded contents of the last STRING token. */
    String string;

    public Lexer(Reader r) { in = new SmartReader(r); }

    // Token Constants //////////////////////////////////////////////////////////

    public final static int
        EOL          = 1,   // end of line
        RETURN       = 5,
        GOTO         = 6,
        BITOR        = 11,
        BITXOR       = 12,
        BITAND       = 13,
        EQ           = 14,
        NE           = 15,
        LT           = 16,
        LE           = 17,
        GT           = 18,
        GE           = 19,
        LSH          = 20,
        RSH          = 21,
        URSH         = 22,
        ADD          = 23,
        SUB          = 24,
        MUL          = 25,
        DIV          = 26,
        MOD          = 27,
        BITNOT       = 28,
        DELPROP      = 31,
        TYPEOF       = 32,
        NAME         = 44,
        NUMBER       = 45,
        STRING       = 46,
        NULL         = 49,
        THIS         = 50,
        FALSE        = 51,
        TRUE         = 52,
        SHEQ         = 53,  // shallow equality (===)
        SHNE         = 54,  // shallow inequality (!==)
        THROW        = 62,
        IN           = 63,
        INSTANCEOF   = 64,
        TRY          = 75,
        SEMI         = 89,  // semicolon
        LB           = 90,  // left bracket
        RB           = 91,  // right bracket
        LC           = 92,  // left curly brace
        RC           = 93,  // right curly brace
        LP           = 94,  // left paren
        RP           = 95,  // right paren
        COMMA        = 96,  // comma operator
        ASSIGN       = 97,  // assignment ops (= += -= etc.)
        HOOK         = 98,  // conditional (?:)
        COLON        = 99,  // colon
        OR           = 100, // logical or (||)
        AND          = 101, // logical and (&&)
        EQOP         = 102, // equality ops (== !=)
        RELOP        = 103, // relational ops (< <= > >=)
        SHOP         = 104, // shift ops (<< >> >>>)
        UNARYOP      = 105, // unary prefix operator
        INC          = 106, // increment (++)
        DEC          = 107, // decrement (--)
        DOT          = 108, // member operator (.)
        PRIMARY      = 109, // true, false, null, this
        FUNCTION     = 110, // function keyword

        IF           = 113, // if keyword
        ELSE         = 114, // else keyword
        SWITCH       = 115, // switch keyword
        CASE         = 116, // case keyword
        DEFAULT      = 117, // default keyword
        WHILE        = 118, // while keyword
        DO           = 119, // do keyword
        FOR          = 120, // for keyword
        BREAK        = 121, // break keyword
        CONTINUE     = 122, // continue keyword
        VAR          = 123, // var keyword
        WITH         = 124, // with keyword
        CATCH        = 125, // catch keyword
        FINALLY      = 126, // finally keyword
        RESERVED     = 127, // reserved keywords
        NOP          = 128, // NOP
        VOID         = 132, // void keyword
        ASSERT       = 150; // assert keyword


    // Predicates ///////////////////////////////////////////////////////////////

    /** Returns true if s is non-empty and made only of Java identifier characters. */
    protected static boolean isJSIdentifier(String s) {
        int length = s.length();
        if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false;
        for (int i = 1; i < length; i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) return false;
        }
        return true;
    }

    private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
    private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
    private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); }

    /** Returns the value of hex digit c (0-15), or -1 if c is not a hex digit. */
    private static int xDigitToInt(int c) {
        if ('0' <= c && c <= '9') return c - '0';
        if ('a' <= c && c <= 'f') return c - ('a' - 10);
        if ('A' <= c && c <= 'F') return c - ('A' - 10);
        return -1;
    }

    /** Returns true for space, tab, form feed, vertical tab, NBSP and any Unicode space separator. */
    public static boolean isWhiteSpace(int c) {
        if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true;
        return Character.getType((char)c) == Character.SPACE_SEPARATOR;
    }


    // Token Subtype Handlers ///////////////////////////////////////////////////

    /** Packs a token code and its subtype into the single int that getKeyword() unpacks. */
    private static int pack(int token, int subtype) { return token | (subtype << 8); }

    /**
     * Looks name up in the keyword table.
     *
     * <p>A couple of characters are probed to pick the single keyword the
     * name could possibly be; the name is then compared against that
     * candidate in full before anything is decided.
     *
     * @param name the identifier text just read
     * @return the keyword's token code (with {@link #op} set to its subtype,
     *         or to 0 if it has none), or -1 if name is not a keyword
     * @throws IOException if name is exactly one of the keywords XWT
     *         forbids (new, const, delete, instanceof)
     */
    private int getKeyword(String name) throws IOException {
        String s = name;
        int id = -1;            // packed code of the candidate keyword
        String X = null;        // the only keyword s can still be
        String banned = null;   // error to raise if s really is X

        // NOTE: "static" deliberately has no case here, matching the original
        // switch, so it lexes as an ordinary NAME.
        switch (s.length()) {
        case 2:
            switch (s.charAt(1)) {
            case 'f': X = "if"; id = IF; break;
            case 'n': X = "in"; id = pack(RELOP, IN); break;
            case 'o': X = "do"; id = DO; break;
            }
            break;
        case 3:
            switch (s.charAt(0)) {
            case 'f': X = "for"; id = FOR; break;
            case 'i': X = "int"; id = RESERVED; break;
            case 'n': X = "new"; banned = "the new keyword is not permitted in XWT scripts"; break;
            case 't': X = "try"; id = TRY; break;
            case 'v': X = "var"; id = VAR; break;
            }
            break;
        case 4:
            switch (s.charAt(0)) {
            case 'b': X = "byte"; id = RESERVED; break;
            case 'c':
                if (s.charAt(3) == 'e') { X = "case"; id = CASE; }
                else if (s.charAt(3) == 'r') { X = "char"; id = RESERVED; }
                break;
            case 'e':
                if (s.charAt(3) == 'e') { X = "else"; id = ELSE; }
                else if (s.charAt(3) == 'm') { X = "enum"; id = RESERVED; }
                break;
            case 'g': X = "goto"; id = RESERVED; break;
            case 'l': X = "long"; id = RESERVED; break;
            case 'n': X = "null"; id = pack(PRIMARY, NULL); break;
            case 't':
                if (s.charAt(3) == 'e') { X = "true"; id = pack(PRIMARY, TRUE); }
                else if (s.charAt(3) == 's') { X = "this"; id = pack(PRIMARY, THIS); }
                break;
            case 'v': X = "void"; id = pack(UNARYOP, VOID); break;
            case 'w': X = "with"; id = WITH; break;
            }
            break;
        case 5:
            switch (s.charAt(2)) {
            case 'a': X = "class"; id = RESERVED; break;
            case 'e': X = "break"; id = BREAK; break;
            case 'i': X = "while"; id = WHILE; break;
            case 'l': X = "false"; id = pack(PRIMARY, FALSE); break;
            case 'n':
                if (s.charAt(0) == 'c') { X = "const"; banned = "the const keyword is not permitted in XWT"; }
                else if (s.charAt(0) == 'f') { X = "final"; id = RESERVED; }
                break;
            case 'o':
                if (s.charAt(0) == 'f') { X = "float"; id = RESERVED; }
                else if (s.charAt(0) == 's') { X = "short"; id = RESERVED; }
                break;
            case 'p': X = "super"; id = RESERVED; break;
            case 'r': X = "throw"; id = THROW; break;
            case 't': X = "catch"; id = CATCH; break;
            }
            break;
        case 6:
            switch (s.charAt(1)) {
            case 'a': X = "native"; id = RESERVED; break;
            case 'e':
                if (s.charAt(0) == 'd') { X = "delete"; banned = "the delete keyword is not permitted in XWT scripts"; }
                else if (s.charAt(0) == 'r') { X = "return"; id = RETURN; }
                break;
            case 'h': X = "throws"; id = RESERVED; break;
            case 'o': X = "double"; id = RESERVED; break;
            case 's': X = "assert"; id = ASSERT; break;
            case 'u': X = "public"; id = RESERVED; break;
            case 'w': X = "switch"; id = SWITCH; break;
            case 'y': X = "typeof"; id = pack(UNARYOP, TYPEOF); break;
            }
            break;
        case 7:
            switch (s.charAt(1)) {
            case 'a': X = "package"; id = RESERVED; break;
            case 'e': X = "default"; id = DEFAULT; break;
            case 'i': X = "finally"; id = FINALLY; break;
            case 'o': X = "boolean"; id = RESERVED; break;
            case 'r': X = "private"; id = RESERVED; break;
            case 'x': X = "extends"; id = RESERVED; break;
            }
            break;
        case 8:
            switch (s.charAt(0)) {
            case 'a': X = "abstract"; id = RESERVED; break;
            case 'c': X = "continue"; id = CONTINUE; break;
            case 'd': X = "debugger"; id = RESERVED; break;
            case 'f': X = "function"; id = FUNCTION; break;
            case 'v': X = "volatile"; id = RESERVED; break;
            }
            break;
        case 9:
            switch (s.charAt(0)) {
            case 'i': X = "interface"; id = RESERVED; break;
            case 'p': X = "protected"; id = RESERVED; break;
            case 't': X = "transient"; id = RESERVED; break;
            }
            break;
        case 10:
            switch (s.charAt(1)) {
            case 'm': X = "implements"; id = RESERVED; break;
            case 'n': X = "instanceof"; banned = "the instanceof keyword is not permitted in XWT scripts"; break;
            }
            break;
        case 12:
            X = "synchronized"; id = RESERVED;
            break;
        }

        if (X == null || !X.equals(s)) return -1;

        // Only complain once the whole word is known to match; otherwise plain
        // identifiers such as "cents" or "design" would be rejected.
        if (banned != null) throw new IOException(banned);

        this.op = id >> 8;
        return id & 0xff;
    }

    /**
     * Reads the rest of an identifier whose first character has already been consumed.
     *
     * @param c the first character of the identifier
     * @return a keyword token code, or NAME with {@link #string} set to the text
     */
    private int getIdentifier(int c) throws IOException {
        in.startString();
        do { c = in.read(); } while (c != -1 && Character.isJavaIdentifierPart((char)c));
        in.unread();
        String str = in.getString();
        int result = getKeyword(str);
        if (result != -1) return result;
        this.string = str;
        return NAME;
    }

    /**
     * Reads a numeric literal whose first character has already been consumed.
     * Accepts decimal integers and floats, octal (leading 0) and hex (leading 0x).
     *
     * @param c the first character of the literal (a digit or '.')
     * @return NUMBER, with {@link #number} set to the smallest of
     *         Byte/Short/Integer that holds an integral value, else a Double
     * @throws IOException if the exponent has no digits or the digits cannot be parsed
     */
    private int getNumber(int c) throws IOException {
        int base = 10;
        boolean isInteger = true;
        in.startString();

        // figure out what base we're using
        if (c == '0') {
            c = in.read();
            if (c == 'x' || c == 'X') {
                base = 16;
                c = in.read();
                in.startString();   // restart capture at the first hex digit, dropping "0x"
            } else if (isDigit(c)) {
                base = 8;
            }
        }

        // below base 16 the letters a-f are not digits, so stop on them
        while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();

        if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
            isInteger = false;
            if (c == '.') do { c = in.read(); } while (isDigit(c));
            if (c == 'e' || c == 'E') {
                c = in.read();
                if (c == '+' || c == '-') c = in.read();
                if (!isDigit(c)) throw new IOException("msg.missing.exponent");
                do { c = in.read(); } while (isDigit(c));
            }
        }
        in.unread();    // the character that ended the literal belongs to the next token

        String numString = in.getString();
        double dval = Double.NaN;
        long longval = 0;
        if (isInteger) {
            try {
                longval = Long.parseLong(numString, base);
                dval = (double)longval;
            } catch (NumberFormatException ex) {
                // no digits, a digit outside the base, or out of long range
                if (base != 10) throw new IOException("msg.caught.nfe");
                isInteger = false;  // too big for a long: fall through and parse as a double
            }
        }
        if (!isInteger) {
            try { dval = (Double.valueOf(numString)).doubleValue(); }
            catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); }
        }

        // box with constructors, as elsewhere in this file (no Java 5 APIs are used)
        if (!isInteger) this.number = new Double(dval);
        else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval);
        else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval);
        else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
        else this.number = new Double(longval);
        return NUMBER;
    }

    /**
     * Reads a string literal whose opening quote has already been consumed.
     *
     * @param c the opening quote character (' or ")
     * @return STRING, with {@link #string} set to the decoded contents
     * @throws IOException on an unterminated literal or an unsupported escape
     */
    private int getString(int c) throws IOException {
        StringBuffer stringBuf = null;  // only allocated once an escape is seen
        int quoteChar = c;
        c = in.read();
        in.startString(); // start after the first "
        while (c != quoteChar) {
            if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit");
            if (c == '\\') {
                if (stringBuf == null) {
                    in.unread(); // Don't include the backslash
                    stringBuf = new StringBuffer(in.getString());
                    in.read();
                }
                switch (c = in.read()) {
                    case 'b': c = '\b'; break;
                    case 'f': c = '\f'; break;
                    case 'n': c = '\n'; break;
                    case 'r': c = '\r'; break;
                    case 't': c = '\t'; break;
                    case 'v': c = '\u000B'; break;
                    case '\\': case '\'': case '"': break;  // the character stands for itself
                    default: throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
                }
            }
            if (stringBuf != null) stringBuf.append((char) c);
            c = in.read();
        }
        if (stringBuf != null) this.string = stringBuf.toString();
        else {
            in.unread(); // miss the trailing "
            this.string = in.getString();
            in.read();
        }
        return STRING;
    }

    // hack because you can't do "foo ? 1 : (bar(); 2)" in Java
    final int op(int set, int ret) { this.op = set; return ret; }

    /**
     * Returns the next token.
     *
     * <p>Whitespace and comments are skipped; a newline is reported as EOL.
     *
     * @return a token constant, or -1 at end of input
     * @throws IOException on a read error or malformed input
     */
    public int getToken() throws IOException {
        int c;
        do { c = in.read(); } while (c != '\n' && isWhiteSpace(c));
        if (c == -1) return -1;
        if (c == '\\' && in.peek() == 'u') throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
        if (Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
        if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
        if (c == '"' || c == '\'') return getString(c);
        switch (c) {
        case '\n': return EOL;
        case ';': return SEMI;
        case '[': return LB;
        case ']': return RB;
        case '{': return LC;
        case '}': return RC;
        case '(': return LP;
        case ')': return RP;
        case ',': return COMMA;
        case '?': return HOOK;
        case ':': return COLON;
        case '.': return DOT;
        case '|': return in.match('|') ? OR : (in.match('=') ? op(BITOR, ASSIGN) : BITOR);
        case '^': return in.match('=') ? op(BITXOR, ASSIGN) : BITXOR;
        case '&': return in.match('&') ? AND : in.match('=') ? op(BITAND, ASSIGN) : BITAND;
        case '=': return !in.match('=') ? op(NOP, ASSIGN) : op(in.match('=') ? SHEQ : EQ, EQOP);
        case '!': return !in.match('=') ? op(NOP, UNARYOP) : op(in.match('=') ? SHNE : NE, EQOP);
        case '%': return op(MOD, in.match('=') ? ASSIGN : MOD);
        case '~': return op(BITNOT, UNARYOP);
        case '+': return in.match('=') ? op(ADD, ASSIGN) : in.match('+') ? INC : ADD;
        case '-': return in.match('=') ? op(SUB, ASSIGN) : in.match('-') ? DEC : SUB;
        case '*': return in.match('=') ? op(MUL, ASSIGN) : MUL;
        case '<': return !in.match('<') ?
                      op(in.match('=') ? LE : LT, RELOP) :
                      in.match('=') ? op(LSH, ASSIGN) : op(LSH, SHOP);
        case '>': return !in.match('>') ?
                      op(in.match('=') ? GE : GT, RELOP) :
                      in.match('>') ?
                          op(URSH, in.match('=') ? ASSIGN : SHOP) :
                          op(RSH, in.match('=') ? ASSIGN : SHOP);
        case '/':
            if (in.match('=')) return op(DIV, ASSIGN);
            if (in.match('/')) {
                // line comment: skip to end of line, leaving the newline to be reported as EOL
                while ((c = in.read()) != -1 && c != '\n');
                in.unread();
                return getToken();
            }
            if (!in.match('*')) return DIV;
            while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
                // a "/*" inside a comment is an error unless closed at once by "/*/"
                if (c != '/' || !in.match('*')) continue;
                if (in.match('/')) return getToken();
                throw new IOException("msg.nested.comment");
            }
            if (c == -1) throw new IOException("msg.unterminated.comment");
            return getToken();  // `goto retry'
        default: throw new IOException("illegal character: " + c);
        }
    }

    /**
     * Reader wrapper offering peek/match lookahead, a one-character unread,
     * and capture of the characters read since startString().
     */
    private static class SmartReader {
        PushbackReader reader = null;

        /** Result of the most recent read(); -1 before any read and at end of input. */
        int lastread = -1;

        // FIXME: could be much more efficient
        /** Text captured since startString(), or null when not capturing. */
        StringBuffer accumulator = null;

        public SmartReader(Reader r) { reader = new PushbackReader(r); }

        /** Pushes the last read() character back and drops it from any capture. No-op at end of input. */
        public void unread() throws IOException {
            if (lastread == -1) return;     // nothing was consumed; never push back an EOF marker
            reader.unread(lastread);
            if (accumulator != null && accumulator.length() > 0) accumulator.setLength(accumulator.length() - 1);
        }

        /** Consumes the next character if it equals c; does not affect lastread or the capture. */
        public boolean match(char c) throws IOException {
            if (peek() == c) { reader.read(); return true; }
            return false;
        }

        /** Returns the next character without consuming it, or -1 at end of input. */
        public int peek() throws IOException {
            int peeked = reader.read();
            if (peeked != -1) reader.unread((char)peeked);
            return peeked;
        }

        /** Reads one character, adding it to the capture if one is active; returns -1 at end of input. */
        public int read() throws IOException {
            lastread = reader.read();
            if (accumulator != null && lastread != -1) accumulator.append((char)lastread);
            return lastread;
        }

        /**
         * Begins capturing, starting with the character returned by the most
         * recent read() (callers have always consumed it already).
         */
        public void startString() {
            accumulator = new StringBuffer();
            if (lastread != -1) accumulator.append((char)lastread);
        }

        /** Ends the capture and returns its text. */
        public String getString() throws IOException {
            String ret = accumulator.toString();
            accumulator = null;
            return ret;
        }
    }

}