From 2b9f73a3d850503353fab448c6bbfe7f70efdfbe Mon Sep 17 00:00:00 2001 From: megacz Date: Fri, 30 Jan 2004 06:59:12 +0000 Subject: [PATCH] 2003/04/24 10:25:09 darcs-hash:20040130065912-2ba56-b79c1d3e3f4b153c7114975e9eafcb1fffa039b9.gz --- src/org/xwt/js/Lexer.java | 997 ++++++++++++++++++++++---------------------- src/org/xwt/js/Parser.java | 276 ++++++++++++ 2 files changed, 782 insertions(+), 491 deletions(-) create mode 100644 src/org/xwt/js/Parser.java diff --git a/src/org/xwt/js/Lexer.java b/src/org/xwt/js/Lexer.java index 123491c..172c593 100644 --- a/src/org/xwt/js/Lexer.java +++ b/src/org/xwt/js/Lexer.java @@ -1,491 +1,506 @@ -// This file was derived from org.mozilla.javascript.TokenStream; it -// is covered by the NPL 1.1. - -/** - * The contents of this file are subject to the Netscape Public - * License Version 1.1 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.mozilla.org/NPL/ - * - * Software distributed under the License is distributed on an "AS - * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or - * implied. See the License for the specific language governing - * rights and limitations under the License. - * - * The Initial Developer of the Original Code is Netscape - * Communications Corporation. - * - * Contributor(s): Roger Lawrence, Mike McCabe - */ - -package org.xwt.js; -import java.io.*; - -class Lexer { - - SmartReader in; - int op; - Number number; - String string; - public Lexer(Reader r) { in = new SmartReader(r); } - - // Token Constants ////////////////////////////////////////////////////////// - - public final static int - EOL = 1, // end of line - RETURN = 5, - GOTO = 6, - BITOR = 11, - BITXOR = 12, - BITAND = 13, - EQ = 14, - NE = 15, - LT = 16, - LE = 17, - GT = 18, - GE = 19, - LSH = 20, - RSH = 21, - URSH = 22, - ADD = 23, - SUB = 24, - MUL = 25, - DIV = 26, - MOD = 27, - BITNOT = 28, - DELPROP = 31, - TYPEOF = 32, - NAME = 44, - NUMBER = 45, - STRING = 46, - NULL = 49, - THIS = 50, - FALSE = 51, - TRUE = 52, - SHEQ = 53, // shallow equality (===) - SHNE = 54, // shallow inequality (!==) - THROW = 62, - IN = 63, - INSTANCEOF = 64, - TRY = 75, - SEMI = 89, // semicolon - LB = 90, // left bracket - RB = 91, // right bracket - LC = 92, // left curly brace - RC = 93, // right curly brace - LP = 94, // left paren - RP = 95, // right paren - COMMA = 96, // comma operator - ASSIGN = 97, // assignment ops (= += -= etc.) - HOOK = 98, // conditional (?:) - COLON = 99, // colon - OR = 100, // logical or (||) - AND = 101, // logical and (&&) - EQOP = 102, // equality ops (== !=) - RELOP = 103, // relational ops (< <= > >=) - SHOP = 104, // shift ops (<< >> >>>) - UNARYOP = 105, // unary prefix operator - INC = 106, // increment (++) - DEC = 107, // decrement (--) - DOT = 108, // member operator (.) - PRIMARY = 109, // true, false, null, this - FUNCTION = 110, // function keyword - - IF = 113, // if keyword - ELSE = 114, // else keyword - SWITCH = 115, // switch keyword - CASE = 116, // case keyword - DEFAULT = 117, // default keyword - WHILE = 118, // while keyword - DO = 119, // do keyword - FOR = 120, // for keyword - BREAK = 121, // break keyword - CONTINUE = 122, // continue keyword - VAR = 123, // var keyword - WITH = 124, // with keyword - CATCH = 125, // catch keyword - FINALLY = 126, // finally keyword - RESERVED = 127, // reserved keywords - NOP = 128, // NOP - VOID = 132, // void keyword - ASSERT = 150; // assert keyword - - - // Predicates /////////////////////////////////////////////////////////////////////// - - protected static boolean isJSIdentifier(String s) { - int length = s.length(); - if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false; - for (int i=1; i= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); } - private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); } - private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); } - private static int xDigitToInt(int c) { - if ('0' <= c && c <= '9') return c - '0'; - if ('a' <= c && c <= 'f') return c - ('a' - 10); - if ('A' <= c && c <= 'F') return c - ('A' - 10); - return -1; - } - public static boolean isWhiteSpace(int c) { - if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true; - return Character.getType((char)c) == Character.SPACE_SEPARATOR; - } - - - // Token Subtype Handlers ///////////////////////////////////////////////////////// - - private int getKeyword(String name) throws IOException { - final int - Id_break = BREAK, - Id_case = CASE, - Id_continue = CONTINUE, - Id_default = DEFAULT, - Id_delete = DELPROP, - Id_do = DO, - Id_else = ELSE, - Id_false = PRIMARY | (FALSE << 8), - Id_for = FOR, - Id_function = FUNCTION, - Id_if = IF, - Id_in = RELOP | (IN << 8), - Id_null = PRIMARY | (NULL << 8), - Id_return = RETURN, - Id_switch = SWITCH, - Id_this = PRIMARY | (THIS << 8), - Id_true = PRIMARY | (TRUE << 8), - Id_typeof = UNARYOP | (TYPEOF << 8), - Id_var = VAR, - Id_void = UNARYOP | (VOID << 8), - Id_while = WHILE, - Id_with = WITH, - - // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c - Id_abstract = RESERVED, - Id_boolean = RESERVED, - Id_byte = RESERVED, - Id_catch = CATCH, - Id_char = RESERVED, - Id_class = RESERVED, - Id_const = RESERVED, - Id_debugger = RESERVED, - Id_double = RESERVED, - Id_enum = RESERVED, - Id_extends = RESERVED, - Id_final = RESERVED, - Id_finally = FINALLY, - Id_float = RESERVED, - Id_goto = RESERVED, - Id_implements = RESERVED, - Id_instanceof = RELOP | (INSTANCEOF << 8), - Id_int = RESERVED, - Id_interface = RESERVED, - Id_long = RESERVED, - Id_native = RESERVED, - Id_package = RESERVED, - Id_private = RESERVED, - Id_protected = RESERVED, - Id_public = RESERVED, - Id_assert = ASSERT, - Id_short = RESERVED, - Id_static = RESERVED, - Id_super = RESERVED, - Id_synchronized = RESERVED, - Id_throw = THROW, - Id_throws = RESERVED, - Id_transient = RESERVED, - Id_try = TRY, - Id_volatile = RESERVED; - - int id; - String s = name; - L0: { id = -1; String X = null; int c; - L: switch (s.length()) { - case 2: c=s.charAt(1); - if (c=='f') { if (s.charAt(0)=='i') {id=Id_if; break L0;} } - else if (c=='n') { if (s.charAt(0)=='i') {id=Id_in; break L0;} } - else if (c=='o') { if (s.charAt(0)=='d') {id=Id_do; break L0;} } - break L; - case 3: switch (s.charAt(0)) { - case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') {id=Id_for; break L0;} break L; - case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') {id=Id_int; break L0;} break L; - case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') - throw new IOException("the new keyword is not permitted in XWT scripts"); - break L; - case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') {id=Id_try; break L0;} break L; - case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') {id=Id_var; break L0;} break L; - } break L; - case 4: switch (s.charAt(0)) { - case 'b': X="byte";id=Id_byte; break L; - case 'c': c=s.charAt(3); - if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') {id=Id_case; break L0;} } - else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') {id=Id_char; break L0;} } - break L; - case 'e': c=s.charAt(3); - if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') {id=Id_else; break L0;} } - else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') {id=Id_enum; break L0;} } - break L; - case 'g': X="goto";id=Id_goto; break L; - case 'l': X="long";id=Id_long; break L; - case 'n': X="null";id=Id_null; break L; - case 't': c=s.charAt(3); - if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') {id=Id_true; break L0;} } - else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') {id=Id_this; break L0;} } - break L; - case 'v': X="void";id=Id_void; break L; - case 'w': X="with";id=Id_with; break L; - } break L; - case 5: switch (s.charAt(2)) { - case 'a': X="class";id=Id_class; break L; - case 'e': X="break";id=Id_break; break L; - case 'i': X="while";id=Id_while; break L; - case 'l': X="false";id=Id_false; break L; - case 'n': c=s.charAt(0); - if (c=='c') { X="const"; throw new IOException("the const keyword is not permitted in XWT"); } - else if (c=='f') { X="final";id=Id_final; } - break L; - case 'o': c=s.charAt(0); - if (c=='f') { X="float";id=Id_float; } - else if (c=='s') { X="short";id=Id_short; } - break L; - case 'p': X="super";id=Id_super; break L; - case 'r': X="throw";id=Id_throw; break L; - case 't': X="catch";id=Id_catch; break L; - } break L; - case 6: switch (s.charAt(1)) { - case 'a': X="native";id=Id_native; break L; - case 'e': c=s.charAt(0); - if (c=='d') { X="delete"; throw new IOException("the delete keyword is not permitted in XWT scripts"); } - else if (c=='r') { X="return";id=Id_return; } - break L; - case 'h': X="throws";id=Id_throws; break L; - case 'o': X="double";id=Id_double; break L; - case 's': X="assert";id=Id_assert; break L; - case 'u': X="public";id=Id_public; break L; - case 'w': X="switch";id=Id_switch; break L; - case 'y': X="typeof";id=Id_typeof; break L; - } break L; - case 7: switch (s.charAt(1)) { - case 'a': X="package";id=Id_package; break L; - case 'e': X="default";id=Id_default; break L; - case 'i': X="finally";id=Id_finally; break L; - case 'o': X="boolean";id=Id_boolean; break L; - case 'r': X="private";id=Id_private; break L; - case 'x': X="extends";id=Id_extends; break L; - } break L; - case 8: switch (s.charAt(0)) { - case 'a': X="abstract";id=Id_abstract; break L; - case 'c': X="continue";id=Id_continue; break L; - case 'd': X="debugger";id=Id_debugger; break L; - case 'f': X="function";id=Id_function; break L; - case 'v': X="volatile";id=Id_volatile; break L; - } break L; - case 9: c=s.charAt(0); - if (c=='i') { X="interface";id=Id_interface; } - else if (c=='p') { X="protected";id=Id_protected; } - else if (c=='t') { X="transient";id=Id_transient; } - break L; - case 10: c=s.charAt(1); - if (c=='m') { X="implements";id=Id_implements; } - else if (c=='n') { X="instanceof"; throw new IOException("the instanceof keyword is not permitted in XWT scripts"); } - break L; - case 12: X="synchronized";id=Id_synchronized; break L; - } - if (X!=null && X!=s && !X.equals(s)) id = -1; - } - if (id == -1) { return -1; } - this.op = id >> 8; - return id & 0xff; - } - - private int getIdentifier(int c) throws IOException { - in.startString(); - while (Character.isJavaIdentifierPart((char)(c = in.read()))); - in.unread(); - String str = in.getString(); - int result = getKeyword(str); - if (result != -1) return result; - this.string = str; - return NAME; - } - - private int getNumber(int c) throws IOException { - int base = 10; - in.startString(); - double dval = Double.NaN; - long longval = 0; - boolean isInteger = true; - - // figure out what base we're using - if (c == '0') { - if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); } - else if (isDigit(c)) base = 8; - } - - while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read(); - if (base == 10 && (c == '.' || c == 'e' || c == 'E')) { - isInteger = false; - if (c == '.') do { c = in.read(); } while (isDigit(c)); - if (c == 'e' || c == 'E') { - c = in.read(); - if (c == '+' || c == '-') c = in.read(); - if (!isDigit(c)) throw new IOException("msg.missing.exponent"); - do { c = in.read(); } while (isDigit(c)); - } - } - in.unread(); - - String numString = in.getString(); - if (base == 10 && !isInteger) { - try { dval = (Double.valueOf(numString)).doubleValue(); } - catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); } - } else { - if (isInteger) { - longval = Long.parseLong(numString, base); - dval = (double)longval; - else { - // FIXME: we're not handling hex/octal fractions... does that matter? - dval = Double.parseDouble(numString); - longval = (long) dval; - if (longval == dval) isInteger = true; - } - } - - if (!isInteger) this.number = new Double(dval); - else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval); - else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval); - else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval); - else this.number = new Double(longval); - return NUMBER; - } - - private int getString(int c) throws IOException { - StringBuffer stringBuf = null; - int quoteChar = c; - int val = 0; - c = in.read(); - in.startString(); // start after the first " - while(c != quoteChar) { - if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit"); - if (c == '\\') { - if (stringBuf == null) { - in.unread(); // Don't include the backslash - stringBuf = new StringBuffer(in.getString()); - in.read(); - } - switch (c = in.read()) { - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - case 'v': c = '\u000B'; break; - default: throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities"); - } - } - if (stringBuf != null) stringBuf.append((char) c); - c = in.read(); - } - if (stringBuf != null) this.string = stringBuf.toString(); - else { - in.unread(); // miss the trailing " - this.string = in.getString(); - in.read(); - } - return STRING; - } - - // hack because you can't do "foo ? 1 : (bar(); 2)" in Java - final int op(int set, int ret) { this.op = set; return ret; } - - public int getToken() throws IOException { - int c; - do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n'); - if (c == -1) return -1; - if (c == '\\' && in.peek() == 'u') throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities"); - if (Character.isJavaIdentifierStart((char)c)) return getIdentifier(c); - if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c); - if (c == '"' || c == '\'') return getString(c); - switch (c) { - case '\n': return EOL; - case ';': return SEMI; - case '[': return LB; - case ']': return RB; - case '{': return LC; - case '}': return RC; - case '(': return LP; - case ')': return RP; - case ',': return COMMA; - case '?': return HOOK; - case ':': return COLON; - case '.': return DOT; - case '|': return in.match('|') ? OR : (in.match('=') ? op(BITOR, ASSIGN) : BITOR); - case '^': return in.match('=') ? op(BITXOR, ASSIGN) : BITXOR; - case '&': return in.match('&') ? AND : in.match('=') ? op(BITAND, ASSIGN) : BITAND; - case '=': return !in.match('=') ? op(NOP, ASSIGN) : op(in.match('=') ? SHEQ : EQ, EQOP); - case '!': return !in.match('=') ? op(NOP, UNARYOP) : op(in.match('=') ? SHNE : NE, EQOP); - case '%': return op(MOD, in.match('=') ? ASSIGN : MOD); - case '~': return op(BITNOT, UNARYOP); - case '+': return in.match('=') ? op(ADD, ASSIGN) : in.match('+') ? INC : ADD; - case '-': return in.match('=') ? op(SUB, ASSIGN) : in.match('-') ? DEC : SUB; - case '*': return in.match('=') ? op(MUL, ASSIGN) : MUL; - case '<': return !in.match('<') ? - op(in.match('=') ? LE : LT, RELOP) : - in.match('=') ? op(LSH, ASSIGN) : op(LSH, SHOP); - case '>': return !in.match('>') ? - op(in.match('=') ? GE : GT, RELOP) : - in.match('>') ? - op(URSH, in.match('=') ? ASSIGN : SHOP) : - op(RSH, in.match('=') ? ASSIGN : SHOP); - case '/': - if (in.match('=')) { op(DIV, ASSIGN); } - if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); } - if (!in.match('*')) return DIV; - while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) { - if (c == '\n' || c != '/' || !in.match('*')) continue; - if (in.match('/')) return getToken(); - throw new IOException("msg.nested.comment"); - } - if (c == -1) throw new IOException("msg.unterminated.comment"); - return getToken(); // `goto retry' - default: throw new IOException("illegal character: " + c); - } - } - - private static class SmartReader { - PushbackReader reader = null; - int lastread = -1; - - public SmartReader(Reader r) { reader = new PushbackReader(r); } - public void unread() throws IOException { reader.unread(lastread); } - public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; } - public int peek() throws IOException { - int peeked = reader.read(); - if (peeked != -1) reader.unread((char)peeked); - return peeked; - } - public int read() throws IOException { - lastread = reader.read(); - if (accumulator != null) accumulator.append(lastread); - return lastread; - } - - // FIXME: could be much more efficient - StringBuffer accumulator = null; - public void startString() { accumulator = new StringBuffer(); } - public String getString() throws IOException { - String ret = accumulator.toString(); - accumulator = null; - return ret; - } - } - -} +// This file was derived from org.mozilla.javascript.TokenStream; it +// is covered by the NPL 1.1. + +/** + * The contents of this file are subject to the Netscape Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/NPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Initial Developer of the Original Code is Netscape + * Communications Corporation. + * + * Contributor(s): Roger Lawrence, Mike McCabe + */ + +package org.xwt.js; +import java.io.*; + +class Lexer { + + private SmartReader in; + private boolean pushedBack = false; + + private int op; + public Number number; + public String string; + + public Lexer(Reader r) throws IOException { in = new SmartReader(r); } + public int peekToken() throws IOException { int ret = peekToken(); pushBackToken(); return ret; } + public void pushBackToken() { if (pushedBack) throw new Error("can't push back twice"); pushedBack = true; } + + // Token Constants ////////////////////////////////////////////////////////// + + public final static int + EOL = 1, // end of line + RETURN = 5, // return + GOTO = 6, // goto + BITOR = 11, // | + ASSIGN_BITOR = 211, // |= + BITXOR = 12, // ^ + ASSIGN_BITXOR= 212, // ^= + BITAND = 13, // & + ASSIGN_BITAND= 213, // &= + EQ = 14, // == + NE = 15, // != + LT = 16, // < + LE = 17, // <= + GT = 18, // > + GE = 19, // >= + LSH = 20, // << + ASSIGN_LSH = 220, // <<= + RSH = 21, // >> + ASSIGN_RSH = 221, // >>= + URSH = 22, // >>> + ASSIGN_URSH = 222, // >>>= + ADD = 23, // + + ASSIGN_ADD = 223, // += + SUB = 24, // - + ASSIGN_SUB = 224, // -= + MUL = 25, // * + ASSIGN_MUL = 225, // *= + DIV = 26, // / + ASSIGN_DIV = 226, // /= + MOD = 27, // % + ASSIGN_MOD = 227, // %= + BITNOT = 28, // ~ + ASSIGN_BITNOT= 228, // ~= + DELPROP = 31, // delete + TYPEOF = 32, // typeof + NAME = 44, // *** identifiers *** + NUMBER = 45, // *** numeric literals *** + STRING = 46, // *** string literals *** + NULL = 49, // null + THIS = 50, // this + FALSE = 51, // false + TRUE = 52, // true + SHEQ = 53, // === + SHNE = 54, // !== + THROW = 62, // throw + IN = 63, // in + INSTANCEOF = 64, // instanceof + TRY = 75, // try + SEMI = 89, // ; + LB = 90, // [ + RB = 91, // ] + LC = 92, // { + RC = 93, // } + LP = 94, // ( + RP = 95, // ) + COMMA = 96, // , + ASSIGN = 97, // = + HOOK = 98, // ? + COLON = 99, // : + OR = 100, // || + AND = 101, // && + INC = 106, // ++ + DEC = 107, // -- + DOT = 108, // . + FUNCTION = 110, // function + IF = 113, // if keyword + ELSE = 114, // else keyword + SWITCH = 115, // switch keyword + CASE = 116, // case keyword + DEFAULT = 117, // default keyword + WHILE = 118, // while keyword + DO = 119, // do keyword + FOR = 120, // for keyword + BREAK = 121, // break keyword + CONTINUE = 122, // continue keyword + VAR = 123, // var keyword + WITH = 124, // with keyword + CATCH = 125, // catch keyword + FINALLY = 126, // finally keyword + RESERVED = 127, // reserved keywords + NOP = 128, // NOP + VOID = 132, // void keyword + MOD_ASSIGN = 133, // %= + BANG = 134, // %= + ASSERT = 150; // assert keyword + + + // Predicates /////////////////////////////////////////////////////////////////////// + + protected static boolean isJSIdentifier(String s) { + int length = s.length(); + if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false; + for (int i=1; i= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); } + private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); } + private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); } + private static int xDigitToInt(int c) { + if ('0' <= c && c <= '9') return c - '0'; + if ('a' <= c && c <= 'f') return c - ('a' - 10); + if ('A' <= c && c <= 'F') return c - ('A' - 10); + return -1; + } + public static boolean isWhiteSpace(int c) { + if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true; + return Character.getType((char)c) == Character.SPACE_SEPARATOR; + } + + + // Token Subtype Handlers ///////////////////////////////////////////////////////// + + private int getKeyword(String name) throws IOException { + final int + Id_break = BREAK, + Id_case = CASE, + Id_continue = CONTINUE, + Id_default = DEFAULT, + Id_delete = DELPROP, + Id_do = DO, + Id_else = ELSE, + Id_false = FALSE, + Id_for = FOR, + Id_function = FUNCTION, + Id_if = IF, + Id_in = IN, + Id_null = NULL, + Id_return = RETURN, + Id_switch = SWITCH, + Id_this = THIS, + Id_true = TRUE, + Id_typeof = TYPEOF, + Id_var = VAR, + Id_void = VOID, + Id_while = WHILE, + Id_with = WITH, + + // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c + Id_abstract = RESERVED, + Id_boolean = RESERVED, + Id_byte = RESERVED, + Id_catch = CATCH, + Id_char = RESERVED, + Id_class = RESERVED, + Id_const = RESERVED, + Id_debugger = RESERVED, + Id_double = RESERVED, + Id_enum = RESERVED, + Id_extends = RESERVED, + Id_final = RESERVED, + Id_finally = FINALLY, + Id_float = RESERVED, + Id_goto = RESERVED, + Id_implements = RESERVED, + Id_instanceof = INSTANCEOF, + Id_int = RESERVED, + Id_interface = RESERVED, + Id_long = RESERVED, + Id_native = RESERVED, + Id_package = RESERVED, + Id_private = RESERVED, + Id_protected = RESERVED, + Id_public = RESERVED, + Id_assert = ASSERT, + Id_short = RESERVED, + Id_static = RESERVED, + Id_super = RESERVED, + Id_synchronized = RESERVED, + Id_throw = THROW, + Id_throws = RESERVED, + Id_transient = RESERVED, + Id_try = TRY, + Id_volatile = RESERVED; + + int id; + String s = name; + L0: { id = -1; String X = null; int c; + L: switch (s.length()) { + case 2: c=s.charAt(1); + if (c=='f') { if (s.charAt(0)=='i') {id=Id_if; break L0;} } + else if (c=='n') { if (s.charAt(0)=='i') {id=Id_in; break L0;} } + else if (c=='o') { if (s.charAt(0)=='d') {id=Id_do; break L0;} } + break L; + case 3: switch (s.charAt(0)) { + case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') {id=Id_for; break L0;} break L; + case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') {id=Id_int; break L0;} break L; + case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') + throw new IOException("the new keyword is not permitted in XWT scripts"); + break L; + case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') {id=Id_try; break L0;} break L; + case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') {id=Id_var; break L0;} break L; + } break L; + case 4: switch (s.charAt(0)) { + case 'b': X="byte";id=Id_byte; break L; + case 'c': c=s.charAt(3); + if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') {id=Id_case; break L0;} } + else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') {id=Id_char; break L0;} } + break L; + case 'e': c=s.charAt(3); + if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') {id=Id_else; break L0;} } + else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') {id=Id_enum; break L0;} } + break L; + case 'g': X="goto";id=Id_goto; break L; + case 'l': X="long";id=Id_long; break L; + case 'n': X="null";id=Id_null; break L; + case 't': c=s.charAt(3); + if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') {id=Id_true; break L0;} } + else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') {id=Id_this; break L0;} } + break L; + case 'v': X="void";id=Id_void; break L; + case 'w': X="with";id=Id_with; break L; + } break L; + case 5: switch (s.charAt(2)) { + case 'a': X="class";id=Id_class; break L; + case 'e': X="break";id=Id_break; break L; + case 'i': X="while";id=Id_while; break L; + case 'l': X="false";id=Id_false; break L; + case 'n': c=s.charAt(0); + if (c=='c') { X="const"; throw new IOException("the const keyword is not permitted in XWT"); } + else if (c=='f') { X="final";id=Id_final; } + break L; + case 'o': c=s.charAt(0); + if (c=='f') { X="float";id=Id_float; } + else if (c=='s') { X="short";id=Id_short; } + break L; + case 'p': X="super";id=Id_super; break L; + case 'r': X="throw";id=Id_throw; break L; + case 't': X="catch";id=Id_catch; break L; + } break L; + case 6: switch (s.charAt(1)) { + case 'a': X="native";id=Id_native; break L; + case 'e': c=s.charAt(0); + if (c=='d') { X="delete"; throw new IOException("the delete keyword is not permitted in XWT scripts"); } + else if (c=='r') { X="return";id=Id_return; } + break L; + case 'h': X="throws";id=Id_throws; break L; + case 'o': X="double";id=Id_double; break L; + case 's': X="assert";id=Id_assert; break L; + case 'u': X="public";id=Id_public; break L; + case 'w': X="switch";id=Id_switch; break L; + case 'y': X="typeof";id=Id_typeof; break L; + } break L; + case 7: switch (s.charAt(1)) { + case 'a': X="package";id=Id_package; break L; + case 'e': X="default";id=Id_default; break L; + case 'i': X="finally";id=Id_finally; break L; + case 'o': X="boolean";id=Id_boolean; break L; + case 'r': X="private";id=Id_private; break L; + case 'x': X="extends";id=Id_extends; break L; + } break L; + case 8: switch (s.charAt(0)) { + case 'a': X="abstract";id=Id_abstract; break L; + case 'c': X="continue";id=Id_continue; break L; + case 'd': X="debugger";id=Id_debugger; break L; + case 'f': X="function";id=Id_function; break L; + case 'v': X="volatile";id=Id_volatile; break L; + } break L; + case 9: c=s.charAt(0); + if (c=='i') { X="interface";id=Id_interface; } + else if (c=='p') { X="protected";id=Id_protected; } + else if (c=='t') { X="transient";id=Id_transient; } + break L; + case 10: c=s.charAt(1); + if (c=='m') { X="implements";id=Id_implements; } + else if (c=='n') { X="instanceof"; throw new IOException("the instanceof keyword is not permitted in XWT scripts"); } + break L; + case 12: X="synchronized";id=Id_synchronized; break L; + } + if (X!=null && X!=s && !X.equals(s)) id = -1; + } + if (id == -1) { return -1; } + this.op = id >> 8; + return id & 0xff; + } + + private int getIdentifier(int c) throws IOException { + in.startString(); + while (Character.isJavaIdentifierPart((char)(c = in.read()))); + in.unread(); + String str = in.getString(); + int result = getKeyword(str); + if (result != -1) return result; + this.string = str; + return NAME; + } + + private int getNumber(int c) throws IOException { + int base = 10; + in.startString(); + double dval = Double.NaN; + long longval = 0; + boolean isInteger = true; + + // figure out what base we're using + if (c == '0') { + if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); } + else if (isDigit(c)) base = 8; + } + + while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read(); + if (base == 10 && (c == '.' || c == 'e' || c == 'E')) { + isInteger = false; + if (c == '.') do { c = in.read(); } while (isDigit(c)); + if (c == 'e' || c == 'E') { + c = in.read(); + if (c == '+' || c == '-') c = in.read(); + if (!isDigit(c)) throw new IOException("msg.missing.exponent"); + do { c = in.read(); } while (isDigit(c)); + } + } + in.unread(); + + String numString = in.getString(); + if (base == 10 && !isInteger) { + try { dval = (Double.valueOf(numString)).doubleValue(); } + catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); } + } else { + if (isInteger) { + longval = Long.parseLong(numString, base); + dval = (double)longval; + } else { + // FIXME: we're not handling hex/octal fractions... does that matter? + dval = Double.parseDouble(numString); + longval = (long) dval; + if (longval == dval) isInteger = true; + } + } + + if (!isInteger) this.number = new Double(dval); + else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval); + else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval); + else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval); + else this.number = new Double(longval); + return NUMBER; + } + + private int getString(int c) throws IOException { + StringBuffer stringBuf = null; + int quoteChar = c; + int val = 0; + c = in.read(); + in.startString(); // start after the first " + while(c != quoteChar) { + if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit"); + if (c == '\\') { + if (stringBuf == null) { + in.unread(); // Don't include the backslash + stringBuf = new StringBuffer(in.getString()); + in.read(); + } + switch (c = in.read()) { + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\u000B'; break; + default: throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities"); + } + } + if (stringBuf != null) stringBuf.append((char) c); + c = in.read(); + } + if (stringBuf != null) this.string = stringBuf.toString(); + else { + in.unread(); // miss the trailing " + this.string = in.getString(); + in.read(); + } + return STRING; + } + + public int getToken() throws IOException { + if (pushedBack) { pushedBack = false; return op; } + return (op = getToken()); + } + + public int _getToken() throws IOException { + int c; + do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n'); + if (c == -1) return -1; + if (c == '\\' && in.peek() == 'u') throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities"); + if (Character.isJavaIdentifierStart((char)c)) return getIdentifier(c); + if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c); + if (c == '"' || c == '\'') return getString(c); + switch (c) { + case '\n': return EOL; + case ';': return SEMI; + case '[': return LB; + case ']': return RB; + case '{': return LC; + case '}': return RC; + case '(': return LP; + case ')': return RP; + case ',': return COMMA; + case '?': return HOOK; + case ':': return COLON; + case '.': return DOT; + case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR); + case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR; + case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND; + case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ; + case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE; + case '%': return in.match('=') ? MOD_ASSIGN : MOD; + case '~': return BITNOT; + case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD; + case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB; + case '*': return in.match('=') ? ASSIGN_MUL : MUL; + case '<': return !in.match('<') ? + (in.match('=') ? LE : LT) : + in.match('=') ? ASSIGN_LSH : LSH; + case '>': return !in.match('>') ? + (in.match('=') ? GE : GT) : + in.match('>') ? + (in.match('=') ? ASSIGN_URSH : URSH) : + (in.match('=') ? ASSIGN_RSH : RSH); + case '/': + if (in.match('=')) return ASSIGN_DIV; + if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); } + if (!in.match('*')) return DIV; + while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) { + if (c == '\n' || c != '/' || !in.match('*')) continue; + if (in.match('/')) return getToken(); + throw new IOException("msg.nested.comment"); + } + if (c == -1) throw new IOException("msg.unterminated.comment"); + return getToken(); // `goto retry' + default: throw new IOException("illegal character: " + c); + } + } + + private static class SmartReader { + PushbackReader reader = null; + int lastread = -1; + + public SmartReader(Reader r) { reader = new PushbackReader(r); } + public void unread() throws IOException { reader.unread(lastread); } + public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; } + public int peek() throws IOException { + int peeked = reader.read(); + if (peeked != -1) reader.unread((char)peeked); + return peeked; + } + public int read() throws IOException { + lastread = reader.read(); + if (accumulator != null) accumulator.append(lastread); + return lastread; + } + + // FIXME: could be much more efficient + StringBuffer accumulator = null; + public void startString() { accumulator = new StringBuffer(); } + public String getString() throws IOException { + String ret = accumulator.toString(); + accumulator = null; + return ret; + } + } + +} diff --git a/src/org/xwt/js/Parser.java b/src/org/xwt/js/Parser.java new file mode 100644 index 0000000..861432c --- /dev/null +++ b/src/org/xwt/js/Parser.java @@ -0,0 +1,276 @@ +package org.xwt.js; +import org.xwt.util.*; +import java.io.*; + +// FIXME: for..in +// FIXME: delete keyword +public class Parser extends Lexer { + + public Parser(Reader r) throws IOException { super(r); } + private Parser skipToken() throws IOException { getToken(); return this; } + + /** sorta like gcc trees */ + public static class Expr { + int code = -1; + + Expr left = null; + Expr right = null; + Expr extra = null; + + Expr next = null; // if this expr is part of a list + + String string = null; + + public Expr(String s) { this.string = s; } // an identifier or label + public Expr(int code) { this(code, null, null, null); } + public Expr(int code, Expr left) { this(code, left, null, null); } + public Expr(int code, Expr left, Expr right) { this(code, left, right, null); } + public Expr(int code, Expr left, Expr right, Expr extra) { this.left = left; this.right = right; this.extra = extra; this.code = code; } + } + + /** parses a single statement */ + public Expr parseStatement() throws IOException { + int tok; + Expr ret; + switch(tok = peekToken()) { + + case LC: + ret = parseBlock(true); + + case THROW: case RETURN: case ASSERT: + ret = new Expr(ASSERT, skipToken().parseExpr()); + + case GOTO: case BREAK: case CONTINUE: + skipToken(); + if (getToken() == NAME) + ret = new Expr(tok, new Expr(string)); + else if (tok == GOTO) + throw new Error("goto must be followed by a label"); + else + ret = new Expr(tok); + + default: + ret = parseExpr(); + } + + if (getToken() != SEMI) throw new Error("expected ;"); + return ret; + } + + /** a block is either a single statement or a list of statements surrounded by curly braces; all expressions are also statements */ + public Expr parseBlock(boolean requireBraces) throws IOException { + int tok = peekToken(); + if (requireBraces && tok != LC) throw new Error("expected {"); + if (tok != LC) return parseStatement(); + skipToken(); + Expr head = null; + Expr tail = null; + while(peekToken() != RC) + if (head == null) head = tail = parseStatement(); else tail = tail.next = parseStatement(); + skipToken(); + return new Expr(LC, head); + } + + /** Subexpressions come in two flavors: starters and continuers. + * Starters can appear at the start of an expression or after a + * continuer, and continuers, which can appear after a starter. + */ + public Expr parseExpr() throws IOException { + Expr e = parseStarter(); + while(true) { + Expr e2 = parseContinuer(e); + if (e2 == null) return e; + e = e2; + } + } + + public Expr parseStarter() throws IOException { + Expr e1 = null; + Expr e2 = null; + Expr e3 = null; + Expr head = null; + Expr tail = null; + int tok = getToken(); + switch(tok) { + + case SWITCH: { + if (getToken() != LP) throw new Error("expected left paren"); + Expr switchExpr = parseExpr(); + if (getToken() != RP) throw new Error("expected left paren"); + if (getToken() != LC) throw new Error("expected left brace"); + Expr firstExpr = null; + Expr lastExpr = null; + while(true) { + if (getToken() != CASE) throw new Error("expected CASE"); + Expr caseExpr = parseExpr(); + if (getToken() != COLON) throw new Error("expected COLON"); + Expr e = new Expr(CASE, caseExpr, parseBlock(false)); + if (lastExpr == null) firstExpr = e; + else lastExpr.next = e; + lastExpr = e; + if (getToken() == RC) return new Expr(SWITCH, switchExpr, firstExpr); + } + } + + case FUNCTION: { + if (getToken() != LP) throw new Error("function keyword must be followed by a left paren"); + Expr formalArgs = null, cur = null; + tok = getToken(); + while(tok != RP) { + if (tok != NAME) throw new Error("expected a variable name"); + if (cur == null) { formalArgs = cur = new Expr(string); } + else { cur.next = new Expr(string); cur = cur.next; } + tok = getToken(); + if (tok == RP) break; + if (tok != COMMA) throw new Error("function argument list must consist of alternating NAMEs and COMMAs"); + tok = getToken(); + } + return new Expr(tok, formalArgs, parseBlock(true)); + } + + case VAR: + while(true) { + if (getToken() != NAME) throw new Error("variable declarations must start with a variable name"); + Expr name = new Expr(string); + Expr initVal = null; + tok = peekToken(); + if (tok == ASSIGN) { + skipToken(); + initVal = parseExpr(); + tok = peekToken(); + } + Expr e = new Expr(VAR, name, initVal); + if (head == null) head = tail = e; else tail = tail.next = e; + if (tok != COMMA) break; + skipToken(); + } + return new Expr(VAR, head); + + case LC: + tok = getToken(); + while(true) { + if (tok == RP) return new Expr(LC, head); + if (tok != NAME) throw new Error("expecting name"); + Expr name = parseExpr(); + if (tok != COLON) throw new Error("expecting colon"); + e1 = new Expr(COLON, name, parseExpr()); + if (head == null) head = tail = e1; else tail = tail.next = e1; + tok = getToken(); + if (tok != COMMA && tok != RP) throw new Error("expected right curly or comma"); + } + + case LB: + tok = getToken(); + while(true) { + if (tok == RB) return new Expr(LB, head); + if (head == null) head = tail = parseExpr(); else tail = tail.next = parseExpr(); + tok = getToken(); + if (tok != COMMA && tok != RP) throw new Error("expected right bracket or comma"); + } + + case NAME: + return new Expr(string); + + case INC: case DEC: case TYPEOF: + return new Expr(tok, parseExpr()); + + case TRUE: case FALSE: case NOP: + return new Expr(tok); + + case TRY: { + // FIXME: we deliberately allow you to omit braces in catch{}/finally{} if they are single statements... + Expr tryBlock = parseBlock(true); + while ((tok = peekToken()) == CATCH) + if (head == null) head = tail = parseBlock(false); else tail = tail.next = parseBlock(false); + if (head == null) throw new Error("try without catch"); + return new Expr(TRY, tryBlock, head, tok == FINALLY ? skipToken().parseBlock(false) : null); + } + + case IF: case WHILE: { + if (getToken() != LP) throw new Error("expected left paren"); + Expr parenExpr = parseExpr(); + if (getToken() != RP) throw new Error("expected right paren"); + Expr firstBlock = parseBlock(false); + if (tok == IF && peekToken() == ELSE) return new Expr(tok, parenExpr, firstBlock, skipToken().parseBlock(false)); + return new Expr(tok, parenExpr, firstBlock); + } + + case FOR: + // FIXME: for..in + if (getToken() != LP) throw new Error("expected left paren"); + e1 = parseStatement(); + e2 = parseStatement(); + e3 = parseStatement(); // FIXME: this guy has to be okay with ending via a ) + if (getToken() != RP) throw new Error("expected right paren"); + throw new Error("not yet implemented"); + //return new Expr(FOR, e1, e2, e3, parseBlock(false)); + + case DO: { + Expr firstBlock = parseBlock(false); + if (getToken() != WHILE) throw new Error("expecting WHILE"); + if (getToken() != LP) throw new Error("expected left paren"); + Expr whileExpr = parseExpr(); + if (getToken() != RP) throw new Error("expected right paren"); + if (getToken() != SEMI) throw new Error("semicolon"); + return new Expr(DO, firstBlock, whileExpr); + } + + case VOID: case RESERVED: + throw new Error("reserved word that you shouldn't be using"); + + case WITH: + throw new Error("WITH not yet implemented"); // FIXME + + default: throw new Error("I wasn't expecting a " + tok); + } + } + + // called after each parseExpr(); returns null if we can't make the expression any bigger + public Expr parseContinuer(Expr prefix) throws IOException { + Expr head = null; + Expr tail = null; + Expr e1, e2, e3; + Expr ret = null; + int tok; + + // FIXME: postfix and infix operators -- need to handle precedence + switch (tok = getToken()) { + + case BITOR: case BITXOR: case BITAND: case EQ: case NE: case LT: case LE: + case GT: case GE: case LSH: case RSH: case URSH: case ADD: case SUB: case MUL: + case DIV: case MOD: case BITNOT: case SHEQ: case SHNE: case INSTANCEOF: + case OR: case AND: case COMMA: case INC: case DEC: + throw new Error("haven't figured out how to handle postfix/infix operators yet"); + //return new Expr(tok, prefix, (tok == INC || tok == DEC) ? null : parseExpr()); + + case ASSIGN: + throw new Error("haven't figured out how to handle postfix/infix operators yet"); + + case LP: + while(peekToken() != RP) { + if (head == null) head = tail = parseExpr(); else tail = tail.next = parseExpr(); + tok = getToken(); + if (tok == RP) break; + if (tok != COMMA) throw new Error("expected comma or right paren"); + } + return new Expr(LP, prefix, head); + + case LB: + e1 = parseExpr(); + if (getToken() != RB) throw new Error("expected a right brace"); + return new Expr(LB, prefix, e1); + + case HOOK: + e2 = parseExpr(); + if (getToken() != COLON) throw new Error("expected colon to close ?: expression"); + e3 = parseExpr(); + return new Expr(HOOK, prefix, e2, e3); + + default: + pushBackToken(); + return null; + } + } + +} + -- 1.7.10.4