+++ /dev/null
-/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-\r
- *\r
- * The contents of this file are subject to the Netscape Public\r
- * License Version 1.1 (the "License"); you may not use this file\r
- * except in compliance with the License. You may obtain a copy of\r
- * the License at http://www.mozilla.org/NPL/\r
- *\r
- * Software distributed under the License is distributed on an "AS\r
- * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express oqr\r
- * implied. See the License for the specific language governing\r
- * rights and limitations under the License.\r
- *\r
- * The Original Code is Rhino code, released\r
- * May 6, 1999.\r
- *\r
- * The Initial Developer of the Original Code is Netscape\r
- * Communications Corporation. Portions created by Netscape are\r
- * Copyright (C) 1997-1999 Netscape Communications Corporation. All\r
- * Rights Reserved.\r
- *\r
- * Contributor(s): \r
- * Roger Lawrence\r
- * Mike McCabe\r
- *\r
- * Alternatively, the contents of this file may be used under the\r
- * terms of the GNU Public License (the "GPL"), in which case the\r
- * provisions of the GPL are applicable instead of those above.\r
- * If you wish to allow use of your version of this file only\r
- * under the terms of the GPL and not to allow others to use your\r
- * version of this file under the NPL, indicate your decision by\r
- * deleting the provisions above and replace them with the notice\r
- * and other provisions required by the GPL. If you do not delete\r
- * the provisions above, a recipient may use your version of this\r
- * file under either the NPL or the GPL.\r
- */\r
-\r
-package org.mozilla.javascript;\r
-\r
-import java.io.*;\r
-\r
-/**\r
- * This class implements the JavaScript scanner.\r
- *\r
- * It is based on the C source files jsscan.c and jsscan.h\r
- * in the jsref package.\r
- *\r
- * @see org.mozilla.javascript.Parser\r
- *\r
- * @author Mike McCabe\r
- * @author Brendan Eich\r
- */\r
-\r
-public class TokenStream {\r
- /*\r
- * JSTokenStream flags, mirroring those in jsscan.h. These are used\r
- * by the parser to change/check the state of the scanner.\r
- */\r
-\r
- public final static int\r
- TSF_NEWLINES = 0x0001, // tokenize newlines\r
- TSF_FUNCTION = 0x0002, // scanning inside function body\r
- TSF_RETURN_EXPR = 0x0004, // function has 'return expr;'\r
- TSF_RETURN_VOID = 0x0008, // function has 'return;'\r
- TSF_REGEXP = 0x0010; // looking for a regular expression\r
-\r
- /*\r
- * For chars - because we need something out-of-range\r
- * to check. (And checking EOF by exception is annoying.)\r
- * Note distinction from EOF token type!\r
- */\r
- private final static int\r
- EOF_CHAR = -1;\r
-\r
- /**\r
- * Token types. These values correspond to JSTokenType values in\r
- * jsscan.c.\r
- */\r
-\r
- public final static int\r
- // start enum\r
- ERROR = -1, // well-known as the only code < EOF\r
- EOF = 0, // end of file token - (not EOF_CHAR)\r
- EOL = 1, // end of line\r
- // Beginning here are interpreter bytecodes. Their values\r
- // must not exceed 127.\r
- POPV = 2,\r
- ENTERWITH = 3,\r
- LEAVEWITH = 4,\r
- RETURN = 5,\r
- GOTO = 6,\r
- IFEQ = 7,\r
- IFNE = 8,\r
- DUP = 9,\r
- SETNAME = 10,\r
- BITOR = 11,\r
- BITXOR = 12,\r
- BITAND = 13,\r
- EQ = 14,\r
- NE = 15,\r
- LT = 16,\r
- LE = 17,\r
- GT = 18,\r
- GE = 19,\r
- LSH = 20,\r
- RSH = 21,\r
- URSH = 22,\r
- ADD = 23,\r
- SUB = 24,\r
- MUL = 25,\r
- DIV = 26,\r
- MOD = 27,\r
- BITNOT = 28,\r
- NEG = 29,\r
- NEW = 30,\r
- DELPROP = 31,\r
- TYPEOF = 32,\r
- NAMEINC = 33,\r
- PROPINC = 34,\r
- ELEMINC = 35,\r
- NAMEDEC = 36,\r
- PROPDEC = 37,\r
- ELEMDEC = 38,\r
- GETPROP = 39,\r
- SETPROP = 40,\r
- GETELEM = 41,\r
- SETELEM = 42,\r
- CALL = 43,\r
- NAME = 44,\r
- NUMBER = 45,\r
- STRING = 46,\r
- ZERO = 47,\r
- ONE = 48,\r
- NULL = 49,\r
- THIS = 50,\r
- FALSE = 51,\r
- TRUE = 52,\r
- SHEQ = 53, // shallow equality (===)\r
- SHNE = 54, // shallow inequality (!==)\r
- CLOSURE = 55,\r
- OBJECT = 56,\r
- POP = 57,\r
- POS = 58,\r
- VARINC = 59,\r
- VARDEC = 60,\r
- BINDNAME = 61,\r
- THROW = 62,\r
- IN = 63,\r
- INSTANCEOF = 64,\r
- GOSUB = 65,\r
- RETSUB = 66,\r
- CALLSPECIAL = 67,\r
- GETTHIS = 68,\r
- NEWTEMP = 69,\r
- USETEMP = 70,\r
- GETBASE = 71,\r
- GETVAR = 72,\r
- SETVAR = 73,\r
- UNDEFINED = 74,\r
- TRY = 75,\r
- ENDTRY = 76,\r
- NEWSCOPE = 77,\r
- TYPEOFNAME = 78,\r
- ENUMINIT = 79,\r
- ENUMNEXT = 80,\r
- GETPROTO = 81,\r
- GETPARENT = 82,\r
- SETPROTO = 83,\r
- SETPARENT = 84,\r
- SCOPE = 85,\r
- GETSCOPEPARENT = 86,\r
- THISFN = 87,\r
- JTHROW = 88,\r
- // End of interpreter bytecodes\r
- SEMI = 89, // semicolon\r
- LB = 90, // left and right brackets\r
- RB = 91,\r
- LC = 92, // left and right curlies (braces)\r
- RC = 93,\r
- LP = 94, // left and right parentheses\r
- RP = 95,\r
- COMMA = 96, // comma operator\r
- ASSIGN = 97, // assignment ops (= += -= etc.)\r
- HOOK = 98, // conditional (?:)\r
- COLON = 99,\r
- OR = 100, // logical or (||)\r
- AND = 101, // logical and (&&)\r
- EQOP = 102, // equality ops (== !=)\r
- RELOP = 103, // relational ops (< <= > >=)\r
- SHOP = 104, // shift ops (<< >> >>>)\r
- UNARYOP = 105, // unary prefix operator\r
- INC = 106, // increment/decrement (++ --)\r
- DEC = 107,\r
- DOT = 108, // member operator (.)\r
- PRIMARY = 109, // true, false, null, this\r
- FUNCTION = 110, // function keyword\r
- EXPORT = 111, // export keyword\r
- IMPORT = 112, // import keyword\r
- IF = 113, // if keyword\r
- ELSE = 114, // else keyword\r
- SWITCH = 115, // switch keyword\r
- CASE = 116, // case keyword\r
- DEFAULT = 117, // default keyword\r
- WHILE = 118, // while keyword\r
- DO = 119, // do keyword\r
- FOR = 120, // for keyword\r
- BREAK = 121, // break keyword\r
- CONTINUE = 122, // continue keyword\r
- VAR = 123, // var keyword\r
- WITH = 124, // with keyword\r
- CATCH = 125, // catch keyword\r
- FINALLY = 126, // finally keyword\r
- RESERVED = 127, // reserved keywords\r
-\r
- /** Added by Mike - these are JSOPs in the jsref, but I\r
- * don't have them yet in the java implementation...\r
- * so they go here. Also whatever I needed.\r
-\r
- * Most of these go in the 'op' field when returning\r
- * more general token types, eg. 'DIV' as the op of 'ASSIGN'.\r
- */\r
- NOP = 128, // NOP\r
- NOT = 129, // etc.\r
- PRE = 130, // for INC, DEC nodes.\r
- POST = 131,\r
-\r
- /**\r
- * For JSOPs associated with keywords...\r
- * eg. op = THIS; token = PRIMARY\r
- */\r
-\r
- VOID = 132,\r
-\r
- /* types used for the parse tree - these never get returned\r
- * by the scanner.\r
- */\r
- BLOCK = 133, // statement block\r
- ARRAYLIT = 134, // array literal\r
- OBJLIT = 135, // object literal\r
- LABEL = 136, // label\r
- TARGET = 137,\r
- LOOP = 138,\r
- ENUMDONE = 139,\r
- EXPRSTMT = 140,\r
- PARENT = 141,\r
- CONVERT = 142,\r
- JSR = 143,\r
- NEWLOCAL = 144,\r
- USELOCAL = 145,\r
- SCRIPT = 146, // top-level node for entire script\r
- \r
- /**\r
- * For the interpreted mode indicating a line number change in icodes.\r
- */\r
- LINE = 147,\r
- SOURCEFILE = 148,\r
- \r
- // For debugger\r
- \r
- BREAKPOINT = 149,\r
- ASSERT = 150; // XWT assert hack\r
- // end enum\r
-\r
-\r
- /* for mapping int token types to printable strings.\r
- * make sure to add 1 to index before using these!\r
- */\r
- private static String names[];\r
- private static void checkNames() {\r
- if (Context.printTrees && names == null) {\r
- String[] a = {\r
- "error",\r
- "eof",\r
- "eol",\r
- "popv",\r
- "enterwith",\r
- "leavewith",\r
- "return",\r
- "goto",\r
- "ifeq",\r
- "ifne",\r
- "dup",\r
- "setname",\r
- "bitor",\r
- "bitxor",\r
- "bitand",\r
- "eq",\r
- "ne",\r
- "lt",\r
- "le",\r
- "gt",\r
- "ge",\r
- "lsh",\r
- "rsh",\r
- "ursh",\r
- "add",\r
- "sub",\r
- "mul",\r
- "div",\r
- "mod",\r
- "bitnot",\r
- "neg",\r
- "new",\r
- "delprop",\r
- "typeof",\r
- "nameinc",\r
- "propinc",\r
- "eleminc",\r
- "namedec",\r
- "propdec",\r
- "elemdec",\r
- "getprop",\r
- "setprop",\r
- "getelem",\r
- "setelem",\r
- "call",\r
- "name",\r
- "number",\r
- "string",\r
- "zero",\r
- "one",\r
- "null",\r
- "this",\r
- "false",\r
- "true",\r
- "sheq",\r
- "shne",\r
- "closure",\r
- "object",\r
- "pop",\r
- "pos",\r
- "varinc",\r
- "vardec",\r
- "bindname",\r
- "throw",\r
- "in",\r
- "instanceof",\r
- "gosub",\r
- "retsub",\r
- "callspecial",\r
- "getthis",\r
- "newtemp",\r
- "usetemp",\r
- "getbase",\r
- "getvar",\r
- "setvar",\r
- "undefined",\r
- "try",\r
- "endtry",\r
- "newscope",\r
- "typeofname",\r
- "enuminit",\r
- "enumnext",\r
- "getproto",\r
- "getparent",\r
- "setproto",\r
- "setparent",\r
- "scope",\r
- "getscopeparent",\r
- "thisfn",\r
- "jthrow",\r
- "semi",\r
- "lb",\r
- "rb",\r
- "lc",\r
- "rc",\r
- "lp",\r
- "rp",\r
- "comma",\r
- "assign",\r
- "hook",\r
- "colon",\r
- "or",\r
- "and",\r
- "eqop",\r
- "relop",\r
- "shop",\r
- "unaryop",\r
- "inc",\r
- "dec",\r
- "dot",\r
- "primary",\r
- "function",\r
- "export",\r
- "import",\r
- "if",\r
- "else",\r
- "switch",\r
- "case",\r
- "default",\r
- "while",\r
- "do",\r
- "for",\r
- "break",\r
- "continue",\r
- "var",\r
- "with",\r
- "catch",\r
- "finally",\r
- "reserved",\r
- "nop",\r
- "not",\r
- "pre",\r
- "post",\r
- "void",\r
- "block",\r
- "arraylit",\r
- "objlit",\r
- "label",\r
- "target",\r
- "loop",\r
- "enumdone",\r
- "exprstmt",\r
- "parent",\r
- "convert",\r
- "jsr",\r
- "newlocal",\r
- "uselocal",\r
- "script",\r
- "line",\r
- "sourcefile",\r
- };\r
- names = a;\r
- }\r
- }\r
-\r
- /* This function uses the cached op, string and number fields in\r
- * TokenStream; if getToken has been called since the passed token\r
- * was scanned, the op or string printed may be incorrect.\r
- */\r
- public String tokenToString(int token) {\r
- if (Context.printTrees) {\r
- checkNames();\r
- if (token + 1 >= names.length)\r
- return null;\r
-\r
- if (token == UNARYOP ||\r
- token == ASSIGN ||\r
- token == PRIMARY ||\r
- token == EQOP ||\r
- token == SHOP ||\r
- token == RELOP) {\r
- return names[token + 1] + " " + names[this.op + 1];\r
- }\r
-\r
- if (token == STRING || token == OBJECT || token == NAME)\r
- return names[token + 1] + " `" + this.string + "'";\r
-\r
- if (token == NUMBER)\r
- return "NUMBER " + this.number;\r
-\r
- return names[token + 1];\r
- }\r
- return "";\r
- }\r
-\r
- public static String tokenToName(int type) {\r
- checkNames();\r
- return names == null ? "" : names[type + 1];\r
- }\r
-\r
-\r
- private int stringToKeyword(String name) {\r
-// #string_id_map#\r
-// The following assumes that EOF == 0\r
- final int \r
- Id_break = BREAK,\r
- Id_case = CASE,\r
- Id_continue = CONTINUE,\r
- Id_default = DEFAULT,\r
- Id_delete = DELPROP,\r
- Id_do = DO,\r
- Id_else = ELSE,\r
- Id_export = EXPORT,\r
- Id_false = PRIMARY | (FALSE << 8),\r
- Id_for = FOR,\r
- Id_function = FUNCTION,\r
- Id_if = IF,\r
- Id_in = RELOP | (IN << 8),\r
- Id_new = NEW,\r
- Id_null = PRIMARY | (NULL << 8),\r
- Id_return = RETURN,\r
- Id_switch = SWITCH,\r
- Id_this = PRIMARY | (THIS << 8),\r
- Id_true = PRIMARY | (TRUE << 8),\r
- Id_typeof = UNARYOP | (TYPEOF << 8),\r
- Id_var = VAR,\r
- Id_void = UNARYOP | (VOID << 8),\r
- Id_while = WHILE,\r
- Id_with = WITH,\r
-\r
- // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c\r
- Id_abstract = RESERVED,\r
- Id_boolean = RESERVED,\r
- Id_byte = RESERVED,\r
- Id_catch = CATCH,\r
- Id_char = RESERVED,\r
- Id_class = RESERVED,\r
- Id_const = RESERVED,\r
- Id_debugger = RESERVED,\r
- Id_double = RESERVED,\r
- Id_enum = RESERVED,\r
- Id_extends = RESERVED,\r
- Id_final = RESERVED,\r
- Id_finally = FINALLY,\r
- Id_float = RESERVED,\r
- Id_goto = RESERVED,\r
- Id_implements = RESERVED,\r
- Id_import = IMPORT,\r
- Id_instanceof = RELOP | (INSTANCEOF << 8),\r
- Id_int = RESERVED,\r
- Id_interface = RESERVED,\r
- Id_long = RESERVED,\r
- Id_native = RESERVED,\r
- Id_package = RESERVED,\r
- Id_private = RESERVED,\r
- Id_protected = RESERVED,\r
- Id_public = RESERVED,\r
- Id_assert = ASSERT,\r
- Id_short = RESERVED,\r
- Id_static = RESERVED,\r
- Id_super = RESERVED,\r
- Id_synchronized = RESERVED,\r
- Id_throw = THROW,\r
- Id_throws = RESERVED,\r
- Id_transient = RESERVED,\r
- Id_try = TRY,\r
- Id_volatile = RESERVED;\r
- \r
- int id;\r
- String s = name;\r
-// #generated# Last update: 2001-06-01 17:45:01 CEST\r
- L0: { id = 0; String X = null; int c;\r
- L: switch (s.length()) {\r
- case 2: c=s.charAt(1);\r
- if (c=='f') { if (s.charAt(0)=='i') {id=Id_if; break L0;} }\r
- else if (c=='n') { if (s.charAt(0)=='i') {id=Id_in; break L0;} }\r
- else if (c=='o') { if (s.charAt(0)=='d') {id=Id_do; break L0;} }\r
- break L;\r
- case 3: switch (s.charAt(0)) {\r
- case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') {id=Id_for; break L0;} break L;\r
- case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') {id=Id_int; break L0;} break L;\r
- case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') {id=Id_new; break L0;} break L;\r
- case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') {id=Id_try; break L0;} break L;\r
- case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') {id=Id_var; break L0;} break L;\r
- } break L;\r
- case 4: switch (s.charAt(0)) {\r
- case 'b': X="byte";id=Id_byte; break L;\r
- case 'c': c=s.charAt(3);\r
- if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') {id=Id_case; break L0;} }\r
- else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') {id=Id_char; break L0;} }\r
- break L;\r
- case 'e': c=s.charAt(3);\r
- if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') {id=Id_else; break L0;} }\r
- else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') {id=Id_enum; break L0;} }\r
- break L;\r
- case 'g': X="goto";id=Id_goto; break L;\r
- case 'l': X="long";id=Id_long; break L;\r
- case 'n': X="null";id=Id_null; break L;\r
- case 't': c=s.charAt(3);\r
- if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') {id=Id_true; break L0;} }\r
- else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') {id=Id_this; break L0;} }\r
- break L;\r
- case 'v': X="void";id=Id_void; break L;\r
- case 'w': X="with";id=Id_with; break L;\r
- } break L;\r
- case 5: switch (s.charAt(2)) {\r
- case 'a': X="class";id=Id_class; break L;\r
- case 'e': X="break";id=Id_break; break L;\r
- case 'i': X="while";id=Id_while; break L;\r
- case 'l': X="false";id=Id_false; break L;\r
- case 'n': c=s.charAt(0);\r
- if (c=='c') { X="const";id=Id_const; }\r
- else if (c=='f') { X="final";id=Id_final; }\r
- break L;\r
- case 'o': c=s.charAt(0);\r
- if (c=='f') { X="float";id=Id_float; }\r
- else if (c=='s') { X="short";id=Id_short; }\r
- break L;\r
- case 'p': X="super";id=Id_super; break L;\r
- case 'r': X="throw";id=Id_throw; break L;\r
- case 't': X="catch";id=Id_catch; break L;\r
- } break L;\r
- case 6: switch (s.charAt(1)) {\r
- case 'a': X="native";id=Id_native; break L;\r
- case 'e': c=s.charAt(0);\r
- if (c=='d') { X="delete";id=Id_delete; }\r
- else if (c=='r') { X="return";id=Id_return; }\r
- break L;\r
- case 'h': X="throws";id=Id_throws; break L;\r
- case 'm': X="import";id=Id_import; break L;\r
- case 'o': X="double";id=Id_double; break L;\r
-\r
- // commented out by Adam Megacz for XWT\r
- /*\r
- case 't': X="static";id=Id_static; break L;\r
- */\r
-\r
- case 's': X="assert";id=Id_assert; break L;\r
- case 'u': X="public";id=Id_public; break L;\r
- case 'w': X="switch";id=Id_switch; break L;\r
- case 'x': X="export";id=Id_export; break L;\r
- case 'y': X="typeof";id=Id_typeof; break L;\r
- } break L;\r
- case 7: switch (s.charAt(1)) {\r
- case 'a': X="package";id=Id_package; break L;\r
- case 'e': X="default";id=Id_default; break L;\r
- case 'i': X="finally";id=Id_finally; break L;\r
- case 'o': X="boolean";id=Id_boolean; break L;\r
- case 'r': X="private";id=Id_private; break L;\r
- case 'x': X="extends";id=Id_extends; break L;\r
- } break L;\r
- case 8: switch (s.charAt(0)) {\r
- case 'a': X="abstract";id=Id_abstract; break L;\r
- case 'c': X="continue";id=Id_continue; break L;\r
- case 'd': X="debugger";id=Id_debugger; break L;\r
- case 'f': X="function";id=Id_function; break L;\r
- case 'v': X="volatile";id=Id_volatile; break L;\r
- } break L;\r
- case 9: c=s.charAt(0);\r
- if (c=='i') { X="interface";id=Id_interface; }\r
- else if (c=='p') { X="protected";id=Id_protected; }\r
- else if (c=='t') { X="transient";id=Id_transient; }\r
- break L;\r
- case 10: c=s.charAt(1);\r
- if (c=='m') { X="implements";id=Id_implements; }\r
- else if (c=='n') { X="instanceof";id=Id_instanceof; }\r
- break L;\r
- case 12: X="synchronized";id=Id_synchronized; break L;\r
- }\r
- if (X!=null && X!=s && !X.equals(s)) id = 0;\r
- if (id == Id_const) Context.reportError("The 'const' keyword is not allowed in XWT scripts",\r
- getSourceName(), getLineno(), getLine(), getOffset());\r
- if (id == Id_instanceof) Context.reportError("The 'instanceof' keyword is not allowed in XWT scripts",\r
- getSourceName(), getLineno(), getLine(), getOffset());\r
- if (id == Id_delete) Context.reportError("The 'delete' keyword is not allowed in XWT scripts",\r
- getSourceName(), getLineno(), getLine(), getOffset());\r
-\r
- }\r
- if (id == Id_new) Context.reportError("The 'new' keyword is not allowed in XWT scripts",\r
- getSourceName(), getLineno(), getLine(), getOffset());\r
-// #/generated#\r
-// #/string_id_map#\r
- if (id == 0) { return EOF; }\r
- this.op = id >> 8;\r
- return id & 0xff;\r
- }\r
-\r
- public TokenStream(Reader in, Scriptable scope,\r
- String sourceName, int lineno)\r
- {\r
- this.in = new LineBuffer(in, lineno);\r
- this.scope = scope;\r
- this.pushbackToken = EOF;\r
- this.sourceName = sourceName;\r
- flags = 0;\r
- }\r
- \r
- public Scriptable getScope() { \r
- return scope;\r
- }\r
-\r
- /* return and pop the token from the stream if it matches...\r
- * otherwise return null\r
- */\r
- public boolean matchToken(int toMatch) throws IOException {\r
- int token = getToken();\r
- if (token == toMatch)\r
- return true;\r
-\r
- // didn't match, push back token\r
- tokenno--;\r
- this.pushbackToken = token;\r
- return false;\r
- }\r
-\r
- public void clearPushback() {\r
- this.pushbackToken = EOF;\r
- }\r
-\r
- public void ungetToken(int tt) {\r
- if (this.pushbackToken != EOF && tt != ERROR) {\r
- String message = Context.getMessage2("msg.token.replaces.pushback",\r
- tokenToString(tt), tokenToString(this.pushbackToken));\r
- throw new RuntimeException(message);\r
- }\r
- this.pushbackToken = tt;\r
- tokenno--;\r
- }\r
-\r
- public int peekToken() throws IOException {\r
- int result = getToken();\r
-\r
- this.pushbackToken = result;\r
- tokenno--;\r
- return result;\r
- }\r
-\r
- public int peekTokenSameLine() throws IOException {\r
- int result;\r
-\r
- flags |= TSF_NEWLINES; // SCAN_NEWLINES from jsscan.h\r
- result = peekToken();\r
- flags &= ~TSF_NEWLINES; // HIDE_NEWLINES from jsscan.h\r
- if (this.pushbackToken == EOL)\r
- this.pushbackToken = EOF;\r
- return result;\r
- }\r
-\r
- protected static boolean isJSIdentifier(String s) {\r
-\r
- int length = s.length();\r
-\r
- if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0)))\r
- return false;\r
-\r
- for (int i=1; i<length; i++) {\r
- char c = s.charAt(i);\r
- if (!Character.isJavaIdentifierPart(c))\r
- if (c == '\\')\r
- if (! ((i + 5) < length)\r
- && (s.charAt(i + 1) == 'u')\r
- && 0 <= xDigitToInt(s.charAt(i + 2))\r
- && 0 <= xDigitToInt(s.charAt(i + 3))\r
- && 0 <= xDigitToInt(s.charAt(i + 4))\r
- && 0 <= xDigitToInt(s.charAt(i + 5)))\r
- \r
- return false;\r
- }\r
-\r
- return true;\r
- }\r
-\r
- private static boolean isAlpha(int c) {\r
- return ((c >= 'a' && c <= 'z')\r
- || (c >= 'A' && c <= 'Z'));\r
- }\r
-\r
- static boolean isDigit(int c) {\r
- return (c >= '0' && c <= '9');\r
- }\r
-\r
- static int xDigitToInt(int c) {\r
- if ('0' <= c && c <= '9') { return c - '0'; }\r
- if ('a' <= c && c <= 'f') { return c - ('a' - 10); }\r
- if ('A' <= c && c <= 'F') { return c - ('A' - 10); }\r
- return -1;\r
- }\r
-\r
- /* As defined in ECMA. jsscan.c uses C isspace() (which allows\r
- * \v, I think.) note that code in in.read() implicitly accepts\r
- * '\r' == \u000D as well.\r
- */\r
- public static boolean isJSSpace(int c) {\r
- return (c == '\u0020' || c == '\u0009'\r
- || c == '\u000C' || c == '\u000B'\r
- || c == '\u00A0' \r
- || Character.getType((char)c) == Character.SPACE_SEPARATOR);\r
- }\r
-\r
- public static boolean isJSLineTerminator(int c) {\r
- return (c == '\n' || c == '\r'\r
- || c == 0x2028 || c == 0x2029);\r
- }\r
- \r
- public int getToken() throws IOException {\r
- int c;\r
- tokenno++;\r
-\r
- // Check for pushed-back token\r
- if (this.pushbackToken != EOF) {\r
- int result = this.pushbackToken;\r
- this.pushbackToken = EOF;\r
- return result;\r
- }\r
-\r
- // Eat whitespace, possibly sensitive to newlines.\r
- do {\r
- c = in.read();\r
- if (c == '\n')\r
- if ((flags & TSF_NEWLINES) != 0)\r
- break;\r
- } while (isJSSpace(c) || c == '\n');\r
-\r
- if (c == EOF_CHAR)\r
- return EOF;\r
-\r
- // HACK: XWT "and" as "&&" support
- if (c == 'a' || c == 'A') {
- if (in.match('n') || in.match('N')) {
- if (in.match('d') || in.match('D')) return AND;
- in.unread();
- }
- }
-
- // identifier/keyword/instanceof?\r
- // watch out for starting with a <backslash>\r
- boolean isUnicodeEscapeStart = false;\r
- if (c == '\\') {\r
- c = in.read();\r
- if (c == 'u')\r
- isUnicodeEscapeStart = true;\r
- else\r
- c = '\\';\r
- // always unread the 'u' or whatever, we need \r
- // to start the string below at the <backslash>.\r
- in.unread();\r
- }\r
- if (isUnicodeEscapeStart ||\r
- Character.isJavaIdentifierStart((char)c)) {\r
- in.startString();\r
-\r
- boolean containsEscape = isUnicodeEscapeStart; \r
- do {\r
- c = in.read();\r
- if (c == '\\') {\r
- c = in.read();\r
- containsEscape = (c == 'u');\r
- } \r
- } while (Character.isJavaIdentifierPart((char)c));\r
- in.unread();\r
-\r
- int result;\r
-\r
- String str = in.getString();\r
- // OPT we shouldn't have to make a string (object!) to\r
- // check if it's a keyword.\r
- \r
- // strictly speaking we should probably push-back\r
- // all the bad characters if the <backslash>uXXXX \r
- // sequence is malformed. But since there isn't a \r
- // correct context(is there?) for a bad Unicode \r
- // escape sequence after an identifier, we can report\r
- // an error here.\r
- if (containsEscape) {\r
- char ca[] = str.toCharArray();\r
- int L = str.length();\r
- int destination = 0;\r
- for (int i = 0; i != L;) {\r
- c = ca[i];\r
- ++i;\r
- if (c == '\\' && i != L && ca[i] == 'u') {\r
- boolean goodEscape = false;\r
- if (i + 4 < L) {\r
- int val = xDigitToInt(ca[i + 1]);\r
- if (val >= 0) {\r
- val = (val << 4) | xDigitToInt(ca[i + 2]);\r
- if (val >= 0) {\r
- val = (val << 4) | xDigitToInt(ca[i + 3]);\r
- if (val >= 0) {\r
- val = (val << 4) | xDigitToInt(ca[i + 4]);\r
- if (val >= 0) {\r
- c = (char)val;\r
- i += 5;\r
- goodEscape = true;\r
- }\r
- }\r
- }\r
- }\r
- }\r
- if (!goodEscape) {\r
- reportSyntaxError("msg.invalid.escape", null);\r
- return ERROR;\r
- }\r
- }\r
- ca[destination] = (char)c;\r
- ++destination;\r
- }\r
- str = new String(ca, 0, destination);\r
- }\r
- else\r
- // Return the corresponding token if it's a keyword\r
- if ((result = stringToKeyword(str)) != EOF) {\r
- return result;\r
- }\r
-\r
- this.string = str;\r
- return NAME;\r
- }\r
-\r
- // is it a number?\r
- if (isDigit(c) || (c == '.' && isDigit(in.peek()))) {\r
- int base = 10;\r
- in.startString();\r
-\r
- double dval = ScriptRuntime.NaN;\r
- long longval = 0;\r
- boolean isInteger = true;\r
-\r
- if (c == '0') {\r
- c = in.read();\r
- if (c == 'x' || c == 'X') {\r
- c = in.read();\r
- base = 16;\r
- // restart the string, losing leading 0x\r
- in.startString();\r
- } else if (isDigit(c)) {\r
- base = 8;\r
- }\r
- }\r
-\r
- while (0 <= xDigitToInt(c)) {\r
- if (base < 16) {\r
- if (isAlpha(c))\r
- break;\r
- /*\r
- * We permit 08 and 09 as decimal numbers, which\r
- * makes our behavior a superset of the ECMA\r
- * numeric grammar. We might not always be so\r
- * permissive, so we warn about it.\r
- */\r
- if (base == 8 && c >= '8') {\r
- Object[] errArgs = { c == '8' ? "8" : "9" };\r
- Context.reportWarning(\r
- Context.getMessage("msg.bad.octal.literal",\r
- errArgs),\r
- getSourceName(),\r
- in.getLineno(), getLine(), getOffset());\r
- base = 10;\r
- }\r
- }\r
- c = in.read();\r
- }\r
-\r
- if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {\r
- isInteger = false;\r
- if (c == '.') {\r
- do {\r
- c = in.read();\r
- } while (isDigit(c));\r
- }\r
-\r
- if (c == 'e' || c == 'E') {\r
- c = in.read();\r
- if (c == '+' || c == '-') {\r
- c = in.read();\r
- }\r
- if (!isDigit(c)) {\r
- in.getString(); // throw away string in progress\r
- reportSyntaxError("msg.missing.exponent", null);\r
- return ERROR;\r
- }\r
- do {\r
- c = in.read();\r
- } while (isDigit(c));\r
- }\r
- }\r
- in.unread();\r
- String numString = in.getString();\r
-\r
- if (base == 10 && !isInteger) {\r
- try {\r
- // Use Java conversion to number from string...\r
- dval = (Double.valueOf(numString)).doubleValue();\r
- }\r
- catch (NumberFormatException ex) {\r
- Object[] errArgs = { ex.getMessage() };\r
- reportSyntaxError("msg.caught.nfe", errArgs);\r
- return ERROR;\r
- }\r
- } else {\r
- dval = ScriptRuntime.stringToNumber(numString, 0, base);\r
- longval = (long) dval;\r
-\r
- // is it an integral fits-in-a-long value?\r
- if (longval != dval)\r
- isInteger = false;\r
- }\r
-\r
- if (!isInteger) {\r
- /* Can't handle floats right now, because postfix INC/DEC\r
- generate Doubles, but I would generate a Float through this\r
- path, and it causes a stack mismatch. FIXME (MS)\r
- if (Float.MIN_VALUE <= dval && dval <= Float.MAX_VALUE)\r
- this.number = new Xloat((float) dval);\r
- else\r
- */\r
- this.number = new Double(dval);\r
- } else {\r
- // We generate the smallest possible type here\r
- if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE)\r
- this.number = new Byte((byte)longval);\r
- else if (Short.MIN_VALUE <= longval &&\r
- longval <= Short.MAX_VALUE)\r
- this.number = new Short((short)longval);\r
- else if (Integer.MIN_VALUE <= longval &&\r
- longval <= Integer.MAX_VALUE)\r
- this.number = new Integer((int)longval);\r
- else {\r
- // May lose some precision here, but that's the \r
- // appropriate semantics.\r
- this.number = new Double(longval);\r
- }\r
- }\r
- return NUMBER;\r
- }\r
-\r
- // is it a string?\r
- if (c == '"' || c == '\'') {\r
- // We attempt to accumulate a string the fast way, by\r
- // building it directly out of the reader. But if there\r
- // are any escaped characters in the string, we revert to\r
- // building it out of a StringBuffer.\r
-\r
- StringBuffer stringBuf = null;\r
-\r
- int quoteChar = c;\r
- int val = 0;\r
-\r
- c = in.read();\r
- in.startString(); // start after the first "\r
- while(c != quoteChar) {\r
- if (c == '\n' || c == EOF_CHAR) {\r
- in.unread();\r
- in.getString(); // throw away the string in progress\r
- reportSyntaxError("msg.unterminated.string.lit", null);\r
- return ERROR;\r
- }\r
-\r
- if (c == '\\') {\r
- // We've hit an escaped character; revert to the\r
- // slow method of building a string.\r
- if (stringBuf == null) {\r
- // Don't include the backslash\r
- in.unread();\r
- stringBuf = new StringBuffer(in.getString());\r
- in.read();\r
- }\r
-\r
- switch (c = in.read()) {\r
- case 'b': c = '\b'; break;\r
- case 'f': c = '\f'; break;\r
- case 'n': c = '\n'; break;\r
- case 'r': c = '\r'; break;\r
- case 't': c = '\t'; break;\r
- case 'v': c = '\u000B'; break;\r
- // \v a late addition to the ECMA spec.\r
- // '\v' doesn't seem to be valid Java.\r
-\r
- default:\r
- if (isDigit(c) && c < '8') {\r
- val = c - '0';\r
- c = in.read();\r
- if (isDigit(c) && c < '8') {\r
- val = 8 * val + c - '0';\r
- c = in.read();\r
- if (isDigit(c) && c < '8') {\r
- val = 8 * val + c - '0';\r
- c = in.read();\r
- }\r
- }\r
- in.unread();\r
- if (val > 0377) {\r
- reportSyntaxError("msg.oct.esc.too.large", null);\r
- return ERROR;\r
- }\r
- c = val;\r
- } else if (c == 'u') {\r
- /*\r
- * Get 4 hex digits; if the u escape is not\r
- * followed by 4 hex digits, use 'u' + the literal\r
- * character sequence that follows. Do some manual\r
- * match (OK because we're in a string) to avoid\r
- * multi-char match on the underlying stream.\r
- */\r
- int c1 = in.read();\r
- c = xDigitToInt(c1);\r
- if (c < 0) {\r
- in.unread();\r
- c = 'u';\r
- } else {\r
- int c2 = in.read();\r
- c = (c << 4) | xDigitToInt(c2);\r
- if (c < 0) {\r
- in.unread();\r
- stringBuf.append('u');\r
- c = c1;\r
- } else {\r
- int c3 = in.read();\r
- c = (c << 4) | xDigitToInt(c3);\r
- if (c < 0) {\r
- in.unread();\r
- stringBuf.append('u');\r
- stringBuf.append((char)c1);\r
- c = c2;\r
- } else {\r
- int c4 = in.read();\r
- c = (c << 4) | xDigitToInt(c4);\r
- if (c < 0) {\r
- in.unread();\r
- stringBuf.append('u');\r
- stringBuf.append((char)c1);\r
- stringBuf.append((char)c2);\r
- c = c3;\r
- } else {\r
- // got 4 hex digits! Woo Hoo!\r
- }\r
- }\r
- }\r
- }\r
- } else if (c == 'x') {\r
- /* Get 2 hex digits, defaulting to 'x' + literal\r
- * sequence, as above.\r
- */\r
- int c1 = in.read();\r
- c = xDigitToInt(c1);\r
- if (c < 0) {\r
- in.unread();\r
- c = 'x';\r
- } else {\r
- int c2 = in.read();\r
- c = (c << 4) | xDigitToInt(c2);\r
- if (c < 0) {\r
- in.unread();\r
- stringBuf.append('x');\r
- c = c1;\r
- } else {\r
- // got 2 hex digits\r
- }\r
- }\r
- }\r
- }\r
- }\r
- \r
- if (stringBuf != null)\r
- stringBuf.append((char) c);\r
- c = in.read();\r
- }\r
-\r
- if (stringBuf != null)\r
- this.string = stringBuf.toString();\r
- else {\r
- in.unread(); // miss the trailing "\r
- this.string = in.getString();\r
- in.read();\r
- }\r
- return STRING;\r
- }\r
-\r
- switch (c)\r
- {\r
- case '\n': return EOL;\r
- case ';': return SEMI;\r
- case '[': return LB;\r
- case ']': return RB;\r
- case '{': return LC;\r
- case '}': return RC;\r
- case '(': return LP;\r
- case ')': return RP;\r
- case ',': return COMMA;\r
- case '?': return HOOK;\r
- case ':': return COLON;\r
- case '.': return DOT;\r
-\r
- case '|':\r
- if (in.match('|')) {\r
- return OR;\r
- } else if (in.match('=')) {\r
- this.op = BITOR;\r
- return ASSIGN;\r
- } else {\r
- return BITOR;\r
- }\r
-\r
- case '^':\r
- if (in.match('=')) {\r
- this.op = BITXOR;\r
- return ASSIGN;\r
- } else {\r
- return BITXOR;\r
- }\r
-\r
- case '&':\r
- if (in.match('&')) {\r
- return AND;\r
- } else if (in.match('=')) {\r
- this.op = BITAND;\r
- return ASSIGN;\r
- } else {\r
- return BITAND;\r
- }\r
-\r
- case '=':\r
- if (in.match('=')) {\r
- if (in.match('='))\r
- this.op = SHEQ;\r
- else\r
- this.op = EQ;\r
- return EQOP;\r
- } else {\r
- this.op = NOP;\r
- return ASSIGN;\r
- }\r
-\r
- case '!':\r
- if (in.match('=')) {\r
- if (in.match('='))\r
- this.op = SHNE;\r
- else\r
- this.op = NE;\r
- return EQOP;\r
- } else {\r
- this.op = NOT;\r
- return UNARYOP;\r
- }\r
-\r
- case '<':\r
- /* NB:treat HTML begin-comment as comment-till-eol */\r
- if (in.match('!')) {\r
- if (in.match('-')) {\r
- if (in.match('-')) {\r
- while ((c = in.read()) != EOF_CHAR && c != '\n')\r
- /* skip to end of line */;\r
- in.unread();\r
- return getToken(); // in place of 'goto retry'\r
- }\r
- in.unread();\r
- }\r
- in.unread();\r
- }\r
- if (in.match('<')) {\r
- if (in.match('=')) {\r
- this.op = LSH;\r
- return ASSIGN;\r
- } else {\r
- this.op = LSH;\r
- return SHOP;\r
- }\r
- } else {\r
- if (in.match('=')) {\r
- this.op = LE;\r
- return RELOP;\r
- } else {\r
- this.op = LT;\r
- return RELOP;\r
- }\r
- }\r
-\r
- case '>':\r
- if (in.match('>')) {\r
- if (in.match('>')) {\r
- if (in.match('=')) {\r
- this.op = URSH;\r
- return ASSIGN;\r
- } else {\r
- this.op = URSH;\r
- return SHOP;\r
- }\r
- } else {\r
- if (in.match('=')) {\r
- this.op = RSH;\r
- return ASSIGN;\r
- } else {\r
- this.op = RSH;\r
- return SHOP;\r
- }\r
- }\r
- } else {\r
- if (in.match('=')) {\r
- this.op = GE;\r
- return RELOP;\r
- } else {\r
- this.op = GT;\r
- return RELOP;\r
- }\r
- }\r
-\r
- case '*':\r
- if (in.match('=')) {\r
- this.op = MUL;\r
- return ASSIGN;\r
- } else {\r
- return MUL;\r
- }\r
-\r
- case '/':\r
- // is it a // comment?\r
- if (in.match('/')) {\r
- while ((c = in.read()) != EOF_CHAR && c != '\n')\r
- /* skip to end of line */;\r
- in.unread();\r
- return getToken();\r
- }\r
- if (in.match('*')) {\r
- while ((c = in.read()) != -1\r
- && !(c == '*' && in.match('/'))) {\r
- if (c == '\n') {\r
- } else if (c == '/' && in.match('*')) {\r
- if (in.match('/'))\r
- return getToken();\r
- reportSyntaxError("msg.nested.comment", null);\r
- return ERROR;\r
- }\r
- }\r
- if (c == EOF_CHAR) {\r
- reportSyntaxError("msg.unterminated.comment", null);\r
- return ERROR;\r
- }\r
- return getToken(); // `goto retry'\r
- }\r
-\r
- // is it a regexp?\r
- if ((flags & TSF_REGEXP) != 0) {\r
- // We don't try to use the in.startString/in.getString\r
- // approach, because escaped characters (which break it)\r
- // seem likely to be common.\r
- StringBuffer re = new StringBuffer();\r
- while ((c = in.read()) != '/') {\r
- if (c == '\n' || c == EOF_CHAR) {\r
- in.unread();\r
- reportSyntaxError("msg.unterminated.re.lit", null);\r
- return ERROR;\r
- }\r
- if (c == '\\') {\r
- re.append((char) c);\r
- c = in.read();\r
- }\r
-\r
- re.append((char) c);\r
- }\r
-\r
- StringBuffer flagsBuf = new StringBuffer();\r
- while (true) {\r
- if (in.match('g'))\r
- flagsBuf.append('g');\r
- else if (in.match('i'))\r
- flagsBuf.append('i');\r
- else if (in.match('m'))\r
- flagsBuf.append('m');\r
- else\r
- break;\r
- }\r
-\r
- if (isAlpha(in.peek())) {\r
- reportSyntaxError("msg.invalid.re.flag", null);\r
- return ERROR;\r
- }\r
-\r
- this.string = re.toString();\r
- this.regExpFlags = flagsBuf.toString();\r
- return OBJECT;\r
- }\r
-\r
-\r
- if (in.match('=')) {\r
- this.op = DIV;\r
- return ASSIGN;\r
- } else {\r
- return DIV;\r
- }\r
-\r
- case '%':\r
- this.op = MOD;\r
- if (in.match('=')) {\r
- return ASSIGN;\r
- } else {\r
- return MOD;\r
- }\r
-\r
- case '~':\r
- this.op = BITNOT;\r
- return UNARYOP;\r
-\r
- case '+':\r
- case '-':\r
- if (in.match('=')) {\r
- if (c == '+') {\r
- this.op = ADD;\r
- return ASSIGN;\r
- } else {\r
- this.op = SUB;\r
- return ASSIGN;\r
- }\r
- } else if (in.match((char) c)) {\r
- if (c == '+') {\r
- return INC;\r
- } else {\r
- return DEC;\r
- }\r
- } else if (c == '-') {\r
- return SUB;\r
- } else {\r
- return ADD;\r
- }\r
-\r
- default:\r
- reportSyntaxError("msg.illegal.character", null);\r
- return ERROR;\r
- }\r
- }\r
-\r
- /**\r
-  * Reports a syntax error found while scanning.\r
-  *\r
-  * The message text is resolved with Context.getMessage. If this stream\r
-  * has a scope, the error built by NativeGlobal.constructError is thrown;\r
-  * otherwise the error is handed to Context.reportError together with the\r
-  * current source name, line number, line text and offset.\r
-  *\r
-  * @param messageProperty key passed to Context.getMessage\r
-  * @param args arguments passed to Context.getMessage; may be null\r
-  */\r
- public void reportSyntaxError(String messageProperty, Object[] args) {\r
-     final String text = Context.getMessage(messageProperty, args);\r
-     if (scope == null) {\r
-         Context.reportError(text, getSourceName(),\r
-                             getLineno(), getLine(), getOffset());\r
-         return;\r
-     }\r
-\r
-     // A scope is set, so we're probably in an eval: the error has to\r
-     // surface as a thrown SyntaxError instead of a reported one.\r
-     throw NativeGlobal.constructError(\r
-         Context.getContext(), "SyntaxError",\r
-         text, scope, getSourceName(),\r
-         getLineno(), getOffset(), getLine());\r
- }\r
-\r
- public String getSourceName() { return sourceName; }\r
- public int getLineno() { return in.getLineno(); }\r
- public int getOp() { return op; }\r
- public String getString() { return string; }\r
- public Number getNumber() { return number; }\r
- public String getLine() { return in.getLine(); }\r
- public int getOffset() { return in.getOffset(); }\r
- public int getTokenno() { return tokenno; }\r
- public boolean eof() { return in.eof(); }\r
-\r
- // instance variables\r
-\r
- // Input buffer that getToken() reads characters from through\r
- // read()/match()/unread()/peek(); it also supplies the values returned\r
- // by getLineno(), getLine(), getOffset() and eof().\r
- private LineBuffer in;\r
-\r
-\r
- /* for TSF_REGEXP, etc.\r
- * should this be manipulated by getter/setter functions?\r
- * should it be passed to getToken()?\r
- *\r
- * getToken() tests (flags & TSF_REGEXP) when it sees a '/' that does not\r
- * start a comment: if the bit is set the input is scanned as a regular\r
- * expression literal, otherwise as a division operator.\r
- */\r
- public int flags;\r
- // Flag letters ('g', 'i', 'm') collected after the closing '/' of a\r
- // scanned regular expression literal.\r
- public String regExpFlags;\r
-\r
- // Returned by getSourceName() and passed along when errors are reported.\r
- private String sourceName;\r
- // NOTE(review): not referenced in the visible part of this file;\r
- // confirm whether it is still needed.\r
- private String line;\r
- // When non-null, reportSyntaxError() throws a SyntaxError constructed\r
- // with this scope instead of calling Context.reportError().\r
- private Scriptable scope;\r
- // NOTE(review): presumably holds a token pushed back for re-reading; its\r
- // use lies outside the visible part of this file - confirm.\r
- private int pushbackToken;\r
- // Returned by getTokenno(); presumably a running token count - confirm.\r
- private int tokenno;\r
-\r
- // Operator code read through getOp(); getToken() assigns it before\r
- // returning category tokens such as ASSIGN, EQOP, RELOP, SHOP and UNARYOP.\r
- private int op;\r
-\r
- // Set this to an initial non-null value so that the Parser has\r
- // something to retrieve even if an error has occurred and no\r
- // string is found. Fosters one class of error, but saves lots of\r
- // code.\r
- private String string = "";\r
- // Returned by getNumber(); presumably the value of the last numeric\r
- // literal scanned - confirm against the earlier part of getToken().\r
- private Number number;\r
-}\r