X-Git-Url: http://git.megacz.com/?p=org.ibex.core.git;a=blobdiff_plain;f=src%2Forg%2Fxwt%2Fjs%2FParser.java;h=b67b59c9bff43c08c1a02f799d4300b6e1704fbd;hp=9af090f8e5c791be53d71155eaee094e04504391;hb=6261c41b2ac9d182d8c3541e8e0e5fd00062fa43;hpb=3b4175d23bee1a58fb66f70aa46df94cf9c166de diff --git a/src/org/xwt/js/Parser.java b/src/org/xwt/js/Parser.java index 9af090f..b67b59c 100644 --- a/src/org/xwt/js/Parser.java +++ b/src/org/xwt/js/Parser.java @@ -1,30 +1,81 @@ -// Copyright 2002 Adam Megacz, see the COPYING file for licensing [GPL] +// Copyright 2003 Adam Megacz, see the COPYING file for licensing [GPL] package org.xwt.js; import org.xwt.util.*; import java.io.*; +/** + * Parses a stream of lexed tokens into a tree of JSFunction's. + * + * There are three kinds of things we parse: blocks, statements, and + * expressions. + * + * - Expressions are a special type of statement that evaluates to a + * value (for example, "break" is not an expression, * but "3+2" + * is). Some tokens sequences start expressions (for * example, + * literal numbers) and others continue an expression which * has + * already been begun (for example, '+'). Finally, some * + * expressions are valid targets for an assignment operation; after + * * each of these expressions, continueExprAfterAssignable() is + * called * to check for an assignment operation. + * + * - A statement ends with a semicolon and does not return a value. + * + * - A block is a single statement or a sequence of statements + * surrounded by curly braces. + * + * Each parsing method saves the parserLine before doing its actual + * work and restores it afterwards. This ensures that parsing a + * subexpression does not modify the line number until a token + * *after* the subexpression has been consumed by the parent + * expression. + * + * Technically it would be a better design for this class to build an + * intermediate parse tree and use that to emit bytecode. Here's the + * tradeoff: + * + * Advantages of building a parse tree: + * - easier to apply optimizations + * - would let us handle more sophisticated languages than JavaScript + * + * Advantages of leaving out the parse tree + * - faster compilation + * - less load on the garbage collector + * - much simpler code, easier to understand + * - less error-prone + * + * Fortunately JS is such a simple language that we can get away with + * the half-assed approach and still produce a working, complete + * compiler. + * + * The bytecode language emitted doesn't really cause any appreciable + * semantic loss, and is itself a parseable language very similar to + * Forth or a postfix variant of LISP. This means that the bytecode + * can be transformed into a parse tree, which can be manipulated. + * So if we ever want to add an optimizer, it could easily be done by + * producing a parse tree from the bytecode, optimizing that tree, + * and then re-emitting the bytecode. The parse tree node class + * would also be much simpler since the bytecode language has so few + * operators. + * + * Actually, the above paragraph is slightly inaccurate -- there are + * places where we push a value and then perform an arbitrary number + * of operations using it before popping it; this doesn't parse well. + * But these cases are clearly marked and easy to change if we do + * need to move to a parse tree format. + */ +class Parser extends Lexer implements ByteCodes { -/** parses a stream of lexed tokens into a tree of Expr's */ -public class Parser extends Lexer { // Constructors ////////////////////////////////////////////////////// - public Parser(Reader r, String sourceName, int line) throws IOException { - super(r); - this.sourceName = sourceName; - this.line = line; - } + public Parser(Reader r, String sourceName, int line) throws IOException { super(r, sourceName, line); } /** for debugging */ public static void main(String[] s) throws Exception { - Parser p = new Parser(new InputStreamReader(System.in), "stdin", 0); - while(true) { - Expr block = p.parseBlock(false); - if (block == null) return; - System.out.println(block); - if (p.peekToken() == -1) return; - } + JSFunction block = JSFunction.fromReader("stdin", 0, new InputStreamReader(System.in)); + if (block == null) return; + System.out.println(block); } @@ -32,1168 +83,753 @@ public class Parser extends Lexer { static byte[] precedence = new byte[MAX_TOKEN + 1]; static boolean[] isRightAssociative = new boolean[MAX_TOKEN + 1]; + // Use this as the precedence when we want anything up to the comma + private final static int NO_COMMA = 2; static { - precedence[ASSIGN] = 1; - isRightAssociative[ASSIGN] = true; - precedence[HOOK] = 2; - precedence[COMMA] = 3; - precedence[OR] = precedence[AND] = 4; - precedence[GT] = precedence[GE] = 5; - precedence[BITOR] = 6; - precedence[BITXOR] = 7; - precedence[BITAND] = 8; - precedence[EQ] = precedence[NE] = 9; - precedence[LT] = precedence[LE] = 10; - precedence[SHEQ] = precedence[SHNE] = 11; - precedence[LSH] = precedence[RSH] = precedence[URSH] = 12; - precedence[ADD] = precedence[SUB] = 13; - precedence[MUL] = precedence[DIV] = precedence[MOD] = 14; - precedence[BITNOT] = precedence[INSTANCEOF] = 15; - precedence[INC] = precedence[DEC] = 16; - precedence[LP] = 17; - precedence[LB] = 18; - precedence[DOT] = 19; + isRightAssociative[ASSIGN] = + isRightAssociative[ASSIGN_BITOR] = + isRightAssociative[ASSIGN_BITXOR] = + isRightAssociative[ASSIGN_BITAND] = + isRightAssociative[ASSIGN_LSH] = + isRightAssociative[ASSIGN_RSH] = + isRightAssociative[ASSIGN_URSH] = + isRightAssociative[ASSIGN_ADD] = + isRightAssociative[ASSIGN_SUB] = + isRightAssociative[ASSIGN_MUL] = + isRightAssociative[ASSIGN_DIV] = + isRightAssociative[ASSIGN_MOD] = true; + + precedence[COMMA] = 1; + // 2 is intentionally left unassigned. we use minPrecedence==2 for comma separated lists + precedence[ASSIGN] = + precedence[ASSIGN_BITOR] = + precedence[ASSIGN_BITXOR] = + precedence[ASSIGN_BITAND] = + precedence[ASSIGN_LSH] = + precedence[ASSIGN_RSH] = + precedence[ASSIGN_URSH] = + precedence[ASSIGN_ADD] = + precedence[ASSIGN_SUB] = + precedence[ASSIGN_MUL] = + precedence[ASSIGN_DIV] = + precedence[ASSIGN_MOD] = 3; + precedence[HOOK] = 4; + precedence[OR] = 5; + precedence[AND] = 6; + precedence[BITOR] = 7; + precedence[BITXOR] = 8; + precedence[BITAND] = 9; + precedence[EQ] = precedence[NE] = precedence[SHEQ] = precedence[SHNE] = 10; + precedence[LT] = precedence[LE] = precedence[GT] = precedence[GE] = 11; + precedence[LSH] = precedence[RSH] = precedence[URSH] = 12; + precedence[ADD] = precedence[SUB] = 12; + precedence[MUL] = precedence[DIV] = precedence[MOD] = 13; + precedence[BITNOT] = precedence[BANG] = precedence[TYPEOF] = 14; + precedence[DOT] = precedence[LB] = precedence[LP] = precedence[INC] = precedence[DEC] = 15; } // Parsing Logic ///////////////////////////////////////////////////////// - public void consume(int code) throws IOException { - if (getToken() != code) - throw new ParserException("expected " + codeToString[op] + ", got " + (op == -1 ? "EOL" : codeToString[op])); + /** gets a token and throws an exception if it is not code */ + private void consume(int code) throws IOException { + if (getToken() != code) { + if(code == NAME) switch(op) { + case RETURN: case TYPEOF: case BREAK: case CONTINUE: case TRY: case THROW: + case ASSERT: case NULL: case TRUE: case FALSE: case IN: case IF: case ELSE: + case SWITCH: case CASE: case DEFAULT: case WHILE: case VAR: case WITH: + case CATCH: case FINALLY: + throw pe("Bad variable name; '" + codeToString[op].toLowerCase() + "' is a javascript keyword"); + } + throw pe("expected " + codeToString[code] + ", got " + (op == -1 ? "EOF" : codeToString[op])); + } } - /** parses the largest possible expression */ - public Expr parseMaximalExpr() throws IOException { return parseMaximalExpr(null, -1); } - public Expr parseMaximalExpr(Expr prefix, int minPrecedence) throws IOException { - while(true) { - if (peekToken() == -1) break; - Expr save = prefix; - prefix = parseSingleExpr(prefix, minPrecedence); - if (save == prefix) break; - if (prefix == null) throw new ParserException("parseSingleExpr() returned null"); - } - return prefix; + /** + * Parse the largest possible expression containing no operators + * of precedence below minPrecedence and append the + * bytecodes for that expression to appendTo; the + * appended bytecodes MUST grow the stack by exactly one element. + */ + private void startExpr(JSFunction appendTo, int minPrecedence) throws IOException { + int saveParserLine = parserLine; + _startExpr(appendTo, minPrecedence); + parserLine = saveParserLine; + } + private void _startExpr(JSFunction appendTo, int minPrecedence) throws IOException { + int tok = getToken(); + JSFunction b = appendTo; + + switch (tok) { + case -1: throw pe("expected expression"); + + // all of these simply push values onto the stack + case NUMBER: b.add(parserLine, LITERAL, number); break; + case STRING: b.add(parserLine, LITERAL, string); break; + case NULL: b.add(parserLine, LITERAL, null); break; + case TRUE: case FALSE: b.add(parserLine, LITERAL, JS.B(tok == TRUE)); break; + + // (.foo) syntax + case DOT: { + consume(NAME); + b.add(parserLine, TOPSCOPE); + b.add(parserLine, LITERAL, ""); + b.add(parserLine, GET); + b.add(parserLine, LITERAL, string); + b.add(parserLine, GET); + continueExpr(b, minPrecedence); + break; + } + + case LB: { + b.add(parserLine, ARRAY, JS.ZERO); // push an array onto the stack + int size0 = b.size; + int i = 0; + if (peekToken() != RB) + while(true) { // iterate over the initialization values + int size = b.size; + b.add(parserLine, LITERAL, JS.N(i++)); // push the index in the array to place it into + if (peekToken() == COMMA || peekToken() == RB) + b.add(parserLine, LITERAL, null); // for stuff like [1,,2,] + else + startExpr(b, NO_COMMA); // push the value onto the stack + b.add(parserLine, PUT); // put it into the array + b.add(parserLine, POP); // discard the value remaining on the stack + if (peekToken() == RB) break; + consume(COMMA); + } + b.set(size0 - 1, JS.N(i)); // back at the ARRAY instruction, write the size of the array + consume(RB); + break; + } + case SUB: { // negative literal (like "3 * -1") + consume(NUMBER); + b.add(parserLine, LITERAL, JS.N(number.doubleValue() * -1)); + break; + } + case LP: { // grouping (not calling) + startExpr(b, -1); + consume(RP); + break; + } + case INC: case DEC: { // prefix (not postfix) + startExpr(b, precedence[tok]); + int prev = b.size - 1; + if (b.get(prev) == GET && b.getArg(prev) != null) + b.set(prev, LITERAL, b.getArg(prev)); + else if(b.get(prev) == GET) + b.pop(); + else + throw pe("prefixed increment/decrement can only be performed on a valid assignment target"); + b.add(parserLine, GET_PRESERVE, Boolean.TRUE); + b.add(parserLine, LITERAL, JS.N(1)); + b.add(parserLine, tok == INC ? ADD : SUB, JS.N(2)); + b.add(parserLine, PUT, null); + b.add(parserLine, SWAP, null); + b.add(parserLine, POP, null); + break; + } + case BANG: case BITNOT: case TYPEOF: { + startExpr(b, precedence[tok]); + b.add(parserLine, tok); + break; + } + case LC: { // object constructor + b.add(parserLine, OBJECT, null); // put an object on the stack + if (peekToken() != RC) + while(true) { + if (peekToken() != NAME && peekToken() != STRING) + throw pe("expected NAME or STRING"); + getToken(); + b.add(parserLine, LITERAL, string); // grab the key + consume(COLON); + startExpr(b, NO_COMMA); // grab the value + b.add(parserLine, PUT); // put the value into the object + b.add(parserLine, POP); // discard the remaining value + if (peekToken() == RC) break; + consume(COMMA); + if (peekToken() == RC) break; // we permit {,,} -- I'm not sure if ECMA does + } + consume(RC); + break; + } + case NAME: { + b.add(parserLine, TOPSCOPE); + b.add(parserLine, LITERAL, string); + continueExprAfterAssignable(b,minPrecedence); + break; + } + case FUNCTION: { + consume(LP); + int numArgs = 0; + JSFunction b2 = new JSFunction(sourceName, parserLine, null); + b.add(parserLine, NEWFUNCTION, b2); + + // function prelude; arguments array is already on the stack + b2.add(parserLine, TOPSCOPE); + b2.add(parserLine, SWAP); + b2.add(parserLine, DECLARE, "arguments"); // declare arguments (equivalent to 'var arguments;') + b2.add(parserLine, SWAP); // set this.arguments and leave the value on the stack + b2.add(parserLine, PUT); + + while(peekToken() != RP) { // run through the list of argument names + numArgs++; + if (peekToken() == NAME) { + consume(NAME); // a named argument + String varName = string; + + b2.add(parserLine, DUP); // dup the args array + b2.add(parserLine, GET, JS.N(numArgs - 1)); // retrieve it from the arguments array + b2.add(parserLine, TOPSCOPE); + b2.add(parserLine, SWAP); + b2.add(parserLine, DECLARE, varName); // declare the name + b2.add(parserLine, SWAP); + b2.add(parserLine, PUT); + b2.add(parserLine, POP); // pop the value + b2.add(parserLine, POP); // pop the scope + } + if (peekToken() == RP) break; + consume(COMMA); + } + consume(RP); + + b2.numFormalArgs = numArgs; + b2.add(parserLine, POP); // pop off the arguments array + b2.add(parserLine, POP); // pop off TOPSCOPE + + if(peekToken() != LC) + throw pe("JSFunctions must have a block surrounded by curly brackets"); + + parseBlock(b2, null); // the function body + + b2.add(parserLine, LITERAL, null); // in case we "fall out the bottom", return NULL + b2.add(parserLine, RETURN); + + break; + } + default: throw pe("expected expression, found " + codeToString[tok] + ", which cannot start an expression"); + } + + // attempt to continue the expression + continueExpr(b, minPrecedence); } - public Expr parseSingleExpr(Expr prefix, int minPrecedence) throws IOException { - Expr e1 = null, e2 = null, e3 = null, head = null, tail = null, ret = null; - - int tok = peekToken(); - if (minPrecedence > 0 && - tok < precedence.length && - precedence[tok] != 0 && - (isRightAssociative[tok] ? - (precedence[tok] < minPrecedence) : - (precedence[tok] <= minPrecedence))) { - return prefix; - } - getToken(); - int curLine = line; - - // these case arms match the precedence of operators; each arm is a precedence level. - switch (tok) { - - case VAR: { - if (prefix != null) { pushBackToken(); return prefix; } - ByteCode b = new ByteCode(curLine); - b.add(b.THIS, NO_ARG); - while(true) { - consume(NAME); - String name = string; - b.add(b.DECLARE, name); - if (peekToken() == ASSIGN) { - b.add(b.LITERAL, name); - consume(ASSIGN); - b.add(b.EXPR, parseMaximalExpr()); - b.add(b.PUT, NO_ARG); - b.add(b.POP, NO_ARG); - } - if (peekToken() != COMMA) break; - consume(COMMA); - } - return b; - } - - case IN: pushBackToken(); return prefix; - - case IF: { - if (prefix != null) { pushBackToken(); return prefix; } - ByteCode b = new ByteCode(curLine); - consume(LP); - b.add(b.EXPR, parseMaximalExpr()); - consume(RP); - b.add(b.JF, new Integer(3)); - b.add(b.EXPR, parseBlock(false)); - b.add(b.JMP, new Integer(2)); - if (peekToken() != ELSE) return b.add(b.LITERAL, null); - consume(ELSE); - b.add(b.EXPR, parseBlock(false)); - return b; - } - - // FIXME: ugly hack!! - case ASSIGN_BITOR: if (tok == ASSIGN_BITOR) tok = BITOR; - case ASSIGN_BITXOR: if (tok == ASSIGN_BITXOR) tok = BITXOR; - case ASSIGN_BITAND: if (tok == ASSIGN_BITAND) tok = BITAND; - case ASSIGN_LSH: if (tok == ASSIGN_LSH) tok = LSH; - case ASSIGN_RSH: if (tok == ASSIGN_RSH) tok = RSH; - case ASSIGN_URSH: if (tok == ASSIGN_URSH) tok = URSH; - case ASSIGN_ADD: if (tok == ASSIGN_ADD) tok = ADD; - case ASSIGN_SUB: if (tok == ASSIGN_SUB) tok = SUB; - case ASSIGN_MUL: if (tok == ASSIGN_MUL) tok = MUL; - case ASSIGN_DIV: if (tok == ASSIGN_DIV) tok = DIV; - case ASSIGN_MOD: if (tok == ASSIGN_MOD) tok = MOD; - { - ByteCode b = (ByteCode)prefix; - b.set(b.size() - 1, b.GET_PRESERVE, new Boolean(true)); - b.add(b.EXPR, parseMaximalExpr(null, precedence[tok])); - b.add(tok, NO_ARG); - b.add(b.PUT, NO_ARG); - b.add(b.SWAP, NO_ARG); - b.add(b.POP, NO_ARG); - return b; - } - - case INC: case DEC: - if (prefix == null) { - // prefix - ByteCode b = (ByteCode)parseMaximalExpr(null, precedence[tok]); - b.set(b.size() - 1, tok, new Boolean(true)); - return b; - } else { - // postfix - ByteCode b = (ByteCode)prefix; - b.set(b.size() - 1, tok, new Boolean(false)); - return b; - } - - case LP: - if (prefix == null) { // grouping - ByteCode b = new ByteCode(curLine, ByteCode.EXPR, parseMaximalExpr()); - consume(RP); - return b; - } else { // invocation - ByteCode b = new ByteCode(curLine); - int i = 0; - b.add(b.EXPR, prefix); - while(peekToken() != RP) { - b.add(b.EXPR, parseMaximalExpr()); - i++; - if (peekToken() == RP) break; - consume(COMMA); - } - consume(RP); - b.add(b.CALL, new Integer(i)); - return b; - } + /** + * Assuming that a complete assignable (lvalue) has just been + * parsed and the object and key are on the stack, + * continueExprAfterAssignable will attempt to parse an + * expression that modifies the assignable. This method always + * decreases the stack depth by exactly one element. + */ + private void continueExprAfterAssignable(JSFunction b,int minPrecedence) throws IOException { + int saveParserLine = parserLine; + _continueExprAfterAssignable(b,minPrecedence); + parserLine = saveParserLine; + } + private void _continueExprAfterAssignable(JSFunction b,int minPrecedence) throws IOException { + if (b == null) throw new Error("got null b; this should never happen"); + int tok = getToken(); + if (minPrecedence != -1 && (precedence[tok] < minPrecedence || (precedence[tok] == minPrecedence && !isRightAssociative[tok]))) + // force the default case + tok = -1; + switch(tok) { + case ASSIGN_BITOR: case ASSIGN_BITXOR: case ASSIGN_BITAND: case ASSIGN_LSH: case ASSIGN_RSH: case ASSIGN_URSH: + case ASSIGN_MUL: case ASSIGN_DIV: case ASSIGN_MOD: case ASSIGN_ADD: case ASSIGN_SUB: { + b.add(parserLine, GET_PRESERVE); + startExpr(b, precedence[tok]); + int size = b.size; + if (tok == ASSIGN_ADD || tok == ASSIGN_SUB) { + b.add(parserLine, tok); + } + // tok-1 is always s/^ASSIGN_// (0 is BITOR, 1 is ASSIGN_BITOR, etc) + b.add(parserLine, tok - 1, tok-1==ADD ? JS.N(2) : null); + b.add(parserLine, PUT); + b.add(parserLine, SWAP); + b.add(parserLine, POP); + if (tok == ASSIGN_ADD || tok == ASSIGN_SUB) b.set(size, tok, JS.N(b.size - size)); + break; + } + case INC: case DEC: { // postfix + b.add(parserLine, GET_PRESERVE, Boolean.TRUE); + b.add(parserLine, LITERAL, JS.N(1)); + b.add(parserLine, tok == INC ? ADD : SUB, JS.N(2)); + b.add(parserLine, PUT, null); + b.add(parserLine, SWAP, null); + b.add(parserLine, POP, null); + b.add(parserLine, LITERAL, JS.N(1)); + b.add(parserLine, tok == INC ? SUB : ADD, null); // undo what we just did, since this is postfix + break; + } + case ASSIGN: { + startExpr(b, precedence[tok]); + b.add(parserLine, PUT); + b.add(parserLine, SWAP); + b.add(parserLine, POP); + break; + } + case LP: { + + // Method calls are implemented by doing a GET_PRESERVE + // first. If the object supports method calls, it will + // return JS.METHOD + int n = parseArgs(b, 2); + b.add(parserLine, GET_PRESERVE); + b.add(parserLine, CALLMETHOD, JS.N(n)); + break; + } + default: { + pushBackToken(); + if(b.get(b.size-1) == LITERAL && b.getArg(b.size-1) != null) + b.set(b.size-1,GET,b.getArg(b.size-1)); + else + b.add(parserLine, GET); + return; + } + } + } - case BANG: case BITNOT: case INSTANCEOF: case TYPEOF: { - if (prefix != null) { pushBackToken(); return prefix; } - ByteCode b = new ByteCode(curLine); - b.add(b.EXPR, parseMaximalExpr(null, precedence[tok])); - b.add(tok, NO_ARG); - return b; - } - case SUB: - if (prefix == null && peekToken() == NUMBER) { - getToken(); - return new ByteCode(curLine, ByteCode.LITERAL, new Double(number.doubleValue() * -1)); - } // else fall through + /** + * Assuming that a complete expression has just been parsed, + * continueExpr will attempt to extend this expression by + * parsing additional tokens and appending additional bytecodes. + * + * No operators with precedence less than minPrecedence + * will be parsed. + * + * If any bytecodes are appended, they will not alter the stack + * depth. + */ + private void continueExpr(JSFunction b, int minPrecedence) throws IOException { + int saveParserLine = parserLine; + _continueExpr(b, minPrecedence); + parserLine = saveParserLine; + } + private void _continueExpr(JSFunction b, int minPrecedence) throws IOException { + if (b == null) throw new Error("got null b; this should never happen"); + int tok = getToken(); + if (tok == -1) return; + if (minPrecedence != -1 && (precedence[tok] < minPrecedence || (precedence[tok] == minPrecedence && !isRightAssociative[tok]))) { + pushBackToken(); + return; + } + + switch (tok) { + case LP: { // invocation (not grouping) + int n = parseArgs(b, 1); + b.add(parserLine, CALL, JS.N(n)); + break; + } case BITOR: case BITXOR: case BITAND: case SHEQ: case SHNE: case LSH: - case RSH: case URSH: case ADD: case MUL: case DIV: case MOD: - case GT: case GE: case EQ: case NE: case LT: case LE: { - if (prefix == null) throw new ParserException("the " + codeToString[tok] + " token cannot start an expression"); - ByteCode b = new ByteCode(curLine); - b.add(b.EXPR, prefix); - b.add(b.EXPR, parseMaximalExpr(null, precedence[tok])); - b.add(tok, NO_ARG); - return b; - } - - // includes short-circuit logic - case OR: case AND: { - if (prefix == null) throw new ParserException("the " + codeToString[tok] + " token cannot start an expression"); - ByteCode b = new ByteCode(curLine); - b.add(b.LITERAL, tok == AND ? new Boolean(false) : new Boolean(true)); - b.add(b.EXPR, prefix); - b.add(tok == AND ? b.JF : b.JT, new Integer(3)); - b.add(b.POP, NO_ARG); - b.add(b.EXPR, parseMaximalExpr(null, precedence[tok])); - return b; - } - - case ASSIGN: { - if (prefix == null) throw new ParserException("the " + codeToString[tok] + " token cannot start an expression"); - ByteCode b = new ByteCode(curLine); - b.add(b.THIS, NO_ARG); - b.add(b.LITERAL, prefix.string); // FIXME, this is ass-ugly - b.add(b.EXPR, parseMaximalExpr(null, precedence[ASSIGN])); - b.add(b.PUT, NO_ARG); - b.add(b.SWAP, NO_ARG); - b.add(b.POP, NO_ARG); - return b; - } - - case WITH: throw new ParserException("XWT does not allow the WITH keyword"); - case VOID: case RESERVED: throw new ParserException("reserved word that you shouldn't be using"); - - case NUMBER: - if (prefix != null) { pushBackToken(); return prefix; } - return new ByteCode(curLine, ByteCode.LITERAL, number); - - case STRING: - if (prefix != null) { pushBackToken(); return prefix; } - return new ByteCode(curLine, ByteCode.LITERAL, string); - - case NULL: case TRUE: case FALSE: case NOP: - if (prefix != null) { pushBackToken(); return prefix; } - return new ByteCode(curLine, ByteCode.LITERAL, (tok == NULL || tok == NOP) ? null : new Boolean(tok == TRUE)); - - case COMMA: pushBackToken(); return prefix; - - case THIS: - if (prefix != null) { pushBackToken(); return prefix; } - return new ByteCode(curLine, ByteCode.THIS, NO_ARG); - - // potential lvalues - - case NAME: { - if (prefix != null) { pushBackToken(); return prefix; } - String name = string; - ByteCode b = new ByteCode(curLine); - if (peekToken() == ASSIGN) { - consume(ASSIGN); - b.add(ByteCode.THIS, NO_ARG); - b.add(ByteCode.LITERAL, name); - b.add(ByteCode.EXPR, parseMaximalExpr(null, minPrecedence)); - b.add(ByteCode.PUT, NO_ARG); - b.add(ByteCode.SWAP, NO_ARG); - b.add(ByteCode.POP, NO_ARG); - return b; - } else { - b.add(ByteCode.THIS, NO_ARG); - b.add(ByteCode.LITERAL, name); - b.add(ByteCode.GET, NO_ARG); - return parseMaximalExpr(b, minPrecedence); - } - } - - case DOT: { - consume(NAME); - String target = string; - ByteCode b = new ByteCode(curLine); - b.add(b.EXPR, prefix); - if (peekToken() == ASSIGN) { - consume(ASSIGN); - Expr val = parseMaximalExpr(); - b.add(b.LITERAL, target); - b.add(b.EXPR, val); - b.add(b.PUT, NO_ARG); - b.add(b.SWAP, NO_ARG); - b.add(b.POP, NO_ARG); - } else { - b.add(b.LITERAL, target); - b.add(b.GET, NO_ARG); - } - return b; - } - - case LB: { - ByteCode b = new ByteCode(curLine); - if (prefix == null) { - b.add(b.ARRAY, new Integer(0)); - int i = 0; - while(true) { - Expr e = parseMaximalExpr(); - if (e == null && peekToken() == RB) { consume(RB); return b; } - b.add(b.LITERAL, new Integer(i++)); - if (e == null) b.add(b.LITERAL, null); - else b.add(b.EXPR, e); - b.add(b.PUT, NO_ARG); - b.add(b.POP, NO_ARG); - if (peekToken() == RB) { consume(RB); return b; } - consume(COMMA); - } - } else { - b.add(b.EXPR, prefix); - b.add(b.EXPR, parseMaximalExpr()); - consume(RB); - if (peekToken() == ASSIGN) { - consume(ASSIGN); - b.add(b.EXPR, parseMaximalExpr()); - b.add(b.PUT, NO_ARG); - b.add(b.SWAP, NO_ARG); - b.add(b.POP, NO_ARG); - } else { - b.add(b.GET, NO_ARG); - } - return b; - } - } - - case LC: { - if (prefix != null) { pushBackToken(); return prefix; } - ByteCode b = new ByteCode(curLine); - b.add(b.OBJECT, null); - if (peekToken() == RC) { consume(RC); return b; } - while(true) { - if (peekToken() != NAME && peekToken() != STRING) throw new Error("expected NAME or STRING"); - getToken(); - b.add(b.LITERAL, string); - consume(COLON); - b.add(b.EXPR, parseMaximalExpr()); - b.add(b.PUT, NO_ARG); - b.add(b.POP, NO_ARG); - if (peekToken() == RC) { consume(RC); return b; } - consume(COMMA); - if (peekToken() == RC) { consume(RC); return b; } - } - } - - case HOOK: { - ByteCode b = new ByteCode(curLine); - b.add(b.EXPR, prefix); - b.add(b.JF, new Integer(3)); - b.add(b.EXPR, parseMaximalExpr()); - b.add(b.JMP, new Integer(2)); - consume(COLON); - b.add(b.EXPR, parseMaximalExpr()); - return b; - } - - case FUNCTION: { - if (prefix != null) { pushBackToken(); return prefix; } - consume(LP); - ByteCode b = new ByteCode(curLine); - int numArgs = 0; - b.add(b.THIS, NO_ARG); - b.add(b.SWAP, NO_ARG); - b.add(b.LITERAL, "arguments"); - b.add(b.LITERAL, "arguments"); - b.add(b.DECLARE, NO_ARG); - b.add(b.SWAP, NO_ARG); - b.add(b.PUT, NO_ARG); - b.add(b.SWAP, NO_ARG); - b.add(b.POP, NO_ARG); - if (peekToken() == RP) consume(RP); - else while(true) { - if (peekToken() == COMMA) { - consume(COMMA); - } else { - consume(NAME); - - // declare the name - b.add(b.LITERAL, string); - b.add(b.DECLARE, NO_ARG); - - // retrieve it from the arguments array - b.add(b.LITERAL, new Integer(numArgs)); - b.add(b.GET_PRESERVE, NO_ARG); - b.add(b.SWAP, NO_ARG); - b.add(b.POP, NO_ARG); - - // put it to the current scope - b.add(b.THIS, NO_ARG); - b.add(b.SWAP, NO_ARG); - b.add(b.LITERAL, string); - b.add(b.SWAP, NO_ARG); - b.add(b.PUT, NO_ARG); - - // clean the stack - b.add(b.POP, NO_ARG); - b.add(b.POP, NO_ARG); - - if (peekToken() == RP) { consume(RP); break; } - consume(COMMA); - } - numArgs++; - } - // pop off the arguments array - b.add(b.POP, NO_ARG); - parseBlock(true, b); - return new ByteCode(curLine, b.FUNCTION, b); - } - - case WHILE: { - if (prefix != null) { pushBackToken(); return prefix; } - consume(LP); - ByteCode r = new ByteCode(curLine); - ByteCode loop = new ByteCode(curLine); - r.add(loop.LOOP, loop); - r.add(r.LITERAL, null); - - loop.add(loop.EXPR, parseMaximalExpr()); - loop.add(loop.JT, new Integer(2)); - loop.add(Lexer.BREAK, NO_ARG); - consume(RP); - parseBlock(false, loop); - - // if we fall out of the end, definately continue - loop.add(CONTINUE, NO_ARG); - return r; - } - - case SWITCH: { - if (prefix != null) { pushBackToken(); return prefix; } - consume(LP); - ByteCode r = new ByteCode(curLine); - ByteCode loop = new ByteCode(curLine); - r.add(loop.LOOP, loop); - r.add(r.LITERAL, null); - loop.add(loop.EXPR, parseMaximalExpr()); - consume(RP); - consume(LC); - while(true) { - Expr caseExpr; - tok = getToken(); - if (tok == CASE) { - loop.add(loop.DUP, NO_ARG); - loop.add(loop.EXPR, parseMaximalExpr()); - loop.add(EQ, NO_ARG); - loop.add(loop.JF, new Integer(2)); - } else if (tok != DEFAULT) throw new ParserException("expected CASE or DEFAULT"); - consume(COLON); - ByteCode b = new ByteCode(curLine); - while(peekToken() != CASE && peekToken() != DEFAULT && peekToken() != RC) { - if ((e1 = parseBlock(false)) == null) break; - b.add(b.EXPR, e1); - } - loop.add(loop.EXPR, b); - if (peekToken() == RC) { - consume(RC); - r.add(BREAK, NO_ARG); - return r; - } - } - } - - case DO: { - if (prefix != null) { pushBackToken(); return prefix; } - ByteCode r = new ByteCode(curLine); - ByteCode loop = new ByteCode(curLine); - r.add(loop.LOOP, loop); - r.add(r.LITERAL, null); - - parseBlock(false, loop); - consume(WHILE); - consume(LP); - loop.add(loop.EXPR, parseMaximalExpr()); - loop.add(loop.JT, new Integer(2)); - loop.add(Lexer.BREAK, NO_ARG); - loop.add(Lexer.CONTINUE, NO_ARG); - consume(RP); - consume(SEMI); - return r; - } - - // Needs break // - - case FOR: - if (prefix != null) { pushBackToken(); return prefix; } - if (getToken() != LP) throw new ParserException("expected left paren"); - e1 = parseMaximalExpr(); - if (peekToken() == IN) { - getToken(); - e2 = parseMaximalExpr(); - if (getToken() != RP) throw new ParserException("expected right paren"); - return new Expr(curLine, FOR, new Expr(curLine, IN, e1.left, e2), parseBlock(false)); - - } else { - Expr initExpr = e1; - if (initExpr == null) initExpr = new Expr(curLine, NULL); - consume(SEMI); - Expr whileExpr = parseMaximalExpr(); - consume(SEMI); - Expr incExpr = parseMaximalExpr(); - consume(RP); - Expr body = parseBlock(false); - Expr loop = new Expr(curLine, WHILE, whileExpr, body); - ExprList list = new ExprList(curLine, LC); - list.add(initExpr); - ExprList list2 = new ExprList(curLine, WHILE); - list.add(list2); - list2.add(whileExpr); - list2.add(body); - list2.add(incExpr); - return list; - } - - case TRY: { - // We deliberately allow you to omit braces in catch{}/finally{} if they are single statements... - if (prefix != null) { pushBackToken(); return prefix; } - Expr tryBlock = parseBlock(true); - - tok = peekToken(); - ExprList list = new ExprList(curLine, TRY); - if (tok == CATCH) { - getToken(); - if (getToken() != LP) throw new ParserException("expected ("); - if (getToken() != NAME) throw new ParserException("expected name"); - Expr name = new Expr(curLine, NAME, string); - if (getToken() != RP) throw new ParserException("expected )"); - list.add(new Expr(curLine, CATCH, name, parseBlock(false))); - tok = peekToken(); - } - if (tok == FINALLY) { - getToken(); - list.add(new Expr(curLine, FINALLY, parseBlock(false))); - } - - if (list.size() == 0) throw new ParserException("try without catch or finally"); - return new Expr(curLine, TRY, tryBlock, list); - } - - default: - pushBackToken(); - return prefix; - } + case RSH: case URSH: case MUL: case DIV: case MOD: + case GT: case GE: case EQ: case NE: case LT: case LE: case SUB: { + startExpr(b, precedence[tok]); + b.add(parserLine, tok); + break; + } + case ADD: { + int count=1; + int nextTok; + do { + startExpr(b,precedence[tok]); + count++; + nextTok = getToken(); + } while(nextTok == tok); + pushBackToken(); + b.add(parserLine, tok, JS.N(count)); + break; + } + case OR: case AND: { + b.add(parserLine, tok == AND ? b.JF : b.JT, JS.ZERO); // test to see if we can short-circuit + int size = b.size; + startExpr(b, precedence[tok]); // otherwise check the second value + b.add(parserLine, JMP, JS.N(2)); // leave the second value on the stack and jump to the end + b.add(parserLine, LITERAL, tok == AND ? + JS.B(false) : JS.B(true)); // target of the short-circuit jump is here + b.set(size - 1, JS.N(b.size - size)); // write the target of the short-circuit jump + break; + } + case DOT: { + // support foo..bar syntax for foo[""].bar + if (peekToken() == DOT) { + string = ""; + } else { + consume(NAME); + } + b.add(parserLine, LITERAL, string); + continueExprAfterAssignable(b,minPrecedence); + break; + } + case LB: { // subscripting (not array constructor) + startExpr(b, -1); + consume(RB); + continueExprAfterAssignable(b,minPrecedence); + break; + } + case HOOK: { + b.add(parserLine, JF, JS.ZERO); // jump to the if-false expression + int size = b.size; + startExpr(b, minPrecedence); // write the if-true expression + b.add(parserLine, JMP, JS.ZERO); // if true, jump *over* the if-false expression + b.set(size - 1, JS.N(b.size - size + 1)); // now we know where the target of the jump is + consume(COLON); + size = b.size; + startExpr(b, minPrecedence); // write the if-false expression + b.set(size - 1, JS.N(b.size - size + 1)); // this is the end; jump to here + break; + } + case COMMA: { + // pop the result of the previous expression, it is ignored + b.add(parserLine,POP); + startExpr(b,-1); + break; + } + default: { + pushBackToken(); + return; + } + } + + continueExpr(b, minPrecedence); // try to continue the expression } - // Expr ////////////////////////////////////////////////////////////////////// - - public static final Object NO_ARG = new Object(); - - class ByteCode extends Expr { - - // This class interprets a small forthlike bytecode language - // we use to represent JavaScript. It's a stack machine. - - // Each instruction is an opcode and a literal. Any operation - // that accesses the top of the stack and does not have a - // mandatory literal can be performed on a literal instead of - // the top of the stack. - - // opcodes: - public static final byte arithmetic = -1; // -- arithmetic operators from parser - public static final byte LITERAL = -2; // < String | Number | null > -- push a literal onto the stack - public static final byte ARRAY = -3; // < size > -- create a new array of size - public static final byte OBJECT = -4; // -- push an empty object onto the stack - public static final byte FUNCTION = -5; // < bytecode_block > -- push a new instance of a function with the given bytecode - public static final byte DECLARE = -6; // < name > -- declare in the current scope - public static final byte THIS = -7; // -- push the topmost non-transparent scope onto the stack - public static final byte GET = -8; // -- get stack[0] from stack[1] - public static final byte GET_PRESERVE = -80; // -- get stack[0] from stack[1] - public static final byte PUT = -9; // -- put stack[1] to key stack[0] on stack[2]; leaves object on the stack - public static final byte THROW = -10; // -- throw topmost element - public static final byte RETURN = -11; // -- return the topmost value on the stack - public static final byte ASSERT = -12; // -- fail if topmost element is not true - public static final byte JT = -13; // < relative_address > -- pop the stack; if true, jump to - public static final byte JF = -21; // < relative_address > -- pop the stack; if false, jump to - public static final byte JMP = -22; // < relative_address > -- jump to - static public final byte POP = -14; // -- discard the top element on the stack - - public static final byte CALL = -15; // < numargs > -- call stack[0] with the topmost values as arguments - public static final byte TRY = -16; // < bytecode_block > -- run the block pointed to; returns with thrown exn on top of stack - - public static final byte INSTANCEOF = -17; // -- ?? - public static final byte TYPEOF = -18; // -- - - public static final byte FOR__IN = -19; // -- ?? - public static final byte EXPR = -20; // -- transitional - public static final byte SWAP = -23; // -- transitional - public static final byte SCOPE = -30; // -- transitional - public static final byte LOOP = -40; // -- transitional - public static final byte DUP = -50; // -- transitional - - int[] op = new int[10]; - Object[] arg = new Object[10]; - int size = 0; - - public ByteCode(int line) { super(line, "foo"); } - public ByteCode(int line, int op_, Object arg_) { this(line); add(op_, arg_); } - - public int size() { return size; } - public void set(int pos, int op_, Object arg_) { op[pos] = op_; arg[pos] = arg_; } - public ByteCode add(int op_, Object arg_) { - if (size == op.length - 1) { - int[] op2 = new int[op.length * 2]; System.arraycopy(op, 0, op2, 0, op.length); op = op2; - Object[] arg2 = new Object[op.length * 2]; System.arraycopy(arg, 0, arg2, 0, arg.length); arg = arg2; - } - op[size] = op_; - arg[size] = arg_; - size++; - return this; - } - - public Object eval(final JS.Scope s) throws ControlTransferException, JS.Exn { - return eval(s, new Parser.Thread()); - } - public Object eval(final JS.Scope s, Parser.Thread t) throws ControlTransferException { - for(int i=0; i= 0; j--) arguments.setElementAt(t.pop(), j); - JS.Function f = (JS.Function)t.pop(); - if (f == null) throw new JS.Exn(new EvaluatorException("attempted to call null")); - t.push(f.call(arguments)); - break; - - case FUNCTION: { - final ByteCode myBytes = (ByteCode)arg[i]; - t.push(new JS.Function() { - public String toString() { return sourceName + ":" + line; } - public String getSourceName() throws JS.Exn { return sourceName; } - public Object _call(final JS.Array args) throws JS.Exn { - Function save = JS.getCurrentFunction(); - JS.currentFunction.put(java.lang.Thread.currentThread(), this); - JS.Scope scope = new JS.Scope(s) { - // FIXME - public String getSourceName() { return sourceName; } - public Object get(Object key) throws JS.Exn { - if (key.equals("trapee")) return org.xwt.Trap.currentTrapee(); - else if (key.equals("cascade")) return org.xwt.Trap.cascadeFunction; - return super.get(key); - } - }; - Parser.Thread t0 = new Parser.Thread(); - t0.push(args); - try { - return myBytes.eval(scope, t0); - } catch (ReturnException r) { - return r.retval; - } catch (ControlTransferException c) { - throw new EvaluatorException("error, ControlTransferException tried to leave a function: " + c); - } finally { - if (save == null) JS.currentFunction.remove(java.lang.Thread.currentThread()); - else JS.currentFunction.put(java.lang.Thread.currentThread(), save); - } - } - }); - break; - } - - case RETURN: break; - case TRY: break; - case INSTANCEOF: break; - case TYPEOF: break; - case FOR__IN: break; - - case Lexer.BITNOT: t.push(new Long(~toLong(t.pop()))); break; - case Lexer.BANG: t.push(new Boolean(!toBoolean(t.pop()))); break; - - case Lexer.INC: case Lexer.DEC: { - boolean isPrefix = toBoolean(arg[i]); - Object key = t.pop(); - JS obj = (JS)t.pop(); - Number num = toNumber(obj.get(key)); - Number val = new Double(op[i] == Lexer.INC ? num.doubleValue() + 1.0 : num.doubleValue() - 1.0); - obj.put(key, val); - t.push(isPrefix ? val : num); - break; - } - - default: { - Object right = t.pop(); - Object left = t.pop(); - switch(op[i]) { - - case Lexer.BITOR: t.push(new Long(toLong(left) | toLong(right))); break; - case Lexer.BITXOR: t.push(new Long(toLong(left) ^ toLong(right))); break; - case Lexer.BITAND: t.push(new Long(toLong(left) & toLong(right))); break; - - case Lexer.ADD: { - Object l = left; - Object r = right; - if (l instanceof String || r instanceof String) { - if (l == null) l = "null"; - if (r == null) r = "null"; - if (l instanceof Number && ((Number)l).doubleValue() == ((Number)l).longValue()) - l = new Long(((Number)l).longValue()); - if (r instanceof Number && ((Number)r).doubleValue() == ((Number)r).longValue()) - r = new Long(((Number)r).longValue()); - t.push(l.toString() + r.toString()); break; - } - t.push(new Double(toDouble(l) + toDouble(r))); break; - } - - case Lexer.SUB: t.push(new Double(toDouble(left) - toDouble(right))); break; - case Lexer.MUL: t.push(new Double(toDouble(left) * toDouble(right))); break; - case Lexer.DIV: t.push(new Double(toDouble(left) / toDouble(right))); break; - case Lexer.MOD: t.push(new Double(toDouble(left) % toDouble(right))); break; - - case Lexer.LSH: t.push(new Long(toLong(left) << toLong(right))); break; - case Lexer.RSH: t.push(new Long(toLong(left) >> toLong(right))); break; - case Lexer.URSH: t.push(new Long(toLong(left) >>> toLong(right))); break; - - // FIXME: these need to work on strings - case Lexer.LT: t.push(toDouble(left) < toDouble(right) ? Boolean.TRUE : Boolean.FALSE); break; - case Lexer.LE: t.push(toDouble(left) <= toDouble(right) ? Boolean.TRUE : Boolean.FALSE); break; - case Lexer.GT: t.push(toDouble(left) > toDouble(right) ? Boolean.TRUE : Boolean.FALSE); break; - case Lexer.GE: t.push(toDouble(left) >= toDouble(right) ? Boolean.TRUE : Boolean.FALSE); break; - - case Lexer.EQ: - case Lexer.NE: { - // FIXME: should use Javascript coercion-equality rules - Object l = left; - Object r = right; - boolean ret; - if (l == null) { Object tmp = r; r = l; l = tmp; } - if (l == null && r == null) ret = true; - else if (l instanceof Boolean) ret = new Boolean(toBoolean(r)).equals(l); - else if (l instanceof Number) ret = toNumber(r).doubleValue() == toNumber(l).doubleValue(); - else if (l instanceof String) ret = r != null && l.equals(r.toString()); - else ret = l.equals(r); - t.push(new Boolean(op[i] == Lexer.EQ ? ret : !ret)); break; - } - - default: throw new Error("unknown opcode " + op[i]); - } } - } - if (t.size() != 1) { - throw new EvaluatorException("eval() terminated with " + t.size() + " elements on the stack; one expected"); - } - return t.pop(); - } - - public Object doGet(final Object o, final Object v) { - if (o == null) throw new EvaluatorException("tried to get property \"" + v + "\" from the null value"); - if (o instanceof String) { - if (v.equals("length")) return new Integer(((String)o).length()); - else if (v.equals("substring")) return new JS.Function() { - public Object _call(JS.Array args) { - if (args.length() == 1) return ((String)o).substring(toNumber(args.elementAt(0)).intValue()); - else if (args.length() == 2) return ((String)o).substring(toNumber(args.elementAt(0)).intValue(), - toNumber(args.elementAt(1)).intValue()); - else throw new Error("String.substring() can only take one or two arguments"); - } - }; - else if (v.equals("toLowerCase")) return new JS.Function() { - public Object _call(JS.Array args) { - return ((String)o).toLowerCase(); - } }; - else if (v.equals("toUpperCase")) return new JS.Function() { - public Object _call(JS.Array args) { - return ((String)o).toString().toUpperCase(); - } }; - else if (v.equals("charAt")) return new JS.Function() { - public Object _call(JS.Array args) { - return ((String)o).charAt(toNumber(args.elementAt(0)).intValue()) + ""; - } }; - else if (v.equals("lastIndexOf")) return new JS.Function() { - public Object _call(JS.Array args) { - if (args.length() != 1) return null; - return new Integer(((String)o).lastIndexOf(args.elementAt(0).toString())); - } }; - else if (v.equals("indexOf")) return new JS.Function() { - public Object _call(JS.Array args) { - if (args.length() != 1) return null; - return new Integer(((String)o).indexOf(args.elementAt(0).toString())); - } }; - throw new Error("Not Implemented: propery " + v + " on String objects"); - } else if (o instanceof Boolean) { - throw new Error("Not Implemented: properties on Boolean objects"); - } else if (o instanceof Number) { - Log.log(this, "Not Implemented: properties on Number objects"); - return null; - //throw new Error("Not Implemented: properties on Number objects"); - } else if (o instanceof JS) { - return ((JS)o).get(v); - } - return null; - } + // parse a set of comma separated function arguments, assume LP has already been consumed + // if swap is true, (because the function is already on the stack) we will SWAP after each argument to keep it on top + private int parseArgs(JSFunction b, int pushdown) throws IOException { + int i = 0; + while(peekToken() != RP) { + i++; + if (peekToken() != COMMA) { + startExpr(b, NO_COMMA); + b.add(parserLine, SWAP, JS.N(pushdown)); + if (peekToken() == RP) break; + } + consume(COMMA); + } + consume(RP); + return i; } - - class Thread { - public Object[] os = new Object[256]; - private int size = 0; - public void push(Object o) { - os[size++] = o; - } - public Object pop() { - return os[--size]; - } - public Object peek() { return os[size - 1]; } - public void swap() { Object temp = os[size - 1]; os[size - 1] = os[size - 2]; os[size - 2] = temp; } - public int size() { return size; } + + /** Parse a block of statements which must be surrounded by LC..RC. */ + void parseBlock(JSFunction b) throws IOException { parseBlock(b, null); } + void parseBlock(JSFunction b, String label) throws IOException { + int saveParserLine = parserLine; + _parseBlock(b, label); + parserLine = saveParserLine; } - - class ExprList extends Expr { - Vec v = new Vec(); - public ExprList(int curLine, int code) { super(curLine, code); } - public void add(Expr e) { v.addElement(e); } - public int numExprs() { return v.size(); } - public int size() { return v.size(); } - public Expr elementAt(int i) { return (Expr)v.elementAt(i); } - public Object eval(final JS.Scope s) throws ControlTransferException, JS.Exn { - switch(code) { - case LC: { - // Block - JS.Scope scope = new JS.Scope(s); - for(int i=0; i