1 // Copyright 2003 Adam Megacz, see the COPYING file for licensing [GPL]
4 // FIXME: line number accuracy
5 // FIXME: scope management
11 * Parses a stream of lexed tokens into a tree of CompiledFunctions.
14 * There are three kinds of things we parse: blocks, statements,
15 * expressions. Expressions are a special type of statement that
16 * evaluates to a value (for example, "break" is not an expression,
17 * but "3+2" is). AssignmentTargets are a special kind of expression
18 * that can be 'put' to (for example, "foo()" is not an
19 * assignmentTarget, but "foo[7]" is). FIXME.
22 * Technically it would be a better design for this class to build an
23 * intermediate parse tree and use that to emit bytecode. Here's the tradeoff:
26 * Advantages of building a parse tree:
27 * - easier to apply optimizations
28 * - would let us handle more sophisticated languages than JavaScript
30 * Advantages of leaving out the parse tree
31 * - faster compilation
32 * - less load on the garbage collector
33 * - much simpler code, easier to understand
36 * Fortunately JS is such a simple language that we can get away with
37 * the half-assed approach and still produce a working, complete compiler.
40 * The bytecode language emitted doesn't really cause any appreciable
41 * semantic loss, and is itself a parseable language very similar to
42 * Forth or a postfix variant of LISP. This means that the bytecode
43 * can be transformed into a parse tree, which can be manipulated.
44 * So if we ever want to add an optimizer, it could easily be done by
45 * producing a parse tree from the bytecode, optimizing that tree,
46 * and then re-emitting the bytecode. The parse tree node class
47 * would also be much simpler since the bytecode language has so few operators.
50 * Actually, the above paragraph is slightly inaccurate -- there are
51 * places where we push a value and then perform an arbitrary number
52 * of operations using it before popping it; this doesn't parse well.
53 * But these cases are clearly marked and easy to change if we do
54 * need to move to a parse tree format.
56 class Parser extends Lexer implements ByteCodes {
59 // Constructors //////////////////////////////////////////////////////
/**
 * Creates a parser over the given character stream.
 * All three arguments are handed unchanged to the superclass (Lexer) constructor.
 *
 * @param r          reader supplying the source text
 * @param sourceName name of the source, forwarded to the superclass
 * @param line       line number, forwarded to the superclass
 * @throws IOException if the superclass constructor throws it
 */
public Parser(Reader r, String sourceName, int line) throws IOException {
    super(r, sourceName, line);
}
64 public static void main(String[] s) throws Exception {
65 CompiledFunction block = new CompiledFunction("stdin", 0, new InputStreamReader(System.in), null);
66 if (block == null) return;
67 System.out.println(block);
71 // Statics ////////////////////////////////////////////////////////////
// Operator tables, both indexed by token code and sized MAX_TOKEN + 1.
// continueExpr compares precedence[tok] against its minPrecedence argument, and
// consults isRightAssociative[tok] only when the two are equal.
73 static byte[] precedence = new byte[MAX_TOKEN + 1];
74 static boolean[] isRightAssociative = new boolean[MAX_TOKEN + 1];
// NOTE(review): the assignments below presumably live in a static initializer whose
// opening and closing lines (and possibly further entries) are not visible here -- confirm.
// Only ASSIGN is marked right-associative in the visible lines.
76 isRightAssociative[ASSIGN] = true;
78 precedence[ASSIGN] = 1;
// NOTE(review): several rankings differ from ECMAScript's operator precedence and
// should be confirmed as intentional: COMMA (3) is above ASSIGN (1); OR and AND share
// one level (4); GT/GE (5) are ranked apart from LT/LE (10) and below the bitwise
// operators; SHEQ/SHNE (11) are ranked apart from EQ/NE (9).
80 precedence[COMMA] = 3;
81 precedence[OR] = precedence[AND] = 4;
82 precedence[GT] = precedence[GE] = 5;
83 precedence[BITOR] = 6;
84 precedence[BITXOR] = 7;
85 precedence[BITAND] = 8;
86 precedence[EQ] = precedence[NE] = 9;
87 precedence[LT] = precedence[LE] = 10;
88 precedence[SHEQ] = precedence[SHNE] = 11;
89 precedence[LSH] = precedence[RSH] = precedence[URSH] = 12;
90 precedence[ADD] = precedence[SUB] = 13;
91 precedence[MUL] = precedence[DIV] = precedence[MOD] = 14;
// Unary and increment/decrement operators get the highest visible levels; startExpr
// passes precedence[tok] as the minimum precedence when parsing their operand.
92 precedence[BITNOT] = 15;
93 precedence[INC] = precedence[DEC] = 16;
100 // Parsing Logic /////////////////////////////////////////////////////////
102 /** gets a token and throws an exception if it is not <tt>code</tt> */
103 private void consume(int code) throws IOException {
104 if (getToken() != code) throw new ParserException("expected " + codeToString[code] + ", got " + (op == -1 ? "EOF" : codeToString[op]));
108 * Parse the largest possible expression containing no operators
109 * of precedence below <tt>minPrecedence</tt> and append the
110 * bytecodes for that expression to <tt>appendTo</tt>; the
111 * appended bytecodes MUST grow the stack by exactly one element.
113 private void startExpr(CompiledFunction appendTo, int minPrecedence) throws IOException {
// Reads one token, emits the bytecode for the construct that token begins into
// appendTo, and then calls continueExpr to try to extend the expression.
// minPrecedence is forwarded to continueExpr (and to the right-hand side of a
// NAME assignment); -1 is used by the callers below for nested sub-expressions.
// NOTE(review): this listing omits a number of short lines (the switch header, some
// case labels, local declarations such as the array index and argument counter, and
// closing braces), so the structure shown here is incomplete.
114 int tok = getToken();
115 CompiledFunction b = appendTo;
118 case -1: throw new ParserException("expected expression");
120 // all of these simply push values onto the stack
121 case NUMBER: b.add(line, LITERAL, number); break;
122 case STRING: b.add(line, LITERAL, string); break;
123 case THIS: b.add(line, TOPSCOPE, null); break;
124 case NULL: b.add(line, LITERAL, null); break;
125 case TRUE: case FALSE: b.add(line, LITERAL, new Boolean(tok == TRUE)); break;
// Array literal: ARRAY is emitted with a placeholder size of 0; size0 remembers the
// position just past it so the real element count can be patched in afterwards.
128 b.add(line, ARRAY, new Integer(0)); // push an array onto the stack
129 int size0 = b.size();
131 if (peekToken() != RB)
132 while(true) { // iterate over the initialization values
134 if (peekToken() == COMMA || peekToken() == RB)
135 b.add(line, LITERAL, null); // for stuff like [1,,2,]
137 startExpr(b, -1); // push the value onto the stack
138 b.add(line, LITERAL, new Integer(i++)); // push the index in the array to place it into
139 b.add(line, PUT); // put it into the array
140 b.add(line, POP); // discard the value remaining on the stack
141 if (peekToken() == RB) break;
144 b.set(size0 - 1, new Integer(i)); // back at the ARRAY instruction, write the size of the array
148 case SUB: { // negative literal (like "3 * -1")
// The lexed number is negated here and pushed as a single Double literal.
150 b.add(line, LITERAL, new Double(number.doubleValue() * -1));
153 case LP: { // grouping (not calling)
158 case INC: case DEC: { // prefix (not postfix)
// The operand is parsed first; then the last emitted instruction is overwritten with
// the INC/DEC opcode and argument true (the postfix form in continueExpr passes false).
159 startExpr(b, precedence[tok]);
160 b.set(b.size() - 1, tok, new Boolean(true)); // FIXME, ugly; need startAssignTarget
163 case BANG: case BITNOT: case TYPEOF: {
164 startExpr(b, precedence[tok]);
168 case LC: { // object constructor
169 b.add(line, OBJECT, null); // put an object on the stack
170 if (peekToken() != RC)
// Each property key must be a NAME or STRING token; anything else is a parse error.
172 if (peekToken() != NAME && peekToken() != STRING)
173 throw new ParserException("expected NAME or STRING");
175 b.add(line, LITERAL, string); // grab the key
177 startExpr(b, -1); // grab the value
178 b.add(line, PUT); // put the value into the object
179 b.add(line, POP); // discard the remaining value
180 if (peekToken() == RC) break;
182 if (peekToken() == RC) break; // we permit {,,} -- I'm not sure if ECMA does
187 case NAME: { // FIXME; this is an lvalue
// The identifier text is saved before peeking ahead. Both branches push TOPSCOPE
// and the name; the ASSIGN branch then parses the right-hand side.
188 String name = string;
189 if (peekToken() == ASSIGN) {
191 b.add(line, TOPSCOPE);
192 b.add(line, LITERAL, name);
193 startExpr(b, minPrecedence);
198 b.add(line, TOPSCOPE);
199 b.add(line, LITERAL, name);
// Function literal: the body is compiled into its own CompiledFunction (b2), which
// is pushed onto the enclosing function's bytecode as the NEWFUNCTION argument.
207 CompiledFunction b2 = new CompiledFunction(sourceName, line, null);
208 b.add(line, NEWFUNCTION, b2);
210 // function prelude; arguments array is already on the stack
211 b2.add(line, TOPSCOPE); // push the scope onto the stack
212 b2.add(line, SWAP); // swap 'this' and 'arguments'
214 b2.add(line, LITERAL, "arguments"); // declare arguments (equivalent to 'var arguments;')
215 b2.add(line, DECLARE);
217 b2.add(line, LITERAL, "arguments"); // set this.arguments and leave the value on the stack
223 while(peekToken() != RP) { // run through the list of argument names
224 if (peekToken() == NAME) {
225 consume(NAME); // a named argument
227 b2.add(line, LITERAL, string); // declare the name
228 b2.add(line, DECLARE);
230 b2.add(line, LITERAL, new Integer(numArgs)); // retrieve it from the arguments array
231 b2.add(line, GET_PRESERVE);
235 b2.add(line, TOPSCOPE); // put it to the current scope
237 b2.add(line, LITERAL, string);
241 b2.add(line, POP); // clean the stack
244 if (peekToken() == RP) break;
250 b2.add(line, POP); // pop off the arguments array
252 parseStatement(b2, null); // the function body
254 b2.add(line, LITERAL, null); // in case we "fall out the bottom", return NULL
255 b2.add(line, RETURN);
259 default: throw new ParserException("expected expression, found " + codeToString[tok] + ", which cannot start an expression");
262 // attempt to continue the expression
263 continueExpr(b, minPrecedence);
267 * Assuming that a complete expression has just been parsed,
268 * <tt>continueExpr</tt> will attempt to extend this expression by
269 * parsing additional tokens and appending additional bytecodes.
271 * No operators with precedence less than <tt>minPrecedence</tt> will be parsed.
274 * If any bytecodes are appended, they will not alter the stack depth.
277 private void continueExpr(CompiledFunction b, int minPrecedence) throws IOException {
// Tries to extend the expression whose bytecode has just been appended to b: reads
// the next token, emits code for the operator it denotes, and finally calls itself
// again so that a chain of operators is consumed.
// NOTE(review): this listing omits a number of short lines (the switch header, some
// case labels, local declarations such as `size` and the argument counter, and the
// bodies/closing braces of several branches), so the structure shown is incomplete.
278 if (b == null) throw new Error("got null b; this should never happen");
279 int tok = getToken();
280 if (tok == -1) return;
// Precedence gate (skipped when minPrecedence is -1): taken when the operator ranks
// below minPrecedence, or equals it without being right-associative. The body of
// this branch is not visible in this listing.
281 if (minPrecedence != -1 && (precedence[tok] < minPrecedence || (precedence[tok] == minPrecedence && !isRightAssociative[tok]))) {
// Compound assignment: the code below uses `tok - 1` both as the precedence index
// and as the emitted opcode, so it relies on each ASSIGN_x token code being exactly
// one greater than its plain operator x.
287 case ASSIGN_BITOR: case ASSIGN_BITXOR: case ASSIGN_BITAND: case ASSIGN_LSH: case ASSIGN_RSH: case ASSIGN_URSH:
288 case ASSIGN_ADD: case ASSIGN_SUB: case ASSIGN_MUL: case ASSIGN_DIV: case ASSIGN_MOD: {
289 b.set(b.size() - 1, b.GET_PRESERVE, new Boolean(true)); // FIXME should use AssignTarget
290 startExpr(b, precedence[tok - 1]);
291 b.add(line, tok - 1);
297 case INC: case DEC: { // postfix
// Overwrites the last emitted instruction with the INC/DEC opcode and argument
// false (the prefix form in startExpr passes true).
298 b.set(b.size() - 1, tok, new Boolean(false)); // FIXME use assignmenttarget
301 case LP: { // invocation (not grouping)
303 while(peekToken() != RP) {
305 if (peekToken() != COMMA) {
307 if (peekToken() == RP) break;
// CALL carries the value of the counter i as its argument (i is declared on a line
// not visible here; presumably the number of arguments parsed -- confirm).
312 b.add(line, CALL, new Integer(i));
// Plain binary operators: parse the right operand at this operator's precedence.
315 case BITOR: case BITXOR: case BITAND: case SHEQ: case SHNE: case LSH:
316 case RSH: case URSH: case ADD: case MUL: case DIV: case MOD:
317 case GT: case GE: case EQ: case NE: case LT: case LE: case SUB: {
318 startExpr(b, precedence[tok]);
// Short-circuit AND/OR: a conditional jump (JF for AND, JT for OR) is emitted with
// a placeholder offset of 0 and patched once the right operand has been compiled.
323 b.add(line, tok == AND ? b.JF : b.JT, new Integer(0)); // test to see if we can short-circuit
325 startExpr(b, precedence[tok]); // otherwise check the second value
326 b.add(line, JMP, new Integer(2)); // leave the second value on the stack and jump to the end
327 b.add(line, LITERAL, tok == AND ? new Boolean(false) : new Boolean(true)); // target of the short-circuit jump is here
328 b.set(size - 1, new Integer(b.size() - size)); // write the target of the short-circuit jump
331 case DOT: { // FIXME, assigntarget
// The property name is saved before peeking ahead for an assignment.
333 String target = string;
334 if (peekToken() == ASSIGN) {
336 b.add(line, LITERAL, target);
342 b.add(line, LITERAL, target);
347 case LB: { // subscripting (not array constructor)
350 if (peekToken() == ASSIGN) { // FIXME: assigntarget
// Conditional expression: JF and JMP are both emitted with placeholder offsets and
// patched via b.set(size - 1, ...) once their targets are known.
362 b.add(line, JF, new Integer(0)); // jump to the if-false expression
364 startExpr(b, -1); // write the if-true expression
365 b.add(line, JMP, new Integer(0)); // if true, jump *over* the if-false expression
366 b.set(size - 1, new Integer(b.size() - size + 1)); // now we know where the target of the jump is
369 startExpr(b, -1); // write the if-false expression
370 b.set(size - 1, new Integer(b.size() - size + 1)); // this is the end; jump to here
379 continueExpr(b, minPrecedence); // try to continue the expression
382 /** Parse a block of statements which must be surrounded by LC..RC. */
383 void parseBlock(CompiledFunction b) throws IOException { parseBlock(b, null); }
384 void parseBlock(CompiledFunction b, String label) throws IOException {
// Parses statements into b. At end of input this does nothing; if the next token
// is not LC, exactly one statement is parsed instead of a braced block.
// NOTE(review): `label` is not used by any line visible here, and the lines that
// presumably consume the surrounding LC/RC are missing from this listing -- confirm.
385 if (peekToken() == -1) return;
386 else if (peekToken() != LC) parseStatement(b, null);
// Keep parsing statements until the closing RC or end of input is next.
389 while(peekToken() != RC && peekToken() != -1) parseStatement(b, null);
394 /** Parse a single statement, consuming the RC or SEMI which terminates it. */
395 void parseStatement(CompiledFunction b, String label) throws IOException {
// Parses one statement and appends its bytecode to b. `label`, when non-null, is
// emitted as a LABEL instruction ahead of the loop/switch constructs below and is
// passed on to parseBlock for a braced block.
// Jump and LOOP targets are emitted with placeholder arguments and patched later
// with b.set(position - 1, offset) once the target position is known.
// NOTE(review): this listing omits many short lines (most case labels, the
// LOOP/size bookkeeping lines, consume(...) calls and closing braces), so the
// structure shown is incomplete; the grouping comments below are inferred from the
// surviving inline comments.
396 int tok = peekToken();
397 if (tok == -1) return;
398 switch(tok = getToken()) {
400 case THROW: case ASSERT: case RETURN: {
// A bare `return;` pushes a null literal.
401 if (tok == RETURN && peekToken() == SEMI)
402 b.add(line, LITERAL, null);
409 case BREAK: case CONTINUE: {
// NOTE(review): when no NAME follows, `string` holds whatever the lexer last stored
// there; confirm it is null for an unlabeled break/continue.
410 if (peekToken() == NAME) consume(NAME);
411 b.add(line, tok, string);
// Variable declaration list (presumably `case VAR`, label not visible): each name is
// DECLAREd on the top scope and, if followed by '=', assigned via PUT.
416 b.add(line, TOPSCOPE); // push the current scope
419 String name = string;
420 b.add(line, LITERAL, name); // push the name to be declared
421 b.add(line, DECLARE); // declare it
422 if (peekToken() == ASSIGN) { // if there is an '=' after the variable name
423 b.add(line, LITERAL, name); // put the var name back on the stack
426 b.add(line, PUT); // assign it
427 b.add(line, POP); // clean the stack
429 if (peekToken() != COMMA) break;
432 b.add(line, POP); // pop off the topscope
// A terminating SEMI is required unless the last token read was RC or input has
// ended; an optional SEMI after RC is also consumed.
433 if ((mostRecentlyReadToken != RC || peekToken() == SEMI) && peekToken() != -1) consume(SEMI);
// if / else (case label not visible)
441 b.add(line, JF, new Integer(0)); // if false, jump to the else-block
443 parseStatement(b, null);
445 if (peekToken() == ELSE) {
447 b.add(line, JMP, new Integer(0)); // if we took the true-block, jump over the else-block
448 b.set(size - 1, new Integer(b.size() - size + 1));
450 parseStatement(b, null);
452 b.set(size - 1, new Integer(b.size() - size + 1)); // regardless of which branch we took, b[size] needs to point here
// while loop (case label not visible)
457 if (label != null) b.add(line, LABEL, label);
460 b.add(line, POP); // discard the first-iteration indicator
462 b.add(line, JT, new Integer(2)); // if the while() clause is true, jump over the BREAK
465 parseStatement(b, null);
466 b.add(line, CONTINUE); // if we fall out of the end, definitely continue
467 b.set(size - 1, new Integer(b.size() - size + 1)); // end of the loop
// switch statement (case label not visible)
472 if (label != null) b.add(line, LABEL, label);
474 int size0 = b.size();
479 if (peekToken() == CASE) { // we compile CASE statements like a bunch of if..else's
481 b.add(line, DUP); // duplicate the switch() value; we'll consume one copy
484 b.add(line, EQ); // check if we should do this case-block
485 b.add(line, JF, new Integer(0)); // if not, jump to the next one
487 while(peekToken() != CASE && peekToken() != DEFAULT && peekToken() != RC) parseStatement(b, null);
488 b.set(size - 1, new Integer(1 + b.size() - size));
489 } else if (peekToken() == DEFAULT) {
492 while(peekToken() != CASE && peekToken() != DEFAULT && peekToken() != RC) parseStatement(b, null);
493 } else if (peekToken() == RC) {
495 b.add(line, BREAK); // break out of the loop if we 'fall through'
498 throw new ParserException("expected CASE, DEFAULT, or RC; got " + codeToString[peekToken()]);
500 b.set(size0 - 1, new Integer(b.size() - size0 + 1)); // end of the loop
// do..while loop (case label not visible)
505 if (label != null) b.add(line, LABEL, label);
508 parseStatement(b, null);
512 b.add(line, JT, new Integer(2)); // check the while() clause; jump over the BREAK if true
514 b.add(line, CONTINUE);
517 b.set(size - 1, new Integer(b.size() - size + 1)); // end of the loop; write this location to the LOOP instruction
// try / catch / finally (case label not visible)
524 parseStatement(b, null); // parse the expression to be TRYed
525 b.add(line, POP); // pop the TryMarker
// This JMP is emitted without an argument; its target is written later through size2.
526 b.add(line, JMP); // jump forward to the end of the catch block
527 int size2 = b.size();
528 b.set(size - 1, new Integer(b.size() - size + 1)); // the TRY argument points at the start of the CATCH block
530 if (peekToken() == CATCH) {
535 // FIXME, we need an extra scope here
536 b.add(line, TOPSCOPE); // the exception is on top of the stack; put it to the chosen name
538 b.add(line, LITERAL);
543 parseStatement(b, null);
546 // jump here if no exception was thrown
547 b.set(size2 - 1, new Integer(b.size() - size2 + 1));
549 // FIXME: not implemented correctly
550 if (peekToken() == FINALLY) {
552 parseStatement(b, null);
// for loops (case label not visible): the first token(s) are read to find out whether
// IN follows the variable name, then pushed back so they can be parsed again.
561 boolean hadVar = false; // if it's a for..in, we ignore the VAR
562 if (tok == VAR) { hadVar = true; tok = getToken(); }
563 String varName = string;
564 boolean forIn = peekToken() == IN; // determine if this is a for..in loop or not
565 pushBackToken(tok, varName);
// for..in form: runs in a new scope with varName declared in it.
568 b.add(line, NEWSCOPE);
569 b.add(line, LITERAL, varName); // declare the new variable
570 b.add(line, DECLARE);
572 b.add(line, LOOP); // we actually only add this to ensure that BREAK works
573 b.add(line, POP); // discard the first-iteration indicator
578 b.add(line, PUSHKEYS); // push the keys as an array; check the length
579 b.add(line, LITERAL, "length");
583 b.add(line, LITERAL, new Integer(1)); // decrement the length
586 b.add(line, LITERAL, new Integer(0)); // see if we've exhausted all the elements
588 b.add(line, JF, new Integer(2));
589 b.add(line, BREAK); // if we have, then BREAK
590 b.add(line, GET_PRESERVE); // get the key out of the keys array
591 b.add(line, LITERAL, varName);
592 b.add(line, PUT); // write it to this[varName]
593 parseStatement(b, null); // do some stuff
594 b.add(line, CONTINUE); // continue if we fall out the bottom
596 b.set(size - 1, new Integer(b.size() - size + 1)); // BREAK to here
597 b.add(line, OLDSCOPE); // restore the scope
// Classic for(init; test; incr) form: the test is compiled into a scratch
// CompiledFunction (e2) and pasted in after the incrementor code.
600 if (hadVar) pushBackToken(VAR, null); // yeah, this actually matters
601 b.add(line, NEWSCOPE); // grab a fresh scope
603 parseStatement(b, null); // initializer
604 CompiledFunction e2 = // we need to put the incrementor before the test
605 new CompiledFunction(sourceName, line, null); // so we save the test here
606 if (peekToken() != SEMI)
609 e2.add(line, b.LITERAL, Boolean.TRUE); // handle the for(foo;;foo) case
611 if (label != null) b.add(line, LABEL, label);
613 int size2 = b.size();
615 b.add(line, JT, new Integer(0)); // if we're on the first iteration, jump over the incrementor
617 if (peekToken() != RP) { // do the increment thing
621 b.set(size - 1, new Integer(b.size() - size + 1));
624 b.paste(e2); // ok, *now* test if we're done yet
625 b.add(line, JT, new Integer(2)); // break out if we don't meet the test
627 parseStatement(b, null);
628 b.add(line, CONTINUE); // if we fall out the bottom, CONTINUE
629 b.set(size2 - 1, new Integer(b.size() - size2 + 1)); // end of the loop
631 b.add(line, OLDSCOPE); // get our scope back
636 case NAME: { // either a label or an identifier; this is the one place we're not LL(1)
// The name is saved so it can either become the label of the following statement
// or be pushed back and re-read as the start of an expression.
637 String possiblyTheLabel = string;
638 if (peekToken() == COLON) { // label
640 parseStatement(b, possiblyTheLabel);
642 } else { // expression
643 pushBackToken(NAME, possiblyTheLabel);
646 if ((mostRecentlyReadToken != RC || peekToken() == SEMI) && peekToken() != -1) consume(SEMI);
651 case SEMI: return; // yep, the null statement is valid
653 case LC: { // blocks are statements too
655 parseBlock(b, label);
659 default: { // hope that it's an expression
663 if ((mostRecentlyReadToken != RC || peekToken() == SEMI) && peekToken() != -1) consume(SEMI);
670 // ParserException //////////////////////////////////////////////////////////////////////
672 private class ParserException extends IOException { public ParserException(String s) { super(sourceName + ":" + line + " " + s); } }