1 // Copyright 2003 Adam Megacz, see the COPYING file for licensing [GPL]
4 // FIXME: line number accuracy
10 * Parses a stream of lexed tokens into a tree of CompiledFunction's.
13 * There are three kinds of things we parse: blocks, statements,
14 * expressions. Expressions are a special type of statement that
15 * evaluates to a value (for example, "break" is not an expression,
16 * but "3+2" is). AssignmentTargets are a special kind of expression
17 * that can be 'put' to (for example, "foo()" is not an
18 * assignmentTarget, but "foo[7]" is). FIXME.
21 * Technically it would be a better design for this class to build an
22 * intermediate parse tree and use that to emit bytecode. Here's the
25 * Advantages of building a parse tree:
26 * - easier to apply optimizations
27 * - would let us handle more sophisticated languages than JavaScript
29 * Advantages of leaving out the parse tree
30 * - faster compilation
31 * - less load on the garbage collector
32 * - much simpler code, easier to understand
35 * Fortunately JS is such a simple language that we can get away with
36 * the half-assed approach and still produce a working, complete
39 * The bytecode language emitted doesn't really cause any appreciable
40 * semantic loss, and is itself a parseable language very similar to
41 * Forth or a postfix variant of LISP. This means that the bytecode
42 * can be transformed into a parse tree, which can be manipulated.
43 * So if we ever want to add an optimizer, it could easily be done by
44 * producing a parse tree from the bytecode, optimizing that tree,
45 * and then re-emitting the bytecode. The parse tree node class
46 * would also be much simpler since the bytecode language has so few
49 * Actually, the above paragraph is slightly inaccurate -- there are
50 * places where we push a value and then perform an arbitrary number
51 * of operations using it before popping it; this doesn't parse well.
52 * But these cases are clearly marked and easy to change if we do
53 * need to move to a parse tree format.
55 class Parser extends Lexer implements ByteCodes {
58 // Constructors //////////////////////////////////////////////////////
60 public Parser(Reader r, String sourceName, int line) throws IOException { super(r, sourceName, line); }
63 public static void main(String[] s) throws Exception {
64 CompiledFunction block = new CompiledFunction("stdin", 0, new InputStreamReader(System.in), null);
65 if (block == null) return;
66 System.out.println(block);
70 // Statics ////////////////////////////////////////////////////////////
72 static byte[] precedence = new byte[MAX_TOKEN + 1];
73 static boolean[] isRightAssociative = new boolean[MAX_TOKEN + 1];
75 isRightAssociative[ASSIGN] = true;
77 precedence[ASSIGN] = 1;
79 precedence[COMMA] = 3;
80 precedence[OR] = precedence[AND] = 4;
81 precedence[GT] = precedence[GE] = 5;
82 precedence[BITOR] = 6;
83 precedence[BITXOR] = 7;
84 precedence[BITAND] = 8;
85 precedence[EQ] = precedence[NE] = 9;
86 precedence[LT] = precedence[LE] = 10;
87 precedence[SHEQ] = precedence[SHNE] = 11;
88 precedence[LSH] = precedence[RSH] = precedence[URSH] = 12;
89 precedence[ADD] = precedence[SUB] = 13;
90 precedence[MUL] = precedence[DIV] = precedence[MOD] = 14;
91 precedence[BITNOT] = 15;
92 precedence[INC] = precedence[DEC] = 16;
99 // Parsing Logic /////////////////////////////////////////////////////////
101 /** gets a token and throws an exception if it is not <tt>code</tt> */
102 private void consume(int code) throws IOException {
103 if (getToken() != code) throw new ParserException("expected " + codeToString[code] + ", got " + (op == -1 ? "EOF" : codeToString[op]));
107 * Parse the largest possible expression containing no operators
108 * of precedence below <tt>minPrecedence</tt> and append the
109 * bytecodes for that expression to <tt>appendTo</tt>; the
110 * appended bytecodes MUST grow the stack by exactly one element.
112 private void startExpr(CompiledFunction appendTo, int minPrecedence) throws IOException {
113 int tok = getToken();
114 CompiledFunction b = appendTo;
117 case -1: throw new ParserException("expected expression");
119 // all of these simply push values onto the stack
120 case NUMBER: b.add(line, LITERAL, number); break;
121 case STRING: b.add(line, LITERAL, string); break;
122 case THIS: b.add(line, TOPSCOPE, null); break;
123 case NULL: b.add(line, LITERAL, null); break;
124 case TRUE: case FALSE: b.add(line, LITERAL, new Boolean(tok == TRUE)); break;
127 b.add(line, ARRAY, new Integer(0)); // push an array onto the stack
128 int size0 = b.size();
130 if (peekToken() != RB)
131 while(true) { // iterate over the initialization values
133 if (peekToken() == COMMA || peekToken() == RB)
134 b.add(line, LITERAL, null); // for stuff like [1,,2,]
136 startExpr(b, -1); // push the value onto the stack
137 b.add(line, LITERAL, new Integer(i++)); // push the index in the array to place it into
138 b.add(line, PUT); // put it into the array
139 b.add(line, POP); // discard the value remaining on the stack
140 if (peekToken() == RB) break;
143 b.set(size0 - 1, new Integer(i)); // back at the ARRAY instruction, write the size of the array
147 case SUB: { // negative literal (like "3 * -1")
149 b.add(line, LITERAL, new Double(number.doubleValue() * -1));
152 case LP: { // grouping (not calling)
157 case INC: case DEC: { // prefix (not postfix)
158 startExpr(b, precedence[tok]);
159 b.set(b.size() - 1, tok, new Boolean(true)); // FIXME, ugly; need startAssignTarget
162 case BANG: case BITNOT: case TYPEOF: {
163 startExpr(b, precedence[tok]);
167 case LC: { // object constructor
168 b.add(line, OBJECT, null); // put an object on the stack
169 if (peekToken() != RC)
171 if (peekToken() != NAME && peekToken() != STRING)
172 throw new ParserException("expected NAME or STRING");
174 b.add(line, LITERAL, string); // grab the key
176 startExpr(b, -1); // grab the value
177 b.add(line, PUT); // put the value into the object
178 b.add(line, POP); // discard the remaining value
179 if (peekToken() == RC) break;
181 if (peekToken() == RC) break; // we permit {,,} -- I'm not sure if ECMA does
186 case NAME: { // FIXME; this is an lvalue
187 String name = string;
188 if (peekToken() == ASSIGN) {
190 b.add(line, TOPSCOPE);
191 b.add(line, LITERAL, name);
192 startExpr(b, minPrecedence);
197 b.add(line, TOPSCOPE);
198 b.add(line, LITERAL, name);
206 CompiledFunction b2 = new CompiledFunction(sourceName, line, null);
207 b.add(line, NEWFUNCTION, b2);
209 // function prelude; arguments array is already on the stack
210 b2.add(line, TOPSCOPE); // push the scope onto the stack
211 b2.add(line, SWAP); // swap 'this' and 'arguments'
213 b2.add(line, LITERAL, "arguments"); // declare arguments (equivalent to 'var arguments;')
214 b2.add(line, DECLARE);
216 b2.add(line, LITERAL, "arguments"); // set this.arguments and leave the value on the stack
222 while(peekToken() != RP) { // run through the list of argument names
223 if (peekToken() == NAME) {
224 consume(NAME); // a named argument
226 b2.add(line, LITERAL, string); // declare the name
227 b2.add(line, DECLARE);
229 b2.add(line, LITERAL, new Integer(numArgs)); // retrieve it from the arguments array
230 b2.add(line, GET_PRESERVE);
234 b2.add(line, TOPSCOPE); // put it to the current scope
236 b2.add(line, LITERAL, string);
240 b2.add(line, POP); // clean the stack
243 if (peekToken() == RP) break;
249 b2.add(line, POP); // pop off the arguments array
251 parseStatement(b2, null); // the function body
253 b2.add(line, LITERAL, null); // in case we "fall out the bottom", return NULL
254 b2.add(line, RETURN);
258 default: throw new ParserException("expected expression, found " + codeToString[tok] + ", which cannot start an expression");
261 // attempt to continue the expression
262 continueExpr(b, minPrecedence);
266 * Assuming that a complete expression has just been parsed,
267 * <tt>continueExpr</tt> will attempt to extend this expression by
268 * parsing additional tokens and appending additional bytecodes.
270 * No operators with precedence less than <tt>minPrecedence</tt>
273 * If any bytecodes are appended, they will not alter the stack
276 private void continueExpr(CompiledFunction b, int minPrecedence) throws IOException {
277 if (b == null) throw new Error("got null b; this should never happen");
278 int tok = getToken();
279 if (tok == -1) return;
280 if (minPrecedence != -1 && (precedence[tok] < minPrecedence || (precedence[tok] == minPrecedence && !isRightAssociative[tok]))) {
286 case ASSIGN_BITOR: case ASSIGN_BITXOR: case ASSIGN_BITAND: case ASSIGN_LSH: case ASSIGN_RSH: case ASSIGN_URSH:
287 case ASSIGN_ADD: case ASSIGN_SUB: case ASSIGN_MUL: case ASSIGN_DIV: case ASSIGN_MOD: {
288 b.set(b.size() - 1, b.GET_PRESERVE, new Boolean(true)); // FIXME should use AssignTarget
289 startExpr(b, precedence[tok - 1]);
290 b.add(line, tok - 1);
296 case INC: case DEC: { // postfix
297 b.set(b.size() - 1, tok, new Boolean(false)); // FIXME use assignmenttarget
300 case LP: { // invocation (not grouping)
302 while(peekToken() != RP) {
304 if (peekToken() != COMMA) {
306 if (peekToken() == RP) break;
311 b.add(line, CALL, new Integer(i));
314 case BITOR: case BITXOR: case BITAND: case SHEQ: case SHNE: case LSH:
315 case RSH: case URSH: case ADD: case MUL: case DIV: case MOD:
316 case GT: case GE: case EQ: case NE: case LT: case LE: case SUB: {
317 startExpr(b, precedence[tok]);
322 b.add(line, tok == AND ? b.JF : b.JT, new Integer(0)); // test to see if we can short-circuit
324 startExpr(b, precedence[tok]); // otherwise check the second value
325 b.add(line, JMP, new Integer(2)); // leave the second value on the stack and jump to the end
326 b.add(line, LITERAL, tok == AND ? new Boolean(false) : new Boolean(true)); // target of the short-circuit jump is here
327 b.set(size - 1, new Integer(b.size() - size)); // write the target of the short-circuit jump
330 case DOT: { // FIXME, assigntarget
332 String target = string;
333 if (peekToken() == ASSIGN) {
335 b.add(line, LITERAL, target);
341 b.add(line, LITERAL, target);
346 case LB: { // subscripting (not array constructor)
349 if (peekToken() == ASSIGN) { // FIXME: assigntarget
361 b.add(line, JF, new Integer(0)); // jump to the if-false expression
363 startExpr(b, -1); // write the if-true expression
364 b.add(line, JMP, new Integer(0)); // if true, jump *over* the if-false expression
365 b.set(size - 1, new Integer(b.size() - size + 1)); // now we know where the target of the jump is
368 startExpr(b, -1); // write the if-false expression
369 b.set(size - 1, new Integer(b.size() - size + 1)); // this is the end; jump to here
378 continueExpr(b, minPrecedence); // try to continue the expression
381 /** Parse a block of statements which must be surrounded by LC..RC. */
382 void parseBlock(CompiledFunction b) throws IOException { parseBlock(b, null); }
383 void parseBlock(CompiledFunction b, String label) throws IOException {
384 if (peekToken() == -1) return;
385 else if (peekToken() != LC) parseStatement(b, null);
388 while(peekToken() != RC && peekToken() != -1) parseStatement(b, null);
393 /** Parse a single statement, consuming the RC or SEMI which terminates it. */
394 void parseStatement(CompiledFunction b, String label) throws IOException {
395 int tok = peekToken();
396 if (tok == -1) return;
397 switch(tok = getToken()) {
399 case THROW: case ASSERT: case RETURN: {
400 if (tok == RETURN && peekToken() == SEMI) b.add(line, LITERAL, null);
401 else startExpr(b, -1);
407 case BREAK: case CONTINUE: {
408 if (peekToken() == NAME) consume(NAME);
409 b.add(line, tok, string);
415 b.add(line, TOPSCOPE); // push the current scope
418 String name = string;
419 b.add(line, LITERAL, name); // push the name to be declared
420 b.add(line, DECLARE); // declare it
421 if (peekToken() == ASSIGN) { // if there is an '=' after the variable name
422 b.add(line, LITERAL, name); // put the var name back on the stack
428 if (peekToken() != COMMA) break;
432 if ((mostRecentlyReadToken != RC || peekToken() == SEMI) && peekToken() != -1) consume(SEMI);
441 b.add(line, JF, new Integer(0));
443 parseStatement(b, null);
445 if (peekToken() == ELSE) {
447 b.set(size - 1, new Integer(2 + b.size() - size));
448 b.add(line, JMP, new Integer(0));
450 parseStatement(b, null);
452 b.set(size - 1, new Integer(1 + b.size() - size));
458 if (label != null) b.add(line, LABEL, label);
463 b.add(line, JT, new Integer(2));
466 parseStatement(b, null);
467 b.add(line, CONTINUE); // if we fall out of the end, definately continue
468 b.set(size - 1, new Integer(b.size() - size + 1)); // end of the loop
474 if (label != null) b.add(line, LABEL, label);
476 int size0 = b.size();
481 if (peekToken() == CASE) {
487 b.add(line, JF, new Integer(0));
489 while(peekToken() != CASE && peekToken() != DEFAULT && peekToken() != RC) {
490 int size2 = b.size();
491 parseStatement(b, null);
492 if (size2 == b.size()) break;
494 b.set(size - 1, new Integer(1 + b.size() - size));
495 } else if (peekToken() == DEFAULT) {
498 while(peekToken() != CASE && peekToken() != DEFAULT && peekToken() != RC) {
499 int size2 = b.size();
500 parseStatement(b, null);
501 if (size2 == b.size()) break;
503 } else if (peekToken() == RC) {
508 throw new ParserException("expected CASE, DEFAULT, or RC; got " + codeToString[peekToken()]);
511 b.set(size0 - 1, new Integer(b.size() - size0 + 1)); // end of the loop
516 if (label != null) b.add(line, LABEL, label);
519 parseStatement(b, null);
523 b.add(line, JT, new Integer(2));
525 b.add(line, CONTINUE);
528 b.set(size - 1, new Integer(b.size() - size + 1)); // end of the loop
533 // We deliberately allow you to omit braces in catch{}/finally{} if they are single statements...
537 b.add(line, POP); // pop the TryMarker
538 b.add(line, JMP); // jump forward to the end of the catch block
539 int size2 = b.size();
540 b.set(size - 1, new Integer(b.size() - size + 1));// the TRY argument points at the start of the CATCH block
542 if (peekToken() == CATCH) {
547 // FIXME, we need an extra scope here
548 b.add(line, TOPSCOPE); // the exception is on top of the stack; put it to the variable
550 b.add(line, LITERAL);
555 parseStatement(b, null);
558 b.set(size2 - 1, new Integer(b.size() - size2 + 1)); // jump here if no exception was thrown
560 // FIXME: not implemented correctly
561 if (peekToken() == FINALLY) {
563 parseStatement(b, null);
572 boolean hadVar = false;
573 if (tok == VAR) { hadVar = true; tok = getToken(); }
574 String varName = string;
575 boolean forIn = peekToken() == IN;
576 pushBackToken(tok, varName);
579 // FIXME: break needs to work in here
583 b.add(line, PUSHKEYS);
584 b.add(line, LITERAL, "length");
587 CompiledFunction b2 = new CompiledFunction(sourceName, line, null);
589 b.add(line, NEWSCOPE);
591 b.add(line, LITERAL, new Integer(1));
594 b.add(line, LITERAL, new Integer(0));
596 b.add(line, JT, new Integer(7));
597 b.add(line, GET_PRESERVE);
598 b.add(line, LITERAL, varName);
599 b.add(line, LITERAL, varName);
600 b.add(line, DECLARE);
602 parseStatement(b, null);
604 b.add(line, OLDSCOPE);
609 if (hadVar) pushBackToken(VAR, null);
610 b.add(line, NEWSCOPE);
612 parseStatement(b, null);
613 CompiledFunction e2 = new CompiledFunction(sourceName, line, null);
614 if (peekToken() != SEMI) {
617 e2.add(line, b.LITERAL, Boolean.TRUE);
620 if (label != null) b.add(line, LABEL, label);
622 int size2 = b.size();
624 b.add(line, JT, new Integer(0));
626 if (peekToken() != RP) {
630 b.set(size - 1, new Integer(b.size() - size + 1));
634 b.add(line, JT, new Integer(2));
636 parseStatement(b, null);
637 b.add(line, CONTINUE);
638 b.set(size2 - 1, new Integer(b.size() - size2 + 1)); // end of the loop
640 b.add(line, OLDSCOPE);
646 String possiblyTheLabel = string;
647 if (peekToken() == COLON) {
649 label = possiblyTheLabel;
650 parseStatement(b, label);
653 pushBackToken(NAME, possiblyTheLabel);
656 if ((mostRecentlyReadToken != RC || peekToken() == SEMI) && peekToken() != -1) consume(SEMI);
664 parseBlock(b, label);
672 if ((mostRecentlyReadToken != RC || peekToken() == SEMI) && peekToken() != -1) consume(SEMI);
679 // ParserException //////////////////////////////////////////////////////////////////////
681 private class ParserException extends IOException { public ParserException(String s) { super(sourceName + ":" + line + " " + s); } }