1 // Copyright 2003 Adam Megacz, see the COPYING file for licensing [GPL]
7 /** parses a stream of lexed tokens into ForthBlock's */
8 public class Parser extends Lexer implements OpCodes {
10 // Constructors //////////////////////////////////////////////////////
12 public Parser(Reader r, String sourceName, int line) throws IOException {
14 this.sourceName = sourceName;
19 public static void main(String[] s) throws Exception {
20 Parser p = new Parser(new InputStreamReader(System.in), "stdin", 0);
22 ForthBlock block = new ForthBlock(0, "stdin");
23 p.parseStatement(false, block);
24 if (block == null) return;
25 System.out.println(block);
26 if (p.peekToken() == -1) return;
31 // Statics ////////////////////////////////////////////////////////////
33 static byte[] precedence = new byte[MAX_TOKEN + 1];
34 static boolean[] isRightAssociative = new boolean[MAX_TOKEN + 1];
35 static boolean[] mustStartExpression = new boolean[MAX_TOKEN + 1];
36 static boolean[] cannotStartExpression = new boolean[MAX_TOKEN + 1];
39 mustStartExpression[VAR] = true;
40 mustStartExpression[IF] = true;
41 mustStartExpression[BANG] = true;
42 mustStartExpression[BITNOT] = true;
43 mustStartExpression[INSTANCEOF] = true;
44 mustStartExpression[TYPEOF] = true;
45 mustStartExpression[NUMBER] = true;
46 mustStartExpression[STRING] = true;
47 mustStartExpression[NULL] = true;
48 mustStartExpression[TRUE] = true;
49 mustStartExpression[FALSE] = true;
50 mustStartExpression[Tokens.FUNCTION] = true;
51 mustStartExpression[NAME] = true;
52 mustStartExpression[LC] = true;
53 mustStartExpression[THIS] = true;
55 cannotStartExpression[OR] = true;
56 cannotStartExpression[AND] = true;
57 cannotStartExpression[BITOR] = true;
58 cannotStartExpression[BITXOR] = true;
59 cannotStartExpression[BITAND] = true;
60 cannotStartExpression[SHEQ] = true;
61 cannotStartExpression[SHNE] = true;
62 cannotStartExpression[LSH] = true;
63 cannotStartExpression[RSH] = true;
64 cannotStartExpression[URSH] = true;
65 cannotStartExpression[ADD] = true;
66 cannotStartExpression[MUL] = true;
67 cannotStartExpression[DIV] = true;
68 cannotStartExpression[MOD] = true;
69 cannotStartExpression[GT] = true;
70 cannotStartExpression[GE] = true;
71 cannotStartExpression[EQ] = true;
72 cannotStartExpression[NE] = true;
73 cannotStartExpression[LT] = true;
74 cannotStartExpression[LE] = true;
75 cannotStartExpression[DOT] = true;
76 cannotStartExpression[HOOK] = true;
77 cannotStartExpression[ASSIGN_BITOR] = true;
78 cannotStartExpression[ASSIGN_BITXOR] = true;
79 cannotStartExpression[ASSIGN_BITAND] = true;
80 cannotStartExpression[ASSIGN_LSH] = true;
81 cannotStartExpression[ASSIGN_RSH] = true;
82 cannotStartExpression[ASSIGN_URSH] = true;
83 cannotStartExpression[ASSIGN_ADD] = true;
84 cannotStartExpression[ASSIGN_SUB] = true;
85 cannotStartExpression[ASSIGN_MUL] = true;
86 cannotStartExpression[ASSIGN_DIV] = true;
87 cannotStartExpression[ASSIGN_MOD] = true;
89 isRightAssociative[ASSIGN] = true;
91 precedence[ASSIGN] = 1;
93 precedence[COMMA] = 3;
94 precedence[OR] = precedence[AND] = 4;
95 precedence[GT] = precedence[GE] = 5;
96 precedence[BITOR] = 6;
97 precedence[BITXOR] = 7;
98 precedence[BITAND] = 8;
99 precedence[EQ] = precedence[NE] = 9;
100 precedence[LT] = precedence[LE] = 10;
101 precedence[SHEQ] = precedence[SHNE] = 11;
102 precedence[LSH] = precedence[RSH] = precedence[URSH] = 12;
103 precedence[ADD] = precedence[SUB] = 13;
104 precedence[MUL] = precedence[DIV] = precedence[MOD] = 14;
105 precedence[BITNOT] = precedence[INSTANCEOF] = 15;
106 precedence[INC] = precedence[DEC] = 16;
109 precedence[DOT] = 19;
113 // Parsing Logic /////////////////////////////////////////////////////////
115 /** gets a token and throws an exception if it is not <tt>code</tt> */
116 public void consume(int code) throws IOException {
117 if (getToken() != code)
118 throw new ParserException("expected " + codeToString[code] + ", got " + (op == -1 ? "EOL" : codeToString[op]));
121 /** parses the largest possible expression */
122 public ForthBlock parseExpr() throws IOException { return parseExpr(null, -1); }
124 /** return the largest expression beginning with prefix containing no operators of precedence below <tt>minPrecedence</tt> */
125 public ForthBlock parseExpr(ForthBlock prefix, int minPrecedence) throws IOException {
126 return parseExpr(prefix, minPrecedence, new ForthBlock(line, sourceName));
129 /** append the largest expression beginning with prefix containing no operators of precedence below <tt>minPrecedence</tt> */
130 public ForthBlock parseExpr(ForthBlock prefix, int minPrecedence, ForthBlock appendTo) throws IOException {
132 int tok = getToken();
134 if (tok == -1) return prefix;
135 if (minPrecedence > 0 && precedence[tok] != 0)
136 if (precedence[tok] < minPrecedence || (precedence[tok] == minPrecedence && !isRightAssociative[tok]))
137 { pushBackToken(); return prefix; }
139 if (prefix != null && mustStartExpression[tok]) { pushBackToken(); return prefix; }
140 if (prefix == null && cannotStartExpression[tok]) { pushBackToken(); return prefix; }
142 ForthBlock b = appendTo;
146 case NUMBER: return parseExpr(b.add(ForthBlock.LITERAL, number), minPrecedence);
147 case STRING: return parseExpr(b.add(ForthBlock.LITERAL, string), minPrecedence);
148 case THIS: return parseExpr(b.add(THIS, null), minPrecedence);
149 case NULL: return parseExpr(b.add(ForthBlock.LITERAL, null), minPrecedence);
150 case TRUE: case FALSE: return parseExpr(b.add(ForthBlock.LITERAL, new Boolean(tok == TRUE)), minPrecedence);
152 case ASSIGN_BITOR: case ASSIGN_BITXOR: case ASSIGN_BITAND: case ASSIGN_LSH: case ASSIGN_RSH: case ASSIGN_URSH:
153 case ASSIGN_ADD: case ASSIGN_SUB: case ASSIGN_MUL: case ASSIGN_DIV: case ASSIGN_MOD: {
154 b.add(b.EXPR, prefix);
155 prefix.set(prefix.size() - 1, b.GET_PRESERVE, new Boolean(true));
156 prefix.add(prefix.EXPR, parseExpr(null, precedence[tok - 1]));
161 return parseExpr(b, minPrecedence);
165 if (prefix == null) {
167 ForthBlock subexp = parseExpr(null, precedence[tok]);
168 subexp.set(subexp.size() - 1, tok, new Boolean(true));
169 b.add(b.EXPR, subexp);
170 return parseExpr(b, minPrecedence);
173 prefix.set(prefix.size() - 1, tok, new Boolean(false));
174 b.add(b.EXPR, prefix);
175 return parseExpr(b, minPrecedence);
179 if (prefix == null) { // grouping
180 b.add(EXPR, parseExpr());
182 return parseExpr(b, minPrecedence);
184 } else { // invocation
186 b.add(b.EXPR, prefix);
187 while(peekToken() != RP) {
188 b.add(b.EXPR, parseExpr());
190 if (peekToken() == RP) break;
194 b.add(b.CALL, new Integer(i));
195 return parseExpr(b, minPrecedence);
198 case BANG: case BITNOT: case INSTANCEOF: case TYPEOF: {
199 b.add(b.EXPR, parseExpr(null, precedence[tok]));
201 return parseExpr(b, minPrecedence);
205 if (prefix == null) {
207 return parseExpr(b.add(ForthBlock.LITERAL, new Double(number.doubleValue() * -1)), minPrecedence);
210 case BITOR: case BITXOR: case BITAND: case SHEQ: case SHNE: case LSH:
211 case RSH: case URSH: case ADD: case MUL: case DIV: case MOD:
212 case GT: case GE: case EQ: case NE: case LT: case LE: {
213 b.add(b.EXPR, prefix);
214 b.add(b.EXPR, parseExpr(null, precedence[tok]));
216 return parseExpr(b, minPrecedence);
220 b.add(b.LITERAL, tok == AND ? new Boolean(false) : new Boolean(true));
221 b.add(b.EXPR, prefix);
222 b.add(tok == AND ? b.JF : b.JT, new Integer(3));
224 b.add(b.EXPR, parseExpr(null, precedence[tok]));
225 return parseExpr(b, minPrecedence);
229 String name = string;
230 if (peekToken() == ASSIGN) {
233 b.add(ForthBlock.LITERAL, name);
234 b.add(ForthBlock.EXPR, parseExpr(null, minPrecedence));
235 b.add(ForthBlock.PUT);
236 b.add(ForthBlock.SWAP);
237 b.add(ForthBlock.POP);
240 b.add(ForthBlock.LITERAL, name);
241 b.add(ForthBlock.GET);
243 return parseExpr(b, minPrecedence);
248 String target = string;
249 b.add(b.EXPR, prefix);
250 if (peekToken() == ASSIGN) {
252 ForthBlock val = parseExpr();
253 b.add(b.LITERAL, target);
259 b.add(b.LITERAL, target);
262 return parseExpr(b, minPrecedence);
266 if (prefix == null) {
267 b.add(b.ARRAY, new Integer(0));
270 ForthBlock e = parseExpr();
271 if (e == null && peekToken() == RB) { consume(RB); return parseExpr(b, minPrecedence); }
272 b.add(b.LITERAL, new Integer(i++));
273 if (e == null) b.add(b.LITERAL, null);
274 else b.add(b.EXPR, e);
277 if (peekToken() == RB) { consume(RB); return parseExpr(b, minPrecedence); }
281 b.add(b.EXPR, prefix);
282 b.add(b.EXPR, parseExpr());
284 if (peekToken() == ASSIGN) {
286 b.add(b.EXPR, parseExpr());
293 return parseExpr(b, minPrecedence);
298 b.add(b.OBJECT, null);
299 if (peekToken() == RC) { consume(RC); return parseExpr(b, minPrecedence); }
301 if (peekToken() != NAME && peekToken() != STRING) throw new Error("expected NAME or STRING");
303 b.add(b.LITERAL, string);
305 b.add(b.EXPR, parseExpr());
308 if (peekToken() == RC) { consume(RC); return parseExpr(b, minPrecedence); }
310 if (peekToken() == RC) { consume(RC); return parseExpr(b, minPrecedence); }
315 b.add(b.EXPR, prefix);
316 b.add(b.JF, new Integer(3));
317 b.add(b.EXPR, parseExpr());
318 b.add(b.JMP, new Integer(2));
320 b.add(b.EXPR, parseExpr());
321 return parseExpr(b, minPrecedence);
324 case Tokens.FUNCTION: {
327 ForthBlock b2 = new ForthBlock(curLine, sourceName);
330 b2.add(b.LITERAL, "arguments");
331 b2.add(b.LITERAL, "arguments");
337 if (peekToken() == RP) consume(RP);
339 if (peekToken() == COMMA) {
340 // FIXME: pop an item off the stack here?
346 b2.add(b.LITERAL, string);
349 // retrieve it from the arguments array
350 b2.add(b.LITERAL, new Integer(numArgs));
351 b2.add(b.GET_PRESERVE);
355 // put it to the current scope
358 b2.add(b.LITERAL, string);
366 if (peekToken() == RP) { consume(RP); break; }
371 // pop off the arguments array
373 parseStatement(true, b2);
374 b2.add(b.LITERAL, null);
376 return parseExpr(b.add(OpCodes.FUNCTION, b2), minPrecedence);
379 default: pushBackToken(); return prefix;
383 /** a block is either a single statement or a list of statements surrounded by curly braces; all expressions are also statements */
384 public ForthBlock parseStatement() throws IOException {
385 ForthBlock ret = new ForthBlock(line, sourceName);
386 ret.add(ret.LITERAL, null);
387 parseStatement(false, ret);
388 if (ret.size() == 1) return null;
392 public void parseStatement(boolean requireBraces) throws IOException {
393 parseStatement(requireBraces, new ForthBlock(line, sourceName));
395 public void parseStatement(boolean requireBraces, ForthBlock b) throws IOException {
396 int tok = peekToken();
397 if (tok == -1) return;
398 boolean braced = tok == LC;
399 if (requireBraces && !braced) throw new ParserException("expected {, got " + codeToString[tok]);
400 if (braced) consume(LC);
403 switch(tok = peekToken()) {
405 case THROW: case RETURN: case ASSERT: {
407 if (tok == RETURN && peekToken() == SEMI) b.add(b.LITERAL, null);
408 else b.add(b.EXPR, parseExpr());
414 case BREAK: case CONTINUE: {
416 if (peekToken() == NAME) consume(NAME);
429 b.add(THIS); // push the current scope
432 String name = string;
433 b.add(b.LITERAL, name); // push the name to be declared
434 b.add(b.DECLARE); // declare it
435 if (peekToken() == ASSIGN) { // if there is an '=' after the variable name
436 b.add(b.LITERAL, name); // put the var name back on the stack
438 b.add(b.EXPR, parseExpr());
442 if (peekToken() != COMMA) break;
446 if (peekToken() == SEMI) consume(SEMI);
453 b.add(b.EXPR, parseExpr());
455 b.add(b.JF, new Integer(4));
456 b.add(b.EXPR, parseStatement());
458 b.add(b.JMP, new Integer(3));
459 if (peekToken() == ELSE) {
461 b.add(b.EXPR, parseStatement());
464 b.add(b.JMP, new Integer(1)); // nop
465 b.add(b.JMP, new Integer(1)); // nop
473 ForthBlock loop = new ForthBlock(curLine, sourceName);
474 b.add(loop.LOOP, loop);
477 loop.add(loop.EXPR, parseExpr());
478 loop.add(loop.JT, new Integer(2));
479 loop.add(Lexer.BREAK);
481 parseStatement(false, loop);
483 // if we fall out of the end, definately continue
491 ForthBlock loop = new ForthBlock(curLine, sourceName);
492 b.add(loop.LOOP, loop);
493 loop.add(loop.EXPR, parseExpr());
497 ForthBlock caseForthBlock;
501 loop.add(loop.EXPR, parseExpr());
503 loop.add(loop.JF, new Integer(2));
504 } else if (tok != DEFAULT) throw new ParserException("expected CASE or DEFAULT");
506 ForthBlock b2 = new ForthBlock(curLine, sourceName);
507 ForthBlock e1 = null;
508 while(peekToken() != CASE && peekToken() != DEFAULT && peekToken() != RC) {
509 if ((e1 = parseStatement()) == null) break;
513 loop.add(loop.EXPR, b2);
514 if (peekToken() == RC) {
525 ForthBlock loop = new ForthBlock(curLine, sourceName);
526 b.add(loop.LOOP, loop);
528 parseStatement(false, loop);
531 loop.add(loop.EXPR, parseExpr());
532 loop.add(loop.JT, new Integer(2));
533 loop.add(Lexer.BREAK);
534 loop.add(Lexer.CONTINUE);
541 // FIXME: don't just ignore this!
542 // We deliberately allow you to omit braces in catch{}/finally{} if they are single statements...
544 parseStatement(true, b);
546 if (peekToken() == CATCH) {
551 parseStatement(); // just discard the catchblock
554 if (peekToken() == FINALLY) {
556 parseStatement(false, b);
566 if (tok == VAR) tok = getToken();
567 String varName = string;
568 boolean forIn = peekToken() == IN;
569 pushBackToken(tok, varName);
574 b.add(b.EXPR, parseExpr());
576 b.add(b.LITERAL, "length");
579 ForthBlock b2 = new ForthBlock(curLine, sourceName);
581 b2.add(b.LITERAL, new Integer(1));
584 b2.add(b.LITERAL, new Integer(0));
586 b2.add(b.JT, new Integer(7));
587 b2.add(b.GET_PRESERVE);
588 b2.add(b.LITERAL, varName);
589 b2.add(b.LITERAL, varName);
592 b2.add(b.EXPR, parseStatement());
593 //b2.add(b.LITERAL, null);
597 ForthBlock b2 = new ForthBlock(curLine, sourceName);
601 ForthBlock e1 = parseExpr();
602 if (e1 == null) e1 = new ForthBlock(curLine, sourceName, b.LITERAL, null);
607 ForthBlock e2 = parseExpr();
609 ForthBlock e3 = parseExpr();
612 if (e2 == null) e2 = new ForthBlock(curLine, sourceName, b.LITERAL, null);
613 if (e3 == null) e3 = new ForthBlock(curLine, sourceName, b.LITERAL, null);
615 ForthBlock b3 = new ForthBlock(curLine, sourceName);
617 b2.add(b.LITERAL, null);
618 b3.add(b.JT, new Integer(3));
622 b3.add(b.JT, new Integer(2));
624 parseStatement(false, b3);
632 String name = string;
633 if (peekToken() == COLON) {
635 b.add(ForthBlock.LABEL, string);
638 pushBackToken(NAME, name);
639 // fall through to default case
644 if (tok == RC && braced) { consume(RC); return; }
647 ForthBlock ret = parseExpr();
648 if (ret == null) return;
651 if (peekToken() == SEMI) consume(SEMI);
660 class ParserException extends RuntimeException {
661 public ParserException(String s) { super(sourceName + ":" + line + " " + s); }