X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=src%2Forg%2Fxwt%2Fjs%2FLexer.java;h=f49310b928cb200e1815c3d09b776c214bef0e5e;hb=bf615a8d1871c2f8a9bf0044297ff253600862e3;hp=2a28338df7935f0b9613177ffd9456cc6de857d5;hpb=56387d062db4aca0510daa34579aa139570bac87;p=org.ibex.core.git diff --git a/src/org/xwt/js/Lexer.java b/src/org/xwt/js/Lexer.java index 2a28338..f49310b 100644 --- a/src/org/xwt/js/Lexer.java +++ b/src/org/xwt/js/Lexer.java @@ -20,55 +20,47 @@ package org.xwt.js; import java.io.*; +/** Lexes a stream of characters into a stream of Tokens */ class Lexer implements Tokens { + /** for debugging */ public static void main(String[] s) throws Exception { Lexer l = new Lexer(new InputStreamReader(System.in)); int tok = 0; - while((tok = l.getToken()) != -1) - System.out.println(codeToString[tok]); + while((tok = l.getToken()) != -1) System.out.println(codeToString[tok]); } - public int op; - public Number number = null; - public String string = null; + /** the token that was just parsed */ + protected int op; - public int line = 0; - public int col = 0; + /** if the token just parsed was a NUMBER, this is the numeric value */ + protected Number number = null; - private SmartReader in; - public String sourceName = "unknown"; + /** if the token just parsed was a NAME or STRING, this is the string value */ + protected String string = null; + + /** the line number of the current token */ + protected int line = 0; + + /** the column number of the current token */ + protected int col = 0; + /** the name of the source code file being lexed */ + protected String sourceName = "unknown"; + + private SmartReader in; public Lexer(Reader r) throws IOException { in = new SmartReader(r); } // Predicates /////////////////////////////////////////////////////////////////////// - protected static boolean isJSIdentifier(String s) { - int length = s.length(); - if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false; - for (int i=1; i= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); } private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); } - private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); } private static int xDigitToInt(int c) { if ('0' <= c && c <= '9') return c - '0'; - if ('a' <= c && c <= 'f') return c - ('a' - 10); - if ('A' <= c && c <= 'F') return c - ('A' - 10); - return -1; - } - public static boolean isWhiteSpace(int c) { - if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true; - return Character.getType((char)c) == Character.SPACE_SEPARATOR; + else if ('a' <= c && c <= 'f') return c - ('a' - 10); + else if ('A' <= c && c <= 'F') return c - ('A' - 10); + else return -1; } @@ -85,8 +77,8 @@ class Lexer implements Tokens { case 3: switch (s.charAt(0)) { case 'a': if (s.charAt(2)=='d' && s.charAt(1)=='n') return AND; break; case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') return FOR; break; - case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED; break; - case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') throw new IOException("the new keyword is not permitted in XWT scripts"); + case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED; + case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') return RESERVED; case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') return TRY; break; case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') return VAR; break; } break; @@ -100,15 +92,15 @@ class Lexer implements Tokens { if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') return ELSE; } else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') return RESERVED; } return -1; - case 'g': return s.equals("goto") ? GOTO : -1; + case 'g': return s.equals("goto") ? RESERVED : -1; case 'l': return s.equals("long") ? RESERVED : -1; case 'n': return s.equals("null") ? NULL : -1; case 't': c=s.charAt(3); if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') return TRUE; } else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') return THIS; } return -1; - case 'v': return s.equals("void") ? RESERVED : -1; - case 'w': return s.equals("with") ? WITH : -1; + case 'w': if (s.equals("with")) return RESERVED; else return -1; + case 'v': if (s.equals("void")) return RESERVED; else return -1; } break; case 5: switch (s.charAt(2)) { case 'a': return s.equals("class") ? RESERVED : -1; @@ -116,7 +108,7 @@ class Lexer implements Tokens { case 'i': return s.equals("while") ? WHILE : -1; case 'l': return s.equals("false") ? FALSE : -1; case 'n': c=s.charAt(0); - if (s.equals("const")) throw new IOException("the const keyword is not permitted in XWT"); + if (s.equals("const")) return RESERVED; else if (s.equals("final")) return RESERVED; return -1; case 'o': c=s.charAt(0); @@ -130,7 +122,7 @@ class Lexer implements Tokens { case 6: switch (s.charAt(1)) { case 'a': return s.equals("class") ? RESERVED : -1; case 'e': c=s.charAt(0); - if (s.equals("delete")) throw new IOException("the delete keyword is not permitted in XWT scripts"); + if (s.equals("delete")) return RESERVED; else if (c=='r') return s.equals("return") ? RETURN : -1; break; case 'h': return s.equals("throws") ? RESERVED : -1; @@ -162,7 +154,7 @@ class Lexer implements Tokens { break; case 10: c=s.charAt(1); if (c=='m') return s.equals("implements") ? RESERVED : -1; - else if (c=='n' && s.equals("instanceof")) throw new IOException("the instanceof keyword is not permitted in XWT scripts"); + else if (c=='n' && s.equals("instanceof")) return RESERVED; break; case 12: return s.equals("synchronized") ? RESERVED : -1; } @@ -175,8 +167,9 @@ class Lexer implements Tokens { in.unread(); String str = in.getString(); int result = getKeyword(str); + if (result == RESERVED) throw new LexerException("The reserved word \"" + str + "\" is not permitted in XWT scripts"); if (result != -1) return result; - this.string = str; + this.string = str.intern(); return NAME; } @@ -200,7 +193,7 @@ class Lexer implements Tokens { if (c == 'e' || c == 'E') { c = in.read(); if (c == '+' || c == '-') c = in.read(); - if (!isDigit(c)) throw new IOException("msg.missing.exponent"); + if (!isDigit(c)) throw new LexerException("float listeral did not have an exponent value"); do { c = in.read(); } while (isDigit(c)); } } @@ -209,7 +202,7 @@ class Lexer implements Tokens { String numString = in.getString(); if (base == 10 && !isInteger) { try { dval = (Double.valueOf(numString)).doubleValue(); } - catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); } + catch (NumberFormatException ex) { throw new LexerException("invalid numeric literal: \"" + numString + "\""); } } else { if (isInteger) { longval = Long.parseLong(numString, base); @@ -236,7 +229,7 @@ class Lexer implements Tokens { c = in.read(); in.startString(); // start after the first " while(c != quoteChar) { - if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit"); + if (c == '\n' || c == -1) throw new LexerException("unterminated string literal"); if (c == '\\') { if (stringBuf == null) { in.unread(); // Don't include the backslash @@ -256,7 +249,7 @@ class Lexer implements Tokens { for(int i=0; i<4; i++) { int ci = in.read(); if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F'))) - throw new IOException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence"); + throw new LexerException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence"); v = (v << 8) | Integer.parseInt(ci + "", 16); } c = (char)v; @@ -270,10 +263,10 @@ class Lexer implements Tokens { if (stringBuf != null) stringBuf.append((char) c); c = in.read(); } - if (stringBuf != null) this.string = stringBuf.toString(); + if (stringBuf != null) this.string = stringBuf.toString().intern(); else { in.unread(); // miss the trailing " - this.string = in.getString(); + this.string = in.getString().intern(); in.read(); } return STRING; @@ -281,13 +274,12 @@ class Lexer implements Tokens { public int _getToken() throws IOException { int c; - do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n'); + do { c = in.read(); } while (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\n' ); if (c == -1) return -1; if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c); if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c); if (c == '"' || c == '\'') return getString(c); switch (c) { - case '\n': return EOL; case ';': return SEMI; case '[': return LB; case ']': return RB; @@ -304,19 +296,14 @@ class Lexer implements Tokens { case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND; case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ; case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE; - case '%': return in.match('=') ? MOD_ASSIGN : MOD; + case '%': return in.match('=') ? ASSIGN_MOD : MOD; case '~': return BITNOT; case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD; case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB; case '*': return in.match('=') ? ASSIGN_MUL : MUL; - case '<': return !in.match('<') ? - (in.match('=') ? LE : LT) : - in.match('=') ? ASSIGN_LSH : LSH; - case '>': return !in.match('>') ? - (in.match('=') ? GE : GT) : - in.match('>') ? - (in.match('=') ? ASSIGN_URSH : URSH) : - (in.match('=') ? ASSIGN_RSH : RSH); + case '<': return !in.match('<') ? (in.match('=') ? LE : LT) : in.match('=') ? ASSIGN_LSH : LSH; + case '>': return !in.match('>') ? (in.match('=') ? GE : GT) : + in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH); case '/': if (in.match('=')) return ASSIGN_DIV; if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); } @@ -324,14 +311,18 @@ class Lexer implements Tokens { while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) { if (c == '\n' || c != '/' || !in.match('*')) continue; if (in.match('/')) return getToken(); - throw new IOException("msg.nested.comment"); + throw new LexerException("nested comments are not permitted"); } - if (c == -1) throw new IOException("msg.unterminated.comment"); + if (c == -1) throw new LexerException("unterminated comment"); return getToken(); // `goto retry' - default: throw new IOException("illegal character: " + ((char)c)); + default: throw new LexerException("illegal character: \'" + ((char)c) + "\'"); } } + + // SmartReader //////////////////////////////////////////////////////////////// + + /** a Reader that tracks line numbers and can push back tokens */ private class SmartReader { PushbackReader reader = null; int lastread = -1; @@ -352,6 +343,7 @@ class Lexer implements Tokens { lastread = reader.read(); if (accumulator != null) accumulator.append((char)lastread); if (lastread != '\n' && lastread != '\r') col++; + if (lastread == '\n') { line++; col = 0; } return lastread; } @@ -369,12 +361,13 @@ class Lexer implements Tokens { } - // PushBack Stuff //////////////////////////////////////////////////////////// + // Token PushBack code //////////////////////////////////////////////////////////// - int pushBackDepth = 0; - int[] pushBackInts = new int[10]; - Object[] pushBackObjects = new Object[10]; + private int pushBackDepth = 0; + private int[] pushBackInts = new int[10]; + private Object[] pushBackObjects = new Object[10]; + /** push back a token */ public void pushBackToken(int op, Object obj) { if (pushBackDepth >= pushBackInts.length - 1) { int[] newInts = new int[pushBackInts.length * 2]; @@ -389,31 +382,31 @@ class Lexer implements Tokens { pushBackDepth++; } + /** push back the most recently read token */ public void pushBackToken() { pushBackToken(op, number != null ? (Object)number : (Object)string); } + /** read a token but leave it in the stream */ public int peekToken() throws IOException { int ret = getToken(); pushBackToken(); return ret; } + /** read a token */ public int getToken() throws IOException { number = null; string = null; - if (pushBackDepth > 0) { - pushBackDepth--; - op = pushBackInts[pushBackDepth]; - if (pushBackObjects[pushBackDepth] != null) { - number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null; - string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null; - } - } else { - do { - op = _getToken(); - if (op == EOL) { line++; col = 0; } - } while (op == EOL); + if (pushBackDepth == 0) return _getToken(); + pushBackDepth--; + op = pushBackInts[pushBackDepth]; + if (pushBackObjects[pushBackDepth] != null) { + number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null; + string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null; } return op; } + class LexerException extends IOException { + public LexerException(String s) { super(sourceName + ":" + line + "," + col + " " + s); } + } }