-// This file was derived from org.mozilla.javascript.TokenStream; it
-// is covered by the NPL 1.1.
+// Derived from org.mozilla.javascript.TokenStream [NPL]
/**
* The contents of this file are subject to the Netscape Public
package org.xwt.js;
import java.io.*;
-class Lexer {
+/** Lexes a stream of characters into a stream of Tokens */
+class Lexer implements Tokens {
+ /** for debugging: lexes System.in (passing "stdin" as the source name and 0 as the starting line) and prints codeToString[tok] for each token until getToken() returns -1 */
public static void main(String[] s) throws Exception {
- Lexer l = new Lexer(new InputStreamReader(System.in));
- int tok = 0;
- while((tok = l.getToken()) != -1)
- System.out.println(codeToString[tok]);
+ Lexer l = new Lexer(new InputStreamReader(System.in), "stdin", 0);
+ int tok = 0;
+ while((tok = l.getToken()) != -1) System.out.println(codeToString[tok]);
}
- private SmartReader in;
- private boolean pushedBack = false;
-
- private int op;
- public Number number;
- public String string;
-
- public Lexer(Reader r) throws IOException { in = new SmartReader(r); }
- public int peekToken() throws IOException { int ret = getToken(); pushBackToken(); return ret; }
- public void pushBackToken() { if (pushedBack) throw new Error("can't push back twice"); pushedBack = true; }
-
- // Token Constants //////////////////////////////////////////////////////////
-
- public final static int
- EOL = 1, // end of line
- RETURN = 2, // return
- GOTO = 3, // goto
- BITOR = 4, // |
- ASSIGN_BITOR = 5, // |=
- BITXOR = 6, // ^
- ASSIGN_BITXOR= 7, // ^=
- BITAND = 8, // &
- ASSIGN_BITAND= 9, // &=
- EQ = 10, // ==
- NE = 11, // !=
- LT = 12, // <
- LE = 13, // <=
- GT = 14, // >
- GE = 15, // >=
- LSH = 16, // <<
- ASSIGN_LSH = 17, // <<=
- RSH = 18, // >>
- ASSIGN_RSH = 19, // >>=
- URSH = 20, // >>>
- ASSIGN_URSH = 21, // >>>=
- ADD = 22, // +
- ASSIGN_ADD = 23, // +=
- SUB = 24, // -
- ASSIGN_SUB = 25, // -=
- MUL = 26, // *
- ASSIGN_MUL = 27, // *=
- DIV = 28, // /
- ASSIGN_DIV = 29, // /=
- MOD = 30, // %
- ASSIGN_MOD = 31, // %=
- BITNOT = 32, // ~
- ASSIGN_BITNOT= 33, // ~=
- DELPROP = 34, // delete
- TYPEOF = 35, // typeof
- NAME = 36, // *** identifiers ***
- NUMBER = 37, // *** numeric literals ***
- STRING = 38, // *** string literals ***
- NULL = 39, // null
- THIS = 40, // this
- FALSE = 41, // false
- TRUE = 42, // true
- SHEQ = 43, // ===
- SHNE = 44, // !==
- THROW = 45, // throw
- IN = 46, // in
- INSTANCEOF = 47, // instanceof
- TRY = 48, // try
- SEMI = 49, // ;
- LB = 50, // [
- RB = 51, // ]
- LC = 52, // {
- RC = 53, // }
- LP = 54, // (
- RP = 55, // )
- COMMA = 56, // ,
- ASSIGN = 57, // =
- HOOK = 58, // ?
- COLON = 59, // :
- OR = 60, // ||
- AND = 61, // &&
- INC = 62, // ++
- DEC = 63, // --
- DOT = 64, // .
- FUNCTION = 65, // function
- IF = 66, // if keyword
- ELSE = 67, // else keyword
- SWITCH = 68, // switch keyword
- CASE = 69, // case keyword
- DEFAULT = 70, // default keyword
- WHILE = 71, // while keyword
- DO = 72, // do keyword
- FOR = 73, // for keyword
- BREAK = 74, // break keyword
- CONTINUE = 75, // continue keyword
- VAR = 76, // var keyword
- WITH = 77, // with keyword
- CATCH = 78, // catch keyword
- FINALLY = 79, // finally keyword
- RESERVED = 80, // reserved keywords
- NOP = 81, // NOP
- VOID = 82, // void keyword
- MOD_ASSIGN = 83, // %=
- BANG = 84, // %=
- ASSERT = 85; // assert keyword
-
- public static final int MAX_TOKEN = ASSERT;
-
- public final static String[] codeToString = new String[] {
- "0", "EOL", "RETURN", "GOTO", "BITOR", "ASSIGN_BITOR",
- "BITXOR", "ASSIGN_BITXOR", "BITAND", "ASSIGN_BITAND", "EQ",
- "NE", "LT", "LE", "GT", "GE", "LSH", "ASSIGN_LSH", "RSH",
- "ASSIGN_RSH", "URSH", "ASSIGN_URSH", "ADD", "ASSIGN_ADD",
- "SUB", "ASSIGN_SUB", "MUL", "ASSIGN_MUL", "DIV", "ASSIGN_DIV",
- "MOD", "ASSIGN_MOD", "BITNOT", "ASSIGN_BITNOT=", "DELPROP",
- "TYPEOF", "NAME", "NUMBER", "STRING", "NULL", "THIS", "FALSE",
- "TRUE", "SHEQ", "SHNE", "THROW", "IN", "INSTANCEOF", "TRY",
- "SEMI", "LB", "RB", "LC", "RC", "LP", "RP", "COMMA", "ASSIGN",
- "HOOK", "COLON", "OR", "AND", "INC", "DEC", "DOT", "FUNCTION",
- "IF", "ELSE", "SWITCH", "CASE", "DEFAULT", "WHILE", "DO",
- "FOR", "BREAK", "CONTINUE", "VAR", "WITH", "CATCH", "FINALLY",
- "RESERVED", "NOP", "VOID", "MOD_ASSIGN", "BANG", "ASSERT" };
+ /** the token that was just parsed */
+ protected int op;
+
+ /** the most recently parsed token, <i>regardless of pushbacks</i> */
+ protected int mostRecentlyReadToken;
+ /** if the token just parsed was a NUMBER, this is the numeric value */
+ protected Number number = null;
- // Predicates ///////////////////////////////////////////////////////////////////////
+ /** if the token just parsed was a NAME or STRING, this is the string value */
+ protected String string = null;
- protected static boolean isJSIdentifier(String s) {
- int length = s.length();
- if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false;
- for (int i=1; i<length; i++) {
- char c = s.charAt(i);
- if (!Character.isJavaIdentifierPart(c) && c == '\\' && !((i + 5) < length) &&
- (s.charAt(i + 1) == 'u') && 0 <= xDigitToInt(s.charAt(i + 2)) && 0 <= xDigitToInt(s.charAt(i + 3)) &&
- 0 <= xDigitToInt(s.charAt(i + 4)) && 0 <= xDigitToInt(s.charAt(i + 5)))
- return false;
- }
- return true;
+ /** the line number of the most recently <i>lexed</i> token */
+ private int line = 0;
+
+ /** the line number of the most recently <i>parsed</i> token */
+ protected int parserLine = 0;
+
+ /** the column number of the current token */
+ protected int col = 0;
+
+ /** the name of the source code file being lexed */
+ protected String sourceName;
+
+ private SmartReader in;
+ /**
+ * Creates a lexer that reads its characters from <tt>r</tt>.
+ * @param r the character source; it is wrapped in a SmartReader
+ * @param sourceName stored as the name of the source being lexed
+ * @param line the initial value of both the lexer's and the parser's line counter
+ */
+ public Lexer(Reader r, String sourceName, int line) throws IOException {
+     in = new SmartReader(r);
+     this.sourceName = sourceName;
+     // both counters start at the same caller-supplied line
+     this.parserLine = this.line = line;
}
+
+ // Predicates ///////////////////////////////////////////////////////////////////////
+
/** true iff c is an ASCII letter (a-z or A-Z) */
private static boolean isAlpha(int c) {
    if (c >= 'A' && c <= 'Z') return true;
    return c >= 'a' && c <= 'z';
}
/** true iff c is an ASCII decimal digit (0-9) */
private static boolean isDigit(int c) {
    return '0' <= c && '9' >= c;
}
- private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); }
private static int xDigitToInt(int c) { // value (0-15) of the ASCII hex digit c, or -1 if c is not a hex digit
if ('0' <= c && c <= '9') return c - '0';
- if ('a' <= c && c <= 'f') return c - ('a' - 10);
- if ('A' <= c && c <= 'F') return c - ('A' - 10);
- return -1;
- }
- public static boolean isWhiteSpace(int c) {
- if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true;
- return Character.getType((char)c) == Character.SPACE_SEPARATOR;
+ else if ('a' <= c && c <= 'f') return c - ('a' - 10); // 'a' maps to 10
+ else if ('A' <= c && c <= 'F') return c - ('A' - 10); // 'A' maps to 10
+ else return -1;
}
// Token Subtype Handlers /////////////////////////////////////////////////////////
private int getKeyword(String s) throws IOException {
- char c;
- switch (s.length()) {
+ char c;
+ switch (s.length()) {
case 2: c=s.charAt(1);
if (c=='f') { if (s.charAt(0)=='i') return IF; }
else if (c=='n') { if (s.charAt(0)=='i') return IN; }
else if (c=='o') { if (s.charAt(0)=='d') return DO; }
break;
case 3: switch (s.charAt(0)) { // three-letter words, dispatched on the first character
+ case 'a': if (s.charAt(2)=='d' && s.charAt(1)=='n') return AND; break;
case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') return FOR; break;
- case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED; break;
- case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') throw new IOException("the new keyword is not permitted in XWT scripts");
+ // every case ends in break: without it a non-keyword falls through to the next case,
+ // so an identifier such as "nry" or "iry" would reach the 't' test and be lexed as TRY
+ case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED; break; // "int"
+ case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') return RESERVED; break; // "new"
case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') return TRY; break;
case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') return VAR; break;
} break;
case 'c': c=s.charAt(3);
if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') return CASE; }
else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') return RESERVED; }
- return -1;
+ return -1;
case 'e': c=s.charAt(3);
if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') return ELSE; }
else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') return RESERVED; }
- return -1;
- case 'g': return s.equals("goto") ? GOTO : -1;
- case 'l': return s.equals("long") ? RESERVED : -1;
+ return -1;
+ case 'g': return s.equals("goto") ? RESERVED : -1;
+ case 'l': return s.equals("long") ? RESERVED : -1;
case 'n': return s.equals("null") ? NULL : -1;
case 't': c=s.charAt(3);
if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') return TRUE; }
else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') return THIS; }
- return -1;
- case 'v': return s.equals("void") ? RESERVED : -1;
- case 'w': return s.equals("with") ? WITH : -1;
+ return -1;
+ case 'w': if (s.equals("with")) return RESERVED; else return -1;
+ case 'v': if (s.equals("void")) return RESERVED; else return -1;
} break;
case 5: switch (s.charAt(2)) {
case 'a': return s.equals("class") ? RESERVED : -1;
- case 'e': return s.equals("break") ? BREAK : -1;
+ case 'e': return s.equals("break") ? BREAK : -1;
case 'i': return s.equals("while") ? WHILE : -1;
- case 'l': return s.equals("false") ? FALSE : -1;
+ case 'l': return s.equals("false") ? FALSE : -1;
case 'n': c=s.charAt(0);
- if (s.equals("const")) throw new IOException("the const keyword is not permitted in XWT");
+ if (s.equals("const")) return RESERVED;
else if (s.equals("final")) return RESERVED;
- return -1;
- case 'o': c=s.charAt(0);
- if (c == 'c') return s.equals("float") ? RESERVED : -1;
- else if (c=='s') return s.equals("final") ? RESERVED : -1;
- break;
- case 'p': return s.equals("super") ? RESERVED : -1;
+ return -1;
+ case 'o': c=s.charAt(0);
+ if (c == 'c') return s.equals("float") ? RESERVED : -1;
+ else if (c=='s') return s.equals("final") ? RESERVED : -1;
+ break;
+ case 'p': return s.equals("super") ? RESERVED : -1;
case 'r': return s.equals("throw") ? THROW : -1;
case 't': return s.equals("catch") ? CATCH : -1;
} break;
case 6: switch (s.charAt(1)) {
- case 'a': return s.equals("class") ? RESERVED : -1;
+ case 'a': return s.equals("class") ? RESERVED : -1;
case 'e': c=s.charAt(0);
- if (s.equals("delete")) throw new IOException("the delete keyword is not permitted in XWT scripts");
+ if (s.equals("delete")) return RESERVED;
else if (c=='r') return s.equals("return") ? RETURN : -1;
break;
- case 'h': return s.equals("throws") ? RESERVED : -1;
+ case 'h': return s.equals("throws") ? RESERVED : -1;
case 'o': return s.equals("double") ? RESERVED : -1;
case 's': return s.equals("assert") ? ASSERT : -1;
case 'u': return s.equals("public") ? RESERVED : -1;
break;
case 10: c=s.charAt(1);
if (c=='m') return s.equals("implements") ? RESERVED : -1;
- else if (c=='n' && s.equals("instanceof")) throw new IOException("the instanceof keyword is not permitted in XWT scripts");
+ else if (c=='n' && s.equals("instanceof")) return RESERVED;
break;
case 12: return s.equals("synchronized") ? RESERVED : -1;
}
- return -1;
+ return -1;
}
private int getIdentifier(int c) throws IOException { // returns a keyword's token code, or NAME with this.string set to the identifier text
in.unread();
String str = in.getString();
int result = getKeyword(str); // -1 means str is not a keyword
+ if (result == RESERVED) throw new LexerException("The reserved word \"" + str + "\" is not permitted in XWT scripts");
if (result != -1) return result;
- this.string = str;
+ this.string = str.intern(); // identifier text is stored interned
return NAME;
}
if (c == 'e' || c == 'E') { // exponent part of a numeric literal
c = in.read();
if (c == '+' || c == '-') c = in.read(); // optional sign
- if (!isDigit(c)) throw new IOException("msg.missing.exponent");
+ // at least one digit must follow the 'e' and optional sign
+ if (!isDigit(c)) throw new LexerException("float literal did not have an exponent value");
do { c = in.read(); } while (isDigit(c)); // consume the remaining exponent digits
}
}
String numString = in.getString();
if (base == 10 && !isInteger) {
try { dval = (Double.valueOf(numString)).doubleValue(); }
- catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); }
+ catch (NumberFormatException ex) { throw new LexerException("invalid numeric literal: \"" + numString + "\""); }
} else {
if (isInteger) {
longval = Long.parseLong(numString, base);
dval = (double)longval;
- } else {
- // FIXME: we're not handling hex/octal fractions... does that matter?
+ } else {
dval = Double.parseDouble(numString);
longval = (long) dval;
if (longval == dval) isInteger = true;
c = in.read();
in.startString(); // start after the first "
while(c != quoteChar) {
- if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit");
+ if (c == '\n' || c == -1) throw new LexerException("unterminated string literal");
if (c == '\\') {
if (stringBuf == null) {
in.unread(); // Don't include the backslash
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = '\u000B'; break;
- default: throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
+ case '\\': c = '\\'; break;
+ case 'u': {
+     // backslash-u escape: exactly four hex digits follow, most significant first
+     int v = 0;
+     for(int i=0; i<4; i++) {
+         int ci = in.read();
+         // xDigitToInt returns -1 for anything that is not 0-9, a-f or A-F (including end of input)
+         int digit = xDigitToInt(ci);
+         if (digit < 0)
+             throw new LexerException("illegal character '" + ((char)ci) + "' in \\u unicode escape sequence");
+         // each hex digit contributes 4 bits to the code unit
+         v = (v << 4) | digit;
+     }
+     c = (char)v;
+     break;
+ }
+ default:
+ // just use the character that was escaped
+ break;
}
}
if (stringBuf != null) stringBuf.append((char) c);
c = in.read();
}
- if (stringBuf != null) this.string = stringBuf.toString();
+ if (stringBuf != null) this.string = stringBuf.toString().intern();
else {
in.unread(); // miss the trailing "
- this.string = in.getString();
+ this.string = in.getString().intern();
in.read();
}
return STRING;
}
- public int getToken() throws IOException {
- if (pushedBack) { pushedBack = false; return op; }
- return (op = _getToken());
- }
-
- public int _getToken() throws IOException {
+ private int _getToken() throws IOException {
int c;
- do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n');
+ do { c = in.read(); } while (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\n' );
if (c == -1) return -1;
- if (c == '\\' && in.peek() == 'u') throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
- if (Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
+ if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
if (c == '"' || c == '\'') return getString(c);
switch (c) {
- case '\n': return EOL;
case ';': return SEMI;
case '[': return LB;
case ']': return RB;
case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
- case '%': return in.match('=') ? MOD_ASSIGN : MOD;
+ case '%': return in.match('=') ? ASSIGN_MOD : MOD;
case '~': return BITNOT;
case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
case '*': return in.match('=') ? ASSIGN_MUL : MUL;
- case '<': return !in.match('<') ?
- (in.match('=') ? LE : LT) :
- in.match('=') ? ASSIGN_LSH : LSH;
- case '>': return !in.match('>') ?
- (in.match('=') ? GE : GT) :
- in.match('>') ?
- (in.match('=') ? ASSIGN_URSH : URSH) :
- (in.match('=') ? ASSIGN_RSH : RSH);
+ case '<': return !in.match('<') ? (in.match('=') ? LE : LT) : in.match('=') ? ASSIGN_LSH : LSH;
+ case '>': return !in.match('>') ? (in.match('=') ? GE : GT) :
+ in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH);
case '/':
if (in.match('=')) return ASSIGN_DIV;
if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
if (c == '\n' || c != '/' || !in.match('*')) continue;
if (in.match('/')) return getToken();
- throw new IOException("msg.nested.comment");
+ throw new LexerException("nested comments are not permitted");
}
- if (c == -1) throw new IOException("msg.unterminated.comment");
+ if (c == -1) throw new LexerException("unterminated comment");
return getToken(); // `goto retry'
- default: throw new IOException("illegal character: " + c);
+ default: throw new LexerException("illegal character: \'" + ((char)c) + "\'");
}
}
- private static class SmartReader {
+
+ // SmartReader ////////////////////////////////////////////////////////////////
+
+ /** a wrapper around a PushbackReader that tracks line and column numbers, can push back characters, and can accumulate the characters it reads into a string */
+ private class SmartReader {
PushbackReader reader = null;
int lastread = -1;
public SmartReader(Reader r) { reader = new PushbackReader(r); }
- public void unread() throws IOException {
- reader.unread(lastread);
- if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
- }
+ public void unread() throws IOException { unread((char)lastread); } // pushes back the character most recently returned by read()
+ public void unread(char c) throws IOException {
+ reader.unread(c);
+ if(c == '\n') col = -1; // -1 is a marker that read() tests so that re-reading this newline does not advance the line counters again
+ else col--;
+ if (accumulator != null) accumulator.setLength(accumulator.length() - 1); // drop the last character from the string being accumulated
+ }
/**
 * Returns true, and consumes one character from the underlying reader, if peek() equals c;
 * returns false otherwise.
 */
public boolean match(char c) throws IOException {
    if (peek() != c) return false;
    // consumed via reader.read(), not this.read(): lastread, col, line and the accumulator are not updated
    reader.read();
    return true;
}
public int peek() throws IOException {
int peeked = reader.read();
public int read() throws IOException { // reads one character from the underlying reader and returns it, updating lastread, the accumulator, col and the line counters
lastread = reader.read();
if (accumulator != null) accumulator.append((char)lastread); // appended unconditionally, with no check on the value read
+ if (lastread != '\n' && lastread != '\r') col++; // line terminators do not advance the column
+ if (lastread == '\n') {
+ // col is -1 if we just unread a newline, this is sort of ugly
+ if (col != -1) parserLine = ++line; // advance both line counters unless this newline is being re-read after unread()
+ col = 0;
+ }
return lastread;
}
- // FIXME: could be much more efficient
+ // FEATURE: could be much more efficient
StringBuffer accumulator = null;
public void startString() { // starts accumulating characters; the accumulated string begins with the character most recently read
- accumulator = new StringBuffer();
- accumulator.append((char)lastread);
- }
+ accumulator = new StringBuffer();
+ accumulator.append((char)lastread);
+ }
public String getString() throws IOException {
String ret = accumulator.toString();
accumulator = null;
}
}
+
+ // Token PushBack code ////////////////////////////////////////////////////////////
+
+ private int pushBackDepth = 0;
+ private int[] pushBackInts = new int[10];
+ private Object[] pushBackObjects = new Object[10];
+
+ /**
+ * Pushes a token onto the pushback stack; getToken() hands pushed-back tokens out most-recent-first.
+ * @param op the token code
+ * @param obj the token's value (getToken() restores a Number into number and a String into string), or null
+ */
+ public final void pushBackToken(int op, Object obj) {
+     final int capacity = pushBackInts.length;
+     if (pushBackDepth + 1 >= capacity) {
+         // grow both parallel arrays to twice their size, keeping the existing entries
+         int[] grownInts = new int[capacity * 2];
+         Object[] grownObjects = new Object[pushBackObjects.length * 2];
+         System.arraycopy(pushBackInts, 0, grownInts, 0, capacity);
+         System.arraycopy(pushBackObjects, 0, grownObjects, 0, pushBackObjects.length);
+         pushBackInts = grownInts;
+         pushBackObjects = grownObjects;
+     }
+     final int slot = pushBackDepth++;
+     pushBackInts[slot] = op;
+     pushBackObjects[slot] = obj;
+ }
+
+ /** push back the most recently read token, together with its numeric value if it has one, otherwise its string value */
+ public final void pushBackToken() {
+     Object value;
+     if (number != null) value = number;
+     else value = string;
+     pushBackToken(op, value);
+ }
+
+ /** returns the next token without consuming it: the token is read and then immediately pushed back */
+ public final int peekToken() throws IOException {
+     final int next = getToken();
+     pushBackToken();
+     return next;
+ }
+
+ /** read a token: returns the most recently pushed-back token if there is one, otherwise lexes a new one */
+ public final int getToken() throws IOException {
+ number = null; // clear the previous token's values
+ string = null;
+ if (pushBackDepth == 0) { // nothing pushed back: lex a fresh token
+ mostRecentlyReadToken = op; // NOTE(review): op is only reassigned on the next line, so this stores the previous token -- confirm that is intended
+ return op = _getToken();
+ }
+ pushBackDepth--; // pop the most recently pushed-back token
+ op = pushBackInts[pushBackDepth];
+ if (pushBackObjects[pushBackDepth] != null) { // restore the saved value into number or string according to its type
+ number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null;
+ string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null;
+ }
+ return op;
+ }
+
+ class LexerException extends IOException { // an IOException whose message is prefixed with the enclosing lexer's sourceName, line and col
+ public LexerException(String s) { super(sourceName + ":" + line + "," + col + ": " + s); } // message format: sourceName:line,col: s
+ }
}