/** for debugging */
public static void main(String[] s) throws Exception {
- Lexer l = new Lexer(new InputStreamReader(System.in));
- int tok = 0;
- while((tok = l.getToken()) != -1) System.out.println(codeToString[tok]);
+ Lexer l = new Lexer(new InputStreamReader(System.in), "stdin", 0); // lex standard input; source name "stdin", initial line number 0
+ int tok = 0;
+ while((tok = l.getToken()) != -1) System.out.println(codeToString[tok]); // print the codeToString entry for each token until getToken() returns -1
}
/** the token that was just parsed */
protected int op;
+
+ /** the most recently parsed token, <i>regardless of pushbacks</i>; getToken() assigns it the previous value of op each time it lexes a fresh token, and leaves it untouched when it replays a pushed-back token */
+ protected int mostRecentlyReadToken;
/** if the token just parsed was a NUMBER, this is the numeric value */
protected Number number = null;
/** if the token just parsed was a NAME or STRING, this is the string value */
protected String string = null;
- /** the line number of the current token */
- protected int line = 0;
+ /** the line number of the most recently <i>lexed</i> token; read() increments it as each newline is consumed */
+ private int line = 0;
+
+ /** the line number of the most recently <i>parsed</i> token; the constructor gives it the same initial value as line, and read() reassigns it whenever it advances line */
+ protected int parserLine = 0;
/** the column number of the current token */
protected int col = 0;
/** the name of the source code file being lexed */
- protected String sourceName = "unknown";
+ protected String sourceName;
private SmartReader in;
- public Lexer(Reader r) throws IOException { in = new SmartReader(r); }
+ public Lexer(Reader r, String sourceName, int line) throws IOException { // r: characters to lex; sourceName: name reported in LexerException messages; line: initial line number
+ this.sourceName = sourceName;
+ this.line = line;
+ this.parserLine = line; // both line counters start from the same value
+ in = new SmartReader(r); // SmartReader wraps r in a PushbackReader
+ }
// Predicates ///////////////////////////////////////////////////////////////////////
// Token Subtype Handlers /////////////////////////////////////////////////////////
private int getKeyword(String s) throws IOException {
- char c;
- switch (s.length()) {
+ char c;
+ switch (s.length()) {
case 2: c=s.charAt(1);
if (c=='f') { if (s.charAt(0)=='i') return IF; }
else if (c=='n') { if (s.charAt(0)=='i') return IN; }
case 'c': c=s.charAt(3);
if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') return CASE; }
else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') return RESERVED; }
- return -1;
+ return -1;
case 'e': c=s.charAt(3);
if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') return ELSE; }
else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') return RESERVED; }
- return -1;
+ return -1;
case 'g': return s.equals("goto") ? RESERVED : -1;
- case 'l': return s.equals("long") ? RESERVED : -1;
+ case 'l': return s.equals("long") ? RESERVED : -1;
case 'n': return s.equals("null") ? NULL : -1;
case 't': c=s.charAt(3);
if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') return TRUE; }
else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') return THIS; }
- return -1;
- case 'w': if (s.equals("with")) return RESERVED; else return -1;
- case 'v': if (s.equals("void")) return RESERVED; else return -1;
+ return -1;
+ case 'w': if (s.equals("with")) return RESERVED; else return -1;
+ case 'v': if (s.equals("void")) return RESERVED; else return -1;
} break;
case 5: switch (s.charAt(2)) {
case 'a': return s.equals("class") ? RESERVED : -1;
- case 'e': return s.equals("break") ? BREAK : -1;
+ case 'e': return s.equals("break") ? BREAK : -1;
case 'i': return s.equals("while") ? WHILE : -1;
- case 'l': return s.equals("false") ? FALSE : -1;
+ case 'l': return s.equals("false") ? FALSE : -1;
case 'n': c=s.charAt(0);
- if (s.equals("const")) return RESERVED;
+ if (s.equals("const")) return RESERVED;
else if (s.equals("final")) return RESERVED;
- return -1;
- case 'o': c=s.charAt(0);
- if (c == 'c') return s.equals("float") ? RESERVED : -1;
- else if (c=='s') return s.equals("final") ? RESERVED : -1;
- break;
- case 'p': return s.equals("super") ? RESERVED : -1;
+ return -1;
+ case 'o': c=s.charAt(0);
+ if (c == 'c') return s.equals("float") ? RESERVED : -1;
+ else if (c=='s') return s.equals("final") ? RESERVED : -1;
+ break;
+ case 'p': return s.equals("super") ? RESERVED : -1;
case 'r': return s.equals("throw") ? THROW : -1;
case 't': return s.equals("catch") ? CATCH : -1;
} break;
case 6: switch (s.charAt(1)) {
- case 'a': return s.equals("class") ? RESERVED : -1;
+ case 'a': return s.equals("class") ? RESERVED : -1;
case 'e': c=s.charAt(0);
if (s.equals("delete")) return RESERVED;
else if (c=='r') return s.equals("return") ? RETURN : -1;
break;
- case 'h': return s.equals("throws") ? RESERVED : -1;
+ case 'h': return s.equals("throws") ? RESERVED : -1;
case 'o': return s.equals("double") ? RESERVED : -1;
case 's': return s.equals("assert") ? ASSERT : -1;
case 'u': return s.equals("public") ? RESERVED : -1;
break;
case 12: return s.equals("synchronized") ? RESERVED : -1;
}
- return -1;
+ return -1;
}
private int getIdentifier(int c) throws IOException {
in.unread();
String str = in.getString();
int result = getKeyword(str);
- if (result == RESERVED) throw new LexerException("The reserved word \"" + str + "\" is not permitted in XWT scripts");
+ if (result == RESERVED) throw new LexerException("The reserved word \"" + str + "\" is not permitted in XWT scripts");
if (result != -1) return result;
this.string = str.intern();
return NAME;
if (isInteger) {
longval = Long.parseLong(numString, base);
dval = (double)longval;
- } else {
+ } else {
dval = Double.parseDouble(numString);
longval = (long) dval;
if (longval == dval) isInteger = true;
case 't': c = '\t'; break;
case 'v': c = '\u000B'; break;
case '\\': c = '\\'; break;
- case 'u': {
- int v = 0;
- for(int i=0; i<4; i++) {
- int ci = in.read();
- if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F')))
- throw new LexerException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence");
- v = (v << 8) | Integer.parseInt(ci + "", 16);
- }
- c = (char)v;
- break;
- }
+ case 'u': {
+ // backslash-u escape: fold exactly four hex digits into a single UTF-16 code unit
+ int v = 0;
+ for(int i=0; i<4; i++) {
+ int ci = in.read();
+ // only ASCII hex digits are accepted; end of input (-1) fails this test as well
+ if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F')))
+ throw new LexerException("illegal character '" + ((char)ci) + "' in \\u unicode escape sequence"); // report the offending character, not the 'u' still held in c
+ // each hex digit contributes 4 bits; Character.digit converts the character itself
+ // (not the decimal text of its character code) into its value 0..15
+ v = (v << 4) | Character.digit((char)ci, 16);
+ }
+ c = (char)v;
+ break;
+ }
default:
- // just use the character that was escaped
- break;
+ // just use the character that was escaped
+ break;
}
}
if (stringBuf != null) stringBuf.append((char) c);
return STRING;
}
- public int _getToken() throws IOException {
+ private int _getToken() throws IOException {
int c;
do { c = in.read(); } while (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\n' );
if (c == -1) return -1;
case '*': return in.match('=') ? ASSIGN_MUL : MUL;
case '<': return !in.match('<') ? (in.match('=') ? LE : LT) : in.match('=') ? ASSIGN_LSH : LSH;
case '>': return !in.match('>') ? (in.match('=') ? GE : GT) :
- in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH);
+ in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH);
case '/':
if (in.match('=')) return ASSIGN_DIV;
if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
public SmartReader(Reader r) { reader = new PushbackReader(r); }
public void unread() throws IOException { unread((char)lastread); }
public void unread(char c) throws IOException {
- reader.unread(c);
- if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
- }
+ reader.unread(c); // push c back onto the underlying PushbackReader
+ if(c == '\n') col = -1; // sentinel checked by read(): this newline must not bump the line counters again
+ else col--;
+ if (accumulator != null) accumulator.setLength(accumulator.length() - 1); // drop the last character from the string being accumulated
+ }
public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
public int peek() throws IOException {
int peeked = reader.read();
public int read() throws IOException {
lastread = reader.read();
if (accumulator != null) accumulator.append((char)lastread);
- if (lastread != '\n' && lastread != '\r') col++;
- if (lastread == '\n') { line++; col = 0; }
+ if (lastread != '\n' && lastread != '\r') col++; // '\n' and '\r' do not advance the column
+ if (lastread == '\n') {
+ // col is -1 if we just unread a newline, this is sort of ugly
+ if (col != -1) parserLine = ++line; // newline seen for the first time: advance line and copy it to parserLine
+ col = 0;
+ }
return lastread;
}
// FEATURE: could be much more efficient
StringBuffer accumulator = null;
public void startString() {
- accumulator = new StringBuffer();
- accumulator.append((char)lastread);
- }
+ accumulator = new StringBuffer(); // while accumulator is non-null, read() appends every character to it
+ accumulator.append((char)lastread); // seed it with the character that was read most recently
+ }
public String getString() throws IOException {
String ret = accumulator.toString();
accumulator = null;
private Object[] pushBackObjects = new Object[10];
/** push back a token */
- public void pushBackToken(int op, Object obj) {
- if (pushBackDepth >= pushBackInts.length - 1) {
- int[] newInts = new int[pushBackInts.length * 2];
- System.arraycopy(pushBackInts, 0, newInts, 0, pushBackInts.length);
- pushBackInts = newInts;
- Object[] newObjects = new Object[pushBackObjects.length * 2];
- System.arraycopy(pushBackObjects, 0, newObjects, 0, pushBackObjects.length);
- pushBackObjects = newObjects;
- }
- pushBackInts[pushBackDepth] = op;
- pushBackObjects[pushBackDepth] = obj;
- pushBackDepth++;
+ public final void pushBackToken(int op, Object obj) { // op: token code; obj: the token's Number or String value, or null
+ if (pushBackDepth >= pushBackInts.length - 1) { // one slot short of full: double both parallel arrays
+ int[] newInts = new int[pushBackInts.length * 2];
+ System.arraycopy(pushBackInts, 0, newInts, 0, pushBackInts.length);
+ pushBackInts = newInts;
+ Object[] newObjects = new Object[pushBackObjects.length * 2];
+ System.arraycopy(pushBackObjects, 0, newObjects, 0, pushBackObjects.length);
+ pushBackObjects = newObjects;
+ }
+ pushBackInts[pushBackDepth] = op;
+ pushBackObjects[pushBackDepth] = obj;
+ pushBackDepth++; // getToken() pops entries in last-in, first-out order
}
/** push back the most recently read token */
- public void pushBackToken() { pushBackToken(op, number != null ? (Object)number : (Object)string); }
+ public final void pushBackToken() { pushBackToken(op, number != null ? (Object)number : (Object)string); } // pushes op with number if it is set, otherwise with string (which may be null)
/** read a token but leave it in the stream */
- public int peekToken() throws IOException {
- int ret = getToken();
- pushBackToken();
- return ret;
+ public final int peekToken() throws IOException {
+ int ret = getToken(); // take the next token...
+ pushBackToken(); // ...then push it straight back together with its number/string value
+ return ret;
}
/** read a token */
- public int getToken() throws IOException {
- number = null;
- string = null;
- if (pushBackDepth == 0) return _getToken();
- pushBackDepth--;
- op = pushBackInts[pushBackDepth];
- if (pushBackObjects[pushBackDepth] != null) {
- number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null;
- string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null;
- }
- return op;
+ public final int getToken() throws IOException {
+ number = null;
+ string = null;
+ if (pushBackDepth == 0) { // nothing pushed back: lex a fresh token from the input
+ mostRecentlyReadToken = op; // saves the previous token code before op is overwritten below
+ return op = _getToken();
+ }
+ pushBackDepth--; // otherwise replay the token that was pushed back last
+ op = pushBackInts[pushBackDepth];
+ if (pushBackObjects[pushBackDepth] != null) { // restore the token's value into number or string, according to its type
+ number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null;
+ string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null;
+ }
+ return op;
}
class LexerException extends IOException {
- public LexerException(String s) { super(sourceName + ":" + line + "," + col + ": " + s); }
+ public LexerException(String s) { super(sourceName + ":" + line + "," + col + ": " + s); } // message layout is sourceName:line,col: s, using the enclosing lexer's current position fields
}
}