1 // Derived from org.mozilla.javascript.TokenStream [NPL]
4 * The contents of this file are subject to the Netscape Public
5 * License Version 1.1 (the "License"); you may not use this file
6 * except in compliance with the License. You may obtain a copy of
7 * the License at http://www.mozilla.org/NPL/
9 * Software distributed under the License is distributed on an "AS
10 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11 * implied. See the License for the specific language governing
12 * rights and limitations under the License.
14 * The Initial Developer of the Original Code is Netscape
15 * Communications Corporation.
17 * Contributor(s): Roger Lawrence, Mike McCabe
20 // FIXME: mark lots of these methods 'final' so they get inlined
25 /** Lexes a stream of characters into a stream of Tokens */
26 class Lexer implements Tokens {
/**
 * Command-line test driver: wraps System.in in a Lexer named "stdin" (starting line 0)
 * and prints codeToString[tok] for every token read.
 */
29 public static void main(String[] s) throws Exception {
30 Lexer l = new Lexer(new InputStreamReader(System.in), "stdin", 0);
// getToken() yields -1 once the underlying reader is exhausted, which ends the loop
32 while((tok = l.getToken()) != -1) System.out.println(codeToString[tok]);
35 /** the token that was just parsed */
38 /** the most recently parsed token, <i>regardless of pushbacks</i> */
39 protected int mostRecentlyReadToken;
41 /** if the token just parsed was a NUMBER, this is the numeric value */
42 protected Number number = null;
44 /** if the token just parsed was a NAME or STRING, this is the string value */
45 protected String string = null;
47 /** the line number of the most recently <i>lexed</i> token */
50 /** the line number of the most recently <i>parsed</i> token */
51 protected int parserLine = 0;
53 /** the column number of the current token */
54 protected int col = 0;
56 /** the name of the source code file being lexed */
57 protected String sourceName;
/** the character source for this lexer; wraps the Reader handed to the constructor */
59 private SmartReader in;
/**
 * Creates a lexer over the given character stream.
 * @param r          the stream to lex; it is wrapped in a SmartReader
 * @param sourceName name of the source being lexed (appears in LexerException messages)
 * @param line       initial value for parserLine
 */
60 public Lexer(Reader r, String sourceName, int line) throws IOException {
61 this.sourceName = sourceName;
63 this.parserLine = line;
64 in = new SmartReader(r);
68 // Predicates ///////////////////////////////////////////////////////////////////////
70 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
71 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
72 private static int xDigitToInt(int c) {
73 if ('0' <= c && c <= '9') return c - '0';
74 else if ('a' <= c && c <= 'f') return c - ('a' - 10);
75 else if ('A' <= c && c <= 'F') return c - ('A' - 10);
80 // Token Subtype Handlers /////////////////////////////////////////////////////////
82 private int getKeyword(String s) throws IOException {
85 case 2: c=s.charAt(1);
86 if (c=='f') { if (s.charAt(0)=='i') return IF; }
87 else if (c=='n') { if (s.charAt(0)=='i') return IN; }
88 else if (c=='o') { if (s.charAt(0)=='d') return DO; }
90 case 3: switch (s.charAt(0)) {
91 case 'a': if (s.charAt(2)=='d' && s.charAt(1)=='n') return AND; break;
92 case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') return FOR; break;
93 case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED;
94 case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') return RESERVED;
95 case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') return TRY; break;
96 case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') return VAR; break;
98 case 4: switch (s.charAt(0)) {
99 case 'b': return s.equals("byte") ? RESERVED : -1;
100 case 'c': c=s.charAt(3);
101 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') return CASE; }
102 else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') return RESERVED; }
104 case 'e': c=s.charAt(3);
105 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') return ELSE; }
106 else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') return RESERVED; }
108 case 'g': return s.equals("goto") ? RESERVED : -1;
109 case 'l': return s.equals("long") ? RESERVED : -1;
110 case 'n': return s.equals("null") ? NULL : -1;
111 case 't': c=s.charAt(3);
112 if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') return TRUE; }
113 else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') return THIS; }
115 case 'w': if (s.equals("with")) return RESERVED; else return -1;
116 case 'v': if (s.equals("void")) return RESERVED; else return -1;
118 case 5: switch (s.charAt(2)) {
119 case 'a': return s.equals("class") ? RESERVED : -1;
120 case 'e': return s.equals("break") ? BREAK : -1;
121 case 'i': return s.equals("while") ? WHILE : -1;
122 case 'l': return s.equals("false") ? FALSE : -1;
123 case 'n': c=s.charAt(0);
124 if (s.equals("const")) return RESERVED;
125 else if (s.equals("final")) return RESERVED;
127 case 'o': c=s.charAt(0);
128 if (c == 'c') return s.equals("float") ? RESERVED : -1;
129 else if (c=='s') return s.equals("final") ? RESERVED : -1;
131 case 'p': return s.equals("super") ? RESERVED : -1;
132 case 'r': return s.equals("throw") ? THROW : -1;
133 case 't': return s.equals("catch") ? CATCH : -1;
135 case 6: switch (s.charAt(1)) {
136 case 'a': return s.equals("class") ? RESERVED : -1;
137 case 'e': c=s.charAt(0);
138 if (s.equals("delete")) return RESERVED;
139 else if (c=='r') return s.equals("return") ? RETURN : -1;
141 case 'h': return s.equals("throws") ? RESERVED : -1;
142 case 'o': return s.equals("double") ? RESERVED : -1;
143 case 's': return s.equals("assert") ? ASSERT : -1;
144 case 'u': return s.equals("public") ? RESERVED : -1;
145 case 'w': return s.equals("switch") ? SWITCH : -1;
146 case 'y': return s.equals("typeof") ? TYPEOF : -1;
148 case 7: switch (s.charAt(1)) {
149 case 'a': return s.equals("package") ? RESERVED : -1;
150 case 'e': return s.equals("default") ? DEFAULT : -1;
151 case 'i': return s.equals("finally") ? FINALLY : -1;
152 case 'o': return s.equals("boolean") ? RESERVED : -1;
153 case 'r': return s.equals("private") ? RESERVED : -1;
154 case 'x': return s.equals("extends") ? RESERVED : -1;
156 case 8: switch (s.charAt(0)) {
157 case 'a': return s.equals("abstract") ? RESERVED : -1;
158 case 'c': return s.equals("continue") ? CONTINUE : -1;
159 case 'd': return s.equals("debugger") ? RESERVED : -1;
160 case 'f': return s.equals("function") ? FUNCTION : -1;
161 case 'v': return s.equals("volatile") ? RESERVED : -1;
163 case 9: c=s.charAt(0);
164 if (c=='i') return s.equals("interface") ? RESERVED : -1;
165 else if (c=='p') return s.equals("protected") ? RESERVED : -1;
166 else if (c=='t') return s.equals("transient") ? RESERVED : -1;
168 case 10: c=s.charAt(1);
169 if (c=='m') return s.equals("implements") ? RESERVED : -1;
170 else if (c=='n' && s.equals("instanceof")) return RESERVED;
172 case 12: return s.equals("synchronized") ? RESERVED : -1;
/**
 * Lexes an identifier or keyword.
 * The accumulated text is looked up with getKeyword(): a RESERVED word raises a
 * LexerException, any other keyword is returned as its token code, and a plain
 * identifier has its interned text stored in this.string.
 */
177 private int getIdentifier(int c) throws IOException {
// keep reading while the characters are valid Java identifier parts (empty loop body)
179 while (Character.isJavaIdentifierPart((char)(c = in.read())));
181 String str = in.getString();
182 int result = getKeyword(str);
183 if (result == RESERVED) throw new LexerException("The reserved word \"" + str + "\" is not permitted in XWT scripts");
// -1 means "not a keyword"; everything else is a keyword token code
184 if (result != -1) return result;
185 this.string = str.intern();
/**
 * Lexes a numeric literal and stores its value in this.number.
 * Integral values are boxed as the narrowest of Byte, Short or Integer that can
 * hold them; integral values outside the int range, and non-integral values,
 * are stored as a Double.
 * Throws LexerException for an exponent without digits or an unparsable literal.
 */
189 private int getNumber(int c) throws IOException {
192 double dval = Double.NaN;
194 boolean isInteger = true;
196 // figure out what base we're using
// an 'x'/'X' after the first character selects base 16 and restarts text accumulation
198 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
// a further digit selects base 8
199 else if (isDigit(c)) base = 8;
// consume digit characters; letters only count as digits when base is 16
202 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
// fraction and exponent parts are only recognised for base 10
203 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
205 if (c == '.') do { c = in.read(); } while (isDigit(c));
206 if (c == 'e' || c == 'E') {
// optional sign, then at least one exponent digit is required
208 if (c == '+' || c == '-') c = in.read();
// NOTE(review): the message below spells "literal" as "listeral"
209 if (!isDigit(c)) throw new LexerException("float listeral did not have an exponent value");
210 do { c = in.read(); } while (isDigit(c));
215 String numString = in.getString();
216 if (base == 10 && !isInteger) {
217 try { dval = (Double.valueOf(numString)).doubleValue(); }
218 catch (NumberFormatException ex) { throw new LexerException("invalid numeric literal: \"" + numString + "\""); }
// integer path: parse in the detected base
221 longval = Long.parseLong(numString, base);
222 dval = (double)longval;
// NOTE(review): presumably the fallback when Long.parseLong fails - confirm against the full source
224 dval = Double.parseDouble(numString);
225 longval = (long) dval;
226 if (longval == dval) isInteger = true;
// box the value in the narrowest wrapper type that can represent it
230 if (!isInteger) this.number = new Double(dval);
231 else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval);
232 else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval);
233 else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
234 else this.number = new Double(longval);
/**
 * Lexes a quoted string literal and stores its interned value in this.string.
 * While no escape has been seen the text is taken straight from the reader's
 * accumulator; from the first escape onwards characters are collected in a
 * StringBuffer instead.
 * Throws LexerException when a newline or end of input is reached before the
 * closing quote, or on a malformed \\u escape.
 */
238 private int getString(int c) throws IOException {
// stays null until the first escape sequence is encountered
239 StringBuffer stringBuf = null;
243 in.startString(); // start after the first "
244 while(c != quoteChar) {
245 if (c == '\n' || c == -1) throw new LexerException("unterminated string literal");
247 if (stringBuf == null) {
248 in.unread(); // Don't include the backslash
// seed the buffer with everything accumulated so far
249 stringBuf = new StringBuffer(in.getString());
// translate the character following the backslash
252 switch (c = in.read()) {
253 case 'b': c = '\b'; break;
254 case 'f': c = '\f'; break;
255 case 'n': c = '\n'; break;
256 case 'r': c = '\r'; break;
257 case 't': c = '\t'; break;
258 case 'v': c = '\u000B'; break;
259 case '\\': c = '\\'; break;
// unicode escape: four hex digits follow
262 for(int i=0; i<4; i++) {
264 if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F')))
// NOTE(review): the message prints c (the escape letter), not the offending character ci
265 throw new LexerException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence");
// NOTE(review): each hex digit carries 4 bits, so a shift of 8 looks wrong; also, if ci is
// declared as int, ci + "" is the decimal text of the character code rather than the digit - confirm
266 v = (v << 8) | Integer.parseInt(ci + "", 16);
272 // just use the character that was escaped
276 if (stringBuf != null) stringBuf.append((char) c);
// an escape was seen: the value is the buffer contents
279 if (stringBuf != null) this.string = stringBuf.toString().intern();
281 in.unread(); // miss the trailing "
282 this.string = in.getString().intern();
/**
 * Lexes the next token directly from the character stream (no pushback handling).
 * Returns -1 at end of input, otherwise a token code; identifiers, numbers and
 * strings are delegated to getIdentifier / getNumber / getString. Comments are
 * skipped by re-entering getToken().
 * Throws LexerException on an illegal character, a nested comment or an
 * unterminated comment.
 */
288 public int _getToken() throws IOException {
// skip space, tab, form feed, vertical tab and newline
290 do { c = in.read(); } while (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\n' );
291 if (c == -1) return -1;
292 if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
// a leading '.' starts a number only when a digit follows
293 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
294 if (c == '"' || c == '\'') return getString(c);
296 case ';': return SEMI;
303 case ',': return COMMA;
304 case '?': return HOOK;
305 case ':': return COLON;
306 case '.': return DOT;
// multi-character operators: in.match() consumes the next character only when it matches
307 case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
308 case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
309 case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
310 case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
311 case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
312 case '%': return in.match('=') ? ASSIGN_MOD : MOD;
313 case '~': return BITNOT;
314 case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
315 case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
316 case '*': return in.match('=') ? ASSIGN_MUL : MUL;
317 case '<': return !in.match('<') ? (in.match('=') ? LE : LT) : in.match('=') ? ASSIGN_LSH : LSH;
318 case '>': return !in.match('>') ? (in.match('=') ? GE : GT) :
319 in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH);
// the statements below handle '/': divide-assign, line comment, plain divide, block comment
321 if (in.match('=')) return ASSIGN_DIV;
// line comment: discard up to (not including) the newline, then lex the next token
322 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
323 if (!in.match('*')) return DIV;
// block comment: scan until the closing star-slash or end of input
324 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
// only a slash-star pair inside the comment needs further attention
325 if (c == '\n' || c != '/' || !in.match('*')) continue;
// slash-star-slash is accepted as the end of the comment; any other slash-star is a nested comment
326 if (in.match('/')) return getToken();
327 throw new LexerException("nested comments are not permitted");
329 if (c == -1) throw new LexerException("unterminated comment");
330 return getToken(); // `goto retry'
331 default: throw new LexerException("illegal character: \'" + ((char)c) + "\'");
336 // SmartReader ////////////////////////////////////////////////////////////////
338 /** wraps a PushbackReader: updates line/column counters as characters are read, supports un-reading a character, and can accumulate the text read */
339 private class SmartReader {
340 PushbackReader reader = null;
343 public SmartReader(Reader r) { reader = new PushbackReader(r); }
/** un-reads the character most recently returned by read() */
344 public void unread() throws IOException { unread((char)lastread); }
/** un-reads c; when text is being accumulated, the last accumulated character is dropped as well */
345 public void unread(char c) throws IOException {
347 if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
/**
 * consumes the next character if it equals c and reports whether it did.
 * NOTE(review): the character is consumed with reader.read(), not this.read(),
 * so col and the accumulator are not updated for it - confirm this is intended
 */
349 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
/** looks at the next character without consuming it */
350 public int peek() throws IOException {
351 int peeked = reader.read();
// end of input (-1) is not pushed back
352 if (peeked != -1) reader.unread((char)peeked);
/** reads one character, appending it to the accumulator (if any) and updating col / line / parserLine */
355 public int read() throws IOException {
356 lastread = reader.read();
357 if (accumulator != null) accumulator.append((char)lastread);
// line terminators do not advance the column
358 if (lastread != '\n' && lastread != '\r') col++;
// a newline advances the line counter, copies it to parserLine and resets the column
359 if (lastread == '\n') { parserLine = ++line; col = 0; }
363 // FEATURE: could be much more efficient
/** text collected since startString(); null when nothing is being accumulated */
364 StringBuffer accumulator = null;
/** starts accumulating text, beginning with the character most recently read */
365 public void startString() {
366 accumulator = new StringBuffer();
367 accumulator.append((char)lastread);
/** returns the text accumulated since startString() */
369 public String getString() throws IOException {
370 String ret = accumulator.toString();
377 // Token PushBack code ////////////////////////////////////////////////////////////
/** number of tokens currently pushed back */
379 private int pushBackDepth = 0;
/** token codes of the pushed-back tokens, indexed by depth */
380 private int[] pushBackInts = new int[10];
/** value (Number or String, possibly null) of each pushed-back token, parallel to pushBackInts */
381 private Object[] pushBackObjects = new Object[10];
383 /** push back a token */
384 public void pushBackToken(int op, Object obj) {
// grow both arrays (doubling) when the stack is about to fill up
385 if (pushBackDepth >= pushBackInts.length - 1) {
386 int[] newInts = new int[pushBackInts.length * 2];
387 System.arraycopy(pushBackInts, 0, newInts, 0, pushBackInts.length);
388 pushBackInts = newInts;
389 Object[] newObjects = new Object[pushBackObjects.length * 2];
390 System.arraycopy(pushBackObjects, 0, newObjects, 0, pushBackObjects.length);
391 pushBackObjects = newObjects;
// record the token code and its value at the current depth
393 pushBackInts[pushBackDepth] = op;
394 pushBackObjects[pushBackDepth] = obj;
398 /** push back the most recently read token */
399 public void pushBackToken() { pushBackToken(op, number != null ? (Object)number : (Object)string); }
401 /** read a token but leave it in the stream */
402 public int peekToken() throws IOException {
// fetch the token through the normal path (pushback stack first, then the lexer)
403 int ret = getToken();
/**
 * Returns the next token: a freshly lexed one when nothing has been pushed
 * back, otherwise one restored from the pushback stack.
 */
409 public int getToken() throws IOException {
412 if (pushBackDepth == 0) {
// remember the previous token code before lexing a new one
413 mostRecentlyReadToken = op;
414 return op = _getToken();
// restore a pushed-back token and its value
417 op = pushBackInts[pushBackDepth];
418 if (pushBackObjects[pushBackDepth] != null) {
// the stored value is either the numeric value or the string value; the other becomes null
419 number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null;
420 string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null;
/** an IOException whose message is prefixed with the lexer's position, in the form sourceName:line,col */
425 class LexerException extends IOException {
/** @param s description of the problem; appended after the position prefix */
426 public LexerException(String s) { super(sourceName + ":" + line + "," + col + ": " + s); }