1 // Derived from org.mozilla.javascript.TokenStream [NPL]
4 * The contents of this file are subject to the Netscape Public
5 * License Version 1.1 (the "License"); you may not use this file
6 * except in compliance with the License. You may obtain a copy of
7 * the License at http://www.mozilla.org/NPL/
9 * Software distributed under the License is distributed on an "AS
10 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11 * implied. See the License for the specific language governing
12 * rights and limitations under the License.
14 * The Initial Developer of the Original Code is Netscape
15 * Communications Corporation.
17 * Contributor(s): Roger Lawrence, Mike McCabe
// Debugging entry point: tokenizes standard input and prints one line per token,
// stopping when getToken() returns -1.
25 public static void main(String[] s) throws Exception {
26 Lexer l = new Lexer(new InputStreamReader(System.in));
// The token code is used directly as an index into codeToString to obtain its name.
28 while((tok = l.getToken()) != -1)
29 System.out.println(codeToString[tok]);
// Character source; wraps the Reader passed to the constructor.
32 private SmartReader in;
// True while a token handed to pushBackToken(int) is waiting to be returned again by getToken().
33 private boolean pushedBack = false;
// NOTE(review): presumably the name of the script being tokenized, for diagnostics --
// nothing in the visible code reads it. Public and non-final, so callers may overwrite it.
39 public String sourceName = "unknown";
/**
 * Creates a lexer that tokenizes the characters supplied by {@code r}.
 *
 * @param r the character source; it is wrapped in a SmartReader
 * @throws IOException declared for callers; the visible body only constructs the wrapper
 */
public Lexer(Reader r) throws IOException {
    this.in = new SmartReader(r);
}
44 public int peekToken() throws IOException { int ret = getToken(); pushBackToken(); return ret; }
45 public void pushBackToken() { pushBackToken(op); }
46 public void pushBackToken(int i) { if (pushedBack) throw new Error("can't push back twice"); pushedBack = true; op = i; }
48 // Token Constants //////////////////////////////////////////////////////////
// Integer token codes returned by getToken()/_getToken(). Each code is also the index of
// its printable name in codeToString.
50 public final static int
51 EOL = 1, // end of line
55 ASSIGN_BITOR = 5, // |=
57 ASSIGN_BITXOR= 7, // ^=
59 ASSIGN_BITAND= 9, // &=
67 ASSIGN_LSH = 17, // <<=
69 ASSIGN_RSH = 19, // >>=
71 ASSIGN_URSH = 21, // >>>=
73 ASSIGN_ADD = 23, // +=
75 ASSIGN_SUB = 25, // -=
77 ASSIGN_MUL = 27, // *=
79 ASSIGN_DIV = 29, // /=
// NOTE(review): _getToken() returns MOD_ASSIGN (83), not this code, when it sees "%=".
81 ASSIGN_MOD = 31, // %=
// NOTE(review): the '~' case in _getToken() returns BITNOT without testing for '=',
// so the visible tokenizer code never produces this token.
83 ASSIGN_BITNOT= 33, // ~=
84 DELPROP = 34, // delete
85 TYPEOF = 35, // typeof
86 NAME = 36, // *** identifiers ***
87 NUMBER = 37, // *** numeric literals ***
88 STRING = 38, // *** string literals ***
97 INSTANCEOF = 47, // instanceof
115 FUNCTION = 65, // function
116 IF = 66, // if keyword
117 ELSE = 67, // else keyword
118 SWITCH = 68, // switch keyword
119 CASE = 69, // case keyword
120 DEFAULT = 70, // default keyword
121 WHILE = 71, // while keyword
122 DO = 72, // do keyword
123 FOR = 73, // for keyword
124 BREAK = 74, // break keyword
125 CONTINUE = 75, // continue keyword
126 VAR = 76, // var keyword
127 WITH = 77, // with keyword
128 CATCH = 78, // catch keyword
129 FINALLY = 79, // finally keyword
130 RESERVED = 80, // reserved keywords
132 VOID = 82, // void keyword
// Second code for "%=" (see ASSIGN_MOD above); this is the one _getToken() emits.
133 MOD_ASSIGN = 83, // %=
135 ASSERT = 85; // assert keyword
// Alias for ASSERT (85), the last token constant declared above.
137 public static final int MAX_TOKEN = ASSERT;
139 public final static String[] codeToString = new String[] {
140 "0", "EOL", "RETURN", "GOTO", "BITOR", "ASSIGN_BITOR",
141 "BITXOR", "ASSIGN_BITXOR", "BITAND", "ASSIGN_BITAND", "EQ",
142 "NE", "LT", "LE", "GT", "GE", "LSH", "ASSIGN_LSH", "RSH",
143 "ASSIGN_RSH", "URSH", "ASSIGN_URSH", "ADD", "ASSIGN_ADD",
144 "SUB", "ASSIGN_SUB", "MUL", "ASSIGN_MUL", "DIV", "ASSIGN_DIV",
145 "MOD", "ASSIGN_MOD", "BITNOT", "ASSIGN_BITNOT=", "DELPROP",
146 "TYPEOF", "NAME", "NUMBER", "STRING", "NULL", "THIS", "FALSE",
147 "TRUE", "SHEQ", "SHNE", "THROW", "IN", "INSTANCEOF", "TRY",
148 "SEMI", "LB", "RB", "LC", "RC", "LP", "RP", "COMMA", "ASSIGN",
149 "HOOK", "COLON", "OR", "AND", "INC", "DEC", "DOT", "FUNCTION",
150 "IF", "ELSE", "SWITCH", "CASE", "DEFAULT", "WHILE", "DO",
151 "FOR", "BREAK", "CONTINUE", "VAR", "WITH", "CATCH", "FINALLY",
152 "RESERVED", "NOP", "VOID", "MOD_ASSIGN", "BANG", "ASSERT" };
155 // Predicates ///////////////////////////////////////////////////////////////////////
// Checks whether s can serve as an identifier. An empty string, or one whose first
// character is not a Java identifier start, is rejected immediately; the loop then
// examines each remaining character.
157 protected static boolean isJSIdentifier(String s) {
158 int length = s.length();
159 if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false;
160 for (int i=1; i<length; i++) {
161 char c = s.charAt(i);
// Special case: a character that is not an identifier part but is a backslash followed
// by 'u' and four hex digits (a unicode escape). What is done when the test passes is
// not visible in this view.
// NOTE(review): the bounds test is negated -- "!((i + 5) < length)" is true only when
// i + 5 is NOT a valid index, so the charAt(i + 1..i + 5) reads that follow can run past
// the end of s. Confirm whether the '!' is intended.
162 if (!Character.isJavaIdentifierPart(c) && c == '\\' && !((i + 5) < length) &&
163 (s.charAt(i + 1) == 'u') && 0 <= xDigitToInt(s.charAt(i + 2)) && 0 <= xDigitToInt(s.charAt(i + 3)) &&
164 0 <= xDigitToInt(s.charAt(i + 4)) && 0 <= xDigitToInt(s.charAt(i + 5)))
170 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
171 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
172 private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); }
// Converts a hexadecimal digit character to its numeric value: '0'-'9' give 0-9 and
// 'a'-'f' / 'A'-'F' give 10-15. The result for any other character is not visible in
// this view; callers test the result with "0 <= xDigitToInt(...)".
173 private static int xDigitToInt(int c) {
174 if ('0' <= c && c <= '9') return c - '0';
175 if ('a' <= c && c <= 'f') return c - ('a' - 10);
176 if ('A' <= c && c <= 'F') return c - ('A' - 10);
// Returns true for space, tab, form feed, vertical tab and no-break space, and otherwise
// for any character whose Unicode general category is SPACE_SEPARATOR. Line terminators
// are not listed here; c is narrowed to char before the category lookup.
179 public static boolean isWhiteSpace(int c) {
180 if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true;
181 return Character.getType((char)c) == Character.SPACE_SEPARATOR;
185 // Token Subtype Handlers /////////////////////////////////////////////////////////
// Maps the identifier text s to a keyword token code. The lookup switches on the length
// of s, then on one distinguishing character, and finally confirms the full spelling.
// Words that are not keywords yield -1 (getIdentifier() treats -1 as "not a keyword").
// Keywords that XWT forbids (new, const, delete, instanceof) raise IOException instead.
187 private int getKeyword(String s) throws IOException {
189 switch (s.length()) {
// Length 2: if, in, do -- keyed on the second character.
190 case 2: c=s.charAt(1);
191 if (c=='f') { if (s.charAt(0)=='i') return IF; }
192 else if (c=='n') { if (s.charAt(0)=='i') return IN; }
193 else if (c=='o') { if (s.charAt(0)=='d') return DO; }
// Length 3: keyed on the first character.
195 case 3: switch (s.charAt(0)) {
// The word "and" yields AND, the same code _getToken() returns for "&&".
196 case 'a': if (s.charAt(2)=='d' && s.charAt(1)=='n') return AND; break;
197 case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') return FOR; break;
198 case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED; break;
// NOTE(review): there is no break after the "new" check, so any other three-letter word
// starting with 'n' falls through into the 't' test below (e.g. "nry" would return TRY).
199 case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') throw new IOException("the new keyword is not permitted in XWT scripts");
200 case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') return TRY; break;
201 case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') return VAR; break;
// Length 4: keyed on the first character.
203 case 4: switch (s.charAt(0)) {
204 case 'b': return s.equals("byte") ? RESERVED : -1;
// "case" / "char"
205 case 'c': c=s.charAt(3);
206 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') return CASE; }
207 else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') return RESERVED; }
// "else" / "enum"
209 case 'e': c=s.charAt(3);
210 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') return ELSE; }
211 else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') return RESERVED; }
213 case 'g': return s.equals("goto") ? GOTO : -1;
214 case 'l': return s.equals("long") ? RESERVED : -1;
215 case 'n': return s.equals("null") ? NULL : -1;
// "true" / "this"
216 case 't': c=s.charAt(3);
217 if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') return TRUE; }
218 else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') return THIS; }
// NOTE(review): "void" maps to RESERVED although a VOID token constant exists -- confirm
// this is intentional.
220 case 'v': return s.equals("void") ? RESERVED : -1;
221 case 'w': return s.equals("with") ? WITH : -1;
// Length 5: keyed on the THIRD character.
223 case 5: switch (s.charAt(2)) {
224 case 'a': return s.equals("class") ? RESERVED : -1;
225 case 'e': return s.equals("break") ? BREAK : -1;
226 case 'i': return s.equals("while") ? WHILE : -1;
227 case 'l': return s.equals("false") ? FALSE : -1;
// "const" is rejected outright; "final" is reserved.
228 case 'n': c=s.charAt(0);
229 if (s.equals("const")) throw new IOException("the const keyword is not permitted in XWT");
230 else if (s.equals("final")) return RESERVED;
// NOTE(review): c is the FIRST character here. "float" starts with 'f', so it can never
// pass the c == 'c' guard, and the 's' branch compares against "final", which starts with
// 'f'. Neither comparison can succeed; "float"/"short" under guards 'f'/'s' looks like the
// intent -- confirm.
232 case 'o': c=s.charAt(0);
233 if (c == 'c') return s.equals("float") ? RESERVED : -1;
234 else if (c=='s') return s.equals("final") ? RESERVED : -1;
236 case 'p': return s.equals("super") ? RESERVED : -1;
237 case 'r': return s.equals("throw") ? THROW : -1;
238 case 't': return s.equals("catch") ? CATCH : -1;
// Length 6: keyed on the second character.
240 case 6: switch (s.charAt(1)) {
// NOTE(review): s has six characters in this branch, so equals("class") is never true.
// Presumably a different six-letter word with 'a' in second position was meant -- confirm.
241 case 'a': return s.equals("class") ? RESERVED : -1;
// "delete" is rejected outright; "return" is recognized.
242 case 'e': c=s.charAt(0);
243 if (s.equals("delete")) throw new IOException("the delete keyword is not permitted in XWT scripts");
244 else if (c=='r') return s.equals("return") ? RETURN : -1;
246 case 'h': return s.equals("throws") ? RESERVED : -1;
247 case 'o': return s.equals("double") ? RESERVED : -1;
248 case 's': return s.equals("assert") ? ASSERT : -1;
249 case 'u': return s.equals("public") ? RESERVED : -1;
250 case 'w': return s.equals("switch") ? SWITCH : -1;
251 case 'y': return s.equals("typeof") ? TYPEOF : -1;
// Length 7: keyed on the second character.
253 case 7: switch (s.charAt(1)) {
254 case 'a': return s.equals("package") ? RESERVED : -1;
255 case 'e': return s.equals("default") ? DEFAULT : -1;
256 case 'i': return s.equals("finally") ? FINALLY : -1;
257 case 'o': return s.equals("boolean") ? RESERVED : -1;
258 case 'r': return s.equals("private") ? RESERVED : -1;
259 case 'x': return s.equals("extends") ? RESERVED : -1;
// Length 8: keyed on the first character.
261 case 8: switch (s.charAt(0)) {
262 case 'a': return s.equals("abstract") ? RESERVED : -1;
263 case 'c': return s.equals("continue") ? CONTINUE : -1;
264 case 'd': return s.equals("debugger") ? RESERVED : -1;
265 case 'f': return s.equals("function") ? FUNCTION : -1;
266 case 'v': return s.equals("volatile") ? RESERVED : -1;
// Length 9: keyed on the first character.
268 case 9: c=s.charAt(0);
269 if (c=='i') return s.equals("interface") ? RESERVED : -1;
270 else if (c=='p') return s.equals("protected") ? RESERVED : -1;
271 else if (c=='t') return s.equals("transient") ? RESERVED : -1;
// Length 10: keyed on the second character. "instanceof" is rejected even though an
// INSTANCEOF token constant exists.
273 case 10: c=s.charAt(1);
274 if (c=='m') return s.equals("implements") ? RESERVED : -1;
275 else if (c=='n' && s.equals("instanceof")) throw new IOException("the instanceof keyword is not permitted in XWT scripts");
277 case 12: return s.equals("synchronized") ? RESERVED : -1;
// Scans an identifier or keyword. Reads the rest of the word, fetches its text from the
// reader, and returns the keyword code when getKeyword() recognizes it. Handling of
// ordinary (non-keyword) identifiers is not visible in this view.
282 private int getIdentifier(int c) throws IOException {
// Empty-bodied loop: keeps reading until a character that is not an identifier part.
284 while (Character.isJavaIdentifierPart((char)(c = in.read())));
286 String str = in.getString();
287 int result = getKeyword(str);
// -1 from getKeyword() means "not a keyword".
288 if (result != -1) return result;
// Scans a numeric literal whose first character is c and stores its value in this.number.
// Integral values are boxed as the narrowest of Byte, Short or Integer that can hold them;
// everything else becomes a Double.
293 private int getNumber(int c) throws IOException {
296 double dval = Double.NaN;
298 boolean isInteger = true;
300 // figure out what base we're using
// An 'x'/'X' selects base 16 and restarts text accumulation; a digit selects base 8.
// NOTE(review): the enclosing condition (presumably a leading '0') is not visible here.
302 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
303 else if (isDigit(c)) base = 8;
// Consume digits. xDigitToInt() also accepts a-f/A-F, so letters are explicitly excluded
// when the base is below 16.
306 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
// A fraction and/or exponent is only recognized for base-10 literals.
307 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
309 if (c == '.') do { c = in.read(); } while (isDigit(c));
310 if (c == 'e' || c == 'E') {
// Optional sign, then at least one exponent digit is required.
312 if (c == '+' || c == '-') c = in.read();
313 if (!isDigit(c)) throw new IOException("msg.missing.exponent");
314 do { c = in.read(); } while (isDigit(c));
319 String numString = in.getString();
// Non-integral base-10 text is parsed as a double.
320 if (base == 10 && !isInteger) {
321 try { dval = (Double.valueOf(numString)).doubleValue(); }
322 catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); }
// Integral text is parsed as a long in the detected base.
325 longval = Long.parseLong(numString, base);
326 dval = (double)longval;
// NOTE(review): presumably a fallback for when parseLong() fails (the surrounding
// try/catch lines are not visible): reparse as a double and keep integer status only if
// the value survives a round trip through long.
328 dval = Double.parseDouble(numString);
329 longval = (long) dval;
330 if (longval == dval) isInteger = true;
// Box the result in the narrowest type that holds it.
334 if (!isInteger) this.number = new Double(dval);
335 else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval);
336 else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval);
337 else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
// Integers outside the int range are stored as Double.
338 else this.number = new Double(longval);
// Scans a quoted string literal (called by _getToken() with c set to the opening ' or ")
// and stores the decoded text in this.string. As long as no escape is seen the text is
// taken from the reader's own accumulator; once an escape is processed the characters are
// collected in stringBuf instead.
342 private int getString(int c) throws IOException {
343 StringBuffer stringBuf = null;
347 in.startString(); // start after the first "
// NOTE(review): quoteChar is assigned on a line not visible here; presumably it holds the
// opening quote character.
348 while(c != quoteChar) {
// A newline or end of input before the closing quote is an error.
349 if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit");
// First escape in this literal: switch to stringBuf, seeded with the text read so far.
351 if (stringBuf == null) {
352 in.unread(); // Don't include the backslash
353 stringBuf = new StringBuffer(in.getString());
// Translate the character following the backslash.
356 switch (c = in.read()) {
357 case 'b': c = '\b'; break;
358 case 'f': c = '\f'; break;
359 case 'n': c = '\n'; break;
360 case 'r': c = '\r'; break;
361 case 't': c = '\t'; break;
362 case 'v': c = '\u000B'; break;
363 case '\\': c = '\\'; break;
// Unicode escape: exactly four hex digits are read and validated.
366 for(int i=0; i<4; i++) {
368 if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F')))
// NOTE(review): the message prints c, not the offending digit ci -- confirm.
369 throw new IOException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence");
// NOTE(review): each hex digit carries 4 bits, but v is shifted by 8 per digit -- confirm
// whether "<< 4" was intended.
370 v = (v << 8) | Integer.parseInt(ci + "", 16);
376 // just use the character that was escaped
380 if (stringBuf != null) stringBuf.append((char) c);
// With escapes: use the explicitly built buffer.
383 if (stringBuf != null) this.string = stringBuf.toString();
// Without escapes: take the reader's accumulated text, minus the closing quote.
385 in.unread(); // miss the trailing "
386 this.string = in.getString();
// Returns the next token code. A token handed back through pushBackToken() is returned
// first. Otherwise a new token is obtained (on lines not visible in this view), and the
// bookkeeping below is maintained.
392 public int getToken() throws IOException {
// Serve the pushed-back token and clear the flag.
393 if (pushedBack) { pushedBack = false; return op; }
// Remember the previous token in twoBack, ignoring end-of-line tokens.
395 if (op != EOL) twoBack = op;
// An EOL token advances the line counter and resets the column.
397 if (op == EOL) { line++; col = 0; }
// Raw tokenizer: skips whitespace, then classifies the token from its first character.
// Identifiers, numbers and strings are delegated to their handlers; operators and
// punctuation are decided here using one-character lookahead (in.match). Returns -1 at
// end of input and throws IOException for an unrecognized character.
402 public int _getToken() throws IOException {
// Skip whitespace, but stop at '\n' so that a newline is reported as EOL below.
404 do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n');
405 if (c == -1) return -1;
// A backslash may begin an identifier as well.
406 if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
// A number starts with a digit, or with '.' immediately followed by a digit.
407 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
408 if (c == '"' || c == '\'') return getString(c);
410 case '\n': return EOL;
411 case ';': return SEMI;
418 case ',': return COMMA;
419 case '?': return HOOK;
420 case ':': return COLON;
421 case '.': return DOT;
422 case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
423 case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
424 case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
// "=" is ASSIGN, "==" is EQ, "===" is SHEQ; likewise "!", "!=", "!==".
425 case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
426 case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
// NOTE(review): "%=" yields MOD_ASSIGN (83), not ASSIGN_MOD (31).
427 case '%': return in.match('=') ? MOD_ASSIGN : MOD;
// No check for a following '=', so ASSIGN_BITNOT is never produced here.
428 case '~': return BITNOT;
429 case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
430 case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
431 case '*': return in.match('=') ? ASSIGN_MUL : MUL;
// "<", "<=", "<<", "<<="
432 case '<': return !in.match('<') ?
433 (in.match('=') ? LE : LT) :
434 in.match('=') ? ASSIGN_LSH : LSH;
// ">", ">=", ">>", ">>=", ">>>", ">>>=" (one line of this expression is not visible here)
435 case '>': return !in.match('>') ?
436 (in.match('=') ? GE : GT) :
438 (in.match('=') ? ASSIGN_URSH : URSH) :
439 (in.match('=') ? ASSIGN_RSH : RSH);
// The lines below handle a leading '/': "/=", line comment, plain division, block comment.
441 if (in.match('=')) return ASSIGN_DIV;
// Line comment: skip to end of line, put the terminator back, then scan the next token.
442 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
443 if (!in.match('*')) return DIV;
// Block comment: read until "*/" or end of input.
444 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
// Only a "/*" inside the comment needs attention; anything else is skipped.
445 if (c == '\n' || c != '/' || !in.match('*')) continue;
// "/*/" is accepted as closing the comment; any other "/*" is a nested-comment error.
446 if (in.match('/')) return getToken();
447 throw new IOException("msg.nested.comment");
449 if (c == -1) throw new IOException("msg.unterminated.comment");
450 return getToken(); // `goto retry'
451 default: throw new IOException("illegal character: " + ((char)c));
455 private class SmartReader {
456 PushbackReader reader = null;
/** Wraps {@code r} in a PushbackReader so that characters can be pushed back. */
public SmartReader(Reader r) {
    this.reader = new PushbackReader(r);
}
460 public void unread() throws IOException { unread((char)lastread); }
// Pushes the character c back. The statement that performs the pushback itself is not
// visible in this view.
461 public void unread(char c) throws IOException {
// While accumulating, drop the most recently accumulated character.
// NOTE(review): setLength(-1) would throw if the accumulator were empty here -- confirm
// callers never unread immediately after the accumulator has been emptied.
463 if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
465 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
// Looks at the next character without consuming it: reads one character from the
// underlying reader and pushes it straight back. (The return statement is not visible in
// this view.)
466 public int peek() throws IOException {
467 int peeked = reader.read();
// End of input (-1) is not pushed back.
468 if (peeked != -1) reader.unread((char)peeked);
// Reads the next character, remembers it in lastread, and records it in the accumulator
// when one is active. (The return statement is not visible in this view.)
471 public int read() throws IOException {
472 lastread = reader.read();
// NOTE(review): at end of input lastread is -1, so (char)-1 is appended as well.
473 if (accumulator != null) accumulator.append((char)lastread);
// The column counter advances for every character except line feed and carriage return.
474 if (lastread != '\n' && lastread != '\r') col++;
478 // FEATURE: could be much more efficient
// Collects the characters passing through read() while non-null; null means that no
// text is currently being accumulated.
479 StringBuffer accumulator = null;
// Starts accumulating text, seeding the buffer with the character most recently read.
480 public void startString() {
481 accumulator = new StringBuffer();
482 accumulator.append((char)lastread);
484 public String getString() throws IOException {
485 String ret = accumulator.toString();