1 // Derived from org.mozilla.javascript.TokenStream [NPL]
4 * The contents of this file are subject to the Netscape Public
5 * License Version 1.1 (the "License"); you may not use this file
6 * except in compliance with the License. You may obtain a copy of
7 * the License at http://www.mozilla.org/NPL/
9 * Software distributed under the License is distributed on an "AS
10 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11 * implied. See the License for the specific language governing
12 * rights and limitations under the License.
14 * The Initial Developer of the Original Code is Netscape
15 * Communications Corporation.
17 * Contributor(s): Roger Lawrence, Mike McCabe
25 public static void main(String[] s) throws Exception {
26 Lexer l = new Lexer(new InputStreamReader(System.in));
28 while((tok = l.getToken()) != -1)
29 System.out.println(codeToString[tok]);
// Numeric literal value. getNumber() stores a Byte, Short, Integer or Double
// here, and getToken() restores it from the push-back stack.
33 public Number number = null;
// String literal value. getString() stores the decoded literal here, and
// getToken() restores it from the push-back stack.
34 public String string = null;
39 private SmartReader in;
40 public String sourceName = "unknown";
/**
 * Creates a lexer that reads its input from the given reader.
 *
 * @param r the character source; it is wrapped in a SmartReader
 * @throws IOException declared in the signature; the statement below does
 *         not raise it directly
 */
public Lexer(Reader r) throws IOException {
    this.in = new SmartReader(r);
}
45 // Token Constants //////////////////////////////////////////////////////////
47 public final static int
48 EOL = 1, // end of line
52 ASSIGN_BITOR = 5, // |=
54 ASSIGN_BITXOR= 7, // ^=
56 ASSIGN_BITAND= 9, // &=
64 ASSIGN_LSH = 17, // <<=
66 ASSIGN_RSH = 19, // >>=
68 ASSIGN_URSH = 21, // >>>=
70 ASSIGN_ADD = 23, // +=
72 ASSIGN_SUB = 25, // -=
74 ASSIGN_MUL = 27, // *=
76 ASSIGN_DIV = 29, // /=
78 ASSIGN_MOD = 31, // %=
80 ASSIGN_BITNOT= 33, // ~=
81 DELPROP = 34, // delete
82 TYPEOF = 35, // typeof
83 NAME = 36, // *** identifiers ***
84 NUMBER = 37, // *** numeric literals ***
85 STRING = 38, // *** string literals ***
94 INSTANCEOF = 47, // instanceof
112 FUNCTION = 65, // function
113 IF = 66, // if keyword
114 ELSE = 67, // else keyword
115 SWITCH = 68, // switch keyword
116 CASE = 69, // case keyword
117 DEFAULT = 70, // default keyword
118 WHILE = 71, // while keyword
119 DO = 72, // do keyword
120 FOR = 73, // for keyword
121 BREAK = 74, // break keyword
122 CONTINUE = 75, // continue keyword
123 VAR = 76, // var keyword
124 WITH = 77, // with keyword
125 CATCH = 78, // catch keyword
126 FINALLY = 79, // finally keyword
127 RESERVED = 80, // reserved keywords
129 VOID = 82, // void keyword
130 MOD_ASSIGN = 83, // %=
132 ASSERT = 85; // assert keyword
// Alias for ASSERT (85), the last token constant declared above.
134 public static final int MAX_TOKEN = ASSERT;
136 public final static String[] codeToString = new String[] {
137 "0", "EOL", "RETURN", "GOTO", "BITOR", "ASSIGN_BITOR",
138 "BITXOR", "ASSIGN_BITXOR", "BITAND", "ASSIGN_BITAND", "EQ",
139 "NE", "LT", "LE", "GT", "GE", "LSH", "ASSIGN_LSH", "RSH",
140 "ASSIGN_RSH", "URSH", "ASSIGN_URSH", "ADD", "ASSIGN_ADD",
141 "SUB", "ASSIGN_SUB", "MUL", "ASSIGN_MUL", "DIV", "ASSIGN_DIV",
142 "MOD", "ASSIGN_MOD", "BITNOT", "ASSIGN_BITNOT=", "DELPROP",
143 "TYPEOF", "NAME", "NUMBER", "STRING", "NULL", "THIS", "FALSE",
144 "TRUE", "SHEQ", "SHNE", "THROW", "IN", "INSTANCEOF", "TRY",
145 "SEMI", "LB", "RB", "LC", "RC", "LP", "RP", "COMMA", "ASSIGN",
146 "HOOK", "COLON", "OR", "AND", "INC", "DEC", "DOT", "FUNCTION",
147 "IF", "ELSE", "SWITCH", "CASE", "DEFAULT", "WHILE", "DO",
148 "FOR", "BREAK", "CONTINUE", "VAR", "WITH", "CATCH", "FINALLY",
149 "RESERVED", "NOP", "VOID", "MOD_ASSIGN", "BANG", "ASSERT" };
152 // Predicates ///////////////////////////////////////////////////////////////////////
154 protected static boolean isJSIdentifier(String s) {
155 int length = s.length();
156 if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false;
157 for (int i=1; i<length; i++) {
158 char c = s.charAt(i);
159 if (!Character.isJavaIdentifierPart(c) && c == '\\' && !((i + 5) < length) &&
160 (s.charAt(i + 1) == 'u') && 0 <= xDigitToInt(s.charAt(i + 2)) && 0 <= xDigitToInt(s.charAt(i + 3)) &&
161 0 <= xDigitToInt(s.charAt(i + 4)) && 0 <= xDigitToInt(s.charAt(i + 5)))
167 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
168 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
169 private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); }
170 private static int xDigitToInt(int c) {
171 if ('0' <= c && c <= '9') return c - '0';
172 if ('a' <= c && c <= 'f') return c - ('a' - 10);
173 if ('A' <= c && c <= 'F') return c - ('A' - 10);
176 public static boolean isWhiteSpace(int c) {
177 if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true;
178 return Character.getType((char)c) == Character.SPACE_SEPARATOR;
182 // Token Subtype Handlers /////////////////////////////////////////////////////////
184 private int getKeyword(String s) throws IOException {
186 switch (s.length()) {
187 case 2: c=s.charAt(1);
188 if (c=='f') { if (s.charAt(0)=='i') return IF; }
189 else if (c=='n') { if (s.charAt(0)=='i') return IN; }
190 else if (c=='o') { if (s.charAt(0)=='d') return DO; }
192 case 3: switch (s.charAt(0)) {
193 case 'a': if (s.charAt(2)=='d' && s.charAt(1)=='n') return AND; break;
194 case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') return FOR; break;
195 case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED; break;
196 case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') throw new IOException("the new keyword is not permitted in XWT scripts");
197 case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') return TRY; break;
198 case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') return VAR; break;
200 case 4: switch (s.charAt(0)) {
201 case 'b': return s.equals("byte") ? RESERVED : -1;
202 case 'c': c=s.charAt(3);
203 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') return CASE; }
204 else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') return RESERVED; }
206 case 'e': c=s.charAt(3);
207 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') return ELSE; }
208 else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') return RESERVED; }
210 case 'g': return s.equals("goto") ? GOTO : -1;
211 case 'l': return s.equals("long") ? RESERVED : -1;
212 case 'n': return s.equals("null") ? NULL : -1;
213 case 't': c=s.charAt(3);
214 if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') return TRUE; }
215 else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') return THIS; }
217 case 'v': return s.equals("void") ? RESERVED : -1;
218 case 'w': return s.equals("with") ? WITH : -1;
220 case 5: switch (s.charAt(2)) {
221 case 'a': return s.equals("class") ? RESERVED : -1;
222 case 'e': return s.equals("break") ? BREAK : -1;
223 case 'i': return s.equals("while") ? WHILE : -1;
224 case 'l': return s.equals("false") ? FALSE : -1;
225 case 'n': c=s.charAt(0);
226 if (s.equals("const")) throw new IOException("the const keyword is not permitted in XWT");
227 else if (s.equals("final")) return RESERVED;
229 case 'o': c=s.charAt(0);
230 if (c == 'c') return s.equals("float") ? RESERVED : -1;
231 else if (c=='s') return s.equals("final") ? RESERVED : -1;
233 case 'p': return s.equals("super") ? RESERVED : -1;
234 case 'r': return s.equals("throw") ? THROW : -1;
235 case 't': return s.equals("catch") ? CATCH : -1;
237 case 6: switch (s.charAt(1)) {
238 case 'a': return s.equals("class") ? RESERVED : -1;
239 case 'e': c=s.charAt(0);
240 if (s.equals("delete")) throw new IOException("the delete keyword is not permitted in XWT scripts");
241 else if (c=='r') return s.equals("return") ? RETURN : -1;
243 case 'h': return s.equals("throws") ? RESERVED : -1;
244 case 'o': return s.equals("double") ? RESERVED : -1;
245 case 's': return s.equals("assert") ? ASSERT : -1;
246 case 'u': return s.equals("public") ? RESERVED : -1;
247 case 'w': return s.equals("switch") ? SWITCH : -1;
248 case 'y': return s.equals("typeof") ? TYPEOF : -1;
250 case 7: switch (s.charAt(1)) {
251 case 'a': return s.equals("package") ? RESERVED : -1;
252 case 'e': return s.equals("default") ? DEFAULT : -1;
253 case 'i': return s.equals("finally") ? FINALLY : -1;
254 case 'o': return s.equals("boolean") ? RESERVED : -1;
255 case 'r': return s.equals("private") ? RESERVED : -1;
256 case 'x': return s.equals("extends") ? RESERVED : -1;
258 case 8: switch (s.charAt(0)) {
259 case 'a': return s.equals("abstract") ? RESERVED : -1;
260 case 'c': return s.equals("continue") ? CONTINUE : -1;
261 case 'd': return s.equals("debugger") ? RESERVED : -1;
262 case 'f': return s.equals("function") ? FUNCTION : -1;
263 case 'v': return s.equals("volatile") ? RESERVED : -1;
265 case 9: c=s.charAt(0);
266 if (c=='i') return s.equals("interface") ? RESERVED : -1;
267 else if (c=='p') return s.equals("protected") ? RESERVED : -1;
268 else if (c=='t') return s.equals("transient") ? RESERVED : -1;
270 case 10: c=s.charAt(1);
271 if (c=='m') return s.equals("implements") ? RESERVED : -1;
272 else if (c=='n' && s.equals("instanceof")) throw new IOException("the instanceof keyword is not permitted in XWT scripts");
274 case 12: return s.equals("synchronized") ? RESERVED : -1;
279 private int getIdentifier(int c) throws IOException {
281 while (Character.isJavaIdentifierPart((char)(c = in.read())));
283 String str = in.getString();
284 int result = getKeyword(str);
285 if (result != -1) return result;
290 private int getNumber(int c) throws IOException {
293 double dval = Double.NaN;
295 boolean isInteger = true;
297 // figure out what base we're using
299 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
300 else if (isDigit(c)) base = 8;
303 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
304 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
306 if (c == '.') do { c = in.read(); } while (isDigit(c));
307 if (c == 'e' || c == 'E') {
309 if (c == '+' || c == '-') c = in.read();
310 if (!isDigit(c)) throw new IOException("msg.missing.exponent");
311 do { c = in.read(); } while (isDigit(c));
316 String numString = in.getString();
317 if (base == 10 && !isInteger) {
318 try { dval = (Double.valueOf(numString)).doubleValue(); }
319 catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); }
322 longval = Long.parseLong(numString, base);
323 dval = (double)longval;
325 dval = Double.parseDouble(numString);
326 longval = (long) dval;
327 if (longval == dval) isInteger = true;
331 if (!isInteger) this.number = new Double(dval);
332 else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval);
333 else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval);
334 else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
335 else this.number = new Double(longval);
339 private int getString(int c) throws IOException {
340 StringBuffer stringBuf = null;
344 in.startString(); // start after the first "
345 while(c != quoteChar) {
346 if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit");
348 if (stringBuf == null) {
349 in.unread(); // Don't include the backslash
350 stringBuf = new StringBuffer(in.getString());
353 switch (c = in.read()) {
354 case 'b': c = '\b'; break;
355 case 'f': c = '\f'; break;
356 case 'n': c = '\n'; break;
357 case 'r': c = '\r'; break;
358 case 't': c = '\t'; break;
359 case 'v': c = '\u000B'; break;
360 case '\\': c = '\\'; break;
363 for(int i=0; i<4; i++) {
365 if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F')))
366 throw new IOException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence");
367 v = (v << 8) | Integer.parseInt(ci + "", 16);
373 // just use the character that was escaped
377 if (stringBuf != null) stringBuf.append((char) c);
380 if (stringBuf != null) this.string = stringBuf.toString();
382 in.unread(); // miss the trailing "
383 this.string = in.getString();
389 public int _getToken() throws IOException {
391 do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n');
392 if (c == -1) return -1;
393 if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
394 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
395 if (c == '"' || c == '\'') return getString(c);
397 case '\n': return EOL;
398 case ';': return SEMI;
405 case ',': return COMMA;
406 case '?': return HOOK;
407 case ':': return COLON;
408 case '.': return DOT;
409 case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
410 case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
411 case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
412 case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
413 case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
414 case '%': return in.match('=') ? MOD_ASSIGN : MOD;
415 case '~': return BITNOT;
416 case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
417 case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
418 case '*': return in.match('=') ? ASSIGN_MUL : MUL;
419 case '<': return !in.match('<') ?
420 (in.match('=') ? LE : LT) :
421 in.match('=') ? ASSIGN_LSH : LSH;
422 case '>': return !in.match('>') ?
423 (in.match('=') ? GE : GT) :
425 (in.match('=') ? ASSIGN_URSH : URSH) :
426 (in.match('=') ? ASSIGN_RSH : RSH);
428 if (in.match('=')) return ASSIGN_DIV;
429 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
430 if (!in.match('*')) return DIV;
431 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
432 if (c == '\n' || c != '/' || !in.match('*')) continue;
433 if (in.match('/')) return getToken();
434 throw new IOException("msg.nested.comment");
436 if (c == -1) throw new IOException("msg.unterminated.comment");
437 return getToken(); // `goto retry'
438 default: throw new IOException("illegal character: " + ((char)c));
442 private class SmartReader {
443 PushbackReader reader = null;
/**
 * Wraps the given reader in a PushbackReader so that characters can be
 * pushed back onto the input.
 *
 * @param r the underlying character source
 */
public SmartReader(Reader r) {
    this.reader = new PushbackReader(r);
}
447 public void unread() throws IOException { unread((char)lastread); }
448 public void unread(char c) throws IOException {
450 if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
452 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
453 public int peek() throws IOException {
454 int peeked = reader.read();
455 if (peeked != -1) reader.unread((char)peeked);
458 public int read() throws IOException {
459 lastread = reader.read();
460 if (accumulator != null) accumulator.append((char)lastread);
461 if (lastread != '\n' && lastread != '\r') col++;
465 // FEATURE: could be much more efficient
466 StringBuffer accumulator = null;
467 public void startString() {
468 accumulator = new StringBuffer();
469 accumulator.append((char)lastread);
471 public String getString() throws IOException {
472 String ret = accumulator.toString();
479 // PushBack Stuff ////////////////////////////////////////////////////////////
// Push-back depth counter; getToken() takes tokens from the arrays below
// while this is greater than zero.
481 int pushBackDepth = 0;
// Token codes of pushed-back tokens (pushBackToken() stores op here).
// Starts with room for 10 entries; pushBackToken() doubles it when nearly full.
482 int[] pushBackInts = new int[10];
// Literal value saved with each pushed-back token: the Number if number is
// non-null, otherwise the String (which may itself be null). Grown alongside
// pushBackInts.
483 Object[] pushBackObjects = new Object[10];
485 public void pushBackToken() {
486 if (pushBackDepth >= pushBackInts.length - 1) {
487 int[] newInts = new int[pushBackInts.length * 2];
488 System.arraycopy(pushBackInts, 0, newInts, 0, pushBackInts.length);
489 pushBackInts = newInts;
490 Object[] newObjects = new Object[pushBackObjects.length * 2];
491 System.arraycopy(pushBackObjects, 0, newObjects, 0, pushBackObjects.length);
492 pushBackObjects = newObjects;
494 pushBackInts[pushBackDepth] = op;
495 pushBackObjects[pushBackDepth] = number != null ? (Object)number : (Object)string;
499 public int peekToken() throws IOException {
500 int ret = getToken();
505 public int getToken() throws IOException {
508 if (pushBackDepth > 0) {
510 op = pushBackInts[pushBackDepth];
511 if (pushBackObjects[pushBackDepth] != null) {
512 number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null;
513 string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null;
518 if (op == EOL) { line++; col = 0; }