1 // Derived from org.mozilla.javascript.TokenStream [NPL]
4 * The contents of this file are subject to the Netscape Public
5 * License Version 1.1 (the "License"); you may not use this file
6 * except in compliance with the License. You may obtain a copy of
7 * the License at http://www.mozilla.org/NPL/
9 * Software distributed under the License is distributed on an "AS
10 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11 * implied. See the License for the specific language governing
12 * rights and limitations under the License.
14 * The Initial Developer of the Original Code is Netscape
15 * Communications Corporation.
17 * Contributor(s): Roger Lawrence, Mike McCabe
25 public static void main(String[] s) throws Exception {
26 Lexer l = new Lexer(new InputStreamReader(System.in));
28 while((tok = l.getToken()) != -1)
29 System.out.println(codeToString[tok]);
32 private SmartReader in;
33 private boolean pushedBack = false;
/**
 * Creates a lexer that pulls its characters from the given reader.
 *
 * @param r the character source; it is wrapped in a SmartReader
 * @throws IOException declared for callers; the visible body only wraps the reader
 */
public Lexer(Reader r) throws IOException {
    this.in = new SmartReader(r);
}
41 public int peekToken() throws IOException { int ret = getToken(); pushBackToken(); return ret; }
42 public void pushBackToken() { pushBackToken(op); }
43 public void pushBackToken(int i) { if (pushedBack) throw new Error("can't push back twice"); pushedBack = true; op = i; }
45 // Token Constants //////////////////////////////////////////////////////////
47 public final static int
48 EOL = 1, // end of line
52 ASSIGN_BITOR = 5, // |=
54 ASSIGN_BITXOR= 7, // ^=
56 ASSIGN_BITAND= 9, // &=
64 ASSIGN_LSH = 17, // <<=
66 ASSIGN_RSH = 19, // >>=
68 ASSIGN_URSH = 21, // >>>=
70 ASSIGN_ADD = 23, // +=
72 ASSIGN_SUB = 25, // -=
74 ASSIGN_MUL = 27, // *=
76 ASSIGN_DIV = 29, // /=
78 ASSIGN_MOD = 31, // %=
80 ASSIGN_BITNOT= 33, // ~=
81 DELPROP = 34, // delete
82 TYPEOF = 35, // typeof
83 NAME = 36, // *** identifiers ***
84 NUMBER = 37, // *** numeric literals ***
85 STRING = 38, // *** string literals ***
94 INSTANCEOF = 47, // instanceof
112 FUNCTION = 65, // function
113 IF = 66, // if keyword
114 ELSE = 67, // else keyword
115 SWITCH = 68, // switch keyword
116 CASE = 69, // case keyword
117 DEFAULT = 70, // default keyword
118 WHILE = 71, // while keyword
119 DO = 72, // do keyword
120 FOR = 73, // for keyword
121 BREAK = 74, // break keyword
122 CONTINUE = 75, // continue keyword
123 VAR = 76, // var keyword
124 WITH = 77, // with keyword
125 CATCH = 78, // catch keyword
126 FINALLY = 79, // finally keyword
127 RESERVED = 80, // reserved keywords
129 VOID = 82, // void keyword
130 MOD_ASSIGN = 83, // %=
132 ASSERT = 85; // assert keyword
134 public static final int MAX_TOKEN = ASSERT;
136 public final static String[] codeToString = new String[] {
137 "0", "EOL", "RETURN", "GOTO", "BITOR", "ASSIGN_BITOR",
138 "BITXOR", "ASSIGN_BITXOR", "BITAND", "ASSIGN_BITAND", "EQ",
139 "NE", "LT", "LE", "GT", "GE", "LSH", "ASSIGN_LSH", "RSH",
140 "ASSIGN_RSH", "URSH", "ASSIGN_URSH", "ADD", "ASSIGN_ADD",
141 "SUB", "ASSIGN_SUB", "MUL", "ASSIGN_MUL", "DIV", "ASSIGN_DIV",
142 "MOD", "ASSIGN_MOD", "BITNOT", "ASSIGN_BITNOT=", "DELPROP",
143 "TYPEOF", "NAME", "NUMBER", "STRING", "NULL", "THIS", "FALSE",
144 "TRUE", "SHEQ", "SHNE", "THROW", "IN", "INSTANCEOF", "TRY",
145 "SEMI", "LB", "RB", "LC", "RC", "LP", "RP", "COMMA", "ASSIGN",
146 "HOOK", "COLON", "OR", "AND", "INC", "DEC", "DOT", "FUNCTION",
147 "IF", "ELSE", "SWITCH", "CASE", "DEFAULT", "WHILE", "DO",
148 "FOR", "BREAK", "CONTINUE", "VAR", "WITH", "CATCH", "FINALLY",
149 "RESERVED", "NOP", "VOID", "MOD_ASSIGN", "BANG", "ASSERT" };
152 // Predicates ///////////////////////////////////////////////////////////////////////
154 protected static boolean isJSIdentifier(String s) {
155 int length = s.length();
156 if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false;
157 for (int i=1; i<length; i++) {
158 char c = s.charAt(i);
159 if (!Character.isJavaIdentifierPart(c) && c == '\\' && !((i + 5) < length) &&
160 (s.charAt(i + 1) == 'u') && 0 <= xDigitToInt(s.charAt(i + 2)) && 0 <= xDigitToInt(s.charAt(i + 3)) &&
161 0 <= xDigitToInt(s.charAt(i + 4)) && 0 <= xDigitToInt(s.charAt(i + 5)))
167 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
168 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
169 private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); }
170 private static int xDigitToInt(int c) {
171 if ('0' <= c && c <= '9') return c - '0';
172 if ('a' <= c && c <= 'f') return c - ('a' - 10);
173 if ('A' <= c && c <= 'F') return c - ('A' - 10);
176 public static boolean isWhiteSpace(int c) {
177 if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true;
178 return Character.getType((char)c) == Character.SPACE_SEPARATOR;
182 // Token Subtype Handlers /////////////////////////////////////////////////////////
184 private int getKeyword(String s) throws IOException {
186 switch (s.length()) {
187 case 2: c=s.charAt(1);
188 if (c=='f') { if (s.charAt(0)=='i') return IF; }
189 else if (c=='n') { if (s.charAt(0)=='i') return IN; }
190 else if (c=='o') { if (s.charAt(0)=='d') return DO; }
192 case 3: switch (s.charAt(0)) {
193 case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') return FOR; break;
194 case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED; break;
195 case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') throw new IOException("the new keyword is not permitted in XWT scripts");
196 case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') return TRY; break;
197 case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') return VAR; break;
199 case 4: switch (s.charAt(0)) {
200 case 'b': return s.equals("byte") ? RESERVED : -1;
201 case 'c': c=s.charAt(3);
202 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') return CASE; }
203 else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') return RESERVED; }
205 case 'e': c=s.charAt(3);
206 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') return ELSE; }
207 else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') return RESERVED; }
209 case 'g': return s.equals("goto") ? GOTO : -1;
210 case 'l': return s.equals("long") ? RESERVED : -1;
211 case 'n': return s.equals("null") ? NULL : -1;
212 case 't': c=s.charAt(3);
213 if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') return TRUE; }
214 else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') return THIS; }
216 case 'v': return s.equals("void") ? RESERVED : -1;
217 case 'w': return s.equals("with") ? WITH : -1;
219 case 5: switch (s.charAt(2)) {
220 case 'a': return s.equals("class") ? RESERVED : -1;
221 case 'e': return s.equals("break") ? BREAK : -1;
222 case 'i': return s.equals("while") ? WHILE : -1;
223 case 'l': return s.equals("false") ? FALSE : -1;
224 case 'n': c=s.charAt(0);
225 if (s.equals("const")) throw new IOException("the const keyword is not permitted in XWT");
226 else if (s.equals("final")) return RESERVED;
228 case 'o': c=s.charAt(0);
229 if (c == 'c') return s.equals("float") ? RESERVED : -1;
230 else if (c=='s') return s.equals("final") ? RESERVED : -1;
232 case 'p': return s.equals("super") ? RESERVED : -1;
233 case 'r': return s.equals("throw") ? THROW : -1;
234 case 't': return s.equals("catch") ? CATCH : -1;
236 case 6: switch (s.charAt(1)) {
237 case 'a': return s.equals("class") ? RESERVED : -1;
238 case 'e': c=s.charAt(0);
239 if (s.equals("delete")) throw new IOException("the delete keyword is not permitted in XWT scripts");
240 else if (c=='r') return s.equals("return") ? RETURN : -1;
242 case 'h': return s.equals("throws") ? RESERVED : -1;
243 case 'o': return s.equals("double") ? RESERVED : -1;
244 case 's': return s.equals("assert") ? ASSERT : -1;
245 case 'u': return s.equals("public") ? RESERVED : -1;
246 case 'w': return s.equals("switch") ? SWITCH : -1;
247 case 'y': return s.equals("typeof") ? TYPEOF : -1;
249 case 7: switch (s.charAt(1)) {
250 case 'a': return s.equals("package") ? RESERVED : -1;
251 case 'e': return s.equals("default") ? DEFAULT : -1;
252 case 'i': return s.equals("finally") ? FINALLY : -1;
253 case 'o': return s.equals("boolean") ? RESERVED : -1;
254 case 'r': return s.equals("private") ? RESERVED : -1;
255 case 'x': return s.equals("extends") ? RESERVED : -1;
257 case 8: switch (s.charAt(0)) {
258 case 'a': return s.equals("abstract") ? RESERVED : -1;
259 case 'c': return s.equals("continue") ? CONTINUE : -1;
260 case 'd': return s.equals("debugger") ? RESERVED : -1;
261 case 'f': return s.equals("function") ? FUNCTION : -1;
262 case 'v': return s.equals("volatile") ? RESERVED : -1;
264 case 9: c=s.charAt(0);
265 if (c=='i') return s.equals("interface") ? RESERVED : -1;
266 else if (c=='p') return s.equals("protected") ? RESERVED : -1;
267 else if (c=='t') return s.equals("transient") ? RESERVED : -1;
269 case 10: c=s.charAt(1);
270 if (c=='m') return s.equals("implements") ? RESERVED : -1;
271 else if (c=='n' && s.equals("instanceof")) throw new IOException("the instanceof keyword is not permitted in XWT scripts");
273 case 12: return s.equals("synchronized") ? RESERVED : -1;
278 private int getIdentifier(int c) throws IOException {
280 while (Character.isJavaIdentifierPart((char)(c = in.read())));
282 String str = in.getString();
283 int result = getKeyword(str);
284 if (result != -1) return result;
289 private int getNumber(int c) throws IOException {
292 double dval = Double.NaN;
294 boolean isInteger = true;
296 // figure out what base we're using
298 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
299 else if (isDigit(c)) base = 8;
302 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
303 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
305 if (c == '.') do { c = in.read(); } while (isDigit(c));
306 if (c == 'e' || c == 'E') {
308 if (c == '+' || c == '-') c = in.read();
309 if (!isDigit(c)) throw new IOException("msg.missing.exponent");
310 do { c = in.read(); } while (isDigit(c));
315 String numString = in.getString();
316 if (base == 10 && !isInteger) {
317 try { dval = (Double.valueOf(numString)).doubleValue(); }
318 catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); }
321 longval = Long.parseLong(numString, base);
322 dval = (double)longval;
324 // FIXME: we're not handling hex/octal fractions... does that matter?
325 dval = Double.parseDouble(numString);
326 longval = (long) dval;
327 if (longval == dval) isInteger = true;
331 if (!isInteger) this.number = new Double(dval);
332 else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval);
333 else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval);
334 else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
335 else this.number = new Double(longval);
339 private int getString(int c) throws IOException {
340 StringBuffer stringBuf = null;
344 in.startString(); // start after the first "
345 while(c != quoteChar) {
346 if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit");
348 if (stringBuf == null) {
349 in.unread(); // Don't include the backslash
350 stringBuf = new StringBuffer(in.getString());
353 switch (c = in.read()) {
354 case 'b': c = '\b'; break;
355 case 'f': c = '\f'; break;
356 case 'n': c = '\n'; break;
357 case 'r': c = '\r'; break;
358 case 't': c = '\t'; break;
359 case 'v': c = '\u000B'; break;
360 default: throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
363 if (stringBuf != null) stringBuf.append((char) c);
366 if (stringBuf != null) this.string = stringBuf.toString();
368 in.unread(); // miss the trailing "
369 this.string = in.getString();
375 public int getToken() throws IOException {
376 if (pushedBack) { pushedBack = false; return op; }
378 if (op != EOL) twoBack = op;
380 } while (op == EOL); // FIXME
384 public int _getToken() throws IOException {
386 do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n');
387 if (c == -1) return -1;
388 if (c == '\\' && in.peek() == 'u') throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
389 if (Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
390 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
391 if (c == '"' || c == '\'') return getString(c);
393 case '\n': return EOL;
394 case ';': return SEMI;
401 case ',': return COMMA;
402 case '?': return HOOK;
403 case ':': return COLON;
404 case '.': return DOT;
405 case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
406 case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
407 case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
408 case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
409 case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
410 case '%': return in.match('=') ? MOD_ASSIGN : MOD;
411 case '~': return BITNOT;
412 case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
413 case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
414 case '*': return in.match('=') ? ASSIGN_MUL : MUL;
415 case '<': return !in.match('<') ?
416 (in.match('=') ? LE : LT) :
417 in.match('=') ? ASSIGN_LSH : LSH;
418 case '>': return !in.match('>') ?
419 (in.match('=') ? GE : GT) :
421 (in.match('=') ? ASSIGN_URSH : URSH) :
422 (in.match('=') ? ASSIGN_RSH : RSH);
424 if (in.match('=')) return ASSIGN_DIV;
425 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
426 if (!in.match('*')) return DIV;
427 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
428 if (c == '\n' || c != '/' || !in.match('*')) continue;
429 if (in.match('/')) return getToken();
430 throw new IOException("msg.nested.comment");
432 if (c == -1) throw new IOException("msg.unterminated.comment");
433 return getToken(); // `goto retry'
434 default: throw new IOException("illegal character: " + c);
438 private static class SmartReader {
439 PushbackReader reader = null;
/**
 * Wraps the given reader in a PushbackReader so that characters can be
 * returned to the stream.
 *
 * @param r the underlying character source
 */
public SmartReader(Reader r) {
    this.reader = new PushbackReader(r);
}
443 public void unread() throws IOException {
444 reader.unread(lastread);
445 if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
447 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
448 public int peek() throws IOException {
449 int peeked = reader.read();
450 if (peeked != -1) reader.unread((char)peeked);
453 public int read() throws IOException {
454 lastread = reader.read();
455 if (accumulator != null) accumulator.append((char)lastread);
459 // FIXME: could be much more efficient
460 StringBuffer accumulator = null;
461 public void startString() {
462 accumulator = new StringBuffer();
463 accumulator.append((char)lastread);
465 public String getString() throws IOException {
466 String ret = accumulator.toString();