1 // This file was derived from org.mozilla.javascript.TokenStream; it
2 // is covered by the NPL 1.1.
5 * The contents of this file are subject to the Netscape Public
6 * License Version 1.1 (the "License"); you may not use this file
7 * except in compliance with the License. You may obtain a copy of
8 * the License at http://www.mozilla.org/NPL/
10 * Software distributed under the License is distributed on an "AS
11 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
12 * implied. See the License for the specific language governing
13 * rights and limitations under the License.
15 * The Initial Developer of the Original Code is Netscape
16 * Communications Corporation.
18 * Contributor(s): Roger Lawrence, Mike McCabe
26 public static void main(String[] s) throws Exception {
27 Lexer l = new Lexer(new InputStreamReader(System.in));
29 while((tok = l.getToken()) != -1)
30 System.out.println(codeToString[tok]);
33 private SmartReader in;
34 private boolean pushedBack = false;
/**
 * Creates a lexer that reads its characters from {@code r}.
 *
 * @param r the character stream to tokenize
 * @throws IOException declared for callers; the visible body only wraps {@code r}
 */
public Lexer(Reader r) throws IOException {
    this.in = new SmartReader(r);
}
41 public int peekToken() throws IOException { int ret = getToken(); pushBackToken(); return ret; }
42 public void pushBackToken() { if (pushedBack) throw new Error("can't push back twice"); pushedBack = true; }
44 // Token Constants //////////////////////////////////////////////////////////
// Token codes returned by getToken(). Each code is also the index of the
// token's symbolic name in codeToString (main() prints codeToString[tok]),
// so the two tables must be kept in step.
// NOTE(review): several codes are not visible in this listing; their names
// and positions can be read off codeToString.
46 public final static int
47 EOL = 1, // end of line
51 ASSIGN_BITOR = 5, // |=
53 ASSIGN_BITXOR= 7, // ^=
55 ASSIGN_BITAND= 9, // &=
63 ASSIGN_LSH = 17, // <<=
65 ASSIGN_RSH = 19, // >>=
67 ASSIGN_URSH = 21, // >>>=
69 ASSIGN_ADD = 23, // +=
71 ASSIGN_SUB = 25, // -=
73 ASSIGN_MUL = 27, // *=
75 ASSIGN_DIV = 29, // /=
// NOTE(review): "%=" has two codes, ASSIGN_MOD here and MOD_ASSIGN (83)
// below; _getToken() returns MOD_ASSIGN. Confirm which one consumers expect.
77 ASSIGN_MOD = 31, // %=
// NOTE(review): the visible '~' branch of _getToken() always returns BITNOT,
// so this code does not appear to be produced by the lexer.
79 ASSIGN_BITNOT= 33, // ~=
80 DELPROP = 34, // delete
81 TYPEOF = 35, // typeof
82 NAME = 36, // *** identifiers ***
83 NUMBER = 37, // *** numeric literals ***
84 STRING = 38, // *** string literals ***
93 INSTANCEOF = 47, // instanceof
111 FUNCTION = 65, // function
112 IF = 66, // if keyword
113 ELSE = 67, // else keyword
114 SWITCH = 68, // switch keyword
115 CASE = 69, // case keyword
116 DEFAULT = 70, // default keyword
117 WHILE = 71, // while keyword
118 DO = 72, // do keyword
119 FOR = 73, // for keyword
120 BREAK = 74, // break keyword
121 CONTINUE = 75, // continue keyword
122 VAR = 76, // var keyword
123 WITH = 77, // with keyword
124 CATCH = 78, // catch keyword
125 FINALLY = 79, // finally keyword
126 RESERVED = 80, // reserved keywords
128 VOID = 82, // void keyword
129 MOD_ASSIGN = 83, // %=
131 ASSERT = 85; // assert keyword
// Highest token code defined above.
133 public static final int MAX_TOKEN = ASSERT;
135 public final static String[] codeToString = new String[] {
136 "0", "EOL", "RETURN", "GOTO", "BITOR", "ASSIGN_BITOR",
137 "BITXOR", "ASSIGN_BITXOR", "BITAND", "ASSIGN_BITAND", "EQ",
138 "NE", "LT", "LE", "GT", "GE", "LSH", "ASSIGN_LSH", "RSH",
139 "ASSIGN_RSH", "URSH", "ASSIGN_URSH", "ADD", "ASSIGN_ADD",
140 "SUB", "ASSIGN_SUB", "MUL", "ASSIGN_MUL", "DIV", "ASSIGN_DIV",
141 "MOD", "ASSIGN_MOD", "BITNOT", "ASSIGN_BITNOT=", "DELPROP",
142 "TYPEOF", "NAME", "NUMBER", "STRING", "NULL", "THIS", "FALSE",
143 "TRUE", "SHEQ", "SHNE", "THROW", "IN", "INSTANCEOF", "TRY",
144 "SEMI", "LB", "RB", "LC", "RC", "LP", "RP", "COMMA", "ASSIGN",
145 "HOOK", "COLON", "OR", "AND", "INC", "DEC", "DOT", "FUNCTION",
146 "IF", "ELSE", "SWITCH", "CASE", "DEFAULT", "WHILE", "DO",
147 "FOR", "BREAK", "CONTINUE", "VAR", "WITH", "CATCH", "FINALLY",
148 "RESERVED", "NOP", "VOID", "MOD_ASSIGN", "BANG", "ASSERT" };
151 // Predicates ///////////////////////////////////////////////////////////////////////
// Tests whether s has the shape of an identifier: a non-empty string whose
// first character satisfies Character.isJavaIdentifierStart, followed by
// characters that are checked one at a time in the loop below.
// NOTE(review): the statements following the condition at the end of this
// block are not visible in this listing, so the exact accept/reject outcome
// of the loop cannot be confirmed from here.
153 protected static boolean isJSIdentifier(String s) {
154 int length = s.length();
155 if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false;
156 for (int i=1; i<length; i++) {
157 char c = s.charAt(i);
// The condition looks for a backslash followed by 'u' and four hex digits.
// NOTE(review): the bounds test is negated -- !((i + 5) < length) holds only
// when index i+5 is past the end of s, so reaching s.charAt(i + 5) would
// throw StringIndexOutOfBoundsException. The intended test is presumably
// (i + 5) < length; confirm before changing.
158 if (!Character.isJavaIdentifierPart(c) && c == '\\' && !((i + 5) < length) &&
159 (s.charAt(i + 1) == 'u') && 0 <= xDigitToInt(s.charAt(i + 2)) && 0 <= xDigitToInt(s.charAt(i + 3)) &&
160 0 <= xDigitToInt(s.charAt(i + 4)) && 0 <= xDigitToInt(s.charAt(i + 5)))
166 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
167 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
168 private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); }
169 private static int xDigitToInt(int c) {
170 if ('0' <= c && c <= '9') return c - '0';
171 if ('a' <= c && c <= 'f') return c - ('a' - 10);
172 if ('A' <= c && c <= 'F') return c - ('A' - 10);
175 public static boolean isWhiteSpace(int c) {
176 if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true;
177 return Character.getType((char)c) == Character.SPACE_SEPARATOR;
181 // Token Subtype Handlers /////////////////////////////////////////////////////////
183 private int getKeyword(String s) throws IOException {
185 switch (s.length()) {
186 case 2: c=s.charAt(1);
187 if (c=='f') { if (s.charAt(0)=='i') return IF; }
188 else if (c=='n') { if (s.charAt(0)=='i') return IN; }
189 else if (c=='o') { if (s.charAt(0)=='d') return DO; }
191 case 3: switch (s.charAt(0)) {
192 case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') return FOR; break;
193 case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED; break;
194 case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') throw new IOException("the new keyword is not permitted in XWT scripts");
195 case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') return TRY; break;
196 case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') return VAR; break;
198 case 4: switch (s.charAt(0)) {
199 case 'b': return s.equals("byte") ? RESERVED : -1;
200 case 'c': c=s.charAt(3);
201 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') return CASE; }
202 else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') return RESERVED; }
204 case 'e': c=s.charAt(3);
205 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') return ELSE; }
206 else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') return RESERVED; }
208 case 'g': return s.equals("goto") ? GOTO : -1;
209 case 'l': return s.equals("long") ? RESERVED : -1;
210 case 'n': return s.equals("null") ? NULL : -1;
211 case 't': c=s.charAt(3);
212 if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') return TRUE; }
213 else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') return THIS; }
215 case 'v': return s.equals("void") ? RESERVED : -1;
216 case 'w': return s.equals("with") ? WITH : -1;
218 case 5: switch (s.charAt(2)) {
219 case 'a': return s.equals("class") ? RESERVED : -1;
220 case 'e': return s.equals("break") ? BREAK : -1;
221 case 'i': return s.equals("while") ? WHILE : -1;
222 case 'l': return s.equals("false") ? FALSE : -1;
223 case 'n': c=s.charAt(0);
224 if (s.equals("const")) throw new IOException("the const keyword is not permitted in XWT");
225 else if (s.equals("final")) return RESERVED;
227 case 'o': c=s.charAt(0);
228 if (c == 'c') return s.equals("float") ? RESERVED : -1;
229 else if (c=='s') return s.equals("final") ? RESERVED : -1;
231 case 'p': return s.equals("super") ? RESERVED : -1;
232 case 'r': return s.equals("throw") ? THROW : -1;
233 case 't': return s.equals("catch") ? CATCH : -1;
235 case 6: switch (s.charAt(1)) {
236 case 'a': return s.equals("class") ? RESERVED : -1;
237 case 'e': c=s.charAt(0);
238 if (s.equals("delete")) throw new IOException("the delete keyword is not permitted in XWT scripts");
239 else if (c=='r') return s.equals("return") ? RETURN : -1;
241 case 'h': return s.equals("throws") ? RESERVED : -1;
242 case 'o': return s.equals("double") ? RESERVED : -1;
243 case 's': return s.equals("assert") ? ASSERT : -1;
244 case 'u': return s.equals("public") ? RESERVED : -1;
245 case 'w': return s.equals("switch") ? SWITCH : -1;
246 case 'y': return s.equals("typeof") ? TYPEOF : -1;
248 case 7: switch (s.charAt(1)) {
249 case 'a': return s.equals("package") ? RESERVED : -1;
250 case 'e': return s.equals("default") ? DEFAULT : -1;
251 case 'i': return s.equals("finally") ? FINALLY : -1;
252 case 'o': return s.equals("boolean") ? RESERVED : -1;
253 case 'r': return s.equals("private") ? RESERVED : -1;
254 case 'x': return s.equals("extends") ? RESERVED : -1;
256 case 8: switch (s.charAt(0)) {
257 case 'a': return s.equals("abstract") ? RESERVED : -1;
258 case 'c': return s.equals("continue") ? CONTINUE : -1;
259 case 'd': return s.equals("debugger") ? RESERVED : -1;
260 case 'f': return s.equals("function") ? FUNCTION : -1;
261 case 'v': return s.equals("volatile") ? RESERVED : -1;
263 case 9: c=s.charAt(0);
264 if (c=='i') return s.equals("interface") ? RESERVED : -1;
265 else if (c=='p') return s.equals("protected") ? RESERVED : -1;
266 else if (c=='t') return s.equals("transient") ? RESERVED : -1;
268 case 10: c=s.charAt(1);
269 if (c=='m') return s.equals("implements") ? RESERVED : -1;
270 else if (c=='n' && s.equals("instanceof")) throw new IOException("the instanceof keyword is not permitted in XWT scripts");
272 case 12: return s.equals("synchronized") ? RESERVED : -1;
// Scans an identifier or keyword. _getToken() calls this once it has seen a
// character that satisfies Character.isJavaIdentifierStart.
// NOTE(review): lines are missing from this listing (before and after the
// loop, and after the keyword check), so how the accumulated text is
// delimited and what is returned for a non-keyword cannot be confirmed here.
277 private int getIdentifier(int c) throws IOException {
// Consume characters for as long as they are valid identifier parts; the
// loop body is intentionally empty.
279 while (Character.isJavaIdentifierPart((char)(c = in.read())));
// Text gathered by the reader since the last startString().
281 String str = in.getString();
// getKeyword() returns -1 for words that are not keywords; any other
// result is the token code to return.
282 int result = getKeyword(str);
283 if (result != -1) return result;
// Scans a numeric literal and stores its value in this.number, boxed as the
// narrowest of Byte, Short or Integer that can hold it, or as a Double when
// it is not an integer or does not fit in an int.
// NOTE(review): many lines of this method are missing from this listing
// (declarations of base/longval, the guards around the base detection, the
// try/catch structure around the parsing and the return statement). The
// comments below describe only what the visible lines do.
288 private int getNumber(int c) throws IOException {
291 double dval = Double.NaN;
293 boolean isInteger = true;
295 // figure out what base we're using
// An 'x'/'X' selects base 16 and restarts text accumulation; a digit here
// selects base 8. Presumably both are reached only after a leading '0'
// (the guard is not visible) -- confirm.
297 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
298 else if (isDigit(c)) base = 8;
// Consume digits: any hex digit is accepted, except that letters end the
// scan when the base is below 16. Note that '8' and '9' are still consumed
// when base == 8.
301 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
// Fractions and exponents are only recognized for decimal literals.
302 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
304 if (c == '.') do { c = in.read(); } while (isDigit(c));
305 if (c == 'e' || c == 'E') {
// Optional sign, then at least one exponent digit is required.
307 if (c == '+' || c == '-') c = in.read();
308 if (!isDigit(c)) throw new IOException("msg.missing.exponent");
309 do { c = in.read(); } while (isDigit(c));
314 String numString = in.getString();
// Non-integer decimal literals are parsed as doubles; a malformed literal
// is reported as an IOException.
315 if (base == 10 && !isInteger) {
316 try { dval = (Double.valueOf(numString)).doubleValue(); }
317 catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); }
// Integer literals are parsed in the detected base.
// NOTE(review): the statements between this parse and the Double fallback
// below are not visible; presumably the fallback runs when parseLong
// fails -- confirm.
320 longval = Long.parseLong(numString, base);
321 dval = (double)longval;
323 // FIXME: we're not handling hex/octal fractions... does that matter?
324 dval = Double.parseDouble(numString);
325 longval = (long) dval;
326 if (longval == dval) isInteger = true;
// Box the result in the smallest wrapper type that holds it exactly.
330 if (!isInteger) this.number = new Double(dval);
331 else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval);
332 else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval);
333 else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
334 else this.number = new Double(longval);
// Scans a quoted string literal and stores its contents in this.string.
// While no escape has been seen the text is taken straight from the reader's
// accumulated string; on the first backslash the text so far is copied into
// stringBuf and the rest is appended character by character.
// NOTE(review): several lines are missing from this listing (the declaration
// of quoteChar, the reads that advance c, the backslash test, the else
// branch and the return), so only the visible statements are described.
338 private int getString(int c) throws IOException {
339 StringBuffer stringBuf = null;
343 in.startString(); // start after the first "
344 while(c != quoteChar) {
// A literal may not span lines or run into end of input.
345 if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit");
// First escape in this literal: switch from the reader's accumulated text
// to an explicit buffer.
347 if (stringBuf == null) {
348 in.unread(); // Don't include the backslash
349 stringBuf = new StringBuffer(in.getString());
// Translate the character after the backslash.
// NOTE(review): only b, f, n, r, t and v are accepted; every other escape,
// which on the visible lines includes an escaped backslash or quote, throws
// with a message that mentions only \u and \0 -- confirm this is intended.
352 switch (c = in.read()) {
353 case 'b': c = '\b'; break;
354 case 'f': c = '\f'; break;
355 case 'n': c = '\n'; break;
356 case 'r': c = '\r'; break;
357 case 't': c = '\t'; break;
358 case 'v': c = '\u000B'; break;
359 default: throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
362 if (stringBuf != null) stringBuf.append((char) c);
// Publish the literal's contents: from the buffer if an escape was seen,
// otherwise from the reader's accumulated text.
365 if (stringBuf != null) this.string = stringBuf.toString();
367 in.unread(); // miss the trailing "
368 this.string = in.getString();
374 public int getToken() throws IOException {
375 if (pushedBack) { pushedBack = false; return op; }
376 return (op = _getToken());
// Scans one token directly from the input, ignoring any pushed-back token
// (getToken() handles push-back and calls this). Returns a token code, or
// -1 at end of input; throws IOException for malformed input.
// NOTE(review): some lines are missing from this listing (the declaration of
// c, the switch header, several punctuation cases, part of the '>' case and
// the '/' case label); the comments describe the visible lines only.
379 public int _getToken() throws IOException {
// Skip white space but stop at a newline, which is reported as EOL below.
// The "|| c == '\n'" in the loop condition is never true because the break
// fires first.
381 do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n');
382 if (c == -1) return -1;
// Unicode escapes in identifiers are rejected up front.
383 if (c == '\\' && in.peek() == 'u') throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
// Dispatch to the sub-lexers; a '.' starts a number only when a digit follows.
384 if (Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
385 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
386 if (c == '"' || c == '\'') return getString(c);
// Punctuation and operators. in.match(x) consumes the next character only
// if it equals x, so the longer operators are tried first.
388 case '\n': return EOL;
389 case ';': return SEMI;
396 case ',': return COMMA;
397 case '?': return HOOK;
398 case ':': return COLON;
399 case '.': return DOT;
400 case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
401 case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
402 case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
// "=" -> ASSIGN, "==" -> EQ, "===" -> SHEQ
403 case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
// "!" -> BANG, "!=" -> NE, "!==" -> SHNE
404 case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
// NOTE(review): "%=" yields MOD_ASSIGN rather than ASSIGN_MOD, unlike the
// other compound assignments, which use the ASSIGN_* codes -- confirm.
405 case '%': return in.match('=') ? MOD_ASSIGN : MOD;
406 case '~': return BITNOT;
407 case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
408 case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
409 case '*': return in.match('=') ? ASSIGN_MUL : MUL;
// "<" -> LT, "<=" -> LE, "<<" -> LSH, "<<=" -> ASSIGN_LSH
410 case '<': return !in.match('<') ?
411 (in.match('=') ? LE : LT) :
412 in.match('=') ? ASSIGN_LSH : LSH;
// ">" family: GT, GE, RSH, ASSIGN_RSH, URSH, ASSIGN_URSH (one line of this
// expression is not visible in this listing).
413 case '>': return !in.match('>') ?
414 (in.match('=') ? GE : GT) :
416 (in.match('=') ? ASSIGN_URSH : URSH) :
417 (in.match('=') ? ASSIGN_RSH : RSH);
// Slash handling (presumably under a case '/' label that is not visible):
// "/=" is an operator, "//" and "/*" start comments, anything else is DIV.
419 if (in.match('=')) return ASSIGN_DIV;
// Line comment: skip to the newline (or end of input), push that character
// back and scan again.
420 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
421 if (!in.match('*')) return DIV;
// Block comment: read until the closing "*/" or end of input.
422 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
// Only a "/*" inside the comment needs attention; everything else is skipped.
423 if (c == '\n' || c != '/' || !in.match('*')) continue;
// "/*/" inside a comment ends it; any other nested "/*" is an error.
424 if (in.match('/')) return getToken();
425 throw new IOException("msg.nested.comment");
427 if (c == -1) throw new IOException("msg.unterminated.comment");
428 return getToken(); // `goto retry'
429 default: throw new IOException("illegal character: " + c);
433 private static class SmartReader {
434 PushbackReader reader = null;
/** Wraps {@code r} in a PushbackReader so single characters can be un-read. */
public SmartReader(Reader r) {
    this.reader = new PushbackReader(r);
}
438 public void unread() throws IOException {
439 reader.unread(lastread);
440 if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
442 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
443 public int peek() throws IOException {
444 int peeked = reader.read();
445 if (peeked != -1) reader.unread((char)peeked);
448 public int read() throws IOException {
449 lastread = reader.read();
450 if (accumulator != null) accumulator.append((char)lastread);
454 // FIXME: could be much more efficient
455 StringBuffer accumulator = null;
456 public void startString() {
457 accumulator = new StringBuffer();
458 accumulator.append((char)lastread);
460 public String getString() throws IOException {
461 String ret = accumulator.toString();