1 // This file was derived from org.mozilla.javascript.TokenStream; it
2 // is covered by the NPL 1.1.
5 * The contents of this file are subject to the Netscape Public
6 * License Version 1.1 (the "License"); you may not use this file
7 * except in compliance with the License. You may obtain a copy of
8 * the License at http://www.mozilla.org/NPL/
10 * Software distributed under the License is distributed on an "AS
11 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
12 * implied. See the License for the specific language governing
13 * rights and limitations under the License.
15 * The Initial Developer of the Original Code is Netscape
16 * Communications Corporation.
18 * Contributor(s): Roger Lawrence, Mike McCabe
/**
 * Command-line driver: builds a Lexer over standard input and prints the
 * codeToString name of every token code returned by getToken(), stopping
 * when getToken() returns -1.
 */
26 public static void main(String[] s) throws Exception {
27 Lexer l = new Lexer(new InputStreamReader(System.in));
// The token code doubles as the index into the codeToString name table.
29 while((tok = l.getToken()) != -1)
30 System.out.println(codeToString[tok]);
// Character source for this lexer; assigned in the constructor.
33 private SmartReader in;
// Set by pushBackToken(int) and cleared by getToken(); while true, the next
// getToken() call returns the saved token code instead of reading input.
34 private boolean pushedBack = false;
/**
 * Creates a lexer that takes its characters from the given reader.
 *
 * @param r character source; it is wrapped in a SmartReader
 * @throws IOException declared by the signature for callers to handle
 */
public Lexer(Reader r) throws IOException {
    this.in = new SmartReader(r);
}
42 public int peekToken() throws IOException { int ret = getToken(); pushBackToken(); return ret; }
43 public void pushBackToken() { pushBackToken(op); }
44 public void pushBackToken(int i) { if (pushedBack) throw new Error("can't push back twice"); pushedBack = true; op = i; }
46 // Token Constants //////////////////////////////////////////////////////////
// Integer token codes returned by the lexer. Each code is also the index of
// the token's printable name in codeToString.
48 public final static int
49 EOL = 1, // end of line
53 ASSIGN_BITOR = 5, // |=
55 ASSIGN_BITXOR= 7, // ^=
57 ASSIGN_BITAND= 9, // &=
65 ASSIGN_LSH = 17, // <<=
67 ASSIGN_RSH = 19, // >>=
69 ASSIGN_URSH = 21, // >>>=
71 ASSIGN_ADD = 23, // +=
73 ASSIGN_SUB = 25, // -=
75 ASSIGN_MUL = 27, // *=
77 ASSIGN_DIV = 29, // /=
79 ASSIGN_MOD = 31, // %=
81 ASSIGN_BITNOT= 33, // ~=
82 DELPROP = 34, // delete
83 TYPEOF = 35, // typeof
84 NAME = 36, // *** identifiers ***
85 NUMBER = 37, // *** numeric literals ***
86 STRING = 38, // *** string literals ***
95 INSTANCEOF = 47, // instanceof
113 FUNCTION = 65, // function
114 IF = 66, // if keyword
115 ELSE = 67, // else keyword
116 SWITCH = 68, // switch keyword
117 CASE = 69, // case keyword
118 DEFAULT = 70, // default keyword
119 WHILE = 71, // while keyword
120 DO = 72, // do keyword
121 FOR = 73, // for keyword
122 BREAK = 74, // break keyword
123 CONTINUE = 75, // continue keyword
124 VAR = 76, // var keyword
125 WITH = 77, // with keyword
126 CATCH = 78, // catch keyword
127 FINALLY = 79, // finally keyword
128 RESERVED = 80, // reserved keywords
130 VOID = 82, // void keyword
// NOTE(review): "%=" has two codes, ASSIGN_MOD (31) and MOD_ASSIGN (83).
// _getToken() returns MOD_ASSIGN for "%="; confirm whether ASSIGN_MOD is
// still used anywhere.
131 MOD_ASSIGN = 83, // %=
133 ASSERT = 85; // assert keyword
// Highest token code defined above.
135 public static final int MAX_TOKEN = ASSERT;
137 public final static String[] codeToString = new String[] {
138 "0", "EOL", "RETURN", "GOTO", "BITOR", "ASSIGN_BITOR",
139 "BITXOR", "ASSIGN_BITXOR", "BITAND", "ASSIGN_BITAND", "EQ",
140 "NE", "LT", "LE", "GT", "GE", "LSH", "ASSIGN_LSH", "RSH",
141 "ASSIGN_RSH", "URSH", "ASSIGN_URSH", "ADD", "ASSIGN_ADD",
142 "SUB", "ASSIGN_SUB", "MUL", "ASSIGN_MUL", "DIV", "ASSIGN_DIV",
143 "MOD", "ASSIGN_MOD", "BITNOT", "ASSIGN_BITNOT=", "DELPROP",
144 "TYPEOF", "NAME", "NUMBER", "STRING", "NULL", "THIS", "FALSE",
145 "TRUE", "SHEQ", "SHNE", "THROW", "IN", "INSTANCEOF", "TRY",
146 "SEMI", "LB", "RB", "LC", "RC", "LP", "RP", "COMMA", "ASSIGN",
147 "HOOK", "COLON", "OR", "AND", "INC", "DEC", "DOT", "FUNCTION",
148 "IF", "ELSE", "SWITCH", "CASE", "DEFAULT", "WHILE", "DO",
149 "FOR", "BREAK", "CONTINUE", "VAR", "WITH", "CATCH", "FINALLY",
150 "RESERVED", "NOP", "VOID", "MOD_ASSIGN", "BANG", "ASSERT" };
153 // Predicates ///////////////////////////////////////////////////////////////////////
/**
 * Tests whether a string is acceptable as a JavaScript identifier.
 * Returns false for an empty string or one whose first character is not a
 * Java identifier start; the remaining characters are then examined one by
 * one. The statement controlled by the condition below and the final
 * return are not visible in this excerpt.
 */
155 protected static boolean isJSIdentifier(String s) {
156 int length = s.length();
157 if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false;
158 for (int i=1; i<length; i++) {
159 char c = s.charAt(i);
// The condition targets a backslash followed by 'u' and four hex digits.
// NOTE(review): "!((i + 5) < length)" looks inverted. It is true only when
// index i + 5 is past the end of the string, so the later s.charAt(i + 5)
// would be out of range whenever evaluation gets that far. Confirm intent.
160 if (!Character.isJavaIdentifierPart(c) && c == '\\' && !((i + 5) < length) &&
161 (s.charAt(i + 1) == 'u') && 0 <= xDigitToInt(s.charAt(i + 2)) && 0 <= xDigitToInt(s.charAt(i + 3)) &&
162 0 <= xDigitToInt(s.charAt(i + 4)) && 0 <= xDigitToInt(s.charAt(i + 5)))
168 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
169 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
170 private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); }
/**
 * Converts a hexadecimal digit character to its numeric value: '0'-'9' give
 * 0-9, and 'a'-'f' or 'A'-'F' give 10-15. The return for any other input is
 * not visible in this excerpt; callers test "0 <= xDigitToInt(c)", so it is
 * presumably negative (confirm).
 */
171 private static int xDigitToInt(int c) {
172 if ('0' <= c && c <= '9') return c - '0';
// Subtracting ('a' - 10) maps 'a' to 10, 'b' to 11, and so on.
173 if ('a' <= c && c <= 'f') return c - ('a' - 10);
174 if ('A' <= c && c <= 'F') return c - ('A' - 10);
/**
 * Reports whether a character counts as whitespace for the lexer: space,
 * tab, form feed, vertical tab, no-break space, or any character whose
 * Unicode general category is SPACE_SEPARATOR. Line feed and carriage
 * return are not in the explicit list.
 */
177 public static boolean isWhiteSpace(int c) {
178 if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true;
// c is narrowed to char before the category lookup.
179 return Character.getType((char)c) == Character.SPACE_SEPARATOR;
183 // Token Subtype Handlers /////////////////////////////////////////////////////////
/**
 * Maps an identifier string to a keyword token code. The lookup switches on
 * the string's length and then on one distinguishing character, so most
 * names are rejected without a full string comparison. Branches that do not
 * match return -1 where visible; several keywords ("new", "const",
 * "delete", "instanceof") are rejected with an IOException instead of
 * producing a token. Parts of this method (breaks, closing braces, the
 * final return) are not visible in this excerpt.
 *
 * @param s identifier text to classify
 * @throws IOException if s is a keyword that is not permitted in XWT scripts
 */
185 private int getKeyword(String s) throws IOException {
187 switch (s.length()) {
188 case 2: c=s.charAt(1);
189 if (c=='f') { if (s.charAt(0)=='i') return IF; }
190 else if (c=='n') { if (s.charAt(0)=='i') return IN; }
191 else if (c=='o') { if (s.charAt(0)=='d') return DO; }
193 case 3: switch (s.charAt(0)) {
194 case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') return FOR; break;
195 case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') return RESERVED; break;
// NOTE(review): no break is visible on the 'n' line, so a three-letter name
// starting with 'n' other than "new" appears to fall through into the 't'
// test below (e.g. "nry" would be reported as TRY). Confirm.
196 case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') throw new IOException("the new keyword is not permitted in XWT scripts");
197 case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') return TRY; break;
198 case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') return VAR; break;
200 case 4: switch (s.charAt(0)) {
201 case 'b': return s.equals("byte") ? RESERVED : -1;
202 case 'c': c=s.charAt(3);
203 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') return CASE; }
204 else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') return RESERVED; }
206 case 'e': c=s.charAt(3);
207 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') return ELSE; }
208 else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') return RESERVED; }
210 case 'g': return s.equals("goto") ? GOTO : -1;
211 case 'l': return s.equals("long") ? RESERVED : -1;
212 case 'n': return s.equals("null") ? NULL : -1;
213 case 't': c=s.charAt(3);
214 if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') return TRUE; }
215 else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') return THIS; }
// "void" yields RESERVED here rather than the VOID token code.
217 case 'v': return s.equals("void") ? RESERVED : -1;
218 case 'w': return s.equals("with") ? WITH : -1;
// Five-letter names are distinguished by their third character.
220 case 5: switch (s.charAt(2)) {
221 case 'a': return s.equals("class") ? RESERVED : -1;
222 case 'e': return s.equals("break") ? BREAK : -1;
223 case 'i': return s.equals("while") ? WHILE : -1;
224 case 'l': return s.equals("false") ? FALSE : -1;
225 case 'n': c=s.charAt(0);
226 if (s.equals("const")) throw new IOException("the const keyword is not permitted in XWT");
227 else if (s.equals("final")) return RESERVED;
// NOTE(review): neither test below can succeed. "float" does not start with
// 'c', and "final" neither starts with 's' nor has 'o' as its third
// character. The intended words look like "float" (first char 'f') and
// "short" (first char 's'). Confirm.
229 case 'o': c=s.charAt(0);
230 if (c == 'c') return s.equals("float") ? RESERVED : -1;
231 else if (c=='s') return s.equals("final") ? RESERVED : -1;
233 case 'p': return s.equals("super") ? RESERVED : -1;
234 case 'r': return s.equals("throw") ? THROW : -1;
235 case 't': return s.equals("catch") ? CATCH : -1;
// Six-letter names are distinguished by their second character.
237 case 6: switch (s.charAt(1)) {
// NOTE(review): "class" has five letters, so this comparison can never be
// true inside the length-6 branch. Confirm which word was intended.
238 case 'a': return s.equals("class") ? RESERVED : -1;
239 case 'e': c=s.charAt(0);
240 if (s.equals("delete")) throw new IOException("the delete keyword is not permitted in XWT scripts");
241 else if (c=='r') return s.equals("return") ? RETURN : -1;
243 case 'h': return s.equals("throws") ? RESERVED : -1;
244 case 'o': return s.equals("double") ? RESERVED : -1;
245 case 's': return s.equals("assert") ? ASSERT : -1;
246 case 'u': return s.equals("public") ? RESERVED : -1;
247 case 'w': return s.equals("switch") ? SWITCH : -1;
248 case 'y': return s.equals("typeof") ? TYPEOF : -1;
250 case 7: switch (s.charAt(1)) {
251 case 'a': return s.equals("package") ? RESERVED : -1;
252 case 'e': return s.equals("default") ? DEFAULT : -1;
253 case 'i': return s.equals("finally") ? FINALLY : -1;
254 case 'o': return s.equals("boolean") ? RESERVED : -1;
255 case 'r': return s.equals("private") ? RESERVED : -1;
256 case 'x': return s.equals("extends") ? RESERVED : -1;
258 case 8: switch (s.charAt(0)) {
259 case 'a': return s.equals("abstract") ? RESERVED : -1;
260 case 'c': return s.equals("continue") ? CONTINUE : -1;
261 case 'd': return s.equals("debugger") ? RESERVED : -1;
262 case 'f': return s.equals("function") ? FUNCTION : -1;
263 case 'v': return s.equals("volatile") ? RESERVED : -1;
265 case 9: c=s.charAt(0);
266 if (c=='i') return s.equals("interface") ? RESERVED : -1;
267 else if (c=='p') return s.equals("protected") ? RESERVED : -1;
268 else if (c=='t') return s.equals("transient") ? RESERVED : -1;
270 case 10: c=s.charAt(1);
271 if (c=='m') return s.equals("implements") ? RESERVED : -1;
272 else if (c=='n' && s.equals("instanceof")) throw new IOException("the instanceof keyword is not permitted in XWT scripts");
274 case 12: return s.equals("synchronized") ? RESERVED : -1;
/**
 * Scans the rest of an identifier and classifies it. Characters are read
 * while they are Java identifier parts, the accumulated text is fetched
 * from the reader, and getKeyword() is consulted; a result other than -1 is
 * returned as the token code. Handling of non-keyword names is not visible
 * in this excerpt.
 */
279 private int getIdentifier(int c) throws IOException {
// Empty-bodied loop: consumes characters up to and including the first one
// that is not an identifier part.
281 while (Character.isJavaIdentifierPart((char)(c = in.read())));
283 String str = in.getString();
284 int result = getKeyword(str);
285 if (result != -1) return result;
/**
 * Scans a numeric literal and stores its value in this.number. The visible
 * code selects base 16 after an 'x'/'X' and base 8 when a digit follows,
 * consumes the digits, accepts a decimal fraction and exponent in base 10,
 * and then parses the accumulated text. Several lines, including the
 * branch structure around parsing and the return, are not visible in this
 * excerpt.
 *
 * @throws IOException on a missing exponent or an unparsable number
 */
290 private int getNumber(int c) throws IOException {
293 double dval = Double.NaN;
295 boolean isInteger = true;
297 // figure out what base we're using
// For hex the accumulator is restarted at the 'x' just read.
299 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
300 else if (isDigit(c)) base = 8;
// Consume digits; the letters a-f only count when the base is 16.
303 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
304 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
306 if (c == '.') do { c = in.read(); } while (isDigit(c));
307 if (c == 'e' || c == 'E') {
// Optional sign, then at least one exponent digit is required.
309 if (c == '+' || c == '-') c = in.read();
310 if (!isDigit(c)) throw new IOException("msg.missing.exponent");
311 do { c = in.read(); } while (isDigit(c));
316 String numString = in.getString();
317 if (base == 10 && !isInteger) {
318 try { dval = (Double.valueOf(numString)).doubleValue(); }
319 catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); }
322 longval = Long.parseLong(numString, base);
323 dval = (double)longval;
325 // FIXME: we're not handling hex/octal fractions... does that matter?
326 dval = Double.parseDouble(numString);
327 longval = (long) dval;
// A double with no fractional part is treated as an integer again.
328 if (longval == dval) isInteger = true;
// Box the value in the narrowest of Byte, Short, Integer that holds it;
// non-integers and integers outside the int range become Double.
332 if (!isInteger) this.number = new Double(dval);
333 else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval);
334 else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval);
335 else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
336 else this.number = new Double(longval);
/**
 * Scans a quoted string literal and stores its text in this.string. While
 * no escape has been seen the text is taken straight from the reader's
 * accumulator; once an escape is processed, characters are collected in
 * stringBuf instead. Several lines (the quote setup, the escape test, the
 * read that advances the loop, the return) are not visible in this excerpt.
 *
 * @throws IOException on a newline or end of input inside the literal, or
 *         on an unsupported escape sequence
 */
340 private int getString(int c) throws IOException {
341 StringBuffer stringBuf = null;
345 in.startString(); // start after the first "
346 while(c != quoteChar) {
347 if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit");
// First escape: switch from the reader's accumulator to stringBuf.
349 if (stringBuf == null) {
350 in.unread(); // Don't include the backslash
351 stringBuf = new StringBuffer(in.getString());
// NOTE(review): only b, f, n, r, t and v are handled. Any other escaped
// character, apparently including backslash and the quote characters,
// reaches the default branch and throws. Confirm this is intended.
354 switch (c = in.read()) {
355 case 'b': c = '\b'; break;
356 case 'f': c = '\f'; break;
357 case 'n': c = '\n'; break;
358 case 'r': c = '\r'; break;
359 case 't': c = '\t'; break;
360 case 'v': c = '\u000B'; break;
361 default: throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
364 if (stringBuf != null) stringBuf.append((char) c);
367 if (stringBuf != null) this.string = stringBuf.toString();
369 in.unread(); // miss the trailing "
370 this.string = in.getString();
/**
 * Returns the next token code. A token pushed back with pushBackToken() is
 * returned first and the pushed-back flag is cleared. Otherwise tokens are
 * fetched in a loop that repeats while the current token is EOL; the loop
 * body and the return are only partly visible in this excerpt.
 */
376 public int getToken() throws IOException {
377 if (pushedBack) { pushedBack = false; return op; }
// Remember the previous token in twoBack unless it was an end-of-line.
379 if (op != EOL) twoBack = op;
381 } while (op == EOL); // FIXME
382 //if (op == SEMI) throw new Error();
/**
 * Reads one raw token from the input. Leading whitespace is skipped, then
 * the first significant character selects the handler: identifiers,
 * numbers and strings are delegated to getIdentifier(), getNumber() and
 * getString(); punctuation and operators are decoded inline, using
 * in.match() to look for multi-character operators. Comments are skipped
 * by calling getToken() again. Returns -1 at end of input. Some lines,
 * including the switch header and a few cases, are not visible in this
 * excerpt.
 *
 * @throws IOException on an illegal character, an unterminated or nested
 *         comment, or a backslash-u escape outside a string
 */
386 public int _getToken() throws IOException {
// Skip whitespace, but stop at a newline so it can be reported as EOL.
388 do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n');
389 if (c == -1) return -1;
390 if (c == '\\' && in.peek() == 'u') throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
391 if (Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
// A '.' starts a number only when a digit follows it.
392 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
393 if (c == '"' || c == '\'') return getString(c);
395 case '\n': return EOL;
396 case ';': return SEMI;
403 case ',': return COMMA;
404 case '?': return HOOK;
405 case ':': return COLON;
406 case '.': return DOT;
407 case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
408 case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
409 case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
// "=" is ASSIGN, "==" is EQ, "===" is SHEQ; "!" follows the same pattern.
410 case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
411 case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
412 case '%': return in.match('=') ? MOD_ASSIGN : MOD;
413 case '~': return BITNOT;
414 case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
415 case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
416 case '*': return in.match('=') ? ASSIGN_MUL : MUL;
417 case '<': return !in.match('<') ?
418 (in.match('=') ? LE : LT) :
419 in.match('=') ? ASSIGN_LSH : LSH;
420 case '>': return !in.match('>') ?
421 (in.match('=') ? GE : GT) :
423 (in.match('=') ? ASSIGN_URSH : URSH) :
424 (in.match('=') ? ASSIGN_RSH : RSH);
426 if (in.match('=')) return ASSIGN_DIV;
// Line comment: discard up to the newline or end of input, push that
// terminator back, then fetch the next token.
427 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
428 if (!in.match('*')) return DIV;
// Block comment: read until the closing "*/" or end of input.
429 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
// Only a "/*" sequence inside the comment gets past this line. "/*/" is
// then treated as the end of the comment; any other "/*" is rejected as a
// nested comment.
430 if (c == '\n' || c != '/' || !in.match('*')) continue;
431 if (in.match('/')) return getToken();
432 throw new IOException("msg.nested.comment");
434 if (c == -1) throw new IOException("msg.unterminated.comment");
435 return getToken(); // `goto retry'
436 default: throw new IOException("illegal character: " + c);
// Reader wrapper used by the lexer. The members visible in this excerpt add
// one-character pushback (unread), lookahead (peek, match) and an optional
// accumulator that records the characters read (startString, getString).
440 private static class SmartReader {
// Underlying reader; replaced in the constructor by a PushbackReader.
441 PushbackReader reader = null;
/**
 * Wraps the given reader in a PushbackReader so characters can be unread.
 *
 * @param r the character source
 */
public SmartReader(Reader r) {
    this.reader = new PushbackReader(r);
}
// Pushes the most recently read character (lastread) back onto the
// underlying reader and, if an accumulator is active, removes its last
// character.
// NOTE(review): read() stores the raw result of reader.read() in lastread,
// so after end of input lastread may be -1 and that value would be pushed
// back here. Confirm callers never unread at end of input.
445 public void unread() throws IOException {
446 reader.unread(lastread);
447 if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
449 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
// Looks at the next character without consuming it: reads one character
// from the underlying reader and pushes it back unless it is the
// end-of-input value -1. The return statement is not visible in this
// excerpt.
450 public int peek() throws IOException {
451 int peeked = reader.read();
452 if (peeked != -1) reader.unread((char)peeked);
// Reads one character from the underlying reader, remembers it in lastread
// and, if an accumulator is active, appends it there. The value is appended
// even when it is -1 (end of input), after narrowing to char. The return
// statement is not visible in this excerpt.
455 public int read() throws IOException {
456 lastread = reader.read();
457 if (accumulator != null) accumulator.append((char)lastread);
461 // FIXME: could be much more efficient
462 StringBuffer accumulator = null;
// Starts recording characters: creates a fresh accumulator and seeds it
// with the most recently read character (lastread).
463 public void startString() {
464 accumulator = new StringBuffer();
465 accumulator.append((char)lastread);
467 public String getString() throws IOException {
468 String ret = accumulator.toString();