1 // This file was derived from org.mozilla.javascript.TokenStream; it
\r
2 // is covered by the NPL 1.1.
\r
5 * The contents of this file are subject to the Netscape Public
\r
6 * License Version 1.1 (the "License"); you may not use this file
\r
7 * except in compliance with the License. You may obtain a copy of
\r
8 * the License at http://www.mozilla.org/NPL/
\r
10 * Software distributed under the License is distributed on an "AS
\r
11 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
\r
12 * implied. See the License for the specific language governing
\r
13 * rights and limitations under the License.
\r
15 * The Initial Developer of the Original Code is Netscape
\r
16 * Communications Corporation.
\r
18 * Contributor(s): Roger Lawrence, Mike McCabe
\r
/**
 * Creates a lexer over the given character source.
 *
 * @param r the reader to tokenize; it is wrapped in a SmartReader and stored in {@code in}
 */
public Lexer(Reader r) {
    this.in = new SmartReader(r);
}
\r
32 // Token Constants //////////////////////////////////////////////////////////
\r
34 public final static int
\r
35 EOL = 1, // end of line
\r
65 SHEQ = 53, // shallow equality (===)
\r
66 SHNE = 54, // shallow inequality (!==)
\r
71 SEMI = 89, // semicolon
\r
72 LB = 90, // left bracket
\r
73 RB = 91, // right bracket
\r
74 LC = 92, // left curly brace
\r
75 RC = 93, // right curly brace
\r
76 LP = 94, // left paren
\r
77 RP = 95, // right paren
\r
78 COMMA = 96, // comma operator
\r
79 ASSIGN = 97, // assignment ops (= += -= etc.)
\r
80 HOOK = 98, // conditional (?:)
\r
81 COLON = 99, // colon
\r
82 OR = 100, // logical or (||)
\r
83 AND = 101, // logical and (&&)
\r
84 EQOP = 102, // equality ops (== !=)
\r
85 RELOP = 103, // relational ops (< <= > >=)
\r
86 SHOP = 104, // shift ops (<< >> >>>)
\r
87 UNARYOP = 105, // unary prefix operator
\r
88 INC = 106, // increment (++)
\r
89 DEC = 107, // decrement (--)
\r
90 DOT = 108, // member operator (.)
\r
91 PRIMARY = 109, // true, false, null, this
\r
92 FUNCTION = 110, // function keyword
\r
94 IF = 113, // if keyword
\r
95 ELSE = 114, // else keyword
\r
96 SWITCH = 115, // switch keyword
\r
97 CASE = 116, // case keyword
\r
98 DEFAULT = 117, // default keyword
\r
99 WHILE = 118, // while keyword
\r
100 DO = 119, // do keyword
\r
101 FOR = 120, // for keyword
\r
102 BREAK = 121, // break keyword
\r
103 CONTINUE = 122, // continue keyword
\r
104 VAR = 123, // var keyword
\r
105 WITH = 124, // with keyword
\r
106 CATCH = 125, // catch keyword
\r
107 FINALLY = 126, // finally keyword
\r
108 RESERVED = 127, // reserved keywords
\r
110 VOID = 132, // void keyword
\r
111 ASSERT = 150; // assert keyword
\r
114 // Predicates ///////////////////////////////////////////////////////////////////////
\r
116 protected static boolean isJSIdentifier(String s) {
\r
117 int length = s.length();
\r
118 if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false;
\r
119 for (int i=1; i<length; i++) {
\r
120 char c = s.charAt(i);
\r
121 if (!Character.isJavaIdentifierPart(c) && c == '\\' && !((i + 5) < length) &&
\r
122 (s.charAt(i + 1) == 'u') && 0 <= xDigitToInt(s.charAt(i + 2)) && 0 <= xDigitToInt(s.charAt(i + 3)) &&
\r
123 0 <= xDigitToInt(s.charAt(i + 4)) && 0 <= xDigitToInt(s.charAt(i + 5)))
\r
129 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
\r
130 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
\r
131 private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); }
\r
132 private static int xDigitToInt(int c) {
\r
133 if ('0' <= c && c <= '9') return c - '0';
\r
134 if ('a' <= c && c <= 'f') return c - ('a' - 10);
\r
135 if ('A' <= c && c <= 'F') return c - ('A' - 10);
\r
138 public static boolean isWhiteSpace(int c) {
\r
139 if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true;
\r
140 return Character.getType((char)c) == Character.SPACE_SEPARATOR;
\r
144 // Token Subtype Handlers /////////////////////////////////////////////////////////
\r
146 private int getKeyword(String name) throws IOException {
\r
150 Id_continue = CONTINUE,
\r
151 Id_default = DEFAULT,
\r
152 Id_delete = DELPROP,
\r
155 Id_false = PRIMARY | (FALSE << 8),
\r
157 Id_function = FUNCTION,
\r
159 Id_in = RELOP | (IN << 8),
\r
160 Id_null = PRIMARY | (NULL << 8),
\r
161 Id_return = RETURN,
\r
162 Id_switch = SWITCH,
\r
163 Id_this = PRIMARY | (THIS << 8),
\r
164 Id_true = PRIMARY | (TRUE << 8),
\r
165 Id_typeof = UNARYOP | (TYPEOF << 8),
\r
167 Id_void = UNARYOP | (VOID << 8),
\r
171 // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c
\r
172 Id_abstract = RESERVED,
\r
173 Id_boolean = RESERVED,
\r
174 Id_byte = RESERVED,
\r
176 Id_char = RESERVED,
\r
177 Id_class = RESERVED,
\r
178 Id_const = RESERVED,
\r
179 Id_debugger = RESERVED,
\r
180 Id_double = RESERVED,
\r
181 Id_enum = RESERVED,
\r
182 Id_extends = RESERVED,
\r
183 Id_final = RESERVED,
\r
184 Id_finally = FINALLY,
\r
185 Id_float = RESERVED,
\r
186 Id_goto = RESERVED,
\r
187 Id_implements = RESERVED,
\r
188 Id_instanceof = RELOP | (INSTANCEOF << 8),
\r
190 Id_interface = RESERVED,
\r
191 Id_long = RESERVED,
\r
192 Id_native = RESERVED,
\r
193 Id_package = RESERVED,
\r
194 Id_private = RESERVED,
\r
195 Id_protected = RESERVED,
\r
196 Id_public = RESERVED,
\r
197 Id_assert = ASSERT,
\r
198 Id_short = RESERVED,
\r
199 Id_static = RESERVED,
\r
200 Id_super = RESERVED,
\r
201 Id_synchronized = RESERVED,
\r
203 Id_throws = RESERVED,
\r
204 Id_transient = RESERVED,
\r
206 Id_volatile = RESERVED;
\r
210 L0: { id = -1; String X = null; int c;
\r
211 L: switch (s.length()) {
\r
212 case 2: c=s.charAt(1);
\r
213 if (c=='f') { if (s.charAt(0)=='i') {id=Id_if; break L0;} }
\r
214 else if (c=='n') { if (s.charAt(0)=='i') {id=Id_in; break L0;} }
\r
215 else if (c=='o') { if (s.charAt(0)=='d') {id=Id_do; break L0;} }
\r
217 case 3: switch (s.charAt(0)) {
\r
218 case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') {id=Id_for; break L0;} break L;
\r
219 case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') {id=Id_int; break L0;} break L;
\r
220 case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e')
\r
221 throw new IOException("the new keyword is not permitted in XWT scripts");
\r
223 case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') {id=Id_try; break L0;} break L;
\r
224 case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') {id=Id_var; break L0;} break L;
\r
226 case 4: switch (s.charAt(0)) {
\r
227 case 'b': X="byte";id=Id_byte; break L;
\r
228 case 'c': c=s.charAt(3);
\r
229 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') {id=Id_case; break L0;} }
\r
230 else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') {id=Id_char; break L0;} }
\r
232 case 'e': c=s.charAt(3);
\r
233 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') {id=Id_else; break L0;} }
\r
234 else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') {id=Id_enum; break L0;} }
\r
236 case 'g': X="goto";id=Id_goto; break L;
\r
237 case 'l': X="long";id=Id_long; break L;
\r
238 case 'n': X="null";id=Id_null; break L;
\r
239 case 't': c=s.charAt(3);
\r
240 if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') {id=Id_true; break L0;} }
\r
241 else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') {id=Id_this; break L0;} }
\r
243 case 'v': X="void";id=Id_void; break L;
\r
244 case 'w': X="with";id=Id_with; break L;
\r
246 case 5: switch (s.charAt(2)) {
\r
247 case 'a': X="class";id=Id_class; break L;
\r
248 case 'e': X="break";id=Id_break; break L;
\r
249 case 'i': X="while";id=Id_while; break L;
\r
250 case 'l': X="false";id=Id_false; break L;
\r
251 case 'n': c=s.charAt(0);
\r
252 if (c=='c') { X="const"; throw new IOException("the const keyword is not permitted in XWT"); }
\r
253 else if (c=='f') { X="final";id=Id_final; }
\r
255 case 'o': c=s.charAt(0);
\r
256 if (c=='f') { X="float";id=Id_float; }
\r
257 else if (c=='s') { X="short";id=Id_short; }
\r
259 case 'p': X="super";id=Id_super; break L;
\r
260 case 'r': X="throw";id=Id_throw; break L;
\r
261 case 't': X="catch";id=Id_catch; break L;
\r
263 case 6: switch (s.charAt(1)) {
\r
264 case 'a': X="native";id=Id_native; break L;
\r
265 case 'e': c=s.charAt(0);
\r
266 if (c=='d') { X="delete"; throw new IOException("the delete keyword is not permitted in XWT scripts"); }
\r
267 else if (c=='r') { X="return";id=Id_return; }
\r
269 case 'h': X="throws";id=Id_throws; break L;
\r
270 case 'o': X="double";id=Id_double; break L;
\r
271 case 's': X="assert";id=Id_assert; break L;
\r
272 case 'u': X="public";id=Id_public; break L;
\r
273 case 'w': X="switch";id=Id_switch; break L;
\r
274 case 'y': X="typeof";id=Id_typeof; break L;
\r
276 case 7: switch (s.charAt(1)) {
\r
277 case 'a': X="package";id=Id_package; break L;
\r
278 case 'e': X="default";id=Id_default; break L;
\r
279 case 'i': X="finally";id=Id_finally; break L;
\r
280 case 'o': X="boolean";id=Id_boolean; break L;
\r
281 case 'r': X="private";id=Id_private; break L;
\r
282 case 'x': X="extends";id=Id_extends; break L;
\r
284 case 8: switch (s.charAt(0)) {
\r
285 case 'a': X="abstract";id=Id_abstract; break L;
\r
286 case 'c': X="continue";id=Id_continue; break L;
\r
287 case 'd': X="debugger";id=Id_debugger; break L;
\r
288 case 'f': X="function";id=Id_function; break L;
\r
289 case 'v': X="volatile";id=Id_volatile; break L;
\r
291 case 9: c=s.charAt(0);
\r
292 if (c=='i') { X="interface";id=Id_interface; }
\r
293 else if (c=='p') { X="protected";id=Id_protected; }
\r
294 else if (c=='t') { X="transient";id=Id_transient; }
\r
296 case 10: c=s.charAt(1);
\r
297 if (c=='m') { X="implements";id=Id_implements; }
\r
298 else if (c=='n') { X="instanceof"; throw new IOException("the instanceof keyword is not permitted in XWT scripts"); }
\r
300 case 12: X="synchronized";id=Id_synchronized; break L;
\r
302 if (X!=null && X!=s && !X.equals(s)) id = -1;
\r
304 if (id == -1) { return -1; }
\r
309 private int getIdentifier(int c) throws IOException {
\r
311 while (Character.isJavaIdentifierPart((char)(c = in.read())));
\r
313 String str = in.getString();
\r
314 int result = getKeyword(str);
\r
315 if (result != -1) return result;
\r
320 private int getNumber(int c) throws IOException {
\r
323 double dval = Double.NaN;
\r
325 boolean isInteger = true;
\r
327 // figure out what base we're using
\r
329 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
\r
330 else if (isDigit(c)) base = 8;
\r
333 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
\r
334 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
\r
336 if (c == '.') do { c = in.read(); } while (isDigit(c));
\r
337 if (c == 'e' || c == 'E') {
\r
339 if (c == '+' || c == '-') c = in.read();
\r
340 if (!isDigit(c)) throw new IOException("msg.missing.exponent");
\r
341 do { c = in.read(); } while (isDigit(c));
\r
346 String numString = in.getString();
\r
347 if (base == 10 && !isInteger) {
\r
348 try { dval = (Double.valueOf(numString)).doubleValue(); }
\r
349 catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); }
\r
352 longval = Long.parseLong(numString, base);
\r
353 dval = (double)longval;
\r
355 // FIXME: we're not handling hex/octal fractions... does that matter?
\r
356 dval = Double.parseDouble(numString);
\r
357 longval = (long) dval;
\r
358 if (longval == dval) isInteger = true;
\r
362 if (!isInteger) this.number = new Double(dval);
\r
363 else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval);
\r
364 else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval);
\r
365 else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
\r
366 else this.number = new Double(longval);
\r
370 private int getString(int c) throws IOException {
\r
371 StringBuffer stringBuf = null;
\r
375 in.startString(); // start after the first "
\r
376 while(c != quoteChar) {
\r
377 if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit");
\r
379 if (stringBuf == null) {
\r
380 in.unread(); // Don't include the backslash
\r
381 stringBuf = new StringBuffer(in.getString());
\r
384 switch (c = in.read()) {
\r
385 case 'b': c = '\b'; break;
\r
386 case 'f': c = '\f'; break;
\r
387 case 'n': c = '\n'; break;
\r
388 case 'r': c = '\r'; break;
\r
389 case 't': c = '\t'; break;
\r
390 case 'v': c = '\u000B'; break;
\r
391 default: throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
\r
394 if (stringBuf != null) stringBuf.append((char) c);
\r
397 if (stringBuf != null) this.string = stringBuf.toString();
\r
399 in.unread(); // miss the trailing "
\r
400 this.string = in.getString();
\r
406 // hack because you can't do "foo ? 1 : (bar(); 2)" in Java
\r
407 final int op(int set, int ret) { this.op = set; return ret; }
\r
409 public int getToken() throws IOException {
\r
411 do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n');
\r
412 if (c == -1) return -1;
\r
413 if (c == '\\' && in.peek() == 'u') throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
\r
414 if (Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
\r
415 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
\r
416 if (c == '"' || c == '\'') return getString(c);
\r
418 case '\n': return EOL;
\r
419 case ';': return SEMI;
\r
420 case '[': return LB;
\r
421 case ']': return RB;
\r
422 case '{': return LC;
\r
423 case '}': return RC;
\r
424 case '(': return LP;
\r
425 case ')': return RP;
\r
426 case ',': return COMMA;
\r
427 case '?': return HOOK;
\r
428 case ':': return COLON;
\r
429 case '.': return DOT;
\r
430 case '|': return in.match('|') ? OR : (in.match('=') ? op(BITOR, ASSIGN) : BITOR);
\r
431 case '^': return in.match('=') ? op(BITXOR, ASSIGN) : BITXOR;
\r
432 case '&': return in.match('&') ? AND : in.match('=') ? op(BITAND, ASSIGN) : BITAND;
\r
433 case '=': return !in.match('=') ? op(NOP, ASSIGN) : op(in.match('=') ? SHEQ : EQ, EQOP);
\r
434 case '!': return !in.match('=') ? op(NOP, UNARYOP) : op(in.match('=') ? SHNE : NE, EQOP);
\r
435 case '%': return op(MOD, in.match('=') ? ASSIGN : MOD);
\r
436 case '~': return op(BITNOT, UNARYOP);
\r
437 case '+': return in.match('=') ? op(ADD, ASSIGN) : in.match('+') ? INC : ADD;
\r
438 case '-': return in.match('=') ? op(SUB, ASSIGN) : in.match('-') ? DEC : SUB;
\r
439 case '*': return in.match('=') ? op(MUL, ASSIGN) : MUL;
\r
440 case '<': return !in.match('<') ?
\r
441 op(in.match('=') ? LE : LT, RELOP) :
\r
442 in.match('=') ? op(LSH, ASSIGN) : op(LSH, SHOP);
\r
443 case '>': return !in.match('>') ?
\r
444 op(in.match('=') ? GE : GT, RELOP) :
\r
446 op(URSH, in.match('=') ? ASSIGN : SHOP) :
\r
447 op(RSH, in.match('=') ? ASSIGN : SHOP);
\r
449 if (in.match('=')) { op(DIV, ASSIGN); }
\r
450 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
\r
451 if (!in.match('*')) return DIV;
\r
452 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
\r
453 if (c == '\n' || c != '/' || !in.match('*')) continue;
\r
454 if (in.match('/')) return getToken();
\r
455 throw new IOException("msg.nested.comment");
\r
457 if (c == -1) throw new IOException("msg.unterminated.comment");
\r
458 return getToken(); // `goto retry'
\r
459 default: throw new IOException("illegal character: " + c);
\r
463 private static class SmartReader {
\r
464 PushbackReader reader = null;
\r
/**
 * Wraps the given reader in a PushbackReader so characters can be pushed back.
 *
 * @param r the underlying character source
 */
public SmartReader(Reader r) {
    this.reader = new PushbackReader(r);
}
\r
468 public void unread() throws IOException { reader.unread(lastread); }
\r
469 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
\r
470 public int peek() throws IOException {
\r
471 int peeked = reader.read();
\r
472 if (peeked != -1) reader.unread((char)peeked);
\r
475 public int read() throws IOException {
\r
476 lastread = reader.read();
\r
477 if (accumulator != null) accumulator.append(lastread);
\r
481 // FIXME: could be much more efficient
\r
482 StringBuffer accumulator = null;
\r
483 public void startString() { accumulator = new StringBuffer(); }
\r
484 public String getString() throws IOException {
\r
485 String ret = accumulator.toString();
\r
486 accumulator = null;
\r