1 // This file was derived from org.mozilla.javascript.TokenStream; it
2 // is covered by the NPL 1.1.
5 * The contents of this file are subject to the Netscape Public
6 * License Version 1.1 (the "License"); you may not use this file
7 * except in compliance with the License. You may obtain a copy of
8 * the License at http://www.mozilla.org/NPL/
10 * Software distributed under the License is distributed on an "AS
11 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
12 * implied. See the License for the specific language governing
13 * rights and limitations under the License.
15 * The Initial Developer of the Original Code is Netscape
16 * Communications Corporation.
18 * Contributor(s): Roger Lawrence, Mike McCabe
26 private SmartReader in;
27 private boolean pushedBack = false;
/**
 * Creates a lexer that reads its input from the given reader.
 *
 * @param r the character source; it is wrapped in a {@code SmartReader}
 * @throws IOException declared for callers; the visible body only wraps the reader
 */
public Lexer(Reader r) throws IOException {
    this.in = new SmartReader(r);
}
34 public int peekToken() throws IOException { int ret = peekToken(); pushBackToken(); return ret; }
35 public void pushBackToken() { if (pushedBack) throw new Error("can't push back twice"); pushedBack = true; }
37 // Token Constants //////////////////////////////////////////////////////////
39 public final static int
40 EOL = 1, // end of line
44 ASSIGN_BITOR = 211, // |=
46 ASSIGN_BITXOR= 212, // ^=
48 ASSIGN_BITAND= 213, // &=
56 ASSIGN_LSH = 220, // <<=
58 ASSIGN_RSH = 221, // >>=
60 ASSIGN_URSH = 222, // >>>=
62 ASSIGN_ADD = 223, // +=
64 ASSIGN_SUB = 224, // -=
66 ASSIGN_MUL = 225, // *=
68 ASSIGN_DIV = 226, // /=
70 ASSIGN_MOD = 227, // %=
72 ASSIGN_BITNOT= 228, // ~=
73 DELPROP = 31, // delete
74 TYPEOF = 32, // typeof
75 NAME = 44, // *** identifiers ***
76 NUMBER = 45, // *** numeric literals ***
77 STRING = 46, // *** string literals ***
86 INSTANCEOF = 64, // instanceof
104 FUNCTION = 110, // function
105 IF = 113, // if keyword
106 ELSE = 114, // else keyword
107 SWITCH = 115, // switch keyword
108 CASE = 116, // case keyword
109 DEFAULT = 117, // default keyword
110 WHILE = 118, // while keyword
111 DO = 119, // do keyword
112 FOR = 120, // for keyword
113 BREAK = 121, // break keyword
114 CONTINUE = 122, // continue keyword
115 VAR = 123, // var keyword
116 WITH = 124, // with keyword
117 CATCH = 125, // catch keyword
118 FINALLY = 126, // finally keyword
119 RESERVED = 127, // reserved keywords
121 VOID = 132, // void keyword
122 MOD_ASSIGN = 133, // %=
124 ASSERT = 150; // assert keyword
127 // Predicates ///////////////////////////////////////////////////////////////////////
129 protected static boolean isJSIdentifier(String s) {
130 int length = s.length();
131 if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) return false;
132 for (int i=1; i<length; i++) {
133 char c = s.charAt(i);
134 if (!Character.isJavaIdentifierPart(c) && c == '\\' && !((i + 5) < length) &&
135 (s.charAt(i + 1) == 'u') && 0 <= xDigitToInt(s.charAt(i + 2)) && 0 <= xDigitToInt(s.charAt(i + 3)) &&
136 0 <= xDigitToInt(s.charAt(i + 4)) && 0 <= xDigitToInt(s.charAt(i + 5)))
142 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
143 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
144 private static boolean isLineBreak(int c) { return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); }
145 private static int xDigitToInt(int c) {
146 if ('0' <= c && c <= '9') return c - '0';
147 if ('a' <= c && c <= 'f') return c - ('a' - 10);
148 if ('A' <= c && c <= 'F') return c - ('A' - 10);
151 public static boolean isWhiteSpace(int c) {
152 if (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0') return true;
153 return Character.getType((char)c) == Character.SPACE_SEPARATOR;
157 // Token Subtype Handlers /////////////////////////////////////////////////////////
159 private int getKeyword(String name) throws IOException {
163 Id_continue = CONTINUE,
164 Id_default = DEFAULT,
170 Id_function = FUNCTION,
184 // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c
185 Id_abstract = RESERVED,
186 Id_boolean = RESERVED,
192 Id_debugger = RESERVED,
193 Id_double = RESERVED,
195 Id_extends = RESERVED,
197 Id_finally = FINALLY,
200 Id_implements = RESERVED,
201 Id_instanceof = INSTANCEOF,
203 Id_interface = RESERVED,
205 Id_native = RESERVED,
206 Id_package = RESERVED,
207 Id_private = RESERVED,
208 Id_protected = RESERVED,
209 Id_public = RESERVED,
212 Id_static = RESERVED,
214 Id_synchronized = RESERVED,
216 Id_throws = RESERVED,
217 Id_transient = RESERVED,
219 Id_volatile = RESERVED;
223 L0: { id = -1; String X = null; int c;
224 L: switch (s.length()) {
225 case 2: c=s.charAt(1);
226 if (c=='f') { if (s.charAt(0)=='i') {id=Id_if; break L0;} }
227 else if (c=='n') { if (s.charAt(0)=='i') {id=Id_in; break L0;} }
228 else if (c=='o') { if (s.charAt(0)=='d') {id=Id_do; break L0;} }
230 case 3: switch (s.charAt(0)) {
231 case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') {id=Id_for; break L0;} break L;
232 case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') {id=Id_int; break L0;} break L;
233 case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e')
234 throw new IOException("the new keyword is not permitted in XWT scripts");
236 case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') {id=Id_try; break L0;} break L;
237 case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') {id=Id_var; break L0;} break L;
239 case 4: switch (s.charAt(0)) {
240 case 'b': X="byte";id=Id_byte; break L;
241 case 'c': c=s.charAt(3);
242 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') {id=Id_case; break L0;} }
243 else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') {id=Id_char; break L0;} }
245 case 'e': c=s.charAt(3);
246 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') {id=Id_else; break L0;} }
247 else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') {id=Id_enum; break L0;} }
249 case 'g': X="goto";id=Id_goto; break L;
250 case 'l': X="long";id=Id_long; break L;
251 case 'n': X="null";id=Id_null; break L;
252 case 't': c=s.charAt(3);
253 if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') {id=Id_true; break L0;} }
254 else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') {id=Id_this; break L0;} }
256 case 'v': X="void";id=Id_void; break L;
257 case 'w': X="with";id=Id_with; break L;
259 case 5: switch (s.charAt(2)) {
260 case 'a': X="class";id=Id_class; break L;
261 case 'e': X="break";id=Id_break; break L;
262 case 'i': X="while";id=Id_while; break L;
263 case 'l': X="false";id=Id_false; break L;
264 case 'n': c=s.charAt(0);
265 if (c=='c') { X="const"; throw new IOException("the const keyword is not permitted in XWT"); }
266 else if (c=='f') { X="final";id=Id_final; }
268 case 'o': c=s.charAt(0);
269 if (c=='f') { X="float";id=Id_float; }
270 else if (c=='s') { X="short";id=Id_short; }
272 case 'p': X="super";id=Id_super; break L;
273 case 'r': X="throw";id=Id_throw; break L;
274 case 't': X="catch";id=Id_catch; break L;
276 case 6: switch (s.charAt(1)) {
277 case 'a': X="native";id=Id_native; break L;
278 case 'e': c=s.charAt(0);
279 if (c=='d') { X="delete"; throw new IOException("the delete keyword is not permitted in XWT scripts"); }
280 else if (c=='r') { X="return";id=Id_return; }
282 case 'h': X="throws";id=Id_throws; break L;
283 case 'o': X="double";id=Id_double; break L;
284 case 's': X="assert";id=Id_assert; break L;
285 case 'u': X="public";id=Id_public; break L;
286 case 'w': X="switch";id=Id_switch; break L;
287 case 'y': X="typeof";id=Id_typeof; break L;
289 case 7: switch (s.charAt(1)) {
290 case 'a': X="package";id=Id_package; break L;
291 case 'e': X="default";id=Id_default; break L;
292 case 'i': X="finally";id=Id_finally; break L;
293 case 'o': X="boolean";id=Id_boolean; break L;
294 case 'r': X="private";id=Id_private; break L;
295 case 'x': X="extends";id=Id_extends; break L;
297 case 8: switch (s.charAt(0)) {
298 case 'a': X="abstract";id=Id_abstract; break L;
299 case 'c': X="continue";id=Id_continue; break L;
300 case 'd': X="debugger";id=Id_debugger; break L;
301 case 'f': X="function";id=Id_function; break L;
302 case 'v': X="volatile";id=Id_volatile; break L;
304 case 9: c=s.charAt(0);
305 if (c=='i') { X="interface";id=Id_interface; }
306 else if (c=='p') { X="protected";id=Id_protected; }
307 else if (c=='t') { X="transient";id=Id_transient; }
309 case 10: c=s.charAt(1);
310 if (c=='m') { X="implements";id=Id_implements; }
311 else if (c=='n') { X="instanceof"; throw new IOException("the instanceof keyword is not permitted in XWT scripts"); }
313 case 12: X="synchronized";id=Id_synchronized; break L;
315 if (X!=null && X!=s && !X.equals(s)) id = -1;
317 if (id == -1) { return -1; }
322 private int getIdentifier(int c) throws IOException {
324 while (Character.isJavaIdentifierPart((char)(c = in.read())));
326 String str = in.getString();
327 int result = getKeyword(str);
328 if (result != -1) return result;
333 private int getNumber(int c) throws IOException {
336 double dval = Double.NaN;
338 boolean isInteger = true;
340 // figure out what base we're using
342 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
343 else if (isDigit(c)) base = 8;
346 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
347 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
349 if (c == '.') do { c = in.read(); } while (isDigit(c));
350 if (c == 'e' || c == 'E') {
352 if (c == '+' || c == '-') c = in.read();
353 if (!isDigit(c)) throw new IOException("msg.missing.exponent");
354 do { c = in.read(); } while (isDigit(c));
359 String numString = in.getString();
360 if (base == 10 && !isInteger) {
361 try { dval = (Double.valueOf(numString)).doubleValue(); }
362 catch (NumberFormatException ex) { throw new IOException("msg.caught.nfe"); }
365 longval = Long.parseLong(numString, base);
366 dval = (double)longval;
368 // FIXME: we're not handling hex/octal fractions... does that matter?
369 dval = Double.parseDouble(numString);
370 longval = (long) dval;
371 if (longval == dval) isInteger = true;
375 if (!isInteger) this.number = new Double(dval);
376 else if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE) this.number = new Byte((byte)longval);
377 else if (Short.MIN_VALUE <= longval && longval <= Short.MAX_VALUE) this.number = new Short((short)longval);
378 else if (Integer.MIN_VALUE <= longval && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
379 else this.number = new Double(longval);
383 private int getString(int c) throws IOException {
384 StringBuffer stringBuf = null;
388 in.startString(); // start after the first "
389 while(c != quoteChar) {
390 if (c == '\n' || c == -1) throw new IOException("msg.unterminated.string.lit");
392 if (stringBuf == null) {
393 in.unread(); // Don't include the backslash
394 stringBuf = new StringBuffer(in.getString());
397 switch (c = in.read()) {
398 case 'b': c = '\b'; break;
399 case 'f': c = '\f'; break;
400 case 'n': c = '\n'; break;
401 case 'r': c = '\r'; break;
402 case 't': c = '\t'; break;
403 case 'v': c = '\u000B'; break;
404 default: throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
407 if (stringBuf != null) stringBuf.append((char) c);
410 if (stringBuf != null) this.string = stringBuf.toString();
412 in.unread(); // miss the trailing "
413 this.string = in.getString();
/**
 * Returns the next token code, honoring a single token of push-back.
 *
 * @return the token code, remembered in {@code op} so it can be replayed after a push-back
 * @throws IOException if the underlying scanner fails
 */
public int getToken() throws IOException {
    // A pushed-back token is replayed exactly once from the remembered op.
    if (pushedBack) { pushedBack = false; return op; }
    // Delegate to the real scanner; calling getToken() here recursed forever.
    return (op = _getToken());
424 public int _getToken() throws IOException {
426 do { if ((c = in.read()) == '\n') break; } while (isWhiteSpace(c) || c == '\n');
427 if (c == -1) return -1;
428 if (c == '\\' && in.peek() == 'u') throw new IOException("\\u and \\0 escapes not currently supported -- use XML entities");
429 if (Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
430 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
431 if (c == '"' || c == '\'') return getString(c);
433 case '\n': return EOL;
434 case ';': return SEMI;
441 case ',': return COMMA;
442 case '?': return HOOK;
443 case ':': return COLON;
444 case '.': return DOT;
445 case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
446 case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
447 case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
448 case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
449 case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
450 case '%': return in.match('=') ? MOD_ASSIGN : MOD;
451 case '~': return BITNOT;
452 case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
453 case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
454 case '*': return in.match('=') ? ASSIGN_MUL : MUL;
455 case '<': return !in.match('<') ?
456 (in.match('=') ? LE : LT) :
457 in.match('=') ? ASSIGN_LSH : LSH;
458 case '>': return !in.match('>') ?
459 (in.match('=') ? GE : GT) :
461 (in.match('=') ? ASSIGN_URSH : URSH) :
462 (in.match('=') ? ASSIGN_RSH : RSH);
464 if (in.match('=')) return ASSIGN_DIV;
465 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
466 if (!in.match('*')) return DIV;
467 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
468 if (c == '\n' || c != '/' || !in.match('*')) continue;
469 if (in.match('/')) return getToken();
470 throw new IOException("msg.nested.comment");
472 if (c == -1) throw new IOException("msg.unterminated.comment");
473 return getToken(); // `goto retry'
474 default: throw new IOException("illegal character: " + c);
478 private static class SmartReader {
479 PushbackReader reader = null;
/** Wraps the given reader in a {@code PushbackReader} so characters can be unread. */
public SmartReader(Reader r) {
    this.reader = new PushbackReader(r);
}
483 public void unread() throws IOException { reader.unread(lastread); }
484 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
485 public int peek() throws IOException {
486 int peeked = reader.read();
487 if (peeked != -1) reader.unread((char)peeked);
/**
 * Reads one character from the underlying reader, remembers it in
 * {@code lastread}, and records it in {@code accumulator} when one is active.
 *
 * @throws IOException if the underlying reader fails
 */
public int read() throws IOException {
    lastread = reader.read();
    // Cast to char: append(int) would append the decimal digits of the code point
    // (e.g. "97") rather than the character itself ('a').
    if (accumulator != null) accumulator.append((char) lastread);
496 // FIXME: could be much more efficient
497 StringBuffer accumulator = null;
498 public void startString() { accumulator = new StringBuffer(); }
499 public String getString() throws IOException {
500 String ret = accumulator.toString();