1 /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
\r
3 * The contents of this file are subject to the Netscape Public
\r
4 * License Version 1.1 (the "License"); you may not use this file
\r
5 * except in compliance with the License. You may obtain a copy of
\r
6 * the License at http://www.mozilla.org/NPL/
\r
8 * Software distributed under the License is distributed on an "AS
\r
9 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
\r
10 * implied. See the License for the specific language governing
\r
11 * rights and limitations under the License.
\r
13 * The Original Code is Rhino code, released
\r
16 * The Initial Developer of the Original Code is Netscape
\r
17 * Communications Corporation. Portions created by Netscape are
\r
18 * Copyright (C) 1997-1999 Netscape Communications Corporation. All
\r
25 * Alternatively, the contents of this file may be used under the
\r
26 * terms of the GNU Public License (the "GPL"), in which case the
\r
27 * provisions of the GPL are applicable instead of those above.
\r
28 * If you wish to allow use of your version of this file only
\r
29 * under the terms of the GPL and not to allow others to use your
\r
30 * version of this file under the NPL, indicate your decision by
\r
31 * deleting the provisions above and replace them with the notice
\r
32 * and other provisions required by the GPL. If you do not delete
\r
33 * the provisions above, a recipient may use your version of this
\r
34 * file under either the NPL or the GPL.
\r
37 package org.mozilla.javascript;
\r
42 * This class implements the JavaScript scanner.
\r
44 * It is based on the C source files jsscan.c and jsscan.h
\r
45 * in the jsref package.
\r
47 * @see org.mozilla.javascript.Parser
\r
49 * @author Mike McCabe
\r
50 * @author Brendan Eich
\r
53 public class TokenStream {
\r
55 * JSTokenStream flags, mirroring those in jsscan.h. These are used
\r
56 * by the parser to change/check the state of the scanner.
\r
59 public final static int
\r
60 TSF_NEWLINES = 0x0001, // tokenize newlines
\r
61 TSF_FUNCTION = 0x0002, // scanning inside function body
\r
62 TSF_RETURN_EXPR = 0x0004, // function has 'return expr;'
\r
63 TSF_RETURN_VOID = 0x0008, // function has 'return;'
\r
64 TSF_REGEXP = 0x0010; // looking for a regular expression
\r
67 * For chars - because we need something out-of-range
\r
68 * to check. (And checking EOF by exception is annoying.)
\r
69 * Note distinction from EOF token type!
\r
71 private final static int
\r
75 * Token types. These values correspond to JSTokenType values in
\r
79 public final static int
\r
81 ERROR = -1, // well-known as the only code < EOF
\r
82 EOF = 0, // end of file token - (not EOF_CHAR)
\r
83 EOL = 1, // end of line
\r
84 // Beginning here are interpreter bytecodes. Their values
\r
85 // must not exceed 127.
\r
137 SHEQ = 53, // shallow equality (===)
\r
138 SHNE = 54, // shallow inequality (!==)
\r
170 GETSCOPEPARENT = 86,
\r
173 // End of interpreter bytecodes
\r
174 SEMI = 89, // semicolon
\r
175 LB = 90, // left and right brackets
\r
177 LC = 92, // left and right curlies (braces)
\r
179 LP = 94, // left and right parentheses
\r
181 COMMA = 96, // comma operator
\r
182 ASSIGN = 97, // assignment ops (= += -= etc.)
\r
183 HOOK = 98, // conditional (?:)
\r
185 OR = 100, // logical or (||)
\r
186 AND = 101, // logical and (&&)
\r
187 EQOP = 102, // equality ops (== !=)
\r
188 RELOP = 103, // relational ops (< <= > >=)
\r
189 SHOP = 104, // shift ops (<< >> >>>)
\r
190 UNARYOP = 105, // unary prefix operator
\r
191 INC = 106, // increment/decrement (++ --)
\r
193 DOT = 108, // member operator (.)
\r
194 PRIMARY = 109, // true, false, null, this
\r
195 FUNCTION = 110, // function keyword
\r
196 EXPORT = 111, // export keyword
\r
197 IMPORT = 112, // import keyword
\r
198 IF = 113, // if keyword
\r
199 ELSE = 114, // else keyword
\r
200 SWITCH = 115, // switch keyword
\r
201 CASE = 116, // case keyword
\r
202 DEFAULT = 117, // default keyword
\r
203 WHILE = 118, // while keyword
\r
204 DO = 119, // do keyword
\r
205 FOR = 120, // for keyword
\r
206 BREAK = 121, // break keyword
\r
207 CONTINUE = 122, // continue keyword
\r
208 VAR = 123, // var keyword
\r
209 WITH = 124, // with keyword
\r
210 CATCH = 125, // catch keyword
\r
211 FINALLY = 126, // finally keyword
\r
212 RESERVED = 127, // reserved keywords
\r
214 /** Added by Mike - these are JSOPs in the jsref, but I
\r
215 * don't have them yet in the java implementation...
\r
216 * so they go here. Also whatever I needed.
\r
218 * Most of these go in the 'op' field when returning
\r
219 * more general token types, eg. 'DIV' as the op of 'ASSIGN'.
\r
223 PRE = 130, // for INC, DEC nodes.
\r
227 * For JSOPs associated with keywords...
\r
228 * eg. op = THIS; token = PRIMARY
\r
233 /* types used for the parse tree - these never get returned
\r
236 BLOCK = 133, // statement block
\r
237 ARRAYLIT = 134, // array literal
\r
238 OBJLIT = 135, // object literal
\r
239 LABEL = 136, // label
\r
249 SCRIPT = 146, // top-level node for entire script
\r
252 * For the interpreted mode indicating a line number change in icodes.
\r
260 ASSERT = 150; // XWT assert hack
\r
264 /* for mapping int token types to printable strings.
\r
265 * make sure to add 1 to index before using these!
\r
267 private static String names[];
\r
268 private static void checkNames() {
\r
269 if (Context.printTrees && names == null) {
\r
426 /* This function uses the cached op, string and number fields in
\r
427 * TokenStream; if getToken has been called since the passed token
\r
428 * was scanned, the op or string printed may be incorrect.
\r
430 public String tokenToString(int token) {
\r
431 if (Context.printTrees) {
\r
433 if (token + 1 >= names.length)
\r
436 if (token == UNARYOP ||
\r
438 token == PRIMARY ||
\r
442 return names[token + 1] + " " + names[this.op + 1];
\r
445 if (token == STRING || token == OBJECT || token == NAME)
\r
446 return names[token + 1] + " `" + this.string + "'";
\r
448 if (token == NUMBER)
\r
449 return "NUMBER " + this.number;
\r
451 return names[token + 1];
\r
456 public static String tokenToName(int type) {
\r
458 return names == null ? "" : names[type + 1];
\r
462 private int stringToKeyword(String name) {
\r
464 // The following assumes that EOF == 0
\r
468 Id_continue = CONTINUE,
\r
469 Id_default = DEFAULT,
\r
470 Id_delete = DELPROP,
\r
473 Id_export = EXPORT,
\r
474 Id_false = PRIMARY | (FALSE << 8),
\r
476 Id_function = FUNCTION,
\r
478 Id_in = RELOP | (IN << 8),
\r
480 Id_null = PRIMARY | (NULL << 8),
\r
481 Id_return = RETURN,
\r
482 Id_switch = SWITCH,
\r
483 Id_this = PRIMARY | (THIS << 8),
\r
484 Id_true = PRIMARY | (TRUE << 8),
\r
485 Id_typeof = UNARYOP | (TYPEOF << 8),
\r
487 Id_void = UNARYOP | (VOID << 8),
\r
491 // the following are #ifdef RESERVE_JAVA_KEYWORDS in jsscan.c
\r
492 Id_abstract = RESERVED,
\r
493 Id_boolean = RESERVED,
\r
494 Id_byte = RESERVED,
\r
496 Id_char = RESERVED,
\r
497 Id_class = RESERVED,
\r
498 Id_const = RESERVED,
\r
499 Id_debugger = RESERVED,
\r
500 Id_double = RESERVED,
\r
501 Id_enum = RESERVED,
\r
502 Id_extends = RESERVED,
\r
503 Id_final = RESERVED,
\r
504 Id_finally = FINALLY,
\r
505 Id_float = RESERVED,
\r
506 Id_goto = RESERVED,
\r
507 Id_implements = RESERVED,
\r
508 Id_import = IMPORT,
\r
509 Id_instanceof = RELOP | (INSTANCEOF << 8),
\r
511 Id_interface = RESERVED,
\r
512 Id_long = RESERVED,
\r
513 Id_native = RESERVED,
\r
514 Id_package = RESERVED,
\r
515 Id_private = RESERVED,
\r
516 Id_protected = RESERVED,
\r
517 Id_public = RESERVED,
\r
518 Id_assert = ASSERT,
\r
519 Id_short = RESERVED,
\r
520 Id_static = RESERVED,
\r
521 Id_super = RESERVED,
\r
522 Id_synchronized = RESERVED,
\r
524 Id_throws = RESERVED,
\r
525 Id_transient = RESERVED,
\r
527 Id_volatile = RESERVED;
\r
531 // #generated# Last update: 2001-06-01 17:45:01 CEST
\r
532 L0: { id = 0; String X = null; int c;
\r
533 L: switch (s.length()) {
\r
534 case 2: c=s.charAt(1);
\r
535 if (c=='f') { if (s.charAt(0)=='i') {id=Id_if; break L0;} }
\r
536 else if (c=='n') { if (s.charAt(0)=='i') {id=Id_in; break L0;} }
\r
537 else if (c=='o') { if (s.charAt(0)=='d') {id=Id_do; break L0;} }
\r
539 case 3: switch (s.charAt(0)) {
\r
540 case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') {id=Id_for; break L0;} break L;
\r
541 case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') {id=Id_int; break L0;} break L;
\r
542 case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') {id=Id_new; break L0;} break L;
\r
543 case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') {id=Id_try; break L0;} break L;
\r
544 case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') {id=Id_var; break L0;} break L;
\r
546 case 4: switch (s.charAt(0)) {
\r
547 case 'b': X="byte";id=Id_byte; break L;
\r
548 case 'c': c=s.charAt(3);
\r
549 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='a') {id=Id_case; break L0;} }
\r
550 else if (c=='r') { if (s.charAt(2)=='a' && s.charAt(1)=='h') {id=Id_char; break L0;} }
\r
552 case 'e': c=s.charAt(3);
\r
553 if (c=='e') { if (s.charAt(2)=='s' && s.charAt(1)=='l') {id=Id_else; break L0;} }
\r
554 else if (c=='m') { if (s.charAt(2)=='u' && s.charAt(1)=='n') {id=Id_enum; break L0;} }
\r
556 case 'g': X="goto";id=Id_goto; break L;
\r
557 case 'l': X="long";id=Id_long; break L;
\r
558 case 'n': X="null";id=Id_null; break L;
\r
559 case 't': c=s.charAt(3);
\r
560 if (c=='e') { if (s.charAt(2)=='u' && s.charAt(1)=='r') {id=Id_true; break L0;} }
\r
561 else if (c=='s') { if (s.charAt(2)=='i' && s.charAt(1)=='h') {id=Id_this; break L0;} }
\r
563 case 'v': X="void";id=Id_void; break L;
\r
564 case 'w': X="with";id=Id_with; break L;
\r
566 case 5: switch (s.charAt(2)) {
\r
567 case 'a': X="class";id=Id_class; break L;
\r
568 case 'e': X="break";id=Id_break; break L;
\r
569 case 'i': X="while";id=Id_while; break L;
\r
570 case 'l': X="false";id=Id_false; break L;
\r
571 case 'n': c=s.charAt(0);
\r
572 if (c=='c') { X="const";id=Id_const; }
\r
573 else if (c=='f') { X="final";id=Id_final; }
\r
575 case 'o': c=s.charAt(0);
\r
576 if (c=='f') { X="float";id=Id_float; }
\r
577 else if (c=='s') { X="short";id=Id_short; }
\r
579 case 'p': X="super";id=Id_super; break L;
\r
580 case 'r': X="throw";id=Id_throw; break L;
\r
581 case 't': X="catch";id=Id_catch; break L;
\r
583 case 6: switch (s.charAt(1)) {
\r
584 case 'a': X="native";id=Id_native; break L;
\r
585 case 'e': c=s.charAt(0);
\r
586 if (c=='d') { X="delete";id=Id_delete; }
\r
587 else if (c=='r') { X="return";id=Id_return; }
\r
589 case 'h': X="throws";id=Id_throws; break L;
\r
590 case 'm': X="import";id=Id_import; break L;
\r
591 case 'o': X="double";id=Id_double; break L;
\r
593 // commented out by Adam Megacz for XWT
\r
595 case 't': X="static";id=Id_static; break L;
\r
598 case 's': X="assert";id=Id_assert; break L;
\r
599 case 'u': X="public";id=Id_public; break L;
\r
600 case 'w': X="switch";id=Id_switch; break L;
\r
601 case 'x': X="export";id=Id_export; break L;
\r
602 case 'y': X="typeof";id=Id_typeof; break L;
\r
604 case 7: switch (s.charAt(1)) {
\r
605 case 'a': X="package";id=Id_package; break L;
\r
606 case 'e': X="default";id=Id_default; break L;
\r
607 case 'i': X="finally";id=Id_finally; break L;
\r
608 case 'o': X="boolean";id=Id_boolean; break L;
\r
609 case 'r': X="private";id=Id_private; break L;
\r
610 case 'x': X="extends";id=Id_extends; break L;
\r
612 case 8: switch (s.charAt(0)) {
\r
613 case 'a': X="abstract";id=Id_abstract; break L;
\r
614 case 'c': X="continue";id=Id_continue; break L;
\r
615 case 'd': X="debugger";id=Id_debugger; break L;
\r
616 case 'f': X="function";id=Id_function; break L;
\r
617 case 'v': X="volatile";id=Id_volatile; break L;
\r
619 case 9: c=s.charAt(0);
\r
620 if (c=='i') { X="interface";id=Id_interface; }
\r
621 else if (c=='p') { X="protected";id=Id_protected; }
\r
622 else if (c=='t') { X="transient";id=Id_transient; }
\r
624 case 10: c=s.charAt(1);
\r
625 if (c=='m') { X="implements";id=Id_implements; }
\r
626 else if (c=='n') { X="instanceof";id=Id_instanceof; }
\r
628 case 12: X="synchronized";id=Id_synchronized; break L;
\r
630 if (X!=null && X!=s && !X.equals(s)) id = 0;
\r
631 if (id == Id_const) Context.reportError("The 'const' keyword is not allowed in XWT scripts",
\r
632 getSourceName(), getLineno(), getLine(), getOffset());
\r
633 if (id == Id_instanceof) Context.reportError("The 'instanceof' keyword is not allowed in XWT scripts",
\r
634 getSourceName(), getLineno(), getLine(), getOffset());
\r
635 if (id == Id_delete) Context.reportError("The 'delete' keyword is not allowed in XWT scripts",
\r
636 getSourceName(), getLineno(), getLine(), getOffset());
\r
639 if (id == Id_new) Context.reportError("The 'new' keyword is not allowed in XWT scripts",
\r
640 getSourceName(), getLineno(), getLine(), getOffset());
\r
642 // #/string_id_map#
\r
643 if (id == 0) { return EOF; }
\r
648 public TokenStream(Reader in, Scriptable scope,
\r
649 String sourceName, int lineno)
\r
651 this.in = new LineBuffer(in, lineno);
\r
652 this.scope = scope;
\r
653 this.pushbackToken = EOF;
\r
654 this.sourceName = sourceName;
\r
658 public Scriptable getScope() {
\r
662 /* return and pop the token from the stream if it matches...
\r
663 * otherwise push it back and return false
\r
665 public boolean matchToken(int toMatch) throws IOException {
\r
666 int token = getToken();
\r
667 if (token == toMatch)
\r
670 // didn't match, push back token
\r
672 this.pushbackToken = token;
\r
676 public void clearPushback() {
\r
677 this.pushbackToken = EOF;
\r
680 public void ungetToken(int tt) {
\r
681 if (this.pushbackToken != EOF && tt != ERROR) {
\r
682 String message = Context.getMessage2("msg.token.replaces.pushback",
\r
683 tokenToString(tt), tokenToString(this.pushbackToken));
\r
684 throw new RuntimeException(message);
\r
686 this.pushbackToken = tt;
\r
690 public int peekToken() throws IOException {
\r
691 int result = getToken();
\r
693 this.pushbackToken = result;
\r
698 public int peekTokenSameLine() throws IOException {
\r
701 flags |= TSF_NEWLINES; // SCAN_NEWLINES from jsscan.h
\r
702 result = peekToken();
\r
703 flags &= ~TSF_NEWLINES; // HIDE_NEWLINES from jsscan.h
\r
704 if (this.pushbackToken == EOL)
\r
705 this.pushbackToken = EOF;
\r
709 protected static boolean isJSIdentifier(String s) {
\r
711 int length = s.length();
\r
713 if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0)))
\r
716 for (int i=1; i<length; i++) {
\r
717 char c = s.charAt(i);
\r
718 if (!Character.isJavaIdentifierPart(c))
\r
720 if (! ((i + 5) < length)
\r
721 && (s.charAt(i + 1) == 'u')
\r
722 && 0 <= xDigitToInt(s.charAt(i + 2))
\r
723 && 0 <= xDigitToInt(s.charAt(i + 3))
\r
724 && 0 <= xDigitToInt(s.charAt(i + 4))
\r
725 && 0 <= xDigitToInt(s.charAt(i + 5)))
\r
733 private static boolean isAlpha(int c) {
\r
734 return ((c >= 'a' && c <= 'z')
\r
735 || (c >= 'A' && c <= 'Z'));
\r
738 static boolean isDigit(int c) {
\r
739 return (c >= '0' && c <= '9');
\r
742 static int xDigitToInt(int c) {
\r
743 if ('0' <= c && c <= '9') { return c - '0'; }
\r
744 if ('a' <= c && c <= 'f') { return c - ('a' - 10); }
\r
745 if ('A' <= c && c <= 'F') { return c - ('A' - 10); }
\r
749 /* As defined in ECMA. jsscan.c uses C isspace() (which allows
\r
750 * \v, I think.) note that code in in.read() implicitly accepts
\r
751 * '\r' == \u000D as well.
\r
753 public static boolean isJSSpace(int c) {
\r
754 return (c == '\u0020' || c == '\u0009'
\r
755 || c == '\u000C' || c == '\u000B'
\r
757 || Character.getType((char)c) == Character.SPACE_SEPARATOR);
\r
760 public static boolean isJSLineTerminator(int c) {
\r
761 return (c == '\n' || c == '\r'
\r
762 || c == 0x2028 || c == 0x2029);
\r
765 public int getToken() throws IOException {
\r
769 // Check for pushed-back token
\r
770 if (this.pushbackToken != EOF) {
\r
771 int result = this.pushbackToken;
\r
772 this.pushbackToken = EOF;
\r
776 // Eat whitespace, possibly sensitive to newlines.
\r
780 if ((flags & TSF_NEWLINES) != 0)
\r
782 } while (isJSSpace(c) || c == '\n');
\r
787 // HACK: XWT "and" as "&&" support
788 if (c == 'a' || c == 'A') {
789 if (in.match('n') || in.match('N')) {
790 if (in.match('d') || in.match('D')) return AND;
795 // identifier/keyword/instanceof?
\r
796 // watch out for starting with a <backslash>
\r
797 boolean isUnicodeEscapeStart = false;
\r
801 isUnicodeEscapeStart = true;
\r
804 // always unread the 'u' or whatever, we need
\r
805 // to start the string below at the <backslash>.
\r
808 if (isUnicodeEscapeStart ||
\r
809 Character.isJavaIdentifierStart((char)c)) {
\r
812 boolean containsEscape = isUnicodeEscapeStart;
\r
817 containsEscape = (c == 'u');
\r
819 } while (Character.isJavaIdentifierPart((char)c));
\r
824 String str = in.getString();
\r
825 // OPT we shouldn't have to make a string (object!) to
\r
826 // check if it's a keyword.
\r
828 // strictly speaking we should probably push-back
\r
829 // all the bad characters if the <backslash>uXXXX
\r
830 // sequence is malformed. But since there isn't a
\r
831 // correct context(is there?) for a bad Unicode
\r
832 // escape sequence after an identifier, we can report
\r
834 if (containsEscape) {
\r
835 char ca[] = str.toCharArray();
\r
836 int L = str.length();
\r
837 int destination = 0;
\r
838 for (int i = 0; i != L;) {
\r
841 if (c == '\\' && i != L && ca[i] == 'u') {
\r
842 boolean goodEscape = false;
\r
844 int val = xDigitToInt(ca[i + 1]);
\r
846 val = (val << 4) | xDigitToInt(ca[i + 2]);
\r
848 val = (val << 4) | xDigitToInt(ca[i + 3]);
\r
850 val = (val << 4) | xDigitToInt(ca[i + 4]);
\r
861 reportSyntaxError("msg.invalid.escape", null);
\r
865 ca[destination] = (char)c;
\r
868 str = new String(ca, 0, destination);
\r
871 // Return the corresponding token if it's a keyword
\r
872 if ((result = stringToKeyword(str)) != EOF) {
\r
881 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) {
\r
885 double dval = ScriptRuntime.NaN;
\r
887 boolean isInteger = true;
\r
891 if (c == 'x' || c == 'X') {
\r
894 // restart the string, losing leading 0x
\r
896 } else if (isDigit(c)) {
\r
901 while (0 <= xDigitToInt(c)) {
\r
906 * We permit 08 and 09 as decimal numbers, which
\r
907 * makes our behavior a superset of the ECMA
\r
908 * numeric grammar. We might not always be so
\r
909 * permissive, so we warn about it.
\r
911 if (base == 8 && c >= '8') {
\r
912 Object[] errArgs = { c == '8' ? "8" : "9" };
\r
913 Context.reportWarning(
\r
914 Context.getMessage("msg.bad.octal.literal",
\r
917 in.getLineno(), getLine(), getOffset());
\r
924 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
\r
929 } while (isDigit(c));
\r
932 if (c == 'e' || c == 'E') {
\r
934 if (c == '+' || c == '-') {
\r
938 in.getString(); // throw away string in progress
\r
939 reportSyntaxError("msg.missing.exponent", null);
\r
944 } while (isDigit(c));
\r
948 String numString = in.getString();
\r
950 if (base == 10 && !isInteger) {
\r
952 // Use Java conversion to number from string...
\r
953 dval = (Double.valueOf(numString)).doubleValue();
\r
955 catch (NumberFormatException ex) {
\r
956 Object[] errArgs = { ex.getMessage() };
\r
957 reportSyntaxError("msg.caught.nfe", errArgs);
\r
961 dval = ScriptRuntime.stringToNumber(numString, 0, base);
\r
962 longval = (long) dval;
\r
964 // is it an integral fits-in-a-long value?
\r
965 if (longval != dval)
\r
970 /* Can't handle floats right now, because postfix INC/DEC
\r
971 generate Doubles, but I would generate a Float through this
\r
972 path, and it causes a stack mismatch. FIXME (MS)
\r
973 if (Float.MIN_VALUE <= dval && dval <= Float.MAX_VALUE)
\r
974 this.number = new Xloat((float) dval);
\r
977 this.number = new Double(dval);
\r
979 // We generate the smallest possible type here
\r
980 if (Byte.MIN_VALUE <= longval && longval <= Byte.MAX_VALUE)
\r
981 this.number = new Byte((byte)longval);
\r
982 else if (Short.MIN_VALUE <= longval &&
\r
983 longval <= Short.MAX_VALUE)
\r
984 this.number = new Short((short)longval);
\r
985 else if (Integer.MIN_VALUE <= longval &&
\r
986 longval <= Integer.MAX_VALUE)
\r
987 this.number = new Integer((int)longval);
\r
989 // May lose some precision here, but that's the
\r
990 // appropriate semantics.
\r
991 this.number = new Double(longval);
\r
998 if (c == '"' || c == '\'') {
\r
999 // We attempt to accumulate a string the fast way, by
\r
1000 // building it directly out of the reader. But if there
\r
1001 // are any escaped characters in the string, we revert to
\r
1002 // building it out of a StringBuffer.
\r
1004 StringBuffer stringBuf = null;
\r
1006 int quoteChar = c;
\r
1010 in.startString(); // start after the first "
\r
1011 while(c != quoteChar) {
\r
1012 if (c == '\n' || c == EOF_CHAR) {
\r
1014 in.getString(); // throw away the string in progress
\r
1015 reportSyntaxError("msg.unterminated.string.lit", null);
\r
1020 // We've hit an escaped character; revert to the
\r
1021 // slow method of building a string.
\r
1022 if (stringBuf == null) {
\r
1023 // Don't include the backslash
\r
1025 stringBuf = new StringBuffer(in.getString());
\r
1029 switch (c = in.read()) {
\r
1030 case 'b': c = '\b'; break;
\r
1031 case 'f': c = '\f'; break;
\r
1032 case 'n': c = '\n'; break;
\r
1033 case 'r': c = '\r'; break;
\r
1034 case 't': c = '\t'; break;
\r
1035 case 'v': c = '\u000B'; break;
\r
1036 // \v a late addition to the ECMA spec.
\r
1037 // '\v' doesn't seem to be valid Java.
\r
1040 if (isDigit(c) && c < '8') {
\r
1043 if (isDigit(c) && c < '8') {
\r
1044 val = 8 * val + c - '0';
\r
1046 if (isDigit(c) && c < '8') {
\r
1047 val = 8 * val + c - '0';
\r
1053 reportSyntaxError("msg.oct.esc.too.large", null);
\r
1057 } else if (c == 'u') {
\r
1059 * Get 4 hex digits; if the u escape is not
\r
1060 * followed by 4 hex digits, use 'u' + the literal
\r
1061 * character sequence that follows. Do some manual
\r
1062 * match (OK because we're in a string) to avoid
\r
1063 * multi-char match on the underlying stream.
\r
1065 int c1 = in.read();
\r
1066 c = xDigitToInt(c1);
\r
1071 int c2 = in.read();
\r
1072 c = (c << 4) | xDigitToInt(c2);
\r
1075 stringBuf.append('u');
\r
1078 int c3 = in.read();
\r
1079 c = (c << 4) | xDigitToInt(c3);
\r
1082 stringBuf.append('u');
\r
1083 stringBuf.append((char)c1);
\r
1086 int c4 = in.read();
\r
1087 c = (c << 4) | xDigitToInt(c4);
\r
1090 stringBuf.append('u');
\r
1091 stringBuf.append((char)c1);
\r
1092 stringBuf.append((char)c2);
\r
1095 // got 4 hex digits! Woo Hoo!
\r
1100 } else if (c == 'x') {
\r
1101 /* Get 2 hex digits, defaulting to 'x' + literal
\r
1102 * sequence, as above.
\r
1104 int c1 = in.read();
\r
1105 c = xDigitToInt(c1);
\r
1110 int c2 = in.read();
\r
1111 c = (c << 4) | xDigitToInt(c2);
\r
1114 stringBuf.append('x');
\r
1117 // got 2 hex digits
\r
1124 if (stringBuf != null)
\r
1125 stringBuf.append((char) c);
\r
1129 if (stringBuf != null)
\r
1130 this.string = stringBuf.toString();
\r
1132 in.unread(); // miss the trailing "
\r
1133 this.string = in.getString();
\r
1141 case '\n': return EOL;
\r
1142 case ';': return SEMI;
\r
1143 case '[': return LB;
\r
1144 case ']': return RB;
\r
1145 case '{': return LC;
\r
1146 case '}': return RC;
\r
1147 case '(': return LP;
\r
1148 case ')': return RP;
\r
1149 case ',': return COMMA;
\r
1150 case '?': return HOOK;
\r
1151 case ':': return COLON;
\r
1152 case '.': return DOT;
\r
1155 if (in.match('|')) {
\r
1157 } else if (in.match('=')) {
\r
1165 if (in.match('=')) {
\r
1173 if (in.match('&')) {
\r
1175 } else if (in.match('=')) {
\r
1183 if (in.match('=')) {
\r
1184 if (in.match('='))
\r
1195 if (in.match('=')) {
\r
1196 if (in.match('='))
\r
1207 /* NB:treat HTML begin-comment as comment-till-eol */
\r
1208 if (in.match('!')) {
\r
1209 if (in.match('-')) {
\r
1210 if (in.match('-')) {
\r
1211 while ((c = in.read()) != EOF_CHAR && c != '\n')
\r
1212 /* skip to end of line */;
\r
1214 return getToken(); // in place of 'goto retry'
\r
1220 if (in.match('<')) {
\r
1221 if (in.match('=')) {
\r
1229 if (in.match('=')) {
\r
1239 if (in.match('>')) {
\r
1240 if (in.match('>')) {
\r
1241 if (in.match('=')) {
\r
1249 if (in.match('=')) {
\r
1258 if (in.match('=')) {
\r
1268 if (in.match('=')) {
\r
1276 // is it a // comment?
\r
1277 if (in.match('/')) {
\r
1278 while ((c = in.read()) != EOF_CHAR && c != '\n')
\r
1279 /* skip to end of line */;
\r
1281 return getToken();
\r
1283 if (in.match('*')) {
\r
1284 while ((c = in.read()) != -1
\r
1285 && !(c == '*' && in.match('/'))) {
\r
1287 } else if (c == '/' && in.match('*')) {
\r
1288 if (in.match('/'))
\r
1289 return getToken();
\r
1290 reportSyntaxError("msg.nested.comment", null);
\r
1294 if (c == EOF_CHAR) {
\r
1295 reportSyntaxError("msg.unterminated.comment", null);
\r
1298 return getToken(); // `goto retry'
\r
1301 // is it a regexp?
\r
1302 if ((flags & TSF_REGEXP) != 0) {
\r
1303 // We don't try to use the in.startString/in.getString
\r
1304 // approach, because escaped characters (which break it)
\r
1305 // seem likely to be common.
\r
1306 StringBuffer re = new StringBuffer();
\r
1307 while ((c = in.read()) != '/') {
\r
1308 if (c == '\n' || c == EOF_CHAR) {
\r
1310 reportSyntaxError("msg.unterminated.re.lit", null);
\r
1314 re.append((char) c);
\r
1318 re.append((char) c);
\r
1321 StringBuffer flagsBuf = new StringBuffer();
\r
1323 if (in.match('g'))
\r
1324 flagsBuf.append('g');
\r
1325 else if (in.match('i'))
\r
1326 flagsBuf.append('i');
\r
1327 else if (in.match('m'))
\r
1328 flagsBuf.append('m');
\r
1333 if (isAlpha(in.peek())) {
\r
1334 reportSyntaxError("msg.invalid.re.flag", null);
\r
1338 this.string = re.toString();
\r
1339 this.regExpFlags = flagsBuf.toString();
\r
1344 if (in.match('=')) {
\r
1353 if (in.match('=')) {
\r
1365 if (in.match('=')) {
\r
1373 } else if (in.match((char) c)) {
\r
1379 } else if (c == '-') {
\r
1386 reportSyntaxError("msg.illegal.character", null);
\r
1391 public void reportSyntaxError(String messageProperty, Object[] args) {
\r
1392 String message = Context.getMessage(messageProperty, args);
\r
1393 if (scope != null) {
\r
1394 // We're probably in an eval. Need to throw an exception.
\r
1395 throw NativeGlobal.constructError(
\r
1396 Context.getContext(), "SyntaxError",
\r
1397 message, scope, getSourceName(),
\r
1398 getLineno(), getOffset(), getLine());
\r
1400 Context.reportError(message, getSourceName(),
\r
1401 getLineno(), getLine(), getOffset());
\r
/** Returns the source name that was passed to the constructor. */
1405 public String getSourceName() { return sourceName; }
\r
/** Returns the line number reported by the underlying LineBuffer. */
1406 public int getLineno() { return in.getLineno(); }
\r
/**
 * Returns the cached op field, i.e. the specific operator that accompanies
 * a more general token type (eg. 'DIV' as the op of 'ASSIGN').
 */
1407 public int getOp() { return op; }
\r
/**
 * Returns the cached string field (set when a string or regexp literal
 * is scanned); it is initialized to "" so it starts out non-null.
 */
1408 public String getString() { return string; }
\r
/** Returns the cached number field, which is assigned when a numeric literal is scanned. */
1409 public Number getNumber() { return number; }
\r
/**
 * Returns whatever the underlying LineBuffer reports from getLine().
 * NOTE(review): presumably the text of the current source line -- confirm
 * against LineBuffer.
 */
1410 public String getLine() { return in.getLine(); }
\r
/**
 * Returns whatever the underlying LineBuffer reports from getOffset().
 * NOTE(review): presumably the position within the current line -- confirm
 * against LineBuffer.
 */
1411 public int getOffset() { return in.getOffset(); }
\r
/**
 * Returns the tokenno field.
 * NOTE(review): presumably a running count of tokens scanned -- the code
 * that updates it is not visible here; confirm in getToken().
 */
1412 public int getTokenno() { return tokenno; }
\r
/** Returns the end-of-input state reported by the underlying LineBuffer. */
1413 public boolean eof() { return in.eof(); }
\r
1415 // instance variables
\r
1416 private LineBuffer in;
\r
1419 /* for TSF_REGEXP, etc.
\r
1420 * should this be manipulated by getter/setter functions?
\r
1421 * should it be passed to getToken();
\r
1424 public String regExpFlags;
\r
1426 private String sourceName;
\r
1427 private String line;
\r
1428 private Scriptable scope;
\r
1429 private int pushbackToken;
\r
1430 private int tokenno;
\r
1434 // Set this to an initial non-null value so that the Parser has
\r
1435 // something to retrieve even if an error has occurred and no
\r
1436 // string is found. Fosters one class of error, but saves lots of
\r
1438 private String string = "";
\r
1439 private Number number;
\r