1 // Derived from org.mozilla.javascript.TokenStream [NPL]
4 * The contents of this file are subject to the Netscape Public
5 * License Version 1.1 (the "License"); you may not use this file
6 * except in compliance with the License. You may obtain a copy of
7 * the License at http://www.mozilla.org/NPL/
9 * Software distributed under the License is distributed on an "AS
10 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11 * implied. See the License for the specific language governing
12 * rights and limitations under the License.
14 * The Initial Developer of the Original Code is Netscape
15 * Communications Corporation.
17 * Contributor(s): Roger Lawrence, Mike McCabe
23 /** Lexes a stream of characters into a stream of Tokens */
24 class Lexer implements Tokens {
/**
 * Command-line entry point for exercising the lexer by hand: builds a Lexer over System.in
 * (source name "stdin", starting line 0) and prints the codeToString entry of every token
 * until getToken() returns -1.
 */
27 public static void main(String[] s) throws IOException {
28 Lexer l = new Lexer(new InputStreamReader(System.in), "stdin", 0);
30 while((tok = l.getToken()) != -1) System.out.println(codeToString[tok]);
33 /** the token that was just parsed */
36 /** the most recently parsed token, <i>regardless of pushbacks</i> */
37 protected int mostRecentlyReadToken;
39 /** if the token just parsed was a NUMBER, this is the numeric value */
40 protected Number number = null;
42 /** if the token just parsed was a NAME or STRING, this is the string value */
43 protected String string = null;
45 /** the line number of the most recently <i>lexed</i> token */
46 protected int line = 0;
48 /** the line number of the most recently <i>parsed</i> token */
49 protected int parserLine = 0;
51 /** the column number of the current token */
52 protected int col = 0;
54 /** the name of the source code file being lexed */
55 protected String sourceName;
/** the character source; a SmartReader wrapping the Reader handed to the constructor */
57 private SmartReader in;
/**
 * Creates a lexer that reads characters from r.
 * @param r the character source; it is wrapped in a SmartReader
 * @param sourceName name of the source being lexed; stored in sourceName and used as the prefix of LexerException messages
 * @param line starting line number; stored in parserLine
 */
58 public Lexer(Reader r, String sourceName, int line) throws IOException {
59 this.sourceName = sourceName;
61 this.parserLine = line;
62 in = new SmartReader(r);
66 // Predicates ///////////////////////////////////////////////////////////////////////
68 private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
69 private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
/**
 * Converts a hexadecimal digit character to its numeric value:
 * '0'-'9' give 0-9, 'a'-'f' and 'A'-'F' give 10-15.
 */
// NOTE(review): getNumber tests 0 <= xDigitToInt(c), so a non-hex character presumably yields a negative value -- confirm
70 private static int xDigitToInt(int c) {
71 if ('0' <= c && c <= '9') return c - '0';
72 else if ('a' <= c && c <= 'f') return c - ('a' - 10);
73 else if ('A' <= c && c <= 'F') return c - ('A' - 10);
78 // Token Subtype Handlers /////////////////////////////////////////////////////////
/**
 * Maps an identifier to the token code of the keyword it spells.
 * The lookup is a nested switch: first on the length of the name, then on its first character
 * (and on the second character where several keywords share a first letter), followed by an
 * equals() check against the one remaining candidate. The "if (true) do { ... } while(false)"
 * wrappers and the SUCCESS label are scaffolding around each return statement.
 * Words such as "int", "new" or "class" map to RESERVED, which getIdentifier turns into a LexerException.
 * @param name the identifier text to classify
 */
// NOTE(review): getIdentifier compares the result with -1, so a name that is not a keyword presumably yields -1 -- confirm
80 private int getKeyword(String name) throws IOException {
81 final String ccSwitch0 = (String)(name); SUCCESS:do { switch(ccSwitch0.length()) {
82 case 2: { switch(ccSwitch0.charAt(0)) { case 'd': if ("do".equals(ccSwitch0)) { if (true) do { return DO;
83 } while(false); break SUCCESS; } break; case 'g': if ("gt".equals(ccSwitch0)) { if (true) do { return GT;
84 } while(false); break SUCCESS; } break; case 'i': { switch(ccSwitch0.charAt(1)) { case 'f': if ("if".equals(ccSwitch0)) { if (true) do { return IF;
85 } while(false); break SUCCESS; } break; case 'n': if ("in".equals(ccSwitch0)) { if (true) do { return IN;
86 } while(false); break SUCCESS; } break; } break; } case 'l': if ("lt".equals(ccSwitch0)) { if (true) do { return LT;
87 } while(false); break SUCCESS; } break; case 'o': if ("or".equals(ccSwitch0)) { if (true) do { return OR;
88 } while(false); break SUCCESS; } break; }; break; } case 3: { switch(ccSwitch0.charAt(0)) { case 'a': if ("and".equals(ccSwitch0)) { if (true) do { return AND;
89 } while(false); break SUCCESS; } break; case 'f': if ("for".equals(ccSwitch0)) { if (true) do { return FOR;
90 } while(false); break SUCCESS; } break; case 'i': if ("int".equals(ccSwitch0)) { if (true) do { return RESERVED;
91 } while(false); break SUCCESS; } break; case 'n': if ("new".equals(ccSwitch0)) { if (true) do { return RESERVED;
92 } while(false); break SUCCESS; } break; case 't': if ("try".equals(ccSwitch0)) { if (true) do { return TRY;
93 } while(false); break SUCCESS; } break; case 'v': if ("var".equals(ccSwitch0)) { if (true) do { return VAR;
94 } while(false); break SUCCESS; } break; }; break; } case 4: { switch(ccSwitch0.charAt(0)) { case 'b': if ("byte".equals(ccSwitch0)) { if (true) do { return RESERVED;
95 } while(false); break SUCCESS; } break; case 'c': { switch(ccSwitch0.charAt(1)) { case 'a': if ("case".equals(ccSwitch0)) { if (true) do { return CASE;
96 } while(false); break SUCCESS; } break; case 'h': if ("char".equals(ccSwitch0)) { if (true) do { return RESERVED;
97 } while(false); break SUCCESS; } break; } break; } case 'e': { switch(ccSwitch0.charAt(1)) { case 'l': if ("else".equals(ccSwitch0)) { if (true) do { return ELSE;
98 } while(false); break SUCCESS; } break; case 'n': if ("enum".equals(ccSwitch0)) { if (true) do { return RESERVED;
99 } while(false); break SUCCESS; } break; } break; } case 'g': if ("goto".equals(ccSwitch0)) { if (true) do { return RESERVED;
100 } while(false); break SUCCESS; } break; case 'l': if ("long".equals(ccSwitch0)) { if (true) do { return RESERVED;
101 } while(false); break SUCCESS; } break; case 'n': if ("null".equals(ccSwitch0)) { if (true) do { return NULL;
102 } while(false); break SUCCESS; } break; case 't': if ("true".equals(ccSwitch0)) { if (true) do { return TRUE;
103 } while(false); break SUCCESS; } break; case 'v': if ("void".equals(ccSwitch0)) { if (true) do { return RESERVED;
104 } while(false); break SUCCESS; } break; case 'w': if ("with".equals(ccSwitch0)) { if (true) do { return RESERVED;
105 } while(false); break SUCCESS; } break; }; break; } case 5: { switch(ccSwitch0.charAt(0)) { case 'b': if ("break".equals(ccSwitch0)) { if (true) do { return BREAK;
106 } while(false); break SUCCESS; } break; case 'c': { switch(ccSwitch0.charAt(1)) { case 'a': if ("catch".equals(ccSwitch0)) { if (true) do { return CATCH;
107 } while(false); break SUCCESS; } break; case 'l': if ("class".equals(ccSwitch0)) { if (true) do { return RESERVED;
108 } while(false); break SUCCESS; } break; case 'o': if ("const".equals(ccSwitch0)) { if (true) do { return RESERVED;
109 } while(false); break SUCCESS; } break; } break; } case 'f': { switch(ccSwitch0.charAt(1)) { case 'a': if ("false".equals(ccSwitch0)) { if (true) do { return FALSE;
110 } while(false); break SUCCESS; } break; case 'i': if ("final".equals(ccSwitch0)) { if (true) do { return RESERVED;
111 } while(false); break SUCCESS; } break; } break; } case 's': if ("super".equals(ccSwitch0)) { if (true) do { return RESERVED;
112 } while(false); break SUCCESS; } break; case 't': if ("throw".equals(ccSwitch0)) { if (true) do { return THROW;
113 } while(false); break SUCCESS; } break; case 'w': if ("while".equals(ccSwitch0)) { if (true) do { return WHILE;
114 } while(false); break SUCCESS; } break; }; break; } case 6: { switch(ccSwitch0.charAt(0)) { case 'a': if ("assert".equals(ccSwitch0)) { if (true) do { return ASSERT;
115 } while(false); break SUCCESS; } break; case 'd': { switch(ccSwitch0.charAt(1)) { case 'e': if ("delete".equals(ccSwitch0)) { if (true) do { return RESERVED;
116 } while(false); break SUCCESS; } break; case 'o': if ("double".equals(ccSwitch0)) { if (true) do { return RESERVED;
117 } while(false); break SUCCESS; } break; } break; } case 'p': if ("public".equals(ccSwitch0)) { if (true) do { return RESERVED;
118 } while(false); break SUCCESS; } break; case 'r': if ("return".equals(ccSwitch0)) { if (true) do { return RETURN;
119 } while(false); break SUCCESS; } break; case 's': if ("switch".equals(ccSwitch0)) { if (true) do { return SWITCH;
120 } while(false); break SUCCESS; } break; case 't': { switch(ccSwitch0.charAt(1)) { case 'h': if ("throws".equals(ccSwitch0)) { if (true) do { return RESERVED;
121 } while(false); break SUCCESS; } break; case 'y': if ("typeof".equals(ccSwitch0)) { if (true) do { return TYPEOF;
122 } while(false); break SUCCESS; } break; } break; } }; break; } case 7: { switch(ccSwitch0.charAt(0)) { case 'b': if ("boolean".equals(ccSwitch0)) { if (true) do { return RESERVED;
123 } while(false); break SUCCESS; } break; case 'd': if ("default".equals(ccSwitch0)) { if (true) do { return DEFAULT;
124 } while(false); break SUCCESS; } break; case 'e': if ("extends".equals(ccSwitch0)) { if (true) do { return RESERVED;
125 } while(false); break SUCCESS; } break; case 'f': if ("finally".equals(ccSwitch0)) { if (true) do { return FINALLY;
126 } while(false); break SUCCESS; } break; case 'p': { switch(ccSwitch0.charAt(1)) { case 'a': if ("package".equals(ccSwitch0)) { if (true) do { return RESERVED;
127 } while(false); break SUCCESS; } break; case 'r': if ("private".equals(ccSwitch0)) { if (true) do { return RESERVED;
128 } while(false); break SUCCESS; } break; } break; } }; break; } case 8: { switch(ccSwitch0.charAt(0)) { case 'a': if ("abstract".equals(ccSwitch0)) { if (true) do { return RESERVED;
129 } while(false); break SUCCESS; } break; case 'c': if ("continue".equals(ccSwitch0)) { if (true) do { return CONTINUE;
130 } while(false); break SUCCESS; } break; case 'd': if ("debugger".equals(ccSwitch0)) { if (true) do { return RESERVED;
131 } while(false); break SUCCESS; } break; case 'f': if ("function".equals(ccSwitch0)) { if (true) do { return FUNCTION;
132 } while(false); break SUCCESS; } break; case 'v': if ("volatile".equals(ccSwitch0)) { if (true) do { return RESERVED;
133 } while(false); break SUCCESS; } break; }; break; } case 9: { switch(ccSwitch0.charAt(0)) { case 'i': if ("interface".equals(ccSwitch0)) { if (true) do { return RESERVED;
134 } while(false); break SUCCESS; } break; case 'p': if ("protected".equals(ccSwitch0)) { if (true) do { return RESERVED;
135 } while(false); break SUCCESS; } break; case 't': if ("transient".equals(ccSwitch0)) { if (true) do { return RESERVED;
136 } while(false); break SUCCESS; } break; }; break; } case 10: { switch(ccSwitch0.charAt(0)) { case 'i': { switch(ccSwitch0.charAt(1)) { case 'm': if ("implements".equals(ccSwitch0)) { if (true) do { return RESERVED;
137 } while(false); break SUCCESS; } break; case 'n': if ("instanceof".equals(ccSwitch0)) { if (true) do { return RESERVED;
138 } while(false); break SUCCESS; } break; } break; } }; break; } case 12: { switch(ccSwitch0.charAt(0)) { case 's': if ("synchronized".equals(ccSwitch0)) { if (true) do { return RESERVED;
139 } while(false); break SUCCESS; } break; }; break; } } /* switch */ } while(false); /* OUTER */
/**
 * Lexes an identifier or keyword. Called by _getToken once it has read a backslash or a
 * character accepted by Character.isJavaIdentifierStart.
 * @param c the character that started the identifier; it is overwritten by the read loop
 * @throws LexerException if the text is a word that getKeyword classifies as RESERVED
 */
143 private int getIdentifier(int c) throws IOException {
// consume characters for as long as they may appear inside a Java identifier
145 while (Character.isJavaIdentifierPart((char)(c = in.read())));
// fetch the accumulated text from the SmartReader and classify it
147 String str = in.getString();
148 int result = getKeyword(str);
149 if (result == RESERVED) throw new LexerException("The reserved word \"" + str + "\" is not permitted in Ibex scripts");
// any result other than -1 is a keyword token code
150 if (result != -1) return result;
// not a keyword: keep the interned text as the token's string value
151 this.string = str.intern();
/**
 * Lexes a numeric literal and stores its value in this.number: JS.N(longval) when the literal
 * is an integer, JS.N(dval) otherwise.
 * @param c the first character of the literal (a digit, or a '.' that is followed by a digit)
 * @throws LexerException if an exponent has no digits or a decimal literal cannot be parsed
 */
155 private int getNumber(int c) throws IOException {
158 double dval = Double.NaN;
160 boolean isInteger = true;
162 // figure out what base we're using
// an 'x' or 'X' selects base 16 and restarts the SmartReader accumulator; a digit selects base 8
// NOTE(review): startString() seeds the new accumulator with the last character read, which here is the 'x' itself -- confirm numString ends up without it
164 if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
165 else if (isDigit(c)) base = 8;
// consume digits; letters only count as digits when the base is 16
168 while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
// decimal literal with a fractional part and/or an exponent
169 if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
171 if (c == '.') do { c = in.read(); } while (isDigit(c));
172 if (c == 'e' || c == 'E') {
// optional sign, then at least one exponent digit is required
174 if (c == '+' || c == '-') c = in.read();
// NOTE(review): the message below misspells "literal" as "listeral"
175 if (!isDigit(c)) throw new LexerException("float listeral did not have an exponent value");
176 do { c = in.read(); } while (isDigit(c));
181 String numString = in.getString();
// non-integer decimal: parse as a double, reporting a malformed literal as a LexerException
182 if (base == 10 && !isInteger) {
183 try { dval = (Double.valueOf(numString)).doubleValue(); }
184 catch (NumberFormatException ex) { throw new LexerException("invalid numeric literal: \"" + numString + "\""); }
// integer literal: parse in the detected base and mirror the value into dval
187 longval = Long.parseLong(numString, base);
188 dval = (double)longval;
// NOTE(review): the next three lines look like a fallback for when Long.parseLong rejects the text -- confirm
// parse as a double instead and treat the result as an integer if it survives the round trip through long
190 dval = Double.parseDouble(numString);
191 longval = (long) dval;
192 if (longval == dval) isInteger = true;
196 if (!isInteger) this.number = JS.N(dval);
197 else this.number = JS.N(longval);
/**
 * Lexes a quoted string literal and stores its value, interned, in this.string.
 * The text is taken straight from the SmartReader accumulator unless an escape is met; from
 * the first escape onwards the characters are collected in a StringBuffer instead.
 * @param c the quote character that opened the literal (_getToken passes '"' or '\'')
 * @throws LexerException on a newline or end of input before the closing quote, or on a
 *         non-hexadecimal character inside a unicode escape
 */
201 private int getString(int c) throws IOException {
// stays null until the first escape sequence is seen
202 StringBuffer stringBuf = null;
205 in.startString(); // start after the first "
206 while(c != quoteChar) {
207 if (c == '\n' || c == -1) throw new LexerException("unterminated string literal");
// first escape: switch from the accumulator to an explicit buffer holding the text read so far
209 if (stringBuf == null) {
210 in.unread(); // Don't include the backslash
211 stringBuf = new StringBuffer(in.getString());
// translate the character following the backslash
214 switch (c = in.read()) {
215 case 'b': c = '\b'; break;
216 case 'f': c = '\f'; break;
217 case 'n': c = '\n'; break;
218 case 'r': c = '\r'; break;
219 case 't': c = '\t'; break;
220 case 'v': c = '\u000B'; break;
221 case '\\': c = '\\'; break;
// unicode escape: four hexadecimal digits are folded into v
224 for(int i=0; i<4; i++) {
226 if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F')))
// NOTE(review): the message prints c (the escape letter), not the offending character ci -- confirm intent
227 throw new LexerException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence");
// NOTE(review): a hex digit carries 4 bits, so shifting v left by 8 per digit looks wrong; also, if ci is an
// int then ci + "" is its decimal character code, not the digit character itself -- confirm
228 v = (v << 8) | Integer.parseInt(ci + "", 16);
234 // just use the character that was escaped
// once buffering has started, every character is appended explicitly
238 if (stringBuf != null) stringBuf.append((char) c);
// a literal that contained escapes takes its value from the buffer...
241 if (stringBuf != null) this.string = stringBuf.toString().intern();
// ...otherwise the accumulator already holds the exact text
243 in.unread(); // miss the trailing "
244 this.string = in.getString().intern();
/**
 * Reads the next token directly from the character stream, ignoring the push-back stack.
 * Returns -1 at end of input. Identifiers, numbers and strings are delegated to getIdentifier,
 * getNumber and getString; operators and punctuation are recognized here, using in.match()
 * to look for longer operators first. Comments are skipped by recursing into getToken().
 * @throws LexerException on an illegal character, a "::" not followed by '=', or a nested or
 *         unterminated block comment
 */
250 private int _getToken() throws IOException {
// skip whitespace: space, tab, form feed, vertical tab and newline
// NOTE(review): carriage return is not in this list -- confirm it is handled elsewhere
252 do { c = in.read(); } while (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\n' );
253 if (c == -1) return -1;
254 if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
// a number starts with a digit, or with a '.' immediately followed by a digit
255 if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
256 if (c == '"' || c == '\'') return getString(c);
258 case ';': return SEMI;
265 case ',': return COMMA;
266 case '?': return HOOK;
// ":" is COLON and "::=" is GRAMMAR; "::" followed by anything else is reported through le()
267 case ':': return !in.match(':') ? COLON : in.match('=') ? GRAMMAR : le(":: is not a valid token");
268 case '.': return DOT;
269 case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
270 case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
271 case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
// "=" is ASSIGN, "==" is EQ, "===" is SHEQ
272 case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
// "!" is BANG, "!=" is NE, "!==" is SHNE
273 case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
274 case '%': return in.match('=') ? ASSIGN_MOD : MOD;
275 case '~': return BITNOT;
// "+=" is ASSIGN_ADD, "++=" is ADD_TRAP, "++" is INC, "+" is ADD
276 case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? (in.match('=') ? ADD_TRAP : INC) : ADD;
// "-=" is ASSIGN_SUB, "--=" is DEL_TRAP, "--" is DEC, "-" is SUB
277 case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? (in.match('=') ? DEL_TRAP : DEC) : SUB;
278 case '*': return in.match('=') ? ASSIGN_MUL : MUL;
279 case '<': return !in.match('<') ? (in.match('=') ? LE : LT) : in.match('=') ? ASSIGN_LSH : LSH;
// ">" GT, ">=" GE, ">>" RSH, ">>=" ASSIGN_RSH, ">>>" URSH, ">>>=" ASSIGN_URSH
280 case '>': return !in.match('>') ? (in.match('=') ? GE : GT) :
281 in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH);
283 if (in.match('=')) return ASSIGN_DIV;
// line comment: discard up to the newline or end of input, push the terminator back, then lex the next token
// NOTE(review): when the comment runs to end of input, unread() pushes back (char)-1 rather than an end marker -- confirm the following read copes with that
284 if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
285 if (!in.match('*')) return DIV;
// block comment: read until the closing star-slash pair or end of input
286 while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
// only a slash immediately followed by a star needs attention inside the comment
287 if (c == '\n' || c != '/' || !in.match('*')) continue;
// slash-star-slash inside a comment is taken as the end of the comment; any other slash-star is rejected as nesting
288 if (in.match('/')) return getToken();
289 throw new LexerException("nested comments are not permitted");
291 if (c == -1) throw new LexerException("unterminated comment");
292 return getToken(); // `goto retry'
293 default: throw new LexerException("illegal character: \'" + ((char)c) + "\'");
297 private int le(String s) throws LexerException { if (true) throw new LexerException(s); return 0; }
299 // SmartReader ////////////////////////////////////////////////////////////////
301 /** a Reader that tracks line numbers and can push back tokens */
302 private class SmartReader {
// underlying character source; all reads and push-backs go through it
303 PushbackReader reader = null;
/** Wraps r in a PushbackReader. */
306 public SmartReader(Reader r) { reader = new PushbackReader(r); }
/** Pushes back the character most recently returned by read(). */
307 public void unread() throws IOException { unread((char)lastread); }
/** Pushes back c and undoes the bookkeeping that read() did for it. */
308 public void unread(char c) throws IOException {
// col == -1 marks "a newline was just unread"; read() checks for it before advancing the line counters
310 if(c == '\n') col = -1;
// drop the character from the text being accumulated, if accumulation is active
312 if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
/** Consumes the next character and returns true if it equals c; otherwise consumes nothing and returns false. */
// NOTE(review): the matched character is consumed with reader.read() directly, so lastread, col and the accumulator are not updated for it -- confirm this is intended
314 public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
/** Looks at the next character without consuming it: reads it and pushes it straight back unless it is end of input. */
315 public int peek() throws IOException {
316 int peeked = reader.read();
317 if (peeked != -1) reader.unread((char)peeked);
/** Reads one character, recording it in lastread and updating the accumulator, the column and the line counters. */
320 public int read() throws IOException {
321 lastread = reader.read();
// NOTE(review): at end of input this appends (char)-1 to the accumulator -- confirm callers always unread it
322 if (accumulator != null) accumulator.append((char)lastread);
323 if (lastread != '\n' && lastread != '\r') col++;
324 if (lastread == '\n') {
325 // col is -1 if we just unread a newline, this is sort of ugly
326 if (col != -1) parserLine = ++line;
332 // FEATURE: could be much more efficient
// text collected since the last startString(); null while nothing is being accumulated
333 StringBuffer accumulator = null;
/** Starts accumulating text, seeded with the character most recently read. */
334 public void startString() {
335 accumulator = new StringBuffer();
336 accumulator.append((char)lastread);
/** Takes the accumulated text as an interned String. */
338 public String getString() throws IOException {
339 String ret = accumulator.toString().intern();
346 // Token PushBack code ////////////////////////////////////////////////////////////
// index into the two push-back arrays below; 0 means getToken() lexes fresh input
348 private int pushBackDepth = 0;
// token codes of pushed-back tokens; starts with room for 10 and is doubled by pushBackToken when nearly full
349 private int[] pushBackInts = new int[10];
// value (Number or String, possibly null) of each pushed-back token, parallel to pushBackInts
350 private Object[] pushBackObjects = new Object[10];
352 /** push back a token */
// op is the token code; obj is the token's value (a Number, a String, or null)
353 public final void pushBackToken(int op, Object obj) {
// grow both arrays to twice their size once the depth is within one slot of capacity
354 if (pushBackDepth >= pushBackInts.length - 1) {
355 int[] newInts = new int[pushBackInts.length * 2];
356 System.arraycopy(pushBackInts, 0, newInts, 0, pushBackInts.length);
357 pushBackInts = newInts;
358 Object[] newObjects = new Object[pushBackObjects.length * 2];
359 System.arraycopy(pushBackObjects, 0, newObjects, 0, pushBackObjects.length);
360 pushBackObjects = newObjects;
// record the token code and its value at the current depth
362 pushBackInts[pushBackDepth] = op;
363 pushBackObjects[pushBackDepth] = obj;
367 /** push back the most recently read token */
368 public final void pushBackToken() { pushBackToken(op, number != null ? (Object)number : (Object)string); }
370 /** read a token but leave it in the stream */
371 public final int peekToken() throws IOException {
// obtain the token through the normal path
// NOTE(review): presumably it is then pushed back so the next getToken() returns it again -- confirm
372 int ret = getToken();
/**
 * Returns the next token: a freshly lexed one from _getToken() when nothing has been pushed
 * back, otherwise one taken from the push-back arrays together with its saved value.
 * The returned token code is also stored in op.
 */
378 public final int getToken() throws IOException {
381 if (pushBackDepth == 0) {
// NOTE(review): this assignment runs before op is updated, so mostRecentlyReadToken receives the previous token -- confirm intent
382 mostRecentlyReadToken = op;
383 return op = _getToken();
// a pushed-back token is available: restore its code...
386 op = pushBackInts[pushBackDepth];
// ...and, when a value was saved with it, restore that into number or string according to its type (the other is set to null)
387 if (pushBackObjects[pushBackDepth] != null) {
388 number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null;
389 string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null;
/**
 * Exception raised for lexical errors. It extends IOException, and its message is prefixed with
 * the enclosing lexer's position in the form sourceName:line,col.
 */
394 class LexerException extends IOException {
/** Builds the message from the lexer's current sourceName, line and col, followed by s. */
395 public LexerException(String s) { super(sourceName + ":" + line + "," + col + ": " + s); }