1 /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
\r
3 * The contents of this file are subject to the Netscape Public
\r
4 * License Version 1.1 (the "License"); you may not use this file
\r
5 * except in compliance with the License. You may obtain a copy of
\r
6 * the License at http://www.mozilla.org/NPL/
\r
8 * Software distributed under the License is distributed on an "AS
\r
9 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
\r
10 * implied. See the License for the specific language governing
\r
11 * rights and limitations under the License.
\r
13 * The Original Code is Rhino code, released
\r
16 * The Initial Developer of the Original Code is Netscape
\r
17 * Communications Corporation. Portions created by Netscape are
\r
18 * Copyright (C) 1997-1999 Netscape Communications Corporation. All
\r
24 * Alternatively, the contents of this file may be used under the
\r
25 * terms of the GNU Public License (the "GPL"), in which case the
\r
26 * provisions of the GPL are applicable instead of those above.
\r
27 * If you wish to allow use of your version of this file only
\r
28 * under the terms of the GPL and not to allow others to use your
\r
29 * version of this file under the NPL, indicate your decision by
\r
30 * deleting the provisions above and replace them with the notice
\r
31 * and other provisions required by the GPL. If you do not delete
\r
32 * the provisions above, a recipient may use your version of this
\r
33 * file under either the NPL or the GPL.
\r
36 package org.mozilla.javascript;
\r
38 import java.io.Reader;
\r
39 import java.io.IOException;
\r
42 * An input buffer that combines fast character-based access with
\r
43 * (slower) support for retrieving the text of the current line. It
\r
44 * also supports building strings directly out of the internal buffer
\r
45 * to support fast scanning with minimal object creation.
\r
47 * Note that it is customized in several ways to support the
\r
48 * TokenStream class, and should not be considered general.
\r
50 * Credits to Kipp Hickman and John Bandhauer.
\r
52 * @author Mike McCabe
\r
54 final class LineBuffer {
\r
56 * for smooth operation of getLine(), this should be greater than
\r
57 * the length of any expected line. Currently, 256 is 3% slower
\r
58 * than 4096 for large compiles, but seems safer given evaluateString.
\r
59 * Strings for the scanner are built with StringBuffers
\r
60 * instead of directly out of the buffer whenever a string crosses
\r
61 * a buffer boundary, so small buffer sizes will mean that more
\r
62 * objects are created.
\r
64 static final int BUFLEN = 256;
\r
66 LineBuffer(Reader in, int lineno) {
\r
68 this.lineno = lineno;
\r
71 int read() throws IOException {
\r
73 if (end == offset && !fill())
\r
76 // Do only a bitmask + branch per character, at the cost of
\r
77 // three branches per low-bits-only (or 2028/9) character.
\r
78 if ((buffer[offset] & '\udfd0') == 0) {
\r
79 if (buffer[offset] == '\r') {
\r
80 // if the next character is a newline, skip past it.
\r
81 if ((offset + 1) < end) {
\r
82 if (buffer[offset + 1] == '\n')
\r
85 // set a flag for fill(), in case the first char of the
\r
86 // next fill is a newline.
\r
91 if ((buffer[offset] != '\n')
\r
92 && (buffer[offset] != '\u2028')
\r
93 && (buffer[offset] != '\u2029'))
\r
95 if (Character.getType(buffer[offset])
\r
96 == Character.FORMAT) {
\r
97 hadCFSinceStringStart = true;
\r
101 return (int) buffer[offset++];
\r
104 prevStart = lineStart;
\r
105 lineStart = offset;
\r
109 if ((buffer[offset] >= 128)
\r
110 && (Character.getType(buffer[offset]) == Character.FORMAT)) {
\r
111 hadCFSinceStringStart = true;
\r
118 return (int) buffer[offset++];
\r
123 // We can get here when we're asked to unread() an
\r
124 // implicit EOF_CHAR.
\r
126 // This would also be wrong behavior in the general case,
\r
127 // because a peek() could map a buffer.length offset to 0
\r
128 // in the process of a fill(), and leave it there. But
\r
129 // the scanner never calls peek() or a failed match()
\r
130 // followed by unread()... this would violate 1-character
\r
131 // lookahead. So we're OK.
\r
134 if ((buffer[offset] & '\ufff0') == 0
\r
135 && (buffer[offset] == '\r' || buffer[offset] == '\n')) {
\r
136 // back off from the line start we presumably just registered...
\r
137 lineStart = prevStart;
\r
142 int peek() throws IOException {
\r
143 if (end == offset && !fill())
\r
146 if (buffer[offset] == '\r')
\r
149 return buffer[offset];
\r
152 boolean match(char c) throws IOException {
\r
153 if (end == offset && !fill())
\r
156 // This'd be a place where we'd need to map '\r' to '\n' and
\r
157 // do other updates, but TokenStream never looks ahead for
\r
158 // '\n', so we don't bother.
\r
159 if (buffer[offset] == c) {
\r
166 // Reconstruct a source line from the buffers. This can be slow...
\r
168 StringBuffer result = new StringBuffer();
\r
170 int start = lineStart;
\r
171 if (start >= offset) {
\r
172 // the line begins somewhere in the other buffer; get that first.
\r
173 if (otherStart < otherEnd)
\r
174 // if a line ending was seen in the other buffer... otherwise
\r
175 // just ignore this strange case.
\r
176 result.append(otherBuffer, otherStart,
\r
177 otherEnd - otherStart);
\r
181 // get the part of the line in the current buffer.
\r
182 result.append(buffer, start, offset - start);
\r
184 // Get the remainder of the line.
\r
187 if (i == buffer.length) {
\r
188 // we're out of buffer, let's just expand it. We do
\r
189 // this instead of reading into a StringBuffer to
\r
190 // preserve the stream for later reads.
\r
191 char[] newBuffer = new char[buffer.length * 2];
\r
192 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
\r
193 buffer = newBuffer;
\r
196 charsRead = in.read(buffer, end, buffer.length - end);
\r
197 } catch (IOException ioe) {
\r
198 // ignore it, we're already displaying an error...
\r
204 if (buffer[i] == '\r' || buffer[i] == '\n')
\r
209 result.append(buffer, offset, i - offset);
\r
210 return result.toString();
\r
213 // Get the offset of the current character, relative to
\r
214 // the line that getLine() returns.
\r
216 if (lineStart >= offset)
\r
217 // The line begins somewhere in the other buffer.
\r
218 return offset + (otherEnd - otherStart);
\r
220 return offset - lineStart;
\r
223 // Set a mark to indicate that the reader should begin
\r
224 // accumulating characters for getString(). The string begins
\r
225 // with the last character read.
\r
226 void startString() {
\r
228 // We can get here if startString is called after a peek()
\r
229 // or failed match() with offset past the end of the
\r
232 // We're at the beginning of the buffer, and the previous character
\r
233 // (which we want to include) is at the end of the last one, so
\r
234 // we just go to StringBuffer mode.
\r
235 stringSoFar = new StringBuffer();
\r
237 stringSoFar.append(otherBuffer, otherEnd - 1, 1);
\r
239 stringStart = -1; // Set sentinel value.
\r
240 hadCFSinceStringStart = ((otherBuffer[otherEnd - 1] >= 128)
\r
241 && Character.getType(otherBuffer[otherEnd - 1])
\r
242 == Character.FORMAT);
\r
244 // Support restarting strings
\r
245 stringSoFar = null;
\r
246 stringStart = offset - 1;
\r
247 hadCFSinceStringStart = ((buffer[stringStart] >= 128)
\r
248 && Character.getType(buffer[stringStart]) == Character.FORMAT);
\r
253 // Get a string consisting of the characters seen since the last
\r
255 String getString() {
\r
259 * There's one strange case here: If the character offset currently
\r
260 * points to (which we never want to include in the string) is
\r
261 * a newline, then if the previous character is a carriage return,
\r
262 * we probably want to exclude that as well. If the offset is 0,
\r
263 * then we hope that fill() handled excluding it from stringSoFar.
\r
265 int loseCR = (offset > 0 &&
\r
266 buffer[offset] == '\n' && buffer[offset - 1] == '\r') ?
\r
269 if (stringStart != -1) {
\r
270 // String mark is valid, and in this buffer.
\r
272 result = new String(buffer, stringStart,
\r
273 offset - stringStart - loseCR);
\r
275 if (stringSoFar == null)
\r
276 stringSoFar = new StringBuffer();
\r
277 // Exclude cr as well as nl of newline. If offset is 0, then
\r
278 // hopefully fill() did the right thing.
\r
279 result = (stringSoFar.append(buffer, 0, offset - loseCR)).toString();
\r
283 stringSoFar = null;
\r
285 if (hadCFSinceStringStart) {
\r
286 char c[] = result.toCharArray();
\r
287 StringBuffer x = null;
\r
288 for (int i = 0; i < c.length; i++) {
\r
289 if (Character.getType(c[i]) == Character.FORMAT) {
\r
291 x = new StringBuffer();
\r
296 if (x != null) x.append(c[i]);
\r
298 if (x != null) result = x.toString();
\r
304 boolean fill() throws IOException {
\r
305 // not sure I care...
\r
306 if (end - offset != 0)
\r
307 throw new IOException("fill of non-empty buffer");
\r
309 // If there's a string currently being accumulated, save
\r
310 // off the progress.
\r
313 * Exclude an end-of-buffer carriage return. NOTE this is not
\r
314 * fully correct in the general case, because we really only
\r
315 * want to exclude the carriage return if it's followed by a
\r
316 * linefeed at the beginning of the next buffer. But we fudge
\r
317 * because the scanner doesn't do this.
\r
319 int loseCR = (offset > 0 && lastWasCR) ? 1 : 0;
\r
321 if (stringStart != -1) {
\r
322 // The mark is in the current buffer, save off from the mark to the
\r
324 stringSoFar = new StringBuffer();
\r
326 stringSoFar.append(buffer, stringStart, end - stringStart - loseCR);
\r
328 } else if (stringSoFar != null) {
\r
329 // the string began prior to the current buffer, so save the
\r
330 // whole current buffer.
\r
331 stringSoFar.append(buffer, 0, end - loseCR);
\r
335 char[] tempBuffer = buffer;
\r
336 buffer = otherBuffer;
\r
337 otherBuffer = tempBuffer;
\r
339 // allocate the buffers lazily, in case we're handed a short string.
\r
340 if (buffer == null) {
\r
341 buffer = new char[BUFLEN];
\r
344 // buffers have switched, so move the newline marker.
\r
345 otherStart = lineStart;
\r
348 // set lineStart to a sentinel value, unless this is the first
\r
350 prevStart = lineStart = (otherBuffer == null) ? 0 : buffer.length + 1;
\r
353 end = in.read(buffer, 0, buffer.length);
\r
357 // can't null buffers here, because a string might be retrieved
\r
358 // out of the other buffer, and a 0-length string might be
\r
359 // retrieved out of this one.
\r
365 // If the last character of the previous fill was a carriage return,
\r
366 // then ignore a newline.
\r
368 // There's another bizarre special case here. If lastWasCR is
\r
369 // true, and we see a newline, and the buffer length is
\r
370 // 1... then we probably just read the last character of the
\r
371 // file, and returning after advancing offset is not the right
\r
372 // thing to do. Instead, we try to ignore the newline (and
\r
373 // likely get to EOF for real) by doing yet another fill().
\r
375 if (buffer[0] == '\n') {
\r
380 lineStart = offset;
\r
386 int getLineno() { return lineno; } // Returns the current value of the lineno field (initialized from the constructor's lineno argument).
\r
387 boolean eof() { return hitEOF; } // Returns the hitEOF flag; presumably set once the underlying reader is exhausted -- TODO confirm where it is assigned.
\r
390 private char[] otherBuffer = null;
\r
391 private char[] buffer = null;
\r
393 // Yes, there are too too many of these.
\r
394 private int offset = 0;
\r
395 private int end = 0;
\r
396 private int otherEnd;
\r
397 private int lineno;
\r
399 private int lineStart = 0;
\r
400 private int otherStart = 0;
\r
401 private int prevStart = 0;
\r
403 private boolean lastWasCR = false;
\r
404 private boolean hitEOF = false;
\r
406 private int stringStart = -1;
\r
407 private StringBuffer stringSoFar = null;
\r
408 private boolean hadCFSinceStringStart = false;
\r