src/org/mozilla/javascript/LineBuffer.java

   1 /* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-\r
   2  *\r
   3  * The contents of this file are subject to the Netscape Public\r
   4  * License Version 1.1 (the "License"); you may not use this file\r
   5  * except in compliance with the License. You may obtain a copy of\r
   6  * the License at http://www.mozilla.org/NPL/\r
   7  *\r
   8  * Software distributed under the License is distributed on an "AS\r
    9  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or\r
  10  * implied. See the License for the specific language governing\r
  11  * rights and limitations under the License.\r
  12  *\r
  13  * The Original Code is Rhino code, released\r
  14  * May 6, 1999.\r
  15  *\r
  16  * The Initial Developer of the Original Code is Netscape\r
  17  * Communications Corporation.  Portions created by Netscape are\r
  18  * Copyright (C) 1997-1999 Netscape Communications Corporation. All\r
  19  * Rights Reserved.\r
  20  *\r
  21  * Contributor(s): \r
  22  * Mike McCabe\r
  23  *\r
  24  * Alternatively, the contents of this file may be used under the\r
  25  * terms of the GNU Public License (the "GPL"), in which case the\r
  26  * provisions of the GPL are applicable instead of those above.\r
  27  * If you wish to allow use of your version of this file only\r
  28  * under the terms of the GPL and not to allow others to use your\r
  29  * version of this file under the NPL, indicate your decision by\r
  30  * deleting the provisions above and replace them with the notice\r
  31  * and other provisions required by the GPL.  If you do not delete\r
  32  * the provisions above, a recipient may use your version of this\r
  33  * file under either the NPL or the GPL.\r
  34  */\r
  35 \r
  36 package org.mozilla.javascript;\r
  37 \r
  38 import java.io.Reader;\r
  39 import java.io.IOException;\r
  40 \r
  41 /**\r
  42  * An input buffer that combines fast character-based access with\r
  43  * (slower) support for retrieving the text of the current line.  It\r
  44  * also supports building strings directly out of the internal buffer\r
  45  * to support fast scanning with minimal object creation.\r
  46  *\r
  47  * Note that it is customized in several ways to support the\r
  48  * TokenStream class, and should not be considered general.\r
  49  *\r
  50  * Credits to Kipp Hickman and John Bandhauer.\r
  51  *\r
  52  * @author Mike McCabe\r
  53  */\r
  54 final class LineBuffer {\r
  55     /*\r
  56      * for smooth operation of getLine(), this should be greater than\r
  57      * the length of any expected line.  Currently, 256 is 3% slower\r
  58      * than 4096 for large compiles, but seems safer given evaluateString.\r
  59      * Strings for the scanner are are built with StringBuffers\r
  60      * instead of directly out of the buffer whenever a string crosses\r
  61      * a buffer boundary, so small buffer sizes will mean that more\r
  62      * objects are created.\r
  63      */\r
  64     static final int BUFLEN = 256;\r
  65 \r
  66     LineBuffer(Reader in, int lineno) {\r
  67         this.in = in;\r
  68         this.lineno = lineno;\r
  69     }\r
  70 \r
  71     int read() throws IOException {\r
  72                 for(;;) {\r
  73                         if (end == offset && !fill())\r
  74                             return -1;\r
  75 \r
  76                         // Do only a bitmask + branch per character, at the cost of\r
  77                         // three branches per low-bits-only (or 2028/9) character.\r
  78             if ((buffer[offset] & '\udfd0') == 0) {\r
  79                             if (buffer[offset] == '\r') {\r
  80                                 // if the next character is a newline, skip past it.\r
  81                                 if ((offset + 1) < end) {\r
  82                                     if (buffer[offset + 1] == '\n')\r
  83                                         offset++;\r
  84                                 } else {\r
  85                                     // set a flag for fill(), in case the first char of the\r
  86                                     // next fill is a newline.\r
  87                                     lastWasCR = true;\r
  88                                 }\r
  89                             }\r
  90                 else \r
  91                     if ((buffer[offset] != '\n') \r
  92                         && (buffer[offset] != '\u2028')\r
  93                         && (buffer[offset] != '\u2029'))\r
  94                     { \r
  95                         if (Character.getType(buffer[offset])\r
  96                                                     == Character.FORMAT) {\r
  97                                             hadCFSinceStringStart = true;\r
  98                                             offset++;\r
  99                             continue;\r
 100                         }\r
 101                         return (int) buffer[offset++];\r
 102                                 }\r
 103                             offset++;\r
 104                             prevStart = lineStart;\r
 105                             lineStart = offset;\r
 106                             lineno++;\r
 107                             return '\n';\r
 108                         }\r
 109                         if ((buffer[offset] >= 128) \r
 110                                   && (Character.getType(buffer[offset]) == Character.FORMAT)) {\r
 111                                 hadCFSinceStringStart = true;\r
 112                                 offset++;\r
 113                         }\r
 114                         else\r
 115                                 break;\r
 116                 }\r
 117                 \r
 118         return (int) buffer[offset++];\r
 119     }\r
 120 \r
 121     void unread() {\r
 122         if (offset == 0)\r
 123             // We can get here when we're asked to unread() an\r
 124             // implicit EOF_CHAR.\r
 125             \r
 126             // This would also be wrong behavior in the general case,\r
 127             // because a peek() could map a buffer.length offset to 0\r
 128             // in the process of a fill(), and leave it there.  But\r
 129             // the scanner never calls peek() or a failed match()\r
 130             // followed by unread()... this would violate 1-character\r
 131             // lookahead.  So we're OK.\r
 132             return;\r
 133         offset--;\r
 134         if ((buffer[offset] & '\ufff0') == 0\r
 135             && (buffer[offset] == '\r' || buffer[offset] == '\n')) {\r
 136             // back off from the line start we presumably just registered...\r
 137             lineStart = prevStart;\r
 138             lineno--;\r
 139         }\r
 140     }\r
 141 \r
 142     int peek() throws IOException {\r
 143         if (end == offset && !fill())\r
 144             return -1;\r
 145 \r
 146         if (buffer[offset] == '\r')\r
 147             return '\n';\r
 148 \r
 149         return buffer[offset];\r
 150     }\r
 151 \r
 152     boolean match(char c) throws IOException {\r
 153         if (end == offset && !fill())\r
 154             return false;\r
 155 \r
 156         // This'd be a place where we'd need to map '\r' to '\n' and\r
 157         // do other updates, but TokenStream never looks ahead for\r
 158         // '\n', so we don't bother.\r
 159         if (buffer[offset] == c) {\r
 160             offset++;\r
 161             return true;\r
 162         }\r
 163         return false;\r
 164     }\r
 165 \r
 166     // Reconstruct a source line from the buffers.  This can be slow...\r
 167     String getLine() {\r
 168         StringBuffer result = new StringBuffer();\r
 169 \r
 170         int start = lineStart;\r
 171         if (start >= offset) {\r
 172             // the line begins somewhere in the other buffer; get that first.\r
 173             if (otherStart < otherEnd)\r
 174                 // if a line ending was seen in the other buffer... otherwise\r
 175                 // just ignore this strange case.\r
 176                 result.append(otherBuffer, otherStart,\r
 177                               otherEnd - otherStart);\r
 178             start = 0;\r
 179         }\r
 180 \r
 181         // get the part of the line in the current buffer.\r
 182         result.append(buffer, start, offset - start);\r
 183 \r
 184         // Get the remainder of the line.\r
 185         int i = offset;\r
 186         while(true) {\r
 187             if (i == buffer.length) {\r
 188                 // we're out of buffer, let's just expand it.  We do\r
 189                 // this instead of reading into a StringBuffer to\r
 190                 // preserve the stream for later reads.\r
 191                 char[] newBuffer = new char[buffer.length * 2];\r
 192                 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);\r
 193                 buffer = newBuffer;\r
 194                 int charsRead = 0;\r
 195                 try {\r
 196                     charsRead = in.read(buffer, end, buffer.length - end);\r
 197                 } catch (IOException ioe) {\r
 198                     // ignore it, we're already displaying an error...\r
 199                 }\r
 200                 if (charsRead < 0)\r
 201                     break;\r
 202                 end += charsRead;\r
 203             }\r
 204             if (buffer[i] == '\r' || buffer[i] == '\n')\r
 205                 break;\r
 206             i++;\r
 207         }\r
 208 \r
 209         result.append(buffer, offset, i - offset);\r
 210         return result.toString();\r
 211     }\r
 212 \r
 213     // Get the offset of the current character, relative to\r
 214     // the line that getLine() returns.\r
 215     int getOffset() {\r
 216         if (lineStart >= offset)\r
 217             // The line begins somewhere in the other buffer.\r
 218             return offset + (otherEnd - otherStart);\r
 219         else\r
 220             return offset - lineStart;\r
 221     }\r
 222 \r
 223     // Set a mark to indicate that the reader should begin\r
 224     // accumulating characters for getString().  The string begins\r
 225     // with the last character read.\r
 226     void startString() {\r
 227         if (offset == 0) {\r
 228             // We can get here if startString is called after a peek()\r
 229             // or failed match() with offset past the end of the\r
 230             // buffer.\r
 231 \r
 232             // We're at the beginning of the buffer, and the previous character\r
 233             // (which we want to include) is at the end of the last one, so\r
 234             // we just go to StringBuffer mode.\r
 235             stringSoFar = new StringBuffer();\r
 236                         \r
 237             stringSoFar.append(otherBuffer, otherEnd - 1, 1);\r
 238 \r
 239             stringStart = -1; // Set sentinel value.\r
 240                         hadCFSinceStringStart = ((otherBuffer[otherEnd - 1] >= 128) \r
 241                                         && Character.getType(otherBuffer[otherEnd - 1])\r
 242                                                                                                         == Character.FORMAT);\r
 243         } else {\r
 244             // Support restarting strings\r
 245             stringSoFar = null;\r
 246             stringStart = offset - 1;\r
 247                         hadCFSinceStringStart = ((buffer[stringStart] >= 128) \r
 248                                         && Character.getType(buffer[stringStart]) == Character.FORMAT);\r
 249         }\r
 250                 \r
 251     }\r
 252 \r
 253     // Get a string consisting of the characters seen since the last\r
 254     // startString.\r
 255     String getString() {\r
 256         String result;\r
 257 \r
 258         /*\r
 259          * There's one strange case here:  If the character offset currently\r
 260          * points to (which we never want to include in the string) is\r
 261          * a newline, then if the previous character is a carriage return,\r
 262          * we probably want to exclude that as well.  If the offset is 0,\r
 263          * then we hope that fill() handled excluding it from stringSoFar.\r
 264          */\r
 265         int loseCR = (offset > 0 &&\r
 266                       buffer[offset] == '\n' && buffer[offset - 1] == '\r') ?\r
 267             1 : 0;\r
 268 \r
 269         if (stringStart != -1) {\r
 270             // String mark is valid, and in this buffer.\r
 271 \r
 272             result = new String(buffer, stringStart, \r
 273                                 offset - stringStart - loseCR);\r
 274         } else {\r
 275             if (stringSoFar == null) \r
 276                 stringSoFar = new StringBuffer();\r
 277             // Exclude cr as well as nl of newline.  If offset is 0, then\r
 278             // hopefully fill() did the right thing.\r
 279             result = (stringSoFar.append(buffer, 0, offset - loseCR)).toString();\r
 280         }\r
 281         \r
 282         stringStart = -1;\r
 283         stringSoFar = null;\r
 284                 \r
 285                 if (hadCFSinceStringStart) {\r
 286                         char c[] = result.toCharArray();\r
 287                         StringBuffer x = null;\r
 288                         for (int i = 0; i < c.length; i++) {\r
 289                                 if (Character.getType(c[i]) == Character.FORMAT) {\r
 290                                         if (x == null) {\r
 291                                                 x = new StringBuffer();\r
 292                                                 x.append(c, 0, i);\r
 293                                         }\r
 294                                 }\r
 295                                 else\r
 296                                         if (x != null) x.append(c[i]);\r
 297                         }\r
 298                         if (x != null) result = x.toString();   \r
 299                 }\r
 300                 \r
 301         return result;\r
 302     }            \r
 303 \r
 304     boolean fill() throws IOException {\r
 305         // not sure I care...\r
 306         if (end - offset != 0) \r
 307             throw new IOException("fill of non-empty buffer");\r
 308 \r
 309         // If there's a string currently being accumulated, save\r
 310         // off the progress.\r
 311 \r
 312         /*\r
 313          * Exclude an end-of-buffer carriage return.  NOTE this is not\r
 314          * fully correct in the general case, because we really only\r
 315          * want to exclude the carriage return if it's followed by a\r
 316          * linefeed at the beginning of the next buffer.  But we fudge\r
 317          * because the scanner doesn't do this.\r
 318          */\r
 319         int loseCR = (offset > 0 && lastWasCR) ? 1 : 0;\r
 320 \r
 321         if (stringStart != -1) {\r
 322             // The mark is in the current buffer, save off from the mark to the\r
 323             // end.\r
 324             stringSoFar = new StringBuffer();\r
 325 \r
 326             stringSoFar.append(buffer, stringStart, end - stringStart - loseCR);\r
 327             stringStart = -1;\r
 328         } else if (stringSoFar != null) {\r
 329             // the string began prior to the current buffer, so save the\r
 330             // whole current buffer.\r
 331             stringSoFar.append(buffer, 0, end - loseCR);\r
 332         }\r
 333 \r
 334         // swap buffers\r
 335         char[] tempBuffer = buffer;\r
 336         buffer = otherBuffer;\r
 337         otherBuffer = tempBuffer;\r
 338 \r
 339         // allocate the buffers lazily, in case we're handed a short string.\r
 340         if (buffer == null) {\r
 341             buffer = new char[BUFLEN];\r
 342         }\r
 343 \r
 344         // buffers have switched, so move the newline marker.\r
 345         otherStart = lineStart;\r
 346         otherEnd = end;\r
 347 \r
 348         // set lineStart to a sentinel value, unless this is the first\r
 349         // time around.\r
 350         prevStart = lineStart = (otherBuffer == null) ? 0 : buffer.length + 1;\r
 351         \r
 352         offset = 0;\r
 353         end = in.read(buffer, 0, buffer.length);\r
 354         if (end < 0) {\r
 355             end = 0;\r
 356 \r
 357             // can't null buffers here, because a string might be retrieved\r
 358             // out of the other buffer, and a 0-length string might be\r
 359             // retrieved out of this one.\r
 360 \r
 361             hitEOF = true;\r
 362             return false;\r
 363         }\r
 364 \r
 365         // If the last character of the previous fill was a carriage return,\r
 366         // then ignore a newline.\r
 367 \r
 368         // There's another bizzare special case here.  If lastWasCR is\r
 369         // true, and we see a newline, and the buffer length is\r
 370         // 1... then we probably just read the last character of the\r
 371         // file, and returning after advancing offset is not the right\r
 372         // thing to do.  Instead, we try to ignore the newline (and\r
 373         // likely get to EOF for real) by doing yet another fill().\r
 374         if (lastWasCR) {\r
 375             if (buffer[0] == '\n') {\r
 376               offset++;\r
 377               if (end == 1)\r
 378                   return fill();\r
 379             }\r
 380             lineStart = offset;\r
 381             lastWasCR = false;\r
 382         }\r
 383         return true;\r
 384     }\r
 385 \r
 386     int getLineno() { return lineno; }\r
 387     boolean eof() { return hitEOF; }\r
 388     \r
 389     private Reader in;\r
 390     private char[] otherBuffer = null;\r
 391     private char[] buffer = null;\r
 392 \r
 393     // Yes, there are too too many of these.\r
 394     private int offset = 0;\r
 395     private int end = 0;\r
 396     private int otherEnd;\r
 397     private int lineno;\r
 398 \r
 399     private int lineStart = 0;\r
 400     private int otherStart = 0;\r
 401     private int prevStart = 0;\r
 402     \r
 403     private boolean lastWasCR = false;\r
 404     private boolean hitEOF = false;\r
 405 \r
 406     private int stringStart = -1;\r
 407     private StringBuffer stringSoFar = null;\r
 408         private boolean hadCFSinceStringStart = false;\r
 409 \r
 410 }\r
 411 \r
 412 \r