src/org/ibex/util/XML.java

   1 // Copyright (C) 2003 Adam Megacz <adam@ibex.org> all rights reserved.
   2 //
   3 // You may modify, copy, and redistribute this code under the terms of
   4 // the GNU Library Public License version 2.1, with the exception of
   5 // the portion of clause 6a after the semicolon (aka the "obnoxious
   6 // relink clause")
   7
   8 package org.ibex.util;
   9
  10 import java.io.Reader;
  11 import java.io.Writer;
  12 import java.io.IOException;
  13 import java.io.EOFException;
  14
  15 /**
  16  * An Event-Driven, Non-Validating XML Parser with Namespace support.
  17  *
  18  * A subclass can implement the abstract functions for receiving details
  19  * about an xml file as it is parsed. To initiate a parse, use the parse()
  20  * function.
  21  *
  22  * <h3>Implementation Notes</h3>
  23  * <p>As the parser traverses into an element, it adds it to the linked list
  24  * called <tt>elements</tt>. However, <tt>elements</tt> has been pre-filled
  25  * with instances of the Element inner class. So in the vast majority of
  26  * cases, the pointer current is moved along one, and the values for the
  27  * new element are filled into the current object.</p>
  28  *
  29  * <p>This parser supports all the unicode ranges required by the XML
  30  * Specification. However, it is optimised for well-formed ASCII documents.
  31  * Documents containing unicode Names and Attributes will take much longer
  32  * to process, and invalid documents (badly formed Names or invalid attributes)
  33  * will be run through a test on every single unicode character range before
  34  * being declared invalid.</p>
  35  *
  36  * <ul>
  37  *  <li>Each time the buffer offset <tt>off</tt> is moved, the length
  38  *   <tt>len</tt> must be decreased.</li>
  39  *  <li>Each time the buffer length is decreased, it must be checked to make
  40  *   sure it is &gt;0.</li>
  41  *  <li><i>error</i> is defined as a Validity Constraint Violation and
  42  *   is recoverable</li>
  43  *  <li><i>fatal error</i> is defined as a Well-formedness Constraint
  44  *   Violation and is not recoverable</li>
  45  * </ul>
  46  *
  47  * @author David Crawshaw
  48  * @see <a href="http://w3.org/TR/REC-xml">XML Specification</a>
  49  * @see <a href="http://w3.org/TR/REC-xml-names">XML Namespaces</a>
  50  */
  51 public abstract class XML {
  52
  53     /////////////////////////////////////////////////////////////////////////////////////////////
  54     // XML Parser
  55     /////////////////////////////////////////////////////////////////////////////////////////////
  56
  57     public static final int BUFFER_SIZE = 255;
  58
  59     /** static pool of XML.Element instances shared by all XML Parsers. */
  60     private static final Queue elements = new Queue(30);
  61
  62     private static final char[] single_amp  = new char[] { '&'  };
  63     private static final char[] single_apos = new char[] { '\'' };
  64     private static final char[] single_gt   = new char[] { '>'  };
  65     private static final char[] single_lt   = new char[] { '<'  };
  66     private static final char[] single_quot = new char[] { '"'  };
  67
  68     int line;
  69     int col;
  70
  71     Reader in;
  72     char[] buf;
  73     int    off;
  74     int    base;  // base+off == distance into the stream
  75     int    len;
  76
  77     Element current = null;
  78
  79     // used in readEntity() to process a single character without creating a new array
  80     private char[] singlechar = new char[1];
  81
  82
  83     public XML() { this(BUFFER_SIZE); }
  84
  85     public XML(int bSize) {
  86         buf = new char[bSize];
  87         //current = (Element)elements.remove(false);
  88         if (current == null) current = newElement();
  89     }
  90
  91     /** Returns the line number at the beginning of the last process call. */
  92     public int getLine() { return line; }
  93
  94     /** Returns the column number at the beginning of the last process call. */
  95     public int getCol()  { return col; }
  96
  97     /** Returns the global file offset at the beginning of the last process call. */
  98     public int getGlobalOffset() { return base + off; }
  99
 100     Element newElement() { return new Element(); }
 101
 102     /**
 103      * Parse given input and call the abstract event functions.
 104      *
 105      * Careful with threading, as this function is not synchronized.
 106      */
 107     public final void parse(Reader reader) throws IOException, Exn {
 108         in  = reader;
 109         off = len = 0;
 110         line = col = 1;
 111
 112         clear(); // clean up possible mid-way linked-list element
 113
 114         try {
 115             // process the stream
 116             while (true) {
 117                 if (!buffer(1)) {
 118                     if (current.qName == null) break;
 119                     throw new Exn("reached eof without closing <"+current.qName+"> element", Exn.WFC, getLine(), getCol());
 120                 }
 121
 122                 if (buf[off] == '<') readTag();
 123                 readChars(current.qName != null);
 124             }
 125         } finally { clear(); } // clean up elements
 126     }
 127
 128     // Stuff below here is Adam's hack //////////////////////////////////////////////////////////////////////////////
 129
 130     boolean done = false;
 131     public static class Pull extends XML {
 132         public Pull(Reader in) { this.in = in; off = len = 0; line = col = 1; clear(); }
 133         StringBuffer sb = new StringBuffer();
 134         Element pending = null;
 135         boolean emptytag = true;
 136         public int level = 0;
 137         public final void startElement(Element e) throws Exn { emptytag = false; level++; pending = e; }
 138         public final void endElement(Element e) throws Exn, IOException { emptytag=pending!=null; level--; }
 139         public final void whitespace(char[] ch, int start, int length) throws Exn, IOException { }
 140         public final void characters(char[] ch, int start, int length) throws Exn, IOException {
 141             emptytag=false; sb.append(ch,start,length);}
 142         public Object read() throws Exn, IOException {
 143             while(!done) {
 144                 if (pending != null) { Element ret = pending; pending = null; ret.level = level-(emptytag?0:1); return ret; }
 145                 if (sb.length() > 0) { String ret = sb.toString(); sb.setLength(0); return ret; }
 146                 if (!buffer(1)) {
 147                     if (done) return null;
 148                     throw new Exn("reached eof without closing <"+current.qName+"> element", Exn.WFC, getLine(), getCol());
 149                 }
 150                 if (buf[off] == '<') readTag(); else readChars(!done);
 151             }
 152             return null;
 153         }
 154     }
 155
 156
 157     // Stuff above here is Adam's hack //////////////////////////////////////////////////////////////////////////////
 158
 159     /** remove any leftover elements from the linked list and queue them */
 160     final void clear() {
 161         for (Element last = current; current.parent != null; ) {
 162             current = current.parent;
 163             last.clear();
 164             elements.append(last);
 165         }
 166         current.clear();
 167     }
 168
 169     /** reads in a tag. expects <tt>buf[off] == '&#60;'</tt> */
 170     final void readTag() throws IOException, Exn {
 171         // Start Tag    '<' Name (S Attribute)* S? '>'
 172         boolean starttag  = true;
 173
 174         // End Tag     '</' Name S? '>'
 175         boolean endtag    = false;
 176
 177         // if (starttag & endtag) then: EmptyElemTag '<' Name (S Attribute)* S? '/>'
 178
 179         // Position in the name of the ':' namespace prefix
 180         int prefix = -1;
 181
 182         int namelen   = 0;
 183
 184         col++; off++; len--;
 185         if (!buffer(1)) throw new EOFException("Unexpected EOF processing element tag");
 186
 187         // work out what we can from the beginning of the tag
 188         char s = buf[off];
 189         if (s == '!') {
 190             // definitions here don't necessarily conform to xml spec (as DTDs not yet implemented)
 191             col++; off++; len--;
 192             if (!buffer(4)) throw new EOFException("Unexpected EOF processing <! element");
 193
 194             boolean bad = false;
 195             switch (buf[off]) {
 196                 case '-':
 197                     if (buf[off+1] != '-') { bad = true; break; }
 198                     col += 2; off += 2; len -= 2;
 199
 200                     // Comment        '<!--'      ((Char - '-') | ('-' (Char - '-')))* '-->'
 201                     readChars(false, "-->", false);
 202                     col += 3; off += 3; len -= 3;
 203                     break;
 204
 205                 // we don't care about the following definitions
 206
 207                 case 'A':
 208                     if (!buffer(7)
 209                             || buf[off+1] != 'T' || buf[off+2] != 'T' || buf[off+3] != 'L'
 210                             || buf[off+4] != 'I' || buf[off+5] != 'S' || buf[off+6] != 'T') {
 211                         bad = true; break;
 212                     }
 213                     col += 7; off += 7; len -= 7;
 214
 215                     // ATTLIST        '<!ATTLIST'   (Char* - '>') '>'
 216                     readChars(false, ">", true);
 217                     col++; off++; len--;
 218                     break;
 219                 case 'D':
 220                     if (!buffer(7)
 221                             || buf[off+1] != 'O' || buf[off+2] != 'C' || buf[off+3] != 'T'
 222                             || buf[off+4] != 'Y' || buf[off+5] != 'P' || buf[off+6] != 'E') {
 223                         bad = true; break;
 224                     }
 225                     col += 7; off += 7; len -= 7;
 226
 227                     // DTD            '<!DOCTYPE'   (Char* - '>') '>'
 228                     readChars(false, ">", true);
 229                     col++; off++; len--;
 230                     break;
 231                 case 'E':
 232                     if (!buffer(7)) {
 233                         bad = true;
 234                     } else if (buf[off+1] == 'L' && buf[off+2] == 'E' && buf[off+3] == 'M'
 235                             && buf[off+4] == 'E' && buf[off+5] == 'N' && buf[off+6] == 'T') {
 236                         // ELEMENT        '<!ELEMENT'   (Char* - '>') '>'
 237                         readChars(false, ">", true);
 238                         col++; off++; len--;
 239
 240                     } else if (buf[off+1] == 'N' && buf[off+2] == 'T' && buf[off+3] == 'I'
 241                             && buf[off+4] == 'T' && buf[off+5] == 'Y') {
 242                         // ENTITY         '<!ENTITY'    (Char* - '>') '>'
 243                         readChars(false, ">", true);
 244                         col++; off++; len--;
 245
 246                     } else {
 247                         bad = true;
 248                     }
 249                     break;
 250
 251                 case 'N':
 252                     if (!buffer(8)
 253                             || buf[off+1] != 'O' || buf[off+2] != 'T' || buf[off+3] != 'A' || buf[off+4] != 'T'
 254                             || buf[off+5] != 'I' || buf[off+6] != 'O' || buf[off+7] != 'N') {
 255                         bad = true; break;
 256                     }
 257                     col += 8; off += 8; len -= 8;
 258                     // NOTATION       '<!NOTATION'  (Char* - '>') '>'
 259                     readChars(false, ">", true);
 260                     col++; off++; len--;
 261
 262                     break;
 263                 default: bad = true;
 264             }
 265
 266             if (bad) throw new Exn("element tag start character is invalid", Exn.MARKUP, getLine(), getCol());
 267
 268         } else if (s == '?') {
 269             // PI (Ignored)   '<?'  (Char* - (Char* '?>' Char*))  '?>'
 270             col++; off++; len--;
 271             readChars(false, "?>", true);
 272             if (!buffer(2)) throw new EOFException("Unexpected EOF at end of Processing Instruction");
 273             col += 2; off += 2; len -= 2;
 274
 275         } else if (s == '[') {
 276             if (!buffer(7)
 277                     || buf[off+1] != 'C' || buf[off+2] != 'D' || buf[off+3] != 'A'
 278                     || buf[off+4] != 'T' || buf[off+5] != 'A' || buf[off+6] != '[') {
 279                 col++; off--; len++;
 280                 // Conditional    '<![' (Char* - (Char* ']]>' Char*)) ']]>'
 281                 readChars(false, "]]>", false);
 282             } else {
 283                 col += 7; off += 7; len -=7;
 284                 // CDATA          '<![CDATA[' (Char* - (Char* ']]>' Char*))        ']]>'
 285                 readChars(true, "]]>", false);
 286             }
 287             col += 3; off += 3; len -= 3;
 288         } else {
 289             if (s == '/') {
 290                 // End Tag        '</' Name S? '>'
 291                 starttag = false;
 292                 endtag = true;
 293
 294                 col++; off++; len--;
 295                 if (!buffer(1)) throw new EOFException("Unexpected EOF processing end tag");
 296                 s = buf[off];
 297             }
 298
 299             if (!Name(s)) throw new Exn("invalid starting character in element name", Exn.MARKUP, getLine(), getCol());
 300
 301             // find the element name (defined in XML Spec: section 2.3)
 302             for (namelen = 0; ; namelen++) {
 303                 if (!buffer(namelen+1)) throw new EOFException("Unexpected EOF in element tag name");
 304
 305                 s = buf[off+namelen];
 306
 307                 if (S(s) || s == '>') {
 308                     break;
 309                 } else if (s == '/') {
 310                     endtag = true;
 311                     break;
 312                 } else if (s == ':' && namelen > 0 && prefix < 1) {
 313                     // we have a definition of the prefix range available
 314                     prefix = namelen;
 315                 } else if (!NameChar(s)) {
 316                     throw new Exn("element name contains invalid character", Exn.MARKUP, getLine(), getCol());
 317                 }
 318             }
 319
 320             // process name (based on calculated region)
 321             if (namelen < 1) throw new Exn("element name is null", Exn.MARKUP, getLine(), getCol());
 322
 323             // we have marked out the name region, so turn it into a string and move on
 324             String qName = new String(buf, off, namelen);
 325
 326             col += namelen; off += namelen; len -= namelen;
 327
 328             if (starttag) {
 329                 // create the in-memory element representation of this beast
 330                 // if current.qName == null then this is the root element we're dealing with
 331                 if (current.qName != null) {
 332                     Element next = newElement();
 333                     //next.clear(); // TODO: remove as elements now checked as they're added to the queue
 334                     next.parent = current;
 335                     current = next;
 336                 }
 337
 338                 current.qName = qName;
 339
 340                 if (prefix > 0) {
 341                     current.prefix = current.qName.substring(0, prefix);
 342                     current.localName = current.qName.substring(prefix+1);
 343                 } else {
 344                     current.prefix = null;
 345                     current.localName = current.qName;
 346                 }
 347
 348                 // process attributes
 349                 readWhitespace();
 350                 if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 1");
 351                 while (buf[off] != '/' && buf[off] != '>') {
 352                     readAttribute();
 353                     if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 2");
 354                     readWhitespace();
 355                 }
 356
 357                 // work out the uri of this element
 358                 current.uri = current.getUri(current.getPrefix());
 359                 if (current.getUri().equals("") && current.getPrefix() != null)
 360                     current.addError(new Exn("undefined prefix '"+current.getPrefix()+"'", Exn.NC, getLine(), getCol()));
 361
 362             } else {
 363                 // this is an end-of-element tag
 364                 if (!qName.equals(current.getQName())) throw new Exn(
 365                     "end tag </"+qName+"> does not line up with start tag <"+current.getQName()+">", Exn.WFC, getLine(), getCol()
 366                 );
 367             }
 368
 369             // deal with whitespace
 370             readWhitespace();
 371
 372             // process tag close
 373             if (!buffer(1)) throw new EOFException("Unexpected EOF before end of tag");
 374             if (buf[off] == '/') {
 375                 endtag = true;
 376                 off++; len--; col++;
 377             }
 378             if (!buffer(1)) throw new EOFException("Unexpected EOF before end of endtag");
 379             if (buf[off] == '>') {
 380                 off++; len--; col++;
 381             } else {
 382                 throw new Exn("missing '>' character from element '"+qName+"'", Exn.MARKUP, getLine(), getCol());
 383             }
 384
 385             // send element signals
 386             if (starttag) startElement(current);
 387             if (endtag) {
 388                 endElement(current);
 389
 390                 // we just closed an element, so remove it from the element 'stack'
 391                 if (current.getParent() == null) {
 392                     // we just finished the root element
 393                     done = true;
 394                 } else {
 395                     Element last = current;
 396                     current = current.parent;
 397                     //last.clear();  FIXME
 398                     elements.append(last);
 399                 }
 400             }
 401         }
 402     }
 403
 404
 405     /** reads in an attribute of an element. expects Name(buf[off]) */
 406     private final void readAttribute() throws IOException, Exn {
 407         int ref = 0;
 408         int prefix = 0;
 409         String n, v, p, u; // attribute name, value, prefix and uri respectively
 410         n = v = p = u = null;
 411         char s;
 412
 413         // find the element name (defined in XML Spec: section 2.3)
 414         for (ref= 0; ; ref++) {
 415             if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in read attribute loop part 1");
 416
 417             s = buf[off+ref];
 418
 419             if (s == '=' || S(s)) {
 420                 break;
 421             } else if (s == ':' && ref > 0 && prefix < 1) {
 422                 // we have a definition of the prefix range available
 423                 prefix = ref+1;
 424             } else if (!NameChar(s)) {
 425                 throw new Exn("attribute name contains invalid characters", Exn.MARKUP, getLine(), getCol());
 426             }
 427         }
 428
 429         // determine prefix and key name
 430         if (prefix > 0) {
 431             p = new String(buf, off, prefix-1);
 432             col += prefix; off += prefix; len -= prefix; ref -= prefix;
 433         }
 434         n = new String(buf, off, ref);
 435         col += ref; off += ref; len -= ref;
 436
 437         // find name/value divider ('=')
 438         readWhitespace();
 439         if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider");
 440         if (buf[off] != '=') throw new Exn("attribute name not followed by '=' sign", Exn.MARKUP, getLine(), getCol());
 441
 442         col++; off++; len--;
 443         readWhitespace();
 444
 445         if (!buffer(1)) throw new EOFException("Unexpected EOF after attribute '=' divider");
 446
 447         char wrap;
 448         if (buf[off] == '\'' || buf[off] == '"') {
 449             wrap = buf[off];
 450         } else {
 451             throw new Exn("attribute '"+n+"' must have attribute wrapped in ' or \"", Exn.MARKUP, getLine(), getCol());
 452         }
 453         col++; off++; len--;
 454
 455         // find the attribute value
 456         attval: for (ref = 0; ; ref++) {
 457             if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in attribute value");
 458
 459             if (buf[off+ref] == wrap) {
 460                 break attval;
 461             } else if (buf[off+ref] == '<') {
 462                 throw new Exn("attribute value for '"+n+"' must not contain '<'", Exn.WFC, getLine(), getCol());
 463             }
 464         }
 465
 466         v = new String(buf, off, ref);
 467         col += ref; off += ref; len -= ref;
 468
 469         // remove end wrapper character
 470         col++; off++; len--;
 471
 472         // process attribute
 473         if (p != null && p.equals("xmlns")) {
 474             current.addUri(n, v);
 475         } else if (n.equals("xmlns")) {
 476             if (current.getUri().equals("")) {
 477                 current.addUri("", v);
 478             } else {
 479                 current.addError(new Exn("default namespace definition repeated", Exn.NC, getLine(), getCol()));
 480             }
 481         } else {
 482             // find attribute uri
 483             u = current.getUri(p);
 484             if (p != null && u.equals("")) current.addError(new Exn("undefined attribute prefix '"+p+"'", Exn.NC, getLine(), getCol()));
 485
 486             // check to see if attribute is a repeat
 487             for (int i=0; current.len > i; i++) if (n.equals(current.getAttrKey(i)) && u.equals(current.getAttrUri(i))) throw new Exn(
 488                 "attribute name '"+n+"' may not appear more than once in the same element tag", Exn.WFC, getLine(), getCol()
 489             );
 490
 491             current.addAttr(n, v, u);
 492         }
 493     }
 494
 495     /** reads an entity and processes out its value. expects buf[off] == '&amp;' */
 496     private final void readEntity() throws IOException, Exn {
 497         off++; len--;
 498         if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity");
 499
 500         boolean unknown = false;
 501         switch (buf[off]) {
 502             case '#':
 503                 off++; len--;
 504
 505                 int radix;
 506                 if (buf[off] == 'x') { off++; len--; radix = 16; } else { radix = 10; }
 507                 int c = 0;
 508
 509                 // read in each char, then shift total value to the left and add the extra
 510                 // style of loop is slightly different from all the others, as this should run a limited number of times
 511                 findchar: while (true) {
 512                     if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity");
 513                     int d = Character.digit(buf[off], radix);
 514                     if (d == -1) {
 515                         if (buf[off] != ';') throw new Exn("illegal characters in entity reference", Exn.WFC, getLine(), getCol());
 516                         off++; len--; col++;
 517                         break findchar;
 518                     }
 519                     c = (c * radix) + d;
 520
 521                     off++; len--;
 522                 }
 523
 524                 singlechar[0] = Character.forDigit(c, radix);
 525                 characters(singlechar, 0, 1);
 526                 break;
 527
 528             case 'a':
 529                 if (buffer(4) && buf[off+1] == 'm' && buf[off+2] == 'p' && buf[off+3] == ';') {
 530                     characters(single_amp, 0, 1); // &amp;
 531                     off += 4; len -= 4; col++;
 532                 } else if (buffer(5) && buf[off+1] == 'p' && buf[off+2] == 'o' && buf[off+3] == 's' && buf[off+4] == ';') {
 533                     characters(single_apos, 0, 1); // &apos;
 534                     off += 5; len -= 5; col++;
 535                 } else {
 536                     unknown = true;
 537                 }
 538                 break;
 539
 540             case 'g':
 541                 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
 542                     characters(single_gt, 0, 1); // &gt;
 543                     off += 3; len -= 3; col++;
 544                 } else {
 545                     unknown = true;
 546                 }
 547                 break;
 548
 549             case 'l':
 550                 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
 551                     characters(single_lt, 0, 1); // &lt;
 552                     off += 3; len -= 3; col++;
 553                 } else {
 554                     unknown = true;
 555                 }
 556                 break;
 557
 558             case 'q':
 559                 if (buffer(5) && buf[off+1] == 'u' && buf[off+2] == 'o' && buf[off+3] == 't' && buf[off+4] == ';') {
 560                     characters(single_quot, 0, 1); // &quot;
 561                     off += 5; len -= 5; col++;
 562                 } else {
 563                     unknown = true;
 564                 }
 565                 break;
 566
 567             // TODO: check a parser-level Hash of defined entities
 568         }
 569
 570         if (unknown) throw new Exn("unknown entity (<!ENTITY> not supported)", Exn.WFC, getLine(), getCol());
 571     }
 572
 573     /** reads until the passed string is encountered. */
 574     final void readChars(boolean p, String match, boolean entities) throws IOException, Exn {
 575         int ref;
 576         char[] end = match.toCharArray();
 577
 578         for (boolean more = true; more;) {
 579             if (!buffer(1)) return;
 580
 581             buf: for (ref = 0; ref < len; ref++) {
 582                 switch (buf[off+ref]) {
 583                     case '\r': // windows or macos9 newline
 584                         // normalise and process
 585                         buf[off+ref] = '\n'; ref++;
 586                         if (p) characters(buf, off, ref);
 587                         off += ref; len -= ref; ref = -1;
 588                         line++; col = 1;
 589
 590                         // windows double-char newline; skip the next char
 591                         if (!buffer(1)) return;
 592                         if (buf[off] == '\n') { off++; len--; }
 593                         break;
 594
 595                     case '\n': // unix newline
 596                         ref++;
 597                         if (p) characters(buf, off, ref);
 598                         off += ref; len -= ref; ref = -1;
 599                         line++; col = 1;
 600                         break;
 601
 602                     case '&':  // entity
 603                         if (entities) {
 604                             if (p) {
 605                                 if (ref > 0) characters(buf, off, ref);
 606                                 off += ref; len -= ref; ref = -1;
 607                                 readEntity();
 608                             }
 609                             break;
 610                         }
 611
 612                     default:
 613                         if (!buffer(ref+end.length)) continue buf;
 614                         for (int i=0; end.length > i; i++) if (end[i] != buf[off+ref+i]) continue buf;
 615                         more = false;
 616                         break buf;
 617                 }
 618             }
 619
 620             if (p && ref > 0) characters(buf, off, ref);
 621             off += ref; len -= ref; col += ref;
 622         }
 623     }
 624
 625     /**
 626      * reads until a <tt>&#60;</tt> symbol is encountered
 627      * @param p If true call the characters(char[],int,int) funciton for the processed characters
 628      */
 629     final void readChars(boolean p) throws IOException, Exn {
 630         int ref;
 631
 632         for (boolean more = true; more;) {
 633             if (!buffer(1)) return;
 634
 635             buf: for (ref = 0; ref < len; ref++) {
 636                 switch (buf[off+ref]) {
 637                     case '\r': // windows or macos9 newline
 638                         // normalise and process
 639                         buf[off+ref] = '\n'; ref++;
 640                         if (p) characters(buf, off, ref);
 641                         off += ref; len -= ref; ref = -1;
 642                         line++; col = 1;
 643
 644                         // windows double-char newline; skip the next char
 645                         if (!buffer(1)) return;
 646                         if (buf[off] == '\n') { off++; len--; }
 647                         break;
 648
 649                     case '\n': // unix newline
 650                         ref++;
 651                         if (p) characters(buf, off, ref);
 652                         off += ref; len -= ref; ref = -1;
 653                         line++; col = 1;
 654                         break;
 655
 656                     case '&':  // entity
 657                         if (p) {
 658                             if (ref > 0) characters(buf, off, ref);
 659                             off += ref; len -= ref; ref = -1;
 660                             readEntity();
 661                         }
 662                         break;
 663
 664                     case '<':  // end of chars section
 665                         more = false;
 666                         break buf;
 667                 }
 668             }
 669
 670             if (p && ref > 0) characters(buf, off, ref);
 671             off += ref; len -= ref; col += ref;
 672         }
 673     }
 674
 675     /** reads until a non-whitespace symbol is encountered */
 676     private final void readWhitespace() throws IOException, Exn {
 677         int ref;
 678
 679         for (boolean more = true; more;) {
 680             if (!buffer(1)) return;
 681
 682             buf: for (ref = 0; ref < len; ref++) {
 683                 switch (buf[off+ref]) {
 684                     case '\r': // windows or macos9 newline
 685                         // normalise and process
 686                         buf[off+ref] = '\n';
 687                         whitespace(buf, off, ++ref);
 688                         off += ref; len -= ref; ref = -1;
 689                         line++; col = 1;
 690
 691                         // windows double-char newline; skip the next char
 692                         if (!buffer(1)) return;
 693                         if (buf[off] == '\n') { off++; len--; }
 694                         break;
 695
 696                     case '\n': // unix newline
 697                         whitespace(buf, off, ++ref);
 698                         off += ref; len -= ref; ref = -1;
 699                         line++; col = 1;
 700                         break;
 701
 702                     case ' ':  // space
 703                     case '\t': // tab
 704                         break;
 705
 706                     default:   // end of whitespace
 707                         more = false;
 708                         break buf;
 709                 }
 710             }
 711
 712             off += ref; len -= ref; col += ref;
 713         }
 714     }
 715
 716     /**
 717      * attempt to fill the buffer.
 718      *
 719      * @param min Minimum number of characters to read (even if we have to block to do it).
 720      * @return return false if min can't be reached.
 721      */
 722     final boolean buffer(int min) throws IOException {
 723         if (len > min) return true;
 724
 725         if (buf.length - (off+len) >= min) {
 726             // plenty of space left on the end of the buffer
 727         } else if (off >= min) {
 728             // moving offset data to start will leave enough free space on the end
 729             System.arraycopy(buf, off, buf, 0, len);
 730             base += off;
 731             off = 0;
 732         } else {
 733             // buffer size will have to be increased
 734             char[] newbuf = new char[buf.length * 2];
 735             System.arraycopy(buf, off, newbuf, 0, len);
 736             buf = newbuf;
 737             base += off;
 738             off = 0;
 739         }
 740
 741         while (min > len) {
 742             int newlen = in.read(buf, off+len, buf.length-(off+len));
 743             if (newlen < 0) return false;
 744             len += newlen;
 745         }
 746
 747         return true;
 748     }
 749
 750
 751     /////////////////////////////////////////////////////////////////////////////////////////////
 752     // Abstract SAX-Like Interface
 753     /////////////////////////////////////////////////////////////////////////////////////////////
 754
    /**
     * Called when the start of an element is processed.
     *
     * <p><b>DO NOT</b> store a reference to the Element object, as
     * they are reused by XML Parser. Copy out any names, attributes or
     * namespace data that are needed after this call returns.</p>
     *
     * @param e the element whose start was just processed
     * @throws Exn may be thrown by the implementation
     */
    public abstract void startElement(Element e) throws Exn;
 762
    /**
     * Represents up to a line of character data.
     *
     * <p>Newlines are all normalised to the Unix \n as per the XML Spec,
     * and a newline will only appear as the last character in the passed
     * array segment.</p>
     *
     * <p>XML.getLine() and XML.getCol() report the position at the
     * beginning of this character segment, which can be processed in a
     * line-by-line fashion due to the above newline restriction.</p>
     *
     * <p>NOTE(review): the call site visible in this file passes the
     * parser's own read buffer as <tt>ch</tt>; presumably the segment must
     * be copied if it is needed after this call returns - confirm.</p>
     *
     * @param ch     array containing the character data
     * @param start  index in <tt>ch</tt> of the first character of the segment
     * @param length number of characters in the segment
     */
    public abstract void characters(char[] ch, int start, int length) throws Exn, IOException;
 775
    /**
     * Represents up to a line of ignorable whitespace.
     *
     * <p>As invoked by readWhitespace(), each segment ends with a newline
     * that has been normalised to the Unix \n, and spaces or tabs that are
     * not followed by a newline are not reported.</p>
     *
     * @param ch     array containing the whitespace
     * @param start  index in <tt>ch</tt> of the first character of the segment
     * @param length number of characters in the segment
     */
    public abstract void whitespace(char[] ch, int start, int length) throws Exn, IOException;
 778
    /**
     * Represents the end of an Element.
     *
     * @param e the element that has ended; Element objects are reused by the
     *          parser, so no reference to it should be kept
     */
    public abstract void endElement(Element e) throws Exn, IOException;
 781
 782     /////////////////////////////////////////////////////////////////////////////////////////////
 783     // Inner Classes for Parser Support
 784     /////////////////////////////////////////////////////////////////////////////////////////////
 785
 786     /**
 787      * Represents an element in an XML document. Stores a reference to its
 788      * parent, forming a one-way linked list.
 789      *
 790      * Element objects are reused, so client code making use of them must
 791      * drop their references after the specific element process function
 792      * has returned.
 793      */
 794     public static final class Element {
 795
 796         private static final int DEFAULT_ATTR_SIZE = 10;
 797
 798         protected Element parent = null;
 799
 800         protected String uri = null;
 801         protected String localName = null;
 802         protected String qName = null;
 803         protected String prefix = null;
 804         public int level = 0;
 805
 806         protected Hash urimap = new Hash(3,3);
 807
 808         protected String[] keys = new String[DEFAULT_ATTR_SIZE];
 809         protected String[] vals = new String[DEFAULT_ATTR_SIZE];
 810         protected String[] uris = new String[DEFAULT_ATTR_SIZE];
 811         protected int len = 0;
 812
 813         protected Exn[] errors = new Exn[] {};
 814
 815         /** Parent of current element. */
 816         public Element getParent() { return parent; }
 817
 818         /** Qualified Name of current element.  XML Namespace Spec 14-Jan-1999 [6] */
 819         public String getQName() { return qName; }
 820
 821         /** LocalPart of current element. XML Namespace Spec 14-Jan-1999 [8] */
 822         public String getLocalName() { return localName; }
 823
 824         /** Prefix of current element. Substring of qName. XML Namespace Spec 14-Jan-1999 [7] */
 825         public String getPrefix() { return prefix; }
 826
 827         // HACK
 828         public Hash getUriMap() {
 829             Hash map = new Hash();
 830             for (Element e = this; e != null; e = e.getParent()) {
 831                 java.util.Enumeration en = e.urimap.keys();
 832                 while(en.hasMoreElements()) {
 833                     String key = (String)en.nextElement();
 834                     String val = getUri(key);
 835                     map.put(key, val);
 836                 }
 837             }
 838             return map;
 839         }
 840
 841         /** URI of current tag. XML Namespace Spec 14-Jan-1999 section 1 */
 842         public String getUri() { return getUri(prefix); }
 843
 844         /** URI of a given prefix. Never returns null, instead gives "". */
 845         public String getUri(String p) {
 846             String ret = null;
 847             for (Element e = this; e != null && ret == null; e = e.getParent()) {
 848                 ret = (String)e.urimap.get(p == null ? "" : p);
 849             }
 850             return ret == null ? "" : ret;
 851         }
 852
 853         /** An array of attribute names. */
 854         public String getAttrKey(int pos) { return len > pos ? keys[pos] : null; }
 855
 856         /** An array of attribute values. */
 857         public String getAttrVal(int pos) { return len > pos ? vals[pos] : null; }
 858
 859         /** An array of attribute uris. */
 860         public String getAttrUri(int pos) { return len > pos ? uris[pos] : null; }
 861
 862         /** Current number of attributes in the element. */
 863         public int getAttrLen() { return len; }
 864
 865         /** Poor performance, but easier to use when speed is not a concern */
 866         public Hash getAttrHash() {
 867             Hash ret = new Hash(getAttrLen() * 2, 3);
 868             for(int i=0; i<len; i++)
 869                 ret.put(getAttrKey(i), getAttrVal(i));
 870             return ret;
 871         }
 872
 873         /** Poor performance, but easier to use */
 874         public String getAttrVal(String key) {
 875             for(int i=0; i<len; i++) if (keys[i].equals(key)) return vals[i];
 876             return null;
 877         }
 878
 879         /** An array of non-fatal errors related to this element. */
 880         public Exn[] getErrors() { return errors; }
 881
 882         protected Element() { }
 883
 884         /** Add (replace if exists in current element) a Namespace prefix/uri map. */
 885         public void addUri(String name, String value) {
 886             urimap.put(name, value);
 887         }
 888
 889         /** Add an attribute. */
 890         protected void addAttr(String key, String val, String uri) {
 891             if (len == keys.length) {
 892                 // increase the size of the attributes arrays
 893                 String[] newkeys = new String[keys.length*2];
 894                 String[] newvals = new String[vals.length*2];
 895                 String[] newuris = new String[uris.length*2];
 896                 System.arraycopy(keys, 0, newkeys, 0, keys.length);
 897                 System.arraycopy(vals, 0, newvals, 0, vals.length);
 898                 System.arraycopy(uris, 0, newuris, 0, uris.length);
 899                 keys = newkeys; vals = newvals; uris = newuris;
 900             }
 901
 902             keys[len] = key;
 903             vals[len] = val;
 904             uris[len] = uri;
 905             len++;
 906         }
 907
 908         /** Add an error. */
 909         protected void addError(Exn e) {
 910             // it doesn't really matter about continually expanding the array, as this case is quite rare
 911             Exn[] newe = new Exn[errors.length+1];
 912             System.arraycopy(errors, 0, newe, 0, errors.length);
 913             newe[errors.length] = e;
 914             errors = newe;
 915         }
 916
 917         /** Empty out all the data from the Element. */
 918         protected void clear() {
 919             parent = null;
 920             uri = localName = qName = prefix = null;
 921             urimap.clear();
 922
 923             if (keys.length != vals.length || vals.length != uris.length) {
 924                 keys = new String[DEFAULT_ATTR_SIZE];
 925                 vals = new String[DEFAULT_ATTR_SIZE];
 926                 uris = new String[DEFAULT_ATTR_SIZE];
 927             } else {
 928                 for (int i=0; keys.length > i; i++) { keys[i] = null; vals[i] = null; uris[i] = null; };
 929             }
 930             len = 0;
 931
 932             errors = new Exn[] {};
 933         }
 934     }
 935
 936     /** Parse or Structural Error */
 937     public static class Exn extends Exception {
 938         /** Violation of Markup restrictions in XML Specification - Fatal Error */
 939         public static final int MARKUP = 1;
 940
 941         /** Well-Formedness Constraint Violation - Fatal Error */
 942         public static final int WFC = 2;
 943
 944         /** Namespace Constraint Violation - Recoverable Error */
 945         public static final int NC = 3;
 946
 947         /** Schema Violation - Fatal Error */
 948         public static final int SCHEMA = 4;
 949
 950         private String error;
 951         private int type;
 952         private int line;
 953         private int col;
 954
 955         public Exn(String e) { this(e, MARKUP, -1, -1); }
 956
 957         public Exn(String e, int type, int line, int col) {
 958             this.error = e;
 959             this.type  = type;
 960             this.line  = line;
 961             this.col   = col;
 962         }
 963
 964         public int getType() { return this.type; }
 965         public int getLine() { return this.line; }
 966         public int getCol()  { return this.col;  }
 967         public String getMessage() { return this.error + (line >= 0 && col >= 0 ? " at " + line + ":" + col: ""); }
 968     }
 969
 970
 971     /////////////////////////////////////////////////////////////////////////////////////////////
 972     // Static Support Functions for the XML Specification
 973     /////////////////////////////////////////////////////////////////////////////////////////////
 974
 975     // attempt to avoid these functions unless you *expect* the input to fall in the given range.
 976
 977     /** First Character of Name - XML Specification 1.0 [5] */
 978     private static final boolean Name(char c) {
 979         return BaseCharAscii(c) || c == '_' || c == ':' || Letter(c);
 980     }
 981
 982     /** NameChar - XML Specification 1.0 [4] */
 983     private static final boolean NameChar(char c) {
 984         return BaseCharAscii(c) || c == '.' || c == '-' || c == '_' || c == ':'
 985             || Digit(c) || Letter(c) || Extender(c); // TODO: || CombiningChar(c);
 986     }
 987
 988     /** BaseChar - XMl Specification 1.0 [84] */
 989     private static final boolean Letter(char c) {
 990         return BaseChar(c) || Ideographic(c);
 991     }
 992
 993     /** Elements of BaseChar that exist in ASCII. */
 994     private static final boolean BaseCharAscii(char c) {
 995         return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A');
 996     }
 997
 998     /** Char - XML Specification 1.0 [2] */
 999     private static final boolean Char(char c) {
1000         // u000A == r and u000D == n, but the javac compiler can't handle the \ u form
1001         return c == '\u0009' || c == '\r' || c == '\n'
1002             || (c >= '\u0020' && c <= '\uD7FF')
1003             || (c >= '\uE000' && c <= '\uFFFD');
1004     }
1005
1006     /** BaseChar - XML Specification 1.0 [85] */
1007     private static final boolean BaseChar(char c) {
1008         return  BaseCharAscii(c) || (c >= '\u00C0' && c <= '\u00D6')
1009             || (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131')
1010             || (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E')
1011             || (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5')
1012             || (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1')
1013             || (c == '\u0386')                  || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
1014             || (c >= '\u038E' && c <= '\u03A1') || (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6')
1015             || (c == '\u03DA')                  || (c == '\u03DC')                  || (c == '\u03DE')
1016             || (c == '\u03E0')
1017             || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F')
1018             || (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4')
1019             || (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB')
1020             || (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556')
1021             || (c == '\u0559')
1022             || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2')
1023             || (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7')
1024             || (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3')
1025             || (c == '\u06D5')
1026             || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939')
1027             || (c == '\u093D')
1028             || (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990')
1029             || (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0')
1030             || (c == '\u09B2')
1031             || (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DF' && c <= '\u09E1') || (c >= '\u09F0' && c <= '\u09F1')
1032             || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || (c >= '\u0A13' && c <= '\u0A28')
1033             || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || (c >= '\u0A35' && c <= '\u0A36')
1034             || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C')
1035             || (c == '\u0A5E')
1036             || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B')
1037             || (c == '\u0A8D')
1038             || (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0')
1039             || (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9')
1040             || (c == '\u0ABD')
1041             || (c == '\u0AE0')
1042             || (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28')
1043             || (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39')
1044             || (c == '\u0B3D')
1045             || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || (c >= '\u0B85' && c <= '\u0B8A')
1046             || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || (c >= '\u0B99' && c <= '\u0B9A')
1047             || (c == '\u0B9C')
1048             || (c >= '\u0B9E' && c <= '\u0B9F') || (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA')
1049             || (c >= '\u0BAE' && c <= '\u0BB5') || (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C')
1050             || (c >= '\u0C0E' && c <= '\u0C10') || (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33')
1051             || (c >= '\u0C35' && c <= '\u0C39') || (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C')
1052             || (c >= '\u0C8E' && c <= '\u0C90') || (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3')
1053             || (c >= '\u0CB5' && c <= '\u0CB9')
1054             || (c == '\u0CDE')
1055             || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || (c >= '\u0D0E' && c <= '\u0D10')
1056             || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || (c >= '\u0D60' && c <= '\u0D61')
1057             || (c >= '\u0E01' && c <= '\u0E2E')
1058             || (c == '\u0E30')
1059             || (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82')
1060             || (c == '\u0E84')
1061             || (c >= '\u0E87' && c <= '\u0E88')
1062             || (c == '\u0E8A')
1063             || (c == '\u0E8D')
1064             || (c >= '\u0E94' && c <= '\u0E97') || (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3')
1065             || (c == '\u0EA5')
1066             || (c == '\u0EA7')
1067             || (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE')
1068             || (c == '\u0EB0')
1069             || (c >= '\u0EB2' && c <= '\u0EB3')
1070             || (c == '\u0EBD')
1071             || (c >= '\u0EC0' && c <= '\u0EC4') || (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69')
1072             || (c >= '\u10A0' && c <= '\u10C5') || (c >= '\u10D0' && c <= '\u10F6')
1073             || (c == '\u1100')
1074             || (c >= '\u1102' && c <= '\u1103') || (c >= '\u1105' && c <= '\u1107')
1075             || (c == '\u1109')
1076             || (c >= '\u110B' && c <= '\u110C') || (c >= '\u110E' && c <= '\u1112')
1077             || (c == '\u113C')
1078             || (c == '\u113E')
1079             || (c == '\u1140')
1080             || (c == '\u114C')
1081             || (c == '\u114E')
1082             || (c == '\u1150')
1083             || (c >= '\u1154' && c <= '\u1155')
1084             || (c == '\u1159')
1085             || (c >= '\u115F' && c <= '\u1161')
1086             || (c == '\u1163')
1087             || (c == '\u1165')
1088             || (c == '\u1167')
1089             || (c == '\u1169')
1090             || (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173')
1091             || (c == '\u1175')
1092             || (c == '\u119E')
1093             || (c == '\u11A8')
1094             || (c == '\u11AB')
1095             || (c >= '\u11AE' && c <= '\u11AF') || (c >= '\u11B7' && c <= '\u11B8')
1096             || (c == '\u11BA')
1097             || (c >= '\u11BC' && c <= '\u11C2')
1098             || (c == '\u11EB')
1099             || (c == '\u11F0')
1100             || (c == '\u11F9')
1101             || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || (c >= '\u1F00' && c <= '\u1F15')
1102             || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || (c >= '\u1F48' && c <= '\u1F4D')
1103             || (c >= '\u1F50' && c <= '\u1F57')
1104             || (c == '\u1F59')
1105             || (c == '\u1F5B')
1106             || (c == '\u1F5D')
1107             || (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC')
1108             || (c == '\u1FBE')
1109             || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || (c >= '\u1FD0' && c <= '\u1FD3')
1110             || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || (c >= '\u1FF2' && c <= '\u1FF4')
1111             || (c >= '\u1FF6' && c <= '\u1FFC')
1112             || (c == '\u2126')
1113             || (c >= '\u212A' && c <= '\u212B')
1114             || (c == '\u212E')
1115             || (c >= '\u2180' && c <= '\u2182') || (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA')
1116             || (c >= '\u3105' && c <= '\u312C') || (c >= '\uAC00' && c <= '\uD7A3');
1117     }
1118
1119     /** BaseChar - XMl Specification 1.0 [86] */
1120     private static final boolean Ideographic(char c) {
1121         return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
1122     }
1123
1124     /** CombiningChar - XMl Specification 1.0 [87] */
1125     /*private static final boolean CombiningChar(char c) {
1126         return (c >= '\u0300' && c <= '\u0345')
1127             || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || (c >= '\u0591' && c <= '\u05A1')
1128             || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD')
1129             || (c == '\u05BF')
1130             || (c >= '\u05C1' && c <= '\u05C2')
1131             || (c == '\u05C4')
1132             || (c >= '\u064B' && c <= '\u0652')
1133             || (c == '\u0670')
1134             || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4')
1135             || (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903')
1136             || (c == '\u093C')
1137             || (c >= '\u093E' && c <= '\u094C')
1138             || (c == '\u094D')
1139             || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || (c >= '\u0981' && c <= '\u0983')
1140             || (c == '\u09BC')
1141             || (c == '\u09BE')
1142             || (c == '\u09BF')
1143             || (c >= '\u09C0' && c <= '\u09C4') || (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD')
1144             || (c == '\u09D7')
1145             || (c >= '\u09E2' && c <= '\u09E3')
1146             || (c == '\u0A02')
1147             || (c == '\u0A3C')
1148             || (c == '\u0A3E')
1149             || (c == '\u0A3F')
1150             || (c >= '\u0A40' && c <= '\u0A42') || (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D')
1151             || (c >= '\u0A70' && c <= '\u0A71') || (c >= '\u0A81' && c <= '\u0A83')
1152             || (c == '\u0ABC')
1153             || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || (c >= '\u0ACB' && c <= '\u0ACD')
1154             || (c >= '\u0B01' && c <= '\u0B03')
1155             || (c == '\u0B3C')
1156             || (c >= '\u0B3E' && c <= '\u0B43') || (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D')
1157             || (c >= '\u0B56' && c <= '\u0B57') || (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2')
1158             || (c >= '\u0BC6' && c <= '\u0BC8') || (c >= '\u0BCA' && c <= '\u0BCD')
1159             || (c == '\u0BD7')
1160             || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || (c >= '\u0C46' && c <= '\u0C48')
1161             || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || (c >= '\u0C82' && c <= '\u0C83')
1162             || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || (c >= '\u0CCA' && c <= '\u0CCD')
1163             || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || (c >= '\u0D3E' && c <= '\u0D43')
1164             || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D')
1165             || (c == '\u0D57')
1166             || (c == '\u0E31')
1167             || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E')
1168             || (c == '\u0EB1')
1169             || (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD')
1170             || (c >= '\u0F18' && c <= '\u0F19')
1171             || (c == '\u0F35')
1172             || (c == '\u0F37')
1173             || (c == '\u0F39')
1174             || (c == '\u0F3E')
1175             || (c == '\u0F3F')
1176             || (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95')
1177             || (c == '\u0F97')
1178             || (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7')
1179             || (c == '\u0FB9')
1180             || (c >= '\u20D0' && c <= '\u20DC')
1181             || (c == '\u20E1')
1182             || (c >= '\u302A' && c <= '\u302F')
1183             || (c == '\u3099')
1184             || (c == '\u309A');
1185     }*/
1186
1187     /** Digit - XMl Specification 1.0 [88] */
1188     private static final boolean Digit(char c) {
1189         return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9')
1190             || (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F')
1191             || (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF')
1192             || (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F')
1193             || (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
1194     }
1195
1196     /** Extender - XMl Specification 1.0 [89] */
1197     private static final boolean Extender(char c) {
1198         return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387'
1199             || c == '\u0640' || c == '\u0E46' || c == '\u0EC6' || c == '\u3005'
1200             || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE');
1201     }
1202
1203     /** Whitespace - XML Specification 1.0 [3] */
1204     private static final boolean S(char c) {
1205         return c == '\u0020' || c == '\u0009' || c == '\r' || c == '\n';
1206     }
1207 }