src/org/xwt/util/XML.java

   1 package org.xwt.util;
   2
   3 import java.io.Reader;
   4 import java.io.IOException;
   5 import java.io.EOFException;
   6
   7 /**
   8  * An Event-Driven, Non-Validating XML Parser with Namespace support.
   9  *
  10  * A subclass can implement the abstract functions for receiving details
  11  * about an xml file as it is parsed. To initiate a parse, use the parse()
  12  * function.
  13  *
  14  * <h3>IMPLEMENTATION NOTES</h3>
  15  * <p>As the parser traverses into an element, it adds it to the linked list
  16  * called <tt>elements</tt>. However, <tt>elements</tt> has been pre-filled
  17  * with instances of the Element inner class. So in the vast majority of
  18  * cases, the pointer current is moved along one, and the values for the
  19  * new element are filled into the current object.</p>
  20  *
  21  * <p>This parser supports all the unicode ranges required by the XML
  22  * Specification. However, it is optimised for well-formed ASCII documents.
  23  * Documents containing unicode Names and Attributes will take much longer
  24  * to process, and invalid documents (badly formed Names or invalid attributes)
  25  * will be run through a test on every single unicode character range before
  26  * being declared invalid.</p>
  27  *
  28  * <h3>IMPLEMENTATION RULES</h3>
  29  * <ul>
  30  *  <li>Each time the buffer offset <tt>off</tt> is moved, the length
  31  *   <tt>len</tt> must be decreased.</li>
  32  *  <li>Each time the buffer length is decreased, it must be checked to make
  33  *   sure it is &gt;0.</li>
  34  * </ul>
  35  *
  36  * <h3>Other Notes</h3>
  37  * <ul>
  38  *  <li><i>error</i> is defined as a Validity Constraint Violation and is recoverable</li>
  39  *  <li><i>fatal error</i> is defined as a Well-formedness Constraint Violation and is not recoverable</li>
  40  * </ul>
  41  *
  42  * @author David Crawshaw
  43  * @see XML-Specification-1.0 http://w3.org/TR/REC-xml
  44  */
  45 public abstract class XML
  46 {
  47     /////////////////////////////////////////////////////////////////////////////////////////////
  48     // XML Parser
  49     /////////////////////////////////////////////////////////////////////////////////////////////
  50
  51     public static final int BUFFER_SIZE = 255; // initial buffer size, in chars, used by the no-argument constructor
  52
  53     /** static pool of XML.Element instances shared by all XML Parsers.
  54      * elements in the queue have dirty prev and next references, that need cleaning before use. */
  55     private static final Queue elements = new Queue(30);
  56
         // one-character arrays handed to characters() by readEntity() for the five predefined entities
  57     private static final char[] single_amp  = new char[] { '&'  };
  58     private static final char[] single_apos = new char[] { '\'' };
  59     private static final char[] single_gt   = new char[] { '>'  };
  60     private static final char[] single_lt   = new char[] { '<'  };
  61     private static final char[] single_quot = new char[] { '"'  };
  62
         // position counters: parse() resets both to 1; the read* methods bump line and reset col at each newline
  63     private int line;
  64     private int col;
  65
         // input source and read window: buf[off] is the next unread char, and len chars are buffered from there
  66     private Reader in;
  67     private char[] buf;
  68     private int    off;
  69     private int    len;
  70
         // element currently being parsed; a null qName means no element is open
  71     private Element current;
  72
  73     // used in readEntity() to process a single character without creating a new array
  74     private char[] singlechar = new char[1];
  75
  76
  77     public XML() { this(BUFFER_SIZE); }
  78
  79     public XML(int bSize) {
  80         buf = new char[bSize];
  81
  82         current = (Element)elements.remove(false);
  83         if (current == null) current = new Element();
  84         current.prev = current.next = null;
  85     }
  86
  87
  88     /** Returns the line number at the beginning of the last process call. */
  89     public int getLine() { return line; }
  90
  91     /** Returns the column number at the beginning of the last process call. */
  92     public int getCol()  { return col; }
  93
  94     /**
  95      * Parse given input and call the abstract event functions.
  96      *
  97      * Careful with threading, as this function is not synchronized.
  98      */
  99     public final void parse(Reader reader) throws IOException, XMLException {
 100         in  = reader;
 101         off = len = 0;
 102         line = col = 1;
 103
 104         clean(); // clean up possible mid-way linked-list element
 105
 106         try {
 107             // process the stream
 108             while (true) {
 109                 if (!buffer(1)) {
 110                     if (current.qName == null) break;
 111                     throw new WFCException("reached eof without closing <"+current.qName+"> element", getLine(), getCol());
 112                 }
 113
 114                 if (buf[off] == '<') readTag();
 115                 readChars(current.qName != null);
 116             }
 117         } finally { clean(); } // clean up elements
 118     }
 119
 120     /** remove any leftover elements from the linked list and queue them */
 121     private final void clean() {
 122         while (current.prev != null) elements.append((current = current.prev).next);
 123         current.next = null;
 124         current.qName = null;
 125     }
 126
 127     /** reads in a tag. expects <tt>buf[off] == '&#60;'</tt> */
 128     private final void readTag() throws IOException, XMLException {
 129         // Start Tag    '<' Name (S Attribute)* S? '>'
 130         boolean starttag  = true;
 131
 132         // End Tag     '</' Name S? '>'
 133         boolean endtag    = false;
 134
 135         // if (starttag & endtag) then: EmptyElemTag '<' Name (S Attribute)* S? '/>'
 136
 137         // Position in the name of the ':' namespace prefix
 138         int prefix = -1;
 139
 140         int namelen   = 0;
 141
 142         col++; off++; len--;
 143         if (!buffer(1)) throw new EOFException("Unexpected EOF processing element tag");
 144
 145         // work out what we can from the beginning of the tag
 146         char s = buf[off];
 147         if (s == '!') {
 148             // definitions here don't necessarily conform to xml spec (as DTDs not yet implemented)
 149             col++; off++; len--;
 150             if (!buffer(4)) throw new EOFException("Unexpected EOF processing <! element");
 151
 152             boolean bad = false;
 153             switch (buf[off]) {
 154                 case '-':
 155                     if (buf[off+1] != '-') { bad = true; break; }
 156                     col += 2; off += 2; len -= 2;
 157
 158                     // Comment        '<!--'      ((Char - '-') | ('-' (Char - '-')))* '-->'
 159                     readChars(false, "-->", false);
 160                     col += 3; off += 3; len -= 3;
 161                     break;
 162
 163                 // we don't care about the following definitions
 164
 165                 case 'A':
 166                     if (!buffer(7)
 167                             || buf[off+1] != 'T' || buf[off+2] != 'T' || buf[off+3] != 'L'
 168                             || buf[off+4] != 'I' || buf[off+5] != 'S' || buf[off+6] != 'T') {
 169                         bad = true; break;
 170                     }
 171                     col += 7; off += 7; len -= 7;
 172
 173                     // ATTLIST        '<!ATTLIST'   (Char* - '>') '>'
 174                     readChars(false, ">", true);
 175                     col++; off++; len--;
 176                     break;
 177                 case 'D':
 178                     if (!buffer(7)
 179                             || buf[off+1] != 'O' || buf[off+2] != 'C' || buf[off+3] != 'T'
 180                             || buf[off+4] != 'Y' || buf[off+5] != 'P' || buf[off+6] != 'E') {
 181                         bad = true; break;
 182                     }
 183                     col += 7; off += 7; len -= 7;
 184
 185                     // DTD            '<!DOCTYPE'   (Char* - '>') '>'
 186                     readChars(false, ">", true);
 187                     col++; off++; len--;
 188                     break;
 189                 case 'E':
 190                     if (!buffer(7)) {
 191                         bad = true;
 192                     } else if (buf[off+1] == 'L' && buf[off+2] == 'E' && buf[off+3] == 'M'
 193                             && buf[off+4] == 'E' && buf[off+5] == 'N' && buf[off+6] == 'T') {
 194                         // ELEMENT        '<!ELEMENT'   (Char* - '>') '>'
 195                         readChars(false, ">", true);
 196                         col++; off++; len--;
 197
 198                     } else if (buf[off+1] == 'N' && buf[off+2] == 'T' && buf[off+3] == 'I'
 199                             && buf[off+4] == 'T' && buf[off+5] == 'Y') {
 200                         // ENTITY         '<!ENTITY'    (Char* - '>') '>'
 201                         readChars(false, ">", true);
 202                         col++; off++; len--;
 203
 204                     } else {
 205                         bad = true;
 206                     }
 207                     break;
 208
 209                 case 'N':
 210                     if (!buffer(8)
 211                             || buf[off+1] != 'O' || buf[off+2] != 'T' || buf[off+3] != 'A' || buf[off+4] != 'T'
 212                             || buf[off+5] != 'I' || buf[off+6] != 'O' || buf[off+7] != 'N') {
 213                         bad = true; break;
 214                     }
 215                     col += 8; off += 8; len -= 8;
 216                     // NOTATION       '<!NOTATION'  (Char* - '>') '>'
 217                     readChars(false, ">", true);
 218                     col++; off++; len--;
 219
 220                     break;
 221                 default: bad = true;
 222             }
 223
 224             if (bad) throw new MarkupException("element tag start character is invalid", getLine(), getCol());
 225
 226         } else if (s == '?') {
 227             // PI (Ignored)   '<?'  (Char* - (Char* '?>' Char*))  '?>'
 228             col++; off++; len--;
 229             readChars(false, "?>", true);
 230             if (!buffer(2)) throw new EOFException("Unexpected EOF at end of Processing Instruction");
 231             col += 2; off += 2; len -= 2;
 232
 233         } else if (s == '[') {
 234             if (!buffer(7)
 235                     || buf[off+1] != 'C' || buf[off+2] != 'D' || buf[off+3] != 'A'
 236                     || buf[off+4] != 'T' || buf[off+5] != 'A' || buf[off+6] != '[') {
 237                 col++; off--; len++;
 238                 // Conditional    '<![' (Char* - (Char* ']]>' Char*)) ']]>'
 239                 readChars(false, "]]>", false);
 240             } else {
 241                 col += 7; off += 7; len -=7;
 242                 // CDATA          '<![CDATA[' (Char* - (Char* ']]>' Char*))        ']]>'
 243                 readChars(true, "]]>", false);
 244             }
 245             col += 3; off += 3; len -= 3;
 246         } else {
 247             if (s == '/') {
 248                 // End Tag        '</' Name S? '>'
 249                 starttag = false;
 250                 endtag = true;
 251
 252                 col++; off++; len--;
 253                 if (!buffer(1)) throw new EOFException("Unexpected EOF processing end tag");
 254                 s = buf[off];
 255             }
 256
 257             if (!Name(s)) throw new MarkupException("invalid starting character in element name", getLine(), getCol());
 258
 259             // find the element name (defined in XML Spec: section 2.3)
 260             for (namelen = 0; ; namelen++) {
 261                 if (!buffer(namelen+1)) throw new EOFException("Unexpected EOF in element tag name");
 262
 263                 s = buf[off+namelen];
 264
 265                 if (S(s) || s == '>') {
 266                     break;
 267                 } else if (s == '/') {
 268                     endtag = true;
 269                     break;
 270                 } else if (s == ':' && namelen > 0 && prefix < 1) {
 271                     // we have a definition of the prefix range available
 272                     prefix = namelen;
 273                 } else if (!NameChar(s)) {
 274                     throw new MarkupException("element name contains invalid character", getLine(), getCol());
 275                 }
 276             }
 277
 278             // process name (based on calculated region)
 279             if (namelen < 1) throw new MarkupException("element name is null", getLine(), getCol());
 280
 281             // we have marked out the name region, so turn it into a string and move on
 282             String qName = new String(buf, off, namelen);
 283
 284             col += namelen; off += namelen; len -= namelen;
 285
 286             if (starttag) {
 287                 // create the in-memory element representation of this beast
 288                 // if current.qName == null then this is the root element we're dealing with
 289                 if (current.qName != null) {
 290                     if (current.next == null)  {
 291                         // we're at the end of the default element depth
 292                         current.next = (Element)elements.remove(false);
 293                         if (current.next == null) current.next = new Element();
 294                         current.next.prev = current;
 295                         current.next.next = null;
 296                     }
 297                     current = current.next;
 298                 }
 299
 300                 current.clear();
 301                 current.qName = qName;
 302                 current.defaultUri = current.uri = null;
 303
 304                 if (prefix > 0) {
 305                     current.prefix = current.qName.substring(0, prefix);
 306                     current.localName = current.qName.substring(prefix+1);
 307                 } else {
 308                     current.prefix = null;
 309                     current.localName = current.qName;
 310                 }
 311
 312                 // process attributes
 313                 readWhitespace();
 314                 if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 1");
 315                 while (buf[off] != '/' && buf[off] != '>') {
 316                     readAttribute();
 317                     if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 2");
 318                     readWhitespace();
 319                 }
 320
 321                 // inherit namespace default uri if attribute was not provided
 322                 if (current.defaultUri == null) {
 323                     current.defaultUri = (current.prev != null) ? current.prev.defaultUri : null;
 324                 }
 325
 326                 // work out the uri of this element
 327                 if (current.prefix == null) {
 328                     // element has no prefix, therefore is the default uri
 329                     current.uri = current.defaultUri;
 330                 } else {
 331                     // work back through the hashtables until uri is found
 332                     for (Element e = current; e != null && current.uri == null; e = e.prev) {
 333                         current.uri = (String)e.urimap.get(current.prefix);
 334                     }
 335                     if (current.uri == null) current.addError(new NCException("undefined prefix '"+current.prefix+"'", getLine(), getCol()));
 336                 }
 337
 338             } else {
 339                 // this is an end-of-element tag
 340                 if (!qName.equals(current.qName)) throw new WFCException(
 341                     "end tag </"+qName+"> does not line up with start tag <"+current.qName+">", getLine(), getCol()
 342                 );
 343             }
 344
 345             // deal with whitespace
 346             readWhitespace();
 347
 348             // process tag close
 349             if (!buffer(1)) throw new EOFException("Unexpected EOF before end of tag");
 350             if (buf[off] == '/') {
 351                 endtag = true;
 352                 off++; len--; col++;
 353             }
 354             if (!buffer(1)) throw new EOFException("Unexpected EOF before end of endtag");
 355             if (buf[off] == '>') {
 356                 off++; len--; col++;
 357             } else {
 358                 throw new MarkupException("missing '>' character from element '"+qName+"'", getLine(), getCol());
 359             }
 360
 361             // send element signals
 362             if (starttag) startElement(current);
 363             if (endtag) {
 364                 endElement(current);
 365
 366                 // we just closed an element, so remove it from the element 'stack'
 367                 if (current.prev == null) {
 368                     // we just finished the root element
 369                     current.qName = null;
 370                 } else {
 371                     elements.append((current = current.prev).next);
 372                     current.next = null;
 373                 }
 374             }
 375         }
 376     }
 377
 378     /** reads in an attribute of an element. expects Name(buf[off]) */
 379     private final void readAttribute() throws IOException, XMLException {
 380         int ref = 0;
 381         int prefix = 0;
 382         String n, v, p, u; // attribute name, value, prefix and uri respectively
 383         n = v = p = u = null;
 384         char s;
 385
 386         // find the element name (defined in XML Spec: section 2.3)
 387         for (ref= 0; ; ref++) {
 388             if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in read attribute loop part 1");
 389
 390             s = buf[off+ref];
 391
 392             if (s == '=' || S(s)) {
 393                 break;
 394             } else if (s == ':' && ref > 0 && prefix < 1) {
 395                 // we have a definition of the prefix range available
 396                 prefix = ref+1;
 397             } else if (!NameChar(s)) {
 398                 throw new MarkupException("attribute name contains invalid characters", getLine(), getCol());
 399             }
 400         }
 401
 402         // determine prefix and key name
 403         if (prefix > 0) {
 404             p = new String(buf, off, prefix-1);
 405             col += prefix; off += prefix; len -= prefix; ref -= prefix;
 406         }
 407         n = new String(buf, off, ref);
 408         col += ref; off += ref; len -= ref;
 409
 410         // find name/value divider ('=')
 411         readWhitespace();
 412         if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider");
 413         if (buf[off] != '=') throw new MarkupException("attribute name not followed by '=' sign", getLine(), getCol());
 414
 415         col++; off++; len--;
 416         readWhitespace();
 417
 418         if (!buffer(1)) throw new EOFException("Unexpected EOF after attribute '=' divider");
 419
 420         char wrap;
 421         if (buf[off] == '\'' || buf[off] == '"') {
 422             wrap = buf[off];
 423         } else {
 424             throw new MarkupException("attribute '"+n+"' must have attribute wrapped in ' or \"", getLine(), getCol());
 425         }
 426         col++; off++; len--;
 427
 428         // find the attribute value
 429         attval: for (ref = 0; ; ref++) {
 430             if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in attribute value");
 431
 432             if (buf[off+ref] == wrap) {
 433                 break attval;
 434             } else if (buf[off+ref] == '<') {
 435                 throw new WFCException("attribute value for '"+n+"' must not contain '<'", getLine(), getCol());
 436             }
 437         }
 438
 439         v = new String(buf, off, ref);
 440         col += ref; off += ref; len -= ref;
 441
 442         // remove end wrapper character
 443         col++; off++; len--;
 444
 445         // process attribute
 446         if (p != null && p.equals("xmlns")) {
 447             current.urimap.put(n, v);
 448         } else if (n.equals("xmlns")) {
 449             if (current.defaultUri != null) {
 450                 current.addError(new NCException("default namespace definition repeated", getLine(), getCol()));
 451             } else {
 452                 current.defaultUri = v;
 453             }
 454         } else {
 455             // find attribute uri
 456             if (p == null) {
 457                 for (Element e = current; e != null && u == null; e = e.prev) { u = e.uri; }
 458             } else {
 459                 for (Element e = current; e != null && u == null; e = e.prev) { u = (String)e.urimap.get(p); }
 460                 if (u == null) current.addError(new NCException("undefined attribute prefix '"+current.prefix+"'", getLine(), getCol()));
 461             }
 462
 463             // check to see if attribute is a repeat
 464             for (int i=0; current.len > i; i++) if (n.equals(current.keys[i]) && u.equals(current.uris[i])) throw new WFCException(
 465                 "attribute name '"+n+"' may not appear more than once in the same element tag", getLine(), getCol()
 466             );
 467
 468             // add attribute to the attribute arrays
 469             if (current.len == current.keys.length) current.morekeys();
 470             current.keys[current.len] = n;
 471             current.vals[current.len] = v;
 472             current.uris[current.len] = u;
 473             current.len++;
 474         }
 475     }
 476
 477     /** reads an entity and processes out its value. expects buf[off] == '&amp;' */
 478     private final void readEntity() throws IOException, XMLException {
 479         off++; len--;
 480         if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity");
 481
 482         boolean unknown = false;
 483         switch (buf[off]) {
 484             case '#':
 485                 off++; len--;
 486
 487                 int radix;
 488                 if (buf[off] == 'x') { off++; len--; radix = 16; } else { radix = 10; }
 489                 int c = 0;
 490
 491                 // read in each char, then shift total value to the left and add the extra
 492                 // style of loop is slightly different from all the others, as this should run a limited number of times
 493                 findchar: while (true) {
 494                     if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity");
 495                     int d = Character.digit(buf[off], radix);
 496                     if (d == -1) {
 497                         if (buf[off] != ';') throw new WFCException("illegal characters in entity reference", getLine(), getCol());
 498                         off++; len--; col++;
 499                         break findchar;
 500                     }
 501                     c = (c * radix) + d;
 502
 503                     off++; len--;
 504                 }
 505
 506                 singlechar[0] = Character.forDigit(c, radix);
 507                 characters(singlechar, 0, 1);
 508                 break;
 509
 510             case 'a':
 511                 if (buffer(4) && buf[off+1] == 'm' && buf[off+2] == 'p' && buf[off+3] == ';') {
 512                     characters(single_amp, 0, 1); // &amp;
 513                     off += 4; len -= 4; col++;
 514                 } else if (buffer(5) && buf[off+1] == 'p' && buf[off+2] == 'o' && buf[off+3] == 's' && buf[off+4] == ';') {
 515                     characters(single_apos, 0, 1); // &apos;
 516                     off += 5; len -= 5; col++;
 517                 } else {
 518                     unknown = true;
 519                 }
 520                 break;
 521
 522             case 'g':
 523                 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
 524                     characters(single_gt, 0, 1); // &gt;
 525                     off += 3; len -= 3; col++;
 526                 } else {
 527                     unknown = true;
 528                 }
 529                 break;
 530
 531             case 'l':
 532                 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
 533                     characters(single_lt, 0, 1); // &lt;
 534                     off += 3; len -= 3; col++;
 535                 } else {
 536                     unknown = true;
 537                 }
 538                 break;
 539
 540             case 'q':
 541                 if (buffer(5) && buf[off+1] == 'u' && buf[off+2] == 'o' && buf[off+3] == 't' && buf[off+4] == ';') {
 542                     characters(single_quot, 0, 1); // &quot;
 543                     off += 5; len -= 5; col++;
 544                 } else {
 545                     unknown = true;
 546                 }
 547                 break;
 548
 549             // TODO: check a parser-level Hash of defined entities
 550         }
 551
 552         if (unknown) throw new WFCException("unknown entity (<!ENTITY> not supported)", getLine(), getCol());
 553     }
 554
 555     /** reads until the passed string is encountered. */
 556     private final void readChars(boolean p, String match, boolean entities) throws IOException, XMLException {
 557         int ref;
 558         char[] end = match.toCharArray();
 559
 560         for (boolean more = true; more;) {
 561             if (!buffer(1)) return;
 562
 563             buf: for (ref = 0; ref < len; ref++) {
 564                 switch (buf[off+ref]) {
 565                     case '\r': // windows or macos9 newline
 566                         // normalise and process
 567                         buf[off+ref] = '\n'; ref++;
 568                         if (p) characters(buf, off, ref);
 569                         off += ref; len -= ref; ref = -1;
 570                         line++; col = 1;
 571
 572                         // windows double-char newline; skip the next char
 573                         if (!buffer(1)) return;
 574                         if (buf[off] == '\n') { off++; len--; }
 575                         break;
 576
 577                     case '\n': // unix newline
 578                         ref++;
 579                         if (p) characters(buf, off, ref);
 580                         off += ref; len -= ref; ref = -1;
 581                         line++; col = 1;
 582                         break;
 583
 584                     case '&':  // entity
 585                         if (entities) {
 586                             if (p) {
 587                                 if (ref > 0) characters(buf, off, ref);
 588                                 off += ref; len -= ref; ref = -1;
 589                                 readEntity();
 590                             }
 591                             break;
 592                         }
 593
 594                     default:
 595                         if (!buffer(ref+end.length)) continue buf;
 596                         for (int i=0; end.length > i; i++) if (end[i] != buf[off+ref+i]) continue buf;
 597                         more = false;
 598                         break buf;
 599                 }
 600             }
 601
 602             if (p && ref > 0) characters(buf, off, ref);
 603             off += ref; len -= ref; col += ref;
 604         }
 605     }
 606
 607     /**
 608      * reads until a <tt>&#60;</tt> symbol is encountered
 609      * @param p If true call the characters(char[],int,int) funciton for the processed characters
 610      */
 611     private final void readChars(boolean p) throws IOException, XMLException {
 612         int ref;
 613
 614         for (boolean more = true; more;) {
 615             if (!buffer(1)) return;
 616
 617             buf: for (ref = 0; ref < len; ref++) {
 618                 switch (buf[off+ref]) {
 619                     case '\r': // windows or macos9 newline
 620                         // normalise and process
 621                         buf[off+ref] = '\n'; ref++;
 622                         if (p) characters(buf, off, ref);
 623                         off += ref; len -= ref; ref = -1;
 624                         line++; col = 1;
 625
 626                         // windows double-char newline; skip the next char
 627                         if (!buffer(1)) return;
 628                         if (buf[off] == '\n') { off++; len--; }
 629                         break;
 630
 631                     case '\n': // unix newline
 632                         ref++;
 633                         if (p) characters(buf, off, ref);
 634                         off += ref; len -= ref; ref = -1;
 635                         line++; col = 1;
 636                         break;
 637
 638                     case '&':  // entity
 639                         if (p) {
 640                             if (ref > 0) characters(buf, off, ref);
 641                             off += ref; len -= ref; ref = -1;
 642                             readEntity();
 643                         }
 644                         break;
 645
 646                     case '<':  // end of chars section
 647                         more = false;
 648                         break buf;
 649                 }
 650             }
 651
 652             if (p && ref > 0) characters(buf, off, ref);
 653             off += ref; len -= ref; col += ref;
 654         }
 655     }
 656
 657     /** reads until a non-whitespace symbol is encountered */
 658     private final void readWhitespace() throws IOException, XMLException {
 659         int ref;
 660
 661         for (boolean more = true; more;) {
 662             if (!buffer(1)) return;
 663
 664             buf: for (ref = 0; ref < len; ref++) {
 665                 switch (buf[off+ref]) {
 666                     case '\r': // windows or macos9 newline
 667                         // normalise and process
 668                         buf[off+ref] = '\n';
 669                         whitespace(buf, off, ++ref);
 670                         off += ref; len -= ref; ref = -1;
 671                         line++; col = 1;
 672
 673                         // windows double-char newline; skip the next char
 674                         if (!buffer(1)) return;
 675                         if (buf[off] == '\n') { off++; len--; }
 676                         break;
 677
 678                     case '\n': // unix newline
 679                         whitespace(buf, off, ++ref);
 680                         off += ref; len -= ref; ref = -1;
 681                         line++; col = 1;
 682                         break;
 683
 684                     case ' ':  // space
 685                     case '\t': // tab
 686                         break;
 687
 688                     default:   // end of whitespace
 689                         more = false;
 690                         break buf;
 691                 }
 692             }
 693
 694             off += ref; len -= ref; col += ref;
 695         }
 696     }
 697
 698     /**
 699      * attempt to fill the buffer.
 700      *
 701      * @param min Minimum number of characters to read (even if we have to block to do it).
 702      * @return return false if min can't be reached.
 703      */
 704     private final boolean buffer(int min) throws IOException {
 705         if (len > min) return true;
 706
 707         if (buf.length - (off+len) >= min) {
 708             // plenty of space left on the end of the buffer
 709         } else if (off >= min) {
 710             // moving offset data to start will leave enough free space on the end
 711             System.arraycopy(buf, off, buf, 0, len);
 712             off = 0;
 713         } else {
 714             // buffer size will have to be increased
 715             char[] newbuf = new char[buf.length * 2];
 716             System.arraycopy(buf, off, newbuf, 0, len);
 717             buf = newbuf;
 718             off = 0;
 719         }
 720
 721         while (min > len) {
 722             int newlen = in.read(buf, off+len, buf.length-(off+len));
 723             if (newlen < 0) return false;
 724             len += newlen;
 725         }
 726
 727         return true;
 728     }
 729
 730
 731     /////////////////////////////////////////////////////////////////////////////////////////////
 732     // Abstract SAX-Like Interface
 733     /////////////////////////////////////////////////////////////////////////////////////////////
 734
    /**
     * Called when the start of an element is processed.
     *
     * <p>The array of Attribute names and values may be longer than the
     * number of entries they contain, but all the entries will be
     * packed at the top. The number of entries in use is held in
     * <tt>Element.len</tt>.</p>
     *
     * <p><b>DO NOT</b> store a reference to the attribute arrays, as
     * they are reused by other elements.</p>
     *
     * @param e the element whose start tag was just processed.
     * @throws SchemaException may be thrown by the implementation to
     *         report a schema violation.
     */
    public abstract void startElement(Element e) throws SchemaException;
 746
    /**
     * Represents a line of character data.
     *
     * <p>Newlines are all normalised to the Unix \n as per the XML Spec,
     * and a newline will only appear as the last character in the passed
     * array segment.</p>
     *
     * <p>XML.getLine() and XML.getCol() report the position at the
     * beginning of this character segment, which can be processed in a
     * line-by-line fashion due to the above newline restriction.</p>
     *
     * @param ch array containing the character data; presumably the
     *        parser's own buffer, so copy anything that must outlive
     *        the call (TODO confirm against the caller).
     * @param start index in <tt>ch</tt> of the first character of the segment.
     * @param length number of characters in the segment.
     * @throws SchemaException may be thrown by the implementation to
     *         report a schema violation.
     */
    public abstract void characters(char[] ch, int start, int length) throws SchemaException;
 759
    /**
     * Represents a line of ignorable whitespace.
     *
     * <p>The parser passes its own read buffer as <tt>ch</tt>, and that
     * buffer is shifted or replaced as more input is read, so
     * implementations should copy anything they need to keep.</p>
     *
     * @param ch array containing the whitespace.
     * @param start index in <tt>ch</tt> of the first whitespace character.
     * @param length number of whitespace characters in the segment.
     * @throws SchemaException may be thrown by the implementation to
     *         report a schema violation.
     */
    public abstract void whitespace(char[] ch, int start, int length) throws SchemaException;
 762
    /**
     * Represents the end of an Element.
     *
     * @param e the element that has ended.
     * @throws SchemaException may be thrown by the implementation to
     *         report a schema violation.
     */
    public abstract void endElement(Element e) throws SchemaException;
 765
 766
 767     /////////////////////////////////////////////////////////////////////////////////////////////
 768     // Inner Classes for Parser Support
 769     /////////////////////////////////////////////////////////////////////////////////////////////
 770
 771     /**
 772      * Used as a struct for holding information about a current element,
 773      * and acts as a linked list entry.
 774      *
 775      * <p>Each element stores a hashtable of namespace definitions against
 776      * their respective prefix, and a variable holding their default
 777      * uri. If they did not specify a default uri, their
 778      * parent's uri is copied in to keep up the sembelence of speedy
 779      * parsing.</p>
 780      *
 781      * <h3>SLOWEST PART OF THE XML PARSER</h3>
 782      * <p>To implement the Namespace Specification exactly, we have to
 783      * store prefix mappings for elements away from its parents and
 784      * siblings. This means if a child of a child of-a child uses
 785      * a prefix defined in the root, we have to search each Hashtable
 786      * in each Element until we get to the root.</p>
 787      *
 788      * <p>Unfortunetally, every other solution I can think of requires
 789      * more work than this one, shifted to different parts of the
 790      * parser.</p>
 791      */
 792     public static final class Element
 793     {
 794         public Element next, prev;
 795
 796         /** A hashtable of all namespace prefixes that are defined by this element. */
 797         public Hash urimap;
 798
 799         /** An array of attribute names. */
 800         public String[] keys;
 801
 802         /** An array of attribute values. */
 803         public String[] vals;
 804
 805         /** An array of attribute uris. */
 806         public String[] uris;
 807
 808         /** An array of non-fatal errors related to this element. */
 809         public XMLException[] errors;
 810
 811         /** Current number of attributes in the <tt>keys</tt> and <tt>vals</tt> arrays. */
 812         public int len;
 813
 814         /** Default URI for this element and its children with no prefix. */
 815         public String defaultUri;
 816
 817         /** URI of current tag. XML Namespace Spec 14-Jan-1999 section 1 */
 818         public String uri;
 819
 820         /** LocalPart of current element. XML Namespace Spec 14-Jan-1999 [8] */
 821         public String localName;
 822
 823         /** Qualified Name of current element.  XML Namespace Spec 14-Jan-1999 [6] */
 824         public String qName;
 825
 826         /** Prefix of current element. Substring of qName. XML Namespace Spec 14-Jan-1999 [7] */
 827         public String prefix;
 828
 829         public Element() {
 830             defaultUri = uri = prefix = localName = qName = null;
 831             urimap = new Hash(3,3);
 832             keys = new String[10];
 833             vals = new String[10];
 834             uris = new String[10];
 835             errors = new XMLException[] {};
 836             len = 0;
 837         }
 838
 839         /** increase the size of the attributes arrays */
 840         void morekeys() {
 841             String[] newkeys = new String[keys.length+5];
 842             String[] newvals = new String[vals.length+5];
 843             String[] newuris = new String[uris.length+5];
 844             System.arraycopy(keys, 0, newkeys, 0, keys.length);
 845             System.arraycopy(vals, 0, newvals, 0, vals.length);
 846             System.arraycopy(uris, 0, newuris, 0, uris.length);
 847             keys = newkeys; vals = newvals; uris = newuris;
 848         }
 849
 850         /** empty out the arrays */
 851         void clear() {
 852             if (keys.length != vals.length || vals.length != uris.length) {
 853                 keys = new String[10]; vals = new String[10]; uris = new String[10];
 854             } else {
 855                 for (int i=0; keys.length > i; i++) { keys[i] = null; vals[i] = null; uris[i] = null; }; len = 0;
 856             }
 857             errors = new XMLException[] {};
 858         }
 859
 860         /** add an error to the errors array */
 861         void addError(XMLException e) {
 862             // it doesn't really matter about continually expanding the array, as this case is quite rare
 863             XMLException[] newe = new XMLException[errors.length+1];
 864             System.arraycopy(errors, 0, newe, 0, errors.length);
 865             newe[errors.length] = e;
 866             errors = newe;
 867         }
 868     }
 869
 870     /** Parse or Structural Error */
 871     public static class XMLException extends Exception
 872     {
 873         private int line;
 874         private int col;
 875         private String error;
 876
 877         public XMLException(String e) { this(e, -1, -1); }
 878
 879         public XMLException(String e, int l, int c) {
 880             this.error = e;
 881             this.line  = l;
 882             this.col   = c;
 883         }
 884
 885         public int getLine()     { return this.line;  }
 886         public int getCol()      { return this.col;   }
 887         public String getMessage() { return this.error; }
 888     }
 889
 890     /** Violation of Markup restrictions in XML Specification - Fatal Error */
 891     public static class MarkupException extends XMLException { public MarkupException(String e, int l, int c) { super(e,l,c); } }
 892
 893     /** Well-Formedness Constraint Violation - Fatal Error */
 894     public static final class WFCException extends MarkupException { public WFCException(String e, int l, int c) { super(e,l,c); } }
 895
 896     /** Namespace Constraint Violation - Recoverable Error */
 897     public static final class NCException extends XMLException { public NCException(String e, int l, int c) { super(e,l,c); } }
 898
 899     /** Schema Violation - Fatal Error */
 900     public static class SchemaException extends XMLException {
 901         public SchemaException(String e) { this(e, -1, -1); }
 902         public SchemaException(String e, int l, int c) { super(e,l,c); }
 903     }
 904
 905
 906     /////////////////////////////////////////////////////////////////////////////////////////////
    // Static Support Functions for the XML Specification
 908     /////////////////////////////////////////////////////////////////////////////////////////////
 909
 910     // attempt to avoid these functions unless you *expect* the input to fall in the given range.
 911
 912     /** First Character of Name - XML Specification 1.0 [5] */
 913     private static final boolean Name(char c) {
 914         return BaseCharAscii(c) || c == '_' || c == ':' || Letter(c);
 915     }
 916
 917     /** NameChar - XML Specification 1.0 [4] */
 918     private static final boolean NameChar(char c) {
 919         return BaseCharAscii(c) || c == '.' || c == '-' || c == '_' || c == ':'
 920             || Digit(c) || Letter(c) || Extender(c); // TODO: || CombiningChar(c);
 921     }
 922
 923     /** BaseChar - XMl Specification 1.0 [84] */
 924     private static final boolean Letter(char c) {
 925         return BaseChar(c) || Ideographic(c);
 926     }
 927
 928     /** Elements of BaseChar that exist in ASCII. */
 929     private static final boolean BaseCharAscii(char c) {
 930         return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A');
 931     }
 932
 933     /** Char - XML Specification 1.0 [2] */
 934     private static final boolean Char(char c) {
 935         // u000A == r and u000D == n, but the javac compiler can't handle the \ u form
 936         return c == '\u0009' || c == '\r' || c == '\n'
 937             || (c >= '\u0020' && c <= '\uD7FF')
 938             || (c >= '\uE000' && c <= '\uFFFD');
 939     }
 940
 941     /** BaseChar - XML Specification 1.0 [85] */
 942     private static final boolean BaseChar(char c) {
 943         return  BaseCharAscii(c) || (c >= '\u00C0' && c <= '\u00D6')
 944             || (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131')
 945             || (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E')
 946             || (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5')
 947             || (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1')
 948             || (c == '\u0386')                  || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
 949             || (c >= '\u038E' && c <= '\u03A1') || (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6')
 950             || (c == '\u03DA')                  || (c == '\u03DC')                  || (c == '\u03DE')
 951             || (c == '\u03E0')
 952             || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F')
 953             || (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4')
 954             || (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB')
 955             || (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556')
 956             || (c == '\u0559')
 957             || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2')
 958             || (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7')
 959             || (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3')
 960             || (c == '\u06D5')
 961             || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939')
 962             || (c == '\u093D')
 963             || (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990')
 964             || (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0')
 965             || (c == '\u09B2')
 966             || (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DF' && c <= '\u09E1') || (c >= '\u09F0' && c <= '\u09F1')
 967             || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || (c >= '\u0A13' && c <= '\u0A28')
 968             || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || (c >= '\u0A35' && c <= '\u0A36')
 969             || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C')
 970             || (c == '\u0A5E')
 971             || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B')
 972             || (c == '\u0A8D')
 973             || (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0')
 974             || (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9')
 975             || (c == '\u0ABD')
 976             || (c == '\u0AE0')
 977             || (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28')
 978             || (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39')
 979             || (c == '\u0B3D')
 980             || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || (c >= '\u0B85' && c <= '\u0B8A')
 981             || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || (c >= '\u0B99' && c <= '\u0B9A')
 982             || (c == '\u0B9C')
 983             || (c >= '\u0B9E' && c <= '\u0B9F') || (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA')
 984             || (c >= '\u0BAE' && c <= '\u0BB5') || (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C')
 985             || (c >= '\u0C0E' && c <= '\u0C10') || (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33')
 986             || (c >= '\u0C35' && c <= '\u0C39') || (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C')
 987             || (c >= '\u0C8E' && c <= '\u0C90') || (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3')
 988             || (c >= '\u0CB5' && c <= '\u0CB9')
 989             || (c == '\u0CDE')
 990             || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || (c >= '\u0D0E' && c <= '\u0D10')
 991             || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || (c >= '\u0D60' && c <= '\u0D61')
 992             || (c >= '\u0E01' && c <= '\u0E2E')
 993             || (c == '\u0E30')
 994             || (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82')
 995             || (c == '\u0E84')
 996             || (c >= '\u0E87' && c <= '\u0E88')
 997             || (c == '\u0E8A')
 998             || (c == '\u0E8D')
 999             || (c >= '\u0E94' && c <= '\u0E97') || (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3')
1000             || (c == '\u0EA5')
1001             || (c == '\u0EA7')
1002             || (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE')
1003             || (c == '\u0EB0')
1004             || (c >= '\u0EB2' && c <= '\u0EB3')
1005             || (c == '\u0EBD')
1006             || (c >= '\u0EC0' && c <= '\u0EC4') || (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69')
1007             || (c >= '\u10A0' && c <= '\u10C5') || (c >= '\u10D0' && c <= '\u10F6')
1008             || (c == '\u1100')
1009             || (c >= '\u1102' && c <= '\u1103') || (c >= '\u1105' && c <= '\u1107')
1010             || (c == '\u1109')
1011             || (c >= '\u110B' && c <= '\u110C') || (c >= '\u110E' && c <= '\u1112')
1012             || (c == '\u113C')
1013             || (c == '\u113E')
1014             || (c == '\u1140')
1015             || (c == '\u114C')
1016             || (c == '\u114E')
1017             || (c == '\u1150')
1018             || (c >= '\u1154' && c <= '\u1155')
1019             || (c == '\u1159')
1020             || (c >= '\u115F' && c <= '\u1161')
1021             || (c == '\u1163')
1022             || (c == '\u1165')
1023             || (c == '\u1167')
1024             || (c == '\u1169')
1025             || (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173')
1026             || (c == '\u1175')
1027             || (c == '\u119E')
1028             || (c == '\u11A8')
1029             || (c == '\u11AB')
1030             || (c >= '\u11AE' && c <= '\u11AF') || (c >= '\u11B7' && c <= '\u11B8')
1031             || (c == '\u11BA')
1032             || (c >= '\u11BC' && c <= '\u11C2')
1033             || (c == '\u11EB')
1034             || (c == '\u11F0')
1035             || (c == '\u11F9')
1036             || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || (c >= '\u1F00' && c <= '\u1F15')
1037             || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || (c >= '\u1F48' && c <= '\u1F4D')
1038             || (c >= '\u1F50' && c <= '\u1F57')
1039             || (c == '\u1F59')
1040             || (c == '\u1F5B')
1041             || (c == '\u1F5D')
1042             || (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC')
1043             || (c == '\u1FBE')
1044             || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || (c >= '\u1FD0' && c <= '\u1FD3')
1045             || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || (c >= '\u1FF2' && c <= '\u1FF4')
1046             || (c >= '\u1FF6' && c <= '\u1FFC')
1047             || (c == '\u2126')
1048             || (c >= '\u212A' && c <= '\u212B')
1049             || (c == '\u212E')
1050             || (c >= '\u2180' && c <= '\u2182') || (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA')
1051             || (c >= '\u3105' && c <= '\u312C') || (c >= '\uAC00' && c <= '\uD7A3');
1052     }
1053
1054     /** BaseChar - XMl Specification 1.0 [86] */
1055     private static final boolean Ideographic(char c) {
1056         return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
1057     }
1058
1059     /** CombiningChar - XMl Specification 1.0 [87] */
1060     /*private static final boolean CombiningChar(char c) {
1061         return (c >= '\u0300' && c <= '\u0345')
1062             || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || (c >= '\u0591' && c <= '\u05A1')
1063             || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD')
1064             || (c == '\u05BF')
1065             || (c >= '\u05C1' && c <= '\u05C2')
1066             || (c == '\u05C4')
1067             || (c >= '\u064B' && c <= '\u0652')
1068             || (c == '\u0670')
1069             || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4')
1070             || (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903')
1071             || (c == '\u093C')
1072             || (c >= '\u093E' && c <= '\u094C')
1073             || (c == '\u094D')
1074             || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || (c >= '\u0981' && c <= '\u0983')
1075             || (c == '\u09BC')
1076             || (c == '\u09BE')
1077             || (c == '\u09BF')
1078             || (c >= '\u09C0' && c <= '\u09C4') || (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD')
1079             || (c == '\u09D7')
1080             || (c >= '\u09E2' && c <= '\u09E3')
1081             || (c == '\u0A02')
1082             || (c == '\u0A3C')
1083             || (c == '\u0A3E')
1084             || (c == '\u0A3F')
1085             || (c >= '\u0A40' && c <= '\u0A42') || (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D')
1086             || (c >= '\u0A70' && c <= '\u0A71') || (c >= '\u0A81' && c <= '\u0A83')
1087             || (c == '\u0ABC')
1088             || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || (c >= '\u0ACB' && c <= '\u0ACD')
1089             || (c >= '\u0B01' && c <= '\u0B03')
1090             || (c == '\u0B3C')
1091             || (c >= '\u0B3E' && c <= '\u0B43') || (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D')
1092             || (c >= '\u0B56' && c <= '\u0B57') || (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2')
1093             || (c >= '\u0BC6' && c <= '\u0BC8') || (c >= '\u0BCA' && c <= '\u0BCD')
1094             || (c == '\u0BD7')
1095             || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || (c >= '\u0C46' && c <= '\u0C48')
1096             || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || (c >= '\u0C82' && c <= '\u0C83')
1097             || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || (c >= '\u0CCA' && c <= '\u0CCD')
1098             || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || (c >= '\u0D3E' && c <= '\u0D43')
1099             || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D')
1100             || (c == '\u0D57')
1101             || (c == '\u0E31')
1102             || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E')
1103             || (c == '\u0EB1')
1104             || (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD')
1105             || (c >= '\u0F18' && c <= '\u0F19')
1106             || (c == '\u0F35')
1107             || (c == '\u0F37')
1108             || (c == '\u0F39')
1109             || (c == '\u0F3E')
1110             || (c == '\u0F3F')
1111             || (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95')
1112             || (c == '\u0F97')
1113             || (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7')
1114             || (c == '\u0FB9')
1115             || (c >= '\u20D0' && c <= '\u20DC')
1116             || (c == '\u20E1')
1117             || (c >= '\u302A' && c <= '\u302F')
1118             || (c == '\u3099')
1119             || (c == '\u309A');
1120     }*/
1121
1122     /** Digit - XMl Specification 1.0 [88] */
1123     private static final boolean Digit(char c) {
1124         return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9')
1125             || (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F')
1126             || (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF')
1127             || (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F')
1128             || (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
1129     }
1130
1131     /** Extender - XMl Specification 1.0 [89] */
1132     private static final boolean Extender(char c) {
1133         return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387'
1134             || c == '\u0640' || c == '\u0E46' || c == '\u0EC6' || c == '\u3005'
1135             || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE');
1136     }
1137
1138     /** Whitespace - XML Specification 1.0 [3] */
1139     private static final boolean S(char c) {
1140         return c == '\u0020' || c == '\u0009' || c == '\r' || c == '\n';
1141     }
1142 }