1 // Copyright (C) 2003 Adam Megacz <adam@xwt.org> all rights reserved.
3 // You may modify, copy, and redistribute this code under the terms of
4 // the GNU Library Public License version 2.1, with the exception of
5 // the portion of clause 6a after the semicolon (aka the "obnoxious
10 import java.io.Reader;
11 import java.io.IOException;
12 import java.io.EOFException;
15 * An Event-Driven, Non-Validating XML Parser with Namespace support.
17 * A subclass can implement the abstract functions for receiving details
18 * about an xml file as it is parsed. To initiate a parse, use the parse()
21 * <h3>IMPLEMENTATION NOTES</h3>
22 * <p>As the parser traverses into an element, it adds it to the linked list
23 * called <tt>elements</tt>. However, <tt>elements</tt> has been pre-filled
24 * with instances of the Element inner class. So in the vast majority of
25 * cases, the pointer current is moved along one, and the values for the
26 * new element are filled into the current object.</p>
28 * <p>This parser supports all the unicode ranges required by the XML
29 * Specification. However, it is optimised for well-formed ASCII documents.
30 * Documents containing unicode Names and Attributes will take much longer
31 * to process, and invalid documents (badly formed Names or invalid attributes)
32 * will be run through a test on every single unicode character range before
33 * being declared invalid.</p>
35 * <h3>IMPLEMENTATION RULES</h3>
37 * <li>Each time the buffer offset <tt>off</tt> is moved, the length
38 * <tt>len</tt> must be decreased.</li>
39 * <li>Each time the buffer length is decreased, it must be checked to make
40 * sure it is >0.</li>
43 * <h3>Other Notes</h3>
45 * <li><i>error</i> is defined as a Validity Constraint Violation and is recoverable</li>
46 * <li><i>fatal error</i> is defined as a Well-formedness Constraint Violation and is not recoverable</li>
49 * @author David Crawshaw
50 * @see XML-Specification-1.0 http://w3.org/TR/REC-xml
52 public abstract class XML
54 /////////////////////////////////////////////////////////////////////////////////////////////
56 /////////////////////////////////////////////////////////////////////////////////////////////
// Buffer size (in chars) that the no-argument constructor passes to XML(int).
58 public static final int BUFFER_SIZE = 255;
60 /** static pool of XML.Element instances shared by all XML Parsers.
61 * elements in the queue have dirty prev and next references, that need cleaning before use. */
62 private static final Queue elements = new Queue(30);
// One-character arrays handed to characters() by readEntity() when it decodes
// the predefined entities &amp; &apos; &gt; &lt; and &quot;.
64 private static final char[] single_amp = new char[] { '&' };
65 private static final char[] single_apos = new char[] { '\'' };
66 private static final char[] single_gt = new char[] { '>' };
67 private static final char[] single_lt = new char[] { '<' };
68 private static final char[] single_quot = new char[] { '"' };
// Element currently being parsed. It is linked to its parent through prev; a null
// qName means that no element is open (see parse(), clean() and readTag()).
78 private Element current;
80 // used in readEntity() to process a single character without creating a new array
81 private char[] singlechar = new char[1];
/** Creates a parser whose character buffer initially holds BUFFER_SIZE chars. */
84 public XML() { this(BUFFER_SIZE); }
/**
 * Creates a parser with a character buffer of the given initial length.
 *
 * @param bSize initial length of the internal char buffer
 */
86 public XML(int bSize) {
87 buf = new char[bSize];
// take an Element from the shared pool; fall back to a new one when remove() hands back null
89 current = (Element)elements.remove(false);
90 if (current == null) current = new Element();
// pooled elements carry stale links, so detach this one from whatever list it was in
91 current.prev = current.next = null;
95 /**
 * Returns the line number at the beginning of the last process call.
 * The parser passes this value to the exceptions it creates.
 */
96 public int getLine() { return line; }
98 /**
 * Returns the column number at the beginning of the last process call.
 * The parser passes this value to the exceptions it creates.
 */
99 public int getCol() { return col; }
102 * Parse given input and call the abstract event functions.
104 * Careful with threading, as this function is not synchronized.
106 public final void parse(Reader reader) throws IOException, XMLException {
111 clean(); // clean up possible mid-way linked-list element
114 // process the stream
// a null qName on current means no element is open, so stopping here is a normal finish;
// otherwise the input ended inside an element, which is reported as a well-formedness error
117 if (current.qName == null) break;
118 throw new WFCException("reached eof without closing <"+current.qName+"> element", getLine(), getCol());
// markup at the current position is consumed by readTag(); the text that follows is then
// read up to the next '<' and is only reported to characters() while an element is open
121 if (buf[off] == '<') readTag();
122 readChars(current.qName != null);
124 } finally { clean(); } // clean up elements
127 /** remove any leftover elements from the linked list and queue them */
128 private final void clean() {
// step back one element at a time, appending the element just left to the shared pool
129 while (current.prev != null) elements.append((current = current.prev).next);
// a null qName marks "no element open", which parse() and readTag() test for
131 current.qName = null;
134 /**
 * Reads one markup construct starting at the current buffer position.
 * Expects <tt>buf[off] == '<'</tt>.
 *
 * <p>Comments, declarations, processing instructions and conditional sections
 * are read without being reported; CDATA content is passed to characters().
 * For an element tag the name and attributes are read, the namespace uri is
 * resolved and startElement() is called.</p>
 *
 * @throws IOException  on read failure; EOFException when the input ends inside the tag
 * @throws XMLException on malformed markup (MarkupException / WFCException)
 */
135 private final void readTag() throws IOException, XMLException {
136 // Start Tag '<' Name (S Attribute)* S? '>'
137 boolean starttag = true;
139 // End Tag '</' Name S? '>'
140 boolean endtag = false;
142 // if (starttag & endtag) then: EmptyElemTag '<' Name (S Attribute)* S? '/>'
144 // Position in the name of the ':' namespace prefix
150 if (!buffer(1)) throw new EOFException("Unexpected EOF processing element tag");
152 // work out what we can from the beginning of the tag
155 // definitions here don't necessarily conform to xml spec (as DTDs not yet implemented)
157 if (!buffer(4)) throw new EOFException("Unexpected EOF processing <! element");
162 if (buf[off+1] != '-') { bad = true; break; }
163 col += 2; off += 2; len -= 2;
165 // Comment '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
// p == false: the comment body is read but not reported
166 readChars(false, "-->", false);
// step over the closing '-->'
167 col += 3; off += 3; len -= 3;
170 // we don't care about the following definitions
174 || buf[off+1] != 'T' || buf[off+2] != 'T' || buf[off+3] != 'L'
175 || buf[off+4] != 'I' || buf[off+5] != 'S' || buf[off+6] != 'T') {
178 col += 7; off += 7; len -= 7;
180 // ATTLIST '<!ATTLIST' (Char* - '>') '>'
181 readChars(false, ">", true);
186 || buf[off+1] != 'O' || buf[off+2] != 'C' || buf[off+3] != 'T'
187 || buf[off+4] != 'Y' || buf[off+5] != 'P' || buf[off+6] != 'E') {
190 col += 7; off += 7; len -= 7;
192 // DTD '<!DOCTYPE' (Char* - '>') '>'
193 readChars(false, ">", true);
199 } else if (buf[off+1] == 'L' && buf[off+2] == 'E' && buf[off+3] == 'M'
200 && buf[off+4] == 'E' && buf[off+5] == 'N' && buf[off+6] == 'T') {
201 // ELEMENT '<!ELEMENT' (Char* - '>') '>'
202 readChars(false, ">", true);
205 } else if (buf[off+1] == 'N' && buf[off+2] == 'T' && buf[off+3] == 'I'
206 && buf[off+4] == 'T' && buf[off+5] == 'Y') {
207 // ENTITY '<!ENTITY' (Char* - '>') '>'
208 readChars(false, ">", true);
218 || buf[off+1] != 'O' || buf[off+2] != 'T' || buf[off+3] != 'A' || buf[off+4] != 'T'
219 || buf[off+5] != 'I' || buf[off+6] != 'O' || buf[off+7] != 'N') {
222 col += 8; off += 8; len -= 8;
223 // NOTATION '<!NOTATION' (Char* - '>') '>'
224 readChars(false, ">", true);
231 if (bad) throw new MarkupException("element tag start character is invalid", getLine(), getCol());
233 } else if (s == '?') {
234 // PI (Ignored) '<?' (Char* - (Char* '?>' Char*)) '?>'
236 readChars(false, "?>", true);
237 if (!buffer(2)) throw new EOFException("Unexpected EOF at end of Processing Instruction");
// step over the closing '?>'
238 col += 2; off += 2; len -= 2;
240 } else if (s == '[') {
242 || buf[off+1] != 'C' || buf[off+2] != 'D' || buf[off+3] != 'A'
243 || buf[off+4] != 'T' || buf[off+5] != 'A' || buf[off+6] != '[') {
245 // Conditional '<![' (Char* - (Char* ']]>' Char*)) ']]>'
246 readChars(false, "]]>", false);
248 col += 7; off += 7; len -=7;
249 // CDATA '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>'
// p == true: the CDATA content is handed to characters()
250 readChars(true, "]]>", false);
// step over the closing ']]>'
252 col += 3; off += 3; len -= 3;
255 // End Tag '</' Name S? '>'
260 if (!buffer(1)) throw new EOFException("Unexpected EOF processing end tag");
264 if (!Name(s)) throw new MarkupException("invalid starting character in element name", getLine(), getCol());
266 // find the element name (defined in XML Spec: section 2.3)
// namelen counts the characters of the name; buffer(namelen+1) makes sure the
// character about to be inspected is actually available
267 for (namelen = 0; ; namelen++) {
268 if (!buffer(namelen+1)) throw new EOFException("Unexpected EOF in element tag name");
270 s = buf[off+namelen];
272 if (S(s) || s == '>') {
274 } else if (s == '/') {
277 } else if (s == ':' && namelen > 0 && prefix < 1) {
278 // we have a definition of the prefix range available
280 } else if (!NameChar(s)) {
281 throw new MarkupException("element name contains invalid character", getLine(), getCol());
285 // process name (based on calculated region)
286 if (namelen < 1) throw new MarkupException("element name is null", getLine(), getCol());
288 // we have marked out the name region, so turn it into a string and move on
289 String qName = new String(buf, off, namelen);
// advance past the name
291 col += namelen; off += namelen; len -= namelen;
294 // create the in-memory element representation of this beast
295 // if current.qName == null then this is the root element we're dealing with
296 if (current.qName != null) {
297 if (current.next == null) {
298 // we're at the end of the default element depth
// take an Element from the shared pool (or allocate one) and link it behind current
299 current.next = (Element)elements.remove(false);
300 if (current.next == null) current.next = new Element();
301 current.next.prev = current;
302 current.next.next = null;
304 current = current.next;
308 current.qName = qName;
309 current.defaultUri = current.uri = null;
// split "prefix:local" around the ':' position recorded while scanning the name
312 current.prefix = current.qName.substring(0, prefix);
313 current.localName = current.qName.substring(prefix+1);
315 current.prefix = null;
316 current.localName = current.qName;
319 // process attributes
321 if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 1");
322 while (buf[off] != '/' && buf[off] != '>') {
324 if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 2");
328 // inherit namespace default uri if attribute was not provided
329 if (current.defaultUri == null) {
330 current.defaultUri = (current.prev != null) ? current.prev.defaultUri : null;
333 // work out the uri of this element
334 if (current.prefix == null) {
335 // element has no prefix, therefore is the default uri
336 current.uri = current.defaultUri;
338 // work back through the hashtables until uri is found
// starts at this element and walks outward through prev until a mapping is found
339 for (Element e = current; e != null && current.uri == null; e = e.prev) {
340 current.uri = (String)e.urimap.get(current.prefix);
// an unresolved prefix is recorded on the element instead of being thrown
// (NCException is the recoverable error type)
342 if (current.uri == null) current.addError(new NCException("undefined prefix '"+current.prefix+"'", getLine(), getCol()));
346 // this is an end-of-element tag
347 if (!qName.equals(current.qName)) throw new WFCException(
348 "end tag </"+qName+"> does not line up with start tag <"+current.qName+">", getLine(), getCol()
352 // deal with whitespace
356 if (!buffer(1)) throw new EOFException("Unexpected EOF before end of tag");
357 if (buf[off] == '/') {
361 if (!buffer(1)) throw new EOFException("Unexpected EOF before end of endtag");
362 if (buf[off] == '>') {
365 throw new MarkupException("missing '>' character from element '"+qName+"'", getLine(), getCol());
368 // send element signals
369 if (starttag) startElement(current);
373 // we just closed an element, so remove it from the element 'stack'
374 if (current.prev == null) {
375 // we just finished the root element
376 current.qName = null;
// pop: current moves back to the parent and the element just closed goes to the shared pool
378 elements.append((current = current.prev).next);
385 /**
 * Reads in one attribute of the current element. Expects Name(buf[off]).
 *
 * <p>An attribute with the prefix <tt>xmlns</tt> adds a prefix mapping to
 * current.urimap, and an attribute named <tt>xmlns</tt> sets current.defaultUri.
 * The name, value and uri of the attribute are stored at index current.len of
 * the keys, vals and uris arrays of the current element.</p>
 *
 * @throws IOException  on read failure; EOFException when the input ends inside the attribute
 * @throws XMLException on malformed attribute syntax or a repeated attribute
 */
386 private final void readAttribute() throws IOException, XMLException {
389 String n, v, p, u; // attribute name, value, prefix and uri respectively
390 n = v = p = u = null;
393 // find the element name (defined in XML Spec: section 2.3)
394 for (ref= 0; ; ref++) {
395 if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in read attribute loop part 1");
399 if (s == '=' || S(s)) {
401 } else if (s == ':' && ref > 0 && prefix < 1) {
402 // we have a definition of the prefix range available
404 } else if (!NameChar(s)) {
405 throw new MarkupException("attribute name contains invalid characters", getLine(), getCol());
409 // determine prefix and key name
// the prefix text is the first prefix-1 chars; advancing by prefix also steps over
// one more char (presumably the ':' separator - the assignment to prefix is not shown)
411 p = new String(buf, off, prefix-1);
412 col += prefix; off += prefix; len -= prefix; ref -= prefix;
414 n = new String(buf, off, ref);
415 col += ref; off += ref; len -= ref;
417 // find name/value divider ('=')
419 if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider");
420 if (buf[off] != '=') throw new MarkupException("attribute name not followed by '=' sign", getLine(), getCol());
425 if (!buffer(1)) throw new EOFException("Unexpected EOF after attribute '=' divider");
428 if (buf[off] == '\'' || buf[off] == '"') {
431 throw new MarkupException("attribute '"+n+"' must have attribute wrapped in ' or \"", getLine(), getCol());
435 // find the attribute value
// ref counts value characters up to (not including) the closing wrap character
436 attval: for (ref = 0; ; ref++) {
437 if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in attribute value");
439 if (buf[off+ref] == wrap) {
441 } else if (buf[off+ref] == '<') {
442 throw new WFCException("attribute value for '"+n+"' must not contain '<'", getLine(), getCol());
446 v = new String(buf, off, ref);
447 col += ref; off += ref; len -= ref;
449 // remove end wrapper character
// xmlns:name="uri" declares a prefix on this element; plain xmlns="uri" sets the default namespace
453 if (p != null && p.equals("xmlns")) {
454 current.urimap.put(n, v);
455 } else if (n.equals("xmlns")) {
456 if (current.defaultUri != null) {
457 current.addError(new NCException("default namespace definition repeated", getLine(), getCol()));
459 current.defaultUri = v;
462 // find attribute uri
// both loops start at the current element and walk outward through prev
464 for (Element e = current; e != null && u == null; e = e.prev) { u = e.uri; }
466 for (Element e = current; e != null && u == null; e = e.prev) { u = (String)e.urimap.get(p); }
// NOTE(review): the message prints current.prefix (the element's prefix) although the
// lookup above used p (the attribute's prefix) - looks like p was intended; confirm
467 if (u == null) current.addError(new NCException("undefined attribute prefix '"+current.prefix+"'", getLine(), getCol()));
470 // check to see if attribute is a repeat
// NOTE(review): u can still be null at this point (the undefined-prefix case above only
// records an error), and u.equals(...) would then throw NullPointerException as soon
// as n matches an earlier key - confirm against the lines missing from this excerpt
471 for (int i=0; current.len > i; i++) if (n.equals(current.keys[i]) && u.equals(current.uris[i])) throw new WFCException(
472 "attribute name '"+n+"' may not appear more than once in the same element tag", getLine(), getCol()
475 // add attribute to the attribute arrays
476 if (current.len == current.keys.length) current.morekeys();
477 current.keys[current.len] = n;
478 current.vals[current.len] = v;
479 current.uris[current.len] = u;
484 /**
 * Reads an entity and processes out its value. Expects buf[off] == '&'.
 *
 * <p>Handles numeric character references (decimal, or hexadecimal after an
 * 'x') and the five predefined entities amp, apos, gt, lt and quot; the
 * decoded character is delivered through characters(). Anything else is
 * rejected, because &lt;!ENTITY&gt; declarations are not supported.</p>
 *
 * @throws IOException  on read failure; EOFException when the input ends inside the entity
 * @throws XMLException WFCException for an illegal or unknown entity reference
 */
485 private final void readEntity() throws IOException, XMLException {
487 if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity");
489 boolean unknown = false;
// a leading 'x' selects hexadecimal digits, otherwise the reference is decimal
495 if (buf[off] == 'x') { off++; len--; radix = 16; } else { radix = 10; }
498 // read in each char, then shift total value to the left and add the extra
499 // style of loop is slightly different from all the others, as this should run a limited number of times
500 findchar: while (true) {
501 if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity");
502 int d = Character.digit(buf[off], radix);
504 if (buf[off] != ';') throw new WFCException("illegal characters in entity reference", getLine(), getCol());
// NOTE(review): Character.forDigit() maps a digit value below the radix to its digit
// character and returns the null character for anything else. If c holds the
// accumulated value of the character reference (its declaration is not in this
// excerpt), a plain (char) cast is presumably what was intended - confirm
513 singlechar[0] = Character.forDigit(c, radix);
514 characters(singlechar, 0, 1);
// predefined entities: each test inspects the chars after the first letter of the
// entity name and includes the terminating ';'
518 if (buffer(4) && buf[off+1] == 'm' && buf[off+2] == 'p' && buf[off+3] == ';') {
519 characters(single_amp, 0, 1); // &
520 off += 4; len -= 4; col++;
521 } else if (buffer(5) && buf[off+1] == 'p' && buf[off+2] == 'o' && buf[off+3] == 's' && buf[off+4] == ';') {
522 characters(single_apos, 0, 1); // '
523 off += 5; len -= 5; col++;
530 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
531 characters(single_gt, 0, 1); // >
532 off += 3; len -= 3; col++;
539 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
540 characters(single_lt, 0, 1); // <
541 off += 3; len -= 3; col++;
548 if (buffer(5) && buf[off+1] == 'u' && buf[off+2] == 'o' && buf[off+3] == 't' && buf[off+4] == ';') {
549 characters(single_quot, 0, 1); // "
550 off += 5; len -= 5; col++;
556 // TODO: check a parser-level Hash of defined entities
559 if (unknown) throw new WFCException("unknown entity (<!ENTITY> not supported)", getLine(), getCol());
562 /**
 * Reads until the passed string is encountered.
 *
 * @param p        if true, the characters read are handed to characters()
 * @param match    terminator string; the terminator itself is not consumed here -
 *                 the callers in readTag() step over it afterwards
 * @param entities not used in the visible part of this method - presumably
 *                 enables entity processing; TODO confirm
 */
563 private final void readChars(boolean p, String match, boolean entities) throws IOException, XMLException {
565 char[] end = match.toCharArray();
567 for (boolean more = true; more;) {
568 if (!buffer(1)) return;
// ref is the scan index relative to off; setting it to -1 restarts the scan at the
// new off once the loop increment has run
570 buf: for (ref = 0; ref < len; ref++) {
571 switch (buf[off+ref]) {
572 case '\r': // windows or macos9 newline
573 // normalise and process
574 buf[off+ref] = '\n'; ref++;
575 if (p) characters(buf, off, ref);
576 off += ref; len -= ref; ref = -1;
579 // windows double-char newline; skip the next char
580 if (!buffer(1)) return;
581 if (buf[off] == '\n') { off++; len--; }
584 case '\n': // unix newline
586 if (p) characters(buf, off, ref);
587 off += ref; len -= ref; ref = -1;
594 if (ref > 0) characters(buf, off, ref);
595 off += ref; len -= ref; ref = -1;
// the terminator cannot start here if the input is too short to hold all of it
602 if (!buffer(ref+end.length)) continue buf;
// compare the terminator char by char; any mismatch resumes the scan
603 for (int i=0; end.length > i; i++) if (end[i] != buf[off+ref+i]) continue buf;
// report (when p is set) and consume whatever was scanned
609 if (p && ref > 0) characters(buf, off, ref);
610 off += ref; len -= ref; col += ref;
615 * reads until a <tt><</tt> symbol is encountered
616 * @param p If true call the characters(char[],int,int) function for the processed characters
618 private final void readChars(boolean p) throws IOException, XMLException {
621 for (boolean more = true; more;) {
622 if (!buffer(1)) return;
// ref is the scan index relative to off; setting it to -1 restarts the scan at the
// new off once the loop increment has run
624 buf: for (ref = 0; ref < len; ref++) {
625 switch (buf[off+ref]) {
626 case '\r': // windows or macos9 newline
627 // normalise and process
628 buf[off+ref] = '\n'; ref++;
629 if (p) characters(buf, off, ref);
630 off += ref; len -= ref; ref = -1;
633 // windows double-char newline; skip the next char
634 if (!buffer(1)) return;
635 if (buf[off] == '\n') { off++; len--; }
638 case '\n': // unix newline
640 if (p) characters(buf, off, ref);
641 off += ref; len -= ref; ref = -1;
647 if (ref > 0) characters(buf, off, ref);
648 off += ref; len -= ref; ref = -1;
653 case '<': // end of chars section
// report (when p is set) and consume whatever was scanned
659 if (p && ref > 0) characters(buf, off, ref);
660 off += ref; len -= ref; col += ref;
664 /**
 * Reads until a non-whitespace symbol is encountered.
 * Line endings are reported through whitespace(); a '\r' is handled together
 * with a directly following '\n'.
 */
665 private final void readWhitespace() throws IOException, XMLException {
668 for (boolean more = true; more;) {
669 if (!buffer(1)) return;
// ref is the scan index relative to off; setting it to -1 restarts the scan at the
// new off once the loop increment has run
671 buf: for (ref = 0; ref < len; ref++) {
672 switch (buf[off+ref]) {
673 case '\r': // windows or macos9 newline
674 // normalise and process
676 whitespace(buf, off, ++ref);
677 off += ref; len -= ref; ref = -1;
680 // windows double-char newline; skip the next char
681 if (!buffer(1)) return;
682 if (buf[off] == '\n') { off++; len--; }
685 case '\n': // unix newline
686 whitespace(buf, off, ++ref);
687 off += ref; len -= ref; ref = -1;
695 default: // end of whitespace
// consume whatever was scanned
701 off += ref; len -= ref; col += ref;
706 * attempt to fill the buffer.
708 * @param min Minimum number of characters to read (even if we have to block to do it).
709 * @return return false if min can't be reached.
711 private final boolean buffer(int min) throws IOException {
// NOTE(review): the strict '>' skips this fast path when exactly min chars are already
// buffered; the remainder of the method is only partly visible in this excerpt, so
// confirm whether '>=' was intended
712 if (len > min) return true;
714 if (buf.length - (off+len) >= min) {
715 // plenty of space left on the end of the buffer
716 } else if (off >= min) {
717 // moving offset data to start will leave enough free space on the end
718 System.arraycopy(buf, off, buf, 0, len);
721 // buffer size will have to be increased
// the unread chars are copied to the front of an array twice the current length
722 char[] newbuf = new char[buf.length * 2];
723 System.arraycopy(buf, off, newbuf, 0, len);
// read into the free space behind the buffered chars; a negative count from
// Reader.read() means end of stream, so min cannot be reached
729 int newlen = in.read(buf, off+len, buf.length-(off+len));
730 if (newlen < 0) return false;
738 /////////////////////////////////////////////////////////////////////////////////////////////
739 // Abstract SAX-Like Interface
740 /////////////////////////////////////////////////////////////////////////////////////////////
743 * Called when the start of an element is processed.
745 * <p>The array of Attribute names and values may be longer than the
746 * number of entries they contain, but all the entries will be
747 * packed at the top.</p>
749 * <p><b>DO NOT</b> store a reference to the attribute arrays, as
750 * they are reused by other elements.</p>
// Invoked from readTag() with the parser's current Element, after the tag's name,
// attributes and namespace uri have been processed.
752 public abstract void startElement(Element e) throws SchemaException;
755 * Represents a line of character data.
757 * <p>Newlines are all normalised to the Unix \n as per the XML Spec,
758 * and a newline will only appear as the last character in the passed
761 * <p>XML.getLine() and XML.getCol() report the position at the
762 * beginning of this character segment, which can be processed in a
763 * line-by-line fashion due to the above newline restriction.</p>
// Invoked from both readChars() overloads and from readEntity().
765 public abstract void characters(char[] ch, int start, int length) throws SchemaException;
767 /**
 * Represents a line of ignorable whitespace.
 * Invoked from readWhitespace().
 */
768 public abstract void whitespace(char[] ch, int start, int length) throws SchemaException;
770 /**
 * Represents the end of an Element.
 * NOTE(review): the call site is not part of this excerpt - presumably readTag()
 * when an end tag or an empty-element tag is closed; confirm.
 */
771 public abstract void endElement(Element e) throws SchemaException;
774 /////////////////////////////////////////////////////////////////////////////////////////////
775 // Inner Classes for Parser Support
776 /////////////////////////////////////////////////////////////////////////////////////////////
779 * Used as a struct for holding information about a current element,
780 * and acts as a linked list entry.
782 * <p>Each element stores a hashtable of namespace definitions against
783 * their respective prefix, and a variable holding their default
784 * uri. If they did not specify a default uri, their
785 * parent's uri is copied in to keep up the semblance of speedy
788 * <h3>SLOWEST PART OF THE XML PARSER</h3>
789 * <p>To implement the Namespace Specification exactly, we have to
790 * store prefix mappings for elements away from its parents and
791 * siblings. This means if a child of a child of a child uses
792 * a prefix defined in the root, we have to search each Hashtable
793 * in each Element until we get to the root.</p>
795 * <p>Unfortunately, every other solution I can think of requires
796 * more work than this one, shifted to different parts of the
799 public static final class Element
// Links of the parser's element list: prev is the enclosing (parent) element,
// next is the entry the parser moves to when a child element is opened.
801 public Element next, prev;
803 /** A hashtable of all namespace prefixes that are defined by this element. */
806 /** An array of attribute names. */
807 public String[] keys;
809 /** An array of attribute values. */
810 public String[] vals;
812 /** An array of attribute uris. */
813 public String[] uris;
815 /** An array of non-fatal errors related to this element. */
816 public XMLException[] errors;
818 /** Current number of attributes in the <tt>keys</tt>, <tt>vals</tt> and <tt>uris</tt> arrays. */
821 /** Default URI for this element and its children with no prefix. */
822 public String defaultUri;
824 /** URI of current tag. XML Namespace Spec 14-Jan-1999 section 1 */
827 /** LocalPart of current element. XML Namespace Spec 14-Jan-1999 [8] */
828 public String localName;
830 /** Qualified Name of current element. XML Namespace Spec 14-Jan-1999 [6] */
833 /** Prefix of current element. Substring of qName. XML Namespace Spec 14-Jan-1999 [7] */
834 public String prefix;
// initial state: no names or uris, an empty prefix map, room for 10 attributes, no errors
837 defaultUri = uri = prefix = localName = qName = null;
838 urimap = new Hash(3,3);
839 keys = new String[10];
840 vals = new String[10];
841 uris = new String[10];
842 errors = new XMLException[] {};
846 /** increase the size of the attributes arrays */
// the three parallel arrays each grow by 5 slots and keep their existing entries
848 String[] newkeys = new String[keys.length+5];
849 String[] newvals = new String[vals.length+5];
850 String[] newuris = new String[uris.length+5];
851 System.arraycopy(keys, 0, newkeys, 0, keys.length);
852 System.arraycopy(vals, 0, newvals, 0, vals.length);
853 System.arraycopy(uris, 0, newuris, 0, uris.length);
854 keys = newkeys; vals = newvals; uris = newuris;
857 /** empty out the arrays */
// if the three arrays ever disagree in length, replace them with fresh 10-slot arrays
859 if (keys.length != vals.length || vals.length != uris.length) {
860 keys = new String[10]; vals = new String[10]; uris = new String[10];
// null every slot and reset the attribute count
862 for (int i=0; keys.length > i; i++) { keys[i] = null; vals[i] = null; uris[i] = null; }; len = 0;
864 errors = new XMLException[] {};
867 /** add an error to the errors array */
868 void addError(XMLException e) {
869 // it doesn't really matter about continually expanding the array, as this case is quite rare
// copy the existing errors into an array one slot longer and put e in the last slot
870 XMLException[] newe = new XMLException[errors.length+1];
871 System.arraycopy(errors, 0, newe, 0, errors.length);
872 newe[errors.length] = e;
877 /**
 * Parse or Structural Error.
 * Carries the message together with the line and column it refers to.
 */
878 public static class XMLException extends Exception
882 private String error;
// no position supplied: line and column are passed on as -1
884 public XMLException(String e) { this(e, -1, -1); }
886 public XMLException(String e, int l, int c) {
892 public int getLine() { return this.line; }
893 public int getCol() { return this.col; }
// returns the stored error text instead of the message held by Exception
894 public String getMessage() { return this.error; }
897 /**
 * Violation of Markup restrictions in XML Specification - Fatal Error.
 * WFCException is a subclass, so a handler for MarkupException also receives
 * well-formedness violations.
 */
898 public static class MarkupException extends XMLException { public MarkupException(String e, int l, int c) { super(e,l,c); } }
900 /**
 * Well-Formedness Constraint Violation - Fatal Error.
 * Thrown by the parser for, among others, an unclosed element at end of input,
 * a mismatched end tag, a '&lt;' inside an attribute value and a repeated attribute.
 */
901 public static final class WFCException extends MarkupException { public WFCException(String e, int l, int c) { super(e,l,c); } }
903 /**
 * Namespace Constraint Violation - Recoverable Error.
 * The parser does not throw these; it attaches them to the affected element
 * through Element.addError().
 */
904 public static final class NCException extends XMLException { public NCException(String e, int l, int c) { super(e,l,c); } }
906 /**
 * Schema Violation - Fatal Error.
 * This is the exception type that the abstract callbacks (startElement,
 * characters, whitespace, endElement) are declared to throw.
 */
907 public static class SchemaException extends XMLException {
// no position supplied: line and column are passed on as -1
908 public SchemaException(String e) { this(e, -1, -1); }
909 public SchemaException(String e, int l, int c) { super(e,l,c); }
913 /////////////////////////////////////////////////////////////////////////////////////////////
914 // Static Support Functions for the XML Specification
915 /////////////////////////////////////////////////////////////////////////////////////////////
917 // attempt to avoid these functions unless you *expect* the input to fall in the given range.
919 /**
 * First Character of Name - XML Specification 1.0 [5].
 * The ASCII letter test runs first, so plain ASCII names never reach the
 * range scan in Letter().
 */
920 private static final boolean Name(char c) {
921 return BaseCharAscii(c) || c == '_' || c == ':' || Letter(c);
924 /**
 * NameChar - XML Specification 1.0 [4].
 * The cheap ASCII tests come first. CombiningChar is not checked yet
 * (see the TODO below).
 */
925 private static final boolean NameChar(char c) {
926 return BaseCharAscii(c) || c == '.' || c == '-' || c == '_' || c == ':'
927 || Digit(c) || Letter(c) || Extender(c); // TODO: || CombiningChar(c);
930 /** Letter - XML Specification 1.0 [84]: a BaseChar or an Ideographic character. */
931 private static final boolean Letter(char c) {
932 return BaseChar(c) || Ideographic(c);
935 /** Elements of BaseChar that exist in ASCII: 'A'-'Z' (U+0041..U+005A) and 'a'-'z' (U+0061..U+007A). */
936 private static final boolean BaseCharAscii(char c) {
937 return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A');
940 /** Char - XML Specification 1.0 [2] */
941 private static final boolean Char(char c) {
942 // U+000A is the line feed (n escape) and U+000D the carriage return (r escape); they are
// written with the short escapes because the unicode escape form of a line terminator
// cannot be used inside a char literal
943 return c == '\u0009' || c == '\r' || c == '\n'
944 || (c >= '\u0020' && c <= '\uD7FF')
945 || (c >= '\uE000' && c <= '\uFFFD');
948 /**
 * BaseChar - XML Specification 1.0 [85].
 * Tests the ASCII letters first (BaseCharAscii) and then the remaining ranges
 * in ascending code point order, so a character that is not a BaseChar is
 * compared against every range before false is returned.
 */
949 private static final boolean BaseChar(char c) {
950 return BaseCharAscii(c) || (c >= '\u00C0' && c <= '\u00D6')
951 || (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131')
952 || (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E')
953 || (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5')
954 || (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1')
955 || (c == '\u0386') || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
956 || (c >= '\u038E' && c <= '\u03A1') || (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6')
957 || (c == '\u03DA') || (c == '\u03DC') || (c == '\u03DE')
959 || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F')
960 || (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4')
961 || (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB')
962 || (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556')
964 || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2')
965 || (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7')
966 || (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3')
968 || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939')
970 || (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990')
971 || (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0')
973 || (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DF' && c <= '\u09E1') || (c >= '\u09F0' && c <= '\u09F1')
974 || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || (c >= '\u0A13' && c <= '\u0A28')
975 || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || (c >= '\u0A35' && c <= '\u0A36')
976 || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C')
978 || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B')
980 || (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0')
981 || (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9')
984 || (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28')
985 || (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39')
987 || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || (c >= '\u0B85' && c <= '\u0B8A')
988 || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || (c >= '\u0B99' && c <= '\u0B9A')
990 || (c >= '\u0B9E' && c <= '\u0B9F') || (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA')
991 || (c >= '\u0BAE' && c <= '\u0BB5') || (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C')
992 || (c >= '\u0C0E' && c <= '\u0C10') || (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33')
993 || (c >= '\u0C35' && c <= '\u0C39') || (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C')
994 || (c >= '\u0C8E' && c <= '\u0C90') || (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3')
995 || (c >= '\u0CB5' && c <= '\u0CB9')
997 || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || (c >= '\u0D0E' && c <= '\u0D10')
998 || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || (c >= '\u0D60' && c <= '\u0D61')
999 || (c >= '\u0E01' && c <= '\u0E2E')
1001 || (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82')
1003 || (c >= '\u0E87' && c <= '\u0E88')
1006 || (c >= '\u0E94' && c <= '\u0E97') || (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3')
1009 || (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE')
1011 || (c >= '\u0EB2' && c <= '\u0EB3')
1013 || (c >= '\u0EC0' && c <= '\u0EC4') || (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69')
1014 || (c >= '\u10A0' && c <= '\u10C5') || (c >= '\u10D0' && c <= '\u10F6')
1016 || (c >= '\u1102' && c <= '\u1103') || (c >= '\u1105' && c <= '\u1107')
1018 || (c >= '\u110B' && c <= '\u110C') || (c >= '\u110E' && c <= '\u1112')
1025 || (c >= '\u1154' && c <= '\u1155')
1027 || (c >= '\u115F' && c <= '\u1161')
1032 || (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173')
1037 || (c >= '\u11AE' && c <= '\u11AF') || (c >= '\u11B7' && c <= '\u11B8')
1039 || (c >= '\u11BC' && c <= '\u11C2')
1043 || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || (c >= '\u1F00' && c <= '\u1F15')
1044 || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || (c >= '\u1F48' && c <= '\u1F4D')
1045 || (c >= '\u1F50' && c <= '\u1F57')
1049 || (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC')
1051 || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || (c >= '\u1FD0' && c <= '\u1FD3')
1052 || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || (c >= '\u1FF2' && c <= '\u1FF4')
1053 || (c >= '\u1FF6' && c <= '\u1FFC')
1055 || (c >= '\u212A' && c <= '\u212B')
1057 || (c >= '\u2180' && c <= '\u2182') || (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA')
1058 || (c >= '\u3105' && c <= '\u312C') || (c >= '\uAC00' && c <= '\uD7A3');
1061 /** BaseChar - XMl Specification 1.0 [86] */
1062 private static final boolean Ideographic(char c) {
1063 return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
/** CombiningChar - XML Specification 1.0 [87] */
1067 /*private static final boolean CombiningChar(char c) {
1068 return (c >= '\u0300' && c <= '\u0345')
1069 || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || (c >= '\u0591' && c <= '\u05A1')
1070 || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD')
1072 || (c >= '\u05C1' && c <= '\u05C2')
1074 || (c >= '\u064B' && c <= '\u0652')
1076 || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4')
1077 || (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903')
1079 || (c >= '\u093E' && c <= '\u094C')
1081 || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || (c >= '\u0981' && c <= '\u0983')
1085 || (c >= '\u09C0' && c <= '\u09C4') || (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD')
1087 || (c >= '\u09E2' && c <= '\u09E3')
1092 || (c >= '\u0A40' && c <= '\u0A42') || (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D')
1093 || (c >= '\u0A70' && c <= '\u0A71') || (c >= '\u0A81' && c <= '\u0A83')
1095 || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || (c >= '\u0ACB' && c <= '\u0ACD')
1096 || (c >= '\u0B01' && c <= '\u0B03')
1098 || (c >= '\u0B3E' && c <= '\u0B43') || (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D')
1099 || (c >= '\u0B56' && c <= '\u0B57') || (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2')
1100 || (c >= '\u0BC6' && c <= '\u0BC8') || (c >= '\u0BCA' && c <= '\u0BCD')
1102 || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || (c >= '\u0C46' && c <= '\u0C48')
1103 || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || (c >= '\u0C82' && c <= '\u0C83')
1104 || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || (c >= '\u0CCA' && c <= '\u0CCD')
1105 || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || (c >= '\u0D3E' && c <= '\u0D43')
1106 || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D')
1109 || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E')
1111 || (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD')
1112 || (c >= '\u0F18' && c <= '\u0F19')
1118 || (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95')
1120 || (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7')
1122 || (c >= '\u20D0' && c <= '\u20DC')
1124 || (c >= '\u302A' && c <= '\u302F')
1129 /** Digit - XMl Specification 1.0 [88] */
1130 private static final boolean Digit(char c) {
1131 return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9')
1132 || (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F')
1133 || (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF')
1134 || (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F')
1135 || (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
1138 /** Extender - XMl Specification 1.0 [89] */
1139 private static final boolean Extender(char c) {
1140 return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387'
1141 || c == '\u0640' || c == '\u0E46' || c == '\u0EC6' || c == '\u3005'
1142 || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE');
1145 /** Whitespace - XML Specification 1.0 [3] */
1146 private static final boolean S(char c) {
1147 return c == '\u0020' || c == '\u0009' || c == '\r' || c == '\n';