From: david Date: Fri, 30 Jan 2004 06:59:16 +0000 (+0000) Subject: 2003/04/24 14:33:29 X-Git-Tag: RC3~1034 X-Git-Url: http://git.megacz.com/?p=org.ibex.core.git;a=commitdiff_plain;h=2d2a6c22bb01be755934af5418e4a490a038c9dc 2003/04/24 14:33:29 darcs-hash:20040130065916-0c9ea-f8d7ce1d5e2817dc300129904109b1ab7088fd14.gz --- diff --git a/src/org/xwt/util/Vec.java b/src/org/xwt/util/Vec.java index 7b55023..d4b45f7 100644 --- a/src/org/xwt/util/Vec.java +++ b/src/org/xwt/util/Vec.java @@ -71,6 +71,12 @@ public class Vec implements Serializable { out[i] = store[i]; } + public void fromArray(Object[] in) { + setSize(in.length); + for(int i=0; i= size || i < 0) throw new RuntimeException("tried to remove an element outside the vector's limits"); for(int j=i; jIMPLEMENTATION NOTES + *

As the parser traverses into an element, it adds it to the linked list + * called elements. However, elements has been pre-filled + * with instances of the Element inner class. So in the vast majority of + * cases, the pointer current is moved along one, and the values for the + * new element are filled into the current object.

+ * + *

This parser supports all the unicode ranges required by the XML + * Specification. However, it is optimised for well-formed ASCII documents. + * Documents containing unicode Names and Attributes will take much longer + * to process, and invalid documents (badly formed Names or invalid attributes) + * will be run through a test on every single unicode character range before + * being declared invalid.

+ * + *

IMPLEMENTATION RULES

+ *
    + *
  • Each time the buffer offset off is moved, the length + * len must be decreased.
  • + *
  • Each time the buffer length is decreased, it must be checked to make + * sure it is >0.
  • + *
+ * + *

Other Notes

+ *
    + *
  • error is defined as a Validity Constraint Violation and is recoverable
  • + *
  • fatal error is defined as a Well-formedness Constraint Violation and is not recoverable
  • + *
+ * + * @author David Crawshaw + * @see XML-Specification-1.0 http://w3.org/TR/REC-xml + */ +public abstract class XML +{ + ///////////////////////////////////////////////////////////////////////////////////////////// + // XML Parser + ///////////////////////////////////////////////////////////////////////////////////////////// + + public static final int BUFFER_SIZE = 255; + + /** static pool of XML.Element instances shared by all XML Parsers. + * elements in the queue have dirty prev and next references, that need cleaning before use. */ + private static final Queue elements = new Queue(30); + + private static final char[] single_amp = new char[] { '&' }; + private static final char[] single_apos = new char[] { '\'' }; + private static final char[] single_gt = new char[] { '>' }; + private static final char[] single_lt = new char[] { '<' }; + private static final char[] single_quot = new char[] { '"' }; + + private int line; + private int col; + + private Reader in; + private char[] buf; + private int off; + private int len; + + private Element current; + + // used in readEntity() to process a single character without creating a new array + private char[] singlechar = new char[1]; + + + public XML() { this(BUFFER_SIZE); } + + public XML(int bSize) { + buf = new char[bSize]; + + current = (Element)elements.remove(false); + if (current == null) current = new Element(); + current.prev = current.next = null; + } + + + /** Returns the line number at the beginning of the last process call. */ + public int getLine() { return line; } + + /** Returns the column number at the beginning of the last process call. */ + public int getCol() { return col; } + + /** + * Parse given input and call the abstract event functions. + * + * Careful with threading, as this function is not synchronized. 
+ */ + public final void parse(Reader reader) throws IOException, XMLException { + in = reader; + off = len = 0; + line = col = 1; + + clean(); // clean up possible mid-way linked-list element + + try { + // process the stream + while (true) { + if (!buffer(1)) { + if (current.qName == null) break; + throw new WFCException("reached eof without closing <"+current.qName+"> element", getLine(), getCol()); + } + + if (buf[off] == '<') readTag(); + readChars(current.qName != null); + } + } finally { clean(); } // clean up elements + } + + /** remove any leftover elements from the linked list and queue them */ + private final void clean() { + while (current.prev != null) elements.append((current = current.prev).next); + current.next = null; + current.qName = null; + } + + /** reads in a tag. expects buf[off] == '<' */ + private final void readTag() throws IOException, XMLException { + // Start Tag '<' Name (S Attribute)* S? '>' + boolean starttag = true; + + // End Tag '' + boolean endtag = false; + + // if (starttag & endtag) then: EmptyElemTag '<' Name (S Attribute)* S? 
'/>' + + // Position in the name of the ':' namespace prefix + int prefix = -1; + + int namelen = 0; + + col++; off++; len--; + if (!buffer(1)) throw new EOFException("Unexpected EOF processing element tag"); + + // work out what we can from the beginning of the tag + char s = buf[off]; + if (s == '!') { + // definitions here don't necessarily conform to xml spec (as DTDs not yet implemented) + col++; off++; len--; + if (!buffer(4)) throw new EOFException("Unexpected EOF processing ' + readChars(false, "-->", false); + col += 3; off += 3; len -= 3; + break; + case '[': + if (!buffer(7) + || buf[off+1] != 'C' || buf[off+2] != 'D' || buf[off+3] != 'A' + || buf[off+4] != 'T' || buf[off+5] != 'A' || buf[off+6] != '[') { + col++; off--; len++; + // Conditional '' Char*)) ']]>' + readChars(false, "]]>", false); + } else { + col += 7; off += 7; len -=7; + // CDATA '' Char*)) ']]>' + readChars(true, "]]>", false); + } + col += 3; off += 3; len -= 3; + break; + + // we don't care about the following definitions + + case 'A': + if (!buffer(7) + || buf[off+1] != 'T' || buf[off+2] != 'T' || buf[off+3] != 'L' + || buf[off+4] != 'I' || buf[off+5] != 'S' || buf[off+6] != 'T') { + bad = true; break; + } + col += 7; off += 7; len -= 7; + + // ATTLIST '') '>' + readChars(false, ">", true); + col++; off++; len--; + break; + case 'D': + if (!buffer(7) + || buf[off+1] != 'O' || buf[off+2] != 'C' || buf[off+3] != 'T' + || buf[off+4] != 'Y' || buf[off+5] != 'P' || buf[off+6] != 'E') { + bad = true; break; + } + col += 7; off += 7; len -= 7; + + // DTD '') '>' + readChars(false, ">", true); + col++; off++; len--; + break; + case 'E': + if (!buffer(7)) { + bad = true; + } else if (buf[off+1] == 'L' && buf[off+2] == 'E' && buf[off+3] == 'M' + && buf[off+4] == 'E' && buf[off+5] == 'N' && buf[off+6] == 'T') { + // ELEMENT '') '>' + readChars(false, ">", true); + col++; off++; len--; + + } else if (buf[off+1] == 'N' && buf[off+2] == 'T' && buf[off+3] == 'I' + && buf[off+4] == 'T' && buf[off+5] 
== 'Y') { + // ENTITY '') '>' + readChars(false, ">", true); + col++; off++; len--; + + } else { + bad = true; + } + break; + + case 'N': + if (!buffer(8) + || buf[off+1] != 'O' || buf[off+2] != 'T' || buf[off+3] != 'A' || buf[off+4] != 'T' + || buf[off+5] != 'I' || buf[off+6] != 'O' || buf[off+7] != 'N') { + bad = true; break; + } + col += 8; off += 8; len -= 8; + // NOTATION '') '>' + readChars(false, ">", true); + col++; off++; len--; + + break; + default: bad = true; + } + + if (bad) throw new MarkupException("element tag start character is invalid", getLine(), getCol()); + + } else if (s == '?') { + // PI (Ignored) '' Char*)) '?>' + col++; off++; len--; + readChars(false, "?>", true); + if (!buffer(2)) throw new EOFException("Unexpected EOF at end of Processing Instruction"); + col += 2; off += 2; len -= 2; + + } else { + if (s == '/') { + // End Tag '' + starttag = false; + endtag = true; + + col++; off++; len--; + if (!buffer(1)) throw new EOFException("Unexpected EOF processing end tag"); + s = buf[off]; + } + + if (!Name(s)) throw new MarkupException("invalid starting character in element name", getLine(), getCol()); + + // find the element name (defined in XML Spec: section 2.3) + for (namelen = 0; ; namelen++) { + if (!buffer(namelen+1)) throw new EOFException("Unexpected EOF in element tag name"); + + s = buf[off+namelen]; + + if (S(s) || s == '>') { + break; + } else if (s == '/') { + endtag = true; + break; + } else if (s == ':' && namelen > 0 && prefix < 1) { + // we have a definition of the prefix range available + prefix = namelen; + } else if (!NameChar(s)) { + throw new MarkupException("element name contains invalid character", getLine(), getCol()); + } + } + + // process name (based on calculated region) + if (namelen < 1) throw new MarkupException("element name is null", getLine(), getCol()); + + // we have marked out the name region, so turn it into a string and move on + String qName = new String(buf, off, namelen); + + col += namelen; off += 
namelen; len -= namelen; + + if (starttag) { + // create the in-memory element representation of this beast + // if current.qName == null then this is the root element we're dealing with + if (current.qName != null) { + if (current.next == null) { + // we're at the end of the default element depth + current.next = (Element)elements.remove(false); + if (current.next == null) current.next = new Element(); + current.next.prev = current; + current.next.next = null; + } + current = current.next; + } + + current.clear(); + current.qName = qName; + current.defaultUri = current.uri = null; + + if (prefix > 0) { + current.prefix = current.qName.substring(0, prefix); + current.localName = current.qName.substring(prefix+1); + } else { + current.prefix = null; + current.localName = current.qName; + } + + // process attributes + readWhitespace(); + if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 1"); + while (buf[off] != '/' && buf[off] != '>') { + readAttribute(); + if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 2"); + readWhitespace(); + } + + // inherit namespace default uri if attribute was not provided + if (current.defaultUri == null) { + current.defaultUri = (current.prev != null) ? 
current.prev.defaultUri : null; + } + + // work out the uri of this element + if (current.prefix == null) { + // element has no prefix, therefore is the default uri + current.uri = current.defaultUri; + } else { + // work back through the hashtables until uri is found + for (Element e = current; e != null && current.uri == null; e = e.prev) { + current.uri = (String)e.urimap.get(current.prefix); + } + if (current.uri == null) current.addError(new NCException("undefined prefix '"+current.prefix+"'", getLine(), getCol())); + } + + } else { + // this is an end-of-element tag + if (!qName.equals(current.qName)) throw new WFCException( + "end tag does not line up with start tag <"+current.qName+">", getLine(), getCol() + ); + } + + // deal with whitespace + readWhitespace(); + + // process tag close + if (!buffer(1)) throw new EOFException("Unexpected EOF before end of tag"); + if (buf[off] == '/') { + endtag = true; + off++; len--; col++; + } + if (!buffer(1)) throw new EOFException("Unexpected EOF before end of endtag"); + if (buf[off] == '>') { + off++; len--; col++; + } else { + throw new MarkupException("missing '>' character from element '"+qName+"'", getLine(), getCol()); + } + + // send element signals + if (starttag) startElement(current); + if (endtag) { + endElement(current); + + // we just closed an element, so remove it from the element 'stack' + if (current.prev == null) { + // we just finished the root element + current.qName = null; + } else { + elements.append((current = current.prev).next); + current.next = null; + } + } + } + } + + /** reads in an attribute of an element. 
expects Name(buf[off]) */ + private final void readAttribute() throws IOException, XMLException { + int ref = 0; + int prefix = 0; + boolean xmlns = false; + String n, v; + n = v = null; + char s; + + // find the element name (defined in XML Spec: section 2.3) + for (ref= 0; ; ref++) { + if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in read attribute loop part 1"); + + s = buf[off+ref]; + + if (s == '=' || S(s)) { + break; + } else if (s == ':' && ref > 0 && prefix < 1) { + // we have a definition of the prefix range available + prefix = ref+1; + } else if (!NameChar(s)) { + throw new MarkupException("attribute name contains invalid characters", getLine(), getCol()); + } + } + + // work out if attribute is related to XML Namespacing + if (ref > 4 && buf[off] == 'x' && buf[off+1] == 'm' && buf[off+2] == 'l' && buf[off+3] == 'n' && buf[off+4] == 's') { + if (ref == 5) { + xmlns = true; + } else if (prefix > 0) { + xmlns = true; + n = new String(buf, off+prefix, ref-prefix); + } else { + n = new String(buf, off, ref); + } + } else { + n = new String(buf, off, ref); + } + col += ref; off += ref; len -= ref; + + readWhitespace(); + if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider"); + if (buf[off] != '=') throw new MarkupException("attribute name not followed by '=' sign", getLine(), getCol()); + + col++; off++; len--; + readWhitespace(); + + if (!buffer(1)) throw new EOFException("Unexpected EOF after attribute '=' divider"); + + char wrap; + if (buf[off] == '\'' || buf[off] == '"') { + wrap = buf[off]; + } else { + throw new MarkupException("attribute '"+n+"' must have attribute wrapped in ' or \"", getLine(), getCol()); + } + col++; off++; len--; + + // find the attribute value + attval: for (ref = 0; ; ref++) { + if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in attribute value"); + + if (buf[off+ref] == wrap) { + break attval; + } else if (buf[off+ref] == '<') { + throw new WFCException("attribute value 
for '"+n+"' must not contain '<'", getLine(), getCol()); + } + } + + v = new String(buf, off, ref); + col += ref; off += ref; len -= ref; + + // remove end wrapper character + col++; off++; len--; + + if (xmlns) { + if (n == null) { + if (current.defaultUri != null) { + current.addError(new NCException("default namespace definition repeated", getLine(), getCol())); + } else { + current.defaultUri = v; + } + } else { + current.urimap.put(n, v); + } + } else { + // check to see if attribute is a repeat + for (int i=0; current.len > i; i++) if (n.equals(current.keys[i])) throw new WFCException( + "attribute name '"+n+"' may not appear more than once in the same element tag", getLine(), getCol() + ); + + // add attribute to the attribute arrays + if (current.len == current.keys.length) current.morekeys(); + current.keys[current.len] = n; + current.vals[current.len] = v; + current.len++; + } + } + + /** reads an entity and processes out its value. expects buf[off] == '&' */ + private final void readEntity() throws IOException, XMLException { + off++; len--; + if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity"); + + boolean unknown = false; + switch (buf[off]) { + case '#': + off++; len--; + + int radix; + if (buf[off] == 'x') { off++; len--; radix = 16; } else { radix = 10; } + int c = 0; + + // read in each char, then shift total value to the left and add the extra + // style of loop is slightly different from all the others, as this should run a limited number of times + findchar: while (true) { + if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity"); + int d = Character.digit(buf[off], radix); + if (d == -1) { + if (buf[off] != ';') throw new WFCException("illegal characters in entity reference", getLine(), getCol()); + off++; len--; col++; + break findchar; + } + c = (c * radix) + d; + + off++; len--; + } + + singlechar[0] = Character.forDigit(c, radix); + characters(singlechar, 0, 1); + break; + + case 'a': + if (buffer(4) && 
buf[off+1] == 'm' && buf[off+2] == 'p' && buf[off+3] == ';') { + characters(single_amp, 0, 1); // & + off += 4; len -= 4; col++; + } else if (buffer(5) && buf[off+1] == 'p' && buf[off+2] == 'o' && buf[off+3] == 's' && buf[off+4] == ';') { + characters(single_apos, 0, 1); // ' + off += 5; len -= 5; col++; + } else { + unknown = true; + } + break; + + case 'g': + if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') { + characters(single_gt, 0, 1); // > + off += 3; len -= 3; col++; + } else { + unknown = true; + } + break; + + case 'l': + if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') { + characters(single_lt, 0, 1); // < + off += 3; len -= 3; col++; + } else { + unknown = true; + } + break; + + case 'q': + if (buffer(5) && buf[off+1] == 'u' && buf[off+2] == 'o' && buf[off+3] == 't' && buf[off+4] == ';') { + characters(single_quot, 0, 1); // " + off += 5; len -= 5; col++; + } else { + unknown = true; + } + break; + + // TODO: check a parser-level Hash of defined entities + } + + if (unknown) throw new WFCException("unknown entity ( not supported)", getLine(), getCol()); + } + + /** reads until the passed string is encountered. 
*/ + private final void readChars(boolean p, String match, boolean entities) throws IOException, XMLException { + int ref; + char[] end = match.toCharArray(); + + for (boolean more = true; more;) { + if (!buffer(1)) return; + + buf: for (ref = 0; ref < len; ref++) { + switch (buf[off+ref]) { + case '\r': // windows or macos9 newline + // normalise and process + buf[off+ref] = '\n'; ref++; + if (p) characters(buf, off, ref); + off += ref; len -= ref; ref = -1; + line++; col = 1; + + // windows double-char newline; skip the next char + if (!buffer(1)) return; + if (buf[off] == '\n') { off++; len--; } + break; + + case '\n': // unix newline + ref++; + if (p) characters(buf, off, ref); + off += ref; len -= ref; ref = -1; + line++; col = 1; + break; + + case '&': // entity + if (entities) { + if (p) { + if (ref > 0) characters(buf, off, ref); + off += ref; len -= ref; ref = -1; + readEntity(); + } + break; + } + + default: + if (!buffer(ref+end.length)) continue buf; + for (int i=0; end.length > i; i++) if (end[i] != buf[off+ref+i]) continue buf; + more = false; + break buf; + } + } + + if (p && ref > 0) characters(buf, off, ref); + off += ref; len -= ref; col += ref; + } + } + + /** + * reads until a < symbol is encountered + * @param p If true call the characters(char[],int,int) funciton for the processed characters + */ + private final void readChars(boolean p) throws IOException, XMLException { + int ref; + + for (boolean more = true; more;) { + if (!buffer(1)) return; + + buf: for (ref = 0; ref < len; ref++) { + switch (buf[off+ref]) { + case '\r': // windows or macos9 newline + // normalise and process + buf[off+ref] = '\n'; ref++; + if (p) characters(buf, off, ref); + off += ref; len -= ref; ref = -1; + line++; col = 1; + + // windows double-char newline; skip the next char + if (!buffer(1)) return; + if (buf[off] == '\n') { off++; len--; } + break; + + case '\n': // unix newline + ref++; + if (p) characters(buf, off, ref); + off += ref; len -= ref; ref = -1; + 
line++; col = 1; + break; + + case '&': // entity + if (p) { + if (ref > 0) characters(buf, off, ref); + off += ref; len -= ref; ref = -1; + readEntity(); + } + break; + + case '<': // end of chars section + more = false; + break buf; + } + } + + if (p && ref > 0) characters(buf, off, ref); + off += ref; len -= ref; col += ref; + } + } + + /** reads until a non-whitespace symbol is encountered */ + private final void readWhitespace() throws IOException, XMLException { + int ref; + + for (boolean more = true; more;) { + if (!buffer(1)) return; + + buf: for (ref = 0; ref < len; ref++) { + switch (buf[off+ref]) { + case '\r': // windows or macos9 newline + // normalise and process + buf[off+ref] = '\n'; + whitespace(buf, off, ++ref); + off += ref; len -= ref; ref = -1; + line++; col = 1; + + // windows double-char newline; skip the next char + if (!buffer(1)) return; + if (buf[off] == '\n') { off++; len--; } + break; + + case '\n': // unix newline + whitespace(buf, off, ++ref); + off += ref; len -= ref; ref = -1; + line++; col = 1; + break; + + case ' ': // space + case '\t': // tab + break; + + default: // end of whitespace + more = false; + break buf; + } + } + + off += ref; len -= ref; col += ref; + } + } + + /** + * attempt to fill the buffer. + * + * @param min Minimum number of characters to read (even if we have to block to do it). + * @return return false if min can't be reached. 
+ */ + private final boolean buffer(int min) throws IOException { + if (len > min) return true; + + if (buf.length - (off+len) >= min) { + // plenty of space left on the end of the buffer + } else if (off >= min) { + // moving offset data to start will leave enough free space on the end + System.arraycopy(buf, off, buf, 0, len); + off = 0; + } else { + // buffer size will have to be increased + char[] newbuf = new char[buf.length * 2]; + System.arraycopy(buf, off, newbuf, 0, len); + buf = newbuf; + off = 0; + } + + while (min > len) { + int newlen = in.read(buf, off+len, buf.length-(off+len)); + if (newlen < 0) return false; + len += newlen; + } + + return true; + } + + + ///////////////////////////////////////////////////////////////////////////////////////////// + // Abstract SAX-Like Interface + ///////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Called when the start of an element is processed. + * + *

The array of Attribute names and values may be longer than the + * number of entries they contain, but all the entries will be + * packed at the top.

+ * + *

DO NOT store a reference to the attribute arrays, as + * they are reused by other elements.

+ */ + public abstract void startElement(Element e) throws SchemaException; + + /** + * Represents a line of character data. + * + *

Newlines are all normalised to the Unix \n as per the XML Spec, + * and a newline will only appear as the last character in the passed + * array segment.

+ * + *

XML.getLine() and XML.getCol() report the position at the + * beginning of this character segment, which can be processed in a + * line-by-line fashion due to the above newline restriction.

+ */ + public abstract void characters(char[] ch, int start, int length) throws SchemaException; + + /** Represents a line of ignorable whitespace. */ + public abstract void whitespace(char[] ch, int start, int length) throws SchemaException; + + /** Represents the end of an Element. */ + public abstract void endElement(Element e) throws SchemaException; + + + ///////////////////////////////////////////////////////////////////////////////////////////// + // Inner Classes for Parser Support + ///////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Used as a struct for holding information about a current element, + * and acts as a linked list entry. + * + *

Each element stores a hashtable of namespace definitions against + * their respective prefix, and a variable holding their default + * uri. If they did not specify a default uri, their + * parent's uri is copied in to keep up the semblance of speedy + * parsing.

+ * + *

SLOWEST PART OF THE XML PARSER

+ *

To implement the Namespace Specification exactly, we have to + * store the prefix mappings of each element separately from those of its parents and + * siblings. This means if a child of a child of a child uses + * a prefix defined in the root, we have to search each Hashtable + * in each Element until we get to the root.

+ * + *

Unfortunately, every other solution I can think of requires + * more work than this one, shifted to different parts of the + * parser.

+ */ + public static final class Element + { + public Element next, prev; + + /** A hashtable of all namespace prefixes that are defined by this element. */ + public Hash urimap; + + /** An array of attribute names. */ + public String[] keys; + + /** An array of attribute values. */ + public String[] vals; + + /** An array of non-fatal errors related to this element. */ + public XMLException[] errors; + + /** Current number of attributes in the keys and vals arrays. */ + public int len; + + /** Default URI for this element and its children with no prefix. */ + public String defaultUri; + + /** URI of current tag. XML Namespace Spec 14-Jan-1999 section 1 */ + public String uri; + + /** LocalPart of current element. XML Namespace Spec 14-Jan-1999 [8] */ + public String localName; + + /** Qualified Name of current element. XML Namespace Spec 14-Jan-1999 [6] */ + public String qName; + + /** Prefix of current element. Substring of qName. XML Namespace Spec 14-Jan-1999 [7] */ + public String prefix; + + public Element() { + defaultUri = uri = prefix = localName = qName = null; + urimap = new Hash(3,3); + keys = new String[10]; + vals = new String[10]; + errors = new XMLException[] {}; + len = 0; + } + + /** increase the size of the attributes arrays */ + void morekeys() { + String[] newkeys = new String[keys.length+5]; + String[] newvals = new String[vals.length+5]; + System.arraycopy(keys, 0, newkeys, 0, keys.length); + System.arraycopy(vals, 0, newvals, 0, vals.length); + keys = newkeys; vals = newvals; + } + + /** empty out the arrays */ + void clear() { + for (int i=0; len > i; i++) { keys[i] = null; vals[i] = null; }; len = 0; + errors = new XMLException[] {}; + } + + /** add an error to the errors array */ + void addError(XMLException e) { + // it doesn't really matter about continually expanding the array, as this case is quite rare + XMLException[] newe = new XMLException[errors.length+1]; + System.arraycopy(errors, 0, newe, 0, errors.length); + 
newe[errors.length] = e; + errors = newe; + } + } + + /** Parse or Structural Error */ + public static class XMLException extends Exception + { + private int line; + private int col; + private String error; + + public XMLException(String e) { this(e, -1, -1); } + + public XMLException(String e, int l, int c) { + this.error = e; + this.line = l; + this.col = c; + } + + public int getLine() { return this.line; } + public int getCol() { return this.col; } + public String getMessage() { return this.error; } + } + + /** Violation of Markup restrictions in XML Specification - Fatal Error */ + public static class MarkupException extends XMLException { public MarkupException(String e, int l, int c) { super(e,l,c); } } + + /** Well-Formedness Constraint Violation - Fatal Error */ + public static final class WFCException extends MarkupException { public WFCException(String e, int l, int c) { super(e,l,c); } } + + /** Namespace Constraint Violation - Recoverable Error */ + public static final class NCException extends XMLException { public NCException(String e, int l, int c) { super(e,l,c); } } + + /** Schema Violation - Fatal Error */ + public static class SchemaException extends XMLException { + public SchemaException(String e) { this(e, -1, -1); } + public SchemaException(String e, int l, int c) { super(e,l,c); } + } + + + ///////////////////////////////////////////////////////////////////////////////////////////// + // Static Support Functions for the XML Specification + ///////////////////////////////////////////////////////////////////////////////////////////// + + // attempt to avoid these functions unless you *expect* the input to fall in the given range. + + /** First Character of Name - XML Specification 1.0 [5] */ + private static final boolean Name(char c) { + return BaseCharAscii(c) || c == '_' || c == ':' || Letter(c); + } + + /** NameChar - XML Specification 1.0 [4] */ + private static final boolean NameChar(char c) { + return BaseCharAscii(c) || c == '.' 
|| c == '-' || c == '_' || c == ':' + || Digit(c) || Letter(c) || Extender(c); // TODO: || CombiningChar(c); + } + + /** BaseChar - XMl Specification 1.0 [84] */ + private static final boolean Letter(char c) { + return BaseChar(c) || Ideographic(c); + } + + /** Elements of BaseChar that exist in ASCII. */ + private static final boolean BaseCharAscii(char c) { + return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A'); + } + + /** Char - XML Specification 1.0 [2] */ + private static final boolean Char(char c) { + // u000A == r and u000D == n, but the javac compiler can't handle the \ u form + return c == '\u0009' || c == '\r' || c == '\n' + || (c >= '\u0020' && c <= '\uD7FF') + || (c >= '\uE000' && c <= '\uFFFD'); + } + + /** BaseChar - XML Specification 1.0 [85] */ + private static final boolean BaseChar(char c) { + return BaseCharAscii(c) || (c >= '\u00C0' && c <= '\u00D6') + || (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') + || (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') + || (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') + || (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') + || (c == '\u0386') || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C') + || (c >= '\u038E' && c <= '\u03A1') || (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') + || (c == '\u03DA') || (c == '\u03DC') || (c == '\u03DE') + || (c == '\u03E0') + || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') + || (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') + || (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') + || (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' 
&& c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') + || (c == '\u0559') + || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') + || (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') + || (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') + || (c == '\u06D5') + || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') + || (c == '\u093D') + || (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') + || (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') + || (c == '\u09B2') + || (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DF' && c <= '\u09E1') || (c >= '\u09F0' && c <= '\u09F1') + || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || (c >= '\u0A13' && c <= '\u0A28') + || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || (c >= '\u0A35' && c <= '\u0A36') + || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') + || (c == '\u0A5E') + || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') + || (c == '\u0A8D') + || (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') + || (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') + || (c == '\u0ABD') + || (c == '\u0AE0') + || (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') + || (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') + || (c == '\u0B3D') + || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || (c >= '\u0B85' && c <= '\u0B8A') + || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || (c >= '\u0B99' && c <= '\u0B9A') + || (c == '\u0B9C') + || (c >= '\u0B9E' && c <= '\u0B9F') || (c 
>= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') + || (c >= '\u0BAE' && c <= '\u0BB5') || (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') + || (c >= '\u0C0E' && c <= '\u0C10') || (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') + || (c >= '\u0C35' && c <= '\u0C39') || (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') + || (c >= '\u0C8E' && c <= '\u0C90') || (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') + || (c >= '\u0CB5' && c <= '\u0CB9') + || (c == '\u0CDE') + || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || (c >= '\u0D0E' && c <= '\u0D10') + || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || (c >= '\u0D60' && c <= '\u0D61') + || (c >= '\u0E01' && c <= '\u0E2E') + || (c == '\u0E30') + || (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') + || (c == '\u0E84') + || (c >= '\u0E87' && c <= '\u0E88') + || (c == '\u0E8A') + || (c == '\u0E8D') + || (c >= '\u0E94' && c <= '\u0E97') || (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') + || (c == '\u0EA5') + || (c == '\u0EA7') + || (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') + || (c == '\u0EB0') + || (c >= '\u0EB2' && c <= '\u0EB3') + || (c == '\u0EBD') + || (c >= '\u0EC0' && c <= '\u0EC4') || (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') + || (c >= '\u10A0' && c <= '\u10C5') || (c >= '\u10D0' && c <= '\u10F6') + || (c == '\u1100') + || (c >= '\u1102' && c <= '\u1103') || (c >= '\u1105' && c <= '\u1107') + || (c == '\u1109') + || (c >= '\u110B' && c <= '\u110C') || (c >= '\u110E' && c <= '\u1112') + || (c == '\u113C') + || (c == '\u113E') + || (c == '\u1140') + || (c == '\u114C') + || (c == '\u114E') + || (c == '\u1150') + || (c >= '\u1154' && c <= '\u1155') + || (c == '\u1159') + || (c >= '\u115F' && c <= '\u1161') + || (c == '\u1163') + || (c == '\u1165') 
+ || (c == '\u1167') + || (c == '\u1169') + || (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') + || (c == '\u1175') + || (c == '\u119E') + || (c == '\u11A8') + || (c == '\u11AB') + || (c >= '\u11AE' && c <= '\u11AF') || (c >= '\u11B7' && c <= '\u11B8') + || (c == '\u11BA') + || (c >= '\u11BC' && c <= '\u11C2') + || (c == '\u11EB') + || (c == '\u11F0') + || (c == '\u11F9') + || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || (c >= '\u1F00' && c <= '\u1F15') + || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || (c >= '\u1F48' && c <= '\u1F4D') + || (c >= '\u1F50' && c <= '\u1F57') + || (c == '\u1F59') + || (c == '\u1F5B') + || (c == '\u1F5D') + || (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') + || (c == '\u1FBE') + || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || (c >= '\u1FD0' && c <= '\u1FD3') + || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || (c >= '\u1FF2' && c <= '\u1FF4') + || (c >= '\u1FF6' && c <= '\u1FFC') + || (c == '\u2126') + || (c >= '\u212A' && c <= '\u212B') + || (c == '\u212E') + || (c >= '\u2180' && c <= '\u2182') || (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') + || (c >= '\u3105' && c <= '\u312C') || (c >= '\uAC00' && c <= '\uD7A3'); + } + + /** Ideographic - XML Specification 1.0 [86]. Returns true when c is U+3007 or lies in U+4E00..U+9FA5 or U+3021..U+3029. */ + private static final boolean Ideographic(char c) { + return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029'); + } + + /** CombiningChar - XML Specification 1.0 [87] */ + /*private static final boolean CombiningChar(char c) { + return (c >= '\u0300' && c <= '\u0345') + || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || (c >= '\u0591' && c <= '\u05A1') + || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') + || (c == '\u05BF') + || (c >= '\u05C1' && c <= '\u05C2') + || (c == '\u05C4') + || 
(c >= '\u064B' && c <= '\u0652') + || (c == '\u0670') + || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') + || (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') + || (c == '\u093C') + || (c >= '\u093E' && c <= '\u094C') + || (c == '\u094D') + || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || (c >= '\u0981' && c <= '\u0983') + || (c == '\u09BC') + || (c == '\u09BE') + || (c == '\u09BF') + || (c >= '\u09C0' && c <= '\u09C4') || (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') + || (c == '\u09D7') + || (c >= '\u09E2' && c <= '\u09E3') + || (c == '\u0A02') + || (c == '\u0A3C') + || (c == '\u0A3E') + || (c == '\u0A3F') + || (c >= '\u0A40' && c <= '\u0A42') || (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') + || (c >= '\u0A70' && c <= '\u0A71') || (c >= '\u0A81' && c <= '\u0A83') + || (c == '\u0ABC') + || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || (c >= '\u0ACB' && c <= '\u0ACD') + || (c >= '\u0B01' && c <= '\u0B03') + || (c == '\u0B3C') + || (c >= '\u0B3E' && c <= '\u0B43') || (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') + || (c >= '\u0B56' && c <= '\u0B57') || (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') + || (c >= '\u0BC6' && c <= '\u0BC8') || (c >= '\u0BCA' && c <= '\u0BCD') + || (c == '\u0BD7') + || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || (c >= '\u0C46' && c <= '\u0C48') + || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || (c >= '\u0C82' && c <= '\u0C83') + || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || (c >= '\u0CCA' && c <= '\u0CCD') + || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || (c >= '\u0D3E' && c <= '\u0D43') + || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') + || (c == 
'\u0D57') + || (c == '\u0E31') + || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') + || (c == '\u0EB1') + || (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') + || (c >= '\u0F18' && c <= '\u0F19') + || (c == '\u0F35') + || (c == '\u0F37') + || (c == '\u0F39') + || (c == '\u0F3E') + || (c == '\u0F3F') + || (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') + || (c == '\u0F97') + || (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') + || (c == '\u0FB9') + || (c >= '\u20D0' && c <= '\u20DC') + || (c == '\u20E1') + || (c >= '\u302A' && c <= '\u302F') + || (c == '\u3099') + || (c == '\u309A'); + }*/ + + /** Digit - XML Specification 1.0 [88]. Returns true when c lies in one of the fifteen ten-character digit ranges tested below, starting with ASCII U+0030..U+0039. */ + private static final boolean Digit(char c) { + return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') + || (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') + || (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') + || (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') + || (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29'); + } + + /** Extender - XML Specification 1.0 [89]. Returns true for the eight single code points and the three ranges tested below. */ + private static final boolean Extender(char c) { + return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' + || c == '\u0640' || c == '\u0E46' || c == '\u0EC6' || c == '\u3005' + || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE'); + } + + /** Whitespace - XML Specification 1.0 [3]. Returns true when the single character c is a space (U+0020), tab (U+0009), carriage return or line feed. */ + private static final boolean S(char c) { + return c == '\u0020' || c == '\u0009' || c == '\r' || c == '\n'; + } +}