X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=src%2Forg%2Fxwt%2Futil%2FXML.java;fp=src%2Forg%2Fxwt%2Futil%2FXML.java;h=0000000000000000000000000000000000000000;hb=3591b88b94a6bb378af3d4abe6eb5233ce583104;hp=e3d0b146b5a76c451215ec13fad7240decd96b7d;hpb=de378041d5ca2aca1a2b5a31ef15ae90a86c977f;p=org.ibex.core.git diff --git a/src/org/xwt/util/XML.java b/src/org/xwt/util/XML.java deleted file mode 100644 index e3d0b14..0000000 --- a/src/org/xwt/util/XML.java +++ /dev/null @@ -1,1145 +0,0 @@ -// Copyright (C) 2003 Adam Megacz all rights reserved. -// -// You may modify, copy, and redistribute this code under the terms of -// the GNU Library Public License version 2.1, with the exception of -// the portion of clause 6a after the semicolon (aka the "obnoxious -// relink clause") - -package org.xwt.util; - -import java.io.Reader; -import java.io.IOException; -import java.io.EOFException; - -/** - * An Event-Driven, Non-Validating XML Parser with Namespace support. - * - * A subclass can implement the abstract functions for receiving details - * about an xml file as it is parsed. To initiate a parse, use the parse() - * function. - * - *

Implementation Notes

- *

As the parser traverses into an element, it adds it to the linked list - * called elements. However, elements has been pre-filled - * with instances of the Element inner class. So in the vast majority of - * cases, the pointer current is moved along one, and the values for the - * new element are filled into the current object.

- * - *

This parser supports all the unicode ranges required by the XML - * Specification. However, it is optimised for well-formed ASCII documents. - * Documents containing unicode Names and Attributes will take much longer - * to process, and invalid documents (badly formed Names or invalid attributes) - * will be run through a test on every single unicode character range before - * being declared invalid.

- * - * - * - * @author David Crawshaw - * @see XML Specification - * @see XML Namespaces - */ -public abstract class XML -{ - ///////////////////////////////////////////////////////////////////////////////////////////// - // XML Parser - ///////////////////////////////////////////////////////////////////////////////////////////// - - public static final int BUFFER_SIZE = 255; - - /** static pool of XML.Element instances shared by all XML Parsers. */ - private static final Queue elements = new Queue(30); - - private static final char[] single_amp = new char[] { '&' }; - private static final char[] single_apos = new char[] { '\'' }; - private static final char[] single_gt = new char[] { '>' }; - private static final char[] single_lt = new char[] { '<' }; - private static final char[] single_quot = new char[] { '"' }; - - private int line; - private int col; - - private Reader in; - private char[] buf; - private int off; - private int len; - - private Element current; - - // used in readEntity() to process a single character without creating a new array - private char[] singlechar = new char[1]; - - - public XML() { this(BUFFER_SIZE); } - - public XML(int bSize) { - buf = new char[bSize]; - - current = (Element)elements.remove(false); - if (current == null) current = new Element(); - } - - - /** Returns the line number at the beginning of the last process call. */ - public int getLine() { return line; } - - /** Returns the column number at the beginning of the last process call. */ - public int getCol() { return col; } - - /** - * Parse given input and call the abstract event functions. - * - * Careful with threading, as this function is not synchronized. 
- */ - public final void parse(Reader reader) throws IOException, Exn { - in = reader; - off = len = 0; - line = col = 1; - - clear(); // clean up possible mid-way linked-list element - - try { - // process the stream - while (true) { - if (!buffer(1)) { - if (current.qName == null) break; - throw new Exn("reached eof without closing <"+current.qName+"> element", Exn.WFC, getLine(), getCol()); - } - - if (buf[off] == '<') readTag(); - readChars(current.qName != null); - } - } finally { clear(); } // clean up elements - } - - /** remove any leftover elements from the linked list and queue them */ - private final void clear() { - for (Element last = current; current.parent != null; ) { - current = current.parent; - last.clear(); - elements.append(last); - } - current.clear(); - } - - /** reads in a tag. expects buf[off] == '<' */ - private final void readTag() throws IOException, Exn { - // Start Tag '<' Name (S Attribute)* S? '>' - boolean starttag = true; - - // End Tag '' - boolean endtag = false; - - // if (starttag & endtag) then: EmptyElemTag '<' Name (S Attribute)* S? 
'/>' - - // Position in the name of the ':' namespace prefix - int prefix = -1; - - int namelen = 0; - - col++; off++; len--; - if (!buffer(1)) throw new EOFException("Unexpected EOF processing element tag"); - - // work out what we can from the beginning of the tag - char s = buf[off]; - if (s == '!') { - // definitions here don't necessarily conform to xml spec (as DTDs not yet implemented) - col++; off++; len--; - if (!buffer(4)) throw new EOFException("Unexpected EOF processing ' - readChars(false, "-->", false); - col += 3; off += 3; len -= 3; - break; - - // we don't care about the following definitions - - case 'A': - if (!buffer(7) - || buf[off+1] != 'T' || buf[off+2] != 'T' || buf[off+3] != 'L' - || buf[off+4] != 'I' || buf[off+5] != 'S' || buf[off+6] != 'T') { - bad = true; break; - } - col += 7; off += 7; len -= 7; - - // ATTLIST '') '>' - readChars(false, ">", true); - col++; off++; len--; - break; - case 'D': - if (!buffer(7) - || buf[off+1] != 'O' || buf[off+2] != 'C' || buf[off+3] != 'T' - || buf[off+4] != 'Y' || buf[off+5] != 'P' || buf[off+6] != 'E') { - bad = true; break; - } - col += 7; off += 7; len -= 7; - - // DTD '') '>' - readChars(false, ">", true); - col++; off++; len--; - break; - case 'E': - if (!buffer(7)) { - bad = true; - } else if (buf[off+1] == 'L' && buf[off+2] == 'E' && buf[off+3] == 'M' - && buf[off+4] == 'E' && buf[off+5] == 'N' && buf[off+6] == 'T') { - // ELEMENT '') '>' - readChars(false, ">", true); - col++; off++; len--; - - } else if (buf[off+1] == 'N' && buf[off+2] == 'T' && buf[off+3] == 'I' - && buf[off+4] == 'T' && buf[off+5] == 'Y') { - // ENTITY '') '>' - readChars(false, ">", true); - col++; off++; len--; - - } else { - bad = true; - } - break; - - case 'N': - if (!buffer(8) - || buf[off+1] != 'O' || buf[off+2] != 'T' || buf[off+3] != 'A' || buf[off+4] != 'T' - || buf[off+5] != 'I' || buf[off+6] != 'O' || buf[off+7] != 'N') { - bad = true; break; - } - col += 8; off += 8; len -= 8; - // NOTATION '') '>' - 
readChars(false, ">", true); - col++; off++; len--; - - break; - default: bad = true; - } - - if (bad) throw new Exn("element tag start character is invalid", Exn.MARKUP, getLine(), getCol()); - - } else if (s == '?') { - // PI (Ignored) '' Char*)) '?>' - col++; off++; len--; - readChars(false, "?>", true); - if (!buffer(2)) throw new EOFException("Unexpected EOF at end of Processing Instruction"); - col += 2; off += 2; len -= 2; - - } else if (s == '[') { - if (!buffer(7) - || buf[off+1] != 'C' || buf[off+2] != 'D' || buf[off+3] != 'A' - || buf[off+4] != 'T' || buf[off+5] != 'A' || buf[off+6] != '[') { - col++; off--; len++; - // Conditional '' Char*)) ']]>' - readChars(false, "]]>", false); - } else { - col += 7; off += 7; len -=7; - // CDATA '' Char*)) ']]>' - readChars(true, "]]>", false); - } - col += 3; off += 3; len -= 3; - } else { - if (s == '/') { - // End Tag '' - starttag = false; - endtag = true; - - col++; off++; len--; - if (!buffer(1)) throw new EOFException("Unexpected EOF processing end tag"); - s = buf[off]; - } - - if (!Name(s)) throw new Exn("invalid starting character in element name", Exn.MARKUP, getLine(), getCol()); - - // find the element name (defined in XML Spec: section 2.3) - for (namelen = 0; ; namelen++) { - if (!buffer(namelen+1)) throw new EOFException("Unexpected EOF in element tag name"); - - s = buf[off+namelen]; - - if (S(s) || s == '>') { - break; - } else if (s == '/') { - endtag = true; - break; - } else if (s == ':' && namelen > 0 && prefix < 1) { - // we have a definition of the prefix range available - prefix = namelen; - } else if (!NameChar(s)) { - throw new Exn("element name contains invalid character", Exn.MARKUP, getLine(), getCol()); - } - } - - // process name (based on calculated region) - if (namelen < 1) throw new Exn("element name is null", Exn.MARKUP, getLine(), getCol()); - - // we have marked out the name region, so turn it into a string and move on - String qName = new String(buf, off, namelen); - - col += 
namelen; off += namelen; len -= namelen; - - if (starttag) { - // create the in-memory element representation of this beast - // if current.qName == null then this is the root element we're dealing with - if (current.qName != null) { - Element next = (Element)elements.remove(false); - if (next == null) next = new Element(); - //next.clear(); // TODO: remove as elements now checked as they're added to the queue - next.parent = current; - current = next; - } - - current.qName = qName; - - if (prefix > 0) { - current.prefix = current.qName.substring(0, prefix); - current.localName = current.qName.substring(prefix+1); - } else { - current.prefix = null; - current.localName = current.qName; - } - - // process attributes - readWhitespace(); - if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 1"); - while (buf[off] != '/' && buf[off] != '>') { - readAttribute(); - if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 2"); - readWhitespace(); - } - - // work out the uri of this element - current.uri = current.getUri(current.getPrefix()); - if (current.getUri().equals("") && current.getPrefix() != null) - current.addError(new Exn("undefined prefix '"+current.getPrefix()+"'", Exn.NC, getLine(), getCol())); - - } else { - // this is an end-of-element tag - if (!qName.equals(current.getQName())) throw new Exn( - "end tag does not line up with start tag <"+current.getQName()+">", Exn.WFC, getLine(), getCol() - ); - } - - // deal with whitespace - readWhitespace(); - - // process tag close - if (!buffer(1)) throw new EOFException("Unexpected EOF before end of tag"); - if (buf[off] == '/') { - endtag = true; - off++; len--; col++; - } - if (!buffer(1)) throw new EOFException("Unexpected EOF before end of endtag"); - if (buf[off] == '>') { - off++; len--; col++; - } else { - throw new Exn("missing '>' character from element '"+qName+"'", Exn.MARKUP, getLine(), getCol()); - } - - // send element signals - if (starttag) 
startElement(current); - if (endtag) { - endElement(current); - - // we just closed an element, so remove it from the element 'stack' - if (current.getParent() == null) { - // we just finished the root element - current.clear(); - } else { - Element last = current; - current = current.parent; - last.clear(); - elements.append(last); - } - } - } - } - - /** reads in an attribute of an element. expects Name(buf[off]) */ - private final void readAttribute() throws IOException, Exn { - int ref = 0; - int prefix = 0; - String n, v, p, u; // attribute name, value, prefix and uri respectively - n = v = p = u = null; - char s; - - // find the element name (defined in XML Spec: section 2.3) - for (ref= 0; ; ref++) { - if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in read attribute loop part 1"); - - s = buf[off+ref]; - - if (s == '=' || S(s)) { - break; - } else if (s == ':' && ref > 0 && prefix < 1) { - // we have a definition of the prefix range available - prefix = ref+1; - } else if (!NameChar(s)) { - throw new Exn("attribute name contains invalid characters", Exn.MARKUP, getLine(), getCol()); - } - } - - // determine prefix and key name - if (prefix > 0) { - p = new String(buf, off, prefix-1); - col += prefix; off += prefix; len -= prefix; ref -= prefix; - } - n = new String(buf, off, ref); - col += ref; off += ref; len -= ref; - - // find name/value divider ('=') - readWhitespace(); - if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider"); - if (buf[off] != '=') throw new Exn("attribute name not followed by '=' sign", Exn.MARKUP, getLine(), getCol()); - - col++; off++; len--; - readWhitespace(); - - if (!buffer(1)) throw new EOFException("Unexpected EOF after attribute '=' divider"); - - char wrap; - if (buf[off] == '\'' || buf[off] == '"') { - wrap = buf[off]; - } else { - throw new Exn("attribute '"+n+"' must have attribute wrapped in ' or \"", Exn.MARKUP, getLine(), getCol()); - } - col++; off++; len--; - - // find the 
attribute value - attval: for (ref = 0; ; ref++) { - if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in attribute value"); - - if (buf[off+ref] == wrap) { - break attval; - } else if (buf[off+ref] == '<') { - throw new Exn("attribute value for '"+n+"' must not contain '<'", Exn.WFC, getLine(), getCol()); - } - } - - v = new String(buf, off, ref); - col += ref; off += ref; len -= ref; - - // remove end wrapper character - col++; off++; len--; - - // process attribute - if (p != null && p.equals("xmlns")) { - current.addUri(n, v); - } else if (n.equals("xmlns")) { - if (current.getUri().equals("")) { - current.addUri("", v); - } else { - current.addError(new Exn("default namespace definition repeated", Exn.NC, getLine(), getCol())); - } - } else { - // find attribute uri - u = current.getUri(p); - if (p != null && u.equals("")) current.addError(new Exn("undefined attribute prefix '"+p+"'", Exn.NC, getLine(), getCol())); - - // check to see if attribute is a repeat - for (int i=0; current.len > i; i++) if (n.equals(current.getAttrKey(i)) && u.equals(current.getAttrUri(i))) throw new Exn( - "attribute name '"+n+"' may not appear more than once in the same element tag", Exn.WFC, getLine(), getCol() - ); - - current.addAttr(n, v, u); - } - } - - /** reads an entity and processes out its value. 
expects buf[off] == '&' */ - private final void readEntity() throws IOException, Exn { - off++; len--; - if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity"); - - boolean unknown = false; - switch (buf[off]) { - case '#': - off++; len--; - - int radix; - if (buf[off] == 'x') { off++; len--; radix = 16; } else { radix = 10; } - int c = 0; - - // read in each char, then shift total value to the left and add the extra - // style of loop is slightly different from all the others, as this should run a limited number of times - findchar: while (true) { - if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity"); - int d = Character.digit(buf[off], radix); - if (d == -1) { - if (buf[off] != ';') throw new Exn("illegal characters in entity reference", Exn.WFC, getLine(), getCol()); - off++; len--; col++; - break findchar; - } - c = (c * radix) + d; - - off++; len--; - } - - singlechar[0] = Character.forDigit(c, radix); - characters(singlechar, 0, 1); - break; - - case 'a': - if (buffer(4) && buf[off+1] == 'm' && buf[off+2] == 'p' && buf[off+3] == ';') { - characters(single_amp, 0, 1); // & - off += 4; len -= 4; col++; - } else if (buffer(5) && buf[off+1] == 'p' && buf[off+2] == 'o' && buf[off+3] == 's' && buf[off+4] == ';') { - characters(single_apos, 0, 1); // ' - off += 5; len -= 5; col++; - } else { - unknown = true; - } - break; - - case 'g': - if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') { - characters(single_gt, 0, 1); // > - off += 3; len -= 3; col++; - } else { - unknown = true; - } - break; - - case 'l': - if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') { - characters(single_lt, 0, 1); // < - off += 3; len -= 3; col++; - } else { - unknown = true; - } - break; - - case 'q': - if (buffer(5) && buf[off+1] == 'u' && buf[off+2] == 'o' && buf[off+3] == 't' && buf[off+4] == ';') { - characters(single_quot, 0, 1); // " - off += 5; len -= 5; col++; - } else { - unknown = true; - } - break; - - // TODO: check a 
parser-level Hash of defined entities - } - - if (unknown) throw new Exn("unknown entity ( not supported)", Exn.WFC, getLine(), getCol()); - } - - /** reads until the passed string is encountered. */ - private final void readChars(boolean p, String match, boolean entities) throws IOException, Exn { - int ref; - char[] end = match.toCharArray(); - - for (boolean more = true; more;) { - if (!buffer(1)) return; - - buf: for (ref = 0; ref < len; ref++) { - switch (buf[off+ref]) { - case '\r': // windows or macos9 newline - // normalise and process - buf[off+ref] = '\n'; ref++; - if (p) characters(buf, off, ref); - off += ref; len -= ref; ref = -1; - line++; col = 1; - - // windows double-char newline; skip the next char - if (!buffer(1)) return; - if (buf[off] == '\n') { off++; len--; } - break; - - case '\n': // unix newline - ref++; - if (p) characters(buf, off, ref); - off += ref; len -= ref; ref = -1; - line++; col = 1; - break; - - case '&': // entity - if (entities) { - if (p) { - if (ref > 0) characters(buf, off, ref); - off += ref; len -= ref; ref = -1; - readEntity(); - } - break; - } - - default: - if (!buffer(ref+end.length)) continue buf; - for (int i=0; end.length > i; i++) if (end[i] != buf[off+ref+i]) continue buf; - more = false; - break buf; - } - } - - if (p && ref > 0) characters(buf, off, ref); - off += ref; len -= ref; col += ref; - } - } - - /** - * reads until a < symbol is encountered - * @param p If true call the characters(char[],int,int) funciton for the processed characters - */ - private final void readChars(boolean p) throws IOException, Exn { - int ref; - - for (boolean more = true; more;) { - if (!buffer(1)) return; - - buf: for (ref = 0; ref < len; ref++) { - switch (buf[off+ref]) { - case '\r': // windows or macos9 newline - // normalise and process - buf[off+ref] = '\n'; ref++; - if (p) characters(buf, off, ref); - off += ref; len -= ref; ref = -1; - line++; col = 1; - - // windows double-char newline; skip the next char - if 
(!buffer(1)) return; - if (buf[off] == '\n') { off++; len--; } - break; - - case '\n': // unix newline - ref++; - if (p) characters(buf, off, ref); - off += ref; len -= ref; ref = -1; - line++; col = 1; - break; - - case '&': // entity - if (p) { - if (ref > 0) characters(buf, off, ref); - off += ref; len -= ref; ref = -1; - readEntity(); - } - break; - - case '<': // end of chars section - more = false; - break buf; - } - } - - if (p && ref > 0) characters(buf, off, ref); - off += ref; len -= ref; col += ref; - } - } - - /** reads until a non-whitespace symbol is encountered */ - private final void readWhitespace() throws IOException, Exn { - int ref; - - for (boolean more = true; more;) { - if (!buffer(1)) return; - - buf: for (ref = 0; ref < len; ref++) { - switch (buf[off+ref]) { - case '\r': // windows or macos9 newline - // normalise and process - buf[off+ref] = '\n'; - whitespace(buf, off, ++ref); - off += ref; len -= ref; ref = -1; - line++; col = 1; - - // windows double-char newline; skip the next char - if (!buffer(1)) return; - if (buf[off] == '\n') { off++; len--; } - break; - - case '\n': // unix newline - whitespace(buf, off, ++ref); - off += ref; len -= ref; ref = -1; - line++; col = 1; - break; - - case ' ': // space - case '\t': // tab - break; - - default: // end of whitespace - more = false; - break buf; - } - } - - off += ref; len -= ref; col += ref; - } - } - - /** - * attempt to fill the buffer. - * - * @param min Minimum number of characters to read (even if we have to block to do it). - * @return return false if min can't be reached. 
- */ - private final boolean buffer(int min) throws IOException { - if (len > min) return true; - - if (buf.length - (off+len) >= min) { - // plenty of space left on the end of the buffer - } else if (off >= min) { - // moving offset data to start will leave enough free space on the end - System.arraycopy(buf, off, buf, 0, len); - off = 0; - } else { - // buffer size will have to be increased - char[] newbuf = new char[buf.length * 2]; - System.arraycopy(buf, off, newbuf, 0, len); - buf = newbuf; - off = 0; - } - - while (min > len) { - int newlen = in.read(buf, off+len, buf.length-(off+len)); - if (newlen < 0) return false; - len += newlen; - } - - return true; - } - - - ///////////////////////////////////////////////////////////////////////////////////////////// - // Abstract SAX-Like Interface - ///////////////////////////////////////////////////////////////////////////////////////////// - - /** - * Called when the start of an element is processed. - * - *

DO NOT store a reference to the Element object, as Element instances are reused by the XML parser.

- */ - public abstract void startElement(Element e) throws Exn; - - /** - * Represents up to a line of character data. - * - *

Newlines are all normalised to the Unix \n as per the XML Spec, - * and a newline will only appear as the last character in the passed - * array segment.

- * - *

XML.getLine() and XML.getCol() report the position at the - * beginning of this character segment, which can be processed in a - * line-by-line fashion due to the above newline restriction.

- */ - public abstract void characters(char[] ch, int start, int length) throws Exn, IOException; - - /** Represents up to a line of ignorable whitespace. */ - public abstract void whitespace(char[] ch, int start, int length) throws Exn, IOException; - - /** Represents the end of an Element. */ - public abstract void endElement(Element e) throws Exn, IOException; - - - ///////////////////////////////////////////////////////////////////////////////////////////// - // Inner Classes for Parser Support - ///////////////////////////////////////////////////////////////////////////////////////////// - - /** - * Represents an element in an XML document. Stores a reference to its - * parent, forming a one-way linked list. - * - * Element objects are reused, so client code making use of them must - * drop their references after the specific element process function - * has returned. - */ - public static final class Element { - - private static final int DEFAULT_ATTR_SIZE = 10; - - protected Element parent = null; - - protected String uri = null; - protected String localName = null; - protected String qName = null; - protected String prefix = null; - - protected Hash urimap = new Hash(3,3); - - protected String[] keys = new String[DEFAULT_ATTR_SIZE]; - protected String[] vals = new String[DEFAULT_ATTR_SIZE]; - protected String[] uris = new String[DEFAULT_ATTR_SIZE]; - protected int len = 0; - - protected Exn[] errors = new Exn[] {}; - - - /** Parent of current element. */ - public Element getParent() { return parent; } - - /** Qualified Name of current element. XML Namespace Spec 14-Jan-1999 [6] */ - public String getQName() { return qName; } - - /** LocalPart of current element. XML Namespace Spec 14-Jan-1999 [8] */ - public String getLocalName() { return localName; } - - /** Prefix of current element. Substring of qName. 
XML Namespace Spec 14-Jan-1999 [7] */ - public String getPrefix() { return prefix; } - - public Hash getUriMap() { return urimap; } // HACK - - /** URI of current tag. XML Namespace Spec 14-Jan-1999 section 1 */ - public String getUri() { return getUri(prefix); } - - /** URI of a given prefix. Never returns null, instead gives "". */ - public String getUri(String p) { - String ret = null; - for (Element e = this; e != null && ret == null; e = e.getParent()) { - ret = (String)e.urimap.get(p == null ? "" : p); - } - return ret == null ? "" : ret; - } - - /** An array of attribute names. */ - public String getAttrKey(int pos) { return len > pos ? keys[pos] : null; } - - /** An array of attribute values. */ - public String getAttrVal(int pos) { return len > pos ? vals[pos] : null; } - - /** An array of attribute uris. */ - public String getAttrUri(int pos) { return len > pos ? uris[pos] : null; } - - /** Current number of attributes in the element. */ - public int getAttrLen() { return len; } - - /** An array of non-fatal errors related to this element. */ - public Exn[] getErrors() { return errors; } - - - protected Element() { } - - /** Add (replace if exists in current element) a Namespace prefix/uri map. */ - protected void addUri(String name, String value) { - urimap.put(name, value); - } - - /** Add an attribute. */ - protected void addAttr(String key, String val, String uri) { - if (len == keys.length) { - // increase the size of the attributes arrays - String[] newkeys = new String[keys.length*2]; - String[] newvals = new String[vals.length*2]; - String[] newuris = new String[uris.length*2]; - System.arraycopy(keys, 0, newkeys, 0, keys.length); - System.arraycopy(vals, 0, newvals, 0, vals.length); - System.arraycopy(uris, 0, newuris, 0, uris.length); - keys = newkeys; vals = newvals; uris = newuris; - } - - keys[len] = key; - vals[len] = val; - uris[len] = uri; - len++; - } - - /** Add an error. 
*/ - protected void addError(Exn e) { - // it doesn't really matter about continually expanding the array, as this case is quite rare - Exn[] newe = new Exn[errors.length+1]; - System.arraycopy(errors, 0, newe, 0, errors.length); - newe[errors.length] = e; - errors = newe; - } - - /** Empty out all the data from the Element. */ - protected void clear() { - parent = null; - uri = localName = qName = prefix = null; - urimap.clear(); - - if (keys.length != vals.length || vals.length != uris.length) { - keys = new String[DEFAULT_ATTR_SIZE]; - vals = new String[DEFAULT_ATTR_SIZE]; - uris = new String[DEFAULT_ATTR_SIZE]; - } else { - for (int i=0; keys.length > i; i++) { keys[i] = null; vals[i] = null; uris[i] = null; }; - } - len = 0; - - errors = new Exn[] {}; - } - } - - /** Parse or Structural Error */ - public static class Exn extends Exception { - /** Violation of Markup restrictions in XML Specification - Fatal Error */ - public static final int MARKUP = 1; - - /** Well-Formedness Constraint Violation - Fatal Error */ - public static final int WFC = 2; - - /** Namespace Constraint Violation - Recoverable Error */ - public static final int NC = 3; - - /** Schema Violation - Fatal Error */ - public static final int SCHEMA = 4; - - private String error; - private int type; - private int line; - private int col; - - public Exn(String e) { this(e, MARKUP, -1, -1); } - - public Exn(String e, int type, int line, int col) { - this.error = e; - this.type = type; - this.line = line; - this.col = col; - } - - public int getType() { return this.type; } - public int getLine() { return this.line; } - public int getCol() { return this.col; } - public String getMessage() { return this.error + (line >= 0 && col >= 0 ? 
" at " + line + ":" + col: ""); } - } - - - ///////////////////////////////////////////////////////////////////////////////////////////// - // Static Support Functions for the XML Specification - ///////////////////////////////////////////////////////////////////////////////////////////// - - // attempt to avoid these functions unless you *expect* the input to fall in the given range. - - /** First Character of Name - XML Specification 1.0 [5] */ - private static final boolean Name(char c) { - return BaseCharAscii(c) || c == '_' || c == ':' || Letter(c); - } - - /** NameChar - XML Specification 1.0 [4] */ - private static final boolean NameChar(char c) { - return BaseCharAscii(c) || c == '.' || c == '-' || c == '_' || c == ':' - || Digit(c) || Letter(c) || Extender(c); // TODO: || CombiningChar(c); - } - - /** BaseChar - XMl Specification 1.0 [84] */ - private static final boolean Letter(char c) { - return BaseChar(c) || Ideographic(c); - } - - /** Elements of BaseChar that exist in ASCII. 
*/ - private static final boolean BaseCharAscii(char c) { - return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A'); - } - - /** Char - XML Specification 1.0 [2] */ - private static final boolean Char(char c) { - // u000A == r and u000D == n, but the javac compiler can't handle the \ u form - return c == '\u0009' || c == '\r' || c == '\n' - || (c >= '\u0020' && c <= '\uD7FF') - || (c >= '\uE000' && c <= '\uFFFD'); - } - - /** BaseChar - XML Specification 1.0 [85] */ - private static final boolean BaseChar(char c) { - return BaseCharAscii(c) || (c >= '\u00C0' && c <= '\u00D6') - || (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') - || (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') - || (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') - || (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') - || (c == '\u0386') || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C') - || (c >= '\u038E' && c <= '\u03A1') || (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') - || (c == '\u03DA') || (c == '\u03DC') || (c == '\u03DE') - || (c == '\u03E0') - || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') - || (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') - || (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') - || (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') - || (c == '\u0559') - || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') - || (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') - || (c 
>= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') - || (c == '\u06D5') - || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') - || (c == '\u093D') - || (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') - || (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') - || (c == '\u09B2') - || (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DF' && c <= '\u09E1') || (c >= '\u09F0' && c <= '\u09F1') - || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || (c >= '\u0A13' && c <= '\u0A28') - || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || (c >= '\u0A35' && c <= '\u0A36') - || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') - || (c == '\u0A5E') - || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') - || (c == '\u0A8D') - || (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') - || (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') - || (c == '\u0ABD') - || (c == '\u0AE0') - || (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') - || (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') - || (c == '\u0B3D') - || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || (c >= '\u0B85' && c <= '\u0B8A') - || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || (c >= '\u0B99' && c <= '\u0B9A') - || (c == '\u0B9C') - || (c >= '\u0B9E' && c <= '\u0B9F') || (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') - || (c >= '\u0BAE' && c <= '\u0BB5') || (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') - || (c >= '\u0C0E' && c <= '\u0C10') || (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') - || (c >= 
'\u0C35' && c <= '\u0C39') || (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') - || (c >= '\u0C8E' && c <= '\u0C90') || (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') - || (c >= '\u0CB5' && c <= '\u0CB9') - || (c == '\u0CDE') - || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || (c >= '\u0D0E' && c <= '\u0D10') - || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || (c >= '\u0D60' && c <= '\u0D61') - || (c >= '\u0E01' && c <= '\u0E2E') - || (c == '\u0E30') - || (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') - || (c == '\u0E84') - || (c >= '\u0E87' && c <= '\u0E88') - || (c == '\u0E8A') - || (c == '\u0E8D') - || (c >= '\u0E94' && c <= '\u0E97') || (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') - || (c == '\u0EA5') - || (c == '\u0EA7') - || (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') - || (c == '\u0EB0') - || (c >= '\u0EB2' && c <= '\u0EB3') - || (c == '\u0EBD') - || (c >= '\u0EC0' && c <= '\u0EC4') || (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') - || (c >= '\u10A0' && c <= '\u10C5') || (c >= '\u10D0' && c <= '\u10F6') - || (c == '\u1100') - || (c >= '\u1102' && c <= '\u1103') || (c >= '\u1105' && c <= '\u1107') - || (c == '\u1109') - || (c >= '\u110B' && c <= '\u110C') || (c >= '\u110E' && c <= '\u1112') - || (c == '\u113C') - || (c == '\u113E') - || (c == '\u1140') - || (c == '\u114C') - || (c == '\u114E') - || (c == '\u1150') - || (c >= '\u1154' && c <= '\u1155') - || (c == '\u1159') - || (c >= '\u115F' && c <= '\u1161') - || (c == '\u1163') - || (c == '\u1165') - || (c == '\u1167') - || (c == '\u1169') - || (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') - || (c == '\u1175') - || (c == '\u119E') - || (c == '\u11A8') - || (c == '\u11AB') - || (c >= '\u11AE' && c <= '\u11AF') || (c >= '\u11B7' && c <= '\u11B8') - || (c == '\u11BA') - 
|| (c >= '\u11BC' && c <= '\u11C2') - || (c == '\u11EB') - || (c == '\u11F0') - || (c == '\u11F9') - || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || (c >= '\u1F00' && c <= '\u1F15') - || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || (c >= '\u1F48' && c <= '\u1F4D') - || (c >= '\u1F50' && c <= '\u1F57') - || (c == '\u1F59') - || (c == '\u1F5B') - || (c == '\u1F5D') - || (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') - || (c == '\u1FBE') - || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || (c >= '\u1FD0' && c <= '\u1FD3') - || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || (c >= '\u1FF2' && c <= '\u1FF4') - || (c >= '\u1FF6' && c <= '\u1FFC') - || (c == '\u2126') - || (c >= '\u212A' && c <= '\u212B') - || (c == '\u212E') - || (c >= '\u2180' && c <= '\u2182') || (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') - || (c >= '\u3105' && c <= '\u312C') || (c >= '\uAC00' && c <= '\uD7A3'); - } - - /** Ideographic - XML Specification 1.0 [86]. Returns true when c lies in U+4E00-U+9FA5 or U+3021-U+3029, or is U+3007. */ - private static final boolean Ideographic(char c) { - return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029'); - } - - /** CombiningChar - XML Specification 1.0 [87]. The implementation below is currently commented out. */ - /*private static final boolean CombiningChar(char c) { - return (c >= '\u0300' && c <= '\u0345') - || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || (c >= '\u0591' && c <= '\u05A1') - || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') - || (c == '\u05BF') - || (c >= '\u05C1' && c <= '\u05C2') - || (c == '\u05C4') - || (c >= '\u064B' && c <= '\u0652') - || (c == '\u0670') - || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') - || (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') - || (c == '\u093C') - 
|| (c >= '\u093E' && c <= '\u094C') - || (c == '\u094D') - || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || (c >= '\u0981' && c <= '\u0983') - || (c == '\u09BC') - || (c == '\u09BE') - || (c == '\u09BF') - || (c >= '\u09C0' && c <= '\u09C4') || (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') - || (c == '\u09D7') - || (c >= '\u09E2' && c <= '\u09E3') - || (c == '\u0A02') - || (c == '\u0A3C') - || (c == '\u0A3E') - || (c == '\u0A3F') - || (c >= '\u0A40' && c <= '\u0A42') || (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') - || (c >= '\u0A70' && c <= '\u0A71') || (c >= '\u0A81' && c <= '\u0A83') - || (c == '\u0ABC') - || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || (c >= '\u0ACB' && c <= '\u0ACD') - || (c >= '\u0B01' && c <= '\u0B03') - || (c == '\u0B3C') - || (c >= '\u0B3E' && c <= '\u0B43') || (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') - || (c >= '\u0B56' && c <= '\u0B57') || (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') - || (c >= '\u0BC6' && c <= '\u0BC8') || (c >= '\u0BCA' && c <= '\u0BCD') - || (c == '\u0BD7') - || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || (c >= '\u0C46' && c <= '\u0C48') - || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || (c >= '\u0C82' && c <= '\u0C83') - || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || (c >= '\u0CCA' && c <= '\u0CCD') - || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || (c >= '\u0D3E' && c <= '\u0D43') - || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') - || (c == '\u0D57') - || (c == '\u0E31') - || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') - || (c == '\u0EB1') - || (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') - || (c >= '\u0F18' && c <= '\u0F19') - || (c == '\u0F35') - || (c 
== '\u0F37') - || (c == '\u0F39') - || (c == '\u0F3E') - || (c == '\u0F3F') - || (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') - || (c == '\u0F97') - || (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') - || (c == '\u0FB9') - || (c >= '\u20D0' && c <= '\u20DC') - || (c == '\u20E1') - || (c >= '\u302A' && c <= '\u302F') - || (c == '\u3099') - || (c == '\u309A'); - }*/ - - /** Digit - XML Specification 1.0 [88]. Returns true when c falls inside one of the fifteen digit ranges tested below, false otherwise. */ - private static final boolean Digit(char c) { - return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') - || (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') - || (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') - || (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') - || (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29'); - } - - /** Extender - XML Specification 1.0 [89]. Returns true when c is one of the eight single code points or lies in one of the three ranges tested below. */ - private static final boolean Extender(char c) { - return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' - || c == '\u0640' || c == '\u0E46' || c == '\u0EC6' || c == '\u3005' - || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE'); - } - - /** Whitespace - XML Specification 1.0 [3]. Returns true when c is a space (U+0020), a tab (U+0009), a carriage return or a line feed. */ - private static final boolean S(char c) { - return c == '\u0020' || c == '\u0009' || c == '\r' || c == '\n'; - } -}