+++ /dev/null
-// Copyright (C) 2003 Adam Megacz <adam@xwt.org> all rights reserved.
-//
-// You may modify, copy, and redistribute this code under the terms of
-// the GNU Library Public License version 2.1, with the exception of
-// the portion of clause 6a after the semicolon (aka the "obnoxious
-// relink clause")
-
-package org.xwt.util;
-
-import java.io.Reader;
-import java.io.IOException;
-import java.io.EOFException;
-
-/**
- * An Event-Driven, Non-Validating XML Parser with Namespace support.
- *
- * A subclass can implement the abstract functions for receiving details
- * about an XML file as it is parsed. To initiate a parse, use the parse()
- * function.
- *
- * <h3>Implementation Notes</h3>
- * <p>As the parser traverses into an element, it adds it to the linked list
- * called <tt>elements</tt>. However, <tt>elements</tt> has been pre-filled
- * with instances of the Element inner class. So in the vast majority of
- * cases, the pointer current is moved along one, and the values for the
- * new element are filled into the current object.</p>
- *
- * <p>This parser supports all the unicode ranges required by the XML
- * Specification. However, it is optimised for well-formed ASCII documents.
- * Documents containing unicode Names and Attributes will take much longer
- * to process, and invalid documents (badly formed Names or invalid attributes)
- * will be run through a test on every single unicode character range before
- * being declared invalid.</p>
- *
- * <ul>
- * <li>Each time the buffer offset <tt>off</tt> is moved, the length
- * <tt>len</tt> must be decreased.</li>
- * <li>Each time the buffer length is decreased, it must be checked to make
- * sure it is >0.</li>
- * <li><i>error</i> is defined as a Validity Constraint Violation and
- * is recoverable</li>
- * <li><i>fatal error</i> is defined as a Well-formedness Constraint
- * Violation and is not recoverable</li>
- * </ul>
- *
- * @author David Crawshaw
- * @see <a href="http://w3.org/TR/REC-xml">XML Specification</a>
- * @see <a href="http://w3.org/TR/REC-xml-names">XML Namespaces</a>
- */
-public abstract class XML
-{
- /////////////////////////////////////////////////////////////////////////////////////////////
- // XML Parser
- /////////////////////////////////////////////////////////////////////////////////////////////
-
- public static final int BUFFER_SIZE = 255; // initial read-buffer capacity used by the no-argument constructor
-
- /** Static pool of XML.Element instances shared by all XML parsers. */
- private static final Queue elements = new Queue(30);
-
- private static final char[] single_amp = new char[] { '&' }; // replacement text readEntity() reports for &amp;
- private static final char[] single_apos = new char[] { '\'' }; // replacement text readEntity() reports for &apos;
- private static final char[] single_gt = new char[] { '>' }; // replacement text readEntity() reports for &gt;
- private static final char[] single_lt = new char[] { '<' }; // replacement text readEntity() reports for &lt;
- private static final char[] single_quot = new char[] { '"' }; // replacement text readEntity() reports for &quot;
-
- private int line; // line counter, reset to 1 by parse() and returned by getLine()
- private int col; // column counter, reset to 1 by parse() and returned by getCol()
-
- private Reader in; // character source handed to parse()
- private char[] buf; // read buffer; may be compacted or replaced by buffer()
- private int off; // index in buf of the first unconsumed character
- private int len; // number of unconsumed characters in buf starting at off
-
- private Element current; // innermost element being parsed; its parent links form the open-element chain
-
- // used in readEntity() to process a single character without creating a new array
- private char[] singlechar = new char[1];
-
-
- /** Creates a parser whose read buffer initially holds BUFFER_SIZE characters. */
- public XML() {
-     this(BUFFER_SIZE);
- }
-
- /**
-  * Creates a parser with a caller-chosen initial read buffer.
-  *
-  * @param bSize initial capacity, in characters, of the read buffer
-  */
- public XML(int bSize) {
-     buf = new char[bSize];
-
-     // take the root slot from the shared pool when it has a spare instance
-     Element pooled = (Element)elements.remove(false);
-     current = (pooled != null) ? pooled : new Element();
- }
-
-
- /** Returns the line number at the beginning of the last process call. */
- public int getLine() { return line; }
-
- /** Returns the column number at the beginning of the last process call. */
- public int getCol() { return col; }
-
- /**
-  * Runs a complete parse over <tt>reader</tt>, reporting what is found through
-  * the abstract event functions.
-  *
-  * This function is not synchronized, so take care when threads share a parser.
-  *
-  * @param reader source of the document's characters
-  * @throws IOException if reading from <tt>reader</tt> fails
-  * @throws Exn if the input ends while an element is still open, or a callee rejects the markup
-  */
- public final void parse(Reader reader) throws IOException, Exn {
-     in = reader;
-     off = 0;
-     len = 0;
-     line = 1;
-     col = 1;
-
-     // an earlier parse may have stopped mid-document; recycle whatever it left behind
-     clear();
-
-     try {
-         // alternate between markup and character data until the input runs dry
-         while (buffer(1)) {
-             if (buf[off] == '<') readTag();
-             readChars(current.qName != null);
-         }
-
-         // end of input is only acceptable once no element is open any more
-         if (current.qName != null) {
-             throw new Exn("reached eof without closing <"+current.qName+"> element", Exn.WFC, getLine(), getCol());
-         }
-     } finally {
-         clear(); // hand every element still on the chain back to the pool
-     }
- }
-
- /** remove any leftover elements from the linked list and queue them */
- private final void clear() {
- for (Element last = current; current.parent != null; ) {
- current = current.parent;
- last.clear();
- elements.append(last);
- }
- current.clear();
- }
-
- /**
-  * Reads one piece of markup and dispatches on its kind.
-  * Expects <tt>buf[off] == '&lt;'</tt>.
-  *
-  * <ul>
-  * <li>Comments, processing instructions, conditional sections and the
-  *     ATTLIST / DOCTYPE / ELEMENT / ENTITY / NOTATION declarations are
-  *     skipped without generating events.</li>
-  * <li>The content of a CDATA section is passed to characters().</li>
-  * <li>Start, end and empty-element tags update the <tt>current</tt> element
-  *     chain and fire startElement() and/or endElement().</li>
-  * </ul>
-  *
-  * @throws EOFException if the input ends inside the markup
-  * @throws Exn if the markup is malformed or an end tag does not match the open element
-  */
- private final void readTag() throws IOException, Exn {
-     // Start Tag '<' Name (S Attribute)* S? '>'
-     boolean starttag = true;
-
-     // End Tag '</' Name S? '>'
-     boolean endtag = false;
-
-     // if (starttag & endtag) then: EmptyElemTag '<' Name (S Attribute)* S? '/>'
-
-     // Position in the name of the ':' namespace prefix
-     int prefix = -1;
-
-     int namelen = 0;
-
-     col++; off++; len--; // step over '<'
-     if (!buffer(1)) throw new EOFException("Unexpected EOF processing element tag");
-
-     // work out what we can from the beginning of the tag
-     char s = buf[off];
-     if (s == '!') {
-         // definitions here don't necessarily conform to xml spec (as DTDs not yet implemented)
-         col++; off++; len--;
-         if (!buffer(4)) throw new EOFException("Unexpected EOF processing <! element");
-
-         boolean bad = false;
-         switch (buf[off]) {
-             case '-':
-                 if (buf[off+1] != '-') { bad = true; break; }
-                 col += 2; off += 2; len -= 2;
-
-                 // Comment '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
-                 readChars(false, "-->", false);
-                 if (!buffer(3)) throw new EOFException("Unexpected EOF in comment");
-                 col += 3; off += 3; len -= 3;
-                 break;
-
-             case '[':
-                 // sections are introduced by '<![', so they have to be recognised here,
-                 // after the '!' has been consumed
-                 if (lookingAt("[CDATA[")) {
-                     col += 7; off += 7; len -= 7;
-                     // CDATA '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>'
-                     readChars(true, "]]>", false);
-                 } else {
-                     // Conditional '<![' (Char* - (Char* ']]>' Char*)) ']]>'
-                     readChars(false, "]]>", false);
-                 }
-                 if (!buffer(3)) throw new EOFException("Unexpected EOF before end of ']]>' section");
-                 col += 3; off += 3; len -= 3;
-                 break;
-
-             // we don't care about the following definitions
-
-             case 'A':
-                 // ATTLIST '<!ATTLIST' (Char* - '>') '>'
-                 if (lookingAt("ATTLIST")) skipDeclaration(7); else bad = true;
-                 break;
-
-             case 'D':
-                 // DTD '<!DOCTYPE' (Char* - '>') '>'
-                 if (lookingAt("DOCTYPE")) skipDeclaration(7); else bad = true;
-                 break;
-
-             case 'E':
-                 if (lookingAt("ELEMENT")) {
-                     // ELEMENT '<!ELEMENT' (Char* - '>') '>'
-                     skipDeclaration(7);
-                 } else if (lookingAt("ENTITY")) {
-                     // ENTITY '<!ENTITY' (Char* - '>') '>'
-                     skipDeclaration(6);
-                 } else {
-                     bad = true;
-                 }
-                 break;
-
-             case 'N':
-                 // NOTATION '<!NOTATION' (Char* - '>') '>'
-                 if (lookingAt("NOTATION")) skipDeclaration(8); else bad = true;
-                 break;
-
-             default: bad = true;
-         }
-
-         if (bad) throw new Exn("element tag start character is invalid", Exn.MARKUP, getLine(), getCol());
-
-     } else if (s == '?') {
-         // PI (Ignored) '<?' (Char* - (Char* '?>' Char*)) '?>'
-         col++; off++; len--;
-         readChars(false, "?>", true);
-         if (!buffer(2)) throw new EOFException("Unexpected EOF at end of Processing Instruction");
-         col += 2; off += 2; len -= 2;
-
-     } else {
-         if (s == '/') {
-             // End Tag '</' Name S? '>'
-             starttag = false;
-             endtag = true;
-
-             col++; off++; len--;
-             if (!buffer(1)) throw new EOFException("Unexpected EOF processing end tag");
-             s = buf[off];
-         }
-
-         if (!Name(s)) throw new Exn("invalid starting character in element name", Exn.MARKUP, getLine(), getCol());
-
-         // find the element name (defined in XML Spec: section 2.3)
-         for (namelen = 0; ; namelen++) {
-             if (!buffer(namelen+1)) throw new EOFException("Unexpected EOF in element tag name");
-
-             s = buf[off+namelen];
-
-             if (S(s) || s == '>') {
-                 break;
-             } else if (s == '/') {
-                 endtag = true;
-                 break;
-             } else if (s == ':' && namelen > 0 && prefix < 1) {
-                 // we have a definition of the prefix range available
-                 prefix = namelen;
-             } else if (!NameChar(s)) {
-                 throw new Exn("element name contains invalid character", Exn.MARKUP, getLine(), getCol());
-             }
-         }
-
-         // process name (based on calculated region)
-         if (namelen < 1) throw new Exn("element name is null", Exn.MARKUP, getLine(), getCol());
-
-         // we have marked out the name region, so turn it into a string and move on
-         String qName = new String(buf, off, namelen);
-
-         col += namelen; off += namelen; len -= namelen;
-
-         if (starttag) {
-             // create the in-memory element representation of this beast
-             // if current.qName == null then this is the root element we're dealing with
-             if (current.qName != null) {
-                 Element next = (Element)elements.remove(false);
-                 if (next == null) next = new Element();
-                 next.parent = current;
-                 current = next;
-             }
-
-             current.qName = qName;
-
-             if (prefix > 0) {
-                 current.prefix = current.qName.substring(0, prefix);
-                 current.localName = current.qName.substring(prefix+1);
-             } else {
-                 current.prefix = null;
-                 current.localName = current.qName;
-             }
-
-             // process attributes
-             readWhitespace();
-             if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 1");
-             while (buf[off] != '/' && buf[off] != '>') {
-                 readAttribute();
-                 if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 2");
-                 readWhitespace();
-             }
-
-             // work out the uri of this element
-             current.uri = current.getUri(current.getPrefix());
-             if (current.getUri().equals("") && current.getPrefix() != null)
-                 current.addError(new Exn("undefined prefix '"+current.getPrefix()+"'", Exn.NC, getLine(), getCol()));
-
-         } else {
-             // this is an end-of-element tag
-             if (!qName.equals(current.getQName())) throw new Exn(
-                 "end tag </"+qName+"> does not line up with start tag <"+current.getQName()+">", Exn.WFC, getLine(), getCol()
-             );
-         }
-
-         // deal with whitespace
-         readWhitespace();
-
-         // process tag close
-         if (!buffer(1)) throw new EOFException("Unexpected EOF before end of tag");
-         if (buf[off] == '/') {
-             endtag = true;
-             off++; len--; col++;
-         }
-         if (!buffer(1)) throw new EOFException("Unexpected EOF before end of endtag");
-         if (buf[off] == '>') {
-             off++; len--; col++;
-         } else {
-             throw new Exn("missing '>' character from element '"+qName+"'", Exn.MARKUP, getLine(), getCol());
-         }
-
-         // send element signals
-         if (starttag) startElement(current);
-         if (endtag) {
-             endElement(current);
-
-             // we just closed an element, so remove it from the element 'stack'
-             if (current.getParent() == null) {
-                 // we just finished the root element
-                 current.clear();
-             } else {
-                 Element last = current;
-                 current = current.parent;
-                 last.clear();
-                 elements.append(last);
-             }
-         }
-     }
- }
-
- /** Returns true when the unconsumed input begins with <tt>kw</tt>; consumes nothing. */
- private final boolean lookingAt(String kw) throws IOException {
-     int n = kw.length();
-     if (!buffer(n)) return false;
-     for (int i = 0; i < n; i++) if (buf[off+i] != kw.charAt(i)) return false;
-     return true;
- }
-
- /** Skips a declaration keyword of the given length and everything up to and including the next '>'. */
- private final void skipDeclaration(int keywordLen) throws IOException, Exn {
-     col += keywordLen; off += keywordLen; len -= keywordLen;
-     readChars(false, ">", true);
-     if (!buffer(1)) throw new EOFException("Unexpected EOF in markup declaration");
-     col++; off++; len--;
- }
-
- /**
-  * Reads one <tt>name="value"</tt> attribute of the element in <tt>current</tt>.
-  * Expects Name(buf[off]).
-  *
-  * <p>Namespace declarations (<tt>xmlns</tt> and <tt>xmlns:prefix</tt>) are recorded
-  * through current.addUri(); every other attribute is stored with current.addAttr()
-  * together with the uri its prefix resolves to. The value is taken verbatim from the
-  * text between the quotes.</p>
-  *
-  * @throws EOFException if the input ends inside the attribute
-  * @throws Exn if the attribute is malformed or repeats an earlier attribute of this tag
-  */
- private final void readAttribute() throws IOException, Exn {
- int ref = 0;
- int prefix = 0;
- String n, v, p, u; // attribute name, value, prefix and uri respectively
- n = v = p = u = null;
- char s;
-
- // find the attribute name (defined in XML Spec: section 2.3); ref ends up as its length
- for (ref= 0; ; ref++) {
- if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in read attribute loop part 1");
-
- s = buf[off+ref];
-
- if (s == '=' || S(s)) {
- break;
- } else if (s == ':' && ref > 0 && prefix < 1) {
- // first ':' after the start of the name: remember the index just past it (0 means "no prefix")
- prefix = ref+1;
- } else if (!NameChar(s)) {
- throw new Exn("attribute name contains invalid characters", Exn.MARKUP, getLine(), getCol());
- }
- }
-
- // determine prefix and key name
- if (prefix > 0) {
- p = new String(buf, off, prefix-1); // characters before the ':'
- col += prefix; off += prefix; len -= prefix; ref -= prefix; // consume "prefix:"; ref is now the local name length
- }
- n = new String(buf, off, ref);
- col += ref; off += ref; len -= ref;
-
- // find name/value divider ('=')
- readWhitespace();
- if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider");
- if (buf[off] != '=') throw new Exn("attribute name not followed by '=' sign", Exn.MARKUP, getLine(), getCol());
-
- col++; off++; len--; // step over '='
- readWhitespace();
-
- if (!buffer(1)) throw new EOFException("Unexpected EOF after attribute '=' divider");
-
- char wrap; // the quote character that opened the value and must also close it
- if (buf[off] == '\'' || buf[off] == '"') {
- wrap = buf[off];
- } else {
- throw new Exn("attribute '"+n+"' must have attribute wrapped in ' or \"", Exn.MARKUP, getLine(), getCol());
- }
- col++; off++; len--; // step over the opening quote
-
- // find the attribute value; ref ends up as its length
- attval: for (ref = 0; ; ref++) {
- if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in attribute value");
-
- if (buf[off+ref] == wrap) {
- break attval;
- } else if (buf[off+ref] == '<') {
- throw new Exn("attribute value for '"+n+"' must not contain '<'", Exn.WFC, getLine(), getCol());
- }
- }
-
- v = new String(buf, off, ref);
- col += ref; off += ref; len -= ref;
-
- // remove end wrapper character
- col++; off++; len--;
-
- // process attribute
- if (p != null && p.equals("xmlns")) {
- current.addUri(n, v); // xmlns:n="v" maps prefix n to uri v
- } else if (n.equals("xmlns")) {
- if (current.getUri().equals("")) {
- current.addUri("", v); // xmlns="v" is stored under the empty prefix
- } else {
- current.addError(new Exn("default namespace definition repeated", Exn.NC, getLine(), getCol()));
- }
- } else {
- // find attribute uri
- u = current.getUri(p);
- if (p != null && u.equals("")) current.addError(new Exn("undefined attribute prefix '"+p+"'", Exn.NC, getLine(), getCol()));
-
- // check to see if attribute is a repeat (same name and same uri as one already stored)
- for (int i=0; current.len > i; i++) if (n.equals(current.getAttrKey(i)) && u.equals(current.getAttrUri(i))) throw new Exn(
- "attribute name '"+n+"' may not appear more than once in the same element tag", Exn.WFC, getLine(), getCol()
- );
-
- current.addAttr(n, v, u);
- }
- }
-
- /**
-  * Reads an entity or character reference and reports its replacement text
-  * through characters(). Expects <tt>buf[off] == '&amp;'</tt>.
-  *
-  * <p>Only numeric character references (decimal and hexadecimal) and the five
-  * predefined entities amp, apos, gt, lt and quot are understood; anything else
-  * is reported as an unknown entity.</p>
-  *
-  * @throws EOFException if the input ends inside the reference
-  * @throws Exn if the reference is malformed or names an unknown entity
-  */
- private final void readEntity() throws IOException, Exn {
-     off++; len--; // step over '&'
-     if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity");
-
-     boolean unknown = false;
-     switch (buf[off]) {
-         case '#':
-             off++; len--;
-
-             int radix;
-             if (buf[off] == 'x') { off++; len--; radix = 16; } else { radix = 10; }
-             int c = 0;
-             int digits = 0;
-
-             // read in each digit, then shift total value to the left and add the extra
-             // style of loop is slightly different from all the others, as this should run a limited number of times
-             findchar: while (true) {
-                 if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity");
-                 int d = Character.digit(buf[off], radix);
-                 if (d == -1) {
-                     // the digits must be terminated by ';' and there must be at least one of them
-                     if (buf[off] != ';' || digits == 0)
-                         throw new Exn("illegal characters in entity reference", Exn.WFC, getLine(), getCol());
-                     off++; len--; col++;
-                     break findchar;
-                 }
-                 c = (c * radix) + d;
-                 // stop before the accumulator can overflow; nothing above U+10FFFF is a character
-                 if (c > 0x10FFFF) throw new Exn("illegal characters in entity reference", Exn.WFC, getLine(), getCol());
-                 digits++;
-
-                 off++; len--;
-             }
-
-             // c is the referenced code point itself, not a digit value to be rendered
-             if (c > 0xFFFF) {
-                 // outside the 16-bit range: report it as a UTF-16 surrogate pair
-                 int v = c - 0x10000;
-                 char[] pair = new char[] { (char)(0xD800 + (v >> 10)), (char)(0xDC00 + (v & 0x3FF)) };
-                 characters(pair, 0, 2);
-             } else {
-                 singlechar[0] = (char)c;
-                 characters(singlechar, 0, 1);
-             }
-             break;
-
-         case 'a':
-             if (buffer(4) && buf[off+1] == 'm' && buf[off+2] == 'p' && buf[off+3] == ';') {
-                 characters(single_amp, 0, 1); // &
-                 off += 4; len -= 4; col++;
-             } else if (buffer(5) && buf[off+1] == 'p' && buf[off+2] == 'o' && buf[off+3] == 's' && buf[off+4] == ';') {
-                 characters(single_apos, 0, 1); // '
-                 off += 5; len -= 5; col++;
-             } else {
-                 unknown = true;
-             }
-             break;
-
-         case 'g':
-             if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
-                 characters(single_gt, 0, 1); // >
-                 off += 3; len -= 3; col++;
-             } else {
-                 unknown = true;
-             }
-             break;
-
-         case 'l':
-             if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
-                 characters(single_lt, 0, 1); // <
-                 off += 3; len -= 3; col++;
-             } else {
-                 unknown = true;
-             }
-             break;
-
-         case 'q':
-             if (buffer(5) && buf[off+1] == 'u' && buf[off+2] == 'o' && buf[off+3] == 't' && buf[off+4] == ';') {
-                 characters(single_quot, 0, 1); // "
-                 off += 5; len -= 5; col++;
-             } else {
-                 unknown = true;
-             }
-             break;
-
-         // TODO: check a parser-level Hash of defined entities
-         default:
-             // any other name is just as unknown as e.g. "&abc;", which the cases above reject
-             unknown = true;
-     }
-
-     if (unknown) throw new Exn("unknown entity (<!ENTITY> not supported)", Exn.WFC, getLine(), getCol());
- }
-
- /**
-  * Reads until the passed string is encountered, leaving that string unconsumed
-  * at <tt>buf[off]</tt>. Returns silently if the input ends first.
-  *
-  * @param p if true, pass the characters read to characters(char[],int,int)
-  * @param match the terminating string to stop in front of
-  * @param entities if true, an '&amp;' is never tested as the start of <tt>match</tt>,
-  *        and when <tt>p</tt> is also true it is expanded by readEntity()
-  */
- private final void readChars(boolean p, String match, boolean entities) throws IOException, Exn {
- int ref; // number of characters scanned past off and not yet flushed
- char[] end = match.toCharArray();
-
- for (boolean more = true; more;) {
- if (!buffer(1)) return;
-
- buf: for (ref = 0; ref < len; ref++) {
- switch (buf[off+ref]) {
- case '\r': // windows or macos9 newline
- // normalise and process
- buf[off+ref] = '\n'; ref++;
- if (p) characters(buf, off, ref);
- off += ref; len -= ref; ref = -1; // -1 so that the loop's ref++ restarts the scan at the new off
- line++; col = 1;
-
- // windows double-char newline; skip the next char
- if (!buffer(1)) return;
- if (buf[off] == '\n') { off++; len--; }
- break;
-
- case '\n': // unix newline
- ref++;
- if (p) characters(buf, off, ref);
- off += ref; len -= ref; ref = -1; // -1 so that the loop's ref++ restarts the scan at the new off
- line++; col = 1;
- break;
-
- case '&': // entity
- if (entities) {
- if (p) {
- if (ref > 0) characters(buf, off, ref); // flush the text in front of the entity
- off += ref; len -= ref; ref = -1;
- readEntity();
- }
- break;
- }
- // entities == false: no break above, so '&' falls through and is treated like any other character
- default:
- if (!buffer(ref+end.length)) continue buf; // too few characters left for a match here; keep scanning
- for (int i=0; end.length > i; i++) if (end[i] != buf[off+ref+i]) continue buf;
- more = false; // terminator starts at off+ref
- break buf;
- }
- }
-
- if (p && ref > 0) characters(buf, off, ref); // flush whatever was scanned in this pass
- off += ref; len -= ref; col += ref;
- }
- }
-
- /**
- * Reads until a <tt><</tt> symbol is encountered, leaving it unconsumed at
- * <tt>buf[off]</tt>. Returns silently if the input ends first.
- * @param p If true call the characters(char[],int,int) function for the processed characters
- *          and expand entities through readEntity(); if false the text is skipped
- */
- private final void readChars(boolean p) throws IOException, Exn {
- int ref; // number of characters scanned past off and not yet flushed
-
- for (boolean more = true; more;) {
- if (!buffer(1)) return;
-
- buf: for (ref = 0; ref < len; ref++) {
- switch (buf[off+ref]) {
- case '\r': // windows or macos9 newline
- // normalise and process
- buf[off+ref] = '\n'; ref++;
- if (p) characters(buf, off, ref);
- off += ref; len -= ref; ref = -1; // -1 so that the loop's ref++ restarts the scan at the new off
- line++; col = 1;
-
- // windows double-char newline; skip the next char
- if (!buffer(1)) return;
- if (buf[off] == '\n') { off++; len--; }
- break;
-
- case '\n': // unix newline
- ref++;
- if (p) characters(buf, off, ref);
- off += ref; len -= ref; ref = -1; // -1 so that the loop's ref++ restarts the scan at the new off
- line++; col = 1;
- break;
-
- case '&': // entity
- if (p) {
- if (ref > 0) characters(buf, off, ref); // flush the text in front of the entity
- off += ref; len -= ref; ref = -1;
- readEntity();
- }
- break;
-
- case '<': // end of chars section
- more = false;
- break buf;
- }
- }
-
- if (p && ref > 0) characters(buf, off, ref); // flush whatever was scanned in this pass
- off += ref; len -= ref; col += ref;
- }
- }
-
- /**
-  * Reads until a non-whitespace symbol is encountered, leaving it unconsumed at
-  * <tt>buf[off]</tt>. Returns silently if the input ends first.
-  *
-  * <p>whitespace() is called once per line break, with the run that ends in that
-  * (normalised) newline; spaces and tabs that are not followed by a line break are
-  * consumed without a callback.</p>
-  */
- private final void readWhitespace() throws IOException, Exn {
- int ref; // number of characters scanned past off and not yet consumed
-
- for (boolean more = true; more;) {
- if (!buffer(1)) return;
-
- buf: for (ref = 0; ref < len; ref++) {
- switch (buf[off+ref]) {
- case '\r': // windows or macos9 newline
- // normalise and process
- buf[off+ref] = '\n';
- whitespace(buf, off, ++ref);
- off += ref; len -= ref; ref = -1; // -1 so that the loop's ref++ restarts the scan at the new off
- line++; col = 1;
-
- // windows double-char newline; skip the next char
- if (!buffer(1)) return;
- if (buf[off] == '\n') { off++; len--; }
- break;
-
- case '\n': // unix newline
- whitespace(buf, off, ++ref);
- off += ref; len -= ref; ref = -1; // -1 so that the loop's ref++ restarts the scan at the new off
- line++; col = 1;
- break;
-
- case ' ': // space
- case '\t': // tab
- break;
-
- default: // end of whitespace
- more = false;
- break buf;
- }
- }
-
- off += ref; len -= ref; col += ref; // consume the scanned spaces and tabs
- }
- }
-
- /**
-  * Attempts to make at least <tt>min</tt> unconsumed characters available at
-  * <tt>buf[off]</tt>.
-  *
-  * <p>The unconsumed data may be moved to the start of the buffer, or into a larger
-  * buffer, so callers must address it relative to <tt>off</tt> and must not keep
-  * absolute indexes across a call.</p>
-  *
-  * @param min Minimum number of characters to have buffered (even if we have to block to do it).
-  * @return false if the input ends before min characters are available.
-  * @throws IOException if the underlying reader fails
-  */
- private final boolean buffer(int min) throws IOException {
-     // exactly min buffered characters already satisfy the request
-     if (len >= min) return true;
-
-     if (buf.length - off >= min) {
-         // min characters fit between off and the end of the buffer as it is
-     } else if (buf.length >= min) {
-         // moving offset data to the start leaves enough room
-         System.arraycopy(buf, off, buf, 0, len);
-         off = 0;
-     } else {
-         // buffer size will have to be increased; doubling alone may still be too small
-         char[] newbuf = new char[Math.max(buf.length * 2, min)];
-         System.arraycopy(buf, off, newbuf, 0, len);
-         buf = newbuf;
-         off = 0;
-     }
-
-     // there is now room for at least (min - len) more characters, so no read asks for zero
-     while (len < min) {
-         int newlen = in.read(buf, off+len, buf.length-(off+len));
-         if (newlen < 0) return false;
-         len += newlen;
-     }
-
-     return true;
- }
-
-
- /////////////////////////////////////////////////////////////////////////////////////////////
- // Abstract SAX-Like Interface
- /////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
- * Called when the start of an element is processed, after the tag's attributes
- * and closing <tt>&gt;</tt> have been read. For an empty-element tag the call is
- * followed directly by endElement() for the same element.
- *
- * <p><b>DO NOT</b> store a reference to the Element object, as
- * they are reused by XML Parser.</p>
- *
- * @param e the element that was just opened
- * @throws Exn to abort the parse
- */
- public abstract void startElement(Element e) throws Exn;
-
- /**
- * Represents up to a line of character data.
- *
- * <p>Newlines are all normalised to the Unix \n as per the XML Spec,
- * and a newline will only appear as the last character in the passed
- * array segment.</p>
- *
- * <p>XML.getLine() and XML.getCol() report the position at the
- * beginning of this character segment, which can be processed in a
- * line-by-line fashion due to the above newline restriction.</p>
- *
- * @param ch array holding the characters
- * @param start index in <tt>ch</tt> of the first character of the segment
- * @param length number of characters in the segment
- */
- public abstract void characters(char[] ch, int start, int length) throws Exn, IOException;
-
- /**
-  * Represents up to a line of ignorable whitespace.
-  *
-  * @param ch array holding the whitespace
-  * @param start index in <tt>ch</tt> of the first character of the segment
-  * @param length number of characters in the segment
-  */
- public abstract void whitespace(char[] ch, int start, int length) throws Exn, IOException;
-
- /**
-  * Represents the end of an Element. Called for end tags and, directly after
-  * startElement(), for empty-element tags.
-  *
-  * @param e the element being closed; it is cleared for reuse once this call returns
-  */
- public abstract void endElement(Element e) throws Exn, IOException;
-
-
- /////////////////////////////////////////////////////////////////////////////////////////////
- // Inner Classes for Parser Support
- /////////////////////////////////////////////////////////////////////////////////////////////
-
- /**
-  * Represents an element in an XML document. Stores a reference to its
-  * parent, forming a one-way linked list.
-  *
-  * Element objects are reused, so client code making use of them must
-  * drop their references after the specific element process function
-  * has returned.
-  */
- public static final class Element {
-
-     private static final int DEFAULT_ATTR_SIZE = 10;
-
-     protected Element parent = null;
-
-     protected String uri = null;
-     protected String localName = null;
-     protected String qName = null;
-     protected String prefix = null;
-
-     /** Namespace declarations made on this element: prefix ("" for the default namespace) to uri. */
-     protected Hash urimap = new Hash(3,3);
-
-     // parallel arrays; only the first len entries of each are in use
-     protected String[] keys = new String[DEFAULT_ATTR_SIZE];
-     protected String[] vals = new String[DEFAULT_ATTR_SIZE];
-     protected String[] uris = new String[DEFAULT_ATTR_SIZE];
-     protected int len = 0;
-
-     protected Exn[] errors = new Exn[] {};
-
-
-     /** Parent of current element. */
-     public Element getParent() { return parent; }
-
-     /** Qualified Name of current element. XML Namespace Spec 14-Jan-1999 [6] */
-     public String getQName() { return qName; }
-
-     /** LocalPart of current element. XML Namespace Spec 14-Jan-1999 [8] */
-     public String getLocalName() { return localName; }
-
-     /** Prefix of current element. Substring of qName. XML Namespace Spec 14-Jan-1999 [7] */
-     public String getPrefix() { return prefix; }
-
-     public Hash getUriMap() { return urimap; } // HACK
-
-     /** URI of current tag. XML Namespace Spec 14-Jan-1999 section 1 */
-     public String getUri() { return getUri(prefix); }
-
-     /**
-      * URI of a given prefix, looked up on this element first and then on each
-      * ancestor in turn. Never returns null, instead gives "".
-      *
-      * @param p the prefix to resolve; null stands for the default namespace
-      */
-     public String getUri(String p) {
-         String key = (p == null) ? "" : p;
-         String ret = null;
-         for (Element e = this; e != null && ret == null; e = e.getParent()) {
-             ret = (String)e.urimap.get(key);
-         }
-         return ret == null ? "" : ret;
-     }
-
-     /** Name of the attribute at position pos, or null if there is no such attribute. */
-     public String getAttrKey(int pos) { return hasAttr(pos) ? keys[pos] : null; }
-
-     /** Value of the attribute at position pos, or null if there is no such attribute. */
-     public String getAttrVal(int pos) { return hasAttr(pos) ? vals[pos] : null; }
-
-     /** URI of the attribute at position pos, or null if there is no such attribute. */
-     public String getAttrUri(int pos) { return hasAttr(pos) ? uris[pos] : null; }
-
-     /** Current number of attributes in the element. */
-     public int getAttrLen() { return len; }
-
-     /** An array of non-fatal errors related to this element. */
-     public Exn[] getErrors() { return errors; }
-
-
-     protected Element() { }
-
-     /** True when pos addresses a stored attribute; negative positions are rejected like too-large ones. */
-     private boolean hasAttr(int pos) { return pos >= 0 && pos < len; }
-
-     /** Add (replace if exists in current element) a Namespace prefix/uri map. */
-     protected void addUri(String name, String value) {
-         urimap.put(name, value);
-     }
-
-     /** Add an attribute. */
-     protected void addAttr(String key, String val, String uri) {
-         if (len == keys.length) {
-             // increase the size of the attributes arrays
-             String[] newkeys = new String[keys.length*2];
-             String[] newvals = new String[vals.length*2];
-             String[] newuris = new String[uris.length*2];
-             System.arraycopy(keys, 0, newkeys, 0, keys.length);
-             System.arraycopy(vals, 0, newvals, 0, vals.length);
-             System.arraycopy(uris, 0, newuris, 0, uris.length);
-             keys = newkeys; vals = newvals; uris = newuris;
-         }
-
-         keys[len] = key;
-         vals[len] = val;
-         uris[len] = uri;
-         len++;
-     }
-
-     /** Add an error. */
-     protected void addError(Exn e) {
-         // it doesn't really matter about continually expanding the array, as this case is quite rare
-         Exn[] newe = new Exn[errors.length+1];
-         System.arraycopy(errors, 0, newe, 0, errors.length);
-         newe[errors.length] = e;
-         errors = newe;
-     }
-
-     /** Empty out all the data from the Element. */
-     protected void clear() {
-         parent = null;
-         uri = localName = qName = prefix = null;
-         urimap.clear();
-
-         if (keys.length != vals.length || vals.length != uris.length) {
-             // the parallel arrays have got out of step; start again from scratch
-             keys = new String[DEFAULT_ATTR_SIZE];
-             vals = new String[DEFAULT_ATTR_SIZE];
-             uris = new String[DEFAULT_ATTR_SIZE];
-         } else {
-             // keep the (possibly grown) arrays but drop every reference they hold
-             for (int i=0; keys.length > i; i++) { keys[i] = null; vals[i] = null; uris[i] = null; }
-         }
-         len = 0;
-
-         errors = new Exn[] {};
-     }
- }
-
- /** Parse or Structural Error */
- public static class Exn extends Exception {
- /** Violation of Markup restrictions in XML Specification - Fatal Error */
- public static final int MARKUP = 1;
-
- /** Well-Formedness Constraint Violation - Fatal Error */
- public static final int WFC = 2;
-
- /** Namespace Constraint Violation - Recoverable Error */
- public static final int NC = 3;
-
- /** Schema Violation - Fatal Error */
- public static final int SCHEMA = 4;
-
- private String error;
- private int type;
- private int line;
- private int col;
-
- public Exn(String e) { this(e, MARKUP, -1, -1); }
-
- public Exn(String e, int type, int line, int col) {
- this.error = e;
- this.type = type;
- this.line = line;
- this.col = col;
- }
-
- public int getType() { return this.type; }
- public int getLine() { return this.line; }
- public int getCol() { return this.col; }
- public String getMessage() { return this.error + (line >= 0 && col >= 0 ? " at " + line + ":" + col: ""); }
- }
-
-
- /////////////////////////////////////////////////////////////////////////////////////////////
- // Static Support Functions for the XML Specification
- /////////////////////////////////////////////////////////////////////////////////////////////
-
- // attempt to avoid these functions unless you *expect* the input to fall in the given range.
-
- /** First Character of Name - XML Specification 1.0 [5] */
- private static final boolean Name(char c) {
-     // cheap ASCII tests first; the full Letter ranges are only consulted as a last resort
-     if (BaseCharAscii(c)) return true;
-     if (c == '_' || c == ':') return true;
-     return Letter(c);
- }
-
- /** NameChar - XML Specification 1.0 [4] */
- private static final boolean NameChar(char c) {
- return BaseCharAscii(c) || c == '.' || c == '-' || c == '_' || c == ':'
- || Digit(c) || Letter(c) || Extender(c); // TODO: || CombiningChar(c);
- }
-
- /** Letter - XML Specification 1.0 [84] */
- private static final boolean Letter(char c) {
-     if (BaseChar(c)) return true;
-     return Ideographic(c);
- }
-
- /** Elements of BaseChar that exist in ASCII. */
- private static final boolean BaseCharAscii(char c) {
- return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A');
- }
-
- /** Char - XML Specification 1.0 [2] */
- private static final boolean Char(char c) {
-     // tab, line feed (U+000A) and carriage return (U+000D) are the only control characters
-     // allowed; they are written as escapes because javac can't handle the unicode form of them
-     if (c == '\t' || c == '\n' || c == '\r') return true;
-     if (c < '\u0020') return false;
-     // everything from space upwards, except the surrogate block and the last two code units
-     return c <= '\uD7FF' || (c >= '\uE000' && c <= '\uFFFD');
- }
-
- /** BaseChar - XML Specification 1.0 [85] */
- private static final boolean BaseChar(char c) {
- return BaseCharAscii(c) || (c >= '\u00C0' && c <= '\u00D6')
- || (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131')
- || (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E')
- || (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5')
- || (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1')
- || (c == '\u0386') || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
- || (c >= '\u038E' && c <= '\u03A1') || (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6')
- || (c == '\u03DA') || (c == '\u03DC') || (c == '\u03DE')
- || (c == '\u03E0')
- || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F')
- || (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4')
- || (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB')
- || (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556')
- || (c == '\u0559')
- || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2')
- || (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7')
- || (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3')
- || (c == '\u06D5')
- || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939')
- || (c == '\u093D')
- || (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990')
- || (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0')
- || (c == '\u09B2')
- || (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DF' && c <= '\u09E1') || (c >= '\u09F0' && c <= '\u09F1')
- || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || (c >= '\u0A13' && c <= '\u0A28')
- || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || (c >= '\u0A35' && c <= '\u0A36')
- || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C')
- || (c == '\u0A5E')
- || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B')
- || (c == '\u0A8D')
- || (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0')
- || (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9')
- || (c == '\u0ABD')
- || (c == '\u0AE0')
- || (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28')
- || (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39')
- || (c == '\u0B3D')
- || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || (c >= '\u0B85' && c <= '\u0B8A')
- || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || (c >= '\u0B99' && c <= '\u0B9A')
- || (c == '\u0B9C')
- || (c >= '\u0B9E' && c <= '\u0B9F') || (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA')
- || (c >= '\u0BAE' && c <= '\u0BB5') || (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C')
- || (c >= '\u0C0E' && c <= '\u0C10') || (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33')
- || (c >= '\u0C35' && c <= '\u0C39') || (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C')
- || (c >= '\u0C8E' && c <= '\u0C90') || (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3')
- || (c >= '\u0CB5' && c <= '\u0CB9')
- || (c == '\u0CDE')
- || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || (c >= '\u0D0E' && c <= '\u0D10')
- || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || (c >= '\u0D60' && c <= '\u0D61')
- || (c >= '\u0E01' && c <= '\u0E2E')
- || (c == '\u0E30')
- || (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82')
- || (c == '\u0E84')
- || (c >= '\u0E87' && c <= '\u0E88')
- || (c == '\u0E8A')
- || (c == '\u0E8D')
- || (c >= '\u0E94' && c <= '\u0E97') || (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3')
- || (c == '\u0EA5')
- || (c == '\u0EA7')
- || (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE')
- || (c == '\u0EB0')
- || (c >= '\u0EB2' && c <= '\u0EB3')
- || (c == '\u0EBD')
- || (c >= '\u0EC0' && c <= '\u0EC4') || (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69')
- || (c >= '\u10A0' && c <= '\u10C5') || (c >= '\u10D0' && c <= '\u10F6')
- || (c == '\u1100')
- || (c >= '\u1102' && c <= '\u1103') || (c >= '\u1105' && c <= '\u1107')
- || (c == '\u1109')
- || (c >= '\u110B' && c <= '\u110C') || (c >= '\u110E' && c <= '\u1112')
- || (c == '\u113C')
- || (c == '\u113E')
- || (c == '\u1140')
- || (c == '\u114C')
- || (c == '\u114E')
- || (c == '\u1150')
- || (c >= '\u1154' && c <= '\u1155')
- || (c == '\u1159')
- || (c >= '\u115F' && c <= '\u1161')
- || (c == '\u1163')
- || (c == '\u1165')
- || (c == '\u1167')
- || (c == '\u1169')
- || (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173')
- || (c == '\u1175')
- || (c == '\u119E')
- || (c == '\u11A8')
- || (c == '\u11AB')
- || (c >= '\u11AE' && c <= '\u11AF') || (c >= '\u11B7' && c <= '\u11B8')
- || (c == '\u11BA')
- || (c >= '\u11BC' && c <= '\u11C2')
- || (c == '\u11EB')
- || (c == '\u11F0')
- || (c == '\u11F9')
- || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || (c >= '\u1F00' && c <= '\u1F15')
- || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || (c >= '\u1F48' && c <= '\u1F4D')
- || (c >= '\u1F50' && c <= '\u1F57')
- || (c == '\u1F59')
- || (c == '\u1F5B')
- || (c == '\u1F5D')
- || (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC')
- || (c == '\u1FBE')
- || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || (c >= '\u1FD0' && c <= '\u1FD3')
- || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || (c >= '\u1FF2' && c <= '\u1FF4')
- || (c >= '\u1FF6' && c <= '\u1FFC')
- || (c == '\u2126')
- || (c >= '\u212A' && c <= '\u212B')
- || (c == '\u212E')
- || (c >= '\u2180' && c <= '\u2182') || (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA')
- || (c >= '\u3105' && c <= '\u312C') || (c >= '\uAC00' && c <= '\uD7A3');
- }
-
- /**
- * Ideographic - XML Specification 1.0 [86].
- *
- * Tests a single character against the three alternatives of the
- * production: the range U+4E00..U+9FA5, the single code point U+3007,
- * and the range U+3021..U+3029 (all bounds inclusive).
- *
- * @param c the character to classify
- * @return true if c matches one of those alternatives, false otherwise
- */
- private static final boolean Ideographic(char c) {
- return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
- }
-
- /** CombiningChar - XML Specification 1.0 [87] */
- /*private static final boolean CombiningChar(char c) {
- return (c >= '\u0300' && c <= '\u0345')
- || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || (c >= '\u0591' && c <= '\u05A1')
- || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD')
- || (c == '\u05BF')
- || (c >= '\u05C1' && c <= '\u05C2')
- || (c == '\u05C4')
- || (c >= '\u064B' && c <= '\u0652')
- || (c == '\u0670')
- || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4')
- || (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903')
- || (c == '\u093C')
- || (c >= '\u093E' && c <= '\u094C')
- || (c == '\u094D')
- || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || (c >= '\u0981' && c <= '\u0983')
- || (c == '\u09BC')
- || (c == '\u09BE')
- || (c == '\u09BF')
- || (c >= '\u09C0' && c <= '\u09C4') || (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD')
- || (c == '\u09D7')
- || (c >= '\u09E2' && c <= '\u09E3')
- || (c == '\u0A02')
- || (c == '\u0A3C')
- || (c == '\u0A3E')
- || (c == '\u0A3F')
- || (c >= '\u0A40' && c <= '\u0A42') || (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D')
- || (c >= '\u0A70' && c <= '\u0A71') || (c >= '\u0A81' && c <= '\u0A83')
- || (c == '\u0ABC')
- || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || (c >= '\u0ACB' && c <= '\u0ACD')
- || (c >= '\u0B01' && c <= '\u0B03')
- || (c == '\u0B3C')
- || (c >= '\u0B3E' && c <= '\u0B43') || (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D')
- || (c >= '\u0B56' && c <= '\u0B57') || (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2')
- || (c >= '\u0BC6' && c <= '\u0BC8') || (c >= '\u0BCA' && c <= '\u0BCD')
- || (c == '\u0BD7')
- || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || (c >= '\u0C46' && c <= '\u0C48')
- || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || (c >= '\u0C82' && c <= '\u0C83')
- || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || (c >= '\u0CCA' && c <= '\u0CCD')
- || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || (c >= '\u0D3E' && c <= '\u0D43')
- || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D')
- || (c == '\u0D57')
- || (c == '\u0E31')
- || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E')
- || (c == '\u0EB1')
- || (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD')
- || (c >= '\u0F18' && c <= '\u0F19')
- || (c == '\u0F35')
- || (c == '\u0F37')
- || (c == '\u0F39')
- || (c == '\u0F3E')
- || (c == '\u0F3F')
- || (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95')
- || (c == '\u0F97')
- || (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7')
- || (c == '\u0FB9')
- || (c >= '\u20D0' && c <= '\u20DC')
- || (c == '\u20E1')
- || (c >= '\u302A' && c <= '\u302F')
- || (c == '\u3099')
- || (c == '\u309A');
- }*/
-
- /**
- * Digit - XML Specification 1.0 [88].
- *
- * Tests a single character against fifteen inclusive ranges, the first
- * being the ASCII digits U+0030..U+0039. The range U+0BE7..U+0BEF begins
- * one code point later than the neighbouring ranges (which begin at ..6);
- * that follows the specification's production and is not a typo.
- *
- * @param c the character to classify
- * @return true if c lies in any of the ranges, false otherwise
- */
- private static final boolean Digit(char c) {
- return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9')
- || (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F')
- || (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF')
- || (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F')
- || (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
- }
-
- /**
- * Extender - XML Specification 1.0 [89].
- *
- * Tests a single character against eight individual code points
- * (U+00B7, U+02D0, U+02D1, U+0387, U+0640, U+0E46, U+0EC6, U+3005) and
- * three inclusive ranges (U+3031..U+3035, U+309D..U+309E, U+30FC..U+30FE).
- *
- * @param c the character to classify
- * @return true if c matches one of those alternatives, false otherwise
- */
- private static final boolean Extender(char c) {
- return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387'
- || c == '\u0640' || c == '\u0E46' || c == '\u0EC6' || c == '\u3005'
- || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE');
- }
-
- /**
- * S (white space) - XML Specification 1.0 [3].
- *
- * Production [3] describes a run of one or more white space characters;
- * this method tests one character only.
- *
- * @param c the character to classify
- * @return true if c is a space (U+0020), a tab (U+0009), a carriage
- *         return or a line feed; false otherwise
- */
- private static final boolean S(char c) {
- return c == '\u0020' || c == '\u0009' || c == '\r' || c == '\n';
- }
-}