X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=src%2Forg%2Fxwt%2Futil%2FXML.java;h=3c9d31aa686d2407c5906ee9efc03672ae9b1d01;hb=163ff4c57ec557bb8615f80e875f045cdfbad696;hp=099acce3738f6302b9a8fa5878f606f1d2e2a09a;hpb=8b9fa0fd9b166e4451afb1e462a6c63b0e70728e;p=org.ibex.core.git diff --git a/src/org/xwt/util/XML.java b/src/org/xwt/util/XML.java index 099acce..3c9d31a 100644 --- a/src/org/xwt/util/XML.java +++ b/src/org/xwt/util/XML.java @@ -1,3 +1,10 @@ +// Copyright (C) 2003 Adam Megacz all rights reserved. +// +// You may modify, copy, and redistribute this code under the terms of +// the GNU Library Public License version 2.1, with the exception of +// the portion of clause 6a after the semicolon (aka the "obnoxious +// relink clause") + package org.xwt.util; import java.io.Reader; @@ -11,7 +18,7 @@ import java.io.EOFException; * about an xml file as it is parsed. To initate a parse, use the parse() * function. * - *

IMPLEMENTATION NOTES

+ *

Implementation Notes

*

As the parser traverses into an element, it adds it to the linked list * called elements. However, elements has been pre-filled * with instances of the Element inner class. So in the vast majority of @@ -25,22 +32,20 @@ import java.io.EOFException; * will be run through a test on every single unicode character range before * being declared invalid.

* - *

IMPLEMENTATION RULES

* - * - *

Other Notes

- * * * @author David Crawshaw - * @see XML-Specification-1.0 http://w3.org/TR/REC-xml + * @see XML Specification + * @see XML Namespaces */ public abstract class XML { @@ -50,8 +55,7 @@ public abstract class XML public static final int BUFFER_SIZE = 255; - /** static pool of XML.Element instances shared by all XML Parsers. - * elements in the queue have dirty prev and next references, that need cleaning before use. */ + /** static pool of XML.Element instances shared by all XML Parsers. */ private static final Queue elements = new Queue(30); private static final char[] single_amp = new char[] { '&' }; @@ -81,7 +85,6 @@ public abstract class XML current = (Element)elements.remove(false); if (current == null) current = new Element(); - current.prev = current.next = null; } @@ -96,36 +99,39 @@ public abstract class XML * * Careful with threading, as this function is not synchronized. */ - public final void parse(Reader reader) throws IOException, XMLException { + public final void parse(Reader reader) throws IOException, Exn { in = reader; off = len = 0; line = col = 1; - clean(); // clean up possible mid-way linked-list element + clear(); // clean up possible mid-way linked-list element try { // process the stream while (true) { if (!buffer(1)) { if (current.qName == null) break; - throw new WFCException("reached eof without closing <"+current.qName+"> element", getLine(), getCol()); + throw new Exn("reached eof without closing <"+current.qName+"> element", Exn.WFC, getLine(), getCol()); } if (buf[off] == '<') readTag(); readChars(current.qName != null); } - } finally { clean(); } // clean up elements + } finally { clear(); } // clean up elements } /** remove any leftover elements from the linked list and queue them */ - private final void clean() { - while (current.prev != null) elements.append((current = current.prev).next); - current.next = null; - current.qName = null; + private final void clear() { + for (Element last = current; current.parent != null; ) { + current = current.parent; + last.clear(); + elements.append(last); + } + current.clear(); } /** reads in a tag. expects buf[off] == '<' */ - private final void readTag() throws IOException, XMLException { + private final void readTag() throws IOException, Exn { // Start Tag '<' Name (S Attribute)* S? '>' boolean starttag = true; @@ -221,7 +227,7 @@ public abstract class XML default: bad = true; } - if (bad) throw new MarkupException("element tag start character is invalid", getLine(), getCol()); + if (bad) throw new Exn("element tag start character is invalid", Exn.MARKUP, getLine(), getCol()); } else if (s == '?') { // PI (Ignored) '' Char*)) '?>' @@ -254,7 +260,7 @@ public abstract class XML s = buf[off]; } - if (!Name(s)) throw new MarkupException("invalid starting character in element name", getLine(), getCol()); + if (!Name(s)) throw new Exn("invalid starting character in element name", Exn.MARKUP, getLine(), getCol()); // find the element name (defined in XML Spec: section 2.3) for (namelen = 0; ; namelen++) { @@ -271,12 +277,12 @@ public abstract class XML // we have a definition of the prefix range available prefix = namelen; } else if (!NameChar(s)) { - throw new MarkupException("element name contains invalid character", getLine(), getCol()); + throw new Exn("element name contains invalid character", Exn.MARKUP, getLine(), getCol()); } } // process name (based on calculated region) - if (namelen < 1) throw new MarkupException("element name is null", getLine(), getCol()); + if (namelen < 1) throw new Exn("element name is null", Exn.MARKUP, getLine(), getCol()); // we have marked out the name region, so turn it into a string and move on String qName = new String(buf, off, namelen); @@ -287,19 +293,14 @@ public abstract class XML // create the in-memory element representation of this beast // if current.qName == null then this is the root element we're dealing with if (current.qName != null) { - if (current.next == null) { - // we're at the end of the default element depth - current.next = (Element)elements.remove(false); - if (current.next == null) current.next = new Element(); - current.next.prev = current; - current.next.next = null; - } - current = current.next; + Element next = (Element)elements.remove(false); + if (next == null) next = new Element(); + //next.clear(); // TODO: remove as elements now checked as they're added to the queue + next.parent = current; + current = next; } - current.clear(); current.qName = qName; - current.defaultUri = current.uri = null; if (prefix > 0) { current.prefix = current.qName.substring(0, prefix); @@ -318,27 +319,15 @@ public abstract class XML readWhitespace(); } - // inherit namespace default uri if attribute was not provided - if (current.defaultUri == null) { - current.defaultUri = (current.prev != null) ? current.prev.defaultUri : null; - } - // work out the uri of this element - if (current.prefix == null) { - // element has no prefix, therefore is the default uri - current.uri = current.defaultUri; - } else { - // work back through the hashtables until uri is found - for (Element e = current; e != null && current.uri == null; e = e.prev) { - current.uri = (String)e.urimap.get(current.prefix); - } - if (current.uri == null) current.addError(new NCException("undefined prefix '"+current.prefix+"'", getLine(), getCol())); - } + current.uri = current.getUri(current.getPrefix()); + if (current.getUri().equals("") && current.getPrefix() != null) + current.addError(new Exn("undefined prefix '"+current.getPrefix()+"'", Exn.NC, getLine(), getCol())); } else { // this is an end-of-element tag - if (!qName.equals(current.qName)) throw new WFCException( - "end tag does not line up with start tag <"+current.qName+">", getLine(), getCol() + if (!qName.equals(current.getQName())) throw new Exn( + "end tag does not line up with start tag <"+current.getQName()+">", Exn.WFC, getLine(), getCol() ); } @@ -355,7 +344,7 @@ public abstract class XML if (buf[off] == '>') { off++; len--; col++; } else { - throw new MarkupException("missing '>' character from element '"+qName+"'", getLine(), getCol()); + throw new Exn("missing '>' character from element '"+qName+"'", Exn.MARKUP, getLine(), getCol()); } // send element signals @@ -364,19 +353,21 @@ public abstract class XML endElement(current); // we just closed an element, so remove it from the element 'stack' - if (current.prev == null) { + if (current.getParent() == null) { // we just finished the root element - current.qName = null; + current.clear(); } else { - elements.append((current = current.prev).next); - current.next = null; + Element last = current; + current = current.parent; + last.clear(); + elements.append(last); } } } } /** reads in an attribute of an element. expects Name(buf[off]) */ - private final void readAttribute() throws IOException, XMLException { + private final void readAttribute() throws IOException, Exn { int ref = 0; int prefix = 0; String n, v, p, u; // attribute name, value, prefix and uri respectively @@ -395,7 +386,7 @@ public abstract class XML // we have a definition of the prefix range available prefix = ref+1; } else if (!NameChar(s)) { - throw new MarkupException("attribute name contains invalid characters", getLine(), getCol()); + throw new Exn("attribute name contains invalid characters", Exn.MARKUP, getLine(), getCol()); } } @@ -410,7 +401,7 @@ public abstract class XML // find name/value divider ('=') readWhitespace(); if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider"); - if (buf[off] != '=') throw new MarkupException("attribute name not followed by '=' sign", getLine(), getCol()); + if (buf[off] != '=') throw new Exn("attribute name not followed by '=' sign", Exn.MARKUP, getLine(), getCol()); col++; off++; len--; readWhitespace(); @@ -421,7 +412,7 @@ public abstract class XML if (buf[off] == '\'' || buf[off] == '"') { wrap = buf[off]; } else { - throw new MarkupException("attribute '"+n+"' must have attribute wrapped in ' or \"", getLine(), getCol()); + throw new Exn("attribute '"+n+"' must have attribute wrapped in ' or \"", Exn.MARKUP, getLine(), getCol()); } col++; off++; len--; @@ -432,7 +423,7 @@ public abstract class XML if (buf[off+ref] == wrap) { break attval; } else if (buf[off+ref] == '<') { - throw new WFCException("attribute value for '"+n+"' must not contain '<'", getLine(), getCol()); + throw new Exn("attribute value for '"+n+"' must not contain '<'", Exn.WFC, getLine(), getCol()); } } @@ -444,40 +435,29 @@ public abstract class XML // process attribute if (p != null && p.equals("xmlns")) { - current.urimap.put(n, v); + current.addUri(n, v); } else if (n.equals("xmlns")) { - if (current.defaultUri != null) { - current.addError(new NCException("default namespace definition repeated", getLine(), getCol())); + if (current.getUri().equals("")) { + current.addUri("", v); } else { - current.defaultUri = v; + current.addError(new Exn("default namespace definition repeated", Exn.NC, getLine(), getCol())); } } else { + // find attribute uri + u = current.getUri(p); + if (p != null && u.equals("")) current.addError(new Exn("undefined attribute prefix '"+p+"'", Exn.NC, getLine(), getCol())); + // check to see if attribute is a repeat - for (int i=0; current.len > i; i++) if (n.equals(current.keys[i])) throw new WFCException( - "attribute name '"+n+"' may not appear more than once in the same element tag", getLine(), getCol() + for (int i=0; current.len > i; i++) if (n.equals(current.getAttrKey(i)) && u.equals(current.getAttrUri(i))) throw new Exn( + "attribute name '"+n+"' may not appear more than once in the same element tag", Exn.WFC, getLine(), getCol() ); - // find attribute uri - if (p == null) { - u = current.uri; - } else { - for (Element e = current; e != null && u == null; e = e.prev) { - u = (String)e.urimap.get(p); - } - if (u == null) current.addError(new NCException("undefined attribute prefix '"+current.prefix+"'", getLine(), getCol())); - } - - // add attribute to the attribute arrays - if (current.len == current.keys.length) current.morekeys(); - current.keys[current.len] = n; - current.vals[current.len] = v; - current.uris[current.len] = u; - current.len++; + current.addAttr(n, v, u); } } /** reads an entity and processes out its value. expects buf[off] == '&' */ - private final void readEntity() throws IOException, XMLException { + private final void readEntity() throws IOException, Exn { off++; len--; if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity"); @@ -496,7 +476,7 @@ public abstract class XML if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity"); int d = Character.digit(buf[off], radix); if (d == -1) { - if (buf[off] != ';') throw new WFCException("illegal characters in entity reference", getLine(), getCol()); + if (buf[off] != ';') throw new Exn("illegal characters in entity reference", Exn.WFC, getLine(), getCol()); off++; len--; col++; break findchar; } @@ -551,11 +531,11 @@ public abstract class XML // TODO: check a parser-level Hash of defined entities } - if (unknown) throw new WFCException("unknown entity ( not supported)", getLine(), getCol()); + if (unknown) throw new Exn("unknown entity ( not supported)", Exn.WFC, getLine(), getCol()); } /** reads until the passed string is encountered. */ - private final void readChars(boolean p, String match, boolean entities) throws IOException, XMLException { + private final void readChars(boolean p, String match, boolean entities) throws IOException, Exn { int ref; char[] end = match.toCharArray(); @@ -610,7 +590,7 @@ public abstract class XML * reads until a < symbol is encountered * @param p If true call the characters(char[],int,int) funciton for the processed characters */ - private final void readChars(boolean p) throws IOException, XMLException { + private final void readChars(boolean p) throws IOException, Exn { int ref; for (boolean more = true; more;) { @@ -657,7 +637,7 @@ public abstract class XML } /** reads until a non-whitespace symbol is encountered */ - private final void readWhitespace() throws IOException, XMLException { + private final void readWhitespace() throws IOException, Exn { int ref; for (boolean more = true; more;) { @@ -737,17 +717,13 @@ public abstract class XML /** * Called when the start of an element is processed. * - *

The array of Attribute names and values may be longer than the - * number of entries they contain, but all the entries will be - * packed at the top.

- * - *

DO NOT store a reference to the attribute arrays, as - * they are reused by other elements.

+ *

DO NOT store a reference to the Element object, as + * they are reused by XML Parser.

*/ - public abstract void startElement(Element e) throws SchemaException; + public abstract void startElement(Element e) throws Exn; /** - * Represents a line of character data. + * Represents up to a line of character data. * *

Newlines are all normalised to the Unix \n as per the XML Spec, * and a newline will only appear as the last character in the passed @@ -757,13 +733,13 @@ public abstract class XML * beginning of this character segment, which can be processed in a * line-by-line fashion due to the above newline restriction.

*/ - public abstract void characters(char[] ch, int start, int length) throws SchemaException; + public abstract void characters(char[] ch, int start, int length) throws Exn, IOException; - /** Represents a line of ignorable whitespace. */ - public abstract void whitespace(char[] ch, int start, int length) throws SchemaException; + /** Represents up to a line of ignorable whitespace. */ + public abstract void whitespace(char[] ch, int start, int length) throws Exn, IOException; /** Represents the end of an Element. */ - public abstract void endElement(Element e) throws SchemaException; + public abstract void endElement(Element e) throws Exn, IOException; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -771,138 +747,165 @@ public abstract class XML ///////////////////////////////////////////////////////////////////////////////////////////// /** - * Used as a struct for holding information about a current element, - * and acts as a linked list entry. - * - *

Each element stores a hashtable of namespace definitions against - * their respective prefix, and a variable holding their default - * uri. If they did not specify a default uri, their - * parent's uri is copied in to keep up the sembelence of speedy - * parsing.

- * - *

SLOWEST PART OF THE XML PARSER

- *

To implement the Namespace Specification exactly, we have to - * store prefix mappings for elements away from its parents and - * siblings. This means if a child of a child of-a child uses - * a prefix defined in the root, we have to search each Hashtable - * in each Element until we get to the root.

+ * Represents an element in an XML document. Stores a reference to its + * parent, forming a one-way linked list. * - *

Unfortunetally, every other solution I can think of requires - * more work than this one, shifted to different parts of the - * parser.

+ * Element objects are reused, so client code making use of them must + * drop their references after the specific element process function + * has returned. */ - public static final class Element - { - public Element next, prev; + public static final class Element { - /** A hashtable of all namespace prefixes that are defined by this element. */ - public Hash urimap; + private static final int DEFAULT_ATTR_SIZE = 10; - /** An array of attribute names. */ - public String[] keys; - - /** An array of attribute values. */ - public String[] vals; + protected Element parent = null; - /** An array of attribute uris. */ - public String[] uris; + protected String uri = null; + protected String localName = null; + protected String qName = null; + protected String prefix = null; - /** An array of non-fatal errors related to this element. */ - public XMLException[] errors; + protected Hash urimap = new Hash(3,3); - /** Current number of attributes in the keys and vals arrays. */ - public int len; + protected String[] keys = new String[DEFAULT_ATTR_SIZE]; + protected String[] vals = new String[DEFAULT_ATTR_SIZE]; + protected String[] uris = new String[DEFAULT_ATTR_SIZE]; + protected int len = 0; - /** Default URI for this element and its children with no prefix. */ - public String defaultUri; + protected Exn[] errors = new Exn[] {}; - /** URI of current tag. XML Namespace Spec 14-Jan-1999 section 1 */ - public String uri; - /** LocalPart of current element. XML Namespace Spec 14-Jan-1999 [8] */ - public String localName; + /** Parent of current element. */ + public Element getParent() { return parent; } /** Qualified Name of current element. XML Namespace Spec 14-Jan-1999 [6] */ - public String qName; + public String getQName() { return qName; } + + /** LocalPart of current element. XML Namespace Spec 14-Jan-1999 [8] */ + public String getLocalName() { return localName; } /** Prefix of current element. Substring of qName. XML Namespace Spec 14-Jan-1999 [7] */ - public String prefix; - - public Element() { - defaultUri = uri = prefix = localName = qName = null; - urimap = new Hash(3,3); - keys = new String[10]; - vals = new String[10]; - uris = new String[10]; - errors = new XMLException[] {}; - len = 0; + public String getPrefix() { return prefix; } + + /** URI of current tag. XML Namespace Spec 14-Jan-1999 section 1 */ + public String getUri() { return getUri(prefix); } + + /** URI of a given prefix. Never returns null, instead gives "". */ + public String getUri(String p) { + String ret = null; + for (Element e = this; e != null && ret == null; e = e.getParent()) { + ret = (String)e.urimap.get(p == null ? "" : p); + } + return ret == null ? "" : ret; } - /** increase the size of the attributes arrays */ - void morekeys() { - String[] newkeys = new String[keys.length+5]; - String[] newvals = new String[vals.length+5]; - String[] newuris = new String[uris.length+5]; - System.arraycopy(keys, 0, newkeys, 0, keys.length); - System.arraycopy(vals, 0, newvals, 0, vals.length); - System.arraycopy(uris, 0, newuris, 0, uris.length); - keys = newkeys; vals = newvals; uris = newuris; + /** An array of attribute names. */ + public String getAttrKey(int pos) { return len > pos ? keys[pos] : null; } + + /** An array of attribute values. */ + public String getAttrVal(int pos) { return len > pos ? vals[pos] : null; } + + /** An array of attribute uris. */ + public String getAttrUri(int pos) { return len > pos ? uris[pos] : null; } + + /** Current number of attributes in the element. */ + public int getAttrLen() { return len; } + + /** An array of non-fatal errors related to this element. */ + public Exn[] getErrors() { return errors; } + + + protected Element() { } + + /** Add (replace if exists in current element) a Namespace prefix/uri map. */ + protected void addUri(String name, String value) { + urimap.put(name, value); } - /** empty out the arrays */ - void clear() { - for (int i=0; len > i; i++) { keys[i] = null; vals[i] = null; uris[i] = null; }; len = 0; - errors = new XMLException[] {}; + /** Add an attribute. */ + protected void addAttr(String key, String val, String uri) { + if (len == keys.length) { + // increase the size of the attributes arrays + String[] newkeys = new String[keys.length*2]; + String[] newvals = new String[vals.length*2]; + String[] newuris = new String[uris.length*2]; + System.arraycopy(keys, 0, newkeys, 0, keys.length); + System.arraycopy(vals, 0, newvals, 0, vals.length); + System.arraycopy(uris, 0, newuris, 0, uris.length); + keys = newkeys; vals = newvals; uris = newuris; + } + + keys[len] = key; + vals[len] = val; + uris[len] = uri; + len++; } - /** add an error to the errors array */ - void addError(XMLException e) { + /** Add an error. */ + protected void addError(Exn e) { // it doesn't really matter about continually expanding the array, as this case is quite rare - XMLException[] newe = new XMLException[errors.length+1]; + Exn[] newe = new Exn[errors.length+1]; System.arraycopy(errors, 0, newe, 0, errors.length); newe[errors.length] = e; errors = newe; } + + /** Empty out all the data from the Element. */ + protected void clear() { + parent = null; + uri = localName = qName = prefix = null; + urimap.clear(); + + if (keys.length != vals.length || vals.length != uris.length) { + keys = new String[DEFAULT_ATTR_SIZE]; + vals = new String[DEFAULT_ATTR_SIZE]; + uris = new String[DEFAULT_ATTR_SIZE]; + } else { + for (int i=0; keys.length > i; i++) { keys[i] = null; vals[i] = null; uris[i] = null; }; + } + len = 0; + + errors = new Exn[] {}; + } } /** Parse or Structural Error */ - public static class XMLException extends Exception - { - private int line; - private int col; - private String error; + public static class Exn extends Exception { + /** Violation of Markup restrictions in XML Specification - Fatal Error */ + public static final int MARKUP = 1; - public XMLException(String e) { this(e, -1, -1); } + /** Well-Formedness Constraint Violation - Fatal Error */ + public static final int WFC = 2; - public XMLException(String e, int l, int c) { - this.error = e; - this.line = l; - this.col = c; - } + /** Namespace Constraint Violation - Recoverable Error */ + public static final int NC = 3; - public int getLine() { return this.line; } - public int getCol() { return this.col; } - public String getMessage() { return this.error; } - } + /** Schema Violation - Fatal Error */ + public static final int SCHEMA = 4; - /** Violation of Markup restrictions in XML Specification - Fatal Error */ - public static class MarkupException extends XMLException { public MarkupException(String e, int l, int c) { super(e,l,c); } } + private String error; + private int type; + private int line; + private int col; - /** Well-Formedness Constraint Violation - Fatal Error */ - public static final class WFCException extends MarkupException { public WFCException(String e, int l, int c) { super(e,l,c); } } + public Exn(String e) { this(e, MARKUP, -1, -1); } - /** Namespace Constraint Violation - Recoverable Error */ - public static final class NCException extends XMLException { public NCException(String e, int l, int c) { super(e,l,c); } } + public Exn(String e, int type, int line, int col) { + this.error = e; + this.type = type; + this.line = line; + this.col = col; + } - /** Schema Violation - Fatal Error */ - public static class SchemaException extends XMLException { - public SchemaException(String e) { this(e, -1, -1); } - public SchemaException(String e, int l, int c) { super(e,l,c); } + public int getType() { return this.type; } + public int getLine() { return this.line; } + public int getCol() { return this.col; } + public String getMessage() { return this.error + (line >= 0 && col >= 0 ? " at " + line + ":" + col: ""); } } ///////////////////////////////////////////////////////////////////////////////////////////// - // Static Support JSFunctions for the XML Specification + // Static Support Functions for the XML Specification ///////////////////////////////////////////////////////////////////////////////////////////// // attempt to avoid these functions unless you *expect* the input to fall in the given range.