1 // Copyright 2000-2005 the Contributors, as shown in the revision logs.
2 // Licensed under the Apache Public Source License 2.0 ("the License").
3 // You may not use this file except in compliance with the License.
7 import java.io.EOFException;
8 import java.io.IOException;
9 import java.io.OutputStream;
10 import java.io.Reader;
11 import java.io.Writer;
12 import java.io.Serializable;
15 * A non-validating XML Parser with Namespace support.
17 * <h3>SAX-like usage</h3>
19 * <p>Subclass XML and implement the four abstract functions. Call
20 * <tt>parse()</tt> to begin synchronously processing reader input.
21 * Any number of documents may be <tt>parse()</tt>ed.</p>
23 * <h3>DOM-like usage</h3>
25 * <p>Instantiate <tt>XML.Document</tt> and call <tt>parse()</tt>. The
26 * root of the document tree can be accessed by calling <tt>getRoot()</tt>.
27 * See the public interface <tt>XML.Element</tt> for tree traversal.</p>
29 * <p>Only one document may be <tt>parse()</tt>ed per <tt>XML.Document</tt>
32 * <h3>Implementation Notes</h3>
34 * <p>This parser supports all the unicode ranges required by the XML
35 * Specification. However, it is optimised for well-formed ASCII documents.
36 * Documents containing unicode Names and Attributes will take much longer
37 * to process, and invalid documents (badly formed Names or invalid attributes)
38 * will be run through a test on every single unicode character range before
39 * being declared invalid.</p>
42 * <li>Each time the buffer offset <tt>off</tt> is moved, the length
43 * <tt>len</tt> must be decreased.</li>
44 * <li>Each time the buffer length is decreased, it must be checked to make
45 * sure it is >0.</li>
46 * <li><i>error</i> is defined as a Validity Constraint Violation and
48 * <li><i>fatal error</i> is defined as a Well-formedness Constraint
49 * Violation and is not recoverable</li>
52 * @author crawshaw@ibex.org
53 * @see <a href="http://w3.org/TR/REC-xml">XML Specification</a>
54 * @see <a href="http://w3.org/TR/REC-xml-names">XML Namespaces</a>
56 public abstract class XML
58 // XML Parser /////////////////////////////////////////////////////////////
60 /** Default initial buffer size. */
61 public static final int BUFFER_SIZE = 256;
63 private static final int DEFAULT_ATTR_COUNT = 2;
64 private static final int DEFAULT_PFX_COUNT = 2;
66 /** static pool of XML.Elem instances shared by all XML Parsers. */
67 private static final Basket.List elements = new Basket.Array();
69 private static final char[] single_amp = new char[] { '&' };
70 private static final char[] single_apos = new char[] { '\'' };
71 private static final char[] single_gt = new char[] { '>' };
72 private static final char[] single_lt = new char[] { '<' };
73 private static final char[] single_quot = new char[] { '"' };
75 private final boolean poolElements;
83 private int base; // base+off == distance into the stream
88 // used in readEntity() to process a single character without creating a new array
89 private char[] singlechar = new char[1];
92 /** Creates a new XML parser that has a default initial
93 * buffer size and reuses its signal objects. */
94 protected XML() { this(BUFFER_SIZE, true); }
96 /** Creates a new XML parser.
97 * @param bSize initial buffer size.
98 * @param poolElements if true the objects passed to the signal functions are reused.
100 protected XML(int bSize, boolean poolElements) {
101 buf = new char[bSize];
102 this.poolElements = poolElements;
107 /** Returns the line number at the beginning of the last process call. */
108 public int getLine() { return line; }
110 /** Returns the column number at the beginning of the last process call. */
111 public int getCol() { return col; }
113 /** Returns the global file offset at the beginning of the last process call. */
114 public int getGlobalOffset() { return base + off; }
116 /** Set the reader used as a data source by the XML parser. */
117 public void setReader(Reader reader) { in = reader; }
119 /** Parse given input and call the abstract event functions.
120 * Equivalent to calling <tt>setReader(reader); parse();</tt>. */
121 public final void parse(Reader reader) throws IOException, Exn {
122 setReader(reader); parse();
125 /** Parse given input and call the abstract event functions.
127 * <p>This function is synchronous with event functions, meaning it
128 * will only return <i>after</i> it has finished calling all signal
131 public final void parse() throws IOException, Exn {
137 // process the stream
140 if (current == null) break;
141 throw new Exn("reached eof without closing <"+current.qName+"> element",
142 Exn.WFC, getLine(), getCol());
145 if (buf[off] == '<') {
146 if (current == null) current = element();
149 readChars(current != null);
151 } finally { clear(); } // clean up elements
154 /** Parses the next tag or block of character data, calling the
155 * abstract event functions to process the data.
157 * @return True if successfully processed a block of data.
159 public boolean parseNext() throws IOException, Exn {
161 if (current == null) return false;
162 throw new Exn("reached eof without closing <"+current.qName+"> element",
163 Exn.WFC, getLine(), getCol());
166 // move through meaningless data
167 if (current != null) readChars(false);
169 if (buf[off] == '<') {
170 // process and return a tag
171 if (current == null) current = element();
174 // processes a block of character data
181 /** Returns the current Tree.Element, or null if outside the root node. */
182 public Tree.Element current() { return current; }
184 /** Empty the linked list. */
185 private void clear() {
186 while (current != null) {
188 current = (Elem)current.parent;
193 /** Provides a fresh element. */
194 private Elem element() {
196 if (poolElements) synchronized (elements) {
197 if (elements.size() > 0) e = (Elem)elements.remove(elements.size() - 1);
199 if (e == null) e = new Elem();
203 /** Frees a used element. */
204 private void element(Elem e) {
205 if (e == null || !poolElements) return;
206 e.clear(); synchronized (elements) { elements.add(e); }
210 /** Reads in a tag. Expects <tt>buf[off] == '<'</tt>. */
211 private void readTag() throws IOException, Exn {
212 // Start Tag '<' Name (S Attribute)* S? '>'
213 boolean starttag = true;
215 // End Tag '</' Name S? '>'
216 boolean endtag = false;
218 // if (starttag & endtag) then: EmptyElemTag '<' Name (S Attribute)* S? '/>'
220 // Position in the name of the ':' namespace prefix
226 if (!buffer(1)) throw new EOFException("Unexpected EOF processing element tag");
228 // work out what we can from the beginning of the tag
231 // definitions here don't necessarily conform to xml spec (as DTDs not yet implemented)
233 if (!buffer(4)) throw new EOFException("Unexpected EOF processing <! element");
238 if (buf[off+1] != '-') { bad = true; break; }
239 col += 2; off += 2; len -= 2;
241 // Comment '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
242 readChars(false, "-->", false);
243 col += 3; off += 3; len -= 3;
246 // we don't care about the following definitions
250 || buf[off+1] != 'T' || buf[off+2] != 'T' || buf[off+3] != 'L'
251 || buf[off+4] != 'I' || buf[off+5] != 'S' || buf[off+6] != 'T') {
254 col += 7; off += 7; len -= 7;
256 // ATTLIST '<!ATTLIST' (Char* - '>') '>'
257 readChars(false, ">", true);
262 || buf[off+1] != 'O' || buf[off+2] != 'C' || buf[off+3] != 'T'
263 || buf[off+4] != 'Y' || buf[off+5] != 'P' || buf[off+6] != 'E') {
266 col += 7; off += 7; len -= 7;
268 // DTD '<!DOCTYPE' (Char* - '>') '>'
269 readChars(false, ">", true);
275 } else if (buf[off+1] == 'L' && buf[off+2] == 'E' && buf[off+3] == 'M'
276 && buf[off+4] == 'E' && buf[off+5] == 'N' && buf[off+6] == 'T') {
277 // ELEMENT '<!ELEMENT' (Char* - '>') '>'
278 readChars(false, ">", true);
281 } else if (buf[off+1] == 'N' && buf[off+2] == 'T' && buf[off+3] == 'I'
282 && buf[off+4] == 'T' && buf[off+5] == 'Y') {
283 // ENTITY '<!ENTITY' (Char* - '>') '>'
284 readChars(false, ">", true);
294 || buf[off+1] != 'O' || buf[off+2] != 'T' || buf[off+3] != 'A' || buf[off+4] != 'T'
295 || buf[off+5] != 'I' || buf[off+6] != 'O' || buf[off+7] != 'N') {
298 col += 8; off += 8; len -= 8;
299 // NOTATION '<!NOTATION' (Char* - '>') '>'
300 readChars(false, ">", true);
307 if (bad) throw new Exn("element tag start character is invalid", Exn.MARKUP, getLine(), getCol());
309 } else if (s == '?') {
310 // PI (Ignored) '<?' (Char* - (Char* '?>' Char*)) '?>'
312 readChars(false, "?>", true);
313 if (!buffer(2)) throw new EOFException("Unexpected EOF at end of Processing Instruction");
314 col += 2; off += 2; len -= 2;
316 } else if (s == '[') {
318 || buf[off+1] != 'C' || buf[off+2] != 'D' || buf[off+3] != 'A'
319 || buf[off+4] != 'T' || buf[off+5] != 'A' || buf[off+6] != '[') {
321 // Conditional '<![' (Char* - (Char* ']]>' Char*)) ']]>'
322 readChars(false, "]]>", false);
324 col += 7; off += 7; len -=7;
325 // CDATA '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>'
326 readChars(true, "]]>", false);
328 col += 3; off += 3; len -= 3;
331 // End Tag '</' Name S? '>'
336 if (!buffer(1)) throw new EOFException("Unexpected EOF processing end tag");
340 if (!Name(s)) throw new Exn("invalid starting character in element name", Exn.MARKUP, getLine(), getCol());
342 // find the element name (defined in XML Spec: section 2.3)
343 for (namelen = 0; ; namelen++) {
344 if (!buffer(namelen+1)) throw new EOFException("Unexpected EOF in element tag name");
346 s = buf[off+namelen];
348 if (S(s) || s == '>') {
350 } else if (s == '/') {
353 } else if (s == ':' && namelen > 0 && prefix < 1) {
354 // we have a definition of the prefix range available
356 } else if (!NameChar(s)) {
357 throw new Exn("element name contains invalid character", Exn.MARKUP, getLine(), getCol());
361 // process name (based on calculated region)
362 if (namelen < 1) throw new Exn("element name is null", Exn.MARKUP, getLine(), getCol());
364 // we have marked out the name region, so turn it into a string and move on
365 String qName = new String(buf, off, namelen);
367 col += namelen; off += namelen; len -= namelen;
370 // create the in-memory element representation of this beast
371 // if current.qName == null then this is the root element we're dealing with
372 if (current.qName != null) {
373 Elem next = element();
374 next.parent = current;
378 current.qName = qName;
381 current.prefix = current.qName.substring(0, prefix);
382 current.localName = current.qName.substring(prefix+1);
384 current.prefix = null;
385 current.localName = current.qName;
388 // process attributes
390 if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 1");
391 while (buf[off] != '/' && buf[off] != '>') {
393 if (!buffer(1)) throw new EOFException("Unexpected EOF - processing attributes part 2");
397 // work out the uri of this element
398 String p = current.getPrefix();
399 String uri = current.uri(p);
400 if (uri == null && p != null && !p.equals("")) error(new Exn("undefined prefix '"+current.getPrefix()+"'", Exn.NC, getLine(), getCol()));
401 else current.uri = uri;
404 // this is an end-of-element tag
405 if (!qName.equals(current.getQName())) throw new Exn(
406 "end tag </"+qName+"> does not line up with start tag <"+current.getQName()+">", Exn.WFC, getLine(), getCol()
410 // deal with whitespace
414 if (!buffer(1)) throw new EOFException("Unexpected EOF before end of tag");
415 if (buf[off] == '/') {
419 if (!buffer(1)) throw new EOFException("Unexpected EOF before end of endtag");
420 if (buf[off] == '>') {
423 throw new Exn("missing '>' character from element '"+qName+"'", Exn.MARKUP, getLine(), getCol());
426 // send element signals
427 if (starttag) startElement(current);
432 current = (Elem)current.parent;
438 /** reads in an attribute of an element. expects Name(buf[off]) */
439 private final void readAttribute() throws IOException, Exn {
442 String n, v, p, u; // attribute name, value, prefix and uri respectively
443 n = v = p = u = null;
446 // find the element name (defined in XML Spec: section 2.3)
447 for (ref= 0; ; ref++) {
448 if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in read attribute loop part 1");
452 if (s == '=' || S(s)) {
454 } else if (s == ':' && ref > 0 && prefix < 1) {
455 // we have a definition of the prefix range available
457 } else if (!NameChar(s)) {
458 throw new Exn("attribute name contains invalid characters", Exn.MARKUP, getLine(), getCol());
462 // determine prefix and key name
464 p = new String(buf, off, prefix-1);
465 col += prefix; off += prefix; len -= prefix; ref -= prefix;
467 n = new String(buf, off, ref);
468 col += ref; off += ref; len -= ref;
470 // find name/value divider ('=')
472 if (!buffer(1)) throw new EOFException("Unexpected EOF before attribute '=' divider");
473 if (buf[off] != '=') throw new Exn("attribute name not followed by '=' sign", Exn.MARKUP, getLine(), getCol());
478 if (!buffer(1)) throw new EOFException("Unexpected EOF after attribute '=' divider");
481 if (buf[off] == '\'' || buf[off] == '"') {
484 throw new Exn("attribute '"+n+"' must have attribute wrapped in ' or \"", Exn.MARKUP, getLine(), getCol());
488 // find the attribute value
489 attval: for (ref = 0; ; ref++) {
490 if (!buffer(ref+1)) throw new EOFException("Unexpected EOF in attribute value");
492 if (buf[off+ref] == wrap) {
494 } else if (buf[off+ref] == '<') {
495 throw new Exn("attribute value for '"+n+"' must not contain '<'", Exn.WFC, getLine(), getCol());
499 v = new String(buf, off, ref);
500 col += ref; off += ref; len -= ref;
502 // remove end wrapper character
506 if (p != null && p.equals("xmlns")) {
507 current.addPrefix(n, v);
508 } else if (n.equals("xmlns")) {
509 if (current.getUri() == null || current.getUri().equals("")) {
510 current.addPrefix("", v);
512 error(new Exn("default namespace definition repeated", Exn.NC, getLine(), getCol()));
515 // find attribute uri
517 if (u == null && p != null) error(new Exn("undefined attribute prefix '"+p+"'", Exn.NC, getLine(), getCol()));
519 // check to see if attribute is a repeat
520 for (int i=0; current.attrSize() > i; i++) if (n.equals(current.getKey(i)) && u.equals(current.getUri(i))) throw new Exn(
521 "attribute name '"+n+"' may not appear more than once in the same element tag", Exn.WFC, getLine(), getCol()
524 current.addAttr(n, v, u, p);
528 /** Reads an entity reference and reports its replacement text through characters().
 *  Expects buf[off] == '&'. Numeric character references are read in base 16 when
 *  marked with 'x' and in base 10 otherwise; the named entities amp, apos, gt, lt and
 *  quot are reported as their single character. A reference flagged as unknown raises
 *  a WFC Exn, because <!ENTITY> declarations are not supported. */
529 private final void readEntity() throws IOException, Exn {
531 if (!buffer(2)) throw new EOFException("Unexpected EOF reading entity");
533 boolean unknown = false;
539 if (buf[off] == 'x') { off++; len--; radix = 16; } else { radix = 10; }
542 // read in each char, then shift total value to the left and add the extra
543 // style of loop is slightly different from all the others, as this should run a limited number of times
544 findchar: while (true) {
545 if (!buffer(1)) throw new EOFException("Unexpected EOF reading entity");
546 int d = Character.digit(buf[off], radix);
548 if (buf[off] != ';') throw new Exn("illegal characters in entity reference", Exn.WFC, getLine(), getCol());
// c is the value accumulated by the shift-and-add loop above, i.e. the referenced
// character itself. Character.forDigit(c, radix) would instead return the digit glyph
// for c (and NUL once c >= radix), so the value is emitted directly.
// NOTE(review): values above 0xFFFF are truncated by this cast; no surrogate pair is produced.
557 singlechar[0] = (char)c;
558 characters(singlechar, 0, 1);
562 if (buffer(4) && buf[off+1] == 'm' && buf[off+2] == 'p' && buf[off+3] == ';') {
563 characters(single_amp, 0, 1); // &
564 off += 4; len -= 4; col++;
565 } else if (buffer(5) && buf[off+1] == 'p' && buf[off+2] == 'o' && buf[off+3] == 's' && buf[off+4] == ';') {
566 characters(single_apos, 0, 1); // '
567 off += 5; len -= 5; col++;
574 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
575 characters(single_gt, 0, 1); // >
576 off += 3; len -= 3; col++;
583 if (buffer(3) && buf[off+1] == 't' && buf[off+2] == ';') {
584 characters(single_lt, 0, 1); // <
585 off += 3; len -= 3; col++;
592 if (buffer(5) && buf[off+1] == 'u' && buf[off+2] == 'o' && buf[off+3] == 't' && buf[off+4] == ';') {
593 characters(single_quot, 0, 1); // "
594 off += 5; len -= 5; col++;
600 // TODO: check a parser-level Hash of defined entities
603 if (unknown) throw new Exn("unknown entity (<!ENTITY> not supported)", Exn.WFC, getLine(), getCol());
606 /** reads until the passed string is encountered. */
607 private final void readChars(boolean p, String match, boolean entities) throws IOException, Exn {
609 char[] end = match.toCharArray();
611 for (boolean more = true; more;) {
612 if (!buffer(1)) return;
614 buf: for (ref = 0; ref < len; ref++) {
615 switch (buf[off+ref]) {
616 case '\r': // windows or macos9 newline
617 // normalise and process
618 buf[off+ref] = '\n'; ref++;
619 if (p) characters(buf, off, ref);
620 off += ref; len -= ref; ref = -1;
623 // windows double-char newline; skip the next char
624 if (!buffer(1)) return;
625 if (buf[off] == '\n') { off++; len--; }
628 case '\n': // unix newline
630 if (p) characters(buf, off, ref);
631 off += ref; len -= ref; ref = -1;
638 if (ref > 0) characters(buf, off, ref);
639 off += ref; len -= ref; ref = -1;
646 if (!buffer(ref+end.length)) continue buf;
647 for (int i=0; end.length > i; i++) if (end[i] != buf[off+ref+i]) continue buf;
653 if (p && ref > 0) characters(buf, off, ref);
654 off += ref; len -= ref; col += ref;
659 * Reads until a <tt>&lt;</tt> symbol is encountered.
660 * @param p If true call the characters(char[],int,int) function for the processed characters
662 private final void readChars(boolean p) throws IOException, Exn {
663 boolean lastWhite = false;
666 for (boolean more = true; more;) {
667 if (!buffer(1)) return;
668 boolean readWhite = false;
670 buf: for (ref = 0; ref < len; ref++) {
672 switch (buf[off+ref]) {
673 case '\r': // windows or macos9 newline
674 if (lastWhite) { readWhite = true; break buf; }
677 // normalise and process
678 buf[off+ref] = '\n'; ref++;
679 if (p) characters(buf, off, ref);
680 off += ref; len -= ref; ref = -1;
683 // windows double-char newline; skip the next char
684 if (!buffer(1)) return;
685 if (buf[off] == '\n') { off++; len--; }
688 case '\n': // unix newline
689 if (lastWhite) { readWhite = true; break buf; }
693 if (p) characters(buf, off, ref);
694 off += ref; len -= ref; ref = -1;
696 if (buffer(1) && S(buf[off])) {
697 readWhite = true; break buf;
703 if (lastWhite) { readWhite = true; break buf; }
707 case '<': // end of chars section
713 if (ref > 0) characters(buf, off, ref);
714 off += ref; len -= ref; ref = -1;
723 if (p) characters(buf, off, ref);
724 off += ref; len -= ref; col += ref;
729 more = buffer(1) && !(buf[off] == '<');
735 /** reads until a non-whitespace symbol is encountered */
736 private final void readWhitespace() throws IOException, Exn {
739 for (boolean more = true; more;) {
740 if (!buffer(1)) return;
742 buf: for (ref = 0; ref < len; ref++) {
743 switch (buf[off+ref]) {
744 case '\r': // windows or macos9 newline
745 // normalise and process
747 whitespace(buf, off, ++ref);
748 off += ref; len -= ref; ref = -1;
751 // windows double-char newline; skip the next char
752 if (buffer(1) && buf[off] == '\n') { off++; len--; }
755 case '\n': // unix newline
756 whitespace(buf, off, ++ref);
757 off += ref; len -= ref; ref = -1;
765 default: // end of whitespace
772 whitespace(buf, off, ref);
773 off += ref; len -= ref; col += ref;
779 * Attempt to fill the buffer.
781 * @param min Minimum number of characters to read (even if we have to block to do it).
782 * @return return false if min can't be reached.
784 private final boolean buffer(int min) throws IOException {
785 if (len > min) return true;
787 if (buf.length - (off+len) >= min) {
788 // plenty of space left on the end of the buffer
789 } else if (off >= min) {
790 // moving offset data to start will leave enough free space on the end
791 System.arraycopy(buf, off, buf, 0, len);
795 // buffer size will have to be increased
796 char[] newbuf = new char[buf.length * 2];
797 System.arraycopy(buf, off, newbuf, 0, len);
804 int newlen = in.read(buf, off+len, buf.length-(off+len));
805 if (newlen < 0) return false;
813 // SAX-like Interface /////////////////////////////////////////////////////
815 /** Called when the start of an element is processed.
817 * <p>If poolElements == true (default), <b>DO NOT</b> store a
818 * reference to the Element object, as they are reused by
821 public abstract void startElement(Tree.Element e) throws Exn;
823 /** Called when up to a line of character data is processed.
825 * <p>Newlines are all normalised to the Unix \n as per the XML Spec,
826 * and a newline will only appear as the last character in the passed
829 * <p>XML.getLine() and XML.getCol() report the position at the
830 * beginning of this character segment, which can be processed in a
831 * line-by-line fashion due to the above newline restriction.</p>
833 public abstract void characters(char[] ch, int start, int length) throws Exn, IOException;
835 /** Called when the end of a Tree.Element is processed. */
836 public abstract void endElement(Tree.Element e) throws Exn, IOException;
838 /** Optional callback; called when up to a line of ignorable whitespace is processed. */
839 public void whitespace(char[] ch, int start, int length) throws Exn, IOException {}
841 /** Optional callback; called when a recoverable parsing error has been encountered. */
842 public void error(Exn e) throws Exn, IOException {}
844 // DOM-like Interface /////////////////////////////////////////////////////
846 /** A Document Object Model extension to the XML Parser.
848 * <p>To use, instantiate XML.Document and call parse(Reader). The
849 * full Block tree can then be accessed starting from the root
850 * element by calling getRoot().</p>
852 public static class Document {
853 private final DXML xml;
854 private Tree.Element root = null;
856 /** Creates a new XML.Document. Default initial buffer size is used. */
857 public Document() { this(BUFFER_SIZE); }
859 /** Creates a new XML.Document with a specified initial buffer size. */
860 public Document(int bSize) { xml = new DXML(bSize, false); }
862 /** Returns the root Tree.Element of the parsed xml document. */
863 public Tree.Element getRoot() { return root; }
865 /** Sets the root element of this document. */
866 public void setRoot(Tree.Element e) { root = e; }
868 /** Parse given input and create the document model. */
869 public void parse(Reader r) throws IOException, Exn { xml.parse(r); }
871 /** Returns a character representation of this document. */
872 /*public String toXML() throws IOException { FIXME
873 StringWriter w = new StringWriter(); toXML(w); return w.toString();
876 /** Writes the character representation of this document to
877 * the given writer. Calls <tt>root.toXML(Writer)</tt>.*/
878 //public void toXML(Writer w) throws IOException { if (root == null) return; root.toXML(w); }
880 /** Used to hide implementation from public interface. */
881 private final class DXML extends XML {
882 private StringBuffer chars = null;
884 private DXML(int b, boolean r) { super(b, r); }
886 public void startElement(Tree.Element e) {
887 if (root == null) root = e;
889 if (chars != null) addText((Tree.Element)e.getParent());
890 e.getParent().getChildren().add(e);
893 public void characters(char[] ch, int s, int l) {
894 if (chars == null) chars = new StringBuffer();
895 chars.append(ch, s, l);
/** If character data has been collected since the last flush, attaches it to e as a Text child. */
897 public void endElement(Tree.Element e) { if (chars != null) addText(e); }
899 private void addText(Tree.Element e) {
900 e.getChildren().add(new Text(e, chars.toString())); chars = null;
905 // Pull Interface /////////////////////////////////////////////////////////
907 public static class Stream implements Tree.Stream {
908 private final SXML xml;
909 private int depth = -1;
911 /** Creates a new XML.Stream. Default initial buffer size is used. */
912 public Stream() { this(null, BUFFER_SIZE); }
/** Creates a new XML.Stream that reads from the given character source and uses the default initial buffer size. */
914 public Stream(Reader in) { this(in, BUFFER_SIZE); }
916 /** Creates a new XML.Stream with a specified character source and
917 * initial buffer size. */
918 public Stream(Reader in, int bSize) {
919 xml = new SXML(bSize); setReader(in);
922 public int available() { return 0; }
/** Returns the current nesting depth. The counter starts at -1, is incremented by the
 *  parser's startElement callback and decremented by its endElement callback. */
923 public int getDepth() { return depth; }
925 /** Set the current character source. */
926 public void setReader(Reader in) { xml.setReader(in); }
/** Returns the next leaf produced by the underlying parser (delegates to SXML.next()),
 *  which yields null once parseNext() reports that nothing is left to process. */
928 public Tree.Leaf next() throws IOException { return xml.next(); }
930 public void skip() throws IOException {
932 do { next(); } while (d != depth);
935 /** Used to hide implementation from public interface. */
936 private final class SXML extends XML {
937 private StringBuffer chars = new StringBuffer();
938 private Tree.Element element = null;
940 private SXML(int b) { super(b, false); }
/** Start-tag callback: increases the enclosing Stream's depth and remembers e so that next() can hand it out. */
941 public void startElement(Tree.Element e) { depth++; element = e; }
/** End-tag callback: decreases the enclosing Stream's depth and remembers e in the same
 *  field used for start tags, so next() returns the element for its end tag as well. */
942 public void endElement(Tree.Element e) { depth--; element = e; }
/** Character-data callback: appends the l characters starting at ch[s] to the pending text buffer. */
943 public void characters(char[] ch, int s, int l) { chars.append(ch, s, l); }
944 private Tree.Leaf next() throws IOException, Exn {
945 if (!parseNext()) return null;
946 if (element != null) {
947 Tree.Element e = element; element = null; return e;
948 } else if (chars.length() > 0) {
949 Tree.Leaf l = new Text(current(), chars.toString());
950 chars = new StringBuffer(); return l;
957 // Public Interfaces //////////////////////////////////////////////////////
960 // Support Classes ////////////////////////////////////////////////////////
962 /** Represents a block of text in an XML.Document model. */
963 private static final class Text implements Tree.Leaf, Serializable {
968 Text(Tree.Node p, String t) { this.t = t; this.p = p; }
970 public Tree.Node getParent() { return p; }
971 public void setParent(Tree.Node parent) { p = parent; }
/** Writes the stored text to the writer exactly as held; no XML escaping is applied here. */
972 public void out(Writer out) throws IOException { out.write(t); }
/** Byte-stream output is not supported for text nodes; always throws UnsupportedOperationException. */
973 public void out(OutputStream out) { throw new UnsupportedOperationException(); }
976 public static final class Elem
977 implements Tree.Element, Tree.Attributes, Tree.Prefixes, Serializable {
978 private String uri = null;
979 private String localName = null;
980 private String qName = null;
981 private String prefix = null;
983 private Tree.Node parent = null;
984 private Basket.List children = null;
986 private Tree.Attributes a = this;
987 private Tree.Prefixes p = this;
991 public Tree.Node getParent() { return parent; }
992 public void setParent(Tree.Node p) { parent = p; }
/** Returns the list of children, creating an empty Basket.Array on first access; the result is never null. */
993 public Basket.List getChildren() { return children == null ? children = new Basket.Array() : children; }
995 public Tree.Attributes getAttributes() { return a; }
996 public Tree.Prefixes getPrefixes() { return p; }
998 public void setAttributes(Tree.Attributes a) { this.a = a; }
999 public void setPrefixes(Tree.Prefixes p) { this.p = p; }
1001 public String getQName() { return qName; }
1002 public String getLocalName() { return localName; }
1003 public String getPrefix() { return prefix; }
1004 public String getUri() { return uri; }
1008 /** attr[index + 0] // localName
1009 * attr[index + 1] // value
1010 * attr[index + 2] // uri
1011 * attr[index + 3] // prefix */
1012 private String[] attr = null;
1013 private int attrSize = 0;
1015 void addAttr(String key, String val, String uri, String pfx) {
1017 attr = new String[DEFAULT_ATTR_COUNT * 4];
1018 } else if (attrSize == attr.length / 4) {
1019 String[] newattr = new String[attr.length * 2];
1020 System.arraycopy(attr, 0, newattr, 0, attr.length);
1024 attr[attrSize * 4 + 0] = key;
1025 attr[attrSize * 4 + 1] = val;
1026 attr[attrSize * 4 + 2] = uri;
1027 attr[attrSize * 4 + 3] = pfx;
1031 public int getIndex(String qName) {
1032 if (qName == null || qName.length() == 0) return -1;
1033 int pos = qName.indexOf(':');
1034 if (pos < 1) return getIndex(null, qName);
1035 return getIndex(uri(qName.substring(0, pos)), qName.substring(pos+1));
/** Looks up an attribute by namespace uri and local name and returns its position.
 *  A null uri matches only an attribute stored with a null uri.
 *  @param uri namespace uri of the attribute, may be null
 *  @param key local name of the attribute */
1038 public int getIndex(String uri, String key) {
1039 if (attr != null) for(int i=0; i < attrSize; i++)
// Each attribute occupies four slots: +0 local name, +1 value, +2 uri, +3 prefix.
// The uri therefore lives at +2 (the old code compared against the value at +1), and the
// comparison is null-safe because getIndex(String) passes a null uri for unprefixed names.
1040 if (attr[i*4].equals(key) && (uri == null ? attr[i*4+2] == null : uri.equals(attr[i*4+2]))) return i;
/** Returns the local name of the attribute at index pos, or null when pos is out of range or no attributes are stored. */
1044 public String getKey(int pos) { return attr != null && attrSize > pos && pos >= 0 ? attr[pos * 4] : null; }
/** Returns the value of the attribute at index pos, or null when pos is out of range or no attributes are stored. */
1045 public String getVal(int pos) { return attr != null && attrSize > pos && pos >= 0 ? attr[pos * 4 + 1] : null; }
/** Returns the namespace uri of the attribute at index pos, or null when pos is out of range or no attributes are stored. */
1046 public String getUri(int pos) { return attr != null && attrSize > pos && pos >= 0 ? attr[pos * 4 + 2] : null; }
/** Returns the namespace prefix of the attribute at index pos, or null when pos is out of range or no attributes are stored. */
1047 public String getPrefix(int pos) { return attr != null && attrSize > pos && pos >= 0 ? attr[pos * 4 + 3] : null; }
/** Returns the qualified name of the attribute at index pos: "prefix:localName" when the
 *  stored prefix is non-null, otherwise just the local name. For an out-of-range pos,
 *  getKey() yields null and string concatenation makes this return the text "null"
 *  rather than a null reference. */
1048 public String getQName(int pos) { String pfx = getPrefix(pos); return (pfx != null ? pfx + ":" : "") + getKey(pos); }
1049 public int attrSize() { return attrSize; }
1053 /** pfx[index + 0] // prefix
1054 * pfx[index + 1] // uri */
1055 private String[] pfx = null;
1056 private int pfxSize = 0;
/** Appends a namespace declaration to this element's prefix table.
 *  A null prefix or a null uri is stored as the empty string.
 *  @param prefix namespace prefix being declared, may be null
 *  @param uri namespace uri bound to the prefix, may be null */
1058 void addPrefix(String prefix, String uri) {
1060 pfx = new String[DEFAULT_PFX_COUNT * 2];
// Entries are two slots wide (prefix, uri), so the table is full at length / 2 entries.
// The earlier "/ 4" test, which matches the four-slot attribute table, doubled the
// array while it was still half empty.
1061 } else if (pfxSize == pfx.length / 2) {
1062 String[] newpfx = new String[pfx.length * 2];
1063 System.arraycopy(pfx, 0, newpfx, 0, pfx.length);
1067 pfx[pfxSize * 2 + 0] = prefix == null ? "" : prefix;
1068 pfx[pfxSize * 2 + 1] = uri == null ? "" : uri;
1072 public int getPrefixIndexKey(String p) {
1073 if (p == null) p = ""; for (int i=0; i < pfxSize; i++) if (p.equals(pfx[i * 2])) return i;
1076 public int getPrefixIndexVal(String u) {
1077 if (u == null) u = ""; for (int i=0; i < pfxSize; i++) if (u.equals(pfx[i * 2 + 1])) return i;
1080 public String getPrefixKey(int i) {
1081 if (i >= pfxSize) throw new IndexOutOfBoundsException(
1082 "index "+i+" exceeds boundary "+pfxSize);
1085 public String getPrefixVal(int i) {
1086 if (i >= pfxSize) throw new IndexOutOfBoundsException(
1087 "index "+i+" exceeds boundary "+pfxSize);
1088 return pfx[i * 2 + 1];
1090 public int pfxSize() { return pfxSize; }
/** Byte-stream output is not supported for elements; always throws UnsupportedOperationException. */
1094 public void out(OutputStream out) throws IOException { throw new UnsupportedOperationException(); }
1095 public void out(Writer w) throws IOException {
1096 Tree.Attributes a = getAttributes();
1097 Tree.Prefixes p = getPrefixes();
1099 w.write('<'); w.write(getQName());
1101 for (int i=0; i < p.pfxSize(); i++) {
1103 String pfx = p.getPrefixKey(i);
1104 if (pfx != null && pfx.length() > 0) {
1105 w.write(':'); w.write(pfx);
1108 w.write(p.getPrefixVal(i));
1112 for (int i=0; i < a.attrSize(); i++) {
1114 String pfx = a.getPrefix(i);
1115 if (pfx != null && pfx.length() > 0) {
1116 w.write(pfx); w.write(':');
1118 w.write(a.getKey(i));
1120 w.write(a.getVal(i));
1124 Basket.List c = getChildren();
1125 if (c == null || c.size() == 0) {
1130 for (int i=0; i < c.size(); i++) ((Tree.Leaf)c.get(i)).out(w);
1133 w.write(getQName());
1141 if (children != null) children.clear();
1142 if (attrSize > 0) { attr = null; attrSize = 0; }
1143 if (pfxSize > 0) { pfx = null; pfxSize = 0; }
1145 uri = localName = qName = prefix = null;
1150 String uri(String prefix) {
1152 for (Tree.Element e = this; e != null; e = (Tree.Element)e.getParent()) {
1153 int u = e.getPrefixes().getPrefixIndexKey(prefix);
1154 if (u >= 0) { uri = e.getPrefixes().getPrefixVal(u); break; }
1160 /** Parse or Structural Error */
1161 public static class Exn extends IOException implements Serializable {
1162 /** Violation of Markup restrictions in XML Specification - Fatal Error */
1163 public static final int MARKUP = 1;
1165 /** Well-Formedness Constraint Violation - Fatal Error */
1166 public static final int WFC = 2;
1168 /** Namespace Constraint Violation - Recoverable Error */
1169 public static final int NC = 3;
1171 /** Schema Violation - Fatal Error */
1172 public static final int SCHEMA = 4;
1174 private String error;
/** Creates an exception of type MARKUP with no position information: line and column are both passed as -1. */
1179 public Exn(String e) { this(e, MARKUP, -1, -1); }
1181 public Exn(String e, int type, int line, int col) {
1188 public String getError() { return error; }
1189 public int getType() { return type; }
1190 public int getLine() { return line; }
1191 public int getCol() { return col; }
/** Returns the error text, followed by " at line:col" when both line and col are non-negative. */
1192 public String getMessage() { return error + (line >= 0 && col >= 0 ? " at " + line + ":" + col: ""); }
/** Returns getMessage() prefixed with "XML.Exn: ". */
1193 public String toString() { return "XML.Exn: " + getMessage(); }
1197 // Static Support Functions for the XML Specification /////////////////////
1199 // For efficiency's sake, attempt to avoid these functions unless you
1200 // *expect* the input to fall in the given range. Thanks to unicode,
1201 // even range matching can result in hundreds of comparisons to return
1204 /** First Character of Name - XML Specification 1.0 [5] */
1205 private static final boolean Name(char c) {
1206 return BaseCharAscii(c) || c == '_' || c == ':' || Letter(c);
1209 /** NameChar - XML Specification 1.0 [4] */
1210 private static final boolean NameChar(char c) {
1211 return BaseCharAscii(c) || c == '.' || c == '-' || c == '_' || c == ':'
1212 || Digit(c) || Letter(c) || Extender(c); // TODO: || CombiningChar(c);
1215 /** BaseChar - XMl Specification 1.0 [84] */
1216 private static final boolean Letter(char c) {
1217 return BaseChar(c) || Ideographic(c);
1220 /** Elements of BaseChar that exist in ASCII. */
1221 private static final boolean BaseCharAscii(char c) {
1222 return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A');
1225 /** Char - XML Specification 1.0 [2] */
1226 private static final boolean Char(char c) {
1227 // u000A == r and u000D == n, but the javac compiler can't handle the \ u form
1228 return c == '\u0009' || c == '\r' || c == '\n'
1229 || (c >= '\u0020' && c <= '\uD7FF')
1230 || (c >= '\uE000' && c <= '\uFFFD');
1233 /** BaseChar - XML Specification 1.0 [85] */
1234 private static final boolean BaseChar(char c) {
1235 return BaseCharAscii(c) || (c >= '\u00C0' && c <= '\u00D6')
1236 || (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131')
1237 || (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E')
1238 || (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5')
1239 || (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1')
1240 || (c == '\u0386') || (c >= '\u0388' && c <= '\u038A') || (c == '\u038C')
1241 || (c >= '\u038E' && c <= '\u03A1') || (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6')
1242 || (c == '\u03DA') || (c == '\u03DC') || (c == '\u03DE')
1244 || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F')
1245 || (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4')
1246 || (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB')
1247 || (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556')
1249 || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2')
1250 || (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7')
1251 || (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3')
1253 || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939')
1255 || (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990')
1256 || (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0')
1258 || (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DF' && c <= '\u09E1') || (c >= '\u09F0' && c <= '\u09F1')
1259 || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || (c >= '\u0A13' && c <= '\u0A28')
1260 || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || (c >= '\u0A35' && c <= '\u0A36')
1261 || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C')
1263 || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B')
1265 || (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0')
1266 || (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9')
1269 || (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28')
1270 || (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39')
1272 || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || (c >= '\u0B85' && c <= '\u0B8A')
1273 || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || (c >= '\u0B99' && c <= '\u0B9A')
1275 || (c >= '\u0B9E' && c <= '\u0B9F') || (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA')
1276 || (c >= '\u0BAE' && c <= '\u0BB5') || (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C')
1277 || (c >= '\u0C0E' && c <= '\u0C10') || (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33')
1278 || (c >= '\u0C35' && c <= '\u0C39') || (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C')
1279 || (c >= '\u0C8E' && c <= '\u0C90') || (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3')
1280 || (c >= '\u0CB5' && c <= '\u0CB9')
1282 || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || (c >= '\u0D0E' && c <= '\u0D10')
1283 || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || (c >= '\u0D60' && c <= '\u0D61')
1284 || (c >= '\u0E01' && c <= '\u0E2E')
1286 || (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82')
1288 || (c >= '\u0E87' && c <= '\u0E88')
1291 || (c >= '\u0E94' && c <= '\u0E97') || (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3')
1294 || (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE')
1296 || (c >= '\u0EB2' && c <= '\u0EB3')
1298 || (c >= '\u0EC0' && c <= '\u0EC4') || (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69')
1299 || (c >= '\u10A0' && c <= '\u10C5') || (c >= '\u10D0' && c <= '\u10F6')
1301 || (c >= '\u1102' && c <= '\u1103') || (c >= '\u1105' && c <= '\u1107')
1303 || (c >= '\u110B' && c <= '\u110C') || (c >= '\u110E' && c <= '\u1112')
1310 || (c >= '\u1154' && c <= '\u1155')
1312 || (c >= '\u115F' && c <= '\u1161')
1317 || (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173')
1322 || (c >= '\u11AE' && c <= '\u11AF') || (c >= '\u11B7' && c <= '\u11B8')
1324 || (c >= '\u11BC' && c <= '\u11C2')
1328 || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || (c >= '\u1F00' && c <= '\u1F15')
1329 || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || (c >= '\u1F48' && c <= '\u1F4D')
1330 || (c >= '\u1F50' && c <= '\u1F57')
1334 || (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC')
1336 || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || (c >= '\u1FD0' && c <= '\u1FD3')
1337 || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || (c >= '\u1FF2' && c <= '\u1FF4')
1338 || (c >= '\u1FF6' && c <= '\u1FFC')
1340 || (c >= '\u212A' && c <= '\u212B')
1342 || (c >= '\u2180' && c <= '\u2182') || (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA')
1343 || (c >= '\u3105' && c <= '\u312C') || (c >= '\uAC00' && c <= '\uD7A3');
1346 /** BaseChar - XMl Specification 1.0 [86] */
1347 private static final boolean Ideographic(char c) {
1348 return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
/** CombiningChar - XML Specification 1.0 [87] */
1352 /*private static final boolean CombiningChar(char c) {
1353 return (c >= '\u0300' && c <= '\u0345')
1354 || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || (c >= '\u0591' && c <= '\u05A1')
1355 || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD')
1357 || (c >= '\u05C1' && c <= '\u05C2')
1359 || (c >= '\u064B' && c <= '\u0652')
1361 || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4')
1362 || (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903')
1364 || (c >= '\u093E' && c <= '\u094C')
1366 || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || (c >= '\u0981' && c <= '\u0983')
1370 || (c >= '\u09C0' && c <= '\u09C4') || (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD')
1372 || (c >= '\u09E2' && c <= '\u09E3')
1377 || (c >= '\u0A40' && c <= '\u0A42') || (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D')
1378 || (c >= '\u0A70' && c <= '\u0A71') || (c >= '\u0A81' && c <= '\u0A83')
1380 || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || (c >= '\u0ACB' && c <= '\u0ACD')
1381 || (c >= '\u0B01' && c <= '\u0B03')
1383 || (c >= '\u0B3E' && c <= '\u0B43') || (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D')
1384 || (c >= '\u0B56' && c <= '\u0B57') || (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2')
1385 || (c >= '\u0BC6' && c <= '\u0BC8') || (c >= '\u0BCA' && c <= '\u0BCD')
1387 || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || (c >= '\u0C46' && c <= '\u0C48')
1388 || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || (c >= '\u0C82' && c <= '\u0C83')
1389 || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || (c >= '\u0CCA' && c <= '\u0CCD')
1390 || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || (c >= '\u0D3E' && c <= '\u0D43')
1391 || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D')
1394 || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E')
1396 || (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD')
1397 || (c >= '\u0F18' && c <= '\u0F19')
1403 || (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95')
1405 || (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7')
1407 || (c >= '\u20D0' && c <= '\u20DC')
1409 || (c >= '\u302A' && c <= '\u302F')
1414 /** Digit - XMl Specification 1.0 [88] */
1415 private static final boolean Digit(char c) {
1416 return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9')
1417 || (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F')
1418 || (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF')
1419 || (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F')
1420 || (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
1423 /** Extender - XMl Specification 1.0 [89] */
1424 private static final boolean Extender(char c) {
1425 return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387'
1426 || c == '\u0640' || c == '\u0E46' || c == '\u0EC6' || c == '\u3005'
1427 || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || (c >= '\u30FC' && c <= '\u30FE');
1430 /** Whitespace - XML Specification 1.0 [3] */
1431 private static final boolean S(char c) {
1432 return c == '\u0020' || c == '\u0009' || c == '\r' || c == '\n';