// Copyright 2002 Adam Megacz, see the COPYING file for licensing [GPL]
package org.xwt;
-import org.xwt.util.*;
import java.util.*;
import java.net.*;
import java.io.*;
+import org.xwt.js.*;
+import org.xwt.util.*;
/*
- While entities are limited to a subset of Unicode characters ,
- numeric character references can specify any character. Numeric
- character references may be given in decimal or hexadecimal, though
- browser support is stronger for decimal references. Decimal
- references are of the form &#number; while hexadecimal references
- take the case-insensitive form &#xnumber;. Examples of numeric
- character references include © or © for the copyright
- symbol, Α or Α for the Greek capital letter alpha, and
- ا or ا for the Arabic letter ALEF.
-
- http://www.htmlhelp.com/reference/html40/entities/special.html
- http://www.htmlhelp.com/reference/html40/entities/symbols.html
- http://www.htmlhelp.com/reference/html40/entities/latin1.html
-
- FIXME FIXME FIXME: <li> tags close enclosing <li> tags
-*/
+ * While entities are limited to a subset of Unicode characters,
+ * numeric character references can specify any character. Numeric
+ * character references may be given in decimal or hexadecimal, though
+ * browser support is stronger for decimal references. Decimal
+ * references are of the form &#number; while hexadecimal references
+ * take the case-insensitive form &#xnumber;. Examples of numeric
+ * character references include &#169; or &#xA9; for the copyright
+ * symbol, &#913; or &#x391; for the Greek capital letter alpha, and
+ * &#1575; or &#x627; for the Arabic letter ALEF.
+ *
+ * http://www.htmlhelp.com/reference/html40/entities/special.html
+ * http://www.htmlhelp.com/reference/html40/entities/symbols.html
+ * http://www.htmlhelp.com/reference/html40/entities/latin1.html
+ */
/**
* This class parses an InputStream containing HTML and returns it
/** we keep a StringBuffer around for use by removeRedundantWhitespace() */
private static StringBuffer sbuf = null;
- public static synchronized JSObject parseReader(Reader r) throws IOException {
+ /** true iff we have encountered an LI more recently than the last OL/UL */
+ private static boolean withinLI = false;
+
+ public static synchronized JS parseReader(Reader r) throws IOException {
CharStream cs = new CharStream(r);
- JSObject h = new JSObject();
+ JS.Obj h = new JS.Obj();
+ withinLI = false;
h.put("$name", "html");
try {
// continue until we get an EOFException
}
- Object[] ids = h.getIds();
+ Object[] ids = h.keys();
for(int i=0; i<ids.length; i++) {
Object el = h.get((String)ids[i]);
- if (el instanceof JSObject && "html".equals(((JSObject)el).get("$name")))
- return (JSObject)el;
+ if (el instanceof JS && "html".equals(((JS)el).get("$name")))
+ return (JS)el;
}
return h;
* facilitate correcting broken HTML. Otherwise, this returns
* null.
*/
- private static String parseElement(CharStream cs, JSObject h) throws IOException {
+ private static String parseElement(CharStream cs, JS h) throws IOException {
// scan element name
while(Character.isSpace(cs.peek())) cs.get();
String elementName = parseElementName(cs);
+ // FIXME: this might not deal correctly with EOFExceptions
+ boolean saveWithinLI = withinLI;
+ if (elementName.equals("li")) {
+ if (withinLI) {
+ cs.unread(new char[] { '<', 'l', 'i', ' ' });
+ return "li";
+ } else {
+ withinLI = true;
+ }
+ } else if (elementName.equals("ol") || elementName.equals("ul")) {
+ withinLI = false;
+ }
+
h.put("$name", elementName);
if (elementName.equals("!--")) {
h.put("0", parseComment(cs));
return null;
// scan body
- return parseBody(cs, h, elementName);
+ String ret = parseBody(cs, h, elementName);
+ withinLI = saveWithinLI;
+ return ret;
}
/**
* positioned at the character immediately after the right
* bracket closing the start-tag
*/
- private static String parseBody(CharStream cs, JSObject h, String elementName) throws IOException {
+ private static String parseBody(CharStream cs, JS h, String elementName) throws IOException {
String cdata = "";
int length = h.get("$numchildren") == null ? 0 : Integer.parseInt(h.get("$numchildren").toString());
while(true) {
try {
// scan subelement
if (cs.peek() != '/') {
- JSObject kid = new JSObject();
+ JS kid = new JS.Obj();
closetag = parseElement(cs, kid);
h.put(String.valueOf(length), kid);
h.put("$numchildren", new Integer(++length));
// CharStream /////////////////////////////////////////////////////////////////////
private static class CharStream extends PushbackReader {
- public CharStream(Reader r) { super(r); }
+ public CharStream(Reader r) { super(r, 1024); }
public char peek() throws IOException {
char c = get();