_____________________________________________________________________________
Immediately
+ - Check if the only remaining stack is lame
+ - write a testcase for this
+
- circular gramars
s = A
A = A | "b"
______________________________________________________________________________
Later
+ - understand and implement the RNGLR "kernel state" optimization.
+ The _Practical Early Parsing_ paper may help.
+
- Partly-Linear-PATR? (O(n^6) unification grammar)
- Implement a k-token peek buffer (for each state, see if it "dead
void gather(HashSet<Forest<NodeType>> ht) {
touched();
+
+ // FIXME: do something more sensible here
+ if (ht.contains(this)) {
+ System.err.println("WARNING: grammar produced a circular forest\n" + this);
+ //throw new Error("grammar produced a circular forest:\n" + this);
+ return;
+ }
+
ht.add(this);
for(Forest<NodeType> f : hp) f.gather(ht);
}
public static final char left = (char)9998;
public static final char right = (char)9999;
- public static final Atom leftBrace = new CharAtom(left,left) { public String toString() { return "[{]"; } };
- public static final Atom rightBrace = new CharAtom(right,right) { public String toString() { return "[}]"; } };
- public static final Atom braces = new CharAtom(left,right) { public String toString() { return "[{}]"; } };
+ public static final Atom leftBrace = new CharAtom(left,left) { public String toString() { return "\\{"; } };
+ public static final Atom rightBrace = new CharAtom(right,right) { public String toString() { return "\\}"; } };
+ public static final Atom braces = new CharAtom(left,right) { public String toString() { return "[\\{\\}]"; } };
public static Atom set(Range.Set r) { return new CharAtom(new CharTopology(r)); }
public String toString() { return t.toString(); }
import edu.berkeley.sbp.Input.Location;
public class CharInput extends Cartesian.Input<Character> {
- private final Reader r;
+
+ private static class RollbackReader extends Reader {
+ private final Reader r;
+ public RollbackReader(Reader r) { this.r = r; }
+
+ private char[] queue = new char[1024];
+ private int head = 0;
+ private int tail = 0;
+
+ private void unread(char c) {
+ if (tail >= queue.length) {
+ if (tail - head > queue.length/2) {
+ char[] queue2 = new char[queue.length * 2];
+ System.arraycopy(queue, head, queue2, 0, tail-head);
+ } else {
+ System.arraycopy(queue, head, queue, 0, tail-head);
+ }
+ tail = tail-head;
+ head = 0;
+ }
+ queue[tail++] = c;
+ }
+
+ public void close() throws IOException { r.close(); }
+ public int read() throws IOException {
+ if (tail>head)
+ return queue[head++];
+ return r.read();
+ }
+ public int read(char cbuf[]) throws IOException { return read(cbuf, 0, cbuf.length); }
+ public int read(char cbuf[], int off, int len) throws IOException {
+ if (tail>head) {
+ int count = Math.min(len, tail-head);
+ System.arraycopy(queue, head, cbuf, off, count);
+ head += count;
+ return count;
+ }
+ return r.read(cbuf, off, len);
+ }
+ public long skip(long n) throws IOException { return r.skip(n); }
+ public boolean ready() throws IOException { return true; }
+ public boolean markSupported() { return false; }
+ public void mark(int readAheadLimit) throws IOException { throw new IOException("not supported"); }
+ public void reset() throws IOException { throw new IOException("not supported"); }
+ }
+
+ private final RollbackReader r;
public CharInput(String s) { this(new StringReader(s)); }
public CharInput(Reader r) { this(r, null); }
- public CharInput(Reader r, String s) { this.r = new BufferedReader(r); }
+ public CharInput(Reader r, String s) { this.r = new RollbackReader(new BufferedReader(r)); }
public CharInput(InputStream i) { this(i, null); }
public CharInput(InputStream i, String s) { this(new InputStreamReader(i), s); }
-
+
+ public CharInput(InputStream i, String s, boolean indent) {
+ this(new InputStreamReader(i), s);
+ this.indent = indent;
+ }
+
boolean cr = false;
+ boolean indent = false;
private int count = 0;
private StringBuilder cache = new StringBuilder();
else if (cache == null) cache = new StringBuilder();
}
+ int indentation = -1;
+ int lastIndentation = 0;
+ int queuedIndentation = 0;
+ char queuedCharacter = ' ';
+ int delta = 0;
+
public boolean isCR() { return cr; }
public Character _next() throws IOException {
+ Character ret = __next();
+ if (ret==null) return null;
+ char c = ret.charValue();
+ if (indent) {
+ if (ret==CharAtom.left) System.out.print("\033[31m{\033[0m");
+ else if (ret==CharAtom.right) System.out.print("\033[31m}\033[0m");
+ else System.out.print(c+"");
+ }
+ return ret;
+ }
+ public Character __next() throws IOException {
+
cr = false;
+
+ if (queuedIndentation < 0) {
+ queuedIndentation++;
+ return CharAtom.right;
+ }
+ if (queuedSpaces > 0) {
+ queuedSpaces--;
+ return ' ';
+ }
+ if (queuedIndentation > 0) {
+ queuedIndentation--;
+ return CharAtom.left;
+ }
+
+ if (queuedCharacter != ' ') {
+ char ret = queuedCharacter;
+ queuedCharacter = ' ';
+ return ret;
+ }
+
int i = r.read();
- if (i==-1) { /*System.err.print("\r...done \r"); */return null; }
+ if (i==-1) {
+ /*System.err.print("\r...done \r"); */
+ if (indent && indentation >= 0) {
+ redent(indentation - lastIndentation);
+ //System.err.println("\r \rindent: " + (indentation - lastIndentation));
+ lastIndentation = indentation;
+ indentation = -1;
+ return __next();
+ }
+ return null;
+ }
char c = (char)i;
if (cache != null) cache.append(c);
cr = c=='\n';
- /*
- if ((count++) % 100 == 0)
- System.err.print(" " + count + "\r");
- */
+
+ if (indent)
+ if (cr) {
+ indentation = 0;
+ } else if (c==' ' && indentation >= 0) {
+ indentation++;
+ } else if (indentation >= 0) {
+ //System.err.println("\r \rindent: " + (indentation - lastIndentation));
+ redent(indentation - lastIndentation);
+ r.unread(c);
+ lastIndentation = indentation;
+ indentation = -1;
+ return __next();
+ }
+
return c;
}
+ private void redent(int i) {
+ if (i<0) { r.unread(CharAtom.right); redent(i+1); return; }
+ if (i>0) { r.unread(CharAtom.left); redent(i-1); return; }
+ }
+
+ int queuedSpaces = 0;
+
public String showRegion(Region<Character> rc) {
if (cache == null) return null;
Cartesian.Region r = (Cartesian.Region)rc;
+
+
// DO NOT EDIT STUFF BELOW: IT IS AUTOMATICALLY GENERATED
new edu.berkeley.sbp.Tree(null, new BindingFunctor("Grammar", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.GrammarNode.class.getConstructor(new Class[] {java.lang.Object[].class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("NonTerminal", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.NonTerminalNode.class.getConstructor(new Class[] {java.lang.String.class,edu.berkeley.sbp.meta.MetaGrammarBindings.Seq[][].class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Word", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("word", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })})}),
new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("psx", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("psx", new Class[] {edu.berkeley.sbp.meta.MetaGrammarBindings.Seq.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Elements", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("seq2", new Class[] {edu.berkeley.sbp.meta.MetaGrammarBindings.ElementNode[].class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("!", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("bang", new Class[] {edu.berkeley.sbp.meta.MetaGrammarBindings.ElementNode.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("NonTerminalReference", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.NonTerminalReferenceNode.class.getConstructor(new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Word", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("word", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "w", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, new BindingFunctor("NonTerminalReference", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.NonTerminalReferenceNode.class.getConstructor(new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Word", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("word", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "w", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })})})})})}),
new edu.berkeley.sbp.Tree(null, new BindingFunctor("psx", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("psx", new Class[] {edu.berkeley.sbp.meta.MetaGrammarBindings.Seq.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Elements", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("seq2", new Class[] {edu.berkeley.sbp.meta.MetaGrammarBindings.ElementNode[].class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("^", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("caret", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Quoted", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("quoted", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Quoted", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("quoted", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "~", new edu.berkeley.sbp.Tree[] { })})})})}),
- new edu.berkeley.sbp.Tree(null, new BindingFunctor("NonTerminalReference", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.NonTerminalReferenceNode.class.getConstructor(new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Word", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("word", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "e", new edu.berkeley.sbp.Tree[] { })})})})})})})})})}),
+ new edu.berkeley.sbp.Tree(null, new BindingFunctor("NonTerminalReference", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.NonTerminalReferenceNode.class.getConstructor(new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Word", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("word", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "e", new edu.berkeley.sbp.Tree[] { })})})})})})}),
+ new edu.berkeley.sbp.Tree(null, new BindingFunctor("psx", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("psx", new Class[] {edu.berkeley.sbp.meta.MetaGrammarBindings.Seq.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Elements", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("seq2", new Class[] {edu.berkeley.sbp.meta.MetaGrammarBindings.ElementNode[].class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("^", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("caret", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Quoted", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("quoted", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Quoted", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("quoted", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "{", new edu.berkeley.sbp.Tree[] { })})})})})})})}),
+ new edu.berkeley.sbp.Tree(null, new BindingFunctor("psx", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("psx", new Class[] {edu.berkeley.sbp.meta.MetaGrammarBindings.Seq.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Elements", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("seq2", new Class[] {edu.berkeley.sbp.meta.MetaGrammarBindings.ElementNode[].class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("^", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("caret", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Quoted", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("quoted", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Quoted", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("quoted", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "}", new edu.berkeley.sbp.Tree[] { })})})})})})})})})})}),
new edu.berkeley.sbp.Tree(null, new BindingFunctor("NonTerminal", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.NonTerminalNode.class.getConstructor(new Class[] {java.lang.String.class,edu.berkeley.sbp.meta.MetaGrammarBindings.Seq[][].class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new BindingFunctor("Word", Bindable.create(edu.berkeley.sbp.meta.MetaGrammarBindings.class.getMethod("word", new Class[] {java.lang.String.class})).createBinding()), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, new ArrayBuildingTreeFunctor(), new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "N", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, "o", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, "n", new edu.berkeley.sbp.Tree[] { }),
+
+
public static @bind.as("Elements") Seq seq2(ElementNode[] elements) { return new Seq(elements); }
public static @bind.as Seq psx(Seq s) { return s; }
public static @bind.as(":") ElementNode colon(String s, ElementNode e) { return new Label(s, e); }
+ public static @bind.as("{") ElementNode leftBrace() {
+ return new Drop(new CharClass(new Range[] { new Range(CharAtom.left, CharAtom.left) })); }
+ public static @bind.as("}") ElementNode rightBrace() {
+ return new Drop(new CharClass(new Range[] { new Range(CharAtom.right, CharAtom.right) })); }
public static @bind.as(")") void close(String foo) { throw new Error("not supported"); }
public static @bind.as("()") ElementNode epsilon() { return new Constant(epsilon); }
public static @bind.as("~") ElementNode tilde(final ElementNode e) {
return new ElementNodeWrapper(e) {
public Atom toAtom(Context cx) {
- return infer((Topology<Character>)e.toAtom(cx).complement().minus(CharAtom.braces));
+ return infer((Topology<Character>)e.toAtom(cx).complement()/*.minus(CharAtom.braces)*/);
}
public Element build(Context cx, NonTerminalNode cnt) {
- return infer((Topology<Character>)e.toAtom(cx).complement().minus(CharAtom.braces));
+ return infer((Topology<Character>)e.toAtom(cx).complement()/*.minus(CharAtom.braces)*/);
} }; }
public static @bind.as("Word") String word(String s) { return s; }
public static @bind.as("Quoted") String quoted(String s) { return s; }
- public static @bind.as("escaped") String c(char c) { return c+""; }
+ public static @bind.as("escaped") String c(char c) {
+ if (c=='{') return CharAtom.left+"";
+ if (c=='}') return CharAtom.right+"";
+ return c+"";
+ }
public static @bind.as("EmptyString") String emptystring() { return ""; }
public static @bind.as("\n") String retur() { return "\n"; }
public static @bind.as("\r") String lf() { return "\r"; }
long now = System.currentTimeMillis();
if (now-then > 10) {
then = now;
- System.out.print(s + " \r");
+ //System.out.print(s + " \r");
}
if (isCR()) {
line++;
}
*/
});
- Tree ret = new CharParser(meta).parse(new FileInputStream(targetFile)).expand1();
+ System.out.println();
+ System.out.println();
+ System.out.println();
+ CharInput input = new CharInput(new FileInputStream(targetFile), "", true);
+ Tree ret = new CharParser(meta).parse(input).expand1();
if (ret==null) throw new NullPointerException("CharParser returned null");
return ret;
} catch (Throwable e) {
| ^"^" Quoted
> ^"(" RHS ")" /ws
| ^"~" e
+ | ^"{"
+ | ^"}"
NonTerminalReference = Word
Literal = Quoted
// s = a:"a" b:"b"
//}
+
+testcase {
+ input "a c";
+ s = first:: A WSA B? WSB C
+ A = "a"
+ B = "b"
+ C = "c"
+ WSA = WSA:: " "**
+ WSB = () -> ~" "
+ | WSB:: " "++
+}