From: adam Date: Thu, 5 Jan 2006 07:39:30 +0000 (-0500) Subject: it all works X-Git-Tag: tag_for_25-Mar~432 X-Git-Url: http://git.megacz.com/?p=sbp.git;a=commitdiff_plain;h=f33c05adc5aa3dd324c5352cdbd6f4b55359acad;hp=ce56b1e47ae08b71599fe5794f71034f6f53ec5d;ds=sidebyside it all works darcs-hash:20060105073930-5007d-0aa9bef7e97fbe87265879b06083b3ebca3522de.gz --- diff --git a/TODO b/TODO index 3e67a2e..31d33a0 100644 --- a/TODO +++ b/TODO @@ -3,14 +3,11 @@ Immediately - Performance - - Next target: TopologicalBag (make it wickedfast: preoptimize) - - Forest: keep() and valid() -- can we do this with states rather than subtrees? - hash Long->long: it's all bogus - * huge performance improvement (try for more) * pick back up cleaning up end of Parser.java (Reduction) * some weird edge cases; check last regression test, 'make doc' diff --git a/src/edu/berkeley/sbp/Forest.java b/src/edu/berkeley/sbp/Forest.java index 6437341..5c09170 100644 --- a/src/edu/berkeley/sbp/Forest.java +++ b/src/edu/berkeley/sbp/Forest.java @@ -44,6 +44,7 @@ public abstract class Forest { this.tag = tag; this.tokens = tokens==null ? emptyForestArray : new Forest[tokens.length]; if (tokens != null) System.arraycopy(tokens, 0, this.tokens, 0, tokens.length); + if (tokens != null) for(int i=0; i { return needs <= -1 * creator.needs.size(); } - + private boolean rep = false; public String toString() { - StringBuffer ret = new StringBuffer(); - for(int i=0; i 0) { - ret.append(q); - ret.append(" "); + if (rep) return "***"; + try { + rep = true; + StringBuffer ret = new StringBuffer(); + for(int i=0; i 0) { + ret.append(q); + ret.append(" "); + } } + String tail = ret.toString().trim(); + String head = (tag!=null && !tag.toString().equals("")) ? (tail.length() > 0 ? tag+":" : tag+"") : ""; + if (tail.length() > 0) tail = "{" + tail + "}"; + return head + tail; + } finally { + rep = false; } - String tail = ret.toString().trim(); - String head = (tag!=null && !tag.toString().equals("")) ? (tail.length() > 0 ? tag+":" : tag+"") : ""; - if (tail.length() > 0) tail = "{" + tail + "}"; - return head + tail; } } @@ -138,9 +145,10 @@ public abstract class Forest { public boolean valid = false; public Ref() { } public void merge(Forest p) { + //if (p==null) throw new Error("bad evil bad!"); if (res != null) throw new Error("already resolved!"); if (p==null) throw new Error(); - if (p!=this) hp.add(p); + if (p!=this) hp.add(p, true); } public Iterator> iterator() { return ((IterableForest)resolve()).iterator(); } public HashSet> expand(boolean toss) { return resolve().expand(toss); } @@ -152,8 +160,12 @@ public abstract class Forest { FastSet nh = new FastSet(); for(Forest p : hp) for(Body b : (IterableForest)p) { - if (b.keep() && (b.creator==null || !b.creator.lame)) { valid = true; b.addTo(nh); } - else results = new HashSet(); + if (b.keep() && (b.creator==null || !b.creator.lame)) { + valid = true; + b.addTo(nh); + } else { + results = new HashSet(); + } } if (results != null) { for(Forest p : hp) @@ -168,7 +180,8 @@ public abstract class Forest { } } hp = null; - return res = new MultiForest(nh, valid); + res = new MultiForest(nh, valid); + return res; } } diff --git a/src/edu/berkeley/sbp/GSS.java b/src/edu/berkeley/sbp/GSS.java index 1d401a5..16725f1 100644 --- a/src/edu/berkeley/sbp/GSS.java +++ b/src/edu/berkeley/sbp/GSS.java @@ -73,23 +73,22 @@ class GSS { * @param fromEmptyReduction true iff this node is being created as a result of a reduction of length zero (see GRMLR paper) * @param start the earliest part of the input contributing to this node (used to make merging decisions) */ - public void newNode(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction, Phase start) { - Node p = hash.get(code(state, start)); - if (p != null) newNode2(p, parent, pending, state, fromEmptyReduction, start); - else newNode3(parent, pending, state, fromEmptyReduction, start); + public void newNode(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction) { + Node p = hash.get(code(state, parent==null?null:parent.phase())); + if (p != null) newNode2(p, parent, pending, state, fromEmptyReduction); + else newNode3(parent, pending, state, fromEmptyReduction); } - private void newNode2(Node p, Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction, Phase start) { + private void newNode2(Node p, Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction) { p.holder.merge(pending); if (p.parents().contains(parent)) return; - p.addParent(parent, fromEmptyReduction); + p.parents().add(parent); + if (p!=parent && !fromEmptyReduction) p.queueReductions(parent); } - private void newNode3(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction, Phase start) { + private void newNode3(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction) { do { if (token != null && state.canShift(token)) break; if (state.isAccepting()) break; if (token==null) break; - int count = 0; - Parser.Table.Reduction r = null; if (!state.canReduce(token)) return; //if (count > 1) break; //if (r.numPop == 0) break; @@ -97,7 +96,7 @@ class GSS { //return; } while(false); - Node n = new Node(parent, pending, state, start); // ALLOC + Node n = new Node(parent, pending, state); // ALLOC n.queueEmptyReductions(); if (!fromEmptyReduction) n.queueReductions(parent); } @@ -124,7 +123,7 @@ class GSS { } public void invoke(Parser.Table.State st, Forest result, Node n) { - next.newNode(n, result, st, true, this); + next.newNode(n, result, st, true); } private Phase next = null; @@ -184,12 +183,6 @@ class GSS { /** a node in the GSS */ public final class Node extends FastSet implements Invokable { - public void addParent(Node parent, boolean fromEmptyReduction) { - if (parents().contains(parent)) return; - parents().add(parent); - if (this!=parent && !fromEmptyReduction) queueReductions(parent); - } - private Forest.Ref holder = null; private boolean allqueued = false; @@ -207,21 +200,11 @@ class GSS { if (allqueued) return; allqueued = true; int where = parents().size(); - /* - for(Parser.Table.Reduction r : state.getReductions(token)) - if (r.numPop > 0) - r.reduce(this); - */ state.invokeReductions(token, this, this, null); } public void queueReductions(Node n2) { if (!allqueued) { queueReductions(); return; } - /* - for(Parser.Table.Reduction r : state.getReductions(token)) - if (r.numPop > 0) - r.reduce(this, n2); - */ state.invokeReductions(token, this, this, n2); } @@ -231,24 +214,17 @@ class GSS { return; } if (r.numPop==0) return; - if (n2==null) { - r.reduce(n); - } else { - r.reduce(n, n2); - } + if (n2==null) r.reduce(n); + else r.reduce(n, n2); } public void queueEmptyReductions() { if (!reducing) return; - /* - for(Parser.Table.Reduction r : state.getReductions(token)) - if (r.numPop==0) - r.reduce(this); - */ state.invokeReductions(token, this, null, null); } - private Node(Node parent, Forest pending, Parser.Table.State state, Phase start) { + private Node(Node parent, Forest pending, Parser.Table.State state) { this.state = state; + Phase start = parent==null ? null : parent.phase(); if (pending != null) this.holder().merge(pending); if (parent != null) parents().add(parent); if (Phase.this.hash.get(code(state, start)) != null) throw new Error("severe problem!"); @@ -269,6 +245,6 @@ class GSS { /** this is something of a hack right now */ private static long code(Parser.Table.State state, Phase start) { - return (((long)state.idx) << 32) | (start==null ? 0 : start.pos); + return (((long)state.idx) << 32) | (start==null ? 0 : (start.pos+1)); } } diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index 81b4597..4244d21 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -37,7 +37,7 @@ public abstract class Parser { GSS gss = new GSS(); Token.Location loc = input.getLocation(); GSS.Phase current = gss.new Phase(null, input.next(), loc); - current.newNode(null, null, pt.start, true, null); + current.newNode(null, null, pt.start, true); for(;;) { loc = input.getLocation(); GSS.Phase next = gss.new Phase(current, input.next(), loc); @@ -142,6 +142,10 @@ public abstract class Parser { if (p.element() != null && p.element() instanceof Atom) state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()))); } + for(State state : all_states.values()) { + state.oreductions = state.reductions.optimize(); + state.oshifts = state.shifts.optimize(); + } } /** a single state in the LR table and the transitions possible from it */ @@ -179,15 +183,15 @@ public abstract class Parser { private TopologicalBag shifts = new TopologicalBag(); private boolean accept = false; - private VisitableMap oshifts = shifts; - //private TopologicalBag reductions2 = reductions; + private VisitableMap oshifts = null; + private VisitableMap oreductions = null; // Interface Methods ////////////////////////////////////////////////////////////////////////////// public boolean isAccepting() { return accept; } - public boolean canShift(Token t) { return shifts.contains(t); } - public boolean canReduce(Token t) { return t==null ? eofReductions.size()>0 : reductions.has(t); } + public boolean canShift(Token t) { return oshifts.contains(t); } + public boolean canReduce(Token t) { return t==null ? eofReductions.size()>0 : oreductions.contains(t); } public Iterator iterator() { return hs.iterator(); } @@ -196,7 +200,7 @@ public abstract class Parser { } public void invokeReductions(Token t, Invokable irbc, B b, C c) { if (t==null) for(Reduction r : eofReductions) irbc.invoke(r, b, c); - else reductions.invoke(t, irbc, b, c); + else oreductions.invoke(t, irbc, b, c); } // Constructor ////////////////////////////////////////////////////////////////////////////// @@ -337,6 +341,7 @@ public abstract class Parser { public void reduce(GSS.Phase.Node parent, GSS.Phase.Node onlychild) { if (numPop<=0) throw new Error("called wrong form of reduce()"); int pos = numPop-1; + Forest old = holder[pos]; holder[pos] = parent.pending(); if (pos==0) { System.arraycopy(holder, 0, position.holder, 0, holder.length); @@ -344,23 +349,27 @@ public abstract class Parser { } else { reduce(onlychild, pos-1, parent.phase()); } + holder[pos] = old; } // FIXME: this could be more elegant and/or cleaner and/or somewhere else private void reduce(GSS.Phase.Node parent, int pos, GSS.Phase target) { + Forest old = holder[pos]; holder[pos] = parent.pending(); if (pos==0) { System.arraycopy(holder, 0, position.holder, 0, holder.length); + for(int i=0; i { final Forest rewrite(Token.Location loc) { if (this==firstp() && eps) return epsilonForm; eps = true; - for(int i=pos; i ret = Sequence.this.postReduce(loc, holder); for(int k=0; k { public Unwrap(Element[] e, HashSet and, HashSet not) { super(e, and, not); this.drops = null; } public Unwrap(Element[] e, boolean[] drops, HashSet and, HashSet not) { super(e, and, not); this.drops = drops; } public Forest postReduce(Token.Location loc, Forest[] args) { + for(int i=0; i implements Iterator, Iterable { } public void add(T t, boolean check) { //if (check) for(Object o : this) if (o.equals(t)) return; - if (check) for(Object o : this) if (o==t) return; + if (check) { + if (only==t) return; + if (array != null) + for(int i=0; i implements MapBag,V>, VisitableMap< return ret; } } + + public VisitableMap optimize() { + ArrayList min_ = new ArrayList(); + ArrayList max_ = new ArrayList(); + ArrayList v_ = new ArrayList(); + for(Topology t : h.keySet()) { + ArrayList al = new ArrayList(); + for(V vv : h.get(t)) al.add(vv); + Object[] vs = new Object[al.size()]; + al.toArray(vs); + IntegerTopology it = (IntegerTopology)t; + for(Range r : it.getRanges()) { + min_.add(r.isMinNegInf() ? Long.MIN_VALUE : r.getMin()); + max_.add(r.isMaxPosInf() ? Long.MAX_VALUE : r.getMax()); + v_.add(vs); + } + } + final int size = v_.size(); + final long[] min = new long[size]; for(int i=0; i() { + public boolean contains(K k) { + IntegerTopology.IntegerMappable im = (IntegerTopology.IntegerMappable)k; + int asint = im.toInt(); + for(int i=0; i= asint && v[i].length > 0) + return true; + return false; + } + public void invoke(K k, Invokable ivbc, B b, C c) { + IntegerTopology.IntegerMappable im = (IntegerTopology.IntegerMappable)k; + int asint = im.toInt(); + for(int i=0; i= asint) { + Object[] arr = v[i]; + for(int j=0; j emptyIterator = new EmptyIterator(); } diff --git a/tests/input.tibdoc b/tests/input.tibdoc index ac0d1ea..7fab26b 100644 --- a/tests/input.tibdoc +++ b/tests/input.tibdoc @@ -4,7 +4,6 @@ header comment = my homepage is at == Introduction == - this is the body adam@megacz.com text + this is the body adam@megacz.com + - the following paragraph demonstrates verbatim stuff, as well as a - footnote ((like this)) because they are coool diff --git a/tests/regression.tc b/tests/regression.tc index a249286..f0815a8 100644 --- a/tests/regression.tc +++ b/tests/regression.tc @@ -272,59 +272,59 @@ testcase { q ::= [a-z]++ => "q" } -//testcase { -// -// input " -// -// -// while x>0 -// while y>0 -// foo() -// bar() -// -// while x>0 -// while y>0 -// foo() -// bar() -// -// -//"; -// output "smt:{while:{>:{{x} {0}} while:{>:{{y} {0}} sbb:{{f o o} {b a r}}}}}"; -// output "smt:{while:{>:{{x} {0}} sbb:{while:{>:{{y} {0}} {f o o}} {b a r}}}}"; -// -//indent !::= ww -//outdent !::= " " outdent " " -// | " " (~[]*) "\n" -// -//any !::= ~[]* -//s ::= any "\n\n" ww statement ww "\n\n" any => smt -//ww !::= sp* -//ws !::= sp** -//sp ::= " " -// -//block ::= "\n" indent blockBody -// &~ "\n" outdent ~[\ ] ~[]* -// -//blockBody ::= statement -// > statement blockBody /ws => "sbb" -// -//statement ::= call -// | ^"while" expr block /ws -// -//expr ::= ident -// | call -// | expr ^">" expr /ws -// | num -// -//call ::= expr "()" /ws -// -//num ::= [0-9]++ -// -//ident ::= [a-z]++ &~ keyword -//keyword ::= "if" | "then" | "else" | "while" -// -//w ::= " " | "\n" | "\r" -//ws ::= w* -// -// -//} +testcase { + + input " + + + while x>0 + while y>0 + foo() + bar() + + while x>0 + while y>0 + foo() + bar() + + +"; + output "smt:{while:{>:{{x} {0}} while:{>:{{y} {0}} sbb:{{f o o} {b a r}}}}}"; + output "smt:{while:{>:{{x} {0}} sbb:{while:{>:{{y} {0}} {f o o}} {b a r}}}}"; + +indent !::= ww +outdent !::= " " outdent " " + | " " (~[]*) "\n" + +any !::= ~[]* +s ::= any "\n\n" ww statement ww "\n\n" any => smt +ww !::= sp* +ws !::= sp** +sp ::= " " + +block ::= "\n" indent blockBody + &~ "\n" outdent ~[\ ] ~[]* + +blockBody ::= statement + > statement blockBody /ws => "sbb" + +statement ::= call + | ^"while" expr block /ws + +expr ::= ident + | call + | expr ^">" expr /ws + | num + +call ::= expr "()" /ws + +num ::= [0-9]++ + +ident ::= [a-z]++ &~ keyword +keyword ::= "if" | "then" | "else" | "while" + +w ::= " " | "\n" | "\r" +ws ::= w* + + +} diff --git a/tests/tibdoc.g b/tests/tibdoc.g index 5926194..99bab83 100644 --- a/tests/tibdoc.g +++ b/tests/tibdoc.g @@ -47,11 +47,10 @@ SectionHeaderBody ::= "=" SectionHeaderBody "=" kv ::= word "=" text /ws => kv1 -num !::= [0-9]++ => "stringify" Paragraph ::= { "\"\"" ws text } => "blockquote" > { "*" " " ws text } => "ul" > { "#" " " ws text } => "ol" - > { num " " ws text } => "ol" + > { num " " ws text => "ol" } > { "---" "-"* } => "hr" > { text } => "p" @@ -100,12 +99,12 @@ method ::= [+\-.a-z0-9]+ port ::= [0-9]+ domain ::= part +/ "." -part ::= [a-zA-Z0-9\-]++ => "stringify" // interesting use of boolean grammars +part ::= [A-Za-z0-9\-]++ => "stringify" // &~ ([\-0-9] ~[]* | ~[]* [\-0-9]) email ::= username "@" host => email -host ::= [0-9]+ "." [0-9]+ "." [0-9]+ "." [0-9]+ => "ip" - | domain +host ::= domain + | [0-9]+ "." [0-9]+ "." [0-9]+ "." [0-9]+ => "ip" @@ -124,7 +123,8 @@ escaped ::= "\\n" => "\n" // Chars /////////////////////////////////////////////////////////////// alpha ::= [a-zA-Z] -num ::= [0-9] +num !::= [0-9]++ => "stringify" +//num ::= [0-9] alphanum ::= [a-zA-Z0-9] sym ::= ~[a-zA-Z0-9\ \r\n]