From 7fbee73b4dd985cb5b217ed297710c00fd9d7004 Mon Sep 17 00:00:00 2001 From: adam Date: Wed, 4 Jan 2006 02:17:05 -0500 Subject: [PATCH] checkpoint darcs-hash:20060104071705-5007d-ec8270ebcc902b2364afb0ef91ebb8bf71ff6253.gz --- src/edu/berkeley/sbp/Forest.java | 5 ++ src/edu/berkeley/sbp/GSS.java | 65 ++++++++++++++---------- src/edu/berkeley/sbp/Parser.java | 40 ++++++++++++++- src/edu/berkeley/sbp/misc/MetaGrammar.java | 17 ++++--- src/edu/berkeley/sbp/tib/TibDoc.java | 14 +++++- src/edu/berkeley/sbp/util/PrintableTree.java | 69 ++++++++++++++++++++++++-- src/edu/berkeley/sbp/util/ToJava.java | 10 ++++ src/edu/berkeley/sbp/util/Topology.java | 1 - tests/meta.g | 2 +- tests/regression.tc | 7 +-- tests/tibdoc.g | 12 ++--- 11 files changed, 192 insertions(+), 50 deletions(-) create mode 100644 src/edu/berkeley/sbp/util/ToJava.java diff --git a/src/edu/berkeley/sbp/Forest.java b/src/edu/berkeley/sbp/Forest.java index 8587dcb..ed1c9ac 100644 --- a/src/edu/berkeley/sbp/Forest.java +++ b/src/edu/berkeley/sbp/Forest.java @@ -197,6 +197,11 @@ public abstract class Forest { public String toString() { if (toString != null) return toString; StringBuffer ret = new StringBuffer(); + if (results.size()==1) { + for(Forest.Body r : results) + ret.append(r); + return toString = ret.toString(); + } ret.append(" r : results) { diff --git a/src/edu/berkeley/sbp/GSS.java b/src/edu/berkeley/sbp/GSS.java index 5ed0988..dcc8f33 100644 --- a/src/edu/berkeley/sbp/GSS.java +++ b/src/edu/berkeley/sbp/GSS.java @@ -45,14 +45,15 @@ class GSS { /** all reductions (pending and completed) */ private HashSet reductions = new HashSet(); /* ALLOC */ - + /** all nodes, keyed by the value returned by code() */ private HashMap hash = new HashMap(); /* ALLOC */ /** the number of pending reductions */ private int pendingReductions = 0; private int totalReductions = 0; - private HashSet pendingReduct = new HashSet(); + //private HashSet pendingReduct = new HashSet(); + private LinkedList pendingReduct = new LinkedList(); /** the number of nodes in this phase */ private int numNodes = 0; @@ -102,8 +103,8 @@ class GSS { int count = 0; Parser.Table.Reduction r = null; for(Parser.Table.Reduction red : token==null ? state.getEofReductions() : state.getReductions(token)) { r = red; count++; } - //if (count==0) return; -- BEWARE! this optimization is suspected to cause really nasty heisenbugs - if (count > 1) break; + //if (count==0) return; // BEWARE! this optimization is suspected to cause really nasty heisenbugs + //if (count > 1) break; //if (r.numPop == 0) break; //r.reduce(pending, parent, null, Phase.this, null); //return; @@ -116,12 +117,15 @@ class GSS { /** perform all reduction operations */ public void reduce() { - for(Phase.Node n : hash.values()) { + HashSet s = new HashSet(); + s.addAll(hash.values()); + for(Phase.Node n : s) { n.queueEmptyReductions(); n.queueReductions(); } while(pendingReduct.size()>0) - pendingReduct.iterator().next().go(); + //pendingReduct.iterator().next().go(); + pendingReduct.removeFirst().go(); } /** perform all shift operations, adding promoted nodes to next */ @@ -173,9 +177,9 @@ class GSS { // GSS Nodes ////////////////////////////////////////////////////////////////////////////// - private HashMap pcache = new HashMap(); + //private HashMap pcache = new HashMap(); /** a node in the GSS */ - public class Node { + public final class Node { private Forest.Ref holder = null; @@ -184,13 +188,13 @@ class GSS { /** the set of nodes to which there is an edge starting at this node */ public final FastSet parents = new FastSet(); /* ALLOC */ - /** what state this node is in */ public final Parser.Table.State state; /** which Phase this Node belongs to (node that Node is also a non-static inner class of Phase) */ public final Phase phase = Phase.this; - public HashMap cache() { return cache==null ? (cache = new HashMap()) : cache; } + public HashMap cache() { + return cache==null ? (cache = new HashMap()) : cache; } public Forest.Ref holder() { return holder==null ? (holder = new Forest.Ref()) : holder; } public Forest pending() { return Phase.this.closed ? holder().resolve() : holder; } public FastSet parents() { return parents; } @@ -203,18 +207,7 @@ class GSS { /** FIXME */ public void queueReductions(Node n2) { - new Reduct(this, n2, null); - for(Parser.Table.Reduction r : token==null ? state.getEofReductions() : state.getReductions(token)) { - - // currently we have this weird problem where we - // have to do an individual reduct for each child - // when the reduction length is one (ie the - // children wind up being children of the newly - // created node rather than part of the popped - // sequence - - if (r.numPop == 1) new Reduct(this, n2, r); - } + newReduct(this, n2, null); } @@ -222,7 +215,7 @@ class GSS { public void queueEmptyReductions() { for(Parser.Table.Reduction r : token==null ? state.getEofReductions() : state.getReductions(token)) { if (r.numPop==0) - new Reduct(this, null, r); /* ALLOC */ + newReduct(this, null, r); /* ALLOC */ } } @@ -237,6 +230,9 @@ class GSS { } } + public void newReduct(Node n, Node n2, Parser.Table.Reduction r) { + new Reduct(n, n2, r)/*.go()*/; + } // Forest / Completed Reductions ////////////////////////////////////////////////////////////////////////////// @@ -265,7 +261,7 @@ class GSS { this.r = r; if (reductions.contains(this)) { done = true; return; } reductions.add(this); - pendingReduct.add(this); + pendingReduct.addFirst(this); pendingReductions++; } @@ -276,13 +272,28 @@ class GSS { pendingReduct.remove(this); pendingReductions--; + if (r==null) + for(Parser.Table.Reduction r : token==null ? n.state.getEofReductions() : n.state.getReductions(token)) { + + // currently we have this weird problem where we + // have to do an individual reduct for each child + // when the reduction length is one (ie the + // children wind up being children of the newly + // created node rather than part of the popped + // sequence + + if (r.numPop == 1) new Reduct(n, n2, r).go(); + } + + // FIXME: explain this if (r==null) { for(Parser.Table.Reduction r : token==null ? n.state.getEofReductions() : n.state.getReductions(token)) { if (r.numPop <= 1) continue; r.reduce(n, n2, Phase.this, null); } - } else if (r.numPop<=1) { + } else if (r.numPop==0) { r.reduce(n, n2, n.phase, r.zero()); + } else if (r.numPop==1) { // UGLY HACK // The problem here is that a "reduction of length 0/1" // performed twice with different values of n2 needs @@ -294,9 +305,9 @@ class GSS { // cache instances here as a way of avoiding // recreating them. - Forest ret = (r.numPop==0 ? pcache : n.cache()).get(r); + Forest ret = n.cache().get(r); if (ret != null) r.reduce(n, n2, n.phase, ret); - else (r.numPop==0 ? pcache : n.cache()).put(r, r.reduce(n, n2, n.phase, null)); + else n.cache().put(r, r.reduce(n, n2, n.phase, null)); } else { r.reduce(n, n2, Phase.this, null); diff --git a/src/edu/berkeley/sbp/Parser.java b/src/edu/berkeley/sbp/Parser.java index 3fad1dd..58d0bc2 100644 --- a/src/edu/berkeley/sbp/Parser.java +++ b/src/edu/berkeley/sbp/Parser.java @@ -22,7 +22,15 @@ public abstract class Parser { /** parse input for a exactly one unique result, throwing Ambiguous if not unique or Failed if none */ - public Tree parse1(Token.Stream input) throws IOException, Failed, Ambiguous { return parse(input).expand1(); } + public Tree parse1(Token.Stream input) throws IOException, Failed, Ambiguous { + Forest ret = parse(input); + try { return ret.expand1(); } + catch (Ambiguous a) { + System.out.println("while expanding:"); + System.out.println(ret); + throw a; + } + } /** parse input, using the table pt to drive the parser */ public Forest parse(Token.Stream input) throws IOException, Failed { @@ -139,6 +147,27 @@ public abstract class Parser { /** a single state in the LR table and the transitions possible from it */ public class State implements Comparable, Iterable { + /* + public boolean isResolvable(Token t) { + boolean found = false; + for(Reduction r : getReductions(t)) { + Position p = r.position; + if (!p.isRightNullable(cache)) continue; + if (p.owner().firstp()==p) continue; + if (found) { + // found two items meeting criteria #1 + return false; + } else { + found = true; + continue; + } + if (p.element()==null) continue; + Topology first = new Walk.First(top(), cache).walk(p.element()); + if (first.contains(t)) + } + } + */ + public final int idx = master_state_idx++; private final HashSet hs; @@ -265,7 +294,7 @@ public abstract class Parser { public class Reduction { // FIXME: cleanup; almost everything in here could go in either Sequence.Position.getRewrite() or else in GSS.Reduct public final int numPop; - private final Position position; + /*private*/ final Position position; private final Forest[] holder; // to avoid constant reallocation public int hashCode() { return position.hashCode(); } public boolean equals(Object o) { @@ -289,6 +318,13 @@ public abstract class Parser { return reduce(parent, numPop-1, rex, onlychild, target); } + private Forest zero = null; + public Forest zero() { + if (zero != null) return zero; + if (numPop > 0) throw new Error(); + return zero = position.rewrite(null); + } + // FIXME: this could be more elegant and/or cleaner and/or somewhere else private Forest reduce(GSS.Phase.Node parent, int pos, Forest rex, GSS.Phase.Node onlychild, GSS.Phase target) { if (pos>=0) holder[pos] = parent.pending(); diff --git a/src/edu/berkeley/sbp/misc/MetaGrammar.java b/src/edu/berkeley/sbp/misc/MetaGrammar.java index 74c352f..73e1732 100644 --- a/src/edu/berkeley/sbp/misc/MetaGrammar.java +++ b/src/edu/berkeley/sbp/misc/MetaGrammar.java @@ -113,7 +113,7 @@ public class MetaGrammar extends StringWalker { else if ("literal".equals(head)) { Element ret = string(string(tree.child(0))); dropAll.add(ret); return ret; } else if ("-".equals(head)) return new Range(walk(tree, 0).toString().charAt(0), walk(tree,1).toString().charAt(0)); else if ("range".equals(head)) return new Range(walk(tree, 0).toString().charAt(0), walk(tree,0).toString().charAt(0)); - else if ("gram".equals(head)) return walk(tree, 1); + else if ("gram".equals(head)) return walk(tree, 0); else if ("=>".equals(head)) { PreSequence p = (PreSequence)walk(tree, 0); p.tag = string(tree.child(1)); return p; } else if ("psy".equals(head)) return (PreSequence)walk(tree, 0); else if ("psyl".equals(head)) throw new Error("not supported"); @@ -334,12 +334,13 @@ public class MetaGrammar extends StringWalker { + + + + // DO NOT EDIT STUFF BELOW: IT IS AUTOMATICALLY GENERATED -new edu.berkeley.sbp.Tree(null, "gram", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { }), - new edu.berkeley.sbp.Tree(null, "grammar", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "::=", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })}), - new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=>", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "w", new edu.berkeley.sbp.Tree[] { }), - new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })})}), - new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "g", new edu.berkeley.sbp.Tree[] { }), +new edu.berkeley.sbp.Tree(null, "gram", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "grammar", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "::=", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })}), + new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=>", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "g", new edu.berkeley.sbp.Tree[] { }), new edu.berkeley.sbp.Tree(null, "r", new edu.berkeley.sbp.Tree[] { }), new edu.berkeley.sbp.Tree(null, "a", new edu.berkeley.sbp.Tree[] { }), new edu.berkeley.sbp.Tree(null, "m", new edu.berkeley.sbp.Tree[] { }), @@ -923,3 +924,7 @@ new edu.berkeley.sbp.Tree(null, "gram", new edu.berkeley.sbp.Tree[] { new edu.be + + + + diff --git a/src/edu/berkeley/sbp/tib/TibDoc.java b/src/edu/berkeley/sbp/tib/TibDoc.java index f519729..347f59e 100644 --- a/src/edu/berkeley/sbp/tib/TibDoc.java +++ b/src/edu/berkeley/sbp/tib/TibDoc.java @@ -21,7 +21,19 @@ public class TibDoc { System.out.println("\nparsing " + s[1]); res = new CharToken.CharToStringParser(mg).parse1(new Tib(new FileInputStream(s[1]))); - System.out.println(res); + System.out.println(((Tree)walk(res)).toString(0, 0, 120)); + } + + public static Tree walk(Tree tree) { + String head = tree.head(); + if ("stringify".equals(head)) { + String ret = ""; + for(Tree t : tree.child(0)) ret += t; + return new Tree(null, ret); + } + Tree[] children = new Tree[tree.numChildren()]; + for(int i=0; i(null, head, children); } diff --git a/src/edu/berkeley/sbp/util/PrintableTree.java b/src/edu/berkeley/sbp/util/PrintableTree.java index a50aff9..c805cab 100644 --- a/src/edu/berkeley/sbp/util/PrintableTree.java +++ b/src/edu/berkeley/sbp/util/PrintableTree.java @@ -1,16 +1,79 @@ -package edu.berkeley.sbp; -import edu.berkeley.sbp.*; +package edu.berkeley.sbp.util; import edu.berkeley.sbp.*; import edu.berkeley.sbp.util.*; import java.io.*; import java.util.*; import java.lang.reflect.*; -public abstract class PrintableTree implements Iterable { +public abstract class PrintableTree implements Iterable, ToJava { protected abstract String headToString(); protected abstract String headToJava(); + private boolean empty() { + for(T t : this) return false; + return true; + } + + private static final int MAXDEPTH=3; + public int depth() { + int depth = headToString()==null ? 0 : 1; + int ret = depth; + for(T t : this) ret = Math.max(ret, depth+t.depth()); + return ret; + } + public String toString(int indent0, int cur, int limit) { + int indent = indent0; + String s = toString(); + if (depth()indent) { + ret.append('\n'); for(int i=0; i 0) ret.append("\n "); + } + ret.append(s); + if (s.indexOf('\n')!=-1) + cur = s.length()-s.lastIndexOf('\n'); + else + cur += s.length(); + if (cur>indent) { + ret.append(' '); + cur += s.length()+1; + } + } + if (head==null) { + ret.append("}"); + } else if (cur>indent) { + /* + indent = indent0; + ret.append('\n'); for(int i=0; iV (roughly, infinite sets of V's equipped with union/intersection/complement) */ public interface Topology { diff --git a/tests/meta.g b/tests/meta.g index 884f2aa..98a62a3 100644 --- a/tests/meta.g +++ b/tests/meta.g @@ -1,4 +1,4 @@ -s ::= ws grammar ws => "gram" +s ::= grammar ws => "gram" ws !::= w** | w** "//" (~[\n]*) "\n" ws wp !::= w++ grammar ::= r +/ ws => "grammar" diff --git a/tests/regression.tc b/tests/regression.tc index 863b62e..a713405 100644 --- a/tests/regression.tc +++ b/tests/regression.tc @@ -107,11 +107,12 @@ testcase { } testcase { - input "xbambambam"; + input "qxbambambam"; output "bam:{a bam:{a bam:{a x:{x}}}}"; - s ::= a s ^"bam" - s ::= ^"x" + s ::= "q" z + z ::= a z ^"bam" + z ::= ^"x" a ::= () => "a" } diff --git a/tests/tibdoc.g b/tests/tibdoc.g index 1ea798d..9cff556 100644 --- a/tests/tibdoc.g +++ b/tests/tibdoc.g @@ -46,7 +46,7 @@ SectionHeaderBody ::= "=" SectionHeaderBody "=" kv ::= word "=" text /ws => kv1 -num !::= [0-9]++ +num !::= [0-9]++ => "stringify" Paragraph ::= { "\"\"" ws text } => "blockquote" > { "*" " " ws text } => "ul" > { "#" " " ws text } => "ol" @@ -61,7 +61,7 @@ item ::= pre > structured > styled > "\"" text "\"" => quoted - > [a-zA-Z0-9]++ + > alphanum++ => "stringify" > symbol symbol ::= symbolx & sym++ @@ -88,8 +88,8 @@ glyph ::= "(r)" | "(c)" | "(tm)" // euro symbol? // only gets parsed once urlpath ::= urlchar* -username ::= [a-zA-Z0-9;/?:&=$\-_.+]++ -password ::= [a-zA-Z0-9;/?:&=$\-_.+]++ +username ::= [a-zA-Z0-9;/?:&=$\-_.+]++ => "stringify" +password ::= [a-zA-Z0-9;/?:&=$\-_.+]++ => "stringify" urlchar ::= [a-zA-Z0-9;/?:&=$\-_.+@] | "%" [0-9] [0-9] => "%" url ::= "mailto" ":" email @@ -99,7 +99,7 @@ method ::= [+\-.a-z0-9]+ port ::= [0-9]+ domain ::= part +/ "." -part ::= [a-zA-Z0-9\-]++ // interesting use of boolean grammars +part ::= [a-zA-Z0-9\-]++ => "stringify" // interesting use of boolean grammars // &~ ([\-0-9] ~[]* | ~[]* [\-0-9]) email ::= username "@" host => email @@ -110,7 +110,7 @@ host ::= [0-9]+ "." [0-9]+ "." [0-9]+ "." [0-9]+ => "ip" // Tokens /////////////////////////////////////////////////////////////////// -word ::= alphanum++ +word ::= alphanum++ => "stringify" | quoted quoted ::= "\"" ((~[\"\\] | escaped)+) "\"" -- 1.7.10.4