public String toString() {
if (toString != null) return toString;
StringBuffer ret = new StringBuffer();
+ if (results.size()==1) {
+ for(Forest.Body<T> r : results)
+ ret.append(r);
+ return toString = ret.toString();
+ }
ret.append("<?");
boolean first = true;
for(Forest.Body<T> r : results) {
/** all reductions (pending and completed) */
private HashSet<Phase.Reduct> reductions = new HashSet<Phase.Reduct>(); /* ALLOC */
-
+
/** all nodes, keyed by the value returned by code() */
private HashMap<Long,Phase.Node> hash = new HashMap<Long,Phase.Node>(); /* ALLOC */
/** the number of pending reductions */
private int pendingReductions = 0;
private int totalReductions = 0;
- private HashSet<Reduct> pendingReduct = new HashSet<Reduct>();
+ //private HashSet<Reduct> pendingReduct = new HashSet<Reduct>();
+ private LinkedList<Reduct> pendingReduct = new LinkedList<Reduct>();
/** the number of nodes in this phase */
private int numNodes = 0;
int count = 0;
Parser.Table.Reduction r = null;
for(Parser.Table.Reduction red : token==null ? state.getEofReductions() : state.getReductions(token)) { r = red; count++; }
- //if (count==0) return; -- BEWARE! this optimization is suspected to cause really nasty heisenbugs
- if (count > 1) break;
+ //if (count==0) return; // BEWARE! this optimization is suspected to cause really nasty heisenbugs
+ //if (count > 1) break;
//if (r.numPop == 0) break;
//r.reduce(pending, parent, null, Phase.this, null);
//return;
/** perform all reduction operations */
public void reduce() {
- for(Phase.Node n : hash.values()) {
+ HashSet<Phase.Node> s = new HashSet<Phase.Node>();
+ s.addAll(hash.values());
+ for(Phase.Node n : s) {
n.queueEmptyReductions();
n.queueReductions();
}
while(pendingReduct.size()>0)
- pendingReduct.iterator().next().go();
+ //pendingReduct.iterator().next().go();
+ pendingReduct.removeFirst().go();
}
/** perform all shift operations, adding promoted nodes to <tt>next</tt> */
// GSS Nodes //////////////////////////////////////////////////////////////////////////////
- private HashMap<Parser.Table.Reduction,Forest> pcache = new HashMap<Parser.Table.Reduction,Forest>();
+ //private HashMap<Parser.Table.Reduction,Forest> pcache = new HashMap<Parser.Table.Reduction,Forest>();
/** a node in the GSS */
- public class Node {
+ public final class Node {
private Forest.Ref holder = null;
/** the set of nodes to which there is an edge starting at this node */
public final FastSet<Node> parents = new FastSet<Node>(); /* ALLOC */
-
/** what state this node is in */
public final Parser.Table.State state;
/** which Phase this Node belongs to (node that Node is also a non-static inner class of Phase) */
public final Phase phase = Phase.this;
- public HashMap<Parser.Table.Reduction,Forest> cache() { return cache==null ? (cache = new HashMap<Parser.Table.Reduction,Forest>()) : cache; }
+ public HashMap<Parser.Table.Reduction,Forest> cache() {
+ return cache==null ? (cache = new HashMap<Parser.Table.Reduction,Forest>()) : cache; }
public Forest.Ref holder() { return holder==null ? (holder = new Forest.Ref()) : holder; }
public Forest pending() { return Phase.this.closed ? holder().resolve() : holder; }
public FastSet<Node> parents() { return parents; }
/** FIXME */
public void queueReductions(Node n2) {
- new Reduct(this, n2, null);
- for(Parser.Table.Reduction r : token==null ? state.getEofReductions() : state.getReductions(token)) {
-
- // currently we have this weird problem where we
- // have to do an individual reduct for each child
- // when the reduction length is one (ie the
- // children wind up being children of the newly
- // created node rather than part of the popped
- // sequence
-
- if (r.numPop == 1) new Reduct(this, n2, r);
- }
+ newReduct(this, n2, null);
}
public void queueEmptyReductions() {
for(Parser.Table.Reduction r : token==null ? state.getEofReductions() : state.getReductions(token)) {
if (r.numPop==0)
- new Reduct(this, null, r); /* ALLOC */
+ newReduct(this, null, r); /* ALLOC */
}
}
}
}
+ public void newReduct(Node n, Node n2, Parser.Table.Reduction r) {
+ new Reduct(n, n2, r)/*.go()*/;
+ }
// Forest / Completed Reductions //////////////////////////////////////////////////////////////////////////////
this.r = r;
if (reductions.contains(this)) { done = true; return; }
reductions.add(this);
- pendingReduct.add(this);
+ pendingReduct.addFirst(this);
pendingReductions++;
}
pendingReduct.remove(this);
pendingReductions--;
+ if (r==null)
+ for(Parser.Table.Reduction r : token==null ? n.state.getEofReductions() : n.state.getReductions(token)) {
+
+ // Currently we have this weird problem where we have to do an
+ // individual reduct for each child when the reduction length is
+ // one (i.e. the children wind up being children of the newly
+ // created node rather than part of the popped sequence).
+
+ if (r.numPop == 1) new Reduct(n, n2, r).go();
+ }
+
+
// FIXME: explain this
if (r==null) {
for(Parser.Table.Reduction r : token==null ? n.state.getEofReductions() : n.state.getReductions(token)) {
if (r.numPop <= 1) continue;
r.reduce(n, n2, Phase.this, null);
}
- } else if (r.numPop<=1) {
+ } else if (r.numPop==0) { r.reduce(n, n2, n.phase, r.zero());
+ } else if (r.numPop==1) {
// UGLY HACK
// The problem here is that a "reduction of length 0/1"
// performed twice with different values of n2 needs
// cache instances here as a way of avoiding
// recreating them.
- Forest ret = (r.numPop==0 ? pcache : n.cache()).get(r);
+ Forest ret = n.cache().get(r);
if (ret != null) r.reduce(n, n2, n.phase, ret);
- else (r.numPop==0 ? pcache : n.cache()).put(r, r.reduce(n, n2, n.phase, null));
+ else n.cache().put(r, r.reduce(n, n2, n.phase, null));
} else {
r.reduce(n, n2, Phase.this, null);
/** parse <tt>input</tt> for a exactly one unique result, throwing <tt>Ambiguous</tt> if not unique or <tt>Failed</tt> if none */
- public Tree<R> parse1(Token.Stream<T> input) throws IOException, Failed, Ambiguous { return parse(input).expand1(); }
+ public Tree<R> parse1(Token.Stream<T> input) throws IOException, Failed, Ambiguous {
+ Forest<R> ret = parse(input);
+ try { return ret.expand1(); }
+ catch (Ambiguous a) {
+ System.out.println("while expanding:");
+ System.out.println(ret);
+ throw a;
+ }
+ }
/** parse <tt>input</tt>, using the table <tt>pt</tt> to drive the parser */
public Forest<R> parse(Token.Stream<T> input) throws IOException, Failed {
/** a single state in the LR table and the transitions possible from it */
public class State implements Comparable<Table.State>, Iterable<Position> {
+ /*
+ public boolean isResolvable(Token t) {
+ boolean found = false;
+ for(Reduction r : getReductions(t)) {
+ Position p = r.position;
+ if (!p.isRightNullable(cache)) continue;
+ if (p.owner().firstp()==p) continue;
+ if (found) {
+ // found two items meeting criteria #1
+ return false;
+ } else {
+ found = true;
+ continue;
+ }
+ if (p.element()==null) continue;
+ Topology first = new Walk.First(top(), cache).walk(p.element());
+ if (first.contains(t))
+ }
+ }
+ */
+
public final int idx = master_state_idx++;
private final HashSet<Position> hs;
public class Reduction {
// FIXME: cleanup; almost everything in here could go in either Sequence.Position.getRewrite() or else in GSS.Reduct
public final int numPop;
- private final Position position;
+ /*private*/ final Position position;
private final Forest[] holder; // to avoid constant reallocation
public int hashCode() { return position.hashCode(); }
public boolean equals(Object o) {
return reduce(parent, numPop-1, rex, onlychild, target);
}
+ private Forest zero = null;
+ public Forest zero() {
+ if (zero != null) return zero;
+ if (numPop > 0) throw new Error();
+ return zero = position.rewrite(null);
+ }
+
// FIXME: this could be more elegant and/or cleaner and/or somewhere else
private Forest reduce(GSS.Phase.Node parent, int pos, Forest rex, GSS.Phase.Node onlychild, GSS.Phase target) {
if (pos>=0) holder[pos] = parent.pending();
else if ("literal".equals(head)) { Element ret = string(string(tree.child(0))); dropAll.add(ret); return ret; }
else if ("-".equals(head)) return new Range(walk(tree, 0).toString().charAt(0), walk(tree,1).toString().charAt(0));
else if ("range".equals(head)) return new Range(walk(tree, 0).toString().charAt(0), walk(tree,0).toString().charAt(0));
- else if ("gram".equals(head)) return walk(tree, 1);
+ else if ("gram".equals(head)) return walk(tree, 0);
else if ("=>".equals(head)) { PreSequence p = (PreSequence)walk(tree, 0); p.tag = string(tree.child(1)); return p; }
else if ("psy".equals(head)) return (PreSequence)walk(tree, 0);
else if ("psyl".equals(head)) throw new Error("not supported");
+
+
+
+
// DO NOT EDIT STUFF BELOW: IT IS AUTOMATICALLY GENERATED
-new edu.berkeley.sbp.Tree(null, "gram", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { }),
- new edu.berkeley.sbp.Tree(null, "grammar", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "::=", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })}),
- new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=>", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "w", new edu.berkeley.sbp.Tree[] { }),
- new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })})}),
- new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "g", new edu.berkeley.sbp.Tree[] { }),
+new edu.berkeley.sbp.Tree(null, "gram", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "grammar", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "::=", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "s", new edu.berkeley.sbp.Tree[] { })}),
+ new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "=>", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "psy", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "ps", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "nonTerminal", new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, null, new edu.berkeley.sbp.Tree[] { new edu.berkeley.sbp.Tree(null, "g", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, "r", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, "a", new edu.berkeley.sbp.Tree[] { }),
new edu.berkeley.sbp.Tree(null, "m", new edu.berkeley.sbp.Tree[] { }),
+
+
+
+
System.out.println("\nparsing " + s[1]);
res = new CharToken.CharToStringParser(mg).parse1(new Tib(new FileInputStream(s[1])));
- System.out.println(res);
+ System.out.println(((Tree)walk(res)).toString(0, 0, 120));
+ }
+
+ public static Tree<String> walk(Tree<String> tree) {
+ String head = tree.head();
+ if ("stringify".equals(head)) {
+ String ret = "";
+ for(Tree<String> t : tree.child(0)) ret += t;
+ return new Tree<String>(null, ret);
+ }
+ Tree<String>[] children = new Tree[tree.numChildren()];
+ for(int i=0; i<children.length; i++) children[i] = walk(tree.child(i));
+ return new Tree<String>(null, head, children);
}
-package edu.berkeley.sbp;
-import edu.berkeley.sbp.*;
+package edu.berkeley.sbp.util;
import edu.berkeley.sbp.*;
import edu.berkeley.sbp.util.*;
import java.io.*;
import java.util.*;
import java.lang.reflect.*;
-public abstract class PrintableTree<T extends PrintableTree> implements Iterable<T> {
+public abstract class PrintableTree<T extends PrintableTree> implements Iterable<T>, ToJava {
protected abstract String headToString();
protected abstract String headToJava();
+ private boolean empty() {
+ for(T t : this) return false;
+ return true;
+ }
+
+ private static final int MAXDEPTH=3;
+ public int depth() {
+ int depth = headToString()==null ? 0 : 1;
+ int ret = depth;
+ for(T t : this) ret = Math.max(ret, depth+t.depth());
+ return ret;
+ }
+ public String toString(int indent0, int cur, int limit) {
+ int indent = indent0;
+ String s = toString();
+ if (depth()<MAXDEPTH && s.length() < limit-cur) return s;
+ StringBuffer ret = new StringBuffer();
+
+ if (cur>indent) {
+ ret.append('\n'); for(int i=0; i<indent; i++) ret.append(' ');
+ cur = indent;
+ }
+
+ String head = headToString();
+ if (head==null) {
+ ret.append("{");
+ } else {
+ ret.append(head);
+ if (empty()) return ret.toString();
+ ret.append(":");
+ indent += 2;
+ ret.append('\n'); for(int i=0; i<indent; i++) ret.append(' ');
+ cur = indent;
+ }
+
+ for(T t : this) {
+ s = t.toString(indent+2, cur, limit);
+ while(s.indexOf('\n') != -1) {
+ ret.append(s.substring(0, s.indexOf('\n')));
+ s = s.substring(s.indexOf('\n')+1);
+ if (s.length() > 0) ret.append("\n ");
+ }
+ ret.append(s);
+ if (s.indexOf('\n')!=-1)
+ cur = s.length()-s.lastIndexOf('\n');
+ else
+ cur += s.length();
+ if (cur>indent) {
+ ret.append(' ');
+ cur += s.length()+1;
+ }
+ }
+ if (head==null) {
+ ret.append("}");
+ } else if (cur>indent) {
+ /*
+ indent = indent0;
+ ret.append('\n'); for(int i=0; i<indent; i++) ret.append(' ');
+ cur = indent;
+ */
+ }
+ return ret.toString();
+ }
+
public String toString() {
StringBuffer ret = new StringBuffer();
for(T t : this) {
--- /dev/null
+package edu.berkeley.sbp.util;
+import edu.berkeley.sbp.*;
+import edu.berkeley.sbp.util.*;
+import java.io.*;
+import java.util.*;
+import java.lang.reflect.*;
+
+public interface ToJava {
+ public void toJava(StringBuffer sb);
+}
import java.lang.reflect.*;
import java.lang.ref.*;
-// FIXME: this should be a value class -- add/remove/etc should return new Topology objects
/** values inhabiting a topology over <tt>V</tt> (roughly, infinite sets of <tt>V</tt>'s equipped with union/intersection/complement) */
public interface Topology<V> {
-s ::= ws grammar ws => "gram"
+s ::= grammar ws => "gram"
ws !::= w** | w** "//" (~[\n]*) "\n" ws
wp !::= w++
grammar ::= r +/ ws => "grammar"
}
testcase {
- input "xbambambam";
+ input "qxbambambam";
output "bam:{a bam:{a bam:{a x:{x}}}}";
- s ::= a s ^"bam"
- s ::= ^"x"
+ s ::= "q" z
+ z ::= a z ^"bam"
+ z ::= ^"x"
a ::= () => "a"
}
kv ::= word "=" text /ws => kv1
-num !::= [0-9]++
+num !::= [0-9]++ => "stringify"
Paragraph ::= { "\"\"" ws text } => "blockquote"
> { "*" " " ws text } => "ul"
> { "#" " " ws text } => "ol"
> structured
> styled
> "\"" text "\"" => quoted
- > [a-zA-Z0-9]++
+ > alphanum++ => "stringify"
> symbol
symbol ::= symbolx & sym++
// only gets parsed once
urlpath ::= urlchar*
-username ::= [a-zA-Z0-9;/?:&=$\-_.+]++
-password ::= [a-zA-Z0-9;/?:&=$\-_.+]++
+username ::= [a-zA-Z0-9;/?:&=$\-_.+]++ => "stringify"
+password ::= [a-zA-Z0-9;/?:&=$\-_.+]++ => "stringify"
urlchar ::= [a-zA-Z0-9;/?:&=$\-_.+@]
| "%" [0-9] [0-9] => "%"
url ::= "mailto" ":" email
port ::= [0-9]+
domain ::= part +/ "."
-part ::= [a-zA-Z0-9\-]++ // interesting use of boolean grammars
+part ::= [a-zA-Z0-9\-]++ => "stringify" // interesting use of boolean grammars
// &~ ([\-0-9] ~[]* | ~[]* [\-0-9])
email ::= username "@" host => email
// Tokens ///////////////////////////////////////////////////////////////////
-word ::= alphanum++
+word ::= alphanum++ => "stringify"
| quoted
quoted ::= "\"" ((~[\"\\] | escaped)+) "\""