package edu.berkeley.sbp;
import edu.berkeley.sbp.util.*;
-import edu.berkeley.sbp.Sequence.Position;
-import edu.berkeley.sbp.Sequence.Position;
+import edu.berkeley.sbp.Sequence.Pos;
+import edu.berkeley.sbp.Sequence.Pos;
import java.io.*;
import java.util.*;
/** a parser which translates an Input<Token> into a Forest<NodeType> */
-public abstract class Parser<Token, NodeType> {
+public abstract class Parser<Token, NodeType> implements Serializable {
final Table pt;
public abstract Topology<Token> emptyTopology();
public String toString() { return pt.toString(); }
- Grammar cache() { return pt; }
/** parse <tt>input</tt>, and return the shared packed parse forest (or throw an exception) */
public Forest<NodeType> parse(Input<Token> input) throws IOException, ParseFailed {
+ long start = System.currentTimeMillis();
verbose = System.getProperty("sbp.verbose", null) != null;
spinpos = 0;
GSS gss = new GSS(input, this);
+ int idmax = 0;
+ int[][] count = new int[1024*1024][];
+ HashMap<Pos,Integer> ids = new HashMap<Pos,Integer>();
try {
for(GSS.Phase current = gss.new Phase<Token>(pt.start); ;) {
if (verbose) debug(current.token, gss, input);
if (current.isDone()) return (Forest<NodeType>)current.finalResult;
Input.Region region = current.getLocation().createRegion(current.getNextLocation());
Forest forest = shiftToken((Token)current.token, region);
+ /*
+ int maxid = 0;
+ for(Reduction r : gss.finishedReductions)
+ if (ids.get(r.reduction())==null)
+ ids.put(r.reduction(), idmax++);
+ count[current.pos] = new int[idmax];
+ for(Reduction r : gss.finishedReductions)
+ count[current.pos][ids.get(r.reduction())]++;
+ */
current = gss.new Phase<Token>(current, forest);
}
} finally {
if (verbose) {
- System.err.print("\r"+ANSI.clreol());
+ long time = System.currentTimeMillis() - start;
+ System.err.println("\r parse time: " + time +"ms "+ ANSI.clreol());
debug(null, gss, input);
}
+ /*
+ PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.plot")));
+ boolean[] use = new boolean[idmax];
+ for(int i=0; i<count.length; i++)
+ if (count[i]!=null)
+ for(int j=0; j<count[i].length; j++)
+ if (count[i][j]>20)
+ use[j] = true;
+ for(int i=0; i<count.length; i++)
+ if (count[i]!=null) {
+ int row = 0;
+ for(int j=0; j<use.length; j++)
+ if (use[j]) {
+ row++;
+ pw.println(i+", "+row+", "+(j>=count[i].length ? 0 : count[i][j]));
+ }
+ pw.println();
+ }
+ pw.close();
+ pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("test.plot")));
+ pw.println("set terminal postscript enhanced color");
+ pw.println("set output \"out.ps\"");
+ pw.println("set pm3d map");
+ pw.println("set autoscale");
+ pw.println("set view 0,0");
+ pw.println("set ytics (\\");
+ int q = -1;
+ for(int j=0; j<use.length; j++)
+ if (use[j]) {
+ q++;
+ for(Pos p : ids.keySet())
+ if (ids.get(p) == j) {
+ String title = p.toString();
+ System.out.println(q + " " + title);
+ pw.println("\""+q+"\" "+(((double)q)+0.5)+",\\");
+ break;
+ }
+ }
+ pw.println("\".\" "+(q+1)+")");
+ pw.println("set size square");
+ pw.println("splot \"out.plot\"");
+ pw.close();
+ */
}
}
// Table //////////////////////////////////////////////////////////////////////////////
/** an SLR(1) parse table which may contain conflicts */
- class Table extends Grammar<Token> implements Serializable {
+ class Table implements Serializable {
/** the start state */
final State<Token> start;
private int master_state_idx = 0;
/** all the states for this table */
- private transient HashSet<State<Token>> all_states = new HashSet<State<Token>>();
+ private transient HashSet<State<Token>> all_states = new HashSet<State<Token>>();
/** all the doomed states in this table */
- private transient HashMap<HashSet<Position>,State<Token>> doomed_states = new HashMap<HashSet<Position>,State<Token>>();
+ private transient HashMap<HashSet<Pos>,State<Token>> doomed_states = new HashMap<HashSet<Pos>,State<Token>>();
/** all the non-doomed states in this table */
- private transient HashMap<HashSet<Position>,State<Token>> normal_states = new HashMap<HashSet<Position>,State<Token>>();
+ private transient HashMap<HashSet<Pos>,State<Token>> normal_states = new HashMap<HashSet<Pos>,State<Token>>();
- Topology<Token> emptyTopology() { return Parser.this.emptyTopology(); }
-
/** construct a parse table for the given grammar */
Table(Union ux) {
- super(new Union("0", Sequence.create(ux), true));
+ Union rootUnion = new Union("0", Sequence.create(ux), true);
+ Grammar<Token> grammar = new Grammar<Token>(rootUnion) {
+ public Topology<Token> emptyTopology() { return Parser.this.emptyTopology(); }
+ };
// create the "dead state"
- this.dead_state = new State<Token>(new HashSet<Position>(), true);
+ this.dead_state = new State<Token>(new HashSet<Pos>(), true, grammar);
// construct the start state; this will recursively create *all* the states
- this.start = new State<Token>(reachable(rootUnion), false);
+ this.start = new State<Token>(reachable(rootUnion), false, grammar);
- buildReductions();
- sortReductions();
+ buildReductions(grammar);
+ sortReductions(grammar);
}
/** fill in the reductions table */
- private void buildReductions() {
+ private void buildReductions(Grammar<Token> grammar) {
// for each state, fill in the corresponding "row" of the parse table
for(State<Token> state : all_states)
- for(Position p : state.hs) {
+ for(Pos p : state.hs) {
// if the element following this position is an atom, copy the corresponding
// set of rows out of the "master" goto table and into this state's shift table
// RNGLR: we can potentially reduce from any "right-nullable" position -- that is,
// any position for which all Elements after it in the Sequence are capable of
// matching the empty string.
- if (!isRightNullable(p)) continue;
- Topology<Token> follow = follow(p.owner());
- for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) {
+ if (!grammar.isRightNullable(p)) continue;
+ Topology<Token> follow = grammar.follow(p.owner());
+ for(Pos p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) {
if (!(p2.element() instanceof Union))
throw new Error("impossible -- only Unions can be nullable");
// not just the follow-set of the last non-nullable element, but the
// follow-sets of the nulled elements as well.
for(Sequence s : ((Union)p2.element()))
- follow = follow.intersect(follow(s));
- Topology<Token> set = epsilonFollowSet((Union)p2.element());
+ follow = follow.intersect(grammar.follow(s));
+ Topology<Token> set = grammar.epsilonFollowSet((Union)p2.element());
if (set != null) follow = follow.intersect(set);
}
// indicate that when the next token is in the set "follow", nodes in this
- // state should reduce according to Position "p"
+ // state should reduce according to Pos "p"
state.reductions.put(follow, p);
- if (followEof.contains(p.owner())) state.eofReductions.add(p);
+ if (grammar.followEof.contains(p.owner())) state.eofReductions.add(p);
}
// optimize the reductions table
}
// FIXME: this method needs to be cleaned up and documented
- private void sortReductions() {
+ private void sortReductions(Grammar<Token> grammar) {
        // crude algorithm to assign an ordinal ordering to every position
// al will be sorted in DECREASING order (al[0] >= al[1])
- ArrayList<Sequence.Position> al = new ArrayList<Sequence.Position>();
+ ArrayList<Sequence.Pos> al = new ArrayList<Sequence.Pos>();
for(State s : all_states) {
for(Object po : s.positions()) {
- Sequence.Position p = (Sequence.Position)po;
+ Sequence.Pos p = (Sequence.Pos)po;
if (al.contains(p)) continue;
int i=0;
for(; i<al.size(); i++) {
- if (comparePositions(p, al.get(i)) < 0)
+ if (grammar.comparePositions(p, al.get(i)) < 0)
break;
}
al.add(i, p);
OUTER: while(true) {
for(int i=0; i<al.size(); i++)
for(int j=i+1; j<al.size(); j++)
- if (comparePositions(al.get(i), al.get(j)) > 0) {
- Sequence.Position p = al.remove(j);
+ if (grammar.comparePositions(al.get(i), al.get(j)) > 0) {
+ Sequence.Pos p = al.remove(j);
al.add(i, p);
continue OUTER;
}
for(int i=0; i<al.size(); i++) {
boolean inc = false;
for(int k=pk; k<i; k++) {
- if (comparePositions(al.get(k), al.get(i)) > 0)
+ if (grammar.comparePositions(al.get(k), al.get(i)) > 0)
{ inc = true; break; }
}
inc = true;
* A single state in the LR table and the transitions
* possible from it
*
- * A state corresponds to a set of Sequence.Position's. Each
- * Node in the GSS has a State; the Node represents a set of
- * possible parses, one for each Position in the State.
+ * A state corresponds to a set of Sequence.Pos's. Each
+ * StateNode in the GSS has a State; the StateNode represents a set of
+ * possible parses, one for each Pos in the State.
*
- * Every state is either "doomed" or "normal". If a Position
+ * Every state is either "doomed" or "normal". If a Pos
* is part of a Sequence which is a conjunct (that is, it was
- * passed to Sequence.{and(),andnot()}), then that Position
+ * passed to Sequence.{and(),andnot()}), then that Pos
* will appear only in doomed States. Furthermore, any set
* of Positions reachable from a doomed State also forms a
* doomed State. Note that in this latter case, a doomed
* Nodes with non-doomed states represent nodes which
* contribute to actual valid parses. Nodes with doomed
* States exist for no other purpose than to enable/disable
- * some future reduction from a non-doomed Node. Because of
+ * some future reduction from a non-doomed StateNode. Because of
* this, we "garbage-collect" Nodes with doomed states if
* there are no more non-doomed Nodes which they could
- * affect (see Result, Reduction, and Node for details).
+ * affect (see ResultNode, Reduction, and StateNode for details).
*
* Without this optimization, many seemingly-innocuous uses
* of positive and negative conjuncts can trigger O(n^2)
class State<Token> implements IntegerMappable, Serializable {
public final int idx = master_state_idx++;
- private final transient HashSet<Position> hs;
+ private final transient HashSet<Pos> hs;
public HashSet<State<Token>> conjunctStates = new HashSet<State<Token>>();
HashMap<Pos,State<Token>> gotoSetNonTerminals = new HashMap<Pos,State<Token>>();
private transient TopologicalBag<Token,State<Token>> gotoSetTerminals = new TopologicalBag<Token,State<Token>>();
- private TopologicalBag<Token,Pos> reductions = new TopologicalBag<Token,Pos>();
- private HashSet<Pos> eofReductions = new HashSet<Pos>();
+ TopologicalBag<Token,Pos> reductions = new TopologicalBag<Token,Pos>();
+ HashSet<Pos> eofReductions = new HashSet<Pos>();
private TopologicalBag<Token,State<Token>> shifts = new TopologicalBag<Token,State<Token>>();
private boolean accept = false;
private VisitableMap<Token,State<Token>> oshifts = null;
- private VisitableMap<Token,Position> oreductions = null;
+ private VisitableMap<Token,Pos> oreductions = null;
public final boolean doomed;
// Interface Methods //////////////////////////////////////////////////////////////////////////////
public boolean doomed() { return doomed; }
boolean isAccepting() { return accept; }
- Iterable<Position> positions() { return hs; }
+ Iterable<Pos> positions() { return hs; }
boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); }
- void invokeShifts(Token t, GSS.Phase phase, Result r) { oshifts.invoke(t, phase, r); }
+ void invokeShifts(Token t, GSS.Phase phase, StateNode pred, Forest f) { oshifts.invoke(t, phase, pred, f); }
boolean canReduce(Token t) {
return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); }
- void invokeEpsilonReductions(Token t, Node node) {
- if (t==null) for(Position r : eofReductions) node.invoke(r, null);
- else oreductions.invoke(t, node, null);
+ void invokeEpsilonReductions(Token t, StateNode node) {
+ if (t==null) for(Pos r : eofReductions) node.invoke(r, null, null);
+ else oreductions.invoke(t, node, null, null);
}
- void invokeReductions(Token t, Node node, Result b) {
- if (t==null) for(Position r : eofReductions) node.invoke(r, b);
- else oreductions.invoke(t, node, b);
+ void invokeReductions(Token t, StateNode node, ResultNode only) {
+ if (t==null) for(Pos r : eofReductions) node.invoke(r, only, null);
+ else oreductions.invoke(t, node, only, null);
}
// Constructor //////////////////////////////////////////////////////////////////////////////
/**
- * create a new state consisting of all the <tt>Position</tt>s in <tt>hs</tt>
- * @param hs the set of <tt>Position</tt>s comprising this <tt>State</tt>
+ * create a new state consisting of all the <tt>Pos</tt>s in <tt>hs</tt>
+ * @param hs the set of <tt>Pos</tt>s comprising this <tt>State</tt>
* @param all the set of all elements (Atom instances need not be included)
*
* In principle these two steps could be merged, but they
* for non-Atom Elements.
* </ul>
*/
- public State(HashSet<Position> hs, boolean doomed) {
+ public State(HashSet<Pos> hs, boolean doomed, Grammar<Token> grammar) {
this.hs = hs;
this.doomed = doomed;
((HashMap)(doomed ? doomed_states : normal_states)).put(hs, this);
((HashSet)all_states).add(this);
- for(Position p : hs) {
+ for(Pos p : hs) {
// Step 1a: take note if we are an accepting state
// (last position of the root Union's sequence)
- if (p.next()==null && !doomed && rootUnion.contains(p.owner()))
+ if (p.next()==null && !doomed && grammar.rootUnion.contains(p.owner()))
accept = true;
- // Step 1b: If any Position in the set is the first position of its sequence, then this
+ // Step 1b: If any Pos in the set is the first position of its sequence, then this
// state is responsible for spawning the "doomed" states for each of the
// Sequence's conjuncts. This obligation is recorded by adding the to-be-spawned
// states to conjunctStates.
if (!p.isFirst()) continue;
for(Sequence s : p.owner().needs())
if (!hs.contains(s.firstp()))
- conjunctStates.add(mkstate(reachable(s.firstp()), true));
+ conjunctStates.add(mkstate(reachable(s.firstp()), true, grammar));
for(Sequence s : p.owner().hates())
if (!hs.contains(s.firstp()))
- conjunctStates.add(mkstate(reachable(s.firstp()), true));
+ conjunctStates.add(mkstate(reachable(s.firstp()), true, grammar));
}
- // Step 2a: examine all Position's in this state and compute the mappings from
+ // Step 2a: examine all Pos's in this state and compute the mappings from
// sets of follow tokens (tokens which could follow this position) to sets
// of _new_ positions (positions after shifting). These mappings are
// collectively known as the _closure_
- TopologicalBag<Token,Position> bag0 = new TopologicalBag<Token,Position>();
- for(Position position : hs) {
+ TopologicalBag<Token,Pos> bag0 = new TopologicalBag<Token,Pos>();
+ for(Pos position : hs) {
if (position.isLast() || !(position.element() instanceof Atom)) continue;
Atom a = (Atom)position.element();
- HashSet<Position> hp = new HashSet<Position>();
+ HashSet<Pos> hp = new HashSet<Pos>();
reachable(position.next(), hp);
bag0.addAll(a.getTokenTopology(), hp);
}
// computed next-position set).
for(Topology<Token> r : bag0) {
- HashSet<Position> h = new HashSet<Position>();
- for(Position p : bag0.getAll(r)) h.add(p);
- ((TopologicalBag)gotoSetTerminals).put(r, mkstate(h, doomed));
+ HashSet<Pos> h = new HashSet<Pos>();
+ for(Pos p : bag0.getAll(r)) h.add(p);
+ ((TopologicalBag)gotoSetTerminals).put(r, mkstate(h, doomed, grammar));
}
// Step 3: for every Sequence, compute the closure over every position in this set which
// to avoid having to iteratively construct our set of States as shown in most
// expositions of the algorithm (ie "keep doing XYZ until things stop changing").
- HashMapBag<Sequence,Position> move = new HashMapBag<Sequence,Position>();
- for(Position p : hs)
+ HashMapBag<Sequence,Pos> move = new HashMapBag<Sequence,Pos>();
+ for(Pos p : hs)
if (!p.isLast() && p.element() instanceof Union)
for(Sequence s : ((Union)p.element())) {
- HashSet<Position> hp = new HashSet<Position>();
+ HashSet<Pos> hp = new HashSet<Pos>();
reachable(p.next(), hp);
move.addAll(s, hp);
}
OUTER: for(Sequence y : move) {
// if a reduction is "lame", it should wind up in the dead_state after reducing
- HashSet<Position> h = move.getAll(y);
- State<Token> s = mkstate(h, doomed);
- for(Position p : hs)
+ HashSet<Pos> h = move.getAll(y);
+ State<Token> s = mkstate(h, doomed, grammar);
+ for(Pos p : hs)
if (p.element() != null && (p.element() instanceof Union))
for(Sequence seq : ((Union)p.element()))
if (seq.needs.contains(y) || seq.hates.contains(y)) {
// FIXME: assumption that no sequence is ever both usefully (non-lamely) matched
// and also directly lamely matched
- for(Position pp = y.firstp(); pp != null; pp = pp.next())
+ for(Pos pp = y.firstp(); pp != null; pp = pp.next())
((HashMap)gotoSetNonTerminals).put(pp, dead_state);
continue OUTER;
}
- for(Position pp = y.firstp(); pp != null; pp = pp.next())
+ for(Pos pp = y.firstp(); pp != null; pp = pp.next())
gotoSetNonTerminals.put(pp, s);
}
}
- private State<Token> mkstate(HashSet<Position> h, boolean b) {
+ private State<Token> mkstate(HashSet<Pos> h, boolean b, Grammar<Token> grammar) {
State ret = (b?doomed_states:normal_states).get(h);
- if (ret==null) ret = new State<Token>(h,b);
+ if (ret==null) ret = new State<Token>(h,b, grammar);
return ret;
}
public int toInt() { return idx; }
public String toString() {
StringBuffer ret = new StringBuffer();
- for(Position p : hs)
+ for(Pos p : hs)
ret.append(p+"\n");
return ret.toString();
}
// Helpers //////////////////////////////////////////////////////////////////////////////
- private static HashSet<Position> reachable(Element e) {
- HashSet<Position> h = new HashSet<Position>();
+ private static HashSet<Pos> reachable(Element e) {
+ HashSet<Pos> h = new HashSet<Pos>();
reachable(e, h);
return h;
}
- private static void reachable(Element e, HashSet<Position> h) {
+ private static void reachable(Element e, HashSet<Pos> h) {
if (e instanceof Atom) return;
for(Sequence s : ((Union)e))
reachable(s.firstp(), h);
}
- private static void reachable(Position p, HashSet<Position> h) {
+ private static void reachable(Pos p, HashSet<Pos> h) {
if (h.contains(p)) return;
h.add(p);
if (p.element() != null) reachable(p.element(), h);
}
- private static HashSet<Position> reachable(Position p) {
- HashSet<Position> ret = new HashSet<Position>();
+ private static HashSet<Pos> reachable(Pos p) {
+ HashSet<Pos> ret = new HashSet<Pos>();
reachable(p, ret);
return ret;
}