-// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license
+// Copyright 2006-2007 all rights reserved; see LICENSE file for BSD-style license
package edu.berkeley.sbp;
import edu.berkeley.sbp.util.*;
-import edu.berkeley.sbp.Sequence.Position;
+import edu.berkeley.sbp.Sequence.Pos;
import java.io.*;
import java.util.*;
/** a parser which translates an Input<Token> into a Forest<NodeType> */
-public abstract class Parser<Token, NodeType> {
+public abstract class Parser<Token, NodeType> implements Serializable {
final Table pt;
public abstract Topology<Token> emptyTopology();
public String toString() { return pt.toString(); }
- Grammar cache() { return pt; }
/** parse <tt>input</tt>, and return the shared packed parse forest (or throw an exception) */
public Forest<NodeType> parse(Input<Token> input) throws IOException, ParseFailed {
_last = c;
switch(c) {
case edu.berkeley.sbp.chr.CharAtom.left:
- buf += "\033[31m{\033[0m";
+ buf += "\033[31m>\033[0m";
break;
case edu.berkeley.sbp.chr.CharAtom.right:
- buf += "\033[31m}\033[0m";
+ buf += "\033[31m<\033[0m";
break;
case -1: // FIXME
case '\n':
// Table //////////////////////////////////////////////////////////////////////////////
/** an SLR(1) parse table which may contain conflicts */
- class Table extends Grammar<Token> {
+ class Table implements Serializable {
/** the start state */
final State<Token> start;
private int master_state_idx = 0;
/** all the states for this table */
- HashSet<State<Token>> all_states = new HashSet<State<Token>>();
+ private transient HashSet<State<Token>> all_states = new HashSet<State<Token>>();
/** all the doomed states in this table */
- HashMap<HashSet<Position>,State<Token>> doomed_states = new HashMap<HashSet<Position>,State<Token>>();
+ private transient HashMap<HashSet<Pos>,State<Token>> doomed_states = new HashMap<HashSet<Pos>,State<Token>>();
/** all the non-doomed states in this table */
- HashMap<HashSet<Position>,State<Token>> normal_states = new HashMap<HashSet<Position>,State<Token>>();
+ private transient HashMap<HashSet<Pos>,State<Token>> normal_states = new HashMap<HashSet<Pos>,State<Token>>();
- Topology<Token> emptyTopology() { return Parser.this.emptyTopology(); }
-
/** construct a parse table for the given grammar */
Table(Union ux) {
- super(new Union("0", Sequence.create(ux), true));
+ Union rootUnion = new Union("0", Sequence.create(ux), true);
+ Grammar<Token> grammar = new Grammar<Token>(rootUnion) {
+ public Topology<Token> emptyTopology() { return Parser.this.emptyTopology(); }
+ };
// create the "dead state"
- this.dead_state = new State<Token>(new HashSet<Position>(), true);
+ this.dead_state = new State<Token>(new HashSet<Pos>(), true, grammar);
// construct the start state; this will recursively create *all* the states
- this.start = new State<Token>(reachable(rootUnion), false);
+ this.start = new State<Token>(reachable(rootUnion), false, grammar);
- buildReductions();
- sortReductions();
+ buildReductions(grammar);
+ sortReductions(grammar);
}
/** fill in the reductions table */
- private void buildReductions() {
+ private void buildReductions(Grammar<Token> grammar) {
// for each state, fill in the corresponding "row" of the parse table
for(State<Token> state : all_states)
- for(Position p : state.hs) {
+ for(Pos p : state.hs) {
// if the element following this position is an atom, copy the corresponding
// set of rows out of the "master" goto table and into this state's shift table
// RNGLR: we can potentially reduce from any "right-nullable" position -- that is,
// any position for which all Elements after it in the Sequence are capable of
// matching the empty string.
- if (!isRightNullable(p)) continue;
- Topology<Token> follow = follow(p.owner());
- for(Position p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) {
+ if (!grammar.isRightNullable(p)) continue;
+ Topology<Token> follow = grammar.follow(p.owner());
+ for(Pos p2 = p; p2 != null && p2.element() != null; p2 = p2.next()) {
if (!(p2.element() instanceof Union))
throw new Error("impossible -- only Unions can be nullable");
// not just the follow-set of the last non-nullable element, but the
// follow-sets of the nulled elements as well.
for(Sequence s : ((Union)p2.element()))
- follow = follow.intersect(follow(s));
- Topology<Token> set = epsilonFollowSet((Union)p2.element());
+ follow = follow.intersect(grammar.follow(s));
+ Topology<Token> set = grammar.epsilonFollowSet((Union)p2.element());
if (set != null) follow = follow.intersect(set);
}
// indicate that when the next token is in the set "follow", nodes in this
- // state should reduce according to Position "p"
+ // state should reduce according to Pos "p"
state.reductions.put(follow, p);
- if (followEof.contains(p.owner())) state.eofReductions.add(p);
+ if (grammar.followEof.contains(p.owner())) state.eofReductions.add(p);
}
// optimize the reductions table
}
// FIXME: this method needs to be cleaned up and documented
- private void sortReductions() {
+ private void sortReductions(Grammar<Token> grammar) {
// crude algorithm to assing an ordinal ordering to every position
// al will be sorted in DECREASING order (al[0] >= al[1])
- ArrayList<Sequence.Position> al = new ArrayList<Sequence.Position>();
+ ArrayList<Sequence.Pos> al = new ArrayList<Sequence.Pos>();
for(State s : all_states) {
- for(Object po : s) {
- Sequence.Position p = (Sequence.Position)po;
+ for(Object po : s.positions()) {
+ Sequence.Pos p = (Sequence.Pos)po;
if (al.contains(p)) continue;
int i=0;
for(; i<al.size(); i++) {
- if (comparePositions(p, al.get(i)) < 0)
+ if (grammar.comparePositions(p, al.get(i)) < 0)
break;
}
al.add(i, p);
OUTER: while(true) {
for(int i=0; i<al.size(); i++)
for(int j=i+1; j<al.size(); j++)
- if (comparePositions(al.get(i), al.get(j)) > 0) {
- Sequence.Position p = al.remove(j);
+ if (grammar.comparePositions(al.get(i), al.get(j)) > 0) {
+ Sequence.Pos p = al.remove(j);
al.add(i, p);
continue OUTER;
}
for(int i=0; i<al.size(); i++) {
boolean inc = false;
for(int k=pk; k<i; k++) {
- if (comparePositions(al.get(k), al.get(i)) > 0)
+ if (grammar.comparePositions(al.get(k), al.get(i)) > 0)
{ inc = true; break; }
}
inc = true;
* A single state in the LR table and the transitions
* possible from it
*
- * A state corresponds to a set of Sequence.Position's. Each
+ * A state corresponds to a set of Sequence.Pos's. Each
* Node in the GSS has a State; the Node represents a set of
- * possible parses, one for each Position in the State.
+ * possible parses, one for each Pos in the State.
*
- * Every state is either "doomed" or "normal". If a Position
+ * Every state is either "doomed" or "normal". If a Pos
* is part of a Sequence which is a conjunct (that is, it was
- * passed to Sequence.{and(),andnot()}), then that Position
+ * passed to Sequence.{and(),andnot()}), then that Pos
* will appear only in doomed States. Furthermore, any set
* of Positions reachable from a doomed State also forms a
* doomed State. Note that in this latter case, a doomed
* space+time complexity in otherwise simple grammars. There
* is an example of this in the regression suite.
*/
- class State<Token> implements IntegerMappable, Iterable<Position> {
+ class State<Token> implements IntegerMappable, Serializable {
public final int idx = master_state_idx++;
- private final HashSet<Position> hs;
+ private final transient HashSet<Pos> hs;
public HashSet<State<Token>> conjunctStates = new HashSet<State<Token>>();
- HashMap<Sequence,State<Token>> gotoSetNonTerminals = new HashMap<Sequence,State<Token>>();
+ HashMap<Pos,State<Token>> gotoSetNonTerminals = new HashMap<Pos,State<Token>>();
private transient TopologicalBag<Token,State<Token>> gotoSetTerminals = new TopologicalBag<Token,State<Token>>();
- private TopologicalBag<Token,Position> reductions = new TopologicalBag<Token,Position>();
- private HashSet<Position> eofReductions = new HashSet<Position>();
+ TopologicalBag<Token,Pos> reductions = new TopologicalBag<Token,Pos>();
+ HashSet<Pos> eofReductions = new HashSet<Pos>();
private TopologicalBag<Token,State<Token>> shifts = new TopologicalBag<Token,State<Token>>();
private boolean accept = false;
private VisitableMap<Token,State<Token>> oshifts = null;
- private VisitableMap<Token,Position> oreductions = null;
+ private VisitableMap<Token,Pos> oreductions = null;
public final boolean doomed;
// Interface Methods //////////////////////////////////////////////////////////////////////////////
public boolean doomed() { return doomed; }
boolean isAccepting() { return accept; }
- public Iterator<Position> iterator() { return hs.iterator(); }
+
+ Iterable<Pos> positions() { return hs; }
+
boolean canShift(Token t) { return oshifts!=null && oshifts.contains(t); }
- void invokeShifts(Token t, GSS.Phase phase, Result r) { oshifts.invoke(t, phase, r); }
+ void invokeShifts(Token t, GSS.Phase phase, Node pred, Forest f) { oshifts.invoke(t, phase, pred, f); }
boolean canReduce(Token t) {
return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); }
void invokeEpsilonReductions(Token t, Node node) {
- if (t==null) for(Position r : eofReductions) node.invoke(r, null);
- else oreductions.invoke(t, node, null);
+ if (t==null) for(Pos r : eofReductions) node.invoke(r, null, null);
+ else oreductions.invoke(t, node, null, null);
}
void invokeReductions(Token t, Node node, Result b) {
- if (t==null) for(Position r : eofReductions) node.invoke(r, b);
- else oreductions.invoke(t, node, b);
+ if (t==null) for(Pos r : eofReductions) node.invoke(r, b, null);
+ else oreductions.invoke(t, node, b, null);
}
// Constructor //////////////////////////////////////////////////////////////////////////////
/**
- * create a new state consisting of all the <tt>Position</tt>s in <tt>hs</tt>
- * @param hs the set of <tt>Position</tt>s comprising this <tt>State</tt>
+ * create a new state consisting of all the <tt>Pos</tt>s in <tt>hs</tt>
+ * @param hs the set of <tt>Pos</tt>s comprising this <tt>State</tt>
* @param all the set of all elements (Atom instances need not be included)
*
* In principle these two steps could be merged, but they
* for non-Atom Elements.
* </ul>
*/
- public State(HashSet<Position> hs, boolean doomed) {
+ public State(HashSet<Pos> hs, boolean doomed, Grammar<Token> grammar) {
this.hs = hs;
this.doomed = doomed;
((HashMap)(doomed ? doomed_states : normal_states)).put(hs, this);
((HashSet)all_states).add(this);
- for(Position p : hs) {
+ for(Pos p : hs) {
// Step 1a: take note if we are an accepting state
// (last position of the root Union's sequence)
- if (p.next()==null && !doomed && rootUnion.contains(p.owner()))
+ if (p.next()==null && !doomed && grammar.rootUnion.contains(p.owner()))
accept = true;
- // Step 1b: If any Position in the set is the first position of its sequence, then this
+ // Step 1b: If any Pos in the set is the first position of its sequence, then this
// state is responsible for spawning the "doomed" states for each of the
// Sequence's conjuncts. This obligation is recorded by adding the to-be-spawned
// states to conjunctStates.
if (!p.isFirst()) continue;
for(Sequence s : p.owner().needs())
if (!hs.contains(s.firstp()))
- conjunctStates.add(mkstate(reachable(s.firstp()), true));
+ conjunctStates.add(mkstate(reachable(s.firstp()), true, grammar));
for(Sequence s : p.owner().hates())
if (!hs.contains(s.firstp()))
- conjunctStates.add(mkstate(reachable(s.firstp()), true));
+ conjunctStates.add(mkstate(reachable(s.firstp()), true, grammar));
}
- // Step 2a: examine all Position's in this state and compute the mappings from
+ // Step 2a: examine all Pos's in this state and compute the mappings from
// sets of follow tokens (tokens which could follow this position) to sets
// of _new_ positions (positions after shifting). These mappings are
// collectively known as the _closure_
- TopologicalBag<Token,Position> bag0 = new TopologicalBag<Token,Position>();
- for(Position position : hs) {
+ TopologicalBag<Token,Pos> bag0 = new TopologicalBag<Token,Pos>();
+ for(Pos position : hs) {
if (position.isLast() || !(position.element() instanceof Atom)) continue;
Atom a = (Atom)position.element();
- HashSet<Position> hp = new HashSet<Position>();
+ HashSet<Pos> hp = new HashSet<Pos>();
reachable(position.next(), hp);
bag0.addAll(a.getTokenTopology(), hp);
}
// computed next-position set).
for(Topology<Token> r : bag0) {
- HashSet<Position> h = new HashSet<Position>();
- for(Position p : bag0.getAll(r)) h.add(p);
- ((TopologicalBag)gotoSetTerminals).put(r, mkstate(h, doomed));
+ HashSet<Pos> h = new HashSet<Pos>();
+ for(Pos p : bag0.getAll(r)) h.add(p);
+ ((TopologicalBag)gotoSetTerminals).put(r, mkstate(h, doomed, grammar));
}
// Step 3: for every Sequence, compute the closure over every position in this set which
// to avoid having to iteratively construct our set of States as shown in most
// expositions of the algorithm (ie "keep doing XYZ until things stop changing").
- HashMapBag<Sequence,Position> move = new HashMapBag<Sequence,Position>();
- for(Position p : hs)
+ HashMapBag<Sequence,Pos> move = new HashMapBag<Sequence,Pos>();
+ for(Pos p : hs)
if (!p.isLast() && p.element() instanceof Union)
for(Sequence s : ((Union)p.element())) {
- HashSet<Position> hp = new HashSet<Position>();
+ HashSet<Pos> hp = new HashSet<Pos>();
reachable(p.next(), hp);
move.addAll(s, hp);
}
OUTER: for(Sequence y : move) {
// if a reduction is "lame", it should wind up in the dead_state after reducing
- HashSet<Position> h = move.getAll(y);
- State<Token> s = mkstate(h, doomed);
- for(Position p : hs)
+ HashSet<Pos> h = move.getAll(y);
+ State<Token> s = mkstate(h, doomed, grammar);
+ for(Pos p : hs)
if (p.element() != null && (p.element() instanceof Union))
for(Sequence seq : ((Union)p.element()))
if (seq.needs.contains(y) || seq.hates.contains(y)) {
// FIXME: assumption that no sequence is ever both usefully (non-lamely) matched
// and also directly lamely matched
- ((HashMap)gotoSetNonTerminals).put(y, dead_state);
+ for(Pos pp = y.firstp(); pp != null; pp = pp.next())
+ ((HashMap)gotoSetNonTerminals).put(pp, dead_state);
continue OUTER;
}
- gotoSetNonTerminals.put(y, s);
+ for(Pos pp = y.firstp(); pp != null; pp = pp.next())
+ gotoSetNonTerminals.put(pp, s);
}
}
- private State<Token> mkstate(HashSet<Position> h, boolean b) {
+ private State<Token> mkstate(HashSet<Pos> h, boolean b, Grammar<Token> grammar) {
State ret = (b?doomed_states:normal_states).get(h);
- if (ret==null) ret = new State<Token>(h,b);
+ if (ret==null) ret = new State<Token>(h,b, grammar);
return ret;
}
public int toInt() { return idx; }
public String toString() {
StringBuffer ret = new StringBuffer();
- for(Position p : hs)
+ for(Pos p : hs)
ret.append(p+"\n");
return ret.toString();
}
// Helpers //////////////////////////////////////////////////////////////////////////////
- private static HashSet<Position> reachable(Element e) {
- HashSet<Position> h = new HashSet<Position>();
+ private static HashSet<Pos> reachable(Element e) {
+ HashSet<Pos> h = new HashSet<Pos>();
reachable(e, h);
return h;
}
- private static void reachable(Element e, HashSet<Position> h) {
+ private static void reachable(Element e, HashSet<Pos> h) {
if (e instanceof Atom) return;
for(Sequence s : ((Union)e))
reachable(s.firstp(), h);
}
- private static void reachable(Position p, HashSet<Position> h) {
+ private static void reachable(Pos p, HashSet<Pos> h) {
if (h.contains(p)) return;
h.add(p);
if (p.element() != null) reachable(p.element(), h);
}
- private static HashSet<Position> reachable(Position p) {
- HashSet<Position> ret = new HashSet<Position>();
+ private static HashSet<Pos> reachable(Pos p) {
+ HashSet<Pos> ret = new HashSet<Pos>();
reachable(p, ret);
return ret;
}