public String toString() { return pt.toString(); }
- /** parse <tt>input</tt>, using the table <tt>pt</tt> to drive the parser */
+ /** parse <tt>input</tt> and return the shared packed parse forest (or throw <tt>IOException</tt> / <tt>ParseFailed</tt>) */
public Forest<Result> parse(Input<Tok> input) throws IOException, ParseFailed {
GSS gss = new GSS();
Input.Location loc = input.getLocation();
public final Walk.Cache cache = this;
- private void walk(Element e, HashSet<Element> hs) {
+ private void walk(Element e, HashSet<SequenceOrElement> hs) {
if (e==null) return;
if (hs.contains(e)) return;
hs.add(e);
if (e instanceof Atom) return;
- for(Sequence s : (Union)e) {
- hs.add(s);
- for(Position p = s.firstp(); p != null; p = p.next())
- walk(p.element(), hs);
- }
+ for(Sequence s : (Union)e)
+ walk(s, hs);
+ }
+ private void walk(Sequence s, HashSet<SequenceOrElement> hs) {
+ hs.add(s);
+ for(Position p = s.firstp(); p != null; p = p.next())
+ walk(p.element(), hs);
+ for(Sequence ss : s.needs()) walk(ss, hs);
+ for(Sequence ss : s.hates()) walk(ss, hs);
}
/** the start state */
- public final State<Tok> start;
+ public final State<Tok> start;
+
+ /** the state from which no reductions can be done */
+ private final State<Tok> dead_state;
/** used to generate unique values for State.idx */
private int master_state_idx = 0;
cache.eof.put(start0, true);
// construct the set of states
- HashSet<Element> all_elements = new HashSet<Element>();
+ HashSet<SequenceOrElement> all_elements = new HashSet<SequenceOrElement>();
walk(start0, all_elements);
- for(Element e : all_elements)
+ for(SequenceOrElement e : all_elements)
cache.ys.addAll(e, new Walk.YieldSet(e, cache).walk());
HashSet<Position> hp = new HashSet<Position>();
reachable(start0, hp);
+
+ this.dead_state = new State<Tok>(new HashSet<Position>(), all_states, all_elements);
this.start = new State<Tok>(hp, all_states, all_elements);
// for each state, fill in the corresponding "row" of the parse table
// if the element following this position is an atom, copy the corresponding
// set of rows out of the "master" goto table and into this state's shift table
if (p.element() != null && p.element() instanceof Atom)
- state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element())));
+ state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()).getTokenTopology()));
}
if (top instanceof IntegerTopology)
for(State<Tok> state : all_states.values()) {
public final int idx = master_state_idx++;
private final HashSet<Position> hs;
- public transient HashMap<Element,State<Tok>> gotoSetNonTerminals = new HashMap<Element,State<Tok>>();
+ public transient HashMap<Sequence,State<Tok>> gotoSetNonTerminals = new HashMap<Sequence,State<Tok>>();
private transient TopologicalBag<Tok,State<Tok>> gotoSetTerminals = new TopologicalBag<Tok,State<Tok>>();
private TopologicalBag<Tok,Position> reductions = new TopologicalBag<Tok,Position>();
// Interface Methods //////////////////////////////////////////////////////////////////////////////
- boolean isAccepting() { return accept; }
- public Iterator<Position> iterator() { return hs.iterator(); }
+ boolean isAccepting() { return accept; }
+ public Iterator<Position> iterator() { return hs.iterator(); }
- boolean canShift(Tok t) { return oshifts.contains(t); }
+ boolean canShift(Tok t) { return oshifts!=null && oshifts.contains(t); }
<B,C> void invokeShifts(Tok t, Invokable<State<Tok>,B,C> irbc, B b, C c) {
oshifts.invoke(t, irbc, b, c);
}
- boolean canReduce(Tok t) { return t==null ? eofReductions.size()>0 : oreductions.contains(t); }
+ boolean canReduce(Tok t) { return oreductions != null && (t==null ? eofReductions.size()>0 : oreductions.contains(t)); }
<B,C> void invokeReductions(Tok t, Invokable<Position,B,C> irbc, B b, C c) {
if (t==null) for(Position r : eofReductions) irbc.invoke(r, b, c);
else oreductions.invoke(t, irbc, b, c);
*/
public State(HashSet<Position> hs,
HashMap<HashSet<Position>,State<Tok>> all_states,
- HashSet<Element> all_elements) {
+ HashSet<SequenceOrElement> all_elements) {
this.hs = hs;
// register ourselves in the all_states hash so that no
Atom a = (Atom)position.element();
HashSet<Position> hp = new HashSet<Position>();
reachable(position.next(), hp);
- bag0.addAll(a, hp);
+ bag0.addAll(a.getTokenTopology(), hp);
}
// Step 1b: for each _minimal, contiguous_ set of characters having an identical next-position
// "yields" [in one or more step] is used instead of "produces" [in exactly one step]
// to avoid having to iteratively construct our set of States as shown in most
// expositions of the algorithm (ie "keep doing XYZ until things stop changing").
- HashMapBag<Element,Position> move = new HashMapBag<Element,Position>();
+
+ HashMapBag<SequenceOrElement,Position> move = new HashMapBag<SequenceOrElement,Position>();
for(Position p : hs) {
Element e = p.element();
if (e==null) continue;
- for(Element y : cache.ys.getAll(e)) {
+ for(SequenceOrElement y : cache.ys.getAll(e)) {
HashSet<Position> hp = new HashSet<Position>();
reachable(p.next(), hp);
move.addAll(y, hp);
}
}
- for(Element y : move) {
+ OUTER: for(SequenceOrElement y : move) {
HashSet<Position> h = move.getAll(y);
State<Tok> s = all_states.get(h) == null ? new State<Tok>(h, all_states, all_elements) : all_states.get(h);
- gotoSetNonTerminals.put(y, s);
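+ // if a reduction is "lame" -- i.e. the reduced sequence y appears in the needs or hates
+ // set of some sequence of a union that is the next element of one of this state's
+ // positions -- it should wind up in the dead_state after reducing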
+ if (y instanceof Sequence) {
+ for(Position p : hs) {
+ if (p.element() != null && (p.element() instanceof Union)) {
+ Union u = (Union)p.element();
+ for(Sequence seq : u)
+ if (seq.needs.contains((Sequence)y) || seq.hates.contains((Sequence)y)) {
+ // FIXME: what if there are two "routes" to get to the sequence?
+ ((HashMap)gotoSetNonTerminals).put((Sequence)y, dead_state);
+ continue OUTER;
+ }
+ }
+ }
+ gotoSetNonTerminals.put((Sequence)y, s);
+ }
}
}
// Helpers //////////////////////////////////////////////////////////////////////////////
+ private static void reachable(Sequence s, HashSet<Position> h) {
+ reachable(s.firstp(), h);
+ for(Sequence ss : s.needs()) reachable(ss, h);
+ for(Sequence ss : s.hates()) reachable(ss, h);
+ }
private static void reachable(Element e, HashSet<Position> h) {
if (e instanceof Atom) return;
for(Sequence s : ((Union)e))
- reachable(s.firstp(), h);
+ reachable(s, h);
}
private static void reachable(Position p, HashSet<Position> h) {
if (h.contains(p)) return;