checkpoint
[sbp.git] / src / edu / berkeley / sbp / Parser.java
index 2344e15..a74ea5e 100644 (file)
@@ -21,7 +21,7 @@ public abstract class Parser<Tok, Result> {
 
     public String toString() { return pt.toString(); }
 
-    /** parse <tt>input</tt>, using the table <tt>pt</tt> to drive the parser */
+    /** parse <tt>input</tt> and return the shared packed parse forest (or throw <tt>IOException</tt> / <tt>ParseFailed</tt>) */
     public Forest<Result> parse(Input<Tok> input) throws IOException, ParseFailed {
         GSS gss = new GSS();
         Input.Location loc = input.getLocation();
@@ -77,7 +77,7 @@ public abstract class Parser<Tok, Result> {
 
         public final Walk.Cache cache = this;
 
-        private void walk(Element e, HashSet<Element> hs) {
+        private void walk(Element e, HashSet<SequenceOrElement> hs) {
             if (e==null) return;
             if (hs.contains(e)) return;
             hs.add(e);
@@ -85,7 +85,7 @@ public abstract class Parser<Tok, Result> {
             for(Sequence s : (Union)e)
                 walk(s, hs);
         }
-        private void walk(Sequence s, HashSet<Element> hs) {
+        private void walk(Sequence s, HashSet<SequenceOrElement> hs) {
             hs.add(s);
             for(Position p = s.firstp(); p != null; p = p.next())
                 walk(p.element(), hs);
@@ -114,9 +114,9 @@ public abstract class Parser<Tok, Result> {
             cache.eof.put(start0, true);
 
             // construct the set of states
-            HashSet<Element>                        all_elements  = new HashSet<Element>();
+            HashSet<SequenceOrElement>                        all_elements  = new HashSet<SequenceOrElement>();
             walk(start0, all_elements);
-            for(Element e : all_elements)
+            for(SequenceOrElement e : all_elements)
                 cache.ys.addAll(e, new Walk.YieldSet(e, cache).walk());
             HashSet<Position> hp = new HashSet<Position>();
             reachable(start0, hp);
@@ -144,7 +144,7 @@ public abstract class Parser<Tok, Result> {
                     // if the element following this position is an atom, copy the corresponding
                     // set of rows out of the "master" goto table and into this state's shift table
                     if (p.element() != null && p.element() instanceof Atom)
-                        state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element())));
+                        state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()).getTokenTopology()));
                 }
             if (top instanceof IntegerTopology)
                 for(State<Tok> state : all_states.values()) {
@@ -166,7 +166,7 @@ public abstract class Parser<Tok, Result> {
             public  final     int               idx    = master_state_idx++;
             private final     HashSet<Position> hs;
 
-            public transient HashMap<Element,State<Tok>>          gotoSetNonTerminals = new HashMap<Element,State<Tok>>();
+            public transient HashMap<Sequence,State<Tok>>         gotoSetNonTerminals = new HashMap<Sequence,State<Tok>>();
             private transient TopologicalBag<Tok,State<Tok>>     gotoSetTerminals    = new TopologicalBag<Tok,State<Tok>>();
 
             private           TopologicalBag<Tok,Position> reductions          = new TopologicalBag<Tok,Position>();
@@ -219,7 +219,7 @@ public abstract class Parser<Tok, Result> {
              */
             public State(HashSet<Position> hs,
                          HashMap<HashSet<Position>,State<Tok>> all_states,
-                         HashSet<Element> all_elements) {
+                         HashSet<SequenceOrElement> all_elements) {
                 this.hs = hs;
 
                 // register ourselves in the all_states hash so that no
@@ -237,7 +237,7 @@ public abstract class Parser<Tok, Result> {
                     Atom a = (Atom)position.element();
                     HashSet<Position> hp = new HashSet<Position>();
                     reachable(position.next(), hp);
-                    bag0.addAll(a, hp);
+                    bag0.addAll(a.getTokenTopology(), hp);
                 }
 
                 // Step 1b: for each _minimal, contiguous_ set of characters having an identical next-position
@@ -258,24 +258,34 @@ public abstract class Parser<Tok, Result> {
                 //         to avoid having to iteratively construct our set of States as shown in most
                 //         expositions of the algorithm (ie "keep doing XYZ until things stop changing").
 
-                HashMapBag<Element,Position> move = new HashMapBag<Element,Position>();
+                HashMapBag<SequenceOrElement,Position> move = new HashMapBag<SequenceOrElement,Position>();
                 for(Position p : hs) {
                     Element e = p.element();
                     if (e==null) continue;
-                    for(Element y : cache.ys.getAll(e)) {
+                    for(SequenceOrElement y : cache.ys.getAll(e)) {
                         HashSet<Position> hp = new HashSet<Position>();
                         reachable(p.next(), hp);
                         move.addAll(y, hp);
                     }
                 }
-                for(Element y : move) {
+                OUTER: for(SequenceOrElement y : move) {
                     HashSet<Position> h = move.getAll(y);
                     State<Tok> s = all_states.get(h) == null ? new State<Tok>(h, all_states, all_elements) : all_states.get(h);
                     // if a reduction is "lame", it should wind up in the dead_state after reducing
-                    if (y instanceof Sequence && ((Sequence)y).lame)
-                        ((HashMap)gotoSetNonTerminals).put(y, dead_state);
-                    else
-                        gotoSetNonTerminals.put(y, s);
+                    if (y instanceof Sequence) {
+                        for(Position p : hs) {
+                            if (p.element() != null && (p.element() instanceof Union)) {
+                                Union u = (Union)p.element();
+                                for(Sequence seq : u)
+                                    if (seq.needs.contains((Sequence)y) || seq.hates.contains((Sequence)y)) {
+                                        // FIXME: what if there are two "routes" to get to the sequence?
+                                        ((HashMap)gotoSetNonTerminals).put((Sequence)y, dead_state);
+                                        continue OUTER;
+                                    }
+                            }
+                        }
+                        gotoSetNonTerminals.put((Sequence)y, s);
+                    }
                 }
             }