checkpoint
[sbp.git] / src / edu / berkeley / sbp / Parser.java
index 0504a11..a74ea5e 100644 (file)
@@ -21,7 +21,7 @@ public abstract class Parser<Tok, Result> {
 
     public String toString() { return pt.toString(); }
 
-    /** parse <tt>input</tt>, using the table <tt>pt</tt> to drive the parser */
+    /** parse <tt>input</tt>, and return the shared packed parse forest (or throw an exception) */
     public Forest<Result> parse(Input<Tok> input) throws IOException, ParseFailed {
         GSS gss = new GSS();
         Input.Location loc = input.getLocation();
@@ -77,20 +77,27 @@ public abstract class Parser<Tok, Result> {
 
         public final Walk.Cache cache = this;
 
-        private void walk(Element e, HashSet<Element> hs) {
+        private void walk(Element e, HashSet<SequenceOrElement> hs) {
             if (e==null) return;
             if (hs.contains(e)) return;
             hs.add(e);
             if (e instanceof Atom) return;
-            for(Sequence s : (Union)e) {
-                hs.add(s);
-                for(Position p = s.firstp(); p != null; p = p.next())
-                    walk(p.element(), hs);
-            }
+            for(Sequence s : (Union)e)
+                walk(s, hs);
+        }
+        private void walk(Sequence s, HashSet<SequenceOrElement> hs) { // records s in hs, then walks its position elements and its needs()/hates() sequences
+            if (!hs.add(s)) return; // add() is false when s was already recorded: stop, so a shared or cyclic needs()/hates() sequence is walked once
+            for(Position p = s.firstp(); p != null; p = p.next())
+                walk(p.element(), hs);
+            for(Sequence ss : s.needs()) walk(ss, hs);
+            for(Sequence ss : s.hates()) walk(ss, hs);
         }
 
         /** the start state */
-        public final State<Tok>   start;
+        public  final State<Tok>   start;
+
+        /** the state from which no reductions can be done */
+        private final State<Tok>   dead_state;
 
         /** used to generate unique values for State.idx */
         private int master_state_idx = 0;
@@ -107,12 +114,14 @@ public abstract class Parser<Tok, Result> {
             cache.eof.put(start0, true);
 
             // construct the set of states
-            HashSet<Element>                        all_elements  = new HashSet<Element>();
+            HashSet<SequenceOrElement>                        all_elements  = new HashSet<SequenceOrElement>();
             walk(start0, all_elements);
-            for(Element e : all_elements)
+            for(SequenceOrElement e : all_elements)
                 cache.ys.addAll(e, new Walk.YieldSet(e, cache).walk());
             HashSet<Position> hp = new HashSet<Position>();
             reachable(start0, hp);
+
+            this.dead_state = new State<Tok>(new HashSet<Position>(), all_states, all_elements);
             this.start = new State<Tok>(hp, all_states, all_elements);
 
             // for each state, fill in the corresponding "row" of the parse table
@@ -135,7 +144,7 @@ public abstract class Parser<Tok, Result> {
                     // if the element following this position is an atom, copy the corresponding
                     // set of rows out of the "master" goto table and into this state's shift table
                     if (p.element() != null && p.element() instanceof Atom)
-                        state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element())));
+                        state.shifts.addAll(state.gotoSetTerminals.subset(((Atom)p.element()).getTokenTopology()));
                 }
             if (top instanceof IntegerTopology)
                 for(State<Tok> state : all_states.values()) {
@@ -157,7 +166,7 @@ public abstract class Parser<Tok, Result> {
             public  final     int               idx    = master_state_idx++;
             private final     HashSet<Position> hs;
 
-            public transient HashMap<Element,State<Tok>>          gotoSetNonTerminals = new HashMap<Element,State<Tok>>();
+            public transient HashMap<Sequence,State<Tok>>         gotoSetNonTerminals = new HashMap<Sequence,State<Tok>>();
             private transient TopologicalBag<Tok,State<Tok>>     gotoSetTerminals    = new TopologicalBag<Tok,State<Tok>>();
 
             private           TopologicalBag<Tok,Position> reductions          = new TopologicalBag<Tok,Position>();
@@ -170,15 +179,15 @@ public abstract class Parser<Tok, Result> {
 
             // Interface Methods //////////////////////////////////////////////////////////////////////////////
 
-            boolean             isAccepting()               { return accept; }
-            public Iterator<Position>  iterator()                  { return hs.iterator(); }
+            boolean             isAccepting()           { return accept; }
+            public Iterator<Position>  iterator()       { return hs.iterator(); }
 
-            boolean             canShift(Tok t)           { return oshifts.contains(t); }
+            boolean             canShift(Tok t)         { return oshifts!=null && oshifts.contains(t); }
             <B,C> void          invokeShifts(Tok t, Invokable<State<Tok>,B,C> irbc, B b, C c) {
                 oshifts.invoke(t, irbc, b, c);
             }
 
-            boolean             canReduce(Tok t)          { return t==null ? eofReductions.size()>0 : oreductions.contains(t); }
+            boolean             canReduce(Tok t)        { return t==null ? eofReductions.size()>0 : (oreductions != null && oreductions.contains(t)); } // t==null is answered from eofReductions alone, so only the token branch needs the oreductions null check
             <B,C> void          invokeReductions(Tok t, Invokable<Position,B,C> irbc, B b, C c) {
                 if (t==null) for(Position r : eofReductions) irbc.invoke(r, b, c);
                 else         oreductions.invoke(t, irbc, b, c);
@@ -210,7 +219,7 @@ public abstract class Parser<Tok, Result> {
              */
             public State(HashSet<Position> hs,
                          HashMap<HashSet<Position>,State<Tok>> all_states,
-                         HashSet<Element> all_elements) {
+                         HashSet<SequenceOrElement> all_elements) {
                 this.hs = hs;
 
                 // register ourselves in the all_states hash so that no
@@ -228,7 +237,7 @@ public abstract class Parser<Tok, Result> {
                     Atom a = (Atom)position.element();
                     HashSet<Position> hp = new HashSet<Position>();
                     reachable(position.next(), hp);
-                    bag0.addAll(a, hp);
+                    bag0.addAll(a.getTokenTopology(), hp);
                 }
 
                 // Step 1b: for each _minimal, contiguous_ set of characters having an identical next-position
@@ -248,20 +257,35 @@ public abstract class Parser<Tok, Result> {
                 //         "yields" [in one or more step] is used instead of "produces" [in exactly one step]
                 //         to avoid having to iteratively construct our set of States as shown in most
                 //         expositions of the algorithm (ie "keep doing XYZ until things stop changing").
-                HashMapBag<Element,Position> move = new HashMapBag<Element,Position>();
+
+                HashMapBag<SequenceOrElement,Position> move = new HashMapBag<SequenceOrElement,Position>();
                 for(Position p : hs) {
                     Element e = p.element();
                     if (e==null) continue;
-                    for(Element y : cache.ys.getAll(e)) {
+                    for(SequenceOrElement y : cache.ys.getAll(e)) {
                         HashSet<Position> hp = new HashSet<Position>();
                         reachable(p.next(), hp);
                         move.addAll(y, hp);
                     }
                 }
-                for(Element y : move) {
+                OUTER: for(SequenceOrElement y : move) {
                     HashSet<Position> h = move.getAll(y);
                     State<Tok> s = all_states.get(h) == null ? new State<Tok>(h, all_states, all_elements) : all_states.get(h);
-                    gotoSetNonTerminals.put(y, s);
+                    // if a reduction is "lame", it should wind up in the dead_state after reducing
+                    if (y instanceof Sequence) {
+                        for(Position p : hs) {
+                            if (p.element() != null && (p.element() instanceof Union)) {
+                                Union u = (Union)p.element();
+                                for(Sequence seq : u)
+                                    if (seq.needs.contains((Sequence)y) || seq.hates.contains((Sequence)y)) {
+                                        // FIXME: what if there are two "routes" to get to the sequence?
+                                        ((HashMap)gotoSetNonTerminals).put((Sequence)y, dead_state);
+                                        continue OUTER;
+                                    }
+                            }
+                        }
+                        gotoSetNonTerminals.put((Sequence)y, s);
+                    }
                 }
             }
 
@@ -287,10 +311,15 @@ public abstract class Parser<Tok, Result> {
 
     // Helpers //////////////////////////////////////////////////////////////////////////////
     
+    private static void reachable(Sequence s, HashSet<Position> h) { // visits s's first position, then every sequence in s.needs() and s.hates()
+        reachable(s.firstp(), h); // the Position overload returns immediately when h already contains that position
+        for(Sequence ss : s.needs()) reachable(ss, h);
+        for(Sequence ss : s.hates()) reachable(ss, h); // NOTE(review): neither recursion has a visited check of its own -- confirm needs()/hates() can never form a cycle
+    }
     private static void reachable(Element e, HashSet<Position> h) {
         if (e instanceof Atom) return;
         for(Sequence s : ((Union)e))
-            reachable(s.firstp(), h);
+            reachable(s, h);
     }
     private static void reachable(Position p, HashSet<Position> h) {
         if (h.contains(p)) return;