unrolling forests without recursion
[sbp.git] / src / edu / berkeley / sbp / GSS.java
index 6db929b..7af8b07 100644 (file)
@@ -1,70 +1,94 @@
 package edu.berkeley.sbp;
 import edu.berkeley.sbp.*;
-import edu.berkeley.sbp.*;
-import edu.berkeley.sbp.*;
 import edu.berkeley.sbp.util.*;
+import edu.berkeley.sbp.Parser.Table.*;
+import edu.berkeley.sbp.Sequence.Position;
 import java.io.*;
 import java.util.*;
 import java.lang.reflect.*;
 
-//////////////////////////////////////////////////////////////////////////////
-// TODO:
-//
-//  - fix public/package/private status
-//
-
-//////////////////////////////////////////////////////////////////////////////
-// Optimizations to add
-//
-// ** NOTE: not all of these are appropriate for this class -- it is
-//          simply a list of optimizations not implemented.  This
-//          class is meant to remain simple and easy to understand;
-//          optimizations which obscure that do not belong here (they
-//          should go into the compiled version instead)
-
 /** implements Tomita's Graph Structured Stack */
-class GSS {
+public class GSS {
 
+    public static int count = 0;
+    public static int shifts = 0;
+    public static int reductions = 0;
+    
     public GSS() { }
 
     private Phase.Node[] reducing_list = null;
+    public int resets = 0;
+    public int waits = 0;
+
+    // FIXME: right now, these are the performance bottleneck
+    HashMapBag<Sequence,Phase.Waiting> waiting         = new HashMapBag<Sequence,Phase.Waiting>();
+    HashMapBag<Integer,Sequence>       performed       = new HashMapBag<Integer,Sequence>();
+    HashMapBag<Integer,Sequence>       lastperformed   = new HashMapBag<Integer,Sequence>();
+    HashMapBag<Integer,Sequence>       expected        = new HashMapBag<Integer,Sequence>();
+    
+    /** FIXME */
+    public  Forest.Ref finalResult;
 
     /** corresponds to a positions <i>between tokens</i> the input stream; same as Tomita's U_i's */
-    public class Phase {
+    class Phase<Tok> implements Invokable<State, Forest, Phase<Tok>.Node>, IntegerMappable, GraphViz.ToGraphViz, Iterable<Phase.Node> {
+
+        public Iterator<Phase.Node> iterator() { return hash.iterator(); }
+        public void invoke(State st, Forest result, Node n) {
+            shifts++;
+            good |= next.newNode(n, result, st, false);
+        }
 
         /** the token immediately after this phase */
-        public  final Token token;
+        final Tok token;
 
-        /** currently this is necessary only for the code() hack -- it doesn't actually correspond to the input */
         private final int pos;
 
-        /** FIXME */
-        public  Forest.Ref finalResult = null;
-
-        /** all nodes, keyed by the value returned by code() */
-        private HashMap<Long,Phase.Node> hash    = new HashMap<Long,Phase.Node>();  /* ALLOC */
-
-        /** the number of nodes in this phase */
-        private int numNodes = 0;
-
-        boolean closed = false;
-
-        private Token.Location location;
-        public Phase(Phase previous, Token token, Token.Location location) {
+        boolean reducing;
+        private IntPairMap<Phase.Node> hash;  /* ALLOC */
+        private boolean closed;
+        private boolean good;
+        private Phase next = null;
+        private Phase prev;
+        private Input.Location location;
+        public final Parser parser;
+
+        private Forest forest;
+
+        public Phase(Phase prev, Parser parser, Phase previous, Tok token, Input.Location location, Forest forest) throws ParseFailed {
+            this.prev = prev;
+            this.forest = forest;
+            this.parser = parser;
             this.pos = previous==null ? 0 : previous.pos+1;
             this.token = token;
             this.location = location;
+            performed.clear();
+            reset();
         }
 
-        public boolean isDone() { return token == null; }
+        /**
+         *  Throw away everything this phase has computed so far and rebuild its
+         *  initial node set.  Called once from the constructor and again from
+         *  reduce() each time a Reset is caught.
+         *
+         *  @throws ParseFailed propagated from prev.shift() when nothing can be shifted into this phase
+         */
+        public void reset() throws ParseFailed {
+            waiting.clear();
+            expected.clear();
+            // keep a copy of what the abandoned attempt performed; newNode() consults
+            // lastperformed to decide which sequences to record in "expected"
+            lastperformed.clear();
+            lastperformed.addAll(performed);
+            performed.clear();
+            hash = new IntPairMap<Phase.Node>();
+            reset = false;
+            good = false;
+            closed = false;
+            reducing = false;
+            finalResult = null;
+            // re-run the previous phase's shifts so the fresh hash is repopulated
+            if (prev != null) prev.shift(this, forest);
+        }
 
-        private String error = "generic syntax error";
-        public void checkFailure() throws Parser.Failed {
-            if (numNodes <= 0)
-                throw new Parser.Failed(error, getLocation());
+      
+        /**
+         *  Reports whether this phase sits at the end of the input.
+         *
+         *  @return false while there is still a token after this phase; true at
+         *          end of input once a final result has been recorded
+         *  @throws ParseFailed at end of input when no final result exists
+         */
+        public boolean isDone() throws ParseFailed {
+            boolean atEof = (token == null);
+            if (!atEof) return false;
+            if (finalResult != null) return true;
+            throw new ParseFailed(ParseFailed.error(ANSI.red("unexpected end of file\n"), token, hash.values()), getLocation());
         }
 
-        public Token.Location getLocation() { return location; }
+        public Input.Location getLocation() { return location; }
 
         /** add a new node (merging with existing nodes if possible)
          *  @param parent             the parent of the new node
@@ -73,201 +97,310 @@ class GSS {
          *  @param fromEmptyReduction true iff this node is being created as a result of a reduction of length zero (see GRMLR paper)
          *  @param start              the earliest part of the input contributing to this node (used to make merging decisions)
          */
-        public void newNode(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction, Phase start) {
-            Node p = hash.get(code(state, start));
-            if (p != null)  newNode2(p, parent, pending, state, fromEmptyReduction, start);
-            else            newNode3(parent, pending, state, fromEmptyReduction, start);
+        public boolean newNode(Node parent, Forest pending, State state, boolean fromEmptyReduction) {
+            Node p = hash.get(state, parent==null?null:parent.phase());
+            if (p != null)  return newNode2(p, parent, pending, state, fromEmptyReduction);
+            else            return newNode3(parent, pending, state, fromEmptyReduction);
         }
-        private void newNode2(Node p, Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction, Phase start) {
-            p.holder.merge(pending);
-            if (p.parents().contains(parent)) return;
+        /**
+         *  Add a new node as the result of a reduction, first applying the
+         *  hates/needs constraints declared on the Sequence that owns the reduction.
+         *  The request is dropped if a sequence the owner hates was already performed
+         *  at the same start position, and parked in <tt>waiting</tt> if a sequence
+         *  the owner needs has not been performed there yet.
+         *  @param parent             the parent of the new node
+         *  @param pending            the forest to attach to the new node
+         *  @param state              the state of the new node
+         *  @param fromEmptyReduction true iff this node is being created as a result of a reduction of length zero
+         *  @param reduction          the reduction being performed; if null, no constraint checks are made
+         */
+        public void newNode(Node parent, Forest pending, State state, boolean fromEmptyReduction, Position reduction) {
+            // constraints are tracked per start position, i.e. the pos of the parent's phase
+            int pos = parent==null?0:parent.phase()==null?0:parent.phase().pos;
+            Sequence owner = reduction==null ? null : reduction.owner();
+            if (reduction!=null) {
+                if (owner.hates!=null) {
+                    // a hated sequence was already performed in this pass: suppress this reduction
+                    for (Sequence s : performed.getAll(pos))
+                        if (owner.hates.contains(s))
+                            return;
+                    // a hated sequence was performed in the previous (abandoned) pass: suppress,
+                    // and record that we expect it to show up again (checked at the end of reduce())
+                    for (Sequence s : lastperformed.getAll(pos))
+                        if (owner.hates.contains(s)) {
+                            //System.out.println("now expecting ["+pos+"] => " + s);
+                            expected.add(pos, s);
+                            return;
+                        }
+                }
+                // a needed sequence has not been performed yet: park this request until it is
+                if (owner.needs != null)
+                    for(Sequence s : owner.needs)
+                        if (!performed.contains(pos, s)) {
+                            waiting.add(s, new Waiting(parent, pending, state, fromEmptyReduction, reduction));
+                            return;
+                        }
+                if (!performed.contains(pos, owner)) {
+                    performed.add(pos, owner);
+                    // anything already performed that hates this owner must be retracted;
+                    // that cannot be undone in place, so flag the phase for a reset
+                    if (owner.hated != null)
+                        for(Sequence seq : owner.hated)
+                            if (performed.contains(pos, seq)) {
+                                performed.remove(pos, seq);
+                                reset = true;
+                            }
+                }
+            }
+            // owner is null when reduction is null; the unguarded owner.lame used to throw a NullPointerException here
+            if (owner == null || !owner.lame)
+                newNode(parent, pending, state, fromEmptyReduction);
+            if (reduction != null) {
+                // release requests that were waiting on this owner; restart the scan after
+                // each removal because the bag is modified while being iterated
+                boolean redo = true;
+                while(redo) {
+                    redo = false;
+                    for(Waiting w : waiting.getAll(owner)) {
+                        if (w.parent==parent || (parent!=null&&w.parent!=null&&w.parent.phase()==parent.phase())) {
+                            waiting.remove(owner, w);
+                            w.perform();
+                            redo = true;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        private boolean newNode2(Node p, Node parent, Forest pending, State state, boolean fromEmptyReduction) {
+            if (p.merge(parent, pending)) return true;
             p.parents().add(parent, true);
-            if (p!=parent && !fromEmptyReduction) p.queueReductions(parent);
+            if (p!=parent && !fromEmptyReduction && reducing) p.performReductions(parent);
+            return true;
         }
-        private void newNode3(Node parent, Forest pending, Parser.Table.State state, boolean fromEmptyReduction, Phase start) {
+
+        private boolean newNode3(Node parent, Forest pending, State state, boolean fromEmptyReduction) {
             do {
                 if (token != null && state.canShift(token)) break;
                 if (state.isAccepting()) break;
                 if (token==null) break;
-                int count = 0;
-                Parser.Table.Reduction r = null;
-                for(Parser.Table.Reduction red : token==null ? state.getEofReductions() : state.getReductions(token)) { r = red; count++; }
-                if (count==0) return;     // BEWARE! this optimization is suspected to cause really nasty heisenbugs
+                if (!state.canReduce(token)) return false;
                 //if (count > 1) break;
                 //if (r.numPop == 0) break;
                 //r.reduce(pending, parent, null, Phase.this, null);
                 //return;
             } while(false);
 
-            Node n = new Node(parent, pending, state, start);  // ALLOC
-            n.queueEmptyReductions();
-            if (!fromEmptyReduction) n.queueReductions(parent);
+            Node n = new Node(parent, pending, state);  // ALLOC
+            if (reducing) {
+                n.performEmptyReductions();
+                if (!fromEmptyReduction) n.performReductions(parent);
+            }
+            return true;
         }
 
-        
-        boolean reducing = false;
         /** perform all reduction operations */
-        public void reduce() {
-            reducing = true;
-            if (reducing_list==null || reducing_list.length < hash.size())
-                reducing_list = new Phase.Node[hash.size() * 4];
-            Collection<Node> hv = hash.values();
-            hv.toArray(reducing_list);
-            int num = hv.size();
-            for(int i=0; i<num; i++) {
-                Node n = reducing_list[i];
-                reducing_list[i] = null;
-                n.queueEmptyReductions();
-                n.queueReductions();
+        public void reduce() throws ParseFailed {
+            try {
+                reducing = true;
+                // reducing_list is a scratch array held by the enclosing GSS and reused between calls;
+                // it is reallocated (with 4x headroom) only when too small for this phase
+                if (reducing_list==null || reducing_list.length < hash.size())
+                    reducing_list = new Phase.Node[hash.size() * 4];
+                hash.toArray(reducing_list);
+                int num = hash.size();
+                // first pass: empty reductions for every node captured in the snapshot above
+                for(int i=0; i<num; i++) {
+                    Node n = reducing_list[i];
+                    n.performEmptyReductions();
+                    // INVARIANT: we never "see" a node until its parent-set is complete, modulo merges
+                }
+                // second pass: the remaining reductions; slots are nulled as they are consumed
+                for(int i=0; i<num; i++) {
+                    Node n = reducing_list[i];
+                    reducing_list[i] = null;
+                    n.performReductions();
+                }
+                // newNode() sets this flag when it had to retract an already-performed sequence
+                if (reset) {
+                    reset = false;
+                    resets++;
+                    throw new Reset();
+                }                
+                // any sequence recorded in "expected" that was not performed in this pass forces another reset
+                for(int i : expected)
+                    for(Sequence s : expected.getAll(i))
+                        if (!performed.contains(i, s)) {
+                            //System.out.println("resetting due to pos="+i+": " + s + " " + System.identityHashCode(s));
+                            resets++;
+                            throw new Reset();
+                        }
+            } catch (Reset r) {
+                // start this phase over from scratch and try again (recursively)
+                reset();
+                reduce();
             }
+            count = 0;
         }
 
+        private boolean reset = false;
+        class Reset extends RuntimeException { }
+
         /** perform all shift operations, adding promoted nodes to <tt>next</tt> */
-        public void shift(Phase next, Forest result) {
+        public void shift(Phase next, Forest result) throws ParseFailed {
+            // this massively improves GC performance
+            if (prev!=null && parser.helpgc) {
+                prev.hash = null;
+            }
+            this.next = next;
             closed = true;
             Forest res = null;
             boolean ok = false;
             for(Phase.Node n : hash.values()) {
-                if (n.holder==null) continue;
-                n.holder.resolve();
                 if (token == null && n.state.isAccepting()) {
-                    ok = true;
                     if (finalResult==null) finalResult = new Forest.Ref();
-                    finalResult.merge(n.holder);
+                    for(Object f : n.results())
+                        finalResult.merge((Forest)f);
                 }
-                if (!n.holder.valid()) continue;
                 if (token == null) continue;
-                for(Parser.Table.State st : n.state.getShifts(token)) {
-                    if (res == null) res = result;
-                    next.newNode(n, res, st, true, this);
-                    ok = true;
-                }
+                n.state.invokeShifts(token, this, result, n);
             }
 
-            if (!ok && token != null) {
-                StringBuffer error = new StringBuffer();
-                error.append("error: unable to shift token \"" + token + "\"\n");
-                //error.append("  before: " +pendingReductions+ "\n");
-                //error.append("  before: " +totalReductions+ "\n");
-                //for(Phase.Node n : hash.values()) {
-                //n.queueReductions();
-                //n.queueEmptyReductions();
-                //}
-                //error.append("  after: " +pendingReductions+ "\n");
-                error.append("  candidate states:\n");
-                for(Phase.Node n : hash.values()) {
-                    //for(Sequence.Position p : n.state) error.append("        " + p + "\n");
-                    //error.append("        --\n");
-                    for(Parser.Table.Reduction r : n.state.getReductions(token)) error.append("        " + r + "\n");
-                    //error.append("        ==\n");
-                }
-                next.error = error.toString();
-            }
-
-            // this massively improves GC performance
-            hash = null;
+            if (!good && token!=null)
+                throw new ParseFailed(ParseFailed.error(ANSI.red("unexpected character ")+" \'"+
+                                                        ANSI.purple(StringUtil.escapify(token+"", "\\\'\r\n"))+
+                                                        "\' encountered at "+
+                                                        ANSI.green(getLocation())+"\n", token, hash.values()),
+                                        getLocation());
+            if (token==null && finalResult==null)
+                throw new ParseFailed(ParseFailed.error(ANSI.red("unexpected end of file\n"), token, hash.values()),
+                                        getLocation());
         }
 
+
+        /**
+         *  A deferred newNode() request: the saved arguments of a reduction whose
+         *  owner needs a Sequence that had not yet been performed.  newNode() stores
+         *  these in <tt>waiting</tt> and calls perform() once that Sequence is performed.
+         */
+        class Waiting {
+            Node parent;
+            Forest pending;
+            State state;
+            boolean fromEmptyReduction;
+            Position reduction;
+            public Waiting(Node parent, Forest pending, State state, boolean fromEmptyReduction, Position reduction) {
+                // statistics only: counts how many requests have been deferred
+                waits++;
+                this.parent = parent;
+                this.pending = pending;
+                this.state = state;
+                this.fromEmptyReduction = fromEmptyReduction;
+                this.reduction = reduction;
+            }
+            /** replay the saved request through newNode(), re-running its constraint checks */
+            public void perform() {
+                //System.out.println("performing: " + reduction.position);
+                newNode(parent, pending, state, fromEmptyReduction, reduction);
+            }
+        }
        
-        // GSS Nodes //////////////////////////////////////////////////////////////////////////////
+        // Node /////////////////////////////////////////////////////////////////////////////////
 
         /** a node in the GSS */
-        public final class Node extends FastSet<Node> {
+        final class Node implements Invokable<Position, Node, Node>, IntegerMappable, GraphViz.ToGraphViz {
+            public FastSet<Node> set = new FastSet<Node>();
 
-            private Forest.Ref holder = null;
+           
             private boolean allqueued = false;
 
-            private HashMap<Parser.Table.Reduction,Forest> cache = null;
-
-            /** the set of nodes to which there is an edge starting at this node */
-            //public final FastSet<Node> parents = new FastSet<Node>();  /* ALLOC */
-
             /** what state this node is in */
-            public final Parser.Table.State state;
+            public final Parser.Table<Tok>.State<Tok> state;
 
             /** which Phase this Node belongs to (node that Node is also a non-static inner class of Phase) */
-            public final Phase phase = Phase.this;
-
-            public  HashMap<Parser.Table.Reduction,Forest> cache() {
-                return cache==null ? (cache = new HashMap<Parser.Table.Reduction,Forest>()) : cache; }
-            public  Forest.Ref holder() { return holder==null ? (holder = new Forest.Ref()) : holder; }
-            public  Forest pending() { return Phase.this.closed ? holder().resolve() : holder; }
-            public  FastSet<Node> parents() { return this; }
+            public  Phase phase() { return Phase.this; }
+
+            private HashSet<Forest.Ref> resultMap = new HashSet<Forest.Ref>();
+            public Iterable<Forest.Ref> results() { return resultMap; }
+            public FastSet<Node> parents() { return set; }
+            /**
+             *  Attach <tt>result</tt> to this node as having arrived via <tt>parent</tt>.
+             *  @param parent the node the result arrived from
+             *  @param result the forest to record
+             *  @return true if an existing Forest.Ref whose only parent is <tt>parent</tt> absorbed the
+             *          result; false if a new Forest.Ref was created and <tt>parent</tt> was added to
+             *          this node's parent set
+             */
+            public boolean merge(Node parent, Forest result) {
+                // FIXME: inefficient!
+                for(Forest.Ref f : results()) {
+                    if (f.parents.contains(parent) /* UGLY: */ && f.parents.size()==1) {
+                        f.merge(result);
+                        return true;
+                    }
+                }
+                // no Ref is dedicated to this parent yet: create one
+                // NOTE(review): parent may be null here (the constructor passes it through unchecked) -- confirm
+                // that Forest.Ref.parents and FastSet accept a null element
+                Forest.Ref f = new Forest.Ref();
+                f.parents.add(parent);
+                f.merge(result);
+                resultMap.add(f);
+                set.add(parent, true);
+                return false;
+            }
 
-            /** FIXME */
-            public void queueReductions() {
+            public void performReductions() {
                 if (allqueued) return;
                 allqueued = true;
-                int where = parents().size();
-                for(int i=0; i<where; i++) queueReductions(get(i));
+                state.invokeReductions(token, this, this, null);
+            }
+
+            public void performReductions(Node n2) {
+                if (!allqueued) performReductions();
+                else            state.invokeReductions(token, this, this, n2);
             }
 
-            /** FIXME */
-            public void queueReductions(Node n2) {
-                if (!allqueued) { queueReductions(); return; }
-                Node n = this;
-                for(Parser.Table.Reduction r : token==null ? n.state.getEofReductions() : n.state.getReductions(token)) {
-                    
-                    // UGLY HACK
-                    // The problem here is that a "reduction of length 1"
-                    // performed twice with different values of n2 needs
-                    // to only create a *single* new result, but must add
-                    // multiple parents to the node holding that result.
-                    // The current reducer doesn't differentiate between
-                    // the next node of an n-pop reduction and the
-                    // ultimate parent of the last pop, so we need to
-                    // cache instances here as a way of avoiding
-                    // recreating them.
-                    
-                    // currently we have this weird problem where we
-                    // have to do an individual reduct for each child
-                    // when the reduction length is one (ie the
-                    // children wind up being children of the newly
-                    // created node rather than part of the popped
-                    // sequence
-                    if (r.numPop <= 0) continue;
-                    if (r.numPop == 1) {
-                        Forest ret = n.cache().get(r);
-                        if (ret != null) r.reduce(n, n2, n.phase, ret);
-                        else n.cache().put(r, r.reduce(n, n2, n.phase, null));
-                    } else {
-                        r.reduce(n, n2, Phase.this, null);
+            public void performEmptyReductions() { state.invokeReductions(token, this, null, null); }
+            /**
+             *  Perform one reduction; this is the callback handed to state.invokeReductions()
+             *  by performReductions() and performEmptyReductions().
+             *  <ul>
+             *  <li> r.pos==0 (empty reduction): performed only when n is null, in which case it
+             *       is finished directly on this node using r.zero()
+             *  <li> r.pos&gt;0 and n is null: ignored
+             *  <li> r.pos&gt;0 and n2 is null: reduce through every child of n
+             *  <li> r.pos&gt;0 and n2 is non-null: reduce only through the child n2
+             *  </ul>
+             *  @param r  the reduction to perform
+             *  @param n  the node to reduce from, or null
+             *  @param n2 if non-null, the only child of n to reduce through
+             */
+            public final void invoke(Position r, Node n, Node n2) {
+                reductions++;
+                if (n==null || n2==null || r.pos==0) {
+                    if (r.pos==0) {
+                        if (n==null) n = this;
+                        else return;
                     }
+                    if (n==null) return;
+                    // (a scratch Forest[] used to be allocated here on every call but was never read)
+                    if (r.pos==0) n.finish(r, r.zero(), n.phase());
+                    else          n.reduce(r, r.pos-1,  n.phase(), null);
+                } else {
+                    if (r.pos<=0) throw new Error("called wrong form of reduce()");
+                    int pos = r.pos-1;
+                    n.reduce(r, pos, n.phase(), n2);
                 }
             }
 
+            /**
+             *  Walk backwards through the GSS filling in r.holder, one slot per popped node,
+             *  and call finish() on the node reached after slot 0 has been filled.
+             *  @param r      the reduction being performed; its holder array is used as scratch space
+             *  @param pos    the holder slot to be filled from this node's results
+             *  @param target the phase in which the resulting node will be created
+             *  @param only   if non-null, follow only this child at this level (deeper levels are unrestricted)
+             */
+            public void reduce(Position r, int pos, Phase target, Node only) {
+                Forest[] holder = r.holder;
+                // r.holder is shared, so remember the slot's old value and restore it on the way out
+                Forest old = holder[pos];
+
+                for(Forest result : results())
+                    for(Node child : ((Forest.Ref<?>)result).parents) {
+                        if (only != null && child!=only) continue;
+                        holder[pos] = result;
+                        // slot 0 filled: holder is complete, so build the result and hand it to the child
+                        if (pos==0) child.finish(r, r.rewrite(phase().getLocation()), target);
+                        else        child.reduce(r, pos-1, target, null);
+                    }
+
+                holder[pos] = old;
+            }
 
-            /** FIXME */
-            public void queueEmptyReductions() {
-                if (reducing)
-                    for(Parser.Table.Reduction r : token==null ? state.getEofReductions() : state.getReductions(token))
-                        if (r.numPop==0)
-                            r.reduce(this, null, this.phase, r.zero());
+            public void finish(Position r, Forest result, Phase<Tok> target) {
+                Parser.Table<Tok>.State<Tok> state0 = state.gotoSetNonTerminals.get(r.owner());
+                if (result==null) throw new Error();
+                if (state0!=null)
+                    target.newNode(this, result, state0, r.pos<=0, r);
             }
 
-            private Node(Node parent, Forest pending, Parser.Table.State state, Phase start) {
+            private Node(Node parent, Forest pending, State state) {
                 this.state = state;
-                if (pending != null) this.holder().merge(pending);
+                this.merge(parent, pending);
+                Phase start = parent==null ? null : parent.phase();
                 if (parent != null) parents().add(parent, true);
-                if (Phase.this.hash.get(code(state, start)) != null) throw new Error("severe problem!");
-                Phase.this.hash.put(code(state, start), this);
-                Phase.this.numNodes++;
-                if (parent==null) holder().valid = true; // hack to make sure that the "base" node is always considered valid
+                if (Phase.this.hash.get(state, start) != null) throw new Error("severe problem!");
+                Phase.this.hash.put(state, start, this);
             }
+            public int toInt() { return idx; }
+            private final int idx = node_idx++;
+
+            // GraphViz //////////////////////////////////////////////////////////////////////////////
+
+            public GraphViz.Node toGraphViz(GraphViz gv) {
+                if (gv.hasNode(this)) return gv.createNode(this);
+                GraphViz.Node n = gv.createNode(this);
+                n.label = ""+state.toStringx();
+                n.shape = "rectangle";
+                n.fill = "green";
+                for(Forest result : results()) n.edge(result, "");
+                for(Node parent : parents()) n.edge(parent, "");
+                ((GraphViz.Group)phase().toGraphViz(gv)).add(n);
+                return n;
+            }
+            public boolean isTransparent() { return false; }
+            public boolean isHidden() { return false; }
+
         }
+        private int node_idx = 0;
 
-    }
+        public int toInt() { return pos+1; }
+        public int size() { return hash==null ? 0 : hash.size(); }
 
-    /** helper method */
-    private static boolean equal(Object a, Object b) {
-        if (a==null && b==null) return true;
-        if (a==null || b==null) return false;
-        return a.equals(b);
-    }
+        // GraphViz //////////////////////////////////////////////////////////////////////////////
+
+        public GraphViz.Node toGraphViz(GraphViz gv) {
+            if (gv.hasNode(this)) return gv.createNode(this);
+            GraphViz.Group g = gv.createGroup(this);
+            g.label = "Phase " + pos;
+            g.color = "gray";
+            g.cluster = true;
+            return g;
+        }
+        public boolean isTransparent() { return false; }
+        public boolean isHidden() { return false; }
 
-    /** this is something of a hack right now */
-    private static long code(Parser.Table.State state, Phase start) {
-        return (((long)state.idx) << 32) | (start==null ? 0 : start.pos);
     }
-    public boolean yak = false;
 }