better reporting of indentation related parse errors
[sbp.git] / src / edu / berkeley / sbp / GSS.java
index 0358ef6..5c1b92f 100644 (file)
@@ -12,322 +12,198 @@ import java.lang.reflect.*;
 /** implements Tomita's Graph Structured Stack */
 class GSS {
 
-    public static Queue<Node> removals = new LinkedList<Node>();
-
-    static String note = "";
-    static int single_newnode = 0;
-    static int toplevel_reductions = 0;
-    static int multi_newnode = 0;
-    static int waiting_newnode = 0;
-    static int shifts = 0;
-
-    static int count = 0;
-    static int reductions = 0;
-    int resets = 0;
-    int waits = 0;
-    
     Input input;
+    private Parser parser;
+    public GSS(Input input, Parser parser) { this.input = input; this.parser = parser;}
+    public Input getInput() { return input; }
 
-    public GSS(Input input) { this.input = input; }
-
-    private Node[] reducing_list = null;
-
-    // FIXME: right now, these are the performance bottleneck
-    HashMapBag<Sequence,Phase.Waiting> waiting         = new HashMapBag<Sequence,Phase.Waiting>();
-    HashMapBag<Integer,Sequence>       performed       = new HashMapBag<Integer,Sequence>();
-    HashMapBag<Integer,Sequence>       lastperformed   = new HashMapBag<Integer,Sequence>();
-    HashMapBag<Integer,Sequence>       expected        = new HashMapBag<Integer,Sequence>();
-    
-    /** FIXME */
-    Forest.Many finalResult;
+    int numNewNodes = 0;
+    int numOldNodes = 0;
+    int viewPos = 0;
+    int numReductions = 0;
 
     /** corresponds to a positions <i>between tokens</i> the input stream; same as Tomita's U_i's */
-    class Phase<Tok> implements Invokable<State, Forest, Node>, IntegerMappable, GraphViz.ToGraphViz, Iterable<Node> {
+    class Phase<Tok> implements Invokable<State, Result>, IntegerMappable, GraphViz.ToGraphViz, Iterable<Node> {
 
-        public int pos() { return pos; }
-        public boolean closed() { return closed; }
-        public Tok token() { return token; }
+        // FIXME: right now, these are the performance bottleneck
+        private HashMapBag<Integer,Sequence>       performed       = new HashMapBag<Integer,Sequence>();
 
-        public Iterator<Node> iterator() { return hash.iterator(); }
-        public void invoke(State st, Forest result, Node n) {
-            shifts++;
-            good |= next.newNode(n, result, st, false);
+        public Forest.Many finalResult;
+        private PriorityQueue<Reduction> reductionQueue = new PriorityQueue<Reduction>();
+
+        Parser parser() { return parser; }
+        public void addReduction(Reduction r) {
+            //System.out.println("+ " + r);
+            parser.spin();
+            reductionQueue.add(r);
+        }
+        public void invoke(State st, Result result) {
+            parser.spin();
+            good |= next.newNode(result, st, false);
         }
 
         /** the token immediately after this phase */
         final Tok token;
+        final int pos;
 
-        private final int pos;
-
-        boolean reducing;
-        public IntPairMap<Node> hash;  /* ALLOC */
-        private boolean closed;
-        private boolean good;
+        public IntPairMap<Node> hash = new IntPairMap<Node>();  /* ALLOC */
+        private boolean good = false;
         private Phase next = null;
         private Phase prev;
         private Input.Location location;
         private Input.Location nextLocation;
         private Input.Location prevLocation;
         
-        public final Parser parser;
-
         private Forest forest;
 
-        public Phase(Phase prev, Parser parser, Phase previous, Tok token, Input.Location location,
-                     Input.Location nextLocation, Forest forest) throws ParseFailed {
-            this.prevLocation = prev==null ? location : prev.getLocation();
+        public Phase(State startState) throws ParseFailed, IOException {
+            this(null, null);
+            Result primordealResult = new Result(null, null, null);
+            newNode(primordealResult, startState, true);
+        }
+        public Phase(Phase prev, Forest forest) throws ParseFailed, IOException {
+            this.prevLocation = input.getLocation();
+            this.token = (Tok)input.next();
+            this.location = input.getLocation();
             this.prev = prev;
             this.forest = forest;
-            this.parser = parser;
-            this.pos = previous==null ? 0 : previous.pos+1;
-            this.token = token;
-            this.location = location;
-            this.nextLocation = nextLocation;
-            performed.clear();
-            reset();
-        }
-
-        public void reset() throws ParseFailed {
-            waiting.clear();
-            expected.clear();
-            lastperformed.clear();
-            lastperformed.addAll(performed);
-            performed.clear();
-            hash = new IntPairMap<Node>();
-            reset = false;
-            good = false;
-            closed = false;
-            reducing = false;
-            finalResult = null;
+            this.pos = prev==null ? 0 : prev.pos+1;
+            this.nextLocation = input.getLocation();
             if (prev != null) prev.shift(this, forest);
+            numReductions = 0;
+
+            int minPhasePos = Integer.MAX_VALUE;
+            int maxOrd = -1;
+            Reduction best = null;
+            //System.out.println("==============================================================================");
+            while(!reductionQueue.isEmpty()) {
+                Reduction r = reductionQueue.poll();
+                //System.out.println("- " + r);
+                if (r.parentPhase() != null)
+                    if (r.parentPhase().pos > minPhasePos)
+                        throw new Error();
+                r.perform();
+                if (r.parentPhase() != null) {
+                    if (r.parentPhase().pos < minPhasePos) {
+                        minPhasePos = r.parentPhase().pos;
+                        maxOrd = r.reduction().ord;
+                        best = r;
+                    } else if (r.parentPhase().pos == minPhasePos) {
+                        /*
+                        if (best != null && Parser.mastercache.comparePositions(r.reduction(), best.reduction()) < 0)
+                            throw new Error("\n"+r+"\n"+best+"\n"+
+                                            Parser.mastercache.comparePositions(r.reduction(), best.reduction())+"\n"+r.compareTo(best)+
+                                            "\n"+(r.reduction().ord-best.reduction().ord));
+                        */
+                        maxOrd = r.reduction().ord;
+                        best = r;
+                    }
+                }
+                numReductions++;
+            }
+            if (token==null) shift(null, null);
         }
 
-      
         public boolean isDone() throws ParseFailed {
             if (token != null) return false;
             if (token==null && finalResult==null)
-                ParseFailed.error("unexpected end of file",
-                                  getLocation(),
-                                  token,
-                                  hash.values(),
-                                  getLocation().createRegion(getLocation()),
-                                  input,
-                                  GSS.this);
+                ParseFailed.error("unexpected end of file", this, null,
+                                  getLocation().createRegion(getLocation()));
             return true;
         }
 
         public Input.Location getPrevLocation() { return prevLocation; }
         public Input.Location getLocation() { return location; }
-        public Input.Region   getRegion() { return getPrevLocation().createRegion(getLocation()); }
+        public Input.Region getRegion() { return prevLocation.createRegion(location); }
         public Input.Location getNextLocation() { return nextLocation; }
+        public boolean        isFrontier() { return hash!=null; }
 
-        /** add a new node (merging with existing nodes if possible)
-         *  @param parent             the parent of the new node
-         *  @param result             the SPPF result corresponding to the new node
-         *  @param state              the state that the new node is in
-         *  @param fromEmptyReduction true iff this node is being created as a result of a reduction of length zero (see GRMLR paper)
-         *  @param start              the earliest part of the input contributing to this node (used to make merging decisions)
-         */
-        public boolean newNode(Node parent, Forest pending, State state, boolean fromEmptyReduction) {
-            Node p = hash.get(state, parent==null?null:parent.phase());
-            if (p != null)  return newNode2(p, parent, pending, state, fromEmptyReduction);
-            else            return newNode3(parent, pending, state, fromEmptyReduction);
-        }
-        public void newNode(Node parent, Forest pending, State state, boolean fromEmptyReduction, Position reduction) {
-            int pos = parent==null?0:parent.phase()==null?0:parent.phase().pos;
-            Sequence owner = reduction==null ? null : reduction.owner();
-            if (reduction!=null) {
-                if (owner.hates!=null) {
-                    for (Sequence s : performed.getAll(pos))
-                        if (owner.hates.contains(s))
-                            return;
-                    for (Sequence s : lastperformed.getAll(pos))
-                        if (owner.hates.contains(s)) {
-                            //System.out.println("now expecting ["+pos+"] => " + s);
-                            expected.add(pos, s);
-                            return;
-                        }
-                }
-                if (owner.needs != null)
-                    for(Sequence s : owner.needs)
-                        if (!performed.contains(pos, s)) {
-                            waiting.add(s, new Waiting(parent, pending, state, fromEmptyReduction, reduction));
-                            return;
-                        }
-                if (!performed.contains(pos, owner)) {
-                    performed.add(pos, owner);
-                    if (owner.hated != null)
-                        for(Sequence seq : owner.hated)
-                            if (performed.contains(pos, seq)) {
-                                performed.remove(pos, seq);
-                                reset = true;
-                            }
+        /** perform all shift operations, adding promoted nodes to <tt>next</tt> */
+        private void shift(Phase next, Forest result) throws ParseFailed {
+            this.next = next;
+            // this massively improves GC performance
+            if (prev != null) {
+                IntPairMap<Node> h = prev.hash;
+                prev.hash = null;
+                prev.performed = null;
+                for(Node n : h)
+                    n.check();
+            }
+            numOldNodes = hash.size();
+            for(Node n : hash.values()) {
+                if (token == null && n.state().isAccepting()) {
+                    if (finalResult==null) finalResult = new Forest.Many();
+                    for(Result r : n)
+                        finalResult.merge(r.getForest());
                 }
+                if (token == null) continue;
+                n.state().invokeShifts(token, this, new Result(result, n, null));
             }
-            newNode(parent, pending, state, fromEmptyReduction);
-            if (reduction != null) {
-                boolean redo = true;
-                while(redo) {
-                    redo = false;
-                    for(Waiting w : waiting.getAll(owner)) {
-                        if (w.parent==parent || (parent!=null&&w.parent!=null&&w.parent.phase()==parent.phase())) {
-                            waiting.remove(owner, w);
-                            w.perform();
-                            redo = true;
-                            break;
-                        }
-                    }
+            numNewNodes = next==null ? 0 : next.hash.size();
+            viewPos = this.pos;
+            if (!good && token!=null) {
+                String toks = token+"";
+                if (toks.length()==1 && toks.charAt(0) == edu.berkeley.sbp.chr.CharAtom.left) {
+                    ParseFailed.error("unexpected increase in indentation", this,
+                                      token, getRegionFromThisToNext());
+                } else if (toks.length()==1 && toks.charAt(0) == edu.berkeley.sbp.chr.CharAtom.right) {
+                    ParseFailed.error("unexpected decrease in indentation", this,
+                                      token, getRegionFromThisToNext());
+                } else {
+                    ParseFailed.error("unexpected character '"+ANSI.cyan(StringUtil.escapify(token+"",
+                                                                                             "\\\'\r\n"))+"'",
+                                      this, token, getRegionFromThisToNext());
                 }
             }
+            if (token==null && finalResult==null)
+                ParseFailed.error("unexpected end of file", this, null,
+                                  getLocation().createRegion(getLocation()));
+            for(Node n : hash) n.check();
         }
 
-        private boolean newNode2(Node p, Node parent, Forest pending, State state, boolean fromEmptyReduction) {
-            if (p.merge(parent, pending)) return true;
-            p.addParent(parent, true);
-            if (p!=parent && !fromEmptyReduction && reducing) p.performReductions(parent);
-            return true;
+        void newNodeFromReduction(Result result, State state, Position reduction) {
+            int pos = result.phase().pos;
+            Sequence owner = reduction.owner();
+            for(Sequence s : owner.hates())
+                if (performed.contains(pos, s))
+                    return;
+            for(Sequence s : owner.needs())
+                if (!performed.contains(pos, s))
+                    return;
+            if (owner.needed_or_hated && !performed.contains(pos, owner))
+                performed.add(pos, owner);
+            if (state!=null)
+                newNode(result, state, reduction.pos<=0);
         }
 
-        private boolean newNode3(Node parent, Forest pending, State state, boolean fromEmptyReduction) {
+        /** add a new node (merging with existing nodes if possible)
+         *  @param result             the SPPF result corresponding to the new node; its phase, together with
+         *                            <tt>state</tt>, is the key used to look for an existing node to merge with
+         *  @param state              the state that the new node is in
+         *  @param fromEmptyReduction true iff this node is being created as a result of a reduction of length zero (see GRMLR paper)
+         *  @return false iff no node is created because the state is not accepting and can neither shift nor reduce on the token
+         */
+        private boolean newNode(Result result, State state, boolean fromEmptyReduction) {
+            Node p = hash.get(state, result.phase());
+            if (p != null) { p.addResult(result); return true; }
             do {
                 if (token != null && state.canShift(token)) break;
                 if (state.isAccepting()) break;
                 if (token==null) break;
                 if (!state.canReduce(token)) return false;
-                //if (count > 1) break;
-                //if (r.numPop == 0) break;
-                //r.reduce(pending, parent, null, Phase.this, null);
-                //return;
             } while(false);
-
-            Node n = new Node(Phase.this, parent, pending, state);  // ALLOC
-            if (reducing) {
-                n.performEmptyReductions();
-                if (!fromEmptyReduction) n.performReductions(parent);
-            }
+            Node n = new Node(Phase.this, result, state, fromEmptyReduction);  // ALLOC
+            for(Object s : state.conjunctStates)
+                newNode(new Result(null, n, null), (State)s, fromEmptyReduction);
             return true;
         }
 
-        public LinkedList<Node> reductionQueue = new LinkedList<Node>();
-
-        /** perform all reduction operations */
-        public void reduce() throws ParseFailed {
-            try {
-                reducing = true;
-                if (reducing_list==null || reducing_list.length < hash.size())
-                    reducing_list = new Node[hash.size() * 4];
-                hash.toArray(reducing_list);
-                int num = hash.size();
-                for(int i=0; i<num; i++) {
-                    Node n = reducing_list[i];
-                    n.performEmptyReductions();
-                    // INVARIANT: we never "see" a node until its parent-set is complete, modulo merges
-                }
-                for(int i=0; i<num; i++) {
-                    reductionQueue.add(reducing_list[i]);
-                    reducing_list[i] = null;
-                }
-                while(!reductionQueue.isEmpty()) {
-                    reductionQueue.remove().performReductions();
-                }
-                if (reset) {
-                    reset = false;
-                    resets++;
-                    throw new Reset();
-                }                
-                for(int i : expected)
-                    for(Sequence s : expected.getAll(i))
-                        if (!performed.contains(i, s)) {
-                            //System.out.println("resetting due to pos="+i+": " + s + " " + System.identityHashCode(s));
-                            resets++;
-                            throw new Reset();
-                        }
-            } catch (Reset r) {
-                reset();
-                reduce();
-            }
-            count = 0;
-        }
-
-        private boolean reset = false;
-        class Reset extends RuntimeException { }
-
-        /** perform all shift operations, adding promoted nodes to <tt>next</tt> */
-        public void shift(Phase next, Forest result) throws ParseFailed {
-            // this massively improves GC performance
-            if (prev!=null && parser.helpgc) {
-                //prev.hash = null;
-                //System.out.println("\r" + /*shifts + " " + */ single_newnode /*+ "/"+multi_newnode + " " + waiting_newnode*/);
-                //System.out.println("\r" + shifts + " " + note);
-                //System.out.println("\r" + shifts);
-                //System.out.println("\r" + toplevel_reductions);
-                //System.out.println("\r" + multi_newnode);
-                single_newnode = 0;
-                note = "";
-                multi_newnode = 0;
-                toplevel_reductions = 0;
-                waiting_newnode = 0;
-                shifts = 0;
-            }
-            this.next = next;
-            closed = true;
-            Forest res = null;
-            boolean ok = false;
-            int count = 0;
-            for(Node n : hash.values()) {
-                if (token == null && n.state().isAccepting()) {
-                    if (finalResult==null) finalResult = new Forest.Many();
-                    for(Object f : n.results())
-                        finalResult.merge((Forest)f);
-                }
-                if (token == null) continue;
-                n.state().invokeShifts(token, this, result, n);
-            }
-            //System.out.println(next.hash.size());
-            if (!good && token!=null)
-                ParseFailed.error("unexpected character",
-                                  getLocation(),
-                                  token,
-                                  hash.values(),
-                                  getRegion(),
-                                  input,
-                                  GSS.this);
-
-            if (token==null && finalResult==null)
-                ParseFailed.error("unexpected end of file",
-                                  getLocation(),
-                                  token,
-                                  hash.values(),
-                                  getLocation().createRegion(getLocation()),
-                                  input,
-                                  GSS.this);
-        }
-
-
-        class Waiting {
-            Node parent;
-            Forest pending;
-            State state;
-            boolean fromEmptyReduction;
-            Position reduction;
-            public Waiting(Node parent, Forest pending, State state, boolean fromEmptyReduction, Position reduction) {
-                waits++;
-                this.parent = parent;
-                this.pending = pending;
-                this.state = state;
-                this.fromEmptyReduction = fromEmptyReduction;
-                this.reduction = reduction;
-            }
-            public void perform() {
-                //System.out.println("performing: " + reduction.position);
-                waiting_newnode++;
-                newNode(parent, pending, state, fromEmptyReduction, reduction);
-            }
-        }
-       
-
         public int toInt() { return pos+1; }
         public int size() { return hash==null ? 0 : hash.size(); }
+        public int pos() { return pos; }
+        public Tok getToken() { return token; }
+        public Iterator<Node> iterator() { return hash==null ? java.util.Collections.<Node>emptyList().iterator() : hash.iterator(); } // hash is nulled by shift(); mirror size() rather than throw NPE
+        public GSS getGSS() { return GSS.this; }
 
         // GraphViz //////////////////////////////////////////////////////////////////////////////
 
@@ -342,6 +218,17 @@ class GSS {
         public boolean isTransparent() { return false; }
         public boolean isHidden() { return false; }
 
+        /** renders every node of this phase into a fresh GraphViz and dumps it to <tt>filename</tt>;
+         *  the writer is closed (which also flushes it) even if rendering or dumping throws */
+        public void dumpGraphViz(String filename) throws IOException {
+            PrintWriter p = new PrintWriter(new OutputStreamWriter(new FileOutputStream(filename)));
+            try {
+                GraphViz gv = new GraphViz();
+                for(Object n : this) ((Node)n).toGraphViz(gv);
+                gv.dump(p);
+            } finally { p.close(); }
+        }
+
     }
 
 }