cleanups, reorg, and commenting
[sbp.git] / src / edu / berkeley / sbp / Forest.java
index 0a20228..cf64ed2 100644 (file)
+// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license
+
 package edu.berkeley.sbp;
-import edu.berkeley.sbp.*;
-import edu.berkeley.sbp.*;
 import edu.berkeley.sbp.util.*;
 import java.io.*;
 import java.util.*;
-import java.lang.reflect.*;
 
-/** an efficient representation of a collection of trees (Tomita's shared packed parse forest) */
-public abstract class Forest<T> extends PrintableTree<Forest.Body<T>> implements Iterable<Forest.Body<T>> {
+/**
+ *   <font color=blue>
+ *   An efficient representation of a collection of trees (Tomita's
+ *   shared packed parse forest).
+ *   </font>
+ */
+public abstract class Forest<NodeType> implements GraphViz.ToGraphViz {
 
     /** assume that this forest contains exactly one tree and return it; otherwise throw an exception */
-    public final Tree<T> expand1() throws Ambiguous, ParseFailed {
-        Iterator<Tree<T>> it = expand(true).iterator();
-        if (!it.hasNext()) throw new ParseFailed();
-        return it.next();
-    }
+    public abstract Tree<NodeType> expand1() throws Ambiguous;
 
     /** expand this forest into a set of trees */
-    public abstract HashSet<Tree<T>>  expand(boolean toss);
+    public Iterable<Tree<NodeType>> expand() {
+        HashSet<Tree<NodeType>> ht = new HashSet<Tree<NodeType>>();
+        expand(ht, new HashSet<Forest<NodeType>>(), null);
+        return ht;
+    }
+
+    /** returns the input Region which this Forest was parsed from */
+    public abstract Input.Region getRegion();
+
+    // Package-Private //////////////////////////////////////////////////////////////////////////////
 
-    static        <T> Forest<T> singleton(Input.Location loc)                       { return create(loc, null, new Forest[] { }, false, true); }
-    static        <T> Forest<T> singleton(Input.Location loc, Forest<T> body)       { return create(loc, null, new Forest[] { body },  false, true); }
-    static        <T> Forest<T> leaf(Input.Location loc, T tag) { return create(loc, tag, null, false, false); }
-    public static <T> Forest<T> create(Input.Location loc, T tag, Forest<T>[] tokens, boolean unwrap, boolean singleton) {
-        return new MultiForest<T>(loc, tag, tokens, unwrap, singleton);
+    static <NodeType> Forest<NodeType> create(Input.Region region, NodeType head, Forest<NodeType>[] children, boolean lift) {
+        if (region == null) throw new RuntimeException("invoked Forest.create(region=null) -- this should never happen");
+        return new One<NodeType>(region, head, children, lift);
     }
 
-    // Body //////////////////////////////////////////////////////////////////////////////
+    /** create a new forest node for the given region, head, and children (equivalent to passing lift=false) */
+    public static <NodeType> Forest<NodeType> create(Input.Region region, NodeType head, Forest<NodeType>[] children) {
+        return Forest.create(region, head, children, false); }
 
-    protected static class Body<T> extends PrintableTree<Forest<T>> implements Iterable<Forest<T>> {
+    abstract void expand(HashSet<Tree<NodeType>> ht, HashSet<Forest<NodeType>> ignore, Tree<NodeType> bogus);
+    abstract void gather(HashSet<Forest<NodeType>> ignore);
+    abstract void edges(GraphViz.Node n);
+    boolean ambiguous() { return false; }
+    
+    // One //////////////////////////////////////////////////////////////////////////////
 
-        private final Input.Location    location;
-        private final T                 tag;
-        private final Forest<T>[]       tokens;
-        private final boolean           unwrap;
-        private final boolean           singleton;
+    /** A "single" forest with a head and child subforests */    
+    private static class One<NodeType> extends Forest<NodeType> {
 
-        private Body(Input.Location loc, T tag, Forest<T>[] tokens, boolean unwrap, boolean singleton) {
-            this.location = loc;
-            this.tag = tag;
-            this.tokens = tokens==null ? emptyForestArray : new Forest[tokens.length];
-            if (tokens != null) System.arraycopy(tokens, 0, this.tokens, 0, tokens.length);
-            if (tokens != null) for(int i=0; i<tokens.length; i++) if (tokens[i]==null) throw new Error(i+"");
-            this.unwrap = unwrap;
-            this.singleton = singleton;
-        }
+        private final Input.Region      location;
+        private final NodeType                head;
+        private final Forest<NodeType>[]       children;
 
-        private HashSet<Tree<T>> expand(boolean toss, ArrayList<Tree<T>> toks, int i, HashSet<Tree<T>> h) {
-            if (singleton) {
-                for(Body<T> b : tokens[0]) b.expand(toss, toks, i, h);
+        /** if true, the last child's children are considered children of this node */
+        private final boolean           lift;
 
-            } else if (i==tokens.length) {
-                h.add(new Tree<T>(null, tag, toks.toArray(tree_hint)));
+        public Input.Region getRegion() { return location; }
+
+        private One(Input.Region loc, NodeType head, Forest<NodeType>[] children, boolean lift) {
+            this.location = loc;
+            this.head = head;
+            if (head==null) throw new RuntimeException("invoked Forest.create(,null,,,) -- this should never happen");
+            this.children = children==null ? emptyForestArray : new Forest[children.length];
+            if (children != null) System.arraycopy(children, 0, this.children, 0, children.length);
+            if (children != null) for(int i=0; i<children.length; i++) if (children[i]==null) throw new Error(i+"");
+            this.lift = lift;
+        }
 
-            } else if (unwrap && i==tokens.length-1) {
-                if (tokens[i] != null)
-                    for(Body b : tokens[i])
-                        b.expand(toss, toks, 0, h);
+        public Tree<NodeType> expand1() throws Ambiguous {
+            Tree<NodeType>[] ret = new Tree[children.length];
+            for(int i=0; i<children.length; i++) ret[i] = children[i].expand1();
+            return new Tree<NodeType>(location, head, ret, lift);
+        }
 
+        void gather(HashSet<Forest<NodeType>> hf) {
+            hf.add(this);
+            for(Forest<NodeType> f : children) f.gather(hf);
+        }
+        void expand(HashSet<Tree<NodeType>> ht, HashSet<Forest<NodeType>> ignore, Tree<NodeType> bogus) {
+            if (ignore.contains(this)) { ht.add(bogus); return; }
+            expand(0, new Tree[children.length], ht, ignore, bogus);
+        }
+        private void expand(final int i, Tree<NodeType>[] ta, HashSet<Tree<NodeType>> ht, HashSet<Forest<NodeType>> ignore,
+                            Tree<NodeType> bogus) {
+            if (i==children.length) {
+                ht.add(new Tree<NodeType>(location, head, ta, lift));
             } else {
-                boolean hit = false;
-                for(Tree<T> r : tokens[i].expand(toss)) {
-                    hit = true;
-                    int old = toks.size();
-                    toks.add(r);
-                    expand(toss, toks, i+1, h);
-                    while(toks.size() > old) toks.remove(toks.size()-1);
+                HashSet<Tree<NodeType>> ht2 = new HashSet<Tree<NodeType>>();
+                children[i].expand(ht2, ignore, bogus);
+                for(Tree<NodeType> tc : ht2) {
+                    ta[i] = tc;
+                    expand(i+1, ta, ht, ignore, bogus);
+                    ta[i] = null;
                 }
-                //if (!hit) throw new Error();
             }
-            return h;
         }
 
-        void addTo(FastSet<Body> h) {
-            if (!singleton) h.add(this, true);
-            else for(Body b : tokens[0]) b.addTo(h);
+        // GraphViz, ToInt //////////////////////////////////////////////////////////////////////////////
+
+        public boolean isTransparent() { return false; }
+        public boolean isHidden() { return false; }
+        public GraphViz.Node toGraphViz(GraphViz gv) {
+            if (gv.hasNode(this)) return gv.createNode(this);
+            GraphViz.Node n = gv.createNode(this);
+            n.label = headToString()==null?"":headToString();
+            n.directed = true;
+            edges(n);
+            return n;
+        }
+        boolean edges = false; // FIXME: set on the first call to edges() and not reset in this class, so later calls add no edges
+        public void edges(GraphViz.Node n) {
+            if (edges) return;
+            edges = true;
+            for(int i=0; i<children.length; i++) {
+                if (i==children.length-1 && lift && !children[i].ambiguous()) {
+                    children[i].edges(n);
+                } else {
+                    n.edge(children[i], null);
+                }
+            }
         }
 
-        protected String  headToString()         { return null; }
-        protected String  headToJava()           { return null; }
+        protected String  headToString()         { return head==null?null:head.toString(); }
+        protected String  headToJava()           { return "null"; }
         protected String  left()                 { return "{"; }
         protected String  right()                { return "}"; }
         protected boolean ignoreSingleton()      { return false; }
-        public    Iterator<Forest<T>> iterator() { return new ArrayIterator<Forest<T>>(tokens); }
     }
 
 
-    // Ref //////////////////////////////////////////////////////////////////////////////
-
-    /**
-     *  This class represents a partially complete collection of
-     *  forests to be viewed as a forest at some later date; once
-     *  viewed, it becomes immutable
-     */
-    static class Ref<T> extends Forest<T> {
-        private FastSet<Forest> hp = new FastSet<Forest>();
-        private Forest res = null;
-        public Ref() { }
-        public void merge(Forest p) {
-            if (res != null) throw new Error("already resolved!");
-            if (p==null) throw new Error();
-            if (p!=this) hp.add(p, true);
-        }
-        public Iterator<Body<T>> iterator() { return ((Forest<T>)resolve()).iterator(); }
-        public HashSet<Tree<T>> expand(boolean toss) { return resolve().expand(toss); }
-        public Forest resolve() {
-            if (hp==null) return res;
-            FastSet<Body> nh      = new FastSet<Body>();
-            for(Forest<?> p : hp)
-                for(Body<?> b : (Forest<?>)p)
-                    b.addTo(nh);
-            res = new MultiForest(nh);
-            hp = null;
-            return res;
+    // Many //////////////////////////////////////////////////////////////////////////////
+
+    /** An "ambiguity node" holding alternative forests; once it has been examined (see touched()), merge() throws, so it is effectively immutable */
+    static class Many<NodeType> extends Forest<NodeType> {
+
+        private FastSet<Forest<NodeType>> hp = new FastSet<Forest<NodeType>>();
+        private boolean touched = false;
+
+        public Many() { }
+
+        public Input.Region getRegion() { return hp.iterator().next().getRegion(); } // all alternatives should share one region; NOTE: assumes hp is non-empty
+
+        public Tree<NodeType> expand1() throws Ambiguous {
+            touched();
+            if (hp.size() > 1) {
+                HashSet<Forest<NodeType>> hf0 = new HashSet<Forest<NodeType>>();
+                Iterator<Forest<NodeType>> ih = hp.iterator();
+                ih.next().gather(hf0);
+                for(Forest<NodeType> f : hp) {
+                    HashSet<Forest<NodeType>> hf1 = new HashSet<Forest<NodeType>>();
+                    f.gather(hf1);
+                    hf0.retainAll(hf1);
+                }
+                HashSet<Tree<NodeType>> ht = new HashSet<Tree<NodeType>>();
+                expand(ht, hf0, new Tree(null, "*"));
+                throw new Ambiguous((Forest<?>)this,
+                                    (HashSet<Tree<?>>)(Object)ht);
+            }
+            return hp.iterator().next().expand1();
         }
-    }
+        
+        void gather(HashSet<Forest<NodeType>> ht) {
+            touched();
+
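+            // FIXME: do something more sensible here; this check also fires when a node is
+            // merely reached a second time through sharing, not only on a true cycle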
+            if (ht.contains(this)) {
+                System.err.println("WARNING: grammar produced a circular forest\n" + this);
+                //throw new Error("grammar produced a circular forest:\n" + this);
+                return;
+            }
+
+            ht.add(this);
+            for(Forest<NodeType> f : hp) f.gather(ht);
+        }
+
+        private void touched() {
+            if (touched) return;
+            touched = true;
+            /*
+            FastSet<Forest<NodeType>> f2 = new FastSet<Forest<NodeType>>();
+            for(Forest f : hp)
+                if (f instanceof Forest.One) f2.add(f);
+                else for(Forest ff : ((Forest.Many<NodeType>)f))
+                    f2.add(ff);
+            hp = f2;
+            */
+        }
+        public boolean contains(Forest f) {
+            touched();
+            return hp.contains(f);
+        }
+        public void merge(Forest p) { 
+            if (touched) throw new RuntimeException("attempt to merge() on a Forest.Many that has already been examined");
+            if (p==this) throw new RuntimeException("attempt to merge() a Forest.Many to itself!");
+            hp.add(p, true);
+        }
+        boolean ambiguous() {
+            touched();
+            if (hp.size()==0) return false;
+            if (hp.size()==1) return hp.iterator().next().ambiguous();
+            return true;
+        }
+
+        void expand(HashSet<Tree<NodeType>> ht, HashSet<Forest<NodeType>> ignore, Tree<NodeType> bogus) {
+            touched();
+            if (ignore.contains(this)) { ht.add(bogus); return; }
+            for (Forest<NodeType> f : hp) f.expand(ht, ignore, bogus);
+        }
+
 
-    // Implementations //////////////////////////////////////////////////////////////////////////////
+        // GraphViz, ToInt //////////////////////////////////////////////////////////////////////////////
 
-    private static class MultiForest<T> extends Forest<T> {
-        private final FastSet<Body<T>> results;
-        private MultiForest(FastSet<Body<T>> results) { this.results = results; }
-        public MultiForest(Input.Location loc, T tag, Forest<T>[] tokens, boolean unwrap, boolean singleton) {
-            this.results = new FastSet<Body<T>>(new Body(loc, tag, tokens, unwrap, singleton));
+        public boolean isTransparent() { return hp.size()==1; }
+        public boolean isHidden() { return hp.size()==0; }
+        public void edges(GraphViz.Node n) {
+            if (hp.size()==1) { hp.iterator().next().edges(n); return; }
+            for(Forest f : hp) f.edges(n);
         }
-        public Iterator<Body<T>> iterator() { return results.iterator(); }
-        public HashSet<Tree<T>> expand(boolean toss) {
-            HashSet<Tree<T>> ret = new HashSet<Tree<T>>();
-            for(Body<T> b : results)
-                ret.addAll(b.expand(toss, new ArrayList<Tree<T>>(), 0, new HashSet<Tree<T>>()));
-            if (toss && ret.size() > 1) throw new Ambiguous(this);
-            return ret;
+        public GraphViz.Node toGraphViz(GraphViz gv) {
+            if (hp.size()==1) return hp.iterator().next().toGraphViz(gv);
+            if (gv.hasNode(this)) return gv.createNode(this);
+            GraphViz.Node n = gv.createNode(this);
+            n.label = "?";
+            n.color = "red";
+            for(Forest f : hp) n.edge(f, null);
+            return n;
         }
     }
 
     // Statics //////////////////////////////////////////////////////////////////////////////
 
     private static Tree[] tree_hint = new Tree[0];
+    private static String[] string_hint = new String[0];
     private static final Forest[] emptyForestArray = new Forest[0];
 
     protected String  headToString()    { return null; }
-    protected String  headToJava()      { return null; }
+    protected String  headToJava()      { return "null"; }
     protected String  left()            { return "<?"; }
     protected String  right()           { return "?>"; }
     protected boolean ignoreSingleton() { return true; }