fix major bug: create YieldSet2 which does not consider needs/hates
[sbp.git] / src / edu / berkeley / sbp / Walk.java
index 5754726..e2ac662 100644 (file)
@@ -1,23 +1,24 @@
+// Copyright 2006 all rights reserved; see LICENSE file for BSD-style license
+
 package edu.berkeley.sbp;
-import edu.berkeley.sbp.util.*;
-import edu.berkeley.sbp.*;
 import edu.berkeley.sbp.*;
+import edu.berkeley.sbp.util.*;
 import edu.berkeley.sbp.Sequence.Position;
 import java.io.*;
 import java.util.*;
 import java.lang.reflect.*;
 import java.lang.ref.*;
 
-/** a traversal of the grammar performed by mapping from Elements to a lattice and computing the resulting LUB */
+/** a traversal of the grammar performed by mapping from SequenceOrElements to a lattice and computing the resulting LUB */
 abstract class Walk<T> {
-    protected HashSet<Element> acc = new HashSet<Element>();
-    protected abstract T bottom(Element e);
+    protected HashSet<SequenceOrElement> acc = new HashSet<SequenceOrElement>();
+    protected abstract T bottom(SequenceOrElement e);
 
     protected final Cache c;
     public Walk() { this(null); }
     public Walk(Cache c) { this.c = c; }
 
-    public  T sequence(Sequence s) {
+    public  T walkSequence(Sequence s) {
         T ret = bottom(s);
         for(Position p = s.firstp(); p!=null && !p.isLast(); p = p.next())
             ret = sequence(s, ret, walk(p.element()));
@@ -27,34 +28,84 @@ abstract class Walk<T> {
     public      T walkAtom(Atom r) { return walk(r); }
     public      T union(Union u, T a, T b) { return  bottom(u); }
     public      T sequence(Sequence s, T a, T b) { return  bottom(s); }
-    protected   T walk(Element e) {
+    protected   T walk(SequenceOrElement e) {
         if (acc.contains(e)) return bottom(e);
         acc.add(e);
         return walk2(e);
     }
 
-    protected T walk2(Element e) {
+    protected T walk2(SequenceOrElement e) {
         if      (e instanceof Atom)     return walkAtom((Atom)e);
-        else if (e instanceof Sequence) return sequence((Sequence)e);
+        else if (e instanceof Sequence) return walkSequence((Sequence)e);
         else if (e instanceof Union) {
             T ret = bottom(e);
-            for(Sequence s : (Union)e) ret = union((Union)e, ret, walk(s));
+            for(Sequence s : (Union)e)
+                ret = union((Union)e, ret, walk(s));
             return ret;
         } else {
             throw new Error("unknown element of class " + e.getClass().getName() + ": " + e);
         }
     }
 
-    static class YieldSet extends Walk<HashSet<Element>> {
-        private final Element e;
-        public final HashSet<Element> walk() { return walk(e); }
-        public YieldSet(Element e, Cache c)  { super(c); this.e = e; }
-        public HashSet<Element> bottom(Element e)     { return acc; }
-        public HashSet<Element> sequence(Sequence seq) { return bottom(seq); }
-        public HashSet<Element> walkAtom(Atom r) {
-            c.atoms.put(e, c.atoms.get(e)==null ? r.top() : c.atoms.get(e).union(r.top()));
+    static class YieldSet extends Walk<HashSet<SequenceOrElement>> { // result is acc, the set of nodes visited from the root e
+        private final SequenceOrElement e; // root of the walk; also the key under which walkAtom records atoms
+        public final HashSet<SequenceOrElement> walk() { return walk(e); }
+        public YieldSet(SequenceOrElement e, Cache c)  { super(c); this.e = e; } // c must be non-null: walkAtom dereferences it
+        public HashSet<SequenceOrElement> bottom(SequenceOrElement e)     { return acc; }
+        public HashSet<SequenceOrElement> walkSequence(Sequence seq) { return bottom(seq); } // does not iterate over the sequence's positions
+        public HashSet<SequenceOrElement> walkAtom(Atom r) {
+            c.atoms.put(e, c.atoms.get(e)==null ? r : c.atoms.get(e).union(r)); // c.atoms[e] accumulates the union of every atom reached from e
             return super.walkAtom(r);
         }
+        protected HashSet<SequenceOrElement> walk2(SequenceOrElement e) { // here e is the node being visited and shadows the root field
+            HashSet<SequenceOrElement> ret = super.walk2(e);
+            if (e instanceof Union) // after the normal walk of a Union, also walk what each alternative needs()/hates()
+                for(Sequence s : (Union)e) {
+                    for(Sequence ss : s.needs()) ret = union((Union)e, ret, walk(ss));
+                    for(Sequence ss : s.hates()) ret = union((Union)e, ret, walk(ss));
+                }
+            return ret;
+        }
+    }
+
+    static class YieldSet2 extends Walk<HashSet<SequenceOrElement>> { // like YieldSet but with no walk2 override, so needs()/hates() are not walked
+        private final SequenceOrElement e; // root of the walk; also the key under which walkAtom records atoms
+        public final HashSet<SequenceOrElement> walk() { return walk(e); }
+        public YieldSet2(SequenceOrElement e, Cache c)  { super(c); this.e = e; } // c must be non-null: walkAtom dereferences it
+        public HashSet<SequenceOrElement> bottom(SequenceOrElement e)     { return acc; } // the result is the set of visited nodes itself
+        public HashSet<SequenceOrElement> walkSequence(Sequence seq) { return bottom(seq); } // does not iterate over the sequence's positions
+        public HashSet<SequenceOrElement> walkAtom(Atom r) {
+            c.atoms.put(e, c.atoms.get(e)==null ? r : c.atoms.get(e).union(r)); // c.atoms[e] accumulates the union of every atom reached from e
+            return super.walkAtom(r);
+        }
+    }
+
+    static class EquivalentTo extends Walk<HashSet<Sequence>> { // collects into eq the Sequences reached from s by the rule in walkSequence
+        private final Sequence s;
+        private final HashSet<Sequence> eq = new HashSet<Sequence>();
+        public final HashSet<Sequence> walk() { return walk(s); }
+        public EquivalentTo(Sequence e, Cache c)  {
+            super(c); this.s = e;
+        }
+        public HashSet<Sequence> bottom(SequenceOrElement e)     { return eq; }
+        public HashSet<Sequence> walkSequence(Sequence seq) {
+            eq.add(seq);
+            Position p = seq.firstp();
+            for(; p!=null && !p.isLast(); p = p.next()) { // null guard added to match Walk.walkSequence and First.walkSequence
+                if (isRightNullable(p.next())) // descend only if everything after p can match epsilon; the loop test already ensures !p.isLast()
+                    walk(p.element());
+                if (!c.possiblyEpsilon(p.element())) break; // positions after a non-nullable element are not considered
+            }
+            return eq;
+        }
+        public HashSet<Sequence> walkAtom(Atom r) {
+            return eq;
+        }
+        private boolean isRightNullable(Position p) { // true iff every element from p up to the last position is possiblyEpsilon
+            if (p.isLast()) return true;
+            if (!c.possiblyEpsilon(p.element())) return false;
+            return isRightNullable(p.next());
+        }
     }
 
 
@@ -64,9 +115,9 @@ abstract class Walk<T> {
         public Boolean walkAtom(Atom r) { return false; }
         public Boolean sequence(Sequence s, Boolean a, Boolean b)  { return new Boolean(a && b); }
         public Boolean union(Union u, Boolean a, Boolean b)     { return new Boolean(a || b); }
-        public Boolean bottom(Element e)    { return (e instanceof Union) ? false : true; }
-        private HashMap<Element,Boolean> hm = new HashMap<Element,Boolean>();
-        protected Boolean walk(Element e) {
+        public Boolean bottom(SequenceOrElement e)    { return (e instanceof Union) ? false : true; }
+        private HashMap<SequenceOrElement,Boolean> hm = new HashMap<SequenceOrElement,Boolean>();
+        protected Boolean walk(SequenceOrElement e) {
             if (hm.get(e) != null) return hm.get(e);
             hm.put(e, false);
             Boolean ret = walk2(e);
@@ -75,59 +126,75 @@ abstract class Walk<T> {
         }
     }
 
+    static class EpsilonFollowSet extends Walk<Atom> { // folds a walk into one Atom built from the follow atoms of the Sequences reached
+        Atom all;    // returned for atoms, for Sequences without a follow atom, and as bottom of any non-Union
+        Atom empty;  // bottom of a Union, i.e. the value its alternatives are unioned onto
+        public EpsilonFollowSet(Atom a, Atom empty, Cache c) { // a becomes all; empty becomes empty; c is handed to Walk
+            super(c);
+            this.all = a;  // FIX: was "this.all = all", a self-assignment that ignored a and left the field null
+            this.empty = empty;
+        }
+        public Atom walkAtom(Atom r) { return all; }
+        public Atom walkSequence(Sequence s) { // a Sequence yields its own follow atom, or all when it has none
+            if (s.follow==null) return all;
+            return s.follow;
+        }
+        public Atom sequence(Sequence s, Atom a, Atom b)  { // the walkSequence override above never folds positions, so it never calls this
+            throw new RuntimeException();
+        }
+        public Atom union(Union u, Atom a, Atom b) { // combine the results of two alternatives of u
+            /* disabled alternative: treat a null operand as the identity instead of as all
+            if (a==null) return b;
+            if (b==null) return a;
+            */
+            if (a==null || b==null) return all;
+            return (Atom)a.union(b);
+        }
+        public Atom bottom(SequenceOrElement e) {
+            return (e instanceof Union) ? empty : all;
+        }
+    }
 
-    // Token-Set //////////////////////////////////////////////////////////////////////////////
+    // Input-Set //////////////////////////////////////////////////////////////////////////////
 
-    static abstract class WalkTokenSet<Tok extends Token> extends Walk<Topology<Tok>> {
+    static abstract class WalkTokenSet<Tok extends Input> extends Walk<Topology<Tok>> {
         public Topology<Tok> cs;
         public WalkTokenSet(Topology<Tok> cs)          { this.cs = cs; }
         public WalkTokenSet(Topology<Tok> cs, Cache c) { super(c); this.cs = cs; }
-        public Topology<Tok> bottom(Element e)         { return cs; }
-        public Topology<Tok> walkAtom(Atom r)          { cs.add(r.top()); return cs; }
+        public Topology<Tok> bottom(SequenceOrElement e)         { return cs; }
+        public Topology<Tok> walkAtom(Atom r)          { cs = cs.union(r.getTokenTopology()); return cs; }
     }
 
-    class First<Tok extends Token> extends WalkTokenSet<Tok> {
+    // feature: intersect with "first" set of all positive conjuncts
+    static class First<Tok extends Input> extends WalkTokenSet<Tok> {
         public First(Topology<Tok> cs, Walk.Cache cache) { super(cs, cache); }
-        public Topology<Tok> sequence(Sequence seq) {
+        public Topology<Tok> walkSequence(Sequence seq) {
             for(Position p = seq.firstp(); p!=null && !p.isLast(); p = p.next()) {
                 walk(p.element());
-                if (!p.element().possiblyEpsilon(c)) break;
+                if (!c.possiblyEpsilon(p.element())) break;
             }
             return cs;
         }
     }
 
-    class Last<Tok extends Token> extends WalkTokenSet<Tok> {
-        public Last(Topology<Tok> cs, Walk.Cache cache) { super(cs, cache); }
-        public Topology<Tok> sequence(Sequence seq) { sequence(seq.firstp()); return cs; }
-        private Topology<Tok> sequence(Position p) {
-            if (p==null) return null;
-            Topology<Tok> ret = sequence(p.next());
-            if (ret!=null) return ret;
-            if (p.isLast()) return null;
-            if (p.element().possiblyEpsilon(c)) return null;
-            if (p.element()==null) return null;
-            return walk(p.element());
-        }
-    }
-
-    static class Follow<Tok extends Token> extends WalkTokenSet<Tok> {
-        private final Element me;
-        private final HashSet<Element> all;
+    static class Follow<Tok extends Input> extends WalkTokenSet<Tok> {
+        private final SequenceOrElement me;
+        private final HashSet<SequenceOrElement> all;
         private boolean eof = false;
         public boolean includesEof() { return eof; }
-        public Follow(Topology<Tok> cs, Element me, HashSet<Element> all, Cache c)  { super(cs, c); this.me = me; this.all = all; }
-        public Topology<Tok> bottom(Element e)                       { return cs; }
-        public Topology<Tok> sequence(Sequence seq)                  { return cs; }
-        public Topology<Tok> walkAtom(Atom r) { return walk((Element)r); }
-        public Topology<Tok> walk(Element e) {
+        public Follow(Topology<Tok> cs, SequenceOrElement me, HashSet<SequenceOrElement> all, Cache c)  {
+          super(cs, c); this.me = me; this.all = all; }
+        public Topology<Tok> bottom(SequenceOrElement e)                       { return cs; }
+        public Topology<Tok> walkSequence(Sequence seq)                  { return cs; }
+        public Topology<Tok> walkAtom(Atom r) { return walk((SequenceOrElement)r); }
+        public Topology<Tok> walk(SequenceOrElement e) {
             if (acc.contains(e)) return bottom(e);
             acc.add(e);
 
             if (c != null) {
                 Topology<Tok> cached = (Topology<Tok>)c.follow.get(e);
                 if (cached != null) {
-                    cs.add(cached);
+                    cs = cs.union(cached);
                     eof |= c.eof.get(e);
                     return cs;
                 }
@@ -135,26 +202,24 @@ abstract class Walk<T> {
 
             Topology<Tok> cso = cs;
             boolean eofo = eof;
-            eof = false;
-            cs = cso.fresh();
+            eof = c.eof.get(e) != null && c.eof.get(e).booleanValue();
+            cs = cso.empty();
 
-            if (e instanceof Parser.Table.Top) eof = true;
-            for(Element x : all) {
+            for(SequenceOrElement x : all) {
                 boolean matched = false;
-                if (x instanceof Parser.Table.Top) walk(x); // because this symbol might not appear in any other Sequence
                 if (!(x instanceof Sequence)) continue;
                 Sequence a = (Sequence)x;
                 Position mp = null;
                 for(Position pos = a.firstp(); pos != null && !pos.isLast(); pos = pos.next()) {
-                    if (matched) cs.add(new First<Tok>(cs.fresh(), c).walk(pos.element()));
-                    if (pos.isLast()) { matched = (matched && pos.element().possiblyEpsilon(c)); continue; }
+                    if (matched) cs = cs.union(c.first(pos.element(), cs.empty()));
+                    if (pos.isLast()) { matched = (matched && c.possiblyEpsilon(pos.element())); continue; }
                     boolean good = false;
                     if (e instanceof Atom) {
                         Topology top = c.atoms.get(pos.element());
                         if (top==null) continue;
-                        if (!(top.containsAll(((Atom)e).top()))) continue;
+                        if (!(top.containsAll(((Atom)e)))) continue;
                     } else {
-                        if (c.ys.get(pos.element()).contains(e)) good = true;
+                        if (c.ys.contains(pos.element(),e)) good = true;
                     }
                     if (good) {
                         mp = pos;
@@ -164,15 +229,17 @@ abstract class Walk<T> {
                 if (matched) walk(a);
             }
 
-            if (e instanceof Repeat.MaximalSequence || e instanceof Repeat.Maximal)
-                cs.remove(new Last<Tok>(cs.fresh(), c).walk(e));
+            if (e instanceof Sequence) {
+                Sequence s = (Sequence)e;
+                if (s.follow != null) cs = cs.intersect(s.follow.getTokenTopology());
+            }
 
             if (c != null && e==me) {
-                c.follow.put(e, cs.dup());
+                c.follow.put(e, cs);
                 c.eof.put(e, eof);
             }
 
-            cso.add(cs);
+            cso = cso.union(cs);
             cs = cso;
             eofo |= eof;
             eof = eofo;
@@ -182,10 +249,22 @@ abstract class Walk<T> {
     }
 
     static class Cache {
-        public final HashMap<Element,Boolean> possiblyEpsilon = new HashMap<Element,Boolean>();
-        public HashMap<Element,Boolean> eof = new HashMap<Element,Boolean>();
-        public HashMap<Element,Topology> follow = new HashMap<Element,Topology>();
-        public HashMap<Element,HashSet<Element>>  ys            = new HashMap<Element,HashSet<Element>>();
-        public HashMap<Element,Topology> atoms = new HashMap<Element,Topology>();
+        public final HashMap<SequenceOrElement,Boolean> possiblyEpsilon = new HashMap<SequenceOrElement,Boolean>();
+        public HashMap<SequenceOrElement,Boolean> eof = new HashMap<SequenceOrElement,Boolean>();
+        public HashMap<SequenceOrElement,Topology> follow = new HashMap<SequenceOrElement,Topology>();
+        public HashMapBag<SequenceOrElement,SequenceOrElement>  ys = new HashMapBag<SequenceOrElement,SequenceOrElement>();
+        public HashMapBag<SequenceOrElement,SequenceOrElement>  ys2 = new HashMapBag<SequenceOrElement,SequenceOrElement>();
+        public HashMap<SequenceOrElement,Topology> atoms = new HashMap<SequenceOrElement,Topology>();
+        public <Tok extends Input> Topology<Tok> first(SequenceOrElement e, Topology<Tok> empty) {
+            return new Walk.First<Tok>(empty, this).walk(e);
+        }
+        final boolean possiblyEpsilon(SequenceOrElement e) {
+            Walk.Cache cache = this;
+            Boolean ret = possiblyEpsilon.get(e);
+            if (ret != null) return ret.booleanValue();
+            ret = new Walk.PossiblyEpsilon().walk(e) ? Boolean.TRUE : Boolean.FALSE;
+            possiblyEpsilon.put(e, ret);
+            return ret;
+        }
     }
 }