move verifyClean into CleanupUtils
[fleet.git] / src / edu / berkeley / fleet / ir / Process.java
index eb31fc2..0378fd3 100644 (file)
@@ -1,4 +1,6 @@
 package edu.berkeley.fleet.ir;
+import edu.berkeley.fleet.loops.*;
+import java.util.concurrent.Semaphore;
 import java.util.*;
 import java.net.*;
 import edu.berkeley.fleet.two.*;
@@ -10,11 +12,6 @@ import edu.berkeley.fleet.api.Instruction.Set.*;
 import static edu.berkeley.fleet.api.Predicate.*;
 import static edu.berkeley.fleet.util.BitManipulations.*;
 
-
-/*
- - refactor the cleanup into the subclasses of Port (phase1, phase2, etc)
- */
-
 // does peer.recvWord() have to honor the currently-set predicate?
 
 // public class ReplaceModule extends Module { }
@@ -26,6 +23,7 @@ import static edu.berkeley.fleet.util.BitManipulations.*;
 
 public class Process {
 
+
     public static int reset_count = 0;
     public static HashSet<Dock> torpedoes = new HashSet<Dock>();
 
@@ -43,15 +41,93 @@ public class Process {
         for(Module mod : modules)
             mod.build(ctx);
     }
-    public void reset(Context ctx, int phase) {
+    public void reset(Context ctx, int phase, Destination ackDestination) {
         reset_count = 0;
         torpedoes.clear();
         for(Module mod : modules)
-            mod.reset(ctx, phase);
+            mod.reset(ctx, phase, ackDestination);
     }
 
     public class Module {
 
+        void doReset(Context ctx, int phase, Dock dock, Port peer, Destination ackDestination, boolean peerUsed) {
+            if (dock.getShip().getType().equals("Debug")) return;
+
+            switch(phase) {
+
+                // Phase 0: torpedo every output dock, put it in
+                // collecting mode.  Cannot combine with phase 1,
+                // because until output docks are in vacuum mode we
+                // cannot be sure that the tokens to the input docks
+                // will eventually succeed.  This may cause the
+                // instructions sent after the tokens to back up into
+                // the switch fabric.
+                case 0: {
+                    if (!dock.isInputDock()) {
+                        torpedoes.add(dock);
+                        LoopFactory lf = new LoopFactory(ctx, dock, 1);
+                        lf.sendToken(ackDestination);
+                        lf = lf.makeNext(0);
+                        lf.abortLoopIfTorpedoPresent();
+                        lf.collectWord();
+                        reset_count++;
+                    }
+                    break;
+                }
+
+                // Phase 1: torpedo every input dock, put it in loopback mode
+                case 1: {
+                    if (dock.isInputDock()) {
+                        torpedoes.add(dock);
+                        LoopFactory lf = new LoopFactory(ctx, dock, 1);
+                        lf.sendToken(ackDestination);
+
+                        // FIXME: this won't work right for ports that
+                        // get "shared" by two senders (for example,
+                        // inAddrRead1/2)
+
+                        if (peerUsed && peer!=null) {
+                            lf = lf.makeNext(0);
+                            lf.abortLoopIfTorpedoPresent();
+                            ((OutPort)peer).recvWord(lf);
+                            ((OutPort)peer).sendToken(lf);
+                        }
+                        reset_count++;
+                    }
+                    break;
+                }
+
+                // Phase 2: torpedo every output dock, have it absorb tokens
+                case 2: {
+                    if (!dock.isInputDock()) {
+                        torpedoes.add(dock);
+                        LoopFactory lf = new LoopFactory(ctx, dock, 1);
+                        if (peer != null)
+                            for(int i=0; i<((InPort)peer).getTokensToAbsorb(); i++)
+                                lf.recvToken();
+                        lf.sendToken(ackDestination);
+                        reset_count++;
+                    }
+                    break;
+                }
+
+                // Phase 3: torpedo every input dock, and we're done
+                case 3: {
+                    if (dock.isInputDock()) {
+                        if (peerUsed && peer!=null) {
+                            torpedoes.add(dock);
+                        }
+                        LoopFactory lf = new LoopFactory(ctx, dock, 1);
+                        lf.sendToken(ackDestination);
+                        reset_count++;
+                    }
+                    break;
+                }
+
+
+            }
+        }
+
         public Module() {
             Process.this.modules.add(this);
         }
@@ -62,7 +138,9 @@ public class Process {
         public OutPort getOutPort(String name) { return (OutPort)ports.get(name); }
         
         public void build(Context ctx) { for(Port p : ports.values()) p.build(ctx); }
-        public void reset(Context ctx, int phase) { for(Port p : ports.values()) p.reset(ctx, phase); }
+        public void reset(Context ctx, int phase, Destination ackDestination) {
+            for(Port p : ports.values()) p.reset(ctx, phase, ackDestination);
+        }
 
         public abstract class Port {
             public final String name;
@@ -72,7 +150,7 @@ public class Process {
                 Module.this.ports.put(name,this);
             }
             public abstract void build(Context ctx);
-            public abstract void reset(Context ctx, int phase);
+            public abstract void reset(Context ctx, int phase, Destination ackDestination);
         }
 
         public abstract class InPort extends Port {
@@ -88,9 +166,9 @@ public class Process {
             }
 
             /** this port's peer (an OutPort) invokes this to have "recvToken" or equivalent inserted */
-            public abstract void recvToken(Context.LoopFactory loopfactory_at_output_dock);
+            public abstract void recvToken(LoopFactory loopfactory_at_output_dock);
             /** this port's peer (an OutPort) invokes this to have "sendWord" or equivalent inserted */
-            public abstract void sendWord(Context.LoopFactory loopfactory_at_output_dock);
+            public abstract void sendWord(LoopFactory loopfactory_at_output_dock);
 
             public int getTokensToAbsorb() { return 0; }
         }
@@ -108,9 +186,9 @@ public class Process {
             }
 
             /** this port's peer (an InPort) invokes this to have "sendToken" or equivalent inserted */
-            public abstract void sendToken(Context.LoopFactory loopfactory_at_input_dock);
+            public abstract void sendToken(LoopFactory loopfactory_at_input_dock);
             /** this port's peer (an InPort) invokes this to have "recvWord" or equivalent inserted */
-            public abstract void recvWord(Context.LoopFactory loopfactory_at_input_dock);
+            public abstract void recvWord(LoopFactory loopfactory_at_input_dock);
         }
 
         public final class DockInPort extends InPort {
@@ -125,62 +203,38 @@ public class Process {
                 this.count = count;
                 this.pattern = pattern;
             }
-            public void recvToken(Context.LoopFactory lf) { lf.recvToken(); }
-            public void sendWord(Context.LoopFactory lf) { lf.sendWord(dock.getDataDestination()); }
-            public void build(Context ctx) {
-                if (peer==null && pattern.length==1 && pattern[0]==null) return;
-                build(ctx, ctx.new LoopFactory(dock, 1));
-            }
+            public void recvToken(LoopFactory lf) { lf.recvToken(); }
+            public void sendWord(LoopFactory lf) { lf.sendWord(dock.getDataDestination()); }
+            public void build(Context ctx) { build(ctx, new LoopFactory(ctx, dock, 1)); }
             // number-in-flight is considered a property of the input dock in a pair
-            //public int getInflight() { return 4; }
-            public int getInflight() { return 1; }
+            public int getInflight() { return 4; }
+            //public int getInflight() { return 1; }
             public int getTokensToAbsorb() { return getInflight(); }
-            public void reset(Context ctx, int phase) {
-                if (dock.getShip().getType().equals("Debug")) {
-                    return;
-                }
-                if (peer==null && pattern.length==1 && pattern[0]==null) return;
-                switch(phase) {
-                    case 0: {
-                        torpedoes.add(dock);
-                        break;
-                    }
-                    case 1: {
-                        // FIXME
-                        reset_count++;
-                        Context.LoopFactory lf = ctx.new LoopFactory(dock, 1);
-                        lf.sendToken(fleet.getShip("Debug",0).getDock("in").getDataDestination());
-                        if (peer != null && peer instanceof DockOutPort) {
-                            DockOutPort dop_peer = (DockOutPort)peer;
-                            lf = lf.makeNext(0);
-                            lf.abortLoopIfTorpedoPresent();
-                            lf.recvToken();
-                            lf.sendToken(dop_peer.dock.getDataDestination());
-                        }
-                        break;
-                    }
-                    case 2: {
-                        if (peer != null && peer instanceof DockOutPort) {
-                            torpedoes.add(dock);
-                        }
-                        break;
-                    }
-                }
+            private boolean peerUsed() {
+                if (peer==null) return false;
+                for(int i=0; i<pattern.length; i++) if (pattern[i]==null) return true;
+                return false;
+            }
+            public void reset(Context ctx, int phase, Destination ackDestination) {
+                doReset(ctx, phase, dock, peer, ackDestination, peerUsed());
             }
-            protected void build(Context ctx, Context.LoopFactory lf) {
-                if (peer==null && pattern.length==1 && pattern[0]==null) return;
+            protected void build(Context ctx, LoopFactory lf) {
                 int inflight = (count != 0 && count < getInflight()) ? count : getInflight();
 
                 if (peer!=null)
                     for(int i=0; i<inflight; i++) peer.sendToken(lf);
 
-                lf = lf.makeNext(count);
+                lf = lf.makeNext(count, true);
                 for(int i=0; i<pattern.length; i++) {
                     if (pattern[i]==null) {
-                        lf.abortLoopIfTorpedoPresent();
-                        peer.recvWord(lf);
-                        lf.deliver();
-                        peer.sendToken(lf);
+                        if (peer!=null) {
+                            lf.abortLoopIfTorpedoPresent();
+                            peer.recvWord(lf);
+                            peer.sendToken(lf);
+                            lf.deliver();
+                        } else {
+                            lf.interruptibleNop();
+                        }
                     } else {
                         lf.literal(pattern[i]);
                         lf.abortLoopIfTorpedoPresent();
@@ -194,6 +248,9 @@ public class Process {
                     lf.recvToken();
                 }
             }
+            public BitVector getConstant(String constantName) {
+                return dock.getConstant(constantName);
+            }
         }
 
         public /*final*/ class DockOutPort extends OutPort {
@@ -201,57 +258,32 @@ public class Process {
             public final int count;
             public DockOutPort(String name, Dock dock) { this(name, dock, 0); }
             public DockOutPort(String name, Dock dock, int count) { super(name); this.dock = dock; this.count = count; }
-            public void sendToken(Context.LoopFactory lf) { lf.sendToken(dock.getDataDestination()); }
-            public void recvWord(Context.LoopFactory lf) { lf.recvWord(); }
-            public void build(Context ctx) { build(ctx, ctx.new LoopFactory(dock, 1)); }
-            protected void build(Context ctx, Context.LoopFactory lf) {
+            public void sendToken(LoopFactory lf) { lf.sendToken(dock.getDataDestination()); }
+            public void recvWord(LoopFactory lf) { lf.recvWord(); }
+            public void build(Context ctx) { build(ctx, new LoopFactory(ctx, dock, 1)); }
+            protected void build(Context ctx, LoopFactory lf) {
                 if (peer==null) return;
-                // FIXME: no setup?
                 lf = lf.makeNext(count);
                 lf.abortLoopIfTorpedoPresent();
                 peer.recvToken(lf);
-                //lf.abortLoopIfTorpedoPresent();  // FIXME: do I need this twice?
                 lf.collectWord();
                 peer.sendWord(lf);
-                // FIXME: cleanup
             }
-            public void reset(Context ctx, int phase) {
-                switch(phase) {
-                    case 0: {
-                        torpedoes.add(dock);
-                        break;
-                    }
-                    case 1: {
-                        reset_count++;
-                        Context.LoopFactory lf = ctx.new LoopFactory(dock, 1);
-
-                        if (peer != null) {
-                            for(int i=0; i<peer.getTokensToAbsorb(); i++) {
-                                lf.recvToken();
-                            }
-                            //lf.sendToken(dip_peer.dock.getInstructionDestination());
-                        }
-
-                        lf.sendToken(fleet.getShip("Debug",0).getDock("in").getDataDestination());
-                        lf = lf.makeNext(0);
-                        lf.abortLoopIfTorpedoPresent();
-                        lf.collectWord();
-                        break;
-                    }
-                    case 2: {
-                        torpedoes.add(dock);
-                        break;
-                    }
-                }
+            public void reset(Context ctx, int phase, Destination ackDestination) {
+                doReset(ctx, phase, dock, peer, ackDestination, true);
             }
         }
     }
 
-    private BitVector bv(long l) { return new BitVector(fleet.getWordWidth()).set(l); }
+    private static BitVector bv(long l) { return new BitVector(/*FIXME fleet.getWordWidth()*/37).set(l); }
+    private static BitVector[] bv(long[] l) {
+        BitVector[] ret = new BitVector[l.length];
+        for(int i=0; i<ret.length; i++) ret[i] = bv(l[i]);
+        return ret;
+    }
 
     /**
-     *  For every datum transmitted to in, pass it along to out and
-     *  deliver the constant at out.  Flow control in<->out is
+     *  Deliver the constant at out forever.  Flow control in<->out is
      *  maintained, but out is not flow-controlled, so be sure
      *  that every datum sent there is consumed synchronously with
      *  data items sent to out.
@@ -259,10 +291,10 @@ public class Process {
     public class ForeverModule extends Module {
         private BitVector bv;
         public final OutPort out = new OutPort("out") {
-                public void sendToken(Context.LoopFactory lf) { }
-                public void recvWord(Context.LoopFactory lf) { }
+                public void sendToken(LoopFactory lf) { }
+                public void recvWord(LoopFactory lf) { }
                 public void build(Context ctx) { }
-                public void reset(Context ctx, int phase) { }
+                public void reset(Context ctx, int phase, Destination ackDestination) { }
                 public void setPeer(InPort peer) {
                     this.peer = peer;
                     DockInPort pip = ((DockInPort)peer);
@@ -279,10 +311,10 @@ public class Process {
     public class OnceModule extends Module {
         private BitVector bv;
         public final OutPort out = new OutPort("out") {
-                public void sendToken(Context.LoopFactory lf) { }
-                public void recvWord(Context.LoopFactory lf) { }
+                public void sendToken(LoopFactory lf) { }
+                public void recvWord(LoopFactory lf) { }
                 public void build(Context ctx) { }
-                public void reset(Context ctx, int phase) { }
+                public void reset(Context ctx, int phase, Destination ackDestination) { }
                 public void setPeer(InPort peer) {
                     this.peer = peer;
                     DockInPort pip = ((DockInPort)peer);
@@ -324,7 +356,9 @@ public class Process {
         public  final OutPort out   = new DockOutPort("out", ship.getDock("out"));
         public  final InPort  val   = new DockInPort("in1",  ship.getDock("in1"));
         public  final InPort  count = new DockInPort("in2",  ship.getDock("in2"), 0, new BitVector[] { null, bv(1) });
-        public  final InPort  op    = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] { bv(6 /*PASS_C2_V1*/), bv(10 /*DROP_C2_V1*/) } );
+        public  final InPort  op    = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] {
+                ship.getDock("inOp").getConstant("PASS_C2_V1"),
+                ship.getDock("inOp").getConstant("DROP_C2_V1") } );
         public UnPunctuatorModule() { }
     }
 
@@ -333,7 +367,9 @@ public class Process {
         private final Ship    ship  = pool.allocateShip("Counter");
         public  final OutPort out   = new DockOutPort("out", ship.getDock("out"));
         public  final InPort  val   = new DockInPort("in1",  ship.getDock("in1"));
-        public  final InPort  op    = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] { bv(6 /*PASS_C2_V1*/), bv(7 /*PASS_C2_V2*/) } );
+        public  final InPort  op    = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] {
+                ship.getDock("inOp").getConstant("PASS_C2_V1"),
+                ship.getDock("inOp").getConstant("PASS_C2_V2") } );
         public  final InPort  count;
         public PunctuatorModule(long punc) {
             this.punc = punc;
@@ -354,7 +390,8 @@ public class Process {
         public final Ship    ship  = pool.allocateShip("Counter");
         public final InPort  start = new DockInPort("in1",  ship.getDock("in1"));
         public final InPort  incr  = new DockInPort("in2",  ship.getDock("in2"));
-        public final InPort  inOp  = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] { bv(12 /*COUNTDOWN*/) });
+        public final InPort  inOp  = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] {
+                ship.getDock("inOp").getConstant("COUNT") });
         public final OutPort out   = new DockOutPort("out", ship.getDock("out"));
         public DownCounterModule() { }
     }
@@ -363,7 +400,8 @@ public class Process {
         public final Ship    ship   = pool.allocateShip("Counter");
         public final InPort  count  = new DockInPort("in1",  ship.getDock("in1"));
         public final InPort  val    = new DockInPort("in2",  ship.getDock("in2"));
-        public final InPort  inOP   = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] { bv(1 /*REPEAT_C1_V2*/) });
+        public final InPort  inOP   = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] {
+                ship.getDock("inOp").getConstant("REPEAT_C1_V2") });
         public final OutPort out    = new DockOutPort("out", ship.getDock("out"));
         public RepeatModule() { }
     }
@@ -372,13 +410,15 @@ public class Process {
         public final Ship    ship = pool.allocateShip("Alu");
         public final InPort  in1  = new DockInPort("in1",  ship.getDock("in1"));
         public final InPort  in2  = new DockInPort("in2",  ship.getDock("in2"));
-        public final InPort  inOp = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] { bv(9 /*MAXMERGE*/) });
+        public final InPort  inOp = new DockInPort("inOp", ship.getDock("inOp"), 0, new BitVector[] {
+                ship.getDock("inOp").getConstant("MAXMERGE") });
         public final OutPort out  = new DockOutPort("out", ship.getDock("out"));
         public SortedMergeModule() { }
     }
 
     public class MemoryModule extends Module {
         public final Ship    ship;
+        public final InPort  inCBD;
         public final InPort  inAddrRead1;
         public final InPort  inAddrRead2;
         public final InPort  inAddrWrite;
@@ -388,46 +428,39 @@ public class Process {
         public final OutPort outWrite;
         public MemoryModule(Ship memoryShip) {
             this.ship = memoryShip;
+            this.inCBD        = ship.getType().equals("Memory") ? new DockInPort("inCBD", ship.getDock("inCBD")) : null;
             this.inAddrWrite  = new DockInPort("inAddrWrite", ship.getDock("inAddrWrite"));
             this.inDataWrite  = new DockInPort("inDataWrite", ship.getDock("inDataWrite"));
             this.inAddrRead1  = new InPort("inAddrRead1") {
-                    public void recvToken(Context.LoopFactory lf) { lf.recvToken(); }
-                    public void sendWord(Context.LoopFactory lf) { lf.sendWord(ship.getDock("inAddrRead").getDataDestination(), new BitVector(1).set(0)); }
+                    public void recvToken(LoopFactory lf) { lf.recvToken(); }
+                    public void sendWord(LoopFactory lf) { lf.sendWord(ship.getDock("inAddrRead").getDataDestination(), new BitVector(1).set(0)); }
                     public void build(Context ctx) { }
                     public int getTokensToAbsorb() { return outRead1.peer.getTokensToAbsorb(); }
-                    public void reset(Context ctx, int phase) {
-                        if (phase==2) {
-                            torpedoes.add(ship.getDock("inAddrRead"));
-                            torpedoes.add(ship.getDock("out"));
-                        }
+                    public void reset(Context ctx, int phase, Destination ackDestination) {
+                        doReset(ctx, phase, ship.getDock("inAddrRead"), null, ackDestination, false);
                     }
                 };
             this.inAddrRead2  = new InPort("inAddrRead2") {
-                    public void recvToken(Context.LoopFactory lf) { lf.recvToken(); }
-                    public void sendWord(Context.LoopFactory lf) { lf.sendWord(ship.getDock("inAddrRead").getDataDestination(), new BitVector(1).set(1)); }
+                    public void recvToken(LoopFactory lf) { lf.recvToken(); }
+                    public void sendWord(LoopFactory lf) { lf.sendWord(ship.getDock("inAddrRead").getDataDestination(), new BitVector(1).set(1)); }
                     public void build(Context ctx) { }
                     public int getTokensToAbsorb() { return outRead2.peer.getTokensToAbsorb(); }
-                    public void reset(Context ctx, int phase) {
-                        if (phase==2) {
-                            torpedoes.add(ship.getDock("inAddrRead"));
-                            torpedoes.add(ship.getDock("out"));
-                        }
-                    }
+                    public void reset(Context ctx, int phase, Destination ackDestination) { }
                 };
             this.outRead1 = new OutPort("outRead1") {
-                    public void sendToken(Context.LoopFactory lf) { inAddrRead1.peer.sendToken(lf); }
-                    public void recvWord(Context.LoopFactory lf) { lf.recvWord(); }
+                    public void sendToken(LoopFactory lf) { inAddrRead1.peer.sendToken(lf); }
+                    public void recvWord(LoopFactory lf) { lf.recvWord(); }
                     public void build(Context ctx) { }
-                    public void reset(Context ctx, int phase) { }
+                    public void reset(Context ctx, int phase, Destination ackDestination) { }
                 };
             this.outRead2 = new OutPort("outRead2") {
-                    public void sendToken(Context.LoopFactory lf) { inAddrRead2.peer.sendToken(lf); }
-                    public void recvWord(Context.LoopFactory lf) { lf.recvWord(); }
+                    public void sendToken(LoopFactory lf) { inAddrRead2.peer.sendToken(lf); }
+                    public void recvWord(LoopFactory lf) { lf.recvWord(); }
                     public void build(Context ctx) { }
-                    public void reset(Context ctx, int phase) { }
+                    public void reset(Context ctx, int phase, Destination ackDestination) { }
                 };
             this.outWrite = new DockOutPort("out", ship.getDock("out")) {
-                    protected void build(Context ctx, Context.LoopFactory lf) {
+                    protected void build(Context ctx, LoopFactory lf) {
                         lf = lf.makeNext(0);
                         lf.abortLoopIfTorpedoPresent();
                         lf.collectWord();
@@ -435,6 +468,8 @@ public class Process {
                         lf.setFlags(FlagFunction.ZERO, FlagFunction.ZERO.add(FlagC));
                         if (this.peer != null) {
                             lf.setPredicate(Predicate.FlagB);
+                            lf.literal(77);
+                            lf.abortLoopIfTorpedoPresent();
                             this.peer.recvToken(lf);
                             this.peer.sendWord(lf);
                         }
@@ -457,9 +492,9 @@ public class Process {
         }
         public void build(Context ctx) {
             super.build(ctx);
-            Context.LoopFactory lf;
+            LoopFactory lf;
 
-            lf = ctx.new LoopFactory(ship.getDock("inAddrRead"), 0);
+            lf = new LoopFactory(ctx, ship.getDock("inAddrRead"), 0);
             lf.abortLoopIfTorpedoPresent();
             lf.recvWord();
             lf.setFlags(FlagFunction.ZERO.add(FlagC), FlagFunction.ZERO);
@@ -474,168 +509,120 @@ public class Process {
 
     public static void main(String[] s) throws Exception {
         Fleet fleet = new Fpga();
+        //Fleet fleet = new Interpreter(false);
+
         Random random = new Random(System.currentTimeMillis());
         long[] vals = new long[256];
         for(int i=0; i<vals.length; i++) {
             vals[i] = Math.abs(random.nextInt());
         }
 
+        Ship mem1 = fleet.getShip("Memory", 0);
+        Ship mem2 = fleet.getShip("Memory", 1);
+        //Ship mem2 = fleet.getShip("DDR2", 0);
+
         FleetProcess fp;
         int stride = 1;
         fp = null;
-        while(stride < vals.length) {
-            if (fp==null) fp = fleet.run(new Instruction[0]);
-            System.out.println("stride " + stride);
-            vals = mergeSort(fp, fleet, vals, stride);
-            stride = stride * 2;
-            //fp.terminate(); fp = null;
-            System.out.println();
-            System.out.println("results:");
-            for(int i=0; i<vals.length; i++)
-                System.out.println(vals[i]);
-        }
-    }
 
-    // won't verify that the switch fabric is empty, however
-    public static void verifyClean(FleetProcess fp) {
-        Ship debug   = fp.getFleet().getShip("Debug", 0);
-        Dock debugIn = debug.getDock("in");
+        fp = fleet.run(new Instruction[0]);
+        MemoryUtils.writeMem(fp, mem1, 0, bv(vals));
+        int vals_length = vals.length;
 
-        Context ctx;
-        Context.LoopFactory lf;
+        // Disable readback/writeback inside the loop
+        vals = null;
 
-        ctx = new Context(fp.getFleet());
-        lf = ctx.new LoopFactory(debugIn, 1);
-        lf.literal(12);
-        lf.deliver();
-        lf.literal(5);
-        lf.deliver();
-        ArrayList<Instruction> ai = new ArrayList<Instruction>();
-        ctx.emit(ai);
-        for(Instruction ins : ai) fp.sendInstruction(ins);
-        fp.flush();
+        while(stride < vals_length) {
+            
+            // reset the FleetProcess
+            //fp.terminate(); fp = null;
 
-        System.out.println("checking debug.in");
-        if (fp.recvWord().toLong() != 12) throw new RuntimeException("debug dock not properly initialized");
-        if (fp.recvWord().toLong() != 5)  throw new RuntimeException("debug dock not properly initialized");
+            System.out.println("stride " + stride);
 
-        long k = 0;
-        for(Ship ship : fp.getFleet())
-            if (!"Debug".equals(ship.getType()))
-                for (Dock dock : ship) {
-                    System.out.println("checking " + dock);
+            // if we reset the FleetProcess, restart it
+            if (fp==null) fp = fleet.run(new Instruction[0]);
 
-                    k = (k + 23) % 65535;
-                    ctx = new Context(fp.getFleet());
+            // do the mergeSort
+            vals = mergeSort(fp, fleet, vals, vals_length, stride, mem1, mem2);
 
-                    boolean reverse = (k%2)==0;
+            // verify the cleanup
+            //CleanupUtils.verifyClean(fp);
 
-                    lf = ctx.new LoopFactory(debugIn, 2);
-                    lf.recvToken();
-                    lf.setFlags(FlagFunction.ZERO.add(FlagC), FlagFunction.ZERO);
-                    lf.setPredicate(Predicate.NotFlagA);
-                    lf.literal(k);
-                    lf.setPredicate(Predicate.FlagA);
-                    lf.literal(k+1);
-                    lf.setPredicate(null);
-                    lf.deliver();
-
-                    lf = ctx.new LoopFactory(dock, 1);
-                    lf.sendToken(dock.getDataDestination(), new BitVector(1).set(reverse ? 1 : 0));
-                    lf.sendToken(dock.getDataDestination(), new BitVector(1).set(reverse ? 0 : 1));
-                    lf = lf.makeNext(2);
-                    lf.recvToken();
-                    lf.setFlags(FlagFunction.ZERO.add(FlagC), FlagFunction.ZERO);
-                    lf.setPredicate(Predicate.NotFlagA);
-                    lf.sendToken(debugIn.getDataDestination(), new BitVector(1).set(0));
-                    lf.setPredicate(Predicate.FlagA);
-                    lf.sendToken(debugIn.getDataDestination(), new BitVector(1).set(1));
-                    lf.setPredicate(null);
-
-                    ai = new ArrayList<Instruction>();
-                    ctx.emit(ai);
-                    for(Instruction ins : ai) fp.sendInstruction(ins);
-                    fp.flush();
-
-                    long kk;
-                    kk = fp.recvWord().toLong();
-                    if (kk != (reverse ? k+1 : k))
-                        throw new RuntimeException(dock+" not properly initialized (1)");
-                    kk = fp.recvWord().toLong();
-                    if (kk != (reverse ? k   : k+1))
-                        throw new RuntimeException(dock+" not properly initialized (2)");
-                }
+            Ship mem = mem1; mem1=mem2; mem2=mem;
+
+            stride = stride * 2;
+            System.out.println();
+        }
+
+        BitVector[] bvs = new BitVector[vals_length];
+        MemoryUtils.readMem(fp, mem1, 0, bvs);
+        System.out.println("results:");
+        for(int i=0; i<vals_length; i++)
+            System.out.println(bvs[i].toLong());
     }
 
-    // FIXME: numbers seem to get duplicated when stride=2
-    public static long[] mergeSort(FleetProcess fp, Fleet fleet, long[] vals, int stride_length) throws Exception {
 
-        BitVector[] mem = new BitVector[vals.length];
-        for(int i=0; i<mem.length; i++) mem[i] = new BitVector(fleet.getWordWidth()).set(vals[i]);
+    public static long[] mergeSort(FleetProcess fp, Fleet fleet,
+                                   long[] vals, int vals_length, int stride_length,
+                                   Ship memoryShip1, Ship memoryShip2) throws Exception {
 
-        Ship memoryShip = fleet.getShip("DRAM", 0);
-        Gadgets.writeMem(fp, memoryShip, 0, mem);
+        if (vals != null) {
+            BitVector[] mem = new BitVector[vals_length];
+            for(int i=0; i<mem.length; i++) mem[i] = new BitVector(fleet.getWordWidth()).set(vals[i]);
+            MemoryUtils.writeMem(fp, memoryShip1, 0, mem);
+        }
 
         //////////////////////////////////////////////////////////////////////////////
 
         Process proc = new Process(fleet);
         DebugModule dm = proc.new DebugModule();
 
-        int end_of_data = vals.length;
+        int end_of_data = vals_length;
         int num_strides = end_of_data / (stride_length * 2);
 
-        MemoryModule mm = proc.new MemoryModule(memoryShip);
+        MemoryModule mm  = proc.new MemoryModule(memoryShip1);
         SortedMergeModule sm = proc.new SortedMergeModule();
 
-
+        // So far: we have four spare Counter ships; one can be used for resetting
         for(int i=0; i<2; i++) {
 
-            Module.OutPort stride_length_1       = proc.new OnceModule(stride_length).out;
-            Module.OutPort stride_length_2       = proc.new OnceModule(stride_length).out;
-            Module.OutPort twice_stride_length_1 = proc.new OnceModule(stride_length*2).out;
-            Module.OutPort end_of_data_1         = proc.new OnceModule(end_of_data + i*stride_length).out;
-
-            Module.OutPort num_strides_1         = proc.new OnceModule(num_strides).out;
-            Module.OutPort num_strides_2         = proc.new OnceModule(num_strides).out;
-
-            RepeatModule r0 = proc.new RepeatModule();
-            RepeatModule r1 = proc.new RepeatModule();
-            RepeatModule r2 = proc.new RepeatModule();
             DownCounterModule c0 = proc.new DownCounterModule();
             DownCounterModule c1 = proc.new DownCounterModule();
-            ForeverModule fmm  = proc.new ForeverModule(1);
-            ForeverModule fmm2 = proc.new ForeverModule(2); // 2=ADD
-            AluModule alu = proc.new AluModule();
 
-            stride_length_1.connect(r0.val);
-            num_strides_1.connect(r0.count);
-            r0.out.connect(c0.start);
-            fmm.out.connect(c0.incr);
-            c0.out.connect(alu.in2);
-
-            end_of_data_1.connect(c1.start);
-            twice_stride_length_1.connect(c1.incr);
-            c1.out.connect(r1.val);
-            stride_length_2.connect(r2.val);
-            num_strides_2.connect(r2.count);
-            r2.out.connect(r1.count);
-            r1.out.connect(alu.in1);
-            fmm2.out.connect(alu.inOp);
+            c0.start.connect(proc.new ForeverModule(stride_length).out);
+            c0.incr.connect(proc.new ForeverModule(1).out);
+
+            c1.start.connect(proc.new OnceModule(end_of_data + i*stride_length).out);
+            c1.incr.connect(proc.new OnceModule(stride_length*2).out);
 
+            RepeatModule r1 = proc.new RepeatModule();
+            r1.val.connect(c1.out);
+            r1.count.connect(proc.new ForeverModule(stride_length).out);
+
+            AluModule alu = proc.new AluModule();
+            alu.in1.connect(r1.out);
+            alu.in2.connect(c0.out);
+            alu.inOp.connect(proc.new ForeverModule(((Module.DockInPort)alu.inOp).getConstant("ADD")).out);
             alu.out.connect(i==0 ? mm.inAddrRead1 : mm.inAddrRead2);
 
-            ForeverModule fm = proc.new ForeverModule(stride_length);
             PunctuatorModule punc = proc.new PunctuatorModule(-1);
-            fm.out.connect(punc.count);
-            (i==0 ? mm.outRead1 : mm.outRead2).connect(punc.val);
+            punc.count.connect(proc.new ForeverModule(stride_length).out);
+            punc.val.connect(i==0 ? mm.outRead1 : mm.outRead2);
             punc.out.connect(i==0 ? sm.in1 : sm.in2);
         }
 
-        ForeverModule fm = proc.new ForeverModule(2*stride_length);
         UnPunctuatorModule unpunc = proc.new UnPunctuatorModule();
-        sm.out.connect(unpunc.val);
-        fm.out.connect(unpunc.count);
-        unpunc.out.connect(dm.in);
+        unpunc.val.connect(sm.out);
+        unpunc.count.connect(proc.new ForeverModule(2*stride_length).out);
+
+        DownCounterModule cw = proc.new DownCounterModule();
+        cw.start.connect(proc.new OnceModule(end_of_data).out);
+        cw.incr.connect(proc.new OnceModule(1).out);
+
+        MemoryModule mm2 = proc.new MemoryModule(memoryShip2);
+        mm2.inAddrWrite.connect(cw.out);
+        mm2.inDataWrite.connect(unpunc.out);
+        mm2.outWrite.connect(dm.in);
 
         //////////////////////////////////////////////////////////////////////////////
 
@@ -646,22 +633,17 @@ public class Process {
         proc.build(ctx);
         ctx.emit(ai);
         for(Instruction ins : ai) {
-            System.out.println(ins);
+            //System.out.println(ins);
             fp.sendInstruction(ins);
         }
         fp.flush();
 
-        System.out.println("reading back...");
-        int inc=0;
-        for(int i=0; i<vals.length; i++) {
-            inc++;
-            BitVector bv = fp.recvWord();
-            System.out.println("\r"+bv + " " + bv.toLong() + "          #read="+inc);
-            mem[i] = bv;
+        for(int i=0; i<vals_length; i++) {
+            System.out.print("\rreading back... " + i+"/"+vals_length+"  ");
+            BitVector rec = fp.recvWord();
+            System.out.print(" (prev result: " + rec + " = " + rec.toLong() + ")");
         }
         System.out.println("\rdone.                                                                    ");
-        long[] ret = new long[vals.length];
-        for(int i=0; i<ret.length; i++) ret[i] = mem[ret.length-i-1].toLong();
 
         //if (true) return ret;
 
@@ -669,52 +651,74 @@ public class Process {
         Dock debugIn = fleet.getShip("Debug",0).getDock("in");
         Dock fred = debugIn;
         fp.sendToken(debugIn.getInstructionDestination());
+        fp.flush();
 
-        Context.LoopFactory lf = ctx2.new LoopFactory(debugIn, 0);
+        LoopFactory lf = new LoopFactory(ctx2, debugIn, 0);
         lf.literal(0);
         lf.abortLoopIfTorpedoPresent();
         lf.recvToken();
         lf.deliver();
 
-        ctx2.emit(ai = new ArrayList<Instruction>());
-        for(Instruction ins : ai)
-            fp.sendInstruction(ins);
+        ctx2.dispatch(fp);
         fp.flush();
 
         int count = 0;
 
-        for(int phase=0; phase<=2; phase++) {
+        Ship counter = proc.pool.allocateShip("Counter");
+
+        for(int phase=0; phase<=3; phase++) {
             System.out.println("== phase "+phase+" ==================================================================");
             ctx2 = new Context(fp.getFleet());
-            proc.reset(ctx2, phase);
+
+            Destination ackDestination = counter.getDock("in2").getDataDestination();
+            proc.reset(ctx2, phase, ackDestination);
+
+            Context ctx3 = new Context(fp.getFleet());
+            lf = new LoopFactory(ctx3, counter.getDock("inOp"), 1);
+            lf.literal("DROP_C1_V2");
+            lf.deliver();
+            lf.literal(5);
+            lf.deliver();
+            lf = new LoopFactory(ctx3, counter.getDock("in1"), 1);
+            lf.literal(reset_count-1);
+            lf.deliver();
+            lf.literal(1);
+            lf.deliver();
+            lf = new LoopFactory(ctx3, counter.getDock("in2"), 0);
+            lf.abortLoopIfTorpedoPresent();
+            lf.recvWord();
+            lf.deliver();
+            lf = new LoopFactory(ctx3, counter.getDock("out"), 1);
+            lf.collectWord();
+            lf.sendToken(counter.getDock("in2").getInstructionDestination());  // HACK: we don't check to make sure this hits
+            lf.sendToken(debugIn.getDataDestination());
+            ctx3.dispatch(fp);  // HACK: we don't check to make sure that this is "firmly in place"
+
             for(Dock dock : torpedoes) fp.sendToken(dock.getInstructionDestination());
-            ctx2.emit(ai = new ArrayList<Instruction>());
-            for(Instruction ins : ai) fp.sendInstruction(ins);
+            ctx2.dispatch(fp);
             fp.flush();
             System.out.println("flushed");
-            for(int ii=0; ii<reset_count; ii++)
-                System.out.print("\r phase "+phase+" ==> " + fp.recvWord().toLong() + " " + (ii+1) + " / " + reset_count);
+
+            fp.recvWord();
+            System.out.println("phase done");
+
             System.out.println();
         }
 
-        /*
-        ctx2 = new Context(fp.getFleet());
-        ai = new ArrayList<Instruction>();
-        for(Ship ship : ctx.allocatedShips)
-            if (!ship.getType().equals("Debug"))
-                for(Dock dock : ship)
-                    if (dock.isInputDock()) {
-                        lf = ctx2.new LoopFactory(dock, 0);
-                        lf.recvWord();
-                        
-                    }
-        */
-
         fp.sendToken(debugIn.getInstructionDestination());
         fp.flush();
 
         //System.out.println("verifying cleanup:");
-        //verifyClean(fp);
+        //CleanupUtils.verifyClean(fp);
+
+        System.out.println("reading back:");
+        long[] ret = null;
+        if (vals != null) {
+            ret = new long[vals_length];
+            BitVector[] mem = new BitVector[vals_length];
+            MemoryUtils.readMem(fp, memoryShip2, 0, mem);
+            for(int i=0; i<ret.length; i++) ret[i] = mem[i].toLong();
+        }
         return ret;
     }