single-mode Alu3
[fleet.git] / ships / Alu3.ship
index f88fbb8..8c8cec6 100644 (file)
@@ -6,31 +6,51 @@ data  in:   in2
 data  in:   in3
 
 data  out:  out1
+  shortcut to: in1
 data  out:  out2
+  shortcut to: in2
+data  out:  out3
+  shortcut to: in3
+data  out:  outBits
 
 == Constants ========================================================
 == TeX ==============================================================
 
-This ship performs addition of three inputs, producing two output
-values in carry-save form.  To complete the addition, send the two
-output values to an Alu2 with opcode ADD.  For summing a set of four
-or more numbers, Alu3 followed by Alu2 is often faster than repeated
-use of Alu2.
-
 == Fleeterpreter ====================================================
+boolean mode = false;  // selects which of the two output computations service() performs
+BitFifo.BitStorage outBits = new BitFifo.BitStorage(74);  // bit queue feeding the outBits port; 74 is presumably its capacity in bits -- confirm against BitFifo
 public void service() {
   if (box_in1.dataReadyForShip() &&
       box_in2.dataReadyForShip() &&
       box_in3.dataReadyForShip() &&
+      outBits.hasSpace(1) &&  // the mode==true path appends one bit per firing, so require room for it
       box_out1.readyForDataFromShip() &&
-      box_out2.readyForDataFromShip()) {
-      long v1     = box_in1.removeDataForShip();
-      long v2     = box_in2.removeDataForShip();
-      long v3     = box_in3.removeDataForShip();
-      long o1     = ((v1 & v2) | (v2 & v3) | (v1 & v3)) << 1;
-      long o2     = v1 ^ v2 ^ v3;
+      box_out2.readyForDataFromShip() &&
+      box_out3.readyForDataFromShip()) {  // fire only when all three inputs hold data, all three outputs can accept, and outBits has room
+      long v1 = box_in1.removeDataForShip();
+      long v2 = box_in2.removeDataForShip();
+      long v3 = box_in3.removeDataForShip();
+      long o1, o2, o3;
+      mode = true;  // NOTE(review): unconditionally forces mode, so the !mode branch below never runs and the toggle after the outputs has no effect
+      if (!mode) {
+        o1 = v1;  // in1 passes through unchanged
+        o2 = v2 >>> 1;  // in2 shifted right one bit, zero-filled
+        o3 = ((v2 & 0x1L) == 0) ? 0 : v1;  // v1 when the low bit of v2 is set, otherwise 0
+      } else {
+        o1 = ((v1 & v2) | (v2 & v3) | (v1 & v3))/* << 1*/;  // bitwise majority of the three inputs, left unshifted
+        o2 = (v1 ^ v2 ^ v3) >> 1;  // bitwise XOR of the three inputs shifted right one bit (signed >> here, unlike >>> above)
+        o3 = 0;
+        outBits.add((v1 ^ v2 ^ v3) & 0x1L, 1);  // queue the XOR's low bit, i.e. the bit the shift above dropped from o2
+      }
       box_out1.addDataFromShip(o1);
       box_out2.addDataFromShip(o2);
+      box_out3.addDataFromShip(o3);
+      mode = !mode;  // flips mode after each firing; overridden by the forced assignment at the top of the next firing
+  }
+
+  if (box_outBits.readyForDataFromShip() &&
+      outBits.size() >= 37) {  // checked on every service() call, independently of the firing above
+      box_outBits.addDataFromShip(outBits.get(37));  // send 37 queued bits as one word; get(37) presumably dequeues them -- confirm against BitFifo
   }
 }
 
@@ -38,45 +58,94 @@ public void service() {
 
 == FPGA ==============================================================
 
-  reg                    have_a;
-  reg [(`DATAWIDTH-1):0] a;
-  reg                    have_b;
-  reg [(`DATAWIDTH-1):0] b;
-  reg                    have_c;
-  reg [(`DATAWIDTH-1):0] c;
-  reg                    have_out1;
-  reg                    have_out2;
+  reg                    mode;         initial mode = 0;  // selects which output computation the always block uses
+  reg                    have_in1;     initial have_in1 = 0;  // have_inN: set when inN has been latched into keep_inN, cleared after the compute step
+  reg                    have_in2;     initial have_in2 = 0;
+  reg                    have_in3;     initial have_in3 = 0;
+  reg [(`DATAWIDTH-1):0] keep_in1;     initial keep_in1 = 0;  // keep_inN: value captured from inN_d
+  reg [(`DATAWIDTH-1):0] keep_in2;     initial keep_in2 = 0;
+  reg [(`DATAWIDTH-1):0] keep_in3;     initial keep_in3 = 0;
+  reg                    have_out1;    initial have_out1 = 0;  // have_outN: set by the compute step, cleared inside the `onwrite for outN
+  reg                    have_out2;    initial have_out2 = 0;
+  reg                    have_out3;    initial have_out3 = 0;
+  reg [73:0] bitstorage;  // 74-bit store that receives one bit per mode-1 compute step; no initial value is given here
+  reg [7:0] bitstorage_count;          initial bitstorage_count = 0;  // number of bits currently held in bitstorage
+  reg wrote;                           initial wrote = 0;  // set in a clock cycle where a bit was appended; blocks the outBits drain for that cycle
 
   always @(posedge clk) begin
+    wrote = 0;  // blocking assign: cleared at the start of every clock edge
     if (have_out1) begin
       `onwrite(out1_r, out1_a) have_out1 <= 0; end
-    end
-    if (have_out2) begin
+    end else if (have_out2) begin  // single if/else-if chain: only one branch (one port handshake or the compute step) runs per clock edge
       `onwrite(out2_r, out2_a) have_out2 <= 0; end
-    end
-
-    if (!have_out1 && !have_out2) begin
-      if (!have_a) begin
-        `onread(in1_r, in1_a) have_a <= 1; a <= in1_d; end
-        end
-      if (!have_b) begin
-        `onread(in2_r, in2_a) have_b <= 1; b <= in2_d; end
+    end else if (have_out3) begin
+      `onwrite(out3_r, out3_a) have_out3 <= 0; end
+    end else if (!have_in1) begin  // no output pending: latch the inputs one at a time, in1 then in2 then in3
+      `onread(in1_r, in1_a) have_in1 <= 1; keep_in1 <= in1_d; end
+    end else if (!have_in2) begin
+      `onread(in2_r, in2_a) have_in2 <= 1; keep_in2 <= in2_d; end
+    end else if (!have_in3) begin
+      `onread(in3_r, in3_a) have_in3 <= 1; keep_in3 <= in3_d; end
+    end else begin  // no output pending and all three inputs latched: compute step
+        mode = 1;  // NOTE(review): blocking assign forces mode, so the mode == 0 branch below is never taken
+        if (mode == 0) begin
+          out1_d    <= keep_in1;  // in1 passes through unchanged
+          out2_d    <= { 1'b0, keep_in2[(`DATAWIDTH-1):1] };  // in2 shifted right one bit with a zero shifted in at the top
+          out3_d    <= (keep_in2[0]==0) ? 0 : keep_in1;  // in1 when the low bit of in2 is set, otherwise 0
+        end else begin
+          out1_d           <= { ((keep_in1 & keep_in2) | (keep_in2 & keep_in3) | (keep_in1 & keep_in3)) };  // bitwise majority of the three inputs, not shifted
+          out2_d                       <= (keep_in1 ^ keep_in2 ^ keep_in3) >> 1;  // bitwise XOR of the three inputs shifted right one bit
+          out3_d                       <= 0;
+          bitstorage_count             <= bitstorage_count+1;  // non-blocking: the new count is visible only after this clock edge
+          bitstorage[bitstorage_count] = (keep_in1[0] ^ keep_in2[0] ^ keep_in3[0]);  // blocking: store the XOR's low bit at the pre-increment index
+          outBits_d                    = bitstorage[(`DATAWIDTH-1):0];  // only assignment to outBits_d in this block: low `DATAWIDTH bits of the store
+          wrote = 1;  // a bit was appended this cycle, so the drain below is skipped
         end
-      if (!have_c) begin
-        `onread(in3_r, in3_a) have_c <= 1; c <= in3_d; end
-        end
-  
-      if (have_a && have_b && have_c) begin
-        out1_d    <= ((a & b) | (b & c) | (a & c)) << 1;
-        out2_d    <= a ^ b ^ c;
         have_out1 <= 1;
         have_out2 <= 1;
-      end
+        have_out3 <= 1;  // all three outputs now await their handshakes
+        have_in1  <= 0;  // release the input latches so the next operands can be read
+        have_in2  <= 0;
+        have_in3  <= 0;
+//        mode <= ~mode;
     end
+
+    if (!wrote && bitstorage_count >= `DATAWIDTH) begin  // drain path, evaluated every clock edge independently of the chain above
+      `onwrite(outBits_r, outBits_a) begin
+        bitstorage_count <= bitstorage_count - `DATAWIDTH;  // a full word has been handed to outBits
+        bitstorage       = bitstorage >> `DATAWIDTH;  // discard the emitted bits, moving the remainder down to bit 0
+      end
+    end    
   end
+end
+
+
+== Test ========================================================================
+#expect 100488372224
+#expect 8
+
 
+#ship alu3  : Alu3
+#ship debug : Debug
 
+1000000:       sendto alu3.in1;
+1200000:       sendto alu3.in2;
+0:             sendto alu3.in3;
+0:             sendto alu3.in1;
+0:             sendto alu3.in2;
+alu3.in1:      [*] take, deliver;
+alu3.in2:      [*] take, deliver;
+alu3.in3:      [*] take, deliver;
+alu3.out1:     [74] take, sendto alu3.in1;
+               [74] take, sendto alu3.in1;
+alu3.out2:     [74] take, sendto alu3.in2;
+               [74] take, sendto alu3.in2;
+alu3.out3:     [74] take, sendto alu3.in3;
+               [74] take, sendto alu3.in3;
+alu3.outBits:  [2] take, sendto debug.in;
+debug.in:      [*] take, deliver;
 
 
 == Contributors =========================================================
+Amir Kamil <kamil@cs.berkeley.edu>
 Adam Megacz <megacz@cs.berkeley.edu>