shortcut to: in1
data out: out2
shortcut to: in2
-data out: out3
- shortcut to: in3
data out: outBits
== Constants ========================================================
== TeX ==============================================================
+{\tt Alu3} is a three-input adder which produces a pair of outputs in
+carry-save form. It has no opcode input.
+
+This ship also contains a private ``bit fifo'' similar to the {\tt
+BitFifo} ship, except that only the dequeueing (output) interface is
+exposed to the programmer. Each addition operation performed causes
+the lowest bit of the {\it save} output to be enqueued into the bit
+fifo. This can be used to produce a very efficient multiplier; see
+the test case for this ship for more details.
+
+\subsection*{Semantics}
+
+When a value is present at each of {\tt in1}, {\tt in2} and {\tt in3},
+these three values are consumed. The {\it carry} result of carry-save
+addition is placed in {\tt out1}, and the {\it save} result of
+carry-save addition, shifted right by one bit, is placed in {\tt
+out2}; the bit shifted out is the one enqueued into the bit fifo.
+
+\subsection*{To Do}
+
+Is the second output supposed to be shifted?
+
+Provide a way to clear/flush the internal bit fifo.
+
+Do we even need this? Can we do the same thing with {\tt Lut3} and
+{\tt BitFifo} together?
+
+
== Fleeterpreter ====================================================
boolean mode = false;
BitFifo.BitStorage outBits = new BitFifo.BitStorage(74);
box_in3.dataReadyForShip() &&
outBits.hasSpace(1) &&
box_out1.readyForDataFromShip() &&
- box_out2.readyForDataFromShip() &&
- box_out3.readyForDataFromShip()) {
+ box_out2.readyForDataFromShip()) {
long v1 = box_in1.removeDataForShip();
long v2 = box_in2.removeDataForShip();
long v3 = box_in3.removeDataForShip();
long o1, o2, o3;
o1 = ((v1 & v2) | (v2 & v3) | (v1 & v3))/* << 1*/;
o2 = (v1 ^ v2 ^ v3) >> 1;
- o3 = 0;
outBits.add((v1 ^ v2 ^ v3) & 0x1L, 1);
box_out1.addDataFromShip(o1);
box_out2.addDataFromShip(o2);
- box_out3.addDataFromShip(o3);
}
}
== FPGA ==============================================================
- reg mode; initial mode = 0;
- reg have_in1; initial have_in1 = 0;
- reg have_in2; initial have_in2 = 0;
- reg have_in3; initial have_in3 = 0;
- reg [(`DATAWIDTH-1):0] keep_in1; initial keep_in1 = 0;
- reg [(`DATAWIDTH-1):0] keep_in2; initial keep_in2 = 0;
- reg [(`DATAWIDTH-1):0] keep_in3; initial keep_in3 = 0;
- reg have_out1; initial have_out1 = 0;
- reg have_out2; initial have_out2 = 0;
- reg have_out3; initial have_out3 = 0;
- reg [73:0] bitstorage; initial bitstorage = 0;
+ reg [73:0] bitstorage; initial bitstorage = 0;
reg [7:0] bitstorage_count; initial bitstorage_count = 0;
- reg wrote; initial wrote = 0;
always @(posedge clk) begin
- wrote = 0;
- if (have_out1) begin
- `onwrite(out1_r, out1_a) have_out1 <= 0; end
- end else if (have_out2) begin
- `onwrite(out2_r, out2_a) have_out2 <= 0; end
- end else if (have_out3) begin
- `onwrite(out3_r, out3_a) have_out3 <= 0; end
- end else if (!have_in1) begin
- `onread(in1_r, in1_a) have_in1 <= 1; keep_in1 <= in1_d; end
- end else if (!have_in2) begin
- `onread(in2_r, in2_a) have_in2 <= 1; keep_in2 <= in2_d; end
- end else if (!have_in3) begin
- `onread(in3_r, in3_a) have_in3 <= 1; keep_in3 <= in3_d; end
- end else if (bitstorage_count >= `DATAWIDTH) begin
- outBits_d = bitstorage[(`DATAWIDTH-1):0];
- `onwrite(outBits_r, outBits_a)
- bitstorage_count <= 0;
- end
+ if (!rst) begin
+ `reset
+ bitstorage <= 0;
+ bitstorage_count <= 0;
end else begin
- out1_d <= { ((keep_in1 & keep_in2) | (keep_in2 & keep_in3) | (keep_in1 & keep_in3)) };
- out2_d <= { 1'b0, (keep_in1[(`DATAWIDTH-1):1] ^
- keep_in2[(`DATAWIDTH-1):1] ^
- keep_in3[(`DATAWIDTH-1):1]) };
- out3_d <= 0;
- bitstorage[bitstorage_count] = (keep_in1[0] ^ keep_in2[0] ^ keep_in3[0]);
- bitstorage_count <= bitstorage_count+1;
- have_out1 <= 1;
- have_out2 <= 1;
- have_out3 <= 1;
- have_in1 <= 0;
- have_in2 <= 0;
- have_in3 <= 0;
+ if (out1_r && out1_a) out1_r <= 0;
+ if (out2_r && out2_a) out2_r <= 0;
+ if (outBits_r && outBits_a) outBits_r <= 0;
+ if (!in1_r && in1_a) in1_a <= 0;
+ if (!in2_r && in2_a) in2_a <= 0;
+ if (!in3_r && in3_a) in3_a <= 0;
+ // Fire one carry-save addition when all three inputs are present and no
+ // previously produced output (out1, out2, outBits) is still awaiting its
+ // acknowledgement.
+ if (!out1_r && !out2_r && !outBits_r && in1_r && in2_r && in3_r) begin
+   // carry: bitwise majority of the three inputs (not shifted left)
+   out1_d <= { ((in1_d & in2_d)
+              | (in2_d & in3_d)
+              | (in1_d & in3_d)) };
+   // save: xor of the three inputs with its low bit dropped (shifted
+   // right by one, zero-filled at the top)
+   out2_d <= { 1'b0, (in1_d[(`DATAWIDTH-1):1] ^
+                      in2_d[(`DATAWIDTH-1):1] ^
+                      in3_d[(`DATAWIDTH-1):1]) };
+   if (bitstorage_count >= `DATAWIDTH-1) begin
+     // This addition supplies the last bit of a full word: emit the
+     // stored bits together with the new bit as the top bit, then start
+     // over with empty storage.  The store/increment below must NOT run
+     // in this case; as later nonblocking assignments they would override
+     // the counter reset and leave the new bit out of the emitted word.
+     outBits_d <= { (in1_d[0] ^ in2_d[0] ^ in3_d[0]),
+                    bitstorage[(`DATAWIDTH-2):0] };
+     outBits_r <= 1;
+     bitstorage_count <= 0;
+     bitstorage <= 0;
+   end else begin
+     // Word not yet complete: append the low bit of the save result.
+     bitstorage[bitstorage_count] <= (in1_d[0] ^ in2_d[0] ^ in3_d[0]);
+     bitstorage_count <= bitstorage_count+1;
+   end
+   // Offer both results and acknowledge (consume) all three inputs.
+   out1_r <= 1;
+   out2_r <= 1;
+   in1_a <= 1;
+   in2_a <= 1;
+   in3_a <= 1;
+ end
end
-
end
== Test ========================================================================
-
#ship alu3 : Alu3
#ship lut3 : Lut3
#ship bitfifo : BitFifo
#ship debug : Debug
+#ship fifo : Fifo
+#ship rotator : Rotator
-#expect 31509911677
-#expect 1855678
+#expect 0
+#expect 2
+#expect 1
// 0: 100100100111110000000
// sel 011110100001001000000
// 1: 111000101000011000011
// r: 111000100110111000000
-1000000: sendto bitfifo.inEnqueue;
-0: sendto bitfifo.inEnqueue;
-bitfifo.inEnqueue: [*] take, deliver;
-bitfifo.outDequeue: [*] wait, take, sendto lut3.in2;
-lut3.in2: notify bitfifo.outDequeue;
- [74] take, deliver, notify bitfifo.outDequeue;
-
-// mux on second input
-226: sendto lut3.inLut;
-lut3.inLut: take;
- [74] deliver;
-
-1855683: sendto lut3.in1;
-0: sendto lut3.in1;
-lut3.in1: take;
- [37] deliver;
- take;
- [37] deliver;
-
-1200000: sendto lut3.in3;
-0: sendto lut3.in3;
-lut3.in3: take;
- [37] deliver;
- take;
- [37] deliver;
-
-lut3.out: [*] take, sendto alu3.in2;
-
-0: sendto alu3.in3;
-0: sendto alu3.in1;
-alu3.in1: [*] take, deliver;
-alu3.in2: [*] take, deliver;
-alu3.in3: [*] take, deliver;
-alu3.out1: [74] take, sendto alu3.in1;
-alu3.out2: [74] take, sendto alu3.in3;
-alu3.out3: [74] take;
-alu3.outBits: [*] take, sendto debug.in;
-
-
-debug.in: [*] take, deliver;
+alu3.in1: literal 1; deliver; load repeat counter with 36; deliver;
+alu3.in2: literal 0; deliver; literal 1; load repeat counter with 36; deliver;
+alu3.in3: literal 4; deliver; load repeat counter with 36; deliver;
+
+alu3.out1: take; sendto debug.in; [*] take;
+alu3.out2: take; wait; sendto debug.in; [*] take;
+alu3.outBits: take; wait; sendto debug.in;
+
+debug.in:
+ take, deliver;
+ notify alu3.out2;
+ take, deliver;
+ notify alu3.outBits;
+ take, deliver;
== Contributors =========================================================