data in: in3
data out: out1
+ shortcut to: in1
data out: out2
+ shortcut to: in2
+data out: outBits
== Constants ========================================================
== TeX ==============================================================
-This ship performs addition of three inputs, producing two output
-values in carry-save form. To complete the addition, send the two
-output values to an Alu2 with opcode ADD. For summing a set of four
-or more numbers, Alu3 followed by Alu2 is often faster than repeated
-use of Alu2.
+{\tt Alu3} is a three-input adder which produces a pair of outputs in
+carry-save form. It has no opcode input.
+
+This ship also contains a private ``bit fifo'' similar to the {\tt
+BitFifo} ship, except that only the dequeueing (output) interface is
+exposed to the programmer. Each addition operation performed causes
+the lowest bit of the {\it save} output to be enqueued into the bit
+fifo. This can be used to produce a very efficient multiplier; see
+the test case for this ship for more details.
+
+\subsection*{Semantics}
+
+When a value is present at each of {\tt in1}, {\tt in2} and {\tt in3},
+these three values are consumed. The {\it carry} result of carry-save
+addition is placed in {\tt out1}, and the {\it save} result of
+carry-save addition is placed in {\tt out2}.
+
+\subsection*{To Do}
+
+Is the second output supposed to be shifted?
+
+Provide a way to clear/flush the internal bitfifo.
+
+Do we even need this? Can we do the same thing with {\tt Lut3} and
+{\tt BitFifo} together?
+
== Fleeterpreter ====================================================
+static class BitStorage {
+ long[] bits;
+ int numbits = 0;
+ int start = 0;
+ BitStorage(int size) {
+ bits = new long[size / 64 + (size % 64 != 0 ? 1 : 0)];
+ }
+ int size() {
+ return numbits;
+ }
+ boolean hasSpace(int num) {
+ return bits.length * 64 - numbits >= num;
+ }
+ boolean peekTailBit() {
+ int entry = (((numbits-1) + start) / 64) % bits.length;
+ int off = ((numbits-1) + start) % 64;
+ int maxadd = 64 - off;
+ long mask = maxadd < 64 ? (-1L >>> maxadd) : 0L;
+ return (bits[entry] & ~mask) != 0;
+ }
+ boolean add(long data, int num) {
+ if (!hasSpace(num)) return false;
+ int entry = ((numbits + start) / 64) % bits.length;
+ int off = (numbits + start) % 64;
+ int maxadd = 64 - off;
+ long mask = maxadd < 64 ? (-1L >>> maxadd) : 0L;
+ bits[entry] = (bits[entry] & mask) | (data << off);
+ if (num > maxadd) {
+ numbits += maxadd;
+ return add(data >>> maxadd, num - maxadd);
+ } else {
+ numbits += num;
+ return true;
+ }
+ }
+ long get(int num) {
+ assert size() >= num : "too few bits in storage";
+ int entry = start / 64;
+ int off = start % 64;
+ int max = 64 - off;
+ int n = num > max ? max : num;
+ long mask = n > 0 ? (-1L >>> (64 - n)) : 0L;
+ long res = (bits[entry] >>> off) & mask;
+ int oldstart = start;
+ if (n < num) {
+ int n2 = num - n;
+ long mask2 = n2 > 0 ? (-1L >>> (64 - n2)) : 0L;
+ res |= (bits[(entry + 1) % bits.length] & mask2) << n;
+ }
+ start = (start + num) % (64 * bits.length);
+ numbits -= num;
+ return res;
+ }
+ int capacity() {
+ return 64 * bits.length;
+ }
+ // Test code for BitStorage
+ static void test3(String[] args) {
+ BitStorage bs = new BitStorage(37);
+ Random rand = new Random();
+ Vector ins = new Vector();
+ Vector ret = new Vector();
+ StringBuffer sbi = new StringBuffer();
+ StringBuffer sbr = new StringBuffer();
+ System.out.println("==== Test #3 ====");
+ System.out.println("inserting...");
+ long data = rand.nextLong();
+ bs.add(data, 0);
+ ins.add(new Integer(0));
+ data = rand.nextLong();
+ int num = rand.nextInt(37);
+ int s = bs.size();
+ bs.add(data, num);
+ assert bs.size() - s == num : "bad size: " + s + " + " + num + " != " + bs.size();
+ ins.add(new Integer(num));
+ add(sbi, data, num);
+ print(data, num);
+ System.out.println("\nretrieving...");
+ num = bs.size();
+ data = bs.get(num);
+ ret.add(new Integer(num));
+ add(sbr, data, num);
+ print(data, num);
+ System.out.println("\ninsertion sequence:");
+ for (int i = 0; i < ins.size(); i++) {
+ System.out.print(" " + ins.get(i));
+ }
+ System.out.println("\nretrieval sequence:");
+ for (int i = 0; i < ret.size(); i++) {
+ System.out.print(" " + ret.get(i));
+ }
+ System.out.println();
+ check(sbi, sbr);
+ }
+ static void test2(String[] args) {
+ int iters = (args.length > 0 ? Integer.parseInt(args[0]) : 10);
+ BitStorage bs = new BitStorage(37 * iters);
+ Random rand = new Random();
+ Vector ins = new Vector();
+ Vector ret = new Vector();
+ StringBuffer sbi = new StringBuffer();
+ StringBuffer sbr = new StringBuffer();
+ System.out.println("==== Test #2 ====");
+ for (int i = 0; i < iters; i++) {
+ long data = rand.nextLong();
+ int num = rand.nextInt(37);
+ int s = bs.size();
+ bs.add(data, num);
+ assert bs.size() - s == num : "bad size: " + s + " + " + num + " != " + bs.size();
+ ins.add(new Integer(num));
+ add(sbi, data, num);
+ num = rand.nextInt(Math.min(37, bs.size()));
+ s = bs.size();
+ data = bs.get(num);
+ assert s - bs.size() == num : "bad size: " + s + " - " + num + " != " + bs.size();
+ ret.add(new Integer(num));
+ add(sbr, data, num);
+ }
+ //for (int i = 0; i < iters; i++) {
+ while (bs.size() > 0) {
+ int num = Math.min(rand.nextInt(37), bs.size());
+ //int num = Math.min(33, bs.size());
+ int s = bs.size();
+ long data = bs.get(num);
+ assert s - bs.size() == num : "bad size: " + s + " - " + num + " != " + bs.size();
+ ret.add(new Integer(num));
+ add(sbr, data, num);
+ }
+ System.out.println("inserted:");
+ System.out.println(sbi);
+ System.out.println("retrieved:");
+ System.out.println(sbr);
+ System.out.println("insertion sequence:");
+ for (int i = 0; i < ins.size(); i++) {
+ System.out.print(" " + ins.get(i));
+ }
+ System.out.println("\nretrieval sequence:");
+ for (int i = 0; i < ret.size(); i++) {
+ System.out.print(" " + ret.get(i));
+ }
+ System.out.println();
+ check(sbi, sbr);
+ }
+ static void test1(String[] args) {
+ int iters = (args.length > 0 ? Integer.parseInt(args[0]) : 10);
+ BitStorage bs = new BitStorage(37 * iters);
+ Random rand = new Random();
+ Vector ins = new Vector();
+ Vector ret = new Vector();
+ StringBuffer sbi = new StringBuffer();
+ StringBuffer sbr = new StringBuffer();
+ System.out.println("==== Test #1 ====");
+ System.out.println("inserting...");
+ for (int i = 0; i < iters; i++) {
+ long data = rand.nextLong();
+ int num = rand.nextInt(37);
+ int s = bs.size();
+ bs.add(data, num);
+ assert bs.size() - s == num : "bad size: " + s + " + " + num + " != " + bs.size();
+ ins.add(new Integer(num));
+ add(sbi, data, num);
+ print(data, num);
+ }
+ System.out.println("\nretrieving...");
+ //for (int i = 0; i < iters; i++) {
+ while (bs.size() > 0) {
+ int num = Math.min(rand.nextInt(37), bs.size());
+ //int num = Math.min(33, bs.size());
+ int s = bs.size();
+ long data = bs.get(num);
+ assert s - bs.size() == num : "bad size: " + s + " - " + num + " != " + bs.size();
+ ret.add(new Integer(num));
+ add(sbr, data, num);
+ print(data, num);
+ }
+ System.out.println("\ninsertion sequence:");
+ for (int i = 0; i < ins.size(); i++) {
+ System.out.print(" " + ins.get(i));
+ }
+ System.out.println("\nretrieval sequence:");
+ for (int i = 0; i < ret.size(); i++) {
+ System.out.print(" " + ret.get(i));
+ }
+ System.out.println();
+ check(sbi, sbr);
+ }
+ static void print(long data, int num) {
+ for (int i = 0; i < num; i++) {
+ System.out.print((data >>> i) & 0x1);
+ }
+ }
+ static void add(StringBuffer sb, long data, int num) {
+ for (int i = 0; i < num; i++) {
+ sb.append((int) ((data >>> i) & 0x1));
+ }
+ }
+ static void check(StringBuffer sb1, StringBuffer sb2) {
+ int len = sb2.length();
+ if (len > sb1.length()) {
+ System.out.println("error: retrieval sequence is longer than insertion sequence");
+ len = sb1.length();
+ }
+ for (int i = 0; i < sb2.length(); i++) {
+ if (sb1.charAt(i) != sb2.charAt(i)) {
+ System.out.println("error: bit at position " + i + " does not match");
+ }
+ }
+ }
+}
+boolean mode = false;
+BitStorage outBits = new BitStorage(74);
public void service() {
+ if (outBits.size() >= 37) {
+ if (box_outBits.readyForDataFromShip()) {
+ box_outBits.addDataFromShip(outBits.get(37));
+ }
+ } else
if (box_in1.dataReadyForShip() &&
box_in2.dataReadyForShip() &&
box_in3.dataReadyForShip() &&
+ outBits.hasSpace(1) &&
box_out1.readyForDataFromShip() &&
box_out2.readyForDataFromShip()) {
- long v1 = box_in1.removeDataForShip();
- long v2 = box_in2.removeDataForShip();
- long v3 = box_in3.removeDataForShip();
- long o1 = ((v1 & v2) | (v2 & v3) | (v1 & v3)) << 1;
- long o2 = v1 ^ v2 ^ v3;
+ long v1 = box_in1.removeDataForShip();
+ long v2 = box_in2.removeDataForShip();
+ long v3 = box_in3.removeDataForShip();
+ long o1, o2, o3;
+ o1 = ((v1 & v2) | (v2 & v3) | (v1 & v3))/* << 1*/;
+ o2 = (v1 ^ v2 ^ v3) >> 1;
+ outBits.add((v1 ^ v2 ^ v3) & 0x1L, 1);
box_out1.addDataFromShip(o1);
box_out2.addDataFromShip(o2);
}
== FPGA ==============================================================
- reg have_a;
- reg [(`DATAWIDTH-1):0] a;
- reg have_b;
- reg [(`DATAWIDTH-1):0] b;
- reg have_c;
- reg [(`DATAWIDTH-1):0] c;
- reg have_out1;
- reg have_out2;
+ reg [73:0] bitstorage; initial bitstorage = 0;
+ reg [7:0] bitstorage_count; initial bitstorage_count = 0;
always @(posedge clk) begin
- if (have_out1) begin
- `onwrite(out1_r, out1_a) have_out1 <= 0; end
-
- end else if (have_out2) begin
- `onwrite(out2_r, out2_a) have_out2 <= 0; end
-
- end else if (!have_out1 && !have_out2) begin
- if (!have_a) begin
- `onread(in1_r, in1_a) have_a <= 1; a <= in1_d; end
+ if (!rst) begin
+ `reset
+ bitstorage <= 0;
+ bitstorage_count <= 0;
+ end else begin
+ if (out1_r && out1_a) out1_r <= 0;
+ if (out2_r && out2_a) out2_r <= 0;
+ if (outBits_r && outBits_a) outBits_r <= 0;
+ if (!in1_r && in1_a) in1_a <= 0;
+ if (!in2_r && in2_a) in2_a <= 0;
+ if (!in3_r && in3_a) in3_a <= 0;
+ if (!out1_r && !out2_r && !outBits_r && in1_r && in2_r && in3_r) begin
+ out1_d <= { ((in1_d & in2_d)
+ | (in2_d & in3_d)
+ | (in1_d & in3_d)) };
+ out2_d <= { 1'b0, (in1_d[(`DATAWIDTH-1):1] ^
+ in2_d[(`DATAWIDTH-1):1] ^
+ in3_d[(`DATAWIDTH-1):1]) };
+ if (bitstorage_count >= `DATAWIDTH-1) begin
+ outBits_d <= bitstorage[(`DATAWIDTH-1):0];
+ outBits_r <= 1;
+ bitstorage_count <= 0;
+ bitstorage <= bitstorage >> `DATAWIDTH;
end
- if (!have_b) begin
- `onread(in2_r, in2_a) have_b <= 1; b <= in2_d; end
- end
- if (!have_c) begin
- `onread(in3_r, in3_a) have_c <= 1; c <= in3_d; end
- end
-
- if (have_a && have_b && have_c) begin
- out1_d <= { { ((a & b) | (b & c) | (a & c)) } , 1'b0 };
- out2_d <= a ^ b ^ c;
- have_a <= 0;
- have_b <= 0;
- have_c <= 0;
- have_out1 <= 1;
- have_out2 <= 1;
+ bitstorage[bitstorage_count] <= (in1_d[0] ^ in2_d[0] ^ in3_d[0]);
+ bitstorage_count <= bitstorage_count+1;
+ out1_r <= 1;
+ out2_r <= 1;
+ in1_a <= 1;
+ in2_a <= 1;
+ in3_a <= 1;
end
end
end
+== Test ========================================================================
+#skip
+#ship alu3 : Alu3
+#ship lut3 : Lut3
+#ship debug : Debug
+#ship fifo : Fifo
+#ship rotator : Rotator
+
+#expect 0
+#expect 2
+#expect 1
+
+// 0: 100100100111110000000
+// sel 011110100001001000000
+// 1: 111000101000011000011
+// r: 111000100110111000000
+
+alu3.in1: literal 1; deliver; load repeat counter with 36; deliver;
+alu3.in2: literal 0; deliver; literal 1; load repeat counter with 36; deliver;
+alu3.in3: literal 4; deliver; load repeat counter with 36; deliver;
+
+alu3.out1: take; sendto debug.in; [*] take;
+alu3.out2: take; wait; sendto debug.in; [*] take;
+alu3.outBits: take; wait; sendto debug.in;
+
+debug.in:
+ take, deliver;
+ notify alu3.out2;
+ take, deliver;
+ notify alu3.outBits;
+ take, deliver;
+
== Contributors =========================================================
+Amir Kamil <kamil@cs.berkeley.edu>
Adam Megacz <megacz@cs.berkeley.edu>