reorder Alu3 instructions to deal with two-instruction literals
[fleet.git] / ships / Memory.ship
index c8cd957..e103fbd 100644 (file)
@@ -2,16 +2,80 @@ ship: Memory
 
 == Ports ===========================================================
 data  in:    inCBD
-data  in:    inAddr.read
-data  in:    inAddr.write
-data  in:    inAddr.readMany
-data  in:    inAddr.writeMany
-data  in:    inData
+data  in:    inAddrRead
+data  in:    inAddrWrite
+data  in:    inDataWrite
 data  in:    inStride
 data  in:    inCount
 
 data  out:   out
 
+== TeX ==============================================================
+
+The {\tt Memory} ship represents an interface to a storage space,
+which can be used to read from it or write to it.  This storage space
+might be a fast on-chip cache, off-chip DRAM, or perhaps even a disk drive.
+
+There may be multiple {\tt Memory} ships which interface to the same
+physical storage space.  An implementation of Fleet must provide
+additional documentation to the programmer indicating which {\tt
+Memory} ships correspond to which storage spaces.  A single {\tt
+Memory} ship may also access a ``virtual storage space'' formed by
+concatenating multiple physical storage spaces.
+
+\subsection*{Code Bag Fetch}
+
+When a word appears at the {\tt inCBD} port, it is treated as a {\it
+code bag descriptor}, as shown below:
+
+\begin{center}
+\setlength{\bitwidth}{3mm}
+{\tt
+\begin{bytefield}{37}
+  \bitheader[b]{36,6,5,0}\\
+  \bitbox{31}{Address} 
+  \bitbox{6}{size} 
+\end{bytefield}
+}
+\end{center}
+
+The descriptor is then serviced as a memory read with
+{\tt inAddrRead=Address}, {\tt inStride=1}, and {\tt
+inCount=size}.
+
+\subsection*{Reading}
+
+When a word is delivered to {\tt inAddrRead}, the word residing in
+memory at that address is provided at {\tt out}.
+
+\subsection*{Writing}
+
+When a word is delivered to {\tt inAddrWrite} and {\tt inDataWrite},
+the word at {\tt inDataWrite} is written to the address specified by
+{\tt inAddrWrite}.  Once the word is successfully committed to memory,
+the value {\tt inAddrWrite+inStride} is provided at {\tt out} (that is, the
+address of the next word to be written).
+
+\subsection*{To Do}
+
+Stride and count are not implemented.
+
+We need a way to do an ``unordered fetch'' -- a way to tell the memory
+unit to retrieve some block of words in any order it likes.  This can
+considerably accelerate fetches when the first word of the region is
+not cached, but other parts are cached.  This can also be used for
+dispatching codebags efficiently -- but how will we make sure that
+instructions destined for a given pump are dispatched in the correct
+order (source sequence guarantee)?
+
+A more advanced form would be ``unordered fetch of ordered records''
+-- the ability to specify a record size (in words), the offset of the
+first record, and the number of records to be fetched.  The memory
+unit would then fetch the records in any order it likes, but would be
+sure to return the words comprising a record in the order in which
+they appear in memory.  This feature could be used to solve the source
+sequence guarantee problem mentioned in the previous paragraph.
+
 == Fleeterpreter ====================================================
     private long[] mem = new long[0];
     public long readMem(int addr) { return mem[addr]; }
@@ -51,6 +115,11 @@ data  out:   out
         dispatch(base, size);
     }
 
+    private long stride = 0;
+    private long count = 0;
+    private long addr = 0;
+    private boolean writing = false;
+
     public void service() {
         if (box_inCBD.dataReadyForShip()) {
             long val = box_inCBD.removeDataForShip();
@@ -58,15 +127,34 @@ data  out:   out
             long size = val & 0x3f;
             dispatch((int)addr, (int)size);
         }
-        if (box_inAddr.dataReadyForShip() && box_out.readyForItemFromShip()) {
-            Packet packet = box_inAddr.peekPacketForShip();
-            if (packet.destination.getDestinationName().equals("read")) {
-                box_out.addDataFromShip(readMem((int)box_inAddr.removeDataForShip()));
-            } else if (packet.destination.getDestinationName().equals("write") && box_inData.dataReadyForShip()) {
-                writeMem((int)box_inAddr.removeDataForShip(),
-                         box_inData.removeDataForShip());
-                box_out.addDataFromShip(0);
+        if (count > 0) {
+            if (writing) {
+              if (box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) {
+                 writeMem((int)addr, box_inDataWrite.removeDataForShip());
+                 box_out.addDataFromShip(0);
+                 count--;
+                 addr += stride;
+              }
+            } else {
+              if (box_out.readyForDataFromShip()) {
+                 box_out.addDataFromShip(readMem((int)addr));
+                 count--;
+                 addr += stride;
+              }
             }
+
+        } else if (box_inAddrRead.dataReadyForShip()) {
+            addr = box_inAddrRead.removeDataForShip();
+            stride = 0;
+            count = 1;
+            writing = false;
+
+        } else if (box_inAddrWrite.dataReadyForShip()) {
+            addr = box_inAddrWrite.peekPacketForShip().value;
+            box_inAddrWrite.removeDataForShip();
+            stride = 0;
+            count = 1;
+            writing = true;
         }
     }
 
@@ -75,92 +163,218 @@ data  out:   out
 == FPGA ==============================================================
 `include "macros.v"
 `define BRAM_ADDR_WIDTH 14
-`define BRAM_DATA_WIDTH `DATAWIDTH
-`define BRAM_NAME dscratch_bram
-`include "bram.inc"
+`define BRAM_DATA_WIDTH `INSTRUCTION_WIDTH
+`define BRAM_NAME some_bram
+
+/* bram.inc */
+module `BRAM_NAME(clk, we, a, dpra, di, spo, dpo); // RAM with a write/read address (a) and a second, read-only address (dpra)
+    input  clk; // all writes and address captures happen on the rising edge
+    input  we; // write enable: when high at a clock edge, di is stored at ram[a]
+    input  [(`BRAM_ADDR_WIDTH-1):0] a; // address used for writes and for the spo read output
+    input  [(`BRAM_ADDR_WIDTH-1):0] dpra; // address used for the dpo read output
+    input  [(`BRAM_DATA_WIDTH-1):0] di; // data word to write
+    output [(`BRAM_DATA_WIDTH-1):0] spo; // word read at the registered copy of a
+    output [(`BRAM_DATA_WIDTH-1):0] dpo; // word read at the registered copy of dpra
+    reg    [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0]; // storage: 2^BRAM_ADDR_WIDTH words of BRAM_DATA_WIDTH bits
+    reg    [(`BRAM_ADDR_WIDTH-1):0] read_a; // a as captured on the last clock edge
+    reg    [(`BRAM_ADDR_WIDTH-1):0] read_dpra; // dpra as captured on the last clock edge
+    always @(posedge clk) begin 
+        if (we) 
+            ram[a] <= di; // write happens only when we is asserted
+        read_a <= a; // both read addresses are captured every clock, regardless of we
+        read_dpra <= dpra; 
+    end // always @(posedge clk)
+    assign spo = ram[read_a]; // reads go through the registered address, so data follows the address by one clock
+    assign dpo = ram[read_dpra]; 
+endmodule 
+/* bram.inc */
 
-module dscratch (clk, 
-               read_addr_r,    read_addr_a_,   read_addr_d,
-               read_data_r_,   read_data_a,    read_data_d_,
+module memory (clk, 
+               cbd_r,          cbd_a_,         cbd_d,
+               in_addr_r,      in_addr_a_,     in_addr_d,
                write_addr_r,   write_addr_a_,  write_addr_d,
                write_data_r,   write_data_a_,  write_data_d,
-               write_done_r_,  write_done_a,   write_done_d_
+               stride_r,       stride_a_,      stride_d,
+               count_r,        count_a_,       count_d,
+               out_r_,         out_a,          out_d_,
+               preload_r,      preload_a_,     preload_d,
+               ihorn_r_,       ihorn_a,        ihorn_d_,
+               dhorn_r_,       dhorn_a,        dhorn_d_
               );
 
   input  clk;
-  `input(read_addr_r,    read_addr_a,   read_addr_a_,   [(`DATAWIDTH-1):0],  read_addr_d)
-  `output(read_data_r,   read_data_r_,  read_data_a,    [(`DATAWIDTH-1):0],  read_data_d_)
-  `defreg(read_data_d_,                                 [(`DATAWIDTH-1):0],  read_data_d)
-
-  `input(write_addr_r,   write_addr_a,  write_addr_a_,  [(`DATAWIDTH-1):0],  write_addr_d)
-  `input(write_data_r,   write_data_a,  write_data_a_,  [(`DATAWIDTH-1):0],  write_data_d)
-  `output(write_done_r,  write_done_r_, write_done_a,   [(`DATAWIDTH-1):0],  write_done_d_)
-  `defreg(write_done_d_,                                [(`DATAWIDTH-1):0],  write_done_d)
-
-  reg                           bram_we;
-  wire                          bram_we_;
-  assign bram_we_ = bram_we;
-  wire [(`BRAM_DATA_WIDTH-1):0] bram_read_data;
-  reg  [(`BRAM_ADDR_WIDTH-1):0] bram_write_address;
-  wire [(`BRAM_ADDR_WIDTH-1):0] bram_read_address;
-  reg  [(`BRAM_DATA_WIDTH-1):0] bram_write_data;
-  wire [(`BRAM_DATA_WIDTH-1):0] bram_write_data_;
-  assign bram_write_data_ = bram_write_data;
-  `BRAM_NAME mybram(clk,
-                    bram_we_,          bram_write_address,
-                    bram_read_address, bram_write_data_,
-                    not_connected,     bram_read_data);
-
-  reg send_done;
-
-  reg have_read;    initial have_read = 0;
-  reg read_pending; initial read_pending = 0;
-  assign bram_read_address = read_addr_d;
+  `input(in_addr_r,      in_addr_a,     in_addr_a_,     [(2+`DATAWIDTH-1):0],       in_addr_d)
+  `input(write_addr_r,   write_addr_a,  write_addr_a_,  [(2+`DATAWIDTH-1):0],       write_addr_d)
+  `input(write_data_r,   write_data_a,  write_data_a_,  [(`DATAWIDTH-1):0],         write_data_d)
+  `input(stride_r,       stride_a,      stride_a_,      [(`DATAWIDTH-1):0],         stride_d)
+  `input(count_r,        count_a,       count_a_,       [(`DATAWIDTH-1):0],         count_d)
+  `output(out_r,         out_r_,        out_a,          [(`DATAWIDTH-1):0],         out_d_)
+  `input(preload_r,      preload_a,     preload_a_,     [(`DATAWIDTH-1):0],         preload_d)
+  `input(cbd_r,          cbd_a,         cbd_a_,         [(`DATAWIDTH-1):0],         cbd_d)
+  `output(ihorn_r,       ihorn_r_,      ihorn_a,        [(`PACKET_WIDTH-1):0], ihorn_d_)
+  `defreg(ihorn_d_,                                     [(`PACKET_WIDTH-1):0], ihorn_d)
+  `output(dhorn_r,       dhorn_r_,      dhorn_a,        [(`PACKET_WIDTH-1):0],      dhorn_d_)
+  `defreg(dhorn_d_,                                     [(`PACKET_WIDTH-1):0],      dhorn_d)
+
+  reg ihorn_full;
+  initial ihorn_full = 0;
+  reg dhorn_full;
+  initial dhorn_full = 0;
+  reg command_valid;
+  initial command_valid = 0;
+
+  reg [(`BRAM_ADDR_WIDTH-1):0]    preload_pos;
+  reg [(`BRAM_ADDR_WIDTH-1):0]    preload_size;
+  initial preload_size = 0;
+
+  reg [(`BRAM_ADDR_WIDTH-1):0]    current_instruction_read_from;
+  reg [(`BRAM_ADDR_WIDTH-1):0]    temp_base;
+  reg [(`CODEBAG_SIZE_BITS-1):0]  temp_size;
+  reg [(`BRAM_ADDR_WIDTH-1):0]    cbd_base;
+  reg [(`CODEBAG_SIZE_BITS-1):0]  cbd_size;
+  reg [(`CODEBAG_SIZE_BITS-1):0]  cbd_pos;
+  reg [(`INSTRUCTION_WIDTH-1):0]  command;
+  reg [(`BRAM_DATA_WIDTH-1):0]    ram [((1<<(`BRAM_ADDR_WIDTH))-1):0];
+  reg                             send_done;
+  reg                             send_read;
+
+  reg [(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0] temp;
+  reg [(`DATAWIDTH-1):0]                                     data;
+
+  reg                             write_flag;
+  reg [(`BRAM_ADDR_WIDTH-1):0]    in_addr;
+  reg [(`BRAM_DATA_WIDTH-1):0]    write_data;
+
+  wire [(`BRAM_DATA_WIDTH-1):0]   ramread;
+
+  reg command_valid_read;
+  initial command_valid_read = 0;
+
+  reg launched;
+  initial launched = 0;
+
+  some_bram mybram(clk, write_flag, in_addr, current_instruction_read_from, write_data, not_connected, ramread);
+  assign out_d_ = ramread;
 
   always @(posedge clk) begin
-    bram_we = 0;
-    if (send_done) begin
-      `onwrite(write_done_r, write_done_a)
-        send_done = 0;
+
+    write_flag <= 0;
+
+    if (!in_addr_r && in_addr_a) in_addr_a = 0;
+    if (!write_data_r && write_data_a) write_data_a = 0;
+    if (!write_addr_r && write_addr_a) write_addr_a = 0;
+
+    if (command_valid_read) begin
+      command_valid_read  <= 0;
+      command_valid       <= 1;
+
+    end else  if (send_done) begin
+      `onwrite(out_r, out_a)
+        send_done <= 0;
       end
-    end else begin
-      if (!write_addr_r && write_addr_a) write_addr_a = 0;
-      if (!write_data_r && write_data_a) write_data_a = 0;
-      if (write_addr_r && write_data_r) begin
-        write_addr_a = 1;
-        write_data_a = 1;
-        bram_we = 1;
-        send_done = 1;
-        bram_write_address = write_addr_d;
-        bram_write_data = write_data_d;
+
+    end else  if (send_read) begin
+      `onwrite(out_r, out_a)
+        send_read <= 0;
       end
-    end
 
-    if (read_pending) begin
-        read_pending <= 0;
-        have_read    <= 1;
-        read_data_d  <= bram_read_data;
-    end else if (have_read) begin
-      `onwrite(read_data_r, read_data_a)
-        have_read <= 0;
+    end else if (in_addr_r) begin
+      in_addr_a                        = 1;
+      send_read                       <= 1;
+      current_instruction_read_from   <= in_addr_d[(`DATAWIDTH-1):0];
+
+    end else if (write_addr_r && write_data_r) begin
+      write_addr_a       = 1;
+      write_data_a       = 1;
+      send_done         <= 1;
+      write_flag        <= 1;
+      in_addr           <= write_addr_d[(`DATAWIDTH-1):0];
+      write_data        <= write_data_d;
+
+    end else if (ihorn_full && launched) begin
+      `onwrite(ihorn_r, ihorn_a)
+        ihorn_full <= 0;
       end
+
+    end else if (dhorn_full) begin
+      `onwrite(dhorn_r, dhorn_a)
+        dhorn_full <= 0;
+      end
+
+    end else if (command_valid) begin
+      command_valid <= 0;
+      command = ramread;
+      ihorn_full  <= 1;
+      `packet_data(ihorn_d) <= `instruction_data(command);
+      `packet_dest(ihorn_d) <= `instruction_dest(command);
+
+    end else if (cbd_pos < cbd_size) begin
+      current_instruction_read_from <= cbd_base+cbd_pos;
+      command_valid_read            <= 1;
+      cbd_pos                       <= cbd_pos + 1;
+
     end else begin
-      `onread(read_addr_r, read_addr_a)
-        // ======= Careful with the timing here! =====================
-        // We MUST capture bram_read_data on the very next clock since
-        // read_addr_d is free to change after the next clock
-        // ===========================================================
-        read_pending <= 1;
+      `onread(cbd_r, cbd_a)
+        cbd_pos       <= 0;
+        cbd_size      <= cbd_d[(`CODEBAG_SIZE_BITS-1):0];
+        cbd_base      <= cbd_d[(`INSTRUCTION_WIDTH-1):(`CODEBAG_SIZE_BITS)];
+
+      end else begin
+        `onread(preload_r, preload_a)
+          if (preload_size == 0) begin
+            preload_size     <= preload_d;
+          end else if (!launched) begin
+            write_flag <= 1;
+            write_data <= preload_d;
+            in_addr <= preload_pos;
+            if (preload_pos == 0) begin
+              temp_base = preload_d[(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)):(`CODEBAG_SIZE_BITS)];
+              temp_size = preload_d[(`CODEBAG_SIZE_BITS-1):0];
+            end
+            if ((preload_pos+1) == preload_size) begin
+              cbd_pos  <= 0;
+              cbd_base <= temp_base;
+              cbd_size <= temp_size;
+              launched <= 1;
+            end
+            preload_pos      <= preload_pos + 1;
+          end
+        end
       end
     end
-
   end
-
 endmodule
 
+  
+
+
+
+== Test ==============================================================
+// expected output
+#expect 12
+#expect 13
+#expect 14
+
+// ships required in order to run this code
+#ship debug          : Debug
+#ship memory         : Memory
+
+// instructions not in any codebag are part of the "root codebag"
+// which is dispatched when the code is loaded
+
+memory.inCBD:
+  literal BOB;
+  deliver;
+
+BOB: {
+  debug.in:
+    literal 12; deliver;
+    literal 13; deliver;
+    literal 14; deliver;
+}
+
 
 == Constants ========================================================
-== TeX ==============================================================
 
 == Contributors =========================================================
 Adam Megacz <megacz@cs.berkeley.edu>