disable Alu3 test temporarily (clogs switch fabric with two-instruction literals)
[fleet.git] / ships / Memory.ship
index ae99d10..e103fbd 100644 (file)
@@ -10,6 +10,72 @@ data  in:    inCount
 
 data  out:   out
 
+== TeX ==============================================================
+
+The {\tt Memory} ship represents an interface to a storage space,
+through which that space can be read or written.  This storage space
+might be a fast on-chip cache, off-chip DRAM, or perhaps even a disk drive.
+
+There may be multiple {\tt Memory} ships which interface to the same
+physical storage space.  An implementation of Fleet must provide
+additional documentation to the programmer indicating which {\tt
+Memory} ships correspond to which storage spaces.  A single {\tt
+Memory} ship may also access a ``virtual storage space'' formed by
+concatenating multiple physical storage spaces.
+
+\subsection*{Code Bag Fetch}
+
+When a word appears at the {\tt inCBD} port, it is treated as a {\it
+code bag descriptor}, as shown below:
+
+\begin{center}
+\setlength{\bitwidth}{3mm}
+{\tt
+\begin{bytefield}{37}
+  \bitheader[b]{36,6,5,0}\\
+  \bitbox{31}{Address} 
+  \bitbox{6}{size} 
+\end{bytefield}
+}
+\end{center}
+
+A code bag descriptor arriving at the {\tt inCBD} port is serviced as a
+memory read with {\tt inAddrRead=Address}, {\tt inStride=1}, and {\tt
+inCount=size}.
+
+\subsection*{Reading}
+
+When a word is delivered to {\tt inAddrRead}, the word residing in
+memory at that address is provided at {\tt out}.
+
+\subsection*{Writing}
+
+When words are delivered to both {\tt inAddrWrite} and {\tt inDataWrite},
+the word at {\tt inDataWrite} is written to the address specified by
+{\tt inAddrWrite}.  Once the word is successfully committed to memory,
+the value {\tt inAddrWrite+inStride} is provided at {\tt out} (that is,
+the address of the next word to be written).
+
+\subsection*{To Do}
+
+Stride and count are not implemented.
+
+We need a way to do an ``unordered fetch'' --- a way to tell the memory
+unit to retrieve some block of words in any order it likes.  This can
+considerably accelerate fetches when the first word of the region is
+not cached, but other parts are cached.  This can also be used for
+dispatching codebags efficiently --- but how will we make sure that
+instructions destined for a given pump are dispatched in the correct
+order (source sequence guarantee)?
+
+A more advanced form would be ``unordered fetch of ordered records''
+--- the ability to specify a record size (in words), the offset of the
+first record, and the number of records to be fetched.  The memory
+unit would then fetch the records in any order it likes, but would be
+sure to return the words comprising a record in the order in which
+they appear in memory.  This feature could be used to solve the source
+sequence guarantee problem mentioned in the previous paragraph.
+
 == Fleeterpreter ====================================================
     private long[] mem = new long[0];
     public long readMem(int addr) { return mem[addr]; }
@@ -61,44 +127,34 @@ data  out:   out
             long size = val & 0x3f;
             dispatch((int)addr, (int)size);
         }
-        if (count > 0 && writing) {
-            if (box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) {
-               writeMem((int)addr, box_inDataWrite.removeDataForShip());
-               box_out.addDataFromShip(0);
-               count--;
-               addr += stride;
-            }
-
-        } else if (count > 0 && !writing) {
-            if (box_out.readyForDataFromShip()) {
-               box_out.addDataFromShip(readMem((int)addr));
-               count--;
-               addr += stride;
+        if (count > 0) {
+            if (writing) {
+              if (box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) {
+                 writeMem((int)addr, box_inDataWrite.removeDataForShip());
+                 box_out.addDataFromShip(0);
+                 count--;
+                 addr += stride;
+              }
+            } else {
+              if (box_out.readyForDataFromShip()) {
+                 box_out.addDataFromShip(readMem((int)addr));
+                 count--;
+                 addr += stride;
+              }
             }
 
-        } else if (box_inAddrRead.dataReadyForShip() && box_out.readyForDataFromShip()) {
-            Packet packet = box_inAddrRead.peekPacketForShip();
-            if (packet.destination.getDestinationName().equals("read")) {
-                box_out.addDataFromShip(readMem((int)box_inAddrRead.removeDataForShip()));
-            } else if (packet.destination.getDestinationName().equals("write") && box_inDataWrite.dataReadyForShip()) {
-                writeMem((int)box_inAddrRead.removeDataForShip(),
-                         box_inDataWrite.removeDataForShip());
-                box_out.addDataFromShip(0);
-            } else if (packet.destination.getDestinationName().equals("writeMany")
-                       && box_inStride.dataReadyForShip()
-                       && box_inCount.dataReadyForShip()) {
-                addr = box_inAddrRead.removeDataForShip();
-                stride = box_inStride.removeDataForShip();
-                count = box_inCount.removeDataForShip();
-                writing = true;
-            } else if (packet.destination.getDestinationName().equals("readMany")
-                       && box_inStride.dataReadyForShip()
-                       && box_inCount.dataReadyForShip()) {
-                addr = box_inAddrRead.removeDataForShip();
-                stride = box_inStride.removeDataForShip();
-                count = box_inCount.removeDataForShip();
-                writing = false;
-            }
+        } else if (box_inAddrRead.dataReadyForShip()) {
+            addr = box_inAddrRead.removeDataForShip();
+            stride = 0;
+            count = 1;
+            writing = false;
+
+        } else if (box_inAddrWrite.dataReadyForShip()) {
+            addr = box_inAddrWrite.peekPacketForShip().value;
+            box_inAddrWrite.removeDataForShip();
+            stride = 0;
+            count = 1;
+            writing = true;
         }
     }
 
@@ -153,12 +209,10 @@ module memory (clk,
   `input(stride_r,       stride_a,      stride_a_,      [(`DATAWIDTH-1):0],         stride_d)
   `input(count_r,        count_a,       count_a_,       [(`DATAWIDTH-1):0],         count_d)
   `output(out_r,         out_r_,        out_a,          [(`DATAWIDTH-1):0],         out_d_)
-  //`defreg(out_d_,                                     [(`DATAWIDTH-1):0],         out_d)
-
   `input(preload_r,      preload_a,     preload_a_,     [(`DATAWIDTH-1):0],         preload_d)
   `input(cbd_r,          cbd_a,         cbd_a_,         [(`DATAWIDTH-1):0],         cbd_d)
-  `output(ihorn_r,       ihorn_r_,      ihorn_a,        [(`INSTRUCTION_WIDTH-1):0], ihorn_d_)
-  `defreg(ihorn_d_,                                     [(`INSTRUCTION_WIDTH-1):0], ihorn_d)
+  `output(ihorn_r,       ihorn_r_,      ihorn_a,        [(`PACKET_WIDTH-1):0], ihorn_d_)
+  `defreg(ihorn_d_,                                     [(`PACKET_WIDTH-1):0], ihorn_d)
   `output(dhorn_r,       dhorn_r_,      dhorn_a,        [(`PACKET_WIDTH-1):0],      dhorn_d_)
   `defreg(dhorn_d_,                                     [(`PACKET_WIDTH-1):0],      dhorn_d)
 
@@ -250,32 +304,9 @@ module memory (clk,
     end else if (command_valid) begin
       command_valid <= 0;
       command = ramread;
-      case (command[(`INSTRUCTION_WIDTH-1):(`INSTRUCTION_WIDTH-2)])
-        0: begin
-            ihorn_full  <= 1;
-            ihorn_d     <= command;
-           end
-        1: begin
-            dhorn_full  <= 1;
-            temp    = command[(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0];
-            temp    = temp + ( { current_instruction_read_from, {(`CODEBAG_SIZE_BITS){1'b0}} });
-            data[(`DATAWIDTH-1):(`CODEBAG_SIZE_BITS)] = temp;
-            data[(`CODEBAG_SIZE_BITS-1):0]            = command[(`CODEBAG_SIZE_BITS-1):0];
-            `packet_data(dhorn_d) <= temp;
-            `packet_dest(dhorn_d) <=
-                  command[(`INSTRUCTION_WIDTH-3):(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)+1)];
-           end
-        2: begin
-            dhorn_full            <= 1;
-            `packet_data(dhorn_d) <= { {(`DATAWIDTH-24){command[23]}}, command[23:0] };
-            `packet_dest(dhorn_d) <= command[34:24];
-           end
-        3: begin
-            dhorn_full            <= 1;
-            `packet_data(dhorn_d) <= { {(`DATAWIDTH-24){command[23]}}, command[23:0] } + current_instruction_read_from;
-            `packet_dest(dhorn_d) <= command[34:24];
-           end
-      endcase
+      ihorn_full  <= 1;
+      `packet_data(ihorn_d) <= `instruction_data(command);
+      `packet_dest(ihorn_d) <= `instruction_dest(command);
 
     end else if (cbd_pos < cbd_size) begin
       current_instruction_read_from <= cbd_base+cbd_pos;
@@ -331,28 +362,19 @@ endmodule
 // instructions not in any codebag are part of the "root codebag"
 // which is dispatched when the code is loaded
 
-BOB:              sendto memory.inCBD;
-memory.inCBD:     [*] take, deliver;
-debug.in:         [*] take, deliver;
-
-
-// This codebag illustrates how to do a loop.  Notice that this
-// is actually an uncontrolled data emitter -- it could clog the
-//  switch fabric!
+memory.inCBD:
+  literal BOB;
+  deliver;
 
 BOB: {
-  12:           sendto debug.in;
-  13:           sendto debug.in;
-  14:           sendto debug.in;
+  debug.in:
+    literal 12; deliver;
+    literal 13; deliver;
+    literal 14; deliver;
 }
 
 
 == Constants ========================================================
-== TeX ==============================================================
-\begin{verbatim}
-TODO: count/stride
-TODO: multiple interfaces to a single memory
-\end{verbatim}
 
 == Contributors =========================================================
 Adam Megacz <megacz@cs.berkeley.edu>