ship: Memory == Ports =========================================================== data in: inCBD data in: inAddrRead data in: inAddrWrite data in: inDataWrite data in: inStride data in: inCount data out: out == TeX ============================================================== The {\tt Memory} ship represents an interface to a storage space, which can be used to read from it or write to it. This storage space might be a fast on-chip cache, off-chip DRAM, or perhaps even a disk drive. There may be multiple {\tt Memory} ships which interface to the same physical storage space. An implementation of Fleet must provide additional documentation to the programmer indicating which {\tt Memory} ships correspond to which storage spaces. A single {\tt Memory} ship may also access a ``virtual storage space'' formed by concatenating multiple physical storage spaces. \subsection*{Code Bag Fetch} When a word appears at the {\tt inCBD} port, it is treated as a {\it code bag descriptor}, as shown below: \begin{center} \setlength{\bitwidth}{3mm} {\tt \begin{bytefield}{37} \bitheader[b]{36,6,5,0}\\ \bitbox{31}{Address} \bitbox{6}{size} \end{bytefield} } \end{center} When a word arrives at the {\tt inCBD} port, it is treated as a memory read with {\tt inAddrRead=Address}, {\tt inStride=1}, and {\tt inCount=size}. \subsection*{Reading} When a word is delivered to {\tt inAddrRead}, the word residing in memory at that address is provided at {\tt out}. \subsection*{Writing} When a word is delivered to each of {\tt inAddrWrite} and {\tt inDataWrite}, the word at {\tt inDataWrite} is written to the address specified by {\tt inAddrWrite}. Once the word is successfully committed to memory, the value {\tt inAddrWrite+inStride} is provided at {\tt out} (that is, the address of the next word to be written). \subsection*{To Do} Stride and count are not implemented. We need a way to do an ``unordered fetch'' -- a way to tell the memory unit to retrieve some block of words in any order it likes. 
This can considerably accelerate fetches when the first word of the region is not cached, but other parts are cached. This can also be used for dispatching codebags efficiently -- but how will we make sure that instructions destined for a given pump are dispatched in the correct order (source sequence guarantee)? A more advanced form would be ``unordered fetch of ordered records'' -- the ability to specify a record size (in words), the offset of the first record, and the number of records to be fetched. The memory unit would then fetch the records in any order it likes, but would be sure to return the words comprising a record in the order in which they appear in memory. This feature could be used to solve the source sequence guarantee problem mentioned in the previous paragraph. == Fleeterpreter ==================================================== /* Backing store of the interpreter model: starts empty and is enlarged by writeMem. */ private long[] mem = new long[0]; /* Returns the word stored at addr. There is no bounds check, so an addr outside the current array raises ArrayIndexOutOfBoundsException. */ public long readMem(int addr) { return mem[addr]; } /* Stores val at addr. If addr is at or past the end, mem is first replaced by a zero-filled array of length addr*2+1 holding a copy of the old contents. */ public void writeMem(int addr, long val) { if (addr >= mem.length) { long[] newmem = new long[addr * 2 + 1]; System.arraycopy(mem, 0, newmem, 0, mem.length); mem = newmem; } mem[addr] = val; } /* NOTE(review): the text is damaged between the loop condition of dispatch and the shift expression that follows it. The loop body and the header of the next method are not visible, so neither is documented here. */ public void dispatch(int addr, int size) { for(int i=addr; i> 6); base = base & ~(0xffffffff << 18); int size = (int)launch; size = size & ~(0xffffffff << 6); dispatch(base, size); } /* State of the transfer in progress; written and consumed by service. */ private long stride = 0; private long count = 0; private long addr = 0; private boolean writing = false; /* One service step. First, a word waiting at inCBD is split into an address (bits above the low six) and a size (low six bits) and handed to dispatch. Then either the transfer in progress advances by one word, or a new one-word transfer is started from inAddrRead or inAddrWrite. */ public void service() { if (box_inCBD.dataReadyForShip()) { long val = box_inCBD.removeDataForShip(); /* This local addr shadows the field of the same name, so the field is not modified here. */ long addr = val >> 6; long size = val & 0x3f; dispatch((int)addr, (int)size); } if (count > 0) { if (writing) { /* A write step needs both a data word to consume and room at out for the completion token. */ if (box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) { writeMem((int)addr, box_inDataWrite.removeDataForShip()); /* The value sent to out after a write is the constant 0. */ box_out.addDataFromShip(0); count--; addr += stride; } } else { /* A read step only needs room at out. */ if (box_out.readyForDataFromShip()) { box_out.addDataFromShip(readMem((int)addr)); count--; addr += stride; } } } else if (box_inAddrRead.dataReadyForShip()) { 
/* Idle with a read address waiting: start a read of exactly one word, with stride 0 and count 1. */ addr = box_inAddrRead.removeDataForShip(); stride = 0; count = 1; writing = false; } else if (box_inAddrWrite.dataReadyForShip()) { /* Idle with a write address waiting: start a write of exactly one word. The address is taken from the peeked packet value and the item is then removed. */ addr = box_inAddrWrite.peekPacketForShip().value; box_inAddrWrite.removeDataForShip(); stride = 0; count = 1; writing = true; } } == FleetSim ============================================================== == FPGA ============================================================== `include "macros.v" `define BRAM_ADDR_WIDTH 14 `define BRAM_DATA_WIDTH `INSTRUCTION_WIDTH `define BRAM_NAME some_bram /* bram.inc */ /* Two-port RAM. On each rising clock edge it stores di at address a when we is high and registers both addresses; spo reads the array at the registered a and dpo at the registered dpra. */ module `BRAM_NAME(clk, we, a, dpra, di, spo, dpo); input clk; input we; input [(`BRAM_ADDR_WIDTH-1):0] a; input [(`BRAM_ADDR_WIDTH-1):0] dpra; input [(`BRAM_DATA_WIDTH-1):0] di; output [(`BRAM_DATA_WIDTH-1):0] spo; output [(`BRAM_DATA_WIDTH-1):0] dpo; reg [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0]; reg [(`BRAM_ADDR_WIDTH-1):0] read_a; reg [(`BRAM_ADDR_WIDTH-1):0] read_dpra; always @(posedge clk) begin if (we) ram[a] <= di; read_a <= a; read_dpra <= dpra; end assign spo = ram[read_a]; assign dpo = ram[read_dpra]; endmodule /* bram.inc */ /* Memory ship module. The port helper macros (input, output, defreg), the handshake macros (onwrite, onread) and the packet and instruction field accessors are not defined in this view; presumably they come from macros.v, TODO confirm. The stride and count ports are declared but not referenced in the always block. */ module memory (clk, cbd_r, cbd_a_, cbd_d, in_addr_r, in_addr_a_, in_addr_d, write_addr_r, write_addr_a_, write_addr_d, write_data_r, write_data_a_, write_data_d, stride_r, stride_a_, stride_d, count_r, count_a_, count_d, out_r_, out_a, out_d_, preload_r, preload_a_, preload_d, ihorn_r_, ihorn_a, ihorn_d_, dhorn_r_, dhorn_a, dhorn_d_ ); input clk; `input(in_addr_r, in_addr_a, in_addr_a_, [(2+`DATAWIDTH-1):0], in_addr_d) `input(write_addr_r, write_addr_a, write_addr_a_, [(2+`DATAWIDTH-1):0], write_addr_d) `input(write_data_r, write_data_a, write_data_a_, [(`DATAWIDTH-1):0], write_data_d) `input(stride_r, stride_a, stride_a_, [(`DATAWIDTH-1):0], stride_d) `input(count_r, count_a, count_a_, [(`DATAWIDTH-1):0], count_d) `output(out_r, out_r_, out_a, [(`DATAWIDTH-1):0], out_d_) `input(preload_r, preload_a, preload_a_, [(`DATAWIDTH-1):0], preload_d) `input(cbd_r, cbd_a, cbd_a_, 
[(`DATAWIDTH-1):0], cbd_d) `output(ihorn_r, ihorn_r_, ihorn_a, [(`PACKET_WIDTH-1):0], ihorn_d_) `defreg(ihorn_d_, [(`PACKET_WIDTH-1):0], ihorn_d) `output(dhorn_r, dhorn_r_, dhorn_a, [(`PACKET_WIDTH-1):0], dhorn_d_) `defreg(dhorn_d_, [(`PACKET_WIDTH-1):0], dhorn_d) /* Control state. Only the regs followed by an initial statement have a starting value given in this view. NOTE(review): preload_pos, cbd_pos, cbd_size, send_done and send_read have none here. */ reg ihorn_full; initial ihorn_full = 0; reg dhorn_full; initial dhorn_full = 0; reg command_valid; initial command_valid = 0; reg [(`BRAM_ADDR_WIDTH-1):0] preload_pos; reg [(`BRAM_ADDR_WIDTH-1):0] preload_size; initial preload_size = 0; reg [(`BRAM_ADDR_WIDTH-1):0] current_instruction_read_from; reg [(`BRAM_ADDR_WIDTH-1):0] temp_base; reg [(`CODEBAG_SIZE_BITS-1):0] temp_size; reg [(`BRAM_ADDR_WIDTH-1):0] cbd_base; reg [(`CODEBAG_SIZE_BITS-1):0] cbd_size; reg [(`CODEBAG_SIZE_BITS-1):0] cbd_pos; reg [(`INSTRUCTION_WIDTH-1):0] command; /* NOTE(review): this ram array, and the temp and data regs below, are not referenced anywhere else in the visible module body; storage goes through the mybram instance. */ reg [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0]; reg send_done; reg send_read; reg [(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0] temp; reg [(`DATAWIDTH-1):0] data; reg write_flag; reg [(`BRAM_ADDR_WIDTH-1):0] in_addr; reg [(`BRAM_DATA_WIDTH-1):0] write_data; wire [(`BRAM_DATA_WIDTH-1):0] ramread; reg command_valid_read; initial command_valid_read = 0; reg launched; initial launched = 0; /* Storage instance: write_flag is the write enable, in_addr the write address and write_data the write data; current_instruction_read_from addresses the second read port, whose data is ramread. The spo output goes to not_connected, which has no declaration in this view. */ some_bram mybram(clk, write_flag, in_addr, current_instruction_read_from, write_data, not_connected, ramread); /* The out data lines always carry the current BRAM read data. */ assign out_d_ = ramread; always @(posedge clk) begin /* Default for this edge: no BRAM write unless a branch below sets write_flag again. */ write_flag <= 0; /* Clear each _a signal once the matching _r signal is low. */ if (!in_addr_r && in_addr_a) in_addr_a = 0; if (!write_data_r && write_data_a) write_data_a = 0; if (!write_addr_r && write_addr_a) write_addr_a = 0; /* The branches below form one else-if chain and are tested in the order written. This first branch is a one-edge delay between issuing a code bag read address and consuming ramread. */ if (command_valid_read) begin command_valid_read <= 0; command_valid <= 1; end else if (send_done) begin /* A write was accepted earlier: run the onwrite handshake on out, then clear send_done. */ `onwrite(out_r, out_a) send_done <= 0; end end else if (send_read) begin /* A read address was latched earlier: run the onwrite handshake on out, then clear send_read. */ `onwrite(out_r, out_a) send_read <= 0; end end else if (in_addr_r) begin /* Read request: set in_addr_a, latch the low DATAWIDTH bits of the address as the BRAM read address and flag the pending reply. */ in_addr_a = 1; send_read <= 1; current_instruction_read_from <= in_addr_d[(`DATAWIDTH-1):0]; end else if (write_addr_r && write_data_r) begin /* Write request, taken only when address and data are both present: set both _a signals and raise write_flag so the BRAM stores write_data at in_addr. */ write_addr_a = 1; write_data_a = 1; send_done <= 1; 
write_flag <= 1; in_addr <= write_addr_d[(`DATAWIDTH-1):0]; write_data <= write_data_d; end else if (ihorn_full && launched) begin /* A packet is waiting for ihorn and launched is set: run the onwrite handshake on ihorn, then mark it empty. */ `onwrite(ihorn_r, ihorn_a) ihorn_full <= 0; end end else if (dhorn_full) begin /* NOTE(review): dhorn_full is only ever cleared in the visible code and starts at 0. */ `onwrite(dhorn_r, dhorn_a) dhorn_full <= 0; end end else if (command_valid) begin /* The word read from the BRAM is available: capture it and build the ihorn packet from its data and destination fields. */ command_valid <= 0; command = ramread; ihorn_full <= 1; `packet_data(ihorn_d) <= `instruction_data(command); `packet_dest(ihorn_d) <= `instruction_dest(command); end else if (cbd_pos < cbd_size) begin /* Words of the current code bag remain: issue the read address of the next one and advance the position. */ current_instruction_read_from <= cbd_base+cbd_pos; command_valid_read <= 1; cbd_pos <= cbd_pos + 1; end else begin /* Otherwise try to take a new code bag descriptor: the low CODEBAG_SIZE_BITS bits give the size and the bits above them, up to INSTRUCTION_WIDTH-1, the base. */ `onread(cbd_r, cbd_a) cbd_pos <= 0; cbd_size <= cbd_d[(`CODEBAG_SIZE_BITS-1):0]; cbd_base <= cbd_d[(`INSTRUCTION_WIDTH-1):(`CODEBAG_SIZE_BITS)]; end else begin /* Preload path. The first preload word sets preload_size. Until launched is set, each later word is written to the BRAM at preload_pos; the word at position 0 also supplies the base and size of the first code bag, which is started when the last preload word arrives. */ `onread(preload_r, preload_a) if (preload_size == 0) begin preload_size <= preload_d; end else if (!launched) begin write_flag <= 1; write_data <= preload_d; in_addr <= preload_pos; if (preload_pos == 0) begin temp_base = preload_d[(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)):(`CODEBAG_SIZE_BITS)]; temp_size = preload_d[(`CODEBAG_SIZE_BITS-1):0]; end if ((preload_pos+1) == preload_size) begin cbd_pos <= 0; cbd_base <= temp_base; cbd_size <= temp_size; launched <= 1; end preload_pos <= preload_pos + 1; end end end end end endmodule == Test ============================================================== // expected output #expect 12 #expect 13 #expect 14 // ships required in order to run this code #ship debug : Debug #ship memory : Memory // instructions not in any codebag are part of the "root codebag" // which is dispatched when the code is loaded memory.inCBD: literal BOB; deliver; BOB: { debug.in: literal 12; deliver; literal 13; deliver; literal 14; deliver; } == Constants ======================================================== == Contributors ========================================================= Adam Megacz