X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ships%2FMemory.ship;h=f49d3dd90c57d20f031940b569a9e1d83e48c9cd;hb=f109c425b886206b95a5c95e726d742c0e9fe09d;hp=b330ecb2016033f6fd96262941483ab81d6255d3;hpb=231b1594e84c2a647dce1bad91b80f0a18f77620;p=fleet.git diff --git a/ships/Memory.ship b/ships/Memory.ship index b330ecb..f49d3dd 100644 --- a/ships/Memory.ship +++ b/ships/Memory.ship @@ -2,19 +2,101 @@ ship: Memory == Ports =========================================================== data in: inCBD -data in: inAddr.read -data in: inAddr.write -data in: inAddr.readMany -data in: inAddr.writeMany -data in: inData -data in: inStride -data in: inCount +data in: inAddrRead +data in: inAddrWrite +data in: inDataWrite data out: out +== TeX ============================================================== + +The {\tt Memory} ship represents an interface to a storage space, +which can be used to read from it or write to it. This storage space +might be a fast on-chip cache, off chip DRAM, or perhaps even a disk +drive. + +Generally, distinct {\tt Memory} ships do not access the same backing +storage, although this is not strictly prohibited. + +Each {\tt Memory} ship may have multiple {\it interfaces}, numbered +starting with {\tt 0}. Each interface may have any subset of the +following docks: {\tt inCBD}, {\tt inAddrRead}, {\tt inAddrWrite}, +{\tt inDataWrite}, and {\tt out}. If {\tt inCBD} or {\tt inAddrRead} +is present on an interface, then {\tt out} must be present as well. +If {\tt inAddrWrite} is present then {\tt inDataWrite} must be present +as well. + +Each interface serializes the operations presented to it; this means +that an interface with both read and write capabilities will not be +able to read and write concurrently. Instead, a {\tt Memory} ship +with the ability to read and write concurrently should have two +interfaces, one which is read-only and one which is write-only. 
+ +There may be multiple {\tt Memory} ships which interface to the same +physical storage space. An implementation of Fleet must provide +additional documentation to the programmer indicating which {\tt +Memory} ships correspond to which storage spaces. A single {\tt +Memory} ship may also access a ``virtual storage space'' formed by +concatenating multiple physical storage spaces. + +\subsection*{Code Bag Fetch} + +When a word appears at the {\tt inCBD} port, it is treated as a {\it +code bag descriptor}, as shown below: + +\begin{center} +\setlength{\bitwidth}{3mm} +{\tt +\begin{bytefield}{37} + \bitheader[b]{36,6,5,0}\\ + \bitbox{31}{Address} + \bitbox{6}{size} +\end{bytefield} +} +\end{center} + +When a word arrives at the {\tt inCBD} port, it is treated as a memory +read with {\tt inAddrRead=Address}, {\tt inStride=1}, and {\tt +inCount=size}. + +\subsection*{Reading} + +When a word is delivered to {\tt inAddrRead}, the word residing in +memory at that address is provided at {\tt out}. The {\tt c-flag} at +the {\tt out} port is set to zero. + +\subsection*{Writing} + +When a word is delivered to {\tt inAddrWrite} and {\tt inDataWrite}, +the word at {\tt inDataWrite} is written to the address specified by +{\tt inAddrWrite}. Once the word is successfully committed to memory, +the value {\tt inAddr+inStride} is provided at {\tt out} (that is, the +address of the next word to be written). The {\tt c-flag} at +the {\tt out} port is set to one. + +\subsection*{To Do} + +Stride and count are not implemented. + +We need a way to do an ``unordered fetch'' -- a way to tell the memory +unit to retrieve some block of words in any order it likes. This can +considerably accelerate fetches when the first word of the region is +not cached, but other parts are cached. This can also be used for +dispatching codebags efficiently -- but how will we make sure that +instructions destined for a given pump are dispatched in the correct +order (source sequence guarantee)? 
+ +A more advanced form would be ``unordered fetch of ordered records'' +-- the ability to specify a record size (in words), the offset of the +first record, and the number of records to be fetched. The memory +unit would then fetch the records in any order it likes, but would be +sure to return the words comprising a record in the order in which +they appear in memory. This feature could be used to solve the source +sequence guarantee problem mentioned in the previous paragraph. + == Fleeterpreter ==================================================== private long[] mem = new long[0]; - public long readMem(int addr) { return mem[addr]; } + public long readMem(int addr) { return addr >= mem.length ? 0 : mem[addr]; } public void writeMem(int addr, long val) { if (addr >= mem.length) { long[] newmem = new long[addr * 2 + 1]; @@ -24,312 +106,171 @@ data out: out mem[addr] = val; } - public void dispatch(int addr, int size) { - for(int i=addr; i> 6); - base = base & ~(0xffffffff << 18); - int size = (int)launch; - size = size & ~(0xffffffff << 6); - dispatch(base, size); - } - private long stride = 0; private long count = 0; private long addr = 0; private boolean writing = false; + private Queue toDispatch = new LinkedList(); public void service() { - if (box_inCBD.dataReadyForShip()) { + + if (toDispatch.size() > 0) { + //if (!box_out.readyForDataFromShip()) return; + //box_out.addDataFromShip(toDispatch.remove()); + getInterpreter().dispatch(getInterpreter().readInstruction(toDispatch.remove(), getDock("out"))); + } + + if (box_inCBD.dataReadyForShip() && box_out.readyForDataFromShip()) { long val = box_inCBD.removeDataForShip(); long addr = val >> 6; long size = val & 0x3f; - dispatch((int)addr, (int)size); + for(int i=0; i 0 && writing) { - if (box_inData.dataReadyForShip() && box_out.readyForDataFromShip()) { - writeMem((int)addr, box_inData.removeDataForShip()); - box_out.addDataFromShip(0); - count--; - addr += stride; - } - - } else if (count > 0 && !writing) { - 
if (box_out.readyForDataFromShip()) { - box_out.addDataFromShip(readMem((int)addr)); - count--; - addr += stride; + if (count > 0) { + if (writing) { + if (box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) { + writeMem((int)addr, box_inDataWrite.removeDataForShip()); + box_out.addDataFromShip(0); + count--; + addr += stride; + } + } else { + if (box_out.readyForDataFromShip()) { + box_out.addDataFromShip(readMem((int)addr)); + count--; + addr += stride; + } } - } else if (box_inAddr.dataReadyForShip() && box_out.readyForDataFromShip()) { - Packet packet = box_inAddr.peekPacketForShip(); - if (packet.destination.getDestinationName().equals("read")) { - box_out.addDataFromShip(readMem((int)box_inAddr.removeDataForShip())); - } else if (packet.destination.getDestinationName().equals("write") && box_inData.dataReadyForShip()) { - writeMem((int)box_inAddr.removeDataForShip(), - box_inData.removeDataForShip()); - box_out.addDataFromShip(0); - } else if (packet.destination.getDestinationName().equals("writeMany") - && box_inStride.dataReadyForShip() - && box_inCount.dataReadyForShip()) { - addr = box_inAddr.removeDataForShip(); - stride = box_inStride.removeDataForShip(); - count = box_inCount.removeDataForShip(); - writing = true; - } else if (packet.destination.getDestinationName().equals("readMany") - && box_inStride.dataReadyForShip() - && box_inCount.dataReadyForShip()) { - addr = box_inAddr.removeDataForShip(); - stride = box_inStride.removeDataForShip(); - count = box_inCount.removeDataForShip(); - writing = false; - } + } else if (box_inAddrRead.dataReadyForShip()) { + addr = box_inAddrRead.removeDataForShip(); + stride = 0; + count = 1; + writing = false; + + } else if (box_inAddrWrite.dataReadyForShip()) { + addr = box_inAddrWrite.removeDataForShip(); + stride = 0; + count = 1; + writing = true; } } == FleetSim ============================================================== == FPGA ============================================================== 
-`include "macros.v" -`define BRAM_ADDR_WIDTH 14 -`define BRAM_DATA_WIDTH `INSTRUCTION_WIDTH -`define BRAM_NAME some_bram -`include "bram.inc" - -module memory (clk, - cbd_r, cbd_a_, cbd_d, - in_addr_r, in_addr_a_, in_addr_d, - write_data_r, write_data_a_, write_data_d, - stride_r, stride_a_, stride_d, - count_r, count_a_, count_d, - out_r_, out_a, out_d_, - preload_r, preload_a_, preload_d, - ihorn_r_, ihorn_a, ihorn_d_, - dhorn_r_, dhorn_a, dhorn_d_ - ); - - input clk; - `input(in_addr_r, in_addr_a, in_addr_a_, [(2+`DATAWIDTH-1):0], in_addr_d) - `input(write_data_r, write_data_a, write_data_a_, [(`DATAWIDTH-1):0], write_data_d) - `input(stride_r, stride_a, stride_a_, [(`DATAWIDTH-1):0], stride_d) - `input(count_r, count_a, count_a_, [(`DATAWIDTH-1):0], count_d) - `output(out_r, out_r_, out_a, [(`DATAWIDTH-1):0], out_d_) - //`defreg(out_d_, [(`DATAWIDTH-1):0], out_d) - - `input(preload_r, preload_a, preload_a_, [(`DATAWIDTH-1):0], preload_d) - `input(cbd_r, cbd_a, cbd_a_, [(`DATAWIDTH-1):0], cbd_d) - `output(ihorn_r, ihorn_r_, ihorn_a, [(`INSTRUCTION_WIDTH-1):0], ihorn_d_) - `defreg(ihorn_d_, [(`INSTRUCTION_WIDTH-1):0], ihorn_d) - `output(dhorn_r, dhorn_r_, dhorn_a, [(`PACKET_WIDTH-1):0], dhorn_d_) - `defreg(dhorn_d_, [(`PACKET_WIDTH-1):0], dhorn_d) - - reg ihorn_full; - initial ihorn_full = 0; - reg dhorn_full; - initial dhorn_full = 0; - reg command_valid; - initial command_valid = 0; - - reg [(`BRAM_ADDR_WIDTH-1):0] preload_pos; - reg [(`BRAM_ADDR_WIDTH-1):0] preload_size; - initial preload_size = 0; - - reg [(`BRAM_ADDR_WIDTH-1):0] current_instruction_read_from; - reg [(`BRAM_ADDR_WIDTH-1):0] temp_base; - reg [(`CODEBAG_SIZE_BITS-1):0] temp_size; - reg [(`BRAM_ADDR_WIDTH-1):0] cbd_base; - reg [(`CODEBAG_SIZE_BITS-1):0] cbd_size; - reg [(`CODEBAG_SIZE_BITS-1):0] cbd_pos; - reg [(`INSTRUCTION_WIDTH-1):0] command; - reg [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0]; - reg send_done; - reg send_read; - - reg 
[(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0] temp; - reg [(`DATAWIDTH-1):0] data; - - reg write_flag; - reg [(`BRAM_ADDR_WIDTH-1):0] in_addr; - reg [(`BRAM_DATA_WIDTH-1):0] write_data; - wire [(`BRAM_DATA_WIDTH-1):0] ramread; + `define BRAM_ADDR_WIDTH 14 + `define BRAM_SIZE (1<<(`BRAM_ADDR_WIDTH)) - reg command_valid_read; - initial command_valid_read = 0; + reg [(`WORDWIDTH-1):0] ram [((`BRAM_SIZE)-1):0]; + reg [(`BRAM_ADDR_WIDTH-1):0] addr1; + reg [(`BRAM_ADDR_WIDTH-1):0] addr2; + reg [(`WORDWIDTH-1):0] out1; + reg [(`WORDWIDTH-1):0] out2; - reg launched; - initial launched = 0; + reg out_w; + reg write_flag; + reg [(`BRAM_ADDR_WIDTH-1):0] cursor; + reg [(`CODEBAG_SIZE_BITS-1):0] counter; - some_bram mybram(clk, write_flag, in_addr, current_instruction_read_from, write_data, not_connected, ramread); - assign out_d_ = ramread; + assign out_d_ = { out_w, out1 }; + // I use "blocking assignment" here in order to facilitate BRAM inference always @(posedge clk) begin + write_flag = 0; - write_flag <= 0; - - if (!in_addr_r && in_addr_a) in_addr_a = 0; - if (!write_data_r && write_data_a) write_data_a = 0; - - if (command_valid_read) begin - command_valid_read <= 0; - command_valid <= 1; - - end else if (send_done) begin - `onwrite(out_r, out_a) - send_done <= 0; - end - - end else if (send_read) begin - `onwrite(out_r, out_a) - send_read <= 0; - end - - end else if (in_addr_r && !in_addr_d[`DATAWIDTH]) begin - in_addr_a = 1; - send_read <= 1; - current_instruction_read_from <= in_addr_d[(`DATAWIDTH-1):0]; - - end else if (in_addr_r && in_addr_d[`DATAWIDTH] && write_data_r) begin - in_addr_a = 1; - write_data_a = 1; - send_done <= 1; - write_flag <= 1; - in_addr <= in_addr_d[(`DATAWIDTH-1):0]; - write_data <= write_data_d; - - end else if (ihorn_full && launched) begin - `onwrite(ihorn_r, ihorn_a) - ihorn_full <= 0; - end - - end else if (dhorn_full) begin - `onwrite(dhorn_r, dhorn_a) - dhorn_full <= 0; - end - - end else if (command_valid) begin - 
command_valid <= 0; - command = ramread; - case (command[(`INSTRUCTION_WIDTH-1):(`INSTRUCTION_WIDTH-2)]) - 0: begin - ihorn_full <= 1; - ihorn_d <= command; - end - 1: begin - dhorn_full <= 1; - temp = command[(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0]; - temp = temp + ( { current_instruction_read_from, {(`CODEBAG_SIZE_BITS){1'b0}} }); - data[(`DATAWIDTH-1):(`CODEBAG_SIZE_BITS)] = temp; - data[(`CODEBAG_SIZE_BITS-1):0] = command[(`CODEBAG_SIZE_BITS-1):0]; - `packet_data(dhorn_d) <= temp; - `packet_dest(dhorn_d) <= - command[(`INSTRUCTION_WIDTH-3):(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)+1)]; - end - 2: begin - dhorn_full <= 1; - `packet_data(dhorn_d) <= { {(`DATAWIDTH-24){command[23]}}, command[23:0] }; - `packet_dest(dhorn_d) <= command[34:24]; - end - 3: begin - dhorn_full <= 1; - `packet_data(dhorn_d) <= { {(`DATAWIDTH-24){command[23]}}, command[23:0] } + current_instruction_read_from; - `packet_dest(dhorn_d) <= command[34:24]; - end - endcase - - end else if (cbd_pos < cbd_size) begin - current_instruction_read_from <= cbd_base+cbd_pos; - command_valid_read <= 1; - cbd_pos <= cbd_pos + 1; - + if (!rst) begin + `reset + cursor = 0; + counter = 0; end else begin - `onread(cbd_r, cbd_a) - cbd_pos <= 0; - cbd_size <= cbd_d[(`CODEBAG_SIZE_BITS-1):0]; - cbd_base <= cbd_d[(`INSTRUCTION_WIDTH-1):(`CODEBAG_SIZE_BITS)]; - - end else begin - `onread(preload_r, preload_a) - if (preload_size == 0) begin - preload_size <= preload_d; - end else if (!launched) begin - write_flag <= 1; - write_data <= preload_d; - in_addr <= preload_pos; - if (preload_pos == 0) begin - temp_base = preload_d[(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)):(`CODEBAG_SIZE_BITS)]; - temp_size = preload_d[(`CODEBAG_SIZE_BITS-1):0]; - end - if ((preload_pos+1) == preload_size) begin - cbd_pos <= 0; - cbd_base <= temp_base; - cbd_size <= temp_size; - launched <= 1; - end - preload_pos <= preload_pos + 1; - end + `flush + `cleanup + + if (counter!=0) begin + if (`out_empty) begin 
+ `fill_out + out_w = 0; + addr1 = cursor; + cursor = cursor + 1; + counter = counter - 1; end + + end else if (`inCBD_full) begin + cursor = inCBD_d[(`WORDWIDTH-1):(`CODEBAG_SIZE_BITS)]; + counter = inCBD_d[(`CODEBAG_SIZE_BITS-1):0]; + addr1 = cursor; + `drain_inCBD + + end else if (`out_empty && `inAddrRead_full) begin + addr1 = inAddrRead_d[(`WORDWIDTH-1):0]; + `drain_inAddrRead + `fill_out + out_w = 0; + + end else if (`out_empty && `inAddrWrite_full && `inDataWrite_full) begin + write_flag = 1; + `drain_inAddrWrite + `drain_inDataWrite + `fill_out + addr2 = inAddrWrite_d[(`WORDWIDTH-1):0]; + out_w = 1; + end end - end -endmodule - + // this must appear at the end of the block, outside of any if..then's + if (write_flag) + ram[addr2] <= inDataWrite_d; + out1 <= ram[addr1]; + out2 <= ram[addr2]; + end + == Test ============================================================== +// Note: this only tests the read/write interfaces, not the inCBD interface +// FIXME: test c-flag at out dock + // expected output -#expect 12 -#expect 13 -#expect 14 +#expect 10 // ships required in order to run this code #ship debug : Debug #ship memory : Memory -// instructions not in any codebag are part of the "root codebag" -// which is dispatched when the code is loaded +memory.inAddrWrite: + set word=3; + deliver; + deliver; -BOB: sendto memory.inCBD; -memory.inCBD: [*] take, deliver; -debug.in: [*] take, deliver; +memory.inDataWrite: + set word=4; + deliver; + set word=10; + deliver; +memory.inAddrRead: + recv token; + set word=3; + deliver; -// This codebag illustrates how to do a loop. Notice that this -// is actually an uncontrolled data emitter -- it could clog the -// switch fabric! 
+memory.out: + collect; + collect; + send token to memory.inAddrRead; + collect; + send to debug.in; -BOB: { - 12: sendto debug.in; - 13: sendto debug.in; - 14: sendto debug.in; -} +debug.in: + set ilc=*; + recv, deliver; == Constants ======================================================== -== TeX ============================================================== -\begin{verbatim} -TODO: count/stride -TODO: multiple interfaces to a single memory -\end{verbatim} == Contributors ========================================================= Adam Megacz