== Ports ===========================================================
data in: inCBD
-data in: inAddr.read
-data in: inAddr.write
-data in: inAddr.readMany
-data in: inAddr.writeMany
-data in: inData
-data in: inStride
-data in: inCount
+data in: inAddrRead
+data in: inAddrWrite
+data in: inDataWrite
data out: out
+== TeX ==============================================================
+
+The {\tt Memory} ship represents an interface to a storage space,
+which can be used to read from it or write to it. This storage space
+might be a fast on-chip cache, off-chip DRAM, or perhaps even a disk
+drive.
+
+Generally, distinct {\tt Memory} ships do not access the same backing
+storage, although this is not strictly prohibited.
+
+Each {\tt Memory} ship may have multiple {\it interfaces}, numbered
+starting with {\tt 0}. Each interface may have any subset of the
+following docks: {\tt inCBD}, {\tt inAddrRead}, {\tt inAddrWrite},
+{\tt inDataWrite}, and {\tt out}. If {\tt inCBD} or {\tt inAddrRead}
+is present on an interface, then {\tt out} must be present as well.
+If {\tt inAddrWrite} is present then {\tt inDataWrite} must be present
+as well.
+
+Each interface serializes the operations presented to it; this means
+that an interface with both read and write capabilities will not be
+able to read and write concurrently. Instead, a {\tt Memory} ship
+with the ability to read and write concurrently should have two
+interfaces, one which is read-only and one which is write-only.
+
+There may be multiple {\tt Memory} ships which interface to the same
+physical storage space. An implementation of Fleet must provide
+additional documentation to the programmer indicating which {\tt
+Memory} ships correspond to which storage spaces. A single {\tt
+Memory} ship may also access a ``virtual storage space'' formed by
+concatenating multiple physical storage spaces.
+
+\subsection*{Code Bag Fetch}
+
+When a word appears at the {\tt inCBD} port, it is treated as a {\it
+code bag descriptor}, as shown below:
+
+\begin{center}
+\setlength{\bitwidth}{3mm}
+{\tt
+\begin{bytefield}{37}
+ \bitheader[b]{36,6,5,0}\\
+ \bitbox{31}{Address}
+ \bitbox{6}{size}
+\end{bytefield}
+}
+\end{center}
+
+The descriptor is serviced as a memory read of {\tt size} consecutive
+words starting at {\tt Address} (that is, a read with a stride of one
+and a count of {\tt size}); the words are provided at {\tt out} in
+order of increasing address.
+
+\subsection*{Reading}
+
+When a word is delivered to {\tt inAddrRead}, the word residing in
+memory at that address is provided at {\tt out}. The {\tt c-flag} at
+the {\tt out} port is set to zero.
+
+\subsection*{Writing}
+
+When a word is delivered to {\tt inAddrWrite} and {\tt inDataWrite},
+the word at {\tt inDataWrite} is written to the address specified by
+{\tt inAddrWrite}. Once the word is successfully committed to memory,
+the value {\tt inAddr+inStride} is provided at {\tt out} (that is, the
+address of the next word to be written). The {\tt c-flag} at
+the {\tt out} port is set to one.
+
+\subsection*{To Do}
+
+Stride and count are not implemented.
+
+We need a way to do an ``unordered fetch'' -- a way to tell the memory
+unit to retrieve some block of words in any order it likes. This can
+considerably accelerate fetches when the first word of the region is
+not cached, but other parts are cached. This can also be used for
+dispatching codebags efficiently -- but how will we make sure that
+instructions destined for a given pump are dispatched in the correct
+order (source sequence guarantee)?
+
+A more advanced form would be ``unordered fetch of ordered records''
+-- the ability to specify a record size (in words), the offset of the
+first record, and the number of records to be fetched. The memory
+unit would then fetch the records in any order it likes, but would be
+sure to return the words comprising a record in the order in which
+they appear in memory. This feature could be used to solve the source
+sequence guarantee problem mentioned in the previous paragraph.
+
== Fleeterpreter ====================================================
private long[] mem = new long[0];
- public long readMem(int addr) { return mem[addr]; }
+ public long readMem(int addr) { return addr >= mem.length ? 0 : mem[addr]; }
/**
 * Stores {@code val} at word address {@code addr}, growing the backing
 * array first if the address lies beyond its current end.
 *
 * @param addr word address to write; must be non-negative
 * @param val  the word to store
 */
public void writeMem(int addr, long val) {
if (addr >= mem.length) {
// Over-allocate (roughly double the requested address) so that a run of
// increasing addresses does not reallocate on every write.
long[] newmem = new long[addr * 2 + 1];
// Preserve the words already stored and switch to the larger array;
// without these two steps the new array is discarded and the store
// below runs off the end of the old one.
System.arraycopy(mem, 0, newmem, 0, mem.length);
mem = newmem;
}
mem[addr] = val;
}
-
- public void dispatch(int addr, int size) {
- for(int i=addr; i<addr+size; i++) {
- Instruction instr = ((Interpreter)getFleet()).readInstruction(readMem(i));
- ((Interpreter)getFleet()).dispatch(instr, i);
- }
+ private Queue<Long> toDispatch = new LinkedList<Long>();
+ public void reset() { // returns this ship to its freshly-constructed state
+ super.reset();
+ mem = new long[0]; // discard every stored word; reads past the end yield 0 (see readMem)
+ toDispatch.clear(); // drop any words still queued by a code bag fetch
}
-
- public void boot(byte[] instructions) {
- Interpreter fleet = (Interpreter)getFleet();
- // load the iscratch and take note of the 0-address INCBD
- long launch = 0;
- for(int i=0; i<instructions.length; i+=6) {
- long word = 0;
- for(int j=0; j<6; j++)
- word = (word << 8) | (instructions[i+j] & 0xff);
- writeMem(i/6, word);
- if (i==0) launch = word;
- }
-
- // dispatch the 0-address INCBD
- int base = (int)(launch >> 6);
- base = base & ~(0xffffffff << 18);
- int size = (int)launch;
- size = size & ~(0xffffffff << 6);
- dispatch(base, size);
- }
-
- private long stride = 0;
- private long count = 0;
- private long addr = 0;
- private boolean writing = false;
-
public void service() { // performs the ship's work for one step: drains queued words, then handles at most one new request
+ if (toDispatch.size() > 0) { // words from an earlier code bag fetch are still waiting
+ if (!box_out.readyForDataFromShip()) return; // out cannot accept a word yet; do nothing this step
+ box_out.addDataFromShip(toDispatch.remove()); // emit the oldest queued word, then fall through to the checks below
+ }
if (box_inCBD.dataReadyForShip()) {
long val = box_inCBD.removeDataForShip();
- long addr = val >> 6;
- long size = val & 0x3f;
- dispatch((int)addr, (int)size);
- }
- if (count > 0 && writing) {
- if (box_inData.dataReadyForShip() && box_out.readyForDataFromShip()) {
- writeMem((int)addr, box_inData.removeDataForShip());
- box_out.addDataFromShip(0);
- count--;
- addr += stride;
- }
-
- } else if (count > 0 && !writing) {
- if (box_out.readyForDataFromShip()) {
- box_out.addDataFromShip(readMem((int)addr));
- count--;
- addr += stride;
- }
-
- } else if (box_inAddr.dataReadyForShip() && box_out.readyForDataFromShip()) {
- Packet packet = box_inAddr.peekPacketForShip();
- if (packet.destination.getDestinationName().equals("read")) {
- box_out.addDataFromShip(readMem((int)box_inAddr.removeDataForShip()));
- } else if (packet.destination.getDestinationName().equals("write") && box_inData.dataReadyForShip()) {
- writeMem((int)box_inAddr.removeDataForShip(),
- box_inData.removeDataForShip());
- box_out.addDataFromShip(0);
- } else if (packet.destination.getDestinationName().equals("writeMany")
- && box_inStride.dataReadyForShip()
- && box_inCount.dataReadyForShip()) {
- addr = box_inAddr.removeDataForShip();
- stride = box_inStride.removeDataForShip();
- count = box_inCount.removeDataForShip();
- writing = true;
- } else if (packet.destination.getDestinationName().equals("readMany")
- && box_inStride.dataReadyForShip()
- && box_inCount.dataReadyForShip()) {
- addr = box_inAddr.removeDataForShip();
- stride = box_inStride.removeDataForShip();
- count = box_inCount.removeDataForShip();
- writing = false;
- }
+ long addr = ((Interpreter)getFleet()).CBD_OFFSET.getval(val); // descriptor field extracted via the fleet's CBD_OFFSET
+ long size = ((Interpreter)getFleet()).CBD_SIZE.getval(val); // descriptor field extracted via the fleet's CBD_SIZE
+ for(int i=0; i<size; i++) // queue `size` consecutive words starting at addr; they are emitted one per later step
+ toDispatch.add(readMem((int)(addr+i)));
+ } else if (box_inAddrWrite.dataReadyForShip() && box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) { // write needs address, data, and room at out
+ writeMem((int)box_inAddrWrite.removeDataForShip(), box_inDataWrite.removeDataForShip());
+ box_out.addDataFromShip(0,true); // acknowledge with the value 0; second argument presumably the c-flag (set) -- TODO confirm
+ } else if (box_inAddrRead.dataReadyForShip() && box_out.readyForDataFromShip()) { // read needs an address and room at out
+ box_out.addDataFromShip(readMem((int)box_inAddrRead.removeDataForShip()),false); // second argument presumably the c-flag (clear) -- TODO confirm
}
}
== FleetSim ==============================================================
== FPGA ==============================================================
-`include "macros.v"
-`define BRAM_ADDR_WIDTH 14
-`define BRAM_DATA_WIDTH `INSTRUCTION_WIDTH
-`define BRAM_NAME some_bram
-
-/* bram.inc */
-module `BRAM_NAME(clk, we, a, dpra, di, spo, dpo);
- input clk;
- input we;
- input [(`BRAM_ADDR_WIDTH-1):0] a;
- input [(`BRAM_ADDR_WIDTH-1):0] dpra;
- input [(`BRAM_DATA_WIDTH-1):0] di;
- output [(`BRAM_DATA_WIDTH-1):0] spo;
- output [(`BRAM_DATA_WIDTH-1):0] dpo;
- reg [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0];
- reg [(`BRAM_ADDR_WIDTH-1):0] read_a;
- reg [(`BRAM_ADDR_WIDTH-1):0] read_dpra;
- always @(posedge clk) begin
- if (we)
- ram[a] <= di;
- read_a <= a;
- read_dpra <= dpra;
- end
- assign spo = ram[read_a];
- assign dpo = ram[read_dpra];
-endmodule
-/* bram.inc */
-
-module memory (clk,
- cbd_r, cbd_a_, cbd_d,
- in_addr_r, in_addr_a_, in_addr_d,
- write_data_r, write_data_a_, write_data_d,
- stride_r, stride_a_, stride_d,
- count_r, count_a_, count_d,
- out_r_, out_a, out_d_,
- preload_r, preload_a_, preload_d,
- ihorn_r_, ihorn_a, ihorn_d_,
- dhorn_r_, dhorn_a, dhorn_d_
- );
-
- input clk;
- `input(in_addr_r, in_addr_a, in_addr_a_, [(2+`DATAWIDTH-1):0], in_addr_d)
- `input(write_data_r, write_data_a, write_data_a_, [(`DATAWIDTH-1):0], write_data_d)
- `input(stride_r, stride_a, stride_a_, [(`DATAWIDTH-1):0], stride_d)
- `input(count_r, count_a, count_a_, [(`DATAWIDTH-1):0], count_d)
- `output(out_r, out_r_, out_a, [(`DATAWIDTH-1):0], out_d_)
- //`defreg(out_d_, [(`DATAWIDTH-1):0], out_d)
-
- `input(preload_r, preload_a, preload_a_, [(`DATAWIDTH-1):0], preload_d)
- `input(cbd_r, cbd_a, cbd_a_, [(`DATAWIDTH-1):0], cbd_d)
- `output(ihorn_r, ihorn_r_, ihorn_a, [(`INSTRUCTION_WIDTH-1):0], ihorn_d_)
- `defreg(ihorn_d_, [(`INSTRUCTION_WIDTH-1):0], ihorn_d)
- `output(dhorn_r, dhorn_r_, dhorn_a, [(`PACKET_WIDTH-1):0], dhorn_d_)
- `defreg(dhorn_d_, [(`PACKET_WIDTH-1):0], dhorn_d)
-
- reg ihorn_full;
- initial ihorn_full = 0;
- reg dhorn_full;
- initial dhorn_full = 0;
- reg command_valid;
- initial command_valid = 0;
-
- reg [(`BRAM_ADDR_WIDTH-1):0] preload_pos;
- reg [(`BRAM_ADDR_WIDTH-1):0] preload_size;
- initial preload_size = 0;
-
- reg [(`BRAM_ADDR_WIDTH-1):0] current_instruction_read_from;
- reg [(`BRAM_ADDR_WIDTH-1):0] temp_base;
- reg [(`CODEBAG_SIZE_BITS-1):0] temp_size;
- reg [(`BRAM_ADDR_WIDTH-1):0] cbd_base;
- reg [(`CODEBAG_SIZE_BITS-1):0] cbd_size;
- reg [(`CODEBAG_SIZE_BITS-1):0] cbd_pos;
- reg [(`INSTRUCTION_WIDTH-1):0] command;
- reg [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0];
- reg send_done;
- reg send_read;
-
- reg [(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0] temp;
- reg [(`DATAWIDTH-1):0] data;
-
- reg write_flag;
- reg [(`BRAM_ADDR_WIDTH-1):0] in_addr;
- reg [(`BRAM_DATA_WIDTH-1):0] write_data;
- wire [(`BRAM_DATA_WIDTH-1):0] ramread;
+ `define BRAM_ADDR_WIDTH 14
+ `define BRAM_SIZE (1<<(`BRAM_ADDR_WIDTH))
- reg command_valid_read;
- initial command_valid_read = 0;
+ reg [(`WORDWIDTH-1):0] ram [((`BRAM_SIZE)-1):0];
+ reg [(`BRAM_ADDR_WIDTH-1):0] addr1;
+ reg [(`BRAM_ADDR_WIDTH-1):0] addr2;
+ reg [(`WORDWIDTH-1):0] out1;
+ reg [(`WORDWIDTH-1):0] out2;
- reg launched;
- initial launched = 0;
+ reg out_w;
+ reg write_flag;
+ reg [(`BRAM_ADDR_WIDTH-1):0] cursor;
+ reg [(`CODEBAG_SIZE_BITS-1):0] counter;
- some_bram mybram(clk, write_flag, in_addr, current_instruction_read_from, write_data, not_connected, ramread);
- assign out_d_ = ramread;
+ assign out_d_ = { out_w, out1 };
+ // I use "blocking assignment" here in order to facilitate BRAM inference
always @(posedge clk) begin
+ write_flag = 0;
- write_flag <= 0;
-
- if (!in_addr_r && in_addr_a) in_addr_a = 0;
- if (!write_data_r && write_data_a) write_data_a = 0;
-
- if (command_valid_read) begin
- command_valid_read <= 0;
- command_valid <= 1;
-
- end else if (send_done) begin
- `onwrite(out_r, out_a)
- send_done <= 0;
- end
-
- end else if (send_read) begin
- `onwrite(out_r, out_a)
- send_read <= 0;
- end
-
- end else if (in_addr_r && !in_addr_d[`DATAWIDTH]) begin
- in_addr_a = 1;
- send_read <= 1;
- current_instruction_read_from <= in_addr_d[(`DATAWIDTH-1):0];
-
- end else if (in_addr_r && in_addr_d[`DATAWIDTH] && write_data_r) begin
- in_addr_a = 1;
- write_data_a = 1;
- send_done <= 1;
- write_flag <= 1;
- in_addr <= in_addr_d[(`DATAWIDTH-1):0];
- write_data <= write_data_d;
-
- end else if (ihorn_full && launched) begin
- `onwrite(ihorn_r, ihorn_a)
- ihorn_full <= 0;
- end
-
- end else if (dhorn_full) begin
- `onwrite(dhorn_r, dhorn_a)
- dhorn_full <= 0;
- end
-
- end else if (command_valid) begin
- command_valid <= 0;
- command = ramread;
- case (command[(`INSTRUCTION_WIDTH-1):(`INSTRUCTION_WIDTH-2)])
- 0: begin
- ihorn_full <= 1;
- ihorn_d <= command;
- end
- 1: begin
- dhorn_full <= 1;
- temp = command[(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0];
- temp = temp + ( { current_instruction_read_from, {(`CODEBAG_SIZE_BITS){1'b0}} });
- data[(`DATAWIDTH-1):(`CODEBAG_SIZE_BITS)] = temp;
- data[(`CODEBAG_SIZE_BITS-1):0] = command[(`CODEBAG_SIZE_BITS-1):0];
- `packet_data(dhorn_d) <= temp;
- `packet_dest(dhorn_d) <=
- command[(`INSTRUCTION_WIDTH-3):(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)+1)];
- end
- 2: begin
- dhorn_full <= 1;
- `packet_data(dhorn_d) <= { {(`DATAWIDTH-24){command[23]}}, command[23:0] };
- `packet_dest(dhorn_d) <= command[34:24];
- end
- 3: begin
- dhorn_full <= 1;
- `packet_data(dhorn_d) <= { {(`DATAWIDTH-24){command[23]}}, command[23:0] } + current_instruction_read_from;
- `packet_dest(dhorn_d) <= command[34:24];
- end
- endcase
-
- end else if (cbd_pos < cbd_size) begin
- current_instruction_read_from <= cbd_base+cbd_pos;
- command_valid_read <= 1;
- cbd_pos <= cbd_pos + 1;
-
+ if (rst) begin
+ `reset
+ cursor = 0;
+ counter = 0;
end else begin
- `onread(cbd_r, cbd_a)
- cbd_pos <= 0;
- cbd_size <= cbd_d[(`CODEBAG_SIZE_BITS-1):0];
- cbd_base <= cbd_d[(`INSTRUCTION_WIDTH-1):(`CODEBAG_SIZE_BITS)];
-
- end else begin
- `onread(preload_r, preload_a)
- if (preload_size == 0) begin
- preload_size <= preload_d;
- end else if (!launched) begin
- write_flag <= 1;
- write_data <= preload_d;
- in_addr <= preload_pos;
- if (preload_pos == 0) begin
- temp_base = preload_d[(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)):(`CODEBAG_SIZE_BITS)];
- temp_size = preload_d[(`CODEBAG_SIZE_BITS-1):0];
- end
- if ((preload_pos+1) == preload_size) begin
- cbd_pos <= 0;
- cbd_base <= temp_base;
- cbd_size <= temp_size;
- launched <= 1;
- end
- preload_pos <= preload_pos + 1;
- end
+ `cleanup
+
+ if (counter!=0) begin
+ if (`out_empty) begin
+ `fill_out
+ out_w = 0;
+ addr1 = cursor;
+ cursor = cursor + 1;
+ counter = counter - 1;
end
+
+ end else if (`inCBD_full) begin
+ cursor = inCBD_d[(`WORDWIDTH-1):(`CODEBAG_SIZE_BITS)];
+ counter = inCBD_d[(`CODEBAG_SIZE_BITS-1):0];
+ addr1 = cursor;
+ `drain_inCBD
+
+ end else if (`out_empty && `inAddrRead_full) begin
+ addr1 = inAddrRead_d[(`WORDWIDTH-1):0];
+ `drain_inAddrRead
+ `fill_out
+ out_w = 0;
+
+ end else if (`out_empty && `inAddrWrite_full && `inDataWrite_full) begin
+ write_flag = 1;
+ `drain_inAddrWrite
+ `drain_inDataWrite
+ `fill_out
+ addr2 = inAddrWrite_d[(`WORDWIDTH-1):0];
+ out_w = 1;
+
end
end
- end
-endmodule
-
+ // this must appear at the end of the block, outside of any if..then's
+ if (write_flag)
+ ram[addr2] <= inDataWrite_d;
+ out1 <= ram[addr1];
+ out2 <= ram[addr2];
+ end
+
== Test ==============================================================
+// Note: this only tests the read/write interfaces, not the inCBD interface
+// FIXME: test c-flag at out dock
+
// expected output
-#expect 12
-#expect 13
-#expect 14
+#expect 10
// ships required in order to run this code
#ship debug : Debug
#ship memory : Memory
-// instructions not in any codebag are part of the "root codebag"
-// which is dispatched when the code is loaded
+memory.inAddrWrite:
+ set word=3;
+ deliver;
+ deliver;
-BOB: sendto memory.inCBD;
-memory.inCBD: [*] take, deliver;
-debug.in: [*] take, deliver;
+memory.inDataWrite:
+ set word=4;
+ deliver;
+ set word=10;
+ deliver;
+memory.inAddrRead:
+ recv token;
+ set word=3;
+ deliver;
-// This codebag illustrates how to do a loop. Notice that this
-// is actually an uncontrolled data emitter -- it could clog the
-// switch fabric!
+memory.out:
+ collect;
+ collect;
+ send token to memory.inAddrRead;
+ collect;
+ send to debug.in;
-BOB: {
- 12: sendto debug.in;
- 13: sendto debug.in;
- 14: sendto debug.in;
-}
+debug.in:
+ set ilc=*;
+ recv, deliver;
== Constants ========================================================
-== TeX ==============================================================
-\begin{verbatim}
-TODO: count/stride
-TODO: multiple interfaces to a single memory
-\end{verbatim}
== Contributors =========================================================
Adam Megacz <megacz@cs.berkeley.edu>