3 == Ports ===========================================================
13 == TeX ==============================================================
15 The {\tt Memory} ship represents an interface to a storage space,
16 through which words can be read or written. This storage space
17 might be a fast on-chip cache, off-chip DRAM, or perhaps even a disk drive.
19 There may be multiple {\tt Memory} ships which interface to the same
20 physical storage space. An implementation of Fleet must provide
21 additional documentation to the programmer indicating which {\tt
22 Memory} ships correspond to which storage spaces. A single {\tt
23 Memory} ship may also access a ``virtual storage space'' formed by
24 concatenating multiple physical storage spaces.
26 \subsection*{Code Bag Fetch}
28 When a word appears at the {\tt inCBD} port, it is treated as a {\it
29 code bag descriptor}, as shown below:
32 \setlength{\bitwidth}{3mm}
35 \bitheader[b]{36,6,5,0}\\
42 When a word arrives at the {\tt inCBD} port, it is treated as a memory
43 read with {\tt inAddrRead=Address}, {\tt inStride=1}, and {\tt
48 When a word is delivered to {\tt inAddrRead}, the word residing in
49 memory at that address is provided at {\tt out}.
53 When a word is delivered to {\tt inAddrWrite} and {\tt inDataWrite},
54 the word at {\tt inDataWrite} is written to the address specified by
55 {\tt inAddrWrite}. Once the word is successfully committed to memory,
56 the value {\tt inAddrWrite+inStride} is provided at {\tt out} (that is, the
57 address of the next word to be written).
61 Stride and count are not implemented.
63 We need a way to do an ``unordered fetch'' -- a way to tell the memory
64 unit to retrieve some block of words in any order it likes. This can
65 considerably accelerate fetches when the first word of the region is
66 not cached, but other parts are cached. This can also be used for
67 dispatching codebags efficiently -- but how will we make sure that
68 instructions destined for a given pump are dispatched in the correct
69 order (source sequence guarantee)?
71 A more advanced form would be ``unordered fetch of ordered records''
72 -- the ability to specify a record size (in words), the offset of the
73 first record, and the number of records to be fetched. The memory
74 unit would then fetch the records in any order it likes, but would be
75 sure to return the words comprising a record in the order in which
76 they appear in memory. This feature could be used to solve the source
77 sequence guarantee problem mentioned in the previous paragraph.
79 == Fleeterpreter ====================================================
80 private long[] mem = new long[0];
81 public long readMem(int addr) { return addr >= mem.length ? 0 : mem[addr]; } // reads the word at addr; addresses at or past the end of the backing array read as 0, like unwritten slots inside it, instead of throwing (negative addresses still throw)
82 public void writeMem(int addr, long val) {
83 if (addr >= mem.length) {
84 long[] newmem = new long[addr * 2 + 1];
85 System.arraycopy(mem, 0, newmem, 0, mem.length);
91 public void dispatch(int addr, int size) {
92 for(int i=addr; i<addr+size; i++) {
93 Instruction instr = ((Interpreter)getFleet()).readInstruction(readMem(i));
94 ((Interpreter)getFleet()).dispatch(instr, i);
98 public void boot(byte[] instructions) {
99 Interpreter fleet = (Interpreter)getFleet();
100 // load the iscratch and take note of the 0-address INCBD
102 for(int i=0; i<instructions.length; i+=6) {
104 for(int j=0; j<6; j++)
105 word = (word << 8) | (instructions[i+j] & 0xff);
107 if (i==0) launch = word;
110 // dispatch the 0-address INCBD
111 int base = (int)(launch >> 6);
112 base = base & ~(0xffffffff << 18);
113 int size = (int)launch;
114 size = size & ~(0xffffffff << 6);
115 dispatch(base, size);
118 private long stride = 0;
119 private long count = 0;
120 private long addr = 0;
121 private boolean writing = false;
123 public void service() {
124 if (box_inCBD.dataReadyForShip()) {
125 long val = box_inCBD.removeDataForShip();
126 long addr = val >> 6;
127 long size = val & 0x3f;
128 dispatch((int)addr, (int)size);
130 if (count > 0 && writing) {
131 if (box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) {
132 writeMem((int)addr, box_inDataWrite.removeDataForShip());
133 box_out.addDataFromShip(0);
138 } else if (count > 0 && !writing) {
139 if (box_out.readyForDataFromShip()) {
140 box_out.addDataFromShip(readMem((int)addr));
145 } else if (box_inAddrRead.dataReadyForShip() && box_out.readyForDataFromShip()) {
146 Packet packet = box_inAddrRead.peekPacketForShip();
147 if (packet.destination.getDestinationName().equals("read")) {
148 box_out.addDataFromShip(readMem((int)box_inAddrRead.removeDataForShip()));
149 } else if (packet.destination.getDestinationName().equals("write") && box_inDataWrite.dataReadyForShip()) {
150 writeMem((int)box_inAddrRead.removeDataForShip(),
151 box_inDataWrite.removeDataForShip());
152 box_out.addDataFromShip(0);
153 } else if (packet.destination.getDestinationName().equals("writeMany")
154 && box_inStride.dataReadyForShip()
155 && box_inCount.dataReadyForShip()) {
156 addr = box_inAddrRead.removeDataForShip();
157 stride = box_inStride.removeDataForShip();
158 count = box_inCount.removeDataForShip();
160 } else if (packet.destination.getDestinationName().equals("readMany")
161 && box_inStride.dataReadyForShip()
162 && box_inCount.dataReadyForShip()) {
163 addr = box_inAddrRead.removeDataForShip();
164 stride = box_inStride.removeDataForShip();
165 count = box_inCount.removeDataForShip();
171 == FleetSim ==============================================================
173 == FPGA ==============================================================
175 `define BRAM_ADDR_WIDTH 14
176 `define BRAM_DATA_WIDTH `INSTRUCTION_WIDTH
177 `define BRAM_NAME some_bram
180 module `BRAM_NAME(clk, we, a, dpra, di, spo, dpo);
183 input [(`BRAM_ADDR_WIDTH-1):0] a;
184 input [(`BRAM_ADDR_WIDTH-1):0] dpra;
185 input [(`BRAM_DATA_WIDTH-1):0] di;
186 output [(`BRAM_DATA_WIDTH-1):0] spo;
187 output [(`BRAM_DATA_WIDTH-1):0] dpo;
188 reg [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0];
189 reg [(`BRAM_ADDR_WIDTH-1):0] read_a;
190 reg [(`BRAM_ADDR_WIDTH-1):0] read_dpra;
191 always @(posedge clk) begin
197 assign spo = ram[read_a];
198 assign dpo = ram[read_dpra];
203 cbd_r, cbd_a_, cbd_d,
204 in_addr_r, in_addr_a_, in_addr_d,
205 write_addr_r, write_addr_a_, write_addr_d,
206 write_data_r, write_data_a_, write_data_d,
207 stride_r, stride_a_, stride_d,
208 count_r, count_a_, count_d,
209 out_r_, out_a, out_d_,
210 preload_r, preload_a_, preload_d,
211 ihorn_r_, ihorn_a, ihorn_d_,
212 dhorn_r_, dhorn_a, dhorn_d_
216 `input(in_addr_r, in_addr_a, in_addr_a_, [(2+`DATAWIDTH-1):0], in_addr_d)
217 `input(write_addr_r, write_addr_a, write_addr_a_, [(2+`DATAWIDTH-1):0], write_addr_d)
218 `input(write_data_r, write_data_a, write_data_a_, [(`DATAWIDTH-1):0], write_data_d)
219 `input(stride_r, stride_a, stride_a_, [(`DATAWIDTH-1):0], stride_d)
220 `input(count_r, count_a, count_a_, [(`DATAWIDTH-1):0], count_d)
221 `output(out_r, out_r_, out_a, [(`DATAWIDTH-1):0], out_d_)
222 `input(preload_r, preload_a, preload_a_, [(`DATAWIDTH-1):0], preload_d)
223 `input(cbd_r, cbd_a, cbd_a_, [(`DATAWIDTH-1):0], cbd_d)
224 `output(ihorn_r, ihorn_r_, ihorn_a, [(`PACKET_WIDTH-1):0], ihorn_d_)
225 `defreg(ihorn_d_, [(`PACKET_WIDTH-1):0], ihorn_d)
226 `output(dhorn_r, dhorn_r_, dhorn_a, [(`PACKET_WIDTH-1):0], dhorn_d_)
227 `defreg(dhorn_d_, [(`PACKET_WIDTH-1):0], dhorn_d)
230 initial ihorn_full = 0;
232 initial dhorn_full = 0;
234 initial command_valid = 0;
236 reg [(`BRAM_ADDR_WIDTH-1):0] preload_pos;
237 reg [(`BRAM_ADDR_WIDTH-1):0] preload_size;
238 initial preload_size = 0;
240 reg [(`BRAM_ADDR_WIDTH-1):0] current_instruction_read_from;
241 reg [(`BRAM_ADDR_WIDTH-1):0] temp_base;
242 reg [(`CODEBAG_SIZE_BITS-1):0] temp_size;
243 reg [(`BRAM_ADDR_WIDTH-1):0] cbd_base;
244 reg [(`CODEBAG_SIZE_BITS-1):0] cbd_size;
245 reg [(`CODEBAG_SIZE_BITS-1):0] cbd_pos;
246 reg [(`INSTRUCTION_WIDTH-1):0] command;
247 reg [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0];
251 reg [(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0] temp;
252 reg [(`DATAWIDTH-1):0] data;
255 reg [(`BRAM_ADDR_WIDTH-1):0] in_addr;
256 reg [(`BRAM_DATA_WIDTH-1):0] write_data;
258 wire [(`BRAM_DATA_WIDTH-1):0] ramread;
260 reg command_valid_read;
261 initial command_valid_read = 0;
264 initial launched = 0;
266 some_bram mybram(clk, write_flag, in_addr, current_instruction_read_from, write_data, not_connected, ramread);
267 assign out_d_ = ramread;
269 always @(posedge clk) begin
273 if (!in_addr_r && in_addr_a) in_addr_a = 0;
274 if (!write_data_r && write_data_a) write_data_a = 0;
275 if (!write_addr_r && write_addr_a) write_addr_a = 0;
277 if (command_valid_read) begin
278 command_valid_read <= 0;
281 end else if (send_done) begin
282 `onwrite(out_r, out_a)
286 end else if (send_read) begin
287 `onwrite(out_r, out_a)
291 end else if (in_addr_r) begin
294 current_instruction_read_from <= in_addr_d[(`DATAWIDTH-1):0];
296 end else if (write_addr_r && write_data_r) begin
301 in_addr <= write_addr_d[(`DATAWIDTH-1):0];
302 write_data <= write_data_d;
304 end else if (ihorn_full && launched) begin
305 `onwrite(ihorn_r, ihorn_a)
309 end else if (dhorn_full) begin
310 `onwrite(dhorn_r, dhorn_a)
314 end else if (command_valid) begin
318 `packet_data(ihorn_d) <= `instruction_data(command);
319 `packet_dest(ihorn_d) <= `instruction_dest(command);
321 end else if (cbd_pos < cbd_size) begin
322 current_instruction_read_from <= cbd_base+cbd_pos;
323 command_valid_read <= 1;
324 cbd_pos <= cbd_pos + 1;
327 `onread(cbd_r, cbd_a)
329 cbd_size <= cbd_d[(`CODEBAG_SIZE_BITS-1):0];
330 cbd_base <= cbd_d[(`INSTRUCTION_WIDTH-1):(`CODEBAG_SIZE_BITS)];
333 `onread(preload_r, preload_a)
334 if (preload_size == 0) begin
335 preload_size <= preload_d;
336 end else if (!launched) begin
338 write_data <= preload_d;
339 in_addr <= preload_pos;
340 if (preload_pos == 0) begin
341 temp_base = preload_d[(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)):(`CODEBAG_SIZE_BITS)];
342 temp_size = preload_d[(`CODEBAG_SIZE_BITS-1):0];
344 if ((preload_pos+1) == preload_size) begin
346 cbd_base <= temp_base;
347 cbd_size <= temp_size;
350 preload_pos <= preload_pos + 1;
362 == Test ==============================================================
368 // ships required in order to run this code
370 #ship memory : Memory
372 // instructions not in any codebag are part of the "root codebag"
373 // which is dispatched when the code is loaded
387 == Constants ========================================================
389 == Contributors =========================================================
390 Adam Megacz <megacz@cs.berkeley.edu>