6600dd56fae58cc64cd287626f0ce40304503b7a
[fleet.git] / ships / Memory.ship
1 ship: Memory
2
3 == Ports ===========================================================
4 data  in:    inCBD
5 data  in:    inAddrRead
6 data  in:    inAddrWrite
7 data  in:    inDataWrite
8 data  in:    inStride
9 data  in:    inCount
10
11 data  out:   out
12
13 == TeX ==============================================================
14
15 The {\tt Memory} ship represents an interface to a storage space,
16 which can be used to read from it or write to it.  This storage space
17 might be a fast on-chip cache, off-chip DRAM, or perhaps even a disk drive.
18
19 There may be multiple {\tt Memory} ships which interface to the same
20 physical storage space.  An implementation of Fleet must provide
21 additional documentation to the programmer indicating which {\tt
22 Memory} ships correspond to which storage spaces.  A single {\tt
23 Memory} ship may also access a ``virtual storage space'' formed by
24 concatenating multiple physical storage spaces.
25
26 \subsection*{Code Bag Fetch}
27
28 When a word appears at the {\tt inCBD} port, it is treated as a {\it
29 code bag descriptor}, as shown below:
30
31 \begin{center}
32 \setlength{\bitwidth}{3mm}
33 {\tt
34 \begin{bytefield}{37}
35   \bitheader[b]{36,6,5,0}\\
36   \bitbox{31}{Address} 
37   \bitbox{6}{size} 
38 \end{bytefield}
39 }
40 \end{center}
41
42 When a word arrives at the {\tt inCBD} port, it is treated as a memory
43 read with {\tt inAddrRead=Address}, {\tt inStride=1}, and {\tt
44 inCount=size}.
45
46 \subsection*{Reading}
47
48 When a word is delivered to {\tt inAddrRead}, the word residing in
49 memory at that address is provided at {\tt out}.
50
51 \subsection*{Writing}
52
53 When a word is delivered to {\tt inAddrWrite} and {\tt inDataWrite},
54 the word at {\tt inDataWrite} is written to the address specified by
55 {\tt inAddrWrite}.  Once the word is successfully committed to memory,
56 the value {\tt inAddrWrite+inStride} is provided at {\tt out} (that is, the
57 address of the next word to be written).
58
59 \subsection*{To Do}
60
61 Stride and count are not implemented.
62
63 We need a way to do an ``unordered fetch'' -- a way to tell the memory
64 unit to retrieve some block of words in any order it likes.  This can
65 considerably accelerate fetches when the first word of the region is
66 not cached, but other parts are cached.  This can also be used for
67 dispatching codebags efficiently -- but how will we make sure that
68 instructions destined for a given pump are dispatched in the correct
69 order (source sequence guarantee)?
70
71 A more advanced form would be ``unordered fetch of ordered records''
72 -- the ability to specify a record size (in words), the offset of the
73 first record, and the number of records to be fetched.  The memory
74 unit would then fetch the records in any order it likes, but would be
75 sure to return the words comprising a record in the order in which
76 they appear in memory.  This feature could be used to solve the source
77 sequence guarantee problem mentioned in the previous paragraph.
78
79 == Fleeterpreter ====================================================
80     private long[] mem = new long[0];
81     public long readMem(int addr) { return mem[addr]; }
82     public void writeMem(int addr, long val) {
83         if (addr >= mem.length) {
84             long[] newmem = new long[addr * 2 + 1];
85             System.arraycopy(mem, 0, newmem, 0, mem.length);
86             mem = newmem;
87         }
88         mem[addr] = val;
89     }
90
91     public void dispatch(int addr, int size) {
92         for(int i=addr; i<addr+size; i++) {
93             Instruction instr = ((Interpreter)getFleet()).readInstruction(readMem(i));
94             ((Interpreter)getFleet()).dispatch(instr, i);
95         }
96     }
97
98     public void boot(byte[] instructions) {
99         Interpreter fleet = (Interpreter)getFleet();
100         // load the iscratch and take note of the 0-address INCBD
101         long launch = 0;
102         for(int i=0; i<instructions.length; i+=6) {
103             long word = 0;
104             for(int j=0; j<6; j++)
105                 word = (word << 8) | (instructions[i+j] & 0xff);
106             writeMem(i/6, word);
107             if (i==0) launch = word;
108         }
109
110         // dispatch the 0-address INCBD
111         int base = (int)(launch >> 6);
112         base = base & ~(0xffffffff << 18);
113         int size = (int)launch;
114         size = size & ~(0xffffffff <<  6);
115         dispatch(base, size);
116     }
117
118     private long stride = 0;  // added to addr after every word moved by a readMany/writeMany transfer
119     private long count = 0;  // words still to move in the current multi-word transfer; service() starts a new operation only when this is not positive
120     private long addr = 0;  // address of the next word of the current multi-word transfer
121     private boolean writing = false;  // direction of the current multi-word transfer: true after writeMany, false after readMany
122
123     public void service() {
124         if (box_inCBD.dataReadyForShip()) {
125             long val = box_inCBD.removeDataForShip();
126             long addr = val >> 6;
127             long size = val & 0x3f;
128             dispatch((int)addr, (int)size);
129         }
130         if (count > 0 && writing) {
131             if (box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) {
132                writeMem((int)addr, box_inDataWrite.removeDataForShip());
133                box_out.addDataFromShip(0);
134                count--;
135                addr += stride;
136             }
137
138         } else if (count > 0 && !writing) {
139             if (box_out.readyForDataFromShip()) {
140                box_out.addDataFromShip(readMem((int)addr));
141                count--;
142                addr += stride;
143             }
144
145         } else if (box_inAddrRead.dataReadyForShip() && box_out.readyForDataFromShip()) {
146             Packet packet = box_inAddrRead.peekPacketForShip();
147             if (packet.destination.getDestinationName().equals("read")) {
148                 box_out.addDataFromShip(readMem((int)box_inAddrRead.removeDataForShip()));
149             } else if (packet.destination.getDestinationName().equals("write") && box_inDataWrite.dataReadyForShip()) {
150                 writeMem((int)box_inAddrRead.removeDataForShip(),
151                          box_inDataWrite.removeDataForShip());
152                 box_out.addDataFromShip(0);
153             } else if (packet.destination.getDestinationName().equals("writeMany")
154                        && box_inStride.dataReadyForShip()
155                        && box_inCount.dataReadyForShip()) {
156                 addr = box_inAddrRead.removeDataForShip();
157                 stride = box_inStride.removeDataForShip();
158                 count = box_inCount.removeDataForShip();
159                 writing = true;
160             } else if (packet.destination.getDestinationName().equals("readMany")
161                        && box_inStride.dataReadyForShip()
162                        && box_inCount.dataReadyForShip()) {
163                 addr = box_inAddrRead.removeDataForShip();
164                 stride = box_inStride.removeDataForShip();
165                 count = box_inCount.removeDataForShip();
166                 writing = false;
167             }
168         }
169     }
170
171 == FleetSim ==============================================================
172
173 == FPGA ==============================================================
174 `include "macros.v"
175 `define BRAM_ADDR_WIDTH 14
176 `define BRAM_DATA_WIDTH `INSTRUCTION_WIDTH
177 `define BRAM_NAME some_bram
178
179 /* bram.inc */
180 module `BRAM_NAME(clk, we, a, dpra, di, spo, dpo);  // two-port RAM: port a/di/spo reads and writes, port dpra/dpo only reads
181     input  clk; 
182     input  we;  // write enable for port a
183     input  [(`BRAM_ADDR_WIDTH-1):0] a;  // address of the read/write port
184     input  [(`BRAM_ADDR_WIDTH-1):0] dpra;  // address of the read-only port
185     input  [(`BRAM_DATA_WIDTH-1):0] di;  // data written at a when we is high
186     output [(`BRAM_DATA_WIDTH-1):0] spo;  // word at the registered copy of a
187     output [(`BRAM_DATA_WIDTH-1):0] dpo;  // word at the registered copy of dpra
188     reg    [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0];  // 2^BRAM_ADDR_WIDTH words of BRAM_DATA_WIDTH bits
189     reg    [(`BRAM_ADDR_WIDTH-1):0] read_a; 
190     reg    [(`BRAM_ADDR_WIDTH-1):0] read_dpra; 
191     always @(posedge clk) begin  // both addresses are registered here, so outputs follow an address one clock later
192         if (we) 
193             ram[a] <= di;  // synchronous write through port a
194         read_a <= a; 
195         read_dpra <= dpra; 
196     end
197     assign spo = ram[read_a]; 
198     assign dpo = ram[read_dpra]; 
199 endmodule 
200 /* bram.inc */
201
202 module memory (clk, 
203                cbd_r,          cbd_a_,         cbd_d,
204                in_addr_r,      in_addr_a_,     in_addr_d,
205                write_addr_r,   write_addr_a_,  write_addr_d,
206                write_data_r,   write_data_a_,  write_data_d,
207                stride_r,       stride_a_,      stride_d,
208                count_r,        count_a_,       count_d,
209                out_r_,         out_a,          out_d_,
210                preload_r,      preload_a_,     preload_d,
211                ihorn_r_,       ihorn_a,        ihorn_d_,
212                dhorn_r_,       dhorn_a,        dhorn_d_
213               );
214   // Memory ship.  Each port is an (_r, _a, _d) signal triple declared via macros.v helpers (presumably request/acknowledge/data).
215   input  clk;
216   `input(in_addr_r,      in_addr_a,     in_addr_a_,     [(2+`DATAWIDTH-1):0],       in_addr_d)
217   `input(write_addr_r,   write_addr_a,  write_addr_a_,  [(2+`DATAWIDTH-1):0],       write_addr_d)
218   `input(write_data_r,   write_data_a,  write_data_a_,  [(`DATAWIDTH-1):0],         write_data_d)
219   `input(stride_r,       stride_a,      stride_a_,      [(`DATAWIDTH-1):0],         stride_d)
220   `input(count_r,        count_a,       count_a_,       [(`DATAWIDTH-1):0],         count_d)
221   `output(out_r,         out_r_,        out_a,          [(`DATAWIDTH-1):0],         out_d_)
222   `input(preload_r,      preload_a,     preload_a_,     [(`DATAWIDTH-1):0],         preload_d)
223   `input(cbd_r,          cbd_a,         cbd_a_,         [(`DATAWIDTH-1):0],         cbd_d)
224   `output(ihorn_r,       ihorn_r_,      ihorn_a,        [(`PACKET_WIDTH-1):0], ihorn_d_)
225   `defreg(ihorn_d_,                                     [(`PACKET_WIDTH-1):0], ihorn_d)
226   `output(dhorn_r,       dhorn_r_,      dhorn_a,        [(`PACKET_WIDTH-1):0],      dhorn_d_)
227   `defreg(dhorn_d_,                                     [(`PACKET_WIDTH-1):0],      dhorn_d)
228   // Status flags.  NOTE(review): the stride and count ports declared above are never referenced in the logic below.
229   reg ihorn_full;  // a packet is staged in ihorn_d and has not been handed off yet
230   initial ihorn_full = 0;
231   reg dhorn_full;
232   initial dhorn_full = 0;
233   reg command_valid;  // ramread holds a fetched codebag word ready to be forwarded
234   initial command_valid = 0;
235   // Preload bookkeeping: number of words expected and number stored so far.
236   reg [(`BRAM_ADDR_WIDTH-1):0]    preload_pos;  // NOTE(review): no initial value, yet it is compared with 0 and used as the first write address
237   reg [(`BRAM_ADDR_WIDTH-1):0]    preload_size;
238   initial preload_size = 0;
239   // Codebag fetch state: cbd_base/cbd_size describe the bag being dispatched, cbd_pos counts the words requested so far.
240   reg [(`BRAM_ADDR_WIDTH-1):0]    current_instruction_read_from;  // address presented on the BRAM read-only port
241   reg [(`BRAM_ADDR_WIDTH-1):0]    temp_base;
242   reg [(`CODEBAG_SIZE_BITS-1):0]  temp_size;
243   reg [(`BRAM_ADDR_WIDTH-1):0]    cbd_base;
244   reg [(`CODEBAG_SIZE_BITS-1):0]  cbd_size;
245   reg [(`CODEBAG_SIZE_BITS-1):0]  cbd_pos;
246   reg [(`INSTRUCTION_WIDTH-1):0]  command;
247   reg [(`BRAM_DATA_WIDTH-1):0]    ram [((1<<(`BRAM_ADDR_WIDTH))-1):0];  // NOTE(review): never referenced in this module; storage is the mybram instance
248   reg                             send_done;  // a write was issued; the handshake on out is still pending
249   reg                             send_read;  // a read was issued; the handshake on out is still pending
250   // NOTE(review): temp and data are not referenced anywhere else in this module.
251   reg [(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0] temp;
252   reg [(`DATAWIDTH-1):0]                                     data;
253   // Registers driving the write port of mybram; write_flag is its write enable.
254   reg                             write_flag;
255   reg [(`BRAM_ADDR_WIDTH-1):0]    in_addr;
256   reg [(`BRAM_DATA_WIDTH-1):0]    write_data;
257
258   wire [(`BRAM_DATA_WIDTH-1):0]   ramread;
259   // Set when a codebag word has been requested; it postpones command_valid by one clock.
260   reg command_valid_read;
261   initial command_valid_read = 0;
262   // Set once the whole preload image has been stored; gates ihorn traffic and stops further preload writes.
263   reg launched;
264   initial launched = 0;
265
266   some_bram mybram(clk, write_flag, in_addr, current_instruction_read_from, write_data, not_connected, ramread);  // writes via (in_addr, write_data); reads via current_instruction_read_from; spo goes to the undeclared net not_connected
267   assign out_d_ = ramread;  // read data is driven straight onto the out port
268
269   always @(posedge clk) begin
270     // Default for this clock: write_flag stays high for one cycle only unless a branch below sets it again.
271     write_flag <= 0;
272     // Drop each input acknowledge once the matching request has been withdrawn.
273     if (!in_addr_r && in_addr_a) in_addr_a = 0;
274     if (!write_data_r && write_data_a) write_data_a = 0;
275     if (!write_addr_r && write_addr_a) write_addr_a = 0;
276     // Priority chain: at most one of the branches below acts per clock.
277     if (command_valid_read) begin  // one extra clock so the BRAM's registered read address takes effect before ramread is sampled
278       command_valid_read  <= 0;
279       command_valid       <= 1;
280
281     end else  if (send_done) begin  // after a write: wait on the out port (see the onwrite macro in macros.v)
282       `onwrite(out_r, out_a)
283         send_done <= 0;
284       end
285
286     end else  if (send_read) begin  // after a read: wait on the out port, which carries ramread
287       `onwrite(out_r, out_a)
288         send_read <= 0;
289       end
290
291     end else if (in_addr_r) begin  // single read: acknowledge and present the address on the BRAM read port
292       in_addr_a                        = 1;
293       send_read                       <= 1;
294       current_instruction_read_from   <= in_addr_d[(`DATAWIDTH-1):0];
295
296     end else if (write_addr_r && write_data_r) begin  // single write: needs both address and data
297       write_addr_a       = 1;
298       write_data_a       = 1;
299       send_done         <= 1;
300       write_flag        <= 1;
301       in_addr           <= write_addr_d[(`DATAWIDTH-1):0];
302       write_data        <= write_data_d;
303
304     end else if (ihorn_full && launched) begin  // a staged instruction packet is waiting on ihorn
305       `onwrite(ihorn_r, ihorn_a)
306         ihorn_full <= 0;
307       end
308
309     end else if (dhorn_full) begin  // NOTE(review): dhorn_full is never set anywhere in this module
310       `onwrite(dhorn_r, dhorn_a)
311         dhorn_full <= 0;
312       end
313
314     end else if (command_valid) begin  // turn the fetched word into a packet staged for ihorn
315       command_valid <= 0;
316       command = ramread;
317       ihorn_full  <= 1;
318       `packet_data(ihorn_d) <= `instruction_data(command);
319       `packet_dest(ihorn_d) <= `instruction_dest(command);
320
321     end else if (cbd_pos < cbd_size) begin  // request the next word of the codebag being dispatched
322       current_instruction_read_from <= cbd_base+cbd_pos;
323       command_valid_read            <= 1;
324       cbd_pos                       <= cbd_pos + 1;
325
326     end else begin  // nothing in flight: take a new codebag descriptor, otherwise a preload word
327       `onread(cbd_r, cbd_a)
328         cbd_pos       <= 0;
329         cbd_size      <= cbd_d[(`CODEBAG_SIZE_BITS-1):0];  // size occupies the low bits of the descriptor
330         cbd_base      <= cbd_d[(`INSTRUCTION_WIDTH-1):(`CODEBAG_SIZE_BITS)];  // base address occupies the bits above the size
331
332       end else begin
333         `onread(preload_r, preload_a)
334           if (preload_size == 0) begin  // the first preload word gives the number of words that follow
335             preload_size     <= preload_d;
336           end else if (!launched) begin  // later words are stored at consecutive addresses
337             write_flag <= 1;
338             write_data <= preload_d;
339             in_addr <= preload_pos;
340             if (preload_pos == 0) begin  // the word stored at address 0 also supplies base/size of the first codebag to run
341               temp_base = preload_d[(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)):(`CODEBAG_SIZE_BITS)];
342               temp_size = preload_d[(`CODEBAG_SIZE_BITS-1):0];
343             end
344             if ((preload_pos+1) == preload_size) begin  // last word stored: start dispatching that codebag
345               cbd_pos  <= 0;
346               cbd_base <= temp_base;
347               cbd_size <= temp_size;
348               launched <= 1;
349             end
350             preload_pos      <= preload_pos + 1;
351           end
352         end
353       end
354     end
355   end
356 endmodule
357
358   
359
360
361
362 == Test ==============================================================
363 // expected output
364 #expect 12
365 #expect 13
366 #expect 14
367
368 // ships required in order to run this code
369 #ship debug          : Debug
370 #ship memory         : Memory
371
372 // instructions not in any codebag are part of the "root codebag"
373 // which is dispatched when the code is loaded
374 // here the root codebag delivers BOB (presumably the descriptor of codebag BOB) to the Memory ship's inCBD port
375 memory.inCBD:
376   literal BOB;
377   deliver;
378 // codebag BOB delivers 12, 13 and 14 to debug.in, matching the #expect lines above
379 BOB: {
380   debug.in:
381     literal 12; deliver;
382     literal 13; deliver;
383     literal 14; deliver;
384 }
385
386
387 == Constants ========================================================
388
389 == Contributors =========================================================
390 Adam Megacz <megacz@cs.berkeley.edu>