doc updates to ships
[fleet.git] / ships / Memory.ship
1 ship: Memory
2
3 == Ports ===========================================================
4 data  in:    inCBD
5 data  in:    inAddrRead
6 data  in:    inAddrWrite
7 data  in:    inDataWrite
8 data  in:    inStride
9 data  in:    inCount
10
11 data  out:   out
12
13 == TeX ==============================================================
14
15 The {\tt Memory} ship represents an interface to a storage space,
16 which can be used to read from it or write to it.  This storage space
17 might be a fast on-chip cache, off-chip DRAM, or perhaps even a disk drive.
18
19 There may be multiple {\tt Memory} ships which interface to the same
20 physical storage space.  An implementation of Fleet must provide
21 additional documentation to the programmer indicating which {\tt
22 Memory} ships correspond to which storage spaces.  A single {\tt
23 Memory} ship may also access a ``virtual storage space'' formed by
24 concatenating multiple physical storage spaces.
25
26 \subsection*{Code Bag Fetch}
27
28 When a word appears at the {\tt inCBD} port, it is treated as a {\it
29 code bag descriptor}, as shown below:
30
31 \begin{center}
32 \setlength{\bitwidth}{3mm}
33 {\tt
34 \begin{bytefield}{37}
35   \bitheader[b]{36,6,5,0}\\
36   \bitbox{31}{Address} 
37   \bitbox{6}{size} 
38 \end{bytefield}
39 }
40 \end{center}
41
42 The code bag descriptor is then serviced as a memory read with
43 {\tt inAddrRead=Address}, {\tt inStride=1}, and {\tt
44 inCount=size}.
45
46 \subsection*{Reading}
47
48 When a word is delivered to {\tt inAddrRead}, the word residing in
49 memory at that address is provided at {\tt out}.
50
51 \subsection*{Writing}
52
53 When words have been delivered to both {\tt inAddrWrite} and {\tt
54 inDataWrite}, the word at {\tt inDataWrite} is written to the address
55 specified by {\tt inAddrWrite}.  Once the word is successfully committed
56 to memory, a word of undefined value is provided at {\tt out}.
57
58 \subsection*{To Do}
59
60 Stride and count are not implemented.
61
62 We need a way to do an ``unordered fetch'' --- a way to tell the memory
63 unit to retrieve some block of words in any order it likes.  This can
64 considerably accelerate fetches when the first word of the region is
65 not cached, but other parts are cached.  This can also be used for
66 dispatching codebags efficiently --- but how will we make sure that
67 instructions destined for a given pump are dispatched in the correct
68 order (source sequence guarantee)?
69
70 A more advanced form would be ``unordered fetch of ordered records''
71 --- the ability to specify a record size (in words), the offset of the
72 first record, and the number of records to be fetched.  The memory
73 unit would then fetch the records in any order it likes, but would be
74 sure to return the words comprising a record in the order in which
75 they appear in memory.  This feature could be used to solve the source
76 sequence guarantee problem mentioned in the previous paragraph.
77
78 == Fleeterpreter ====================================================
79     private long[] mem = new long[0];
80     public long readMem(int addr) { return mem[addr]; }
81     public void writeMem(int addr, long val) {
82         if (addr >= mem.length) {
83             long[] newmem = new long[addr * 2 + 1];
84             System.arraycopy(mem, 0, newmem, 0, mem.length);
85             mem = newmem;
86         }
87         mem[addr] = val;
88     }
89
90     public void dispatch(int addr, int size) {
91         for(int i=addr; i<addr+size; i++) {
92             Instruction instr = ((Interpreter)getFleet()).readInstruction(readMem(i));
93             ((Interpreter)getFleet()).dispatch(instr, i);
94         }
95     }
96
97     public void boot(byte[] instructions) {
98         Interpreter fleet = (Interpreter)getFleet();
99         // load the iscratch and take note of the 0-address INCBD
100         long launch = 0;
101         for(int i=0; i<instructions.length; i+=6) {
102             long word = 0;
103             for(int j=0; j<6; j++)
104                 word = (word << 8) | (instructions[i+j] & 0xff);
105             writeMem(i/6, word);
106             if (i==0) launch = word;
107         }
108
109         // dispatch the 0-address INCBD
110         int base = (int)(launch >> 6);
111         base = base & ~(0xffffffff << 18);
112         int size = (int)launch;
113         size = size & ~(0xffffffff <<  6);
114         dispatch(base, size);
115     }
116
117     private long stride = 0; // amount added to addr after each word of a readMany/writeMany transfer
118     private long count = 0; // words still to be transferred; 0 means no multi-word transfer is in progress
119     private long addr = 0; // address of the next word of the transfer in progress
120     private boolean writing = false; // true while the transfer in progress is a writeMany, false for a readMany
121
122     public void service() { // performs at most one step of work per call: a code bag dispatch and/or one word of a transfer or one new command (NOTE(review): presumably called repeatedly by the interpreter -- confirm)
123         if (box_inCBD.dataReadyForShip()) { // a code bag descriptor is waiting; this is a plain `if`, so the transfer logic below still runs in the same call
124             long val = box_inCBD.removeDataForShip();
125             long addr = val >> 6; // local variable: shadows the addr field, which is left untouched here
126             long size = val & 0x3f; // low 6 bits of the descriptor
127             dispatch((int)addr, (int)size);
128         }
129         if (count > 0 && writing) { // writeMany in progress: store one word per call
130             if (box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) { // needs both a data word and room at out
131                writeMem((int)addr, box_inDataWrite.removeDataForShip());
132                box_out.addDataFromShip(0); // one word (value 0) is emitted at out for every word written
133                count--;
134                addr += stride;
135             }
136
137         } else if (count > 0 && !writing) { // readMany in progress: emit one word per call
138             if (box_out.readyForDataFromShip()) {
139                box_out.addDataFromShip(readMem((int)addr));
140                count--;
141                addr += stride;
142             }
143
144         } else if (box_inAddrRead.dataReadyForShip() && box_out.readyForDataFromShip()) { // idle: look for a new command; every command's address arrives at inAddrRead
145             Packet packet = box_inAddrRead.peekPacketForShip(); // peek only: the word is removed below once the chosen command's other operands are present
146             if (packet.destination.getDestinationName().equals("read")) { // the packet's destination name selects the operation
147                 box_out.addDataFromShip(readMem((int)box_inAddrRead.removeDataForShip()));
148             } else if (packet.destination.getDestinationName().equals("write") && box_inDataWrite.dataReadyForShip()) { // the write address also comes from inAddrRead; box_inAddrWrite is never consulted in this method
149                 writeMem((int)box_inAddrRead.removeDataForShip(),
150                          box_inDataWrite.removeDataForShip());
151                 box_out.addDataFromShip(0); // completion word for the single write
152             } else if (packet.destination.getDestinationName().equals("writeMany")
153                        && box_inStride.dataReadyForShip()
154                        && box_inCount.dataReadyForShip()) { // start a multi-word write; the words themselves are stored by later calls
155                 addr = box_inAddrRead.removeDataForShip();
156                 stride = box_inStride.removeDataForShip();
157                 count = box_inCount.removeDataForShip();
158                 writing = true;
159             } else if (packet.destination.getDestinationName().equals("readMany")
160                        && box_inStride.dataReadyForShip()
161                        && box_inCount.dataReadyForShip()) { // start a multi-word read; the words themselves are emitted by later calls
162                 addr = box_inAddrRead.removeDataForShip();
163                 stride = box_inStride.removeDataForShip();
164                 count = box_inCount.removeDataForShip();
165                 writing = false;
166             }
167         }
168     }
169
170 == FleetSim ==============================================================
171
172 == FPGA ==============================================================
173 `include "macros.v"
174 `define BRAM_ADDR_WIDTH 14
175 `define BRAM_DATA_WIDTH `INSTRUCTION_WIDTH
176 `define BRAM_NAME some_bram
177
178 /* bram.inc: RAM with one write port and two read ports; module name, address width and data width come from the BRAM_* macros */
179 module `BRAM_NAME(clk, we, a, dpra, di, spo, dpo); 
180     input  clk; 
181     input  we; // write enable, sampled on the rising edge of clk
182     input  [(`BRAM_ADDR_WIDTH-1):0] a; // write address; also the read address for spo
183     input  [(`BRAM_ADDR_WIDTH-1):0] dpra; // read address for dpo
184     input  [(`BRAM_DATA_WIDTH-1):0] di; // data written to ram[a] when we is high
185     output [(`BRAM_DATA_WIDTH-1):0] spo; // contents of ram at the registered copy of a
186     output [(`BRAM_DATA_WIDTH-1):0] dpo; // contents of ram at the registered copy of dpra
187     reg    [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0]; // 2^BRAM_ADDR_WIDTH words of storage
188     reg    [(`BRAM_ADDR_WIDTH-1):0] read_a; 
189     reg    [(`BRAM_ADDR_WIDTH-1):0] read_dpra; 
190     always @(posedge clk) begin 
191         if (we) 
192             ram[a] <= di; 
193         read_a <= a; // both read addresses are registered, so each output reflects the address presented on the previous clock edge
194         read_dpra <= dpra; 
195     end
196     assign spo = ram[read_a]; 
197     assign dpo = ram[read_dpra]; 
198 endmodule 
199 /* bram.inc (end) */
200
201 module memory (clk, // Memory ship: serves reads/writes against one some_bram instance, fetches code bags into ihorn, and accepts a preload stream that fills the RAM and launches the root code bag
202                cbd_r,          cbd_a_,         cbd_d,
203                in_addr_r,      in_addr_a_,     in_addr_d,
204                write_addr_r,   write_addr_a_,  write_addr_d,
205                write_data_r,   write_data_a_,  write_data_d,
206                stride_r,       stride_a_,      stride_d,
207                count_r,        count_a_,       count_d,
208                out_r_,         out_a,          out_d_,
209                preload_r,      preload_a_,     preload_d,
210                ihorn_r_,       ihorn_a,        ihorn_d_,
211                dhorn_r_,       dhorn_a,        dhorn_d_
212               );
213
214   input  clk;
215   `input(in_addr_r,      in_addr_a,     in_addr_a_,     [(2+`DATAWIDTH-1):0],       in_addr_d)
216   `input(write_addr_r,   write_addr_a,  write_addr_a_,  [(2+`DATAWIDTH-1):0],       write_addr_d)
217   `input(write_data_r,   write_data_a,  write_data_a_,  [(`DATAWIDTH-1):0],         write_data_d)
218   `input(stride_r,       stride_a,      stride_a_,      [(`DATAWIDTH-1):0],         stride_d)
219   `input(count_r,        count_a,       count_a_,       [(`DATAWIDTH-1):0],         count_d)
220   `output(out_r,         out_r_,        out_a,          [(`DATAWIDTH-1):0],         out_d_)
221   `input(preload_r,      preload_a,     preload_a_,     [(`DATAWIDTH-1):0],         preload_d)
222   `input(cbd_r,          cbd_a,         cbd_a_,         [(`DATAWIDTH-1):0],         cbd_d)
223   `output(ihorn_r,       ihorn_r_,      ihorn_a,        [(`PACKET_WIDTH-1):0], ihorn_d_)
224   `defreg(ihorn_d_,                                     [(`PACKET_WIDTH-1):0], ihorn_d)
225   `output(dhorn_r,       dhorn_r_,      dhorn_a,        [(`PACKET_WIDTH-1):0],      dhorn_d_)
226   `defreg(dhorn_d_,                                     [(`PACKET_WIDTH-1):0],      dhorn_d)
227
228   reg ihorn_full; // set while an instruction packet in ihorn_d is waiting to be handed over
229   initial ihorn_full = 0;
230   reg dhorn_full; // NOTE(review): never set to 1 anywhere in this module, so the dhorn branch below cannot fire
231   initial dhorn_full = 0;
232   reg command_valid; // set when ramread holds the instruction requested by the code bag fetch
233   initial command_valid = 0;
234
235   reg [(`BRAM_ADDR_WIDTH-1):0]    preload_pos; // next RAM address to fill during preload; NOTE(review): no initial value is given in this module
236   reg [(`BRAM_ADDR_WIDTH-1):0]    preload_size; // number of words to preload; 0 means the size word has not arrived yet
237   initial preload_size = 0;
238
239   reg [(`BRAM_ADDR_WIDTH-1):0]    current_instruction_read_from; // drives the BRAM's dpra read address (data reads and instruction fetches share it)
240   reg [(`BRAM_ADDR_WIDTH-1):0]    temp_base; // base/size taken from preload word 0, held until preload completes
241   reg [(`CODEBAG_SIZE_BITS-1):0]  temp_size;
242   reg [(`BRAM_ADDR_WIDTH-1):0]    cbd_base; // code bag currently being fetched: base address, length, and next offset
243   reg [(`CODEBAG_SIZE_BITS-1):0]  cbd_size;
244   reg [(`CODEBAG_SIZE_BITS-1):0]  cbd_pos;
245   reg [(`INSTRUCTION_WIDTH-1):0]  command;
246   reg [(`BRAM_DATA_WIDTH-1):0]    ram [((1<<(`BRAM_ADDR_WIDTH))-1):0]; // NOTE(review): not referenced in this module; the storage actually used is inside mybram
247   reg                             send_done; // a write was issued; waiting to hand its completion word to out
248   reg                             send_read; // a read was issued; waiting to hand the word read to out
249
250   reg [(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0] temp; // not referenced in this module
251   reg [(`DATAWIDTH-1):0]                                     data; // not referenced in this module
252
253   reg                             write_flag; // BRAM write enable
254   reg [(`BRAM_ADDR_WIDTH-1):0]    in_addr; // BRAM write address (used by both the write port and preload)
255   reg [(`BRAM_DATA_WIDTH-1):0]    write_data; // BRAM write data
256
257   wire [(`BRAM_DATA_WIDTH-1):0]   ramread; // word read from the BRAM at current_instruction_read_from
258
259   reg command_valid_read; // fetch address issued this cycle; becomes command_valid one clock later
260   initial command_valid_read = 0;
261
262   reg launched; // set once the preload stream has been fully written and the root code bag scheduled
263   initial launched = 0;
264
265   some_bram mybram(clk, write_flag, in_addr, current_instruction_read_from, write_data, not_connected, ramread); // positional ports: clk, we, a, dpra, di, spo, dpo; not_connected is not declared in this module (presumably an implicit net -- confirm)
266   assign out_d_ = ramread; // out always presents the most recent BRAM read, including after a write
267
268   always @(posedge clk) begin // one if/else-if chain: only the first matching branch acts on a given clock edge
269
270     write_flag <= 0; // default: no BRAM write this cycle unless a branch below sets it again
271
272     if (!in_addr_r && in_addr_a) in_addr_a = 0; // drop each acknowledge once its request has been withdrawn
273     if (!write_data_r && write_data_a) write_data_a = 0;
274     if (!write_addr_r && write_addr_a) write_addr_a = 0;
275
276     if (command_valid_read) begin // one-cycle wait between issuing a fetch address and using ramread (presumably for the BRAM's registered read address -- confirm)
277       command_valid_read  <= 0;
278       command_valid       <= 1;
279
280     end else  if (send_done) begin // onwrite/onread come from macros.v; each evidently opens a conditional block that the following `end` closes
281       `onwrite(out_r, out_a)
282         send_done <= 0;
283       end
284
285     end else  if (send_read) begin
286       `onwrite(out_r, out_a)
287         send_read <= 0;
288       end
289
290     end else if (in_addr_r) begin // read request: acknowledge it and point the BRAM read port at the requested address
291       in_addr_a                        = 1;
292       send_read                       <= 1;
293       current_instruction_read_from   <= in_addr_d[(`DATAWIDTH-1):0];
294
295     end else if (write_addr_r && write_data_r) begin // write request: needs address and data together
296       write_addr_a       = 1;
297       write_data_a       = 1;
298       send_done         <= 1;
299       write_flag        <= 1;
300       in_addr           <= write_addr_d[(`DATAWIDTH-1):0];
301       write_data        <= write_data_d;
302
303     end else if (ihorn_full && launched) begin // hand the pending instruction packet to ihorn, only after launch
304       `onwrite(ihorn_r, ihorn_a)
305         ihorn_full <= 0;
306       end
307
308     end else if (dhorn_full) begin
309       `onwrite(dhorn_r, dhorn_a)
310         dhorn_full <= 0;
311       end
312
313     end else if (command_valid) begin // fetched instruction is available: split it into packet data and destination
314       command_valid <= 0;
315       command = ramread;
316       ihorn_full  <= 1;
317       `packet_data(ihorn_d) <= `instruction_data(command);
318       `packet_dest(ihorn_d) <= `instruction_dest(command);
319
320     end else if (cbd_pos < cbd_size) begin // more instructions remain in the current code bag: fetch the next one
321       current_instruction_read_from <= cbd_base+cbd_pos;
322       command_valid_read            <= 1;
323       cbd_pos                       <= cbd_pos + 1;
324
325     end else begin // nothing in flight: accept a new code bag descriptor, otherwise a preload word
326       `onread(cbd_r, cbd_a)
327         cbd_pos       <= 0;
328         cbd_size      <= cbd_d[(`CODEBAG_SIZE_BITS-1):0];
329         cbd_base      <= cbd_d[(`INSTRUCTION_WIDTH-1):(`CODEBAG_SIZE_BITS)];
330
331       end else begin
332         `onread(preload_r, preload_a)
333           if (preload_size == 0) begin // the first preload word is the number of words that follow
334             preload_size     <= preload_d;
335           end else if (!launched) begin // each later word is written to the next RAM address
336             write_flag <= 1;
337             write_data <= preload_d;
338             in_addr <= preload_pos;
339             if (preload_pos == 0) begin // the word for address 0 also supplies the base and size of the code bag to launch
340               temp_base = preload_d[(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)):(`CODEBAG_SIZE_BITS)];
341               temp_size = preload_d[(`CODEBAG_SIZE_BITS-1):0];
342             end
343             if ((preload_pos+1) == preload_size) begin // last preload word: schedule that code bag and mark the ship launched
344               cbd_pos  <= 0;
345               cbd_base <= temp_base;
346               cbd_size <= temp_size;
347               launched <= 1;
348             end
349             preload_pos      <= preload_pos + 1;
350           end
351         end
352       end
353     end
354   end
355 endmodule
356
357   
358
359
360
361 == Test ==============================================================
362 // expected output: one #expect per literal that codebag BOB delivers to debug.in
363 #expect 12
364 #expect 13
365 #expect 14
366
367 // ships required in order to run this code
368 #ship debug          : Debug
369 #ship memory         : Memory
370
371 // instructions not in any codebag are part of the "root codebag", which is
372 // dispatched when the code is loaded; here it delivers BOB to memory.inCBD
373
374 memory.inCBD:
375   literal BOB;
376   deliver;
377
378 BOB: {
379   debug.in:
380     literal 12; deliver;
381     literal 13; deliver;
382     literal 14; deliver;
383 }
384
385
386 == Constants ========================================================
387
388 == Contributors =========================================================
389 Adam Megacz <megacz@cs.berkeley.edu>