disable Alu3 test temporarily (clogs switch fabric with two-instruction literals)
[fleet.git] / ships / Memory.ship
1 ship: Memory
2
3 == Ports ===========================================================
4 data  in:    inCBD
5 data  in:    inAddrRead
6 data  in:    inAddrWrite
7 data  in:    inDataWrite
8 data  in:    inStride
9 data  in:    inCount
10
11 data  out:   out
12
13 == TeX ==============================================================
14
15 The {\tt Memory} ship represents an interface to a storage space,
16 which can be used to read from it or write to it.  This storage space
17 might be a fast on-chip cache, off-chip DRAM, or perhaps even a disk drive.
18
19 There may be multiple {\tt Memory} ships which interface to the same
20 physical storage space.  An implementation of Fleet must provide
21 additional documentation to the programmer indicating which {\tt
22 Memory} ships correspond to which storage spaces.  A single {\tt
23 Memory} ship may also access a ``virtual storage space'' formed by
24 concatenating multiple physical storage spaces.
25
26 \subsection*{Code Bag Fetch}
27
28 When a word appears at the {\tt inCBD} port, it is treated as a {\it
29 code bag descriptor}, as shown below:
30
31 \begin{center}
32 \setlength{\bitwidth}{3mm}
33 {\tt
34 \begin{bytefield}{37}
35   \bitheader[b]{36,6,5,0}\\
36   \bitbox{31}{Address} 
37   \bitbox{6}{size} 
38 \end{bytefield}
39 }
40 \end{center}
41
42 When a word arrives at the {\tt inCBD} port, it is treated as a memory
43 read with {\tt inAddrRead=Address}, {\tt inStride=1}, and {\tt
44 inCount=size}.
45
46 \subsection*{Reading}
47
48 When a word is delivered to {\tt inAddrRead}, the word residing in
49 memory at that address is provided at {\tt out}.
50
51 \subsection*{Writing}
52
53 When a word is delivered to {\tt inAddrWrite} and {\tt inDataWrite},
54 the word at {\tt inDataWrite} is written to the address specified by
55 {\tt inAddrWrite}.  Once the word is successfully committed to memory,
56 the value {\tt inAddrWrite+inStride} is provided at {\tt out} (that is, the
57 address of the next word to be written).
58
59 \subsection*{To Do}
60
61 Stride and count are not implemented.
62
63 We need a way to do an ``unordered fetch'' -- a way to tell the memory
64 unit to retrieve some block of words in any order it likes.  This can
65 considerably accelerate fetches when the first word of the region is
66 not cached, but other parts are cached.  This can also be used for
67 dispatching codebags efficiently -- but how will we make sure that
68 instructions destined for a given pump are dispatched in the correct
69 order (source sequence guarantee)?
70
71 A more advanced form would be ``unordered fetch of ordered records''
72 -- the ability to specify a record size (in words), the offset of the
73 first record, and the number of records to be fetched.  The memory
74 unit would then fetch the records in any order it likes, but would be
75 sure to return the words comprising a record in the order in which
76 they appear in memory.  This feature could be used to solve the source
77 sequence guarantee problem mentioned in the previous paragraph.
78
79 == Fleeterpreter ====================================================
80     private long[] mem = new long[0];
81     public long readMem(int addr) { return mem[addr]; }
82     public void writeMem(int addr, long val) {
83         if (addr >= mem.length) {
84             long[] newmem = new long[addr * 2 + 1];
85             System.arraycopy(mem, 0, newmem, 0, mem.length);
86             mem = newmem;
87         }
88         mem[addr] = val;
89     }
90
91     public void dispatch(int addr, int size) {                  // dispatches the size consecutive words starting at memory address addr
92         for(int i=addr; i<addr+size; i++) {
93             Instruction instr = ((Interpreter)getFleet()).readInstruction(readMem(i)); // the interpreter turns the stored word into an Instruction
94             ((Interpreter)getFleet()).dispatch(instr, i);       // the word's own address i is passed along with the instruction
95         }
96     }
97
98     public void boot(byte[] instructions) {                     // copies a byte image into memory, 6 bytes per word, then dispatches the codebag described by word 0
99         Interpreter fleet = (Interpreter)getFleet();            // NOTE(review): this local is not used anywhere below
100         // load the iscratch and take note of the 0-address INCBD
101         long launch = 0;                                        // stays 0 when the image is empty, which makes the dispatch below a no-op
102         for(int i=0; i<instructions.length; i+=6) {             // NOTE(review): assumes the length is a multiple of 6; otherwise instructions[i+j] runs past the end
103             long word = 0;
104             for(int j=0; j<6; j++)
105                 word = (word << 8) | (instructions[i+j] & 0xff); // earlier bytes end up in higher bits; & 0xff drops the sign extension of the byte
106             writeMem(i/6, word);                                // word index = byte offset / 6
107             if (i==0) launch = word;                            // remember the word stored at address 0
108         }
109
110         // dispatch the 0-address INCBD
111         int base = (int)(launch >> 6);                          // skip the 6 low bits that hold the size
112         base = base & ~(0xffffffff << 18);                      // keep only the low 18 bits of what remains
113         int size = (int)launch;
114         size = size & ~(0xffffffff <<  6);                      // keep only the low 6 bits
115         dispatch(base, size);
116     }
117
118     private long stride = 0;                                    // added to addr after each completed transfer
119     private long count = 0;                                     // transfers still owed for the request in progress; 0 means idle
120     private long addr = 0;                                      // memory address of the next transfer
121     private boolean writing = false;                            // true when the request in progress is a write
122
123     public void service() {                                     // one polling step: handle a pending code bag descriptor, then advance or start a read/write request
124         if (box_inCBD.dataReadyForShip()) {
125             long val = box_inCBD.removeDataForShip();
126             long addr = val >> 6;                               // local that shadows the field: everything above the 6 size bits
127             long size = val & 0x3f;                             // low 6 bits
128             dispatch((int)addr, (int)size);
129         }
130         if (count > 0) {                                        // a request is in progress; no new address is accepted until it finishes
131             if (writing) {
132               if (box_inDataWrite.dataReadyForShip() && box_out.readyForDataFromShip()) { // needs both the data word and room on out
133                  writeMem((int)addr, box_inDataWrite.removeDataForShip());
134                  box_out.addDataFromShip(0);                    // the value emitted on out after a write is the constant 0
135                  count--;
136                  addr += stride;
137               }
138             } else {
139               if (box_out.readyForDataFromShip()) {
140                  box_out.addDataFromShip(readMem((int)addr));   // emit the word read from memory
141                  count--;
142                  addr += stride;
143               }
144             }
145
146         } else if (box_inAddrRead.dataReadyForShip()) {         // idle: a read address takes priority over a write address
147             addr = box_inAddrRead.removeDataForShip();
148             stride = 0;
149             count = 1;                                          // every request is a single-word transfer here
150             writing = false;
151
152         } else if (box_inAddrWrite.dataReadyForShip()) {
153             addr = box_inAddrWrite.peekPacketForShip().value;   // address is taken from the peeked packet ...
154             box_inAddrWrite.removeDataForShip();                // ... which is then consumed, its return value unused
155             stride = 0;
156             count = 1;
157             writing = true;                                     // the data word itself is consumed on a later call, in the count > 0 branch
158         }
159     }
160
161 == FleetSim ==============================================================
162
163 == FPGA ==============================================================
164 `include "macros.v"
165 `define BRAM_ADDR_WIDTH 14
166 `define BRAM_DATA_WIDTH `INSTRUCTION_WIDTH
167 `define BRAM_NAME some_bram
168
169 /* bram.inc */
170 module `BRAM_NAME(clk, we, a, dpra, di, spo, dpo); // RAM with one write/read address (a) and one read-only address (dpra)
171     input  clk; 
172     input  we; // write enable for address a
173     input  [(`BRAM_ADDR_WIDTH-1):0] a; 
174     input  [(`BRAM_ADDR_WIDTH-1):0] dpra; 
175     input  [(`BRAM_DATA_WIDTH-1):0] di; // data written to ram[a] when we is high
176     output [(`BRAM_DATA_WIDTH-1):0] spo; // contents at the registered copy of a
177     output [(`BRAM_DATA_WIDTH-1):0] dpo; // contents at the registered copy of dpra
178     reg    [(`BRAM_DATA_WIDTH-1):0] ram [((1<<(`BRAM_ADDR_WIDTH))-1):0];
179     reg    [(`BRAM_ADDR_WIDTH-1):0] read_a; 
180     reg    [(`BRAM_ADDR_WIDTH-1):0] read_dpra; 
181     always @(posedge clk) begin 
182         if (we) 
183             ram[a] <= di; 
184         read_a <= a; // both read addresses are captured on the clock edge,
185         read_dpra <= dpra; // so spo/dpo follow an address one edge after it is presented
186     end
187     assign spo = ram[read_a]; 
188     assign dpo = ram[read_dpra]; 
189 endmodule 
190 /* bram.inc */
191
192 module memory (clk, // memory ship: reads and writes backed by some_bram, plus codebag fetch and preload
193                cbd_r,          cbd_a_,         cbd_d,
194                in_addr_r,      in_addr_a_,     in_addr_d,
195                write_addr_r,   write_addr_a_,  write_addr_d,
196                write_data_r,   write_data_a_,  write_data_d,
197                stride_r,       stride_a_,      stride_d,      // NOTE(review): the stride port is declared but never referenced in the always block below
198                count_r,        count_a_,       count_d,       // NOTE(review): likewise for the count port
199                out_r_,         out_a,          out_d_,
200                preload_r,      preload_a_,     preload_d,
201                ihorn_r_,       ihorn_a,        ihorn_d_,
202                dhorn_r_,       dhorn_a,        dhorn_d_
203               );
204
205   input  clk;
206   `input(in_addr_r,      in_addr_a,     in_addr_a_,     [(2+`DATAWIDTH-1):0],       in_addr_d)
207   `input(write_addr_r,   write_addr_a,  write_addr_a_,  [(2+`DATAWIDTH-1):0],       write_addr_d)
208   `input(write_data_r,   write_data_a,  write_data_a_,  [(`DATAWIDTH-1):0],         write_data_d)
209   `input(stride_r,       stride_a,      stride_a_,      [(`DATAWIDTH-1):0],         stride_d)
210   `input(count_r,        count_a,       count_a_,       [(`DATAWIDTH-1):0],         count_d)
211   `output(out_r,         out_r_,        out_a,          [(`DATAWIDTH-1):0],         out_d_)
212   `input(preload_r,      preload_a,     preload_a_,     [(`DATAWIDTH-1):0],         preload_d)
213   `input(cbd_r,          cbd_a,         cbd_a_,         [(`DATAWIDTH-1):0],         cbd_d)
214   `output(ihorn_r,       ihorn_r_,      ihorn_a,        [(`PACKET_WIDTH-1):0], ihorn_d_)
215   `defreg(ihorn_d_,                                     [(`PACKET_WIDTH-1):0], ihorn_d)
216   `output(dhorn_r,       dhorn_r_,      dhorn_a,        [(`PACKET_WIDTH-1):0],      dhorn_d_)
217   `defreg(dhorn_d_,                                     [(`PACKET_WIDTH-1):0],      dhorn_d)
218
219   reg ihorn_full; // set while a packet is waiting to leave on ihorn
220   initial ihorn_full = 0;
221   reg dhorn_full;
222   initial dhorn_full = 0;
223   reg command_valid; // set when ramread holds the instruction word to forward
224   initial command_valid = 0;
225
226   reg [(`BRAM_ADDR_WIDTH-1):0]    preload_pos;  // NOTE(review): no initial value is given here, unlike preload_size
227   reg [(`BRAM_ADDR_WIDTH-1):0]    preload_size; // 0 until the first preload word has been received
228   initial preload_size = 0;
229
230   reg [(`BRAM_ADDR_WIDTH-1):0]    current_instruction_read_from; // read address driven into the BRAM (dpra)
231   reg [(`BRAM_ADDR_WIDTH-1):0]    temp_base;
232   reg [(`CODEBAG_SIZE_BITS-1):0]  temp_size;
233   reg [(`BRAM_ADDR_WIDTH-1):0]    cbd_base;     // base address of the codebag being dispatched
234   reg [(`CODEBAG_SIZE_BITS-1):0]  cbd_size;     // number of words in that codebag
235   reg [(`CODEBAG_SIZE_BITS-1):0]  cbd_pos;      // how many of those words have been requested so far
236   reg [(`INSTRUCTION_WIDTH-1):0]  command;
237   reg [(`BRAM_DATA_WIDTH-1):0]    ram [((1<<(`BRAM_ADDR_WIDTH))-1):0]; // NOTE(review): not referenced in this module; storage is in mybram
238   reg                             send_done;    // a write was accepted and its completion still has to go out on out
239   reg                             send_read;    // a read was accepted and its data still has to go out on out
240
241   reg [(`INSTRUCTION_WIDTH-(2+`DESTINATION_ADDRESS_BITS)):0] temp; // NOTE(review): not referenced below
242   reg [(`DATAWIDTH-1):0]                                     data; // NOTE(review): not referenced below
243
244   reg                             write_flag;   // BRAM write enable
245   reg [(`BRAM_ADDR_WIDTH-1):0]    in_addr;      // BRAM write address
246   reg [(`BRAM_DATA_WIDTH-1):0]    write_data;   // BRAM write data
247
248   wire [(`BRAM_DATA_WIDTH-1):0]   ramread;      // BRAM output for current_instruction_read_from
249
250   reg command_valid_read; // a codebag read address was issued on the previous clock
251   initial command_valid_read = 0;
252
253   reg launched; // set once the whole preload image has been written
254   initial launched = 0;
255
256   some_bram mybram(clk, write_flag, in_addr, current_instruction_read_from, write_data, not_connected, ramread); // spo goes to an otherwise unused net; dpo is ramread
257   assign out_d_ = ramread; // the out data lines always carry the BRAM read output
258
259   always @(posedge clk) begin
260
261     write_flag <= 0; // default; branches below override it for the cycles that write
262
263     if (!in_addr_r && in_addr_a) in_addr_a = 0; // drop each acknowledge once its request has gone low
264     if (!write_data_r && write_data_a) write_data_a = 0;
265     if (!write_addr_r && write_addr_a) write_addr_a = 0;
266
267     if (command_valid_read) begin // stage between issuing a codebag read address and consuming ramread
268       command_valid_read  <= 0;
269       command_valid       <= 1;
270
271     end else  if (send_done) begin // `onwrite comes from macros.v (not shown); presumably it performs the out handshake - confirm
272       `onwrite(out_r, out_a)
273         send_done <= 0;
274       end
275
276     end else  if (send_read) begin
277       `onwrite(out_r, out_a)
278         send_read <= 0;
279       end
280
281     end else if (in_addr_r) begin // read request: acknowledge it, present the address to the BRAM, schedule the out transfer
282       in_addr_a                        = 1;
283       send_read                       <= 1;
284       current_instruction_read_from   <= in_addr_d[(`DATAWIDTH-1):0];
285
286     end else if (write_addr_r && write_data_r) begin // write request: needs address and data at the same time
287       write_addr_a       = 1;
288       write_data_a       = 1;
289       send_done         <= 1;
290       write_flag        <= 1;
291       in_addr           <= write_addr_d[(`DATAWIDTH-1):0];
292       write_data        <= write_data_d;
293
294     end else if (ihorn_full && launched) begin // a pending ihorn packet is only sent once launched is set
295       `onwrite(ihorn_r, ihorn_a)
296         ihorn_full <= 0;
297       end
298
299     end else if (dhorn_full) begin // NOTE(review): dhorn_full is never set to 1 in the code shown here
300       `onwrite(dhorn_r, dhorn_a)
301         dhorn_full <= 0;
302       end
303
304     end else if (command_valid) begin // forward the word just read from the BRAM as a packet on ihorn
305       command_valid <= 0;
306       command = ramread;
307       ihorn_full  <= 1;
308       `packet_data(ihorn_d) <= `instruction_data(command);
309       `packet_dest(ihorn_d) <= `instruction_dest(command);
310
311     end else if (cbd_pos < cbd_size) begin // words remain in the current codebag: request the next one
312       current_instruction_read_from <= cbd_base+cbd_pos;
313       command_valid_read            <= 1;
314       cbd_pos                       <= cbd_pos + 1;
315
316     end else begin // nothing in flight: take a new codebag descriptor, or failing that a preload word
317       `onread(cbd_r, cbd_a)
318         cbd_pos       <= 0;
319         cbd_size      <= cbd_d[(`CODEBAG_SIZE_BITS-1):0]; // low bits of the descriptor are the size
320         cbd_base      <= cbd_d[(`INSTRUCTION_WIDTH-1):(`CODEBAG_SIZE_BITS)]; // bits above the size field are the base address
321
322       end else begin
323         `onread(preload_r, preload_a)
324           if (preload_size == 0) begin // the first preload word is taken as the number of words to follow
325             preload_size     <= preload_d;
326           end else if (!launched) begin // later words are written to consecutive BRAM addresses until launch
327             write_flag <= 1;
328             write_data <= preload_d;
329             in_addr <= preload_pos;
330             if (preload_pos == 0) begin // the word stored at address 0 also supplies the initial codebag base and size
331               temp_base = preload_d[(`INSTRUCTION_WIDTH-(3+`DESTINATION_ADDRESS_BITS)):(`CODEBAG_SIZE_BITS)];
332               temp_size = preload_d[(`CODEBAG_SIZE_BITS-1):0];
333             end
334             if ((preload_pos+1) == preload_size) begin // last word of the image: queue the initial codebag and launch
335               cbd_pos  <= 0;
336               cbd_base <= temp_base;
337               cbd_size <= temp_size;
338               launched <= 1;
339             end
340             preload_pos      <= preload_pos + 1;
341           end
342         end
343       end
344     end
345   end
346 endmodule
347
348   
349
350
351
352 == Test ==============================================================
353 // expected output: the three literals that codebag BOB delivers to debug.in, in order
354 #expect 12
355 #expect 13
356 #expect 14
357
358 // ships required in order to run this code
359 #ship debug          : Debug
360 #ship memory         : Memory
361
362 // instructions not in any codebag are part of the "root codebag"
363 // which is dispatched when the code is loaded; here it delivers the literal BOB to memory.inCBD
364
365 memory.inCBD:
366   literal BOB;
367   deliver;
368
369 BOB: {
370   debug.in:
371     literal 12; deliver;
372     literal 13; deliver;
373     literal 14; deliver;
374 }
375
376
377 == Constants ========================================================
378
379 == Contributors =========================================================
380 Adam Megacz <megacz@cs.berkeley.edu>