1 //*****************************************************************************
2 // DISCLAIMER OF LIABILITY
4 // This text/file contains proprietary, confidential
5 // information of Xilinx, Inc., is distributed under license
6 // from Xilinx, Inc., and may be used, copied and/or
7 // disclosed only pursuant to the terms of a valid license
8 // agreement with Xilinx, Inc. Xilinx hereby grants you a
9 // license to use this text/file solely for design, simulation,
10 // implementation and creation of design files limited
11 // to Xilinx devices or technologies. Use with non-Xilinx
12 // devices or technologies is expressly prohibited and
13 // immediately terminates your license unless covered by
14 // a separate agreement.
16 // Xilinx is providing this design, code, or information
17 // "as-is" solely for use in developing programs and
18 // solutions for Xilinx devices, with no obligation on the
19 // part of Xilinx to provide support. By providing this design,
20 // code, or information as one possible implementation of
21 // this feature, application or standard, Xilinx is making no
22 // representation that this implementation is free from any
23 // claims of infringement. You are responsible for
24 // obtaining any rights you may require for your implementation.
25 // Xilinx expressly disclaims any warranty whatsoever with
26 // respect to the adequacy of the implementation, including
27 // but not limited to any warranties or representations that this
28 // implementation is free from claims of infringement, implied
29 // warranties of merchantability or fitness for a particular
32 // Xilinx products are not intended for use in life support
33 // appliances, devices, or systems. Use in such applications is
34 // expressly prohibited.
36 // Any modifications that are made to the Source Code are
37 // done at the users sole risk and will be unsupported.
39 // Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
41 // This copyright and support notice must be retained as part
42 // of this text at all times.
43 //*****************************************************************************
46 // /___/ \ / Vendor: Xilinx
47 // \ \ \/ Version: 2.3
48 // \ \ Application: MIG
49 // / / Filename: ddr2_phy_calib.v
50 // /___/ /\ Date Last Modified: $Date: 2008/07/02 14:03:08 $
51 // \ \ / \ Date Created: Thu Aug 10 2006
57 // This module handles calibration after memory initialization.
60 //*****************************************************************************
64 module ddr2_phy_calib #
66 // Following parameters are for 72-bit RDIMM design (for ML561 Reference
67 // board design). Actual values may be different. Actual parameters values
68 // are passed from design top module ddr2_sdram module. Please refer to
69 // the ddr2_sdram module for actual values.
70 parameter DQ_WIDTH = 72,
71 parameter DQ_BITS = 7,
72 parameter DQ_PER_DQS = 8,
73 parameter DQS_BITS = 4,
74 parameter DQS_WIDTH = 9,
75 parameter ADDITIVE_LAT = 0,
76 parameter CAS_LAT = 5,
77 parameter REG_ENABLE = 1,
78 parameter CLK_PERIOD = 3000,
79 parameter SIM_ONLY = 0,
80 parameter DEBUG_EN = 0
86 input [3:0] calib_start,
89 input [DQ_WIDTH-1:0] rd_data_rise,
90 input [DQ_WIDTH-1:0] rd_data_fall,
92 output reg [3:0] calib_done,
93 output reg calib_ref_req,
94 output [DQS_WIDTH-1:0] calib_rden,
95 output reg [DQS_WIDTH-1:0] calib_rden_sel,
97 output reg [DQ_WIDTH-1:0] dlyce_dq,
98 output reg [DQ_WIDTH-1:0] dlyinc_dq,
99 output reg dlyrst_dqs,
100 output reg [DQS_WIDTH-1:0] dlyce_dqs,
101 output reg [DQS_WIDTH-1:0] dlyinc_dqs,
102 output reg [DQS_WIDTH-1:0] dlyrst_gate,
103 output reg [DQS_WIDTH-1:0] dlyce_gate,
104 output reg [DQS_WIDTH-1:0] dlyinc_gate,
105 output [DQS_WIDTH-1:0] en_dqs,
106 output [DQS_WIDTH-1:0] rd_data_sel,
107 // Debug signals (optional use)
108 input dbg_idel_up_all,
109 input dbg_idel_down_all,
110 input dbg_idel_up_dq,
111 input dbg_idel_down_dq,
112 input dbg_idel_up_dqs,
113 input dbg_idel_down_dqs,
114 input dbg_idel_up_gate,
115 input dbg_idel_down_gate,
116 input [DQ_BITS-1:0] dbg_sel_idel_dq,
117 input dbg_sel_all_idel_dq,
118 input [DQS_BITS:0] dbg_sel_idel_dqs,
119 input dbg_sel_all_idel_dqs,
120 input [DQS_BITS:0] dbg_sel_idel_gate,
121 input dbg_sel_all_idel_gate,
122 output [3:0] dbg_calib_done,
123 output [3:0] dbg_calib_err,
124 output [(6*DQ_WIDTH)-1:0] dbg_calib_dq_tap_cnt,
125 output [(6*DQS_WIDTH)-1:0] dbg_calib_dqs_tap_cnt,
126 output [(6*DQS_WIDTH)-1:0] dbg_calib_gate_tap_cnt,
127 output [DQS_WIDTH-1:0] dbg_calib_rd_data_sel,
128 output [(5*DQS_WIDTH)-1:0] dbg_calib_rden_dly,
129 output [(5*DQS_WIDTH)-1:0] dbg_calib_gate_dly
132 // minimum time (in IDELAY taps) for which capture data must be stable for
133 // algorithm to consider
134 localparam MIN_WIN_SIZE = 5;
135 // IDEL_SET_VAL = (# of cycles - 1) to wait after changing IDELAY value
136 // we only have to wait enough for input with new IDELAY value to
137 // propagate through pipeline stages.
138 localparam IDEL_SET_VAL = 3'b111;
139 // # of clock cycles to delay read enable to determine if read data pattern
140 // is correct for stage 3/4 (RDEN, DQS gate) calibration
141 localparam CALIB_RDEN_PIPE_LEN = 31;
142 // translate CAS latency into number of clock cycles for read valid delay
143 // determination. Really only needed for CL = 2.5 (set to 2)
144 localparam CAS_LAT_RDEN = (CAS_LAT == 25) ? 2 : CAS_LAT;
145 // an SRL32 is used to delay CTRL_RDEN to generate read valid signal. This
146 // is the minimum possible value the delay through the SRL32 can be
147 localparam RDEN_BASE_DELAY = CAS_LAT_RDEN + ADDITIVE_LAT + REG_ENABLE;
148 // an SRL32 is used to delay the CTRL_RDEN from the read postamble DQS
149 // gate. This is min possible value the SRL32 delay can be:
150 // - Delay from end of deassertion of CTRL_RDEN to last falling edge of
151 // read burst = 3.5 (CTRL_RDEN -> CAS delay) + 3 (min CAS latency) = 6.5
152 // - Minimum time for DQS gate circuit to be generated:
153 // * 1 cyc to register CTRL_RDEN from controller
154 // * 1 cyc after RDEN_CTRL falling edge
155 // * 1 cyc min through SRL32
156 // * 1 cyc through SRL32 output flop
157 // * 0 (<1) cyc of synchronization to DQS domain via IDELAY
158 // * 1 cyc of delay through IDDR to generate CE to DQ IDDR's
159 // Total = 5 cyc < 6.5 cycles
160 // The total should be less than 5.5 cycles to account for prop delays
161 // adding one cycle to the synchronization time via the IDELAY.
162 // NOTE: Value differs because of optional pipeline register added
163 // for case of RDEN_BASE_DELAY > 3 to improve timing
164 localparam GATE_BASE_DELAY = RDEN_BASE_DELAY - 3;
165 localparam GATE_BASE_INIT = (GATE_BASE_DELAY <= 1) ? 0 : GATE_BASE_DELAY;
166 // used for RDEN calibration: difference between shift value used during
167 // calibration, and shift value for actual RDEN SRL. Only applies when
168 // RDEN edge is immediately captured by CLKDIV0. If not (depends on phase
169 // of CLK0 and CLKDIV0 when RDEN is asserted), then add 1 to this value.
170 localparam CAL3_RDEN_SRL_DLY_DELTA = 6;
171 // fix minimum value of DQS to be 1 to handle the case where there's only
172 // one DQS group. We could also enforce that user always inputs minimum
173 // value of 1 for DQS_BITS (even when DQS_WIDTH=1). Leave this as safeguard
174 // Assume we don't have to do this for DQ, DQ_WIDTH always > 1
175 localparam DQS_BITS_FIX = (DQS_BITS == 0) ? 1 : DQS_BITS;
176 // how many taps to "pre-delay" DQ before stg 1 calibration - not needed for
177 // current calibration, but leave for debug
178 localparam DQ_IDEL_INIT = 6'b000000;
179 // # IDELAY taps per bit time (i.e. half cycle). Limit to 63.
180 localparam integer BIT_TIME_TAPS = (CLK_PERIOD/150 < 64) ?
183 // used in various places during stage 4 cal: (1) determines maximum taps
184 // to increment when finding right edge, (2) amount to decrement after
185 // finding left edge, (3) amount to increment after finding right edge
186 localparam CAL4_IDEL_BIT_VAL = (BIT_TIME_TAPS >= 6'b100000) ?
187 6'b100000 : BIT_TIME_TAPS;
189 localparam CAL1_IDLE = 4'h0;
190 localparam CAL1_INIT = 4'h1;
191 localparam CAL1_INC_IDEL = 4'h2;
192 localparam CAL1_FIND_FIRST_EDGE = 4'h3;
193 localparam CAL1_FIRST_EDGE_IDEL_WAIT = 4'h4;
194 localparam CAL1_FOUND_FIRST_EDGE_WAIT = 4'h5;
195 localparam CAL1_FIND_SECOND_EDGE = 4'h6;
196 localparam CAL1_SECOND_EDGE_IDEL_WAIT = 4'h7;
197 localparam CAL1_CALC_IDEL = 4'h8;
198 localparam CAL1_DEC_IDEL = 4'h9;
199 localparam CAL1_DONE = 4'hA;
201 localparam CAL2_IDLE = 4'h0;
202 localparam CAL2_INIT = 4'h1;
203 localparam CAL2_INIT_IDEL_WAIT = 4'h2;
204 localparam CAL2_FIND_EDGE_POS = 4'h3;
205 localparam CAL2_FIND_EDGE_IDEL_WAIT_POS = 4'h4;
206 localparam CAL2_FIND_EDGE_NEG = 4'h5;
207 localparam CAL2_FIND_EDGE_IDEL_WAIT_NEG = 4'h6;
208 localparam CAL2_DEC_IDEL = 4'h7;
209 localparam CAL2_DONE = 4'h8;
211 localparam CAL3_IDLE = 3'h0;
212 localparam CAL3_INIT = 3'h1;
213 localparam CAL3_DETECT = 3'h2;
214 localparam CAL3_RDEN_PIPE_CLR_WAIT = 3'h3;
215 localparam CAL3_DONE = 3'h4;
217 localparam CAL4_IDLE = 3'h0;
218 localparam CAL4_INIT = 3'h1;
219 localparam CAL4_FIND_WINDOW = 3'h2;
220 localparam CAL4_FIND_EDGE = 3'h3;
221 localparam CAL4_IDEL_WAIT = 3'h4;
222 localparam CAL4_RDEN_PIPE_CLR_WAIT = 3'h5;
223 localparam CAL4_ADJ_IDEL = 3'h6;
224 localparam CAL4_DONE = 3'h7;
228 reg [5:0] cal1_bit_time_tap_cnt;
229 reg [1:0] cal1_data_chk_last;
230 reg cal1_data_chk_last_valid;
231 reg [1:0] cal1_data_chk_r;
234 reg cal1_dqs_dq_init_phase;
235 reg cal1_detect_edge;
236 reg cal1_detect_stable;
237 reg cal1_found_second_edge;
238 reg cal1_found_rising;
239 reg cal1_found_window;
240 reg cal1_first_edge_done;
241 reg [5:0] cal1_first_edge_tap_cnt;
242 reg [6:0] cal1_idel_dec_cnt;
243 reg [5:0] cal1_idel_inc_cnt;
244 reg [5:0] cal1_idel_max_tap;
245 reg cal1_idel_max_tap_we;
246 reg [5:0] cal1_idel_tap_cnt;
247 reg cal1_idel_tap_limit_hit;
248 reg [6:0] cal1_low_freq_idel_dec;
251 reg [3:0] cal1_state;
252 reg [3:0] cal1_window_cnt;
254 wire cal2_detect_edge;
257 reg [5:0] cal2_idel_dec_cnt;
258 reg [5:0] cal2_idel_tap_cnt;
259 reg [5:0] cal2_idel_tap_limit;
260 reg cal2_idel_tap_limit_hit;
261 reg cal2_rd_data_fall_last_neg;
262 reg cal2_rd_data_fall_last_pos;
263 reg cal2_rd_data_last_valid_neg;
264 reg cal2_rd_data_last_valid_pos;
265 reg cal2_rd_data_rise_last_neg;
266 reg cal2_rd_data_rise_last_pos;
267 reg [DQS_WIDTH-1:0] cal2_rd_data_sel;
268 wire cal2_rd_data_sel_edge;
269 reg [DQS_WIDTH-1:0] cal2_rd_data_sel_r;
271 reg [3:0] cal2_state;
273 reg cal3_data_match_stgd;
274 wire cal3_data_valid;
275 wire cal3_match_found;
276 wire [4:0] cal3_rden_dly;
277 reg [4:0] cal3_rden_srl_a;
278 reg [2:0] cal3_state;
281 reg cal4_data_match_stgd;
282 wire cal4_data_valid;
284 reg cal4_dlyinc_gate;
285 reg cal4_dlyrst_gate;
286 reg [4:0] cal4_gate_srl_a;
287 reg [5:0] cal4_idel_adj_cnt;
288 reg cal4_idel_adj_inc;
289 reg cal4_idel_bit_tap;
290 reg [5:0] cal4_idel_tap_cnt;
291 reg cal4_idel_max_tap;
292 reg [4:0] cal4_rden_srl_a;
295 reg cal4_stable_window;
296 reg [2:0] cal4_state;
297 reg [3:0] cal4_window_cnt;
298 reg [3:0] calib_done_tmp; // only for stg1/2/4
299 reg calib_ctrl_gate_pulse_r;
301 reg calib_ctrl_rden_r;
302 wire calib_ctrl_rden_negedge;
303 reg calib_ctrl_rden_negedge_r;
304 reg [3:0] calib_done_r;
306 reg [1:0] calib_err_2;
307 wire calib_init_gate_pulse;
308 reg calib_init_gate_pulse_r;
309 reg calib_init_gate_pulse_r1;
311 reg calib_init_rden_r;
312 reg [4:0] calib_rden_srl_a;
313 wire [4:0] calib_rden_srl_a_r;
314 reg [(5*DQS_WIDTH)-1:0] calib_rden_dly;
315 reg calib_rden_edge_r;
316 reg [4:0] calib_rden_pipe_cnt;
317 wire calib_rden_srl_out;
318 wire calib_rden_srl_out_r;
319 reg calib_rden_srl_out_r1;
320 reg calib_rden_valid;
321 reg calib_rden_valid_stgd;
322 reg [DQ_BITS-1:0] count_dq;
323 reg [DQS_BITS_FIX-1:0] count_dqs;
324 reg [DQS_BITS_FIX-1:0] count_gate;
325 reg [DQS_BITS_FIX-1:0] count_rden;
328 reg [(5*DQS_WIDTH)-1:0] gate_dly;
329 wire [(5*DQS_WIDTH)-1:0] gate_dly_r;
331 wire [DQS_WIDTH-1:0] gate_srl_out;
332 wire [DQS_WIDTH-1:0] gate_srl_out_r;
333 reg [2:0] idel_set_cnt;
335 reg [DQ_BITS-1:0] next_count_dq;
336 reg [DQS_BITS_FIX-1:0] next_count_dqs;
337 reg [DQS_BITS_FIX-1:0] next_count_gate;
339 reg phy_init_rden_r1;
340 reg [DQ_WIDTH-1:0] rd_data_fall_1x_r;
341 reg [DQS_WIDTH-1:0] rd_data_fall_1x_r1;
342 reg [DQS_WIDTH-1:0] rd_data_fall_2x_r;
343 wire [DQS_WIDTH-1:0] rd_data_fall_chk_q1;
344 wire [DQS_WIDTH-1:0] rd_data_fall_chk_q2;
345 reg [DQ_WIDTH-1:0] rd_data_rise_1x_r;
346 reg [DQS_WIDTH-1:0] rd_data_rise_1x_r1;
347 reg [DQS_WIDTH-1:0] rd_data_rise_2x_r;
348 wire [DQS_WIDTH-1:0] rd_data_rise_chk_q1;
349 wire [DQS_WIDTH-1:0] rd_data_rise_chk_q2;
360 reg [DQS_BITS_FIX-1:0] rdd_mux_sel;
362 reg [(5*DQS_WIDTH)-1:0] rden_dly;
363 wire [(5*DQS_WIDTH)-1:0] rden_dly_r;
364 reg [4:0] rden_dly_0;
366 reg [DQS_WIDTH-1:0] rden_mux;
367 wire [DQS_WIDTH-1:0] rden_srl_out;
371 reg [5:0] dbg_dq_tap_cnt [DQ_WIDTH-1:0];
372 reg [5:0] dbg_dqs_tap_cnt [DQS_WIDTH-1:0];
373 reg [5:0] dbg_gate_tap_cnt [DQS_WIDTH-1:0];
375 //***************************************************************************
376 // Debug output ("dbg_phy_calib_*")
378 // 1. All debug outputs coming out of PHY_CALIB are clocked off CLKDIV0,
379 // although they are also static after calibration is complete. This
380 // means the user can either connect them to a Chipscope ILA, or to
381 // either a sync/async VIO input block. Using an async VIO has the
382 // advantage of not requiring these paths to meet cycle-to-cycle timing.
383 // 2. The widths of most of these debug buses are dependent on the # of
384 // DQS/DQ bits (e.g. dq_tap_cnt width = 6 * (# of DQ bits)
385 // SIGNAL DESCRIPTION:
386 // 1. calib_done: 4 bits - each one asserted as each phase of calibration
388 // 2. calib_err: 4 bits - each one asserted when a calibration error
389 // encountered for that stage. Some of these bits may not
390 // be used (not all cal stages report an error).
391 // 3. dq_tap_cnt: final IDELAY tap counts for all DQ IDELAYs
392 // 4. dqs_tap_cnt: final IDELAY tap counts for all DQS IDELAYs
393 // 5. gate_tap_cnt: final IDELAY tap counts for all DQS gate
394 // synchronization IDELAYs
395 // 6. rd_data_sel: final read capture MUX (either "positive" or "negative"
396 // edge capture) settings for all DQS groups
397 // 7. rden_dly: related to # of cycles after issuing a read until when
398 // read data is valid - for all DQS groups
399 // 8. gate_dly: related to # of cycles after issuing a read until when
400 // clock enable for all DQ's is deasserted to prevent
401 // effect of DQS postamble glitch - for all DQS groups
402 //***************************************************************************
404 //*****************************************************************
405 // Record IDELAY tap values by "snooping" IDELAY control signals
406 //*****************************************************************
408 // record DQ IDELAY tap values
// One 6-bit shadow counter per DQ bit. Each counter tracks that bit's tap
// count by watching the dlyce_dq/dlyinc_dq/dlyrst_dq strobes rather than
// reading anything back. The counters are plain 6-bit registers: there is
// no saturation or overflow check here, so +1/-1 simply wrap.
411 for (dbg_dq_tc_i = 0; dbg_dq_tc_i < DQ_WIDTH;
412 dbg_dq_tc_i = dbg_dq_tc_i + 1) begin: gen_dbg_dq_tap_cnt
// pack counter i into bits [6*i+5 : 6*i] of the flattened debug output bus
413 assign dbg_calib_dq_tap_cnt[(6*dbg_dq_tc_i)+5:(6*dbg_dq_tc_i)]
414 = dbg_dq_tap_cnt[dbg_dq_tc_i];
415 always @(posedge clkdiv)
// cleared on rstdiv, or whenever dlyrst_dq is asserted
416 if (rstdiv | dlyrst_dq)
417 dbg_dq_tap_cnt[dbg_dq_tc_i] <= 6'b000000;
// the counter only changes on cycles where this bit's dlyce_dq strobe is
// high; dlyinc_dq then selects the + 1 update, and the - 1 update below is
// its decrement counterpart
419 if (dlyce_dq[dbg_dq_tc_i])
420 if (dlyinc_dq[dbg_dq_tc_i])
421 dbg_dq_tap_cnt[dbg_dq_tc_i]
422 <= dbg_dq_tap_cnt[dbg_dq_tc_i] + 1;
424 dbg_dq_tap_cnt[dbg_dq_tc_i]
425 <= dbg_dq_tap_cnt[dbg_dq_tc_i] - 1;
429 // record DQS IDELAY tap values
// One 6-bit shadow counter per DQS group, driven by the dlyce_dqs /
// dlyinc_dqs strobes. No saturation or overflow check: +1/-1 wrap at 6 bits.
432 for (dbg_dqs_tc_i = 0; dbg_dqs_tc_i < DQS_WIDTH;
433 dbg_dqs_tc_i = dbg_dqs_tc_i + 1) begin: gen_dbg_dqs_tap_cnt
// pack counter i into bits [6*i+5 : 6*i] of the flattened debug output bus
434 assign dbg_calib_dqs_tap_cnt[(6*dbg_dqs_tc_i)+5:(6*dbg_dqs_tc_i)]
435 = dbg_dqs_tap_cnt[dbg_dqs_tc_i];
436 always @(posedge clkdiv)
// dlyrst_dqs is a single bit shared by all groups, so every DQS counter is
// cleared together (also cleared on rstdiv)
437 if (rstdiv | dlyrst_dqs)
438 dbg_dqs_tap_cnt[dbg_dqs_tc_i] <= 6'b000000;
// the counter only changes on cycles where this group's dlyce_dqs strobe is
// high; dlyinc_dqs then selects the + 1 update, and the - 1 update below is
// its decrement counterpart
440 if (dlyce_dqs[dbg_dqs_tc_i])
441 if (dlyinc_dqs[dbg_dqs_tc_i])
442 dbg_dqs_tap_cnt[dbg_dqs_tc_i]
443 <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] + 1;
445 dbg_dqs_tap_cnt[dbg_dqs_tc_i]
446 <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] - 1;
450 // record DQS gate IDELAY tap values
// One 6-bit shadow counter per DQS group for the gate IDELAY, driven by the
// dlyce_gate / dlyinc_gate / dlyrst_gate strobes. No saturation or overflow
// check: +1/-1 wrap at 6 bits.
451 genvar dbg_gate_tc_i;
453 for (dbg_gate_tc_i = 0; dbg_gate_tc_i < DQS_WIDTH;
454 dbg_gate_tc_i = dbg_gate_tc_i + 1) begin: gen_dbg_gate_tap_cnt
// pack counter i into bits [6*i+5 : 6*i] of the flattened debug output bus
455 assign dbg_calib_gate_tap_cnt[(6*dbg_gate_tc_i)+5:(6*dbg_gate_tc_i)]
456 = dbg_gate_tap_cnt[dbg_gate_tc_i];
457 always @(posedge clkdiv)
// dlyrst_gate is one bit per group, so each gate counter is cleared
// independently by its own reset bit (all are cleared on rstdiv)
458 if (rstdiv | dlyrst_gate[dbg_gate_tc_i])
459 dbg_gate_tap_cnt[dbg_gate_tc_i] <= 6'b000000;
// the counter only changes on cycles where this group's dlyce_gate strobe
// is high; dlyinc_gate then selects the + 1 update, and the - 1 update
// below is its decrement counterpart
461 if (dlyce_gate[dbg_gate_tc_i])
462 if (dlyinc_gate[dbg_gate_tc_i])
463 dbg_gate_tap_cnt[dbg_gate_tc_i]
464 <= dbg_gate_tap_cnt[dbg_gate_tc_i] + 1;
466 dbg_gate_tap_cnt[dbg_gate_tc_i]
467 <= dbg_gate_tap_cnt[dbg_gate_tc_i] - 1;
// Remaining debug outputs are direct wire-throughs of internal calibration
// signals; nothing is re-registered or re-encoded here.
// done flags: same bits as the calib_done output port
471 assign dbg_calib_done = calib_done;
472 assign dbg_calib_err = calib_err;
// per-DQS-group capture-edge select chosen by stage 2
473 assign dbg_calib_rd_data_sel = cal2_rd_data_sel;
// 5 bits per DQS group, flattened (both buses are 5*DQS_WIDTH wide)
474 assign dbg_calib_rden_dly = rden_dly;
475 assign dbg_calib_gate_dly = gate_dly;
477 //***************************************************************************
478 // Read data pipelining, and read data "ISERDES" data width expansion
479 //***************************************************************************
481 // For all data bits, register incoming capture data to slow clock to improve
482 // timing. Adding single pipeline stage does not affect functionality (as
483 // long as we make sure to wait extra clock cycle after changing DQ IDELAY)
484 // Also note in this case that we're "missing" every other clock cycle's
485 // worth of data capture since we're sync'ing to the slow clock. This is
486 // fine for stage 1 and stage 2 cal, but not for stage 3 and 4 (see below
487 // for different circuit to handle those stages)
488 always @(posedge clkdiv) begin
489 rd_data_rise_1x_r <= rd_data_rise;
490 rd_data_fall_1x_r <= rd_data_fall;
493 // For every DQ_PER_DQS bit, generate what is essentially a ISERDES-type
494 // data width expander. Will need this for stage 3 and 4 cal, where we need
495 // to compare data over consecutive clock cycles. We can also use this for
496 // stage 2 as well (stage 2 doesn't require every bit to be looked at, only
497 // one bit per DQS group)
500 for (rdd_i = 0; rdd_i < DQS_WIDTH; rdd_i = rdd_i + 1) begin: gen_rdd
501 // first stage: keep data in fast clk domain. Store data over two
502 // consecutive clock cycles for rise/fall data for proper transfer
503 // to slow clock domain
504 always @(posedge clk) begin
505 rd_data_rise_2x_r[rdd_i] <= rd_data_rise[(rdd_i*DQ_PER_DQS)];
506 rd_data_fall_2x_r[rdd_i] <= rd_data_fall[(rdd_i*DQ_PER_DQS)];
508 // second stage, register first stage to slow clock domain, 2nd stage
509 // consists of both these flops, and the rd_data_rise_1x_r flops
510 always @(posedge clkdiv) begin
511 rd_data_rise_1x_r1[rdd_i] <= rd_data_rise_2x_r[rdd_i];
512 rd_data_fall_1x_r1[rdd_i] <= rd_data_fall_2x_r[rdd_i];
514 // now we have four outputs - representing rise/fall outputs over last
515 // 2 fast clock cycles. However, the ordering these represent can either
516 // be: (1) Q2 = data @ time = n, Q1 = data @ time = n+1, or (2)
517 // Q2 = data @ time = n - 1, Q1 = data @ time = n (and data at [Q1,Q2]
518 // is "staggered") - leave it up to the stage of calibration using this
519 // to figure out which is which, if they care at all (e.g. stage 2 cal
520 // doesn't care about the ordering)
521 assign rd_data_rise_chk_q1[rdd_i]
522 = rd_data_rise_1x_r[(rdd_i*DQ_PER_DQS)];
523 assign rd_data_rise_chk_q2[rdd_i]
524 = rd_data_rise_1x_r1[rdd_i];
525 assign rd_data_fall_chk_q1[rdd_i]
526 = rd_data_fall_1x_r[(rdd_i*DQ_PER_DQS)];
527 assign rd_data_fall_chk_q2[rdd_i]
528 = rd_data_fall_1x_r1[rdd_i];
532 //*****************************************************************
533 // Outputs of these simplified ISERDES circuits then feed MUXes based on
534 // which DQ the current calibration algorithm needs to look at
535 //*****************************************************************
537 // generate MUX control; assume that adding an extra pipeline stage isn't
538 // an issue - whatever stage cal logic is using output of MUX will wait
539 // enough time after changing it
// Registered select for the per-DQS-group read-data check MUX. The source
// of the select depends on which calib_done bits are currently set, so the
// MUX follows whichever group counter belongs to the stage being run.
540 always @(posedge clkdiv) begin
// NOTE(review): full_case is asserted but only three of the eight encodings
// have an arm in the lines visible here; for the others simulation holds the
// previous value while synthesis may treat them as don't-care.
541 (* full_case, parallel_case *) case (calib_done[2:0])
// only calib_done[0] set (presumably stage 2 running): next DQS group index
542 3'b001: rdd_mux_sel <= next_count_dqs;
// calib_done[1:0] set (presumably stage 3 running): read-enable group index
543 3'b011: rdd_mux_sel <= count_rden;
// calib_done[2:0] set (presumably stage 4 running): next gate group index
544 3'b111: rdd_mux_sel <= next_count_gate;
// Registered output of the check MUX: picks the rise/fall Q1/Q2 sample bits
// of the DQS group indexed by rdd_mux_sel and holds them for one clkdiv
// cycle. Together with the registered select this adds pipeline delay
// between a change of group and valid rdd_* data.
548 always @(posedge clkdiv) begin
549 rdd_rise_q1 <= rd_data_rise_chk_q1[rdd_mux_sel];
550 rdd_rise_q2 <= rd_data_rise_chk_q2[rdd_mux_sel];
551 rdd_fall_q1 <= rd_data_fall_chk_q1[rdd_mux_sel];
552 rdd_fall_q2 <= rd_data_fall_chk_q2[rdd_mux_sel];
555 //***************************************************************************
556 // Demultiplexor to control (reset, increment, decrement) IDELAY tap values
558 // STG1: for per-bit deskew, only inc/dec the current DQ. For non-per-bit
559 // deskew, increment all bits in the current DQS set
560 // STG2: inc/dec all DQ's in the current DQS set.
561 // NOTE: Nice to add some error checking logic here (or elsewhere in the
562 // code) to check if logic attempts to overflow tap value
563 //***************************************************************************
565 // don't use DLYRST to reset value of IDELAY after reset. Need to change this
566 // if we want to allow user to recalibrate after initial reset
567 always @(posedge clkdiv)
576 always @(posedge clkdiv) begin
588 // stage 1 cal: change only specified DQ
589 if (cal1_dlyce_dq) begin
590 if (SIM_ONLY == 0) begin
591 dlyce_dq[count_dq] <= 1'b1;
592 dlyinc_dq[count_dq] <= cal1_dlyinc_dq;
594 // if simulation, then calibrate only first DQ, apply results
595 // to all DQs (i.e. assume delay on all DQs is the same)
596 for (i = 0; i < DQ_WIDTH; i = i + 1) begin: loop_sim_dq_dly
598 dlyinc_dq[i] <= cal1_dlyinc_dq;
601 end else if (cal2_dlyce_dqs) begin
602 // stage 2 cal: change DQS and all corresponding DQ's
603 if (SIM_ONLY == 0) begin
604 dlyce_dqs[count_dqs] <= 1'b1;
605 dlyinc_dqs[count_dqs] <= cal2_dlyinc_dqs;
606 for (i = 0; i < DQ_PER_DQS; i = i + 1) begin: loop_dqs_dly
607 dlyce_dq[(DQ_PER_DQS*count_dqs)+i] <= 1'b1;
608 dlyinc_dq[(DQ_PER_DQS*count_dqs)+i] <= cal2_dlyinc_dqs;
611 for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_dqs_dly
612 // if simulation, then calibrate only first DQS
613 dlyce_dqs[i] <= 1'b1;
614 dlyinc_dqs[i] <= cal2_dlyinc_dqs;
615 for (j = 0; j < DQ_PER_DQS; j = j + 1) begin: loop_sim_dq_dqs_dly
616 dlyce_dq[(DQ_PER_DQS*i)+j] <= 1'b1;
617 dlyinc_dq[(DQ_PER_DQS*i)+j] <= cal2_dlyinc_dqs;
621 end else if (DEBUG_EN != 0) begin
622 // DEBUG: allow user to vary IDELAY tap settings
623 // For DQ IDELAY taps
624 if (dbg_idel_up_all || dbg_idel_down_all ||
625 dbg_sel_all_idel_dq) begin
626 for (x = 0; x < DQ_WIDTH; x = x + 1) begin: loop_dly_inc_dq
627 dlyce_dq[x] <= dbg_idel_up_all | dbg_idel_down_all |
628 dbg_idel_up_dq | dbg_idel_down_dq;
629 dlyinc_dq[x] <= dbg_idel_up_all | dbg_idel_up_dq;
633 dlyce_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq |
635 dlyinc_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq;
637 // For DQS IDELAY taps
638 if (dbg_idel_up_all || dbg_idel_down_all ||
639 dbg_sel_all_idel_dqs) begin
640 for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_dqs
641 dlyce_dqs[x] <= dbg_idel_up_all | dbg_idel_down_all |
642 dbg_idel_up_dqs | dbg_idel_down_dqs;
643 dlyinc_dqs[x] <= dbg_idel_up_all | dbg_idel_up_dqs;
647 dlyce_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs |
649 dlyinc_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs;
655 // GATE synchronization is handled directly by Stage 4 calibration FSM
656 always @(posedge clkdiv)
658 dlyrst_gate <= {DQS_WIDTH{1'b1}};
659 dlyce_gate <= {DQS_WIDTH{1'b0}};
660 dlyinc_gate <= {DQS_WIDTH{1'b0}};
662 dlyrst_gate <= {DQS_WIDTH{1'b0}};
663 dlyce_gate <= {DQS_WIDTH{1'b0}};
664 dlyinc_gate <= {DQS_WIDTH{1'b0}};
666 if (cal4_dlyrst_gate) begin
668 dlyrst_gate[count_gate] <= 1'b1;
670 for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly_rst
671 dlyrst_gate[i] <= 1'b1;
675 if (cal4_dlyce_gate) begin
676 if (SIM_ONLY == 0) begin
677 dlyce_gate[count_gate] <= 1'b1;
678 dlyinc_gate[count_gate] <= cal4_dlyinc_gate;
680 // if simulation, then calibrate only first gate
681 for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly
682 dlyce_gate[i] <= 1'b1;
683 dlyinc_gate[i] <= cal4_dlyinc_gate;
686 end else if (DEBUG_EN != 0) begin
687 // DEBUG: allow user to vary IDELAY tap settings
688 if (dbg_idel_up_all || dbg_idel_down_all ||
689 dbg_sel_all_idel_gate) begin
690 for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_gate
691 dlyce_gate[x] <= dbg_idel_up_all | dbg_idel_down_all |
692 dbg_idel_up_gate | dbg_idel_down_gate;
693 dlyinc_gate[x] <= dbg_idel_up_all | dbg_idel_up_gate;
696 dlyce_gate <= {DQS_WIDTH{1'b0}};
697 dlyce_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate |
699 dlyinc_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate;
704 //***************************************************************************
705 // signal to tell calibration state machines to wait and give IDELAY time to
706 // settle after its value is changed (both time for IDELAY chain to settle,
707 // and for settled output to propagate through ISERDES). For general use: use
708 // for any calibration state machines that modify any IDELAY.
709 // Should give at least enough time for IDELAY output to settle (technically
710 // for V5, this should be "glitchless" when IDELAY taps are changed, so don't
711 // need any time here), and also time for new data to propagate through both
712 // ISERDES and the "RDD" MUX + associated pipelining
713 // For now, give very "generous" delay - doesn't really matter since only
714 // needed during calibration
715 //***************************************************************************
717 // determine if calibration polarity has changed
// Change detector on cal2_rd_data_sel: keep a copy delayed by one clkdiv
// cycle, then XOR current against delayed and OR-reduce the result.
718 always @(posedge clkdiv)
719 cal2_rd_data_sel_r <= cal2_rd_data_sel;
// high whenever any bit of cal2_rd_data_sel differs from its value one
// clkdiv cycle earlier (combinational, so it is asserted in the same cycle
// the new value appears and drops once the delayed copy catches up)
721 assign cal2_rd_data_sel_edge = |(cal2_rd_data_sel ^ cal2_rd_data_sel_r);
723 // combine requests to modify any of the IDELAYs into one. Also when second
724 // stage capture "edge" polarity is changed (IDELAY isn't changed in this
725 // case, but use the same counter to stall cal logic)
726 assign dlyce_or = cal1_dlyce_dq |
728 cal2_rd_data_sel_edge |
732 // SYN_NOTE: Can later recode to avoid combinational path
733 assign idel_set_wait = dlyce_or || (idel_set_cnt != IDEL_SET_VAL);
// Settle counter behind idel_set_wait: cleared by the two assignments
// below, then incremented once per clkdiv cycle until it equals
// IDEL_SET_VAL, where the increment branch stops firing and it holds.
// The clear value is a 3-bit literal so that it matches the declared width
// of idel_set_cnt (reg [2:0]) and of IDEL_SET_VAL (3'b111); a 4-bit literal
// here is silently truncated on assignment and trips width-mismatch lint.
735 always @(posedge clkdiv)
737 idel_set_cnt <= 3'b000;
739 idel_set_cnt <= 3'b000;
740 else if (idel_set_cnt != IDEL_SET_VAL)
741 idel_set_cnt <= idel_set_cnt + 1;
743 // generate request to PHY_INIT logic to issue auto-refresh
744 // used by certain states to force prech/auto-refresh part way through
745 // calibration to avoid a tRAS violation (which will happen if that
746 // stage of calibration lasts long enough). This signal must meet the
747 // following requirements: (1) only transition from 0->1 when the refresh
748 // request is needed, (2) stay at 1 and only transition 1->0 when
749 // CALIB_REF_DONE is asserted
750 always @(posedge clkdiv)
752 calib_ref_req <= 1'b0;
754 calib_ref_req <= cal1_ref_req | cal2_ref_req | cal4_ref_req;
756 // stage 1 calibration requests auto-refresh every 4 bits
// Elaboration-time choice of how cal1_refresh is driven.
// NOTE(review): the block labels say "lte4"/"gt4" but the test is on
// DQ_BITS < 2 - confirm which threshold is intended.
758 if (DQ_BITS < 2) begin: gen_cal1_refresh_dq_lte4
// next_count_dq is [DQ_BITS-1:0], so with fewer than 2 bits there is no
// [1:0] slice to test; tie the signal low instead
759 assign cal1_refresh = 1'b0;
760 end else begin: gen_cal1_refresh_dq_gt4
// high whenever the low two bits of next_count_dq are zero, i.e. whenever
// the next DQ index is a multiple of 4
761 assign cal1_refresh = (next_count_dq[1:0] == 2'b00);
765 //***************************************************************************
766 // First stage calibration: DQ-DQS
768 // edge: detected when varying IDELAY, and current capture data != prev
770 // valid bit window: detected when current capture data == prev capture
771 // data for more than half the bit time
772 // starting conditions for DQS-DQ phase:
773 // case 1: when DQS starts somewhere in rising edge bit window, or
774 // on the right edge of the rising bit window.
775 // case 2: when DQS starts somewhere in falling edge bit window, or
776 // on the right edge of the falling bit window.
777 // Algorithm Description:
778 // 1. Increment DQ IDELAY until we find an edge.
779 // 2. While we're finding the first edge, note whether a valid bit window
780 // has been detected before we found an edge. If so, then figure out if
781 // this is the rising or falling bit window. If rising, then our starting
782 // DQS-DQ phase is case 1. If falling, then it's case 2. If don't detect
783 // a valid bit window, then we must have started on the edge of a window.
784 // Need to wait until later on to decide which case we are.
785 // - Store FIRST_EDGE IDELAY value
786 // 3. Now look for second edge.
787 // 4. While we're finding the second edge, note whether valid bit window
788 // is detected. If so, then use to, along with results from (2) to figure
789 // out what the starting case is. If in rising bit window, then we're in
790 // case 2. If falling, then case 1.
791 // - Store SECOND_EDGE IDELAY value
793 // a. Finding two edges allows us to calculate the bit time (although
794 // not the "same" bit time polarity - need to investigate this
796 // b. If we run out of taps looking for the second edge, then the bit
797 // time must be too long (>= 2.5ns, and DQS-DQ starting phase must be
799 // 5. Calculate absolute amount to delay DQ as:
800 // If second edge found, and case 1:
801 // - DQ_IDELAY = FIRST_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
802 // If second edge found, and case 2:
803 // - DQ_IDELAY = SECOND_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
804 // If second edge not found, then need to make an approximation on
805 // how much to shift by (should be okay, because we have more timing
807 // - DQ_IDELAY = FIRST_EDGE - 0.5 * (bit_time)
808 // NOTE: Does this account for either case 1 or case 2?????
809 // NOTE: It's also possible, even when we find the second edge,
810 // to instead just use half the bit time to subtract from either
811 // FIRST or SECOND_EDGE. Finding the actual bit time (which is
812 // what (SECOND_EDGE - FIRST_EDGE) is) is slightly more accurate,
813 // since it takes into account duty cycle distortion.
814 // 6. Repeat for each DQ in current DQS set.
815 //***************************************************************************
817 //*****************************************************************
818 // for first stage calibration - used for checking if DQS is aligned to the
819 // particular DQ, such that we're in the data valid window. Basically, this
821 // = [falling data, rising data]
822 // = [0, 1] = rising DQS aligned in proper (rising edge) bit window
823 // = [1, 0] = rising DQS aligned in wrong (falling edge) bit window
824 // = [0, 0], or [1,1] = in uncertain region between windows
825 //*****************************************************************
827 // SYN_NOTE: May have to split this up into multiple levels - MUX can get
828 // very wide - as wide as the data bus width
// Registered 2-bit check value for the DQ bit indexed by next_count_dq:
//   bit [1] = fall-edge sample, bit [0] = rise-edge sample.
// Both samples come from the clkdiv-registered copies of the capture data
// (rd_data_*_1x_r), so this adds one more clkdiv register stage on top of
// that pipeline.
829 always @(posedge clkdiv)
830 cal1_data_chk_r <= {rd_data_fall_1x_r[next_count_dq],
831 rd_data_rise_1x_r[next_count_dq]};
833 //*****************************************************************
834 // determine when an edge has occurred - when either the current value
835 // is different from the previous latched value or when the DATA_CHK
836 // outputs are the same (rare, but indicates that we're at an edge)
837 // This is only valid when the IDELAY output and propagation of the
838 // data through the capture flops has had a chance to settle out.
839 //*****************************************************************
841 // write CAL1_DETECT_EDGE and CAL1_DETECT_STABLE in such a way that
842 // if X's are captured on the bus during functional simulation, that
843 // the logic will register this as an edge detected. Do this to allow
844 // use of this HDL with Denali memory models (Denali models drive DQ
845 // to X's on both edges of the data valid window to simulate jitter)
846 // This is only done for functional simulation purposes. **Should not**
847 // make the final synthesized logic more complicated, but it does make
848 // the HDL harder to understand b/c we have to "phrase" the logic
849 // slightly differently than when not worrying about X's
851 // no edge found if: (1) we have recorded a previous DATA_CHK value, and
852 // the current value matches it, (2) we haven't yet recorded one, but
853 // rise/fall data is equal to either [0,1] or [1,0] (i.e. rise/fall
854 // data isn't either X's, or [0,0] or [1,1], which indicates we're
855 // in the middle of an edge, since normally rise != fall data for stg1)
// NOTE(review): in this listing no `else` is visible between the two
// assignments to cal1_detect_edge below (and likewise for
// cal1_detect_stable); a line appears to be missing between them. As shown,
// the second assignment would not be guarded - confirm against the
// original file. The enclosing always-block header is also not visible here.
856 if ((cal1_data_chk_last_valid &&
857 (cal1_data_chk_r == cal1_data_chk_last)) ||
858 (!cal1_data_chk_last_valid &&
859 ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10))))
860 cal1_detect_edge = 1'b0;
862 cal1_detect_edge = 1'b1;
866 // assert if we've found a region where data valid window is stable
867 // over consecutive IDELAY taps, and either rise/fall = [1,0], or [0,1]
// i.e. requires a valid previous value, current == previous, and current
// equal to 2'b01 or 2'b10.
868 if ((cal1_data_chk_last_valid &&
869 (cal1_data_chk_r == cal1_data_chk_last)) &&
870 ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10)))
871 cal1_detect_stable <= 1'b1;
873 cal1_detect_stable <= 1'b0;
876 //*****************************************************************
877 // Find valid window: keep track of how long we've been in the same data
878 // window. If it's been long enough, then declare that we've found a valid
879 // window. Also returns whether we found a rising or falling window (only
880 // valid when found_window is asserted)
881 //*****************************************************************
883 always @(posedge clkdiv) begin
// Window tracker, evaluated in priority order:
//  1. cal1_state == CAL1_INIT: clear counter and window flag
//  2. no valid previous DATA_CHK value: clear counter and window flag
//  3. in either IDEL_WAIT state with idel_set_wait deasserted: count
//     consecutive samples for which cal1_detect_stable is asserted
// cal1_found_rising is driven to X whenever the window flag is cleared, so
// it is only meaningful while cal1_found_window is asserted.
// NOTE(review): several `else` / `end` lines of this block are not visible
// in this listing (e.g. between the two cal1_found_rising assignments and
// before the final reset of the counter) - confirm against the original.
884 if (cal1_state == CAL1_INIT) begin
885 cal1_window_cnt <= 4'b0000;
886 cal1_found_window <= 1'b0;
887 cal1_found_rising <= 1'bx;
888 end else if (!cal1_data_chk_last_valid) begin
889 // if we haven't stored a previous value of CAL1_DATA_CHK (or it got
890 // invalidated because we detected an edge, and are now looking for the
891 // second edge), then make sure FOUND_WINDOW deasserted on following
892 // clock edge (to avoid finding a false window immediately after finding
893 // an edge). Note that because of jitter, it's possible to not find an
894 // edge at the end of the IDELAY increment settling time, but to find an
895 // edge on the next clock cycle (e.g. during CAL1_FIND_FIRST_EDGE)
896 cal1_window_cnt <= 4'b0000;
897 cal1_found_window <= 1'b0;
898 cal1_found_rising <= 1'bx;
899 end else if (((cal1_state == CAL1_FIRST_EDGE_IDEL_WAIT) ||
900 (cal1_state == CAL1_SECOND_EDGE_IDEL_WAIT)) &&
901 !idel_set_wait) begin
902 // while finding the first and second edges, see if we can detect a
903 // stable bit window (occurs over MIN_WIN_SIZE number of taps). If
904 // so, then we're away from an edge, and can conclusively determine the
905 // starting DQS-DQ phase.
906 if (cal1_detect_stable) begin
907 cal1_window_cnt <= cal1_window_cnt + 1;
// the comparison below uses the pre-increment value of the counter
908 if (cal1_window_cnt == MIN_WIN_SIZE-1) begin
909 cal1_found_window <= 1'b1;
// rising window is reported when {fall, rise} == 2'b01
910 if (cal1_data_chk_r == 2'b01)
911 cal1_found_rising <= 1'b1;
913 cal1_found_rising <= 1'b0;
916 // otherwise, we're not in a data valid window, reset the window
917 // counter, and indicate we're not currently in window. This should
918 // happen by design at least once after finding the first edge.
919 cal1_window_cnt <= 4'b0000;
920 cal1_found_window <= 1'b0;
921 cal1_found_rising <= 1'bx;
926 //*****************************************************************
927 // keep track of edge tap counts found, and whether we've
928 // incremented to the maximum number of taps allowed
929 //*****************************************************************
931 always @(posedge clkdiv)
// Tracks the DQ IDELAY tap position during stage 1:
//  - count and limit flag are cleared while cal1_state == CAL1_INIT
//  - on a cal1_dlyce_dq pulse with cal1_dlyinc_dq set, the count increments
//    and the limit flag is set when the pre-increment count is 6'b111110
//    (i.e. the count is about to become 63)
// NOTE(review): the last two assignments (decrement, clear limit flag) are
// presumably the `else` branch of cal1_dlyinc_dq; the `end else begin` and
// closing `end` lines are not visible in this listing - confirm.
932 if (cal1_state == CAL1_INIT) begin
933 cal1_idel_tap_limit_hit <= 1'b0;
934 cal1_idel_tap_cnt <= 6'b000000;
935 end else if (cal1_dlyce_dq) begin
936 if (cal1_dlyinc_dq) begin
937 cal1_idel_tap_cnt <= cal1_idel_tap_cnt + 1;
938 cal1_idel_tap_limit_hit <= (cal1_idel_tap_cnt == 6'b111110);
940 cal1_idel_tap_cnt <= cal1_idel_tap_cnt - 1;
941 cal1_idel_tap_limit_hit <= 1'b0;
945 //*****************************************************************
946 // Pipeline for better timing - amount to decrement by if second
948 //*****************************************************************
949 // if only one edge found (possible for low frequencies), then:
950 // 1. Assume starting DQS-DQ phase has DQS in DQ window (aka "case 1")
951 // 2. We have to decrement by (63 - first_edge_tap_cnt) + (BIT_TIME_TAPS/2)
952 // (i.e. decrement by 63-first_edge_tap_cnt to get to right edge of
953 // DQ window. Then decrement again by (BIT_TIME_TAPS/2) to get to center
955 // 3. Clamp the above value at 63 to ensure we don't underflow IDELAY
956 // (note: clamping happens in the CAL1 state machine)
// The first term is computed in 7 bits (first_edge_tap_cnt is zero-extended)
// so that adding the second term cannot wrap within 6 bits.
// NOTE(review): the right-hand operand of the final `+` is not visible in
// this listing; per item 2 above it is presumably BIT_TIME_TAPS/2 - confirm.
957 always @(posedge clkdiv)
958 cal1_low_freq_idel_dec
959 <= (7'b0111111 - {1'b0, cal1_first_edge_tap_cnt}) +
962 //*****************************************************************
963 // Keep track of max taps used during stage 1, use this to limit
964 // the number of taps that can be used in stage 2
965 //*****************************************************************
967 always @(posedge clkdiv)
// Records the largest cal1_idel_tap_cnt seen at the end of a stage 1 bit
// calibration. The compare (max < current) is registered one cycle ahead
// in cal1_idel_max_tap_we, and the update is applied only while
// cal1_state == CAL1_DONE.
// NOTE(review): the condition guarding the two clearing assignments below
// (and the matching `else`) is not visible in this listing - presumably a
// reset; confirm against the original file.
969 cal1_idel_max_tap <= 6'b000000;
970 cal1_idel_max_tap_we <= 1'b0;
972 // pipeline latch enable for CAL1_IDEL_MAX_TAP - we have plenty
973 // of time, tap count gets updated, then dead cycles waiting for
974 // IDELAY output to settle
975 cal1_idel_max_tap_we <= (cal1_idel_max_tap < cal1_idel_tap_cnt);
976 // record maximum # of taps used for stg 1 cal
977 if ((cal1_state == CAL1_DONE) && cal1_idel_max_tap_we)
978 cal1_idel_max_tap <= cal1_idel_tap_cnt;
981 //*****************************************************************
983 always @(posedge clkdiv)
// Stage 1 (DQ-DQS) calibration state machine. States referenced below:
//   CAL1_IDLE, CAL1_INIT, CAL1_INC_IDEL, CAL1_FIND_FIRST_EDGE,
//   CAL1_FIRST_EDGE_IDEL_WAIT, CAL1_FOUND_FIRST_EDGE_WAIT,
//   CAL1_FIND_SECOND_EDGE, CAL1_SECOND_EDGE_IDEL_WAIT, CAL1_CALC_IDEL,
//   CAL1_DEC_IDEL, CAL1_DONE
// cal1_dlyce_dq / cal1_dlyinc_dq / cal1_ref_req are single-cycle pulses:
// they default to 0 every cycle and are overridden by the active state.
// NOTE(review): the reset condition, the `case` header, several state
// labels (e.g. for the IDLE, INIT, INC_IDEL, DEC_IDEL and DONE arms), and
// a number of `else` / `end` lines are not visible in this listing; the
// grouping of statements below is inferred from the surviving comments
// and should be confirmed against the original file.
985 calib_done[0] <= 1'b0;
986 calib_done_tmp[0] <= 1'bx;
987 calib_err[0] <= 1'b0;
988 count_dq <= {DQ_BITS{1'b0}};
989 next_count_dq <= {DQ_BITS{1'b0}};
990 cal1_bit_time_tap_cnt <= 6'bxxxxxx;
991 cal1_data_chk_last <= 2'bxx;
992 cal1_data_chk_last_valid <= 1'bx;
993 cal1_dlyce_dq <= 1'b0;
994 cal1_dlyinc_dq <= 1'b0;
995 cal1_dqs_dq_init_phase <= 1'bx;
996 cal1_first_edge_done <= 1'bx;
997 cal1_found_second_edge <= 1'bx;
998 cal1_first_edge_tap_cnt <= 6'bxxxxxx;
999 cal1_idel_dec_cnt <= 7'bxxxxxxx;
1000 cal1_idel_inc_cnt <= 6'bxxxxxx;
1001 cal1_ref_req <= 1'b0;
1002 cal1_state <= CAL1_IDLE;
1004 // default values for all "pulse" outputs
1005 cal1_ref_req <= 1'b0;
1006 cal1_dlyce_dq <= 1'b0;
1007 cal1_dlyinc_dq <= 1'b0;
// bit counters are held at 0 until calib_start[0] launches calibration
1011 count_dq <= {DQ_BITS{1'b0}};
1012 next_count_dq <= {DQ_BITS{1'b0}};
1013 if (calib_start[0]) begin
1014 calib_done[0] <= 1'b0;
1015 calib_done_tmp[0] <= 1'b0;
1016 cal1_state <= CAL1_INIT;
// per-bit initialization before stepping the IDELAY to its starting point
1021 cal1_data_chk_last_valid <= 1'b0;
1022 cal1_found_second_edge <= 1'b0;
1023 cal1_dqs_dq_init_phase <= 1'b0;
1024 cal1_idel_inc_cnt <= 6'b000000;
1025 cal1_state <= CAL1_INC_IDEL;
1028 // increment DQ IDELAY so that either: (1) DQS starts somewhere in
1029 // first rising DQ window, or (2) DQS starts in first falling DQ
1030 // window. The amount to shift is frequency dependent (and is either
1031 // precalculated by MIG or possibly adjusted by the user)
// one increment pulse per cycle until DQ_IDEL_INIT pulses have been issued,
// then wait for idel_set_wait to deassert before searching for an edge
1033 if ((cal1_idel_inc_cnt == DQ_IDEL_INIT) && !idel_set_wait) begin
1034 cal1_state <= CAL1_FIND_FIRST_EDGE;
1035 end else if (cal1_idel_inc_cnt != DQ_IDEL_INIT) begin
1036 cal1_idel_inc_cnt <= cal1_idel_inc_cnt + 1;
1037 cal1_dlyce_dq <= 1'b1;
1038 cal1_dlyinc_dq <= 1'b1;
1041 // look for first edge
1042 CAL1_FIND_FIRST_EDGE: begin
1043 // Determine DQS-DQ phase if we can detect enough of a valid window
1044 if (cal1_found_window)
1045 cal1_dqs_dq_init_phase <= ~cal1_found_rising;
1046 // find first edge - if found then record position
1047 if (cal1_detect_edge) begin
1048 cal1_state <= CAL1_FOUND_FIRST_EDGE_WAIT;
1049 cal1_first_edge_done <= 1'b0;
1050 cal1_first_edge_tap_cnt <= cal1_idel_tap_cnt;
1051 cal1_data_chk_last_valid <= 1'b0;
1053 // otherwise, store the current value of DATA_CHK, increment
1054 // DQ IDELAY, and compare again
1055 cal1_state <= CAL1_FIRST_EDGE_IDEL_WAIT;
1056 cal1_data_chk_last <= cal1_data_chk_r;
1057 // avoid comparing against DATA_CHK_LAST for previous iteration
1058 cal1_data_chk_last_valid <= 1'b1;
1059 cal1_dlyce_dq <= 1'b1;
1060 cal1_dlyinc_dq <= 1'b1;
1064 // wait for DQ IDELAY to settle
1065 CAL1_FIRST_EDGE_IDEL_WAIT:
1067 cal1_state <= CAL1_FIND_FIRST_EDGE;
1069 // delay state between finding first edge and looking for second
1070 // edge. Necessary in order to invalidate CAL1_FOUND_WINDOW before
1071 // starting to look for second edge
1072 CAL1_FOUND_FIRST_EDGE_WAIT:
1073 cal1_state <= CAL1_FIND_SECOND_EDGE;
1075 // Try and find second edge
1076 CAL1_FIND_SECOND_EDGE: begin
1077 // When looking for 2nd edge, first make sure data stabilized (by
1078 // detecting valid data window) - needed to avoid false edges
1079 if (cal1_found_window) begin
1080 cal1_first_edge_done <= 1'b1;
1081 cal1_dqs_dq_init_phase <= cal1_found_rising;
1083 // exit if run out of taps to increment
1084 if (cal1_idel_tap_limit_hit)
1085 cal1_state <= CAL1_CALC_IDEL;
1087 // found second edge, record the current edge count
1088 if (cal1_first_edge_done && cal1_detect_edge) begin
1089 cal1_state <= CAL1_CALC_IDEL;
1090 cal1_found_second_edge <= 1'b1;
// bit time (in taps) = distance between the two edges, inclusive
1091 cal1_bit_time_tap_cnt <= cal1_idel_tap_cnt -
1092 cal1_first_edge_tap_cnt + 1;
1094 cal1_state <= CAL1_SECOND_EDGE_IDEL_WAIT;
1095 cal1_data_chk_last <= cal1_data_chk_r;
1096 cal1_data_chk_last_valid <= 1'b1;
1097 cal1_dlyce_dq <= 1'b1;
1098 cal1_dlyinc_dq <= 1'b1;
1103 // wait for DQ IDELAY to settle, then store ISERDES output
1104 CAL1_SECOND_EDGE_IDEL_WAIT:
1106 cal1_state <= CAL1_FIND_SECOND_EDGE;
1108 // pipeline delay state to calculate amount to decrement DQ IDELAY
1109 // NOTE: We're calculating the amount to decrement by, not the
1110 // absolute setting for DQ IDELAY
1111 CAL1_CALC_IDEL: begin
1112 // if two edges found
1113 if (cal1_found_second_edge)
1114 // case 1: DQS was in DQ window to start with. First edge found
1115 // corresponds to left edge of DQ rising window. Backup by 1.5*BT
1116 // NOTE: In this particular case, it is possible to decrement
1117 // "below 0" in the case where DQS delay is less than 0.5*BT,
1118 // need to limit decrement to prevent IDELAY tap underflow
1119 if (!cal1_dqs_dq_init_phase)
1120 cal1_idel_dec_cnt <= {1'b0, cal1_bit_time_tap_cnt} +
1121 {1'b0, (cal1_bit_time_tap_cnt >> 1)};
1122 // case 2: DQS was in wrong DQ window (in DQ falling window).
1123 // First edge found is right edge of DQ rising window. Second
1124 // edge is left edge of DQ rising window. Backup by 0.5*BT
1126 cal1_idel_dec_cnt <= {1'b0, (cal1_bit_time_tap_cnt >> 1)};
1127 // if only one edge found - assume will always be case 1 - DQS in
1128 // DQ window. Case 2 only possible if path delay on DQS > 5ns
1130 cal1_idel_dec_cnt <= cal1_low_freq_idel_dec;
1131 cal1_state <= CAL1_DEC_IDEL;
1134 // decrement DQ IDELAY for final adjustment
1136 // once adjustment is complete, we're done with calibration for
1137 // this DQ, now return to IDLE state and repeat for next DQ
1138 // Add underflow protection for case of 2 edges found and DQS
1139 // starting in DQ window (see comments for above state) - note we
1140 // have to take into account delayed value of CAL1_IDEL_TAP_CNT -
1141 // gets updated one clock cycle after CAL1_DLYCE/INC_DQ
1142 if ((cal1_idel_dec_cnt == 7'b0000000) ||
1143 (cal1_dlyce_dq && (cal1_idel_tap_cnt == 6'b000001))) begin
1144 cal1_state <= CAL1_DONE;
1145 // stop when all DQ's calibrated, or DQ[0] cal'ed (for sim)
1146 if ((count_dq == DQ_WIDTH-1) || (SIM_ONLY != 0))
1147 calib_done_tmp[0] <= 1'b1;
1149 // need for VHDL simulation to prevent out-of-index error
1150 next_count_dq <= count_dq + 1;
1152 // keep decrementing until final tap count reached
1153 cal1_idel_dec_cnt <= cal1_idel_dec_cnt - 1;
1154 cal1_dlyce_dq <= 1'b1;
1155 cal1_dlyinc_dq <= 1'b0;
1158 // delay state to allow count_dq and DATA_CHK to point to the next
1159 // DQ bit (allows us to potentially begin checking for an edge on
1160 // next DQ right away).
1162 if (!idel_set_wait) begin
1163 count_dq <= next_count_dq;
1164 if (calib_done_tmp[0]) begin
1165 calib_done[0] <= 1'b1;
1166 cal1_state <= CAL1_IDLE;
1168 // request auto-refresh after every 8-bits calibrated to
1169 // avoid tRAS violation
// both the refresh and no-refresh paths continue to CAL1_INIT; the refresh
// path additionally pulses cal1_ref_req
1170 if (cal1_refresh) begin
1171 cal1_ref_req <= 1'b1;
1173 cal1_state <= CAL1_INIT;
1175 // if no need this time for refresh, proceed to next bit
1176 cal1_state <= CAL1_INIT;
1182 //***************************************************************************
1183 // Second stage calibration: DQS-FPGA Clock
1184 // Algorithm Description:
1185 // 1. Assumes a training pattern that will produce a pattern oscillating at
1186 // half the core clock frequency each on rise and fall outputs, and such
1187 // that rise and fall outputs are 180 degrees out of phase from each
1188 // other. Note that since the calibration logic runs at half the speed
1189 // of the interface, expect that data sampled with the slow clock always
1190 // to be constant (either always = 1, or = 0, and rise data != fall data)
1191 // unless we cross the edge of the data valid window
1192 // 2. Start by setting RD_DATA_SEL = 0. This selects the rising capture data
1193 // sync'ed to rising edge of core clock, and falling edge data sync'ed
1194 // to falling edge of core clock
1195 // 3. Start looking for an edge. An edge is defined as either: (1) a
1196 // change in capture value or (2) an invalid capture value (e.g. rising
1197 // data != falling data for that same clock cycle).
1198 // 4. If an edge is found, go to step (6). If edge hasn't been found, then
1199 // set RD_DATA_SEL = 1, and try again.
1200 // 5. If no edge is found, then increment IDELAY and return to step (3)
1201 // 6. If an edge is found, then invert RD_DATA_SEL - this shifts the
1202 // capture point 180 degrees from the edge of the window (minus duty
1203 // cycle distortion, delay skew between rising/falling edge capture
1205 // 7. If no edge is found by CAL2_IDEL_TAP_LIMIT (= 63 - # taps used for
1206 // stage 1 calibration), then decrement IDELAY (without reinverting
1207 // RD_DATA_SEL) by CAL2_IDEL_TAP_LIMIT/2. This guarantees we at least
1208 // have CAL2_IDEL_TAP_LIMIT/2 of slack both before and after the
1209 // capture point (not optimal, but best we can do not having found an
1210 // of the window). This happens only for very low frequencies.
1211 // 8. Repeat for each DQS group.
1212 // NOTE: Step 6 is not optimal. A better (and perhaps more complicated)
1213 // algorithm might be to find both edges of the data valid window (using
1214 // the same polarity of RD_DATA_SEL), and then decrement to the midpoint.
1215 //***************************************************************************
1217 // RD_DATA_SEL should be tagged with FROM-TO (multi-cycle) constraint in
1218 // UCF file to relax timing. This net is "pseudo-static" (after value is
1219 // changed, FSM waits number of cycles before using the output).
1220 // Note that we are adding one clock cycle of delay (to isolate it from
1221 // the other logic CAL2_RD_DATA_SEL feeds), make sure FSM waits long
1222 // enough to compensate (by default it does, it waits a few cycles more
1223 // than minimum # of clock cycles)
// One FDRSE flop per DQS group: D = cal2_rd_data_sel[rd_i],
// Q = rd_data_sel[rd_i].
// NOTE(review): the genvar/generate lines and the clock, clock-enable, set
// and reset port connections of the FDRSE are not visible in this listing.
1226 for (rd_i = 0; rd_i < DQS_WIDTH; rd_i = rd_i+1) begin: gen_rd_data_sel
1227 FDRSE u_ff_rd_data_sel
1229 .Q (rd_data_sel[rd_i]),
1232 .D (cal2_rd_data_sel[rd_i]),
1240 //*****************************************************************
1241 // Max number of taps used for stg2 cal dependent on number of taps
1242 // used for stg1 (give priority to stg1 cal - let it use as many
1243 // taps as it needs - the remainder of the IDELAY taps can be used
1245 //*****************************************************************
1247 always @(posedge clkdiv)
// taps left for stage 2 = 63 minus the maximum tap count recorded in stage 1
1248 cal2_idel_tap_limit <= 6'b111111 - cal1_idel_max_tap;
1250 //*****************************************************************
1251 // second stage calibration uses readback pattern of "1100" (i.e.
1252 // 1st rising = 1, 1st falling = 1, 2nd rising = 0, 2nd falling = 0)
1253 // only look at the first bit of each DQS group
1254 //*****************************************************************
1256 // asserted when captured data has changed since IDELAY was
1257 // incremented, or when we're right on the edge (i.e. rise data !=
// fall data in the same cycle). The stored "last" values are compared only
// when valid, and only the set matching the current capture polarity is
// used: the *_pos set when cal2_curr_sel = 0, the *_neg set when
// cal2_curr_sel = 1.
1259 assign cal2_detect_edge =
1260 ((((rdd_rise_q1 != cal2_rd_data_rise_last_pos) ||
1261 (rdd_fall_q1 != cal2_rd_data_fall_last_pos)) &&
1262 cal2_rd_data_last_valid_pos && (!cal2_curr_sel)) ||
1263 (((rdd_rise_q1 != cal2_rd_data_rise_last_neg) ||
1264 (rdd_fall_q1 != cal2_rd_data_fall_last_neg)) &&
1265 cal2_rd_data_last_valid_neg && (cal2_curr_sel)) ||
1266 (rdd_rise_q1 != rdd_fall_q1));
1268 //*****************************************************************
1269 // keep track of edge tap counts found, and whether we've
1270 // incremented to the maximum number of taps allowed
1271 // NOTE: Assume stage 2 cal always increments the tap count (never
1272 // decrements) when searching for edge of the data valid window
1273 //*****************************************************************
1275 always @(posedge clkdiv)
// Stage 2 tap counter: cleared while cal2_state == CAL2_INIT, incremented on
// every cal2_dlyce_dqs pulse (cal2_dlyinc_dqs is not examined, so decrement
// pulses are also counted as increments here - see the NOTE above about
// assuming stage 2 only increments while searching). The limit flag is set
// when the pre-increment count equals cal2_idel_tap_limit - 1.
1276 if (cal2_state == CAL2_INIT) begin
1277 cal2_idel_tap_limit_hit <= 1'b0;
1278 cal2_idel_tap_cnt <= 6'b000000;
1279 end else if (cal2_dlyce_dqs) begin
1280 cal2_idel_tap_cnt <= cal2_idel_tap_cnt + 1;
1281 cal2_idel_tap_limit_hit <= (cal2_idel_tap_cnt ==
1282 cal2_idel_tap_limit - 1);
1285 //*****************************************************************
1287 always @(posedge clkdiv)
// Stage 2 (DQS-FPGA clock) calibration state machine. States referenced
// below: CAL2_IDLE, CAL2_INIT, CAL2_INIT_IDEL_WAIT, CAL2_FIND_EDGE_POS,
// CAL2_FIND_EDGE_IDEL_WAIT_POS, CAL2_FIND_EDGE_NEG,
// CAL2_FIND_EDGE_IDEL_WAIT_NEG, CAL2_DEC_IDEL, CAL2_DONE.
// cal2_ref_req / cal2_dlyce_dqs / cal2_dlyinc_dqs default to 0 each cycle
// and are pulsed by the active state.
// NOTE(review): the reset condition, the `case` header, some state labels,
// the wait-state conditions, the SIM_ONLY tests around the
// "set all DQS groups" assignments, and several `else` / `end` lines are
// not visible in this listing - confirm structure against the original.
1289 calib_done[1] <= 1'b0;
1290 calib_done_tmp[1] <= 1'bx;
1291 calib_err[1] <= 1'b0;
1293 next_count_dqs <= 'b0;
1294 cal2_dlyce_dqs <= 1'b0;
1295 cal2_dlyinc_dqs <= 1'b0;
1296 cal2_idel_dec_cnt <= 6'bxxxxxx;
1297 cal2_rd_data_last_valid_neg <= 1'bx;
1298 cal2_rd_data_last_valid_pos <= 1'bx;
1299 cal2_rd_data_sel <= 'b0;
1300 cal2_ref_req <= 1'b0;
1301 cal2_state <= CAL2_IDLE;
// default values for pulse outputs
1303 cal2_ref_req <= 1'b0;
1304 cal2_dlyce_dqs <= 1'b0;
1305 cal2_dlyinc_dqs <= 1'b0;
1310 next_count_dqs <= 'b0;
1311 if (calib_start[1]) begin
1312 cal2_rd_data_sel <= {DQS_WIDTH{1'b0}};
1313 calib_done[1] <= 1'b0;
1314 calib_done_tmp[1] <= 1'b0;
1315 cal2_state <= CAL2_INIT;
1319 // Pass through this state every time we calibrate a new DQS group
// start each group with positive-edge capture and no stored samples
1321 cal2_curr_sel <= 1'b0;
1322 cal2_rd_data_last_valid_neg <= 1'b0;
1323 cal2_rd_data_last_valid_pos <= 1'b0;
1324 cal2_state <= CAL2_INIT_IDEL_WAIT;
1327 // Stall state only used if calibration run more than once. Can take
1328 // this state out if design never runs calibration more than once.
1329 // We need this state to give time for MUX'ed data to settle after
1330 // resetting RD_DATA_SEL
1331 CAL2_INIT_IDEL_WAIT:
1333 cal2_state <= CAL2_FIND_EDGE_POS;
1335 // Look for an edge - first check "positive-edge" stage 2 capture
1336 CAL2_FIND_EDGE_POS: begin
1337 // if found an edge, then switch to the opposite edge stage 2
1338 // capture and we're done - no need to decrement the tap count,
1339 // since switching to the opposite edge will shift the capture
1340 // point by 180 degrees
1341 if (cal2_detect_edge) begin
1342 cal2_curr_sel <= 1'b1;
1343 cal2_state <= CAL2_DONE;
1344 // set all DQS groups to be the same for simulation
1346 cal2_rd_data_sel <= {DQS_WIDTH{1'b1}};
1348 cal2_rd_data_sel[count_dqs] <= 1'b1;
1349 if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
1350 calib_done_tmp[1] <= 1'b1;
1352 // MIG 2.1: Fix for simulation out-of-bounds error when
1353 // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
1354 next_count_dqs <= count_dqs + 1;
1356 // otherwise, invert polarity of stage 2 capture and look for
1357 // an edge with opposite capture clock polarity
// the current sample is saved as the positive-polarity reference for the
// next pass at this polarity
1358 cal2_curr_sel <= 1'b1;
1359 cal2_rd_data_sel[count_dqs] <= 1'b1;
1360 cal2_state <= CAL2_FIND_EDGE_IDEL_WAIT_POS;
1361 cal2_rd_data_rise_last_pos <= rdd_rise_q1;
1362 cal2_rd_data_fall_last_pos <= rdd_fall_q1;
1363 cal2_rd_data_last_valid_pos <= 1'b1;
1367 // Give time to switch from positive-edge to negative-edge second
1368 // stage capture (need time for data to filter through pipe stages)
1369 CAL2_FIND_EDGE_IDEL_WAIT_POS:
1371 cal2_state <= CAL2_FIND_EDGE_NEG;
1373 // Look for an edge - check "negative-edge" stage 2 capture
1375 if (cal2_detect_edge) begin
1376 cal2_curr_sel <= 1'b0;
1377 cal2_state <= CAL2_DONE;
1378 // set all DQS groups to be the same for simulation
1380 cal2_rd_data_sel <= {DQS_WIDTH{1'b0}};
1382 cal2_rd_data_sel[count_dqs] <= 1'b0;
1383 if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
1384 calib_done_tmp[1] <= 1'b1;
1386 // MIG 2.1: Fix for simulation out-of-bounds error when
1387 // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
1388 next_count_dqs <= count_dqs + 1;
1389 end else if (cal2_idel_tap_limit_hit) begin
1390 // otherwise, if we've run out of taps, then immediately
1391 // backoff by half # of taps used - that's our best estimate
1392 // for optimal calibration point. Doesn't matter which
1393 // polarity we're using for capture (we don't know which one is
// decrement count = cal2_idel_tap_limit / 2 (upper five bits, zero-extended)
1395 cal2_idel_dec_cnt <= {1'b0, cal2_idel_tap_limit[5:1]};
1396 cal2_state <= CAL2_DEC_IDEL;
1397 if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
1398 calib_done_tmp[1] <= 1'b1;
1400 // MIG 2.1: Fix for simulation out-of-bounds error when
1401 // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
1402 next_count_dqs <= count_dqs + 1;
1404 // otherwise, increment IDELAY, and start looking for edge again
1405 cal2_curr_sel <= 1'b0;
1406 cal2_rd_data_sel[count_dqs] <= 1'b0;
1407 cal2_state <= CAL2_FIND_EDGE_IDEL_WAIT_NEG;
1408 cal2_rd_data_rise_last_neg <= rdd_rise_q1;
1409 cal2_rd_data_fall_last_neg <= rdd_fall_q1;
1410 cal2_rd_data_last_valid_neg <= 1'b1;
1411 cal2_dlyce_dqs <= 1'b1;
1412 cal2_dlyinc_dqs <= 1'b1;
1415 CAL2_FIND_EDGE_IDEL_WAIT_NEG:
1417 cal2_state <= CAL2_FIND_EDGE_POS;
1419 // if no edge found, then decrement by half # of taps used
1420 CAL2_DEC_IDEL: begin
1421 if (cal2_idel_dec_cnt == 6'b000000)
1422 cal2_state <= CAL2_DONE;
1424 cal2_idel_dec_cnt <= cal2_idel_dec_cnt - 1;
1425 cal2_dlyce_dqs <= 1'b1;
1426 cal2_dlyinc_dqs <= 1'b0;
1430 // delay state to allow count_dqs and ISERDES data to point to next
1431 // DQ bit (DQS group) before going to INIT
1433 if (!idel_set_wait) begin
1434 count_dqs <= next_count_dqs;
1435 if (calib_done_tmp[1]) begin
1436 calib_done[1] <= 1'b1;
1437 cal2_state <= CAL2_IDLE;
1439 // request auto-refresh after every DQS group calibrated to
1440 // avoid tRAS violation
1441 cal2_ref_req <= 1'b1;
1443 cal2_state <= CAL2_INIT;
1449 //***************************************************************************
1450 // Stage 3 calibration: Read Enable
1452 // read enable calibration determines the "round-trip" time (in # of CLK0
1453 // cycles) between when a read command is issued by the controller, and
1454 // when the corresponding read data is synchronized into the CLK0 domain
1455 // this is a long delay chain to delay read enable signal from controller/
1456 // initialization logic (i.e. this is used for both initialization and
1457 // during normal controller operation). Stage 3 calibration logic decides
1458 // which delayed version is appropriate to use (which is affected by the
1459 // round trip delay of DQ/DQS) as a "valid" signal to tell rest of logic
1460 // when the captured data output from ISERDES is valid.
1461 //***************************************************************************
1463 //*****************************************************************
1464 // Delay chains: Use shift registers
1465 // Two sets of delay chains are used:
1466 // 1. One to delay RDEN from PHY_INIT module for calibration
1467 // purposes (delay required for RDEN for calibration is different
1468 // than during normal operation)
1469 // 2. One per DQS group to delay RDEN from controller for normal
1470 // operation - the value to delay for each DQS group can be different
1471 // as is determined during calibration
1472 //*****************************************************************
1474 //*****************************************************************
1475 // First delay chain, use only for calibration
1476 // input = asserted on rising edge of RDEN from PHY_INIT module
1477 //*****************************************************************
1479 always @(posedge clk) begin
// Register the read enables in the clk domain and generate a one-cycle
// pulse (calib_rden_edge_r) on the rising edge of the registered
// phy_init_rden.
1480 ctrl_rden_r <= ctrl_rden;
1481 phy_init_rden_r <= phy_init_rden;
1482 phy_init_rden_r1 <= phy_init_rden_r;
// rising-edge detect: high now, low one cycle earlier
1483 calib_rden_edge_r <= phy_init_rden_r & ~phy_init_rden_r1;
1486 // Calibration shift register used for both Stage 3 and Stage 4 cal
1487 // (not strictly necessary for stage 4, but use as an additional check
1488 // to make sure we're checking for correct data on the right clock cycle)
// Selects the SRL address from either the stage 3 (cal3_rden_srl_a) or the
// stage 4 (cal4_rden_srl_a) value.
// NOTE(review): the `if` / `else` lines that choose between the two
// assignments are not visible in this listing - confirm the select
// condition against the original file.
1489 always @(posedge clkdiv)
1491 calib_rden_srl_a <= cal3_rden_srl_a;
1493 calib_rden_srl_a <= cal4_rden_srl_a;
1495 // Flops for targeting of multi-cycle path in UCF
// Five FDRSE flops register calib_rden_srl_a[4:0] into calib_rden_srl_a_r,
// which drives the address (.A) of the SRLC32E below. The SRLC32E delays
// calib_rden_edge_r by the addressed amount and its output is registered by
// a further FDRSE into calib_rden_srl_out_r.
// NOTE(review): the generate/endgenerate lines and the clock, clock-enable,
// set and reset port connections of these primitives are not visible in
// this listing.
1496 genvar cal_rden_ff_i;
1498 for (cal_rden_ff_i = 0; cal_rden_ff_i < 5;
1499 cal_rden_ff_i = cal_rden_ff_i+1) begin: gen_cal_rden_dly
1500 FDRSE u_ff_cal_rden_dly
1502 .Q (calib_rden_srl_a_r[cal_rden_ff_i]),
1505 .D (calib_rden_srl_a[cal_rden_ff_i]),
1513 SRLC32E u_calib_rden_srl
1515 .Q (calib_rden_srl_out),
1517 .A (calib_rden_srl_a_r),
1520 .D (calib_rden_edge_r)
1523 FDRSE u_calib_rden_srl_out_r
1525 .Q (calib_rden_srl_out_r),
1528 .D (calib_rden_srl_out),
1533 // convert to CLKDIV domain. Two versions are generated because we need
1534 // to be able to tell exactly which fast (clk) clock cycle the read
1535 // enable was asserted in. Only one of CALIB_DATA_VALID or
1536 // CALIB_DATA_VALID_STGD will be asserted for any given shift value
// (the registers below are named calib_rden_valid / calib_rden_valid_stgd)
// calib_rden_srl_out_r1 is calib_rden_srl_out_r delayed by one clk cycle;
// clkdiv then samples both the undelayed and the delayed version.
1537 always @(posedge clk)
1538 calib_rden_srl_out_r1 <= calib_rden_srl_out_r;
1540 always @(posedge clkdiv) begin
1541 calib_rden_valid <= calib_rden_srl_out_r;
1542 calib_rden_valid_stgd <= calib_rden_srl_out_r1;
1545 //*****************************************************************
1546 // Second set of delay chains, use for normal reads
1547 // input = RDEN from controller
1548 //*****************************************************************
1550 // Flops for targeting of multi-cycle path in UCF
// One flop per bit of rden_dly (5 bits per DQS group), registering
// rden_dly[i] into rden_dly_r[i].
// NOTE(review): the genvar/generate lines, the primitive instance line, and
// the clock/enable/set/reset port connections are not visible in this
// listing.
1553 for (rden_ff_i = 0; rden_ff_i < 5*DQS_WIDTH;
1554 rden_ff_i = rden_ff_i+1) begin: gen_rden_dly
1557 .Q (rden_dly_r[rden_ff_i]),
1560 .D (rden_dly[rden_ff_i]),
1568 // NOTE: Comment this section explaining purpose of SRL's
// Per DQS group: a primitive addressed by that group's 5-bit slice of
// rden_dly_r (bits [rden_i*5+4 : rden_i*5]) produces rden_srl_out[rden_i],
// which is then registered by an FDRSE into calib_rden[rden_i].
// NOTE(review): the instance line and data/clock ports of the addressed
// primitive are not visible in this listing; presumably an SRLC32E fed by
// the registered controller read enable, mirroring the calibration delay
// chain - confirm against the original file.
1571 for (rden_i = 0; rden_i < DQS_WIDTH; rden_i = rden_i + 1) begin: gen_rden
1574 .Q (rden_srl_out[rden_i]),
1576 .A ({rden_dly_r[(rden_i*5)+4],
1577 rden_dly_r[(rden_i*5)+3],
1578 rden_dly_r[(rden_i*5)+2],
1579 rden_dly_r[(rden_i*5)+1],
1580 rden_dly_r[(rden_i*5)]}),
1585 FDRSE u_calib_rden_r
1587 .Q (calib_rden[rden_i]),
1590 .D (rden_srl_out[rden_i]),
1597 //*****************************************************************
1598 // indicates that current received data is the correct pattern. Check both
1599 // rising and falling data for first DQ in each DQS group. Note that
1600 // we're checking using a pipelined version of read data, so need to take
1601 // this inherent delay into account in determining final read valid delay
1602 // Data is written to the memory in the following order (first -> last):
1603 // 0x1, 0xE, 0xE, 0x1, 0x1, 0xE, 0xE, 0x1
1604 // Looking just at LSb, expect data in sequence (in binary):
1605 // 1, 0, 0, 1, 1, 0, 0, 1
1606 // Check for the presence of the first 7 words, and compensate read valid
1607 // delay accordingly. Don't check last falling edge data, it may be
1608 // corrupted by the DQS tri-state glitch at end of read postamble
1609 // (glitch protection not yet active until stage 4 cal)
1610 //*****************************************************************
1612 always @(posedge clkdiv) begin
// Pipeline the four read-data samples by one clkdiv cycle (*_r); the Q1
// rise/fall samples get a second stage (*_r1). These delayed copies are
// used by the stage 3 pattern comparison.
1613 rdd_rise_q1_r <= rdd_rise_q1;
1614 rdd_fall_q1_r <= rdd_fall_q1;
1615 rdd_rise_q2_r <= rdd_rise_q2;
1616 rdd_fall_q2_r <= rdd_fall_q2;
1617 rdd_rise_q1_r1 <= rdd_rise_q1_r;
1618 rdd_fall_q1_r1 <= rdd_fall_q1_r;
1621 always @(posedge clkdiv) begin
// Registered pattern detectors for the sequence 1,0,0,1,1,0,0 (seven
// samples, oldest first). cal3_data_match covers the aligned case and
// cal3_data_match_stgd the staggered case, which uses samples one pipeline
// stage older for Q1.
1622 // For the following sequence from memory:
1623 // rise[0], fall[0], rise[1], fall[1]
1624 // if data is aligned out of fabric ISERDES:
1625 // RDD_RISE_Q2 = rise[0]
1626 // RDD_FALL_Q2 = fall[0]
1627 // RDD_RISE_Q1 = rise[1]
1628 // RDD_FALL_Q1 = fall[1]
1629 cal3_data_match <= ((rdd_rise_q2_r == 1) &&
1630 (rdd_fall_q2_r == 0) &&
1631 (rdd_rise_q1_r == 0) &&
1632 (rdd_fall_q1_r == 1) &&
1633 (rdd_rise_q2 == 1) &&
1634 (rdd_fall_q2 == 0) &&
1635 (rdd_rise_q1 == 0));
1637 // if data is staggered out of fabric ISERDES:
1638 // RDD_RISE_Q1_R = rise[0]
1639 // RDD_FALL_Q1_R = fall[0]
1640 // RDD_RISE_Q2 = rise[1]
1641 // RDD_FALL_Q2 = fall[1]
1642 cal3_data_match_stgd <= ((rdd_rise_q1_r1 == 1) &&
1643 (rdd_fall_q1_r1 == 0) &&
1644 (rdd_rise_q2_r == 0) &&
1645 (rdd_fall_q2_r == 1) &&
1646 (rdd_rise_q1_r == 1) &&
1647 (rdd_fall_q1_r == 0) &&
1648 (rdd_rise_q2 == 0));
// cal3_rden_dly    : SRL address to store for normal operation = calibration
//                    SRL address minus CAL3_RDEN_SRL_DLY_DELTA
// cal3_data_valid  : either delayed read-enable version is asserted
// cal3_match_found : a read-enable version is asserted together with its
//                    corresponding (aligned or staggered) pattern match
// NOTE(review): no underflow guard on the subtraction is visible here -
// presumably RDEN_BASE_DELAY keeps cal3_rden_srl_a >= the delta; confirm.
1651 assign cal3_rden_dly = cal3_rden_srl_a - CAL3_RDEN_SRL_DLY_DELTA;
1652 assign cal3_data_valid = (calib_rden_valid | calib_rden_valid_stgd);
1653 assign cal3_match_found
1654 = ((calib_rden_valid && cal3_data_match) ||
1655 (calib_rden_valid_stgd && cal3_data_match_stgd));
1657 // when calibrating, check to see which clock cycle (after the read is
1658 // issued) does the expected data pattern arrive. Record this result
1659 // NOTE: Can add error checking here in case valid data not found on any
1660 // of the available pipeline stages
// Stage 3 state machine. States referenced below: CAL3_IDLE, CAL3_INIT,
// CAL3_RDEN_PIPE_CLR_WAIT, CAL3_DETECT, CAL3_DONE. For each DQS group the
// SRL address starts at RDEN_BASE_DELAY and is incremented until the
// expected pattern coincides with the delayed read enable; running past
// 5'b11111 sets calib_err_2[0].
// NOTE(review): the reset condition, the `case` header, some state labels
// and several `else` / `end` lines are not visible in this listing -
// confirm structure against the original file.
1661 always @(posedge clkdiv) begin
1663 cal3_rden_srl_a <= 5'bxxxxx;
1664 cal3_state <= CAL3_IDLE;
1665 calib_done[2] <= 1'b0;
1666 calib_err_2[0] <= 1'b0;
1667 count_rden <= {DQS_WIDTH{1'b0}};
1668 rden_dly <= {5*DQS_WIDTH{1'b0}};
1673 count_rden <= {DQS_WIDTH{1'b0}};
1674 if (calib_start[2]) begin
1675 calib_done[2] <= 1'b0;
1676 cal3_state <= CAL3_INIT;
1681 cal3_rden_srl_a <= RDEN_BASE_DELAY;
1682 // let SRL pipe clear after loading initial shift value
1683 cal3_state <= CAL3_RDEN_PIPE_CLR_WAIT;
1687 if (cal3_data_valid)
1688 // if match found at the correct clock cycle
1689 if (cal3_match_found) begin
1691 // For simulation, load SRL addresses for all DQS with same value
1692 if (SIM_ONLY != 0) begin
1693 for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_rden_dly
1694 rden_dly[(i*5)] <= cal3_rden_dly[0];
1695 rden_dly[(i*5)+1] <= cal3_rden_dly[1];
1696 rden_dly[(i*5)+2] <= cal3_rden_dly[2];
1697 rden_dly[(i*5)+3] <= cal3_rden_dly[3];
1698 rden_dly[(i*5)+4] <= cal3_rden_dly[4];
// otherwise only the 5-bit slice of the DQS group being calibrated is loaded
1701 rden_dly[(count_rden*5)] <= cal3_rden_dly[0];
1702 rden_dly[(count_rden*5)+1] <= cal3_rden_dly[1];
1703 rden_dly[(count_rden*5)+2] <= cal3_rden_dly[2];
1704 rden_dly[(count_rden*5)+3] <= cal3_rden_dly[3];
1705 rden_dly[(count_rden*5)+4] <= cal3_rden_dly[4];
1708 // Use for stage 4 calibration
// (stores the uncompensated SRL address, not cal3_rden_dly)
1709 calib_rden_dly[(count_rden*5)] <= cal3_rden_srl_a[0];
1710 calib_rden_dly[(count_rden*5)+1] <= cal3_rden_srl_a[1];
1711 calib_rden_dly[(count_rden*5)+2] <= cal3_rden_srl_a[2];
1712 calib_rden_dly[(count_rden*5)+3] <= cal3_rden_srl_a[3];
1713 calib_rden_dly[(count_rden*5)+4] <= cal3_rden_srl_a[4];
1714 cal3_state <= CAL3_DONE;
1716 // If we run out of stages to shift, without finding correct
1717 // result, then stop and assert error
1718 if (cal3_rden_srl_a == 5'b11111) begin
1719 calib_err_2[0] <= 1'b1;
1720 cal3_state <= CAL3_IDLE;
1722 // otherwise, increase the shift value and try again
1723 cal3_rden_srl_a <= cal3_rden_srl_a + 1;
1724 cal3_state <= CAL3_RDEN_PIPE_CLR_WAIT;
1728 // give additional time for RDEN_R pipe to clear from effects of
1729 // previous pipeline or IDELAY tap change
1730 CAL3_RDEN_PIPE_CLR_WAIT:
1731 if (calib_rden_pipe_cnt == 5'b00000)
1732 cal3_state <= CAL3_DETECT;
// finished when the last DQS group is done, or after the first group when
// SIM_ONLY is non-zero; otherwise advance to the next group
1735 if ((count_rden == DQS_WIDTH-1) || (SIM_ONLY != 0)) begin
1736 calib_done[2] <= 1'b1;
1737 cal3_state <= CAL3_IDLE;
1739 count_rden <= count_rden + 1;
1740 cal3_state <= CAL3_INIT;
1747 //*****************************************************************
1748 // Last part of stage 3 calibration - compensate for differences
1749 // in delay between different DQS groups. Assume that in the worst
1750 // case, DQS groups can only differ by one clock cycle. Data for
1751 // certain DQS groups must be delayed by one clock cycle.
1752 // NOTE: May need to increase allowable variation to greater than
1753 // one clock cycle in certain customer designs.
1755 // 1. Record shift delay value for DQS[0]
1756 // 2. Compare each DQS[x] delay value to that of DQS[0]:
1757 // - If different, then record this fact (RDEN_MUX)
1758 // - If greater than DQS[0], set RDEN_INC. Assume greater by
1759 // one clock cycle only - this is a key assumption, assume no
1760 // more than a one clock cycle variation.
1761 // - If less than DQS[0], set RDEN_DEC
1762 // 3. After calibration is complete, set control for DQS group
1763 // delay (CALIB_RDEN_SEL):
1764 // - If RDEN_DEC = 1, then assume that DQS[0] is the lowest
1765 // delay (and at least one other DQS group has a higher delay)
1767 // - If RDEN_INC = 1, then assume that DQS[0] is the highest
1768 // delay (and that all other DQS groups have the same or lower delay)
1770 // - If both RDEN_INC and RDEN_DEC = 1, then flag error
1771 // (variation is too high for this algorithm to handle)
1772 //*****************************************************************
1774 always @(posedge clkdiv) begin // records, per DQS group, whether its RDEN delay differs from DQS[0]; NOTE(review): several listing lines of this block (e.g. 1775, 1805, 1807, 1819, 1822) are absent from this excerpt - confirm against the full source
1776 calib_err_2[1] <= 1'b0;
1777 calib_rden_sel <= {DQS_WIDTH{1'bx}};
1779 rden_dly_0 <= 5'bxxxxx;
1781 rden_mux <= {DQS_WIDTH{1'b0}}; // every group starts out marked "same delay as DQS[0]"
1783 // if a match is found, then store the value of rden_dly
1784 if (!calib_done[2]) begin
1785 if ((cal3_state == CAL3_DETECT) && cal3_match_found) begin
1786 // store the value for DQS[0] as a reference
1787 if (count_rden == 0) begin
1788 // for simulation, RDEN calibration only happens for DQS[0]
1789 // set RDEN_MUX for all DQS groups to be the same as DQS[0]
1791 rden_mux <= {DQS_WIDTH{1'b0}};
1793 // otherwise, load values for DQS[0]
1794 rden_dly_0 <= cal3_rden_srl_a; // reference delay that later groups are compared against
1795 rden_mux[0] <= 1'b0;
1797 end else if (SIM_ONLY == 0) begin
1798 // for all other DQS groups, compare RDEN_DLY delay value with that of DQS[0] (rden_dly_0)
1800 if (rden_dly_0 != cal3_rden_srl_a) begin
1801 // record that current DQS group has a different delay
1802 // than DQS[0] (the "reference" DQS group)
1803 rden_mux[count_rden] <= 1'b1;
1804 if (rden_dly_0 > cal3_rden_srl_a) // DQS[0] delay is larger than this group's; NOTE(review): the guarded statement (listing line 1805) is not visible here
1806 else if (rden_dly_0 < cal3_rden_srl_a) // DQS[0] delay is smaller than this group's; NOTE(review): the guarded statement (listing line 1807) is not visible here
1808 // otherwise, if current DQS group has same delay as DQS[0],
1809 // then rden_mux[count_rden] remains at 0 (since rden_mux
1810 // array contents initialized to 0)
1815 // Otherwise - once stage 3 calibration is done (calib_done[2] set):
1816 // set final value for RDEN data delay
1817 // flag error if there's more than one cycle variation from DQS[0]
1818 calib_err_2[1] <= (rden_inc && rden_dec); // both directions seen => spread too large for this scheme
1820 // if DQS[0] delay represents max delay
1821 calib_rden_sel <= ~rden_mux; // select is the inverse of the "differs from DQS[0]" map
1823 // if DQS[0] delay represents min delay (or all the delays are
1824 // the same between DQS groups)
1825 calib_rden_sel <= rden_mux; // select equals the "differs from DQS[0]" map
1830 // flag error for stage 3 if appropriate: registered OR of the two stage 3
1830 // error sources - calib_err_2[0] (read-enable shift value reached 5'b11111
1830 // without a match) and calib_err_2[1] (both rden_inc and rden_dec set)
1831 always @(posedge clkdiv)
1832 calib_err[2] <= calib_err_2[0] | calib_err_2[1];
1834 //***************************************************************************
1835 // Stage 4 calibration: DQS gate
1836 //***************************************************************************
1838 //*****************************************************************
1839 // indicates that current received data is the correct pattern. Same as
1840 // for READ VALID calibration, except that the expected data sequence is
1841 // different since DQS gate is asserted after the 6th word.
1843 // Arrives from memory (at FPGA input) (R, F): 1 0 0 1 1 0 0 1
1844 // After gating the sequence looks like: 1 0 0 1 1 0 1 0 (7th word =
1845 // 5th word, 8th word = 6th word)
1846 // What if the gate timing is off? Need to make sure we can distinguish
1847 // between the results of correct vs. incorrect gate timing. We also use
1848 // the "read_valid" signal from stage 3 calibration to help us determine
1849 // when to check for a valid sequence for stage 4 calibration (i.e. use
1850 // CAL4_DATA_VALID in addition to CAL4_DATA_MATCH/CAL4_DATA_MATCH_STGD)
1851 // Note that since the gate signal from the CLK0 domain is synchronized
1852 // to the falling edge of DQS, that the effect of the gate will only be
1853 // seen starting with a rising edge data (although it is possible
1854 // the GATE IDDR output could go metastable and cause an unexpected result
1855 // on the first rising and falling edges after the gate is enabled).
1856 // Also note that the actual DQS glitch can come more than 0.5*tCK after
1857 // the last falling edge of DQS and the constraint for this path can
1858 // be > 0.5*tCK; however, this means when calibrating, the output of the
1859 // GATE IDDR may miss the setup time requirement of the rising edge flop
1860 // and only meet it for the falling edge flop. Therefore the rising
1861 // edge data immediately following the assertion of the gate can either
1862 // be a 1 or 0 (can rely on either)
1863 // As the timing on the gate is varied, we expect to see (sequence of
1864 // captured read data shown below):
1865 // - 1 0 0 1 1 0 0 1 (gate is really early, starts and ends before
1866 // read burst even starts)
1867 // - x 0 0 1 1 0 0 1 (gate pulse starts before the burst, and ends
1868 // - x y 0 1 1 0 0 1 sometime during the burst; x,y = 0, or 1, but
1869 // - x y x 1 1 0 0 1 all bits that show an x are the same value,
1870 // - x y x y 1 0 0 1 and y are the same value)
1871 // - x y x y x 0 0 1
1872 // - x y x y x y 0 1 (gate starts just before start of burst)
1873 // - 1 0 x 0 x 0 x 0 (gate starts after 1st falling word. The "x"
1874 // represents possibility that gate may not disable
1875 // clock for 2nd rising word in time)
1876 // - 1 0 0 1 x 1 x 1 (gate starts after 2nd falling word)
1877 // - 1 0 0 1 1 0 x 0 (gate starts after 3rd falling word - GOOD!!)
1878 // - 1 0 0 1 1 0 0 1 (gate starts after burst is already done)
1879 //*****************************************************************
1881 assign cal4_data_valid = calib_rden_valid | calib_rden_valid_stgd; // high when either read-valid strobe is high
1882 assign cal4_data_good = (calib_rden_valid & // NOTE(review): the rest of this first term (listing line 1883) is absent from this excerpt
1884 (calib_rden_valid_stgd & // second term: staggered valid qualified by the staggered pattern match
1885 cal4_data_match_stgd);
1887 always @(posedge clkdiv) begin // both pattern compares are registered on clkdiv
1888 // if data is aligned out of fabric ISERDES:
1889 cal4_data_match <= ((rdd_rise_q2_r == 1) && // compares against 1 0 0 1 1 0 x 0 (7th word not checked)
1890 (rdd_fall_q2_r == 0) &&
1891 (rdd_rise_q1_r == 0) &&
1892 (rdd_fall_q1_r == 1) &&
1893 (rdd_rise_q2 == 1) &&
1894 (rdd_fall_q2 == 0) &&
1895 // MIG 2.1: Last rising edge data value not
1896 // guaranteed to be certain value at higher
1898 // (rdd_rise_q1 == 0) &&
1899 (rdd_fall_q1 == 0)); // last falling word must be 0, i.e. the gated value
1900 // if data is staggered out of fabric ISERDES:
1901 cal4_data_match_stgd <= ((rdd_rise_q1_r1 == 1) && // same 1 0 0 1 1 0 x 0 check, taken from the _r1/_r/unregistered samples
1902 (rdd_fall_q1_r1 == 0) &&
1903 (rdd_rise_q2_r == 0) &&
1904 (rdd_fall_q2_r == 1) &&
1905 (rdd_rise_q1_r == 1) &&
1906 (rdd_fall_q1_r == 0) &&
1907 // MIG 2.1: Last rising edge data value not
1908 // guaranteed to be certain value at higher
1910 // (rdd_rise_q2 == 0) &&
1911 (rdd_fall_q2 == 0)); // last falling word must be 0, i.e. the gated value
1914 //*****************************************************************
1915 // DQS gate enable generation:
1916 // This signal gets synchronized to DQS domain, and drives IDDR
1917 // register that in turn asserts/deasserts CE to all 4 or 8 DQ
1918 // IDDR's in that DQS group.
1919 // 1. During normal (post-cal) operation, this is only for 2 clock
1920 // cycles following the end of a burst. Check for falling edge
1921 // of RDEN. But must also make sure to NOT assert for a read-idle-
1922 // read (two non-consecutive reads, separated by exactly one
1923 // idle cycle) - in this case, don't assert the gate because:
1924 // (1) we don't have enough time to deassert the gate before the
1925 // first rising edge of DQS for second burst (b/c of fact
1926 // that DQS gate is generated in the fabric only off rising
1927 // edge of CLK0 - if we somehow had an ODDR in fabric, we
1928 // could pull this off), (2) assumption is that the DQS glitch
1929 // will not rise enough to cause a glitch because the
1930 // post-amble of the first burst is followed immediately by
1931 // the pre-amble of the next burst
1932 // 2. During stage 4 calibration, assert for 3 clock cycles
1933 // (assert gate enable one clock cycle early), since we gate out
1934 // the last two words (in addition to the crap on the DQ bus after
1935 // the DQS read postamble).
1936 // NOTE: PHY_INIT_RDEN and CTRL_RDEN have slightly different timing w/r
1937 // to when they are asserted w/r to the start of the read burst
1938 // (PHY_INIT_RDEN is one cycle earlier than CTRL_RDEN).
1939 //*****************************************************************
1941 // register for timing purposes for fast clock path - currently only
1942 // calib_done_r[2] used
1943 always @(posedge clk)
1944 calib_done_r <= calib_done; // copy of calib_done registered on clk
1947 calib_ctrl_rden = ctrl_rden; // NOTE(review): blocking assignment; the enclosing procedural block header (listing lines 1945-1946) is absent from this excerpt
1948 calib_init_rden = calib_done_r[2] & phy_init_rden; // phy_init_rden only passes through once calib_done_r[2] is set
1951 assign calib_ctrl_rden_negedge = ~calib_ctrl_rden & calib_ctrl_rden_r; // high when rden is low now but was high on the previous clk
1952 // check for read-idle-read before asserting DQS pulse at end of read
1953 assign calib_ctrl_gate_pulse = calib_ctrl_rden_negedge_r & // NOTE(review): second operand (listing line 1954) is absent from this excerpt
1955 always @(posedge clk) begin // one-cycle delayed copies used by the expressions above and by gate_srl_in
1956 calib_ctrl_rden_r <= calib_ctrl_rden;
1957 calib_ctrl_rden_negedge_r <= calib_ctrl_rden_negedge;
1958 calib_ctrl_gate_pulse_r <= calib_ctrl_gate_pulse;
1961 assign calib_init_gate_pulse = ~calib_init_rden & calib_init_rden_r; // falling-edge detect on calib_init_rden
1962 always @(posedge clk) begin
1963 calib_init_rden_r <= calib_init_rden; // previous-cycle value for the edge detect
1964 calib_init_gate_pulse_r <= calib_init_gate_pulse; // pulse delayed by one clk
1965 calib_init_gate_pulse_r1 <= calib_init_gate_pulse_r; // pulse delayed by two clks
1968 // Gate is asserted: (1) during cal, for 3 cycles, starting 1 cycle
1969 // after falling edge of CTRL_RDEN, (2) during normal ops, for 2
1970 // cycles, starting 2 cycles after falling edge of CTRL_RDEN
1971 assign gate_srl_in = ~((calib_ctrl_gate_pulse | // inverted: gate_srl_in is 0 while any pulse term below is 1
1972 calib_ctrl_gate_pulse_r) | // two ctrl terms (pulse and its 1-clk delay)
1973 (calib_init_gate_pulse | // three init terms (pulse and its 1- and 2-clk delays)
1974 calib_init_gate_pulse_r |
1975 calib_init_gate_pulse_r1));
1977 //*****************************************************************
1978 // generate DQS enable signal for each DQS group
1979 // There are differences between DQS gate signal for calibration vs. during
1980 // normal operation:
1981 // * calibration gates the second to last clock cycle of the burst,
1982 // rather than after the last word (e.g. for a 8-word, 4-cycle burst,
1983 // cycle 4 is gated for calibration; during normal operation, cycle
1984 // 5 (i.e. cycle after the last word) is gated)
1985 // enable for DQS is deasserted for two clock cycles, except when
1986 // we have the preamble for the next read immediately following
1987 // the postamble of the current read - assume DQS does not glitch
1988 // during this time, that it stays low. Also if we did have to gate
1989 // the DQS for this case, then we don't have enough time to deassert
1990 // the gate in time for the first rising edge of DQS for the second
1992 //*****************************************************************
1994 // Flops for targeting of multi-cycle path in UCF
1997 for (gate_ff_i = 0; gate_ff_i < 5*DQS_WIDTH; // one element per gate_dly bit (5 bits per DQS group)
1998 gate_ff_i = gate_ff_i+1) begin: gen_gate_dly
2001 .Q (gate_dly_r[gate_ff_i]), // NOTE(review): the instance header and remaining ports (listing lines 1999-2000, 2002-2003, ...) are absent from this excerpt
2004 .D (gate_dly[gate_ff_i]),
2014 for (gate_i = 0; gate_i < DQS_WIDTH; gate_i = gate_i + 1) begin: gen_gate // one gate path per DQS group
2017 .Q (gate_srl_out[gate_i]), // NOTE(review): instance header (listing lines 2015-2016) not visible; a comment further down calls this element an SRL32
2019 .A ({gate_dly_r[(gate_i*5)+4], // 5-bit address = this group's registered gate_dly slice, MSB first
2020 gate_dly_r[(gate_i*5)+3],
2021 gate_dly_r[(gate_i*5)+2],
2022 gate_dly_r[(gate_i*5)+1],
2023 gate_dly_r[(gate_i*5)]}),
2029 // For GATE_BASE_DELAY > 0, have one extra cycle to register outputs
2030 // from controller before generating DQS gate pulse. In PAR, the
2031 // location of the controller logic can be far from the DQS gate
2032 // logic (DQS gate logic located near the DQS I/O's), contributing
2033 // to large net delays. Registering the controller outputs for
2034 // CL >= 4 (above 200MHz) adds a stage of pipelining to reduce net
2036 if (GATE_BASE_DELAY > 0) begin: gen_gate_base_dly_gt3
2037 // add flop between SRL32 and EN_DQS flop (which is located near the
2041 .Q (gate_srl_out_r[gate_i]), // registered copy of gate_srl_out
2044 .D (gate_srl_out[gate_i]),
2048 end else begin: gen_gate_base_dly_le3
2049 assign gate_srl_out_r[gate_i] = gate_srl_out[gate_i]; // no extra register stage: straight wire-through
2054 .Q (en_dqs[gate_i]), // final per-group output, driven from gate_srl_out_r
2057 .D (gate_srl_out_r[gate_i]),
2065 //*****************************************************************
2066 // Find valid window: keep track of how long we've been in the same data
2067 // window. If it's been long enough, then declare that we've found a stable
2068 // valid window - in particular, that we're past any region of instability
2069 // associated with the edge of the window. Use only when finding left edge
2070 //*****************************************************************
2072 always @(posedge clkdiv)
2073 // reset before we start to look for window
2074 if (cal4_state == CAL4_INIT) begin
2075 cal4_window_cnt <= 4'b0000;
2076 cal4_stable_window <= 1'b0;
2077 end else if ((cal4_state == CAL4_FIND_EDGE) && cal4_seek_left) begin
2078 // if we're looking for left edge, and incrementing IDELAY, count
2079 // consecutive taps over which we're in the window
2080 if (cal4_data_valid) begin // only evaluate on the cycle the readback data is flagged valid
2082 cal4_window_cnt <= cal4_window_cnt + 1; // NOTE(review): the condition choosing increment vs. clear (listing lines 2081/2083) is absent from this excerpt
2084 cal4_window_cnt <= 4'b0000;
2087 if (cal4_window_cnt == MIN_WIN_SIZE-1)
2088 cal4_stable_window <= 1'b1; // in the visible code this flag is only cleared again in CAL4_INIT
2091 //*****************************************************************
2092 // keep track of edge tap counts found, and whether we've
2093 // incremented to the maximum number of taps allowed
2094 //*****************************************************************
2096 always @(posedge clkdiv)
2097 if ((cal4_state == CAL4_INIT) || cal4_dlyrst_gate) begin // counter and flags clear on state-machine init or on a gate IDELAY reset
2098 cal4_idel_max_tap <= 1'b0;
2099 cal4_idel_bit_tap <= 1'b0;
2100 cal4_idel_tap_cnt <= 6'b000000;
2101 end else if (cal4_dlyce_gate) begin // count only moves when a tap change is issued
2102 if (cal4_dlyinc_gate) begin
2103 cal4_idel_tap_cnt <= cal4_idel_tap_cnt + 1;
2104 cal4_idel_bit_tap <= (cal4_idel_tap_cnt == CAL4_IDEL_BIT_VAL-2); // compare uses the pre-increment count, so the flag rises as the count becomes CAL4_IDEL_BIT_VAL-1
2105 cal4_idel_max_tap <= (cal4_idel_tap_cnt == 6'b111110); // likewise: flag rises as the count becomes 6'b111111 (63)
2107 cal4_idel_tap_cnt <= cal4_idel_tap_cnt - 1; // NOTE(review): presumably the decrement branch; the "else" (listing line 2106) is absent from this excerpt
2108 cal4_idel_bit_tap <= 1'b0;
2109 cal4_idel_max_tap <= 1'b0;
2113 always @(posedge clkdiv) // down-counter shared by the stage 3 and stage 4 RDEN_PIPE_CLR_WAIT states, which exit when it reads 5'b00000
2114 if ((cal4_state != CAL4_RDEN_PIPE_CLR_WAIT) &&
2115 (cal3_state != CAL3_RDEN_PIPE_CLR_WAIT))
2116 calib_rden_pipe_cnt <= CALIB_RDEN_PIPE_LEN-1; // held at its start value while neither state machine is waiting
2118 calib_rden_pipe_cnt <= calib_rden_pipe_cnt - 1; // NOTE(review): presumably the else branch; listing line 2117 is absent from this excerpt
2120 //*****************************************************************
2121 // Stage 4 cal state machine
2122 //*****************************************************************
2124 always @(posedge clkdiv)
2126 calib_done[3] <= 1'b0;
2127 calib_done_tmp[3] <= 1'b0;
2128 calib_err[3] <= 1'b0;
2131 next_count_gate <= 'b0;
2132 cal4_idel_adj_cnt <= 6'bxxxxxx;
2133 cal4_dlyce_gate <= 1'b0;
2134 cal4_dlyinc_gate <= 1'b0;
2135 cal4_dlyrst_gate <= 1'b0; // reset handled elsewhere in code
2136 cal4_gate_srl_a <= 5'bxxxxx;
2137 cal4_rden_srl_a <= 5'bxxxxx;
2138 cal4_ref_req <= 1'b0;
2139 cal4_seek_left <= 1'bx;
2140 cal4_state <= CAL4_IDLE;
2142 cal4_ref_req <= 1'b0;
2143 cal4_dlyce_gate <= 1'b0;
2144 cal4_dlyinc_gate <= 1'b0;
2145 cal4_dlyrst_gate <= 1'b0;
2150 next_count_gate <= 'b0;
2151 if (calib_start[3]) begin
2153 calib_done[3] <= 1'b0;
2154 cal4_state <= CAL4_INIT;
2159 // load: (1) initial value of gate delay SRL, (2) appropriate
2160 // value of RDEN SRL (so that we get correct "data valid" timing)
2161 cal4_gate_srl_a <= GATE_BASE_INIT;
2162 cal4_rden_srl_a <= {calib_rden_dly[(count_gate*5)+4],
2163 calib_rden_dly[(count_gate*5)+3],
2164 calib_rden_dly[(count_gate*5)+2],
2165 calib_rden_dly[(count_gate*5)+1],
2166 calib_rden_dly[(count_gate*5)]};
2167 // let SRL pipe clear after loading initial shift value
2168 cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
2171 // sort of an initial state - start checking to see whether we're
2172 // already in the window or not
2174 // decide right away if we start in the proper window - this
2175 // determines if we are then looking for the left (trailing) or
2176 // right (leading) edge of the data valid window
2177 if (cal4_data_valid) begin
2178 // if we find a match - then we're already in window, now look
2179 // for left edge. Otherwise, look for right edge of window
2180 cal4_seek_left <= cal4_data_good;
2181 cal4_state <= CAL4_FIND_EDGE;
2185 // don't do anything until the exact clock cycle when to check that
2186 // readback data is valid or not
2187 if (cal4_data_valid) begin
2188 // we're currently in the window, look for left edge of window
2189 if (cal4_seek_left) begin
2190 // make sure we've passed the right edge before trying to detect
2191 // the left edge (i.e. avoid any edge "instability") - else, we
2192 // may detect a "false" edge too soon. By design, if we start in
2193 // the data valid window, always expect at least
2194 // MIN(BIT_TIME_TAPS,32) (-/+ jitter, see below) taps of valid
2195 // window before we hit the left edge (this is because when stage
2196 // 4 calibration first begins (i.e., gate_dly = 00, and IDELAY =
2197 // 00), we're guaranteed to NOT be in the window, and we always
2198 // start searching for MIN(BIT_TIME_TAPS,32) for the right edge
2199 // of window. If we don't find it, increment gate_dly, and if we
2200 // now start in the window, we have at least approximately
2201 // CLK_PERIOD-MIN(BIT_TIME_TAPS,32) = MIN(BIT_TIME_TAPS,32) taps.
2202 // It's approximately because jitter, noise, etc. can bring this
2203 // value down slightly. Because of this (although VERY UNLIKELY),
2204 // we have to protect against decrementing IDELAY below 0
2205 // during adjustment phase).
2206 if (cal4_stable_window && !cal4_data_good) begin
2207 // found left edge of window, dec by MIN(BIT_TIME_TAPS,32)
2208 cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
2209 cal4_idel_adj_inc <= 1'b0;
2210 cal4_state <= CAL4_ADJ_IDEL;
2212 // Otherwise, keep looking for left edge:
2213 if (cal4_idel_max_tap) begin
2214 // ran out of taps looking for left edge (max=63) - happens
2215 // for low frequency case, decrement by 32
2216 cal4_idel_adj_cnt <= 6'b100000;
2217 cal4_idel_adj_inc <= 1'b0;
2218 cal4_state <= CAL4_ADJ_IDEL;
2220 cal4_dlyce_gate <= 1'b1;
2221 cal4_dlyinc_gate <= 1'b1;
2222 cal4_state <= CAL4_IDEL_WAIT;
2226 // looking for right edge of window:
2227 // look for the first match - this means we've found the right
2228 // (leading) edge of the data valid window, increment by
2229 // MIN(BIT_TIME_TAPS,32)
2230 if (cal4_data_good) begin
2231 cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
2232 cal4_idel_adj_inc <= 1'b1;
2233 cal4_state <= CAL4_ADJ_IDEL;
2235 // Otherwise, keep looking:
2236 // only look for MIN(BIT_TIME_TAPS,32) taps for right edge,
2237 // if we haven't found it, then inc gate delay, try again
2238 if (cal4_idel_bit_tap) begin
2239 // if we're already maxed out on gate delay, then error out
2240 // (simulation only - calib_err isn't currently connected)
2241 if (cal4_gate_srl_a == 5'b11111) begin
2242 calib_err[3] <= 1'b1;
2243 cal4_state <= CAL4_IDLE;
2245 // otherwise, increment gate delay count, and start
2247 cal4_gate_srl_a <= cal4_gate_srl_a + 1;
2248 cal4_dlyrst_gate <= 1'b1;
2249 cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
2252 // keep looking for right edge
2253 cal4_dlyce_gate <= 1'b1;
2254 cal4_dlyinc_gate <= 1'b1;
2255 cal4_state <= CAL4_IDEL_WAIT;
2261 // wait for GATE IDELAY to settle, after reset or increment
2262 CAL4_IDEL_WAIT: begin
2263 // For simulation, load SRL addresses for all DQS with same value
2264 if (SIM_ONLY != 0) begin
2265 for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly
2266 gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
2267 gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
2268 gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
2269 gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
2270 gate_dly[(i*5)] <= cal4_gate_srl_a[0];
2273 gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
2274 gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
2275 gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
2276 gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
2277 gate_dly[(count_gate*5)] <= cal4_gate_srl_a[0];
2279 // check to see if we've found edge of window
2281 cal4_state <= CAL4_FIND_EDGE;
2284 // give additional time for RDEN_R pipe to clear from effects of
2285 // previous pipeline (and IDELAY reset)
2286 CAL4_RDEN_PIPE_CLR_WAIT: begin
2287 // MIG 2.2: Bug fix - make sure to update GATE_DLY count, since
2288 // possible for FIND_EDGE->RDEN_PIPE_CLR_WAIT->FIND_WINDOW
2289 // transition (i.e. need to make sure the gate count updated in
2290 // FIND_EDGE gets reflected in GATE_DLY by the time we reach
2291 // state FIND_WINDOW) - previously GATE_DLY only being updated
2292 // during state CAL4_IDEL_WAIT
2293 if (SIM_ONLY != 0) begin
2294 for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly_pipe
2295 gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
2296 gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
2297 gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
2298 gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
2299 gate_dly[(i*5)] <= cal4_gate_srl_a[0];
2302 gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
2303 gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
2304 gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
2305 gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
2306 gate_dly[(count_gate*5)] <= cal4_gate_srl_a[0];
2308 // look for new window
2309 if (calib_rden_pipe_cnt == 5'b00000)
2310 cal4_state <= CAL4_FIND_WINDOW;
2313 // increment/decrement DQS/DQ IDELAY for final adjustment
2315 // add underflow protection for corner case when left edge found
2316 // using fewer than MIN(BIT_TIME_TAPS,32) taps
2317 if ((cal4_idel_adj_cnt == 6'b000000) ||
2318 (cal4_dlyce_gate && !cal4_dlyinc_gate &&
2319 (cal4_idel_tap_cnt == 6'b000001))) begin
2320 cal4_state <= CAL4_DONE;
2321 // stop when all gates calibrated, or gate[0] cal'ed (for sim)
2322 if ((count_gate == DQS_WIDTH-1) || (SIM_ONLY != 0))
2323 calib_done_tmp[3] <= 1'b1;
2325 // need for VHDL simulation to prevent out-of-index error
2326 next_count_gate <= count_gate + 1;
2328 cal4_idel_adj_cnt <= cal4_idel_adj_cnt - 1;
2329 cal4_dlyce_gate <= 1'b1;
2330 // whether inc or dec depends on whether left or right edge found
2331 cal4_dlyinc_gate <= cal4_idel_adj_inc;
2334 // wait for IDELAY output to settle after decrement. Check current
2335 // COUNT_GATE value and decide if we're done
2337 if (!idel_set_wait) begin
2338 count_gate <= next_count_gate;
2339 if (calib_done_tmp[3]) begin
2340 calib_done[3] <= 1'b1;
2341 cal4_state <= CAL4_IDLE;
2343 // request auto-refresh after every DQS group calibrated to
2344 // avoid tRAS violation
2345 cal4_ref_req <= 1'b1;
2347 cal4_state <= CAL4_INIT;