From: megacz Date: Sun, 1 Mar 2009 22:06:32 +0000 (-0800) Subject: add DDR2 controller, generated via MIG X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=0e179ecc6a0cd616fbc633aa97530c76a33ba4c7;p=fleet.git add DDR2 controller, generated via MIG --- diff --git a/Makefile b/Makefile index aa7638a..daa0c43 100644 --- a/Makefile +++ b/Makefile @@ -107,7 +107,8 @@ effort = high synth: cd build/fpga; ln -sf ../../src/edu/berkeley/fleet/fpga/* . cd build/fpga; ln -sf ../../src/edu/berkeley/fleet/fpga/mem/* . - cd build/fpga; ln -sf ../../src/edu/berkeley/fleet/fpga/greg/* . + cd build/fpga; ln -sf ../../src/edu/berkeley/fleet/fpga/ddr2/* . + #cd build/fpga; ln -sf ../../src/edu/berkeley/fleet/fpga/greg/* . rm -f build/fpga/main.lso echo work >> build/fpga/main.lso rm -f build/fpga/main.prj diff --git a/ships/DDR2.ship b/ships/DDR2.ship index 1092f73..374866c 100644 --- a/ships/DDR2.ship +++ b/ships/DDR2.ship @@ -7,26 +7,26 @@ data in: inDataWrite data out: out -percolate up: DDR2_CAS_B 1 -percolate up: DDR2_CKE 2 -percolate up: DDR2_RAS_B 1 -percolate up: DDR2_WE_B 1 -percolate up: DDR2_ODT 2 -percolate up: DDR2_CS0_B 2 -percolate up: DDR2_CLK_N 2 -percolate up: DDR2_CLK_P 2 -percolate up: DDR2_A 14 -percolate up: DDR2_BA 3 -percolate inout: DDR2_DQ 64 -percolate up: DDR2_DM 8 -percolate inout: DDR2_DQS_N 8 -percolate inout: DDR2_DQS_P 8 - -percolate inout: I2C_DDR2_SCL 1 -percolate inout: I2C_DDR2_SDA 1 - -percolate down: CLKBUF_Q1_N 1 -percolate down: CLKBUF_Q1_P 1 +percolate up: gpio_led_0 1 +percolate up: gpio_led_1 1 +percolate up: gpio_led_2 1 +percolate up: gpio_led_3 1 + +percolate inout: ddr2_dq 64 +percolate up: ddr2_a 13 +percolate up: ddr2_ba 2 +percolate up: ddr2_ras_n 1 +percolate up: ddr2_cas_n 1 +percolate up: ddr2_we_n 1 +percolate up: ddr2_cs_n 1 +percolate up: ddr2_odt 1 +percolate up: ddr2_cke 1 +percolate up: ddr2_dm 8 +percolate up: phy_init_done 1 +percolate inout: ddr2_dqs 8 +percolate inout: ddr2_dqs_n 8 +percolate up: ddr2_ck 2 +percolate 
up: ddr2_ck_n 2 == TeX ============================================================== @@ -36,193 +36,173 @@ percolate down: CLKBUF_Q1_P 1 == FPGA ============================================================== -// Nearly all of this was copied from Greg Gibeling's work; copyright shown below: - -// Everything here was copied from -// GateLib/Firmware/DRAM/Hardware/DDR2SDRAM/Test/FPGA_TOP_ML505_DDR2SDRAMTest.v - -//============================================================================== -// Section: License -//============================================================================== -// Copyright (c) 2005-2008, Regents of the University of California -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// - Neither the name of the University of California, Berkeley nor the -// names of its contributors may be used to endorse or promote -// products derived from this software without specific prior -// written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -//============================================================================== - -parameter - ClockFreq = 200000000, - BAWidth = 2, - RAWidth = 13, - CAWidth = 10, - DWidth = 128, // 128b SDR internal transfers - UWidth = 8, // This will almost ALWAYS be 8 - BurstLen = 2, // 256b total burst, 2 words DWidth words at SDR, or 4 external words at DDR - EnableMask = 1, - EnableECC = 0, - Board = 0; - -localparam - UCount = DWidth / UWidth, - // 128b/8 = 16b per mask means per-byte masking - MWidth = (EnableECC || (EnableMask == 0)) ? 0 : UCount, - // Unused lower address bits, the -1 is to get a proper log2 - UAWidth = `log2(UCount-1), - TAWidth = CAWidth + RAWidth + BAWidth, - // Note that the components are in order according to where in the - // address they appear, the -1 is to account for DDR - AWidth = TAWidth + UAWidth - 1, - ECheck = EnableECC ? 2 : 0, - ECorrect = EnableECC ? 
1 : 0, - CWidth = 3, - EHWidth = `max(`log2(ECheck), 1), - ERWidth = `max(`log2(ECheck), 1); - -wire Reset; -assign Reset = !rst; - -reg [AWidth-1:0] CommandAddress; -reg [CWidth-1:0] Command; -wire CommandReady; -reg CommandValid; - -wire [DWidth-1:0] DataIn; -wire [MWidth-1:0] DataInMask; -reg DataInValid; -wire DataInReady; - -wire [DWidth-1:0] DataOut; -wire [EHWidth-1:0] DataOutErrorChecked; -wire [ERWidth-1:0] DataOutErrorCorrected; -wire DataOutValid; -reg DataOutReady; - -wire Clock_IBUFG; -wire Clock, ClockD2, ClockP90; -wire Clock_DCM, ClockD2_DCM, ClockP90_DCM; -wire Locked; - -reg [`WORDWIDTH:0] out_d; -assign out_d_ = out_d; - -assign DDR2_BA[2] = 1'b0; -assign DDR2_CS0_B[1] = 1'b1; -assign DDR2_ODT[1] = 1'b0; -assign DDR2_CKE[1] = 1'b0; - -IBUFGDS ClockIBufG(.I(CLKBUF_Q1_P), .IB(CLKBUF_Q1_N), .O(Clock_IBUFG)); -DCM_BASE - #( - .CLKIN_PERIOD(5.0), - .CLKDV_DIVIDE(2.0), - .DLL_FREQUENCY_MODE("HIGH"), - .DUTY_CYCLE_CORRECTION("TRUE"), - .FACTORY_JF(16'hF0F0) - ) - DCMBase( - .CLK0(Clock_DCM), - .CLK180( ), - .CLK270( ), - .CLK2X( ), - .CLK2X180( ), - .CLK90(ClockP90_DCM), - .CLKDV(ClockD2_DCM), - .CLKFX( ), - .CLKFX180( ), - .LOCKED(Locked), - .CLKFB(Clock), - .CLKIN(Clock_IBUFG), - .RST(Reset)); - // synthesis attribute CLKIN_PERIOD of DCMBase is "5.0" - // synthesis attribute CLKDV_DIVIDE of DCMBase is "2.0" - // synthesis attribute DLL_FREQUENCY_MODE of DCMBase is "HIGH" - // synthesis attribute DUTY_CYCLE_CORRECTION of DCMBase is "TRUE" - // synthesis attribute FACTORY_JF of DCMBase is "16'hF0F0" - BUFG ClockBufG(.I(Clock_DCM), .O(Clock)); - BUFG ClockP90BufG(.I(ClockP90_DCM), .O(ClockP90)); - BUFG ClockD2BufG(.I(ClockD2_DCM), .O(ClockD2)); - -DDR2SDRAM DDR2SDRAM( - .Clock(Clock), - .ClockD2(ClockD2), - .ClockP90(ClockP90), - .Reset(Reset), - .Locked(Locked), - .ClockF200(Clock), - .Initialized( ), - .PoweredUp( ), - - .CommandClock(clk), - .DataInClock(clk), - .DataOutClock(clk), - .CommandReset(Reset), - .DataInReset(Reset), - .DataOutReset(Reset), 
- - .CommandAddress(CommandAddress), - .Command(Command), - .CommandValid(CommandValid), - .CommandReady(CommandReady), - .DataIn(DataIn), - .DataInMask(DataInMask), - .DataInValid(DataInValid), - .DataInReady(DataInReady), - .DataOut(DataOut), - .DataOutErrorChecked(DataOutErrorChecked), - .DataOutErrorCorrected(DataOutErrorCorrected), - .DataOutValid(DataOutValid), - .DataOutReady(DataOutReady), - .DDR2_DQ(DDR2_DQ), - .DDR2_A(DDR2_A), - .DDR2_BA(DDR2_BA[1:0]), - .DDR2_RAS_B(DDR2_RAS_B), - .DDR2_CAS_B(DDR2_CAS_B), - .DDR2_WE_B(DDR2_WE_B), - .DDR2_CS0_B(DDR2_CS0_B[0]), - .DDR2_ODT(DDR2_ODT[0]), - .DDR2_CKE(DDR2_CKE[0]), - .DDR2_DM(DDR2_DM), - .DDR2_DQS_P(DDR2_DQS_P), - .DDR2_DQS_N(DDR2_DQS_N), - .DDR2_CLK_P(DDR2_CLK_P), - .DDR2_CLK_N(DDR2_CLK_N)); - defparam DDR2SDRAM.UWidth = UWidth; - defparam DDR2SDRAM.BAWidth = BAWidth; - defparam DDR2SDRAM.RAWidth = RAWidth; - defparam DDR2SDRAM.CAWidth = CAWidth; - defparam DDR2SDRAM.DWidth = DWidth; - defparam DDR2SDRAM.BurstLen = BurstLen; - defparam DDR2SDRAM.EnableMask = EnableMask; - defparam DDR2SDRAM.EnableECC = EnableECC; - defparam DDR2SDRAM.Board = Board; - defparam DDR2SDRAM.MultiClock = 1; - - assign DataIn = inDataWrite_d; - assign DataInMask = 16'b1111111111111111; - +/* +percolate inout: ddr2_dq 8 +percolate up: ddr2_a 15 +percolate up: ddr2_ba 3 +percolate up: ddr2_ras_n 1 +percolate up: ddr2_cas_n 1 +percolate up: ddr2_we_n 1 +percolate up: ddr2_cs_n 1 +percolate up: ddr2_odt 1 +percolate up: ddr2_cke 1 +percolate up: ddr2_dm 1 +percolate up: phy_init_done 1 +percolate inout: ddr2_dqs 1 +percolate inout: ddr2_dqs_n 1 +percolate up: ddr2_ck 1 +percolate up: ddr2_ck_n 1 +*/ + +//NET "sys_clk_p" LOC = "H17" ; #Bank 3 +//NET "sys_clk_n" LOC = "H18" ; #Bank 3 +//NET "clk200_p" LOC = "K17" ; #Bank 3 +//NET "clk200_n" LOC = "L18" ; #Bank 3 +//NET "sys_rst_n" LOC = "L24" ; #Bank 19 + +/******************************************************************************* +* This file is owned and controlled by Xilinx and 
must be used * +* solely for design, simulation, implementation and creation of * +* design files limited to Xilinx devices or technologies. Use * +* with non-Xilinx devices or technologies is expressly prohibited * +* and immediately terminates your license. * +* * +* XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS" * +* SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR * +* XILINX DEVICES. BY PROVIDING THIS DESIGN, CODE, OR INFORMATION * +* AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION * +* OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS * +* IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT, * +* AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE * +* FOR YOUR IMPLEMENTATION. XILINX EXPRESSLY DISCLAIMS ANY * +* WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE * +* IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR * +* REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF * +* INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * +* FOR A PARTICULAR PURPOSE. * +* * +* Xilinx products are not intended for use in life support * +* appliances, devices, or systems. Use in such applications are * +* expressly prohibited. * +* * +* (c) Copyright 1995-2006 Xilinx, Inc. * +* All rights reserved. * +*******************************************************************************/ +// The following must be inserted into your Verilog file for this +// core to be instantiated. Change the instance name and port connections +// (in parentheses) to your own signal names. + +//----------- Begin Cut here for INSTANTIATION Template ---// INST_TAG + +wire clk200_p; +wire clk200_n; +wire clk_rebuffered; + + ddr2_sdram # ( + .BANK_WIDTH(2), // # of memory bank addr bits. + .CKE_WIDTH(1), // # of memory clock enable outputs. + .CLK_WIDTH(2), // # of clock outputs. + .COL_WIDTH(10), // # of memory column bits. + .CS_NUM(1), // # of separate memory chip selects. 
+ .CS_WIDTH(1), // # of total memory chip selects. + .CS_BITS(0), // set to log2(CS_NUM) (rounded up). + .DM_WIDTH(8), // # of data mask bits. + .DQ_WIDTH(64), // # of data width. + .DQ_PER_DQS(8), // # of DQ data bits per strobe. + .DQS_WIDTH(8), // # of DQS strobes. + .DQ_BITS(6), // set to log2(DQS_WIDTH*DQ_PER_DQS). + .DQS_BITS(3), // set to log2(DQS_WIDTH). + .ODT_WIDTH(1), // # of memory on-die term enables. + .ROW_WIDTH(13), // # of memory row and # of addr bits. + .ADDITIVE_LAT(0), // additive write latency. + .BURST_LEN(4), // burst length (in double words). + .BURST_TYPE(0), // burst type (=0 seq; =1 interleaved). + .CAS_LAT(4), // CAS latency. + .ECC_ENABLE(0), // enable ECC (=1 enable). + .APPDATA_WIDTH(128), // # of usr read/write data bus bits. + .MULTI_BANK_EN(1), // Keeps multiple banks open. (= 1 enable). + .TWO_T_TIME_EN(1), // 2t timing for unbuffered dimms. + .ODT_TYPE(1), // ODT (=0(none),=1(75),=2(150),=3(50)). + .REDUCE_DRV(0), // reduced strength mem I/O (=1 yes). + .REG_ENABLE(0), // registered addr/ctrl (=1 yes). + .TREFI_NS(7800), // auto refresh interval (ns). + .TRAS(40000), // active->precharge delay. + .TRCD(15000), // active->read/write delay. + .TRFC(127500), // refresh->refresh, refresh->active delay. + .TRP(15000), // precharge->command delay. + .TRTP(7500), // read->precharge delay. + .TWR(15000), // used to determine write->precharge. + .TWTR(7500), // write->read delay. + .HIGH_PERFORMANCE_MODE("TRUE"), // # = TRUE, the IODELAY performance mode is set to high. + // # = FALSE, the IODELAY performance mode is set to low. + .SIM_ONLY(0), // = 1 to skip SDRAM power up delay. + .DEBUG_EN(0), // Enable debug signals/controls. + // When this parameter is changed from 0 to 1, + // make sure to uncomment the coregen commands + // in ise_flow.bat or create_ise.bat files in + // par folder. + .CLK_PERIOD(5000), // Core/Memory clock period (in ps). 
+ .DQS_IO_COL(16'b0000000000000000), // I/O column location of DQS groups + // (=0, left; =1 center, =2 right). + //.DQ_IO_MS(64'b10100101_10100101_10100101_10100101_10100101_10100101_10100101_10100101), + .DQ_IO_MS(64'b01110101_00111101_00001111_00011110_00101110_11000011_11000001_10111100), + // Master/Slave location of DQ I/O (=0 slave). + .CLK_TYPE("SINGLE_ENDED"), // # = "DIFFERENTIAL " ->; Differential input clocks , + // # = "SINGLE_ENDED" -> Single ended input clocks. + .DLL_FREQ_MODE("HIGH"), // DCM Frequency range. + .RST_ACT_LOW(1) // =1 for active low reset, =0 for active high. +) +ddr2_sdram ( + .sys_clk (clk), + .idly_clk_200 (clk200_p), + .sys_rst_n (!rst), + + .ddr2_dq (ddr2_dq), + .ddr2_a (ddr2_a), + .ddr2_ba (ddr2_ba), + .ddr2_ras_n (ddr2_ras_n), + .ddr2_cas_n (ddr2_cas_n), + .ddr2_we_n (ddr2_we_n), + .ddr2_cs_n (ddr2_cs_n), + .ddr2_odt (ddr2_odt), + .ddr2_cke (ddr2_cke), + .ddr2_dm (ddr2_dm), + .ddr2_dqs (ddr2_dqs), + .ddr2_dqs_n (ddr2_dqs_n), + .ddr2_ck (ddr2_ck), + .ddr2_ck_n (ddr2_ck_n), + + .phy_init_done (gpio_led_0), + + .app_wdf_afull (gpio_led_1), + .app_af_afull (gpio_led_2), + .rd_data_valid (gpio_led_3), + + .app_wdf_wren (1'b1), + .app_af_wren (app_af_wren), + .app_af_addr (app_af_addr), + .app_af_cmd (app_af_cmd), + + .rd_data_fifo_out (rd_data_fifo_out), + .app_wdf_data (app_wdf_data), + .app_wdf_mask_data (app_wdf_mask_data) +); + + wire clk200_p_fb; + DCM // 200Mhz DDR clock + #( + .CLKFX_MULTIPLY(2), + .CLKFX_DIVIDE(1), + .CLKIN_PERIOD("10 ns") + ) vgadcm ( + .CLKIN (clk), + .CLKFB (clk200_p_fb), + .CLKFX (clk200_n), + .CLKFX180 (clk200_p), + .CLK0 (clk200_p_fb) + ); + +/* always @(posedge clk) begin if (rst) begin @@ -266,6 +246,7 @@ DDR2SDRAM DDR2SDRAM( end end end +*/ == Test ============================================================== diff --git a/ships/DDR2.ship- b/ships/DDR2.ship- new file mode 100644 index 0000000..1092f73 --- /dev/null +++ b/ships/DDR2.ship- @@ -0,0 +1,303 @@ +ship: DDR2 + +== Ports 
=========================================================== +data in: inAddrRead +data in: inAddrWrite +data in: inDataWrite + +data out: out + +percolate up: DDR2_CAS_B 1 +percolate up: DDR2_CKE 2 +percolate up: DDR2_RAS_B 1 +percolate up: DDR2_WE_B 1 +percolate up: DDR2_ODT 2 +percolate up: DDR2_CS0_B 2 +percolate up: DDR2_CLK_N 2 +percolate up: DDR2_CLK_P 2 +percolate up: DDR2_A 14 +percolate up: DDR2_BA 3 +percolate inout: DDR2_DQ 64 +percolate up: DDR2_DM 8 +percolate inout: DDR2_DQS_N 8 +percolate inout: DDR2_DQS_P 8 + +percolate inout: I2C_DDR2_SCL 1 +percolate inout: I2C_DDR2_SDA 1 + +percolate down: CLKBUF_Q1_N 1 +percolate down: CLKBUF_Q1_P 1 + +== TeX ============================================================== + +== Fleeterpreter ==================================================== + public void service() { } +== FleetSim ============================================================== + +== FPGA ============================================================== + +// Nearly all of this was copied from Greg Gibeling's work; copyright shown below: + +// Everything here was copied from +// GateLib/Firmware/DRAM/Hardware/DDR2SDRAM/Test/FPGA_TOP_ML505_DDR2SDRAMTest.v + +//============================================================================== +// Section: License +//============================================================================== +// Copyright (c) 2005-2008, Regents of the University of California +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// - Neither the name of the University of California, Berkeley nor the +// names of its contributors may be used to endorse or promote +// products derived from this software without specific prior +// written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//============================================================================== + +parameter + ClockFreq = 200000000, + BAWidth = 2, + RAWidth = 13, + CAWidth = 10, + DWidth = 128, // 128b SDR internal transfers + UWidth = 8, // This will almost ALWAYS be 8 + BurstLen = 2, // 256b total burst, 2 words DWidth words at SDR, or 4 external words at DDR + EnableMask = 1, + EnableECC = 0, + Board = 0; + +localparam + UCount = DWidth / UWidth, + // 128b/8 = 16b per mask means per-byte masking + MWidth = (EnableECC || (EnableMask == 0)) ? 0 : UCount, + // Unused lower address bits, the -1 is to get a proper log2 + UAWidth = `log2(UCount-1), + TAWidth = CAWidth + RAWidth + BAWidth, + // Note that the components are in order according to where in the + // address they appear, the -1 is to account for DDR + AWidth = TAWidth + UAWidth - 1, + ECheck = EnableECC ? 2 : 0, + ECorrect = EnableECC ? 
1 : 0, + CWidth = 3, + EHWidth = `max(`log2(ECheck), 1), + ERWidth = `max(`log2(ECheck), 1); + +wire Reset; +assign Reset = !rst; + +reg [AWidth-1:0] CommandAddress; +reg [CWidth-1:0] Command; +wire CommandReady; +reg CommandValid; + +wire [DWidth-1:0] DataIn; +wire [MWidth-1:0] DataInMask; +reg DataInValid; +wire DataInReady; + +wire [DWidth-1:0] DataOut; +wire [EHWidth-1:0] DataOutErrorChecked; +wire [ERWidth-1:0] DataOutErrorCorrected; +wire DataOutValid; +reg DataOutReady; + +wire Clock_IBUFG; +wire Clock, ClockD2, ClockP90; +wire Clock_DCM, ClockD2_DCM, ClockP90_DCM; +wire Locked; + +reg [`WORDWIDTH:0] out_d; +assign out_d_ = out_d; + +assign DDR2_BA[2] = 1'b0; +assign DDR2_CS0_B[1] = 1'b1; +assign DDR2_ODT[1] = 1'b0; +assign DDR2_CKE[1] = 1'b0; + +IBUFGDS ClockIBufG(.I(CLKBUF_Q1_P), .IB(CLKBUF_Q1_N), .O(Clock_IBUFG)); +DCM_BASE + #( + .CLKIN_PERIOD(5.0), + .CLKDV_DIVIDE(2.0), + .DLL_FREQUENCY_MODE("HIGH"), + .DUTY_CYCLE_CORRECTION("TRUE"), + .FACTORY_JF(16'hF0F0) + ) + DCMBase( + .CLK0(Clock_DCM), + .CLK180( ), + .CLK270( ), + .CLK2X( ), + .CLK2X180( ), + .CLK90(ClockP90_DCM), + .CLKDV(ClockD2_DCM), + .CLKFX( ), + .CLKFX180( ), + .LOCKED(Locked), + .CLKFB(Clock), + .CLKIN(Clock_IBUFG), + .RST(Reset)); + // synthesis attribute CLKIN_PERIOD of DCMBase is "5.0" + // synthesis attribute CLKDV_DIVIDE of DCMBase is "2.0" + // synthesis attribute DLL_FREQUENCY_MODE of DCMBase is "HIGH" + // synthesis attribute DUTY_CYCLE_CORRECTION of DCMBase is "TRUE" + // synthesis attribute FACTORY_JF of DCMBase is "16'hF0F0" + BUFG ClockBufG(.I(Clock_DCM), .O(Clock)); + BUFG ClockP90BufG(.I(ClockP90_DCM), .O(ClockP90)); + BUFG ClockD2BufG(.I(ClockD2_DCM), .O(ClockD2)); + +DDR2SDRAM DDR2SDRAM( + .Clock(Clock), + .ClockD2(ClockD2), + .ClockP90(ClockP90), + .Reset(Reset), + .Locked(Locked), + .ClockF200(Clock), + .Initialized( ), + .PoweredUp( ), + + .CommandClock(clk), + .DataInClock(clk), + .DataOutClock(clk), + .CommandReset(Reset), + .DataInReset(Reset), + .DataOutReset(Reset), 
+ + .CommandAddress(CommandAddress), + .Command(Command), + .CommandValid(CommandValid), + .CommandReady(CommandReady), + .DataIn(DataIn), + .DataInMask(DataInMask), + .DataInValid(DataInValid), + .DataInReady(DataInReady), + .DataOut(DataOut), + .DataOutErrorChecked(DataOutErrorChecked), + .DataOutErrorCorrected(DataOutErrorCorrected), + .DataOutValid(DataOutValid), + .DataOutReady(DataOutReady), + .DDR2_DQ(DDR2_DQ), + .DDR2_A(DDR2_A), + .DDR2_BA(DDR2_BA[1:0]), + .DDR2_RAS_B(DDR2_RAS_B), + .DDR2_CAS_B(DDR2_CAS_B), + .DDR2_WE_B(DDR2_WE_B), + .DDR2_CS0_B(DDR2_CS0_B[0]), + .DDR2_ODT(DDR2_ODT[0]), + .DDR2_CKE(DDR2_CKE[0]), + .DDR2_DM(DDR2_DM), + .DDR2_DQS_P(DDR2_DQS_P), + .DDR2_DQS_N(DDR2_DQS_N), + .DDR2_CLK_P(DDR2_CLK_P), + .DDR2_CLK_N(DDR2_CLK_N)); + defparam DDR2SDRAM.UWidth = UWidth; + defparam DDR2SDRAM.BAWidth = BAWidth; + defparam DDR2SDRAM.RAWidth = RAWidth; + defparam DDR2SDRAM.CAWidth = CAWidth; + defparam DDR2SDRAM.DWidth = DWidth; + defparam DDR2SDRAM.BurstLen = BurstLen; + defparam DDR2SDRAM.EnableMask = EnableMask; + defparam DDR2SDRAM.EnableECC = EnableECC; + defparam DDR2SDRAM.Board = Board; + defparam DDR2SDRAM.MultiClock = 1; + + assign DataIn = inDataWrite_d; + assign DataInMask = 16'b1111111111111111; + + always @(posedge clk) begin + + if (rst) begin + `reset + CommandValid <= 0; + DataOutReady <= 0; + end else begin + `cleanup + + CommandValid <= 0; + DataInValid <= 0; + + if (`out_empty) begin + DataOutReady <= 1; + end + + if (DataOutReady && DataOutValid && `out_empty) begin + out_d <= { 1'b0, DataOut[`WORDWIDTH-1:0] }; + `fill_out + DataOutReady <= 0; + + end else if (DataOutReady && CommandReady && DataInReady && `out_empty) begin + if (`inAddrWrite_full && `inDataWrite_full) begin + `drain_inDataWrite + `drain_inAddrWrite + CommandAddress <= inAddrWrite_d; + Command <= 3'b000; + CommandValid <= 1; + DataInValid <= 1; + out_d <= { 1'b1, 37'b0 }; + `fill_out + DataOutReady <= 0; + end else if (`inAddrRead_full) begin + `drain_inAddrRead + 
CommandAddress <= inAddrRead_d; + CommandValid <= 1; + Command <= 3'b001; + DataInValid <= 0; + DataOutReady <= 1; + end + end + end + end + +== Test ============================================================== + +#skip +#expect 0 + +#ship debug : Debug +#ship ddr : DDR2 + +debug.in: + recv, deliver; + +ddr.out: + collect; + set flags a=!c,b=b; + send to debug.in; + collect; + set flags a=!c,b=b; + send to debug.in; +ddr.inAddrWrite: + set word=0; + deliver; + deliver; +ddr.inDataWrite: + set word=1; + deliver; + deliver; + + + + +== Constants ======================================================== + +== Contributors ========================================================= +Adam Megacz diff --git a/ships/Dvi.ship b/ships/Dvi.ship index 8e559b6..335d9bd 100644 --- a/ships/Dvi.ship +++ b/ships/Dvi.ship @@ -24,20 +24,13 @@ percolate up: dvi_xclk_n 1 percolate up: dvi_xclk_p 1 percolate up: dvi_de 1 percolate up: dvi_reset_b 1 -percolate down: dvi_gpio1 1 - -percolate up: dvi_iic_scl 1 -percolate inout: dvi_iic_sda 1 percolate up: gpio_led_c 1 percolate up: gpio_led_e 1 percolate up: gpio_led_n 1 percolate up: gpio_led_s 1 percolate up: gpio_led_w 1 -percolate up: gpio_led_0 1 -percolate up: gpio_led_1 1 -percolate up: gpio_led_2 1 -percolate up: gpio_led_3 1 + percolate up: gpio_led_4 1 percolate up: gpio_led_5 1 percolate up: gpio_led_6 1 @@ -46,6 +39,9 @@ percolate up: gpio_led_7 1 == TeX ============================================================== == Fleeterpreter ==================================================== +//percolate down: dvi_gpio1 1 +//percolate up: dvi_iic_scl 1 +//percolate inout: dvi_iic_sda 1 public void service() { } @@ -72,11 +68,11 @@ percolate up: gpio_led_7 1 assign gpio_led_n = 1; assign gpio_led_s = 0; - assign gpio_led_0 = dvi_gpio1; - assign gpio_led_1 = 0; - assign gpio_led_2 = 1; - assign gpio_led_3 = 0; - assign gpio_led_4 = 1; +// assign gpio_led_0 = dvi_gpio1; +// assign gpio_led_1 = 0; +// assign gpio_led_2 = 1; +// assign 
gpio_led_3 = 0; +// assign gpio_led_4 = 1; assign gpio_led_5 = 0; assign gpio_led_6 = 1; assign gpio_led_7 = 0; diff --git a/src/edu/berkeley/fleet/fpga/Fpga.java b/src/edu/berkeley/fleet/fpga/Fpga.java index 70f83fe..0ca5447 100644 --- a/src/edu/berkeley/fleet/fpga/Fpga.java +++ b/src/edu/berkeley/fleet/fpga/Fpga.java @@ -82,6 +82,7 @@ public class Fpga extends FleetTwoFleet { //boolean small = false; if (small) { + /* for(int i=0; i<1; i++) createShip("Memory"); for(int i=0; i<2; i++) createShip("Fifo"); for(int i=0; i<2; i++) createShip("Alu"); @@ -89,7 +90,9 @@ public class Fpga extends FleetTwoFleet { createShip("CarrySaveAdder"); createShip("Rotator"); createShip("Lut3"); + */ createShip("DDR2"); + createShip("Dvi"); } else { for(int i=0; i<3; i++) createShip("Memory"); for(int i=0; i<3; i++) createShip("Alu"); @@ -102,7 +105,7 @@ public class Fpga extends FleetTwoFleet { //createShip("Rotator"); //createShip("Lut3"); - createShip("DDR2"); + //createShip("DDR2"); //createShip("Video"); } diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_chipscope.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_chipscope.v new file mode 100644 index 0000000..d1d8ebe --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_chipscope.v @@ -0,0 +1,118 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. 
+// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. 
+//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_chipscope.v +// /___/ /\ Date Last Modified: $Data$ +// \ \ / \ Date Created: 9/14/06 +// \___\/\___\ +// +//Device: Virtex-5 +//Purpose: +// Skeleton Chipscope module declarations - for simulation only +//Reference: +//Revision History: +// +//***************************************************************************** + +`timescale 1ns/1ps + +module icon4 + ( + control0, + control1, + control2, + control3 + ) + /* synthesis syn_black_box syn_noprune = 1 */; + output [35:0] control0; + output [35:0] control1; + output [35:0] control2; + output [35:0] control3; +endmodule + +module vio_async_in192 + ( + control, + async_in + ) + /* synthesis syn_black_box syn_noprune = 1 */; + input [35:0] control; + input [191:0] async_in; +endmodule + +module vio_async_in96 + ( + control, + async_in + ) + /* synthesis syn_black_box syn_noprune = 1 */; + input [35:0] control; + input [95:0] async_in; +endmodule + +module vio_async_in100 + ( + control, + async_in + ) + /* synthesis syn_black_box syn_noprune = 1 */; + input [35:0] control; + input [99:0] async_in; +endmodule + +module vio_sync_out32 + ( + control, + clk, + sync_out + ) + /* synthesis syn_black_box syn_noprune = 1 */; + input [35:0] control; + input clk; + output [31:0] sync_out; +endmodule \ No newline at end of file diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_ctrl.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_ctrl.v new file mode 100644 index 0000000..1160208 --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_ctrl.v @@ -0,0 +1,1227 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, 
copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. 
+//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_ctrl.v +// /___/ /\ Date Last Modified: $Date: 2008/07/29 15:24:03 $ +// \ \ / \ Date Created: Wed Aug 30 2006 +// \___\/\___\ +// +// +//Device: Virtex-5 +//Design Name: DDR/DDR2 +//Purpose: +// This module is the main control logic of the memory interface. All +// commands are issued from here according to the burst, CAS Latency and the +// user commands. +//Reference: +//Revision History: +// Rev 1.2 - Fixed auto refresh to activate bug. KP 11-19-2007 +// Rev 1.3 - For Dual Rank parts support CS logic modified. KP. 05/08/08 +// Rev 1.4 - AUTO_REFRESH_WAIT state modified for Auto Refresh flag asserted +// immediately after calibration is completed. KP. 07/28/08 +//***************************************************************************** + +`timescale 1ns/1ps + +module ddr2_ctrl # + ( + // Following parameters are for 72-bit RDIMM design (for ML561 Reference + // board design). Actual values may be different. Actual parameters values + // are passed from design top module ddr2_sdram module. Please refer to + // the ddr2_sdram module for actual values. 
+ parameter BANK_WIDTH = 2, + parameter COL_WIDTH = 10, + parameter CS_BITS = 0, + parameter CS_NUM = 1, + parameter ROW_WIDTH = 14, + parameter ADDITIVE_LAT = 0, + parameter BURST_LEN = 4, + parameter CAS_LAT = 5, + parameter ECC_ENABLE = 0, + parameter REG_ENABLE = 1, + parameter TREFI_NS = 7800, + parameter TRAS = 40000, + parameter TRCD = 15000, + parameter TRRD = 10000, + parameter TRFC = 105000, + parameter TRP = 15000, + parameter TRTP = 7500, + parameter TWR = 15000, + parameter TWTR = 10000, + parameter CLK_PERIOD = 3000, + parameter MULTI_BANK_EN = 1, + parameter TWO_T_TIME_EN = 0, + parameter DDR_TYPE = 1 + ) + ( + input clk, + input rst, + input [2:0] af_cmd, + input [30:0] af_addr, + input af_empty, + input phy_init_done, + output ctrl_ref_flag, + output ctrl_af_rden, + output reg ctrl_wren, + output reg ctrl_rden, + output [ROW_WIDTH-1:0] ctrl_addr, + output [BANK_WIDTH-1:0] ctrl_ba, + output ctrl_ras_n, + output ctrl_cas_n, + output ctrl_we_n, + output [CS_NUM-1:0] ctrl_cs_n + ); + + // input address split into various ranges + localparam ROW_RANGE_START = COL_WIDTH; + localparam ROW_RANGE_END = ROW_WIDTH + ROW_RANGE_START - 1; + localparam BANK_RANGE_START = ROW_RANGE_END + 1; + localparam BANK_RANGE_END = BANK_WIDTH + BANK_RANGE_START - 1; + localparam CS_RANGE_START = BANK_RANGE_START + BANK_WIDTH; + localparam CS_RANGE_END = CS_BITS + CS_RANGE_START - 1; + // compare address (for determining bank/row hits) split into various ranges + // (compare address doesn't include column bits) + localparam CMP_WIDTH = CS_BITS + BANK_WIDTH + ROW_WIDTH; + localparam CMP_ROW_RANGE_START = 0; + localparam CMP_ROW_RANGE_END = ROW_WIDTH + CMP_ROW_RANGE_START - 1; + localparam CMP_BANK_RANGE_START = CMP_ROW_RANGE_END + 1; + localparam CMP_BANK_RANGE_END = BANK_WIDTH + CMP_BANK_RANGE_START - 1; + localparam CMP_CS_RANGE_START = CMP_BANK_RANGE_END + 1; + localparam CMP_CS_RANGE_END = CS_BITS + CMP_CS_RANGE_START-1; + + localparam BURST_LEN_DIV2 = BURST_LEN / 2; + 
// Number of bank/row entries held in the open-bank table.
localparam OPEN_BANK_NUM = 4;
// Index width used for auto_cnt_r; forced to 1 when there are no CS bits.
localparam CS_BITS_FIX   = (CS_BITS == 0) ? 1 : CS_BITS;

//***************************************************************************
// Timing intervals expressed in clock cycles.
// Each interval T is converted as (T + CLK_PERIOD)/CLK_PERIOD, i.e. the
// integer quotient T/CLK_PERIOD plus one.
//***************************************************************************

// TRAS: ACTIVE->PRECHARGE interval - 2
localparam integer TRAS_CYC = (TRAS + CLK_PERIOD) / CLK_PERIOD;

// TRRD: ACTIVE->ACTIVE interval
localparam integer TRRD_CYC = (TRRD + CLK_PERIOD) / CLK_PERIOD;

// TRCD: ACTIVE->READ/WRITE interval - 3 (for DDR2 factor in ADD_LAT).
// Clamped to 0 when the additive latency already covers it.
localparam integer TRCD_CYC =
  (((TRCD + CLK_PERIOD) / CLK_PERIOD) > ADDITIVE_LAT)
    ? ((TRCD + CLK_PERIOD) / CLK_PERIOD) - ADDITIVE_LAT
    : 0;

// TRFC: REFRESH->REFRESH, REFRESH->ACTIVE interval - 2
localparam integer TRFC_CYC = (TRFC + CLK_PERIOD) / CLK_PERIOD;

// TRP: PRECHARGE->COMMAND interval - 2
// (one extra clock cycle is added for precharge-all)
localparam integer TRP_CYC = ((TRP + CLK_PERIOD) / CLK_PERIOD) + 1;

// TRTP: READ->PRECHARGE interval - 2
// (AL + BL/2 + max(TRTP, 2tck) - 2)
localparam integer TRTP_TMP_MIN =
  (((TRTP + CLK_PERIOD) / CLK_PERIOD) >= 2)
    ? ((TRTP + CLK_PERIOD) / CLK_PERIOD)
    : 2;
localparam integer TRTP_CYC =
  TRTP_TMP_MIN + ADDITIVE_LAT + BURST_LEN_DIV2 - 2;

// TWR: WRITE->PRECHARGE interval - 2
localparam integer WR_LAT  = (DDR_TYPE > 0) ? CAS_LAT + ADDITIVE_LAT - 1 : 1;
localparam integer TWR_CYC =
  ((TWR + CLK_PERIOD) / CLK_PERIOD) + WR_LAT + BURST_LEN_DIV2;

// TWTR: WRITE->READ interval - 3 (for DDR1, TWTR = 2 clks)
// DDR2 = CL-1 + BL/2 + TWTR
localparam integer TWTR_TMP_MIN = (TWTR + CLK_PERIOD) / CLK_PERIOD;
localparam integer TWTR_CYC =
  (DDR_TYPE > 0) ? (TWTR_TMP_MIN + (CAS_LAT - 1) + BURST_LEN_DIV2) : 2;

// TRTW: READ->WRITE interval - 3
//   DDR1: CL + (BL/2)
//   DDR2: (BL/2) + 2. Two more clocks are added to the DDR2 counter to
//         account for the delay in arrival of the DQS during reads
//         (pcb trace + buffer delays + memory parameters).
// A CAS_LAT value of 25 is treated as a latency of 2 clocks here.
localparam TRTW_CYC =
  (DDR_TYPE > 0)  ? BURST_LEN_DIV2 + 4 :
  (CAS_LAT == 25) ? 2 + BURST_LEN_DIV2 :
                    CAS_LAT + BURST_LEN_DIV2;

localparam integer CAS_LAT_RD = (CAS_LAT == 25) ? 2 : CAS_LAT;

// Counter load values: make sure all values >= 0 (some may be = 0)
localparam TRAS_COUNT = (TRAS_CYC > 0) ? TRAS_CYC : 0;
localparam TRCD_COUNT = (TRCD_CYC > 0) ? TRCD_CYC : 0;
localparam TRRD_COUNT = (TRRD_CYC > 0) ? TRRD_CYC : 0;
localparam TRFC_COUNT = (TRFC_CYC > 0) ? TRFC_CYC : 0;
localparam TRP_COUNT  = (TRP_CYC  > 0) ? TRP_CYC  : 0;
localparam TRTP_COUNT = (TRTP_CYC > 0) ? TRTP_CYC : 0;
localparam TWR_COUNT  = (TWR_CYC  > 0) ? TWR_CYC  : 0;
localparam TWTR_COUNT = (TWTR_CYC > 0) ? TWTR_CYC : 0;
localparam TRTW_COUNT = (TRTW_CYC > 0) ? TRTW_CYC : 0;

// Auto refresh interval
localparam TREFI_COUNT = ((TREFI_NS * 1000) / CLK_PERIOD) - 1;

// memory controller states
localparam CTRL_IDLE              = 5'h00;
localparam CTRL_PRECHARGE         = 5'h01;
localparam CTRL_PRECHARGE_WAIT    = 5'h02;
localparam CTRL_AUTO_REFRESH      = 5'h03;
localparam CTRL_AUTO_REFRESH_WAIT = 5'h04;
localparam CTRL_ACTIVE            = 5'h05;
localparam CTRL_ACTIVE_WAIT       = 5'h06;
localparam CTRL_BURST_READ        = 5'h07;
localparam CTRL_READ_WAIT         = 5'h08;
localparam CTRL_BURST_WRITE       = 5'h09;
localparam CTRL_WRITE_WAIT        = 5'h0A;
localparam CTRL_PRECHARGE_WAIT1   = 5'h0B;

//***************************************************************************
// Signal declarations (grouped by function)
//***************************************************************************

// command / address pipeline from the address FIFO
wire [30:0] af_addr_r;
reg  [30:0] af_addr_r1, af_addr_r2, af_addr_r3;
wire [2:0]  af_cmd_r;
reg  [2:0]  af_cmd_r1, af_cmd_r2;
reg         af_valid_r, af_valid_r1, af_valid_r2;
reg         ctrl_af_rden_r;
reg         rd_af_flag_r;
wire        sm_rden;
reg         sm_rden_r;
wire        rd_flag, wr_flag;
reg         rd_flag_r, wr_flag_r;

// open-bank tracking and conflict detection
reg  [CMP_WIDTH-1:0]                 act_addr_r;
reg  [CMP_WIDTH-1:0]                 sb_open_add_r;
reg  [(OPEN_BANK_NUM*CMP_WIDTH)-1:0] bank_cmp_addr_r;
reg  [OPEN_BANK_NUM-1:0]             bank_hit, bank_hit_r, bank_hit_r1;
reg  [OPEN_BANK_NUM-1:0]             bank_valid_r;
reg                                  bank_conflict_r;
reg  [3:0]                           row_miss, row_conflict_r;
wire                                 conflict_detect;
reg                                  conflict_detect_r;
reg                                  conflict_resolved_r;
reg                                  cs_change_r, cs_change_sticky_r;
reg                                  no_precharge_wait_r;
reg                                  no_precharge_r, no_precharge_r1;

// auto refresh
reg  [CS_BITS_FIX :0] auto_cnt_r;
reg                   auto_ref_r;
reg                   ref_flag_r;
reg  [11:0]           refi_cnt_r;
reg                   refi_cnt_ok_r;
reg  [7:0]            rfc_cnt_r;
reg                   rfc_ok_r;

// registered command/address outputs towards the memory
reg  [ROW_WIDTH-1:0]  ddr_addr_r;
wire [ROW_WIDTH-1:0]  ddr_addr_col, ddr_addr_row;
reg  [BANK_WIDTH-1:0] ddr_ba_r;
reg  [CS_NUM-1:0]     ddr_cs_n_r;
reg                   ddr_ras_n_r, ddr_cas_n_r, ddr_we_n_r;
reg  [2:0]            two_t_enable_r;
reg  [CS_NUM-1:0]     two_t_enable_r1;

// state machine
reg  [4:0] next_state;
reg  [4:0] state_r, state_r1;
reg        phy_init_done_r;

// timing counters and their "interval satisfied" flags
reg  [4:0] precharge_ok_cnt_r;
reg        precharge_ok_r;
reg  [4:0] ras_cnt_r;
reg  [3:0] rcd_cnt_r;
reg        rcd_cnt_ok_r;
reg  [3:0] rp_cnt_r;
reg        rp_cnt_ok_r;
reg  [2:0] trrd_cnt_r;
reg        trrd_cnt_ok_r;
reg  [2:0] rdburst_cnt_r;
reg        rdburst_ok_r, rdburst_rden_ok_r;
reg  [2:0] wrburst_cnt_r;
reg        wrburst_ok_r, wrburst_wren_ok_r;
reg  [4:0] rd_to_wr_cnt_r;
reg        rd_to_wr_ok_r;
reg  [4:0] wr_to_rd_cnt_r;
reg        wr_to_rd_ok_r;

// local reset "tree"
reg rst_r
    /* synthesis syn_preserve = 1 */;
reg rst_r1
    /* synthesis syn_maxfan = 10 */;

// XST attributes for local reset "tree"
// synthesis attribute shreg_extract of rst_r is "no";
// synthesis attribute shreg_extract of rst_r1 is "no";
// synthesis attribute equivalent_register_removal of rst_r is "no"

//***************************************************************************

// sm_rden requests a read of the address FIFO while the state machine is
// in either burst state.
assign sm_rden = (state_r == CTRL_BURST_WRITE) ||
                 (state_r == CTRL_BURST_READ);

// read enable to the address FIFO
assign ctrl_af_rden = sm_rden || rd_af_flag_r;

// local reset "tree" for controller logic only. Create this to ease timing
// on reset path. Prohibit equivalent register removal on RST_R to prevent
// "sharing" with other local reset trees (caution: make sure global fanout
// limit is set to large enough value, otherwise SLICES may be used for
// fanout control on RST_R).
always @(posedge clk) begin
  rst_r  <= rst;
  rst_r1 <= rst_r;
end

//*****************************************************************
// interpret commands from Command/Address FIFO
//*****************************************************************

// A valid command of 3'b000 is a write, 3'b001 is a read.
assign wr_flag = af_valid_r2 && (af_cmd_r2 == 3'b000);
assign rd_flag = af_valid_r2 && (af_cmd_r2 == 3'b001);

always @(posedge clk) begin
  rd_flag_r <= rd_flag;
  wr_flag_r <= wr_flag;
end

//////////////////////////////////////////////////
// Commands and addresses fetched from the address FIFO pass through two
// register stages; the state machine pulls from the second stage whenever
// it can take a new command.
//
// rd_af_flag_r requests a FIFO read while the pipe is not full: it is
// set when either of the first two stages is empty, and cleared again
// once a FIFO read has actually been performed (ctrl_af_rden_r) or once
// the flag is up and one of those stages already holds valid data.
always @(posedge clk) begin
  // for simulation purposes - to force CTRL_AF_RDEN low during reset
  if (rst_r1)
    rd_af_flag_r <= 1'd0;
  else if (ctrl_af_rden_r ||
           (rd_af_flag_r && (af_valid_r || af_valid_r1)))
    rd_af_flag_r <= 1'd0;
  else if (!(af_valid_r1 && af_valid_r))
    rd_af_flag_r <= 1'd1;
end

// First register stage for the cmd & add from the FIFO.
// The af_valid_r signal gives the status of the data
// in this stage. The af_valid_r will be asserted when there
// is valid data. This register stage will be updated
// 1. read to the FIFO and the FIFO not empty
// 2. After write and read states
// 3. The valid signal is not asserted in the last stage.
// Stage 1 valid bit. It takes the value of ctrl_af_rden_r (a FIFO read
// was performed while the FIFO was not empty) whenever the stage may
// advance: on a FIFO read, after a burst state, or while a later stage
// is empty. Otherwise it holds.
always @(posedge clk) begin
  if (rst_r1)
    af_valid_r <= 1'd0;
  else if (ctrl_af_rden_r || sm_rden_r ||
           !(af_valid_r1 && af_valid_r2))
    af_valid_r <= ctrl_af_rden_r;
end

// The output register in the FIFO is used. The addr
// and command are already registered in the FIFO.
assign af_addr_r = af_addr;
assign af_cmd_r  = af_cmd;

// Second register stage for the cmd & addr from the FIFO; af_valid_r1
// gives the status of the data held here. The stage loads from stage 1
// when
//   1. it holds no valid data,
//   2. the cycle after a write or read burst state (sm_rden_r), or
//   3. the last stage (af_valid_r2) holds no valid data.
always @(posedge clk) begin
  if (rst_r1) begin
    af_valid_r1 <= 1'd0;
    af_addr_r1  <= 31'bx;
    af_cmd_r1   <= 3'bx;
  end else if (sm_rden_r || !(af_valid_r1 && af_valid_r2)) begin
    af_valid_r1 <= af_valid_r;
    af_addr_r1  <= af_addr_r;
    af_cmd_r1   <= af_cmd_r;
  end
end

// The state machine uses the address and command in this
// register stage. The data is fetched from the second
// register stage whenever the state machine can accept new
// addr. The conflict flags are also generated based on the
// second register stage and updated when the new address
// is loaded for the state machine.
// Third stage: loads from stage 2 during a burst state (sm_rden) or while
// it is empty. The bank-hit / conflict flags computed from stage 2 are
// captured at the same time.
always @(posedge clk) begin
  if (rst_r1) begin
    af_valid_r2     <= 1'd0;
    af_addr_r2      <= 31'bx;
    af_cmd_r2       <= 3'bx;
    bank_hit_r      <= {OPEN_BANK_NUM{1'bx}};
    bank_conflict_r <= 1'bx;
    row_conflict_r  <= 4'bx;
  end else if (sm_rden || ~af_valid_r2) begin
    af_valid_r2 <= af_valid_r1;
    af_addr_r2  <= af_addr_r1;
    af_cmd_r2   <= af_cmd_r1;
    if (MULTI_BANK_EN) begin
      // multi-bank: per-entry hit/miss vectors; a bank conflict means no
      // entry in the open-bank table hit
      bank_hit_r      <= bank_hit;
      row_conflict_r  <= row_miss;
      bank_conflict_r <= ~|bank_hit;
    end else begin
      // single-bank: only bit 0 of row_conflict_r is driven; it flags a
      // mismatch against the single open address
      bank_hit_r        <= {OPEN_BANK_NUM{1'b0}};
      bank_conflict_r   <= 1'd0;
      row_conflict_r[0] <=
        (af_addr_r1[CS_RANGE_END:ROW_RANGE_START] !=
         sb_open_add_r[CMP_WIDTH-1:0]);
    end
  end
end // always @ (posedge clk)

// detecting cs change for multi chip select case
generate
  if (CS_NUM > 1) begin: gen_cs_change
    always @(posedge clk) begin
      // cs_change_r defaults low and is overridden below on a stage-3
      // load; cs_change_sticky_r only changes on a load.
      cs_change_r <= 1'd0;
      if (sm_rden || ~af_valid_r2) begin
        cs_change_r        <= (af_addr_r1[CS_RANGE_END:CS_RANGE_START] !=
                               af_addr_r2[CS_RANGE_END:CS_RANGE_START]);
        cs_change_sticky_r <= (af_addr_r1[CS_RANGE_END:CS_RANGE_START] !=
                               af_addr_r2[CS_RANGE_END:CS_RANGE_START]);
      end
    end
  end // block: gen_cs_change
  else begin: gen_cs_0
    always @(posedge clk) begin
      cs_change_r        <= 1'd0;
      cs_change_sticky_r <= 1'd0;
    end
  end
endgenerate

// A conflict is only reported for a valid stage-3 command.
//   multi-bank : row miss in a hit entry, or no entry hit at all
//   single-bank: row/bank/cs mismatch against the single open address
assign conflict_detect =
  af_valid_r2 &
  ((MULTI_BANK_EN)
     ? ((|(row_conflict_r[3:0] & bank_hit_r[3:0])) | bank_conflict_r)
     : row_conflict_r[0]);

// one-cycle delayed copies used by the state machine and the bank logic
always @(posedge clk) begin
  ctrl_af_rden_r    <= ctrl_af_rden & ~af_empty;
  sm_rden_r         <= sm_rden;
  af_addr_r3        <= af_addr_r2;
  conflict_detect_r <= conflict_detect;
end

// conflict resolved signal. When this signal is asserted
// the conflict is resolved. The address to be compared
// for the conflict_resolved_r will be stored in act_add_r
// when the bank is opened.
// conflict_resolved_r is high when the address the state machine is
// working on (stage 3) matches the address captured at the last ACTIVE.
always @(posedge clk) begin
  conflict_resolved_r <= (act_addr_r ==
                          af_addr_r2[CS_RANGE_END:ROW_RANGE_START]);
  if (state_r == CTRL_ACTIVE)
    act_addr_r <= af_addr_r2[CS_RANGE_END:ROW_RANGE_START];
end

//***************************************************************************
// Bank management logic
// Semi-hardcoded for now for 4 banks
// will keep multiple banks open if MULTI_BANK_EN is true.
//
// bank_cmp_addr_r is a table of OPEN_BANK_NUM entries, CMP_WIDTH bits each
// ({cs, bank, row}); entry 0 is the most recently activated one.
// bank_valid_r[i] marks entry i as holding an open bank.
//***************************************************************************

genvar bank_i;
generate // if multiple bank option chosen
  if (MULTI_BANK_EN) begin: gen_multi_bank_open

    for (bank_i = 0; bank_i < OPEN_BANK_NUM;
         bank_i = bank_i + 1) begin: gen_bank_hit1
      always @(*) begin
        // asserted if bank address match + open bank entry is valid
        bank_hit[bank_i]
          = ((bank_cmp_addr_r[(CMP_WIDTH*(bank_i+1))-1:
                              (CMP_WIDTH*bank_i)+ROW_WIDTH] ==
              af_addr_r1[CS_RANGE_END:BANK_RANGE_START]) &&
             bank_valid_r[bank_i]);
        // asserted if row address differs (no check for bank entry valid,
        // rely on this term to be used in conjunction with BANK_HIT[])
        row_miss[bank_i]
          = (bank_cmp_addr_r[(CMP_WIDTH*bank_i)+ROW_WIDTH-1:
                             (CMP_WIDTH*bank_i)] !=
             af_addr_r1[ROW_RANGE_END:ROW_RANGE_START]);
      end
    end

    // Entry 3 is the oldest entry. On a bank conflict:
    //   entry 3 valid   -> no_precharge_wait_r
    //   entry 3 invalid -> no_precharge_r (table still has a free entry)
    always @(posedge clk) begin
      no_precharge_wait_r <= bank_valid_r[3] & bank_conflict_r;
      bank_hit_r1         <= bank_hit_r;
    end

    always @(*)
      no_precharge_r = ~bank_valid_r[3] & bank_conflict_r;

    always @(posedge clk)
      no_precharge_r1 <= no_precharge_r;

    always @(posedge clk) begin
      // Clear all bank valid bits during AR (i.e. since all banks get
      // precharged during auto-refresh)
      if (state_r1 == CTRL_AUTO_REFRESH) begin
        // FIX: the replication counts were one bit short
        // ({(OPEN_BANK_NUM-1){..}} and {(OPEN_BANK_NUM*CMP_WIDTH-1){..}});
        // they now match the declared register widths exactly instead of
        // relying on implicit zero-extension.
        bank_valid_r    <= {OPEN_BANK_NUM{1'b0}};
        bank_cmp_addr_r <= {(OPEN_BANK_NUM*CMP_WIDTH){1'b0}};
      end else if (state_r1 == CTRL_ACTIVE) begin
        // 00 is always going to have the latest bank and row.
        bank_cmp_addr_r[CMP_WIDTH-1:0]
          <= af_addr_r3[CS_RANGE_END:ROW_RANGE_START];
        // This indicates the bank was activated
        bank_valid_r[0] <= 1'b1;

        // Shift the older entries down, depending on which entry (if any)
        // the activated address had hit.
        case ({bank_hit_r1[2:0]})
          3'b001: ; // hit in entry 0: entry 0 is rewritten above, no shift
          3'b010: begin //(b0->b1)
            bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH]
              <= bank_cmp_addr_r[CMP_WIDTH-1:0];
            bank_valid_r[1] <= bank_valid_r[0];
          end
          3'b100: begin //(b0->b1, b1->b2)
            bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH]
              <= bank_cmp_addr_r[CMP_WIDTH-1:0];
            bank_cmp_addr_r[(3*CMP_WIDTH)-1:2*CMP_WIDTH]
              <= bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH];
            bank_valid_r[1] <= bank_valid_r[0];
            bank_valid_r[2] <= bank_valid_r[1];
          end
          default: begin //(b0->b1, b1->b2, b2->b3)
            bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH]
              <= bank_cmp_addr_r[CMP_WIDTH-1:0];
            bank_cmp_addr_r[(3*CMP_WIDTH)-1:2*CMP_WIDTH]
              <= bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH];
            bank_cmp_addr_r[(4*CMP_WIDTH)-1:3*CMP_WIDTH]
              <= bank_cmp_addr_r[(3*CMP_WIDTH)-1:2*CMP_WIDTH];
            bank_valid_r[1] <= bank_valid_r[0];
            bank_valid_r[2] <= bank_valid_r[1];
            bank_valid_r[3] <= bank_valid_r[2];
          end
        endcase
      end
    end

  end else begin: gen_single_bank_open // single bank option
    // Only one address is tracked; the no-precharge shortcuts are disabled.
    always @(posedge clk) begin
      no_precharge_r      <= 1'd0;
      no_precharge_r1     <= 1'd0;
      no_precharge_wait_r <= 1'd0;
      if (rst_r1)
        sb_open_add_r <= {CMP_WIDTH{1'b0}};
      else if (state_r == CTRL_ACTIVE)
        sb_open_add_r <= af_addr_r2[CS_RANGE_END:ROW_RANGE_START];
    end
  end
endgenerate

//***************************************************************************
// Timing counters
//***************************************************************************

//*****************************************************************
// Write and read enable generation for PHY
//*****************************************************************

// write
// burst count. Counts from (BL/2 to 1).
// Also logic for controller write enable.
//
// wrburst_cnt_r         : reloaded with BL/2 in CTRL_BURST_WRITE, then
//                         counts down and stops at 0
// wrburst_wren_ok_r     : one-cycle flag while the count equals 2; used to
//                         drop ctrl_wren
// wrburst_ok_r          : cleared at the start of a burst longer than 4
//                         (BL/2 > 2), set once the count is <= 3 or when
//                         BL/2 <= 2
always @(posedge clk) begin
  if (state_r == CTRL_BURST_WRITE)
    wrburst_cnt_r <= BURST_LEN_DIV2;
  else if (wrburst_cnt_r >= 3'd1)
    wrburst_cnt_r <= wrburst_cnt_r - 1;

  if (wrburst_cnt_r == 3'd2)
    wrburst_wren_ok_r <= 1'b1;
  else
    wrburst_wren_ok_r <= 1'b0;

  if ((state_r == CTRL_BURST_WRITE) && (BURST_LEN_DIV2 > 2))
    wrburst_ok_r <= 1'd0;
  else if ((wrburst_cnt_r <= 3'd3) || (BURST_LEN_DIV2 <= 2))
    wrburst_ok_r <= 1'b1;
end

// write enable to the PHY: raised in CTRL_BURST_WRITE, dropped when
// wrburst_wren_ok_r fires
always @(posedge clk) begin
  if (rst_r1)
    ctrl_wren <= 1'b0;
  else if (state_r == CTRL_BURST_WRITE)
    ctrl_wren <= 1'b1;
  else if (wrburst_wren_ok_r)
    ctrl_wren <= 1'b0;
end

// read burst count. Counts from (BL/2 to 1)
//
// The rdburst_ok_r signal will be asserted one cycle later in multi chip
// select cases if the back to back read is to different chip selects
// (the threshold drops from 3 to 2 while cs_change_sticky_r is set). The
// cs_change_sticky_r signal will be asserted only for multi chip select
// cases.
always @(posedge clk) begin
  if (state_r == CTRL_BURST_READ)
    rdburst_cnt_r <= BURST_LEN_DIV2;
  else if (rdburst_cnt_r >= 3'd1)
    rdburst_cnt_r <= rdburst_cnt_r - 1;

  if ((state_r == CTRL_BURST_READ) && (BURST_LEN_DIV2 > 2))
    rdburst_ok_r <= 1'd0;
  else if ((rdburst_cnt_r <= (3'd3 - cs_change_sticky_r)) ||
           (BURST_LEN_DIV2 <= 2))
    rdburst_ok_r <= 1'b1;
end

// read enable to the PHY: raised in CTRL_BURST_READ, dropped when
// rdburst_rden_ok_r fires
always @(posedge clk) begin
  if (rst_r1)
    ctrl_rden <= 1'b0;
  else if (state_r == CTRL_BURST_READ)
    ctrl_rden <= 1'b1;
  else if (rdburst_rden_ok_r)
    ctrl_rden <= 1'b0;
end

// flag to check when rdburst count has reached
// a value of 1.
// This flag is used in the ctrl_rden
// logic
always @(posedge clk) begin
  if (rdburst_cnt_r == 3'd2)
    rdburst_rden_ok_r <= 1'b1;
  else
    rdburst_rden_ok_r <= 1'b0;
end

//*****************************************************************
// Various delay counters
// Each counter is loaded in the state that issues the command and then
// counts down to 0. Its "ok" flag is cleared on load and set once the
// count is <= 3. It is checked for 3 because
// 1. The counters are loaded during the state when the command
//    state is reached (+1)
// 2. After the <= 3 condition is reached the sm takes two cycles
//    to transition to the new command state (+2)
//*****************************************************************

// tRP count - precharge command period
always @(posedge clk) begin
  if (state_r == CTRL_PRECHARGE)
    rp_cnt_r <= TRP_COUNT;
  else if (rp_cnt_r != 4'd0)
    rp_cnt_r <= rp_cnt_r - 1;

  if (state_r == CTRL_PRECHARGE)
    rp_cnt_ok_r <= 1'd0;
  else if (rp_cnt_r <= 4'd3)
    rp_cnt_ok_r <= 1'd1;
end

// tRFC count - refresh-refresh, refresh-active
always @(posedge clk) begin
  if (state_r == CTRL_AUTO_REFRESH)
    rfc_cnt_r <= TRFC_COUNT;
  else if (rfc_cnt_r != 8'd0)
    rfc_cnt_r <= rfc_cnt_r - 1;

  if (state_r == CTRL_AUTO_REFRESH)
    rfc_ok_r <= 1'b0;
  else if (rfc_cnt_r <= 8'd3)
    rfc_ok_r <= 1'b1;
end

// tRCD count - active to read/write
// (the flag is only cleared on ACTIVE when TRCD_COUNT > 2)
always @(posedge clk) begin
  if (state_r == CTRL_ACTIVE)
    rcd_cnt_r <= TRCD_COUNT;
  else if (rcd_cnt_r != 4'd0)
    rcd_cnt_r <= rcd_cnt_r - 1;

  if ((state_r == CTRL_ACTIVE) && (TRCD_COUNT > 2))
    rcd_cnt_ok_r <= 1'd0;
  else if (rcd_cnt_r <= 4'd3)
    rcd_cnt_ok_r <= 1'b1;
end

// tRRD count - active to active
always @(posedge clk) begin
  if (state_r == CTRL_ACTIVE)
    trrd_cnt_r <= TRRD_COUNT;
  else if (trrd_cnt_r != 3'd0)
    trrd_cnt_r <= trrd_cnt_r - 1;

  if (state_r == CTRL_ACTIVE)
    trrd_cnt_ok_r <= 1'd0;
  else if (trrd_cnt_r <= 3'd3)
    trrd_cnt_ok_r <= 1'b1;
end

// tRAS count - active to precharge
always @(posedge clk) begin
  if (state_r == CTRL_ACTIVE)
    ras_cnt_r <= TRAS_COUNT;
  else if (ras_cnt_r != 5'd0)
    ras_cnt_r <= ras_cnt_r - 1;
end

// Combined counter for write to precharge, read to precharge and
// activate to precharge.
// A read only raises the count (never shortens it): the sm can go from
// activate to read before the act->pre time has been satisfied, and the
// rd->pre time is much shorter. wr->pre time is almost the same as
// act->pre, so those two simply reload the counter.
always @(posedge clk) begin
  if (state_r == CTRL_BURST_READ) begin
    // assign only if the cnt is < TRTP_COUNT
    if (precharge_ok_cnt_r < TRTP_COUNT)
      precharge_ok_cnt_r <= TRTP_COUNT;
  end else if (state_r == CTRL_BURST_WRITE)
    precharge_ok_cnt_r <= TWR_COUNT;
  else if (state_r == CTRL_ACTIVE)
    precharge_ok_cnt_r <= TRAS_COUNT;
  else if (precharge_ok_cnt_r != 5'd0)
    precharge_ok_cnt_r <= precharge_ok_cnt_r - 1;

  if ((state_r == CTRL_BURST_READ)  ||
      (state_r == CTRL_BURST_WRITE) ||
      (state_r == CTRL_ACTIVE))
    precharge_ok_r <= 1'd0;
  else if (precharge_ok_cnt_r <= 5'd3)
    precharge_ok_r <= 1'd1;
end

// write to read counter
// write to read includes : write latency + burst time + tWTR
always @(posedge clk) begin
  if (rst_r1)
    wr_to_rd_cnt_r <= 5'd0;
  else if (state_r == CTRL_BURST_WRITE)
    wr_to_rd_cnt_r <= (TWTR_COUNT);
  else if (wr_to_rd_cnt_r != 5'd0)
    wr_to_rd_cnt_r <= wr_to_rd_cnt_r - 1;
end

always @(posedge clk) begin
  if (state_r == CTRL_BURST_WRITE)
    wr_to_rd_ok_r <= 1'd0;
  else if (wr_to_rd_cnt_r <= 5'd3)
    wr_to_rd_ok_r <= 1'd1;
end

// read to write counter
always @(posedge clk) begin
  if (rst_r1)
    rd_to_wr_cnt_r <= 5'd0;
  else if (state_r == CTRL_BURST_READ)
    rd_to_wr_cnt_r <= (TRTW_COUNT);
  else if (rd_to_wr_cnt_r != 5'd0)
    rd_to_wr_cnt_r <= rd_to_wr_cnt_r - 1;
end

always @(posedge clk) begin
  if (state_r == CTRL_BURST_READ)
    rd_to_wr_ok_r <= 1'b0;
  else if (rd_to_wr_cnt_r <= 5'd3)
    rd_to_wr_ok_r <= 1'b1;
end

// auto refresh interval: refi_cnt_r free-runs from 0; refi_cnt_ok_r
// pulses when it reaches TREFI_COUNT-1 and restarts the count.
always @(posedge clk) begin
  if (refi_cnt_r == (TREFI_COUNT - 1))
    refi_cnt_ok_r <= 1'b1;
  else
    refi_cnt_ok_r <= 1'b0;
end

always @(posedge clk) begin
  if (rst_r1 || refi_cnt_ok_r)
    refi_cnt_r <= 12'd0;
  else
    refi_cnt_r <= refi_cnt_r + 1;
end // always @ (posedge clk)

// auto refresh flag (refi_cnt_ok_r delayed by one cycle)
always @(posedge clk) begin
  if (refi_cnt_ok_r)
    ref_flag_r <= 1'b1;
  else
    ref_flag_r <= 1'b0;
end // always @ (posedge clk)

assign ctrl_ref_flag = ref_flag_r;

// refresh flag detect
// auto_ref high indicates auto_refresh requirement
// auto_ref is held high until auto refresh command is issued.
always @(posedge clk) begin
  if (rst_r1)
    auto_ref_r <= 1'b0;
  else if (ref_flag_r)
    auto_ref_r <= 1'b1;
  else if (state_r == CTRL_AUTO_REFRESH)
    auto_ref_r <= 1'b0;
end

// keep track of which chip selects got auto-refreshed (avoid auto-refreshing
// all CS's at once to avoid current spike): cleared on reset or precharge,
// incremented for every auto refresh command.
always @(posedge clk) begin
  if (rst_r1 || (state_r1 == CTRL_PRECHARGE))
    auto_cnt_r <= 'd0;
  else if (state_r1 == CTRL_AUTO_REFRESH)
    auto_cnt_r <= auto_cnt_r + 1;
end

// register for timing purposes.
// Extra delay doesn't really matter
always @(posedge clk)
  phy_init_done_r <= phy_init_done;

// current state and a one-cycle delayed copy of it
always @(posedge clk) begin
  if (rst_r1) begin
    state_r  <= CTRL_IDLE;
    state_r1 <= CTRL_IDLE;
  end else begin
    state_r  <= next_state;
    state_r1 <= state_r;
  end
end

//***************************************************************************
// main control state machine
// next_state defaults to the current state; each branch below only
// overrides it when a transition is taken.
//***************************************************************************

always @(*) begin
  next_state = state_r;
  (* full_case, parallel_case *) case (state_r)

    CTRL_IDLE: begin
      // perform auto refresh as soon as we are done with calibration.
      // The calibration logic does not do any refreshes.
      if (phy_init_done_r)
        next_state = CTRL_AUTO_REFRESH;
    end

    CTRL_PRECHARGE: begin
      if (auto_ref_r)
        next_state = CTRL_PRECHARGE_WAIT1;
      // when precharging an LRU bank, do not have to go to wait state
      // since we can't possibly be activating row in same bank next
      // disabled for 2t timing. There needs to be a gap between cmds
      // in 2t timing
      else if (no_precharge_wait_r && !TWO_T_TIME_EN)
        next_state = CTRL_ACTIVE;
      else
        next_state = CTRL_PRECHARGE_WAIT;
    end

    CTRL_PRECHARGE_WAIT: begin
      if (rp_cnt_ok_r) begin
        if (auto_ref_r)
          // precharge again to make sure we close all the banks
          next_state = CTRL_PRECHARGE;
        else
          next_state = CTRL_ACTIVE;
      end
    end

    CTRL_PRECHARGE_WAIT1:
      if (rp_cnt_ok_r)
        next_state = CTRL_AUTO_REFRESH;

    CTRL_AUTO_REFRESH:
      next_state = CTRL_AUTO_REFRESH_WAIT;

    CTRL_AUTO_REFRESH_WAIT:
      // staggering Auto refresh for multi chip select designs. The SM
      // waits for the rfc time before issuing the next auto refresh.
      if (auto_cnt_r < (CS_NUM)) begin
        if (rfc_ok_r)
          next_state = CTRL_AUTO_REFRESH;
      end else if (rfc_ok_r) begin
        if (auto_ref_r)
          // MIG 2.3: For deep designs if Auto Refresh
          // flag asserted immediately after calibration is completed
          next_state = CTRL_PRECHARGE;
        else if (wr_flag || rd_flag)
          next_state = CTRL_ACTIVE;
      end

    CTRL_ACTIVE:
      next_state = CTRL_ACTIVE_WAIT;

    CTRL_ACTIVE_WAIT: begin
      if (rcd_cnt_ok_r) begin
        if ((conflict_detect_r && ~conflict_resolved_r) ||
            auto_ref_r) begin
          if (no_precharge_r1 && ~auto_ref_r && trrd_cnt_ok_r)
            next_state = CTRL_ACTIVE;
          else if (precharge_ok_r)
            next_state = CTRL_PRECHARGE;
        end else if ((wr_flag_r) && (rd_to_wr_ok_r))
          next_state = CTRL_BURST_WRITE;
        else if ((rd_flag_r) && (wr_to_rd_ok_r))
          next_state = CTRL_BURST_READ;
      end
    end

    // beginning of write burst
    CTRL_BURST_WRITE: begin
      if (BURST_LEN_DIV2 == 1) begin
        // special case if BL = 2 (i.e. burst lasts only one clk cycle)
        if (wr_flag)
          // if we have another non-conflict write command right after the
          // current write, then stay in this state
          next_state = CTRL_BURST_WRITE;
        else
          // otherwise, if we're done with this burst, and have no write
          // immediately scheduled after this one, wait until write-read
          // delay has passed
          next_state = CTRL_WRITE_WAIT;
      end else
        // otherwise BL > 2, and we have at least one more write cycle for
        // current burst
        next_state = CTRL_WRITE_WAIT;
    end

    // continuation of write burst (also covers waiting after write burst
    // has completed for write-read delay to pass)
    CTRL_WRITE_WAIT: begin
      if ((conflict_detect) || auto_ref_r) begin
        if (no_precharge_r && ~auto_ref_r && wrburst_ok_r)
          next_state = CTRL_ACTIVE;
        else if (precharge_ok_r)
          next_state = CTRL_PRECHARGE;
      end else if (wrburst_ok_r && wr_flag)
        next_state = CTRL_BURST_WRITE;
      else if ((rd_flag) && (wr_to_rd_ok_r))
        next_state = CTRL_BURST_READ;
    end

    CTRL_BURST_READ: begin
      if (BURST_LEN_DIV2 == 1) begin
        // special case if BL = 2 (i.e. burst lasts only one clk cycle)
        if (rd_flag)
          next_state = CTRL_BURST_READ;
        else
          next_state = CTRL_READ_WAIT;
      end else
        next_state = CTRL_READ_WAIT;
    end

    CTRL_READ_WAIT: begin
      if ((conflict_detect) || auto_ref_r) begin
        if (no_precharge_r && ~auto_ref_r && rdburst_ok_r)
          next_state = CTRL_ACTIVE;
        else if (precharge_ok_r)
          next_state = CTRL_PRECHARGE;
      // for burst of 4 in multi chip select
      // if there is a change in cs wait one cycle before the
      // next read command. cs_change_r will be asserted.
      end else if (rdburst_ok_r && rd_flag && ~cs_change_r)
        next_state = CTRL_BURST_READ;
      else if (wr_flag && (rd_to_wr_ok_r))
        next_state = CTRL_BURST_WRITE;
    end
  endcase
end

//***************************************************************************
// control signals to memory
// Each strobe is driven low in the states that need it. In all other
// states it returns high; with TWO_T_TIME_EN set it first repeats the
// previous value of its two_t_enable_r bit, which holds an asserted
// strobe low for one more clock.
//***************************************************************************

// RAS_n: asserted for auto refresh, activate and precharge
always @(posedge clk) begin
  if ((state_r == CTRL_AUTO_REFRESH) ||
      (state_r == CTRL_ACTIVE)       ||
      (state_r == CTRL_PRECHARGE)) begin
    ddr_ras_n_r       <= 1'b0;
    two_t_enable_r[0] <= 1'b0;
  end else begin
    ddr_ras_n_r       <= (TWO_T_TIME_EN) ? two_t_enable_r[0] : 1'd1;
    two_t_enable_r[0] <= 1'b1;
  end
end

// CAS_n: asserted for write burst, read burst and auto refresh
always @(posedge clk) begin
  if ((state_r == CTRL_BURST_WRITE) ||
      (state_r == CTRL_BURST_READ)  ||
      (state_r == CTRL_AUTO_REFRESH)) begin
    ddr_cas_n_r       <= 1'b0;
    two_t_enable_r[1] <= 1'b0;
  end else begin
    ddr_cas_n_r       <= (TWO_T_TIME_EN) ? two_t_enable_r[1] : 1'b1;
    two_t_enable_r[1] <= 1'b1;
  end
end

// WE_n: asserted for write burst and precharge
always @(posedge clk) begin
  if ((state_r == CTRL_BURST_WRITE) ||
      (state_r == CTRL_PRECHARGE)) begin
    ddr_we_n_r        <= 1'b0;
    two_t_enable_r[2] <= 1'b0;
  end else begin
    ddr_we_n_r        <= (TWO_T_TIME_EN) ? two_t_enable_r[2] : 1'b1;
    two_t_enable_r[2] <= 1'b1;
  end
end

// turn off auto-precharge when issuing commands (A10 = 0)
// mapping
the col add for linear addressing.
+  // Column address presented with burst read/write commands. A constant 0
+  // is spliced in at bit 10, between af address bits [9:0] and any column
+  // bits above them; unless COL_WIDTH == ROW_WIDTH-1 the result is also
+  // zero-padded on the left by (ROW_WIDTH-COL_WIDTH-1) bits.
+  // TWO_T_TIME_EN selects the af_addr_r3 copy of the address, otherwise
+  // af_addr_r2 is used.
+  // NOTE(review): bit 10 is presumably skipped because A10 carries command
+  // flags (see the precharge branches below) - confirm against the DDR2
+  // command encoding.
+  generate
+    if (TWO_T_TIME_EN) begin: gen_addr_col_two_t
+      if (COL_WIDTH == ROW_WIDTH-1) begin: gen_ddr_addr_col_0
+        assign ddr_addr_col = {af_addr_r3[COL_WIDTH-1:10], 1'b0,
+                               af_addr_r3[9:0]};
+      end else begin
+        if (COL_WIDTH > 10) begin: gen_ddr_addr_col_1
+          assign ddr_addr_col = {{(ROW_WIDTH-COL_WIDTH-1){1'b0}},
+                                 af_addr_r3[COL_WIDTH-1:10], 1'b0,
+                                 af_addr_r3[9:0]};
+        end else begin: gen_ddr_addr_col_2
+          assign ddr_addr_col = {{(ROW_WIDTH-COL_WIDTH-1){1'b0}}, 1'b0,
+                                 af_addr_r3[COL_WIDTH-1:0]};
+        end
+      end
+    end else begin: gen_addr_col_one_t
+      if (COL_WIDTH == ROW_WIDTH-1) begin: gen_ddr_addr_col_0_1
+        assign ddr_addr_col = {af_addr_r2[COL_WIDTH-1:10], 1'b0,
+                               af_addr_r2[9:0]};
+      end else begin
+        if (COL_WIDTH > 10) begin: gen_ddr_addr_col_1_1
+          assign ddr_addr_col = {{(ROW_WIDTH-COL_WIDTH-1){1'b0}},
+                                 af_addr_r2[COL_WIDTH-1:10], 1'b0,
+                                 af_addr_r2[9:0]};
+        end else begin: gen_ddr_addr_col_2_1
+          assign ddr_addr_col = {{(ROW_WIDTH-COL_WIDTH-1){1'b0}}, 1'b0,
+                                 af_addr_r2[COL_WIDTH-1:0]};
+        end
+      end
+    end
+  endgenerate
+
+  // Assign address during row activate
+  // (same af_addr_r3 / af_addr_r2 selection as for the column address)
+  generate
+    if (TWO_T_TIME_EN)
+      assign ddr_addr_row = af_addr_r3[ROW_RANGE_END:ROW_RANGE_START];
+    else
+      assign ddr_addr_row = af_addr_r2[ROW_RANGE_END:ROW_RANGE_START];
+  endgenerate
+
+
+  // Address output register. Priority: row address while activating, column
+  // address during burst read/write, precharge encodings, otherwise X.
+  // When TWO_T_TIME_EN is set the one-cycle-delayed state (state_r1) is
+  // also honoured for each command.
+  always @(posedge clk)begin
+    if ((state_r == CTRL_ACTIVE) ||
+        ((state_r1 == CTRL_ACTIVE) && TWO_T_TIME_EN))
+      ddr_addr_r <= ddr_addr_row;
+    else if ((state_r == CTRL_BURST_WRITE) ||
+             (state_r == CTRL_BURST_READ)  ||
+             (((state_r1 == CTRL_BURST_WRITE) ||
+               (state_r1 == CTRL_BURST_READ)) &&
+              TWO_T_TIME_EN))
+      ddr_addr_r <= ddr_addr_col;
+    else if (((state_r == CTRL_PRECHARGE)  ||
+              ((state_r1 == CTRL_PRECHARGE) && TWO_T_TIME_EN))
+             && auto_ref_r) begin
+      // if we're precharging as a result of AUTO-REFRESH, precharge all banks
+      // (the later nonblocking assignment to bit 10 overrides the all-zero
+      // vector assignment for that bit only)
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+      ddr_addr_r[10] <= 1'b1;
+    end else if ((state_r == CTRL_PRECHARGE) ||
+                 ((state_r1 == CTRL_PRECHARGE) && TWO_T_TIME_EN))
+      // if we're precharging to close a specific bank/row, set A10=0
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+    else
+      // no command needs the address bus: drive X so simulation exposes
+      // any unintended use of the value
+      ddr_addr_r <= {ROW_WIDTH{1'bx}};
+  end
+
+  // Bank address output register.
+  always @(posedge clk)begin
+    // whenever we're precharging, we're either: (1) precharging all banks (in
+    // which case bank bits are don't care), (2) precharging the LRU bank,
+    // b/c we've exceeded the limit of # of banks open (need to close the LRU
+    // bank to make room for a new one), (3) we haven't exceeded the maximum #
+    // of banks open, but we're trying to open a different row in a bank
+    // that's already open
+    if (((state_r == CTRL_PRECHARGE)  ||
+         ((state_r1 == CTRL_PRECHARGE) && TWO_T_TIME_EN)) &&
+        bank_conflict_r && MULTI_BANK_EN)
+      // When LRU bank needs to be closed
+      // (bank field taken from entry 3 of bank_cmp_addr_r)
+      ddr_ba_r <= bank_cmp_addr_r[(3*CMP_WIDTH)+CMP_BANK_RANGE_END:
+                                  (3*CMP_WIDTH)+CMP_BANK_RANGE_START];
+    else begin
+      // Either precharge due to refresh or bank hit case
+      if (TWO_T_TIME_EN)
+        ddr_ba_r <= af_addr_r3[BANK_RANGE_END:BANK_RANGE_START];
+      else
+        ddr_ba_r <= af_addr_r2[BANK_RANGE_END:BANK_RANGE_START];
+    end
+  end
+
+  // chip enable generation logic
+  // In the multi-CS branches every chip select is first deasserted (1) and
+  // then a single indexed bit is asserted (0); the later nonblocking
+  // assignment wins for that bit. gen_2t_cs and gen_1t_cs are identical
+  // except that the former uses state_r1/af_addr_r3 and the latter
+  // state_r/af_addr_r2.
+  generate
+    // if only one chip select, always assert it after reset
+    if (CS_BITS == 0) begin: gen_ddr_cs_0
+      always @(posedge clk)
+        if (rst_r1)
+          ddr_cs_n_r[0] <= 1'b1;
+        else
+          ddr_cs_n_r[0] <= 1'b0;
+    // otherwise if we have multiple chip selects
+    end else begin: gen_ddr_cs_1
+      if(TWO_T_TIME_EN) begin: gen_2t_cs
+        always @(posedge clk)
+          if (rst_r1)
+            ddr_cs_n_r <= {CS_NUM{1'b1}};
+          else if ((state_r1 == CTRL_AUTO_REFRESH)) begin
+            // if auto-refreshing, only auto-refresh one CS at any time (avoid
+            // beating on the ground plane by refreshing all CS's at same time)
+            ddr_cs_n_r <= {CS_NUM{1'b1}};
+            ddr_cs_n_r[auto_cnt_r] <= 1'b0;
+          end else if (auto_ref_r && (state_r1 == CTRL_PRECHARGE)) begin
+            // precharge ahead of auto-refresh: assert every chip select
+            ddr_cs_n_r <= {CS_NUM{1'b0}};
+          end else if ((state_r1 == CTRL_PRECHARGE) && ( bank_conflict_r
+                       && MULTI_BANK_EN))begin
+            // precharging the LRU bank
+            ddr_cs_n_r <= {CS_NUM{1'b1}};
+            ddr_cs_n_r[bank_cmp_addr_r[(3*CMP_WIDTH)+CMP_CS_RANGE_END:
+                                       (3*CMP_WIDTH)+CMP_CS_RANGE_START]] <= 1'b0;
+          end else begin
+            // otherwise, check the upper address bits to see which CS to assert
+            ddr_cs_n_r <= {CS_NUM{1'b1}};
+            ddr_cs_n_r[af_addr_r3[CS_RANGE_END:CS_RANGE_START]] <= 1'b0;
+          end // else: !if(((state_r == CTRL_PRECHARGE)  ||...
+      end else begin: gen_1t_cs // block: gen_2t_cs
+        always @(posedge clk)
+          if (rst_r1)
+            ddr_cs_n_r <= {CS_NUM{1'b1}};
+          else if ((state_r == CTRL_AUTO_REFRESH) ) begin
+            // if auto-refreshing, only auto-refresh one CS at any time (avoid
+            // beating on the ground plane by refreshing all CS's at same time)
+            ddr_cs_n_r <= {CS_NUM{1'b1}};
+            ddr_cs_n_r[auto_cnt_r] <= 1'b0;
+          end else if (auto_ref_r && (state_r == CTRL_PRECHARGE) ) begin
+            // precharge ahead of auto-refresh: assert every chip select
+            ddr_cs_n_r <= {CS_NUM{1'b0}};
+          end else if ((state_r == CTRL_PRECHARGE) &&
+                       (bank_conflict_r && MULTI_BANK_EN))begin
+            // precharging the LRU bank
+            ddr_cs_n_r <= {CS_NUM{1'b1}};
+            ddr_cs_n_r[bank_cmp_addr_r[(3*CMP_WIDTH)+CMP_CS_RANGE_END:
+                                       (3*CMP_WIDTH)+CMP_CS_RANGE_START]] <= 1'b0;
+          end else begin
+            // otherwise, check the upper address bits to see which CS to assert
+            ddr_cs_n_r <= {CS_NUM{1'b1}};
+            ddr_cs_n_r[af_addr_r2[CS_RANGE_END:CS_RANGE_START]] <= 1'b0;
+          end // else: !if(((state_r == CTRL_PRECHARGE)  ||...
+      end // block: gen_1t_cs
+    end
+  endgenerate
+
+  // registering the two_t timing enable signal.
+  // This signal will be asserted (low) when the
+  // chip select has to be asserted.
+  // It is all-ones only when every bit of two_t_enable_r is 1; since it is
+  // ORed into ctrl_cs_n below, all-ones forces every chip select inactive.
+  always @(posedge clk)begin
+    if(&two_t_enable_r)
+      two_t_enable_r1 <= {CS_NUM{1'b1}};
+    else
+      two_t_enable_r1 <= {CS_NUM{1'b0}};
+  end
+
+  // Module outputs: registered command/address signals. In 2T mode the
+  // chip selects are additionally gated by two_t_enable_r1.
+  assign ctrl_addr  = ddr_addr_r;
+  assign ctrl_ba    = ddr_ba_r;
+  assign ctrl_ras_n = ddr_ras_n_r;
+  assign ctrl_cas_n = ddr_cas_n_r;
+  assign ctrl_we_n  = ddr_we_n_r;
+  assign ctrl_cs_n  = (TWO_T_TIME_EN) ?
+                      (ddr_cs_n_r | two_t_enable_r1) :
+                      ddr_cs_n_r;
+
+endmodule
+
diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_idelay_ctrl.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_idelay_ctrl.v
new file mode 100644
index 0000000..5742feb
--- /dev/null
+++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_idelay_ctrl.v
@@ -0,0 +1,97 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. +//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_idelay_ctrl.v +// /___/ /\ Date Last Modified: $Date: 2008/05/08 15:20:47 $ +// \ \ / \ Date Created: Wed Aug 16 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +// This module instantiates the IDELAYCTRL primitive of the Virtex-5 device +// which continuously calibrates the IDELAY elements in the region in case of +// varying operating conditions. It takes a 200MHz clock as an input +//Reference: +//Revision History: +//***************************************************************************** + +`timescale 1ns/1ps + +module ddr2_idelay_ctrl # + ( + // Following parameters are for 72-bit RDIMM design (for ML561 Reference + // board design). Actual values may be different. Actual parameters values + // are passed from design top module ddr2_sdram module. Please refer to + // the ddr2_sdram module for actual values. 
+ parameter IDELAYCTRL_NUM = 4 + ) + + ( + input clk200, + input rst200, + output idelay_ctrl_rdy + ); + +wire [IDELAYCTRL_NUM-1 : 0] idelay_ctrl_rdy_i; + +genvar bnk_i; +generate +for(bnk_i=0; bnk_i CAS delay) + 3 (min CAS latency) = 6.5 + // - Minimum time for DQS gate circuit to be generated: + // * 1 cyc to register CTRL_RDEN from controller + // * 1 cyc after RDEN_CTRL falling edge + // * 1 cyc min through SRL32 + // * 1 cyc through SRL32 output flop + // * 0 (<1) cyc of synchronization to DQS domain via IDELAY + // * 1 cyc of delay through IDDR to generate CE to DQ IDDR's + // Total = 5 cyc < 6.5 cycles + // The total should be less than 5.5 cycles to account prop delays + // adding one cycle to the synchronization time via the IDELAY. + // NOTE: Value differs because of optional pipeline register added + // for case of RDEN_BASE_DELAY > 3 to improve timing + localparam GATE_BASE_DELAY = RDEN_BASE_DELAY - 3; + localparam GATE_BASE_INIT = (GATE_BASE_DELAY <= 1) ? 0 : GATE_BASE_DELAY; + // used for RDEN calibration: difference between shift value used during + // calibration, and shift value for actual RDEN SRL. Only applies when + // RDEN edge is immediately captured by CLKDIV0. If not (depends on phase + // of CLK0 and CLKDIV0 when RDEN is asserted), then add 1 to this value. + localparam CAL3_RDEN_SRL_DLY_DELTA = 6; + // fix minimum value of DQS to be 1 to handle the case where's there's only + // one DQS group. We could also enforce that user always inputs minimum + // value of 1 for DQS_BITS (even when DQS_WIDTH=1). Leave this as safeguard + // Assume we don't have to do this for DQ, DQ_WIDTH always > 1 + localparam DQS_BITS_FIX = (DQS_BITS == 0) ? 1 : DQS_BITS; + // how many taps to "pre-delay" DQ before stg 1 calibration - not needed for + // current calibration, but leave for debug + localparam DQ_IDEL_INIT = 6'b000000; + // # IDELAY taps per bit time (i.e. half cycle). Limit to 63. + localparam integer BIT_TIME_TAPS = (CLK_PERIOD/150 < 64) ? 
+ CLK_PERIOD/150 : 63; + + // used in various places during stage 4 cal: (1) determines maximum taps + // to increment when finding right edge, (2) amount to decrement after + // finding left edge, (3) amount to increment after finding right edge + localparam CAL4_IDEL_BIT_VAL = (BIT_TIME_TAPS >= 6'b100000) ? + 6'b100000 : BIT_TIME_TAPS; + + localparam CAL1_IDLE = 4'h0; + localparam CAL1_INIT = 4'h1; + localparam CAL1_INC_IDEL = 4'h2; + localparam CAL1_FIND_FIRST_EDGE = 4'h3; + localparam CAL1_FIRST_EDGE_IDEL_WAIT = 4'h4; + localparam CAL1_FOUND_FIRST_EDGE_WAIT = 4'h5; + localparam CAL1_FIND_SECOND_EDGE = 4'h6; + localparam CAL1_SECOND_EDGE_IDEL_WAIT = 4'h7; + localparam CAL1_CALC_IDEL = 4'h8; + localparam CAL1_DEC_IDEL = 4'h9; + localparam CAL1_DONE = 4'hA; + + localparam CAL2_IDLE = 4'h0; + localparam CAL2_INIT = 4'h1; + localparam CAL2_INIT_IDEL_WAIT = 4'h2; + localparam CAL2_FIND_EDGE_POS = 4'h3; + localparam CAL2_FIND_EDGE_IDEL_WAIT_POS = 4'h4; + localparam CAL2_FIND_EDGE_NEG = 4'h5; + localparam CAL2_FIND_EDGE_IDEL_WAIT_NEG = 4'h6; + localparam CAL2_DEC_IDEL = 4'h7; + localparam CAL2_DONE = 4'h8; + + localparam CAL3_IDLE = 3'h0; + localparam CAL3_INIT = 3'h1; + localparam CAL3_DETECT = 3'h2; + localparam CAL3_RDEN_PIPE_CLR_WAIT = 3'h3; + localparam CAL3_DONE = 3'h4; + + localparam CAL4_IDLE = 3'h0; + localparam CAL4_INIT = 3'h1; + localparam CAL4_FIND_WINDOW = 3'h2; + localparam CAL4_FIND_EDGE = 3'h3; + localparam CAL4_IDEL_WAIT = 3'h4; + localparam CAL4_RDEN_PIPE_CLR_WAIT = 3'h5; + localparam CAL4_ADJ_IDEL = 3'h6; + localparam CAL4_DONE = 3'h7; + + integer i, j; + + reg [5:0] cal1_bit_time_tap_cnt; + reg [1:0] cal1_data_chk_last; + reg cal1_data_chk_last_valid; + reg [1:0] cal1_data_chk_r; + reg cal1_dlyce_dq; + reg cal1_dlyinc_dq; + reg cal1_dqs_dq_init_phase; + reg cal1_detect_edge; + reg cal1_detect_stable; + reg cal1_found_second_edge; + reg cal1_found_rising; + reg cal1_found_window; + reg cal1_first_edge_done; + reg [5:0] cal1_first_edge_tap_cnt; + reg 
[6:0] cal1_idel_dec_cnt; + reg [5:0] cal1_idel_inc_cnt; + reg [5:0] cal1_idel_max_tap; + reg cal1_idel_max_tap_we; + reg [5:0] cal1_idel_tap_cnt; + reg cal1_idel_tap_limit_hit; + reg [6:0] cal1_low_freq_idel_dec; + reg cal1_ref_req; + wire cal1_refresh; + reg [3:0] cal1_state; + reg [3:0] cal1_window_cnt; + reg cal2_curr_sel; + wire cal2_detect_edge; + reg cal2_dlyce_dqs; + reg cal2_dlyinc_dqs; + reg [5:0] cal2_idel_dec_cnt; + reg [5:0] cal2_idel_tap_cnt; + reg [5:0] cal2_idel_tap_limit; + reg cal2_idel_tap_limit_hit; + reg cal2_rd_data_fall_last_neg; + reg cal2_rd_data_fall_last_pos; + reg cal2_rd_data_last_valid_neg; + reg cal2_rd_data_last_valid_pos; + reg cal2_rd_data_rise_last_neg; + reg cal2_rd_data_rise_last_pos; + reg [DQS_WIDTH-1:0] cal2_rd_data_sel; + wire cal2_rd_data_sel_edge; + reg [DQS_WIDTH-1:0] cal2_rd_data_sel_r; + reg cal2_ref_req; + reg [3:0] cal2_state; + reg cal3_data_match; + reg cal3_data_match_stgd; + wire cal3_data_valid; + wire cal3_match_found; + wire [4:0] cal3_rden_dly; + reg [4:0] cal3_rden_srl_a; + reg [2:0] cal3_state; + wire cal4_data_good; + reg cal4_data_match; + reg cal4_data_match_stgd; + wire cal4_data_valid; + reg cal4_dlyce_gate; + reg cal4_dlyinc_gate; + reg cal4_dlyrst_gate; + reg [4:0] cal4_gate_srl_a; + reg [5:0] cal4_idel_adj_cnt; + reg cal4_idel_adj_inc; + reg cal4_idel_bit_tap; + reg [5:0] cal4_idel_tap_cnt; + reg cal4_idel_max_tap; + reg [4:0] cal4_rden_srl_a; + reg cal4_ref_req; + reg cal4_seek_left; + reg cal4_stable_window; + reg [2:0] cal4_state; + reg [3:0] cal4_window_cnt; + reg [3:0] calib_done_tmp; // only for stg1/2/4 + reg calib_ctrl_gate_pulse_r; + reg calib_ctrl_rden; + reg calib_ctrl_rden_r; + wire calib_ctrl_rden_negedge; + reg calib_ctrl_rden_negedge_r; + reg [3:0] calib_done_r; + reg [3:0] calib_err; + reg [1:0] calib_err_2; + wire calib_init_gate_pulse; + reg calib_init_gate_pulse_r; + reg calib_init_gate_pulse_r1; + reg calib_init_rden; + reg calib_init_rden_r; + reg [4:0] calib_rden_srl_a; + wire 
[4:0] calib_rden_srl_a_r; + reg [(5*DQS_WIDTH)-1:0] calib_rden_dly; + reg calib_rden_edge_r; + reg [4:0] calib_rden_pipe_cnt; + wire calib_rden_srl_out; + wire calib_rden_srl_out_r; + reg calib_rden_srl_out_r1; + reg calib_rden_valid; + reg calib_rden_valid_stgd; + reg [DQ_BITS-1:0] count_dq; + reg [DQS_BITS_FIX-1:0] count_dqs; + reg [DQS_BITS_FIX-1:0] count_gate; + reg [DQS_BITS_FIX-1:0] count_rden; + reg ctrl_rden_r; + wire dlyce_or; + reg [(5*DQS_WIDTH)-1:0] gate_dly; + wire [(5*DQS_WIDTH)-1:0] gate_dly_r; + wire gate_srl_in; + wire [DQS_WIDTH-1:0] gate_srl_out; + wire [DQS_WIDTH-1:0] gate_srl_out_r; + reg [2:0] idel_set_cnt; + wire idel_set_wait; + reg [DQ_BITS-1:0] next_count_dq; + reg [DQS_BITS_FIX-1:0] next_count_dqs; + reg [DQS_BITS_FIX-1:0] next_count_gate; + reg phy_init_rden_r; + reg phy_init_rden_r1; + reg [DQ_WIDTH-1:0] rd_data_fall_1x_r; + reg [DQS_WIDTH-1:0] rd_data_fall_1x_r1; + reg [DQS_WIDTH-1:0] rd_data_fall_2x_r; + wire [DQS_WIDTH-1:0] rd_data_fall_chk_q1; + wire [DQS_WIDTH-1:0] rd_data_fall_chk_q2; + reg [DQ_WIDTH-1:0] rd_data_rise_1x_r; + reg [DQS_WIDTH-1:0] rd_data_rise_1x_r1; + reg [DQS_WIDTH-1:0] rd_data_rise_2x_r; + wire [DQS_WIDTH-1:0] rd_data_rise_chk_q1; + wire [DQS_WIDTH-1:0] rd_data_rise_chk_q2; + reg rdd_fall_q1; + reg rdd_fall_q1_r; + reg rdd_fall_q1_r1; + reg rdd_fall_q2; + reg rdd_fall_q2_r; + reg rdd_rise_q1; + reg rdd_rise_q1_r; + reg rdd_rise_q1_r1; + reg rdd_rise_q2; + reg rdd_rise_q2_r; + reg [DQS_BITS_FIX-1:0] rdd_mux_sel; + reg rden_dec; + reg [(5*DQS_WIDTH)-1:0] rden_dly; + wire [(5*DQS_WIDTH)-1:0] rden_dly_r; + reg [4:0] rden_dly_0; + reg rden_inc; + reg [DQS_WIDTH-1:0] rden_mux; + wire [DQS_WIDTH-1:0] rden_srl_out; + + // Debug + integer x; + reg [5:0] dbg_dq_tap_cnt [DQ_WIDTH-1:0]; + reg [5:0] dbg_dqs_tap_cnt [DQS_WIDTH-1:0]; + reg [5:0] dbg_gate_tap_cnt [DQS_WIDTH-1:0]; + + //*************************************************************************** + // Debug output ("dbg_phy_calib_*") + // NOTES: + // 1. 
All debug outputs coming out of PHY_CALIB are clocked off CLKDIV0,
+  //     although they are also static after calibration is complete. This
+  //     means the user can either connect them to a Chipscope ILA, or to
+  //     either a sync/async VIO input block. Using an async VIO has the
+  //     advantage of not requiring these paths to meet cycle-to-cycle timing.
+  //  2. The widths of most of these debug buses are dependent on the # of
+  //     DQS/DQ bits (e.g. dq_tap_cnt width = 6 * (# of DQ bits)
+  // SIGNAL DESCRIPTION:
+  //  1. calib_done:   4 bits - each one asserted as each phase of calibration
+  //                   is completed.
+  //  2. calib_err:    4 bits - each one asserted when a calibration error
+  //                   encountered for that stage. Some of these bits may not
+  //                   be used (not all cal stages report an error).
+  //  3. dq_tap_cnt:   final IDELAY tap counts for all DQ IDELAYs
+  //  4. dqs_tap_cnt:  final IDELAY tap counts for all DQS IDELAYs
+  //  5. gate_tap_cnt: final IDELAY tap counts for all DQS gate
+  //                   synchronization IDELAYs
+  //  6. rd_data_sel:  final read capture MUX (either "positive" or "negative"
+  //                   edge capture) settings for all DQS groups
+  //  7. rden_dly:     related to # of cycles after issuing a read until when
+  //                   read data is valid - for all DQS groups
+  //  8. gate_dly:     related to # of cycles after issuing a read until when
+  //                   clock enable for all DQ's is deasserted to prevent
+  //                   effect of DQS postamble glitch - for all DQS groups
+  //***************************************************************************
+
+  //*****************************************************************
+  // Record IDELAY tap values by "snooping" IDELAY control signals
+  // Each counter below shadows one IDELAY: it is cleared by rstdiv or the
+  // matching DLYRST, and steps up/down by one whenever the matching DLYCE is
+  // asserted, in the direction given by DLYINC. The 6-bit counts are packed
+  // side by side into the dbg_calib_*_tap_cnt buses.
+  //*****************************************************************
+
+  // record DQ IDELAY tap values
+  genvar dbg_dq_tc_i;
+  generate
+    for (dbg_dq_tc_i = 0; dbg_dq_tc_i < DQ_WIDTH;
+         dbg_dq_tc_i = dbg_dq_tc_i + 1) begin: gen_dbg_dq_tap_cnt
+      assign dbg_calib_dq_tap_cnt[(6*dbg_dq_tc_i)+5:(6*dbg_dq_tc_i)]
+               = dbg_dq_tap_cnt[dbg_dq_tc_i];
+      always @(posedge clkdiv)
+        if (rstdiv | dlyrst_dq)
+          dbg_dq_tap_cnt[dbg_dq_tc_i] <= 6'b000000;
+        else
+          if (dlyce_dq[dbg_dq_tc_i])
+            if (dlyinc_dq[dbg_dq_tc_i])
+              dbg_dq_tap_cnt[dbg_dq_tc_i]
+                <= dbg_dq_tap_cnt[dbg_dq_tc_i] + 1;
+            else
+              dbg_dq_tap_cnt[dbg_dq_tc_i]
+                <= dbg_dq_tap_cnt[dbg_dq_tc_i] - 1;
+    end
+  endgenerate
+
+  // record DQS IDELAY tap values
+  genvar dbg_dqs_tc_i;
+  generate
+    for (dbg_dqs_tc_i = 0; dbg_dqs_tc_i < DQS_WIDTH;
+         dbg_dqs_tc_i = dbg_dqs_tc_i + 1) begin: gen_dbg_dqs_tap_cnt
+      assign dbg_calib_dqs_tap_cnt[(6*dbg_dqs_tc_i)+5:(6*dbg_dqs_tc_i)]
+               = dbg_dqs_tap_cnt[dbg_dqs_tc_i];
+      always @(posedge clkdiv)
+        if (rstdiv | dlyrst_dqs)
+          dbg_dqs_tap_cnt[dbg_dqs_tc_i] <= 6'b000000;
+        else
+          if (dlyce_dqs[dbg_dqs_tc_i])
+            if (dlyinc_dqs[dbg_dqs_tc_i])
+              dbg_dqs_tap_cnt[dbg_dqs_tc_i]
+                <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] + 1;
+            else
+              dbg_dqs_tap_cnt[dbg_dqs_tc_i]
+                <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] - 1;
+    end
+  endgenerate
+
+  // record DQS gate IDELAY tap values
+  // (unlike DQ/DQS, the gate reset is per DQS group: dlyrst_gate is indexed)
+  genvar dbg_gate_tc_i;
+  generate
+    for (dbg_gate_tc_i = 0; dbg_gate_tc_i < DQS_WIDTH;
+         dbg_gate_tc_i = dbg_gate_tc_i + 1) begin: gen_dbg_gate_tap_cnt
+      assign dbg_calib_gate_tap_cnt[(6*dbg_gate_tc_i)+5:(6*dbg_gate_tc_i)]
+               = dbg_gate_tap_cnt[dbg_gate_tc_i];
+      always @(posedge clkdiv)
+        if (rstdiv | dlyrst_gate[dbg_gate_tc_i])
+          dbg_gate_tap_cnt[dbg_gate_tc_i] <= 6'b000000;
+        else
+          if (dlyce_gate[dbg_gate_tc_i])
+            if (dlyinc_gate[dbg_gate_tc_i])
+              dbg_gate_tap_cnt[dbg_gate_tc_i]
+                <= dbg_gate_tap_cnt[dbg_gate_tc_i] + 1;
+            else
+              dbg_gate_tap_cnt[dbg_gate_tc_i]
+                <= dbg_gate_tap_cnt[dbg_gate_tc_i] - 1;
+    end
+  endgenerate
+
+  // remaining debug outputs are direct copies of internal calibration state
+  assign dbg_calib_done        = calib_done;
+  assign dbg_calib_err         = calib_err;
+  assign dbg_calib_rd_data_sel = cal2_rd_data_sel;
+  assign dbg_calib_rden_dly    = rden_dly;
+  assign dbg_calib_gate_dly    = gate_dly;
+
+  //***************************************************************************
+  // Read data pipelining, and read data "ISERDES" data width expansion
+  //***************************************************************************
+
+  // For all data bits, register incoming capture data to slow clock to improve
+  // timing. Adding single pipeline stage does not affect functionality (as
+  // long as we make sure to wait extra clock cycle after changing DQ IDELAY)
+  // Also note in this case that we're "missing" every other clock cycle's
+  // worth of data capture since we're sync'ing to the slow clock. This is
+  // fine for stage 1 and stage 2 cal, but not for stage 3 and 4 (see below
+  // for different circuit to handle those stages)
+  always @(posedge clkdiv) begin
+    rd_data_rise_1x_r <= rd_data_rise;
+    rd_data_fall_1x_r <= rd_data_fall;
+  end
+
+  // For every DQ_PER_DQS bit, generate what is essentially a ISERDES-type
+  // data width expander. Will need this for stage 3 and 4 cal, where we need
+  // to compare data over consecutive clock cycles. We can also use this for
+  // stage 2 as well (stage 2 doesn't require every bit to be looked at, only
+  // one bit per DQS group)
+  // Only the first DQ bit of each DQS group (index rdd_i*DQ_PER_DQS) is
+  // sampled by this circuit.
+  genvar rdd_i;
+  generate
+    for (rdd_i = 0; rdd_i < DQS_WIDTH; rdd_i = rdd_i + 1) begin: gen_rdd
+      // first stage: keep data in fast clk domain. Store data over two
+      // consecutive clock cycles for rise/fall data for proper transfer
+      // to slow clock domain
+      always @(posedge clk) begin
+        rd_data_rise_2x_r[rdd_i] <= rd_data_rise[(rdd_i*DQ_PER_DQS)];
+        rd_data_fall_2x_r[rdd_i] <= rd_data_fall[(rdd_i*DQ_PER_DQS)];
+      end
+      // second stage, register first stage to slow clock domain, 2nd stage
+      // consists of both these flops, and the rd_data_rise_1x_r flops
+      always @(posedge clkdiv) begin
+        rd_data_rise_1x_r1[rdd_i] <= rd_data_rise_2x_r[rdd_i];
+        rd_data_fall_1x_r1[rdd_i] <= rd_data_fall_2x_r[rdd_i];
+      end
+      // now we have four outputs - representing rise/fall outputs over last
+      // 2 fast clock cycles. However, the ordering these represent can either
+      // be: (1) Q2 = data @ time = n, Q1 = data @ time = n+1, or (2)
+      // Q2 = data @ time = n - 1, Q1 = data @ time = n (and data at [Q1,Q2]
+      // is "staggered") - leave it up to the stage of calibration using this
+      // to figure out which is which, if they care at all (e.g. stage 2 cal
+      // doesn't care about the ordering)
+      assign rd_data_rise_chk_q1[rdd_i]
+               = rd_data_rise_1x_r[(rdd_i*DQ_PER_DQS)];
+      assign rd_data_rise_chk_q2[rdd_i]
+               = rd_data_rise_1x_r1[rdd_i];
+      assign rd_data_fall_chk_q1[rdd_i]
+               = rd_data_fall_1x_r[(rdd_i*DQ_PER_DQS)];
+      assign rd_data_fall_chk_q2[rdd_i]
+               = rd_data_fall_1x_r1[rdd_i];
+    end
+  endgenerate
+
+  //*****************************************************************
+  // Outputs of these simplified ISERDES circuits then feed MUXes based on
+  // which DQ the current calibration algorithm needs to look at
+  //*****************************************************************
+
+  // generate MUX control; assume that adding an extra pipeline stage isn't
+  // an issue - whatever stage cal logic is using output of MUX will wait
+  // enough time after changing it
+  // NOTE(review): only three values of calib_done[2:0] are listed and the
+  // case carries full_case/parallel_case attributes, so for any other value
+  // simulation holds rdd_mux_sel while synthesis may treat it as don't-care.
+  always @(posedge clkdiv) begin
+    (* full_case, parallel_case *) case (calib_done[2:0])
+      3'b001: rdd_mux_sel <= next_count_dqs;
+      3'b011: rdd_mux_sel <= count_rden;
+      3'b111: rdd_mux_sel <= next_count_gate;
+    endcase
+  end
+
+  // registered MUX outputs: the Q1/Q2 rise/fall samples of the selected group
+  always @(posedge clkdiv) begin
+    rdd_rise_q1 <= rd_data_rise_chk_q1[rdd_mux_sel];
+    rdd_rise_q2 <= rd_data_rise_chk_q2[rdd_mux_sel];
+    rdd_fall_q1 <= rd_data_fall_chk_q1[rdd_mux_sel];
+    rdd_fall_q2 <= rd_data_fall_chk_q2[rdd_mux_sel];
+  end
+
+  //***************************************************************************
+  // Demultiplexor to control (reset, increment, decrement) IDELAY tap values
+  //   For DQ:
+  //     STG1: for per-bit-deskew, only inc/dec the current DQ. For non-per
+  //           deskew, increment all bits in the current DQS set
+  //     STG2: inc/dec all DQ's in the current DQS set.
+  // NOTE: Nice to add some error checking logic here (or elsewhere in the
+  //       code) to check if logic attempts to overflow tap value
+  //***************************************************************************
+
+  // don't use DLYRST to reset value of IDELAY after reset. Need to change this
+  // if we want to allow user to recalibrate after initial reset
+  // (DLYRST for DQ and DQS simply follows rstdiv, one clkdiv cycle later)
+  always @(posedge clkdiv)
+    if (rstdiv) begin
+      dlyrst_dq  <= 1'b1;
+      dlyrst_dqs <= 1'b1;
+    end else begin
+      dlyrst_dq  <= 1'b0;
+      dlyrst_dqs <= 1'b0;
+    end
+
+  // DQ/DQS IDELAY CE/INC demux. Every cycle all CE/INC bits default to 0 and
+  // are then overridden (later nonblocking assignments win) by exactly one
+  // source, in priority order: stage 1 request, stage 2 request, debug port
+  // (only when DEBUG_EN != 0).
+  always @(posedge clkdiv) begin
+    if (rstdiv) begin
+      dlyce_dq   <= 'b0;
+      dlyinc_dq  <= 'b0;
+      dlyce_dqs  <= 'b0;
+      dlyinc_dqs <= 'b0;
+    end else begin
+      dlyce_dq   <= 'b0;
+      dlyinc_dq  <= 'b0;
+      dlyce_dqs  <= 'b0;
+      dlyinc_dqs <= 'b0;
+
+      // stage 1 cal: change only specified DQ
+      if (cal1_dlyce_dq) begin
+        if (SIM_ONLY == 0) begin
+          dlyce_dq[count_dq]  <= 1'b1;
+          dlyinc_dq[count_dq] <= cal1_dlyinc_dq;
+        end else begin
+          // if simulation, then calibrate only first DQ, apply results
+          // to all DQs (i.e. assume delay on all DQs is the same)
+          for (i = 0; i < DQ_WIDTH; i = i + 1) begin: loop_sim_dq_dly
+            dlyce_dq[i]  <= 1'b1;
+            dlyinc_dq[i] <= cal1_dlyinc_dq;
+          end
+        end
+      end else if (cal2_dlyce_dqs) begin
+        // stage 2 cal: change DQS and all corresponding DQ's
+        if (SIM_ONLY == 0) begin
+          dlyce_dqs[count_dqs]   <= 1'b1;
+          dlyinc_dqs[count_dqs]  <= cal2_dlyinc_dqs;
+          for (i = 0; i < DQ_PER_DQS; i = i + 1) begin: loop_dqs_dly
+            dlyce_dq[(DQ_PER_DQS*count_dqs)+i]  <= 1'b1;
+            dlyinc_dq[(DQ_PER_DQS*count_dqs)+i] <= cal2_dlyinc_dqs;
+          end
+        end else begin
+          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_dqs_dly
+            // if simulation, then calibrate only first DQS
+            dlyce_dqs[i]   <= 1'b1;
+            dlyinc_dqs[i]  <= cal2_dlyinc_dqs;
+            for (j = 0; j < DQ_PER_DQS; j = j + 1) begin: loop_sim_dq_dqs_dly
+              dlyce_dq[(DQ_PER_DQS*i)+j]  <= 1'b1;
+              dlyinc_dq[(DQ_PER_DQS*i)+j] <= cal2_dlyinc_dqs;
+            end
+          end
+        end
+      end else if (DEBUG_EN != 0) begin
+        // DEBUG: allow user to vary IDELAY tap settings
+        // For DQ IDELAY taps
+        if (dbg_idel_up_all || dbg_idel_down_all ||
+            dbg_sel_all_idel_dq) begin
+          for (x = 0; x < DQ_WIDTH; x = x + 1) begin: loop_dly_inc_dq
+            dlyce_dq[x] <= dbg_idel_up_all | dbg_idel_down_all |
+                           dbg_idel_up_dq  | dbg_idel_down_dq;
+            dlyinc_dq[x] <= dbg_idel_up_all | dbg_idel_up_dq;
+          end
+        end else begin
+          dlyce_dq <= 'b0;
+          dlyce_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq |
+                                       dbg_idel_down_dq;
+          dlyinc_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq;
+        end
+        // For DQS IDELAY taps
+        if (dbg_idel_up_all || dbg_idel_down_all ||
+            dbg_sel_all_idel_dqs) begin
+          for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_dqs
+            dlyce_dqs[x] <= dbg_idel_up_all | dbg_idel_down_all |
+                            dbg_idel_up_dqs | dbg_idel_down_dqs;
+            dlyinc_dqs[x] <= dbg_idel_up_all | dbg_idel_up_dqs;
+          end
+        end else begin
+          dlyce_dqs <= 'b0;
+          dlyce_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs |
+                                         dbg_idel_down_dqs;
+          dlyinc_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs;
+        end
+      end
+    end
+  end
+
+  // GATE synchronization is handled
directly by Stage 4 calibration FSM
+  // Gate IDELAY RST/CE/INC demux. All bits default to 0 each cycle and are
+  // then overridden (later nonblocking assignments win). A stage 4 reset
+  // request is handled independently of the CE/INC request; the debug port
+  // is only consulted when stage 4 is not requesting CE and DEBUG_EN != 0.
+  // When SIM_ONLY != 0 every DQS group is driven together instead of only
+  // the group selected by count_gate.
+  always @(posedge clkdiv)
+    if (rstdiv) begin
+      dlyrst_gate <= {DQS_WIDTH{1'b1}};
+      dlyce_gate  <= {DQS_WIDTH{1'b0}};
+      dlyinc_gate <= {DQS_WIDTH{1'b0}};
+    end else begin
+      dlyrst_gate <= {DQS_WIDTH{1'b0}};
+      dlyce_gate  <= {DQS_WIDTH{1'b0}};
+      dlyinc_gate <= {DQS_WIDTH{1'b0}};
+
+      if (cal4_dlyrst_gate) begin
+        if (SIM_ONLY == 0)
+          dlyrst_gate[count_gate] <= 1'b1;
+        else
+          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly_rst
+            dlyrst_gate[i] <= 1'b1;
+          end
+      end
+
+      if (cal4_dlyce_gate) begin
+        if (SIM_ONLY == 0) begin
+          dlyce_gate[count_gate]  <= 1'b1;
+          dlyinc_gate[count_gate] <= cal4_dlyinc_gate;
+        end else begin
+          // if simulation, then calibrate only first gate
+          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly
+            dlyce_gate[i]  <= 1'b1;
+            dlyinc_gate[i] <= cal4_dlyinc_gate;
+          end
+        end
+      end else if (DEBUG_EN != 0) begin
+        // DEBUG: allow user to vary IDELAY tap settings
+        if (dbg_idel_up_all || dbg_idel_down_all ||
+            dbg_sel_all_idel_gate) begin
+          for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_gate
+            dlyce_gate[x] <= dbg_idel_up_all | dbg_idel_down_all |
+                             dbg_idel_up_gate | dbg_idel_down_gate;
+            dlyinc_gate[x] <= dbg_idel_up_all | dbg_idel_up_gate;
+          end
+        end else begin
+          dlyce_gate <= {DQS_WIDTH{1'b0}};
+          dlyce_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate |
+                                           dbg_idel_down_gate;
+          dlyinc_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate;
+        end
+      end
+    end
+
+  //***************************************************************************
+  // signal to tell calibration state machines to wait and give IDELAY time to
+  // settle after its value is changed (both time for IDELAY chain to settle,
+  // and for settled output to propagate through ISERDES). For general use: use
+  // for any calibration state machines that modify any IDELAY.
+  // Should give at least enough time for IDELAY output to settle (technically
+  // for V5, this should be "glitchless" when IDELAY taps are changed, so don't
+  // need any time here), and also time for new data to propagate through both
+  // ISERDES and the "RDD" MUX + associated pipelining
+  // For now, give very "generous" delay - doesn't really matter since only
+  // needed during calibration
+  //***************************************************************************
+
+  // determine if calibration polarity has changed
+  // (one-cycle-delayed copy of cal2_rd_data_sel; any differing bit is an edge)
+  always @(posedge clkdiv)
+    cal2_rd_data_sel_r <= cal2_rd_data_sel;
+
+  assign cal2_rd_data_sel_edge = |(cal2_rd_data_sel ^ cal2_rd_data_sel_r);
+
+  // combine requests to modify any of the IDELAYs into one. Also when second
+  // stage capture "edge" polarity is changed (IDELAY isn't changed in this
+  // case, but use the same counter to stall cal logic)
+  assign dlyce_or = cal1_dlyce_dq |
+                    cal2_dlyce_dqs |
+                    cal2_rd_data_sel_edge |
+                    cal4_dlyce_gate |
+                    cal4_dlyrst_gate;
+
+  // idel_set_wait is high in the same cycle as any request (combinational
+  // path from dlyce_or) and stays high until the counter reaches IDEL_SET_VAL
+  // SYN_NOTE: Can later recode to avoid combinational path
+  assign idel_set_wait = dlyce_or || (idel_set_cnt != IDEL_SET_VAL);
+
+  // settle timer: cleared by reset or by any new request, then counts up and
+  // saturates at IDEL_SET_VAL. idel_set_cnt is declared as reg [2:0], so the
+  // clear value is written as a 3-bit literal to match the register width.
+  always @(posedge clkdiv)
+    if (rstdiv)
+      idel_set_cnt <= 3'b000;
+    else if (dlyce_or)
+      idel_set_cnt <= 3'b000;
+    else if (idel_set_cnt != IDEL_SET_VAL)
+      idel_set_cnt <= idel_set_cnt + 1;
+
+  // generate request to PHY_INIT logic to issue auto-refresh
+  // used by certain states to force prech/auto-refresh part way through
+  // calibration to avoid a tRAS violation (which will happen if that
+  // stage of calibration lasts long enough).
This signal must meet the
+  // following requirements: (1) only transition from 0->1 when the refresh
+  // request is needed, (2) stay at 1 and only transition 1->0 when
+  // CALIB_REF_DONE is asserted
+  // Registered OR of the per-stage refresh requests; synchronous reset takes
+  // priority by being the last nonblocking assignment in the block.
+  always @(posedge clkdiv) begin
+    calib_ref_req <= cal1_ref_req | cal2_ref_req | cal4_ref_req;
+    if (rstdiv)
+      calib_ref_req <= 1'b0;
+  end
+
+  // stage 1 calibration requests auto-refresh every 4 bits: cal1_refresh is
+  // high whenever the two LSBs of next_count_dq are zero. When the DQ counter
+  // is narrower than two bits that slice does not exist, so the request is
+  // tied off instead.
+  generate
+    if (DQ_BITS >= 2) begin: gen_cal1_refresh_dq_gt4
+      assign cal1_refresh = (next_count_dq[1:0] == 2'b00);
+    end else begin: gen_cal1_refresh_dq_lte4
+      assign cal1_refresh = 1'b0;
+    end
+  endgenerate
+
+  //***************************************************************************
+  // First stage calibration: DQ-DQS
+  // Definitions:
+  //  edge: detected when varying IDELAY, and current capture data != prev
+  //    capture data
+  //  valid bit window: detected when current capture data == prev capture
+  //    data for more than half the bit time
+  //  starting conditions for DQS-DQ phase:
+  //    case 1: when DQS starts somewhere in rising edge bit window, or
+  //      on the right edge of the rising bit window.
+  //    case 2: when DQS starts somewhere in falling edge bit window, or
+  //      on the right edge of the falling bit window.
+  // Algorithm Description:
+  //  1. Increment DQ IDELAY until we find an edge.
+  //  2. While we're finding the first edge, note whether a valid bit window
+  //     has been detected before we found an edge. If so, then figure out if
+  //     this is the rising or falling bit window. If rising, then our starting
+  //     DQS-DQ phase is case 1. If falling, then it's case 2. If don't detect
+  //     a valid bit window, then we must have started on the edge of a window.
+  //     Need to wait until later on to decide which case we are.
+  //       - Store FIRST_EDGE IDELAY value
+  //  3. Now look for second edge.
+  //  4. While we're finding the second edge, note whether valid bit window
+  //     is detected.
If so, then use it, along with results from (2), to figure
+  //     out what the starting case is. If in rising bit window, then we're in
+  //     case 2. If falling, then case 1.
+  //       - Store SECOND_EDGE IDELAY value
+  //     NOTES:
+  //       a. Finding two edges allows us to calculate the bit time (although
+  //          not the "same" bit time polarity - need to investigate this
+  //          more).
+  //       b. If we run out of taps looking for the second edge, then the bit
+  //          time must be too long (>= 2.5ns, and DQS-DQ starting phase must be
+  //          case 1).
+  //  5. Calculate absolute amount to delay DQ as:
+  //       If second edge found, and case 1:
+  //         - DQ_IDELAY = FIRST_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
+  //       If second edge found, and case 2:
+  //         - DQ_IDELAY = SECOND_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
+  //       If second edge not found, then need to make an approximation on
+  //       how much to shift by (should be okay, because we have more timing
+  //       margin):
+  //         - DQ_IDELAY = FIRST_EDGE - 0.5 * (bit_time)
+  //     NOTE: Does this account for either case 1 or case 2?????
+  //     NOTE: It's also possible, even when we find the second edge, to
+  //           instead just use half the bit time to subtract from either
+  //           FIRST or SECOND_EDGE. Finding the actual bit time (which is
+  //           what (SECOND_EDGE - FIRST_EDGE) is) is slightly more accurate,
+  //           since it takes into account duty cycle distortion.
+  //  6. Repeat for each DQ in current DQS set.
+  //***************************************************************************
+
+  //*****************************************************************
+  // for first stage calibration - used for checking if DQS is aligned to the
+  // particular DQ, such that we're in the data valid window. Basically, this
+  // is one giant MUX.
+ // = [falling data, rising data] + // = [0, 1] = rising DQS aligned in proper (rising edge) bit window + // = [1, 0] = rising DQS aligned in wrong (falling edge) bit window + // = [0, 0], or [1,1] = in uncertain region between windows + //***************************************************************** + + // SYN_NOTE: May have to split this up into multiple levels - MUX can get + // very wide - as wide as the data bus width + always @(posedge clkdiv) + cal1_data_chk_r <= {rd_data_fall_1x_r[next_count_dq], + rd_data_rise_1x_r[next_count_dq]}; + + //***************************************************************** + // determine when an edge has occurred - when either the current value + // is different from the previous latched value or when the DATA_CHK + // outputs are the same (rare, but indicates that we're at an edge) + // This is only valid when the IDELAY output and propagation of the + // data through the capture flops has had a chance to settle out. + //***************************************************************** + + // write CAL1_DETECT_EDGE and CAL1_DETECT_STABLE in such a way that + // if X's are captured on the bus during functional simulation, that + // the logic will register this as an edge detected. Do this to allow + // use of this HDL with Denali memory models (Denali models drive DQ + // to X's on both edges of the data valid window to simulate jitter) + // This is only done for functional simulation purposes. **Should not** + // make the final synthesized logic more complicated, but it does make + // the HDL harder to understand b/c we have to "phrase" the logic + // slightly differently than when not worrying about X's + always @(*) begin + // no edge found if: (1) we have recorded a previous DATA_CHK value, + // and the current value matches it, (2) we haven't yet recorded a + // previous value, but rise/fall data is equal to either [0,1] or + // [1,0] (i.e.
rise/fall + // data isn't either X's, or [0,0] or [1,1], which indicates we're + // in the middle of an edge, since normally rise != fall data for stg1) + if ((cal1_data_chk_last_valid && + (cal1_data_chk_r == cal1_data_chk_last)) || + (!cal1_data_chk_last_valid && + ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10)))) + cal1_detect_edge = 1'b0; + else + cal1_detect_edge = 1'b1; + end + + always @(*) begin + // assert if we've found a region where data valid window is stable + // over consecutive IDELAY taps, and either rise/fall = [1,0], or [0,1] + // (combinational logic - blocking assignments are used here, the same + // as for CAL1_DETECT_EDGE) + if ((cal1_data_chk_last_valid && + (cal1_data_chk_r == cal1_data_chk_last)) && + ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10))) + cal1_detect_stable = 1'b1; + else + cal1_detect_stable = 1'b0; + end + + //***************************************************************** + // Find valid window: keep track of how long we've been in the same data + // window. If it's been long enough, then declare that we've found a valid + // window. Also returns whether we found a rising or falling window (only + // valid when found_window is asserted) + //***************************************************************** + + always @(posedge clkdiv) begin + if (cal1_state == CAL1_INIT) begin + cal1_window_cnt <= 4'b0000; + cal1_found_window <= 1'b0; + cal1_found_rising <= 1'bx; + end else if (!cal1_data_chk_last_valid) begin + // if we haven't stored a previous value of CAL1_DATA_CHK (or it got + // invalidated because we detected an edge, and are now looking for the + // second edge), then make sure FOUND_WINDOW deasserted on following + // clock edge (to avoid finding a false window immediately after finding + // an edge). Note that because of jitter, it's possible to not find an + // edge at the end of the IDELAY increment settling time, but to find an + // edge on the next clock cycle (e.g.
during CAL1_FIND_FIRST_EDGE) + cal1_window_cnt <= 4'b0000; + cal1_found_window <= 1'b0; + cal1_found_rising <= 1'bx; + end else if (((cal1_state == CAL1_FIRST_EDGE_IDEL_WAIT) || + (cal1_state == CAL1_SECOND_EDGE_IDEL_WAIT)) && + !idel_set_wait) begin + // while finding the first and second edges, see if we can detect a + // stable bit window (occurs over MIN_WIN_SIZE number of taps). If + // so, then we're away from an edge, and can conclusively determine the + // starting DQS-DQ phase. + if (cal1_detect_stable) begin + cal1_window_cnt <= cal1_window_cnt + 1; + if (cal1_window_cnt == MIN_WIN_SIZE-1) begin + cal1_found_window <= 1'b1; + if (cal1_data_chk_r == 2'b01) + cal1_found_rising <= 1'b1; + else + cal1_found_rising <= 1'b0; + end + end else begin + // otherwise, we're not in a data valid window, reset the window + // counter, and indicate we're not currently in window. This should + // happen by design at least once after finding the first edge. + cal1_window_cnt <= 4'b0000; + cal1_found_window <= 1'b0; + cal1_found_rising <= 1'bx; + end + end + end + + //***************************************************************** + // keep track of edge tap counts found, and whether we've + // incremented to the maximum number of taps allowed + //***************************************************************** + + always @(posedge clkdiv) + if (cal1_state == CAL1_INIT) begin + cal1_idel_tap_limit_hit <= 1'b0; + cal1_idel_tap_cnt <= 6'b000000; + end else if (cal1_dlyce_dq) begin + if (cal1_dlyinc_dq) begin + cal1_idel_tap_cnt <= cal1_idel_tap_cnt + 1; + // flag the limit in the cycle the count advances from 62 to 63 + cal1_idel_tap_limit_hit <= (cal1_idel_tap_cnt == 6'b111110); + end else begin + cal1_idel_tap_cnt <= cal1_idel_tap_cnt - 1; + cal1_idel_tap_limit_hit <= 1'b0; + end + end + + //***************************************************************** + // Pipeline for better timing - amount to decrement by if second + // edge not found + //***************************************************************** + // if only one edge
found (possible for low frequencies), then: + // 1. Assume starting DQS-DQ phase has DQS in DQ window (aka "case 1") + // 2. We have to decrement by (63 - first_edge_tap_cnt) + (BIT_TIME_TAPS/2) + // (i.e. decrement by 63-first_edge_tap_cnt to get to right edge of + // DQ window. Then decrement again by (BIT_TIME_TAPS/2) to get to center + // of DQ window) + // 3. Clamp the above value at 63 to ensure we don't underflow IDELAY + // (note: clamping happens in the CAL1 state machine) + always @(posedge clkdiv) + cal1_low_freq_idel_dec + <= (7'b0111111 - {1'b0, cal1_first_edge_tap_cnt}) + + (BIT_TIME_TAPS/2); + + //***************************************************************** + // Keep track of max taps used during stage 1, use this to limit + // the number of taps that can be used in stage 2 + //***************************************************************** + + always @(posedge clkdiv) + if (rstdiv) begin + cal1_idel_max_tap <= 6'b000000; + cal1_idel_max_tap_we <= 1'b0; + end else begin + // pipeline latch enable for CAL1_IDEL_MAX_TAP - we have plenty + // of time, tap count gets updated, then dead cycles waiting for + // IDELAY output to settle + cal1_idel_max_tap_we <= (cal1_idel_max_tap < cal1_idel_tap_cnt); + // record maximum # of taps used for stg 1 cal + if ((cal1_state == CAL1_DONE) && cal1_idel_max_tap_we) + cal1_idel_max_tap <= cal1_idel_tap_cnt; + end + + //***************************************************************** + // Stage 1 calibration state machine: calibrates one DQ bit at a time + // (COUNT_DQ), pulsing CAL1_DLYCE_DQ/CAL1_DLYINC_DQ to step the DQ IDELAY + //***************************************************************** + + always @(posedge clkdiv) + if (rstdiv) begin + calib_done[0] <= 1'b0; + calib_done_tmp[0] <= 1'bx; + calib_err[0] <= 1'b0; + count_dq <= {DQ_BITS{1'b0}}; + next_count_dq <= {DQ_BITS{1'b0}}; + cal1_bit_time_tap_cnt <= 6'bxxxxxx; + cal1_data_chk_last <= 2'bxx; + cal1_data_chk_last_valid <= 1'bx; + cal1_dlyce_dq <= 1'b0; + cal1_dlyinc_dq <= 1'b0; + cal1_dqs_dq_init_phase <= 1'bx; + cal1_first_edge_done <= 1'bx; + cal1_found_second_edge <= 1'bx; + cal1_first_edge_tap_cnt <= 6'bxxxxxx; + cal1_idel_dec_cnt <= 7'bxxxxxxx; +
cal1_idel_inc_cnt <= 6'bxxxxxx; + cal1_ref_req <= 1'b0; + cal1_state <= CAL1_IDLE; + end else begin + // default values for all "pulse" outputs + cal1_ref_req <= 1'b0; + cal1_dlyce_dq <= 1'b0; + cal1_dlyinc_dq <= 1'b0; + + case (cal1_state) + CAL1_IDLE: begin + count_dq <= {DQ_BITS{1'b0}}; + next_count_dq <= {DQ_BITS{1'b0}}; + if (calib_start[0]) begin + calib_done[0] <= 1'b0; + calib_done_tmp[0] <= 1'b0; + cal1_state <= CAL1_INIT; + end + end + + CAL1_INIT: begin + cal1_data_chk_last_valid <= 1'b0; + cal1_found_second_edge <= 1'b0; + cal1_dqs_dq_init_phase <= 1'b0; + cal1_idel_inc_cnt <= 6'b000000; + cal1_state <= CAL1_INC_IDEL; + end + + // increment DQ IDELAY so that either: (1) DQS starts somewhere in + // first rising DQ window, or (2) DQS starts in first falling DQ + // window. The amount to shift is frequency dependent (and is either + // precalculated by MIG or possibly adjusted by the user) + CAL1_INC_IDEL: + if ((cal1_idel_inc_cnt == DQ_IDEL_INIT) && !idel_set_wait) begin + cal1_state <= CAL1_FIND_FIRST_EDGE; + end else if (cal1_idel_inc_cnt != DQ_IDEL_INIT) begin + cal1_idel_inc_cnt <= cal1_idel_inc_cnt + 1; + cal1_dlyce_dq <= 1'b1; + cal1_dlyinc_dq <= 1'b1; + end + + // look for first edge + CAL1_FIND_FIRST_EDGE: begin + // Determine DQS-DQ phase if we can detect enough of a valid window + if (cal1_found_window) + cal1_dqs_dq_init_phase <= ~cal1_found_rising; + // find first edge - if found then record position + if (cal1_detect_edge) begin + cal1_state <= CAL1_FOUND_FIRST_EDGE_WAIT; + cal1_first_edge_done <= 1'b0; + cal1_first_edge_tap_cnt <= cal1_idel_tap_cnt; + cal1_data_chk_last_valid <= 1'b0; + end else begin + // otherwise, store the current value of DATA_CHK, increment + // DQ IDELAY, and compare again + cal1_state <= CAL1_FIRST_EDGE_IDEL_WAIT; + cal1_data_chk_last <= cal1_data_chk_r; + // avoid comparing against DATA_CHK_LAST for previous iteration + cal1_data_chk_last_valid <= 1'b1; + cal1_dlyce_dq <= 1'b1; + cal1_dlyinc_dq <= 1'b1; + end + end
+ + // wait for DQ IDELAY to settle + CAL1_FIRST_EDGE_IDEL_WAIT: + if (!idel_set_wait) + cal1_state <= CAL1_FIND_FIRST_EDGE; + + // delay state between finding first edge and looking for second + // edge. Necessary in order to invalidate CAL1_FOUND_WINDOW before + // starting to look for second edge + CAL1_FOUND_FIRST_EDGE_WAIT: + cal1_state <= CAL1_FIND_SECOND_EDGE; + + // Try and find second edge + CAL1_FIND_SECOND_EDGE: begin + // When looking for 2nd edge, first make sure data stabilized (by + // detecting valid data window) - needed to avoid false edges + if (cal1_found_window) begin + cal1_first_edge_done <= 1'b1; + cal1_dqs_dq_init_phase <= cal1_found_rising; + end + // exit if run out of taps to increment + if (cal1_idel_tap_limit_hit) + cal1_state <= CAL1_CALC_IDEL; + else begin + // found second edge, record the current edge count + if (cal1_first_edge_done && cal1_detect_edge) begin + cal1_state <= CAL1_CALC_IDEL; + cal1_found_second_edge <= 1'b1; + cal1_bit_time_tap_cnt <= cal1_idel_tap_cnt - + cal1_first_edge_tap_cnt + 1; + end else begin + cal1_state <= CAL1_SECOND_EDGE_IDEL_WAIT; + cal1_data_chk_last <= cal1_data_chk_r; + cal1_data_chk_last_valid <= 1'b1; + cal1_dlyce_dq <= 1'b1; + cal1_dlyinc_dq <= 1'b1; + end + end + end + + // wait for DQ IDELAY to settle, then store ISERDES output + CAL1_SECOND_EDGE_IDEL_WAIT: + if (!idel_set_wait) + cal1_state <= CAL1_FIND_SECOND_EDGE; + + // pipeline delay state to calculate amount to decrement DQ IDELAY + // NOTE: We're calculating the amount to decrement by, not the + // absolute setting for DQ IDELAY + CAL1_CALC_IDEL: begin + // if two edges found + if (cal1_found_second_edge) + // case 1: DQS was in DQ window to start with. First edge found + // corresponds to left edge of DQ rising window.
Backup by 1.5*BT + // NOTE: In this particular case, it is possible to decrement + // "below 0" in the case where DQS delay is less than 0.5*BT, + // need to limit decrement to prevent IDELAY tap underflow + if (!cal1_dqs_dq_init_phase) + cal1_idel_dec_cnt <= {1'b0, cal1_bit_time_tap_cnt} + + {1'b0, (cal1_bit_time_tap_cnt >> 1)}; + // case 2: DQS was in wrong DQ window (in DQ falling window). + // First edge found is right edge of DQ rising window. Second + // edge is left edge of DQ rising window. Backup by 0.5*BT + else + cal1_idel_dec_cnt <= {1'b0, (cal1_bit_time_tap_cnt >> 1)}; + // if only one edge found - assume will always be case 1 - DQS in + // DQ window. Case 2 only possible if path delay on DQS > 5ns + else + cal1_idel_dec_cnt <= cal1_low_freq_idel_dec; + cal1_state <= CAL1_DEC_IDEL; + end + + // decrement DQ IDELAY for final adjustment + CAL1_DEC_IDEL: + // once adjustment is complete, we're done with calibration for + // this DQ, now go to DONE state and repeat for next DQ + // Add underflow protection for case of 2 edges found and DQS + // starting in DQ window (see comments for above state) - note we + // have to take into account delayed value of CAL1_IDEL_TAP_CNT - + // gets updated one clock cycle after CAL1_DLYCE/INC_DQ + if ((cal1_idel_dec_cnt == 7'b0000000) || + (cal1_dlyce_dq && (cal1_idel_tap_cnt == 6'b000001))) begin + cal1_state <= CAL1_DONE; + // stop when all DQ's calibrated, or DQ[0] cal'ed (for sim) + if ((count_dq == DQ_WIDTH-1) || (SIM_ONLY != 0)) + calib_done_tmp[0] <= 1'b1; + else + // need for VHDL simulation to prevent out-of-index error + next_count_dq <= count_dq + 1; + end else begin + // keep decrementing until final tap count reached + cal1_idel_dec_cnt <= cal1_idel_dec_cnt - 1; + cal1_dlyce_dq <= 1'b1; + cal1_dlyinc_dq <= 1'b0; + end + + // delay state to allow count_dq and DATA_CHK to point to the next + // DQ bit (allows us to potentially begin checking for an edge on + // next DQ right away).
+ CAL1_DONE: + if (!idel_set_wait) begin + count_dq <= next_count_dq; + if (calib_done_tmp[0]) begin + calib_done[0] <= 1'b1; + cal1_state <= CAL1_IDLE; + end else begin + // request auto-refresh after every 4 bits calibrated (i.e. when + // CAL1_REFRESH is asserted) to avoid tRAS violation + if (cal1_refresh) begin + cal1_ref_req <= 1'b1; + if (calib_ref_done) + cal1_state <= CAL1_INIT; + end else + // if no need this time for refresh, proceed to next bit + cal1_state <= CAL1_INIT; + end + end + endcase + end + + //*************************************************************************** + // Second stage calibration: DQS-FPGA Clock + // Algorithm Description: + // 1. Assumes a training pattern that will produce a pattern oscillating at + // half the core clock frequency each on rise and fall outputs, and such + // that rise and fall outputs are 180 degrees out of phase from each + // other. Note that since the calibration logic runs at half the speed + // of the interface, expect that data sampled with the slow clock always + // to be constant (either always = 1, or = 0, and rise data != fall data) + // unless we cross the edge of the data valid window + // 2. Start by setting RD_DATA_SEL = 0. This selects the rising capture data + // sync'ed to rising edge of core clock, and falling edge data sync'ed + // to falling edge of core clock + // 3. Start looking for an edge. An edge is defined as either: (1) a + // change in capture value or (2) an invalid capture value (e.g. rising + // data != falling data for that same clock cycle). + // 4. If an edge is found, go to step (6). If edge hasn't been found, then + // set RD_DATA_SEL = 1, and try again. + // 5. If no edge is found, then increment IDELAY and return to step (3) + // 6. If an edge is found, then invert RD_DATA_SEL - this shifts the + // capture point 180 degrees from the edge of the window (minus duty + // cycle distortion, delay skew between rising/falling edge capture + // paths, etc.) + // 7.
If no edge is found by CAL2_IDEL_TAP_LIMIT (= 63 - # taps used for + // stage 1 calibration), then decrement IDELAY (without reinverting + // RD_DATA_SEL) by CAL2_IDEL_TAP_LIMIT/2. This guarantees we at least + // have CAL2_IDEL_TAP_LIMIT/2 of slack both before and after the + // capture point (not optimal, but best we can do not having found an + // edge of the window). This happens only for very low frequencies. + // 8. Repeat for each DQS group. + // NOTE: Step 6 is not optimal. A better (and perhaps more complicated) + // algorithm might be to find both edges of the data valid window (using + // the same polarity of RD_DATA_SEL), and then decrement to the midpoint. + //*************************************************************************** + + // RD_DATA_SEL should be tagged with FROM-TO (multi-cycle) constraint in + // UCF file to relax timing. This net is "pseudo-static" (after value is + // changed, FSM waits number of cycles before using the output). + // Note that we are adding one clock cycle of delay (to isolate it from + // the other logic CAL2_RD_DATA_SEL feeds), make sure FSM waits long + // enough to compensate (by default it does, it waits a few cycles more + // than minimum # of clock cycles) + genvar rd_i; + generate + for (rd_i = 0; rd_i < DQS_WIDTH; rd_i = rd_i+1) begin: gen_rd_data_sel + FDRSE u_ff_rd_data_sel + ( + .Q (rd_data_sel[rd_i]), + .C (clkdiv), + .CE (1'b1), + .D (cal2_rd_data_sel[rd_i]), + .R (1'b0), + .S (1'b0) + ) /* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + end + endgenerate + + //***************************************************************** + // Max number of taps used for stg2 cal dependent on number of taps + // used for stg1 (give priority to stg1 cal - let it use as many + // taps as it needs - the remainder of the IDELAY taps can be used + // by stg2) + //***************************************************************** + + always @(posedge clkdiv) + cal2_idel_tap_limit <= 6'b111111 -
cal1_idel_max_tap; + + //***************************************************************** + // second stage calibration uses readback pattern of "1100" (i.e. + // 1st rising = 1, 1st falling = 1, 2nd rising = 0, 2nd falling = 0) + // only look at the first bit of each DQS group + //***************************************************************** + + // asserted when captured data has changed since IDELAY was + // incremented (compared against the value last stored for the + // currently selected capture polarity), or when we're right on the + // edge (i.e. rise data != fall data). + assign cal2_detect_edge = + ((((rdd_rise_q1 != cal2_rd_data_rise_last_pos) || + (rdd_fall_q1 != cal2_rd_data_fall_last_pos)) && + cal2_rd_data_last_valid_pos && (!cal2_curr_sel)) || + (((rdd_rise_q1 != cal2_rd_data_rise_last_neg) || + (rdd_fall_q1 != cal2_rd_data_fall_last_neg)) && + cal2_rd_data_last_valid_neg && (cal2_curr_sel)) || + (rdd_rise_q1 != rdd_fall_q1)); + + //***************************************************************** + // keep track of edge tap counts found, and whether we've + // incremented to the maximum number of taps allowed + // NOTE: Assume stage 2 cal always increments the tap count (never + // decrements) when searching for edge of the data valid window + //***************************************************************** + + always @(posedge clkdiv) + if (cal2_state == CAL2_INIT) begin + cal2_idel_tap_limit_hit <= 1'b0; + cal2_idel_tap_cnt <= 6'b000000; + end else if (cal2_dlyce_dqs) begin + cal2_idel_tap_cnt <= cal2_idel_tap_cnt + 1; + cal2_idel_tap_limit_hit <= (cal2_idel_tap_cnt == + cal2_idel_tap_limit - 1); + end + + //***************************************************************** + // Stage 2 calibration state machine: one pass per DQS group (COUNT_DQS) + //***************************************************************** + + always @(posedge clkdiv) + if (rstdiv) begin + calib_done[1] <= 1'b0; + calib_done_tmp[1] <= 1'bx; + calib_err[1] <= 1'b0; + count_dqs <= 'b0; + next_count_dqs <= 'b0; + cal2_dlyce_dqs <= 1'b0; + cal2_dlyinc_dqs <= 1'b0; + cal2_idel_dec_cnt <= 6'bxxxxxx; + cal2_rd_data_last_valid_neg <= 1'bx; + cal2_rd_data_last_valid_pos <= 1'bx; + cal2_rd_data_sel <=
'b0; + cal2_ref_req <= 1'b0; + cal2_state <= CAL2_IDLE; + end else begin + cal2_ref_req <= 1'b0; + cal2_dlyce_dqs <= 1'b0; + cal2_dlyinc_dqs <= 1'b0; + + case (cal2_state) + CAL2_IDLE: begin + count_dqs <= 'b0; + next_count_dqs <= 'b0; + if (calib_start[1]) begin + cal2_rd_data_sel <= {DQS_WIDTH{1'b0}}; + calib_done[1] <= 1'b0; + calib_done_tmp[1] <= 1'b0; + cal2_state <= CAL2_INIT; + end + end + + // Pass through this state every time we calibrate a new DQS group + CAL2_INIT: begin + cal2_curr_sel <= 1'b0; + cal2_rd_data_last_valid_neg <= 1'b0; + cal2_rd_data_last_valid_pos <= 1'b0; + cal2_state <= CAL2_INIT_IDEL_WAIT; + end + + // Stall state only used if calibration run more than once. Can take + // this state out if design never runs calibration more than once. + // We need this state to give time for MUX'ed data to settle after + // resetting RD_DATA_SEL + CAL2_INIT_IDEL_WAIT: + if (!idel_set_wait) + cal2_state <= CAL2_FIND_EDGE_POS; + + // Look for an edge - first check "positive-edge" stage 2 capture + CAL2_FIND_EDGE_POS: begin + // if found an edge, then switch to the opposite edge stage 2 + // capture and we're done - no need to decrement the tap count, + // since switching to the opposite edge will shift the capture + // point by 180 degrees + if (cal2_detect_edge) begin + cal2_curr_sel <= 1'b1; + cal2_state <= CAL2_DONE; + // set all DQS groups to be the same for simulation + if (SIM_ONLY != 0) + cal2_rd_data_sel <= {DQS_WIDTH{1'b1}}; + else + cal2_rd_data_sel[count_dqs] <= 1'b1; + if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0)) + calib_done_tmp[1] <= 1'b1; + else + // MIG 2.1: Fix for simulation out-of-bounds error when + // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL) + next_count_dqs <= count_dqs + 1; + end else begin + // otherwise, invert polarity of stage 2 capture and look for + // an edge with opposite capture clock polarity + cal2_curr_sel <= 1'b1; + cal2_rd_data_sel[count_dqs] <= 1'b1; + cal2_state <=
CAL2_FIND_EDGE_IDEL_WAIT_POS; + cal2_rd_data_rise_last_pos <= rdd_rise_q1; + cal2_rd_data_fall_last_pos <= rdd_fall_q1; + cal2_rd_data_last_valid_pos <= 1'b1; + end + end + + // Give time to switch from positive-edge to negative-edge second + // stage capture (need time for data to filter through pipe stages) + CAL2_FIND_EDGE_IDEL_WAIT_POS: + if (!idel_set_wait) + cal2_state <= CAL2_FIND_EDGE_NEG; + + // Look for an edge - check "negative-edge" stage 2 capture + CAL2_FIND_EDGE_NEG: + if (cal2_detect_edge) begin + cal2_curr_sel <= 1'b0; + cal2_state <= CAL2_DONE; + // set all DQS groups to be the same for simulation + if (SIM_ONLY != 0) + cal2_rd_data_sel <= {DQS_WIDTH{1'b0}}; + else + cal2_rd_data_sel[count_dqs] <= 1'b0; + if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0)) + calib_done_tmp[1] <= 1'b1; + else + // MIG 2.1: Fix for simulation out-of-bounds error when + // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL) + next_count_dqs <= count_dqs + 1; + end else if (cal2_idel_tap_limit_hit) begin + // otherwise, if we've run out of taps, then immediately + // backoff by half # of taps used - that's our best estimate + // for optimal calibration point.
Doesn't matter whether which + // polarity we're using for capture (we don't know which one is + // best to use) + cal2_idel_dec_cnt <= {1'b0, cal2_idel_tap_limit[5:1]}; + cal2_state <= CAL2_DEC_IDEL; + if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0)) + calib_done_tmp[1] <= 1'b1; + else + // MIG 2.1: Fix for simulation out-of-bounds error when + // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL) + next_count_dqs <= count_dqs + 1; + end else begin + // otherwise, increment IDELAY, and start looking for edge again + cal2_curr_sel <= 1'b0; + cal2_rd_data_sel[count_dqs] <= 1'b0; + cal2_state <= CAL2_FIND_EDGE_IDEL_WAIT_NEG; + cal2_rd_data_rise_last_neg <= rdd_rise_q1; + cal2_rd_data_fall_last_neg <= rdd_fall_q1; + cal2_rd_data_last_valid_neg <= 1'b1; + cal2_dlyce_dqs <= 1'b1; + cal2_dlyinc_dqs <= 1'b1; + end + + // wait for DQS IDELAY change to settle before checking again + CAL2_FIND_EDGE_IDEL_WAIT_NEG: + if (!idel_set_wait) + cal2_state <= CAL2_FIND_EDGE_POS; + + // if no edge found, then decrement by half # of taps used + CAL2_DEC_IDEL: begin + if (cal2_idel_dec_cnt == 6'b000000) + cal2_state <= CAL2_DONE; + else begin + cal2_idel_dec_cnt <= cal2_idel_dec_cnt - 1; + cal2_dlyce_dqs <= 1'b1; + cal2_dlyinc_dqs <= 1'b0; + end + end + + // delay state to allow count_dqs and ISERDES data to point to next + // DQ bit (DQS group) before going to INIT + CAL2_DONE: + if (!idel_set_wait) begin + count_dqs <= next_count_dqs; + if (calib_done_tmp[1]) begin + calib_done[1] <= 1'b1; + cal2_state <= CAL2_IDLE; + end else begin + // request auto-refresh after every DQS group calibrated to + // avoid tRAS violation + cal2_ref_req <= 1'b1; + if (calib_ref_done) + cal2_state <= CAL2_INIT; + end + end + endcase + end + + //*************************************************************************** + // Stage 3 calibration: Read Enable + // Description: + // read enable calibration determines the "round-trip" time (in # of CLK0 + // cycles) between when a read command is issued by the controller, and + // when the corresponding read data
is synchronized by into the CLK0 domain + // this is a long delay chain to delay read enable signal from controller/ + // initialization logic (i.e. this is used for both initialization and + // during normal controller operation). Stage 3 calibration logic decides + // which delayed version is appropriate to use (which is affected by the + // round trip delay of DQ/DQS) as a "valid" signal to tell rest of logic + // when the captured data output from ISERDES is valid. + //*************************************************************************** + + //***************************************************************** + // Delay chains: Use shift registers + // Two sets of delay chains are used: + // 1. One to delay RDEN from PHY_INIT module for calibration + // purposes (delay required for RDEN for calibration is different + // than during normal operation) + // 2. One per DQS group to delay RDEN from controller for normal + // operation - the value to delay for each DQS group can be different + // as is determined during calibration + //***************************************************************** + + //***************************************************************** + // First delay chain, use only for calibration + // input = asserted on rising edge of RDEN from PHY_INIT module + //***************************************************************** + + // register the RDEN inputs; CALIB_RDEN_EDGE_R is a one-cycle pulse on + // the rising edge of PHY_INIT_RDEN + always @(posedge clk) begin + ctrl_rden_r <= ctrl_rden; + phy_init_rden_r <= phy_init_rden; + phy_init_rden_r1 <= phy_init_rden_r; + calib_rden_edge_r <= phy_init_rden_r & ~phy_init_rden_r1; + end + + // Calibration shift register used for both Stage 3 and Stage 4 cal + // (not strictly necessary for stage 4, but use as an additional check + // to make sure we're checking for correct data on the right clock cycle) + always @(posedge clkdiv) + if (!calib_done[2]) + calib_rden_srl_a <= cal3_rden_srl_a; + else + calib_rden_srl_a <= cal4_rden_srl_a; + + // Flops for targeting of multi-cycle path in UCF + genvar
cal_rden_ff_i; + generate + for (cal_rden_ff_i = 0; cal_rden_ff_i < 5; + cal_rden_ff_i = cal_rden_ff_i+1) begin: gen_cal_rden_dly + FDRSE u_ff_cal_rden_dly + ( + .Q (calib_rden_srl_a_r[cal_rden_ff_i]), + .C (clkdiv), + .CE (1'b1), + .D (calib_rden_srl_a[cal_rden_ff_i]), + .R (1'b0), + .S (1'b0) + ) /* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + end + endgenerate + + SRLC32E u_calib_rden_srl + ( + .Q (calib_rden_srl_out), + .Q31 (), + .A (calib_rden_srl_a_r), + .CE (1'b1), + .CLK (clk), + .D (calib_rden_edge_r) + ); + + FDRSE u_calib_rden_srl_out_r + ( + .Q (calib_rden_srl_out_r), + .C (clk), + .CE (1'b1), + .D (calib_rden_srl_out), + .R (1'b0), + .S (1'b0) + ) /* synthesis syn_preserve = 1 */; + + // convert to CLKDIV domain. Two versions are generated because we need + // to be able to tell exactly which fast (clk) clock cycle the read + // enable was asserted in. Only one of CALIB_RDEN_VALID or + // CALIB_RDEN_VALID_STGD will be asserted for any given shift value + always @(posedge clk) + calib_rden_srl_out_r1 <= calib_rden_srl_out_r; + + always @(posedge clkdiv) begin + calib_rden_valid <= calib_rden_srl_out_r; + calib_rden_valid_stgd <= calib_rden_srl_out_r1; + end + + //***************************************************************** + // Second set of delay chains, use for normal reads + // input = RDEN from controller + //***************************************************************** + + // Flops for targeting of multi-cycle path in UCF + genvar rden_ff_i; + generate + for (rden_ff_i = 0; rden_ff_i < 5*DQS_WIDTH; + rden_ff_i = rden_ff_i+1) begin: gen_rden_dly + FDRSE u_ff_rden_dly + ( + .Q (rden_dly_r[rden_ff_i]), + .C (clkdiv), + .CE (1'b1), + .D (rden_dly[rden_ff_i]), + .R (1'b0), + .S (1'b0) + ) /* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + end + endgenerate + + // NOTE: Comment this section explaining purpose of SRL's + // One SRLC32E per DQS group delays CTRL_RDEN_R by the 5-bit address + // held in that group's slice of RDEN_DLY_R; the SRL output is then + // registered on CLK to form CALIB_RDEN for that group + genvar rden_i; + generate + for (rden_i = 0; rden_i < DQS_WIDTH; rden_i = rden_i
+ 1) begin: gen_rden + SRLC32E u_rden_srl + ( + .Q (rden_srl_out[rden_i]), + .Q31 (), + .A ({rden_dly_r[(rden_i*5)+4], + rden_dly_r[(rden_i*5)+3], + rden_dly_r[(rden_i*5)+2], + rden_dly_r[(rden_i*5)+1], + rden_dly_r[(rden_i*5)]}), + .CE (1'b1), + .CLK (clk), + .D (ctrl_rden_r) + ); + FDRSE u_calib_rden_r + ( + .Q (calib_rden[rden_i]), + .C (clk), + .CE (1'b1), + .D (rden_srl_out[rden_i]), + .R (1'b0), + .S (1'b0) + ) /* synthesis syn_preserve = 1 */; + end + endgenerate + + //***************************************************************** + // indicates that current received data is the correct pattern. Check both + // rising and falling data for first DQ in each DQS group. Note that + // we're checking using a pipelined version of read data, so need to take + // this inherent delay into account in determining final read valid delay + // Data is written to the memory in the following order (first -> last): + // 0x1, 0xE, 0xE, 0x1, 0x1, 0xE, 0xE, 0x1 + // Looking just at LSb, expect data in sequence (in binary): + // 1, 0, 0, 1, 1, 0, 0, 1 + // Check for the presence of the first 7 words, and compensate read valid + // delay accordingly.
Don't check last falling edge data, it may be + // corrupted by the DQS tri-state glitch at end of read postamble + // (glitch protection not yet active until stage 4 cal) + //***************************************************************** + + // pipeline the read data (one stage for Q2, two stages for Q1) so that + // seven consecutive samples are available for comparison at once + always @(posedge clkdiv) begin + rdd_rise_q1_r <= rdd_rise_q1; + rdd_fall_q1_r <= rdd_fall_q1; + rdd_rise_q2_r <= rdd_rise_q2; + rdd_fall_q2_r <= rdd_fall_q2; + rdd_rise_q1_r1 <= rdd_rise_q1_r; + rdd_fall_q1_r1 <= rdd_fall_q1_r; + end + + always @(posedge clkdiv) begin + // For the following sequence from memory: + // rise[0], fall[0], rise[1], fall[1] + // if data is aligned out of fabric ISERDES: + // RDD_RISE_Q2 = rise[0] + // RDD_FALL_Q2 = fall[0] + // RDD_RISE_Q1 = rise[1] + // RDD_FALL_Q1 = fall[1] + // (oldest sample first, the seven samples compared are 1,0,0,1,1,0,0) + cal3_data_match <= ((rdd_rise_q2_r == 1) && + (rdd_fall_q2_r == 0) && + (rdd_rise_q1_r == 0) && + (rdd_fall_q1_r == 1) && + (rdd_rise_q2 == 1) && + (rdd_fall_q2 == 0) && + (rdd_rise_q1 == 0)); + + // if data is staggered out of fabric ISERDES: + // RDD_RISE_Q1_R = rise[0] + // RDD_FALL_Q1_R = fall[0] + // RDD_RISE_Q2 = rise[1] + // RDD_FALL_Q2 = fall[1] + // (same 1,0,0,1,1,0,0 comparison, taken one pipeline stage later) + cal3_data_match_stgd <= ((rdd_rise_q1_r1 == 1) && + (rdd_fall_q1_r1 == 0) && + (rdd_rise_q2_r == 0) && + (rdd_fall_q2_r == 1) && + (rdd_rise_q1_r == 1) && + (rdd_fall_q1_r == 0) && + (rdd_rise_q2 == 0)); + end + + // SRL address to use for normal reads = calibration address minus a + // fixed offset (CAL3_RDEN_SRL_DLY_DELTA) + assign cal3_rden_dly = cal3_rden_srl_a - CAL3_RDEN_SRL_DLY_DELTA; + // delayed calibration read enable has arrived (either alignment) + assign cal3_data_valid = (calib_rden_valid | calib_rden_valid_stgd); + // expected pattern seen on the same cycle as the matching read enable + assign cal3_match_found + = ((calib_rden_valid && cal3_data_match) || + (calib_rden_valid_stgd && cal3_data_match_stgd)); + + // when calibrating, check to see which clock cycle (after the read is + // issued) does the expected data pattern arrive.
Record this result + // NOTE: Can add error checking here in case valid data not found on any + // of the available pipeline stages + always @(posedge clkdiv) begin + if (rstdiv) begin + cal3_rden_srl_a <= 5'bxxxxx; + cal3_state <= CAL3_IDLE; + calib_done[2] <= 1'b0; + calib_err_2[0] <= 1'b0; + count_rden <= {DQS_WIDTH{1'b0}}; + rden_dly <= {5*DQS_WIDTH{1'b0}}; + end else begin + + case (cal3_state) + CAL3_IDLE: begin + count_rden <= {DQS_WIDTH{1'b0}}; + if (calib_start[2]) begin + calib_done[2] <= 1'b0; + cal3_state <= CAL3_INIT; + end + end + + CAL3_INIT: begin + cal3_rden_srl_a <= RDEN_BASE_DELAY; + // let SRL pipe clear after loading initial shift value + cal3_state <= CAL3_RDEN_PIPE_CLR_WAIT; + end + + CAL3_DETECT: + if (cal3_data_valid) + // if match found at the correct clock cycle + if (cal3_match_found) begin + + // For simulation, load SRL addresses for all DQS with same value + if (SIM_ONLY != 0) begin + for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_rden_dly + rden_dly[(i*5)] <= cal3_rden_dly[0]; + rden_dly[(i*5)+1] <= cal3_rden_dly[1]; + rden_dly[(i*5)+2] <= cal3_rden_dly[2]; + rden_dly[(i*5)+3] <= cal3_rden_dly[3]; + rden_dly[(i*5)+4] <= cal3_rden_dly[4]; + end + end else begin + rden_dly[(count_rden*5)] <= cal3_rden_dly[0]; + rden_dly[(count_rden*5)+1] <= cal3_rden_dly[1]; + rden_dly[(count_rden*5)+2] <= cal3_rden_dly[2]; + rden_dly[(count_rden*5)+3] <= cal3_rden_dly[3]; + rden_dly[(count_rden*5)+4] <= cal3_rden_dly[4]; + end + + // Use for stage 4 calibration + calib_rden_dly[(count_rden*5)] <= cal3_rden_srl_a[0]; + calib_rden_dly[(count_rden*5)+1] <= cal3_rden_srl_a[1]; + calib_rden_dly[(count_rden*5)+2] <= cal3_rden_srl_a[2]; + calib_rden_dly[(count_rden*5)+3] <= cal3_rden_srl_a[3]; + calib_rden_dly[(count_rden*5)+4] <= cal3_rden_srl_a[4]; + cal3_state <= CAL3_DONE; + end else begin + // If we run out of stages to shift, without finding correct + // result, then stop and assert error + if (cal3_rden_srl_a == 5'b11111) begin + calib_err_2[0]
<= 1'b1; + cal3_state <= CAL3_IDLE; + end else begin + // otherwise, increase the shift value and try again + cal3_rden_srl_a <= cal3_rden_srl_a + 1; + cal3_state <= CAL3_RDEN_PIPE_CLR_WAIT; + end + end + + // give additional time for RDEN_R pipe to clear from effects of + // previous pipeline or IDELAY tap change + CAL3_RDEN_PIPE_CLR_WAIT: + if (calib_rden_pipe_cnt == 5'b00000) + cal3_state <= CAL3_DETECT; + + CAL3_DONE: begin + if ((count_rden == DQS_WIDTH-1) || (SIM_ONLY != 0)) begin + calib_done[2] <= 1'b1; + cal3_state <= CAL3_IDLE; + end else begin + count_rden <= count_rden + 1; + cal3_state <= CAL3_INIT; + end + end + endcase + end + end + + //***************************************************************** + // Last part of stage 3 calibration - compensate for differences + // in delay between different DQS groups. Assume that in the worst + // case, DQS groups can only differ by one clock cycle. Data for + // certain DQS groups must be delayed by one clock cycle. + // NOTE: May need to increase allowable variation to greater than + // one clock cycle in certain customer designs. + // Algorithm is: + // 1. Record shift delay value for DQS[0] + // 2. Compare each DQS[x] delay value to that of DQS[0]: + // - If different, then record this fact (RDEN_MUX) + // - If DQS[0] is greater than DQS[x], set RDEN_INC. Assume greater + // by one clock cycle only - this is a key assumption, assume no + // more than a one clock cycle variation. + // - If DQS[0] is less than DQS[x], set RDEN_DEC + // 3. After calibration is complete, set control for DQS group + // delay (CALIB_RDEN_SEL): + // - If RDEN_DEC = 1, then assume that DQS[0] is the lowest + // delay (and at least one other DQS group has a higher + // delay). + // - If RDEN_INC = 1, then assume that DQS[0] is the highest + // delay (and that all other DQS groups have the same or + // lower delay).
+ // - If both RDEN_INC and RDEN_DEC = 1, then flag error + // (variation is too high for this algorithm to handle) + //***************************************************************** + + always @(posedge clkdiv) begin + if (rstdiv) begin + calib_err_2[1] <= 1'b0; + calib_rden_sel <= {DQS_WIDTH{1'bx}}; + rden_dec <= 1'b0; + rden_dly_0 <= 5'bxxxxx; + rden_inc <= 1'b0; + rden_mux <= {DQS_WIDTH{1'b0}}; + end else begin + // if a match if found, then store the value of rden_dly + if (!calib_done[2]) begin + if ((cal3_state == CAL3_DETECT) && cal3_match_found) begin + // store the value for DQS[0] as a reference + if (count_rden == 0) begin + // for simulation, RDEN calibration only happens for DQS[0] + // set RDEN_MUX for all DQS groups to be the same as DQS[0] + if (SIM_ONLY != 0) + rden_mux <= {DQS_WIDTH{1'b0}}; + else begin + // otherwise, load values for DQS[0] + rden_dly_0 <= cal3_rden_srl_a; + rden_mux[0] <= 1'b0; + end + end else if (SIM_ONLY == 0) begin + // for all other DQS groups, compare RDEN_DLY delay value with + // that of DQS[0] + if (rden_dly_0 != cal3_rden_srl_a) begin + // record that current DQS group has a different delay + // than DQS[0] (the "reference" DQS group) + rden_mux[count_rden] <= 1'b1; + if (rden_dly_0 > cal3_rden_srl_a) + rden_inc <= 1'b1; + else if (rden_dly_0 < cal3_rden_srl_a) + rden_dec <= 1'b1; + // otherwise, if current DQS group has same delay as DQS[0], + // then rden_mux[count_rden] remains at 0 (since rden_mux + // array contents initialized to 0) + end + end + end + end else begin + // Otherwise - if we're done w/ stage 2 calibration: + // set final value for RDEN data delay + // flag error if there's more than one cycle variation from DQS[0] + calib_err_2[1] <= (rden_inc && rden_dec); + if (rden_inc) + // if DQS[0] delay represents max delay + calib_rden_sel <= ~rden_mux; + else + // if DQS[0] delay represents min delay (or all the delays are + // the same between DQS groups) + calib_rden_sel <= rden_mux; + end + end + 
end + + // flag error for stage 3 if appropriate + always @(posedge clkdiv) + calib_err[2] <= calib_err_2[0] | calib_err_2[1]; + + //*************************************************************************** + // Stage 4 calibration: DQS gate + //*************************************************************************** + + //***************************************************************** + // indicates that current received data is the correct pattern. Same as + // for READ VALID calibration, except that the expected data sequence is + // different since DQS gate is asserted after the 6th word. + // Data sequence: + // Arrives from memory (at FPGA input) (R, F): 1 0 0 1 1 0 0 1 + // After gating the sequence looks like: 1 0 0 1 1 0 1 0 (7th word = + // 5th word, 8th word = 6th word) + // What if the gate timing is off? Need to make sure we can distinguish + // between the results of correct vs. incorrect gate timing. We also use + // the "read_valid" signal from stage 3 calibration to help us determine + // when to check for a valid sequence for stage 4 calibration (i.e. use + // CAL4_DATA_VALID in addition to CAL4_DATA_MATCH/CAL4_DATA_MATCH_STGD) + // Note that since the gate signal from the CLK0 domain is synchronized + // to the falling edge of DQS, the effect of the gate will only be + // seen starting with a rising edge data (although it is possible + // the GATE IDDR output could go metastable and cause an unexpected result + // on the first rising and falling edges after the gate is enabled). + // Also note that the actual DQS glitch can come more than 0.5*tCK after + // the last falling edge of DQS and the constraint for this path can + // be > 0.5*tCK; however, this means when calibrating, the output of the + // GATE IDDR may miss the setup time requirement of the rising edge flop + // and only meet it for the falling edge flop. 
Therefore the rising + // edge data immediately following the assertion of the gate can either + // be a 1 or 0 (can rely on either) + // As the timing on the gate is varied, we expect to see (sequence of + // captured read data shown below): + // - 1 0 0 1 1 0 0 1 (gate is really early, starts and ends before + // read burst even starts) + // - x 0 0 1 1 0 0 1 (gate pulse starts before the burst, and ends + // - x y 0 1 1 0 0 1 sometime during the burst; x,y = 0, or 1, but + // - x y x 1 1 0 0 1 all bits that show an x are the same value, + // - x y x y 1 0 0 1 and y are the same value) + // - x y x y x 0 0 1 + // - x y x y x y 0 1 (gate starts just before start of burst) + // - 1 0 x 0 x 0 x 0 (gate starts after 1st falling word. The "x" + // represents possiblity that gate may not disable + // clock for 2nd rising word in time) + // - 1 0 0 1 x 1 x 1 (gate starts after 2nd falling word) + // - 1 0 0 1 1 0 x 0 (gate starts after 3rd falling word - GOOD!!) + // - 1 0 0 1 1 0 0 1 (gate starts after burst is already done) + //***************************************************************** + + assign cal4_data_valid = calib_rden_valid | calib_rden_valid_stgd; + assign cal4_data_good = (calib_rden_valid & + cal4_data_match) | + (calib_rden_valid_stgd & + cal4_data_match_stgd); + + always @(posedge clkdiv) begin + // if data is aligned out of fabric ISERDES: + cal4_data_match <= ((rdd_rise_q2_r == 1) && + (rdd_fall_q2_r == 0) && + (rdd_rise_q1_r == 0) && + (rdd_fall_q1_r == 1) && + (rdd_rise_q2 == 1) && + (rdd_fall_q2 == 0) && + // MIG 2.1: Last rising edge data value not + // guaranteed to be certain value at higher + // frequencies + // (rdd_rise_q1 == 0) && + (rdd_fall_q1 == 0)); + // if data is staggered out of fabric ISERDES: + cal4_data_match_stgd <= ((rdd_rise_q1_r1 == 1) && + (rdd_fall_q1_r1 == 0) && + (rdd_rise_q2_r == 0) && + (rdd_fall_q2_r == 1) && + (rdd_rise_q1_r == 1) && + (rdd_fall_q1_r == 0) && + // MIG 2.1: Last rising edge data value not + // 
guaranteed to be certain value at higher + // frequencies + // (rdd_rise_q2 == 0) && + (rdd_fall_q2 == 0)); + end + + //***************************************************************** + // DQS gate enable generation: + // This signal gets synchronized to DQS domain, and drives IDDR + // register that in turn asserts/deasserts CE to all 4 or 8 DQ + // IDDR's in that DQS group. + // 1. During normal (post-cal) operation, this is only for 2 clock + // cycles following the end of a burst. Check for falling edge + // of RDEN. But must also make sure NOT assert for a read-idle- + // read (two non-consecutive reads, separated by exactly one + // idle cycle) - in this case, don't assert the gate because: + // (1) we don't have enough time to deassert the gate before the + // first rising edge of DQS for second burst (b/c of fact + // that DQS gate is generated in the fabric only off rising + // edge of CLK0 - if we somehow had an ODDR in fabric, we + // could pull this off, (2) assumption is that the DQS glitch + // will not rise enough to cause a glitch because the + // post-amble of the first burst is followed immediately by + // the pre-amble of the next burst + // 2. During stage 4 calibration, assert for 3 clock cycles + // (assert gate enable one clock cycle early), since we gate out + // the last two words (in addition to the crap on the DQ bus after + // the DQS read postamble). + // NOTE: PHY_INIT_RDEN and CTRL_RDEN have slightly different timing w/r + // to when they are asserted w/r to the start of the read burst + // (PHY_INIT_RDEN is one cycle earlier than CTRL_RDEN). 
+ //***************************************************************** + + // register for timing purposes for fast clock path - currently only + // calib_done_r[2] used + always @(posedge clk) + calib_done_r <= calib_done; + + always @(*) begin + calib_ctrl_rden = ctrl_rden; + calib_init_rden = calib_done_r[2] & phy_init_rden; + end + + assign calib_ctrl_rden_negedge = ~calib_ctrl_rden & calib_ctrl_rden_r; + // check for read-idle-read before asserting DQS pulse at end of read + assign calib_ctrl_gate_pulse = calib_ctrl_rden_negedge_r & + ~calib_ctrl_rden; + always @(posedge clk) begin + calib_ctrl_rden_r <= calib_ctrl_rden; + calib_ctrl_rden_negedge_r <= calib_ctrl_rden_negedge; + calib_ctrl_gate_pulse_r <= calib_ctrl_gate_pulse; + end + + assign calib_init_gate_pulse = ~calib_init_rden & calib_init_rden_r; + always @(posedge clk) begin + calib_init_rden_r <= calib_init_rden; + calib_init_gate_pulse_r <= calib_init_gate_pulse; + calib_init_gate_pulse_r1 <= calib_init_gate_pulse_r; + end + + // Gate is asserted: (1) during cal, for 3 cycles, starting 1 cycle + // after falling edge of CTRL_RDEN, (2) during normal ops, for 2 + // cycles, starting 2 cycles after falling edge of CTRL_RDEN + assign gate_srl_in = ~((calib_ctrl_gate_pulse | + calib_ctrl_gate_pulse_r) | + (calib_init_gate_pulse | + calib_init_gate_pulse_r | + calib_init_gate_pulse_r1)); + + //***************************************************************** + // generate DQS enable signal for each DQS group + // There are differences between DQS gate signal for calibration vs. during + // normal operation: + // * calibration gates the second to last clock cycle of the burst, + // rather than after the last word (e.g. for a 8-word, 4-cycle burst, + // cycle 4 is gated for calibration; during normal operation, cycle + // 5 (i.e. 
cycle after the last word) is gated) + // enable for DQS is deasserted for two clock cycles, except when + // we have the preamble for the next read immediately following + // the postamble of the current read - assume DQS does not glitch + // during this time, that it stays low. Also if we did have to gate + // the DQS for this case, then we don't have enough time to deassert + // the gate in time for the first rising edge of DQS for the second + // read + //***************************************************************** + + // Flops for targetting of multi-cycle path in UCF + genvar gate_ff_i; + generate + for (gate_ff_i = 0; gate_ff_i < 5*DQS_WIDTH; + gate_ff_i = gate_ff_i+1) begin: gen_gate_dly + FDRSE u_ff_gate_dly + ( + .Q (gate_dly_r[gate_ff_i]), + .C (clkdiv), + .CE (1'b1), + .D (gate_dly[gate_ff_i]), + .R (1'b0), + .S (1'b0) + ) /* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + end + endgenerate + + genvar gate_i; + generate + for (gate_i = 0; gate_i < DQS_WIDTH; gate_i = gate_i + 1) begin: gen_gate + SRLC32E u_gate_srl + ( + .Q (gate_srl_out[gate_i]), + .Q31 (), + .A ({gate_dly_r[(gate_i*5)+4], + gate_dly_r[(gate_i*5)+3], + gate_dly_r[(gate_i*5)+2], + gate_dly_r[(gate_i*5)+1], + gate_dly_r[(gate_i*5)]}), + .CE (1'b1), + .CLK (clk), + .D (gate_srl_in) + ); + + // For GATE_BASE_DELAY > 0, have one extra cycle to register outputs + // from controller before generating DQS gate pulse. In PAR, the + // location of the controller logic can be far from the DQS gate + // logic (DQS gate logic located near the DQS I/O's), contributing + // to large net delays. 
Registering the controller outputs for + // CL >= 4 (above 200MHz) adds a stage of pipelining to reduce net + // delays + if (GATE_BASE_DELAY > 0) begin: gen_gate_base_dly_gt3 + // add flop between SRL32 and EN_DQS flop (which is located near the + // DDR2 IOB's) + FDRSE u_gate_srl_ff + ( + .Q (gate_srl_out_r[gate_i]), + .C (clk), + .CE (1'b1), + .D (gate_srl_out[gate_i]), + .R (1'b0), + .S (1'b0) + ) /* synthesis syn_preserve = 1 */; + end else begin: gen_gate_base_dly_le3 + assign gate_srl_out_r[gate_i] = gate_srl_out[gate_i]; + end + + FDRSE u_en_dqs_ff + ( + .Q (en_dqs[gate_i]), + .C (clk), + .CE (1'b1), + .D (gate_srl_out_r[gate_i]), + .R (1'b0), + .S (1'b0) + ) /* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + end + endgenerate + + //***************************************************************** + // Find valid window: keep track of how long we've been in the same data + // window. If it's been long enough, then declare that we've found a stable + // valid window - in particular, that we're past any region of instability + // associated with the edge of the window. 
Use only when finding left edge + //***************************************************************** + + always @(posedge clkdiv) + // reset before we start to look for window + if (cal4_state == CAL4_INIT) begin + cal4_window_cnt <= 4'b0000; + cal4_stable_window <= 1'b0; + end else if ((cal4_state == CAL4_FIND_EDGE) && cal4_seek_left) begin + // if we're looking for left edge, and incrementing IDELAY, count + // consecutive taps over which we're in the window + if (cal4_data_valid) begin + if (cal4_data_good) + cal4_window_cnt <= cal4_window_cnt + 1; + else + cal4_window_cnt <= 4'b0000; + end + + if (cal4_window_cnt == MIN_WIN_SIZE-1) + cal4_stable_window <= 1'b1; + end + + //***************************************************************** + // keep track of edge tap counts found, and whether we've + // incremented to the maximum number of taps allowed + //***************************************************************** + + always @(posedge clkdiv) + if ((cal4_state == CAL4_INIT) || cal4_dlyrst_gate) begin + cal4_idel_max_tap <= 1'b0; + cal4_idel_bit_tap <= 1'b0; + cal4_idel_tap_cnt <= 6'b000000; + end else if (cal4_dlyce_gate) begin + if (cal4_dlyinc_gate) begin + cal4_idel_tap_cnt <= cal4_idel_tap_cnt + 1; + cal4_idel_bit_tap <= (cal4_idel_tap_cnt == CAL4_IDEL_BIT_VAL-2); + cal4_idel_max_tap <= (cal4_idel_tap_cnt == 6'b111110); + end else begin + cal4_idel_tap_cnt <= cal4_idel_tap_cnt - 1; + cal4_idel_bit_tap <= 1'b0; + cal4_idel_max_tap <= 1'b0; + end + end + + always @(posedge clkdiv) + if ((cal4_state != CAL4_RDEN_PIPE_CLR_WAIT) && + (cal3_state != CAL3_RDEN_PIPE_CLR_WAIT)) + calib_rden_pipe_cnt <= CALIB_RDEN_PIPE_LEN-1; + else + calib_rden_pipe_cnt <= calib_rden_pipe_cnt - 1; + + //***************************************************************** + // Stage 4 cal state machine + //***************************************************************** + + always @(posedge clkdiv) + if (rstdiv) begin + calib_done[3] <= 1'b0; + calib_done_tmp[3] <= 1'b0; + 
calib_err[3] <= 1'b0; + count_gate <= 'b0; + gate_dly <= 'b0; + next_count_gate <= 'b0; + cal4_idel_adj_cnt <= 6'bxxxxxx; + cal4_dlyce_gate <= 1'b0; + cal4_dlyinc_gate <= 1'b0; + cal4_dlyrst_gate <= 1'b0; // reset handled elsewhere in code + cal4_gate_srl_a <= 5'bxxxxx; + cal4_rden_srl_a <= 5'bxxxxx; + cal4_ref_req <= 1'b0; + cal4_seek_left <= 1'bx; + cal4_state <= CAL4_IDLE; + end else begin + cal4_ref_req <= 1'b0; + cal4_dlyce_gate <= 1'b0; + cal4_dlyinc_gate <= 1'b0; + cal4_dlyrst_gate <= 1'b0; + + case (cal4_state) + CAL4_IDLE: begin + count_gate <= 'b0; + next_count_gate <= 'b0; + if (calib_start[3]) begin + gate_dly <= 'b0; + calib_done[3] <= 1'b0; + cal4_state <= CAL4_INIT; + end + end + + CAL4_INIT: begin + // load: (1) initial value of gate delay SRL, (2) appropriate + // value of RDEN SRL (so that we get correct "data valid" timing) + cal4_gate_srl_a <= GATE_BASE_INIT; + cal4_rden_srl_a <= {calib_rden_dly[(count_gate*5)+4], + calib_rden_dly[(count_gate*5)+3], + calib_rden_dly[(count_gate*5)+2], + calib_rden_dly[(count_gate*5)+1], + calib_rden_dly[(count_gate*5)]}; + // let SRL pipe clear after loading initial shift value + cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT; + end + + // sort of an initial state - start checking to see whether we're + // already in the window or not + CAL4_FIND_WINDOW: + // decide right away if we start in the proper window - this + // determines if we are then looking for the left (trailing) or + // right (leading) edge of the data valid window + if (cal4_data_valid) begin + // if we find a match - then we're already in window, now look + // for left edge. 
Otherwise, look for right edge of window + cal4_seek_left <= cal4_data_good; + cal4_state <= CAL4_FIND_EDGE; + end + + CAL4_FIND_EDGE: + // don't do anything until the exact clock cycle when to check that + // readback data is valid or not + if (cal4_data_valid) begin + // we're currently in the window, look for left edge of window + if (cal4_seek_left) begin + // make sure we've passed the right edge before trying to detect + // the left edge (i.e. avoid any edge "instability") - else, we + // may detect an "false" edge too soon. By design, if we start in + // the data valid window, always expect at least + // MIN(BIT_TIME_TAPS,32) (-/+ jitter, see below) taps of valid + // window before we hit the left edge (this is because when stage + // 4 calibration first begins (i.e., gate_dly = 00, and IDELAY = + // 00), we're guaranteed to NOT be in the window, and we always + // start searching for MIN(BIT_TIME_TAPS,32) for the right edge + // of window. If we don't find it, increment gate_dly, and if we + // now start in the window, we have at least approximately + // CLK_PERIOD-MIN(BIT_TIME_TAPS,32) = MIN(BIT_TIME_TAPS,32) taps. + // It's approximately because jitter, noise, etc. can bring this + // value down slightly. Because of this (although VERY UNLIKELY), + // we have to protect against not decrementing IDELAY below 0 + // during adjustment phase). 
+ if (cal4_stable_window && !cal4_data_good) begin + // found left edge of window, dec by MIN(BIT_TIME_TAPS,32) + cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL; + cal4_idel_adj_inc <= 1'b0; + cal4_state <= CAL4_ADJ_IDEL; + end else begin + // Otherwise, keep looking for left edge: + if (cal4_idel_max_tap) begin + // ran out of taps looking for left edge (max=63) - happens + // for low frequency case, decrement by 32 + cal4_idel_adj_cnt <= 6'b100000; + cal4_idel_adj_inc <= 1'b0; + cal4_state <= CAL4_ADJ_IDEL; + end else begin + cal4_dlyce_gate <= 1'b1; + cal4_dlyinc_gate <= 1'b1; + cal4_state <= CAL4_IDEL_WAIT; + end + end + end else begin + // looking for right edge of window: + // look for the first match - this means we've found the right + // (leading) edge of the data valid window, increment by + // MIN(BIT_TIME_TAPS,32) + if (cal4_data_good) begin + cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL; + cal4_idel_adj_inc <= 1'b1; + cal4_state <= CAL4_ADJ_IDEL; + end else begin + // Otherwise, keep looking: + // only look for MIN(BIT_TIME_TAPS,32) taps for right edge, + // if we haven't found it, then inc gate delay, try again + if (cal4_idel_bit_tap) begin + // if we're already maxed out on gate delay, then error out + // (simulation only - calib_err isn't currently connected) + if (cal4_gate_srl_a == 5'b11111) begin + calib_err[3] <= 1'b1; + cal4_state <= CAL4_IDLE; + end else begin + // otherwise, increment gate delay count, and start + // over again + cal4_gate_srl_a <= cal4_gate_srl_a + 1; + cal4_dlyrst_gate <= 1'b1; + cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT; + end + end else begin + // keep looking for right edge + cal4_dlyce_gate <= 1'b1; + cal4_dlyinc_gate <= 1'b1; + cal4_state <= CAL4_IDEL_WAIT; + end + end + end + end + + // wait for GATE IDELAY to settle, after reset or increment + CAL4_IDEL_WAIT: begin + // For simulation, load SRL addresses for all DQS with same value + if (SIM_ONLY != 0) begin + for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly + 
gate_dly[(i*5)+4] <= cal4_gate_srl_a[4]; + gate_dly[(i*5)+3] <= cal4_gate_srl_a[3]; + gate_dly[(i*5)+2] <= cal4_gate_srl_a[2]; + gate_dly[(i*5)+1] <= cal4_gate_srl_a[1]; + gate_dly[(i*5)] <= cal4_gate_srl_a[0]; + end + end else begin + gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4]; + gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3]; + gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2]; + gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1]; + gate_dly[(count_gate*5)] <= cal4_gate_srl_a[0]; + end + // check to see if we've found edge of window + if (!idel_set_wait) + cal4_state <= CAL4_FIND_EDGE; + end + + // give additional time for RDEN_R pipe to clear from effects of + // previous pipeline (and IDELAY reset) + CAL4_RDEN_PIPE_CLR_WAIT: begin + // MIG 2.2: Bug fix - make sure to update GATE_DLY count, since + // possible for FIND_EDGE->RDEN_PIPE_CLR_WAIT->FIND_WINDOW + // transition (i.e. need to make sure the gate count updated in + // FIND_EDGE gets reflected in GATE_DLY by the time we reach + // state FIND_WINDOW) - previously GATE_DLY only being updated + // during state CAL4_IDEL_WAIT + if (SIM_ONLY != 0) begin + for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly_pipe + gate_dly[(i*5)+4] <= cal4_gate_srl_a[4]; + gate_dly[(i*5)+3] <= cal4_gate_srl_a[3]; + gate_dly[(i*5)+2] <= cal4_gate_srl_a[2]; + gate_dly[(i*5)+1] <= cal4_gate_srl_a[1]; + gate_dly[(i*5)] <= cal4_gate_srl_a[0]; + end + end else begin + gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4]; + gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3]; + gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2]; + gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1]; + gate_dly[(count_gate*5)] <= cal4_gate_srl_a[0]; + end + // look for new window + if (calib_rden_pipe_cnt == 5'b00000) + cal4_state <= CAL4_FIND_WINDOW; + end + + // increment/decrement DQS/DQ IDELAY for final adjustment + CAL4_ADJ_IDEL: + // add underflow protection for corner case when left edge found + // using fewer than MIN(BIT_TIME_TAPS,32) taps 
+ if ((cal4_idel_adj_cnt == 6'b000000) || + (cal4_dlyce_gate && !cal4_dlyinc_gate && + (cal4_idel_tap_cnt == 6'b000001))) begin + cal4_state <= CAL4_DONE; + // stop when all gates calibrated, or gate[0] cal'ed (for sim) + if ((count_gate == DQS_WIDTH-1) || (SIM_ONLY != 0)) + calib_done_tmp[3] <= 1'b1; + else + // need for VHDL simulation to prevent out-of-index error + next_count_gate <= count_gate + 1; + end else begin + cal4_idel_adj_cnt <= cal4_idel_adj_cnt - 1; + cal4_dlyce_gate <= 1'b1; + // whether inc or dec depends on whether left or right edge found + cal4_dlyinc_gate <= cal4_idel_adj_inc; + end + + // wait for IDELAY output to settle after decrement. Check current + // COUNT_GATE value and decide if we're done + CAL4_DONE: + if (!idel_set_wait) begin + count_gate <= next_count_gate; + if (calib_done_tmp[3]) begin + calib_done[3] <= 1'b1; + cal4_state <= CAL4_IDLE; + end else begin + // request auto-refresh after every DQS group calibrated to + // avoid tRAS violation + cal4_ref_req <= 1'b1; + if (calib_ref_done) + cal4_state <= CAL4_INIT; + end + end + endcase + end + +endmodule diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_ctl_io.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_ctl_io.v new file mode 100644 index 0000000..625a5dc --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_ctl_io.v @@ -0,0 +1,306 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. 
Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. 
+//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_phy_ctl_io.v +// /___/ /\ Date Last Modified: $Date: 2008/07/29 15:24:03 $ +// \ \ / \ Date Created: Thu Aug 24 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +// Muxes the SDRAM address/command signals (address, bank address, RAS, CAS, +// WE, CS) between the controller and PHY-init sources and places them, +// together with CKE and ODT, in IOB output flops clocked by clk0. +//Reference: +//Revision History: +//***************************************************************************** + +`timescale 1ns/1ps + +module ddr2_phy_ctl_io # + ( + // Following parameters are for 72-bit RDIMM design (for ML561 Reference + // board design). Actual values may be different: the actual parameter + // values are passed down from the design top module (ddr2_sdram). Please + // refer to the ddr2_sdram module for the actual values. 
+ parameter BANK_WIDTH = 2, + parameter CKE_WIDTH = 1, + parameter COL_WIDTH = 10, + parameter CS_NUM = 1, + parameter TWO_T_TIME_EN = 0, + parameter CS_WIDTH = 1, + parameter ODT_WIDTH = 1, + parameter ROW_WIDTH = 14, + parameter DDR_TYPE = 1 + ) + ( + input clk0, + input clk90, + input rst0, + input rst90, + input [ROW_WIDTH-1:0] ctrl_addr, + input [BANK_WIDTH-1:0] ctrl_ba, + input ctrl_ras_n, + input ctrl_cas_n, + input ctrl_we_n, + input [CS_NUM-1:0] ctrl_cs_n, + input [ROW_WIDTH-1:0] phy_init_addr, + input [BANK_WIDTH-1:0] phy_init_ba, + input phy_init_ras_n, + input phy_init_cas_n, + input phy_init_we_n, + input [CS_NUM-1:0] phy_init_cs_n, + input [CKE_WIDTH-1:0] phy_init_cke, + input phy_init_data_sel, + input [CS_NUM-1:0] odt, + output [ROW_WIDTH-1:0] ddr_addr, + output [BANK_WIDTH-1:0] ddr_ba, + output ddr_ras_n, + output ddr_cas_n, + output ddr_we_n, + output [CKE_WIDTH-1:0] ddr_cke, + output [CS_WIDTH-1:0] ddr_cs_n, + output [ODT_WIDTH-1:0] ddr_odt + ); + + reg [ROW_WIDTH-1:0] addr_mux; + reg [BANK_WIDTH-1:0] ba_mux; + reg cas_n_mux; + reg [CS_NUM-1:0] cs_n_mux; + reg ras_n_mux; + reg we_n_mux; + + + + //*************************************************************************** + + + // MUX to choose from either PHY or controller for SDRAM control: + // phy_init_data_sel = 1 selects the controller (ctrl_*) inputs, + // phy_init_data_sel = 0 selects the PHY initialization (phy_init_*) inputs + + generate // in 2T timing mode the extra register stage is not used. 
+ if(TWO_T_TIME_EN) begin // 2T mode: combinational mux (control signals are asserted for two cycles) + always @(*)begin + if (phy_init_data_sel) begin + addr_mux = ctrl_addr; + ba_mux = ctrl_ba; + cas_n_mux = ctrl_cas_n; + cs_n_mux = ctrl_cs_n; + ras_n_mux = ctrl_ras_n; + we_n_mux = ctrl_we_n; + end else begin + addr_mux = phy_init_addr; + ba_mux = phy_init_ba; + cas_n_mux = phy_init_cas_n; + cs_n_mux = phy_init_cs_n; + ras_n_mux = phy_init_ras_n; + we_n_mux = phy_init_we_n; + end + end + end else begin + always @(posedge clk0)begin // non-2T mode: register the muxed signals on clk0 + if (phy_init_data_sel) begin + addr_mux <= ctrl_addr; + ba_mux <= ctrl_ba; + cas_n_mux <= ctrl_cas_n; + cs_n_mux <= ctrl_cs_n; + ras_n_mux <= ctrl_ras_n; + we_n_mux <= ctrl_we_n; + end else begin + addr_mux <= phy_init_addr; + ba_mux <= phy_init_ba; + cas_n_mux <= phy_init_cas_n; + cs_n_mux <= phy_init_cs_n; + ras_n_mux <= phy_init_ras_n; + we_n_mux <= phy_init_we_n; + end + end + end + endgenerate + + //*************************************************************************** + // Output flop instantiation + // NOTE: Make sure all control/address flops are placed in IOBs + //*************************************************************************** + + // RAS: = 1 at reset (rst0 drives the flop's PRE pin) + (* IOB = "TRUE" *) FDCPE u_ff_ras_n + ( + .Q (ddr_ras_n), + .C (clk0), + .CE (1'b1), + .CLR (1'b0), + .D (ras_n_mux), + .PRE (rst0) + ) /* synthesis syn_useioff = 1 */; + + // CAS: = 1 at reset (rst0 drives the flop's PRE pin) + (* IOB = "TRUE" *) FDCPE u_ff_cas_n + ( + .Q (ddr_cas_n), + .C (clk0), + .CE (1'b1), + .CLR (1'b0), + .D (cas_n_mux), + .PRE (rst0) + ) /* synthesis syn_useioff = 1 */; + + // WE: = 1 at reset (rst0 drives the flop's PRE pin) + (* IOB = "TRUE" *) FDCPE u_ff_we_n + ( + .Q (ddr_we_n), + .C (clk0), + .CE (1'b1), + .CLR (1'b0), + .D (we_n_mux), + .PRE (rst0) + ) /* synthesis syn_useioff = 1 */; + + // CKE: = 0 at reset (rst0 drives CLR); always sourced from 
phy_init_cke, not muxed + genvar cke_i; + generate + for (cke_i = 0; cke_i < CKE_WIDTH; cke_i = cke_i + 1) begin: gen_cke + (* IOB = "TRUE" *) FDCPE u_ff_cke + ( + .Q (ddr_cke[cke_i]), + .C (clk0), 
+ .CE (1'b1), + .CLR (rst0), + .D (phy_init_cke[cke_i]), + .PRE (1'b0) + ) /* synthesis syn_useioff = 1 */; + end + endgenerate + + // chip select: = 1 at reset; ddr_cs_n[i] <= cs_n_mux[(i*CS_NUM)/CS_WIDTH] + // For unbuffered DIMMs the loading will be high. The chip select can be + // asserted early if the loading is very high. The code as is uses clock 0 + // (both TWO_T_TIME_EN branches below are identical). If needed, clock 270 + // can be used to toggle chip select 1/4 clock cycle early; the clk90 input + // is provided for that purpose but is currently unused in this module. + + genvar cs_i; + generate + for(cs_i = 0; cs_i < CS_WIDTH; cs_i = cs_i + 1) begin: gen_cs_n + if(TWO_T_TIME_EN) begin + (* IOB = "TRUE" *) FDCPE u_ff_cs_n + ( + .Q (ddr_cs_n[cs_i]), + .C (clk0), + .CE (1'b1), + .CLR (1'b0), + .D (cs_n_mux[(cs_i*CS_NUM)/CS_WIDTH]), + .PRE (rst0) + ) /* synthesis syn_useioff = 1 */; + end else begin // if (TWO_T_TIME_EN) + (* IOB = "TRUE" *) FDCPE u_ff_cs_n + ( + .Q (ddr_cs_n[cs_i]), + .C (clk0), + .CE (1'b1), + .CLR (1'b0), + .D (cs_n_mux[(cs_i*CS_NUM)/CS_WIDTH]), + .PRE (rst0) + ) /* synthesis syn_useioff = 1 */; + end // else: !if(TWO_T_TIME_EN) + end + endgenerate + + // address: = X at reset (flops have CLR and PRE tied low) + genvar addr_i; + generate + for (addr_i = 0; addr_i < ROW_WIDTH; addr_i = addr_i + 1) begin: gen_addr + (* IOB = "TRUE" *) FDCPE u_ff_addr + ( + .Q (ddr_addr[addr_i]), + .C (clk0), + .CE (1'b1), + .CLR (1'b0), + .D (addr_mux[addr_i]), + .PRE (1'b0) + ) /* synthesis syn_useioff = 1 */; + end + endgenerate + + // bank address: = X at reset (flops have CLR and PRE tied low) + genvar ba_i; + generate + for (ba_i = 0; ba_i < BANK_WIDTH; ba_i = ba_i + 1) begin: gen_ba + (* IOB = "TRUE" *) FDCPE u_ff_ba + ( + .Q (ddr_ba[ba_i]), + .C (clk0), + .CE (1'b1), + .CLR (1'b0), + .D (ba_mux[ba_i]), + .PRE (1'b0) + ) /* synthesis syn_useioff = 1 */; + end + endgenerate + + // ODT control: = 0 at reset (rst0 drives CLR); flops are generated 
only if DDR_TYPE > 0 + genvar odt_i; + generate + if (DDR_TYPE > 0) begin: gen_odt_ddr2 + for (odt_i = 0; odt_i < ODT_WIDTH; odt_i = odt_i + 1) begin: gen_odt + (* IOB = "TRUE" *) FDCPE u_ff_odt + ( + .Q (ddr_odt[odt_i]), + .C (clk0), + .CE 
+ (1'b1), + .CLR (rst0), + .D (odt[(odt_i*CS_NUM)/ODT_WIDTH]), + .PRE (1'b0) + ) /* synthesis syn_useioff = 1 */; + end + end + endgenerate + +endmodule diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_dm_iob.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_dm_iob.v new file mode 100644 index 0000000..fc6b5c2 --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_dm_iob.v @@ -0,0 +1,108 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. 
+// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. +//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_phy_dm_iob.v +// /___/ /\ Date Last Modified: $Date: 2008/05/21 14:53:09 $ +// \ \ / \ Date Created: Wed Aug 16 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +// This module places the data mask signals into the IOBs: dm_ce is registered by u_dm_ce (FDRSE_1 on clk90) into dm_ce_r, which is the clock enable of the clk90 ODDR whose output drives ddr_dm through an OBUF. +//Reference: +//Revision History: +//***************************************************************************** + +`timescale 1ns/1ps + +module ddr2_phy_dm_iob + ( + input clk90, // clocks both the dm_ce register and the ODDR + input dm_ce, // registered into dm_ce_r, the ODDR clock enable + input mask_data_rise, // ODDR D1 input + input mask_data_fall, // ODDR D2 input + output ddr_dm // OBUF output + ); + + wire dm_out; + wire dm_ce_r; + + FDRSE_1 u_dm_ce + ( + .Q (dm_ce_r), + .C (clk90), + .CE (1'b1), + .D (dm_ce), + .R (1'b0), + .S (1'b0) + ); + + ODDR # + ( + .SRTYPE("SYNC"), + .DDR_CLK_EDGE("SAME_EDGE") + ) + u_oddr_dm + ( + .Q (dm_out), + .C (clk90), + .CE (dm_ce_r), + .D1 (mask_data_rise), + .D2 (mask_data_fall), + .R (1'b0), + .S (1'b0) + ); + + OBUF u_obuf_dm + ( + .I (dm_out), + .O (ddr_dm) + ); + +endmodule diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_dq_iob.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_dq_iob.v new file mode 100644 index 0000000..854079a --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_dq_iob.v @@ -0,0 +1,929 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, 
confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. 
+//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_phy_dq_iob.v +// /___/ /\ Date Last Modified: $Date: 2008/07/17 07:52:27 $ +// \ \ / \ Date Created: Wed Aug 16 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +// This module places the data in the IOBs: one bidirectional DQ bit with an ODDR write path, an ODDR-registered tri-state control, and an IODELAY/IDDR read-capture path followed by fabric flops and an output MUX. +//Reference: +//Revision History: +//***************************************************************************** + +`timescale 1ns/1ps + +module ddr2_phy_dq_iob # + ( + parameter DQ_COL = 0, // I/O column: 0 = left, 1 = center, 2 = right + parameter DQ_MS = 0, // 1 = master, 0 = slave I/O of the pair + parameter HIGH_PERFORMANCE_MODE = "TRUE" // forwarded to the IODELAY + ) + ( + input clk0, + input clk90, + input clkdiv0, + input rst90, + input dlyinc, + input dlyce, + input dlyrst, + input [1:0] dq_oe_n, // [0] feeds D1 and [1] feeds D2 of the tri-state ODDR + input dqs, // inverted to form the IDDR capture clock + input ce, // clock enable of the capture IDDR + input rd_data_sel, // 1 = use stg3a (posedge clk0 capture), 0 = use stg3b + input wr_data_rise, + input wr_data_fall, + output rd_data_rise, + output rd_data_fall, + inout ddr_dq + ); + + wire dq_iddr_clk; + wire dq_idelay; + wire dq_in; + wire dq_oe_n_r; + wire dq_out; + wire stg2a_out_fall; + wire stg2a_out_rise; + wire stg2b_out_fall; + wire stg2b_out_rise; + wire stg3a_out_fall; + wire stg3a_out_rise; + wire stg3b_out_fall; + wire stg3b_out_rise; + + //*************************************************************************** + // Directed routing constraints for route between IDDR and stage 2 capture + // in fabric. + // Only 2 out of the 12 wire declarations will be used for any given + // instantiation of this module. 
+ // Varies according to: + // (1) I/O column (left, center, right) used + // (2) Which I/O in I/O pair (master, slave) used + // Nomenclature: _Xy, X = column (0 = left, 1 = center, 2 = right), + // y = master or slave + //*************************************************************************** + + // master, left + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;93a1e3bb!-1;-78112;-4200;S!0;-143;-1248!1;-452;0!2;2747;1575!3;2461;81!4;2732;-960!4;2732;-984!5;404;8!6;404;8!7;683;-568;L!8;843;24;L!}" *) + wire stg1_out_rise_0m; + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;907923a!-1;-78112;-4192;S!0;-143;-1192!0;-143;-1272!1;-452;0!2;-452;0!3;2723;-385!4;2731;-311!5;3823;-1983!6;5209;1271!7;1394;3072!8;0;-8!9;404;8!10;0;-144!11;683;-536;L!12;404;8!14;843;8;L!}" *) + wire stg1_out_fall_0m; + // slave, left + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;53bb9d6f!-1;-78112;-4600;S!0;-143;-712!1;-452;0!2;1008;-552!3;2780;1360!4;0;-8!5;0;-240!5;0;-264!6;404;8!7;404;8!8;683;-568;L!9;843;24;L!}" *) + wire stg1_out_rise_0s; + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;46bf60d8!-1;-78112;-4592;S!0;-143;-800!1;-452;0!2;1040;1592!3;5875;-85!4;-3127;-843!4;-3127;-939!5;404;8!6;404;8!7;683;-696;L!8;843;-136;L!}" *) + wire stg1_out_fall_0s; + // master, center + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;9ee47800!-1;-6504;-50024;S!0;-175;-1136!1;-484;0!2;-3208;1552!3;-4160;-2092!4;-1428;1172!4;-1428;1076!5;404;8!6;404;8!7;843;-152;L!8;683;-728;L!}" *) + wire stg1_out_rise_1m; + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;e7df31c2!-1;-6504;-50016;S!0;-175;-1192!1;-484;0!2;-5701;1523!3;-3095;-715!3;-4423;2421!4;0;-8!5;1328;-3288!6;0;-240!7;404;8!8;404;8!9;683;-696;L!10;843;-136;L!}" *) + wire stg1_out_fall_1m; + // slave, center + (* syn_keep = "1", keep = "TRUE", + ROUTE = 
"{3;1;5vlx50tff1136;a8c11eb3!-1;-6504;-50424;S!0;-175;-856!1;-484;0!2;-5677;-337!3;1033;1217!3;-295;4353!4;0;-8!5;1328;-3288!6;0;-120!7;404;8!8;404;8!9;683;-696;L!10;843;-152;L!}" *) + wire stg1_out_rise_1s; + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;ed30cce!-1;-6504;-50416;S!0;-175;-848!1;-484;0!2;-3192;-432!3;-1452;1368!3;-6645;85!4;0;-8!5;5193;1035!6;0;-264!7;404;8!8;404;8!9;683;-568;L!10;843;24;L!}" *) + wire stg1_out_fall_1s; + // master, right + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;4d035a44!-1;54728;-108896;S!0;-175;-1248!1;-484;0!2;-3192;-424!3;-4208;2092!4;-1396;-972!4;-1396;-996!5;404;8!6;404;8!7;683;-568;L!8;843;24;L!}" *) + wire stg1_out_rise_2m; + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;92ae8739!-1;54728;-108888;S!0;-175;-1272!1;-484;0!2;-5677;-329!3;-1691;-83!4;-1428;1076!4;-1428;1052!5;404;8!6;404;8!7;683;-728;L!8;843;-136;L!}" *) + wire stg1_out_fall_2m; + // slave, right + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;9de34bf1!-1;54728;-109296;S!0;-175;-712!1;-484;0!2;-5685;-475!3;1041;1107!3;1041;1011!4;404;8!5;404;8!6;683;-536;L!7;843;24;L!}" *) + wire stg1_out_rise_2s; + (* syn_keep = "1", keep = "TRUE", + ROUTE = "{3;1;5vlx50tff1136;1df9e65d!-1;54728;-109288;S!0;-175;-800!1;-484;0!2;-3208;1608!3;-1436;-792!4;0;-8!5;0;-240!5;0;-144!6;404;8!7;404;8!8;843;-136;L!9;683;-696;L!}" *) + wire stg1_out_fall_2s; + + //*************************************************************************** + // Bidirectional I/O + //*************************************************************************** + + IOBUF u_iobuf_dq + ( + .I (dq_out), + .T (dq_oe_n_r), + .IO (ddr_dq), + .O (dq_in) + ); + + //*************************************************************************** + // Write (output) path + //*************************************************************************** + + // on a write, rising edge of DQS corresponds to rising edge of CLK180 + // (aka falling edge 
of CLK0 -> rising edge DQS). We also know: + // 1. data must be driven 1/4 clk cycle before corresponding DQS edge + // 2. first rising DQS edge driven on falling edge of CLK0 + // 3. rising data must be driven 1/4 cycle before falling edge of CLK0 + // 4. therefore, rising data driven on rising edge of CLK + ODDR # + ( + .SRTYPE("SYNC"), + .DDR_CLK_EDGE("SAME_EDGE") + ) + u_oddr_dq + ( + .Q (dq_out), + .C (clk90), + .CE (1'b1), + .D1 (wr_data_rise), + .D2 (wr_data_fall), + .R (1'b0), + .S (1'b0) + ); + + // make sure output is tri-state during reset (DQ_OE_N_R = 1) + ODDR # + ( + .SRTYPE("ASYNC"), + .DDR_CLK_EDGE("SAME_EDGE") + ) + u_tri_state_dq + ( + .Q (dq_oe_n_r), + .C (clk90), + .CE (1'b1), + .D1 (dq_oe_n[0]), + .D2 (dq_oe_n[1]), + .R (1'b0), + .S (rst90) + ); + + //*************************************************************************** + // Read data capture scheme description: + // Data capture consists of 3 ranks of flops, and a MUX + // 1. Rank 1 ("Stage 1"): IDDR captures delayed DDR DQ from memory using + // delayed DQS. + // - Data is split into 2 SDR streams, one each for rise and fall data. + // - BUFIO (DQS) input inverted to IDDR. IDDR configured in SAME_EDGE + // mode. This means that: (1) Q1 = fall data, Q2 = rise data, + // (2) Both rise and fall data are output on falling edge of DQS - + // rather than rise output being output on one edge of DQS, and fall + // data on the other edge if the IDDR were configured in OPPOSITE_EDGE + // mode. This simplifies Stage 2 capture (only one core clock edge + // used, removing effects of duty-cycle-distortion), and saves one + // fabric flop in Rank 3. + // 2. Rank 2 ("Stage 2"): Fabric flops are used to capture output of first + // rank into FPGA clock (CLK) domain. Each rising/falling SDR stream + // from IDDR is fed into two flops, one clocked off rising and one off + // falling edge of CLK. 
One of these flops is chosen, with the choice + // being the one that reduces # of DQ/DQS taps necessary to align Stage + // 1 and Stage 2. Same edge is used to capture both rise and fall SDR + // streams. + // 3. Rank 3 ("Stage 3"): Removes half-cycle paths in CLK domain from + // output of Rank 2. This stage, like Stage 2, is clocked by CLK. Note + // that Stage 3 can be expanded to also support SERDES functionality + // 4. Output MUX: Selects whether Stage 1 output is aligned to rising or + // falling edge of CLK (i.e. specifically this selects whether IDDR + // rise/fall output is transferred to rising or falling edge of CLK). + // Implementation: + // 1. Rank 1 is implemented using an IDDR primitive + // 2. Rank 2 is implemented using: + // - An RPM to fix the location of the capture flops near the DQ I/O. + // The exact RPM used depends on which I/O column (left, center, + // right) the DQ I/O is placed at - this affects the optimal location + // of the slice flops (or does it - can we always choose the two + // columns of slices to the immediate right of the I/O to use, no + // matter what the column?). The origin of the RPM must be set in the + // UCF file using the RLOC_ORIGIN constraint (where the origin is + // based on the DQ I/O location). + // - Directed Routing Constraints ("DIRT strings") to fix the routing + // to the rank 2 fabric flops. This is done to minimize: (1) total + // route delay (and therefore minimize voltage/temperature-related + // variations), and (2) minimize skew both within each rising and + // falling data net, as well as between the rising and falling nets. + // The exact DIRT string used depends on: (1) which I/O column the + // DQ I/O is placed, and (2) whether the DQ I/O is placed on the + // "Master" or "Slave" I/O of a diff pair (DQ is not differential, but + // the routing will be affected by which of each I/O pair is used) + // 3. Rank 3 is implemented using fabric flops. 
No LOC or DIRT constraints + // are used, tools are expected to place these and meet PERIOD timing + // without constraints (constraints may be necessary for "full" designs, + // in this case, user may need to add LOC constraints - if this is the + // case, there are no constraints - other than meeting PERIOD timing - + // for rank 3 flops). + //*************************************************************************** + + //*************************************************************************** + // MIG 2.2: Define AREA_GROUP = "DDR_CAPTURE_FFS" contain all RPM flops in + // design. In UCF file, add constraint: + // AREA_GROUP "DDR_CAPTURE_FFS" GROUP = CLOSED; + // This is done to prevent MAP from packing unrelated logic into + // the slices used by the RPMs. Doing so may cause the DIRT strings + // that define the IDDR -> fabric flop routing to later become + // unroutable during PAR because the unrelated logic placed by MAP + // may use routing resources required by the DIRT strings. 
MAP + // does not currently take into account DIRT strings when placing + // logic + //*************************************************************************** + + // IDELAY to delay incoming data for synchronization purposes + IODELAY # + ( + .DELAY_SRC ("I"), + .IDELAY_TYPE ("VARIABLE"), + .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE), + .IDELAY_VALUE (0), + .ODELAY_VALUE (0) + ) + u_idelay_dq + ( + .DATAOUT (dq_idelay), + .C (clkdiv0), + .CE (dlyce), + .DATAIN (), + .IDATAIN (dq_in), + .INC (dlyinc), + .ODATAIN (), + .RST (dlyrst), + .T () + ); + + //*************************************************************************** + // Rank 1 capture: Use IDDR to generate two SDR outputs + //*************************************************************************** + + // invert clock to IDDR in order to use SAME_EDGE mode (otherwise, we "run + // out of clocks" because DQS is not continuous) + assign dq_iddr_clk = ~dqs; + + //*************************************************************************** + // Rank 2 capture: Use fabric flops to capture Rank 1 output. Use RPM and + // DIRT strings here. + // BEL ("Basic Element of Logic") and relative location constraints for + // second stage capture. 
+ // Varies according to: + // (1) I/O column (left, center, right) used + // (2) Which I/O in I/O pair (master, slave) used + //*************************************************************************** + + // Six different cases for the different I/O column, master/slave + // combinations (can't seem to do this using a localparam, which + // would be easier, XST doesn't allow it) + generate + if ((DQ_MS == 1) && (DQ_COL == 0)) begin: gen_stg2_0m + + //***************************************************************** + // master, left + //***************************************************************** + + IDDR # + ( + .DDR_CLK_EDGE ("SAME_EDGE") + ) + u_iddr_dq + ( + .Q1 (stg1_out_fall_0m), + .Q2 (stg1_out_rise_0m), + .C (dq_iddr_clk), + .CE (ce), + .D (dq_idelay), + .R (1'b0), + .S (1'b0) + ); + + //********************************************************* + // Slice #1 (posedge CLK): Used for: + // 1. IDDR transfer to CLK0 rising edge domain ("stg2a") + // 2. stg2 falling edge -> stg3 rising edge transfer + //********************************************************* + + // Stage 2 capture + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "DFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_fall + ( + .Q (stg2a_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_0m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_rise + ( + .Q (stg2a_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_0m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + // Stage 3 falling -> rising edge translation + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "BFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_fall + ( + .Q (stg3b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_fall), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 
1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_rise + ( + .Q (stg3b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_rise), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + //********************************************************* + // Slice #2 (negedge CLK, FDRSE_1 flops): Used for: + // 1. IDDR transfer to CLK0 falling edge domain ("stg2b") + //********************************************************* + + (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "DFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_fall + ( + .Q (stg2b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_0m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_rise + ( + .Q (stg2b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_0m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + end else if ((DQ_MS == 0) && (DQ_COL == 0)) begin: gen_stg2_0s + + //***************************************************************** + // slave, left + //***************************************************************** + + IDDR # + ( + .DDR_CLK_EDGE ("SAME_EDGE") + ) + u_iddr_dq + ( + .Q1 (stg1_out_fall_0s), + .Q2 (stg1_out_rise_0s), + .C (dq_iddr_clk), + .CE (ce), + .D (dq_idelay), + .R (1'b0), + .S (1'b0) + ); + + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "BFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_fall + ( + .Q (stg2a_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_0s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_rise + ( 
+ .Q (stg2a_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_0s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "DFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_fall + ( + .Q (stg3b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_fall), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_rise + ( + .Q (stg3b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_rise), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_fall + ( + .Q (stg2b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_0s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_rise + ( + .Q (stg2b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_0s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + end else if ((DQ_MS == 1) && (DQ_COL == 1)) begin: gen_stg2_1m + + //***************************************************************** + // master, center + //***************************************************************** + + IDDR # + ( + .DDR_CLK_EDGE ("SAME_EDGE") + ) + u_iddr_dq + ( + .Q1 (stg1_out_fall_1m), + .Q2 (stg1_out_rise_1m), + .C (dq_iddr_clk), + .CE (ce), + .D (dq_idelay), + .R (1'b0), + .S (1'b0) + ); + + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "BFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_fall + ( + .Q (stg2a_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_1m), + .R (1'b0), + .S 
(1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_rise + ( + .Q (stg2a_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_1m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "DFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_fall + ( + .Q (stg3b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_fall), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_rise + ( + .Q (stg3b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_rise), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_fall + ( + .Q (stg2b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_1m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "BFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_rise + ( + .Q (stg2b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_1m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + end else if ((DQ_MS == 0) && (DQ_COL == 1)) begin: gen_stg2_1s + + //***************************************************************** + // slave, center + //***************************************************************** + + IDDR # + ( + .DDR_CLK_EDGE ("SAME_EDGE") + ) + u_iddr_dq + ( + .Q1 (stg1_out_fall_1s), + .Q2 (stg1_out_rise_1s), + .C (dq_iddr_clk), + .CE (ce), + .D (dq_idelay), + .R (1'b0), + .S (1'b0) + ); + + (* HU_SET = 
"stg2_capture", RLOC = "X2Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_fall + ( + .Q (stg2a_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_1s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "BFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_rise + ( + .Q (stg2a_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_1s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "DFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_fall + ( + .Q (stg3b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_fall), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_rise + ( + .Q (stg3b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_rise), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_fall + ( + .Q (stg2b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_1s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "BFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_rise + ( + .Q (stg2b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_1s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + end else if ((DQ_MS == 1) && (DQ_COL == 2)) begin: gen_stg2_2m + + //***************************************************************** + // master, right + //***************************************************************** + + IDDR # 
+ ( + .DDR_CLK_EDGE ("SAME_EDGE") + ) + u_iddr_dq + ( + .Q1 (stg1_out_fall_2m), + .Q2 (stg1_out_rise_2m), + .C (dq_iddr_clk), + .CE (ce), + .D (dq_idelay), + .R (1'b0), + .S (1'b0) + ); + + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_fall + ( + .Q (stg2a_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_2m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_rise + ( + .Q (stg2a_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_2m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "DFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_fall + ( + .Q (stg3b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_fall), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "BFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_rise + ( + .Q (stg3b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_rise), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_fall + ( + .Q (stg2b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_2m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_rise + ( + .Q (stg2b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_2m), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + end else if ((DQ_MS == 0) && 
(DQ_COL == 2)) begin: gen_stg2_2s + + //***************************************************************** + // slave, right + //***************************************************************** + + IDDR # + ( + .DDR_CLK_EDGE ("SAME_EDGE") + ) + u_iddr_dq + ( + .Q1 (stg1_out_fall_2s), + .Q2 (stg1_out_rise_2s), + .C (dq_iddr_clk), + .CE (ce), + .D (dq_idelay), + .R (1'b0), + .S (1'b0) + ); + + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "BFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_fall + ( + .Q (stg2a_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_2s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "DFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg2a_rise + ( + .Q (stg2a_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_2s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_fall + ( + .Q (stg3b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_fall), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE u_ff_stg3b_rise + ( + .Q (stg3b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg2b_out_rise), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "AFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_fall + ( + .Q (stg2b_out_fall), + .C (clk0), + .CE (1'b1), + .D (stg1_out_fall_2s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "CFF", + AREA_GROUP = "DDR_CAPTURE_FFS" *) + FDRSE_1 u_ff_stg2b_rise + ( + .Q 
(stg2b_out_rise), + .C (clk0), + .CE (1'b1), + .D (stg1_out_rise_2s), + .R (1'b0), + .S (1'b0) + )/* synthesis syn_preserve = 1 */ + /* synthesis syn_replicate = 0 */; + + end + endgenerate // NOTE(review): no fallback branch; any DQ_MS/DQ_COL outside {0,1} x {0,1,2} instantiates no capture flops and leaves the stg2/stg3b nets undriven - confirm callers never pass other values + + //*************************************************************************** + // Second stage flops clocked by posedge CLK0 don't need another layer of + // registering + //*************************************************************************** + + assign stg3a_out_rise = stg2a_out_rise; + assign stg3a_out_fall = stg2a_out_fall; + + //******************************************************************* + + assign rd_data_rise = (rd_data_sel) ? stg3a_out_rise : stg3b_out_rise; // output MUX: rd_data_sel = 1 picks the stg3a path, 0 picks stg3b + assign rd_data_fall = (rd_data_sel) ? stg3a_out_fall : stg3b_out_fall; + +endmodule diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_dqs_iob.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_dqs_iob.v new file mode 100644 index 0000000..a626f72 --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_dqs_iob.v @@ -0,0 +1,259 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. 
By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. +//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_phy_dqs_iob.v +// /___/ /\ Date Last Modified: $Date: 2008/07/22 15:41:06 $ +// \ \ / \ Date Created: Wed Aug 16 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +// This module places the data strobes in the IOBs. 
+//Reference: +//Revision History: +//***************************************************************************** + +`timescale 1ns/1ps + +module ddr2_phy_dqs_iob # + ( + parameter DDR_TYPE = 1, // > 0 selects the differential IOBUFDS pad buffer, otherwise IOBUF + parameter HIGH_PERFORMANCE_MODE = "TRUE" // forwarded to both IODELAY instances + ) + ( + input clk0, // inverted below to form clk180 (output-side clock) + input clkdiv0, // C (control clock) of both IODELAYs + input rst0, // PRE of the tri-state control flop + input dlyinc_dqs, // INC of the DQS IODELAY + input dlyce_dqs, // CE of the DQS IODELAY + input dlyrst_dqs, // RST of the DQS IODELAY + input dlyinc_gate, // INC of the gate IODELAY + input dlyce_gate, // CE of the gate IODELAY + input dlyrst_gate, // RST of the gate IODELAY + input dqs_oe_n, // registered on clk180 into the pad buffer T pin + input dqs_rst_n, // registered on clk180 into D1 of the DQS ODDR + input en_dqs, // gate enable; DATAIN of the gate IODELAY + inout ddr_dqs, // DQS pad + inout ddr_dqs_n, // complementary DQS pad, connected only when DDR_TYPE > 0 + output dq_ce, // Q2 of the IDDR clocked by the delayed DQS + output delayed_dqs // DQS after IODELAY and BUFIO + ); + + wire clk180; + wire dqs_bufio; + + wire dqs_ibuf; + wire dqs_idelay; + wire dqs_oe_n_delay; + wire dqs_oe_n_r; + wire dqs_rst_n_delay; + reg dqs_rst_n_r /* synthesis syn_preserve = 1*/; + wire dqs_out; + wire en_dqs_sync /* synthesis syn_keep = 1 */; + + // for simulation only. Synthesis should ignore this delay + localparam DQS_NET_DELAY = 0.8; // in timescale units, i.e. 0.8 ns with `timescale 1ns/1ps + + assign clk180 = ~clk0; // clk0 inverted; clocks dqs_rst_n_r, the DQS ODDR and the tri-state flop + + // add delta delay to inputs clocked by clk180 to avoid delta-delay + // simulation issues + assign dqs_rst_n_delay = dqs_rst_n; + assign dqs_oe_n_delay = dqs_oe_n; + + //*************************************************************************** + // DQS input-side resources: + // - IODELAY (pad -> IDELAY) + // - BUFIO (IDELAY -> BUFIO) + //*************************************************************************** + + // Route DQS from PAD to IDELAY + IODELAY # + ( + .DELAY_SRC("I"), + .IDELAY_TYPE("VARIABLE"), + .HIGH_PERFORMANCE_MODE(HIGH_PERFORMANCE_MODE), + .IDELAY_VALUE(0), + .ODELAY_VALUE(0) + ) + u_idelay_dqs + ( + .DATAOUT (dqs_idelay), + .C (clkdiv0), + .CE (dlyce_dqs), + .DATAIN (), // left open; this instance takes its input from IDATAIN + .IDATAIN (dqs_ibuf), + .INC (dlyinc_dqs), + .ODATAIN (), // left open + .RST (dlyrst_dqs), + .T () // left open + ); + + // From IDELAY to BUFIO + BUFIO u_bufio_dqs + ( + .I (dqs_idelay), + .O (dqs_bufio) + ); + + // To model additional delay of DQS BUFIO + gating network + // for behavioral simulation.
Make sure to select a delay number smaller + // than half clock cycle (otherwise output will not track input changes + // because of inertial delay). Duplicate to avoid delta delay issues. + assign #(DQS_NET_DELAY) i_delayed_dqs = dqs_bufio; // internal copy (net has no explicit declaration); clocks u_iddr_dq_ce + assign #(DQS_NET_DELAY) delayed_dqs = dqs_bufio; // copy driven out of the module + + //*************************************************************************** + // DQS gate circuit (not supported for all controllers) + //*************************************************************************** + + // Gate routing: + // en_dqs -> IDELAY -> en_dqs_sync -> IDDR.S -> dq_ce -> + // capture IDDR.CE + + // Delay CE control so that it's in phase with delayed DQS + IODELAY # + ( + .DELAY_SRC ("DATAIN"), + .IDELAY_TYPE ("VARIABLE"), + .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE), + .IDELAY_VALUE (0), + .ODELAY_VALUE (0) + ) + u_iodelay_dq_ce + ( + .DATAOUT (en_dqs_sync), + .C (clkdiv0), + .CE (dlyce_gate), + .DATAIN (en_dqs), + .IDATAIN (), // left open; this instance takes its input from DATAIN + .INC (dlyinc_gate), + .ODATAIN (), // left open + .RST (dlyrst_gate), + .T () // left open + ); + + // Generate sync'ed CE to DQ IDDR's using an IDDR clocked by DQS + // We could also instantiate a negative-edge SDR flop here + IDDR # + ( + .DDR_CLK_EDGE ("OPPOSITE_EDGE"), + .INIT_Q1 (1'b0), + .INIT_Q2 (1'b0), + .SRTYPE ("ASYNC") + ) + u_iddr_dq_ce + ( + .Q1 (), // unused + .Q2 (dq_ce), // output on falling edge + .C (i_delayed_dqs), + .CE (1'b1), + .D (en_dqs_sync), + .R (1'b0), + .S (en_dqs_sync) // same delayed gate also drives the set input (SRTYPE is "ASYNC") + ); + + //*************************************************************************** + // DQS output-side resources + //*************************************************************************** + + // synthesis attribute keep of dqs_rst_n_r is "true" + always @(posedge clk180) + dqs_rst_n_r <= dqs_rst_n_delay; + + ODDR # + ( + .SRTYPE("SYNC"), + .DDR_CLK_EDGE("OPPOSITE_EDGE") + ) + u_oddr_dqs + ( + .Q (dqs_out), + .C (clk180), + .CE (1'b1), + .D1 (dqs_rst_n_r), // keep output deasserted for write preamble + .D2 (1'b0), + .R (1'b0), + .S (1'b0) + ); + + (*
IOB = "TRUE" *) FDP u_tri_state_dqs + ( + .D (dqs_oe_n_delay), + .Q (dqs_oe_n_r), + .C (clk180), + .PRE (rst0) // NOTE(review): presumably forces dqs_oe_n_r high while rst0 is asserted - confirm FDP preset behaviour + ) /* synthesis syn_useioff = 1 */; + + //*************************************************************************** + + // use either single-ended (for DDR1) or differential (for DDR2) DQS input + + generate + if (DDR_TYPE > 0) begin: gen_dqs_iob_ddr2 + IOBUFDS u_iobuf_dqs + ( + .O (dqs_ibuf), + .IO (ddr_dqs), + .IOB (ddr_dqs_n), // complementary pad, differential case only + .I (dqs_out), + .T (dqs_oe_n_r) + ); + end else begin: gen_dqs_iob_ddr1 + IOBUF u_iobuf_dqs + ( + .O (dqs_ibuf), + .IO (ddr_dqs), + .I (dqs_out), + .T (dqs_oe_n_r) + ); + end + endgenerate + +endmodule diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_init.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_init.v new file mode 100644 index 0000000..4282b63 --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_init.v @@ -0,0 +1,1150 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support.
By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. +//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_phy_init.v +// /___/ /\ Date Last Modified: $Date: 2008/07/22 15:41:06 $ +// \ \ / \ Date Created: Thu Aug 24 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +//Reference: +// This module is the initialization control logic of the memory interface. +// All commands are issued from here according to the burst, CAS Latency and +// the user commands. +//Revision History: +//***************************************************************************** + +`timescale 1ns/1ps + +module ddr2_phy_init # + ( + // Following parameters are for 72-bit RDIMM design (for ML561 Reference + // board design). Actual values may be different.
Actual parameters values + // are passed from design top module ddr2_sdram module. Please refer to + // the ddr2_sdram module for actual values. + parameter BANK_WIDTH = 2, + parameter CKE_WIDTH = 1, + parameter COL_WIDTH = 10, + parameter CS_NUM = 1, + parameter DQ_WIDTH = 72, + parameter ODT_WIDTH = 1, + parameter ROW_WIDTH = 14, + parameter ADDITIVE_LAT = 0, + parameter BURST_LEN = 4, + parameter TWO_T_TIME_EN = 0, + parameter BURST_TYPE = 0, + parameter CAS_LAT = 5, + parameter ODT_TYPE = 1, + parameter REDUCE_DRV = 0, + parameter REG_ENABLE = 1, + parameter TWR = 15000, + parameter CLK_PERIOD = 3000, + parameter DDR_TYPE = 1, + parameter SIM_ONLY = 0 + ) + ( + input clk0, + input clkdiv0, + input rst0, + input rstdiv0, + input [3:0] calib_done, + input ctrl_ref_flag, + input calib_ref_req, + output reg [3:0] calib_start, + output reg calib_ref_done, + output reg phy_init_wren, + output reg phy_init_rden, + output [ROW_WIDTH-1:0] phy_init_addr, + output [BANK_WIDTH-1:0] phy_init_ba, + output phy_init_ras_n, + output phy_init_cas_n, + output phy_init_we_n, + output [CS_NUM-1:0] phy_init_cs_n, + output [CKE_WIDTH-1:0] phy_init_cke, + output reg phy_init_done, + output phy_init_data_sel + ); + + // time to wait between consecutive commands in PHY_INIT - this is a + // generic number, and must be large enough to account for worst case + // timing parameter (tRFC - refresh-to-active) across all memory speed + // grades and operating frequencies. Expressed in CLKDIV clock cycles. 
+ localparam CNTNEXT_CMD = 7'b1111111; + // time to wait between read and read or precharge for stage 3 & 4 + // the larger CNTNEXT_CMD can also be used, use smaller number to + // speed up calibration - avoid tRAS violation, and speeds up simulation + localparam CNTNEXT_RD = 4'b1111; + + // Write recovery (WR) time - is defined by + // tWR (in nanoseconds) by tCK (in nanoseconds) and rounding up a + // noninteger value to the next integer + localparam integer WR_RECOVERY = ((TWR + CLK_PERIOD) - 1)/CLK_PERIOD; + + localparam INIT_CAL1_READ = 5'h00; + localparam INIT_CAL2_READ = 5'h01; + localparam INIT_CAL3_READ = 5'h02; + localparam INIT_CAL4_READ = 5'h03; + localparam INIT_CAL1_WRITE = 5'h04; + localparam INIT_CAL2_WRITE = 5'h05; + localparam INIT_CAL3_WRITE = 5'h06; + localparam INIT_DUMMY_ACTIVE_WAIT = 5'h07; + localparam INIT_PRECHARGE = 5'h08; + localparam INIT_LOAD_MODE = 5'h09; + localparam INIT_AUTO_REFRESH = 5'h0A; + localparam INIT_IDLE = 5'h0B; + localparam INIT_CNT_200 = 5'h0C; + localparam INIT_CNT_200_WAIT = 5'h0D; + localparam INIT_PRECHARGE_WAIT = 5'h0E; + localparam INIT_MODE_REGISTER_WAIT = 5'h0F; + localparam INIT_AUTO_REFRESH_WAIT = 5'h10; + localparam INIT_DEEP_MEMORY_ST = 5'h11; + localparam INIT_DUMMY_ACTIVE = 5'h12; + localparam INIT_CAL1_WRITE_READ = 5'h13; + localparam INIT_CAL1_READ_WAIT = 5'h14; + localparam INIT_CAL2_WRITE_READ = 5'h15; + localparam INIT_CAL2_READ_WAIT = 5'h16; + localparam INIT_CAL3_WRITE_READ = 5'h17; + localparam INIT_CAL3_READ_WAIT = 5'h18; + localparam INIT_CAL4_READ_WAIT = 5'h19; + localparam INIT_CALIB_REF = 5'h1A; + localparam INIT_ZQCL = 5'h1B; + localparam INIT_WAIT_DLLK_ZQINIT = 5'h1C; + + localparam INIT_CNTR_INIT = 4'h0; + localparam INIT_CNTR_PRECH_1 = 4'h1; + localparam INIT_CNTR_EMR2_INIT = 4'h2; + localparam INIT_CNTR_EMR3_INIT = 4'h3; + localparam INIT_CNTR_EMR_EN_DLL = 4'h4; + localparam INIT_CNTR_MR_RST_DLL = 4'h5; + localparam INIT_CNTR_CNT_200_WAIT = 4'h6; + localparam INIT_CNTR_PRECH_2 = 4'h7; + 
localparam INIT_CNTR_AR_1 = 4'h8; + localparam INIT_CNTR_AR_2 = 4'h9; + localparam INIT_CNTR_MR_ACT_DLL = 4'hA; + localparam INIT_CNTR_EMR_DEF_OCD = 4'hB; + localparam INIT_CNTR_EMR_EXIT_OCD = 4'hC; + localparam INIT_CNTR_DEEP_MEM = 4'hD; + localparam INIT_CNTR_PRECH_3 = 4'hE; + localparam INIT_CNTR_DONE = 4'hF; + + localparam DDR1 = 0; + localparam DDR2 = 1; + localparam DDR3 = 2; + + + reg [1:0] burst_addr_r; + reg [1:0] burst_cnt_r; + wire [1:0] burst_val; + wire cal_read; + wire cal_write; + wire cal_write_read; + reg cal1_started_r; + reg cal2_started_r; + reg cal4_started_r; + reg [3:0] calib_done_r; + reg calib_ref_req_posedge; + reg calib_ref_req_r; + reg [15:0] calib_start_shift0_r; + reg [15:0] calib_start_shift1_r; + reg [15:0] calib_start_shift2_r; + reg [15:0] calib_start_shift3_r; + reg [1:0] chip_cnt_r; + reg [4:0] cke_200us_cnt_r; + reg cke_200us_cnt_en_r; + reg [7:0] cnt_200_cycle_r; + reg cnt_200_cycle_done_r; + reg [6:0] cnt_cmd_r; + reg cnt_cmd_ok_r; + reg [3:0] cnt_rd_r; + reg cnt_rd_ok_r; + reg ctrl_ref_flag_r; + reg done_200us_r; + reg [ROW_WIDTH-1:0] ddr_addr_r; + reg [ROW_WIDTH-1:0] ddr_addr_r1; + reg [BANK_WIDTH-1:0] ddr_ba_r; + reg [BANK_WIDTH-1:0] ddr_ba_r1; + reg ddr_cas_n_r; + reg ddr_cas_n_r1; + reg [CKE_WIDTH-1:0] ddr_cke_r; + reg [CS_NUM-1:0] ddr_cs_n_r; + reg [CS_NUM-1:0] ddr_cs_n_r1; + reg [CS_NUM-1:0] ddr_cs_disable_r; + reg ddr_ras_n_r; + reg ddr_ras_n_r1; + reg ddr_we_n_r; + reg ddr_we_n_r1; + wire [15:0] ext_mode_reg; + reg [3:0] init_cnt_r; + reg init_done_r; + reg [4:0] init_next_state; + reg [4:0] init_state_r; + reg [4:0] init_state_r1; + reg [4:0] init_state_r2; + wire [15:0] load_mode_reg; + wire [15:0] load_mode_reg0; + wire [15:0] load_mode_reg1; + wire [15:0] load_mode_reg2; + wire [15:0] load_mode_reg3; + reg phy_init_done_r; + reg phy_init_done_r1; + reg phy_init_done_r2; + reg phy_init_done_r3; + reg refresh_req; + wire [3:0] start_cal; + + 
//*************************************************************************** + + //***************************************************************** + // DDR1 and DDR2 Load mode register + // Mode Register (MR): + // [15:14] - unused - 00 + // [13] - reserved - 0 + // [12] - Power-down mode - 0 (normal) + // [11:9] - write recovery - for Auto Precharge (tWR/tCK) + // [8] - DLL reset - 0 or 1 + // [7] - Test Mode - 0 (normal) + // [6:4] - CAS latency - CAS_LAT + // [3] - Burst Type - BURST_TYPE + // [2:0] - Burst Length - BURST_LEN + //***************************************************************** + + generate + if (DDR_TYPE == DDR2) begin: gen_load_mode_reg_ddr2 + assign load_mode_reg[2:0] = (BURST_LEN == 8) ? 3'b011 : + ((BURST_LEN == 4) ? 3'b010 : 3'b111); + assign load_mode_reg[3] = BURST_TYPE; + assign load_mode_reg[6:4] = (CAS_LAT == 3) ? 3'b011 : + ((CAS_LAT == 4) ? 3'b100 : + ((CAS_LAT == 5) ? 3'b101 : 3'b111)); + assign load_mode_reg[7] = 1'b0; + assign load_mode_reg[8] = 1'b0; // init value only (DLL not reset) + assign load_mode_reg[11:9] = (WR_RECOVERY == 6) ? 3'b101 : + ((WR_RECOVERY == 5) ? 3'b100 : + ((WR_RECOVERY == 4) ? 3'b011 : + ((WR_RECOVERY == 3) ? 3'b010 : + 3'b001))); + assign load_mode_reg[15:12] = 4'b000; + end else if (DDR_TYPE == DDR1)begin: gen_load_mode_reg_ddr1 + assign load_mode_reg[2:0] = (BURST_LEN == 8) ? 3'b011 : + ((BURST_LEN == 4) ? 3'b010 : + ((BURST_LEN == 2) ? 3'b001 : 3'b111)); + assign load_mode_reg[3] = BURST_TYPE; + assign load_mode_reg[6:4] = (CAS_LAT == 2) ? 3'b010 : + ((CAS_LAT == 3) ? 3'b011 : + ((CAS_LAT == 25) ? 
3'b110 : 3'b111)); + assign load_mode_reg[12:7] = 6'b000000; // init value only + assign load_mode_reg[15:13] = 3'b000; + end + endgenerate + + //***************************************************************** + // DDR1 and DDR2 ext mode register + // Extended Mode Register (MR): + // [15:14] - unused - 00 + // [13] - reserved - 0 + // [12] - output enable - 0 (enabled) + // [11] - RDQS enable - 0 (disabled) + // [10] - DQS# enable - 0 (enabled) + // [9:7] - OCD Program - 111 or 000 (first 111, then 000 during init) + // [6] - RTT[1] - RTT[1:0] = 0(no ODT), 1(75), 2(150), 3(50) + // [5:3] - Additive CAS - ADDITIVE_CAS + // [2] - RTT[0] + // [1] - Output drive - REDUCE_DRV (= 0(full), = 1 (reduced) + // [0] - DLL enable - 0 (normal) + //***************************************************************** + + generate + if (DDR_TYPE == DDR2) begin: gen_ext_mode_reg_ddr2 + assign ext_mode_reg[0] = 1'b0; + assign ext_mode_reg[1] = REDUCE_DRV; + assign ext_mode_reg[2] = ((ODT_TYPE == 1) || (ODT_TYPE == 3)) ? + 1'b1 : 1'b0; + assign ext_mode_reg[5:3] = (ADDITIVE_LAT == 0) ? 3'b000 : + ((ADDITIVE_LAT == 1) ? 3'b001 : + ((ADDITIVE_LAT == 2) ? 3'b010 : + ((ADDITIVE_LAT == 3) ? 3'b011 : + ((ADDITIVE_LAT == 4) ? 3'b100 : + 3'b111)))); + assign ext_mode_reg[6] = ((ODT_TYPE == 2) || (ODT_TYPE == 3)) ? 
+ 1'b1 : 1'b0; + assign ext_mode_reg[9:7] = 3'b000; + assign ext_mode_reg[10] = 1'b0; + assign ext_mode_reg[15:10] = 6'b000000; + end else if (DDR_TYPE == DDR1)begin: gen_ext_mode_reg_ddr1 + assign ext_mode_reg[0] = 1'b0; + assign ext_mode_reg[1] = REDUCE_DRV; + assign ext_mode_reg[12:2] = 11'b00000000000; + assign ext_mode_reg[15:13] = 3'b000; + end + endgenerate + + //***************************************************************** + // DDR3 Load mode reg0 + // Mode Register (MR0): + // [15:13] - unused - 000 + // [12] - Precharge Power-down DLL usage - 0 (DLL frozen, slow-exit), + // 1 (DLL maintained) + // [11:9] - write recovery for Auto Precharge (tWR/tCK = 6) + // [8] - DLL reset - 0 or 1 + // [7] - Test Mode - 0 (normal) + // [6:4],[2] - CAS latency - CAS_LAT + // [3] - Burst Type - BURST_TYPE + // [1:0] - Burst Length - BURST_LEN + //***************************************************************** + + generate + if (DDR_TYPE == DDR3) begin: gen_load_mode_reg0_ddr3 + assign load_mode_reg0[1:0] = (BURST_LEN == 8) ? 2'b00 : + ((BURST_LEN == 4) ? 2'b10 : 2'b11); + // Part of CAS latency. This bit is '0' for all CAS latencies + assign load_mode_reg0[2] = 1'b0; + assign load_mode_reg0[3] = BURST_TYPE; + assign load_mode_reg0[6:4] = (CAS_LAT == 5) ? 3'b001 : + (CAS_LAT == 6) ? 
3'b010 : 3'b111; + assign load_mode_reg0[7] = 1'b0; + // init value only (DLL reset) + assign load_mode_reg0[8] = 1'b1; + assign load_mode_reg0[11:9] = 3'b010; + // Precharge Power-Down DLL 'slow-exit' + assign load_mode_reg0[12] = 1'b0; + assign load_mode_reg0[15:13] = 3'b000; + end + endgenerate + + //***************************************************************** + // DDR3 Load mode reg1 + // Mode Register (MR1): + // [15:13] - unused - 00 + // [12] - output enable - 0 (enabled for DQ, DQS, DQS#) + // [11] - TDQS enable - 0 (TDQS disabled and DM enabled) + // [10] - reserved - 0 (must be '0') + // [9] - RTT[2] - 0 + // [8] - reserved - 0 (must be '0') + // [7] - write leveling - 0 (disabled), 1 (enabled) + // [6] - RTT[1] - RTT[1:0] = 0(no ODT), 1(75), 2(150), 3(50) + // [5] - Output driver impedance[1] - 0 (RZQ/6 and RZQ/7) + // [4:3] - Additive CAS - ADDITIVE_CAS + // [2] - RTT[0] + // [1] - Output driver impedance[0] - 0(RZQ/6), or 1 (RZQ/7) + // [0] - DLL enable - 0 (normal) + //***************************************************************** + + generate + if (DDR_TYPE == DDR3) begin: gen_ext_mode_reg1_ddr3 + // DLL enabled during Imitialization + assign load_mode_reg1[0] = 1'b0; + // RZQ/6 + assign load_mode_reg1[1] = REDUCE_DRV; + assign load_mode_reg1[2] = ((ODT_TYPE == 1) || (ODT_TYPE == 3)) ? + 1'b1 : 1'b0; + assign load_mode_reg1[4:3] = (ADDITIVE_LAT == 0) ? 2'b00 : + ((ADDITIVE_LAT == 1) ? 2'b01 : + ((ADDITIVE_LAT == 2) ? 2'b10 : + 3'b111)); + // RZQ/6 + assign load_mode_reg1[5] = 1'b0; + assign load_mode_reg1[6] = ((ODT_TYPE == 2) || (ODT_TYPE == 3)) ? 
+ 1'b1 : 1'b0; + // Make zero WRITE_LEVEL + assign load_mode_reg1[7] = 0; + assign load_mode_reg1[8] = 1'b0; + assign load_mode_reg1[9] = 1'b0; + assign load_mode_reg1[10] = 1'b0; + assign load_mode_reg1[15:11] = 5'b00000; + end + endgenerate + + //***************************************************************** + // DDR3 Load mode reg2 + // Mode Register (MR2): + // [15:11] - unused - 00 + // [10:9] - RTT_WR - 00 (Dynamic ODT off) + // [8] - reserved - 0 (must be '0') + // [7] - self-refresh temperature range - + // 0 (normal), 1 (extended) + // [6] - Auto Self-Refresh - 0 (manual), 1(auto) + // [5:3] - CAS Write Latency (CWL) - + // 000 (5 for 400 MHz device), + // 001 (6 for 400 MHz to 533 MHz devices), + // 010 (7 for 533 MHz to 667 MHz devices), + // 011 (8 for 667 MHz to 800 MHz) + // [2:0] - Partial Array Self-Refresh (Optional) - + // 000 (full array) + //***************************************************************** + + generate + if (DDR_TYPE == DDR3) begin: gen_ext_mode_reg2_ddr3 + assign load_mode_reg2[2:0] = 3'b000; + assign load_mode_reg2[5:3] = (CAS_LAT == 5) ? 3'b000 : + (CAS_LAT == 6) ? 
3'b001 : 3'b111; + assign load_mode_reg2[6] = 1'b0; // Manual Self-Refresh + assign load_mode_reg2[7] = 1'b0; + assign load_mode_reg2[8] = 1'b0; + assign load_mode_reg2[10:9] = 2'b00; + assign load_mode_reg2[15:11] = 5'b00000; + end + endgenerate + + //***************************************************************** + // DDR3 Load mode reg3 + // Mode Register (MR3): + // [15:3] - unused - All zeros + // [2] - MPR Operation - 0(normal operation), 1(data flow from MPR) + // [1:0] - MPR location - 00 (Predefined pattern) + //***************************************************************** + + generate + if (DDR_TYPE == DDR3)begin: gen_ext_mode_reg3_ddr3 + assign load_mode_reg3[1:0] = 2'b00; + assign load_mode_reg3[2] = 1'b0; + assign load_mode_reg3[15:3] = 13'b0000000000000; + end + endgenerate + + //*************************************************************************** + // Logic for calibration start, and for auto-refresh during cal request + // CALIB_REF_REQ is used by calibration logic to request auto-refresh + // during calibration (used to avoid tRAS violation if certain calibration + // stages take a long time). Once the auto-refresh is complete and cal can + // be resumed, CALIB_REF_DONE is asserted by PHY_INIT. + //*************************************************************************** + + // generate pulse for each of calibration start controls + assign start_cal[0] = ((init_state_r1 == INIT_CAL1_READ) && + (init_state_r2 != INIT_CAL1_READ)); + assign start_cal[1] = ((init_state_r1 == INIT_CAL2_READ) && + (init_state_r2 != INIT_CAL2_READ)); + assign start_cal[2] = ((init_state_r1 == INIT_CAL3_READ) && + (init_state_r2 == INIT_CAL3_WRITE_READ)); + assign start_cal[3] = ((init_state_r1 == INIT_CAL4_READ) && + (init_state_r2 == INIT_DUMMY_ACTIVE_WAIT)); + + // Generate positive-edge triggered, latched signal to force initialization + // to pause calibration, and to issue auto-refresh.
Clear flag as soon as + // refresh initiated + always @(posedge clkdiv0) + if (rstdiv0) begin + calib_ref_req_r <= 1'b0; + calib_ref_req_posedge <= 1'b0; + refresh_req <= 1'b0; + end else begin + calib_ref_req_r <= calib_ref_req; + calib_ref_req_posedge <= calib_ref_req & ~calib_ref_req_r; + if (init_state_r1 == INIT_AUTO_REFRESH) + refresh_req <= 1'b0; + else if (calib_ref_req_posedge) + refresh_req <= 1'b1; + end + + // flag to tell cal1 calibration was started. + // This flag is used for cal1 auto refreshes + // some of these bits may not be needed - only needed for those stages that + // need refreshes within the stage (i.e. very long stages) + always @(posedge clkdiv0) + if (rstdiv0) begin + cal1_started_r <= 1'b0; + cal2_started_r <= 1'b0; + cal4_started_r <= 1'b0; + end else begin + if (calib_start[0]) + cal1_started_r <= 1'b1; + if (calib_start[1]) + cal2_started_r <= 1'b1; + if (calib_start[3]) + cal4_started_r <= 1'b1; + end + + // Delay start of each calibration by 16 clock cycles to + // ensure that when calibration logic begins, that read data is already + // appearing on the bus. Don't really need it, it's more for simulation + // purposes. Each circuit should synthesize using an SRL16. + // In first stage of calibration periodic auto refreshes + // will be issued to meet memory timing. calib_start_shift0_r[15] will be + // asserted more than once.calib_start[0] is anded with cal1_started_r so + // that it is asserted only once. cal1_refresh_done is anded with + // cal1_started_r so that it is asserted after the auto refreshes. 
+ always @(posedge clkdiv0) begin + calib_start_shift0_r <= {calib_start_shift0_r[14:0], start_cal[0]}; + calib_start_shift1_r <= {calib_start_shift1_r[14:0], start_cal[1]}; + calib_start_shift2_r <= {calib_start_shift2_r[14:0], start_cal[2]}; + calib_start_shift3_r <= {calib_start_shift3_r[14:0], start_cal[3]}; + calib_start[0] <= calib_start_shift0_r[15] & ~cal1_started_r; + calib_start[1] <= calib_start_shift1_r[15] & ~cal2_started_r; + calib_start[2] <= calib_start_shift2_r[15]; + calib_start[3] <= calib_start_shift3_r[15] & ~cal4_started_r; + calib_ref_done <= calib_start_shift0_r[15] | + calib_start_shift1_r[15] | + calib_start_shift3_r[15]; + end + + // generate delay for various states that require it (no maximum delay + // requirement, make sure that terminal count is large enough to cover + // all cases) + always @(posedge clkdiv0) begin + case (init_state_r) + INIT_PRECHARGE_WAIT, + INIT_MODE_REGISTER_WAIT, + INIT_AUTO_REFRESH_WAIT, + INIT_DUMMY_ACTIVE_WAIT, + INIT_CAL1_WRITE_READ, + INIT_CAL1_READ_WAIT, + INIT_CAL2_WRITE_READ, + INIT_CAL2_READ_WAIT, + INIT_CAL3_WRITE_READ: + cnt_cmd_r <= cnt_cmd_r + 1; + default: + cnt_cmd_r <= 7'b0000000; + endcase + end + + // assert when count reaches the value + always @(posedge clkdiv0) begin + if(cnt_cmd_r == CNTNEXT_CMD) + cnt_cmd_ok_r <= 1'b1; + else + cnt_cmd_ok_r <= 1'b0; + end + + always @(posedge clkdiv0) begin + case (init_state_r) + INIT_CAL3_READ_WAIT, + INIT_CAL4_READ_WAIT: + cnt_rd_r <= cnt_rd_r + 1; + default: + cnt_rd_r <= 4'b0000; + endcase + end + + always @(posedge clkdiv0) begin + if(cnt_rd_r == CNTNEXT_RD) + cnt_rd_ok_r <= 1'b1; + else + cnt_rd_ok_r <= 1'b0; + end + + //*************************************************************************** + // Initial delay after power-on + //*************************************************************************** + + // register the refresh flag from the controller. 
+ // The refresh flag is in full frequency domain - so a pulsed version must + // be generated for half freq domain using 2 consecutive full clk cycles + // The registered version is used for the 200us counter + always @(posedge clk0) + ctrl_ref_flag_r <= ctrl_ref_flag; + always @(posedge clkdiv0) + cke_200us_cnt_en_r <= ctrl_ref_flag || ctrl_ref_flag_r; + + // 200us counter for cke + always @(posedge clkdiv0) + if (rstdiv0) begin + // skip power-up count if only simulating + if (SIM_ONLY) + cke_200us_cnt_r <= 5'b00001; + else + cke_200us_cnt_r <= 5'd27; + end else if (cke_200us_cnt_en_r) + cke_200us_cnt_r <= cke_200us_cnt_r - 1; + + always @(posedge clkdiv0) + if (rstdiv0) + done_200us_r <= 1'b0; + else if (!done_200us_r) + done_200us_r <= (cke_200us_cnt_r == 5'b00000); + + // 200 clocks counter - count value : h'64 required for initialization + // Counts 100 divided by two clocks + always @(posedge clkdiv0) + if (rstdiv0 || (init_state_r == INIT_CNT_200)) + cnt_200_cycle_r <= 8'h64; + else if (init_state_r == INIT_ZQCL) // ddr3 + cnt_200_cycle_r <= 8'hC8; + else if (cnt_200_cycle_r != 8'h00) + cnt_200_cycle_r <= cnt_200_cycle_r - 1; + + always @(posedge clkdiv0) + if (rstdiv0 || (init_state_r == INIT_CNT_200) + || (init_state_r == INIT_ZQCL)) + cnt_200_cycle_done_r <= 1'b0; + else if (cnt_200_cycle_r == 8'h00) + cnt_200_cycle_done_r <= 1'b1; + + //***************************************************************** + // handle deep memory configuration: + // During initialization: Repeat initialization sequence once for each + // chip select. Note that we could perform initalization for all chip + // selects simulataneously. Probably fine - any potential SI issues with + // auto refreshing all chip selects at once? + // Once initialization complete, assert only CS[0] for calibration. 
+  //*****************************************************************
+
+  always @(posedge clkdiv0) // chip_cnt_r: index of the rank whose chip select is driven below
+    if (rstdiv0) begin
+      chip_cnt_r <= 2'b00;
+    end else if (init_state_r == INIT_DEEP_MEMORY_ST) begin // stepped once per visit to INIT_DEEP_MEMORY_ST
+      if (chip_cnt_r != CS_NUM)
+        chip_cnt_r <= chip_cnt_r + 1;
+      else
+        chip_cnt_r <= 2'b00;
+    end
+
+  always @(posedge clkdiv0) // ddr_cs_n_r: active-low chip selects generated by the init/calibration FSM
+    if (rstdiv0) begin
+      ddr_cs_n_r <= {CS_NUM{1'b1}};
+    end else begin
+      ddr_cs_n_r <= {CS_NUM{1'b1}}; // default: all ranks deselected; only bit [chip_cnt_r] is overridden below
+      if ((init_state_r == INIT_DUMMY_ACTIVE) ||
+          (init_state_r == INIT_PRECHARGE) ||
+          (init_state_r == INIT_LOAD_MODE) ||
+          (init_state_r == INIT_AUTO_REFRESH) ||
+          (init_state_r == INIT_ZQCL ) ||
+          (((init_state_r == INIT_CAL1_READ) || // calibration reads/writes select the rank only when burst_cnt_r == 0
+            (init_state_r == INIT_CAL2_READ) ||
+            (init_state_r == INIT_CAL3_READ) ||
+            (init_state_r == INIT_CAL4_READ) ||
+            (init_state_r == INIT_CAL1_WRITE) ||
+            (init_state_r == INIT_CAL2_WRITE) ||
+            (init_state_r == INIT_CAL3_WRITE)) && (burst_cnt_r == 2'b00)))
+        ddr_cs_n_r[chip_cnt_r] <= 1'b0;
+      else
+        ddr_cs_n_r[chip_cnt_r] <= 1'b1;
+    end
+
+  //***************************************************************************
+  // Write/read burst logic
+  //***************************************************************************
+
+  assign cal_write = ((init_state_r == INIT_CAL1_WRITE) || // high in any calibration write state
+                      (init_state_r == INIT_CAL2_WRITE) ||
+                      (init_state_r == INIT_CAL3_WRITE));
+  assign cal_read = ((init_state_r == INIT_CAL1_READ) || // high in any calibration read state
+                     (init_state_r == INIT_CAL2_READ) ||
+                     (init_state_r == INIT_CAL3_READ) ||
+                     (init_state_r == INIT_CAL4_READ));
+  assign cal_write_read = ((init_state_r == INIT_CAL1_READ) || // union of the cal_read and cal_write states
+                           (init_state_r == INIT_CAL2_READ) ||
+                           (init_state_r == INIT_CAL3_READ) ||
+                           (init_state_r == INIT_CAL4_READ) ||
+                           (init_state_r == INIT_CAL1_WRITE) ||
+                           (init_state_r == INIT_CAL2_WRITE) ||
+                           (init_state_r == INIT_CAL3_WRITE));
+
+  assign burst_val = (BURST_LEN == 4) ? 2'b00 : // reload value for burst_cnt_r: 0 for BL4, 1 for BL8, 0 otherwise
+                     (BURST_LEN == 8) ? 2'b01 : 2'b00;
+
+  // keep track of current address - need this if burst length < 8 for
+  // stage 2-4 calibration writes and reads. Make sure value always gets
+  // initialized to 0 before we enter write/read state. This is used to
+  // keep track of when another burst must be issued
+  always @(posedge clkdiv0)
+    if (cal_write_read)
+      burst_addr_r <= burst_addr_r + 2; // advances by 2 every clkdiv0 cycle spent in a calibration read/write state
+    else
+      burst_addr_r <= 2'b00;
+
+  // write/read burst count
+  always @(posedge clkdiv0)
+    if (cal_write_read)
+      if (burst_cnt_r == 2'b00) // a command is issued when the count is 0 (see the chip-select/CAS/WE decodes)
+        burst_cnt_r <= burst_val;
+      else // SHOULD THIS BE -2 CHECK THIS LOGIC
+        burst_cnt_r <= burst_cnt_r - 1;
+    else
+      burst_cnt_r <= 2'b00;
+
+  // indicate when a write is occurring
+  always @(posedge clkdiv0)
+    // MIG 2.1: Remove (burst_addr_r<4) term - not used
+    // phy_init_wren <= cal_write && (burst_addr_r < 3'd4);
+    phy_init_wren <= cal_write; // cal_write delayed by one clkdiv0 cycle
+
+  // used for read enable calibration, pulse to indicate when read issued
+  always @(posedge clkdiv0)
+    // MIG 2.1: Remove (burst_addr_r<4) term - not used
+    // phy_init_rden <= cal_read && (burst_addr_r < 3'd4);
+    phy_init_rden <= cal_read; // cal_read delayed by one clkdiv0 cycle
+
+  //***************************************************************************
+  // Initialization state machine
+  //***************************************************************************
+
+  always @(posedge clkdiv0) // init_cnt_r: step counter; the FSM decodes it in INIT_IDLE to pick the next command
+    // every time we need to initialize another rank of memory, need to
+    // reset init count, and repeat the entire initialization (but not
+    // calibration) sequence
+    if (rstdiv0 || (init_state_r == INIT_DEEP_MEMORY_ST))
+      init_cnt_r <= INIT_CNTR_INIT;
+    else if ((DDR_TYPE == DDR1) && (init_state_r == INIT_PRECHARGE) &&
+             (init_cnt_r == INIT_CNTR_PRECH_1))
+      // skip EMR(2) and EMR(3) register loads
+      init_cnt_r <= INIT_CNTR_EMR_EN_DLL;
+    else if ((DDR_TYPE == DDR1) && (init_state_r == INIT_LOAD_MODE) &&
+             (init_cnt_r == INIT_CNTR_MR_ACT_DLL))
+      // skip OCD calibration for DDR1
+      init_cnt_r <= INIT_CNTR_DEEP_MEM;
+    else if ((DDR_TYPE == DDR3) && (init_state_r == INIT_ZQCL))
+      // skip states for DDR3
+      init_cnt_r <= INIT_CNTR_DEEP_MEM;
+    else if ((init_state_r == INIT_LOAD_MODE) || // otherwise advance one step per issued command:
+             ((init_state_r == INIT_PRECHARGE) // precharges count unless the previous state was INIT_CALIB_REF
+              && (init_state_r1 != INIT_CALIB_REF))||
+             ((init_state_r == INIT_AUTO_REFRESH) // refreshes count only until init_done_r is set
+              && (~init_done_r))||
+             (init_state_r == INIT_CNT_200))
+      init_cnt_r <= init_cnt_r + 1;
+
+  always @(posedge clkdiv0) begin // phy_init_done_r: high only while idle with init_cnt_r == INIT_CNTR_DONE
+    if ((init_state_r == INIT_IDLE) && (init_cnt_r == INIT_CNTR_DONE)) begin
+      phy_init_done_r <= 1'b1;
+    end else
+      phy_init_done_r <= 1'b0;
+  end
+
+  // phy_init_done to the controller and the user interface.
+  // It is delayed by four clocks to account for the
+  // multi cycle path constraint to the (phy_init_data_sel)
+  // to the phy layer.
+  always @(posedge clkdiv0)begin // four-stage shift register fed by phy_init_done_r; not reset
+    phy_init_done_r1 <= phy_init_done_r;
+    phy_init_done_r2 <= phy_init_done_r1;
+    phy_init_done_r3 <= phy_init_done_r2;
+    phy_init_done <= phy_init_done_r3;
+  end
+
+  // Instantiate primitive to allow this flop to be attached to multicycle
+  // path constraint in UCF. This signal goes to PHY_WRITE and PHY_CTL_IO
+  // datapath logic only. Because it is a multi-cycle path, it can be
+  // clocked by either CLKDIV0 or CLK0.
+  FDRSE u_ff_phy_init_data_sel // CE tied high, R/S tied low: behaves as a plain D flip-flop on clkdiv0
+    (
+     .Q   (phy_init_data_sel),
+     .C   (clkdiv0),
+     .CE  (1'b1),
+     .D   (phy_init_done_r1), // taps the done pipeline one stage after phy_init_done_r
+     .R   (1'b0),
+     .S   (1'b0)
+     ) /* synthesis syn_preserve=1 */
+       /* synthesis syn_replicate = 0 */;
+
+  //synthesis translate_off
+  always @(posedge calib_done[0]) // simulation-only progress messages, one per calibration stage
+    $display ("First Stage Calibration completed at time %t", $time);
+
+  always @(posedge calib_done[1])
+    $display ("Second Stage Calibration completed at time %t", $time);
+
+  always @(posedge calib_done[2]) begin
+    $display ("Third Stage Calibration completed at time %t", $time);
+  end
+
+  always @(posedge calib_done[3]) begin
+    $display ("Fourth Stage Calibration completed at time %t", $time);
+    $display ("Calibration completed at time %t", $time);
+  end
+  //synthesis translate_on
+
+  always @(posedge clkdiv0) begin // init_done_r: set once init_cnt_r has reached INIT_CNTR_DEEP_MEM or beyond
+    if ((init_cnt_r >= INIT_CNTR_DEEP_MEM))begin
+      init_done_r <= 1'b1;
+    end else
+      init_done_r <= 1'b0;
+  end
+
+  //*****************************************************************
+
+  always @(posedge clkdiv0) // state register plus two delayed copies of the state and a registered calib_done
+    if (rstdiv0) begin
+      init_state_r <= INIT_IDLE;
+      init_state_r1 <= INIT_IDLE;
+      init_state_r2 <= INIT_IDLE;
+      calib_done_r <= 4'b0000;
+    end else begin
+      init_state_r <= init_next_state;
+      init_state_r1 <= init_state_r;
+      init_state_r2 <= init_state_r1;
+      calib_done_r <= calib_done; // register for timing
+    end
+
+  always @(*) begin // next-state logic for the initialization / calibration FSM
+    init_next_state = init_state_r; // default: hold the current state
+    (* full_case, parallel_case *) case (init_state_r)
+      INIT_IDLE: begin
+        if (done_200us_r) begin // nothing is issued until the power-on wait has elapsed
+          (* parallel_case *) case (init_cnt_r) // init_cnt_r selects the next command of the init sequence
+            INIT_CNTR_INIT:
+              init_next_state = INIT_CNT_200;
+            INIT_CNTR_PRECH_1:
+              init_next_state = INIT_PRECHARGE;
+            INIT_CNTR_EMR2_INIT:
+              init_next_state = INIT_LOAD_MODE; // EMR(2)
+            INIT_CNTR_EMR3_INIT:
+              init_next_state = INIT_LOAD_MODE; // EMR(3);
+            INIT_CNTR_EMR_EN_DLL:
+              init_next_state = INIT_LOAD_MODE; // EMR, enable DLL
+            INIT_CNTR_MR_RST_DLL:
+              init_next_state = INIT_LOAD_MODE; // MR, reset DLL
+            INIT_CNTR_CNT_200_WAIT:begin
+              if(DDR_TYPE == DDR3)
+                init_next_state = INIT_ZQCL; // DDR3
+              else
+                // Wait 200cc after reset DLL
+                init_next_state = INIT_CNT_200;
+            end
+            INIT_CNTR_PRECH_2:
+              init_next_state = INIT_PRECHARGE;
+            INIT_CNTR_AR_1:
+              init_next_state = INIT_AUTO_REFRESH;
+            INIT_CNTR_AR_2:
+              init_next_state = INIT_AUTO_REFRESH;
+            INIT_CNTR_MR_ACT_DLL:
+              init_next_state = INIT_LOAD_MODE; // MR, unreset DLL
+            INIT_CNTR_EMR_DEF_OCD:
+              init_next_state = INIT_LOAD_MODE; // EMR, OCD default
+            INIT_CNTR_EMR_EXIT_OCD:
+              init_next_state = INIT_LOAD_MODE; // EMR, enable OCD exit
+            INIT_CNTR_DEEP_MEM: begin
+              if ((chip_cnt_r < CS_NUM-1)) // more ranks left: restart the init sequence for the next one
+                init_next_state = INIT_DEEP_MEMORY_ST;
+              else if (cnt_200_cycle_done_r)
+                init_next_state = INIT_DUMMY_ACTIVE;
+              else
+                init_next_state = INIT_IDLE;
+            end
+            INIT_CNTR_PRECH_3:
+              init_next_state = INIT_PRECHARGE;
+            INIT_CNTR_DONE:
+              init_next_state = INIT_IDLE;
+            default :
+              init_next_state = INIT_IDLE;
+          endcase
+        end
+      end
+      INIT_CNT_200:
+        init_next_state = INIT_CNT_200_WAIT;
+      INIT_CNT_200_WAIT:
+        if (cnt_200_cycle_done_r)
+          init_next_state = INIT_IDLE;
+      INIT_PRECHARGE:
+        init_next_state = INIT_PRECHARGE_WAIT;
+      INIT_PRECHARGE_WAIT:
+        if (cnt_cmd_ok_r)begin
+          if (init_done_r && (!(&calib_done_r))) // init finished but some calibration stage still pending
+            init_next_state = INIT_AUTO_REFRESH;
+          else
+            init_next_state = INIT_IDLE;
+        end
+      INIT_ZQCL:
+        init_next_state = INIT_WAIT_DLLK_ZQINIT;
+      INIT_WAIT_DLLK_ZQINIT:
+        if (cnt_200_cycle_done_r)
+          init_next_state = INIT_IDLE;
+      INIT_LOAD_MODE:
+        init_next_state = INIT_MODE_REGISTER_WAIT;
+      INIT_MODE_REGISTER_WAIT:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_IDLE;
+      INIT_AUTO_REFRESH:
+        init_next_state = INIT_AUTO_REFRESH_WAIT;
+      INIT_AUTO_REFRESH_WAIT:
+        if (cnt_cmd_ok_r)begin
+          if(init_done_r)
+            init_next_state = INIT_DUMMY_ACTIVE;
+          else
+            init_next_state = INIT_IDLE;
+        end
+      INIT_DEEP_MEMORY_ST:
+        init_next_state = INIT_IDLE;
+      // single row activate. All subsequent calibration writes and
+      // reads will take place in this row
+      INIT_DUMMY_ACTIVE:
+        init_next_state = INIT_DUMMY_ACTIVE_WAIT;
+      INIT_DUMMY_ACTIVE_WAIT:
+        if (cnt_cmd_ok_r)begin
+          if (~calib_done_r[0]) begin
+            // if returning to stg1 after refresh, don't need to write
+            if (cal1_started_r)
+              init_next_state = INIT_CAL1_READ;
+            // if first entering stg1, need to write training pattern
+            else
+              init_next_state = INIT_CAL1_WRITE;
+          end else if (~calib_done_r[1]) begin // registered copy, like every other calib_done test in this FSM
+            if (cal2_started_r)
+              init_next_state = INIT_CAL2_READ;
+            else
+              init_next_state = INIT_CAL2_WRITE;
+          end else if (~calib_done_r[2])
+            init_next_state = INIT_CAL3_WRITE;
+          else
+            init_next_state = INIT_CAL4_READ;
+        end
+      // Stage 1 calibration (write and continuous read)
+      INIT_CAL1_WRITE:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL1_WRITE_READ;
+      INIT_CAL1_WRITE_READ:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_CAL1_READ;
+      INIT_CAL1_READ:
+        // Stage 1 requires inter-stage auto-refresh
+        if (calib_done_r[0] || refresh_req)
+          init_next_state = INIT_CAL1_READ_WAIT;
+      INIT_CAL1_READ_WAIT:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_CALIB_REF;
+      // Stage 2 calibration (write and continuous read)
+      INIT_CAL2_WRITE:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL2_WRITE_READ;
+      INIT_CAL2_WRITE_READ:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_CAL2_READ;
+      INIT_CAL2_READ:
+        // Stage 2 requires inter-stage auto-refresh
+        if (calib_done_r[1] || refresh_req)
+          init_next_state = INIT_CAL2_READ_WAIT;
+      INIT_CAL2_READ_WAIT:
+        if(cnt_cmd_ok_r)
+          init_next_state = INIT_CALIB_REF;
+      // Stage 3 calibration (write and continuous read)
+      INIT_CAL3_WRITE:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL3_WRITE_READ;
+      INIT_CAL3_WRITE_READ:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_CAL3_READ;
+      INIT_CAL3_READ:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL3_READ_WAIT;
+      INIT_CAL3_READ_WAIT: begin
+        if (cnt_rd_ok_r)
+          if (calib_done_r[2]) begin
+            init_next_state = INIT_CALIB_REF;
+          end else
+            init_next_state = INIT_CAL3_READ;
+      end
+      // Stage 4 calibration (continuous read only, same pattern as stage 3)
+      // only used if DQS_GATE supported
+      INIT_CAL4_READ:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL4_READ_WAIT;
+      INIT_CAL4_READ_WAIT: begin
+        if (cnt_rd_ok_r)
+          // Stage 4 requires inter-stage auto-refresh
+          if (calib_done_r[3] || refresh_req)
+            init_next_state = INIT_PRECHARGE;
+          else
+            init_next_state = INIT_CAL4_READ;
+      end
+      INIT_CALIB_REF:
+        init_next_state = INIT_PRECHARGE;
+    endcase
+  end
+
+  //***************************************************************************
+  // Memory control/address
+  //***************************************************************************
+
+  always @(posedge clkdiv0) // RAS# low for activate, precharge, mode-register load and auto-refresh states
+    if ((init_state_r == INIT_DUMMY_ACTIVE) ||
+        (init_state_r == INIT_PRECHARGE) ||
+        (init_state_r == INIT_LOAD_MODE) ||
+        (init_state_r == INIT_AUTO_REFRESH)) begin
+      ddr_ras_n_r <= 1'b0;
+    end else begin
+      ddr_ras_n_r <= 1'b1;
+    end
+
+  always @(posedge clkdiv0) // CAS# low for mode-register load, auto-refresh, and calibration bursts at burst_cnt_r == 0
+    if ((init_state_r == INIT_LOAD_MODE) ||
+        (init_state_r == INIT_AUTO_REFRESH) ||
+        (cal_write_read && (burst_cnt_r == 2'b00))) begin
+      ddr_cas_n_r <= 1'b0;
+    end else begin
+      ddr_cas_n_r <= 1'b1;
+    end
+
+  always @(posedge clkdiv0) // WE# low for mode-register load, precharge, ZQCL, and calibration writes at burst_cnt_r == 0
+    if ((init_state_r == INIT_LOAD_MODE) ||
+        (init_state_r == INIT_PRECHARGE) ||
+        (init_state_r == INIT_ZQCL) ||
+        (cal_write && (burst_cnt_r == 2'b00)))begin
+      ddr_we_n_r <= 1'b0;
+    end else begin
+      ddr_we_n_r <= 1'b1;
+    end
+
+  //*****************************************************************
+  // memory address during init
+  //*****************************************************************
+
+  always @(posedge clkdiv0) begin // address/bank mux; later non-blocking assignments override earlier ones
+    if ((init_state_r == INIT_PRECHARGE)
+        || (init_state_r == INIT_ZQCL))begin
+      // Precharge all - set A10 = 1
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+      ddr_addr_r[10] <= 1'b1;
+      ddr_ba_r <= {BANK_WIDTH{1'b0}};
+    end else if (init_state_r == INIT_LOAD_MODE) begin
+      ddr_ba_r <= {BANK_WIDTH{1'b0}}; // defaults; the case below overrides BA[1:0] and the address per register
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+      case (init_cnt_r)
+        // EMR (2)
+        INIT_CNTR_EMR2_INIT: begin
+          ddr_ba_r[1:0] <= 2'b10;
+          ddr_addr_r <= {ROW_WIDTH{1'b0}};
+        end
+        // EMR (3)
+        INIT_CNTR_EMR3_INIT: begin
+          ddr_ba_r[1:0] <= 2'b11;
+          if(DDR_TYPE == DDR3)
+            ddr_addr_r <= load_mode_reg3[ROW_WIDTH-1:0];
+          else
+            ddr_addr_r <= {ROW_WIDTH{1'b0}};
+        end
+        // EMR write - A0 = 0 for DLL enable
+        INIT_CNTR_EMR_EN_DLL: begin
+          ddr_ba_r[1:0] <= 2'b01;
+          if(DDR_TYPE == DDR3)
+            ddr_addr_r <= load_mode_reg1[ROW_WIDTH-1:0];
+          else
+            ddr_addr_r <= ext_mode_reg[ROW_WIDTH-1:0];
+        end
+        // MR write, reset DLL (A8=1)
+        INIT_CNTR_MR_RST_DLL: begin
+          if(DDR_TYPE == DDR3)
+            ddr_addr_r <= load_mode_reg0[ROW_WIDTH-1:0];
+          else
+            ddr_addr_r <= load_mode_reg[ROW_WIDTH-1:0];
+          ddr_ba_r[1:0] <= 2'b00;
+          ddr_addr_r[8] <= 1'b1; // must stay after the full-width assignment so A8 wins
+        end
+        // MR write, unreset DLL (A8=0)
+        INIT_CNTR_MR_ACT_DLL: begin
+          ddr_ba_r[1:0] <= 2'b00;
+          ddr_addr_r <= load_mode_reg[ROW_WIDTH-1:0];
+        end
+        // EMR write, OCD default state
+        INIT_CNTR_EMR_DEF_OCD: begin
+          ddr_ba_r[1:0] <= 2'b01;
+          ddr_addr_r <= ext_mode_reg[ROW_WIDTH-1:0];
+          ddr_addr_r[9:7] <= 3'b111; // must stay after the full-width assignment so A9:A7 win
+        end
+        // EMR write - OCD exit
+        INIT_CNTR_EMR_EXIT_OCD: begin
+          ddr_ba_r[1:0] <= 2'b01;
+          ddr_addr_r <= ext_mode_reg[ROW_WIDTH-1:0];
+        end
+        default: begin
+          ddr_ba_r <= {BANK_WIDTH{1'bx}};
+          ddr_addr_r <= {ROW_WIDTH{1'bx}};
+        end
+      endcase
+    end else if (cal_write_read) begin
+      // when writing or reading for Stages 2-4, since training pattern is
+      // either 4 (stage 2) or 8 (stage 3-4) long, if BURST LEN < 8, then
+      // need to issue multiple bursts to read entire training pattern
+      ddr_addr_r[ROW_WIDTH-1:3] <= {(ROW_WIDTH-3){1'b0}}; // replication count now equals the slice width
+      ddr_addr_r[2:0] <= {burst_addr_r, 1'b0};
+      ddr_ba_r <= {BANK_WIDTH{1'b0}}; // full-width zero (was BANK_WIDTH-1 bits, zero-extended)
+    end else if (init_state_r == INIT_DUMMY_ACTIVE) begin
+      // all calibration writes and reads take place in row 0x0 only
+      ddr_ba_r <= {BANK_WIDTH{1'b0}};
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+    end else begin
+      // otherwise address and bank are don't-care: drive X
+      ddr_ba_r <= {BANK_WIDTH{1'bx}};
+      ddr_addr_r <= {ROW_WIDTH{1'bx}};
+    end
+  end
+
+  // Keep CKE asserted after initial power-on delay
+  always @(posedge clkdiv0)
+    ddr_cke_r <= {CKE_WIDTH{done_200us_r}};
+
+  // register commands to memory. Two clock cycle delay from state -> output
+  always @(posedge clk0) begin
+    ddr_addr_r1 <= ddr_addr_r;
+    ddr_ba_r1 <= ddr_ba_r;
+    ddr_cas_n_r1 <= ddr_cas_n_r;
+    ddr_ras_n_r1 <= ddr_ras_n_r;
+    ddr_we_n_r1 <= ddr_we_n_r;
+    ddr_cs_n_r1 <= ddr_cs_n_r;
+  end // always @ (posedge clk0)
+
+  // logic to toggle chip select. The chip_select is
+  // clocked off clkdiv0 and will be asserted for
+  // two clock cycles.
+  always @(posedge clk0) begin // ddr_cs_disable_r is ORed into phy_init_cs_n below to mask the select
+    if(rst0)
+      ddr_cs_disable_r <= {CS_NUM{1'b0}};
+    else begin
+      if(ddr_cs_disable_r[chip_cnt_r])
+        ddr_cs_disable_r[chip_cnt_r] <= 1'b0; // a set mask bit clears itself on the next clk0 edge
+      else begin
+        if(TWO_T_TIME_EN)
+          ddr_cs_disable_r[chip_cnt_r] <= ~ddr_cs_n_r1[chip_cnt_r];
+        else
+          ddr_cs_disable_r[chip_cnt_r] <= ~ddr_cs_n_r[chip_cnt_r];
+      end
+    end
+  end
+
+
+  assign phy_init_addr = ddr_addr_r;
+  assign phy_init_ba = ddr_ba_r;
+  assign phy_init_cas_n = ddr_cas_n_r;
+  assign phy_init_cke = ddr_cke_r;
+  assign phy_init_ras_n = ddr_ras_n_r;
+  assign phy_init_we_n = ddr_we_n_r;
+  assign phy_init_cs_n = (TWO_T_TIME_EN) ? // 2T timing uses the clk0-registered select; the mask applies either way
+                         ddr_cs_n_r1 | ddr_cs_disable_r
+                         : ddr_cs_n_r| ddr_cs_disable_r;
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_io.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_io.v
new file mode 100644
index 0000000..ba82d5b
--- /dev/null
+++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_io.v
@@ -0,0 +1,354 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc.
Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_io.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/29 15:24:03 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module instantiates calibration logic, data, data strobe and the
+//   data mask iobs.
+//Reference:
+//Revision History:
+//   Rev 1.1 - DM_IOB instance made based on USE_DM_PORT value . PK. 25/6/08
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_phy_io # // structural wrapper: calibration block plus CK, DQS, DM and DQ I/O instances
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameters values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter CLK_WIDTH = 1,
+   parameter USE_DM_PORT = 1, // non-zero: instantiate the DM IOBs (see gen_dm_inst)
+   parameter DM_WIDTH = 9,
+   parameter DQ_WIDTH = 72,
+   parameter DQ_BITS = 7,
+   parameter DQ_PER_DQS = 8, // number of DQ bits that share one DQS strobe
+   parameter DQS_BITS = 4,
+   parameter DQS_WIDTH = 9,
+   parameter HIGH_PERFORMANCE_MODE = "TRUE",
+   parameter ODT_WIDTH = 1,
+   parameter ADDITIVE_LAT = 0,
+   parameter CAS_LAT = 5,
+   parameter REG_ENABLE = 1,
+   parameter CLK_PERIOD = 3000,
+   parameter DDR_TYPE = 1,
+   parameter SIM_ONLY = 0,
+   parameter DEBUG_EN = 0,
+   parameter DQS_IO_COL = 0, // indexed two bits per DQS group in gen_dq
+   parameter DQ_IO_MS = 0 // indexed one bit per DQ in gen_dq
+   )
+  (
+   input clk0,
+   input clk90,
+   input clkdiv0,
+   input rst0,
+   input rst90,
+   input rstdiv0,
+   input dm_ce,
+   input [1:0] dq_oe_n,
+   input dqs_oe_n,
+   input dqs_rst_n,
+   input [3:0] calib_start,
+   input ctrl_rden,
+   input phy_init_rden,
+   input calib_ref_done,
+   output [3:0] calib_done,
+   output calib_ref_req,
+   output [DQS_WIDTH-1:0] calib_rden,
+   output [DQS_WIDTH-1:0] calib_rden_sel,
+   input [DQ_WIDTH-1:0] wr_data_rise,
+   input [DQ_WIDTH-1:0] wr_data_fall,
+   input [(DQ_WIDTH/8)-1:0] mask_data_rise,
+   input [(DQ_WIDTH/8)-1:0] mask_data_fall,
+   output [(DQ_WIDTH)-1:0] rd_data_rise,
+   output [(DQ_WIDTH)-1:0] rd_data_fall,
+   output [CLK_WIDTH-1:0] ddr_ck,
+   output [CLK_WIDTH-1:0] ddr_ck_n,
+   output [DM_WIDTH-1:0] ddr_dm,
+   inout [DQS_WIDTH-1:0] ddr_dqs,
+   inout [DQS_WIDTH-1:0] ddr_dqs_n,
+   inout [DQ_WIDTH-1:0] ddr_dq,
+   // Debug signals (optional use)
+   input dbg_idel_up_all,
+   input dbg_idel_down_all,
+   input dbg_idel_up_dq,
+   input dbg_idel_down_dq,
+   input dbg_idel_up_dqs,
+   input dbg_idel_down_dqs,
+   input dbg_idel_up_gate,
+   input dbg_idel_down_gate,
+   input [DQ_BITS-1:0] dbg_sel_idel_dq,
+   input dbg_sel_all_idel_dq,
+   input [DQS_BITS:0] dbg_sel_idel_dqs,
+   input dbg_sel_all_idel_dqs,
+   input [DQS_BITS:0] dbg_sel_idel_gate,
+   input dbg_sel_all_idel_gate,
+   output [3:0] dbg_calib_done,
+   output [3:0] dbg_calib_err,
+   output [(6*DQ_WIDTH)-1:0] dbg_calib_dq_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0] dbg_calib_dqs_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0] dbg_calib_gate_tap_cnt,
+   output [DQS_WIDTH-1:0] dbg_calib_rd_data_sel,
+   output [(5*DQS_WIDTH)-1:0] dbg_calib_rden_dly,
+   output [(5*DQS_WIDTH)-1:0] dbg_calib_gate_dly
+   );
+
+  // ratio of # of physical DM outputs to bytes in data bus
+  // may be different - e.g. if using x4 components
+  localparam DM_TO_BYTE_RATIO = DM_WIDTH / (DQ_WIDTH/8);
+
+  wire [CLK_WIDTH-1:0] ddr_ck_q; // ODDR outputs feeding the differential clock buffers
+  wire [DQS_WIDTH-1:0] delayed_dqs; // from each DQS IOB to the DQ IOBs of its group
+  wire [DQ_WIDTH-1:0] dlyce_dq; // dlyce_*/dlyinc_*/dlyrst_*: delay controls from u_phy_calib to the IOBs
+  wire [DQS_WIDTH-1:0] dlyce_dqs;
+  wire [DQS_WIDTH-1:0] dlyce_gate;
+  wire [DQ_WIDTH-1:0] dlyinc_dq;
+  wire [DQS_WIDTH-1:0] dlyinc_dqs;
+  wire [DQS_WIDTH-1:0] dlyinc_gate;
+  wire dlyrst_dq; // single reset shared by all DQ IOBs
+  wire dlyrst_dqs; // single reset shared by all DQS IOBs
+  wire [DQS_WIDTH-1:0] dlyrst_gate;
+  wire [DQS_WIDTH-1:0] dq_ce; // from each DQS IOB to the .ce port of its DQ IOBs
+  (* KEEP = "TRUE" *) wire [DQS_WIDTH-1:0] en_dqs /* synthesis syn_keep = 1 */;
+  wire [DQS_WIDTH-1:0] rd_data_sel; // per-DQS-group select from u_phy_calib to the DQ IOBs
+
+  //***************************************************************************
+
+  ddr2_phy_calib # // calibration logic: consumes read data, drives the delay controls above
+    (
+     .DQ_WIDTH (DQ_WIDTH),
+     .DQ_BITS (DQ_BITS),
+     .DQ_PER_DQS (DQ_PER_DQS),
+     .DQS_BITS (DQS_BITS),
+     .DQS_WIDTH (DQS_WIDTH),
+     .ADDITIVE_LAT (ADDITIVE_LAT),
+     .CAS_LAT (CAS_LAT),
+     .REG_ENABLE (REG_ENABLE),
+     .CLK_PERIOD (CLK_PERIOD),
+     .SIM_ONLY (SIM_ONLY),
+     .DEBUG_EN (DEBUG_EN)
+     )
+    u_phy_calib
+      (
+       .clk (clk0),
+       .clkdiv (clkdiv0),
+       .rstdiv (rstdiv0),
+       .calib_start (calib_start),
+       .ctrl_rden (ctrl_rden),
+       .phy_init_rden (phy_init_rden),
+       .rd_data_rise (rd_data_rise), // the module's own read-data outputs are fed back here
+       .rd_data_fall (rd_data_fall),
+       .calib_ref_done (calib_ref_done),
+       .calib_done (calib_done),
+       .calib_ref_req (calib_ref_req),
+       .calib_rden (calib_rden),
+       .calib_rden_sel (calib_rden_sel),
+       .dlyrst_dq (dlyrst_dq),
+       .dlyce_dq (dlyce_dq),
+       .dlyinc_dq (dlyinc_dq),
+       .dlyrst_dqs (dlyrst_dqs),
+       .dlyce_dqs (dlyce_dqs),
+       .dlyinc_dqs (dlyinc_dqs),
+       .dlyrst_gate (dlyrst_gate),
+       .dlyce_gate (dlyce_gate),
+       .dlyinc_gate (dlyinc_gate),
+       .en_dqs (en_dqs),
+       .rd_data_sel (rd_data_sel),
+       .dbg_idel_up_all (dbg_idel_up_all),
+       .dbg_idel_down_all (dbg_idel_down_all),
+       .dbg_idel_up_dq (dbg_idel_up_dq),
+       .dbg_idel_down_dq (dbg_idel_down_dq),
+       .dbg_idel_up_dqs (dbg_idel_up_dqs),
+       .dbg_idel_down_dqs (dbg_idel_down_dqs),
+       .dbg_idel_up_gate (dbg_idel_up_gate),
+       .dbg_idel_down_gate (dbg_idel_down_gate),
+       .dbg_sel_idel_dq (dbg_sel_idel_dq),
+       .dbg_sel_all_idel_dq (dbg_sel_all_idel_dq),
+       .dbg_sel_idel_dqs (dbg_sel_idel_dqs),
+       .dbg_sel_all_idel_dqs (dbg_sel_all_idel_dqs),
+       .dbg_sel_idel_gate (dbg_sel_idel_gate),
+       .dbg_sel_all_idel_gate (dbg_sel_all_idel_gate),
+       .dbg_calib_done (dbg_calib_done),
+       .dbg_calib_err (dbg_calib_err),
+       .dbg_calib_dq_tap_cnt (dbg_calib_dq_tap_cnt),
+       .dbg_calib_dqs_tap_cnt (dbg_calib_dqs_tap_cnt),
+       .dbg_calib_gate_tap_cnt (dbg_calib_gate_tap_cnt),
+       .dbg_calib_rd_data_sel (dbg_calib_rd_data_sel),
+       .dbg_calib_rden_dly (dbg_calib_rden_dly),
+       .dbg_calib_gate_dly (dbg_calib_gate_dly)
+       );
+
+  //***************************************************************************
+  // Memory clock generation
+  //***************************************************************************
+
+  genvar ck_i;
+  generate
+    for(ck_i = 0; ck_i < CLK_WIDTH; ck_i = ck_i+1) begin: gen_ck // one ODDR + OBUFDS pair per memory clock output
+      ODDR #
+        (
+         .SRTYPE ("SYNC"),
+         .DDR_CLK_EDGE ("OPPOSITE_EDGE")
+         )
+        u_oddr_ck_i
+          (
+           .Q (ddr_ck_q[ck_i]),
+           .C (clk0),
+           .CE (1'b1),
+           .D1 (1'b0), // constant D1/D2 inputs make the ODDR output a forwarded copy of the clk0 waveform
+           .D2 (1'b1),
+           .R (1'b0),
+           .S (1'b0)
+           );
+      // Can insert ODELAY here if required
+      OBUFDS u_obuf_ck_i
+        (
+         .I (ddr_ck_q[ck_i]),
+         .O (ddr_ck[ck_i]),
+         .OB (ddr_ck_n[ck_i])
+         );
+    end
+  endgenerate
+
+  //***************************************************************************
+  // DQS instances
+  //***************************************************************************
+
+  genvar dqs_i;
+  generate
+    for(dqs_i = 0; dqs_i < DQS_WIDTH; dqs_i = dqs_i+1) begin: gen_dqs // one strobe IOB per DQS pair
+      ddr2_phy_dqs_iob #
+        (
+         .DDR_TYPE (DDR_TYPE),
+         .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE)
+         )
+        u_iob_dqs
+          (
+           .clk0 (clk0),
+           .clkdiv0 (clkdiv0),
+           .rst0 (rst0),
+           .dlyinc_dqs (dlyinc_dqs[dqs_i]),
+           .dlyce_dqs (dlyce_dqs[dqs_i]),
+           .dlyrst_dqs (dlyrst_dqs), // shared scalar, unlike the per-strobe gate reset below
+           .dlyinc_gate (dlyinc_gate[dqs_i]),
+           .dlyce_gate (dlyce_gate[dqs_i]),
+           .dlyrst_gate (dlyrst_gate[dqs_i]),
+           .dqs_oe_n (dqs_oe_n),
+           .dqs_rst_n (dqs_rst_n),
+           .en_dqs (en_dqs[dqs_i]),
+           .ddr_dqs (ddr_dqs[dqs_i]),
+           .ddr_dqs_n (ddr_dqs_n[dqs_i]),
+           .dq_ce (dq_ce[dqs_i]),
+           .delayed_dqs (delayed_dqs[dqs_i])
+           );
+    end
+  endgenerate
+
+  //***************************************************************************
+  // DM instances
+  //***************************************************************************
+
+  genvar dm_i;
+  generate
+    if (USE_DM_PORT) begin: gen_dm_inst // no DM IOBs are built when USE_DM_PORT is 0
+      for(dm_i = 0; dm_i < DM_WIDTH; dm_i = dm_i+1) begin: gen_dm
+        ddr2_phy_dm_iob u_iob_dm
+          (
+           .clk90 (clk90),
+           .dm_ce (dm_ce),
+           .mask_data_rise (mask_data_rise[dm_i/DM_TO_BYTE_RATIO]), // DM_TO_BYTE_RATIO consecutive DM pins share one mask bit
+           .mask_data_fall (mask_data_fall[dm_i/DM_TO_BYTE_RATIO]),
+           .ddr_dm (ddr_dm[dm_i])
+           );
+      end
+    end
+  endgenerate
+
+  //***************************************************************************
+  // DQ IOB instances
+  //***************************************************************************
+
+  genvar dq_i;
+  generate
+    for(dq_i = 0; dq_i < DQ_WIDTH; dq_i = dq_i+1) begin: gen_dq // one IOB per DQ bit; strobe-related nets are shared per DQ_PER_DQS bits
+      ddr2_phy_dq_iob #
+        (
+         .DQ_COL (DQS_IO_COL[2*(dq_i/DQ_PER_DQS)+1:2*(dq_i/DQ_PER_DQS)]), // NOTE(review): presumably relies on the top level passing a value at least 2*DQS_WIDTH bits wide - confirm
+         .DQ_MS (DQ_IO_MS[dq_i]), // NOTE(review): presumably relies on the top level passing a value at least DQ_WIDTH bits wide - confirm
+         .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE)
+         )
+        u_iob_dq
+          (
+           .clk0 (clk0),
+           .clk90 (clk90),
+           .clkdiv0 (clkdiv0),
+           .rst90 (rst90),
+           .dlyinc (dlyinc_dq[dq_i]),
+           .dlyce (dlyce_dq[dq_i]),
+           .dlyrst (dlyrst_dq),
+           .dq_oe_n (dq_oe_n),
+           .dqs (delayed_dqs[dq_i/DQ_PER_DQS]),
+           .ce (dq_ce[dq_i/DQ_PER_DQS]),
+           .rd_data_sel (rd_data_sel[dq_i/DQ_PER_DQS]),
+           .wr_data_rise (wr_data_rise[dq_i]),
+           .wr_data_fall (wr_data_fall[dq_i]),
+           .rd_data_rise (rd_data_rise[dq_i]),
+           .rd_data_fall (rd_data_fall[dq_i]),
+           .ddr_dq (ddr_dq[dq_i])
+           );
+    end
+  endgenerate
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_top.v
b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_top.v new file mode 100644 index 0000000..d9a9830 --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_top.v @@ -0,0 +1,393 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. 
+// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. +//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_phy_top.v +// /___/ /\ Date Last Modified: $Date: 2008/07/22 15:41:06 $ +// \ \ / \ Date Created: Wed Aug 16 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +// Top-level for memory physical layer (PHY) interface +//Reference: +//Revision History: +//***************************************************************************** + +`timescale 1ns/1ps + +(* X_CORE_INFO = "mig_v2_3_ddr2_v5, Coregen 10.1.02" , CORE_GENERATION_INFO = "ddr2_v5,mig_v2_3,{component_name=ddr2_phy_top, BANK_WIDTH=2, CKE_WIDTH=1, CLK_WIDTH=2, COL_WIDTH=10, CS_NUM=1, CS_WIDTH=1, DM_WIDTH=8, DQ_WIDTH=64, DQ_PER_DQS=8, DQS_WIDTH=8, ODT_WIDTH=1, ROW_WIDTH=13, ADDITIVE_LAT=0, BURST_LEN=4, BURST_TYPE=0, CAS_LAT=4, ECC_ENABLE=0, MULTI_BANK_EN=1, TWO_T_TIME_EN=1, ODT_TYPE=1, REDUCE_DRV=0, REG_ENABLE=0, TREFI_NS=7800, TRAS=40000, TRCD=15000, TRFC=127500, TRP=15000, TRTP=7500, TWR=15000, TWTR=7500, DDR2_CLK_PERIOD=5000, RST_ACT_LOW=1}" *) +module ddr2_phy_top # + ( + // Following parameters are for 72-bit RDIMM design (for ML561 Reference + // board design). Actual values may be different. Actual parameters values + // are passed from design top module ddr2_sdram module. Please refer to + // the ddr2_sdram module for actual values. 
+ parameter BANK_WIDTH = 2, + parameter CLK_WIDTH = 1, + parameter CKE_WIDTH = 1, + parameter COL_WIDTH = 10, + parameter CS_NUM = 1, + parameter CS_WIDTH = 1, + parameter USE_DM_PORT = 1, + parameter DM_WIDTH = 9, + parameter DQ_WIDTH = 72, + parameter DQ_BITS = 7, + parameter DQ_PER_DQS = 8, + parameter DQS_WIDTH = 9, + parameter DQS_BITS = 4, + parameter HIGH_PERFORMANCE_MODE = "TRUE", + parameter ODT_WIDTH = 1, + parameter ROW_WIDTH = 14, + parameter ADDITIVE_LAT = 0, + parameter TWO_T_TIME_EN = 0, + parameter BURST_LEN = 4, + parameter BURST_TYPE = 0, + parameter CAS_LAT = 5, + parameter TWR = 15000, + parameter ECC_ENABLE = 0, + parameter ODT_TYPE = 1, + parameter DDR_TYPE = 1, + parameter REDUCE_DRV = 0, + parameter REG_ENABLE = 1, + parameter CLK_PERIOD = 3000, + parameter SIM_ONLY = 0, + parameter DEBUG_EN = 0, + parameter DQS_IO_COL = 0, + parameter DQ_IO_MS = 0 + ) + ( + input clk0, + input clk90, + input clkdiv0, + input rst0, + input rst90, + input rstdiv0, + input ctrl_wren, + input [ROW_WIDTH-1:0] ctrl_addr, + input [BANK_WIDTH-1:0] ctrl_ba, + input ctrl_ras_n, + input ctrl_cas_n, + input ctrl_we_n, + input [CS_NUM-1:0] ctrl_cs_n, + input ctrl_rden, + input ctrl_ref_flag, + input [(2*DQ_WIDTH)-1:0] wdf_data, + input [(2*DQ_WIDTH/8)-1:0] wdf_mask_data, + output wdf_rden, + output phy_init_done, + output [DQS_WIDTH-1:0] phy_calib_rden, + output [DQS_WIDTH-1:0] phy_calib_rden_sel, + output [DQ_WIDTH-1:0] rd_data_rise, + output [DQ_WIDTH-1:0] rd_data_fall, + output [CLK_WIDTH-1:0] ddr_ck, + output [CLK_WIDTH-1:0] ddr_ck_n, + output [ROW_WIDTH-1:0] ddr_addr, + output [BANK_WIDTH-1:0] ddr_ba, + output ddr_ras_n, + output ddr_cas_n, + output ddr_we_n, + output [CS_WIDTH-1:0] ddr_cs_n, + output [CKE_WIDTH-1:0] ddr_cke, + output [ODT_WIDTH-1:0] ddr_odt, + output [DM_WIDTH-1:0] ddr_dm, + inout [DQS_WIDTH-1:0] ddr_dqs, + inout [DQS_WIDTH-1:0] ddr_dqs_n, + inout [DQ_WIDTH-1:0] ddr_dq, + // Debug signals (optional use) + input dbg_idel_up_all, + input 
dbg_idel_down_all, + input dbg_idel_up_dq, + input dbg_idel_down_dq, + input dbg_idel_up_dqs, + input dbg_idel_down_dqs, + input dbg_idel_up_gate, + input dbg_idel_down_gate, + input [DQ_BITS-1:0] dbg_sel_idel_dq, + input dbg_sel_all_idel_dq, + input [DQS_BITS:0] dbg_sel_idel_dqs, + input dbg_sel_all_idel_dqs, + input [DQS_BITS:0] dbg_sel_idel_gate, + input dbg_sel_all_idel_gate, + output [3:0] dbg_calib_done, + output [3:0] dbg_calib_err, + output [(6*DQ_WIDTH)-1:0] dbg_calib_dq_tap_cnt, + output [(6*DQS_WIDTH)-1:0] dbg_calib_dqs_tap_cnt, + output [(6*DQS_WIDTH)-1:0] dbg_calib_gate_tap_cnt, + output [DQS_WIDTH-1:0] dbg_calib_rd_data_sel, + output [(5*DQS_WIDTH)-1:0] dbg_calib_rden_dly, + output [(5*DQS_WIDTH)-1:0] dbg_calib_gate_dly + ); + + wire [3:0] calib_done; + wire calib_ref_done; + wire calib_ref_req; + wire [3:0] calib_start; + wire dm_ce; + wire [1:0] dq_oe_n; + wire dqs_oe_n; + wire dqs_rst_n; + wire [(DQ_WIDTH/8)-1:0] mask_data_fall; + wire [(DQ_WIDTH/8)-1:0] mask_data_rise; + wire [CS_NUM-1:0] odt; + wire [ROW_WIDTH-1:0] phy_init_addr; + wire [BANK_WIDTH-1:0] phy_init_ba; + wire phy_init_cas_n; + wire [CKE_WIDTH-1:0] phy_init_cke; + wire [CS_NUM-1:0] phy_init_cs_n; + wire phy_init_data_sel; + wire phy_init_ras_n; + wire phy_init_rden; + wire phy_init_we_n; + wire phy_init_wren; + wire [DQ_WIDTH-1:0] wr_data_fall; + wire [DQ_WIDTH-1:0] wr_data_rise; + + //*************************************************************************** + + ddr2_phy_write # + ( + .DQ_WIDTH (DQ_WIDTH), + .CS_NUM (CS_NUM), + .ADDITIVE_LAT (ADDITIVE_LAT), + .CAS_LAT (CAS_LAT), + .ECC_ENABLE (ECC_ENABLE), + .ODT_TYPE (ODT_TYPE), + .REG_ENABLE (REG_ENABLE), + .DDR_TYPE (DDR_TYPE) + ) + u_phy_write + ( + .clk0 (clk0), + .clk90 (clk90), + .rst90 (rst90), + .wdf_data (wdf_data), + .wdf_mask_data (wdf_mask_data), + .ctrl_wren (ctrl_wren), + .phy_init_wren (phy_init_wren), + .phy_init_data_sel (phy_init_data_sel), + .dm_ce (dm_ce), + .dq_oe_n (dq_oe_n), + .dqs_oe_n (dqs_oe_n), + 
.dqs_rst_n (dqs_rst_n), + .wdf_rden (wdf_rden), + .odt (odt), + .wr_data_rise (wr_data_rise), + .wr_data_fall (wr_data_fall), + .mask_data_rise (mask_data_rise), + .mask_data_fall (mask_data_fall) + ); + + ddr2_phy_io # + ( + .CLK_WIDTH (CLK_WIDTH), + .USE_DM_PORT (USE_DM_PORT), + .DM_WIDTH (DM_WIDTH), + .DQ_WIDTH (DQ_WIDTH), + .DQ_BITS (DQ_BITS), + .DQ_PER_DQS (DQ_PER_DQS), + .DQS_BITS (DQS_BITS), + .DQS_WIDTH (DQS_WIDTH), + .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE), + .ODT_WIDTH (ODT_WIDTH), + .ADDITIVE_LAT (ADDITIVE_LAT), + .CAS_LAT (CAS_LAT), + .REG_ENABLE (REG_ENABLE), + .CLK_PERIOD (CLK_PERIOD), + .DDR_TYPE (DDR_TYPE), + .SIM_ONLY (SIM_ONLY), + .DEBUG_EN (DEBUG_EN), + .DQS_IO_COL (DQS_IO_COL), + .DQ_IO_MS (DQ_IO_MS) + ) + u_phy_io + ( + .clk0 (clk0), + .clk90 (clk90), + .clkdiv0 (clkdiv0), + .rst0 (rst0), + .rst90 (rst90), + .rstdiv0 (rstdiv0), + .dm_ce (dm_ce), + .dq_oe_n (dq_oe_n), + .dqs_oe_n (dqs_oe_n), + .dqs_rst_n (dqs_rst_n), + .calib_start (calib_start), + .ctrl_rden (ctrl_rden), + .phy_init_rden (phy_init_rden), + .calib_ref_done (calib_ref_done), + .calib_done (calib_done), + .calib_ref_req (calib_ref_req), + .calib_rden (phy_calib_rden), + .calib_rden_sel (phy_calib_rden_sel), + .wr_data_rise (wr_data_rise), + .wr_data_fall (wr_data_fall), + .mask_data_rise (mask_data_rise), + .mask_data_fall (mask_data_fall), + .rd_data_rise (rd_data_rise), + .rd_data_fall (rd_data_fall), + .ddr_ck (ddr_ck), + .ddr_ck_n (ddr_ck_n), + .ddr_dm (ddr_dm), + .ddr_dqs (ddr_dqs), + .ddr_dqs_n (ddr_dqs_n), + .ddr_dq (ddr_dq), + .dbg_idel_up_all (dbg_idel_up_all), + .dbg_idel_down_all (dbg_idel_down_all), + .dbg_idel_up_dq (dbg_idel_up_dq), + .dbg_idel_down_dq (dbg_idel_down_dq), + .dbg_idel_up_dqs (dbg_idel_up_dqs), + .dbg_idel_down_dqs (dbg_idel_down_dqs), + .dbg_idel_up_gate (dbg_idel_up_gate), + .dbg_idel_down_gate (dbg_idel_down_gate), + .dbg_sel_idel_dq (dbg_sel_idel_dq), + .dbg_sel_all_idel_dq (dbg_sel_all_idel_dq), + .dbg_sel_idel_dqs (dbg_sel_idel_dqs), + 
.dbg_sel_all_idel_dqs (dbg_sel_all_idel_dqs), + .dbg_sel_idel_gate (dbg_sel_idel_gate), + .dbg_sel_all_idel_gate (dbg_sel_all_idel_gate), + .dbg_calib_done (dbg_calib_done), + .dbg_calib_err (dbg_calib_err), + .dbg_calib_dq_tap_cnt (dbg_calib_dq_tap_cnt), + .dbg_calib_dqs_tap_cnt (dbg_calib_dqs_tap_cnt), + .dbg_calib_gate_tap_cnt (dbg_calib_gate_tap_cnt), + .dbg_calib_rd_data_sel (dbg_calib_rd_data_sel), + .dbg_calib_rden_dly (dbg_calib_rden_dly), + .dbg_calib_gate_dly (dbg_calib_gate_dly) + ); + + ddr2_phy_ctl_io # + ( + .BANK_WIDTH (BANK_WIDTH), + .CKE_WIDTH (CKE_WIDTH), + .COL_WIDTH (COL_WIDTH), + .CS_NUM (CS_NUM), + .CS_WIDTH (CS_WIDTH), + .TWO_T_TIME_EN (TWO_T_TIME_EN), + .ODT_WIDTH (ODT_WIDTH), + .ROW_WIDTH (ROW_WIDTH), + .DDR_TYPE (DDR_TYPE) + ) + u_phy_ctl_io + ( + .clk0 (clk0), + .clk90 (clk90), + .rst0 (rst0), + .rst90 (rst90), + .ctrl_addr (ctrl_addr), + .ctrl_ba (ctrl_ba), + .ctrl_ras_n (ctrl_ras_n), + .ctrl_cas_n (ctrl_cas_n), + .ctrl_we_n (ctrl_we_n), + .ctrl_cs_n (ctrl_cs_n), + .phy_init_addr (phy_init_addr), + .phy_init_ba (phy_init_ba), + .phy_init_ras_n (phy_init_ras_n), + .phy_init_cas_n (phy_init_cas_n), + .phy_init_we_n (phy_init_we_n), + .phy_init_cs_n (phy_init_cs_n), + .phy_init_cke (phy_init_cke), + .phy_init_data_sel (phy_init_data_sel), + .odt (odt), + .ddr_addr (ddr_addr), + .ddr_ba (ddr_ba), + .ddr_ras_n (ddr_ras_n), + .ddr_cas_n (ddr_cas_n), + .ddr_we_n (ddr_we_n), + .ddr_cke (ddr_cke), + .ddr_cs_n (ddr_cs_n), + .ddr_odt (ddr_odt) + ); + + ddr2_phy_init # + ( + .BANK_WIDTH (BANK_WIDTH), + .CKE_WIDTH (CKE_WIDTH), + .COL_WIDTH (COL_WIDTH), + .CS_NUM (CS_NUM), + .DQ_WIDTH (DQ_WIDTH), + .ODT_WIDTH (ODT_WIDTH), + .ROW_WIDTH (ROW_WIDTH), + .ADDITIVE_LAT (ADDITIVE_LAT), + .BURST_LEN (BURST_LEN), + .BURST_TYPE (BURST_TYPE), + .TWO_T_TIME_EN(TWO_T_TIME_EN), + .CAS_LAT (CAS_LAT), + .ODT_TYPE (ODT_TYPE), + .REDUCE_DRV (REDUCE_DRV), + .REG_ENABLE (REG_ENABLE), + .TWR (TWR), + .CLK_PERIOD (CLK_PERIOD), + .DDR_TYPE (DDR_TYPE), + .SIM_ONLY (SIM_ONLY) 
+ ) + u_phy_init + ( + .clk0 (clk0), + .clkdiv0 (clkdiv0), + .rst0 (rst0), + .rstdiv0 (rstdiv0), + .calib_done (calib_done), + .ctrl_ref_flag (ctrl_ref_flag), + .calib_ref_req (calib_ref_req), + .calib_start (calib_start), + .calib_ref_done (calib_ref_done), + .phy_init_wren (phy_init_wren), + .phy_init_rden (phy_init_rden), + .phy_init_addr (phy_init_addr), + .phy_init_ba (phy_init_ba), + .phy_init_ras_n (phy_init_ras_n), + .phy_init_cas_n (phy_init_cas_n), + .phy_init_we_n (phy_init_we_n), + .phy_init_cs_n (phy_init_cs_n), + .phy_init_cke (phy_init_cke), + .phy_init_done (phy_init_done), + .phy_init_data_sel (phy_init_data_sel) + ); + +endmodule diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_write.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_write.v new file mode 100644 index 0000000..4b9b3e0 --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_phy_write.v @@ -0,0 +1,446 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. 
By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. +//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_phy_write.v +// /___/ /\ Date Last Modified: $Date: 2008/07/29 15:24:03 $ +// \ \ / \ Date Created: Thu Aug 24 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +//Reference: +// Handles delaying various write control signals appropriately depending +// on CAS latency, additive latency, etc. Also splits the data and mask in +// rise and fall buses. +//Revision History: +// Rev 1.1 - For Dual Rank parts support ODT logic corrected. PK. 
08/05/08
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_phy_write #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameters values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter DQ_WIDTH      = 72,
+   parameter CS_NUM        = 1,
+   parameter ADDITIVE_LAT  = 0,
+   parameter CAS_LAT       = 5,
+   parameter ECC_ENABLE    = 0,
+   parameter ODT_TYPE      = 1,
+   parameter REG_ENABLE    = 1,
+   parameter DDR_TYPE      = 1
+   )
+  (
+   input                         clk0,
+   input                         clk90,
+   input                         rst90,
+   input [(2*DQ_WIDTH)-1:0]      wdf_data,
+   input [(2*DQ_WIDTH/8)-1:0]    wdf_mask_data,
+   input                         ctrl_wren,
+   input                         phy_init_wren,
+   input                         phy_init_data_sel,
+   output reg                    dm_ce,
+   output reg [1:0]              dq_oe_n,
+   output reg                    dqs_oe_n ,
+   output reg                    dqs_rst_n ,
+   output                        wdf_rden,
+   output reg [CS_NUM-1:0]       odt ,
+   output [DQ_WIDTH-1:0]         wr_data_rise,
+   output [DQ_WIDTH-1:0]         wr_data_fall,
+   output [(DQ_WIDTH/8)-1:0]     mask_data_rise,
+   output [(DQ_WIDTH/8)-1:0]     mask_data_fall
+   );
+
+  localparam   MASK_WIDTH               = DQ_WIDTH/8;
+  localparam   DDR1                     = 0;
+  localparam   DDR2                     = 1;
+  localparam   DDR3                     = 2;
+
+  // (MIN,MAX) value of WR_LATENCY for DDR1:
+  //   REG_ENABLE   = (0,1)
+  //   ECC_ENABLE   = (0,1)
+  //   Write latency = 1
+  //   Total: (1,3)
+  // (MIN,MAX) value of WR_LATENCY for DDR2:
+  //   REG_ENABLE   = (0,1)
+  //   ECC_ENABLE   = (0,1)
+  //   Write latency = ADDITIVE_LAT + CAS_LAT - 1 = (0,4) + (3,5) - 1 = (2,8)
+  //     ADDITIVE_LAT = (0,4) (JEDEC79-2B)
+  //     CAS_LAT      = (3,5) (JEDEC79-2B)
+  //   Total: (2,10)
+  localparam WR_LATENCY = (DDR_TYPE == DDR3) ?
+             (ADDITIVE_LAT + (CAS_LAT) + REG_ENABLE ) :
+             (DDR_TYPE == DDR2) ?
+             (ADDITIVE_LAT + (CAS_LAT-1) + REG_ENABLE ) :
+             (1 + REG_ENABLE );
+
+  // NOTE that ODT timing does not need to be delayed for registered
+  // DIMM case, since like other control/address signals, it gets
+  // delayed by one clock cycle at the DIMM
+  localparam ODT_WR_LATENCY = WR_LATENCY - REG_ENABLE;
+
+  wire                         dm_ce_0;
+  reg                          dm_ce_r;
+  wire [1:0]                   dq_oe_0;
+  reg [1:0]                    dq_oe_n_90_r1;
+  reg [1:0]                    dq_oe_270;
+  wire                         dqs_oe_0;
+  reg                          dqs_oe_270;
+  reg                          dqs_oe_n_180_r1;
+  wire                         dqs_rst_0;
+  reg                          dqs_rst_n_180_r1;
+  reg                          dqs_rst_270;
+  reg                          ecc_dm_error_r;
+  reg                          ecc_dm_error_r1;
+  reg [(DQ_WIDTH-1):0]         init_data_f;
+  reg [(DQ_WIDTH-1):0]         init_data_r;
+  reg [3:0]                    init_wdf_cnt_r;
+  wire                         odt_0;
+  reg                          rst90_r /* synthesis syn_maxfan = 10 */;
+  reg [10:0]                   wr_stages ;
+  reg [(2*DQ_WIDTH)-1:0]       wdf_data_r;
+  reg [(2*DQ_WIDTH/8)-1:0]     wdf_mask_r;
+  wire [(2*DQ_WIDTH/8)-1:0]    wdf_ecc_mask;
+
+  reg [(2*DQ_WIDTH/8)-1:0]     wdf_mask_r1;
+  wire                         wdf_rden_0;
+  reg                          calib_rden_90_r;
+  reg                          wdf_rden_90_r;
+  reg                          wdf_rden_90_r1;
+  reg                          wdf_rden_270;
+
+  always @(posedge clk90)
+    rst90_r <= rst90;
+
+  //***************************************************************************
+  // Analysis of additional pipeline delays:
+  //   1. dq_oe (DQ 3-state): 1 CLK90 cyc in IOB 3-state FF
+  //   2. dqs_oe (DQS 3-state): 1 CLK180 cyc in IOB 3-state FF
+  //   3. dqs_rst (DQS output value reset): 1 CLK180 cyc in FF + 1 CLK180 cyc
+  //      in IOB DDR
+  //   4. odt (ODT control): 1 CLK0 cyc in IOB FF
+  //   5. write data (output two cyc after wdf_rden - output of RAMB_FIFO w/
+  //      output register enabled): 2 CLK90 cyc in OSERDES
+  //***************************************************************************
+
+  // DQS 3-state must be asserted one extra clock cycle because of the write
+  // pre- and post-amble (extra half clock cycle for each)
+  assign dqs_oe_0 = wr_stages[WR_LATENCY-1] | wr_stages[WR_LATENCY-2]; // NOTE(review): WR_LATENCY-2 < 0 when WR_LATENCY == 1 -- confirm unreachable
+
+  // same goes for ODT, need to handle both pre- and post-amble (generate
+  // ODT only for DDR2)
+  // ODT generation for DDR2 based on write latency. The MIN write
+  // latency is 2. Based on the write latency ODT is asserted.
+  generate
+    if ((DDR_TYPE != DDR1) && (ODT_TYPE > 0))begin: gen_odt_ddr2
+       if(ODT_WR_LATENCY > 2)
+         assign odt_0 =
+                   wr_stages[ODT_WR_LATENCY-1] |
+                   wr_stages[ODT_WR_LATENCY-2] |
+                   wr_stages[ODT_WR_LATENCY-3] ;
+       else
+         assign odt_0 =
+                  wr_stages[ODT_WR_LATENCY] |
+                  wr_stages[ODT_WR_LATENCY-1] |
+                  wr_stages[ODT_WR_LATENCY-2] ;
+    end else
+      assign odt_0 = 1'b0;
+   endgenerate
+
+  assign dq_oe_0[0]   = wr_stages[WR_LATENCY-1] | wr_stages[WR_LATENCY];
+  assign dq_oe_0[1]   = wr_stages[WR_LATENCY-1] | wr_stages[WR_LATENCY-2];
+  assign dqs_rst_0    = ~wr_stages[WR_LATENCY-2];
+  assign dm_ce_0      = wr_stages[WR_LATENCY] | wr_stages[WR_LATENCY-1]
+                        | wr_stages[WR_LATENCY-2];
+
+  // write data fifo, read flag assertion
+  generate
+    if (DDR_TYPE != DDR1) begin: gen_wdf_ddr2
+      if (WR_LATENCY > 2)
+        assign wdf_rden_0 = wr_stages[WR_LATENCY-3];
+      else
+        assign wdf_rden_0 = wr_stages[WR_LATENCY-2];
+    end else begin: gen_wdf_ddr1
+      assign wdf_rden_0 = wr_stages[WR_LATENCY-2];
+    end
+  endgenerate
+
+  // first stage isn't registered
+  always @(*)
+    wr_stages[0] = (phy_init_data_sel) ? ctrl_wren : phy_init_wren;
+
+  always @(posedge clk0) begin
+    wr_stages[1] <= wr_stages[0];
+    wr_stages[2] <= wr_stages[1];
+    wr_stages[3] <= wr_stages[2];
+    wr_stages[4] <= wr_stages[3];
+    wr_stages[5] <= wr_stages[4];
+    wr_stages[6] <= wr_stages[5];
+    wr_stages[7] <= wr_stages[6];
+    wr_stages[8] <= wr_stages[7];
+    wr_stages[9] <= wr_stages[8];
+    wr_stages[10] <= wr_stages[9];
+  end
+
+  // intermediate synchronization to CLK270
+  always @(negedge clk90) begin
+    dq_oe_270         <= dq_oe_0;
+    dqs_oe_270        <= dqs_oe_0;
+    dqs_rst_270       <= dqs_rst_0;
+    wdf_rden_270      <= wdf_rden_0;
+  end
+
+  // synchronize DQS signals to CLK180
+  always @(negedge clk0) begin
+    dqs_oe_n_180_r1  <= ~dqs_oe_270;
+    dqs_rst_n_180_r1 <= ~dqs_rst_270;
+  end
+
+  // All write data-related signals synced to CLK90
+  always @(posedge clk90) begin
+    dq_oe_n_90_r1  <= ~dq_oe_270;
+    wdf_rden_90_r  <= wdf_rden_270;
+  end
+
+  // generate for wdf_rden and calib rden. These signals
+  // are asserted based on write latency. For write
+  // latency of 2, the extra register stage is taken out.
+  generate
+   if (WR_LATENCY > 2) begin
+     always @(posedge clk90) begin
+        // assert wdf rden only for non calibration operations
+        wdf_rden_90_r1 <=  wdf_rden_90_r &
+                           phy_init_data_sel;
+        // rden for calibration
+        calib_rden_90_r <= wdf_rden_90_r;
+     end
+   end else begin
+     always @(*) begin
+        wdf_rden_90_r1 = wdf_rden_90_r
+                         & phy_init_data_sel;
+        calib_rden_90_r = wdf_rden_90_r;
+     end
+   end // else: !if(WR_LATENCY > 2)
+  endgenerate
+
+  // dm CE signal to stop dm oscillation
+  always @(negedge clk90)begin
+    dm_ce_r <= dm_ce_0;
+    dm_ce <= dm_ce_r;
+  end
+
+  // When in ECC mode the upper byte [71:64] will have the
+  // ECC parity. Mapping the bytes which have valid data
+  // to the upper byte in ecc mode. Also in ecc mode there
+  // is an extra register stage to account for timing.
+
+  genvar mask_i;
+  generate
+    if(ECC_ENABLE) begin
+      for (mask_i  = 0; mask_i < (2*DQ_WIDTH)/72;
+          mask_i = mask_i+1) begin: gen_mask
+          assign wdf_ecc_mask[((mask_i*9)+9)-1:(mask_i*9)] =
+                {&wdf_mask_data[(mask_i*8)+(7+mask_i):mask_i*9],
+                wdf_mask_data[(mask_i*8)+(7+mask_i):mask_i*9]};
+      end
+    end
+   endgenerate
+
+  generate
+    if (ECC_ENABLE) begin:gen_ecc_reg
+       always @(posedge clk90)begin
+          if(phy_init_data_sel)
+               wdf_mask_r <= wdf_ecc_mask;
+         else
+             wdf_mask_r <= {(2*DQ_WIDTH/8){1'b0}};
+      end
+    end else begin
+      always@(posedge clk90) begin
+        if (phy_init_data_sel)
+          wdf_mask_r <= wdf_mask_data;
+        else
+          wdf_mask_r <= {(2*DQ_WIDTH/8){1'b0}};
+      end
+    end
+  endgenerate
+
+   always @(posedge clk90) begin
+      if(phy_init_data_sel)
+          wdf_data_r <= wdf_data;
+      else
+          wdf_data_r <={init_data_f,init_data_r};
+   end
+
+  // Error generation block during simulation.
+  // Error will be displayed when all the DM
+  // bits are not zero. The error will be
+  // displayed only during the start of the sequence
+  // for errors that are continuous over many cycles.
+  generate
+    if (ECC_ENABLE) begin: gen_ecc_error
+      always @(posedge clk90) begin
+        //synthesis translate_off
+        wdf_mask_r1 <= wdf_mask_r;
+        if(DQ_WIDTH > 72)
+           ecc_dm_error_r
+              <= (
+              (~wdf_mask_r1[35] && (|wdf_mask_r1[34:27])) ||
+              (~wdf_mask_r1[26] && (|wdf_mask_r1[25:18])) ||
+              (~wdf_mask_r1[17] && (|wdf_mask_r1[16:9])) ||
+              (~wdf_mask_r1[8] &&  (|wdf_mask_r1[7:0]))) && phy_init_data_sel;
+         else
+            ecc_dm_error_r
+              <= ((~wdf_mask_r1[17] && (|wdf_mask_r1[16:9])) ||
+              (~wdf_mask_r1[8] &&  (|wdf_mask_r1[7:0]))) && phy_init_data_sel;
+        ecc_dm_error_r1 <= ecc_dm_error_r ;
+        if (ecc_dm_error_r && ~ecc_dm_error_r1) // assert the error only once.
+          $display ("ECC DM ERROR. ");
+        //synthesis translate_on
+      end
+    end
+  endgenerate
+
+  //***************************************************************************
+  // State logic to write calibration training patterns
+  //***************************************************************************
+
+  always @(posedge clk90) begin
+    if (rst90_r) begin
+      init_wdf_cnt_r  <= 4'd0;
+      init_data_r <= {DQ_WIDTH{1'bx}}; // don't-care, sized to the register (was a fixed 64 bits)
+      init_data_f <= {DQ_WIDTH{1'bx}}; // don't-care, sized to the register (was a fixed 64 bits)
+    end else begin
+      init_wdf_cnt_r  <= init_wdf_cnt_r + calib_rden_90_r;
+      casex (init_wdf_cnt_r)
+        // First stage calibration. Pattern (rise/fall) = 1(r)->0(f)
+        // The rise data and fall data are already interleaved in the manner
+        // required for data into the WDF write FIFO
+        4'b00xx: begin
+          init_data_r <= {DQ_WIDTH{1'b1}};
+          init_data_f <= {DQ_WIDTH{1'b0}};
+        end
+        // Second stage calibration. Pattern = 1(r)->1(f)->0(r)->0(f)
+        4'b01x0: begin
+           init_data_r <= {DQ_WIDTH{1'b1}};
+           init_data_f <= {DQ_WIDTH{1'b1}};
+          end
+        4'b01x1: begin
+           init_data_r <= {DQ_WIDTH{1'b0}};
+           init_data_f <= {DQ_WIDTH{1'b0}};
+        end
+        // MIG 2.1: Changed Stage 3/4 training pattern
+        // Third and fourth stage calibration pattern =
+        //   11(r)->ee(f)->ee(r)->11(f)->11(r)->ee(f)->ee(r)->11(f)
+        4'b1000: begin
+          init_data_r <= {DQ_WIDTH/4{4'h1}};
+          init_data_f <= {DQ_WIDTH/4{4'hE}};
+        end
+        4'b1001: begin
+          init_data_r <= {DQ_WIDTH/4{4'hE}};
+          init_data_f <= {DQ_WIDTH/4{4'h1}};
+          end
+        4'b1010: begin
+          init_data_r <= {(DQ_WIDTH/4){4'h1}};
+          init_data_f <= {(DQ_WIDTH/4){4'hE}};
+        end
+        4'b1011: begin
+          init_data_r <= {(DQ_WIDTH/4){4'hE}};
+          init_data_f <= {(DQ_WIDTH/4){4'h1}};
+        end
+        default: begin
+          init_data_f <= {DQ_WIDTH{1'bx}}; // was 2*DQ_WIDTH wide and truncated on assignment
+          init_data_r <= {DQ_WIDTH{1'bx}}; // was 2*DQ_WIDTH wide and truncated on assignment
+        end
+      endcase
+    end
+  end
+
+  //***************************************************************************
+
+  always @(posedge clk90)
+    dq_oe_n   <= dq_oe_n_90_r1;
+
+  always @(negedge clk0)
+    dqs_oe_n  <= dqs_oe_n_180_r1;
+
+  always @(negedge clk0)
+    dqs_rst_n <= dqs_rst_n_180_r1;
+
+  // generate for odt. odt is asserted based on
+  // write latency. For write latency of 2
+  // the extra register stage is taken out.
+  generate
+    if (ODT_WR_LATENCY > 2) begin
+      always @(posedge clk0) begin
+        odt    <= 'b0;
+        odt[0] <= odt_0;
+      end
+    end else begin
+      always @ (*) begin
+        odt = 'b0;
+        odt[0] = odt_0;
+      end
+    end
+  endgenerate
+
+  assign wdf_rden  = wdf_rden_90_r1;
+
+  //***************************************************************************
+  // Format write data/mask: Data is in format: {fall, rise}
+  //***************************************************************************
+
+  assign wr_data_rise = wdf_data_r[DQ_WIDTH-1:0];
+  assign wr_data_fall = wdf_data_r[(2*DQ_WIDTH)-1:DQ_WIDTH];
+  assign mask_data_rise = wdf_mask_r[MASK_WIDTH-1:0];
+  assign mask_data_fall = wdf_mask_r[(2*MASK_WIDTH)-1:MASK_WIDTH];
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_sdram.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_sdram.v
new file mode 100644
index 0000000..3234480
--- /dev/null
+++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_sdram.v
@@ -0,0 +1,624 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support.
By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. +//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_sdram.v +// /___/ /\ Date Last Modified: $Date: 2008/07/09 12:33:12 $ +// \ \ / \ Date Created: Wed Aug 16 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +// Top-level module. Simple model for what the user might use +// Typically, the user will only instantiate MEM_INTERFACE_TOP in their +// code, and generate all backend logic (test bench) separately. +// In addition to the memory controller, the module instantiates: +// 1. Clock generation/distribution, reset logic +// 2. 
IDELAY control block +//Reference: +//Revision History: +//***************************************************************************** + +`timescale 1ns/1ps + +(* X_CORE_INFO = "mig_v2_3_ddr2_sdram_v5, Coregen 10.1.02" , CORE_GENERATION_INFO = "ddr2_sdram_v5,mig_v2_3,{component_name=ddr2_sdram, BANK_WIDTH=2, CKE_WIDTH=1, CLK_WIDTH=2, COL_WIDTH=10, CS_NUM=1, CS_WIDTH=1, DM_WIDTH=8, DQ_WIDTH=64, DQ_PER_DQS=8, DQS_WIDTH=8, ODT_WIDTH=1, ROW_WIDTH=13, ADDITIVE_LAT=0, BURST_LEN=4, BURST_TYPE=0, CAS_LAT=4, ECC_ENABLE=0, MULTI_BANK_EN=1, TWO_T_TIME_EN=1, ODT_TYPE=1, REDUCE_DRV=0, REG_ENABLE=0, TREFI_NS=7800, TRAS=40000, TRCD=15000, TRFC=127500, TRP=15000, TRTP=7500, TWR=15000, TWTR=7500, DDR2_CLK_PERIOD=5000, RST_ACT_LOW=1}" *) +module ddr2_sdram # + ( + parameter BANK_WIDTH = 2, + // # of memory bank addr bits. + parameter CKE_WIDTH = 1, + // # of memory clock enable outputs. + parameter CLK_WIDTH = 2, + // # of clock outputs. + parameter COL_WIDTH = 10, + // # of memory column bits. + parameter CS_NUM = 1, + // # of separate memory chip selects. + parameter CS_WIDTH = 1, + // # of total memory chip selects. + parameter CS_BITS = 0, + // set to log2(CS_NUM) (rounded up). + parameter DM_WIDTH = 8, + // # of data mask bits. + parameter DQ_WIDTH = 64, + // # of data width. + parameter DQ_PER_DQS = 8, + // # of DQ data bits per strobe. + parameter DQS_WIDTH = 8, + // # of DQS strobes. + parameter DQ_BITS = 6, + // set to log2(DQS_WIDTH*DQ_PER_DQS). + parameter DQS_BITS = 3, + // set to log2(DQS_WIDTH). + parameter ODT_WIDTH = 1, + // # of memory on-die term enables. + parameter ROW_WIDTH = 13, + // # of memory row and # of addr bits. + parameter ADDITIVE_LAT = 0, + // additive write latency. + parameter BURST_LEN = 4, + // burst length (in double words). + parameter BURST_TYPE = 0, + // burst type (=0 seq; =1 interleaved). + parameter CAS_LAT = 4, + // CAS latency. + parameter ECC_ENABLE = 0, + // enable ECC (=1 enable). 
+ parameter APPDATA_WIDTH = 128, + // # of usr read/write data bus bits. + parameter MULTI_BANK_EN = 1, + // Keeps multiple banks open. (= 1 enable). + parameter TWO_T_TIME_EN = 1, + // 2t timing for unbuffered dimms. + parameter ODT_TYPE = 1, + // ODT (=0(none),=1(75),=2(150),=3(50)). + parameter REDUCE_DRV = 0, + // reduced strength mem I/O (=1 yes). + parameter REG_ENABLE = 0, + // registered addr/ctrl (=1 yes). + parameter TREFI_NS = 7800, + // auto refresh interval (ns). + parameter TRAS = 40000, + // active->precharge delay. + parameter TRCD = 15000, + // active->read/write delay. + parameter TRFC = 127500, + // refresh->refresh, refresh->active delay. + parameter TRP = 15000, + // precharge->command delay. + parameter TRTP = 7500, + // read->precharge delay. + parameter TWR = 15000, + // used to determine write->precharge. + parameter TWTR = 7500, + // write->read delay. + parameter HIGH_PERFORMANCE_MODE = "TRUE", + // # = TRUE, the IODELAY performance mode is set + // to high. + // # = FALSE, the IODELAY performance mode is set + // to low. + parameter SIM_ONLY = 0, + // = 1 to skip SDRAM power up delay. + parameter DEBUG_EN = 0, + // Enable debug signals/controls. + // When this parameter is changed from 0 to 1, + // make sure to uncomment the coregen commands + // in ise_flow.bat or create_ise.bat files in + // par folder. + parameter CLK_PERIOD = 5000, + // Core/Memory clock period (in ps). + parameter DQS_IO_COL = 16'b0000000000000000, + // I/O column location of DQS groups + // (=0, left; =1 center, =2 right). + + //parameter DQ_IO_MS = 64'b10100101_10100101_10100101_10100101_10100101_10100101_10100101_10100101, + parameter DQ_IO_MS = 64'b01110101_00111101_00001111_00011110_00101110_11000011_11000001_10111100, + // Master/Slave location of DQ I/O (=0 slave). + parameter CLK_TYPE = "SINGLE_ENDED", + // # = "DIFFERENTIAL " ->; Differential input clocks , + // # = "SINGLE_ENDED" -> Single ended input clocks. 
+ parameter DLL_FREQ_MODE = "HIGH", + // DCM Frequency range. + parameter RST_ACT_LOW = 1 + // =1 for active low reset, =0 for active high. + ) + ( + inout [DQ_WIDTH-1:0] ddr2_dq, + output [ROW_WIDTH-1:0] ddr2_a, + output [BANK_WIDTH-1:0] ddr2_ba, + output ddr2_ras_n, + output ddr2_cas_n, + output ddr2_we_n, + output [CS_WIDTH-1:0] ddr2_cs_n, + output [ODT_WIDTH-1:0] ddr2_odt, + output [CKE_WIDTH-1:0] ddr2_cke, + output [DM_WIDTH-1:0] ddr2_dm, + input sys_clk, + input idly_clk_200, + input sys_rst_n, + output phy_init_done, + output rst0_tb, + output clk0_tb, + output app_wdf_afull, + output app_af_afull, + output rd_data_valid, + input app_wdf_wren, + input app_af_wren, + input [30:0] app_af_addr, + input [2:0] app_af_cmd, + output [(APPDATA_WIDTH)-1:0] rd_data_fifo_out, + input [(APPDATA_WIDTH)-1:0] app_wdf_data, + input [(APPDATA_WIDTH/8)-1:0] app_wdf_mask_data, + inout [DQS_WIDTH-1:0] ddr2_dqs, + inout [DQS_WIDTH-1:0] ddr2_dqs_n, + output [CLK_WIDTH-1:0] ddr2_ck, + output [CLK_WIDTH-1:0] ddr2_ck_n + ); + + ///////////////////////////////////////////////////////////////////////////// + // The following parameter "IDELAYCTRL_NUM" indicates the number of + // IDELAYCTRLs that are LOCed for the design. The IDELAYCTRL LOCs are + // provided in the UCF file of par folder. MIG provides the parameter value + // and the LOCs in the UCF file based on the selected Data Read banks for + // the design. You must not alter this value unless it is needed. If you + // modify this value, you should make sure that the value of "IDELAYCTRL_NUM" + // and IDELAYCTRL LOCs in UCF file are same and are relavent to the Data Read + // banks used. 
+ ///////////////////////////////////////////////////////////////////////////// + + localparam IDELAYCTRL_NUM = 3; + + + + + + wire sys_clk_p; + wire sys_clk_n; + wire clk200_p; + wire clk200_n; + wire rst0; + wire rst90; + wire rstdiv0; + wire rst200; + wire clk0; + wire clk90; + wire clkdiv0; + wire clk200; + wire idelay_ctrl_rdy; + + + //Debug signals + + + wire [3:0] dbg_calib_done; + wire [3:0] dbg_calib_err; + wire [(6*DQ_WIDTH)-1:0] dbg_calib_dq_tap_cnt; + wire [(6*DQS_WIDTH)-1:0] dbg_calib_dqs_tap_cnt; + wire [(6*DQS_WIDTH)-1:0] dbg_calib_gate_tap_cnt; + wire [DQS_WIDTH-1:0] dbg_calib_rd_data_sel; + wire [(5*DQS_WIDTH)-1:0] dbg_calib_rden_dly; + wire [(5*DQS_WIDTH)-1:0] dbg_calib_gate_dly; + wire dbg_idel_up_all; + wire dbg_idel_down_all; + wire dbg_idel_up_dq; + wire dbg_idel_down_dq; + wire dbg_idel_up_dqs; + wire dbg_idel_down_dqs; + wire dbg_idel_up_gate; + wire dbg_idel_down_gate; + wire [DQ_BITS-1:0] dbg_sel_idel_dq; + wire dbg_sel_all_idel_dq; + wire [DQS_BITS:0] dbg_sel_idel_dqs; + wire dbg_sel_all_idel_dqs; + wire [DQS_BITS:0] dbg_sel_idel_gate; + wire dbg_sel_all_idel_gate; + + + // Debug signals (optional use) + + //*********************************** + // PHY Debug Port demo + //*********************************** + wire [35:0] cs_control0; + wire [35:0] cs_control1; + wire [35:0] cs_control2; + wire [35:0] cs_control3; + wire [191:0] vio0_in; + wire [95:0] vio1_in; + wire [99:0] vio2_in; + wire [31:0] vio3_out; + + + + //*************************************************************************** + + assign rst0_tb = rst0; + assign clk0_tb = clk0; + assign sys_clk_p = 1'b1; + assign sys_clk_n = 1'b0; + assign clk200_p = 1'b1; + assign clk200_n = 1'b0; + + ddr2_idelay_ctrl # + ( + .IDELAYCTRL_NUM (IDELAYCTRL_NUM) + ) + u_ddr2_idelay_ctrl + ( + .rst200 (rst200), + .clk200 (clk200), + .idelay_ctrl_rdy (idelay_ctrl_rdy) + ); + + ddr2_infrastructure # + ( + .CLK_PERIOD (CLK_PERIOD), + .CLK_TYPE (CLK_TYPE), + .DLL_FREQ_MODE (DLL_FREQ_MODE), + 
.RST_ACT_LOW (RST_ACT_LOW) + ) +u_ddr2_infrastructure + ( + .sys_clk_p (sys_clk_p), + .sys_clk_n (sys_clk_n), + .sys_clk (sys_clk), + .clk200_p (clk200_p), + .clk200_n (clk200_n), + .idly_clk_200 (idly_clk_200), + .sys_rst_n (sys_rst_n), + .rst0 (rst0), + .rst90 (rst90), + .rstdiv0 (rstdiv0), + .rst200 (rst200), + .clk0 (clk0), + .clk90 (clk90), + .clkdiv0 (clkdiv0), + .clk200 (clk200), + .idelay_ctrl_rdy (idelay_ctrl_rdy) + ); + + ddr2_top # + ( + .BANK_WIDTH (BANK_WIDTH), + .CKE_WIDTH (CKE_WIDTH), + .CLK_WIDTH (CLK_WIDTH), + .COL_WIDTH (COL_WIDTH), + .CS_NUM (CS_NUM), + .CS_WIDTH (CS_WIDTH), + .CS_BITS (CS_BITS), + .DM_WIDTH (DM_WIDTH), + .DQ_WIDTH (DQ_WIDTH), + .DQ_PER_DQS (DQ_PER_DQS), + .DQS_WIDTH (DQS_WIDTH), + .DQ_BITS (DQ_BITS), + .DQS_BITS (DQS_BITS), + .ODT_WIDTH (ODT_WIDTH), + .ROW_WIDTH (ROW_WIDTH), + .ADDITIVE_LAT (ADDITIVE_LAT), + .BURST_LEN (BURST_LEN), + .BURST_TYPE (BURST_TYPE), + .CAS_LAT (CAS_LAT), + .ECC_ENABLE (ECC_ENABLE), + .APPDATA_WIDTH (APPDATA_WIDTH), + .MULTI_BANK_EN (MULTI_BANK_EN), + .TWO_T_TIME_EN (TWO_T_TIME_EN), + .ODT_TYPE (ODT_TYPE), + .REDUCE_DRV (REDUCE_DRV), + .REG_ENABLE (REG_ENABLE), + .TREFI_NS (TREFI_NS), + .TRAS (TRAS), + .TRCD (TRCD), + .TRFC (TRFC), + .TRP (TRP), + .TRTP (TRTP), + .TWR (TWR), + .TWTR (TWTR), + .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE), + .SIM_ONLY (SIM_ONLY), + .DEBUG_EN (DEBUG_EN), + .CLK_PERIOD (CLK_PERIOD), + .DQS_IO_COL (DQS_IO_COL), + .DQ_IO_MS (DQ_IO_MS), + .USE_DM_PORT (1) + ) +u_ddr2_top_0 +( + .ddr2_dq (ddr2_dq), + .ddr2_a (ddr2_a), + .ddr2_ba (ddr2_ba), + .ddr2_ras_n (ddr2_ras_n), + .ddr2_cas_n (ddr2_cas_n), + .ddr2_we_n (ddr2_we_n), + .ddr2_cs_n (ddr2_cs_n), + .ddr2_odt (ddr2_odt), + .ddr2_cke (ddr2_cke), + .ddr2_dm (ddr2_dm), + .phy_init_done (phy_init_done), + .rst0 (rst0), + .rst90 (rst90), + .rstdiv0 (rstdiv0), + .clk0 (clk0), + .clk90 (clk90), + .clkdiv0 (clkdiv0), + .app_wdf_afull (app_wdf_afull), + .app_af_afull (app_af_afull), + .rd_data_valid (rd_data_valid), + .app_wdf_wren 
(app_wdf_wren), + .app_af_wren (app_af_wren), + .app_af_addr (app_af_addr), + .app_af_cmd (app_af_cmd), + .rd_data_fifo_out (rd_data_fifo_out), + .app_wdf_data (app_wdf_data), + .app_wdf_mask_data (app_wdf_mask_data), + .ddr2_dqs (ddr2_dqs), + .ddr2_dqs_n (ddr2_dqs_n), + .ddr2_ck (ddr2_ck), + .rd_ecc_error (), + .ddr2_ck_n (ddr2_ck_n), + + .dbg_calib_done (dbg_calib_done), + .dbg_calib_err (dbg_calib_err), + .dbg_calib_dq_tap_cnt (dbg_calib_dq_tap_cnt), + .dbg_calib_dqs_tap_cnt (dbg_calib_dqs_tap_cnt), + .dbg_calib_gate_tap_cnt (dbg_calib_gate_tap_cnt), + .dbg_calib_rd_data_sel (dbg_calib_rd_data_sel), + .dbg_calib_rden_dly (dbg_calib_rden_dly), + .dbg_calib_gate_dly (dbg_calib_gate_dly), + .dbg_idel_up_all (dbg_idel_up_all), + .dbg_idel_down_all (dbg_idel_down_all), + .dbg_idel_up_dq (dbg_idel_up_dq), + .dbg_idel_down_dq (dbg_idel_down_dq), + .dbg_idel_up_dqs (dbg_idel_up_dqs), + .dbg_idel_down_dqs (dbg_idel_down_dqs), + .dbg_idel_up_gate (dbg_idel_up_gate), + .dbg_idel_down_gate (dbg_idel_down_gate), + .dbg_sel_idel_dq (dbg_sel_idel_dq), + .dbg_sel_all_idel_dq (dbg_sel_all_idel_dq), + .dbg_sel_idel_dqs (dbg_sel_idel_dqs), + .dbg_sel_all_idel_dqs (dbg_sel_all_idel_dqs), + .dbg_sel_idel_gate (dbg_sel_idel_gate), + .dbg_sel_all_idel_gate (dbg_sel_all_idel_gate) + ); + + + //***************************************************************** + // Hooks to prevent sim/syn compilation errors (mainly for VHDL - but + // keep it also in Verilog version of code) w/ floating inputs if + // DEBUG_EN = 0. 
+ //***************************************************************** + + generate + if (DEBUG_EN == 0) begin: gen_dbg_tie_off + assign dbg_idel_up_all = 'b0; + assign dbg_idel_down_all = 'b0; + assign dbg_idel_up_dq = 'b0; + assign dbg_idel_down_dq = 'b0; + assign dbg_idel_up_dqs = 'b0; + assign dbg_idel_down_dqs = 'b0; + assign dbg_idel_up_gate = 'b0; + assign dbg_idel_down_gate = 'b0; + assign dbg_sel_idel_dq = 'b0; + assign dbg_sel_all_idel_dq = 'b0; + assign dbg_sel_idel_dqs = 'b0; + assign dbg_sel_all_idel_dqs = 'b0; + assign dbg_sel_idel_gate = 'b0; + assign dbg_sel_all_idel_gate = 'b0; + end else begin: gen_dbg_enable + + //***************************************************************** + // PHY Debug Port example - see MIG User's Guide, XAPP858 or + // Answer Record 29443 + // This logic supports up to 32 DQ and 8 DQS I/O + // NOTES: + // 1. PHY Debug Port demo connects to 4 VIO modules: + // - 3 VIO modules with only asynchronous inputs + // * Monitor IDELAY taps for DQ, DQS, DQS Gate + // * Calibration status + // - 1 VIO module with synchronous outputs + // * Allow dynamic adjustment o f IDELAY taps + // 2. User may need to modify this code to incorporate other + // chipscope-related modules in their larger design (e.g. + // if they have other ILA/VIO modules, they will need to + // for example instantiate a larger ICON module). 
In addition + // user may want to instantiate more VIO modules to control + // IDELAY for more DQ, DQS than is shown here + //***************************************************************** + + icon4 u_icon + ( + .control0 (cs_control0), + .control1 (cs_control1), + .control2 (cs_control2), + .control3 (cs_control3) + ); + + //***************************************************************** + // VIO ASYNC input: Display current IDELAY setting for up to 32 + // DQ taps (32x6) = 192 + //***************************************************************** + + vio_async_in192 u_vio0 + ( + .control (cs_control0), + .async_in (vio0_in) + ); + + //***************************************************************** + // VIO ASYNC input: Display current IDELAY setting for up to 8 DQS + // and DQS Gate taps (8x6x2) = 96 + //***************************************************************** + + vio_async_in96 u_vio1 + ( + .control (cs_control1), + .async_in (vio1_in) + ); + + //***************************************************************** + // VIO ASYNC input: Display other calibration results + //***************************************************************** + + vio_async_in100 u_vio2 + ( + .control (cs_control2), + .async_in (vio2_in) + ); + + //***************************************************************** + // VIO SYNC output: Dynamically change IDELAY taps + //***************************************************************** + + vio_sync_out32 u_vio3 + ( + .control (cs_control3), + .clk (clkdiv0), + .sync_out (vio3_out) + ); + + //***************************************************************** + // Bit assignments: + // NOTE: Not all VIO, ILA inputs/outputs may be used - these will + // be dependent on the user's particular bit width + //***************************************************************** + + if (DQ_WIDTH <= 32) begin: gen_dq_le_32 + assign vio0_in[(6*DQ_WIDTH)-1:0] + = dbg_calib_dq_tap_cnt[(6*DQ_WIDTH)-1:0]; + end else begin: gen_dq_gt_32 + assign 
vio0_in = dbg_calib_dq_tap_cnt[191:0]; + end + + if (DQS_WIDTH <= 8) begin: gen_dqs_le_8 + assign vio1_in[(6*DQS_WIDTH)-1:0] + = dbg_calib_dqs_tap_cnt[(6*DQS_WIDTH)-1:0]; + assign vio1_in[(12*DQS_WIDTH)-1:(6*DQS_WIDTH)] + = dbg_calib_gate_tap_cnt[(6*DQS_WIDTH)-1:0]; + end else begin: gen_dqs_gt_32 + assign vio1_in[47:0] = dbg_calib_dqs_tap_cnt[47:0]; + assign vio1_in[95:48] = dbg_calib_gate_tap_cnt[47:0]; + end + +//dbg_calib_rd_data_sel + + if (DQS_WIDTH <= 8) begin: gen_rdsel_le_8 + assign vio2_in[(DQS_WIDTH)+7:8] + = dbg_calib_rd_data_sel[(DQS_WIDTH)-1:0]; + end else begin: gen_rdsel_gt_32 + assign vio2_in[15:8] + = dbg_calib_rd_data_sel[7:0]; + end + +//dbg_calib_rden_dly + + if (DQS_WIDTH <= 8) begin: gen_calrd_le_8 + assign vio2_in[(5*DQS_WIDTH)+19:20] + = dbg_calib_rden_dly[(5*DQS_WIDTH)-1:0]; + end else begin: gen_calrd_gt_32 + assign vio2_in[59:20] + = dbg_calib_rden_dly[39:0]; + end + +//dbg_calib_gate_dly + + if (DQS_WIDTH <= 8) begin: gen_calgt_le_8 + assign vio2_in[(5*DQS_WIDTH)+59:60] + = dbg_calib_gate_dly[(5*DQS_WIDTH)-1:0]; + end else begin: gen_calgt_gt_32 + assign vio2_in[99:60] + = dbg_calib_gate_dly[39:0]; + end + +//dbg_sel_idel_dq + + if (DQ_BITS <= 5) begin: gen_selid_le_5 + assign dbg_sel_idel_dq[DQ_BITS-1:0] + = vio3_out[DQ_BITS+7:8]; + end else begin: gen_selid_gt_32 + assign dbg_sel_idel_dq[4:0] + = vio3_out[12:8]; + end + +//dbg_sel_idel_dqs + + if (DQS_BITS <= 3) begin: gen_seldqs_le_3 + assign dbg_sel_idel_dqs[DQS_BITS:0] + = vio3_out[(DQS_BITS+16):16]; + end else begin: gen_seldqs_gt_32 + assign dbg_sel_idel_dqs[3:0] + = vio3_out[19:16]; + end + +//dbg_sel_idel_gate + + if (DQS_BITS <= 3) begin: gen_gtdqs_le_3 + assign dbg_sel_idel_gate[DQS_BITS:0] + = vio3_out[(DQS_BITS+21):21]; + end else begin: gen_gtdqs_gt_32 + assign dbg_sel_idel_gate[3:0] + = vio3_out[24:21]; + end + + + assign vio2_in[3:0] = dbg_calib_done; + assign vio2_in[7:4] = dbg_calib_err; + + assign dbg_idel_up_all = vio3_out[0]; + assign dbg_idel_down_all = 
vio3_out[1]; + assign dbg_idel_up_dq = vio3_out[2]; + assign dbg_idel_down_dq = vio3_out[3]; + assign dbg_idel_up_dqs = vio3_out[4]; + assign dbg_idel_down_dqs = vio3_out[5]; + assign dbg_idel_up_gate = vio3_out[6]; + assign dbg_idel_down_gate = vio3_out[7]; + assign dbg_sel_all_idel_dq = vio3_out[15]; + assign dbg_sel_all_idel_dqs = vio3_out[20]; + assign dbg_sel_all_idel_gate = vio3_out[25]; + end + endgenerate + +endmodule diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_top.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_top.v new file mode 100644 index 0000000..000d06c --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_top.v @@ -0,0 +1,277 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. 
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the users sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_top.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/29 15:24:03 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   System level module. This level contains just the memory controller.
+//   This level will be instantiated when the user wants to remove the
+//   synthesizable test bench, IDELAY control block and the clock
+//   generation modules.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_top #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameter values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter BANK_WIDTH            = 2,      // # of memory bank addr bits
+   parameter CKE_WIDTH             = 1,      // # of memory clock enable outputs
+   parameter CLK_WIDTH             = 1,      // # of clock outputs
+   parameter COL_WIDTH             = 10,     // # of memory column bits
+   parameter CS_NUM                = 1,      // # of separate memory chip selects
+   parameter CS_BITS               = 0,      // set to log2(CS_NUM) (rounded up)
+   parameter CS_WIDTH              = 1,      // # of total memory chip selects
+   parameter USE_DM_PORT           = 1,      // enable Data Mask (=1 enable)
+   parameter DM_WIDTH              = 9,      // # of data mask bits
+   parameter DQ_WIDTH              = 72,     // data bus width (# of DQ bits)
+   parameter DQ_BITS               = 7,      // set to log2(DQS_WIDTH*DQ_PER_DQS)
+   parameter DQ_PER_DQS            = 8,      // # of DQ data bits per strobe
+   parameter DQS_WIDTH             = 9,      // # of DQS strobes
+   parameter DQS_BITS              = 4,      // set to log2(DQS_WIDTH)
+   parameter HIGH_PERFORMANCE_MODE = "TRUE", // IODELAY Performance Mode
+   parameter ODT_WIDTH             = 1,      // # of memory on-die term enables
+   parameter ROW_WIDTH             = 14,     // # of memory row & # of addr bits
+   parameter APPDATA_WIDTH         = 144,    // # of usr read/write data bus bits
+   parameter ADDITIVE_LAT          = 0,      // additive write latency
+   parameter BURST_LEN             = 4,      // burst length (in double words)
+   parameter BURST_TYPE            = 0,      // burst type (=0 seq; =1 interleaved)
+   parameter CAS_LAT               = 5,      // CAS latency
+   parameter ECC_ENABLE            = 0,      // enable ECC (=1 enable)
+   parameter ODT_TYPE              = 1,      // ODT (=0(none),=1(75),=2(150),=3(50))
+   parameter MULTI_BANK_EN         = 1,      // enable bank management
+   parameter TWO_T_TIME_EN         = 0,      // 2t timing for unbuffered dimms
+   parameter REDUCE_DRV            = 0,      // reduced strength mem I/O (=1 yes)
+   parameter REG_ENABLE            = 1,      // registered addr/ctrl (=1 yes)
+   parameter TREFI_NS              = 7800,   // auto refresh interval (ns)
+   parameter TRAS                  = 40000,  // active->precharge delay
+   parameter TRCD                  = 15000,  // active->read/write delay
+   parameter TRFC                  = 105000, // ref->ref, ref->active delay
+   parameter TRP                   = 15000,  // precharge->command delay
+   parameter TRTP                  = 7500,   // read->precharge delay
+   parameter TWR                   = 15000,  // used to determine wr->prech
+   parameter TWTR                  = 10000,  // write->read delay
+   parameter CLK_PERIOD            = 3000,   // Core/Mem clk period (in ps)
+   parameter SIM_ONLY              = 0,      // = 1 to skip power up delay
+   parameter DEBUG_EN              = 0,      // Enable debug signals/controls
+   parameter DQS_IO_COL            = 0,      // I/O column location of DQS groups
+   parameter DQ_IO_MS              = 0       // Master/Slave location of DQ I/O
+   )
+  (
+   input                                    clk0,
+   input                                    clk90,
+   input                                    clkdiv0,
+   input                                    rst0,
+   input                                    rst90,
+   input                                    rstdiv0,
+   input [2:0]                              app_af_cmd,
+   input [30:0]                             app_af_addr,
+   input                                    app_af_wren,
+   input                                    app_wdf_wren,
+   input [APPDATA_WIDTH-1:0]                app_wdf_data,
+   input [(APPDATA_WIDTH/8)-1:0]            app_wdf_mask_data,
+   output                                   app_af_afull,
+   output                                   app_wdf_afull,
+   output                                   rd_data_valid,
+   output [APPDATA_WIDTH-1:0]               rd_data_fifo_out,
+   output [1:0]                             rd_ecc_error,
+   output                                   phy_init_done,
+   output [CLK_WIDTH-1:0]                   ddr2_ck,
+   output [CLK_WIDTH-1:0]                   ddr2_ck_n,
+   output [ROW_WIDTH-1:0]                   ddr2_a,
+   output [BANK_WIDTH-1:0]                  ddr2_ba,
+   output                                   ddr2_ras_n,
+   output                                   ddr2_cas_n,
+   output                                   ddr2_we_n,
+   output [CS_WIDTH-1:0]                    ddr2_cs_n,
+   output [CKE_WIDTH-1:0]                   ddr2_cke,
+   output [ODT_WIDTH-1:0]                   ddr2_odt,
+   output [DM_WIDTH-1:0]                    ddr2_dm,
+   inout [DQS_WIDTH-1:0]                    ddr2_dqs,
+   inout [DQS_WIDTH-1:0]                    ddr2_dqs_n,
+   inout [DQ_WIDTH-1:0]                     ddr2_dq,
+   // Debug signals (optional use); all are forwarded unchanged to u_mem_if_top
+   input                                    dbg_idel_up_all,
+   input                                    dbg_idel_down_all,
+   input                                    dbg_idel_up_dq,
+   input                                    dbg_idel_down_dq,
+   input                                    dbg_idel_up_dqs,
+   input                                    dbg_idel_down_dqs,
+   input                                    dbg_idel_up_gate,
+   input                                    dbg_idel_down_gate,
+   input [DQ_BITS-1:0]                      dbg_sel_idel_dq,
+   input                                    dbg_sel_all_idel_dq,
+   input [DQS_BITS:0]                       dbg_sel_idel_dqs,    // note: DQS_BITS+1 bits wide
+   input                                    dbg_sel_all_idel_dqs,
+   input [DQS_BITS:0]                       dbg_sel_idel_gate,   // note: DQS_BITS+1 bits wide
+   input                                    dbg_sel_all_idel_gate,
+   output [3:0]                             dbg_calib_done,
+   output [3:0]                             dbg_calib_err,
+   output [(6*DQ_WIDTH)-1:0]                dbg_calib_dq_tap_cnt,    // 6 bits per DQ
+   output [(6*DQS_WIDTH)-1:0]               dbg_calib_dqs_tap_cnt,   // 6 bits per DQS
+   output [(6*DQS_WIDTH)-1:0]               dbg_calib_gate_tap_cnt,  // 6 bits per DQS
+   output [DQS_WIDTH-1:0]                   dbg_calib_rd_data_sel,   // 1 bit per DQS
+   output [(5*DQS_WIDTH)-1:0]               dbg_calib_rden_dly,      // 5 bits per DQS
+   output [(5*DQS_WIDTH)-1:0]               dbg_calib_gate_dly       // 5 bits per DQS
+   );
+
+  // memory initialization/control logic: the only logic in this module; every parameter and port above is forwarded to it
+  ddr2_mem_if_top #
+    (
+     .BANK_WIDTH            (BANK_WIDTH),
+     .CKE_WIDTH             (CKE_WIDTH),
+     .CLK_WIDTH             (CLK_WIDTH),
+     .COL_WIDTH             (COL_WIDTH),
+     .CS_BITS               (CS_BITS),
+     .CS_NUM                (CS_NUM),
+     .CS_WIDTH              (CS_WIDTH),
+     .USE_DM_PORT           (USE_DM_PORT),
+     .DM_WIDTH              (DM_WIDTH),
+     .DQ_WIDTH              (DQ_WIDTH),
+     .DQ_BITS               (DQ_BITS),
+     .DQ_PER_DQS            (DQ_PER_DQS),
+     .DQS_BITS              (DQS_BITS),
+     .DQS_WIDTH             (DQS_WIDTH),
+     .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE),
+     .ODT_WIDTH             (ODT_WIDTH),
+     .ROW_WIDTH             (ROW_WIDTH),
+     .APPDATA_WIDTH         (APPDATA_WIDTH),
+     .ADDITIVE_LAT          (ADDITIVE_LAT),
+     .BURST_LEN             (BURST_LEN),
+     .BURST_TYPE            (BURST_TYPE),
+     .CAS_LAT               (CAS_LAT),
+     .ECC_ENABLE            (ECC_ENABLE),
+     .MULTI_BANK_EN         (MULTI_BANK_EN),
+     .TWO_T_TIME_EN         (TWO_T_TIME_EN),
+     .ODT_TYPE              (ODT_TYPE),
+     .DDR_TYPE              (1),   // hard-coded here, not a ddr2_top parameter; presumably selects DDR2 - confirm in ddr2_mem_if_top
+     .REDUCE_DRV            (REDUCE_DRV),
+     .REG_ENABLE            (REG_ENABLE),
+     .TREFI_NS              (TREFI_NS),
+     .TRAS                  (TRAS),
+     .TRCD                  (TRCD),
+     .TRFC                  (TRFC),
+     .TRP                   (TRP),
+     .TRTP                  (TRTP),
+     .TWR                   (TWR),
+     .TWTR                  (TWTR),
+     .CLK_PERIOD            (CLK_PERIOD),
+     .SIM_ONLY              (SIM_ONLY),
+     .DEBUG_EN              (DEBUG_EN),
+     .DQS_IO_COL            (DQS_IO_COL),
+     .DQ_IO_MS              (DQ_IO_MS)
+     )
+    u_mem_if_top
+      (
+       .clk0                           (clk0),
+       .clk90                          (clk90),
+       .clkdiv0                        (clkdiv0),
+       .rst0                           (rst0),
+       .rst90                          (rst90),
+       .rstdiv0                        (rstdiv0),
+       .app_af_cmd                     (app_af_cmd),
+       .app_af_addr                    (app_af_addr),
+       .app_af_wren                    (app_af_wren),
+       .app_wdf_wren                   (app_wdf_wren),
+       .app_wdf_data                   (app_wdf_data),
+       .app_wdf_mask_data              (app_wdf_mask_data),
+       .app_af_afull                   (app_af_afull),
+       .app_wdf_afull                  (app_wdf_afull),
+       .rd_data_valid                  (rd_data_valid),
+       .rd_data_fifo_out               (rd_data_fifo_out),
+       .rd_ecc_error                   (rd_ecc_error),
+       .phy_init_done                  (phy_init_done),
+       .ddr_ck                         (ddr2_ck),      // from here down: sub-module ddr_* ports map to this module's ddr2_* pins
+       .ddr_ck_n                       (ddr2_ck_n),
+       .ddr_addr                       (ddr2_a),
+       .ddr_ba                         (ddr2_ba),
+       .ddr_ras_n                      (ddr2_ras_n),
+       .ddr_cas_n                      (ddr2_cas_n),
+       .ddr_we_n                       (ddr2_we_n),
+       .ddr_cs_n                       (ddr2_cs_n),
+       .ddr_cke                        (ddr2_cke),
+       .ddr_odt                        (ddr2_odt),
+       .ddr_dm                         (ddr2_dm),
+       .ddr_dqs                        (ddr2_dqs),
+       .ddr_dqs_n                      (ddr2_dqs_n),
+       .ddr_dq                         (ddr2_dq),
+       .dbg_idel_up_all                (dbg_idel_up_all),
+       .dbg_idel_down_all              (dbg_idel_down_all),
+       .dbg_idel_up_dq                 (dbg_idel_up_dq),
+       .dbg_idel_down_dq               (dbg_idel_down_dq),
+       .dbg_idel_up_dqs                (dbg_idel_up_dqs),
+       .dbg_idel_down_dqs              (dbg_idel_down_dqs),
+       .dbg_idel_up_gate               (dbg_idel_up_gate),
+       .dbg_idel_down_gate             (dbg_idel_down_gate),
+       .dbg_sel_idel_dq                (dbg_sel_idel_dq),
+       .dbg_sel_all_idel_dq            (dbg_sel_all_idel_dq),
+       .dbg_sel_idel_dqs               (dbg_sel_idel_dqs),
+       .dbg_sel_all_idel_dqs           (dbg_sel_all_idel_dqs),
+       .dbg_sel_idel_gate              (dbg_sel_idel_gate),
+       .dbg_sel_all_idel_gate          (dbg_sel_all_idel_gate),
+       .dbg_calib_done                 (dbg_calib_done),
+       .dbg_calib_err                  (dbg_calib_err),
+       .dbg_calib_dq_tap_cnt           (dbg_calib_dq_tap_cnt),
+       .dbg_calib_dqs_tap_cnt          (dbg_calib_dqs_tap_cnt),
+       .dbg_calib_gate_tap_cnt         (dbg_calib_gate_tap_cnt),
+       .dbg_calib_rd_data_sel          (dbg_calib_rd_data_sel),
+       .dbg_calib_rden_dly             (dbg_calib_rden_dly),
+       .dbg_calib_gate_dly             (dbg_calib_gate_dly)
+       );
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_addr_fifo.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_addr_fifo.v
new file mode 100644
index 0000000..18302a7
--- /dev/null
+++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_addr_fifo.v
@@ -0,0 +1,135 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies.
Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. +//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_usr_addr_fifo.v +// /___/ /\ Date Last Modified: $Date: 2008/05/08 15:20:47 $ +// \ \ / \ Date Created: Mon Aug 28 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR2 +//Purpose: +// This module instantiates the block RAM based FIFO to store the user +// address and the command information. 
Also calculates potential bank/row
+//   conflicts by comparing the new address with last address issued. (NOTE(review): no such comparison exists in this module as written; it only buffers cmd/addr in one FIFO36.)
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_usr_addr_fifo #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameter values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter BANK_WIDTH    = 2,
+   parameter COL_WIDTH     = 10,
+   parameter CS_BITS       = 0,
+   parameter ROW_WIDTH     = 14    // note: none of these four parameters is referenced in the module body
+   )
+  (
+   input          clk0,            // single clock for both FIFO ports (RDCLK and WRCLK)
+   input          rst0,            // registered once (rst_r) before driving the FIFO RST pin
+   input [2:0]    app_af_cmd,      // written into the FIFO alongside app_af_addr
+   input [30:0]   app_af_addr,
+   input          app_af_wren,     // FIFO write enable
+   input          ctrl_af_rden,    // FIFO read enable
+   output [2:0]   af_cmd,          // command word at the FIFO output
+   output [30:0]  af_addr,         // address word at the FIFO output
+   output         af_empty,        // FIFO EMPTY flag
+   output         app_af_afull     // FIFO ALMOSTFULL flag (FULL itself is left unconnected)
+   );
+
+  wire [35:0]     fifo_data_out;   // {DOP[3:0], DO[31:0]}
+  reg             rst_r;
+
+
+  always @(posedge clk0)
+    rst_r <= rst0;
+
+
+  //***************************************************************************
+
+  assign af_cmd  = fifo_data_out[33:31];   // {DOP[1:0], DO[31]} = {cmd[2:1], cmd[0]} as packed on DI/DIP below
+  assign af_addr = fifo_data_out[30:0];    // DO[30:0] = app_af_addr
+
+  //***************************************************************************
+
+  FIFO36 #
+    (
+     .ALMOST_EMPTY_OFFSET     (13'h0007),
+     .ALMOST_FULL_OFFSET      (13'h000F),
+     .DATA_WIDTH              (36),
+     .DO_REG                  (1),
+     .EN_SYN                  ("TRUE"),
+     .FIRST_WORD_FALL_THROUGH ("FALSE")
+     )
+    u_af
+      (
+       .ALMOSTEMPTY (),
+       .ALMOSTFULL  (app_af_afull),
+       .DO          (fifo_data_out[31:0]),
+       .DOP         (fifo_data_out[35:32]),
+       .EMPTY       (af_empty),
+       .FULL        (),
+       .RDCOUNT     (),
+       .RDERR       (),
+       .WRCOUNT     (),
+       .WRERR       (),
+       .DI          ({app_af_cmd[0],app_af_addr}),    // bit 31 = cmd[0], bits 30:0 = addr
+       .DIP         ({2'b00,app_af_cmd[2:1]}),        // parity bits carry cmd[2:1]; upper two are written as 0
+       .RDCLK       (clk0),
+       .RDEN        (ctrl_af_rden),
+       .RST         (rst_r),
+       .WRCLK       (clk0),
+       .WREN        (app_af_wren)
+       );
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_rd.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_rd.v
new file mode 100644
index 0000000..8f20e0d
--- /dev/null
+++ 
b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_rd.v @@ -0,0 +1,301 @@ +//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. 
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_usr_rd.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/02 14:03:08 $
+// \   \  /  \    Date Created: Tue Aug 29 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   The delay between the read data with respect to the command issued is
+//   calculated in terms of no. of clocks. This data is then stored into the
+//   FIFOs and then read back and given as the output for comparison.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_usr_rd #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameter values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter DQ_PER_DQS    = 8,
+   parameter DQS_WIDTH     = 9,
+   parameter APPDATA_WIDTH = 144,
+   parameter ECC_WIDTH     = 72,    // not referenced anywhere in this module's body
+   parameter ECC_ENABLE    = 0      // nonzero selects the FIFO36_72 (ECC read) branch of the generate below
+   )
+  (
+   input                                    clk0,
+   input                                    rst0,
+   input [(DQS_WIDTH*DQ_PER_DQS)-1:0]       rd_data_in_rise,
+   input [(DQS_WIDTH*DQ_PER_DQS)-1:0]       rd_data_in_fall,
+   input [DQS_WIDTH-1:0]                    ctrl_rden,        // only bit 0 (and its registered copy) is used, see rden
+   input [DQS_WIDTH-1:0]                    ctrl_rden_sel,    // per DQS group, after one register: 1 = use input as-is, 0 = use input delayed one clk0 cycle
+   output reg [1:0]                         rd_ecc_error,     // [0] = any SBITERR, [1] = any DBITERR; assigned only in the ECC_ENABLE branch
+   output                                   rd_data_valid,
+   output reg [(APPDATA_WIDTH/2)-1:0]       rd_data_out_rise,
+   output reg [(APPDATA_WIDTH/2)-1:0]       rd_data_out_fall
+   );
+
+  // number of FIFO36_72 pairs (one rise + one fall) = ceil((APPDATA_WIDTH/2)/64); only used in the ECC branch
+  localparam RDF_FIFO_NUM = ((APPDATA_WIDTH/2)+63)/64;
+
+  reg [DQS_WIDTH-1:0]                  ctrl_rden_r;          // ctrl_rden delayed one clk0 cycle
+  wire [(DQS_WIDTH*DQ_PER_DQS)-1:0]    fall_data;            // de-skewed falling-edge data
+  reg [(DQS_WIDTH*DQ_PER_DQS)-1:0]     rd_data_in_fall_r;    // rd_data_in_fall delayed one clk0 cycle
+  reg [(DQS_WIDTH*DQ_PER_DQS)-1:0]     rd_data_in_rise_r;    // rd_data_in_rise delayed one clk0 cycle
+  wire                                 rden;
+  reg [DQS_WIDTH-1:0]                  rden_sel_r
+                                       /* synthesis syn_preserve=1 */;
+  wire [DQS_WIDTH-1:0]                 rden_sel_mux;         // second registered copy of ctrl_rden_sel (FDRSE outputs), used for the data muxes
+  wire [(DQS_WIDTH*DQ_PER_DQS)-1:0]    rise_data;            // de-skewed rising-edge data
+
+  // ECC specific signals (the fifo_rden_r0 flop is also used by the non-ECC branch)
+  wire [((RDF_FIFO_NUM -1) *2)+1:0]    db_ecc_error;         // 2 bits per FIFO pair: even = rise FIFO, odd = fall FIFO
+  reg [(DQS_WIDTH*DQ_PER_DQS)-1:0]     fall_data_r;
+  reg                                  fifo_rden_r0;
+  reg                                  fifo_rden_r1;
+  reg                                  fifo_rden_r2;
+  reg                                  fifo_rden_r3;
+  reg                                  fifo_rden_r4;
+  reg                                  fifo_rden_r5;
+  reg                                  fifo_rden_r6;
+  wire [(APPDATA_WIDTH/2)-1:0]         rd_data_out_fall_temp;
+  wire [(APPDATA_WIDTH/2)-1:0]         rd_data_out_rise_temp;
+  reg                                  rst_r;
+  reg [(DQS_WIDTH*DQ_PER_DQS)-1:0]     rise_data_r;
+  wire [((RDF_FIFO_NUM -1) *2)+1:0]    sb_ecc_error;         // same indexing as db_ecc_error
+
+
+  //***************************************************************************
+
+  always @(posedge clk0) begin
+    rden_sel_r        <= ctrl_rden_sel;
+    ctrl_rden_r       <= ctrl_rden;
+    rd_data_in_rise_r <= rd_data_in_rise;
+    rd_data_in_fall_r <= rd_data_in_fall;
+  end
+
+  // Instantiate primitive to allow this flop to be attached to multicycle
+  // path constraint in UCF. Multicycle path allowed for data from read FIFO.
+  // This is the same signal as RDEN_SEL_R, but is only used to select data
+  // (does not affect control signals). Keep the instance/generate names: the UCF presumably matches on them.
+  genvar rd_i;
+  generate
+    for (rd_i = 0; rd_i < DQS_WIDTH; rd_i = rd_i+1) begin: gen_rden_sel_mux
+      FDRSE u_ff_rden_sel_mux
+        (
+         .Q   (rden_sel_mux[rd_i]),
+         .C   (clk0),
+         .CE  (1'b1),
+         .D   (ctrl_rden_sel[rd_i]),
+         .R   (1'b0),
+         .S   (1'b0)
+         ) /* synthesis syn_preserve=1 */;
+    end
+  endgenerate
+
+  // determine correct read data valid signal timing (taken from DQS group 0 only)
+  assign rden = (rden_sel_r[0]) ? ctrl_rden[0] : ctrl_rden_r[0];
+
+  // assign data based on the skew: per DQS group, pick the current or the one-cycle-delayed input
+  genvar data_i;
+  generate
+    for(data_i = 0; data_i < DQS_WIDTH; data_i = data_i+1) begin: gen_data
+      assign rise_data[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                       (data_i*DQ_PER_DQS)]
+               = (rden_sel_mux[data_i]) ?
+                 rd_data_in_rise[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1) :
+                                 (data_i*DQ_PER_DQS)] :
+                 rd_data_in_rise_r[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                                   (data_i*DQ_PER_DQS)];
+      assign fall_data[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                       (data_i*DQ_PER_DQS)]
+               = (rden_sel_mux[data_i]) ?
+                 rd_data_in_fall[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                                 (data_i*DQ_PER_DQS)] :
+                 rd_data_in_fall_r[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                                   (data_i*DQ_PER_DQS)];
+    end
+  endgenerate
+
+  // Generate RST for FIFO reset AND for read/write enable:
+  // ECC FIFO always being read from and written to (RDEN = WREN = ~rst_r)
+  always @(posedge clk0)
+    rst_r <= rst0;
+
+  genvar rdf_i;
+  generate
+    if (ECC_ENABLE) begin
+      always @(posedge clk0) begin
+        rd_ecc_error[0]   <= (|sb_ecc_error) & fifo_rden_r5;   // registered, so it lines up with rd_data_valid (fifo_rden_r6)
+        rd_ecc_error[1]   <= (|db_ecc_error) & fifo_rden_r5;
+        rd_data_out_rise  <= rd_data_out_rise_temp;
+        rd_data_out_fall  <= rd_data_out_fall_temp;
+        rise_data_r       <= rise_data;
+        fall_data_r       <= fall_data;
+      end
+
+      // can use any of the read valids, they're all delayed by same amount
+      assign rd_data_valid = fifo_rden_r6;
+
+      // delay read valid to take into account max delay difference btw
+      // the read enable coming from the different DQS groups
+      always @(posedge clk0) begin
+        if (rst0) begin
+          fifo_rden_r0 <= 1'b0;
+          fifo_rden_r1 <= 1'b0;
+          fifo_rden_r2 <= 1'b0;
+          fifo_rden_r3 <= 1'b0;
+          fifo_rden_r4 <= 1'b0;
+          fifo_rden_r5 <= 1'b0;
+          fifo_rden_r6 <= 1'b0;
+        end else begin
+          fifo_rden_r0 <= rden;
+          fifo_rden_r1 <= fifo_rden_r0;
+          fifo_rden_r2 <= fifo_rden_r1;
+          fifo_rden_r3 <= fifo_rden_r2;
+          fifo_rden_r4 <= fifo_rden_r3;
+          fifo_rden_r5 <= fifo_rden_r4;
+          fifo_rden_r6 <= fifo_rden_r5;
+        end
+      end
+
+      for (rdf_i = 0; rdf_i < RDF_FIFO_NUM; rdf_i = rdf_i + 1) begin: gen_rdf   // NOTE(review): 64-bit DO slices appear to assume APPDATA_WIDTH/2 is a multiple of 64 - confirm
+
+        FIFO36_72  # // rise fifo
+          (
+           .ALMOST_EMPTY_OFFSET     (9'h007),
+           .ALMOST_FULL_OFFSET      (9'h00F),
+           .DO_REG                  (1),          // extra CC output delay
+           .EN_ECC_WRITE            ("FALSE"),
+           .EN_ECC_READ             ("TRUE"),
+           .EN_SYN                  ("FALSE"),
+           .FIRST_WORD_FALL_THROUGH ("FALSE")
+           )
+          u_rdf
+            (
+             .ALMOSTEMPTY (),
+             .ALMOSTFULL  (),
+             .DBITERR     (db_ecc_error[rdf_i + rdf_i]),   // rise FIFO uses the even index (2*rdf_i)
+             .DO          (rd_data_out_rise_temp[(64*(rdf_i+1))-1:
+                                                 (64 *rdf_i)]),
+             .DOP         (),
+             .ECCPARITY   (),
+             .EMPTY       (),
+             .FULL        (),
+             .RDCOUNT     (),
+             .RDERR       (),
+             .SBITERR     (sb_ecc_error[rdf_i + rdf_i]),
+             .WRCOUNT     (),
+             .WRERR       (),
+             .DI          (rise_data_r[((64*(rdf_i+1)) + (rdf_i*8))-1:
+                                       (64 *rdf_i)+(rdf_i*8)]),   // low 64 bits of this FIFO's 72-bit input slice
+             .DIP         (rise_data_r[(72*(rdf_i+1))-1:
+                                       (64*(rdf_i+1))+ (8*rdf_i)]),   // top 8 bits of the same 72-bit slice
+             .RDCLK       (clk0),
+             .RDEN        (~rst_r),
+             .RST         (rst_r),
+             .WRCLK       (clk0),
+             .WREN        (~rst_r)
+             );
+
+        FIFO36_72  # // fall_fifo
+          (
+           .ALMOST_EMPTY_OFFSET     (9'h007),
+           .ALMOST_FULL_OFFSET      (9'h00F),
+           .DO_REG                  (1),          // extra CC output delay
+           .EN_ECC_WRITE            ("FALSE"),
+           .EN_ECC_READ             ("TRUE"),
+           .EN_SYN                  ("FALSE"),
+           .FIRST_WORD_FALL_THROUGH ("FALSE")
+           )
+          u_rdf1
+            (
+             .ALMOSTEMPTY (),
+             .ALMOSTFULL  (),
+             .DBITERR     (db_ecc_error[(rdf_i+1) + rdf_i]),   // fall FIFO uses the odd index (2*rdf_i+1)
+             .DO          (rd_data_out_fall_temp[(64*(rdf_i+1))-1:
+                                                 (64 *rdf_i)]),
+             .DOP         (),
+             .ECCPARITY   (),
+             .EMPTY       (),
+             .FULL        (),
+             .RDCOUNT     (),
+             .RDERR       (),
+             .SBITERR     (sb_ecc_error[(rdf_i+1) + rdf_i]),
+             .WRCOUNT     (),
+             .WRERR       (),
+             .DI          (fall_data_r[((64*(rdf_i+1)) + (rdf_i*8))-1:
+                                       (64*rdf_i)+(rdf_i*8)]),
+             .DIP         (fall_data_r[(72*(rdf_i+1))-1:
+                                       (64*(rdf_i+1))+ (8*rdf_i)]),
+             .RDCLK       (clk0),
+             .RDEN        (~rst_r),
+             .RST         (rst_r),          // or can use rst0
+             .WRCLK       (clk0),
+             .WREN        (~rst_r)
+             );
+      end
+    end else begin   // no ECC: one register stage and no FIFOs; rd_ecc_error is never assigned in this branch
+      assign rd_data_valid = fifo_rden_r0;
+      always @(posedge clk0) begin
+        rd_data_out_rise <= rise_data;
+        rd_data_out_fall <= fall_data;
+        fifo_rden_r0     <= rden;   // no reset term here, unlike the ECC branch
+      end
+    end
+  endgenerate
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_top.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_top.v
new file mode 100644
index 0000000..1c25ce1
--- /dev/null
+++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_top.v
@@ -0,0 +1,185 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc.
Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. +// +// This copyright and support notice must be retained as part +// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_usr_top.v
+// /___/   /\     Date Last Modified: $Date: 2008/05/08 15:20:47 $
+// \   \  /  \    Date Created: Mon Aug 28 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module interfaces with the user. The user should provide the data
+//   and various commands.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_usr_top #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameter values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter BANK_WIDTH     = 2,
+   parameter CS_BITS        = 0,
+   parameter COL_WIDTH      = 10,
+   parameter DQ_WIDTH       = 72,
+   parameter DQ_PER_DQS     = 8,
+   parameter APPDATA_WIDTH  = 144,
+   parameter ECC_ENABLE     = 0,
+   parameter DQS_WIDTH      = 9,
+   parameter ROW_WIDTH      = 14
+   )
+  (
+   input                                     clk0,
+   input                                     clk90,               // only forwarded to u_usr_wr
+   input                                     rst0,
+   input [DQ_WIDTH-1:0]                      rd_data_in_rise,     // drives a (DQS_WIDTH*DQ_PER_DQS)-bit port on ddr2_usr_rd; same width for the defaults (9*8 = 72)
+   input [DQ_WIDTH-1:0]                      rd_data_in_fall,
+   input [DQS_WIDTH-1:0]                     phy_calib_rden,      // connected to ddr2_usr_rd.ctrl_rden
+   input [DQS_WIDTH-1:0]                     phy_calib_rden_sel,  // connected to ddr2_usr_rd.ctrl_rden_sel
+   output                                    rd_data_valid,
+   output [APPDATA_WIDTH-1:0]                rd_data_fifo_out,    // {fall half, rise half}
+   input [2:0]                               app_af_cmd,
+   input [30:0]                              app_af_addr,
+   input                                     app_af_wren,
+   input                                     ctrl_af_rden,
+   output [2:0]                              af_cmd,
+   output [30:0]                             af_addr,
+   output                                    af_empty,
+   output                                    app_af_afull,
+   output [1:0]                              rd_ecc_error,
+   input                                     app_wdf_wren,
+   input [APPDATA_WIDTH-1:0]                 app_wdf_data,
+   input [(APPDATA_WIDTH/8)-1:0]             app_wdf_mask_data,
+   input                                     wdf_rden,
+   output                                    app_wdf_afull,
+   output [(2*DQ_WIDTH)-1:0]                 wdf_data,
+   output [((2*DQ_WIDTH)/8)-1:0]             wdf_mask_data
+   );
+
+  wire [(APPDATA_WIDTH/2)-1:0] i_rd_data_fifo_out_fall;
+  wire [(APPDATA_WIDTH/2)-1:0] i_rd_data_fifo_out_rise;
+
+  //***************************************************************************
+
+  assign rd_data_fifo_out = {i_rd_data_fifo_out_fall,   // fall data in the upper half, rise data in the lower half
+                             i_rd_data_fifo_out_rise};
+
+  // read data de-skew and ECC calculation
+  ddr2_usr_rd #
+    (
+     .DQ_PER_DQS    (DQ_PER_DQS),
+     .ECC_ENABLE    (ECC_ENABLE),
+     .APPDATA_WIDTH (APPDATA_WIDTH),
+     .DQS_WIDTH     (DQS_WIDTH)      // ECC_WIDTH is not overridden, so the ddr2_usr_rd default applies
+     )
+    u_usr_rd
+      (
+       .clk0             (clk0),
+       .rst0             (rst0),
+       .rd_data_in_rise  (rd_data_in_rise),
+       .rd_data_in_fall  (rd_data_in_fall),
+       .rd_ecc_error     (rd_ecc_error),
+       .ctrl_rden        (phy_calib_rden),
+       .ctrl_rden_sel    (phy_calib_rden_sel),
+       .rd_data_valid    (rd_data_valid),
+       .rd_data_out_rise (i_rd_data_fifo_out_rise),
+       .rd_data_out_fall (i_rd_data_fifo_out_fall)
+       );
+
+  // Command/Address FIFO
+  ddr2_usr_addr_fifo #
+    (
+     .BANK_WIDTH (BANK_WIDTH),
+     .COL_WIDTH  (COL_WIDTH),
+     .CS_BITS    (CS_BITS),
+     .ROW_WIDTH  (ROW_WIDTH)
+     )
+    u_usr_addr_fifo
+      (
+       .clk0         (clk0),
+       .rst0         (rst0),
+       .app_af_cmd   (app_af_cmd),
+       .app_af_addr  (app_af_addr),
+       .app_af_wren  (app_af_wren),
+       .ctrl_af_rden (ctrl_af_rden),
+       .af_cmd       (af_cmd),
+       .af_addr      (af_addr),
+       .af_empty     (af_empty),
+       .app_af_afull (app_af_afull)
+       );
+
+  ddr2_usr_wr #   // user write-data/mask path (app_wdf_* in, wdf_* out); presumably defined in ddr2_usr_wr.v - not visible here
+    (
+     .BANK_WIDTH    (BANK_WIDTH),
+     .COL_WIDTH     (COL_WIDTH),
+     .CS_BITS       (CS_BITS),
+     .DQ_WIDTH      (DQ_WIDTH),
+     .APPDATA_WIDTH (APPDATA_WIDTH),
+     .ECC_ENABLE    (ECC_ENABLE),
+     .ROW_WIDTH     (ROW_WIDTH)
+     )
+    u_usr_wr
+      (
+       .clk0              (clk0),
+       .clk90             (clk90),
+       .rst0              (rst0),
+       .app_wdf_wren      (app_wdf_wren),
+       .app_wdf_data      (app_wdf_data),
+       .app_wdf_mask_data (app_wdf_mask_data),
+       .wdf_rden          (wdf_rden),
+       .app_wdf_afull     (app_wdf_afull),
+       .wdf_data          (wdf_data),
+       .wdf_mask_data     (wdf_mask_data)
+       );
+
+endmodule
\ No newline at end of file
diff --git a/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_wr.v b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_wr.v
new file mode 100644
index 0000000..df2e260
--- /dev/null
+++ b/src/edu/berkeley/fleet/fpga/ddr2/ddr2_usr_wr.v
@@ -0,0 +1,338 @@
+//***************************************************************************** +// DISCLAIMER OF LIABILITY +// +// This text/file contains proprietary, confidential +// information of Xilinx, Inc., is distributed under license +// from Xilinx, Inc., and may be used, copied and/or +// disclosed only pursuant to the terms of a valid license +// agreement with Xilinx, Inc. Xilinx hereby grants you a +// license to use this text/file solely for design, simulation, +// implementation and creation of design files limited +// to Xilinx devices or technologies. Use with non-Xilinx +// devices or technologies is expressly prohibited and +// immediately terminates your license unless covered by +// a separate agreement. +// +// Xilinx is providing this design, code, or information +// "as-is" solely for use in developing programs and +// solutions for Xilinx devices, with no obligation on the +// part of Xilinx to provide support. By providing this design, +// code, or information as one possible implementation of +// this feature, application or standard, Xilinx is making no +// representation that this implementation is free from any +// claims of infringement. You are responsible for +// obtaining any rights you may require for your implementation. +// Xilinx expressly disclaims any warranty whatsoever with +// respect to the adequacy of the implementation, including +// but not limited to any warranties or representations that this +// implementation is free from claims of infringement, implied +// warranties of merchantability or fitness for a particular +// purpose. +// +// Xilinx products are not intended for use in life support +// appliances, devices, or systems. Use in such applications is +// expressly prohibited. +// +// Any modifications that are made to the Source Code are +// done at the users sole risk and will be unsupported. +// +// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved. 
+// +// This copyright and support notice must be retained as part +// of this text at all times. +//***************************************************************************** +// ____ ____ +// / /\/ / +// /___/ \ / Vendor: Xilinx +// \ \ \/ Version: 2.3 +// \ \ Application: MIG +// / / Filename: ddr2_usr_wr.v +// /___/ /\ Date Last Modified: $Date: 2008/05/08 15:20:47 $ +// \ \ / \ Date Created: Mon Aug 28 2006 +// \___\/\___\ +// +//Device: Virtex-5 +//Design Name: DDR/DDR2 +//Purpose: +// This module instantiates the FIFO36_72 write data/mask FIFOs (written on clk0, read on clk90) +//Reference: +//Revision History: +//***************************************************************************** + +`timescale 1ns/1ps + +module ddr2_usr_wr # + ( + // The following parameter defaults are for a 72-bit RDIMM design (ML561 Reference + // board design). Actual values may be different. Actual parameter values + // are passed from the design top module (ddr2_sdram). Please refer to + // the ddr2_sdram module for actual values. + parameter BANK_WIDTH = 2, + parameter COL_WIDTH = 10, + parameter CS_BITS = 0, + parameter DQ_WIDTH = 72, + parameter APPDATA_WIDTH = 144, + parameter ECC_ENABLE = 0, + parameter ROW_WIDTH = 14 + ) + ( + input clk0, + input clk90, + input rst0, + // Write data FIFO interface + input app_wdf_wren, + input [APPDATA_WIDTH-1:0] app_wdf_data, + input [(APPDATA_WIDTH/8)-1:0] app_wdf_mask_data, + input wdf_rden, + output app_wdf_afull, + output [(2*DQ_WIDTH)-1:0] wdf_data, + output [((2*DQ_WIDTH)/8)-1:0] wdf_mask_data + ); + + // determine number of FIFO72's to use based on data width + // round up to next integer value when determining WDF_FIFO_NUM + localparam WDF_FIFO_NUM = (ECC_ENABLE) ?
(APPDATA_WIDTH+63)/64 : + ((2*DQ_WIDTH)+63)/64; + // MASK_WIDTH = number of bytes in data bus + localparam MASK_WIDTH = DQ_WIDTH/8; + + wire [WDF_FIFO_NUM-1:0] i_wdf_afull; + wire [DQ_WIDTH-1:0] i_wdf_data_fall_in; + wire [DQ_WIDTH-1:0] i_wdf_data_fall_out; + wire [(64*WDF_FIFO_NUM)-1:0] i_wdf_data_in; + wire [(64*WDF_FIFO_NUM)-1:0] i_wdf_data_out; + wire [DQ_WIDTH-1:0] i_wdf_data_rise_in; + wire [DQ_WIDTH-1:0] i_wdf_data_rise_out; + wire [MASK_WIDTH-1:0] i_wdf_mask_data_fall_in; + wire [MASK_WIDTH-1:0] i_wdf_mask_data_fall_out; + wire [(8*WDF_FIFO_NUM)-1:0] i_wdf_mask_data_in; + wire [(8*WDF_FIFO_NUM)-1:0] i_wdf_mask_data_out; + wire [MASK_WIDTH-1:0] i_wdf_mask_data_rise_in; + wire [MASK_WIDTH-1:0] i_wdf_mask_data_rise_out; + reg rst_r; + + // ECC signals + wire [(2*DQ_WIDTH)-1:0] i_wdf_data_out_ecc; + wire [((2*DQ_WIDTH)/8)-1:0] i_wdf_mask_data_out_ecc; + wire [63:0] i_wdf_mask_data_out_ecc_wire; + wire [((2*DQ_WIDTH)/8)-1:0] mask_data_in_ecc; + wire [63:0] mask_data_in_ecc_wire; + + //*************************************************************************** + + assign app_wdf_afull = i_wdf_afull[0]; // only FIFO 0's ALMOSTFULL is reported; every FIFO shares the same WREN/RDEN + + always @(posedge clk0 ) + rst_r <= rst0; // registered copy of rst0; drives RST of every FIFO below + + genvar wdf_di_i; + genvar wdf_do_i; + genvar mask_i; + genvar wdf_i; + generate + if(ECC_ENABLE) begin // ECC code + + assign wdf_data = i_wdf_data_out_ecc; + + // the byte 9 dm is always held to 0 + assign wdf_mask_data = i_wdf_mask_data_out_ecc; + + + + // generate the write data FIFOs.
+ for (wdf_i = 0; wdf_i < WDF_FIFO_NUM; wdf_i = wdf_i + 1) begin: gen_wdf + + FIFO36_72 # + ( + .ALMOST_EMPTY_OFFSET (9'h007), + .ALMOST_FULL_OFFSET (9'h00F), + .DO_REG (1), // extra CC output delay + .EN_ECC_WRITE ("TRUE"), + .EN_ECC_READ ("FALSE"), + .EN_SYN ("FALSE"), + .FIRST_WORD_FALL_THROUGH ("FALSE") + ) + u_wdf_ecc + ( + .ALMOSTEMPTY (), + .ALMOSTFULL (i_wdf_afull[wdf_i]), + .DBITERR (), + .DO (i_wdf_data_out_ecc[((64*(wdf_i+1))+(wdf_i *8))-1: + (64*wdf_i)+(wdf_i *8)]), + .DOP (i_wdf_data_out_ecc[(72*(wdf_i+1))-1: + (64*(wdf_i+1))+ (8*wdf_i) ]), + .ECCPARITY (), + .EMPTY (), + .FULL (), + .RDCOUNT (), + .RDERR (), + .SBITERR (), + .WRCOUNT (), + .WRERR (), + .DI (app_wdf_data[(64*(wdf_i+1))-1: + (64*wdf_i)]), + .DIP (), // NOTE(review): left open; presumably the ECC encoder (EN_ECC_WRITE) supplies the parity bits - confirm + .RDCLK (clk90), + .RDEN (wdf_rden), + .RST (rst_r), // or can use rst0 + .WRCLK (clk0), + .WREN (app_wdf_wren) + ); + end + + // remapping the mask data. The mask data from user i/f does not have + // the mask for the ECC byte. Assigning 0 to the ECC mask byte. + for (mask_i = 0; mask_i < (DQ_WIDTH)/36; + mask_i = mask_i +1) begin: gen_mask + assign mask_data_in_ecc[((8*(mask_i+1))+ mask_i)-1:((8*mask_i)+mask_i)] + = app_wdf_mask_data[(8*(mask_i+1))-1:8*(mask_i)] ; + assign mask_data_in_ecc[((8*(mask_i+1))+mask_i)] = 1'd0; + end + + // assign ecc bits to temp variables to avoid + // sim warnings. Not all the 64 bits of the fifo + // are used in ECC mode.
+ assign mask_data_in_ecc_wire[((2*DQ_WIDTH)/8)-1:0] = mask_data_in_ecc; + assign mask_data_in_ecc_wire[63:((2*DQ_WIDTH)/8)] = + {(64-((2*DQ_WIDTH)/8)){1'b0}}; + assign i_wdf_mask_data_out_ecc = + i_wdf_mask_data_out_ecc_wire[((2*DQ_WIDTH)/8)-1:0]; + + + FIFO36_72 # + ( + .ALMOST_EMPTY_OFFSET (9'h007), + .ALMOST_FULL_OFFSET (9'h00F), + .DO_REG (1), // extra CC output delay + .EN_ECC_WRITE ("TRUE"), + .EN_ECC_READ ("FALSE"), + .EN_SYN ("FALSE"), + .FIRST_WORD_FALL_THROUGH ("FALSE") + ) + u_wdf_ecc_mask + ( + .ALMOSTEMPTY (), + .ALMOSTFULL (), + .DBITERR (), + .DO (i_wdf_mask_data_out_ecc_wire), + .DOP (), + .ECCPARITY (), + .EMPTY (), + .FULL (), + .RDCOUNT (), + .RDERR (), + .SBITERR (), + .WRCOUNT (), + .WRERR (), + .DI (mask_data_in_ecc_wire), + .DIP (), + .RDCLK (clk90), + .RDEN (wdf_rden), + .RST (rst_r), // or can use rst0 + .WRCLK (clk0), + .WREN (app_wdf_wren) + ); + end else begin + + //*********************************************************************** + + // Define intermediate buses: + assign i_wdf_data_rise_in + = app_wdf_data[DQ_WIDTH-1:0]; + assign i_wdf_data_fall_in + = app_wdf_data[(2*DQ_WIDTH)-1:DQ_WIDTH]; + assign i_wdf_mask_data_rise_in + = app_wdf_mask_data[MASK_WIDTH-1:0]; + assign i_wdf_mask_data_fall_in + = app_wdf_mask_data[(2*MASK_WIDTH)-1:MASK_WIDTH]; + + //*********************************************************************** + // Write data FIFO Input: + // Arrange DQ's so that the rise data and fall data are interleaved. + // the data arrives at the input of the wdf fifo as {fall,rise}.
+ // It is remapped as: + // {...fall[15:8],rise[15:8],fall[7:0],rise[7:0]} + // This is done to avoid having separate fifo's for rise and fall data + // and to keep rise/fall data for the same DQ's on same FIFO + // Data masks are interleaved in a similar manner + // NOTE: Initialization data from PHY_INIT module does not need to be + // interleaved - it's already in the correct format - and the same + // initialization pattern from PHY_INIT is sent to all write FIFOs + //*********************************************************************** + + for (wdf_di_i = 0; wdf_di_i < MASK_WIDTH; + wdf_di_i = wdf_di_i + 1) begin: gen_wdf_data_in + assign i_wdf_data_in[(16*wdf_di_i)+15:(16*wdf_di_i)] + = {i_wdf_data_fall_in[(8*wdf_di_i)+7:(8*wdf_di_i)], + i_wdf_data_rise_in[(8*wdf_di_i)+7:(8*wdf_di_i)]}; + assign i_wdf_mask_data_in[(2*wdf_di_i)+1:(2*wdf_di_i)] + = {i_wdf_mask_data_fall_in[wdf_di_i], + i_wdf_mask_data_rise_in[wdf_di_i]}; + end + + //*********************************************************************** + // Write data FIFO Output: + // FIFO DQ and mask outputs must be untangled and put in the standard + // format of {fall,rise}.
Same goes for mask output + //*********************************************************************** + + for (wdf_do_i = 0; wdf_do_i < MASK_WIDTH; + wdf_do_i = wdf_do_i + 1) begin: gen_wdf_data_out + assign i_wdf_data_rise_out[(8*wdf_do_i)+7:(8*wdf_do_i)] + = i_wdf_data_out[(16*wdf_do_i)+7:(16*wdf_do_i)]; + assign i_wdf_data_fall_out[(8*wdf_do_i)+7:(8*wdf_do_i)] + = i_wdf_data_out[(16*wdf_do_i)+15:(16*wdf_do_i)+8]; + assign i_wdf_mask_data_rise_out[wdf_do_i] + = i_wdf_mask_data_out[2*wdf_do_i]; + assign i_wdf_mask_data_fall_out[wdf_do_i] + = i_wdf_mask_data_out[(2*wdf_do_i)+1]; + end + + assign wdf_data = {i_wdf_data_fall_out, + i_wdf_data_rise_out}; + + assign wdf_mask_data = {i_wdf_mask_data_fall_out, + i_wdf_mask_data_rise_out}; + + //*********************************************************************** + + for (wdf_i = 0; wdf_i < WDF_FIFO_NUM; wdf_i = wdf_i + 1) begin: gen_wdf + + FIFO36_72 # + ( + .ALMOST_EMPTY_OFFSET (9'h007), + .ALMOST_FULL_OFFSET (9'h00F), + .DO_REG (1), // extra CC output delay + .EN_ECC_WRITE ("FALSE"), + .EN_ECC_READ ("FALSE"), + .EN_SYN ("FALSE"), + .FIRST_WORD_FALL_THROUGH ("FALSE") + ) + u_wdf + ( + .ALMOSTEMPTY (), + .ALMOSTFULL (i_wdf_afull[wdf_i]), + .DBITERR (), + .DO (i_wdf_data_out[(64*(wdf_i+1))-1:64*wdf_i]), + .DOP (i_wdf_mask_data_out[(8*(wdf_i+1))-1:8*wdf_i]), // interleaved data-mask bits come out on the 8 parity outputs + .ECCPARITY (), + .EMPTY (), + .FULL (), + .RDCOUNT (), + .RDERR (), + .SBITERR (), + .WRCOUNT (), + .WRERR (), + .DI (i_wdf_data_in[(64*(wdf_i+1))-1:64*wdf_i]), + .DIP (i_wdf_mask_data_in[(8*(wdf_i+1))-1:8*wdf_i]), // interleaved data-mask bits go in on the 8 parity inputs + .RDCLK (clk90), + .RDEN (wdf_rden), + .RST (rst_r), // or can use rst0 + .WRCLK (clk0), + .WREN (app_wdf_wren) + ); + end + end + endgenerate + +endmodule diff --git a/src/edu/berkeley/fleet/fpga/main.ucf b/src/edu/berkeley/fleet/fpga/main.ucf index 24fbf68..799515f 100644 --- a/src/edu/berkeley/fleet/fpga/main.ucf +++ b/src/edu/berkeley/fleet/fpga/main.ucf @@ -1,145 +1,3 @@ -Net DDR2_CAS_B LOC=E31; Net DDR2_CAS_B IOSTANDARD="SSTL18_II"; - -Net
DDR2_CKE<1> LOC=U30; Net DDR2_CKE<1> IOSTANDARD="SSTL18_II"; -Net DDR2_CKE<0> LOC=T28; Net DDR2_CKE<0> IOSTANDARD="SSTL18_II"; - -Net DDR2_RAS_B LOC=H30; Net DDR2_RAS_B IOSTANDARD="SSTL18_II"; - -Net DDR2_WE_B LOC=K29; Net DDR2_WE_B IOSTANDARD="SSTL18_II"; - -Net DDR2_ODT<1> LOC=F30; Net DDR2_ODT<1> IOSTANDARD="SSTL18_II"; -Net DDR2_ODT<0> LOC=F31; Net DDR2_ODT<0> IOSTANDARD="SSTL18_II"; - -Net DDR2_CS0_B<1> LOC=J29; Net DDR2_CS0_B<1> IOSTANDARD="SSTL18_II"; -Net DDR2_CS0_B<0> LOC=L29; Net DDR2_CS0_B<0> IOSTANDARD="SSTL18_II"; - -Net DDR2_CLK_N<1> LOC=F28; Net DDR2_CLK_N<1> IOSTANDARD="DIFF_SSTL18_II"; -Net DDR2_CLK_N<0> LOC=AJ29; Net DDR2_CLK_N<0> IOSTANDARD="DIFF_SSTL18_II"; - -Net DDR2_CLK_P<1> LOC=E28; Net DDR2_CLK_P<1> IOSTANDARD="DIFF_SSTL18_II"; -Net DDR2_CLK_P<0> LOC=AK29; Net DDR2_CLK_P<0> IOSTANDARD="DIFF_SSTL18_II"; - -Net DDR2_A<13> LOC=H29; Net DDR2_A<13> IOSTANDARD="SSTL18_II"; -Net DDR2_A<12> LOC=T31; Net DDR2_A<12> IOSTANDARD="SSTL18_II"; -Net DDR2_A<11> LOC=R29; Net DDR2_A<11> IOSTANDARD="SSTL18_II"; -Net DDR2_A<10> LOC=J31; Net DDR2_A<10> IOSTANDARD="SSTL18_II"; -Net DDR2_A<9> LOC=R28; Net DDR2_A<9> IOSTANDARD="SSTL18_II"; -Net DDR2_A<8> LOC=M31; Net DDR2_A<8> IOSTANDARD="SSTL18_II"; -Net DDR2_A<7> LOC=P30; Net DDR2_A<7> IOSTANDARD="SSTL18_II"; -Net DDR2_A<6> LOC=P31; Net DDR2_A<6> IOSTANDARD="SSTL18_II"; -Net DDR2_A<5> LOC=L31; Net DDR2_A<5> IOSTANDARD="SSTL18_II"; -Net DDR2_A<4> LOC=K31; Net DDR2_A<4> IOSTANDARD="SSTL18_II"; -Net DDR2_A<3> LOC=P29; Net DDR2_A<3> IOSTANDARD="SSTL18_II"; -Net DDR2_A<2> LOC=N29; Net DDR2_A<2> IOSTANDARD="SSTL18_II"; -Net DDR2_A<1> LOC=M30; Net DDR2_A<1> IOSTANDARD="SSTL18_II"; -Net DDR2_A<0> LOC=L30; Net DDR2_A<0> IOSTANDARD="SSTL18_II"; - -Net DDR2_BA<2> LOC=R31; Net DDR2_BA<2> IOSTANDARD="SSTL18_II"; -Net DDR2_BA<1> LOC=J30; Net DDR2_BA<1> IOSTANDARD="SSTL18_II"; -Net DDR2_BA<0> LOC=G31; Net DDR2_BA<0> IOSTANDARD="SSTL18_II"; - -Net DDR2_DQ<63> LOC=L24; Net DDR2_DQ<63> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<62> 
LOC=L25; Net DDR2_DQ<62> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<61> LOC=M25; Net DDR2_DQ<61> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<60> LOC=J27; Net DDR2_DQ<60> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<59> LOC=L26; Net DDR2_DQ<59> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<58> LOC=J24; Net DDR2_DQ<58> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<57> LOC=M26; Net DDR2_DQ<57> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<56> LOC=G25; Net DDR2_DQ<56> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<55> LOC=G26; Net DDR2_DQ<55> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<54> LOC=H24; Net DDR2_DQ<54> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<53> LOC=K28; Net DDR2_DQ<53> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<52> LOC=K27; Net DDR2_DQ<52> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<51> LOC=H25; Net DDR2_DQ<51> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<50> LOC=F25; Net DDR2_DQ<50> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<49> LOC=L28; Net DDR2_DQ<49> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<48> LOC=M28; Net DDR2_DQ<48> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<47> LOC=N28; Net DDR2_DQ<47> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<46> LOC=P27; Net DDR2_DQ<46> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<45> LOC=N25; Net DDR2_DQ<45> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<44> LOC=T24; Net DDR2_DQ<44> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<43> LOC=P26; Net DDR2_DQ<43> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<42> LOC=N24; Net DDR2_DQ<42> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<41> LOC=P25; Net DDR2_DQ<41> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<40> LOC=R24; Net DDR2_DQ<40> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<39> LOC=V24; Net DDR2_DQ<39> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<38> LOC=W26; Net DDR2_DQ<38> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<37> LOC=W25; Net DDR2_DQ<37> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<36> LOC=V28; Net DDR2_DQ<36> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<35> LOC=W24; Net DDR2_DQ<35> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<34> LOC=Y26; Net DDR2_DQ<34> 
IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<33> LOC=Y27; Net DDR2_DQ<33> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<32> LOC=V29; Net DDR2_DQ<32> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<31> LOC=W27; Net DDR2_DQ<31> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<30> LOC=V27; Net DDR2_DQ<30> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<29> LOC=W29; Net DDR2_DQ<29> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<28> LOC=AC30; Net DDR2_DQ<28> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<27> LOC=V30; Net DDR2_DQ<27> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<26> LOC=W31; Net DDR2_DQ<26> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<25> LOC=AB30; Net DDR2_DQ<25> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<24> LOC=AC29; Net DDR2_DQ<24> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<23> LOC=AA25; Net DDR2_DQ<23> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<22> LOC=AB27; Net DDR2_DQ<22> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<21> LOC=AA24; Net DDR2_DQ<21> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<20> LOC=AB26; Net DDR2_DQ<20> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<19> LOC=AA26; Net DDR2_DQ<19> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<18> LOC=AC27; Net DDR2_DQ<18> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<17> LOC=AB25; Net DDR2_DQ<17> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<16> LOC=AC28; Net DDR2_DQ<16> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<15> LOC=AB28; Net DDR2_DQ<15> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<14> LOC=AG28; Net DDR2_DQ<14> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<13> LOC=AJ26; Net DDR2_DQ<13> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<12> LOC=AG25; Net DDR2_DQ<12> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<11> LOC=AA28; Net DDR2_DQ<11> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<10> LOC=AH28; Net DDR2_DQ<10> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<9> LOC=AF28; Net DDR2_DQ<9> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<8> LOC=AH27; Net DDR2_DQ<8> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<7> LOC=AE29; Net DDR2_DQ<7> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<6> LOC=AD29; Net DDR2_DQ<6> 
IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<5> LOC=AF29; Net DDR2_DQ<5> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<4> LOC=AJ30; Net DDR2_DQ<4> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<3> LOC=AD30; Net DDR2_DQ<3> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<2> LOC=AF31; Net DDR2_DQ<2> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<1> LOC=AK31; Net DDR2_DQ<1> IOSTANDARD="SSTL18_II_DCI"; -Net DDR2_DQ<0> LOC=AF30; Net DDR2_DQ<0> IOSTANDARD="SSTL18_II_DCI"; - -Net DDR2_DM<7> LOC=J25; Net DDR2_DM<7> IOSTANDARD="SSTL18_II"; -Net DDR2_DM<6> LOC=F26; Net DDR2_DM<6> IOSTANDARD="SSTL18_II"; -Net DDR2_DM<5> LOC=P24; Net DDR2_DM<5> IOSTANDARD="SSTL18_II"; -Net DDR2_DM<4> LOC=V25; Net DDR2_DM<4> IOSTANDARD="SSTL18_II"; -Net DDR2_DM<3> LOC=Y31; Net DDR2_DM<3> IOSTANDARD="SSTL18_II"; -Net DDR2_DM<2> LOC=Y24; Net DDR2_DM<2> IOSTANDARD="SSTL18_II"; -Net DDR2_DM<1> LOC=AE28; Net DDR2_DM<1> IOSTANDARD="SSTL18_II"; -Net DDR2_DM<0> LOC=AJ31; Net DDR2_DM<0> IOSTANDARD="SSTL18_II"; - -Net DDR2_DQS_N<7> LOC=H27; Net DDR2_DQS_N<7> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_N<6> LOC=G28; Net DDR2_DQS_N<6> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_N<5> LOC=E27; Net DDR2_DQS_N<5> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_N<4> LOC=Y29; Net DDR2_DQS_N<4> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_N<3> LOC=AA31; Net DDR2_DQS_N<3> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_N<2> LOC=AJ27; Net DDR2_DQS_N<2> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_N<1> LOC=AK27; Net DDR2_DQS_N<1> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_N<0> LOC=AA30; Net DDR2_DQS_N<0> IOSTANDARD="DIFF_SSTL18_II_DCI"; - -Net DDR2_DQS_P<7> LOC=G27; Net DDR2_DQS_P<7> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_P<6> LOC=H28; Net DDR2_DQS_P<6> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_P<5> LOC=E26; Net DDR2_DQS_P<5> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_P<4> LOC=Y28; Net DDR2_DQS_P<4> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_P<3> LOC=AB31; Net DDR2_DQS_P<3> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net 
DDR2_DQS_P<2> LOC=AK26; Net DDR2_DQS_P<2> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_P<1> LOC=AK28; Net DDR2_DQS_P<1> IOSTANDARD="DIFF_SSTL18_II_DCI"; -Net DDR2_DQS_P<0> LOC=AA29; Net DDR2_DQS_P<0> IOSTANDARD="DIFF_SSTL18_II_DCI"; - -Net I2C_DDR2_SCL LOC=E29; - -Net I2C_DDR2_SDA LOC=F29; - -Net CLKBUF_Q1_N LOC=J19; -#Net CLKBUF_Q1_N PERIOD="200 Mhz"; -Net CLKBUF_Q1_P LOC=K18; -#Net CLKBUF_Q1_P PERIOD="200 Mhz"; - - ## Clock, Reset ############################################################################## Net clk_pin LOC=AH17; @@ -175,6 +33,46 @@ Net uart_out LOC=AG20; Net uart_out TIG; Net uart_out PULLUP; +## DVI ############################################################################## + +NET dvi_d0 LOC="AB8"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d1 LOC="AC8"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d2 LOC="AN12"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d3 LOC="AP12"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d4 LOC="AA9"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d5 LOC="AA8"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d6 LOC="AM13"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d7 LOC="AN13"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d8 LOC="AA10"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d9 LOC="AB10"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d10 LOC="AP14"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_d11 LOC="AN14"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_de LOC="AE8"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_h LOC="AM12"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_reset_b LOC="AK6"; # Bank 18, Vcco=3.3V, No DCI +NET dvi_v LOC="AM11"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_xclk_n LOC="AL10"; # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors +NET dvi_xclk_p LOC="AL11"; # 
Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors + +#NET dvi_gpio1 LOC="N30" | IOSTANDARD="LVCMOS18"; # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors +#NET dvi_iic_scl LOC="U27" | IOSTANDARD="LVCMOS18"; # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors +#NET dvi_iic_sda LOC="T29" | IOSTANDARD="LVCMOS18"; # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors + +NET gpio_led_c LOC="AJ6"; # Bank 18, Vcco=3.3V, No DCI +NET gpio_led_e LOC="AK7"; # Bank 18, Vcco=3.3V, No DCI +NET gpio_led_n LOC="U8"; # Bank 18, Vcco=3.3V, No DCI +NET gpio_led_s LOC="V8"; # Bank 18, Vcco=3.3V, No DCI +NET gpio_led_w LOC="AJ7"; # Bank 18, Vcco=3.3V, No DCI +NET gpio_led_0 LOC="H18"; # Bank 3, Vcco=2.5V, No DCI +NET gpio_led_1 LOC="L18"; # Bank 3, Vcco=2.5V, No DCI +NET gpio_led_2 LOC="G15"; # Bank 3, Vcco=2.5V, No DCI +NET gpio_led_3 LOC="AD26" | IOSTANDARD="LVCMOS18"; # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors +NET gpio_led_4 LOC="G16"; # Bank 3, Vcco=2.5V, No DCI +NET gpio_led_5 LOC="AD25" | IOSTANDARD="LVCMOS18"; # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors +NET gpio_led_6 LOC="AD24" | IOSTANDARD="LVCMOS18"; # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors +NET gpio_led_7 LOC="AE24" | IOSTANDARD="LVCMOS18"; # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors + + ## VGA ############################################################################## #net "vga_hsync" loc = f9; @@ -225,315 +123,393 @@ Net uart_out PULLUP; #net "vga_*" iostandard = lvcmos33; # -### DRAM ############################################################################## - -NET "clk_pin" TNM="SYS_CLK"; -#NET "*/*/clkgen/write_clk_u" TNM="WRITE_CLK"; -#NET "*/*/clkgen/write_clk90_u" TNM="WRITE_CLK"; -#NET "*/*/clkgen/read_clk_u" TNM="READ_CLK"; -#TIMESPEC "TS_SYS_DDRREAD"=FROM "SYS_CLK" TO "WRITE_CLK" TIG; -#TIMESPEC "TS_DDRREAD_SYS"=FROM "WRITE_CLK" TO "SYS_CLK" TIG; -#TIMESPEC "TS_SYS_DDRWRITE"=FROM "SYS_CLK" TO "READ_CLK" TIG; -#TIMESPEC "TS_DDRWRITE_SYS"=FROM "READ_CLK" TO "SYS_CLK" TIG; 
-#TIMESPEC "TS_DDRREAD_DDRWRITE"=FROM "READ_CLK" TO "WRITE_CLK" TIG; -#TIMESPEC "TS_DDRWRITE_DDRREAD"=FROM "WRITE_CLK" TO "READ_CLK" TIG; - - -##============================================================================== -## File: $URL: svn+ssh://repositorypub@repository.eecs.berkeley.edu/public/Projects/GateLib/branches/dev/Firmware/DRAM/Hardware/DDR2SDRAM/Constraints/DDR2SDRAM_ML505_110.ucf $ -## Version: $Revision: 16601 $ -## Author: Greg Gibeling (http://www.eecs.berkeley.edu/~gdgib/) -## Copyright: Copyright 2005-2008 UC Berkeley -##============================================================================== - -##============================================================================== -## Section: License -##============================================================================== -## Copyright (c) 2005-2008, Regents of the University of California -## All rights reserved. +############################################################################## +############################################################################## +############################################################################## +# DDR2 + +############################################################################ +## +## Xilinx, Inc. 
2006 www.xilinx.com +## ÐÇÆÚÒ» ¾ÅÔ 22 11:53:57 2008 +## Generated by MIG Version 2.3 +## +############################################################################ +## File name : ddr2_sdram.ucf +## +## Details : Constraints file +## FPGA family: virtex5 +## FPGA: xc5vlx110t-ff1136 +## Speedgrade: -1 +## Design Entry: VERILOG +## Frequency: 266.66 MHz +## Design: with Test bench +## DCM Used: Enable +## Two Bytes per Bank:Disable +## No.Of Controllers: 1 ## -## Redistribution and use in source and binary forms, with or without modification, -## are permitted provided that the following conditions are met: -## -## - Redistributions of source code must retain the above copyright notice, -## this list of conditions and the following disclaimer. -## - Redistributions in binary form must reproduce the above copyright -## notice, this list of conditions and the following disclaimer -## in the documentation and/or other materials provided with the -## distribution. -## - Neither the name of the University of California, Berkeley nor the -## names of its contributors may be used to endorse or promote -## products derived from this software without specific prior -## written permission. -## -## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -## (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -## ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-##============================================================================== - -##============================================================================== -## Xilinx, Inc. 2006 www.xilinx.com -## Mon Jul 7 12:05:40 2008 -## Generated by MIG Version 2.2 -##============================================================================== -## File name: ddr2_sdram.ucf -## Details: Constraints file -## FPGA family: virtex5 -## FPGA: xc5vlx110t-ff1136 -## Speedgrade: -1 -## Design Entry: VERILOG -## Frequency: 266.66 MHz -## Design: without Test bench -## DCM Used: Disable -## Two Bytes per Bank: Disable -## Compatible FPGA's: xc5vlx85t-ff1136,xc5vlx50t-ff1136,xc5vlx155t-ff1136,xc5vfx70t-ff1136,xc5vfx100t-ff1136,xc5vsx50t-ff1136,xc5vsx95t-ff1136 -## No.Of Controllers: 1 -##============================================================================== - -##------------------------------------------------------------------------------ -## Section: Pin Constraints (use FPGA_TOP instead) -##------------------------------------------------------------------------------ -#NET "ddr2_dq[*]" IOSTANDARD = SSTL18_II_DCI; -#NET "ddr2_a[*]" IOSTANDARD = SSTL18_II; -#NET "ddr2_ba[*]" IOSTANDARD = SSTL18_II; -#NET "ddr2_ras_n" IOSTANDARD = SSTL18_II; -#NET "ddr2_cas_n" IOSTANDARD = SSTL18_II; -#NET "ddr2_we_n" IOSTANDARD = SSTL18_II; -#NET "ddr2_cs_n[*]" IOSTANDARD = SSTL18_II; -#NET "ddr2_odt[*]" IOSTANDARD = SSTL18_II; -#NET "ddr2_cke[*]" IOSTANDARD = SSTL18_II; -#NET "ddr2_dm[*]" IOSTANDARD = SSTL18_II; -#NET "ddr2_dqs[*]" IOSTANDARD = DIFF_SSTL18_II_DCI; -#NET "ddr2_dqs_n[*]" IOSTANDARD = DIFF_SSTL18_II_DCI; -#NET "ddr2_ck[*]" IOSTANDARD = DIFF_SSTL18_II; -#NET "ddr2_ck_n[*]" IOSTANDARD = DIFF_SSTL18_II; -##------------------------------------------------------------------------------ - -##------------------------------------------------------------------------------ -## Section: Clock Constraints 
-##------------------------------------------------------------------------------ -NET "**/Clock" TNM_NET = "SYS_Clock"; -TIMESPEC "TS_SYS_Clock" = PERIOD "SYS_Clock" 5 ns HIGH 50 %; - -NET "**/ClockP90" TNM_NET = "SYS_ClockP90"; -TIMESPEC "TS_SYS_ClockP90" = PERIOD "SYS_ClockP90" 5 ns HIGH 50 %; - -NET "**/ClockD2" TNM_NET = "SYS_ClockD2"; -TIMESPEC "TS_SYS_ClockD2" = PERIOD "SYS_ClockD2" 7.5 ns HIGH 50 %; - -#NET "*/ClockF200" TNM_NET = "SYS_ClockF200"; -#TIMESPEC "TS_SYS_ClockF200" = PERIOD "SYS_ClockF200" 5 ns HIGH 50 %; -##------------------------------------------------------------------------------ - -##============================================================================== -## Section: Controller 0 -## Desc: Memory Device: DDR2_SDRAM->SODIMMs->MT4HTF3264HY-53E -## Data Width: 64 -## Data Mask: 1 -##============================================================================== - -##------------------------------------------------------------------------------ -## Section: IDELAYCTRL Location Constraints -##------------------------------------------------------------------------------ -INST "**/*IDELAYCTRL_INST[0].u_idelayctrl" LOC=IDELAYCTRL_X0Y2; -INST "**/*IDELAYCTRL_INST[1].u_idelayctrl" LOC=IDELAYCTRL_X0Y1; -INST "**/*IDELAYCTRL_INST[2].u_idelayctrl" LOC=IDELAYCTRL_X0Y6; -##------------------------------------------------------------------------------ - -##------------------------------------------------------------------------------ -## Section: Multicycle Paths -## Desc: Define multicycle paths - these paths may take longer because -## additional time allowed for logic to settle in -## calibration/initialization FSM -##------------------------------------------------------------------------------ +############################################################################ + +############################################################################ +# Clock constraints # 
+############################################################################ + +NET "*/u_ddr2_infrastructure/sys_clk_ibufg" TNM_NET = "SYS_CLK"; +TIMESPEC "TS_SYS_CLK" = PERIOD "SYS_CLK" 5 ns HIGH 50 %; + +NET "*/u_ddr2_infrastructure/clk200_ibufg" TNM_NET = "SYS_CLK_200"; +TIMESPEC "TS_SYS_CLK_200" = PERIOD "SYS_CLK_200" 5 ns HIGH 50 %; + +############################################################################ +######################################################################## +# Controller 0 +# Memory Device: DDR2_SDRAM->SODIMMs->MT4HTF3264HY-53E # +# Data Width: 64 # +# Data Mask: 1 # +######################################################################## + +################################################################################ +# I/O STANDARDS +################################################################################ + +NET "ddr2_dq[*]" IOSTANDARD = SSTL18_II_DCI; +NET "ddr2_a[*]" IOSTANDARD = SSTL18_II; +NET "ddr2_ba[*]" IOSTANDARD = SSTL18_II; +NET "ddr2_ras_n" IOSTANDARD = SSTL18_II; +NET "ddr2_cas_n" IOSTANDARD = SSTL18_II; +NET "ddr2_we_n" IOSTANDARD = SSTL18_II; +NET "ddr2_cs_n[*]" IOSTANDARD = SSTL18_II; +NET "ddr2_odt[*]" IOSTANDARD = SSTL18_II; +NET "ddr2_cke[*]" IOSTANDARD = SSTL18_II; +NET "ddr2_dm[*]" IOSTANDARD = SSTL18_II; +#NET "sys_clk_p" IOSTANDARD = LVDS_25 | DIFF_TERM = TRUE; +#NET "sys_clk_n" IOSTANDARD = LVDS_25 | DIFF_TERM = TRUE; +#NET "clk200_p" IOSTANDARD = LVDS_25 | DIFF_TERM = TRUE; +#NET "clk200_n" IOSTANDARD = LVDS_25 | DIFF_TERM = TRUE; +#NET "sys_rst_n" IOSTANDARD = LVCMOS18; +#NET "phy_init_done" IOSTANDARD = LVCMOS18; +#NET "error" IOSTANDARD = LVCMOS18; +NET "ddr2_dqs[*]" IOSTANDARD = DIFF_SSTL18_II_DCI; +NET "ddr2_dqs_n[*]" IOSTANDARD = DIFF_SSTL18_II_DCI; +NET "ddr2_ck[*]" IOSTANDARD = DIFF_SSTL18_II; +NET "ddr2_ck_n[*]" IOSTANDARD = DIFF_SSTL18_II; + +################################################################################ +# Location Constraints 
+################################################################################ + +NET "ddr2_dq[0]" LOC = "AF30" ; #Bank 17 +NET "ddr2_dq[1]" LOC = "AK31" ; #Bank 17 +NET "ddr2_dq[2]" LOC = "AF31" ; #Bank 17 +NET "ddr2_dq[3]" LOC = "AD30" ; #Bank 17 +NET "ddr2_dq[4]" LOC = "AJ30" ; #Bank 17 +NET "ddr2_dq[5]" LOC = "AF29" ; #Bank 17 +NET "ddr2_dq[6]" LOC = "AD29" ; #Bank 17 +NET "ddr2_dq[7]" LOC = "AE29" ; #Bank 17 +NET "ddr2_dq[8]" LOC = "AH27" ; #Bank 21 +NET "ddr2_dq[9]" LOC = "AF28" ; #Bank 21 +NET "ddr2_dq[10]" LOC = "AH28" ; #Bank 21 +NET "ddr2_dq[11]" LOC = "AA28" ; #Bank 21 +NET "ddr2_dq[12]" LOC = "AG25" ; #Bank 21 +NET "ddr2_dq[13]" LOC = "AJ26" ; #Bank 21 +NET "ddr2_dq[14]" LOC = "AG28" ; #Bank 21 +NET "ddr2_dq[15]" LOC = "AB28" ; #Bank 21 +NET "ddr2_dq[16]" LOC = "AC28" ; #Bank 21 +NET "ddr2_dq[17]" LOC = "AB25" ; #Bank 21 +NET "ddr2_dq[18]" LOC = "AC27" ; #Bank 21 +NET "ddr2_dq[19]" LOC = "AA26" ; #Bank 21 +NET "ddr2_dq[20]" LOC = "AB26" ; #Bank 21 +NET "ddr2_dq[21]" LOC = "AA24" ; #Bank 21 +NET "ddr2_dq[22]" LOC = "AB27" ; #Bank 21 +NET "ddr2_dq[23]" LOC = "AA25" ; #Bank 21 +NET "ddr2_dq[24]" LOC = "AC29" ; #Bank 17 +NET "ddr2_dq[25]" LOC = "AB30" ; #Bank 17 +NET "ddr2_dq[26]" LOC = "W31" ; #Bank 17 +NET "ddr2_dq[27]" LOC = "V30" ; #Bank 17 +NET "ddr2_dq[28]" LOC = "AC30" ; #Bank 17 +NET "ddr2_dq[29]" LOC = "W29" ; #Bank 17 +NET "ddr2_dq[30]" LOC = "V27" ; #Bank 17 +NET "ddr2_dq[31]" LOC = "W27" ; #Bank 17 +NET "ddr2_dq[32]" LOC = "V29" ; #Bank 17 +NET "ddr2_dq[33]" LOC = "Y27" ; #Bank 17 +NET "ddr2_dq[34]" LOC = "Y26" ; #Bank 17 +NET "ddr2_dq[35]" LOC = "W24" ; #Bank 17 +NET "ddr2_dq[36]" LOC = "V28" ; #Bank 17 +NET "ddr2_dq[37]" LOC = "W25" ; #Bank 17 +NET "ddr2_dq[38]" LOC = "W26" ; #Bank 17 +NET "ddr2_dq[39]" LOC = "V24" ; #Bank 17 +NET "ddr2_dq[40]" LOC = "R24" ; #Bank 19 +NET "ddr2_dq[41]" LOC = "P25" ; #Bank 19 +NET "ddr2_dq[42]" LOC = "N24" ; #Bank 19 +NET "ddr2_dq[43]" LOC = "P26" ; #Bank 19 +NET "ddr2_dq[44]" LOC = "T24" ; #Bank 19 +NET 
"ddr2_dq[45]" LOC = "N25" ; #Bank 19 +NET "ddr2_dq[46]" LOC = "P27" ; #Bank 19 +NET "ddr2_dq[47]" LOC = "N28" ; #Bank 19 +NET "ddr2_dq[48]" LOC = "M28" ; #Bank 19 +NET "ddr2_dq[49]" LOC = "L28" ; #Bank 19 +NET "ddr2_dq[50]" LOC = "F25" ; #Bank 19 +NET "ddr2_dq[51]" LOC = "H25" ; #Bank 19 +NET "ddr2_dq[52]" LOC = "K27" ; #Bank 19 +NET "ddr2_dq[53]" LOC = "K28" ; #Bank 19 +NET "ddr2_dq[54]" LOC = "H24" ; #Bank 19 +NET "ddr2_dq[55]" LOC = "G26" ; #Bank 19 +NET "ddr2_dq[56]" LOC = "G25" ; #Bank 19 +NET "ddr2_dq[57]" LOC = "M26" ; #Bank 19 +NET "ddr2_dq[58]" LOC = "J24" ; #Bank 19 +NET "ddr2_dq[59]" LOC = "L26" ; #Bank 19 +NET "ddr2_dq[60]" LOC = "J27" ; #Bank 19 +NET "ddr2_dq[61]" LOC = "M25" ; #Bank 19 +NET "ddr2_dq[62]" LOC = "L25" ; #Bank 19 +NET "ddr2_dq[63]" LOC = "L24" ; #Bank 19 +NET "ddr2_a[12]" LOC = "T31" ; #Bank 15 +NET "ddr2_a[11]" LOC = "R29" ; #Bank 15 +NET "ddr2_a[10]" LOC = "J31" ; #Bank 15 +NET "ddr2_a[9]" LOC = "R28" ; #Bank 15 +NET "ddr2_a[8]" LOC = "M31" ; #Bank 15 +NET "ddr2_a[7]" LOC = "P30" ; #Bank 15 +NET "ddr2_a[6]" LOC = "P31" ; #Bank 15 +NET "ddr2_a[5]" LOC = "L31" ; #Bank 15 +NET "ddr2_a[4]" LOC = "K31" ; #Bank 15 +NET "ddr2_a[3]" LOC = "P29" ; #Bank 15 +NET "ddr2_a[2]" LOC = "N29" ; #Bank 15 +NET "ddr2_a[1]" LOC = "M30" ; #Bank 15 +NET "ddr2_a[0]" LOC = "L30" ; #Bank 15 +NET "ddr2_ba[1]" LOC = "J30" ; #Bank 15 +NET "ddr2_ba[0]" LOC = "G31" ; #Bank 15 +NET "ddr2_ras_n" LOC = "H30" ; #Bank 15 +NET "ddr2_cas_n" LOC = "E31" ; #Bank 15 +NET "ddr2_we_n" LOC = "K29" ; #Bank 15 +NET "ddr2_cs_n[0]" LOC = "L29" ; #Bank 15 +NET "ddr2_odt[0]" LOC = "F31" ; #Bank 15 +NET "ddr2_cke[0]" LOC = "T28" ; #Bank 15 +NET "ddr2_dm[0]" LOC = "AJ31" ; #Bank 17 +NET "ddr2_dm[1]" LOC = "AE28" ; #Bank 21 +NET "ddr2_dm[2]" LOC = "Y24" ; #Bank 21 +NET "ddr2_dm[3]" LOC = "Y31" ; #Bank 17 +NET "ddr2_dm[4]" LOC = "V25" ; #Bank 17 +NET "ddr2_dm[5]" LOC = "P24" ; #Bank 19 +NET "ddr2_dm[6]" LOC = "F26" ; #Bank 19 +NET "ddr2_dm[7]" LOC = "J25" ; #Bank 19 +NET "sys_clk_p" LOC = 
"H14" ; #Bank 3 +NET "sys_clk_n" LOC = "H15" ; #Bank 3 +NET "clk200_p" LOC = "L19" ; #Bank 3 +NET "clk200_n" LOC = "K19" ; #Bank 3 +NET "sys_rst_n" LOC = "E9"; #Bank 20 +#NET "phy_init_done" LOC = "H18" ; #Bank 3 +NET "error" LOC = "F6"; #Bank 12 +NET "ddr2_dqs[0]" LOC = "AA29" ; #Bank 17 +NET "ddr2_dqs_n[0]" LOC = "AA30" ; #Bank 17 +NET "ddr2_dqs[1]" LOC = "AK28" ; #Bank 21 +NET "ddr2_dqs_n[1]" LOC = "AK27" ; #Bank 21 +NET "ddr2_dqs[2]" LOC = "AK26" ; #Bank 21 +NET "ddr2_dqs_n[2]" LOC = "AJ27" ; #Bank 21 +NET "ddr2_dqs[3]" LOC = "AB31" ; #Bank 17 +NET "ddr2_dqs_n[3]" LOC = "AA31" ; #Bank 17 +NET "ddr2_dqs[4]" LOC = "Y28" ; #Bank 17 +NET "ddr2_dqs_n[4]" LOC = "Y29" ; #Bank 17 +NET "ddr2_dqs[5]" LOC = "E26" ; #Bank 19 +NET "ddr2_dqs_n[5]" LOC = "E27" ; #Bank 19 +NET "ddr2_dqs[6]" LOC = "H28" ; #Bank 19 +NET "ddr2_dqs_n[6]" LOC = "G28" ; #Bank 19 +NET "ddr2_dqs[7]" LOC = "G27" ; #Bank 19 +NET "ddr2_dqs_n[7]" LOC = "H27" ; #Bank 19 +NET "ddr2_ck[0]" LOC = "AK29" ; #Bank 21 +NET "ddr2_ck_n[0]" LOC = "AJ29" ; #Bank 21 +NET "ddr2_ck[1]" LOC = "E28" ; #Bank 19 +NET "ddr2_ck_n[1]" LOC = "F28" ; #Bank 19 + +################################################################################ +#IDELAYCTRL Location Constraints +################################################################################ + +INST "*/IDELAYCTRL_INST[0].u_idelayctrl" LOC=IDELAYCTRL_X0Y1; +INST "*/IDELAYCTRL_INST[1].u_idelayctrl" LOC=IDELAYCTRL_X0Y2; +INST "*/IDELAYCTRL_INST[2].u_idelayctrl" LOC=IDELAYCTRL_X0Y6; + +############################################################################### +# Define multicycle paths - these paths may take longer because additional +# time allowed for logic to settle in calibration/initialization FSM +############################################################################### + # MIG 2.1: Eliminate Timegroup definitions for CLK0, and CLK90. 
Instead trace # multicycle paths from originating flip-flop to ANY destination # flip-flop (or in some cases, it can also be a BRAM) # MUX Select for either rising/falling CLK0 for 2nd stage read capture -INST "**/u_phy_calib/*gen_rd_data_sel*.u_ff_rd_data_sel" TNM = "TNM_RD_DATA_SEL"; -TIMESPEC "TS_MC_RD_DATA_SEL" = FROM "TNM_RD_DATA_SEL" TO FFS "TS_SYS_Clock" * 4; - +INST "*/u_phy_calib/gen_rd_data_sel*.u_ff_rd_data_sel" TNM = "TNM_RD_DATA_SEL"; +TIMESPEC "TS_MC_RD_DATA_SEL" = FROM "TNM_RD_DATA_SEL" TO FFS +"TS_SYS_CLK" * 4; # MUX select for read data - optional delay on data to account for byte skews -#INST "*/u_usr_rd/*gen_rden_sel_mux*.u_ff_rden_sel_mux" TNM = "TNM_RDEN_SEL_MUX"; -#TIMESPEC "TS_MC_RDEN_SEL_MUX" = FROM "TNM_RDEN_SEL_MUX" TO FFS "TS_SYS_Clock" * 4; - +#INST "*/u_usr_rd/gen_rden_sel_mux*.u_ff_rden_sel_mux" TNM = "TNM_RDEN_SEL_MUX"; +#TIMESPEC "TS_MC_RDEN_SEL_MUX" = FROM "TNM_RDEN_SEL_MUX" TO FFS +#"TS_SYS_CLK" * 4; # Calibration/Initialization complete status flag (for PHY logic only) - can # be used to drive both flip-flops and BRAMs INST "*/u_phy_init/u_ff_phy_init_data_sel" TNM = "TNM_PHY_INIT_DATA_SEL"; -TIMESPEC "TS_MC_PHY_INIT_DATA_SEL_0" = FROM "TNM_PHY_INIT_DATA_SEL" TO FFS "TS_SYS_Clock" * 4; -TIMESPEC "TS_MC_PHY_INIT_DATA_SEL_90" = FROM "TNM_PHY_INIT_DATA_SEL" TO RAMS "TS_SYS_Clock" * 4; - +TIMESPEC "TS_MC_PHY_INIT_DATA_SEL_0" = FROM "TNM_PHY_INIT_DATA_SEL" TO FFS +"TS_SYS_CLK" * 4; +TIMESPEC "TS_MC_PHY_INIT_DATA_SEL_90" = FROM "TNM_PHY_INIT_DATA_SEL" TO RAMS +"TS_SYS_CLK" * 4; # Select (address) bits for SRL32 shift registers used in stage3/stage4 # calibration -INST "**/u_phy_calib/*gen_gate_dly*.u_ff_gate_dly" TNM = "TNM_GATE_DLY"; -TIMESPEC "TS_MC_GATE_DLY" = FROM "TNM_GATE_DLY" TO FFS "TS_SYS_Clock" * 4; -INST "**/u_phy_calib/*gen_rden_dly*.u_ff_rden_dly" TNM = "TNM_RDEN_DLY"; -TIMESPEC "TS_MC_RDEN_DLY" = FROM "TNM_RDEN_DLY" TO FFS "TS_SYS_Clock" * 4; -INST "**/u_phy_calib/*gen_cal_rden_dly*.u_ff_cal_rden_dly" TNM = 
"TNM_CAL_RDEN_DLY"; -TIMESPEC "TS_MC_CAL_RDEN_DLY" = FROM "TNM_CAL_RDEN_DLY" TO FFS "TS_SYS_Clock" * 4; -##------------------------------------------------------------------------------ - -##------------------------------------------------------------------------------ -## Section: DQS Squelch Constraints -## Desc: DQS Read Post amble Glitch Squelch circuit related constraints -## LOC placement of DQS-squelch related IDDR and IDELAY elements -## Each circuit can be located at any of the following locations: -## 1. Unused "N"-side of DQS differential pair I/O -## 2. DM data mask (output only, input side is free for use) -## 3. Any output-only site -##------------------------------------------------------------------------------ -INST "**/*gen_dqs[0].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y96"; -INST "**/*gen_dqs[0].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y96"; -INST "**/*gen_dqs[1].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y58"; -INST "**/*gen_dqs[1].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y58"; -INST "**/*gen_dqs[2].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y62"; -INST "**/*gen_dqs[2].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y62"; -INST "**/*gen_dqs[3].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y100"; -INST "**/*gen_dqs[3].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y100"; -INST "**/*gen_dqs[4].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y102"; -INST "**/*gen_dqs[4].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y102"; -INST "**/*gen_dqs[5].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y256"; -INST "**/*gen_dqs[5].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y256"; -INST "**/*gen_dqs[6].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y260"; -INST "**/*gen_dqs[6].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y260"; -INST "**/*gen_dqs[7].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y262"; -INST "**/*gen_dqs[7].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y262"; -##------------------------------------------------------------------------------ - 
-##------------------------------------------------------------------------------ -## Section: DQS CE LOCs -## Desc: LOC and timing constraints for flop driving DQS CE enable signal -## from fabric logic. Even though the absolute delay on this path -## is calibrated out (when synchronizing this output to DQS), the -## delay should still be kept as low as possible to reduce -## post-calibration voltage/temp variations - these are roughly -## proportional to the absolute delay of the path -##------------------------------------------------------------------------------ -INST "**/u_phy_calib/*gen_gate[0].u_en_dqs_ff" LOC = SLICE_X0Y48; -INST "**/u_phy_calib/*gen_gate[1].u_en_dqs_ff" LOC = SLICE_X0Y29; -INST "**/u_phy_calib/*gen_gate[2].u_en_dqs_ff" LOC = SLICE_X0Y31; -INST "**/u_phy_calib/*gen_gate[3].u_en_dqs_ff" LOC = SLICE_X0Y50; -INST "**/u_phy_calib/*gen_gate[4].u_en_dqs_ff" LOC = SLICE_X0Y51; -INST "**/u_phy_calib/*gen_gate[5].u_en_dqs_ff" LOC = SLICE_X0Y128; -INST "**/u_phy_calib/*gen_gate[6].u_en_dqs_ff" LOC = SLICE_X0Y130; -INST "**/u_phy_calib/*gen_gate[7].u_en_dqs_ff" LOC = SLICE_X0Y131; -##------------------------------------------------------------------------------ - -##------------------------------------------------------------------------------ -## Section: DQS Gate Control -## Desc: Control for DQS gate - from fabric flop. Prevent "runaway" -## delay - two parts to this path: (1) from fabric flop to IDELAY, -## (2) from IDELAY to asynchronous reset of IDDR that drives the DQ -## CE's. This can be relaxed by the user for lower frequencies: -## 300MHz = 850ps, 267MHz = 900ps. At 200MHz = 950ps. -## In general PAR should be able to route this within 900ps over -## all speed grades. 
-##------------------------------------------------------------------------------ -NET "**/u_phy_io/en_dqs*" MAXDELAY = 600 ps; -NET "**/u_phy_io/*gen_dqs*.u_iob_dqs/en_dqs_sync" MAXDELAY = 850 ps; -##------------------------------------------------------------------------------ - -##------------------------------------------------------------------------------ -## Section: IDDR Half Cycles -## Desc: "Half-cycle" path constraint from IDDR to CE pin for all DQ -## IDDR's for DQS Read Post amble Glitch Squelch circuit. -##------------------------------------------------------------------------------ +INST "*/u_phy_calib/gen_gate_dly*.u_ff_gate_dly" TNM = "TNM_GATE_DLY"; +TIMESPEC "TS_MC_GATE_DLY" = FROM "TNM_GATE_DLY" TO FFS "TS_SYS_CLK" * 4; +INST "*/u_phy_calib/gen_rden_dly*.u_ff_rden_dly" TNM = "TNM_RDEN_DLY"; +TIMESPEC "TS_MC_RDEN_DLY" = FROM "TNM_RDEN_DLY" TO FFS "TS_SYS_CLK" * 4; +INST "*/u_phy_calib/gen_cal_rden_dly*.u_ff_cal_rden_dly" + TNM = "TNM_CAL_RDEN_DLY"; +TIMESPEC "TS_MC_CAL_RDEN_DLY" = FROM "TNM_CAL_RDEN_DLY" TO FFS + "TS_SYS_CLK" * 4; + +############################################################################### +# DQS Read Post amble Glitch Squelch circuit related constraints +############################################################################### + +############################################################################### +# LOC placement of DQS-squelch related IDDR and IDELAY elements +# Each circuit can be located at any of the following locations: +# 1. Unused "N"-side of DQS differential pair I/O +# 2. DM data mask (output only, input side is free for use) +# 3. 
Any output-only site +############################################################################### + +INST "*/gen_dqs[0].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y96"; +INST "*/gen_dqs[0].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y96"; +INST "*/gen_dqs[1].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y58"; +INST "*/gen_dqs[1].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y58"; +INST "*/gen_dqs[2].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y62"; +INST "*/gen_dqs[2].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y62"; +INST "*/gen_dqs[3].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y100"; +INST "*/gen_dqs[3].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y100"; +INST "*/gen_dqs[4].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y102"; +INST "*/gen_dqs[4].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y102"; +INST "*/gen_dqs[5].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y256"; +INST "*/gen_dqs[5].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y256"; +INST "*/gen_dqs[6].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y260"; +INST "*/gen_dqs[6].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y260"; +INST "*/gen_dqs[7].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y262"; +INST "*/gen_dqs[7].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y262"; + +############################################################################### +# LOC and timing constraints for flop driving DQS CE enable signal +# from fabric logic. 
Even though the absolute delay on this path is +# calibrated out (when synchronizing this output to DQS), the delay +# should still be kept as low as possible to reduce post-calibration +# voltage/temp variations - these are roughly proportional to the +# absolute delay of the path +############################################################################### + +INST "*/u_phy_calib/gen_gate[0].u_en_dqs_ff" LOC = SLICE_X0Y48; +INST "*/u_phy_calib/gen_gate[1].u_en_dqs_ff" LOC = SLICE_X0Y29; +INST "*/u_phy_calib/gen_gate[2].u_en_dqs_ff" LOC = SLICE_X0Y31; +INST "*/u_phy_calib/gen_gate[3].u_en_dqs_ff" LOC = SLICE_X0Y50; +INST "*/u_phy_calib/gen_gate[4].u_en_dqs_ff" LOC = SLICE_X0Y51; +INST "*/u_phy_calib/gen_gate[5].u_en_dqs_ff" LOC = SLICE_X0Y128; +INST "*/u_phy_calib/gen_gate[6].u_en_dqs_ff" LOC = SLICE_X0Y130; +INST "*/u_phy_calib/gen_gate[7].u_en_dqs_ff" LOC = SLICE_X0Y131; + +# Control for DQS gate - from fabric flop. Prevent "runaway" delay - +# two parts to this path: (1) from fabric flop to IDELAY, (2) from +# IDELAY to asynchronous reset of IDDR that drives the DQ CE's +# This can be relaxed by the user for lower frequencies: +# 300MHz = 850ps, 267MHz = 900ps. At 200MHz = 950ps. +# In general PAR should be able to route this +# within 900ps over all speed grades. +NET "*/u_phy_io/en_dqs*" MAXDELAY = 600 ps; +NET "*/u_phy_io/gen_dqs*.u_iob_dqs/en_dqs_sync" MAXDELAY = 850 ps; + +############################################################################### +# "Half-cycle" path constraint from IDDR to CE pin for all DQ IDDR's +# for DQS Read Post amble Glitch Squelch circuit +############################################################################### + # Max delay from output of IDDR to CE input of DQ IDDRs = tRPST + some slack # where slack account for rise-time of DQS on board. 
For now assume slack = # 0.400ns (based on initial SPICE simulations, assumes use of ODT), so # time = 0.4*Tcyc + 0.40ns = 1.6ns @333MHz -INST "**/*gen_dqs[*].u_iob_dqs/u_iddr_dq_ce" TNM = "TNM_DQ_CE_IDDR"; -INST "**/*gen_dq[*].u_iob_dq/*gen_stg2_*.u_iddr_dq" TNM = "TNM_DQS_FLOPS"; +INST "*/gen_dqs[*].u_iob_dqs/u_iddr_dq_ce" TNM = "TNM_DQ_CE_IDDR"; +INST "*/gen_dq[*].u_iob_dq/gen_stg2_*.u_iddr_dq" TNM = "TNM_DQS_FLOPS"; TIMESPEC "TS_DQ_CE" = FROM "TNM_DQ_CE_IDDR" TO "TNM_DQS_FLOPS" 1.9 ns; -##------------------------------------------------------------------------------ - -##------------------------------------------------------------------------------ -## Section: Area Group -## Desc: MIG 2.2: Prevent unrelated logic from being packed into any -## slices used by read data capture RPM's - if unrelated logic gets -## packed into these slices, it could cause the DIRT strings that -## define the IDDR -> fabric flop routing to become unroutable -## during PAR stage (unrelated logic may require routing resources -## required by the DIRT strings - MAP does not currently take into -## account DIRT strings when placing logic -##------------------------------------------------------------------------------ + +############################################################################### +# MIG 2.2: Prevent unrelated logic from being packed into any slices used +# by read data capture RPM's - if unrelated logic gets packed into +# these slices, it could cause the DIRT strings that define the +# IDDR -> fabric flop routing to become unroutable during PAR stage +# (unrelated logic may require routing resources required by the +# DIRT strings - MAP does not currently take into account DIRT +# strings when placing logic +############################################################################### + AREA_GROUP "DDR_CAPTURE_FFS" GROUP = CLOSED; -##------------------------------------------------------------------------------ - 
-##------------------------------------------------------------------------------ -## Section: DQ LOC Constraints -## Desc: Location constraints for DQ read-data capture flops in fabric -## (for 2nd stage capture) -##------------------------------------------------------------------------------ -INST "**/*gen_dq[0].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y42; -INST "**/*gen_dq[1].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y43; -INST "**/*gen_dq[2].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y45; -INST "**/*gen_dq[3].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y46; -INST "**/*gen_dq[4].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y41; -INST "**/*gen_dq[5].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y42; -INST "**/*gen_dq[6].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y44; -INST "**/*gen_dq[7].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y44; -INST "**/*gen_dq[8].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y28; -INST "**/*gen_dq[9].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y32; -INST "**/*gen_dq[10].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y33; -INST "**/*gen_dq[11].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y34; -INST "**/*gen_dq[12].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y26; -INST "**/*gen_dq[13].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y28; -INST "**/*gen_dq[14].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y33; -INST "**/*gen_dq[15].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y34; -INST "**/*gen_dq[16].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y35; -INST "**/*gen_dq[17].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y36; -INST "**/*gen_dq[18].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y38; -INST "**/*gen_dq[19].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y39; -INST "**/*gen_dq[20].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y36; -INST 
"**/*gen_dq[21].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y37; -INST "**/*gen_dq[22].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y38; -INST "**/*gen_dq[23].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y39; -INST "**/*gen_dq[24].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y46; -INST "**/*gen_dq[25].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y49; -INST "**/*gen_dq[26].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y53; -INST "**/*gen_dq[27].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y55; -INST "**/*gen_dq[28].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y49; -INST "**/*gen_dq[29].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y52; -INST "**/*gen_dq[30].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y54; -INST "**/*gen_dq[31].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y56; -INST "**/*gen_dq[32].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y52; -INST "**/*gen_dq[33].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y56; -INST "**/*gen_dq[34].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y58; -INST "**/*gen_dq[35].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y59; -INST "**/*gen_dq[36].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y54; -INST "**/*gen_dq[37].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y57; -INST "**/*gen_dq[38].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y58; -INST "**/*gen_dq[39].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y59; -INST "**/*gen_dq[40].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y120; -INST "**/*gen_dq[41].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y121; -INST "**/*gen_dq[42].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y122; -INST "**/*gen_dq[43].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y123; -INST "**/*gen_dq[44].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y120; -INST 
"**/*gen_dq[45].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y121; -INST "**/*gen_dq[46].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y123; -INST "**/*gen_dq[47].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y124; -INST "**/*gen_dq[48].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y124; -INST "**/*gen_dq[49].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y126; -INST "**/*gen_dq[50].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y132; -INST "**/*gen_dq[51].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y133; -INST "**/*gen_dq[52].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y125; -INST "**/*gen_dq[53].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y126; -INST "**/*gen_dq[54].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y133; -INST "**/*gen_dq[55].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y134; -INST "**/*gen_dq[56].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y134; -INST "**/*gen_dq[57].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y136; -INST "**/*gen_dq[58].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y137; -INST "**/*gen_dq[59].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y138; -INST "**/*gen_dq[60].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y135; -INST "**/*gen_dq[61].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y136; -INST "**/*gen_dq[62].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y138; -INST "**/*gen_dq[63].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y139; -##------------------------------------------------------------------------------ + +############################################################################### +# Location constraints for DQ read-data capture flops in fabric (for 2nd +# stage capture) +############################################################################### + +INST "*/gen_dq[0].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y42; # AF30 X0Y22 * +INST 
"*/gen_dq[1].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y43; # AK31 X0Y23 +INST "*/gen_dq[2].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y45; # AF31 X0Y25 +INST "*/gen_dq[3].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y46; # AD30 X0Y26 +INST "*/gen_dq[4].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y41; # AJ30 X0Y21 +INST "*/gen_dq[5].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y42; # AF29 X0Y22 *** +INST "*/gen_dq[6].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y44; # AD29 X0Y24 +INST "*/gen_dq[7].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y44; # AE29 X0Y24 +INST "*/gen_dq[8].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y28; # AH27 X0Y8 *** +INST "*/gen_dq[9].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y32; # AF28 X0Y12 +INST "*/gen_dq[10].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y33; # AH28 X0Y13 +INST "*/gen_dq[11].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y34; # AA28 X0Y14 +INST "*/gen_dq[12].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y26; # AG25 X0Y6 +INST "*/gen_dq[13].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y28; # AJ26 X0Y8 * +INST "*/gen_dq[14].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y33; # AG28 X0Y13 +INST "*/gen_dq[15].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y34; # AB28 X0Y14 +INST "*/gen_dq[16].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y35; # AC28 X0Y15 +INST "*/gen_dq[17].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y36; # AB25 X0Y16 *** +INST "*/gen_dq[18].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y38; # AC27 X0Y18 +INST "*/gen_dq[19].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y39; # AA26 X0Y19 +INST "*/gen_dq[20].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y36; # AB26 X0Y16 * +INST "*/gen_dq[21].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y37; # AA24 X0Y17 +INST "*/gen_dq[22].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y38; # AB27 X0Y18 
+INST "*/gen_dq[23].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y39; # AA25 X0Y19 +INST "*/gen_dq[24].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y46; # AC29 X0Y26 +INST "*/gen_dq[25].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y49; # AB30 X0Y29 *** +INST "*/gen_dq[26].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y53; # W31 X0Y33 +INST "*/gen_dq[27].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y55; # V30 X0Y35 +INST "*/gen_dq[28].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y49; # AC30 X0Y29 * +INST "*/gen_dq[29].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y52; # W29 X0Y32 +INST "*/gen_dq[30].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y54; # V27 X0Y34 *** +INST "*/gen_dq[31].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y56; # W27 X0Y36 +INST "*/gen_dq[32].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y52; # V29 X0Y32 +INST "*/gen_dq[33].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y56; # Y27 X0Y36 +INST "*/gen_dq[34].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y58; # Y26 X0Y38 +INST "*/gen_dq[35].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y59; # W24 X0Y39 +INST "*/gen_dq[36].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y54; # V28 X0Y34 * +INST "*/gen_dq[37].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y57; # W25 X0Y37 +INST "*/gen_dq[38].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y58; # W26 X0Y38 +INST "*/gen_dq[39].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y59; # V24 X0Y39 +INST "*/gen_dq[40].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y120; # R24 X0Y100 +INST "*/gen_dq[41].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y121; # P25 X0Y101 +INST "*/gen_dq[42].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y122; # N24 X0Y102 +INST "*/gen_dq[43].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y123; # P26 X0Y103 +INST "*/gen_dq[44].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y120; # T24 
X0Y100 +INST "*/gen_dq[45].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y121; # N25 X0Y101 +INST "*/gen_dq[46].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y123; # P27 X0Y103 +INST "*/gen_dq[47].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y124; # N28 X0Y104 +INST "*/gen_dq[48].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y124; # M28 X0Y104 +INST "*/gen_dq[49].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y126; # L28 X0Y106 +INST "*/gen_dq[50].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y132; # F25 X0Y112 +INST "*/gen_dq[51].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y133; # H25 X0Y113 +INST "*/gen_dq[52].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y125; # K27 X0Y105 +INST "*/gen_dq[53].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y126; # K28 X0Y106 +INST "*/gen_dq[54].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y133; # H24 X0Y113 +INST "*/gen_dq[55].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y134; # G26 X0Y114 +INST "*/gen_dq[56].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y134; # G25 X0Y114 +INST "*/gen_dq[57].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y136; # M26 X0Y116 +INST "*/gen_dq[58].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y137; # J24 X0Y117 +INST "*/gen_dq[59].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y138; # L26 X0Y118 +INST "*/gen_dq[60].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y135; # J27 X0Y115 +INST "*/gen_dq[61].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y136; # M25 X0Y116 +INST "*/gen_dq[62].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y138; # L25 X0Y118 +INST "*/gen_dq[63].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y139; # L24 X0Y119 + diff --git a/src/edu/berkeley/fleet/fpga/main.ucf- b/src/edu/berkeley/fleet/fpga/main.ucf- new file mode 100644 index 0000000..a47921b --- /dev/null +++ b/src/edu/berkeley/fleet/fpga/main.ucf- @@ -0,0 +1,388 @@ 
+############################################################################ +## +## Xilinx, Inc. 2006 www.xilinx.com +## Sat Feb 28 21:05:46 2009 +## Generated by MIG Version 2.3 +## +############################################################################ +## File name : ddr2_sdram.ucf +## +## Details : Constraints file +## FPGA family: virtex5 +## FPGA: xc5vlx110t-ff1136 +## Speedgrade: -1 +## Design Entry: VERILOG +## Frequency: 200 MHz +## Design: without Test bench +## DCM Used: Enable +## Two Bytes per Bank:Disable +## No.Of Controllers: 1 +## +############################################################################ + +############################################################################ +# Clock constraints # +############################################################################ + +NET "ddr2_0/ddr2_sdram/u_ddr2_infrastructure/sys_clk_ibufg" TNM_NET = "SYS_CLK"; +TIMESPEC "TS_SYS_CLK" = PERIOD "SYS_CLK" 5 ns HIGH 50 %; + +NET "ddr2_0/ddr2_sdram/u_ddr2_infrastructure/clk200_ibufg" TNM_NET = "SYS_CLK_200"; +TIMESPEC "TS_SYS_CLK_200" = PERIOD "SYS_CLK_200" 5 ns HIGH 50 %; + +############################################################################ +######################################################################## +# Controller 0 +# Memory Device: DDR2_SDRAM->SODIMMs->MT8HTF3264HY-53E # +# Data Width: 64 # +# Data Mask: 1 # +######################################################################## + +################################################################################ +# I/O STANDARDS +################################################################################ + +NET "ddr2_dq[*]" IOSTANDARD = SSTL18_II_DCI; +NET "ddr2_a[*]" IOSTANDARD = SSTL18_II; +NET "ddr2_ba[*]" IOSTANDARD = SSTL18_II; +NET "ddr2_ras_n" IOSTANDARD = SSTL18_II; +NET "ddr2_cas_n" IOSTANDARD = SSTL18_II; +NET "ddr2_we_n" IOSTANDARD = SSTL18_II; +NET "ddr2_cs_n[*]" IOSTANDARD = SSTL18_II; +NET "ddr2_odt[*]" IOSTANDARD = SSTL18_II; +NET 
"ddr2_cke[*]" IOSTANDARD = SSTL18_II; +NET "ddr2_dm[*]" IOSTANDARD = SSTL18_II; +NET "sys_clk_p" IOSTANDARD = LVPECL_25; +NET "sys_clk_n" IOSTANDARD = LVPECL_25; +NET "clk200_p" IOSTANDARD = LVPECL_25; +NET "clk200_n" IOSTANDARD = LVPECL_25; +NET "sys_rst_n" IOSTANDARD = LVCMOS18; +NET "phy_init_done" IOSTANDARD = LVCMOS18; +NET "ddr2_dqs[*]" IOSTANDARD = DIFF_SSTL18_II_DCI; +NET "ddr2_dqs_n[*]" IOSTANDARD = DIFF_SSTL18_II_DCI; +NET "ddr2_ck[*]" IOSTANDARD = DIFF_SSTL18_II; +NET "ddr2_ck_n[*]" IOSTANDARD = DIFF_SSTL18_II; + +################################################################################ +# Location Constraints +################################################################################ + +NET "ddr2_dq[0]" LOC = "C20" ; #Bank 23 +NET "ddr2_dq[1]" LOC = "B20" ; #Bank 23 +NET "ddr2_dq[2]" LOC = "B21" ; #Bank 23 +NET "ddr2_dq[3]" LOC = "A21" ; #Bank 23 +NET "ddr2_dq[4]" LOC = "C18" ; #Bank 23 +NET "ddr2_dq[5]" LOC = "C22" ; #Bank 23 +NET "ddr2_dq[6]" LOC = "B22" ; #Bank 23 +NET "ddr2_dq[7]" LOC = "B18" ; #Bank 23 +NET "ddr2_dq[8]" LOC = "C23" ; #Bank 23 +NET "ddr2_dq[9]" LOC = "B23" ; #Bank 23 +NET "ddr2_dq[10]" LOC = "A19" ; #Bank 23 +NET "ddr2_dq[11]" LOC = "A20" ; #Bank 23 +NET "ddr2_dq[12]" LOC = "A24" ; #Bank 23 +NET "ddr2_dq[13]" LOC = "D26" ; #Bank 23 +NET "ddr2_dq[14]" LOC = "C27" ; #Bank 23 +NET "ddr2_dq[15]" LOC = "A29" ; #Bank 23 +NET "ddr2_dq[16]" LOC = "C28" ; #Bank 23 +NET "ddr2_dq[17]" LOC = "D27" ; #Bank 23 +NET "ddr2_dq[18]" LOC = "B31" ; #Bank 23 +NET "ddr2_dq[19]" LOC = "A31" ; #Bank 23 +NET "ddr2_dq[20]" LOC = "D29" ; #Bank 23 +NET "ddr2_dq[21]" LOC = "D31" ; #Bank 23 +NET "ddr2_dq[22]" LOC = "D30" ; #Bank 23 +NET "ddr2_dq[23]" LOC = "A30" ; #Bank 23 +NET "ddr2_dq[24]" LOC = "K24" ; #Bank 19 +NET "ddr2_dq[25]" LOC = "L24" ; #Bank 19 +NET "ddr2_dq[26]" LOC = "L25" ; #Bank 19 +NET "ddr2_dq[27]" LOC = "L26" ; #Bank 19 +NET "ddr2_dq[28]" LOC = "J25" ; #Bank 19 +NET "ddr2_dq[29]" LOC = "M25" ; #Bank 19 +NET "ddr2_dq[30]" LOC = 
"M26" ; #Bank 19 +NET "ddr2_dq[31]" LOC = "J27" ; #Bank 19 +NET "ddr2_dq[32]" LOC = "G25" ; #Bank 19 +NET "ddr2_dq[33]" LOC = "G26" ; #Bank 19 +NET "ddr2_dq[34]" LOC = "H25" ; #Bank 19 +NET "ddr2_dq[35]" LOC = "H24" ; #Bank 19 +NET "ddr2_dq[36]" LOC = "F26" ; #Bank 19 +NET "ddr2_dq[37]" LOC = "K28" ; #Bank 19 +NET "ddr2_dq[38]" LOC = "L28" ; #Bank 19 +NET "ddr2_dq[39]" LOC = "K27" ; #Bank 19 +NET "ddr2_dq[40]" LOC = "M28" ; #Bank 19 +NET "ddr2_dq[41]" LOC = "N28" ; #Bank 19 +NET "ddr2_dq[42]" LOC = "P26" ; #Bank 19 +NET "ddr2_dq[43]" LOC = "P27" ; #Bank 19 +NET "ddr2_dq[44]" LOC = "P24" ; #Bank 19 +NET "ddr2_dq[45]" LOC = "P25" ; #Bank 19 +NET "ddr2_dq[46]" LOC = "N25" ; #Bank 19 +NET "ddr2_dq[47]" LOC = "R24" ; #Bank 19 +NET "ddr2_dq[48]" LOC = "E29" ; #Bank 15 +NET "ddr2_dq[49]" LOC = "F29" ; #Bank 15 +NET "ddr2_dq[50]" LOC = "G30" ; #Bank 15 +NET "ddr2_dq[51]" LOC = "F30" ; #Bank 15 +NET "ddr2_dq[52]" LOC = "J29" ; #Bank 15 +NET "ddr2_dq[53]" LOC = "F31" ; #Bank 15 +NET "ddr2_dq[54]" LOC = "E31" ; #Bank 15 +NET "ddr2_dq[55]" LOC = "L29" ; #Bank 15 +NET "ddr2_dq[56]" LOC = "H30" ; #Bank 15 +NET "ddr2_dq[57]" LOC = "G31" ; #Bank 15 +NET "ddr2_dq[58]" LOC = "J30" ; #Bank 15 +NET "ddr2_dq[59]" LOC = "J31" ; #Bank 15 +NET "ddr2_dq[60]" LOC = "M30" ; #Bank 15 +NET "ddr2_dq[61]" LOC = "T31" ; #Bank 15 +NET "ddr2_dq[62]" LOC = "R31" ; #Bank 15 +NET "ddr2_dq[63]" LOC = "U30" ; #Bank 15 +NET "ddr2_a[12]" LOC = "B30" ; #Bank 23 +NET "ddr2_a[11]" LOC = "T24" ; #Bank 19 +NET "ddr2_a[10]" LOC = "P31" ; #Bank 15 +NET "ddr2_a[9]" LOC = "P30" ; #Bank 15 +NET "ddr2_a[8]" LOC = "M31" ; #Bank 15 +NET "ddr2_a[7]" LOC = "N30" ; #Bank 15 +NET "ddr2_a[6]" LOC = "T28" ; #Bank 15 +NET "ddr2_a[5]" LOC = "T29" ; #Bank 15 +NET "ddr2_a[4]" LOC = "U27" ; #Bank 15 +NET "ddr2_a[3]" LOC = "U28" ; #Bank 15 +NET "ddr2_a[2]" LOC = "R26" ; #Bank 15 +NET "ddr2_a[1]" LOC = "R27" ; #Bank 15 +NET "ddr2_a[0]" LOC = "U26" ; #Bank 15 +NET "ddr2_ba[1]" LOC = "T26" ; #Bank 15 +NET "ddr2_ba[0]" LOC = "U25" ; 
#Bank 15 +NET "ddr2_ras_n" LOC = "T25" ; #Bank 15 +NET "ddr2_cas_n" LOC = "B32" ; #Bank 11 +NET "ddr2_we_n" LOC = "A33" ; #Bank 11 +NET "ddr2_cs_n[0]" LOC = "B33" ; #Bank 11 +NET "ddr2_odt[0]" LOC = "C33" ; #Bank 11 +NET "ddr2_cke[0]" LOC = "C32" ; #Bank 11 +NET "ddr2_dm[0]" LOC = "C19" ; #Bank 23 +NET "ddr2_dm[1]" LOC = "A23" ; #Bank 23 +NET "ddr2_dm[2]" LOC = "C30" ; #Bank 23 +NET "ddr2_dm[3]" LOC = "J24" ; #Bank 19 +NET "ddr2_dm[4]" LOC = "F25" ; #Bank 19 +NET "ddr2_dm[5]" LOC = "N24" ; #Bank 19 +NET "ddr2_dm[6]" LOC = "H29" ; #Bank 15 +NET "ddr2_dm[7]" LOC = "L30" ; #Bank 15 +NET "sys_clk_p" LOC = "H17" ; #Bank 3 +NET "sys_clk_n" LOC = "H18" ; #Bank 3 +NET "clk200_p" LOC = "K17" ; #Bank 3 +NET "clk200_n" LOC = "L18" ; #Bank 3 +NET "sys_rst_n" LOC = "D32" ; #Bank 11 +NET "phy_init_done" LOC = "C34" ; #Bank 11 +NET "ddr2_dqs[0]" LOC = "C24" ; #Bank 23 +NET "ddr2_dqs_n[0]" LOC = "D25" ; #Bank 23 +NET "ddr2_dqs[1]" LOC = "B26" ; #Bank 23 +NET "ddr2_dqs_n[1]" LOC = "A25" ; #Bank 23 +NET "ddr2_dqs[2]" LOC = "B27" ; #Bank 23 +NET "ddr2_dqs_n[2]" LOC = "A26" ; #Bank 23 +NET "ddr2_dqs[3]" LOC = "G27" ; #Bank 19 +NET "ddr2_dqs_n[3]" LOC = "H27" ; #Bank 19 +NET "ddr2_dqs[4]" LOC = "H28" ; #Bank 19 +NET "ddr2_dqs_n[4]" LOC = "G28" ; #Bank 19 +NET "ddr2_dqs[5]" LOC = "E28" ; #Bank 19 +NET "ddr2_dqs_n[5]" LOC = "F28" ; #Bank 19 +NET "ddr2_dqs[6]" LOC = "N29" ; #Bank 15 +NET "ddr2_dqs_n[6]" LOC = "P29" ; #Bank 15 +NET "ddr2_dqs[7]" LOC = "K31" ; #Bank 15 +NET "ddr2_dqs_n[7]" LOC = "L31" ; #Bank 15 +NET "ddr2_ck[0]" LOC = "B25" ; #Bank 23 +NET "ddr2_ck_n[0]" LOC = "C25" ; #Bank 23 +NET "ddr2_ck[1]" LOC = "E26" ; #Bank 19 +NET "ddr2_ck_n[1]" LOC = "E27" ; #Bank 19 + +################################################################################ +#IDELAYCTRL Location Constraints +################################################################################ +INST "*/IDELAYCTRL_INST[0].u_idelayctrl" LOC=IDELAYCTRL_X0Y7; +INST "*/IDELAYCTRL_INST[1].u_idelayctrl" 
LOC=IDELAYCTRL_X0Y6; +INST "*/IDELAYCTRL_INST[2].u_idelayctrl" LOC=IDELAYCTRL_X0Y5; + +############################################################################### +# Define multicycle paths - these paths may take longer because additional +# time is allowed for logic to settle in calibration/initialization FSM +############################################################################### + +# MIG 2.1: Eliminate Timegroup definitions for CLK0, and CLK90. Instead trace +# multicycle paths from originating flip-flop to ANY destination +# flip-flop (or in some cases, it can also be a BRAM) +# MUX Select for either rising/falling CLK0 for 2nd stage read capture +INST "*/u_phy_calib/gen_rd_data_sel*.u_ff_rd_data_sel" TNM = "TNM_RD_DATA_SEL"; +TIMESPEC "TS_MC_RD_DATA_SEL" = FROM "TNM_RD_DATA_SEL" TO FFS +"TS_SYS_CLK" * 4; + +# Commented out -- AM +# MUX select for read data - optional delay on data to account for byte skews +#INST "*/u_usr_rd/gen_rden_sel_mux*.u_ff_rden_sel_mux" TNM = "TNM_RDEN_SEL_MUX"; +#TIMESPEC "TS_MC_RDEN_SEL_MUX" = FROM "TNM_RDEN_SEL_MUX" TO FFS +#"TS_SYS_CLK" * 4; + +# Calibration/Initialization complete status flag (for PHY logic only) - can +# be used to drive both flip-flops and BRAMs +INST "*/u_phy_init/u_ff_phy_init_data_sel" TNM = "TNM_PHY_INIT_DATA_SEL"; +TIMESPEC "TS_MC_PHY_INIT_DATA_SEL_0" = FROM "TNM_PHY_INIT_DATA_SEL" TO FFS +"TS_SYS_CLK" * 4; +TIMESPEC "TS_MC_PHY_INIT_DATA_SEL_90" = FROM "TNM_PHY_INIT_DATA_SEL" TO RAMS +"TS_SYS_CLK" * 4; +# Select (address) bits for SRL32 shift registers used in stage3/stage4 +# calibration +INST "*/u_phy_calib/gen_gate_dly*.u_ff_gate_dly" TNM = "TNM_GATE_DLY"; +TIMESPEC "TS_MC_GATE_DLY" = FROM "TNM_GATE_DLY" TO FFS "TS_SYS_CLK" * 4; + +# Commented out -- AM +#INST "*/u_phy_calib/gen_rden_dly*.u_ff_rden_dly" TNM = "TNM_RDEN_DLY"; +#TIMESPEC "TS_MC_RDEN_DLY" = FROM "TNM_RDEN_DLY" TO FFS "TS_SYS_CLK" * 4; + +INST "*/u_phy_calib/gen_cal_rden_dly*.u_ff_cal_rden_dly" + TNM = "TNM_CAL_RDEN_DLY"; +TIMESPEC
"TS_MC_CAL_RDEN_DLY" = FROM "TNM_CAL_RDEN_DLY" TO FFS + "TS_SYS_CLK" * 4; + +############################################################################### +# DQS Read Post amble Glitch Squelch circuit related constraints +############################################################################### + +############################################################################### +# LOC placement of DQS-squelch related IDDR and IDELAY elements +# Each circuit can be located at any of the following locations: +# 1. Unused "N"-side of DQS differential pair I/O +# 2. DM data mask (output only, input side is free for use) +# 3. Any output-only site +############################################################################### + +INST "*/gen_dqs[0].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y302"; +INST "*/gen_dqs[0].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y302"; +INST "*/gen_dqs[1].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y300"; +INST "*/gen_dqs[1].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y300"; +INST "*/gen_dqs[2].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y298"; +INST "*/gen_dqs[2].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y298"; +INST "*/gen_dqs[3].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y262"; +INST "*/gen_dqs[3].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y262"; +INST "*/gen_dqs[4].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y260"; +INST "*/gen_dqs[4].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y260"; +INST "*/gen_dqs[5].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y258"; +INST "*/gen_dqs[5].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y258"; +INST "*/gen_dqs[6].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y222"; +INST "*/gen_dqs[6].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y222"; +INST "*/gen_dqs[7].u_iob_dqs/u_iddr_dq_ce" LOC = "ILOGIC_X0Y220"; +INST "*/gen_dqs[7].u_iob_dqs/u_iodelay_dq_ce" LOC = "IODELAY_X0Y220"; + +############################################################################### +# LOC and timing constraints for flop driving DQS CE enable signal +# 
from fabric logic. Even though the absolute delay on this path is +# calibrated out (when synchronizing this output to DQS), the delay +# should still be kept as low as possible to reduce post-calibration +# voltage/temp variations - these are roughly proportional to the +# absolute delay of the path +############################################################################### + +INST "*/u_phy_calib/gen_gate[0].u_en_dqs_ff" LOC = SLICE_X0Y151; +INST "*/u_phy_calib/gen_gate[1].u_en_dqs_ff" LOC = SLICE_X0Y150; +INST "*/u_phy_calib/gen_gate[2].u_en_dqs_ff" LOC = SLICE_X0Y149; +INST "*/u_phy_calib/gen_gate[3].u_en_dqs_ff" LOC = SLICE_X0Y131; +INST "*/u_phy_calib/gen_gate[4].u_en_dqs_ff" LOC = SLICE_X0Y130; +INST "*/u_phy_calib/gen_gate[5].u_en_dqs_ff" LOC = SLICE_X0Y129; +INST "*/u_phy_calib/gen_gate[6].u_en_dqs_ff" LOC = SLICE_X0Y111; +INST "*/u_phy_calib/gen_gate[7].u_en_dqs_ff" LOC = SLICE_X0Y110; + +# Control for DQS gate - from fabric flop. Prevent "runaway" delay - +# two parts to this path: (1) from fabric flop to IDELAY, (2) from +# IDELAY to asynchronous reset of IDDR that drives the DQ CE's +# This can be relaxed by the user for lower frequencies: +# 300MHz = 850ps, 267MHz = 900ps. At 200MHz = 950ps. +# In general PAR should be able to route this +# within 900ps over all speed grades. +NET "*/u_phy_io/en_dqs*" MAXDELAY = 600 ps; +NET "*/u_phy_io/gen_dqs*.u_iob_dqs/en_dqs_sync" MAXDELAY = 850 ps; + +############################################################################### +# "Half-cycle" path constraint from IDDR to CE pin for all DQ IDDR's +# for DQS Read Postamble Glitch Squelch circuit +############################################################################### + +# Max delay from output of IDDR to CE input of DQ IDDRs = tRPST + some slack +# where slack accounts for rise-time of DQS on board.
For now assume slack = +# 0.400ns (based on initial SPICE simulations, assumes use of ODT), so +# time = 0.4*Tcyc + 0.40ns = 1.6ns @333MHz (2.4ns @200MHz, as constrained below) +INST "*/gen_dqs[*].u_iob_dqs/u_iddr_dq_ce" TNM = "TNM_DQ_CE_IDDR"; +INST "*/gen_dq[*].u_iob_dq/gen_stg2_*.u_iddr_dq" TNM = "TNM_DQS_FLOPS"; +TIMESPEC "TS_DQ_CE" = FROM "TNM_DQ_CE_IDDR" TO "TNM_DQS_FLOPS" 2.4 ns; + +############################################################################### +# MIG 2.2: Prevent unrelated logic from being packed into any slices used +# by read data capture RPM's - if unrelated logic gets packed into +# these slices, it could cause the DIRT strings that define the +# IDDR -> fabric flop routing to become unroutable during PAR stage +# (unrelated logic may require routing resources required by the +# DIRT strings - MAP does not currently take into account DIRT +# strings when placing logic) +############################################################################### + +AREA_GROUP "DDR_CAPTURE_FFS" GROUP = CLOSED; + +############################################################################### +# Location constraints for DQ read-data capture flops in fabric (for 2nd +# stage capture) +############################################################################### + +INST "*/gen_dq[0].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y159; +INST "*/gen_dq[1].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y159; +INST "*/gen_dq[2].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y158; +INST "*/gen_dq[3].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y158; +INST "*/gen_dq[4].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y157; +INST "*/gen_dq[5].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y156; +INST "*/gen_dq[6].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y156; +INST "*/gen_dq[7].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y155; +INST "*/gen_dq[8].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y154; +INST
"*/gen_dq[9].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y154; +INST "*/gen_dq[10].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y153; +INST "*/gen_dq[11].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y153; +INST "*/gen_dq[12].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y152; +INST "*/gen_dq[13].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y146; +INST "*/gen_dq[14].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y146; +INST "*/gen_dq[15].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y145; +INST "*/gen_dq[16].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y144; +INST "*/gen_dq[17].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y144; +INST "*/gen_dq[18].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y143; +INST "*/gen_dq[19].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y143; +INST "*/gen_dq[20].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y142; +INST "*/gen_dq[21].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y141; +INST "*/gen_dq[22].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y141; +INST "*/gen_dq[23].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y140; +INST "*/gen_dq[24].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y139; +INST "*/gen_dq[25].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y139; +INST "*/gen_dq[26].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y138; +INST "*/gen_dq[27].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y138; +INST "*/gen_dq[28].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y137; +INST "*/gen_dq[29].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y136; +INST "*/gen_dq[30].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y136; +INST "*/gen_dq[31].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y135; +INST "*/gen_dq[32].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y134; +INST "*/gen_dq[33].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y134; +INST 
"*/gen_dq[34].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y133; +INST "*/gen_dq[35].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y133; +INST "*/gen_dq[36].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y132; +INST "*/gen_dq[37].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y126; +INST "*/gen_dq[38].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y126; +INST "*/gen_dq[39].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y125; +INST "*/gen_dq[40].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y124; +INST "*/gen_dq[41].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y124; +INST "*/gen_dq[42].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y123; +INST "*/gen_dq[43].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y123; +INST "*/gen_dq[44].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y122; +INST "*/gen_dq[45].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y121; +INST "*/gen_dq[46].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y121; +INST "*/gen_dq[47].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y120; +INST "*/gen_dq[48].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y119; +INST "*/gen_dq[49].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y119; +INST "*/gen_dq[50].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y118; +INST "*/gen_dq[51].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y118; +INST "*/gen_dq[52].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y117; +INST "*/gen_dq[53].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y116; +INST "*/gen_dq[54].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y116; +INST "*/gen_dq[55].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y115; +INST "*/gen_dq[56].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y114; +INST "*/gen_dq[57].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y114; +INST "*/gen_dq[58].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y113; +INST 
"*/gen_dq[59].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y113; +INST "*/gen_dq[60].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y112; +INST "*/gen_dq[61].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y106; +INST "*/gen_dq[62].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y106; +INST "*/gen_dq[63].u_iob_dq/gen_stg2_*.u_ff_stg2a_rise" RLOC_ORIGIN = X0Y105; +