move to ml505, import Greg's memory controller
author megacz <adam@megacz.com>
Tue, 30 Dec 2008 01:47:51 +0000 (17:47 -0800)
committer megacz <adam@megacz.com>
Tue, 30 Dec 2008 01:47:51 +0000 (17:47 -0800)
31 files changed:
Makefile
misc/program.sh
ships/DDR2.ship [new file with mode: 0644]
ships/Debug.ship
src/edu/berkeley/fleet/fpga/greg/Const.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/DDR2SDRAM.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/Readme.txt [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/asyncfifo_dmem_1b.ngc [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/asyncfifo_dmem_1b.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_ctrl.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_idelay_ctrl.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_infrastructure.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_mem_if_top.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_phy_calib.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_phy_ctl_io.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_phy_dm_iob.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_phy_dq_iob.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_phy_dqs_iob.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_phy_init.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_phy_io.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_phy_top.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_phy_write.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_sdram.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_top.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_usr_addr_fifo.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_usr_rd.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_usr_top.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/greg/ddr2_usr_wr.v [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/main-ml410.ucf [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/main-ml50x.ucf [new file with mode: 0644]
src/edu/berkeley/fleet/fpga/main.ucf

index 7b86e6b..79a028e 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -35,8 +35,12 @@ host = intel2950-5.eecs.berkeley.edu
 # 128Gb ram, 2x4-core.  small-config:1189s  large-config:3065s
 #host = amdr905-1.eecs.berkeley.edu
 
-remote_ise = /tools/xilinx/ISE9.1i_lin
-remote_edk = /tools/xilinx/EDK9.1i
+remote_ise = /tools/xilinx/10.1/ISE
+remote_edk = /tools/xilinx/10.1/EDK
+
+#remote_ise = /tools/xilinx/ISE9.1i_lin
+#remote_edk = /tools/xilinx/EDK9.1i
+
 #remote_dir = /scratch/megacz/fleet/
 #remote_dir = /vol/hitz/home/megacz/fleet/
 remote_dir = /tmp/megacz/fleet/
@@ -50,7 +54,7 @@ remote_dir = /tmp/megacz/fleet/
 xilinx =  cd build/fpga;
 xilinx += LD_LIBRARY_PATH=$$LD_LIBRARY_PATH:$(remote_ise)/bin/lin:$(remote_edk)/bin/lin
 xilinx += PATH=$$PATH:$(remote_ise)/bin/lin:$(remote_edk)/bin/lin
-xilinx += XST_VERSION=9.2i
+#xilinx += XST_VERSION=9.2i
 xilinx += XILINX=$(remote_ise)
 xilinx += XIL_XST_HIDEMESSAGES=hdl_and_low_levels
 xilinx += XILINX_EDK=$(remote_edk)
@@ -58,9 +62,23 @@ xilinx += XILINX_EDK=$(remote_edk)
 xilinx_ise = $(xilinx) $(remote_ise)/bin/lin/
 xilinx_edk = $(xilinx) $(remote_edk)/bin/lin/
 
-speed_grade = 11
-device = xc4vfx60ff1152-${speed_grade}
-#device = xc2vp70-7ff1704
+# ML50X target (Virtex-5 LX110T).  These pieces are concatenated into
+# $(device) as ${part}${package}-${speed_grade}.
+speed_grade = 1
+part = xc5vlx110t
+package = ff1136
+# NOTE(review): board name passed to genace's -board option; value unverified (??).
+# Kept on its own line: a trailing "# comment" on the assignment line would
+# leave the whitespace before the '#' inside the variable's value.
+board = ml505
+
+# ML410
+#part = xc4vfx60
+#package = ff1152
+#speed_grade = 11
+#board = ml410
+
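+# BEE2 (NOTE: "7ff1704" embeds speed grade 7, so with the device template below this presumably needs package = ff1704 and speed_grade = 7)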
+#part = xc2vp70
+#package = 7ff1704
+
+device = ${part}${package}-${speed_grade}
 
 upload: fleet.jar build/fpga/main.bit
        mkdir -p build
@@ -80,9 +98,12 @@ build/fpga/main.bit: $(java_files) $(ship_files)
        scp ${host}:${remote_dir}/build/fpga/main.bit build/fpga/
 
 pcore = ${remote_edk}/hw/XilinxProcessorIPLib/pcores
+#effort = std
+effort = high
 synth:
        cd build/fpga; ln -sf ../../src/edu/berkeley/fleet/fpga/* .
        cd build/fpga; ln -sf ../../src/edu/berkeley/fleet/fpga/mem/* .
+       cd build/fpga; ln -sf ../../src/edu/berkeley/fleet/fpga/greg/* .
        rm -f build/fpga/main.lso
        echo work                        >> build/fpga/main.lso
        rm -f build/fpga/main.prj
@@ -174,14 +195,14 @@ synth:
        echo '-g Security:NONE' >> build/fpga/main.ut
        echo '-g Persist:No' >> build/fpga/main.ut
 
-       $(xilinx_ise)xst -intstyle xflow -ifn main.xst -ofn main.syr < main.xst
-       $(xilinx_ise)ngdbuild -aul -intstyle xflow -dd _ngo -nt timestamp -uc main.ucf -p $(device) main.ngc main.ngd
-       $(xilinx_ise)map -cm area -intstyle xflow -p $(device) -pr b -ol high -o main_map.ncd main.ngd main.pcf
-       $(xilinx_ise)par -w -intstyle xflow -t 99 -pl high -rl high main_map.ncd main.ncd main.pcf
-       $(xilinx_ise)bitgen -intstyle xflow -f main.ut main.ncd
-#      $(xilinx_ise)trce -intstyle xflow -e 3 -l 3 -s ${speed_grade} -xml main main.ncd -o main.twr main.pcf
-       $(xilinx_edk)xmd -tcl $(remote_edk)/data/xmd/genace.tcl -jprog -hw main.bit -board ml410 -ace mainx.ace
-       mv build/fpga/mainx.ace build/fpga/main.ace   # genace throws a fit if the filename prefix is the same?
+       $(xilinx_ise)xst      -intstyle xflow -ifn main.xst -ofn main.syr < main.xst
+       $(xilinx_ise)ngdbuild -intstyle xflow -aul -dd _ngo -nt timestamp -uc main.ucf -p $(device) main.ngc main.ngd
+       $(xilinx_ise)map      -intstyle xflow -ol ${effort} -p $(device) -pr b -cm area -o main_map.ncd main.ngd main.pcf
+       $(xilinx_ise)par      -intstyle xflow -ol ${effort} -w main_map.ncd main.ncd main.pcf
+       $(xilinx_ise)bitgen   -intstyle xflow -f main.ut main.ncd
+#      $(xilinx_ise)trce     -intstyle xflow -e 3 -l 3 -s ${speed_grade} -xml main main.ncd -o main.twr main.pcf
+#      $(xilinx_edk)xmd -tcl $(remote_edk)/data/xmd/genace.tcl -jprog -hw main.bit -board ${board} -ace mainx.ace
+#      mv build/fpga/mainx.ace build/fpga/main.ace   # genace throws a fit if the filename prefix is the same?
 
 
 runserver: fleet.jar
index bb9357d..588b9f5 100755 (executable)
@@ -34,14 +34,17 @@ export LD_PRELOAD=`pwd`/misc/libusb-driver.so
 
 sudo ln -sf $XILINX/bin/lin/xusbdfwu.hex /usr/share/xusbdfwu.hex
 
+# ml410: use -p 2
+# ml505: use -p 5
+
 $XILINX/bin/lin/impact -batch <<EOF
 cleancablelock
 setMode -bs
 setCable -port usb21 -baud 12000000
 identify
 setMode -bs
-assignFile -p 2 -file $BITFILE
-program -p 2
+assignFile -p 5 -file $BITFILE
+program -p 5
 quit
 EOF
 
diff --git a/ships/DDR2.ship b/ships/DDR2.ship
new file mode 100644 (file)
index 0000000..5360b2a
--- /dev/null
@@ -0,0 +1,303 @@
+ship: DDR2
+
+== Ports ===========================================================
+data  in:    inAddrRead
+data  in:    inAddrWrite
+data  in:    inDataWrite
+
+data  out:   out
+
+percolate up:    DDR2_CAS_B   1
+percolate up:    DDR2_CKE     2
+percolate up:    DDR2_RAS_B   1
+percolate up:    DDR2_WE_B    1
+percolate up:    DDR2_ODT     2
+percolate up:    DDR2_CS0_B   2
+percolate up:    DDR2_CLK_N   2
+percolate up:    DDR2_CLK_P   2
+percolate up:    DDR2_A       14
+percolate up:    DDR2_BA      3
+percolate inout: DDR2_DQ      64
+percolate up:    DDR2_DM      8
+percolate inout: DDR2_DQS_N   8
+percolate inout: DDR2_DQS_P   8
+
+percolate inout: I2C_DDR2_SCL 1
+percolate inout: I2C_DDR2_SDA 1
+
+percolate down:  CLKBUF_Q1_N  1
+percolate down:  CLKBUF_Q1_P  1
+
+== TeX ==============================================================
+
+== Fleeterpreter ====================================================
+    public void service() { }
+== FleetSim ==============================================================
+
+== FPGA ==============================================================
+
+// Nearly all of this was copied from Greg Gibeling's work; copyright shown below:
+
+// Everything here was copied from
+// GateLib/Firmware/DRAM/Hardware/DDR2SDRAM/Test/FPGA_TOP_ML505_DDR2SDRAMTest.v
+
+//==============================================================================
+//      Section:        License
+//==============================================================================
+//      Copyright (c) 2005-2008, Regents of the University of California
+//      All rights reserved.
+//
+//      Redistribution and use in source and binary forms, with or without modification,
+//      are permitted provided that the following conditions are met:
+//
+//              - Redistributions of source code must retain the above copyright notice,
+//                      this list of conditions and the following disclaimer.
+//              - Redistributions in binary form must reproduce the above copyright
+//                      notice, this list of conditions and the following disclaimer
+//                      in the documentation and/or other materials provided with the
+//                      distribution.
+//              - Neither the name of the University of California, Berkeley nor the
+//                      names of its contributors may be used to endorse or promote
+//                      products derived from this software without specific prior
+//                      written permission.
+//
+//      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+//      ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+//      WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+//      DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+//      ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+//      (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+//      LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+//      ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+//      (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+//      SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//==============================================================================
+
+// Configuration of the DDR2 interface.  Everything except ClockFreq is
+// forwarded to the DDR2SDRAM instance via defparam further down.
+parameter
+    ClockFreq  = 200000000, // presumably Hz; matches the 5.0 CLKIN_PERIOD given to the DCM -- TODO confirm units
+    BAWidth    = 2,
+    RAWidth    = 13,
+    CAWidth    = 10,                                                     
+    DWidth     = 128,       // 128b SDR internal transfers
+    UWidth     = 8,         // This will almost ALWAYS be 8
+    BurstLen   = 2,         // 256b total burst: 2 DWidth-wide words at SDR, or 4 external words at DDR
+    EnableMask = 1,
+    EnableECC  = 0,
+    Board      = 0;
+
+localparam
+    UCount   =  DWidth / UWidth,
+    // 128b/8 = 16b per mask means per-byte masking
+    MWidth   =  (EnableECC || (EnableMask == 0)) ? 0 : UCount,
+    // Unused lower address bits, the -1 is to get a proper log2
+    UAWidth  =  `log2(UCount-1),
+    TAWidth  =  CAWidth + RAWidth + BAWidth,
+    // Note that the components are in order according to where in the
+    // address they appear, the -1 is to account for DDR
+    AWidth   =  TAWidth + UAWidth - 1,
+    ECheck   =  EnableECC ? 2 : 0,
+    ECorrect =  EnableECC ? 1 : 0,
+    CWidth   =  3,
+    EHWidth  =  `max(`log2(ECheck), 1),
+    ERWidth  =  `max(`log2(ECheck), 1);
+
+wire   Reset;
+assign Reset = !rst;
+
+reg     [AWidth-1:0]    CommandAddress;
+reg     [CWidth-1:0]    Command; 
+wire                    CommandReady;
+reg                     CommandValid;
+
+wire    [DWidth-1:0]    DataIn;
+wire    [MWidth-1:0]    DataInMask;
+reg                     DataInValid;
+wire                    DataInReady;
+        
+wire    [DWidth-1:0]    DataOut;
+wire    [EHWidth-1:0]   DataOutErrorChecked;
+wire    [ERWidth-1:0]   DataOutErrorCorrected;
+wire                    DataOutValid;
+reg                     DataOutReady;
+
+wire                    Clock_IBUFG;
+wire                    Clock, ClockD2, ClockP90;
+wire                    Clock_DCM, ClockD2_DCM, ClockP90_DCM;
+wire                    Locked;
+
+reg  [`WORDWIDTH:0] out_d;
+assign out_d_ = out_d;
+
+assign  DDR2_BA[2]    = 1'b0;
+assign  DDR2_CS0_B[1] = 1'b1;
+assign  DDR2_ODT[1]   = 1'b0;
+assign  DDR2_CKE[1]   = 1'b0;
+
+IBUFGDS ClockIBufG(.I(CLKBUF_Q1_P), .IB(CLKBUF_Q1_N), .O(Clock_IBUFG));
+DCM_BASE
+  #( 
+     .CLKIN_PERIOD(5.0),
+     .CLKDV_DIVIDE(2.0),
+     .DLL_FREQUENCY_MODE("HIGH"),
+     .DUTY_CYCLE_CORRECTION("TRUE"),
+     .FACTORY_JF(16'hF0F0)
+   )
+  DCMBase(
+     .CLK0(Clock_DCM),
+     .CLK180( ),
+     .CLK270( ),
+     .CLK2X( ),
+     .CLK2X180( ),
+     .CLK90(ClockP90_DCM),
+     .CLKDV(ClockD2_DCM),
+     .CLKFX( ),
+     .CLKFX180( ),
+     .LOCKED(Locked),
+     .CLKFB(Clock),
+     .CLKIN(Clock_IBUFG),
+     .RST(Reset));
+  // synthesis attribute CLKIN_PERIOD          of DCMBase is "5.0"
+  // synthesis attribute CLKDV_DIVIDE          of DCMBase is "2.0"
+  // synthesis attribute DLL_FREQUENCY_MODE    of DCMBase is "HIGH"
+  // synthesis attribute DUTY_CYCLE_CORRECTION of DCMBase is "TRUE"
+  // synthesis attribute FACTORY_JF            of DCMBase is "16'hF0F0"
+  BUFG    ClockBufG(.I(Clock_DCM), .O(Clock));
+  BUFG    ClockP90BufG(.I(ClockP90_DCM), .O(ClockP90));
+  BUFG    ClockD2BufG(.I(ClockD2_DCM), .O(ClockD2));
+
+DDR2SDRAM DDR2SDRAM(
+      .Clock(Clock),
+      .ClockD2(ClockD2),
+      .ClockP90(ClockP90),
+      .Reset(Reset),
+      .Locked(Locked),
+      .ClockF200(Clock),
+      .Initialized( ),
+      .PoweredUp( ),
+
+      .CommandClock(clk),
+      .DataInClock(clk),
+      .DataOutClock(clk),
+      .CommandReset(Reset),
+      .DataInReset(Reset),
+      .DataOutReset(Reset),
+
+      .CommandAddress(CommandAddress),
+      .Command(Command),
+      .CommandValid(CommandValid),
+      .CommandReady(CommandReady),
+      .DataIn(DataIn),
+      .DataInMask(DataInMask),
+      .DataInValid(DataInValid),
+      .DataInReady(DataInReady),
+      .DataOut(DataOut),
+      .DataOutErrorChecked(DataOutErrorChecked),
+      .DataOutErrorCorrected(DataOutErrorCorrected),
+      .DataOutValid(DataOutValid),
+      .DataOutReady(DataOutReady),
+      .DDR2_DQ(DDR2_DQ),
+      .DDR2_A(DDR2_A),
+      .DDR2_BA(DDR2_BA[1:0]),
+      .DDR2_RAS_B(DDR2_RAS_B),
+      .DDR2_CAS_B(DDR2_CAS_B),
+      .DDR2_WE_B(DDR2_WE_B),
+      .DDR2_CS0_B(DDR2_CS0_B[0]),
+      .DDR2_ODT(DDR2_ODT[0]),
+      .DDR2_CKE(DDR2_CKE[0]),
+      .DDR2_DM(DDR2_DM),
+      .DDR2_DQS_P(DDR2_DQS_P),
+      .DDR2_DQS_N(DDR2_DQS_N),
+      .DDR2_CLK_P(DDR2_CLK_P),
+      .DDR2_CLK_N(DDR2_CLK_N));
+      defparam DDR2SDRAM.UWidth     = UWidth;
+      defparam DDR2SDRAM.BAWidth    = BAWidth;
+      defparam DDR2SDRAM.RAWidth    = RAWidth;
+      defparam DDR2SDRAM.CAWidth    = CAWidth;
+      defparam DDR2SDRAM.DWidth     = DWidth;
+      defparam DDR2SDRAM.BurstLen   = BurstLen;
+      defparam DDR2SDRAM.EnableMask = EnableMask;
+      defparam DDR2SDRAM.EnableECC  = EnableECC;
+      defparam DDR2SDRAM.Board      = Board;
+      defparam DDR2SDRAM.MultiClock = 1;
+
+  assign DataIn     = inDataWrite_d;
+  assign DataInMask = 16'b1111111111111111;
+
+  // Request/response sequencer, clocked by clk.
+  //
+  // Each cycle (outside reset), while the out port is empty:
+  //   1. If the controller presents read data (DataOutValid) and we advertised
+  //      DataOutReady, the low `WORDWIDTH bits of DataOut are latched into
+  //      out_d with the top bit clear, and `fill_out is invoked.
+  //   2. Otherwise, if the controller reports CommandReady and DataInReady:
+  //        - a pending write (both inAddrWrite and inDataWrite full) is issued
+  //          with Command 3'b000, and an acknowledgement word with only the
+  //          top bit set is placed in out_d;
+  //        - else a pending read (inAddrRead full) is issued with Command 3'b001.
+  //      Writes therefore take priority over reads.
+  //
+  // The `reset/`flush/`cleanup/`fill_out/`drain_* macros are defined outside
+  // this file section; presumably they manage the port handshake state.
+  always @(posedge clk) begin
+
+    if (!rst) begin
+      `reset
+      CommandValid <= 0;
+      // DataInValid drives the controller's DataInValid input, so it must
+      // come out of reset deasserted just like CommandValid.
+      DataInValid  <= 0;
+      DataOutReady <= 0;
+    end else begin
+      `flush
+      `cleanup
+
+      // The valid strobes default low every cycle; the command branches
+      // below override them (last non-blocking assignment wins).
+      CommandValid <= 0;
+      DataInValid  <= 0;
+
+      // Only accept read data from the controller while there is room to
+      // hand it on to the out port.
+      if (`out_empty) begin
+          DataOutReady <= 1;
+      end
+
+      if (DataOutReady && DataOutValid && `out_empty) begin
+          // Read data returned: forward it, top bit = 0.
+          out_d <= { 1'b0, DataOut[`WORDWIDTH-1:0] };
+          `fill_out
+          DataOutReady <= 0;
+
+      end else if (DataOutReady && CommandReady && DataInReady && `out_empty) begin
+          if (`inAddrWrite_full && `inDataWrite_full) begin
+            `drain_inDataWrite
+            `drain_inAddrWrite
+            CommandAddress <= inAddrWrite_d;
+            Command        <= 3'b000;
+            CommandValid   <= 1;
+            DataInValid    <= 1;
+            // Write acknowledgement: top bit = 1, payload all zero.  The width
+            // is derived from `WORDWIDTH so it always matches out_d.
+            out_d <= { 1'b1, {`WORDWIDTH{1'b0}} };
+            `fill_out
+            DataOutReady <= 0;
+          end else if (`inAddrRead_full) begin
+            `drain_inAddrRead
+            CommandAddress <= inAddrRead_d;
+            CommandValid   <= 1;
+            Command        <= 3'b001;
+            DataInValid    <= 0;
+            DataOutReady   <= 1;
+          end
+      end
+    end
+  end
+
+== Test ==============================================================
+
+#expect 0
+
+#ship debug : Debug
+#ship ddr   : DDR2
+
+debug.in:
+  recv, deliver;
+
+ddr.out:
+  collect;
+  set flags a=!c,b=b;
+  send to debug.in;
+  collect;
+  set flags a=!c,b=b;
+  send to debug.in;
+ddr.inAddrWrite:
+  set word=0;
+  deliver;
+  deliver;
+ddr.inDataWrite:
+  set word=1;
+  deliver;
+  deliver;
+
+
+
+
+== Constants ========================================================
+
+== Contributors =========================================================
+Adam Megacz <megacz@cs.berkeley.edu>
index 549633c..a34679b 100644 (file)
@@ -65,7 +65,10 @@ public void service() {
   assign uart_cts = 0;
   assign rst_out = rst_in && !break;
 
-  sasc_brg sasc_brg(clk, rst_in, 3, 65, sio_ce, sio_ce_x4);
+  // fst=3 means the clock divider is 3+2=5, so a 50MHz clock => 10MHz
+  // using a 33MHz clock,
+  //   33.333MHz / (38400Hz * 4) = 217.013 => 215+2,1
+  sasc_brg sasc_brg(clk, rst_in, 215, 1, sio_ce, sio_ce_x4);
   sasc_top sasc_top(clk, rst_in,
                     uart_in,
                     uart_out,
diff --git a/src/edu/berkeley/fleet/fpga/greg/Const.v b/src/edu/berkeley/fleet/fpga/greg/Const.v
new file mode 100644 (file)
index 0000000..57d173e
--- /dev/null
@@ -0,0 +1,212 @@
+//==============================================================================
+//     File:           $URL: svn+ssh://repositorypub@repository.eecs.berkeley.edu/public/Projects/GateLib/branches/dev/GateLibCore/Hardware/Library/Const.v $
+//     Version:        $Revision: 16355 $
+//     Author:         Greg Gibeling (http://gdgib.gotdns.com/~gdgib/)
+//     Copyright:      Copyright 2003-2008 UC Berkeley
+//==============================================================================
+
+//==============================================================================
+//     Section:        License
+//==============================================================================
+//     Copyright (c) 2005-2008, Regents of the University of California
+//     All rights reserved.
+//
+//     Redistribution and use in source and binary forms, with or without modification,
+//     are permitted provided that the following conditions are met:
+//
+//             - Redistributions of source code must retain the above copyright notice,
+//                     this list of conditions and the following disclaimer.
+//             - Redistributions in binary form must reproduce the above copyright
+//                     notice, this list of conditions and the following disclaimer
+//                     in the documentation and/or other materials provided with the
+//                     distribution.
+//             - Neither the name of the University of California, Berkeley nor the
+//                     names of its contributors may be used to endorse or promote
+//                     products derived from this software without specific prior
+//                     written permission.
+//
+//     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+//     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+//     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+//     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+//     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+//     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+//     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+//     ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+//     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+//     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//==============================================================================
+
+//------------------------------------------------------------------------------
+//     Section:        Simulation Flag
+//     Desc:           This little nebulous block will define the flags:
+//                             -SIMULATION     Simulating
+//                             -MODELSIM       Simulating using ModelSim
+//                             -XST            Synthesizing with XST
+//                             -SYNPLIFY       Synthesizing with Synplify
+//                             -SYNTHESIS      Synthesizing
+//                             -MACROSAFE      Safe to use macros (Synplify or ModelSim)
+//
+//     YOU SHOULD DEFINE THE "MODELSIM" FLAG FOR SIMULATION!!!!
+//------------------------------------------------------------------------------
+`ifdef synthesis                // if Synplify
+       `define SYNPLIFY
+       `define SYNTHESIS
+       `define MACROSAFE
+`else                           // if not Synplify
+       `ifdef MODELSIM
+               `define SIMULATION
+               `define MACROSAFE
+       `else
+               `define XST
+               // synthesis translate_off    // if XST then stop compiling
+                       `undef XST
+                       `define SIMULATION
+                       `define MODELSIM
+               // synthesis translate_on     // if XST then resume compiling
+               `ifdef XST
+                       `define SYNTHESIS
+                       `define MACROSAFE
+               `endif
+       `endif
+`endif
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+//     Section:        Log2 Macro
+//     Desc:           A macro to take the ceiling of the log base 2 of any number
+//                             (1 -> 0, 2 -> 1, 3 -> 2, 4 -> 2, 5 -> 3).  Useful for
+//                             calculating bitwidths.  Warning, this gives the number of
+//                             bits needed to hold x-1, not x.
+//------------------------------------------------------------------------------
+`ifdef MACROSAFE
+`define log2(x)                ((((x) > 1) ? 1 : 0) + \
+                       (((x) > 2) ? 1 : 0) + \
+                       (((x) > 4) ? 1 : 0) + \
+                       (((x) > 8) ? 1 : 0) + \
+                       (((x) > 16) ? 1 : 0) + \
+                       (((x) > 32) ? 1 : 0) + \
+                       (((x) > 64) ? 1 : 0) + \
+                       (((x) > 128) ? 1 : 0) + \
+                       (((x) > 256) ? 1 : 0) + \
+                       (((x) > 512) ? 1 : 0) + \
+                       (((x) > 1024) ? 1 : 0) + \
+                       (((x) > 2048) ? 1 : 0) + \
+                       (((x) > 4096) ? 1 : 0) + \
+                       (((x) > 8192) ? 1 : 0) + \
+                       (((x) > 16384) ? 1 : 0) + \
+                       (((x) > 32768) ? 1 : 0) + \
+                       (((x) > 65536) ? 1 : 0) + \
+                       (((x) > 131072) ? 1 : 0) + \
+                       (((x) > 262144) ? 1 : 0) + \
+                       (((x) > 524288) ? 1 : 0) + \
+                       (((x) > 1048576) ? 1 : 0) + \
+                       (((x) > 2097152) ? 1 : 0) + \
+                       (((x) > 4194304) ? 1 : 0) + \
+                       (((x) > 8388608) ? 1 : 0) + \
+                       (((x) > 16777216) ? 1 : 0) + \
+                       (((x) > 33554432) ? 1 : 0) + \
+                       (((x) > 67108864) ? 1 : 0) + \
+                       (((x) > 134217728) ? 1 : 0) + \
+                       (((x) > 268435456) ? 1 : 0) + \
+                       (((x) > 536870912) ? 1 : 0) + \
+                       (((x) > 1073741824) ? 1 : 0))
+`endif
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+//     Section:        Log2 Floor Macro
+//     Desc:           A macro to take the floor of the log base 2 of any number.
+//------------------------------------------------------------------------------
+`ifdef MACROSAFE
+`define log2f(x)       ((((x) >= 2) ? 1 : 0) + \
+                       (((x) >= 4) ? 1 : 0) + \
+                       (((x) >= 8) ? 1 : 0) + \
+                       (((x) >= 16) ? 1 : 0) + \
+                       (((x) >= 32) ? 1 : 0) + \
+                       (((x) >= 64) ? 1 : 0) + \
+                       (((x) >= 128) ? 1 : 0) + \
+                       (((x) >= 256) ? 1 : 0) + \
+                       (((x) >= 512) ? 1 : 0) + \
+                       (((x) >= 1024) ? 1 : 0) + \
+                       (((x) >= 2048) ? 1 : 0) + \
+                       (((x) >= 4096) ? 1 : 0) + \
+                       (((x) >= 8192) ? 1 : 0) + \
+                       (((x) >= 16384) ? 1 : 0) + \
+                       (((x) >= 32768) ? 1 : 0) + \
+                       (((x) >= 65536) ? 1 : 0) + \
+                       (((x) >= 131072) ? 1 : 0) + \
+                       (((x) >= 262144) ? 1 : 0) + \
+                       (((x) >= 524288) ? 1 : 0) + \
+                       (((x) >= 1048576) ? 1 : 0) + \
+                       (((x) >= 2097152) ? 1 : 0) + \
+                       (((x) >= 4194304) ? 1 : 0) + \
+                       (((x) >= 8388608) ? 1 : 0) + \
+                       (((x) >= 16777216) ? 1 : 0) + \
+                       (((x) >= 33554432) ? 1 : 0) + \
+                       (((x) >= 67108864) ? 1 : 0) + \
+                       (((x) >= 134217728) ? 1 : 0) + \
+                       (((x) >= 268435456) ? 1 : 0) + \
+                       (((x) >= 536870912) ? 1 : 0) + \
+                       (((x) >= 1073741824) ? 1 : 0))
+`endif
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+//     Section:        Pow2 Macro
+//     Desc:           A macro to take the 2 to the power of any number.  Useful for
+//                             calculating bitwidths.
+//------------------------------------------------------------------------------
+`ifdef MACROSAFE
+`define pow2(x)                ((((x) >= 1) ? 2 : 1) * \
+                       (((x) >= 2) ? 2 : 1) * \
+                       (((x) >= 3) ? 2 : 1) * \
+                       (((x) >= 4) ? 2 : 1) * \
+                       (((x) >= 5) ? 2 : 1) * \
+                       (((x) >= 6) ? 2 : 1) * \
+                       (((x) >= 7) ? 2 : 1) * \
+                       (((x) >= 8) ? 2 : 1) * \
+                       (((x) >= 9) ? 2 : 1) * \
+                       (((x) >= 10) ? 2 : 1) * \
+                       (((x) >= 11) ? 2 : 1) * \
+                       (((x) >= 12) ? 2 : 1) * \
+                       (((x) >= 13) ? 2 : 1) * \
+                       (((x) >= 14) ? 2 : 1) * \
+                       (((x) >= 15) ? 2 : 1) * \
+                       (((x) >= 16) ? 2 : 1) * \
+                       (((x) >= 17) ? 2 : 1) * \
+                       (((x) >= 18) ? 2 : 1) * \
+                       (((x) >= 19) ? 2 : 1) * \
+                       (((x) >= 20) ? 2 : 1) * \
+                       (((x) >= 21) ? 2 : 1) * \
+                       (((x) >= 22) ? 2 : 1) * \
+                       (((x) >= 23) ? 2 : 1) * \
+                       (((x) >= 24) ? 2 : 1) * \
+                       (((x) >= 25) ? 2 : 1) * \
+                       (((x) >= 26) ? 2 : 1) * \
+                       (((x) >= 27) ? 2 : 1) * \
+                       (((x) >= 28) ? 2 : 1) * \
+                       (((x) >= 29) ? 2 : 1) * \
+                       (((x) >= 30) ? 2 : 1) * \
+                       (((x) >= 31) ? 2 : 1))
+`endif
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+//     Section:        Max/Min Macros
+//     Desc:           Standard binary max/min macros
+//------------------------------------------------------------------------------
+`ifdef MACROSAFE
+`define max(x,y)       ((x) > (y) ? (x) : (y))
+`define min(x,y)       ((x) < (y) ? (x) : (y))
+`endif
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+//     Section:        Integer Division Macros
+//     Desc:           Rounding and ceiling for integer division
+//------------------------------------------------------------------------------
+`ifdef MACROSAFE
+`define        divceil(x,y)    (((x) + ((y) - 1)) / (y))
+`define        divrnd(x,y)     (((x) + ((y) >> 1)) / (y))
+`endif
+//------------------------------------------------------------------------------
diff --git a/src/edu/berkeley/fleet/fpga/greg/DDR2SDRAM.v b/src/edu/berkeley/fleet/fpga/greg/DDR2SDRAM.v
new file mode 100644 (file)
index 0000000..bba6f40
--- /dev/null
@@ -0,0 +1,424 @@
+//==============================================================================
+//     File:           $URL: svn+ssh://repositorypub@repository.eecs.berkeley.edu/public/Projects/GateLib/branches/dev/Firmware/DRAM/Hardware/DDR2SDRAM/DDR2SDRAM.v $
+//     Version:        $Revision: 16878 $
+//     Author:         Greg Gibeling (http://gdgib.gotdns.com/~gdgib/)
+//     Copyright:      Copyright 2005-2008 UC Berkeley
+//==============================================================================
+
+//==============================================================================
+//     Section:        License
+//==============================================================================
+//     Copyright (c) 2005-2008, Regents of the University of California
+//     All rights reserved.
+//
+//     Redistribution and use in source and binary forms, with or without modification,
+//     are permitted provided that the following conditions are met:
+//
+//             - Redistributions of source code must retain the above copyright notice,
+//                     this list of conditions and the following disclaimer.
+//             - Redistributions in binary form must reproduce the above copyright
+//                     notice, this list of conditions and the following disclaimer
+//                     in the documentation and/or other materials provided with the
+//                     distribution.
+//             - Neither the name of the University of California, Berkeley nor the
+//                     names of its contributors may be used to endorse or promote
+//                     products derived from this software without specific prior
+//                     written permission.
+//
+//     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+//     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+//     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+//     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+//     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+//     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+//     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+//     ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+//     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+//     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//==============================================================================
+
+//==============================================================================
+//     Includes
+//==============================================================================
+`include "Const.v"
+//==============================================================================
+
+//------------------------------------------------------------------------------
+//     Module:         DDR2SDRAM
+//     Based On:       svn+ssh://repositorypub@repository.eecs.berkeley.edu/public/Projects/GateLib/trunk/Firmware/DRAM/DRAM.v
+//     Author:         <a href="http://gdgib.gotdns.com/~gdgib/">Greg Gibeling</a>
+//     Version:        $Revision: 16878 $
+//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//     Desc:           Wraps the MIG ddr2_sdram core (instantiated below as mig20)
+//                             behind three valid/ready interfaces: Command (address +
+//                             opcode), DataIn (write data + mask) and DataOut (read data).
+//                             All three handshakes are held off by the internal Gate
+//                             signal until the core reports Initialized and Reset is low.
+//                             Only the Write and Read commands are forwarded to the core.
+//------------------------------------------------------------------------------
+module DDR2SDRAM(
+                       //------------------------------------------------------------------
+                       //      Clock & Reset Inputs
+                       //------------------------------------------------------------------
+                       Clock,
+                       ClockD2,
+                       ClockP90,
+                       Reset,
+                       Locked,
+                       ClockF200,
+                       //------------------------------------------------------------------
+       
+                       //------------------------------------------------------------------
+                       //      Status Outputs
+                       //------------------------------------------------------------------
+                       Initialized,
+                       PoweredUp,
+                       //------------------------------------------------------------------
+       
+                       //------------------------------------------------------------------
+                       //      Command Interface
+                       //------------------------------------------------------------------
+                       CommandClock,
+                       CommandReset,
+                       
+                       CommandAddress,
+                       Command,
+                       CommandValid,
+                       CommandReady,
+                       //------------------------------------------------------------------
+       
+                       //------------------------------------------------------------------
+                       //      Data Input (Write) Interface
+                       //------------------------------------------------------------------
+                       DataInClock,
+                       DataInReset,
+                       
+                       DataIn,
+                       DataInMask,
+                       DataInValid,
+                       DataInReady,
+                       //------------------------------------------------------------------
+       
+                       //------------------------------------------------------------------
+                       //      Data Output (Read) Interface
+                       //------------------------------------------------------------------
+                       DataOutClock,
+                       DataOutReset,
+                       
+                       DataOut,
+                       DataOutErrorChecked,
+                       DataOutErrorCorrected,
+                       DataOutValid,
+                       DataOutReady,
+                       //------------------------------------------------------------------
+                       
+                       //------------------------------------------------------------------
+                       //      DDR2 Memory Interface
+                       //------------------------------------------------------------------
+                       DDR2_DQ,
+                       DDR2_A,
+                       DDR2_BA,
+                       DDR2_RAS_B,
+                       DDR2_CAS_B,
+                       DDR2_WE_B,
+                       DDR2_CS0_B,
+                       DDR2_ODT,
+                       DDR2_CKE,
+                       DDR2_DM,
+                       DDR2_DQS_P,
+                       DDR2_DQS_N,
+                       DDR2_CLK_P,
+                       DDR2_CLK_N
+                       //------------------------------------------------------------------
+       );
+       //--------------------------------------------------------------------------
+       //      Parameters
+       //--------------------------------------------------------------------------
+       parameter                               UWidth =                                8,      // This will almost ALWAYS be 8
+                                                       BAWidth =                               2,      // Setup the addressing line for the ML505@256MB, Address components are listed from most to least significant
+                                                       RAWidth =                               13,
+                                                       CAWidth =                               10,
+                                                       DWidth =                                128, // 128b SDR internal transfers
+                                                       BurstLen =                              2,      // 256b total burst: 2 DWidth-wide words at SDR, or 4 external words at DDR
+                                                       EnableMask =                    1,      // Nonzero gives DataInMask and DDR2_DM a nonzero width (see MWidth and DM_WIDTH)
+                                                       EnableECC =                             0,      // Passed to the core as ECC_ENABLE; also widens DQ_WIDTH by DWidth / 8 and forces MWidth to 0
+                                                       Board =                                 0,      // 0 - ML505, 1 - BEE3A, 2 - BEE3B
+                                                       MultiClock =                    0,      // 0 - Use Clock & Reset for everything, 1 - Use the individual FIFO clock and resets
+                                                       AdditiveLatency =               0;      // Passed to the core as ADDITIVE_LAT
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Fixed Constants
+       //--------------------------------------------------------------------------
+       // NOTE(review): these localparams are only declared when MACROSAFE is defined
+       // (presumably by Const.v -- confirm). The port declarations and the per-board
+       // settings below reference them (UCount, MWidth, AWidth, CWidth, XAWidth, ...),
+       // so the module cannot elaborate without MACROSAFE.
+       `ifdef MACROSAFE
+       localparam                              UCount =                                DWidth / UWidth,        // Number of UWidth-bit units in one DWidth-bit word
+                                                       MWidth =                                (EnableECC || (EnableMask == 0)) ? 0 : UCount, // 128b/8 = 16b per mask means per-byte masking
+                                                       UAWidth =                               `log2(UCount-1), // Unused lower address bits, the -1 is to get a proper log2
+                                                       XAWidth =                               `max(RAWidth, CAWidth), // Width of the shared row/column address bus DDR2_A
+                                                       TAWidth =                               CAWidth + RAWidth + BAWidth,    // Address bits handed to the core (checked against 31 below)
+                                                       AWidth =                                TAWidth + UAWidth - 1, // Note that the components are in order according to where in the address they appear, the -1 is to account for DDR
+                                                       CommandMask =                   7'b0000011, // Read & Write only; bit N set means command code N is supported
+                                                       ECheck =                                EnableECC ? 2 : 0,
+                                                       ECorrect =                              EnableECC ? 1 : 0,
+                                                       CWidth =                                3,      // Width of the Command opcode
+                                                       COMMAND_Write =                 3'b000,
+                                                       COMMAND_Read =                  3'b001,
+                                                       COMMAND_Flush =                 3'b010,
+                                                       COMMAND_Refresh =               3'b011,
+                                                       COMMAND_AutoRefresh =   3'b100,
+                                                       COMMAND_PowerDown =             3'b101,
+                                                       COMMAND_PowerUp =               3'b110,
+                                                       EHWidth =                               `max(`log2(ECheck), 1), // Width of DataOutErrorChecked, at least 1
+                                                       ERWidth =                               `max(`log2(ECheck), 1); // Width of DataOutErrorCorrected, at least 1. NOTE(review): derived from ECheck exactly like EHWidth, while ECorrect is not referenced anywhere else in this module -- confirm whether ECorrect was intended here
+       `endif
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Per-Board Settings
+       //--------------------------------------------------------------------------
+       // SHOULD BE A CASE BASED ON "Board"
+       // As written, "Board ? a : b" selects a for every nonzero Board (BEE3A and
+       // BEE3B alike) and b for the ML505 (Board == 0). Only DQS_IO_COL and DQ_IO_MS
+       // test Board == 2 separately, and the two nonzero-Board DQS_IO_COL literals are
+       // identical, so in effect only DQ_IO_MS differs between BEE3A and BEE3B.
+       localparam                              CKE_WIDTH =                             Board ? 1 : 1,
+                                                       CLK_WIDTH =                             Board ? 1 : 2,
+                                                       CS_NUM =                                Board ? 1 : 1,
+                                                       CS_WIDTH =                              Board ? 1 : 1,
+                                                       DM_WIDTH =                              EnableMask ? (UCount / 2) : 0,  // Half of UCount: the external bus is half of DWidth wide
+                                                       DQ_WIDTH =                              (DWidth / 2) + (EnableECC ? (DWidth / 8) : 0), // External data width, plus DWidth / 8 extra bits when ECC is enabled
+                                                       DQ_PER_DQS =                    Board ? 4 : 8,
+                                                       DQS_WIDTH =                             DQ_WIDTH/DQ_PER_DQS,
+                                                       ODT_WIDTH =                             Board ? 1 : 1,
+                                                       ADDITIVE_LAT =                  AdditiveLatency, 
+                                                       CAS_LAT =                               Board ? 4 : 4,
+                                                       MULTI_BANK_EN =                 Board ? 1 : 1,
+                                                       TWO_T_TIME_EN =                 Board ? 0 : 1,
+                                                       ODT_TYPE =                              Board ? 3 : 1,
+                                                       REDUCE_DRV =                    Board ? 1 : 0,
+                                                       TREFI_NS =                              Board ? 7800 : 7800,
+                                                       TRAS =                                  Board ? 40000 : 40000,
+                                                       TRCD =                                  Board ? 15000 : 15000,
+                                                       TRFC =                                  Board ? 127500 : 105000,
+                                                       TRP =                                   Board ? 15000 : 15000,
+                                                       TRTP =                                  Board ? 7500 : 7500,
+                                                       TWR =                                   Board ? 15000 : 15000,
+                                                       TWTR =                                  Board ? 10000 : 7500,
+                                                       HIGH_PERFORMANCE_MODE = "TRUE",
+                                                       DQS_IO_COL =                    Board ? ((Board == 2) ? 36'b010000000000000000000101101010101010 : 36'b010000000000000000000101101010101010) : 16'b0000000000000000,
+                                                       DQ_IO_MS =                              Board ? ((Board == 2) ? 72'b11011110_10011100_11001001_00111000_01101110_01100101_10101100_01011010_10011010 : 72'b11011010_10110001_01110101_10110100_10101100_01100101_11010100_11101000_10100011) : 64'b01110101_00111101_00001111_00011110_00101110_11000011_11000001_10111100,
+                                                       CLK_PERIOD =                    Board ? 3750 : 3750;
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Internal Constants
+       //--------------------------------------------------------------------------
+       // SIM_ONLY is forwarded to the core, where 1 skips the SDRAM power up delay;
+       // it is set only when MODELSIM is defined.
+       `ifdef MODELSIM
+       localparam                              SIM_ONLY =                              1;
+       `else
+       localparam                              SIM_ONLY =                              0;
+       `endif
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Clock & Reset Inputs
+       //--------------------------------------------------------------------------
+       // Core connections: Clock -> clk0, ClockD2 -> clkdiv0, ClockP90 -> clk90,
+       // ClockF200 -> clk200, Locked -> dcm_lock, Reset -> sys_rst_n (active high here).
+       input                                   Clock, ClockD2, ClockP90, Reset, Locked;
+       input                                   ClockF200;
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Status Outputs
+       //--------------------------------------------------------------------------
+       output                                  Initialized;    // Driven by the core's phy_init_done
+       output                                  PoweredUp;              // Tied to 1'b1 below
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Command Interface
+       //--------------------------------------------------------------------------
+       input                                   CommandClock, CommandReset;     // Only used when MultiClock is nonzero
+       
+       input   [AWidth-1:0]    CommandAddress;
+       input   [CWidth-1:0]    Command; 
+       input                                   CommandValid;
+       output                                  CommandReady;
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Data Input (Write) Interface
+       //--------------------------------------------------------------------------
+       input                                   DataInClock, DataInReset;       // Only used when MultiClock is nonzero
+       
+       input   [DWidth-1:0]    DataIn;
+       input   [MWidth-1:0]    DataInMask;
+       input                                   DataInValid;
+       output                                  DataInReady;
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Data Output (Read) Interface
+       //--------------------------------------------------------------------------
+       input                                   DataOutClock, DataOutReset;     // Only used when MultiClock is nonzero
+       
+       output  [DWidth-1:0]    DataOut;
+       output  [EHWidth-1:0]   DataOutErrorChecked;
+       output  [ERWidth-1:0]   DataOutErrorCorrected;
+       output                                  DataOutValid;
+       input                                   DataOutReady;
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      DDR2 Memory Interface
+       //--------------------------------------------------------------------------
+       inout   [DQ_WIDTH-1:0]  DDR2_DQ;
+       output  [XAWidth-1:0]   DDR2_A;
+       output  [BAWidth-1:0]   DDR2_BA;
+       output                                  DDR2_RAS_B;
+       output                                  DDR2_CAS_B;
+       output                                  DDR2_WE_B;
+       output  [CS_WIDTH-1:0]  DDR2_CS0_B;
+       output                                  DDR2_ODT;               // Declared 1 bit wide; ODT_WIDTH is 1 for every Board value
+       output                                  DDR2_CKE;               // Declared 1 bit wide; CKE_WIDTH is 1 for every Board value
+       output  [DM_WIDTH-1:0]  DDR2_DM;
+       inout   [DQS_WIDTH-1:0] DDR2_DQS_P;
+       inout   [DQS_WIDTH-1:0] DDR2_DQS_N;
+       output  [CLK_WIDTH-1:0] DDR2_CLK_P;
+       output  [CLK_WIDTH-1:0] DDR2_CLK_N;
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Internal Wires
+       //--------------------------------------------------------------------------
+       wire                                    app_af_afull, app_wdf_afull, rd_data_valid;     // Status outputs of the core
+       wire    [1:0]                   rd_ecc_error;   // Bit 1 feeds DataOutErrorChecked, bit 0 feeds DataOutErrorCorrected
+       
+       wire                                    Gate;                   // High once Initialized and while Reset is low
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Assign Statements
+       //--------------------------------------------------------------------------
+       // Ready/valid are derived from the core's almost-full and data-valid flags
+       // and are forced low whenever Gate is low.
+       assign  CommandReady =                                                  ~app_af_afull & Gate;
+       assign  DataInReady =                                                   ~app_wdf_afull & Gate;
+       assign  DataOutValid =                                                  rd_data_valid & Gate;
+       
+       // With ECC disabled both error outputs are driven to all zeros.
+       assign  DataOutErrorChecked =                                   EnableECC ? rd_ecc_error[1] : {EHWidth{1'b0}};
+       assign  DataOutErrorCorrected =                                 EnableECC ? rd_ecc_error[0] : {ERWidth{1'b0}};
+       
+       assign  PoweredUp =                                                             1'b1;
+       assign  Gate =                                                                  Initialized & ~Reset;
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      Checks
+       //--------------------------------------------------------------------------
+       // Parameter and command sanity checks, compiled only when MODELSIM is defined.
+       // They only $display a message; none of them stops the simulation.
+       `ifdef MODELSIM
+               initial if ((Board < 0) || (Board > 2)) $display("ERROR: Board type ", Board, " is not supported 0 - ML505, 1 - BEE3A, 2 - BEE3B are valid!");
+               initial if (Board == 0) begin
+                       if (DWidth != 128) $display("ERROR: ML505 supports only 128b DWidth!");
+                       if (UWidth != 8) $display("ERROR: ML505 supports only 8b UWidth!");
+                       if (EnableMask == 0) $display("ERROR: ML505 requires mask!");
+                       if (EnableECC != 0) $display("ERROR: ML505 does not support ECC!");
+               end
+               initial if ((Board == 1) || (Board == 2)) begin
+                       if (EnableECC != 0) begin
+                               if (DWidth != 128) $display("ERROR: BEE3 supports only 128b DWidth when ECC is enabled!");
+                       end else begin
+                               if ((DWidth != 144) && (DWidth != 128)) $display("ERROR: BEE3 supports 128 or 144 DWidth when ECC is disabled!");
+                       end
+                       if (UWidth != 8) $display("ERROR: BEE3 supports only 8b UWidth!");
+                       if (EnableMask != 0) $display("ERROR: BEE3 does not support mask!");
+               end
+               initial if ((DQ_WIDTH % UWidth) != 0) $display("ERROR: External data transfer width must be a multiple of the unit width: ", DQ_WIDTH, " is not a multiple of ", UWidth);
+               initial if ((BurstLen != 2) && (BurstLen != 4)) $display("ERROR: Interface burst length must be 2 or 4 (4 or 8 at DDR) not: ", BurstLen);
+               initial if (TAWidth > 31) $display("ERROR: MIG2.3 core only supports up to 31 internal address bits (TAWidth > 31 is invalid)!");
+               // Sampled on Clock: report any accepted command whose CommandMask bit is clear.
+               always @ (posedge Clock) begin
+                       if (CommandReady & CommandValid & ~((CommandMask >> Command) & 1'b1)) $display("ERROR: Unsupported memory command: ", Command);
+               end
+       `endif
+       //--------------------------------------------------------------------------
+       
+       //--------------------------------------------------------------------------
+       //      MIG 2.x DDR2 SDRAM Controller
+       //--------------------------------------------------------------------------
+       ddr2_sdram              #(                      .BANK_WIDTH(            BAWidth),               // # of memory bank addr bits
+                                                               .CKE_WIDTH(                     CKE_WIDTH),             // # of memory clock enable outputs
+                                                               .CLK_WIDTH(                     CLK_WIDTH),             // # of clock outputs
+                                                               .COL_WIDTH(                     CAWidth),               // # of memory column bits
+                                                               .CS_NUM(                        CS_NUM),                // # of separate memory chip selects
+                                                               .CS_WIDTH(                      CS_WIDTH),              // # of total memory chip selects
+                                                               .CS_BITS(                       `log2(CS_NUM)), // set to log2(CS_NUM) (rounded up)
+                                                               .DM_WIDTH(                      DM_WIDTH),              // # of data mask bits
+                                                               .DQ_WIDTH(                      DQ_WIDTH),              // # of data width
+                                                               .DQ_PER_DQS(            DQ_PER_DQS),    // # of DQ data bits per strobe
+                                                               .DQS_WIDTH(                     DQS_WIDTH),             // # of DQS strobes
+                                                               .DQ_BITS(                       `log2(DQ_WIDTH-1)), // set to log2(DQS_WIDTH*DQ_PER_DQS)
+                                                               .DQS_BITS(                      `log2((DQ_WIDTH/DQ_PER_DQS)-1)), // set to log2(DQS_WIDTH)
+                                                               .ODT_WIDTH(                     ODT_WIDTH),             // # of memory on-die term enables
+                                                               .ROW_WIDTH(                     RAWidth),               // # of memory row and # of addr bits
+                                                               .ADDITIVE_LAT(          ADDITIVE_LAT),  // additive write latency 
+                                                               .BURST_LEN(                     BurstLen * 2),  // burst length (in double words)
+                                                               .BURST_TYPE(            0),                             // burst type (=0 seq; =1 interleaved), totally useless thanks to the DDR to SDR conversion
+                                                               .CAS_LAT(                       CAS_LAT),               // CAS latency
+                                                               .ECC_ENABLE(            EnableECC),             // enable ECC (=1 enable)
+                                                               .APPDATA_WIDTH(         DWidth),                // # of usr read/write data bus bits
+                                                               .MULTI_BANK_EN(         MULTI_BANK_EN), // Keeps multiple banks open. (= 1 enable)
+                                                               .TWO_T_TIME_EN(         TWO_T_TIME_EN), // 2t timing for unbuffered dimms
+                                                               .ODT_TYPE(                      ODT_TYPE),              // ODT (=0(none),=1(75),=2(150),=3(50))
+                                                               .REDUCE_DRV(            REDUCE_DRV),    // reduced strength mem I/O (=1 yes)
+                                                               .REG_ENABLE(            0),                             // registered addr/ctrl (=1 yes), cannot be changed without rerunning MIG/CoreGen
+                                                               .TREFI_NS(                      TREFI_NS),              // auto refresh interval (ns)
+                                                               .TRAS(                          TRAS),                  // active->precharge delay
+                                                               .TRCD(                          TRCD),                  // active->read/write delay
+                                                               .TRFC(                          TRFC),                  // refresh->refresh, refresh->active delay
+                                                               .TRP(                           TRP),                   // precharge->command delay
+                                                               .TRTP(                          TRTP),                  // read->precharge delay
+                                                               .TWR(                           TWR),                   // used to determine write->precharge
+                                                               .TWTR(                          TWTR),                  // write->read delay
+                                                               .HIGH_PERFORMANCE_MODE(HIGH_PERFORMANCE_MODE), // TRUE, the IODELAY performance mode is set to high. FALSE, the IODELAY performance mode is set to low.       
+                                                               .SIM_ONLY(                      SIM_ONLY),              // = 1 to skip SDRAM power up delay
+                                                               .DEBUG_EN(                      0),                             // Enable debug signals/controls
+                                                               .DQS_IO_COL(            DQS_IO_COL),    // I/O column location of DQS groups (=0, left; =1 center, =2 right)
+                                                               .DQ_IO_MS(                      DQ_IO_MS),              // Master/Slave location of DQ I/O (=0 slave) 
+                                                               .CLK_PERIOD(            CLK_PERIOD),    // Core/Memory clock period (in ps)
+                                                               .RST_ACT_LOW(           0),                             // =1 for active low reset, =0 for active high
+                                                               .EN_SYN(                        MultiClock ? "FALSE" : "TRUE")) // "TRUE" in single-clock mode, "FALSE" when MultiClock is nonzero
+                                       mig20(          .ddr2_dq(                       DDR2_DQ),
+                                                               .ddr2_a(                        DDR2_A),
+                                                               .ddr2_ba(                       DDR2_BA),
+                                                               .ddr2_ras_n(            DDR2_RAS_B),
+                                                               .ddr2_cas_n(            DDR2_CAS_B),
+                                                               .ddr2_we_n(                     DDR2_WE_B),
+                                                               .ddr2_cs_n(                     DDR2_CS0_B),
+                                                               .ddr2_odt(                      DDR2_ODT),
+                                                               .ddr2_cke(                      DDR2_CKE),
+                                                               .ddr2_dm(                       DDR2_DM),
+                                                               .ddr2_dqs(                      DDR2_DQS_P),
+                                                               .ddr2_dqs_n(            DDR2_DQS_N),
+                                                               .ddr2_ck(                       DDR2_CLK_P),
+                                                               .ddr2_ck_n(                     DDR2_CLK_N),
+
+                                                               .sys_rst_n(                     Reset),                 // Despite the _n port name, RST_ACT_LOW is 0 above, so Reset is passed through active high
+                                                               .phy_init_done(         Initialized),
+                                                               .dcm_lock(                      Locked),
+                                                               .rst0_tb(                       ),                              // Left unconnected
+                                                               .clk0_tb(                       ),                              // Left unconnected
+                                                               .clk0(                          Clock),
+                                                               .clk90(                         ClockP90),
+                                                               .clk200(                        ClockF200),
+                                                               .clkdiv0(                       ClockD2),
+
+                                                               // Address/command port. Only opcodes with Command[2:1] == 0 (Write = 3'b000, Read = 3'b001)
+                                                               // are written into the core; Command[0] then selects between them.
+                                                               // Gate is already part of CommandReady, so the extra "& Gate" is redundant.
+                                                               .af_clk(                        MultiClock ? CommandClock : Clock),
+                                                               .af_rst(                        MultiClock ? CommandReset : Reset),
+                                                               .app_af_afull(          app_af_afull),
+                                                               .app_af_wren(           CommandValid & (~|Command[2:1]) & CommandReady & Gate),
+                                                               // The low UAWidth address bits are dropped and the result is zero-padded on the left:
+                                                               // ((30 - AWidth) + UAWidth) + (AWidth - UAWidth) + 1 = 31 bits in total.
+                                                               .app_af_addr(           {{((30 - AWidth) + UAWidth){1'b0}}, CommandAddress[AWidth-1:UAWidth], 1'b0}), // The final 1'b0 is to compensate for the DDR to SDR conversion
+                                                               .app_af_cmd(            {2'b00, Command[0]}),
+                                                               
+                                                               // Write data port. Gate is already part of DataInReady, so the extra "& Gate" is redundant.
+                                                               .wb_clk(                        MultiClock ? DataInClock : Clock),
+                                                               .wb_rst(                        MultiClock ? DataInReset : Reset),
+                                                               .app_wdf_afull(         app_wdf_afull),
+                                                               .app_wdf_wren(          DataInValid & DataInReady & Gate),
+                                                               .app_wdf_data(          DataIn),
+                                                               .app_wdf_mask_data(     DataInMask),
+                                                               
+                                                               // Read data port. The read enable follows DataOutReady directly (gated only by Gate).
+                                                               .rb_clk(                        MultiClock ? DataOutClock : Clock),
+                                                               .rb_rst(                        MultiClock ? DataOutReset : Reset),
+                                                               .rd_data_valid(         rd_data_valid),
+                                                               .rd_ecc_error(          rd_ecc_error),
+                                                               .rd_data_rden(          DataOutReady & Gate),
+                                                               .rd_data_fifo_out(      DataOut),
+                                                               .rb_full(                       ));                             // Left unconnected
+       //--------------------------------------------------------------------------
+endmodule      
+//------------------------------------------------------------------------------
diff --git a/src/edu/berkeley/fleet/fpga/greg/Readme.txt b/src/edu/berkeley/fleet/fpga/greg/Readme.txt
new file mode 100644 (file)
index 0000000..2cd7345
--- /dev/null
@@ -0,0 +1,110 @@
+================================================================================
+DDR2SDRAM MIG Core v1.2008.10.16 - by Greg Gibeling - Copyright 2005-2008 UC Berkeley
+
+File:        $URL: svn+ssh://repositorypub@repository.eecs.berkeley.edu/public/Projects/GateLib/branches/dev/Firmware/DRAM/Hardware/DDR2SDRAM/MIG/Readme.txt $
+Version:     $Revision: 16601 $
+Author:      Greg Gibeling (http://gdgib.gotdns.com/~gdgib/)
+Copyright:   Copyright 2005-2008 UC Berkeley
+================================================================================
+
+================================================================================
+Copyright (c) 2005-2008, Regents of the University of California
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    - Redistributions of source code must retain the above copyright notice,
+        this list of conditions and the following disclaimer. 
+    - Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer
+        in the documentation and/or other materials provided with the
+        distribution. 
+    - Neither the name of the University of California, Berkeley nor the
+        names of its contributors may be used to endorse or promote
+        products derived from this software without specific prior
+        written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+================================================================================
+
+================================================================================
+Licensing
+       Note that the BSD license above only applies to this file and any changes to
+       the MIG design after it was generated by CoreGen/MIG, not to the complete
+       MIG generated core!
+
+Board Instructions
+       Set ML505 board to 200MHz -> 01001010 = 0x4A
+
+Instructions for Updating MIG version
+       Xilinx Webupdate (MIG gets updated a lot)
+       Launch Coregen
+       Create a project in any empty directory
+               ML505/Externals/DDR2CoreGen
+               May need to rename the mig_xx directory to match new core name
+               commit
+               delete all files from working copy only
+       Project Settings
+               XC5VLX50T, FFG1136, -1C
+               Verilog Output, XST, NGC, B[n:m] busses
+       Memories & Storage Elements -> Memory Interface Generator -> MIG2.3
+               Option1: Update the Design using MIG
+                       You will need to provide the prj file & ucf
+                       MIG will generate a complete new design with the same options
+               Option2: Recreate Design from Scratch
+                       Page1
+                               Create a design
+                               Name: ???
+                               1 Controller
+                       Page2 - No additional parts (why bother?)
+                       Page3 - Use DDR2 SDRAM
+                       Page4
+                               Speed: 266MHz
+                               Type: SODIMMs
+                               Part: MT4HTF3264HY-53E
+                               Width: 64b
+                               DataMask: Enable
+                       Page5 (default)
+                               Burst Length: 4
+                               Burst Type: Sequential
+                       Page6 (default)
+                               Output Drive Strength: Fullstrength
+                               RTT (nominal) - ODT: 75ohms
+                               Additive Latency (AL): 0
+                       Page7
+                               Use DCM: disable
+                               DCI for DQ/DQS: Enable
+                               DCI for Address/Control: Disable
+                               SSTL for Address/Control: Class II
+                               Debug Signals: Disable
+                       Page8 - Pin reservations, no change, empty on right
+                       Page9 - Defaults
+                       Page10 - Summary
+                       Page11 - Accept license for sim models
+                       Page12 - Ignore
+                       Page13 - Design Notes (may want to print)
+       Merge into SVN
+               can use diff to review changes
+               old version had ML505 overlay in it, make sure relevant changes stay (icon/ila/chipscope of course, but also the UCF!)
+               commit new version
+       Merge into ML505/Hardware/DDR2SDRAM
+               Delete old files (make sure changes had been committed)
+               copy the user_design/RTL into Hardware/DDR2SDRAM/MIG
+               Use diff to review changes
+               Make sure ML505 specific changes stick around
+                       file not in MIG: ddr2_sdram.ucf
+                       multiple changes: ddr2_sdram.v
+                       rd_data_rden: ddr2_mem_if_top, ddr2_top, ddr2_usr_top, ddr2_usr_rd
+                       rd_fifo_clear, wr_fifo_clear: ddr2_usr_rd, ddr2_usr_wr, ddr2_usr_top
+               Update DDR2SDRAM.v with any new parameters or signals
+================================================================================
\ No newline at end of file
diff --git a/src/edu/berkeley/fleet/fpga/greg/asyncfifo_dmem_1b.ngc b/src/edu/berkeley/fleet/fpga/greg/asyncfifo_dmem_1b.ngc
new file mode 100644 (file)
index 0000000..8abaea6
--- /dev/null
@@ -0,0 +1,3 @@
+XILINX-XDB 0.1 STUB 0.1 ASCII
+XILINX-XDM V1.4e
+$0de\7f4g<,[o}e~g`n;"2*413&;$>"9 > %3844=6;2;%<<>41;KMTPR=l`d7<7>11392>JSSX\^1hb{{<183:4=5<299?>?44g96ri~28}lm=<=9-03827=1<=1=557;;7;:246<>03=5;86974:2<?19l1=6D@_UU8svjaXmdz\7fuRzgrdqk80<76;?0:7GAPTV9twi`Wlg{xtQ{hsgplZgt{lx\7f084?>3782?OIX\^1|\7fah_dosp|Ys`{oxdR`jg`vf80<768n0:7GAPTV9twi`Wo\7fg`Rzgrdqk80<76;>0:7GAPTV9twi`Wo\7fg`Rzgrdqk[dutm{~797>12595>LHW]]0{~biPftno[qnumzbTbhintd>6>58602<1CXZ_UU8geqgXkf\7fex1;50?3a?3<H]]Z^X7|k_ecweZeh}g~797>11c95>JSSX\^1{\7fQkauc\gjsi|5?1<3<46B682FED02<HONMLCB33?3EDKJIHONMLCBA@G7=1M8:04=>72121056399::==570521446709>;>=?>;868=56>9912<=7?0123456>89397487;845=3?>>;1J<?5N139B67=F;;1J8?5N589BW\HDW[OL?6LZ299AQVYNFOE:7N:4C@:B6>EB9;1HDHMD_MK@AKUBW]S[I>5LLS;8GJKJA]^NH<:4CNPF[BCIM[K_EB@PIODL3>EUMH^NH<5K119GBC@ALOLMJIHIFGF0?AVH=2N[^L>:;ERQE43<LYXJ>85KPSC01>BWZH>>7I^]A408BA5<NMI?7KJLE29E@U2<NMZN==5IF1;3=5?AN9:;<=>>0:DE<5>709LM<=>?0178Bdjtm81L?6IAD09J6>O7:2C:>6G=2:K06>O3:2C>56GAIUQWEQC43@D]?6G@B79OKDBBL>1GCJGLAM68HPR5<2F^X>:4LTV70>JR\<>0@XZ95:OPCJHd3Dkac\7fXjrrkljf=JageyZh||inl0?K53;2D8:>5A4218J1243G>=?6@;829M0<5<F<397C8<;O437>H19o1ENRLZSQKM[UTHXZ=0BHZXOSI2?J4<GB;0\95_ASVb?UOIWK_XEIVm;QKM[GSTFHGN?6^]E09R<>TFE?>S=I84SNWQG@5<[YE?7YW_E218Q5)`zo$yj"ilx/aoo})JpfxT~iQ\7fnup\flhXpfx;<=>PRdqvhq74;2_;#j|i.sd,cf~)keas#@v`r^pg[uhszVhbbRv`r1235ZTb{|f\7f=>=4U1-dvc(un&mht#mcky-N|jtXzmU{by|Pbhl\|jt7898T^h}zlu307>S7'nxm"\7fh gbz-gim\7f'Drd~R|k_qlwvZdnfVrd~=>?3^Pfwpjs9:90Y=!hrg,qb*adp'iggu!Bxnp\vaYwf}xTnd`Pxnp3452XZly~`y?<2:W3+bta&{l$knv!cmi{+H~hzVxoS}`{r^e`[}iu89:;S_k|umv277=R8&myj#|i/fa{*fjlp&Gsc\7fQ}d^rmpwY`kVrd~=>?1^Pfwpjs9:80Y=!hrg,qb*adp'iggu!Bxnp\vaYwf}xTknQwos2347YUmz\7fgx<==;T2,cw`)zo%lou lljz,I}iuW{nT|cz}_fa\|jt7899T^h}zlu306>S7'nxm"\7fh gbz-gim\7f'Drd~R|k_qlwvZadWqey<=>;_Sgpqir6;:1^<"i}f/pe+be\7f&jf`t"Cwos]tvZvi|{UiecQwos2345YUmz\7fgx<=<;T2,cw`)zo%lou lljz,I}iuW~xT|cz}_ckm[}iu89::S_k|umv276=R8&myj#|i/fa{*fjlp&Gsc\7fQxr^rmpwYeagUsc\7f>?03]Qavsk|8987X> gsd-vc)`kq$h`fv 
Mymq[rtXxg~ySoga_ymq4564W[oxyaz>329V4*aun'xm#jmw.bnh|*K\7fg{U|~R~ats]amkY\7fg{:;<9Q]erwop4553\:$k\7fh!rg-dg}(ddbr$Aua}_vp\tkruWniTtb|?012\V`urd};8>6[?/fpe*w`(ojr%oaew/LzlvZquWyd\7f~Ril_ymq4566W[oxyaz>339V4*aun'xm#jmw.bnh|*K\7fg{U|~R~ats]dgZ~hz9:;>R\jstnw564<]9%l~k }f.e`|+ekcq%Ftb|Pws]sjqtXojUsc\7f>?02]Qavsk|8997X> gsd-vc)`kq$h`fv Mymq[rtXxg~ySjmPxnp3452XZly~`y?=1:W3+bta&{l$knv!cmi{+wbXxg~ySoga<1<15>S7'nxm"\7fh gbz-gim\7f'{nT|cz}_ckm848592_;#j|i.sd,cf~)keas#\7fjPpovq[goi4;49=6[?/fpe*w`(ojr%oaew/sf\tkruWkce0>0=1:W3+bta&{l$knv!cmi{+wbXxg~ySoga<5<15>S7'nxm"\7fh gbz-gim\7f'{nT|cz}_ckm808582_;#j|i.sd,cf~)keas#\7fjPpovq[goiW98;7X> gsd-vc)`kq$h`fv re]sjqtXj`dT=?>4U1-dvc(un&mht#mcky-q`Zvi|{UiecQ=219V4*aun'xm#jmw.bnh|*tcWyd\7f~Rlfn^114>S7'nxm"\7fh gbz-gim\7f'{nT|cz}_ckm[1473\:$k\7fh!rg-dg}(ddbr$~iQ\7fnup\flhX=;<0Y=!hrg,qb*adp'iggu!}d^rmpwYeagUsc\7f>?0105?P6(o{l%~k!hcy,`hn~(zmU{by|Pbhl\|jt789;9:6[?/fpe*w`(ojr%oaew/sf\tkruWkceSua}012163=R8&myj#|i/fa{*fjlp&xoS}`{r^`jjZ~hz9:;??84U1-dvc(un&mht#mcky-q`Zvi|{UiecQwos2341473\:$k\7fh!rg-dg}(ddbr$~iQ\7fnup\cf:76;:0Y=!hrg,qb*adp'iggu!}d^rmpwY`k5;5>=5Z0.eqb+ta'nis"nbdx.pg[uhszVmh0?0=0:W3+bta&{l$knv!cmi{+wbXxg~ySjm33?03?P6(o{l%~k!hcy,`hn~(zmU{by|Pgb>7:76<]9%l~k }f.e`|+ekcq%yhR~ats]dg9399o1^<"i}f/pe+be\7f&jf`t"|k_qlwvZadW9;m7X> gsd-vc)`kq$h`fv re]sjqtXojU:=k5Z0.eqb+ta'nis"nbdx.pg[uhszVmhS??i;T2,cw`)zo%lou lljz,vaYwf}xTknQ<1g9V4*aun'xm#jmw.bnh|*tcWyd\7f~Ril_53e?P6(o{l%~k!hcy,`hn~(zmU{by|Pgb]663=R8&myj#|i/fa{*fjlp&xoS}`{r^e`[duumn6;2?84U1-dvc(un&mht#mcky-q`Zvi|{UloRo|rde?5;413\:$k\7fh!rg-dg}(ddbr$~iQ\7fnup\cfYf{{ol0?0=6:W3+bta&{l$knv!cmi{+wbXxg~ySjmParpfc959:?1^<"i}f/pe+be\7f&jf`t"|k_qlwvZadWhyyij2;>348Q5)`zo$yj"ilx/aoo})ulVzex\7fQhc^cpv`a;=78>7X> gsd-vc)`kq$h`fv re]sjqtXojUj\7f\7fkh_106?P6(o{l%~k!hcy,`hn~(zmU{by|Pgb]bwwc`W88>7X> gsd-vc)`kq$h`fv re]sjqtXojUj\7f\7fkh_306?P6(o{l%~k!hcy,`hn~(zmU{by|Pgb]bwwc`W:8>7X> gsd-vc)`kq$h`fv re]sjqtXojUj\7f\7fkh_506?P6(o{l%~k!hcy,`hn~(zmU{by|Pgb]bwwc`W<827X> gsd-vc)`kq$h`fv 
re]sjqtXojUj\7f\7fkh_e3?4;4>3\:$k\7fh!rg-dg}(ddbr$~iQ\7fnup\cfYf{{olSi?31?0:?P6(o{l%~k!hcy,`hn~(zmU{by|Pgb]bwwc`Wm;7>3<6;T2,cw`)zo%lou lljz,vaYwf}xTknQnssgd[a7;;7827X> gsd-vc)`kq$h`fv re]sjqtXojUj\7f\7fkh_e3?0;4>3\:$k\7fh!rg-dg}(ddbr$~iQ\7fnup\cfYf{{olSi?35?0;?P6(o{l%~k!hcy,`hn~(zmU{by|Pgb]bwwc`Wm;T<?64U1-dvc(un&mht#mcky-q`Zvi|{UloRo|rde\`4Y6:11^<"i}f/pe+be\7f&jf`t"|k_qlwvZadWhyyijQk1^01<>S7'nxm"\7fh gbz-gim\7f'{nT|cz}_fa\evtboVn:S><7;T2,cw`)zo%lou lljz,vaYwf}xTknQnssgd[a7X<;20Y=!hrg,qb*adp'iggu!}d^rmpwY`kVkx~hiPd0]660=R8&myj#|i/fa{*fjlp&xoS}`{r^e`[}iu89:;>85Z0.eqb+ta'nis"nbdx.pg[uhszVmhSua}012260=R8&myj#|i/fa{*fjlp&xoS}`{r^e`[}iu89:9>85Z0.eqb+ta'nis"nbdx.pg[uhszVmhSua}012060=R8&myj#|i/fa{*fjlp&xoS}`{r^e`[}iu89:?><5Z0.eqb+ta'nis"nbdx.uq[uhszVhbb1>1209V4*aun'xm#jmw.bnh|*quWyd\7f~Rlfn=3=64=R8&myj#|i/fa{*fjlp&}yS}`{r^`jj949:81^<"i}f/pe+be\7f&jf`t"y}_qlwvZdnf595><5Z0.eqb+ta'nis"nbdx.uq[uhszVhbb1:1209V4*aun'xm#jmw.bnh|*quWyd\7f~Rlfn=7=65=R8&myj#|i/fa{*fjlp&}yS}`{r^`jjZ6582_;#j|i.sd,cf~)keas#z|Ppovq[goiW88;7X> gsd-vc)`kq$h`fv ws]sjqtXj`dT>?>4U1-dvc(un&mht#mcky-tvZvi|{UiecQ<219V4*aun'xm#jmw.bnh|*quWyd\7f~Rlfn^614>S7'nxm"\7fh gbz-gim\7f'~xT|cz}_ckm[0413\:$k\7fh!rg-dg}(ddbr${\7fQ\7fnup\flhXpfx;<=>=6:W3+bta&{l$knv!cmi{+rtXxg~ySoga_ymq4566:?1^<"i}f/pe+be\7f&jf`t"y}_qlwvZdnfVrd~=>?2348Q5)`zo$yj"ilx/aoo})pzVzex\7fQmio]{kw678:8=7X> gsd-vc)`kq$h`fv ws]sjqtXj`dTtb|?01614>S7'nxm"\7fh gbz-gim\7f'~xT|cz}_fa?4;473\:$k\7fh!rg-dg}(ddbr${\7fQ\7fnup\cf:66;:0Y=!hrg,qb*adp'iggu!xr^rmpwY`k585>=5Z0.eqb+ta'nis"nbdx.uq[uhszVmh0>0=0:W3+bta&{l$knv!cmi{+rtXxg~ySjm34?03?P6(o{l%~k!hcy,`hn~(\7f{U{by|Pgb>6:4`<]9%l~k }f.e`|+ekcq%|~R~ats]dgZ66n2_;#j|i.sd,cf~)keas#z|Ppovq[beX98l0Y=!hrg,qb*adp'iggu!xr^rmpwY`kV8:j6[?/fpe*w`(ojr%oaew/vp\tkruWniT?<h4U1-dvc(un&mht#mcky-tvZvi|{UloR:>f:W3+bta&{l$knv!cmi{+rtXxg~ySjmP5348Q5)`zo$yj"ilx/aoo})pzVzex\7fQhc^cpv`a;878=7X> gsd-vc)`kq$h`fv ws]sjqtXojUj\7f\7fkh<0<12>S7'nxm"\7fh gbz-gim\7f'~xT|cz}_fa\evtbo585>;5Z0.eqb+ta'nis"nbdx.uq[uhszVmhSl}}ef>0:70<]9%l~k 
}f.e`|+ekcq%|~R~ats]dgZgtzlm783<9;T2,cw`)zo%lou lljz,swYwf}xTknQnssgd8085=2_;#j|i.sd,cf~)keas#z|Ppovq[beXizxnkR>=5:W3+bta&{l$knv!cmi{+rtXxg~ySjmParpfcZ75=2_;#j|i.sd,cf~)keas#z|Ppovq[beXizxnkR<=5:W3+bta&{l$knv!cmi{+rtXxg~ySjmParpfcZ55=2_;#j|i.sd,cf~)keas#z|Ppovq[beXizxnkR:=5:W3+bta&{l$knv!cmi{+rtXxg~ySjmParpfcZ3512_;#j|i.sd,cf~)keas#z|Ppovq[beXizxnkRj><1<1=>S7'nxm"\7fh gbz-gim\7f'~xT|cz}_fa\evtboVn:0<0=9:W3+bta&{l$knv!cmi{+rtXxg~ySjmParpfcZb64;4956[?/fpe*w`(ojr%oaew/vp\tkruWniTm~|jg^f2868512_;#j|i.sd,cf~)keas#z|Ppovq[beXizxnkRj><5<1=>S7'nxm"\7fh gbz-gim\7f'~xT|cz}_fa\evtboVn:080=8:W3+bta&{l$knv!cmi{+rtXxg~ySjmParpfcZb6W9837X> gsd-vc)`kq$h`fv ws]sjqtXojUj\7f\7fkh_e3\57><]9%l~k }f.e`|+ekcq%|~R~ats]dgZgtzlmTh<Q=299V4*aun'xm#jmw.bnh|*quWyd\7f~Ril_`qqabYc9V9946[?/fpe*w`(ojr%oaew/vp\tkruWniTm~|jg^f2[14?3\:$k\7fh!rg-dg}(ddbr${\7fQ\7fnup\cfYf{{olSi?P5378Q5)`zo$yj"ilx/aoo})pzVzex\7fQhc^zlv5678;?0Y=!hrg,qb*adp'iggu!xr^rmpwY`kVrd~=>?1378Q5)`zo$yj"ilx/aoo})pzVzex\7fQhc^zlv567:;?0Y=!hrg,qb*adp'iggu!xr^rmpwY`kVrd~=>?3378Q5)`zo$yj"ilx/aoo})pzVzex\7fQhc^zlv567<;k0Y=!hrg,qb*ak8'xo#j|>.sdtbq)UIDUYHRKA_GUEP44>3\:$k\7fh!rg-dh5(ul&my=#|iwgv,VDKXZLMDYYQJN010?P6(o{l%~k!hl1,q`*au9'xm{kz ctpq[cqa|Vy\7fmykPFRO\BCb6;?1^<"i}f/pe+bj7&{n$k\7f?!rguep*erz{Um{kzPsucwaZ@TEVLMh<#Fn258Q5)`zo$yj"ic0/pg+bt6&{l|jy!lusp\br`sWz~jxhQISL]EBa7*Ag;8?6[?/fpe*w`(oe:%~i!hr0,qbr`s'j\7fy~Rhxfu]ppdrbWOYFSKHk2248Q5)`zo$yj"ic0/pg+bt6&{l|jy!lusp\br`sWz~jxhQISL]EBa4*Ag9<7X> gsd-vc)`d9$yh"i}1/pescr(k|xySkyit^qweqcXNZGTJKj=-Hl26==R8&myj#|i/fn3*wb(o{;%~kyit.gntq\7fXn~l\7fSkl=7:W3+bta&{l$ka>!re-dv4(un~l\7f#hc\7ftx]escrXa;o0Y=!hrg,qb*ak8'xo#j|>.sdtbq)bey~rSkyit^k\kw67898m7X> gsd-vc)`d9$yh"i}1/pescr(mdz\7fuRhxfu]j[jt789::>45Z0.eqb+ta'nf;"\7fj 
gscp*wus{&xjaRkbpu{\bgYn;91^<"i}f/pe+bj7&{n$k\7fo|.sqww*tfeVof|ywPfc]j[jt789:8?6[?/fpe*w`(oe:%~i!hr`q-vvrt'{kfShc\7ftx]efZoXg{:;<=?>2218Q5)`zo$yj"ic0/pg+btf{'xxx~!}al]fiur~WohTeRa}01235234;2_;#j|i.sd,ci6)zm%l~l}!rrvp+wgjWlg{xtQib^k\kw6789899>=4U1-dvc(un&mg<#|k/fpbw+tt|z%ym`Qjmqvz[cdXaVey<=>?25306>S7'nxm"\7fh gm2-va)`zhy%~~z|/scn[`kw|pUmnRgPos234541;?1^<"i}f/pe+bj7&{n$k\7fo|.sqww*tfeVof|ywPfc]j[jt789:=<R]X0318Q5)`zo$yj"ic0/pg+wvi|{%Fob{at^alqkr5=2_;#j|i.sd,ci6)zm%y|cz}/LalqkrXkf\7fex<<=5:W3+bta&{l$ka>!re-qtkru'DidyczPcnwmp435<2_;#j|i.sd,ci6)zm%y|cz}/LalqkrXkf\7fex><;;T2,cw`)zo%l`= }d.psjqt(Eje~byQlotlw272<]9%l~k }f.eo4+tc'{zex\7f!BcnwmpZeh}g~2??5Z0.eqb+ta'nf;"\7fj rqlwv*Kdg|d\7fSnaznu]{kw:768;TECXP03a8Q5)`zo$yj"ic0/pg+wvi|{%Fob{at^alqkrXpfx7=3?>2b9V4*aun'xm#jb?.sf,vuhsz&Ghcx`{_bmvjqY\7fg{692<?=c:W3+bta&{l$ka>!re-qtkru'DidyczPcnwmpZ~hz595=<<l;T2,cw`)zo%l`= }d.psjqt(Eje~byQlotlw[}iu4=4:=?m4U1-dvc(un&mg<#|k/srmpw)Jkf\7fexRm`uov\|jt;=7;:=k5Z0.eqb+ta'nf;"\7fj rqlwv*eh}g~7<3?i;T2,cw`)zo%l`= }d.psjqt(kf\7fex1?11g9V4*aun'xm#jb?.sf,vuhsz&idycz32?3e?P6(o{l%~k!hl1,q`*twf}x$ob{at=1=5c=R8&myj#|i/fn3*wb(zyd\7f~"m`uov?0;7a3\:$k\7fh!rg-dh5(ul&x{by| cnwmp9399l1^<"i}f/pe+bj7&{n$~}`{r.alqkrX88o0Y=!hrg,qb*ak8'xo#\7f~ats-`kphsW8;n7X> gsd-vc)`d9$yh"|\7fnup,gjsi|V8:i6[?/fpe*w`(oe:%~i!}povq+firf}U8=h5Z0.eqb+ta'nf;"\7fj rqlwv*eh}g~T8<k4U1-dvc(un&mg<#|k/srmpw)dg|d\7fS8<=;T2,cw`)zo%l`= }d.psjqt(kf\7fexRj><1<16>S7'nxm"\7fh gm2-va)uxg~y#naznu]g5979:;1^<"i}f/pe+bj7&{n$~}`{r.alqkrXl8692?<4U1-dvc(un&mg<#|k/srmpw)dg|d\7fSi?33?01?P6(o{l%~k!hl1,q`*twf}x$ob{at^f28185:2_;#j|i.sd,ci6)zm%y|cz}/bmvjqYc95?5><5Z0.eqb+ta'nf;"\7fj rqlwv*eh}g~Th<Q?209V4*aun'xm#jb?.sf,vuhsz&idyczPd0]264=R8&myj#|i/fn3*wb(zyd\7f~"m`uov\`4Y5:81^<"i}f/pe+bj7&{n$~}`{r.alqkrXl8U8><5Z0.eqb+ta'nf;"\7fj rqlwv*eh}g~Th<Q;209V4*aun'xm#jb?.sf,vuhsz&idyczPd0]661=R8&myj#|i/fn3*wb(zyd\7f~"m`uov\jjr789;946[?/fpe*w`(oe:%{\7f!hw`q-svrt'{kfSk{cl^da[l423\:$k\7fh!rg-dh5(pz&m|m~ xsuq,vdkXn|fgSd<l;T2,cw`)zo%l`= 
xr.etev(p{}y$~lcPftno[lYhz9:;<?h4U1-dvc(un&mg<#y}/fubw+qt|z%ym`Qiumn\mZiu89:;=<<=f:W3+bta&{l$ka>!ws-dsdu)\7fz~x#\7fob_gwohZoXg{:;<=?853d8Q5)`zo$yj"ic0/uq+bqf{'}xx~!}al]eqijXaVey<=>?2371b>S7'nxm"\7fh gm2-sw)`\7fhy%{~z|/scn[cskdVcTc\7f>?0107565<]9%l~k }f.eo4+qu'n}j\7f#y|tr-qehYa}efTeRa}0123617X[^::h6[?/fpe*w`(oe:%{\7f!}al]tvZciW`;9?6[?/fpe*w`(oe:%{\7f!xpovq+Heh}g~Tob{at378Q5)`zo$yj"ic0/uq+rvi|{%Fob{at^alqkr6:;?0Y=!hrg,qb*ak8'}y#z~ats-Ngjsi|Vidycz>5368Q5)`zo$yj"ic0/uq+rvi|{%Fob{at^alqkr4:=1^<"i}f/pe+bj7&~x${}`{r.O`kphsWje~by8=4:W3+bta&{l$ka>!ws-ttkru'DidyczPcnwmp<553\:$k\7fh!rg-dh5(pz&}{by| MbmvjqYdg|d\7fSua}<1<25ZOI^V:9o6[?/fpe*w`(oe:%{\7f!xpovq+Heh}g~Tob{at^zlv979988h7X> gsd-vc)`d9$|~"y\7fnup,Ifirf}Uhcx`{_ymq87869;i0Y=!hrg,qb*ak8'}y#z~ats-Ngjsi|VidyczPxnp?7;76:j1^<"i}f/pe+bj7&~x${}`{r.O`kphsWje~byQwos>7:475k2_;#j|i.sd,ci6)\7f{%||cz}/LalqkrXkf\7fexRv`r=7=547a3\:$k\7fh!rg-dh5(pz&}{by| cnwmp9699o1^<"i}f/pe+bj7&~x${}`{r.alqkr;97;m7X> gsd-vc)`d9$|~"y\7fnup,gjsi|585=k5Z0.eqb+ta'nf;"z| wqlwv*eh}g~7?3?i;T2,cw`)zo%l`= xr.usjqt(kf\7fex1:11g9V4*aun'xm#jb?.vp,suhsz&idycz35?3f?P6(o{l%~k!hl1,tv*qwf}x$ob{at^22a>S7'nxm"\7fh gm2-sw)pxg~y#naznu]25`=R8&myj#|i/fn3*rt(\7fyd\7f~"m`uov\64c<]9%l~k }f.eo4+qu'~zex\7f!lotlw[67b3\:$k\7fh!rg-dh5(pz&}{by| cnwmpZ26m2_;#j|i.sd,ci6)\7f{%||cz}/bmvjqY2:;1^<"i}f/pe+bj7&~x${}`{r.alqkrXl86;2?<4U1-dvc(un&mg<#y}/vrmpw)dg|d\7fSi?31?01?P6(o{l%~k!hl1,tv*qwf}x$ob{at^f28785:2_;#j|i.sd,ci6)\7f{%||cz}/bmvjqYc9595>?5Z0.eqb+ta'nf;"z| wqlwv*eh}g~Th<2;>308Q5)`zo$yj"ic0/uq+rvi|{%hcx`{_e3?1;463\:$k\7fh!rg-dh5(pz&}{by| cnwmpZb6W98:7X> gsd-vc)`d9$|~"y\7fnup,gjsi|Vn:S<<>;T2,cw`)zo%l`= xr.usjqt(kf\7fexRj>_302?P6(o{l%~k!hl1,tv*qwf}x$ob{at^f2[6463\:$k\7fh!rg-dh5(pz&}{by| cnwmpZb6W=8:7X> gsd-vc)`d9$|~"y\7fnup,gjsi|Vn:S8<=;T2,cw`)zo%l`= xr.usjqt(kf\7fexRj=<1<16>S7'nxm"\7fh gm2-sw)pxg~y#naznu]g6979:;1^<"i}f/pe+bj7&~x${}`{r.alqkrXl;692?<4U1-dvc(un&mg<#y}/vrmpw)dg|d\7fSi<33?01?P6(o{l%~k!hl1,tv*qwf}x$ob{at^f18185:2_;#j|i.sd,ci6)\7f{%||cz}/bmvjqYc:5?5><5Z0.eqb+ta'nf;"z| 
wqlwv*eh}g~Th?Q?209V4*aun'xm#jb?.vp,suhsz&idyczPd3]264=R8&myj#|i/fn3*rt(\7fyd\7f~"m`uov\`7Y5:81^<"i}f/pe+bj7&~x${}`{r.alqkrXl;U8><5Z0.eqb+ta'nf;"z| wqlwv*eh}g~Th?Q;209V4*aun'xm#jb?.vp,suhsz&idyczPd3]653=R8&myj#|i/lgn+air|VcT<<j4U1-dvc(un&gna"ikm/fn+HtfeVXJA?94U1-dvc(un&gna"ikm/fn+HtfeVXJARgaddz34576m2_;#j|i.sd,i`k(omg%h`!kotv\m9699m1^<"i}f/pe+hcj'nnf"ic dnww[lY79>1^<"i}f/pe+wgjW{olcxzPeo30?P6(o{l%~k!}al]tvZci9m1^<"i}f/pe+wusjea$~iQ}su]bwwc`:91^<"i}f/pe+wusjea$~iQ}su]bwwc`Wm;9<6[?/fpe*w`(zz~i`f!}d^pppZgtzlmTh??l;T2,cw`)zo%y\7fylck.pg[wusWjefn<j4U1-dvc(un&xxxobd/sf\vvrXkfgi=<k4U1-dvc(un&xxxobd/sf\vvrXzlm7<3?j;T2,cw`)zo%y\7fylck.pg[wusW{ol0<0>e:W3+bta&{l$~~zmlj-q`Ztt|Vxnk1<11e9V4*aun'xm#\7f}{bmi,vaYu{}UyijQ?1e9V4*aun'xm#\7f}{bmi,vaYu{}UyijQ>1e9V4*aun'xm#\7f}{bmi,vaYu{}UyijQ=1e9V4*aun'xm#\7f}{bmi,swYu{}Uj\7f\7fkh219V4*aun'xm#\7f}{bmi,swYu{}Uj\7f\7fkh_e314>S7'nxm"\7fh rrvahn)pzVxxxRo|rde\`77d3\:$k\7fh!rg-qwqdkc&}yS\7f}{_bmnf4b<]9%l~k }f.pppgjl'~xT~~zPcnoa54c<]9%l~k }f.pppgjl'~xT~~zPrde?4;7b3\:$k\7fh!rg-qwqdkc&}yS\7f}{_sgd8486l2_;#j|i.sd,vvredb%|~R||t^pfcZ66l2_;#j|i.sd,vvredb%|~R||t^pfcZ7f3\YN^ROCI@Q`?PUBZV\B_DLCE29UGF?<^@O\SYW_E59TGIM53^O:h6VFLHL[)]BE(9$:,^ZZB!2-5%GTK@C=7U][LH@4?]USWNDOn6VPCNPF[LHAG?1ShoQFdg9[`mYWz`g]i\7f}foo33?]bjWDkac\7fXjrrklj46<PmgTAd``rWgqwliik2kgab}{_dosp|d<iegd\7fyQiumn25>gtqgimekaPdlgn[4d5n2kxucmiigm\`hcjW8h'jdh`_fgmawgsg{U}8R=#NNLF(KIIM:h>7l}vnbdjbjYcelgT=o"iigm\c`hbzh~d~Rx;_2.xgZnf{VcexRmck<2/gZnf{Vyy\7fy3?,b]q`Z`umx7: nQgar]q`Zbf|hUhcx`{=1.`[aoiW~coxe3>,b]kevYpzVnjxlQlotlw95*dWo\7fg`Rhcafq\vvrX~hf6=!mPilroahci|h~bccQ{yqg>6)eX}zoTi`ljdegg[wc`59&hSz|Pfsgr94*dW|ynSiazt^ppp87+kV\7fehh|ilnu\hjq:8%iTdl}Powgqbiip59&hS}|jlncg[igsmgir1="l_emvpZqnl}b6=!mPmreljZpfd4;'oR|k_egspm;1<%iTdl}Puoffvcjh\7f4:'oRfns^coijusWo\7fg`0>#c^jbwZtt|4;'oRjnt`]`kphsW~coxe39,b]kevYpzVkhg0>#c^wpaZcdk4:'oRm`mlmm[fjhkb7; 
nQxr^c`oZjh\7f4:'oR|k_qlwvZqnl}b6:!mPurg\br`sWmk\7fmRm`uov>4)eX`hyTmac`su]fiur~59&hS\7fjPd`vb[firf}U|eizg=7.`[jpbzofd{Rb`w<2/gZquWyd\7f~Ryfduj>2)eXx{elShc\7ftx]w}uc:8%iT{\7fQkauc\gjsi|V}bhyf26-a\twckghnT~hi|=0.`[air|VxxxRxnl<2/gZnf{VnjxlQlotlw95*dWyxdkRhzlm]wlwct`Vdnklzj_wco937+kV}ySik\7fti?50)eXx{elShc\7ftx]wlwct`Vdnklzj_wco90*dWyxdkRkbpu{\pmtb{aUj\7f~k}t^tbh82+kVbj\7fRxnlhf>4)eXx{elSk{cl^vkv`uoWhyxi\7fzPv`n>27*dgdzdbh`{_ocna8gtqgimekaPdlgn[4d+kV|j`djPlnu>4)eXx{cfSkgio^vzt`;29;r8:!mPpsmd[cskdV~r|h3?,b]nahiuqV~r|h3=,|0a?du~fjlbjbQkmdo\5gYaaoeTkh`jr`vlvZp3W:Us\7fyQ>6:`bgnswl2hjof{\7f_lcqo`t43jf`h6jnt`]`kphs 9#o7io{a^alqkr/9 n0hlzn_bmvjq.5!m1omyoPcnwmp-5.l2njxlQlotlw,1/c3mk\7fmRm`uov+1,`<lh~jSnaznu>6>5803mhbxh|}6:fjj-6.02nbb1>50?48`lh;87=0hb{{(1+:?air|5:1<394dnww858?3lnbj?`hd49fiur~;?1mekaPgdlfvdrhzV|?S>"/Xhnjj}&DG[O+Kh`jr`vlv%77&8$;?Rg\7f359eqij03`d\7fSnbd8:ldggsndm20c{k}fmmte>vugnUna}zv159svjaXmdz\7fuRzgrdqk,5/6<2zycjQjmqvz[qnumzb#=$?;;qplcZcjx}sTxe|jsi*1-42<x{elShc\7ftx]wlwct`!9"=95\7frne\ahvsqV~c~h}g(5+20>vugnUna}zv_ujqavn/= ;=7}|`g^gntq\7fX|axn\7fe2::1<2f>vugnUna}zv_ujqavnXizyn~y&?)0`8twi`Wlg{xtQ{hsgplZgt{lx\7f$<'>b:rqkbYbey~rSyf}erj\evubz}"9%<l4psmd[`kw|pU\7fd\7fk|h^cpw`ts :#:n6~}of]fiur~W}byi~fParqfvq.3!8h0|\7fah_dosp|Ys`{oxdRo|sdpw,0/6l2zycjQjmqvz[qnumzbTm~}jru>6>586j2zycjQjmqvz[qnumzbTbhintd*3-4d<x{elShc\7ftx]wlwct`Vdnklzj(0+2f>vugnUna}zv_ujqavnXflmjxh&=)0`8twi`Wlg{xtQ{hsgplZhboh~n$>'>b:rqkbYbey~rSyf}erj\j`af|l"?%<l4psmd[`kw|pU\7fd\7fk|h^lfcdrb <#:h6~}of]fiur~W}byi~fPndebp`:229427}|`g^dvhi743yxdkRhzlm]wlwct`!:"=>5\7frne\bpjkW}byi~f'1(30?uthoVl~`aQ{hsgpl-4.9:1{~biPftno[qnumzb#?$?<;qplcZ`rdeU\7fd\7fk|h)6*56=wzfmTjxbc_ujqavn/= 
;>7}|`g^dvhiYs`{oxd1;50?3b?uthoVl~`aQ{hsgplZgt{lx\7f$='>a:rqkbYa}efTxe|jsi]bwvcu|!;"=l5\7frne\bpjkW}byi~fParqfvq.5!8k0|\7fah_gwohZrozlycSl}|esv+7,7f3yxdkRhzlm]wlwct`Vkx\7fh|{(5+2e>vugnUmyabPtipfwmYf{zoyx%;&1b9svjaXn|fgSyf}erj\evubz}6>6=0>a:rqkbYa}efTxe|jsi]mabgsm!:"=l5\7frne\bpjkW}byi~fPndebp`.6!8k0|\7fah_gwohZrozlycSckhaug+6,7f3yxdkRhzlm]wlwct`Vdnklzj(2+2e>vugnUmyabPtipfwmYimnk\7fi%:&1`9svjaXn|fgSyf}erj\j`af|l">%<m4psmd[cskdV~c~h}g_ogdeqc;=3:5:6|k_bnh55=ulVnjxlQlotlw,5/682xoSio{a^alqkr/9 ;;7\7fjPd`vb[firf}"9%<>4re]geqgXkf\7fex%=&119q`Zbf|hUhcx`{(5+24>tcWmk\7fmRm`uov+1,773{nThlzn_bmvjq:76880~iQkauc\gjsi|5?1<3;4re]fj3=ulVxxx>5}su58wgosm{x?7~||t0f8phv(Wjm$m~wacgkekZbjmdU:n#`ho59wvpc>3|doi\7fhcov78rdjnl?1|~Rolk79tvZekc8:0{\7fQkauc\gjsi|!:"==5xr^fbpdYdg|d\7f$<'>0:uq[agsiVidycz'2(33?rtXlh~jSnaznu*0-46<\7f{UomyoPcnwmp-2.991|~Rjnt`]`kphs <#:>6y}_ecweZeh}g~797>15:uq[`h13~xT~~zr@Ar7a==GHq;m<7H54;3xW62=9hl1?7?<27;gb?51m<hpb<79:09m5<1=<2.:594>919~W64=9hl1?7?<27;gb?51m<h0_8<51c694?74:?3oj7=9e4`8W64=9k>1<7?<27;gb?51m1>0h<7n:182>4}T;=0:mk4<:0112<ba2:<n9o5yT6094?7=933ow^=;:0ce>6<6;;<2hk4<6d7a?!7?l3?87[?65;0xq0g=92\7f>n7>4}%4`><=e90k1<7753;;xL4>e3S8h6?u::58~ 22=90k0(<7<:0`2?l7bj3:17d?id;29?j7e:3:17b?m0;29?l7a=3:17d?jd;29?j7fk3:1(;k51`f8j3b=821b=l750;&5a?7fi2d=h7>4;h3f6?6=3k;3o7>51;294~N60k1/;94>8b9l2g<722wi8:4?:083>5}O91h0(::5469l03<722wi>o4?:429=2<>lrB:4o5U2b822~`=9:0::7??:05957<22=0:87?::03950<6?3?1=;4>0;6951<6;3;96<?5f;\7f'31<61m1/9<4;5:&04?7>j2.8?7?6c:&53?0f3f;247>5;h3fg?6=3`;io7>5$7g95gb<f?n1<65f1c`94?"1m3;ih6`9d;38?l7ei3:1(;k51cf8j3b=:21b=o750;&5a?7el2d=h7=4;h3a<?6=,?o1=oj4n7f90>=n9k=1<7*9e;3a`>h1l3?07b?lc;29?l7bn3:17b?i3;29?l7c13:1(;k51ec8j3b=821b=i650;&5a?7ci2d=h7?4;h3g3?6=,?o1=io4n7f96>=n9m<1<7*9e;3ge>h1l3907d?k5;29 3c=9mk0b;j54:9j5a2=83.=i7?ka:l5`?3<3`;nn7>5;n3a1?6=3f;i:7>5;h3:b?6=3f;n47>5$7g95`g<f?n1<65`1d594?"1m3;nm6`9d;38?j7b>3:1(;k51dc8j3b=:21d=h;50;&5a?7bi2d=h7=4;n3f0?6=,?o1=ho4n7f90>=h9l91<7*9e;3fe>h1l3?07d?l3;29 
3c=9j>0b;j50:9j5f4=83.=i7?l4:l5`?7<3`;h=7>5$7g95f2<f?n1>65f1b294?"1m3;h86`9d;18?l7en3:1(;k51b68j3b=<21b=ok50;&5a?7d<2d=h7;4;h3e`?6=3f;mn7>5$7g95ce<f?n1<65`1gc94?"1m3;mo6`9d;38?j7a13:1(;k51ga8j3b=:21d=k650;&5a?7ak2d=h7=4;n3e3?6=,?o1=km4n7f90>=h9o<1<7*9e;3eg>h1l3?07b?m2;29?j7e83:17d?k2;29 3c=9m90b;j50:9j5a7=83.=i7?k3:l5`?7<3`;o<7>5$7g95a5<f?n1>65f1bd94?"1m3;o?6`9d;18?l7dm3:1(;k51e18j3b=<21b=nj50;&5a?7c;2d=h7;4;h3e1?6=3`;i?7>5;h3b4?6=3`;mi7>5;n3fa?6=3f;m87>5;n3e6?6=3`;nh7>5;n3bg?6=,?o1=lj4n7f94>=n9h31<7*9e;3be>h1l3:07b?69;29?l7>m3:17d?j0;29 3c=9l;0b;j50:9j5a`=83.=i7?j1:l5`?7<3`;oi7>5$7g95`7<f?n1>65f1ef94?"1m3;n=6`9d;18?l7ck3:1(;k51d38j3b=<21b=il50;&5a?7b92d=h7;4;n3b2?6=,?o1=l94n7f94>=h9h?1<7*9e;3b3>h1l3;07b?n4;29 3c=9h=0b;j52:9l5d5=83.=i7?n7:l5`?5<3f;j>7>5$7g95d1<f?n1865`1`394?"1m3;j;6`9d;78?l7di3:1(;k51b`8j3b=821b=n750;&5a?7dj2d=h7?4;h3`<?6=,?o1=nl4n7f96>=n9j=1<7*9e;3`f>h1l3907d?l6;29 3c=9jh0b;j54:9j5f3=83.=i7?lb:l5`?3<3f;j47>5;h3f6?6=3k;2>7>51;294~"0<3><7E?61:J2<g=h<?0;66sm16:94?5=83:p(::53d9K5<7<@82i7E<n;%7:>4`63->m6<5f2e83>>o4=3:17b9?:188yg7?;3:1?7>50z&40?5b3A;2=6F>8c9K6d=#=00:j<5+4g82?l4c2900e>;50;9l35<722wi=:950;194?6|,>>1?h5G1838L4>e3A8j7);6:0d2?!2a281b>i4?::k01?6=3f=;6=44}c3;5?6==3:1<v*84;61?M7>92B:4o5G2`9'1<<6n81/8k4>;h0g>5<<a;l1<75f3483>>o1n3:17b9?:188yg7?:3:1?7>50z&40?5b3A;2=6F>8c9K6d=#=00:j<5+4g82?l4c2900e>;50;9l35<722wi=:850;794?6|,>>18?5G1838L4>e3A8j7);6:0d2?!2a281b>i4?::k1b?6=3`9>6=44i7d94?=h?90;66sm19294?2=83:p(::5409K5<7<@82i7):i:09j6a<722c897>5;h4e>5<<g>:1<75rb05e>5<3290;w)9;:538L4?63A;3n6*;f;38m7b=831b?84?::k5b?6=3f=;6=44}c34a?6=<3:1<v*84;62?M7>92B:4o5+4g82?l4c2900e>;50;9j2c<722e<<7>5;|`2<<<72=0;6=u+75875>N6181C=5l4$5d95>o5l3:17d=::188m3`=831d;=4?::\7fa5=>=83>1<7>t$66904=O90;0D<6m;%6e>4=n:m0;66g<5;29?l0a2900c:>50;9~f414290>6=4?{%57>16<@83:7E?7b:&7b?7<a;n1<75f2g83>>o493:17d=::188k26=831vn<8i:186>5<7s-=?69>4H0;2?M7?j2.?j7?4i3f94?=n:o0;66g<1;29?l522900c:>50;9~f46>290?6=4?{%57>17<@83:7E?7b:&7b?4<a;n1<75f3483>>o1n3:17b9?:188yg75n3:197>50z&40?273A;2=6F>8c9
'0c<63`8o6=44i3d94?=n;80;66g<5;29?j172900qo?<0;291?6=8r.<87:?;I3:5>N60k1/8k4>;h0g>5<<a;l1<75f3083>>o4=3:17b9?:188yg7493:197>50z&40?273A;2=6F>8c9'0c<63`8o6=44i3d94?=n;80;66g<5;29?j172900qo?<2;291?6=8r.<87:?;I3:5>N60k1/8k4>;h0g>5<<a;l1<75f3083>>o4=3:17b9?:188yg74;3:197>50z&40?273A;2=6F>8c9'0c<63`8o6=44i3d94?=n;80;66g<5;29?j172900qo?<4;291?6=8r.<87:?;I3:5>N60k1/8k4>;h0g>5<<a;l1<75f3083>>o4=3:17b9?:188yg74?3:187>50z&40?263A;2=6F>8c9'0c<63`8o6=44i2794?=n>o0;66a80;29?xd6;?0;694?:1y'31<392B:5<5G19`8 1`=92c9h7>5;h16>5<<a?l1<75`7183>>{e9>:1<7?>:183\7f!132>;0D<7>;I3;f>o5<3:17d<::188m70=831b>:4?::k1<?6=3`826=44i2794?=n;10;66g<9;29?l5f2900e>l50;9j7f<722c8h7>5;h3;a?6=3`;3j7>5;n50>5<<g:=1<75rb000>5<2290;w)9;:508L4?63A;3n6*;f;08m7b=831b>k4?::k01?6=3`<m6=44o6294?=zj88?6=4::183\7f!132=:0D<7>;I3;f>"3n3;0e?j50;9j6c<722c8=7>5;h16>5<<g>:1<75rb006>5<2290;w)9;:528L4?63A;3n6*;f;38m7b=831b>k4?::k05?6=3`9>6=44o6294?=zj88=6=4::183\7f!132=:0D<7>;I3;f>"3n3;0e?j50;9j6c<722c8=7>5;h16>5<<g>:1<75rb004>5<2290;w)9;:528L4?63A;3n6*;f;38m7b=831b>k4?::k05?6=3`9>6=44o6294?=zj8836=4::183\7f!132=:0D<7>;I3;f>"3n3;0e?j50;9j6c<722c8=7>5;h16>5<<g>:1<75rb8694?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb8194?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb8094?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb8394?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb8294?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb9d94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbbg94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbbf94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbba94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbb`94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbbc94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbb;94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbg594?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbg494?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c
8=7>5;h16>5<<g>:1<75rbg794?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbg694?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbg194?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbg094?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbcf94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbca94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbc`94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbcc94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbc;94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbc:94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbd`94?2=83:p(::53g9K5<7<@82i7E<n;%7:>4`63->m6<5f2e83>>o493:17d=::188k26=831vnho50;694?6|,>>1?k5G1838L4>e3A8j7);6:0d2?!2a281b>i4?::k05?6=3`9>6=44o6294?=zjl31<7:50;2x 22=;o1C=4?4H0:a?M4f3-?26<h>;%6e>4=n:m0;66g<1;29?l522900c:>50;9~f`>=83>1<7>t$6697c=O90;0D<6m;I0b?!3>28l:7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbd594?2=83:p(::53g9K5<7<@82i7E<n;%7:>4`63->m6<5f2e83>>o493:17d=::188k26=831vnh850;694?6|,>>1?k5G1838L4>e3A8j7);6:0d2?!2a281b>i4?::k05?6=3`9>6=44o6294?=zjk;1<7:50;2x 22=;o1C=4?4H0:a?M4f3-?26<h>;%6e>4=n:m0;66g<1;29?l522900c:>50;9~fg6=83>1<7>t$6697c=O90;0D<6m;I0b?!3>28l:7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb`d94?2=83:p(::53g9K5<7<@82i7E<n;%7:>4`63->m6<5f2e83>>o493:17d=::188k26=831vnlk50;694?6|,>>1?k5G1838L4>e3A8j7);6:0d2?!2a281b>i4?::k05?6=3`9>6=44o6294?=zjhn1<7:50;2x 
22=;o1C=4?4H0:a?M4f3-?26<h>;%6e>4=n:m0;66g<1;29?l522900c:>50;9~fde=83>1<7>t$6697c=O90;0D<6m;I0b?!3>28l:7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb`794?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb`694?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb`194?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb`094?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb`394?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb`294?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbed94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbeg94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbef94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbea94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbe`94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rbec94?2=83:p(::53g9K5<7<@82i7):i:09j6a<722c8=7>5;h16>5<<g>:1<75rb021>5<3290;w)9;:2d8L4?63A;3n6*;f;38m7b=831b?<4?::k01?6=3f=;6=44}cde>5<3290;w)9;:2d8L4?63A;3n6*;f;38m7b=831b?<4?::k01?6=3f=;6=44}c332?6=<3:1<v*84;62?M7>92B:4o5+4g81?l4c2900e>;50;9j2c<722e<<7>5;|`240<72=0;6=u+75875>N6181C=5l4$5d96>o5l3:17d=::188m3`=831d;=4?::\7fa503=83?1<7>t$66905=O90;0D<6m;%6e>4=n:m0;66g=f;29?l562900e>;50;9l35<722wi=8:50;794?6|,>>18=5G1838L4>e3->m6<5f2e83>>o5n3:17d=>:188m63=831d;=4?::\7fa500=83?1<7>t$66905=O90;0D<6m;%6e>4=n:m0;66g=f;29?l562900e>;50;9l35<722wi=8<50;794?6|,>>18?5G1838L4>e3->m6?5f2e83>>o5n3:17d=::188m3`=831d;=4?::\7fa507=83?1<7>t$66905=O90;0D<6m;%6e>4=n:m0;66g=f;29?l562900e>;50;9l35<722wi=8=50;794?6|,>>18=5G1838L4>e3->m6<5f2e83>>o5n3:17d=>:188m63=831d;=4?::\7fa50`=83?1<7>t$66905=O90;0D<6m;%6e>4=n:m0;66g=f;29?l562900e>;50;9l35<722wi=8j50;794?6|,>>18?5G1838L4>e3->m6?5f2e83>>o5n3:17d=::188m3`=831d;=4?::\7fa50c=83?1<7>t$66905=O90;0D<6m;%6e>4=n:m0;66g=f;29?l562900e>;50;9l35<722wi=;<50;794?6|,>>18=5G1838L4>e3->m6<5f2e83>>o5n3:17d=>:188m63=831d;=4?::\7fa536=83?1<7>t$66905=O90;0D<6m;%6e>4=n:m0;66g=f;29?l562900e>;50;9l35<722wi=;?50;794?6|,>>18=5G1838L4>e3
->m6<5f2e83>>o5n3:17d=>:188m63=831d;=4?::\7fa53c=83?1<7>t$66905=O90;0D<6m;%6e>4=n:m0;66g=f;29?l562900e>;50;9l35<722wi=;j50;794?6|,>>18=5G1838L4>e3->m6<5f2e83>>o5n3:17d=>:188m63=831d;=4?::\7fa53e=83?1<7>t$66905=O90;0D<6m;%6e>4=n:m0;66g=f;29?l562900e>;50;9l35<722wi=;l50;794?6|,>>18=5G1838L4>e3->m6<5f2e83>>o5n3:17d=>:188m63=831d;=4?::\7fa53g=83?1<7>t$66905=O90;0D<6m;%6e>4=n:m0;66g=f;29?l562900e>;50;9l35<722wi=;750;794?6|,>>18=5G1838L4>e3->m6<5f2e83>>o5n3:17d=>:188m63=831d;=4?::\7fa5=3=8391<7>t$66911=O90;0D<6m;%6e>==n<00;66g;a;29?j0>2900qo?8a;297?6=8r.<87;;;I3:5>N60k1/8k47;h6:>5<<a=k1<75`6883>>{e9=<1<7=50;2x 22===1C=4?4H0:a?!2a2h1b844?::k7e?6=3f<26=44}c32<?6=;3:1<v*84;77?M7>92B:4o5+4g8b?l2>2900e9o50;9l2<<722wi454?:283>5}#?=0>86F>909K5=d<,=l1m6g;9;29?l2f2900c;750;9~f=1=8391<7>t$66911=O90;0D<6m;%6e>d=n<00;66g;a;29?j0>2900qo69:180>5<7s-=?68:4H0;2?M7?j2.?j7o4i5;94?=n<h0;66a99;29?xd?=3:1?7>50z&40?333A;2=6F>8c9'0c<f3`>26=44i5c94?=h>00;66sm8583>6<729q/;94:4:J2=4=O91h0(9h5a:k7=?6=3`>j6=44o7;94?=zj>o1<7=50;2x 22===1C=4?4H0:a?!2a2h1b844?::k7e?6=3f<26=44}c5g>5<4290;w)9;:468L4?63A;3n6*;f;c8m1?=831b8l4?::m5=?6=3th<o7>53;294~"0<3??7E?61:J2<g=#<o0j7d:6:188m1g=831d:44?::\7fa3g<72:0;6=u+75860>N6181C=5l4$5d9e>o313:17d:n:188k3?=831vn:o50;194?6|,>>1995G1838L4>e3->m6l5f4883>>o3i3:17b86:188yg73?3:187>50z&40?323A;2=6F>8c9'0c<602c?57>5;h6b>5<<a=h1<75`6883>>{e9831<7:50;2x 
22==<1C=4?4H0:a?!2a2820e9750;9j0d<722c?n7>5;n4:>5<<uk;;47>55;294~"0<3?=7E?61:J2<g=#<o09i6g;9;29?l2f2900e9l50;9j0f<722e=57>5;|`20=<72<0;6=u+75862>N6181C=5l4$5d95<=n<00;66g;a;29?l2e2900e9m50;9l2<<722wi=<o50;794?6|,>>19;5G1838L4>e3->m6<74i5;94?=n<h0;66g;b;29?l2d2900c;750;9~f42>290=6=4?{%57>01<@83:7E?7b:&7b?7f3`>26=44i5c94?=n<k0;66g;c;29?l2c2900c;750;9~f47e290=6=4?{%57>01<@83:7E?7b:&7b?7f3`>26=44i5c94?=n<k0;66g;c;29?l2c2900c;750;9~f42f290<6=4?{%57>0><@83:7E?7b:&7b?7e3`>26=44i5c94?=n<k0;66g;c;29?l2c2900e9k50;9l2<<722wi=<m50;594?6|,>>1955G1838L4>e3->m6<l4i5;94?=n<h0;66g;b;29?l2d2900e9j50;9j0`<722e=57>5;|`ea?6=<3:1<v*84;76?M7>92B:4o5+4g8a?l2>2900e9o50;9j0g<722e=57>5;|`244<72=0;6=u+75861>N6181C=5l4$5d95f=n<00;66g;a;29?l2e2900c;750;9~f45b29086=4?{%57>02<@83:7E?7b:&7b?0<a=31<75f4`83>>i113:17plid;291?6=8r.<87;9;I3:5>N60k1/8k48;h6:>5<<a=k1<75f4c83>>o3k3:17b86:188yg7413:1;7>50z&40?3?3A;2=6F>8c9'0c<6l2c?57>5;h6b>5<<a=h1<75f4b83>>o3l3:17d:j:188k3?=831vn<=n:184>5<7s-=?6864H0;2?M7?j2.?j7<>;h6:>5<<a=k1<75f4c83>>o3k3:17d:k:188m1c=831d:44?::\7fa56d=83=1<7>t$6691==O90;0D<6m;%6e>77<a=31<75f4`83>>o3j3:17d:l:188m1b=831b8h4?::m5=?6=3th:<k4?:283>5}#?=0>86F>909K5=d<,=l1=k5f4883>>o3i3:17b86:188yg77j3:1;7>50z&40?3?3A;2=6F>8c9'0c<6l2c?57>5;h6b>5<<a=h1<75f4b83>>o3l3:17d:j:188k3?=831vn<>l:184>5<7s-=?6864H0;2?M7?j2.?j7<>;h6:>5<<a=k1<75f4c83>>o3k3:17d:k:188m1c=831d:44?::\7fa55b=83=1<7>t$6691==O90;0D<6m;%6e>4c<a=31<75f4`83>>o3j3:17d:l:188m1b=831b8h4?::m5=?6=3th<j7>53;294~"0<3??7E?61:J2<g=#<o0j7d:6:188m1g=831d:44?::\7fa30<72:0;6=u+75860>N6181C=5l4$5d9e>o313:17d:n:188k3?=831vn<??:186>5<7s-=?6884H0;2?M7?j2.?j7<?;h6:>5<<a=k1<75f4c83>>o3k3:17b86:188yg77m3:1;7>50z&40?3?3A;2=6F>8c9'0c<3<2c?57>5;h6b>5<<a=h1<75f4b83>>o3l3:17d:j:188k3?=831vn<=k:184>5<7s-=?6864H0;2?M7?j2.?j7=9;h6:>5<<a=k1<75f4c83>>o3k3:17d:k:188m1c=831d:44?::\7fa56e=83=1<7>t$6691==O90;0D<6m;%6e>15<a=31<75f4`83>>o3j3:17d:l:188m1b=831b8h4?::m5=?6=3th3<7>54;294~"0<3?>7E?61:J2<g=#<o09>6g;9;29?l2f2900e9l50;9l2<<722wi;;4?:583>5}#?=0>96F>909K5=d<,=l1>
?5f4883>>o3i3:17d:m:188k3?=831vn<>;:186>5<7s-=?6884H0;2?M7?j2.?j7m4i5;94?=n<h0;66g;b;29?l2d2900c;750;9~f=7=83?1<7>t$66913=O90;0D<6m;%6e>a=n<00;66g;a;29?l2e2900e9m50;9l2<<722wi;:4?:483>5}#?=0>:6F>909K5=d<,=l1h6g;9;29?l2f2900e9l50;9j0f<722e=57>5;|`;6?6=>3:1<v*84;74?M7>92B:4o5+4g817>o313:17d:n:188m1d=831b8n4?::k7`?6=3f<26=44}c5;>5<1290;w)9;:458L4?63A;3n6*;f;00?l2>2900e9o50;9j0g<722c?o7>5;h6g>5<<g?31<75rb9194?1=83:p(::5599K5<7<@82i7):i:d9j0<<722c?m7>5;h6a>5<<a=i1<75f4e83>>o3m3:17b86:188yg1>290<6=4?{%57>0><@83:7E?7b:&7b?c<a=31<75f4`83>>o3j3:17d:l:188m1b=831b8h4?::m5=?6=3th:884?:383>5}#?=0><6F>909K5=d<a=21<75`6883>>{e98=1<7<50;2x 22==91C=4?4H0:a?l2?2900c;750;9~w4ce290=wS?jb:?24=<3k27mi7:m;<335?2e34lo6974=027>1e<uz;n57>525y]5d><V8327S?i4:\2b7=Y9020R<kj;_3a1>X6n:1U=o84^0a`?[7f>2T:m85Q1`68Z4g43W;j>6P>a09]5cd<V8lj7S?i9:\2b==Y9o=0R<h9;_3f<>X6m>1U=h84^0g6?[7b<2T:i>52180903=:91;1?852164970=:91:1?85216d970=:9>o1?85219;970=:9121?85rs0gg>5<4sW;nh63>8085b>;6??0=j6s|1c294?4|V8h;70??6;53?xu6nm0;6>uQ1gf8945b2=301<=k:5;8yv7b:3:1??uQ1d08941?2;n01<98:3f894112;n01<6?:3f8941a2;n01<9j:3f894142;n01<8i:3f8946>2;n01<<i:3f894572;n01<=>:3f894552;n01<=<:3f894532;n01<<<:3f894432;n01<<::3f894412;n01<<8:3f8944?2;n01nk52e9>ga<5l27ho7<k;<aa>7b<5jk1>i52c881`>;a?38o70h9:3f89c3=:m16j94=d:?e7?4c34l96?j4=d`96a=:mh09h63j9;0g?8c?2;n01h952e9>a3<5l27j97<k;<c7>7b<5h91>i52a381`>;f938o70o?:3f894652;n01kh52e9>550=:m16==;52e9~w4d52909wS?m2:?272<082wx=k;50;10\7f[7a=27:4>4=d:?2<4<5l27:4?4=d:?2<<<5l27:454=d:?272<5l27:?;4=d:?235<60l16594=d:?:7?4c34396?j4=8396a=:1909h637f;0g?8dc2;n01om52e9>fg<5l27im7<k;<`:>7b<5k21>i52b081`>;e838o70oi:3f89dc=:m16mi4=d:?bg?4c34nm6?j4=eg96a=:lm09h63kc;0g?8be2;n01io52e9>503=:m16=8:52e9>500=:m16=8<52e9>507=:m16=8=52e9>50`=:m16=8j52e9>50c=:m16=;<52e9>536=:m16=;?52e9>53c=:m16=;j52e9>53e=:m16=;l52e9>53g=:m16=;752e9~w4gb2909wS?nc:?22c<082wx=ll50;0xZ4g>34;<<7=:;|q232<72:q6=:65349>521=?916=:852g9~w41?2909w0?88;53?870i3>27p}>8383>6}:9191?85219396c=:9181;=5rs0:0>5<5s4;3?79?;<3;1?2>3ty:;;4?:2y>521=;<1
6=:85719>52g=<h1v\7f<6>:180\7f87?93=;70?72;16?87?=3>j7p}>7883>1}:91:1:k5216d92c=:9>o1:k5216c92<=z{8=o6=4>2z?2<5<0827:<449f:?26c<4927:?=4<1:?274<4927:??4<1:?276<4927:?94<1:?266<1n27:>94<1:?260<4927:>;4<1:?262<4927:>54<1:?247<4927mj7=>;<332?0a34;;978i;|q23f<7283p1<9i:6289fc=;816oi4<1:?`g?5634ii6>?4=bc974=:k008=63i7;12?8`12:;01k;5309>b1<4927m?7=>;<d1>67<5lh1?<52e`805>;b139:70k7:2389`1=;816i;4<1:?b1?5634k?6>?4=`1974=:i;08=63n1;12?8g72:;0q~?8b;297~;6?l0<<63>72805>;6>o08=6s|19694?5|58226;h4=0:;>3`<582>6;74}r3;3?6=9<q6=575719>561=>o16=>856g9>503=;816=8:5309>500=;816=8<56g9>507=;816=8=5309>50`=;816=8j56g9>50c=;816=;<5309>536=;816=;?5309>53c=;816=;j5309>53e=;816=;l5309>53g=;816=;75309~w4>1290:5v3>89844>;><39:707<:2389<4=;8165<4<1:?:4?56342m6>?4=cf974=:jj08=63mb;12?8df2:;01o75309>f=<4927i=7=>;<`3>67<5hl1?<52ad805>;fl39:70ol:2389a`=;816hh4<1:?g`?5634nh6>?4=e`974=:lh08=6s|12794?`|58=86?h4=00e>7`<589;6?h4=012>7`<58996?h4=010>7`<589?6?h4=000>7`<588?6?h4=006>7`<588=6?h4=004>7`<58836?h4=gf92<=:99o18o5rs052>5<5s4;<?7=:;<344?503ty:;?4?:3y>525=?916=;h5349~w4132909w0?9f;0e?8`b2?30q~??a;296~;68008963>0d85=>{t9931<7=t=02:>26<5on18l5211690d=z{8;o6=46{<31b?5234;9?79?;<32<?2f34;:57:6;<32e?2>34;:n7:6;<32g?2>34;;j7:n;<323?2?3ty:>44?:5y>57`=?916=:>5399>3`<3i27:<n4;9:\7fp54c=832p1<=?:27894432>:01<?7:5;8947>2=h01<?n:5`8947e2=h01<?l:5`8946e2=o0q~?=a;291~;6;90<<63>7180=>;0m3>2709k:5c8946d2=n0q~?>f;293~;6;808963>24844>;6900?m63>1`87g>;69k0?o63>1b87g>;68k0?h6s|13`94?3|589:6:>4=053>6g<5>n184527b87e>;68j0?n6s|13294?0|58996>;4=005>26<58;j69o4=03a>1b<58;h69j4=02a>1e<uz;9o7>55z?277<0827:;=4<b:?4g?2>34=i69o4=02g>1e<uz;9=7>55z?276<4=27:>:480:?25g<3i27:=n4;e:?255<3j2wx=?j50;7x94542>:01<9?:2a892d=<016;l4;a:?24a<3m2wx=?<50;6x94532:?01<<7:628947d2=k01<??:5;8yv75m3:19v3>35844>;6?908h63n5;16?81f2=301<>k:5f8yv7403:1?v3>36801>;6;?08963>3b85=>{t9:<1<7=t=015>26<589n69o4=01g>1g<uz;=?7>53z?235<5<27::4480:?;<?2f3ty::94?:5y>526=:<16=;o5719><=<31273;7:n;|q220<72=q6=:>5279>53d=?9164:4;9:?;2?2f3ty::;4?:5y>526=:>16=;m5719><
3<3127397:n;|q222<72=q6=:>5299>53b=?916484;9:?;0?2f3ty::54?:5y>526=:016hk4<5:?22`<0827387:6;|q230<728>p1<9?:0:e?872=38m70?:4;0e?872>38m70?:2;0e?872938m70?:3;0e?872n38m70?:d;0e?872m38m70?92;0e?871838m70?91;0e?871m38m70?9d;0e?871k38m70?9b;0e?871i38m70?99;0e?874m3<27p}>1083>7}:9;91?85210592<=z{8;?6=4={<310?5234;:4786;|q250<72;q6=?;5349>54?=>01v\7f<?9:181\7f875>39>70?>a;4:?xu69;0;6?u2135970=:98h1:45rs030>5<5s4;947=:;<32g?0>3tyi;7>57z?:0?5234ho6:>4=6790d=:??0?m6387;6b?81?2=k01:754`9~w=c=839p14:5719>56d=<j16=>m5489~w<3=838p14=5349>30<112wx4i4?:2y>=6<0827:?o4;a:?27a<3j2wx5;4?:3y>=7<4=27<:786;|q;g?6=;r72>79?;<30=?2e34;8n7:j;|q:3?6=:r72=7=:;<54>3?<uz2i6=4<{<;2>26<589269o4=01b>1e<uz336=4={<;3>63<5>21:45rs9c94?5|50:1;=5212;90<=:9:k18h5rs8;94?4|51l1?8527885=>{t000;6>u28g844>;6;h0?m63>3e87g>{tn80;6:u2cd801>;a?3=;709i:5c89=6=<h164<4;a:?;6?2f342869o4}ra;>5<4s4in6:>4=02g>1g<58;;69o4}rae>5<5s4io6>;4=6d92<=z{j=1<7=t=bf935=:99n18o5210290f=z{m:1<7<t=ba970=:090=56s|c783>6}:kj0<<63>0c87f>;68m0?56s|d083>7}:kk0896371;4:?xud=3:1?v3lb;53?877j3>j70??c;6`?xuc:3:1>v3la;16?8>52?30q~m;:180\7f8ef2>:01<>m:5;8946d2=o0q~j<:181\7f8e>2:?015=5689~wf5=839p1n75719>55`=<016==m54`9~w`3=838p1k95349>ag<082wxi94?:3y>b3<4=27nm79?;|qe4?6=>r7m:79?;<5e>1?<51:184528087=>;?:3>2706<:5;8yvc42909w0h::2789`?=?91v\7fhh50;7x9c3=?9164=4;b:?;5?2e342969l4=9190g=z{l81<7<t=g6970=:m10<<6s|ed83>1}:n=0<<6371;6`?8>52=i015=54b9~w`7=838p1k=5349>a2<082wxii4?:2y>b6<08273>7:k;<:0>1b<uzo;6=4={<d1>63<5l<1;=5rsda94?4|5o81;=528287a>{tik0;6?u2be801>;e93=;7p}na;296~;ek39>70l?:628yvd1290=w0ll:628923=<016;;4;9:?43?2>34=36974=6;90<=z{h31<7<t=c`970=:io0<<6s|b483>0}:jk0<<6386;6a?8102=h01:654c9>3<<3j2wxm54?:3y>fd<4=27ji79?;|qa0?6=<r7im79?;<54>1e<5>218n527887g>{ti>0;6?u2b8801>;fl3=;7p}m3;297~;e13=;7097:5f892?=<m1v\7fl850;0x9g>=;<16mn480:\7fpf7<72;q6n5480:?4=?2b3tyo57>52z?ff?5234nm6:>4}rf;>5<5s4oj6>;4=eg935=z{m=1<7<t=d;970=:lm0<<6s|d783>7}:m108963kc;53?xuc=3:1>v3j7;16?8be2>:0q~j;:181\7f8c12:?01io5719~w<`=838p1o?5349>e0<082wx5h4?:3y>f5<4=27j879?;|q:`
?6=:r7jj7=:;<c0>26<uz3h6=4={<cf>63<5h81;=5rs8`94?4|5hn1?852a0844>{t1h0;6?u2ab801>;f83=;7p}me;296~;f<39>709n:7;8yvda2909w0o<:27892d=>01v\7fn>50;0x9d4=;<16;n499:\7fpg4<72;q6m<4<5:?4`?0>3tyh>7>52z?b4?5234=n6;74}rd;>5<5s4nn6>;4=9692<=z{o31<7<t=ef970=:0<0=56s|f`83>7}:lj0896376;4:?xuaj3:1>v3kb;16?8>02?30q~hl:181\7f8bf2:?01565689~w4642909w0??2;16?877<3<27p}>0383>3}:9981;=5211:90g=:nl0?m63>0087=>;al3>i70??4;6:?xu6890;6?u2fg801>;6880=56s|fg83>3}:no0<<63>0987e>;am3>270??1;6b?8`c2=i01<>;:5`8yv77?3:1?v3>07801>;68<08963>0985=>{t99?1<7<t=026>26<58:36974}r374?6=:r7:984<5:?20<<112wx=9h50;7x94322>:01<8>:278942>2=k01<:n:5a8945c2=n0q~?;4;296~;6==08963>4985=>{t9=o1<78t=077>26<58<;6>;4=06;>1g<58>269l4=06b>1d<589269m4}r375?6=:r7:9;4<5:?20d<112wx=8>50;6x94312>:01<8=:278942f2=k01<=l:5c8yv73:3:1>v3>53801>;6<?0=56s|15a94?>|58?96:>4=07f>63<58>=6974=064>1d<58>369l4=06:>1e<58>j69j4=01:>1c<uz;8j7>52z?214<4=27:88499:\7fp51d=833p1<;>:628943c2:?01<:9:5c894202=301<:7:5;8942>2=301<:n:5;8945c2=o01<:::5:8yv73;3:1>v3>52801>;6<>0=56s|15f94?1|58?86:>4=07e>63<58><69o4=06;>1e<58>269j4=06b>1c<589269j4}r36=?6=;r7:9k480:?22g<4=27:?l4;b:\7fp501=839p1<;k:628940>2:?01<=n:5;8yv7203:1?v3>5d844>;6>h08963>3`87`>{t9<i1<7=t=041>26<58<n6>;4=01a>1d<uz;>m7>53z?225<0827::n4<5:?27g<3l2wx=8l50;1x94062>:01<8k:278945e2=30q~?<9;296~;6;00=563>3b87a>{t9:k1<7<t=01b>3?<589h69m4}r30f?6=:r7:?o499:?27f<3l2wx==h50;0x946a2?301<>j:5;8yv77j3:1>v3>0c85=>;68l0?i6s|11a94?4|58:h6;74=02f>1e<uz;;h7>52z?24a<1127:<h4;d:\7fp1f<72;q6=<>5689>55c=<h1v\7f8j50;0x945c2?301<=l:5`8yxu6ik0;6?uQ1`;897d=9h30(<6n:768yv7b:3:1>vP>e39>6g<6m;1/=5o55d9~w4ce2909wS?jb:?1f?7bj2.:4l4:f:\7fp5`b=838pR<kk;<0a>4cc3-;3m78?;|q2b0<72;qU=k;4=3`95c3<,82j6;?4}r3e`?6=:rT:ji522c82ba=#91k1:?5rs0cf>5<5sW;jo63=b;3bg>"60h0=?6s|1c294?4|V8h;70<m:0`3?!7?i3<>7p}>b383>7}Y9k801?l51c08 
4>f2?<0qp`;4383>7}O91h0qc:;3;296~N60k1vb9:;:181\7fM7?j2we89;50;0xL4>e3td?8;4?:3yK5=d<ug>?;7>52zJ2<g=zf=>36=4={I3;f>{i<=31<7<tH0:a?xh3<h0;6?uG19`8yk5f>3:1=vF>8c9~j6ga290:wE?7b:\7fm7g6=83;pD<6m;|l0f4<728qC=5l4}o1a6?6=9rB:4o5rn2`0>5<6sA;3n6sa3c694?7|@82i7p`<b483>4}O91h0qc=m6;295~N60k1vb>l8:182\7fM7?j2we?o650;3xL4>e3td8n44?:0yK5=d<ug9im7>51zJ2<g=zf:hi6=4>{I3;f>{i;ki1<7?tH0:a?xh4jm0;6<uG19`8yk5em3:1=vF>8c9~j6da290:wE?7b:\7fm7f6=83;pD<6m;|l0g4<728qC=5l4}o1`6?6=9rB:4o5rn2a0>5<6sA;3n6sa3b694?7|@82i7p`<c483>4}O91h0qc=l6;295~N60k1vb>m8:182\7fM7?j2we?n650;3xL4>e3td8o44?:0yK5=d<ug9hm7>51zJ2<g=zf:ii6=4>{I3;f>{i;ji1<7?tH0:a?xh4km0;6<uG19`8yk5dm3:1=vF>8c9~j6ea290:wE?7b:\7fm7a6=83;pD<6m;|l0`4<728qC=5l4}o1g6?6=9rB:4o5rn2f0>5<6sA;3n6sa3e694?7|@82i7p`<d483>4}O91h0qc=k6;295~N60k1vb>j8:182\7fM7?j2we?i650;3xL4>e3td8h44?:0yK5=d<ug9om7>51zJ2<g=zf:ni6=4>{I3;f>{i;mi1<7?tH0:a?xh4lm0;6<uG19`8yk5cm3:1=vF>8c9~j6ba290:wE?7b:\7fm7`6=83;pD<6m;|l0a4<728qC=5l4}o1f6?6=9rB:4o5rn2g0>5<6sA;3n6sa3d694?7|@82i7p`<e483>4}O91h0qc=j6;295~N60k1vb>k8:182\7fM7?j2we?h650;3xL4>e3td8i44?:0yK5=d<ug9nm7>51zJ2<g=zf:oi6=4>{I3;f>{i;li1<7?tH0:a?xh4mm0;6<uG19`8yk5bm3:1=vF>8c9~j6ca290:wE?7b:\7fm7c6=83;pD<6m;|l0b4<728qC=5l4}o1e6?6=9rB:4o5rn2d0>5<6sA;3n6sa3g694?7|@82i7p`<f483>4}O91h0qc=i6;295~N60k1vb>h8:182\7fM7?j2we?k650;3xL4>e3td8j44?:0yK5=d<ug9mm7>51zJ2<g=zf:li6=4>{I3;f>{i;oi1<7?tH0:a?xh4nm0;6<uG19`8yk5am3:1=vF>8c9~j6`a290:wE?7b:\7fm056=83;pD<6m;|l744<728qC=5l4}o636?6=9rB:4o5rn520>5<6sA;3n6sa41694?7|@82i7p`;0483>4}O91h0qc:?6;295~N60k1vb9>8:182\7fM7?j2we8=650;3xL4>e3td?<44?:0yK5=d<ug>;m7>51zJ2<g=zf=:i6=4>{I3;f>{i<9i1<7?tH0:a?xh38m0;6<uG19`8yk27m3:1=vF>8c9~j16a290:wE?7b:\7fm046=83;pD<6m;|l754<728qC=5l4}o626?6=9rB:4o5rn530>5<6sA;3n6sa40694?7|@82i7p`;1483>4}O91h0qc:>6;295~N60k1vb9?8:182\7fM7?j2we8<650;3xL4>e3td?=44?:0yK5=d<ug>:m7>51zJ2<g=zf=;i6=4>{I3;f>{i<8i1<7?tH0:a?xh39m0;6<uG19`8yk26m3:1=vF>8c9~j17a290:wE?7b:\7fm076=83;pD<6m;|l764<728qC=5l4}o616?6=9rB:4o5rn500>5<6sA;3n6sa43694?7|@82i7p`;2483>4}O91h0qc:=6;295~N60k1vb9<8:182\
7fM7?j2we8?650;3xL4>e3td?>44?:0yK5=d<ug>9m7>51zJ2<g=zf=8i6=4>{I3;f>{i<;i1<7?tH0:a?xh3:m0;6<uG19`8yk25m3:1=vF>8c9~j14a290:wE?7b:\7fm066=83;pD<6m;|l774<728qC=5l4}o606?6=9rB:4o5rn510>5<6sA;3n6sa42694?7|@82i7p`;3483>4}O91h0qc:<6;295~N60k1vb9=8:182\7fM7?j2we8>650;3xL4>e3td??44?:0yK5=d<ug>8m7>51zJ2<g=zf=9i6=4>{I3;f>{i<:i1<7?tH0:a?xh3;m0;6<uG19`8yk24m3:1=vF>8c9~j15a290:wE?7b:\7fm016=83;pD<6m;|l704<728qC=5l4}|\7f~DEE|<l21m9<8113ayEFEs9wKL]ur@A
\ No newline at end of file
diff --git a/src/edu/berkeley/fleet/fpga/greg/asyncfifo_dmem_1b.v b/src/edu/berkeley/fleet/fpga/greg/asyncfifo_dmem_1b.v
new file mode 100644 (file)
index 0000000..3ae28b4
--- /dev/null
@@ -0,0 +1,165 @@
+/*******************************************************************************
+*     This file is owned and controlled by Xilinx and must be used             *
+*     solely for design, simulation, implementation and creation of            *
+*     design files limited to Xilinx devices or technologies. Use              *
+*     with non-Xilinx devices or technologies is expressly prohibited          *
+*     and immediately terminates your license.                                 *
+*                                                                              *
+*     XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS"            *
+*     SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR                  *
+*     XILINX DEVICES.  BY PROVIDING THIS DESIGN, CODE, OR INFORMATION          *
+*     AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION              *
+*     OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS                *
+*     IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT,                  *
+*     AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE         *
+*     FOR YOUR IMPLEMENTATION.  XILINX EXPRESSLY DISCLAIMS ANY                 *
+*     WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE                  *
+*     IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR           *
+*     REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF          *
+*     INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS          *
+*     FOR A PARTICULAR PURPOSE.                                                *
+*                                                                              *
+*     Xilinx products are not intended for use in life support                 *
+*     appliances, devices, or systems. Use in such applications are            *
+*     expressly prohibited.                                                    *
+*                                                                              *
+*     (c) Copyright 1995-2007 Xilinx, Inc.                                     *
+*     All rights reserved.                                                     *
+*******************************************************************************/
+// The synthesis directives "translate_off/translate_on" specified below are
+// supported by Xilinx, Mentor Graphics and Synplicity synthesis
+// tools. Ensure they are correct for your synthesis tool(s).
+
+// You must compile the wrapper file asyncfifo_dmem_1b.v when simulating
+// the core, asyncfifo_dmem_1b. When compiling the wrapper file, be sure to
+// reference the XilinxCoreLib Verilog simulation library. For detailed
+// instructions, please refer to the "CORE Generator Help".
+
+`timescale 1ns/1ps
+
+module asyncfifo_dmem_1b(
+       din,
+       rd_clk,
+       rd_en,
+       rst,
+       wr_clk,
+       wr_en,
+       dout,
+       empty,
+       full);
+// Simulation wrapper for the asyncfifo_dmem_1b core: a 1-bit-wide, 64-entry FIFO
+// with separate write (wr_clk) and read (rd_clk) clock inputs.
+input [0 : 0] din;     // 1-bit data input, wired to DIN of the FIFO model
+input rd_clk;          // clock for the read side (RD_CLK)
+input rd_en;           // read enable (RD_EN)
+input rst;             // reset (RST); NOTE(review): presumably asynchronous (C_HAS_RST=1, C_HAS_SRST=0) - confirm
+input wr_clk;          // clock for the write side (WR_CLK)
+input wr_en;           // write enable (WR_EN)
+output [0 : 0] dout;   // 1-bit data output, driven by DOUT of the FIFO model
+output empty;          // EMPTY flag from the FIFO model
+output full;           // FULL flag from the FIFO model
+
+// synthesis translate_off
+// Everything up to "translate_on" is skipped by synthesis, so synthesis sees only the port list above.
+      FIFO_GENERATOR_V4_3 #(
+               .C_COMMON_CLOCK(0),          // 0 here; consistent with the separate rd_clk/wr_clk ports
+               .C_COUNT_TYPE(0),
+               .C_DATA_COUNT_WIDTH(6),
+               .C_DEFAULT_VALUE("BlankString"),
+               .C_DIN_WIDTH(1),             // matches the [0:0] din port
+               .C_DOUT_RST_VAL("0"),
+               .C_DOUT_WIDTH(1),            // matches the [0:0] dout port
+               .C_ENABLE_RLOCS(0),
+               .C_FAMILY("virtex5"),
+               .C_FULL_FLAGS_RST_VAL(1),
+               .C_HAS_ALMOST_EMPTY(0),
+               .C_HAS_ALMOST_FULL(0),
+               .C_HAS_BACKUP(0),
+               .C_HAS_DATA_COUNT(0),
+               .C_HAS_INT_CLK(0),
+               .C_HAS_MEMINIT_FILE(0),
+               .C_HAS_OVERFLOW(0),
+               .C_HAS_RD_DATA_COUNT(0),
+               .C_HAS_RD_RST(0),
+               .C_HAS_RST(1),
+               .C_HAS_SRST(0),
+               .C_HAS_UNDERFLOW(0),
+               .C_HAS_VALID(0),
+               .C_HAS_WR_ACK(0),
+               .C_HAS_WR_DATA_COUNT(0),
+               .C_HAS_WR_RST(0),
+               .C_IMPLEMENTATION_TYPE(2),
+               .C_INIT_WR_PNTR_VAL(0),
+               .C_MEMORY_TYPE(2),           // NOTE(review): presumably selects distributed RAM ("dmem" in the core name) - confirm
+               .C_MIF_FILE_NAME("BlankString"),
+               .C_MSGON_VAL(1),
+               .C_OPTIMIZATION_MODE(0),
+               .C_OVERFLOW_LOW(0),
+               .C_PRELOAD_LATENCY(0),
+               .C_PRELOAD_REGS(1),
+               .C_PRIM_FIFO_TYPE("512x36"),
+               .C_PROG_EMPTY_THRESH_ASSERT_VAL(4),
+               .C_PROG_EMPTY_THRESH_NEGATE_VAL(5),
+               .C_PROG_EMPTY_TYPE(0),
+               .C_PROG_FULL_THRESH_ASSERT_VAL(62),
+               .C_PROG_FULL_THRESH_NEGATE_VAL(61),
+               .C_PROG_FULL_TYPE(0),
+               .C_RD_DATA_COUNT_WIDTH(6),
+               .C_RD_DEPTH(64),             // 64 entries on the read side
+               .C_RD_FREQ(1),
+               .C_RD_PNTR_WIDTH(6),         // 6 bits = log2(64)
+               .C_UNDERFLOW_LOW(0),
+               .C_USE_DOUT_RST(1),
+               .C_USE_ECC(0),
+               .C_USE_EMBEDDED_REG(0),
+               .C_USE_FIFO16_FLAGS(0),
+               .C_USE_FWFT_DATA_COUNT(0),
+               .C_VALID_LOW(0),
+               .C_WR_ACK_LOW(0),
+               .C_WR_DATA_COUNT_WIDTH(6),
+               .C_WR_DEPTH(64),             // 64 entries on the write side
+               .C_WR_FREQ(1),
+               .C_WR_PNTR_WIDTH(6),         // 6 bits = log2(64)
+               .C_WR_RESPONSE_LATENCY(1))
+       inst (
+               .DIN(din),
+               .RD_CLK(rd_clk),
+               .RD_EN(rd_en),
+               .RST(rst),
+               .WR_CLK(wr_clk),
+               .WR_EN(wr_en),
+               .DOUT(dout),
+               .EMPTY(empty),
+               .FULL(full),
+               .CLK(),                      // this and every model port below is left unconnected
+               .INT_CLK(),
+               .BACKUP(),
+               .BACKUP_MARKER(),
+               .PROG_EMPTY_THRESH(),
+               .PROG_EMPTY_THRESH_ASSERT(),
+               .PROG_EMPTY_THRESH_NEGATE(),
+               .PROG_FULL_THRESH(),
+               .PROG_FULL_THRESH_ASSERT(),
+               .PROG_FULL_THRESH_NEGATE(),
+               .RD_RST(),
+               .SRST(),
+               .WR_RST(),
+               .ALMOST_EMPTY(),
+               .ALMOST_FULL(),
+               .DATA_COUNT(),
+               .OVERFLOW(),
+               .PROG_EMPTY(),
+               .PROG_FULL(),
+               .VALID(),
+               .RD_DATA_COUNT(),
+               .UNDERFLOW(),
+               .WR_ACK(),
+               .WR_DATA_COUNT(),
+               .SBITERR(),
+               .DBITERR());
+
+
+// synthesis translate_on
+
+endmodule
+
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_ctrl.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_ctrl.v
new file mode 100644 (file)
index 0000000..f0d80fe
--- /dev/null
@@ -0,0 +1,1237 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_ctrl.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/29 15:24:03 $
+// \   \  /  \    Date Created: Wed Aug 30 2006
+//  \___\/\___\
+//
+//
+//Device: Virtex-5
+//Design Name: DDR/DDR2
+//Purpose:
+//   This module is the main control logic of the memory interface. All
+//   commands are issued from here according to the burst, CAS Latency and the
+//   user commands.
+//Reference:
+//Revision History:
+//   Rev 1.2 - Fixed auto refresh to activate bug. KP 11-19-2007
+//   Rev 1.3 - For Dual Rank parts support CS logic modified. KP. 05/08/08
+//   Rev 1.4 - AUTO_REFRESH_WAIT state modified for Auto Refresh flag asserted
+//             immediately after calibration is completed. KP. 07/28/08
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_ctrl #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameters values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter BANK_WIDTH    = 2,
+   parameter COL_WIDTH     = 10,
+   parameter CS_BITS       = 0,
+   parameter CS_NUM        = 1,
+   parameter ROW_WIDTH     = 14,
+   parameter ADDITIVE_LAT  = 0,
+   parameter BURST_LEN     = 4,
+   parameter CAS_LAT       = 5,
+   parameter ECC_ENABLE    = 0,
+   parameter REG_ENABLE    = 1,
+   parameter TREFI_NS      = 7800,
+   parameter TRAS          = 40000,
+   parameter TRCD          = 15000,
+   parameter TRRD          = 10000,
+   parameter TRFC          = 105000,
+   parameter TRP           = 15000,
+   parameter TRTP          = 7500,
+   parameter TWR           = 15000,
+   parameter TWTR          = 10000,
+   parameter CLK_PERIOD    = 3000,
+   parameter MULTI_BANK_EN = 1,
+   parameter TWO_T_TIME_EN = 0,
+   parameter DDR_TYPE      = 1
+   )
+  (
+   input                   clk,
+   input                   rst,
+   input [2:0]             af_cmd,
+   input [30:0]            af_addr,
+   input                   af_empty,
+   input                   rd_fifo_clear,
+   input                   wr_fifo_clear,
+   output reg              wr_fifo_burst,
+   input                   phy_init_done,
+   output                  ctrl_ref_flag,
+   output                  ctrl_af_rden,
+   output reg              ctrl_wren,
+   output reg              ctrl_rden,
+   output [ROW_WIDTH-1:0]  ctrl_addr,
+   output [BANK_WIDTH-1:0] ctrl_ba,
+   output                  ctrl_ras_n,
+   output                  ctrl_cas_n,
+   output                  ctrl_we_n,
+   output [CS_NUM-1:0]     ctrl_cs_n
+   );
+
+  // input address split into various ranges
+  localparam ROW_RANGE_START     = COL_WIDTH;
+  localparam ROW_RANGE_END       = ROW_WIDTH + ROW_RANGE_START - 1;
+  localparam BANK_RANGE_START    = ROW_RANGE_END + 1;
+  localparam BANK_RANGE_END      = BANK_WIDTH + BANK_RANGE_START - 1;
+  localparam CS_RANGE_START      = BANK_RANGE_START + BANK_WIDTH;
+  localparam CS_RANGE_END        = CS_BITS + CS_RANGE_START - 1;
+  // compare address (for determining bank/row hits) split into various ranges
+  // (compare address doesn't include column bits)
+  localparam CMP_WIDTH            = CS_BITS + BANK_WIDTH + ROW_WIDTH;
+  localparam CMP_ROW_RANGE_START  = 0;
+  localparam CMP_ROW_RANGE_END    = ROW_WIDTH + CMP_ROW_RANGE_START - 1;
+  localparam CMP_BANK_RANGE_START = CMP_ROW_RANGE_END + 1;
+  localparam CMP_BANK_RANGE_END   = BANK_WIDTH + CMP_BANK_RANGE_START - 1;
+  localparam CMP_CS_RANGE_START   = CMP_BANK_RANGE_END + 1;
+  localparam CMP_CS_RANGE_END     = CS_BITS + CMP_CS_RANGE_START-1;
+
+  localparam BURST_LEN_DIV2      = BURST_LEN / 2;
+  localparam OPEN_BANK_NUM       = 4;
+  localparam CS_BITS_FIX         = (CS_BITS == 0) ? 1 : CS_BITS;
+
+  // calculation counters based on clock cycle and memory parameters
+  // TRAS: ACTIVE->PRECHARGE interval - 2
+  localparam integer TRAS_CYC = (TRAS + CLK_PERIOD)/CLK_PERIOD;
+  // TRCD: ACTIVE->READ/WRITE interval - 3 (for DDR2 factor in ADD_LAT)
+  localparam integer TRRD_CYC = (TRRD + CLK_PERIOD)/CLK_PERIOD;
+  localparam integer TRCD_CYC = (((TRCD + CLK_PERIOD)/CLK_PERIOD) >
+                                 ADDITIVE_LAT )?
+             ((TRCD+CLK_PERIOD)/ CLK_PERIOD) - ADDITIVE_LAT : 0;
+  // TRFC: REFRESH->REFRESH, REFRESH->ACTIVE interval - 2
+  localparam integer TRFC_CYC = (TRFC + CLK_PERIOD)/CLK_PERIOD;
+  // TRP: PRECHARGE->COMMAND interval - 2
+   // for precharge all add 1 extra clock cycle
+  localparam integer TRP_CYC =  ((TRP + CLK_PERIOD)/CLK_PERIOD) +1;
+  // TRTP: READ->PRECHARGE interval - 2 (Al + BL/2 + (max (TRTP, 2tck))-2
+  localparam integer TRTP_TMP_MIN = (((TRTP + CLK_PERIOD)/CLK_PERIOD) >= 2)?
+                                     ((TRTP + CLK_PERIOD)/CLK_PERIOD) : 2;
+  localparam integer TRTP_CYC = TRTP_TMP_MIN + ADDITIVE_LAT
+                                + BURST_LEN_DIV2 - 2;
+  // TWR: WRITE->PRECHARGE interval - 2
+  localparam integer WR_LAT = (DDR_TYPE > 0) ? CAS_LAT + ADDITIVE_LAT - 1 : 1;
+  localparam integer TWR_CYC = ((TWR + CLK_PERIOD)/CLK_PERIOD) +
+             WR_LAT + BURST_LEN_DIV2 ;
+  // TWTR: WRITE->READ interval - 3 (for DDR1, TWTR = 2 clks)
+  // DDR2 = CL-1 + BL/2 +TWTR
+  localparam integer TWTR_TMP_MIN = (TWTR + CLK_PERIOD)/CLK_PERIOD;
+  localparam integer TWTR_CYC = (DDR_TYPE > 0) ? (TWTR_TMP_MIN + (CAS_LAT -1)
+                                 + BURST_LEN_DIV2 ): 2;
+
+  //  TRTW: READ->WRITE interval - 3
+  //  DDR1: CL + (BL/2)
+  //  DDR2: (BL/2) + 2. Two more clocks are added to
+  //  the DDR2 counter to account for the delay in
+  //  arrival of the DQS during reads (pcb trace + buffer
+  //  delays + memory parameters).
+  localparam TRTW_CYC = (DDR_TYPE > 0) ? BURST_LEN_DIV2 + 4 :
+             (CAS_LAT == 25) ? 2 + BURST_LEN_DIV2 : CAS_LAT + BURST_LEN_DIV2;
+
+  localparam integer CAS_LAT_RD = (CAS_LAT == 25) ? 2 : CAS_LAT;
+
+  // Make sure all values >= 0 (some may be = 0)
+  localparam TRAS_COUNT = (TRAS_CYC > 0) ? TRAS_CYC : 0;
+  localparam TRCD_COUNT = (TRCD_CYC > 0) ? TRCD_CYC : 0;
+  localparam TRRD_COUNT = (TRRD_CYC > 0) ? TRRD_CYC : 0;
+  localparam TRFC_COUNT = (TRFC_CYC > 0) ? TRFC_CYC : 0;
+  localparam TRP_COUNT  = (TRP_CYC > 0)  ? TRP_CYC  : 0;
+  localparam TRTP_COUNT = (TRTP_CYC > 0) ? TRTP_CYC : 0;
+  localparam TWR_COUNT  = (TWR_CYC > 0)  ? TWR_CYC  : 0;
+  localparam TWTR_COUNT = (TWTR_CYC > 0) ? TWTR_CYC : 0;
+  localparam TRTW_COUNT = (TRTW_CYC > 0) ? TRTW_CYC : 0;
+
+  // Auto refresh interval
+  localparam TREFI_COUNT = ((TREFI_NS * 1000)/CLK_PERIOD) - 1;
+
+  // memory controller states
+  localparam   CTRL_IDLE                =     5'h00;
+  localparam   CTRL_PRECHARGE           =     5'h01;
+  localparam   CTRL_PRECHARGE_WAIT      =     5'h02;
+  localparam   CTRL_AUTO_REFRESH        =     5'h03;
+  localparam   CTRL_AUTO_REFRESH_WAIT   =     5'h04;
+  localparam   CTRL_ACTIVE              =     5'h05;
+  localparam   CTRL_ACTIVE_WAIT         =     5'h06;
+  localparam   CTRL_BURST_READ          =     5'h07;
+  localparam   CTRL_READ_WAIT           =     5'h08;
+  localparam   CTRL_BURST_WRITE         =     5'h09;
+  localparam   CTRL_WRITE_WAIT          =     5'h0A;
+  localparam   CTRL_PRECHARGE_WAIT1     =     5'h0B;
+
+
+  reg [CMP_WIDTH-1:0]                      act_addr_r;
+  wire [30:0]                              af_addr_r;
+  reg [30:0]                               af_addr_r1;
+  reg [30:0]                               af_addr_r2;
+  reg [30:0]                               af_addr_r3;
+  wire [2:0]                               af_cmd_r;
+  reg [2:0]                                af_cmd_r1;
+  reg [2:0]                                af_cmd_r2;
+  reg                                      af_valid_r;
+  reg                                      af_valid_r1;
+  reg                                      af_valid_r2;
+  reg [CS_BITS_FIX :0]                     auto_cnt_r;
+  reg                                      auto_ref_r;
+  reg [(OPEN_BANK_NUM*CMP_WIDTH)-1:0]      bank_cmp_addr_r;
+  reg [OPEN_BANK_NUM-1:0]                  bank_hit;
+  reg [OPEN_BANK_NUM-1:0]                  bank_hit_r;
+  reg [OPEN_BANK_NUM-1:0]                  bank_hit_r1;
+  reg [OPEN_BANK_NUM-1:0]                  bank_valid_r;
+  reg                                      bank_conflict_r;
+  reg                                      conflict_resolved_r;
+  reg                                      ctrl_af_rden_r;
+  reg                                      conflict_detect_r;
+  wire                                     conflict_detect;
+  reg                                      cs_change_r;
+  reg                                      cs_change_sticky_r;
+  reg [ROW_WIDTH-1:0]                      ddr_addr_r;
+  wire [ROW_WIDTH-1:0]                     ddr_addr_col;
+  wire [ROW_WIDTH-1:0]                     ddr_addr_row;
+  reg [BANK_WIDTH-1:0]                     ddr_ba_r;
+  reg                                      ddr_cas_n_r;
+  reg [CS_NUM-1:0]                         ddr_cs_n_r;
+  reg                                      ddr_ras_n_r;
+  reg                                      ddr_we_n_r;
+  reg [4:0]                                next_state;
+  reg                                      no_precharge_wait_r;
+  reg                                      no_precharge_r;
+  reg                                      no_precharge_r1;
+  reg                                      phy_init_done_r;
+  reg [4:0]                                precharge_ok_cnt_r;
+  reg                                      precharge_ok_r;
+  reg [4:0]                                ras_cnt_r;
+  reg [3:0]                                rcd_cnt_r;
+  reg                                      rcd_cnt_ok_r;
+  reg [2:0]                                rdburst_cnt_r;
+  reg                                      rdburst_ok_r;
+  reg                                      rdburst_rden_ok_r;
+  reg                                      rd_af_flag_r;
+  wire                                     rd_flag;
+  reg                                      rd_flag_r;
+  reg [4:0]                                rd_to_wr_cnt_r;
+  reg                                      rd_to_wr_ok_r;
+  reg                                      ref_flag_r;
+  reg [11:0]                               refi_cnt_r;
+  reg                                      refi_cnt_ok_r;
+  reg                                      rst_r
+                                           /* synthesis syn_preserve = 1 */;
+  reg                                      rst_r1
+                                           /* synthesis syn_maxfan = 10 */;
+  reg [7:0]                                rfc_cnt_r;
+  reg                                      rfc_ok_r;
+  reg [3:0]                                row_miss;
+  reg [3:0]                                row_conflict_r;
+  reg [3:0]                                rp_cnt_r;
+  reg                                      rp_cnt_ok_r;
+  reg [CMP_WIDTH-1:0]                      sb_open_add_r;
+  reg [4:0]                                state_r;
+  reg [4:0]                                state_r1;
+  wire                                     sm_rden;
+  reg                                      sm_rden_r;
+  reg [2:0]                                trrd_cnt_r;
+  reg                                      trrd_cnt_ok_r;
+  reg [2:0]                                two_t_enable_r;
+  reg [CS_NUM-1:0]                         two_t_enable_r1;
+  reg [2:0]                                wrburst_cnt_r;
+  reg                                      wrburst_ok_r;
+  reg                                      wrburst_wren_ok_r;
+  wire                                     wr_flag;
+  reg                                      wr_flag_r;
+  reg [4:0]                                wr_to_rd_cnt_r;
+  reg                                      wr_to_rd_ok_r;
+
+  // XST attributes for local reset "tree"
+  // synthesis attribute shreg_extract of rst_r is "no";
+  // synthesis attribute shreg_extract of rst_r1 is "no";
+  // synthesis attribute equivalent_register_removal of rst_r is "no"
+
+  //***************************************************************************
+
+  // sm_rden is used to assert read enable to the address FIFO
+  assign sm_rden = ((state_r == CTRL_BURST_WRITE) ||
+                    (state_r == CTRL_BURST_READ)) ;
+
+  // assert read flag to the address FIFO
+  assign ctrl_af_rden = sm_rden || rd_af_flag_r;
+
+  // local reset "tree" for controller logic only. Create this to ease timing
+  // on reset path. Prohibit equivalent register removal on RST_R to prevent
+  // "sharing" with other local reset trees (caution: make sure global fanout
+  // limit is set to large enough value, otherwise SLICES may be used for
+  // fanout control on RST_R).
+  always @(posedge clk) begin
+    rst_r  <= rst;
+    rst_r1 <= rst_r;
+  end
+
+  //*****************************************************************
+  // interpret commands from Command/Address FIFO
+  //*****************************************************************
+
+  // A command is decoded only while the state-machine stage (r2) holds a
+  // valid entry: command code 3'b000 is a write, 3'b001 is a read.
+  assign wr_flag = af_valid_r2 && (af_cmd_r2 == 3'b000);
+  assign rd_flag = af_valid_r2 && (af_cmd_r2 == 3'b001);
+
+  // one-cycle delayed copies of the decoded command flags
+  always @(posedge clk) begin
+    wr_flag_r <= wr_flag;
+    rd_flag_r <= rd_flag;
+  end
+
+  //////////////////////////////////////////////////
+  // The data from the address FIFO is fetched and
+  // stored in two register stages. The data will be
+  // pulled out of the second register stage whenever
+  // the state machine can handle new data from the
+  // address FIFO.
+
+  // This flag is asserted when there is no
+  // cmd & address in the pipe. When there is
+  // valid cmd & addr from the address FIFO the
+  // af_valid signals will be asserted. This flag will
+  // be set the cycle af_valid_r is de-asserted.
+  // rd_af_flag_r requests an address-FIFO read independently of the state
+  // machine (it is OR-ed with sm_rden to form ctrl_af_rden). Priority order:
+  //   1. reset                                         -> clear
+  //   2. a read was issued to a non-empty FIFO on the previous cycle
+  //      (ctrl_af_rden_r), or the flag is already set and one of the
+  //      first two stages holds valid data             -> clear
+  //   3. either of the first two stages is empty       -> set
+  //   otherwise the flag holds its value.
+  always @(posedge clk) begin
+    // for simulation purposes - to force CTRL_AF_RDEN low during reset
+    if (rst_r1)
+      rd_af_flag_r <= 1'd0;
+    else if((ctrl_af_rden_r) ||
+            (rd_af_flag_r && (af_valid_r || af_valid_r1)))
+         rd_af_flag_r <= 1'd0;
+    else if (~af_valid_r1 || ~af_valid_r)
+         rd_af_flag_r <= 1'd1;
+
+  end
+
+  // First register stage for the cmd & add from the FIFO.
+  // The af_valid_r signal gives the status of the data
+  // in this stage. The af_valid_r will be asserted when there
+  // is valid data. This register stage will be updated
+  // 1. read to the FIFO and the FIFO not empty
+  // 2. After write and read states
+  // 3. The valid signal is not asserted in the last stage.
+  // af_valid_r is reloaded with ctrl_af_rden_r (a read was issued to a
+  // non-empty FIFO on the previous cycle) whenever the pipe can advance;
+  // otherwise it holds its value.
+  always @(posedge clk) begin
+    if (rst_r1)begin
+      af_valid_r <= 1'd0;
+    end else begin
+      // advance on: FIFO read issued, state machine consumed an entry one
+      // cycle ago (sm_rden_r), or an empty slot in either later stage
+      if (ctrl_af_rden_r || sm_rden_r || ~af_valid_r1
+          || ~af_valid_r2)begin
+        af_valid_r <= ctrl_af_rden_r;
+      end
+    end
+  end
+
+  // The output register in the FIFO is used. The addr
+  // and command are already registered in the FIFO.
+  assign af_addr_r = af_addr;
+  assign af_cmd_r = af_cmd;
+
+  // Second register stage for the cmd & add from the FIFO.
+  // The af_valid_r1 signal gives the status of the data
+  // in this stage. The af_valid_r1 will be asserted when there
+  // is valid data. This register stage will be updated
+  // 1. read to the FIFO and the FIFO not empty and there
+  // is no valid data on this stage
+  // 2. After write and read states
+  // 3. The valid signal is not asserted in the last stage.
+  always@(posedge clk) begin
+    if (rst_r1)begin
+      af_valid_r1 <= 1'd0;
+      af_addr_r1 <= {31{1'bx}};
+      af_cmd_r1 <= {3{1'bx}};
+    end else if (~af_valid_r1 || sm_rden_r ||
+                  ~af_valid_r2) begin
+      af_valid_r1 <= af_valid_r;
+      af_addr_r1 <= af_addr_r;
+      af_cmd_r1 <= af_cmd_r;
+    end
+  end
+
+  // The state machine uses the address and command in this
+  // register stage. The data is fetched from the second
+  // register stage whenever the state machine can accept new
+  // addr. The conflict flags are also generated based on the
+  // second register stage and updated when the new address
+  // is loaded for the state machine.
+  // Stage feeding the state machine. It loads from the r1 stage when the
+  // state machine consumes the current entry (sm_rden) or when this stage
+  // is empty; otherwise every register here holds its value.
+  always@(posedge clk) begin
+    if (rst_r1)begin
+      af_valid_r2 <= 1'd0;
+      // address, command and conflict flags are don't-care until the first
+      // valid entry arrives; they are only qualified by af_valid_r2
+      af_addr_r2 <= {31{1'bx}};
+      af_cmd_r2 <= {3{1'bx}};
+      bank_hit_r <= {OPEN_BANK_NUM{1'bx}};
+      bank_conflict_r <= 1'bx;
+      row_conflict_r <= 4'bx;
+    end else if(sm_rden || ~af_valid_r2)begin
+      af_valid_r2 <= af_valid_r1;
+      af_addr_r2 <= af_addr_r1;
+      af_cmd_r2 <= af_cmd_r1;
+      if(MULTI_BANK_EN)begin
+        // capture the per-slot compare results computed from af_addr_r1
+        bank_hit_r <= bank_hit;
+        row_conflict_r <= row_miss;
+        // bank conflict = the incoming address hit none of the open slots
+        bank_conflict_r <= (~(|bank_hit));
+      end else begin
+        bank_hit_r <= {OPEN_BANK_NUM{1'b0}};
+        bank_conflict_r <= 1'd0;
+        // single-bank mode: only bit 0 is driven (bits [3:1] keep their
+        // reset value); it flags a mismatch against the one open address
+        row_conflict_r[0] <= (af_addr_r1[CS_RANGE_END:ROW_RANGE_START]
+                              != sb_open_add_r[CMP_WIDTH-1:0]);
+      end
+    end
+  end // always@ (posedge clk)
+
+  //detecting cs change for multi chip select case
+  // Chip-select change detection. Both flags compare the chip-select bits of
+  // the incoming entry (af_addr_r1) with those of the entry currently in the
+  // state-machine stage (af_addr_r2), sampled when that stage loads.
+  //   cs_change_r        - cleared again on every cycle without a load
+  //   cs_change_sticky_r - holds its value until the next load
+  // Neither flag is reset in the multi-CS branch.
+  generate
+    if(CS_NUM > 1) begin: gen_cs_change
+       always @(posedge clk) begin
+          if(sm_rden || ~af_valid_r2)begin
+            cs_change_r <= af_addr_r1[CS_RANGE_END:CS_RANGE_START] !=
+                       af_addr_r2[CS_RANGE_END:CS_RANGE_START] ;
+            cs_change_sticky_r <=
+             af_addr_r1[CS_RANGE_END:CS_RANGE_START] !=
+             af_addr_r2[CS_RANGE_END:CS_RANGE_START] ;
+          end else
+            cs_change_r <= 1'd0;
+       end
+    end // block: gen_cs_change
+    else begin: gen_cs_0
+       // single chip select: a change can never occur, tie both flags low
+       always @(posedge clk) begin
+          cs_change_r <= 1'd0;
+          cs_change_sticky_r <= 1'd0;
+       end
+    end
+ endgenerate
+
+  // A conflict is only reported while the state-machine stage is valid.
+  // Multi-bank mode: a slot that hit has a row mismatch, or no slot hit.
+  // Single-bank mode: bit 0 of the row-conflict vector.
+  assign conflict_detect = af_valid_r2 &
+                           ((MULTI_BANK_EN) ?
+                            ((|(row_conflict_r[3:0] & bank_hit_r[3:0]))
+                             | bank_conflict_r) :
+                            row_conflict_r[0]);
+
+  always @(posedge clk) begin
+    conflict_detect_r <= conflict_detect;
+    sm_rden_r <= sm_rden;
+    af_addr_r3 <= af_addr_r2;
+    ctrl_af_rden_r <= ctrl_af_rden & ~af_empty;
+  end
+
+  // conflict resolved signal. When this signal is asserted
+  // the conflict is resolved. The address to be compared
+  // for the conflict_resolved_r will be stored in act_addr_r
+  // when the bank is opened.
+  always @(posedge clk) begin
+   conflict_resolved_r <= (act_addr_r ==
+                           af_addr_r2[CS_RANGE_END:ROW_RANGE_START]);
+    if((state_r == CTRL_ACTIVE))
+      act_addr_r <= af_addr_r2[CS_RANGE_END:ROW_RANGE_START];
+  end
+
+  //***************************************************************************
+  // Bank management logic
+  // Semi-hardcoded for now for 4 banks
+  // will keep multiple banks open if MULTI_BANK_EN is true.
+  //***************************************************************************
+
+  genvar bank_i;
+  generate // if multiple bank option chosen
+    if(MULTI_BANK_EN) begin: gen_multi_bank_open
+
+      // One comparator pair per open-bank slot. Both compare the incoming
+      // address (af_addr_r1) against the entry stored for slot bank_i in
+      // bank_cmp_addr_r: the upper part of the entry (above ROW_WIDTH)
+      // against the CS/bank bits, the lower ROW_WIDTH bits against the row.
+      for (bank_i = 0; bank_i < OPEN_BANK_NUM;
+           bank_i = bank_i + 1) begin: gen_bank_hit1
+        // asserted if bank address match + open bank entry is valid
+        always @(*) begin
+          bank_hit[bank_i]
+            = ((bank_cmp_addr_r[(CMP_WIDTH*(bank_i+1))-1:
+                                (CMP_WIDTH*bank_i)+ROW_WIDTH] ==
+                af_addr_r1[CS_RANGE_END:BANK_RANGE_START]) &&
+               bank_valid_r[bank_i]);
+          // asserted if row address does NOT match (no check for bank entry
+          // valid, rely on this term to be used in conjunction with
+          // BANK_HIT[])
+          row_miss[bank_i]
+            = (bank_cmp_addr_r[(CMP_WIDTH*bank_i)+ROW_WIDTH-1:
+                               (CMP_WIDTH*bank_i)] !=
+               af_addr_r1[ROW_RANGE_END:ROW_RANGE_START]);
+        end
+      end
+
+      always @(posedge clk) begin
+        no_precharge_wait_r  <= bank_valid_r[3] & bank_conflict_r;
+        bank_hit_r1 <= bank_hit_r;
+      end
+
+      always@(*)
+        no_precharge_r = ~bank_valid_r[3] & bank_conflict_r;
+
+      always@(posedge clk)
+        no_precharge_r1 <= no_precharge_r;
+
+
+      // Open-bank tracking table, ordered most- to least-recently activated.
+      // Slot 0 occupies bank_cmp_addr_r[CMP_WIDTH-1:0], slot 1 the next
+      // CMP_WIDTH bits, and so on; bank_valid_r[i] marks slot i as in use.
+      // The table is updated one cycle after CTRL_ACTIVE (state_r1), using
+      // the equally delayed address (af_addr_r3) and hit vector
+      // (bank_hit_r1). The shifting below is written out for four slots.
+      always @(posedge clk) begin
+        // Clear all bank valid bits during AR (i.e. since all banks get
+        // precharged during auto-refresh)
+        if ((state_r1 == CTRL_AUTO_REFRESH)) begin
+          // Replication counts cover the whole vectors. The earlier counts
+          // (OPEN_BANK_NUM-1 and OPEN_BANK_NUM*CMP_WIDTH-1) were one bit
+          // short, relied on implicit zero-extension, and would collapse to
+          // a zero-count replication if OPEN_BANK_NUM were 1.
+          // NOTE(review): assumes the vectors are declared OPEN_BANK_NUM and
+          // OPEN_BANK_NUM*CMP_WIDTH bits wide (declarations not visible
+          // here); the cleared value is all-zero either way.
+          bank_valid_r    <= {OPEN_BANK_NUM{1'b0}};
+          bank_cmp_addr_r <= {(OPEN_BANK_NUM*CMP_WIDTH){1'b0}};
+        end else begin
+          if (state_r1 == CTRL_ACTIVE) begin
+            // 00 is always going to have the latest bank and row.
+            bank_cmp_addr_r[CMP_WIDTH-1:0]
+              <= af_addr_r3[CS_RANGE_END:ROW_RANGE_START];
+            // This indicates the bank was activated
+            bank_valid_r[0] <= 1'b1;
+
+            // Age the older entries. Only the slots in front of the one
+            // that hit are shifted down; the hit slot itself is overwritten
+            // so the re-activated bank is not tracked twice.
+            case ({bank_hit_r1[2:0]})
+              3'b001: begin
+                // hit in slot 0: nothing to shift. These assignments repeat
+                // the unconditional slot-0 update above and are kept only
+                // to make this case explicit.
+                bank_cmp_addr_r[CMP_WIDTH-1:0]
+                  <= af_addr_r3[CS_RANGE_END:ROW_RANGE_START];
+                // This indicates the bank was activated
+                bank_valid_r[0] <= 1'b1;
+              end
+              3'b010: begin //(b0->b1)
+                bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH]
+                  <= bank_cmp_addr_r[CMP_WIDTH-1:0];
+                bank_valid_r[1] <= bank_valid_r[0];
+              end
+              3'b100:begin //(b0->b1, b1->b2)
+                bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH]
+                  <= bank_cmp_addr_r[CMP_WIDTH-1:0];
+                bank_cmp_addr_r[(3*CMP_WIDTH)-1:2*CMP_WIDTH]
+                  <= bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH];
+                bank_valid_r[1] <= bank_valid_r[0];
+                bank_valid_r[2] <= bank_valid_r[1];
+              end
+              // no hit in slots 0..2 (including a hit in slot 3): shift
+              // everything, dropping the oldest entry
+              default: begin //(b0->b1, b1->b2, b2->b3)
+                bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH]
+                  <= bank_cmp_addr_r[CMP_WIDTH-1:0];
+                bank_cmp_addr_r[(3*CMP_WIDTH)-1:2*CMP_WIDTH]
+                  <= bank_cmp_addr_r[(2*CMP_WIDTH)-1:CMP_WIDTH];
+                bank_cmp_addr_r[(4*CMP_WIDTH)-1:3*CMP_WIDTH]
+                  <= bank_cmp_addr_r[(3*CMP_WIDTH)-1:2*CMP_WIDTH];
+                bank_valid_r[1] <= bank_valid_r[0];
+                bank_valid_r[2] <= bank_valid_r[1];
+                bank_valid_r[3] <= bank_valid_r[2];
+              end
+            endcase
+          end
+        end
+      end
+    end else begin: gen_single_bank_open // single bank option
+      always @(posedge clk) begin
+        no_precharge_r       <= 1'd0;
+        no_precharge_r1      <= 1'd0;
+        no_precharge_wait_r  <= 1'd0;
+        if (rst_r1)
+          sb_open_add_r <= {CMP_WIDTH{1'b0}};
+        else if (state_r == CTRL_ACTIVE)
+          sb_open_add_r <= af_addr_r2[CS_RANGE_END:ROW_RANGE_START];
+      end
+    end
+  endgenerate
+
+  //***************************************************************************
+  // Timing counters
+  //***************************************************************************
+
+  //*****************************************************************
+  // Write and read enable generation for PHY
+  //*****************************************************************
+
+  // write burst count. Counts from (BL/2 to 1).
+  // Also logic for controller write enable.
+  always @(posedge clk) begin
+    if (state_r == CTRL_BURST_WRITE) begin
+      wrburst_cnt_r <= BURST_LEN_DIV2;
+    end else if (wrburst_cnt_r >= 3'd1)
+      wrburst_cnt_r <= wrburst_cnt_r - 1;
+  end // always @ (posedge clk)
+
+
+  always @(posedge clk) begin
+    if (rst_r1) begin
+      ctrl_wren   <= 1'b0;
+    end else if (state_r == CTRL_BURST_WRITE) begin
+      ctrl_wren   <= 1'b1;
+    end else if (wrburst_wren_ok_r)
+      ctrl_wren   <= 1'b0;
+  end
+
+
+  always @(posedge clk) begin
+    if ((state_r == CTRL_BURST_WRITE)
+        && (BURST_LEN_DIV2 > 2))
+      wrburst_ok_r <= 1'd0;
+    else if ((wrburst_cnt_r <= 3'd3) ||
+             (BURST_LEN_DIV2 <= 2))
+      wrburst_ok_r <= 1'b1;
+  end
+
+  // flag to check when wrburst count has reached
+  // a value of 1. This flag is used in the ctrl_wren
+  // logic
+  always @(posedge clk) begin
+     if(wrburst_cnt_r == 3'd2)
+       wrburst_wren_ok_r <=1'b1;
+     else
+       wrburst_wren_ok_r <= 1'b0;
+  end
+
+
+  // read burst count. Counts from (BL/2 to 1)
+  always @(posedge clk) begin
+   if (state_r == CTRL_BURST_READ) begin
+      rdburst_cnt_r <= BURST_LEN_DIV2;
+    end else if (rdburst_cnt_r >= 3'd1)
+      rdburst_cnt_r <= rdburst_cnt_r - 1;
+  end // always @ (posedge clk)
+
+
+   always @(posedge clk) begin
+    if (rst_r1) begin
+      ctrl_rden   <= 1'b0;
+    end else if (state_r == CTRL_BURST_READ) begin
+      ctrl_rden   <= 1'b1;
+    end else if (rdburst_rden_ok_r)
+      ctrl_rden   <= 1'b0;
+   end
+
+  // the rd_burst_ok_r signal will be asserted one cycle later
+  // in multi chip select cases if the back to back read is to
+  // different chip selects. The cs_changed_sticky_r signal will
+  // be asserted only for multi chip select cases.
+  always @(posedge clk) begin
+    if ((state_r == CTRL_BURST_READ)
+        && (BURST_LEN_DIV2 > 2))
+      rdburst_ok_r <= 1'd0;
+    else if ((rdburst_cnt_r <=( 3'd3 - cs_change_sticky_r)) ||
+             (BURST_LEN_DIV2 <= 2))
+      rdburst_ok_r <= 1'b1;
+  end
+
+  // flag to check when rdburst count has reached
+  // a value of 1. This flag is used in the ctrl_rden
+  // logic
+  always @(posedge clk) begin
+     if (rdburst_cnt_r == 3'd2)
+       rdburst_rden_ok_r <= 1'b1;
+     else
+       rdburst_rden_ok_r <= 1'b0;
+  end
+
+
+  //*****************************************************************
+  // Various delay counters
+  // The counters are checked for value of <= 3 to determine the
+  // if the count values are reached during different commands.
+  // It is checked for 3 because
+  // 1. The counters are loaded during the state when the command
+  //    state is reached (+1)
+  // 2. After the <= 3 condition is reached the sm takes two cycles
+  //    to transition to the new command state (+2)
+  //*****************************************************************
+
+  // tRP count - precharge command period
+  always @(posedge clk) begin
+    if (state_r == CTRL_PRECHARGE)
+      rp_cnt_r <= TRP_COUNT;
+    else if (rp_cnt_r != 4'd0)
+      rp_cnt_r <= rp_cnt_r - 1;
+  end
+
+  always @(posedge clk) begin
+    if (state_r == CTRL_PRECHARGE)
+      rp_cnt_ok_r <= 1'd0;
+    else if (rp_cnt_r <= 4'd3)
+      rp_cnt_ok_r <= 1'd1;
+  end
+
+  // tRFC count - refresh-refresh, refresh-active
+  always @(posedge clk) begin
+    if (state_r == CTRL_AUTO_REFRESH)
+      rfc_cnt_r <= TRFC_COUNT;
+    else if (rfc_cnt_r != 8'd0)
+      rfc_cnt_r <= rfc_cnt_r - 1;
+  end
+
+  always @(posedge clk) begin
+    if (state_r == CTRL_AUTO_REFRESH)
+      rfc_ok_r <= 1'b0;
+    else if(rfc_cnt_r <= 8'd3)
+      rfc_ok_r <= 1'b1;
+  end
+
+  // tRCD count - active to read/write
+  always @(posedge clk) begin
+    if (state_r == CTRL_ACTIVE)
+      rcd_cnt_r <= TRCD_COUNT;
+    else if (rcd_cnt_r != 4'd0)
+      rcd_cnt_r <= rcd_cnt_r - 1;
+  end
+
+  always @(posedge clk) begin
+    if ((state_r == CTRL_ACTIVE)
+        && (TRCD_COUNT > 2))
+      rcd_cnt_ok_r <= 1'd0;
+    else if (rcd_cnt_r <= 4'd3)
+      rcd_cnt_ok_r <= 1;
+  end
+
+  // tRRD count - active to active
+  always @(posedge clk) begin
+    if (state_r == CTRL_ACTIVE)
+      trrd_cnt_r <= TRRD_COUNT;
+    else if (trrd_cnt_r != 3'd0)
+      trrd_cnt_r <= trrd_cnt_r - 1;
+  end
+
+  always @(posedge clk) begin
+    if (state_r == CTRL_ACTIVE)
+      trrd_cnt_ok_r <= 1'd0;
+    else if (trrd_cnt_r <= 3'd3)
+      trrd_cnt_ok_r <= 1;
+  end
+
+  // tRAS count - active to precharge
+  always @(posedge clk) begin
+    if (state_r == CTRL_ACTIVE)
+      ras_cnt_r <= TRAS_COUNT;
+    else if (ras_cnt_r != 5'd0)
+      ras_cnt_r <= ras_cnt_r - 1;
+  end
+
+  // counter for write to precharge,
+  // read to precharge and
+  // activate to precharge.
+  // On a read, precharge_ok_cnt_r is only raised to the tRTP count
+  // (never lowered): the sm can go from activate to read before the
+  // act->pre time has been satisfied, and the rd->pre time is much
+  // shorter than act->pre, so the remaining act->pre count must be
+  // kept. The wr->pre time is almost the same as act->pre, so a
+  // write simply reloads the counter.
+  always @(posedge clk) begin
+    if (state_r == CTRL_BURST_READ) begin
+      // assign only if the cnt is < TRTP_COUNT
+      if (precharge_ok_cnt_r < TRTP_COUNT)
+        precharge_ok_cnt_r <= TRTP_COUNT;
+    end else if (state_r == CTRL_BURST_WRITE)
+      precharge_ok_cnt_r <= TWR_COUNT;
+    else if (state_r == CTRL_ACTIVE)
+      precharge_ok_cnt_r <= TRAS_COUNT;
+    else if (precharge_ok_cnt_r != 5'd0)
+      precharge_ok_cnt_r <= precharge_ok_cnt_r - 1;
+  end
+
+  always @(posedge clk) begin
+    if ((state_r == CTRL_BURST_READ) ||
+        (state_r == CTRL_BURST_WRITE)||
+        (state_r == CTRL_ACTIVE))
+      precharge_ok_r <= 1'd0;
+    else if(precharge_ok_cnt_r <= 5'd3)
+      precharge_ok_r <=1'd1;
+  end
+
+  // write to read counter
+  // write to read includes : write latency + burst time + tWTR
+  always @(posedge clk) begin
+    if (rst_r1)
+      wr_to_rd_cnt_r <= 5'd0;
+    else if (state_r == CTRL_BURST_WRITE)
+      wr_to_rd_cnt_r <= (TWTR_COUNT);
+    else if (wr_to_rd_cnt_r != 5'd0)
+      wr_to_rd_cnt_r <= wr_to_rd_cnt_r - 1;
+  end
+
+  always @(posedge clk) begin
+    if (state_r == CTRL_BURST_WRITE)
+      wr_to_rd_ok_r <= 1'd0;
+    else if (wr_to_rd_cnt_r <= 5'd3)
+      wr_to_rd_ok_r <= 1'd1;
+  end
+
+  // read to write counter
+  always @(posedge clk) begin
+    if (rst_r1)
+      rd_to_wr_cnt_r <= 5'd0;
+    else if (state_r == CTRL_BURST_READ)
+      rd_to_wr_cnt_r <= (TRTW_COUNT);
+    else if (rd_to_wr_cnt_r != 5'd0)
+      rd_to_wr_cnt_r <= rd_to_wr_cnt_r - 1;
+  end
+
+  always @(posedge clk) begin
+    if (state_r == CTRL_BURST_READ)
+      rd_to_wr_ok_r <= 1'b0;
+    else if (rd_to_wr_cnt_r <= 5'd3)
+      rd_to_wr_ok_r <= 1'b1;
+  end
+
+  always @(posedge clk) begin
+     if(refi_cnt_r == (TREFI_COUNT -1))
+       refi_cnt_ok_r <= 1'b1;
+     else
+       refi_cnt_ok_r <= 1'b0;
+  end
+
+  // auto refresh interval counter (clocked by clk)
+  always @(posedge clk) begin
+    if ((rst_r1) || (refi_cnt_ok_r))  begin
+      refi_cnt_r <= 12'd0;
+    end else begin
+      refi_cnt_r <= refi_cnt_r + 1;
+    end
+  end // always @ (posedge clk)
+
+  // auto refresh flag
+  always @(posedge clk) begin
+    if (refi_cnt_ok_r) begin
+      ref_flag_r <= 1'b1;
+    end else begin
+      ref_flag_r <= 1'b0;
+    end
+  end // always @ (posedge clk)
+
+  assign ctrl_ref_flag = ref_flag_r;
+
+  //refresh flag detect
+  //auto_ref high indicates auto_refresh requirement
+  //auto_ref is held high until auto refresh command is issued.
+  always @(posedge clk)begin
+    if (rst_r1)
+      auto_ref_r <= 1'b0;
+    else if (ref_flag_r)
+      auto_ref_r <= 1'b1;
+    else if (state_r == CTRL_AUTO_REFRESH)
+      auto_ref_r <= 1'b0;
+  end
+
+
+  // keep track of which chip selects got auto-refreshed (avoid auto-refreshing
+  // all CS's at once to avoid current spike)
+  always @(posedge clk)begin
+    if (rst_r1 || (state_r1 == CTRL_PRECHARGE))
+      auto_cnt_r <= 'd0;
+    else if (state_r1 == CTRL_AUTO_REFRESH)
+      auto_cnt_r <= auto_cnt_r + 1;
+  end
+
+  // register for timing purposes. Extra delay doesn't really matter
+  always @(posedge clk)
+    phy_init_done_r <= phy_init_done;
+
+  always @(posedge clk)begin
+    if (rst_r1) begin
+      state_r    <= CTRL_IDLE;
+      state_r1 <= CTRL_IDLE;
+    end else begin
+      state_r    <= next_state;
+      state_r1 <= state_r;
+    end
+  end
+
+  //***************************************************************************
+  // main control state machine
+  //***************************************************************************
+
+  // Combinational next-state logic. The two defaults below mean "stay in
+  // the current state" and "no write burst requested"; each case arm only
+  // overrides them when a transition is taken. wr_fifo_burst is raised in
+  // the same cycle in which a transition into CTRL_BURST_WRITE is chosen.
+  // NOTE(review): full_case tells synthesis that unlisted state encodings
+  // never occur, whereas simulation holds state through the default
+  // assignment - confirm that every reachable encoding is listed.
+  always @(*) begin
+    next_state = state_r;
+    wr_fifo_burst = 1'b0;
+    
+    (* full_case, parallel_case *) case (state_r)
+      CTRL_IDLE: begin
+        // perform auto refresh as soon as we are done with calibration.
+        // The calibration logic does not do any refreshes.
+        if (phy_init_done_r)
+          next_state = CTRL_AUTO_REFRESH;
+      end
+
+      CTRL_PRECHARGE: begin
+        if (auto_ref_r)
+          next_state = CTRL_PRECHARGE_WAIT1;
+        // when precharging an LRU bank, do not have to go to wait state
+        // since we can't possibly be activating row in same bank next
+        // disabled for 2t timing. There needs to be a gap between cmds
+        // in 2t timing
+        else if (no_precharge_wait_r && !TWO_T_TIME_EN)
+          next_state = CTRL_ACTIVE;
+        else
+          next_state = CTRL_PRECHARGE_WAIT;
+      end
+
+      CTRL_PRECHARGE_WAIT:begin
+        if (rp_cnt_ok_r)begin
+          if (auto_ref_r)
+            // precharge again to make sure we close all the banks
+            next_state = CTRL_PRECHARGE;
+          else
+            next_state = CTRL_ACTIVE;
+        end
+      end
+
+      // precharge issued on behalf of a refresh: wait out tRP, then refresh
+      CTRL_PRECHARGE_WAIT1:
+        if (rp_cnt_ok_r)
+          next_state = CTRL_AUTO_REFRESH;
+
+      CTRL_AUTO_REFRESH:
+        next_state = CTRL_AUTO_REFRESH_WAIT;
+
+      CTRL_AUTO_REFRESH_WAIT:
+      //staggering Auto refresh for multi
+      // chip select designs. The SM waits
+      // for the rfc time before issuing the
+      // next auto refresh.
+      // Note on structure: the "end else if (rfc_ok_r)" below pairs with
+      // "if (auto_cnt_r < (CS_NUM))", not with the inner "if (rfc_ok_r )".
+      // So the second half runs once auto_cnt_r has reached CS_NUM.
+        if (auto_cnt_r < (CS_NUM))begin
+           if (rfc_ok_r )
+              next_state = CTRL_AUTO_REFRESH;
+           end else if (rfc_ok_r)begin
+              if(auto_ref_r)
+                // MIG 2.3: For deep designs if Auto Refresh
+                // flag asserted immediately after calibration is completed
+                next_state = CTRL_PRECHARGE;
+              else if  ( wr_flag || rd_flag)
+                next_state = CTRL_ACTIVE;
+            end
+
+      CTRL_ACTIVE:
+        next_state = CTRL_ACTIVE_WAIT;
+
+      CTRL_ACTIVE_WAIT: begin
+        // nothing happens until the tRCD flag is set
+        if (rcd_cnt_ok_r) begin
+          // an unresolved conflict or a pending refresh takes priority over
+          // issuing the read/write
+          if ((conflict_detect_r && ~conflict_resolved_r) ||
+              auto_ref_r) begin
+            if (no_precharge_r1 && ~auto_ref_r && trrd_cnt_ok_r)
+              next_state = CTRL_ACTIVE;
+            else  if(precharge_ok_r)
+              next_state = CTRL_PRECHARGE;
+          end else if ((wr_flag_r) && (rd_to_wr_ok_r) && wr_fifo_clear) begin
+            next_state = CTRL_BURST_WRITE;
+            wr_fifo_burst = 1'b1;
+          end else if ((rd_flag_r)&& (wr_to_rd_ok_r) && rd_fifo_clear)
+            next_state = CTRL_BURST_READ;
+        end
+      end
+
+      // beginning of write burst
+      CTRL_BURST_WRITE: begin
+        if (BURST_LEN_DIV2 == 1) begin
+          // special case if BL = 2 (i.e. burst lasts only one clk cycle)
+          if (wr_flag && wr_fifo_clear) begin
+            // if we have another non-conflict write command right after the
+            // current write, then stay in this state
+            next_state = CTRL_BURST_WRITE;
+            wr_fifo_burst = 1'b1;
+          end else
+            // otherwise, if we're done with this burst, and have no write
+            // immediately scheduled after this one, wait until write-read
+            // delay has passed
+            next_state = CTRL_WRITE_WAIT;
+        end else
+          // otherwise BL > 2, and we  have at least one more write cycle for
+          // current burst
+          next_state = CTRL_WRITE_WAIT;
+        // continuation of write burst (also covers waiting after write burst
+        // has completed for write-read delay to pass)
+      end
+
+      CTRL_WRITE_WAIT: begin
+        // uses the unregistered conflict_detect / no_precharge_r here,
+        // unlike CTRL_ACTIVE_WAIT which uses the registered versions
+        if ((conflict_detect) || auto_ref_r) begin
+          if (no_precharge_r && ~auto_ref_r && wrburst_ok_r)
+            next_state = CTRL_ACTIVE;
+          else if (precharge_ok_r)
+            next_state = CTRL_PRECHARGE;
+        end else if (wrburst_ok_r && wr_flag && wr_fifo_clear) begin
+          next_state = CTRL_BURST_WRITE;
+          wr_fifo_burst = 1'b1;
+        end else if ((rd_flag) && (wr_to_rd_ok_r) && rd_fifo_clear)
+          next_state = CTRL_BURST_READ;
+      end
+
+      CTRL_BURST_READ: begin
+        if (BURST_LEN_DIV2 == 1) begin
+          // special case if BL = 2 (i.e. burst lasts only one clk cycle)
+          if (rd_flag && rd_fifo_clear)
+            next_state = CTRL_BURST_READ;
+          else
+            next_state = CTRL_READ_WAIT;
+        end else
+          next_state = CTRL_READ_WAIT;
+      end
+
+      CTRL_READ_WAIT: begin
+        if ((conflict_detect) || auto_ref_r)begin
+          if (no_precharge_r && ~auto_ref_r && rdburst_ok_r)
+            next_state = CTRL_ACTIVE;
+          else if (precharge_ok_r)
+            next_state = CTRL_PRECHARGE;
+        // for burst of 4 in multi chip select
+        // if there is a change in cs wait one cycle before the
+        // next read command. cs_change_r will be asserted.
+        end else if (rdburst_ok_r  && rd_flag && ~cs_change_r && rd_fifo_clear)
+          next_state = CTRL_BURST_READ;
+        else if (wr_flag && (rd_to_wr_ok_r) && wr_fifo_clear) begin
+          next_state = CTRL_BURST_WRITE;
+          wr_fifo_burst = 1'b1;
+        end
+      end
+    endcase
+  end
+
+  //***************************************************************************
+  // control signals to memory
+  //***************************************************************************
+
+  // Registered command strobes (active low). All three blocks share one
+  // pattern:
+  //  - while state_r is one of the listed command states, the strobe is
+  //    driven low and the matching two_t_enable_r bit is cleared;
+  //  - otherwise the two_t_enable_r bit is set, and the strobe is either
+  //    loaded from the bit's previous value (TWO_T_TIME_EN: low for one
+  //    more cycle right after a command state) or driven high.
+
+  // RAS: low for auto refresh, activate and precharge
+  always @(posedge clk) begin
+     if ((state_r == CTRL_AUTO_REFRESH) ||
+         (state_r == CTRL_ACTIVE) ||
+         (state_r == CTRL_PRECHARGE)) begin
+       ddr_ras_n_r <= 1'b0;
+       two_t_enable_r[0] <= 1'b0;
+     end else begin
+       if (TWO_T_TIME_EN)
+         ddr_ras_n_r <= two_t_enable_r[0] ;
+       else
+         ddr_ras_n_r <= 1'd1;
+       two_t_enable_r[0] <= 1'b1;
+     end
+  end
+
+  // CAS: low for write burst, read burst and auto refresh
+  always @(posedge clk)begin
+    if ((state_r == CTRL_BURST_WRITE) ||
+        (state_r == CTRL_BURST_READ) ||
+        (state_r == CTRL_AUTO_REFRESH)) begin
+      ddr_cas_n_r <= 1'b0;
+      two_t_enable_r[1] <= 1'b0;
+    end else begin
+      if (TWO_T_TIME_EN)
+        ddr_cas_n_r <= two_t_enable_r[1];
+      else
+        ddr_cas_n_r <= 1'b1;
+      two_t_enable_r[1] <= 1'b1;
+    end
+  end
+
+  // WE: low for write burst and precharge
+  always @(posedge clk) begin
+    if ((state_r == CTRL_BURST_WRITE) ||
+        (state_r == CTRL_PRECHARGE)) begin
+      ddr_we_n_r <= 1'b0;
+      two_t_enable_r[2] <= 1'b0;
+    end else begin
+      if(TWO_T_TIME_EN)
+        ddr_we_n_r <= two_t_enable_r[2];
+      else
+        ddr_we_n_r <= 1'b1;
+      two_t_enable_r[2] <= 1'b1;
+    end
+  end
+
+  // turn off auto-precharge when issuing commands (A10 = 0)
+  // mapping the col add for linear addressing.
+  generate
+    if (TWO_T_TIME_EN) begin: gen_addr_col_two_t
+      if (COL_WIDTH == ROW_WIDTH-1) begin: gen_ddr_addr_col_0
+        assign ddr_addr_col = {af_addr_r3[COL_WIDTH-1:10], 1'b0,
+                               af_addr_r3[9:0]};
+      end else begin
+        if (COL_WIDTH > 10) begin: gen_ddr_addr_col_1
+          assign ddr_addr_col = {{(ROW_WIDTH-COL_WIDTH-1){1'b0}},
+                                 af_addr_r3[COL_WIDTH-1:10], 1'b0,
+                                 af_addr_r3[9:0]};
+        end else begin: gen_ddr_addr_col_2
+          assign ddr_addr_col = {{(ROW_WIDTH-COL_WIDTH-1){1'b0}}, 1'b0,
+                               af_addr_r3[COL_WIDTH-1:0]};
+        end
+      end
+    end else begin: gen_addr_col_one_t
+      if (COL_WIDTH == ROW_WIDTH-1) begin: gen_ddr_addr_col_0_1
+        assign ddr_addr_col = {af_addr_r2[COL_WIDTH-1:10], 1'b0,
+                               af_addr_r2[9:0]};
+      end else begin
+        if (COL_WIDTH > 10) begin: gen_ddr_addr_col_1_1
+          assign ddr_addr_col = {{(ROW_WIDTH-COL_WIDTH-1){1'b0}},
+                                 af_addr_r2[COL_WIDTH-1:10], 1'b0,
+                                 af_addr_r2[9:0]};
+        end else begin: gen_ddr_addr_col_2_1
+          assign ddr_addr_col = {{(ROW_WIDTH-COL_WIDTH-1){1'b0}}, 1'b0,
+                                 af_addr_r2[COL_WIDTH-1:0]};
+        end
+      end
+    end
+  endgenerate
+
+  // Assign address during row activate
+  generate
+    if (TWO_T_TIME_EN)
+      assign ddr_addr_row = af_addr_r3[ROW_RANGE_END:ROW_RANGE_START];
+    else
+      assign ddr_addr_row = af_addr_r2[ROW_RANGE_END:ROW_RANGE_START];
+  endgenerate
+
+
+  // Registered address bus. The value is selected from the command state:
+  // row address for activate, column address for read/write bursts, and a
+  // fixed pattern for precharge. With TWO_T_TIME_EN the same value is also
+  // driven on the cycle after the command state (state_r1). On every other
+  // cycle the bus is assigned x.
+  always @(posedge clk)begin
+    if ((state_r == CTRL_ACTIVE) ||
+        ((state_r1 == CTRL_ACTIVE) && TWO_T_TIME_EN))
+      ddr_addr_r <= ddr_addr_row;
+    else if ((state_r == CTRL_BURST_WRITE) ||
+             (state_r == CTRL_BURST_READ)  ||
+             (((state_r1 == CTRL_BURST_WRITE) ||
+               (state_r1 == CTRL_BURST_READ)) &&
+              TWO_T_TIME_EN))
+      ddr_addr_r <= ddr_addr_col;
+    else if (((state_r == CTRL_PRECHARGE)  ||
+              ((state_r1 == CTRL_PRECHARGE) && TWO_T_TIME_EN))
+             && auto_ref_r) begin
+      // if we're precharging as a result of AUTO-REFRESH, precharge all banks
+      // (the second non-blocking assignment overrides bit 10 of the first,
+      // giving all zeros except A10 = 1)
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+      ddr_addr_r[10] <= 1'b1;
+    end else if ((state_r == CTRL_PRECHARGE) ||
+                 ((state_r1 == CTRL_PRECHARGE) && TWO_T_TIME_EN))
+      // if we're precharging to close a specific bank/row, set A10=0
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+    else
+      ddr_addr_r <= {ROW_WIDTH{1'bx}};
+  end
+
+  // Registered bank address. Normally taken from the current command
+  // address (af_addr_r3 with TWO_T_TIME_EN, af_addr_r2 otherwise); during a
+  // precharge caused by a bank conflict in multi-bank mode it is taken from
+  // the last slot of the open-bank table instead.
+  always @(posedge clk)begin
+    // whenever we're precharging, we're either: (1) precharging all banks (in
+    // which case banks bits are don't care, (2) precharging the LRU bank,
+    // b/c we've exceeded the limit of # of banks open (need to close the LRU
+    // bank to make room for a new one), (3) we haven't exceed the maximum #
+    // of banks open, but we trying to open a different row in a bank that's
+    // already open
+    if (((state_r == CTRL_PRECHARGE)  ||
+         ((state_r1 == CTRL_PRECHARGE) && TWO_T_TIME_EN)) &&
+        bank_conflict_r && MULTI_BANK_EN)
+      // When LRU bank needs to be closed
+      // NOTE(review): slot index 3 is hard-coded here, so this assumes a
+      // four-entry open-bank table - confirm before changing OPEN_BANK_NUM
+      ddr_ba_r <= bank_cmp_addr_r[(3*CMP_WIDTH)+CMP_BANK_RANGE_END:
+                                  (3*CMP_WIDTH)+CMP_BANK_RANGE_START];
+    else begin
+      // Either precharge due to refresh or bank hit case
+      if (TWO_T_TIME_EN)
+        ddr_ba_r <= af_addr_r3[BANK_RANGE_END:BANK_RANGE_START];
+      else
+        ddr_ba_r <= af_addr_r2[BANK_RANGE_END:BANK_RANGE_START];
+    end
+  end
+
+  // chip enable generation logic
+  generate
+    // if only one chip select, always assert it after reset
+    if (CS_BITS == 0) begin: gen_ddr_cs_0
+      always @(posedge clk)
+        if (rst_r1)
+          ddr_cs_n_r[0] <= 1'b1;
+        else
+          ddr_cs_n_r[0] <= 1'b0;
+    // otherwise if we have multiple chip selects
+      end else begin: gen_ddr_cs_1
+      if(TWO_T_TIME_EN) begin: gen_2t_cs
+         always @(posedge clk)
+           if (rst_r1)
+             ddr_cs_n_r <= {CS_NUM{1'b1}};
+           else if ((state_r1 == CTRL_AUTO_REFRESH)) begin
+             // if auto-refreshing, only auto-refresh one CS at any time (avoid
+             // beating on the ground plane by refreshing all CS's at same time)
+             ddr_cs_n_r <= {CS_NUM{1'b1}};
+             ddr_cs_n_r[auto_cnt_r] <= 1'b0;
+           end else if (auto_ref_r && (state_r1 == CTRL_PRECHARGE)) begin
+             ddr_cs_n_r <= {CS_NUM{1'b0}};
+           end else if ((state_r1 == CTRL_PRECHARGE) && ( bank_conflict_r
+                    && MULTI_BANK_EN))begin
+                  // precharging the LRU bank
+                  ddr_cs_n_r <= {CS_NUM{1'b1}};
+                  ddr_cs_n_r[bank_cmp_addr_r[(3*CMP_WIDTH)+CMP_CS_RANGE_END:
+                  (3*CMP_WIDTH)+CMP_CS_RANGE_START]] <= 1'b0;
+           end else begin
+          // otherwise, check the upper address bits to see which CS to assert
+             ddr_cs_n_r <= {CS_NUM{1'b1}};
+             ddr_cs_n_r[af_addr_r3[CS_RANGE_END:CS_RANGE_START]] <= 1'b0;
+           end // else: !if(((state_r == CTRL_PRECHARGE)  ||...
+        end else begin: gen_1t_cs // block: gen_2t_cs
+         always @(posedge clk)
+           if (rst_r1)
+             ddr_cs_n_r <= {CS_NUM{1'b1}};
+           else if ((state_r == CTRL_AUTO_REFRESH) ) begin
+             // if auto-refreshing, only auto-refresh one CS at any time (avoid
+             // beating on the ground plane by refreshing all CS's at same time)
+             ddr_cs_n_r <= {CS_NUM{1'b1}};
+             ddr_cs_n_r[auto_cnt_r] <= 1'b0;
+           end else if (auto_ref_r && (state_r == CTRL_PRECHARGE) ) begin
+             ddr_cs_n_r <= {CS_NUM{1'b0}};
+           end else if ((state_r == CTRL_PRECHARGE)  &&
+                 (bank_conflict_r && MULTI_BANK_EN))begin
+                  // precharging the LRU bank
+                  ddr_cs_n_r <= {CS_NUM{1'b1}};
+                  ddr_cs_n_r[bank_cmp_addr_r[(3*CMP_WIDTH)+CMP_CS_RANGE_END:
+                  (3*CMP_WIDTH)+CMP_CS_RANGE_START]] <= 1'b0;
+           end else begin
+          // otherwise, check the upper address bits to see which CS to assert
+             ddr_cs_n_r <= {CS_NUM{1'b1}};
+             ddr_cs_n_r[af_addr_r2[CS_RANGE_END:CS_RANGE_START]] <= 1'b0;
+           end // else: !if(((state_r == CTRL_PRECHARGE)  ||...
+        end // block: gen_1t_cs
+    end
+  endgenerate
+
+  // registering the two_t timing enable signal.
+  // This signal will be asserted (low) when the
+  // chip select has to be asserted.
+  always @(posedge clk)begin
+     if(&two_t_enable_r)
+        two_t_enable_r1 <= {CS_NUM{1'b1}};
+     else
+        two_t_enable_r1 <= {CS_NUM{1'b0}};
+  end
+
+  assign ctrl_addr  = ddr_addr_r;
+  assign ctrl_ba    = ddr_ba_r;
+  assign ctrl_ras_n = ddr_ras_n_r;
+  assign ctrl_cas_n = ddr_cas_n_r;
+  assign ctrl_we_n  = ddr_we_n_r;
+  assign ctrl_cs_n  = (TWO_T_TIME_EN) ?
+                      (ddr_cs_n_r | two_t_enable_r1) :
+                      ddr_cs_n_r;
+
+endmodule
+
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_idelay_ctrl.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_idelay_ctrl.v
new file mode 100644 (file)
index 0000000..fea0025
--- /dev/null
@@ -0,0 +1,97 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+// 
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a 
+// license to use this text/file solely for design, simulation, 
+// implementation and creation of design files limited 
+// to Xilinx devices or technologies. Use with non-Xilinx 
+// devices or technologies is expressly prohibited and 
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information 
+// "as-is" solely for use in developing programs and 
+// solutions for Xilinx devices, with no obligation on the 
+// part of Xilinx to provide support. By providing this design, 
+// code, or information as one possible implementation of 
+// this feature, application or standard, Xilinx is making no 
+// representation that this implementation is free from any 
+// claims of infringement. You are responsible for 
+// obtaining any rights you may require for your implementation. 
+// Xilinx expressly disclaims any warranty whatsoever with 
+// respect to the adequacy of the implementation, including 
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied 
+// warranties of merchantability or fitness for a particular 
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are 
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part 
+// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_idelay_ctrl.v
+// /___/   /\     Date Last Modified: $Date: 2008/05/08 15:20:47 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module instantiates the IDELAYCTRL primitive of the Virtex-5 device
+//   which continuously calibrates the IDELAY elements in the region in case of
+//   varying operating conditions. It takes a 200MHz clock as an input
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_idelay_ctrl #
+  (
+   // The default below is for the 72-bit RDIMM design (ML561 Reference
+   // board design). Actual values may be different: the actual parameter
+   // value is passed from the design top module ddr2_sdram. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter IDELAYCTRL_NUM  = 4   // number of IDELAYCTRL primitives instantiated
+   )
+
+  (
+   input  clk200,            // drives REFCLK of every IDELAYCTRL instance
+   input  rst200,            // drives RST of every IDELAYCTRL instance
+   output idelay_ctrl_rdy    // high only when every instance reports RDY
+   );
+
+wire [IDELAYCTRL_NUM-1 : 0] idelay_ctrl_rdy_i;  // one RDY bit per instance
+
+genvar bnk_i;
+generate  // IDELAYCTRL_NUM identical instances sharing clk200 and rst200
+for(bnk_i=0; bnk_i<IDELAYCTRL_NUM; bnk_i=bnk_i+1)begin : IDELAYCTRL_INST
+IDELAYCTRL u_idelayctrl
+  (
+   .RDY(idelay_ctrl_rdy_i[bnk_i]),
+   .REFCLK(clk200),
+   .RST(rst200)
+   );
+end
+endgenerate
+
+assign idelay_ctrl_rdy = &idelay_ctrl_rdy_i;  // reduction AND of all RDY bits
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_infrastructure.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_infrastructure.v
new file mode 100644 (file)
index 0000000..6420010
--- /dev/null
@@ -0,0 +1,156 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+// 
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a 
+// license to use this text/file solely for design, simulation, 
+// implementation and creation of design files limited 
+// to Xilinx devices or technologies. Use with non-Xilinx 
+// devices or technologies is expressly prohibited and 
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information 
+// "as-is" solely for use in developing programs and 
+// solutions for Xilinx devices, with no obligation on the 
+// part of Xilinx to provide support. By providing this design, 
+// code, or information as one possible implementation of 
+// this feature, application or standard, Xilinx is making no 
+// representation that this implementation is free from any 
+// claims of infringement. You are responsible for 
+// obtaining any rights you may require for your implementation. 
+// Xilinx expressly disclaims any warranty whatsoever with 
+// respect to the adequacy of the implementation, including 
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied 
+// warranties of merchantability or fitness for a particular 
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are 
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part 
+// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_infrastructure.v
+// /___/   /\     Date Last Modified: $Date: 2008/05/08 15:20:47 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   Clock distribution and reset synchronization
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_infrastructure #
+  (
+   parameter RST_ACT_LOW  = 1   // nonzero: sys_rst_n is inverted (active low); 0: used as-is
+   )
+  (
+   input clk0,               // clocks the rst0 synchronizer
+   input clk90,              // clocks the rst90 synchronizer
+   input clk200,             // clocks the rst200 synchronizer
+   input clkdiv0,            // clocks the rstdiv0 synchronizer
+   input dcm_lock,           // low asserts all four resets
+   input  sys_rst_n,         // external reset, polarity selected by RST_ACT_LOW
+   input  idelay_ctrl_rdy,   // low asserts rst0, rst90 and rstdiv0 (not rst200)
+   output rst0,
+   output rst90,
+   output rst200,
+   output rstdiv0
+   );
+
+  // # of clock cycles to delay deassertion of reset. Needs to be a fairly
+  // high number not so much for metastability protection, but to give time
+  // for reset (i.e. stable clock cycles) to propagate through all state
+  // machines and to all control signals (i.e. not all control signals have
+  // resets, instead they rely on base state logic being reset, and the effect
+  // of that reset propagating through the logic). Need this because we may not
+  // be getting stable clock cycles while reset asserted (i.e. since reset
+  // depends on DCM lock status)
+  localparam RST_SYNC_NUM = 25;  // the clkdiv0 chain uses RST_SYNC_NUM/2 stages
+
+  reg [RST_SYNC_NUM-1:0]     rst0_sync_r    /* synthesis syn_maxfan = 10 */;
+  reg [RST_SYNC_NUM-1:0]     rst200_sync_r  /* synthesis syn_maxfan = 10 */;
+  reg [RST_SYNC_NUM-1:0]     rst90_sync_r   /* synthesis syn_maxfan = 10 */;
+  reg [(RST_SYNC_NUM/2)-1:0] rstdiv0_sync_r /* synthesis syn_maxfan = 10 */;
+  wire                       rst_tmp;        // asynchronous reset request (see below)
+  wire                       sys_clk_ibufg;  // declared but not referenced in this module
+  wire                       sys_rst;        // active-high form of sys_rst_n
+
+  assign sys_rst = RST_ACT_LOW ? ~sys_rst_n: sys_rst_n;
+
+
+
+  //***************************************************************************
+  // Reset synchronization
+  // NOTES:
+  //   1. shut down the whole operation if the DCM hasn't yet locked (and by
+  //      inference, this means that external SYS_RST_IN has been asserted -
+  //      DCM deasserts DCM_LOCK as soon as SYS_RST_IN asserted)
+  //   2. In the case of all resets except rst200, also assert reset if the
+  //      IDELAY master controller is not yet ready
+  //   3. asynchronously assert reset. This way we can assert reset even if
+  //      there is no clock (needed for things like 3-stating output buffers).
+  //      reset deassertion is synchronous.
+  //***************************************************************************
+
+  assign rst_tmp = sys_rst | ~dcm_lock | ~idelay_ctrl_rdy;
+
+  // synthesis attribute max_fanout of rst0_sync_r is 10
+  always @(posedge clk0 or posedge rst_tmp)
+    if (rst_tmp)
+      rst0_sync_r <= {RST_SYNC_NUM{1'b1}};
+    else
+      // logical left shift by one (pads with 0)
+      rst0_sync_r <= rst0_sync_r << 1;
+
+  // synthesis attribute max_fanout of rstdiv0_sync_r is 10
+  always @(posedge clkdiv0 or posedge rst_tmp)
+    if (rst_tmp)
+      rstdiv0_sync_r <= {(RST_SYNC_NUM/2){1'b1}};
+    else
+      // logical left shift by one (pads with 0)
+      rstdiv0_sync_r <= rstdiv0_sync_r << 1;
+
+  // synthesis attribute max_fanout of rst90_sync_r is 10
+  always @(posedge clk90 or posedge rst_tmp)
+    if (rst_tmp)
+      rst90_sync_r <= {RST_SYNC_NUM{1'b1}};
+    else
+      rst90_sync_r <= rst90_sync_r << 1;  // same zero-fill left shift as above
+
+  // make sure CLK200 doesn't depend on IDELAY_CTRL_RDY, else chicken n' egg
+   // synthesis attribute max_fanout of rst200_sync_r is 10
+  always @(posedge clk200 or negedge dcm_lock)  // sys_rst and idelay_ctrl_rdy are not used here
+    if (!dcm_lock)
+      rst200_sync_r <= {RST_SYNC_NUM{1'b1}};
+    else
+      rst200_sync_r <= rst200_sync_r << 1;
+
+  // Each reset output is the MSB of its shift register, so it stays high
+  // until the zeros shifted in at bit 0 have reached the top bit.
+  assign rst0    = rst0_sync_r[RST_SYNC_NUM-1];
+  assign rst90   = rst90_sync_r[RST_SYNC_NUM-1];
+  assign rst200  = rst200_sync_r[RST_SYNC_NUM-1];
+  assign rstdiv0 = rstdiv0_sync_r[(RST_SYNC_NUM/2)-1];
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_mem_if_top.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_mem_if_top.v
new file mode 100644 (file)
index 0000000..69454c0
--- /dev/null
@@ -0,0 +1,408 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_mem_if_top.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/22 15:41:06 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR/DDR2
+//Purpose:
+//   Top-level for parameterizable (DDR or DDR2) memory interface
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_mem_if_top #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameters values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter BANK_WIDTH            = 2,
+   parameter CKE_WIDTH             = 1,
+   parameter CLK_WIDTH             = 1,
+   parameter COL_WIDTH             = 10,
+   parameter CS_BITS               = 0,
+   parameter CS_NUM                = 1,
+   parameter CS_WIDTH              = 1,
+   parameter USE_DM_PORT           = 1,
+   parameter DM_WIDTH              = 9,
+   parameter DQ_WIDTH              = 72,
+   parameter DQ_BITS               = 7,
+   parameter DQ_PER_DQS            = 8,
+   parameter DQS_BITS              = 4,
+   parameter DQS_WIDTH             = 9,
+   parameter HIGH_PERFORMANCE_MODE = "TRUE",
+   parameter ODT_WIDTH             = 1,
+   parameter ROW_WIDTH             = 14,
+   parameter APPDATA_WIDTH         = 144,
+   parameter ADDITIVE_LAT          = 0,
+   parameter BURST_LEN             = 4,
+   parameter BURST_TYPE            = 0,
+   parameter CAS_LAT               = 5,
+   parameter ECC_ENABLE            = 0,
+   parameter MULTI_BANK_EN         = 1,
+   parameter TWO_T_TIME_EN         = 0,
+   parameter ODT_TYPE              = 1,
+   parameter DDR_TYPE              = 1,
+   parameter REDUCE_DRV            = 0,
+   parameter REG_ENABLE            = 1,
+   parameter TREFI_NS              = 7800,
+   parameter TRAS                  = 40000,
+   parameter TRCD                  = 15000,
+   parameter TRFC                  = 105000,
+   parameter TRP                   = 15000,
+   parameter TRTP                  = 7500,
+   parameter TWR                   = 15000,
+   parameter TWTR                  = 10000,
+   parameter CLK_PERIOD            = 3000,
+   parameter SIM_ONLY              = 0,
+   parameter DEBUG_EN              = 0,
+   parameter DQS_IO_COL            = 0,
+   parameter DQ_IO_MS              = 0,
+   parameter EN_SYN                = "FALSE"
+   )
+  (
+   input                                    clk0,
+   input                                    clk90,
+   input                                    clkdiv0,
+   input                                    rst0,
+   input                                    rst90,
+   input                                    rstdiv0,
+   //added by xtan & gdgib
+   input                                   af_clk,                     //address fifo clk
+   input                                   rb_clk,                     //read buffer clk
+   input                                   wb_clk,                     //write buffer clk
+   input                                   af_rst,                     //address fifo rst
+   input                                   rb_rst,                     //read buffer rst
+   input                                   wb_rst,                     //write buffer rst
+   output                                  rb_full,                    //read buffer is full
+   //end of add
+   input [2:0]                              app_af_cmd,
+   input [30:0]                             app_af_addr,
+   input                                    app_af_wren,
+   input                                    app_wdf_wren,
+   input [APPDATA_WIDTH-1:0]                app_wdf_data,
+   input [(APPDATA_WIDTH/8)-1:0]            app_wdf_mask_data,
+   output [1:0]                             rd_ecc_error,
+   output                                   app_af_afull,
+   output                                   app_wdf_afull,
+   output                                   rd_data_valid,
+   input                                    rd_data_rden,
+   output [APPDATA_WIDTH-1:0]               rd_data_fifo_out,
+   output                                   phy_init_done,
+   output [CLK_WIDTH-1:0]                   ddr_ck,
+   output [CLK_WIDTH-1:0]                   ddr_ck_n,
+   output [ROW_WIDTH-1:0]                   ddr_addr,
+   output [BANK_WIDTH-1:0]                  ddr_ba,
+   output                                   ddr_ras_n,
+   output                                   ddr_cas_n,
+   output                                   ddr_we_n,
+   output [CS_WIDTH-1:0]                    ddr_cs_n,
+   output [CKE_WIDTH-1:0]                   ddr_cke,
+   output [ODT_WIDTH-1:0]                   ddr_odt,
+   output [DM_WIDTH-1:0]                    ddr_dm,
+   inout [DQS_WIDTH-1:0]                    ddr_dqs,
+   inout [DQS_WIDTH-1:0]                    ddr_dqs_n,
+   inout [DQ_WIDTH-1:0]                     ddr_dq,
+   // Debug signals (optional use)
+   input                                    dbg_idel_up_all,
+   input                                    dbg_idel_down_all,
+   input                                    dbg_idel_up_dq,
+   input                                    dbg_idel_down_dq,
+   input                                    dbg_idel_up_dqs,
+   input                                    dbg_idel_down_dqs,
+   input                                    dbg_idel_up_gate,
+   input                                    dbg_idel_down_gate,
+   input [DQ_BITS-1:0]                      dbg_sel_idel_dq,
+   input                                    dbg_sel_all_idel_dq,
+   input [DQS_BITS:0]                       dbg_sel_idel_dqs,
+   input                                    dbg_sel_all_idel_dqs,
+   input [DQS_BITS:0]                       dbg_sel_idel_gate,
+   input                                    dbg_sel_all_idel_gate,
+   output [3:0]                             dbg_calib_done,
+   output [3:0]                             dbg_calib_err,
+   output [(6*DQ_WIDTH)-1:0]                dbg_calib_dq_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]               dbg_calib_dqs_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]               dbg_calib_gate_tap_cnt,
+   output [DQS_WIDTH-1:0]                   dbg_calib_rd_data_sel,
+   output [(5*DQS_WIDTH)-1:0]               dbg_calib_rden_dly,
+   output [(5*DQS_WIDTH)-1:0]               dbg_calib_gate_dly
+   );
+
+  wire [30:0]                       af_addr;
+  wire [2:0]                        af_cmd;
+  wire                              af_empty;
+  wire [ROW_WIDTH-1:0]              ctrl_addr;
+  wire                              ctrl_af_rden;
+  wire [BANK_WIDTH-1:0]             ctrl_ba;
+  wire                              ctrl_cas_n;
+  wire [CS_NUM-1:0]                 ctrl_cs_n;
+  wire                              ctrl_ras_n;
+  wire                              ctrl_rden;
+  wire                              ctrl_ref_flag;
+  wire                              ctrl_we_n;
+  wire                              ctrl_wren;
+  wire [DQS_WIDTH-1:0]              phy_calib_rden;
+  wire [DQS_WIDTH-1:0]              phy_calib_rden_sel;
+  wire [DQ_WIDTH-1:0]               rd_data_fall;
+  wire [DQ_WIDTH-1:0]               rd_data_rise;
+  wire [(2*DQ_WIDTH)-1:0]           wdf_data;
+  wire [((2*DQ_WIDTH)/8)-1:0]       wdf_mask_data;
+  wire                              wdf_rden;
+  wire                              wr_fifo_clear, wr_fifo_burst, rd_fifo_clear;  // connect u_usr_top and u_ctrl
+
+  //***************************************************************************
+  // u_phy_top: the only instance wired to the ddr_* pins and the dbg_* ports.
+  ddr2_phy_top #
+    (
+     .BANK_WIDTH            (BANK_WIDTH),
+     .CKE_WIDTH             (CKE_WIDTH),
+     .CLK_WIDTH             (CLK_WIDTH),
+     .COL_WIDTH             (COL_WIDTH),
+     .CS_NUM                (CS_NUM),
+     .CS_WIDTH              (CS_WIDTH),
+     .USE_DM_PORT           (USE_DM_PORT),
+     .DM_WIDTH              (DM_WIDTH),
+     .DQ_WIDTH              (DQ_WIDTH),
+     .DQ_BITS               (DQ_BITS),
+     .DQ_PER_DQS            (DQ_PER_DQS),
+     .DQS_BITS              (DQS_BITS),
+     .DQS_WIDTH             (DQS_WIDTH),
+     .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE),
+     .ODT_WIDTH             (ODT_WIDTH),
+     .ROW_WIDTH             (ROW_WIDTH),
+     .TWO_T_TIME_EN         (TWO_T_TIME_EN),
+     .ADDITIVE_LAT          (ADDITIVE_LAT),
+     .BURST_LEN             (BURST_LEN),
+     .BURST_TYPE            (BURST_TYPE),
+     .CAS_LAT               (CAS_LAT),
+     .ECC_ENABLE            (ECC_ENABLE),
+     .ODT_TYPE              (ODT_TYPE),
+     .DDR_TYPE              (DDR_TYPE),
+     .REDUCE_DRV            (REDUCE_DRV),
+     .REG_ENABLE            (REG_ENABLE),
+     .TWR                   (TWR),
+     .CLK_PERIOD            (CLK_PERIOD),
+     .SIM_ONLY              (SIM_ONLY),
+     .DEBUG_EN              (DEBUG_EN),
+     .DQS_IO_COL            (DQS_IO_COL),
+     .DQ_IO_MS              (DQ_IO_MS)
+     )
+    u_phy_top
+      (
+       .clk0                   (clk0),
+       .clk90                  (clk90),
+       .clkdiv0                (clkdiv0),
+       .rst0                   (rst0),
+       .rst90                  (rst90),
+       .rstdiv0                (rstdiv0),
+       .ctrl_wren              (ctrl_wren),
+       .ctrl_addr              (ctrl_addr),
+       .ctrl_ba                (ctrl_ba),
+       .ctrl_ras_n             (ctrl_ras_n),
+       .ctrl_cas_n             (ctrl_cas_n),
+       .ctrl_we_n              (ctrl_we_n),
+       .ctrl_cs_n              (ctrl_cs_n),
+       .ctrl_rden              (ctrl_rden),
+       .ctrl_ref_flag          (ctrl_ref_flag),
+       .wdf_data               (wdf_data),
+       .wdf_mask_data          (wdf_mask_data),
+       .wdf_rden               (wdf_rden),
+       .phy_init_done          (phy_init_done),
+       .phy_calib_rden         (phy_calib_rden),
+       .phy_calib_rden_sel     (phy_calib_rden_sel),
+       .rd_data_rise           (rd_data_rise),
+       .rd_data_fall           (rd_data_fall),
+       .ddr_ck                 (ddr_ck),
+       .ddr_ck_n               (ddr_ck_n),
+       .ddr_addr               (ddr_addr),
+       .ddr_ba                 (ddr_ba),
+       .ddr_ras_n              (ddr_ras_n),
+       .ddr_cas_n              (ddr_cas_n),
+       .ddr_we_n               (ddr_we_n),
+       .ddr_cs_n               (ddr_cs_n),
+       .ddr_cke                (ddr_cke),
+       .ddr_odt                (ddr_odt),
+       .ddr_dm                 (ddr_dm),
+       .ddr_dqs                (ddr_dqs),
+       .ddr_dqs_n              (ddr_dqs_n),
+       .ddr_dq                 (ddr_dq),
+       .dbg_idel_up_all        (dbg_idel_up_all),
+       .dbg_idel_down_all      (dbg_idel_down_all),
+       .dbg_idel_up_dq         (dbg_idel_up_dq),
+       .dbg_idel_down_dq       (dbg_idel_down_dq),
+       .dbg_idel_up_dqs        (dbg_idel_up_dqs),
+       .dbg_idel_down_dqs      (dbg_idel_down_dqs),
+       .dbg_idel_up_gate       (dbg_idel_up_gate),
+       .dbg_idel_down_gate     (dbg_idel_down_gate),
+       .dbg_sel_idel_dq        (dbg_sel_idel_dq),
+       .dbg_sel_all_idel_dq    (dbg_sel_all_idel_dq),
+       .dbg_sel_idel_dqs       (dbg_sel_idel_dqs),
+       .dbg_sel_all_idel_dqs   (dbg_sel_all_idel_dqs),
+       .dbg_sel_idel_gate      (dbg_sel_idel_gate),
+       .dbg_sel_all_idel_gate  (dbg_sel_all_idel_gate),
+       .dbg_calib_done         (dbg_calib_done),
+       .dbg_calib_err          (dbg_calib_err),
+       .dbg_calib_dq_tap_cnt   (dbg_calib_dq_tap_cnt),
+       .dbg_calib_dqs_tap_cnt  (dbg_calib_dqs_tap_cnt),
+       .dbg_calib_gate_tap_cnt (dbg_calib_gate_tap_cnt),
+       .dbg_calib_rd_data_sel  (dbg_calib_rd_data_sel),
+       .dbg_calib_rden_dly     (dbg_calib_rden_dly),
+       .dbg_calib_gate_dly     (dbg_calib_gate_dly)
+       );
+  // u_usr_top: wired to the app_* ports, the af/rb/wb clocks and resets, and the rd_data_* ports.
+  ddr2_usr_top #
+    (
+     .BANK_WIDTH    (BANK_WIDTH),
+     .COL_WIDTH     (COL_WIDTH),
+     .CS_BITS       (CS_BITS),
+     .DQ_WIDTH      (DQ_WIDTH),
+     .DQ_PER_DQS    (DQ_PER_DQS),
+     .DQS_WIDTH     (DQS_WIDTH),
+     .APPDATA_WIDTH (APPDATA_WIDTH),
+     .APPDATA_BURST_LEN(BURST_LEN * DQ_WIDTH / APPDATA_WIDTH),  // integer division; 4*72/144 = 2 with the defaults above
+     .APPDATA_BURST_BITS(3),  // NOTE(review): hard-coded; presumably must be wide enough to count APPDATA_BURST_LEN - confirm in ddr2_usr_top
+     .ECC_ENABLE    (ECC_ENABLE),
+     .ROW_WIDTH     (ROW_WIDTH),
+     .EN_SYN        (EN_SYN)
+     )
+    u_usr_top
+      (
+       .clk0              (clk0),
+       .clk90             (clk90),
+       .rst0              (rst0),
+       .af_clk            (af_clk),
+       .rb_clk            (rb_clk),
+       .wb_clk            (wb_clk),
+       .af_rst            (af_rst),
+       .rb_rst            (rb_rst),
+       .wb_rst            (wb_rst),
+       .rb_full           (rb_full),
+       .rd_data_in_rise   (rd_data_rise),
+       .rd_data_in_fall   (rd_data_fall),
+       .phy_calib_rden    (phy_calib_rden),
+       .phy_calib_rden_sel(phy_calib_rden_sel),
+       .rd_data_valid     (rd_data_valid),
+       .rd_data_rden      (rd_data_rden),
+       .rd_ecc_error      (rd_ecc_error),
+       .rd_data_fifo_out  (rd_data_fifo_out),
+       .app_af_cmd        (app_af_cmd),
+       .app_af_addr       (app_af_addr),
+       .app_af_wren       (app_af_wren),
+       .ctrl_af_rden      (ctrl_af_rden),
+       .af_cmd            (af_cmd),
+       .af_addr           (af_addr),
+       .af_empty          (af_empty),
+       .app_af_afull      (app_af_afull),
+       .app_wdf_wren      (app_wdf_wren),
+       .app_wdf_data      (app_wdf_data),
+       .app_wdf_mask_data (app_wdf_mask_data),
+       .wdf_rden          (wdf_rden),
+       .app_wdf_afull     (app_wdf_afull),
+       .wdf_data          (wdf_data),
+       .wdf_mask_data     (wdf_mask_data),
+       .wr_fifo_clear     (wr_fifo_clear),
+       .wr_fifo_burst     (wr_fifo_burst),
+       .rd_fifo_clear     (rd_fifo_clear)
+       );
+
+  // u_ctrl: clocked by clk0/rst0; its ctrl_* command/address nets also connect to u_phy_top.
+  ddr2_ctrl #
+    (
+     .BANK_WIDTH    (BANK_WIDTH),
+     .COL_WIDTH     (COL_WIDTH),
+     .CS_BITS       (CS_BITS),
+     .CS_NUM        (CS_NUM),
+     .ROW_WIDTH     (ROW_WIDTH),
+     .ADDITIVE_LAT  (ADDITIVE_LAT),
+     .BURST_LEN     (BURST_LEN),
+     .CAS_LAT       (CAS_LAT),
+     .ECC_ENABLE    (ECC_ENABLE),
+     .REG_ENABLE    (REG_ENABLE),
+     .MULTI_BANK_EN (MULTI_BANK_EN),
+     .TWO_T_TIME_EN (TWO_T_TIME_EN),
+     .TREFI_NS      (TREFI_NS),
+     .TRAS          (TRAS),
+     .TRCD          (TRCD),
+     .TRFC          (TRFC),
+     .TRP           (TRP),
+     .TRTP          (TRTP),
+     .TWR           (TWR),
+     .TWTR          (TWTR),
+     .CLK_PERIOD    (CLK_PERIOD),
+     .DDR_TYPE      (DDR_TYPE)
+     )
+    u_ctrl
+      (
+       .clk           (clk0),
+       .rst           (rst0),
+       .af_cmd        (af_cmd),
+       .af_addr       (af_addr),
+       .af_empty      (af_empty),
+       .wr_fifo_clear (wr_fifo_clear),
+       .wr_fifo_burst (wr_fifo_burst),
+       .rd_fifo_clear (rd_fifo_clear),
+       .phy_init_done (phy_init_done),
+       .ctrl_ref_flag (ctrl_ref_flag),
+       .ctrl_af_rden  (ctrl_af_rden),
+       .ctrl_wren     (ctrl_wren),
+       .ctrl_rden     (ctrl_rden),
+       .ctrl_addr     (ctrl_addr),
+       .ctrl_ba       (ctrl_ba),
+       .ctrl_ras_n    (ctrl_ras_n),
+       .ctrl_cas_n    (ctrl_cas_n),
+       .ctrl_we_n     (ctrl_we_n),
+       .ctrl_cs_n     (ctrl_cs_n)
+       );
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_calib.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_calib.v
new file mode 100644 (file)
index 0000000..e4deda8
--- /dev/null
@@ -0,0 +1,2353 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+// 
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a 
+// license to use this text/file solely for design, simulation, 
+// implementation and creation of design files limited 
+// to Xilinx devices or technologies. Use with non-Xilinx 
+// devices or technologies is expressly prohibited and 
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information 
+// "as-is" solely for use in developing programs and 
+// solutions for Xilinx devices, with no obligation on the 
+// part of Xilinx to provide support. By providing this design, 
+// code, or information as one possible implementation of 
+// this feature, application or standard, Xilinx is making no 
+// representation that this implementation is free from any 
+// claims of infringement. You are responsible for 
+// obtaining any rights you may require for your implementation. 
+// Xilinx expressly disclaims any warranty whatsoever with 
+// respect to the adequacy of the implementation, including 
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied 
+// warranties of merchantability or fitness for a particular 
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are 
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part 
+// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_calib.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/02 14:03:08 $
+// \   \  /  \    Date Created: Thu Aug 10 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module handles calibration after memory initialization.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_phy_calib #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference 
+   // board design). Actual values may be different. Actual parameters values 
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter DQ_WIDTH      = 72,
+   parameter DQ_BITS       = 7,
+   parameter DQ_PER_DQS    = 8,
+   parameter DQS_BITS      = 4,
+   parameter DQS_WIDTH     = 9,
+   parameter ADDITIVE_LAT  = 0,
+   parameter CAS_LAT       = 5,
+   parameter REG_ENABLE    = 1,
+   parameter CLK_PERIOD    = 3000,
+   parameter SIM_ONLY      = 0,
+   parameter DEBUG_EN      = 0
+   )
+  (
+   input                                   clk,
+   input                                   clkdiv,
+   input                                   rstdiv,
+   input [3:0]                             calib_start,
+   input                                   ctrl_rden,
+   input                                   phy_init_rden,
+   input [DQ_WIDTH-1:0]                    rd_data_rise,
+   input [DQ_WIDTH-1:0]                    rd_data_fall,
+   input                                   calib_ref_done,
+   output reg [3:0]                        calib_done,
+   output reg                              calib_ref_req,
+   output [DQS_WIDTH-1:0]                  calib_rden,
+   output reg [DQS_WIDTH-1:0]              calib_rden_sel,
+   output reg                              dlyrst_dq,
+   output reg [DQ_WIDTH-1:0]               dlyce_dq,
+   output reg [DQ_WIDTH-1:0]               dlyinc_dq,
+   output reg                              dlyrst_dqs,
+   output reg [DQS_WIDTH-1:0]              dlyce_dqs,
+   output reg [DQS_WIDTH-1:0]              dlyinc_dqs,
+   output reg [DQS_WIDTH-1:0]              dlyrst_gate,
+   output reg [DQS_WIDTH-1:0]              dlyce_gate,
+   output reg [DQS_WIDTH-1:0]              dlyinc_gate,
+   output [DQS_WIDTH-1:0]                  en_dqs,
+   output [DQS_WIDTH-1:0]                  rd_data_sel,
+   // Debug signals (optional use)
+   input                                   dbg_idel_up_all,
+   input                                   dbg_idel_down_all,
+   input                                   dbg_idel_up_dq,
+   input                                   dbg_idel_down_dq,
+   input                                   dbg_idel_up_dqs,
+   input                                   dbg_idel_down_dqs,
+   input                                   dbg_idel_up_gate,
+   input                                   dbg_idel_down_gate,
+   input [DQ_BITS-1:0]                     dbg_sel_idel_dq,
+   input                                   dbg_sel_all_idel_dq,
+   input [DQS_BITS:0]                      dbg_sel_idel_dqs,
+   input                                   dbg_sel_all_idel_dqs,
+   input [DQS_BITS:0]                      dbg_sel_idel_gate,
+   input                                   dbg_sel_all_idel_gate,
+   output [3:0]                            dbg_calib_done,
+   output [3:0]                            dbg_calib_err,
+   output [(6*DQ_WIDTH)-1:0]               dbg_calib_dq_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]              dbg_calib_dqs_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]              dbg_calib_gate_tap_cnt,
+   output [DQS_WIDTH-1:0]                  dbg_calib_rd_data_sel,
+   output [(5*DQS_WIDTH)-1:0]              dbg_calib_rden_dly,
+   output [(5*DQS_WIDTH)-1:0]              dbg_calib_gate_dly
+   );
+
+  // minimum time (in IDELAY taps) for which capture data must be stable for
+  // algorithm to consider
+  localparam MIN_WIN_SIZE = 5;
+  // IDEL_SET_VAL = (# of cycles - 1) to wait after changing IDELAY value
+  // we only have to wait enough for input with new IDELAY value to
+  // propagate through pipeline stages.
+  localparam IDEL_SET_VAL = 3'b111;
+  // # of clock cycles to delay read enable to determine if read data pattern
+  // is correct for stage 3/4 (RDEN, DQS gate) calibration
+  localparam CALIB_RDEN_PIPE_LEN = 31;
+  // translate CAS latency into number of clock cycles for read valid delay
+  // determination. Really only needed for CL = 2.5 (set to 2)
+  localparam CAS_LAT_RDEN = (CAS_LAT == 25) ? 2 : CAS_LAT;
+  // an SRL32 is used to delay CTRL_RDEN to generate read valid signal. This
+  // is the minimum value the delay through the SRL32 can be
+  localparam RDEN_BASE_DELAY = CAS_LAT_RDEN + ADDITIVE_LAT + REG_ENABLE;
+  // an SRL32 is used to delay the CTRL_RDEN from the read postamble DQS
+  // gate. This is min possible value the SRL32 delay can be:
+  //  - Delay from end of deassertion of CTRL_RDEN to last falling edge of
+  //    read burst = 3.5 (CTRL_RDEN -> CAS delay) + 3 (min CAS latency) = 6.5
+  //  - Minimum time for DQS gate circuit to be generated:
+  //      * 1 cyc to register CTRL_RDEN from controller
+  //      * 1 cyc after RDEN_CTRL falling edge
+  //      * 1 cyc min through SRL32
+  //      * 1 cyc through SRL32 output flop
+  //      * 0 (<1) cyc of synchronization to DQS domain via IDELAY
+  //      * 1 cyc of delay through IDDR to generate CE to DQ IDDR's
+  //    Total = 5 cyc < 6.5 cycles
+  //    The total should be less than 5.5 cycles to account for prop delays
+  //    adding one cycle to the synchronization time via the IDELAY.
+  //    NOTE: Value differs because of optional pipeline register added
+  //      for case of RDEN_BASE_DELAY > 3 to improve timing
+  localparam GATE_BASE_DELAY = RDEN_BASE_DELAY - 3;
+  localparam GATE_BASE_INIT = (GATE_BASE_DELAY <= 1) ? 0 : GATE_BASE_DELAY;
+  // used for RDEN calibration: difference between shift value used during
+  // calibration, and shift value for actual RDEN SRL. Only applies when
+  // RDEN edge is immediately captured by CLKDIV0. If not (depends on phase
+  // of CLK0 and CLKDIV0 when RDEN is asserted), then add 1 to this value.
+  localparam CAL3_RDEN_SRL_DLY_DELTA = 6;
+  // fix minimum value of DQS to be 1 to handle the case where there's only
+  // one DQS group. We could also enforce that user always inputs minimum
+  // value of 1 for DQS_BITS (even when DQS_WIDTH=1). Leave this as safeguard
+  // Assume we don't have to do this for DQ, DQ_WIDTH always > 1
+  localparam DQS_BITS_FIX = (DQS_BITS == 0) ? 1 : DQS_BITS;
+  // how many taps to "pre-delay" DQ before stg 1 calibration - not needed for
+  // current calibration, but leave for debug
+  localparam DQ_IDEL_INIT = 6'b000000;
+  // # IDELAY taps per bit time (i.e. half cycle). Limit to 63.
+  localparam integer BIT_TIME_TAPS = (CLK_PERIOD/150 < 64) ?
+             CLK_PERIOD/150 : 63;
+
+  // used in various places during stage 4 cal: (1) determines maximum taps
+  // to increment when finding right edge, (2) amount to decrement after
+  // finding left edge, (3) amount to increment after finding right edge
+  localparam CAL4_IDEL_BIT_VAL = (BIT_TIME_TAPS >= 6'b100000) ?
+             6'b100000 : BIT_TIME_TAPS;
+
+  localparam CAL1_IDLE                   = 4'h0;
+  localparam CAL1_INIT                   = 4'h1;
+  localparam CAL1_INC_IDEL               = 4'h2;
+  localparam CAL1_FIND_FIRST_EDGE        = 4'h3;
+  localparam CAL1_FIRST_EDGE_IDEL_WAIT   = 4'h4;
+  localparam CAL1_FOUND_FIRST_EDGE_WAIT  = 4'h5;
+  localparam CAL1_FIND_SECOND_EDGE       = 4'h6;
+  localparam CAL1_SECOND_EDGE_IDEL_WAIT  = 4'h7;
+  localparam CAL1_CALC_IDEL              = 4'h8;
+  localparam CAL1_DEC_IDEL               = 4'h9;
+  localparam CAL1_DONE                   = 4'hA;
+
+  localparam CAL2_IDLE                    = 4'h0;
+  localparam CAL2_INIT                    = 4'h1;
+  localparam CAL2_INIT_IDEL_WAIT          = 4'h2;
+  localparam CAL2_FIND_EDGE_POS           = 4'h3;
+  localparam CAL2_FIND_EDGE_IDEL_WAIT_POS = 4'h4;
+  localparam CAL2_FIND_EDGE_NEG           = 4'h5;
+  localparam CAL2_FIND_EDGE_IDEL_WAIT_NEG = 4'h6;
+  localparam CAL2_DEC_IDEL                = 4'h7;
+  localparam CAL2_DONE                    = 4'h8;
+
+  localparam CAL3_IDLE                    = 3'h0;
+  localparam CAL3_INIT                    = 3'h1;
+  localparam CAL3_DETECT                  = 3'h2;
+  localparam CAL3_RDEN_PIPE_CLR_WAIT      = 3'h3;
+  localparam CAL3_DONE                    = 3'h4;
+
+  localparam CAL4_IDLE                    = 3'h0;
+  localparam CAL4_INIT                    = 3'h1;
+  localparam CAL4_FIND_WINDOW             = 3'h2;
+  localparam CAL4_FIND_EDGE               = 3'h3;
+  localparam CAL4_IDEL_WAIT               = 3'h4;
+  localparam CAL4_RDEN_PIPE_CLR_WAIT      = 3'h5;
+  localparam CAL4_ADJ_IDEL                = 3'h6;
+  localparam CAL4_DONE                    = 3'h7;
+
+  integer                        i, j;
+
+  reg [5:0]                      cal1_bit_time_tap_cnt;
+  reg [1:0]                      cal1_data_chk_last;
+  reg                            cal1_data_chk_last_valid;
+  reg [1:0]                      cal1_data_chk_r;
+  reg                            cal1_dlyce_dq;
+  reg                            cal1_dlyinc_dq;
+  reg                            cal1_dqs_dq_init_phase;
+  reg                            cal1_detect_edge;
+  reg                            cal1_detect_stable;
+  reg                            cal1_found_second_edge;
+  reg                            cal1_found_rising;
+  reg                            cal1_found_window;
+  reg                            cal1_first_edge_done;
+  reg [5:0]                      cal1_first_edge_tap_cnt;
+  reg [6:0]                      cal1_idel_dec_cnt;
+  reg [5:0]                      cal1_idel_inc_cnt;
+  reg [5:0]                      cal1_idel_max_tap;
+  reg                            cal1_idel_max_tap_we;
+  reg [5:0]                      cal1_idel_tap_cnt;
+  reg                            cal1_idel_tap_limit_hit;
+  reg [6:0]                      cal1_low_freq_idel_dec;
+  reg                            cal1_ref_req;
+  wire                           cal1_refresh;
+  reg [3:0]                      cal1_state;
+  reg [3:0]                      cal1_window_cnt;
+  reg                            cal2_curr_sel;
+  wire                           cal2_detect_edge;
+  reg                            cal2_dlyce_dqs;
+  reg                            cal2_dlyinc_dqs;
+  reg [5:0]                      cal2_idel_dec_cnt;
+  reg [5:0]                      cal2_idel_tap_cnt;
+  reg [5:0]                      cal2_idel_tap_limit;
+  reg                            cal2_idel_tap_limit_hit;
+  reg                            cal2_rd_data_fall_last_neg;
+  reg                            cal2_rd_data_fall_last_pos;
+  reg                            cal2_rd_data_last_valid_neg;
+  reg                            cal2_rd_data_last_valid_pos;
+  reg                            cal2_rd_data_rise_last_neg;
+  reg                            cal2_rd_data_rise_last_pos;
+  reg [DQS_WIDTH-1:0]            cal2_rd_data_sel;
+  wire                           cal2_rd_data_sel_edge;
+  reg [DQS_WIDTH-1:0]            cal2_rd_data_sel_r;
+  reg                            cal2_ref_req;
+  reg [3:0]                      cal2_state;
+  reg                            cal3_data_match;
+  reg                            cal3_data_match_stgd;
+  wire                           cal3_data_valid;
+  wire                           cal3_match_found;
+  wire [4:0]                     cal3_rden_dly;
+  reg [4:0]                      cal3_rden_srl_a;
+  reg [2:0]                      cal3_state;
+  wire                           cal4_data_good;
+  reg                            cal4_data_match;
+  reg                            cal4_data_match_stgd;
+  wire                           cal4_data_valid;
+  reg                            cal4_dlyce_gate;
+  reg                            cal4_dlyinc_gate;
+  reg                            cal4_dlyrst_gate;
+  reg [4:0]                      cal4_gate_srl_a;
+  reg [5:0]                      cal4_idel_adj_cnt;
+  reg                            cal4_idel_adj_inc;
+  reg                            cal4_idel_bit_tap;
+  reg [5:0]                      cal4_idel_tap_cnt;
+  reg                            cal4_idel_max_tap;
+  reg [4:0]                      cal4_rden_srl_a;
+  reg                            cal4_ref_req;
+  reg                            cal4_seek_left;
+  reg                            cal4_stable_window;
+  reg [2:0]                      cal4_state;
+  reg [3:0]                      cal4_window_cnt;
+  reg [3:0]                      calib_done_tmp;         // only for stg1/2/4
+  reg                            calib_ctrl_gate_pulse_r;
+  reg                            calib_ctrl_rden;
+  reg                            calib_ctrl_rden_r;
+  wire                           calib_ctrl_rden_negedge;
+  reg                            calib_ctrl_rden_negedge_r;
+  reg [3:0]                      calib_done_r;
+  reg [3:0]                      calib_err;
+  reg [1:0]                      calib_err_2;
+  wire                           calib_init_gate_pulse;
+  reg                            calib_init_gate_pulse_r;
+  reg                            calib_init_gate_pulse_r1;
+  reg                            calib_init_rden;
+  reg                            calib_init_rden_r;
+  reg [4:0]                      calib_rden_srl_a;
+  wire [4:0]                     calib_rden_srl_a_r;
+  reg [(5*DQS_WIDTH)-1:0]        calib_rden_dly;
+  reg                            calib_rden_edge_r;
+  reg [4:0]                      calib_rden_pipe_cnt;
+  wire                           calib_rden_srl_out;
+  wire                           calib_rden_srl_out_r;
+  reg                            calib_rden_srl_out_r1;
+  reg                            calib_rden_valid;
+  reg                            calib_rden_valid_stgd;
+  reg [DQ_BITS-1:0]              count_dq;
+  reg [DQS_BITS_FIX-1:0]         count_dqs;
+  reg [DQS_BITS_FIX-1:0]         count_gate;
+  reg [DQS_BITS_FIX-1:0]         count_rden;
+  reg                            ctrl_rden_r;
+  wire                           dlyce_or;
+  reg [(5*DQS_WIDTH)-1:0]        gate_dly;
+  wire [(5*DQS_WIDTH)-1:0]       gate_dly_r;
+  wire                           gate_srl_in;
+  wire [DQS_WIDTH-1:0]           gate_srl_out;
+  wire [DQS_WIDTH-1:0]           gate_srl_out_r;
+  reg [2:0]                      idel_set_cnt;
+  wire                           idel_set_wait;
+  reg [DQ_BITS-1:0]              next_count_dq;
+  reg [DQS_BITS_FIX-1:0]         next_count_dqs;
+  reg [DQS_BITS_FIX-1:0]         next_count_gate;
+  reg                            phy_init_rden_r;
+  reg                            phy_init_rden_r1;
+  reg [DQ_WIDTH-1:0]             rd_data_fall_1x_r;
+  reg [DQS_WIDTH-1:0]            rd_data_fall_1x_r1;
+  reg [DQS_WIDTH-1:0]            rd_data_fall_2x_r;
+  wire [DQS_WIDTH-1:0]           rd_data_fall_chk_q1;
+  wire [DQS_WIDTH-1:0]           rd_data_fall_chk_q2;
+  reg [DQ_WIDTH-1:0]             rd_data_rise_1x_r;
+  reg [DQS_WIDTH-1:0]            rd_data_rise_1x_r1;
+  reg [DQS_WIDTH-1:0]            rd_data_rise_2x_r;
+  wire [DQS_WIDTH-1:0]           rd_data_rise_chk_q1;
+  wire [DQS_WIDTH-1:0]           rd_data_rise_chk_q2;
+  reg                            rdd_fall_q1;
+  reg                            rdd_fall_q1_r;
+  reg                            rdd_fall_q1_r1;
+  reg                            rdd_fall_q2;
+  reg                            rdd_fall_q2_r;
+  reg                            rdd_rise_q1;
+  reg                            rdd_rise_q1_r;
+  reg                            rdd_rise_q1_r1;
+  reg                            rdd_rise_q2;
+  reg                            rdd_rise_q2_r;
+  reg [DQS_BITS_FIX-1:0]         rdd_mux_sel;
+  reg                            rden_dec;
+  reg [(5*DQS_WIDTH)-1:0]        rden_dly;
+  wire [(5*DQS_WIDTH)-1:0]       rden_dly_r;
+  reg [4:0]                      rden_dly_0;
+  reg                            rden_inc;
+  reg [DQS_WIDTH-1:0]            rden_mux;
+  wire [DQS_WIDTH-1:0]           rden_srl_out;
+
+  // Debug
+  integer                        x;
+  reg [5:0]                      dbg_dq_tap_cnt [DQ_WIDTH-1:0];
+  reg [5:0]                      dbg_dqs_tap_cnt [DQS_WIDTH-1:0];
+  reg [5:0]                      dbg_gate_tap_cnt [DQS_WIDTH-1:0];
+
+  //***************************************************************************
+  // Debug output ("dbg_phy_calib_*")
+  // NOTES:
+  //  1. All debug outputs coming out of PHY_CALIB are clocked off CLKDIV0,
+  //     although they are also static after calibration is complete. This
+  //     means the user can either connect them to a Chipscope ILA, or to
+  //     either a sync/async VIO input block. Using an async VIO has the
+  //     advantage of not requiring these paths to meet cycle-to-cycle timing.
+  //  2. The widths of most of these debug buses are dependent on the # of
+  //     DQS/DQ bits (e.g. dq_tap_cnt width = 6 * (# of DQ bits))
+  // SIGNAL DESCRIPTION:
+  //  1. calib_done:   4 bits - each one asserted as each phase of calibration
+  //                   is completed.
+  //  2. calib_err:    4 bits - each one asserted when a calibration error
+  //                   encountered for that stage. Some of these bits may not
+  //                   be used (not all cal stages report an error).
+  //  3. dq_tap_cnt:   final IDELAY tap counts for all DQ IDELAYs
+  //  4. dqs_tap_cnt:  final IDELAY tap counts for all DQS IDELAYs
+  //  5. gate_tap_cnt: final IDELAY tap counts for all DQS gate
+  //                   synchronization IDELAYs
+  //  6. rd_data_sel:  final read capture MUX (either "positive" or "negative"
+  //                   edge capture) settings for all DQS groups
+  //  7. rden_dly:     related to # of cycles after issuing a read until when
+  //                   read data is valid - for all DQS groups
+  //  8. gate_dly:     related to # of cycles after issuing a read until when
+  //                   clock enable for all DQ's is deasserted to prevent
+  //                   effect of DQS postamble glitch - for all DQS groups
+  //***************************************************************************
+
+  //*****************************************************************
+  // Record IDELAY tap values by "snooping" IDELAY control signals
+  //*****************************************************************
+
+  // record DQ IDELAY tap values
+  genvar dbg_dq_tc_i;
+  generate
+    for (dbg_dq_tc_i = 0; dbg_dq_tc_i < DQ_WIDTH;
+         dbg_dq_tc_i = dbg_dq_tc_i + 1) begin: gen_dbg_dq_tap_cnt
+      // Shadow counter for this DQ bit: cleared by RSTDIV or by the shared
+      // DQ delay reset, and stepped by one (up when INC is set, down
+      // otherwise) on every cycle in which this bit's CE is asserted.
+      always @(posedge clkdiv) begin
+        if (rstdiv | dlyrst_dq) begin
+          dbg_dq_tap_cnt[dbg_dq_tc_i] <= 6'd0;
+        end else if (dlyce_dq[dbg_dq_tc_i]) begin
+          if (dlyinc_dq[dbg_dq_tc_i]) begin
+            dbg_dq_tap_cnt[dbg_dq_tc_i]
+              <= dbg_dq_tap_cnt[dbg_dq_tc_i] + 1;
+          end else begin
+            dbg_dq_tap_cnt[dbg_dq_tc_i]
+              <= dbg_dq_tap_cnt[dbg_dq_tc_i] - 1;
+          end
+        end
+      end
+      // Export the counter on its own 6-bit slice of the flattened debug bus
+      assign dbg_calib_dq_tap_cnt[(6*dbg_dq_tc_i) +: 6]
+               = dbg_dq_tap_cnt[dbg_dq_tc_i];
+    end
+  endgenerate
+
+  // record DQS IDELAY tap values
+  genvar dbg_dqs_tc_i;
+  generate
+    for (dbg_dqs_tc_i = 0; dbg_dqs_tc_i < DQS_WIDTH;
+         dbg_dqs_tc_i = dbg_dqs_tc_i + 1) begin: gen_dbg_dqs_tap_cnt
+      // Shadow counter for this DQS: cleared by RSTDIV or by the shared
+      // DQS delay reset, and stepped by one (up when INC is set, down
+      // otherwise) on every cycle in which this DQS's CE is asserted.
+      always @(posedge clkdiv) begin
+        if (rstdiv | dlyrst_dqs) begin
+          dbg_dqs_tap_cnt[dbg_dqs_tc_i] <= 6'd0;
+        end else if (dlyce_dqs[dbg_dqs_tc_i]) begin
+          if (dlyinc_dqs[dbg_dqs_tc_i]) begin
+            dbg_dqs_tap_cnt[dbg_dqs_tc_i]
+              <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] + 1;
+          end else begin
+            dbg_dqs_tap_cnt[dbg_dqs_tc_i]
+              <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] - 1;
+          end
+        end
+      end
+      // Export the counter on its own 6-bit slice of the flattened debug bus
+      assign dbg_calib_dqs_tap_cnt[(6*dbg_dqs_tc_i) +: 6]
+               = dbg_dqs_tap_cnt[dbg_dqs_tc_i];
+    end
+  endgenerate
+
+  // record DQS gate IDELAY tap values
+  genvar dbg_gate_tc_i;
+  generate
+    for (dbg_gate_tc_i = 0; dbg_gate_tc_i < DQS_WIDTH;
+         dbg_gate_tc_i = dbg_gate_tc_i + 1) begin: gen_dbg_gate_tap_cnt
+      // Shadow counter for this gate IDELAY: cleared by RSTDIV or by this
+      // gate's own delay-reset bit, and stepped by one (up when INC is set,
+      // down otherwise) on every cycle in which this gate's CE is asserted.
+      always @(posedge clkdiv) begin
+        if (rstdiv | dlyrst_gate[dbg_gate_tc_i]) begin
+          dbg_gate_tap_cnt[dbg_gate_tc_i] <= 6'd0;
+        end else if (dlyce_gate[dbg_gate_tc_i]) begin
+          if (dlyinc_gate[dbg_gate_tc_i]) begin
+            dbg_gate_tap_cnt[dbg_gate_tc_i]
+              <= dbg_gate_tap_cnt[dbg_gate_tc_i] + 1;
+          end else begin
+            dbg_gate_tap_cnt[dbg_gate_tc_i]
+              <= dbg_gate_tap_cnt[dbg_gate_tc_i] - 1;
+          end
+        end
+      end
+      // Export the counter on its own 6-bit slice of the flattened debug bus
+      assign dbg_calib_gate_tap_cnt[(6*dbg_gate_tc_i) +: 6]
+               = dbg_gate_tap_cnt[dbg_gate_tc_i];
+    end
+  endgenerate
+
+  // Per-DQS-group results: read-enable delay, gate delay, capture-edge select
+  assign dbg_calib_rden_dly    = rden_dly;
+  assign dbg_calib_gate_dly    = gate_dly;
+  assign dbg_calib_rd_data_sel = cal2_rd_data_sel;
+  // Per-stage status: done and error flags
+  assign dbg_calib_err         = calib_err;
+  assign dbg_calib_done        = calib_done;
+
+  //***************************************************************************
+  // Read data pipelining, and read data "ISERDES" data width expansion
+  //***************************************************************************
+
+  // For all data bits, register incoming capture data to slow clock to improve
+  // timing. Adding single pipeline stage does not affect functionality (as
+  // long as we make sure to wait extra clock cycle after changing DQ IDELAY)
+  // Also note in this case that we're "missing" every other clock cycle's
+  // worth of data capture since we're sync'ing to the slow clock. This is
+  // fine for stage 1 and stage 2 cal, but not for stage 3 and 4 (see below
+  // for different circuit to handle those stages)
+  always @(posedge clkdiv) begin
+    // full-width copy of both capture buses, registered on CLKDIV
+    rd_data_fall_1x_r <= rd_data_fall;
+    rd_data_rise_1x_r <= rd_data_rise;
+  end
+
+  // For every DQ_PER_DQS bit, generate what is essentially a ISERDES-type
+  // data width expander. Will need this for stage 3 and 4 cal, where we need
+  // to compare data over consecutive clock cycles. We can also use this for
+  // stage 2 as well (stage 2 doesn't require every bit to be looked at, only
+  // one bit per DQS group)
+  genvar rdd_i;
+  generate
+    for (rdd_i = 0; rdd_i < DQS_WIDTH; rdd_i = rdd_i + 1) begin: gen_rdd
+      // Stage 1 (CLK): sample bit (rdd_i*DQ_PER_DQS) of the rise/fall capture
+      // buses on every fast clock edge, so that the fast cycle not seen
+      // directly by CLKDIV is still available one stage later.
+      always @(posedge clk) begin
+        rd_data_fall_2x_r[rdd_i] <= rd_data_fall[(rdd_i*DQ_PER_DQS)];
+        rd_data_rise_2x_r[rdd_i] <= rd_data_rise[(rdd_i*DQ_PER_DQS)];
+      end
+      // Stage 2 (CLKDIV): re-register stage 1. Together with the
+      // rd_data_*_1x_r flops this forms the second stage.
+      always @(posedge clkdiv) begin
+        rd_data_fall_1x_r1[rdd_i] <= rd_data_fall_2x_r[rdd_i];
+        rd_data_rise_1x_r1[rdd_i] <= rd_data_rise_2x_r[rdd_i];
+      end
+      // Four outputs per group: rise/fall data over the last 2 fast clock
+      // cycles. Q1 comes from the direct CLKDIV capture, Q2 from the
+      // two-stage path. Their relative ordering is one of:
+      //   (1) Q2 = data @ time = n,     Q1 = data @ time = n+1
+      //   (2) Q2 = data @ time = n - 1, Q1 = data @ time = n
+      //       (data at [Q1,Q2] is "staggered")
+      // The calibration stage consuming these must work out which applies,
+      // if it cares at all (e.g. stage 2 cal doesn't care about the ordering).
+      assign rd_data_fall_chk_q1[rdd_i]
+               = rd_data_fall_1x_r[(rdd_i*DQ_PER_DQS)];
+      assign rd_data_rise_chk_q1[rdd_i]
+               = rd_data_rise_1x_r[(rdd_i*DQ_PER_DQS)];
+      assign rd_data_fall_chk_q2[rdd_i]
+               = rd_data_fall_1x_r1[rdd_i];
+      assign rd_data_rise_chk_q2[rdd_i]
+               = rd_data_rise_1x_r1[rdd_i];
+    end
+  endgenerate
+
+  //*****************************************************************
+  // Outputs of these simplified ISERDES circuits then feed MUXes based on
+  // which DQ the current calibration algorithm needs to look at
+  //*****************************************************************
+
+  // generate MUX control; assume that adding an extra pipeline stage isn't
+  // an issue - whatever stage cal logic is using output of MUX will wait
+  // enough time after changing it
+  // Registered select for the read-data MUX. The index source follows the
+  // calibration progress encoded in calib_done[2:0]:
+  //   3'b001 -> next_count_dqs
+  //   3'b011 -> count_rden
+  //   3'b111 -> next_count_gate
+  // There is no default item: for any other value rdd_mux_sel keeps its
+  // previous value in simulation, while the full_case/parallel_case
+  // attribute allows synthesis to treat the unlisted values as don't-care.
+  always @(posedge clkdiv) begin
+    (* full_case, parallel_case *) case (calib_done[2:0])
+      3'b001: rdd_mux_sel <= next_count_dqs;
+      3'b011: rdd_mux_sel <= count_rden;
+      3'b111: rdd_mux_sel <= next_count_gate;
+    endcase
+  end
+
+  always @(posedge clkdiv) begin
+    // pick the DQS group addressed by rdd_mux_sel; Q1 pair first, then Q2
+    rdd_fall_q1 <= rd_data_fall_chk_q1[rdd_mux_sel];
+    rdd_rise_q1 <= rd_data_rise_chk_q1[rdd_mux_sel];
+    rdd_fall_q2 <= rd_data_fall_chk_q2[rdd_mux_sel];
+    rdd_rise_q2 <= rd_data_rise_chk_q2[rdd_mux_sel];
+  end
+
+  //***************************************************************************
+  // Demultiplexor to control (reset, increment, decrement) IDELAY tap values
+  //   For DQ:
+  //     STG1: for per-bit-deskew, only inc/dec the current DQ. For non-per
+  //       deskew, increment all bits in the current DQS set
+  //     STG2: inc/dec all DQ's in the current DQS set.
+  // NOTE: Nice to add some error checking logic here (or elsewhere in the
+  //       code) to check if logic attempts to overflow tap value
+  //***************************************************************************
+
+  // DQ and DQS IDELAY resets are driven from RSTDIV only: both are asserted
+  // while rstdiv is high and deasserted otherwise. Nothing in this block
+  // pulses DLYRST again after reset. Need to change this if we want to allow
+  // user to recalibrate after initial reset
+  always @(posedge clkdiv)
+    if (rstdiv) begin
+      dlyrst_dq <= 1'b1;
+      dlyrst_dqs <= 1'b1;
+    end else begin
+      dlyrst_dq <= 1'b0;
+      dlyrst_dqs <= 1'b0;
+    end
+
+  // CE/INC demultiplexer for the DQ and DQS IDELAYs. Outside reset, all four
+  // vectors are first defaulted to 0 every cycle; later nonblocking
+  // assignments in the same pass override individual bits. Request priority:
+  //   1. stage 1 (cal1_dlyce_dq)
+  //   2. stage 2 (cal2_dlyce_dqs)
+  //   3. dbg_* inputs, only when DEBUG_EN != 0
+  // SIM_ONLY != 0 applies every stage 1/2 request to all bits instead of
+  // only the one selected by count_dq / count_dqs.
+  always @(posedge clkdiv) begin
+    if (rstdiv) begin
+      dlyce_dq   <= 'b0;
+      dlyinc_dq  <= 'b0;
+      dlyce_dqs  <= 'b0;
+      dlyinc_dqs <= 'b0;
+    end else begin
+      // defaults: CE/INC are single-cycle pulses unless re-asserted below
+      dlyce_dq   <= 'b0;
+      dlyinc_dq  <= 'b0;
+      dlyce_dqs  <= 'b0;
+      dlyinc_dqs <= 'b0;
+
+      // stage 1 cal: change only specified DQ
+      if (cal1_dlyce_dq) begin
+        if (SIM_ONLY == 0) begin
+          dlyce_dq[count_dq] <= 1'b1;
+          dlyinc_dq[count_dq] <= cal1_dlyinc_dq;
+        end else begin
+          // if simulation, then calibrate only first DQ, apply results
+          // to all DQs (i.e. assume delay on all DQs is the same)
+          for (i = 0; i < DQ_WIDTH; i = i + 1) begin: loop_sim_dq_dly
+            dlyce_dq[i] <= 1'b1;
+            dlyinc_dq[i] <= cal1_dlyinc_dq;
+          end
+        end
+      end else if (cal2_dlyce_dqs) begin
+        // stage 2 cal: change DQS and all corresponding DQ's
+        if (SIM_ONLY == 0) begin
+          dlyce_dqs[count_dqs] <= 1'b1;
+          dlyinc_dqs[count_dqs] <= cal2_dlyinc_dqs;
+          // DQ bits of group count_dqs occupy indices
+          // DQ_PER_DQS*count_dqs .. DQ_PER_DQS*count_dqs + DQ_PER_DQS-1
+          for (i = 0; i < DQ_PER_DQS; i = i + 1) begin: loop_dqs_dly
+            dlyce_dq[(DQ_PER_DQS*count_dqs)+i] <= 1'b1;
+            dlyinc_dq[(DQ_PER_DQS*count_dqs)+i] <= cal2_dlyinc_dqs;
+          end
+        end else begin
+          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_dqs_dly
+            // if simulation, then calibrate only first DQS
+            dlyce_dqs[i] <= 1'b1;
+            dlyinc_dqs[i] <= cal2_dlyinc_dqs;
+            for (j = 0; j < DQ_PER_DQS; j = j + 1) begin: loop_sim_dq_dqs_dly
+              dlyce_dq[(DQ_PER_DQS*i)+j] <= 1'b1;
+              dlyinc_dq[(DQ_PER_DQS*i)+j] <= cal2_dlyinc_dqs;
+            end
+          end
+        end
+      end else if (DEBUG_EN != 0) begin
+        // DEBUG: allow user to vary IDELAY tap settings
+        // For DQ IDELAY taps
+        if (dbg_idel_up_all || dbg_idel_down_all ||
+            dbg_sel_all_idel_dq) begin
+          // broadcast: every DQ bit gets the same CE/INC
+          for (x = 0; x < DQ_WIDTH; x = x + 1) begin: loop_dly_inc_dq
+            dlyce_dq[x] <= dbg_idel_up_all | dbg_idel_down_all |
+                           dbg_idel_up_dq  | dbg_idel_down_dq;
+            dlyinc_dq[x] <= dbg_idel_up_all | dbg_idel_up_dq;
+          end
+        end else begin
+          // single bit selected by dbg_sel_idel_dq (the clear repeats the
+          // default already applied at the top of this branch)
+          dlyce_dq <= 'b0;
+          dlyce_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq |
+                                       dbg_idel_down_dq;
+          dlyinc_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq;
+        end
+        // For DQS IDELAY taps
+        if (dbg_idel_up_all || dbg_idel_down_all ||
+            dbg_sel_all_idel_dqs) begin
+          // broadcast: every DQS gets the same CE/INC
+          for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_dqs
+            dlyce_dqs[x] <= dbg_idel_up_all | dbg_idel_down_all |
+                            dbg_idel_up_dqs | dbg_idel_down_dqs;
+            dlyinc_dqs[x] <= dbg_idel_up_all | dbg_idel_up_dqs;
+          end
+        end else begin
+          // single DQS selected by dbg_sel_idel_dqs
+          dlyce_dqs <= 'b0;
+          dlyce_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs |
+                                         dbg_idel_down_dqs;
+          dlyinc_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs;
+        end
+      end
+    end
+  end
+
+  // GATE synchronization is handled directly by Stage 4 calibration FSM
+  // Reset/CE/INC demultiplexer for the DQS-gate IDELAYs. While rstdiv is
+  // high every gate reset bit is asserted. Otherwise all three vectors are
+  // defaulted to 0 each cycle and then overridden bit-wise:
+  //   - cal4_dlyrst_gate is handled on its own (not part of the CE priority
+  //     chain) and asserts the reset of gate count_gate;
+  //   - cal4_dlyce_gate drives CE/INC of gate count_gate;
+  //   - the dbg_* inputs are only considered when cal4_dlyce_gate is low
+  //     and DEBUG_EN != 0.
+  // SIM_ONLY != 0 applies stage 4 requests to every gate.
+  always @(posedge clkdiv)
+    if (rstdiv) begin
+      dlyrst_gate <= {DQS_WIDTH{1'b1}};
+      dlyce_gate  <= {DQS_WIDTH{1'b0}};
+      dlyinc_gate <= {DQS_WIDTH{1'b0}};
+    end else begin
+      // defaults: RST/CE/INC are single-cycle pulses unless re-asserted below
+      dlyrst_gate <= {DQS_WIDTH{1'b0}};
+      dlyce_gate  <= {DQS_WIDTH{1'b0}};
+      dlyinc_gate <= {DQS_WIDTH{1'b0}};
+
+      if (cal4_dlyrst_gate) begin
+        if (SIM_ONLY == 0)
+          dlyrst_gate[count_gate] <= 1'b1;
+        else
+          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly_rst
+            dlyrst_gate[i] <= 1'b1;
+          end
+      end
+
+      if (cal4_dlyce_gate) begin
+        if (SIM_ONLY == 0) begin
+          dlyce_gate[count_gate]  <= 1'b1;
+          dlyinc_gate[count_gate] <= cal4_dlyinc_gate;
+        end else begin
+          // if simulation, then calibrate only first gate
+          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly
+            dlyce_gate[i]  <= 1'b1;
+            dlyinc_gate[i] <= cal4_dlyinc_gate;
+          end
+        end
+      end else if (DEBUG_EN != 0) begin
+        // DEBUG: allow user to vary IDELAY tap settings
+        if (dbg_idel_up_all || dbg_idel_down_all ||
+            dbg_sel_all_idel_gate) begin
+          // broadcast: every gate gets the same CE/INC
+          for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_gate
+            dlyce_gate[x] <= dbg_idel_up_all | dbg_idel_down_all |
+                             dbg_idel_up_gate | dbg_idel_down_gate;
+            dlyinc_gate[x] <= dbg_idel_up_all | dbg_idel_up_gate;
+          end
+        end else begin
+          // single gate selected by dbg_sel_idel_gate
+          dlyce_gate <= {DQS_WIDTH{1'b0}};
+          dlyce_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate |
+                                           dbg_idel_down_gate;
+          dlyinc_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate;
+        end
+      end
+    end
+
+  //***************************************************************************
+  // signal to tell calibration state machines to wait and give IDELAY time to
+  // settle after its value is changed (both time for IDELAY chain to settle,
+  // and for settled output to propagate through ISERDES). For general use: use
+  // for any calibration state machines that modify any IDELAY.
+  // Should give at least enough time for IDELAY output to settle (technically
+  // for V5, this should be "glitchless" when IDELAY taps are changed, so don't
+  // need any time here), and also time for new data to propagate through both
+  // ISERDES and the "RDD" MUX + associated pipelining
+  // For now, give very "generous" delay - doesn't really matter since only
+  // needed during calibration
+  //***************************************************************************
+
+  // determine if calibration polarity has changed
+  always @(posedge clkdiv) begin
+    // one-clock-delayed copy of CAL2_RD_DATA_SEL
+    cal2_rd_data_sel_r <= cal2_rd_data_sel;
+  end
+
+  // asserted while any bit of CAL2_RD_DATA_SEL differs from its value one
+  // clock earlier
+  assign cal2_rd_data_sel_edge = |(cal2_rd_data_sel_r ^ cal2_rd_data_sel);
+
+  // fold every request that modifies an IDELAY into a single signal. A
+  // change of second stage capture "edge" polarity is included as well
+  // (IDELAY isn't changed in that case, but the same counter is used to
+  // stall the cal logic)
+  assign dlyce_or = cal4_dlyrst_gate |
+                    cal4_dlyce_gate |
+                    cal2_rd_data_sel_edge |
+                    cal2_dlyce_dqs |
+                    cal1_dlyce_dq;
+
+  // SYN_NOTE: Can later recode to avoid combinational path
+  assign idel_set_wait = (idel_set_cnt != IDEL_SET_VAL) || dlyce_or;
+
+  // settling timer: cleared by reset or by any new request, then counts up
+  // and holds once it reaches IDEL_SET_VAL
+  always @(posedge clkdiv) begin
+    if (rstdiv || dlyce_or)
+      idel_set_cnt <= 4'b0000;
+    else if (idel_set_cnt != IDEL_SET_VAL)
+      idel_set_cnt <= idel_set_cnt + 1'b1;
+  end
+
+  // generate request to PHY_INIT logic to issue auto-refresh
+  // used by certain states to force prech/auto-refresh part way through
+  // calibration to avoid a tRAS violation (which will happen if that
+  // stage of calibration lasts long enough). This signal must meet the
+  // following requirements: (1) only transition from 0->1 when the refresh
+  // request is needed, (2) stay at 1 and only transition 1->0 when
+  // CALIB_REF_DONE is asserted
+  always @(posedge clkdiv) begin
+    if (rstdiv) begin
+      calib_ref_req <= 1'b0;
+    end else begin
+      // registered OR of the refresh requests raised by stages 1, 2 and 4
+      calib_ref_req <= cal4_ref_req | cal2_ref_req | cal1_ref_req;
+    end
+  end
+
+  // stage 1 calibration requests auto-refresh every 4 bits
+  generate
+    if (DQ_BITS >= 2) begin: gen_cal1_refresh_dq_gt4
+      // asserted whenever the two LSBs of the next DQ index are both zero,
+      // i.e. at every fourth DQ index
+      assign cal1_refresh = (next_count_dq[1:0] == 2'b00);
+    end else begin: gen_cal1_refresh_dq_lte4
+      // DQ index is narrower than two bits: never request a refresh
+      assign cal1_refresh = 1'b0;
+    end
+  endgenerate
+
+  //***************************************************************************
+  // First stage calibration: DQ-DQS
+  // Definitions:
+  //  edge: detected when varying IDELAY, and current capture data != prev
+  //    capture data
+  //  valid bit window: detected when current capture data == prev capture
+  //    data for more than half the bit time
+  //  starting conditions for DQS-DQ phase:
+  //    case 1: when DQS starts somewhere in rising edge bit window, or
+  //      on the right edge of the rising bit window.
+  //    case 2: when DQS starts somewhere in falling edge bit window, or
+  //      on the right edge of the falling bit window.
+  // Algorithm Description:
+  //  1. Increment DQ IDELAY until we find an edge.
+  //  2. While we're finding the first edge, note whether a valid bit window
+  //     has been detected before we found an edge. If so, then figure out if
+  //     this is the rising or falling bit window. If rising, then our starting
+  //     DQS-DQ phase is case 1. If falling, then it's case 2. If don't detect
+  //     a valid bit window, then we must have started on the edge of a window.
+  //     Need to wait until later on to decide which case we are.
+  //       - Store FIRST_EDGE IDELAY value
+  //  3. Now look for second edge.
+  //  4. While we're finding the second edge, note whether valid bit window
+  //     is detected. If so, then use it, along with results from (2), to
+  //     figure out what the starting case is. If in rising bit window, then
+  //     we're in case 2. If falling, then case 1.
+  //       - Store SECOND_EDGE IDELAY value
+  //     NOTES:
+  //       a. Finding two edges allows us to calculate the bit time (although
+  //          not the "same" bit time polarity - need to investigate this
+  //          more).
+  //       b. If we run out of taps looking for the second edge, then the bit
+  //       time must be too long (>= 2.5ns, and DQS-DQ starting phase must be
+  //       case 1).
+  //  5. Calculate absolute amount to delay DQ as:
+  //       If second edge found, and case 1:
+  //         - DQ_IDELAY = FIRST_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
+  //       If second edge found, and case 2:
+  //         - DQ_IDELAY = SECOND_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
+  //       If second edge not found, then need to make an approximation on
+  //       how much to shift by (should be okay, because we have more timing
+  //       margin):
+  //         - DQ_IDELAY = FIRST_EDGE - 0.5 * (bit_time)
+  //     NOTE: Does this account for either case 1 or case 2?????
+  //     NOTE: It's also possible even when we find the second edge, that
+  //           to instead just use half the bit time to subtract from either
+  //           FIRST or SECOND_EDGE. Finding the actual bit time (which is
+  //           what (SECOND_EDGE - FIRST_EDGE) is, is slightly more accurate,
+  //           since it takes into account duty cycle distortion.
+  //  6. Repeat for each DQ in current DQS set.
+  //***************************************************************************
+
+  //*****************************************************************
+  // for first stage calibration - used for checking if DQS is aligned to the
+  // particular DQ, such that we're in the data valid window. Basically, this
+  // is one giant MUX.
+  //  = [falling data, rising data]
+  //  = [0, 1] = rising DQS aligned in proper (rising edge) bit window
+  //  = [1, 0] = rising DQS aligned in wrong (falling edge) bit window
+  //  = [0, 0], or [1,1] = in uncertain region between windows
+  //*****************************************************************
+
+  // SYN_NOTE: May have to split this up into multiple levels - MUX can get
+  //  very wide - as wide as the data bus width
+  always @(posedge clkdiv) begin
+    // register {fall, rise} capture data of the DQ bit selected by
+    // NEXT_COUNT_DQ
+    cal1_data_chk_r <= {rd_data_fall_1x_r[next_count_dq],
+                        rd_data_rise_1x_r[next_count_dq]};
+  end
+
+  //*****************************************************************
+  // determine when an edge has occurred - when either the current value
+  // is different from the previous latched value or when the DATA_CHK
+  // outputs are the same (rare, but indicates that we're at an edge)
+  // This is only valid when the IDELAY output and propagation of the
+  // data through the capture flops has had a chance to settle out.
+  //*****************************************************************
+
+  // write CAL1_DETECT_EDGE and CAL1_DETECT_STABLE in such a way that
+  // if X's are captured on the bus during functional simulation, that
+  // the logic will register this as an edge detected. Do this to allow
+  // use of this HDL with Denali memory models (Denali models drive DQ
+  // to X's on both edges of the data valid window to simulate jitter)
+  // This is only done for functional simulation purposes. **Should not**
+  // make the final synthesized logic more complicated, but it does make
+  // the HDL harder to understand b/c we have to "phrase" the logic
+  // slightly differently than when not worrying about X's
+  // Combinational edge flag for stage 1. CAL1_DETECT_EDGE is cleared only
+  // when the "no edge" condition below evaluates to a definite 1; in every
+  // other case (including an unknown condition caused by X's in the
+  // captured data) the else arm runs and an edge is reported. Keep the
+  // if/else form - folding it into a single expression would propagate X
+  // instead of reporting an edge.
+  always @(*) begin
+    // no edge found if: (1) we have recorded prev edge, and rise
+    // data == fall data, (2) we haven't yet recorded prev edge, but
+    // rise/fall data is equal to either [0,1] or [1,0] (i.e. rise/fall
+    // data isn't either X's, or [0,0] or [1,1], which indicates we're
+    // in the middle of an edge, since normally rise != fall data for stg1)
+    // NOTE(review): the code for (1) compares the current sample against
+    // the stored sample (CAL1_DATA_CHK_R == CAL1_DATA_CHK_LAST), not rise
+    // against fall - the wording of (1) above looks stale, confirm.
+    if ((cal1_data_chk_last_valid &&
+         (cal1_data_chk_r == cal1_data_chk_last)) ||
+        (!cal1_data_chk_last_valid &&
+         ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10))))
+      cal1_detect_edge = 1'b0;
+    else
+      cal1_detect_edge = 1'b1;
+  end
+
+  // Combinational "stable sample" flag for stage 1. Asserted only when a
+  // previous sample has been stored, the current sample equals it, and the
+  // sample is one of the two valid patterns ([0,1] or [1,0]). Any other
+  // case, including an unknown condition caused by X's in the captured
+  // data, takes the else arm and reports "not stable".
+  // Blocking assignments are used because this is purely combinational
+  // logic (same style as CAL1_DETECT_EDGE); non-blocking assignments in a
+  // combinational block defer the update to the NBA region.
+  always @(*) begin
+    // assert if we've found a region where data valid window is stable
+    // over consecutive IDELAY taps, and either rise/fall = [1,0], or [0,1]
+    if ((cal1_data_chk_last_valid &&
+         (cal1_data_chk_r == cal1_data_chk_last)) &&
+        ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10)))
+      cal1_detect_stable = 1'b1;
+    else
+      cal1_detect_stable = 1'b0;
+  end
+
+  //*****************************************************************
+  // Find valid window: keep track of how long we've been in the same data
+  // window. If it's been long enough, then declare that we've found a valid
+  // window. Also returns whether we found a rising or falling window (only
+  // valid when found_window is asserted)
+  //*****************************************************************
+
+  always @(posedge clkdiv) begin
+    if ((cal1_state == CAL1_INIT) || !cal1_data_chk_last_valid) begin
+      // Restart the window search (a) at the start of calibration for a
+      // DQ, and (b) whenever no previous value of CAL1_DATA_CHK is stored
+      // (never stored yet, or invalidated because an edge was detected and
+      // we are now looking for the second edge). Case (b) makes sure
+      // FOUND_WINDOW is deasserted on the following clock edge, which
+      // avoids finding a false window immediately after finding an edge.
+      // Note that because of jitter, it's possible to not find an edge at
+      // the end of the IDELAY increment settling time, but to find an edge
+      // on the next clock cycle (e.g. during CAL1_FIND_FIRST_EDGE)
+      cal1_found_rising <= 1'bx;
+      cal1_found_window <= 1'b0;
+      cal1_window_cnt   <= 4'b0000;
+    end else if (!idel_set_wait &&
+                 ((cal1_state == CAL1_SECOND_EDGE_IDEL_WAIT) ||
+                  (cal1_state == CAL1_FIRST_EDGE_IDEL_WAIT))) begin
+      // Evaluated in the IDEL_WAIT states once the settling timer has
+      // expired: while searching for the first and second edges, look for
+      // a stable bit window (MIN_WIN_SIZE consecutive stable taps). Once
+      // found, we're away from an edge and the starting DQS-DQ phase can
+      // be determined conclusively.
+      if (cal1_detect_stable) begin
+        cal1_window_cnt <= cal1_window_cnt + 1'b1;
+        if (cal1_window_cnt == MIN_WIN_SIZE-1) begin
+          // window is long enough: record it, and whether it is the
+          // rising ([0,1]) or the falling window
+          if (cal1_data_chk_r == 2'b01)
+            cal1_found_rising <= 1'b1;
+          else
+            cal1_found_rising <= 1'b0;
+          cal1_found_window <= 1'b1;
+        end
+      end else begin
+        // not in a data valid window: clear the counter and indicate that
+        // we're not currently in a window. This should happen by design
+        // at least once after finding the first edge.
+        cal1_found_rising <= 1'bx;
+        cal1_found_window <= 1'b0;
+        cal1_window_cnt   <= 4'b0000;
+      end
+    end
+  end
+
+  //*****************************************************************
+  // keep track of edge tap counts found, and whether we've
+  // incremented to the maximum number of taps allowed
+  //*****************************************************************
+
+  always @(posedge clkdiv) begin
+    if (cal1_state == CAL1_INIT) begin
+      // new DQ: tap count starts over
+      cal1_idel_tap_cnt       <= 6'd0;
+      cal1_idel_tap_limit_hit <= 1'b0;
+    end else if (cal1_dlyce_dq) begin
+      // mirror every stage 1 request to step the DQ IDELAY
+      if (cal1_dlyinc_dq) begin
+        // limit flag is set by the increment issued at a count of 62
+        cal1_idel_tap_limit_hit <= (cal1_idel_tap_cnt == 6'd62);
+        cal1_idel_tap_cnt       <= cal1_idel_tap_cnt + 1'b1;
+      end else begin
+        // any decrement clears the limit flag
+        cal1_idel_tap_limit_hit <= 1'b0;
+        cal1_idel_tap_cnt       <= cal1_idel_tap_cnt - 1'b1;
+      end
+    end
+  end
+
+  //*****************************************************************
+  // Pipeline for better timing - amount to decrement by if second
+  // edge not found
+  //*****************************************************************
+  // if only one edge found (possible for low frequencies), then:
+  //  1. Assume starting DQS-DQ phase has DQS in DQ window (aka "case 1")
+  //  2. We have to decrement by (63 - first_edge_tap_cnt) + (BIT_TIME_TAPS/2)
+  //     (i.e. decrement by 63-first_edge_tap_cnt to get to right edge of
+  //     DQ window. Then decrement again by (BIT_TIME_TAPS/2) to get to center
+  //     of DQ window.
+  //  3. Clamp the above value at 63 to ensure we don't underflow IDELAY
+  //     (note: clamping happens in the CAL1 state machine)
+  always @(posedge clkdiv) begin
+    // registered every cycle: (63 - first edge tap count), computed in
+    // 7 bits, plus half of BIT_TIME_TAPS
+    cal1_low_freq_idel_dec <=
+      (7'd63 - {1'b0, cal1_first_edge_tap_cnt}) + (BIT_TIME_TAPS/2);
+  end
+
+  //*****************************************************************
+  // Keep track of max taps used during stage 1, use this to limit
+  // the number of taps that can be used in stage 2
+  //*****************************************************************
+
+  always @(posedge clkdiv) begin
+    if (rstdiv) begin
+      cal1_idel_max_tap_we <= 1'b0;
+      cal1_idel_max_tap    <= 6'd0;
+    end else begin
+      // write enable is pipelined by one cycle - there is plenty of time:
+      // the tap count gets updated, then there are dead cycles while
+      // waiting for the IDELAY output to settle
+      cal1_idel_max_tap_we <= (cal1_idel_tap_cnt > cal1_idel_max_tap);
+      // in CAL1_DONE, keep the largest tap count seen for stage 1
+      if (cal1_idel_max_tap_we && (cal1_state == CAL1_DONE))
+        cal1_idel_max_tap <= cal1_idel_tap_cnt;
+    end
+  end
+
+  //*****************************************************************
+
+  // Stage 1 (DQ-DQS) calibration state machine, one pass per DQ bit:
+  //   CAL1_IDLE  - wait for CALIB_START[0], DQ index held at 0
+  //   CAL1_INIT  - clear the per-DQ flags and counters
+  //   CAL1_INC_IDEL - step DQ IDELAY up until the step count equals
+  //                DQ_IDEL_INIT
+  //   CAL1_FIND_FIRST_EDGE / CAL1_FIRST_EDGE_IDEL_WAIT - step up one tap at
+  //                a time until CAL1_DETECT_EDGE is seen
+  //   CAL1_FOUND_FIRST_EDGE_WAIT - one dead cycle
+  //   CAL1_FIND_SECOND_EDGE / CAL1_SECOND_EDGE_IDEL_WAIT - same search for
+  //                the second edge, or give up at the tap limit
+  //   CAL1_CALC_IDEL - choose the number of taps to back off
+  //   CAL1_DEC_IDEL  - step DQ IDELAY down by that amount
+  //   CAL1_DONE  - advance to the next DQ (optionally via a refresh
+  //                request) or finish
+  // CAL1_DLYCE_DQ, CAL1_DLYINC_DQ and CAL1_REF_REQ default to 0 every cycle
+  // and are only high in cycles where a state drives them.
+  // NOTE(review): within this block CALIB_ERR[0] is only ever cleared (on
+  // reset), and the case statement has no default arm - confirm both are
+  // intentional.
+  always @(posedge clkdiv)
+    if (rstdiv) begin
+      calib_done[0]            <= 1'b0;
+      calib_done_tmp[0]        <= 1'bx;
+      calib_err[0]             <= 1'b0;
+      count_dq                 <= {DQ_BITS{1'b0}};
+      next_count_dq            <= {DQ_BITS{1'b0}};
+      cal1_bit_time_tap_cnt    <= 6'bxxxxxx;
+      cal1_data_chk_last       <= 2'bxx;
+      cal1_data_chk_last_valid <= 1'bx;
+      cal1_dlyce_dq            <= 1'b0;
+      cal1_dlyinc_dq           <= 1'b0;
+      cal1_dqs_dq_init_phase   <= 1'bx;
+      cal1_first_edge_done     <= 1'bx;
+      cal1_found_second_edge   <= 1'bx;
+      cal1_first_edge_tap_cnt  <= 6'bxxxxxx;
+      cal1_idel_dec_cnt        <= 7'bxxxxxxx;
+      cal1_idel_inc_cnt        <= 6'bxxxxxx;
+      cal1_ref_req             <= 1'b0;
+      cal1_state               <= CAL1_IDLE;
+    end else begin
+      // default values for all "pulse" outputs
+      cal1_ref_req        <= 1'b0;
+      cal1_dlyce_dq       <= 1'b0;
+      cal1_dlyinc_dq      <= 1'b0;
+
+      case (cal1_state)
+        // wait for the start request; DQ index is held at 0 while idle
+        CAL1_IDLE: begin
+          count_dq      <= {DQ_BITS{1'b0}};
+          next_count_dq <= {DQ_BITS{1'b0}};
+          if (calib_start[0]) begin
+            calib_done[0] <= 1'b0;
+            calib_done_tmp[0] <= 1'b0;
+            cal1_state    <= CAL1_INIT;
+          end
+        end
+
+        // entered once per DQ bit: clear the per-DQ search state
+        CAL1_INIT: begin
+          cal1_data_chk_last_valid <= 1'b0;
+          cal1_found_second_edge <= 1'b0;
+          cal1_dqs_dq_init_phase <= 1'b0;
+          cal1_idel_inc_cnt      <= 6'b000000;
+          cal1_state <= CAL1_INC_IDEL;
+        end
+
+        // increment DQ IDELAY so that either: (1) DQS starts somewhere in
+        // first rising DQ window, or (2) DQS starts in first falling DQ
+        // window. The amount to shift is frequency dependent (and is either
+        // precalculated by MIG or possibly adjusted by the user)
+        CAL1_INC_IDEL:
+          // leave only after the last increment has been issued AND the
+          // settling timer has expired
+          if ((cal1_idel_inc_cnt == DQ_IDEL_INIT) && !idel_set_wait) begin
+            cal1_state <= CAL1_FIND_FIRST_EDGE;
+          end else if (cal1_idel_inc_cnt != DQ_IDEL_INIT) begin
+            cal1_idel_inc_cnt <= cal1_idel_inc_cnt + 1;
+            cal1_dlyce_dq <= 1'b1;
+            cal1_dlyinc_dq <= 1'b1;
+          end
+
+        // look for first edge
+        CAL1_FIND_FIRST_EDGE: begin
+          // Determine DQS-DQ phase if we can detect enough of a valid window
+          if (cal1_found_window)
+            cal1_dqs_dq_init_phase <= ~cal1_found_rising;
+          // find first edge - if found then record position
+          if (cal1_detect_edge) begin
+            cal1_state <= CAL1_FOUND_FIRST_EDGE_WAIT;
+            cal1_first_edge_done   <= 1'b0;
+            cal1_first_edge_tap_cnt <= cal1_idel_tap_cnt;
+            cal1_data_chk_last_valid <= 1'b0;
+          end else begin
+            // otherwise, store the current value of DATA_CHK, increment
+            // DQ IDELAY, and compare again
+            cal1_state <= CAL1_FIRST_EDGE_IDEL_WAIT;
+            cal1_data_chk_last <= cal1_data_chk_r;
+            // avoid comparing against DATA_CHK_LAST for previous iteration
+            cal1_data_chk_last_valid <= 1'b1;
+            cal1_dlyce_dq <= 1'b1;
+            cal1_dlyinc_dq <= 1'b1;
+          end
+        end
+
+        // wait for DQ IDELAY to settle
+        CAL1_FIRST_EDGE_IDEL_WAIT:
+          if (!idel_set_wait)
+            cal1_state <= CAL1_FIND_FIRST_EDGE;
+
+        // delay state between finding first edge and looking for second
+        // edge. Necessary in order to invalidate CAL1_FOUND_WINDOW before
+        // starting to look for second edge
+        CAL1_FOUND_FIRST_EDGE_WAIT:
+          cal1_state <= CAL1_FIND_SECOND_EDGE;
+
+        // Try and find second edge
+        CAL1_FIND_SECOND_EDGE: begin
+          // When looking for 2nd edge, first make sure data stabilized (by
+          // detecting valid data window) - needed to avoid false edges
+          if (cal1_found_window) begin
+            cal1_first_edge_done <= 1'b1;
+            cal1_dqs_dq_init_phase <= cal1_found_rising;
+          end
+          // exit if run out of taps to increment
+          if (cal1_idel_tap_limit_hit)
+            cal1_state <= CAL1_CALC_IDEL;
+          else begin
+            // found second edge, record the current edge count
+            if (cal1_first_edge_done && cal1_detect_edge) begin
+              cal1_state <= CAL1_CALC_IDEL;
+              cal1_found_second_edge <= 1'b1;
+              // tap distance between the two edges, plus one
+              cal1_bit_time_tap_cnt <= cal1_idel_tap_cnt -
+                                       cal1_first_edge_tap_cnt + 1;
+            end else begin
+              // no second edge yet: store the current sample, step the DQ
+              // IDELAY up by one tap and wait for it to settle
+              cal1_state <= CAL1_SECOND_EDGE_IDEL_WAIT;
+              cal1_data_chk_last <= cal1_data_chk_r;
+              cal1_data_chk_last_valid <= 1'b1;
+              cal1_dlyce_dq <= 1'b1;
+              cal1_dlyinc_dq <= 1'b1;
+            end
+          end
+        end
+
+        // wait for DQ IDELAY to settle, then store ISERDES output
+        CAL1_SECOND_EDGE_IDEL_WAIT:
+          if (!idel_set_wait)
+            cal1_state <= CAL1_FIND_SECOND_EDGE;
+
+        // pipeline delay state to calculate amount to decrement DQ IDELAY
+        // NOTE: We're calculating the amount to decrement by, not the
+        //  absolute setting for DQ IDELAY
+        CAL1_CALC_IDEL: begin
+          // if two edges found
+          if (cal1_found_second_edge)
+            // case 1: DQS was in DQ window to start with. First edge found
+            // corresponds to left edge of DQ rising window. Backup by 1.5*BT
+            // NOTE: In this particular case, it is possible to decrement
+            //  "below 0" in the case where DQS delay is less than 0.5*BT,
+            //  need to limit decrement to prevent IDELAY tap underflow
+            if (!cal1_dqs_dq_init_phase)
+              cal1_idel_dec_cnt <= {1'b0, cal1_bit_time_tap_cnt} +
+                                   {1'b0, (cal1_bit_time_tap_cnt >> 1)};
+            // case 2: DQS was in wrong DQ window (in DQ falling window).
+            // First edge found is right edge of DQ rising window. Second
+            // edge is left edge of DQ rising window. Backup by 0.5*BT
+            else
+              cal1_idel_dec_cnt <= {1'b0, (cal1_bit_time_tap_cnt >> 1)};
+          // if only one edge found - assume will always be case 1 - DQS in
+          // DQ window. Case 2 only possible if path delay on DQS > 5ns
+          else
+            cal1_idel_dec_cnt <= cal1_low_freq_idel_dec;
+          cal1_state <= CAL1_DEC_IDEL;
+        end
+
+        // decrement DQ IDELAY for final adjustment
+        CAL1_DEC_IDEL:
+          // once adjustment is complete, we're done with calibration for
+          // this DQ, now return to IDLE state and repeat for next DQ
+          // Add underflow protection for case of 2 edges found and DQS
+          // starting in DQ window (see comments for above state) - note we
+          // have to take into account delayed value of CAL1_IDEL_TAP_CNT -
+          // gets updated one clock cycle after CAL1_DLYCE/INC_DQ
+          if ((cal1_idel_dec_cnt == 7'b0000000) ||
+              (cal1_dlyce_dq && (cal1_idel_tap_cnt == 6'b000001))) begin
+            cal1_state <= CAL1_DONE;
+            // stop when all DQ's calibrated, or DQ[0] cal'ed (for sim)
+            if ((count_dq == DQ_WIDTH-1) || (SIM_ONLY != 0))
+              calib_done_tmp[0] <= 1'b1;
+            else
+              // need for VHDL simulation to prevent out-of-index error
+              next_count_dq <= count_dq + 1;
+          end else begin
+            // keep decrementing until final tap count reached
+            cal1_idel_dec_cnt <= cal1_idel_dec_cnt - 1;
+            cal1_dlyce_dq <= 1'b1;
+            cal1_dlyinc_dq <= 1'b0;
+          end
+
+        // delay state to allow count_dq and DATA_CHK to point to the next
+        // DQ bit (allows us to potentially begin checking for an edge on
+        // next DQ right away).
+        CAL1_DONE:
+          if (!idel_set_wait) begin
+            count_dq <= next_count_dq;
+            if (calib_done_tmp[0]) begin
+              calib_done[0] <= 1'b1;
+              cal1_state <= CAL1_IDLE;
+            end else begin
+              // request auto-refresh after every 4 bits calibrated (i.e.
+              // whenever CAL1_REFRESH is asserted) to avoid tRAS violation
+              if (cal1_refresh) begin
+                // hold the request high until CALIB_REF_DONE is seen
+                cal1_ref_req <= 1'b1;
+                if (calib_ref_done)
+                  cal1_state <= CAL1_INIT;
+              end else
+                // if no need this time for refresh, proceed to next bit
+                cal1_state <= CAL1_INIT;
+            end
+          end
+      endcase
+    end
+
+  //***************************************************************************
+  // Second stage calibration: DQS-FPGA Clock
+  // Algorithm Description:
+  //  1. Assumes a training pattern that will produce a pattern oscillating at
+  //     half the core clock frequency each on rise and fall outputs, and such
+  //     that rise and fall outputs are 180 degrees out of phase from each
+  //     other. Note that since the calibration logic runs at half the speed
+  //     of the interface, expect that data sampled with the slow clock always
+  //     to be constant (either always = 1, or = 0, and rise data != fall data)
+  //     unless we cross the edge of the data valid window
+  //  2. Start by setting RD_DATA_SEL = 0. This selects the rising capture data
+  //     sync'ed to rising edge of core clock, and falling edge data sync'ed
+  //     to falling edge of core clock
+  //  3. Start looking for an edge. An edge is defined as either: (1) a
+  //     change in capture value or (2) an invalid capture value (e.g. rising
+  //     data != falling data for that same clock cycle).
+  //  4. If an edge is found, go to step (6). If edge hasn't been found, then
+  //     set RD_DATA_SEL = 1, and try again.
+  //  5. If no edge is found, then increment IDELAY and return to step (3)
+  //  6. If an edge is found, then invert RD_DATA_SEL - this shifts the
+  //     capture point 180 degrees from the edge of the window (minus duty
+  //     cycle distortion, delay skew between rising/falling edge capture
+  //     paths, etc.)
+  //  7. If no edge is found by CAL2_IDEL_TAP_LIMIT (= 63 - # taps used for
+  //     stage 1 calibration), then decrement IDELAY (without reinverting
+  //     RD_DATA_SEL) by CAL2_IDEL_TAP_LIMIT/2. This guarantees we at least
+  //     have CAL2_IDEL_TAP_LIMIT/2 of slack both before and after the
+  //     capture point (not optimal, but best we can do not having found an
+  //     edge of the window). This happens only for very low frequencies.
+  //  8. Repeat for each DQS group.
+  //  NOTE: Step 6 is not optimal. A better (and perhaps more complicated)
+  //   algorithm might be to find both edges of the data valid window (using
+  //   the same polarity of RD_DATA_SEL), and then decrement to the midpoint.
+  //***************************************************************************
+
+  // RD_DATA_SEL should be tagged with FROM-TO (multi-cycle) constraint in
+  // UCF file to relax timing. This net is "pseudo-static" (after value is
+  // changed, FSM waits number of cycles before using the output).
+  // Note that we are adding one clock cycle of delay (to isolate it from
+  // the other logic CAL2_RD_DATA_SEL feeds), make sure FSM waits long
+  // enough to compensate (by default it does, it waits a few cycles more
+  // than minimum # of clock cycles)
+  genvar rd_i;
+  generate
+    // one explicitly instantiated flop per DQS group: registers
+    // CAL2_RD_DATA_SEL on CLKDIV, always enabled, set/reset tied off
+    for (rd_i = 0; rd_i < DQS_WIDTH; rd_i = rd_i+1) begin: gen_rd_data_sel
+      FDRSE u_ff_rd_data_sel
+        (
+         .C   (clkdiv),
+         .CE  (1'b1),
+         .D   (cal2_rd_data_sel[rd_i]),
+         .R   (1'b0),
+         .S   (1'b0),
+         .Q   (rd_data_sel[rd_i])
+         ) /* synthesis syn_preserve = 1 */
+           /* synthesis syn_replicate = 0 */;
+    end
+  endgenerate
+
+  //*****************************************************************
+  // Max number of taps used for stg2 cal dependent on number of taps
+  // used for stg1 (give priority to stg1 cal - let it use as many
+  // taps as it needs - the remainder of the IDELAY taps can be used
+  // by stg2)
+  //*****************************************************************
+
+  always @(posedge clkdiv) begin
+    // taps left over for stage 2 = 63 - maximum taps used by stage 1
+    cal2_idel_tap_limit <= 6'd63 - cal1_idel_max_tap;
+  end
+
+  //*****************************************************************
+  // second stage calibration uses readback pattern of "1100" (i.e.
+  // 1st rising = 1, 1st falling = 1, 2nd rising = 0, 2nd falling = 0)
+  // only look at the first bit of each DQS group
+  //*****************************************************************
+
+  // asserted when captured data has changed since IDELAY was
+  // incremented, or when we're right on the edge (i.e. rise data !=
+  // fall data).
+  // Three ways to flag an edge:
+  //  - rise and fall capture data disagree
+  //  - negative-edge capture selected, a previous negative-edge sample is
+  //    stored, and the current rise or fall data differs from it
+  //  - positive-edge capture selected, a previous positive-edge sample is
+  //    stored, and the current rise or fall data differs from it
+  assign cal2_detect_edge =
+    ((rdd_rise_q1 != rdd_fall_q1) ||
+     ((cal2_curr_sel) && cal2_rd_data_last_valid_neg &&
+      ((rdd_fall_q1 != cal2_rd_data_fall_last_neg) ||
+       (rdd_rise_q1 != cal2_rd_data_rise_last_neg))) ||
+     ((!cal2_curr_sel) && cal2_rd_data_last_valid_pos &&
+      ((rdd_fall_q1 != cal2_rd_data_fall_last_pos) ||
+       (rdd_rise_q1 != cal2_rd_data_rise_last_pos))));
+
+  //*****************************************************************
+  // keep track of edge tap counts found, and whether we've
+  // incremented to the maximum number of taps allowed
+  // NOTE: Assume stage 2 cal always increments the tap count (never
+  //       decrements) when searching for edge of the data valid window
+  //*****************************************************************
+
+  always @(posedge clkdiv) begin
+    if (cal2_state == CAL2_INIT) begin
+      // new DQS group: tap count starts over
+      cal2_idel_tap_cnt       <= 6'd0;
+      cal2_idel_tap_limit_hit <= 1'b0;
+    end else if (cal2_dlyce_dqs) begin
+      // every stage 2 tap request is counted as an increment; the limit
+      // flag is set by the request issued at a count of (limit - 1)
+      cal2_idel_tap_limit_hit <= (cal2_idel_tap_cnt ==
+                                  cal2_idel_tap_limit - 1);
+      cal2_idel_tap_cnt       <= cal2_idel_tap_cnt + 1'b1;
+    end
+  end
+
+  //*****************************************************************
+
+  always @(posedge clkdiv)
+    if (rstdiv) begin
+      calib_done[1]               <= 1'b0;
+      calib_done_tmp[1]           <= 1'bx;
+      calib_err[1]                <= 1'b0;
+      count_dqs                   <= 'b0;
+      next_count_dqs              <= 'b0;
+      cal2_dlyce_dqs              <= 1'b0;
+      cal2_dlyinc_dqs             <= 1'b0;
+      cal2_idel_dec_cnt           <= 6'bxxxxxx;
+      cal2_rd_data_last_valid_neg <= 1'bx;
+      cal2_rd_data_last_valid_pos <= 1'bx;
+      cal2_rd_data_sel            <= 'b0;
+      cal2_ref_req                <= 1'b0;
+      cal2_state                  <= CAL2_IDLE;
+    end else begin
+      cal2_ref_req      <= 1'b0;
+      cal2_dlyce_dqs    <= 1'b0;
+      cal2_dlyinc_dqs   <= 1'b0;
+
+      case (cal2_state)
+        CAL2_IDLE: begin
+          count_dqs      <= 'b0;
+          next_count_dqs <= 'b0;
+          if (calib_start[1]) begin
+            cal2_rd_data_sel  <= {DQS_WIDTH{1'b0}};
+            calib_done[1]     <= 1'b0;
+            calib_done_tmp[1] <= 1'b0;
+            cal2_state        <= CAL2_INIT;
+          end
+        end
+
+        // Pass through this state every time we calibrate a new DQS group
+        CAL2_INIT: begin
+          cal2_curr_sel <= 1'b0;
+          cal2_rd_data_last_valid_neg <= 1'b0;
+          cal2_rd_data_last_valid_pos <= 1'b0;
+          cal2_state <= CAL2_INIT_IDEL_WAIT;
+        end
+
+        // Stall state only used if calibration run more than once. Can take
+        // this state out if design never runs calibration more than once.
+        // We need this state to give time for MUX'ed data to settle after
+        // resetting RD_DATA_SEL
+        CAL2_INIT_IDEL_WAIT:
+          if (!idel_set_wait)
+            cal2_state <= CAL2_FIND_EDGE_POS;
+
+        // Look for an edge - first check "positive-edge" stage 2 capture
+        CAL2_FIND_EDGE_POS: begin
+          // if found an edge, then switch to the opposite edge stage 2
+          // capture and we're done - no need to decrement the tap count,
+          // since switching to the opposite edge will shift the capture
+          // point by 180 degrees
+          if (cal2_detect_edge) begin
+            cal2_curr_sel <= 1'b1;
+            cal2_state <= CAL2_DONE;
+            // set all DQS groups to be the same for simulation
+            if (SIM_ONLY != 0)
+              cal2_rd_data_sel <= {DQS_WIDTH{1'b1}};
+            else
+              cal2_rd_data_sel[count_dqs] <= 1'b1;
+            if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
+              calib_done_tmp[1] <= 1'b1;
+           else
+              // MIG 2.1: Fix for simulation out-of-bounds error when
+              // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)  
+             next_count_dqs <= count_dqs + 1;
+          end else begin
+            // otherwise, invert polarity of stage 2 capture and look for
+            // an edge with opposite capture clock polarity
+            cal2_curr_sel <= 1'b1;
+            cal2_rd_data_sel[count_dqs] <= 1'b1;
+            cal2_state <= CAL2_FIND_EDGE_IDEL_WAIT_POS;
+            cal2_rd_data_rise_last_pos  <= rdd_rise_q1;
+            cal2_rd_data_fall_last_pos  <= rdd_fall_q1;
+            cal2_rd_data_last_valid_pos <= 1'b1;
+          end
+        end
+
+        // Give time to switch from positive-edge to negative-edge second
+        // stage capture (need time for data to filter through pipe stages)
+        CAL2_FIND_EDGE_IDEL_WAIT_POS:
+          if (!idel_set_wait)
+            cal2_state <= CAL2_FIND_EDGE_NEG;
+
+        // Look for an edge - check "negative-edge" stage 2 capture
+        CAL2_FIND_EDGE_NEG:
+          if (cal2_detect_edge) begin
+            cal2_curr_sel <= 1'b0;
+            cal2_state <= CAL2_DONE;
+            // set all DQS groups to be the same for simulation
+            if (SIM_ONLY != 0)
+              cal2_rd_data_sel <= {DQS_WIDTH{1'b0}};
+            else
+              cal2_rd_data_sel[count_dqs] <= 1'b0;
+            if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
+              calib_done_tmp[1] <= 1'b1;
+           else
+              // MIG 2.1: Fix for simulation out-of-bounds error when
+              // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
+             next_count_dqs <= count_dqs + 1;
+          end else if (cal2_idel_tap_limit_hit) begin
+            // otherwise, if we've run out of taps, then immediately
+            // back off by half the # of taps used - that's our best estimate
+            // for the optimal calibration point. It doesn't matter which
+            // polarity we're using for capture (we don't know which one is
+            // best to use)
+            cal2_idel_dec_cnt <= {1'b0, cal2_idel_tap_limit[5:1]};
+            cal2_state <= CAL2_DEC_IDEL;
+            if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
+              calib_done_tmp[1] <= 1'b1;
+           else
+              // MIG 2.1: Fix for simulation out-of-bounds error when
+              // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
+             next_count_dqs <= count_dqs + 1;
+          end else begin
+            // otherwise, increment IDELAY, and start looking for edge again
+            cal2_curr_sel <= 1'b0;
+            cal2_rd_data_sel[count_dqs] <= 1'b0;
+            cal2_state <= CAL2_FIND_EDGE_IDEL_WAIT_NEG;
+            cal2_rd_data_rise_last_neg  <= rdd_rise_q1;
+            cal2_rd_data_fall_last_neg  <= rdd_fall_q1;
+            cal2_rd_data_last_valid_neg <= 1'b1;
+            cal2_dlyce_dqs  <= 1'b1;
+            cal2_dlyinc_dqs <= 1'b1;
+          end
+
+        CAL2_FIND_EDGE_IDEL_WAIT_NEG:
+          if (!idel_set_wait)
+            cal2_state <= CAL2_FIND_EDGE_POS;
+
+        // if no edge found, then decrement by half # of taps used
+        CAL2_DEC_IDEL: begin
+          if (cal2_idel_dec_cnt == 6'b000000)
+            cal2_state <= CAL2_DONE;
+          else begin
+            cal2_idel_dec_cnt <= cal2_idel_dec_cnt - 1;
+            cal2_dlyce_dqs  <= 1'b1;
+            cal2_dlyinc_dqs <= 1'b0;
+          end
+        end
+
+        // delay state to allow count_dqs and ISERDES data to point to next
+        // DQ bit (DQS group) before going to INIT
+        CAL2_DONE:
+          if (!idel_set_wait) begin
+            count_dqs <= next_count_dqs;
+            if (calib_done_tmp[1]) begin
+              calib_done[1] <= 1'b1;
+              cal2_state <= CAL2_IDLE;
+            end else begin
+              // request auto-refresh after every DQS group calibrated to
+              // avoid tRAS violation
+              cal2_ref_req <= 1'b1;
+              if (calib_ref_done)
+                cal2_state <= CAL2_INIT;
+            end
+          end
+      endcase
+    end
+
+  //***************************************************************************
+  // Stage 3 calibration: Read Enable
+  // Description:
+  // Read enable calibration determines the "round-trip" time (in # of CLK0
+  // cycles) between when a read command is issued by the controller, and
+  // when the corresponding read data is synchronized into the CLK0 domain.
+  // A long delay chain is used to delay the read enable signal from the
+  // controller/initialization logic (i.e. it is used both for initialization
+  // and during normal controller operation). Stage 3 calibration logic
+  // decides which delayed version is appropriate to use (which is affected
+  // by the round trip delay of DQ/DQS) as a "valid" signal to tell the rest
+  // of the logic when the captured data output from ISERDES is valid.
+  //***************************************************************************
+
+  //*****************************************************************
+  // Delay chains: Use shift registers
+  // Two sets of delay chains are used:
+  //  1. One to delay RDEN from PHY_INIT module for calibration
+  //     purposes (delay required for RDEN for calibration is different
+  //     than during normal operation)
+  //  2. One per DQS group to delay RDEN from controller for normal
+  //     operation - the value to delay for each DQS group can be different
+  //     as is determined during calibration
+  //*****************************************************************
+
+  //*****************************************************************
+  // First delay chain, use only for calibration
+  // input = asserted on rising edge of RDEN from PHY_INIT module
+  //*****************************************************************
+
+  // Fast-clock (clk) staging of the two read enables:
+  //  - ctrl_rden_r is a one-cycle delayed copy of the controller RDEN
+  //  - phy_init_rden_r / phy_init_rden_r1 hold the last two samples of the
+  //    PHY_INIT RDEN; calib_rden_edge_r is set for the cycle after those
+  //    samples show a 0 -> 1 transition (newer sample 1, older sample 0)
+  always @(posedge clk) begin
+    // rising-edge detect on the registered PHY_INIT read enable
+    calib_rden_edge_r <= ~phy_init_rden_r1 & phy_init_rden_r;
+    // two-deep history of the PHY_INIT read enable
+    phy_init_rden_r1  <= phy_init_rden_r;
+    phy_init_rden_r   <= phy_init_rden;
+    // registered controller read enable
+    ctrl_rden_r       <= ctrl_rden;
+  end
+
+  // Calibration shift register used for both Stage 3 and Stage 4 cal
+  // (not strictly necessary for stage 4, but use as an additional check
+  // to make sure we're checking for correct data on the right clock cycle)
+  // Address select for that shift register: while calib_done[2] (set by the
+  // stage 3 state machine on completion) is still low, the address comes
+  // from the stage 3 logic (cal3_rden_srl_a); once it is set, the address
+  // comes from cal4_rden_srl_a instead.
+  always @(posedge clkdiv)
+    if (!calib_done[2])
+      calib_rden_srl_a <= cal3_rden_srl_a;
+    else
+      calib_rden_srl_a <= cal4_rden_srl_a;
+
+  // Flops for targeting of multi-cycle path in UCF.
+  // One FDRSE per address bit (5 bits) re-registers calib_rden_srl_a on
+  // clkdiv to produce calib_rden_srl_a_r, which drives the address (A) input
+  // of u_calib_rden_srl. CE is tied high and R/S are tied low, so each flop
+  // is a plain D register. The syn_preserve / syn_replicate attributes ask
+  // the synthesis tool to keep exactly one copy of each flop.
+  // NOTE(review): the generate label and instance names are presumably
+  // referenced by the UCF constraints - confirm before renaming anything.
+  genvar cal_rden_ff_i;
+  generate
+    for (cal_rden_ff_i = 0; cal_rden_ff_i < 5;
+         cal_rden_ff_i = cal_rden_ff_i+1) begin: gen_cal_rden_dly
+      FDRSE u_ff_cal_rden_dly
+        (
+         .Q   (calib_rden_srl_a_r[cal_rden_ff_i]),
+         .C   (clkdiv),
+         .CE  (1'b1),
+         .D   (calib_rden_srl_a[cal_rden_ff_i]),
+         .R   (1'b0),
+         .S   (1'b0)
+         ) /* synthesis syn_preserve = 1 */
+           /* synthesis syn_replicate = 0 */;
+    end
+  endgenerate
+
+  // Calibration read-enable delay chain: shifts calib_rden_edge_r in on
+  // every clk edge (CE tied high); the registered address
+  // calib_rden_srl_a_r selects which tap of the chain appears on Q
+  // (calib_rden_srl_out). The Q31 output is left unconnected.
+  SRLC32E u_calib_rden_srl
+    (
+     .Q   (calib_rden_srl_out),
+     .Q31 (),
+     .A   (calib_rden_srl_a_r),
+     .CE  (1'b1),
+     .CLK (clk),
+     .D   (calib_rden_edge_r)
+     );
+
+  // Register the calibration SRL output on clk (plain D flop: CE high,
+  // R/S low). The registered copy, calib_rden_srl_out_r, is what the
+  // clkdiv-domain logic samples. syn_preserve keeps the flop from being
+  // optimized away.
+  FDRSE u_calib_rden_srl_out_r
+    (
+         .Q   (calib_rden_srl_out_r),
+         .C   (clk),
+         .CE  (1'b1),
+         .D   (calib_rden_srl_out),
+         .R   (1'b0),
+         .S   (1'b0)
+     ) /* synthesis syn_preserve = 1 */;
+
+  // Convert to CLKDIV domain. Two versions are generated because we need
+  // to be able to tell exactly which fast (clk) clock cycle the read
+  // enable was asserted in. Only one of calib_rden_valid or
+  // calib_rden_valid_stgd will be asserted for any given shift value.
+
+  // extra clk-domain stage: calib_rden_srl_out_r delayed by one clk cycle
+  always @(posedge clk)
+    calib_rden_srl_out_r1 <= calib_rden_srl_out_r;
+
+  // sample both the undelayed and the one-clk-delayed version on clkdiv:
+  //  - calib_rden_valid      <- calib_rden_srl_out_r
+  //  - calib_rden_valid_stgd <- calib_rden_srl_out_r1 ("staggered" case)
+  always @(posedge clkdiv) begin
+    calib_rden_valid      <= calib_rden_srl_out_r;
+    calib_rden_valid_stgd <= calib_rden_srl_out_r1;
+  end
+
+  //*****************************************************************
+  // Second set of delays chain, use for normal reads
+  // input = RDEN from controller
+  //*****************************************************************
+
+  // Flops for targeting of multi-cycle path in UCF.
+  // One FDRSE per bit of rden_dly (5 address bits per DQS group, so
+  // 5*DQS_WIDTH flops) re-registers rden_dly on clkdiv to produce
+  // rden_dly_r, which supplies the SRL addresses in gen_rden. CE is tied
+  // high and R/S are tied low, so each flop is a plain D register.
+  // NOTE(review): the generate label and instance names are presumably
+  // referenced by the UCF constraints - confirm before renaming anything.
+  genvar rden_ff_i;
+  generate
+    for (rden_ff_i = 0; rden_ff_i < 5*DQS_WIDTH;
+         rden_ff_i = rden_ff_i+1) begin: gen_rden_dly
+      FDRSE u_ff_rden_dly
+        (
+         .Q   (rden_dly_r[rden_ff_i]),
+         .C   (clkdiv),
+         .CE  (1'b1),
+         .D   (rden_dly[rden_ff_i]),
+         .R   (1'b0),
+         .S   (1'b0)
+         ) /* synthesis syn_preserve = 1 */
+           /* synthesis syn_replicate = 0 */;
+    end
+  endgenerate
+
+  // Per-DQS-group read-enable delay chains for normal operation.
+  // For each DQS group i:
+  //  - u_rden_srl shifts the registered controller read enable
+  //    (ctrl_rden_r) in on clk; the tap that appears on Q is selected by
+  //    that group's 5-bit slice of rden_dly_r (bits [5*i+4 : 5*i]), i.e.
+  //    the delay value recorded for the group by stage 3 calibration
+  //  - u_calib_rden_r registers the SRL output on clk to give calib_rden[i]
+  // The Q31 output of each SRL is left unconnected.
+  genvar rden_i;
+  generate
+    for (rden_i = 0; rden_i < DQS_WIDTH; rden_i = rden_i + 1) begin: gen_rden
+      SRLC32E u_rden_srl
+        (
+         .Q   (rden_srl_out[rden_i]),
+         .Q31 (),
+         .A   ({rden_dly_r[(rden_i*5)+4],
+                rden_dly_r[(rden_i*5)+3],
+                rden_dly_r[(rden_i*5)+2],
+                rden_dly_r[(rden_i*5)+1],
+                rden_dly_r[(rden_i*5)]}),
+         .CE  (1'b1),
+         .CLK (clk),
+         .D   (ctrl_rden_r)
+         );
+      FDRSE u_calib_rden_r
+        (
+         .Q   (calib_rden[rden_i]),
+         .C   (clk),
+         .CE  (1'b1),
+         .D   (rden_srl_out[rden_i]),
+         .R   (1'b0),
+         .S   (1'b0)
+         ) /* synthesis syn_preserve = 1 */;
+    end
+  endgenerate
+
+  //*****************************************************************
+  // indicates that current received data is the correct pattern. Check both
+  // rising and falling data for first DQ in each DQS group. Note that
+  // we're checking using a pipelined version of read data, so need to take
+  // this inherent delay into account in determining final read valid delay
+  // Data is written to the memory in the following order (first -> last):
+  //   0x1, 0xE, 0xE, 0x1, 0x1, 0xE, 0xE, 0x1
+  // Looking just at LSb, expect data in sequence (in binary):
+  //   1, 0, 0, 1, 1, 0, 0, 1
+  // Check for the presence of the first 7 words, and compensate read valid
+  // delay accordingly. Don't check last falling edge data, it may be
+  // corrupted by the DQS tri-state glitch at end of read postamble
+  // (glitch protection not yet active until stage 4 cal)
+  //*****************************************************************
+
+  // Pipeline the captured read data in the clkdiv domain for the pattern
+  // checks below: each rise/fall Q1/Q2 sample gets one register stage (_r),
+  // and the Q1 samples get a second stage (_r1).
+  always @(posedge clkdiv) begin
+    // second pipeline stage (Q1 samples only)
+    rdd_fall_q1_r1 <= rdd_fall_q1_r;
+    rdd_rise_q1_r1 <= rdd_rise_q1_r;
+    // first pipeline stage
+    rdd_fall_q2_r  <= rdd_fall_q2;
+    rdd_rise_q2_r  <= rdd_rise_q2;
+    rdd_fall_q1_r  <= rdd_fall_q1;
+    rdd_rise_q1_r  <= rdd_rise_q1;
+  end
+
+  // Stage 3 pattern detectors (registered on clkdiv). Each flag is set when
+  // the first seven words of the expected sequence 1, 0, 0, 1, 1, 0, 0 are
+  // seen across the current and pipelined read data; the eighth word is
+  // deliberately not checked. Two alignments are checked in parallel.
+  always @(posedge clkdiv) begin
+    // For the following sequence from memory:
+    //   rise[0], fall[0], rise[1], fall[1]
+    // if data is aligned out of fabric ISERDES:
+    //   RDD_RISE_Q2 = rise[0]
+    //   RDD_FALL_Q2 = fall[0]
+    //   RDD_RISE_Q1 = rise[1]
+    //   RDD_FALL_Q1 = fall[1]
+    // words 1-4 come from the _r (previous clkdiv cycle) samples, words
+    // 5-7 from the current samples
+    cal3_data_match <= ((rdd_rise_q2_r == 1) &&
+                        (rdd_fall_q2_r == 0) &&
+                        (rdd_rise_q1_r == 0) &&
+                        (rdd_fall_q1_r == 1) &&
+                        (rdd_rise_q2   == 1) &&
+                        (rdd_fall_q2   == 0) &&
+                        (rdd_rise_q1   == 0));
+
+    // if data is staggered out of fabric ISERDES:
+    //   RDD_RISE_Q1_R = rise[0]
+    //   RDD_FALL_Q1_R = fall[0]
+    //   RDD_RISE_Q2   = rise[1]
+    //   RDD_FALL_Q2   = fall[1]
+    // words 1-2 come from the _r1 samples, words 3-6 from the _r samples
+    // and word 7 from the current sample
+    cal3_data_match_stgd <= ((rdd_rise_q1_r1 == 1) &&
+                             (rdd_fall_q1_r1 == 0) &&
+                             (rdd_rise_q2_r  == 0) &&
+                             (rdd_fall_q2_r  == 1) &&
+                             (rdd_rise_q1_r  == 1) &&
+                             (rdd_fall_q1_r  == 0) &&
+                             (rdd_rise_q2    == 0));
+  end
+
+  // a stage 3 check point occurs whenever either version (normal or
+  // staggered) of the delayed calibration read enable is asserted
+  assign cal3_data_valid = (calib_rden_valid_stgd | calib_rden_valid);
+  // the pattern only counts as found when the match flag agrees with the
+  // read-enable version that is asserted (staggered with staggered, aligned
+  // with aligned)
+  assign cal3_match_found
+    = ((cal3_data_match_stgd && calib_rden_valid_stgd) ||
+       (cal3_data_match && calib_rden_valid));
+  // SRL address stored for normal reads: the calibration SRL address less
+  // the fixed offset CAL3_RDEN_SRL_DLY_DELTA
+  assign cal3_rden_dly = cal3_rden_srl_a - CAL3_RDEN_SRL_DLY_DELTA;
+
+  // Stage 3 state machine: when calibrating, check to see on which clock
+  // cycle (after the read is issued) the expected data pattern arrives, and
+  // record this result for each DQS group.
+  // Flow per DQS group: INIT loads the starting SRL address
+  // (RDEN_BASE_DELAY) -> RDEN_PIPE_CLR_WAIT -> DETECT. In DETECT the SRL
+  // address is incremented (via another PIPE_CLR_WAIT) until the pattern is
+  // found, then DONE either advances to the next DQS group or finishes.
+  // If address 5'b11111 is reached without a match, calib_err_2[0] is set
+  // and the machine returns to IDLE.
+  // NOTE: Can add error checking here in case valid data not found on any
+  //  of the available pipeline stages
+  always @(posedge clkdiv) begin
+    if (rstdiv) begin
+      // SRL address is don't-care out of reset; it is loaded in CAL3_INIT
+      cal3_rden_srl_a <= 5'bxxxxx;
+      cal3_state      <= CAL3_IDLE;
+      calib_done[2]   <= 1'b0;
+      calib_err_2[0]  <= 1'b0;
+      count_rden      <= {DQS_WIDTH{1'b0}};
+      rden_dly        <= {5*DQS_WIDTH{1'b0}};
+    end else begin
+
+      case (cal3_state)
+        // wait for the stage 3 start request; always begin with DQS group 0
+        CAL3_IDLE: begin
+          count_rden <= {DQS_WIDTH{1'b0}};
+          if (calib_start[2]) begin
+            calib_done[2] <= 1'b0;
+            cal3_state    <= CAL3_INIT;
+          end
+        end
+
+        // entered once per DQS group: restart the search from the base delay
+        CAL3_INIT: begin
+          cal3_rden_srl_a <= RDEN_BASE_DELAY;
+          // let SRL pipe clear after loading initial shift value
+          cal3_state      <= CAL3_RDEN_PIPE_CLR_WAIT;
+        end
+
+        // only act on cycles where the delayed read enable marks data valid
+        CAL3_DETECT:
+          if (cal3_data_valid)
+            // if match found at the correct clock cycle
+            if (cal3_match_found) begin
+
+              // For simulation, load SRL addresses for all DQS with same value
+              if (SIM_ONLY != 0) begin
+                for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_rden_dly
+                  rden_dly[(i*5)]   <= cal3_rden_dly[0];
+                  rden_dly[(i*5)+1] <= cal3_rden_dly[1];
+                  rden_dly[(i*5)+2] <= cal3_rden_dly[2];
+                  rden_dly[(i*5)+3] <= cal3_rden_dly[3];
+                  rden_dly[(i*5)+4] <= cal3_rden_dly[4];
+                end
+              end else begin
+                // otherwise store the value only in the 5-bit slice that
+                // belongs to the DQS group currently being calibrated
+                rden_dly[(count_rden*5)]   <= cal3_rden_dly[0];
+                rden_dly[(count_rden*5)+1] <= cal3_rden_dly[1];
+                rden_dly[(count_rden*5)+2] <= cal3_rden_dly[2];
+                rden_dly[(count_rden*5)+3] <= cal3_rden_dly[3];
+                rden_dly[(count_rden*5)+4] <= cal3_rden_dly[4];
+              end
+
+              // Use for stage 4 calibration
+              // (unadjusted SRL address, stored per DQS group)
+              calib_rden_dly[(count_rden*5)]   <= cal3_rden_srl_a[0];
+              calib_rden_dly[(count_rden*5)+1] <= cal3_rden_srl_a[1];
+              calib_rden_dly[(count_rden*5)+2] <= cal3_rden_srl_a[2];
+              calib_rden_dly[(count_rden*5)+3] <= cal3_rden_srl_a[3];
+              calib_rden_dly[(count_rden*5)+4] <= cal3_rden_srl_a[4];
+              cal3_state <= CAL3_DONE;
+            end else begin
+              // If we run out of stages to shift, without finding correct
+              // result, then stop and assert error
+              if (cal3_rden_srl_a == 5'b11111) begin
+                calib_err_2[0] <= 1'b1;
+                cal3_state   <= CAL3_IDLE;
+              end else begin
+                // otherwise, increase the shift value and try again
+                cal3_rden_srl_a <= cal3_rden_srl_a + 1;
+                cal3_state      <= CAL3_RDEN_PIPE_CLR_WAIT;
+              end
+            end
+
+        // give additional time for RDEN_R pipe to clear from effects of
+        // previous pipeline or IDELAY tap change
+        // (calib_rden_pipe_cnt counts down while in this state)
+        CAL3_RDEN_PIPE_CLR_WAIT:
+          if (calib_rden_pipe_cnt == 5'b00000)
+              cal3_state <= CAL3_DETECT;
+
+        // finish after the last DQS group (or after the first one in
+        // simulation); otherwise move on to the next DQS group
+        CAL3_DONE: begin
+          if ((count_rden == DQS_WIDTH-1) || (SIM_ONLY != 0)) begin
+            calib_done[2] <= 1'b1;
+            cal3_state    <= CAL3_IDLE;
+          end else begin
+            count_rden    <= count_rden + 1;
+            cal3_state    <= CAL3_INIT;
+          end
+        end
+      endcase
+    end
+  end
+
+  //*****************************************************************
+  // Last part of stage 3 calibration - compensate for differences
+  // in delay between different DQS groups. Assume that in the worst
+  // case, DQS groups can only differ by one clock cycle. Data for
+  // certain DQS groups must be delayed by one clock cycle.
+  // NOTE: May need to increase allowable variation to greater than
+  //  one clock cycle in certain customer designs.
+  // Algorithm is:
+  //   1. Record shift delay value for DQS[0]
+  //   2. Compare each DQS[x] delay value to that of DQS[0]:
+  //     - If different, then record this fact (RDEN_MUX)
+  //     - If DQS[0] is greater than DQS[x], set RDEN_INC. Assume greater
+  //       by one clock cycle only - this is a key assumption, assume no
+  //       more than a one clock cycle variation.
+  //     - If DQS[0] is less than DQS[x], set RDEN_DEC
+  //   3. After calibration is complete, set control for DQS group
+  //      delay (CALIB_RDEN_SEL):
+  //     - If RDEN_DEC = 1, then assume that DQS[0] is the lowest
+  //       delay (and at least one other DQS group has a higher
+  //       delay).
+  //     - If RDEN_INC = 1, then assume that DQS[0] is the highest
+  //       delay (and that all other DQS groups have the same or
+  //       lower delay).
+  //     - If both RDEN_INC and RDEN_DEC = 1, then flag error
+  //       (variation is too high for this algorithm to handle)
+  //*****************************************************************
+
+  // Per-DQS-group delay comparison for stage 3.
+  // While stage 3 is running (calib_done[2] low), every time the stage 3
+  // state machine finds a match the current SRL address is compared with
+  // the one recorded for DQS[0]; rden_mux marks the groups that differ and
+  // rden_inc / rden_dec record the direction of the difference. Once
+  // calib_done[2] is set, the final calib_rden_sel value and the
+  // calib_err_2[1] error flag are driven from those results.
+  always @(posedge clkdiv) begin
+    if (rstdiv) begin
+      calib_err_2[1] <= 1'b0;
+      calib_rden_sel <= {DQS_WIDTH{1'bx}};
+      rden_dec       <= 1'b0;
+      rden_dly_0     <= 5'bxxxxx;
+      rden_inc       <= 1'b0;
+      rden_mux       <= {DQS_WIDTH{1'b0}};
+    end else begin
+      // if a match is found, then store the value of rden_dly
+      if (!calib_done[2]) begin
+        if ((cal3_state == CAL3_DETECT) && cal3_match_found) begin
+          // store the value for DQS[0] as a reference
+          if (count_rden == 0) begin
+            // for simulation, RDEN calibration only happens for DQS[0]
+            // set RDEN_MUX for all DQS groups to be the same as DQS[0]
+            if (SIM_ONLY != 0)
+              rden_mux <= {DQS_WIDTH{1'b0}};
+            else begin
+              // otherwise, load values for DQS[0]
+              rden_dly_0  <= cal3_rden_srl_a;
+              rden_mux[0] <= 1'b0;
+            end
+          end else if (SIM_ONLY == 0) begin
+            // for all other DQS groups, compare RDEN_DLY delay value with
+            // that of DQS[0]
+            if (rden_dly_0 != cal3_rden_srl_a) begin
+              // record that current DQS group has a different delay
+              // than DQS[0] (the "reference" DQS group)
+              rden_mux[count_rden] <= 1'b1;
+              // rden_inc: DQS[0] delay is larger than this group's delay
+              // rden_dec: DQS[0] delay is smaller than this group's delay
+              if (rden_dly_0 > cal3_rden_srl_a)
+                rden_inc <= 1'b1;
+              else if (rden_dly_0 < cal3_rden_srl_a)
+                rden_dec <= 1'b1;
+              // otherwise, if current DQS group has same delay as DQS[0],
+              // then rden_mux[count_rden] remains at 0 (since rden_mux
+              // array contents initialized to 0)
+            end
+          end
+        end
+      end else begin
+        // Otherwise - if we're done w/ stage 3 calibration:
+        // set final value for RDEN data delay
+        // flag error if both a larger and a smaller delay than DQS[0] were
+        // seen (more variation than this one-cycle scheme can handle)
+        calib_err_2[1] <= (rden_inc && rden_dec);
+        if (rden_inc)
+          // if DQS[0] delay represents max delay
+          calib_rden_sel <= ~rden_mux;
+        else
+          // if DQS[0] delay represents min delay (or all the delays are
+          // the same between DQS groups)
+          calib_rden_sel <= rden_mux;
+      end
+    end
+  end
+
+  // Stage 3 error flag: registered OR of the two stage 3 error sources -
+  // calib_err_2[0] (no match found by the read-enable search) and
+  // calib_err_2[1] (DQS group delays differ in both directions)
+  always @(posedge clkdiv) begin
+    calib_err[2] <= calib_err_2[1] | calib_err_2[0];
+  end
+
+  //***************************************************************************
+  // Stage 4 calibration: DQS gate
+  //***************************************************************************
+
+  //*****************************************************************
+  // indicates that current received data is the correct pattern. Same as
+  // for READ VALID calibration, except that the expected data sequence is
+  // different since DQS gate is asserted after the 6th word.
+  // Data sequence:
+  //  Arrives from memory (at FPGA input) (R, F): 1 0 0 1 1 0 0 1
+  //  After gating the sequence looks like: 1 0 0 1 1 0 1 0 (7th word =
+  //   5th word, 8th word = 6th word)
+  // What if the gate timing is off? Need to make sure we can distinguish
+  // between the results of correct vs. incorrect gate timing. We also use
+  // the "read_valid" signal from stage 3 calibration to help us determine
+  // when to check for a valid sequence for stage 4 calibration (i.e. use
+  // CAL4_DATA_VALID in addition to CAL4_DATA_MATCH/CAL4_DATA_MATCH_STGD)
+  // Note that since the gate signal from the CLK0 domain is synchronized
+  // to the falling edge of DQS, the effect of the gate will only be
+  // seen starting with a rising edge data (although it is possible
+  // the GATE IDDR output could go metastable and cause an unexpected result
+  // on the first rising and falling edges after the gate is enabled).
+  // Also note that the actual DQS glitch can come more than 0.5*tCK after
+  // the last falling edge of DQS and the constraint for this path can
+  // be > 0.5*tCK; however, this means when calibrating, the output of the
+  // GATE IDDR may miss the setup time requirement of the rising edge flop
+  // and only meet it for the falling edge flop. Therefore the rising
+  // edge data immediately following the assertion of the gate can be
+  // either a 1 or a 0 (cannot rely on either value)
+  // As the timing on the gate is varied, we expect to see (sequence of
+  // captured read data shown below):
+  //       - 1 0 0 1 1 0 0 1 (gate is really early, starts and ends before
+  //                          read burst even starts)
+  //       - x 0 0 1 1 0 0 1 (gate pulse starts before the burst, and ends
+  //       - x y 0 1 1 0 0 1  sometime during the burst; x,y = 0, or 1, but 
+  //       - x y x 1 1 0 0 1  all bits that show an x are the same value, 
+  //       - x y x y 1 0 0 1  and y are the same value)
+  //       - x y x y x 0 0 1
+  //       - x y x y x y 0 1 (gate starts just before start of burst)
+  //       - 1 0 x 0 x 0 x 0 (gate starts after 1st falling word. The "x"
+  //                          represents possibility that gate may not disable
+  //                          clock for 2nd rising word in time)
+  //       - 1 0 0 1 x 1 x 1 (gate starts after 2nd falling word)
+  //       - 1 0 0 1 1 0 x 0 (gate starts after 3rd falling word - GOOD!!)
+  //       - 1 0 0 1 1 0 0 1 (gate starts after burst is already done)
+  //*****************************************************************
+
+  // stage 4 result is "good" when the match flag agrees with whichever
+  // version (staggered or aligned) of the delayed read enable is asserted
+  assign cal4_data_good  = (cal4_data_match_stgd &
+                            calib_rden_valid_stgd) |
+                           (cal4_data_match &
+                            calib_rden_valid);
+  // a stage 4 check point occurs when either read-enable version is asserted
+  assign cal4_data_valid = calib_rden_valid_stgd | calib_rden_valid;
+
+  // Stage 4 pattern detectors (registered on clkdiv). Same structure as the
+  // stage 3 detectors, but the expected gated sequence is
+  // 1, 0, 0, 1, 1, 0, <don't care>, 0: the 7th (last rising edge) word is
+  // not checked and the 8th (last falling edge) word must be 0.
+  always @(posedge clkdiv) begin
+    // if data is aligned out of fabric ISERDES:
+    cal4_data_match <= ((rdd_rise_q2_r == 1) &&
+                        (rdd_fall_q2_r == 0) &&
+                        (rdd_rise_q1_r == 0) &&
+                        (rdd_fall_q1_r == 1) &&
+                        (rdd_rise_q2   == 1) &&
+                        (rdd_fall_q2   == 0) &&
+                       // MIG 2.1: Last rising edge data value not
+                       // guaranteed to be certain value at higher
+                       // frequencies
+                       // (rdd_rise_q1   == 0) &&
+                        (rdd_fall_q1   == 0));
+    // if data is staggered out of fabric ISERDES:
+    cal4_data_match_stgd <= ((rdd_rise_q1_r1 == 1) &&
+                             (rdd_fall_q1_r1 == 0) &&
+                             (rdd_rise_q2_r  == 0) &&
+                             (rdd_fall_q2_r  == 1) &&
+                             (rdd_rise_q1_r  == 1) &&
+                             (rdd_fall_q1_r  == 0) &&
+                            // MIG 2.1: Last rising edge data value not
+                            // guaranteed to be certain value at higher
+                            // frequencies
+                            // (rdd_rise_q2    == 0) &&
+                             (rdd_fall_q2    == 0));
+  end
+
+  //*****************************************************************
+  // DQS gate enable generation:
+  // This signal gets synchronized to DQS domain, and drives IDDR
+  // register that in turn asserts/deasserts CE to all 4 or 8 DQ
+  // IDDR's in that DQS group.
+  //   1. During normal (post-cal) operation, this is only for 2 clock
+  //      cycles following the end of a burst. Check for falling edge
+  //      of RDEN. But must also make sure NOT assert for a read-idle-
+  //      read (two non-consecutive reads, separated by exactly one
+  //      idle cycle) - in this case, don't assert the gate because:
+  //      (1) we don't have enough time to deassert the gate before the
+  //          first rising edge of DQS for second burst (b/c of fact
+  //          that DQS gate is generated in the fabric only off rising
+  //          edge of CLK0 - if we somehow had an ODDR in fabric, we
+  //          could pull this off, (2) assumption is that the DQS glitch
+  //          will not rise enough to cause a glitch because the
+  //          post-amble of the first burst is followed immediately by
+  //          the pre-amble of the next burst
+  //   2. During stage 4 calibration, assert for 3 clock cycles
+  //      (assert gate enable one clock cycle early), since we gate out
+  //      the last two words (in addition to the crap on the DQ bus after
+  //      the DQS read postamble).
+  // NOTE: PHY_INIT_RDEN and CTRL_RDEN have slightly different timing w/r
+  //  to when they are asserted w/r to the start of the read burst
+  //  (PHY_INIT_RDEN is one cycle earlier than CTRL_RDEN).
+  //*****************************************************************
+
+  // Register calib_done on the fast clock (clk) for timing purposes before
+  // it is used in the clk-domain gate logic - currently only
+  // calib_done_r[2] (stage 3 done) is used
+  always @(posedge clk)
+    calib_done_r <= calib_done;
+
+  // Read enables used for DQS gate generation (combinational):
+  //  - calib_init_rden: PHY_INIT read enable, passed through only once the
+  //    registered stage 3 done flag (calib_done_r[2]) is set
+  //  - calib_ctrl_rden: controller read enable, passed through unchanged
+  always @(*) begin
+    calib_init_rden = phy_init_rden & calib_done_r[2];
+    calib_ctrl_rden = ctrl_rden;
+  end
+
+  // falling edge of the controller read enable: high now-1, low now
+  assign calib_ctrl_rden_negedge = calib_ctrl_rden_r & ~calib_ctrl_rden;
+  // check for read-idle-read before asserting DQS pulse at end of read:
+  // the pulse fires one clk after the falling edge, and only if the read
+  // enable is still low in that cycle
+  assign calib_ctrl_gate_pulse   = ~calib_ctrl_rden &
+                                   calib_ctrl_rden_negedge_r;
+  // clk-domain registers: delayed pulse (stretches the gate to two cycles),
+  // delayed falling-edge flag and delayed read enable
+  always @(posedge clk) begin
+    calib_ctrl_gate_pulse_r   <= calib_ctrl_gate_pulse;
+    calib_ctrl_rden_negedge_r <= calib_ctrl_rden_negedge;
+    calib_ctrl_rden_r         <= calib_ctrl_rden;
+  end
+
+  // calibration gate pulse: asserted in the cycle where the (stage 3 done
+  // qualified) PHY_INIT read enable is seen to fall
+  assign calib_init_gate_pulse = calib_init_rden_r & ~calib_init_rden;
+  // two delayed copies of the pulse stretch the gate to three cycles
+  always @(posedge clk) begin
+    calib_init_gate_pulse_r1 <= calib_init_gate_pulse_r;
+    calib_init_gate_pulse_r  <= calib_init_gate_pulse;
+    calib_init_rden_r        <= calib_init_rden;
+  end
+
+  // Input to the per-DQS gate delay chains. gate_srl_in is driven low while
+  // any gate pulse term is high, and is high otherwise:
+  //  (1) during cal: 3 cycles (calib_init_gate_pulse and its two delayed
+  //      copies), starting in the cycle the falling edge of the PHY_INIT
+  //      read enable is detected
+  //  (2) during normal ops: 2 cycles (calib_ctrl_gate_pulse and its delayed
+  //      copy), starting one cycle after the falling edge of CTRL_RDEN is
+  //      detected
+  assign gate_srl_in = ~calib_ctrl_gate_pulse    &
+                       ~calib_ctrl_gate_pulse_r  &
+                       ~calib_init_gate_pulse    &
+                       ~calib_init_gate_pulse_r  &
+                       ~calib_init_gate_pulse_r1;
+
+  //*****************************************************************
+  // generate DQS enable signal for each DQS group
+  // There are differences between DQS gate signal for calibration vs. during
+  // normal operation:
+  //  * calibration gates the second to last clock cycle of the burst,
+  //    rather than after the last word (e.g. for a 8-word, 4-cycle burst,
+  //    cycle 4 is gated for calibration; during normal operation, cycle
+  //    5 (i.e. cycle after the last word) is gated)
+  // enable for DQS is deasserted for two clock cycles, except when
+  // we have the preamble for the next read immediately following
+  // the postamble of the current read - assume DQS does not glitch
+  // during this time, that it stays low. Also if we did have to gate
+  // the DQS for this case, then we don't have enough time to deassert
+  // the gate in time for the first rising edge of DQS for the second
+  // read
+  //*****************************************************************
+
+  // Flops for targeting of multi-cycle path in UCF.
+  // One FDRSE per bit of gate_dly (5 address bits per DQS group, so
+  // 5*DQS_WIDTH flops) re-registers gate_dly on clkdiv to produce
+  // gate_dly_r, which supplies the SRL addresses in gen_gate. CE is tied
+  // high and R/S are tied low, so each flop is a plain D register.
+  // NOTE(review): the generate label and instance names are presumably
+  // referenced by the UCF constraints - confirm before renaming anything.
+  genvar gate_ff_i;
+  generate
+    for (gate_ff_i = 0; gate_ff_i < 5*DQS_WIDTH;
+         gate_ff_i = gate_ff_i+1) begin: gen_gate_dly
+      FDRSE u_ff_gate_dly
+        (
+         .Q   (gate_dly_r[gate_ff_i]),
+         .C   (clkdiv),
+         .CE  (1'b1),
+         .D   (gate_dly[gate_ff_i]),
+         .R   (1'b0),
+         .S   (1'b0)
+         ) /* synthesis syn_preserve = 1 */
+           /* synthesis syn_replicate = 0 */;
+    end
+  endgenerate
+
+  // Per-DQS-group gate delay chain. For each DQS group i:
+  //  - u_gate_srl shifts gate_srl_in in on clk; the tap that appears on Q
+  //    is selected by that group's 5-bit slice of gate_dly_r
+  //    (bits [5*i+4 : 5*i])
+  //  - an optional pipeline flop (only when GATE_BASE_DELAY > 0) registers
+  //    the SRL output; otherwise the SRL output is passed straight through
+  //  - u_en_dqs_ff registers the result on clk to produce en_dqs[i]
+  genvar gate_i;
+  generate
+    for (gate_i = 0; gate_i < DQS_WIDTH; gate_i = gate_i + 1) begin: gen_gate
+      SRLC32E u_gate_srl
+        (
+         .Q   (gate_srl_out[gate_i]),
+         .Q31 (),
+         .A   ({gate_dly_r[(gate_i*5)+4],
+                gate_dly_r[(gate_i*5)+3],
+                gate_dly_r[(gate_i*5)+2],
+                gate_dly_r[(gate_i*5)+1],
+                gate_dly_r[(gate_i*5)]}),
+         .CE  (1'b1),
+         .CLK (clk),
+         .D   (gate_srl_in)
+         );
+
+      // For GATE_BASE_DELAY > 0, have one extra cycle to register outputs
+      // from controller before generating DQS gate pulse. In PAR, the
+      // location of the controller logic can be far from the DQS gate
+      // logic (DQS gate logic located near the DQS I/O's), contributing
+      // to large net delays. Registering the controller outputs for
+      // CL >= 4 (above 200MHz) adds a stage of pipelining to reduce net
+      // delays
+      if (GATE_BASE_DELAY > 0) begin: gen_gate_base_dly_gt3
+        // add flop between SRL32 and EN_DQS flop (which is located near the
+        // DDR2 IOB's)
+        FDRSE u_gate_srl_ff
+          (
+         .Q   (gate_srl_out_r[gate_i]),
+         .C   (clk),
+         .CE  (1'b1),
+         .D   (gate_srl_out[gate_i]),
+         .R   (1'b0),
+         .S   (1'b0)
+           ) /* synthesis syn_preserve = 1 */;
+      end else begin: gen_gate_base_dly_le3
+        // no extra pipeline stage: wire the SRL output straight through
+        assign gate_srl_out_r[gate_i] = gate_srl_out[gate_i];
+      end
+
+      // final flop driving the DQS enable for this group; syn_replicate = 0
+      // asks the synthesis tool not to duplicate it
+      FDRSE u_en_dqs_ff
+        (
+         .Q   (en_dqs[gate_i]),
+         .C   (clk),
+         .CE  (1'b1),
+         .D   (gate_srl_out_r[gate_i]),
+         .R   (1'b0),
+         .S   (1'b0)
+         ) /* synthesis syn_preserve = 1 */
+           /* synthesis syn_replicate = 0 */;
+    end
+  endgenerate
+
+  //*****************************************************************
+  // Find valid window: keep track of how long we've been in the same data
+  // window. If it's been long enough, then declare that we've found a stable
+  // valid window - in particular, that we're past any region of instability
+  // associated with the edge of the window. Use only when finding left edge
+  //
+  // cal4_window_cnt counts consecutive good results and is cleared by any
+  // bad result; cal4_stable_window is a sticky flag that is only cleared
+  // again in CAL4_INIT. There is no rstdiv term in this block - both
+  // registers are initialized by passing through CAL4_INIT.
+  //*****************************************************************
+
+  always @(posedge clkdiv)
+    // reset before we start to look for window
+    if (cal4_state == CAL4_INIT) begin
+      cal4_window_cnt    <= 4'b0000;
+      cal4_stable_window <= 1'b0;
+    end else if ((cal4_state == CAL4_FIND_EDGE) && cal4_seek_left) begin
+      // if we're looking for left edge, and incrementing IDELAY, count
+      // consecutive taps over which we're in the window
+      // (the count only changes on cycles where cal4_data_valid is set)
+      if (cal4_data_valid) begin
+        if (cal4_data_good)
+          cal4_window_cnt <= cal4_window_cnt + 1;
+        else
+          cal4_window_cnt <= 4'b0000;
+      end
+
+      // the compare uses the count value from before this clock edge, and
+      // is evaluated whether or not cal4_data_valid is set
+      if (cal4_window_cnt == MIN_WIN_SIZE-1)
+        cal4_stable_window <= 1'b1;
+    end
+
+  //*****************************************************************
+  // keep track of edge tap counts found, and whether we've
+  // incremented to the maximum number of taps allowed
+  //*****************************************************************
+
+  always @(posedge clkdiv) // counts inc/dec strobes issued by the stage 4 FSM
+    if ((cal4_state == CAL4_INIT) || cal4_dlyrst_gate) begin
+      cal4_idel_max_tap <= 1'b0;
+      cal4_idel_bit_tap <= 1'b0;
+      cal4_idel_tap_cnt <= 6'b000000;
+    end else if (cal4_dlyce_gate) begin
+      if (cal4_dlyinc_gate) begin
+        cal4_idel_tap_cnt <= cal4_idel_tap_cnt + 1; // flags below test old count
+        cal4_idel_bit_tap <= (cal4_idel_tap_cnt == CAL4_IDEL_BIT_VAL-2);
+        cal4_idel_max_tap <= (cal4_idel_tap_cnt == 6'b111110); // new count = 63
+      end else begin
+        cal4_idel_tap_cnt <= cal4_idel_tap_cnt - 1; // decrement strobe
+        cal4_idel_bit_tap <= 1'b0;
+        cal4_idel_max_tap <= 1'b0;
+      end
+    end
+
+  always @(posedge clkdiv) // pipe-clear wait counter for stage 3 and stage 4
+    if ((cal4_state != CAL4_RDEN_PIPE_CLR_WAIT) &&
+        (cal3_state != CAL3_RDEN_PIPE_CLR_WAIT))
+      calib_rden_pipe_cnt <= CALIB_RDEN_PIPE_LEN-1; // neither FSM waiting: reload
+    else
+      calib_rden_pipe_cnt <= calib_rden_pipe_cnt - 1; // count down while waiting
+
+  //*****************************************************************
+  // Stage 4 cal state machine
+  //*****************************************************************
+
+  always @(posedge clkdiv) // stage 4 FSM; reset is sampled on the clock edge
+    if (rstdiv) begin
+      calib_done[3]      <= 1'b0;
+      calib_done_tmp[3]  <= 1'b0;
+      calib_err[3]       <= 1'b0;
+      count_gate         <= 'b0;
+      gate_dly           <= 'b0;
+      next_count_gate    <= 'b0;
+      cal4_idel_adj_cnt  <= 6'bxxxxxx; // don't-care; loaded in CAL4_FIND_EDGE
+      cal4_dlyce_gate    <= 1'b0;
+      cal4_dlyinc_gate   <= 1'b0;
+      cal4_dlyrst_gate   <= 1'b0;    // reset handled elsewhere in code
+      cal4_gate_srl_a    <= 5'bxxxxx;  // don't-care; loaded in CAL4_INIT
+      cal4_rden_srl_a    <= 5'bxxxxx;  // don't-care; loaded in CAL4_INIT
+      cal4_ref_req       <= 1'b0;
+      cal4_seek_left     <= 1'bx;      // don't-care; set in CAL4_FIND_WINDOW
+      cal4_state         <= CAL4_IDLE;
+    end else begin
+      cal4_ref_req     <= 1'b0; // per-cycle defaults: each of these four is
+      cal4_dlyce_gate  <= 1'b0; // low unless a state below overrides it in
+      cal4_dlyinc_gate <= 1'b0; // the same cycle (later nonblocking
+      cal4_dlyrst_gate <= 1'b0; // assignment wins)
+
+      case (cal4_state)
+        CAL4_IDLE: begin
+          count_gate      <= 'b0;
+          next_count_gate <= 'b0;
+          if (calib_start[3]) begin
+            gate_dly      <= 'b0;
+            calib_done[3] <= 1'b0;
+            cal4_state    <= CAL4_INIT;
+          end
+        end
+
+        CAL4_INIT: begin
+          // load: (1) initial value of gate delay SRL, (2) appropriate
+          // value of RDEN SRL (so that we get correct "data valid" timing)
+          cal4_gate_srl_a <= GATE_BASE_INIT;
+          cal4_rden_srl_a <= {calib_rden_dly[(count_gate*5)+4],
+                              calib_rden_dly[(count_gate*5)+3],
+                              calib_rden_dly[(count_gate*5)+2],
+                              calib_rden_dly[(count_gate*5)+1],
+                              calib_rden_dly[(count_gate*5)]};
+          // let SRL pipe clear after loading initial shift value
+          cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
+        end
+
+        // sort of an initial state - start checking to see whether we're
+        // already in the window or not
+        CAL4_FIND_WINDOW:
+          // decide right away if we start in the proper window - this
+          // determines if we are then looking for the left (trailing) or
+          // right (leading) edge of the data valid window
+          if (cal4_data_valid) begin
+            // if we find a match - then we're already in window, now look
+            // for left edge. Otherwise, look for right edge of window
+            cal4_seek_left  <= cal4_data_good;
+            cal4_state      <= CAL4_FIND_EDGE;
+          end
+
+        CAL4_FIND_EDGE:
+          // don't do anything until the exact clock cycle when to check that
+          // readback data is valid or not
+          if (cal4_data_valid) begin
+            // we're currently in the window, look for left edge of window
+            if (cal4_seek_left) begin
+              // make sure we've passed the right edge before trying to detect
+              // the left edge (i.e. avoid any edge "instability") - else, we
+              // may detect a "false" edge too soon. By design, if we start in
+              // the data valid window, always expect at least
+              // MIN(BIT_TIME_TAPS,32) (-/+ jitter, see below) taps of valid
+              // window before we hit the left edge (this is because when stage
+              // 4 calibration first begins (i.e., gate_dly = 00, and IDELAY =
+              // 00), we're guaranteed to NOT be in the window, and we always
+              // start searching for MIN(BIT_TIME_TAPS,32) for the right edge
+              // of window. If we don't find it, increment gate_dly, and if we
+              // now start in the window, we have at least approximately
+              // CLK_PERIOD-MIN(BIT_TIME_TAPS,32) = MIN(BIT_TIME_TAPS,32) taps.
+              // It's approximately because jitter, noise, etc. can bring this
+              // value down slightly. Because of this (although VERY UNLIKELY),
+              // we have to protect against decrementing IDELAY below 0
+              // during adjustment phase).
+              if (cal4_stable_window && !cal4_data_good) begin
+                // found left edge of window, dec by MIN(BIT_TIME_TAPS,32)
+                cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
+                cal4_idel_adj_inc <= 1'b0; // 0 = decrement in CAL4_ADJ_IDEL
+                cal4_state        <= CAL4_ADJ_IDEL;
+              end else begin
+                // Otherwise, keep looking for left edge:
+                if (cal4_idel_max_tap) begin
+                  // ran out of taps looking for left edge (max=63) - happens
+                  // for low frequency case, decrement by 32
+                  cal4_idel_adj_cnt <= 6'b100000;
+                  cal4_idel_adj_inc <= 1'b0;
+                  cal4_state        <= CAL4_ADJ_IDEL;
+                end else begin
+                  cal4_dlyce_gate  <= 1'b1; // one-cycle increment strobe
+                  cal4_dlyinc_gate <= 1'b1;
+                  cal4_state       <= CAL4_IDEL_WAIT;
+                end
+              end
+            end else begin
+              // looking for right edge of window:
+              // look for the first match - this means we've found the right
+              // (leading) edge of the data valid window, increment by
+              // MIN(BIT_TIME_TAPS,32)
+              if (cal4_data_good) begin
+                cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
+                cal4_idel_adj_inc <= 1'b1; // 1 = increment in CAL4_ADJ_IDEL
+                cal4_state        <= CAL4_ADJ_IDEL;
+              end else begin
+                // Otherwise, keep looking:
+                // only look for MIN(BIT_TIME_TAPS,32) taps for right edge,
+                // if we haven't found it, then inc gate delay, try again
+                if (cal4_idel_bit_tap) begin
+                  // if we're already maxed out on gate delay, then error out
+                  // (simulation only - calib_err isn't currently connected)
+                  if (cal4_gate_srl_a == 5'b11111) begin
+                    calib_err[3] <= 1'b1;
+                    cal4_state   <= CAL4_IDLE;
+                  end else begin
+                    // otherwise, increment gate delay count, and start
+                    // over again
+                    cal4_gate_srl_a <= cal4_gate_srl_a + 1;
+                    cal4_dlyrst_gate <= 1'b1; // also zeroes cal4_idel_tap_cnt
+                    cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
+                  end
+                end else begin
+                  // keep looking for right edge
+                  cal4_dlyce_gate  <= 1'b1;
+                  cal4_dlyinc_gate <= 1'b1;
+                  cal4_state       <= CAL4_IDEL_WAIT;
+                end
+              end
+            end
+          end
+
+        // wait for GATE IDELAY to settle, after reset or increment
+        CAL4_IDEL_WAIT: begin
+          // For simulation, load SRL addresses for all DQS with same value
+          if (SIM_ONLY != 0) begin
+            for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly
+              gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
+              gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
+              gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
+              gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
+              gate_dly[(i*5)]   <= cal4_gate_srl_a[0];
+            end
+          end else begin
+            gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4]; // 5-bit slice of
+            gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3]; // gate_dly for the
+            gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2]; // group selected
+            gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1]; // by count_gate
+            gate_dly[(count_gate*5)]   <= cal4_gate_srl_a[0];
+          end
+          // check to see if we've found edge of window
+          if (!idel_set_wait)
+            cal4_state <= CAL4_FIND_EDGE;
+        end
+
+        // give additional time for RDEN_R pipe to clear from effects of
+        // previous pipeline (and IDELAY reset)
+        CAL4_RDEN_PIPE_CLR_WAIT: begin
+          // MIG 2.2: Bug fix - make sure to update GATE_DLY count, since
+          // possible for FIND_EDGE->RDEN_PIPE_CLR_WAIT->FIND_WINDOW
+          // transition (i.e. need to make sure the gate count updated in
+          // FIND_EDGE gets reflected in GATE_DLY by the time we reach
+          // state FIND_WINDOW) - previously GATE_DLY only being updated
+          // during state CAL4_IDEL_WAIT
+          if (SIM_ONLY != 0) begin
+            for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly_pipe
+              gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
+              gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
+              gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
+              gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
+              gate_dly[(i*5)]   <= cal4_gate_srl_a[0];
+            end
+          end else begin
+            gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
+            gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
+            gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
+            gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
+            gate_dly[(count_gate*5)]   <= cal4_gate_srl_a[0];
+          end    
+          // look for new window
+          if (calib_rden_pipe_cnt == 5'b00000)
+            cal4_state <= CAL4_FIND_WINDOW;
+       end
+
+        // increment/decrement DQS/DQ IDELAY for final adjustment
+        CAL4_ADJ_IDEL:
+          // add underflow protection for corner case when left edge found
+          // using fewer than MIN(BIT_TIME_TAPS,32) taps
+          if ((cal4_idel_adj_cnt == 6'b000000) ||
+              (cal4_dlyce_gate && !cal4_dlyinc_gate &&
+               (cal4_idel_tap_cnt == 6'b000001))) begin // pending dec hits 0
+            cal4_state <= CAL4_DONE;
+            // stop when all gates calibrated, or gate[0] cal'ed (for sim)
+            if ((count_gate == DQS_WIDTH-1) || (SIM_ONLY != 0))
+              calib_done_tmp[3] <= 1'b1;
+            else
+              // need for VHDL simulation to prevent out-of-index error
+              next_count_gate <= count_gate + 1;
+          end else begin
+            cal4_idel_adj_cnt <= cal4_idel_adj_cnt - 1;
+            cal4_dlyce_gate  <= 1'b1;
+            // whether inc or dec depends on whether left or right edge found
+            cal4_dlyinc_gate <= cal4_idel_adj_inc;
+          end
+
+        // wait for IDELAY output to settle after decrement. Check current
+        // COUNT_GATE value and decide if we're done
+        CAL4_DONE:
+          if (!idel_set_wait) begin
+            count_gate <= next_count_gate;
+            if (calib_done_tmp[3]) begin
+              calib_done[3] <= 1'b1;
+              cal4_state <= CAL4_IDLE;
+            end else begin
+              // request auto-refresh after every DQS group calibrated to
+              // avoid tRAS violation
+              cal4_ref_req <= 1'b1; // overrides the per-cycle default of 0
+              if (calib_ref_done)
+                cal4_state <= CAL4_INIT;
+            end
+          end
+      endcase // no default arm: an unlisted state value leaves cal4_state as is
+    end
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_ctl_io.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_ctl_io.v
new file mode 100644 (file)
index 0000000..df1dc70
--- /dev/null
@@ -0,0 +1,306 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+// 
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a 
+// license to use this text/file solely for design, simulation, 
+// implementation and creation of design files limited 
+// to Xilinx devices or technologies. Use with non-Xilinx 
+// devices or technologies is expressly prohibited and 
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information 
+// "as-is" solely for use in developing programs and 
+// solutions for Xilinx devices, with no obligation on the 
+// part of Xilinx to provide support. By providing this design, 
+// code, or information as one possible implementation of 
+// this feature, application or standard, Xilinx is making no 
+// representation that this implementation is free from any 
+// claims of infringement. You are responsible for 
+// obtaining any rights you may require for your implementation. 
+// Xilinx expressly disclaims any warranty whatsoever with 
+// respect to the adequacy of the implementation, including 
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied 
+// warranties of merchantability or fitness for a particular 
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are 
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part 
+// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_ctl_io.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/29 15:24:03 $
+// \   \  /  \    Date Created: Thu Aug 24 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module puts the memory control signals like address, bank address,
+//   row address strobe, column address strobe, write enable and clock enable
+//   in the IOBs.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_phy_ctl_io # // DDR2 address/control output flops (IOB = "TRUE")
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameters values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter BANK_WIDTH    = 2,
+   parameter CKE_WIDTH     = 1,
+   parameter COL_WIDTH     = 10, // not referenced in this module
+   parameter CS_NUM        = 1,
+   parameter TWO_T_TIME_EN = 0,  // nonzero: mux below is combinational
+   parameter CS_WIDTH      = 1,
+   parameter ODT_WIDTH     = 1,
+   parameter ROW_WIDTH     = 14,
+   parameter DDR_TYPE      = 1   // > 0 enables generation of the ODT flops
+   )
+  (
+   input                   clk0,
+   input                   clk90,  // not referenced in this module's logic
+   input                   rst0,
+   input                   rst90,  // not referenced in this module's logic
+   input [ROW_WIDTH-1:0]   ctrl_addr,
+   input [BANK_WIDTH-1:0]  ctrl_ba,
+   input                   ctrl_ras_n,
+   input                   ctrl_cas_n,
+   input                   ctrl_we_n,
+   input [CS_NUM-1:0]      ctrl_cs_n,
+   input [ROW_WIDTH-1:0]   phy_init_addr,
+   input [BANK_WIDTH-1:0]  phy_init_ba,
+   input                   phy_init_ras_n,
+   input                   phy_init_cas_n,
+   input                   phy_init_we_n,
+   input [CS_NUM-1:0]      phy_init_cs_n,
+   input [CKE_WIDTH-1:0]   phy_init_cke,  // drives ddr_cke directly (no mux)
+   input                   phy_init_data_sel,
+   input [CS_NUM-1:0]      odt,
+   output [ROW_WIDTH-1:0]  ddr_addr,
+   output [BANK_WIDTH-1:0] ddr_ba,
+   output                  ddr_ras_n,
+   output                  ddr_cas_n,
+   output                  ddr_we_n,
+   output [CKE_WIDTH-1:0]  ddr_cke,
+   output [CS_WIDTH-1:0]   ddr_cs_n,
+   output [ODT_WIDTH-1:0]  ddr_odt
+   );
+
+  reg [ROW_WIDTH-1:0]     addr_mux;
+  reg [BANK_WIDTH-1:0]    ba_mux;
+  reg                     cas_n_mux;
+  reg [CS_NUM-1:0]        cs_n_mux;
+  reg                     ras_n_mux;
+  reg                     we_n_mux;
+
+
+
+  //***************************************************************************
+
+
+
+
+  // MUX to choose from either PHY or controller for SDRAM control
+  // (phy_init_data_sel = 1: controller ctrl_* inputs; 0: phy_init_* inputs)
+  generate // in 2t timing mode the extra register stage cannot be used.
+    if(TWO_T_TIME_EN) begin // the control signals are asserted for two cycles
+      always @(*)begin
+        if (phy_init_data_sel) begin
+          addr_mux  = ctrl_addr;
+          ba_mux    = ctrl_ba;
+          cas_n_mux = ctrl_cas_n;
+          cs_n_mux  = ctrl_cs_n;
+          ras_n_mux = ctrl_ras_n;
+          we_n_mux  = ctrl_we_n;
+        end else begin
+          addr_mux  = phy_init_addr;
+          ba_mux    = phy_init_ba;
+          cas_n_mux = phy_init_cas_n;
+          cs_n_mux  = phy_init_cs_n;
+          ras_n_mux = phy_init_ras_n;
+          we_n_mux  = phy_init_we_n;
+        end
+      end
+    end else begin
+      always @(posedge clk0)begin // register the signals in non 2t mode
+        if (phy_init_data_sel) begin
+          addr_mux <= ctrl_addr;
+          ba_mux <= ctrl_ba;
+          cas_n_mux <= ctrl_cas_n;
+          cs_n_mux <= ctrl_cs_n;
+          ras_n_mux <= ctrl_ras_n;
+          we_n_mux <= ctrl_we_n;
+        end else begin
+          addr_mux <= phy_init_addr;
+          ba_mux <= phy_init_ba;
+          cas_n_mux <= phy_init_cas_n;
+          cs_n_mux <= phy_init_cs_n;
+          ras_n_mux <= phy_init_ras_n;
+          we_n_mux <= phy_init_we_n;
+        end
+      end
+    end
+  endgenerate
+
+  //***************************************************************************
+  // Output flop instantiation
+  // NOTE: Make sure all control/address flops are placed in IOBs
+  //***************************************************************************
+
+  // RAS: = 1 at reset
+  (* IOB = "TRUE" *) FDCPE u_ff_ras_n
+    (
+     .Q   (ddr_ras_n),
+     .C   (clk0),
+     .CE  (1'b1),
+     .CLR (1'b0),
+     .D   (ras_n_mux),
+     .PRE (rst0)
+     ) /* synthesis syn_useioff = 1 */;
+
+  // CAS: = 1 at reset
+  (* IOB = "TRUE" *) FDCPE u_ff_cas_n
+    (
+     .Q   (ddr_cas_n),
+     .C   (clk0),
+     .CE  (1'b1),
+     .CLR (1'b0),
+     .D   (cas_n_mux),
+     .PRE (rst0)
+     ) /* synthesis syn_useioff = 1 */;
+
+  // WE: = 1 at reset
+  (* IOB = "TRUE" *) FDCPE u_ff_we_n
+    (
+     .Q   (ddr_we_n),
+     .C   (clk0),
+     .CE  (1'b1),
+     .CLR (1'b0),
+     .D   (we_n_mux),
+     .PRE (rst0)
+     ) /* synthesis syn_useioff = 1 */;
+
+  // CKE: = 0 at reset
+  genvar cke_i;
+  generate
+    for (cke_i = 0; cke_i < CKE_WIDTH; cke_i = cke_i + 1) begin: gen_cke
+      (* IOB = "TRUE" *) FDCPE u_ff_cke
+        (
+         .Q   (ddr_cke[cke_i]),
+         .C   (clk0),
+         .CE  (1'b1),
+         .CLR (rst0),
+         .D   (phy_init_cke[cke_i]),
+         .PRE (1'b0)
+         ) /* synthesis syn_useioff = 1 */;
+    end
+  endgenerate
+
+  // chip select: = 1 at reset
+  // For unbuffered dimms the loading will be high. The chip select
+  // can be asserted early if the loading is very high. The
+  // code as is uses clock 0. If needed clock 270 can be used to
+  // toggle chip select 1/4 clock cycle early. The code has
+  // the clock 90 input for the early assertion of chip select.
+  // Index (cs_i*CS_NUM)/CS_WIDTH maps each CS pin onto one of the CS_NUM selects.
+  genvar cs_i;
+  generate
+    for(cs_i = 0; cs_i < CS_WIDTH; cs_i = cs_i + 1) begin: gen_cs_n
+      if(TWO_T_TIME_EN) begin
+         (* IOB = "TRUE" *) FDCPE u_ff_cs_n
+           (
+            .Q   (ddr_cs_n[cs_i]),
+            .C   (clk0),
+            .CE  (1'b1),
+            .CLR (1'b0),
+            .D   (cs_n_mux[(cs_i*CS_NUM)/CS_WIDTH]),
+            .PRE (rst0)
+            ) /* synthesis syn_useioff = 1 */;
+      end else begin // !TWO_T_TIME_EN: as written, identical to the branch above
+         (* IOB = "TRUE" *) FDCPE u_ff_cs_n
+           (
+            .Q   (ddr_cs_n[cs_i]),
+            .C   (clk0),
+            .CE  (1'b1),
+            .CLR (1'b0),
+            .D   (cs_n_mux[(cs_i*CS_NUM)/CS_WIDTH]),
+            .PRE (rst0)
+            ) /* synthesis syn_useioff = 1 */;
+      end // else: !if(TWO_T_TIME_EN)
+    end
+  endgenerate
+
+  // address: = X at reset (both CLR and PRE are tied to 0)
+  genvar addr_i;
+  generate
+    for (addr_i = 0; addr_i < ROW_WIDTH; addr_i = addr_i + 1) begin: gen_addr
+      (* IOB = "TRUE" *) FDCPE u_ff_addr
+        (
+         .Q   (ddr_addr[addr_i]),
+         .C   (clk0),
+         .CE  (1'b1),
+         .CLR (1'b0),
+         .D   (addr_mux[addr_i]),
+         .PRE (1'b0)
+         ) /* synthesis syn_useioff = 1 */;
+    end
+  endgenerate
+
+  // bank address = X at reset (both CLR and PRE are tied to 0)
+  genvar ba_i;
+  generate
+    for (ba_i = 0; ba_i < BANK_WIDTH; ba_i = ba_i + 1) begin: gen_ba
+      (* IOB = "TRUE" *) FDCPE u_ff_ba
+        (
+         .Q   (ddr_ba[ba_i]),
+         .C   (clk0),
+         .CE  (1'b1),
+         .CLR (1'b0),
+         .D   (ba_mux[ba_i]),
+         .PRE (1'b0)
+         ) /* synthesis syn_useioff = 1 */;
+    end
+  endgenerate
+
+  // ODT control = 0 at reset; flops exist only when DDR_TYPE > 0
+  genvar odt_i;
+  generate
+    if (DDR_TYPE > 0) begin: gen_odt_ddr2
+      for (odt_i = 0; odt_i < ODT_WIDTH; odt_i = odt_i + 1) begin: gen_odt
+        (* IOB = "TRUE" *) FDCPE u_ff_odt
+          (
+           .Q   (ddr_odt[odt_i]),
+           .C   (clk0),
+           .CE  (1'b1),
+           .CLR (rst0),
+          .D   (odt[(odt_i*CS_NUM)/ODT_WIDTH]),
+           .PRE (1'b0)
+           ) /* synthesis syn_useioff = 1 */;
+      end
+    end
+  endgenerate
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_dm_iob.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_dm_iob.v
new file mode 100644 (file)
index 0000000..80ca839
--- /dev/null
@@ -0,0 +1,108 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_dm_iob.v
+// /___/   /\     Date Last Modified: $Date: 2008/05/21 14:53:09 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module places the data mask signals into the IOBs.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_phy_dm_iob // one data-mask output: CE flop -> ODDR -> OBUF
+  (
+   input  clk90,
+   input  dm_ce,
+   input  mask_data_rise,
+   input  mask_data_fall,
+   output ddr_dm
+   );
+
+  wire    dm_out;   // ODDR output feeding the output buffer
+  wire    dm_ce_r;  // registered copy of dm_ce, used as the ODDR clock enable
+
+  FDRSE_1 u_dm_ce // NOTE(review): FDRSE_1 presumably clocks on falling clk90
+    (
+     .Q    (dm_ce_r),
+     .C    (clk90),
+     .CE   (1'b1),
+     .D    (dm_ce),
+     .R   (1'b0),
+     .S   (1'b0)
+     );
+
+  ODDR #
+    (
+     .SRTYPE("SYNC"),
+     .DDR_CLK_EDGE("SAME_EDGE")
+     )
+    u_oddr_dm
+      (
+       .Q  (dm_out),
+       .C  (clk90),
+       .CE (dm_ce_r),
+       .D1 (mask_data_rise),
+       .D2 (mask_data_fall),
+       .R  (1'b0),
+       .S  (1'b0)
+       );
+
+  OBUF u_obuf_dm
+    (
+     .I (dm_out),
+     .O (ddr_dm)
+     );
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_dq_iob.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_dq_iob.v
new file mode 100644 (file)
index 0000000..ea96b50
--- /dev/null
@@ -0,0 +1,929 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_dq_iob.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/17 07:52:27 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module places the data in the IOBs.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_phy_dq_iob # // one DQ bit: IOBUF, write ODDRs, IODELAY + 3-stage read capture
+  (
+   parameter DQ_COL                = 0, // I/O column of this DQ: 0 = left, 1 = center, 2 = right
+   parameter DQ_MS                 = 0, // 1 = master I/O of the pair, 0 = slave
+   parameter HIGH_PERFORMANCE_MODE = "TRUE" // passed straight through to the IODELAY
+   )
+  (
+   input        clk0,          // clocks the stage 2 / stage 3 capture flops
+   input        clk90,         // clocks the write-data and tri-state ODDRs
+   input        clkdiv0,       // clock (C) of the IODELAY
+   input        rst90,         // asynchronous set of the tri-state ODDR
+   input        dlyinc,        // IODELAY INC
+   input        dlyce,         // IODELAY CE
+   input        dlyrst,        // IODELAY RST
+   input  [1:0] dq_oe_n,       // tri-state control: [0] -> ODDR D1, [1] -> ODDR D2
+   input        dqs,           // strobe; inverted below to clock the IDDR
+   input        ce,            // clock enable of the IDDR
+   input        rd_data_sel,   // 1: output the stg3a path, 0: output the stg3b path
+   input        wr_data_rise,  // write data, ODDR D1
+   input        wr_data_fall,  // write data, ODDR D2
+   output       rd_data_rise,  // captured rise data after the output MUX
+   output       rd_data_fall,  // captured fall data after the output MUX
+   inout        ddr_dq         // bidirectional pad (IOBUF IO)
+   );
+  // Internal nets (stgNa = clk0 rising-edge path, stgNb = clk0 falling-edge path)
+  wire       dq_iddr_clk;
+  wire       dq_idelay;
+  wire       dq_in;
+  wire       dq_oe_n_r;
+  wire       dq_out;
+  wire       stg2a_out_fall;
+  wire       stg2a_out_rise;
+  wire       stg2b_out_fall;
+  wire       stg2b_out_rise;
+  wire       stg3a_out_fall;
+  wire       stg3a_out_rise;
+  wire       stg3b_out_fall;
+  wire       stg3b_out_rise;
+
+  //***************************************************************************
+  // Directed routing constraints for route between IDDR and stage 2 capture
+  // in fabric.
+  // Only 2 out of the 12 wire declarations will be used for any given
+  // instantiation of this module.
+  // Varies according to:
+  //  (1) I/O column (left, center, right) used
+  //  (2) Which I/O in I/O pair (master, slave) used
+  // Nomenclature: _Xy, X = column (0 = left, 1 = center, 2 = right),
+  //  y = master or slave
+  //***************************************************************************
+
+  // master, left
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;93a1e3bb!-1;-78112;-4200;S!0;-143;-1248!1;-452;0!2;2747;1575!3;2461;81!4;2732;-960!4;2732;-984!5;404;8!6;404;8!7;683;-568;L!8;843;24;L!}" *)
+  wire stg1_out_rise_0m;
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;907923a!-1;-78112;-4192;S!0;-143;-1192!0;-143;-1272!1;-452;0!2;-452;0!3;2723;-385!4;2731;-311!5;3823;-1983!6;5209;1271!7;1394;3072!8;0;-8!9;404;8!10;0;-144!11;683;-536;L!12;404;8!14;843;8;L!}" *)
+  wire stg1_out_fall_0m;
+  // slave, left
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;53bb9d6f!-1;-78112;-4600;S!0;-143;-712!1;-452;0!2;1008;-552!3;2780;1360!4;0;-8!5;0;-240!5;0;-264!6;404;8!7;404;8!8;683;-568;L!9;843;24;L!}" *)
+  wire stg1_out_rise_0s;
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;46bf60d8!-1;-78112;-4592;S!0;-143;-800!1;-452;0!2;1040;1592!3;5875;-85!4;-3127;-843!4;-3127;-939!5;404;8!6;404;8!7;683;-696;L!8;843;-136;L!}" *)
+  wire stg1_out_fall_0s;
+  // master, center
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;9ee47800!-1;-6504;-50024;S!0;-175;-1136!1;-484;0!2;-3208;1552!3;-4160;-2092!4;-1428;1172!4;-1428;1076!5;404;8!6;404;8!7;843;-152;L!8;683;-728;L!}" *)
+  wire stg1_out_rise_1m;
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;e7df31c2!-1;-6504;-50016;S!0;-175;-1192!1;-484;0!2;-5701;1523!3;-3095;-715!3;-4423;2421!4;0;-8!5;1328;-3288!6;0;-240!7;404;8!8;404;8!9;683;-696;L!10;843;-136;L!}" *)
+  wire stg1_out_fall_1m;
+  // slave, center
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;a8c11eb3!-1;-6504;-50424;S!0;-175;-856!1;-484;0!2;-5677;-337!3;1033;1217!3;-295;4353!4;0;-8!5;1328;-3288!6;0;-120!7;404;8!8;404;8!9;683;-696;L!10;843;-152;L!}" *)
+  wire stg1_out_rise_1s;
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;ed30cce!-1;-6504;-50416;S!0;-175;-848!1;-484;0!2;-3192;-432!3;-1452;1368!3;-6645;85!4;0;-8!5;5193;1035!6;0;-264!7;404;8!8;404;8!9;683;-568;L!10;843;24;L!}" *)
+  wire stg1_out_fall_1s;
+  // master, right
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;4d035a44!-1;54728;-108896;S!0;-175;-1248!1;-484;0!2;-3192;-424!3;-4208;2092!4;-1396;-972!4;-1396;-996!5;404;8!6;404;8!7;683;-568;L!8;843;24;L!}" *)
+  wire stg1_out_rise_2m;
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;92ae8739!-1;54728;-108888;S!0;-175;-1272!1;-484;0!2;-5677;-329!3;-1691;-83!4;-1428;1076!4;-1428;1052!5;404;8!6;404;8!7;683;-728;L!8;843;-136;L!}" *)
+  wire stg1_out_fall_2m;
+  // slave, right
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;9de34bf1!-1;54728;-109296;S!0;-175;-712!1;-484;0!2;-5685;-475!3;1041;1107!3;1041;1011!4;404;8!5;404;8!6;683;-536;L!7;843;24;L!}" *)
+  wire stg1_out_rise_2s;
+  (* syn_keep = "1", keep = "TRUE",
+     ROUTE = "{3;1;5vlx50tff1136;1df9e65d!-1;54728;-109288;S!0;-175;-800!1;-484;0!2;-3208;1608!3;-1436;-792!4;0;-8!5;0;-240!5;0;-144!6;404;8!7;404;8!8;843;-136;L!9;683;-696;L!}" *)
+  wire stg1_out_fall_2s;
+
+  //***************************************************************************
+  // Bidirectional I/O
+  //***************************************************************************
+
+  IOBUF u_iobuf_dq
+    (
+     .I  (dq_out),     // from the write-data ODDR
+     .T  (dq_oe_n_r),  // from the tri-state ODDR
+     .IO (ddr_dq),
+     .O  (dq_in)       // to the IODELAY on the read path
+     );
+
+  //***************************************************************************
+  // Write (output) path
+  //***************************************************************************
+
+  // on a write, rising edge of DQS corresponds to rising edge of CLK180
+  // (aka falling edge of CLK0 -> rising edge DQS). We also know:
+  //  1. data must be driven 1/4 clk cycle before corresponding DQS edge
+  //  2. first rising DQS edge driven on falling edge of CLK0
+  //  3. rising data must be driven 1/4 cycle before falling edge of CLK0
+  //  4. therefore, rising data driven on rising edge of CLK
+  ODDR #
+    (
+     .SRTYPE("SYNC"),
+     .DDR_CLK_EDGE("SAME_EDGE")
+     )
+    u_oddr_dq
+      (
+       .Q  (dq_out),
+       .C  (clk90), // NOTE(review): step 4 above says "CLK"; the clock actually used is clk90
+       .CE (1'b1),
+       .D1 (wr_data_rise),
+       .D2 (wr_data_fall),
+       .R  (1'b0),
+       .S  (1'b0)
+       );
+
+  // make sure output is tri-state during reset (DQ_OE_N_R = 1)
+  ODDR #
+    (
+     .SRTYPE("ASYNC"), // set is asynchronous, unlike the data ODDR above
+     .DDR_CLK_EDGE("SAME_EDGE")
+     )
+    u_tri_state_dq
+      (
+       .Q  (dq_oe_n_r),
+       .C  (clk90),
+       .CE (1'b1),
+       .D1 (dq_oe_n[0]),
+       .D2 (dq_oe_n[1]),
+       .R  (1'b0),
+       .S  (rst90) // reset sets Q, i.e. dq_oe_n_r = 1
+       );
+
+  //***************************************************************************
+  // Read data capture scheme description:
+  // Data capture consists of 3 ranks of flops, and a MUX
+  //  1. Rank 1 ("Stage 1"): IDDR captures delayed DDR DQ from memory using
+  //     delayed DQS.
+  //     - Data is split into 2 SDR streams, one each for rise and fall data.
+  //     - BUFIO (DQS) input inverted to IDDR. IDDR configured in SAME_EDGE
+  //       mode. This means that: (1) Q1 = fall data, Q2 = rise data,
+  //       (2) Both rise and fall data are output on falling edge of DQS -
+  //       rather than rise output being output on one edge of DQS, and fall
+  //       data on the other edge if the IDDR were configured in OPPOSITE_EDGE
+  //       mode. This simplifies Stage 2 capture (only one core clock edge
+  //       used, removing effects of duty-cycle-distortion), and saves one
+  //       fabric flop in Rank 3.
+  //  2. Rank 2 ("Stage 2"): Fabric flops are used to capture output of first
+  //     rank into FPGA clock (CLK) domain. Each rising/falling SDR stream
+  //     from IDDR is fed into two flops, one clocked off rising and one off
+  //     falling edge of CLK. One of these flops is chosen, with the choice
+  //     being the one that reduces # of DQ/DQS taps necessary to align Stage
+  //     1 and Stage 2. Same edge is used to capture both rise and fall SDR
+  //     streams.
+  //  3. Rank 3 ("Stage 3"): Removes half-cycle paths in CLK domain from
+  //     output of Rank 2. This stage, like Stage 2, is clocked by CLK. Note
+  //     that Stage 3 can be expanded to also support SERDES functionality
+  //  4. Output MUX: Selects whether Stage 1 output is aligned to rising or
+  //     falling edge of CLK (i.e. specifically this selects whether IDDR
+  //     rise/fall output is transferred to rising or falling edge of CLK).
+  // Implementation:
+  //  1. Rank 1 is implemented using an IDDR primitive
+  //  2. Rank 2 is implemented using:
+  //     - An RPM to fix the location of the capture flops near the DQ I/O.
+  //       The exact RPM used depends on which I/O column (left, center,
+  //       right) the DQ I/O is placed at - this affects the optimal location
+  //       of the slice flops (or does it - can we always choose the two
+  //       columns of slices to the immediate right of the I/O to use, no
+  //       matter what the column?). The origin of the RPM must be set in the
+  //       UCF file using the RLOC_ORIGIN constraint (where the origin is
+  //       based on the DQ I/O location).
+  //     - Directed Routing Constraints ("DIRT strings") to fix the routing
+  //       to the rank 2 fabric flops. This is done to minimize: (1) total
+  //       route delay (and therefore minimize voltage/temperature-related
+  //       variations), and (2) minimize skew both within each rising and
+  //       falling data net, as well as between the rising and falling nets.
+  //       The exact DIRT string used depends on: (1) which I/O column the
+  //       DQ I/O is placed, and (2) whether the DQ I/O is placed on the
+  //       "Master" or "Slave" I/O of a diff pair (DQ is not differential, but
+  //       the routing will be affected by which of each I/O pair is used)
+  // 3. Rank 3 is implemented using fabric flops. No LOC or DIRT constraints
+  //    are used, tools are expected to place these and meet PERIOD timing
+  //    without constraints (constraints may be necessary for "full" designs,
+  //    in this case, user may need to add LOC constraints - if this is the
+  //    case, there are no constraints - other than meeting PERIOD timing -
+  //    for rank 3 flops).
+  //***************************************************************************
+
+  //***************************************************************************
+  // MIG 2.2: Define AREA_GROUP = "DDR_CAPTURE_FFS" contain all RPM flops in
+  //          design. In UCF file, add constraint:
+  //             AREA_GROUP "DDR_CAPTURE_FFS" GROUP = CLOSED;
+  //          This is done to prevent MAP from packing unrelated logic into
+  //          the slices used by the RPMs. Doing so may cause the DIRT strings
+  //          that define the IDDR -> fabric flop routing to later become
+  //          unroutable during PAR because the unrelated logic placed by MAP
+  //          may use routing resources required by the DIRT strings. MAP
+  //          does not currently take into account DIRT strings when placing
+  //          logic
+  //***************************************************************************
+
+  // IDELAY to delay incoming data for synchronization purposes
+  IODELAY #
+    (
+     .DELAY_SRC             ("I"),
+     .IDELAY_TYPE           ("VARIABLE"),
+     .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE),
+     .IDELAY_VALUE          (0),
+     .ODELAY_VALUE          (0)
+     )
+    u_idelay_dq
+      (
+       .DATAOUT (dq_idelay), // delayed DQ, feeds the IDDR D input
+       .C       (clkdiv0),
+       .CE      (dlyce),
+       .DATAIN  (),          // left unconnected
+       .IDATAIN (dq_in),     // pad input from the IOBUF
+       .INC     (dlyinc),
+       .ODATAIN (),          // left unconnected
+       .RST     (dlyrst),
+       .T       ()           // left unconnected
+       );
+
+  //***************************************************************************
+  // Rank 1 capture: Use IDDR to generate two SDR outputs
+  //***************************************************************************
+
+  // invert clock to IDDR in order to use SAME_EDGE mode (otherwise, we "run
+  // out of clocks" because DQS is not continuous)
+  assign dq_iddr_clk = ~dqs;
+  // The IDDR itself is instantiated inside each generate branch below.
+  //***************************************************************************
+  // Rank 2 capture: Use fabric flops to capture Rank 1 output. Use RPM and
+  // DIRT strings here.
+  // BEL ("Basic Element of Logic") and relative location constraints for
+  // second stage capture.
+  // Varies according to:
+  //  (1) I/O column (left, center, right) used
+  //  (2) Which I/O in I/O pair (master, slave) used
+  //***************************************************************************
+
+  // Six different cases for the different I/O column, master/slave
+  // combinations (can't seem to do this using a localparam, which
+  // would be easier, XST doesn't allow it)
+  generate // NOTE(review): no final else - other DQ_MS/DQ_COL values instantiate nothing
+    if ((DQ_MS == 1) && (DQ_COL == 0)) begin: gen_stg2_0m
+
+      //*****************************************************************
+      // master, left
+      //*****************************************************************
+
+      IDDR #
+        (
+         .DDR_CLK_EDGE ("SAME_EDGE")
+         )
+        u_iddr_dq
+          (
+           .Q1 (stg1_out_fall_0m), // Q1 carries fall data (clock is inverted DQS)
+           .Q2 (stg1_out_rise_0m), // Q2 carries rise data
+           .C  (dq_iddr_clk),
+           .CE (ce),
+           .D  (dq_idelay),
+           .R  (1'b0),
+           .S  (1'b0)
+           );
+
+      //*********************************************************
+      // Slice #1 (posedge CLK): Used for:
+      //  1. IDDR transfer to CLK0 rising edge domain ("stg2a")
+      //  2. stg2 falling edge -> stg3 rising edge transfer
+      //*********************************************************
+
+      // Stage 2 capture
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "DFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_fall
+        (
+         .Q   (stg2a_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_0m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_rise
+        (
+         .Q   (stg2a_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_0m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // Stage 3 falling -> rising edge translation
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "BFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_fall
+        (
+         .Q   (stg3b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_fall),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "AFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_rise
+        (
+         .Q   (stg3b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_rise),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+
+      //*********************************************************
+      // Slice #2 (negedge CLK): Used for:
+      //  1. IDDR transfer to CLK0 falling edge domain ("stg2b")
+      //*********************************************************
+
+      (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "DFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_fall
+        (
+         .Q   (stg2b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_0m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+
+      (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_rise
+        (
+         .Q   (stg2b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_0m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+
+    end else if ((DQ_MS == 0) && (DQ_COL == 0)) begin: gen_stg2_0s
+
+      //*****************************************************************
+      // slave, left
+      //*****************************************************************
+
+      IDDR #
+        (
+         .DDR_CLK_EDGE ("SAME_EDGE")
+         )
+        u_iddr_dq
+          (
+           .Q1 (stg1_out_fall_0s),
+           .Q2 (stg1_out_rise_0s),
+           .C  (dq_iddr_clk),
+           .CE (ce),
+           .D  (dq_idelay),
+           .R  (1'b0),
+           .S  (1'b0)
+           );
+      // stg2a: IDDR outputs captured on the rising edge of clk0
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "BFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_fall
+        (
+         .Q   (stg2a_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_0s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_rise
+        (
+         .Q   (stg2a_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_0s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // stg3b: stg2b outputs re-registered on the rising edge of clk0
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "DFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_fall
+        (
+         .Q   (stg3b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_fall),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "AFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_rise
+        (
+         .Q   (stg3b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_rise),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // stg2b: IDDR outputs captured by FDRSE_1 flops (clk0 falling edge)
+      (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "AFF",
+       AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_fall
+        (
+         .Q   (stg2b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_0s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "CFF",
+       AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_rise
+        (
+         .Q   (stg2b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_0s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+
+    end else if ((DQ_MS == 1) && (DQ_COL == 1))  begin: gen_stg2_1m
+
+      //*****************************************************************
+      // master, center
+      //*****************************************************************
+
+      IDDR #
+        (
+         .DDR_CLK_EDGE ("SAME_EDGE")
+         )
+        u_iddr_dq
+          (
+           .Q1 (stg1_out_fall_1m),
+           .Q2 (stg1_out_rise_1m),
+           .C  (dq_iddr_clk),
+           .CE (ce),
+           .D  (dq_idelay),
+           .R  (1'b0),
+           .S  (1'b0)
+           );
+      // stg2a: IDDR outputs captured on the rising edge of clk0
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "BFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_fall
+        (
+         .Q   (stg2a_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_1m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "AFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_rise
+        (
+         .Q   (stg2a_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_1m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // stg3b: stg2b outputs re-registered on the rising edge of clk0
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "DFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_fall
+        (
+         .Q   (stg3b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_fall),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_rise
+        (
+         .Q   (stg3b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_rise),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // stg2b: IDDR outputs captured by FDRSE_1 flops (clk0 falling edge)
+      (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "AFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_fall
+        (
+         .Q   (stg2b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_1m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "BFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_rise
+        (
+         .Q   (stg2b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_1m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+
+    end else if ((DQ_MS == 0) && (DQ_COL == 1)) begin: gen_stg2_1s
+
+      //*****************************************************************
+      // slave, center
+      //*****************************************************************
+
+      IDDR #
+        (
+         .DDR_CLK_EDGE ("SAME_EDGE")
+         )
+        u_iddr_dq
+          (
+           .Q1 (stg1_out_fall_1s),
+           .Q2 (stg1_out_rise_1s),
+           .C  (dq_iddr_clk),
+           .CE (ce),
+           .D  (dq_idelay),
+           .R  (1'b0),
+           .S  (1'b0)
+           );
+      // stg2a: IDDR outputs captured on the rising edge of clk0
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_fall
+        (
+         .Q   (stg2a_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_1s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "BFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_rise
+        (
+         .Q   (stg2a_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_1s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // stg3b: stg2b outputs re-registered on the rising edge of clk0
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "DFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_fall
+        (
+         .Q   (stg3b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_fall),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "AFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_rise
+        (
+         .Q   (stg3b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_rise),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // stg2b: IDDR outputs captured by FDRSE_1 flops (clk0 falling edge)
+      (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_fall
+        (
+         .Q   (stg2b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_1s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "BFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_rise
+        (
+         .Q   (stg2b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_1s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+
+    end else if ((DQ_MS == 1) && (DQ_COL == 2)) begin: gen_stg2_2m
+
+      //*****************************************************************
+      // master, right
+      //*****************************************************************
+
+      IDDR #
+        (
+         .DDR_CLK_EDGE ("SAME_EDGE")
+         )
+        u_iddr_dq
+          (
+           .Q1 (stg1_out_fall_2m),
+           .Q2 (stg1_out_rise_2m),
+           .C  (dq_iddr_clk),
+           .CE (ce),
+           .D  (dq_idelay),
+           .R  (1'b0),
+           .S  (1'b0)
+           );
+      // stg2a: IDDR outputs captured on the rising edge of clk0
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "AFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_fall
+        (
+         .Q   (stg2a_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_2m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_rise
+        (
+         .Q   (stg2a_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_2m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // stg3b: stg2b outputs re-registered on the rising edge of clk0
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "DFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_fall
+        (
+         .Q   (stg3b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_fall),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X0Y0", BEL = "BFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_rise
+        (
+         .Q   (stg3b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_rise),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // stg2b: IDDR outputs captured by FDRSE_1 flops (clk0 falling edge)
+      (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "AFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_fall
+        (
+         .Q   (stg2b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_2m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X1Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_rise
+        (
+         .Q   (stg2b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_2m),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+
+    end else if ((DQ_MS == 0) && (DQ_COL == 2)) begin: gen_stg2_2s
+
+      //*****************************************************************
+      // slave, right
+      //*****************************************************************
+
+      IDDR #
+        (
+         .DDR_CLK_EDGE ("SAME_EDGE")
+         )
+        u_iddr_dq
+          (
+           .Q1 (stg1_out_fall_2s),
+           .Q2 (stg1_out_rise_2s),
+           .C  (dq_iddr_clk),
+           .CE (ce),
+           .D  (dq_idelay),
+           .R  (1'b0),
+           .S  (1'b0)
+           );
+      // stg2a: IDDR outputs captured on the rising edge of clk0
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "BFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_fall
+        (
+         .Q   (stg2a_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_2s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "DFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg2a_rise
+        (
+         .Q   (stg2a_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_2s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_fall // stg3b: stg2b outputs re-registered on rising clk0
+        (
+         .Q   (stg3b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_fall),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X2Y0", BEL = "AFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE u_ff_stg3b_rise
+        (
+         .Q   (stg3b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg2b_out_rise),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      // stg2b: IDDR outputs captured by FDRSE_1 flops (clk0 falling edge)
+      (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "AFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_fall
+        (
+         .Q   (stg2b_out_fall),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_fall_2s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+      (* HU_SET = "stg2_capture", RLOC = "X3Y0", BEL = "CFF",
+         AREA_GROUP = "DDR_CAPTURE_FFS" *)
+      FDRSE_1 u_ff_stg2b_rise
+        (
+         .Q   (stg2b_out_rise),
+         .C   (clk0),
+         .CE  (1'b1),
+     .D   (stg1_out_rise_2s),
+         .R   (1'b0),
+         .S   (1'b0)
+         )/* synthesis syn_preserve = 1 */
+          /* synthesis syn_replicate = 0 */;
+
+    end
+  endgenerate
+
+  //***************************************************************************
+  // Second stage flops clocked by posedge CLK0 don't need another layer of
+  // registering
+  //***************************************************************************
+
+  assign stg3a_out_rise = stg2a_out_rise;
+  assign stg3a_out_fall = stg2a_out_fall;
+
+  //*******************************************************************
+  // Output MUX: rd_data_sel = 1 selects the stg3a path, 0 the stg3b path
+  assign rd_data_rise = (rd_data_sel) ? stg3a_out_rise : stg3b_out_rise;
+  assign rd_data_fall = (rd_data_sel) ? stg3a_out_fall : stg3b_out_fall;
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_dqs_iob.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_dqs_iob.v
new file mode 100644 (file)
index 0000000..ead1d94
--- /dev/null
@@ -0,0 +1,260 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_dqs_iob.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/22 15:41:06 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module places the data strobes in the IOBs.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+// ddr2_phy_dqs_iob: I/O-block logic for a single DQS strobe.
+//
+// Parameters:
+//   DDR_TYPE              - > 0 selects a differential IOBUFDS (ddr_dqs /
+//                           ddr_dqs_n); otherwise a single-ended IOBUF is
+//                           used and ddr_dqs_n is left unconnected.
+//   HIGH_PERFORMANCE_MODE - passed unchanged to both IODELAY instances.
+//
+// Ports:
+//   clk0        - inverted locally to form clk180, which clocks the DQS
+//                 output ODDR, the dqs_rst_n_r flop and the tri-state flop.
+//   clkdiv0     - clock for the IODELAY control ports (C).
+//   rst0        - asynchronous preset (PRE) of the tri-state control flop.
+//   dly*_dqs    - INC / CE / RST controls of the DQS input IODELAY.
+//   dly*_gate   - INC / CE / RST controls of the gate-enable IODELAY.
+//   dqs_oe_n    - registered on clk180 and applied to the IOBUF T pin.
+//   dqs_rst_n   - registered on clk180 and applied to ODDR D1.
+//   en_dqs      - gate enable; delayed through an IODELAY, then drives both
+//                 D and S of the IDDR that produces dq_ce.
+//   ddr_dqs(_n) - DQS pad(s).
+//   dq_ce       - Q2 output of the IDDR clocked by the delayed DQS.
+//   delayed_dqs - BUFIO output (with DQS_NET_DELAY applied in simulation).
+module ddr2_phy_dqs_iob #
+  (
+   parameter DDR_TYPE              = 1,
+   parameter HIGH_PERFORMANCE_MODE = "TRUE"
+   )
+  (
+   input        clk0,
+   input        clkdiv0,
+   input        rst0,
+   input        dlyinc_dqs,
+   input        dlyce_dqs,
+   input        dlyrst_dqs,
+   input        dlyinc_gate,
+   input        dlyce_gate,
+   input        dlyrst_gate,
+   input        dqs_oe_n,
+   input        dqs_rst_n,
+   input        en_dqs,
+   inout        ddr_dqs,
+   inout        ddr_dqs_n,
+   output       dq_ce,
+   output       delayed_dqs
+   );
+
+  wire                     clk180;
+  wire                     dqs_bufio;
+
+  wire                     dqs_ibuf;
+  wire                     dqs_idelay;
+  // Internal copy of the delayed DQS that clocks the dq_ce IDDR. Declared
+  // explicitly instead of relying on an implicit net, so the module also
+  // compiles under `default_nettype none and a misspelling cannot silently
+  // create a new 1-bit net.
+  wire                     i_delayed_dqs;
+  wire                     dqs_oe_n_delay;
+  wire                     dqs_oe_n_r;
+  wire                     dqs_rst_n_delay;
+  reg                      dqs_rst_n_r /* synthesis syn_preserve = 1*/;
+  wire                     dqs_out;
+  wire                     en_dqs_sync /* synthesis syn_keep = 1 */;
+
+  // for simulation only. Synthesis should ignore this delay
+  localparam    DQS_NET_DELAY = 0.8;
+
+  assign        clk180 = ~clk0;
+
+  // add delta delay to inputs clocked by clk180 to avoid delta-delay
+  // simulation issues (clk180 itself is produced by a continuous assign,
+  // so the data inputs go through one as well)
+  assign dqs_rst_n_delay = dqs_rst_n;
+  assign dqs_oe_n_delay  = dqs_oe_n;
+
+  //***************************************************************************
+  // DQS input-side resources:
+  //  - IODELAY (pad -> IDELAY)
+  //  - BUFIO (IDELAY -> BUFIO)
+  //***************************************************************************
+
+  // Route DQS from PAD to IDELAY
+  IODELAY #
+    (
+     .DELAY_SRC("I"),
+     .IDELAY_TYPE("VARIABLE"),
+     .HIGH_PERFORMANCE_MODE(HIGH_PERFORMANCE_MODE),
+     .IDELAY_VALUE(0),
+     .ODELAY_VALUE(0)
+     )
+    u_idelay_dqs
+      (
+       .DATAOUT (dqs_idelay),
+       .C       (clkdiv0),
+       .CE      (dlyce_dqs),
+       .DATAIN  (),
+       .IDATAIN (dqs_ibuf),
+       .INC     (dlyinc_dqs),
+       .ODATAIN (),
+       .RST     (dlyrst_dqs),
+       .T       ()
+       );
+
+  // From IDELAY to BUFIO
+  BUFIO u_bufio_dqs
+    (
+     .I  (dqs_idelay),
+     .O  (dqs_bufio)
+     );
+
+  // To model additional delay of DQS BUFIO + gating network
+  // for behavioral simulation. Make sure to select a delay number smaller
+  // than half clock cycle (otherwise output will not track input changes
+  // because of inertial delay). Duplicate to avoid delta delay issues.
+  assign #(DQS_NET_DELAY) i_delayed_dqs = dqs_bufio;
+  assign #(DQS_NET_DELAY) delayed_dqs   = dqs_bufio;
+
+  //***************************************************************************
+  // DQS gate circuit (not supported for all controllers)
+  //***************************************************************************
+
+  // Gate routing:
+  //   en_dqs -> IDELAY -> en_dqs_sync -> IDDR.S -> dq_ce ->
+  //   capture IDDR.CE
+
+  // Delay CE control so that it's in phase with delayed DQS
+  IODELAY #
+    (
+     .DELAY_SRC             ("DATAIN"),
+     .IDELAY_TYPE           ("VARIABLE"),
+     .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE),
+     .IDELAY_VALUE          (0),
+     .ODELAY_VALUE          (0)
+     )
+    u_iodelay_dq_ce
+      (
+       .DATAOUT (en_dqs_sync),
+       .C       (clkdiv0),
+       .CE      (dlyce_gate),
+       .DATAIN  (en_dqs),
+       .IDATAIN (),
+       .INC     (dlyinc_gate),
+       .ODATAIN (),
+       .RST     (dlyrst_gate),
+       .T       ()
+       );
+
+  // Generate sync'ed CE to DQ IDDR's using an IDDR clocked by DQS
+  // We could also instantiate a negative-edge SDR flop here
+  // en_dqs_sync drives both D and the asynchronous set (S), so dq_ce is
+  // forced high while the enable is high and is cleared through D on a
+  // later falling edge of the delayed DQS once the enable drops.
+  IDDR #
+    (
+     .DDR_CLK_EDGE ("OPPOSITE_EDGE"),
+     .INIT_Q1      (1'b0),
+     .INIT_Q2      (1'b0),
+     .SRTYPE       ("ASYNC")
+     )
+    u_iddr_dq_ce
+      (
+       .Q1 (),
+       .Q2 (dq_ce),           // output on falling edge
+       .C  (i_delayed_dqs),
+       .CE (1'b1),
+       .D  (en_dqs_sync),
+       .R  (1'b0),
+       .S  (en_dqs_sync)
+       );
+
+  //***************************************************************************
+  // DQS output-side resources
+  //***************************************************************************
+
+  // synthesis attribute keep of dqs_rst_n_r is "true"
+  always @(posedge clk180)
+    dqs_rst_n_r <= dqs_rst_n_delay;
+
+  // D2 is tied low, so the pad toggles only while dqs_rst_n_r is high
+  ODDR #
+    (
+     .SRTYPE("SYNC"),
+     .DDR_CLK_EDGE("OPPOSITE_EDGE")
+     )
+    u_oddr_dqs
+      (
+       .Q  (dqs_out),
+       .C  (clk180),
+       .CE (1'b1),
+       .D1 (dqs_rst_n_r),      // keep output deasserted for write preamble
+       .D2 (1'b0),
+       .R  (1'b0),
+       .S  (1'b0)
+       );
+
+  // Tri-state control flop, requested to be packed into the IOB; rst0
+  // asynchronously presets it.
+  (* IOB = "TRUE" *) FDP u_tri_state_dqs
+    (
+     .D   (dqs_oe_n_delay),
+     .Q   (dqs_oe_n_r),
+     .C   (clk180),
+     .PRE (rst0)
+     ) /* synthesis syn_useioff = 1 */;
+
+  //***************************************************************************
+
+  // use either single-ended (for DDR1) or differential (for DDR2) DQS input
+
+  generate
+    if (DDR_TYPE > 0) begin: gen_dqs_iob_ddr2
+      IOBUFDS u_iobuf_dqs
+        (
+         .O   (dqs_ibuf),
+         .IO  (ddr_dqs),
+         .IOB (ddr_dqs_n),
+         .I   (dqs_out),
+         .T   (dqs_oe_n_r)
+         );
+      defparam u_iobuf_dqs.IOSTANDARD="DIFF_SSTL18_II_DCI";
+    end else begin: gen_dqs_iob_ddr1
+      IOBUF u_iobuf_dqs
+        (
+         .O   (dqs_ibuf),
+         .IO  (ddr_dqs),
+         .I   (dqs_out),
+         .T   (dqs_oe_n_r)
+         );
+    end
+  endgenerate
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_init.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_init.v
new file mode 100644 (file)
index 0000000..0b03524
--- /dev/null
@@ -0,0 +1,1157 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_init.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/22 15:41:06 $
+// \   \  /  \    Date Created: Thu Aug 24 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//Reference:
+//   This module is the initialization control logic of the memory interface.
+//   All commands are issued from here according to the burst, CAS Latency and
+//   the user commands.
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_phy_init #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameters values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter BANK_WIDTH    = 2,
+   parameter CKE_WIDTH     = 1,
+   parameter COL_WIDTH     = 10,
+   parameter CS_NUM        = 1,
+   parameter DQ_WIDTH      = 72,
+   parameter ODT_WIDTH     = 1,
+   parameter ROW_WIDTH     = 14,
+   parameter ADDITIVE_LAT  = 0,
+   parameter BURST_LEN     = 4,
+   parameter TWO_T_TIME_EN = 0,
+   parameter BURST_TYPE    = 0,
+   parameter CAS_LAT       = 5,
+   parameter ODT_TYPE      = 1,
+   parameter REDUCE_DRV    = 0,
+   parameter REG_ENABLE    = 1,
+   parameter TWR           = 15000,
+   parameter CLK_PERIOD    = 3000,
+   parameter DDR_TYPE      = 1,
+   parameter SIM_ONLY      = 0
+   )
+  (
+   input                                   clk0,
+   input                                   clkdiv0,
+   input                                   rst0,
+   input                                   rstdiv0,
+   input [3:0]                             calib_done,
+   input                                   ctrl_ref_flag,
+   input                                   calib_ref_req,
+   output reg [3:0]                        calib_start,
+   output reg                              calib_ref_done,
+   output reg                              phy_init_wren,
+   output reg                              phy_init_rden,
+   output [ROW_WIDTH-1:0]                  phy_init_addr,
+   output [BANK_WIDTH-1:0]                 phy_init_ba,
+   output                                  phy_init_ras_n,
+   output                                  phy_init_cas_n,
+   output                                  phy_init_we_n,
+   output [CS_NUM-1:0]                     phy_init_cs_n,
+   output [CKE_WIDTH-1:0]                  phy_init_cke,
+   output reg                              phy_init_done,
+   output                                  phy_init_data_sel
+   );
+
+  // time to wait between consecutive commands in PHY_INIT - this is a
+  // generic number, and must be large enough to account for worst case
+  // timing parameter (tRFC - refresh-to-active) across all memory speed
+  // grades and operating frequencies. Expressed in CLKDIV clock cycles.
+  localparam  CNTNEXT_CMD = 7'b1111111;
+  // time to wait between read and read or precharge for stage 3 & 4
+  // the larger CNTNEXT_CMD can also be used, use smaller number to
+  // speed up calibration - avoid tRAS violation, and speeds up simulation
+  localparam  CNTNEXT_RD  = 4'b1111;
+
+  // Write recovery (WR) time - is defined by 
+  // tWR (in nanoseconds) by tCK (in nanoseconds) and rounding up a 
+  // noninteger value to the next integer
+  localparam integer WR_RECOVERY =  ((TWR + CLK_PERIOD) - 1)/CLK_PERIOD;
+
+  localparam  INIT_CAL1_READ            = 5'h00;
+  localparam  INIT_CAL2_READ            = 5'h01;
+  localparam  INIT_CAL3_READ            = 5'h02;
+  localparam  INIT_CAL4_READ            = 5'h03;
+  localparam  INIT_CAL1_WRITE           = 5'h04;
+  localparam  INIT_CAL2_WRITE           = 5'h05;
+  localparam  INIT_CAL3_WRITE           = 5'h06;
+  localparam  INIT_DUMMY_ACTIVE_WAIT    = 5'h07;
+  localparam  INIT_PRECHARGE            = 5'h08;
+  localparam  INIT_LOAD_MODE            = 5'h09;
+  localparam  INIT_AUTO_REFRESH         = 5'h0A;
+  localparam  INIT_IDLE                 = 5'h0B;
+  localparam  INIT_CNT_200              = 5'h0C;
+  localparam  INIT_CNT_200_WAIT         = 5'h0D;
+  localparam  INIT_PRECHARGE_WAIT       = 5'h0E;
+  localparam  INIT_MODE_REGISTER_WAIT   = 5'h0F;
+  localparam  INIT_AUTO_REFRESH_WAIT    = 5'h10;
+  localparam  INIT_DEEP_MEMORY_ST       = 5'h11;
+  localparam  INIT_DUMMY_ACTIVE         = 5'h12;
+  localparam  INIT_CAL1_WRITE_READ      = 5'h13;
+  localparam  INIT_CAL1_READ_WAIT       = 5'h14;
+  localparam  INIT_CAL2_WRITE_READ      = 5'h15;
+  localparam  INIT_CAL2_READ_WAIT       = 5'h16;
+  localparam  INIT_CAL3_WRITE_READ      = 5'h17;
+  localparam  INIT_CAL3_READ_WAIT       = 5'h18;
+  localparam  INIT_CAL4_READ_WAIT       = 5'h19;
+  localparam  INIT_CALIB_REF            = 5'h1A;
+  localparam  INIT_ZQCL                 = 5'h1B;
+  localparam  INIT_WAIT_DLLK_ZQINIT     = 5'h1C;
+
+  localparam  INIT_CNTR_INIT            = 4'h0;
+  localparam  INIT_CNTR_PRECH_1         = 4'h1;
+  localparam  INIT_CNTR_EMR2_INIT       = 4'h2;
+  localparam  INIT_CNTR_EMR3_INIT       = 4'h3;
+  localparam  INIT_CNTR_EMR_EN_DLL      = 4'h4;
+  localparam  INIT_CNTR_MR_RST_DLL      = 4'h5;
+  localparam  INIT_CNTR_CNT_200_WAIT    = 4'h6;
+  localparam  INIT_CNTR_PRECH_2         = 4'h7;
+  localparam  INIT_CNTR_AR_1            = 4'h8;
+  localparam  INIT_CNTR_AR_2            = 4'h9;
+  localparam  INIT_CNTR_MR_ACT_DLL      = 4'hA;
+  localparam  INIT_CNTR_EMR_DEF_OCD     = 4'hB;
+  localparam  INIT_CNTR_EMR_EXIT_OCD    = 4'hC;
+  localparam  INIT_CNTR_DEEP_MEM        = 4'hD;
+  localparam  INIT_CNTR_PRECH_3         = 4'hE;
+  localparam  INIT_CNTR_DONE            = 4'hF;
+
+  localparam   DDR1                     = 0;
+  localparam   DDR2                     = 1;
+  localparam   DDR3                     = 2;
+
+
+  reg [1:0]             burst_addr_r;
+  reg [1:0]             burst_cnt_r;
+  wire [1:0]            burst_val;
+  wire                  cal_read;
+  wire                  cal_write;
+  wire                  cal_write_read;
+  reg                   cal1_started_r;
+  reg                   cal2_started_r;
+  reg                   cal4_started_r;
+  reg [3:0]             calib_done_r;
+  reg                   calib_ref_req_posedge;
+  reg                   calib_ref_req_r;
+  reg [15:0]            calib_start_shift0_r;
+  reg [15:0]            calib_start_shift1_r;
+  reg [15:0]            calib_start_shift2_r;
+  reg [15:0]            calib_start_shift3_r;
+  reg [1:0]             chip_cnt_r;
+  reg [4:0]             cke_200us_cnt_r;
+  reg                   cke_200us_cnt_en_r;
+  reg [7:0]             cnt_200_cycle_r;
+  reg                   cnt_200_cycle_done_r;
+  reg [6:0]             cnt_cmd_r;
+  reg                   cnt_cmd_ok_r;
+  reg [3:0]             cnt_rd_r;
+  reg                   cnt_rd_ok_r;
+  reg                   ctrl_ref_flag_r;
+  reg                   done_200us_r;
+  reg [ROW_WIDTH-1:0]   ddr_addr_r;
+  reg [ROW_WIDTH-1:0]   ddr_addr_r1;
+  reg [BANK_WIDTH-1:0]  ddr_ba_r;
+  reg [BANK_WIDTH-1:0]  ddr_ba_r1;
+  reg                   ddr_cas_n_r;
+  reg                   ddr_cas_n_r1;
+  reg [CKE_WIDTH-1:0]   ddr_cke_r;
+  reg [CS_NUM-1:0]      ddr_cs_n_r;
+  reg [CS_NUM-1:0]      ddr_cs_n_r1;
+  reg [CS_NUM-1:0]      ddr_cs_disable_r;
+  reg                   ddr_ras_n_r;
+  reg                   ddr_ras_n_r1;
+  reg                   ddr_we_n_r;
+  reg                   ddr_we_n_r1;
+  wire [15:0]           ext_mode_reg;
+  reg [3:0]             init_cnt_r;
+  reg                   init_done_r;
+  reg [4:0]             init_next_state;
+  reg [4:0]             init_state_r;
+  reg [4:0]             init_state_r1;
+  reg [4:0]             init_state_r2;
+  wire [15:0]           load_mode_reg;
+  wire [15:0]           load_mode_reg0;
+  wire [15:0]           load_mode_reg1;
+  wire [15:0]           load_mode_reg2;
+  wire [15:0]           load_mode_reg3;
+  reg                   phy_init_done_r;
+  reg                   phy_init_done_r1;
+  reg                   phy_init_done_r2;
+  reg                   phy_init_done_r3;
+  reg                   refresh_req;
+  wire [3:0]            start_cal;
+
+  //***************************************************************************
+
+  //*****************************************************************
+  // DDR1 and DDR2 Load mode register
+  // Mode Register (MR):
+  //   [15:14] - unused          - 00
+  //   [13]    - reserved        - 0
+  //   [12]    - Power-down mode - 0 (normal)
+  //   [11:9]  - write recovery  - for Auto Precharge (tWR/tCK)
+  //   [8]     - DLL reset       - 0 or 1
+  //   [7]     - Test Mode       - 0 (normal)
+  //   [6:4]   - CAS latency     - CAS_LAT
+  //   [3]     - Burst Type      - BURST_TYPE
+  //   [2:0]   - Burst Length    - BURST_LEN
+  //*****************************************************************
+
+  // Build the 16-bit DDR1/DDR2 mode register image from the module
+  // parameters. Fields are listed from MSB to LSB; every encoding that is
+  // not explicitly matched falls through to the last value of its chain.
+  generate
+    if (DDR_TYPE == DDR2) begin: gen_load_mode_reg_ddr2
+      // [15:12] held at zero
+      assign load_mode_reg[15:12] = 4'b0000;
+      // [11:9] write recovery, derived from WR_RECOVERY (3..6 clocks,
+      // anything else encodes as 3'b001)
+      assign load_mode_reg[11:9]  = (WR_RECOVERY == 6) ? 3'b101 :
+                                    (WR_RECOVERY == 5) ? 3'b100 :
+                                    (WR_RECOVERY == 4) ? 3'b011 :
+                                    (WR_RECOVERY == 3) ? 3'b010 : 3'b001;
+      // [8] DLL reset (init value only, DLL not reset), [7] test mode
+      assign load_mode_reg[8:7]   = 2'b00;
+      // [6:4] CAS latency
+      assign load_mode_reg[6:4]   = (CAS_LAT == 3) ? 3'b011 :
+                                    (CAS_LAT == 4) ? 3'b100 :
+                                    (CAS_LAT == 5) ? 3'b101 : 3'b111;
+      // [3] burst type
+      assign load_mode_reg[3]     = BURST_TYPE;
+      // [2:0] burst length
+      assign load_mode_reg[2:0]   = (BURST_LEN == 8) ? 3'b011 :
+                                    (BURST_LEN == 4) ? 3'b010 : 3'b111;
+    end else if (DDR_TYPE == DDR1) begin: gen_load_mode_reg_ddr1
+      // [15:7] held at zero (init value only)
+      assign load_mode_reg[15:7]  = 9'b000000000;
+      // [6:4] CAS latency; CAS_LAT == 25 selects the 3'b110 encoding
+      assign load_mode_reg[6:4]   = (CAS_LAT == 2)  ? 3'b010 :
+                                    (CAS_LAT == 3)  ? 3'b011 :
+                                    (CAS_LAT == 25) ? 3'b110 : 3'b111;
+      // [3] burst type
+      assign load_mode_reg[3]     = BURST_TYPE;
+      // [2:0] burst length
+      assign load_mode_reg[2:0]   = (BURST_LEN == 8) ? 3'b011 :
+                                    (BURST_LEN == 4) ? 3'b010 :
+                                    (BURST_LEN == 2) ? 3'b001 : 3'b111;
+    end
+  endgenerate
+
+  //*****************************************************************
+  // DDR1 and DDR2 ext mode register
+  // Extended Mode Register (MR):
+  //   [15:14] - unused          - 00
+  //   [13]    - reserved        - 0
+  //   [12]    - output enable   - 0 (enabled)
+  //   [11]    - RDQS enable     - 0 (disabled)
+  //   [10]    - DQS# enable     - 0 (enabled)
+  //   [9:7]   - OCD Program     - 111 or 000 (first 111, then 000 during init)
+  //   [6]     - RTT[1]          - RTT[1:0] = 0(no ODT), 1(75), 2(150), 3(50)
+  //   [5:3]   - Additive CAS    - ADDITIVE_CAS
+  //   [2]     - RTT[0]
+  //   [1]     - Output drive    - REDUCE_DRV (= 0(full), = 1 (reduced)
+  //   [0]     - DLL enable      - 0 (normal)
+  //*****************************************************************
+
+  // Build the 16-bit DDR1/DDR2 extended mode register image from the
+  // module parameters. Every bit is driven by exactly one continuous
+  // assignment.
+  generate
+    if (DDR_TYPE == DDR2) begin: gen_ext_mode_reg_ddr2
+      // [0] DLL enable - 0 (normal)
+      assign ext_mode_reg[0]     = 1'b0;
+      // [1] output drive strength
+      assign ext_mode_reg[1]     = REDUCE_DRV;
+      // [2] RTT[0] - set for ODT_TYPE 1 or 3
+      assign ext_mode_reg[2]     = ((ODT_TYPE == 1) || (ODT_TYPE == 3)) ?
+                                   1'b1 : 1'b0;
+      // [5:3] additive latency; values above 4 encode as 3'b111
+      assign ext_mode_reg[5:3]   = (ADDITIVE_LAT == 0) ? 3'b000 :
+                                   ((ADDITIVE_LAT == 1) ? 3'b001 :
+                                    ((ADDITIVE_LAT == 2) ? 3'b010 :
+                                     ((ADDITIVE_LAT == 3) ? 3'b011 :
+                                      ((ADDITIVE_LAT == 4) ? 3'b100 :
+                                      3'b111))));
+      // [6] RTT[1] - set for ODT_TYPE 2 or 3
+      assign ext_mode_reg[6]     = ((ODT_TYPE == 2) || (ODT_TYPE == 3)) ?
+                                   1'b1 : 1'b0;
+      // [9:7] OCD program field, initial value
+      assign ext_mode_reg[9:7]   = 3'b000;
+      // [10] DQS# enable - 0 (enabled)
+      assign ext_mode_reg[10]    = 1'b0;
+      // [15:11] remaining bits held at zero. The slice starts at bit 11:
+      // bit 10 is already driven above and must not get a second driver.
+      assign ext_mode_reg[15:11] = 5'b00000;
+    end else if (DDR_TYPE == DDR1)begin: gen_ext_mode_reg_ddr1
+      // [0] DLL enable - 0 (normal)
+      assign ext_mode_reg[0]     = 1'b0;
+      // [1] output drive strength
+      assign ext_mode_reg[1]     = REDUCE_DRV;
+      // [15:2] held at zero
+      assign ext_mode_reg[12:2]  = 11'b00000000000;
+      assign ext_mode_reg[15:13] = 3'b000;
+    end
+  endgenerate
+
+  //*****************************************************************
+  // DDR3 Load mode reg0
+  // Mode Register (MR0):
+  //   [15:13] - unused          - 000
+  //   [12]    - Precharge Power-down DLL usage - 0 (DLL frozen, slow-exit),
+  //             1 (DLL maintained)
+  //   [11:9]  - write recovery for Auto Precharge (tWR/tCK = 6)
+  //   [8]     - DLL reset       - 0 or 1
+  //   [7]     - Test Mode       - 0 (normal)
+  //   [6:4],[2]   - CAS latency     - CAS_LAT
+  //   [3]     - Burst Type      - BURST_TYPE
+  //   [1:0]   - Burst Length    - BURST_LEN
+  //*****************************************************************
+
+  // DDR3 MR0 image, assembled field by field from MSB to LSB.
+  generate
+    if (DDR_TYPE == DDR3) begin: gen_load_mode_reg0_ddr3
+      // [15:13] unused, [12] precharge power-down DLL 'slow-exit'
+      assign load_mode_reg0[15:12] = 4'b0000;
+      // [11:9] write recovery field, fixed encoding
+      assign load_mode_reg0[11:9]  = 3'b010;
+      // [8] DLL reset = 1 (init value only), [7] test mode = 0
+      assign load_mode_reg0[8:7]   = 2'b10;
+      // [6:4] CAS latency; values other than 5 or 6 encode as 3'b111
+      assign load_mode_reg0[6:4]   = (CAS_LAT == 5) ? 3'b001 :
+                                     ((CAS_LAT == 6) ? 3'b010 : 3'b111);
+      // [3] burst type
+      assign load_mode_reg0[3]     = BURST_TYPE;
+      // [2] part of CAS latency; '0' for all CAS latencies
+      assign load_mode_reg0[2]     = 1'b0;
+      // [1:0] burst length
+      assign load_mode_reg0[1:0]   = (BURST_LEN == 8) ? 2'b00 :
+                                     (BURST_LEN == 4) ? 2'b10 : 2'b11;
+    end
+  endgenerate
+
+  //*****************************************************************
+  // DDR3 Load mode reg1
+  // Mode Register (MR1):
+  //   [15:13] - unused          - 00
+  //   [12]    - output enable   - 0 (enabled for DQ, DQS, DQS#)
+  //   [11]    - TDQS enable     - 0 (TDQS disabled and DM enabled)
+  //   [10]    - reserved   - 0 (must be '0')
+  //   [9]     - RTT[2]     - 0
+  //   [8]     - reserved   - 0 (must be '0')
+  //   [7]     - write leveling - 0 (disabled), 1 (enabled)
+  //   [6]     - RTT[1]          - RTT[1:0] = 0(no ODT), 1(75), 2(150), 3(50)
+  //   [5]     - Output driver impedance[1] - 0 (RZQ/6 and RZQ/7)
+  //   [4:3]   - Additive CAS    - ADDITIVE_CAS
+  //   [2]     - RTT[0]
+  //   [1]     - Output driver impedance[0] - 0(RZQ/6), or 1 (RZQ/7)
+  //   [0]     - DLL enable      - 0 (normal)
+  //*****************************************************************
+
+  // DDR3 MR1 image, assembled field by field from the module parameters.
+  generate
+    if (DDR_TYPE == DDR3) begin: gen_ext_mode_reg1_ddr3
+      // [0] DLL enabled during initialization
+      assign load_mode_reg1[0]     = 1'b0;
+      // [1] output driver impedance[0], taken from REDUCE_DRV
+      assign load_mode_reg1[1]     = REDUCE_DRV;
+      // [2] RTT[0] - set for ODT_TYPE 1 or 3
+      assign load_mode_reg1[2]     = ((ODT_TYPE == 1) || (ODT_TYPE == 3)) ?
+                                     1'b1 : 1'b0;
+      // [4:3] additive latency; values above 2 encode as 2'b11. The
+      // fallback literal is 2 bits wide to match the destination slice
+      // (it was 3'b111, which was truncated to the same value).
+      assign load_mode_reg1[4:3]   = (ADDITIVE_LAT == 0) ? 2'b00 :
+                                     ((ADDITIVE_LAT == 1) ? 2'b01 :
+                                      ((ADDITIVE_LAT == 2) ? 2'b10 :
+                                       2'b11));
+      // [5] output driver impedance[1] - 0
+      assign load_mode_reg1[5]     = 1'b0;
+      // [6] RTT[1] - set for ODT_TYPE 2 or 3
+      assign load_mode_reg1[6]     = ((ODT_TYPE == 2) || (ODT_TYPE == 3)) ?
+                                   1'b1 : 1'b0;
+      // [7] write leveling disabled
+      assign load_mode_reg1[7]   = 0;
+      // [8] reserved, [9] RTT[2], [10] reserved - all zero
+      assign load_mode_reg1[8]   = 1'b0;
+      assign load_mode_reg1[9]   = 1'b0;
+      assign load_mode_reg1[10]    = 1'b0;
+      // [15:11] held at zero
+      assign load_mode_reg1[15:11] = 5'b00000;
+    end
+  endgenerate
+
+  //*****************************************************************
+  // DDR3 Load mode reg2
+  // Mode Register (MR2):
+  //   [15:11] - unused     - 00
+  //   [10:9]  - RTT_WR     - 00 (Dynamic ODT off)
+  //   [8]     - reserved   - 0 (must be '0')
+  //   [7]     - self-refresh temperature range -
+  //               0 (normal), 1 (extended)
+  //   [6]     - Auto Self-Refresh - 0 (manual), 1(auto)
+  //   [5:3]   - CAS Write Latency (CWL) -
+  //               000 (5 for 400 MHz device),
+  //               001 (6 for 400 MHz to 533 MHz devices),
+  //               010 (7 for 533 MHz to 667 MHz devices),
+  //               011 (8 for 667 MHz to 800 MHz)
+  //   [2:0]   - Partial Array Self-Refresh (Optional)      -
+  //               000 (full array)
+  //*****************************************************************
+
+  // DDR3 MR2 image. Only the [5:3] field depends on a parameter (CAS_LAT);
+  // every other bit is zero, so the register is built as one concatenation.
+  generate
+    if (DDR_TYPE == DDR3) begin: gen_ext_mode_reg2_ddr3
+      assign load_mode_reg2 =
+               {10'b0000000000,                     // [15:6] all zero
+                ((CAS_LAT == 5) ? 3'b000 :          // [5:3]  selected by
+                 ((CAS_LAT == 6) ? 3'b001 :         //        CAS_LAT
+                  3'b111)),
+                3'b000};                            // [2:0]  all zero
+    end
+  endgenerate
+
+  //*****************************************************************
+  // DDR3 Load mode reg3
+  // Mode Register (MR3):
+  //   [15:3] - unused          - All zeros
+  //   [2]     - MPR Operation - 0(normal operation), 1(data flow from MPR)
+  //   [1:0]   - MPR location     - 00 (Predefined pattern)
+  //*****************************************************************
+
+  // DDR3 MR3 image: every field is zero, so drive the whole register with
+  // a single constant.
+  generate
+    if (DDR_TYPE == DDR3) begin: gen_ext_mode_reg3_ddr3
+      assign load_mode_reg3 = 16'b0000000000000000;
+    end
+  endgenerate
+
+  //***************************************************************************
+  // Logic for calibration start, and for auto-refresh during cal request
+  // CALIB_REF_REQ is used by calibration logic to request auto-refresh
+  // during calibration (used to avoid tRAS violation if certain calibration
+  // stages take a long time). Once the auto-refresh is complete and cal can
+  // be resumed, CALIB_REF_DONE is asserted by PHY_INIT.
+  //***************************************************************************
+
+  // generate pulse for each of calibration start controls
+  // One start bit per calibration stage, built from the state delayed by
+  // one cycle (init_state_r1) and by two cycles (init_state_r2).
+  // Stages 1 and 2 fire on any entry into their READ state; stages 3 and 4
+  // fire only when the READ state is entered from one specific state.
+  assign start_cal = {
+    // [3]: CAL4_READ entered from DUMMY_ACTIVE_WAIT
+    ((init_state_r1 == INIT_CAL4_READ) &&
+     (init_state_r2 == INIT_DUMMY_ACTIVE_WAIT)),
+    // [2]: CAL3_READ entered from CAL3_WRITE_READ
+    ((init_state_r1 == INIT_CAL3_READ) &&
+     (init_state_r2 == INIT_CAL3_WRITE_READ)),
+    // [1]: CAL2_READ entered from any other state
+    ((init_state_r1 == INIT_CAL2_READ) &&
+     (init_state_r2 != INIT_CAL2_READ)),
+    // [0]: CAL1_READ entered from any other state
+    ((init_state_r1 == INIT_CAL1_READ) &&
+     (init_state_r2 != INIT_CAL1_READ))
+  };
+
+  // Generate positive-edge triggered, latched signal to force initialization
+  // to pause calibration, and to issue auto-refresh. Clear flag as soon as
+  // refresh initiated
+  always @(posedge clkdiv0) begin
+    if (rstdiv0) begin
+      refresh_req           <= 1'b0;
+      calib_ref_req_posedge <= 1'b0;
+      calib_ref_req_r       <= 1'b0;
+    end else begin
+      // Sticky request: set by the registered rising-edge pulse, cleared
+      // once the state machine (delayed copy) is in AUTO_REFRESH. The
+      // clear has priority over the set.
+      if (init_state_r1 == INIT_AUTO_REFRESH) begin
+        refresh_req <= 1'b0;
+      end else if (calib_ref_req_posedge) begin
+        refresh_req <= 1'b1;
+      end
+      // One-cycle pulse on a 0 -> 1 transition of calib_ref_req
+      calib_ref_req_posedge <= calib_ref_req & ~calib_ref_req_r;
+      // Previous-cycle value used by the edge detector
+      calib_ref_req_r       <= calib_ref_req;
+    end
+  end
+
+  // flag to tell cal1 calibration was started.
+  // This flag is used for cal1 auto refreshes
+  // some of these bits may not be needed - only needed for those stages that
+  // need refreshes within the stage (i.e. very long stages)
+  // Set-only flags: each one latches high when its calib_start pulse is
+  // seen and stays high until rstdiv0 clears it.
+  always @(posedge clkdiv0) begin
+    if (rstdiv0) begin
+      cal4_started_r <= 1'b0;
+      cal2_started_r <= 1'b0;
+      cal1_started_r <= 1'b0;
+    end else begin
+      if (calib_start[0]) begin
+        cal1_started_r <= 1'b1;
+      end
+      if (calib_start[1]) begin
+        cal2_started_r <= 1'b1;
+      end
+      if (calib_start[3]) begin
+        cal4_started_r <= 1'b1;
+      end
+    end
+  end
+
+  // Delay start of each calibration by 16 clock cycles to
+  // ensure that when calibration logic begins, that read data is already
+  // appearing on the bus. Don't really need it, it's more for simulation
+  // purposes. Each circuit should synthesize using an SRL16.
+  // In first stage of calibration  periodic auto refreshes
+  // will be issued to meet memory timing. calib_start_shift0_r[15] will be
+  // asserted more than once.calib_start[0] is anded with cal1_started_r so
+  // that it is asserted only once. cal1_refresh_done is anded with
+  // cal1_started_r so that it is asserted after the auto refreshes.
+  always @(posedge clkdiv0) begin
+    // Stage 1: 16-deep delay line; start pulse masked once cal1 has started
+    calib_start_shift0_r <= {calib_start_shift0_r[14:0], start_cal[0]};
+    calib_start[0]       <= calib_start_shift0_r[15] & ~cal1_started_r;
+
+    // Stage 2: same structure, masked by cal2_started_r
+    calib_start_shift1_r <= {calib_start_shift1_r[14:0], start_cal[1]};
+    calib_start[1]       <= calib_start_shift1_r[15] & ~cal2_started_r;
+
+    // Stage 3: delayed start pulse, no masking
+    calib_start_shift2_r <= {calib_start_shift2_r[14:0], start_cal[2]};
+    calib_start[2]       <= calib_start_shift2_r[15];
+
+    // Stage 4: masked by cal4_started_r
+    calib_start_shift3_r <= {calib_start_shift3_r[14:0], start_cal[3]};
+    calib_start[3]       <= calib_start_shift3_r[15] & ~cal4_started_r;
+
+    // Refresh-done indication: any delayed start from stages 1, 2 or 4,
+    // regardless of the started flags
+    calib_ref_done       <= calib_start_shift0_r[15] |
+                            calib_start_shift1_r[15] |
+                            calib_start_shift3_r[15];
+  end
+
+  // Command-spacing delay counter.
+  // cnt_cmd_r advances by one on every clkdiv0 edge while init_state_r is
+  // in one of the states listed below, and is forced to zero in every
+  // other state. There is no maximum delay requirement; the terminal count
+  // only has to be large enough to cover all cases.
+  always @(posedge clkdiv0) begin
+    case (init_state_r)
+      INIT_PRECHARGE_WAIT, INIT_MODE_REGISTER_WAIT,
+      INIT_AUTO_REFRESH_WAIT, INIT_DUMMY_ACTIVE_WAIT,
+      INIT_CAL1_WRITE_READ, INIT_CAL1_READ_WAIT,
+      INIT_CAL2_WRITE_READ, INIT_CAL2_READ_WAIT,
+      INIT_CAL3_WRITE_READ: begin
+        cnt_cmd_r <= cnt_cmd_r + 1;
+      end
+      default: begin
+        cnt_cmd_r <= 7'b0000000;
+      end
+    endcase
+  end
+
+  // Registered terminal-count flag: high for the cycle after cnt_cmd_r
+  // equals CNTNEXT_CMD, low otherwise.
+  always @(posedge clkdiv0) begin
+    if (cnt_cmd_r == CNTNEXT_CMD) begin
+      cnt_cmd_ok_r <= 1'b1;
+    end else begin
+      cnt_cmd_ok_r <= 1'b0;
+    end
+  end
+
+  // Read-wait delay counter: counts up while init_state_r is
+  // INIT_CAL3_READ_WAIT or INIT_CAL4_READ_WAIT, and is held at zero in
+  // every other state.
+  always @(posedge clkdiv0) begin
+    if ((init_state_r == INIT_CAL3_READ_WAIT) ||
+        (init_state_r == INIT_CAL4_READ_WAIT)) begin
+      cnt_rd_r <= cnt_rd_r + 1;
+    end else begin
+      cnt_rd_r <= 4'b0000;
+    end
+  end
+
+  // Registered terminal-count flag: high for the cycle after cnt_rd_r
+  // equals CNTNEXT_RD, low otherwise.
+  always @(posedge clkdiv0) begin
+    if (cnt_rd_r == CNTNEXT_RD) begin
+      cnt_rd_ok_r <= 1'b1;
+    end else begin
+      cnt_rd_ok_r <= 1'b0;
+    end
+  end
+
+  //***************************************************************************
+  // Initial delay after power-on
+  //***************************************************************************
+
+  // Refresh-flag capture for the power-on delay counter.
+  // ctrl_ref_flag comes from the controller in the full-frequency domain.
+  // It is sampled once on clk0; the clkdiv0 register then takes the OR of
+  // the live flag and that sampled copy, i.e. a version of the pulse
+  // covering two consecutive full-rate clock cycles, so that the
+  // half-frequency domain can see it. The registered result is used as
+  // the enable of the 200us counter.
+  always @(posedge clk0) begin
+    ctrl_ref_flag_r <= ctrl_ref_flag;
+  end
+
+  always @(posedge clkdiv0) begin
+    cke_200us_cnt_en_r <= ctrl_ref_flag_r || ctrl_ref_flag;
+  end
+
+  // Power-on ("200us") down counter for CKE.
+  // On rstdiv0 the counter is preloaded with 5'd27, or with 5'b00001 when
+  // SIM_ONLY is set so that simulation skips most of the power-up count.
+  // It then decrements once for every cycle in which cke_200us_cnt_en_r
+  // is high.
+  always @(posedge clkdiv0) begin
+    if (rstdiv0) begin
+      cke_200us_cnt_r <= (SIM_ONLY) ? 5'b00001 : 5'd27;
+    end else if (cke_200us_cnt_en_r) begin
+      cke_200us_cnt_r <= cke_200us_cnt_r - 1;
+    end
+  end
+
+  // Sticky completion flag: cleared by rstdiv0; while still low it follows
+  // (cke_200us_cnt_r == 0), and once high it is no longer updated.
+  always @(posedge clkdiv0) begin
+    if (rstdiv0) begin
+      done_200us_r <= 1'b0;
+    end else if (!done_200us_r) begin
+      done_200us_r <= (cke_200us_cnt_r == 5'b00000);
+    end
+  end
+
+  // Fixed-wait down counter used during initialization.
+  //   reset or INIT_CNT_200 : load 8'h64 (original note: 200 clocks are
+  //                           required; 100 divided-by-two clocks counted)
+  //   INIT_ZQCL (ddr3)      : load 8'hC8
+  //   otherwise             : count down by one per cycle, stop at zero
+  always @(posedge clkdiv0) begin
+    if (rstdiv0 || (init_state_r == INIT_CNT_200)) begin
+      cnt_200_cycle_r <= 8'h64;
+    end else if (init_state_r == INIT_ZQCL) begin
+      cnt_200_cycle_r <= 8'hC8;
+    end else if (cnt_200_cycle_r != 8'h00) begin
+      cnt_200_cycle_r <= cnt_200_cycle_r - 1;
+    end
+  end
+
+  // Completion flag for the counter above: cleared whenever the counter
+  // is being (re)loaded, set once the count has reached zero, and held
+  // in between.
+  always @(posedge clkdiv0) begin
+    if (rstdiv0 ||
+        (init_state_r == INIT_CNT_200) ||
+        (init_state_r == INIT_ZQCL)) begin
+      cnt_200_cycle_done_r <= 1'b0;
+    end else if (cnt_200_cycle_r == 8'h00) begin
+      cnt_200_cycle_done_r <= 1'b1;
+    end
+  end
+
+  //*****************************************************************
+  //   handle deep memory configuration:
+  //   During initialization: Repeat initialization sequence once for each
+  //   chip select. Note that we could perform initialization for all chip
+  //   selects simultaneously. Probably fine - any potential SI issues with
+  //   auto refreshing all chip selects at once?
+  //   Once initialization complete, assert only CS[0] for calibration.
+  //*****************************************************************
+
+  // Chip-select (rank) counter.
+  // Cleared by rstdiv0. It only changes while init_state_r is
+  // INIT_DEEP_MEMORY_ST: it steps up by one, or wraps to zero once it
+  // equals CS_NUM. In every other state the value is held.
+  always @(posedge clkdiv0) begin
+    if (rstdiv0) begin
+      chip_cnt_r <= 2'b00;
+    end else begin
+      if (init_state_r == INIT_DEEP_MEMORY_ST) begin
+        if (chip_cnt_r != CS_NUM) begin
+          chip_cnt_r <= chip_cnt_r + 1;
+        end else begin
+          chip_cnt_r <= 2'b00;
+        end
+      end
+    end
+  end
+
+  // Active-low chip select generation during initialization/calibration.
+  // Every bit defaults to 1 (deselected) each cycle. The bit indexed by
+  // chip_cnt_r is driven low when
+  //   - init_state_r is a command-issuing init state (dummy activate,
+  //     precharge, load mode, auto refresh, ZQCL), or
+  //   - init_state_r is one of the calibration read/write states and
+  //     burst_cnt_r is 2'b00.
+  // In all other cases the default of all ones stands.
+  always @(posedge clkdiv0) begin
+    if (rstdiv0) begin
+      ddr_cs_n_r <= {CS_NUM{1'b1}};
+    end else begin
+      ddr_cs_n_r <= {CS_NUM{1'b1}};
+      case (init_state_r)
+        INIT_DUMMY_ACTIVE, INIT_PRECHARGE, INIT_LOAD_MODE,
+        INIT_AUTO_REFRESH, INIT_ZQCL: begin
+          ddr_cs_n_r[chip_cnt_r] <= 1'b0;
+        end
+        INIT_CAL1_READ, INIT_CAL2_READ, INIT_CAL3_READ, INIT_CAL4_READ,
+        INIT_CAL1_WRITE, INIT_CAL2_WRITE, INIT_CAL3_WRITE: begin
+          if (burst_cnt_r == 2'b00) begin
+            ddr_cs_n_r[chip_cnt_r] <= 1'b0;
+          end
+        end
+        default: begin
+          // selected bit keeps the default value of 1 assigned above
+        end
+      endcase
+    end
+  end
+
+  //***************************************************************************
+  // Write/read burst logic
+  //***************************************************************************
+
+  // State decodes for the calibration bursts.
+  //   cal_write      : init_state_r is one of the CAL1..CAL3 write states
+  //   cal_read       : init_state_r is one of the CAL1..CAL4 read states
+  //   cal_write_read : either of the above
+  assign cal_write      = (init_state_r == INIT_CAL1_WRITE) ||
+                          (init_state_r == INIT_CAL2_WRITE) ||
+                          (init_state_r == INIT_CAL3_WRITE);
+  assign cal_read       = (init_state_r == INIT_CAL1_READ) ||
+                          (init_state_r == INIT_CAL2_READ) ||
+                          (init_state_r == INIT_CAL3_READ) ||
+                          (init_state_r == INIT_CAL4_READ);
+  assign cal_write_read = cal_read || cal_write;
+
+  // Reload value for burst_cnt_r: 2'b01 when BURST_LEN is 8, 2'b00 for
+  // BURST_LEN 4 and for any other value.
+  assign burst_val = (BURST_LEN == 8) ? 2'b01 : 2'b00;
+
+  // Current burst address tracker - needed if burst length < 8 for the
+  // stage 2-4 calibration writes and reads, to know when another burst
+  // must be issued. The register is forced to 0 whenever cal_write_read
+  // is low, so it always starts from 0 on entry to a write/read state,
+  // and it advances by 2 on every cycle spent in such a state.
+  always @(posedge clkdiv0) begin
+    if (cal_write_read) begin
+      burst_addr_r <= burst_addr_r + 2;
+    end else begin
+      burst_addr_r <= 2'b00;
+    end
+  end
+
+  // Write/read burst count: held at 0 outside the write/read states.
+  // Inside them it is reloaded with burst_val whenever it is 0 and
+  // otherwise counts down by one.
+  always @(posedge clkdiv0) begin
+    if (cal_write_read) begin
+      if (burst_cnt_r == 2'b00) begin
+        burst_cnt_r <= burst_val;
+      end else begin
+        // SHOULD THIS BE -2 CHECK THIS LOGIC
+        burst_cnt_r <= burst_cnt_r - 1;
+      end
+    end else begin
+      burst_cnt_r <= 2'b00;
+    end
+  end
+
+  // Write indicator: cal_write delayed by one clkdiv0 cycle.
+  // MIG 2.1 removed the former (burst_addr_r < 4) qualifier - not used.
+  always @(posedge clkdiv0) begin
+    phy_init_wren <= cal_write;
+  end
+
+  // Read indicator used for read enable calibration: cal_read delayed by
+  // one clkdiv0 cycle.
+  // MIG 2.1 removed the former (burst_addr_r < 4) qualifier - not used.
+  always @(posedge clkdiv0) begin
+    phy_init_rden <= cal_read;
+  end
+
+  //***************************************************************************
+  // Initialization state machine
+  //***************************************************************************
+
+  // Initialization step counter (init_cnt_r).
+  // The branches below form a priority chain, highest priority first:
+  //   1. reset / INIT_DEEP_MEMORY_ST : restart at INIT_CNTR_INIT
+  //   2. DDR1 shortcuts              : jump over steps DDR1 does not use
+  //   3. DDR3 in INIT_ZQCL           : jump straight to INIT_CNTR_DEEP_MEM
+  //   4. normal case                 : advance by one step whenever a
+  //      command state is visited (see the individual terms below)
+  // In any other situation the counter holds its value.
+  always @(posedge clkdiv0)
+    // every time we need to initialize another rank of memory, need to
+    // reset init count, and repeat the entire initialization (but not
+    // calibration) sequence
+    if (rstdiv0 || (init_state_r == INIT_DEEP_MEMORY_ST))
+      init_cnt_r <= INIT_CNTR_INIT;
+    else if ((DDR_TYPE == DDR1) && (init_state_r == INIT_PRECHARGE) &&
+             (init_cnt_r == INIT_CNTR_PRECH_1))
+      // skip EMR(2) and EMR(3) register loads
+      init_cnt_r <= INIT_CNTR_EMR_EN_DLL;
+    else if ((DDR_TYPE == DDR1) && (init_state_r == INIT_LOAD_MODE) &&
+             (init_cnt_r == INIT_CNTR_MR_ACT_DLL))
+      // skip OCD calibration for DDR1
+      init_cnt_r <= INIT_CNTR_DEEP_MEM;
+    else if ((DDR_TYPE == DDR3) && (init_state_r ==  INIT_ZQCL))
+      // skip states for DDR3
+      init_cnt_r <= INIT_CNTR_DEEP_MEM;
+    // Advance on: any load-mode command; a precharge unless the previous
+    // state (init_state_r1) was INIT_CALIB_REF; an auto refresh while
+    // init_done_r is still low; the start of a 200-cycle wait.
+    else if ((init_state_r == INIT_LOAD_MODE) ||
+             ((init_state_r == INIT_PRECHARGE)
+              && (init_state_r1 != INIT_CALIB_REF))||
+             ((init_state_r == INIT_AUTO_REFRESH)
+              && (~init_done_r))||
+             (init_state_r == INIT_CNT_200))
+      init_cnt_r <= init_cnt_r + 1;
+
+  // Raw "initialization finished" flag: high only while the state machine
+  // sits in INIT_IDLE with init_cnt_r at INIT_CNTR_DONE. There is no reset
+  // term; the flag is re-evaluated every cycle.
+  always @(posedge clkdiv0) begin
+    if ((init_state_r == INIT_IDLE) && (init_cnt_r == INIT_CNTR_DONE))
+      phy_init_done_r <= 1'b1;
+    else
+      phy_init_done_r <= 1'b0;
+  end
+
+  // phy_init_done to the controller and the user interface.
+  // phy_init_done_r is passed through four register stages (r1, r2, r3 and
+  // phy_init_done itself) to account for the multi-cycle path constraint
+  // on phy_init_data_sel going to the phy layer. All four stages are
+  // cleared by rstdiv0.
+  always @(posedge clkdiv0) begin
+    if (rstdiv0) begin
+      phy_init_done    <= 1'b0;
+      phy_init_done_r3 <= 1'b0;
+      phy_init_done_r2 <= 1'b0;
+      phy_init_done_r1 <= 1'b0;
+    end else begin
+      phy_init_done    <= phy_init_done_r3;
+      phy_init_done_r3 <= phy_init_done_r2;
+      phy_init_done_r2 <= phy_init_done_r1;
+      phy_init_done_r1 <= phy_init_done_r;
+    end
+  end
+
+  // Instantiate primitive to allow this flop to be attached to multicycle
+  // path constraint in UCF. This signal goes to PHY_WRITE and PHY_CTL_IO
+  // datapath logic only. Because it is a multi-cycle path, it can be
+  // clocked by either CLKDIV0 or CLK0.
+  // As wired here the flop simply registers phy_init_done_r1 on clkdiv0:
+  // clock enable is tied high and both the reset (R) and set (S) inputs
+  // are tied low. The trailing synthesis attributes ask the tool to keep
+  // the register and not to replicate it.
+  FDRSE u_ff_phy_init_data_sel
+    (
+     .Q   (phy_init_data_sel),
+     .C   (clkdiv0),
+     .CE  (1'b1),
+     .D   (phy_init_done_r1),
+     .R   (1'b0),
+     .S   (1'b0)
+     ) /* synthesis syn_preserve=1 */
+       /* synthesis syn_replicate = 0 */;
+
+  // Simulation-only progress messages: one line is printed on the rising
+  // edge of each calib_done bit, plus an overall completion line together
+  // with the fourth stage. Excluded from synthesis by the pragmas.
+  //synthesis translate_off
+  always @(posedge calib_done[0]) begin
+    $display ("First Stage Calibration completed at time %t", $time);
+  end
+
+  always @(posedge calib_done[1]) begin
+    $display ("Second Stage Calibration completed at time %t", $time);
+  end
+
+  always @(posedge calib_done[2]) begin
+    $display ("Third Stage Calibration completed at time %t", $time);
+  end
+
+  always @(posedge calib_done[3]) begin
+    $display ("Fourth Stage Calibration completed at time %t", $time);
+    $display ("Calibration completed at time %t", $time);
+  end
+  //synthesis translate_on
+
+  // init_done_r: registered flag that is high whenever init_cnt_r has
+  // reached INIT_CNTR_DEEP_MEM or any later step, and low before that.
+  always @(posedge clkdiv0) begin
+    if (init_cnt_r >= INIT_CNTR_DEEP_MEM)
+      init_done_r <= 1'b1;
+    else
+      init_done_r <= 1'b0;
+  end
+
+  //*****************************************************************
+
+  // State registers of the initialization state machine.
+  //   init_state_r  : current state, loaded from init_next_state
+  //   init_state_r1 : state one cycle ago
+  //   init_state_r2 : state two cycles ago
+  //   calib_done_r  : calib_done registered once, for timing
+  // rstdiv0 forces all state copies to INIT_IDLE and clears calib_done_r.
+  always @(posedge clkdiv0) begin
+    if (rstdiv0) begin
+      calib_done_r  <= 4'b0000;
+      init_state_r2 <= INIT_IDLE;
+      init_state_r1 <= INIT_IDLE;
+      init_state_r  <= INIT_IDLE;
+    end else begin
+      calib_done_r  <= calib_done;
+      init_state_r2 <= init_state_r1;
+      init_state_r1 <= init_state_r;
+      init_state_r  <= init_next_state;
+    end
+  end
+
+  // Next-state logic of the initialization/calibration state machine.
+  // init_next_state defaults to the current state, so any state (or any
+  // condition inside a state) that does not assign it simply holds.
+  // Overview of the visible transitions:
+  //   INIT_IDLE        : once done_200us_r is high, dispatches on
+  //                      init_cnt_r to the next command of the sequence
+  //   command states   : PRECHARGE, LOAD_MODE, AUTO_REFRESH, DUMMY_ACTIVE,
+  //                      CNT_200 and ZQCL each last one cycle and move to
+  //                      their matching wait state
+  //   wait states      : leave when cnt_cmd_ok_r, cnt_rd_ok_r or
+  //                      cnt_200_cycle_done_r is high, as coded below
+  //   CALn states      : write / read loops of the four calibration
+  //                      stages; stages 1, 2 and 4 can be interrupted by
+  //                      refresh_req
+  always @(*) begin
+    init_next_state = init_state_r;
+    (* full_case, parallel_case *) case (init_state_r)
+      INIT_IDLE: begin
+        if (done_200us_r) begin
+          (* parallel_case *) case (init_cnt_r)
+            INIT_CNTR_INIT:
+              init_next_state = INIT_CNT_200;
+            INIT_CNTR_PRECH_1:
+              init_next_state = INIT_PRECHARGE;
+            INIT_CNTR_EMR2_INIT:
+              init_next_state = INIT_LOAD_MODE; // EMR(2)
+            INIT_CNTR_EMR3_INIT:
+              init_next_state = INIT_LOAD_MODE; // EMR(3);
+            INIT_CNTR_EMR_EN_DLL:
+              init_next_state = INIT_LOAD_MODE; // EMR, enable DLL
+            INIT_CNTR_MR_RST_DLL:
+              init_next_state = INIT_LOAD_MODE; // MR, reset DLL
+            INIT_CNTR_CNT_200_WAIT:begin
+              if(DDR_TYPE == DDR3)
+                 init_next_state = INIT_ZQCL; // DDR3
+              else
+                // Wait 200cc after reset DLL
+                init_next_state = INIT_CNT_200;
+            end
+            INIT_CNTR_PRECH_2:
+              init_next_state = INIT_PRECHARGE;
+            INIT_CNTR_AR_1:
+              init_next_state = INIT_AUTO_REFRESH;
+            INIT_CNTR_AR_2:
+              init_next_state = INIT_AUTO_REFRESH;
+            INIT_CNTR_MR_ACT_DLL:
+              init_next_state = INIT_LOAD_MODE; // MR, unreset DLL
+            INIT_CNTR_EMR_DEF_OCD:
+              init_next_state = INIT_LOAD_MODE; // EMR, OCD default
+            INIT_CNTR_EMR_EXIT_OCD:
+              init_next_state = INIT_LOAD_MODE; // EMR, enable OCD exit
+            // More chip selects left: go round the init sequence again.
+            // Otherwise wait for the 200-cycle counter, then open the
+            // calibration row.
+            INIT_CNTR_DEEP_MEM: begin
+               if ((chip_cnt_r < CS_NUM-1))
+                  init_next_state = INIT_DEEP_MEMORY_ST;
+              else if (cnt_200_cycle_done_r)
+                init_next_state = INIT_DUMMY_ACTIVE;
+              else
+                init_next_state = INIT_IDLE;
+            end
+            INIT_CNTR_PRECH_3:
+              init_next_state = INIT_PRECHARGE;
+            INIT_CNTR_DONE:
+              init_next_state = INIT_IDLE;
+            default :
+              init_next_state = INIT_IDLE;
+          endcase
+        end
+      end
+      INIT_CNT_200:
+        init_next_state = INIT_CNT_200_WAIT;
+      INIT_CNT_200_WAIT:
+        if (cnt_200_cycle_done_r)
+          init_next_state = INIT_IDLE;
+      INIT_PRECHARGE:
+        init_next_state = INIT_PRECHARGE_WAIT;
+      // After a precharge: if init is done but not every calibration
+      // stage has finished, follow up with an auto refresh; else idle.
+      INIT_PRECHARGE_WAIT:
+        if (cnt_cmd_ok_r)begin
+          if (init_done_r && (!(&calib_done_r)))
+            init_next_state = INIT_AUTO_REFRESH;
+          else
+            init_next_state = INIT_IDLE;
+        end
+      INIT_ZQCL:
+        init_next_state = INIT_WAIT_DLLK_ZQINIT;
+      INIT_WAIT_DLLK_ZQINIT:
+        if (cnt_200_cycle_done_r)
+          init_next_state = INIT_IDLE;
+      INIT_LOAD_MODE:
+        init_next_state = INIT_MODE_REGISTER_WAIT;
+      INIT_MODE_REGISTER_WAIT:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_IDLE;
+      INIT_AUTO_REFRESH:
+        init_next_state = INIT_AUTO_REFRESH_WAIT;
+      // After an auto refresh: back to calibration (via a row activate)
+      // once init is done, otherwise continue the init sequence.
+      INIT_AUTO_REFRESH_WAIT:
+        if (cnt_cmd_ok_r)begin
+          if(init_done_r)
+            init_next_state = INIT_DUMMY_ACTIVE;
+          else
+            init_next_state = INIT_IDLE;
+        end
+      INIT_DEEP_MEMORY_ST:
+        init_next_state = INIT_IDLE;
+      // single row activate. All subsequent calibration writes and
+      // read will take place in this row
+      INIT_DUMMY_ACTIVE:
+        init_next_state = INIT_DUMMY_ACTIVE_WAIT;
+      // Pick the first calibration stage that has not completed yet.
+      // NOTE(review): the stage-2 test below reads calib_done[1] directly
+      // while the stage-1 and stage-3 tests use the registered
+      // calib_done_r - confirm whether this is intentional.
+      INIT_DUMMY_ACTIVE_WAIT:
+        if (cnt_cmd_ok_r)begin
+          if (~calib_done_r[0]) begin
+            // if returning to stg1 after refresh, don't need to write
+            if (cal1_started_r)
+              init_next_state = INIT_CAL1_READ;
+            // if first entering stg1, need to write training pattern
+            else
+              init_next_state = INIT_CAL1_WRITE;
+          end else if (~calib_done[1]) begin
+            if (cal2_started_r)
+              init_next_state = INIT_CAL2_READ;
+            else
+              init_next_state = INIT_CAL2_WRITE;
+          end else if (~calib_done_r[2])
+             init_next_state = INIT_CAL3_WRITE;
+          else
+            init_next_state = INIT_CAL4_READ;
+        end
+      // Stage 1 calibration (write and continuous read)
+      INIT_CAL1_WRITE:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL1_WRITE_READ;
+      INIT_CAL1_WRITE_READ:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_CAL1_READ;
+      INIT_CAL1_READ:
+        // Stage 1 requires inter-stage auto-refresh
+        if (calib_done_r[0] || refresh_req)
+          init_next_state = INIT_CAL1_READ_WAIT;
+      INIT_CAL1_READ_WAIT:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_CALIB_REF;
+      // Stage 2 calibration (write and continuous read)
+      INIT_CAL2_WRITE:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL2_WRITE_READ;
+      INIT_CAL2_WRITE_READ:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_CAL2_READ;
+      INIT_CAL2_READ:
+        // Stage 2 requires inter-stage auto-refresh
+        if (calib_done_r[1] || refresh_req)
+          init_next_state = INIT_CAL2_READ_WAIT;
+      INIT_CAL2_READ_WAIT:
+        if(cnt_cmd_ok_r)
+          init_next_state = INIT_CALIB_REF;
+      // Stage 3 calibration (write and continuous read)
+      INIT_CAL3_WRITE:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL3_WRITE_READ;
+      INIT_CAL3_WRITE_READ:
+        if (cnt_cmd_ok_r)
+          init_next_state = INIT_CAL3_READ;
+      INIT_CAL3_READ:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL3_READ_WAIT;
+      // Repeat the stage 3 read until calib_done_r[2] is set.
+      INIT_CAL3_READ_WAIT: begin
+        if (cnt_rd_ok_r)
+          if (calib_done_r[2]) begin
+            init_next_state = INIT_CALIB_REF;
+          end else
+            init_next_state = INIT_CAL3_READ;
+      end
+      // Stage 4 calibration (continuous read only, same pattern as stage 3)
+      // only used if DQS_GATE supported
+      INIT_CAL4_READ:
+        if (burst_addr_r == 2'b10)
+          init_next_state = INIT_CAL4_READ_WAIT;
+      INIT_CAL4_READ_WAIT: begin
+        if (cnt_rd_ok_r)
+          // Stage 4 requires inter-stage auto-refresh
+          if (calib_done_r[3] || refresh_req)
+            init_next_state = INIT_PRECHARGE;
+          else
+            init_next_state = INIT_CAL4_READ;
+      end
+      // Single-cycle marker state that always leads into a precharge.
+      INIT_CALIB_REF:
+        init_next_state = INIT_PRECHARGE;
+    endcase
+  end
+
+  //***************************************************************************
+  // Memory control/address
+  //***************************************************************************
+
+  // Active-low command strobes, decoded from the current state and
+  // registered on clkdiv0. None of them has a reset term.
+
+  // RAS# is low for dummy activate, precharge, load mode and auto refresh.
+  always @(posedge clkdiv0) begin
+    case (init_state_r)
+      INIT_DUMMY_ACTIVE, INIT_PRECHARGE,
+      INIT_LOAD_MODE, INIT_AUTO_REFRESH:
+        ddr_ras_n_r <= 1'b0;
+      default:
+        ddr_ras_n_r <= 1'b1;
+    endcase
+  end
+
+  // CAS# is low for load mode, auto refresh, and for a calibration write
+  // or read in the cycle where burst_cnt_r is 2'b00.
+  always @(posedge clkdiv0) begin
+    if ((cal_write_read && (burst_cnt_r == 2'b00)) ||
+        (init_state_r == INIT_AUTO_REFRESH) ||
+        (init_state_r == INIT_LOAD_MODE))
+      ddr_cas_n_r <= 1'b0;
+    else
+      ddr_cas_n_r <= 1'b1;
+  end
+
+  // WE# is low for load mode, precharge, ZQCL, and for a calibration
+  // write in the cycle where burst_cnt_r is 2'b00.
+  always @(posedge clkdiv0) begin
+    if ((cal_write && (burst_cnt_r == 2'b00)) ||
+        (init_state_r == INIT_ZQCL) ||
+        (init_state_r == INIT_PRECHARGE) ||
+        (init_state_r == INIT_LOAD_MODE))
+      ddr_we_n_r <= 1'b0;
+    else
+      ddr_we_n_r <= 1'b1;
+  end
+
+  //*****************************************************************
+  // memory address during init
+  //*****************************************************************
+
+  // Address / bank-address generation during initialization.
+  // The value driven depends on the current state, in this priority:
+  //   PRECHARGE / ZQCL : all zero with A10 = 1, bank 0
+  //   LOAD_MODE        : mode-register contents selected by init_cnt_r
+  //   calibration R/W  : column address built from burst_addr_r, bank 0
+  //   DUMMY_ACTIVE     : row 0, bank 0
+  //   anything else    : don't care (X in simulation)
+  // Where a whole-vector assignment is followed by a bit/part-select
+  // assignment to the same register, the later one overrides those bits.
+  always @(posedge clkdiv0) begin
+    if ((init_state_r == INIT_PRECHARGE)
+        || (init_state_r == INIT_ZQCL))begin
+      // Precharge all - set A10 = 1
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+      ddr_addr_r[10] <= 1'b1;
+      ddr_ba_r <= {BANK_WIDTH{1'b0}};
+    end else if (init_state_r == INIT_LOAD_MODE) begin
+      // defaults; the case below overrides BA[1:0] and the address
+      ddr_ba_r <= {BANK_WIDTH{1'b0}};
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+      case (init_cnt_r)
+        // EMR (2)
+        INIT_CNTR_EMR2_INIT: begin
+          ddr_ba_r[1:0] <= 2'b10;
+          ddr_addr_r    <= {ROW_WIDTH{1'b0}};
+        end
+        // EMR (3)
+        INIT_CNTR_EMR3_INIT: begin
+          ddr_ba_r[1:0] <= 2'b11;
+          if(DDR_TYPE == DDR3)
+            ddr_addr_r    <= load_mode_reg3[ROW_WIDTH-1:0];
+          else
+            ddr_addr_r    <= {ROW_WIDTH{1'b0}};
+        end
+        // EMR write - A0 = 0 for DLL enable
+        INIT_CNTR_EMR_EN_DLL: begin
+          ddr_ba_r[1:0] <= 2'b01;
+          if(DDR_TYPE == DDR3)
+            ddr_addr_r <= load_mode_reg1[ROW_WIDTH-1:0];
+          else
+            ddr_addr_r <= ext_mode_reg[ROW_WIDTH-1:0];
+        end
+        // MR write, reset DLL (A8=1)
+        INIT_CNTR_MR_RST_DLL: begin
+          if(DDR_TYPE == DDR3)
+            ddr_addr_r <= load_mode_reg0[ROW_WIDTH-1:0];
+          else
+            ddr_addr_r <= load_mode_reg[ROW_WIDTH-1:0];
+          ddr_ba_r[1:0] <= 2'b00;
+          ddr_addr_r[8] <= 1'b1;
+        end
+        // MR write, unreset DLL (A8=0)
+        INIT_CNTR_MR_ACT_DLL: begin
+          ddr_ba_r[1:0] <= 2'b00;
+          ddr_addr_r <= load_mode_reg[ROW_WIDTH-1:0];
+        end
+        // EMR write, OCD default state
+        INIT_CNTR_EMR_DEF_OCD: begin
+          ddr_ba_r[1:0] <= 2'b01;
+          ddr_addr_r <= ext_mode_reg[ROW_WIDTH-1:0];
+          ddr_addr_r[9:7] <= 3'b111;
+        end
+        // EMR write - OCD exit
+        INIT_CNTR_EMR_EXIT_OCD: begin
+          ddr_ba_r[1:0] <= 2'b01;
+          ddr_addr_r <= ext_mode_reg[ROW_WIDTH-1:0];
+        end
+        default: begin
+          ddr_ba_r <= {BANK_WIDTH{1'bx}};
+          ddr_addr_r <= {ROW_WIDTH{1'bx}};
+        end
+      endcase
+    end else if (cal_write_read) begin
+      // when writing or reading for Stages 2-4, since training pattern is
+      // either 4 (stage 2) or 8 (stage 3-4) long, if BURST LEN < 8, then
+      // need to issue multiple bursts to read entire training pattern.
+      // The zero fills are sized to match their targets exactly: the
+      // slice [ROW_WIDTH-1:3] is ROW_WIDTH-3 bits wide and ddr_ba_r is
+      // BANK_WIDTH bits wide (previously each was one bit short and
+      // relied on implicit zero-extension).
+      ddr_addr_r[ROW_WIDTH-1:3] <= {(ROW_WIDTH-3){1'b0}};
+      ddr_addr_r[2:0]           <= {burst_addr_r, 1'b0};
+      ddr_ba_r                  <= {BANK_WIDTH{1'b0}};
+    end else if (init_state_r == INIT_DUMMY_ACTIVE) begin
+      // all calibration writing and reading takes place in row 0x0 only
+      ddr_ba_r   <= {BANK_WIDTH{1'b0}};
+      ddr_addr_r <= {ROW_WIDTH{1'b0}};
+    end else begin
+      // no command is being issued: address and bank are don't care
+      ddr_ba_r   <= {BANK_WIDTH{1'bx}};
+      ddr_addr_r <= {ROW_WIDTH{1'bx}};
+    end
+  end
+
+  // CKE: every bit of ddr_cke_r is a registered copy of done_200us_r, so
+  // CKE stays asserted once the initial power-on delay has elapsed.
+  always @(posedge clkdiv0) begin
+    ddr_cke_r <= {CKE_WIDTH{done_200us_r}};
+  end
+
+  // Second register stage for the commands to memory, clocked by clk0.
+  // Together with the clkdiv0 stage that feeds it this gives a two clock
+  // cycle delay from state to output. No reset term.
+  always @(posedge clk0) begin
+    ddr_cs_n_r1   <= ddr_cs_n_r;
+    ddr_ras_n_r1  <= ddr_ras_n_r;
+    ddr_cas_n_r1  <= ddr_cas_n_r;
+    ddr_we_n_r1   <= ddr_we_n_r;
+    ddr_ba_r1     <= ddr_ba_r;
+    ddr_addr_r1   <= ddr_addr_r;
+  end
+
+  // Chip-select toggle logic (clk0 domain).
+  // The chip select itself is clocked off clkdiv0 and would therefore be
+  // asserted for two clk0 cycles. For the rank indexed by chip_cnt_r this
+  // register produces a mask bit: if the bit is currently set it is
+  // cleared, otherwise it is loaded with the inverse of that rank's
+  // chip select (ddr_cs_n_r1 when TWO_T_TIME_EN is set, ddr_cs_n_r if not).
+  // rst0 clears every bit.
+  always @(posedge clk0) begin
+    if (rst0) begin
+      ddr_cs_disable_r <= {CS_NUM{1'b0}};
+    end else if (ddr_cs_disable_r[chip_cnt_r]) begin
+      ddr_cs_disable_r[chip_cnt_r] <= 1'b0;
+    end else begin
+      ddr_cs_disable_r[chip_cnt_r] <= (TWO_T_TIME_EN) ?
+                                      ~ddr_cs_n_r1[chip_cnt_r] :
+                                      ~ddr_cs_n_r[chip_cnt_r];
+    end
+  end
+
+
+  // Module outputs. Address, bank and command strobes come straight from
+  // the clkdiv0 registers. The chip select is taken from the clk0 copy
+  // when TWO_T_TIME_EN is set and from the clkdiv0 register otherwise;
+  // in both cases it is forced high wherever ddr_cs_disable_r is set.
+  assign phy_init_cke   = ddr_cke_r;
+  assign phy_init_cs_n  = ((TWO_T_TIME_EN) ? ddr_cs_n_r1 : ddr_cs_n_r)
+                          | ddr_cs_disable_r;
+  assign phy_init_ras_n = ddr_ras_n_r;
+  assign phy_init_cas_n = ddr_cas_n_r;
+  assign phy_init_we_n  = ddr_we_n_r;
+  assign phy_init_ba    = ddr_ba_r;
+  assign phy_init_addr  = ddr_addr_r;
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_io.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_io.v
new file mode 100644 (file)
index 0000000..bead645
--- /dev/null
@@ -0,0 +1,354 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_io.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/29 15:24:03 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module instantiates calibration logic, data, data strobe and the
+//   data mask iobs.
+//Reference:
+//Revision History:
+//   Rev 1.1 - DM_IOB instance made based on USE_DM_PORT value . PK. 25/6/08
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_phy_io #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameters values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter CLK_WIDTH             = 1,
+   parameter USE_DM_PORT           = 1,
+   parameter DM_WIDTH              = 9,
+   parameter DQ_WIDTH              = 72,
+   parameter DQ_BITS               = 7,
+   parameter DQ_PER_DQS            = 8,
+   parameter DQS_BITS              = 4,
+   parameter DQS_WIDTH             = 9,
+   parameter HIGH_PERFORMANCE_MODE = "TRUE",
+   parameter ODT_WIDTH             = 1,
+   parameter ADDITIVE_LAT          = 0,
+   parameter CAS_LAT               = 5,
+   parameter REG_ENABLE            = 1,
+   parameter CLK_PERIOD            = 3000,
+   parameter DDR_TYPE              = 1,
+   parameter SIM_ONLY              = 0,
+   parameter DEBUG_EN              = 0,
+   parameter DQS_IO_COL            = 0,
+   parameter DQ_IO_MS              = 0
+   )
+  (
+   input                                clk0,
+   input                                clk90,
+   input                                clkdiv0,
+   input                                rst0,
+   input                                rst90,
+   input                                rstdiv0,
+   input                                dm_ce,
+   input [1:0]                          dq_oe_n,
+   input                                dqs_oe_n,
+   input                                dqs_rst_n,
+   input [3:0]                          calib_start,
+   input                                ctrl_rden,
+   input                                phy_init_rden,
+   input                                calib_ref_done,
+   output [3:0]                         calib_done,
+   output                               calib_ref_req,
+   output [DQS_WIDTH-1:0]               calib_rden,
+   output [DQS_WIDTH-1:0]               calib_rden_sel,
+   input [DQ_WIDTH-1:0]                 wr_data_rise,
+   input [DQ_WIDTH-1:0]                 wr_data_fall,
+   input [(DQ_WIDTH/8)-1:0]             mask_data_rise,
+   input [(DQ_WIDTH/8)-1:0]             mask_data_fall,
+   output [(DQ_WIDTH)-1:0]              rd_data_rise,
+   output [(DQ_WIDTH)-1:0]              rd_data_fall,
+   output [CLK_WIDTH-1:0]               ddr_ck,
+   output [CLK_WIDTH-1:0]               ddr_ck_n,
+   output [DM_WIDTH-1:0]                ddr_dm,
+   inout [DQS_WIDTH-1:0]                ddr_dqs,
+   inout [DQS_WIDTH-1:0]                ddr_dqs_n,
+   inout [DQ_WIDTH-1:0]                 ddr_dq,
+   // Debug signals (optional use)
+   input                                dbg_idel_up_all,
+   input                                dbg_idel_down_all,
+   input                                dbg_idel_up_dq,
+   input                                dbg_idel_down_dq,
+   input                                dbg_idel_up_dqs,
+   input                                dbg_idel_down_dqs,
+   input                                dbg_idel_up_gate,
+   input                                dbg_idel_down_gate,
+   input [DQ_BITS-1:0]                  dbg_sel_idel_dq,
+   input                                dbg_sel_all_idel_dq,
+   input [DQS_BITS:0]                   dbg_sel_idel_dqs,
+   input                                dbg_sel_all_idel_dqs,
+   input [DQS_BITS:0]                   dbg_sel_idel_gate,
+   input                                dbg_sel_all_idel_gate,
+   output [3:0]                         dbg_calib_done,
+   output [3:0]                         dbg_calib_err,
+   output [(6*DQ_WIDTH)-1:0]            dbg_calib_dq_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]           dbg_calib_dqs_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]           dbg_calib_gate_tap_cnt,
+   output [DQS_WIDTH-1:0]               dbg_calib_rd_data_sel,
+   output [(5*DQS_WIDTH)-1:0]           dbg_calib_rden_dly,
+   output [(5*DQS_WIDTH)-1:0]           dbg_calib_gate_dly
+   );
+
+  // ratio of # of physical DM outputs to bytes in data bus
+  // may be different - e.g. if using x4 components
+  localparam DM_TO_BYTE_RATIO = DM_WIDTH / (DQ_WIDTH/8);
+
+  wire [CLK_WIDTH-1:0]                     ddr_ck_q;
+  wire [DQS_WIDTH-1:0]                     delayed_dqs;
+  wire [DQ_WIDTH-1:0]                      dlyce_dq;
+  wire [DQS_WIDTH-1:0]                     dlyce_dqs;
+  wire [DQS_WIDTH-1:0]                     dlyce_gate;
+  wire [DQ_WIDTH-1:0]                      dlyinc_dq;
+  wire [DQS_WIDTH-1:0]                     dlyinc_dqs;
+  wire [DQS_WIDTH-1:0]                     dlyinc_gate;
+  wire                                     dlyrst_dq;
+  wire                                     dlyrst_dqs;
+  wire [DQS_WIDTH-1:0]                     dlyrst_gate;
+  wire [DQS_WIDTH-1:0]                     dq_ce;
+  (* KEEP = "TRUE" *) wire [DQS_WIDTH-1:0] en_dqs /* synthesis syn_keep = 1 */;
+  wire [DQS_WIDTH-1:0]                     rd_data_sel;
+
+  //***************************************************************************
+
+  ddr2_phy_calib #
+    (
+     .DQ_WIDTH      (DQ_WIDTH),
+     .DQ_BITS       (DQ_BITS),
+     .DQ_PER_DQS    (DQ_PER_DQS),
+     .DQS_BITS      (DQS_BITS),
+     .DQS_WIDTH     (DQS_WIDTH),
+     .ADDITIVE_LAT  (ADDITIVE_LAT),
+     .CAS_LAT       (CAS_LAT),
+     .REG_ENABLE    (REG_ENABLE),
+     .CLK_PERIOD    (CLK_PERIOD),
+     .SIM_ONLY      (SIM_ONLY),
+     .DEBUG_EN      (DEBUG_EN)
+     )
+    u_phy_calib
+      (
+       .clk                    (clk0),
+       .clkdiv                 (clkdiv0),
+       .rstdiv                 (rstdiv0),
+       .calib_start            (calib_start),
+       .ctrl_rden              (ctrl_rden),
+       .phy_init_rden          (phy_init_rden),
+       .rd_data_rise           (rd_data_rise),
+       .rd_data_fall           (rd_data_fall),
+       .calib_ref_done         (calib_ref_done),
+       .calib_done             (calib_done),
+       .calib_ref_req          (calib_ref_req),
+       .calib_rden             (calib_rden),
+       .calib_rden_sel         (calib_rden_sel),
+       .dlyrst_dq              (dlyrst_dq),
+       .dlyce_dq               (dlyce_dq),
+       .dlyinc_dq              (dlyinc_dq),
+       .dlyrst_dqs             (dlyrst_dqs),
+       .dlyce_dqs              (dlyce_dqs),
+       .dlyinc_dqs             (dlyinc_dqs),
+       .dlyrst_gate            (dlyrst_gate),
+       .dlyce_gate             (dlyce_gate),
+       .dlyinc_gate            (dlyinc_gate),
+       .en_dqs                 (en_dqs),
+       .rd_data_sel            (rd_data_sel),
+       .dbg_idel_up_all        (dbg_idel_up_all),
+       .dbg_idel_down_all      (dbg_idel_down_all),
+       .dbg_idel_up_dq         (dbg_idel_up_dq),
+       .dbg_idel_down_dq       (dbg_idel_down_dq),
+       .dbg_idel_up_dqs        (dbg_idel_up_dqs),
+       .dbg_idel_down_dqs      (dbg_idel_down_dqs),
+       .dbg_idel_up_gate       (dbg_idel_up_gate),
+       .dbg_idel_down_gate     (dbg_idel_down_gate),
+       .dbg_sel_idel_dq        (dbg_sel_idel_dq),
+       .dbg_sel_all_idel_dq    (dbg_sel_all_idel_dq),
+       .dbg_sel_idel_dqs       (dbg_sel_idel_dqs),
+       .dbg_sel_all_idel_dqs   (dbg_sel_all_idel_dqs),
+       .dbg_sel_idel_gate      (dbg_sel_idel_gate),
+       .dbg_sel_all_idel_gate  (dbg_sel_all_idel_gate),
+       .dbg_calib_done         (dbg_calib_done),
+       .dbg_calib_err          (dbg_calib_err),
+       .dbg_calib_dq_tap_cnt   (dbg_calib_dq_tap_cnt),
+       .dbg_calib_dqs_tap_cnt  (dbg_calib_dqs_tap_cnt),
+       .dbg_calib_gate_tap_cnt (dbg_calib_gate_tap_cnt),
+       .dbg_calib_rd_data_sel  (dbg_calib_rd_data_sel),
+       .dbg_calib_rden_dly     (dbg_calib_rden_dly),
+       .dbg_calib_gate_dly     (dbg_calib_gate_dly)
+       );
+
+  //***************************************************************************
+  // Memory clock generation
+  //***************************************************************************
+
+  genvar ck_i;
+  generate
+    for(ck_i = 0; ck_i < CLK_WIDTH; ck_i = ck_i+1) begin: gen_ck
+      ODDR #
+        (
+         .SRTYPE       ("SYNC"),
+         .DDR_CLK_EDGE ("OPPOSITE_EDGE")
+         )
+        u_oddr_ck_i
+          (
+           .Q   (ddr_ck_q[ck_i]),
+           .C   (clk0),
+           .CE  (1'b1),
+           .D1  (1'b0),
+           .D2  (1'b1),
+           .R   (1'b0),
+           .S   (1'b0)
+           );
+      // Can insert ODELAY here if required
+      OBUFDS u_obuf_ck_i
+        (
+         .I   (ddr_ck_q[ck_i]),
+         .O   (ddr_ck[ck_i]),
+         .OB  (ddr_ck_n[ck_i])
+         );
+    end
+  endgenerate
+
+  //***************************************************************************
+  // DQS instances
+  //***************************************************************************
+
+  genvar dqs_i;
+  generate
+    for(dqs_i = 0; dqs_i < DQS_WIDTH; dqs_i = dqs_i+1) begin: gen_dqs
+      ddr2_phy_dqs_iob #
+        (
+         .DDR_TYPE              (DDR_TYPE),
+         .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE)
+         )
+        u_iob_dqs
+          (
+           .clk0           (clk0),
+           .clkdiv0        (clkdiv0),
+           .rst0           (rst0),
+           .dlyinc_dqs     (dlyinc_dqs[dqs_i]),
+           .dlyce_dqs      (dlyce_dqs[dqs_i]),
+           .dlyrst_dqs     (dlyrst_dqs),
+           .dlyinc_gate    (dlyinc_gate[dqs_i]),
+           .dlyce_gate     (dlyce_gate[dqs_i]),
+           .dlyrst_gate    (dlyrst_gate[dqs_i]),
+           .dqs_oe_n       (dqs_oe_n),
+           .dqs_rst_n      (dqs_rst_n),
+           .en_dqs         (en_dqs[dqs_i]),
+           .ddr_dqs        (ddr_dqs[dqs_i]),
+           .ddr_dqs_n      (ddr_dqs_n[dqs_i]),
+           .dq_ce          (dq_ce[dqs_i]),
+           .delayed_dqs    (delayed_dqs[dqs_i])
+           );
+    end
+  endgenerate
+
+  //***************************************************************************
+  // DM instances
+  //***************************************************************************
+
+  genvar dm_i;
+  generate
+    if (USE_DM_PORT) begin: gen_dm_inst
+      for(dm_i = 0; dm_i < DM_WIDTH; dm_i = dm_i+1) begin: gen_dm
+        ddr2_phy_dm_iob u_iob_dm
+          (
+           .clk90           (clk90),
+           .dm_ce           (dm_ce),
+           .mask_data_rise  (mask_data_rise[dm_i/DM_TO_BYTE_RATIO]),
+           .mask_data_fall  (mask_data_fall[dm_i/DM_TO_BYTE_RATIO]),
+           .ddr_dm          (ddr_dm[dm_i])
+           );
+      end
+    end
+  endgenerate
+
+  //***************************************************************************
+  // DQ IOB instances
+  //***************************************************************************
+
+  genvar dq_i;
+  generate
+    for(dq_i = 0; dq_i < DQ_WIDTH; dq_i = dq_i+1) begin: gen_dq
+      ddr2_phy_dq_iob #
+        (
+         .DQ_COL (DQS_IO_COL[2*(dq_i/DQ_PER_DQS)+1:2*(dq_i/DQ_PER_DQS)]),
+         .DQ_MS  (DQ_IO_MS[dq_i]),
+         .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE)
+         )
+        u_iob_dq
+        (
+         .clk0         (clk0),
+         .clk90        (clk90),
+         .clkdiv0      (clkdiv0),
+         .rst90        (rst90),
+         .dlyinc       (dlyinc_dq[dq_i]),
+         .dlyce        (dlyce_dq[dq_i]),
+         .dlyrst       (dlyrst_dq),
+         .dq_oe_n      (dq_oe_n),
+         .dqs          (delayed_dqs[dq_i/DQ_PER_DQS]),
+         .ce           (dq_ce[dq_i/DQ_PER_DQS]),
+         .rd_data_sel  (rd_data_sel[dq_i/DQ_PER_DQS]),
+         .wr_data_rise (wr_data_rise[dq_i]),
+         .wr_data_fall (wr_data_fall[dq_i]),
+         .rd_data_rise (rd_data_rise[dq_i]),
+         .rd_data_fall (rd_data_fall[dq_i]),
+         .ddr_dq       (ddr_dq[dq_i])
+         );
+    end
+  endgenerate
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_top.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_top.v
new file mode 100644 (file)
index 0000000..300a7ba
--- /dev/null
@@ -0,0 +1,393 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_top.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/22 15:41:06 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   Top-level for memory physical layer (PHY) interface
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+(* X_CORE_INFO = "mig_v2_3_ddr2_v5, Coregen 10.1.02" , CORE_GENERATION_INFO = "ddr2_v5,mig_v2_3,{component_name=ddr2_phy_top, BANK_WIDTH=2, CKE_WIDTH=1, CLK_WIDTH=2, COL_WIDTH=10, CS_NUM=1, CS_WIDTH=1, DM_WIDTH=8, DQ_WIDTH=64, DQ_PER_DQS=8, DQS_WIDTH=8, ODT_WIDTH=1, ROW_WIDTH=13, ADDITIVE_LAT=0, BURST_LEN=4, BURST_TYPE=0, CAS_LAT=4, ECC_ENABLE=0, MULTI_BANK_EN=1, TWO_T_TIME_EN=1, ODT_TYPE=1, REDUCE_DRV=0, REG_ENABLE=0, TREFI_NS=7800, TRAS=40000, TRCD=15000, TRFC=105000, TRP=15000, TRTP=7500, TWR=15000, TWTR=7500, DDR2_CLK_PERIOD=3750, RST_ACT_LOW=1}" *)
+module ddr2_phy_top #  // PHY top level: wires together u_phy_write, u_phy_io, u_phy_ctl_io and u_phy_init
+  (
+   // The default parameter values below describe a 72-bit RDIMM design (ML561
+   // reference board) and may differ from the values actually in effect. The
+   // real values are passed down from the design top-level module (ddr2_sdram);
+   // refer to the ddr2_sdram module for the values actually used.
+   parameter BANK_WIDTH            = 2,
+   parameter CLK_WIDTH             = 1,
+   parameter CKE_WIDTH             = 1,
+   parameter COL_WIDTH             = 10,
+   parameter CS_NUM                = 1,
+   parameter CS_WIDTH              = 1,
+   parameter USE_DM_PORT           = 1,  // nonzero: ddr2_phy_io generates the DM IOB instances
+   parameter DM_WIDTH              = 9,
+   parameter DQ_WIDTH              = 72,
+   parameter DQ_BITS               = 7,
+   parameter DQ_PER_DQS            = 8,
+   parameter DQS_WIDTH             = 9,
+   parameter DQS_BITS              = 4,
+   parameter HIGH_PERFORMANCE_MODE = "TRUE",
+   parameter ODT_WIDTH             = 1,
+   parameter ROW_WIDTH             = 14,
+   parameter ADDITIVE_LAT          = 0,
+   parameter TWO_T_TIME_EN         = 0,
+   parameter BURST_LEN             = 4,
+   parameter BURST_TYPE            = 0,
+   parameter CAS_LAT               = 5,
+   parameter TWR                   = 15000,
+   parameter ECC_ENABLE            = 0,
+   parameter ODT_TYPE              = 1,
+   parameter DDR_TYPE              = 1,  // ddr2_phy_write decodes this as 0=DDR1, 1=DDR2, 2=DDR3
+   parameter REDUCE_DRV            = 0,
+   parameter REG_ENABLE            = 1,  // added to WR_LATENCY in ddr2_phy_write
+   parameter CLK_PERIOD            = 3000,
+   parameter SIM_ONLY              = 0,
+   parameter DEBUG_EN              = 0,
+   parameter DQS_IO_COL            = 0,  // ddr2_phy_io slices 2 bits per DQS group into each DQ IOB's DQ_COL
+   parameter DQ_IO_MS              = 0   // ddr2_phy_io passes bit [i] to DQ IOB i as DQ_MS
+   )
+  (
+   input                                  clk0,
+   input                                  clk90,
+   input                                  clkdiv0,
+   input                                  rst0,
+   input                                  rst90,
+   input                                  rstdiv0,
+   input                                  ctrl_wren,
+   input [ROW_WIDTH-1:0]                  ctrl_addr,
+   input [BANK_WIDTH-1:0]                 ctrl_ba,
+   input                                  ctrl_ras_n,
+   input                                  ctrl_cas_n,
+   input                                  ctrl_we_n,
+   input [CS_NUM-1:0]                     ctrl_cs_n,
+   input                                  ctrl_rden,
+   input                                  ctrl_ref_flag,
+   input [(2*DQ_WIDTH)-1:0]               wdf_data,       // ddr2_phy_write splits data and mask into rise/fall buses
+   input [(2*DQ_WIDTH/8)-1:0]             wdf_mask_data,
+   output                                 wdf_rden,       // driven by u_phy_write
+   output                                 phy_init_done,  // connected only to u_phy_init
+   output [DQS_WIDTH-1:0]                 phy_calib_rden,      // u_phy_io port calib_rden
+   output [DQS_WIDTH-1:0]                 phy_calib_rden_sel,  // u_phy_io port calib_rden_sel
+   output [DQ_WIDTH-1:0]                  rd_data_rise,
+   output [DQ_WIDTH-1:0]                  rd_data_fall,
+   output [CLK_WIDTH-1:0]                 ddr_ck,
+   output [CLK_WIDTH-1:0]                 ddr_ck_n,
+   output [ROW_WIDTH-1:0]                 ddr_addr,
+   output [BANK_WIDTH-1:0]                ddr_ba,
+   output                                 ddr_ras_n,
+   output                                 ddr_cas_n,
+   output                                 ddr_we_n,
+   output [CS_WIDTH-1:0]                  ddr_cs_n,
+   output [CKE_WIDTH-1:0]                 ddr_cke,
+   output [ODT_WIDTH-1:0]                 ddr_odt,
+   output [DM_WIDTH-1:0]                  ddr_dm,
+   inout [DQS_WIDTH-1:0]                  ddr_dqs,
+   inout [DQS_WIDTH-1:0]                  ddr_dqs_n,
+   inout [DQ_WIDTH-1:0]                   ddr_dq,
+   // Debug signals (optional use); every dbg_* port is wired straight to u_phy_io
+   input                                  dbg_idel_up_all,
+   input                                  dbg_idel_down_all,
+   input                                  dbg_idel_up_dq,
+   input                                  dbg_idel_down_dq,
+   input                                  dbg_idel_up_dqs,
+   input                                  dbg_idel_down_dqs,
+   input                                  dbg_idel_up_gate,
+   input                                  dbg_idel_down_gate,
+   input [DQ_BITS-1:0]                    dbg_sel_idel_dq,
+   input                                  dbg_sel_all_idel_dq,
+   input [DQS_BITS:0]                     dbg_sel_idel_dqs,
+   input                                  dbg_sel_all_idel_dqs,
+   input [DQS_BITS:0]                     dbg_sel_idel_gate,
+   input                                  dbg_sel_all_idel_gate,
+   output [3:0]                           dbg_calib_done,
+   output [3:0]                           dbg_calib_err,
+   output [(6*DQ_WIDTH)-1:0]              dbg_calib_dq_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]             dbg_calib_dqs_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]             dbg_calib_gate_tap_cnt,
+   output [DQS_WIDTH-1:0]                 dbg_calib_rd_data_sel,
+   output [(5*DQS_WIDTH)-1:0]             dbg_calib_rden_dly,
+   output [(5*DQS_WIDTH)-1:0]             dbg_calib_gate_dly
+   );
+  // Internal nets: these only interconnect the four sub-module instances below
+  wire [3:0]               calib_done;      // shared by u_phy_io and u_phy_init
+  wire                     calib_ref_done;  // shared by u_phy_io and u_phy_init
+  wire                     calib_ref_req;   // shared by u_phy_io and u_phy_init
+  wire [3:0]               calib_start;     // shared by u_phy_io and u_phy_init
+  wire                     dm_ce;           // u_phy_write output -> u_phy_io
+  wire [1:0]               dq_oe_n;         // u_phy_write output -> u_phy_io
+  wire                     dqs_oe_n;        // u_phy_write output -> u_phy_io
+  wire                     dqs_rst_n;       // u_phy_write output -> u_phy_io
+  wire [(DQ_WIDTH/8)-1:0]  mask_data_fall;  // u_phy_write output -> u_phy_io
+  wire [(DQ_WIDTH/8)-1:0]  mask_data_rise;  // u_phy_write output -> u_phy_io
+  wire [CS_NUM-1:0]        odt;             // u_phy_write output -> u_phy_ctl_io
+  wire [ROW_WIDTH-1:0]     phy_init_addr;   // phy_init_* command nets: shared by u_phy_init and u_phy_ctl_io
+  wire [BANK_WIDTH-1:0]    phy_init_ba;
+  wire                     phy_init_cas_n;
+  wire [CKE_WIDTH-1:0]     phy_init_cke;
+  wire [CS_NUM-1:0]        phy_init_cs_n;
+  wire                     phy_init_data_sel;  // in ddr2_phy_write: 1 selects ctrl_wren, 0 selects phy_init_wren
+  wire                     phy_init_ras_n;
+  wire                     phy_init_rden;   // shared by u_phy_init and u_phy_io
+  wire                     phy_init_we_n;
+  wire                     phy_init_wren;   // input of u_phy_write; also connected to u_phy_init
+  wire [DQ_WIDTH-1:0]      wr_data_fall;    // u_phy_write output -> u_phy_io
+  wire [DQ_WIDTH-1:0]      wr_data_rise;    // u_phy_write output -> u_phy_io
+
+  //***************************************************************************
+  // Write path: delays the write control signals and splits data/mask into rise/fall buses
+  ddr2_phy_write #
+    (
+     .DQ_WIDTH     (DQ_WIDTH),
+     .CS_NUM       (CS_NUM),
+     .ADDITIVE_LAT (ADDITIVE_LAT),
+     .CAS_LAT      (CAS_LAT),
+     .ECC_ENABLE   (ECC_ENABLE),
+     .ODT_TYPE     (ODT_TYPE),
+     .REG_ENABLE   (REG_ENABLE),
+     .DDR_TYPE     (DDR_TYPE)
+     )
+    u_phy_write
+      (
+       .clk0                    (clk0),
+       .clk90                   (clk90),
+       .rst90                   (rst90),
+       .wdf_data                (wdf_data),
+       .wdf_mask_data           (wdf_mask_data),
+       .ctrl_wren               (ctrl_wren),
+       .phy_init_wren           (phy_init_wren),
+       .phy_init_data_sel       (phy_init_data_sel),
+       .dm_ce                   (dm_ce),
+       .dq_oe_n                 (dq_oe_n),
+       .dqs_oe_n                (dqs_oe_n),
+       .dqs_rst_n               (dqs_rst_n),
+       .wdf_rden                (wdf_rden),
+       .odt                     (odt),
+       .wr_data_rise            (wr_data_rise),
+       .wr_data_fall            (wr_data_fall),
+       .mask_data_rise          (mask_data_rise),
+       .mask_data_fall          (mask_data_fall)
+       );
+  // Data-side I/O: ddr2_phy_io holds the calibration block, memory clock outputs and the DQS/DM/DQ IOBs
+  ddr2_phy_io #
+    (
+     .CLK_WIDTH             (CLK_WIDTH),
+     .USE_DM_PORT           (USE_DM_PORT),
+     .DM_WIDTH              (DM_WIDTH),
+     .DQ_WIDTH              (DQ_WIDTH),
+     .DQ_BITS               (DQ_BITS),
+     .DQ_PER_DQS            (DQ_PER_DQS),
+     .DQS_BITS              (DQS_BITS),
+     .DQS_WIDTH             (DQS_WIDTH),
+     .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE),
+     .ODT_WIDTH             (ODT_WIDTH),
+     .ADDITIVE_LAT          (ADDITIVE_LAT),
+     .CAS_LAT               (CAS_LAT),
+     .REG_ENABLE            (REG_ENABLE),
+     .CLK_PERIOD            (CLK_PERIOD),
+     .DDR_TYPE              (DDR_TYPE),
+     .SIM_ONLY              (SIM_ONLY),
+     .DEBUG_EN              (DEBUG_EN),
+     .DQS_IO_COL            (DQS_IO_COL),
+     .DQ_IO_MS              (DQ_IO_MS)
+     )
+    u_phy_io
+      (
+       .clk0                   (clk0),
+       .clk90                  (clk90),
+       .clkdiv0                (clkdiv0),
+       .rst0                   (rst0),
+       .rst90                  (rst90),
+       .rstdiv0                (rstdiv0),
+       .dm_ce                  (dm_ce),
+       .dq_oe_n                (dq_oe_n),
+       .dqs_oe_n               (dqs_oe_n),
+       .dqs_rst_n              (dqs_rst_n),
+       .calib_start            (calib_start),
+       .ctrl_rden              (ctrl_rden),
+       .phy_init_rden          (phy_init_rden),
+       .calib_ref_done         (calib_ref_done),
+       .calib_done             (calib_done),
+       .calib_ref_req          (calib_ref_req),
+       .calib_rden             (phy_calib_rden),      // port name differs from the net: exported as phy_calib_rden
+       .calib_rden_sel         (phy_calib_rden_sel),  // port name differs from the net: exported as phy_calib_rden_sel
+       .wr_data_rise           (wr_data_rise),
+       .wr_data_fall           (wr_data_fall),
+       .mask_data_rise         (mask_data_rise),
+       .mask_data_fall         (mask_data_fall),
+       .rd_data_rise           (rd_data_rise),
+       .rd_data_fall           (rd_data_fall),
+       .ddr_ck                 (ddr_ck),
+       .ddr_ck_n               (ddr_ck_n),
+       .ddr_dm                 (ddr_dm),
+       .ddr_dqs                (ddr_dqs),
+       .ddr_dqs_n              (ddr_dqs_n),
+       .ddr_dq                 (ddr_dq),
+       .dbg_idel_up_all        (dbg_idel_up_all),
+       .dbg_idel_down_all      (dbg_idel_down_all),
+       .dbg_idel_up_dq         (dbg_idel_up_dq),
+       .dbg_idel_down_dq       (dbg_idel_down_dq),
+       .dbg_idel_up_dqs        (dbg_idel_up_dqs),
+       .dbg_idel_down_dqs      (dbg_idel_down_dqs),
+       .dbg_idel_up_gate       (dbg_idel_up_gate),
+       .dbg_idel_down_gate     (dbg_idel_down_gate),
+       .dbg_sel_idel_dq        (dbg_sel_idel_dq),
+       .dbg_sel_all_idel_dq    (dbg_sel_all_idel_dq),
+       .dbg_sel_idel_dqs       (dbg_sel_idel_dqs),
+       .dbg_sel_all_idel_dqs   (dbg_sel_all_idel_dqs),
+       .dbg_sel_idel_gate      (dbg_sel_idel_gate),
+       .dbg_sel_all_idel_gate  (dbg_sel_all_idel_gate),
+       .dbg_calib_done         (dbg_calib_done),
+       .dbg_calib_err          (dbg_calib_err),
+       .dbg_calib_dq_tap_cnt   (dbg_calib_dq_tap_cnt),
+       .dbg_calib_dqs_tap_cnt  (dbg_calib_dqs_tap_cnt),
+       .dbg_calib_gate_tap_cnt (dbg_calib_gate_tap_cnt),
+       .dbg_calib_rd_data_sel  (dbg_calib_rd_data_sel),
+       .dbg_calib_rden_dly     (dbg_calib_rden_dly),
+       .dbg_calib_gate_dly     (dbg_calib_gate_dly)
+       );
+  // Command/address I/O. NOTE(review): presumably muxes ctrl_* vs phy_init_* on phy_init_data_sel - confirm in ddr2_phy_ctl_io.v
+  ddr2_phy_ctl_io #
+    (
+     .BANK_WIDTH    (BANK_WIDTH),
+     .CKE_WIDTH     (CKE_WIDTH),
+     .COL_WIDTH     (COL_WIDTH),
+     .CS_NUM        (CS_NUM),
+     .CS_WIDTH      (CS_WIDTH),
+     .TWO_T_TIME_EN (TWO_T_TIME_EN),
+     .ODT_WIDTH     (ODT_WIDTH),
+     .ROW_WIDTH     (ROW_WIDTH),
+     .DDR_TYPE      (DDR_TYPE)
+     )
+    u_phy_ctl_io
+      (
+       .clk0                    (clk0),
+       .clk90                   (clk90),
+       .rst0                    (rst0),
+       .rst90                   (rst90),
+       .ctrl_addr               (ctrl_addr),
+       .ctrl_ba                 (ctrl_ba),
+       .ctrl_ras_n              (ctrl_ras_n),
+       .ctrl_cas_n              (ctrl_cas_n),
+       .ctrl_we_n               (ctrl_we_n),
+       .ctrl_cs_n               (ctrl_cs_n),
+       .phy_init_addr           (phy_init_addr),
+       .phy_init_ba             (phy_init_ba),
+       .phy_init_ras_n          (phy_init_ras_n),
+       .phy_init_cas_n          (phy_init_cas_n),
+       .phy_init_we_n           (phy_init_we_n),
+       .phy_init_cs_n           (phy_init_cs_n),
+       .phy_init_cke            (phy_init_cke),
+       .phy_init_data_sel       (phy_init_data_sel),
+       .odt                     (odt),
+       .ddr_addr                (ddr_addr),
+       .ddr_ba                  (ddr_ba),
+       .ddr_ras_n               (ddr_ras_n),
+       .ddr_cas_n               (ddr_cas_n),
+       .ddr_we_n                (ddr_we_n),
+       .ddr_cke                 (ddr_cke),
+       .ddr_cs_n                (ddr_cs_n),
+       .ddr_odt                 (ddr_odt)
+       );
+  // Init block. NOTE(review): presumably sources the phy_init_* nets, calib_start and phy_init_done - confirm in ddr2_phy_init.v
+  ddr2_phy_init #
+    (
+     .BANK_WIDTH   (BANK_WIDTH),
+     .CKE_WIDTH    (CKE_WIDTH),
+     .COL_WIDTH    (COL_WIDTH),
+     .CS_NUM       (CS_NUM),
+     .DQ_WIDTH     (DQ_WIDTH),
+     .ODT_WIDTH    (ODT_WIDTH),
+     .ROW_WIDTH    (ROW_WIDTH),
+     .ADDITIVE_LAT (ADDITIVE_LAT),
+     .BURST_LEN    (BURST_LEN),
+     .BURST_TYPE   (BURST_TYPE),
+     .TWO_T_TIME_EN(TWO_T_TIME_EN),
+     .CAS_LAT      (CAS_LAT),
+     .ODT_TYPE     (ODT_TYPE),
+     .REDUCE_DRV   (REDUCE_DRV),
+     .REG_ENABLE   (REG_ENABLE),
+     .TWR          (TWR),
+     .CLK_PERIOD   (CLK_PERIOD),
+     .DDR_TYPE     (DDR_TYPE),
+     .SIM_ONLY     (SIM_ONLY)
+     )
+    u_phy_init
+      (
+       .clk0                    (clk0),
+       .clkdiv0                 (clkdiv0),
+       .rst0                    (rst0),
+       .rstdiv0                 (rstdiv0),
+       .calib_done              (calib_done),
+       .ctrl_ref_flag           (ctrl_ref_flag),
+       .calib_ref_req           (calib_ref_req),
+       .calib_start             (calib_start),
+       .calib_ref_done          (calib_ref_done),
+       .phy_init_wren           (phy_init_wren),
+       .phy_init_rden           (phy_init_rden),
+       .phy_init_addr           (phy_init_addr),
+       .phy_init_ba             (phy_init_ba),
+       .phy_init_ras_n          (phy_init_ras_n),
+       .phy_init_cas_n          (phy_init_cas_n),
+       .phy_init_we_n           (phy_init_we_n),
+       .phy_init_cs_n           (phy_init_cs_n),
+       .phy_init_cke            (phy_init_cke),
+       .phy_init_done           (phy_init_done),
+       .phy_init_data_sel       (phy_init_data_sel)
+       );
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_write.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_phy_write.v
new file mode 100644 (file)
index 0000000..1bb831a
--- /dev/null
@@ -0,0 +1,446 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_phy_write.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/29 15:24:03 $
+// \   \  /  \    Date Created: Thu Aug 24 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//Reference:
+//   Handles delaying various write control signals appropriately depending
+//   on CAS latency, additive latency, etc. Also splits the data and mask in
+//   rise and fall buses.
+//Revision History:
+//   Rev 1.1 - For Dual Rank parts support ODT logic corrected. PK. 08/05/08
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_phy_write #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
+   // board design). Actual values may be different. Actual parameters values
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter DQ_WIDTH      = 72,
+   parameter CS_NUM        = 1,
+   parameter ADDITIVE_LAT  = 0,
+   parameter CAS_LAT       = 5,
+   parameter ECC_ENABLE    = 0,
+   parameter ODT_TYPE      = 1,
+   parameter REG_ENABLE    = 1,
+   parameter DDR_TYPE      = 1
+   )
+  (
+   input                       clk0,
+   input                       clk90,
+   input                       rst90,
+   input [(2*DQ_WIDTH)-1:0]    wdf_data,
+   input [(2*DQ_WIDTH/8)-1:0]  wdf_mask_data,
+   input                       ctrl_wren,
+   input                       phy_init_wren,
+   input                       phy_init_data_sel,
+   output reg                  dm_ce,
+   output reg [1:0]            dq_oe_n,
+   output reg                  dqs_oe_n ,
+   output reg                  dqs_rst_n ,
+   output                      wdf_rden,
+   output reg [CS_NUM-1:0]     odt ,
+   output [DQ_WIDTH-1:0]       wr_data_rise,
+   output [DQ_WIDTH-1:0]       wr_data_fall,
+   output [(DQ_WIDTH/8)-1:0]   mask_data_rise,
+   output [(DQ_WIDTH/8)-1:0]   mask_data_fall
+   );
+
+  localparam   MASK_WIDTH               = DQ_WIDTH/8;
+  localparam   DDR1                     = 0;
+  localparam   DDR2                     = 1;
+  localparam   DDR3                     = 2;
+
+  // (MIN,MAX) value of WR_LATENCY for DDR1:
+  //   REG_ENABLE   = (0,1)
+  //   ECC_ENABLE   = (0,1)
+  //   Write latency = 1
+  //   Total: (1,3)
+  // (MIN,MAX) value of WR_LATENCY for DDR2:
+  //   REG_ENABLE   = (0,1)
+  //   ECC_ENABLE   = (0,1)
+  //   Write latency = ADDITIVE_LAT + CAS_LAT - 1 = (0,4) + (3,5) - 1 = (2,8)
+  //     ADDITIVE_LAT = (0,4) (JEDEC79-2B)
+  //     CAS_LAT      = (3,5) (JEDEC79-2B)
+  //   Total: (2,10)
+  localparam WR_LATENCY = (DDR_TYPE == DDR3) ?
+             (ADDITIVE_LAT + (CAS_LAT) + REG_ENABLE ) :
+             (DDR_TYPE == DDR2) ?
+             (ADDITIVE_LAT + (CAS_LAT-1) + REG_ENABLE ) :
+             (1 + REG_ENABLE );
+
+  // NOTE that ODT timing does not need to be delayed for registered
+  // DIMM case, since like other control/address signals, it gets
+  // delayed by one clock cycle at the DIMM
+  localparam ODT_WR_LATENCY = WR_LATENCY - REG_ENABLE;
+
+  wire                     dm_ce_0;
+  reg                      dm_ce_r;
+  wire [1:0]               dq_oe_0;
+  reg [1:0]                dq_oe_n_90_r1;
+  reg [1:0]                dq_oe_270;
+  wire                     dqs_oe_0;
+  reg                      dqs_oe_270;
+  reg                      dqs_oe_n_180_r1;
+  wire                     dqs_rst_0;
+  reg                      dqs_rst_n_180_r1;
+  reg                      dqs_rst_270;
+  reg                      ecc_dm_error_r;
+  reg                      ecc_dm_error_r1;
+  reg [(DQ_WIDTH-1):0]     init_data_f;
+  reg [(DQ_WIDTH-1):0]     init_data_r;
+  reg [3:0]                init_wdf_cnt_r;
+  wire                     odt_0;
+  reg                      rst90_r /* synthesis syn_maxfan = 10 */;
+  reg [10:0]               wr_stages ;
+  reg [(2*DQ_WIDTH)-1:0]   wdf_data_r;
+  reg [(2*DQ_WIDTH/8)-1:0] wdf_mask_r;
+  wire [(2*DQ_WIDTH/8)-1:0] wdf_ecc_mask;
+
+  reg [(2*DQ_WIDTH/8)-1:0] wdf_mask_r1;
+  wire                     wdf_rden_0;
+  reg                      calib_rden_90_r;
+  reg                      wdf_rden_90_r;
+  reg                      wdf_rden_90_r1;
+  reg                      wdf_rden_270;
+
+  always @(posedge clk90)
+      rst90_r <= rst90;
+
+  //***************************************************************************
+  // Analysis of additional pipeline delays:
+  //   1. dq_oe (DQ 3-state): 1 CLK90 cyc in IOB 3-state FF
+  //   2. dqs_oe (DQS 3-state): 1 CLK180 cyc in IOB 3-state FF
+  //   3. dqs_rst (DQS output value reset): 1 CLK180 cyc in FF + 1 CLK180 cyc
+  //      in IOB DDR
+  //   4. odt (ODT control): 1 CLK0 cyc in IOB FF
+  //   5. write data (output two cyc after wdf_rden - output of RAMB_FIFO w/
+  //      output register enabled): 2 CLK90 cyc in OSERDES
+  //***************************************************************************
+
+  // DQS 3-state must be asserted one extra clock cycle b/c of write
+  // pre- and post-amble (extra half clock cycle for each)
+  assign dqs_oe_0 = wr_stages[WR_LATENCY-1] | wr_stages[WR_LATENCY-2];
+
+  // same goes for ODT, need to handle both pre- and post-amble (generate
+  // ODT only for DDR2)
+  // ODT generation for DDR2 based on write latency. The MIN write
+  // latency is 2. Based on the write latency ODT is asserted.
+  generate
+    if ((DDR_TYPE != DDR1) && (ODT_TYPE > 0))begin: gen_odt_ddr2
+       if(ODT_WR_LATENCY > 2)
+         assign odt_0 =
+                   wr_stages[ODT_WR_LATENCY-1] |
+                   wr_stages[ODT_WR_LATENCY-2] |
+                   wr_stages[ODT_WR_LATENCY-3] ;
+       else
+         assign odt_0 =
+                  wr_stages[ODT_WR_LATENCY] |
+                  wr_stages[ODT_WR_LATENCY-1] |
+                  wr_stages[ODT_WR_LATENCY-2] ;
+    end else
+      assign odt_0 = 1'b0;
+   endgenerate
+
+  assign dq_oe_0[0]   = wr_stages[WR_LATENCY-1] | wr_stages[WR_LATENCY];
+  assign dq_oe_0[1]   = wr_stages[WR_LATENCY-1] | wr_stages[WR_LATENCY-2];
+  assign dqs_rst_0    = ~wr_stages[WR_LATENCY-2];
+  assign dm_ce_0      = wr_stages[WR_LATENCY] | wr_stages[WR_LATENCY-1]
+                        | wr_stages[WR_LATENCY-2];
+
+  // write data fifo, read flag assertion
+  generate
+    if (DDR_TYPE != DDR1) begin: gen_wdf_ddr2
+      if (WR_LATENCY > 2)
+        assign wdf_rden_0 = wr_stages[WR_LATENCY-3];
+      else
+        assign wdf_rden_0 = wr_stages[WR_LATENCY-2];
+    end else begin: gen_wdf_ddr1
+      assign wdf_rden_0 = wr_stages[WR_LATENCY-2];
+    end
+  endgenerate
+
+  // first stage isn't registered
+  always @(*)
+    wr_stages[0] = (phy_init_data_sel) ? ctrl_wren : phy_init_wren;
+
+  always @(posedge clk0) begin
+    wr_stages[1] <= wr_stages[0];
+    wr_stages[2] <= wr_stages[1];
+    wr_stages[3] <= wr_stages[2];
+    wr_stages[4] <= wr_stages[3];
+    wr_stages[5] <= wr_stages[4];
+    wr_stages[6] <= wr_stages[5];
+    wr_stages[7] <= wr_stages[6];
+    wr_stages[8] <= wr_stages[7];
+    wr_stages[9] <= wr_stages[8];
+    wr_stages[10] <= wr_stages[9];
+  end
+
+  // intermediate synchronization to CLK270
+  always @(negedge clk90) begin
+    dq_oe_270         <= dq_oe_0;
+    dqs_oe_270        <= dqs_oe_0;
+    dqs_rst_270       <= dqs_rst_0;
+    wdf_rden_270      <= wdf_rden_0;
+  end
+
+  // synchronize DQS signals to CLK180
+  always @(negedge clk0) begin
+    dqs_oe_n_180_r1  <= ~dqs_oe_270;
+    dqs_rst_n_180_r1 <= ~dqs_rst_270;
+  end
+
+  // All write data-related signals synced to CLK90
+  always @(posedge clk90) begin
+    dq_oe_n_90_r1  <= ~dq_oe_270;
+    wdf_rden_90_r  <= wdf_rden_270;
+  end
+
+  // generate for wdf_rden and calib rden. These signals
+  // are asserted based on write latency. For write
+  // latency of 2, the extra register stage is taken out.
+  generate
+   if (WR_LATENCY > 2) begin
+     always @(posedge clk90) begin
+        // assert wdf rden only for non-calibration operations
+        wdf_rden_90_r1 <=  wdf_rden_90_r &
+                           phy_init_data_sel;
+        // rden for calibration
+        calib_rden_90_r <= wdf_rden_90_r;
+     end
+   end else begin
+     always @(*) begin
+        wdf_rden_90_r1 = wdf_rden_90_r
+                         & phy_init_data_sel;
+        calib_rden_90_r = wdf_rden_90_r;
+     end
+  end // else: !if(WR_LATENCY > 2)
+  endgenerate
+
+  // dm CE signal to stop dm oscillation
+  always @(negedge clk90)begin
+    dm_ce_r <= dm_ce_0;
+    dm_ce <= dm_ce_r;
+  end
+
+  // When in ECC mode the upper byte [71:64] will have the
+  // ECC parity. Mapping the bytes which have valid data
+  // to the upper byte in ecc mode. Also in ecc mode there
+  // is an extra register stage to account for timing.
+
+  genvar mask_i;
+  generate
+    if(ECC_ENABLE) begin
+      for (mask_i  = 0; mask_i < (2*DQ_WIDTH)/72;
+          mask_i = mask_i+1) begin: gen_mask
+       assign wdf_ecc_mask[((mask_i*9)+9)-1:(mask_i*9)] =
+                {&wdf_mask_data[(mask_i*8)+(7+mask_i):mask_i*9],
+                wdf_mask_data[(mask_i*8)+(7+mask_i):mask_i*9]};
+      end
+    end
+   endgenerate
+
+  generate
+    if (ECC_ENABLE) begin:gen_ecc_reg
+       always @(posedge clk90)begin
+          if(phy_init_data_sel)
+               wdf_mask_r <= wdf_ecc_mask;
+          else
+             wdf_mask_r <= {(2*DQ_WIDTH/8){1'b0}};
+      end       
+    end else begin
+      always@(posedge clk90) begin
+        if (phy_init_data_sel)
+          wdf_mask_r <= wdf_mask_data;
+        else
+          wdf_mask_r <= {(2*DQ_WIDTH/8){1'b0}};
+      end
+    end
+  endgenerate
+
+   always @(posedge clk90) begin
+      if(phy_init_data_sel)
+          wdf_data_r <= wdf_data;
+      else
+          wdf_data_r <={init_data_f,init_data_r};
+   end
+
+  // Error generation block during simulation.
+  // Error will be displayed when all the DM
+  // bits are not zero. The error will be
+  // displayed only during the start of the sequence
+  // for errors that are continuous over many cycles.
+  generate
+    if (ECC_ENABLE) begin: gen_ecc_error
+      always @(posedge clk90) begin
+        //synthesis translate_off
+        wdf_mask_r1 <= wdf_mask_r;
+        if(DQ_WIDTH > 72)
+           ecc_dm_error_r
+              <= (
+              (~wdf_mask_r1[35] && (|wdf_mask_r1[34:27])) ||
+              (~wdf_mask_r1[26] && (|wdf_mask_r1[25:18])) ||
+              (~wdf_mask_r1[17] && (|wdf_mask_r1[16:9])) ||
+              (~wdf_mask_r1[8] &&  (|wdf_mask_r1[7:0]))) && phy_init_data_sel;
+         else
+            ecc_dm_error_r
+              <= ((~wdf_mask_r1[17] && (|wdf_mask_r1[16:9])) ||
+              (~wdf_mask_r1[8] &&  (|wdf_mask_r1[7:0]))) && phy_init_data_sel;
+        ecc_dm_error_r1 <= ecc_dm_error_r ;
+        if (ecc_dm_error_r && ~ecc_dm_error_r1) // assert the error only once.
+          $display ("ECC DM ERROR. ");
+        //synthesis translate_on
+      end
+    end
+  endgenerate
+
+  //***************************************************************************
+  // State logic to write calibration training patterns
+  //***************************************************************************
+
+  // Drives init_data_r/init_data_f with the training word selected by init_wdf_cnt_r.
+  always @(posedge clk90) begin
+    if (rst90_r) begin
+      init_wdf_cnt_r  <= 4'd0;
+      init_data_r <= {DQ_WIDTH{1'bx}};  // width assumed DQ_WIDTH, as in the patterns below - confirm
+      init_data_f <= {DQ_WIDTH{1'bx}};
+    end else begin
+      init_wdf_cnt_r  <= init_wdf_cnt_r + calib_rden_90_r;  // advance once per calib_rden_90_r
+      casex (init_wdf_cnt_r)
+        // First stage calibration. Pattern (rise/fall) = 1(r)->0(f)
+        // The rise data and fall data are already interleaved in the manner
+        // required for data into the WDF write FIFO
+        4'b00xx: begin
+          init_data_r <= {DQ_WIDTH{1'b1}};
+          init_data_f <= {DQ_WIDTH{1'b0}};
+        end
+        // Second stage calibration. Pattern = 1(r)->1(f)->0(r)->0(f)
+        4'b01x0: begin
+          init_data_r <= {DQ_WIDTH{1'b1}};
+          init_data_f <= {DQ_WIDTH{1'b1}};
+        end
+        4'b01x1: begin
+          init_data_r <= {DQ_WIDTH{1'b0}};
+          init_data_f <= {DQ_WIDTH{1'b0}};
+        end
+        // Third and fourth stage calibration pattern (changed in MIG 2.1) =
+        //   11(r)->ee(f)->ee(r)->11(f)-11(r)->ee(f)->ee(r)->11(f)
+        4'b1000: begin
+          init_data_r <= {(DQ_WIDTH/4){4'h1}};
+          init_data_f <= {(DQ_WIDTH/4){4'hE}};
+        end
+        4'b1001: begin
+          init_data_r <= {(DQ_WIDTH/4){4'hE}};
+          init_data_f <= {(DQ_WIDTH/4){4'h1}};
+        end
+        4'b1010: begin
+          init_data_r <= {(DQ_WIDTH/4){4'h1}};
+          init_data_f <= {(DQ_WIDTH/4){4'hE}};
+        end
+        4'b1011: begin
+          init_data_r <= {(DQ_WIDTH/4){4'hE}};
+          init_data_f <= {(DQ_WIDTH/4){4'h1}};
+        end
+        default: begin
+          init_data_f <= {DQ_WIDTH{1'bx}};  // counts 4'b11xx have no pattern defined
+          init_data_r <= {DQ_WIDTH{1'bx}};
+        end
+      endcase
+    end
+  end
+
+  //***************************************************************************
+
+  always @(posedge clk90)
+    dq_oe_n   <= dq_oe_n_90_r1;
+
+  always @(negedge clk0)
+    dqs_oe_n  <= dqs_oe_n_180_r1;
+
+  always @(negedge clk0)
+    dqs_rst_n <= dqs_rst_n_180_r1;
+
+  // generate for odt. odt is asserted based on
+  //  write latency. For write latency of 2
+  //  the extra register stage is taken out.
+  generate
+    if (ODT_WR_LATENCY > 2) begin
+      always @(posedge clk0) begin
+        odt    <= 'b0;
+        odt[0] <= odt_0;
+      end
+    end else begin
+      always @ (*) begin
+        odt = 'b0;
+        odt[0] = odt_0;
+      end
+    end
+  endgenerate
+
+  assign wdf_rden  = wdf_rden_90_r1;
+
+  //***************************************************************************
+  // Format write data/mask: Data is in format: {fall, rise}
+  //***************************************************************************
+
+  assign wr_data_rise = wdf_data_r[DQ_WIDTH-1:0];
+  assign wr_data_fall = wdf_data_r[(2*DQ_WIDTH)-1:DQ_WIDTH];
+  assign mask_data_rise = wdf_mask_r[MASK_WIDTH-1:0];
+  assign mask_data_fall = wdf_mask_r[(2*MASK_WIDTH)-1:MASK_WIDTH];
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_sdram.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_sdram.v
new file mode 100644 (file)
index 0000000..64f1a0a
--- /dev/null
@@ -0,0 +1,629 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_sdram.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/09 12:33:12 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   Top-level  module. Simple model for what the user might use
+//   Typically, the user will only instantiate MEM_INTERFACE_TOP in their
+//   code, and generate all backend logic (test bench) and all the other infrastructure logic
+//    separately. 
+//   In addition to the memory controller, the module instantiates:
+//     1. Reset logic based on user clocks
+//     2. IDELAY control block
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+(* X_CORE_INFO = "mig_v2_3_ddr2_sdram_v5, Coregen 10.1.02" , CORE_GENERATION_INFO = "ddr2_sdram_v5,mig_v2_3,{component_name=ddr2_sdram, BANK_WIDTH=2, CKE_WIDTH=1, CLK_WIDTH=2, COL_WIDTH=10, CS_NUM=1, CS_WIDTH=1, DM_WIDTH=8, DQ_WIDTH=64, DQ_PER_DQS=8, DQS_WIDTH=8, ODT_WIDTH=1, ROW_WIDTH=13, ADDITIVE_LAT=0, BURST_LEN=4, BURST_TYPE=0, CAS_LAT=4, ECC_ENABLE=0, MULTI_BANK_EN=1, TWO_T_TIME_EN=1, ODT_TYPE=1, REDUCE_DRV=0, REG_ENABLE=0, TREFI_NS=7800, TRAS=40000, TRCD=15000, TRFC=105000, TRP=15000, TRTP=7500, TWR=15000, TWTR=7500, DDR2_CLK_PERIOD=3750, RST_ACT_LOW=1}" *)
+module ddr2_sdram #
+  (
+   parameter BANK_WIDTH              = 2,       
+                                       // # of memory bank addr bits.
+   parameter CKE_WIDTH               = 1,       
+                                       // # of memory clock enable outputs.
+   parameter CLK_WIDTH               = 2,       
+                                       // # of clock outputs.
+   parameter COL_WIDTH               = 10,       
+                                       // # of memory column bits.
+   parameter CS_NUM                  = 1,       
+                                       // # of separate memory chip selects.
+   parameter CS_WIDTH                = 1,       
+                                       // # of total memory chip selects.
+   parameter CS_BITS                 = 0,       
+                                       // set to log2(CS_NUM) (rounded up).
+   parameter DM_WIDTH                = 8,       
+                                       // # of data mask bits.
+   parameter DQ_WIDTH                = 64,       
+                                       // # of data width.
+   parameter DQ_PER_DQS              = 8,       
+                                       // # of DQ data bits per strobe.
+   parameter DQS_WIDTH               = 8,       
+                                       // # of DQS strobes.
+   parameter DQ_BITS                 = 6,       
+                                       // set to log2(DQS_WIDTH*DQ_PER_DQS).
+   parameter DQS_BITS                = 3,       
+                                       // set to log2(DQS_WIDTH).
+   parameter ODT_WIDTH               = 1,       
+                                       // # of memory on-die term enables.
+   parameter ROW_WIDTH               = 13,       
+                                       // # of memory row and # of addr bits.
+   parameter ADDITIVE_LAT            = 0,       
+                                       // additive write latency.
+   parameter BURST_LEN               = 4,       
+                                       // burst length (in double words).
+   parameter BURST_TYPE              = 0,       
+                                       // burst type (=0 seq; =1 interleaved).
+   parameter CAS_LAT                 = 4,       
+                                       // CAS latency.
+   parameter ECC_ENABLE              = 0,       
+                                       // enable ECC (=1 enable).
+   parameter APPDATA_WIDTH           = 128,       
+                                       // # of usr read/write data bus bits.
+   parameter MULTI_BANK_EN           = 1,       
+                                       // Keeps multiple banks open. (= 1 enable).
+   parameter TWO_T_TIME_EN           = 1,       
+                                       // 2t timing for unbuffered dimms.
+   parameter ODT_TYPE                = 1,       
+                                       // ODT (=0(none),=1(75),=2(150),=3(50)).
+   parameter REDUCE_DRV              = 0,       
+                                       // reduced strength mem I/O (=1 yes).
+   parameter REG_ENABLE              = 0,       
+                                       // registered addr/ctrl (=1 yes).
+   parameter TREFI_NS                = 7800,       
+                                       // auto refresh interval (ns).
+   parameter TRAS                    = 40000,       
+                                       // active->precharge delay.
+   parameter TRCD                    = 15000,       
+                                       // active->read/write delay.
+   parameter TRFC                    = 105000,       
+                                       // refresh->refresh, refresh->active delay.
+   parameter TRP                     = 15000,       
+                                       // precharge->command delay.
+   parameter TRTP                    = 7500,       
+                                       // read->precharge delay.
+   parameter TWR                     = 15000,       
+                                       // used to determine write->precharge.
+   parameter TWTR                    = 7500,       
+                                       // write->read delay.
+   parameter HIGH_PERFORMANCE_MODE   = "TRUE",       
+                              // # = TRUE, the IODELAY performance mode is set
+                              // to high.
+                              // # = FALSE, the IODELAY performance mode is set
+                              // to low.
+   parameter SIM_ONLY                = 0,       
+                                       // = 1 to skip SDRAM power up delay.
+   parameter DEBUG_EN                = 0,       
+                                       // Enable debug signals/controls.
+                                       // When this parameter is changed from 0 to 1,
+                                       // make sure to uncomment the coregen commands
+                                       // in ise_flow.bat or create_ise.bat files in
+                                       // par folder.
+   parameter CLK_PERIOD              = 3750,       
+                                       // Core/Memory clock period (in ps).
+   parameter DQS_IO_COL              = 16'b0000000000000000,
+                                       // I/O column location of DQS groups
+                                       // (=0, left; =1 center, =2 right).
+   parameter DQ_IO_MS                = 64'b01110101_00111101_00001111_00011110_00101110_11000011_11000001_10111100,
+                                       // Master/Slave location of DQ I/O (=0 slave).
+   parameter RST_ACT_LOW             = 1,        
+                                       // =1 for active low reset, =0 for active high.
+   parameter EN_SYN                  = "FALSE"
+   )
+  (
+   inout  [DQ_WIDTH-1:0]              ddr2_dq,
+   output [ROW_WIDTH-1:0]             ddr2_a,
+   output [BANK_WIDTH-1:0]            ddr2_ba,
+   output                             ddr2_ras_n,
+   output                             ddr2_cas_n,
+   output                             ddr2_we_n,
+   output [CS_WIDTH-1:0]              ddr2_cs_n,
+   output [ODT_WIDTH-1:0]             ddr2_odt,
+   output [CKE_WIDTH-1:0]             ddr2_cke,
+   output [DM_WIDTH-1:0]              ddr2_dm,
+   input                              sys_rst_n,
+   output                             phy_init_done,
+   input                              dcm_lock,
+   output                             rst0_tb,
+   input                              clk0,
+   output                             clk0_tb,
+   input                              clk90,
+   input                              clkdiv0,
+   input                              clk200,
+
+   //added by xtan & gdgib
+   input                             af_clk,                   //address fifo clk
+   input                             rb_clk,                   //read buffer clk
+   input                         wb_clk,                       //write buffer clk
+   input                             af_rst,                   //address fifo rst
+   input                             rb_rst,                   //read buffer rst
+   input                         wb_rst,                       //write buffer rst
+   output                            rb_full,                  //read buffer is full
+   //end of add
+   
+   output                             app_wdf_afull,
+   output                             app_af_afull,
+   output                             rd_data_valid,
+   input                              app_wdf_wren,
+   input                              app_af_wren,
+   input  [30:0]                      app_af_addr,
+   input  [2:0]                       app_af_cmd,
+   output [(APPDATA_WIDTH)-1:0]                rd_data_fifo_out,
+   input                              rd_data_rden,
+   output [1:0]                       rd_ecc_error,
+   input  [(APPDATA_WIDTH)-1:0]                app_wdf_data,
+   input  [(APPDATA_WIDTH/8)-1:0]              app_wdf_mask_data,
+   inout  [DQS_WIDTH-1:0]             ddr2_dqs,
+   inout  [DQS_WIDTH-1:0]             ddr2_dqs_n,
+   output [CLK_WIDTH-1:0]             ddr2_ck,
+   output [CLK_WIDTH-1:0]             ddr2_ck_n
+   );
+
+  /////////////////////////////////////////////////////////////////////////////
+  // The following parameter "IDELAYCTRL_NUM" indicates the number of
+  // IDELAYCTRLs that are LOCed for the design. The IDELAYCTRL LOCs are
+  // provided in the UCF file of par folder. MIG provides the parameter value
+  // and the LOCs in the UCF file based on the selected Data Read banks for
+  // the design. You must not alter this value unless it is needed. If you
+  // modify this value, you should make sure that the value of "IDELAYCTRL_NUM"
+  // and IDELAYCTRL LOCs in UCF file are the same and are relevant to the Data Read
+  // banks used.
+  /////////////////////////////////////////////////////////////////////////////
+
+  localparam IDELAYCTRL_NUM = 3;
+
+
+
+
+
+  wire                              rst0;
+  wire                              rst90;
+  wire                              rstdiv0;
+  wire                              rst200;
+  wire                              idelay_ctrl_rdy;
+
+
+  //Debug signals
+
+
+  wire [3:0]                        dbg_calib_done;
+  wire [3:0]                        dbg_calib_err;
+  wire [(6*DQ_WIDTH)-1:0]           dbg_calib_dq_tap_cnt;
+  wire [(6*DQS_WIDTH)-1:0]          dbg_calib_dqs_tap_cnt;
+  wire [(6*DQS_WIDTH)-1:0]          dbg_calib_gate_tap_cnt;
+  wire [DQS_WIDTH-1:0]              dbg_calib_rd_data_sel;
+  wire [(5*DQS_WIDTH)-1:0]          dbg_calib_rden_dly;
+  wire [(5*DQS_WIDTH)-1:0]          dbg_calib_gate_dly;
+  wire                              dbg_idel_up_all;
+  wire                              dbg_idel_down_all;
+  wire                              dbg_idel_up_dq;
+  wire                              dbg_idel_down_dq;
+  wire                              dbg_idel_up_dqs;
+  wire                              dbg_idel_down_dqs;
+  wire                              dbg_idel_up_gate;
+  wire                              dbg_idel_down_gate;
+  wire [DQ_BITS-1:0]                dbg_sel_idel_dq;
+  wire                              dbg_sel_all_idel_dq;
+  wire [DQS_BITS:0]                 dbg_sel_idel_dqs;
+  wire                              dbg_sel_all_idel_dqs;
+  wire [DQS_BITS:0]                 dbg_sel_idel_gate;
+  wire                              dbg_sel_all_idel_gate;
+
+
+    // Debug signals (optional use)
+
+  //***********************************
+  // PHY Debug Port demo
+  //***********************************
+  wire [35:0]                        cs_control0;
+  wire [35:0]                        cs_control1;
+  wire [35:0]                        cs_control2;
+  wire [35:0]                        cs_control3;
+  wire [191:0]                       vio0_in;
+  wire [95:0]                        vio1_in;
+  wire [99:0]                        vio2_in;
+  wire [31:0]                        vio3_out;
+
+
+
+  //***************************************************************************
+
+  assign  rst0_tb = rst0;
+  assign  clk0_tb = clk0;
+
+
+   ddr2_idelay_ctrl #
+   (
+    .IDELAYCTRL_NUM         (IDELAYCTRL_NUM)
+   )
+   u_ddr2_idelay_ctrl
+   (
+   .rst200                 (rst200),
+   .clk200                 (clk200),
+   .idelay_ctrl_rdy        (idelay_ctrl_rdy)
+   );
+
+ ddr2_infrastructure #
+ (
+   .RST_ACT_LOW            (RST_ACT_LOW)
+   )
+u_ddr2_infrastructure
+ (
+   .sys_rst_n              (sys_rst_n),
+   .dcm_lock               (dcm_lock),
+   .rst0                   (rst0),
+   .rst90                  (rst90),
+   .rstdiv0                (rstdiv0),
+   .rst200                 (rst200),
+   .clk0                   (clk0),
+   .clk90                  (clk90),
+   .clkdiv0                (clkdiv0),
+   .clk200                 (clk200),
+   .idelay_ctrl_rdy        (idelay_ctrl_rdy)
+   );
+
+ ddr2_top #
+ (
+   .BANK_WIDTH             (BANK_WIDTH),
+   .CKE_WIDTH              (CKE_WIDTH),
+   .CLK_WIDTH              (CLK_WIDTH),
+   .COL_WIDTH              (COL_WIDTH),
+   .CS_NUM                 (CS_NUM),
+   .CS_WIDTH               (CS_WIDTH),
+   .CS_BITS                (CS_BITS),
+   .DM_WIDTH               (DM_WIDTH),
+   .DQ_WIDTH               (DQ_WIDTH),
+   .DQ_PER_DQS             (DQ_PER_DQS),
+   .DQS_WIDTH              (DQS_WIDTH),
+   .DQ_BITS                (DQ_BITS),
+   .DQS_BITS               (DQS_BITS),
+   .ODT_WIDTH              (ODT_WIDTH),
+   .ROW_WIDTH              (ROW_WIDTH),
+   .ADDITIVE_LAT           (ADDITIVE_LAT),
+   .BURST_LEN              (BURST_LEN),
+   .BURST_TYPE             (BURST_TYPE),
+   .CAS_LAT                (CAS_LAT),
+   .ECC_ENABLE             (ECC_ENABLE),
+   .APPDATA_WIDTH          (APPDATA_WIDTH),
+   .MULTI_BANK_EN          (MULTI_BANK_EN),
+   .TWO_T_TIME_EN          (TWO_T_TIME_EN),
+   .ODT_TYPE               (ODT_TYPE),
+   .REDUCE_DRV             (REDUCE_DRV),
+   .REG_ENABLE             (REG_ENABLE),
+   .TREFI_NS               (TREFI_NS),
+   .TRAS                   (TRAS),
+   .TRCD                   (TRCD),
+   .TRFC                   (TRFC),
+   .TRP                    (TRP),
+   .TRTP                   (TRTP),
+   .TWR                    (TWR),
+   .TWTR                   (TWTR),
+   .HIGH_PERFORMANCE_MODE  (HIGH_PERFORMANCE_MODE),
+   .SIM_ONLY               (SIM_ONLY),
+   .DEBUG_EN               (DEBUG_EN),
+   .CLK_PERIOD             (CLK_PERIOD),
+   .DQS_IO_COL             (DQS_IO_COL),
+   .DQ_IO_MS               (DQ_IO_MS),
+   .USE_DM_PORT            (1),
+   .EN_SYN                 (EN_SYN)
+   )
+u_ddr2_top_0
+(
+   .ddr2_dq                (ddr2_dq),
+   .ddr2_a                 (ddr2_a),
+   .ddr2_ba                (ddr2_ba),
+   .ddr2_ras_n             (ddr2_ras_n),
+   .ddr2_cas_n             (ddr2_cas_n),
+   .ddr2_we_n              (ddr2_we_n),
+   .ddr2_cs_n              (ddr2_cs_n),
+   .ddr2_odt               (ddr2_odt),
+   .ddr2_cke               (ddr2_cke),
+   .ddr2_dm                (ddr2_dm),
+   .phy_init_done          (phy_init_done),
+   .rst0                   (rst0),
+   .rst90                  (rst90),
+   .rstdiv0                (rstdiv0),
+   .clk0                   (clk0),
+   .clk90                  (clk90),
+   .clkdiv0                (clkdiv0),
+
+   //added by xtan & gdgib
+   .af_clk                 (af_clk),
+   .rb_clk                 (rb_clk),
+   .wb_clk                 (wb_clk),
+   .af_rst                 (af_rst),
+   .rb_rst                 (rb_rst),
+   .wb_rst                 (wb_rst),
+   .rb_full                (rb_full),
+   //end of add
+
+   .app_wdf_afull          (app_wdf_afull),
+   .app_af_afull           (app_af_afull),
+   .rd_data_valid          (rd_data_valid),
+   .rd_data_rden(rd_data_rden),
+   .app_wdf_wren           (app_wdf_wren),
+   .app_af_wren            (app_af_wren),
+   .app_af_addr            (app_af_addr),
+   .app_af_cmd             (app_af_cmd),
+   .rd_data_fifo_out       (rd_data_fifo_out),
+   .app_wdf_data           (app_wdf_data),
+   .app_wdf_mask_data      (app_wdf_mask_data),
+   .ddr2_dqs               (ddr2_dqs),
+   .ddr2_dqs_n             (ddr2_dqs_n),
+   .ddr2_ck                (ddr2_ck),
+   .rd_ecc_error           (rd_ecc_error),
+   .ddr2_ck_n              (ddr2_ck_n),
+
+   .dbg_calib_done         (dbg_calib_done),
+   .dbg_calib_err          (dbg_calib_err),
+   .dbg_calib_dq_tap_cnt   (dbg_calib_dq_tap_cnt),
+   .dbg_calib_dqs_tap_cnt  (dbg_calib_dqs_tap_cnt),
+   .dbg_calib_gate_tap_cnt  (dbg_calib_gate_tap_cnt),
+   .dbg_calib_rd_data_sel  (dbg_calib_rd_data_sel),
+   .dbg_calib_rden_dly     (dbg_calib_rden_dly),
+   .dbg_calib_gate_dly     (dbg_calib_gate_dly),
+   .dbg_idel_up_all        (dbg_idel_up_all),
+   .dbg_idel_down_all      (dbg_idel_down_all),
+   .dbg_idel_up_dq         (dbg_idel_up_dq),
+   .dbg_idel_down_dq       (dbg_idel_down_dq),
+   .dbg_idel_up_dqs        (dbg_idel_up_dqs),
+   .dbg_idel_down_dqs      (dbg_idel_down_dqs),
+   .dbg_idel_up_gate       (dbg_idel_up_gate),
+   .dbg_idel_down_gate     (dbg_idel_down_gate),
+   .dbg_sel_idel_dq        (dbg_sel_idel_dq),
+   .dbg_sel_all_idel_dq    (dbg_sel_all_idel_dq),
+   .dbg_sel_idel_dqs       (dbg_sel_idel_dqs),
+   .dbg_sel_all_idel_dqs   (dbg_sel_all_idel_dqs),
+   .dbg_sel_idel_gate      (dbg_sel_idel_gate),
+   .dbg_sel_all_idel_gate  (dbg_sel_all_idel_gate)
+   );
+
+   //*****************************************************************
+  // Hooks to prevent sim/syn compilation errors (mainly for VHDL - but
+  // keep it also in Verilog version of code) w/ floating inputs if
+  // DEBUG_EN = 0.
+  //*****************************************************************
+
+  generate
+    if (DEBUG_EN == 0) begin: gen_dbg_tie_off
+      assign dbg_idel_up_all       = 'b0;
+      assign dbg_idel_down_all     = 'b0;
+      assign dbg_idel_up_dq        = 'b0;
+      assign dbg_idel_down_dq      = 'b0;
+      assign dbg_idel_up_dqs       = 'b0;
+      assign dbg_idel_down_dqs     = 'b0;
+      assign dbg_idel_up_gate      = 'b0;
+      assign dbg_idel_down_gate    = 'b0;
+      assign dbg_sel_idel_dq       = 'b0;
+      assign dbg_sel_all_idel_dq   = 'b0;
+      assign dbg_sel_idel_dqs      = 'b0;
+      assign dbg_sel_all_idel_dqs  = 'b0;
+      assign dbg_sel_idel_gate     = 'b0;
+      assign dbg_sel_all_idel_gate = 'b0;
+    end else begin: gen_dbg_enable
+      
+      //*****************************************************************
+      // PHY Debug Port example - see MIG User's Guide, XAPP858 or 
+      // Answer Record 29443
+      // This logic supports up to 32 DQ and 8 DQS I/O
+      // NOTES:
+      //   1. PHY Debug Port demo connects to 4 VIO modules:
+      //     - 3 VIO modules with only asynchronous inputs
+      //      * Monitor IDELAY taps for DQ, DQS, DQS Gate
+      //      * Calibration status
+      //     - 1 VIO module with synchronous outputs
+      //      * Allow dynamic adjustment of IDELAY taps
+      //   2. User may need to modify this code to incorporate other
+      //      chipscope-related modules in their larger design (e.g.
+      //      if they have other ILA/VIO modules, they will need to
+      //      for example instantiate a larger ICON module). In addition
+      //      user may want to instantiate more VIO modules to control
+      //      IDELAY for more DQ, DQS than is shown here
+      //*****************************************************************
+
+      icon4 u_icon
+        (
+         .control0 (cs_control0),
+         .control1 (cs_control1),
+         .control2 (cs_control2),
+         .control3 (cs_control3)
+         );
+
+      //*****************************************************************
+      // VIO ASYNC input: Display current IDELAY setting for up to 32
+      // DQ taps (32x6) = 192
+      //*****************************************************************
+
+      vio_async_in192 u_vio0
+        (
+         .control  (cs_control0),
+         .async_in (vio0_in)
+         );
+
+      //*****************************************************************
+      // VIO ASYNC input: Display current IDELAY setting for up to 8 DQS
+      // and DQS Gate taps (8x6x2) = 96
+      //*****************************************************************
+
+      vio_async_in96 u_vio1
+        (
+         .control  (cs_control1),
+         .async_in (vio1_in)
+         );
+
+      //*****************************************************************
+      // VIO ASYNC input: Display other calibration results
+      //*****************************************************************
+
+      vio_async_in100 u_vio2
+        (
+         .control  (cs_control2),
+         .async_in (vio2_in)
+         );
+      
+      //*****************************************************************
+      // VIO SYNC output: Dynamically change IDELAY taps
+      //*****************************************************************
+      
+      vio_sync_out32 u_vio3
+        (
+         .control  (cs_control3),
+         .clk      (clkdiv0),
+         .sync_out (vio3_out)
+         );
+
+      //*****************************************************************
+      // Bit assignments:
+      // NOTE: Not all VIO, ILA inputs/outputs may be used - these will
+      //       be dependent on the user's particular bit width
+      //*****************************************************************
+
+      if (DQ_WIDTH <= 32) begin: gen_dq_le_32
+        assign vio0_in[(6*DQ_WIDTH)-1:0] 
+                 = dbg_calib_dq_tap_cnt[(6*DQ_WIDTH)-1:0];
+      end else begin: gen_dq_gt_32
+        assign vio0_in = dbg_calib_dq_tap_cnt[191:0];
+      end
+
+      if (DQS_WIDTH <= 8) begin: gen_dqs_le_8
+        assign vio1_in[(6*DQS_WIDTH)-1:0]
+                 = dbg_calib_dqs_tap_cnt[(6*DQS_WIDTH)-1:0];
+        assign vio1_in[(12*DQS_WIDTH)-1:(6*DQS_WIDTH)] 
+                 =  dbg_calib_gate_tap_cnt[(6*DQS_WIDTH)-1:0];
+      end else begin: gen_dqs_gt_32
+        assign vio1_in[47:0]  = dbg_calib_dqs_tap_cnt[47:0];
+        assign vio1_in[95:48] = dbg_calib_gate_tap_cnt[47:0];
+      end
+//dbg_calib_rd_data_sel
+
+     if (DQS_WIDTH <= 8) begin: gen_rdsel_le_8
+        assign vio2_in[(DQS_WIDTH)+7:8]    
+                = dbg_calib_rd_data_sel[(DQS_WIDTH)-1:0];
+     end else begin: gen_rdsel_gt_32
+      assign vio2_in[15:8]    
+                 = dbg_calib_rd_data_sel[7:0];
+     end
+//dbg_calib_rden_dly
+
+     if (DQS_WIDTH <= 8) begin: gen_calrd_le_8
+       assign vio2_in[(5*DQS_WIDTH)+19:20]   
+                 = dbg_calib_rden_dly[(5*DQS_WIDTH)-1:0];
+     end else begin: gen_calrd_gt_32
+       assign vio2_in[59:20]   
+                 = dbg_calib_rden_dly[39:0];
+     end
+
+//dbg_calib_gate_dly
+
+     if (DQS_WIDTH <= 8) begin: gen_calgt_le_8
+       assign vio2_in[(5*DQS_WIDTH)+59:60]   
+                 = dbg_calib_gate_dly[(5*DQS_WIDTH)-1:0];
+     end else begin: gen_calgt_gt_32
+       assign vio2_in[99:60]   
+                 = dbg_calib_gate_dly[39:0];
+     end
+
+//dbg_sel_idel_dq
+
+     if (DQ_BITS <= 5) begin: gen_selid_le_5
+       assign dbg_sel_idel_dq[DQ_BITS-1:0]      
+                 = vio3_out[DQ_BITS+7:8];
+     end else begin: gen_selid_gt_32
+       assign dbg_sel_idel_dq[4:0]      
+                 = vio3_out[12:8];
+     end
+
+//dbg_sel_idel_dqs
+
+     if (DQS_BITS <= 3) begin: gen_seldqs_le_3
+       assign dbg_sel_idel_dqs[DQS_BITS:0]     
+                 = vio3_out[(DQS_BITS+16):16];
+     end else begin: gen_seldqs_gt_32
+       assign dbg_sel_idel_dqs[3:0]     
+                 = vio3_out[19:16];
+     end
+
+//dbg_sel_idel_gate
+
+     if (DQS_BITS <= 3) begin: gen_gtdqs_le_3
+       assign dbg_sel_idel_gate[DQS_BITS:0]    
+                 = vio3_out[(DQS_BITS+21):21];
+     end else begin: gen_gtdqs_gt_32
+       assign dbg_sel_idel_gate[3:0]    
+                 = vio3_out[24:21];
+     end
+
+
+      assign vio2_in[3:0]              = dbg_calib_done;
+      assign vio2_in[7:4]              = dbg_calib_err;
+      
+      assign dbg_idel_up_all           = vio3_out[0];
+      assign dbg_idel_down_all         = vio3_out[1];
+      assign dbg_idel_up_dq            = vio3_out[2];
+      assign dbg_idel_down_dq          = vio3_out[3];
+      assign dbg_idel_up_dqs           = vio3_out[4];
+      assign dbg_idel_down_dqs         = vio3_out[5];
+      assign dbg_idel_up_gate          = vio3_out[6];
+      assign dbg_idel_down_gate        = vio3_out[7];
+      assign dbg_sel_all_idel_dq       = vio3_out[15];
+      assign dbg_sel_all_idel_dqs      = vio3_out[20];
+      assign dbg_sel_all_idel_gate     = vio3_out[25];
+    end
+  endgenerate
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_top.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_top.v
new file mode 100644 (file)
index 0000000..7d4c64d
--- /dev/null
@@ -0,0 +1,297 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a
+// license to use this text/file solely for design, simulation,
+// implementation and creation of design files limited
+// to Xilinx devices or technologies. Use with non-Xilinx
+// devices or technologies is expressly prohibited and
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information
+// "as-is" solely for use in developing programs and
+// solutions for Xilinx devices, with no obligation on the
+// part of Xilinx to provide support. By providing this design,
+// code, or information as one possible implementation of
+// this feature, application or standard, Xilinx is making no
+// representation that this implementation is free from any
+// claims of infringement. You are responsible for
+// obtaining any rights you may require for your implementation.
+// Xilinx expressly disclaims any warranty whatsoever with
+// respect to the adequacy of the implementation, including
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied
+// warranties of merchantability or fitness for a particular
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part
+// of this text at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_top.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/29 15:24:03 $
+// \   \  /  \    Date Created: Wed Aug 16 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   System level module. This level contains just the memory controller.
+//   This level will be instantiated when the user wants to remove the
+//   synthesizable test bench, IDELAY control block and the clock
+//   generation modules.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_top #
+  (
+   // Thin wrapper: forwards every parameter and port unchanged to the single
+   // ddr2_mem_if_top instance (u_mem_if_top), additionally fixing DDR_TYPE=1.
+   // Defaults below are for the 72-bit RDIMM design (ML561 reference board);
+   // actual values are passed from the design top module ddr2_sdram.
+   parameter BANK_WIDTH            = 2,      // # of memory bank addr bits
+   parameter CKE_WIDTH             = 1,      // # of memory clock enable outputs
+   parameter CLK_WIDTH             = 1,      // # of clock outputs
+   parameter COL_WIDTH             = 10,     // # of memory column bits
+   parameter CS_NUM                = 1,      // # of separate memory chip selects
+   parameter CS_BITS               = 0,      // set to log2(CS_NUM) (rounded up)
+   parameter CS_WIDTH              = 1,      // # of total memory chip selects
+   parameter USE_DM_PORT           = 1,      // enable Data Mask (=1 enable)
+   parameter DM_WIDTH              = 9,      // # of data mask bits
+   parameter DQ_WIDTH              = 72,     // # of data width
+   parameter DQ_BITS               = 7,      // set to log2(DQS_WIDTH*DQ_PER_DQS)
+   parameter DQ_PER_DQS            = 8,      // # of DQ data bits per strobe
+   parameter DQS_WIDTH             = 9,      // # of DQS strobes
+   parameter DQS_BITS              = 4,      // set to log2(DQS_WIDTH)
+   parameter HIGH_PERFORMANCE_MODE = "TRUE", // IODELAY Performance Mode
+   parameter ODT_WIDTH             = 1,      // # of memory on-die term enables
+   parameter ROW_WIDTH             = 14,     // # of memory row & # of addr bits
+   parameter APPDATA_WIDTH         = 144,    // # of usr read/write data bus bits
+   parameter ADDITIVE_LAT          = 0,      // additive write latency
+   parameter BURST_LEN             = 4,      // burst length (in double words)
+   parameter BURST_TYPE            = 0,      // burst type (=0 seq; =1 interleaved)
+   parameter CAS_LAT               = 5,      // CAS latency
+   parameter ECC_ENABLE            = 0,      // enable ECC (=1 enable)
+   parameter ODT_TYPE              = 1,      // ODT (=0(none),=1(75),=2(150),=3(50))
+   parameter MULTI_BANK_EN         = 1,      // enable bank management
+   parameter TWO_T_TIME_EN         = 0,      // 2t timing for unbuffered dimms
+   parameter REDUCE_DRV            = 0,      // reduced strength mem I/O (=1 yes)
+   parameter REG_ENABLE            = 1,      // registered addr/ctrl (=1 yes)
+   parameter TREFI_NS              = 7800,   // auto refresh interval (ns)
+   parameter TRAS                  = 40000,  // active->precharge delay
+   parameter TRCD                  = 15000,  // active->read/write delay
+   parameter TRFC                  = 105000, // ref->ref, ref->active delay
+   parameter TRP                   = 15000,  // precharge->command delay
+   parameter TRTP                  = 7500,   // read->precharge delay
+   parameter TWR                   = 15000,  // used to determine wr->prech
+   parameter TWTR                  = 10000,  // write->read delay
+   parameter CLK_PERIOD            = 3000,   // Core/Mem clk period (in ps)
+   parameter SIM_ONLY              = 0,      // = 1 to skip power up delay
+   parameter DEBUG_EN              = 0,      // Enable debug signals/controls
+   parameter DQS_IO_COL            = 0,      // I/O column location of DQS groups
+   parameter DQ_IO_MS              = 0,      // Master/Slave location of DQ I/O
+   parameter EN_SYN                = "FALSE" // forwarded as-is to ddr2_mem_if_top
+   )
+  (
+   input                                    clk0,
+   input                                    clk90,
+   input                                    clkdiv0,
+   input                                    rst0,
+   input                                    rst90,
+   input                                    rstdiv0,
+   //added by xtan & gdgib: per-buffer clocks/resets plus the read-buffer full flag
+   input                                   af_clk,                     //address fifo clk
+   input                                   rb_clk,                     //read buffer clk
+   input                                   wb_clk,                     //write buffer clk
+   input                                   af_rst,                     //address fifo rst
+   input                                   rb_rst,                     //read buffer rst
+   input                                   wb_rst,                     //write buffer rst
+   output                                  rb_full,                    //read buffer is full
+   //end of add
+   input [2:0]                              app_af_cmd,
+   input [30:0]                             app_af_addr,
+   input                                    app_af_wren,
+   input                                    app_wdf_wren,
+   input [APPDATA_WIDTH-1:0]                app_wdf_data,
+   input [(APPDATA_WIDTH/8)-1:0]            app_wdf_mask_data,
+   output                                   app_af_afull,
+   output                                   app_wdf_afull,
+   output                                   rd_data_valid,
+   input                                    rd_data_rden,
+   output [APPDATA_WIDTH-1:0]               rd_data_fifo_out,
+   output [1:0]                             rd_ecc_error,
+   output                                   phy_init_done,
+   output [CLK_WIDTH-1:0]                   ddr2_ck,
+   output [CLK_WIDTH-1:0]                   ddr2_ck_n,
+   output [ROW_WIDTH-1:0]                   ddr2_a,
+   output [BANK_WIDTH-1:0]                  ddr2_ba,
+   output                                   ddr2_ras_n,
+   output                                   ddr2_cas_n,
+   output                                   ddr2_we_n,
+   output [CS_WIDTH-1:0]                    ddr2_cs_n,
+   output [CKE_WIDTH-1:0]                   ddr2_cke,
+   output [ODT_WIDTH-1:0]                   ddr2_odt,
+   output [DM_WIDTH-1:0]                    ddr2_dm,
+   inout [DQS_WIDTH-1:0]                    ddr2_dqs,
+   inout [DQS_WIDTH-1:0]                    ddr2_dqs_n,
+   inout [DQ_WIDTH-1:0]                     ddr2_dq,
+   // Debug signals (optional use)
+   input                                    dbg_idel_up_all,
+   input                                    dbg_idel_down_all,
+   input                                    dbg_idel_up_dq,
+   input                                    dbg_idel_down_dq,
+   input                                    dbg_idel_up_dqs,
+   input                                    dbg_idel_down_dqs,
+   input                                    dbg_idel_up_gate,
+   input                                    dbg_idel_down_gate,
+   input [DQ_BITS-1:0]                      dbg_sel_idel_dq,
+   input                                    dbg_sel_all_idel_dq,
+   input [DQS_BITS:0]                       dbg_sel_idel_dqs,   // note: DQS_BITS+1 bits wide
+   input                                    dbg_sel_all_idel_dqs,
+   input [DQS_BITS:0]                       dbg_sel_idel_gate,  // note: DQS_BITS+1 bits wide
+   input                                    dbg_sel_all_idel_gate,
+   output [3:0]                             dbg_calib_done,
+   output [3:0]                             dbg_calib_err,
+   output [(6*DQ_WIDTH)-1:0]                dbg_calib_dq_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]               dbg_calib_dqs_tap_cnt,
+   output [(6*DQS_WIDTH)-1:0]               dbg_calib_gate_tap_cnt,
+   output [DQS_WIDTH-1:0]                   dbg_calib_rd_data_sel,
+   output [(5*DQS_WIDTH)-1:0]               dbg_calib_rden_dly,
+   output [(5*DQS_WIDTH)-1:0]               dbg_calib_gate_dly
+   );
+
+  // memory initialization/control logic (the only content of this module)
+  ddr2_mem_if_top #
+    (
+     .BANK_WIDTH            (BANK_WIDTH),
+     .CKE_WIDTH             (CKE_WIDTH),
+     .CLK_WIDTH             (CLK_WIDTH),
+     .COL_WIDTH             (COL_WIDTH),
+     .CS_BITS               (CS_BITS),
+     .CS_NUM                (CS_NUM),
+     .CS_WIDTH              (CS_WIDTH),
+     .USE_DM_PORT           (USE_DM_PORT),
+     .DM_WIDTH              (DM_WIDTH),
+     .DQ_WIDTH              (DQ_WIDTH),
+     .DQ_BITS               (DQ_BITS),
+     .DQ_PER_DQS            (DQ_PER_DQS),
+     .DQS_BITS              (DQS_BITS),
+     .DQS_WIDTH             (DQS_WIDTH),
+     .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE),
+     .ODT_WIDTH             (ODT_WIDTH),
+     .ROW_WIDTH             (ROW_WIDTH),
+     .APPDATA_WIDTH         (APPDATA_WIDTH),
+     .ADDITIVE_LAT          (ADDITIVE_LAT),
+     .BURST_LEN             (BURST_LEN),
+     .BURST_TYPE            (BURST_TYPE),
+     .CAS_LAT               (CAS_LAT),
+     .ECC_ENABLE            (ECC_ENABLE),
+     .MULTI_BANK_EN         (MULTI_BANK_EN),
+     .TWO_T_TIME_EN         (TWO_T_TIME_EN),
+     .ODT_TYPE              (ODT_TYPE),
+     .DDR_TYPE              (1),  // hard-wired; not a ddr2_top parameter (presumably selects DDR2 -- confirm in ddr2_mem_if_top)
+     .REDUCE_DRV            (REDUCE_DRV),
+     .REG_ENABLE            (REG_ENABLE),
+     .TREFI_NS              (TREFI_NS),
+     .TRAS                  (TRAS),
+     .TRCD                  (TRCD),
+     .TRFC                  (TRFC),
+     .TRP                   (TRP),
+     .TRTP                  (TRTP),
+     .TWR                   (TWR),
+     .TWTR                  (TWTR),
+     .CLK_PERIOD            (CLK_PERIOD),
+     .SIM_ONLY              (SIM_ONLY),
+     .DEBUG_EN              (DEBUG_EN),
+     .DQS_IO_COL            (DQS_IO_COL),
+     .DQ_IO_MS              (DQ_IO_MS),
+     .EN_SYN                (EN_SYN)
+     )
+    u_mem_if_top
+      (
+       .clk0                   (clk0),
+       .clk90                  (clk90),
+       .clkdiv0                (clkdiv0),
+       .rst0                   (rst0),
+       .rst90                  (rst90),
+       .rstdiv0                (rstdiv0),
+       .af_clk                 (af_clk),
+       .rb_clk                 (rb_clk),
+       .wb_clk                 (wb_clk),
+       .af_rst                 (af_rst),
+       .rb_rst                 (rb_rst),
+       .wb_rst                 (wb_rst),
+       .rb_full                (rb_full),
+       .app_af_cmd             (app_af_cmd),
+       .app_af_addr            (app_af_addr),
+       .app_af_wren            (app_af_wren),
+       .app_wdf_wren           (app_wdf_wren),
+       .app_wdf_data           (app_wdf_data),
+       .app_wdf_mask_data      (app_wdf_mask_data),
+       .app_af_afull           (app_af_afull),
+       .app_wdf_afull          (app_wdf_afull),
+       .rd_data_valid          (rd_data_valid),
+       .rd_data_rden           (rd_data_rden),
+       .rd_data_fifo_out       (rd_data_fifo_out),
+       .rd_ecc_error           (rd_ecc_error),
+       .phy_init_done          (phy_init_done),
+       .ddr_ck                 (ddr2_ck),   // from here on, ddr2_* pins connect to the submodule's ddr_* ports
+       .ddr_ck_n               (ddr2_ck_n),
+       .ddr_addr               (ddr2_a),
+       .ddr_ba                 (ddr2_ba),
+       .ddr_ras_n              (ddr2_ras_n),
+       .ddr_cas_n              (ddr2_cas_n),
+       .ddr_we_n               (ddr2_we_n),
+       .ddr_cs_n               (ddr2_cs_n),
+       .ddr_cke                (ddr2_cke),
+       .ddr_odt                (ddr2_odt),
+       .ddr_dm                 (ddr2_dm),
+       .ddr_dqs                (ddr2_dqs),
+       .ddr_dqs_n              (ddr2_dqs_n),
+       .ddr_dq                 (ddr2_dq),
+       .dbg_idel_up_all        (dbg_idel_up_all),
+       .dbg_idel_down_all      (dbg_idel_down_all),
+       .dbg_idel_up_dq         (dbg_idel_up_dq),
+       .dbg_idel_down_dq       (dbg_idel_down_dq),
+       .dbg_idel_up_dqs        (dbg_idel_up_dqs),
+       .dbg_idel_down_dqs      (dbg_idel_down_dqs),
+       .dbg_idel_up_gate       (dbg_idel_up_gate),
+       .dbg_idel_down_gate     (dbg_idel_down_gate),
+       .dbg_sel_idel_dq        (dbg_sel_idel_dq),
+       .dbg_sel_all_idel_dq    (dbg_sel_all_idel_dq),
+       .dbg_sel_idel_dqs       (dbg_sel_idel_dqs),
+       .dbg_sel_all_idel_dqs   (dbg_sel_all_idel_dqs),
+       .dbg_sel_idel_gate      (dbg_sel_idel_gate),
+       .dbg_sel_all_idel_gate  (dbg_sel_all_idel_gate),
+       .dbg_calib_done         (dbg_calib_done),
+       .dbg_calib_err          (dbg_calib_err),
+       .dbg_calib_dq_tap_cnt   (dbg_calib_dq_tap_cnt),
+       .dbg_calib_dqs_tap_cnt  (dbg_calib_dqs_tap_cnt),
+       .dbg_calib_gate_tap_cnt (dbg_calib_gate_tap_cnt),
+       .dbg_calib_rd_data_sel  (dbg_calib_rd_data_sel),
+       .dbg_calib_rden_dly     (dbg_calib_rden_dly),
+       .dbg_calib_gate_dly     (dbg_calib_gate_dly)
+       );
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_usr_addr_fifo.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_usr_addr_fifo.v
new file mode 100644 (file)
index 0000000..4100015
--- /dev/null
@@ -0,0 +1,140 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+// 
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a 
+// license to use this text/file solely for design, simulation, 
+// implementation and creation of design files limited 
+// to Xilinx devices or technologies. Use with non-Xilinx 
+// devices or technologies is expressly prohibited and 
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information 
+// "as-is" solely for use in developing programs and 
+// solutions for Xilinx devices, with no obligation on the 
+// part of Xilinx to provide support. By providing this design, 
+// code, or information as one possible implementation of 
+// this feature, application or standard, Xilinx is making no 
+// representation that this implementation is free from any 
+// claims of infringement. You are responsible for 
+// obtaining any rights you may require for your implementation. 
+// Xilinx expressly disclaims any warranty whatsoever with 
+// respect to the adequacy of the implementation, including 
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied 
+// warranties of merchantability or fitness for a particular 
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are 
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part 
+// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_usr_addr_fifo.v
+// /___/   /\     Date Last Modified: $Date: 2008/05/08 15:20:47 $
+// \   \  /  \    Date Created: Mon Aug 28 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module instantiates the block RAM based FIFO to store the user
+//   address and the command information. Also calculates potential bank/row
+//   conflicts by comparing the new address with last address issued.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_usr_addr_fifo #
+  (
+   // FIFO36 wrapper for user commands/addresses: written on af_clk, read on
+   // clk0. Each entry packs {app_af_cmd, app_af_addr} (3 + 31 bits).
+   // Parameter defaults follow the 72-bit RDIMM (ML561 reference board)
+   // design; actual values are passed from the design top module ddr2_sdram.
+   parameter BANK_WIDTH    = 2,        // not referenced in this module
+   parameter COL_WIDTH     = 10,       // not referenced in this module
+   parameter CS_BITS       = 0,        // not referenced in this module
+   parameter ROW_WIDTH     = 14,       // not referenced in this module
+   parameter EN_SYN        = "FALSE"   // passed to FIFO36; NOTE(review): presumably must stay "FALSE" unless af_clk == clk0 -- confirm in UG190
+   )
+  (
+   input          clk0,                        //ddr2 phy clock
+   input          rst0,
+   //start new port by xtan & gdgib
+   input         af_clk,               //user side clock
+   input         af_rst,               //user side reset
+   //end new port by xtan & gdgib
+   input [2:0]    app_af_cmd,
+   input [30:0]   app_af_addr,
+   input          app_af_wren,
+   input          ctrl_af_rden,
+   output [2:0]   af_cmd,
+   output [30:0]  af_addr,
+   output         af_empty,
+   output         app_af_afull
+   );
+
+  wire [35:0]     fifo_data_out;  // {DOP[3:0], DO[31:0]} from the FIFO36
+   reg            rst_r;          // rst0 registered once on clk0
+  // rst_r is ORed with af_rst below to form the FIFO reset.
+
+  always @(posedge clk0)
+     rst_r <= rst0;
+
+
+  //***************************************************************************
+  // Unpack the FIFO word: [33:31] = command, [30:0] = address ([35:34] zero pad).
+  assign af_cmd      = fifo_data_out[33:31];
+  assign af_addr     = fifo_data_out[30:0];
+  // NOTE(review): no bank/row conflict comparison is visible in this module.
+  //***************************************************************************
+
+  FIFO36 #
+    (
+     .ALMOST_EMPTY_OFFSET     (13'h0007),
+     .ALMOST_FULL_OFFSET      (13'h000F),
+     .DATA_WIDTH              (36),
+     .DO_REG                  (1),
+     .EN_SYN                  (EN_SYN),
+     .FIRST_WORD_FALL_THROUGH ("FALSE")
+     )
+    u_af
+      (
+       .ALMOSTEMPTY (),
+       .ALMOSTFULL  (app_af_afull),
+       .DO          (fifo_data_out[31:0]),
+       .DOP         (fifo_data_out[35:32]),
+       .EMPTY       (af_empty),
+       .FULL        (),                        // unconnected: only ALMOSTFULL is exported
+       .RDCOUNT     (),
+       .RDERR       (),
+       .WRCOUNT     (),
+       .WRERR       (),
+       .DI          ({app_af_cmd[0],app_af_addr}),   // DI[31] = cmd[0], DI[30:0] = addr
+       .DIP         ({2'b00,app_af_cmd[2:1]}),       // DIP[1:0] = cmd[2:1], DIP[3:2] = 0
+       .RDCLK       (clk0),
+       .RDEN        (ctrl_af_rden),
+       .RST         (rst_r | af_rst),          // either side's reset clears the FIFO
+       .WRCLK       (af_clk),                  //changed by xtan: clk0 -> af_clk
+       .WREN        (app_af_wren)
+       );
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_usr_rd.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_usr_rd.v
new file mode 100644 (file)
index 0000000..c1ceec7
--- /dev/null
@@ -0,0 +1,424 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+// 
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a 
+// license to use this text/file solely for design, simulation, 
+// implementation and creation of design files limited 
+// to Xilinx devices or technologies. Use with non-Xilinx 
+// devices or technologies is expressly prohibited and 
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information 
+// "as-is" solely for use in developing programs and 
+// solutions for Xilinx devices, with no obligation on the 
+// part of Xilinx to provide support. By providing this design, 
+// code, or information as one possible implementation of 
+// this feature, application or standard, Xilinx is making no 
+// representation that this implementation is free from any 
+// claims of infringement. You are responsible for 
+// obtaining any rights you may require for your implementation. 
+// Xilinx expressly disclaims any warranty whatsoever with 
+// respect to the adequacy of the implementation, including 
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied 
+// warranties of merchantability or fitness for a particular 
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are 
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part 
+// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_usr_rd.v
+// /___/   /\     Date Last Modified: $Date: 2008/07/02 14:03:08 $
+// \   \  /  \    Date Created: Tue Aug 29 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   The delay between the read data with respect to the command issued is
+//   calculated in terms of no. of clocks. This data is then stored into the
+//   FIFOs and then read back and given as the output for comparison.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_usr_rd #
+  (
+   // Following parameters are for 72-bit RDIMM design (for ML561 Reference 
+   // board design). Actual values may be different. Actual parameters values 
+   // are passed from design top module ddr2_sdram module. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter DQ_PER_DQS    = 8,
+   parameter DQS_WIDTH     = 9,
+   parameter APPDATA_WIDTH = 144,
+   parameter APPDATA_BURST_LEN = 2,
+   parameter APPDATA_BURST_BITS = 1,
+   parameter ECC_WIDTH     = 72,
+   parameter ECC_ENABLE    = 0,
+   parameter EN_SYN        = "FALSE"
+   )
+  (
+   input                                    clk0,              //read buffer write clock
+   input                                    rst0,
+   //new ports by xtan & gdgib
+   input                                   rb_clk,             //read buffer read clock
+   input                                   rb_rst,             //read buffer read reset
+   output                                  rb_full,            //read buffer is full (error)
+   //end new signals by xtan & gdgib
+   input [(DQS_WIDTH*DQ_PER_DQS)-1:0]       rd_data_in_rise,
+   input [(DQS_WIDTH*DQ_PER_DQS)-1:0]       rd_data_in_fall,
+   input [DQS_WIDTH-1:0]                    ctrl_rden,
+   input [DQS_WIDTH-1:0]                    ctrl_rden_sel,
+   output reg [1:0]                         rd_ecc_error,
+   output reg                               rd_data_valid,
+   input                                    rd_data_rden,
+   output reg [(APPDATA_WIDTH/2)-1:0]       rd_data_out_rise,
+   output reg [(APPDATA_WIDTH/2)-1:0]       rd_data_out_fall,
+   output                                                                      rd_fifo_clear
+   );
+
+  // determine number of FIFO72's to use based on data width
+  localparam RDF_FIFO_NUM = ((APPDATA_WIDTH/2)+63)/64;
+
+  reg [DQS_WIDTH-1:0]               ctrl_rden_r;
+  wire [(DQS_WIDTH*DQ_PER_DQS)-1:0] fall_data;
+  reg [(DQS_WIDTH*DQ_PER_DQS)-1:0]  rd_data_in_fall_r;
+  reg [(DQS_WIDTH*DQ_PER_DQS)-1:0]  rd_data_in_rise_r;
+  wire                              rden;
+  reg [DQS_WIDTH-1:0]               rden_sel_r
+                                    /* synthesis syn_preserve=1 */;
+  wire [DQS_WIDTH-1:0]              rden_sel_mux;
+  wire [(DQS_WIDTH*DQ_PER_DQS)-1:0] rise_data;
+
+  // ECC specific signals
+  wire [((RDF_FIFO_NUM -1) *2)+1:0] db_ecc_error;
+  reg [(DQS_WIDTH*DQ_PER_DQS)-1:0]  fall_data_r;
+  reg                               rden_r;
+  wire [(APPDATA_WIDTH/2)-1:0]      rd_data_out_fall_temp;
+  wire [(APPDATA_WIDTH/2)-1:0]      rd_data_out_rise_temp;
+  reg                               rst_r;
+  reg [(DQS_WIDTH*DQ_PER_DQS)-1:0]  rise_data_r;
+  wire [((RDF_FIFO_NUM -1) *2)+1:0] sb_ecc_error;
+  wire [RDF_FIFO_NUM-1:0]           rdf_empty;
+  wire                                                         fifo_read, fifo_preread;
+
+  reg  [1:0]                        rd_data_valid_pre;
+  wire [RDF_FIFO_NUM-1:0]              w_rb_full;              //added by xtan
+
+       //***************************************************************************
+  
+  reg  [APPDATA_BURST_BITS-1:0] blfifo_readcount, blfifo_writecount;
+  wire                                                 blfifo_readterminal, blfifo_writeterminal;
+  wire                                                 blfifo_read, blfifo_write;
+  wire                                                 blfifo_empty, blfifo_full;
+  
+  asyncfifo_dmem_1b blfifo (
+       .din(1'b0), 
+       .rd_clk(rb_clk),
+       .rd_en(blfifo_read),
+       .rst(rst_r | rb_rst),
+       .wr_clk(clk0),
+       .wr_en(blfifo_write),
+       .dout(), 
+       .empty(blfifo_empty),
+       .full(blfifo_full));
+       assign rd_fifo_clear = ~blfifo_full;
+       
+       always @ (posedge rb_clk) begin
+               if (rb_rst) blfifo_readcount <= 0;
+               else if (fifo_read & ~rdf_empty[0]) begin
+                       if (blfifo_readterminal) blfifo_readcount <= 0;
+                       else blfifo_readcount <= blfifo_readcount + 1;
+               end
+       end
+       
+       always @ (posedge clk0) begin
+               if (rst_r | rst0) blfifo_writecount <= 0;
+               else if (rden_r) begin
+                       if (blfifo_writeterminal) blfifo_writecount <= 0;
+                       else blfifo_writecount <= blfifo_writecount + 1;
+               end
+       end
+       
+       assign  blfifo_readterminal = (blfifo_readcount == (APPDATA_BURST_LEN - 1));
+       assign  blfifo_writeterminal = (blfifo_writecount == (APPDATA_BURST_LEN - 1));
+       assign  blfifo_read = (~|blfifo_readcount) & (fifo_read & rd_data_valid);
+       assign  blfifo_write = blfifo_writeterminal & rden_r;
+
+  //***************************************************************************
+
+  always @(posedge clk0) begin
+    rden_sel_r        <= ctrl_rden_sel;
+    ctrl_rden_r       <= ctrl_rden;
+    rd_data_in_rise_r <= rd_data_in_rise;
+    rd_data_in_fall_r <= rd_data_in_fall;
+  end
+
+  // Instantiate primitive to allow this flop to be attached to multicycle
+  // path constraint in UCF. Multicycle path allowed for data from read FIFO.
+  // This is the same signal as RDEN_SEL_R, but is only used to select data
+  // (does not affect control signals)
+  genvar rd_i;
+  generate
+    for (rd_i = 0; rd_i < DQS_WIDTH; rd_i = rd_i+1) begin: gen_rden_sel_mux
+      FDRSE u_ff_rden_sel_mux
+        (
+         .Q   (rden_sel_mux[rd_i]),
+         .C   (clk0),
+         .CE  (1'b1),
+         .D   (ctrl_rden_sel[rd_i]),
+         .R   (1'b0),
+         .S   (1'b0)
+         ) /* synthesis syn_preserve=1 */;
+    end
+  endgenerate
+
+  // determine correct read data valid signal timing
+  assign rden = (rden_sel_r[0]) ? ctrl_rden[0] : ctrl_rden_r[0];
+
+  // assign data based on the skew
+  genvar data_i;
+  generate
+    for(data_i = 0; data_i < DQS_WIDTH; data_i = data_i+1) begin: gen_data
+      assign rise_data[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                       (data_i*DQ_PER_DQS)]
+               = (rden_sel_mux[data_i]) ?
+                 rd_data_in_rise[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1) :
+                                 (data_i*DQ_PER_DQS)] :
+                 rd_data_in_rise_r[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                                   (data_i*DQ_PER_DQS)];
+       assign fall_data[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                        (data_i*DQ_PER_DQS)]
+                = (rden_sel_mux[data_i]) ?
+                  rd_data_in_fall[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                                  (data_i*DQ_PER_DQS)] :
+                  rd_data_in_fall_r[(data_i*DQ_PER_DQS)+(DQ_PER_DQS-1):
+                                    (data_i*DQ_PER_DQS)];
+    end
+  endgenerate
+
+  assign rb_full = w_rb_full[0];
+  
+  // Generate RST for FIFO reset AND for read/write enable:
+  // ECC FIFO always being read from and written to
+  always @(posedge clk0)
+    rst_r <= rst0;
+  
+  always @(posedge clk0) begin
+       rise_data_r       <= rise_data;
+       fall_data_r       <= fall_data;
+  end
+  
+  // delay read valid to take into account max delay difference btw
+  // the read enable coming from the different DQS groups
+  always @(posedge clk0) begin
+    if (rst0) rden_r <= 1'b0;
+    else rden_r <= rden;
+  end
+
+  always @(posedge rb_clk) begin
+       if (fifo_read) begin
+         rd_data_out_rise  <= rd_data_out_rise_temp;
+         rd_data_out_fall  <= rd_data_out_fall_temp;
+       end
+       
+       if (rb_rst) rd_data_valid <= 1'b0;
+    else if (fifo_read) rd_data_valid <= rd_data_valid_pre[1];
+       
+    if (rb_rst) begin
+       rd_data_valid_pre <= 2'b00;
+    end else begin
+       rd_data_valid_pre[0] <= fifo_preread & ~rdf_empty[0];
+       rd_data_valid_pre[1] <= rd_data_valid_pre[0] | (fifo_preread ? 1'b0 : rd_data_valid_pre[1]);
+    end
+  end
+  
+  assign fifo_preread = (~|rd_data_valid_pre | (fifo_read & ^rd_data_valid_pre)) & ~(rst0 | rst_r | rb_rst);
+  assign fifo_read = (~rd_data_valid | rd_data_rden) & ~(rst0 | rst_r | rb_rst);
+
+  genvar rdf_i;
+  generate
+    if (ECC_ENABLE) begin
+      always @(posedge rb_clk) begin
+        if (fifo_preread) begin
+             rd_ecc_error[0]   <= |sb_ecc_error;
+             rd_ecc_error[1]   <= |db_ecc_error;
+           end
+      end
+
+      for (rdf_i = 0; rdf_i < RDF_FIFO_NUM; rdf_i = rdf_i + 1) begin: gen_rdf
+
+        FIFO36_72  # // rise fifo
+          (
+           .ALMOST_EMPTY_OFFSET     (9'h007),
+           .ALMOST_FULL_OFFSET      (9'h00F),
+           .DO_REG                  (1),          // extra CC output delay
+           .EN_ECC_WRITE            ("FALSE"),
+           .EN_ECC_READ             ("TRUE"),
+           .EN_SYN                  (EN_SYN),
+           .FIRST_WORD_FALL_THROUGH ("FALSE")
+           )
+          u_rdf
+            (
+             .ALMOSTEMPTY (),
+             .ALMOSTFULL  (),
+             .DBITERR     (db_ecc_error[rdf_i + rdf_i]),
+             .DO          (rd_data_out_rise_temp[(64*(rdf_i+1))-1:
+                                                 (64 *rdf_i)]),
+             .DOP         (),
+             .ECCPARITY   (),
+             .EMPTY       (rdf_empty[rdf_i]),
+             .FULL        (w_rb_full[rdf_i]),
+             .RDCOUNT     (),
+             .RDERR       (),
+             .SBITERR     (sb_ecc_error[rdf_i + rdf_i]),
+             .WRCOUNT     (),
+             .WRERR       (),
+             .DI          (rise_data_r[((64*(rdf_i+1)) + (rdf_i*8))-1:
+                                       (64 *rdf_i)+(rdf_i*8)]),
+             .DIP         (rise_data_r[(72*(rdf_i+1))-1:
+                                       (64*(rdf_i+1))+ (8*rdf_i)]),
+             .RDCLK       (rb_clk),
+             .RDEN        (fifo_preread),
+             .RST         (rst_r | rb_rst),
+             .WRCLK       (clk0),
+             .WREN        (rden_r)
+             );
+
+        FIFO36_72  # // fall_fifo
+          (
+           .ALMOST_EMPTY_OFFSET     (9'h007),
+           .ALMOST_FULL_OFFSET      (9'h00F),
+           .DO_REG                  (1),          // extra CC output delay
+           .EN_ECC_WRITE            ("FALSE"),
+           .EN_ECC_READ             ("TRUE"),
+           .EN_SYN                  (EN_SYN),
+           .FIRST_WORD_FALL_THROUGH ("FALSE")
+           )
+          u_rdf1
+            (
+             .ALMOSTEMPTY (),
+             .ALMOSTFULL  (),
+             .DBITERR     (db_ecc_error[(rdf_i+1) + rdf_i]),
+             .DO          (rd_data_out_fall_temp[(64*(rdf_i+1))-1:
+                                                 (64 *rdf_i)]),
+             .DOP         (),
+             .ECCPARITY   (),
+             .EMPTY       (),
+             .FULL        (),
+             .RDCOUNT     (),
+             .RDERR       (),
+             .SBITERR     (sb_ecc_error[(rdf_i+1) + rdf_i]),
+             .WRCOUNT     (),
+             .WRERR       (),
+             .DI          (fall_data_r[((64*(rdf_i+1)) + (rdf_i*8))-1:
+                                       (64*rdf_i)+(rdf_i*8)]),
+             .DIP         (fall_data_r[(72*(rdf_i+1))-1:
+                                       (64*(rdf_i+1))+ (8*rdf_i)]),
+             .RDCLK       (rb_clk),
+             .RDEN        (fifo_preread),
+             .RST         (rst_r | rb_rst),          // or can use rst0
+             .WRCLK       (clk0),
+             .WREN        (rden_r)
+             );
+      end
+    end else begin
+      for (rdf_i = 0; rdf_i < RDF_FIFO_NUM; rdf_i = rdf_i + 1) begin: gen_rdf
+
+        FIFO36_72  # // rise fifo
+          (
+           .ALMOST_EMPTY_OFFSET     (9'h007),
+           .ALMOST_FULL_OFFSET      (9'h00F),
+           .DO_REG                  (1),          // extra CC output delay
+           .EN_ECC_WRITE            ("FALSE"),
+           .EN_ECC_READ             ("FALSE"),
+           .EN_SYN                  (EN_SYN),
+           .FIRST_WORD_FALL_THROUGH ("FALSE")
+           )
+          u_rdf
+            (
+             .ALMOSTEMPTY (),
+             .ALMOSTFULL  (),
+             .DBITERR     (),
+             .DO          (rd_data_out_rise_temp[(64*(rdf_i+1))-1:
+                                                 (64 *rdf_i)]),
+             .DOP         (),
+             .ECCPARITY   (),
+             .EMPTY       (rdf_empty[rdf_i]),
+             .FULL        (w_rb_full[rdf_i]),
+             .RDCOUNT     (),
+             .RDERR       (),
+             .SBITERR     (),
+             .WRCOUNT     (),
+             .WRERR       (),
+             .DI          (rise_data_r[((64*(rdf_i+1)) + (rdf_i*8))-1:
+                                       (64 *rdf_i)+(rdf_i*8)]),
+             .DIP         (),
+             .RDCLK       (rb_clk),
+             .RDEN        (fifo_preread),
+             .RST         (rst_r | rb_rst),
+             .WRCLK       (clk0),
+             .WREN        (rden_r)
+             );
+
+        FIFO36_72  # // fall_fifo
+          (
+           .ALMOST_EMPTY_OFFSET     (9'h007),
+           .ALMOST_FULL_OFFSET      (9'h00F),
+           .DO_REG                  (1),          // extra CC output delay
+           .EN_ECC_WRITE            ("FALSE"),
+           .EN_ECC_READ             ("FALSE"),
+           .EN_SYN                  (EN_SYN),
+           .FIRST_WORD_FALL_THROUGH ("FALSE")
+           )
+          u_rdf1
+            (
+             .ALMOSTEMPTY (),
+             .ALMOSTFULL  (),
+             .DBITERR     (),
+             .DO          (rd_data_out_fall_temp[(64*(rdf_i+1))-1:
+                                                 (64 *rdf_i)]),
+             .DOP         (),
+             .ECCPARITY   (),
+             .EMPTY       (),
+             .FULL        (),
+             .RDCOUNT     (),
+             .RDERR       (),
+             .SBITERR     (),
+             .WRCOUNT     (),
+             .WRERR       (),
+             .DI          (fall_data_r[((64*(rdf_i+1)) + (rdf_i*8))-1:
+                                       (64*rdf_i)+(rdf_i*8)]),
+             .DIP         (),
+             .RDCLK       (rb_clk),
+             .RDEN        (fifo_preread),
+             .RST         (rst_r | rb_rst),          // or can use rst0
+             .WRCLK       (clk0),
+             .WREN        (rden_r)
+             );
+      end
+    end
+  endgenerate
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_usr_top.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_usr_top.v
new file mode 100644 (file)
index 0000000..22562a6
--- /dev/null
@@ -0,0 +1,218 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+// 
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a 
+// license to use this text/file solely for design, simulation, 
+// implementation and creation of design files limited 
+// to Xilinx devices or technologies. Use with non-Xilinx 
+// devices or technologies is expressly prohibited and 
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information 
+// "as-is" solely for use in developing programs and 
+// solutions for Xilinx devices, with no obligation on the 
+// part of Xilinx to provide support. By providing this design, 
+// code, or information as one possible implementation of 
+// this feature, application or standard, Xilinx is making no 
+// representation that this implementation is free from any 
+// claims of infringement. You are responsible for 
+// obtaining any rights you may require for your implementation. 
+// Xilinx expressly disclaims any warranty whatsoever with 
+// respect to the adequacy of the implementation, including 
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied 
+// warranties of merchantability or fitness for a particular 
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are 
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part 
+// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_usr_top.v
+// /___/   /\     Date Last Modified: $Date: 2008/05/08 15:20:47 $
+// \   \  /  \    Date Created: Mon Aug 28 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR2
+//Purpose:
+//   This module interfaces with the user. The user should provide the data
+//   and  various commands.
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_usr_top #  // User-side wrapper: read-data path, command/address FIFO and write-data path
+  (
+   // The following parameter defaults are for a 72-bit RDIMM design (ML561
+   // reference board). Actual values may differ: the real parameter values
+   // are passed down from the design top module, ddr2_sdram. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter BANK_WIDTH     = 2,
+   parameter CS_BITS        = 0,
+   parameter COL_WIDTH      = 10,
+   parameter DQ_WIDTH       = 72,
+   parameter DQ_PER_DQS     = 8,
+   parameter APPDATA_WIDTH  = 144,
+   parameter APPDATA_BURST_LEN = 2,   // forwarded to u_usr_rd and u_usr_wr (burst counter terminal value is LEN-1)
+   parameter APPDATA_BURST_BITS = 1,  // forwarded to u_usr_rd and u_usr_wr (bit width of their burst counters)
+   parameter ECC_ENABLE     = 0,
+   parameter DQS_WIDTH      = 9,
+   parameter ROW_WIDTH      = 14,
+   parameter EN_SYN         = "FALSE"  // forwarded to u_usr_rd and u_usr_addr_fifo only; not passed to u_usr_wr
+   )
+  (
+   input                                     clk0,   // write clock of the read-data FIFOs in u_usr_rd; also fed to the other two sub-modules
+   input                                     clk90,  // used only by u_usr_wr (read clock of its write-data FIFOs)
+   input                                     rst0,   // reset fed to all three sub-modules
+   // added by xtan: separate clock/reset pairs for each user-facing FIFO
+   input                                    af_clk,                    // address FIFO clock
+   input                                    rb_clk,                    // read buffer clock (read side of the read-data FIFOs)
+   input                                    wb_clk,                    // write buffer clock (write side of the write-data FIFOs)
+   input                                    af_rst,                    // address FIFO reset
+   input                                    rb_rst,                    // read buffer reset
+   input                                    wb_rst,                    // write buffer reset
+   output                                   rb_full,                   // read buffer is full (driven by u_usr_rd)
+   // end of additions by xtan
+   input [DQ_WIDTH-1:0]                      rd_data_in_rise,     // forwarded unchanged to u_usr_rd
+   input [DQ_WIDTH-1:0]                      rd_data_in_fall,     // forwarded unchanged to u_usr_rd
+   input [DQS_WIDTH-1:0]                     phy_calib_rden,      // connected to u_usr_rd.ctrl_rden
+   input [DQS_WIDTH-1:0]                     phy_calib_rden_sel,  // connected to u_usr_rd.ctrl_rden_sel
+   output                                    rd_data_valid,
+   input                                     rd_data_rden,
+   output [APPDATA_WIDTH-1:0]                rd_data_fifo_out,    // {fall, rise} halves from u_usr_rd
+   input [2:0]                               app_af_cmd,
+   input [30:0]                              app_af_addr,
+   input                                     app_af_wren,
+   input                                     ctrl_af_rden,
+   output [2:0]                              af_cmd,
+   output [30:0]                             af_addr,
+   output                                    af_empty,
+   output                                    app_af_afull,
+   output [1:0]                              rd_ecc_error,
+   input                                     app_wdf_wren,
+   input [APPDATA_WIDTH-1:0]                 app_wdf_data,
+   input [(APPDATA_WIDTH/8)-1:0]             app_wdf_mask_data,
+   input                                     wdf_rden,
+   output                                    app_wdf_afull,
+   output [(2*DQ_WIDTH)-1:0]                 wdf_data,
+   output [((2*DQ_WIDTH)/8)-1:0]             wdf_mask_data,
+   output                                                                       wr_fifo_clear,  // driven by u_usr_wr
+   input                                     wr_fifo_burst,       // consumed by u_usr_wr
+   output                                    rd_fifo_clear        // driven by u_usr_rd
+   );
+
+  wire [(APPDATA_WIDTH/2)-1:0] i_rd_data_fifo_out_fall;  // fall-edge half of the read data from u_usr_rd
+  wire [(APPDATA_WIDTH/2)-1:0] i_rd_data_fifo_out_rise;  // rise-edge half of the read data from u_usr_rd
+
+  //***************************************************************************
+
+  // User read data word: fall data in the upper half, rise data in the lower half
+  assign rd_data_fifo_out = {i_rd_data_fifo_out_fall,
+                             i_rd_data_fifo_out_rise};
+
+  // read data de-skew and ECC calculation
+  ddr2_usr_rd #
+    (
+     .DQ_PER_DQS    (DQ_PER_DQS),
+     .ECC_ENABLE    (ECC_ENABLE),
+     .APPDATA_WIDTH (APPDATA_WIDTH),
+     .APPDATA_BURST_LEN( APPDATA_BURST_LEN),
+     .APPDATA_BURST_BITS(APPDATA_BURST_BITS),
+     .DQS_WIDTH     (DQS_WIDTH),
+     .EN_SYN        (EN_SYN)
+     )
+     u_usr_rd
+      (
+       .clk0             (clk0),
+       .rst0             (rst0),
+       .rb_clk           (rb_clk),
+       .rb_rst           (rb_rst),
+       .rb_full          (rb_full),
+       .rd_data_in_rise  (rd_data_in_rise),
+       .rd_data_in_fall  (rd_data_in_fall),
+       .rd_ecc_error     (rd_ecc_error),
+       .ctrl_rden        (phy_calib_rden),      // port is named ctrl_rden inside ddr2_usr_rd
+       .ctrl_rden_sel    (phy_calib_rden_sel),  // port is named ctrl_rden_sel inside ddr2_usr_rd
+       .rd_data_valid    (rd_data_valid),
+       .rd_data_rden     (rd_data_rden),
+       .rd_data_out_rise (i_rd_data_fifo_out_rise),
+       .rd_data_out_fall (i_rd_data_fifo_out_fall),
+       .rd_fifo_clear    (rd_fifo_clear)
+       );
+
+  // Command/Address FIFO
+  ddr2_usr_addr_fifo #
+    (
+     .BANK_WIDTH (BANK_WIDTH),
+     .COL_WIDTH  (COL_WIDTH),
+     .CS_BITS    (CS_BITS),
+     .ROW_WIDTH  (ROW_WIDTH),
+     .EN_SYN     (EN_SYN)
+     )
+     u_usr_addr_fifo
+      (
+       .clk0         (clk0),
+       .af_clk       (af_clk),
+       .af_rst       (af_rst),
+       .rst0         (rst0),
+       .app_af_cmd   (app_af_cmd),
+       .app_af_addr  (app_af_addr),
+       .app_af_wren  (app_af_wren),
+       .ctrl_af_rden (ctrl_af_rden),
+       .af_cmd       (af_cmd),
+       .af_addr      (af_addr),
+       .af_empty     (af_empty),
+       .app_af_afull (app_af_afull)
+       );
+  // Write data FIFOs and write-burst tracking
+  ddr2_usr_wr #
+    (
+     .BANK_WIDTH    (BANK_WIDTH),
+     .COL_WIDTH     (COL_WIDTH),
+     .CS_BITS       (CS_BITS),
+     .DQ_WIDTH      (DQ_WIDTH),
+     .APPDATA_WIDTH (APPDATA_WIDTH),
+     .APPDATA_BURST_LEN(APPDATA_BURST_LEN),
+     .APPDATA_BURST_BITS(APPDATA_BURST_BITS),
+     .ECC_ENABLE    (ECC_ENABLE),
+     .ROW_WIDTH     (ROW_WIDTH)
+     )
+    u_usr_wr
+      (
+       .clk0              (clk0),
+       .wb_clk            (wb_clk),
+       .wb_rst            (wb_rst),
+       .clk90             (clk90),
+       .rst0              (rst0),
+       .app_wdf_wren      (app_wdf_wren),
+       .app_wdf_data      (app_wdf_data),
+       .app_wdf_mask_data (app_wdf_mask_data),
+       .wdf_rden          (wdf_rden),
+       .app_wdf_afull     (app_wdf_afull),
+       .wdf_data          (wdf_data),
+       .wdf_mask_data     (wdf_mask_data),
+       .wr_fifo_clear     (wr_fifo_clear),
+       .wr_fifo_burst     (wr_fifo_burst)
+       );
+
+endmodule
\ No newline at end of file
diff --git a/src/edu/berkeley/fleet/fpga/greg/ddr2_usr_wr.v b/src/edu/berkeley/fleet/fpga/greg/ddr2_usr_wr.v
new file mode 100644 (file)
index 0000000..6e72fed
--- /dev/null
@@ -0,0 +1,377 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+// 
+// This text/file contains proprietary, confidential
+// information of Xilinx, Inc., is distributed under license
+// from Xilinx, Inc., and may be used, copied and/or
+// disclosed only pursuant to the terms of a valid license
+// agreement with Xilinx, Inc. Xilinx hereby grants you a 
+// license to use this text/file solely for design, simulation, 
+// implementation and creation of design files limited 
+// to Xilinx devices or technologies. Use with non-Xilinx 
+// devices or technologies is expressly prohibited and 
+// immediately terminates your license unless covered by
+// a separate agreement.
+//
+// Xilinx is providing this design, code, or information 
+// "as-is" solely for use in developing programs and 
+// solutions for Xilinx devices, with no obligation on the 
+// part of Xilinx to provide support. By providing this design, 
+// code, or information as one possible implementation of 
+// this feature, application or standard, Xilinx is making no 
+// representation that this implementation is free from any 
+// claims of infringement. You are responsible for 
+// obtaining any rights you may require for your implementation. 
+// Xilinx expressly disclaims any warranty whatsoever with 
+// respect to the adequacy of the implementation, including 
+// but not limited to any warranties or representations that this
+// implementation is free from claims of infringement, implied 
+// warranties of merchantability or fitness for a particular 
+// purpose.
+//
+// Xilinx products are not intended for use in life support
+// appliances, devices, or systems. Use in such applications is
+// expressly prohibited.
+//
+// Any modifications that are made to the Source Code are 
+// done at the user's sole risk and will be unsupported.
+//
+// Copyright (c) 2006-2007 Xilinx, Inc. All rights reserved.
+//
+// This copyright and support notice must be retained as part 
+// of this text at all times. 
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /    Vendor: Xilinx
+// \   \   \/     Version: 2.3
+//  \   \         Application: MIG
+//  /   /         Filename: ddr2_usr_wr.v
+// /___/   /\     Date Last Modified: $Date: 2008/05/08 15:20:47 $
+// \   \  /  \    Date Created: Mon Aug 28 2006
+//  \___\/\___\
+//
+//Device: Virtex-5
+//Design Name: DDR/DDR2
+//Purpose:
+//   This module instantiates the modules containing internal FIFOs
+//Reference:
+//Revision History:
+//*****************************************************************************
+
+`timescale 1ns/1ps
+
+module ddr2_usr_wr #  // Write-data path: FIFOs written on wb_clk and read on clk90, plus a per-burst token FIFO (blfifo)
+  (
+   // The following parameter defaults are for a 72-bit RDIMM design (ML561
+   // reference board). Actual values may differ: the real parameter values
+   // are passed down from the design top module, ddr2_sdram. Please refer to
+   // the ddr2_sdram module for actual values.
+   parameter BANK_WIDTH    = 2,
+   parameter COL_WIDTH     = 10,
+   parameter CS_BITS       = 0,
+   parameter DQ_WIDTH      = 72,
+   parameter APPDATA_WIDTH = 144,
+   parameter APPDATA_BURST_LEN = 2,   // number of accepted user writes that make up one burst token
+   parameter APPDATA_BURST_BITS = 1,  // bit width of blfifo_writecount
+   parameter ECC_ENABLE    = 0,
+   parameter ROW_WIDTH     = 14
+   )
+  (
+   input                         clk0,                 // write buffer PHY clock: read side of blfifo, samples rst0
+   input                         clk90,                // read clock of the write-data (and ECC mask) FIFOs
+   input                         rst0,                 // registered on clk0 into rst_r before use
+   // start of changes xtan & gdgib
+   input                        wb_clk,                // write buffer user clock (write side of all FIFOs here)
+   input                        wb_rst,                // write buffer user reset
+   // end of changes
+   // Write data FIFO interface
+   input                         app_wdf_wren,
+   input [APPDATA_WIDTH-1:0]     app_wdf_data,
+   input [(APPDATA_WIDTH/8)-1:0] app_wdf_mask_data,
+   input                         wdf_rden,
+   output                        app_wdf_afull,
+   output [(2*DQ_WIDTH)-1:0]     wdf_data,
+   output [((2*DQ_WIDTH)/8)-1:0] wdf_mask_data,
+   output                                                      wr_fifo_clear,  // high while blfifo is not empty
+   input                                                       wr_fifo_burst   // read enable of blfifo (clk0 side)
+   );
+
+  // determine number of FIFO72's to use based on data width
+  // round up to next integer value when determining WDF_FIFO_NUM
+  localparam WDF_FIFO_NUM = (ECC_ENABLE) ? (APPDATA_WIDTH+63)/64 :
+             ((2*DQ_WIDTH)+63)/64;
+  // MASK_WIDTH = number of bytes in data bus
+  localparam MASK_WIDTH = DQ_WIDTH/8;
+
+  wire [WDF_FIFO_NUM-1:0]      i_wdf_afull;
+  wire [DQ_WIDTH-1:0]          i_wdf_data_fall_in;
+  wire [DQ_WIDTH-1:0]          i_wdf_data_fall_out;
+  wire [(64*WDF_FIFO_NUM)-1:0] i_wdf_data_in;
+  wire [(64*WDF_FIFO_NUM)-1:0] i_wdf_data_out;
+  wire [DQ_WIDTH-1:0]          i_wdf_data_rise_in;
+  wire [DQ_WIDTH-1:0]          i_wdf_data_rise_out;
+  wire [MASK_WIDTH-1:0]        i_wdf_mask_data_fall_in;
+  wire [MASK_WIDTH-1:0]        i_wdf_mask_data_fall_out;
+  wire [(8*WDF_FIFO_NUM)-1:0]  i_wdf_mask_data_in;
+  wire [(8*WDF_FIFO_NUM)-1:0]  i_wdf_mask_data_out;
+  wire [MASK_WIDTH-1:0]        i_wdf_mask_data_rise_in;
+  wire [MASK_WIDTH-1:0]        i_wdf_mask_data_rise_out;
+  reg                          rst_r;  // rst0 delayed by one clk0 cycle
+
+  // ECC signals
+  wire [(2*DQ_WIDTH)-1:0]      i_wdf_data_out_ecc;
+  wire [((2*DQ_WIDTH)/8)-1:0]  i_wdf_mask_data_out_ecc;
+  wire [63:0]                  i_wdf_mask_data_out_ecc_wire;
+  wire [((2*DQ_WIDTH)/8)-1:0]  mask_data_in_ecc;
+  wire [63:0]                  mask_data_in_ecc_wire;
+  // Burst tracking (blfifo): one 1-bit token is written per APPDATA_BURST_LEN accepted user writes.
+  //***************************************************************************
+  // The token payload is a constant 0 and dout is unconnected; only empty/full are used.
+  reg  [APPDATA_BURST_BITS-1:0] blfifo_writecount;  // position of the current write within its burst
+  wire                                                 blfifo_writeterminal;  // current write is the last of its burst
+  wire                                                 blfifo_write;  // push one token into blfifo
+  wire                                                 blfifo_empty, blfifo_full;
+  
+  // Doesn't NEED to be async for now, but will need to be if we add async data writes....
+  asyncfifo_dmem_1b blfifo (
+       .din(1'b0), 
+       .rd_clk(clk0),
+       .rd_en(wr_fifo_burst),
+       .rst(rst_r | wb_rst),
+       .wr_clk(wb_clk),
+       .wr_en(blfifo_write),
+       .dout(), 
+       .empty(blfifo_empty),
+       .full(blfifo_full));
+       assign wr_fifo_clear = ~blfifo_empty;  // at least one complete-burst token is waiting
+       // Count accepted writes (app_wdf_wren while blfifo is not full), wrapping after APPDATA_BURST_LEN
+       always @ (posedge wb_clk) begin
+               if (wb_rst) blfifo_writecount <= 0;
+               else if (app_wdf_wren & ~blfifo_full) begin
+                       if (blfifo_writeterminal) blfifo_writecount <= 0;
+                       else blfifo_writecount <= blfifo_writecount + 1;
+               end
+       end
+       
+       assign  blfifo_writeterminal = (blfifo_writecount == (APPDATA_BURST_LEN - 1));
+       assign  blfifo_write = blfifo_writeterminal & app_wdf_wren & ~blfifo_full;  // token pushed on the last accepted write of a burst
+
+  //***************************************************************************
+
+  assign app_wdf_afull = i_wdf_afull[0] | blfifo_full;  // data FIFO 0 almost full, or token FIFO full
+
+  always @(posedge clk0 )
+      rst_r <= rst0;
+
+  genvar wdf_di_i;
+  genvar wdf_do_i;
+  genvar mask_i;
+  genvar wdf_i;
+  generate
+    if(ECC_ENABLE) begin    // ECC code
+
+      assign wdf_data = i_wdf_data_out_ecc;
+
+      // the byte 9 dm is always held to 0
+      assign wdf_mask_data = i_wdf_mask_data_out_ecc;
+
+
+
+      // generate for write data fifo .
+      for (wdf_i = 0; wdf_i < WDF_FIFO_NUM; wdf_i = wdf_i + 1) begin: gen_wdf
+
+        FIFO36_72  #
+          (
+           .ALMOST_EMPTY_OFFSET     (9'h007),
+           .ALMOST_FULL_OFFSET      (9'h00F),
+           .DO_REG                  (1),          // extra CC output delay
+           .EN_ECC_WRITE            ("TRUE"),
+           .EN_ECC_READ             ("FALSE"),
+           .EN_SYN                  ("FALSE"),    // hard-coded: this module has no EN_SYN parameter
+           .FIRST_WORD_FALL_THROUGH ("FALSE")
+           )
+          u_wdf_ecc
+            (
+             .ALMOSTEMPTY (),
+             .ALMOSTFULL  (i_wdf_afull[wdf_i]),
+             .DBITERR     (),
+             .DO          (i_wdf_data_out_ecc[((64*(wdf_i+1))+(wdf_i *8))-1:
+                                              (64*wdf_i)+(wdf_i *8)]),
+             .DOP         (i_wdf_data_out_ecc[(72*(wdf_i+1))-1:
+                                              (64*(wdf_i+1))+ (8*wdf_i) ]),
+             .ECCPARITY   (),
+             .EMPTY       (),
+             .FULL        (),
+             .RDCOUNT     (),
+             .RDERR       (),
+             .SBITERR     (),
+             .WRCOUNT     (),
+             .WRERR       (),
+             .DI          (app_wdf_data[(64*(wdf_i+1))-1:
+                                        (64*wdf_i)]),
+             .DIP         (),
+             .RDCLK       (clk90),
+             .RDEN        (wdf_rden),
+             .RST         (rst_r | wb_rst),          // or can use rst0
+             .WRCLK       (wb_clk),        // xtan: clk0 -> wb_clk
+             .WREN        (app_wdf_wren & ~blfifo_full)  // write accepted only while blfifo is not full
+             );
+      end
+
+      // remapping the mask data. The mask data from user i/f does not have
+      // the mask for the ECC byte. Assigning 0 to the ECC mask byte.
+      for (mask_i = 0; mask_i < (DQ_WIDTH)/36;
+           mask_i = mask_i +1) begin: gen_mask
+        assign mask_data_in_ecc[((8*(mask_i+1))+ mask_i)-1:((8*mask_i)+mask_i)]
+                 = app_wdf_mask_data[(8*(mask_i+1))-1:8*(mask_i)] ;
+        assign mask_data_in_ecc[((8*(mask_i+1))+mask_i)] = 1'd0;
+      end
+
+      // assign ecc bits to temp variables to avoid
+      // sim warnings. Not all the 64 bits of the fifo
+      // are used in ECC mode.
+       assign  mask_data_in_ecc_wire[((2*DQ_WIDTH)/8)-1:0] = mask_data_in_ecc;
+       assign  mask_data_in_ecc_wire[63:((2*DQ_WIDTH)/8)]  =
+              {(64-((2*DQ_WIDTH)/8)){1'b0}};
+       assign i_wdf_mask_data_out_ecc =
+               i_wdf_mask_data_out_ecc_wire[((2*DQ_WIDTH)/8)-1:0];
+
+      // Single FIFO carrying the (zero-padded to 64 bits) data mask in ECC mode
+      FIFO36_72  #
+        (
+         .ALMOST_EMPTY_OFFSET     (9'h007),
+         .ALMOST_FULL_OFFSET      (9'h00F),
+         .DO_REG                  (1),          // extra CC output delay
+         .EN_ECC_WRITE            ("TRUE"),
+         .EN_ECC_READ             ("FALSE"),
+         .EN_SYN                  ("FALSE"),
+         .FIRST_WORD_FALL_THROUGH ("FALSE")
+         )
+        u_wdf_ecc_mask
+          (
+           .ALMOSTEMPTY (),
+           .ALMOSTFULL  (),
+           .DBITERR     (),
+           .DO          (i_wdf_mask_data_out_ecc_wire),
+           .DOP         (),
+           .ECCPARITY   (),
+           .EMPTY       (),
+           .FULL        (),
+           .RDCOUNT     (),
+           .RDERR       (),
+           .SBITERR     (),
+           .WRCOUNT     (),
+           .WRERR       (),
+           .DI          (mask_data_in_ecc_wire),
+           .DIP         (),
+           .RDCLK       (clk90),
+           .RDEN        (wdf_rden),
+           .RST         (rst_r | wb_rst),          // or can use rst0
+           .WRCLK       (wb_clk),        // xtan: clk0->wb_clk
+           .WREN        (app_wdf_wren & ~blfifo_full)  // same write gating as the data FIFOs
+           );
+    end else begin
+
+      //***********************************************************************
+
+      // Define intermediate buses:
+      assign i_wdf_data_rise_in
+        = app_wdf_data[DQ_WIDTH-1:0];
+      assign i_wdf_data_fall_in
+        = app_wdf_data[(2*DQ_WIDTH)-1:DQ_WIDTH];
+      assign i_wdf_mask_data_rise_in
+        = app_wdf_mask_data[MASK_WIDTH-1:0];
+      assign i_wdf_mask_data_fall_in
+        = app_wdf_mask_data[(2*MASK_WIDTH)-1:MASK_WIDTH];
+
+      //***********************************************************************
+      // Write data FIFO Input:
+      // Arrange DQ's so that the rise data and fall data are interleaved.
+      // the data arrives at the input of the wdf fifo as {fall,rise}.
+      // It is remapped as:
+      //     {...fall[15:8],rise[15:8],fall[7:0],rise[7:0]}
+      // This is done to avoid having separate fifo's for rise and fall data
+      // and to keep rise/fall data for the same DQ's on same FIFO
+      // Data masks are interleaved in a similar manner
+      // NOTE: Initialization data from PHY_INIT module does not need to be
+      //  interleaved - it's already in the correct format - and the same
+      //  initialization pattern from PHY_INIT is sent to all write FIFOs
+      //***********************************************************************
+
+      for (wdf_di_i = 0; wdf_di_i < MASK_WIDTH;
+           wdf_di_i = wdf_di_i + 1) begin: gen_wdf_data_in
+        assign i_wdf_data_in[(16*wdf_di_i)+15:(16*wdf_di_i)]
+                 = {i_wdf_data_fall_in[(8*wdf_di_i)+7:(8*wdf_di_i)],
+                    i_wdf_data_rise_in[(8*wdf_di_i)+7:(8*wdf_di_i)]};
+        assign i_wdf_mask_data_in[(2*wdf_di_i)+1:(2*wdf_di_i)]
+                 = {i_wdf_mask_data_fall_in[wdf_di_i],
+                    i_wdf_mask_data_rise_in[wdf_di_i]};
+      end
+
+      //***********************************************************************
+      // Write data FIFO Output:
+      // FIFO DQ and mask outputs must be untangled and put in the standard
+      // format of {fall,rise}. Same goes for mask output
+      //***********************************************************************
+
+      for (wdf_do_i = 0; wdf_do_i < MASK_WIDTH;
+           wdf_do_i = wdf_do_i + 1) begin: gen_wdf_data_out
+        assign i_wdf_data_rise_out[(8*wdf_do_i)+7:(8*wdf_do_i)]
+                 = i_wdf_data_out[(16*wdf_do_i)+7:(16*wdf_do_i)];
+        assign i_wdf_data_fall_out[(8*wdf_do_i)+7:(8*wdf_do_i)]
+                 = i_wdf_data_out[(16*wdf_do_i)+15:(16*wdf_do_i)+8];
+        assign i_wdf_mask_data_rise_out[wdf_do_i]
+                 = i_wdf_mask_data_out[2*wdf_do_i];
+        assign i_wdf_mask_data_fall_out[wdf_do_i]
+                 = i_wdf_mask_data_out[(2*wdf_do_i)+1];
+      end
+
+      assign wdf_data = {i_wdf_data_fall_out,
+                         i_wdf_data_rise_out};
+
+      assign wdf_mask_data = {i_wdf_mask_data_fall_out,
+                              i_wdf_mask_data_rise_out};
+
+      //***********************************************************************
+      // One FIFO36_72 per 64 data bits; the 8 parity bits (DIP/DOP) carry the matching mask bits
+      for (wdf_i = 0; wdf_i < WDF_FIFO_NUM; wdf_i = wdf_i + 1) begin: gen_wdf
+
+        FIFO36_72  #
+          (
+           .ALMOST_EMPTY_OFFSET     (9'h007),
+           .ALMOST_FULL_OFFSET      (9'h00F),
+           .DO_REG                  (1),          // extra CC output delay
+           .EN_ECC_WRITE            ("FALSE"),
+           .EN_ECC_READ             ("FALSE"),
+           .EN_SYN                  ("FALSE"),
+           .FIRST_WORD_FALL_THROUGH ("FALSE")
+           )
+          u_wdf
+            (
+             .ALMOSTEMPTY (),
+             .ALMOSTFULL  (i_wdf_afull[wdf_i]),
+             .DBITERR     (),
+             .DO          (i_wdf_data_out[(64*(wdf_i+1))-1:64*wdf_i]),
+             .DOP         (i_wdf_mask_data_out[(8*(wdf_i+1))-1:8*wdf_i]),
+             .ECCPARITY   (),
+             .EMPTY       (),
+             .FULL        (),
+             .RDCOUNT     (),
+             .RDERR       (),
+             .SBITERR     (),
+             .WRCOUNT     (),
+             .WRERR       (),
+             .DI          (i_wdf_data_in[(64*(wdf_i+1))-1:64*wdf_i]),
+             .DIP         (i_wdf_mask_data_in[(8*(wdf_i+1))-1:8*wdf_i]),
+             .RDCLK       (clk90),
+             .RDEN        (wdf_rden),
+             .RST         (rst_r | wb_rst),          // or can use rst0
+             .WRCLK       (wb_clk),        // xtan: clk0 -> wb_clk
+             .WREN        (app_wdf_wren & ~blfifo_full)  // write accepted only while blfifo is not full
+             );
+      end
+    end
+  endgenerate
+
+endmodule
diff --git a/src/edu/berkeley/fleet/fpga/main-ml410.ucf b/src/edu/berkeley/fleet/fpga/main-ml410.ucf
new file mode 100644 (file)
index 0000000..cd14662
--- /dev/null
@@ -0,0 +1,223 @@
+## Clock, Reset ##############################################################################
+
+Net clk_pin LOC=J16;
+Net clk_pin IOSTANDARD = LVCMOS25;
+
+Net rst_pin LOC=H7;
+Net rst_pin PULLUP;
+Net rst_pin IOSTANDARD = LVCMOS33;
+
+Net clk_pin TNM_NET = clk_pin;  # timing group referenced by TS_clk_pin below
+TIMESPEC TS_clk_pin = PERIOD clk_pin 10 ns HIGH 50%;  # 10 ns period = 100 MHz
+
+Net clk_unbuffered TNM_NET = clk_unbuffered;  # timing group referenced by TS_clk_unbuffered below
+TIMESPEC TS_clk_unbuffered = PERIOD clk_unbuffered 20 ns;  # 20 ns period = 50 MHz
+
+Net rst_pin TIG;  # timing-ignore: paths through rst_pin are excluded from timing analysis
+
+## UART ##############################################################################
+
+#Net uart_cts LOC=G6;
+#Net uart_cts IOSTANDARD = LVCMOS33;
+#Net uart_cts TIG;
+
+#Net uart_rts LOC=F6;
+#Net uart_rts IOSTANDARD = LVCMOS33;
+#Net uart_rts TIG;
+
+Net uart_in LOC=E6;
+Net uart_in IOSTANDARD = LVCMOS33;
+Net uart_in TIG;
+Net uart_in PULLUP;
+
+Net uart_out LOC=D6;
+Net uart_out IOSTANDARD = LVCMOS33;
+Net uart_out TIG;
+Net uart_out PULLUP;
+
+## VGA ##############################################################################
+
+net "vga_hsync" loc   = f9;
+net "vga_hsync" slew  = slow;
+net "vga_hsync" drive = 2;
+
+net "vga_vsync" loc   = h10;
+net "vga_vsync" slew  = slow;
+net "vga_vsync" drive = 2;
+
+net "vga_clkout"  loc ="c12";
+net "vga_clkout"  slew = fast;
+net "vga_clkout"  drive = 8;
+
+net "vga_r<7>" loc ="h8";
+net "vga_r<6>" loc ="c5";
+net "vga_r<5>" loc ="h9";
+net "vga_r<4>" loc ="g12";
+net "vga_r<3>" loc ="g11";
+net "vga_r<2>" loc ="g10";
+net "vga_r<1>" loc ="f11";
+net "vga_r<0>" loc ="f10";
+net "vga_r<*>" slew = slow;
+net "vga_r<*>" drive = 2;
+
+net "vga_g<7>" loc ="d5";
+net "vga_g<6>" loc ="d4";
+net "vga_g<5>" loc ="f8";
+net "vga_g<4>" loc ="e13";
+net "vga_g<3>" loc ="e12";
+net "vga_g<2>" loc ="e11";
+net "vga_g<1>" loc ="e9";
+net "vga_g<0>" loc ="e8";
+net "vga_g<*>" slew = slow;
+net "vga_g<*>" drive = 2;
+
+net "vga_b<7>" loc ="c4";
+net "vga_b<6>" loc ="c3";
+net "vga_b<5>" loc ="d12";
+net "vga_b<4>" loc ="d11";
+net "vga_b<3>" loc ="d10";
+net "vga_b<2>" loc ="d9";
+net "vga_b<1>" loc ="c13";
+net "vga_b<0>" loc ="g8";
+net "vga_b<*>" slew = slow;
+net "vga_b<*>" drive = 2;
+
+net "vga_*" iostandard = lvcmos33;
+
+## DRAM ##############################################################################
+
+NET "clk_pin"               TNM="SYS_CLK";
+#NET "*/*/clkgen/write_clk_u"    TNM="WRITE_CLK";
+#NET "*/*/clkgen/write_clk90_u"  TNM="WRITE_CLK";
+#NET "*/*/clkgen/read_clk_u"     TNM="READ_CLK";
+#TIMESPEC "TS_SYS_DDRREAD"=FROM "SYS_CLK" TO "WRITE_CLK" TIG;
+#TIMESPEC "TS_DDRREAD_SYS"=FROM "WRITE_CLK" TO "SYS_CLK" TIG;
+#TIMESPEC "TS_SYS_DDRWRITE"=FROM "SYS_CLK" TO "READ_CLK" TIG;
+#TIMESPEC "TS_DDRWRITE_SYS"=FROM "READ_CLK" TO "SYS_CLK" TIG;
+#TIMESPEC "TS_DDRREAD_DDRWRITE"=FROM "READ_CLK" TO "WRITE_CLK" TIG;
+#TIMESPEC "TS_DDRWRITE_DDRREAD"=FROM "WRITE_CLK" TO "READ_CLK" TIG;
+
+Net ddr1_Addr_pin<12> LOC=J24;
+Net ddr1_Addr_pin<12> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<11> LOC=K26;
+Net ddr1_Addr_pin<11> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<10> LOC=K24;
+Net ddr1_Addr_pin<10> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<9> LOC=K23;
+Net ddr1_Addr_pin<9> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<8> LOC=L26;
+Net ddr1_Addr_pin<8> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<7> LOC=L25;
+Net ddr1_Addr_pin<7> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<6> LOC=L24;
+Net ddr1_Addr_pin<6> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<5> LOC=M23;
+Net ddr1_Addr_pin<5> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<4> LOC=N24;
+Net ddr1_Addr_pin<4> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<3> LOC=N23;
+Net ddr1_Addr_pin<3> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<2> LOC=N22;
+Net ddr1_Addr_pin<2> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<1> LOC=P22;
+Net ddr1_Addr_pin<1> IOSTANDARD = SSTL2_I;
+Net ddr1_Addr_pin<0> LOC=P24;
+Net ddr1_Addr_pin<0> IOSTANDARD = SSTL2_I;
+Net ddr1_BankAddr_pin<1> LOC=J26;
+Net ddr1_BankAddr_pin<1> IOSTANDARD = SSTL2_I;
+Net ddr1_BankAddr_pin<0> LOC=J25;
+Net ddr1_BankAddr_pin<0> IOSTANDARD = SSTL2_I;
+Net ddr1_CAS_n_pin LOC=D26;
+Net ddr1_CAS_n_pin IOSTANDARD = SSTL2_I;
+Net ddr1_CE_pin LOC=H14;
+Net ddr1_CE_pin IOSTANDARD = SSTL2_I;
+Net ddr1_CS_n_pin LOC=C27;
+Net ddr1_CS_n_pin IOSTANDARD = SSTL2_I;
+Net ddr1_RAS_n_pin LOC=D27;
+Net ddr1_RAS_n_pin IOSTANDARD = SSTL2_I;
+Net ddr1_WE_n_pin LOC=E27;
+Net ddr1_WE_n_pin IOSTANDARD = SSTL2_I;
+Net ddr1_DM_pin<0> LOC=F21;
+Net ddr1_DM_pin<0> IOSTANDARD = SSTL2_II;
+Net ddr1_DM_pin<1> LOC=G22;
+Net ddr1_DM_pin<1> IOSTANDARD = SSTL2_II;
+Net ddr1_DM_pin<2> LOC=E23;
+Net ddr1_DM_pin<2> IOSTANDARD = SSTL2_II;
+Net ddr1_DM_pin<3> LOC=G23;
+Net ddr1_DM_pin<3> IOSTANDARD = SSTL2_II;
+Net ddr1_DQS<0> LOC=F20;
+Net ddr1_DQS<0> IOSTANDARD = SSTL2_II;
+Net ddr1_DQS<1> LOC=G20;
+Net ddr1_DQS<1> IOSTANDARD = SSTL2_II;
+Net ddr1_DQS<2> LOC=G25;
+Net ddr1_DQS<2> IOSTANDARD = SSTL2_II;
+Net ddr1_DQS<3> LOC=F25;
+Net ddr1_DQS<3> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<0> LOC=E17;
+Net ddr1_DQ<0> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<1> LOC=E18;
+Net ddr1_DQ<1> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<2> LOC=F18;
+Net ddr1_DQ<2> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<3> LOC=G18;
+Net ddr1_DQ<3> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<4> LOC=F19;
+Net ddr1_DQ<4> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<5> LOC=E19;
+Net ddr1_DQ<5> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<6> LOC=D21;
+Net ddr1_DQ<6> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<7> LOC=E21;
+Net ddr1_DQ<7> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<8> LOC=G21;
+Net ddr1_DQ<8> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<9> LOC=H20;
+Net ddr1_DQ<9> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<10> LOC=J20;
+Net ddr1_DQ<10> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<11> LOC=J21;
+Net ddr1_DQ<11> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<12> LOC=K21;
+Net ddr1_DQ<12> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<13> LOC=L21;
+Net ddr1_DQ<13> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<14> LOC=J22;
+Net ddr1_DQ<14> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<15> LOC=H22;
+Net ddr1_DQ<15> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<16> LOC=C22;
+Net ddr1_DQ<16> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<17> LOC=C23;
+Net ddr1_DQ<17> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<18> LOC=C24;
+Net ddr1_DQ<18> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<19> LOC=C25;
+Net ddr1_DQ<19> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<20> LOC=D22;
+Net ddr1_DQ<20> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<21> LOC=D24;
+Net ddr1_DQ<21> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<22> LOC=D25;
+Net ddr1_DQ<22> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<23> LOC=C28;
+Net ddr1_DQ<23> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<24> LOC=F23;
+Net ddr1_DQ<24> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<25> LOC=F24;
+Net ddr1_DQ<25> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<26> LOC=F26;
+Net ddr1_DQ<26> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<27> LOC=G26;
+Net ddr1_DQ<27> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<28> LOC=H25;
+Net ddr1_DQ<28> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<29> LOC=H24;
+Net ddr1_DQ<29> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<30> LOC=E24;
+Net ddr1_DQ<30> IOSTANDARD = SSTL2_II;
+Net ddr1_DQ<31> LOC=E22;
+Net ddr1_DQ<31> IOSTANDARD = SSTL2_II;
+Net ddr1_Clk_pin LOC=F28;
+Net ddr1_Clk_pin IOSTANDARD = SSTL2_II;
+Net ddr1_Clk_n_pin LOC=E28;
+Net ddr1_Clk_n_pin IOSTANDARD = SSTL2_II;
diff --git a/src/edu/berkeley/fleet/fpga/main-ml50x.ucf b/src/edu/berkeley/fleet/fpga/main-ml50x.ucf
new file mode 100644 (file)
index 0000000..b1c4a03
--- /dev/null
@@ -0,0 +1,223 @@
+## Clock, Reset ##############################################################################
+
+Net clk_pin LOC=AH15;
+#Net clk_pin IOSTANDARD = LVCMOS25;
+
+#Net rst_pin LOC=H7;
+#Net rst_pin PULLUP;
+#Net rst_pin IOSTANDARD = LVCMOS33;
+
+Net clk_pin TNM_NET = clk_pin;  # timing group referenced by TS_clk_pin below
+TIMESPEC TS_clk_pin = PERIOD clk_pin 10 ns HIGH 50%;  # 10 ns period = 100 MHz
+
+Net clk_unbuffered TNM_NET = clk_unbuffered;  # timing group referenced by TS_clk_unbuffered below
+TIMESPEC TS_clk_unbuffered = PERIOD clk_unbuffered 20 ns;  # 20 ns period = 50 MHz
+
+Net rst_pin TIG;  # NOTE(review): rst_pin LOC/PULLUP/IOSTANDARD are commented out above, so only this timing-ignore applies to rst_pin here
+
+## UART ##############################################################################
+
+#Net uart_cts LOC=G6;
+#Net uart_cts IOSTANDARD = LVCMOS33;
+#Net uart_cts TIG;
+
+#Net uart_rts LOC=F6;
+#Net uart_rts IOSTANDARD = LVCMOS33;
+#Net uart_rts TIG;
+
+Net uart_in LOC=AG15;
+#Net uart_in IOSTANDARD = LVCMOS33;
+Net uart_in TIG;
+Net uart_in PULLUP;
+
+Net uart_out LOC=AG20;
+#Net uart_out IOSTANDARD = LVCMOS33;
+Net uart_out TIG;
+Net uart_out PULLUP;
+
+## VGA ##############################################################################
+
+#net "vga_hsync" loc   = f9;
+#net "vga_hsync" slew  = slow;
+#net "vga_hsync" drive = 2;
+#
+#net "vga_vsync" loc   = h10;
+#net "vga_vsync" slew  = slow;
+#net "vga_vsync" drive = 2;
+#
+#net "vga_clkout"  loc ="c12";
+#net "vga_clkout"  slew = fast;
+#net "vga_clkout"  drive = 8;
+#
+#net "vga_r<7>" loc ="h8";
+#net "vga_r<6>" loc ="c5";
+#net "vga_r<5>" loc ="h9";
+#net "vga_r<4>" loc ="g12";
+#net "vga_r<3>" loc ="g11";
+#net "vga_r<2>" loc ="g10";
+#net "vga_r<1>" loc ="f11";
+#net "vga_r<0>" loc ="f10";
+#net "vga_r<*>" slew = slow;
+#net "vga_r<*>" drive = 2;
+#
+#net "vga_g<7>" loc ="d5";
+#net "vga_g<6>" loc ="d4";
+#net "vga_g<5>" loc ="f8";
+#net "vga_g<4>" loc ="e13";
+#net "vga_g<3>" loc ="e12";
+#net "vga_g<2>" loc ="e11";
+#net "vga_g<1>" loc ="e9";
+#net "vga_g<0>" loc ="e8";
+#net "vga_g<*>" slew = slow;
+#net "vga_g<*>" drive = 2;
+#
+#net "vga_b<7>" loc ="c4";
+#net "vga_b<6>" loc ="c3";
+#net "vga_b<5>" loc ="d12";
+#net "vga_b<4>" loc ="d11";
+#net "vga_b<3>" loc ="d10";
+#net "vga_b<2>" loc ="d9";
+#net "vga_b<1>" loc ="c13";
+#net "vga_b<0>" loc ="g8";
+#net "vga_b<*>" slew = slow;
+#net "vga_b<*>" drive = 2;
+#
+#net "vga_*" iostandard = lvcmos33;
+#
+### DRAM ##############################################################################
+
+NET "clk_pin"               TNM="SYS_CLK";
+#NET "*/*/clkgen/write_clk_u"    TNM="WRITE_CLK";
+#NET "*/*/clkgen/write_clk90_u"  TNM="WRITE_CLK";
+#NET "*/*/clkgen/read_clk_u"     TNM="READ_CLK";
+#TIMESPEC "TS_SYS_DDRREAD"=FROM "SYS_CLK" TO "WRITE_CLK" TIG;
+#TIMESPEC "TS_DDRREAD_SYS"=FROM "WRITE_CLK" TO "SYS_CLK" TIG;
+#TIMESPEC "TS_SYS_DDRWRITE"=FROM "SYS_CLK" TO "READ_CLK" TIG;
+#TIMESPEC "TS_DDRWRITE_SYS"=FROM "READ_CLK" TO "SYS_CLK" TIG;
+#TIMESPEC "TS_DDRREAD_DDRWRITE"=FROM "READ_CLK" TO "WRITE_CLK" TIG;
+#TIMESPEC "TS_DDRWRITE_DDRREAD"=FROM "WRITE_CLK" TO "READ_CLK" TIG;
+
+#Net ddr1_Addr_pin<12> LOC=J24;
+#Net ddr1_Addr_pin<12> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<11> LOC=K26;
+#Net ddr1_Addr_pin<11> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<10> LOC=K24;
+#Net ddr1_Addr_pin<10> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<9> LOC=K23;
+#Net ddr1_Addr_pin<9> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<8> LOC=L26;
+#Net ddr1_Addr_pin<8> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<7> LOC=L25;
+#Net ddr1_Addr_pin<7> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<6> LOC=L24;
+#Net ddr1_Addr_pin<6> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<5> LOC=M23;
+#Net ddr1_Addr_pin<5> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<4> LOC=N24;
+#Net ddr1_Addr_pin<4> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<3> LOC=N23;
+#Net ddr1_Addr_pin<3> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<2> LOC=N22;
+#Net ddr1_Addr_pin<2> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<1> LOC=P22;
+#Net ddr1_Addr_pin<1> IOSTANDARD = SSTL2_I;
+#Net ddr1_Addr_pin<0> LOC=P24;
+#Net ddr1_Addr_pin<0> IOSTANDARD = SSTL2_I;
+#Net ddr1_BankAddr_pin<1> LOC=J26;
+#Net ddr1_BankAddr_pin<1> IOSTANDARD = SSTL2_I;
+#Net ddr1_BankAddr_pin<0> LOC=J25;
+#Net ddr1_BankAddr_pin<0> IOSTANDARD = SSTL2_I;
+#Net ddr1_CAS_n_pin LOC=D26;
+#Net ddr1_CAS_n_pin IOSTANDARD = SSTL2_I;
+#Net ddr1_CE_pin LOC=H14;
+#Net ddr1_CE_pin IOSTANDARD = SSTL2_I;
+#Net ddr1_CS_n_pin LOC=C27;
+#Net ddr1_CS_n_pin IOSTANDARD = SSTL2_I;
+#Net ddr1_RAS_n_pin LOC=D27;
+#Net ddr1_RAS_n_pin IOSTANDARD = SSTL2_I;
+#Net ddr1_WE_n_pin LOC=E27;
+#Net ddr1_WE_n_pin IOSTANDARD = SSTL2_I;
+#Net ddr1_DM_pin<0> LOC=F21;
+#Net ddr1_DM_pin<0> IOSTANDARD = SSTL2_II;
+#Net ddr1_DM_pin<1> LOC=G22;
+#Net ddr1_DM_pin<1> IOSTANDARD = SSTL2_II;
+#Net ddr1_DM_pin<2> LOC=E23;
+#Net ddr1_DM_pin<2> IOSTANDARD = SSTL2_II;
+#Net ddr1_DM_pin<3> LOC=G23;
+#Net ddr1_DM_pin<3> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQS<0> LOC=F20;
+#Net ddr1_DQS<0> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQS<1> LOC=G20;
+#Net ddr1_DQS<1> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQS<2> LOC=G25;
+#Net ddr1_DQS<2> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQS<3> LOC=F25;
+#Net ddr1_DQS<3> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<0> LOC=E17;
+#Net ddr1_DQ<0> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<1> LOC=E18;
+#Net ddr1_DQ<1> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<2> LOC=F18;
+#Net ddr1_DQ<2> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<3> LOC=G18;
+#Net ddr1_DQ<3> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<4> LOC=F19;
+#Net ddr1_DQ<4> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<5> LOC=E19;
+#Net ddr1_DQ<5> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<6> LOC=D21;
+#Net ddr1_DQ<6> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<7> LOC=E21;
+#Net ddr1_DQ<7> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<8> LOC=G21;
+#Net ddr1_DQ<8> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<9> LOC=H20;
+#Net ddr1_DQ<9> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<10> LOC=J20;
+#Net ddr1_DQ<10> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<11> LOC=J21;
+#Net ddr1_DQ<11> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<12> LOC=K21;
+#Net ddr1_DQ<12> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<13> LOC=L21;
+#Net ddr1_DQ<13> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<14> LOC=J22;
+#Net ddr1_DQ<14> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<15> LOC=H22;
+#Net ddr1_DQ<15> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<16> LOC=C22;
+#Net ddr1_DQ<16> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<17> LOC=C23;
+#Net ddr1_DQ<17> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<18> LOC=C24;
+#Net ddr1_DQ<18> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<19> LOC=C25;
+#Net ddr1_DQ<19> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<20> LOC=D22;
+#Net ddr1_DQ<20> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<21> LOC=D24;
+#Net ddr1_DQ<21> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<22> LOC=D25;
+#Net ddr1_DQ<22> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<23> LOC=C28;
+#Net ddr1_DQ<23> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<24> LOC=F23;
+#Net ddr1_DQ<24> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<25> LOC=F24;
+#Net ddr1_DQ<25> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<26> LOC=F26;
+#Net ddr1_DQ<26> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<27> LOC=G26;
+#Net ddr1_DQ<27> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<28> LOC=H25;
+#Net ddr1_DQ<28> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<29> LOC=H24;
+#Net ddr1_DQ<29> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<30> LOC=E24;
+#Net ddr1_DQ<30> IOSTANDARD = SSTL2_II;
+#Net ddr1_DQ<31> LOC=E22;
+#Net ddr1_DQ<31> IOSTANDARD = SSTL2_II;
+#Net ddr1_Clk_pin LOC=F28;
+#Net ddr1_Clk_pin IOSTANDARD = SSTL2_II;
+#Net ddr1_Clk_n_pin LOC=E28;
+#Net ddr1_Clk_n_pin IOSTANDARD = SSTL2_II;
index cd14662..24fbf68 100644 (file)
+Net DDR2_CAS_B LOC=E31;  Net DDR2_CAS_B IOSTANDARD="SSTL18_II";
+
+Net DDR2_CKE<1> LOC=U30; Net DDR2_CKE<1> IOSTANDARD="SSTL18_II";
+Net DDR2_CKE<0> LOC=T28; Net DDR2_CKE<0> IOSTANDARD="SSTL18_II";
+
+Net DDR2_RAS_B LOC=H30;  Net DDR2_RAS_B IOSTANDARD="SSTL18_II";
+
+Net DDR2_WE_B LOC=K29;   Net DDR2_WE_B IOSTANDARD="SSTL18_II";
+
+Net DDR2_ODT<1> LOC=F30; Net DDR2_ODT<1> IOSTANDARD="SSTL18_II";
+Net DDR2_ODT<0> LOC=F31; Net DDR2_ODT<0> IOSTANDARD="SSTL18_II";
+
+Net DDR2_CS0_B<1> LOC=J29; Net DDR2_CS0_B<1> IOSTANDARD="SSTL18_II";
+Net DDR2_CS0_B<0> LOC=L29; Net DDR2_CS0_B<0> IOSTANDARD="SSTL18_II";
+
+Net DDR2_CLK_N<1> LOC=F28; Net DDR2_CLK_N<1> IOSTANDARD="DIFF_SSTL18_II";
+Net DDR2_CLK_N<0> LOC=AJ29; Net DDR2_CLK_N<0> IOSTANDARD="DIFF_SSTL18_II";
+
+Net DDR2_CLK_P<1> LOC=E28; Net DDR2_CLK_P<1> IOSTANDARD="DIFF_SSTL18_II";
+Net DDR2_CLK_P<0> LOC=AK29; Net DDR2_CLK_P<0> IOSTANDARD="DIFF_SSTL18_II";
+
+Net DDR2_A<13> LOC=H29; Net DDR2_A<13> IOSTANDARD="SSTL18_II";
+Net DDR2_A<12> LOC=T31; Net DDR2_A<12> IOSTANDARD="SSTL18_II";
+Net DDR2_A<11> LOC=R29; Net DDR2_A<11> IOSTANDARD="SSTL18_II";
+Net DDR2_A<10> LOC=J31; Net DDR2_A<10> IOSTANDARD="SSTL18_II";
+Net DDR2_A<9> LOC=R28; Net DDR2_A<9> IOSTANDARD="SSTL18_II";
+Net DDR2_A<8> LOC=M31; Net DDR2_A<8> IOSTANDARD="SSTL18_II";
+Net DDR2_A<7> LOC=P30; Net DDR2_A<7> IOSTANDARD="SSTL18_II";
+Net DDR2_A<6> LOC=P31; Net DDR2_A<6> IOSTANDARD="SSTL18_II";
+Net DDR2_A<5> LOC=L31; Net DDR2_A<5> IOSTANDARD="SSTL18_II";
+Net DDR2_A<4> LOC=K31; Net DDR2_A<4> IOSTANDARD="SSTL18_II";
+Net DDR2_A<3> LOC=P29; Net DDR2_A<3> IOSTANDARD="SSTL18_II";
+Net DDR2_A<2> LOC=N29; Net DDR2_A<2> IOSTANDARD="SSTL18_II";
+Net DDR2_A<1> LOC=M30; Net DDR2_A<1> IOSTANDARD="SSTL18_II";
+Net DDR2_A<0> LOC=L30; Net DDR2_A<0> IOSTANDARD="SSTL18_II";
+
+Net DDR2_BA<2> LOC=R31; Net DDR2_BA<2> IOSTANDARD="SSTL18_II";
+Net DDR2_BA<1> LOC=J30; Net DDR2_BA<1> IOSTANDARD="SSTL18_II";
+Net DDR2_BA<0> LOC=G31; Net DDR2_BA<0> IOSTANDARD="SSTL18_II";
+
+Net DDR2_DQ<63> LOC=L24; Net DDR2_DQ<63> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<62> LOC=L25; Net DDR2_DQ<62> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<61> LOC=M25; Net DDR2_DQ<61> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<60> LOC=J27; Net DDR2_DQ<60> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<59> LOC=L26; Net DDR2_DQ<59> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<58> LOC=J24; Net DDR2_DQ<58> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<57> LOC=M26; Net DDR2_DQ<57> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<56> LOC=G25; Net DDR2_DQ<56> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<55> LOC=G26; Net DDR2_DQ<55> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<54> LOC=H24; Net DDR2_DQ<54> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<53> LOC=K28; Net DDR2_DQ<53> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<52> LOC=K27; Net DDR2_DQ<52> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<51> LOC=H25; Net DDR2_DQ<51> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<50> LOC=F25; Net DDR2_DQ<50> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<49> LOC=L28; Net DDR2_DQ<49> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<48> LOC=M28; Net DDR2_DQ<48> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<47> LOC=N28; Net DDR2_DQ<47> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<46> LOC=P27; Net DDR2_DQ<46> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<45> LOC=N25; Net DDR2_DQ<45> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<44> LOC=T24; Net DDR2_DQ<44> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<43> LOC=P26; Net DDR2_DQ<43> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<42> LOC=N24; Net DDR2_DQ<42> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<41> LOC=P25; Net DDR2_DQ<41> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<40> LOC=R24; Net DDR2_DQ<40> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<39> LOC=V24; Net DDR2_DQ<39> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<38> LOC=W26; Net DDR2_DQ<38> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<37> LOC=W25; Net DDR2_DQ<37> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<36> LOC=V28; Net DDR2_DQ<36> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<35> LOC=W24; Net DDR2_DQ<35> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<34> LOC=Y26; Net DDR2_DQ<34> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<33> LOC=Y27; Net DDR2_DQ<33> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<32> LOC=V29; Net DDR2_DQ<32> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<31> LOC=W27; Net DDR2_DQ<31> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<30> LOC=V27; Net DDR2_DQ<30> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<29> LOC=W29; Net DDR2_DQ<29> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<28> LOC=AC30; Net DDR2_DQ<28> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<27> LOC=V30; Net DDR2_DQ<27> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<26> LOC=W31; Net DDR2_DQ<26> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<25> LOC=AB30; Net DDR2_DQ<25> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<24> LOC=AC29; Net DDR2_DQ<24> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<23> LOC=AA25; Net DDR2_DQ<23> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<22> LOC=AB27; Net DDR2_DQ<22> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<21> LOC=AA24; Net DDR2_DQ<21> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<20> LOC=AB26; Net DDR2_DQ<20> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<19> LOC=AA26; Net DDR2_DQ<19> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<18> LOC=AC27; Net DDR2_DQ<18> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<17> LOC=AB25; Net DDR2_DQ<17> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<16> LOC=AC28; Net DDR2_DQ<16> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<15> LOC=AB28; Net DDR2_DQ<15> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<14> LOC=AG28; Net DDR2_DQ<14> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<13> LOC=AJ26; Net DDR2_DQ<13> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<12> LOC=AG25; Net DDR2_DQ<12> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<11> LOC=AA28; Net DDR2_DQ<11> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<10> LOC=AH28; Net DDR2_DQ<10> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<9> LOC=AF28; Net DDR2_DQ<9> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<8> LOC=AH27; Net DDR2_DQ<8> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<7> LOC=AE29; Net DDR2_DQ<7> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<6> LOC=AD29; Net DDR2_DQ<6> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<5> LOC=AF29; Net DDR2_DQ<5> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<4> LOC=AJ30; Net DDR2_DQ<4> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<3> LOC=AD30; Net DDR2_DQ<3> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<2> LOC=AF31; Net DDR2_DQ<2> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<1> LOC=AK31; Net DDR2_DQ<1> IOSTANDARD="SSTL18_II_DCI";
+Net DDR2_DQ<0> LOC=AF30; Net DDR2_DQ<0> IOSTANDARD="SSTL18_II_DCI";
+
+Net DDR2_DM<7> LOC=J25; Net DDR2_DM<7> IOSTANDARD="SSTL18_II";
+Net DDR2_DM<6> LOC=F26; Net DDR2_DM<6> IOSTANDARD="SSTL18_II";
+Net DDR2_DM<5> LOC=P24; Net DDR2_DM<5> IOSTANDARD="SSTL18_II";
+Net DDR2_DM<4> LOC=V25; Net DDR2_DM<4> IOSTANDARD="SSTL18_II";
+Net DDR2_DM<3> LOC=Y31; Net DDR2_DM<3> IOSTANDARD="SSTL18_II";
+Net DDR2_DM<2> LOC=Y24; Net DDR2_DM<2> IOSTANDARD="SSTL18_II";
+Net DDR2_DM<1> LOC=AE28; Net DDR2_DM<1> IOSTANDARD="SSTL18_II";
+Net DDR2_DM<0> LOC=AJ31; Net DDR2_DM<0> IOSTANDARD="SSTL18_II";
+
+Net DDR2_DQS_N<7> LOC=H27; Net DDR2_DQS_N<7> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_N<6> LOC=G28; Net DDR2_DQS_N<6> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_N<5> LOC=E27; Net DDR2_DQS_N<5> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_N<4> LOC=Y29; Net DDR2_DQS_N<4> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_N<3> LOC=AA31; Net DDR2_DQS_N<3> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_N<2> LOC=AJ27; Net DDR2_DQS_N<2> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_N<1> LOC=AK27; Net DDR2_DQS_N<1> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_N<0> LOC=AA30; Net DDR2_DQS_N<0> IOSTANDARD="DIFF_SSTL18_II_DCI";
+
+Net DDR2_DQS_P<7> LOC=G27; Net DDR2_DQS_P<7> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_P<6> LOC=H28; Net DDR2_DQS_P<6> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_P<5> LOC=E26; Net DDR2_DQS_P<5> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_P<4> LOC=Y28; Net DDR2_DQS_P<4> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_P<3> LOC=AB31; Net DDR2_DQS_P<3> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_P<2> LOC=AK26; Net DDR2_DQS_P<2> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_P<1> LOC=AK28; Net DDR2_DQS_P<1> IOSTANDARD="DIFF_SSTL18_II_DCI";
+Net DDR2_DQS_P<0> LOC=AA29; Net DDR2_DQS_P<0> IOSTANDARD="DIFF_SSTL18_II_DCI";
+
+Net I2C_DDR2_SCL LOC=E29;
+
+Net I2C_DDR2_SDA LOC=F29;
+
+Net CLKBUF_Q1_N LOC=J19;
+#Net CLKBUF_Q1_N PERIOD="200 Mhz";
+Net CLKBUF_Q1_P LOC=K18;
+#Net CLKBUF_Q1_P PERIOD="200 Mhz";
+
+
 ## Clock, Reset ##############################################################################
 
-Net clk_pin LOC=J16;
-Net clk_pin IOSTANDARD = LVCMOS25;
+Net clk_pin LOC=AH17;
 
-Net rst_pin LOC=H7;
+Net rst_pin LOC=E9;
 Net rst_pin PULLUP;
-Net rst_pin IOSTANDARD = LVCMOS33;
 
 Net clk_pin TNM_NET = clk_pin;
-TIMESPEC TS_clk_pin = PERIOD clk_pin 10 ns HIGH 50%;
+TIMESPEC TS_clk_pin = PERIOD clk_pin 30 ns HIGH 50%;  # 30 ns period, approx. 33 MHz
 
-Net clk_unbuffered TNM_NET = clk_unbuffered;
-TIMESPEC TS_clk_unbuffered = PERIOD clk_unbuffered 20 ns;
+#Net clk_unbuffered TNM_NET = clk_unbuffered;
+#TIMESPEC TS_clk_unbuffered = PERIOD clk_unbuffered 20 ns;
 
 Net rst_pin TIG;
 
@@ -25,66 +165,67 @@ Net rst_pin TIG;
 #Net uart_rts IOSTANDARD = LVCMOS33;
 #Net uart_rts TIG;
 
-Net uart_in LOC=E6;
-Net uart_in IOSTANDARD = LVCMOS33;
+Net uart_in LOC=AG15;
+#Net uart_in IOSTANDARD = LVCMOS33;
 Net uart_in TIG;
 Net uart_in PULLUP;
 
-Net uart_out LOC=D6;
-Net uart_out IOSTANDARD = LVCMOS33;
+Net uart_out LOC=AG20;
+#Net uart_out IOSTANDARD = LVCMOS33;
 Net uart_out TIG;
 Net uart_out PULLUP;
 
 ## VGA ##############################################################################
 
-net "vga_hsync" loc   = f9;
-net "vga_hsync" slew  = slow;
-net "vga_hsync" drive = 2;
-
-net "vga_vsync" loc   = h10;
-net "vga_vsync" slew  = slow;
-net "vga_vsync" drive = 2;
-
-net "vga_clkout"  loc ="c12";
-net "vga_clkout"  slew = fast;
-net "vga_clkout"  drive = 8;
-
-net "vga_r<7>" loc ="h8";
-net "vga_r<6>" loc ="c5";
-net "vga_r<5>" loc ="h9";
-net "vga_r<4>" loc ="g12";
-net "vga_r<3>" loc ="g11";
-net "vga_r<2>" loc ="g10";
-net "vga_r<1>" loc ="f11";
-net "vga_r<0>" loc ="f10";
-net "vga_r<*>" slew = slow;
-net "vga_r<*>" drive = 2;
-
-net "vga_g<7>" loc ="d5";
-net "vga_g<6>" loc ="d4";
-net "vga_g<5>" loc ="f8";
-net "vga_g<4>" loc ="e13";
-net "vga_g<3>" loc ="e12";
-net "vga_g<2>" loc ="e11";
-net "vga_g<1>" loc ="e9";
-net "vga_g<0>" loc ="e8";
-net "vga_g<*>" slew = slow;
-net "vga_g<*>" drive = 2;
-
-net "vga_b<7>" loc ="c4";
-net "vga_b<6>" loc ="c3";
-net "vga_b<5>" loc ="d12";
-net "vga_b<4>" loc ="d11";
-net "vga_b<3>" loc ="d10";
-net "vga_b<2>" loc ="d9";
-net "vga_b<1>" loc ="c13";
-net "vga_b<0>" loc ="g8";
-net "vga_b<*>" slew = slow;
-net "vga_b<*>" drive = 2;
-
-net "vga_*" iostandard = lvcmos33;
-
-## DRAM ##############################################################################
+#net "vga_hsync" loc   = f9;
+#net "vga_hsync" slew  = slow;
+#net "vga_hsync" drive = 2;
+#
+#net "vga_vsync" loc   = h10;
+#net "vga_vsync" slew  = slow;
+#net "vga_vsync" drive = 2;
+#
+#net "vga_clkout"  loc ="c12";
+#net "vga_clkout"  slew = fast;
+#net "vga_clkout"  drive = 8;
+#
+#net "vga_r<7>" loc ="h8";
+#net "vga_r<6>" loc ="c5";
+#net "vga_r<5>" loc ="h9";
+#net "vga_r<4>" loc ="g12";
+#net "vga_r<3>" loc ="g11";
+#net "vga_r<2>" loc ="g10";
+#net "vga_r<1>" loc ="f11";
+#net "vga_r<0>" loc ="f10";
+#net "vga_r<*>" slew = slow;
+#net "vga_r<*>" drive = 2;
+#
+#net "vga_g<7>" loc ="d5";
+#net "vga_g<6>" loc ="d4";
+#net "vga_g<5>" loc ="f8";
+#net "vga_g<4>" loc ="e13";
+#net "vga_g<3>" loc ="e12";
+#net "vga_g<2>" loc ="e11";
+#net "vga_g<1>" loc ="e9";
+#net "vga_g<0>" loc ="e8";
+#net "vga_g<*>" slew = slow;
+#net "vga_g<*>" drive = 2;
+#
+#net "vga_b<7>" loc ="c4";
+#net "vga_b<6>" loc ="c3";
+#net "vga_b<5>" loc ="d12";
+#net "vga_b<4>" loc ="d11";
+#net "vga_b<3>" loc ="d10";
+#net "vga_b<2>" loc ="d9";
+#net "vga_b<1>" loc ="c13";
+#net "vga_b<0>" loc ="g8";
+#net "vga_b<*>" slew = slow;
+#net "vga_b<*>" drive = 2;
+#
+#net "vga_*" iostandard = lvcmos33;
+#
+
+### DRAM ##############################################################################
 
 NET "clk_pin"               TNM="SYS_CLK";
 #NET "*/*/clkgen/write_clk_u"    TNM="WRITE_CLK";
@@ -97,127 +238,302 @@ NET "clk_pin"               TNM="SYS_CLK";
 #TIMESPEC "TS_DDRREAD_DDRWRITE"=FROM "READ_CLK" TO "WRITE_CLK" TIG;
 #TIMESPEC "TS_DDRWRITE_DDRREAD"=FROM "WRITE_CLK" TO "READ_CLK" TIG;
 
-Net ddr1_Addr_pin<12> LOC=J24;
-Net ddr1_Addr_pin<12> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<11> LOC=K26;
-Net ddr1_Addr_pin<11> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<10> LOC=K24;
-Net ddr1_Addr_pin<10> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<9> LOC=K23;
-Net ddr1_Addr_pin<9> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<8> LOC=L26;
-Net ddr1_Addr_pin<8> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<7> LOC=L25;
-Net ddr1_Addr_pin<7> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<6> LOC=L24;
-Net ddr1_Addr_pin<6> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<5> LOC=M23;
-Net ddr1_Addr_pin<5> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<4> LOC=N24;
-Net ddr1_Addr_pin<4> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<3> LOC=N23;
-Net ddr1_Addr_pin<3> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<2> LOC=N22;
-Net ddr1_Addr_pin<2> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<1> LOC=P22;
-Net ddr1_Addr_pin<1> IOSTANDARD = SSTL2_I;
-Net ddr1_Addr_pin<0> LOC=P24;
-Net ddr1_Addr_pin<0> IOSTANDARD = SSTL2_I;
-Net ddr1_BankAddr_pin<1> LOC=J26;
-Net ddr1_BankAddr_pin<1> IOSTANDARD = SSTL2_I;
-Net ddr1_BankAddr_pin<0> LOC=J25;
-Net ddr1_BankAddr_pin<0> IOSTANDARD = SSTL2_I;
-Net ddr1_CAS_n_pin LOC=D26;
-Net ddr1_CAS_n_pin IOSTANDARD = SSTL2_I;
-Net ddr1_CE_pin LOC=H14;
-Net ddr1_CE_pin IOSTANDARD = SSTL2_I;
-Net ddr1_CS_n_pin LOC=C27;
-Net ddr1_CS_n_pin IOSTANDARD = SSTL2_I;
-Net ddr1_RAS_n_pin LOC=D27;
-Net ddr1_RAS_n_pin IOSTANDARD = SSTL2_I;
-Net ddr1_WE_n_pin LOC=E27;
-Net ddr1_WE_n_pin IOSTANDARD = SSTL2_I;
-Net ddr1_DM_pin<0> LOC=F21;
-Net ddr1_DM_pin<0> IOSTANDARD = SSTL2_II;
-Net ddr1_DM_pin<1> LOC=G22;
-Net ddr1_DM_pin<1> IOSTANDARD = SSTL2_II;
-Net ddr1_DM_pin<2> LOC=E23;
-Net ddr1_DM_pin<2> IOSTANDARD = SSTL2_II;
-Net ddr1_DM_pin<3> LOC=G23;
-Net ddr1_DM_pin<3> IOSTANDARD = SSTL2_II;
-Net ddr1_DQS<0> LOC=F20;
-Net ddr1_DQS<0> IOSTANDARD = SSTL2_II;
-Net ddr1_DQS<1> LOC=G20;
-Net ddr1_DQS<1> IOSTANDARD = SSTL2_II;
-Net ddr1_DQS<2> LOC=G25;
-Net ddr1_DQS<2> IOSTANDARD = SSTL2_II;
-Net ddr1_DQS<3> LOC=F25;
-Net ddr1_DQS<3> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<0> LOC=E17;
-Net ddr1_DQ<0> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<1> LOC=E18;
-Net ddr1_DQ<1> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<2> LOC=F18;
-Net ddr1_DQ<2> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<3> LOC=G18;
-Net ddr1_DQ<3> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<4> LOC=F19;
-Net ddr1_DQ<4> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<5> LOC=E19;
-Net ddr1_DQ<5> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<6> LOC=D21;
-Net ddr1_DQ<6> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<7> LOC=E21;
-Net ddr1_DQ<7> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<8> LOC=G21;
-Net ddr1_DQ<8> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<9> LOC=H20;
-Net ddr1_DQ<9> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<10> LOC=J20;
-Net ddr1_DQ<10> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<11> LOC=J21;
-Net ddr1_DQ<11> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<12> LOC=K21;
-Net ddr1_DQ<12> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<13> LOC=L21;
-Net ddr1_DQ<13> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<14> LOC=J22;
-Net ddr1_DQ<14> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<15> LOC=H22;
-Net ddr1_DQ<15> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<16> LOC=C22;
-Net ddr1_DQ<16> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<17> LOC=C23;
-Net ddr1_DQ<17> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<18> LOC=C24;
-Net ddr1_DQ<18> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<19> LOC=C25;
-Net ddr1_DQ<19> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<20> LOC=D22;
-Net ddr1_DQ<20> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<21> LOC=D24;
-Net ddr1_DQ<21> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<22> LOC=D25;
-Net ddr1_DQ<22> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<23> LOC=C28;
-Net ddr1_DQ<23> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<24> LOC=F23;
-Net ddr1_DQ<24> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<25> LOC=F24;
-Net ddr1_DQ<25> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<26> LOC=F26;
-Net ddr1_DQ<26> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<27> LOC=G26;
-Net ddr1_DQ<27> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<28> LOC=H25;
-Net ddr1_DQ<28> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<29> LOC=H24;
-Net ddr1_DQ<29> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<30> LOC=E24;
-Net ddr1_DQ<30> IOSTANDARD = SSTL2_II;
-Net ddr1_DQ<31> LOC=E22;
-Net ddr1_DQ<31> IOSTANDARD = SSTL2_II;
-Net ddr1_Clk_pin LOC=F28;
-Net ddr1_Clk_pin IOSTANDARD = SSTL2_II;
-Net ddr1_Clk_n_pin LOC=E28;
-Net ddr1_Clk_n_pin IOSTANDARD = SSTL2_II;
+
+##==============================================================================
+##     File:           $URL: svn+ssh://repositorypub@repository.eecs.berkeley.edu/public/Projects/GateLib/branches/dev/Firmware/DRAM/Hardware/DDR2SDRAM/Constraints/DDR2SDRAM_ML505_110.ucf $
+##     Version:        $Revision: 16601 $
+##     Author:         Greg Gibeling (http://www.eecs.berkeley.edu/~gdgib/)
+##     Copyright:      Copyright 2005-2008 UC Berkeley
+##==============================================================================
+
+##==============================================================================
+##     Section:        License
+##==============================================================================
+##     Copyright (c) 2005-2008, Regents of the University of California
+##     All rights reserved.
+##
+##     Redistribution and use in source and binary forms, with or without modification,
+##     are permitted provided that the following conditions are met:
+##
+##             - Redistributions of source code must retain the above copyright notice,
+##                     this list of conditions and the following disclaimer.
+##             - Redistributions in binary form must reproduce the above copyright
+##                     notice, this list of conditions and the following disclaimer
+##                     in the documentation and/or other materials provided with the
+##                     distribution.
+##             - Neither the name of the University of California, Berkeley nor the
+##                     names of its contributors may be used to endorse or promote
+##                     products derived from this software without specific prior
+##                     written permission.
+##
+##     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+##     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+##     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+##     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+##     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+##     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+##     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+##     ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+##     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+##     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+##==============================================================================
+
+##==============================================================================
+##     Xilinx, Inc. 2006            www.xilinx.com 
+##     Mon Jul 7 12:05:40 2008
+##     Generated by MIG Version 2.2
+##==============================================================================
+##     File name:      ddr2_sdram.ucf
+##     Details:        Constraints file
+##                             FPGA family:            virtex5
+##                             FPGA:                           xc5vlx110t-ff1136
+##                             Speedgrade:                     -1
+##                             Design Entry:           VERILOG
+##                             Frequency:                      266.66 MHz
+##                             Design:                         without Test bench
+##                             DCM Used:                       Disable
+##                             Two Bytes per Bank: Disable
+##                             Compatible FPGA's: xc5vlx85t-ff1136,xc5vlx50t-ff1136,xc5vlx155t-ff1136,xc5vfx70t-ff1136,xc5vfx100t-ff1136,xc5vsx50t-ff1136,xc5vsx95t-ff1136
+##                             No.Of Controllers:      1
+##==============================================================================
+
+##------------------------------------------------------------------------------
+##     Section:        Pin Constraints (use FPGA_TOP instead)
+##------------------------------------------------------------------------------
+#NET  "ddr2_dq[*]"                               IOSTANDARD = SSTL18_II_DCI;
+#NET  "ddr2_a[*]"                                IOSTANDARD = SSTL18_II;
+#NET  "ddr2_ba[*]"                               IOSTANDARD = SSTL18_II;
+#NET  "ddr2_ras_n"                               IOSTANDARD = SSTL18_II;
+#NET  "ddr2_cas_n"                               IOSTANDARD = SSTL18_II;
+#NET  "ddr2_we_n"                                IOSTANDARD = SSTL18_II;
+#NET  "ddr2_cs_n[*]"                             IOSTANDARD = SSTL18_II;
+#NET  "ddr2_odt[*]"                              IOSTANDARD = SSTL18_II;
+#NET  "ddr2_cke[*]"                              IOSTANDARD = SSTL18_II;
+#NET  "ddr2_dm[*]"                               IOSTANDARD = SSTL18_II;
+#NET  "ddr2_dqs[*]"                              IOSTANDARD = DIFF_SSTL18_II_DCI;
+#NET  "ddr2_dqs_n[*]"                            IOSTANDARD = DIFF_SSTL18_II_DCI;
+#NET  "ddr2_ck[*]"                               IOSTANDARD = DIFF_SSTL18_II;
+#NET  "ddr2_ck_n[*]"                             IOSTANDARD = DIFF_SSTL18_II;
+##------------------------------------------------------------------------------
+
+##------------------------------------------------------------------------------
+##     Section:        Clock Constraints
+##------------------------------------------------------------------------------
+NET "**/Clock" TNM_NET = "SYS_Clock";
+TIMESPEC "TS_SYS_Clock" = PERIOD "SYS_Clock" 5 ns HIGH 50 %;
+
+NET "**/ClockP90" TNM_NET = "SYS_ClockP90";
+TIMESPEC "TS_SYS_ClockP90" = PERIOD "SYS_ClockP90" 5 ns HIGH 50 %;
+
+NET "**/ClockD2" TNM_NET = "SYS_ClockD2";
+TIMESPEC "TS_SYS_ClockD2" = PERIOD "SYS_ClockD2" 7.5 ns HIGH 50 %;
+
+#NET "*/ClockF200" TNM_NET = "SYS_ClockF200";
+#TIMESPEC "TS_SYS_ClockF200" = PERIOD "SYS_ClockF200" 5 ns HIGH 50 %;
+##------------------------------------------------------------------------------
+
+##==============================================================================
+##     Section:        Controller 0
+##     Desc:           Memory Device:  DDR2_SDRAM->SODIMMs->MT4HTF3264HY-53E
+##                             Data Width:             64
+##                             Data Mask:              1
+##==============================================================================
+
+##------------------------------------------------------------------------------
+##     Section:        IDELAYCTRL Location Constraints
+##------------------------------------------------------------------------------
+INST "**/*IDELAYCTRL_INST[0].u_idelayctrl" LOC=IDELAYCTRL_X0Y2;
+INST "**/*IDELAYCTRL_INST[1].u_idelayctrl" LOC=IDELAYCTRL_X0Y1;
+INST "**/*IDELAYCTRL_INST[2].u_idelayctrl" LOC=IDELAYCTRL_X0Y6;
+##------------------------------------------------------------------------------
+
+##------------------------------------------------------------------------------
+##     Section:        Multicycle Paths
+##     Desc:           Define multicycle paths - these paths may take longer because
+##                             additional time is allowed for logic to settle in the
+##                             calibration/initialization FSM
+##------------------------------------------------------------------------------
+# MIG 2.1: Eliminate Timegroup definitions for CLK0, and CLK90. Instead trace
+#          multicycle paths from originating flip-flop to ANY destination
+#          flip-flop (or in some cases, it can also be a BRAM)
+# MUX Select for either rising/falling CLK0 for 2nd stage read capture
+INST "**/u_phy_calib/*gen_rd_data_sel*.u_ff_rd_data_sel" TNM = "TNM_RD_DATA_SEL";
+TIMESPEC "TS_MC_RD_DATA_SEL" = FROM "TNM_RD_DATA_SEL" TO FFS "TS_SYS_Clock" * 4;
+
+# MUX select for read data - optional delay on data to account for byte skews
+#INST "*/u_usr_rd/*gen_rden_sel_mux*.u_ff_rden_sel_mux" TNM = "TNM_RDEN_SEL_MUX";
+#TIMESPEC "TS_MC_RDEN_SEL_MUX" = FROM "TNM_RDEN_SEL_MUX" TO FFS "TS_SYS_Clock" * 4;
+
+# Calibration/Initialization complete status flag (for PHY logic only) - can
+# be used to drive both flip-flops and BRAMs
+INST "*/u_phy_init/u_ff_phy_init_data_sel" TNM = "TNM_PHY_INIT_DATA_SEL";
+TIMESPEC "TS_MC_PHY_INIT_DATA_SEL_0" = FROM "TNM_PHY_INIT_DATA_SEL" TO FFS "TS_SYS_Clock" * 4;
+TIMESPEC "TS_MC_PHY_INIT_DATA_SEL_90" = FROM "TNM_PHY_INIT_DATA_SEL" TO RAMS "TS_SYS_Clock" * 4;
+
+# Select (address) bits for SRL32 shift registers used in stage3/stage4
+# calibration
+INST "**/u_phy_calib/*gen_gate_dly*.u_ff_gate_dly" TNM = "TNM_GATE_DLY";
+TIMESPEC "TS_MC_GATE_DLY" = FROM "TNM_GATE_DLY" TO FFS "TS_SYS_Clock" * 4;
+INST "**/u_phy_calib/*gen_rden_dly*.u_ff_rden_dly" TNM = "TNM_RDEN_DLY";
+TIMESPEC "TS_MC_RDEN_DLY" = FROM "TNM_RDEN_DLY" TO FFS "TS_SYS_Clock" * 4;
+INST "**/u_phy_calib/*gen_cal_rden_dly*.u_ff_cal_rden_dly" TNM = "TNM_CAL_RDEN_DLY";
+TIMESPEC "TS_MC_CAL_RDEN_DLY" = FROM "TNM_CAL_RDEN_DLY" TO FFS "TS_SYS_Clock" * 4;
+##------------------------------------------------------------------------------
+
+##------------------------------------------------------------------------------
+##     Section:        DQS Squelch Constraints
+##     Desc:           DQS Read Postamble Glitch Squelch circuit related constraints
+##                             LOC placement of DQS-squelch related IDDR and IDELAY elements
+##                             Each circuit can be located at any of the following locations:
+##                                     1. Unused "N"-side of DQS differential pair I/O
+##                                     2. DM data mask (output only, input side is free for use)
+##                                     3. Any output-only site
+##------------------------------------------------------------------------------
+INST "**/*gen_dqs[0].u_iob_dqs/u_iddr_dq_ce"  LOC = "ILOGIC_X0Y96";
+INST "**/*gen_dqs[0].u_iob_dqs/u_iodelay_dq_ce"  LOC = "IODELAY_X0Y96";
+INST "**/*gen_dqs[1].u_iob_dqs/u_iddr_dq_ce"  LOC = "ILOGIC_X0Y58";
+INST "**/*gen_dqs[1].u_iob_dqs/u_iodelay_dq_ce"  LOC = "IODELAY_X0Y58";
+INST "**/*gen_dqs[2].u_iob_dqs/u_iddr_dq_ce"  LOC = "ILOGIC_X0Y62";
+INST "**/*gen_dqs[2].u_iob_dqs/u_iodelay_dq_ce"  LOC = "IODELAY_X0Y62";
+INST "**/*gen_dqs[3].u_iob_dqs/u_iddr_dq_ce"  LOC = "ILOGIC_X0Y100";
+INST "**/*gen_dqs[3].u_iob_dqs/u_iodelay_dq_ce"  LOC = "IODELAY_X0Y100";
+INST "**/*gen_dqs[4].u_iob_dqs/u_iddr_dq_ce"  LOC = "ILOGIC_X0Y102";
+INST "**/*gen_dqs[4].u_iob_dqs/u_iodelay_dq_ce"  LOC = "IODELAY_X0Y102";
+INST "**/*gen_dqs[5].u_iob_dqs/u_iddr_dq_ce"  LOC = "ILOGIC_X0Y256";
+INST "**/*gen_dqs[5].u_iob_dqs/u_iodelay_dq_ce"  LOC = "IODELAY_X0Y256";
+INST "**/*gen_dqs[6].u_iob_dqs/u_iddr_dq_ce"  LOC = "ILOGIC_X0Y260";
+INST "**/*gen_dqs[6].u_iob_dqs/u_iodelay_dq_ce"  LOC = "IODELAY_X0Y260";
+INST "**/*gen_dqs[7].u_iob_dqs/u_iddr_dq_ce"  LOC = "ILOGIC_X0Y262";
+INST "**/*gen_dqs[7].u_iob_dqs/u_iodelay_dq_ce"  LOC = "IODELAY_X0Y262";
+##------------------------------------------------------------------------------
+
+##------------------------------------------------------------------------------
+##     Section:        DQS CE LOCs
+##     Desc:           LOC and timing constraints for flop driving DQS CE enable signal
+##                             from fabric logic. Even though the absolute delay on this path
+##                             is calibrated out (when synchronizing this output to DQS), the
+##                             delay should still be kept as low as possible to reduce
+##                             post-calibration voltage/temp variations - these are roughly
+##                             proportional to the absolute delay of the path
+##------------------------------------------------------------------------------
+INST "**/u_phy_calib/*gen_gate[0].u_en_dqs_ff"  LOC = SLICE_X0Y48;
+INST "**/u_phy_calib/*gen_gate[1].u_en_dqs_ff"  LOC = SLICE_X0Y29;
+INST "**/u_phy_calib/*gen_gate[2].u_en_dqs_ff"  LOC = SLICE_X0Y31;
+INST "**/u_phy_calib/*gen_gate[3].u_en_dqs_ff"  LOC = SLICE_X0Y50;
+INST "**/u_phy_calib/*gen_gate[4].u_en_dqs_ff"  LOC = SLICE_X0Y51;
+INST "**/u_phy_calib/*gen_gate[5].u_en_dqs_ff"  LOC = SLICE_X0Y128;
+INST "**/u_phy_calib/*gen_gate[6].u_en_dqs_ff"  LOC = SLICE_X0Y130;
+INST "**/u_phy_calib/*gen_gate[7].u_en_dqs_ff"  LOC = SLICE_X0Y131;
+##------------------------------------------------------------------------------
+
+##------------------------------------------------------------------------------
+##     Section:        DQS Gate Control
+##     Desc:           Control for DQS gate - from fabric flop. Prevent "runaway"
+##                             delay - two parts to this path: (1) from fabric flop to IDELAY,
+##                             (2) from IDELAY to asynchronous reset of IDDR that drives the DQ
+##                             CE's.  This can be relaxed by the user for lower frequencies:
+##                             300MHz = 850ps, 267MHz = 900ps, 200MHz = 950ps.
+##                             In general PAR should be able to route this within 900ps over
+##                             all speed grades.
+##------------------------------------------------------------------------------
+NET "**/u_phy_io/en_dqs*" MAXDELAY = 600 ps;
+NET "**/u_phy_io/*gen_dqs*.u_iob_dqs/en_dqs_sync" MAXDELAY = 850 ps;
+##------------------------------------------------------------------------------
+
+##------------------------------------------------------------------------------
+##     Section:        IDDR Half Cycles
+##     Desc:           "Half-cycle" path constraint from IDDR to CE pin for all DQ
+##                             IDDRs for DQS Read Postamble Glitch Squelch circuit.
+##------------------------------------------------------------------------------
+# Max delay from output of IDDR to CE input of DQ IDDRs = tRPST + some slack
+#  where slack accounts for rise-time of DQS on board. For now assume slack =
+#  0.400ns (based on initial SPICE simulations, assumes use of ODT), so 
+#  time = 0.4*Tcyc + 0.40ns = 1.6ns @333MHz (1.9ns @266.66MHz, the value used in TS_DQ_CE below)
+INST "**/*gen_dqs[*].u_iob_dqs/u_iddr_dq_ce" TNM = "TNM_DQ_CE_IDDR";
+INST "**/*gen_dq[*].u_iob_dq/*gen_stg2_*.u_iddr_dq" TNM = "TNM_DQS_FLOPS";
+TIMESPEC "TS_DQ_CE" = FROM "TNM_DQ_CE_IDDR" TO "TNM_DQS_FLOPS" 1.9 ns;
+##------------------------------------------------------------------------------
+
+##------------------------------------------------------------------------------
+##     Section:        Area Group
+##     Desc:           MIG 2.2: Prevent unrelated logic from being packed into any
+##                             slices used by read data capture RPM's - if unrelated logic gets
+##                             packed into these slices, it could cause the DIRT strings that
+##                             define the IDDR -> fabric flop routing to become unroutable
+##                             during PAR stage (unrelated logic may require routing resources
+##                             required by the DIRT strings - MAP does not currently take into
+##                             account DIRT strings when placing logic)
+##------------------------------------------------------------------------------
+AREA_GROUP "DDR_CAPTURE_FFS" GROUP = CLOSED;
+##------------------------------------------------------------------------------
+
+##------------------------------------------------------------------------------
+##     Section:        DQ LOC Constraints
+##     Desc:           Location constraints for DQ read-data capture flops in fabric
+##                             (for 2nd stage capture)
+##------------------------------------------------------------------------------
+INST "**/*gen_dq[0].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y42;
+INST "**/*gen_dq[1].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y43;
+INST "**/*gen_dq[2].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y45;
+INST "**/*gen_dq[3].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y46;
+INST "**/*gen_dq[4].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y41;
+INST "**/*gen_dq[5].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y42;
+INST "**/*gen_dq[6].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y44;
+INST "**/*gen_dq[7].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y44;
+INST "**/*gen_dq[8].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y28;
+INST "**/*gen_dq[9].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y32;
+INST "**/*gen_dq[10].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y33;
+INST "**/*gen_dq[11].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y34;
+INST "**/*gen_dq[12].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y26;
+INST "**/*gen_dq[13].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y28;
+INST "**/*gen_dq[14].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y33;
+INST "**/*gen_dq[15].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y34;
+INST "**/*gen_dq[16].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y35;
+INST "**/*gen_dq[17].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y36;
+INST "**/*gen_dq[18].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y38;
+INST "**/*gen_dq[19].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y39;
+INST "**/*gen_dq[20].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y36;
+INST "**/*gen_dq[21].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y37;
+INST "**/*gen_dq[22].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y38;
+INST "**/*gen_dq[23].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y39;
+INST "**/*gen_dq[24].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y46;
+INST "**/*gen_dq[25].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y49;
+INST "**/*gen_dq[26].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y53;
+INST "**/*gen_dq[27].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y55;
+INST "**/*gen_dq[28].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y49;
+INST "**/*gen_dq[29].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y52;
+INST "**/*gen_dq[30].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y54;
+INST "**/*gen_dq[31].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y56;
+INST "**/*gen_dq[32].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y52;
+INST "**/*gen_dq[33].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y56;
+INST "**/*gen_dq[34].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y58;
+INST "**/*gen_dq[35].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y59;
+INST "**/*gen_dq[36].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y54;
+INST "**/*gen_dq[37].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y57;
+INST "**/*gen_dq[38].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y58;
+INST "**/*gen_dq[39].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y59;
+INST "**/*gen_dq[40].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y120;
+INST "**/*gen_dq[41].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y121;
+INST "**/*gen_dq[42].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y122;
+INST "**/*gen_dq[43].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y123;
+INST "**/*gen_dq[44].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y120;
+INST "**/*gen_dq[45].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y121;
+INST "**/*gen_dq[46].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y123;
+INST "**/*gen_dq[47].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y124;
+INST "**/*gen_dq[48].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y124;
+INST "**/*gen_dq[49].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y126;
+INST "**/*gen_dq[50].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y132;
+INST "**/*gen_dq[51].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y133;
+INST "**/*gen_dq[52].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y125;
+INST "**/*gen_dq[53].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y126;
+INST "**/*gen_dq[54].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y133;
+INST "**/*gen_dq[55].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y134;
+INST "**/*gen_dq[56].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y134;
+INST "**/*gen_dq[57].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y136;
+INST "**/*gen_dq[58].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y137;
+INST "**/*gen_dq[59].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y138;
+INST "**/*gen_dq[60].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y135;
+INST "**/*gen_dq[61].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y136;
+INST "**/*gen_dq[62].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y138;
+INST "**/*gen_dq[63].u_iob_dq/*gen_stg2_*.u_ff_stg2a_rise"  RLOC_ORIGIN = X0Y139;
+##------------------------------------------------------------------------------