From c2e071853d498452d98e65cc401ef8e24259595d Mon Sep 17 00:00:00 2001
From: "Wesley W. Terpstra" <>
Date: Thu, 25 Apr 2013 12:02:54 +0200
Subject: [PATCH] remove gc_wfifo in favour of generic_async_fifo

 modules/common/Manifest.py         |   3 +-
 modules/common/gc_wfifo.vhd                   | 182 ------------------
 modules/common/gencores_pkg.vhd               |  31 ---
 .../wb_clock_crossing/xwb_clock_crossing.vhd  | 132 +++++++++----
 modules/wishbone/wishbone_pkg.vhd             |   3 +-
 5 files changed, 101 insertions(+), 250 deletions(-)
 delete mode 100644 modules/common/gc_wfifo.vhd

diff --git a/modules/common/Manifest.py b/modules/common/Manifest.py
index 2e185e10..b1f63397 100644
--- a/modules/common/Manifest.py
+++ b/modules/common/Manifest.py
@@ -9,5 +9,4 @@ files = [	"gencores_pkg.vhd",
-                "gc_frequency_meter.vhd",
-                "gc_wfifo.vhd"];
+                "gc_frequency_meter.vhd"];
diff --git a/modules/common/gc_wfifo.vhd b/modules/common/gc_wfifo.vhd
deleted file mode 100644
index 60b2673f..00000000
--- a/modules/common/gc_wfifo.vhd
+++ /dev/null
@@ -1,182 +0,0 @@
-library ieee;
-use ieee.std_logic_1164.all;
-use ieee.numeric_std.all;
-library work;
-use work.gencores_pkg.all;
-use work.genram_pkg.all;
-entity gc_wfifo is
-   generic(
-      sync_depth : natural := 3;
-      gray_code  : boolean := true;
-      addr_width : natural := 4;
-      data_width : natural := 32);
-   port(
-      -- write port, only set w_en when w_rdy
-      w_clk_i  : in  std_logic;
-      w_rst_n_i: in  std_logic;
-      w_rdy_o  : out std_logic;
-      w_en_i   : in  std_logic;
-      w_data_i : in  std_logic_vector(data_width-1 downto 0);
-      -- (pre)alloc port, can be unused
-      a_clk_i  : in  std_logic;
-      a_rst_n_i: in  std_logic;
-      a_rdy_o  : out std_logic;
-      a_en_i   : in  std_logic;
-      -- read port, only set r_en when r_rdy
-      -- data is valid the cycle after r_en raised
-      r_clk_i  : in  std_logic;
-      r_rst_n_i: in  std_logic;
-      r_rdy_o  : out std_logic;
-      r_en_i   : in  std_logic;
-      r_data_o : out std_logic_vector(data_width-1 downto 0));
-end gc_wfifo;
-architecture rtl of gc_wfifo is
-   -- Quartus 11 sometimes goes crazy and infers an altshift_taps! Stop it.
-   attribute altera_attribute : string; 
-   attribute altera_attribute of rtl : architecture is "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF";
-   subtype counter is unsigned(addr_width downto 0);
-   type counter_shift is array(sync_depth downto 0) of counter;
-   signal r_idx_bnry : counter;
-   signal r_idx_gray : counter;
-   signal w_idx_bnry : counter;
-   signal w_idx_gray : counter;
-   signal a_idx_bnry : counter;
-   signal a_idx_gray : counter;
-   signal r_idx_shift_w : counter_shift; -- r_idx_gray in w_clk
-   signal r_idx_shift_a : counter_shift; -- r_idx_gray in a_clk
-   signal w_idx_shift_r : counter_shift; -- w_idx_gray in r_clk
-   signal qb : std_logic_vector(data_width-1 downto 0);
-   function bin2gray(a : unsigned) return unsigned is
-      variable o : unsigned(a'length downto 0);
-   begin
-      if gray_code then
-         o := (a & '0') xor ('0' & a);
-      else
-         o := (a & '0');
-      end if;
-      return o(a'length downto 1);
-   end bin2gray;
-   function index(a : counter) return std_logic_vector is
-   begin
-      return std_logic_vector(a(addr_width-1 downto 0));
-   end index;
-   function empty(a, b : counter) return std_logic is
-   begin
-      if a = b then
-         return '1';
-      else
-         return '0';
-      end if;
-   end empty;
-   function full(a, b : counter) return std_logic is
-      variable mask : counter := (others => '0');
-   begin
-      -- In binary a full FIFO has indexes (a XOR 1000...00) = b
-      -- bin2gray is a linear function, thus:
-      --   a XOR 1000..00 = b                                iff
-      --   bin2gray(a XOR 1000...00) = bin2gray(b)           iff
-      --   bin2gray(a) XOR bin2gray(1000...00) = bin2gray(b) iif
-      --   bin2gray(a) XOR 1100..00 = bin2gray(b)
-      mask(addr_width) := '1';
-      mask := bin2gray(mask);
-      if (a xor mask) = b then
-         return '1';
-      else
-         return '0';
-      end if;
-   end full;
-   ram : generic_simple_dpram
-     generic map(
-       g_data_width               => data_width,
-       g_size                     => 2**addr_width,
-       g_addr_conflict_resolution => "dont_care",
-       g_dual_clock               => gray_code)
-     port map(
-       clka_i => w_clk_i,
-       wea_i  => w_en_i,
-       aa_i   => index(w_idx_bnry),
-       da_i   => w_data_i,
-       clkb_i => r_clk_i,
-       ab_i   => index(r_idx_bnry),
-       qb_o   => qb);
-   read : process(r_clk_i)
-      variable idx : counter;
-   begin
-      if rising_edge(r_clk_i) then
-         if r_rst_n_i = '0' then
-            idx := (others => '0');
-            r_data_o <= qb;
-         elsif r_en_i = '1' then
-            idx := r_idx_bnry + 1;
-            r_data_o <= qb;
-         else
-            idx := r_idx_bnry;
-            --r_data_o <= r_data_o; --implied
-         end if;
-         r_idx_bnry <= idx;
-         r_idx_gray <= bin2gray(idx);
-         if sync_depth > 0 then
-           w_idx_shift_r(sync_depth downto 1) <= w_idx_shift_r(sync_depth-1 downto 0);
-         end if;
-      end if;
-   end process;
-   w_idx_shift_r(0) <= w_idx_gray;
-   r_rdy_o <= not empty(r_idx_gray, w_idx_shift_r(sync_depth));
-   write : process(w_clk_i)
-     variable idx : counter;
-   begin
-      if rising_edge(w_clk_i) then
-         if w_rst_n_i = '0' then
-            idx := (others => '0');
-         elsif w_en_i = '1' then
-            idx := w_idx_bnry + 1;
-         else
-            idx := w_idx_bnry;
-         end if;
-         w_idx_bnry <= idx;
-         w_idx_gray <= bin2gray(idx);
-         if sync_depth > 0 then
-           r_idx_shift_w(sync_depth downto 1) <= r_idx_shift_w(sync_depth-1 downto 0);
-         end if;
-      end if;
-   end process;
-   r_idx_shift_w(0) <= r_idx_gray;
-   w_rdy_o <= not full(w_idx_gray, r_idx_shift_w(sync_depth));
-   alloc : process(a_clk_i)
-     variable idx : counter;
-   begin
-      if rising_edge(a_clk_i) then
-         if a_rst_n_i = '0' then
-            idx := (others => '0');
-         elsif a_en_i = '1' then
-            idx := a_idx_bnry + 1;
-         else
-            idx := a_idx_bnry;
-         end if;
-         a_idx_bnry <= idx;
-         a_idx_gray <= bin2gray(idx);
-         if sync_depth > 0 then
-           r_idx_shift_a(sync_depth downto 1) <= r_idx_shift_a(sync_depth-1 downto 0);
-         end if;
-      end if;
-   end process;
-   r_idx_shift_a(0) <= r_idx_gray;
-   a_rdy_o <= not full(a_idx_gray, r_idx_shift_a(sync_depth));
-end rtl;
diff --git a/modules/common/gencores_pkg.vhd b/modules/common/gencores_pkg.vhd
index 91f65568..930a4392 100644
--- a/modules/common/gencores_pkg.vhd
+++ b/modules/common/gencores_pkg.vhd
@@ -189,37 +189,6 @@ package gencores_pkg is
       q_input_id_o : out std_logic_vector(f_log2_size(g_num_inputs)-1 downto 0));
   end component;
-  -- A 'Wes' FIFO. Generic FIFO using inferred memory.
-  -- Supports clock domain crossing 
-  -- Should be safe from fast->slow or reversed
-  -- Set sync_depth := 0 and gray_code := false if only one clock
-  component gc_wfifo is
-    generic(
-      sync_depth : natural := 3;
-      gray_code  : boolean := true;
-      addr_width : natural := 4;
-      data_width : natural := 32);
-    port(
-      -- write port, only set w_en when w_rdy
-      w_clk_i  : in  std_logic;
-      w_rst_n_i: in  std_logic;
-      w_rdy_o  : out std_logic;
-      w_en_i   : in  std_logic;
-      w_data_i : in  std_logic_vector(data_width-1 downto 0);
-      -- (pre)alloc port, can be unused
-      a_clk_i  : in  std_logic;
-      a_rst_n_i: in  std_logic;
-      a_rdy_o  : out std_logic;
-      a_en_i   : in  std_logic;
-      -- read port, only set r_en when r_rdy
-      -- data is valid the cycle after r_en raised
-      r_clk_i  : in  std_logic;
-      r_rst_n_i: in  std_logic;
-      r_rdy_o  : out std_logic;
-      r_en_i   : in  std_logic;
-      r_data_o : out std_logic_vector(data_width-1 downto 0));
-  end component;
   -- Power-On reset generation
   component gc_reset is
diff --git a/modules/wishbone/wb_clock_crossing/xwb_clock_crossing.vhd b/modules/wishbone/wb_clock_crossing/xwb_clock_crossing.vhd
index a068f63b..c04a628d 100644
--- a/modules/wishbone/wb_clock_crossing/xwb_clock_crossing.vhd
+++ b/modules/wishbone/wb_clock_crossing/xwb_clock_crossing.vhd
@@ -2,14 +2,13 @@ library ieee;
 use ieee.std_logic_1164.all;
 use ieee.numeric_std.all;
 use work.wishbone_pkg.all;
-use work.gencores_pkg.all;
+use work.genram_pkg.all;
 -- If you reset one clock domain, you must reset BOTH!
 -- Release of the reset lines may be arbitrarily out-of-phase
 entity xwb_clock_crossing is
-      sync_depth : natural := 3;
-      log2fifo   : natural := 4);
+      g_size : natural := 16);
       -- Slave control port
       slave_clk_i    : in  std_logic;
@@ -35,11 +34,11 @@ architecture rtl of xwb_clock_crossing is
    constant mSEL_start : natural := mDAT_end + 1;
    constant mSEL_end   : natural := mSEL_start + (c_wishbone_data_width/8) - 1;
    constant mlen       : natural := mSEL_end + 1;
    signal msend, mrecv : t_wishbone_master_out;
    signal msend_vect, mrecv_vect : std_logic_vector(mlen-1 downto 0);
-   signal mw_rdy, mw_en, mr_rdy, mr_en : std_logic;
+   signal mw_en, mr_empty, mr_en : std_logic;
    constant sACK_start : natural := 0;
    constant sACK_end   : natural := sACK_start;
    constant sRTY_start : natural := sACK_end + 1;
@@ -49,69 +48,135 @@ architecture rtl of xwb_clock_crossing is
    constant sDAT_start : natural := sERR_end + 1;
    constant sDAT_end   : natural := sDAT_start + c_wishbone_data_width - 1;
    constant slen       : natural := sDAT_end + 1;
    signal ssend, srecv : t_wishbone_slave_out;
    signal ssend_vect, srecv_vect : std_logic_vector(slen-1 downto 0);
-   signal sw_rdy, sw_en, sr_rdy, sr_en, sa_rdy, sa_en : std_logic;
+   signal sw_en, sr_empty, sr_en : std_logic;
    signal slave_CYC : std_logic;
    signal master_o_STB : std_logic;
    signal slave_o_PUSH : std_logic;
+   -- We need to limit the total number of incomplete Wishbone requests.
+   -- Consider a slow master and a fast slave:
+   --   1. The master slowly pushes a lot of requests.
+   --   2. The slave pops them immediately from the mfifo and queues them itself.
+   --   3. Suddenly, the slave can do work and answers all pending requests.
+   --   4. The slow master is unable to read the sfifo fast enough and it overflows.
+   subtype t_count is unsigned(f_ceil_log2(g_size+1)-1 downto 0); -- NOTE(review): assumes f_ceil_log2 rounds up so the value g_size fits -- confirm
+   signal mpushed : t_count; -- reset to 0; incremented per request accepted on the slave port (process 'count')
+   signal mpopped : t_count; -- reset to g_size; incremented on every slave_o_PUSH (process 'count')
+   signal full    : std_logic; -- '1' while mpushed = mpopped; drives slave_o.STALL
-   mfifo : gc_wfifo
-      generic map(addr_width => log2fifo, data_width => mlen, sync_depth => sync_depth, gray_code => true)
-      port map(w_clk_i => slave_clk_i,  w_rst_n_i => slave_rst_n_i,  w_rdy_o => mw_rdy, w_en_i => mw_en, w_data_i => msend_vect,
-               r_clk_i => master_clk_i, r_rst_n_i => master_rst_n_i, r_rdy_o => mr_rdy, r_en_i => mr_en, r_data_o => mrecv_vect,
-               a_clk_i => '0',          a_rst_n_i => '0',            a_rdy_o => open,   a_en_i => '0');
+   full <= '1' when mpushed = mpopped else '0'; -- equal once g_size requests are outstanding (mpopped starts g_size ahead)
+   count : process(slave_clk_i) is -- tracks outstanding requests, entirely in the slave_clk_i domain
+   begin
+      if rising_edge(slave_clk_i) then
+         if slave_rst_n_i = '0' then -- synchronous, active-low reset
+            mpushed <= (others => '0');
+            mpopped <= to_unsigned(g_size, t_count'length); -- head start of g_size = allowed number of outstanding requests
+         else
+            if (not full and slave_i.CYC and slave_i.STB) = '1' then -- request accepted this cycle (not stalled)
+               mpushed <= mpushed + 1; -- unsigned add wraps; only equality with mpopped is ever tested
+            end if;
+            if slave_o_PUSH = '1' then -- a response is being presented on the slave port (slave_o_PUSH gates ACK/RTY/ERR)
+               mpopped <= mpopped + 1; -- frees one slot for a new request
+            end if;
+         end if;
+      end if;
+   end process;
+   mfifo : generic_async_fifo -- request path: written in the slave_clk_i domain, read in the master_clk_i domain
+      generic map(
+         g_data_width      => mlen, -- width of the packed request vector (msend_vect/mrecv_vect)
+         g_size            => g_size)
+      port map(
+         rst_n_i           => slave_rst_n_i, -- reset taken from the writing (slave port) side
+         clk_wr_i          => slave_clk_i,
+         d_i               => msend_vect,
+         we_i              => mw_en,
+         wr_empty_o        => open, -- write-side status flags are all left unconnected
+         wr_full_o         => open,
+         wr_almost_empty_o => open,
+         wr_almost_full_o  => open,
+         wr_count_o        => open,
+         clk_rd_i          => master_clk_i,
+         q_o               => mrecv_vect,
+         rd_i              => mr_en,
+         rd_empty_o        => mr_empty, -- the only status flag used; mr_en is gated by 'not mr_empty'
+         rd_full_o         => open,
+         rd_almost_empty_o => open,
+         rd_almost_full_o  => open,
+         rd_count_o        => open);
    msend_vect(mCYC_start) <= msend.CYC;
    msend_vect(mWE_start) <= msend.WE;
    msend_vect(mADR_end downto mADR_start) <= msend.ADR;
    msend_vect(mDAT_end downto mDAT_start) <= msend.DAT;
    msend_vect(mSEL_end downto mSEL_start) <= msend.SEL;
    mrecv.CYC <= mrecv_vect(mCYC_start);
    mrecv.WE  <= mrecv_vect(mWE_start);
    mrecv.ADR <= mrecv_vect(mADR_end downto mADR_start);
    mrecv.DAT <= mrecv_vect(mDAT_end downto mDAT_start);
    mrecv.SEL <= mrecv_vect(mSEL_end downto mSEL_start);
-   sfifo : gc_wfifo
-      generic map(addr_width => log2fifo, data_width => slen, sync_depth => sync_depth, gray_code => true)
-      port map(w_clk_i => master_clk_i, w_rst_n_i => master_rst_n_i, w_rdy_o => open,   w_en_i => sw_en, w_data_i => ssend_vect,
-               r_clk_i => slave_clk_i,  r_rst_n_i => slave_rst_n_i,  r_rdy_o => sr_rdy, r_en_i => sr_en, r_data_o => srecv_vect,
-               a_clk_i => slave_clk_i,  a_rst_n_i => slave_rst_n_i,  a_rdy_o => sa_rdy, a_en_i => sa_en);
+   sfifo : generic_async_fifo -- response path: written in the master_clk_i domain, read in the slave_clk_i domain
+      generic map(
+         g_data_width      => slen, -- width of the packed response vector (ssend_vect/srecv_vect)
+         g_size            => g_size)
+      port map(
+         rst_n_i           => master_rst_n_i, -- reset taken from the writing (master port) side
+         clk_wr_i          => master_clk_i,
+         d_i               => ssend_vect,
+         we_i              => sw_en,
+         wr_empty_o        => open,
+         wr_full_o         => open, -- not checked by sw_en; the mpushed/mpopped limit is what is meant to prevent overflow
+         wr_almost_empty_o => open,
+         wr_almost_full_o  => open,
+         wr_count_o        => open,
+         clk_rd_i          => slave_clk_i,
+         q_o               => srecv_vect,
+         rd_i              => sr_en,
+         rd_empty_o        => sr_empty, -- the only status flag used; sr_en is 'not sr_empty'
+         rd_full_o         => open,
+         rd_almost_empty_o => open,
+         rd_almost_full_o  => open,
+         rd_count_o        => open);
    ssend_vect(sACK_start) <= ssend.ACK;
    ssend_vect(sRTY_start) <= ssend.RTY;
    ssend_vect(sERR_start) <= ssend.ERR;
    ssend_vect(sDAT_end downto sDAT_start) <= ssend.DAT;
    srecv.ACK <= srecv_vect(sACK_start);
    srecv.RTY <= srecv_vect(sRTY_start);
    srecv.ERR <= srecv_vect(sERR_start);
    srecv.DAT <= srecv_vect(sDAT_end downto sDAT_start);
    -- Slave clock domain: slave -> mFIFO
-   mw_en <= (mw_rdy and sa_rdy and slave_i.CYC and slave_i.STB) or 
+   mw_en <= (not full and slave_i.CYC and slave_i.STB) or 
             (not slave_i.CYC and slave_CYC); -- Masters may only drop cycle if FIFOs are empty
-   sa_en <= mw_rdy and sa_rdy and slave_i.CYC and slave_i.STB;
-   slave_o.STALL <= not mw_rdy or not sa_rdy;
+   slave_o.STALL <= full;
    msend.CYC <= slave_i.CYC;
    msend.ADR <= slave_i.ADR;
    msend.WE  <= slave_i.WE;
    msend.SEL <= slave_i.SEL;
    msend.DAT <= slave_i.DAT;
    -- Master clock domain: mFIFO -> master
-   mr_en <= mr_rdy and (not mrecv.CYC or not master_o_STB or not master_i.STALL);
+   mr_en <= not mr_empty and (not mrecv.CYC or not master_o_STB or not master_i.STALL);
    master_o.CYC <= mrecv.CYC;
    master_o.STB <= master_o_STB; -- is high outside of CYC. that's ok; it should be ignored.
    master_o.ADR <= mrecv.ADR;
    master_o.WE  <= mrecv.WE;
    master_o.SEL <= mrecv.SEL;
    master_o.DAT <= mrecv.DAT;
    drive_master_port : process(master_clk_i)
       if rising_edge(master_clk_i) then
@@ -122,21 +187,21 @@ begin
          end if;
       end if;
    end process;
    -- Master clock domain: master -> sFIFO
    sw_en <= mrecv.CYC and (master_i.ACK or master_i.ERR or master_i.RTY);
    ssend.ACK <= master_i.ACK;
    ssend.ERR <= master_i.ERR;
    ssend.RTY <= master_i.RTY;
    ssend.DAT <= master_i.DAT;
    -- Slave clock domain: sFIFO -> slave
-   sr_en <= sr_rdy;
+   sr_en <= not sr_empty;
    slave_o.DAT <= srecv.DAT;
    slave_o.ACK <= srecv.ACK and slave_o_PUSH;
    slave_o.RTY <= srecv.RTY and slave_o_PUSH;
    slave_o.ERR <= srecv.ERR and slave_o_PUSH;
    drive_slave_port : process(slave_clk_i)
       if rising_edge(slave_clk_i) then
@@ -149,4 +214,5 @@ begin
          end if;
       end if;
    end process;
 end rtl;
diff --git a/modules/wishbone/wishbone_pkg.vhd b/modules/wishbone/wishbone_pkg.vhd
index ad10e1ee..f195267d 100644
--- a/modules/wishbone/wishbone_pkg.vhd
+++ b/modules/wishbone/wishbone_pkg.vhd
@@ -334,8 +334,7 @@ package wishbone_pkg is
   -- Release of the reset lines may be arbitrarily out-of-phase
   component xwb_clock_crossing is
-      sync_depth : natural := 3;
-      log2fifo   : natural := 4);
+      g_size : natural := 16);
       -- Slave control port
       slave_clk_i    : in  std_logic;