From 01e8846e2b239892ed05bea4897d5a07bc5c32c5 Mon Sep 17 00:00:00 2001
From: "Wesley W. Terpstra" <w.terpstra@gsi.de>
Date: Tue, 9 Apr 2013 19:33:35 +0200
Subject: [PATCH] eb2: fixup inter-operation cycle line control

On the falling edge of the RX cycle line, push a request to the tag FIFO to
lower the TX cycle line. Lower the WB master cycle line as soon as there are
no in-flight operations.
---
 hdl/eb_slave_core/eb_internals_pkg.vhd |   9 +-
 hdl/eb_slave_core/eb_reg_fifo.vhd      |   2 +-
 hdl/eb_slave_core/eb_rx_fsm.vhd        | 136 ++++++++++++++-----------
 hdl/eb_slave_core/eb_slave.vhd         |  18 +---
 hdl/eb_slave_core/eb_slave_core.vhd    |   2 +-
 hdl/eb_slave_core/eb_tag_fifo.vhd      |   2 +-
 hdl/eb_slave_core/eb_tx_mux.vhd        |  20 +++-
 hdl/eb_slave_core/eb_wbm_fifo.vhd      |  28 +++--
 8 files changed, 122 insertions(+), 95 deletions(-)

diff --git a/hdl/eb_slave_core/eb_internals_pkg.vhd b/hdl/eb_slave_core/eb_internals_pkg.vhd
index 371727c..8559f92 100644
--- a/hdl/eb_slave_core/eb_internals_pkg.vhd
+++ b/hdl/eb_slave_core/eb_internals_pkg.vhd
@@ -15,6 +15,7 @@ package eb_internals_pkg is
   constant c_tag_wbm_req : t_tag := "00";
   constant c_tag_cfg_req : t_tag := "01";
   constant c_tag_pass_on : t_tag := "10";
+  constant c_tag_drop_tx : t_tag := "11";
 
   component eb_slave is
     generic(
@@ -47,9 +48,6 @@ package eb_internals_pkg is
       rx_stb_i    : in  std_logic;
       rx_dat_i    : in  t_wishbone_data;
       rx_stall_o  : out std_logic;
-      tx_cyc_o    : out std_logic;
-      
-      mux_empty_i : in  std_logic;
       
       tag_stb_o   : out std_logic;
       tag_dat_o   : out t_tag;
@@ -67,6 +65,7 @@ package eb_internals_pkg is
       wbm_stb_o   : out std_logic;
       wbm_we_o    : out std_logic;
       wbm_full_i  : in  std_logic;
+      wbm_busy_i  : in  std_logic;
       
       master_o       : out t_wishbone_master_out;
       master_stall_i : in  std_logic);
@@ -108,6 +107,7 @@ package eb_internals_pkg is
       wbm_dat_i    : in  t_wishbone_data;
       wbm_empty_i  : in  std_logic;
       
+      tx_cyc_o     : out std_logic;
       tx_stb_o     : out std_logic;
       tx_dat_o     : out t_wishbone_data;
       tx_stall_i   : in  std_logic);
@@ -173,13 +173,12 @@ package eb_internals_pkg is
       rstn_i      : in  std_logic;
       
       errreg_o    : out std_logic_vector(63 downto 0);
-      busy_o      : out std_logic;
-      
       wb_i        : in  t_wishbone_master_in;
       
       fsm_stb_i   : in  std_logic;
       fsm_we_i    : in  std_logic;
       fsm_full_o  : out std_logic;
+      fsm_busy_o  : out std_logic;
 
       mux_pop_i   : in  std_logic;
       mux_dat_o   : out t_wishbone_data;
diff --git a/hdl/eb_slave_core/eb_reg_fifo.vhd b/hdl/eb_slave_core/eb_reg_fifo.vhd
index d59ee35..1fe5d4c 100644
--- a/hdl/eb_slave_core/eb_reg_fifo.vhd
+++ b/hdl/eb_slave_core/eb_reg_fifo.vhd
@@ -50,7 +50,7 @@ end eb_fifo;
 architecture rtl of eb_fifo is
   function gcd(a, b : natural) return natural is
   begin
-    if b > a then
+    if a > b then
       return gcd(b,a);
     elsif a = 0 then
       return b;
diff --git a/hdl/eb_slave_core/eb_rx_fsm.vhd b/hdl/eb_slave_core/eb_rx_fsm.vhd
index 9e96bbb..cffe07e 100644
--- a/hdl/eb_slave_core/eb_rx_fsm.vhd
+++ b/hdl/eb_slave_core/eb_rx_fsm.vhd
@@ -19,9 +19,6 @@ entity eb_rx_fsm is
     rx_stb_i    : in  std_logic;
     rx_dat_i    : in  t_wishbone_data;
     rx_stall_o  : out std_logic;
-    tx_cyc_o    : out std_logic;
-    
-    mux_empty_i : in  std_logic;
     
     tag_stb_o   : out std_logic;
     tag_dat_o   : out t_tag;
@@ -39,6 +36,7 @@ entity eb_rx_fsm is
     wbm_stb_o   : out std_logic;
     wbm_we_o    : out std_logic;
     wbm_full_i  : in  std_logic;
+    wbm_busy_i  : in  std_logic;
     
     master_o       : out t_wishbone_master_out;
     master_stall_i : in  std_logic);
@@ -49,22 +47,23 @@ architecture behavioral of eb_rx_fsm is
 
   type t_state_RX is (S_EB_HDR, S_PROBE_ID, s_CYC_HDR, S_WR_ADR, S_WRITE, S_RD_ADR, S_READ, S_ERRORS);
   
-  signal r_tx_cyc_o    : std_logic;
-  signal r_tag_stb_o   : std_logic;
-  signal r_tag_dat_o   : t_tag;
-  signal r_pass_stb_o  : std_logic;
-  signal r_pass_dat_o  : t_wishbone_data;
-  signal r_cfg_stb_o   : std_logic;
-  signal r_wbm_stb_o   : std_logic;
-  signal r_adr_o       : t_wishbone_address;
-  signal r_we_o        : std_logic;
-  signal r_master_stb_o: std_logic;
-  signal r_wr_adr      : unsigned(t_wishbone_address'range);
-  signal r_wait_mux    : std_logic;
-  signal r_rx_cyc_hdr  : EB_CYC;
-  signal r_tx_cyc_hdr  : EB_CYC;
-  signal r_state       : t_state_RX;
-  signal s_stall       : std_logic;
+  signal r_tag_stb_o    : std_logic;
+  signal r_tag_dat_o    : t_tag;
+  signal r_pass_stb_o   : std_logic;
+  signal r_pass_dat_o   : t_wishbone_data;
+  signal r_cfg_stb_o    : std_logic;
+  signal r_wbm_stb_o    : std_logic;
+  signal r_master_cyc_o : std_logic;
+  signal r_master_stb_o : std_logic;
+  signal r_master_we_o  : std_logic;
+  signal r_master_adr_o : t_wishbone_address;
+  signal r_master_dat_o : t_wishbone_data;
+  signal r_wr_adr       : unsigned(t_wishbone_address'range);
+  signal r_rx_cyc_hdr   : EB_CYC;
+  signal r_tx_cyc_hdr   : EB_CYC;
+  signal r_rx_cyc       : std_logic;
+  signal r_state        : t_state_RX;
+  signal s_stall        : std_logic;
   
   function reply(rx_cyc_hdr : EB_CYC)
     return EB_CYC is
@@ -84,28 +83,32 @@ architecture behavioral of eb_rx_fsm is
 begin
               
   rx_stall_o <= s_stall;
-  tx_cyc_o   <= r_tx_cyc_o;
   tag_stb_o  <= r_tag_stb_o;
   tag_dat_o  <= r_tag_dat_o;
   pass_stb_o <= r_pass_stb_o;
   pass_dat_o <= r_pass_dat_o;
   cfg_stb_o  <= r_cfg_stb_o;
-  cfg_we_o   <= r_we_o;
-  cfg_adr_o  <= r_adr_o;
+  cfg_we_o   <= r_master_we_o;
+  cfg_adr_o  <= r_master_adr_o;
   wbm_stb_o  <= r_wbm_stb_o;
-  wbm_we_o   <= r_we_o;
+  wbm_we_o   <= r_master_we_o;
     
-  master_o.cyc <= not r_wait_mux or not mux_empty_i; -- Lower when mux is drained
+  master_o.cyc <= r_master_cyc_o or wbm_busy_i;
   master_o.stb <= r_master_stb_o;
-  master_o.adr <= r_adr_o;
-  master_o.we  <= r_we_o;
+  master_o.we  <= r_master_we_o;
+  master_o.adr <= r_master_adr_o;
+  master_o.dat <= r_master_dat_o;
   master_o.sel <= r_rx_cyc_hdr.sel;
-  master_o.dat <= rx_dat_i;
   
-  -- Stall if FIFOs full or we are trying to quiet the bus
-  s_stall <= pass_full_i OR tag_full_i OR wbm_full_i OR 
-             (r_wait_mux and not mux_empty_i) OR 
-	     (r_master_stb_o and master_stall_i);
+  -- Stall the RX path if:
+  --   Any TX FIFO is full (probably only tag matters)
+  --   We are pushing a strobe that is stalled
+  --   We are waiting to lower the cycle line
+  -- 
+  -- !!! could be improved to allow pipeline progress until stb/cyc need to be raised again
+  s_stall <= tag_full_i OR pass_full_i OR cfg_full_i OR wbm_full_i OR 
+             (r_master_stb_o and master_stall_i) OR
+             (not r_master_cyc_o and wbm_busy_i);
   
   fsm : process(clk_i, rstn_i) is
     variable rx_frame_hdr : EB_HDR;
@@ -115,21 +118,22 @@ begin
     variable tx_cyc_hdr   : EB_CYC;
   begin
     if (rstn_i = '0') then
-      r_tx_cyc_o    <= '0';
-      r_tag_stb_o   <= '0';
-      r_tag_dat_o   <= (others => '0');
-      r_pass_stb_o  <= '0';
-      r_pass_dat_o  <= (others => '0');
-      r_cfg_stb_o   <= '0';
-      r_wbm_stb_o   <= '0';
-      r_adr_o       <= (others => '0');
-      r_we_o        <= '0';
-      r_master_stb_o<= '0';
-      r_wr_adr      <= (others => '0');
-      r_wait_mux    <= '0';
-      r_rx_cyc_hdr  <= INIT_EB_CYC;
-      r_tx_cyc_hdr  <= INIT_EB_CYC;
-      r_state       <= S_EB_HDR;
+      r_tag_stb_o    <= '0';
+      r_tag_dat_o    <= (others => '0');
+      r_pass_stb_o   <= '0';
+      r_pass_dat_o   <= (others => '0');
+      r_cfg_stb_o    <= '0';
+      r_wbm_stb_o    <= '0';
+      r_master_cyc_o <= '0';
+      r_master_stb_o <= '0';
+      r_master_we_o  <= '0';
+      r_master_adr_o <= (others => '0');
+      r_master_dat_o <= (others => '0');
+      r_wr_adr       <= (others => '0');
+      r_rx_cyc_hdr   <= INIT_EB_CYC;
+      r_tx_cyc_hdr   <= INIT_EB_CYC;
+      r_rx_cyc       <= '0';
+      r_state        <= S_EB_HDR;
     elsif rising_edge(clk_i) then
     
       -- By default, write nowhere in particular
@@ -138,12 +142,23 @@ begin
       r_cfg_stb_o  <= '0';
       r_wbm_stb_o  <= '0';
       
+      -- Lower strobe line when it is queued
       r_master_stb_o <= r_master_stb_o and master_stall_i;
       
+      -- Register to enable detecting falling edge
+      r_rx_cyc <= rx_cyc_i;
+      
       if(rx_cyc_i = '0') then
-        r_wait_mux <= '1'; -- stop next request until mux has drained
-        r_state    <= S_EB_HDR;
-        r_tx_cyc_o <= not mux_empty_i; -- !!! might combine packets
+        -- expect a new negotiation header
+        r_state <= S_EB_HDR; 
+        -- guard against improperly terminated streams
+        r_master_cyc_o <= '0'; 
+        
+        -- On falling edge of cycle line, push a tag to drop TX cycle
+        if r_rx_cyc = '1' then
+          r_tag_stb_o <= '1';
+          r_tag_dat_o <= c_tag_drop_tx;
+        end if;
       elsif(rx_stb_i = '1' and s_stall = '0') then
         -- Every non-error state must write something
         
@@ -156,7 +171,7 @@ begin
                 (rx_frame_hdr.VER                                = c_EB_VER)
             ) then --header valid ?             
               -- Raise TX cycle line if this needs to be sent
-              r_tx_cyc_o <= NOT rx_frame_hdr.NO_RESPONSE;
+              --r_tx_cyc_o <= NOT rx_frame_hdr.NO_RESPONSE;
               
               -- Create output header
               tx_frame_hdr           := init_EB_hdr;
@@ -192,8 +207,6 @@ begin
             r_tx_cyc_hdr  <= tx_cyc_hdr;                              
             r_rx_cyc_hdr  <= rx_cyc_hdr;
             
-            r_wait_mux <= '0'; -- Re-enable pipelining
-            
             -- Write padding/header using pass fifo
             r_tag_stb_o  <= '1';
             r_tag_dat_o  <= c_tag_pass_on;
@@ -211,8 +224,8 @@ begin
               --no writes, no padding. insert the header 
               r_pass_dat_o <= to_std_logic_vector(tx_cyc_hdr);
               
-              r_wait_mux <= rx_cyc_hdr.DROP_CYC;
-              r_state    <= S_CYC_HDR;
+              r_master_cyc_o <= r_master_cyc_o and not rx_cyc_hdr.DROP_CYC;
+              r_state <= S_CYC_HDR;
             end if;
             
           when S_WR_ADR =>
@@ -232,11 +245,13 @@ begin
               r_cfg_stb_o <= '1';
             else
               r_wbm_stb_o <= '1';
+              r_master_cyc_o <= '1';
               r_master_stb_o <= '1';
             end if;
             
-            r_adr_o <= std_logic_vector(r_wr_adr);
-            r_we_o  <= '1';
+            r_master_we_o  <= '1';
+            r_master_adr_o <= std_logic_vector(r_wr_adr);
+            r_master_dat_o <= rx_dat_i;
             
             if(r_rx_cyc_hdr.WR_FIFO = '0') then
               r_wr_adr <= r_wr_adr + 4;
@@ -255,7 +270,7 @@ begin
               if (r_rx_cyc_hdr.RD_CNT /= 0) then
                 r_state <= S_RD_ADR;
               else
-                r_wait_mux <= r_rx_cyc_hdr.DROP_CYC;  
+                r_master_cyc_o <= r_master_cyc_o and not r_rx_cyc_hdr.DROP_CYC;  
                 r_state <= S_CYC_HDR;
               end if;
             end if;
@@ -280,14 +295,15 @@ begin
             else
               r_wbm_stb_o <= '1';
               r_tag_dat_o <= c_tag_wbm_req;
+              r_master_cyc_o <= '1';
               r_master_stb_o <= '1';
             end if;
             
-            r_adr_o <= rx_dat_i;
-            r_we_o  <= '0';
+            r_master_we_o  <= '0';
+            r_master_adr_o <= rx_dat_i;
             
             if(r_rx_cyc_hdr.RD_CNT = 1) then
-              r_wait_mux <= r_rx_cyc_hdr.DROP_CYC;  
+              r_master_cyc_o <= r_master_cyc_o and not r_rx_cyc_hdr.DROP_CYC;  
               r_state <= S_CYC_HDR;
             end if;
             
diff --git a/hdl/eb_slave_core/eb_slave.vhd b/hdl/eb_slave_core/eb_slave.vhd
index 24ebfaa..3f444f6 100644
--- a/hdl/eb_slave_core/eb_slave.vhd
+++ b/hdl/eb_slave_core/eb_slave.vhd
@@ -52,11 +52,8 @@ end eb_slave;
 architecture rtl of eb_slave is
   signal rstn_i : std_logic;
   
-  signal mux_empty      : std_logic;
   signal errreg         : std_logic_vector(63 downto 0);
-  signal wbm_busy       : std_logic;
   signal rx_stall       : std_logic;
-  signal tx_cyc         : std_logic;
   
   signal fsm_tag_stb    : std_logic;
   signal fsm_tag_dat    : std_logic_vector(1 downto 0);
@@ -71,6 +68,7 @@ architecture rtl of eb_slave is
   signal fsm_wbm_stb    : std_logic;
   signal fsm_wbm_we     : std_logic;
   signal wbm_fsm_full   : std_logic;
+  signal wbm_fsm_busy   : std_logic;
   
   signal mux_tag_pop    : std_logic;
   signal tag_mux_dat    : std_logic_vector(1 downto 0);
@@ -104,8 +102,6 @@ begin
       rx_stb_i    => EB_RX_i.stb,
       rx_dat_i    => EB_RX_i.dat,
       rx_stall_o  => rx_stall,
-      tx_cyc_o    => tx_cyc,
-      mux_empty_i => mux_empty,
       tag_stb_o   => fsm_tag_stb,
       tag_dat_o   => fsm_tag_dat,
       tag_full_i  => tag_fsm_full,
@@ -119,10 +115,10 @@ begin
       wbm_stb_o   => fsm_wbm_stb,
       wbm_we_o    => fsm_wbm_we,
       wbm_full_i  => wbm_fsm_full,
+      wbm_busy_i  => wbm_fsm_busy,
       master_o    => WB_master_o,
       master_stall_i => WB_master_i.stall);
 
-  EB_TX_o.cyc <= tx_cyc;
   EB_TX_o.we  <= '1';
   EB_TX_o.sel <= (others => '1');
   EB_TX_o.adr <= (others => '0');
@@ -143,6 +139,7 @@ begin
       wbm_pop_o    => mux_wbm_pop,
       wbm_dat_i    => wbm_mux_dat,
       wbm_empty_i  => wbm_mux_empty,
+      tx_cyc_o     => EB_TX_o.cyc,
       tx_stb_o     => EB_TX_o.stb,
       tx_dat_o     => EB_TX_o.dat,
       tx_stall_i   => EB_TX_i.stall);
@@ -194,20 +191,13 @@ begin
       clk_i       => clk_i,
       rstn_i      => rstn_i,
       errreg_o    => errreg,
-      busy_o      => wbm_busy,
       wb_i        => WB_master_i,
       fsm_stb_i   => fsm_wbm_stb,
       fsm_we_i    => fsm_wbm_we,
       fsm_full_o  => wbm_fsm_full,
+      fsm_busy_o  => wbm_fsm_busy,
       mux_pop_i   => mux_wbm_pop,
       mux_dat_o   => wbm_mux_dat,
       mux_empty_o => wbm_mux_empty);
 
-  mux_empty <= 
-    not wbm_busy   and 
---    wbm_mux_empty  and -- redundant
---    cfg_mux_empty  and 
---    pass_mux_empty and
-    tag_mux_empty;
-
 end rtl;
diff --git a/hdl/eb_slave_core/eb_slave_core.vhd b/hdl/eb_slave_core/eb_slave_core.vhd
index d89d721..4ff4628 100644
--- a/hdl/eb_slave_core/eb_slave_core.vhd
+++ b/hdl/eb_slave_core/eb_slave_core.vhd
@@ -186,7 +186,7 @@ begin
       my_ip_i   => CFG_MY_IP,
       my_port_i => CFG_MY_PORT,
       my_vlan_i => (others => '0'),
-      silent_i  => EB_2_TXCTRL_wb_master.cyc,
+      silent_i  => '0',
       valid_i   => RXCTRL_2_TXCTRL_valid
 
       );
diff --git a/hdl/eb_slave_core/eb_tag_fifo.vhd b/hdl/eb_slave_core/eb_tag_fifo.vhd
index a0fe4e6..7d26993 100644
--- a/hdl/eb_slave_core/eb_tag_fifo.vhd
+++ b/hdl/eb_slave_core/eb_tag_fifo.vhd
@@ -46,7 +46,7 @@ begin
 
   fifo : eb_fifo
     generic map(
-      g_width => 2,
+      g_width => t_tag'length,
       g_size  => 1024) -- 4* the length of the other FIFOs
     port map(
       clk_i     => clk_i,
diff --git a/hdl/eb_slave_core/eb_tx_mux.vhd b/hdl/eb_slave_core/eb_tx_mux.vhd
index 1eb1b47..37e5c19 100644
--- a/hdl/eb_slave_core/eb_tx_mux.vhd
+++ b/hdl/eb_slave_core/eb_tx_mux.vhd
@@ -48,6 +48,7 @@ entity eb_tx_mux is
     wbm_dat_i    : in  t_wishbone_data;
     wbm_empty_i  : in  std_logic;
     
+    tx_cyc_o     : out std_logic;
     tx_stb_o     : out std_logic;
     tx_dat_o     : out t_wishbone_data;
     tx_stall_i   : in  std_logic);
@@ -55,6 +56,7 @@ end eb_tx_mux;
 
 architecture rtl of eb_tx_mux is
 
+  signal r_tx_cyc    : std_logic;
   signal r_tx_stb    : std_logic;
   signal s_can_tx    : std_logic;
   signal s_dat_empty : std_logic;
@@ -73,15 +75,21 @@ begin
   -- We can write whenever TX is unstalled and/or not full
   s_can_tx <= not r_tx_stb or not tx_stall_i;
   
+  tx_cyc_o <= r_tx_cyc;
   tx_stb_o <= r_tx_stb;
   tx_out : process(rstn_i, clk_i) is
   begin
     if rstn_i = '0' then
+      r_tx_cyc <= '0';
       r_tx_stb <= '0';
       tx_dat_o <= (others => '0');
     elsif rising_edge(clk_i) then
       -- Can we push the data?
       if s_can_tx = '1' then
+        if r_tag_valid = '1' then
+          r_tx_cyc <= f_active_high(r_tag_value /= c_tag_drop_tx);
+        end if;
+        
         r_tx_stb <= not s_dat_empty and r_tag_valid;
         tx_dat_o <= s_dat_value;
       end if;
@@ -96,15 +104,17 @@ begin
   
   with r_tag_value select
   s_dat_empty <= 
-    cfg_empty_i  when c_tag_cfg_req,
     pass_empty_i when c_tag_pass_on,
-    wbm_empty_i  when others;
+    cfg_empty_i  when c_tag_cfg_req,
+    wbm_empty_i  when c_tag_wbm_req,
+    '0'          when others;
 
   with r_tag_value select
   s_dat_value <= 
-    cfg_dat_i  when c_tag_cfg_req,
-    pass_dat_i when c_tag_pass_on,
-    wbm_dat_i  when others;
+    pass_dat_i      when c_tag_pass_on,
+    cfg_dat_i       when c_tag_cfg_req,
+    wbm_dat_i       when c_tag_wbm_req,
+    (others => '-') when others;
     
   -- Pop the tag FIFO if the register is empty/emptied
   tag_pop_o <= not tag_empty_i and (s_tag_pop or not r_tag_valid);
diff --git a/hdl/eb_slave_core/eb_wbm_fifo.vhd b/hdl/eb_slave_core/eb_wbm_fifo.vhd
index d899423..2687507 100644
--- a/hdl/eb_slave_core/eb_wbm_fifo.vhd
+++ b/hdl/eb_slave_core/eb_wbm_fifo.vhd
@@ -33,12 +33,11 @@ entity eb_wbm_fifo is
     rstn_i      : in  std_logic;
     
     errreg_o    : out std_logic_vector(63 downto 0);
-    busy_o      : out std_logic;
-    
     wb_i        : in  t_wishbone_master_in;
     
     fsm_stb_i   : in  std_logic;
     fsm_we_i    : in  std_logic;
+    fsm_busy_o  : out std_logic;
     fsm_full_o  : out std_logic;
 
     mux_pop_i   : in  std_logic;
@@ -54,6 +53,7 @@ architecture rtl of eb_wbm_fifo is
   signal r_timeout     : unsigned(20 downto 0);
   signal r_kill_ack    : std_logic;
   signal r_inflight    : unsigned(c_depth-1 downto 0);
+  signal r_queued      : unsigned(c_depth-1 downto 0);
   signal r_full        : std_logic;
   signal r_errreg      : std_logic_vector(63 downto 0);
   signal s_wb_i_rdy    : std_logic;
@@ -77,7 +77,7 @@ begin
     if rstn_i = '0' then
       r_full <= '0';
     elsif rising_edge(clk_i) then
-      if r_inflight < c_size-2 then
+      if r_queued < c_size-2 then
         r_full <= '0';
       else
         r_full <= '1';
@@ -89,25 +89,37 @@ begin
   begin
     if rstn_i = '0' then
       r_inflight <= (others => '0');
-      busy_o     <= '0';
+      r_queued   <= (others => '0');
+      fsm_busy_o <= '0';
     elsif rising_edge(clk_i) then
       if fsm_stb_i = '1' then
-        busy_o <= '1';
-        if s_fifo_pop = '1' then
+        fsm_busy_o <= '1';
+        
+        if s_wb_i_rdy = '1' then
           r_inflight <= r_inflight;
         else
           r_inflight <= r_inflight + 1;
         end if;
+        if s_fifo_pop = '1' then
+          r_queued <= r_queued;
+        else
+          r_queued <= r_queued + 1;
+        end if;
       else
         if r_inflight = 0 then
-          busy_o <= '0';
+          fsm_busy_o <= '0';
         end if;
         
-        if s_fifo_pop = '1' then
+        if s_wb_i_rdy = '1' then
           r_inflight <= r_inflight - 1;
         else
           r_inflight <= r_inflight;
         end if;
+        if s_fifo_pop = '1' then
+          r_queued <= r_queued - 1;
+        else
+          r_queued <= r_queued;
+        end if;
       end if;
     end if;
   end process;
-- 
GitLab