From c7e859e9b3243033a707fafedce206d17e52d2b3 Mon Sep 17 00:00:00 2001
From: Grzegorz Daniluk <grzegorz.daniluk@cern.ch>
Date: Mon, 1 Aug 2016 11:06:52 +0200
Subject: [PATCH] wrsw_nic: Random Early Detection for b/w throttling

---
 modules/wrsw_nic/nic_bw_throttling.vhd  | 213 +++++++++++++++---------
 modules/wrsw_nic/nic_rx_fsm.vhd         |  23 ++-
 modules/wrsw_nic/nic_wbgen2_pkg.vhd     |  20 ++-
 modules/wrsw_nic/nic_wishbone_slave.vhd |  29 +++-
 modules/wrsw_nic/wr_nic.wb              |  16 +-
 5 files changed, 195 insertions(+), 106 deletions(-)

diff --git a/modules/wrsw_nic/nic_bw_throttling.vhd b/modules/wrsw_nic/nic_bw_throttling.vhd
index 3ca233ee..22c3da7a 100644
--- a/modules/wrsw_nic/nic_bw_throttling.vhd
+++ b/modules/wrsw_nic/nic_bw_throttling.vhd
@@ -1,13 +1,49 @@
+-------------------------------------------------------------------------------
+-- Title      : Rx bandwidth throttling
+-- Project    : WhiteRabbit Switch
+-------------------------------------------------------------------------------
+-- File       : nic_bw_throttling.vhd
+-- Author     : Grzegorz Daniluk
+-- Company    : CERN BE-Co-HT
+-- Created    : 2016-07-28
+-- Platform   : FPGA-generic
+-- Standard   : VHDL
+-------------------------------------------------------------------------------
+-- Description:
+-- Module implementing Random Early Detection algorithm for throttling the
+-- bandwidth of RX traffic on NIC.
+-------------------------------------------------------------------------------
+--
+-- Copyright (c) 2016 CERN / BE-CO-HT
+--
+-- This source file is free software; you can redistribute it
+-- and/or modify it under the terms of the GNU Lesser General
+-- Public License as published by the Free Software Foundation;
+-- either version 2.1 of the License, or (at your option) any
+-- later version.
+--
+-- This source is distributed in the hope that it will be
+-- useful, but WITHOUT ANY WARRANTY; without even the implied
+-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+-- PURPOSE.  See the GNU Lesser General Public License for more
+-- details.
+--
+-- You should have received a copy of the GNU Lesser General
+-- Public License along with this source; if not, download it
+-- from http://www.gnu.org/licenses/lgpl-2.1.html
+--
+-------------------------------------------------------------------------------
+-- Revisions  :
+-- Date        Version  Author          Description
+-- 2016-08-01  1.0      greg.d          Created
+-------------------------------------------------------------------------------
 library IEEE;
 use ieee.std_logic_1164.all;
 use ieee.numeric_std.all;
 
 use work.wr_fabric_pkg.all;
---use work.gencores_pkg.all;
 
 entity nic_bw_throttling is
-  generic (
-    g_true_random : boolean := false);
   port (
     clk_sys_i   : in  std_logic;
     rst_n_i     : in  std_logic;
@@ -20,88 +56,99 @@ entity nic_bw_throttling is
     src_o   : out t_wrf_source_out;
     src_i   : in  t_wrf_source_in;
 
-    bw_o    : out std_logic_vector(31 downto 0);
-    rnd_o   : out std_logic_vector(31 downto 0));
+    new_limit_i  : in std_logic;
+    bwmax_kbps_i : in  unsigned(15 downto 0);
+    bw_bps_o     : out std_logic_vector(31 downto 0));
 end nic_bw_throttling;
 
 architecture behav of nic_bw_throttling is
 
-  signal bw_cnt : unsigned(31 downto 0);
-  signal bw_reg : unsigned(31 downto 0);
-  signal is_data : std_logic;
+  signal bw_bps_cnt : unsigned(31 downto 0);
+  signal is_data    : std_logic;
+  signal src_out    : t_wrf_source_out;
 
   signal drop_frame : std_logic;
   type t_fwd_fsm is (WAIT_FRAME, FLUSH, PASS, DROP);
   signal state_fwd : t_fwd_fsm;
-  signal wrf_reg : t_wrf_sink_in;
-
-  signal ring_out : std_logic_vector(31 downto 0);
-  signal rnd_reg  : std_logic_vector(31 downto 0);
-  --attribute keep : string;
-  --attribute keep of ring_out : signal is "true";
-  --attribute keep_hierarchy : string;
-  --attribute keep_hierarchy of behav : architecture is "true";
-  attribute S : string;
-  attribute S of ring_out : signal is "true";
-
-  constant c_LFSR_START : std_logic_vector := x"A5A5";
+  signal wrf_reg   : t_wrf_sink_in;
+
+  signal rnd_reg  : unsigned(7 downto 0);
+
+  constant c_LFSR_START   : unsigned(7 downto 0) := x"A5";
+  constant c_DROP_STEP    : unsigned(7 downto 0) := x"20"; --32
+  constant c_DROP_THR_MAX : unsigned(8 downto 0) := to_unsigned(256, 9);
+  signal drop_thr   : unsigned(8 downto 0); -- 1 more bit than rnd_reg
+  -- so that we can have drop_thr larger than any random number and drop the
+  -- whole traffic.
+  signal bwmin_kbps    : unsigned(15 downto 0);
+  signal bwcur_kbps    : unsigned(31 downto 0);
+  signal last_thr_kbps : unsigned(31 downto 0);
+  signal thr_step_kbps : unsigned(15 downto 0);
+
 begin
 
   -------------------------------------------------
-  --          Random number generation           --
+  --      Pseudo-random number generation        --
+  --   based on LFSR x^8 + x^6 + x^5 + x^4 + 1   --
   -------------------------------------------------
-  GEN_RND: if g_true_random generate
-    -- based on Generalized Ring Oscillator
-    ring_out(0) <= ring_out(31) xnor ring_out(0) xnor ring_out(1);
-    GEN_RND: for I in 1 to 30 generate
-      ring_out(I) <= ring_out(I-1) xor ring_out(I) xor ring_out(I+1);
-    end generate;
-    ring_out(31) <= ring_out(30) xor ring_out(31) xor ring_out(0);
-
-    --GEN_ANTI_META: for J in 0 to 31 generate
-    --  SYNC_FFS: gc_sync_ffs
-    --    port map (
-    --      clk_i    => clk_sys_i,
-    --      rst_n_i  => rst_n_i,
-    --      data_i   => ring_out(J),
-    --      synced_o => rnd_reg(J));
-    --end generate;
-    process(clk_sys_i)
-    begin
-      if rising_edge(clk_sys_i) then
-        if rst_n_i = '0' then
-          rnd_reg <= (others=>'0');
-        else
-          rnd_reg <= ring_out;
-        end if;
+  process(clk_sys_i)
+  begin
+    if rising_edge(clk_sys_i) then
+      if rst_n_i = '0' then
+        rnd_reg(7 downto 0) <= c_LFSR_START;
+      else
+        rnd_reg(0) <= rnd_reg(7) xor rnd_reg(5) xor rnd_reg(4) xor rnd_reg(3);
+        rnd_reg(7 downto 1) <= rnd_reg(6 downto 0);
       end if;
-    end process;
-  end generate;
-
-  GEN_PSEUDO_RND: if not g_true_random generate
-    -- based on LSFR x^16 + x^15 + x^13 + x^4 + 1
-    process(clk_sys_i)
-    begin
-      if rising_edge(clk_sys_i) then
-        if rst_n_i = '0' then
-          rnd_reg(31 downto 0) <= (others=>'0');
-          rnd_reg(15 downto 0) <= c_LFSR_START;
-        else
-          rnd_reg(0) <= rnd_reg(15) xor rnd_reg(14) xor rnd_reg(12) xor rnd_reg(3);
-          rnd_reg(15 downto 1) <= rnd_reg(14 downto 0);
+    end if;
+  end process;
+
+
+  -------------------------------------------------
+  -- Monitoring b/w and generating drop decisions--
+  -------------------------------------------------
+  drop_frame <= '1' when (rnd_reg < drop_thr) else
+                '0';
+
+  -- set min b/w from which we start the throttling
+  -- set it to half of the required max b/w
+  bwmin_kbps <= shift_right(bwmax_kbps_i, 1);
+
+  -- convert current b/w to KBps
+  -- it's bw_bps_cnt divided by 1024 (2^10)
+  bwcur_kbps <= shift_right(bw_bps_cnt, 10);
+  
+  process(clk_sys_i)
+  begin
+    if rising_edge(clk_sys_i) then
+      if rst_n_i = '0' or new_limit_i = '1' then
+        drop_thr      <= (others=>'0');
+        last_thr_kbps <= x"0000" & bwmin_kbps;
+        thr_step_kbps <= shift_right(bwmax_kbps_i - bwmin_kbps, 3);
+      -- the span between min and max b/w is divided by 8 (because we want 8
+      -- steps, matching c_DROP_STEP = 32 for the drop_thr range 0-256)
+      else
+        if (bwcur_kbps > last_thr_kbps and drop_thr < c_DROP_THR_MAX) then
+        -- current b/w is larger than the last crossed threshold
+        -- we increase the probability of drop
+          drop_thr      <= drop_thr + c_DROP_STEP;
+          last_thr_kbps <= last_thr_kbps + thr_step_kbps;
+
+        elsif (bwcur_kbps + thr_step_kbps < last_thr_kbps and drop_thr > 0) then
+        -- current b/w has dropped below the last crossed threshold,
+        -- we decrease the probability of drop
+          drop_thr      <= drop_thr - c_DROP_STEP;
+          last_thr_kbps <= last_thr_kbps - thr_step_kbps;
         end if;
-      end if;
-    end process;
 
-  end generate;
+      end if;
+    end if;
+  end process;
 
-  rnd_o <= rnd_reg;
 
   -------------------------------------------------
   --        Forwarding or dropping frames        --
   -------------------------------------------------
-  drop_frame <= '0';
-
   process(clk_sys_i)
   begin
     if rising_edge(clk_sys_i) then
@@ -109,15 +156,15 @@ begin
         state_fwd <= WAIT_FRAME;
         wrf_reg <= c_dummy_snk_in;
 
-        snk_o <= c_dummy_src_in;
-        src_o <= c_dummy_snk_in;
+        snk_o   <= c_dummy_src_in;
+        src_out <= c_dummy_snk_in;
       else
         case state_fwd is
           when WAIT_FRAME =>
             snk_o.ack   <= '0';
             snk_o.err   <= '0';
             snk_o.rty   <= '0';
-            src_o       <= c_dummy_snk_in;
+            src_out     <= c_dummy_snk_in;
             if (snk_i.cyc='1' and snk_i.stb='1') then
               -- new frame is transmitted
               snk_o.stall <= '1';
@@ -136,14 +183,17 @@ begin
             -- flush wrf_reg stored on stall or in WAIT_FRAME
             snk_o <= src_i;
             if (src_i.stall = '0') then
-              src_o     <= wrf_reg;
+              src_out   <= wrf_reg;
               state_fwd <= PASS;
             end if;
 
           when PASS =>
             snk_o <= src_i;
             if (src_i.stall = '0') then
-              src_o <= snk_i;
+              src_out <= snk_i;
+              if (snk_i.cyc='0' and snk_i.stb='0') then
+                state_fwd <= WAIT_FRAME;
+              end if;
             else
               wrf_reg   <= snk_i;
               state_fwd <= FLUSH;
@@ -154,7 +204,7 @@ begin
             snk_o.stall <= '0';
             snk_o.err   <= '0';
             snk_o.rty   <= '0';
-            src_o       <= c_dummy_snk_in;
+            src_out     <= c_dummy_snk_in;
             if (snk_i.stb='1') then
               snk_o.ack <= '1';
             else
@@ -169,36 +219,35 @@ begin
     end if;
   end process;
 
+  src_o <= src_out;
 
   -------------------------------------------------
   -- Calculating bandwidth actually going to ARM --
   -------------------------------------------------
 
-  is_data <= '1' when (snk_i.adr=c_WRF_DATA and snk_i.cyc='1' and snk_i.stb='1') else
+  is_data <= '1' when (src_out.adr=c_WRF_DATA and src_out.cyc='1' and src_out.stb='1' and src_i.stall='0') else
              '0';
 
   process(clk_sys_i)
   begin
     if rising_edge(clk_sys_i) then
       if rst_n_i = '0' or pps_valid_i = '0' then
-        bw_cnt <= (others=>'0');
-        bw_reg <= (others=>'0');
+        bw_bps_cnt   <= (others=>'0');
+        bw_bps_o <= (others=>'0');
       elsif pps_p_i = '1' then
-        bw_reg <= bw_cnt;
-        bw_cnt <= (others=>'0');
+        bw_bps_cnt   <= (others=>'0');
+        bw_bps_o <= std_logic_vector(bw_bps_cnt);
       elsif is_data = '1' then
         -- we count incoming bytes here
-        if snk_i.sel(0) = '1' then
+        if src_out.sel(0) = '1' then
           -- 16bits carry valid data
-          bw_cnt <= bw_cnt + 2;
-        elsif snk_i.sel(0) = '0' then
+          bw_bps_cnt <= bw_bps_cnt + 2;
+        elsif src_out.sel(0) = '0' then
           -- only 8bits carry valid data
-          bw_cnt <= bw_cnt + 1;
+          bw_bps_cnt <= bw_bps_cnt + 1;
         end if;
       end if;
     end if;
   end process;
   
-  bw_o <= std_logic_vector(bw_reg);
-
 end behav;
diff --git a/modules/wrsw_nic/nic_rx_fsm.vhd b/modules/wrsw_nic/nic_rx_fsm.vhd
index a8bc4667..111ca235 100644
--- a/modules/wrsw_nic/nic_rx_fsm.vhd
+++ b/modules/wrsw_nic/nic_rx_fsm.vhd
@@ -133,8 +133,9 @@ architecture behavioral of NIC_RX_FSM is
       snk_o   : out t_wrf_sink_out;
       src_o   : out t_wrf_source_out;
       src_i   : in  t_wrf_source_in;
-      bw_o    : out std_logic_vector(31 downto 0);
-      rnd_o   : out std_logic_vector(31 downto 0));
+      new_limit_i  : in  std_logic;
+      bwmax_kbps_i : in  unsigned(15 downto 0);
+      bw_bps_o     : out std_logic_vector(31 downto 0));
   end component;
 
 
@@ -166,6 +167,7 @@ architecture behavioral of NIC_RX_FSM is
 
   signal bw_src_out : t_wrf_source_out;
   signal bw_src_in  : t_wrf_source_in;
+  signal max_bw_reg : std_logic_vector(15 downto 0);
   
 begin
 
@@ -179,8 +181,21 @@ begin
       snk_o       => snk_o,
       src_o       => bw_src_out,
       src_i       => bw_src_in,
-      bw_o        => regs_o.bw_i,
-      rnd_o       => regs_o.rnd_i);
+      new_limit_i  => regs_i.maxrxbw_load_o,
+      bwmax_kbps_i => unsigned(regs_i.maxrxbw_o),
+      bw_bps_o     => regs_o.rxbw_i);
+
+  process(clk_sys_i)
+  begin
+    if rising_edge(clk_sys_i) then
+      if rst_n_i = '0' then
+        max_bw_reg <= (others=>'0');
+      elsif regs_i.maxrxbw_load_o = '1' then
+        max_bw_reg <= regs_i.maxrxbw_o;
+      end if;
+    end if;
+  end process;
+  regs_o.maxrxbw_i <= max_bw_reg;
 
   U_Buffer : nic_elastic_buffer
     generic map (
diff --git a/modules/wrsw_nic/nic_wbgen2_pkg.vhd b/modules/wrsw_nic/nic_wbgen2_pkg.vhd
index f4ab2e05..1fddc14d 100644
--- a/modules/wrsw_nic/nic_wbgen2_pkg.vhd
+++ b/modules/wrsw_nic/nic_wbgen2_pkg.vhd
@@ -3,7 +3,7 @@
 ---------------------------------------------------------------------------------------
 -- File           : nic_wbgen2_pkg.vhd
 -- Author         : auto-generated by wbgen2 from wr_nic.wb
--- Created        : Thu Jul 28 10:18:55 2016
+-- Created        : Mon Aug  1 16:03:57 2016
 -- Standard       : VHDL'87
 ---------------------------------------------------------------------------------------
 -- THIS FILE WAS GENERATED BY wbgen2 FROM SOURCE FILE wr_nic.wb
@@ -27,8 +27,8 @@ package nic_wbgen2_pkg is
     sr_tx_error_i                            : std_logic;
     sr_cur_tx_desc_i                         : std_logic_vector(2 downto 0);
     sr_cur_rx_desc_i                         : std_logic_vector(2 downto 0);
-    bw_i                                     : std_logic_vector(31 downto 0);
-    rnd_i                                    : std_logic_vector(31 downto 0);
+    rxbw_i                                   : std_logic_vector(31 downto 0);
+    maxrxbw_i                                : std_logic_vector(15 downto 0);
     end record;
   
   constant c_nic_in_registers_init_value: t_nic_in_registers := (
@@ -38,8 +38,8 @@ package nic_wbgen2_pkg is
     sr_tx_error_i => '0',
     sr_cur_tx_desc_i => (others => '0'),
     sr_cur_rx_desc_i => (others => '0'),
-    bw_i => (others => '0'),
-    rnd_i => (others => '0')
+    rxbw_i => (others => '0'),
+    maxrxbw_i => (others => '0')
     );
     
     -- Output registers (WB slave -> user design)
@@ -54,6 +54,8 @@ package nic_wbgen2_pkg is
       sr_tx_done_load_o                        : std_logic;
       sr_tx_error_o                            : std_logic;
       sr_tx_error_load_o                       : std_logic;
+      maxrxbw_o                                : std_logic_vector(15 downto 0);
+      maxrxbw_load_o                           : std_logic;
       end record;
     
     constant c_nic_out_registers_init_value: t_nic_out_registers := (
@@ -65,7 +67,9 @@ package nic_wbgen2_pkg is
       sr_tx_done_o => '0',
       sr_tx_done_load_o => '0',
       sr_tx_error_o => '0',
-      sr_tx_error_load_o => '0'
+      sr_tx_error_load_o => '0',
+      maxrxbw_o => (others => '0'),
+      maxrxbw_load_o => '0'
       );
     function "or" (left, right: t_nic_in_registers) return t_nic_in_registers;
     function f_x_to_zero (x:std_logic) return std_logic;
@@ -102,8 +106,8 @@ tmp.sr_tx_done_i := f_x_to_zero(left.sr_tx_done_i) or f_x_to_zero(right.sr_tx_do
 tmp.sr_tx_error_i := f_x_to_zero(left.sr_tx_error_i) or f_x_to_zero(right.sr_tx_error_i);
 tmp.sr_cur_tx_desc_i := f_x_to_zero(left.sr_cur_tx_desc_i) or f_x_to_zero(right.sr_cur_tx_desc_i);
 tmp.sr_cur_rx_desc_i := f_x_to_zero(left.sr_cur_rx_desc_i) or f_x_to_zero(right.sr_cur_rx_desc_i);
-tmp.bw_i := f_x_to_zero(left.bw_i) or f_x_to_zero(right.bw_i);
-tmp.rnd_i := f_x_to_zero(left.rnd_i) or f_x_to_zero(right.rnd_i);
+tmp.rxbw_i := f_x_to_zero(left.rxbw_i) or f_x_to_zero(right.rxbw_i);
+tmp.maxrxbw_i := f_x_to_zero(left.maxrxbw_i) or f_x_to_zero(right.maxrxbw_i);
 return tmp;
 end function;
 end package body;
diff --git a/modules/wrsw_nic/nic_wishbone_slave.vhd b/modules/wrsw_nic/nic_wishbone_slave.vhd
index 1702ae62..9de19b32 100644
--- a/modules/wrsw_nic/nic_wishbone_slave.vhd
+++ b/modules/wrsw_nic/nic_wishbone_slave.vhd
@@ -3,7 +3,7 @@
 ---------------------------------------------------------------------------------------
 -- File           : nic_wishbone_slave.vhd
 -- Author         : auto-generated by wbgen2 from wr_nic.wb
--- Created        : Thu Jul 28 10:18:55 2016
+-- Created        : Mon Aug  1 16:03:57 2016
 -- Standard       : VHDL'87
 ---------------------------------------------------------------------------------------
 -- THIS FILE WAS GENERATED BY wbgen2 FROM SOURCE FILE wr_nic.wb
@@ -120,6 +120,7 @@ begin
       regs_o.sr_rec_load_o <= '0';
       regs_o.sr_tx_done_load_o <= '0';
       regs_o.sr_tx_error_load_o <= '0';
+      regs_o.maxrxbw_load_o <= '0';
       eic_idr_write_int <= '0';
       eic_ier_write_int <= '0';
       eic_isr_write_int <= '0';
@@ -133,6 +134,7 @@ begin
           regs_o.sr_rec_load_o <= '0';
           regs_o.sr_tx_done_load_o <= '0';
           regs_o.sr_tx_error_load_o <= '0';
+          regs_o.maxrxbw_load_o <= '0';
           eic_idr_write_int <= '0';
           eic_ier_write_int <= '0';
           eic_isr_write_int <= '0';
@@ -141,6 +143,7 @@ begin
           regs_o.sr_rec_load_o <= '0';
           regs_o.sr_tx_done_load_o <= '0';
           regs_o.sr_tx_error_load_o <= '0';
+          regs_o.maxrxbw_load_o <= '0';
         end if;
       else
         if ((wb_cyc_i = '1') and (wb_stb_i = '1')) then
@@ -226,13 +229,30 @@ begin
             when "0010" => 
               if (wb_we_i = '1') then
               end if;
-              rddata_reg(31 downto 0) <= regs_i.bw_i;
+              rddata_reg(31 downto 0) <= regs_i.rxbw_i;
               ack_sreg(0) <= '1';
               ack_in_progress <= '1';
             when "0011" => 
               if (wb_we_i = '1') then
+                regs_o.maxrxbw_load_o <= '1';
               end if;
-              rddata_reg(31 downto 0) <= regs_i.rnd_i;
+              rddata_reg(15 downto 0) <= regs_i.maxrxbw_i;
+              rddata_reg(16) <= 'X';
+              rddata_reg(17) <= 'X';
+              rddata_reg(18) <= 'X';
+              rddata_reg(19) <= 'X';
+              rddata_reg(20) <= 'X';
+              rddata_reg(21) <= 'X';
+              rddata_reg(22) <= 'X';
+              rddata_reg(23) <= 'X';
+              rddata_reg(24) <= 'X';
+              rddata_reg(25) <= 'X';
+              rddata_reg(26) <= 'X';
+              rddata_reg(27) <= 'X';
+              rddata_reg(28) <= 'X';
+              rddata_reg(29) <= 'X';
+              rddata_reg(30) <= 'X';
+              rddata_reg(31) <= 'X';
               ack_sreg(0) <= '1';
               ack_in_progress <= '1';
             when "1000" => 
@@ -473,7 +493,8 @@ begin
 -- Current TX descriptor
 -- Current RX descriptor
 -- Bytes-per-second
--- 32-bit random number for throttling
+-- KBytes-per-second
+  regs_o.maxrxbw_o <= wrdata_reg(15 downto 0);
 -- extra code for reg/fifo/mem: TX descriptors mem
 -- RAM block instantiation for memory: TX descriptors mem
   nic_dtx_raminst : wbgen2_dpssram
diff --git a/modules/wrsw_nic/wr_nic.wb b/modules/wrsw_nic/wr_nic.wb
index e50197ab..8791d441 100644
--- a/modules/wrsw_nic/wr_nic.wb
+++ b/modules/wrsw_nic/wr_nic.wb
@@ -153,8 +153,8 @@ top = peripheral {
    };
 
    reg {
-      name = "NIC Bandwidth Register";
-      prefix = "BW";
+      name = "NIC Current Rx Bandwidth Register";
+      prefix = "RXBW";
       field {
         name = "Bytes-per-second";
         type = SLV;
@@ -166,14 +166,14 @@ top = peripheral {
    };
 
    reg {
-      name = "NIC Random Register";
-      prefix = "RND";
+      name = "NIC Max Rx Bandwidth Register";
+      prefix = "MAXRXBW";
       field {
-        name = "32-bit random number for throttling";
+        name = "KBytes-per-second";
         type = SLV;
-        size = 32;
-        access_bus = READ_ONLY;
-        access_dev = WRITE_ONLY;
+        size = 16;
+        access_bus = READ_WRITE;
+        access_dev = READ_WRITE;
         load = LOAD_EXT;
       };
    };
-- 
GitLab