From c7e859e9b3243033a707fafedce206d17e52d2b3 Mon Sep 17 00:00:00 2001 From: Grzegorz Daniluk <grzegorz.daniluk@cern.ch> Date: Mon, 1 Aug 2016 11:06:52 +0200 Subject: [PATCH] wrsw_nic: Random Early Detection for b/w throttling --- modules/wrsw_nic/nic_bw_throttling.vhd | 213 +++++++++++++++--------- modules/wrsw_nic/nic_rx_fsm.vhd | 23 ++- modules/wrsw_nic/nic_wbgen2_pkg.vhd | 20 ++- modules/wrsw_nic/nic_wishbone_slave.vhd | 29 +++- modules/wrsw_nic/wr_nic.wb | 16 +- 5 files changed, 195 insertions(+), 106 deletions(-) diff --git a/modules/wrsw_nic/nic_bw_throttling.vhd b/modules/wrsw_nic/nic_bw_throttling.vhd index 3ca233ee..22c3da7a 100644 --- a/modules/wrsw_nic/nic_bw_throttling.vhd +++ b/modules/wrsw_nic/nic_bw_throttling.vhd @@ -1,13 +1,49 @@ +------------------------------------------------------------------------------- +-- Title : Rx bandwidth throttling +-- Project : WhiteRabbit Switch +------------------------------------------------------------------------------- +-- File : nic_bw_throttling.vhd +-- Author : Grzegorz Daniluk +-- Company : CERN BE-Co-HT +-- Created : 2016-07-28 +-- Platform : FPGA-generic +-- Standard : VHDL +------------------------------------------------------------------------------- +-- Description: +-- Module implementing Random Early Detection algorithm for throttling the +-- bandwidth of RX traffic on NIC. +------------------------------------------------------------------------------- +-- +-- Copyright (c) 2016 CERN / BE-CO-HT +-- +-- This source file is free software; you can redistribute it +-- and/or modify it under the terms of the GNU Lesser General +-- Public License as published by the Free Software Foundation; +-- either version 2.1 of the License, or (at your option) any +-- later version. +-- +-- This source is distributed in the hope that it will be +-- useful, but WITHOUT ANY WARRANTY; without even the implied +-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +-- PURPOSE. 
See the GNU Lesser General Public License for more +-- details. +-- +-- You should have received a copy of the GNU Lesser General +-- Public License along with this source; if not, download it +-- from http://www.gnu.org/licenses/lgpl-2.1.html +-- +------------------------------------------------------------------------------- +-- Revisions : +-- Date Version Author Description +-- 2016-08-01 1.0 greg.d Created +------------------------------------------------------------------------------- library IEEE; use ieee.std_logic_1164.all; use ieee.numeric_std.all; use work.wr_fabric_pkg.all; ---use work.gencores_pkg.all; entity nic_bw_throttling is - generic ( - g_true_random : boolean := false); port ( clk_sys_i : in std_logic; rst_n_i : in std_logic; @@ -20,88 +56,99 @@ entity nic_bw_throttling is src_o : out t_wrf_source_out; src_i : in t_wrf_source_in; - bw_o : out std_logic_vector(31 downto 0); - rnd_o : out std_logic_vector(31 downto 0)); + new_limit_i : in std_logic; + bwmax_kbps_i : in unsigned(15 downto 0); + bw_bps_o : out std_logic_vector(31 downto 0)); end nic_bw_throttling; architecture behav of nic_bw_throttling is - signal bw_cnt : unsigned(31 downto 0); - signal bw_reg : unsigned(31 downto 0); - signal is_data : std_logic; + signal bw_bps_cnt : unsigned(31 downto 0); + signal is_data : std_logic; + signal src_out : t_wrf_source_out; signal drop_frame : std_logic; type t_fwd_fsm is (WAIT_FRAME, FLUSH, PASS, DROP); signal state_fwd : t_fwd_fsm; - signal wrf_reg : t_wrf_sink_in; - - signal ring_out : std_logic_vector(31 downto 0); - signal rnd_reg : std_logic_vector(31 downto 0); - --attribute keep : string; - --attribute keep of ring_out : signal is "true"; - --attribute keep_hierarchy : string; - --attribute keep_hierarchy of behav : architecture is "true"; - attribute S : string; - attribute S of ring_out : signal is "true"; - - constant c_LFSR_START : std_logic_vector := x"A5A5"; + signal wrf_reg : t_wrf_sink_in; + + signal rnd_reg : unsigned(7 downto 
0); + + constant c_LFSR_START : unsigned(7 downto 0) := x"A5"; + constant c_DROP_STEP : unsigned(7 downto 0) := x"20"; --32 + constant c_DROP_THR_MAX : unsigned(8 downto 0) := to_unsigned(256, 9); + signal drop_thr : unsigned(8 downto 0); -- 1 more bit than rnd_reg + -- so that we can have drop_thr larger than any random number and drop the + -- whole traffic. + signal bwmin_kbps : unsigned(15 downto 0); + signal bwcur_kbps : unsigned(31 downto 0); + signal last_thr_kbps : unsigned(31 downto 0); + signal thr_step_kbps : unsigned(15 downto 0); + begin ------------------------------------------------- - -- Random number generation -- + -- Pseudo-random number generation -- + -- based on LFSR x^8 + x^6 + x^5 + x^4 + 1 -- ------------------------------------------------- - GEN_RND: if g_true_random generate - -- based on Generalized Ring Oscillator - ring_out(0) <= ring_out(31) xnor ring_out(0) xnor ring_out(1); - GEN_RND: for I in 1 to 30 generate - ring_out(I) <= ring_out(I-1) xor ring_out(I) xor ring_out(I+1); - end generate; - ring_out(31) <= ring_out(30) xor ring_out(31) xor ring_out(0); - - --GEN_ANTI_META: for J in 0 to 31 generate - -- SYNC_FFS: gc_sync_ffs - -- port map ( - -- clk_i => clk_sys_i, - -- rst_n_i => rst_n_i, - -- data_i => ring_out(J), - -- synced_o => rnd_reg(J)); - --end generate; - process(clk_sys_i) - begin - if rising_edge(clk_sys_i) then - if rst_n_i = '0' then - rnd_reg <= (others=>'0'); - else - rnd_reg <= ring_out; - end if; + process(clk_sys_i) + begin + if rising_edge(clk_sys_i) then + if rst_n_i = '0' then + rnd_reg(7 downto 0) <= c_LFSR_START; + else + rnd_reg(0) <= rnd_reg(7) xor rnd_reg(5) xor rnd_reg(4) xor rnd_reg(3); + rnd_reg(7 downto 1) <= rnd_reg(6 downto 0); end if; - end process; - end generate; - - GEN_PSEUDO_RND: if not g_true_random generate - -- based on LSFR x^16 + x^15 + x^13 + x^4 + 1 - process(clk_sys_i) - begin - if rising_edge(clk_sys_i) then - if rst_n_i = '0' then - rnd_reg(31 downto 0) <= (others=>'0'); - 
rnd_reg(15 downto 0) <= c_LFSR_START; - else - rnd_reg(0) <= rnd_reg(15) xor rnd_reg(14) xor rnd_reg(12) xor rnd_reg(3); - rnd_reg(15 downto 1) <= rnd_reg(14 downto 0); + end if; + end process; + + + ------------------------------------------------- + -- Monitoring b/w and generating drop decisions-- + ------------------------------------------------- + drop_frame <= '1' when (rnd_reg < drop_thr) else + '0'; + + -- set min b/w from which we start the throttling + -- set it to half of the required max b/w + bwmin_kbps <= shift_right(bwmax_kbps_i, 1); + + -- convert current b/w to KBps + -- it's bw_bps_cnt divided by 1024 (2^10) + bwcur_kbps <= shift_right(bw_bps_cnt, 10); + + process(clk_sys_i) + begin + if rising_edge(clk_sys_i) then + if rst_n_i = '0' or new_limit_i = '1' then + drop_thr <= (others=>'0'); + last_thr_kbps <= x"0000" & bwmin_kbps; + thr_step_kbps <= shift_right(bwmax_kbps_i - bwmin_kbps, 3); + -- both max and min b/w we divide by 8 (because we want 8 steps like with + -- c_DROP_STEP = 32 for range 0-255) + else + if (bwcur_kbps > last_thr_kbps and drop_thr < c_DROP_THR_MAX) then + -- current b/w is larger than the last crossed threshold + -- we increase the probability of drop + drop_thr <= drop_thr + c_DROP_STEP; + last_thr_kbps <= last_thr_kbps + thr_step_kbps; + + elsif (bwcur_kbps + thr_step_kbps < last_thr_kbps and drop_thr > 0) then + -- current b/w has dropped below the last crossed threshold, + -- we decrease the probability of drop + drop_thr <= drop_thr - c_DROP_STEP; + last_thr_kbps <= last_thr_kbps - thr_step_kbps; end if; - end if; - end process; - end generate; + end if; + end if; + end process; - rnd_o <= rnd_reg; ------------------------------------------------- -- Forwarding or dropping frames -- ------------------------------------------------- - drop_frame <= '0'; - process(clk_sys_i) begin if rising_edge(clk_sys_i) then @@ -109,15 +156,15 @@ begin state_fwd <= WAIT_FRAME; wrf_reg <= c_dummy_snk_in; - snk_o <= c_dummy_src_in; - 
src_o <= c_dummy_snk_in; + snk_o <= c_dummy_src_in; + src_out <= c_dummy_snk_in; else case state_fwd is when WAIT_FRAME => snk_o.ack <= '0'; snk_o.err <= '0'; snk_o.rty <= '0'; - src_o <= c_dummy_snk_in; + src_out <= c_dummy_snk_in; if (snk_i.cyc='1' and snk_i.stb='1') then -- new frame is transmitted snk_o.stall <= '1'; @@ -136,14 +183,17 @@ begin -- flush wrf_reg stored on stall or in WAIT_FRAME snk_o <= src_i; if (src_i.stall = '0') then - src_o <= wrf_reg; + src_out <= wrf_reg; state_fwd <= PASS; end if; when PASS => snk_o <= src_i; if (src_i.stall = '0') then - src_o <= snk_i; + src_out <= snk_i; + if (snk_i.cyc='0' and snk_i.stb='0') then + state_fwd <= WAIT_FRAME; + end if; else wrf_reg <= snk_i; state_fwd <= FLUSH; @@ -154,7 +204,7 @@ begin snk_o.stall <= '0'; snk_o.err <= '0'; snk_o.rty <= '0'; - src_o <= c_dummy_snk_in; + src_out <= c_dummy_snk_in; if (snk_i.stb='1') then snk_o.ack <= '1'; else @@ -169,36 +219,35 @@ begin end if; end process; + src_o <= src_out; ------------------------------------------------- -- Calculating bandwidth actually going to ARM -- ------------------------------------------------- - is_data <= '1' when (snk_i.adr=c_WRF_DATA and snk_i.cyc='1' and snk_i.stb='1') else + is_data <= '1' when (src_out.adr=c_WRF_DATA and src_out.cyc='1' and src_out.stb='1' and src_i.stall='0') else '0'; process(clk_sys_i) begin if rising_edge(clk_sys_i) then if rst_n_i = '0' or pps_valid_i = '0' then - bw_cnt <= (others=>'0'); - bw_reg <= (others=>'0'); + bw_bps_cnt <= (others=>'0'); + bw_bps_o <= (others=>'0'); elsif pps_p_i = '1' then - bw_reg <= bw_cnt; - bw_cnt <= (others=>'0'); + bw_bps_cnt <= (others=>'0'); + bw_bps_o <= std_logic_vector(bw_bps_cnt); elsif is_data = '1' then -- we count incoming bytes here - if snk_i.sel(0) = '1' then + if src_out.sel(0) = '1' then -- 16bits carry valid data - bw_cnt <= bw_cnt + 2; - elsif snk_i.sel(0) = '0' then + bw_bps_cnt <= bw_bps_cnt + 2; + elsif src_out.sel(0) = '0' then -- only 8bits carry valid data - 
bw_cnt <= bw_cnt + 1; + bw_bps_cnt <= bw_bps_cnt + 1; end if; end if; end if; end process; - bw_o <= std_logic_vector(bw_reg); - end behav; diff --git a/modules/wrsw_nic/nic_rx_fsm.vhd b/modules/wrsw_nic/nic_rx_fsm.vhd index a8bc4667..111ca235 100644 --- a/modules/wrsw_nic/nic_rx_fsm.vhd +++ b/modules/wrsw_nic/nic_rx_fsm.vhd @@ -133,8 +133,9 @@ architecture behavioral of NIC_RX_FSM is snk_o : out t_wrf_sink_out; src_o : out t_wrf_source_out; src_i : in t_wrf_source_in; - bw_o : out std_logic_vector(31 downto 0); - rnd_o : out std_logic_vector(31 downto 0)); + new_limit_i : in std_logic; + bwmax_kbps_i : in unsigned(15 downto 0); + bw_bps_o : out std_logic_vector(31 downto 0)); end component; @@ -166,6 +167,7 @@ architecture behavioral of NIC_RX_FSM is signal bw_src_out : t_wrf_source_out; signal bw_src_in : t_wrf_source_in; + signal max_bw_reg : std_logic_vector(15 downto 0); begin @@ -179,8 +181,21 @@ begin snk_o => snk_o, src_o => bw_src_out, src_i => bw_src_in, - bw_o => regs_o.bw_i, - rnd_o => regs_o.rnd_i); + new_limit_i => regs_i.maxrxbw_load_o, + bwmax_kbps_i => unsigned(regs_i.maxrxbw_o), + bw_bps_o => regs_o.rxbw_i); + + process(clk_sys_i) + begin + if rising_edge(clk_sys_i) then + if rst_n_i = '0' then + max_bw_reg <= (others=>'0'); + elsif regs_i.maxrxbw_load_o = '1' then + max_bw_reg <= regs_i.maxrxbw_o; + end if; + end if; + end process; + regs_o.maxrxbw_i <= max_bw_reg; U_Buffer : nic_elastic_buffer generic map ( diff --git a/modules/wrsw_nic/nic_wbgen2_pkg.vhd b/modules/wrsw_nic/nic_wbgen2_pkg.vhd index f4ab2e05..1fddc14d 100644 --- a/modules/wrsw_nic/nic_wbgen2_pkg.vhd +++ b/modules/wrsw_nic/nic_wbgen2_pkg.vhd @@ -3,7 +3,7 @@ --------------------------------------------------------------------------------------- -- File : nic_wbgen2_pkg.vhd -- Author : auto-generated by wbgen2 from wr_nic.wb --- Created : Thu Jul 28 10:18:55 2016 +-- Created : Mon Aug 1 16:03:57 2016 -- Standard : VHDL'87 
--------------------------------------------------------------------------------------- -- THIS FILE WAS GENERATED BY wbgen2 FROM SOURCE FILE wr_nic.wb @@ -27,8 +27,8 @@ package nic_wbgen2_pkg is sr_tx_error_i : std_logic; sr_cur_tx_desc_i : std_logic_vector(2 downto 0); sr_cur_rx_desc_i : std_logic_vector(2 downto 0); - bw_i : std_logic_vector(31 downto 0); - rnd_i : std_logic_vector(31 downto 0); + rxbw_i : std_logic_vector(31 downto 0); + maxrxbw_i : std_logic_vector(15 downto 0); end record; constant c_nic_in_registers_init_value: t_nic_in_registers := ( @@ -38,8 +38,8 @@ package nic_wbgen2_pkg is sr_tx_error_i => '0', sr_cur_tx_desc_i => (others => '0'), sr_cur_rx_desc_i => (others => '0'), - bw_i => (others => '0'), - rnd_i => (others => '0') + rxbw_i => (others => '0'), + maxrxbw_i => (others => '0') ); -- Output registers (WB slave -> user design) @@ -54,6 +54,8 @@ package nic_wbgen2_pkg is sr_tx_done_load_o : std_logic; sr_tx_error_o : std_logic; sr_tx_error_load_o : std_logic; + maxrxbw_o : std_logic_vector(15 downto 0); + maxrxbw_load_o : std_logic; end record; constant c_nic_out_registers_init_value: t_nic_out_registers := ( @@ -65,7 +67,9 @@ package nic_wbgen2_pkg is sr_tx_done_o => '0', sr_tx_done_load_o => '0', sr_tx_error_o => '0', - sr_tx_error_load_o => '0' + sr_tx_error_load_o => '0', + maxrxbw_o => (others => '0'), + maxrxbw_load_o => '0' ); function "or" (left, right: t_nic_in_registers) return t_nic_in_registers; function f_x_to_zero (x:std_logic) return std_logic; @@ -102,8 +106,8 @@ tmp.sr_tx_done_i := f_x_to_zero(left.sr_tx_done_i) or f_x_to_zero(right.sr_tx_do tmp.sr_tx_error_i := f_x_to_zero(left.sr_tx_error_i) or f_x_to_zero(right.sr_tx_error_i); tmp.sr_cur_tx_desc_i := f_x_to_zero(left.sr_cur_tx_desc_i) or f_x_to_zero(right.sr_cur_tx_desc_i); tmp.sr_cur_rx_desc_i := f_x_to_zero(left.sr_cur_rx_desc_i) or f_x_to_zero(right.sr_cur_rx_desc_i); -tmp.bw_i := f_x_to_zero(left.bw_i) or f_x_to_zero(right.bw_i); -tmp.rnd_i := 
f_x_to_zero(left.rnd_i) or f_x_to_zero(right.rnd_i); +tmp.rxbw_i := f_x_to_zero(left.rxbw_i) or f_x_to_zero(right.rxbw_i); +tmp.maxrxbw_i := f_x_to_zero(left.maxrxbw_i) or f_x_to_zero(right.maxrxbw_i); return tmp; end function; end package body; diff --git a/modules/wrsw_nic/nic_wishbone_slave.vhd b/modules/wrsw_nic/nic_wishbone_slave.vhd index 1702ae62..9de19b32 100644 --- a/modules/wrsw_nic/nic_wishbone_slave.vhd +++ b/modules/wrsw_nic/nic_wishbone_slave.vhd @@ -3,7 +3,7 @@ --------------------------------------------------------------------------------------- -- File : nic_wishbone_slave.vhd -- Author : auto-generated by wbgen2 from wr_nic.wb --- Created : Thu Jul 28 10:18:55 2016 +-- Created : Mon Aug 1 16:03:57 2016 -- Standard : VHDL'87 --------------------------------------------------------------------------------------- -- THIS FILE WAS GENERATED BY wbgen2 FROM SOURCE FILE wr_nic.wb @@ -120,6 +120,7 @@ begin regs_o.sr_rec_load_o <= '0'; regs_o.sr_tx_done_load_o <= '0'; regs_o.sr_tx_error_load_o <= '0'; + regs_o.maxrxbw_load_o <= '0'; eic_idr_write_int <= '0'; eic_ier_write_int <= '0'; eic_isr_write_int <= '0'; @@ -133,6 +134,7 @@ begin regs_o.sr_rec_load_o <= '0'; regs_o.sr_tx_done_load_o <= '0'; regs_o.sr_tx_error_load_o <= '0'; + regs_o.maxrxbw_load_o <= '0'; eic_idr_write_int <= '0'; eic_ier_write_int <= '0'; eic_isr_write_int <= '0'; @@ -141,6 +143,7 @@ begin regs_o.sr_rec_load_o <= '0'; regs_o.sr_tx_done_load_o <= '0'; regs_o.sr_tx_error_load_o <= '0'; + regs_o.maxrxbw_load_o <= '0'; end if; else if ((wb_cyc_i = '1') and (wb_stb_i = '1')) then @@ -226,13 +229,30 @@ begin when "0010" => if (wb_we_i = '1') then end if; - rddata_reg(31 downto 0) <= regs_i.bw_i; + rddata_reg(31 downto 0) <= regs_i.rxbw_i; ack_sreg(0) <= '1'; ack_in_progress <= '1'; when "0011" => if (wb_we_i = '1') then + regs_o.maxrxbw_load_o <= '1'; end if; - rddata_reg(31 downto 0) <= regs_i.rnd_i; + rddata_reg(15 downto 0) <= regs_i.maxrxbw_i; + rddata_reg(16) <= 'X'; + rddata_reg(17) 
<= 'X'; + rddata_reg(18) <= 'X'; + rddata_reg(19) <= 'X'; + rddata_reg(20) <= 'X'; + rddata_reg(21) <= 'X'; + rddata_reg(22) <= 'X'; + rddata_reg(23) <= 'X'; + rddata_reg(24) <= 'X'; + rddata_reg(25) <= 'X'; + rddata_reg(26) <= 'X'; + rddata_reg(27) <= 'X'; + rddata_reg(28) <= 'X'; + rddata_reg(29) <= 'X'; + rddata_reg(30) <= 'X'; + rddata_reg(31) <= 'X'; ack_sreg(0) <= '1'; ack_in_progress <= '1'; when "1000" => @@ -473,7 +493,8 @@ begin -- Current TX descriptor -- Current RX descriptor -- Bytes-per-second --- 32-bit random number for throttling +-- KBytes-per-second + regs_o.maxrxbw_o <= wrdata_reg(15 downto 0); -- extra code for reg/fifo/mem: TX descriptors mem -- RAM block instantiation for memory: TX descriptors mem nic_dtx_raminst : wbgen2_dpssram diff --git a/modules/wrsw_nic/wr_nic.wb b/modules/wrsw_nic/wr_nic.wb index e50197ab..8791d441 100644 --- a/modules/wrsw_nic/wr_nic.wb +++ b/modules/wrsw_nic/wr_nic.wb @@ -153,8 +153,8 @@ top = peripheral { }; reg { - name = "NIC Bandwidth Register"; - prefix = "BW"; + name = "NIC Current Rx Bandwidth Register"; + prefix = "RXBW"; field { name = "Bytes-per-second"; type = SLV; @@ -166,14 +166,14 @@ top = peripheral { }; reg { - name = "NIC Random Register"; - prefix = "RND"; + name = "NIC Max Rx Bandwidth Register"; + prefix = "MAXRXBW"; field { - name = "32-bit random number for throttling"; + name = "KBytes-per-second"; type = SLV; - size = 32; - access_bus = READ_ONLY; - access_dev = WRITE_ONLY; + size = 16; + access_bus = READ_WRITE; + access_dev = READ_WRITE; load = LOAD_EXT; }; }; -- GitLab