From 83621f3db14b9eb84708b09613822e58f17a7ac0 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" <w.terpstra@gsi.de> Date: Fri, 4 Nov 2011 15:14:44 +0100 Subject: [PATCH] wb_crossbar: Improve timings Make all MUXs explicitly log deep Add a Kogge-Stone OR network for arbitration (makes arbitration scale log(n) with n masters, not O(n)) Signed-off-by: Tomasz Wlostowski <tomasz.wlostowski@cern.ch> --- modules/wishbone/wb_crossbar/xwb_crossbar.vhd | 279 ++++++++++++------ 1 file changed, 188 insertions(+), 91 deletions(-) diff --git a/modules/wishbone/wb_crossbar/xwb_crossbar.vhd b/modules/wishbone/wb_crossbar/xwb_crossbar.vhd index 053d8062..021ed762 100644 --- a/modules/wishbone/wb_crossbar/xwb_crossbar.vhd +++ b/modules/wishbone/wb_crossbar/xwb_crossbar.vhd @@ -31,6 +31,7 @@ ------------------------------------------------------------------------------- -- Revisions : -- Date Version Author Description +-- 2011-11-04 2.0 wterpstra timing improvements -- 2011-06-08 1.0 wterpstra import from SVN ------------------------------------------------------------------------------- @@ -62,9 +63,8 @@ entity xwb_crossbar is end xwb_crossbar; architecture rtl of xwb_crossbar is + -- Crossbar connection matrix type matrix is array (g_num_masters-1 downto 0, g_num_slaves downto 0) of std_logic; - type column is array (g_num_masters-1 downto 0) of std_logic; - type row is array (g_num_slaves downto 0) of std_logic; -- Add an 'error' device to the list of slaves signal master_ie : t_wishbone_master_in_array(g_num_slaves downto 0); @@ -77,59 +77,125 @@ architecture rtl of xwb_crossbar is -- Either matrix_old or matrix_new, depending on g_registered signal granted : matrix; - procedure main_logic( - signal matrix_new : out matrix; - signal matrix_old : in matrix; - signal slave_i : in t_wishbone_slave_in_array(g_num_masters-1 downto 0)) is - variable acc, tmp : std_logic; - variable request : matrix; -- Which slaves do the masters address log(S) - variable selected : matrix; -- Which master wins arbitration log(M) request - variable sbusy : row; -- Does the slave's previous connection persist? - variable mbusy : column; -- Does the master's previous connection persist? + -- If any of the bits are '1', the whole thing is '1' + -- This function makes the check explicitly have logarithmic depth. + function vector_OR(x : std_logic_vector) + return std_logic + is + constant len : integer := x'length; + constant mid : integer := len / 2; + alias y : std_logic_vector(len-1 downto 0) is x; + begin + if len = 1 + then return y(0); + else return vector_OR(y(len-1 downto mid)) or + vector_OR(y(mid-1 downto 0)); + end if; + end vector_OR; + + -- Kogge-Stone network of ORs. + -- A log(n) deep, n-wide circuit where: + -- output(i) = OR_{j<=i} input(j) + function ks_OR(input : std_logic_vector) + return std_logic_vector + is + -- 1 => 0 2 => 1 3..4 => 2 5..8 => 3 + function log2(i : natural) return natural is + begin + if i <= 1 + then return 0; + else return log2((i+1)/2) + 1; + end if; + end log2; + + -- 0 => 1 1 => 2 2 => 4 3 => 8 + function pow2(i : natural) return natural is + begin + if i = 0 + then return 1; + else return pow2(i-1)*2; + end if; + end pow2; + + constant width : natural := input'length; + constant stages : natural := log2(width); + variable prev : std_logic_vector(width-1 downto 0); + variable output : std_logic_vector(width-1 downto 0); + begin + prev := input; + for l in 0 to stages-1 loop + for i in 0 to width-1 loop + if i >= pow2(l) + then output(i) := prev(i) or prev(i-pow2(l)); + else output(i) := prev(i); + end if; + end loop; + prev := output; + end loop; + return output; + end ks_OR; + + -- Impure because it accesses cfg_{address_i, mask_i} + impure function matrix_logic( + matrix_old : matrix; + slave_i : t_wishbone_slave_in_array(g_num_masters-1 downto 0)) + return matrix + is + subtype row is std_logic_vector(g_num_masters-1 downto 0); + subtype column is std_logic_vector(g_num_slaves downto 0); + + variable tmp : std_logic; + variable tmp_column : column; + variable tmp_row : row; + + variable request : matrix; -- Which slaves do the masters address log(S) + variable selected : matrix; -- Which master wins arbitration log(M) request + variable sbusy : column; -- Does the slave's previous connection persist? + variable mbusy : row; -- Does the master's previous connection persist? + variable matrix_new : matrix; begin -- A slave is busy iff it services an in-progress cycle for slave in g_num_slaves downto 0 loop - acc := '0'; for master in g_num_masters-1 downto 0 loop - acc := acc or (matrix_old(master, slave) and slave_i(master).CYC); + tmp_row(master) := matrix_old(master, slave) and slave_i(master).CYC; end loop; - sbusy(slave) := acc; + sbusy(slave) := vector_OR(tmp_row); end loop; - + -- A master is busy iff it services an in-progress cycle for master in g_num_masters-1 downto 0 loop - acc := '0'; for slave in g_num_slaves downto 0 loop - acc := acc or matrix_old(master, slave); + tmp_column(slave) := matrix_old(master, slave); end loop; - mbusy(master) := acc and slave_i(master).CYC; + mbusy(master) := vector_OR(tmp_column) and slave_i(master).CYC; end loop; -- Decode the request address to see if master wants access for master in g_num_masters-1 downto 0 loop - acc := '0'; for slave in g_num_slaves-1 downto 0 loop - if (slave_i(master).ADR and cfg_mask_i(slave)) = cfg_address_i(slave) then - tmp := '1'; - else - tmp := '0'; - end if; - acc := acc or tmp; + tmp := not vector_OR((slave_i(master).ADR and cfg_mask_i(slave)) xor cfg_address_i(slave)); + tmp_column(slave) := tmp; request(master, slave) := slave_i(master).CYC and slave_i(master).STB and tmp; end loop; + tmp_column(g_num_slaves) := '0'; -- If no slaves match request, bind to 'error device' - request(master, g_num_slaves) := slave_i(master).CYC and slave_i(master).STB and not acc; + request(master, g_num_slaves) := slave_i(master).CYC and slave_i(master).STB and not vector_OR(tmp_column); end loop; -- Arbitrate among the requesting masters -- Policy: lowest numbered master first for slave in g_num_slaves downto 0 loop - acc := '0'; - -- It is possible to break the chain of LUTs here using a sort of kogge-stone network - -- This probably only makes sense if you have more than 32 masters + -- OR together all the requests by higher priority masters for master in 0 to g_num_masters-1 loop - selected(master, slave) := request(master, slave) and not acc; - acc := acc or request(master, slave); + tmp_row(master) := request(master, slave); + end loop; + tmp_row := ks_OR(tmp_row); + + -- Grant to highest priority master + selected(0, slave) := request(0, slave); -- master 0 always wins + for master in 1 to g_num_masters-1 loop + selected(master, slave) := -- only if requested and no lower requests + not tmp_row(master-1) and request(master, slave); end loop; end loop; @@ -138,77 +204,110 @@ architecture rtl of xwb_crossbar is for slave in g_num_slaves downto 0 loop for master in g_num_masters-1 downto 0 loop if sbusy(slave) = '1' or mbusy(master) = '1' then - matrix_new(master, slave) <= matrix_old(master, slave); + matrix_new(master, slave) := matrix_old(master, slave); else - matrix_new(master, slave) <= selected(master, slave); + matrix_new(master, slave) := selected(master, slave); end if; end loop; end loop; - end main_logic; + + return matrix_new; + end matrix_logic; -- Select the master pins the slave will receive - procedure slave_logic(signal o : out t_wishbone_master_out; - signal slave_i : in t_wishbone_slave_in_array(g_num_masters-1 downto 0); - signal granted : in matrix; - slave : integer) is - variable acc : t_wishbone_master_out; - variable granted_address : t_wishbone_address; - variable granted_select : t_wishbone_byte_select; - variable granted_data : t_wishbone_data; + function slave_logic(slave : integer; + granted : matrix; + slave_i : t_wishbone_slave_in_array(g_num_masters-1 downto 0)) + return t_wishbone_master_out + is + subtype row is std_logic_vector(g_num_masters-1 downto 0); + type matrix is array (natural range <>) of row; + + function matrix_OR(x : matrix) + return std_logic_vector is + variable result : std_logic_vector(x'LENGTH-1 downto 0); + begin + for i in x'LENGTH-1 downto 0 loop + result(i) := vector_OR(x(i)); + end loop; + return result; + end matrix_OR; + + variable CYC_row : row; + variable STB_row : row; + variable ADR_matrix : matrix(c_wishbone_address_width-1 downto 0); + variable SEL_matrix : matrix((c_wishbone_address_width/8)-1 downto 0); + variable WE_row : row; + variable DAT_matrix : matrix(c_wishbone_data_width-1 downto 0); begin - acc := ( - CYC => '0', - STB => '0', - ADR => (others => '0'), - SEL => (others => '0'), - WE => '0', - DAT => (others => '0')); - + -- Rename all the signals ready for big_or for master in g_num_masters-1 downto 0 loop - granted_address := (others => granted(master, slave)); - granted_select := (others => granted(master, slave)); - granted_data := (others => granted(master, slave)); - acc := ( - CYC => acc.CYC or (slave_i(master).CYC and granted(master, slave)), - STB => acc.STB or (slave_i(master).STB and granted(master, slave)), - ADR => acc.ADR or (slave_i(master).ADR and granted_address), - SEL => acc.SEL or (slave_i(master).SEL and granted_select), - WE => acc.WE or (slave_i(master).WE and granted(master, slave)), - DAT => acc.DAT or (slave_i(master).DAT and granted_data)); + CYC_row(master) := slave_i(master).CYC and granted(master, slave); + STB_row(master) := slave_i(master).STB and granted(master, slave); + for bit in c_wishbone_address_width-1 downto 0 loop + ADR_matrix(bit)(master) := slave_i(master).ADR(bit) and granted(master, slave); + end loop; + for bit in (c_wishbone_address_width/8)-1 downto 0 loop + SEL_matrix(bit)(master) := slave_i(master).SEL(bit) and granted(master, slave); + end loop; + WE_row(master) := slave_i(master).WE and granted(master, slave); + for bit in c_wishbone_data_width-1 downto 0 loop + DAT_matrix(bit)(master) := slave_i(master).DAT(bit) and granted(master, slave); + end loop; end loop; - o <= acc; + + return ( + CYC => vector_OR(CYC_row), + STB => vector_OR(STB_row), + ADR => matrix_OR(ADR_matrix), + SEL => matrix_OR(SEL_matrix), + WE => vector_OR(WE_row), + DAT => matrix_OR(DAT_matrix)); end slave_logic; -- Select the slave pins the master will receive - procedure master_logic(signal o : out t_wishbone_slave_out; - signal master_i : in t_wishbone_master_in_array(g_num_slaves downto 0); - signal granted : in matrix; - master : integer) is - variable acc : t_wishbone_slave_out; - variable granted_data : t_wishbone_data; + function master_logic(master : integer; + granted : matrix; + master_ie : t_wishbone_master_in_array(g_num_slaves downto 0)) + return t_wishbone_slave_out + is + subtype row is std_logic_vector(g_num_slaves downto 0); + type matrix is array (natural range <>) of row; + + function matrix_OR(x : matrix) + return std_logic_vector is + variable result : std_logic_vector(x'LENGTH-1 downto 0); + begin + for i in x'LENGTH-1 downto 0 loop + result(i) := vector_OR(x(i)); + end loop; + return result; + end matrix_OR; + + variable ACK_row : row; + variable ERR_row : row; + variable RTY_row : row; + variable STALL_row : row; + variable DAT_matrix : matrix(c_wishbone_data_width-1 downto 0); begin - acc := ( - ACK => '0', - ERR => '0', - RTY => '0', - STALL => '0', - DAT => (others => '0'), - INT => '0'); - -- We use inverted logic on STALL so that if no slave granted => stall for slave in g_num_slaves downto 0 loop - granted_data := (others => granted(master, slave)); - acc := ( - ACK => acc.ACK or (master_i(slave).ACK and granted(master, slave)), - ERR => acc.ERR or (master_i(slave).ERR and granted(master, slave)), - RTY => acc.RTY or (master_i(slave).RTY and granted(master, slave)), - STALL => acc.STALL or (not master_i(slave).STALL and granted(master, slave)), - DAT => acc.DAT or (master_i(slave).DAT and granted_data), - INT => '0'); + ACK_row(slave) := master_ie(slave).ACK and granted(master, slave); + ERR_row(slave) := master_ie(slave).ERR and granted(master, slave); + RTY_row(slave) := master_ie(slave).RTY and granted(master, slave); + STALL_row(slave) := not master_ie(slave).STALL and granted(master, slave); + for bit in c_wishbone_data_width-1 downto 0 loop + DAT_matrix(bit)(slave) := master_ie(slave).DAT(bit) and granted(master, slave); + end loop; end loop; - acc.STALL := not acc.STALL; - - o <= acc; + + return ( + ACK => vector_OR(ACK_row), + ERR => vector_OR(ERR_row), + RTY => vector_OR(RTY_row), + STALL => not vector_OR(STALL_row), + DAT => matrix_OR(DAT_matrix), + INT => '0'); end master_logic; begin -- The virtual error slave is pretty straight-forward: @@ -230,6 +329,7 @@ begin end process virtual_error_slave; -- Copy the matrix to a register: + matrix_new <= matrix_logic(matrix_old, slave_i); main : process(clk_sys_i) begin if rising_edge(clk_sys_i) then @@ -246,14 +346,11 @@ begin -- Make the slave connections slave_matrix : for slave in g_num_slaves downto 0 generate - slave_logic(master_oe(slave), slave_i, granted, slave); + master_oe(slave) <= slave_logic(slave, granted, slave_i); end generate; -- Make the master connections master_matrix : for master in g_num_masters-1 downto 0 generate - master_logic(slave_o(master), master_ie, granted, master); + slave_o(master) <= master_logic(master, granted, master_ie); end generate; - - -- The main crossbar logic: - main_logic(matrix_new, matrix_old, slave_i); end rtl; -- GitLab