Skip to content
Snippets Groups Projects
Commit a6516f98 authored by Phil Clarke's avatar Phil Clarke
Browse files

Add a component to insert an AXI pipeline stage under control of a generic.

read throughput improvement (issue next read when there is space instead of waiting for output fifo to be empty)
add generic to insert AXI_Pipeline stages in-front of specific gencores fifos for timing.
parent a38d51dc
No related merge requests found
...@@ -3,6 +3,7 @@ files = [ ...@@ -3,6 +3,7 @@ files = [
"dma_controller_regs.vhd", "dma_controller_regs.vhd",
"l2p_arbiter.vhd", "l2p_arbiter.vhd",
"rl0_pl_stage_flowcontrol_srst.vhd", "rl0_pl_stage_flowcontrol_srst.vhd",
"opt_rl0_pl_stg.vhd",
"l2p_dma_master.vhd", "l2p_dma_master.vhd",
"p2l_decode32.vhd", "p2l_decode32.vhd",
"p2l_dma_master.vhd", "p2l_dma_master.vhd",
......
-- SPDX-FileCopyrightText: 2023 CERN (home.cern)
--
-- SPDX-License-Identifier: CERN-OHL-W-2.0+
--
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- GN4124 core for PCIe FMC carrier -- GN4124 core for PCIe FMC carrier
-- http://www.ohwr.org/projects/gn4124-core -- http://www.ohwr.org/projects/gn4124-core
...@@ -12,7 +16,7 @@ ...@@ -12,7 +16,7 @@
-- --
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- --
-- This source describes Open Hardware and is licensed under the CERN-OHL-W v2. -- This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later.
-- You may redistribute and modify this source and make products using it -- You may redistribute and modify this source and make products using it
-- under the terms of the CERN-OHL-W v2 or future versions (https://ohwr.org/cern_ohl_w_v2.txt). -- under the terms of the CERN-OHL-W v2 or future versions (https://ohwr.org/cern_ohl_w_v2.txt).
-- --
...@@ -22,7 +26,7 @@ ...@@ -22,7 +26,7 @@
-- the CERN-OHL-W v2 for applicable conditions. -- the CERN-OHL-W v2 for applicable conditions.
-- --
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- Copyright CERN 2021 -- Copyright CERN 2023
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- --
-- NOTES: -- NOTES:
...@@ -39,7 +43,8 @@ entity gn4124_axi_r_chl_dcfifo is ...@@ -39,7 +43,8 @@ entity gn4124_axi_r_chl_dcfifo is
g_DATA_WIDTH : positive := 32; g_DATA_WIDTH : positive := 32;
g_NUM_WORDS : positive := 8; g_NUM_WORDS : positive := 8;
g_ALMOST_FULL_THRESHOLD : positive; g_ALMOST_FULL_THRESHOLD : positive;
g_RESET_SRC_DAT : boolean := true g_RESET_SRC_DAT : boolean := true;
g_OP_PL_STG : boolean := false
); );
port ( port (
-- sink interface -- sink interface
...@@ -67,12 +72,16 @@ architecture rtl of gn4124_axi_r_chl_dcfifo is ...@@ -67,12 +72,16 @@ architecture rtl of gn4124_axi_r_chl_dcfifo is
signal wr_write : std_logic; signal wr_write : std_logic;
signal rd_empty : std_logic; signal rd_empty : std_logic;
signal src_valid_int : std_logic;
signal src_dat_clk_en : std_logic; signal src_dat_clk_en : std_logic;
-- TBD: Apply attributes on this to make it a clk_en for the regs -- TBD: Apply attributes on this to make it a clk_en for the regs
signal rd_dat : std_logic_vector(g_DATA_WIDTH -1 downto 0); signal rd_dat : std_logic_vector(g_DATA_WIDTH -1 downto 0);
signal almost_full : std_logic; signal almost_full : std_logic;
signal int_dat : std_logic_vector(g_DATA_WIDTH -1 downto 0);
signal int_vld : std_logic;
signal int_rdy : std_logic;
begin begin
snk_rdy_o <= not wr_full; snk_rdy_o <= not wr_full;
...@@ -123,10 +132,10 @@ begin ...@@ -123,10 +132,10 @@ begin
begin begin
if rising_edge(src_clk_i) then if rising_edge(src_clk_i) then
if src_dat_clk_en = '1' then if src_dat_clk_en = '1' then
src_dat_o <= rd_dat; int_dat <= rd_dat;
end if; end if;
if g_RESET_SRC_DAT and src_rst_n_i = '0' then if g_RESET_SRC_DAT and src_rst_n_i = '0' then
src_dat_o <= (others => '0'); int_dat <= (others => '0');
end if; end if;
end if; end if;
end process p_dat; end process p_dat;
...@@ -136,26 +145,23 @@ begin ...@@ -136,26 +145,23 @@ begin
begin begin
if rising_edge(src_clk_i) then if rising_edge(src_clk_i) then
if src_rst_n_i = '0' then if src_rst_n_i = '0' then
src_valid_int <= '0'; int_vld <= '0';
else else
if src_dat_clk_en = '1' then if src_dat_clk_en = '1' then
src_valid_int <= '1'; int_vld <= '1';
elsif src_rdy_i = '1' and src_valid_int = '1' then elsif src_rdy_i = '1' and int_vld = '1' then
src_valid_int <= '0'; int_vld <= '0';
end if; end if;
end if; end if;
end if; end if;
end process p_valid; end process p_valid;
src_vld_o <= src_valid_int; p_int_clken : process (int_vld, int_rdy, rd_empty)
p_int_clken : process (src_valid_int, src_rdy_i, rd_empty)
begin begin
if src_valid_int = '1' and src_rdy_i = '1' and rd_empty = '0' then if int_vld = '1' and int_rdy = '1' and rd_empty = '0' then
src_dat_clk_en <= '1'; src_dat_clk_en <= '1';
elsif src_valid_int = '0' and rd_empty = '0' then elsif int_vld = '0' and rd_empty = '0' then
src_dat_clk_en <= '1'; src_dat_clk_en <= '1';
else else
src_dat_clk_en <= '0'; src_dat_clk_en <= '0';
...@@ -163,4 +169,21 @@ begin ...@@ -163,4 +169,21 @@ begin
end process p_int_clken; end process p_int_clken;
opt_pl : entity work.opt_rl0_pl_stg
generic map (
g_IMPLEMENT_PL_STG => g_OP_PL_STG, --: boolean := ;
g_DATA_WIDTH => g_DATA_WIDTH, --: positive := 1
g_MIMIMISE_TRANSITIONS => true --: boolean := false -- when implementing a pl_stg controls if it is minimal logic OR "cleaner" for simulation and thus debugging
) port map(
clk_i => src_clk_i, --: in std_logic;
rst_n_i => src_rst_n_i, --: in std_logic;
in_rdy_o => int_rdy, --: out std_logic;
in_vld_i => int_vld, --: in std_logic;
in_dat_i => int_dat, --: in std_logic_vector(g_DATA_WIDTH-1 downto 0);
out_rdy_i => src_rdy_i, --: in std_logic;
out_vld_o => src_vld_o, --: out std_logic;
out_dat_o => src_dat_o --: out std_logic_vector(g_DATA_WIDTH-1 downto 0)
);
end architecture rtl; end architecture rtl;
-- SPDX-FileCopyrightText: 2023 CERN (home.cern)
--
-- SPDX-License-Identifier: CERN-OHL-W-2.0+
--
--------------------------------------------------------------------------------
-- GN4124 core for PCIe FMC carrier
-- http://www.ohwr.org/projects/gn4124-core
--------------------------------------------------------------------------------
--
-- unit name: opt_rl0_pl_stg
--
-- description: entity to generically switch in/out an AXI_ST or Avalon_ST RL0 pipeline stage.
--
--
--------------------------------------------------------------------------------
--
-- This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later.
-- You may redistribute and modify this source and make products using it
-- under the terms of the CERN-OHL-W v2 or future versions (https://ohwr.org/cern_ohl_w_v2.txt).
--
-- This source is distributed WITHOUT ANY EXPRESS OR IMPLIED
-- WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY
-- QUALITY AND FITNESS FOR A PARTICULAR PURPOSE. Please see
-- the CERN-OHL-W v2 for applicable conditions.
--
--------------------------------------------------------------------------------
-- Copyright CERN 2023
--------------------------------------------------------------------------------
library ieee;
use ieee.std_logic_1164.all;
-- Optional valid/ready pipeline stage.
-- Depending on g_IMPLEMENT_PL_STG the architecture either instantiates a
-- registered stage (flow-control entity plus data registers) or wires the
-- "in" side straight through to the "out" side.
entity opt_rl0_pl_stg is
generic (
-- true  : generate the registered pipeline stage.
-- false : in_* and out_* are connected combinationally (clk_i / rst_n_i unused).
g_IMPLEMENT_PL_STG : boolean := false;
-- Width in bits of in_dat_i and out_dat_o.
g_DATA_WIDTH : positive := 1;
-- Forwarded to the g_minimal_op_transitions generic of the flow-control entity.
-- (The spelling "MIMIMISE" is part of the public interface; do not rename.)
g_MIMIMISE_TRANSITIONS : boolean := false; -- when implementing a pl_stg controls if it is minimal logic OR "cleaner" for simulation and thus debugging
-- Only relevant when the stage is implemented: when true, the internal
-- pipeline data register and out_dat_o are synchronously cleared to all
-- zeros while rst_n_i = '0'.
g_DAT_O_HAS_SRST : boolean := false
);
port (
-- Clock for the pipeline registers and flow control.
clk_i : in std_logic;
-- Active-low reset, sampled on the rising edge of clk_i for the data registers.
rst_n_i : in std_logic;
-- Upstream ("in") side handshake and data.
in_rdy_o : out std_logic;
in_vld_i : in std_logic;
in_dat_i : in std_logic_vector(g_DATA_WIDTH-1 downto 0);
-- Downstream ("out") side handshake and data.
out_rdy_i : in std_logic;
out_vld_o : out std_logic;
out_dat_o : out std_logic_vector(g_DATA_WIDTH-1 downto 0)
);
end entity opt_rl0_pl_stg;
architecture rtl of opt_rl0_pl_stg is
begin

  ------------------------------------------------------------------------------
  -- Pipeline stage implemented: handshake signals are produced by the
  -- flow-control entity, which also tells the data path below when to load
  -- each register and which source feeds the output register.
  ------------------------------------------------------------------------------
  gen_pl : if g_IMPLEMENT_PL_STG generate
    signal pl_reg_ce  : std_logic;  -- load enable for the holding register
    signal op_reg_ce  : std_logic;  -- load enable for the output register
    signal sel_pl_reg : std_logic;  -- '1': output register loads from the holding register
    signal pl_reg     : std_logic_vector(g_DATA_WIDTH-1 downto 0);
  begin

    -- NOTE(review): the file list names "rl0_pl_stage_flowcontrol_srst.vhd";
    -- confirm that the entity declared there really carries the
    -- "teng_wr_nic_" prefix used here.
    ctrl : entity work.teng_wr_nic_rl0_pl_stage_flowcontrol_srst
      generic map (
        g_minimal_op_transitions => g_MIMIMISE_TRANSITIONS
      )
      port map (
        clk_i                 => clk_i,
        rst_n_i               => rst_n_i,
        in_rdy_o              => in_rdy_o,
        in_vld_i              => in_vld_i,
        out_rdy_i             => out_rdy_i,
        out_vld_o             => out_vld_o,
        clk_en_pl_reg_o       => pl_reg_ce,
        clk_en_op_reg_o       => op_reg_ce,
        use_dat_from_pl_reg_o => sel_pl_reg
      );

    -- Holding register: captures the incoming word when enabled.
    -- The optional synchronous reset takes priority over the enable.
    p_pl_reg : process (clk_i)
    begin
      if rising_edge(clk_i) then
        if g_DAT_O_HAS_SRST and rst_n_i = '0' then
          pl_reg <= (others => '0');
        elsif pl_reg_ce = '1' then
          pl_reg <= in_dat_i;
        end if;
      end if;
    end process p_pl_reg;

    -- Output register: when enabled, loads either the held word or the
    -- incoming word, as selected by the flow-control entity.
    -- The optional synchronous reset takes priority over the enable.
    p_op_reg : process (clk_i)
    begin
      if rising_edge(clk_i) then
        if g_DAT_O_HAS_SRST and rst_n_i = '0' then
          out_dat_o <= (others => '0');
        elsif op_reg_ce = '1' then
          if sel_pl_reg = '1' then
            out_dat_o <= pl_reg;
          else
            out_dat_o <= in_dat_i;
          end if;
        end if;
      end if;
    end process p_op_reg;

  end generate gen_pl;

  ------------------------------------------------------------------------------
  -- Pipeline stage not implemented: plain combinational feed-through.
  -- clk_i, rst_n_i and g_DAT_O_HAS_SRST have no effect in this configuration.
  ------------------------------------------------------------------------------
  gen_no_pl : if not g_IMPLEMENT_PL_STG generate
  begin
    in_rdy_o  <= out_rdy_i;
    out_vld_o <= in_vld_i;
    out_dat_o <= in_dat_i;
  end generate gen_no_pl;

end architecture rtl;
...@@ -62,7 +62,8 @@ entity p2l_axi4_rd_dc is ...@@ -62,7 +62,8 @@ entity p2l_axi4_rd_dc is
g_AXI_AR_FIFO_WRDS : positive := 16; -- AXI4 AR Channel fifo depth g_AXI_AR_FIFO_WRDS : positive := 16; -- AXI4 AR Channel fifo depth
g_AXI_R_FIFO_WRDS : positive := 32; -- AXI4 R Channel fifo depth g_AXI_R_FIFO_WRDS : positive := 32; -- AXI4 R Channel fifo depth
g_EN_AXI_QOS_2_TC_LOOKUP : boolean := false; -- currently unused! g_EN_AXI_QOS_2_TC_LOOKUP : boolean := false; -- currently unused!
g_PCIE_DEFAULT_TC : std_logic_vector(2 downto 0) := "000" -- default PCIE TC value g_PCIE_DEFAULT_TC : std_logic_vector(2 downto 0) := "000"; -- default PCIE TC value
g_RD_AXI_GN_INIT_IP_PL : boolean := true -- put a fifo on the input to the intiator form the fifo..
); );
port ( port (
-- ------------------------------------------------------- -- -------------------------------------------------------
...@@ -282,9 +283,10 @@ begin ...@@ -282,9 +283,10 @@ begin
gennum_master_axi_rd_init : entity work.p2l_axi4_rd_initiator gennum_master_axi_rd_init : entity work.p2l_axi4_rd_initiator
generic map ( generic map (
g_USE_AR_REGISTER_PL => false, g_USE_AR_REGISTER_PL => g_RD_AXI_GN_INIT_IP_PL,
g_ID_WIDTH => g_ID_WIDTH, g_ID_WIDTH => g_ID_WIDTH,
g_RD_FIFO_W => c_R_FIFO_FILL_LVL_W, g_RD_FIFO_W => c_R_FIFO_FILL_LVL_W,
g_RD_FIFO_DEPTH => g_AXI_R_FIFO_WRDS,
g_RD_FIFO_SPACE_CMP_VAL => c_R_BACKOFF_FILL_LVL g_RD_FIFO_SPACE_CMP_VAL => c_R_BACKOFF_FILL_LVL
) port map ( ) port map (
clk_i => clk_i, clk_i => clk_i,
...@@ -326,42 +328,35 @@ begin ...@@ -326,42 +328,35 @@ begin
); );
-- -- block to encapsulate DCFIFO and signals to enable AXI_STREAM signalling.
-- b_r_chl_fifo : block
--
-- begin
-- Pack and unpack SLV for the fifo -- Pack and unpack SLV for the fifo
r_fifo_wdat <= int_r_rec_id & int_r_rec.last & int_r_rec.resp & int_r_rec.data; r_fifo_wdat <= int_r_rec_id & int_r_rec.last & int_r_rec.resp & int_r_rec.data;
r_chnl_data_o <= r_fifo_rdat(g_DATA_WIDTH -1 downto 0); r_chnl_data_o <= r_fifo_rdat(g_DATA_WIDTH -1 downto 0);
r_chnl_resp_o <= r_fifo_rdat(g_DATA_WIDTH+1 downto g_DATA_WIDTH); r_chnl_resp_o <= r_fifo_rdat(g_DATA_WIDTH+1 downto g_DATA_WIDTH);
r_chnl_last_o <= r_fifo_rdat(g_DATA_WIDTH+2); r_chnl_last_o <= r_fifo_rdat(g_DATA_WIDTH+2);
r_chnl_id_o <= r_fifo_rdat(r_fifo_rdat'high downto g_DATA_WIDTH+3); r_chnl_id_o <= r_fifo_rdat(r_fifo_rdat'high downto g_DATA_WIDTH+3);
r_chl_dcfifo : entity work.gn4124_axi_r_chl_dcfifo r_chl_dcfifo : entity work.gn4124_axi_r_chl_dcfifo
generic map( generic map(
g_DATA_WIDTH => c_R_CHL_FIFO_W, g_DATA_WIDTH => c_R_CHL_FIFO_W,
g_NUM_WORDS => g_AXI_R_FIFO_WRDS, g_NUM_WORDS => g_AXI_R_FIFO_WRDS,
g_ALMOST_FULL_THRESHOLD => c_R_BACKOFF_FILL_LVL, g_ALMOST_FULL_THRESHOLD => c_R_BACKOFF_FILL_LVL,
g_RESET_SRC_DAT => TRUE g_RESET_SRC_DAT => TRUE
) port map ( ) port map (
snk_clk_i => clk_i, snk_clk_i => clk_i,
snk_rst_n_i => rst_n_i, snk_rst_n_i => rst_n_i,
snk_rdy_o => int_r_rdy, snk_rdy_o => int_r_rdy,
snk_vld_i => int_r_vld, snk_vld_i => int_r_vld,
snk_dat_i => r_fifo_wdat, snk_dat_i => r_fifo_wdat,
snk_lvl_o => r_chl_dcfifo_fill, snk_lvl_o => r_chl_dcfifo_fill,
snk_almost_full_o => open, -- here as an alternative to using the level myself snk_almost_full_o => open, -- here as an alternative to using the level myself
src_clk_i => clk_axi_i, src_clk_i => clk_axi_i,
src_rst_n_i => rst_axi_n_i, src_rst_n_i => rst_axi_n_i,
src_rdy_i => r_ready_i, src_rdy_i => r_ready_i,
src_vld_o => r_valid_o, src_vld_o => r_valid_o,
src_dat_o => r_fifo_rdat src_dat_o => r_fifo_rdat
); );
-- end block b_r_chl_fifo;
-- end block b_axi_rd;
end generate gen_axi_rd_subsystem; end generate gen_axi_rd_subsystem;
end architecture struct; end architecture struct;
...@@ -44,6 +44,7 @@ entity p2l_axi4_rd_initiator is ...@@ -44,6 +44,7 @@ entity p2l_axi4_rd_initiator is
g_USE_AR_REGISTER_PL : boolean := false; g_USE_AR_REGISTER_PL : boolean := false;
g_ID_WIDTH : positive := 2; g_ID_WIDTH : positive := 2;
g_RD_FIFO_W : natural; g_RD_FIFO_W : natural;
g_RD_FIFO_DEPTH : positive;
g_RD_FIFO_SPACE_CMP_VAL : positive g_RD_FIFO_SPACE_CMP_VAL : positive
); );
port ( port (
...@@ -75,11 +76,11 @@ entity p2l_axi4_rd_initiator is ...@@ -75,11 +76,11 @@ entity p2l_axi4_rd_initiator is
-- currently not used.. -- currently not used..
tid_gnt_i : in std_logic; tid_gnt_i : in std_logic;
tid_req_o : out std_logic; tid_req_o : out std_logic;
tid_val_i : in std_logic_Vector(1 downto 0); -- := g_TID_CID; tid_val_i : in std_logic_vector(1 downto 0); -- := g_TID_CID;
cid_val_o : out std_logic_vector(1 downto 0); cid_val_o : out std_logic_vector(1 downto 0);
cid_fin_o : out std_logic; cid_fin_o : out std_logic;
r_chl_dcfifo_fill_lvl_i : in std_logic_Vector(g_RD_FIFO_W-1 downto 0); r_chl_dcfifo_fill_lvl_i : in std_logic_vector(g_RD_FIFO_W-1 downto 0);
-- TODO: should make this generic based on the fifo size, or "merge" the fifo into this module... -- TODO: should make this generic based on the fifo size, or "merge" the fifo into this module...
-- data pipe to push data from this Axi Read Master to the arbitrator.. -- data pipe to push data from this Axi Read Master to the arbitrator..
...@@ -147,7 +148,7 @@ architecture rtl of p2l_axi4_rd_initiator is ...@@ -147,7 +148,7 @@ architecture rtl of p2l_axi4_rd_initiator is
signal cmd_length_in_words : unsigned(9 downto 0); signal cmd_length_in_words : unsigned(9 downto 0);
signal outstanding_data : unsigned ( 8 downto 0); signal outstanding_data : unsigned (8 downto 0);
-- TBD to we add a outstanding_data_is_0 signal to help timing? -- TBD to we add a outstanding_data_is_0 signal to help timing?
signal rdata_fifo_has_space : std_logic; signal rdata_fifo_has_space : std_logic;
...@@ -172,6 +173,18 @@ architecture rtl of p2l_axi4_rd_initiator is ...@@ -172,6 +173,18 @@ architecture rtl of p2l_axi4_rd_initiator is
signal sm_err_decode : t_err; signal sm_err_decode : t_err;
signal int_seen_error : std_logic; signal int_seen_error : std_logic;
signal fifo_space_bodge_count : unsigned(3 downto 0);
signal potential_op_fifo_depth : unsigned(g_RD_FIFO_W downto 0);
signal op_fifo_has_space : std_logic;
--synthesis translate_off
signal sm_active_cycles : natural;
signal sm_idle_backed_off_cycles : natural;
--synthesis translate_on
begin begin
...@@ -270,7 +283,8 @@ begin ...@@ -270,7 +283,8 @@ begin
case cmd_state is case cmd_state is
when ST_IDLE => when ST_IDLE =>
if ar_sm_vld = '1' and ar_sm_rdy = '0' and rd_fifo_empty_duration_ok = '1' then --if ar_sm_vld = '1' and ar_sm_rdy = '0' and rd_fifo_empty_duration_ok = '1' then -- PC 1 MAY 2023 perf incr??
if ar_sm_vld = '1' and ar_sm_rdy = '0' and op_fifo_has_space = '1' then
if ar_chl_sm.err = '1' then if ar_chl_sm.err = '1' then
cmd_state <= ST_ERR; cmd_state <= ST_ERR;
err_push_last <= ar_chl_sm.r_last; err_push_last <= ar_chl_sm.r_last;
...@@ -416,10 +430,61 @@ begin ...@@ -416,10 +430,61 @@ begin
end process p_fsm; end process p_fsm;
--synthesis translate_off
p_simonly_perf : process (clk_i)
begin
if rising_edge(clk_i) then
if cmd_state = ST_IDLE and ar_sm_vld = '1' and ar_sm_rdy = '0' then
sm_active_cycles <= sm_active_cycles + 1;
if rd_fifo_empty_duration_ok = '0' and op_fifo_has_space = '1' then
sm_idle_backed_off_cycles <= sm_idle_backed_off_cycles + 1;
end if;
elsif cmd_state /= ST_IDLE then
sm_active_cycles <= sm_active_cycles + 1;
end if;
if rst_n_i = '0' then
sm_idle_backed_off_cycles <= 0;
sm_active_cycles <= 0;
end if;
end if;
end process p_simonly_perf;
--synthesis translate_on
p_fifo_space : process (clk_i)
begin
if rising_edge(clk_i) then
if cmd_state = ST_IDLE then
fifo_space_bodge_count <= (others => '1');
elsif fifo_space_bodge_count /= "0000" then
fifo_space_bodge_count <= fifo_space_bodge_count - 1;
end if;
potential_op_fifo_depth <= unsigned(r_chl_dcfifo_fill_lvl_i)
+ fifo_space_bodge_count
+ ar_chl_sm.len
+ to_unsigned(3, potential_op_fifo_depth'length);
if potential_op_fifo_depth < to_unsigned(g_RD_FIFO_DEPTH, potential_op_fifo_depth'length) then
op_fifo_has_space <= '1';
else
op_fifo_has_space <= '0';
end if;
if rst_n_i = '0' then
fifo_space_bodge_count <= (others => '0');
end if;
end if;
end process p_fifo_space;
-- NOTE: we need to be really carefull here the value g_RD_FIFO_SPACE_CMP_VAL used in the -- NOTE: we need to be really carefull here the value g_RD_FIFO_SPACE_CMP_VAL used in the
-- comparison. it is a bit of a "magic" number... -- comparison. it is a bit of a "magic" number...
-- Ideally we would use an axi style FIFO that had "ready" (full almost full etc) -- Ideally we would use an axi style FIFO that had "ready" (full almost full etc)
-- synchronous to the value of fil level, but that is not to the authors knowledge availablein the general-cores lib. -- synchronous to the value of fill level, but that is not to the authors knowledge
-- available in the general-cores lib.
p_ready : process (clk_i) p_ready : process (clk_i)
begin begin
if rising_edge(clk_i) then if rising_edge(clk_i) then
...@@ -450,7 +515,7 @@ begin ...@@ -450,7 +515,7 @@ begin
if rst_n_i = '0' then if rst_n_i = '0' then
rd_fifo_empty_cycle_count <= (others => '1'); rd_fifo_empty_cycle_count <= (others => '1');
-- 7 cycles of being empty before we can get ready to issue the next command -- 7 cycles of being empty before we can get ready to issue the next command
rd_fifo_empty_duration_ok <= '0'; rd_fifo_empty_duration_ok <= '0';
end if; end if;
end if; end if;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment