diff --git a/hdl/rtl/Manifest.py b/hdl/rtl/Manifest.py index 776e7344db3e55f0723ac79115f2729472552748..f66e2616207b37541604618dc491d3d513c1e8dc 100644 --- a/hdl/rtl/Manifest.py +++ b/hdl/rtl/Manifest.py @@ -3,6 +3,7 @@ files = [ "dma_controller_regs.vhd", "l2p_arbiter.vhd", "rl0_pl_stage_flowcontrol_srst.vhd", + "opt_rl0_pl_stg.vhd", "l2p_dma_master.vhd", "p2l_decode32.vhd", "p2l_dma_master.vhd", diff --git a/hdl/rtl/gn4124_axi_r_chl_dcfifo.vhd b/hdl/rtl/gn4124_axi_r_chl_dcfifo.vhd index 1f9b4b6aa7e596710e8843ca5025f113aa879f15..18f9d839170bcf2cc34931cbedef1d055e34be5b 100644 --- a/hdl/rtl/gn4124_axi_r_chl_dcfifo.vhd +++ b/hdl/rtl/gn4124_axi_r_chl_dcfifo.vhd @@ -1,3 +1,7 @@ +-- SPDX-FileCopyrightText: 2023 CERN (home.cern) +-- +-- SPDX-License-Identifier: CERN-OHL-W-2.0+ +-- -------------------------------------------------------------------------------- -- GN4124 core for PCIe FMC carrier -- http://www.ohwr.org/projects/gn4124-core @@ -12,7 +16,7 @@ -- -------------------------------------------------------------------------------- -- --- This source describes Open Hardware and is licensed under the CERN-OHL-W v2. +-- This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later. -- You may redistribute and modify this source and make products using it -- under the terms of the CERN-OHL-W v2 or future versions (https://ohwr.org/cern_ohl_w_v2.txt). -- @@ -22,7 +26,7 @@ -- the CERN-OHL-W v2 for applicable conditions. 
-- -------------------------------------------------------------------------------- --- Copyright CERN 2021 +-- Copyright CERN 2023 -------------------------------------------------------------------------------- -- -- NOTES: @@ -39,7 +43,8 @@ entity gn4124_axi_r_chl_dcfifo is g_DATA_WIDTH : positive := 32; g_NUM_WORDS : positive := 8; g_ALMOST_FULL_THRESHOLD : positive; - g_RESET_SRC_DAT : boolean := true + g_RESET_SRC_DAT : boolean := true; + g_OP_PL_STG : boolean := false ); port ( -- sink interface @@ -67,12 +72,16 @@ architecture rtl of gn4124_axi_r_chl_dcfifo is signal wr_write : std_logic; signal rd_empty : std_logic; - signal src_valid_int : std_logic; signal src_dat_clk_en : std_logic; -- TBD: Apply attributes on this to make it a clk_en for the regs signal rd_dat : std_logic_vector(g_DATA_WIDTH -1 downto 0); - signal almost_full : std_logic; + + + signal int_dat : std_logic_vector(g_DATA_WIDTH -1 downto 0); + signal int_vld : std_logic; + signal int_rdy : std_logic; + begin snk_rdy_o <= not wr_full; @@ -123,10 +132,10 @@ begin begin if rising_edge(src_clk_i) then if src_dat_clk_en = '1' then - src_dat_o <= rd_dat; + int_dat <= rd_dat; end if; if g_RESET_SRC_DAT and src_rst_n_i = '0' then - src_dat_o <= (others => '0'); + int_dat <= (others => '0'); end if; end if; end process p_dat; @@ -136,26 +145,23 @@ begin begin if rising_edge(src_clk_i) then if src_rst_n_i = '0' then - src_valid_int <= '0'; + int_vld <= '0'; else if src_dat_clk_en = '1' then - src_valid_int <= '1'; - elsif src_rdy_i = '1' and src_valid_int = '1' then - src_valid_int <= '0'; + int_vld <= '1'; + elsif int_rdy = '1' and int_vld = '1' then -- word leaves on the internal handshake with opt_pl (int_rdy), not on the final sink's src_rdy_i + int_vld <= '0'; end if; end if; end if; end process p_valid; - src_vld_o <= src_valid_int; - - - p_int_clken : process (src_valid_int, src_rdy_i, rd_empty) + p_int_clken : process (int_vld, int_rdy, rd_empty) begin - if src_valid_int = '1' and src_rdy_i = '1' and rd_empty = '0' then + if int_vld = '1' and int_rdy = '1' and rd_empty = '0' then 
src_dat_clk_en <= '1'; - elsif src_valid_int = '0' and rd_empty = '0' then + elsif int_vld = '0' and rd_empty = '0' then src_dat_clk_en <= '1'; else src_dat_clk_en <= '0'; @@ -163,4 +169,21 @@ begin end process p_int_clken; + opt_pl : entity work.opt_rl0_pl_stg + generic map ( + g_IMPLEMENT_PL_STG => g_OP_PL_STG, --: boolean := ; + g_DATA_WIDTH => g_DATA_WIDTH, --: positive := 1 + g_MIMIMISE_TRANSITIONS => true --: boolean := false -- when implementing a pl_stg controls if it is minimal logic OR "cleaner" for simulation and thus debugging + ) port map( + clk_i => src_clk_i, --: in std_logic; + rst_n_i => src_rst_n_i, --: in std_logic; + in_rdy_o => int_rdy, --: out std_logic; + in_vld_i => int_vld, --: in std_logic; + in_dat_i => int_dat, --: in std_logic_vector(g_DATA_WIDTH-1 downto 0); + out_rdy_i => src_rdy_i, --: in std_logic; + out_vld_o => src_vld_o, --: out std_logic; + out_dat_o => src_dat_o --: out std_logic_vector(g_DATA_WIDTH-1 downto 0) + ); + + end architecture rtl; diff --git a/hdl/rtl/opt_rl0_pl_stg.vhd b/hdl/rtl/opt_rl0_pl_stg.vhd new file mode 100644 index 0000000000000000000000000000000000000000..ea11f4b4b4edffc3eec47fe0c6d292ea0dda95df --- /dev/null +++ b/hdl/rtl/opt_rl0_pl_stg.vhd @@ -0,0 +1,114 @@ +-- SPDX-FileCopyrightText: 2023 CERN (home.cern) +-- +-- SPDX-License-Identifier: CERN-OHL-W-2.0+ +-- +-------------------------------------------------------------------------------- +-- GN4124 core for PCIe FMC carrier +-- http://www.ohwr.org/projects/gn4124-core +-------------------------------------------------------------------------------- +-- +-- unit name: opt_rl0_pl_stg +-- +-- description: entity to generically switch in/out an AXI_ST or avalon_st RL0 pipeline stage. +-- +-- +-------------------------------------------------------------------------------- +-- +-- This source describes Open Hardware and is licensed under the CERN-OHL-W v2 or later. 
+-- You may redistribute and modify this source and make products using it +-- under the terms of the CERN-OHL-W v2 or future versions (https://ohwr.org/cern_ohl_w_v2.txt). +-- +-- This source is distributed WITHOUT ANY EXPRESS OR IMPLIED +-- WARRANTY, INCLUDING OF MERCHANTABILITY, SATISFACTORY +-- QUALITY AND FITNESS FOR A PARTICULAR PURPOSE. Please see +-- the CERN-OHL-W v2 for applicable conditions. +-- +-------------------------------------------------------------------------------- +-- Copyright CERN 2023 +-------------------------------------------------------------------------------- + + +library ieee; +use ieee.std_logic_1164.all; + +entity opt_rl0_pl_stg is + generic ( + g_IMPLEMENT_PL_STG : boolean := false; + g_DATA_WIDTH : positive := 1; + g_MIMIMISE_TRANSITIONS : boolean := false; -- when implementing a pl_stg controls if it is minimal logic OR "cleaner" for simulation and thus debugging + g_DAT_O_HAS_SRST : boolean := false + ); + port ( + clk_i : in std_logic; + rst_n_i : in std_logic; + + in_rdy_o : out std_logic; + in_vld_i : in std_logic; + in_dat_i : in std_logic_vector(g_DATA_WIDTH-1 downto 0); + + out_rdy_i : in std_logic; + out_vld_o : out std_logic; + out_dat_o : out std_logic_vector(g_DATA_WIDTH-1 downto 0) + ); +end entity opt_rl0_pl_stg; + + + +architecture rtl of opt_rl0_pl_stg is + +begin + gen_pl : if g_IMPLEMENT_PL_STG generate + signal clk_en_pl_reg : std_logic; + signal clk_en_op_reg : std_logic; + signal use_dat_from_pl_reg : std_logic; + signal pl_dat : std_logic_vector(g_DATA_WIDTH -1 downto 0); + + begin + + ctrl : entity work.teng_wr_nic_rl0_pl_stage_flowcontrol_srst + generic map ( + g_minimal_op_transitions => g_MIMIMISE_TRANSITIONS + ) port map( + clk_i => clk_i, + rst_n_i => rst_n_i, + in_rdy_o => in_rdy_o, + in_vld_i => in_vld_i, + out_rdy_i => out_rdy_i, + out_vld_o => out_vld_o, + clk_en_pl_reg_o => clk_en_pl_reg, + clk_en_op_reg_o => clk_en_op_reg, + use_dat_from_pl_reg_o => use_dat_from_pl_reg + ); + + + p_pl_stage : 
process(clk_i) begin + if rising_edge(clk_i) then + if '1' = clk_en_pl_reg then + pl_dat <= in_dat_i; + end if; + + if '1' = clk_en_op_reg then + if '1' = use_dat_from_pl_reg then + out_dat_o <= pl_dat; + else + out_dat_o <= in_dat_i; + end if; + end if; + + if (g_DAT_O_HAS_SRST = true) and (rst_n_i = '0') then + out_dat_o <= (others => '0'); + pl_dat <= (others => '0'); + end if; + end if; + end process p_pl_stage; + end generate; + + + gen_no_pl : if g_IMPLEMENT_PL_STG = false generate + begin + in_rdy_o <= out_rdy_i; + out_vld_o <= in_vld_i; + out_dat_o <= in_dat_i; + end generate; + +end architecture rtl; diff --git a/hdl/rtl/p2l_axi4_rd_dc.vhd b/hdl/rtl/p2l_axi4_rd_dc.vhd index 1826588b4cadb1822708f262ecdcb571bbcfd278..785ec49b8c9557d061a9454d6a506772dd56836d 100644 --- a/hdl/rtl/p2l_axi4_rd_dc.vhd +++ b/hdl/rtl/p2l_axi4_rd_dc.vhd @@ -62,7 +62,8 @@ entity p2l_axi4_rd_dc is g_AXI_AR_FIFO_WRDS : positive := 16; -- AXI4 AR Channel fifo depth g_AXI_R_FIFO_WRDS : positive := 32; -- AXI4 R Channel fifo depth g_EN_AXI_QOS_2_TC_LOOKUP : boolean := false; -- currently unused! - g_PCIE_DEFAULT_TC : std_logic_vector(2 downto 0) := "000" -- default PCIE TC value + g_PCIE_DEFAULT_TC : std_logic_vector(2 downto 0) := "000"; -- default PCIE TC value + g_RD_AXI_GN_INIT_IP_PL : boolean := true -- put a fifo on the input to the initiator from the fifo (drives g_USE_AR_REGISTER_PL of the read initiator, previously hard-wired to false) ); port ( -- ------------------------------------------------------- @@ -282,9 +283,10 @@ begin gennum_master_axi_rd_init : entity work.p2l_axi4_rd_initiator generic map ( - g_USE_AR_REGISTER_PL => false, + g_USE_AR_REGISTER_PL => g_RD_AXI_GN_INIT_IP_PL, g_ID_WIDTH => g_ID_WIDTH, g_RD_FIFO_W => c_R_FIFO_FILL_LVL_W, + g_RD_FIFO_DEPTH => g_AXI_R_FIFO_WRDS, g_RD_FIFO_SPACE_CMP_VAL => c_R_BACKOFF_FILL_LVL ) port map ( clk_i => clk_i, @@ -326,42 +328,35 @@ begin ); --- -- block to encapsulate DCFIFO and signals to enable AXI_STREAM signalling. 
--- b_r_chl_fifo : block --- --- begin - -- Pack and unpack SLV for the fifo - r_fifo_wdat <= int_r_rec_id & int_r_rec.last & int_r_rec.resp & int_r_rec.data; - - r_chnl_data_o <= r_fifo_rdat(g_DATA_WIDTH -1 downto 0); - r_chnl_resp_o <= r_fifo_rdat(g_DATA_WIDTH+1 downto g_DATA_WIDTH); - r_chnl_last_o <= r_fifo_rdat(g_DATA_WIDTH+2); - r_chnl_id_o <= r_fifo_rdat(r_fifo_rdat'high downto g_DATA_WIDTH+3); - - - r_chl_dcfifo : entity work.gn4124_axi_r_chl_dcfifo - generic map( - g_DATA_WIDTH => c_R_CHL_FIFO_W, - g_NUM_WORDS => g_AXI_R_FIFO_WRDS, - g_ALMOST_FULL_THRESHOLD => c_R_BACKOFF_FILL_LVL, - g_RESET_SRC_DAT => TRUE - ) port map ( - snk_clk_i => clk_i, - snk_rst_n_i => rst_n_i, - snk_rdy_o => int_r_rdy, - snk_vld_i => int_r_vld, - snk_dat_i => r_fifo_wdat, - snk_lvl_o => r_chl_dcfifo_fill, - snk_almost_full_o => open, -- here as an alternative to using the level myself - src_clk_i => clk_axi_i, - src_rst_n_i => rst_axi_n_i, - src_rdy_i => r_ready_i, - src_vld_o => r_valid_o, - src_dat_o => r_fifo_rdat - ); - --- end block b_r_chl_fifo; --- end block b_axi_rd; + r_fifo_wdat <= int_r_rec_id & int_r_rec.last & int_r_rec.resp & int_r_rec.data; + + r_chnl_data_o <= r_fifo_rdat(g_DATA_WIDTH -1 downto 0); + r_chnl_resp_o <= r_fifo_rdat(g_DATA_WIDTH+1 downto g_DATA_WIDTH); + r_chnl_last_o <= r_fifo_rdat(g_DATA_WIDTH+2); + r_chnl_id_o <= r_fifo_rdat(r_fifo_rdat'high downto g_DATA_WIDTH+3); + + + r_chl_dcfifo : entity work.gn4124_axi_r_chl_dcfifo + generic map( + g_DATA_WIDTH => c_R_CHL_FIFO_W, + g_NUM_WORDS => g_AXI_R_FIFO_WRDS, + g_ALMOST_FULL_THRESHOLD => c_R_BACKOFF_FILL_LVL, + g_RESET_SRC_DAT => TRUE + ) port map ( + snk_clk_i => clk_i, + snk_rst_n_i => rst_n_i, + snk_rdy_o => int_r_rdy, + snk_vld_i => int_r_vld, + snk_dat_i => r_fifo_wdat, + snk_lvl_o => r_chl_dcfifo_fill, + snk_almost_full_o => open, -- here as an alternative to using the level myself + src_clk_i => clk_axi_i, + src_rst_n_i => rst_axi_n_i, + src_rdy_i => r_ready_i, + src_vld_o => r_valid_o, + 
src_dat_o => r_fifo_rdat + ); + end generate gen_axi_rd_subsystem; end architecture struct; diff --git a/hdl/rtl/p2l_axi4_rd_initiator.vhd b/hdl/rtl/p2l_axi4_rd_initiator.vhd index fd457fdf7a7936a1acaa601a73451a1b01c56bee..ddfae20cfe188a812ad0b50cfbd297c88b00fb20 100644 --- a/hdl/rtl/p2l_axi4_rd_initiator.vhd +++ b/hdl/rtl/p2l_axi4_rd_initiator.vhd @@ -44,6 +44,7 @@ entity p2l_axi4_rd_initiator is g_USE_AR_REGISTER_PL : boolean := false; g_ID_WIDTH : positive := 2; g_RD_FIFO_W : natural; + g_RD_FIFO_DEPTH : positive; g_RD_FIFO_SPACE_CMP_VAL : positive ); port ( @@ -75,11 +76,11 @@ entity p2l_axi4_rd_initiator is -- currently not used.. tid_gnt_i : in std_logic; tid_req_o : out std_logic; - tid_val_i : in std_logic_Vector(1 downto 0); -- := g_TID_CID; + tid_val_i : in std_logic_vector(1 downto 0); -- := g_TID_CID; cid_val_o : out std_logic_vector(1 downto 0); cid_fin_o : out std_logic; - r_chl_dcfifo_fill_lvl_i : in std_logic_Vector(g_RD_FIFO_W-1 downto 0); + r_chl_dcfifo_fill_lvl_i : in std_logic_vector(g_RD_FIFO_W-1 downto 0); -- TODO: should make this generic based on the fifo size, or "merge" the fifo into this module... -- data pipe to push data from this Axi Read Master to the arbitrator.. @@ -147,7 +148,7 @@ architecture rtl of p2l_axi4_rd_initiator is signal cmd_length_in_words : unsigned(9 downto 0); - signal outstanding_data : unsigned ( 8 downto 0); + signal outstanding_data : unsigned (8 downto 0); -- TBD to we add a outstanding_data_is_0 signal to help timing? 
signal rdata_fifo_has_space : std_logic; @@ -172,6 +173,18 @@ architecture rtl of p2l_axi4_rd_initiator is signal sm_err_decode : t_err; signal int_seen_error : std_logic; + signal fifo_space_bodge_count : unsigned(3 downto 0); + + signal potential_op_fifo_depth : unsigned(g_RD_FIFO_W downto 0); + + + signal op_fifo_has_space : std_logic; + +--synthesis translate_off + signal sm_active_cycles : natural; + signal sm_idle_backed_off_cycles : natural; +--synthesis translate_on + begin @@ -270,7 +283,8 @@ begin case cmd_state is when ST_IDLE => - if ar_sm_vld = '1' and ar_sm_rdy = '0' and rd_fifo_empty_duration_ok = '1' then + --if ar_sm_vld = '1' and ar_sm_rdy = '0' and rd_fifo_empty_duration_ok = '1' then -- PC 1 MAY 2023 perf incr?? + if ar_sm_vld = '1' and ar_sm_rdy = '0' and op_fifo_has_space = '1' then if ar_chl_sm.err = '1' then cmd_state <= ST_ERR; err_push_last <= ar_chl_sm.r_last; @@ -416,10 +430,61 @@ begin end process p_fsm; +--synthesis translate_off + p_simonly_perf : process (clk_i) + begin + if rising_edge(clk_i) then + if cmd_state = ST_IDLE and ar_sm_vld = '1' and ar_sm_rdy = '0' then + sm_active_cycles <= sm_active_cycles + 1; + if rd_fifo_empty_duration_ok = '0' and op_fifo_has_space = '1' then + sm_idle_backed_off_cycles <= sm_idle_backed_off_cycles + 1; + end if; + elsif cmd_state /= ST_IDLE then + sm_active_cycles <= sm_active_cycles + 1; + end if; + if rst_n_i = '0' then + sm_idle_backed_off_cycles <= 0; + sm_active_cycles <= 0; + end if; + end if; + end process p_simonly_perf; +--synthesis translate_on + + + p_fifo_space : process (clk_i) + begin + if rising_edge(clk_i) then + if cmd_state = ST_IDLE then + fifo_space_bodge_count <= (others => '1'); + elsif fifo_space_bodge_count /= "0000" then + fifo_space_bodge_count <= fifo_space_bodge_count - 1; + end if; + + potential_op_fifo_depth <= resize(unsigned(r_chl_dcfifo_fill_lvl_i), potential_op_fifo_depth'length) + + fifo_space_bodge_count + + ar_chl_sm.len + + to_unsigned(3, potential_op_fifo_depth'length); -- fill level is widened first: numeric_std "+" keeps only the wider operand's length, so the carry of the narrow partial sums would otherwise be lost + + if 
potential_op_fifo_depth < to_unsigned(g_RD_FIFO_DEPTH, potential_op_fifo_depth'length) then + op_fifo_has_space <= '1'; + else + op_fifo_has_space <= '0'; + end if; + + if rst_n_i = '0' then + fifo_space_bodge_count <= (others => '0'); + end if; + end if; + end process p_fifo_space; + + + + -- NOTE: we need to be really carefull here the value g_RD_FIFO_SPACE_CMP_VAL used in the -- comparison. it is a bit of a "magic" number... -- Ideally we would use an axi style FIFO that had "ready" (full almost full etc) - -- synchronous to the value of fil level, but that is not to the authors knowledge availablein the general-cores lib. + -- synchronous to the value of fill level, but that is not to the authors knowledge + -- available in the general-cores lib. p_ready : process (clk_i) begin if rising_edge(clk_i) then @@ -450,7 +515,7 @@ begin if rst_n_i = '0' then rd_fifo_empty_cycle_count <= (others => '1'); - -- 7 cycles of being empty before we can get ready to issue the next command + -- 7 cycles of being empty before we can get ready to issue the next command rd_fifo_empty_duration_ok <= '0'; end if; end if;