From c19e89b03bc1ddddcb9585636b513ad9df6326c7 Mon Sep 17 00:00:00 2001
From: "Wesley W. Terpstra" <w.terpstra@gsi.de>
Date: Tue, 17 Apr 2012 11:09:14 +0200
Subject: [PATCH] TX path added to altera pcie wrapper stub wishbone device added for testing

---
  Review notes (commentary only; ignored when the patch is applied):

  * pcie_altera.vhd, tx_rdy_o: the submitted expression was true only when
    the index bits of r_idxa and r_idxr were equal and their wrap bits
    differed, i.e. exactly when the allocation pointer had lapped the read
    pointer (buffer full). After reset both pointers are zero, so tx_rdy_o
    was '0' and, per the port contract ("may only set '1' if rdy_o = '1'"),
    nothing could ever be allocated. It is now the complement of that
    condition. Line count of the hunk is unchanged.
  * pcie_tlp.vhd, r_flight_count: the flight_counter process has no reset
    branch, so the counter now carries an initial value (same style as
    r_sop in pcie_altera.vhd) instead of starting undefined.
  * pcie_tlp.vhd, flight_counter: a request is now counted only in a cycle
    where wb_stb is high and wb_stall_i is low; previously a strobe held
    under stall was counted on every clock edge. The demo slave added in
    pcie_wb.vhd acknowledges on the same "stb and not stall" condition.
  * NOTE(review): the tx_* ports added to pcie_tlp have modes that look
    inverted relative to their _i/_o suffixes (tx_rdy_i is "out", tx_*_o
    are "in"). They are left as submitted because they are part of the
    component interface and are not yet connected in the pcie_wb.vhd hunks
    shown here -- confirm before wiring them up.
  * NOTE(review): in the lines visible in this patch r_pad is only written
    by reset and tx_queue_stall is declared but not used; tx_alloc, tx_en,
    tx_dat and tx_eop in pcie_wb.vhd have no visible driver. Presumably
    placeholders for follow-up work -- confirm.
  * NOTE(review): the blob ids on the "index" lines are those of the
    original submission; they were not recomputed for the edited lines.

 hdl/pcie_altera.vhd | 138 +++++++++++++++++++++++++++++++++++++++++---
 hdl/pcie_tlp.vhd    |  37 +++++++++++-
 hdl/pcie_wb.vhd     |  38 +++++++++---
 hdl/pcie_wb_pkg.vhd |  22 +++++--
 4 files changed, 212 insertions(+), 23 deletions(-)

diff --git a/hdl/pcie_altera.vhd b/hdl/pcie_altera.vhd
index 81072b1f..03a5fe1b 100644
--- a/hdl/pcie_altera.vhd
+++ b/hdl/pcie_altera.vhd
@@ -23,9 +23,14 @@ entity pcie_altera is
     rx_wb_dat_o   : out std_logic_vector(31 downto 0);
     rx_wb_stall_i : in  std_logic;
     
-    tx_wb_stb_i   : in  std_logic;
-    tx_wb_dat_i   : in  std_logic_vector(31 downto 0);
-    tx_wb_stall_o : out std_logic);
+    -- pre-allocate buffer space used for TX
+    tx_rdy_o      : out std_logic;
+    tx_alloc_i    : in  std_logic; -- may only set '1' if rdy_o = '1'
+    
+    -- push TX data
+    tx_en_i       : in  std_logic; -- may never exceed alloc_i
+    tx_dat_i      : in  std_logic_vector(31 downto 0);
+    tx_eop_i      : in  std_logic); -- Mark last strobe
 end pcie_altera;
 
 architecture rtl of pcie_altera is
@@ -188,6 +193,15 @@ architecture rtl of pcie_altera is
       return '0';
     end if;
   end is_zero;
+  
+  function active_high(x : boolean) return std_logic is -- boolean -> '1'/'0'
+  begin
+    if x then
+      return '1';
+    else
+      return '0';
+    end if;
+  end active_high;
   
   signal core_clk_out : std_logic;
   signal rstn : std_logic;
@@ -205,6 +219,8 @@ architecture rtl of pcie_altera is
   signal npor, crst, srst, rst_reg : std_logic;
   signal pme_shift : std_logic_vector(4 downto 0);
   
+  -- RX registers and signals
+  
   signal rx_st_ready0, rx_st_valid0 : std_logic;
   signal rx_st_be0 : std_logic_vector(7 downto 0);
   signal rx_st_data0 : std_logic_vector(63 downto 0);
@@ -215,6 +231,31 @@ architecture rtl of pcie_altera is
   
   signal r32_word, s32_word, s32_progress, r32_full, s32_need_refill, r32_skip, s32_enter0 : std_logic;
   signal r32_dat0, r32_dat1 : std_logic_vector(31 downto 0);
+  
+  -- TX registers and signals
+  
+  constant log_bytes  : integer := 9; -- 256 byte maximum TLP, but we allocate twice the space to simplify provisioning
+  constant buf_length : integer := (2**log_bytes)/8; -- queue depth in 64-bit entries
+  constant buf_bits   : integer := log_bytes-3; -- index width for buf_length entries
+  type queue_t is array(buf_length-1 downto 0) of std_logic_vector(64 downto 0); -- bit 64 = eop flag, 63..0 = data
+  
+  signal tx_st_sop0, tx_st_eop0, tx_st_ready0, tx_st_valid0 : std_logic;
+  signal tx_st_data0 : std_logic_vector(63 downto 0);
+  signal s_eop, tx_queue_stall : std_logic;
+  signal r_sop : std_logic := '1';
+  
+  signal queue : queue_t;
+  
+  -- Invariant idxr <= idxe <= idxw <= idxa, extra bit is for wrap-around
+  signal r_idxr, r_idxw, r_idxa, r_idxe, s_idxw_p1 : unsigned(buf_bits downto 0);
+  signal r_delay_ready : std_logic_vector(1 downto 0); -- length must equal the latency of the Avalon TX bus
+  
+  signal s_queue_wdat : std_logic_vector(63 downto 0);
+  signal s_queue_wen, s_64to32_full, r_tx32_full, r_pad : std_logic;
+  
+  constant zero32 : std_logic_vector(31 downto 0) := (others => '0');
+  signal r_tx_dat0 : std_logic_vector(31 downto 0);
+  
 begin
 
   reconfig_clk <= cal_clk50_i;
@@ -277,12 +318,12 @@ begin
       r2c_err0             => open,
       
       -- Avalon TX
-      tx_st_data0          => (others => '0'),
-      tx_st_eop0           => '0',
+      tx_st_data0          => tx_st_data0,
+      tx_st_eop0           => tx_st_eop0,
       tx_st_err0           => '0',
-      tx_st_sop0           => '0',
-      tx_st_valid0         => '0',
-      tx_st_ready0         => open,
+      tx_st_sop0           => tx_st_sop0,
+      tx_st_valid0         => tx_st_valid0,
+      tx_st_ready0         => tx_st_ready0,
       tx_fifo_empty0       => open,
       tx_fifo_full0        => open,
       tx_fifo_rdptr0       => open, -- 3 downto 0
@@ -490,7 +531,7 @@ begin
   -- Issue a fetch only if we need refill and no fetch is pending
   rx_st_ready0 <= s64_need_refill and is_zero(r64_ready(r64_ready'length-2 downto 0));
   
-  rx_data64: process(core_clk_out)
+  rx_data64 : process(core_clk_out)
   begin
     if rising_edge(core_clk_out) then
       if rstn = '0' then
@@ -505,4 +546,83 @@ begin
       r64_skip <= s64_skip;
     end if;
   end process;
+  
+  -- TX queue
+  tx_st_data0 <= queue(to_integer(r_idxr(buf_bits-1 downto 0)))(63 downto 0);
+  s_eop       <= queue(to_integer(r_idxr(buf_bits-1 downto 0)))(64);
+  tx_st_eop0  <= s_eop;
+  tx_st_sop0  <= r_sop;
+  
+  tx_st_valid0 <= active_high(r_idxr /= r_idxe) and r_delay_ready(r_delay_ready'length-1); -- only entries below the last complete eop are sent
+  
+  tx_data64_r : process(core_clk_out)
+  begin
+    if rising_edge(core_clk_out) then
+      if rstn = '0' then
+        r_delay_ready <= (others => '0');
+        r_idxr <= (others => '0');
+        r_sop <= '1';
+      else
+        r_delay_ready <= r_delay_ready(r_delay_ready'length-2 downto 0) & tx_st_ready0;
+        if tx_st_valid0 = '1' then
+          r_idxr <= r_idxr + 1;
+          r_sop <= s_eop; -- the word after an eop starts the next packet
+        end if;
+      end if;
+    end if;
+  end process;
+  
+  -- can only accept data if A pointer has not wrapped around the buffer to point at the R pointer
+  tx_rdy_o <= active_high(r_idxa(buf_bits-1 downto 0) /= r_idxr(buf_bits-1 downto 0)) or -- different slot: not full
+              active_high(r_idxa(buf_bits) = r_idxr(buf_bits)); -- same slot and same wrap bit: empty, not full
+  
+  s_idxw_p1 <= r_idxw + 1;
+  tx_data64_w : process(core_clk_out)
+  begin
+    if rising_edge(core_clk_out) then
+      if rstn = '0' then
+        r_idxw <= (others => '0');
+        r_idxa <= (others => '0');
+        r_idxe <= (others => '0');
+      else
+        queue(to_integer(r_idxw(buf_bits-1 downto 0))) <= tx_eop_i & s_queue_wdat; -- written every cycle; kept only if r_idxw advances
+        
+        if s_queue_wen = '1' then
+          r_idxw <= s_idxw_p1;
+        end if;
+        
+        if (s_queue_wen and tx_eop_i) = '1' then
+          r_idxe <= s_idxw_p1;
+          r_idxa <= s_idxw_p1; -- clear over-allocation
+        end if;
+        
+        if tx_alloc_i = '1' then
+          r_idxa <= r_idxa + 1; -- last assignment: takes precedence over the eop update above
+        end if;
+      end if;
+    end if;
+  end process;
+  
+  s_queue_wdat <= 
+    (zero32 & tx_dat_i) when r_tx32_full = '0' else
+    (tx_dat_i & r_tx_dat0);
+  
+  s_64to32_full <= r_tx32_full or r_pad or tx_eop_i;
+  s_queue_wen <= tx_en_i and s_64to32_full;
+  
+  tx_data32 : process(core_clk_out)
+  begin
+    if rising_edge(core_clk_out) then
+      if rstn = '0' then
+        r_tx_dat0 <= (others => '0');
+        r_tx32_full <= '0';
+        r_pad <= '0';
+      else
+        if tx_en_i = '1' then
+          r_tx_dat0 <= tx_dat_i;
+          r_tx32_full <= not s_64to32_full;
+        end if;
+      end if;
+    end if;
+  end process;
 end rtl;
diff --git a/hdl/pcie_tlp.vhd b/hdl/pcie_tlp.vhd
index d941698d..b12bef5a 100644
--- a/hdl/pcie_tlp.vhd
+++ b/hdl/pcie_tlp.vhd
@@ -12,12 +12,21 @@ entity pcie_tlp is
     rx_wb_dat_i   : in  std_logic_vector(31 downto 0);
     rx_wb_stall_o : out std_logic;
     
+    tx_rdy_i      : out std_logic;
+    tx_alloc_o    : in  std_logic;
+    tx_en_o       : in  std_logic;
+    tx_dat_o      : in  std_logic_vector(31 downto 0);
+    tx_eop_o      : in  std_logic;
+    
     wb_stb_o      : out std_logic;
     wb_adr_o      : out std_logic_vector(63 downto 0);
     wb_we_o       : out std_logic;
     wb_dat_o      : out std_logic_vector(31 downto 0);
     wb_sel_o      : out std_logic_vector(3 downto 0);
-    wb_stall_i    : in  std_logic);
+    wb_stall_i    : in  std_logic;
+    wb_ack_i      : in  std_logic;
+    wb_err_i      : in  std_logic;
+    wb_dat_i      : in  std_logic_vector(31 downto 0));
 end pcie_tlp;
 
 architecture rtl of pcie_tlp is
@@ -54,9 +63,14 @@ architecture rtl of pcie_tlp is
   -- Stall and strobe bypass mux
   signal r_always_stall, r_never_stall : std_logic;
   signal r_always_stb,   r_never_stb   : std_logic;
+  
+  -- Inflight reads and writes
+  signal wb_stb : std_logic;
+  signal r_flight_count : unsigned(4 downto 0) := (others => '0'); -- flight_counter has no reset branch
 begin
   rx_wb_stall_o <= r_always_stall or (not r_never_stall and wb_stall_i);
-  wb_stb_o <= r_always_stb or (not r_never_stb and rx_wb_stb_i);
+  wb_stb <= r_always_stb or (not r_never_stb and rx_wb_stb_i);
+  wb_stb_o <= wb_stb;
   wb_adr_o <= r_address;
   wb_dat_o <= rx_wb_dat_i;
   
@@ -251,4 +265,23 @@ begin
       end if;
     end if;
   end process;
+  
+  flight_counter : process(clk_i)
+  begin
+    if rising_edge(clk_i) then
+      if (wb_ack_i or wb_err_i) = '1' then
+        if (wb_stb and not wb_stall_i) = '1' then -- accepted request and a response in the same cycle
+          r_flight_count <= r_flight_count;
+        else
+          r_flight_count <= r_flight_count - 1;
+        end if;
+      else
+        if (wb_stb and not wb_stall_i) = '1' then -- a stalled strobe is not in flight yet
+          r_flight_count <= r_flight_count + 1;
+        else
+          r_flight_count <= r_flight_count;
+        end if;
+      end if;
+    end if;
+  end process;
 end rtl;
diff --git a/hdl/pcie_wb.vhd b/hdl/pcie_wb.vhd
index 7ebba6bb..9d597d4b 100644
--- a/hdl/pcie_wb.vhd
+++ b/hdl/pcie_wb.vhd
@@ -46,6 +46,13 @@ architecture rtl of pcie_wb is
   
   signal rx_wb_stb, rx_wb_stall : std_logic;
   signal rx_wb_dat : std_logic_vector(31 downto 0);
+  
+  signal tx_rdy, tx_alloc, tx_en, tx_eop : std_logic;
+  signal tx_dat : std_logic_vector(31 downto 0);
+  
+  signal wb_stb_o, wb_we_o, wb_ack_i : std_logic;
+  signal wb_dat_o, wb_dat_i, demo_reg : std_logic_vector(31 downto 0);
+  
 begin
 
   reset : pow_reset
@@ -76,10 +83,11 @@ begin
     rx_wb_stb_o   => rx_wb_stb,
     rx_wb_dat_o   => rx_wb_dat,
     rx_wb_stall_i => rx_wb_stall,
-    -- No TX... yet.
-    tx_wb_stb_i   => '0',
-    tx_wb_dat_i   => (others => '0'),
-    tx_wb_stall_o => open);
+    tx_rdy_o      => tx_rdy,
+    tx_alloc_i    => tx_alloc,
+    tx_en_i       => tx_en,
+    tx_dat_i      => tx_dat,
+    tx_eop_i      => tx_eop);
   
   pcie_logic : pcie_tlp port map(
     clk_i         => wb_clk,
@@ -90,12 +98,26 @@ begin
     rx_wb_dat_i   => rx_wb_dat,
     rx_wb_stall_o => rx_wb_stall,
     
-    wb_stb_o      => open,
+    wb_stb_o      => wb_stb_o,
     wb_adr_o      => open,
-    wb_we_o       => open,
-    wb_dat_o      => open,
+    wb_we_o       => wb_we_o,
+    wb_dat_o      => wb_dat_o,
     wb_sel_o      => open,
-    wb_stall_i    => stall);
+    wb_stall_i    => stall,
+    wb_ack_i      => wb_ack_i,
+    wb_err_i      => '0',
+    wb_dat_i      => wb_dat_i);
+  
+  wb_dat_i <= demo_reg;
+  demo : process(wb_clk)
+  begin
+    if rising_edge(wb_clk) then
+      if (wb_stb_o and wb_we_o and not stall) = '1' then
+        demo_reg <= wb_dat_o; -- single register: address and byte selects are ignored
+      end if;
+      wb_ack_i <= wb_stb_o and not stall; -- ack one cycle after each accepted strobe
+    end if;
+  end process;
   
   blink : process(wb_clk)
   begin
diff --git a/hdl/pcie_wb_pkg.vhd b/hdl/pcie_wb_pkg.vhd
index ccfc60cc..8e3f7cba 100644
--- a/hdl/pcie_wb_pkg.vhd
+++ b/hdl/pcie_wb_pkg.vhd
@@ -24,9 +24,14 @@ package pcie_wb_pkg is
       rx_wb_dat_o   : out std_logic_vector(31 downto 0);
       rx_wb_stall_i : in  std_logic;
       
-      tx_wb_stb_i   : in  std_logic;
-      tx_wb_dat_i   : in  std_logic_vector(31 downto 0);
-      tx_wb_stall_o : out std_logic);
+      -- pre-allocate buffer space used for TX
+      tx_rdy_o      : out std_logic;
+      tx_alloc_i    : in  std_logic; -- may only set '1' if rdy_o = '1'
+      
+      -- push TX data
+      tx_en_i       : in  std_logic; -- may never exceed alloc_i
+      tx_dat_i      : in  std_logic_vector(31 downto 0);
+      tx_eop_i      : in  std_logic); -- Mark last strobe
   end component;
   
   component pcie_tlp is
@@ -39,11 +44,20 @@ package pcie_wb_pkg is
       rx_wb_dat_i   : in  std_logic_vector(31 downto 0);
       rx_wb_stall_o : out std_logic;
       
+      tx_rdy_i      : out std_logic;
+      tx_alloc_o    : in  std_logic;
+      tx_en_o       : in  std_logic;
+      tx_dat_o      : in  std_logic_vector(31 downto 0);
+      tx_eop_o      : in  std_logic;
+      
       wb_stb_o      : out std_logic;
       wb_adr_o      : out std_logic_vector(63 downto 0);
       wb_we_o       : out std_logic;
       wb_dat_o      : out std_logic_vector(31 downto 0);
       wb_sel_o      : out std_logic_vector(3 downto 0);
-      wb_stall_i    : in  std_logic);
+      wb_stall_i    : in  std_logic;
+      wb_ack_i      : in  std_logic;
+      wb_err_i      : in  std_logic;
+      wb_dat_i      : in  std_logic_vector(31 downto 0));
   end component;
 end pcie_wb_pkg;
-- 
GitLab