Commit d0c2399b authored by Dimitris Lampridis's avatar Dimitris Lampridis Committed by Dimitris Lampridis

hdl: DMA rewrite work-in-progress.

parent 386c2ae9
Subproject commit 284373b7ea1db559dd323634dd34a8dba1811c12
Subproject commit 64f7e518bab2bf0489077f4b9eb26e8cccbf1411
......@@ -306,10 +306,10 @@ begin
dma_stat_reg <= c_DMA_STAT_ERROR;
dma_ctrl_current_state <= DMA_IDLE;
else
-- Start the DMA if the length is not 0
if dma_attrib_dir_reg = '0' then
-- L2P transfer (from target to PCIe)
dma_ctrl_start_l2p_o <= '1';
dma_ctrl_direction_o <= '0';
else
-- P2L transfer (from PCIe to target)
dma_ctrl_start_p2l_o <= '1';
......
......@@ -9,7 +9,7 @@
-- description: L2P DMA master
--
--------------------------------------------------------------------------------
-- Copyright CERN 2010-2018
-- Copyright CERN 2010-2020
--------------------------------------------------------------------------------
-- Copyright and related rights are licensed under the Solderpad Hardware
-- License, Version 2.0 (the "License"); you may not use this file except
......@@ -28,474 +28,574 @@ use IEEE.NUMERIC_STD.all;
use work.gn4124_core_pkg.all;
use work.gencores_pkg.all;
use work.wishbone_pkg.all;
use work.genram_pkg.all;
entity l2p_dma_master is
generic (
g_ADDR_FIFO_FULL_SIZE : positive := 1024;
g_ADDR_FIFO_FULL_THRES : positive := 700;
g_DATA_FIFO_FULL_SIZE : positive := 1024;
g_DATA_FIFO_FULL_THRES : positive := 700;
g_BYTE_SWAP : boolean := FALSE);
g_DMA_USE_PCI_CLK : boolean := FALSE;
g_DATA_FIFO_SIZE : positive := 128;
g_BYTE_SWAP : boolean := FALSE);
port (
-- GN4124 core clk and reset
clk_i : in std_logic;
rst_n_i : in std_logic;
-- From the DMA controller
dma_ctrl_target_addr_i : in std_logic_vector(31 downto 0);
dma_ctrl_host_addr_h_i : in std_logic_vector(31 downto 0);
dma_ctrl_host_addr_l_i : in std_logic_vector(31 downto 0);
dma_ctrl_len_i : in std_logic_vector(31 downto 0);
dma_ctrl_start_l2p_i : in std_logic;
dma_ctrl_done_o : out std_logic;
dma_ctrl_error_o : out std_logic;
dma_ctrl_byte_swap_i : in std_logic_vector(1 downto 0);
dma_ctrl_abort_i : in std_logic;
-- To the arbiter (L2P data)
ldm_arb_valid_o : out std_logic;
ldm_arb_dframe_o : out std_logic;
ldm_arb_data_o : out std_logic_vector(31 downto 0);
ldm_arb_req_o : out std_logic;
arb_ldm_gnt_i : in std_logic;
-- L2P channel control
l2p_edb_o : out std_logic; -- Asserted when transfer is aborted
l_wr_rdy_i : in std_logic; -- Asserted when GN4124 is ready to receive master write
l2p_rdy_i : in std_logic; -- De-asserted to pause transdert already in progress
tx_error_i : in std_logic; -- Asserted when unexpected or malformed paket received
-- DMA Interface (Pipelined Wishbone)
l2p_dma_rst_n_i : in std_logic; -- Active low reset in sync with l2p_dma_clk_i
l2p_dma_clk_i : in std_logic;
l2p_dma_adr_o : out std_logic_vector(31 downto 0);
l2p_dma_dat_i : in std_logic_vector(31 downto 0);
l2p_dma_dat_o : out std_logic_vector(31 downto 0);
l2p_dma_sel_o : out std_logic_vector(3 downto 0);
l2p_dma_cyc_o : out std_logic;
l2p_dma_stb_o : out std_logic;
l2p_dma_we_o : out std_logic;
l2p_dma_ack_i : in std_logic;
l2p_dma_stall_i : in std_logic);
-- GN4124 core clk and reset
clk_i : in std_logic;
rst_n_i : in std_logic;
-- From the DMA controller
dma_ctrl_target_addr_i : in std_logic_vector(31 downto 0);
dma_ctrl_host_addr_h_i : in std_logic_vector(31 downto 0);
dma_ctrl_host_addr_l_i : in std_logic_vector(31 downto 0);
dma_ctrl_len_i : in std_logic_vector(31 downto 0);
dma_ctrl_start_l2p_i : in std_logic;
dma_ctrl_done_o : out std_logic;
dma_ctrl_error_o : out std_logic;
dma_ctrl_byte_swap_i : in std_logic_vector(1 downto 0);
dma_ctrl_abort_i : in std_logic;
-- To the arbiter (L2P data)
ldm_arb_valid_o : out std_logic;
ldm_arb_dframe_o : out std_logic;
ldm_arb_data_o : out std_logic_vector(31 downto 0);
ldm_arb_req_o : out std_logic;
ldm_arb_gnt_i : in std_logic;
-- L2P channel control
l2p_edb_o : out std_logic; -- Asserted when transfer is aborted
l_wr_rdy_i : in std_logic; -- Asserted when GN4124 is ready to receive master write
l2p_rdy_i : in std_logic; -- De-asserted to pause transfer already in progress
tx_error_i : in std_logic; -- Asserted when unexpected or malformed paket received
-- DMA Interface (Pipelined Wishbone Master)
wb_dma_rst_n_i : in std_logic; -- Active low reset in sync with wb_dma_clk_i
wb_dma_clk_i : in std_logic;
wb_dma_i : in t_wishbone_master_in;
wb_dma_o : out t_wishbone_master_out);
end l2p_dma_master;
architecture behavioral of l2p_dma_master is
---------------------
-- Constants
---------------------
-- L2P_MAX_PAYLOAD must be a power of 2 for easier 32-bit address overflow check.
constant c_L2P_MAX_PAYLOAD_NBITS : integer := 7;
constant c_L2P_MAX_PAYLOAD_BYTES : integer := 2 ** c_L2P_MAX_PAYLOAD_NBITS;
constant c_L2P_MAX_PAYLOAD_WORDS : integer := c_L2P_MAX_PAYLOAD_BYTES / 4;
constant c_L2P_A32_OVERFLOW_MASK : std_logic_vector(31 downto 0) :=
not std_logic_vector(to_unsigned(c_L2P_MAX_PAYLOAD_BYTES - 1, 32));
constant c_TIMEOUT : integer := 2000;
-- how many pending WB requests to allow without ACK
constant c_L2P_WB_THROTTLE_THRESHOLD : integer :=
g_DATA_FIFO_FULL_SIZE - g_DATA_FIFO_FULL_THRES;
---------------------
-- Signals
---------------------
signal fifo_rst_n : std_logic;
signal fifo_rst_t : std_logic;
signal wb_fifo_rst_n : std_logic;
-- Data FIFO
signal data_fifo_rd : std_logic;
signal data_fifo_empty : std_logic;
signal data_fifo_full : std_logic;
signal data_fifo_dout : std_logic_vector(31 downto 0);
-- Addr FIFO
signal addr_fifo_rd : std_logic;
signal addr_fifo_wr : std_logic;
signal addr_fifo_empty : std_logic;
signal addr_fifo_full : std_logic;
signal addr_fifo_dout : std_logic_vector(31 downto 0);
signal addr_fifo_din : std_logic_vector(31 downto 0) := (others => '0');
-- L2P FSM
type l2p_dma_state_type is (L2P_IDLE, L2P_SETUP, L2P_HEADER,
L2P_ADDR_H, L2P_ADDR_L, L2P_SETUP_DATA, L2P_DATA,
L2P_LAST_DATA, L2P_ERROR);
signal l2p_dma_current_state : l2p_dma_state_type;
-- L2P packets
signal s_l2p_header : std_logic_vector(31 downto 0);
signal l2p_len_cnt : unsigned(29 downto 0) := (others => '0');
signal l2p_address_h : std_logic_vector(31 downto 0) := (others => '0');
signal l2p_address_l : std_logic_vector(31 downto 0) := (others => '0');
signal l2p_data_cnt : unsigned(12 downto 0) := (others => '0');
signal l2p_64b_address : std_logic;
signal l2p_len_header : unsigned(12 downto 0);
signal l2p_byte_swap : std_logic_vector(1 downto 0) := (others => '0');
signal l2p_last_packet : std_logic;
signal l2p_lbe_header : std_logic_vector(3 downto 0);
-- Counter
signal target_addr_cnt : std_logic_vector(31 downto 0) := (others => '0');
signal dma_length_cnt : unsigned(29 downto 0) := (others => '0');
signal l2p_timeout_cnt : unsigned(12 downto 0) := (others => '0');
-- Wishbone
signal l2p_dma_stb_t : std_logic;
signal l2p_throttle : std_logic;
signal wb_read_cnt : unsigned(log2_ceil(c_L2P_WB_THROTTLE_THRESHOLD)-1 downto 0);
architecture arch of l2p_dma_master is
-- Number of register stages used to delay the data FIFO almost-full flag.
-- The almost-full threshold of the FIFO is lowered by the same amount.
constant c_SYNC_FIFO_FULL_DELAY : natural := 3;
type l2p_dma_state_type is (L2P_IDLE, L2P_SETUP, L2P_WAIT,
L2P_HEADER, L2P_HOLD, L2P_ADDR_H,
L2P_ADDR_L, L2P_DATA, L2P_NEXT,
L2P_ERROR);
signal l2p_dma_current_state : l2p_dma_state_type := L2P_IDLE;
type wb_dma_state_type is (WB_IDLE, WB_SETUP, WB_DATA, WB_HOLD);
signal wb_dma_current_state : wb_dma_state_type := WB_IDLE;
signal dma_target_addr : unsigned(29 downto 0) := (others => '0');
signal dma_total_len : unsigned(29 downto 0) := (others => '0');
signal dma_packet_len : unsigned(10 downto 0) := (others => '0');
signal dma_host_addr : unsigned(63 downto 0) := (others => '0');
signal dma_byte_swap : std_logic_vector(1 downto 0) := (others => '0');
alias dma_host_addr_h : unsigned(31 downto 0) is dma_host_addr(63 downto 32);
alias dma_host_addr_l : unsigned(31 downto 0) is dma_host_addr(31 downto 0);
signal l2p_64b_address : std_logic := '0';
signal l2p_fsm_valid : std_logic := '0';
signal l2p_fsm_dframe : std_logic := '0';
signal l2p_fsm_hold : std_logic := '0';
signal l2p_fsm_data : std_logic_vector(31 downto 0) := (others => '0');
signal l2p_fsm_dma_param_wr : std_logic := '0';
signal l2p_fsm_dma_param_busy : std_logic := '0';
signal dma_param_sync : std_logic_vector(40 downto 0) := (others => '0');
signal dma_param_wr : std_logic := '0';
signal l2p_timeout_cnt : unsigned(12 downto 0) := (others => '0');
signal dma_last_packet : std_logic := '0';
signal wb_dma_stb : std_logic := '0';
signal wb_dma_addr : unsigned(29 downto 0) := (others => '0');
signal wb_dma_addr_d : unsigned(29 downto 0) := (others => '0');
signal wb_dma_cnt_stb : unsigned(10 downto 0) := (others => '0');
signal wb_dma_cnt_ack : unsigned(10 downto 0) := (others => '0');
signal wb_dma_fsm_en : std_logic := '0';
signal wb_dma_fsm_en_sync : std_logic := '0';
signal data_fifo_rd : std_logic := '0';
signal data_fifo_wr : std_logic := '0';
signal data_fifo_empty : std_logic := '1';
signal data_fifo_full : std_logic := '0';
signal data_fifo_din : std_logic_vector(31 downto 0) := (others => '0');
signal data_fifo_dout : std_logic_vector(31 downto 0) := (others => '0');
signal data_fifo_dout_d : std_logic_vector(31 downto 0) := (others => '0');
signal fsm_fifo_rst_n : std_logic := '0';
begin
------------------------------
-- Active low reset for fifos
------------------------------
fifo_rst_n <= rst_n_i and (not fifo_rst_t);
-- Local resynced copy of fifo_rst_n to make sure that both sides of the fifo
-- are reset if rst_n_i = '0'
cmp_wb_fifo_rst_sync: gc_sync_ffs
port map (
clk_i => l2p_dma_clk_i,
rst_n_i => l2p_dma_rst_n_i,
data_i => fifo_rst_n,
synced_o => wb_fifo_rst_n);
---------------------
-- L2P FSM
---------------------
p_l2p_fsm : process (clk_i)
begin
if rising_edge(clk_i) then
if (rst_n_i = '0') then
l2p_dma_current_state <= L2P_IDLE;
ldm_arb_req_o <= '0';
ldm_arb_valid_o <= '0';
ldm_arb_dframe_o <= '0';
data_fifo_rd <= '0';
dma_ctrl_done_o <= '0';
l2p_edb_o <= '0';
fifo_rst_t <= '1';
else
ldm_arb_data_o <= (others => 'X');
case l2p_dma_current_state is
when L2P_IDLE =>
l2p_timeout_cnt <= (others => '0');
l2p_edb_o <= '0';
fifo_rst_t <= '0';
ldm_arb_req_o <= '0';
ldm_arb_valid_o <= '0';
ldm_arb_dframe_o <= '0';
data_fifo_rd <= '0';
dma_ctrl_done_o <= '0';
if (dma_ctrl_start_l2p_i = '1') then
l2p_dma_current_state <= L2P_SETUP;
end if;
-- 64bit address flag used to generate the L2P header and as an input to
-- the L2P FSM to control the transition through the L2P_ADDR states.
l2p_64b_address <= '0' when dma_host_addr_h = x"00000000" else '1';
---------------------------------------
-- L2P FSM (in the Gennum clock domain)
---------------------------------------
p_l2p_fsm : process (clk_i)
begin
if rising_edge(clk_i) then
if rst_n_i = '0' then
l2p_dma_current_state <= L2P_IDLE;
ldm_arb_req_o <= '0';
l2p_fsm_valid <= '0';
l2p_fsm_dframe <= '0';
l2p_fsm_hold <= '0';
dma_ctrl_done_o <= '0';
dma_ctrl_error_o <= '0';
wb_dma_fsm_en <= '0';
l2p_edb_o <= '0';
fsm_fifo_rst_n <= '0';
data_fifo_rd <= '0';
else
data_fifo_dout_d <= data_fifo_dout;
-- default values if not overridden by current state
ldm_arb_req_o <= '0';
l2p_fsm_valid <= '0';
l2p_fsm_dframe <= '0';
l2p_fsm_dma_param_wr <= '0';
dma_ctrl_done_o <= '0';
dma_ctrl_error_o <= '0';
l2p_edb_o <= '0';
fsm_fifo_rst_n <= '1';
data_fifo_rd <= '0';
wb_dma_fsm_en <= '1';
l2p_timeout_cnt <= (others => '0');
case l2p_dma_current_state is
when L2P_IDLE =>
wb_dma_fsm_en <= '0';
fsm_fifo_rst_n <= '0';
l2p_fsm_hold <= '0';
dma_last_packet <= '0';
if dma_ctrl_start_l2p_i = '1' then
dma_target_addr <= unsigned(dma_ctrl_target_addr_i(31 downto 2));
dma_host_addr_h <= unsigned(dma_ctrl_host_addr_h_i);
dma_host_addr_l <= unsigned(dma_ctrl_host_addr_l_i);
dma_total_len <= unsigned(dma_ctrl_len_i(31 downto 2));
dma_byte_swap <= dma_ctrl_byte_swap_i;
l2p_dma_current_state <= L2P_SETUP;
end if;
when L2P_SETUP =>
ldm_arb_valid_o <= '0';
ldm_arb_dframe_o <= '0';
data_fifo_rd <= '0';
l2p_timeout_cnt <= (others => '0');
if (l2p_rdy_i = '1') then
l2p_dma_current_state <= L2P_HEADER;
ldm_arb_req_o <= '1'; -- Request bus
end if;
when L2P_SETUP =>
-- Calculate DMA packet length for next transfer. A transfer can be
-- up to 1024 words, limited by the "length" field in the L2P header.
if dma_total_len > 1024 then
dma_packet_len <= to_unsigned(1024, dma_packet_len'length);
dma_last_packet <= '0';
elsif dma_total_len = 1024 then
dma_packet_len <= to_unsigned(1024, dma_packet_len'length);
dma_last_packet <= '1';
else
dma_packet_len <= dma_total_len(10 downto 0);
dma_last_packet <= '1';
end if;
l2p_fsm_dma_param_wr <= '1';
if l2p_fsm_dma_param_busy = '1' then
l2p_dma_current_state <= L2P_WAIT;
end if;
when L2P_HEADER =>
ldm_arb_valid_o <= '0';
if (arb_ldm_gnt_i = '1' and l_wr_rdy_i = '1') then
ldm_arb_req_o <= '0'; -- Bus has been granted
-- Send header
ldm_arb_data_o <= s_l2p_header;
ldm_arb_valid_o <= '1';
ldm_arb_dframe_o <= '1'; -- Keep asserted to stay bus master
if (l2p_64b_address = '1') then
l2p_dma_current_state <= L2P_ADDR_H;
else
l2p_dma_current_state <= L2P_ADDR_L;
end if;
end if;
when L2P_WAIT =>
-- Send request to DMA arbiter
ldm_arb_req_o <= not ldm_arb_gnt_i;
-- Move to next state when:
-- a) granted DMA access by arbiter
-- b) there is data waiting in the FIFO
-- c) Gennum is ready
if ldm_arb_gnt_i = '1' and data_fifo_empty = '0' and
l_wr_rdy_i = '1' and l2p_rdy_i = '1' then
l2p_dma_current_state <= L2P_HEADER;
end if;
when L2P_ADDR_H =>
ldm_arb_data_o <= l2p_address_h;
-- Note: we don't check l2p_rdy_i again until we reach the L2P_DATA state.
-- That's ok, according to Gennum, we have up to 7 clock cycles to
-- drop ldm_arb_valid after the Gennum drops l2p_rdy.
when L2P_HEADER =>
-- Must keep dframe asserted to stay bus master until end of transfer
l2p_fsm_dframe <= '1';
l2p_fsm_valid <= '1';
l2p_fsm_data <= (others => '0');
-- Header type
l2p_fsm_data(25) <= '1';
l2p_fsm_data(24) <= l2p_64b_address;
-- LBE (Last Byte Enable) must be "0000" only
-- when the length field is equal to 1
if dma_packet_len /= 1 then
l2p_fsm_data(23 downto 20) <= "1111";
end if;
-- FBE (First Byte Enable)
l2p_fsm_data(19 downto 16) <= "1111";
-- Length field (in 32 bit words). When zero it means 1024 words.
l2p_fsm_data(9 downto 0) <= std_logic_vector(dma_packet_len(9 downto 0));
if (l2p_64b_address = '1') then
l2p_dma_current_state <= L2P_ADDR_H;
else
l2p_dma_current_state <= L2P_ADDR_L;
end if;
when L2P_ADDR_L =>
ldm_arb_data_o <= l2p_address_l;
when L2P_ADDR_H =>
l2p_fsm_dframe <= '1';
l2p_fsm_valid <= '1';
l2p_fsm_data <= std_logic_vector(dma_host_addr_h);
l2p_dma_current_state <= L2P_ADDR_L;
when L2P_ADDR_L =>
l2p_fsm_dframe <= '1';
l2p_fsm_valid <= '1';
l2p_fsm_data <= std_logic_vector(dma_host_addr_l);
-- Already checked data_fifo_empty flag during L2P_WAIT.
-- Start readout here to get first data out on the next cycle.
data_fifo_rd <= '1';
l2p_dma_current_state <= L2P_DATA;
when L2P_HOLD =>
l2p_fsm_dframe <= '1';
l2p_fsm_valid <= '0';
if data_fifo_empty = '0' and l2p_rdy_i = '1' then
data_fifo_rd <= '1';
l2p_dma_current_state <= L2P_DATA;
end if;
when L2P_DATA =>
if (data_fifo_empty = '0' and l2p_rdy_i = '1') then
data_fifo_rd <= '1';
else
data_fifo_rd <= '0';
when L2P_DATA =>
l2p_fsm_dframe <= '1';
-- Data FIFO readout
if data_fifo_empty = '1' or l2p_rdy_i = '0' then
l2p_dma_current_state <= L2P_HOLD;
else
data_fifo_rd <= '1';
l2p_fsm_valid <= '1';
l2p_fsm_data <= f_byte_swap(g_BYTE_SWAP, data_fifo_dout, dma_byte_swap);
dma_packet_len <= dma_packet_len - 1;
-- Detect end of transfer
if dma_packet_len = 1 then
l2p_fsm_dframe <= '0';
if dma_last_packet = '0' then
l2p_dma_current_state <= L2P_NEXT;
else
dma_total_len <= (others => '0');
l2p_dma_current_state <= L2P_IDLE;
dma_ctrl_done_o <= '1';
end if;
end if;
end if;
-- Timeout counter, it is reset to 0 by default FSM value
-- if not increased here.
if l2p_rdy_i = '1' then
l2p_timeout_cnt <= l2p_timeout_cnt + 1;
end if;
-- Check for errors
if tx_error_i = '1' or dma_ctrl_abort_i = '1' or l2p_timeout_cnt = x"fff" then
l2p_fsm_dframe <= '0';
l2p_dma_current_state <= L2P_ERROR;
end if;
if (data_fifo_rd = '1' and l2p_data_cnt = 1) then
ldm_arb_data_o <= f_byte_swap(g_BYTE_SWAP, data_fifo_dout, l2p_byte_swap);
ldm_arb_valid_o <= '1';
ldm_arb_dframe_o <= '0';
l2p_dma_current_state <= L2P_LAST_DATA;
data_fifo_rd <= '0'; -- Don't read too much
elsif (data_fifo_rd = '1' and l2p_data_cnt > 1) then
ldm_arb_data_o <= f_byte_swap(g_BYTE_SWAP, data_fifo_dout, l2p_byte_swap);
ldm_arb_valid_o <= '1';
ldm_arb_dframe_o <= '1';
else
ldm_arb_data_o <= f_byte_swap(g_BYTE_SWAP, data_fifo_dout, l2p_byte_swap);
ldm_arb_valid_o <= '0';
ldm_arb_dframe_o <= '1';
end if;
when L2P_NEXT =>
if data_fifo_empty = '1' then
dma_total_len <= dma_total_len - 1024;
dma_target_addr <= dma_target_addr + 1024;
dma_host_addr <= dma_host_addr + 4096;
l2p_dma_current_state <= L2P_SETUP;
end if;
-- Error condition, abort transfer
if (tx_error_i = '1' or l2p_timeout_cnt > c_TIMEOUT or dma_ctrl_abort_i = '1') then
l2p_dma_current_state <= L2P_ERROR;
end if;
when L2P_ERROR =>
wb_dma_fsm_en <= '0';
fsm_fifo_rst_n <= '0';
dma_ctrl_error_o <= '1';
l2p_edb_o <= '1';
l2p_dma_current_state <= L2P_IDLE;
-- Timeout counter
if (data_fifo_empty = '1' or l2p_rdy_i = '1') then
l2p_timeout_cnt <= l2p_timeout_cnt + 1;
else
l2p_timeout_cnt <= (others => '0');
end if;
when others =>
l2p_dma_current_state <= L2P_ERROR;
when L2P_LAST_DATA =>
ldm_arb_dframe_o <= '0';
ldm_arb_valid_o <= '0';
data_fifo_rd <= '0';
if (dma_ctrl_abort_i = '1' or tx_error_i = '1') then
l2p_dma_current_state <= L2P_IDLE;
dma_ctrl_done_o <= '1';
elsif (l2p_last_packet = '0') then
l2p_dma_current_state <= L2P_SETUP;
else
l2p_dma_current_state <= L2P_IDLE;
dma_ctrl_done_o <= '1';
end if;
end case;
end if;
end if;
end process p_l2p_fsm;
when L2P_ERROR =>
ldm_arb_dframe_o <= '0';
ldm_arb_valid_o <= '1';
l2p_edb_o <= '1';
fifo_rst_t <= '1';
l2p_dma_current_state <= L2P_IDLE;
ldm_arb_valid_o <= l2p_fsm_valid;
ldm_arb_dframe_o <= l2p_fsm_dframe;
ldm_arb_data_o <= l2p_fsm_data;
when others =>
l2p_dma_current_state <= L2P_IDLE;
-------------------------------------------------
-- Wishbone Master (in the Wishbone clock domain)
-------------------------------------------------
end case;
end if;
end if;
end process p_l2p_fsm;
---------------------
--- Paket Generator
---------------------
-- Last Byte Enable must be "0000" when length = 1
l2p_lbe_header <= "0000" when l2p_len_header = 1 else "1111";
-- 64bit address flag
l2p_64b_address <= '0' when l2p_address_h = x"00000000" else '1';
-- Packet header
s_l2p_header(31 downto 29) <= "000"; --> Traffic Class
s_l2p_header(28) <= '0'; --> Snoop
s_l2p_header(27 downto 25) <= "001"; --> Header type,
-- memory write 32-bit or
-- memory write 64-bit
s_l2p_header(24) <= l2p_64b_address;
s_l2p_header(23 downto 20) <= l2p_lbe_header; --> LBE (Last Byte Enable)
s_l2p_header(19 downto 16) <= "1111"; --> FBE (First Byte Enable)
s_l2p_header(15 downto 13) <= "000"; --> Reserved
s_l2p_header(12) <= '0'; --> VC (Virtual Channel)
s_l2p_header(11 downto 10) <= "00"; --> Reserved
s_l2p_header(9 downto 0) <= STD_LOGIC_VECTOR(l2p_len_header(9 downto 0)); --> Length (in 32-bit words)
-- 0x000 => 1024 words (4096 bytes)
p_pkt_gen : process (clk_i)
begin
if rising_edge(clk_i) then
if (rst_n_i = '0') then
l2p_len_header <= (others => '0');
l2p_last_packet <= '0';
else
if (l2p_dma_current_state = L2P_IDLE) then
l2p_len_cnt <= unsigned(dma_ctrl_len_i(31 downto 2));
l2p_address_h <= dma_ctrl_host_addr_h_i;
l2p_address_l <= dma_ctrl_host_addr_l_i;
l2p_byte_swap <= dma_ctrl_byte_swap_i;
l2p_last_packet <= '0';
elsif (l2p_dma_current_state = L2P_SETUP) then
if (l2p_len_cnt > c_L2P_MAX_PAYLOAD_WORDS) then
l2p_data_cnt <= TO_UNSIGNED(c_L2P_MAX_PAYLOAD_WORDS, 13);
l2p_len_header <= TO_UNSIGNED(c_L2P_MAX_PAYLOAD_WORDS, 13);
l2p_last_packet <= '0';
elsif (l2p_len_cnt = c_L2P_MAX_PAYLOAD_WORDS) then
l2p_data_cnt <= TO_UNSIGNED(c_L2P_MAX_PAYLOAD_WORDS, 13);
l2p_len_header <= TO_UNSIGNED(c_L2P_MAX_PAYLOAD_WORDS, 13);
l2p_last_packet <= '1';
else
l2p_data_cnt <= l2p_len_cnt(12 downto 0);
l2p_len_header <= l2p_len_cnt(12 downto 0);
l2p_last_packet <= '1';
-- DMA parameter hand-over when the Wishbone DMA side runs on its own clock
-- (g_DMA_USE_PCI_CLK = FALSE). The target address and packet length prepared
-- by the L2P FSM in the clk_i domain are packed into one 41-bit word and
-- passed to the wb_dma_clk_i domain, together with the Wishbone FSM enable.
gen_sync_word_dma_param : if g_DMA_USE_PCI_CLK = FALSE generate
signal dma_param_to_sync : std_logic_vector(40 downto 0);
begin
-- Word layout: [40:11] = 30-bit target address, [10:0] = 11-bit packet length.
-- The Wishbone FSM unpacks the same bit ranges from dma_param_sync.
dma_param_to_sync(40 downto 11) <= std_logic_vector(dma_target_addr);
dma_param_to_sync(10 downto 0) <= std_logic_vector(dma_packet_len);
-- Word synchronizer from clk_i (input side) to wb_dma_clk_i (output side).
-- presumably handshake-based; verify against the gc_sync_word_wr sources.
cmp_sync_dma_param : entity work.gc_sync_word_wr
generic map (
g_AUTO_WR => false,
g_WIDTH => 41)
port map (
clk_in_i => clk_i,
-- NOTE(review): both resets of the synchronizer are tied inactive ('1');
-- confirm this is intended and not a work-in-progress shortcut.
rst_in_n_i => '1',
clk_out_i => wb_dma_clk_i,
rst_out_n_i => '1',
data_i => dma_param_to_sync,
-- Write request pulsed by the L2P FSM in L2P_SETUP.
wr_i => l2p_fsm_dma_param_wr,
-- The L2P FSM stays in L2P_SETUP until it sees busy asserted.
busy_o => l2p_fsm_dma_param_busy,
ack_o => open,
data_o => dma_param_sync,
-- Seen by the Wishbone FSM in WB_IDLE as the trigger to load new parameters.
wr_o => dma_param_wr);
-- Bring the single-bit Wishbone FSM enable into the wb_dma_clk_i domain.
cmp_wb_dma_fsm_en_sync : gc_sync
port map (
clk_i => wb_dma_clk_i,
rst_n_a_i => '1',
d_i => wb_dma_fsm_en,
q_o => wb_dma_fsm_en_sync);
end generate gen_sync_word_dma_param;
-- DMA parameter hand-over when g_DMA_USE_PCI_CLK = TRUE: no synchronizers are
-- instantiated and the L2P FSM signals are wired straight through.
-- NOTE(review): this assumes wb_dma_clk_i is the same clock as clk_i in this
-- configuration -- confirm at the instantiating level.
gen_no_sync_word_dma_param: if g_DMA_USE_PCI_CLK = TRUE generate
-- Same packing as the synchronized variant:
-- [40:11] = target address, [10:0] = packet length.
dma_param_sync(40 downto 11) <= std_logic_vector(dma_target_addr);
dma_param_sync(10 downto 0) <= std_logic_vector(dma_packet_len);
-- busy simply mirrors the write request, so the L2P FSM (which waits for
-- busy = '1' in L2P_SETUP) still advances without a real synchronizer.
l2p_fsm_dma_param_busy <= l2p_fsm_dma_param_wr;
dma_param_wr <= l2p_fsm_dma_param_wr;
wb_dma_fsm_en_sync <= wb_dma_fsm_en;
end generate gen_no_sync_word_dma_param;
-- p_wb_master : process (wb_dma_clk_i) is
-- begin
-- if rising_edge(wb_dma_clk_i) then
-- if wb_dma_rst_n_i = '0' then
-- wb_dma_stb <= '0';
-- else
-- -- Handle strobe and address generation.
-- wb_dma_stb <= '0';
-- if wb_xfer_en_sync = '1' then
-- if wb_dma_cnt > 1 and data_fifo_full = '0' or
-- wb_dma_stb <= '1';
-- if wb_dma_i.stall = '0' and wb_dma_stb = '1' then
-- wb_dma_addr <= wb_dma_addr + 1;
-- wb_dma_cnt <= wb_dma_cnt - 1;
-- end if;
-- end if;
-- elsif dma_param_wr = '1' then
-- wb_dma_addr <= unsigned(dma_param_sync(40 downto 11));
-- wb_dma_cnt <= unsigned(dma_param_sync(10 downto 0));
-- end if;
-- -- Write received data to FIFO.
-- -- No need to check FIFO full, it was done earlier
-- -- when we decided to strobe.
-- data_fifo_din <= wb_dma_i.dat;
-- data_fifo_wr <= wb_dma_i.ack;
-- end if;
-- end if;
-- end process p_wb_master;
-- Wishbone master outputs. This master only reads: we is tied low, all byte
-- lanes are selected and the write data bus is driven to zero.
-- P2P communication, no need to drop WB cycle.
-- (P2P presumably stands for point-to-point -- confirm.)
wb_dma_o.cyc <= '1';
wb_dma_o.stb <= wb_dma_stb;
-- wb_dma_addr_d is the registered copy of the 30-bit address counter kept by
-- p_wb_fsm; two zero bits are prepended to fill the 32-bit address bus.
wb_dma_o.adr <= "00" & std_logic_vector(wb_dma_addr_d);
wb_dma_o.we <= '0';
wb_dma_o.sel <= (others => '1');
wb_dma_o.dat <= (others => '0');
-- Every acknowledged read is written into the data FIFO.
-- No need to check FIFO full, it was done earlier
-- when we decided to strobe (in combination with a proper
-- almost_full flag from the FIFO).
data_fifo_din <= wb_dma_i.dat;
data_fifo_wr <= wb_dma_i.ack;
p_wb_fsm : process (wb_dma_clk_i)
begin
if rising_edge(wb_dma_clk_i) then
if wb_dma_rst_n_i = '0' or wb_dma_fsm_en_sync = '0' then
wb_dma_stb <= '0';
wb_dma_addr <= (others => '0');
wb_dma_current_state <= WB_IDLE;
else
wb_dma_addr_d <= wb_dma_addr;
-- default values if not overridden by current state
wb_dma_stb <= '0';
case wb_dma_current_state is
when WB_IDLE =>
if dma_param_wr = '1' then
wb_dma_current_state <= WB_SETUP;
end if;
elsif (l2p_dma_current_state = L2P_DATA) then
if (data_fifo_empty = '0' and data_fifo_rd = '1') then
l2p_data_cnt <= l2p_data_cnt - 1;
when WB_SETUP =>
wb_dma_addr <= unsigned(dma_param_sync(40 downto 11));
wb_dma_cnt_stb <= unsigned(dma_param_sync(10 downto 0));
wb_dma_cnt_ack <= unsigned(dma_param_sync(10 downto 0));
if data_fifo_full = '0' then
wb_dma_current_state <= WB_DATA;
end if;
elsif (l2p_dma_current_state = L2P_LAST_DATA) then
if (l2p_last_packet = '0') then
-- Increase Address, check for overflow
if (l2p_address_l and (c_L2P_A32_OVERFLOW_MASK)) = c_L2P_A32_OVERFLOW_MASK then
l2p_address_h <= std_logic_vector(unsigned(l2p_address_h) + 1);
l2p_address_l(31 downto c_L2P_MAX_PAYLOAD_NBITS) <= (others => '0');
else
l2p_address_l <= std_logic_vector(
unsigned(l2p_address_l) + (c_L2P_MAX_PAYLOAD_BYTES));
end if;
l2p_len_cnt <= l2p_len_cnt - c_L2P_MAX_PAYLOAD_WORDS;
when WB_DATA =>
if wb_dma_i.ack = '1' then
wb_dma_cnt_ack <= wb_dma_cnt_ack - 1;
end if;
if data_fifo_full = '1' then
wb_dma_current_state <= WB_HOLD;
elsif wb_dma_cnt_ack = 0 and wb_dma_cnt_stb = 0 then
wb_dma_current_state <= WB_IDLE;
else
l2p_len_cnt <= (others => '0');
if wb_dma_i.stall = '0' and wb_dma_cnt_stb > 0 then
wb_dma_stb <= '1';
wb_dma_addr <= wb_dma_addr + 1;
wb_dma_cnt_stb <= wb_dma_cnt_stb - 1;
end if;
end if;
end if;
end if;
end if;
end process p_pkt_gen;
---------------------
-- Address Counter
---------------------
p_target_cnt : process (clk_i)
when WB_HOLD =>
if wb_dma_i.ack = '1' then
wb_dma_cnt_ack <= wb_dma_cnt_ack - 1;
end if;
if data_fifo_full = '0' then
wb_dma_current_state <= WB_DATA;
end if;
when others =>
wb_dma_current_state <= WB_IDLE;
end case;
end if;
end if;
end process p_wb_fsm;
-----------------------------------------
-- Flow Control FIFO (cross-clock domain)
-----------------------------------------
gen_sync_fifo : if g_DMA_USE_PCI_CLK = TRUE generate
signal data_fifo_full_d : std_logic_vector(c_SYNC_FIFO_FULL_DELAY - 1 downto 0) := (others => '0');
signal data_fifo_full_next : std_logic;
signal data_fifo_rst_n : std_logic := '0';
begin
p_fifo_full_delay_reg : process (clk_i) is
begin
if rising_edge(clk_i) then
if (rst_n_i = '0') then
dma_ctrl_error_o <= '0';
addr_fifo_wr <= '0';
-- we want proper registers to help with timing and
-- having a reset prevents inferring of shift register.
if data_fifo_rst_n = '0' then
data_fifo_full_d <= (others => '0');
else
-- New Transfer started
if (l2p_dma_current_state = L2P_ERROR) then
target_addr_cnt <= (others => '0');
dma_ctrl_error_o <= '1';
addr_fifo_wr <= '0';
dma_length_cnt <= (others => '0');
elsif (dma_ctrl_start_l2p_i = '1') then
if (l2p_dma_current_state = L2P_IDLE) then
-- dma target addr is byte address, need 32bit address
target_addr_cnt(31 downto 30) <= "00";
target_addr_cnt(29 downto 0) <= dma_ctrl_target_addr_i(31 downto 2);
-- dma target length is in byte, need 32bit
dma_length_cnt <= unsigned(dma_ctrl_len_i(31 downto 2));
dma_ctrl_error_o <= '0';
else
target_addr_cnt <= (others => '0');
dma_length_cnt <= (others => '0');
dma_ctrl_error_o <= '1';
end if;
addr_fifo_wr <= '0';
elsif (dma_length_cnt > 0) and (addr_fifo_full = '0') then
addr_fifo_wr <= '1';
target_addr_cnt <= std_logic_vector(unsigned(target_addr_cnt) + 1);
dma_length_cnt <= dma_length_cnt - 1;
addr_fifo_din <= target_addr_cnt;
else
addr_fifo_wr <= '0';
dma_ctrl_error_o <= '0';
end if;
data_fifo_full_d <= data_fifo_full_d(data_fifo_full_d'high-1 downto 0) & data_fifo_full_next;
end if;
end if;
end process p_target_cnt;
---------------------
-- Wishbone Master
---------------------
-- Tie offs
l2p_dma_cyc_o <= '1';
l2p_dma_stb_o <= l2p_dma_stb_t;
l2p_dma_sel_o <= (others => '1');
l2p_dma_adr_o <= addr_fifo_dout;
l2p_dma_dat_o <= (others => '0');
l2p_dma_we_o <= '0';
-- fetch new data when:
-- a) there is a new request in addr_fifo
-- b) there is enough space to store it in data_fifo
-- c) there aren't too many pending WB transactions to flood the data_fifo
l2p_dma_stb_t <= not(addr_fifo_empty or data_fifo_full or l2p_throttle);
addr_fifo_rd <= l2p_dma_stb_t and not l2p_dma_stall_i;
p_wb_master : process (l2p_dma_clk_i)
end process p_fifo_full_delay_reg;
-- Single-clock data FIFO between the Wishbone master (write side) and the
-- L2P FSM (read side), both clocked by clk_i in this configuration.
-- The "full" flag seen by the Wishbone FSM is the last stage of the
-- almost-full delay register.
data_fifo_full <= data_fifo_full_d(data_fifo_full_d'high);
-- FIFO reset (active low): asserted by the core reset, by the L2P FSM
-- (fsm_fifo_rst_n is driven low in L2P_IDLE and L2P_ERROR) or by the
-- Wishbone-side reset.
data_fifo_rst_n <= rst_n_i and fsm_fifo_rst_n and wb_dma_rst_n_i;
cmp_data_fifo : generic_sync_fifo
generic map (
g_DATA_WIDTH => 32,
-- NOTE(review): size is hard-coded; the g_DATA_FIFO_SIZE generic is not
-- referenced in the visible code -- confirm whether it should be used here.
g_SIZE => 256,
g_SHOW_AHEAD => TRUE,
g_WITH_EMPTY => TRUE,
g_WITH_FULL => FALSE,
g_WITH_ALMOST_EMPTY => FALSE,
g_WITH_ALMOST_FULL => TRUE,
-- Threshold lowered by the number of delay stages added to the flag.
g_ALMOST_FULL_THRESHOLD => 128 - c_SYNC_FIFO_FULL_DELAY,
g_REGISTER_FLAG_OUTPUTS => FALSE,
g_WITH_COUNT => FALSE)
port map (
rst_n_i => data_fifo_rst_n,
clk_i => clk_i,
-- write port
d_i => data_fifo_din,
we_i => data_fifo_wr,
almost_full_o => data_fifo_full_next,
-- read port
q_o => data_fifo_dout,
rd_i => data_fifo_rd,
empty_o => data_fifo_empty);
end generate gen_sync_fifo;
gen_async_fifo : if g_DMA_USE_PCI_CLK = FALSE generate
signal data_fifo_full_d : std_logic_vector(c_SYNC_FIFO_FULL_DELAY - 1 downto 0) := (others => '0');
signal data_fifo_full_next : std_logic;
signal data_fifo_rst_wr_n : std_logic;
signal data_fifo_rst_rd_n : std_logic;
signal fsm_fifo_rst_sync_n : std_logic;
signal rst_sync_n : std_logic;
signal wb_dma_rst_sync_n : std_logic;
begin
-- Reset handling for the dual-clock data FIFO. Each of the three active-low
-- reset sources is used directly on the FIFO side that shares its clock and
-- through a gc_sync instance on the other side.
-- (gc_sync is presumably a flip-flop synchronizer -- verify against its source.)

-- Wishbone-side reset brought into the clk_i domain (used by the read side).
cmp_wb_dma_rst_n_sync : gc_sync
port map (
clk_i => clk_i,
rst_n_a_i => '1',
d_i => wb_dma_rst_n_i,
q_o => wb_dma_rst_sync_n);
-- L2P FSM FIFO reset brought into the wb_dma_clk_i domain (used by the write side).
cmp_fsm_fifo_rst_n_sync : gc_sync
port map (
clk_i => wb_dma_clk_i,
rst_n_a_i => '1',
d_i => fsm_fifo_rst_n,
q_o => fsm_fifo_rst_sync_n);
-- Core reset brought into the wb_dma_clk_i domain (used by the write side).
cmp_rst_n_sync : gc_sync
port map (
clk_i => wb_dma_clk_i,
rst_n_a_i => '1',
d_i => rst_n_i,
q_o => rst_sync_n);
-- Write-side reset: all terms are in the wb_dma_clk_i domain.
data_fifo_rst_wr_n <= wb_dma_rst_n_i and fsm_fifo_rst_sync_n and rst_sync_n;
-- Read-side reset: all terms are in the clk_i domain.
data_fifo_rst_rd_n <= wb_dma_rst_sync_n and fsm_fifo_rst_n and rst_n_i;
p_fifo_full_delay_reg : process (wb_dma_clk_i) is
begin
if rising_edge(l2p_dma_clk_i) then
if wb_fifo_rst_n = '0' then
wb_read_cnt <= (others => '0');
l2p_throttle <= '0';
if rising_edge(wb_dma_clk_i) then
-- we want proper registers to help with timing and
-- having a reset prevents inferring of shift register.
if data_fifo_rst_wr_n = '0' then
data_fifo_full_d <= (others => '0');
else
if (l2p_dma_stb_t = '1' and l2p_dma_ack_i = '0') then
wb_read_cnt <= wb_read_cnt + 1;
elsif (l2p_dma_stb_t = '0' and l2p_dma_ack_i = '1') then
wb_read_cnt <= wb_read_cnt - 1;
end if;
if wb_read_cnt > c_L2P_WB_THROTTLE_THRESHOLD then
l2p_throttle <= '1';
else
l2p_throttle <= '0';
end if;
data_fifo_full_d <= data_fifo_full_d(data_fifo_full_d'high-1 downto 0) & data_fifo_full_next;
end if;
end if;
end process p_wb_master;
end process p_fifo_full_delay_reg;
---------------------
-- FIFOs
---------------------
data_fifo_full <= data_fifo_full_d(data_fifo_full_d'high);
cmp_addr_fifo: generic_async_fifo_dual_rst
generic map (
g_data_width => 32,
g_size => g_ADDR_FIFO_FULL_SIZE,
g_show_ahead => true,
g_with_wr_full => false,
g_with_wr_almost_full => true,
g_almost_empty_threshold => 0,
g_almost_full_threshold => g_ADDR_FIFO_FULL_THRES)
port map (
rst_wr_n_i => fifo_rst_n,
clk_wr_i => clk_i,
d_i => addr_fifo_din,
we_i => addr_fifo_wr,
wr_almost_full_o => addr_fifo_full,
rst_rd_n_i => wb_fifo_rst_n,
clk_rd_i => l2p_dma_clk_i,
q_o => addr_fifo_dout,
rd_i => addr_fifo_rd,
rd_empty_o => addr_fifo_empty);
cmp_data_fifo: generic_async_fifo_dual_rst
cmp_data_fifo : generic_async_fifo_dual_rst
generic map (
g_data_width => 32,
g_size => g_DATA_FIFO_FULL_SIZE,
g_show_ahead => true,
g_with_wr_full => false,
g_with_wr_almost_full => true,
g_almost_empty_threshold => 0,
g_almost_full_threshold => g_DATA_FIFO_FULL_THRES)
g_DATA_WIDTH => 32,
g_SIZE => 256,
g_SHOW_AHEAD => TRUE,
g_WITH_WR_FULL => FALSE,
g_WITH_WR_ALMOST_FULL => TRUE,
g_ALMOST_FULL_THRESHOLD => 128 - c_SYNC_FIFO_FULL_DELAY)
port map (
rst_wr_n_i => wb_fifo_rst_n,
clk_wr_i => l2p_dma_clk_i,
d_i => l2p_dma_dat_i,
we_i => l2p_dma_ack_i,
wr_almost_full_o => data_fifo_full,
rst_rd_n_i => fifo_rst_n,
clk_rd_i => clk_i,
q_o => data_fifo_dout,
rd_i => data_fifo_rd,
rd_empty_o => data_fifo_empty);
end behavioral;
-- write port
rst_wr_n_i => data_fifo_rst_wr_n,
clk_wr_i => wb_dma_clk_i,
d_i => data_fifo_din,
we_i => data_fifo_wr,
wr_almost_full_o => data_fifo_full_next,
-- read port
rst_rd_n_i => data_fifo_rst_rd_n,
clk_rd_i => clk_i,
q_o => data_fifo_dout,
rd_i => data_fifo_rd,
rd_empty_o => data_fifo_empty);
end generate gen_async_fifo;
end architecture arch;
......@@ -9,7 +9,7 @@
-- description: GN4124 core top level. Version for spartan6 FPGAs.
--
--------------------------------------------------------------------------------
-- Copyright CERN 2010-2019
-- Copyright CERN 2010-2020
--------------------------------------------------------------------------------
-- Copyright and related rights are licensed under the Solderpad Hardware
-- License, Version 2.0 (the "License"); you may not use this file except
......@@ -25,8 +25,10 @@
library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;
use work.gn4124_core_pkg.all;
use work.gencores_pkg.all;
use work.wishbone_pkg.all;
library UNISIM;
use UNISIM.vcomponents.all;
......@@ -39,6 +41,11 @@ entity gn4124_core is
generic (
-- If TRUE, enable the DMA interface
g_WITH_DMA : boolean := TRUE;
-- If TRUE, use the 200MHz PCI clock for DMA transfers as well.
-- If FALSE, use the clock provided by the user on dma_clk_i,
-- which is assumed to be asynchronous to the PCI clock and therefore
-- goes through dual-clock FIFOs.
g_DMA_USE_PCI_CLK : boolean := FALSE;
-- Tunable size and threshold for all async FIFOs.
-- If not sure, leave the defaults.
g_WBM_TO_WB_FIFO_SIZE : positive := 128;
......@@ -47,10 +54,7 @@ entity gn4124_core is
g_WBM_FROM_WB_FIFO_FULL_THRES : positive := 110;
g_P2L_FIFO_SIZE : positive := 512;
g_P2L_FIFO_FULL_THRES : positive := 500;
g_L2P_ADDR_FIFO_FULL_SIZE : positive := 1024;
g_L2P_ADDR_FIFO_FULL_THRES : positive := 700;
g_L2P_DATA_FIFO_FULL_SIZE : positive := 1024;
g_L2P_DATA_FIFO_FULL_THRES : positive := 700;
g_L2P_DATA_FIFO_SIZE : positive := 128;
-- Wishbone ACK timeout (in wishbone clock cycles)
g_ACK_TIMEOUT : positive := 100);
port (
......@@ -59,6 +63,11 @@ entity gn4124_core is
rst_n_a_i : in std_logic; -- Asynchronous reset from GN4124
status_o : out std_logic_vector(31 downto 0); -- Core status output
---------------------------------------------------------
-- 200MHz PCI clock output and synchronous reset for applications
clk_200m_o : out std_logic;
rst_200m_n_o : out std_logic;
---------------------------------------------------------
-- P2L Direction
--
......@@ -170,6 +179,9 @@ architecture rtl of gn4124_core is
signal sys_rst_n : std_logic;
signal arst_pll : std_logic;
signal wb_dma_clk : std_logic;
signal wb_dma_rst_n : std_logic;
-------------------------------------------------------------
-- P2L DataPath (from deserializer to packet decoder)
-------------------------------------------------------------
......@@ -277,6 +289,19 @@ architecture rtl of gn4124_core is
signal dma_irq : std_logic;
attribute keep of dma_ctrl_l2p_error : signal is "TRUE";
attribute keep of dma_ctrl_l2p_done : signal is "TRUE";
attribute keep of dma_ctrl_start_l2p : signal is "TRUE";
attribute keep of dma_ctrl_abort : signal is "TRUE";
attribute keep of ldm_arb_valid : signal is "TRUE";
attribute keep of ldm_arb_dframe : signal is "TRUE";
attribute keep of ldm_arb_data : signal is "TRUE";
attribute keep of ldm_arb_req : signal is "TRUE";
attribute keep of l2p_rdy : signal is "TRUE";
attribute keep of l_wr_rdy : signal is "TRUE";
attribute keep of tx_error : signal is "TRUE";
attribute keep of arb_ldm_gnt : signal is "TRUE";
------------------------------------------------------------------------------
-- CSR wishbone bus
------------------------------------------------------------------------------
......@@ -285,14 +310,8 @@ architecture rtl of gn4124_core is
------------------------------------------------------------------------------
-- DMA wishbone bus
------------------------------------------------------------------------------
signal l2p_dma_adr : std_logic_vector(31 downto 0);
signal l2p_dma_dat : std_logic_vector(31 downto 0);
signal l2p_dma_sel : std_logic_vector(3 downto 0);
signal l2p_dma_cyc : std_logic;
signal l2p_dma_stb : std_logic;
signal l2p_dma_we : std_logic;
signal l2p_dma_ack : std_logic;
signal l2p_dma_stall : std_logic;
signal l2p_dma_in : t_wishbone_master_in;
signal l2p_dma_out : t_wishbone_master_out;
signal p2l_dma_adr : std_logic_vector(31 downto 0);
signal p2l_dma_dat : std_logic_vector(31 downto 0);
......@@ -352,6 +371,9 @@ begin
clks_i(0) => sys_clk,
rst_n_o(0) => sys_rst_n);
-- Export the core's internal clock (sys_clk) and its reset (sys_rst_n) on the
-- clk_200m_o / rst_200m_n_o outputs so that applications can use them.
clk_200m_o <= sys_clk;
rst_200m_n_o <= sys_rst_n;
-- Always active high reset for PLL and SERDES
arst_pll <= not(rst_n_a_i);
......@@ -360,6 +382,19 @@ begin
------------------------------------------------------------------------------
irq_p_o <= irq_p_i;
------------------------------------------------------------------------------
-- Clock/reset source for the DMA Wishbone interface
--
--   g_DMA_USE_PCI_CLK = FALSE : user-supplied dma_clk_i / dma_rst_n_i
--   g_DMA_USE_PCI_CLK = TRUE  : the core's own sys_clk / sys_rst_n
------------------------------------------------------------------------------
gen_async_wb_dma : if not g_DMA_USE_PCI_CLK generate
  wb_dma_rst_n <= dma_rst_n_i;
  wb_dma_clk   <= dma_clk_i;
end generate gen_async_wb_dma;
gen_sync_wb_dma : if g_DMA_USE_PCI_CLK generate
  wb_dma_rst_n <= sys_rst_n;
  wb_dma_clk   <= sys_clk;
end generate gen_sync_wb_dma;
--============================================================================
-- P2L DataPath
--============================================================================
......@@ -583,11 +618,9 @@ begin
-----------------------------------------------------------------------------
cmp_l2p_dma_master : entity work.l2p_dma_master
generic map (
g_ADDR_FIFO_FULL_SIZE => g_L2P_ADDR_FIFO_FULL_SIZE,
g_ADDR_FIFO_FULL_THRES => g_L2P_ADDR_FIFO_FULL_THRES,
g_DATA_FIFO_FULL_SIZE => g_L2P_DATA_FIFO_FULL_SIZE,
g_DATA_FIFO_FULL_THRES => g_L2P_DATA_FIFO_FULL_THRES,
g_BYTE_SWAP => TRUE)
g_DMA_USE_PCI_CLK => g_DMA_USE_PCI_CLK,
g_DATA_FIFO_SIZE => g_L2P_DATA_FIFO_SIZE,
g_BYTE_SWAP => TRUE)
port map (
clk_i => sys_clk,
rst_n_i => sys_rst_n,
......@@ -606,30 +639,28 @@ begin
ldm_arb_dframe_o => ldm_arb_dframe,
ldm_arb_data_o => ldm_arb_data,
ldm_arb_req_o => ldm_arb_req,
arb_ldm_gnt_i => arb_ldm_gnt,
ldm_arb_gnt_i => arb_ldm_gnt,
l2p_edb_o => l2p_edb,
l_wr_rdy_i => l_wr_rdy,
l2p_rdy_i => l2p_rdy,
tx_error_i => tx_error,
l2p_dma_rst_n_i => dma_rst_n_i,
l2p_dma_clk_i => dma_clk_i,
l2p_dma_adr_o => l2p_dma_adr,
l2p_dma_dat_i => dma_dat_i,
l2p_dma_dat_o => l2p_dma_dat,
l2p_dma_sel_o => l2p_dma_sel,
l2p_dma_cyc_o => l2p_dma_cyc,
l2p_dma_stb_o => l2p_dma_stb,
l2p_dma_we_o => l2p_dma_we,
l2p_dma_ack_i => l2p_dma_ack,
l2p_dma_stall_i => l2p_dma_stall);
wb_dma_rst_n_i => wb_dma_rst_n,
wb_dma_clk_i => wb_dma_clk,
wb_dma_i => l2p_dma_in,
wb_dma_o => l2p_dma_out);
-- The dat, err and rty inputs of the DMA Wishbone port are wired straight into
-- the L2P master's input record. The ack and stall elements of the same record
-- are assigned separately, inside the p_dma_wb_mux process.
l2p_dma_in.dat <= dma_dat_i;
l2p_dma_in.err <= dma_err_i;
l2p_dma_in.rty <= dma_rty_i;
-----------------------------------------------------------------------------
-- P2L DMA master
-----------------------------------------------------------------------------
cmp_p2l_dma_master : entity work.p2l_dma_master
generic map (
--g_DMA_USE_PCI_CLK => g_DMA_USE_PCI_CLK,
g_FIFO_SIZE => g_P2L_FIFO_SIZE,
g_FIFO_FULL_THRES => g_P2L_FIFO_FULL_THRES,
g_BYTE_SWAP => TRUE)
......@@ -691,32 +722,31 @@ begin
);
-- Combinatorial multiplexer for the shared DMA Wishbone port.
-- dma_ctrl_direction selects which DMA master drives the dma_*_o outputs and
-- receives dma_ack_i / dma_stall_i; the master that is not selected sees its
-- ack and stall inputs forced to '0'.
p_dma_wb_mux : process (dma_ack_i, dma_ctrl_direction, dma_stall_i,
l2p_dma_adr, l2p_dma_cyc, l2p_dma_dat, l2p_dma_sel,
l2p_dma_stb, l2p_dma_we, p2l_dma_adr, p2l_dma_cyc,
p2l_dma_dat, p2l_dma_sel, p2l_dma_stb, p2l_dma_we)
l2p_dma_out, p2l_dma_adr, p2l_dma_cyc, p2l_dma_dat,
p2l_dma_sel, p2l_dma_stb, p2l_dma_we)
begin
-- Direction '0': the L2P DMA master owns the bus
if (dma_ctrl_direction = '0') then
dma_adr_o <= l2p_dma_adr;
dma_dat_o <= l2p_dma_dat;
dma_sel_o <= l2p_dma_sel;
dma_cyc_o <= l2p_dma_cyc;
dma_stb_o <= l2p_dma_stb;
dma_we_o <= l2p_dma_we;
l2p_dma_ack <= dma_ack_i;
l2p_dma_stall <= dma_stall_i;
p2l_dma_ack <= '0';
p2l_dma_stall <= '0';
dma_adr_o <= l2p_dma_out.adr;
dma_dat_o <= l2p_dma_out.dat;
dma_sel_o <= l2p_dma_out.sel;
dma_cyc_o <= l2p_dma_out.cyc;
dma_stb_o <= l2p_dma_out.stb;
dma_we_o <= l2p_dma_out.we;
l2p_dma_in.ack <= dma_ack_i;
l2p_dma_in.stall <= dma_stall_i;
p2l_dma_ack <= '0';
p2l_dma_stall <= '0';
-- Any other direction value: the P2L DMA master owns the bus
else
dma_adr_o <= p2l_dma_adr;
dma_dat_o <= p2l_dma_dat;
dma_sel_o <= p2l_dma_sel;
dma_cyc_o <= p2l_dma_cyc;
dma_stb_o <= p2l_dma_stb;
dma_we_o <= p2l_dma_we;
p2l_dma_ack <= dma_ack_i;
p2l_dma_stall <= dma_stall_i;
l2p_dma_ack <= '0';
l2p_dma_stall <= '0';
dma_adr_o <= p2l_dma_adr;
dma_dat_o <= p2l_dma_dat;
dma_sel_o <= p2l_dma_sel;
dma_cyc_o <= p2l_dma_cyc;
dma_stb_o <= p2l_dma_stb;
dma_we_o <= p2l_dma_we;
p2l_dma_ack <= dma_ack_i;
p2l_dma_stall <= dma_stall_i;
l2p_dma_in.ack <= '0';
l2p_dma_in.stall <= '0';
end if;
end process p_dma_wb_mux;
......
......@@ -56,16 +56,14 @@ package gn4124_core_pkg is
component xwb_gn4124_core is
generic (
g_WITH_DMA : boolean := TRUE;
g_DMA_USE_PCI_CLK : boolean := FALSE;
g_WBM_TO_WB_FIFO_SIZE : positive := 128;
g_WBM_TO_WB_FIFO_FULL_THRES : positive := 110;
g_WBM_FROM_WB_FIFO_SIZE : positive := 128;
g_WBM_FROM_WB_FIFO_FULL_THRES : positive := 110;
g_P2L_FIFO_SIZE : positive := 512;
g_P2L_FIFO_FULL_THRES : positive := 500;
g_L2P_ADDR_FIFO_FULL_SIZE : positive := 1024;
g_L2P_ADDR_FIFO_FULL_THRES : positive := 700;
g_L2P_DATA_FIFO_FULL_SIZE : positive := 1024;
g_L2P_DATA_FIFO_FULL_THRES : positive := 700;
g_L2P_DATA_FIFO_SIZE : positive := 128;
g_WB_MASTER_MODE : t_wishbone_interface_mode := PIPELINED;
g_WB_MASTER_GRANULARITY : t_wishbone_address_granularity := BYTE;
g_WB_DMA_CFG_MODE : t_wishbone_interface_mode := PIPELINED;
......@@ -76,6 +74,8 @@ package gn4124_core_pkg is
port (
rst_n_a_i : in std_logic;
status_o : out std_logic_vector(31 downto 0);
clk_200m_o : out std_logic;
rst_200m_n_o : out std_logic;
p2l_clk_p_i : in std_logic;
p2l_clk_n_i : in std_logic;
p2l_data_i : in std_logic_vector(15 downto 0);
......@@ -117,16 +117,14 @@ package gn4124_core_pkg is
component gn4124_core
generic (
g_WITH_DMA : boolean := TRUE;
g_DMA_USE_PCI_CLK : boolean := FALSE;
g_WBM_TO_WB_FIFO_SIZE : positive := 128;
g_WBM_TO_WB_FIFO_FULL_THRES : positive := 110;
g_WBM_FROM_WB_FIFO_SIZE : positive := 128;
g_WBM_FROM_WB_FIFO_FULL_THRES : positive := 110;
g_P2L_FIFO_SIZE : positive := 512;
g_P2L_FIFO_FULL_THRES : positive := 500;
g_L2P_ADDR_FIFO_FULL_SIZE : positive := 1024;
g_L2P_ADDR_FIFO_FULL_THRES : positive := 700;
g_L2P_DATA_FIFO_FULL_SIZE : positive := 1024;
g_L2P_DATA_FIFO_FULL_THRES : positive := 700;
g_L2P_DATA_FIFO_SIZE : positive := 128;
g_ACK_TIMEOUT : positive := 100);
port (
---------------------------------------------------------
......@@ -134,6 +132,11 @@ package gn4124_core_pkg is
rst_n_a_i : in std_logic; -- Asynchronous reset from GN4124
status_o : out std_logic_vector(31 downto 0); -- Core status output
---------------------------------------------------------
-- 200MHz PCI clock output and synchronous reset for applications
clk_200m_o : out std_logic;
rst_200m_n_o : out std_logic;
---------------------------------------------------------
-- P2L Direction
--
......
......@@ -11,7 +11,7 @@
-- Version for Spartan6 FPGAs.
--
--------------------------------------------------------------------------------
-- Copyright CERN 2018
-- Copyright CERN 2018 - 2020
--------------------------------------------------------------------------------
-- Copyright and related rights are licensed under the Solderpad Hardware
-- License, Version 2.0 (the "License"); you may not use this file except
......@@ -34,6 +34,11 @@ entity xwb_gn4124_core is
generic (
-- If TRUE, enable the DMA interface
g_WITH_DMA : boolean := TRUE;
-- If TRUE, use the 200MHz PCI clock for DMA transfers as well.
-- If FALSE, use the clock provided by the user on wb_dma_dat_clk_i,
-- which is assumed to be asynchronous to the PCI clock and therefore
-- goes through dual-clock FIFOs.
g_DMA_USE_PCI_CLK : boolean := FALSE;
-- Tunable size and threshold for all async FIFOs.
-- If not sure, leave the defaults.
g_WBM_TO_WB_FIFO_SIZE : positive := 128;
......@@ -42,10 +47,7 @@ entity xwb_gn4124_core is
g_WBM_FROM_WB_FIFO_FULL_THRES : positive := 110;
g_P2L_FIFO_SIZE : positive := 512;
g_P2L_FIFO_FULL_THRES : positive := 500;
g_L2P_ADDR_FIFO_FULL_SIZE : positive := 1024;
g_L2P_ADDR_FIFO_FULL_THRES : positive := 700;
g_L2P_DATA_FIFO_FULL_SIZE : positive := 1024;
g_L2P_DATA_FIFO_FULL_THRES : positive := 700;
g_L2P_DATA_FIFO_SIZE : positive := 128;
-- WB config for three WB interfaces
g_WB_MASTER_MODE : t_wishbone_interface_mode := PIPELINED;
g_WB_MASTER_GRANULARITY : t_wishbone_address_granularity := BYTE;
......@@ -61,6 +63,11 @@ entity xwb_gn4124_core is
rst_n_a_i : in std_logic; -- Asynchronous reset from GN4124
status_o : out std_logic_vector(31 downto 0); -- Core status output
---------------------------------------------------------
-- 200MHz PCI clock output and synchronous reset for applications
clk_200m_o : out std_logic;
rst_200m_n_o : out std_logic;
---------------------------------------------------------
-- P2L Direction
--
......@@ -191,20 +198,20 @@ begin
cmp_wrapped_gn4124 : gn4124_core
generic map (
g_WITH_DMA => g_WITH_DMA,
g_DMA_USE_PCI_CLK => g_DMA_USE_PCI_CLK,
g_WBM_TO_WB_FIFO_SIZE => g_WBM_TO_WB_FIFO_SIZE,
g_WBM_TO_WB_FIFO_FULL_THRES => g_WBM_TO_WB_FIFO_FULL_THRES,
g_WBM_FROM_WB_FIFO_SIZE => g_WBM_FROM_WB_FIFO_SIZE,
g_WBM_FROM_WB_FIFO_FULL_THRES => g_WBM_FROM_WB_FIFO_FULL_THRES,
g_P2L_FIFO_SIZE => g_P2L_FIFO_SIZE,
g_P2L_FIFO_FULL_THRES => g_P2L_FIFO_FULL_THRES,
g_L2P_ADDR_FIFO_FULL_SIZE => g_L2P_ADDR_FIFO_FULL_SIZE,
g_L2P_ADDR_FIFO_FULL_THRES => g_L2P_ADDR_FIFO_FULL_THRES,
g_L2P_DATA_FIFO_FULL_SIZE => g_L2P_DATA_FIFO_FULL_SIZE,
g_L2P_DATA_FIFO_FULL_THRES => g_L2P_DATA_FIFO_FULL_THRES,
g_L2P_DATA_FIFO_SIZE => g_L2P_DATA_FIFO_SIZE,
g_ACK_TIMEOUT => g_ACK_TIMEOUT)
port map (
rst_n_a_i => rst_n_a_i,
status_o => status_o,
clk_200m_o => clk_200m_o,
rst_200m_n_o => rst_200m_n_o,
p2l_clk_p_i => p2l_clk_p_i,
p2l_clk_n_i => p2l_clk_n_i,
p2l_data_i => p2l_data_i,
......
......@@ -36,6 +36,7 @@ import wishbone_pkg::*;
module main;
reg clk_125m = 0;
reg clk_62m5 = 0;
logic gn4124_irq;
......@@ -43,19 +44,35 @@ module main;
t_wishbone_master_out wb_out, wb_dma_out, wb_mem_out;
// Free-running testbench clocks.
// clk_125m toggles every 4 ns, i.e. an 8 ns period (125 MHz).
always #4ns clk_125m <= ~clk_125m;
// NOTE(review): toggling every 23 ns gives a 46 ns period (~21.7 MHz), not the
// 62.5 MHz (8 ns half-period) that the name clk_62m5 suggests. This may be a
// deliberate choice to get a clock unrelated to the others -- confirm, and
// rename the signal or fix the delay accordingly.
always #23ns clk_62m5 <= ~clk_62m5;
logic rst_125m_n;
logic rst_62m5_n;
logic rst_gn4124_n;
logic clk_gn4124;
logic wb_dma_clk;
logic wb_dma_rst_n;
initial begin
rst_125m_n = 0;
#80ns rst_125m_n = 1;
rst_62m5_n = 0;
#80ns;
rst_125m_n = 1;
rst_62m5_n = 1;
end
IGN4124PCIMaster i_gn4124 ();
xwb_gn4124_core
xwb_gn4124_core #
(
.g_dma_use_pci_clk (0)
)
DUT (
.rst_n_a_i (i_gn4124.rst_n),
.clk_200m_o (clk_gn4124),
.rst_200m_n_o (rst_gn4124_n),
.p2l_clk_p_i (i_gn4124.p2l_clk_p),
.p2l_clk_n_i (i_gn4124.p2l_clk_n),
.p2l_data_i (i_gn4124.p2l_data),
......@@ -88,12 +105,21 @@ module main;
.wb_dma_cfg_rst_n_i (rst_125m_n),
.wb_dma_cfg_i (wb_out),
.wb_dma_cfg_o (wb_in),
.wb_dma_dat_clk_i (clk_125m),
.wb_dma_dat_rst_n_i (rst_125m_n),
.wb_dma_dat_clk_i (wb_dma_clk),
.wb_dma_dat_rst_n_i (wb_dma_rst_n),
.wb_dma_dat_i (wb_dma_in),
.wb_dma_dat_o (wb_dma_out)
);
/* -----\/----- EXCLUDED -----\/-----
assign wb_dma_clk = clk_gn4124;
assign wb_dma_rst_n = rst_gn4124_n;
assign wb_dma_clk = clk_125m;
assign wb_dma_rst_n = rst_125m_n;
-----/\----- EXCLUDED -----/\----- */
assign wb_dma_clk = clk_62m5;
assign wb_dma_rst_n = rst_62m5_n;
xwb_dpram #
(
.g_size (32),
......@@ -104,8 +130,8 @@ module main;
.g_slave2_granularity (1)
)
MEM (
.rst_n_i (1'b1),
.clk_sys_i (clk_125m),
.rst_n_i (wb_dma_rst_n),
.clk_sys_i (wb_dma_clk),
.slave1_i (wb_dma_out),
.slave1_o (wb_dma_in),
.slave2_i (wb_mem_out),
......@@ -153,7 +179,7 @@ module main;
initial begin
automatic int ntest = 1;
const int tests = 8;
const int tests = 9;
uint32_t addr, val, expected;
......@@ -185,6 +211,7 @@ module main;
$write("PASS\n");
/* -----\/----- EXCLUDED -----\/-----
$write("Test %0d/%0d: 128B read over DMA, abort after first read: ",
ntest++, tests);
......@@ -196,7 +223,7 @@ module main;
// Check values read from memory
@(posedge i_gn4124.l2p_valid); // skip header
@(posedge i_gn4124.l2p_valid);
repeat(2) @(posedge i_gn4124.l2p_clk_p);
expected = 32'h8000001f;
val = i_gn4124.l2p_data;
......@@ -214,12 +241,13 @@ module main;
repeat(2) @(posedge clk_125m);
$write("PASS\n");
-----/\----- EXCLUDED -----/\----- */
$write("Test %0d/%0d: 2x128B chained reads over DMA: ",
ntest++, tests);
// Setup DMA chain info in BFM memory
i_gn4124.host_mem_write('h20000, 'h00000000); // remote address
i_gn4124.host_mem_write('h20000, 'h00001000); // remote address
i_gn4124.host_mem_write('h20004, 'h20000100); // hstartL
i_gn4124.host_mem_write('h20008, 'h00000000); // hstartH
i_gn4124.host_mem_write('h2000C, 'h80); // count
......@@ -241,22 +269,21 @@ module main;
@(posedge dma_irq);
check_irq_status;
clear_irq;
for (addr = 'h00; addr < 'h20; addr += 1)
begin
expected = 32'h80000000 + 'h20 - addr - 1;
expected = 32'h80000000 + 'h20 - (addr % 'h20) - 1;
mem_check(4 * addr, expected);
mem_check('h100 + 4 * addr, expected);
end
clear_irq;
repeat(4) @(posedge clk_125m);
$write("PASS\n");
// ---------------------------------
$write("Test %0d/%0d: 128 reads over DMA: ",
$write("Test %0d/%0d: 256B read over DMA: ",
ntest++, tests);
// Setup DMA
......@@ -270,15 +297,14 @@ module main;
@(posedge dma_irq);
check_irq_status;
clear_irq;
for (addr = 'h00; addr < 'h40; addr += 1)
begin
expected = 32'h80000000 + 'h20 - addr - 1;
expected = 32'h80000000 + 'h20 - (addr % 'h20) - 1;
mem_check(4 * addr, expected);
end
clear_irq;
repeat(4) @(posedge clk_125m);
$write("PASS\n");
......@@ -286,11 +312,11 @@ module main;
// Check all four byte swap settings
// ---------------------------------
for (int i = 0; i < 4; i++) begin
$write("Test %0d/%0d: 64KB read over DMA (byte swap = %0d): ",
$write("Test %0d/%0d: 16KB read over DMA (byte swap = %0d): ",
ntest++, tests, i);
// Restart
acc.write('h14, 'h10000); // count
acc.write('h14, 'h4000); // count
acc.write('h20, 'h00); // attrib
acc.write('h0c, 'h20000000 + i * 'h4000); // hstartL
acc.write('h10, 'h00000000); // hstartH
......@@ -300,7 +326,7 @@ module main;
check_irq_status;
for (addr = 'h00; addr < 'h4000; addr += 1)
for (addr = 'h00; addr < 'h1000; addr += 1)
begin
expected = 32'h80000000 + 'h20 - (addr % 'h20) - 1;
if (i == 1)
......@@ -321,26 +347,26 @@ module main;
#1us;
end
$write("Test %0d/%0d: 256B read over DMA with 32bit host address overflow: ",
$write("Test %0d/%0d: 8KB read over DMA with 32bit host address overflow: ",
ntest++, tests);
acc.write('h14, 'h100); // count
acc.write('h14, 'h2000); // count
acc.write('h20, 'h00); // attrib
acc.write('h0c, 'hffffff80); // hstartL
acc.write('h0c, 'hfffff000); // hstartL
acc.write('h10, 'h00000000); // hstartH
acc.write('h00, 'h01); // start
// Transfer will be split internally by L2P DMA master in two requests, the first
// one with a 32-bit address starting at ffff_ff80 and the next one with a 64-bit
// one with a 32-bit address starting at ffff_f000 and the next one with a 64-bit
// address starting at 1_0000_0000
@(posedge DUT.cmp_wrapped_gn4124.ldm_arb_dframe);
@(posedge DUT.cmp_wrapped_gn4124.sys_clk);
val_check("Host address overflow header", 1, DUT.cmp_wrapped_gn4124.ldm_arb_data, 'h02ff0020);
val_check("Host address overflow header", 1, DUT.cmp_wrapped_gn4124.ldm_arb_data, 'h02ff0000);
@(posedge DUT.cmp_wrapped_gn4124.sys_clk);
val_check("Host address overflow address", 1, DUT.cmp_wrapped_gn4124.ldm_arb_data, 'hffffff80);
val_check("Host address overflow address", 1, DUT.cmp_wrapped_gn4124.ldm_arb_data, 'hfffff000);
@(posedge DUT.cmp_wrapped_gn4124.ldm_arb_dframe);
@(posedge DUT.cmp_wrapped_gn4124.sys_clk);
val_check("Host address overflow header", 2, DUT.cmp_wrapped_gn4124.ldm_arb_data, 'h03ff0020);
val_check("Host address overflow header", 2, DUT.cmp_wrapped_gn4124.ldm_arb_data, 'h03ff0000);
@(posedge DUT.cmp_wrapped_gn4124.sys_clk);
val_check("Host address overflow address high", 2, DUT.cmp_wrapped_gn4124.ldm_arb_data, 1);
@(posedge DUT.cmp_wrapped_gn4124.sys_clk);
......
# Wave-window setup script for debugging the L2P DMA master inside the DUT.
# All signals are taken from
# /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master.
onerror {resume}
quietly WaveActivateNextPane {} 0
# Group 1 (default colour): DMA Wishbone clock, state, strobe/ack counters,
# the wb_dma_o / wb_dma_i records (expanded) and the data FIFO wr/full/din signals.
add wave -noupdate /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/wb_dma_clk_i
add wave -noupdate /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/wb_dma_current_state
add wave -noupdate /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/wb_dma_cnt_stb
add wave -noupdate -expand /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/wb_dma_o
add wave -noupdate /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/wb_dma_cnt_ack
add wave -noupdate -expand /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/wb_dma_i
add wave -noupdate /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/data_fifo_wr
add wave -noupdate /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/data_fifo_full
add wave -noupdate /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/data_fifo_din
# Group 2 (Gold): clk_i, the L2P DMA state and the ldm_arb_* outputs
# (valid, dframe, data).
add wave -noupdate -color Gold /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/clk_i
add wave -noupdate -color Gold /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/l2p_dma_current_state
add wave -noupdate -color Gold /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/ldm_arb_valid_o
add wave -noupdate -color Gold /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/ldm_arb_dframe_o
add wave -noupdate -color Gold /main/DUT/cmp_wrapped_gn4124/gen_with_dma/cmp_l2p_dma_master/ldm_arb_data_o
TreeUpdate [SetDefaultTree]
# Saved cursor position
WaveRestoreCursors {{Cursor 1} {537831000 ps} 0}
quietly wave cursor active 1
# Wave-window display settings (column widths, justification, grid, timeline)
configure wave -namecolwidth 199
configure wave -valuecolwidth 100
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
update
# Saved zoom range
WaveRestoreZoom {0 ps} {737045400 ps}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment