diff --git a/hdl/rtl/hydra_core.vhd b/hdl/rtl/hydra_core.vhd index 3000c8bf4bfdac32ccd04da5a672d86fe09e5ad4..d2d489f67b4136ec03555008b4d4d242c35e0fb9 100644 --- a/hdl/rtl/hydra_core.vhd +++ b/hdl/rtl/hydra_core.vhd @@ -134,8 +134,10 @@ architecture arch of hydra_core is signal scrub_cfg_wr, scrub_cfg_iram_val, scrub_cfg_dram_val : std_logic; signal nbr_cpu_data_err : std_logic_vector (31 downto 0); signal nbr_cpu_iaddr_err : std_logic_vector (31 downto 0); + signal iram_de_addr : std_logic_vector (31 downto 0) := (others => '0'); - signal reset_cause_cpu, reset_cause_ecc, reset_cause_wd : std_logic; + signal reset_cause_cpu, reset_cause_wd : std_logic; + signal reset_cause_iram_ecc, reset_cause_dram_ecc : std_logic; signal dram_scrub_period, iram_scrub_period : std_logic_vector(15 downto 0); -- Initial watchdog period. @@ -155,7 +157,6 @@ begin dwb_o <= dwb_out; cpu_rst2 <= "111" when rst_n_i = '0' or cpu_rst_n_i = '0' else cpu_rst; - cpu_err_o <= '1' when cpu_rst /= "111" and cpu_rst /= "000" else '0'; inst_cpus : entity work.hydra_triple_cpu port map ( @@ -234,6 +235,7 @@ begin ecc_one_o => iram_ecc_err, ecc_fatal_o => iram_ecc_fatal, + ecc_fatal_addr_o => iram_de_addr(g_IRAM_LOG_SIZE - 1 downto 2), scrub_se_o => iram_scrub_se, scrub_de_o => iram_scrub_de, scrubber_period_i => iram_scrub_period @@ -503,7 +505,8 @@ begin wb_i => sv_wb_out, wb_o => sv_wb_in, reset_cause_cpu_i => reset_cause_cpu, - reset_cause_ecc_i => reset_cause_ecc, + reset_cause_iram_ecc_i => reset_cause_iram_ecc, + reset_cause_dram_ecc_i => reset_cause_dram_ecc, reset_cause_watchdog_i => reset_cause_wd, cpu_reset_i => cpu_rst, cpu_reset_o => open, @@ -527,6 +530,7 @@ begin cpu_data_err_i => nbr_cpu_data_err, cpu_iaddr_err_i => nbr_cpu_iaddr_err, dram_ecc_mask_o => dram_ecc_mask, + iram_de_addr_i => iram_de_addr, scrub_cfg_iram_en_i => iram_scrub_en, scrub_cfg_iram_en_o => scrub_cfg_iram_val, scrub_cfg_dram_en_i => dram_scrub_en, @@ -539,6 +543,8 @@ begin process (clk_sys_i) is begin if rising_edge(clk_sys_i) then + cpu_err_o <= '0'; + if rst_n_i = '0' or cpu_rst_n_i = '0' then state <= S_VOTER; cpu_rst <= (others => '1'); @@ -546,7 +552,8 @@ begin nbr_cpu_data_err <= (others => '0'); nbr_cpu_iaddr_err <= (others => '0'); reset_cause_cpu <= '0'; - reset_cause_ecc <= '0'; + reset_cause_iram_ecc <= '0'; + reset_cause_dram_ecc <= '0'; reset_cause_wd <= '0'; else case state is @@ -578,15 +585,18 @@ begin end if; end case; - if cpu_sync = "000" or cpu_rst_err = '1' + if cpu_sync = "000" + or cpu_rst_err = '1' or (err_cpu_dm = '1' and state = S_LOCK) or (cpu_wr = '1' and cpu_recovery_in = '1') then -- Fatal error cpu_rst <= (others => '1'); reset_cause_cpu <= not cpu_rst_err; - reset_cause_ecc <= dram_ecc_fatal or iram_ecc_fatal; + reset_cause_iram_ecc <= iram_ecc_fatal; + reset_cause_dram_ecc <= dram_ecc_fatal; reset_cause_wd <= wd_timeout; + cpu_err_o <= '1'; state <= S_VOTER; end if; end if; diff --git a/hdl/rtl/hydra_dram.vhd b/hdl/rtl/hydra_dram.vhd index 405f08dff527c68cb6b058b057bd2fdbc3f3ccc3..4b52beffdd166835a98bec1f105f6d01136cf67b 100644 --- a/hdl/rtl/hydra_dram.vhd +++ b/hdl/rtl/hydra_dram.vhd @@ -160,9 +160,9 @@ begin p_ctrl: process (state, p_done, r_done, scrub_done, rerr, - rerr_one, rdata_ecc, rsyndrome, last_addr, last_we, last_sel, recc, + rerr_one, rdata_ecc, rsyndrome, last_addr, last_we, last_sel, eccm_i, en_i, addr_i, we_i, data_i, sel_i, r_done_d, - scrub_rd, scrub_addr, scrub_done_d) + scrub_rd, scrub_addr, scrub_done_d, scrub_en_i) is variable d : std_logic_vector(31 downto 0); begin diff --git a/hdl/rtl/hydra_iram.vhd b/hdl/rtl/hydra_iram.vhd index 80926208e36df3109c170f5798241ace63ed33c8..edf6c25e7c557d78645f27921cd9756e27450602 100644 --- a/hdl/rtl/hydra_iram.vhd +++ b/hdl/rtl/hydra_iram.vhd @@ -68,6 +68,7 @@ entity hydra_iram is -- For statistics ecc_one_o : out std_logic; ecc_fatal_o : out std_logic; + ecc_fatal_addr_o : out std_logic_vector(g_RAM_LOG_SIZE - 1 downto 2); scrub_se_o : out std_logic; scrub_de_o : out std_logic; @@ -172,7 +173,7 @@ begin end if; end process; - p_ctrl: process (state, r1_done, r2_done, rerr, rerr_one, rdata_ecc, rsyndrome, last_raddr, recc, + p_ctrl: process (state, r1_done, r2_done, rerr, rerr_one, rdata_ecc, rsyndrome, last_raddr, r1_en_i, r1_addr_i, we_i, waddr_i, wdata_i, wecc_i, r1_done_d, r2_done_d, r2_en_i, r2_addr_i, scrub_rd, scrub_addr, scrub_done, scrub_en_i) begin @@ -273,6 +274,7 @@ begin ecc_fatal_o <= '0'; scrub_se_o <= '0'; scrub_de_o <= '0'; + ecc_fatal_addr_o <= (others => '0'); else r1_done_d <= r1_done; r1_done <= n_r1_done; @@ -285,6 +287,9 @@ begin ecc_one_o <= n_ecc_one; ecc_fatal_o <= n_ecc_fatal; + if n_ecc_fatal = '1' then + ecc_fatal_addr_o <= n_last_raddr; + end if; scrub_se_o <= n_scrub_se; scrub_de_o <= n_scrub_de; end if; diff --git a/hdl/rtl/hydra_supervisor_regs.cheby b/hdl/rtl/hydra_supervisor_regs.cheby index 857d890acab915f714f94bc090cb0eadf87081f1..c87b88cb10be2830053515791817c854055ee09c 100644 --- a/hdl/rtl/hydra_supervisor_regs.cheby +++ b/hdl/rtl/hydra_supervisor_regs.cheby @@ -16,13 +16,17 @@ memory-map: description: CPU divergence range: 0 - field: - name: ecc - description: Uncorrectable ECC error + name: iram_ecc + description: Uncorrectable ECC error in IRAM range: 1 + - field: + name: dram_ecc + description: Uncorrectable ECC error in DRAM + range: 2 - field: name: watchdog description: Watchdog timeout - range: 2 + range: 3 - reg: name: cpu description: state of cpus @@ -112,6 +116,11 @@ memory-map: description: Change ECC bits on dram writes for testing width: 8 access: rw + - reg: + name: iram_de_addr + description: Address of the last fatal error + width: 32 + access: ro - reg: name: scrub_cfg description: configuration of scrubbers @@ -134,11 +143,11 @@ memory-map: description: Maximum number of cycles between a scrub width: 16 access: rw - preset: 2048 + preset: 2047 - reg: name: dram_scrub_period description: Maximum number of cycles between a scrub width: 16 access: rw - preset: 2048 + preset: 2047 diff --git a/hdl/rtl/hydra_supervisor_regs.vhd b/hdl/rtl/hydra_supervisor_regs.vhd index 8f917a21f1a35fd01ff3d7540066f55db35a6994..859e95c71635b2573e10422e9be3ec85218626c9 100644 --- a/hdl/rtl/hydra_supervisor_regs.vhd +++ b/hdl/rtl/hydra_supervisor_regs.vhd @@ -17,8 +17,10 @@ entity hydra_supervisor_regs is -- Cause of a reset -- CPU divergence reset_cause_cpu_i : in std_logic; - -- Uncorrectable ECC error - reset_cause_ecc_i : in std_logic; + -- Uncorrectable ECC error in IRAM + reset_cause_iram_ecc_i : in std_logic; + -- Uncorrectable ECC error in DRAM + reset_cause_dram_ecc_i : in std_logic; -- Watchdog timeout reset_cause_watchdog_i : in std_logic; @@ -74,6 +76,9 @@ entity hydra_supervisor_regs is -- Change ECC bits on dram writes for testing dram_ecc_mask_o : out std_logic_vector(7 downto 0); + -- Address of the last fatal error + iram_de_addr_i : in std_logic_vector(31 downto 0); + -- configuration of scrubbers -- Write 1 to enable iram scrubber scrub_cfg_iram_en_i : in std_logic; @@ -235,6 +240,8 @@ begin end if; end process; + -- Register iram_de_addr + -- Register scrub_cfg scrub_cfg_iram_en_o <= wr_dat_d0(0); scrub_cfg_dram_en_o <= wr_dat_d0(1); @@ -245,7 +252,7 @@ begin process (clk_i) begin if rising_edge(clk_i) then if rst_n_i = '0' then - iram_scrub_period_reg <= "0000100000000000"; + iram_scrub_period_reg <= "0000011111111111"; iram_scrub_period_wack <= '0'; else if iram_scrub_period_wreq = '1' then @@ -261,7 +268,7 @@ begin process (clk_i) begin if rising_edge(clk_i) then if rst_n_i = '0' then - dram_scrub_period_reg <= "0000100000000000"; + dram_scrub_period_reg <= "0000011111111111"; dram_scrub_period_wack <= '0'; else if dram_scrub_period_wreq = '1' then @@ -333,14 +340,17 @@ begin dram_ecc_mask_wreq <= wr_req_d0; wr_ack_int <= dram_ecc_mask_wack; when "01111" => + -- Reg iram_de_addr + wr_ack_int <= wr_req_d0; + when "10000" => -- Reg scrub_cfg scrub_cfg_wreq <= wr_req_d0; wr_ack_int <= wr_req_d0; - when "10000" => + when "10001" => -- Reg iram_scrub_period iram_scrub_period_wreq <= wr_req_d0; wr_ack_int <= iram_scrub_period_wack; - when "10001" => + when "10010" => -- Reg dram_scrub_period dram_scrub_period_wreq <= wr_req_d0; wr_ack_int <= dram_scrub_period_wack; @@ -350,13 +360,13 @@ begin end process; -- Process for read requests. - process (adr_int, rd_req_int, reset_cause_cpu_i, reset_cause_ecc_i, - reset_cause_watchdog_i, cpu_reset_i, cpu_recovery_i, - force_divergence_i, wd_period_i, wd_count_i, iram_ecc_se_i, - iram_scrub_se_i, iram_scrub_de_i, dram_ecc_se_i, dram_scrub_se_i, - dram_scrub_de_i, cpu_data_err_i, cpu_iaddr_err_i, dram_ecc_mask_reg, - scrub_cfg_iram_en_i, scrub_cfg_dram_en_i, iram_scrub_period_reg, - dram_scrub_period_reg) begin + process (adr_int, rd_req_int, reset_cause_cpu_i, reset_cause_iram_ecc_i, + reset_cause_dram_ecc_i, reset_cause_watchdog_i, cpu_reset_i, + cpu_recovery_i, force_divergence_i, wd_period_i, wd_count_i, + iram_ecc_se_i, iram_scrub_se_i, iram_scrub_de_i, dram_ecc_se_i, + dram_scrub_se_i, dram_scrub_de_i, cpu_data_err_i, cpu_iaddr_err_i, + dram_ecc_mask_reg, iram_de_addr_i, scrub_cfg_iram_en_i, + scrub_cfg_dram_en_i, iram_scrub_period_reg, dram_scrub_period_reg) begin -- By default ack read requests rd_dat_d0 <= (others => 'X'); force_divergence_rd_o <= '0'; @@ -365,9 +375,10 @@ begin -- Reg reset_cause rd_ack_d0 <= rd_req_int; rd_dat_d0(0) <= reset_cause_cpu_i; - rd_dat_d0(1) <= reset_cause_ecc_i; - rd_dat_d0(2) <= reset_cause_watchdog_i; - rd_dat_d0(31 downto 3) <= (others => '0'); + rd_dat_d0(1) <= reset_cause_iram_ecc_i; + rd_dat_d0(2) <= reset_cause_dram_ecc_i; + rd_dat_d0(3) <= reset_cause_watchdog_i; + rd_dat_d0(31 downto 4) <= (others => '0'); when "00001" => -- Reg cpu rd_ack_d0 <= rd_req_int; @@ -429,17 +440,21 @@ begin rd_dat_d0(7 downto 0) <= dram_ecc_mask_reg; rd_dat_d0(31 downto 8) <= (others => '0'); when "01111" => + -- Reg iram_de_addr + rd_ack_d0 <= rd_req_int; + rd_dat_d0 <= iram_de_addr_i; + when "10000" => -- Reg scrub_cfg rd_ack_d0 <= rd_req_int; rd_dat_d0(0) <= scrub_cfg_iram_en_i; rd_dat_d0(1) <= scrub_cfg_dram_en_i; rd_dat_d0(31 downto 2) <= (others => '0'); - when "10000" => + when "10001" => -- Reg iram_scrub_period rd_ack_d0 <= rd_req_int; rd_dat_d0(15 downto 0) <= iram_scrub_period_reg; rd_dat_d0(31 downto 16) <= (others => '0'); - when "10001" => + when "10010" => -- Reg dram_scrub_period rd_ack_d0 <= rd_req_int; rd_dat_d0(15 downto 0) <= dram_scrub_period_reg; diff --git a/sw/include/hydra_supervisor_regs.h b/sw/include/hydra_supervisor_regs.h index 0f5ae16b537fa146f9b4273db895930a2a37fccb..a92d88727801eab9388d4a81538eb1d30def06aa 100644 --- a/sw/include/hydra_supervisor_regs.h +++ b/sw/include/hydra_supervisor_regs.h @@ -1,12 +1,13 @@ #ifndef __CHEBY__HYDRA_SUPERVISOR_REGS__H__ #define __CHEBY__HYDRA_SUPERVISOR_REGS__H__ -#define HYDRA_SUPERVISOR_REGS_SIZE 70 /* 0x46 */ +#define HYDRA_SUPERVISOR_REGS_SIZE 74 /* 0x4a */ /* Cause of a reset */ #define HYDRA_SUPERVISOR_REGS_RESET_CAUSE 0x0UL #define HYDRA_SUPERVISOR_REGS_RESET_CAUSE_CPU 0x1UL -#define HYDRA_SUPERVISOR_REGS_RESET_CAUSE_ECC 0x2UL -#define HYDRA_SUPERVISOR_REGS_RESET_CAUSE_WATCHDOG 0x4UL +#define HYDRA_SUPERVISOR_REGS_RESET_CAUSE_IRAM_ECC 0x2UL +#define HYDRA_SUPERVISOR_REGS_RESET_CAUSE_DRAM_ECC 0x4UL +#define HYDRA_SUPERVISOR_REGS_RESET_CAUSE_WATCHDOG 0x8UL /* state of cpus */ #define HYDRA_SUPERVISOR_REGS_CPU 0x4UL @@ -53,18 +54,21 @@ /* Change ECC bits on dram writes for testing */ #define HYDRA_SUPERVISOR_REGS_DRAM_ECC_MASK 0x38UL +/* Address of the last fatal error */ +#define HYDRA_SUPERVISOR_REGS_IRAM_DE_ADDR 0x3cUL + /* configuration of scrubbers */ -#define HYDRA_SUPERVISOR_REGS_SCRUB_CFG 0x3cUL +#define HYDRA_SUPERVISOR_REGS_SCRUB_CFG 0x40UL #define HYDRA_SUPERVISOR_REGS_SCRUB_CFG_IRAM_EN 0x1UL #define HYDRA_SUPERVISOR_REGS_SCRUB_CFG_DRAM_EN 0x2UL /* Maximum number of cycles between a scrub */ -#define HYDRA_SUPERVISOR_REGS_IRAM_SCRUB_PERIOD 0x40UL -#define HYDRA_SUPERVISOR_REGS_IRAM_SCRUB_PERIOD_PRESET 0x800UL +#define HYDRA_SUPERVISOR_REGS_IRAM_SCRUB_PERIOD 0x44UL +#define HYDRA_SUPERVISOR_REGS_IRAM_SCRUB_PERIOD_PRESET 0x7ffUL /* Maximum number of cycles between a scrub */ -#define HYDRA_SUPERVISOR_REGS_DRAM_SCRUB_PERIOD 0x44UL -#define HYDRA_SUPERVISOR_REGS_DRAM_SCRUB_PERIOD_PRESET 0x800UL +#define HYDRA_SUPERVISOR_REGS_DRAM_SCRUB_PERIOD 0x48UL +#define HYDRA_SUPERVISOR_REGS_DRAM_SCRUB_PERIOD_PRESET 0x7ffUL struct hydra_supervisor_regs { /* [0x0]: REG (ro) Cause of a reset */ @@ -115,16 +119,19 @@ struct hydra_supervisor_regs { /* padding to: 60 words */ uint8_t __padding_0[3]; - /* [0x3c]: REG (rw) configuration of scrubbers */ + /* [0x3c]: REG (ro) Address of the last fatal error */ + uint32_t iram_de_addr; + + /* [0x40]: REG (rw) configuration of scrubbers */ uint32_t scrub_cfg; - /* [0x40]: REG (rw) Maximum number of cycles between a scrub */ + /* [0x44]: REG (rw) Maximum number of cycles between a scrub */ uint16_t iram_scrub_period; - /* padding to: 68 words */ + /* padding to: 72 words */ uint8_t __padding_1[2]; - /* [0x44]: REG (rw) Maximum number of cycles between a scrub */ + /* [0x48]: REG (rw) Maximum number of cycles between a scrub */ uint16_t dram_scrub_period; }; diff --git a/sw/sf2-test/main.c b/sw/sf2-test/main.c index b39116e029ee36073a48fdd313f79d2ac2174ed7..b3e6262079453f70d02503a5e7380122d1126991 100644 --- a/sw/sf2-test/main.c +++ b/sw/sf2-test/main.c @@ -53,7 +53,7 @@ static void uart_raw_putc (unsigned char c) { /* Wait until TEMT (transmit empty) is set. */ - /* NB: bit 5 for RX not empty */ + /* NB: bit 5 for TX not empty */ do { SUPERVISOR->wd_key = WD_KEY; } while (!(*(volatile unsigned*)UART_LSR & 0x20)); @@ -61,6 +61,17 @@ uart_raw_putc (unsigned char c) *(volatile unsigned *)UART_TX = c; } +static unsigned char +uart_raw_getc (void) +{ + /* Wait until DR (data ready, bit 0) is set. */ + do { + SUPERVISOR->wd_key = WD_KEY; + } while (!(*(volatile unsigned*)UART_LSR & 0x1)); + + return *(volatile unsigned *)UART_RX; +} + static void uart_putc (char c) { @@ -162,6 +173,13 @@ main (void) uart_put_hex_digit (next_test & 0x0f); uart_putc('\n'); +#if 0 + uart_puts("Go?"); + while (uart_raw_getc() != 'y') + ; + uart_putc('\n'); +#endif + switch (next_test) { case TEST_DRAM_DE: SUPERVISOR->dram_ecc_mask = 0x03; @@ -209,16 +227,18 @@ main (void) uart_puts("Start IRAM scrubber"); SUPERVISOR->scrub_cfg = HYDRA_SUPERVISOR_REGS_SCRUB_CFG_IRAM_EN; next_test = TEST_WD; - while (SUPERVISOR->iram_scrub_de == 0) + do { + v = SUPERVISOR->iram_scrub_de; SUPERVISOR->wd_key = WD_KEY; + } while (v == 0); + /* Because the scrubber is always running, the counter is always + incrementing. So the original value is kept. */ uart_putc('\n'); - if (SUPERVISOR->iram_scrub_se != 1 - || SUPERVISOR->iram_scrub_de != 2) { + if (SUPERVISOR->iram_scrub_se != 1 || v > 2) { uart_puts ("se:"); uart_put_hex8 (SUPERVISOR->iram_scrub_se); uart_puts (", de:"); - uart_put_hex8 (SUPERVISOR->iram_scrub_de); - while (1) ; + uart_put_hex8 (v); unreachable(); } /* Fallthrough */