From 140833dc05c2e4adb6f1ba485c3b1c4b238f152e Mon Sep 17 00:00:00 2001 From: Tristan Gingold <tristan.gingold@cern.ch> Date: Thu, 21 Apr 2022 17:32:32 +0200 Subject: [PATCH] Rework sf2 test; rename supervisor registers and add CPU error counters --- hdl/rtl/hydra_core.vhd | 18 +++--- hdl/rtl/hydra_supervisor_regs.cheby | 28 +++++---- hdl/rtl/hydra_supervisor_regs.vhd | 97 +++++++++++++++++------------ sw/include/hydra_supervisor_regs.h | 45 ++++++++----- sw/sf2-test/crt0.S | 20 +++--- sw/sf2-test/main.c | 97 +++++++++++++++++++++-------- sw/sf2-test/ram.ld | 5 +- 7 files changed, 200 insertions(+), 110 deletions(-) diff --git a/hdl/rtl/hydra_core.vhd b/hdl/rtl/hydra_core.vhd index 9404b9e..659b23e 100644 --- a/hdl/rtl/hydra_core.vhd +++ b/hdl/rtl/hydra_core.vhd @@ -448,8 +448,7 @@ begin reset_cause_cpu_i => reset_cause_cpu, reset_cause_ecc_i => reset_cause_ecc, reset_cause_watchdog_i => reset_cause_wd, - cpu_status_i => cpu_rst, - cpu_boot_done_i => '0', + cpu_reset_i => cpu_rst, cpu_recovery_i => cpu_recovery, cpu_recovery_o => cpu_recovery_in, cpu_wr_o => cpu_wr, @@ -461,10 +460,12 @@ begin wd_count_i => wd_counter, wd_key_o => wd_key_val, wd_key_wr_o => wd_key_wr, - iram_ecc_corr_i => nbr_iram_ecc_corr, - iram_ecc_uncorr_i => nbr_iram_ecc_uncorr, - dram_ecc_corr_i => nbr_dram_ecc_corr, - dram_ecc_uncorr_i => nbr_dram_ecc_uncorr, + iram_ecc_se_i => nbr_iram_ecc_corr, + iram_ecc_de_i => nbr_iram_ecc_uncorr, + dram_ecc_se_i => nbr_dram_ecc_corr, + dram_ecc_de_i => nbr_dram_ecc_uncorr, + cpu_data_err_i => nbr_cpu_data_err, + cpu_iaddr_err_i => nbr_cpu_iaddr_err, iram_scrub_period_o => iram_scrub_period, dram_scrub_period_o => dram_scrub_period ); @@ -504,7 +505,10 @@ begin end if; end case; - if cpu_sync = "000" or cpu_rst_err = '1' or (err_cpu_dm = '1' and state = S_LOCK) then + if cpu_sync = "000" or cpu_rst_err = '1' + or (err_cpu_dm = '1' and state = S_LOCK) + or (cpu_wr = '1' and cpu_recovery_in = '1') + then -- Fatal error cpu_rst <= (others => '1'); reset_cause_cpu <= not cpu_rst_err; diff --git 
a/hdl/rtl/hydra_supervisor_regs.cheby b/hdl/rtl/hydra_supervisor_regs.cheby index 23a1341..9391a58 100644 --- a/hdl/rtl/hydra_supervisor_regs.cheby +++ b/hdl/rtl/hydra_supervisor_regs.cheby @@ -33,17 +33,13 @@ memory-map: write-strobe: True children: - field: - name: status - description: Status of cpus, set when running (RO) + name: reset + description: CPU under reset due to divergence (RO) range: 2-0 - - field: - name: boot_done - description: Set on reset to disable compairison. To be cleared after initialization - range: 8 - field: name: recovery description: Set to start recovery (will reset all the cpus) - range: 9 + range: 8 - reg: name: force_divergence description: Each cpu will read a different value from this registers @@ -73,25 +69,35 @@ memory-map: x-hdl: write-strobe: True - reg: - name: iram_ecc_corr + name: iram_ecc_se description: Number of ECC correctable errors for iram width: 32 access: ro - reg: - name: iram_ecc_uncorr + name: iram_ecc_de description: Number of ECC uncorrectable errors for iram width: 32 access: ro - reg: - name: dram_ecc_corr + name: dram_ecc_se description: Number of ECC correctable errors for dram width: 32 access: ro - reg: - name: dram_ecc_uncorr + name: dram_ecc_de description: Number of ECC uncorrectable errors for dram width: 32 access: ro + - reg: + name: cpu_data_err + description: Number of CPU errors on data bus + width: 32 + access: ro + - reg: + name: cpu_iaddr_err + description: Number of CPU errors on instruction bus + width: 32 + access: ro - reg: name: iram_scrub description: Maximum number of cycles between a scrub diff --git a/hdl/rtl/hydra_supervisor_regs.vhd b/hdl/rtl/hydra_supervisor_regs.vhd index 1b6f56d..300e81d 100644 --- a/hdl/rtl/hydra_supervisor_regs.vhd +++ b/hdl/rtl/hydra_supervisor_regs.vhd @@ -23,12 +23,9 @@ entity hydra_supervisor_regs is reset_cause_watchdog_i : in std_logic; -- state of cpus - -- Status of cpus, set when running (RO) - cpu_status_i : in std_logic_vector(2 downto 0); - 
cpu_status_o : out std_logic_vector(2 downto 0); - -- Set on reset to disable compairison. To be cleared after initialization - cpu_boot_done_i : in std_logic; - cpu_boot_done_o : out std_logic; + -- CPU under reset due to divergence (RO) + cpu_reset_i : in std_logic_vector(2 downto 0); + cpu_reset_o : out std_logic_vector(2 downto 0); -- Set to start recovery (will reset all the cpus) cpu_recovery_i : in std_logic; cpu_recovery_o : out std_logic; @@ -51,16 +48,22 @@ entity hydra_supervisor_regs is wd_key_wr_o : out std_logic; -- Number of ECC correctable errors for iram - iram_ecc_corr_i : in std_logic_vector(31 downto 0); + iram_ecc_se_i : in std_logic_vector(31 downto 0); -- Number of ECC uncorrectable errors for iram - iram_ecc_uncorr_i : in std_logic_vector(31 downto 0); + iram_ecc_de_i : in std_logic_vector(31 downto 0); -- Number of ECC correctable errors for dram - dram_ecc_corr_i : in std_logic_vector(31 downto 0); + dram_ecc_se_i : in std_logic_vector(31 downto 0); -- Number of ECC uncorrectable errors for dram - dram_ecc_uncorr_i : in std_logic_vector(31 downto 0); + dram_ecc_de_i : in std_logic_vector(31 downto 0); + + -- Number of CPU errors on data bus + cpu_data_err_i : in std_logic_vector(31 downto 0); + + -- Number of CPU errors on instruction bus + cpu_iaddr_err_i : in std_logic_vector(31 downto 0); -- Maximum number of cycles between a scrub -- Watchdog timeout @@ -151,9 +154,8 @@ begin -- Register reset_cause -- Register cpu - cpu_status_o <= wr_dat_d0(2 downto 0); - cpu_boot_done_o <= wr_dat_d0(8); - cpu_recovery_o <= wr_dat_d0(9); + cpu_reset_o <= wr_dat_d0(2 downto 0); + cpu_recovery_o <= wr_dat_d0(8); cpu_wr_o <= cpu_wreq; -- Register force_divergence @@ -181,13 +183,17 @@ begin end process; wd_key_wr_o <= wd_key_wack; - -- Register iram_ecc_corr + -- Register iram_ecc_se + + -- Register iram_ecc_de - -- Register iram_ecc_uncorr + -- Register dram_ecc_se - -- Register dram_ecc_corr + -- Register dram_ecc_de - -- Register dram_ecc_uncorr + -- 
Register cpu_data_err + + -- Register cpu_iaddr_err -- Register iram_scrub iram_scrub_period_o <= iram_scrub_period_reg; @@ -251,22 +257,28 @@ begin wd_key_wreq <= wr_req_d0; wr_ack_int <= wd_key_wack; when "0110" => - -- Reg iram_ecc_corr + -- Reg iram_ecc_se wr_ack_int <= wr_req_d0; when "0111" => - -- Reg iram_ecc_uncorr + -- Reg iram_ecc_de wr_ack_int <= wr_req_d0; when "1000" => - -- Reg dram_ecc_corr + -- Reg dram_ecc_se wr_ack_int <= wr_req_d0; when "1001" => - -- Reg dram_ecc_uncorr + -- Reg dram_ecc_de wr_ack_int <= wr_req_d0; when "1010" => + -- Reg cpu_data_err + wr_ack_int <= wr_req_d0; + when "1011" => + -- Reg cpu_iaddr_err + wr_ack_int <= wr_req_d0; + when "1100" => -- Reg iram_scrub iram_scrub_wreq <= wr_req_d0; wr_ack_int <= iram_scrub_wack; - when "1011" => + when "1101" => -- Reg dram_scrub dram_scrub_wreq <= wr_req_d0; wr_ack_int <= dram_scrub_wack; @@ -277,10 +289,10 @@ begin -- Process for read requests. process (adr_int, rd_req_int, reset_cause_cpu_i, reset_cause_ecc_i, - reset_cause_watchdog_i, cpu_status_i, cpu_boot_done_i, cpu_recovery_i, - force_divergence_i, wd_period_i, wd_count_i, iram_ecc_corr_i, - iram_ecc_uncorr_i, dram_ecc_corr_i, dram_ecc_uncorr_i, - iram_scrub_period_reg, dram_scrub_period_reg) begin + reset_cause_watchdog_i, cpu_reset_i, cpu_recovery_i, + force_divergence_i, wd_period_i, wd_count_i, iram_ecc_se_i, + iram_ecc_de_i, dram_ecc_se_i, dram_ecc_de_i, cpu_data_err_i, + cpu_iaddr_err_i, iram_scrub_period_reg, dram_scrub_period_reg) begin -- By default ack read requests rd_dat_d0 <= (others => 'X'); force_divergence_rd_o <= '0'; @@ -295,11 +307,10 @@ begin when "0001" => -- Reg cpu rd_ack_d0 <= rd_req_int; - rd_dat_d0(2 downto 0) <= cpu_status_i; + rd_dat_d0(2 downto 0) <= cpu_reset_i; rd_dat_d0(7 downto 3) <= (others => '0'); - rd_dat_d0(8) <= cpu_boot_done_i; - rd_dat_d0(9) <= cpu_recovery_i; - rd_dat_d0(31 downto 10) <= (others => '0'); + rd_dat_d0(8) <= cpu_recovery_i; + rd_dat_d0(31 downto 9) <= (others => '0'); when 
"0010" => -- Reg force_divergence force_divergence_rd_o <= rd_req_int; @@ -317,27 +328,35 @@ begin -- Reg wd_key rd_ack_d0 <= rd_req_int; when "0110" => - -- Reg iram_ecc_corr + -- Reg iram_ecc_se rd_ack_d0 <= rd_req_int; - rd_dat_d0 <= iram_ecc_corr_i; + rd_dat_d0 <= iram_ecc_se_i; when "0111" => - -- Reg iram_ecc_uncorr + -- Reg iram_ecc_de rd_ack_d0 <= rd_req_int; - rd_dat_d0 <= iram_ecc_uncorr_i; + rd_dat_d0 <= iram_ecc_de_i; when "1000" => - -- Reg dram_ecc_corr + -- Reg dram_ecc_se rd_ack_d0 <= rd_req_int; - rd_dat_d0 <= dram_ecc_corr_i; + rd_dat_d0 <= dram_ecc_se_i; when "1001" => - -- Reg dram_ecc_uncorr + -- Reg dram_ecc_de rd_ack_d0 <= rd_req_int; - rd_dat_d0 <= dram_ecc_uncorr_i; + rd_dat_d0 <= dram_ecc_de_i; when "1010" => + -- Reg cpu_data_err + rd_ack_d0 <= rd_req_int; + rd_dat_d0 <= cpu_data_err_i; + when "1011" => + -- Reg cpu_iaddr_err + rd_ack_d0 <= rd_req_int; + rd_dat_d0 <= cpu_iaddr_err_i; + when "1100" => -- Reg iram_scrub rd_ack_d0 <= rd_req_int; rd_dat_d0(15 downto 0) <= iram_scrub_period_reg; rd_dat_d0(31 downto 16) <= (others => '0'); - when "1011" => + when "1101" => -- Reg dram_scrub rd_ack_d0 <= rd_req_int; rd_dat_d0(15 downto 0) <= dram_scrub_period_reg; diff --git a/sw/include/hydra_supervisor_regs.h b/sw/include/hydra_supervisor_regs.h index 8c786fa..3a9f1bd 100644 --- a/sw/include/hydra_supervisor_regs.h +++ b/sw/include/hydra_supervisor_regs.h @@ -1,6 +1,6 @@ #ifndef __CHEBY__HYDRA_SUPERVISOR_REGS__H__ #define __CHEBY__HYDRA_SUPERVISOR_REGS__H__ -#define HYDRA_SUPERVISOR_REGS_SIZE 48 /* 0x30 */ +#define HYDRA_SUPERVISOR_REGS_SIZE 56 /* 0x38 */ /* Cause of a reset */ #define HYDRA_SUPERVISOR_REGS_RESET_CAUSE 0x0UL @@ -10,10 +10,9 @@ /* state of cpus */ #define HYDRA_SUPERVISOR_REGS_CPU 0x4UL -#define HYDRA_SUPERVISOR_REGS_CPU_STATUS_MASK 0x7UL -#define HYDRA_SUPERVISOR_REGS_CPU_STATUS_SHIFT 0 -#define HYDRA_SUPERVISOR_REGS_CPU_BOOT_DONE 0x100UL -#define HYDRA_SUPERVISOR_REGS_CPU_RECOVERY 0x200UL +#define 
HYDRA_SUPERVISOR_REGS_CPU_RESET_MASK 0x7UL +#define HYDRA_SUPERVISOR_REGS_CPU_RESET_SHIFT 0 +#define HYDRA_SUPERVISOR_REGS_CPU_RECOVERY 0x100UL /* Each cpu will read a different value from this registers */ #define HYDRA_SUPERVISOR_REGS_FORCE_DIVERGENCE 0x8UL @@ -29,24 +28,30 @@ #define HYDRA_SUPERVISOR_REGS_WD_KEY 0x14UL /* Number of ECC correctable errors for iram */ -#define HYDRA_SUPERVISOR_REGS_IRAM_ECC_CORR 0x18UL +#define HYDRA_SUPERVISOR_REGS_IRAM_ECC_SE 0x18UL /* Number of ECC uncorrectable errors for iram */ -#define HYDRA_SUPERVISOR_REGS_IRAM_ECC_UNCORR 0x1cUL +#define HYDRA_SUPERVISOR_REGS_IRAM_ECC_DE 0x1cUL /* Number of ECC correctable errors for dram */ -#define HYDRA_SUPERVISOR_REGS_DRAM_ECC_CORR 0x20UL +#define HYDRA_SUPERVISOR_REGS_DRAM_ECC_SE 0x20UL /* Number of ECC uncorrectable errors for dram */ -#define HYDRA_SUPERVISOR_REGS_DRAM_ECC_UNCORR 0x24UL +#define HYDRA_SUPERVISOR_REGS_DRAM_ECC_DE 0x24UL + +/* Number of CPU errors on data bus */ +#define HYDRA_SUPERVISOR_REGS_CPU_DATA_ERR 0x28UL + +/* Number of CPU errors on instruction bus */ +#define HYDRA_SUPERVISOR_REGS_CPU_IADDR_ERR 0x2cUL /* Maximum number of cycles between a scrub */ -#define HYDRA_SUPERVISOR_REGS_IRAM_SCRUB 0x28UL +#define HYDRA_SUPERVISOR_REGS_IRAM_SCRUB 0x30UL #define HYDRA_SUPERVISOR_REGS_IRAM_SCRUB_PERIOD_MASK 0xffffUL #define HYDRA_SUPERVISOR_REGS_IRAM_SCRUB_PERIOD_SHIFT 0 /* Maximum number of cycles between a scrub */ -#define HYDRA_SUPERVISOR_REGS_DRAM_SCRUB 0x2cUL +#define HYDRA_SUPERVISOR_REGS_DRAM_SCRUB 0x34UL #define HYDRA_SUPERVISOR_REGS_DRAM_SCRUB_PERIOD_MASK 0xffffUL #define HYDRA_SUPERVISOR_REGS_DRAM_SCRUB_PERIOD_SHIFT 0 @@ -70,21 +75,27 @@ struct hydra_supervisor_regs { uint32_t wd_key; /* [0x18]: REG (ro) Number of ECC correctable errors for iram */ - uint32_t iram_ecc_corr; + uint32_t iram_ecc_se; /* [0x1c]: REG (ro) Number of ECC uncorrectable errors for iram */ - uint32_t iram_ecc_uncorr; + uint32_t iram_ecc_de; /* [0x20]: REG (ro) Number of ECC correctable 
errors for dram */ - uint32_t dram_ecc_corr; + uint32_t dram_ecc_se; /* [0x24]: REG (ro) Number of ECC uncorrectable errors for dram */ - uint32_t dram_ecc_uncorr; + uint32_t dram_ecc_de; + + /* [0x28]: REG (ro) Number of CPU errors on data bus */ + uint32_t cpu_data_err; + + /* [0x2c]: REG (ro) Number of CPU errors on instruction bus */ + uint32_t cpu_iaddr_err; - /* [0x28]: REG (rw) Maximum number of cycles between a scrub */ + /* [0x30]: REG (rw) Maximum number of cycles between a scrub */ uint32_t iram_scrub; - /* [0x2c]: REG (rw) Maximum number of cycles between a scrub */ + /* [0x34]: REG (rw) Maximum number of cycles between a scrub */ uint32_t dram_scrub; }; diff --git a/sw/sf2-test/crt0.S b/sw/sf2-test/crt0.S index f4a7499..27cdcdc 100644 --- a/sw/sf2-test/crt0.S +++ b/sw/sf2-test/crt0.S @@ -5,14 +5,6 @@ _start: la gp, _gp # Initialize global pointer la sp, _fstack -# clear the bss segment - la t0, _fbss - la t1, _end -1: - sw zero,0(t0) - addi t0, t0, 4 - bltu t0, t1, 1b - # Clear registers mv s0, zero mv s1, zero @@ -25,3 +17,15 @@ _start: mv a0, zero call main + + .global clear_bss +clear_bss: + # clear the bss segment + la t0, _fbss + la t1, _end +1: + sw zero,0(t0) + addi t0, t0, 4 + bltu t0, t1, 1b + + ret diff --git a/sw/sf2-test/main.c b/sw/sf2-test/main.c index 693e687..54bdf2c 100644 --- a/sw/sf2-test/main.c +++ b/sw/sf2-test/main.c @@ -1,6 +1,8 @@ #include <stdint.h> #include "hydra_supervisor_regs.h" +extern void clear_bss(void); + #define SYS_RESET 0x40038048 #define UART_BASE 0x40000000 #define UART_RX (UART_BASE | 0x00) @@ -17,8 +19,10 @@ static enum t_test { TEST_WD, - TEST_CPU0_ERR -} cur_test; + TEST_CPU0_RECOVER, + TEST_CPU0_FATAL, + TEST_RAM +} next_test; static void uart_init (void) @@ -108,6 +112,28 @@ ram_test(void) return 0; } +static void +force_divergence_1 (void) +{ + unsigned v; + + /* Force divergence by executing different code. */ + v = SUPERVISOR->force_divergence; + if (v != 1) + uart_putc('D'); + + /* Must be in lock-step. 
*/ + v = SUPERVISOR->cpu; + uart_put_hex_digit(v & 0x0f); +} + +static void +unreachable(void) +{ + while (1) + uart_putc('$'); +} + int main (void) { @@ -118,36 +144,55 @@ main (void) uart_put_hex_digit (v & 0x0f); uart_putc('\n'); - /* On the first reset, v=0 (no cause), so watchdog won't be restarted - and will expire during ram test. - On later reset, there is always a reset cause. */ - if (v) - SUPERVISOR->wd_key = WD_KEY; + if (v == 0) + clear_bss (); - uart_puts ("Ram\n"); - if (ram_test() != 0) { + switch (next_test) { + case TEST_WD: + next_test = TEST_CPU0_RECOVER; while (1) - uart_puts("Error\n"); - } + v = pad[0]; + unreachable(); + break; - /* Read a different value. - This will trigger a vote and a data error increment. */ - v = SUPERVISOR->force_divergence; - uart_put_hex_digit(v & 0x0f); + case TEST_CPU0_RECOVER: + SUPERVISOR->wd_key = WD_KEY; + force_divergence_1(); - /* Force divergence by executing different code. */ - v = SUPERVISOR->force_divergence; - if (v != 1) - uart_putc('D'); + next_test = TEST_CPU0_FATAL; - /* Must be in lock-step. */ - v = SUPERVISOR->cpu; - uart_put_hex_digit(v & 0x0f); + /* Generate reset for recovery */ + SUPERVISOR->cpu = HYDRA_SUPERVISOR_REGS_CPU_RECOVERY; - /* Force a data error in lock-step. - This will generate a reset. */ - v = SUPERVISOR->force_divergence; - uart_put_hex_digit(v & 0x0f); + unreachable(); + break; + + case TEST_CPU0_FATAL: + SUPERVISOR->wd_key = WD_KEY; + /* Clear recovery */ + SUPERVISOR->cpu = 0; + + force_divergence_1(); + + next_test = TEST_RAM; + + /* Force a data error in lock-step. + This will generate a reset. 
*/ + v = SUPERVISOR->force_divergence; + uart_put_hex_digit(v & 0x0f); + + unreachable(); + break; + + case TEST_RAM: + SUPERVISOR->wd_key = WD_KEY; + uart_puts ("Ram\n"); + v = ram_test(); + if (v != 0) { + uart_put_hex_digit(v & 0x0f); + unreachable(); + } + } while (1) { SUPERVISOR->wd_key = WD_KEY; diff --git a/sw/sf2-test/ram.ld b/sw/sf2-test/ram.ld index bb05f5c..a96290e 100644 --- a/sw/sf2-test/ram.ld +++ b/sw/sf2-test/ram.ld @@ -22,12 +22,13 @@ SECTIONS .text : { *(.text .text.*) } > rom =0 .rodata : { *(.rodata .rodata.*) } > rom - _gp = .; - .data : { *(.data .data.*) } > empty + _gp = . + 0x800; + .bss : { _fbss = .; + *(.sbss .sbss.*) *(.bss .bss.*) *(COMMON) _ebss = .; -- GitLab