From 308dce40679f63db504cd3d746a0c37a2a05f473 Mon Sep 17 00:00:00 2001 From: Varun Wadekar Date: Mon, 24 Jan 2022 05:45:15 -0800 Subject: [PATCH 1/3] feat(gic600ae_fmu): introduce support for RAS error handling The GIC-600AE uses a range of RAS features for all RAMs, which include SECDED, ECC, and Scrub, software and bus error reporting. The GIC makes all necessary information available to software through Armv8.2 RAS architecture compliant register space. This patch introduces support to probe the FMU_ERRGSR register to find the right error record. Once the correct record is identified, the "handler" function queries the FMU_ERRSTATUS register to further identify the block ID, safety mechanism and the architecturally defined primary error code. The description of the error is displayed on the console to simplify debug. Change-Id: I7e543664b74457afee2da250549f4c3d9beb1a03 Signed-off-by: Varun Wadekar --- drivers/arm/gic/v3/gic600ae_fmu.c | 132 ++++++++++++++++++++++++++++- include/drivers/arm/gic600ae_fmu.h | 19 +++-- 2 files changed, 144 insertions(+), 7 deletions(-) diff --git a/drivers/arm/gic/v3/gic600ae_fmu.c b/drivers/arm/gic/v3/gic600ae_fmu.c index 13979fa4d..2233bbf93 100644 --- a/drivers/arm/gic/v3/gic600ae_fmu.c +++ b/drivers/arm/gic/v3/gic600ae_fmu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA Corporation. All rights reserved. + * Copyright (c) 2021-2022, NVIDIA Corporation. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,6 +9,7 @@ */ #include +#include #include #include
@@ -112,6 +113,161 @@ static char *wkrqst_sm_info[] = {
 	"Wake-GICD AXI4-Stream interface error"
 };
 
+/*
+ * Helper function to find detailed information for a specific IERR.
+ *
+ * Selects the safety mechanism description table that matches 'blkid' and
+ * returns the entry at index 'ierr'. Unknown block IDs and out-of-range
+ * 'ierr' values trip an assert; if assertions are compiled out, a
+ * placeholder string is returned instead of NULL or an out-of-bounds read,
+ * because the caller passes the result straight to a "%s" conversion.
+ */
+static char __unused *ras_ierr_to_str(unsigned int blkid, unsigned int ierr)
+{
+	char **sm_info = NULL;
+	unsigned int num_sm = 0U;
+
+	/* Find the correct record */
+	switch (blkid) {
+	case FMU_BLK_GICD:
+		sm_info = gicd_sm_info;
+		num_sm = (unsigned int)ARRAY_SIZE(gicd_sm_info);
+		break;
+
+	case FMU_BLK_SPICOL:
+		sm_info = spicol_sm_info;
+		num_sm = (unsigned int)ARRAY_SIZE(spicol_sm_info);
+		break;
+
+	case FMU_BLK_WAKERQ:
+		sm_info = wkrqst_sm_info;
+		num_sm = (unsigned int)ARRAY_SIZE(wkrqst_sm_info);
+		break;
+
+	case FMU_BLK_ITS0...FMU_BLK_ITS7:
+		sm_info = its_sm_info;
+		num_sm = (unsigned int)ARRAY_SIZE(its_sm_info);
+		break;
+
+	case FMU_BLK_PPI0...FMU_BLK_PPI31:
+		sm_info = ppi_sm_info;
+		num_sm = (unsigned int)ARRAY_SIZE(ppi_sm_info);
+		break;
+
+	default:
+		break;
+	}
+
+	/* Catch unexpected block IDs or IERR values in debug builds */
+	assert(sm_info != NULL);
+	assert(ierr < num_sm);
+
+	/* IERR is read from FMU_ERRSTATUS; never index past the table */
+	if ((sm_info == NULL) || (ierr >= num_sm)) {
+		return "Unknown";
+	}
+
+	return sm_info[ierr];
+}
+
+/*
+ * Probe for error in memory-mapped registers containing error records.
+ * Upon detecting an error, set probe data to the index of the record
+ * in error, and return 1; otherwise, return 0.
+ *
+ * 'probe_data' may be NULL, in which case only the return value reports
+ * whether an error is pending. When several records are flagged in
+ * FMU_ERRGSR, the lowest-numbered one is reported.
+ */
+int gic600_fmu_probe(uint64_t base, int *probe_data)
+{
+	uint64_t gsr;
+
+	assert(base != 0UL);
+
+	/*
+	 * Read ERR_GSR to find the error record 'M'
+	 */
+	gsr = gic_fmu_read_errgsr(base);
+	if (gsr == U(0)) {
+		return 0;
+	}
+
+	/* Return the index of the record in error */
+	if (probe_data != NULL) {
+		*probe_data = (int)__builtin_ctzll(gsr);
+	}
+
+	return 1;
+}
+
+/*
+ * The handler function to read RAS records and find the safety
+ * mechanism with the error.
+ *
+ * 'probe_data' is the index of the error record (block ID) to inspect,
+ * e.g. as reported by gic600_fmu_probe(). If the record is valid, its
+ * details are printed on the console and FMU_ERRSTATUS is written back
+ * to clear it. Always returns 0.
+ */
+int gic600_fmu_ras_handler(uint64_t base, int probe_data)
+{
+	uint64_t errstatus;
+	unsigned int blkid = (unsigned int)probe_data, ierr, serr;
+
+	assert(base != 0UL);
+
+	/*
+	 * FMU_ERRGSR indicates the ID of the GIC
+	 * block that faulted.
+	 */
+	assert(blkid <= FMU_BLK_PPI31);
+
+	/*
+	 * Find more information by reading FMU_ERRSTATUS
+	 * register
+	 */
+	errstatus = gic_fmu_read_errstatus(base, blkid);
+
+	/*
+	 * If FMU_ERRSTATUS.V is set to 0, no RAS records
+	 * need to be scanned.
+	 */
+	if ((errstatus & FMU_ERRSTATUS_V_BIT) == U(0)) {
+		return 0;
+	}
+
+	/*
+	 * FMU_ERRSTATUS.IERR indicates which Safety Mechanism
+	 * reported the error.
+	 */
+	ierr = (errstatus >> FMU_ERRSTATUS_IERR_SHIFT) &
+			FMU_ERRSTATUS_IERR_MASK;
+
+	/*
+	 * FMU_ERRSTATUS.SERR indicates architecturally
+	 * defined primary error code.
+	 */
+	serr = errstatus & FMU_ERRSTATUS_SERR_MASK;
+
+	ERROR("**************************************\n");
+	ERROR("RAS %s Error detected by GIC600 AE FMU\n",
+		((errstatus & FMU_ERRSTATUS_UE_BIT) != 0U) ?
+		"Uncorrectable" : "Corrected");
+	ERROR("\tStatus = 0x%lx \n", errstatus);
+	ERROR("\tBlock ID = 0x%x\n", blkid);
+	ERROR("\tSafety Mechanism ID = 0x%x (%s)\n", ierr,
+		ras_ierr_to_str(blkid, ierr));
+	ERROR("\tArchitecturally defined primary error code = 0x%x\n",
+		serr);
+	ERROR("**************************************\n");
+
+	/* Clear FMU_ERRSTATUS */
+	gic_fmu_write_errstatus(base, blkid, errstatus);
+
+	return 0;
+}
+
 /*
  * Initialization sequence for the FMU
  *
diff --git a/include/drivers/arm/gic600ae_fmu.h b/include/drivers/arm/gic600ae_fmu.h index 691ffc7b7..f7dcbb8a2 100644 --- a/include/drivers/arm/gic600ae_fmu.h +++ b/include/drivers/arm/gic600ae_fmu.h @@ -100,12 +100,17 @@ #define ITS_FMU_CLKGATE_ERROR U(14) /* ERRSTATUS bits */ -#define FMU_ERRSTATUS_V_BIT BIT(30) -#define FMU_ERRSTATUS_UE_BIT BIT(29) -#define FMU_ERRSTATUS_OV_BIT BIT(27) -#define FMU_ERRSTATUS_CE_BITS (BIT(25) | BIT(24)) -#define FMU_ERRSTATUS_CLEAR (FMU_ERRSTATUS_V_BIT | FMU_ERRSTATUS_UE_BIT | \ - FMU_ERRSTATUS_OV_BIT | FMU_ERRSTATUS_CE_BITS) +#define FMU_ERRSTATUS_BLKID_SHIFT U(32) +#define FMU_ERRSTATUS_BLKID_MASK U(0xFF) +#define FMU_ERRSTATUS_V_BIT BIT(30) +#define FMU_ERRSTATUS_UE_BIT BIT(29) +#define 
FMU_ERRSTATUS_OV_BIT BIT(27) +#define FMU_ERRSTATUS_CE_BITS (BIT(25) | BIT(24)) +#define FMU_ERRSTATUS_CLEAR (FMU_ERRSTATUS_V_BIT | FMU_ERRSTATUS_UE_BIT | \ + FMU_ERRSTATUS_OV_BIT | FMU_ERRSTATUS_CE_BITS) +#define FMU_ERRSTATUS_IERR_MASK U(0xFF) +#define FMU_ERRSTATUS_IERR_SHIFT U(8) +#define FMU_ERRSTATUS_SERR_MASK U(0xFF) /* PINGCTLR constants */ #define FMU_PINGCTLR_INTDIFF_SHIFT U(16) @@ -142,6 +147,8 @@ void gic600_fmu_init(uint64_t base, uint64_t blk_present_mask, bool errctlr_ce_e void gic600_fmu_enable_ping(uint64_t base, uint64_t blk_present_mask, unsigned int timeout_val, unsigned int interval_diff); void gic600_fmu_print_sm_info(uint64_t base, unsigned int blk, unsigned int smid); +int gic600_fmu_probe(uint64_t base, int *probe_data); +int gic600_fmu_ras_handler(uint64_t base, int probe_data); #endif /* __ASSEMBLER__ */ From 3f0094c15d433cd3de413a4633a4ac2b8e1d1f2e Mon Sep 17 00:00:00 2001 From: Varun Wadekar Date: Tue, 25 Jan 2022 03:39:28 -0800 Subject: [PATCH 2/3] feat(gic600ae_fmu): disable SMID for unavailable blocks This patch updates the gic600_fmu_init function to disable all safety mechanisms for a block ID that is not present on the platform. All safety mechanisms for GIC-600AE are enabled by default and should be disabled for blocks that are not present on the platform to avoid false positive RAS errors. 
Change-Id: I52dc3bee9a8b49fd2e51d7ed851fdc803a48e6e3 Signed-off-by: Varun Wadekar --- drivers/arm/gic/v3/gic600ae_fmu.c | 6 ++- drivers/arm/gic/v3/gic600ae_fmu_helpers.c | 46 ++++++++++++++++++++++- include/drivers/arm/gic600ae_fmu.h | 7 ++-- 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/drivers/arm/gic/v3/gic600ae_fmu.c b/drivers/arm/gic/v3/gic600ae_fmu.c index 2233bbf93..efca08fdd 100644 --- a/drivers/arm/gic/v3/gic600ae_fmu.c +++ b/drivers/arm/gic/v3/gic600ae_fmu.c @@ -268,8 +268,12 @@ void gic600_fmu_init(uint64_t base, uint64_t blk_present_mask, /* Enable error detection for all error records */ for (unsigned int i = 0U; i < num_blk; i++) { - /* Skip next steps if the block is not present */ + /* + * Disable all safety mechanisms for blocks that are not + * present and skip the next steps. + */ if ((blk_present_mask & BIT(i)) == 0U) { + gic_fmu_disable_all_sm_blkid(base, i); continue; } diff --git a/drivers/arm/gic/v3/gic600ae_fmu_helpers.c b/drivers/arm/gic/v3/gic600ae_fmu_helpers.c index 4aa0efb32..09806dcfc 100644 --- a/drivers/arm/gic/v3/gic600ae_fmu_helpers.c +++ b/drivers/arm/gic/v3/gic600ae_fmu_helpers.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA Corporation. All rights reserved. + * Copyright (c) 2021-2022, NVIDIA Corporation. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause */
@@ -258,3 +258,57 @@ void gic_fmu_write_pingmask(uintptr_t base, uint64_t val)
 {
 	GIC_FMU_WRITE_64(base, GICFMU_PINGMASK, 0, val);
 }
+
+/*
+ * Helper function to disable all safety mechanisms for a given block.
+ *
+ * Calls gic_fmu_write_smen() once for every safety mechanism ID from 0 up
+ * to and including the block's FMU_SMID_*_MAX value, with bit 0 clear.
+ * 'blkid' must be one of the FMU_BLK_* error record IDs; anything else
+ * trips an assert and, when assertions are compiled out, is ignored.
+ */
+void gic_fmu_disable_all_sm_blkid(uintptr_t base, unsigned int blkid)
+{
+	uint32_t smen, max_smid = U(0);
+
+	/* Sanity check block ID (unsigned, so no lower bound needed) */
+	assert(blkid <= FMU_BLK_PPI31);
+
+	/* Find the max safety mechanism ID for the block */
+	switch (blkid) {
+	case FMU_BLK_GICD:
+		max_smid = FMU_SMID_GICD_MAX;
+		break;
+
+	case FMU_BLK_SPICOL:
+		max_smid = FMU_SMID_SPICOL_MAX;
+		break;
+
+	case FMU_BLK_WAKERQ:
+		max_smid = FMU_SMID_WAKERQ_MAX;
+		break;
+
+	case FMU_BLK_ITS0...FMU_BLK_ITS7:
+		max_smid = FMU_SMID_ITS_MAX;
+		break;
+
+	case FMU_BLK_PPI0...FMU_BLK_PPI31:
+		max_smid = FMU_SMID_PPI_MAX;
+		break;
+
+	default:
+		/* Not a known block: nothing to disable */
+		assert(false);
+		return;
+	}
+
+	/*
+	 * Disable all Safety Mechanisms for a given block id. The *_MAX values
+	 * are themselves valid IDs (e.g. ITS_FMU_CLKGATE_ERROR equals
+	 * FMU_SMID_ITS_MAX), so the bound is inclusive.
+	 */
+	for (unsigned int i = 0U; i <= max_smid; i++) {
+		smen = (blkid << FMU_SMEN_BLK_SHIFT) | (i << FMU_SMEN_SMID_SHIFT);
+		gic_fmu_write_smen(base, smen);
+	}
+}
diff --git a/include/drivers/arm/gic600ae_fmu.h b/include/drivers/arm/gic600ae_fmu.h index f7dcbb8a2..904a5f984 100644 --- a/include/drivers/arm/gic600ae_fmu.h +++ b/include/drivers/arm/gic600ae_fmu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA Corporation. All rights reserved. + * Copyright (c) 2021-2022, NVIDIA Corporation. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -86,10 +86,10 @@ /* Safety Mechamism limit */ #define FMU_SMID_GICD_MAX U(33) +#define FMU_SMID_PPI_MAX U(12) +#define FMU_SMID_ITS_MAX U(14) #define FMU_SMID_SPICOL_MAX U(5) #define FMU_SMID_WAKERQ_MAX U(2) -#define FMU_SMID_ITS_MAX U(14) -#define FMU_SMID_PPI_MAX U(12) /* MBIST Safety Mechanism ID */ #define GICD_MBIST_REQ_ERROR U(23) @@ -142,6 +142,7 @@ void gic_fmu_write_pingnow(uintptr_t base, uint32_t val); void gic_fmu_write_smen(uintptr_t base, uint32_t val); void gic_fmu_write_sminjerr(uintptr_t base, uint32_t val); void gic_fmu_write_pingmask(uintptr_t base, uint64_t val); +void gic_fmu_disable_all_sm_blkid(uintptr_t base, unsigned int blkid); void gic600_fmu_init(uint64_t base, uint64_t blk_present_mask, bool errctlr_ce_en, bool errctlr_ue_en); void gic600_fmu_enable_ping(uint64_t base, uint64_t blk_present_mask, From 6a1c17c770139c00395783e7568220d61264c247 Mon Sep 17 00:00:00 2001 From: Varun Wadekar Date: Wed, 26 Jan 2022 00:33:02 -0800 Subject: [PATCH 3/3] feat(gic600ae_fmu): enable all GICD, PPI, ITS SMs The following SMIDs are disabled by default. * GICD: MBIST REQ error and GICD FMU ClkGate override * PPI: MBIST REQ error and PPI FMU ClkGate override * ITS: MBIST REQ error and ITS FMU ClkGate override This patch explicitly enables them during the FMU init sequence. 
Change-Id: I573e64786e3318d4cbcd07d0a1caf25f8e6e9200 Signed-off-by: Varun Wadekar --- drivers/arm/gic/v3/gic600ae_fmu.c | 18 ++++++++++++------ include/drivers/arm/gic600ae_fmu.h | 1 + 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/arm/gic/v3/gic600ae_fmu.c b/drivers/arm/gic/v3/gic600ae_fmu.c index efca08fdd..0262f4835 100644 --- a/drivers/arm/gic/v3/gic600ae_fmu.c +++ b/drivers/arm/gic/v3/gic600ae_fmu.c @@ -302,22 +302,26 @@ void gic600_fmu_init(uint64_t base, uint64_t blk_present_mask, */ if ((blk_present_mask & BIT(FMU_BLK_GICD)) != 0U) { smen = (GICD_MBIST_REQ_ERROR << FMU_SMEN_SMID_SHIFT) | - (FMU_BLK_GICD << FMU_SMEN_BLK_SHIFT); + (FMU_BLK_GICD << FMU_SMEN_BLK_SHIFT) | + FMU_SMEN_EN_BIT; gic_fmu_write_smen(base, smen); smen = (GICD_FMU_CLKGATE_ERROR << FMU_SMEN_SMID_SHIFT) | - (FMU_BLK_GICD << FMU_SMEN_BLK_SHIFT); + (FMU_BLK_GICD << FMU_SMEN_BLK_SHIFT) | + FMU_SMEN_EN_BIT; gic_fmu_write_smen(base, smen); } for (unsigned int i = FMU_BLK_PPI0; i < FMU_BLK_PPI31; i++) { if ((blk_present_mask & BIT(i)) != 0U) { smen = (PPI_MBIST_REQ_ERROR << FMU_SMEN_SMID_SHIFT) | - (i << FMU_SMEN_BLK_SHIFT); + (i << FMU_SMEN_BLK_SHIFT) | + FMU_SMEN_EN_BIT; gic_fmu_write_smen(base, smen); smen = (PPI_FMU_CLKGATE_ERROR << FMU_SMEN_SMID_SHIFT) | - (i << FMU_SMEN_BLK_SHIFT); + (i << FMU_SMEN_BLK_SHIFT) | + FMU_SMEN_EN_BIT; gic_fmu_write_smen(base, smen); } } @@ -325,11 +329,13 @@ void gic600_fmu_init(uint64_t base, uint64_t blk_present_mask, for (unsigned int i = FMU_BLK_ITS0; i < FMU_BLK_ITS7; i++) { if ((blk_present_mask & BIT(i)) != 0U) { smen = (ITS_MBIST_REQ_ERROR << FMU_SMEN_SMID_SHIFT) | - (i << FMU_SMEN_BLK_SHIFT); + (i << FMU_SMEN_BLK_SHIFT) | + FMU_SMEN_EN_BIT; gic_fmu_write_smen(base, smen); smen = (ITS_FMU_CLKGATE_ERROR << FMU_SMEN_SMID_SHIFT) | - (i << FMU_SMEN_BLK_SHIFT); + (i << FMU_SMEN_BLK_SHIFT) | + FMU_SMEN_EN_BIT; gic_fmu_write_smen(base, smen); } } diff --git a/include/drivers/arm/gic600ae_fmu.h b/include/drivers/arm/gic600ae_fmu.h index 
904a5f984..88b87b920 100644 --- a/include/drivers/arm/gic600ae_fmu.h +++ b/include/drivers/arm/gic600ae_fmu.h @@ -37,6 +37,7 @@ /* SMEN constants */ #define FMU_SMEN_BLK_SHIFT U(8) #define FMU_SMEN_SMID_SHIFT U(24) +#define FMU_SMEN_EN_BIT BIT(0) /* Error record IDs */ #define FMU_BLK_GICD U(0)