Merge changes from topic "tegra194-ras-handling" into integration

* changes:
  Tegra194: ras: verbose prints for SErrors
  Prevent RAS register access from lower ELs
  Tegra194: SiP: clear RAS corrected error records
  Tegra194: add RAS exception handling
This commit is contained in:
Manish Pandey 2020-06-16 09:55:36 +00:00 committed by TrustedFirmware Code Review
commit 5eeb091ade
19 changed files with 805 additions and 2 deletions

View File

@ -900,6 +900,7 @@ $(eval $(call assert_boolean,USE_SPINLOCK_CAS))
$(eval $(call assert_boolean,ENCRYPT_BL31))
$(eval $(call assert_boolean,ENCRYPT_BL32))
$(eval $(call assert_boolean,ERRATA_SPECULATIVE_AT))
$(eval $(call assert_boolean,RAS_TRAP_LOWER_EL_ERR_ACCESS))
$(eval $(call assert_numeric,ARM_ARCH_MAJOR))
$(eval $(call assert_numeric,ARM_ARCH_MINOR))
@ -979,6 +980,7 @@ $(eval $(call add_define,BL2_IN_XIP_MEM))
$(eval $(call add_define,BL2_INV_DCACHE))
$(eval $(call add_define,USE_SPINLOCK_CAS))
$(eval $(call add_define,ERRATA_SPECULATIVE_AT))
$(eval $(call add_define,RAS_TRAP_LOWER_EL_ERR_ACCESS))
ifeq (${SANITIZE_UB},trap)
$(eval $(call add_define,MONITOR_TRAPS))

View File

@ -32,7 +32,8 @@ introduced by the RAS extensions.
The build option ``RAS_EXTENSION`` when set to ``1`` includes the RAS in run
time firmware; ``EL3_EXCEPTION_HANDLING`` and ``HANDLE_EA_EL3_FIRST`` must also
be set ``1``.
be set ``1``. ``RAS_TRAP_LOWER_EL_ERR_ACCESS`` controls the access to the RAS
error record registers from lower ELs.
.. _ras-figure:

View File

@ -707,6 +707,10 @@ Common build options
| 1530924 | Cortex-A53 |
+---------+--------------+
- ``RAS_TRAP_LOWER_EL_ERR_ACCESS``: This flag enables/disables the SCR_EL3.TERR
bit, to trap access to the RAS ERR and RAS ERX registers from lower ELs.
This flag is disabled by default.
GICv3 driver options
--------------------

View File

@ -342,6 +342,7 @@
#define SCR_EEL2_BIT (U(1) << 18)
#define SCR_API_BIT (U(1) << 17)
#define SCR_APK_BIT (U(1) << 16)
#define SCR_TERR_BIT (U(1) << 15)
#define SCR_TWE_BIT (U(1) << 13)
#define SCR_TWI_BIT (U(1) << 12)
#define SCR_ST_BIT (U(1) << 11)

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
* Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@ -192,6 +193,7 @@ static inline int ras_err_ser_probe_sysreg(const struct err_record_info *info,
probe_data);
}
const char *ras_serr_to_str(unsigned int serr);
int ras_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie,
void *handle, uint64_t flags);
void ras_init(void);

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
* Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@ -151,6 +152,9 @@
#define ERROR_STATUS_SET_UC 0x2 /* Uncontainable */
#define ERROR_STATUS_SET_CE 0x3 /* Corrected */
/* Number of architecturally-defined primary error codes */
#define ERROR_STATUS_NUM_SERR U(22)
/* Implementation Defined Syndrome bit in ESR */
#define SERROR_IDS_BIT U(24)

View File

@ -108,6 +108,14 @@ void cm_setup_context(cpu_context_t *ctx, const entry_point_info_t *ep)
if (EP_GET_ST(ep->h.attr) != 0U)
scr_el3 |= SCR_ST_BIT;
#if RAS_TRAP_LOWER_EL_ERR_ACCESS
/*
* SCR_EL3.TERR: Trap Error record accesses. Accesses to the RAS ERR
* and RAS ERX registers from EL1 and EL2 are trapped to EL3.
*/
scr_el3 |= SCR_TERR_BIT;
#endif
#if !HANDLE_EA_EL3_FIRST
/*
* SCR_EL3.EA: Do not route External Abort and SError Interrupt External

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2018-2019, ARM Limited and Contributors. All rights reserved.
* Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@ -18,6 +19,47 @@
# error Platform must define RAS priority value
#endif
/*
 * Convert the architecturally-defined primary error code SERR,
 * bits[7:0] of ERR<n>STATUS, to its corresponding error string.
 *
 * serr: primary error code taken from ERR<n>STATUS.
 *
 * Returns a pointer to a string with static storage duration. Codes that have
 * no entry in the table yield "unknown SERR"; NULL is never returned.
 */
const char *ras_serr_to_str(unsigned int serr)
{
	/*
	 * Indexed by SERR code. The table is static and const so that it is
	 * not re-initialised on the stack every time an error is reported.
	 * It holds one entry per architecturally-defined code
	 * (ERROR_STATUS_NUM_SERR of them).
	 */
	static const char *const serr_str[] = {
		"No error",
		"IMPLEMENTATION DEFINED error",
		"Data value from (non-associative) internal memory",
		"IMPLEMENTATION DEFINED pin",
		"Assertion failure",
		"Error detected on internal data path",
		"Data value from associative memory",
		"Address/control value from associative memory",
		"Data value from a TLB",
		"Address/control value from a TLB",
		"Data value from producer",
		"Address/control value from producer",
		"Data value from (non-associative) external memory",
		"Illegal address (software fault)",
		"Illegal access (software fault)",
		"Illegal state (software fault)",
		"Internal data register",
		"Internal control register",
		"Error response from slave",
		"External timeout",
		"Internal timeout",
		"Deferred error from slave not supported at master"
	};
	/*
	 * Bound the lookup by the real table size so that an in-range index
	 * can never select a missing (NULL) entry.
	 */
	const unsigned int num_serr =
		(unsigned int)(sizeof(serr_str) / sizeof(serr_str[0]));

	/*
	 * All other values are reserved. Reserved values might be defined
	 * in a future version of the architecture
	 */
	if (serr >= num_serr) {
		return "unknown SERR";
	}

	return serr_str[serr];
}
/* Handler that receives External Aborts on RAS-capable systems */
int ras_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie,
void *handle, uint64_t flags)

View File

@ -302,3 +302,6 @@ SUPPORT_STACK_MEMTAG := no
# Select workaround for AT speculative behaviour.
ERRATA_SPECULATIVE_AT := 0
# Trap RAS error record access from lower EL
RAS_TRAP_LOWER_EL_ERR_ACCESS := 0

View File

@ -95,6 +95,7 @@
* Platform macros to support exception handling framework
******************************************************************************/
#define PLAT_PRI_BITS U(3)
#define PLAT_RAS_PRI U(0x10)
#define PLAT_SDEI_CRITICAL_PRI U(0x20)
#define PLAT_SDEI_NORMAL_PRI U(0x30)
#define PLAT_TEGRA_WDT_PRIO U(0x40)

View File

@ -0,0 +1,263 @@
/*
* Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#ifndef TEGRA194_RAS_PRIVATE
#define TEGRA194_RAS_PRIVATE
#include <stdint.h>
/*
 * Implementation defined RAS error and corresponding error message.
 *
 * Tables of these records are terminated by an entry whose error_msg is NULL.
 * The terminator is initialised positionally, so the order of the members
 * below must not change.
 */
struct ras_error {
/* Human readable description of the error; NULL marks the end of a table. */
const char *error_msg;
/* IERR(bits[15:8]) from ERR<n>STATUS */
uint8_t error_code;
};
/*
 * RAS error node-specific auxiliary data.
 *
 * One instance describes one RAS node; arrays of these are attached to an
 * error record group, one element per node of the group.
 */
struct ras_aux_data {
/* name for current RAS node. */
const char *name;
/* point to null-terminated ras_error array to convert error code to msg. */
const struct ras_error *error_records;
/*
* function to return a value which needs to be programmed into ERXCTLR_EL1
* to enable all specified RAS errors for current node.
*/
uint64_t (*err_ctrl)(void);
};
/* IFU Uncorrectable RAS ERROR */
#define IFU_UNCORR_RAS_ERROR_LIST(X)
/* JSR_RET Uncorrectable RAS ERROR */
#define JSR_RET_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(JSR_RET, 35, 0x13, "Floating Point Register File Parity Error") \
X(JSR_RET, 34, 0x12, "Integer Register File Parity Error") \
X(JSR_RET, 33, 0x11, "Garbage Bundle") \
X(JSR_RET, 32, 0x10, "Bundle Completion Timeout")
/* JSR_MTS Uncorrectable RAS ERROR */
#define JSR_MTS_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(JSR_MTS, 40, 0x28, "CoreSight Access Error") \
X(JSR_MTS, 39, 0x27, "Dual Execution Uncorrectable Error") \
X(JSR_MTS, 37, 0x25, "CTU MMIO Region") \
X(JSR_MTS, 36, 0x24, "MTS MMCRAB Region Access") \
X(JSR_MTS, 35, 0x23, "MTS_CARVEOUT Access from ARM SW") \
X(JSR_MTS, 34, 0x22, "NAFLL PLL Failure to Lock") \
X(JSR_MTS, 32, 0x20, "Internal Uncorrectable MTS Error")
/* LSD_STQ Uncorrectable RAS ERROR */
#define LSD_STQ_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(LSD_STQ, 41, 0x39, "Coherent Cache Data Store Multi-Line ECC Error") \
X(LSD_STQ, 40, 0x38, "Coherent Cache Data Store Uncorrectable ECC Error") \
X(LSD_STQ, 38, 0x36, "Coherent Cache Data Load Uncorrectable ECC Error") \
X(LSD_STQ, 33, 0x31, "Coherent Cache Tag Store Parity Error") \
X(LSD_STQ, 32, 0x30, "Coherent Cache Tag Load Parity Error")
/* LSD_DCC Uncorrectable RAS ERROR */
#define LSD_DCC_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(LSD_DCC, 41, 0x49, "BTU Copy Mini-Cache PPN Multi-Hit Error") \
X(LSD_DCC, 39, 0x47, "Coherent Cache Data Uncorrectable ECC Error") \
X(LSD_DCC, 37, 0x45, "Version Cache Byte-Enable Parity Error") \
X(LSD_DCC, 36, 0x44, "Version Cache Data Uncorrectable ECC Error") \
X(LSD_DCC, 33, 0x41, "BTU Copy Coherent Cache PPN Parity Error") \
X(LSD_DCC, 32, 0x40, "BTU Copy Coherent Cache VPN Parity Error")
/* LSD_L1HPF Uncorrectable RAS ERROR */
#define LSD_L1HPF_UNCORR_RAS_ERROR_LIST(X)
/* L2 Uncorrectable RAS ERROR */
#define L2_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(L2, 56, 0x68, "URT Timeout") \
X(L2, 55, 0x67, "L2 Protocol Violation") \
X(L2, 54, 0x66, "SCF to L2 Slave Error Read") \
X(L2, 53, 0x65, "SCF to L2 Slave Error Write") \
X(L2, 52, 0x64, "SCF to L2 Decode Error Read") \
X(L2, 51, 0x63, "SCF to L2 Decode Error Write") \
X(L2, 50, 0x62, "SCF to L2 Request Response Interface Parity Errors") \
X(L2, 49, 0x61, "SCF to L2 Advance notice interface parity errors") \
X(L2, 48, 0x60, "SCF to L2 Filldata Parity Errors") \
X(L2, 47, 0x5F, "SCF to L2 UnCorrectable ECC Data Error on interface") \
X(L2, 45, 0x5D, "Core 1 to L2 Parity Error") \
X(L2, 44, 0x5C, "Core 0 to L2 Parity Error") \
X(L2, 43, 0x5B, "L2 Multi-Hit") \
X(L2, 42, 0x5A, "L2 URT Tag Parity Error") \
X(L2, 41, 0x59, "L2 NTT Tag Parity Error") \
X(L2, 40, 0x58, "L2 MLT Tag Parity Error") \
X(L2, 39, 0x57, "L2 URD Data") \
X(L2, 38, 0x56, "L2 NTP Data") \
X(L2, 36, 0x54, "L2 MLC Uncorrectable Clean") \
X(L2, 35, 0x53, "L2 URD Uncorrectable Dirty") \
X(L2, 34, 0x52, "L2 MLC Uncorrectable Dirty")
/* CLUSTER_CLOCKS Uncorrectable RAS ERROR */
#define CLUSTER_CLOCKS_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(CLUSTER_CLOCKS, 32, 0xE4, "Frequency Monitor Error")
/* MMU Uncorrectable RAS ERROR */
#define MMU_UNCORR_RAS_ERROR_LIST(X)
/* L3 Uncorrectable RAS ERROR */
#define L3_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(L3, 43, 0x7B, "SNOC Interface Parity Error") \
X(L3, 42, 0x7A, "MCF Interface Parity Error") \
X(L3, 41, 0x79, "L3 Tag Parity Error") \
X(L3, 40, 0x78, "L3 Dir Parity Error") \
X(L3, 39, 0x77, "L3 Uncorrectable ECC Error") \
X(L3, 37, 0x75, "Multi-Hit CAM Error") \
X(L3, 36, 0x74, "Multi-Hit Tag Error") \
X(L3, 35, 0x73, "Unrecognized Command Error") \
X(L3, 34, 0x72, "L3 Protocol Error")
/*
 * CCPMU Uncorrectable RAS ERROR
 *
 * NOTE(review): every row but the first pairs ERR_CTRL bit (32 + n) with
 * IERR (0x80 + n); the first row pairs bit 40 with IERR 0x87. Confirm against
 * the hardware specification whether this is intentional.
 */
#define CCPMU_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(CCPMU, 40, 0x87, "CoreSight Access Error") \
X(CCPMU, 36, 0x84, "MCE Ucode Error") \
X(CCPMU, 35, 0x83, "MCE IL1 Parity Error") \
X(CCPMU, 34, 0x82, "MCE Timeout Error") \
X(CCPMU, 33, 0x81, "CRAB Access Error") \
X(CCPMU, 32, 0x80, "MCE Memory Access Error")
/* SCF_IOB Uncorrectable RAS ERROR */
#define SCF_IOB_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(SCF_IOB, 41, 0x99, "Request parity error") \
X(SCF_IOB, 40, 0x98, "Putdata parity error") \
X(SCF_IOB, 39, 0x97, "Uncorrectable ECC on Putdata") \
X(SCF_IOB, 38, 0x96, "CBB Interface Error") \
X(SCF_IOB, 37, 0x95, "MMCRAB Error") \
X(SCF_IOB, 36, 0x94, "IHI Interface Error") \
X(SCF_IOB, 35, 0x93, "CRI Error") \
X(SCF_IOB, 34, 0x92, "TBX Interface Error") \
X(SCF_IOB, 33, 0x91, "EVP Interface Error")
/* SCF_SNOC Uncorrectable RAS ERROR */
#define SCF_SNOC_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(SCF_SNOC, 42, 0xAA, "Misc Client Parity Error") \
X(SCF_SNOC, 41, 0xA9, "Misc Filldata Parity Error") \
X(SCF_SNOC, 40, 0xA8, "Uncorrectable ECC Misc Client") \
X(SCF_SNOC, 39, 0xA7, "DVMU Interface Parity Error") \
X(SCF_SNOC, 38, 0xA6, "DVMU Interface Timeout Error") \
X(SCF_SNOC, 37, 0xA5, "CPE Request Error") \
X(SCF_SNOC, 36, 0xA4, "CPE Response Error") \
X(SCF_SNOC, 35, 0xA3, "CPE Timeout Error") \
X(SCF_SNOC, 34, 0xA2, "Uncorrectable Carveout Error")
/*
 * SCF_CTU Uncorrectable RAS ERROR
 *
 * NOTE(review): ERR_CTRL bits 36 and 35 are paired with IERR 0xB3 and 0xB4
 * respectively, whereas every other row pairs bit (32 + n) with IERR
 * (0xB0 + n). Confirm against the hardware specification that these two
 * codes are not swapped.
 */
#define SCF_CTU_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(SCF_CTU, 39, 0xB7, "Timeout error for TRC_DMA request") \
X(SCF_CTU, 38, 0xB6, "Timeout error for CTU Snp") \
X(SCF_CTU, 37, 0xB5, "Parity error in CTU TAG RAM") \
X(SCF_CTU, 36, 0xB3, "Parity error in CTU DATA RAM") \
X(SCF_CTU, 35, 0xB4, "Parity error for Cluster Rsp") \
X(SCF_CTU, 34, 0xB2, "Parity error for TRL requests from 9 agents") \
X(SCF_CTU, 33, 0xB1, "Parity error for MCF request") \
X(SCF_CTU, 32, 0xB0, "TRC DMA fillsnoop parity error")
/*
 * CMU_CLOCKS Uncorrectable RAS ERROR
 *
 * NOTE(review): ERR_CTRL bits 36 and 35 are paired with IERR 0xC3 and 0xC4
 * respectively, whereas every other row pairs bit (32 + n) with IERR
 * (0xC0 + n). Confirm against the hardware specification that these two
 * codes are not swapped.
 */
#define CMU_CLOCKS_UNCORR_RAS_ERROR_LIST(X) \
/* Name, ERR_CTRL, IERR, ISA Desc */ \
X(CMU_CLOCKS, 39, 0xC7, "Cluster 3 frequency monitor error") \
X(CMU_CLOCKS, 38, 0xC6, "Cluster 2 frequency monitor error") \
X(CMU_CLOCKS, 37, 0xC5, "Cluster 1 frequency monitor error") \
X(CMU_CLOCKS, 36, 0xC3, "Cluster 0 frequency monitor error") \
X(CMU_CLOCKS, 35, 0xC4, "Voltage error on ADC1 Monitored Logic") \
X(CMU_CLOCKS, 34, 0xC2, "Voltage error on ADC0 Monitored Logic") \
X(CMU_CLOCKS, 33, 0xC1, "Lookup Table 1 Parity Error") \
X(CMU_CLOCKS, 32, 0xC0, "Lookup Table 0 Parity Error")
/*
* Define one ras_error entry.
*
* This macro will be used to generate ras_error records for each node
* defined by the <NODE_NAME>_UNCORR_RAS_ERROR_LIST macro.
*/
#define DEFINE_ONE_RAS_ERROR_MSG(unit, ras_bit, ierr, msg) \
{ \
.error_msg = (msg), \
.error_code = (ierr) \
},
/*
* Set one implementation defined bit in ERR<n>CTLR
*
* This macro will be used to collect all defined ERR_CTRL bits for each node
* defined by <NODE_NAME>_UNCORR_RAS_ERROR_LIST macro.
*
* The expansion ORs bit number ras_bit into a variable named 'val', which must
* be in scope where the list is expanded. ras_bit must be a plain decimal
* literal because a 'U' suffix is pasted onto it. The trailing semicolon is
* part of the macro: the list macros invoke X(...) back to back with no
* separator between the entries.
*/
#define DEFINE_ENABLE_RAS_BIT(unit, ras_bit, ierr, msg) \
do { \
val |= (1ULL << ras_bit##U); \
} while (0);
/*
 * Represent one RAS node with 0 or more error bits (ERR_CTLR) enabled.
 *
 * For a given node this emits two file-local definitions:
 *  - <node>_uncorr_ras_errors[]: the node's ras_error records followed by a
 *    terminating { NULL, 0U } entry;
 *  - <node>_err_ctrl(): returns the OR of (1ULL << ERR_CTRL bit) for every
 *    entry of <node>_UNCORR_RAS_ERROR_LIST, or 0 when the list is empty.
 */
#define DEFINE_ONE_RAS_NODE(node) \
static const struct ras_error node##_uncorr_ras_errors[] = { \
node##_UNCORR_RAS_ERROR_LIST(DEFINE_ONE_RAS_ERROR_MSG) \
{ \
NULL, \
0U \
}, \
}; \
static inline uint64_t node##_err_ctrl(void) \
{ \
uint64_t val = 0ULL; \
node##_UNCORR_RAS_ERROR_LIST(DEFINE_ENABLE_RAS_BIT) \
return val; \
}
#define DEFINE_ONE_RAS_AUX_DATA(node) \
{ \
.name = #node, \
.error_records = node##_uncorr_ras_errors, \
.err_ctrl = &node##_err_ctrl \
},
#define PER_CORE_RAS_NODE_LIST(X) \
X(IFU) \
X(JSR_RET) \
X(JSR_MTS) \
X(LSD_STQ) \
X(LSD_DCC) \
X(LSD_L1HPF)
#define PER_CORE_RAS_GROUP_NODES PER_CORE_RAS_NODE_LIST(DEFINE_ONE_RAS_AUX_DATA)
#define PER_CLUSTER_RAS_NODE_LIST(X) \
X(L2) \
X(CLUSTER_CLOCKS) \
X(MMU)
#define PER_CLUSTER_RAS_GROUP_NODES PER_CLUSTER_RAS_NODE_LIST(DEFINE_ONE_RAS_AUX_DATA)
#define SCF_L3_BANK_RAS_NODE_LIST(X) X(L3)
/* we have 4 SCF_L3 nodes:3*256 + L3_Bank_ID(0-3) */
#define SCF_L3_BANK_RAS_GROUP_NODES \
SCF_L3_BANK_RAS_NODE_LIST(DEFINE_ONE_RAS_AUX_DATA) \
SCF_L3_BANK_RAS_NODE_LIST(DEFINE_ONE_RAS_AUX_DATA) \
SCF_L3_BANK_RAS_NODE_LIST(DEFINE_ONE_RAS_AUX_DATA) \
SCF_L3_BANK_RAS_NODE_LIST(DEFINE_ONE_RAS_AUX_DATA)
#define CCPLEX_RAS_NODE_LIST(X) \
X(CCPMU) \
X(SCF_IOB) \
X(SCF_SNOC) \
X(SCF_CTU) \
X(CMU_CLOCKS)
#define CCPLEX_RAS_GROUP_NODES CCPLEX_RAS_NODE_LIST(DEFINE_ONE_RAS_AUX_DATA)
#endif /* TEGRA194_RAS_PRIVATE */

View File

@ -89,7 +89,7 @@ int32_t plat_lock_cpu_vectors(void);
/* Declarations for tegra_fiq_glue.c */
void tegra_fiq_handler_setup(void);
int tegra_fiq_get_intr_context(void);
int32_t tegra_fiq_get_intr_context(void);
void tegra_fiq_set_ns_entrypoint(uint64_t entrypoint);
/* Declarations for tegra_security.c */
@ -157,4 +157,9 @@ int plat_sip_handler(uint32_t smc_fid,
void *handle,
uint64_t flags);
#if RAS_EXTENSION
void tegra194_ras_enable(void);
void tegra194_ras_corrected_err_clear(void);
#endif
#endif /* TEGRA_PRIVATE_H */

View File

@ -58,6 +58,7 @@ int32_t nvg_roc_clean_cache_trbits(void);
void nvg_enable_strict_checking_mode(void);
void nvg_system_shutdown(void);
void nvg_system_reboot(void);
void nvg_clear_hsm_corr_status(void);
/* declarations for assembly functions */
void nvg_set_request_data(uint64_t req, uint64_t data);
@ -71,5 +72,6 @@ uint64_t nvg_cache_inval_all(void);
void mce_enable_strict_checking(void);
void mce_system_shutdown(void);
void mce_system_reboot(void);
void mce_clear_hsm_corr_status(void);
#endif /* MCE_PRIVATE_H */

View File

@ -234,3 +234,11 @@ void mce_system_reboot(void)
{
nvg_system_reboot();
}
/*******************************************************************************
* Handler to clear CCPLEX->HSM correctable RAS error signal.
*
* Thin wrapper that forwards the request to nvg_clear_hsm_corr_status().
******************************************************************************/
void mce_clear_hsm_corr_status(void)
{
nvg_clear_hsm_corr_status();
}

View File

@ -236,3 +236,15 @@ void nvg_system_shutdown(void)
nvg_set_request_data((uint64_t)TEGRA_NVG_CHANNEL_SHUTDOWN,
(uint64_t)TEGRA_NVG_SHUTDOWN);
}
/*
* Request to clear CCPLEX->HSM correctable error signal.
* NVGDATA[1]: A write of 1 clears the CCPLEX->HSM correctable error signal,
* A write of 0 has no effect.
*/
void nvg_clear_hsm_corr_status(void)
{
/* Only the 'corr' bit is set; every other bit of the request stays 0. */
nvg_hsm_error_ctrl_channel_t status = { .bits = { .corr = 1U, }, };
/* Send the flattened request value on the HSM error control channel. */
nvg_set_request_data((uint64_t)TEGRA_NVG_CHANNEL_HSM_ERROR_CTRL, status.flat);
}

View File

@ -0,0 +1,418 @@
/*
* Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <stdbool.h>
#include <stdint.h>
#include <common/debug.h>
#include <lib/bakery_lock.h>
#include <lib/extensions/ras.h>
#include <lib/utils_def.h>
#include <services/sdei.h>
#include <plat/common/platform.h>
#include <platform_def.h>
#include <tegra194_ras_private.h>
#include <tegra_def.h>
#include <tegra_platform.h>
#include <tegra_private.h>
/*
* ERR<n>FR bits[63:32], it indicates supported RAS errors which can be enabled
* by setting corresponding bits in ERR<n>CTLR
*/
#define ERR_FR_EN_BITS_MASK 0xFFFFFFFF00000000ULL
/* bakery lock for platform RAS handler. */
static DEFINE_BAKERY_LOCK(ras_handler_lock);
#define ras_lock() bakery_lock_get(&ras_handler_lock)
#define ras_unlock() bakery_lock_release(&ras_handler_lock)
/*
 * Platform handler for an External Abort taken to EL3.
 *
 * The abort is logged and handed to the generic RAS framework through
 * ras_ea_handler(). When the framework reports that it handled a RAS error,
 * the SDEI event of the current core (TEGRA_SDEI_EP_EVENT_0 + core position)
 * is dispatched. The whole sequence runs under the platform RAS lock.
 */
static void tegra194_ea_handler(unsigned int ea_reason, uint64_t syndrome,
				void *cookie, void *handle, uint64_t flags)
{
	int32_t handled;
	int32_t sdei_ret;

	ras_lock();

	ERROR("MPIDR 0x%lx: exception reason=%u syndrome=0x%llx\n",
	      read_mpidr(), ea_reason, syndrome);

	/* Let the RAS framework probe and handle the error records. */
	handled = ras_ea_handler(ea_reason, syndrome, cookie, handle, flags);

	if (handled == 0) {
		ERROR("Not a RAS error!\n");
	} else {
		ERROR("RAS error handled!\n");

		/* Notify the lower EL through the per-core SDEI event. */
		sdei_ret = sdei_dispatch_event(TEGRA_SDEI_EP_EVENT_0 +
					       plat_my_core_pos());
		if (sdei_ret != 0) {
			ERROR("sdei_dispatch_event returned %d\n", sdei_ret);
		}
	}

	ras_unlock();
}
/*
* Function to enable all supported RAS error report.
*
* Uncorrected errors are set to report as External abort (SError)
* Corrected errors are set to report as interrupt.
*
* Nothing is programmed unless tegra_platform_is_silicon() returns true.
* For every node of every group in err_record_mappings the node is selected
* through ERRSELR_EL1, the supported-error bits are read from ERXFR_EL1, and
* the resulting control value is written to ERXCTLR_EL1 and read back.
*/
void tegra194_ras_enable(void)
{
VERBOSE("%s\n", __func__);
/* skip RAS enablement if not a silicon platform. */
if (!tegra_platform_is_silicon()) {
return;
}
/*
* Iterate for each group(num_idx ERRSELRs starting from idx_start)
* use normal for loop instead of for_each_err_record_info to get rid
* of MISRA noise..
*/
for (uint32_t i = 0U; i < err_record_mappings.num_err_records; i++) {
const struct err_record_info *info = &err_record_mappings.err_records[i];
uint32_t idx_start = info->sysreg.idx_start;
uint32_t num_idx = info->sysreg.num_idx;
/* Per-node auxiliary data of this group, indexed like the nodes. */
const struct ras_aux_data *aux_data = (const struct ras_aux_data *)info->aux_data;
assert(aux_data != NULL);
for (uint32_t j = 0; j < num_idx; j++) {
/* ERR<n>CTLR register value. */
uint64_t err_ctrl = 0ULL;
/* all supported errors for this node. */
uint64_t err_fr;
/* uncorrectable errors */
uint64_t uncorr_errs;
/* correctable errors */
uint64_t corr_errs;
/*
* Catch error if something wrong with the RAS aux data
* record table.
*/
assert(aux_data[j].err_ctrl != NULL);
/*
* Write to ERRSELR_EL1 to select the RAS error node.
* Always program this at first to select corresponding
* RAS node before any other RAS register r/w.
*/
ser_sys_select_record(idx_start + j);
/* Keep only bits[63:32], the implementation defined enable bits. */
err_fr = read_erxfr_el1() & ERR_FR_EN_BITS_MASK;
uncorr_errs = aux_data[j].err_ctrl();
/* Supported errors not listed as uncorrectable count as correctable. */
corr_errs = ~uncorr_errs & err_fr;
/* enable error reporting */
ERR_CTLR_ENABLE_FIELD(err_ctrl, ED);
/* enable SError reporting for uncorrectable errors */
if ((uncorr_errs & err_fr) != 0ULL) {
ERR_CTLR_ENABLE_FIELD(err_ctrl, UE);
}
/* generate interrupt for corrected errors. */
if (corr_errs != 0ULL) {
ERR_CTLR_ENABLE_FIELD(err_ctrl, CFI);
}
/* enable the supported errors */
err_ctrl |= err_fr;
VERBOSE("errselr_el1:0x%x, erxfr:0x%llx, err_ctrl:0x%llx\n",
idx_start + j, err_fr, err_ctrl);
/* enable specified errors, or set to 0 if no supported error */
write_erxctlr_el1(err_ctrl);
/*
* Check if all the bit settings have been enabled to detect
* uncorrected/corrected errors, if not assert.
*/
assert(read_erxctlr_el1() == err_ctrl);
}
}
}
/*
 * Function to clear RAS ERR<n>STATUS for corrected RAS error.
 *
 * Walks every node of every group in err_record_mappings and, where the CE
 * field of ERXSTATUS_EL1 is non-zero, writes the AV, V, OF, MV and CE fields
 * back to clear the record.
 *
 * This function ignores any new RAS error signaled during clearing; it is not
 * multi-core safe(no ras_lock is taken to reduce overhead).
 */
void tegra194_ras_corrected_err_clear(void)
{
	uint64_t clear_ce_status = 0ULL;

	ERR_STATUS_SET_FIELD(clear_ce_status, AV, 0x1UL);
	ERR_STATUS_SET_FIELD(clear_ce_status, V, 0x1UL);
	ERR_STATUS_SET_FIELD(clear_ce_status, OF, 0x1UL);
	ERR_STATUS_SET_FIELD(clear_ce_status, MV, 0x1UL);
	ERR_STATUS_SET_FIELD(clear_ce_status, CE, 0x3UL);

	for (uint32_t i = 0U; i < err_record_mappings.num_err_records; i++) {
		const struct err_record_info *info = &err_record_mappings.err_records[i];
		uint32_t idx_start = info->sysreg.idx_start;
		uint32_t num_idx = info->sysreg.num_idx;

		for (uint32_t j = 0U; j < num_idx; j++) {
			uint64_t status;
			uint32_t err_idx = idx_start + j;

			write_errselr_el1(err_idx);
			/*
			 * The ERX* registers are views selected by ERRSELR_EL1.
			 * Synchronise the selection before touching them so
			 * the status access below targets the record that was
			 * just selected, as the other record accessors in this
			 * file do through ser_sys_select_record().
			 */
			isb();

			status = read_erxstatus_el1();
			if (ERR_STATUS_GET_FIELD(status, CE) != 0U) {
				write_erxstatus_el1(clear_ce_status);
			}
		}
	}
}
/*
 * Probe one error record group for a pending error.
 *
 * Returns 0 without probing on non-silicon platforms; otherwise returns the
 * result of ser_probe_sysreg() for the group's ERRSELR range, which reports
 * its finding through probe_data.
 */
static int32_t tegra194_ras_record_probe(const struct err_record_info *info,
		int *probe_data)
{
	int32_t result = 0;

	/* Error records are only probed on real silicon. */
	if (tegra_platform_is_silicon()) {
		result = ser_probe_sysreg(info->sysreg.idx_start,
					  info->sysreg.num_idx, probe_data);
	}

	return result;
}
/* Function to handle error from one given node */
static int32_t tegra194_ras_node_handler(uint32_t errselr, const char *name,
const struct ras_error *errors, uint64_t status)
{
bool found = false;
uint32_t ierr = (uint32_t)ERR_STATUS_GET_FIELD(status, IERR);
uint32_t serr = (uint32_t)ERR_STATUS_GET_FIELD(status, SERR);
uint64_t val = 0;
/* not a valid error. */
if (ERR_STATUS_GET_FIELD(status, V) == 0U) {
return 0;
}
ERR_STATUS_SET_FIELD(val, V, 1);
/* keep the log print same as linux arm64_ras driver. */
ERROR("**************************************\n");
ERROR("RAS Error in %s, ERRSELR_EL1=0x%x:\n", name, errselr);
ERROR("\tStatus = 0x%llx\n", status);
/* Print uncorrectable errror information. */
if (ERR_STATUS_GET_FIELD(status, UE) != 0U) {
ERR_STATUS_SET_FIELD(val, UE, 1);
ERR_STATUS_SET_FIELD(val, UET, 1);
/* IERR to error message */
for (uint32_t i = 0; errors[i].error_msg != NULL; i++) {
if (ierr == errors[i].error_code) {
ERROR("\tIERR = %s: 0x%x\n",
errors[i].error_msg, ierr);
found = true;
break;
}
}
if (!found) {
ERROR("\tUnknown IERR: 0x%x\n", ierr);
}
ERROR("SERR = %s: 0x%x\n", ras_serr_to_str(serr), serr);
/* Overflow, multiple errors have been detected. */
if (ERR_STATUS_GET_FIELD(status, OF) != 0U) {
ERROR("\tOverflow (there may be more errors) - "
"Uncorrectable\n");
ERR_STATUS_SET_FIELD(val, OF, 1);
}
ERROR("\tUncorrectable (this is fatal)\n");
/* Miscellaneous Register Valid. */
if (ERR_STATUS_GET_FIELD(status, MV) != 0U) {
ERROR("\tMISC0 = 0x%lx\n", read_erxmisc0_el1());
ERROR("\tMISC1 = 0x%lx\n", read_erxmisc1_el1());
ERR_STATUS_SET_FIELD(val, MV, 1);
}
/* Address Valid. */
if (ERR_STATUS_GET_FIELD(status, AV) != 0U) {
ERROR("\tADDR = 0x%lx\n", read_erxaddr_el1());
ERR_STATUS_SET_FIELD(val, AV, 1);
}
/* Deferred error */
if (ERR_STATUS_GET_FIELD(status, DE) != 0U) {
ERROR("\tDeferred error\n");
ERR_STATUS_SET_FIELD(val, DE, 1);
}
} else {
/* For corrected error, simply clear it. */
VERBOSE("corrected RAS error is cleared: ERRSELR_EL1:0x%x, "
"IERR:0x%x, SERR:0x%x\n", errselr, ierr, serr);
ERR_STATUS_SET_FIELD(val, CE, 1);
}
ERROR("**************************************\n");
/* Write to clear reported errors. */
write_erxstatus_el1(val);
/* error handled */
return 0;
}
/*
 * Handle the node that the probe step flagged within one error record group.
 *
 * probe_data is the zero-based offset of the node inside the group. The
 * node's record is selected, its status register is read, and both are passed
 * to tegra194_ras_node_handler() together with the node's auxiliary data.
 */
static int32_t tegra194_ras_record_handler(const struct err_record_info *info,
		int probe_data, const struct err_handler_data *const data __unused)
{
	uint32_t num_idx = info->sysreg.num_idx;
	const struct ras_aux_data *nodes = info->aux_data;
	const struct ras_aux_data *node;
	uint32_t errselr;
	uint64_t status = 0ULL;

	VERBOSE("%s\n", __func__);

	/* The probed offset must designate a node of this group. */
	assert(probe_data >= 0);
	assert((uint32_t)probe_data < num_idx);

	node = &nodes[(uint32_t)probe_data];
	assert(node->error_records != NULL);

	/* Write to ERRSELR_EL1 to select the error record */
	errselr = info->sysreg.idx_start + (uint32_t)probe_data;
	ser_sys_select_record(errselr);

	/* Retrieve status register from the error record */
	status = read_erxstatus_el1();

	return tegra194_ras_node_handler(errselr, node->name,
					 node->error_records, status);
}
/* Instantiate RAS nodes */
PER_CORE_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE)
PER_CLUSTER_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE)
SCF_L3_BANK_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE)
CCPLEX_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE)
/* Instantiate RAS node groups */
static struct ras_aux_data per_core_ras_group[] = {
PER_CORE_RAS_GROUP_NODES
};
static struct ras_aux_data per_cluster_ras_group[] = {
PER_CLUSTER_RAS_GROUP_NODES
};
static struct ras_aux_data scf_l3_ras_group[] = {
SCF_L3_BANK_RAS_GROUP_NODES
};
static struct ras_aux_data ccplex_ras_group[] = {
CCPLEX_RAS_GROUP_NODES
};
/*
* We have the same probe and handler for each error record group, so use a
* macro to simplify the record definition.
*/
#define ADD_ONE_ERR_GROUP(errselr_start, group) \
ERR_RECORD_SYSREG_V1((errselr_start), (uint32_t)ARRAY_SIZE((group)), \
&tegra194_ras_record_probe, \
&tegra194_ras_record_handler, (group))
/*
 * RAS error record group information.
 *
 * Each entry describes one group: the ERRSELR index of its first node and
 * the auxiliary data array whose length gives the number of nodes.
 */
static struct err_record_info carmel_ras_records[] = {
/*
* Per core ras error records
* ERRSELR starts from 0*256 + Logical_CPU_ID*16 + 0 to
* 0*256 + Logical_CPU_ID*16 + 5 for each group.
* 8 cores/groups, 6 * 8 nodes in total.
*/
ADD_ONE_ERR_GROUP(0x000, per_core_ras_group),
ADD_ONE_ERR_GROUP(0x010, per_core_ras_group),
ADD_ONE_ERR_GROUP(0x020, per_core_ras_group),
ADD_ONE_ERR_GROUP(0x030, per_core_ras_group),
ADD_ONE_ERR_GROUP(0x040, per_core_ras_group),
ADD_ONE_ERR_GROUP(0x050, per_core_ras_group),
ADD_ONE_ERR_GROUP(0x060, per_core_ras_group),
ADD_ONE_ERR_GROUP(0x070, per_core_ras_group),
/*
* Per cluster ras error records
* ERRSELR starts from 2*256 + Logical_Cluster_ID*16 + 0 to
* 2*256 + Logical_Cluster_ID*16 + 2 for each group.
* 4 clusters/groups, 3 * 4 nodes in total.
*/
ADD_ONE_ERR_GROUP(0x200, per_cluster_ras_group),
ADD_ONE_ERR_GROUP(0x210, per_cluster_ras_group),
ADD_ONE_ERR_GROUP(0x220, per_cluster_ras_group),
ADD_ONE_ERR_GROUP(0x230, per_cluster_ras_group),
/*
* SCF L3_Bank ras error records
* ERRSELR: 3*256 + L3_Bank_ID, L3_Bank_ID: 0-3
* 1 group, 4 nodes in total.
*/
ADD_ONE_ERR_GROUP(0x300, scf_l3_ras_group),
/*
* CCPLEX ras error records
* ERRSELR: 4*256 + Unit_ID, Unit_ID: 0 - 4
* 1 group, 5 nodes in total.
*/
ADD_ONE_ERR_GROUP(0x400, ccplex_ras_group),
};
REGISTER_ERR_RECORD_INFO(carmel_ras_records);
/* dummy RAS interrupt */
static struct ras_interrupt carmel_ras_interrupts[] = {};
REGISTER_RAS_INTERRUPTS(carmel_ras_interrupts);
/*******************************************************************************
 * RAS handler for the platform
 *
 * With RAS_EXTENSION enabled the External Abort is forwarded to
 * tegra194_ea_handler(). Otherwise the abort is logged and the system panics.
 ******************************************************************************/
void plat_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie,
		void *handle, uint64_t flags)
{
#if RAS_EXTENSION
	tegra194_ea_handler(ea_reason, syndrome, cookie, handle, flags);
#else
	/*
	 * Use the same conversions as tegra194_ea_handler(): the MPIDR value
	 * is printed with %lx and the 64-bit syndrome with %llx.
	 */
	ERROR("Unhandled External Abort received on 0x%lx at EL3!\n",
			read_mpidr_el1());
	ERROR(" exception reason=%u syndrome=0x%llx\n", ea_reason, syndrome);
	panic();
#endif
}

View File

@ -208,6 +208,11 @@ void plat_early_platform_setup(void)
/* sanity check MCE firmware compatibility */
mce_verify_firmware_version();
#if RAS_EXTENSION
/* Enable Uncorrectable RAS error */
tegra194_ras_enable();
#endif
/*
* Program XUSB STREAMIDs
* ======================

View File

@ -12,6 +12,7 @@
#include <common/debug.h>
#include <errno.h>
#include <mce.h>
#include <mce_private.h>
#include <memctrl.h>
#include <common/runtime_svc.h>
#include <tegra_private.h>
@ -23,6 +24,7 @@
* Tegra194 SiP SMCs
******************************************************************************/
#define TEGRA_SIP_GET_SMMU_PER 0xC200FF00U
#define TEGRA_SIP_CLEAR_RAS_CORRECTED_ERRORS 0xC200FF01U
/*******************************************************************************
* This function is responsible for handling all T194 SiP calls
@ -69,6 +71,15 @@ int32_t plat_sip_handler(uint32_t smc_fid,
break;
#if RAS_EXTENSION
case TEGRA_SIP_CLEAR_RAS_CORRECTED_ERRORS:
/* clear all RAS error records for corrected errors at first. */
tegra194_ras_corrected_err_clear();
/* clear HSM corrected error status. */
mce_clear_hsm_corr_status();
break;
#endif
default:
ret = -ENOTSUP;
break;

View File

@ -30,6 +30,10 @@ $(eval $(call add_define,MAX_XLAT_TABLES))
MAX_MMAP_REGIONS := 30
$(eval $(call add_define,MAX_MMAP_REGIONS))
# enable RAS handling
HANDLE_EA_EL3_FIRST := 1
RAS_EXTENSION := 1
# platform files
PLAT_INCLUDES += -Iplat/nvidia/tegra/include/t194 \
-I${SOC_DIR}/drivers/include
@ -56,3 +60,10 @@ BL31_SOURCES += drivers/ti/uart/aarch64/16550_console.S \
ifeq (${ENABLE_CONSOLE_SPE},1)
BL31_SOURCES += ${COMMON_DIR}/drivers/spe/shared_console.S
endif
# RAS sources
ifeq (${RAS_EXTENSION},1)
BL31_SOURCES += lib/extensions/ras/std_err_record.c \
lib/extensions/ras/ras_common.c \
${SOC_DIR}/plat_ras.c
endif