From 068fe919613197bf221c00fb84a1d94c66a7a8ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 26 Jun 2021 16:26:56 +0200 Subject: [PATCH 1/2] fix(plat/marvell/a3k): update information about PCIe abort hack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A3700 plat_ea_handler was introduced into TF-A codebase just because of bugs in U-Boot and Linux kernel PCIe controller driver pci-aardvark.c. These bugs were finally fixed in both U-Boot and Linux kernel drivers: https://source.denx.de/u-boot/u-boot/-/commit/eccbd4ad8e4e182638eafbfb87ac139c04f24a01 https://git.kernel.org/stable/c/f18139966d072dab8e4398c95ce955a9742e04f7 Add all these information into comments, including printing error message into a3k plat_ea_handler. Also check that abort is really asynchronous and comes from lower level than EL3. Signed-off-by: Pali Rohár Change-Id: I46318d221b39773d5e25b3a0221d7738736ffdf1 --- plat/marvell/armada/a3k/common/a3700_ea.c | 65 ++++++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/plat/marvell/armada/a3k/common/a3700_ea.c b/plat/marvell/armada/a3k/common/a3700_ea.c index 3a4f7203b..dde92e38a 100644 --- a/plat/marvell/armada/a3k/common/a3700_ea.c +++ b/plat/marvell/armada/a3k/common/a3700_ea.c @@ -8,14 +8,75 @@ #include #include #include +#include -#define ADVK_SERROR_SYNDROME 0xbf000002 +#define A53_SERR_INT_AXI_SLVERR_ON_EXTERNAL_ACCESS 0xbf000002 + +#if !ENABLE_BACKTRACE +static const char *get_el_str(unsigned int el) +{ + if (el == MODE_EL3) { + return "EL3"; + } else if (el == MODE_EL2) { + return "EL2"; + } + return "S-EL1"; +} +#endif /* !ENABLE_BACKTRACE */ void plat_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie, void *handle, uint64_t flags) { - if (syndrome == ADVK_SERROR_SYNDROME) + unsigned int level = (unsigned int)GET_EL(read_spsr_el3()); + + /* + * Asynchronous External Abort with syndrome 0xbf000002 on Cortex A53 + * core means SError interrupt caused by AXI SLVERR on external access. + * + * In most cases this indicates a bug in U-Boot or Linux kernel driver + * pci-aardvark.c which implements access to A3700 PCIe config space. + * Driver does not wait for PCIe PIO transfer completion and try to + * start a new PCIe PIO transfer while previous has not finished yet. + * A3700 PCIe controller in this case sends SLVERR via AXI which results + * in a fatal Asynchronous SError interrupt on Cortex A53 CPU. + * + * Following patches fix that bug in U-Boot and Linux kernel drivers: + * https://source.denx.de/u-boot/u-boot/-/commit/eccbd4ad8e4e182638eafbfb87ac139c04f24a01 + * https://git.kernel.org/stable/c/f18139966d072dab8e4398c95ce955a9742e04f7 + * + * As a hacky workaround for unpatched U-Boot and Linux kernel drivers + * ignore all asynchronous aborts with that syndrome value received on + * CPU from level lower than EL3. + * + * Because these aborts are delivered on CPU asynchronously, they are + * imprecise and we cannot check the real reason of abort and neither + * who and why sent this abort. We expect that on A3700 it is always + * PCIe controller. + * + * Hence ignoring all aborts with this syndrome value is just a giant + * hack that we need only because of bugs in old U-Boot and Linux kernel + * versions and because it was decided that TF-A would implement this + * hack for U-Boot and Linux kernel it in this way. New patched U-Boot + * and kernel versions do not need it anymore. + * + * Links to discussion about this workaround: + * https://lore.kernel.org/linux-pci/20190316161243.29517-1-repk@triplefau.lt/ + * https://lore.kernel.org/linux-pci/971be151d24312cc533989a64bd454b4@www.loen.fr/ + * https://review.trustedfirmware.org/c/TF-A/trusted-firmware-a/+/1541 + */ + if (level < MODE_EL3 && ea_reason == ERROR_EA_ASYNC && + syndrome == A53_SERR_INT_AXI_SLVERR_ON_EXTERNAL_ACCESS) { + ERROR_NL(); + ERROR("Ignoring Asynchronous External Abort with" + " syndrome 0x%llx received on 0x%lx from %s\n", + syndrome, read_mpidr_el1(), get_el_str(level)); + ERROR("SError interrupt: AXI SLVERR on external access\n"); + ERROR("This indicates a bug in pci-aardvark.c driver\n"); + ERROR("Please update U-Boot/Linux to the latest version\n"); + ERROR_NL(); + console_flush(); return; + } plat_default_ea_handler(ea_reason, syndrome, cookie, handle, flags); } From 3017e932768c7357a1a41493c58323419e9a1ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 9 Jul 2021 15:10:27 +0200 Subject: [PATCH 2/2] fix(plat/marvell/a3k): disable HANDLE_EA_EL3_FIRST by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was enabled in commit 3c7dcdac5c50 ("marvell/a3700: Prevent SError accessing PCIe link while it is down") with a workaround for a bug found in U-Boot and Linux kernel driver pci-aardvark.c (PCIe controller driver for Armada 37xx SoC) which results in SError interrupt caused by AXI SLVERR on external access (syndrome 0xbf000002) and immediate kernel panic. Now when proper patches are in both U-Boot and Linux kernel projects, this workaround in TF-A should not have to be enabled by default anymore as it has unwanted side effects like propagating all external aborts, including non-fatal/correctable into EL3 and making them as fatal which cause immediate abort. Add documentation for HANDLE_EA_EL3_FIRST build option into Marvell Armada build section. Signed-off-by: Pali Rohár Change-Id: Ic92b65bf9923505ab682830afb66c2f6cec70491 --- docs/plat/marvell/armada/build.rst | 23 +++++++++++++++++++ .../marvell/armada/a3k/common/a3700_common.mk | 6 +++-- plat/marvell/armada/a3k/common/a3700_ea.c | 4 ++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/docs/plat/marvell/armada/build.rst b/docs/plat/marvell/armada/build.rst index ca84be6bc..b10fb3a4c 100644 --- a/docs/plat/marvell/armada/build.rst +++ b/docs/plat/marvell/armada/build.rst @@ -139,6 +139,29 @@ A7K/8K/CN913x specific build options: Armada37x0 specific build options: +- HANDLE_EA_EL3_FIRST + + When ``HANDLE_EA_EL3_FIRST=1``, External Aborts and SError Interrupts will be always trapped + in TF-A. TF-A in this case enables dirty hack / workaround for a bug found in U-Boot and + Linux kernel PCIe controller driver pci-aardvark.c, traps and then masks SError interrupt + caused by AXI SLVERR on external access (syndrome 0xbf000002). + + Otherwise when ``HANDLE_EA_EL3_FIRST=0``, these exceptions will be trapped in the current + exception level (or in EL1 if the current exception level is EL0). So exceptions caused by + U-Boot will be trapped in U-Boot, exceptions caused by Linux kernel (or user applications) + will be trapped in Linux kernel. + + Mentioned bug in pci-aardvark.c driver is fixed in U-Boot version v2021.07 and Linux kernel + version v5.13 (workarounded since Linux kernel version 5.9) and also backported in Linux + kernel stable releases since versions v5.12.13, v5.10.46, v5.4.128, v4.19.198, v4.14.240. + + If target system has already patched version of U-Boot and Linux kernel then it is strongly + recommended to not enable this workaround as it disallows propagating of all External Aborts + to running Linux kernel and makes correctable errors as fatal aborts. + + This option is now disabled by default. In past this option was enabled by default in + TF-A versions v2.2, v2.3, v2.4 and v2.5. + - CM3_SYSTEM_RESET When ``CM3_SYSTEM_RESET=1``, the Cortex-M3 secure coprocessor will be used for system reset. diff --git a/plat/marvell/armada/a3k/common/a3700_common.mk b/plat/marvell/armada/a3k/common/a3700_common.mk index 0a8974293..238587d6a 100644 --- a/plat/marvell/armada/a3k/common/a3700_common.mk +++ b/plat/marvell/armada/a3k/common/a3700_common.mk @@ -13,7 +13,6 @@ PLAT_INCLUDE_BASE := $(MARVELL_PLAT_INCLUDE_BASE)/$(PLAT_FAMILY) PLAT_COMMON_BASE := $(PLAT_FAMILY_BASE)/common MARVELL_DRV_BASE := drivers/marvell MARVELL_COMMON_BASE := $(MARVELL_PLAT_BASE)/common -HANDLE_EA_EL3_FIRST := 1 include plat/marvell/marvell.mk @@ -53,7 +52,6 @@ BL31_SOURCES += lib/cpus/aarch64/cortex_a53.S \ $(PLAT_COMMON_BASE)/dram_win.c \ $(PLAT_COMMON_BASE)/io_addr_dec.c \ $(PLAT_COMMON_BASE)/marvell_plat_config.c \ - $(PLAT_COMMON_BASE)/a3700_ea.c \ $(PLAT_FAMILY_BASE)/$(PLAT)/plat_bl31_setup.c \ $(MARVELL_COMMON_BASE)/marvell_cci.c \ $(MARVELL_COMMON_BASE)/marvell_ddr_info.c \ @@ -63,6 +61,10 @@ BL31_SOURCES += lib/cpus/aarch64/cortex_a53.S \ $(PLAT_COMMON_BASE)/a3700_sip_svc.c \ $(MARVELL_DRV) +ifeq ($(HANDLE_EA_EL3_FIRST),1) +BL31_SOURCES += $(PLAT_COMMON_BASE)/a3700_ea.c +endif + ifeq ($(CM3_SYSTEM_RESET),1) BL31_SOURCES += $(PLAT_COMMON_BASE)/cm3_system_reset.c endif diff --git a/plat/marvell/armada/a3k/common/a3700_ea.c b/plat/marvell/armada/a3k/common/a3700_ea.c index dde92e38a..4a58fc6a4 100644 --- a/plat/marvell/armada/a3k/common/a3700_ea.c +++ b/plat/marvell/armada/a3k/common/a3700_ea.c @@ -24,6 +24,10 @@ static const char *get_el_str(unsigned int el) } #endif /* !ENABLE_BACKTRACE */ +/* + * This source file with custom plat_ea_handler function is compiled only when + * building TF-A with compile option HANDLE_EA_EL3_FIRST=1 + */ void plat_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie, void *handle, uint64_t flags) {