Merge changes from topic "advk-serror" into integration

* changes:
  fix(plat/marvell/a3k): disable HANDLE_EA_EL3_FIRST by default
  fix(plat/marvell/a3k): update information about PCIe abort hack
This commit is contained in:
Madhukar Pappireddy 2021-09-08 00:04:15 +02:00 committed by TrustedFirmware Code Review
commit a138717d9e
3 changed files with 94 additions and 4 deletions

View File

@ -153,6 +153,29 @@ A7K/8K/CN913x specific build options:
Armada37x0 specific build options:
- HANDLE_EA_EL3_FIRST
When ``HANDLE_EA_EL3_FIRST=1``, External Aborts and SError Interrupts will be always trapped
in TF-A. TF-A in this case enables dirty hack / workaround for a bug found in U-Boot and
Linux kernel PCIe controller driver pci-aardvark.c, traps and then masks SError interrupt
caused by AXI SLVERR on external access (syndrome 0xbf000002).
Otherwise when ``HANDLE_EA_EL3_FIRST=0``, these exceptions will be trapped in the current
exception level (or in EL1 if the current exception level is EL0). So exceptions caused by
U-Boot will be trapped in U-Boot, exceptions caused by Linux kernel (or user applications)
will be trapped in Linux kernel.
Mentioned bug in pci-aardvark.c driver is fixed in U-Boot version v2021.07 and Linux kernel
version v5.13 (workarounded since Linux kernel version 5.9) and also backported in Linux
kernel stable releases since versions v5.12.13, v5.10.46, v5.4.128, v4.19.198, v4.14.240.
If target system has already patched version of U-Boot and Linux kernel then it is strongly
recommended to not enable this workaround as it disallows propagating of all External Aborts
to running Linux kernel and makes correctable errors as fatal aborts.
This option is now disabled by default. In past this option was enabled by default in
TF-A versions v2.2, v2.3, v2.4 and v2.5.
- CM3_SYSTEM_RESET
When ``CM3_SYSTEM_RESET=1``, the Cortex-M3 secure coprocessor will be used for system reset.

View File

@ -13,7 +13,6 @@ PLAT_INCLUDE_BASE := $(MARVELL_PLAT_INCLUDE_BASE)/$(PLAT_FAMILY)
PLAT_COMMON_BASE := $(PLAT_FAMILY_BASE)/common
MARVELL_DRV_BASE := drivers/marvell
MARVELL_COMMON_BASE := $(MARVELL_PLAT_BASE)/common
HANDLE_EA_EL3_FIRST := 1
include plat/marvell/marvell.mk
@ -53,7 +52,6 @@ BL31_SOURCES += lib/cpus/aarch64/cortex_a53.S \
$(PLAT_COMMON_BASE)/dram_win.c \
$(PLAT_COMMON_BASE)/io_addr_dec.c \
$(PLAT_COMMON_BASE)/marvell_plat_config.c \
$(PLAT_COMMON_BASE)/a3700_ea.c \
$(PLAT_FAMILY_BASE)/$(PLAT)/plat_bl31_setup.c \
$(MARVELL_COMMON_BASE)/marvell_cci.c \
$(MARVELL_COMMON_BASE)/marvell_ddr_info.c \
@ -63,6 +61,10 @@ BL31_SOURCES += lib/cpus/aarch64/cortex_a53.S \
$(PLAT_COMMON_BASE)/a3700_sip_svc.c \
$(MARVELL_DRV)
ifeq ($(HANDLE_EA_EL3_FIRST),1)
BL31_SOURCES += $(PLAT_COMMON_BASE)/a3700_ea.c
endif
ifeq ($(CM3_SYSTEM_RESET),1)
BL31_SOURCES += $(PLAT_COMMON_BASE)/cm3_system_reset.c
endif

View File

@ -8,14 +8,79 @@
#include <common/debug.h>
#include <arch_helpers.h>
#include <plat/common/platform.h>
#include <bl31/ea_handle.h>
#define ADVK_SERROR_SYNDROME 0xbf000002
#define A53_SERR_INT_AXI_SLVERR_ON_EXTERNAL_ACCESS 0xbf000002
#if !ENABLE_BACKTRACE
static const char *get_el_str(unsigned int el)
{
if (el == MODE_EL3) {
return "EL3";
} else if (el == MODE_EL2) {
return "EL2";
}
return "S-EL1";
}
#endif /* !ENABLE_BACKTRACE */
/*
* This source file with custom plat_ea_handler function is compiled only when
* building TF-A with compile option HANDLE_EA_EL3_FIRST=1
*/
void plat_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie,
void *handle, uint64_t flags)
{
if (syndrome == ADVK_SERROR_SYNDROME)
unsigned int level = (unsigned int)GET_EL(read_spsr_el3());
/*
* Asynchronous External Abort with syndrome 0xbf000002 on Cortex A53
* core means SError interrupt caused by AXI SLVERR on external access.
*
* In most cases this indicates a bug in U-Boot or Linux kernel driver
* pci-aardvark.c which implements access to A3700 PCIe config space.
* Driver does not wait for PCIe PIO transfer completion and try to
* start a new PCIe PIO transfer while previous has not finished yet.
* A3700 PCIe controller in this case sends SLVERR via AXI which results
* in a fatal Asynchronous SError interrupt on Cortex A53 CPU.
*
* Following patches fix that bug in U-Boot and Linux kernel drivers:
* https://source.denx.de/u-boot/u-boot/-/commit/eccbd4ad8e4e182638eafbfb87ac139c04f24a01
* https://git.kernel.org/stable/c/f18139966d072dab8e4398c95ce955a9742e04f7
*
* As a hacky workaround for unpatched U-Boot and Linux kernel drivers
* ignore all asynchronous aborts with that syndrome value received on
* CPU from level lower than EL3.
*
* Because these aborts are delivered on CPU asynchronously, they are
* imprecise and we cannot check the real reason of abort and neither
* who and why sent this abort. We expect that on A3700 it is always
* PCIe controller.
*
* Hence ignoring all aborts with this syndrome value is just a giant
* hack that we need only because of bugs in old U-Boot and Linux kernel
* versions and because it was decided that TF-A would implement this
* hack for U-Boot and Linux kernel it in this way. New patched U-Boot
* and kernel versions do not need it anymore.
*
* Links to discussion about this workaround:
* https://lore.kernel.org/linux-pci/20190316161243.29517-1-repk@triplefau.lt/
* https://lore.kernel.org/linux-pci/971be151d24312cc533989a64bd454b4@www.loen.fr/
* https://review.trustedfirmware.org/c/TF-A/trusted-firmware-a/+/1541
*/
if (level < MODE_EL3 && ea_reason == ERROR_EA_ASYNC &&
syndrome == A53_SERR_INT_AXI_SLVERR_ON_EXTERNAL_ACCESS) {
ERROR_NL();
ERROR("Ignoring Asynchronous External Abort with"
" syndrome 0x%llx received on 0x%lx from %s\n",
syndrome, read_mpidr_el1(), get_el_str(level));
ERROR("SError interrupt: AXI SLVERR on external access\n");
ERROR("This indicates a bug in pci-aardvark.c driver\n");
ERROR("Please update U-Boot/Linux to the latest version\n");
ERROR_NL();
console_flush();
return;
}
plat_default_ea_handler(ea_reason, syndrome, cookie, handle, flags);
}