diff --git a/docs/plat/marvell/armada/build.rst b/docs/plat/marvell/armada/build.rst index 8af27b1b5..b12514469 100644 --- a/docs/plat/marvell/armada/build.rst +++ b/docs/plat/marvell/armada/build.rst @@ -153,6 +153,29 @@ A7K/8K/CN913x specific build options: Armada37x0 specific build options: +- HANDLE_EA_EL3_FIRST + + When ``HANDLE_EA_EL3_FIRST=1``, External Aborts and SError Interrupts will be always trapped + in TF-A. TF-A in this case enables dirty hack / workaround for a bug found in U-Boot and + Linux kernel PCIe controller driver pci-aardvark.c, traps and then masks SError interrupt + caused by AXI SLVERR on external access (syndrome 0xbf000002). + + Otherwise when ``HANDLE_EA_EL3_FIRST=0``, these exceptions will be trapped in the current + exception level (or in EL1 if the current exception level is EL0). So exceptions caused by + U-Boot will be trapped in U-Boot, exceptions caused by Linux kernel (or user applications) + will be trapped in Linux kernel. + + Mentioned bug in pci-aardvark.c driver is fixed in U-Boot version v2021.07 and Linux kernel + version v5.13 (workarounded since Linux kernel version 5.9) and also backported in Linux + kernel stable releases since versions v5.12.13, v5.10.46, v5.4.128, v4.19.198, v4.14.240. + + If target system has already patched version of U-Boot and Linux kernel then it is strongly + recommended to not enable this workaround as it disallows propagating of all External Aborts + to running Linux kernel and makes correctable errors as fatal aborts. + + This option is now disabled by default. In past this option was enabled by default in + TF-A versions v2.2, v2.3, v2.4 and v2.5. + - CM3_SYSTEM_RESET When ``CM3_SYSTEM_RESET=1``, the Cortex-M3 secure coprocessor will be used for system reset. diff --git a/plat/marvell/armada/a3k/common/a3700_common.mk b/plat/marvell/armada/a3k/common/a3700_common.mk index 0a8974293..238587d6a 100644 --- a/plat/marvell/armada/a3k/common/a3700_common.mk +++ b/plat/marvell/armada/a3k/common/a3700_common.mk @@ -13,7 +13,6 @@ PLAT_INCLUDE_BASE := $(MARVELL_PLAT_INCLUDE_BASE)/$(PLAT_FAMILY) PLAT_COMMON_BASE := $(PLAT_FAMILY_BASE)/common MARVELL_DRV_BASE := drivers/marvell MARVELL_COMMON_BASE := $(MARVELL_PLAT_BASE)/common -HANDLE_EA_EL3_FIRST := 1 include plat/marvell/marvell.mk @@ -53,7 +52,6 @@ BL31_SOURCES += lib/cpus/aarch64/cortex_a53.S \ $(PLAT_COMMON_BASE)/dram_win.c \ $(PLAT_COMMON_BASE)/io_addr_dec.c \ $(PLAT_COMMON_BASE)/marvell_plat_config.c \ - $(PLAT_COMMON_BASE)/a3700_ea.c \ $(PLAT_FAMILY_BASE)/$(PLAT)/plat_bl31_setup.c \ $(MARVELL_COMMON_BASE)/marvell_cci.c \ $(MARVELL_COMMON_BASE)/marvell_ddr_info.c \ @@ -63,6 +61,10 @@ BL31_SOURCES += lib/cpus/aarch64/cortex_a53.S \ $(PLAT_COMMON_BASE)/a3700_sip_svc.c \ $(MARVELL_DRV) +ifeq ($(HANDLE_EA_EL3_FIRST),1) +BL31_SOURCES += $(PLAT_COMMON_BASE)/a3700_ea.c +endif + ifeq ($(CM3_SYSTEM_RESET),1) BL31_SOURCES += $(PLAT_COMMON_BASE)/cm3_system_reset.c endif diff --git a/plat/marvell/armada/a3k/common/a3700_ea.c b/plat/marvell/armada/a3k/common/a3700_ea.c index 3a4f7203b..4a58fc6a4 100644 --- a/plat/marvell/armada/a3k/common/a3700_ea.c +++ b/plat/marvell/armada/a3k/common/a3700_ea.c @@ -8,14 +8,79 @@ #include #include #include +#include -#define ADVK_SERROR_SYNDROME 0xbf000002 +#define A53_SERR_INT_AXI_SLVERR_ON_EXTERNAL_ACCESS 0xbf000002 +#if !ENABLE_BACKTRACE +static const char *get_el_str(unsigned int el) +{ + if (el == MODE_EL3) { + return "EL3"; + } else if (el == MODE_EL2) { + return "EL2"; + } + return "S-EL1"; +} +#endif /* !ENABLE_BACKTRACE */ + +/* + * This source file with custom plat_ea_handler function is compiled only when + * building TF-A with compile option HANDLE_EA_EL3_FIRST=1 + */ void plat_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie, void *handle, uint64_t flags) { - if (syndrome == ADVK_SERROR_SYNDROME) + unsigned int level = (unsigned int)GET_EL(read_spsr_el3()); + + /* + * Asynchronous External Abort with syndrome 0xbf000002 on Cortex A53 + * core means SError interrupt caused by AXI SLVERR on external access. + * + * In most cases this indicates a bug in U-Boot or Linux kernel driver + * pci-aardvark.c which implements access to A3700 PCIe config space. + * Driver does not wait for PCIe PIO transfer completion and try to + * start a new PCIe PIO transfer while previous has not finished yet. + * A3700 PCIe controller in this case sends SLVERR via AXI which results + * in a fatal Asynchronous SError interrupt on Cortex A53 CPU. + * + * Following patches fix that bug in U-Boot and Linux kernel drivers: + * https://source.denx.de/u-boot/u-boot/-/commit/eccbd4ad8e4e182638eafbfb87ac139c04f24a01 + * https://git.kernel.org/stable/c/f18139966d072dab8e4398c95ce955a9742e04f7 + * + * As a hacky workaround for unpatched U-Boot and Linux kernel drivers + * ignore all asynchronous aborts with that syndrome value received on + * CPU from level lower than EL3. + * + * Because these aborts are delivered on CPU asynchronously, they are + * imprecise and we cannot check the real reason of abort and neither + * who and why sent this abort. We expect that on A3700 it is always + * PCIe controller. + * + * Hence ignoring all aborts with this syndrome value is just a giant + * hack that we need only because of bugs in old U-Boot and Linux kernel + * versions and because it was decided that TF-A would implement this + * hack for U-Boot and Linux kernel it in this way. New patched U-Boot + * and kernel versions do not need it anymore. + * + * Links to discussion about this workaround: + * https://lore.kernel.org/linux-pci/20190316161243.29517-1-repk@triplefau.lt/ + * https://lore.kernel.org/linux-pci/971be151d24312cc533989a64bd454b4@www.loen.fr/ + * https://review.trustedfirmware.org/c/TF-A/trusted-firmware-a/+/1541 + */ + if (level < MODE_EL3 && ea_reason == ERROR_EA_ASYNC && + syndrome == A53_SERR_INT_AXI_SLVERR_ON_EXTERNAL_ACCESS) { + ERROR_NL(); + ERROR("Ignoring Asynchronous External Abort with" + " syndrome 0x%llx received on 0x%lx from %s\n", + syndrome, read_mpidr_el1(), get_el_str(level)); + ERROR("SError interrupt: AXI SLVERR on external access\n"); + ERROR("This indicates a bug in pci-aardvark.c driver\n"); + ERROR("Please update U-Boot/Linux to the latest version\n"); + ERROR_NL(); + console_flush(); return; + } plat_default_ea_handler(ea_reason, syndrome, cookie, handle, flags); }