From 6a7b30057806c3573ee0194b7aa583bd60c89371 Mon Sep 17 00:00:00 2001 From: Soby Mathew Date: Fri, 12 Oct 2018 16:26:20 +0100 Subject: [PATCH 1/7] Add helper to return reference to a symbol This patch adds a utility function to return the address of a symbol. By default, the compiler generates adr/adrp instruction pair to return the reference and this utility is used to override this compiler generated code and use `ldr` instruction. This is needed for Position Independent Executable when it needs to reference a symbol which is constant and does not depend on the execute address of the binary. For example, on the FVP, the GICv3 register context is stored in a secure carveout (arm_el3_tzc_dram) within DDR and does not relocate with the BL image. Now if BL31 is executing at a different address other than the compiled address, using adrp/adr instructions to reference this memory will not work as they generate an address that is PC relative. The way to get around this problem is to reference it as non-PC relative (i.e. non-relocatable location) via `ldr` instruction. Change-Id: I5008a951b007144258121690afb68dc8e12ee6f7 Signed-off-by: Soby Mathew --- include/lib/utils.h | 23 +++++++++++++++++++++++ plat/arm/common/arm_gicv3.c | 30 +++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/include/lib/utils.h b/include/lib/utils.h index d46d8461d..f324a9909 100644 --- a/include/lib/utils.h +++ b/include/lib/utils.h @@ -67,6 +67,29 @@ void zero_normalmem(void *mem, u_register_t length); * zeroing. */ void zeromem(void *mem, u_register_t length); + +/* + * Utility function to return the address of a symbol. By default, the + * compiler generates adr/adrp instruction pair to return the reference + * to the symbol and this utility is used to override this compiler + * generated code to use `ldr` instruction. 
+ * + * This helps when Position Independent Executable needs to reference a symbol + * which is constant and does not depend on the execute address of the binary. + */ +#define DEFINE_LOAD_SYM_ADDR(_name) \ +static inline u_register_t load_addr_## _name(void) \ +{ \ + u_register_t v; \ + /* Create a void reference to silence compiler */ \ + (void) _name; \ + __asm__ volatile ("ldr %0, =" #_name : "=r" (v)); \ + return v; \ +} + +/* Helper to invoke the function defined by DEFINE_LOAD_SYM_ADDR() */ +#define LOAD_ADDR_OF(_name) (typeof(_name) *) load_addr_## _name() + #endif /* !(defined(__LINKER__) || defined(__ASSEMBLY__)) */ #endif /* __UTILS_H__ */ diff --git a/plat/arm/common/arm_gicv3.c b/plat/arm/common/arm_gicv3.c index e9e8a74d9..a43bff3d4 100644 --- a/plat/arm/common/arm_gicv3.c +++ b/plat/arm/common/arm_gicv3.c @@ -10,6 +10,7 @@ #include #include #include +#include /****************************************************************************** * The following functions are defined as weak to allow a platform to override @@ -33,10 +34,16 @@ static const interrupt_prop_t arm_interrupt_props[] = { /* * We save and restore the GICv3 context on system suspend. Allocate the - * data in the designated EL3 Secure carve-out memory + * data in the designated EL3 Secure carve-out memory. The `volatile` + * is used to prevent the compiler from removing the gicv3 contexts even + * though the DEFINE_LOAD_SYM_ADDR creates a dummy reference to it. 
*/ -static gicv3_redist_ctx_t rdist_ctx __section("arm_el3_tzc_dram"); -static gicv3_dist_ctx_t dist_ctx __section("arm_el3_tzc_dram"); +static volatile gicv3_redist_ctx_t rdist_ctx __section("arm_el3_tzc_dram"); +static volatile gicv3_dist_ctx_t dist_ctx __section("arm_el3_tzc_dram"); + +/* Define accessor function to get reference to the GICv3 context */ +DEFINE_LOAD_SYM_ADDR(rdist_ctx) +DEFINE_LOAD_SYM_ADDR(dist_ctx) /* * MPIDR hashing function for translating MPIDRs read from GICR_TYPER register @@ -134,6 +141,10 @@ void plat_arm_gic_redistif_off(void) *****************************************************************************/ void plat_arm_gic_save(void) { + gicv3_redist_ctx_t * const rdist_context = + (gicv3_redist_ctx_t *)LOAD_ADDR_OF(rdist_ctx); + gicv3_dist_ctx_t * const dist_context = + (gicv3_dist_ctx_t *)LOAD_ADDR_OF(dist_ctx); /* * If an ITS is available, save its context before @@ -149,10 +160,10 @@ void plat_arm_gic_save(void) * we only need to save the context of the CPU that is issuing * the SYSTEM SUSPEND call, i.e. the current CPU. */ - gicv3_rdistif_save(plat_my_core_pos(), &rdist_ctx); + gicv3_rdistif_save(plat_my_core_pos(), rdist_context); /* Save the GIC Distributor context */ - gicv3_distif_save(&dist_ctx); + gicv3_distif_save(dist_context); /* * From here, all the components of the GIC can be safely powered down @@ -163,8 +174,13 @@ void plat_arm_gic_save(void) void plat_arm_gic_resume(void) { + const gicv3_redist_ctx_t *rdist_context = + (gicv3_redist_ctx_t *)LOAD_ADDR_OF(rdist_ctx); + const gicv3_dist_ctx_t *dist_context = + (gicv3_dist_ctx_t *)LOAD_ADDR_OF(dist_ctx); + /* Restore the GIC Distributor context */ - gicv3_distif_init_restore(&dist_ctx); + gicv3_distif_init_restore(dist_context); /* * Restore the GIC Redistributor and ITS contexts after the @@ -172,7 +188,7 @@ void plat_arm_gic_resume(void) * we only need to restore the context of the CPU that issued * the SYSTEM SUSPEND call. 
*/ - gicv3_rdistif_init_restore(plat_my_core_pos(), &rdist_ctx); + gicv3_rdistif_init_restore(plat_my_core_pos(), rdist_context); /* * If an ITS is available, restore its context after From 3bd17c0fef46a7cd894974ac4c996801458f6d09 Mon Sep 17 00:00:00 2001 From: Soby Mathew Date: Tue, 28 Aug 2018 11:13:55 +0100 Subject: [PATCH 2/7] Basic Makefile changes for PIE Change-Id: I0b8ccba15024c55bb03927cdb50370913eb8010c Signed-off-by: Soby Mathew --- Makefile | 17 ++++++++++++----- docs/user-guide.rst | 4 ++++ make_helpers/defaults.mk | 3 +++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index be543facb..74d518043 100644 --- a/Makefile +++ b/Makefile @@ -205,11 +205,6 @@ TF_CFLAGS += $(CPPFLAGS) $(TF_CFLAGS_$(ARCH)) \ -Os -ffunction-sections -fdata-sections GCC_V_OUTPUT := $(shell $(CC) -v 2>&1) -PIE_FOUND := $(findstring --enable-default-pie,${GCC_V_OUTPUT}) - -ifneq ($(PIE_FOUND),) -TF_CFLAGS += -fno-PIE -endif # Force the compiler to include the frame pointer ifeq (${ENABLE_BACKTRACE},1) @@ -335,6 +330,16 @@ ifeq (${ARM_ARCH_MAJOR},7) include make_helpers/armv7-a-cpus.mk endif +ifeq ($(ENABLE_PIE),1) + TF_CFLAGS += -fpie + TF_LDFLAGS += -pie +else + PIE_FOUND := $(findstring --enable-default-pie,${GCC_V_OUTPUT}) + ifneq ($(PIE_FOUND),) + TF_CFLAGS += -fno-PIE + endif +endif + # Include the CPU specific operations makefile, which provides default # values for all CPU errata workarounds and CPU specific optimisations. # This can be overridden by the platform. 
@@ -565,6 +570,7 @@ $(eval $(call assert_boolean,ENABLE_AMU)) $(eval $(call assert_boolean,ENABLE_ASSERTIONS)) $(eval $(call assert_boolean,ENABLE_BACKTRACE)) $(eval $(call assert_boolean,ENABLE_MPAM_FOR_LOWER_ELS)) +$(eval $(call assert_boolean,ENABLE_PIE)) $(eval $(call assert_boolean,ENABLE_PMF)) $(eval $(call assert_boolean,ENABLE_PSCI_STAT)) $(eval $(call assert_boolean,ENABLE_RUNTIME_INSTRUMENTATION)) @@ -615,6 +621,7 @@ $(eval $(call add_define,ENABLE_AMU)) $(eval $(call add_define,ENABLE_ASSERTIONS)) $(eval $(call add_define,ENABLE_BACKTRACE)) $(eval $(call add_define,ENABLE_MPAM_FOR_LOWER_ELS)) +$(eval $(call add_define,ENABLE_PIE)) $(eval $(call add_define,ENABLE_PMF)) $(eval $(call add_define,ENABLE_PSCI_STAT)) $(eval $(call add_define,ENABLE_RUNTIME_INSTRUMENTATION)) diff --git a/docs/user-guide.rst b/docs/user-guide.rst index f4ef85d2a..52cb45c09 100644 --- a/docs/user-guide.rst +++ b/docs/user-guide.rst @@ -371,6 +371,10 @@ Common build options partitioning in EL3, however. Platform initialisation code should configure and use partitions in EL3 as required. This option defaults to ``0``. +- ``ENABLE_PIE``: Boolean option to enable Position Independent Executable(PIE) + support within generic code in TF-A. This option is currently only supported + in BL31. Default is 0. + - ``ENABLE_PMF``: Boolean option to enable support for optional Performance Measurement Framework(PMF). Default is 0. 
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk index 435de20e3..4a3f54116 100644 --- a/make_helpers/defaults.mk +++ b/make_helpers/defaults.mk @@ -64,6 +64,9 @@ DYN_DISABLE_AUTH := 0 # Build option to enable MPAM for lower ELs ENABLE_MPAM_FOR_LOWER_ELS := 0 +# Flag to enable Position Independent Executable (PIE) support +ENABLE_PIE := 0 + # Flag to enable Performance Measurement Framework ENABLE_PMF := 0 From f1722b693d363cc6a2b624d59f0442bf845baf62 Mon Sep 17 00:00:00 2001 From: Soby Mathew Date: Fri, 12 Oct 2018 16:40:28 +0100 Subject: [PATCH 3/7] PIE: Use PC relative adrp/adr for symbol reference This patch fixes up the AArch64 assembly code to use adrp/adr instructions instead of ldr instruction for reference to symbols. This allows these assembly sequences to be Position Independent. Note that the references to sizes have been replaced with calculation of size at runtime. This is because size is a constant value and does not depend on execution address and using PC relative instructions for loading them makes them relative to execution address. Also we cannot use `ldr` instruction to load size as it generates a dynamic relocation entry which must *not* be fixed up and it is difficult for a dynamic loader to differentiate which entries need to be skipped. Change-Id: I8bf4ed5c58a9703629e5498a27624500ef40a836 Signed-off-by: Soby Mathew --- bl2/aarch64/bl2_entrypoint.S | 14 +++++++--- include/common/aarch64/asm_macros.S | 8 +++--- include/common/aarch64/el3_common_macros.S | 30 +++++++++++++++------- include/lib/pmf/pmf_asm_macros.S | 6 +++-- lib/romlib/init.s | 16 ++++++++---- lib/xlat_tables_v2/aarch64/enable_mmu.S | 3 ++- 6 files changed, 53 insertions(+), 24 deletions(-) diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S index bc8cbfd65..30a5c599a 100644 --- a/bl2/aarch64/bl2_entrypoint.S +++ b/bl2/aarch64/bl2_entrypoint.S @@ -70,13 +70,19 @@ func bl2_entrypoint * - the coherent memory section. 
* --------------------------------------------- */ - ldr x0, =__BSS_START__ - ldr x1, =__BSS_SIZE__ + adrp x0, __BSS_START__ + add x0, x0, :lo12:__BSS_START__ + adrp x1, __BSS_END__ + add x1, x1, :lo12:__BSS_END__ + sub x1, x1, x0 bl zeromem #if USE_COHERENT_MEM - ldr x0, =__COHERENT_RAM_START__ - ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__ + adrp x0, __COHERENT_RAM_START__ + add x0, x0, :lo12:__COHERENT_RAM_START__ + adrp x1, __COHERENT_RAM_END_UNALIGNED__ + add x1, x1, :lo12:__COHERENT_RAM_END_UNALIGNED__ + sub x1, x1, x0 bl zeromem #endif diff --git a/include/common/aarch64/asm_macros.S b/include/common/aarch64/asm_macros.S index 9621a1c02..91416e4e4 100644 --- a/include/common/aarch64/asm_macros.S +++ b/include/common/aarch64/asm_macros.S @@ -105,8 +105,9 @@ * Clobber: X30, X1, X2 */ .macro get_my_mp_stack _name, _size - bl plat_my_core_pos - ldr x2, =(\_name + \_size) + bl plat_my_core_pos + adrp x2, (\_name + \_size) + add x2, x2, :lo12:(\_name + \_size) mov x1, #\_size madd x0, x0, x1, x2 .endm @@ -117,7 +118,8 @@ * Out: X0 = physical address of stack base */ .macro get_up_stack _name, _size - ldr x0, =(\_name + \_size) + adrp x0, (\_name + \_size) + add x0, x0, :lo12:(\_name + \_size) .endm /* diff --git a/include/common/aarch64/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S index 143c70c39..4902583b1 100644 --- a/include/common/aarch64/el3_common_macros.S +++ b/include/common/aarch64/el3_common_macros.S @@ -283,26 +283,38 @@ * an earlier boot loader stage. 
* ------------------------------------------------------------- */ - ldr x0, =__RW_START__ - ldr x1, =__RW_END__ + adrp x0, __RW_START__ + add x0, x0, :lo12:__RW_START__ + adrp x1, __RW_END__ + add x1, x1, :lo12:__RW_END__ sub x1, x1, x0 bl inv_dcache_range #endif + adrp x0, __BSS_START__ + add x0, x0, :lo12:__BSS_START__ - ldr x0, =__BSS_START__ - ldr x1, =__BSS_SIZE__ + adrp x1, __BSS_END__ + add x1, x1, :lo12:__BSS_END__ + sub x1, x1, x0 bl zeromem #if USE_COHERENT_MEM - ldr x0, =__COHERENT_RAM_START__ - ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__ + adrp x0, __COHERENT_RAM_START__ + add x0, x0, :lo12:__COHERENT_RAM_START__ + adrp x1, __COHERENT_RAM_END_UNALIGNED__ + add x1, x1, :lo12: __COHERENT_RAM_END_UNALIGNED__ + sub x1, x1, x0 bl zeromem #endif #if defined(IMAGE_BL1) || (defined(IMAGE_BL2) && BL2_IN_XIP_MEM) - ldr x0, =__DATA_RAM_START__ - ldr x1, =__DATA_ROM_START__ - ldr x2, =__DATA_SIZE__ + adrp x0, __DATA_RAM_START__ + add x0, x0, :lo12:__DATA_RAM_START__ + adrp x1, __DATA_ROM_START__ + add x1, x1, :lo12:__DATA_ROM_START__ + adrp x2, __DATA_RAM_END__ + add x2, x2, :lo12:__DATA_RAM_END__ + sub x2, x2, x0 bl memcpy16 #endif .endif /* _init_c_runtime */ diff --git a/include/lib/pmf/pmf_asm_macros.S b/include/lib/pmf/pmf_asm_macros.S index d58829eec..5e19e62f7 100644 --- a/include/lib/pmf/pmf_asm_macros.S +++ b/include/lib/pmf/pmf_asm_macros.S @@ -18,10 +18,12 @@ mov x9, x30 bl plat_my_core_pos mov x30, x9 - ldr x1, =__PERCPU_TIMESTAMP_SIZE__ + adr x2, __PMF_PERCPU_TIMESTAMP_END__ + adr x1, __PMF_TIMESTAMP_START__ + sub x1, x2, x1 mov x2, #(\_tid * PMF_TS_SIZE) madd x0, x0, x1, x2 - ldr x1, =pmf_ts_mem_\_name + adr x1, pmf_ts_mem_\_name add x0, x0, x1 .endm diff --git a/lib/romlib/init.s b/lib/romlib/init.s index 5cf2aca04..7d97e4d1d 100644 --- a/lib/romlib/init.s +++ b/lib/romlib/init.s @@ -5,7 +5,7 @@ */ .globl rom_lib_init - .extern __DATA_RAM_START__, __DATA_ROM_START__, __DATA_SIZE__ + .extern __DATA_RAM_START__, __DATA_ROM_START__, __DATA_RAM_END__ 
.extern memset, memcpy rom_lib_init: @@ -16,13 +16,19 @@ rom_lib_init: 1: stp x29, x30, [sp, #-16]! adrp x0, __DATA_RAM_START__ - ldr x1,= __DATA_ROM_START__ - ldr x2, =__DATA_SIZE__ + adrp x1, __DATA_ROM_START__ + add x1, x1, :lo12:__DATA_ROM_START__ + adrp x2, __DATA_RAM_END__ + add x2, x2, :lo12:__DATA_RAM_END__ + sub x2, x2, x0 bl memcpy - ldr x0, =__BSS_START__ + adrp x0,__BSS_START__ + add x0, x0, :lo12:__BSS_START__ mov x1, #0 - ldr x2, =__BSS_SIZE__ + adrp x2, __BSS_END__ + add x2, x2, :lo12:__BSS_END__ + sub x2, x2, x0 bl memset ldp x29, x30, [sp], #16 diff --git a/lib/xlat_tables_v2/aarch64/enable_mmu.S b/lib/xlat_tables_v2/aarch64/enable_mmu.S index 21717d28a..504c03c15 100644 --- a/lib/xlat_tables_v2/aarch64/enable_mmu.S +++ b/lib/xlat_tables_v2/aarch64/enable_mmu.S @@ -45,7 +45,8 @@ tlbi_invalidate_all \el mov x7, x0 - ldr x0, =mmu_cfg_params + adrp x0, mmu_cfg_params + add x0, x0, :lo12:mmu_cfg_params /* MAIR */ ldr x1, [x0, #(MMU_CFG_MAIR << 3)] From 12af5ed4fb146d575463bd304027da5a0e6b4a68 Mon Sep 17 00:00:00 2001 From: Soby Mathew Date: Mon, 17 Sep 2018 04:34:35 +0100 Subject: [PATCH 4/7] Make errata reporting mandatory for CPU files Previously the errata reporting was optional for CPU operation files and this was achieved by making use of weak reference to resolve to 0 if the symbol is not defined. This is error prone when adding new CPU operation files and weak references are problematic when fixing up dynamic relocations. Hence this patch removes the weak reference and makes it mandatory for the CPU operation files to define the errata reporting function. 
Change-Id: I8af192e19b85b7cd8c7579e52f8f05a4294e5396 Signed-off-by: Soby Mathew --- include/lib/cpus/aarch32/cpu_macros.S | 3 +-- include/lib/cpus/aarch64/cpu_macros.S | 3 +-- lib/cpus/aarch32/aem_generic.S | 9 +++++++++ lib/cpus/aarch32/cortex_a12.S | 9 +++++++++ lib/cpus/aarch32/cortex_a32.S | 9 +++++++++ lib/cpus/aarch32/cortex_a5.S | 9 +++++++++ lib/cpus/aarch32/cortex_a7.S | 9 +++++++++ lib/cpus/aarch64/aem_generic.S | 9 +++++++++ lib/cpus/aarch64/cortex_a35.S | 10 ++++++++++ lib/cpus/aarch64/cortex_deimos.S | 10 ++++++++++ lib/cpus/aarch64/cortex_helios.S | 10 ++++++++++ 11 files changed, 86 insertions(+), 4 deletions(-) diff --git a/include/lib/cpus/aarch32/cpu_macros.S b/include/lib/cpus/aarch32/cpu_macros.S index 525e18caf..aa728b241 100644 --- a/include/lib/cpus/aarch32/cpu_macros.S +++ b/include/lib/cpus/aarch32/cpu_macros.S @@ -161,10 +161,9 @@ .endif /* - * Weakly-bound, optional errata status printing function for CPUs of + * Mandatory errata status printing function for CPUs of * this class. */ - .weak \_name\()_errata_report .word \_name\()_errata_report #ifdef IMAGE_BL32 diff --git a/include/lib/cpus/aarch64/cpu_macros.S b/include/lib/cpus/aarch64/cpu_macros.S index 4672cbc06..14616ace4 100644 --- a/include/lib/cpus/aarch64/cpu_macros.S +++ b/include/lib/cpus/aarch64/cpu_macros.S @@ -183,10 +183,9 @@ .endif /* - * Weakly-bound, optional errata status printing function for CPUs of + * Mandatory errata status printing function for CPUs of * this class. */ - .weak \_name\()_errata_report .quad \_name\()_errata_report #ifdef IMAGE_BL31 diff --git a/lib/cpus/aarch32/aem_generic.S b/lib/cpus/aarch32/aem_generic.S index 5f3d74470..7bd586ad9 100644 --- a/lib/cpus/aarch32/aem_generic.S +++ b/lib/cpus/aarch32/aem_generic.S @@ -40,6 +40,15 @@ func aem_generic_cluster_pwr_dwn b dcsw_op_all endfunc aem_generic_cluster_pwr_dwn +#if REPORT_ERRATA +/* + * Errata printing function for AEM. Must follow AAPCS. 
+ */ +func aem_generic_errata_report + bx lr +endfunc aem_generic_errata_report +#endif + /* cpu_ops for Base AEM FVP */ declare_cpu_ops aem_generic, BASE_AEM_MIDR, CPU_NO_RESET_FUNC, \ aem_generic_core_pwr_dwn, \ diff --git a/lib/cpus/aarch32/cortex_a12.S b/lib/cpus/aarch32/cortex_a12.S index 73c975072..5300fe009 100644 --- a/lib/cpus/aarch32/cortex_a12.S +++ b/lib/cpus/aarch32/cortex_a12.S @@ -69,6 +69,15 @@ func cortex_a12_cluster_pwr_dwn b cortex_a12_disable_smp endfunc cortex_a12_cluster_pwr_dwn +#if REPORT_ERRATA +/* + * Errata printing function for Cortex-A12. Must follow AAPCS. + */ +func cortex_a12_errata_report + bx lr +endfunc cortex_a12_errata_report +#endif + declare_cpu_ops cortex_a12, CORTEX_A12_MIDR, \ cortex_a12_reset_func, \ cortex_a12_core_pwr_dwn, \ diff --git a/lib/cpus/aarch32/cortex_a32.S b/lib/cpus/aarch32/cortex_a32.S index 2b6df272d..c26227622 100644 --- a/lib/cpus/aarch32/cortex_a32.S +++ b/lib/cpus/aarch32/cortex_a32.S @@ -117,6 +117,15 @@ func cortex_a32_cluster_pwr_dwn b cortex_a32_disable_smp endfunc cortex_a32_cluster_pwr_dwn +#if REPORT_ERRATA +/* + * Errata printing function for Cortex-A32. Must follow AAPCS. + */ +func cortex_a32_errata_report + bx lr +endfunc cortex_a32_errata_report +#endif + declare_cpu_ops cortex_a32, CORTEX_A32_MIDR, \ cortex_a32_reset_func, \ cortex_a32_core_pwr_dwn, \ diff --git a/lib/cpus/aarch32/cortex_a5.S b/lib/cpus/aarch32/cortex_a5.S index c07c13ea1..8abb66f0e 100644 --- a/lib/cpus/aarch32/cortex_a5.S +++ b/lib/cpus/aarch32/cortex_a5.S @@ -69,6 +69,15 @@ func cortex_a5_cluster_pwr_dwn b cortex_a5_disable_smp endfunc cortex_a5_cluster_pwr_dwn +#if REPORT_ERRATA +/* + * Errata printing function for Cortex-A5. Must follow AAPCS. 
+ */ +func cortex_a5_errata_report + bx lr +endfunc cortex_a5_errata_report +#endif + declare_cpu_ops cortex_a5, CORTEX_A5_MIDR, \ cortex_a5_reset_func, \ cortex_a5_core_pwr_dwn, \ diff --git a/lib/cpus/aarch32/cortex_a7.S b/lib/cpus/aarch32/cortex_a7.S index 0278d1fda..4d4bb77bb 100644 --- a/lib/cpus/aarch32/cortex_a7.S +++ b/lib/cpus/aarch32/cortex_a7.S @@ -69,6 +69,15 @@ func cortex_a7_cluster_pwr_dwn b cortex_a7_disable_smp endfunc cortex_a7_cluster_pwr_dwn +#if REPORT_ERRATA +/* + * Errata printing function for Cortex-A7. Must follow AAPCS. + */ +func cortex_a7_errata_report + bx lr +endfunc cortex_a7_errata_report +#endif + declare_cpu_ops cortex_a7, CORTEX_A7_MIDR, \ cortex_a7_reset_func, \ cortex_a7_core_pwr_dwn, \ diff --git a/lib/cpus/aarch64/aem_generic.S b/lib/cpus/aarch64/aem_generic.S index 7592e3dc9..51b5ce91c 100644 --- a/lib/cpus/aarch64/aem_generic.S +++ b/lib/cpus/aarch64/aem_generic.S @@ -46,6 +46,15 @@ func aem_generic_cluster_pwr_dwn b dcsw_op_all endfunc aem_generic_cluster_pwr_dwn +#if REPORT_ERRATA +/* + * Errata printing function for AEM. Must follow AAPCS. + */ +func aem_generic_errata_report + ret +endfunc aem_generic_errata_report +#endif + /* --------------------------------------------- * This function provides cpu specific * register information for crash reporting. diff --git a/lib/cpus/aarch64/cortex_a35.S b/lib/cpus/aarch64/cortex_a35.S index b22189c80..2e0d63161 100644 --- a/lib/cpus/aarch64/cortex_a35.S +++ b/lib/cpus/aarch64/cortex_a35.S @@ -114,6 +114,16 @@ func cortex_a35_cluster_pwr_dwn b cortex_a35_disable_smp endfunc cortex_a35_cluster_pwr_dwn +#if REPORT_ERRATA +/* + * Errata printing function for Cortex A35. Must follow AAPCS. + */ +func cortex_a35_errata_report + ret +endfunc cortex_a35_errata_report +#endif + + /* --------------------------------------------- * This function provides cortex_a35 specific * register information for crash reporting. 
diff --git a/lib/cpus/aarch64/cortex_deimos.S b/lib/cpus/aarch64/cortex_deimos.S index aec62a287..cad906f65 100644 --- a/lib/cpus/aarch64/cortex_deimos.S +++ b/lib/cpus/aarch64/cortex_deimos.S @@ -27,6 +27,16 @@ func cortex_deimos_core_pwr_dwn ret endfunc cortex_deimos_core_pwr_dwn +#if REPORT_ERRATA +/* + * Errata printing function for Cortex Deimos. Must follow AAPCS. + */ +func cortex_deimos_errata_report + ret +endfunc cortex_deimos_errata_report +#endif + + /* --------------------------------------------- * This function provides Cortex-Deimos specific * register information for crash reporting. diff --git a/lib/cpus/aarch64/cortex_helios.S b/lib/cpus/aarch64/cortex_helios.S index bcda74114..4812ac47c 100644 --- a/lib/cpus/aarch64/cortex_helios.S +++ b/lib/cpus/aarch64/cortex_helios.S @@ -19,6 +19,16 @@ func cortex_helios_cpu_pwr_dwn ret endfunc cortex_helios_cpu_pwr_dwn +#if REPORT_ERRATA +/* + * Errata printing function for Cortex Helios. Must follow AAPCS. + */ +func cortex_helios_errata_report + ret +endfunc cortex_helios_errata_report +#endif + + .section .rodata.cortex_helios_regs, "aS" cortex_helios_regs: /* The ascii list of register names to be reported */ .asciz "cpuectlr_el1", "" From 931f7c615643e5d0fb2cbab68e4093c980b0e271 Mon Sep 17 00:00:00 2001 From: Soby Mathew Date: Sun, 14 Oct 2018 08:09:22 +0100 Subject: [PATCH 5/7] PIE: Position Independent Executable support for BL31 This patch introduces Position Independent Executable(PIE) support in TF-A. As an initial prototype, only BL31 can support PIE. A trivial dynamic linker is implemented which supports fixing up Global Offset Table(GOT) and Dynamic relocations(.rela.dyn). The fixup_gdt_reloc() helper function implements this linker and this needs to be called early in the boot sequence prior to invoking C functions. The GOT is placed in the RO section of BL31 binary for improved security and the BL31 linker script is modified to export the appropriate symbols required for the dynamic linker. 
The C compiler always generates PC relative addresses to linker symbols and hence referencing symbols exporting constants are a problem when relocating the binary. Hence the reference to the `__PERCPU_TIMESTAMP_SIZE__` symbol in PMF is removed and is now calculated at runtime based on start and end addresses. Change-Id: I1228583ff92cf432963b7cef052e95d995cca93d Signed-off-by: Soby Mathew --- bl31/aarch64/bl31_entrypoint.S | 13 ++++ bl31/bl31.ld.S | 28 ++++++-- include/common/bl_common.h | 1 + lib/aarch64/misc_helpers.S | 116 ++++++++++++++++++++++++++++++++- lib/pmf/pmf_main.c | 5 +- 5 files changed, 154 insertions(+), 9 deletions(-) diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S index 3a45e53f4..7c116a2e0 100644 --- a/bl31/aarch64/bl31_entrypoint.S +++ b/bl31/aarch64/bl31_entrypoint.S @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,18 @@ func bl31_entrypoint mov x22, 0 mov x23, 0 #endif /* RESET_TO_BL31 */ + + /* -------------------------------------------------------------------- + * If PIE is enabled, fixup the Global descriptor Table and dynamic + * relocations + * -------------------------------------------------------------------- + */ +#if ENABLE_PIE + mov_imm x0, BL31_BASE + mov_imm x1, BL31_LIMIT + bl fixup_gdt_reloc +#endif /* ENABLE_PIE */ + /* --------------------------------------------- * Perform platform specific early arch. setup * --------------------------------------------- diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S index 81e7ba3a3..43d0ed440 100644 --- a/bl31/bl31.ld.S +++ b/bl31/bl31.ld.S @@ -26,6 +26,8 @@ SECTIONS ASSERT(. == ALIGN(PAGE_SIZE), "BL31_BASE address is not aligned on a page boundary.") + __BL31_START__ = .; + #if SEPARATE_CODE_AND_RODATA .text . 
: { __TEXT_START__ = .; @@ -63,6 +65,16 @@ SECTIONS KEEP(*(cpu_ops)) __CPU_OPS_END__ = .; + /* + * Keep the .got section in the RO section as it is patched + * prior to enabling the MMU and having the .got in RO is better for + * security. + */ + . = ALIGN(16); + __GOT_START__ = .; + *(.got) + __GOT_END__ = .; + /* Place pubsub sections for events */ . = ALIGN(8); #include @@ -153,6 +165,16 @@ SECTIONS __DATA_END__ = .; } >RAM + . = ALIGN(16); + /* + * .rela.dyn needs to come after .data for the read-elf utility to parse + * this section correctly. + */ + __RELA_START__ = .; + .rela.dyn . : { + } >RAM + __RELA_END__ = .; + #ifdef BL31_PROGBITS_LIMIT ASSERT(. <= BL31_PROGBITS_LIMIT, "BL31 progbits has exceeded its limit.") #endif @@ -265,11 +287,5 @@ SECTIONS __RW_END__ = .; __BL31_END__ = .; - __BSS_SIZE__ = SIZEOF(.bss); -#if USE_COHERENT_MEM - __COHERENT_RAM_UNALIGNED_SIZE__ = - __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__; -#endif - ASSERT(. <= BL31_LIMIT, "BL31 image has exceeded its limit.") } diff --git a/include/common/bl_common.h b/include/common/bl_common.h index 2ecf281c0..6a79dc33f 100644 --- a/include/common/bl_common.h +++ b/include/common/bl_common.h @@ -83,6 +83,7 @@ IMPORT_SYM(unsigned long, __BL2_END__, BL2_END); #elif defined(IMAGE_BL2U) IMPORT_SYM(unsigned long, __BL2U_END__, BL2U_END); #elif defined(IMAGE_BL31) +IMPORT_SYM(unsigned long, __BL31_START__, BL31_START); IMPORT_SYM(unsigned long, __BL31_END__, BL31_END); #elif defined(IMAGE_BL32) IMPORT_SYM(unsigned long, __BL32_END__, BL32_END); diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S index 1a075aa5a..002942e8d 100644 --- a/lib/aarch64/misc_helpers.S +++ b/lib/aarch64/misc_helpers.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,6 +7,7 @@ #include #include #include +#include .globl get_afflvl_shift .globl mpidr_mask_lower_afflvls @@ -23,6 +24,8 @@ .globl disable_mmu_icache_el1 .globl disable_mmu_icache_el3 + .globl fixup_gdt_reloc + #if SUPPORT_VFP .globl enable_vfp #endif @@ -497,3 +500,114 @@ func enable_vfp ret endfunc enable_vfp #endif + +/* --------------------------------------------------------------------------- + * Helper to fixup Global Offset Table (GOT) and dynamic relocations + * (.rela.dyn) at runtime. + * + * This function is meant to be used when the firmware is compiled with -fpie + * and linked with -pie options. We rely on the linker script exporting + * appropriate markers for start and end of the section. For GOT, we + * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect + * __RELA_START__ and __RELA_END__. + * + * The function takes the limits of the memory to apply fixups to as + * arguments (which is usually the limits of the relocatable BL image). + * x0 - the start of the fixup region + * x1 - the limit of the fixup region + * These addresses have to be page (4KB) aligned. + * --------------------------------------------------------------------------- + */ +func fixup_gdt_reloc + mov x6, x0 + mov x7, x1 + + /* Test if the limits are 4K aligned */ +#if ENABLE_ASSERTIONS + orr x0, x0, x1 + tst x0, #(PAGE_SIZE - 1) + ASM_ASSERT(eq) +#endif + /* + * Calculate the offset based on return address in x30. + * Assume that this function is called within a page of the start of + * the fixup region. + */ + and x2, x30, #~(PAGE_SIZE - 1) + sub x0, x2, x6 /* Diff(S) = Current Address - Compiled Address */ + + adrp x1, __GOT_START__ + add x1, x1, :lo12:__GOT_START__ + adrp x2, __GOT_END__ + add x2, x2, :lo12:__GOT_END__ + + /* + * GOT is an array of 64-bit addresses which must be fixed up as + * new_addr = old_addr + Diff(S). 
+ * The new_addr is the address the binary is currently executing from + * and old_addr is the address at compile time. + */ +1: + ldr x3, [x1] + /* Skip adding offset if address is < lower limit */ + cmp x3, x6 + b.lo 2f + /* Skip adding offset if address is >= upper limit */ + cmp x3, x7 + b.ge 2f + add x3, x3, x0 + str x3, [x1] +2: + add x1, x1, #8 + cmp x1, x2 + b.lo 1b + + /* Starting dynamic relocations. Use adrp/adr to get RELA_START and END */ + adrp x1, __RELA_START__ + add x1, x1, :lo12:__RELA_START__ + adrp x2, __RELA_END__ + add x2, x2, :lo12:__RELA_END__ + /* + * According to ELF-64 specification, the RELA data structure is as + * follows: + * typedef struct + * { + * Elf64_Addr r_offset; + * Elf64_Xword r_info; + * Elf64_Sxword r_addend; + * } Elf64_Rela; + * + * r_offset is address of reference + * r_info is symbol index and type of relocation (in this case + * 0x403 which corresponds to R_AARCH64_RELATIVE). + * r_addend is constant part of expression. + * + * Size of Elf64_Rela structure is 24 bytes. 
+ */ +1: + /* Assert that the relocation type is R_AARCH64_RELATIVE */ +#if ENABLE_ASSERTIONS + ldr x3, [x1, #8] + cmp x3, #0x403 + ASM_ASSERT(eq) +#endif + ldr x3, [x1] /* r_offset */ + add x3, x0, x3 + ldr x4, [x1, #16] /* r_addend */ + + /* Skip adding offset if r_addend is < lower limit */ + cmp x4, x6 + b.lo 2f + /* Skip adding offset if r_addend entry is >= upper limit */ + cmp x4, x7 + b.ge 2f + + add x4, x0, x4 /* Diff(S) + r_addend */ + str x4, [x3] + +2: add x1, x1, #24 + cmp x1, x2 + b.lo 1b + + ret +endfunc fixup_gdt_reloc diff --git a/lib/pmf/pmf_main.c b/lib/pmf/pmf_main.c index a02086008..25513c191 100644 --- a/lib/pmf/pmf_main.c +++ b/lib/pmf/pmf_main.c @@ -25,9 +25,10 @@ IMPORT_SYM(uintptr_t, __PMF_SVC_DESCS_START__, PMF_SVC_DESCS_START); IMPORT_SYM(uintptr_t, __PMF_SVC_DESCS_END__, PMF_SVC_DESCS_END); -IMPORT_SYM(uintptr_t, __PERCPU_TIMESTAMP_SIZE__, PMF_PERCPU_TIMESTAMP_SIZE); +IMPORT_SYM(uintptr_t, __PMF_PERCPU_TIMESTAMP_END__, PMF_PERCPU_TIMESTAMP_END); IMPORT_SYM(intptr_t, __PMF_TIMESTAMP_START__, PMF_TIMESTAMP_ARRAY_START); -IMPORT_SYM(uintptr_t, __PMF_TIMESTAMP_END__, PMF_TIMESTAMP_ARRAY_END); + +#define PMF_PERCPU_TIMESTAMP_SIZE (PMF_PERCPU_TIMESTAMP_END - PMF_TIMESTAMP_ARRAY_START) #define PMF_SVC_DESCS_MAX 10 From bd83b39621f735b61c7a39cb919713f97ed2c22c Mon Sep 17 00:00:00 2001 From: Soby Mathew Date: Fri, 12 Oct 2018 17:08:28 +0100 Subject: [PATCH 6/7] plat/arm: Use `mov_imm` macro to load immediate values This patch makes use of mov_imm macro where possible to load immediate values within ARM platform layer. 
Change-Id: I02bc7fbc1fa334c9fccf76fbddf515952f9a1298 Signed-off-by: Soby Mathew --- plat/arm/board/fvp/aarch64/fvp_helpers.S | 12 ++++++------ plat/arm/css/common/aarch64/css_helpers.S | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/plat/arm/board/fvp/aarch64/fvp_helpers.S b/plat/arm/board/fvp/aarch64/fvp_helpers.S index 88fcdb1b0..abc3ceb00 100644 --- a/plat/arm/board/fvp/aarch64/fvp_helpers.S +++ b/plat/arm/board/fvp/aarch64/fvp_helpers.S @@ -19,7 +19,7 @@ .globl plat_arm_calc_core_pos .macro fvp_choose_gicmmap param1, param2, x_tmp, w_tmp, res - ldr \x_tmp, =V2M_SYSREGS_BASE + V2M_SYS_ID + mov_imm \x_tmp, V2M_SYSREGS_BASE + V2M_SYS_ID ldr \w_tmp, [\x_tmp] ubfx \w_tmp, \w_tmp, #V2M_SYS_ID_BLD_SHIFT, #V2M_SYS_ID_BLD_LENGTH cmp \w_tmp, #BLD_GIC_VE_MMAP @@ -48,7 +48,7 @@ func plat_secondary_cold_boot_setup * --------------------------------------------- */ mrs x0, mpidr_el1 - ldr x1, =PWRC_BASE + mov_imm x1, PWRC_BASE str w0, [x1, #PPOFFR_OFF] /* --------------------------------------------- @@ -72,8 +72,8 @@ func plat_secondary_cold_boot_setup b secondary_cold_boot_wait gicv2_bypass_disable: - ldr x0, =VE_GICC_BASE - ldr x1, =BASE_GICC_BASE + mov_imm x0, VE_GICC_BASE + mov_imm x1, BASE_GICC_BASE fvp_choose_gicmmap x0, x1, x2, w2, x1 mov w0, #(IRQ_BYP_DIS_GRP1 | FIQ_BYP_DIS_GRP1) orr w0, w0, #(IRQ_BYP_DIS_GRP0 | FIQ_BYP_DIS_GRP0) @@ -128,7 +128,7 @@ func plat_get_my_entrypoint * --------------------------------------------------------------------- */ mrs x2, mpidr_el1 - ldr x1, =PWRC_BASE + mov_imm x1, PWRC_BASE str w2, [x1, #PSYSR_OFF] ldr w2, [x1, #PSYSR_OFF] ubfx w2, w2, #PSYSR_WK_SHIFT, #PSYSR_WK_WIDTH @@ -171,7 +171,7 @@ endfunc plat_get_my_entrypoint */ func plat_is_my_cpu_primary mrs x0, mpidr_el1 - ldr x1, =MPIDR_AFFINITY_MASK + mov_imm x1, MPIDR_AFFINITY_MASK and x0, x0, x1 cmp x0, #FVP_PRIMARY_CPU cset w0, eq diff --git a/plat/arm/css/common/aarch64/css_helpers.S b/plat/arm/css/common/aarch64/css_helpers.S index 59d920650..5096d8d95 
100644 --- a/plat/arm/css/common/aarch64/css_helpers.S +++ b/plat/arm/css/common/aarch64/css_helpers.S @@ -108,7 +108,7 @@ endfunc plat_is_my_cpu_primary func plat_is_my_cpu_primary mov x9, x30 bl plat_my_core_pos - ldr x1, =SCP_BOOT_CFG_ADDR + mov_imm x1, SCP_BOOT_CFG_ADDR ldr x1, [x1] ubfx x1, x1, #PLAT_CSS_PRIMARY_CPU_SHIFT, \ #PLAT_CSS_PRIMARY_CPU_BIT_WIDTH From fc922ca87cc6af8277dc0eb710fc63a2957f0194 Mon Sep 17 00:00:00 2001 From: Soby Mathew Date: Sun, 14 Oct 2018 08:13:44 +0100 Subject: [PATCH 7/7] FVP: Enable PIE for RESET_TO_BL31=1 This patch enables PIE for FVP when RESET_TO_BL31=1. The references to BL31_BASE are replaced by BL31_START, which, being a symbol exported by the linker, will create a dynamic relocation entry in .rela.dyn and hence will be fixed up by the dynamic linker at runtime. Also, we disable RECLAIM_INIT_CODE when PIE is enabled as the init section overlay creates some static relocations which cannot be handled by the dynamic linker currently. Change-Id: I86df1b0a8b2a8bbbe7c3f3c0b9a08c86c2963ec0 Signed-off-by: Soby Mathew --- plat/arm/board/fvp/platform.mk | 2 ++ plat/arm/common/arm_bl31_setup.c | 6 ++++-- plat/arm/common/arm_common.mk | 5 +++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk index 332df4d44..3d858c20e 100644 --- a/plat/arm/board/fvp/platform.mk +++ b/plat/arm/board/fvp/platform.mk @@ -202,7 +202,9 @@ ENABLE_AMU := 1 DYNAMIC_WORKAROUND_CVE_2018_3639 := 1 # Enable reclaiming of BL31 initialisation code for secondary cores stacks for FVP +ifneq (${RESET_TO_BL31},1) RECLAIM_INIT_CODE := 1 +endif ifeq (${ENABLE_AMU},1) BL31_SOURCES += lib/cpus/aarch64/cortex_a75_pubsub.c \ diff --git a/plat/arm/common/arm_bl31_setup.c b/plat/arm/common/arm_bl31_setup.c index 24d557095..1b05f46e9 100644 --- a/plat/arm/common/arm_bl31_setup.c +++ b/plat/arm/common/arm_bl31_setup.c @@ -25,11 +25,13 @@ static entry_point_info_t bl32_image_ep_info; static entry_point_info_t
bl33_image_ep_info; +#if !RESET_TO_BL31 /* * Check that BL31_BASE is above ARM_TB_FW_CONFIG_LIMIT. The reserved page * is required for SOC_FW_CONFIG/TOS_FW_CONFIG passed from BL2. */ CASSERT(BL31_BASE >= ARM_TB_FW_CONFIG_LIMIT, assert_bl31_base_overflows); +#endif /* Weak definitions may be overridden in specific ARM standard platform */ #pragma weak bl31_early_platform_setup2 @@ -38,8 +40,8 @@ CASSERT(BL31_BASE >= ARM_TB_FW_CONFIG_LIMIT, assert_bl31_base_overflows); #pragma weak bl31_plat_get_next_image_ep_info #define MAP_BL31_TOTAL MAP_REGION_FLAT( \ - BL31_BASE, \ - BL31_END - BL31_BASE, \ + BL31_START, \ + BL31_END - BL31_START, \ MT_MEMORY | MT_RW | MT_SECURE) #if RECLAIM_INIT_CODE IMPORT_SYM(unsigned long, __INIT_CODE_START__, BL_INIT_CODE_BASE); diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk index 3fb1eff2d..23777fb7b 100644 --- a/plat/arm/common/arm_common.mk +++ b/plat/arm/common/arm_common.mk @@ -130,6 +130,11 @@ ARM_CRYPTOCELL_INTEG := 0 $(eval $(call assert_boolean,ARM_CRYPTOCELL_INTEG)) $(eval $(call add_define,ARM_CRYPTOCELL_INTEG)) +# Enable PIE support for RESET_TO_BL31 case +ifeq (${RESET_TO_BL31},1) + ENABLE_PIE := 1 +endif + # CryptoCell integration relies on coherent buffers for passing data from # the AP CPU to the CryptoCell ifeq (${ARM_CRYPTOCELL_INTEG},1)