From 931f7c615643e5d0fb2cbab68e4093c980b0e271 Mon Sep 17 00:00:00 2001 From: Soby Mathew Date: Sun, 14 Oct 2018 08:09:22 +0100 Subject: [PATCH] PIE: Position Independent Executable support for BL31 This patch introduces Position Independent Executable (PIE) support in TF-A. As an initial prototype, only BL31 can support PIE. A trivial dynamic linker is implemented which supports fixing up the Global Offset Table (GOT) and dynamic relocations (.rela.dyn). The fixup_gdt_reloc() helper function implements this linker and needs to be called early in the boot sequence, prior to invoking any C functions. The GOT is placed in the RO section of the BL31 binary for improved security and the BL31 linker script is modified to export the appropriate symbols required by the dynamic linker. The C compiler always generates PC-relative addresses for linker symbols, so referencing a symbol that exports a constant (rather than an address) is a problem when the binary is relocated. Hence the reference to the `__PERCPU_TIMESTAMP_SIZE__` symbol in PMF is removed and the size is now calculated at runtime from the start and end addresses. 
Change-Id: I1228583ff92cf432963b7cef052e95d995cca93d Signed-off-by: Soby Mathew --- bl31/aarch64/bl31_entrypoint.S | 13 ++++ bl31/bl31.ld.S | 28 ++++++-- include/common/bl_common.h | 1 + lib/aarch64/misc_helpers.S | 116 ++++++++++++++++++++++++++++++++- lib/pmf/pmf_main.c | 5 +- 5 files changed, 154 insertions(+), 9 deletions(-) diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S index 3a45e53f4..7c116a2e0 100644 --- a/bl31/aarch64/bl31_entrypoint.S +++ b/bl31/aarch64/bl31_entrypoint.S @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,18 @@ func bl31_entrypoint mov x22, 0 mov x23, 0 #endif /* RESET_TO_BL31 */ + + /* -------------------------------------------------------------------- + * If PIE is enabled, fix up the Global Offset Table (GOT) and dynamic + * relocations + * -------------------------------------------------------------------- + */ +#if ENABLE_PIE + mov_imm x0, BL31_BASE + mov_imm x1, BL31_LIMIT + bl fixup_gdt_reloc +#endif /* ENABLE_PIE */ + /* --------------------------------------------- * Perform platform specific early arch. setup * --------------------------------------------- diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S index 81e7ba3a3..43d0ed440 100644 --- a/bl31/bl31.ld.S +++ b/bl31/bl31.ld.S @@ -26,6 +26,8 @@ SECTIONS ASSERT(. == ALIGN(PAGE_SIZE), "BL31_BASE address is not aligned on a page boundary.") + __BL31_START__ = .; + #if SEPARATE_CODE_AND_RODATA .text . : { __TEXT_START__ = .; @@ -63,6 +65,16 @@ SECTIONS KEEP(*(cpu_ops)) __CPU_OPS_END__ = .; + /* + * Keep the .got section in the RO section as it is patched + * prior to enabling the MMU and having the .got in RO is better for + * security. + */ + . = ALIGN(16); + __GOT_START__ = .; + *(.got) + __GOT_END__ = .; + /* Place pubsub sections for events */ . = ALIGN(8); #include @@ -153,6 +165,16 @@ SECTIONS __DATA_END__ = .; } >RAM + . 
= ALIGN(16); + /* + * .rela.dyn needs to come after .data for the read-elf utility to parse + * this section correctly. + */ + __RELA_START__ = .; + .rela.dyn . : { + } >RAM + __RELA_END__ = .; + #ifdef BL31_PROGBITS_LIMIT ASSERT(. <= BL31_PROGBITS_LIMIT, "BL31 progbits has exceeded its limit.") #endif @@ -265,11 +287,5 @@ SECTIONS __RW_END__ = .; __BL31_END__ = .; - __BSS_SIZE__ = SIZEOF(.bss); -#if USE_COHERENT_MEM - __COHERENT_RAM_UNALIGNED_SIZE__ = - __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__; -#endif - ASSERT(. <= BL31_LIMIT, "BL31 image has exceeded its limit.") } diff --git a/include/common/bl_common.h b/include/common/bl_common.h index 2ecf281c0..6a79dc33f 100644 --- a/include/common/bl_common.h +++ b/include/common/bl_common.h @@ -83,6 +83,7 @@ IMPORT_SYM(unsigned long, __BL2_END__, BL2_END); #elif defined(IMAGE_BL2U) IMPORT_SYM(unsigned long, __BL2U_END__, BL2U_END); #elif defined(IMAGE_BL31) +IMPORT_SYM(unsigned long, __BL31_START__, BL31_START); IMPORT_SYM(unsigned long, __BL31_END__, BL31_END); #elif defined(IMAGE_BL32) IMPORT_SYM(unsigned long, __BL32_END__, BL32_END); diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S index 1a075aa5a..002942e8d 100644 --- a/lib/aarch64/misc_helpers.S +++ b/lib/aarch64/misc_helpers.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -7,6 +7,7 @@ #include #include #include +#include .globl get_afflvl_shift .globl mpidr_mask_lower_afflvls @@ -23,6 +24,8 @@ .globl disable_mmu_icache_el1 .globl disable_mmu_icache_el3 + .globl fixup_gdt_reloc + #if SUPPORT_VFP .globl enable_vfp #endif @@ -497,3 +500,114 @@ func enable_vfp ret endfunc enable_vfp #endif + +/* --------------------------------------------------------------------------- + * Helper to fix up the Global Offset Table (GOT) and dynamic relocations + * (.rela.dyn) at runtime. + * + * This function is meant to be used when the firmware is compiled with -fpie + * and linked with -pie options. We rely on the linker script exporting + * appropriate markers for start and end of the section. For GOT, we + * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect + * __RELA_START__ and __RELA_END__. + * + * The function takes the limits of the memory to apply fixups to as + * arguments (which are usually the limits of the relocatable BL image). + * x0 - the start of the fixup region + * x1 - the limit of the fixup region + * Both must be 4KB page aligned. Writes x0-x4, x6, x7 and the flags. + * --------------------------------------------------------------------------- + */ +func fixup_gdt_reloc + mov x6, x0 + mov x7, x1 + + /* Test if the limits are 4K aligned */ +#if ENABLE_ASSERTIONS + orr x0, x0, x1 + tst x0, #(PAGE_SIZE - 1) + ASM_ASSERT(eq) +#endif + /* + * Calculate the offset based on return address in x30. + * Assume that this function is called within a page of the start of + * the fixup region. + */ + and x2, x30, #~(PAGE_SIZE - 1) + sub x0, x2, x6 /* Diff(S) = Current Address - Compiled Address */ + + adrp x1, __GOT_START__ + add x1, x1, :lo12:__GOT_START__ + adrp x2, __GOT_END__ + add x2, x2, :lo12:__GOT_END__ + + /* + * GOT is an array of 64-bit addresses which must be fixed up as + * new_addr = old_addr + Diff(S). 
+ * The new_addr is the address the binary is currently executing from + * and old_addr is the address at compile time. + */ +1: + ldr x3, [x1] + /* Skip adding offset if address is < lower limit */ + cmp x3, x6 + b.lo 2f + /* Skip adding offset if address is >= upper limit (unsigned) */ + cmp x3, x7 + b.hs 2f + add x3, x3, x0 + str x3, [x1] +2: + add x1, x1, #8 + cmp x1, x2 + b.lo 1b + + /* Starting dynamic relocations. Use adrp/add to get RELA_START and END */ + adrp x1, __RELA_START__ + add x1, x1, :lo12:__RELA_START__ + adrp x2, __RELA_END__ + add x2, x2, :lo12:__RELA_END__ + /* + * According to ELF-64 specification, the RELA data structure is as + * follows: + * typedef struct + * { + * Elf64_Addr r_offset; + * Elf64_Xword r_info; + * Elf64_Sxword r_addend; + * } Elf64_Rela; + * + * r_offset is address of reference + * r_info is symbol index and type of relocation (in this case + * 0x403 which corresponds to R_AARCH64_RELATIVE). + * r_addend is constant part of expression. + * + * Size of Elf64_Rela structure is 24 bytes. 
+ */ +1: + /* Assert that the relocation type is R_AARCH64_RELATIVE */ +#if ENABLE_ASSERTIONS + ldr x3, [x1, #8] + cmp x3, #0x403 + ASM_ASSERT(eq) +#endif + ldr x3, [x1] /* r_offset */ + add x3, x0, x3 + ldr x4, [x1, #16] /* r_addend */ + + /* Skip adding offset if r_addend is < lower limit */ + cmp x4, x6 + b.lo 2f + /* Skip adding offset if r_addend is >= upper limit (unsigned) */ + cmp x4, x7 + b.hs 2f + + add x4, x0, x4 /* Diff(S) + r_addend */ + str x4, [x3] + +2: add x1, x1, #24 + cmp x1, x2 + b.lo 1b + + ret +endfunc fixup_gdt_reloc diff --git a/lib/pmf/pmf_main.c b/lib/pmf/pmf_main.c index a02086008..25513c191 100644 --- a/lib/pmf/pmf_main.c +++ b/lib/pmf/pmf_main.c @@ -25,9 +25,10 @@ IMPORT_SYM(uintptr_t, __PMF_SVC_DESCS_START__, PMF_SVC_DESCS_START); IMPORT_SYM(uintptr_t, __PMF_SVC_DESCS_END__, PMF_SVC_DESCS_END); -IMPORT_SYM(uintptr_t, __PERCPU_TIMESTAMP_SIZE__, PMF_PERCPU_TIMESTAMP_SIZE); +IMPORT_SYM(uintptr_t, __PMF_PERCPU_TIMESTAMP_END__, PMF_PERCPU_TIMESTAMP_END); IMPORT_SYM(intptr_t, __PMF_TIMESTAMP_START__, PMF_TIMESTAMP_ARRAY_START); -IMPORT_SYM(uintptr_t, __PMF_TIMESTAMP_END__, PMF_TIMESTAMP_ARRAY_END); + +#define PMF_PERCPU_TIMESTAMP_SIZE (PMF_PERCPU_TIMESTAMP_END - PMF_TIMESTAMP_ARRAY_START) #define PMF_SVC_DESCS_MAX 10