arm-trusted-firmware/lib/aarch32/misc_helpers.S

316 lines
7.5 KiB
ArmAsm
Raw Normal View History

/*
* Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <common/bl_common.h>
#include <lib/xlat_tables/xlat_tables_defs.h>
.globl smc
.globl zeromem
Introduce unified API to zero memory Introduce zeromem_dczva function on AArch64 that can handle unaligned addresses and make use of DC ZVA instruction to zero a whole block at a time. This zeroing takes place directly in the cache to speed it up without doing external memory access. Remove the zeromem16 function on AArch64 and replace it with an alias to zeromem. This zeromem16 function is now deprecated. Remove the 16-bytes alignment constraint on __BSS_START__ in firmware-design.md as it is now not mandatory anymore (it used to comply with zeromem16 requirements). Change the 16-bytes alignment constraints in SP min's linker script to a 8-bytes alignment constraint as the AArch32 zeromem implementation is now more efficient on 8-bytes aligned addresses. Introduce zero_normalmem and zeromem helpers in platform agnostic header that are implemented this way: * AArch32: * zero_normalmem: zero using usual data access * zeromem: alias for zero_normalmem * AArch64: * zero_normalmem: zero normal memory using DC ZVA instruction (needs MMU enabled) * zeromem: zero using usual data access Usage guidelines: in most cases, zero_normalmem should be preferred. There are 2 scenarios where zeromem (or memset) must be used instead: * Code that must run with MMU disabled (which means all memory is considered device memory for data accesses). * Code that fills device memory with null bytes. Optionally, the following rule can be applied if performance is important: * Code zeroing small areas (few bytes) that are not secrets should use memset to take advantage of compiler optimizations. Note: Code zeroing security-related critical information should use zero_normalmem/zeromem instead of memset to avoid removal by compilers' optimizations in some cases or misbehaving versions of GCC. Fixes ARM-software/tf-issues#408 Change-Id: Iafd9663fc1070413c3e1904e54091cf60effaa82 Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
2016-12-02 13:51:54 +00:00
.globl zero_normalmem
.globl memcpy4
.globl disable_mmu_icache_secure
.globl disable_mmu_secure
.globl fixup_gdt_reloc
#define PAGE_START_MASK ~(PAGE_SIZE_MASK)
func smc
/*
* For AArch32 only r0-r3 will be in the registers;
* rest r4-r6 will be pushed on to the stack. So here, we'll
* have to load them from the stack to registers r4-r6 explicitly.
* Clobbers: r4-r6
*/
ldm sp, {r4, r5, r6}
smc #0
endfunc smc
/* -----------------------------------------------------------------------
Introduce unified API to zero memory Introduce zeromem_dczva function on AArch64 that can handle unaligned addresses and make use of DC ZVA instruction to zero a whole block at a time. This zeroing takes place directly in the cache to speed it up without doing external memory access. Remove the zeromem16 function on AArch64 and replace it with an alias to zeromem. This zeromem16 function is now deprecated. Remove the 16-bytes alignment constraint on __BSS_START__ in firmware-design.md as it is now not mandatory anymore (it used to comply with zeromem16 requirements). Change the 16-bytes alignment constraints in SP min's linker script to a 8-bytes alignment constraint as the AArch32 zeromem implementation is now more efficient on 8-bytes aligned addresses. Introduce zero_normalmem and zeromem helpers in platform agnostic header that are implemented this way: * AArch32: * zero_normalmem: zero using usual data access * zeromem: alias for zero_normalmem * AArch64: * zero_normalmem: zero normal memory using DC ZVA instruction (needs MMU enabled) * zeromem: zero using usual data access Usage guidelines: in most cases, zero_normalmem should be preferred. There are 2 scenarios where zeromem (or memset) must be used instead: * Code that must run with MMU disabled (which means all memory is considered device memory for data accesses). * Code that fills device memory with null bytes. Optionally, the following rule can be applied if performance is important: * Code zeroing small areas (few bytes) that are not secrets should use memset to take advantage of compiler optimizations. Note: Code zeroing security-related critical information should use zero_normalmem/zeromem instead of memset to avoid removal by compilers' optimizations in some cases or misbehaving versions of GCC. Fixes ARM-software/tf-issues#408 Change-Id: Iafd9663fc1070413c3e1904e54091cf60effaa82 Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
2016-12-02 13:51:54 +00:00
* void zeromem(void *mem, unsigned int length)
*
* Initialise a region in normal memory to 0. This functions complies with the
* AAPCS and can be called from C code.
*
* -----------------------------------------------------------------------
*/
func zeromem
Introduce unified API to zero memory Introduce zeromem_dczva function on AArch64 that can handle unaligned addresses and make use of DC ZVA instruction to zero a whole block at a time. This zeroing takes place directly in the cache to speed it up without doing external memory access. Remove the zeromem16 function on AArch64 and replace it with an alias to zeromem. This zeromem16 function is now deprecated. Remove the 16-bytes alignment constraint on __BSS_START__ in firmware-design.md as it is now not mandatory anymore (it used to comply with zeromem16 requirements). Change the 16-bytes alignment constraints in SP min's linker script to a 8-bytes alignment constraint as the AArch32 zeromem implementation is now more efficient on 8-bytes aligned addresses. Introduce zero_normalmem and zeromem helpers in platform agnostic header that are implemented this way: * AArch32: * zero_normalmem: zero using usual data access * zeromem: alias for zero_normalmem * AArch64: * zero_normalmem: zero normal memory using DC ZVA instruction (needs MMU enabled) * zeromem: zero using usual data access Usage guidelines: in most cases, zero_normalmem should be preferred. There are 2 scenarios where zeromem (or memset) must be used instead: * Code that must run with MMU disabled (which means all memory is considered device memory for data accesses). * Code that fills device memory with null bytes. Optionally, the following rule can be applied if performance is important: * Code zeroing small areas (few bytes) that are not secrets should use memset to take advantage of compiler optimizations. Note: Code zeroing security-related critical information should use zero_normalmem/zeromem instead of memset to avoid removal by compilers' optimizations in some cases or misbehaving versions of GCC. Fixes ARM-software/tf-issues#408 Change-Id: Iafd9663fc1070413c3e1904e54091cf60effaa82 Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
2016-12-02 13:51:54 +00:00
/*
* Readable names for registers
*
* Registers r0, r1 and r2 are also set by zeromem which
* branches into the fallback path directly, so cursor, length and
* stop_address should not be retargeted to other registers.
*/
cursor .req r0 /* Start address and then current address */
length .req r1 /* Length in bytes of the region to zero out */
/*
* Reusing the r1 register as length is only used at the beginning of
* the function.
*/
stop_address .req r1 /* Address past the last zeroed byte */
zeroreg1 .req r2 /* Source register filled with 0 */
zeroreg2 .req r3 /* Source register filled with 0 */
tmp .req r12 /* Temporary scratch register */
mov zeroreg1, #0
/* stop_address is the address past the last to zero */
add stop_address, cursor, length
/*
* Length cannot be used anymore as it shares the same register with
* stop_address.
*/
.unreq length
/*
* If the start address is already aligned to 8 bytes, skip this loop.
*/
tst cursor, #(8-1)
beq .Lzeromem_8bytes_aligned
/* Calculate the next address aligned to 8 bytes */
orr tmp, cursor, #(8-1)
adds tmp, tmp, #1
/* If it overflows, fallback to byte per byte zeroing */
beq .Lzeromem_1byte_aligned
/* If the next aligned address is after the stop address, fall back */
cmp tmp, stop_address
bhs .Lzeromem_1byte_aligned
/* zero byte per byte */
1:
strb zeroreg1, [cursor], #1
cmp cursor, tmp
bne 1b
/* zero 8 bytes at a time */
.Lzeromem_8bytes_aligned:
/* Calculate the last 8 bytes aligned address. */
bic tmp, stop_address, #(8-1)
cmp cursor, tmp
bhs 2f
mov zeroreg2, #0
1:
stmia cursor!, {zeroreg1, zeroreg2}
cmp cursor, tmp
blo 1b
2:
/* zero byte per byte */
.Lzeromem_1byte_aligned:
cmp cursor, stop_address
beq 2f
1:
strb zeroreg1, [cursor], #1
cmp cursor, stop_address
bne 1b
2:
bx lr
Introduce unified API to zero memory Introduce zeromem_dczva function on AArch64 that can handle unaligned addresses and make use of DC ZVA instruction to zero a whole block at a time. This zeroing takes place directly in the cache to speed it up without doing external memory access. Remove the zeromem16 function on AArch64 and replace it with an alias to zeromem. This zeromem16 function is now deprecated. Remove the 16-bytes alignment constraint on __BSS_START__ in firmware-design.md as it is now not mandatory anymore (it used to comply with zeromem16 requirements). Change the 16-bytes alignment constraints in SP min's linker script to a 8-bytes alignment constraint as the AArch32 zeromem implementation is now more efficient on 8-bytes aligned addresses. Introduce zero_normalmem and zeromem helpers in platform agnostic header that are implemented this way: * AArch32: * zero_normalmem: zero using usual data access * zeromem: alias for zero_normalmem * AArch64: * zero_normalmem: zero normal memory using DC ZVA instruction (needs MMU enabled) * zeromem: zero using usual data access Usage guidelines: in most cases, zero_normalmem should be preferred. There are 2 scenarios where zeromem (or memset) must be used instead: * Code that must run with MMU disabled (which means all memory is considered device memory for data accesses). * Code that fills device memory with null bytes. Optionally, the following rule can be applied if performance is important: * Code zeroing small areas (few bytes) that are not secrets should use memset to take advantage of compiler optimizations. Note: Code zeroing security-related critical information should use zero_normalmem/zeromem instead of memset to avoid removal by compilers' optimizations in some cases or misbehaving versions of GCC. Fixes ARM-software/tf-issues#408 Change-Id: Iafd9663fc1070413c3e1904e54091cf60effaa82 Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
2016-12-02 13:51:54 +00:00
.unreq cursor
/*
* length is already unreq'ed to reuse the register for another
* variable.
*/
.unreq stop_address
.unreq zeroreg1
.unreq zeroreg2
.unreq tmp
endfunc zeromem
Introduce unified API to zero memory Introduce zeromem_dczva function on AArch64 that can handle unaligned addresses and make use of DC ZVA instruction to zero a whole block at a time. This zeroing takes place directly in the cache to speed it up without doing external memory access. Remove the zeromem16 function on AArch64 and replace it with an alias to zeromem. This zeromem16 function is now deprecated. Remove the 16-bytes alignment constraint on __BSS_START__ in firmware-design.md as it is now not mandatory anymore (it used to comply with zeromem16 requirements). Change the 16-bytes alignment constraints in SP min's linker script to a 8-bytes alignment constraint as the AArch32 zeromem implementation is now more efficient on 8-bytes aligned addresses. Introduce zero_normalmem and zeromem helpers in platform agnostic header that are implemented this way: * AArch32: * zero_normalmem: zero using usual data access * zeromem: alias for zero_normalmem * AArch64: * zero_normalmem: zero normal memory using DC ZVA instruction (needs MMU enabled) * zeromem: zero using usual data access Usage guidelines: in most cases, zero_normalmem should be preferred. There are 2 scenarios where zeromem (or memset) must be used instead: * Code that must run with MMU disabled (which means all memory is considered device memory for data accesses). * Code that fills device memory with null bytes. Optionally, the following rule can be applied if performance is important: * Code zeroing small areas (few bytes) that are not secrets should use memset to take advantage of compiler optimizations. Note: Code zeroing security-related critical information should use zero_normalmem/zeromem instead of memset to avoid removal by compilers' optimizations in some cases or misbehaving versions of GCC. Fixes ARM-software/tf-issues#408 Change-Id: Iafd9663fc1070413c3e1904e54091cf60effaa82 Signed-off-by: Douglas Raillard <douglas.raillard@arm.com>
2016-12-02 13:51:54 +00:00
/*
* AArch32 does not have special ways of zeroing normal memory as AArch64 does
* using the DC ZVA instruction, so we just alias zero_normalmem to zeromem.
*/
.equ zero_normalmem, zeromem
/* --------------------------------------------------------------------------
* void memcpy4(void *dest, const void *src, unsigned int length)
*
* Copy length bytes from memory area src to memory area dest.
* The memory areas should not overlap.
* Destination and source addresses must be 4-byte aligned.
* --------------------------------------------------------------------------
*/
func memcpy4
#if ENABLE_ASSERTIONS
orr r3, r0, r1
tst r3, #0x3
ASM_ASSERT(eq)
#endif
/* copy 4 bytes at a time */
m_loop4:
cmp r2, #4
blo m_loop1
ldr r3, [r1], #4
str r3, [r0], #4
subs r2, r2, #4
bne m_loop4
bx lr
/* copy byte per byte */
m_loop1:
ldrb r3, [r1], #1
strb r3, [r0], #1
subs r2, r2, #1
bne m_loop1
bx lr
endfunc memcpy4
/* ---------------------------------------------------------------------------
* Disable the MMU in Secure State
* ---------------------------------------------------------------------------
*/
func disable_mmu_secure
mov r1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu:
#if ERRATA_A9_794073
stcopr r0, BPIALL
dsb
#endif
ldcopr r0, SCTLR
bic r0, r0, r1
stcopr r0, SCTLR
isb // ensure MMU is off
dsb sy
bx lr
endfunc disable_mmu_secure
func disable_mmu_icache_secure
ldr r1, =(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
b do_disable_mmu
endfunc disable_mmu_icache_secure
/* ---------------------------------------------------------------------------
* Helper to fixup Global Descriptor table (GDT) and dynamic relocations
* (.rel.dyn) at runtime.
*
* This function is meant to be used when the firmware is compiled with -fpie
* and linked with -pie options. We rely on the linker script exporting
* appropriate markers for start and end of the section. For GOT, we
* expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
* __RELA_START__ and __RELA_END__.
*
* The function takes the limits of the memory to apply fixups to as
* arguments (which is usually the limits of the relocable BL image).
* r0 - the start of the fixup region
* r1 - the limit of the fixup region
* These addresses have to be 4KB page aligned.
* ---------------------------------------------------------------------------
*/
/* Relocation codes */
#define R_ARM_RELATIVE 23
func fixup_gdt_reloc
mov r6, r0
mov r7, r1
#if ENABLE_ASSERTIONS
/* Test if the limits are 4K aligned */
orr r0, r0, r1
mov r1, #(PAGE_SIZE_MASK)
tst r0, r1
ASM_ASSERT(eq)
#endif
/*
* Calculate the offset based on return address in lr.
* Assume that this function is called within a page at the start of
* fixup region.
*/
ldr r1, =PAGE_START_MASK
and r2, lr, r1
subs r0, r2, r6 /* Diff(S) = Current Address - Compiled Address */
beq 3f /* Diff(S) = 0. No relocation needed */
ldr r1, =__GOT_START__
add r1, r1, r0
ldr r2, =__GOT_END__
add r2, r2, r0
/*
* GOT is an array of 32_bit addresses which must be fixed up as
* new_addr = old_addr + Diff(S).
* The new_addr is the address currently the binary is executing from
* and old_addr is the address at compile time.
*/
1: ldr r3, [r1]
/* Skip adding offset if address is < lower limit */
cmp r3, r6
blo 2f
/* Skip adding offset if address is > upper limit */
cmp r3, r7
bhi 2f
add r3, r3, r0
str r3, [r1]
2: add r1, r1, #4
cmp r1, r2
blo 1b
/* Starting dynamic relocations. Use ldr to get RELA_START and END */
3: ldr r1, =__RELA_START__
add r1, r1, r0
ldr r2, =__RELA_END__
add r2, r2, r0
/*
* According to ELF-32 specification, the RELA data structure is as
* follows:
* typedef struct {
* Elf32_Addr r_offset;
* Elf32_Xword r_info;
* } Elf32_Rela;
*
* r_offset is address of reference
* r_info is symbol index and type of relocation (in this case
* code 23 which corresponds to R_ARM_RELATIVE).
*
* Size of Elf32_Rela structure is 8 bytes.
*/
/* Skip R_ARM_NONE entry with code 0 */
1: ldr r3, [r1, #4]
ands r3, r3, #0xff
beq 2f
#if ENABLE_ASSERTIONS
/* Assert that the relocation type is R_ARM_RELATIVE */
cmp r3, #R_ARM_RELATIVE
ASM_ASSERT(eq)
#endif
ldr r3, [r1] /* r_offset */
add r3, r0, r3 /* Diff(S) + r_offset */
ldr r4, [r3]
/* Skip adding offset if address is < lower limit */
cmp r4, r6
blo 2f
/* Skip adding offset if address is >= upper limit */
cmp r4, r7
bhs 2f
add r4, r0, r4
str r4, [r3]
2: add r1, r1, #8
cmp r1, r2
blo 1b
bx lr
endfunc fixup_gdt_reloc