Optimize/cleanup BPIALL workaround

In the initial implementation of this workaround we used a dedicated
workaround context to save/restore state.  This patch reduces the
footprint as no additional context is needed.

Additionally, this patch reduces the memory loads and stores by 20%,
reduces the instruction count and exploits static branch prediction to
optimize the SMC path.

Change-Id: Ia9f6bf06fbf8a9037cfe7f1f1fb32e8aec38ec7d
Signed-off-by: Dimitris Papastamos <dimitris.papastamos@arm.com>
This commit is contained in:
Dimitris Papastamos 2018-01-11 15:29:36 +00:00
parent 6eabbb07d7
commit d9bd656cf5
2 changed files with 90 additions and 148 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
* Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@ -46,26 +46,12 @@
#define CTX_GPREG_SP_EL0 U(0xf8)
#define CTX_GPREGS_END U(0x100)
#if WORKAROUND_CVE_2017_5715
#define CTX_CVE_2017_5715_OFFSET (CTX_GPREGS_OFFSET + CTX_GPREGS_END)
#define CTX_CVE_2017_5715_QUAD0 U(0x0)
#define CTX_CVE_2017_5715_QUAD1 U(0x8)
#define CTX_CVE_2017_5715_QUAD2 U(0x10)
#define CTX_CVE_2017_5715_QUAD3 U(0x18)
#define CTX_CVE_2017_5715_QUAD4 U(0x20)
#define CTX_CVE_2017_5715_QUAD5 U(0x28)
#define CTX_CVE_2017_5715_END U(0x30)
#else
#define CTX_CVE_2017_5715_OFFSET CTX_GPREGS_OFFSET
#define CTX_CVE_2017_5715_END CTX_GPREGS_END
#endif
/*******************************************************************************
* Constants that allow assembler code to access members of and the 'el3_state'
* structure at their correct offsets. Note that some of the registers are only
* 32-bits wide but are stored as 64-bit values for convenience
******************************************************************************/
#define CTX_EL3STATE_OFFSET (CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_END)
#define CTX_EL3STATE_OFFSET (CTX_GPREGS_OFFSET + CTX_GPREGS_END)
#define CTX_SCR_EL3 U(0x0)
#define CTX_RUNTIME_SP U(0x8)
#define CTX_SPSR_EL3 U(0x10)
@ -200,9 +186,6 @@
/* Constants to determine the size of individual context structures */
#define CTX_GPREG_ALL (CTX_GPREGS_END >> DWORD_SHIFT)
#if WORKAROUND_CVE_2017_5715
#define CTX_CVE_2017_5715_ALL (CTX_CVE_2017_5715_END >> DWORD_SHIFT)
#endif
#define CTX_SYSREG_ALL (CTX_SYSREGS_END >> DWORD_SHIFT)
#if CTX_INCLUDE_FPREGS
#define CTX_FPREG_ALL (CTX_FPREGS_END >> DWORD_SHIFT)
@ -218,10 +201,6 @@
*/
DEFINE_REG_STRUCT(gp_regs, CTX_GPREG_ALL);
#if WORKAROUND_CVE_2017_5715
DEFINE_REG_STRUCT(cve_2017_5715_regs, CTX_CVE_2017_5715_ALL);
#endif
/*
* AArch64 EL1 system register context structure for preserving the
* architectural state during switches from one security state to
@ -263,9 +242,6 @@ DEFINE_REG_STRUCT(el3_state, CTX_EL3STATE_ALL);
*/
typedef struct cpu_context {
gp_regs_t gpregs_ctx;
#if WORKAROUND_CVE_2017_5715
cve_2017_5715_regs_t cve_2017_5715_regs_ctx;
#endif
el3_state_t el3state_ctx;
el1_sys_regs_t sysregs_ctx;
#if CTX_INCLUDE_FPREGS

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
* Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@ -11,10 +11,15 @@
.globl workaround_bpiall_vbar0_runtime_exceptions
#define EMIT_BPIALL 0xee070fd5
#define EMIT_MOV_R0_IMM(v) 0xe3a0000##v
#define EMIT_SMC 0xe1600070
.macro enter_workaround _stub_name
.macro enter_workaround _from_vector
/*
* Save register state to enable a call to AArch32 S-EL1 and return
* Identify the original calling vector in w2 (==_from_vector)
* Use w3-w6 for additional register state preservation while in S-EL1
*/
/* Save GP regs */
stp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
stp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
@ -32,47 +37,50 @@
stp x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26]
stp x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
adr x4, \_stub_name
/* Identify the original exception vector */
mov w2, \_from_vector
/* Preserve 32-bit system registers in GP registers through the workaround */
mrs x3, esr_el3
mrs x4, spsr_el3
mrs x5, scr_el3
mrs x6, sctlr_el1
/*
* Load SPSR_EL3 and VBAR_EL3. SPSR_EL3 is set up to have
* all interrupts masked in preparation to running the workaround
* stub in S-EL1. VBAR_EL3 points to the vector table that
* will handle the SMC back from the workaround stub.
* Preserve LR and ELR_EL3 registers in the GP regs context.
* Temporarily use the CTX_GPREG_SP_EL0 slot to preserve ELR_EL3
* through the workaround. This is OK because at this point the
* current state for this context's SP_EL0 is in the live system
* register, which is unmodified by the workaround.
*/
ldp x0, x1, [x4, #0]
mrs x7, elr_el3
stp x30, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
/*
* Load SCTLR_EL1 and ELR_EL3. SCTLR_EL1 is configured to disable
* the MMU in S-EL1. ELR_EL3 points to the appropriate stub in S-EL1.
* Load system registers for entry to S-EL1.
*/
ldp x2, x3, [x4, #16]
mrs x4, scr_el3
mrs x5, spsr_el3
mrs x6, elr_el3
mrs x7, sctlr_el1
mrs x8, esr_el3
/* Mask all interrupts and set AArch32 Supervisor mode */
movz w8, SPSR_MODE32(MODE32_svc, SPSR_T_ARM, SPSR_E_LITTLE, SPSR_AIF_MASK)
/* Preserve system registers in the workaround context */
stp x4, x5, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD0]
stp x6, x7, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD2]
stp x8, x30, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD4]
/* Switch EL3 exception vectors while the workaround is executing. */
adr x9, workaround_bpiall_vbar1_runtime_exceptions
/* Setup SCTLR_EL1 with MMU off and I$ on */
ldr x10, stub_sel1_sctlr
/* Land at the S-EL1 workaround stub */
adr x11, aarch32_stub
/*
* Setting SCR_EL3 to all zeroes means that the NS, RW
* and SMD bits are configured as expected.
*/
msr scr_el3, xzr
/*
* Reload system registers with the crafted values
* in preparation for entry in S-EL1.
*/
msr spsr_el3, x0
msr vbar_el3, x1
msr sctlr_el1, x2
msr elr_el3, x3
msr spsr_el3, x8
msr vbar_el3, x9
msr sctlr_el1, x10
msr elr_el3, x11
eret
.endm
@ -91,76 +99,31 @@ vector_base workaround_bpiall_vbar0_runtime_exceptions
*/
vector_entry workaround_bpiall_vbar0_sync_exception_sp_el0
b sync_exception_sp_el0
nop /* to force 8 byte alignment for the following stub */
/*
* Since each vector table entry is 128 bytes, we can store the
* stub context in the unused space to minimize memory footprint.
*/
aarch32_stub_smc:
.word EMIT_BPIALL
.word EMIT_MOV_R0_IMM(1)
.word EMIT_SMC
aarch32_stub_ctx_smc:
/* Mask all interrupts and set AArch32 Supervisor mode */
.quad (SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
SPSR_M_AARCH32 << SPSR_M_SHIFT | \
MODE32_svc << MODE32_SHIFT)
/*
* VBAR_EL3 points to vbar1 which is the vector table
* used while the workaround is executing.
*/
.quad workaround_bpiall_vbar1_runtime_exceptions
/* Setup SCTLR_EL1 with MMU off and I$ on */
stub_sel1_sctlr:
.quad SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
/* ELR_EL3 is setup to point to the sync exception stub in AArch32 */
.quad aarch32_stub_smc
aarch32_stub:
.word EMIT_BPIALL
.word EMIT_SMC
check_vector_size workaround_bpiall_vbar0_sync_exception_sp_el0
vector_entry workaround_bpiall_vbar0_irq_sp_el0
b irq_sp_el0
aarch32_stub_irq:
.word EMIT_BPIALL
.word EMIT_MOV_R0_IMM(2)
.word EMIT_SMC
aarch32_stub_ctx_irq:
.quad (SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
SPSR_M_AARCH32 << SPSR_M_SHIFT | \
MODE32_svc << MODE32_SHIFT)
.quad workaround_bpiall_vbar1_runtime_exceptions
.quad SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
.quad aarch32_stub_irq
check_vector_size workaround_bpiall_vbar0_irq_sp_el0
vector_entry workaround_bpiall_vbar0_fiq_sp_el0
b fiq_sp_el0
aarch32_stub_fiq:
.word EMIT_BPIALL
.word EMIT_MOV_R0_IMM(4)
.word EMIT_SMC
aarch32_stub_ctx_fiq:
.quad (SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
SPSR_M_AARCH32 << SPSR_M_SHIFT | \
MODE32_svc << MODE32_SHIFT)
.quad workaround_bpiall_vbar1_runtime_exceptions
.quad SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
.quad aarch32_stub_fiq
check_vector_size workaround_bpiall_vbar0_fiq_sp_el0
vector_entry workaround_bpiall_vbar0_serror_sp_el0
b serror_sp_el0
aarch32_stub_serror:
.word EMIT_BPIALL
.word EMIT_MOV_R0_IMM(8)
.word EMIT_SMC
aarch32_stub_ctx_serror:
.quad (SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
SPSR_M_AARCH32 << SPSR_M_SHIFT | \
MODE32_svc << MODE32_SHIFT)
.quad workaround_bpiall_vbar1_runtime_exceptions
.quad SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
.quad aarch32_stub_serror
check_vector_size workaround_bpiall_vbar0_serror_sp_el0
/* ---------------------------------------------------------------------
@ -188,19 +151,19 @@ vector_entry workaround_bpiall_vbar0_serror_sp_elx
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpiall_vbar0_sync_exception_aarch64
enter_workaround aarch32_stub_ctx_smc
enter_workaround 1
check_vector_size workaround_bpiall_vbar0_sync_exception_aarch64
vector_entry workaround_bpiall_vbar0_irq_aarch64
enter_workaround aarch32_stub_ctx_irq
enter_workaround 2
check_vector_size workaround_bpiall_vbar0_irq_aarch64
vector_entry workaround_bpiall_vbar0_fiq_aarch64
enter_workaround aarch32_stub_ctx_fiq
enter_workaround 4
check_vector_size workaround_bpiall_vbar0_fiq_aarch64
vector_entry workaround_bpiall_vbar0_serror_aarch64
enter_workaround aarch32_stub_ctx_serror
enter_workaround 8
check_vector_size workaround_bpiall_vbar0_serror_aarch64
/* ---------------------------------------------------------------------
@ -208,19 +171,19 @@ vector_entry workaround_bpiall_vbar0_serror_aarch64
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpiall_vbar0_sync_exception_aarch32
enter_workaround aarch32_stub_ctx_smc
enter_workaround 1
check_vector_size workaround_bpiall_vbar0_sync_exception_aarch32
vector_entry workaround_bpiall_vbar0_irq_aarch32
enter_workaround aarch32_stub_ctx_irq
enter_workaround 2
check_vector_size workaround_bpiall_vbar0_irq_aarch32
vector_entry workaround_bpiall_vbar0_fiq_aarch32
enter_workaround aarch32_stub_ctx_fiq
enter_workaround 4
check_vector_size workaround_bpiall_vbar0_fiq_aarch32
vector_entry workaround_bpiall_vbar0_serror_aarch32
enter_workaround aarch32_stub_ctx_serror
enter_workaround 8
check_vector_size workaround_bpiall_vbar0_serror_aarch32
/* ---------------------------------------------------------------------
@ -297,31 +260,33 @@ vector_entry workaround_bpiall_vbar1_serror_aarch64
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpiall_vbar1_sync_exception_aarch32
/* Restore register state from the workaround context */
ldp x2, x3, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD0]
ldp x4, x5, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD2]
ldp x6, x30, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD4]
/*
* w2 indicates which SEL1 stub was run and thus which original vector was used
* w3-w6 contain saved system register state (esr_el3 in w3)
* Restore LR and ELR_EL3 register state from the GP regs context
*/
ldp x30, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
/* Apply the restored system register state */
msr scr_el3, x2
msr spsr_el3, x3
msr elr_el3, x4
msr sctlr_el1, x5
msr esr_el3, x6
msr esr_el3, x3
msr spsr_el3, x4
msr scr_el3, x5
msr sctlr_el1, x6
msr elr_el3, x7
/*
* Workaround is complete, so swap VBAR_EL3 to point
* to workaround entry table in preparation for subsequent
* Sync/IRQ/FIQ/SError exceptions.
*/
adr x2, workaround_bpiall_vbar0_runtime_exceptions
msr vbar_el3, x2
adr x0, workaround_bpiall_vbar0_runtime_exceptions
msr vbar_el3, x0
/*
* Restore all GP regs except x0 and x1. The value in x0
* Restore all GP regs except x2 and x3 (esr). The value in x2
* indicates the type of the original exception.
*/
ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
ldp x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
ldp x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
ldp x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
@ -336,37 +301,38 @@ vector_entry workaround_bpiall_vbar1_sync_exception_aarch32
ldp x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26]
ldp x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
/*
* Each of these handlers will first restore x0 and x1 from
* the context and the branch to the common implementation for
* each of the exception types.
*/
tbnz x0, #1, workaround_bpiall_vbar1_irq
tbnz x0, #2, workaround_bpiall_vbar1_fiq
tbnz x0, #3, workaround_bpiall_vbar1_serror
/* Fallthrough case for Sync exception */
ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
/* Fast path Sync exceptions. Static predictor will fall through. */
tbz w2, #0, workaround_not_sync
ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
b sync_exception_aarch64
check_vector_size workaround_bpiall_vbar1_sync_exception_aarch32
vector_entry workaround_bpiall_vbar1_irq_aarch32
b report_unhandled_interrupt
workaround_bpiall_vbar1_irq:
ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
/*
* Post-workaround fan-out for non-sync exceptions
*/
workaround_not_sync:
tbnz w2, #3, workaround_bpiall_vbar1_serror
tbnz w2, #2, workaround_bpiall_vbar1_fiq
/* IRQ */
ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
b irq_aarch64
workaround_bpiall_vbar1_fiq:
ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
b fiq_aarch64
workaround_bpiall_vbar1_serror:
ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
b serror_aarch64
check_vector_size workaround_bpiall_vbar1_irq_aarch32
vector_entry workaround_bpiall_vbar1_fiq_aarch32
b report_unhandled_interrupt
workaround_bpiall_vbar1_fiq:
ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
b fiq_aarch64
check_vector_size workaround_bpiall_vbar1_fiq_aarch32
vector_entry workaround_bpiall_vbar1_serror_aarch32
b report_unhandled_exception
workaround_bpiall_vbar1_serror:
ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
b serror_aarch64
check_vector_size workaround_bpiall_vbar1_serror_aarch32