From 54035fc4672aab046f3cf5288ce9870613bd713d Mon Sep 17 00:00:00 2001 From: Sandrine Bailleux Date: Wed, 13 Jan 2016 14:57:38 +0000 Subject: [PATCH] Disable non-temporal hint on Cortex-A53/57 The LDNP/STNP instructions as implemented on Cortex-A53 and Cortex-A57 do not behave in a way most programmers expect, and will most probably result in a significant speed degradation to any code that employs them. The ARMv8-A architecture (see Document ARM DDI 0487A.h, section D3.4.3) allows cores to ignore the non-temporal hint and treat LDNP/STNP as LDP/STP instead. This patch introduces 2 new build flags: A53_DISABLE_NON_TEMPORAL_HINT and A57_DISABLE_NON_TEMPORAL_HINT to enforce this behaviour on Cortex-A53 and Cortex-A57. They are enabled by default. The string printed in debug builds when a specific CPU errata workaround is compiled in but skipped at runtime has been generalised, so that it can be reused for the non-temporal hint use case as well. Change-Id: I3e354f4797fd5d3959872a678e160322b13867a1 --- Makefile | 8 +++--- docs/cpu-specific-build-macros.md | 19 +++++++++++--- include/lib/cpus/aarch64/cortex_a57.h | 7 +++--- lib/cpus/aarch64/cortex_a53.S | 33 ++++++++++++++---------- lib/cpus/aarch64/cortex_a57.S | 36 ++++++++++++++++++++++++++- lib/cpus/aarch64/cpu_helpers.S | 17 +++++++------ lib/cpus/cpu-ops.mk | 18 +++++++++++++- 7 files changed, 105 insertions(+), 33 deletions(-) diff --git a/Makefile b/Makefile index add2e9f9a..ac688ba88 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # -# Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved. +# Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -294,9 +294,9 @@ ifneq (${ENABLE_PLAT_COMPAT}, 0) include plat/compat/plat_compat.mk endif -# Include the CPU specific operations makefile. 
By default all CPU errata -# workarounds and CPU specific optimisations are disabled. This can be -# overridden by the platform. +# Include the CPU specific operations makefile, which provides default +# values for all CPU errata workarounds and CPU specific optimisations. +# This can be overridden by the platform. include lib/cpus/cpu-ops.mk diff --git a/docs/cpu-specific-build-macros.md b/docs/cpu-specific-build-macros.md index d9b7108cb..e7185aa4d 100644 --- a/docs/cpu-specific-build-macros.md +++ b/docs/cpu-specific-build-macros.md @@ -26,8 +26,8 @@ by ARM. The errata workarounds are implemented for a particular revision or a set of processor revisions. This is checked by reset handler at runtime. Each errata workaround is identified by its `ID` as specified in the processor's errata notice document. The format of the define used to enable/disable the -errata is `ERRATA_<Processor name>_<ID>` where the `Processor name` -is either `A57` for the `Cortex_A57` CPU or `A53` for `Cortex_A53` CPU. +errata workaround is `ERRATA_<Processor name>_<ID>`, where the `Processor name` +is for example `A57` for the `Cortex_A57` CPU. All workarounds are disabled by default. The platform is reponsible for enabling these workarounds according to its requirement by defining the @@ -74,6 +74,19 @@ architecture that can be enabled by the platform as desired. sequence. Each Cortex-A57 based platform must make its own decision on whether to use the optimization. +* `A53_DISABLE_NON_TEMPORAL_HINT`: This flag disables the cache non-temporal + hint. The LDNP/STNP instructions as implemented on Cortex-A53 do not behave + in a way most programmers expect, and will most probably result in a + significant speed degradation to any code that employs them. The ARMv8-A + architecture (see ARM DDI 0487A.h, section D3.4.3) allows cores to ignore + the non-temporal hint and treat LDNP/STNP as LDP/STP instead. Enabling this + flag enforces this behaviour.
This needs to be enabled only for revisions + <= r0p3 of the CPU and is enabled by default. + +* `A57_DISABLE_NON_TEMPORAL_HINT`: This flag has the same behaviour as + `A53_DISABLE_NON_TEMPORAL_HINT` but for Cortex-A57. This needs to be + enabled only for revisions <= r1p2 of the CPU and is enabled by default. + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -_Copyright (c) 2014, ARM Limited and Contributors. All rights reserved._ +_Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved._ diff --git a/include/lib/cpus/aarch64/cortex_a57.h b/include/lib/cpus/aarch64/cortex_a57.h index c81259c89..c512129a1 100644 --- a/include/lib/cpus/aarch64/cortex_a57.h +++ b/include/lib/cpus/aarch64/cortex_a57.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,8 +61,9 @@ ******************************************************************************/ #define CPUACTLR_EL1 S3_1_C15_C2_0 /* Instruction def. */ -#define CPUACTLR_NO_ALLOC_WBWA (1 << 49) -#define CPUACTLR_DCC_AS_DCCI (1 << 44) +#define CPUACTLR_DIS_OVERREAD (1 << 52) +#define CPUACTLR_NO_ALLOC_WBWA (1 << 49) +#define CPUACTLR_DCC_AS_DCCI (1 << 44) /******************************************************************************* * L2 Control register specific definitions. diff --git a/lib/cpus/aarch64/cortex_a53.S b/lib/cpus/aarch64/cortex_a53.S index e4b94e809..00ceadb4f 100644 --- a/lib/cpus/aarch64/cortex_a53.S +++ b/lib/cpus/aarch64/cortex_a53.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -86,33 +86,40 @@ apply_826319: ret endfunc errata_a53_826319_wa - /* -------------------------------------------------- - * Errata Workaround for Cortex A53 Errata #836870. - * This applies only to revision <= r0p3 of Cortex A53. - * From r0p4 and onwards, this errata is enabled by - * default. + /* --------------------------------------------------------------------- + * Disable the cache non-temporal hint. + * + * This ignores the Transient allocation hint in the MAIR and treats + * allocations the same as non-transient allocation types. As a result, + * the LDNP and STNP instructions in AArch64 behave the same as the + * equivalent LDP and STP instructions. + * + * This is relevant only for revisions <= r0p3 of Cortex-A53. + * From r0p4 and onwards, the bit to disable the hint is enabled by + * default at reset. + * * Inputs: * x0: variant[4:7] and revision[0:3] of current cpu. * Clobbers : x0 - x5 - * -------------------------------------------------- + * --------------------------------------------------------------------- */ -func errata_a53_836870_wa +func a53_disable_non_temporal_hint /* * Compare x0 against revision r0p3 */ cmp x0, #3 - b.ls apply_836870 + b.ls disable_hint #if DEBUG b print_revision_warning #else ret #endif -apply_836870: +disable_hint: mrs x1, CPUACTLR_EL1 orr x1, x1, #CPUACTLR_DTAH msr CPUACTLR_EL1, x1 ret -endfunc errata_a53_836870_wa +endfunc a53_disable_non_temporal_hint /* ------------------------------------------------- * The CPU Ops reset function for Cortex-A53. 
@@ -138,9 +145,9 @@ func cortex_a53_reset_func bl errata_a53_826319_wa #endif -#if ERRATA_A53_836870 +#if ERRATA_A53_836870 || A53_DISABLE_NON_TEMPORAL_HINT mov x0, x15 - bl errata_a53_836870_wa + bl a53_disable_non_temporal_hint #endif /* --------------------------------------------- diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S index 05799d617..8bcb5ddb9 100644 --- a/lib/cpus/aarch64/cortex_a57.S +++ b/lib/cpus/aarch64/cortex_a57.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -153,6 +153,35 @@ skip_813420: ret endfunc errata_a57_813420_wa + /* -------------------------------------------------------------------- + * Disable the over-read from the LDNP instruction. + * + * This applies to all revisions <= r1p2. The performance degradation + * observed with LDNP/STNP has been fixed on r1p3 and onwards. + * + * Inputs: + * x0: variant[4:7] and revision[0:3] of current cpu. + * Clobbers : x0 - x5, x30 + * --------------------------------------------------------------------- + */ +func a57_disable_ldnp_overread + /* + * Compare x0 against revision r1p2 + */ + cmp x0, #0x12 + b.ls disable_hint +#if DEBUG + b print_revision_warning +#else + ret +#endif +disable_hint: + mrs x1, CPUACTLR_EL1 + orr x1, x1, #CPUACTLR_DIS_OVERREAD + msr CPUACTLR_EL1, x1 + ret +endfunc a57_disable_ldnp_overread + /* ------------------------------------------------- * The CPU Ops reset function for Cortex-A57. 
* Clobbers: x0-x5, x15, x19, x30 @@ -181,6 +210,11 @@ func cortex_a57_reset_func bl errata_a57_813420_wa #endif +#if A57_DISABLE_NON_TEMPORAL_HINT + mov x0, x15 + bl a57_disable_ldnp_overread +#endif + /* --------------------------------------------- * As a bare minimum enable the SMP bit if it is * not already set. diff --git a/lib/cpus/aarch64/cpu_helpers.S b/lib/cpus/aarch64/cpu_helpers.S index e8a139299..e41d95ba3 100644 --- a/lib/cpus/aarch64/cpu_helpers.S +++ b/lib/cpus/aarch64/cpu_helpers.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -205,16 +205,17 @@ error_exit: endfunc get_cpu_ops_ptr #if DEBUG - /* - * This function prints a warning message to the crash console - * if the CPU revision/part number does not match the errata - * workaround enabled in the build. - * Clobber: x30, x0 - x5 - */ .section .rodata.rev_warn_str, "aS" rev_warn_str: - .asciz "Warning: Skipping Errata workaround for non matching CPU revision number.\n" + .asciz "Warning: Skipping CPU specific reset operation for non-matching CPU revision number.\n" + /* + * This function prints the above warning message to the crash console. + * It should be called when a CPU specific operation is enabled in the + * build but doesn't apply to this CPU revision/part number. + * + * Clobber: x30, x0 - x5 + */ .globl print_revision_warning func print_revision_warning mov x5, x30 diff --git a/lib/cpus/cpu-ops.mk b/lib/cpus/cpu-ops.mk index a872360e4..a3a08e155 100644 --- a/lib/cpus/cpu-ops.mk +++ b/lib/cpus/cpu-ops.mk @@ -1,5 +1,5 @@ # -# Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. +# Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: @@ -32,10 +32,26 @@ # cluster is powered down. SKIP_A57_L1_FLUSH_PWR_DWN ?=0 +# Flag to disable the cache non-temporal hint. +# It is enabled by default. +A53_DISABLE_NON_TEMPORAL_HINT ?=1 + +# Flag to disable the cache non-temporal hint. +# It is enabled by default. +A57_DISABLE_NON_TEMPORAL_HINT ?=1 + # Process SKIP_A57_L1_FLUSH_PWR_DWN flag $(eval $(call assert_boolean,SKIP_A57_L1_FLUSH_PWR_DWN)) $(eval $(call add_define,SKIP_A57_L1_FLUSH_PWR_DWN)) +# Process A53_DISABLE_NON_TEMPORAL_HINT flag +$(eval $(call assert_boolean,A53_DISABLE_NON_TEMPORAL_HINT)) +$(eval $(call add_define,A53_DISABLE_NON_TEMPORAL_HINT)) + +# Process A57_DISABLE_NON_TEMPORAL_HINT flag +$(eval $(call assert_boolean,A57_DISABLE_NON_TEMPORAL_HINT)) +$(eval $(call add_define,A57_DISABLE_NON_TEMPORAL_HINT)) + # CPU Errata Build flags. These should be enabled by the # platform if the errata needs to be applied.