arm_fpga: Add support for topology self-discovery

As secondary cores show up, they populate an array to
announce themselves so plat_core_pos_by_mpidr() can
return an invalid COREID code for any non-existing
MPIDR that it is queried about.

The Power Domain Tree Description is populated with
a topology based on the maximum harcoded values.

Signed-off-by: Javier Almansa Sobrino <javier.almansasobrino@arm.com>
Change-Id: I8fd64761a2296714ce0f37c46544f3e6f13b5f61
This commit is contained in:
Javier Almansa Sobrino 2020-05-13 14:09:58 +01:00
parent 1056ddce76
commit 727bbf680d
6 changed files with 155 additions and 56 deletions

View File

@ -7,6 +7,8 @@
#include <arch.h>
#include <asm_macros.S>
#include <common/bl_common.h>
#include "../fpga_private.h"
#include <platform_def.h>
.globl plat_get_my_entrypoint
@ -14,10 +16,10 @@
.globl plat_is_my_cpu_primary
.globl platform_mem_init
.globl plat_my_core_pos
.globl plat_fpga_calc_core_pos
.globl plat_crash_console_init
.globl plat_crash_console_putc
.globl plat_crash_console_flush
.globl plat_fpga_calc_core_pos
/* -----------------------------------------------------------------------
* Indicate a cold boot for every CPU - warm boot is unsupported for the
@ -34,23 +36,59 @@ endfunc plat_get_my_entrypoint
* -----------------------------------------------------------------------
*/
func plat_secondary_cold_boot_setup
/*
* Wait for the primary processor to initialise the .BSS segment
* to avoid a race condition that would erase fpga_valid_mpids
* if it is populated before the C runtime is ready.
*
* We cannot use the current spin-lock implementation until the
* runtime is up and we should not rely on sevl/wfe instructions as
* it is optional whether they are implemented or not, so we use
* a global variable as lock and wait for the primary processor to
* finish the C runtime bring-up.
*/
ldr w0, =C_RUNTIME_READY_KEY
adrp x1, secondary_core_spinlock
add x1, x1, :lo12:secondary_core_spinlock
1:
wfe
ldr w2, [x1]
cmp w2, w0
b.ne 1b
/* Prevent reordering of the store into fpga_valid_mpids below */
dmb ish
mov x10, x30
bl plat_my_core_pos
mov x30, x10
adrp x4, fpga_valid_mpids
add x4, x4, :lo12:fpga_valid_mpids
mov x5, #VALID_MPID
strb w5, [x4, x0]
/*
* Poll the CPU's hold entry until it indicates to jump
* to the entrypoint address.
*/
bl plat_my_core_pos
lsl x0, x0, #PLAT_FPGA_HOLD_ENTRY_SHIFT
ldr x1, =hold_base
ldr x2, =fpga_sec_entrypoint
adrp x1, hold_base
add x1, x1, :lo12:hold_base
poll_hold_entry:
ldr x3, [x1, x0]
ldr x3, [x1, x0, LSL #PLAT_FPGA_HOLD_ENTRY_SHIFT]
cmp x3, #PLAT_FPGA_HOLD_STATE_GO
b.ne 1f
adrp x2, fpga_sec_entrypoint
add x2, x2, :lo12:fpga_sec_entrypoint
ldr x3, [x2]
br x3
1:
wfe
b poll_hold_entry
endfunc plat_secondary_cold_boot_setup
/* -----------------------------------------------------------------------
@ -73,12 +111,16 @@ func platform_mem_init
endfunc platform_mem_init
func plat_my_core_pos
ldr x1, =(MPID_MASK & ~(MPIDR_AFFLVL_MASK << MPIDR_AFF3_SHIFT))
mrs x0, mpidr_el1
and x0, x0, x1
b plat_fpga_calc_core_pos
endfunc plat_my_core_pos
/* -----------------------------------------------------------------------
* unsigned int plat_fpga_calc_core_pos(u_register_t mpidr)
* unsigned int plat_fpga_calc_core_pos (uint32_t mpid)
* Clobber registers: x0 to x5
* -----------------------------------------------------------------------
*/
func plat_fpga_calc_core_pos
@ -88,6 +130,7 @@ func plat_fpga_calc_core_pos
*
* If not set, shift MPIDR to left to make it look as if in a
* multi-threaded implementation.
*
*/
tst x0, #MPIDR_MT_MASK
lsl x3, x0, #MPIDR_AFFINITY_BITS
@ -98,11 +141,13 @@ func plat_fpga_calc_core_pos
ubfx x1, x3, #MPIDR_AFF1_SHIFT, #MPIDR_AFFINITY_BITS
ubfx x2, x3, #MPIDR_AFF2_SHIFT, #MPIDR_AFFINITY_BITS
/* Compute linear position */
mov x4, #FPGA_MAX_CPUS_PER_CLUSTER
madd x1, x2, x4, x1
mov x5, #FPGA_MAX_PE_PER_CPU
/* Compute linear position */
madd x1, x2, x4, x1
madd x0, x1, x5, x0
ret
endfunc plat_fpga_calc_core_pos

View File

@ -7,23 +7,23 @@
#include <assert.h>
#include <common/fdt_wrappers.h>
#include <drivers/delay_timer.h>
#include <drivers/generic_delay_timer.h>
#include <lib/mmio.h>
#include <libfdt.h>
#include "fpga_private.h"
#include <plat/common/platform.h>
#include <platform_def.h>
#include "fpga_private.h"
static entry_point_info_t bl33_image_ep_info;
volatile uint32_t secondary_core_spinlock;
uintptr_t plat_get_ns_image_entrypoint(void)
{
#ifdef PRELOADED_BL33_BASE
return PRELOADED_BL33_BASE;
#else
return 0;
return 0ULL;
#endif
}
@ -35,6 +35,17 @@ uint32_t fpga_get_spsr_for_bl33_entry(void)
void bl31_early_platform_setup2(u_register_t arg0, u_register_t arg1,
u_register_t arg2, u_register_t arg3)
{
/* Add this core to the VALID mpids list */
fpga_valid_mpids[plat_my_core_pos()] = VALID_MPID;
/*
* Notify the secondary CPUs that the C runtime is ready
* so they can announce themselves.
*/
secondary_core_spinlock = C_RUNTIME_READY_KEY;
dsbish();
sev();
fpga_console_init();
bl33_image_ep_info.pc = plat_get_ns_image_entrypoint();
@ -54,11 +65,37 @@ void bl31_plat_arch_setup(void)
void bl31_platform_setup(void)
{
/* Initialize the GIC driver, cpu and distributor interfaces */
plat_fpga_gic_init();
/* Write frequency to CNTCRL and initialize timer */
generic_delay_timer_init();
/*
* Before doing anything else, wait for some time to ensure that
* the secondary CPUs have populated the fpga_valid_mpids array.
* As the number of secondary cores is unknown and can even be 0,
* it is not possible to rely on any signal from them, so use a
* delay instead.
*/
mdelay(5);
/*
* On the event of a cold reset issued by, for instance, a reset pin
* assertion, we cannot guarantee memory to be initialized to zero.
* In such scenario, if the secondary cores reached
* plat_secondary_cold_boot_setup before the primary one initialized
* .BSS, we could end up having a race condition if the spinlock
* was not cleared before.
*
* Similarly, if there were a reset before the spinlock had been
* cleared, the secondary cores would find the lock opened before
* .BSS is cleared, causing another race condition.
*
* So clean the spinlock as soon as we think it is safe to reduce the
* chances of any race condition on a reset.
*/
secondary_core_spinlock = 0UL;
/* Initialize the GIC driver, cpu and distributor interfaces */
plat_fpga_gic_init();
}
entry_point_info_t *bl31_plat_get_next_image_ep_info(uint32_t type)

View File

@ -18,6 +18,7 @@
* that are present will still be indexed appropriately regardless of any empty
* entries in the array used to represent the topology.
*/
#define FPGA_MAX_CLUSTER_COUNT 4
#define FPGA_MAX_CPUS_PER_CLUSTER 8
#define FPGA_MAX_PE_PER_CPU 4

View File

@ -7,12 +7,23 @@
#ifndef FPGA_PRIVATE_H
#define FPGA_PRIVATE_H
unsigned int plat_fpga_calc_core_pos(u_register_t mpidr);
#include "../fpga_def.h"
#include <platform_def.h>
#define C_RUNTIME_READY_KEY (0xaa55aa55)
#define VALID_MPID (1U)
#ifndef __ASSEMBLER__
extern unsigned char fpga_valid_mpids[PLATFORM_CORE_COUNT];
void fpga_console_init(void);
void plat_fpga_gic_init(void);
void fpga_pwr_gic_on_finish(void);
void fpga_pwr_gic_off(void);
unsigned int plat_fpga_calc_core_pos(uint32_t mpid);
#endif
#endif /* __ASSEMBLER__ */
#endif /* FPGA_PRIVATE_H */

View File

@ -5,15 +5,20 @@
*/
#include <arch_helpers.h>
#include <common/debug.h>
#include <lib/spinlock.h>
#include "fpga_private.h"
#include <plat/common/platform.h>
#include <platform_def.h>
static unsigned char fpga_power_domain_tree_desc[FPGA_MAX_CLUSTER_COUNT + 2];
unsigned char fpga_power_domain_tree_desc[FPGA_MAX_CLUSTER_COUNT + 2];
unsigned char fpga_valid_mpids[PLATFORM_CORE_COUNT];
const unsigned char *plat_get_power_domain_tree_desc(void)
{
int i;
unsigned int i;
/*
* The highest level is the system level. The next level is constituted
* by clusters and then cores in clusters.
@ -21,12 +26,26 @@ const unsigned char *plat_get_power_domain_tree_desc(void)
* This description of the power domain topology is aligned with the CPU
* indices returned by the plat_core_pos_by_mpidr() and plat_my_core_pos()
* APIs.
*
* A description of the topology tree can be found at
* https://trustedfirmware-a.readthedocs.io/en/latest/design/psci-pd-tree.html#design
*/
fpga_power_domain_tree_desc[0] = 1;
fpga_power_domain_tree_desc[1] = FPGA_MAX_CLUSTER_COUNT;
for (i = 0; i < FPGA_MAX_CLUSTER_COUNT; i++) {
fpga_power_domain_tree_desc[i + 2] = FPGA_MAX_CPUS_PER_CLUSTER * FPGA_MAX_PE_PER_CPU;
if (fpga_power_domain_tree_desc[0] == 0U) {
/*
* As fpga_power_domain_tree_desc[0] == 0, assume that the
* Power Domain Topology Tree has not been initialized, so
* perform the initialization here.
*/
fpga_power_domain_tree_desc[0] = 1U;
fpga_power_domain_tree_desc[1] = FPGA_MAX_CLUSTER_COUNT;
for (i = 0U; i < FPGA_MAX_CLUSTER_COUNT; i++) {
fpga_power_domain_tree_desc[2 + i] =
(FPGA_MAX_CPUS_PER_CLUSTER *
FPGA_MAX_PE_PER_CPU);
}
}
return fpga_power_domain_tree_desc;
@ -34,40 +53,25 @@ const unsigned char *plat_get_power_domain_tree_desc(void)
int plat_core_pos_by_mpidr(u_register_t mpidr)
{
unsigned int cluster_id, cpu_id, thread_id;
unsigned int core_pos;
/*
* The image running on the FPGA may or may not implement
* multithreading, and it shouldn't be assumed this is consistent
* across all CPUs.
* This ensures that any passed mpidr values reflect the status of the
* primary CPU's MT bit.
*/
mpidr &= (MPID_MASK & ~(MPIDR_AFFLVL_MASK << MPIDR_AFF3_SHIFT));
mpidr |= (read_mpidr_el1() & MPIDR_MT_MASK);
mpidr &= MPID_MASK;
if (mpidr & MPIDR_MT_MASK) {
thread_id = MPIDR_AFFLVL0_VAL(mpidr);
cpu_id = MPIDR_AFFLVL1_VAL(mpidr);
cluster_id = MPIDR_AFFLVL2_VAL(mpidr);
} else {
thread_id = 0;
cpu_id = MPIDR_AFFLVL0_VAL(mpidr);
cluster_id = MPIDR_AFFLVL1_VAL(mpidr);
if ((MPIDR_AFFLVL2_VAL(mpidr) >= FPGA_MAX_CLUSTER_COUNT) ||
(MPIDR_AFFLVL1_VAL(mpidr) >= FPGA_MAX_CPUS_PER_CLUSTER) ||
(MPIDR_AFFLVL0_VAL(mpidr) >= FPGA_MAX_PE_PER_CPU)) {
ERROR ("Invalid mpidr: 0x%08x\n", (uint32_t)mpidr);
panic();
}
if (cluster_id >= FPGA_MAX_CLUSTER_COUNT) {
/* Calculate the core position, based on the maximum topology. */
core_pos = plat_fpga_calc_core_pos(mpidr);
/* Check whether this core is actually present. */
if (fpga_valid_mpids[core_pos] != VALID_MPID) {
return -1;
}
if (cpu_id >= FPGA_MAX_CPUS_PER_CLUSTER) {
return -1;
}
if (thread_id >= FPGA_MAX_PE_PER_CPU) {
return -1;
}
/* Calculate the correct core, catering for multi-threaded images */
return (int) plat_fpga_calc_core_pos(mpidr);
return core_pos;
}

View File

@ -21,11 +21,12 @@
#define CACHE_WRITEBACK_SHIFT U(6)
#define CACHE_WRITEBACK_GRANULE (U(1) << CACHE_WRITEBACK_SHIFT)
#define PLATFORM_CORE_COUNT \
(FPGA_MAX_CLUSTER_COUNT * FPGA_MAX_CPUS_PER_CLUSTER * FPGA_MAX_PE_PER_CPU)
#define PLATFORM_CORE_COUNT \
(FPGA_MAX_CLUSTER_COUNT * \
FPGA_MAX_CPUS_PER_CLUSTER * \
FPGA_MAX_PE_PER_CPU)
#define PLAT_NUM_PWR_DOMAINS (FPGA_MAX_CLUSTER_COUNT + \
PLATFORM_CORE_COUNT) + 1
#define PLAT_NUM_PWR_DOMAINS (FPGA_MAX_CLUSTER_COUNT + PLATFORM_CORE_COUNT + 1)
#if !ENABLE_PIE
#define BL31_BASE UL(0x80000000)