diff --git a/common/fdt_fixup.c b/common/fdt_fixup.c
index 980e60d45..a1604e74f 100644
--- a/common/fdt_fixup.c
+++ b/common/fdt_fixup.c
@@ -377,3 +377,73 @@ int fdt_add_cpus_node(void *dtb, unsigned int afflv0,
 
 	return offs;
 }
+
+/**
+ * fdt_adjust_gic_redist() - Adjust GICv3 redistributor size
+ * @dtb: Pointer to the DT blob in memory
+ * @nr_cores: Number of CPU cores on this system.
+ * @gicr_frame_size: Size of the GICR frame per core
+ *
+ * On a GICv3 compatible interrupt controller, the redistributor provides
+ * a number of 64k pages per each supported core. So with a dynamic topology,
+ * this size cannot be known upfront and thus can't be hardcoded into the DTB.
+ *
+ * Find the DT node describing the GICv3 interrupt controller, and adjust
+ * the size of the redistributor to match the number of actual cores on
+ * this system.
+ * A GICv4 compatible redistributor uses four 64K pages per core, whereas GICs
+ * without support for direct injection of virtual interrupts use two 64K pages.
+ * The @gicr_frame_size parameter should be 262144 and 131072, respectively.
+ * The parent node of the GIC node must use one or two #size-cells, and the
+ * new size must fit into that many cells; -EINVAL is returned otherwise.
+ *
+ * Return: 0 on success, negative error value otherwise.
+ */
+int fdt_adjust_gic_redist(void *dtb, unsigned int nr_cores,
+			  unsigned int gicr_frame_size)
+{
+	int offset = fdt_node_offset_by_compatible(dtb, 0, "arm,gic-v3");
+	/* Widen before multiplying, so that the product cannot wrap. */
+	uint64_t redist_size = (uint64_t)nr_cores * gicr_frame_size;
+	uint64_t redist_size_64;
+	uint32_t redist_size_32;
+	void *val;
+	int parent;
+	int ac, sc;
+
+	if (offset < 0) {
+		return offset;
+	}
+
+	parent = fdt_parent_offset(dtb, offset);
+	if (parent < 0) {
+		return parent;
+	}
+	ac = fdt_address_cells(dtb, parent);
+	sc = fdt_size_cells(dtb, parent);
+	/* The buffers below hold one or two cells, reject anything else. */
+	if (ac < 0 || sc < 1 || sc > 2) {
+		return -EINVAL;
+	}
+
+	if (sc == 1) {
+		/* A single size cell cannot describe 4GB or more. */
+		if (redist_size > UINT32_MAX) {
+			return -EINVAL;
+		}
+		redist_size_32 = cpu_to_fdt32((uint32_t)redist_size);
+		val = &redist_size_32;
+	} else {
+		redist_size_64 = cpu_to_fdt64(redist_size);
+		val = &redist_size_64;
+	}
+
+	/*
+	 * The redistributor is described in the second "reg" entry.
+	 * So we have to skip one address and one size cell, then another
+	 * address cell to get to the second size cell.
+	 */
+	return fdt_setprop_inplace_namelen_partial(dtb, offset, "reg", 3,
+						   (ac + sc + ac) * 4,
+						   val, sc * 4);
+}
diff --git a/docs/plat/arm/arm_fpga/index.rst b/docs/plat/arm/arm_fpga/index.rst
new file mode 100644
index 000000000..5427c1dde
--- /dev/null
+++ b/docs/plat/arm/arm_fpga/index.rst
@@ -0,0 +1,97 @@
+Arm FPGA Platform
+=================
+
+This platform supports FPGA images used internally in Arm Ltd., for
+testing and bringup of new cores. With that focus, peripheral support is
+minimal: there is no mass storage or display output, for instance. Also
+this port ignores any power management features of the platform.
+Some interconnect setup is done internally by the platform, so the TF-A code
+just needs to setup UART and GIC.
+
+The FPGA platform requires to pass on a DTB for the non-secure payload
+(mostly Linux), so we let TF-A use information from the DTB for dynamic
+configuration: the UART and GIC base addresses are read from there.
+
+As a result this port is a fairly generic BL31-only port, which can serve
+as a template for a minimal new (and possibly DT-based) platform port.
+
+The aim of this port is to support as many FPGA images as possible with
+a single build. Image specific data must be described in the DTB or should
+be auto-detected at runtime.
+
+As the number and topology layout of the CPU cores differs significantly
+across the various images, this is detected at runtime by BL31.
+The /cpus node in the DT will be added and filled accordingly, as long as
+it does not exist already.
+
+Platform-specific build options
+-------------------------------
+
+-  ``SUPPORT_UNKNOWN_MPID`` : Boolean option to allow unknown MPIDR registers.
+   Normally TF-A panics if it encounters a MPID value not matched to its
+   internal list, but for new or experimental cores this creates a lot of
+   churn.
With this option, the code will fall back to some basic CPU support
+   code (only architectural system registers, and no errata).
+   Default value of this flag is 1.
+
+-  ``PRELOADED_BL33_BASE`` : Physical address of the BL33 non-secure payload.
+   It must have been loaded into DRAM already, typically this is done by
+   the script that also loads BL31 and the DTB.
+   It defaults to 0x80080000, which is the traditional load address for an
+   arm64 Linux kernel.
+
+-  ``FPGA_PRELOADED_DTB_BASE`` : Physical address of the flattened device
+   tree blob (DTB). This DT will be used by TF-A for dynamic configuration,
+   so it must describe at least the UART and a GICv3 interrupt controller.
+   The DT gets amended by the code, to potentially add a command line and
+   fill the CPU topology nodes. It will also be passed on to BL33, by
+   putting its address into the x0 register before jumping to the entry
+   point (following the Linux kernel boot protocol).
+   It defaults to 0x80070000, which is 64KB before the BL33 load address.
+
+-  ``FPGA_PRELOADED_CMD_LINE`` : Physical address of the command line to
+   put into the devicetree blob. Due to the lack of a proper bootloader,
+   a command line can be put somewhere into memory, so that BL31 will
+   detect it and copy it into the DTB passed on to BL33.
+   To avoid random garbage, there needs to be a "CMD:" signature before the
+   actual command line.
+   Defaults to 0x1000, which is normally in the "ROM" space of the typical
+   FPGA image (which can be written by the FPGA payload uploader, but is
+   read-only to the CPU). The FPGA payload tool should be given a text file
+   containing the desired command line, prefixed by the "CMD:" signature.
+
+Building the TF-A image
+-----------------------
+
+   .. code:: shell
+
+       make PLAT=arm_fpga DEBUG=1
+
+   This will use the default load addresses as described above. When those
+   addresses need to differ for a certain setup, they can be passed on the
+   make command line:
+
+   .. code:: shell
+
+       make PLAT=arm_fpga DEBUG=1 PRELOADED_BL33_BASE=0x80200000 FPGA_PRELOADED_DTB_BASE=0x80180000 bl31
+
+Running the TF-A image
+----------------------
+
+After building TF-A, the actual TF-A code will be located in ``bl31.bin`` in
+the build directory.
+Additionally there is a ``bl31.axf`` ELF file, which contains BL31, as well
+as some simple ROM trampoline code (required by the Arm FPGA boot flow) and
+a generic DTB to support most of the FPGA images. This can be simply handed
+over to the FPGA payload uploader, which will take care of loading the
+components at their respective load addresses. In addition to this file
+you need at least a BL33 payload (typically a Linux kernel image), optionally
+a Linux initrd image file and possibly a command line:
+
+   .. code:: shell
+
+       fpga-run ... -m bl31.axf -l auto -m Image -l 0x80080000 -m initrd.gz -l 0x84000000 -m cmdline.txt -l 0x1000
+
+--------------
+
+*Copyright (c) 2020, Arm Limited. All rights reserved.*
diff --git a/docs/plat/arm/index.rst b/docs/plat/arm/index.rst
index 1afe475c6..9c2fcb102 100644
--- a/docs/plat/arm/index.rst
+++ b/docs/plat/arm/index.rst
@@ -9,6 +9,7 @@ Arm Development Platforms
    fvp/index
    fvp-ve/index
    tc0/index
+   arm_fpga/index
    arm-build-options
 
 This chapter holds documentation related to Arm's development platforms,
diff --git a/drivers/arm/gic/v3/gicv3_helpers.c b/drivers/arm/gic/v3/gicv3_helpers.c
index 09fa6786e..ff346f9df 100644
--- a/drivers/arm/gic/v3/gicv3_helpers.c
+++ b/drivers/arm/gic/v3/gicv3_helpers.c
@@ -326,3 +326,33 @@ unsigned int gicv3_secure_ppi_sgi_config_props(uintptr_t gicr_base,
 
 	return ctlr_enable;
 }
+
+/**
+ * gicv3_rdistif_get_number_frames() - determine size of GICv3 GICR region
+ * @gicr_frame: base address of the GICR region to check
+ *
+ * This iterates over the GICR_TYPER registers of multiple GICR frames in
+ * a GICR region, to find the instance which has the LAST bit set.
For most
+ * systems this corresponds to the number of cores handled by a redistributor,
+ * but there could be disabled cores among them.
+ * It assumes that each GICR region is fully accessible (till the LAST bit
+ * marks the end of the region).
+ * If a platform has multiple GICR regions, this function would need to be
+ * called multiple times, providing the respective GICR base address each time.
+ *
+ * Return: number of valid GICR frames (at least 1, up to PLATFORM_CORE_COUNT)
+ */
+unsigned int gicv3_rdistif_get_number_frames(const uintptr_t gicr_frame)
+{
+	uintptr_t frame = gicr_frame;
+	unsigned int nr_frames = 1U;
+
+	/* Step to the next frame until LAST is set or the limit is hit. */
+	while ((nr_frames < PLATFORM_CORE_COUNT) &&
+	       ((gicr_read_typer(frame) & TYPER_LAST_BIT) == 0U)) {
+		frame += (1U << GICR_PCPUBASE_SHIFT);
+		nr_frames++;
+	}
+
+	return nr_frames;
+}
diff --git a/fdts/arm_fpga.dts b/fdts/arm_fpga.dts
new file mode 100644
index 000000000..6a966fd85
--- /dev/null
+++ b/fdts/arm_fpga.dts
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: (GPL-2.0 or BSD-3-Clause)
+/*
+ * Copyright (c) 2020, Arm Limited. All rights reserved.
+ *
+ * Devicetree for the Arm Ltd. FPGA platform
+ * Number and kind of CPU cores differs from image to image, so the
+ * topology is auto-detected by BL31, and the /cpus node is created and
+ * populated accordingly at runtime.
+ */
+
+#include
+
+/dts-v1/;
+
+/ {
+	model = "ARM FPGA";
+	compatible = "arm,fpga", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		serial0 = &dbg_uart;
+	};
+
+	chosen {
+		stdout-path = "serial0:38400n8";
+		bootargs = "console=ttyAMA0,38400n8 earlycon";
+		/* Allow to upload a generous 100MB initrd payload. */
+		linux,initrd-start = <0x0 0x84000000>;
+		linux,initrd-end = <0x0 0x85400000>;
+	};
+
+	/* /cpus node will be added by BL31 at runtime.
*/ + + psci { + compatible = "arm,psci-0.2"; + method = "smc"; + }; + + timer { + compatible = "arm,armv8-timer"; + clock-frequency = <10000000>; + interrupts = , + , + , + ; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = ; + }; + + /* This node will be removed at runtime on cores without SPE. */ + spe-pmu { + compatible = "arm,statistical-profiling-extension-v1"; + interrupts = ; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x0 0x80000000 0x0 0x80000000>, + <0x8 0x80000000 0x1 0x80000000>; + }; + + + bus_refclk: refclk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <100000000>; + clock-output-names = "apb_pclk"; + }; + + uartclk: baudclock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <10000000>; + clock-output-names = "uartclk"; + }; + + dbg_uart: serial@7ff80000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0 0x7ff80000 0x0 0x00001000>; + interrupts = ; + clocks = <&uartclk>, <&bus_refclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + gic: interrupt-controller@30000000 { + compatible = "arm,gic-v3"; + #address-cells = <2>; + #interrupt-cells = <3>; + #size-cells = <2>; + ranges; + interrupt-controller; + reg = <0x0 0x30000000 0x0 0x00010000>, /* GICD */ + /* The GICR size will be adjusted at runtime to match the cores. 
*/ + <0x0 0x30040000 0x0 0x00020000>; /* GICR for one core */ + interrupts = ; + }; +}; diff --git a/include/common/fdt_fixup.h b/include/common/fdt_fixup.h index 29d8b3aa0..2e9d49d53 100644 --- a/include/common/fdt_fixup.h +++ b/include/common/fdt_fixup.h @@ -13,5 +13,7 @@ int fdt_add_reserved_memory(void *dtb, const char *node_name, uintptr_t base, size_t size); int fdt_add_cpus_node(void *dtb, unsigned int afflv0, unsigned int afflv1, unsigned int afflv2); +int fdt_adjust_gic_redist(void *dtb, unsigned int nr_cores, + unsigned int gicr_frame_size); #endif /* FDT_FIXUP_H */ diff --git a/include/drivers/arm/gicv3.h b/include/drivers/arm/gicv3.h index 18d5b73e2..d8ac4cb33 100644 --- a/include/drivers/arm/gicv3.h +++ b/include/drivers/arm/gicv3.h @@ -488,6 +488,7 @@ void gicv3_distif_init(void); void gicv3_rdistif_init(unsigned int proc_num); void gicv3_rdistif_on(unsigned int proc_num); void gicv3_rdistif_off(unsigned int proc_num); +unsigned int gicv3_rdistif_get_number_frames(const uintptr_t gicr_frame); void gicv3_cpuif_enable(unsigned int proc_num); void gicv3_cpuif_disable(unsigned int proc_num); unsigned int gicv3_get_pending_interrupt_type(void); diff --git a/plat/arm/board/arm_fpga/build_axf.ld.S b/plat/arm/board/arm_fpga/build_axf.ld.S new file mode 100644 index 000000000..d7cd00882 --- /dev/null +++ b/plat/arm/board/arm_fpga/build_axf.ld.S @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020, ARM Limited. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + * Linker script for the Arm Ltd. FPGA boards to generate an ELF file that + * contains the ROM trampoline, BL31 and the DTB. + * + * This allows to pass just one file to the uploader tool, and automatically + * provides the correct load addresses. 
+ */ + +#include + +OUTPUT_FORMAT("elf64-littleaarch64") +OUTPUT_ARCH(aarch64) + +INPUT(./bl31/bl31.elf) +INPUT(./rom_trampoline.o) + +TARGET(binary) +INPUT(./fdts/arm_fpga.dtb) + +ENTRY(_start) + +SECTIONS +{ + .rom (0x0): { + *rom_trampoline.o(.text*) + KEEP(*(.rom)) + } + + .bl31 (BL31_BASE): { + ASSERT(. == ALIGN(PAGE_SIZE), "BL31_BASE is not page aligned"); + *bl31.elf(.text* .data* .rodata* ro* .bss*) + *bl31.elf(.stack) + } + + .dtb (FPGA_PRELOADED_DTB_BASE): { + ASSERT(. == ALIGN(8), "DTB address is not 8-byte aligned"); + *arm_fpga.dtb + } + + /DISCARD/ : { *(.debug_*) } + /DISCARD/ : { *(.note*) } + /DISCARD/ : { *(.comment*) } +} diff --git a/plat/arm/board/arm_fpga/fpga_bl31_setup.c b/plat/arm/board/arm_fpga/fpga_bl31_setup.c index de6d9d5e3..a5f5ea0f3 100644 --- a/plat/arm/board/arm_fpga/fpga_bl31_setup.c +++ b/plat/arm/board/arm_fpga/fpga_bl31_setup.c @@ -9,8 +9,10 @@ #include #include +#include #include #include +#include #include #include "fpga_private.h" @@ -210,6 +212,26 @@ static void fpga_prepare_dtb(void) if (err < 0) { ERROR("Error %d creating the /cpus DT node\n", err); panic(); + } else { + unsigned int nr_cores = fpga_get_nr_gic_cores(); + + INFO("Adjusting GICR DT region to cover %u cores\n", + nr_cores); + err = fdt_adjust_gic_redist(fdt, nr_cores, + 1U << GICR_PCPUBASE_SHIFT); + if (err < 0) { + ERROR("Error %d fixing up GIC DT node\n", err); + } + } + } + + /* Check whether we support the SPE PMU. Remove the DT node if not. 
*/ + if (!spe_supported()) { + int node = fdt_node_offset_by_compatible(fdt, 0, + "arm,statistical-profiling-extension-v1"); + + if (node >= 0) { + fdt_del_node(fdt, node); } } diff --git a/plat/arm/board/arm_fpga/fpga_gicv3.c b/plat/arm/board/arm_fpga/fpga_gicv3.c index 9fb5fa935..bfc116bef 100644 --- a/plat/arm/board/arm_fpga/fpga_gicv3.c +++ b/plat/arm/board/arm_fpga/fpga_gicv3.c @@ -77,3 +77,8 @@ void fpga_pwr_gic_off(void) gicv3_cpuif_disable(plat_my_core_pos()); gicv3_rdistif_off(plat_my_core_pos()); } + +unsigned int fpga_get_nr_gic_cores(void) +{ + return gicv3_rdistif_get_number_frames(fpga_gicv3_driver_data.gicr_base); +} diff --git a/plat/arm/board/arm_fpga/fpga_private.h b/plat/arm/board/arm_fpga/fpga_private.h index 47059d64a..1ca241f26 100644 --- a/plat/arm/board/arm_fpga/fpga_private.h +++ b/plat/arm/board/arm_fpga/fpga_private.h @@ -24,6 +24,7 @@ void plat_fpga_gic_init(void); void fpga_pwr_gic_on_finish(void); void fpga_pwr_gic_off(void); unsigned int plat_fpga_calc_core_pos(uint32_t mpid); +unsigned int fpga_get_nr_gic_cores(void); #endif /* __ASSEMBLER__ */ diff --git a/plat/arm/board/arm_fpga/platform.mk b/plat/arm/board/arm_fpga/platform.mk index 8f0ff0bb8..ab576b6ea 100644 --- a/plat/arm/board/arm_fpga/platform.mk +++ b/plat/arm/board/arm_fpga/platform.mk @@ -89,6 +89,8 @@ FPGA_GIC_SOURCES := ${GICV3_SOURCES} \ plat/common/plat_gicv3.c \ plat/arm/board/arm_fpga/fpga_gicv3.c +FDT_SOURCES := fdts/arm_fpga.dts + PLAT_INCLUDES := -Iplat/arm/board/arm_fpga/include PLAT_BL_COMMON_SOURCES := plat/arm/board/arm_fpga/${ARCH}/fpga_helpers.S @@ -106,4 +108,11 @@ BL31_SOURCES += common/fdt_wrappers.c \ ${FPGA_CPU_LIBS} \ ${FPGA_GIC_SOURCES} -all: bl31 +$(eval $(call MAKE_S,$(BUILD_PLAT),plat/arm/board/arm_fpga/rom_trampoline.S,31)) +$(eval $(call MAKE_LD,$(BUILD_PLAT)/build_axf.ld,plat/arm/board/arm_fpga/build_axf.ld.S,31)) + +bl31.axf: bl31 dtbs ${BUILD_PLAT}/rom_trampoline.o ${BUILD_PLAT}/build_axf.ld + $(ECHO) " LD $@" + $(Q)$(LD) -T 
${BUILD_PLAT}/build_axf.ld -L ${BUILD_PLAT} --strip-debug -o ${BUILD_PLAT}/bl31.axf + +all: bl31.axf diff --git a/plat/arm/board/arm_fpga/rom_trampoline.S b/plat/arm/board/arm_fpga/rom_trampoline.S new file mode 100644 index 000000000..cd66c7927 --- /dev/null +++ b/plat/arm/board/arm_fpga/rom_trampoline.S @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020, ARM Limited. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + * The Arm Ltd. FPGA images start execution at address 0x0, which is + * mapped at an (emulated) ROM image. The payload uploader can write to + * this memory, but write access by the CPU cores is prohibited. + * + * Provide a simple trampoline to start BL31 execution at the actual + * load address. We put the DTB address in x0, so any code in DRAM could + * make use of that information (not yet used in BL31 right now). + */ + +#include +#include + +.text +.global _start + +_start: + mov_imm x1, BL31_BASE /* beginning of DRAM */ + mov_imm x0, FPGA_PRELOADED_DTB_BASE + br x1