Merge pull request #240 from danh-arm/sm/rem_coh_mem

Remove coherent memory v2
This commit is contained in:
danh-arm 2015-01-22 11:05:48 +00:00
commit eadd7a1b1e
40 changed files with 941 additions and 124 deletions

View File

@ -63,6 +63,8 @@ ARM_GIC_ARCH := 2
# Flag used to indicate if ASM_ASSERTION should be enabled for the build.
# This defaults to being present in DEBUG builds only.
ASM_ASSERTION := ${DEBUG}
# Build option to choose whether Trusted firmware uses Coherent memory or not.
USE_COHERENT_MEM := 1
# Default FIP file name
FIP_NAME := fip.bin
@ -230,6 +232,10 @@ $(eval $(call add_define,ASM_ASSERTION))
# Process LOG_LEVEL flag
$(eval $(call add_define,LOG_LEVEL))
# Process USE_COHERENT_MEM flag
$(eval $(call assert_boolean,USE_COHERENT_MEM))
$(eval $(call add_define,USE_COHERENT_MEM))
ASFLAGS += -nostdinc -ffreestanding -Wa,--fatal-warnings \
-Werror -Wmissing-include-dirs \
-mgeneral-regs-only -D__ASSEMBLY__ \

View File

@ -131,9 +131,11 @@ func bl1_entrypoint
ldr x1, =__BSS_SIZE__
bl zeromem16
#if USE_COHERENT_MEM
ldr x0, =__COHERENT_RAM_START__
ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__
bl zeromem16
#endif
ldr x0, =__DATA_RAM_START__
ldr x1, =__DATA_ROM_START__

View File

@ -107,6 +107,7 @@ SECTIONS
*(xlat_table)
} >RAM
#if USE_COHERENT_MEM
/*
* The base address of the coherent memory section must be page-aligned (4K)
* to guarantee that the coherent data are stored on their own pages and
@ -125,6 +126,7 @@ SECTIONS
. = NEXT(4096);
__COHERENT_RAM_END__ = .;
} >RAM
#endif
__BL1_RAM_START__ = ADDR(.data);
__BL1_RAM_END__ = .;
@ -140,8 +142,10 @@ SECTIONS
__BSS_SIZE__ = SIZEOF(.bss);
#if USE_COHERENT_MEM
__COHERENT_RAM_UNALIGNED_SIZE__ =
__COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
#endif
ASSERT(. <= BL1_RW_LIMIT, "BL1's RW section has exceeded its limit.")
}

View File

@ -91,9 +91,11 @@ func bl2_entrypoint
ldr x1, =__BSS_SIZE__
bl zeromem16
#if USE_COHERENT_MEM
ldr x0, =__COHERENT_RAM_START__
ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__
bl zeromem16
#endif
/* --------------------------------------------
* Allocate a stack whose memory will be marked

View File

@ -93,6 +93,7 @@ SECTIONS
*(xlat_table)
} >RAM
#if USE_COHERENT_MEM
/*
* The base address of the coherent memory section must be page-aligned (4K)
* to guarantee that the coherent data are stored on their own pages and
@ -111,12 +112,16 @@ SECTIONS
. = NEXT(4096);
__COHERENT_RAM_END__ = .;
} >RAM
#endif
__BL2_END__ = .;
__BSS_SIZE__ = SIZEOF(.bss);
#if USE_COHERENT_MEM
__COHERENT_RAM_UNALIGNED_SIZE__ =
__COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
#endif
ASSERT(. <= BL2_LIMIT, "BL2 image has exceeded its limit.")
}

View File

@ -149,9 +149,11 @@ func bl31_entrypoint
ldr x1, =__BSS_SIZE__
bl zeromem16
#if USE_COHERENT_MEM
ldr x0, =__COHERENT_RAM_START__
ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__
bl zeromem16
#endif
/* ---------------------------------------------
* Initialize the cpu_ops pointer.

View File

@ -117,6 +117,7 @@ SECTIONS
*(xlat_table)
} >RAM
#if USE_COHERENT_MEM
/*
* The base address of the coherent memory section must be page-aligned (4K)
* to guarantee that the coherent data are stored on their own pages and
@ -135,12 +136,15 @@ SECTIONS
. = NEXT(4096);
__COHERENT_RAM_END__ = .;
} >RAM
#endif
__BL31_END__ = .;
__BSS_SIZE__ = SIZEOF(.bss);
#if USE_COHERENT_MEM
__COHERENT_RAM_UNALIGNED_SIZE__ =
__COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
#endif
ASSERT(. <= BL31_LIMIT, "BL3-1 image has exceeded its limit.")
}

View File

@ -40,7 +40,6 @@ BL31_SOURCES += bl31/bl31_main.c \
bl31/aarch64/runtime_exceptions.S \
bl31/aarch64/crash_reporting.S \
lib/cpus/aarch64/cpu_helpers.S \
lib/locks/bakery/bakery_lock.c \
lib/locks/exclusive/spinlock.S \
services/std_svc/std_svc_setup.c \
services/std_svc/psci/psci_afflvl_off.c \
@ -53,6 +52,12 @@ BL31_SOURCES += bl31/bl31_main.c \
services/std_svc/psci/psci_setup.c \
services/std_svc/psci/psci_system_off.c
ifeq (${USE_COHERENT_MEM}, 1)
BL31_SOURCES += lib/locks/bakery/bakery_lock_coherent.c
else
BL31_SOURCES += lib/locks/bakery/bakery_lock_normal.c
endif
BL31_LINKERFILE := bl31/bl31.ld.S
# Flag used by the generic interrupt management framework to determine if

View File

@ -108,9 +108,11 @@ func tsp_entrypoint
ldr x1, =__BSS_SIZE__
bl zeromem16
#if USE_COHERENT_MEM
ldr x0, =__COHERENT_RAM_START__
ldr x1, =__COHERENT_RAM_UNALIGNED_SIZE__
bl zeromem16
#endif
/* --------------------------------------------
* Allocate a stack whose memory will be marked

View File

@ -98,6 +98,7 @@ SECTIONS
*(xlat_table)
} >RAM
#if USE_COHERENT_MEM
/*
* The base address of the coherent memory section must be page-aligned (4K)
* to guarantee that the coherent data are stored on their own pages and
@ -116,12 +117,15 @@ SECTIONS
. = NEXT(4096);
__COHERENT_RAM_END__ = .;
} >RAM
#endif
__BL32_END__ = .;
__BSS_SIZE__ = SIZEOF(.bss);
#if USE_COHERENT_MEM
__COHERENT_RAM_UNALIGNED_SIZE__ =
__COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
#endif
ASSERT(. <= BL32_LIMIT, "BL3-2 image has exceeded its limit.")
}

View File

@ -43,7 +43,7 @@
* of trusted SRAM
******************************************************************************/
extern unsigned long __RO_START__;
extern unsigned long __COHERENT_RAM_END__;
extern unsigned long __BL32_END__;
/*******************************************************************************
* Lock to control access to the console
@ -63,11 +63,11 @@ work_statistics_t tsp_stats[PLATFORM_CORE_COUNT];
/*******************************************************************************
* The BL32 memory footprint starts with an RO sections and ends
* with a section for coherent RAM. Use it to find the memory size
* with the linker symbol __BL32_END__. Use it to find the memory size
******************************************************************************/
#define BL32_TOTAL_BASE (unsigned long)(&__RO_START__)
#define BL32_TOTAL_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
#define BL32_TOTAL_LIMIT (unsigned long)(&__BL32_END__)
static tsp_args_t *set_smc_args(uint64_t arg0,
uint64_t arg1,

View File

@ -12,8 +12,9 @@ Contents :
7. [CPU specific operations framework](#7--cpu-specific-operations-framework)
8. [Memory layout of BL images](#8-memory-layout-of-bl-images)
9. [Firmware Image Package (FIP)](#9--firmware-image-package-fip)
10. [Code Structure](#10--code-structure)
11. [References](#11--references)
10. [Use of coherent memory in Trusted Firmware](#10--use-of-coherent-memory-in-trusted-firmware)
11. [Code Structure](#11--code-structure)
12. [References](#12--references)
1. Introduction
@ -368,10 +369,10 @@ level implementation of the generic timer through the memory mapped interface.
`ON`; any other cluster is `OFF`. BL3-1 initializes the data structures that
implement the state machine, including the locks that protect them. BL3-1
accesses the state of a CPU or cluster immediately after reset and before
the MMU is enabled in the warm boot path. It is not currently possible to
use 'exclusive' based spinlocks, therefore BL3-1 uses locks based on
Lamport's Bakery algorithm instead. BL3-1 allocates these locks in device
memory. They are accessible irrespective of MMU state.
the data cache is enabled in the warm boot path. It is not currently
possible to use 'exclusive' based spinlocks, therefore BL3-1 uses locks
based on Lamport's Bakery algorithm instead. BL3-1 allocates these locks in
device memory by default.
* Runtime services initialization:
@ -1127,9 +1128,10 @@ this purpose:
* `__BSS_START__` This address must be aligned on a 16-byte boundary.
* `__BSS_SIZE__`
Similarly, the coherent memory section must be zero-initialised. Also, the MMU
setup code needs to know the extents of this section to set the right memory
attributes for it. The following linker symbols are defined for this purpose:
Similarly, the coherent memory section (if enabled) must be zero-initialised.
Also, the MMU setup code needs to know the extents of this section to set the
right memory attributes for it. The following linker symbols are defined for
this purpose:
* `__COHERENT_RAM_START__` This address must be aligned on a page-size boundary.
* `__COHERENT_RAM_END__` This address must be aligned on a page-size boundary.
@ -1443,7 +1445,208 @@ Currently the FVP's policy only allows loading of a known set of images. The
platform policy can be modified to allow additional images.
10. Code Structure
10. Use of coherent memory in Trusted Firmware
----------------------------------------------
There might be loss of coherency when physical memory with mismatched
shareability, cacheability and memory attributes is accessed by multiple CPUs
(refer to section B2.9 of [ARM ARM] for more details). This possibility occurs
in Trusted Firmware during power up/down sequences when coherency, MMU and
caches are turned on/off incrementally.
Trusted Firmware defines coherent memory as a region of memory with Device
nGnRE attributes in the translation tables. The translation granule size in
Trusted Firmware is 4KB. This is the smallest possible size of the coherent
memory region.
By default, all data structures which are susceptible to accesses with
mismatched attributes from various CPUs are allocated in a coherent memory
region (refer to section 2.1 of [Porting Guide]). The coherent memory region
accesses are Outer Shareable, non-cacheable and they can be accessed
with the Device nGnRE attributes when the MMU is turned on. Hence, at the
expense of at least an extra page of memory, Trusted Firmware is able to work
around coherency issues due to mismatched memory attributes.
The alternative to the above approach is to allocate the susceptible data
structures in Normal WriteBack WriteAllocate Inner shareable memory. This
approach requires the data structures to be designed so that it is possible to
work around the issue of mismatched memory attributes by performing software
cache maintenance on them.
### Disabling the use of coherent memory in Trusted Firmware
It might be desirable to avoid the cost of allocating coherent memory on
platforms which are memory constrained. Trusted Firmware enables inclusion of
coherent memory in firmware images through the build flag `USE_COHERENT_MEM`.
This flag is enabled by default. It can be disabled to choose the second
approach described above.
The below sections analyze the data structures allocated in the coherent memory
region and the changes required to allocate them in normal memory.
### PSCI Affinity map nodes
The `psci_aff_map` data structure stores the hierarchial node information for
each affinity level in the system including the PSCI states associated with them.
By default, this data structure is allocated in the coherent memory region in
the Trusted Firmware because it can be accessed by multiple CPUs, either with
their caches enabled or disabled.
typedef struct aff_map_node {
unsigned long mpidr;
unsigned char ref_count;
unsigned char state;
unsigned char level;
#if USE_COHERENT_MEM
bakery_lock_t lock;
#else
unsigned char aff_map_index;
#endif
} aff_map_node_t;
In order to move this data structure to normal memory, the use of each of its
fields must be analyzed. Fields like `mpidr` and `level` are only written once
during cold boot. Hence removing them from coherent memory involves only doing
a clean and invalidate of the cache lines after these fields are written.
The fields `state` and `ref_count` can be concurrently accessed by multiple
CPUs in different cache states. A Lamport's Bakery lock is used to ensure mutual
exlusion to these fields. As a result, it is possible to move these fields out
of coherent memory by performing software cache maintenance on them. The field
`lock` is the bakery lock data structure when `USE_COHERENT_MEM` is enabled.
The `aff_map_index` is used to identify the bakery lock when `USE_COHERENT_MEM`
is disabled.
### Bakery lock data
The bakery lock data structure `bakery_lock_t` is allocated in coherent memory
and is accessed by multiple CPUs with mismatched attributes. `bakery_lock_t` is
defined as follows:
typedef struct bakery_lock {
int owner;
volatile char entering[BAKERY_LOCK_MAX_CPUS];
volatile unsigned number[BAKERY_LOCK_MAX_CPUS];
} bakery_lock_t;
It is a characteristic of Lamport's Bakery algorithm that the volatile per-CPU
fields can be read by all CPUs but only written to by the owning CPU.
Depending upon the data cache line size, the per-CPU fields of the
`bakery_lock_t` structure for multiple CPUs may exist on a single cache line.
These per-CPU fields can be read and written during lock contention by multiple
CPUs with mismatched memory attributes. Since these fields are a part of the
lock implementation, they do not have access to any other locking primitive to
safeguard against the resulting coherency issues. As a result, simple software
cache maintenance is not enough to allocate them in coherent memory. Consider
the following example.
CPU0 updates its per-CPU field with data cache enabled. This write updates a
local cache line which contains a copy of the fields for other CPUs as well. Now
CPU1 updates its per-CPU field of the `bakery_lock_t` structure with data cache
disabled. CPU1 then issues a DCIVAC operation to invalidate any stale copies of
its field in any other cache line in the system. This operation will invalidate
the update made by CPU0 as well.
To use bakery locks when `USE_COHERENT_MEM` is disabled, the lock data structure
has been redesigned. The changes utilise the characteristic of Lamport's Bakery
algorithm mentioned earlier. The per-CPU fields of the new lock structure are
aligned such that they are allocated on separate cache lines. The per-CPU data
framework in Trusted Firmware is used to achieve this. This enables software to
perform software cache maintenance on the lock data structure without running
into coherency issues associated with mismatched attributes.
The per-CPU data framework enables consolidation of data structures on the
fewest cache lines possible. This saves memory as compared to the scenario where
each data structure is separately aligned to the cache line boundary to achieve
the same effect.
The bakery lock data structure `bakery_info_t` is defined for use when
`USE_COHERENT_MEM` is disabled as follows:
typedef struct bakery_info {
/*
* The lock_data is a bit-field of 2 members:
* Bit[0] : choosing. This field is set when the CPU is
* choosing its bakery number.
* Bits[1 - 15] : number. This is the bakery number allocated.
*/
volatile uint16_t lock_data;
} bakery_info_t;
The `bakery_info_t` represents a single per-CPU field of one lock and
the combination of corresponding `bakery_info_t` structures for all CPUs in the
system represents the complete bakery lock. It is embedded in the per-CPU
data framework `cpu_data` as shown below:
CPU0 cpu_data
------------------
| .... |
|----------------|
| `bakery_info_t`| <-- Lock_0 per-CPU field
| Lock_0 | for CPU0
|----------------|
| `bakery_info_t`| <-- Lock_1 per-CPU field
| Lock_1 | for CPU0
|----------------|
| .... |
|----------------|
| `bakery_info_t`| <-- Lock_N per-CPU field
| Lock_N | for CPU0
------------------
CPU1 cpu_data
------------------
| .... |
|----------------|
| `bakery_info_t`| <-- Lock_0 per-CPU field
| Lock_0 | for CPU1
|----------------|
| `bakery_info_t`| <-- Lock_1 per-CPU field
| Lock_1 | for CPU1
|----------------|
| .... |
|----------------|
| `bakery_info_t`| <-- Lock_N per-CPU field
| Lock_N | for CPU1
------------------
Consider a system of 2 CPUs with 'N' bakery locks as shown above. For an
operation on Lock_N, the corresponding `bakery_info_t` in both CPU0 and CPU1
`cpu_data` need to be fetched and appropriate cache operations need to be
performed for each access.
For multiple bakery locks, an array of `bakery_info_t` is declared in `cpu_data`
and each lock is given an `id` to identify it in the array.
### Non Functional Impact of removing coherent memory
Removal of the coherent memory region leads to the additional software overhead
of performing cache maintenance for the affected data structures. However, since
the memory where the data structures are allocated is cacheable, the overhead is
mostly mitigated by an increase in performance.
There is however a performance impact for bakery locks, due to:
* Additional cache maintenance operations, and
* Multiple cache line reads for each lock operation, since the bakery locks
for each CPU are distributed across different cache lines.
The implementation has been optimized to mimimize this additional overhead.
Measurements indicate that when bakery locks are allocated in Normal memory, the
minimum latency of acquiring a lock is on an average 3-4 micro seconds whereas
in Device memory the same is 2 micro seconds. The measurements were done on the
Juno ARM development platform.
As mentioned earlier, almost a page of memory can be saved by disabling
`USE_COHERENT_MEM`. Each platform needs to consider these trade-offs to decide
whether coherent memory should be used. If a platform disables
`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it should
reserve memory in `cpu_data` by defining the macro `PLAT_PCPU_DATA_SIZE` (see
the [Porting Guide]). Refer to the reference platform code for examples.
11. Code Structure
-------------------
Trusted Firmware code is logically divided between the three boot loader
@ -1488,7 +1691,7 @@ FDTs provide a description of the hardware platform and are used by the Linux
kernel at boot time. These can be found in the `fdts` directory.
11. References
12. References
---------------
1. Trusted Board Boot Requirements CLIENT PDD (ARM DEN 0006B-5). Available
@ -1504,7 +1707,7 @@ kernel at boot time. These can be found in the `fdts` directory.
_Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved._
[ARM ARM]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.e/index.html "ARMv8-A Reference Manual (ARM DDI0487A.E)"
[PSCI]: http://infocenter.arm.com/help/topic/com.arm.doc.den0022b/index.html "Power State Coordination Interface PDD (ARM DEN 0022B.b)"
[SMCCC]: http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html "SMC Calling Convention PDD (ARM DEN 0028A)"
[UUID]: https://tools.ietf.org/rfc/rfc4122.txt "A Universally Unique IDentifier (UUID) URN Namespace"

View File

@ -63,11 +63,11 @@ mapped page tables, and enable both the instruction and data caches for each BL
stage. In the ARM FVP port, each BL stage configures the MMU in its platform-
specific architecture setup function, for example `blX_plat_arch_setup()`.
Each platform must allocate a block of identity mapped secure memory with
Device-nGnRE attributes aligned to page boundary (4K) for each BL stage. This
memory is identified by the section name `tzfw_coherent_mem` so that its
possible for the firmware to place variables in it using the following C code
directive:
If the build option `USE_COHERENT_MEM` is enabled, each platform must allocate a
block of identity mapped secure memory with Device-nGnRE attributes aligned to
page boundary (4K) for each BL stage. This memory is identified by the section
name `tzfw_coherent_mem` so that its possible for the firmware to place
variables in it using the following C code directive:
__attribute__ ((section("tzfw_coherent_mem")))
@ -246,6 +246,17 @@ must also be defined:
entities than this value using `io_open()` will fail with
IO_RESOURCES_EXHAUSTED.
If the platform needs to allocate data within the per-cpu data framework in
BL3-1, it should define the following macro. Currently this is only required if
the platform decides not to use the coherent memory section by undefining the
USE_COHERENT_MEM build flag. In this case, the framework allocates the required
memory within the the per-cpu data to minimize wastage.
* **#define : PLAT_PCPU_DATA_SIZE**
Defines the memory (in bytes) to be reserved within the per-cpu data
structure for use by the platform layer.
The following constants are optional. They should be defined when the platform
memory layout implies some image overlaying like on FVP.

View File

@ -245,6 +245,12 @@ performed.
synchronous method) or 1 (BL3-2 is initialized using asynchronous method).
Default is 0.
* `USE_COHERENT_MEM`: This flag determines whether to include the coherent
memory region in the BL memory map or not (see "Use of Coherent memory in
Trusted Firmware" section in [Firmware Design]). It can take the value 1
(Coherent memory region is included) or 0 (Coherent memory region is
excluded). Default is 1.
#### FVP specific build options
* `FVP_TSP_RAM_LOCATION`: location of the TSP binary. Options:

View File

@ -32,7 +32,7 @@
#define __CPU_DATA_H__
/* Offsets for the cpu_data structure */
#define CPU_DATA_CRASH_BUF_OFFSET 0x20
#define CPU_DATA_CRASH_BUF_OFFSET 0x18
#if CRASH_REPORTING
#define CPU_DATA_LOG2SIZE 7
#else
@ -45,10 +45,20 @@
#ifndef __ASSEMBLY__
#include <arch_helpers.h>
#include <cassert.h>
#include <platform_def.h>
#include <psci.h>
#include <stdint.h>
/* Offsets for the cpu_data structure */
#define CPU_DATA_PSCI_LOCK_OFFSET __builtin_offsetof\
(cpu_data_t, psci_svc_cpu_data.pcpu_bakery_info)
#if PLAT_PCPU_DATA_SIZE
#define CPU_DATA_PLAT_PCPU_OFFSET __builtin_offsetof\
(cpu_data_t, platform_cpu_data)
#endif
/*******************************************************************************
* Function & variable prototypes
******************************************************************************/
@ -69,9 +79,12 @@
typedef struct cpu_data {
void *cpu_context[2];
uint64_t cpu_ops_ptr;
struct psci_cpu_data psci_svc_cpu_data;
#if CRASH_REPORTING
uint64_t crash_buf[CPU_DATA_CRASH_BUF_SIZE >> 3];
#endif
struct psci_cpu_data psci_svc_cpu_data;
#if PLAT_PCPU_DATA_SIZE
uint8_t platform_cpu_data[PLAT_PCPU_DATA_SIZE];
#endif
} __aligned(CACHE_WRITEBACK_GRANULE) cpu_data_t;

View File

@ -31,6 +31,17 @@
#ifndef __PSCI_H__
#define __PSCI_H__
#include <bakery_lock.h>
#include <platform_def.h> /* for PLATFORM_NUM_AFFS */
/*******************************************************************************
* Number of affinity instances whose state this psci imp. can track
******************************************************************************/
#ifdef PLATFORM_NUM_AFFS
#define PSCI_NUM_AFFS PLATFORM_NUM_AFFS
#else
#define PSCI_NUM_AFFS (2 * PLATFORM_CORE_COUNT)
#endif
/*******************************************************************************
* Defines for runtime services func ids
@ -140,6 +151,9 @@ typedef struct psci_cpu_data {
uint32_t power_state;
uint32_t max_phys_off_afflvl; /* Highest affinity level in physically
powered off state */
#if !USE_COHERENT_MEM
bakery_info_t pcpu_bakery_info[PSCI_NUM_AFFS];
#endif
} psci_cpu_data_t;
/*******************************************************************************

View File

@ -175,6 +175,9 @@ DEFINE_SYSOP_FUNC(wfi)
DEFINE_SYSOP_FUNC(wfe)
DEFINE_SYSOP_FUNC(sev)
DEFINE_SYSOP_TYPE_FUNC(dsb, sy)
DEFINE_SYSOP_TYPE_FUNC(dmb, sy)
DEFINE_SYSOP_TYPE_FUNC(dsb, ish)
DEFINE_SYSOP_TYPE_FUNC(dmb, ish)
DEFINE_SYSOP_FUNC(isb)
uint32_t get_afflvl_shift(uint32_t);

View File

@ -35,6 +35,11 @@
#define BAKERY_LOCK_MAX_CPUS PLATFORM_CORE_COUNT
#ifndef __ASSEMBLY__
#include <stdint.h>
#if USE_COHERENT_MEM
typedef struct bakery_lock {
int owner;
volatile char entering[BAKERY_LOCK_MAX_CPUS];
@ -48,4 +53,21 @@ void bakery_lock_get(bakery_lock_t *bakery);
void bakery_lock_release(bakery_lock_t *bakery);
int bakery_lock_try(bakery_lock_t *bakery);
#else
typedef struct bakery_info {
/*
* The lock_data is a bit-field of 2 members:
* Bit[0] : choosing. This field is set when the CPU is
* choosing its bakery number.
* Bits[1 - 15] : number. This is the bakery number allocated.
*/
volatile uint16_t lock_data;
} bakery_info_t;
void bakery_lock_get(unsigned int id, unsigned int offset);
void bakery_lock_release(unsigned int id, unsigned int offset);
#endif /* __USE_COHERENT_MEM__ */
#endif /* __ASSEMBLY__ */
#endif /* __BAKERY_LOCK_H__ */

View File

@ -31,11 +31,13 @@
#include <arch_helpers.h>
#include <assert.h>
#include <bakery_lock.h>
#include <cpu_data.h>
#include <platform.h>
#include <string.h>
/*
* Functions in this file implement Bakery Algorithm for mutual exclusion.
* Functions in this file implement Bakery Algorithm for mutual exclusion with the
* bakery lock data structures in coherent memory.
*
* ARM architecture offers a family of exclusive access instructions to
* efficiently implement mutual exclusion with hardware support. However, as
@ -107,8 +109,6 @@ static unsigned int bakery_get_ticket(bakery_lock_t *bakery, unsigned int me)
++my_ticket;
bakery->number[me] = my_ticket;
bakery->entering[me] = 0;
dsb();
sev();
return my_ticket;
}
@ -151,7 +151,7 @@ void bakery_lock_get(bakery_lock_t *bakery)
/* Wait for the contender to get their ticket */
while (bakery->entering[they])
wfe();
;
/*
* If the other party is a contender, they'll have non-zero

View File

@ -0,0 +1,217 @@
/*
* Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of ARM nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <arch_helpers.h>
#include <assert.h>
#include <bakery_lock.h>
#include <cpu_data.h>
#include <platform.h>
#include <string.h>
/*
* Functions in this file implement Bakery Algorithm for mutual exclusion with the
* bakery lock data structures in cacheable and Normal memory.
*
* ARM architecture offers a family of exclusive access instructions to
* efficiently implement mutual exclusion with hardware support. However, as
* well as depending on external hardware, these instructions have defined
* behavior only on certain memory types (cacheable and Normal memory in
* particular; see ARMv8 Architecture Reference Manual section B2.10). Use cases
* in trusted firmware are such that mutual exclusion implementation cannot
* expect that accesses to the lock have the specific type required by the
* architecture for these primitives to function (for example, not all
* contenders may have address translation enabled).
*
* This implementation does not use mutual exclusion primitives. It expects
* memory regions where the locks reside to be cacheable and Normal.
*
* Note that the ARM architecture guarantees single-copy atomicity for aligned
* accesses regardless of status of address translation.
*/
/* Convert a ticket to priority */
#define PRIORITY(t, pos) (((t) << 8) | (pos))
#define CHOOSING_TICKET 0x1
#define CHOOSING_DONE 0x0
#define bakery_is_choosing(info) (info & 0x1)
#define bakery_ticket_number(info) ((info >> 1) & 0x7FFF)
#define make_bakery_data(choosing, number) \
(((choosing & 0x1) | (number << 1)) & 0xFFFF)
/* This macro assumes that the bakery_info array is located at the offset specified */
#define get_my_bakery_info(offset, id) \
(((bakery_info_t *) (((uint8_t *)_cpu_data()) + offset)) + id)
#define get_bakery_info_by_index(offset, id, ix) \
(((bakery_info_t *) (((uint8_t *)_cpu_data_by_index(ix)) + offset)) + id)
#define write_cache_op(addr, cached) \
do { \
(cached ? dccvac((uint64_t)addr) :\
dcivac((uint64_t)addr));\
dsbish();\
} while (0)
#define read_cache_op(addr, cached) if (cached) \
dccivac((uint64_t)addr)
static unsigned int bakery_get_ticket(int id, unsigned int offset,
unsigned int me, int is_cached)
{
unsigned int my_ticket, their_ticket;
unsigned int they;
bakery_info_t *my_bakery_info, *their_bakery_info;
/*
* Obtain a reference to the bakery information for this cpu and ensure
* it is not NULL.
*/
my_bakery_info = get_my_bakery_info(offset, id);
assert(my_bakery_info);
/*
* Tell other contenders that we are through the bakery doorway i.e.
* going to allocate a ticket for this cpu.
*/
my_ticket = 0;
my_bakery_info->lock_data = make_bakery_data(CHOOSING_TICKET, my_ticket);
write_cache_op(my_bakery_info, is_cached);
/*
* Iterate through the bakery information of each contender to allocate
* the highest ticket number for this cpu.
*/
for (they = 0; they < BAKERY_LOCK_MAX_CPUS; they++) {
if (me == they)
continue;
/*
* Get a reference to the other contender's bakery info and
* ensure that a stale copy is not read.
*/
their_bakery_info = get_bakery_info_by_index(offset, id, they);
assert(their_bakery_info);
read_cache_op(their_bakery_info, is_cached);
/*
* Update this cpu's ticket number if a higher ticket number is
* seen
*/
their_ticket = bakery_ticket_number(their_bakery_info->lock_data);
if (their_ticket > my_ticket)
my_ticket = their_ticket;
}
/*
* Compute ticket; then signal to other contenders waiting for us to
* finish calculating our ticket value that we're done
*/
++my_ticket;
my_bakery_info->lock_data = make_bakery_data(CHOOSING_DONE, my_ticket);
write_cache_op(my_bakery_info, is_cached);
return my_ticket;
}
void bakery_lock_get(unsigned int id, unsigned int offset)
{
unsigned int they, me, is_cached;
unsigned int my_ticket, my_prio, their_ticket;
bakery_info_t *their_bakery_info;
uint16_t their_bakery_data;
me = platform_get_core_pos(read_mpidr_el1());
is_cached = read_sctlr_el3() & SCTLR_C_BIT;
/* Get a ticket */
my_ticket = bakery_get_ticket(id, offset, me, is_cached);
/*
* Now that we got our ticket, compute our priority value, then compare
* with that of others, and proceed to acquire the lock
*/
my_prio = PRIORITY(my_ticket, me);
for (they = 0; they < BAKERY_LOCK_MAX_CPUS; they++) {
if (me == they)
continue;
/*
* Get a reference to the other contender's bakery info and
* ensure that a stale copy is not read.
*/
their_bakery_info = get_bakery_info_by_index(offset, id, they);
assert(their_bakery_info);
read_cache_op(their_bakery_info, is_cached);
their_bakery_data = their_bakery_info->lock_data;
/* Wait for the contender to get their ticket */
while (bakery_is_choosing(their_bakery_data)) {
read_cache_op(their_bakery_info, is_cached);
their_bakery_data = their_bakery_info->lock_data;
}
/*
* If the other party is a contender, they'll have non-zero
* (valid) ticket value. If they do, compare priorities
*/
their_ticket = bakery_ticket_number(their_bakery_data);
if (their_ticket && (PRIORITY(their_ticket, they) < my_prio)) {
/*
* They have higher priority (lower value). Wait for
* their ticket value to change (either release the lock
* to have it dropped to 0; or drop and probably content
* again for the same lock to have an even higher value)
*/
do {
wfe();
read_cache_op(their_bakery_info, is_cached);
} while (their_ticket
== bakery_ticket_number(their_bakery_info->lock_data));
}
}
}
void bakery_lock_release(unsigned int id, unsigned int offset)
{
bakery_info_t *my_bakery_info;
unsigned int is_cached = read_sctlr_el3() & SCTLR_C_BIT;
my_bakery_info = get_my_bakery_info(offset, id);
my_bakery_info->lock_data = 0;
write_cache_op(my_bakery_info, is_cached);
sev();
}

View File

@ -136,7 +136,8 @@ const unsigned int num_sec_irqs = sizeof(irq_sec_array) /
* Macro generating the code for the function setting up the pagetables as per
* the platform memory map & initialize the mmu, for the given exception level
******************************************************************************/
#define DEFINE_CONFIGURE_MMU_EL(_el) \
#if USE_COHERENT_MEM
#define DEFINE_CONFIGURE_MMU_EL(_el) \
void fvp_configure_mmu_el##_el(unsigned long total_base, \
unsigned long total_size, \
unsigned long ro_start, \
@ -158,6 +159,25 @@ const unsigned int num_sec_irqs = sizeof(irq_sec_array) /
\
enable_mmu_el##_el(0); \
}
#else
#define DEFINE_CONFIGURE_MMU_EL(_el) \
void fvp_configure_mmu_el##_el(unsigned long total_base, \
unsigned long total_size, \
unsigned long ro_start, \
unsigned long ro_limit) \
{ \
mmap_add_region(total_base, total_base, \
total_size, \
MT_MEMORY | MT_RW | MT_SECURE); \
mmap_add_region(ro_start, ro_start, \
ro_limit - ro_start, \
MT_MEMORY | MT_RO | MT_SECURE); \
mmap_add(fvp_mmap); \
init_xlat_tables(); \
\
enable_mmu_el##_el(0); \
}
#endif
/* Define EL1 and EL3 variants of the function initialising the MMU */
DEFINE_CONFIGURE_MMU_EL(1)

View File

@ -40,6 +40,7 @@
#include "fvp_def.h"
#include "fvp_private.h"
#if USE_COHERENT_MEM
/*******************************************************************************
* Declarations of linker defined symbols which will help us find the layout
* of trusted SRAM
@ -56,6 +57,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL1_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL1_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
#endif
/* Data structure which holds the extents of the trusted SRAM for BL1*/
static meminfo_t bl1_tzram_layout;
@ -116,9 +118,12 @@ void bl1_plat_arch_setup(void)
fvp_configure_mmu_el3(bl1_tzram_layout.total_base,
bl1_tzram_layout.total_size,
BL1_RO_BASE,
BL1_RO_LIMIT,
BL1_COHERENT_RAM_BASE,
BL1_COHERENT_RAM_LIMIT);
BL1_RO_LIMIT
#if USE_COHERENT_MEM
, BL1_COHERENT_RAM_BASE,
BL1_COHERENT_RAM_LIMIT
#endif
);
}

View File

@ -45,8 +45,10 @@
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
#endif
/*
* The next 2 constants identify the extents of the code & RO data region.
@ -57,6 +59,7 @@ extern unsigned long __COHERENT_RAM_END__;
#define BL2_RO_BASE (unsigned long)(&__RO_START__)
#define BL2_RO_LIMIT (unsigned long)(&__RO_END__)
#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@ -66,11 +69,11 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL2_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL2_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
#endif
/* Data structure which holds the extents of the trusted SRAM for BL2 */
static meminfo_t bl2_tzram_layout
__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE),
section("tzfw_coherent_mem")));
__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE)));
/* Assert that BL3-1 parameters fit in shared memory */
CASSERT((PARAMS_BASE + sizeof(bl2_to_bl31_params_mem_t)) <
@ -209,9 +212,12 @@ void bl2_plat_arch_setup(void)
fvp_configure_mmu_el1(bl2_tzram_layout.total_base,
bl2_tzram_layout.total_size,
BL2_RO_BASE,
BL2_RO_LIMIT,
BL2_COHERENT_RAM_BASE,
BL2_COHERENT_RAM_LIMIT);
BL2_RO_LIMIT
#if USE_COHERENT_MEM
, BL2_COHERENT_RAM_BASE,
BL2_COHERENT_RAM_LIMIT
#endif
);
}
/*******************************************************************************

View File

@ -48,19 +48,25 @@
******************************************************************************/
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
extern unsigned long __BL31_END__;
#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
#endif
/*
* The next 2 constants identify the extents of the code & RO data region.
* These addresses are used by the MMU setup code and therefore they must be
* page-aligned. It is the responsibility of the linker script to ensure that
* __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
* The next 3 constants identify the extents of the code, RO data region and the
* limit of the BL3-1 image. These addresses are used by the MMU setup code and
* therefore they must be page-aligned. It is the responsibility of the linker
* script to ensure that __RO_START__, __RO_END__ & __BL31_END__ linker symbols
* refer to page-aligned addresses.
*/
#define BL31_RO_BASE (unsigned long)(&__RO_START__)
#define BL31_RO_LIMIT (unsigned long)(&__RO_END__)
#define BL31_END (unsigned long)(&__BL31_END__)
#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@ -70,7 +76,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL31_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL31_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
#endif
#if RESET_TO_BL31
static entry_point_info_t bl32_image_ep_info;
@ -235,9 +241,12 @@ void bl31_plat_arch_setup(void)
fvp_cci_enable();
#endif
fvp_configure_mmu_el3(BL31_RO_BASE,
(BL31_COHERENT_RAM_LIMIT - BL31_RO_BASE),
(BL31_END - BL31_RO_BASE),
BL31_RO_BASE,
BL31_RO_LIMIT,
BL31_COHERENT_RAM_BASE,
BL31_COHERENT_RAM_LIMIT);
BL31_RO_LIMIT
#if USE_COHERENT_MEM
, BL31_COHERENT_RAM_BASE,
BL31_COHERENT_RAM_LIMIT
#endif
);
}

View File

@ -31,13 +31,19 @@
#include <bakery_lock.h>
#include <mmio.h>
#include "../../fvp_def.h"
#include "../../fvp_private.h"
#include "fvp_pwrc.h"
/*
* TODO: Someday there will be a generic power controller api. At the moment
* each platform has its own pwrc so just exporting functions is fine.
*/
#if USE_COHERENT_MEM
static bakery_lock_t pwrc_lock __attribute__ ((section("tzfw_coherent_mem")));
#define LOCK_ARG &pwrc_lock
#else
#define LOCK_ARG FVP_PWRC_BAKERY_ID
#endif
unsigned int fvp_pwrc_get_cpu_wkr(unsigned long mpidr)
{
@ -47,54 +53,55 @@ unsigned int fvp_pwrc_get_cpu_wkr(unsigned long mpidr)
unsigned int fvp_pwrc_read_psysr(unsigned long mpidr)
{
unsigned int rc;
bakery_lock_get(&pwrc_lock);
fvp_lock_get(LOCK_ARG);
mmio_write_32(PWRC_BASE + PSYSR_OFF, (unsigned int) mpidr);
rc = mmio_read_32(PWRC_BASE + PSYSR_OFF);
bakery_lock_release(&pwrc_lock);
fvp_lock_release(LOCK_ARG);
return rc;
}
void fvp_pwrc_write_pponr(unsigned long mpidr)
{
bakery_lock_get(&pwrc_lock);
fvp_lock_get(LOCK_ARG);
mmio_write_32(PWRC_BASE + PPONR_OFF, (unsigned int) mpidr);
bakery_lock_release(&pwrc_lock);
fvp_lock_release(LOCK_ARG);
}
void fvp_pwrc_write_ppoffr(unsigned long mpidr)
{
bakery_lock_get(&pwrc_lock);
fvp_lock_get(LOCK_ARG);
mmio_write_32(PWRC_BASE + PPOFFR_OFF, (unsigned int) mpidr);
bakery_lock_release(&pwrc_lock);
fvp_lock_release(LOCK_ARG);
}
void fvp_pwrc_set_wen(unsigned long mpidr)
{
bakery_lock_get(&pwrc_lock);
fvp_lock_get(LOCK_ARG);
mmio_write_32(PWRC_BASE + PWKUPR_OFF,
(unsigned int) (PWKUPR_WEN | mpidr));
bakery_lock_release(&pwrc_lock);
fvp_lock_release(LOCK_ARG);
}
void fvp_pwrc_clr_wen(unsigned long mpidr)
{
bakery_lock_get(&pwrc_lock);
fvp_lock_get(LOCK_ARG);
mmio_write_32(PWRC_BASE + PWKUPR_OFF,
(unsigned int) mpidr);
bakery_lock_release(&pwrc_lock);
fvp_lock_release(LOCK_ARG);
}
void fvp_pwrc_write_pcoffr(unsigned long mpidr)
{
bakery_lock_get(&pwrc_lock);
fvp_lock_get(LOCK_ARG);
mmio_write_32(PWRC_BASE + PCOFFR_OFF, (unsigned int) mpidr);
bakery_lock_release(&pwrc_lock);
fvp_lock_release(LOCK_ARG);
}
/* Nothing else to do here apart from initializing the lock */
int fvp_pwrc_setup(void)
{
bakery_lock_init(&pwrc_lock);
fvp_lock_init(LOCK_ARG);
return 0;
}

View File

@ -31,7 +31,9 @@
#ifndef __FVP_PRIVATE_H__
#define __FVP_PRIVATE_H__
#include <bakery_lock.h>
#include <bl_common.h>
#include <cpu_data.h>
#include <platform_def.h>
@ -55,10 +57,60 @@ typedef struct bl2_to_bl31_params_mem {
entry_point_info_t bl31_ep_info;
} bl2_to_bl31_params_mem_t;
#if USE_COHERENT_MEM
/*
* These are wrapper macros to the Coherent Memory Bakery Lock API.
*/
#define fvp_lock_init(_lock_arg) bakery_lock_init(_lock_arg)
#define fvp_lock_get(_lock_arg) bakery_lock_get(_lock_arg)
#define fvp_lock_release(_lock_arg) bakery_lock_release(_lock_arg)
#else
/*******************************************************************************
* Forward declarations
* Constants to specify how many bakery locks this platform implements. These
* are used if the platform chooses not to use coherent memory for bakery lock
* data structures.
******************************************************************************/
struct meminfo;
#define FVP_MAX_BAKERIES 1
#define FVP_PWRC_BAKERY_ID 0
/*******************************************************************************
* Definition of structure which holds platform specific per-cpu data. Currently
* it holds only the bakery lock information for each cpu. Constants to
* specify how many bakeries this platform implements and bakery ids are
* specified in fvp_def.h
******************************************************************************/
typedef struct fvp_cpu_data {
bakery_info_t pcpu_bakery_info[FVP_MAX_BAKERIES];
} fvp_cpu_data_t;
/* Macro to define the offset of bakery_info_t in fvp_cpu_data_t */
#define FVP_CPU_DATA_LOCK_OFFSET __builtin_offsetof\
(fvp_cpu_data_t, pcpu_bakery_info)
/*******************************************************************************
* Helper macros for bakery lock api when using the above fvp_cpu_data_t for
* bakery lock data structures. It assumes that the bakery_info is at the
* beginning of the platform specific per-cpu data.
******************************************************************************/
#define fvp_lock_init(_lock_arg) /* No init required */
#define fvp_lock_get(_lock_arg) bakery_lock_get(_lock_arg, \
CPU_DATA_PLAT_PCPU_OFFSET + \
FVP_CPU_DATA_LOCK_OFFSET)
#define fvp_lock_release(_lock_arg) bakery_lock_release(_lock_arg, \
CPU_DATA_PLAT_PCPU_OFFSET + \
FVP_CPU_DATA_LOCK_OFFSET)
/*
* Ensure that the size of the FVP specific per-cpu data structure and the size
* of the memory allocated in generic per-cpu data for the platform are the same.
*/
CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(fvp_cpu_data_t), \
fvp_pcpu_data_size_mismatch);
#endif /* __USE_COHERENT_MEM__ */
/*******************************************************************************
* Function and variable prototypes
@ -66,15 +118,22 @@ struct meminfo;
void fvp_configure_mmu_el1(unsigned long total_base,
unsigned long total_size,
unsigned long,
unsigned long,
unsigned long,
unsigned long);
unsigned long
#if USE_COHERENT_MEM
, unsigned long,
unsigned long
#endif
);
void fvp_configure_mmu_el3(unsigned long total_base,
unsigned long total_size,
unsigned long,
unsigned long,
unsigned long,
unsigned long);
unsigned long
#if USE_COHERENT_MEM
, unsigned long,
unsigned long
#endif
);
int fvp_config_setup(void);
void fvp_cci_init(void);

View File

@ -169,5 +169,12 @@
#define CACHE_WRITEBACK_SHIFT 6
#define CACHE_WRITEBACK_GRANULE (1 << CACHE_WRITEBACK_SHIFT)
#if !USE_COHERENT_MEM
/*******************************************************************************
* Size of the per-cpu data in bytes that should be reserved in the generic
* per-cpu data structure for the FVP port.
******************************************************************************/
#define PLAT_PCPU_DATA_SIZE 2
#endif
#endif /* __PLATFORM_DEF_H__ */

View File

@ -40,19 +40,25 @@
******************************************************************************/
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
extern unsigned long __BL32_END__;
#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
#endif
/*
* The next 2 constants identify the extents of the code & RO data region.
* These addresses are used by the MMU setup code and therefore they must be
* page-aligned. It is the responsibility of the linker script to ensure that
* __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
* The next 3 constants identify the extents of the code & RO data region and
* the limit of the BL3-2 image. These addresses are used by the MMU setup code
* and therefore they must be page-aligned. It is the responsibility of the
* linker script to ensure that __RO_START__, __RO_END__ & & __BL32_END__
* linker symbols refer to page-aligned addresses.
*/
#define BL32_RO_BASE (unsigned long)(&__RO_START__)
#define BL32_RO_LIMIT (unsigned long)(&__RO_END__)
#define BL32_END (unsigned long)(&__BL32_END__)
#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@ -62,6 +68,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL32_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL32_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
#endif
/*******************************************************************************
* Initialize the UART
@ -93,9 +100,12 @@ void tsp_platform_setup(void)
void tsp_plat_arch_setup(void)
{
fvp_configure_mmu_el1(BL32_RO_BASE,
(BL32_COHERENT_RAM_LIMIT - BL32_RO_BASE),
(BL32_END - BL32_RO_BASE),
BL32_RO_BASE,
BL32_RO_LIMIT,
BL32_COHERENT_RAM_BASE,
BL32_COHERENT_RAM_LIMIT);
BL32_RO_LIMIT
#if USE_COHERENT_MEM
, BL32_COHERENT_RAM_BASE,
BL32_COHERENT_RAM_LIMIT
#endif
);
}

View File

@ -140,6 +140,7 @@ const unsigned int num_sec_irqs = sizeof(irq_sec_array) /
* Macro generating the code for the function setting up the pagetables as per
* the platform memory map & initialize the mmu, for the given exception level
******************************************************************************/
#if USE_COHERENT_MEM
#define DEFINE_CONFIGURE_MMU_EL(_el) \
void configure_mmu_el##_el(unsigned long total_base, \
unsigned long total_size, \
@ -162,7 +163,25 @@ const unsigned int num_sec_irqs = sizeof(irq_sec_array) /
\
enable_mmu_el##_el(0); \
}
#else
#define DEFINE_CONFIGURE_MMU_EL(_el) \
void configure_mmu_el##_el(unsigned long total_base, \
unsigned long total_size, \
unsigned long ro_start, \
unsigned long ro_limit) \
{ \
mmap_add_region(total_base, total_base, \
total_size, \
MT_MEMORY | MT_RW | MT_SECURE); \
mmap_add_region(ro_start, ro_start, \
ro_limit - ro_start, \
MT_MEMORY | MT_RO | MT_SECURE); \
mmap_add(juno_mmap); \
init_xlat_tables(); \
\
enable_mmu_el##_el(0); \
}
#endif
/* Define EL1 and EL3 variants of the function initialising the MMU */
DEFINE_CONFIGURE_MMU_EL(1)
DEFINE_CONFIGURE_MMU_EL(3)

View File

@ -41,6 +41,7 @@
#include "juno_def.h"
#include "juno_private.h"
#if USE_COHERENT_MEM
/*******************************************************************************
* Declarations of linker defined symbols which will help us find the layout
* of trusted RAM
@ -57,6 +58,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL1_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL1_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
#endif
/* Data structure which holds the extents of the trusted RAM for BL1 */
static meminfo_t bl1_tzram_layout;
@ -189,9 +191,12 @@ void bl1_plat_arch_setup(void)
configure_mmu_el3(bl1_tzram_layout.total_base,
bl1_tzram_layout.total_size,
TZROM_BASE,
TZROM_BASE + TZROM_SIZE,
BL1_COHERENT_RAM_BASE,
BL1_COHERENT_RAM_LIMIT);
TZROM_BASE + TZROM_SIZE
#if USE_COHERENT_MEM
, BL1_COHERENT_RAM_BASE,
BL1_COHERENT_RAM_LIMIT
#endif
);
}
/*******************************************************************************

View File

@ -47,8 +47,10 @@
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
#endif
/*
* The next 2 constants identify the extents of the code & RO data region.
@ -59,6 +61,7 @@ extern unsigned long __COHERENT_RAM_END__;
#define BL2_RO_BASE (unsigned long)(&__RO_START__)
#define BL2_RO_LIMIT (unsigned long)(&__RO_END__)
#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@ -68,11 +71,11 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL2_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL2_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
#endif
/* Data structure which holds the extents of the trusted RAM for BL2 */
static meminfo_t bl2_tzram_layout
__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE),
section("tzfw_coherent_mem")));
__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE)));
/*******************************************************************************
* Structure which holds the arguments which need to be passed to BL3-1
@ -194,9 +197,12 @@ void bl2_plat_arch_setup(void)
configure_mmu_el1(bl2_tzram_layout.total_base,
bl2_tzram_layout.total_size,
BL2_RO_BASE,
BL2_RO_LIMIT,
BL2_COHERENT_RAM_BASE,
BL2_COHERENT_RAM_LIMIT);
BL2_RO_LIMIT
#if USE_COHERENT_MEM
, BL2_COHERENT_RAM_BASE,
BL2_COHERENT_RAM_LIMIT
#endif
);
}
/*******************************************************************************

View File

@ -48,19 +48,25 @@
******************************************************************************/
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
extern unsigned long __BL31_END__;
#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
#endif
/*
* The next 2 constants identify the extents of the code & RO data region.
* These addresses are used by the MMU setup code and therefore they must be
* page-aligned. It is the responsibility of the linker script to ensure that
* __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
* The next 3 constants identify the extents of the code, RO data region and the
* limit of the BL3-1 image. These addresses are used by the MMU setup code and
* therefore they must be page-aligned. It is the responsibility of the linker
* script to ensure that __RO_START__, __RO_END__ & __BL31_END__ linker symbols
* refer to page-aligned addresses.
*/
#define BL31_RO_BASE (unsigned long)(&__RO_START__)
#define BL31_RO_LIMIT (unsigned long)(&__RO_END__)
#define BL31_END (unsigned long)(&__BL31_END__)
#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@ -70,6 +76,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL31_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL31_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
#endif
/******************************************************************************
* Placeholder variables for copying the arguments that have been passed to
@ -178,9 +185,13 @@ void bl31_platform_setup(void)
void bl31_plat_arch_setup()
{
configure_mmu_el3(BL31_RO_BASE,
BL31_COHERENT_RAM_LIMIT - BL31_RO_BASE,
(BL31_END - BL31_RO_BASE),
BL31_RO_BASE,
BL31_RO_LIMIT,
BL31_RO_LIMIT
#if USE_COHERENT_MEM
,
BL31_COHERENT_RAM_BASE,
BL31_COHERENT_RAM_LIMIT);
BL31_COHERENT_RAM_LIMIT
#endif
);
}

View File

@ -174,4 +174,12 @@
#define CACHE_WRITEBACK_SHIFT 6
#define CACHE_WRITEBACK_GRANULE (1 << CACHE_WRITEBACK_SHIFT)
#if !USE_COHERENT_MEM
/*******************************************************************************
* Size of the per-cpu data in bytes that should be reserved in the generic
* per-cpu data structure for the Juno port.
******************************************************************************/
#define PLAT_PCPU_DATA_SIZE 2
#endif
#endif /* __PLATFORM_DEF_H__ */

View File

@ -31,7 +31,9 @@
#ifndef __JUNO_PRIVATE_H__
#define __JUNO_PRIVATE_H__
#include <bakery_lock.h>
#include <bl_common.h>
#include <cpu_data.h>
#include <platform_def.h>
#include <stdint.h>
@ -59,6 +61,68 @@ typedef struct bl2_to_bl31_params_mem {
struct entry_point_info bl31_ep_info;
} bl2_to_bl31_params_mem_t;
#if IMAGE_BL31
#if USE_COHERENT_MEM
/*
* These are wrapper macros to the Coherent Memory Bakery Lock API.
*/
#define juno_lock_init(_lock_arg) bakery_lock_init(_lock_arg)
#define juno_lock_get(_lock_arg) bakery_lock_get(_lock_arg)
#define juno_lock_release(_lock_arg) bakery_lock_release(_lock_arg)
#else
/*******************************************************************************
* Constants that specify how many bakeries this platform implements and bakery
* ids.
******************************************************************************/
#define JUNO_MAX_BAKERIES 1
#define JUNO_MHU_BAKERY_ID 0
/*******************************************************************************
* Definition of structure which holds platform specific per-cpu data. Currently
* it holds only the bakery lock information for each cpu. Constants to specify
* how many bakeries this platform implements and bakery ids are specified in
* juno_def.h
******************************************************************************/
typedef struct juno_cpu_data {
bakery_info_t pcpu_bakery_info[JUNO_MAX_BAKERIES];
} juno_cpu_data_t;
/* Macro to define the offset of bakery_info_t in juno_cpu_data_t */
#define JUNO_CPU_DATA_LOCK_OFFSET __builtin_offsetof\
(juno_cpu_data_t, pcpu_bakery_info)
/*******************************************************************************
* Helper macros for bakery lock api when using the above juno_cpu_data_t for
* bakery lock data structures. It assumes that the bakery_info is at the
* beginning of the platform specific per-cpu data.
******************************************************************************/
#define juno_lock_init(_lock_arg) /* No init required */
#define juno_lock_get(_lock_arg) bakery_lock_get(_lock_arg, \
CPU_DATA_PLAT_PCPU_OFFSET + \
JUNO_CPU_DATA_LOCK_OFFSET)
#define juno_lock_release(_lock_arg) bakery_lock_release(_lock_arg, \
CPU_DATA_PLAT_PCPU_OFFSET + \
JUNO_CPU_DATA_LOCK_OFFSET)
/*
* Ensure that the size of the Juno specific per-cpu data structure and the size
* of the memory allocated in generic per-cpu data for the platform are the same.
*/
CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(juno_cpu_data_t), \
juno_pcpu_data_size_mismatch);
#endif /* __USE_COHERENT_MEM__ */
#else
/*
* Dummy wrapper macros for all other BL stages other than BL3-1
*/
#define juno_lock_init(_lock_arg)
#define juno_lock_get(_lock_arg)
#define juno_lock_release(_lock_arg)
#endif /* __IMAGE_BL31__ */
/*******************************************************************************
* Function and variable prototypes
******************************************************************************/
@ -70,15 +134,21 @@ unsigned int platform_get_core_pos(unsigned long mpidr);
void configure_mmu_el1(unsigned long total_base,
unsigned long total_size,
unsigned long ro_start,
unsigned long ro_limit,
unsigned long coh_start,
unsigned long coh_limit);
unsigned long ro_limit
#if USE_COHERENT_MEM
, unsigned long coh_start,
unsigned long coh_limit
#endif
);
void configure_mmu_el3(unsigned long total_base,
unsigned long total_size,
unsigned long ro_start,
unsigned long ro_limit,
unsigned long coh_start,
unsigned long coh_limit);
unsigned long ro_limit
#if USE_COHERENT_MEM
, unsigned long coh_start,
unsigned long coh_limit
#endif
);
void plat_report_exception(unsigned long type);
unsigned long plat_get_ns_image_entrypoint(void);
unsigned long platform_get_stack(unsigned long mpidr);

View File

@ -32,6 +32,7 @@
#include <bakery_lock.h>
#include <mmio.h>
#include "juno_def.h"
#include "juno_private.h"
#include "mhu.h"
/* SCP MHU secure channel registers */
@ -44,13 +45,20 @@
#define CPU_INTR_S_SET 0x308
#define CPU_INTR_S_CLEAR 0x310
#if IMAGE_BL31
#if USE_COHERENT_MEM
static bakery_lock_t mhu_secure_lock __attribute__ ((section("tzfw_coherent_mem")));
#define LOCK_ARG &mhu_secure_lock
#else
#define LOCK_ARG JUNO_MHU_BAKERY_ID
#endif /*__USE_COHERENT_MEM__ */
#else
#define LOCK_ARG /* Locks required only for BL3-1 images */
#endif /* __IMAGE_BL31__ */
void mhu_secure_message_start(void)
{
bakery_lock_get(&mhu_secure_lock);
juno_lock_get(LOCK_ARG);
/* Make sure any previous command has finished */
while (mmio_read_32(MHU_BASE + CPU_INTR_S_STAT) != 0)
@ -80,12 +88,12 @@ void mhu_secure_message_end(void)
/* Clear any response we got by writing all ones to the CLEAR register */
mmio_write_32(MHU_BASE + SCP_INTR_S_CLEAR, 0xffffffffu);
bakery_lock_release(&mhu_secure_lock);
juno_lock_release(LOCK_ARG);
}
void mhu_secure_init(void)
{
bakery_lock_init(&mhu_secure_lock);
juno_lock_init(LOCK_ARG);
/*
* Clear the CPU's INTR register to make sure we don't see a stale

View File

@ -66,7 +66,6 @@ BL1_SOURCES += drivers/arm/cci400/cci400.c \
plat/juno/aarch64/juno_common.c
BL2_SOURCES += drivers/arm/tzc400/tzc400.c \
lib/locks/bakery/bakery_lock.c \
plat/common/aarch64/platform_up_stack.S \
plat/juno/bl2_plat_setup.c \
plat/juno/mhu.c \

View File

@ -40,19 +40,25 @@
******************************************************************************/
extern unsigned long __RO_START__;
extern unsigned long __RO_END__;
extern unsigned long __BL32_END__;
#if USE_COHERENT_MEM
extern unsigned long __COHERENT_RAM_START__;
extern unsigned long __COHERENT_RAM_END__;
#endif
/*
* The next 2 constants identify the extents of the code & RO data region.
* These addresses are used by the MMU setup code and therefore they must be
* page-aligned. It is the responsibility of the linker script to ensure that
* __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
* The next 3 constants identify the extents of the code, RO data region and the
* limit of the BL3-2 image. These addresses are used by the MMU setup code and
* therefore they must be page-aligned. It is the responsibility of the linker
* script to ensure that __RO_START__, __RO_END__ & __BL32_END__ linker symbols
* refer to page-aligned addresses.
*/
#define BL32_RO_BASE (unsigned long)(&__RO_START__)
#define BL32_RO_LIMIT (unsigned long)(&__RO_END__)
#define BL32_END (unsigned long)(&__BL32_END__)
#if USE_COHERENT_MEM
/*
* The next 2 constants identify the extents of the coherent memory region.
* These addresses are used by the MMU setup code and therefore they must be
@ -62,6 +68,7 @@ extern unsigned long __COHERENT_RAM_END__;
*/
#define BL32_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
#define BL32_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
#endif
/*******************************************************************************
* Initialize the UART
@ -90,9 +97,12 @@ void tsp_platform_setup(void)
void tsp_plat_arch_setup(void)
{
configure_mmu_el1(BL32_RO_BASE,
BL32_COHERENT_RAM_LIMIT - BL32_RO_BASE,
(BL32_END - BL32_RO_BASE),
BL32_RO_BASE,
BL32_RO_LIMIT,
BL32_COHERENT_RAM_BASE,
BL32_COHERENT_RAM_LIMIT);
BL32_RO_LIMIT
#if USE_COHERENT_MEM
, BL32_COHERENT_RAM_BASE,
BL32_COHERENT_RAM_LIMIT
#endif
);
}

View File

@ -51,7 +51,10 @@ const spd_pm_ops_t *psci_spd_pm;
* corresponds to an affinity instance e.g. cluster, cpu within an mpidr
******************************************************************************/
aff_map_node_t psci_aff_map[PSCI_NUM_AFFS]
__attribute__ ((section("tzfw_coherent_mem")));
#if USE_COHERENT_MEM
__attribute__ ((section("tzfw_coherent_mem")))
#endif
;
/*******************************************************************************
* Pointer to functions exported by the platform to complete power mgmt. ops
@ -246,7 +249,8 @@ void psci_acquire_afflvl_locks(int start_afflvl,
for (level = start_afflvl; level <= end_afflvl; level++) {
if (mpidr_nodes[level] == NULL)
continue;
bakery_lock_get(&mpidr_nodes[level]->lock);
psci_lock_get(mpidr_nodes[level]);
}
}
@ -264,7 +268,8 @@ void psci_release_afflvl_locks(int start_afflvl,
for (level = end_afflvl; level >= start_afflvl; level--) {
if (mpidr_nodes[level] == NULL)
continue;
bakery_lock_release(&mpidr_nodes[level]->lock);
psci_lock_release(mpidr_nodes[level]);
}
}
@ -350,6 +355,10 @@ int psci_save_ns_entry(uint64_t mpidr,
******************************************************************************/
unsigned short psci_get_state(aff_map_node_t *node)
{
#if !USE_COHERENT_MEM
flush_dcache_range((uint64_t) node, sizeof(*node));
#endif
assert(node->level >= MPIDR_AFFLVL0 && node->level <= MPIDR_MAX_AFFLVL);
/* A cpu node just contains the state which can be directly returned */
@ -407,6 +416,10 @@ void psci_set_state(aff_map_node_t *node, unsigned short state)
node->state &= ~(PSCI_STATE_MASK << PSCI_STATE_SHIFT);
node->state |= (state & PSCI_STATE_MASK) << PSCI_STATE_SHIFT;
}
#if !USE_COHERENT_MEM
flush_dcache_range((uint64_t) node, sizeof(*node));
#endif
}
/*******************************************************************************

View File

@ -33,14 +33,22 @@
#include <arch.h>
#include <bakery_lock.h>
#include <platform_def.h> /* for PLATFORM_NUM_AFFS */
#include <psci.h>
/* Number of affinity instances whose state this psci imp. can track */
#ifdef PLATFORM_NUM_AFFS
#define PSCI_NUM_AFFS PLATFORM_NUM_AFFS
/*
* The following helper macros abstract the interface to the Bakery
* Lock API.
*/
#if USE_COHERENT_MEM
#define psci_lock_init(aff_map, idx) bakery_lock_init(&(aff_map)[(idx)].lock)
#define psci_lock_get(node) bakery_lock_get(&((node)->lock))
#define psci_lock_release(node) bakery_lock_release(&((node)->lock))
#else
#define PSCI_NUM_AFFS (2 * PLATFORM_CORE_COUNT)
#define psci_lock_init(aff_map, idx) ((aff_map)[(idx)].aff_map_index = (idx))
#define psci_lock_get(node) bakery_lock_get((node)->aff_map_index, \
CPU_DATA_PSCI_LOCK_OFFSET)
#define psci_lock_release(node) bakery_lock_release((node)->aff_map_index,\
CPU_DATA_PSCI_LOCK_OFFSET)
#endif
/*******************************************************************************
@ -49,10 +57,15 @@
******************************************************************************/
typedef struct aff_map_node {
unsigned long mpidr;
unsigned short ref_count;
unsigned char ref_count;
unsigned char state;
unsigned char level;
#if USE_COHERENT_MEM
bakery_lock_t lock;
#else
/* For indexing the bakery_info array in per CPU data */
unsigned char aff_map_index;
#endif
} aff_map_node_t;
typedef struct aff_limits_node {

View File

@ -181,7 +181,7 @@ static void psci_init_aff_map_node(unsigned long mpidr,
uint32_t linear_id;
psci_aff_map[idx].mpidr = mpidr;
psci_aff_map[idx].level = level;
bakery_lock_init(&psci_aff_map[idx].lock);
psci_lock_init(psci_aff_map, idx);
/*
* If an affinity instance is present then mark it as OFF to begin with.
@ -331,13 +331,20 @@ int32_t psci_setup(void)
afflvl);
}
#if !USE_COHERENT_MEM
/*
* The psci_aff_map only needs flushing when it's not allocated in
* coherent memory.
*/
flush_dcache_range((uint64_t) &psci_aff_map, sizeof(psci_aff_map));
#endif
/*
* Set the bounds for the affinity counts of each level in the map. Also
* flush out the entire array so that it's visible to subsequent power
* management operations. The 'psci_aff_map' array is allocated in
* coherent memory so does not need flushing. The 'psci_aff_limits'
* array is allocated in normal memory. It will be accessed when the mmu
* is off e.g. after reset. Hence it needs to be flushed.
* management operations. The 'psci_aff_limits' array is allocated in
* normal memory. It will be accessed when the mmu is off e.g. after
* reset. Hence it needs to be flushed.
*/
for (afflvl = MPIDR_AFFLVL0; afflvl < max_afflvl; afflvl++) {
psci_aff_limits[afflvl].min =