Trusted Firmware-A (TF-A) patch description: lib/aarch64/misc_helpers.S vs plat/arm/board/fvp_r/fvp_r_misc_helpers.S
This page compares the generic AArch64 helper file lib/aarch64/misc_helpers.S with the FVP-R platform copy plat/arm/board/fvp_r/fvp_r_misc_helpers.S. The FVP-R file keeps only the "disable translation and caches" helpers, retargeted from the EL3 MMU to the EL2 MPU (disable_mmu_el3/disable_mmu_icache_el3 become disable_mpu_el2/disable_mpu_icache_el2), and drops every other helper. The comparison below follows the order of lib/aarch64/misc_helpers.S; each block is labelled with the file it belongs to.

File header and exported symbols

lib/aarch64/misc_helpers.S:

    /*
     * Copyright (c) 2013-2021, Arm Limited and Contributors. All rights reserved.
     *
     * SPDX-License-Identifier: BSD-3-Clause
     */

    #include <arch.h>
    #include <asm_macros.S>
    #include <assert_macros.S>
    #include <common/bl_common.h>
    #include <lib/xlat_tables/xlat_tables_defs.h>

            .globl  smc

            .globl  zero_normalmem
            .globl  zeromem
            .globl  memcpy16

            .globl  disable_mmu_el1
            .globl  disable_mmu_el3
            .globl  disable_mmu_icache_el1
            .globl  disable_mmu_icache_el3
            .globl  fixup_gdt_reloc
    #if SUPPORT_VFP
            .globl  enable_vfp
    #endif

plat/arm/board/fvp_r/fvp_r_misc_helpers.S:

    /*
     * Copyright (c) 2013-2021, ARM Limited and Contributors. All rights reserved.
     *
     * SPDX-License-Identifier: BSD-3-Clause
     */

    #include <asm_macros.S>

            .globl  disable_mpu_el2
            .globl  disable_mpu_icache_el2

Only in lib/aarch64/misc_helpers.S (smc and the zeroing entry points):

    func smc
            smc     #0
    endfunc smc

    /* -----------------------------------------------------------------------
     * void zero_normalmem(void *mem, unsigned int length);
     *
     * Initialise a region in normal memory to 0. This function complies with the
     * AAPCS and can be called from C code.
     *
     * NOTE: MMU must be enabled when using this function as it can only operate
     *       on normal memory. It is intended to be mainly used from C code when
     *       MMU is usually enabled.
     * -----------------------------------------------------------------------
     */
    .equ    zero_normalmem, zeromem_dczva

    /* -----------------------------------------------------------------------
     * void zeromem(void *mem, unsigned int length);
     *
     * Initialise a region of device memory to 0. This function complies with the
     * AAPCS and can be called from C code.
     *
     * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
     *       used instead for faster zeroing.
     * -----------------------------------------------------------------------
     */
    func zeromem
            /* x2 is the address past the last zeroed address */
            add     x2, x0, x1
            /*
             * Uses the fallback path that does not use the DC ZVA instruction
             * and therefore does not need the MMU to be enabled
             */
            b       .Lzeromem_dczva_fallback_entry
    endfunc zeromem
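For reference, a minimal C sketch of how the two zeroing entry points above are meant to be used. The prototypes are the ones given in the comments; the caller, the buffer and the mmu_enabled flag are illustrative, not part of the page or of TF-A.

    #include <stdint.h>

    /* Prototypes as documented in lib/aarch64/misc_helpers.S */
    void zeromem(void *mem, unsigned int length);        /* safe with MMU off */
    void zero_normalmem(void *mem, unsigned int length); /* requires MMU on   */

    static uint8_t scratch[256];

    /* Illustrative caller: pick the zeroing routine based on MMU state. */
    static void clear_scratch(int mmu_enabled)
    {
            if (mmu_enabled) {
                    /* Fast path: resolves to zeromem_dczva, which uses DC ZVA */
                    zero_normalmem(scratch, sizeof(scratch));
            } else {
                    /* Device-memory-safe path: plain byte/16-byte stores only */
                    zeromem(scratch, sizeof(scratch));
            }
    }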
Only in lib/aarch64/misc_helpers.S (zeromem_dczva, the DC ZVA based zeroing behind zero_normalmem):

    /* -----------------------------------------------------------------------
     * void zeromem_dczva(void *mem, unsigned int length);
     *
     * Fill a region of normal memory of size "length" in bytes with null bytes.
     * MMU must be enabled and the memory be of normal type. This is because this
     * function internally uses the DC ZVA instruction, which generates an
     * Alignment fault if used on any type of Device memory (see section D3.4.9
     * of the ARMv8 ARM, issue k). When the MMU is disabled, all memory behaves
     * like Device-nGnRnE memory (see section D4.2.8), hence the requirement on
     * the MMU being enabled.
     * NOTE: The code assumes that the block size as defined in the DCZID_EL0
     *       register is at least 16 bytes.
     * -----------------------------------------------------------------------
     */
    func zeromem_dczva

            /*
             * The function consists of a series of loops that zero memory one
             * byte at a time, 16 bytes at a time or using the DC ZVA instruction
             * to zero an aligned block of bytes, which is assumed to be more
             * than 16. In the case where the DC ZVA instruction cannot be used,
             * or if the first 16 bytes loop would overflow, there is a fallback
             * path that does not use DC ZVA.
             * Note: The fallback path is also used by the zeromem function that
             *       branches to it directly.
             *
             * Flow of the function (drawn as an ASCII diagram in the original
             * file): entry -> checks -> initial 1 byte loop -> 16 bytes loop
             * (.Lzeromem_dczva_initial_1byte_aligned_end) -> DC ZVA loop
             * (.Lzeromem_dczva_blocksize_aligned) -> final 16 bytes loop
             * (.Lzeromem_dczva_final_16bytes_aligned) -> final 1 byte loop
             * (.Lzeromem_dczva_final_1byte_aligned) -> exit. If any check fails,
             * or when entered via zeromem, control goes to the fallback 1 byte
             * loop at .Lzeromem_dczva_fallback_entry and then rejoins the final
             * 16 bytes loop.
             */

            /*
             * Readable names for registers
             *
             * Registers x0, x1 and x2 are also set by zeromem, which branches
             * into the fallback path directly, so cursor, length and
             * stop_address should not be retargeted to other registers.
             */
            cursor       .req x0 /* Start address and then current address */
            length       .req x1 /* Length in bytes of the region to zero out */
            /* Reusing x1 as length is never used after block_mask is set */
            block_mask   .req x1 /* Bitmask of the block size read in dczid_el0 */
            stop_address .req x2 /* Address past the last zeroed byte */
            block_size   .req x3 /* Size of a block in bytes as read in dczid_el0 */
            tmp1         .req x4
            tmp2         .req x5

    #if ENABLE_ASSERTIONS
            /*
             * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
             * register value and panic if the MMU is disabled.
             */
    #if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)
            mrs     tmp1, sctlr_el3
    #else
            mrs     tmp1, sctlr_el1
    #endif

            tst     tmp1, #SCTLR_M_BIT
            ASM_ASSERT(ne)
    #endif /* ENABLE_ASSERTIONS */

            /* stop_address is the address past the last to zero */
            add     stop_address, cursor, length

            /*
             * Get block_size = (log2(<block size>) >> 2) (see encoding of
             * dczid_el0 reg)
             */
            mrs     block_size, dczid_el0

            /*
             * Select the 4 lowest bits and convert the extracted value from
             * <log2(<block size in words>)> to <block size in bytes>
             */
            ubfx    block_size, block_size, #0, #4
            mov     tmp2, #(1 << 2)
            lsl     block_size, tmp2, block_size

    #if ENABLE_ASSERTIONS
            /*
             * Assumes block size is at least 16 bytes to avoid manual
             * realignment of the cursor at the end of the DCZVA loop.
             */
            cmp     block_size, #16
            ASM_ASSERT(hs)
    #endif
            /*
             * Not worth doing all the setup for a region less than a block and
             * protects against zeroing a whole block when the area to zero is
             * smaller than that. Also, as it is assumed that the block size is
             * at least 16 bytes, this also protects the initial aligning loops
             * from trying to zero 16 bytes when length is less than 16.
             */
            cmp     length, block_size
            b.lo    .Lzeromem_dczva_fallback_entry

            /*
             * Calculate the bitmask of the block alignment. It will never
             * underflow as the block size is between 4 bytes and 2kB.
             * block_mask = block_size - 1
             */
            sub     block_mask, block_size, #1

            /*
             * length alias should not be used after this point unless it is
             * defined as a register other than block_mask's.
             */
            .unreq  length

            /*
             * If the start address is already aligned to the zero block size,
             * go straight to the cache zeroing loop. This is safe because at
             * this point, the length cannot be smaller than a block size.
             */
            tst     cursor, block_mask
            b.eq    .Lzeromem_dczva_blocksize_aligned

            /*
             * Calculate the first block-size-aligned address. It is assumed
             * that the zero block size is at least 16 bytes. This address is
             * the stop address of the initial aligning loops.
             */
            orr     tmp1, cursor, block_mask
            add     tmp1, tmp1, #1

            /*
             * If the addition overflows, skip the cache zeroing loops. This is
             * quite unlikely however.
             */
            cbz     tmp1, .Lzeromem_dczva_fallback_entry

            /*
             * If the first block-size-aligned address is past the last address,
             * fallback to the simpler code.
             */
            cmp     tmp1, stop_address
            b.hi    .Lzeromem_dczva_fallback_entry

            /*
             * If the start address is already aligned to 16 bytes, skip this
             * loop. It is safe to do this because tmp1 (the stop address of
             * the 16 bytes loop) will never be greater than the final stop
             * address.
             */
            tst     cursor, #0xf
            b.eq    .Lzeromem_dczva_initial_1byte_aligned_end

            /* Calculate the next address aligned to 16 bytes */
            orr     tmp2, cursor, #0xf
            add     tmp2, tmp2, #1
            /* If it overflows, fallback to the simple path (unlikely) */
            cbz     tmp2, .Lzeromem_dczva_fallback_entry
            /*
             * Next aligned address cannot be after the stop address because
             * the length cannot be smaller than 16 at this point.
             */

            /* First loop: zero byte per byte */
    1:
            strb    wzr, [cursor], #1
            cmp     cursor, tmp2
            b.ne    1b
    .Lzeromem_dczva_initial_1byte_aligned_end:

            /*
             * Second loop: we need to zero 16 bytes at a time from cursor to
             * tmp1 before being able to use the code that deals with
             * block-size-aligned addresses.
             */
            cmp     cursor, tmp1
            b.hs    2f
    1:
            stp     xzr, xzr, [cursor], #16
            cmp     cursor, tmp1
            b.lo    1b
    2:

            /*
             * Third loop: zero a block at a time using the DC ZVA cache block
             * zeroing instruction.
             */
    .Lzeromem_dczva_blocksize_aligned:
            /*
             * Calculate the last block-size-aligned address. If the result is
             * equal to the start address, the loop will exit immediately.
             */
            bic     tmp1, stop_address, block_mask

            cmp     cursor, tmp1
            b.hs    2f
    1:
            /* Zero the block containing the cursor */
            dc      zva, cursor
            /* Increment the cursor by the size of a block */
            add     cursor, cursor, block_size
            cmp     cursor, tmp1
            b.lo    1b
    2:

            /*
             * Fourth loop: zero 16 bytes at a time and then byte per byte the
             * remaining area
             */
    .Lzeromem_dczva_final_16bytes_aligned:
            /*
             * Calculate the last 16 bytes aligned address. It is assumed that
             * the block size will never be smaller than 16 bytes and that the
             * current cursor is aligned to at least a 16 bytes boundary.
             */
            bic     tmp1, stop_address, #15

            cmp     cursor, tmp1
            b.hs    2f
    1:
            stp     xzr, xzr, [cursor], #16
            cmp     cursor, tmp1
            b.lo    1b
    2:

            /* Fifth and final loop: zero byte per byte */
    .Lzeromem_dczva_final_1byte_aligned:
            cmp     cursor, stop_address
            b.eq    2f
    1:
            strb    wzr, [cursor], #1
            cmp     cursor, stop_address
            b.ne    1b
    2:
            ret

            /* Fallback for unaligned start addresses */
    .Lzeromem_dczva_fallback_entry:
            /*
             * If the start address is already aligned to 16 bytes, skip this
             * loop.
             */
            tst     cursor, #0xf
            b.eq    .Lzeromem_dczva_final_16bytes_aligned

            /* Calculate the next address aligned to 16 bytes */
            orr     tmp1, cursor, #15
            add     tmp1, tmp1, #1
            /* If it overflows, fallback to byte per byte zeroing */
            cbz     tmp1, .Lzeromem_dczva_final_1byte_aligned
            /* If the next aligned address is after the stop address, fallback */
            cmp     tmp1, stop_address
            b.hs    .Lzeromem_dczva_final_1byte_aligned

            /* Fallback entry loop: zero byte per byte */
    1:
            strb    wzr, [cursor], #1
            cmp     cursor, tmp1
            b.ne    1b

            b       .Lzeromem_dczva_final_16bytes_aligned

            .unreq  cursor
            /*
             * length is already unreq'ed to reuse the register for another
             * variable.
             */
            .unreq  stop_address
            .unreq  block_size
            .unreq  block_mask
            .unreq  tmp1
            .unreq  tmp2
    endfunc zeromem_dczva
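To make the loop structure easier to follow, here is a C-level sketch of the same zeroing strategy. It is a sketch only: dc_zva() and read_dczid_el0() stand in for the DC ZVA instruction and the DCZID_EL0 read done in assembly, and zeromem_dczva_sketch is not a TF-A function.

    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative stand-ins for the DC ZVA instruction and DCZID_EL0 read */
    extern void dc_zva(void *block_aligned_addr);
    extern uint64_t read_dczid_el0(void);

    static void zeromem_dczva_sketch(void *mem, size_t length)
    {
            uintptr_t cursor = (uintptr_t)mem;
            uintptr_t stop = cursor + length;
            /* DCZID_EL0[3:0] = log2(block size in words); bytes = 4 << that */
            size_t block_size = (size_t)4 << (read_dczid_el0() & 0xfU);
            size_t block_mask = block_size - 1;

            if (length >= block_size) {
                    /* Align to 16 bytes, then to the zero block size */
                    while (cursor & 0xf)
                            *(uint8_t *)cursor++ = 0;
                    while (cursor & block_mask) {
                            *(uint64_t *)cursor = 0;
                            *(uint64_t *)(cursor + 8) = 0;
                            cursor += 16;
                    }
                    /* Zero whole blocks with DC ZVA */
                    while (stop - cursor >= block_size) {
                            dc_zva((void *)cursor);
                            cursor += block_size;
                    }
            }
            /* Tail / fallback: byte align to 16, then 16 bytes, then bytes */
            while ((cursor & 0xf) && cursor < stop)
                    *(uint8_t *)cursor++ = 0;
            while (stop - cursor >= 16) {
                    *(uint64_t *)cursor = 0;
                    *(uint64_t *)(cursor + 8) = 0;
                    cursor += 16;
            }
            while (cursor < stop)
                    *(uint8_t *)cursor++ = 0;
    }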
Only in lib/aarch64/misc_helpers.S (memcpy16):

    /* -----------------------------------------------------------------------
     * void memcpy16(void *dest, const void *src, unsigned int length);
     *
     * Copy length bytes from memory area src to memory area dest.
     * The memory areas should not overlap.
     * Destination and source addresses must be 16-byte aligned.
     * -----------------------------------------------------------------------
     */
    func memcpy16
    #if ENABLE_ASSERTIONS
            orr     x3, x0, x1
            tst     x3, #0xf
            ASM_ASSERT(eq)
    #endif
    /* copy 16 bytes at a time */
    m_loop16:
            cmp     x2, #16
            b.lo    m_loop1
            ldp     x3, x4, [x1], #16
            stp     x3, x4, [x0], #16
            sub     x2, x2, #16
            b       m_loop16
    /* copy byte per byte */
    m_loop1:
            cbz     x2, m_end
            ldrb    w3, [x1], #1
            strb    w3, [x0], #1
            subs    x2, x2, #1
            b.ne    m_loop1
    m_end:
            ret
    endfunc memcpy16
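A minimal sketch of calling memcpy16 from C, using the prototype given in the comment above. The buffers and copy_example are illustrative; the 16-byte alignment of both pointers is the caller's responsibility, which is what the ENABLE_ASSERTIONS check enforces.

    #include <stdint.h>

    void memcpy16(void *dest, const void *src, unsigned int length);

    /* Both buffers are 16-byte aligned, as memcpy16 requires. */
    static uint64_t src_buf[8] __attribute__((aligned(16)));
    static uint64_t dst_buf[8] __attribute__((aligned(16)));

    static void copy_example(void)
    {
            /*
             * The length does not have to be a multiple of 16: trailing
             * bytes are copied one at a time by the m_loop1 path.
             */
            memcpy16(dst_buf, src_buf, sizeof(src_buf));
    }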
Changed (the generic EL3 MMU helpers become EL2 MPU helpers in the FVP-R file):

lib/aarch64/misc_helpers.S:

    /* -----------------------------------------------------------------------
     * Disable the MMU at EL3
     * -----------------------------------------------------------------------
     */

    func disable_mmu_el3
            mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
    do_disable_mmu_el3:
            mrs     x0, sctlr_el3
            bic     x0, x0, x1
            msr     sctlr_el3, x0
            isb                             /* ensure MMU is off */
            dsb     sy
            ret
    endfunc disable_mmu_el3


    func disable_mmu_icache_el3
            mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
            b       do_disable_mmu_el3
    endfunc disable_mmu_icache_el3

plat/arm/board/fvp_r/fvp_r_misc_helpers.S:

    /* -----------------------------------------------------------------------
     * Disable the MPU at EL2.
     * -----------------------------------------------------------------------
     */

    func disable_mpu_el2
            mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
    do_disable_mpu_el2:
            mrs     x0, sctlr_el2
            bic     x0, x0, x1
            msr     sctlr_el2, x0
            isb                             /* ensure MMU is off */
            dsb     sy
            ret
    endfunc disable_mpu_el2


    func disable_mpu_icache_el2
            mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
            b       do_disable_mpu_el2
    endfunc disable_mpu_icache_el2

Only in lib/aarch64/misc_helpers.S (disable_mmu_el1, disable_mmu_icache_el1, enable_vfp):

    /* -----------------------------------------------------------------------
     * Disable the MMU at EL1
     * -----------------------------------------------------------------------
     */

    func disable_mmu_el1
            mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
    do_disable_mmu_el1:
            mrs     x0, sctlr_el1
            bic     x0, x0, x1
            msr     sctlr_el1, x0
            isb                             /* ensure MMU is off */
            dsb     sy
            ret
    endfunc disable_mmu_el1


    func disable_mmu_icache_el1
            mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
            b       do_disable_mmu_el1
    endfunc disable_mmu_icache_el1

    /* -----------------------------------------------------------------------
     * Enable the use of VFP at EL3
     * -----------------------------------------------------------------------
     */
    #if SUPPORT_VFP
    func enable_vfp
            mrs     x0, cpacr_el1
            orr     x0, x0, #CPACR_VFP_BITS
            msr     cpacr_el1, x0
            mrs     x0, cptr_el3
            mov     x1, #AARCH64_CPTR_TFP
            bic     x0, x0, x1
            msr     cptr_el3, x0
            isb
            ret
    endfunc enable_vfp
    #endif
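The FVP-R variant performs the same operation as the generic helper, only on SCTLR_EL2 instead of SCTLR_EL3. A C-level rendering of what both functions do is sketched below; the register accessors are illustrative stand-ins for the mrs/msr/isb/dsb instructions, and the bit values mirror the architectural SCTLR_ELx definitions used by TF-A's arch.h.

    #include <stdint.h>

    #define SCTLR_M_BIT     (1u << 0)       /* MMU/MPU enable */
    #define SCTLR_C_BIT     (1u << 2)       /* Data cache enable */
    #define SCTLR_I_BIT     (1u << 12)      /* Instruction cache enable */

    /* Illustrative stand-ins for the system register and barrier instructions */
    extern uint64_t read_sctlr_el2(void);
    extern void write_sctlr_el2(uint64_t val);
    extern void isb(void);
    extern void dsb_sy(void);

    static void disable_mpu_el2_sketch(int also_disable_icache)
    {
            uint64_t mask = SCTLR_M_BIT | SCTLR_C_BIT;

            if (also_disable_icache)
                    mask |= SCTLR_I_BIT;    /* disable_mpu_icache_el2 path */

            write_sctlr_el2(read_sctlr_el2() & ~mask);
            isb();          /* ensure translation/caches are off before continuing */
            dsb_sy();
    }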
Only in lib/aarch64/misc_helpers.S (fixup_gdt_reloc):

    /* -----------------------------------------------------------------------
     * Helper to fixup Global Descriptor table (GDT) and dynamic relocations
     * (.rela.dyn) at runtime.
     *
     * This function is meant to be used when the firmware is compiled with -fpie
     * and linked with -pie options. We rely on the linker script exporting
     * appropriate markers for start and end of the section. For GOT, we expect
     * __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
     * __RELA_START__ and __RELA_END__.
     *
     * The function takes the limits of the memory to apply fixups to as
     * arguments (which is usually the limits of the relocable BL image).
     *   x0 - the start of the fixup region
     *   x1 - the limit of the fixup region
     * These addresses have to be 4KB page aligned.
     * -----------------------------------------------------------------------
     */

    /* Relocation codes */
    #define R_AARCH64_NONE          0
    #define R_AARCH64_RELATIVE      1027

    func fixup_gdt_reloc
            mov     x6, x0
            mov     x7, x1

    #if ENABLE_ASSERTIONS
            /* Test if the limits are 4KB aligned */
            orr     x0, x0, x1
            tst     x0, #(PAGE_SIZE_MASK)
            ASM_ASSERT(eq)
    #endif
            /*
             * Calculate the offset based on return address in x30.
             * Assume that this function is called within a page at the start
             * of the fixup region.
             */
            and     x2, x30, #~(PAGE_SIZE_MASK)
            subs    x0, x2, x6      /* Diff(S) = Current Address - Compiled Address */
            b.eq    3f              /* Diff(S) = 0. No relocation needed */

            adrp    x1, __GOT_START__
            add     x1, x1, :lo12:__GOT_START__
            adrp    x2, __GOT_END__
            add     x2, x2, :lo12:__GOT_END__

            /*
             * GOT is an array of 64_bit addresses which must be fixed up as
             * new_addr = old_addr + Diff(S).
             * The new_addr is the address currently the binary is executing from
             * and old_addr is the address at compile time.
             */
    1:      ldr     x3, [x1]

            /* Skip adding offset if address is < lower limit */
            cmp     x3, x6
            b.lo    2f

            /* Skip adding offset if address is >= upper limit */
            cmp     x3, x7
            b.hs    2f
            add     x3, x3, x0
            str     x3, [x1]

    2:      add     x1, x1, #8
            cmp     x1, x2
            b.lo    1b

            /* Starting dynamic relocations. Use adrp/adr to get the addresses */
    3:      adrp    x1, __RELA_START__
            add     x1, x1, :lo12:__RELA_START__
            adrp    x2, __RELA_END__
            add     x2, x2, :lo12:__RELA_END__

            /*
             * According to ELF-64 specification, the RELA data structure is as
             * follows:
             *      typedef struct {
             *              Elf64_Addr   r_offset;
             *              Elf64_Xword  r_info;
             *              Elf64_Sxword r_addend;
             *      } Elf64_Rela;
             *
             * r_offset is address of reference
             * r_info is symbol index and type of relocation (in this case
             * code 1027 which corresponds to R_AARCH64_RELATIVE).
             * r_addend is constant part of expression.
             *
             * Size of Elf64_Rela structure is 24 bytes.
             */

            /* Skip R_AARCH64_NONE entry with code 0 */
    1:      ldr     x3, [x1, #8]
            cbz     x3, 2f

    #if ENABLE_ASSERTIONS
            /* Assert that the relocation type is R_AARCH64_RELATIVE */
            cmp     x3, #R_AARCH64_RELATIVE
            ASM_ASSERT(eq)
    #endif
            ldr     x3, [x1]        /* r_offset */
            add     x3, x0, x3
            ldr     x4, [x1, #16]   /* r_addend */

            /* Skip adding offset if r_addend is < lower limit */
            cmp     x4, x6
            b.lo    2f

            /* Skip adding offset if r_addend entry is >= upper limit */
            cmp     x4, x7
            b.hs    2f

            add     x4, x0, x4      /* Diff(S) + r_addend */
            str     x4, [x3]

    2:      add     x1, x1, #24
            cmp     x1, x2
            b.lo    1b
            ret
    endfunc fixup_gdt_reloc
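To make the relocation walk easier to follow, here is a C-level sketch of the .rela.dyn half of fixup_gdt_reloc. The Elf64_Rela layout is the one quoted in the comment above; fixup_rela_sketch and the diff/lower/upper parameter names are illustrative (they correspond to x0, x6 and x7 in the assembly), not TF-A code.

    #include <stdint.h>

    typedef struct {
            uint64_t r_offset;      /* address of reference (link-time) */
            uint64_t r_info;        /* relocation type, 1027 = R_AARCH64_RELATIVE */
            int64_t  r_addend;      /* constant part of the expression */
    } Elf64_Rela;

    #define R_AARCH64_NONE          0
    #define R_AARCH64_RELATIVE      1027

    /* Linker-provided section markers, as expected by fixup_gdt_reloc */
    extern Elf64_Rela __RELA_START__[], __RELA_END__[];

    /*
     * diff  = runtime load address - link-time address (Diff(S))
     * lower / upper = limits of the region being fixed up
     */
    static void fixup_rela_sketch(uint64_t diff, uint64_t lower, uint64_t upper)
    {
            for (Elf64_Rela *r = __RELA_START__; r < __RELA_END__; r++) {
                    if (r->r_info == R_AARCH64_NONE)
                            continue;
                    /* Entries are expected to be R_AARCH64_RELATIVE here. */

                    /* Only addends that point into the image are rebased */
                    if ((uint64_t)r->r_addend < lower ||
                        (uint64_t)r->r_addend >= upper)
                            continue;

                    /* Write Diff(S) + r_addend at the relocated r_offset */
                    *(uint64_t *)(r->r_offset + diff) =
                            diff + (uint64_t)r->r_addend;
            }
    }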
Last Author: garymorrison-arm
Last Edited: Jul 2 2021, 11:01 PM