
lib/aarch64/misc_helpers.S vs plat/arm/board/fvp_r/fvp_r_misc_helpers.S
Updated 1,237 Days Ago · Public

/*                                                            /*
 * Copyright (c) 2013-2021, Arm Limited and Contributors.  |   * Copyright (c) 2013-2021, ARM Limited and Contributors. 
 *                                                             *
 * SPDX-License-Identifier: BSD-3-Clause                       * SPDX-License-Identifier: BSD-3-Clause
 */                                                            */

#include <arch.h>                                          <
#include <asm_macros.S>                                       #include <asm_macros.S>
#include <assert_macros.S>                                 <
#include <common/bl_common.h>                              <
#include <lib/xlat_tables/xlat_tables_defs.h>              <

        .globl  smc                                        |          .globl  disable_mpu_el2
                                                           |          .globl  disable_mpu_icache_el2
        .globl  zero_normalmem                             <
        .globl  zeromem                                    <
        .globl  memcpy16                                   <
                                                           <
        .globl  disable_mmu_el1                            <
        .globl  disable_mmu_el3                            <
        .globl  disable_mmu_icache_el1                     <
        .globl  disable_mmu_icache_el3                     <
        .globl  fixup_gdt_reloc                            <
#if SUPPORT_VFP                                            <
        .globl  enable_vfp                                 <
#endif                                                     <
                                                           <
func smc                                                   <
        smc     #0                                         <
endfunc smc                                                <
                                                           <
/* -----------------------------------------------------------------------  <
 * void zero_normalmem(void *mem, unsigned int length);  <
 *  <
 * Initialise a region in normal memory to 0. This function complies with the  <
 * AAPCS and can be called from C code.  <
 *  <
 * NOTE: MMU must be enabled when using this function as it can only operate on  <
 *       normal memory. It is intended to be mainly used from C code when MMU  <
 *       is usually enabled.  <
 * -----------------------------------------------------------------------  <
 */  <
.equ    zero_normalmem, zeromem_dczva                      <
                                                           <
/* -----------------------------------------------------------------------  <
 * void zeromem(void *mem, unsigned int length);  <
 *  <
 * Initialise a region of device memory to 0. This function complies with the  <
 * AAPCS and can be called from C code.  <
 *  <
 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be  <
 *       used instead for faster zeroing.  <
 *  <
 * -----------------------------------------------------------------------  <
 */  <
func zeromem                                               <
        /* x2 is the address past the last zeroed address */  <
        add     x2, x0, x1                                 <
        /*                                                 <
         * Uses the fallback path that does not use DC ZVA instruction and  <
         * therefore does not need enabled MMU             <
         */                                                <
        b       .Lzeromem_dczva_fallback_entry             <
endfunc zeromem                                            <
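
For reference, a minimal C usage sketch of the two zeroing helpers above. The prototypes are declared locally here and the buffer and function names (clear_buffers and friends) are illustrative only; in-tree callers pick the prototypes up from the firmware's headers.

/* Illustrative sketch only, not part of the sources shown above. */
void zeromem(void *mem, unsigned int length);
void zero_normalmem(void *mem, unsigned int length);

static unsigned char early_buffer[64];      /* may be cleared before the MMU is enabled */
static unsigned char runtime_buffer[4096];  /* Normal memory once the MMU is on */

void clear_buffers(int mmu_enabled)
{
        if (mmu_enabled)
                zero_normalmem(runtime_buffer, sizeof(runtime_buffer)); /* may use DC ZVA */
        else
                zeromem(early_buffer, sizeof(early_buffer));            /* plain stores only */
}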
                                                           <
/* -----------------------------------------------------------------------  <
 * void zeromem_dczva(void *mem, unsigned int length);  <
 *  <
 * Fill a region of normal memory of size "length" in bytes with null bytes.  <
 * MMU must be enabled and the memory must be of  <
 * normal type. This is because this function internally uses the DC ZVA  <
 * instruction, which generates an Alignment fault if used on any type of  <
 * Device memory (see section D3.4.9 of the ARMv8 ARM, issue k). When the MMU  <
 * is disabled, all memory behaves like Device-nGnRnE memory (see section  <
 * D4.2.8), hence the requirement on the MMU being enabled.  <
 * NOTE: The code assumes that the block size as defined in DCZID_EL0  <
 *       register is at least 16 bytes.  <
 *  <
 * -----------------------------------------------------------------------  <
 */  <
func zeromem_dczva                                         <
                                                           <
        /*                                                 <
         * The function consists of a series of loops that zero memory one byte  <
         * at a time, 16 bytes at a time or using the DC ZVA instruction to  <
         * zero aligned block of bytes, which is assumed to be more than 16.  <
         * In the case where the DC ZVA instruction cannot be used or if the  <
         * first 16 bytes loop would overflow, there is a fallback path that does  <
         * not use DC ZVA.                                 <
         * Note: The fallback path is also used by the zeromem function that  <
         *       branches to it directly.                  <
         *                                                 <
         *              +---------+   zeromem_dczva        <
         *              |  entry  |                        <
         *              +----+----+                        <
         *                   |                             <
         *                   v                             <
         *              +---------+                        <
         *              | checks  |>o-------+ (If any check fails, fallback)  <
         *              +----+----+         |  <
         *                   |              |---------------+  <
         *                   v              | Fallback path |  <
         *            +------+------+       |---------------+  <
         *            | 1 byte loop |       |  <
         *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end  <
         *                   |              |              <
         *                   v              |              <
         *           +-------+-------+      |              <
         *           | 16 bytes loop |      |              <
         *           +-------+-------+      |              <
         *                   |              |              <
         *                   v              |              <
         *            +------+------+ .Lzeromem_dczva_blocksize_aligned  <
         *            | DC ZVA loop |       |              <
         *            +------+------+       |              <
         *       +--------+  |              |              <
         *       |        |  |              |              <
         *       |        v  v              |              <
         *       |   +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned  <
         *       |   | 16 bytes loop |      |              <
         *       |   +-------+-------+      |              <
         *       |           |              |              <
         *       |           v              |              <
         *       |    +------+------+ .Lzeromem_dczva_final_1byte_aligned  <
         *       |    | 1 byte loop |       |              <
         *       |    +-------------+       |              <
         *       |           |              |              <
         *       |           v              |              <
         *       |       +---+--+           |              <
         *       |       | exit |           |              <
         *       |       +------+           |              <
         *       |                          |              <
         *       |           +--------------+    +------------------+  <
         *       |           |  +----------------| zeromem function |  <
         *       |           |  |                +------------------+  <
         *       |           v  v  <
         *       |    +-------------+ .Lzeromem_dczva_fallback_entry  <
         *       |    | 1 byte loop |                      <
         *       |    +------+------+                      <
         *       |           |                             <
         *       +-----------+                             <
         */                                                <
                                                           <
        /*                                                 <
         * Readable names for registers                    <
         *                                                 <
         * Registers x0, x1 and x2 are also set by zeromem which  <
         * branches into the fallback path directly, so cursor, length and  <
         * stop_address should not be retargeted to other registers.  <
         */                                                <
        cursor       .req x0 /* Start address and then current address */  <
        length       .req x1 /* Length in bytes of the region to zero out */  <
        /* Reusing x1 as length is never used after block_mask is set */  <
        block_mask   .req x1 /* Bitmask of the block size */  <
        stop_address .req x2 /* Address past the last zeroed address */  <
        block_size   .req x3 /* Size of a block in bytes as returned by dczid_el0 */  <
        tmp1         .req x4                               <
        tmp2         .req x5                               <
                                                           <
#if ENABLE_ASSERTIONS                                      <
        /*                                                 <
         * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)  <
         * register value and panic if the MMU is disabled.  <
         */                                                <
#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && BL2_AT_EL3)  <
        mrs     tmp1, sctlr_el3                            <
#else                                                      <
        mrs     tmp1, sctlr_el1                            <
#endif                                                     <
                                                           <
        tst     tmp1, #SCTLR_M_BIT                         <
        ASM_ASSERT(ne)                                     <
#endif /* ENABLE_ASSERTIONS */                             <
                                                           <
        /* stop_address is the address past the last to ze <
        add     stop_address, cursor, length               <
                                                           <
        /*                                                 <
         * Get block_size = (log2(<block size>) >> 2) (see encoding of  <
         * dczid_el0 reg)                                  <
         */                                                <
        mrs     block_size, dczid_el0                      <
                                                           <
        /*                                                 <
         * Select the 4 lowest bits and convert the extracted value from  <
         * <log2(<block size> in words>) to <block size in bytes>  <
         */                                                <
        ubfx    block_size, block_size, #0, #4             <
        mov     tmp2, #(1 << 2)                            <
        lsl     block_size, tmp2, block_size               <
                                                           <
#if ENABLE_ASSERTIONS                                      <
        /*                                                 <
         * Assumes block size is at least 16 bytes to avoid manual realignment  <
         * of the cursor at the end of the DCZVA loop.     <
         */                                                <
        cmp     block_size, #16                            <
        ASM_ASSERT(hs)                                     <
#endif                                                     <
        /*                                                 <
         * Not worth doing all the setup for a region less than a block and  <
         * protects against zeroing a whole block when the area to zero is  <
         * smaller than that. Also, as it is assumed that the block size is at  <
         * least 16 bytes, this also protects the initial aligning loops from  <
         * trying to zero 16 bytes when length is less than 16.  <
         */                                                <
        cmp     length, block_size                         <
        b.lo    .Lzeromem_dczva_fallback_entry             <
                                                           <
        /*                                                 <
         * Calculate the bitmask of the block alignment. It will never  <
         * underflow as the block size is between 4 bytes and 2kB.  <
         * block_mask = block_size - 1                     <
         */                                                <
        sub     block_mask, block_size, #1                 <
                                                           <
        /*                                                 <
         * length alias should not be used after this point unless it is  <
         * defined as a register other than block_mask's.  <
         */                                                <
         .unreq length                                     <
                                                           <
        /*                                                 <
         * If the start address is already aligned to zero block size, go  <
         * straight to the cache zeroing loop. This is safe because at this  <
         * point, the length cannot be smaller than a block size.  <
         */                                                <
        tst     cursor, block_mask                         <
        b.eq    .Lzeromem_dczva_blocksize_aligned          <
                                                           <
        /*                                                 <
         * Calculate the first block-size-aligned address. It is assumed that  <
         * the zero block size is at least 16 bytes. This address is the last  <
         * address of this initial loop.  <
         */                                                <
        orr     tmp1, cursor, block_mask                   <
        add     tmp1, tmp1, #1                             <
                                                           <
        /*                                                 <
         * If the addition overflows, skip the cache zeroing loops. This is  <
         * quite unlikely however.                         <
         */                                                <
        cbz     tmp1, .Lzeromem_dczva_fallback_entry       <
                                                           <
        /*                                                 <
         * If the first block-size-aligned address is past the last address,  <
         * fallback to the simpler code.                   <
         */                                                <
        cmp     tmp1, stop_address                         <
        b.hi    .Lzeromem_dczva_fallback_entry             <
                                                           <
        /*                                                 <
         * If the start address is already aligned to 16 bytes, skip this loop.  <
         * It is safe to do this because tmp1 (the stop address of the initial  <
         * 16 bytes loop) will never be greater than the final stop address.  <
         */                                                <
        tst     cursor, #0xf                               <
        b.eq    .Lzeromem_dczva_initial_1byte_aligned_end  <
                                                           <
        /* Calculate the next address aligned to 16 bytes */  <
        orr     tmp2, cursor, #0xf                         <
        add     tmp2, tmp2, #1                             <
        /* If it overflows, fallback to the simple path (unlikely) */  <
        cbz     tmp2, .Lzeromem_dczva_fallback_entry       <
        /*                                                 <
         * Next aligned address cannot be after the stop address because the  <
         * length cannot be smaller than 16 at this point.  <
         */  <
                                                           <
        /* First loop: zero byte per byte */               <
1:                                                         <
        strb    wzr, [cursor], #1                          <
        cmp     cursor, tmp2                               <
        b.ne    1b                                         <
.Lzeromem_dczva_initial_1byte_aligned_end:                 <
                                                           <
        /*                                                 <
         * Second loop: we need to zero 16 bytes at a time from cursor to tmp1  <
         * before being able to use the code that deals with block-size-aligned  <
         * addresses.                                      <
         */                                                <
        cmp     cursor, tmp1                               <
        b.hs    2f                                         <
1:                                                         <
        stp     xzr, xzr, [cursor], #16                    <
        cmp     cursor, tmp1                               <
        b.lo    1b                                         <
2:                                                         <
                                                           <
        /*                                                 <
         * Third loop: zero a block at a time using DC ZVA cache block zeroing  <
         * instruction.                                    <
         */                                                <
.Lzeromem_dczva_blocksize_aligned:                         <
        /*                                                 <
         * Calculate the last block-size-aligned address. If the result equals  <
         * to the start address, the loop will exit immediately.  <
         */                                                <
        bic     tmp1, stop_address, block_mask             <
                                                           <
        cmp     cursor, tmp1                               <
        b.hs    2f                                         <
1:                                                         <
        /* Zero the block containing the cursor */         <
        dc      zva, cursor                                <
        /* Increment the cursor by the size of a block */  <
        add     cursor, cursor, block_size                 <
        cmp     cursor, tmp1                               <
        b.lo    1b                                         <
2:                                                         <
                                                           <
        /*                                                 <
         * Fourth loop: zero 16 bytes at a time and then byte per byte the  <
         * remaining area                                  <
         */                                                <
.Lzeromem_dczva_final_16bytes_aligned:                     <
        /*                                                 <
         * Calculate the last 16 bytes aligned address. It is assumed that the  <
         * block size will never be smaller than 16 bytes so that the current  <
         * cursor is aligned to at least 16 bytes boundary.  <
         */                                                <
        bic     tmp1, stop_address, #15                    <
                                                           <
        cmp     cursor, tmp1                               <
        b.hs    2f                                         <
1:                                                         <
        stp     xzr, xzr, [cursor], #16                    <
        cmp     cursor, tmp1                               <
        b.lo    1b                                         <
2:                                                         <
                                                           <
        /* Fifth and final loop: zero byte per byte */     <
.Lzeromem_dczva_final_1byte_aligned:                       <
        cmp     cursor, stop_address                       <
        b.eq    2f                                         <
1:                                                         <
        strb    wzr, [cursor], #1                          <
        cmp     cursor, stop_address                       <
        b.ne    1b                                         <
2:                                                         <
        ret                                                <
                                                           <
        /* Fallback for unaligned start addresses */       <
.Lzeromem_dczva_fallback_entry:                            <
        /*                                                 <
         * If the start address is already aligned to 16 bytes, skip this loop.  <
         */                                                <
        tst     cursor, #0xf                               <
        b.eq    .Lzeromem_dczva_final_16bytes_aligned      <
                                                           <
        /* Calculate the next address aligned to 16 bytes */  <
        orr     tmp1, cursor, #15  <
        add     tmp1, tmp1, #1  <
        /* If it overflows, fallback to byte per byte zeroing */  <
        cbz     tmp1, .Lzeromem_dczva_final_1byte_aligned  <
        /* If the next aligned address is after the stop address, fallback */  <
        cmp     tmp1, stop_address                         <
        b.hs    .Lzeromem_dczva_final_1byte_aligned        <
                                                           <
        /* Fallback entry loop: zero byte per byte */      <
1:                                                         <
        strb    wzr, [cursor], #1                          <
        cmp     cursor, tmp1                               <
        b.ne    1b                                         <
                                                           <
        b       .Lzeromem_dczva_final_16bytes_aligned      <
                                                           <
        .unreq  cursor                                     <
        /*                                                 <
         * length is already unreq'ed to reuse the register for another  <
         * variable.                                       <
         */                                                <
        .unreq  stop_address                               <
        .unreq  block_size                                 <
        .unreq  block_mask                                 <
        .unreq  tmp1                                       <
        .unreq  tmp2                                       <
endfunc zeromem_dczva                                      <
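
As a reading aid, the bookkeeping performed by zeromem_dczva can be modelled in C as below. The block size decode mirrors the assembly (block size in bytes = 4 << DCZID_EL0[3:0]), followed by the same byte, 16-byte and block phases. The function name and the dc_zva_block() stand-in are hypothetical and the sketch omits the overflow checks that the assembly performs.

/* Illustrative sketch only, not part of the sources shown above. */
#include <stddef.h>
#include <stdint.h>

static void dc_zva_block(uint8_t *p)   /* stand-in for the DC ZVA instruction */
{
        (void)p;
}

static void zeromem_dczva_model(uint8_t *cursor, size_t length, uint64_t dczid_el0)
{
        uint8_t *stop = cursor + length;
        /* DCZID_EL0[3:0] holds log2 of the zero-block size in 4-byte words. */
        size_t block_size = (size_t)4 << (dczid_el0 & 0xfU);
        size_t block_mask = block_size - 1;

        if (length >= block_size) {
                /* 1-byte then 16-byte loops until cursor is block aligned. */
                while (((uintptr_t)cursor & 0xf) != 0 && cursor < stop)
                        *cursor++ = 0;
                while (((uintptr_t)cursor & block_mask) != 0) {
                        ((uint64_t *)(void *)cursor)[0] = 0;  /* stp xzr, xzr */
                        ((uint64_t *)(void *)cursor)[1] = 0;
                        cursor += 16;
                }
                /* One DC ZVA per whole block. */
                while ((size_t)(stop - cursor) >= block_size) {
                        dc_zva_block(cursor);
                        cursor += block_size;
                }
        }
        /* Tail (and fallback path): plain stores for whatever remains. */
        while (cursor < stop)
                *cursor++ = 0;
}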
                                                           <
/* ---------------------------------------------------------------------------  <
 * void memcpy16(void *dest, const void *src, unsigned int length)  <
 *  <
 * Copy length bytes from memory area src to memory area dest.  <
 * The memory areas should not overlap.  <
 * Destination and source addresses must be 16-byte aligned.  <
 * ---------------------------------------------------------------------------  <
 */                                                        <
func memcpy16                                              <
#if ENABLE_ASSERTIONS                                      <
        orr     x3, x0, x1                                 <
        tst     x3, #0xf                                   <
        ASM_ASSERT(eq)                                     <
#endif                                                     <
/* copy 16 bytes at a time */                              <
m_loop16:                                                  <
        cmp     x2, #16                                    <
        b.lo    m_loop1                                    <
        ldp     x3, x4, [x1], #16                          <
        stp     x3, x4, [x0], #16                          <
        sub     x2, x2, #16                                <
        b       m_loop16                                   <
/* copy byte per byte */                                   <
m_loop1:                                                   <
        cbz     x2, m_end                                  <
        ldrb    w3, [x1], #1                               <
        strb    w3, [x0], #1                               <
        subs    x2, x2, #1                                 <
        b.ne    m_loop1                                    <
m_end:                                                     <
        ret                                                <
endfunc memcpy16                                           <
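
A short usage sketch for memcpy16 (prototype declared locally, names illustrative): both buffers must be 16-byte aligned, while the length may be any value since the trailing bytes are handled by the byte loop.

/* Illustrative sketch only, not part of the sources shown above. */
void memcpy16(void *dest, const void *src, unsigned int length);

static unsigned char src_buf[256] __attribute__((aligned(16)));
static unsigned char dst_buf[256] __attribute__((aligned(16)));

void copy_example(void)
{
        memcpy16(dst_buf, src_buf, 100U);   /* 6 x 16-byte copies, then 4 byte copies */
}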

/* -------------------------------------------------------    /* -------------------------------------------------------
 * Disable the MMU at EL3                                  |   * Disable the MPU at EL2.
 * -------------------------------------------------------     * -------------------------------------------------------
 */                                                            */

func disable_mmu_el3                                       |  func disable_mpu_el2
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)                      mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el3:                                        |  do_disable_mpu_el2:
        mrs     x0, sctlr_el3                              |          mrs     x0, sctlr_el2
        bic     x0, x0, x1                                            bic     x0, x0, x1
        msr     sctlr_el3, x0                              |          msr     sctlr_el2, x0
        isb     /* ensure MMU is off */                               isb     /* ensure MMU is off */
        dsb     sy                                                    dsb     sy
        ret                                                           ret
endfunc disable_mmu_el3                                    |  endfunc disable_mpu_el2


func disable_mmu_icache_el3                                |  func disable_mpu_icache_el2
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)            mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
        b       do_disable_mmu_el3                         |          b       do_disable_mpu_el2
endfunc disable_mmu_icache_el3                             |  endfunc disable_mpu_icache_el2
                                                           <
/* ------------------------------------------------------- <
 * Disable the MMU at EL1                                  <
 * ------------------------------------------------------- <
 */                                                        <
                                                           <
func disable_mmu_el1                                       <
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT)           <
do_disable_mmu_el1:                                        <
        mrs     x0, sctlr_el1                              <
        bic     x0, x0, x1                                 <
        msr     sctlr_el1, x0                              <
        isb     /* ensure MMU is off */                    <
        dsb     sy                                         <
        ret                                                <
endfunc disable_mmu_el1                                    <
                                                           <
                                                           <
func disable_mmu_icache_el1                                <
        mov     x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)  <
        b       do_disable_mmu_el1                         <
endfunc disable_mmu_icache_el1                             <
                                                           <
/* ------------------------------------------------------- <
 * Enable the use of VFP at EL3                            <
 * ------------------------------------------------------- <
 */                                                        <
#if SUPPORT_VFP                                            <
func enable_vfp                                            <
        mrs     x0, cpacr_el1                              <
        orr     x0, x0, #CPACR_VFP_BITS                    <
        msr     cpacr_el1, x0                              <
        mrs     x0, cptr_el3                               <
        mov     x1, #AARCH64_CPTR_TFP                      <
        bic     x0, x0, x1                                 <
        msr     cptr_el3, x0                               <
        isb                                                <
        ret                                                <
endfunc enable_vfp                                         <
#endif                                                     <
                                                           <
/* ---------------------------------------------------------------------------  <
 * Helper to fixup Global Descriptor table (GDT) and dynamic relocations  <
 * (.rela.dyn) at runtime.  <
 *  <
 * This function is meant to be used when the firmware is compiled with -fpie  <
 * and linked with -pie options. We rely on the linker script exporting  <
 * appropriate markers for start and end of the section. For GOT, we  <
 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect  <
 * __RELA_START__ and __RELA_END__.  <
 *  <
 * The function takes the limits of the memory to apply fixups to as  <
 * arguments (which is usually the limits of the relocatable BL image):  <
 *   x0 -  the start of the fixup region  <
 *   x1 -  the limit of the fixup region  <
 * These addresses have to be 4KB page aligned.  <
 * ---------------------------------------------------------------------------  <
 */                                                        <
                                                           <
/* Relocation codes */                                     <
#define R_AARCH64_NONE          0                          <
#define R_AARCH64_RELATIVE      1027                       <
                                                           <
func fixup_gdt_reloc                                       <
        mov     x6, x0                                     <
        mov     x7, x1                                     <
                                                           <
#if ENABLE_ASSERTIONS                                      <
        /* Test if the limits are 4KB aligned */           <
        orr     x0, x0, x1                                 <
        tst     x0, #(PAGE_SIZE_MASK)                      <
        ASM_ASSERT(eq)                                     <
#endif                                                     <
        /*                                                 <
         * Calculate the offset based on return address in x30.  <
         * Assume that this function is called within a page at the start of  <
         * fixup region.                                   <
         */                                                <
        and     x2, x30, #~(PAGE_SIZE_MASK)                <
        subs    x0, x2, x6      /* Diff(S) = Current Address - Compiled Address */  <
        b.eq    3f              /* Diff(S) = 0. No relocation needed */  <
                                                           <
        adrp    x1, __GOT_START__                          <
        add     x1, x1, :lo12:__GOT_START__                <
        adrp    x2, __GOT_END__                            <
        add     x2, x2, :lo12:__GOT_END__                  <
                                                           <
        /*                                                 <
         * GOT is an array of 64-bit addresses which must be fixed up as  <
         * new_addr = old_addr + Diff(S).  <
         * The new_addr is the address currently the binary is executing from  <
         * and old_addr is the address at compile time.    <
         */                                                <
1:      ldr     x3, [x1]                                   <
                                                           <
        /* Skip adding offset if address is < lower limit */  <
        cmp     x3, x6                                     <
        b.lo    2f                                         <
                                                           <
        /* Skip adding offset if address is >= upper limit */  <
        cmp     x3, x7                                     <
        b.hs    2f                                         <
        add     x3, x3, x0                                 <
        str     x3, [x1]                                   <
                                                           <
2:      add     x1, x1, #8                                 <
        cmp     x1, x2                                     <
        b.lo    1b                                         <
                                                           <
        /* Starting dynamic relocations. Use adrp/adr to get RELA_START and END */  <
3:      adrp    x1, __RELA_START__                         <
        add     x1, x1, :lo12:__RELA_START__               <
        adrp    x2, __RELA_END__                           <
        add     x2, x2, :lo12:__RELA_END__                 <
                                                           <
        /*                                                 <
         * According to ELF-64 specification, the RELA data structure is as  <
         * follows:                                        <
         *      typedef struct {                           <
         *              Elf64_Addr r_offset;               <
         *              Elf64_Xword r_info;                <
         *              Elf64_Sxword r_addend;             <
         *      } Elf64_Rela;                              <
         *                                                 <
         * r_offset is address of reference                <
         * r_info is symbol index and type of relocation (in this case  <
         * code 1027 which corresponds to R_AARCH64_RELATIVE).  <
         * r_addend is constant part of expression.        <
         *                                                 <
         * Size of Elf64_Rela structure is 24 bytes.       <
         */                                                <
                                                           <
        /* Skip R_AARCH64_NONE entry with code 0 */        <
1:      ldr     x3, [x1, #8]                               <
        cbz     x3, 2f                                     <
                                                           <
#if ENABLE_ASSERTIONS                                      <
        /* Assert that the relocation type is R_AARCH64_RELATIVE */  <
        cmp     x3, #R_AARCH64_RELATIVE                    <
        ASM_ASSERT(eq)                                     <
#endif                                                     <
        ldr     x3, [x1]        /* r_offset */             <
        add     x3, x0, x3                                 <
        ldr     x4, [x1, #16]   /* r_addend */             <
                                                           <
        /* Skip adding offset if r_addend is < lower limit */  <
        cmp     x4, x6                                     <
        b.lo    2f                                         <
                                                           <
        /* Skip adding offset if r_addend entry is >= upper limit */  <
        cmp     x4, x7                                     <
        b.hs    2f                                         <
                                                           <
        add     x4, x0, x4      /* Diff(S) + r_addend */   <
        str     x4, [x3]                                   <
                                                           <
2:      add     x1, x1, #24                                <
        cmp     x1, x2                                     <
        b.lo    1b                                         <
        ret                                                <
endfunc fixup_gdt_reloc                                    <
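
The two fixup passes above can be read as the following C sketch. __GOT_START__/__GOT_END__ and __RELA_START__/__RELA_END__ are the linker-script symbols named in the comment; the function names and the diff/lower/upper parameters (the assembly's x0, x6 and x7) are illustrative only.

/* Illustrative sketch only, not part of the sources shown above. */
#include <stdint.h>

typedef struct {
        uint64_t r_offset;   /* Elf64_Addr: link-time address of the word to patch */
        uint64_t r_info;     /* Elf64_Xword: 0 = R_AARCH64_NONE, 1027 = R_AARCH64_RELATIVE */
        int64_t  r_addend;   /* Elf64_Sxword: link-time value to relocate */
} Elf64_Rela;

extern uint64_t   __GOT_START__[],  __GOT_END__[];
extern Elf64_Rela __RELA_START__[], __RELA_END__[];

/* First pass: add Diff(S) to every GOT entry that points inside the region. */
static void apply_got(uint64_t diff, uint64_t lower, uint64_t upper)
{
        for (uint64_t *entry = __GOT_START__; entry < __GOT_END__; entry++)
                if (*entry >= lower && *entry < upper)
                        *entry += diff;
}

/* Second pass: apply each R_AARCH64_RELATIVE entry of .rela.dyn. */
static void apply_rela(uint64_t diff, uint64_t lower, uint64_t upper)
{
        for (Elf64_Rela *r = __RELA_START__; r < __RELA_END__; r++) {
                if (r->r_info == 0U)                  /* skip R_AARCH64_NONE */
                        continue;
                uint64_t *where = (uint64_t *)(uintptr_t)(r->r_offset + diff);
                uint64_t addend = (uint64_t)r->r_addend;
                if (addend >= lower && addend < upper)
                        *where = addend + diff;
        }
}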
Last Author: garymorrison-arm
Last Edited: Jul 2 2021, 11:01 PM