// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include <arch/asm_macros.h>
#include <arch/defines.h>
#include <asm.h>
#include <mexec.h>

/* The boot arguments arrive in x0 - x5; we stash them in the registers
 * aliased below, since the code that follows expects to trash x0 - x5.
 */
bootarg0 .req x25
bootarg1 .req x26
bootarg2 .req x27
boot_el  .req x28

// This is a null-terminated list of memory regions to copy.
copy_list .req x23

// This is the address to branch to once the copy is completed.
new_kernel_addr .req x24

tmp .req x9

.section .text
FUNCTION(mexec_asm)

// Turn off the caches and the MMU.
    mrs tmp, sctlr_el1        // Read the SCTLR into a temp
    bic tmp, tmp, #(1 << 12)  // Disable the instruction cache
    bic tmp, tmp, #(1 << 2)   // Disable the data/unified cache
    bic tmp, tmp, #(1 << 0)   // Disable the MMU
    msr sctlr_el1, tmp        // Write the temp back to the control register

// Stash the boot arguments to pass to the next kernel, since we expect to
// trash x0 - x5.
    mov bootarg0, x0
    mov bootarg1, x1
    mov bootarg2, x2

// Stash the boot EL, as we will need it later.
    mov boot_el, x3

// This list contains the memmove operations that we need to perform.
    mov copy_list, x4

// This is the address of the kernel that we eventually want to jump to.
    mov new_kernel_addr, x5

    // If we were originally booted in EL2, transition back into EL2.
    cmp boot_el, #2
    b.lt cplt_transition_to_boot_el  // We booted in EL1; no need to transition up.
    adr x0, cplt_transition_to_boot_el
    hvc #1  // Branch into an EL2 trampoline that bounces to cplt_transition_to_boot_el.

cplt_transition_to_boot_el:

    // Clean/invalidate the caches early on. We want to make sure that there
    // are no dirty cache entries hanging around before we start the copy.
    // If these entries were written back later, they would corrupt the state
    // of the system, so we clean/invalidate them up front.
    bl mexec_arch_clean_invalidate_cache_all

/* Copy the new kernel over the old kernel. Keep in mind that since the MMU
 * is disabled, unaligned accesses are no longer legal. All accesses must be
 * word aligned.
 */
.Lcopy:
    // Load a copy operation into registers.
    ldr x0, [copy_list, MEMMOV_OPS_DST_OFFSET]
    ldr x1, [copy_list, MEMMOV_OPS_SRC_OFFSET]
    ldr x2, [copy_list, MEMMOV_OPS_LEN_OFFSET]

    // Determine if this is the end of the list by checking whether all three
    // elements of the copy operation are null.
    orr tmp, x0, x1
    orr tmp, tmp, x2
    cbz tmp, .Lfinish_copy

    // The copy operation is not null; go ahead and memmove.
    bl memmove_mexec

    // Advance the pointer to the next copy operation (24 bytes per entry).
    add copy_list, copy_list, #24

    b .Lcopy

.Lfinish_copy:

    bl mexec_arch_clean_invalidate_cache_all

    // Restore the boot arguments for the next kernel.
    mov x0, bootarg0
    mov x1, bootarg1
    mov x2, bootarg2

    // Get everything out of the pipeline before branching to the new kernel.
    isb
    dsb sy

    // Branch to the next kernel.
    br new_kernel_addr
END_FUNCTION(mexec_asm)
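/* For reference, each copy_list entry read above is three 64-bit words laid
 * out at MEMMOV_OPS_{DST,SRC,LEN}_OFFSET (presumably 0, 8 and 16, given the
 * 24-byte stride), terminated by an all-null entry. A minimal C sketch of
 * the .Lcopy walk under those assumptions; the struct and function names
 * here are illustrative, not definitions from mexec.h:
 *
 *   struct memmov_op {
 *       uint64_t dst;  // MEMMOV_OPS_DST_OFFSET
 *       uint64_t src;  // MEMMOV_OPS_SRC_OFFSET
 *       uint64_t len;  // MEMMOV_OPS_LEN_OFFSET
 *   };
 *
 *   static void copy_all(const struct memmov_op* op) {
 *       // An entry with dst == src == len == 0 terminates the list.
 *       while (op->dst | op->src | op->len) {
 *           // memmove_mexec below stands in for memmove here.
 *           memmove((void*)op->dst, (const void*)op->src, op->len);
 *           op++;  // advance 24 bytes: "add copy_list, copy_list, #24"
 *       }
 *   }
 */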
LOCAL_FUNCTION(memmove_mexec)
    // x6 contains the stride: +1 word if we're copying forwards,
    // -1 word if we're copying backwards.
    mov x6, #1

    // x3 is the start index of the copy: the front of the array if we're
    // copying forwards, or the back of the array if we're copying backwards.
    mov x3, #0

    // Convert the length of the array from bytes to machine words.
    lsr x2, x2, #3

    // The copy loop below is do-while, so bail out now if there are no
    // words to copy.
    cbz x2, .done

    // If the source address and the destination address are the same, we
    // can return because there's nothing to be done.
    cmp x0, x1
    b.eq .done

    // Decide if we need to copy backwards (dst > src, so the regions may
    // alias).
    b.lt .no_alias
    mov x6, #-1     // Set the stride to backwards.
    mov x3, x2      // Move the copy index to the back of the array...
    sub x3, x3, #1  // ...i = (len_wrds - 1), to start at the last word.

.no_alias:
    mov x4, #0  // Loop iteration index
.copy_loop:
    // Copy one word of data:
    // dst[i] = src[i]
    ldr tmp, [x1, x3, lsl #3]
    str tmp, [x0, x3, lsl #3]

    // x7 = &dst[i], the byte address of the word that was just stored.
    lsl x7, x3, #3
    add x7, x7, x0

    // Advance the array index by the stride (backwards or forwards):
    // i += stride
    add x3, x3, x6

    // Count the number of words copied; we use this to decide when to stop:
    // words_copied += 1
    add x4, x4, #1

    // If we've copied the whole buffer, then finish:
    // if (words_copied == words_to_copy) break;
    cmp x2, x4
    b.ne .copy_loop
.done:
    ret
END_FUNCTION(memmove_mexec)
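/* A rough C equivalent of memmove_mexec, for readability. The word size,
 * direction selection and loop structure mirror the assembly above; the
 * function name and types are illustrative only:
 *
 *   static void memmove_mexec_c(uint64_t* dst, const uint64_t* src,
 *                               size_t len_bytes) {
 *       size_t words = len_bytes >> 3;  // lsr x2, x2, #3
 *       if (words == 0 || dst == src)
 *           return;
 *       int64_t stride = 1;  // copy forwards by default
 *       int64_t i = 0;
 *       if (dst > src) {     // regions may alias; copy backwards
 *           stride = -1;
 *           i = (int64_t)words - 1;  // start at the last word
 *       }
 *       for (size_t copied = 0; copied < words; copied++, i += stride)
 *           dst[i] = src[i];
 *   }
 */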
// Perform a bulk clean/invalidate across the whole cache.
//
// Normally on ARM we could use the CIVAC, CVAC, CVAU and IVAC instructions
// to maintain the cache, but those ops only operate on virtual addresses,
// and since we have disabled the MMU they are no longer meaningful.
// As a result, we have to use the level/set/way cache ops. Since the mapping
// of addresses to cache sets is left up to the implementation, the only
// portable (safe) way to perform these ops is to operate against the whole
// cache: the following routine cleans and invalidates every entry in each
// level of the cache.
// The original implementation can be found in the ARM Cortex-A Series
// Programmer's Guide for ARMv8-A (DEN0024A) or at the following URL:
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/BABJDBHI.html
LOCAL_FUNCTION(mexec_arch_clean_invalidate_cache_all)
    mrs x0, clidr_el1
    and w3, w0, #0x07000000   // w3 = 2 x level of coherence (CLIDR.LoC, bits [26:24])
    lsr w3, w3, #23
    cbz w3, finished2
    mov w10, #0               // w10 = 2 x cache level
    mov w8, #1                // w8 = constant 0b1
loop12:
    add w2, w10, w10, lsr #1  // calculate 3 x cache level
    lsr w1, w0, w2            // extract 3-bit cache type for this level
    and w1, w1, #0x7
    cmp w1, #2
    b.lt skip2                // no data or unified cache at this level
    msr csselr_el1, x10       // select this cache level
    isb                       // synchronize change of csselr
    mrs x1, ccsidr_el1        // read ccsidr
    and w2, w1, #7            // w2 = log2(linelen) - 4
    add w2, w2, #4            // w2 = log2(linelen)
    ubfx w4, w1, #3, #10      // w4 = max way number, right aligned
    clz w5, w4                // w5 = 32 - log2(ways), bit position of way in dc operand
    lsl w9, w4, w5            // w9 = max way number, aligned to position in dc operand
    lsl w16, w8, w5           // w16 = amount to decrement way number per iteration
loop22:
    ubfx w7, w1, #13, #15     // w7 = max set number, right aligned
    lsl w7, w7, w2            // w7 = max set number, aligned to position in dc operand
    lsl w17, w8, w2           // w17 = amount to decrement set number per iteration
loop33:
    orr w11, w10, w9          // w11 = combine cache level and way number...
    orr w11, w11, w7          // ...and set number for the dc operand
    dc cisw, x11              // clean and invalidate data cache by set/way
    subs w7, w7, w17          // decrement set number
    b.ge loop33
    subs w9, w9, w16          // decrement way number
    b.ge loop22
skip2:
    add w10, w10, #2          // increment 2 x cache level
    cmp w3, w10
    dsb sy                    // ensure completion of previous cache maintenance operations
    b.gt loop12
finished2:
    ic iallu
    isb
    dsb sy

    ret
END_FUNCTION(mexec_arch_clean_invalidate_cache_all)


/* This .ltorg emits any pending literal-pool constants here. We need to put
 * it before the mexec_asm_end symbol because we intend to relocate the
 * assembly contained within the mexec_asm[_end] block; any constants needed
 * by this block must be relocated along with it, so we ensure that they are
 * emitted before mexec_asm_end.
 */
.ltorg

DATA(mexec_asm_end)
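/* For reference, the set/way walk in mexec_arch_clean_invalidate_cache_all
 * corresponds roughly to this C sketch. The register accessors and dc_cisw()
 * are hypothetical helpers (system registers are not directly accessible
 * from C); the bit positions match the CLIDR_EL1 / CCSIDR_EL1 / DC CISW
 * encodings used above:
 *
 *   uint64_t clidr = read_clidr_el1();           // hypothetical helper
 *   uint32_t loc = (clidr >> 24) & 0x7;          // level of coherence
 *   for (uint32_t level = 0; level < loc; level++) {
 *       uint32_t ctype = (clidr >> (3 * level)) & 0x7;
 *       if (ctype < 2)
 *           continue;  // no data or unified cache at this level
 *       write_csselr_el1(level << 1);            // select cache level; then isb
 *       uint64_t ccsidr = read_ccsidr_el1();
 *       uint32_t set_shift = (ccsidr & 0x7) + 4; // log2(linelen)
 *       uint32_t max_way = (ccsidr >> 3) & 0x3ff;
 *       uint32_t max_set = (ccsidr >> 13) & 0x7fff;
 *       // Bit position of the way field; for a single-way cache the way
 *       // number is always 0, so the shift amount does not matter.
 *       uint32_t way_shift = (max_way == 0) ? 0 : __builtin_clz(max_way);
 *       for (uint32_t way = 0; way <= max_way; way++)
 *           for (uint32_t set = 0; set <= max_set; set++)
 *               dc_cisw(((uint64_t)way << way_shift) |
 *                       ((uint64_t)set << set_shift) | (level << 1));
 *       // dsb sy between levels, then ic iallu; isb; dsb sy at the end,
 *       // as in the assembly above.
 *   }
 */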