// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include <arch/asm_macros.h>
#include <arch/defines.h>
#include <asm.h>
#include <mexec.h>

/* Arguments passed via x0 through x5 inclusive */
bootarg0                .req x25
bootarg1                .req x26
bootarg2                .req x27
boot_el                 .req x28

// This is a null-terminated list of memory regions to copy.
copy_list               .req x23
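//
// The entry layout is defined by the MEMMOV_OPS_*_OFFSET constants in
// mexec.h; a hypothetical C sketch consistent with the three 8-byte loads
// and the 24-byte stride used in the copy loop below:
//
//   typedef struct {
//       uint64_t dst;   // MEMMOV_OPS_DST_OFFSET
//       uint64_t src;   // MEMMOV_OPS_SRC_OFFSET
//       uint64_t len;   // MEMMOV_OPS_LEN_OFFSET
//   } memmov_ops_t;     // hypothetical name; list ends with an all-zero entry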

// This is the address to branch to once the copy is completed.
new_kernel_addr         .req x24

tmp                     .req x9

.section .text
FUNCTION(mexec_asm)

// Turn off the caches and MMU
    mrs     tmp, sctlr_el1      // Read the SCTLR into a temp
    bic     tmp, tmp, #(1<<12)  // Disable icache
    bic     tmp, tmp, #(1<<2)   // Disable dcache/ucache
    bic     tmp, tmp, #(1<<0)   // Disable the MMU
    msr     sctlr_el1, tmp      // Write the temp back to the control register
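
// A C sketch of the sequence above (the SCTLR_EL1 bit positions are
// architectural: I = bit 12, C = bit 2, M = bit 0; read_sctlr_el1() and
// write_sctlr_el1() are illustrative helpers, not real functions):
//
//   uint64_t sctlr = read_sctlr_el1();
//   sctlr &= ~((1ull << 12) | (1ull << 2) | (1ull << 0));  // clear I, C, M
//   write_sctlr_el1(sctlr);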

// Stash the boot arguments to pass to the next kernel since we expect to trash
// x0 - x5
    mov     bootarg0, x0
    mov     bootarg1, x1
    mov     bootarg2, x2

// Stash the boot EL (exception level), as we will need it later
    mov     boot_el,  x3

// This list contains the memmove operations that we need to perform.
    mov     copy_list, x4

// This is the address of the kernel that we eventually want to jump to.
    mov     new_kernel_addr, x5

    // If we were originally booted in EL2, transition back into EL2
    cmp  boot_el, #2
    b.lt cplt_transition_to_boot_el  // We booted in EL1, no need to transition up
    adr  x0, cplt_transition_to_boot_el
    hvc  #1     // Branch into an EL2 trampoline that bounces to cplt_transition_to_boot_el

cplt_transition_to_boot_el:

    // Clean/invalidate the cache early on.
    // We want to make sure that there are no dirty cache entries hanging
    // around in the cache before we start the copy.
    // If these cache entries were to get written back later, they would
    // corrupt the state of the system, so we clean/invalidate them up front.
    bl      mexec_arch_clean_invalidate_cache_all

/* Memmove the new kernel over the old kernel. Keep in mind that since the MMU
 * is disabled, unaligned accesses are no longer legal. All accesses must be
 * word (8-byte) aligned.
 */
.Lcopy:
    // Load the next copy operation's dst, src and len into x0-x2
    ldr     x0, [copy_list, MEMMOV_OPS_DST_OFFSET]
    ldr     x1, [copy_list, MEMMOV_OPS_SRC_OFFSET]
    ldr     x2, [copy_list, MEMMOV_OPS_LEN_OFFSET]

    // Determine if this is the end of the list by checking whether all three
    // fields of the copy operation are null
    orr     tmp, x0, x1
    orr     tmp, tmp, x2
    cbz     tmp, .Lfinish_copy

    // The copy operation is not null, go ahead and memmove
    bl      memmove_mexec

    // Advance the pointer to the next copy operation (each entry is three
    // 64-bit words, i.e. 24 bytes).
    add     copy_list, copy_list, 24

    b       .Lcopy

.Lfinish_copy:

    // Clean/invalidate again so no stale lines cover the regions we just wrote.
    bl      mexec_arch_clean_invalidate_cache_all

    // Restore the boot arguments for the next kernel.
    mov     x0, bootarg0
    mov     x1, bootarg1
    mov     x2, bootarg2

    // Get everything out of the pipeline before branching to the new kernel.
    isb
    dsb sy

    // Branch to the next kernel.
    br      new_kernel_addr
END_FUNCTION(mexec_asm)

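// A C sketch of what memmove_mexec below implements: a word-granular memmove
// where x0 = dst, x1 = src, x2 = len in bytes, and all three are assumed to
// be 8-byte aligned. This is an illustration, not the authoritative
// definition:
//
//   void memmove_mexec(uint64_t* dst, const uint64_t* src, uint64_t len) {
//       uint64_t words = len >> 3;
//       if (dst == src) return;
//       if (dst < src) {                              // copy forwards
//           for (uint64_t i = 0; i < words; i++) dst[i] = src[i];
//       } else {                                      // copy backwards
//           for (uint64_t i = words; i-- > 0;) dst[i] = src[i];
//       }
//   }
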
LOCAL_FUNCTION(memmove_mexec)
        // x6 contains the stride (+1 word if we're copying forwards,
        // -1 word if we're copying backwards)
        mov     x6, 1

        // x3 is the start index of the copy: the front of the array if we're
        // copying forwards or the back of the array if we're copying
        // backwards.
        mov     x3, 0

        // Convert the length of the array from bytes to 8-byte machine words
        lsr     x2, x2, 3

        // Nothing to copy if the length is zero.
        cbz     x2, .done

        // If the source address and the destination address are the same,
        // then we can return because there's nothing to be done.
        cmp     x0, x1
        b.eq    .done

        // Decide if we need to copy backwards: if dst < src, a forward copy
        // is safe; otherwise copy backwards so an overlapping destination
        // doesn't clobber source words before they are read.
        b.lt    .no_alias
        mov     x6, -1          // Set the stride to backwards
        mov     x3, x2          // Move the copy index to the back of the array
        sub     x3, x3, 1       // i = (len_wrds - 1); to start at the last word

.no_alias:
        mov     x4, 0           // Loop iteration index
.copy_loop:
        // Copy one word of data
        // dst[i] = src[i] (i indexes 8-byte words, hence the lsl 3)
        ldr     tmp, [x1, x3, lsl 3]
        str     tmp, [x0, x3, lsl 3]

        // Increment the array index by the stride (backwards or forwards).
        // i += stride
        add     x3, x3, x6

        // Increment the number of words copied (we use this to decide when
        // to stop)
        // words_copied += 1
        add     x4, x4, 1

        // If we've copied the whole buffer, then finish.
        // if (words_copied == words_to_copy) break;
        cmp     x2, x4
        b.ne    .copy_loop
.done:
        ret
END_FUNCTION(memmove_mexec)

// Perform a bulk clean/invalidate across the whole cache.
// Normally on ARM we can use the CIVAC, CVAC, CVAU and IVAC instructions to
// manipulate the cache, but these ops only work against virtual memory
// addresses and, since we have disabled the MMU, these instructions are no
// longer meaningful.
// As a result, we have to use the Level/Set/Way cache ops. Since the mapping
// from set/way to physical address is left up to the implementation, the only
// portable (safe) way to perform these cache ops is to operate against the
// whole cache.
// The following op cleans and invalidates every entry in each level of the
// cache.
// The original implementation can be found in the ARMv8-A Programmer's Guide
// (ARM DEN0024A) or at the following URL:
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/BABJDBHI.html
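//
// For reference, the loops below assemble the operand that DC CISW expects
// (per the ARMv8-A Architecture Reference Manual):
//
//   operand = (way << (32 - A)) | (set << L) | (level << 1)
//
// where L = log2(cache line size in bytes) and A = log2(number of ways);
// this is exactly what the clz/lsl/orr arithmetic on w2, w5, w7, w9 and w10
// computes.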
LOCAL_FUNCTION(mexec_arch_clean_invalidate_cache_all)
         mrs x0, clidr_el1
         and w3, w0, #0x07000000  // get 2 x level of coherence
         lsr w3, w3, #23
         cbz w3, finished2
         mov w10, #0              // w10 = 2 x cache level
         mov w8, #1               // w8 = constant 0b1
  loop12: add w2, w10, w10, lsr #1 // calculate 3 x cache level
         lsr w1, w0, w2           // extract 3-bit cache type for this level
         and w1, w1, #0x7
         cmp w1, #2
         b.lt skip2               // no data or unified cache at this level
         msr csselr_el1, x10      // select this cache level
         isb                      // synchronize change of csselr
         mrs x1, ccsidr_el1       // read ccsidr
         and w2, w1, #7           // w2 = log2(linelen) - 4
         add w2, w2, #4           // w2 = log2(linelen)
         ubfx w4, w1, #3, #10     // w4 = max way number, right aligned
         clz w5, w4               // w5 = 32 - log2(ways), bit position of way
                                  // in dc operand
         lsl w9, w4, w5           // w9 = max way number, aligned to position
                                  // in dc operand
         lsl w16, w8, w5          // w16 = amount to decrement way number per iteration
  loop22: ubfx w7, w1, #13, #15   // w7 = max set number, right aligned
         lsl w7, w7, w2           // w7 = max set number, aligned to position
                                  // in dc operand
         lsl w17, w8, w2          // w17 = amount to decrement set number per iteration
  loop33: orr w11, w10, w9        // w11 = combine way number and cache level...
         orr w11, w11, w7         // ... and set number for dc operand
         dc cisw, x11             // data cache clean and invalidate by set and way
         subs w7, w7, w17         // decrement set number
         b.ge loop33
         subs w9, w9, w16         // decrement way number
         b.ge loop22
  skip2:  add w10, w10, #2        // increment 2 x cache level
         cmp w3, w10
         dsb sy                   // ensure completion of previous cache
                                  // maintenance operation
         b.gt loop12
  finished2:
         ic iallu                 // invalidate the entire instruction cache
         isb
         dsb sy

         ret
END_FUNCTION(mexec_arch_clean_invalidate_cache_all)


/* This .ltorg emits any pending literal-pool constants here. We need to put it
 * before the mexec_asm_end symbol because we intend to relocate the assembly
 * contained within the mexec_asm[_end] block. Any constants needed by this
 * block should also be relocated, so we need to ensure that they occur before
 * mexec_asm_end.
 */
.ltorg

DATA(mexec_asm_end)
