1/**
2 * \file
3 * \brief Cache control routines for ARMv8.
4 */
5
6/*
7 * Copyright (c) 2015, Hewlett Packard Enterprise Development LP.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#ifndef __ASSEMBLER__
16#define __ASSEMBLER__   1
17#endif
18
19        .text
20        .globl sysreg_invalidate_d_cache,\
21               sysreg_invalidate_i_and_d_caches,\
22               sysreg_invalidate_i_and_d_caches_fast, \
23                  sysreg_invalidate_tlb_fn, \
24                  sysreg_enable_mmu
25
26/* Based on algorithm from ARM Architecture Reference Manual */
27sysreg_invalidate_d_cache:
28
29    sub     sp, sp, #96
30
31    stp     x0, x1, [sp, #16 * 0]
32    stp     x2, x3, [sp, #16 * 1]
33    stp     x4, x5, [sp, #16 * 2]
34    stp     x6, x7, [sp, #16 * 3]
35    stp     x8, x9, [sp, #16 * 4]
36    stp     x10, x11, [sp, #16 * 5]
37
38    dmb    sy                // ensure ordering with previous memory accesses
39
40    mrs x0, clidr_el1
41    and w3, w0, #0x07000000     // get 2 x level of coherency
42    lsr w3, w3, #23
43    cbz w3, finished
44    mov w10, #0                 // w10 = 2 x cache level
45    mov w8, #1                     // w8 = constant 0b1
46
47loop1:
48    add w2, w10, w10, lsr #1     // calculate 3 x cache level
49    lsr w1, w0, w2                 // extract 3-bit cache type for this level
50    and w1, w1, #0x7
51    cmp w1, #2
52    b.lt skip                     // no data or unified cache at this level
53    msr csselr_el1, x10         // select this cache level
54    isb                         // sync change of csselr
55    mrs x1, ccsidr_el1             // read ccsidr
56    and w2, w1, #7                 // w2 = log2(linelen)-4
57    add w2, w2, #4                 // w2 = log2(linelen)
58    ubfx w4, w1, #3, #10         // w4 = max way number, right aligned
59    clz w5, w4                     // w5 = 32-log2(ways), bit position of way in dc operand
60    lsl w9, w4, w5                 // w9 = max way number, aligned to position in dc operand
61    lsl w16, w8, w5             // w16 = amount to decrement way number per iteration
62loop2:
63    ubfx w7, w1, #13, #15         // w7 = max set number, right aligned
64    lsl w7, w7, w2                 // w7 = max set number, aligned to position in dc operand
65    lsl w17, w8, w2             // w17 = amount to decrement set number per iteration
66loop3:
67    orr w11, w10, w9             // w11 = combine way number and cache number ...
68    orr w11, w11, w7             // ... and set number for dc operand
69    dc csw, x11                 // do data cache clean by set and way
70    subs w7, w7, w17             // decrement set number
71    b.ge loop3
72    subs x9, x9, x16             // decrement way number
73    b.ge loop2
74skip:
75    add w10, w10, #2         // increment 2 x cache level
76    cmp w3, w10
77    b.gt loop1
78finished:
79    mov    x10, #0
80    msr    csselr_el1, x10
81    dsb    sy
82    isb
83
84    ldp x0, x1,  [sp], #16
85    ldp x2, x3,  [sp], #16
86    ldp x4, x5,  [sp], #16
87    ldp x6, x7,  [sp], #16
88    ldp x8, x9,  [sp], #16
89    ldp x10, x11,  [sp], #16
90    ret
91
92sysreg_invalidate_i_and_d_caches:
93    mov    x12, x30 //lr
94    bl    sysreg_invalidate_d_cache
95    mov    x0, #0
96    ic    ialluis                // I+BTB cache invalidate
97    ret    x12
98
99sysreg_invalidate_d_cache_fast:
100    mov x12, x30 //lr
101    bl  sysreg_invalidate_d_cache
102    ret x12
103
104sysreg_invalidate_i_and_d_caches_fast:
105    mov x12, x30 //lr
106    bl  sysreg_invalidate_d_cache
107    mov x0, #0
108    ic  ialluis             // I+BTB cache invalidate
109    ret x12
110
111sysreg_invalidate_tlb:
112
113    sub     sp, sp, #96
114
115    stp     x0, x1, [sp, #16 * 0]
116    stp     x2, x3, [sp, #16 * 1]
117    stp     x4, x5, [sp, #16 * 2]
118    stp     x6, x7, [sp, #16 * 3]
119    stp     x8, x9, [sp, #16 * 4]
120    stp     x10, x11, [sp, #16 * 5]
121
122    tlbi    vmalle1
123    dsb    sy
124    isb
125
126    ldp x0, x1,  [sp], #16
127    ldp x2, x3,  [sp], #16
128    ldp x4, x5,  [sp], #16
129    ldp x6, x7,  [sp], #16
130    ldp x8, x9,  [sp], #16
131    ldp x10, x11,  [sp], #16
132    ret
133
134sysreg_invalidate_tlb_fn:
135    mov x12, x30 //lr
136    bl  sysreg_invalidate_tlb
137    ret x12
138
139sysreg_enable_mmu:
140    mov x12, x30 //lr
141
142    ic      iallu                           // I+BTB cache invalidate
143    tlbi    vmalle1                         // invalidate I + D TLBs
144    dsb     sy
145
146    ldr     x0, =0xff440c0400
147    msr        mair_el1, x0
148    isb
149
150    /*  TCR - Translation Control Register
151     *    4K granularity, 32-bit addresses, two subranges:
152     *
153     * TTBR1_EL1 -> 0xffff_ffff_8000_0000 to 0xffff_ffff_ffff_ffff
154     * TTBR0_EL1 -> 0x0000_0000_0000_0000 to 0x0000_0000_7fff_ffff
155     */
156
157    ldr     x0, =0x10b5203520
158    msr     tcr_el1, x0
159    isb
160
161    ldr     x0, =0x30d0199d
162    msr     sctlr_el1, x0
163    isb
164
165    ret x12
166
167