/*-
 * Copyright (c) 2010 Per Odlund <per.odlund@armagedon.se>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * ARMv7 assembly functions for manipulating caches and other core functions.
 * Based on cpufuncs for v6 and xscale.
 */

#include <mach/arm/asm.h>
#include <arm/asm_help.h>
#include <arm/arch.h>

#ifdef _ARM_ARCH_7

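/*
 * This file was written against NetBSD-style assembler macros; map them
 * onto the local ones. ENTRY_NP (entry, no profiling) collapses to plain
 * ENTRY, and _C_LABEL adds the leading underscore that C symbol names
 * carry in this environment.
 */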
#define ENTRY_NP ENTRY
#define _C_LABEL(x) _ ##x

ENTRY(arm_cpu_sleep)
	wfi				@ wait for an interrupt
	bx	lr
END(arm_cpu_sleep)

ENTRY(arm_wait)
	mrc	p15, 0, r0, c2, c0, 0	@ arbitrary read of CP15
	add	r0, r0, #0		@ a stall
	bx	lr
END(arm_wait)

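/*
 * arm_context_switch: switch translation tables. r0 holds the physical
 * address of the new L1 table; the low TTBR0 attribute bits are filled
 * in below depending on whether MPIDR reports multiprocessing
 * extensions. The whole unified TLB is then invalidated, since no ASID
 * is passed here.
 */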
ENTRY(arm_context_switch)
	dsb				@ data synchronization barrier
	mrc	p15, 0, r2, c0, c0, 5	@ get MPIDR
	cmp	r2, #0			@ bit 31 set means MP extensions
	orrlt	r0, r0, #0x5b		@ MP, cacheable (Normal WB)
	orrge	r0, r0, #0x1b		@ Non-MP, cacheable, normal WB
	mcr	p15, 0, r0, c2, c0, 0	@ set the new TTB
#ifdef MULTIPROCESSOR
	mcr	p15, 0, r0, c8, c3, 0	@ flush the I+D TLBs (inner shareable)
#else
	mcr	p15, 0, r0, c8, c7, 0	@ flush the I+D TLBs
#endif
	dsb
	isb
	bx	lr
END(arm_context_switch)

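/*
 * arm_tlb_flushID_ASID: invalidate all unified TLB entries matching the
 * ASID passed in r0 (TLBIASID, or TLBIASIDIS on MP so the operation is
 * broadcast to the inner shareable domain).
 */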
ENTRY(arm_tlb_flushID_ASID)
#ifdef MULTIPROCESSOR
	mcr	p15, 0, r0, c8, c3, 2	@ flush I+D tlb per ASID (inner shareable)
#else
	mcr	p15, 0, r0, c8, c7, 2	@ flush I+D tlb per ASID
#endif
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(arm_tlb_flushID_ASID)

ENTRY(arm_tlb_flushID)
	mov	r0, #0
#ifdef MULTIPROCESSOR
	mcr	p15, 0, r0, c8, c3, 0	@ flush I+D tlb all (inner shareable)
#else
	mcr	p15, 0, r0, c8, c7, 0	@ flush I+D tlb all
#endif
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(arm_tlb_flushID)

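/*
 * arm_tlb_flushID_RANGE: invalidate unified TLB entries for every page
 * in [r0, r1). Both bounds are truncated to page boundaries, each page
 * is flushed with TLBIMVA, and the branch predictor is invalidated
 * afterwards since stale predicted branches may reference the old
 * mappings.
 */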
ENTRY(arm_tlb_flushID_RANGE)
	dsb				@ data synchronization barrier
	mov	r0, r0, lsr #12		@ round start down to a page boundary
	mov	r1, r1, lsr #12
	mov	r0, r0, lsl #12
	mov	r1, r1, lsl #12
1:	mcr	p15, 0, r0, c8, c7, 1	@ flush I+D tlb single entry
	add	r0, r0, #0x1000		@ page size
	cmp	r0, r1
	bcc	1b
	mov	r2, #0
	mcr	p15, 0, r2, c7, c5, 6	@ invalidate branch predictor (BPIALL)
	dsb
	isb
	bx	lr
END(arm_tlb_flushID_RANGE)

ENTRY(arm_tlb_flushID_SE)
#ifdef MULTIPROCESSOR
	mcr	p15, 0, r0, c8, c3, 1	@ flush I+D tlb single entry (inner shareable)
#else
	mcr	p15, 0, r0, c8, c7, 1	@ flush I+D tlb single entry
#endif
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(arm_tlb_flushID_SE)

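/*
 * arm_setttb: load a new translation table base. r0 holds the physical
 * address of the L1 table; r1 is a flag requesting a full TLB flush
 * after the switch (tested with the cmp/mcrne pair below).
 */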
ENTRY_NP(arm_setttb)
	mrc	p15, 0, r2, c0, c0, 5	@ get MPIDR
	cmp	r2, #0			@ bit 31 set means MP extensions
	orrlt	r0, r0, #0x58		@ MP, cacheable
	orrge	r0, r0, #0x18		@ Non-MP, cacheable
	mcr	p15, 0, r0, c2, c0, 0	@ load new TTB
	cmp	r1, #0			@ flush the TLBs too?
#ifdef MULTIPROCESSOR
	mcrne	p15, 0, r0, c8, c3, 0	@ invalidate all I+D TLBs (inner shareable)
#else
	mcrne	p15, 0, r0, c8, c7, 0	@ invalidate all I+D TLBs
#endif
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(arm_setttb)

/* Other functions. */

ENTRY_NP(arm_drain_writebuf)
	dsb				@ data synchronization barrier
	RET
END(arm_drain_writebuf)

/* Cache operations. */

/* LINTSTUB: void arm_icache_sync_range(vaddr_t, vsize_t); */
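/*
 * The range operations below share one idiom: read CCSIDR (with CSSELR
 * assumed to still select the L1 data cache), decode the line size as
 * 16 << CCSIDR[2:0] bytes, then round the start address down and extend
 * the length so the loop covers whole cache lines.
 */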
ENTRY_NP(arm_icache_sync_range)
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get line size (log2(size)-4, 0=16)
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
1:
	mcr	p15, 0, r0, c7, c10, 1	@ wb the D-Cache line
	mcr	p15, 0, r0, c7, c5, 1	@ invalidate the I-Cache line
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b

	dsb				@ data synchronization barrier
	isb
	bx	lr
END(arm_icache_sync_range)

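/*
 * arm_set_context_id: write r0 into CONTEXTIDR (ASID in the low 8 bits,
 * PROCID above). The isb keeps later instruction fetches from running
 * under the old ASID.
 */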
ENTRY(arm_set_context_id)
	mcr	p15, 0, r0, c13, c0, 1	@ write CONTEXTIDR
	isb
	bx	lr
END(arm_set_context_id)

/* LINTSTUB: void arm_icache_sync_all(void); */
ENTRY_NP(arm_icache_sync_all)
	/*
	 * We assume the I-cache can never be out of sync with the D-cache
	 * here, so it is enough to clean and invalidate both caches via
	 * arm_idcache_wbinv_all.
	 */
	stmdb	sp!, {r0, lr}
	bl	_C_LABEL(arm_idcache_wbinv_all)	@ clean and invalidate I+D
	ldmia	sp!, {r0, lr}
	dsb				@ data synchronization barrier
	isb
	bx	lr
END(arm_icache_sync_all)

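/*
 * arm_dcache_wb_range: clean (write back, without invalidating) the
 * D-cache to the point of coherency for [r0, r0 + r1).
 */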
ENTRY(arm_dcache_wb_range)
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get line size (log2(size)-4, 0=16)
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
	dsb
1:
	mcr	p15, 0, r0, c7, c10, 1	@ wb the D-Cache line to PoC
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b
	dsb				@ data synchronization barrier
	bx	lr
END(arm_dcache_wb_range)

/* LINTSTUB: void arm_dcache_wbinv_range(vaddr_t, vsize_t); */
ENTRY(arm_dcache_wbinv_range)
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get line size (log2(size)-4, 0=16)
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
	dsb
1:
	mcr	p15, 0, r0, c7, c14, 1	@ wb and inv the D-Cache line
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b
	dsb				@ data synchronization barrier
	bx	lr
END(arm_dcache_wbinv_range)

/* LINTSTUB: void arm_dcache_inv_range(vaddr_t, vsize_t); */
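/*
 * Note: a pure invalidate discards dirty data, so callers must own whole
 * cache lines in [r0, r0 + r1); anything else sharing a line would lose
 * its writes. Typical use is for DMA receive buffers.
 */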
ENTRY(arm_dcache_inv_range)
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get line size (log2(size)-4, 0=16)
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
1:
	mcr	p15, 0, r0, c7, c6, 1	@ invalidate the D-Cache line
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b

	dsb				@ data synchronization barrier
	bx	lr
END(arm_dcache_inv_range)

/* LINTSTUB: void arm_idcache_wbinv_range(vaddr_t, vsize_t); */
ENTRY(arm_idcache_wbinv_range)
	mrc	p15, 1, r2, c0, c0, 0	@ read CCSIDR
	and	r2, r2, #7		@ get line size (log2(size)-4, 0=16)
	mov	ip, #16			@ make a bit mask
	lsl	r2, ip, r2		@ and shift into position
	sub	ip, r2, #1		@ make into a mask
	and	r3, r0, ip		@ get offset into cache line
	add	r1, r1, r3		@ add to length
	bic	r0, r0, ip		@ clear offset from start.
	dsb
1:
	mcr	p15, 0, r0, c7, c5, 1	@ invalidate the I-Cache line
	mcr	p15, 0, r0, c7, c14, 1	@ wb and inv the D-Cache line
	add	r0, r0, r2
	subs	r1, r1, r2
	bhi	1b

	dsb				@ data synchronization barrier
	isb
	bx	lr
END(arm_idcache_wbinv_range)

/* LINTSTUB: void arm_idcache_wbinv_all(void); */
ENTRY_NP(arm_idcache_wbinv_all)
	/*
	 * We assume that the code here can never be out of sync with the
	 * dcache, so we can safely invalidate the Icache here and then
	 * tail-branch into the Dcache purging code.
	 */
	dmb
	mcr	p15, 0, r0, c7, c5, 0	@ ICIALLU (register value ignored)
	b	_C_LABEL(arm_dcache_wbinv_all)
END(arm_idcache_wbinv_all)

/*
 * The functions below work hard to avoid pushing registers onto the
 * stack, limiting themselves to r0-r3 and ip.
 */
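/*
 * The set/way loops below build the operand for the set/way cache ops:
 * the way index lives in the top bits (bit 31 down), the set index sits
 * just above the line-offset bits, and the cache level occupies bits
 * [3:1] (i.e. level*2 as a raw value). The "decr" constants are chosen
 * so a single subtraction either steps the set index down by one, or
 * steps the way index down while reloading the set index with its
 * maximum.
 */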
/* LINTSTUB: void arm_icache_inv_all(void); */
ENTRY_NP(arm_icache_inv_all)
	mov	r0, #0
	mcr	p15, 2, r0, c0, c0, 0	@ select L1 cache (CSSELR = 0)
	isb				@ sync the CSSELR change
	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR

	ubfx	r2, r0, #13, #15	@ get numsets - 1 from CCSIDR
	ubfx	r3, r0, #3, #10		@ get numways - 1 from CCSIDR
	clz	r1, r3			@ number of bits to MSB of way
	lsl	r3, r3, r1		@ shift into position
	mov	ip, #1
	lsl	ip, ip, r1		@ ip now contains the way decr

	ubfx	r0, r0, #0, #3		@ get linesize from CCSIDR
	add	r0, r0, #4		@ apply bias
	lsl	r2, r2, r0		@ shift sets by log2(linesize)
	add	r3, r3, r2		@ merge numsets - 1 with numways - 1
	sub	ip, ip, r2		@ subtract numsets - 1 from way decr
	mov	r1, #1
	lsl	r1, r1, r0		@ r1 now contains the set decr
	mov	r2, ip			@ r2 now contains the way decr

	/* r3 = ways/sets, r2 = way decr, r1 = set decr, r0 and ip are free */
1:	mcr	p15, 0, r3, c7, c6, 2	@ invalidate line (DCISW)
	movs	r0, r3			@ get current way/set
	beq	2f			@ at 0 means we are done.
	movs	r0, r0, lsl #10		@ clear way bits leaving only set bits
	subne	r3, r3, r1		@ non-zero?, decrement set #
	subeq	r3, r3, r2		@ zero?, decrement way # and restore set count
	b	1b

2:	dsb				@ wait for stores to finish
	mov	r0, #0			@ and ...
	mcr	p15, 0, r0, c7, c5, 0	@ invalidate the I-cache (ICIALLU)
	isb				@ instruction sync barrier
	bx	lr			@ return
END(arm_icache_inv_all)

/* LINTSTUB: void arm_dcache_inv_all(void); */
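/*
 * arm_dcache_inv_all: walk every data/unified cache level reported by
 * CLIDR up to the level of coherency, invalidating each level set by
 * set and way by way.
 */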
ENTRY_NP(arm_dcache_inv_all)
	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	ands	r3, r0, #0x07000000	@ isolate LoC; anything to invalidate?
	beq	.Ldone_inv		@ LoC == 0, no

	mov	r3, #0			@ start at cache level 0
.Lstart_inv:
	add	r2, r3, r3, lsr #1	@ r2 = 3 * level (CLIDR Ctype shift)
	mov	r1, r0, lsr r2		@ r1 = cache type for this level
	and	r1, r1, #7
	cmp	r1, #2			@ data or unified cache at this level?
	blt	.Lnext_level_inv	@ nope, skip level

	mcr	p15, 2, r3, c0, c0, 0	@ select cache level
	isb
	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR

	ubfx	ip, r0, #0, #3		@ get linesize from CCSIDR
	add	ip, ip, #4		@ apply bias
	ubfx	r2, r0, #13, #15	@ get numsets - 1 from CCSIDR
	lsl	r2, r2, ip		@ shift to set position
	orr	r3, r3, r2		@ merge set into way/set/level
	mov	r1, #1
	lsl	r1, r1, ip		@ r1 = set decr

	ubfx	ip, r0, #3, #10		@ get numways - 1 (last use of CCSIDR)
	clz	r2, ip			@ number of bits to MSB of way
	lsl	ip, ip, r2		@ shift by that into way position
	mov	r0, #1
	lsl	r2, r0, r2		@ r2 now contains the way decr
	mov	r0, r3			@ get sets/level (no way yet)
	orr	r3, r3, ip		@ merge way into way/set/level
	bfc	r0, #0, #4		@ clear level bits, leaving the set field
	sub	r2, r2, r0		@ subtract from way decr

	/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
1:	mcr	p15, 0, r3, c7, c6, 2	@ invalidate line (DCISW)
	cmp	r3, #15			@ done with this level (way/set == 0)?
	bls	.Lnext_level_inv	@ yes, go to next level
	lsl	r0, r3, #10		@ clear way bits, leaving set/level bits
	lsrs	r0, r0, #14		@ clear level bits, leaving only set bits
	subne	r3, r3, r1		@ non-zero?, decrement set #
	subeq	r3, r3, r2		@ zero?, decrement way # and restore set count
	b	1b

.Lnext_level_inv:
	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	and	ip, r0, #0x07000000	@ narrow to LoC
	lsr	ip, ip, #23		@ shift LoC into the level-field position
	add	r3, r3, #2		@ go to next level
	cmp	r3, ip			@ more levels to do?
	blt	.Lstart_inv		@ not done, next level (r0 == CLIDR)

.Ldone_inv:
	mov	r0, #0			@ default back to cache level 0
	mcr	p15, 2, r0, c0, c0, 0	@ select cache level
	dsb
	isb
	bx	lr
END(arm_dcache_inv_all)

/* LINTSTUB: void arm_dcache_wbinv_all(void); */
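/*
 * arm_dcache_wbinv_all: the same CLIDR walk as arm_dcache_inv_all, but
 * each line is cleaned and invalidated (DCCISW) instead of just
 * invalidated.
 */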
ENTRY_NP(arm_dcache_wbinv_all)
	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	ands	r3, r0, #0x07000000	@ isolate LoC; anything to clean?
	beq	.Ldone_wbinv		@ LoC == 0, no

	mov	r3, #0			@ start at cache level 0
.Lstart_wbinv:
	add	r2, r3, r3, lsr #1	@ r2 = 3 * level (CLIDR Ctype shift)
	mov	r1, r0, lsr r2		@ r1 = cache type for this level
	bfc	r1, #3, #28
	cmp	r1, #2			@ data or unified cache at this level?
	blt	.Lnext_level_wbinv	@ nope, skip level

	mcr	p15, 2, r3, c0, c0, 0	@ select cache level
	isb
	mrc	p15, 1, r0, c0, c0, 0	@ read CCSIDR

	ubfx	ip, r0, #0, #3		@ get linesize from CCSIDR
	add	ip, ip, #4		@ apply bias
	ubfx	r2, r0, #13, #15	@ get numsets - 1 from CCSIDR
	lsl	r2, r2, ip		@ shift to set position
	orr	r3, r3, r2		@ merge set into way/set/level
	mov	r1, #1
	lsl	r1, r1, ip		@ r1 = set decr

	ubfx	ip, r0, #3, #10		@ get numways - 1 (last use of CCSIDR)
	clz	r2, ip			@ number of bits to MSB of way
	lsl	ip, ip, r2		@ shift by that into way position
	mov	r0, #1
	lsl	r2, r0, r2		@ r2 now contains the way decr
	mov	r0, r3			@ get sets/level (no way yet)
	orr	r3, r3, ip		@ merge way into way/set/level
	bfc	r0, #0, #4		@ clear level bits, leaving the set field
	sub	r2, r2, r0		@ subtract from way decr

	/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
1:	mcr	p15, 0, r3, c7, c14, 2	@ writeback and invalidate line (DCCISW)
	cmp	r3, #15			@ done with this level (way/set == 0)?
	bls	.Lnext_level_wbinv	@ yes, go to next level
	lsl	r0, r3, #10		@ clear way bits, leaving set/level bits
	lsrs	r0, r0, #14		@ clear level bits, leaving only set bits
	subne	r3, r3, r1		@ non-zero?, decrement set #
	subeq	r3, r3, r2		@ zero?, decrement way # and restore set count
	b	1b

.Lnext_level_wbinv:
	mrc	p15, 1, r0, c0, c0, 1	@ read CLIDR
	and	ip, r0, #0x07000000	@ narrow to LoC
	lsr	ip, ip, #23		@ shift LoC into the level-field position
	add	r3, r3, #2		@ go to next level
	cmp	r3, ip			@ more levels to do?
	blt	.Lstart_wbinv		@ not done, next level (r0 == CLIDR)

.Ldone_wbinv:
	mov	r0, #0			@ default back to cache level 0
	mcr	p15, 2, r0, c0, c0, 0	@ select cache level
	dsb
	isb
	bx	lr
END(arm_dcache_wbinv_all)

#endif /* _ARM_ARCH_7 */