1/*	$NetBSD: cpufunc_asm_xscale.S,v 1.16 2002/08/17 16:36:32 thorpej Exp $	*/
2
3/*-
4 * Copyright (c) 2001, 2002 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Allen Briggs and Jason R. Thorpe for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed for the NetBSD Project by
20 *	Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 *
37 */
38
39/*-
40 * Copyright (c) 2001 Matt Thomas.
41 * Copyright (c) 1997,1998 Mark Brinicombe.
42 * Copyright (c) 1997 Causality Limited
43 * All rights reserved.
44 *
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions
47 * are met:
48 * 1. Redistributions of source code must retain the above copyright
49 *    notice, this list of conditions and the following disclaimer.
50 * 2. Redistributions in binary form must reproduce the above copyright
51 *    notice, this list of conditions and the following disclaimer in the
52 *    documentation and/or other materials provided with the distribution.
53 * 3. All advertising materials mentioning features or use of this software
54 *    must display the following acknowledgement:
55 *	This product includes software developed by Causality Limited.
56 * 4. The name of Causality Limited may not be used to endorse or promote
57 *    products derived from this software without specific prior written
58 *    permission.
59 *
60 * THIS SOFTWARE IS PROVIDED BY CAUSALITY LIMITED ``AS IS'' AND ANY EXPRESS
61 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
62 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
63 * DISCLAIMED. IN NO EVENT SHALL CAUSALITY LIMITED BE LIABLE FOR ANY DIRECT,
64 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
65 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
66 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
70 * SUCH DAMAGE.
71 *
72 * XScale assembly functions for CPU / MMU / TLB specific operations
73 */
74#include <machine/armreg.h>
75#include <machine/asm.h>
76__FBSDID("$FreeBSD$");
77
78/*
79 * Size of the XScale core D-cache.
80 */
81#define	DCACHE_SIZE		0x00008000
82
83/*
84 * CPWAIT -- Canonical method to wait for CP15 update.
85 * From: Intel 80200 manual, section 2.3.3.
86 *
87 * NOTE: Clobbers the specified temp reg.
88 */
89#define	CPWAIT_BRANCH							 \
90	sub	pc, pc, #4
91
92#define	CPWAIT(tmp)							 \
93	mrc	p15, 0, tmp, c2, c0, 0	/* arbitrary read of CP15 */	;\
94	mov	tmp, tmp		/* wait for it to complete */	;\
95	CPWAIT_BRANCH			/* branch to next insn */
96
97#define	CPWAIT_AND_RETURN_SHIFTER	lsr #32
98
99#define	CPWAIT_AND_RETURN(tmp)						 \
100	mrc	p15, 0, tmp, c2, c0, 0	/* arbitrary read of CP15 */	;\
101	/* Wait for it to complete and branch to the return address */	 \
102	sub	pc, lr, tmp, CPWAIT_AND_RETURN_SHIFTER
103
/*
 * void xscale_cpwait(void)
 *
 * Stand-alone CPWAIT: drain outstanding CP15 operations before
 * returning to the caller.  Clobbers: r0.
 */
ENTRY(xscale_cpwait)
	CPWAIT_AND_RETURN(r0)
END(xscale_cpwait)
107
108/*
109 * We need a separate cpu_control() entry point, since we have to
110 * invalidate the Branch Target Buffer in the event the BPRD bit
111 * changes in the control register.
112 */
113ENTRY(xscale_control)
114	mrc	CP15_SCTLR(r3)		/* Read the control register */
115	bic	r2, r3, r0		/* Clear bits */
116	eor	r2, r2, r1		/* XOR bits */
117
118	teq	r2, r3			/* Only write if there was a change */
119	mcrne	p15, 0, r0, c7, c5, 6	/* Invalidate the BTB */
120	mcrne	CP15_SCTLR(r2)		/* Write new control register */
121	mov	r0, r3			/* Return old value */
122
123	CPWAIT_AND_RETURN(r1)
124END(xscale_control)
125
126/*
127 * Functions to set the MMU Translation Table Base register
128 *
129 * We need to clean and flush the cache as it uses virtual
130 * addresses that are about to change.
131 */
132ENTRY(xscale_setttb)
133#ifdef CACHE_CLEAN_BLOCK_INTR
134	mrs	r3, cpsr
135	orr	r1, r3, #(PSR_I | PSR_F)
136	msr	cpsr_fsxc, r1
137#endif
138	stmfd	sp!, {r0-r3, lr}
139	bl	_C_LABEL(xscale_cache_cleanID)
140	mcr	p15, 0, r0, c7, c5, 0	/* invalidate I$ and BTB */
141	mcr	p15, 0, r0, c7, c10, 4	/* drain write and fill buffer */
142
143	CPWAIT(r0)
144
145	ldmfd	sp!, {r0-r3, lr}
146
147	/* Write the TTB */
148	mcr	p15, 0, r0, c2, c0, 0
149
150	/* If we have updated the TTB we must flush the TLB */
151	mcr	p15, 0, r0, c8, c7, 0	/* invalidate I+D TLB */
152
153	/* The cleanID above means we only need to flush the I cache here */
154	mcr	p15, 0, r0, c7, c5, 0	/* invalidate I$ and BTB */
155
156	CPWAIT(r0)
157
158#ifdef CACHE_CLEAN_BLOCK_INTR
159	msr	cpsr_fsxc, r3
160#endif
161	RET
162END(xscale_setttb)
163
164/*
165 * TLB functions
166 *
167 */
168ENTRY(xscale_tlb_flushID_SE)
169	mcr	p15, 0, r0, c8, c6, 1	/* flush D tlb single entry */
170	mcr	p15, 0, r0, c8, c5, 1	/* flush I tlb single entry */
171	CPWAIT_AND_RETURN(r0)
172END(xscale_tlb_flushID_SE)
173
174/*
175 * Cache functions
176 */
177ENTRY(xscale_cache_flushID)
178	mcr	p15, 0, r0, c7, c7, 0	/* flush I+D cache */
179	CPWAIT_AND_RETURN(r0)
180END(xscale_cache_flushID)
181
/*
 * void xscale_cache_flushI(void)
 *
 * Invalidate the entire I-cache.  Clobbers: r0.
 */
ENTRY(xscale_cache_flushI)
	mcr	p15, 0, r0, c7, c5, 0	/* flush I cache */
	CPWAIT_AND_RETURN(r0)
END(xscale_cache_flushI)
186
/*
 * void xscale_cache_flushD(void)
 *
 * Invalidate the entire D-cache (no writeback).  Clobbers: r0.
 */
ENTRY(xscale_cache_flushD)
	mcr	p15, 0, r0, c7, c6, 0	/* flush D cache */
	CPWAIT_AND_RETURN(r0)
END(xscale_cache_flushD)
191
/*
 * void xscale_cache_flushI_SE(vm_offset_t va)
 *
 * Invalidate the single I-cache line containing va (r0).
 * Clobbers: r0.
 */
ENTRY(xscale_cache_flushI_SE)
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	CPWAIT_AND_RETURN(r0)
END(xscale_cache_flushI_SE)
196
/*
 * void xscale_cache_flushD_SE(vm_offset_t va)
 *
 * Invalidate the single D-cache line containing va (r0), cleaning
 * it first to work around a core erratum.  Clobbers: r0.
 */
ENTRY(xscale_cache_flushD_SE)
	/*
	 * Errata (rev < 2): Must clean-dcache-line to an address
	 * before invalidate-dcache-line to an address, or dirty
	 * bits will not be cleared in the dcache array.
	 */
	mcr	p15, 0, r0, c7, c10, 1
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r0)
END(xscale_cache_flushD_SE)
207
/*
 * void xscale_cache_cleanD_E(vm_offset_t va)
 *
 * Write back (clean) the single D-cache line containing va (r0)
 * without invalidating it.  Clobbers: r0.
 */
ENTRY(xscale_cache_cleanD_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT_AND_RETURN(r0)
END(xscale_cache_cleanD_E)
212
213/*
214 * Information for the XScale cache clean/purge functions:
215 *
216 *	* Virtual address of the memory region to use
217 *	* Size of memory region
218 *
219 * Note the virtual address for the Data cache clean operation
220 * does not need to be backed by physical memory, since no loads
221 * will actually be performed by the allocate-line operation.
222 *
223 * Note that the Mini-Data cache MUST be cleaned by executing
224 * loads from memory mapped into a region reserved exclusively
225 * for cleaning of the Mini-Data cache.
226 */
227	.data
228
229	.global	_C_LABEL(xscale_cache_clean_addr)
230_C_LABEL(xscale_cache_clean_addr):
231	.word	0x00000000
232
233	.global	_C_LABEL(xscale_cache_clean_size)
234_C_LABEL(xscale_cache_clean_size):
235	.word	DCACHE_SIZE
236
237	.global	_C_LABEL(xscale_minidata_clean_addr)
238_C_LABEL(xscale_minidata_clean_addr):
239	.word	0x00000000
240
241	.global	_C_LABEL(xscale_minidata_clean_size)
242_C_LABEL(xscale_minidata_clean_size):
243	.word	0x00000800
244
245	.text
246
247.Lxscale_cache_clean_addr:
248	.word	_C_LABEL(xscale_cache_clean_addr)
249.Lxscale_cache_clean_size:
250	.word	_C_LABEL(xscale_cache_clean_size)
251
252.Lxscale_minidata_clean_addr:
253	.word	_C_LABEL(xscale_minidata_clean_addr)
254.Lxscale_minidata_clean_size:
255	.word	_C_LABEL(xscale_minidata_clean_size)
256
#ifdef CACHE_CLEAN_BLOCK_INTR
/* Mask IRQ+FIQ around a cache clean; saved cpsr is kept in r3. */
#define	XSCALE_CACHE_CLEAN_BLOCK					\
	mrs	r3, cpsr					;	\
	orr	r0, r3, #(PSR_I | PSR_F)			;	\
	msr	cpsr_fsxc, r0

#define	XSCALE_CACHE_CLEAN_UNBLOCK					\
	msr	cpsr_fsxc, r3
#else
#define	XSCALE_CACHE_CLEAN_BLOCK

#define	XSCALE_CACHE_CLEAN_UNBLOCK
#endif /* CACHE_CLEAN_BLOCK_INTR */

/*
 * On exit: r0 = end address of the chosen clean region, r1 = region
 * size, r2 = &xscale_cache_clean_addr (clobbered).  The clean loop
 * is expected to walk r0 downward by r1 bytes.
 */
#define	XSCALE_CACHE_CLEAN_PROLOGUE					\
	XSCALE_CACHE_CLEAN_BLOCK				;	\
	ldr	r2, .Lxscale_cache_clean_addr			;	\
	ldmia	r2, {r0, r1}					;	\
	/*								\
	 * BUG ALERT!							\
	 *								\
	 * The XScale core has a strange cache eviction bug, which	\
	 * requires us to use 2x the cache size for the cache clean	\
	 * and for that area to be aligned to 2 * cache size.		\
	 *								\
	 * The work-around is to use 2 areas for cache clean, and to	\
	 * alternate between them whenever this is done.  No one knows	\
	 * why the work-around works (mmm!).				\
	 */								\
	eor	r0, r0, #(DCACHE_SIZE)				;	\
	str	r0, [r2]					;	\
	add	r0, r0, r1

#define	XSCALE_CACHE_CLEAN_EPILOGUE					\
	XSCALE_CACHE_CLEAN_UNBLOCK
292
/*
 * Five entry points sharing one body:
 *
 *   xscale_cache_syncI / xscale_cache_purgeID — invalidate the I-cache
 *	and BTB first, then fall through to the D-cache clean;
 *   xscale_cache_cleanID / xscale_cache_purgeD / xscale_cache_cleanD —
 *	D-cache clean only.
 *
 * The D-cache is cleaned by issuing allocate-line operations over the
 * dedicated clean region (walking downward from its end), which evicts
 * every dirty line.  Clobbers: r0, r1, r2 (and r3 when built with
 * CACHE_CLEAN_BLOCK_INTR).
 */
ENTRY_NP(xscale_cache_syncI)

EENTRY_NP(xscale_cache_purgeID)
	mcr	p15, 0, r0, c7, c5, 0	/* flush I cache (D cleaned below) */
EENTRY_NP(xscale_cache_cleanID)
EENTRY_NP(xscale_cache_purgeD)
EENTRY(xscale_cache_cleanD)
	XSCALE_CACHE_CLEAN_PROLOGUE

	/* r0 = region end, r1 = bytes remaining; 32-byte cache lines. */
1:	subs	r0, r0, #32
	mcr	p15, 0, r0, c7, c2, 5	/* allocate cache line */
	subs	r1, r1, #32
	bne	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT(r0)

	XSCALE_CACHE_CLEAN_EPILOGUE
	RET
EEND(xscale_cache_cleanD)
EEND(xscale_cache_purgeD)
EEND(xscale_cache_cleanID)
EEND(xscale_cache_purgeID)
END(xscale_cache_syncI)
320
321/*
322 * Clean the mini-data cache.
323 *
324 * It's expected that we only use the mini-data cache for
325 * kernel addresses, so there is no need to purge it on
326 * context switch, and no need to prevent userspace access
327 * while we clean it.
328 */
329ENTRY(xscale_cache_clean_minidata)
330	ldr	r2, .Lxscale_minidata_clean_addr
331	ldmia	r2, {r0, r1}
3321:	ldr	r3, [r0], #32
333	subs	r1, r1, #32
334	bne	1b
335
336	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
337
338	CPWAIT_AND_RETURN(r1)
339END(xscale_cache_clean_minidata)
340
/*
 * void xscale_cache_purgeID_E(vm_offset_t va)
 *
 * Clean then invalidate the I- and D-cache lines containing va (r0).
 * The clean-before-invalidate ordering matches the rev < 2 erratum
 * noted at xscale_cache_flushD_SE.  Clobbers: r1.
 */
ENTRY(xscale_cache_purgeID_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT(r1)
	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r1)
END(xscale_cache_purgeID_E)
349
/*
 * void xscale_cache_purgeD_E(vm_offset_t va)
 *
 * Clean then invalidate the D-cache line containing va (r0).
 * Clobbers: r1.
 */
ENTRY(xscale_cache_purgeD_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT(r1)
	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r1)
END(xscale_cache_purgeD_E)
357
358/*
359 * Soft functions
360 */
361/* xscale_cache_syncI is identical to xscale_cache_purgeID */
362
363EENTRY(xscale_cache_cleanID_rng)
364ENTRY(xscale_cache_cleanD_rng)
365	cmp	r1, #0x4000
366	bcs	_C_LABEL(xscale_cache_cleanID)
367
368	and	r2, r0, #0x1f
369	add	r1, r1, r2
370	bic	r0, r0, #0x1f
371
3721:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
373	add	r0, r0, #32
374	subs	r1, r1, #32
375	bhi	1b
376
377	CPWAIT(r0)
378
379	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
380
381	CPWAIT_AND_RETURN(r0)
382/*END(xscale_cache_cleanID_rng)*/
383END(xscale_cache_cleanD_rng)
384
/*
 * void xscale_cache_purgeID_rng(vm_offset_t va, vm_size_t len)
 *
 * Clean + invalidate the I- and D-cache lines covering [va, va + len);
 * whole-cache purge for len >= 16 KB.  Clobbers: r0, r1, r2.
 */
ENTRY(xscale_cache_purgeID_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_purgeID)

	and	r2, r0, #0x1f		/* line-align start, grow length */
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)
END(xscale_cache_purgeID_rng)
406
/*
 * void xscale_cache_purgeD_rng(vm_offset_t va, vm_size_t len)
 *
 * Clean + invalidate the D-cache lines covering [va, va + len);
 * whole-cache purge for len >= 16 KB.  Clobbers: r0, r1, r2.
 */
ENTRY(xscale_cache_purgeD_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_purgeD)

	and	r2, r0, #0x1f		/* line-align start, grow length */
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)
END(xscale_cache_purgeD_rng)
427
/*
 * void xscale_cache_syncI_rng(vm_offset_t va, vm_size_t len)
 *
 * Synchronize the I-cache with the D-cache over [va, va + len):
 * clean each D line, invalidate the matching I line.  Whole-cache
 * sync for len >= 16 KB.  Clobbers: r0, r1, r2.
 */
ENTRY(xscale_cache_syncI_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_syncI)

	and	r2, r0, #0x1f		/* line-align start, grow length */
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)
END(xscale_cache_syncI_rng)
448
/*
 * void xscale_cache_flushD_rng(vm_offset_t va, vm_size_t len)
 *
 * Invalidate (no writeback) the D-cache lines covering [va, va + len).
 * No whole-cache shortcut here: a full D-cache invalidate would discard
 * unrelated dirty data.  Clobbers: r0, r1, r2.
 */
ENTRY(xscale_cache_flushD_rng)
	and	r2, r0, #0x1f		/* line-align start, grow length */
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)
END(xscale_cache_flushD_rng)
463
464/*
465 * Context switch.
466 *
467 * These is the CPU-specific parts of the context switcher cpu_switch()
468 * These functions actually perform the TTB reload.
469 *
470 * NOTE: Special calling convention
471 *	r1, r4-r13 must be preserved
472 */
473ENTRY(xscale_context_switch)
474	/*
475	 * CF_CACHE_PURGE_ID will *ALWAYS* be called prior to this.
476	 * Thus the data cache will contain only kernel data and the
477	 * instruction cache will contain only kernel code, and all
478	 * kernel mappings are shared by all processes.
479	 */
480
481	/* Write the TTB */
482	mcr	p15, 0, r0, c2, c0, 0
483
484	/* If we have updated the TTB we must flush the TLB */
485	mcr	p15, 0, r0, c8, c7, 0	/* flush the I+D tlb */
486
487	CPWAIT_AND_RETURN(r0)
488END(xscale_context_switch)
489
490/*
491 * xscale_cpu_sleep
492 *
493 * This is called when there is nothing on any of the run queues.
494 * We go into IDLE mode so that any IRQ or FIQ will awaken us.
495 *
496 * If this is called with anything other than ARM_SLEEP_MODE_IDLE,
497 * ignore it.
498 */
499ENTRY(xscale_cpu_sleep)
500	tst	r0, #0x00000000
501	bne	1f
502	mov	r0, #0x1
503	mcr	p14, 0, r0, c7, c0, 0
504
5051:
506	RET
507END(xscale_cpu_sleep)
508
509