/*	$NetBSD: cpufunc_asm_xscale.S,v 1.16 2002/08/17 16:36:32 thorpej Exp $	*/

/*-
 * Copyright (c) 2001, 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs and Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*-
 * Copyright (c) 2001 Matt Thomas.
 * Copyright (c) 1997,1998 Mark Brinicombe.
 * Copyright (c) 1997 Causality Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Causality Limited.
 * 4. The name of Causality Limited may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY CAUSALITY LIMITED ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL CAUSALITY LIMITED BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * XScale assembly functions for CPU / MMU / TLB specific operations
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/*
 * Size of the XScale core D-cache.
 */
#define	DCACHE_SIZE		0x00008000

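/*
 * Literal pool entry: the address of the C variable
 * block_userspace_access, loaded PC-relative by the routines below.
 */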
.Lblock_userspace_access:
	.word	_C_LABEL(block_userspace_access)

/*
 * CPWAIT -- Canonical method to wait for CP15 update.
 * From: Intel 80200 manual, section 2.3.3.
 *
 * NOTE: Clobbers the specified temp reg.
 */
#define	CPWAIT_BRANCH							 \
	sub	pc, pc, #4

#define	CPWAIT(tmp)							 \
	mrc	p15, 0, tmp, c2, c0, 0	/* arbitrary read of CP15 */	;\
	mov	tmp, tmp		/* wait for it to complete */	;\
	CPWAIT_BRANCH			/* branch to next insn */

#define	CPWAIT_AND_RETURN_SHIFTER	lsr #32

#define	CPWAIT_AND_RETURN(tmp)						 \
	mrc	p15, 0, tmp, c2, c0, 0	/* arbitrary read of CP15 */	;\
	/* Wait for it to complete and branch to the return address */	 \
	sub	pc, lr, tmp, CPWAIT_AND_RETURN_SHIFTER
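
/*
 * How CPWAIT works: the MRC result cannot be delivered until all
 * outstanding CP15 operations have completed, the MOV stalls on that
 * result, and the branch-to-next-instruction flushes the pipeline.
 * CPWAIT_AND_RETURN folds the stall and the return into a single
 * instruction: an immediate "lsr #32" always produces zero, so
 * "sub pc, lr, tmp, lsr #32" computes pc = lr - 0 while still
 * depending on the MRC result.
 */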

ENTRY(xscale_cpwait)
	CPWAIT_AND_RETURN(r0)

/*
 * We need a separate cpu_control() entry point, since we have to
 * invalidate the Branch Target Buffer in the event the BPRD bit
 * changes in the control register.
 */
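/*
 * Interface (as implemented below): r0 holds the bits to clear in
 * the control register, r1 the bits to toggle (EOR) after clearing;
 * the previous control register value is returned in r0.
 */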
ENTRY(xscale_control)
	mrc	p15, 0, r3, c1, c0, 0	/* Read the control register */
	bic	r2, r3, r0		/* Clear bits */
	eor	r2, r2, r1		/* XOR bits */

	teq	r2, r3			/* Only write if there was a change */
	mcrne	p15, 0, r0, c7, c5, 6	/* Invalidate the BTB */
	mcrne	p15, 0, r2, c1, c0, 0	/* Write new control register */
	mov	r0, r3			/* Return old value */

	CPWAIT_AND_RETURN(r1)

/*
 * Functions to set the MMU Translation Table Base register
 *
 * We need to clean and flush the cache as it uses virtual
 * addresses that are about to change.
 */
ENTRY(xscale_setttb)
#ifdef CACHE_CLEAN_BLOCK_INTR
	mrs	r3, cpsr_all
	orr	r1, r3, #(I32_bit | F32_bit)
	msr	cpsr_all, r1
#else
	ldr	r3, .Lblock_userspace_access
	ldr	r2, [r3]
	orr	r1, r2, #1
	str	r1, [r3]
#endif
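	/*
	 * The block above keeps the D-cache from being re-dirtied while
	 * we clean it: either IRQ/FIQ are masked, or the
	 * block_userspace_access flag is raised, which (as used
	 * elsewhere in this port) should keep the user-access routines
	 * from touching the cache behind our back.
	 */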
	stmfd	sp!, {r0-r3, lr}
	bl	_C_LABEL(xscale_cache_cleanID)
	mcr	p15, 0, r0, c7, c5, 0	/* invalidate I$ and BTB */
	mcr	p15, 0, r0, c7, c10, 4	/* drain write and fill buffer */

	CPWAIT(r0)

	ldmfd	sp!, {r0-r3, lr}

	/* Write the TTB */
	mcr	p15, 0, r0, c2, c0, 0

	/* If we have updated the TTB we must flush the TLB */
	mcr	p15, 0, r0, c8, c7, 0	/* invalidate I+D TLB */

	/* The cleanID above means we only need to flush the I cache here */
	mcr	p15, 0, r0, c7, c5, 0	/* invalidate I$ and BTB */

	CPWAIT(r0)

#ifdef CACHE_CLEAN_BLOCK_INTR
	msr	cpsr_all, r3
#else
	str	r2, [r3]
#endif
	RET

/*
 * TLB functions
 */
ENTRY(xscale_tlb_flushID_SE)
	mcr	p15, 0, r0, c8, c6, 1	/* flush D tlb single entry */
	mcr	p15, 0, r0, c8, c5, 1	/* flush I tlb single entry */
	CPWAIT_AND_RETURN(r0)

/*
 * Cache functions
 */
ENTRY(xscale_cache_flushID)
	mcr	p15, 0, r0, c7, c7, 0	/* flush I+D cache */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushI)
	mcr	p15, 0, r0, c7, c5, 0	/* flush I cache */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushD)
	mcr	p15, 0, r0, c7, c6, 0	/* flush D cache */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushI_SE)
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushD_SE)
	/*
	 * Errata (rev < 2): Must clean-dcache-line to an address
	 * before invalidate-dcache-line to an address, or dirty
	 * bits will not be cleared in the dcache array.
	 */
	mcr	p15, 0, r0, c7, c10, 1
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_cleanD_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT_AND_RETURN(r0)

/*
 * Information for the XScale cache clean/purge functions:
 *
 *	* Virtual address of the memory region to use
 *	* Size of memory region
 *
 * Note the virtual address for the Data cache clean operation
 * does not need to be backed by physical memory, since no loads
 * will actually be performed by the allocate-line operation.
 *
 * Note that the Mini-Data cache MUST be cleaned by executing
 * loads from memory mapped into a region reserved exclusively
 * for cleaning of the Mini-Data cache.
 */
	.data

	.global	_C_LABEL(xscale_cache_clean_addr)
_C_LABEL(xscale_cache_clean_addr):
	.word	0x00000000

	.global	_C_LABEL(xscale_cache_clean_size)
_C_LABEL(xscale_cache_clean_size):
	.word	DCACHE_SIZE

	.global	_C_LABEL(xscale_minidata_clean_addr)
_C_LABEL(xscale_minidata_clean_addr):
	.word	0x00000000

	.global	_C_LABEL(xscale_minidata_clean_size)
_C_LABEL(xscale_minidata_clean_size):
	.word	0x00000800

	.text

.Lxscale_cache_clean_addr:
	.word	_C_LABEL(xscale_cache_clean_addr)
.Lxscale_cache_clean_size:
	.word	_C_LABEL(xscale_cache_clean_size)

.Lxscale_minidata_clean_addr:
	.word	_C_LABEL(xscale_minidata_clean_addr)
.Lxscale_minidata_clean_size:
	.word	_C_LABEL(xscale_minidata_clean_size)

#ifdef CACHE_CLEAN_BLOCK_INTR
#define	XSCALE_CACHE_CLEAN_BLOCK					\
	mrs	r3, cpsr_all					;	\
	orr	r0, r3, #(I32_bit | F32_bit)			;	\
	msr	cpsr_all, r0

#define	XSCALE_CACHE_CLEAN_UNBLOCK					\
	msr	cpsr_all, r3
#else
#define	XSCALE_CACHE_CLEAN_BLOCK					\
	ldr	r3, .Lblock_userspace_access			;	\
	ldr	ip, [r3]					;	\
	orr	r0, ip, #1					;	\
	str	r0, [r3]

#define	XSCALE_CACHE_CLEAN_UNBLOCK					\
	str	ip, [r3]
#endif /* CACHE_CLEAN_BLOCK_INTR */

#define	XSCALE_CACHE_CLEAN_PROLOGUE					\
	XSCALE_CACHE_CLEAN_BLOCK				;	\
	ldr	r2, .Lxscale_cache_clean_addr			;	\
	ldmia	r2, {r0, r1}					;	\
	/*								\
	 * BUG ALERT!							\
	 *								\
	 * The XScale core has a strange cache eviction bug, which	\
	 * requires us to use 2x the cache size for the cache clean	\
	 * and for that area to be aligned to 2 * cache size.		\
	 *								\
	 * The work-around is to use 2 areas for cache clean, and to	\
	 * alternate between them whenever this is done.  No one knows	\
	 * why the work-around works (mmm!).				\
	 */								\
	eor	r0, r0, #(DCACHE_SIZE)				;	\
	str	r0, [r2]					;	\
	add	r0, r0, r1
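
/*
 * Example of the ping-pong in the prologue, using a hypothetical
 * clean area at VA 0xc8000000 (aligned to 2 * DCACHE_SIZE): the
 * first clean EORs the stored address to 0xc8008000 and walks
 * [0xc8008000, 0xc8010000); the next clean walks
 * [0xc8000000, 0xc8008000); and so on, alternating halves.
 */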

#define	XSCALE_CACHE_CLEAN_EPILOGUE					\
	XSCALE_CACHE_CLEAN_UNBLOCK

ENTRY_NP(xscale_cache_syncI)
ENTRY_NP(xscale_cache_purgeID)
	mcr	p15, 0, r0, c7, c5, 0	/* flush I cache (D cleaned below) */
ENTRY_NP(xscale_cache_cleanID)
ENTRY_NP(xscale_cache_purgeD)
ENTRY(xscale_cache_cleanD)
	XSCALE_CACHE_CLEAN_PROLOGUE

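	/*
	 * Clean without any loads or stores: each line-allocate below
	 * evicts a victim line from the D-cache, writing it back to
	 * memory if it was dirty, so walking DCACHE_SIZE bytes of the
	 * reserved area is intended to force out every dirty line.
	 */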
1:	subs	r0, r0, #32
	mcr	p15, 0, r0, c7, c2, 5	/* allocate cache line */
	subs	r1, r1, #32
	bne	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT(r0)

	XSCALE_CACHE_CLEAN_EPILOGUE
	RET

/*
 * Clean the mini-data cache.
 *
 * It's expected that we only use the mini-data cache for
 * kernel addresses, so there is no need to purge it on
 * context switch, and no need to prevent userspace access
 * while we clean it.
 */
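/*
 * There is no coprocessor clean operation for the mini-data cache;
 * it is cleaned by loading one word per 32-byte line from the
 * reserved region, each load displacing a mini-data line and writing
 * it back to memory if it was dirty.
 */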
ENTRY(xscale_cache_clean_minidata)
	ldr	r2, .Lxscale_minidata_clean_addr
	ldmia	r2, {r0, r1}
1:	ldr	r3, [r0], #32
	subs	r1, r1, #32
	bne	1b

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r1)

ENTRY(xscale_cache_purgeID_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT(r1)
	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r1)

ENTRY(xscale_cache_purgeD_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT(r1)
	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r1)

/*
 * Soft functions
 */
/* xscale_cache_syncI is identical to xscale_cache_purgeID */

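/*
 * The ranged operations below share a common pattern: ranges of
 * 0x4000 (16KB) bytes or more fall back to the corresponding
 * whole-cache operation, on the assumption that a full clean/purge
 * is cheaper at that point; smaller ranges are first aligned down to
 * a 32-byte cache line, with the length grown to compensate.
 */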
ENTRY(xscale_cache_cleanID_rng)
ENTRY(xscale_cache_cleanD_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_cleanID)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_purgeID_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_purgeID)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_purgeD_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_purgeD)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_syncI_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_syncI)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

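/*
 * Note: unlike the purge variants above, xscale_cache_flushD_rng
 * invalidates without cleaning first, so any dirty data in the
 * range is simply discarded.
 */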
ENTRY(xscale_cache_flushD_rng)
	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

/*
 * Context switch.
 *
 * This is the CPU-specific part of the context switcher, cpu_switch().
 * It actually performs the TTB reload.
 *
 * NOTE: Special calling convention
 *	r1, r4-r13 must be preserved
 */
ENTRY(xscale_context_switch)
	/*
	 * CF_CACHE_PURGE_ID will *ALWAYS* be called prior to this.
	 * Thus the data cache will contain only kernel data and the
	 * instruction cache will contain only kernel code, and all
	 * kernel mappings are shared by all processes.
	 */

	/* Write the TTB */
	mcr	p15, 0, r0, c2, c0, 0

	/* If we have updated the TTB we must flush the TLB */
	mcr	p15, 0, r0, c8, c7, 0	/* flush the I+D tlb */

	CPWAIT_AND_RETURN(r0)

/*
 * xscale_cpu_sleep
 *
 * This is called when there is nothing on any of the run queues.
 * We go into IDLE mode so that any IRQ or FIQ will awaken us.
 *
 * If this is called with anything other than ARM_SLEEP_MODE_IDLE,
 * ignore it.
 */
ENTRY(xscale_cpu_sleep)
	cmp	r0, #0x00000000		/* anything but mode 0 (idle)? */
	bne	1f			/* then ignore the request */
	mov	r0, #0x1
	mcr	p14, 0, r0, c7, c0, 0	/* enter IDLE mode */

1:
	RET