/*	$NetBSD: cpufunc_asm_xscale.S,v 1.16 2002/08/17 16:36:32 thorpej Exp $	*/

/*-
 * Copyright (c) 2001, 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs and Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*-
 * Copyright (c) 2001 Matt Thomas.
 * Copyright (c) 1997,1998 Mark Brinicombe.
 * Copyright (c) 1997 Causality Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Causality Limited.
 * 4. The name of Causality Limited may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY CAUSALITY LIMITED ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL CAUSALITY LIMITED BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * XScale assembly functions for CPU / MMU / TLB specific operations
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/cpufunc_asm_xscale.S 139735 2005-01-05 21:58:49Z imp $");

/*
 * Size of the XScale core D-cache (32KB).
 */
#define	DCACHE_SIZE		0x00008000

.Lblock_userspace_access:
	.word	_C_LABEL(block_userspace_access)

/*
 * CPWAIT -- Canonical method to wait for CP15 update.
 * From: Intel 80200 manual, section 2.3.3.
 *
 * NOTE: Clobbers the specified temp reg.
 */
#define	CPWAIT_BRANCH							 \
	sub	pc, pc, #4

#define	CPWAIT(tmp)							 \
	mrc	p15, 0, tmp, c2, c0, 0	/* arbitrary read of CP15 */	;\
	mov	tmp, tmp		/* wait for it to complete */	;\
	CPWAIT_BRANCH			/* branch to next insn */

#define	CPWAIT_AND_RETURN_SHIFTER	lsr #32

#define	CPWAIT_AND_RETURN(tmp)						 \
	mrc	p15, 0, tmp, c2, c0, 0	/* arbitrary read of CP15 */	;\
	/* Wait for it to complete and branch to the return address */	 \
	sub	pc, lr, tmp, CPWAIT_AND_RETURN_SHIFTER

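/*
 * Why these work: in ARM state the PC reads as the address of the
 * current instruction plus 8, so "sub pc, pc, #4" simply branches
 * to the next instruction; the write to the PC flushes the
 * pipeline, serializing the preceding CP15 read.  Similarly,
 * "lsr #32" always produces zero, so CPWAIT_AND_RETURN computes
 * "lr - 0"; the shifter operand exists only to make the return
 * address calculation data-dependent on the CP15 read in tmp.
 */
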
ENTRY(xscale_cpwait)
	CPWAIT_AND_RETURN(r0)

/*
 * We need a separate cpu_control() entry point, since we have to
 * invalidate the Branch Target Buffer in the event the BPRD bit
 * changes in the control register.
 */
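/*
 * For illustration only: calling the arguments in r0 and r1
 * "clear" and "xor", the code below behaves roughly like
 *
 *	old = ctrl; new = (old & ~clear) ^ xor;
 *	if (new != old) { invalidate_btb(); ctrl = new; }
 *	return old;
 */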
ENTRY(xscale_control)
	mrc	p15, 0, r3, c1, c0, 0	/* Read the control register */
	bic	r2, r3, r0		/* Clear bits */
	eor	r2, r2, r1		/* XOR bits */

	teq	r2, r3			/* Only write if there was a change */
	mcrne	p15, 0, r0, c7, c5, 6	/* Invalidate the BTB */
	mcrne	p15, 0, r2, c1, c0, 0	/* Write new control register */
	mov	r0, r3			/* Return old value */

	CPWAIT_AND_RETURN(r1)

/*
 * Functions to set the MMU Translation Table Base register
 *
 * We need to clean and flush the cache first, as it is indexed
 * by virtual addresses that are about to change.
 */
ENTRY(xscale_setttb)
#ifdef CACHE_CLEAN_BLOCK_INTR
	mrs	r3, cpsr_all
	orr	r1, r3, #(I32_bit | F32_bit)
	msr	cpsr_all, r1
#else
	ldr	r3, .Lblock_userspace_access
	ldr	r2, [r3]
	orr	r1, r2, #1
	str	r1, [r3]
#endif
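	/*
	 * From here until the TTB is rewritten, nothing may be
	 * allowed to dirty the cache we are about to clean.
	 * Depending on the kernel configuration this is ensured
	 * either by disabling IRQs and FIQs (above), or by setting
	 * block_userspace_access, which is expected to keep
	 * user-address accesses out of this window.
	 */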
	stmfd	sp!, {r0-r3, lr}
	bl	_C_LABEL(xscale_cache_cleanID)
	mcr	p15, 0, r0, c7, c5, 0	/* invalidate I$ and BTB */
	mcr	p15, 0, r0, c7, c10, 4	/* drain write and fill buffer */

	CPWAIT(r0)

	ldmfd	sp!, {r0-r3, lr}

	/* Write the TTB */
	mcr	p15, 0, r0, c2, c0, 0

	/* If we have updated the TTB we must flush the TLB */
	mcr	p15, 0, r0, c8, c7, 0	/* invalidate I+D TLB */

	/* The cleanID above means we only need to flush the I cache here */
	mcr	p15, 0, r0, c7, c5, 0	/* invalidate I$ and BTB */

	CPWAIT(r0)

#ifdef CACHE_CLEAN_BLOCK_INTR
	msr	cpsr_all, r3
#else
	str	r2, [r3]
#endif
	RET

/*
 * TLB functions
 *
 * Note: We don't need to worry about issuing a CPWAIT after
 * TLB operations, because we expect a pmap_update() to follow.
 */
ENTRY(xscale_tlb_flushID_SE)
	mcr	p15, 0, r0, c8, c6, 1	/* flush D tlb single entry */
	mcr	p15, 0, r0, c8, c5, 1	/* flush I tlb single entry */
	RET

/*
 * Cache functions
 */
ENTRY(xscale_cache_flushID)
	mcr	p15, 0, r0, c7, c7, 0	/* flush I+D cache */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushI)
	mcr	p15, 0, r0, c7, c5, 0	/* flush I cache */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushD)
	mcr	p15, 0, r0, c7, c6, 0	/* flush D cache */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushI_SE)
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushD_SE)
	/*
	 * Errata (rev < 2): Must clean-dcache-line to an address
	 * before invalidate-dcache-line to an address, or dirty
	 * bits will not be cleared in the dcache array.
	 */
	mcr	p15, 0, r0, c7, c10, 1
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_cleanD_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT_AND_RETURN(r0)

/*
 * Information for the XScale cache clean/purge functions:
 *
 *	* Virtual address of the memory region to use
 *	* Size of memory region
 *
 * Note that the virtual address for the Data cache clean operation
 * does not need to be backed by physical memory, since no loads
 * will actually be performed by the allocate-line operation.
 *
 * Note that the Mini-Data cache MUST be cleaned by executing
 * loads from memory mapped into a region reserved exclusively
 * for cleaning of the Mini-Data cache.
 */
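/*
 * The address words below are placeholders; platform startup code
 * is expected to reserve the virtual regions described above and
 * store their addresses here before the clean functions are used.
 */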
	.data

	.global	_C_LABEL(xscale_cache_clean_addr)
_C_LABEL(xscale_cache_clean_addr):
	.word	0x00000000

	.global	_C_LABEL(xscale_cache_clean_size)
_C_LABEL(xscale_cache_clean_size):
	.word	DCACHE_SIZE

	.global	_C_LABEL(xscale_minidata_clean_addr)
_C_LABEL(xscale_minidata_clean_addr):
	.word	0x00000000

	.global	_C_LABEL(xscale_minidata_clean_size)
_C_LABEL(xscale_minidata_clean_size):
	.word	0x00000800

	.text

.Lxscale_cache_clean_addr:
	.word	_C_LABEL(xscale_cache_clean_addr)
.Lxscale_cache_clean_size:
	.word	_C_LABEL(xscale_cache_clean_size)

.Lxscale_minidata_clean_addr:
	.word	_C_LABEL(xscale_minidata_clean_addr)
.Lxscale_minidata_clean_size:
	.word	_C_LABEL(xscale_minidata_clean_size)

#ifdef CACHE_CLEAN_BLOCK_INTR
#define	XSCALE_CACHE_CLEAN_BLOCK					\
	mrs	r3, cpsr_all					;	\
	orr	r0, r3, #(I32_bit | F32_bit)			;	\
	msr	cpsr_all, r0

#define	XSCALE_CACHE_CLEAN_UNBLOCK					\
	msr	cpsr_all, r3
#else
#define	XSCALE_CACHE_CLEAN_BLOCK					\
	ldr	r3, .Lblock_userspace_access			;	\
	ldr	ip, [r3]					;	\
	orr	r0, ip, #1					;	\
	str	r0, [r3]

#define	XSCALE_CACHE_CLEAN_UNBLOCK					\
	str	ip, [r3]
#endif /* CACHE_CLEAN_BLOCK_INTR */

#define	XSCALE_CACHE_CLEAN_PROLOGUE					\
	XSCALE_CACHE_CLEAN_BLOCK				;	\
	ldr	r2, .Lxscale_cache_clean_addr			;	\
	ldmia	r2, {r0, r1}					;	\
	/*								\
	 * BUG ALERT!							\
	 *								\
	 * The XScale core has a strange cache eviction bug, which	\
	 * requires us to use 2x the cache size for the cache clean	\
	 * and for that area to be aligned to 2 * cache size.		\
	 *								\
	 * The work-around is to use 2 areas for cache clean, and to	\
	 * alternate between them whenever this is done.  No one knows	\
	 * why the work-around works (mmm!).				\
	 */								\
	eor	r0, r0, #(DCACHE_SIZE)				;	\
	str	r0, [r2]					;	\
	add	r0, r0, r1

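/*
 * Concretely: the prologue loads the current clean address into r0
 * and the clean size into r1, EORs the address with DCACHE_SIZE to
 * flip to the other half of the doubly-sized region, stores the
 * flipped address back for next time, and finally leaves r0
 * pointing at the end of the chosen half so the clean loop can
 * walk downwards from there.
 */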
#define	XSCALE_CACHE_CLEAN_EPILOGUE					\
	XSCALE_CACHE_CLEAN_UNBLOCK

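/*
 * The entry points below deliberately fall through: syncI and
 * purgeID invalidate the I-cache and BTB first, then share the
 * D-cache clean loop with cleanID, purgeD and cleanD.
 */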
ENTRY_NP(xscale_cache_syncI)
ENTRY_NP(xscale_cache_purgeID)
	mcr	p15, 0, r0, c7, c5, 0	/* flush I cache (D cleaned below) */
ENTRY_NP(xscale_cache_cleanID)
ENTRY_NP(xscale_cache_purgeD)
ENTRY(xscale_cache_cleanD)
	XSCALE_CACHE_CLEAN_PROLOGUE

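	/*
	 * Allocate a line for each 32-byte block of the clean region.
	 * A line allocation establishes the line without fetching
	 * data from memory, evicting (and writing back) whatever
	 * dirty line previously occupied the slot; once the whole
	 * region has been allocated, no dirty lines remain in the
	 * D-cache.
	 */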
1:	subs	r0, r0, #32
	mcr	p15, 0, r0, c7, c2, 5	/* allocate cache line */
	subs	r1, r1, #32
	bne	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT(r0)

	XSCALE_CACHE_CLEAN_EPILOGUE
	RET

/*
 * Clean the mini-data cache.
 *
 * It's expected that we only use the mini-data cache for
 * kernel addresses, so there is no need to purge it on
 * context switch, and no need to prevent userspace access
 * while we clean it.
 */
ENTRY(xscale_cache_clean_minidata)
	ldr	r2, .Lxscale_minidata_clean_addr
	ldmia	r2, {r0, r1}
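	/*
	 * One load per 32-byte line walks the dedicated mini-data
	 * region; each load displaces a line from the mini-data
	 * cache, forcing any dirty contents back to memory.
	 */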
1:	ldr	r3, [r0], #32
	subs	r1, r1, #32
	bne	1b

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r1)

ENTRY(xscale_cache_purgeID_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT(r1)
	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r1)

ENTRY(xscale_cache_purgeD_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT(r1)
	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r1)

/*
 * Soft functions
 */
/* xscale_cache_syncI is identical to xscale_cache_purgeID */

ENTRY(xscale_cache_cleanID_rng)
ENTRY(xscale_cache_cleanD_rng)
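	/*
	 * 0x4000 is 16KB, half the D-cache.  For a range that large
	 * it is presumably cheaper to clean the entire cache, so fall
	 * back to the whole-cache routine.
	 */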
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_cleanID)

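	/*
	 * Align the start address down to a 32-byte cache line and
	 * grow the length by the amount we backed up; the range
	 * functions below all make the same adjustment.
	 */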
	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_purgeID_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_purgeID)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_purgeD_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_purgeD)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_syncI_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_syncI)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushD_rng)
	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

/*
 * Context switch.
 *
 * This is the CPU-specific part of the context switcher, cpu_switch();
 * it performs the actual TTB reload.
 *
 * NOTE: Special calling convention
 *	r1, r4-r13 must be preserved
 */
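/*
 * Note that the body below uses only r0 (plus lr to return), so
 * the calling convention above is satisfied trivially.
 */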
ENTRY(xscale_context_switch)
	/*
	 * CF_CACHE_PURGE_ID will *ALWAYS* be called prior to this.
	 * Thus the data cache will contain only kernel data and the
	 * instruction cache will contain only kernel code, and all
	 * kernel mappings are shared by all processes.
	 */

	/* Write the TTB */
	mcr	p15, 0, r0, c2, c0, 0

	/* If we have updated the TTB we must flush the TLB */
	mcr	p15, 0, r0, c8, c7, 0	/* flush the I+D tlb */

	CPWAIT_AND_RETURN(r0)

/*
 * xscale_cpu_sleep
 *
 * This is called when there is nothing on any of the run queues.
 * We go into IDLE mode so that any IRQ or FIQ will awaken us.
 *
 * If this is called with anything other than ARM_SLEEP_MODE_IDLE,
 * ignore it.
 */
ENTRY(xscale_cpu_sleep)
	cmp	r0, #0x00000000
	bne	1f
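	/*
	 * Writing 1 to the CP14 power-mode register selects IDLE
	 * mode, which should gate the core clock until the next
	 * interrupt or debug event wakes us.
	 */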
	mov	r0, #0x1
	mcr	p14, 0, r0, c7, c0, 0

1:
	RET
498