1/*	$NetBSD: bcopy_page.S,v 1.7 2003/10/13 21:03:13 scw Exp $	*/
2
3/*-
4 * Copyright (c) 1995 Scott Stevens
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by Scott Stevens.
18 * 4. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * RiscBSD kernel project
33 *
34 * bcopy_page.S
35 *
36 * page optimised bcopy and bzero routines
37 *
38 * Created      : 08/04/95
39 */
40
41#include <machine/asm.h>
42
43__FBSDID("$FreeBSD$");
44
45#include "assym.s"
46
47#ifndef _ARM_ARCH_5E
48
49/* #define BIG_LOOPS */
50
51/*
52 * bcopy_page(src, dest)
53 *
54 * Optimised copy page routine.
55 *
56 * On entry:
57 *   r0 - src address
58 *   r1 - dest address
59 *
60 * Requires:
61 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
62 *   otherwise.
63 */
64
/*
 * One COPY_CHUNK expansion moves eight 32-bit registers, i.e. 32 bytes.
 * CHUNK_SIZE is not referenced by the code visible in this file; it is
 * presumably there for alternative PREFETCH_ or COPY_CHUNK definitions.
 */
#define	CHUNK_SIZE	32

/* Prefetch hooks.  Both expand to nothing here. */
#define	PREFETCH_FIRST_CHUNK	/* nothing */
#define	PREFETCH_NEXT_CHUNK	/* nothing */

/*
 * COPY_CHUNK: load eight words from [r0] and store them at [r1],
 * advancing both pointers (write-back).  r3-r8, ip and lr are used as
 * the transfer registers.  A definition supplied before this point
 * takes precedence.
 */
#ifndef COPY_CHUNK
#define	COPY_CHUNK \
	PREFETCH_NEXT_CHUNK ; \
	ldmia	r0!, {r3-r8,ip,lr} ; \
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

/*
 * SAVE_REGS / RESTORE_REGS: push r4-r8 and lr on entry; pop r4-r8 and
 * load the saved lr straight into pc on exit, which also returns.
 * _SAVE() is defined outside this file (presumably it records the push
 * for the unwinder -- confirm).  Both macros are overridden together
 * when SAVE_REGS is already defined.
 */
#ifndef SAVE_REGS
#define	SAVE_REGS \
	stmfd	sp!, {r4-r8, lr}; \
	_SAVE({r4-r8, lr})
#define	RESTORE_REGS \
	ldmfd	sp!, {r4-r8, pc}
#endif /* ! SAVE_REGS */
81
/*
 * Register use:
 *   r0 - source pointer, advanced by COPY_CHUNK
 *   r1 - destination pointer, advanced by COPY_CHUNK
 *   r2 - remaining loop iterations
 * The default COPY_CHUNK also uses r3-r8, ip and lr, which is why
 * SAVE_REGS/RESTORE_REGS bracket the body.
 */
ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS

	/* Iteration count: one pass handles 512 (BIG_LOOPS) or 128 bytes. */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)
#else
	mov	r2, #(PAGE_SIZE >> 7)
#endif

1:
	/* Four chunks per pass in every configuration. */
	.rept	4
	COPY_CHUNK
	.endr

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	/* Twelve more chunks, sixteen in total per pass. */
	.rept	12
	COPY_CHUNK
	.endr
#endif
	subs	r2, r2, #1		/* one pass done; Z set when none remain */
	bne	1b

	RESTORE_REGS		/* ...and return. */
END(bcopy_page)
121
122/*
123 * bzero_page(dest)
124 *
125 * Optimised zero page routine.
126 *
127 * On entry:
128 *   r0 - dest address
129 *
130 * Requires:
131 *   number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
132 *   otherwise
133 */
134
/*
 * Register use:
 *   r0            - destination pointer, advanced by each store
 *   r2            - remaining loop iterations
 *   r3-r8, ip, lr - eight zero words written by every stmia (32 bytes)
 */
ENTRY(bzero_page)
	stmfd	sp!, {r4-r8, lr}
	_SAVE({r4-r8, lr})

	/* Build the block of eight zero words that gets stored repeatedly. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

	/* Iteration count: one pass clears 512 (BIG_LOOPS) or 128 bytes. */
#ifdef BIG_LOOPS
	mov	r2, #(PAGE_SIZE >> 9)
#else
	mov	r2, #(PAGE_SIZE >> 7)
#endif

1:
	/* Four 32-byte stores per pass in every configuration. */
	.rept	4
	stmia	r0!, {r3-r8,ip,lr}
	.endr

#ifdef BIG_LOOPS
	/* There is little point making the loop any larger; unless we are
	   running with the cache off, the load/store overheads will
	   completely dominate this loop.  */
	/* Twelve more stores, sixteen in total per pass. */
	.rept	12
	stmia	r0!, {r3-r8,ip,lr}
	.endr
#endif

	subs	r2, r2, #1		/* one pass done; Z set when none remain */
	bne	1b

	/* Restore r4-r8 and return by popping the saved lr into pc. */
	ldmfd	sp!, {r4-r8, pc}
END(bzero_page)
184
185#else	/* _ARM_ARCH_5E */
186
187/*
188 * armv5e version of bcopy_page
189 */
/*
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Register use:
 *   r2/r3, r4/r5 - two word pairs used alternately: while one pair is
 *                  being stored with strd, the other is being reloaded
 *   ip           - remaining loop iterations, 128 bytes per iteration
 *
 * Requires PAGE_SIZE to be a multiple of 128.  Every strd writes eight
 * bytes at r1, so dest is expected to be suitably aligned for strd
 * (presumably page aligned -- confirm against callers).
 *
 * The loop is software pipelined: the first two words are loaded before
 * the loop, and the last two loads of each pass (ldrgt) fetch the first
 * two words of the next pass only when another pass will follow, so no
 * load is issued beyond the end of the source page.
 */
ENTRY(bcopy_page)
	pld	[r0]
	stmfd	sp!, {r4, r5}
	_SAVE({r4, r5})
	/*
	 * Derive the pass count from PAGE_SIZE instead of hard-coding 32
	 * (which is only correct for 4096-byte pages).
	 */
	mov	ip, #(PAGE_SIZE >> 7)
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, [r1], #0x08
	subs	ip, ip, #0x01		/* GT while passes remain */
	ldrgt	r2, [r0], #0x04		/* 0x80 */
	ldrgt	r3, [r0], #0x04		/* 0x84 */
	strd	r4, [r1], #0x08		/* final pair of this pass */
	bgt	1b
	ldmfd	sp!, {r4, r5}
	RET
END(bcopy_page)
254
255/*
256 * armv5e version of bzero_page
257 */
/*
 * On entry:
 *   r0 - dest address
 *
 * Register use:
 *   r1    - bytes left to clear, starts at PAGE_SIZE
 *   r2/r3 - zero pair written by every strd (8 bytes per store)
 *
 * The loop only terminates when r1 reaches exactly zero, so PAGE_SIZE
 * must be a multiple of 128.  No registers are saved: only r0-r3 are
 * touched.
 */
ENTRY(bzero_page)
	mov	r1, #PAGE_SIZE
	mov	r2, #0
	mov	r3, #0
1:
	/* 16 stores x 8 bytes = 128 bytes per pass. */
	.rept	16
	strd	r2, [r0], #8
	.endr
	subs	r1, r1, #128
	bne	1b
	RET
END(bzero_page)
282#endif	/* _ARM_ARCH_5E */
283