1129202Scognet/*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/
2129202Scognet
3129202Scognet/*-
4129202Scognet * Copyright (c) 1997 The NetBSD Foundation, Inc.
5129202Scognet * All rights reserved.
6129202Scognet *
7129202Scognet * This code is derived from software contributed to The NetBSD Foundation
8129202Scognet * by Neil A. Carson and Mark Brinicombe
9129202Scognet *
10129202Scognet * Redistribution and use in source and binary forms, with or without
11129202Scognet * modification, are permitted provided that the following conditions
12129202Scognet * are met:
13129202Scognet * 1. Redistributions of source code must retain the above copyright
14129202Scognet *    notice, this list of conditions and the following disclaimer.
15129202Scognet * 2. Redistributions in binary form must reproduce the above copyright
16129202Scognet *    notice, this list of conditions and the following disclaimer in the
17129202Scognet *    documentation and/or other materials provided with the distribution.
18129202Scognet *
19129202Scognet * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20129202Scognet * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21129202Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22129202Scognet * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23129202Scognet * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24129202Scognet * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25129202Scognet * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26129202Scognet * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27129202Scognet * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28129202Scognet * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29129202Scognet * POSSIBILITY OF SUCH DAMAGE.
30129202Scognet */
31129202Scognet
32129202Scognet#include <machine/asm.h>
33129202Scognet__FBSDID("$FreeBSD$");
34129202Scognet/*
35129202Scognet * This is one fun bit of code ...
36129202Scognet * Some easy listening music is suggested while trying to understand this
37129202Scognet * code e.g. Iron Maiden
38129202Scognet *
39129202Scognet * For anyone attempting to understand it :
40129202Scognet *
41129202Scognet * The core code is implemented here with simple stubs for memcpy().
42129202Scognet *
43129202Scognet * All local labels are prefixed with Lmemcpy_
44129202Scognet * Following the prefix, a label starting with f is used in the forward copy
45129202Scognet * code while a label starting with b is used in the backwards copy code.
46129202Scognet * The source and destination addresses determine whether a forward or
47129202Scognet * backward copy is performed.  (memcpy() below only ever copies forward.)
48129202Scognet * Separate bits of code are used to deal with the following situations
49129202Scognet * for both the forward and backwards copy.
50129202Scognet * unaligned source address
51129202Scognet * unaligned destination address
52129202Scognet * Separate copy routines are used to produce an optimised result for each
53129202Scognet * of these cases.
54129202Scognet * The copy code will use LDM/STM instructions to copy up to 32 bytes at
55129202Scognet * a time where possible.
56129202Scognet *
57129202Scognet * Note: r12 (aka ip) can be trashed during the function along with
58129202Scognet * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
59129202Scognet * Additional registers are preserved prior to use i.e. r4, r5 & lr
60129202Scognet *
61129202Scognet * Apologies for the state of the comments ;-)
62129202Scognet */
63129202Scognet/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
64129202ScognetENTRY(memcpy)
	/*
	 * Register use as seen in the code below:
	 *   r0       destination pointer, advanced by every store
	 *   r1       source pointer, advanced by every load
	 *   r2       byte count, kept biased below the true remaining count
	 *            so that the flags of one subs/adds tell whether another
	 *            full chunk is left (the bias is noted at each label)
	 *   r3, r12 (ip), lr   scratch data registers
	 *   r4, r5   extra data registers, pushed before use and popped after
	 * The entry value of r0 is pushed together with lr, and every return
	 * is a pop of {r0, pc}, so the caller gets the original dst back.
	 * NOTE(review): the count is tested with signed conditions (blt, bge),
	 * so len is presumably expected to stay below 2^31 - confirm.
	 */
65129202Scognet	/* push lr now: it is used as a data register below and the return pops it into pc */
66129202Scognet	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
67129202Scognet
68129202Scognet	subs	r2, r2, #4		/* r2 = len - 4 */
69129202Scognet	blt	.Lmemcpy_l4		/* less than 4 bytes */
70129202Scognet	ands	r12, r0, #3		/* r12 = dst & 3 */
71129202Scognet	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
72129202Scognet	ands	r12, r1, #3		/* r12 = src & 3 */
73129202Scognet	bne	.Lmemcpy_srcul		/* oh unaligned source addr */
74129202Scognet
75129202Scognet.Lmemcpy_t8:
76129202Scognet	/* We have aligned source and destination; r2 = remaining - 4 */
77129202Scognet	subs	r2, r2, #8		/* r2 = remaining - 12 */
78129202Scognet	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
79129202Scognet	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
80129202Scognet	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
81129202Scognet	stmdb	sp!, {r4}		/* borrow r4 */
82129202Scognet
83129202Scognet	/* blat 32 bytes at a time, as two 16 byte load/store pairs */
84129202Scognet	/* XXX for really big copies perhaps we should use more registers */
85129202Scognet.Lmemcpy_loop32:
86129202Scognet	ldmia	r1!, {r3, r4, r12, lr}
87129202Scognet	stmia	r0!, {r3, r4, r12, lr}
88129202Scognet	ldmia	r1!, {r3, r4, r12, lr}
89129202Scognet	stmia	r0!, {r3, r4, r12, lr}
90129202Scognet	subs	r2, r2, #0x20		/* r2 = remaining - 32 again */
91129202Scognet	bge	.Lmemcpy_loop32		/* another full 32 bytes left */
92129202Scognet
93129202Scognet	cmn	r2, #0x10		/* flags of r2 + 16: ge when 16 or more bytes are left */
94129202Scognet	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
95129202Scognet	stmgeia	r0!, {r3, r4, r12, lr}
96129202Scognet	subge	r2, r2, #0x10		/* keeps r2 = remaining - 32; flags untouched */
97129202Scognet	ldmia	sp!, {r4}		/* return r4 */
98129202Scognet
99129202Scognet.Lmemcpy_l32:
	/*
	 * r2 = remaining - 32 on both ways in.  Adding 20 rebiases it to
	 * remaining - 12 and sets the flags that gate the first pass below.
	 */
100129202Scognet	adds	r2, r2, #0x14
101129202Scognet
102129202Scognet	/* blat 12 bytes at a time */
	/*
	 * Every instruction of the loop is conditional on ge, so with fewer
	 * than 12 bytes left it falls straight through without changing
	 * r0, r1, r2 or the flags.
	 */
103129202Scognet.Lmemcpy_loop12:
104129202Scognet	ldmgeia	r1!, {r3, r12, lr}
105129202Scognet	stmgeia	r0!, {r3, r12, lr}
106129202Scognet	subges	r2, r2, #0x0c		/* r2 = remaining - 12, flags for the next pass */
107129202Scognet	bge	.Lmemcpy_loop12
108129202Scognet
109129202Scognet.Lmemcpy_l12:
110129202Scognet	adds	r2, r2, #8		/* r2 = remaining - 4 (was remaining - 12) */
111129202Scognet	blt	.Lmemcpy_l4		/* fewer than 4 bytes left */
112129202Scognet
113129202Scognet	subs	r2, r2, #4		/* r2 = remaining - 8 */
114129202Scognet	ldrlt	r3, [r1], #4		/* 4 to 7 bytes left: move one word */
115129202Scognet	strlt	r3, [r0], #4
116129202Scognet	ldmgeia	r1!, {r3, r12}		/* 8 to 11 bytes left: move two words */
117129202Scognet	stmgeia	r0!, {r3, r12}
118129202Scognet	subge	r2, r2, #4		/* account for the second word */
119129202Scognet
120129202Scognet.Lmemcpy_l4:
121129202Scognet	/* less than 4 bytes to go; every path arrives with r2 = remaining - 4 */
122129202Scognet	adds	r2, r2, #4		/* r2 = bytes left, eq when nothing is left */
	/*
	 * NOTE(review): the macro tested here ends in a single underscore;
	 * the usual compiler spelling would be __APCS_26__ - confirm intent.
	 */
123129202Scognet#ifdef __APCS_26_
124129202Scognet	ldmeqia sp!, {r0, pc}^		/* done */
125129202Scognet#else
126129202Scognet	ldmeqia	sp!, {r0, pc}		/* done */
127129202Scognet#endif
128129202Scognet	/* copy the crud byte at a time */
	/*
	 * 1 to 3 bytes are left.  The first byte is copied unconditionally,
	 * the second when r2 is 2 or more (ge), the third when r2 is 3 (gt).
	 * The byte loads and stores leave the flags of the cmp intact.
	 */
129129202Scognet	cmp	r2, #2
130129202Scognet	ldrb	r3, [r1], #1
131129202Scognet	strb	r3, [r0], #1
132129202Scognet	ldrgeb	r3, [r1], #1
133129202Scognet	strgeb	r3, [r0], #1
134129202Scognet	ldrgtb	r3, [r1], #1
135129202Scognet	strgtb	r3, [r0], #1
	/* NOTE(review): unlike the early return, this one has no APCS_26 variant - confirm */
136129202Scognet	ldmia	sp!, {r0, pc}		/* restore dst as return value and return */
137129202Scognet
138129202Scognet	/* erg - unaligned destination */
	/*
	 * Reached only after the len - 4 test passed, so at least 4 bytes
	 * are available for the 1 to 3 alignment bytes copied here.
	 * On entry r12 = dst & 3 (non zero) and r2 = len - 4.
	 */
139129202Scognet.Lmemcpy_destul:
140129202Scognet	rsb	r12, r12, #4		/* r12 = 4 - (dst & 3), bytes up to the next word boundary */
141129202Scognet	cmp	r12, #2			/* ge: 2 or 3 bytes, gt: 3 bytes */
142129202Scognet
143129202Scognet	/* align destination with byte copies */
144129202Scognet	ldrb	r3, [r1], #1
145129202Scognet	strb	r3, [r0], #1
146129202Scognet	ldrgeb	r3, [r1], #1
147129202Scognet	strgeb	r3, [r0], #1
148129202Scognet	ldrgtb	r3, [r1], #1
149129202Scognet	strgtb	r3, [r0], #1
150129202Scognet	subs	r2, r2, r12		/* r2 = remaining - 4 */
151129202Scognet	blt	.Lmemcpy_l4		/* less than 4 bytes */
152129202Scognet
153129202Scognet	ands	r12, r1, #3		/* r12 = src & 3 now that dst is aligned */
154129202Scognet	beq	.Lmemcpy_t8		/* we have an aligned source */
155129202Scognet
156129202Scognet	/* erg - unaligned source */
157129202Scognet	/* This is where it gets nasty ... */
	/*
	 * On entry: dst is word aligned, r12 = src & 3 (1, 2 or 3) and
	 * r2 = remaining - 4, which is 0 or more.
	 * The source is read in whole aligned words.  lr always holds the
	 * most recently loaded word, part of which still has to be stored,
	 * and r1 points at the aligned word after it.  Each destination word
	 * is built by shifting the rest of lr together with the first bytes
	 * of the next word.  The shift directions are mirrored when
	 * __ARMEB__ is defined (big-endian byte order).
	 */
158129202Scognet.Lmemcpy_srcul:
159129202Scognet	bic	r1, r1, #3		/* round src down to a word boundary */
160129202Scognet	ldr	lr, [r1], #4		/* lr = word holding the first source bytes */
161129202Scognet	cmp	r12, #2
162129202Scognet	bgt	.Lmemcpy_srcul3		/* src & 3 is 3 */
163129202Scognet	beq	.Lmemcpy_srcul2		/* src & 3 is 2 */
	/* src & 3 is 1: 3 bytes of lr are pending, 1 byte comes from the next word */
164129202Scognet	cmp	r2, #0x0c
165129202Scognet	blt	.Lmemcpy_srcul1loop4	/* fewer than 16 bytes left */
166129202Scognet	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
167129202Scognet	stmdb	sp!, {r4, r5}		/* borrow r4 and r5 */
168129202Scognet
	/* 16 bytes per pass: 4 new source words in, 4 merged words out */
169129202Scognet.Lmemcpy_srcul1loop16:
170129202Scognet#ifdef __ARMEB__
171129202Scognet	mov	r3, lr, lsl #8
172129202Scognet#else
173129202Scognet	mov	r3, lr, lsr #8		/* r3 = the 3 pending bytes of lr */
174129202Scognet#endif
175129202Scognet	ldmia	r1!, {r4, r5, r12, lr}	/* lr becomes the new carry-over word */
176129202Scognet#ifdef __ARMEB__
177129202Scognet	orr	r3, r3, r4, lsr #24
178129202Scognet	mov	r4, r4, lsl #8
179129202Scognet	orr	r4, r4, r5, lsr #24
180129202Scognet	mov	r5, r5, lsl #8
181129202Scognet	orr	r5, r5, r12, lsr #24
182129202Scognet	mov	r12, r12, lsl #8
183129202Scognet	orr	r12, r12, lr, lsr #24
184129202Scognet#else
185129202Scognet	orr	r3, r3, r4, lsl #24	/* completed with 1 byte of r4 */
186129202Scognet	mov	r4, r4, lsr #8
187129202Scognet	orr	r4, r4, r5, lsl #24
188129202Scognet	mov	r5, r5, lsr #8
189129202Scognet	orr	r5, r5, r12, lsl #24
190129202Scognet	mov	r12, r12, lsr #8
191129202Scognet	orr	r12, r12, lr, lsl #24
192129202Scognet#endif
193129202Scognet	stmia	r0!, {r3-r5, r12}
194129202Scognet	subs	r2, r2, #0x10		/* r2 = remaining - 16 */
195129202Scognet	bge	.Lmemcpy_srcul1loop16
196129202Scognet	ldmia	sp!, {r4, r5}		/* return r4 and r5 */
197129202Scognet	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
198129202Scognet	blt	.Lmemcpy_srcul1l4	/* fewer than 4 bytes left */
199129202Scognet
	/* one word per pass, same merge as above with r12 as the output word */
200129202Scognet.Lmemcpy_srcul1loop4:
201129202Scognet#ifdef __ARMEB__
202129202Scognet	mov	r12, lr, lsl #8
203129202Scognet#else
204129202Scognet	mov	r12, lr, lsr #8
205129202Scognet#endif
206129202Scognet	ldr	lr, [r1], #4
207129202Scognet#ifdef __ARMEB__
208129202Scognet	orr	r12, r12, lr, lsr #24
209129202Scognet#else
210129202Scognet	orr	r12, r12, lr, lsl #24
211129202Scognet#endif
212129202Scognet	str	r12, [r0], #4
213129202Scognet	subs	r2, r2, #4		/* r2 = remaining - 4 */
214129202Scognet	bge	.Lmemcpy_srcul1loop4
215129202Scognet
216129202Scognet.Lmemcpy_srcul1l4:
	/*
	 * r1 is one word past the word held in lr and 3 bytes of that word
	 * are not stored yet, so step r1 back by 3 to the next byte to copy
	 * before handing over to the byte tail.
	 */
217129202Scognet	sub	r1, r1, #3
218129202Scognet	b	.Lmemcpy_l4
219129202Scognet
	/* src & 3 is 2: 2 bytes of lr are pending, 2 bytes come from the next word */
220129202Scognet.Lmemcpy_srcul2:
221129202Scognet	cmp	r2, #0x0c
222129202Scognet	blt	.Lmemcpy_srcul2loop4	/* fewer than 16 bytes left */
223129202Scognet	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
224129202Scognet	stmdb	sp!, {r4, r5}		/* borrow r4 and r5 */
225129202Scognet
	/* 16 bytes per pass, each output word is 2 pending bytes plus 2 new bytes */
226129202Scognet.Lmemcpy_srcul2loop16:
227129202Scognet#ifdef __ARMEB__
228129202Scognet	mov	r3, lr, lsl #16
229129202Scognet#else
230129202Scognet	mov	r3, lr, lsr #16
231129202Scognet#endif
232129202Scognet	ldmia	r1!, {r4, r5, r12, lr}	/* lr becomes the new carry-over word */
233129202Scognet#ifdef __ARMEB__
234129202Scognet	orr	r3, r3, r4, lsr #16
235129202Scognet	mov	r4, r4, lsl #16
236129202Scognet	orr	r4, r4, r5, lsr #16
237129202Scognet	mov	r5, r5, lsl #16
238129202Scognet	orr	r5, r5, r12, lsr #16
239129202Scognet	mov	r12, r12, lsl #16
240129202Scognet	orr	r12, r12, lr, lsr #16
241129202Scognet#else
242129202Scognet	orr	r3, r3, r4, lsl #16
243129202Scognet	mov	r4, r4, lsr #16
244129202Scognet	orr	r4, r4, r5, lsl #16
245129202Scognet	mov	r5, r5, lsr #16
246129202Scognet	orr	r5, r5, r12, lsl #16
247129202Scognet	mov	r12, r12, lsr #16
248129202Scognet	orr	r12, r12, lr, lsl #16
249129202Scognet#endif
250129202Scognet	stmia	r0!, {r3-r5, r12}
251129202Scognet	subs	r2, r2, #0x10		/* r2 = remaining - 16 */
252129202Scognet	bge	.Lmemcpy_srcul2loop16
253129202Scognet	ldmia	sp!, {r4, r5}		/* return r4 and r5 */
254129202Scognet	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
255129202Scognet	blt	.Lmemcpy_srcul2l4	/* fewer than 4 bytes left */
256129202Scognet
	/* one word per pass with r12 as the output word */
257129202Scognet.Lmemcpy_srcul2loop4:
258129202Scognet#ifdef __ARMEB__
259129202Scognet	mov	r12, lr, lsl #16
260129202Scognet#else
261129202Scognet	mov	r12, lr, lsr #16
262129202Scognet#endif
263129202Scognet	ldr	lr, [r1], #4
264129202Scognet#ifdef __ARMEB__
265129202Scognet	orr	r12, r12, lr, lsr #16
266129202Scognet#else
267129202Scognet	orr	r12, r12, lr, lsl #16
268129202Scognet#endif
269129202Scognet	str	r12, [r0], #4
270129202Scognet	subs	r2, r2, #4		/* r2 = remaining - 4 */
271129202Scognet	bge	.Lmemcpy_srcul2loop4
272129202Scognet
273129202Scognet.Lmemcpy_srcul2l4:
274129202Scognet	sub	r1, r1, #2		/* 2 bytes of lr are not stored yet: step r1 back to them */
275129202Scognet	b	.Lmemcpy_l4
276129202Scognet
	/* src & 3 is 3: 1 byte of lr is pending, 3 bytes come from the next word */
277129202Scognet.Lmemcpy_srcul3:
278129202Scognet	cmp	r2, #0x0c
279129202Scognet	blt	.Lmemcpy_srcul3loop4	/* fewer than 16 bytes left */
280129202Scognet	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
281129202Scognet	stmdb	sp!, {r4, r5}		/* borrow r4 and r5 */
282129202Scognet
	/* 16 bytes per pass, each output word is 1 pending byte plus 3 new bytes */
283129202Scognet.Lmemcpy_srcul3loop16:
284129202Scognet#ifdef __ARMEB__
285129202Scognet	mov	r3, lr, lsl #24
286129202Scognet#else
287129202Scognet	mov	r3, lr, lsr #24
288129202Scognet#endif
289129202Scognet	ldmia	r1!, {r4, r5, r12, lr}	/* lr becomes the new carry-over word */
290129202Scognet#ifdef __ARMEB__
291129202Scognet	orr	r3, r3, r4, lsr #8
292129202Scognet	mov	r4, r4, lsl #24
293129202Scognet	orr	r4, r4, r5, lsr #8
294129202Scognet	mov	r5, r5, lsl #24
295129202Scognet	orr	r5, r5, r12, lsr #8
296129202Scognet	mov	r12, r12, lsl #24
297129202Scognet	orr	r12, r12, lr, lsr #8
298129202Scognet#else
299129202Scognet	orr	r3, r3, r4, lsl #8
300129202Scognet	mov	r4, r4, lsr #24
301129202Scognet	orr	r4, r4, r5, lsl #8
302129202Scognet	mov	r5, r5, lsr #24
303129202Scognet	orr	r5, r5, r12, lsl #8
304129202Scognet	mov	r12, r12, lsr #24
305129202Scognet	orr	r12, r12, lr, lsl #8
306129202Scognet#endif
307129202Scognet	stmia	r0!, {r3-r5, r12}
308129202Scognet	subs	r2, r2, #0x10		/* r2 = remaining - 16 */
309129202Scognet	bge	.Lmemcpy_srcul3loop16
310129202Scognet	ldmia	sp!, {r4, r5}		/* return r4 and r5 */
311129202Scognet	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
312129202Scognet	blt	.Lmemcpy_srcul3l4	/* fewer than 4 bytes left */
313129202Scognet
	/* one word per pass with r12 as the output word */
314129202Scognet.Lmemcpy_srcul3loop4:
315129202Scognet#ifdef __ARMEB__
316129202Scognet	mov	r12, lr, lsl #24
317129202Scognet#else
318129202Scognet	mov	r12, lr, lsr #24
319129202Scognet#endif
320129202Scognet	ldr	lr, [r1], #4
321129202Scognet#ifdef __ARMEB__
322129202Scognet	orr	r12, r12, lr, lsr #8
323129202Scognet#else
324129202Scognet	orr	r12, r12, lr, lsl #8
325129202Scognet#endif
326129202Scognet	str	r12, [r0], #4
327129202Scognet	subs	r2, r2, #4		/* r2 = remaining - 4 */
328129202Scognet	bge	.Lmemcpy_srcul3loop4
329129202Scognet
330129202Scognet.Lmemcpy_srcul3l4:
331129202Scognet	sub	r1, r1, #1		/* 1 byte of lr is not stored yet: step r1 back to it */
332129202Scognet	b	.Lmemcpy_l4
333271337SianEND(memcpy)
334