1129202Scognet/*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/
2129202Scognet
3129202Scognet/*-
4129202Scognet * Copyright (c) 1997 The NetBSD Foundation, Inc.
5129202Scognet * All rights reserved.
6129202Scognet *
7129202Scognet * This code is derived from software contributed to The NetBSD Foundation
8129202Scognet * by Neil A. Carson and Mark Brinicombe
9129202Scognet *
10129202Scognet * Redistribution and use in source and binary forms, with or without
11129202Scognet * modification, are permitted provided that the following conditions
12129202Scognet * are met:
13129202Scognet * 1. Redistributions of source code must retain the above copyright
14129202Scognet *    notice, this list of conditions and the following disclaimer.
15129202Scognet * 2. Redistributions in binary form must reproduce the above copyright
16129202Scognet *    notice, this list of conditions and the following disclaimer in the
17129202Scognet *    documentation and/or other materials provided with the distribution.
18129202Scognet *
19129202Scognet * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20129202Scognet * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21129202Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22129202Scognet * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23129202Scognet * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24129202Scognet * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25129202Scognet * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26129202Scognet * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27129202Scognet * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28129202Scognet * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29129202Scognet * POSSIBILITY OF SUCH DAMAGE.
30129202Scognet */
31129202Scognet
32129202Scognet#include <machine/asm.h>
33129202Scognet__FBSDID("$FreeBSD$");
34275767Sandrew
35275767Sandrew.syntax	unified
36275767Sandrew
37129202Scognet/*
38129202Scognet * This is one fun bit of code ...
39129202Scognet * Some easy listening music is suggested while trying to understand this
40129202Scognet * code e.g. Iron Maiden
41129202Scognet *
42129202Scognet * For anyone attempting to understand it :
43129202Scognet *
44129202Scognet * The core code is implemented here with simple stubs for memcpy().
45129202Scognet *
46129202Scognet * All local labels are prefixed with .Lmemcpy_
47129202Scognet * Only a forward copy (lowest address first) is implemented in this
48129202Scognet * file, so there are no separate forward (f) and backward (b) label
49129202Scognet * families and the relative position of the source and destination
50129202Scognet * is never examined.
51129202Scognet * Separate bits of code are used to deal with the following situations
52129202Scognet * in the forward copy.
53129202Scognet * unaligned source address
54129202Scognet * unaligned destination address
55129202Scognet * Separate copy routines are used to produce an optimised result for each
56129202Scognet * of these cases.
57129202Scognet * The copy code will use LDM/STM instructions to copy up to 32 bytes at
58129202Scognet * a time where possible.
59129202Scognet *
60129202Scognet * Note: r12 (aka ip) can be trashed during the function along with
61129202Scognet * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
62129202Scognet * Additional registers are preserved prior to use i.e. r4, r5 & lr
63129202Scognet *
64129202Scognet * Apologies for the state of the comments ;-)
65129202Scognet */
66129202Scognet/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
/*
 * memcpy: copy len bytes from src to dst, always working upwards from the
 * lowest address, and return dst.
 *
 * In:		r0 = dst, r1 = src, r2 = len
 * Out:		r0 = the dst value passed in (reloaded from the stack on
 *		every return path)
 * Scratch:	r3, r12 and lr (lr is saved on entry and restored into pc on
 *		return); r4 and r5 are pushed/popped around the loops that
 *		borrow them.
 * Stack:	8 bytes for {r0, lr}, plus 4 for {r4} inside the aligned
 *		32-byte loop or 8 for {r4, r5} inside the unaligned-source
 *		16-byte loops.
 *
 * r2 is kept biased below the real number of bytes remaining, so that the
 * sign of each subs/adds result says whether another chunk of the current
 * size still fits.  The bias at each label is noted where it changes; the
 * "adds" at the top of each tail section removes part of it again.
 *
 * Several instructions are conditionally executed on flags that were set a
 * few instructions earlier, so the instruction order must not be changed
 * casually.
 *
 * NOTE(review): every length test uses signed conditions (blt/bge/lt/ge),
 * so a len of 0x80000000 or more looks like it takes the "less than 4
 * bytes" path -- confirm that callers never pass such lengths.
 */
67129202ScognetENTRY(memcpy)
68129202Scognet	/* save leaf functions having to store this away */
69129202Scognet	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
70129202Scognet
	/* r2 = len - 4; negative means fewer than 4 bytes in total */
71129202Scognet	subs	r2, r2, #4
72129202Scognet	blt	.Lmemcpy_l4		/* less than 4 bytes */
	/* r12 = dst & 3 */
73129202Scognet	ands	r12, r0, #3
74129202Scognet	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	/* r12 = src & 3, consumed by the dispatch at .Lmemcpy_srcul */
75129202Scognet	ands	r12, r1, #3
76129202Scognet	bne	.Lmemcpy_srcul		/* oh unaligned source addr */
77129202Scognet
78129202Scognet.Lmemcpy_t8:
79129202Scognet	/* We have aligned source and destination */
	/* on entry r2 = remaining - 4; after the next subs, remaining - 12 */
80129202Scognet	subs	r2, r2, #8
81129202Scognet	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	/* r2 = remaining - 32 */
82129202Scognet	subs	r2, r2, #0x14
83129202Scognet	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
84129202Scognet	stmdb	sp!, {r4}		/* borrow r4 */
85129202Scognet
86129202Scognet	/* blat 32 bytes at a time */
87129202Scognet	/* XXX for really big copies perhaps we should use more registers */
	/*
	 * Each pass moves 32 bytes as two 16-byte LDM/STM pairs.  lr is used
	 * as a data register here; its entry value is already on the stack.
	 */
88129202Scognet.Lmemcpy_loop32:
89129202Scognet	ldmia	r1!, {r3, r4, r12, lr}
90129202Scognet	stmia	r0!, {r3, r4, r12, lr}
91129202Scognet	ldmia	r1!, {r3, r4, r12, lr}
92129202Scognet	stmia	r0!, {r3, r4, r12, lr}
93129202Scognet	subs	r2, r2, #0x20
94129202Scognet	bge	.Lmemcpy_loop32
95129202Scognet
	/*
	 * r2 = remaining - 32 and is negative here.  cmn sets the flags for
	 * r2 + 16, so "ge" below means at least 16 bytes are still left.
	 */
96129202Scognet	cmn	r2, #0x10
97275767Sandrew	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
98275767Sandrew	stmiage	r0!, {r3, r4, r12, lr}
99129202Scognet	subge	r2, r2, #0x10
100129202Scognet	ldmia	sp!, {r4}		/* return r4 */
101129202Scognet
102129202Scognet.Lmemcpy_l32:
	/*
	 * r2 = remaining - 12.  The flags from this adds drive the first
	 * pass through the conditional instructions of the 12-byte loop.
	 */
103129202Scognet	adds	r2, r2, #0x14
104129202Scognet
105129202Scognet	/* blat 12 bytes at a time */
106129202Scognet.Lmemcpy_loop12:
107275767Sandrew	ldmiage	r1!, {r3, r12, lr}
108275767Sandrew	stmiage	r0!, {r3, r12, lr}
109275767Sandrew	subsge	r2, r2, #0x0c
110129202Scognet	bge	.Lmemcpy_loop12
111129202Scognet
112129202Scognet.Lmemcpy_l12:
	/* r2 = remaining - 4; fewer than 12 bytes are left at this point */
113129202Scognet	adds	r2, r2, #8
114129202Scognet	blt	.Lmemcpy_l4
115129202Scognet
	/*
	 * r2 = remaining - 8: "lt" copies one word (4..7 bytes were left),
	 * "ge" copies two words and takes a further 4 off r2, so that either
	 * way r2 = remaining - 4 when falling into .Lmemcpy_l4.
	 */
116129202Scognet	subs	r2, r2, #4
117129202Scognet	ldrlt	r3, [r1], #4
118129202Scognet	strlt	r3, [r0], #4
119275767Sandrew	ldmiage	r1!, {r3, r12}
120275767Sandrew	stmiage	r0!, {r3, r12}
121129202Scognet	subge	r2, r2, #4
122129202Scognet
123129202Scognet.Lmemcpy_l4:
124129202Scognet	/* less than 4 bytes to go */
	/* r2 = remaining (0..3); zero means finished: reload dst and return */
	/*
	 * NOTE(review): the macro tested below is spelled __APCS_26_ (a
	 * single trailing underscore) -- check whether __APCS_26__ was meant.
	 * The unconditional return at the end of this section has no "^"
	 * variant at all.
	 */
125129202Scognet	adds	r2, r2, #4
126129202Scognet#ifdef __APCS_26_
127275767Sandrew	ldmiaeq sp!, {r0, pc}^		/* done */
128129202Scognet#else
129275767Sandrew	ldmiaeq	sp!, {r0, pc}		/* done */
130129202Scognet#endif
131129202Scognet	/* copy the crud byte at a time */
	/*
	 * r2 is 1, 2 or 3: the first byte is always copied, the second when
	 * r2 >= 2 (ge) and the third when r2 > 2 (gt).  The byte loads and
	 * stores leave the flags from this cmp untouched.
	 */
132129202Scognet	cmp	r2, #2
133129202Scognet	ldrb	r3, [r1], #1
134129202Scognet	strb	r3, [r0], #1
135275767Sandrew	ldrbge	r3, [r1], #1
136275767Sandrew	strbge	r3, [r0], #1
137275767Sandrew	ldrbgt	r3, [r1], #1
138275767Sandrew	strbgt	r3, [r0], #1
139129202Scognet	ldmia	sp!, {r0, pc}
140129202Scognet
141129202Scognet	/* erg - unaligned destination */
142129202Scognet.Lmemcpy_destul:
	/*
	 * r12 = 4 - (dst & 3) = number of bytes (1..3) needed to bring dst
	 * up to a word boundary.  This path is only reached when len >= 4,
	 * so those bytes are within the requested length.
	 */
143129202Scognet	rsb	r12, r12, #4
144129202Scognet	cmp	r12, #2
145129202Scognet
146129202Scognet	/* align destination with byte copies */
147129202Scognet	ldrb	r3, [r1], #1
148129202Scognet	strb	r3, [r0], #1
149275767Sandrew	ldrbge	r3, [r1], #1
150275767Sandrew	strbge	r3, [r0], #1
151275767Sandrew	ldrbgt	r3, [r1], #1
152275767Sandrew	strbgt	r3, [r0], #1
	/* account for the bytes just copied; r2 is again remaining - 4 */
153129202Scognet	subs	r2, r2, r12
154129202Scognet	blt	.Lmemcpy_l4		/* less than 4 bytes */
155129202Scognet
	/* r12 = src & 3; non-zero falls through into .Lmemcpy_srcul */
156129202Scognet	ands	r12, r1, #3
157129202Scognet	beq	.Lmemcpy_t8		/* we have an aligned source */
158129202Scognet
159129202Scognet	/* erg - unaligned source */
160129202Scognet	/* This is where it gets nasty ... */
	/*
	 * dst is word aligned, src is not, r12 = src & 3 and r2 = remaining
	 * - 4 (>= 0).  src is rounded down to a word boundary and read with
	 * aligned word loads only; each output word is assembled from the
	 * tail of the previously loaded word (kept in lr) and the head of the
	 * next one.  The first load therefore also reads the 1..3 bytes that
	 * sit below src in the same word; they are shifted out, not stored.
	 *
	 * Dispatch on r12: 3 -> .Lmemcpy_srcul3, 2 -> .Lmemcpy_srcul2,
	 * otherwise (1) fall through.
	 */
161129202Scognet.Lmemcpy_srcul:
162129202Scognet	bic	r1, r1, #3
163129202Scognet	ldr	lr, [r1], #4
164129202Scognet	cmp	r12, #2
165129202Scognet	bgt	.Lmemcpy_srcul3
166129202Scognet	beq	.Lmemcpy_srcul2
	/*
	 * src offset 1.  With fewer than 16 bytes left (r2 < 12) go straight
	 * to the word-at-a-time loop; otherwise re-bias r2 to remaining - 16
	 * and borrow r4/r5 for the 16-byte loop.
	 */
167129202Scognet	cmp	r2, #0x0c
168129202Scognet	blt	.Lmemcpy_srcul1loop4
169129202Scognet	sub	r2, r2, #0x0c
170129202Scognet	stmdb	sp!, {r4, r5}
171129202Scognet
	/*
	 * 16 bytes per pass: three bytes carried over from the previous word
	 * plus one byte of the next word form each output word (shift 8 one
	 * way, 24 the other; the directions swap for big-endian).  lr ends
	 * each pass holding the last word loaded, ready for the next pass.
	 */
172129202Scognet.Lmemcpy_srcul1loop16:
173129202Scognet#ifdef __ARMEB__
174129202Scognet	mov	r3, lr, lsl #8
175129202Scognet#else
176129202Scognet	mov	r3, lr, lsr #8
177129202Scognet#endif
178129202Scognet	ldmia	r1!, {r4, r5, r12, lr}
179129202Scognet#ifdef __ARMEB__
180129202Scognet	orr	r3, r3, r4, lsr #24
181129202Scognet	mov	r4, r4, lsl #8
182129202Scognet	orr	r4, r4, r5, lsr #24
183129202Scognet	mov	r5, r5, lsl #8
184129202Scognet	orr	r5, r5, r12, lsr #24
185129202Scognet	mov	r12, r12, lsl #8
186129202Scognet	orr	r12, r12, lr, lsr #24
187129202Scognet#else
188129202Scognet	orr	r3, r3, r4, lsl #24
189129202Scognet	mov	r4, r4, lsr #8
190129202Scognet	orr	r4, r4, r5, lsl #24
191129202Scognet	mov	r5, r5, lsr #8
192129202Scognet	orr	r5, r5, r12, lsl #24
193129202Scognet	mov	r12, r12, lsr #8
194129202Scognet	orr	r12, r12, lr, lsl #24
195129202Scognet#endif
196129202Scognet	stmia	r0!, {r3-r5, r12}
197129202Scognet	subs	r2, r2, #0x10
198129202Scognet	bge	.Lmemcpy_srcul1loop16
	/* give r4/r5 back; r2 returns to remaining - 4 */
199129202Scognet	ldmia	sp!, {r4, r5}
200129202Scognet	adds	r2, r2, #0x0c
201129202Scognet	blt	.Lmemcpy_srcul1l4
202129202Scognet
	/* same merge as above, one output word (4 bytes) per pass */
203129202Scognet.Lmemcpy_srcul1loop4:
204129202Scognet#ifdef __ARMEB__
205129202Scognet	mov	r12, lr, lsl #8
206129202Scognet#else
207129202Scognet	mov	r12, lr, lsr #8
208129202Scognet#endif
209129202Scognet	ldr	lr, [r1], #4
210129202Scognet#ifdef __ARMEB__
211129202Scognet	orr	r12, r12, lr, lsr #24
212129202Scognet#else
213129202Scognet	orr	r12, r12, lr, lsl #24
214129202Scognet#endif
215129202Scognet	str	r12, [r0], #4
216129202Scognet	subs	r2, r2, #4
217129202Scognet	bge	.Lmemcpy_srcul1loop4
218129202Scognet
219129202Scognet.Lmemcpy_srcul1l4:
	/*
	 * r1 points one word past the word held in lr, of which 3 bytes are
	 * still uncopied; step r1 back by 3 so that it addresses the next
	 * source byte, then finish with the byte copier.
	 */
220129202Scognet	sub	r1, r1, #3
221129202Scognet	b	.Lmemcpy_l4
222129202Scognet
	/*
	 * src offset 2: identical structure to the offset-1 code above, but
	 * each output word takes two bytes from the previous word and two
	 * from the next (both shifts are 16).
	 */
223129202Scognet.Lmemcpy_srcul2:
224129202Scognet	cmp	r2, #0x0c
225129202Scognet	blt	.Lmemcpy_srcul2loop4
226129202Scognet	sub	r2, r2, #0x0c
227129202Scognet	stmdb	sp!, {r4, r5}
228129202Scognet
229129202Scognet.Lmemcpy_srcul2loop16:
230129202Scognet#ifdef __ARMEB__
231129202Scognet	mov	r3, lr, lsl #16
232129202Scognet#else
233129202Scognet	mov	r3, lr, lsr #16
234129202Scognet#endif
235129202Scognet	ldmia	r1!, {r4, r5, r12, lr}
236129202Scognet#ifdef __ARMEB__
237129202Scognet	orr	r3, r3, r4, lsr #16
238129202Scognet	mov	r4, r4, lsl #16
239129202Scognet	orr	r4, r4, r5, lsr #16
240129202Scognet	mov	r5, r5, lsl #16
241129202Scognet	orr	r5, r5, r12, lsr #16
242129202Scognet	mov	r12, r12, lsl #16
243129202Scognet	orr	r12, r12, lr, lsr #16
244129202Scognet#else
245129202Scognet	orr	r3, r3, r4, lsl #16
246129202Scognet	mov	r4, r4, lsr #16
247129202Scognet	orr	r4, r4, r5, lsl #16
248129202Scognet	mov	r5, r5, lsr #16
249129202Scognet	orr	r5, r5, r12, lsl #16
250129202Scognet	mov	r12, r12, lsr #16
251129202Scognet	orr	r12, r12, lr, lsl #16
252129202Scognet#endif
253129202Scognet	stmia	r0!, {r3-r5, r12}
254129202Scognet	subs	r2, r2, #0x10
255129202Scognet	bge	.Lmemcpy_srcul2loop16
	/* give r4/r5 back; r2 returns to remaining - 4 */
256129202Scognet	ldmia	sp!, {r4, r5}
257129202Scognet	adds	r2, r2, #0x0c
258129202Scognet	blt	.Lmemcpy_srcul2l4
259129202Scognet
	/* one output word (4 bytes) per pass */
260129202Scognet.Lmemcpy_srcul2loop4:
261129202Scognet#ifdef __ARMEB__
262129202Scognet	mov	r12, lr, lsl #16
263129202Scognet#else
264129202Scognet	mov	r12, lr, lsr #16
265129202Scognet#endif
266129202Scognet	ldr	lr, [r1], #4
267129202Scognet#ifdef __ARMEB__
268129202Scognet	orr	r12, r12, lr, lsr #16
269129202Scognet#else
270129202Scognet	orr	r12, r12, lr, lsl #16
271129202Scognet#endif
272129202Scognet	str	r12, [r0], #4
273129202Scognet	subs	r2, r2, #4
274129202Scognet	bge	.Lmemcpy_srcul2loop4
275129202Scognet
276129202Scognet.Lmemcpy_srcul2l4:
	/* 2 bytes of the word in lr are still uncopied: back r1 up by 2 */
277129202Scognet	sub	r1, r1, #2
278129202Scognet	b	.Lmemcpy_l4
279129202Scognet
	/*
	 * src offset 3: identical structure again, but each output word takes
	 * one byte from the previous word and three from the next (shift 24
	 * one way, 8 the other).
	 */
280129202Scognet.Lmemcpy_srcul3:
281129202Scognet	cmp	r2, #0x0c
282129202Scognet	blt	.Lmemcpy_srcul3loop4
283129202Scognet	sub	r2, r2, #0x0c
284129202Scognet	stmdb	sp!, {r4, r5}
285129202Scognet
286129202Scognet.Lmemcpy_srcul3loop16:
287129202Scognet#ifdef __ARMEB__
288129202Scognet	mov	r3, lr, lsl #24
289129202Scognet#else
290129202Scognet	mov	r3, lr, lsr #24
291129202Scognet#endif
292129202Scognet	ldmia	r1!, {r4, r5, r12, lr}
293129202Scognet#ifdef __ARMEB__
294129202Scognet	orr	r3, r3, r4, lsr #8
295129202Scognet	mov	r4, r4, lsl #24
296129202Scognet	orr	r4, r4, r5, lsr #8
297129202Scognet	mov	r5, r5, lsl #24
298129202Scognet	orr	r5, r5, r12, lsr #8
299129202Scognet	mov	r12, r12, lsl #24
300129202Scognet	orr	r12, r12, lr, lsr #8
301129202Scognet#else
302129202Scognet	orr	r3, r3, r4, lsl #8
303129202Scognet	mov	r4, r4, lsr #24
304129202Scognet	orr	r4, r4, r5, lsl #8
305129202Scognet	mov	r5, r5, lsr #24
306129202Scognet	orr	r5, r5, r12, lsl #8
307129202Scognet	mov	r12, r12, lsr #24
308129202Scognet	orr	r12, r12, lr, lsl #8
309129202Scognet#endif
310129202Scognet	stmia	r0!, {r3-r5, r12}
311129202Scognet	subs	r2, r2, #0x10
312129202Scognet	bge	.Lmemcpy_srcul3loop16
	/* give r4/r5 back; r2 returns to remaining - 4 */
313129202Scognet	ldmia	sp!, {r4, r5}
314129202Scognet	adds	r2, r2, #0x0c
315129202Scognet	blt	.Lmemcpy_srcul3l4
316129202Scognet
	/* one output word (4 bytes) per pass */
317129202Scognet.Lmemcpy_srcul3loop4:
318129202Scognet#ifdef __ARMEB__
319129202Scognet	mov	r12, lr, lsl #24
320129202Scognet#else
321129202Scognet	mov	r12, lr, lsr #24
322129202Scognet#endif
323129202Scognet	ldr	lr, [r1], #4
324129202Scognet#ifdef __ARMEB__
325129202Scognet	orr	r12, r12, lr, lsr #8
326129202Scognet#else
327129202Scognet	orr	r12, r12, lr, lsl #8
328129202Scognet#endif
329129202Scognet	str	r12, [r0], #4
330129202Scognet	subs	r2, r2, #4
331129202Scognet	bge	.Lmemcpy_srcul3loop4
332129202Scognet
333129202Scognet.Lmemcpy_srcul3l4:
	/* 1 byte of the word in lr is still uncopied: back r1 up by 1 */
334129202Scognet	sub	r1, r1, #1
335129202Scognet	b	.Lmemcpy_l4
336271337SianEND(memcpy)
337