/* memmove.S revision 137464 */
/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD: head/lib/libc/arm/string/memmove.S 137464 2004-11-09 16:49:14Z cognet $");

/*
 * void *memmove(void *dst, const void *src, size_t len)
 * (or, with _BCOPY defined: void bcopy(void *src, void *dst, size_t len))
 *
 * ABI (AAPCS, ARM32):
 *   In:  r0 = dst, r1 = src, r2 = len  (bcopy swaps r0/r1 on entry)
 *   Out: r0 = dst (memmove return value)
 *
 * Non-overlapping buffers are delegated to memcpy.  Overlapping copies
 * run forwards when src > dst and backwards when dst > src, with
 * word-aligned fast paths (32/16/12-byte LDM/STM bursts) and shift/merge
 * loops for unaligned sources.  Scratch: r3, r12, lr; r4/r5 are saved on
 * the stack before use.  Pre-UAL syntax (e.g. ldmgeia = UAL ldmge ia).
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (3-XOR swap) */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq		/* Bail now if src/dst are the same */
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* src < dst: must copy backwards */

	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemmove_fl4

.Lmemmove_backwards:
	add	r1, r1, r2		/* point past the ends; copy down */
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
590