memmove.S revision 204607
1/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/
2
3/*-
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <machine/asm.h>
33__FBSDID("$FreeBSD: head/lib/libc/arm/string/memmove.S 204607 2010-03-02 22:16:40Z joel $");
34
/*
 * memmove(dst, src, len)  --  also built as bcopy(src, dst, len) when
 * _BCOPY is defined.
 *
 * Registers once the optional bcopy swap has run:
 *	r0 = dst, r1 = src, r2 = len
 * r3 is used as scratch here.  Nothing has been pushed and lr is still
 * the caller's return address, so the branch to memcpy is a tail call.
 *
 * Outcomes of this prologue:
 *	dst == src		return immediately (r0 untouched)
 *	|dst - src| >= len	buffers are disjoint: tail-call memcpy
 *	src <  dst (overlap)	branch to .Lmemmove_backwards
 *	src >  dst (overlap)	fall through into the forward copy
 */
35#ifndef _BCOPY
36/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
37ENTRY(memmove)
38#else
39/* bcopy = memcpy/memmove with arguments reversed. */
40/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
41ENTRY(bcopy)
42	/* switch the source and destination registers (xor swap, no scratch) */
43	eor     r0, r1, r0
44	eor     r1, r0, r1
45	eor     r0, r1, r0
46#endif
47	/* Do the buffers overlap? */
	/*
	 * cmp r0, r1 sets carry when dst >= src (unsigned), so the cs/cc
	 * pair below leaves the non-negative distance |dst - src| in r3.
	 * RETeq does not modify the flags, so they are still those of the cmp.
	 */
48	cmp	r0, r1
49	RETeq		/* Bail now if src/dst are the same */
50	subcs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
51	subcc	r3, r1, r0	/* if (src > dst) r3 = src - dst */
52	cmp	r3, r2		/* if (r3 >= len) the buffers are disjoint ... */
53	bcs	PIC_SYM(_C_LABEL(memcpy), PLT)	/* ... so plain memcpy will do */
54
55	/* Determine copy direction */
56	cmp	r1, r0
57	bcc	.Lmemmove_backwards	/* src below dst: copy from the top down */
58
	/*
	 * Falling through means src is above dst, so copy upwards.  The flags
	 * from the cmp above can never be "eq" here because src == dst has
	 * already returned, hence no conditional early exit at this point.
	 */
61
/*
 * Forward (ascending) copy.  Reached by fall-through when src is not
 * below dst.  r0 = dst, r1 = src, r2 = byte count.
 *
 * {r0, lr} are pushed first; .Lmemmove_fl4 later pops them as {r0, pc},
 * which both returns and reloads the original dst into r0.  Saving lr
 * also frees it for use as a data register in the block copies.
 *
 * From here on r2 is kept biased below the real remaining count so the
 * sign of r2 answers "is there enough left for the next chunk size?".
 * Each stage adds back the bias it no longer needs.
 */
62	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
63	subs	r2, r2, #4		/* r2 = remaining - 4 */
64	blt	.Lmemmove_fl4		/* less than 4 bytes */
65	ands	r12, r0, #3		/* r12 = dst & 3 */
66	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
67	ands	r12, r1, #3		/* r12 = src & 3 */
68	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */
69
70.Lmemmove_ft8:
71	/* We have aligned source and destination */
72	subs	r2, r2, #8		/* r2 = remaining - 12 */
73	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
74	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
75	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
76	stmdb	sp!, {r4}		/* borrow r4 */
77
78	/* blat 32 bytes at a time */
79	/* XXX for really big copies perhaps we should use more registers */
80.Lmemmove_floop32:
81	ldmia	r1!, {r3, r4, r12, lr}	/* 16 bytes in, post-incrementing src */
82	stmia	r0!, {r3, r4, r12, lr}	/* 16 bytes out, post-incrementing dst */
83	ldmia	r1!, {r3, r4, r12, lr}
84	stmia	r0!, {r3, r4, r12, lr}
85	subs	r2, r2, #0x20
86	bge	.Lmemmove_floop32	/* loop while at least 32 more remain */
87
	/* r2 + 16 >= 0 here means at least 16 bytes are still left */
88	cmn	r2, #0x10
89	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
90	stmgeia	r0!, {r3, r4, r12, lr}
91	subge	r2, r2, #0x10
92	ldmia	sp!, {r4}		/* return r4 */
93
/*
 * Forward copy, aligned src and dst, fewer than 32 bytes left.
 * On entry r2 = remaining - 32.  lr is used as a scratch data register
 * here; the forward path saved it on the stack on entry.
 */
94.Lmemmove_fl32:
95	adds	r2, r2, #0x14		/* r2 = remaining - 12; ge if >= 12 left */
96
97	/* blat 12 bytes at a time */
98.Lmemmove_floop12:
	/*
	 * Every instruction in the loop is conditional on ge, so arriving
	 * with fewer than 12 bytes left simply drops through to fl12.
	 */
99	ldmgeia	r1!, {r3, r12, lr}
100	stmgeia	r0!, {r3, r12, lr}
101	subges	r2, r2, #0x0c
102	bge	.Lmemmove_floop12
103
	/* On entry to fl12 r2 = remaining - 12, with fewer than 12 left. */
104.Lmemmove_fl12:
105	adds	r2, r2, #8		/* r2 = remaining - 4 */
106	blt	.Lmemmove_fl4		/* fewer than 4 left: byte tail only */
107
	/*
	 * 4..11 bytes left.  r2 - 4 < 0 means 4..7 remain, so copy one word;
	 * otherwise copy two words and take off 4 more.  Either way r2 ends
	 * up as (remaining - 4), which is what .Lmemmove_fl4 expects.
	 */
108	subs	r2, r2, #4
109	ldrlt	r3, [r1], #4
110	strlt	r3, [r0], #4
111	ldmgeia	r1!, {r3, r12}
112	stmgeia	r0!, {r3, r12}
113	subge	r2, r2, #4
114
/*
 * Forward copy tail and common exit for the forward path.
 * On entry r2 = remaining - 4 with 0..3 bytes left, and {r0, lr} saved
 * by the forward path are on top of the stack.  Both exits pop them as
 * {r0, pc}: this returns to the caller with the original dst in r0.
 */
115.Lmemmove_fl4:
116	/* less than 4 bytes to go */
117	adds	r2, r2, #4		/* r2 = bytes left (0..3) */
118	ldmeqia	sp!, {r0, pc}		/* done */
119
120	/* copy the crud byte at a time */
	/*
	 * One cmp drives all three byte moves: the first is unconditional
	 * (at least 1 left), the second runs if r2 >= 2, the third if r2 > 2.
	 */
121	cmp	r2, #2
122	ldrb	r3, [r1], #1
123	strb	r3, [r0], #1
124	ldrgeb	r3, [r1], #1
125	strgeb	r3, [r0], #1
126	ldrgtb	r3, [r1], #1
127	strgtb	r3, [r0], #1
128	ldmia	sp!, {r0, pc}
129
/*
 * Forward copy, destination not word aligned.
 * On entry r12 = dst & 3 (1..3) and r2 = remaining - 4 with at least
 * 4 bytes left.  Copies 4 - (dst & 3) single bytes so that dst becomes
 * word aligned, then re-dispatches on the source alignment.  If the
 * source is still unaligned, execution falls through into
 * .Lmemmove_fsrcul with r12 = src & 3.
 */
130	/* erg - unaligned destination */
131.Lmemmove_fdestul:
132	rsb	r12, r12, #4		/* r12 = bytes needed to align dst (1..3) */
133	cmp	r12, #2
134
135	/* align destination with byte copies */
	/* 1st byte always, 2nd if r12 >= 2, 3rd if r12 > 2 */
136	ldrb	r3, [r1], #1
137	strb	r3, [r0], #1
138	ldrgeb	r3, [r1], #1
139	strgeb	r3, [r0], #1
140	ldrgtb	r3, [r1], #1
141	strgtb	r3, [r0], #1
142	subs	r2, r2, r12		/* r2 = remaining - 4 again */
143	blt	.Lmemmove_fl4		/* less than 4 bytes */
144
145	ands	r12, r1, #3		/* r12 = src & 3 */
146	beq	.Lmemmove_ft8		/* we have an aligned source */
147
/*
 * Forward copy, dst word aligned but src not.
 * On entry r12 = src & 3 (1..3) and r2 = remaining - 4 (>= 0).
 *
 * src is rounded down to a word boundary and only whole aligned words
 * are loaded; each output word is assembled from two neighbouring source
 * words with shifts.  Because whole words are read, bytes of the first
 * and last aligned source word that lie outside the requested range are
 * loaded too (never stored).
 *
 * lr always holds the most recently loaded source word, part of which
 * has not been stored yet.  The code after the dispatch below is the
 * src & 3 == 1 variant: each output word combines lr shifted by 8 bits
 * with the next source word shifted by 24 bits the other way.  The
 * shift directions are mirrored when __ARMEB__ is defined.
 */
148	/* erg - unaligned source */
149	/* This is where it gets nasty ... */
150.Lmemmove_fsrcul:
151	bic	r1, r1, #3		/* round src down to a word boundary */
152	ldr	lr, [r1], #4		/* first (partial) source word */
153	cmp	r12, #2
154	bgt	.Lmemmove_fsrcul3	/* src & 3 == 3 */
155	beq	.Lmemmove_fsrcul2	/* src & 3 == 2 */
	/* src & 3 == 1 from here on */
156	cmp	r2, #0x0c
157	blt	.Lmemmove_fsrcul1loop4	/* fewer than 16 left: word loop only */
158	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
159	stmdb	sp!, {r4, r5}
160
	/* 16 bytes per pass: four new source words in, four merged words out */
161.Lmemmove_fsrcul1loop16:
162#ifdef __ARMEB__
163	mov	r3, lr, lsl #8
164#else
165	mov	r3, lr, lsr #8
166#endif
167	ldmia	r1!, {r4, r5, r12, lr}
168#ifdef __ARMEB__
169	orr	r3, r3, r4, lsr #24
170	mov	r4, r4, lsl #8
171	orr	r4, r4, r5, lsr #24
172	mov	r5, r5, lsl #8
173	orr	r5, r5, r12, lsr #24
174	mov	r12, r12, lsl #8
175	orr	r12, r12, lr, lsr #24
176#else
177	orr	r3, r3, r4, lsl #24
178	mov	r4, r4, lsr #8
179	orr	r4, r4, r5, lsl #24
180	mov	r5, r5, lsr #8
181	orr	r5, r5, r12, lsl #24
182	mov	r12, r12, lsr #8
183	orr	r12, r12, lr, lsl #24
184#endif
185	stmia	r0!, {r3-r5, r12}
186	subs	r2, r2, #0x10
187	bge	.Lmemmove_fsrcul1loop16
188	ldmia	sp!, {r4, r5}
189	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
190	blt	.Lmemmove_fsrcul1l4	/* fewer than 4 left */
191
	/* one merged word per pass while at least 4 bytes remain */
192.Lmemmove_fsrcul1loop4:
193#ifdef __ARMEB__
194	mov	r12, lr, lsl #8
195#else
196	mov	r12, lr, lsr #8
197#endif
198	ldr	lr, [r1], #4
199#ifdef __ARMEB__
200	orr	r12, r12, lr, lsr #24
201#else
202	orr	r12, r12, lr, lsl #24
203#endif
204	str	r12, [r0], #4
205	subs	r2, r2, #4
206	bge	.Lmemmove_fsrcul1loop4
207
208.Lmemmove_fsrcul1l4:
	/*
	 * r1 has stepped past the word held in lr, of which three bytes are
	 * still unstored; back up so r1 is the true byte-granular src again.
	 */
209	sub	r1, r1, #3
210	b	.Lmemmove_fl4
211
/*
 * Forward copy, dst word aligned, src & 3 == 2.
 * On entry r1 is the word-aligned address just past the first source
 * word, lr holds that word, and r2 = remaining - 4 (>= 0).  Every output
 * word is built from two neighbouring source words, each shifted by 16
 * bits in opposite directions; the directions are mirrored when
 * __ARMEB__ is defined.
 */
212.Lmemmove_fsrcul2:
213	cmp	r2, #0x0c
214	blt	.Lmemmove_fsrcul2loop4	/* fewer than 16 left: word loop only */
215	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
216	stmdb	sp!, {r4, r5}
217
	/* 16 bytes per pass */
218.Lmemmove_fsrcul2loop16:
219#ifdef __ARMEB__
220	mov	r3, lr, lsl #16
221#else
222	mov	r3, lr, lsr #16
223#endif
224	ldmia	r1!, {r4, r5, r12, lr}
225#ifdef __ARMEB__
226	orr	r3, r3, r4, lsr #16
227	mov	r4, r4, lsl #16
228	orr	r4, r4, r5, lsr #16
229	mov	r5, r5, lsl #16
230	orr	r5, r5, r12, lsr #16
231	mov	r12, r12, lsl #16
232	orr	r12, r12, lr, lsr #16
233#else
234	orr	r3, r3, r4, lsl #16
235	mov	r4, r4, lsr #16
236	orr	r4, r4, r5, lsl #16
237	mov	r5, r5, lsr #16
238	orr	r5, r5, r12, lsl #16
239	mov	r12, r12, lsr #16
240	orr	r12, r12, lr, lsl #16
241#endif
242	stmia	r0!, {r3-r5, r12}
243	subs	r2, r2, #0x10
244	bge	.Lmemmove_fsrcul2loop16
245	ldmia	sp!, {r4, r5}
246	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
247	blt	.Lmemmove_fsrcul2l4	/* fewer than 4 left */
248
	/* one merged word per pass while at least 4 bytes remain */
249.Lmemmove_fsrcul2loop4:
250#ifdef __ARMEB__
251	mov	r12, lr, lsl #16
252#else
253	mov	r12, lr, lsr #16
254#endif
255	ldr	lr, [r1], #4
256#ifdef __ARMEB__
257	orr	r12, r12, lr, lsr #16
258#else
259	orr	r12, r12, lr, lsl #16
260#endif
261	str	r12, [r0], #4
262	subs	r2, r2, #4
263	bge	.Lmemmove_fsrcul2loop4
264
265.Lmemmove_fsrcul2l4:
	/* two bytes of the word in lr are still unstored: back r1 up to them */
266	sub	r1, r1, #2
267	b	.Lmemmove_fl4
268
/*
 * Forward copy, dst word aligned, src & 3 == 3.
 * On entry r1 is the word-aligned address just past the first source
 * word, lr holds that word, and r2 = remaining - 4 (>= 0).  Every output
 * word combines lr shifted by 24 bits with the next source word shifted
 * by 8 bits the other way; the directions are mirrored when __ARMEB__
 * is defined.
 */
269.Lmemmove_fsrcul3:
270	cmp	r2, #0x0c
271	blt	.Lmemmove_fsrcul3loop4	/* fewer than 16 left: word loop only */
272	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
273	stmdb	sp!, {r4, r5}
274
	/* 16 bytes per pass */
275.Lmemmove_fsrcul3loop16:
276#ifdef __ARMEB__
277	mov	r3, lr, lsl #24
278#else
279	mov	r3, lr, lsr #24
280#endif
281	ldmia	r1!, {r4, r5, r12, lr}
282#ifdef __ARMEB__
283	orr	r3, r3, r4, lsr #8
284	mov	r4, r4, lsl #24
285	orr	r4, r4, r5, lsr #8
286	mov	r5, r5, lsl #24
287	orr	r5, r5, r12, lsr #8
288	mov	r12, r12, lsl #24
289	orr	r12, r12, lr, lsr #8
290#else
291	orr	r3, r3, r4, lsl #8
292	mov	r4, r4, lsr #24
293	orr	r4, r4, r5, lsl #8
294	mov	r5, r5, lsr #24
295	orr	r5, r5, r12, lsl #8
296	mov	r12, r12, lsr #24
297	orr	r12, r12, lr, lsl #8
298#endif
299	stmia	r0!, {r3-r5, r12}
300	subs	r2, r2, #0x10
301	bge	.Lmemmove_fsrcul3loop16
302	ldmia	sp!, {r4, r5}
303	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
304	blt	.Lmemmove_fsrcul3l4	/* fewer than 4 left */
305
	/* one merged word per pass while at least 4 bytes remain */
306.Lmemmove_fsrcul3loop4:
307#ifdef __ARMEB__
308	mov	r12, lr, lsl #24
309#else
310	mov	r12, lr, lsr #24
311#endif
312	ldr	lr, [r1], #4
313#ifdef __ARMEB__
314	orr	r12, r12, lr, lsr #8
315#else
316	orr	r12, r12, lr, lsl #8
317#endif
318	str	r12, [r0], #4
319	subs	r2, r2, #4
320	bge	.Lmemmove_fsrcul3loop4
321
322.Lmemmove_fsrcul3l4:
	/* one byte of the word in lr is still unstored: back r1 up to it */
323	sub	r1, r1, #1
324	b	.Lmemmove_fl4
325
/*
 * Backward (descending) copy, taken when src is below dst.
 * On entry r0 = dst, r1 = src, r2 = len.  Both pointers are first moved
 * to one past the end of their buffers and every transfer then
 * pre-decrements them.  Nothing is pushed on entry: the stores step r0
 * down by the full byte count, so r0 is back at dst when the path
 * returns through .Lmemmove_bl4.
 *
 * As in the forward path, r2 is kept biased below the real remaining
 * count so its sign tells whether the next chunk size still fits.
 * lr is saved only around the regions that use it as a data register.
 */
326.Lmemmove_backwards:
327	add	r1, r1, r2		/* r1 = src + len */
328	add	r0, r0, r2		/* r0 = dst + len */
329	subs	r2, r2, #4		/* r2 = remaining - 4 */
330	blt	.Lmemmove_bl4		/* less than 4 bytes */
331	ands	r12, r0, #3		/* r12 = (dst end) & 3 */
332	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
333	ands	r12, r1, #3		/* r12 = (src end) & 3 */
334	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */
335
336.Lmemmove_bt8:
337	/* We have aligned source and destination */
338	subs	r2, r2, #8		/* r2 = remaining - 12 */
339	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
340	stmdb	sp!, {r4, lr}		/* r4 and lr are used as data registers below */
341	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
342	blt	.Lmemmove_bl32
343
344	/* blat 32 bytes at a time */
345	/* XXX for really big copies perhaps we should use more registers */
346.Lmemmove_bloop32:
347	ldmdb	r1!, {r3, r4, r12, lr}	/* 16 bytes in, pre-decrementing src */
348	stmdb	r0!, {r3, r4, r12, lr}	/* 16 bytes out, pre-decrementing dst */
349	ldmdb	r1!, {r3, r4, r12, lr}
350	stmdb	r0!, {r3, r4, r12, lr}
351	subs	r2, r2, #0x20
352	bge	.Lmemmove_bloop32
353
	/* r2 = remaining - 32 here, with fewer than 32 bytes left */
354.Lmemmove_bl32:
355	cmn	r2, #0x10		/* ge if at least 16 bytes remain */
356	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
357	stmgedb	r0!, {r3, r4, r12, lr}
358	subge	r2, r2, #0x10
359	adds	r2, r2, #0x14		/* r2 = remaining - 12; ge if >= 12 left */
360	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
361	stmgedb	r0!, {r3, r12, lr}
362	subge	r2, r2, #0x0c
363	ldmia	sp!, {r4, lr}		/* balances the push made in bt8 */
364
/*
 * Backward copy tail.  r0 / r1 point just above the bytes still to be
 * copied and are pre-decremented by every transfer.
 * .Lmemmove_bl12: entry with r2 = remaining - 12, fewer than 12 left,
 *                 both pointers word aligned.
 * .Lmemmove_bl4:  entry with r2 = remaining - 4, 0..3 bytes left; this
 *                 is the exit of the whole backward path.  Nothing is on
 *                 the stack here, so it returns with a plain RET.
 */
365.Lmemmove_bl12:
366	adds	r2, r2, #8		/* r2 = remaining - 4 */
367	blt	.Lmemmove_bl4		/* fewer than 4 left */
	/*
	 * 4..11 bytes left: one word if fewer than 8 remain, otherwise two
	 * words; either way r2 ends up as (remaining - 4) for bl4.
	 */
368	subs	r2, r2, #4
369	ldrlt	r3, [r1, #-4]!
370	strlt	r3, [r0, #-4]!
371	ldmgedb	r1!, {r3, r12}
372	stmgedb	r0!, {r3, r12}
373	subge	r2, r2, #4
374
375.Lmemmove_bl4:
376	/* less than 4 bytes to go */
377	adds	r2, r2, #4		/* r2 = bytes left (0..3) */
378	RETeq			/* done */
379
380	/* copy the crud byte at a time */
	/* 1st byte always, 2nd if r2 >= 2, 3rd if r2 > 2 */
381	cmp	r2, #2
382	ldrb	r3, [r1, #-1]!
383	strb	r3, [r0, #-1]!
384	ldrgeb	r3, [r1, #-1]!
385	strgeb	r3, [r0, #-1]!
386	ldrgtb	r3, [r1, #-1]!
387	strgtb	r3, [r0, #-1]!
388	RET
389
/*
 * Backward copy, end of destination not word aligned.
 * On entry r12 = (dst end) & 3 (1..3), which is exactly how many bytes
 * must be copied downwards to reach a word boundary, and
 * r2 = remaining - 4 with at least 4 bytes left.  After aligning dst the
 * source alignment is re-tested; if the source is still unaligned,
 * execution falls through into .Lmemmove_bsrcul with r12 = src & 3.
 */
390	/* erg - unaligned destination */
391.Lmemmove_bdestul:
392	cmp	r12, #2
393
394	/* align destination with byte copies */
	/* 1st byte always, 2nd if r12 >= 2, 3rd if r12 > 2 */
395	ldrb	r3, [r1, #-1]!
396	strb	r3, [r0, #-1]!
397	ldrgeb	r3, [r1, #-1]!
398	strgeb	r3, [r0, #-1]!
399	ldrgtb	r3, [r1, #-1]!
400	strgtb	r3, [r0, #-1]!
401	subs	r2, r2, r12		/* r2 = remaining - 4 again */
402	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
403	ands	r12, r1, #3		/* r12 = (src end) & 3 */
404	beq	.Lmemmove_bt8		/* we have an aligned source */
405
/*
 * Backward copy, dst word aligned but src not.
 * On entry r12 = (src end) & 3 (1..3) and r2 = remaining - 4 (>= 0).
 *
 * The source pointer is rounded down to a word boundary and only whole
 * aligned words are loaded; each output word is assembled from two
 * neighbouring source words with shifts.  Because whole words are read,
 * bytes of the first and last aligned source word that lie outside the
 * requested range are loaded too (never stored).
 *
 * r3 always holds the most recently loaded (lowest so far) source word,
 * part of which has not been stored yet.  The code after the dispatch
 * below is the src & 3 == 3 variant: each output word combines r3
 * shifted by 8 bits with the next lower source word shifted by 24 bits
 * the other way.  Shift directions are mirrored when __ARMEB__ is
 * defined.
 */
406	/* erg - unaligned source */
407	/* This is where it gets nasty ... */
408.Lmemmove_bsrcul:
409	bic	r1, r1, #3		/* round src end down to a word boundary */
410	ldr	r3, [r1, #0]		/* word holding the topmost source bytes */
411	cmp	r12, #2
412	blt	.Lmemmove_bsrcul1	/* src & 3 == 1 */
413	beq	.Lmemmove_bsrcul2	/* src & 3 == 2 */
	/* src & 3 == 3 from here on */
414	cmp	r2, #0x0c
415	blt	.Lmemmove_bsrcul3loop4	/* fewer than 16 left: word loop only */
416	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
417	stmdb	sp!, {r4, r5, lr}	/* lr is used as a data register below */
418
	/* 16 bytes per pass: four new source words in, four merged words out */
419.Lmemmove_bsrcul3loop16:
420#ifdef __ARMEB__
421	mov	lr, r3, lsr #8
422#else
423	mov	lr, r3, lsl #8
424#endif
425	ldmdb	r1!, {r3-r5, r12}
426#ifdef __ARMEB__
427	orr	lr, lr, r12, lsl #24
428	mov	r12, r12, lsr #8
429	orr	r12, r12, r5, lsl #24
430	mov	r5, r5, lsr #8
431	orr	r5, r5, r4, lsl #24
432	mov	r4, r4, lsr #8
433	orr	r4, r4, r3, lsl #24
434#else
435	orr	lr, lr, r12, lsr #24
436	mov	r12, r12, lsl #8
437	orr	r12, r12, r5, lsr #24
438	mov	r5, r5, lsl #8
439	orr	r5, r5, r4, lsr #24
440	mov	r4, r4, lsl #8
441	orr	r4, r4, r3, lsr #24
442#endif
443	stmdb	r0!, {r4, r5, r12, lr}
444	subs	r2, r2, #0x10
445	bge	.Lmemmove_bsrcul3loop16
446	ldmia	sp!, {r4, r5, lr}
447	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
448	blt	.Lmemmove_bsrcul3l4	/* fewer than 4 left */
449
	/* one merged word per pass while at least 4 bytes remain */
450.Lmemmove_bsrcul3loop4:
451#ifdef __ARMEB__
452	mov	r12, r3, lsr #8
453#else
454	mov	r12, r3, lsl #8
455#endif
456	ldr	r3, [r1, #-4]!
457#ifdef __ARMEB__
458	orr	r12, r12, r3, lsl #24
459#else
460	orr	r12, r12, r3, lsr #24
461#endif
462	str	r12, [r0, #-4]!
463	subs	r2, r2, #4
464	bge	.Lmemmove_bsrcul3loop4
465
466.Lmemmove_bsrcul3l4:
	/*
	 * r1 is the address of the word held in r3, of which the three
	 * lowest-addressed bytes are still unstored; step r1 up so that it
	 * points just above them, as .Lmemmove_bl4 expects.
	 */
467	add	r1, r1, #3
468	b	.Lmemmove_bl4
469
/*
 * Backward copy, dst word aligned, (src end) & 3 == 2.
 * On entry r1 is the word-aligned address of the word held in r3 and
 * r2 = remaining - 4 (>= 0).  Every output word is built from r3 and
 * the next lower source word, each shifted by 16 bits in opposite
 * directions; the directions are mirrored when __ARMEB__ is defined.
 */
470.Lmemmove_bsrcul2:
471	cmp	r2, #0x0c
472	blt	.Lmemmove_bsrcul2loop4	/* fewer than 16 left: word loop only */
473	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
474	stmdb	sp!, {r4, r5, lr}	/* lr is used as a data register below */
475
	/* 16 bytes per pass */
476.Lmemmove_bsrcul2loop16:
477#ifdef __ARMEB__
478	mov	lr, r3, lsr #16
479#else
480	mov	lr, r3, lsl #16
481#endif
482	ldmdb	r1!, {r3-r5, r12}
483#ifdef __ARMEB__
484	orr	lr, lr, r12, lsl #16
485	mov	r12, r12, lsr #16
486	orr	r12, r12, r5, lsl #16
487	mov	r5, r5, lsr #16
488	orr	r5, r5, r4, lsl #16
489	mov	r4, r4, lsr #16
490	orr	r4, r4, r3, lsl #16
491#else
492	orr	lr, lr, r12, lsr #16
493	mov	r12, r12, lsl #16
494	orr	r12, r12, r5, lsr #16
495	mov	r5, r5, lsl #16
496	orr	r5, r5, r4, lsr #16
497	mov	r4, r4, lsl #16
498	orr	r4, r4, r3, lsr #16
499#endif
500	stmdb	r0!, {r4, r5, r12, lr}
501	subs	r2, r2, #0x10
502	bge	.Lmemmove_bsrcul2loop16
503	ldmia	sp!, {r4, r5, lr}
504	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
505	blt	.Lmemmove_bsrcul2l4	/* fewer than 4 left */
506
	/* one merged word per pass while at least 4 bytes remain */
507.Lmemmove_bsrcul2loop4:
508#ifdef __ARMEB__
509	mov	r12, r3, lsr #16
510#else
511	mov	r12, r3, lsl #16
512#endif
513	ldr	r3, [r1, #-4]!
514#ifdef __ARMEB__
515	orr	r12, r12, r3, lsl #16
516#else
517	orr	r12, r12, r3, lsr #16
518#endif
519	str	r12, [r0, #-4]!
520	subs	r2, r2, #4
521	bge	.Lmemmove_bsrcul2loop4
522
523.Lmemmove_bsrcul2l4:
	/* two bytes of the word in r3 are still unstored: point r1 just above them */
524	add	r1, r1, #2
525	b	.Lmemmove_bl4
526
/*
 * Backward copy, dst word aligned, (src end) & 3 == 1.
 * On entry r1 is the word-aligned address of the word held in r3 and
 * r2 = remaining - 4 (>= 0).  Every output word combines r3 shifted by
 * 24 bits with the next lower source word shifted by 8 bits the other
 * way; the directions are mirrored when __ARMEB__ is defined.
 */
527.Lmemmove_bsrcul1:
528	cmp	r2, #0x0c
529	blt	.Lmemmove_bsrcul1loop4	/* fewer than 16 left: word loop only */
530	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
531	stmdb	sp!, {r4, r5, lr}	/* lr is used as a data register below */
532
	/*
	 * Despite the "loop32" in the label, each pass loads four words and
	 * stores four words, i.e. 16 bytes, and r2 drops by 0x10 per pass.
	 */
533.Lmemmove_bsrcul1loop32:
534#ifdef __ARMEB__
535	mov	lr, r3, lsr #24
536#else
537	mov	lr, r3, lsl #24
538#endif
539	ldmdb	r1!, {r3-r5, r12}
540#ifdef __ARMEB__
541	orr	lr, lr, r12, lsl #8
542	mov	r12, r12, lsr #24
543	orr	r12, r12, r5, lsl #8
544	mov	r5, r5, lsr #24
545	orr	r5, r5, r4, lsl #8
546	mov	r4, r4, lsr #24
547	orr	r4, r4, r3, lsl #8
548#else
549	orr	lr, lr, r12, lsr #8
550	mov	r12, r12, lsl #24
551	orr	r12, r12, r5, lsr #8
552	mov	r5, r5, lsl #24
553	orr	r5, r5, r4, lsr #8
554	mov	r4, r4, lsl #24
555	orr	r4, r4, r3, lsr #8
556#endif
557	stmdb	r0!, {r4, r5, r12, lr}
558	subs	r2, r2, #0x10
559	bge	.Lmemmove_bsrcul1loop32
560	ldmia	sp!, {r4, r5, lr}
561	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
562	blt	.Lmemmove_bsrcul1l4	/* fewer than 4 left */
563
	/* one merged word per pass while at least 4 bytes remain */
564.Lmemmove_bsrcul1loop4:
565#ifdef __ARMEB__
566	mov	r12, r3, lsr #24
567#else
568	mov	r12, r3, lsl #24
569#endif
570	ldr	r3, [r1, #-4]!
571#ifdef __ARMEB__
572	orr	r12, r12, r3, lsl #8
573#else
574	orr	r12, r12, r3, lsr #8
575#endif
576	str	r12, [r0, #-4]!
577	subs	r2, r2, #4
578	bge	.Lmemmove_bsrcul1loop4
579
580.Lmemmove_bsrcul1l4:
	/* one byte of the word in r3 is still unstored: point r1 just above it */
581	add	r1, r1, #1
582	b	.Lmemmove_bl4
583