memmove.S revision 285830
1/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/
2
3/*-
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <machine/asm.h>
33__FBSDID("$FreeBSD: releng/10.2/lib/libc/arm/string/memmove.S 275767 2014-12-14 16:28:53Z andrew $");
34
35.syntax	unified
36
/*
 * memmove / bcopy for 32-bit ARM (GNU as, unified syntax, ARM state).
 *
 *   void *memmove(void *dst, const void *src, size_t len);
 *   void  bcopy(const void *src, void *dst, size_t len);   (when _BCOPY)
 *
 * Register roles after the optional bcopy argument swap:
 *   r0  = dst cursor      r1 = src cursor      r2 = biased byte count
 *   r3, r12, lr = scratch (lr is saved on the stack before it is reused)
 *   r4, r5      = extra scratch, pushed/popped around the loops using them
 *
 * "Biased byte count": r2 holds (bytes remaining - K) where K is the
 * chunk size currently being tested, so a single subs/adds both updates
 * the count and sets the flags for the following conditional code.
 *
 * Strategy:
 *   - dst == src                : return immediately.
 *   - |dst - src| >= len        : buffers do not overlap, tail-call memcpy.
 *   - src > dst (overlapping)   : copy forwards  (.Lmemmove_f*).
 *   - src < dst (overlapping)   : copy backwards (.Lmemmove_b*).
 * Both directions align the destination with byte copies first, then use
 * word/multi-word transfers; a misaligned source is handled by loading
 * aligned words and merging neighbours with shifts.
 *
 * memmove returns dst in r0 on every path.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* Swap r0 and r1 in place (three-XOR swap, no scratch register) */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#endif
	/*
	 * Do the buffers overlap?
	 *
	 * r3 = |dst - src| (unsigned distance between the two pointers).
	 * If that distance is at least len the regions are disjoint and the
	 * plain memcpy can do the job.  The previous sequence used the
	 * opposite conditions on both subtractions and on the branch, which
	 * produced the negated distance and made the memcpy branch
	 * effectively unreachable.
	 */
	cmp	r0, r1
	RETeq			/* Bail now if src/dst are the same */
	subhs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bhs	PIC_SYM(_C_LABEL(memcpy), PLT)	/* no overlap: use memcpy */

	/*
	 * Determine copy direction.  dst != src is guaranteed here (the
	 * equal case returned above), so no "equal" handling is needed.
	 */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* src < dst: copy from the top down */

	/* ---------------- forward copy (src > dst) ---------------- */

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4		/* r2 = remaining - 4 */
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3		/* r12 = dst misalignment */
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3		/* r12 = src misalignment */
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* at least 16 bytes still left? */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemmove_fl4

	/* 4..11 bytes left: move one word (lt) or two words (ge) */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time: 1 always, 2 if ge, 3 if gt */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}		/* restore dst as return value */

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12		/* account for the bytes just copied */
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * dst is word aligned, src is not.  r12 = src & 3.  Round src down,
	 * preload the word holding the first wanted bytes into lr, then
	 * build every output word from the tail of the previous aligned
	 * word and the head of the next one.
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3	/* src offset 3 */
	beq	.Lmemmove_fsrcul2	/* src offset 2 */
	/* src offset 1 */
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* back to the first unconsumed src byte */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* back to the first unconsumed src byte */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* back to the first unconsumed src byte */
	b	.Lmemmove_fl4

	/* ---------------- backward copy (src < dst) ---------------- */
	/*
	 * Both cursors are moved to one past the end of their buffers and
	 * walk downwards with pre-decrement addressing.  When the copy is
	 * finished r0 is back at the original dst, so it is already the
	 * return value; lr is only pushed around the loops that use it as
	 * scratch, and the routine returns with RET.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2		/* r1 = src + len */
	add	r0, r0, r2		/* r0 = dst + len */
	subs	r2, r2, #4		/* r2 = remaining - 4 */
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3		/* r12 = dst end misalignment */
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3		/* r12 = src end misalignment */
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}		/* r4 and lr are used as scratch below */
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* at least 16 bytes still left? */
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}		/* restore r4 and the return address */

.Lmemmove_bl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemmove_bl4
	/* 4..11 bytes left: move one word (lt) or two words (ge) */
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	RETeq			/* done */

	/* copy the crud byte at a time: 1 always, 2 if ge, 3 if gt */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	/* r12 = dst end & 3 = number of bytes to copy to align dst down */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12		/* account for the bytes just copied */
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * dst is word aligned, src end is not.  r12 = src end & 3.  Round
	 * src down, preload the word holding the last wanted bytes into r3,
	 * then build every output word from the head of the previous
	 * (higher) aligned word and the tail of the next (lower) one.
	 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1	/* src offset 1 */
	beq	.Lmemmove_bsrcul2	/* src offset 2 */
	/* src offset 3 */
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* forward to just past the last unconsumed byte */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* forward to just past the last unconsumed byte */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

	/* 16 bytes per pass, like the other two source offsets */
.Lmemmove_bsrcul1loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* forward to just past the last unconsumed byte */
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif
590