1/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/
2
3/*-
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <machine/asm.h>
33__FBSDID("$FreeBSD$");
34
35.syntax	unified
36
/*
 * memmove(dst, src, len) / bcopy(src, dst, len)
 *
 * Copy len bytes from src to dst; the two buffers may overlap.  The same
 * body is assembled as bcopy() when _BCOPY is defined.
 *
 * Register use after the optional bcopy argument swap:
 *	r0		destination pointer (also memmove()'s return value)
 *	r1		source pointer
 *	r2		byte count, kept biased by the size of the chunk
 *			currently being tested for
 *	r3, r12, lr	scratch; lr is saved on the stack before it is reused
 *	r4, r5		extra scratch, saved/restored around the loops using them
 *
 * Buffers that do not overlap are handed to memcpy() with a tail call.
 * Overlapping buffers are copied forwards when src > dst and backwards
 * (starting from the end) when src < dst, so that no source byte is
 * overwritten before it has been read.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	it	eq
	RETeq		/* Bail now if src/dst are the same */
	/*
	 * r3 = |dst - src|.  After "cmp r0, r1" the carry flag is set (cs)
	 * when dst >= src and clear (cc) when dst < src, so pick the
	 * subtraction that cannot wrap.
	 */
	ite	cs
	subcs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcc	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 >= len) there is no overlap ... */
	bcs	PIC_SYM(_C_LABEL(memcpy), PLT)	/* ... so tail-call memcpy */

	/*
	 * The buffers overlap (so src != dst and len > |dst - src| >= 1).
	 * Determine copy direction.
	 */
	cmp	r1, r0
	it	cc
	bcc	.Lmemmove_backwards	/* src < dst: copy from the end */

	/* Forward copy: src > dst. */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* r2 + 16 >= 0: at least 16 left */
	ittt	ge
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo the 32-byte bias (back to -12) */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ittt	ge
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8		/* undo the 12-byte bias (back to -4) */
	blt	.Lmemmove_fl4

	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ittt	ge
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = real number of bytes left */
	it	eq
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1		/* always at least one byte */
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1		/* second byte if r2 >= 2 */
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1		/* third byte if r2 == 3 */
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst (1..3) */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * dst is word aligned, src is not (r12 = src & 3).  Read whole
	 * aligned source words and build each destination word from two
	 * neighbouring source words with shifts; lr carries the word that
	 * was read last.
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	/* src & 3 == 1 */
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* back to the first byte not yet copied */
	b	.Lmemmove_fl4

	/* src & 3 == 2 */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* back to the first byte not yet copied */
	b	.Lmemmove_fl4

	/* src & 3 == 3 */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* back to the first byte not yet copied */
	b	.Lmemmove_fl4

	/*
	 * Backward copy: src < dst.  Both pointers are moved to one past
	 * the end of their buffers and every access pre-decrements, so r0
	 * is back at the start of dst (the return value) when we finish.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}		/* r4 and lr are used as scratch below */
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* r2 + 16 >= 0: at least 16 left */
	ittt	ge
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* undo the 32-byte bias (back to -12) */
	ittt	ge
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8		/* undo the 12-byte bias (back to -4) */
	blt	.Lmemmove_bl4
	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ittt	ge
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = real number of bytes left */
	it	eq
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!		/* always at least one byte */
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!		/* second byte if r2 >= 2 */
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!		/* third byte if r2 == 3 */
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2			/* r12 = dst & 3 = bytes to copy to align */

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * dst is word aligned, src is not (r12 = src & 3).  Mirror image of
	 * the forward case: r3 carries the aligned source word read last
	 * and each destination word is built from two neighbouring words.
	 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	/* src & 3 == 3 */
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* forward to just past the last uncopied byte */
	b	.Lmemmove_bl4

	/* src & 3 == 2 */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* forward to just past the last uncopied byte */
	b	.Lmemmove_bl4

	/* src & 3 == 1 */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* forward to just past the last uncopied byte */
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif
612
613	.section .note.GNU-stack,"",%progbits
614