/* memmove.S revision 204607 */
/*	$NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD: head/lib/libc/arm/string/memmove.S 204607 2010-03-02 22:16:40Z joel $");

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
/*
 * memmove(dst, src, len): copy len bytes, coping with overlapping buffers.
 * In:  r0 = dst, r1 = src, r2 = len
 */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
/*
 * In:  r0 = src, r1 = dst, r2 = len
 */
ENTRY(bcopy)
	/* switch the source and destination registers (XOR swap, no scratch) */
	eor     r0, r1, r0
	eor     r1, r0, r1
	eor     r0, r1, r0
#endif
	/* From here on: r0 = dst, r1 = src, r2 = len; r3 is scratch. */

	/*
	 * Do the buffers overlap?
	 *
	 * r3 = |dst - src|.  "cmp r0, r1" sets carry when dst >= src
	 * (unsigned), so the "cs" subtract handles dst > src and the "cc"
	 * subtract handles src > dst; the equal case has already returned.
	 * When the distance is at least len the two ranges cannot overlap,
	 * so the copy is handed to memcpy as a tail call.
	 */
	cmp	r0, r1
	RETeq		/* Bail now if src/dst are the same */
	subcs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcc	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 >= len) there is no overlap */
	bcs	PIC_SYM(_C_LABEL(memcpy), PLT)

	/*
	 * Determine copy direction.  src below dst: copy from the top down
	 * so the tail of the source is read before it gets overwritten.
	 * Otherwise fall through to the ascending copy.
	 *
	 * No zero-length special case is needed here: the flags still come
	 * from "cmp r1, r0" (never equal at this point), and both copy paths
	 * exit through their "less than 4 bytes" tails when nothing is left.
	 */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

/*
 * Forward copy: ascending addresses, reached when src is above dst.
 *
 * Register use on this path:
 *   r0 = destination cursor, r1 = source cursor
 *   r2 = bytes remaining, kept biased low (by 4, 12 or 32 depending on the
 *        stage) so a signed lt/ge test answers "is another chunk left?"
 *   r3, r12, lr (plus r4 inside the 32-byte loop) = data in flight
 * The original dst and the return address are pushed here and popped
 * straight into r0/pc on exit.
 */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4		/* r2 = remaining - 4 */
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* ge when at least 16 bytes remain */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* back to r2 = remaining - 12 */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8		/* back to r2 = remaining - 4 */
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4		/* 4..7 bytes left: move one word */
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}		/* 8..11 bytes left: move two words */
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1		/* always at least one byte here */
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1		/* second byte when 2 or 3 remain */
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1		/* third byte when 3 remain */
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

/*
 * erg - unaligned destination (forward copy)
 *
 * On entry r12 = dst & 3 (non-zero) and r2 = remaining - 4.  Moves the
 * 1..3 bytes needed to bring r0 up to a word boundary, then either rejoins
 * the aligned path or falls through to the unaligned-source code that
 * follows.
 */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes up to the next boundary */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3		/* r12 = src & 3 for the code below */
	beq	.Lmemmove_ft8		/* we have an aligned source */

/*
 * erg - unaligned source (forward copy)
 * This is where it gets nasty ...
 *
 * On entry r0 is word aligned, r12 = src & 3 (1, 2 or 3) and
 * r2 = remaining - 4.  r1 is rounded down to a word boundary and only
 * whole words are loaded; each output word is stitched together from the
 * unused part of the previous word (carried in lr) and the leading part of
 * the next one.  The shift amounts depend on r12, hence three copies of the
 * same loop; this first one handles r12 == 1.
 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4		/* prime lr with the first partial word */
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c		/* at least 16 bytes remaining? */
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}		/* borrow r4 and r5 */

	/* 16 bytes per pass: three bytes from each word, one from the next */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}		/* return r4 and r5 */
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul1l4

	/* one word per pass */
.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	/* r1 is one word past lr; three bytes of lr are still unconsumed */
	sub	r1, r1, #3
	b	.Lmemmove_fl4

/*
 * Forward copy, source two bytes past a word boundary.
 *
 * On entry r1 has been rounded down and advanced one word, lr holds the
 * word that was loaded, r0 is word aligned and r2 = remaining - 4.  Each
 * output word is half of the previous source word and half of the next.
 */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c		/* at least 16 bytes remaining? */
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}		/* borrow r4 and r5 */

	/* 16 bytes per pass */
.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}		/* return r4 and r5 */
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul2l4

	/* one word per pass */
.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	/* r1 is one word past lr; two bytes of lr are still unconsumed */
	sub	r1, r1, #2
	b	.Lmemmove_fl4

/*
 * Forward copy, source three bytes past a word boundary.
 *
 * On entry r1 has been rounded down and advanced one word, lr holds the
 * word that was loaded, r0 is word aligned and r2 = remaining - 4.  Each
 * output word is one byte of the previous source word and three bytes of
 * the next.
 */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c		/* at least 16 bytes remaining? */
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}		/* borrow r4 and r5 */

	/* 16 bytes per pass */
.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}		/* return r4 and r5 */
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemmove_fsrcul3l4

	/* one word per pass */
.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	/* r1 is one word past lr; one byte of lr is still unconsumed */
	sub	r1, r1, #1
	b	.Lmemmove_fl4

/*
 * Backward copy: descending addresses, reached when src is below dst.
 *
 * Register use on this path:
 *   r0 = destination cursor, r1 = source cursor; both start one past the
 *        end of their buffer and every access pre-decrements
 *   r2 = bytes remaining, biased low exactly as on the forward path
 *   r3, r12 (plus r4 and lr, saved around their use) = data in flight
 * Nothing is pushed on entry; the path returns through RET.
 */
.Lmemmove_backwards:
	add	r1, r1, r2		/* r1 = src + len */
	add	r0, r0, r2		/* r0 = dst + len */
	subs	r2, r2, #4		/* r2 = remaining - 4 */
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}		/* borrow r4 and lr */
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* ge when at least 16 bytes remain */
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* back to r2 = remaining - 12 */
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}		/* return r4 and lr */

.Lmemmove_bl12:
	adds	r2, r2, #8		/* back to r2 = remaining - 4 */
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!		/* 4..7 bytes left: move one word */
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}		/* 8..11 bytes left: move two words */
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!		/* always at least one byte here */
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!		/* second byte when 2 or 3 remain */
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!		/* third byte when 3 remain */
	strgtb	r3, [r0, #-1]!
	RET

/*
 * erg - unaligned destination (backward copy)
 *
 * On entry r12 = (end of dst) & 3, non-zero, and r2 = remaining - 4.
 * Copying downwards, r12 is itself the number of bytes that must be moved
 * to bring r0 down to a word boundary.  Afterwards either rejoin the
 * aligned path or fall through to the unaligned-source code that follows.
 */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3		/* r12 = src & 3 for the code below */
	beq	.Lmemmove_bt8		/* we have an aligned source */

/*
 * erg - unaligned source (backward copy)
 * This is where it gets nasty ...
 *
 * On entry r0 is word aligned, r12 = (source cursor) & 3 (1, 2 or 3) and
 * r2 = remaining - 4.  r1 is rounded down to a word boundary and only
 * whole words are loaded; each output word is stitched together from the
 * unused part of the previously loaded word (carried in r3) and the
 * trailing part of the word below it.  The shift amounts depend on r12,
 * hence three copies of the same loop; this first one handles r12 == 3.
 */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]		/* prime r3 with the top partial word */
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c		/* at least 16 bytes remaining? */
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}	/* borrow r4, r5 and lr */

	/* 16 bytes per pass: three bytes from each word, one from the next */
.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}	/* return r4, r5 and lr */
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul3l4

	/* one word per pass */
.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	/* r1 addresses the word in r3; its low three bytes are unconsumed */
	add	r1, r1, #3
	b	.Lmemmove_bl4

/*
 * Backward copy, source cursor two bytes past a word boundary.
 *
 * On entry r1 has been rounded down to that boundary, r3 holds the word
 * loaded from it, r0 is word aligned and r2 = remaining - 4.  Each output
 * word is half of the previously loaded word and half of the one below.
 */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c		/* at least 16 bytes remaining? */
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}	/* borrow r4, r5 and lr */

	/* 16 bytes per pass */
.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}	/* return r4, r5 and lr */
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul2l4

	/* one word per pass */
.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	/* r1 addresses the word in r3; its low two bytes are unconsumed */
	add	r1, r1, #2
	b	.Lmemmove_bl4

/*
 * Backward copy, source cursor one byte past a word boundary.
 *
 * On entry r1 has been rounded down to that boundary, r3 holds the word
 * loaded from it, r0 is word aligned and r2 = remaining - 4.  Each output
 * word is one byte of the previously loaded word and three bytes of the
 * one below.
 */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c		/* at least 16 bytes remaining? */
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}	/* borrow r4, r5 and lr */

	/* 16 bytes per pass */
.Lmemmove_bsrcul1loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}	/* return r4, r5 and lr */
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemmove_bsrcul1l4

	/* one word per pass */
.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	/* r1 addresses the word in r3; its low byte is unconsumed */
	add	r1, r1, #1
	b	.Lmemmove_bl4
583193326Sed