support.S revision 144967
1129198Scognet/*-
2129198Scognet * Copyright (c) 2004 Olivier Houchard
3129198Scognet * All rights reserved.
4129198Scognet *
5129198Scognet * Redistribution and use in source and binary forms, with or without
6129198Scognet * modification, are permitted provided that the following conditions
7129198Scognet * are met:
8129198Scognet * 1. Redistributions of source code must retain the above copyright
9129198Scognet *    notice, this list of conditions and the following disclaimer.
10129198Scognet * 2. Redistributions in binary form must reproduce the above copyright
11129198Scognet *    notice, this list of conditions and the following disclaimer in the
12129198Scognet *    documentation and/or other materials provided with the distribution.
13129198Scognet *
14129198Scognet * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15129198Scognet * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16129198Scognet * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17129198Scognet * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18129198Scognet * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19129198Scognet * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20129198Scognet * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21129198Scognet * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22129198Scognet * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23129198Scognet * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24129198Scognet * SUCH DAMAGE.
25129198Scognet */
26129198Scognet
27129198Scognet#include <machine/asm.h>
28129198Scognet#include <machine/asmacros.h>
29129198Scognet__FBSDID("$FreeBSD: head/sys/arm/arm/support.S 144967 2005-04-12 22:46:09Z cognet $");
30129198Scognet
31129198Scognet#include "assym.s"
32129198Scognet
33129198Scognet/*
34129250Scognet * memset: Sets a block of memory to the specified value
35129250Scognet *
36129250Scognet * On entry:
37129250Scognet *   r0 - dest address
38129250Scognet *   r1 - byte to write
39129250Scognet *   r2 - number of bytes to write
40129250Scognet *
41129250Scognet * On exit:
42129250Scognet *   r0 - dest address
43129250Scognet */
44129250Scognet/* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * bzero entry: r0 = dest, r1 = byte count.  Loads a zero fill value into
 * r3 and joins the shared code at do_memset, which takes the count in r1.
 */
45129250ScognetENTRY(bzero)
46129250Scognet	mov	r3, #0x00		/* Fill value is zero */
47129250Scognet	b	do_memset
48129250Scognet
49129250Scognet/* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * memset entry: r0 = dest, r1 = fill value, r2 = byte count.
 *
 * Register use from do_memset onwards:
 *   r0 - dest address; never written, so it is still there on return
 *   r1 - bytes left (temporarily biased by the subs/add pairs below)
 *   r2 - scratch, later a second copy of the fill word
 *   r3 - fill value, widened from a byte to a word once ip is aligned
 *   ip - write cursor
 */
50129250ScognetENTRY(memset)
51129250Scognet	and	r3, r1, #0xff		/* We deal with bytes */
52129250Scognet	mov	r1, r2			/* Count moves to r1, as for bzero */
53129250Scognetdo_memset:
54129250Scognet	cmp	r1, #0x04		/* Do we have less than 4 bytes */
55129250Scognet	mov	ip, r0			/* Work on a copy of the address */
56129250Scognet	blt	.Lmemset_lessthanfour
57129250Scognet
58129250Scognet	/* Ok first we will word align the address */
59129250Scognet	ands	r2, ip, #0x03		/* Get the bottom two bits */
60129250Scognet	bne	.Lmemset_wordunaligned	/* The address is not word aligned */
61129250Scognet
62129250Scognet	/* We are now word aligned */
63129250Scognet.Lmemset_wordaligned:
	/*
	 * The two orr instructions replicate the fill byte across r3.  The
	 * compare placed between them sets the flags used by the blt below
	 * (non-XScale) or by the conditional quad-align store (XScale).
	 */
64129250Scognet	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
65129250Scognet#ifdef __XSCALE__
66129250Scognet	tst	ip, #0x04		/* Quad-align for Xscale */
67129250Scognet#else
68129250Scognet	cmp	r1, #0x10
69129250Scognet#endif
70129250Scognet	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
71129250Scognet#ifdef __XSCALE__
72129250Scognet	subne	r1, r1, #0x04		/* Quad-align if necessary */
73129250Scognet	strne	r3, [ip], #0x04
74129250Scognet	cmp	r1, #0x10
75129250Scognet#endif
76129250Scognet	blt	.Lmemset_loop4		/* If less than 16 then use words */
77129250Scognet	mov	r2, r3			/* Duplicate data */
78129250Scognet	cmp	r1, #0x80		/* If < 128 then skip the big loop */
79129250Scognet	blt	.Lmemset_loop32
80129250Scognet
81129250Scognet	/* Do 128 bytes at a time */
82129250Scognet.Lmemset_loop128:
	/*
	 * The count is reduced first; the sixteen stores of the r2/r3 pair
	 * (8 bytes each) only execute while the result is still >= 0.
	 */
83129250Scognet	subs	r1, r1, #0x80
84129250Scognet#ifdef __XSCALE__
85129250Scognet	strged	r2, [ip], #0x08
86129250Scognet	strged	r2, [ip], #0x08
87129250Scognet	strged	r2, [ip], #0x08
88129250Scognet	strged	r2, [ip], #0x08
89129250Scognet	strged	r2, [ip], #0x08
90129250Scognet	strged	r2, [ip], #0x08
91129250Scognet	strged	r2, [ip], #0x08
92129250Scognet	strged	r2, [ip], #0x08
93129250Scognet	strged	r2, [ip], #0x08
94129250Scognet	strged	r2, [ip], #0x08
95129250Scognet	strged	r2, [ip], #0x08
96129250Scognet	strged	r2, [ip], #0x08
97129250Scognet	strged	r2, [ip], #0x08
98129250Scognet	strged	r2, [ip], #0x08
99129250Scognet	strged	r2, [ip], #0x08
100129250Scognet	strged	r2, [ip], #0x08
101129250Scognet#else
102129250Scognet	stmgeia	ip!, {r2-r3}
103129250Scognet	stmgeia	ip!, {r2-r3}
104129250Scognet	stmgeia	ip!, {r2-r3}
105129250Scognet	stmgeia	ip!, {r2-r3}
106129250Scognet	stmgeia	ip!, {r2-r3}
107129250Scognet	stmgeia	ip!, {r2-r3}
108129250Scognet	stmgeia	ip!, {r2-r3}
109129250Scognet	stmgeia	ip!, {r2-r3}
110129250Scognet	stmgeia	ip!, {r2-r3}
111129250Scognet	stmgeia	ip!, {r2-r3}
112129250Scognet	stmgeia	ip!, {r2-r3}
113129250Scognet	stmgeia	ip!, {r2-r3}
114129250Scognet	stmgeia	ip!, {r2-r3}
115129250Scognet	stmgeia	ip!, {r2-r3}
116129250Scognet	stmgeia	ip!, {r2-r3}
117129250Scognet	stmgeia	ip!, {r2-r3}
118129250Scognet#endif
119129250Scognet	bgt	.Lmemset_loop128
120137463Scognet	RETeq			/* Zero length so just exit */
121129250Scognet
122129250Scognet	add	r1, r1, #0x80		/* Adjust for extra sub */
123129250Scognet
124129250Scognet	/* Do 32 bytes at a time */
125129250Scognet.Lmemset_loop32:
	/* Same scheme as the 128-byte loop, with four 8-byte stores */
126129250Scognet	subs	r1, r1, #0x20
127129250Scognet#ifdef __XSCALE__
128129250Scognet	strged	r2, [ip], #0x08
129129250Scognet	strged	r2, [ip], #0x08
130129250Scognet	strged	r2, [ip], #0x08
131129250Scognet	strged	r2, [ip], #0x08
132129250Scognet#else
133129250Scognet	stmgeia	ip!, {r2-r3}
134129250Scognet	stmgeia	ip!, {r2-r3}
135129250Scognet	stmgeia	ip!, {r2-r3}
136129250Scognet	stmgeia	ip!, {r2-r3}
137129250Scognet#endif
138129250Scognet	bgt	.Lmemset_loop32
139137463Scognet	RETeq			/* Zero length so just exit */
140129250Scognet
	/*
	 * r1 is (bytes left - 32) here.  Adding 16 leaves (bytes left - 16),
	 * so "ge" below means a 16 byte chunk is still outstanding.
	 */
141129250Scognet	adds	r1, r1, #0x10		/* Partially adjust for extra sub */
142129250Scognet
143129250Scognet	/* Deal with 16 bytes or more */
144129250Scognet#ifdef __XSCALE__
145129250Scognet	strged	r2, [ip], #0x08
146129250Scognet	strged	r2, [ip], #0x08
147129250Scognet#else
148129250Scognet	stmgeia	ip!, {r2-r3}
149129250Scognet	stmgeia	ip!, {r2-r3}
150129250Scognet#endif
151137463Scognet	RETeq			/* Zero length so just exit */
152129250Scognet
153129250Scognet	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */
154129250Scognet
155129250Scognet	/* We have at least 4 bytes so copy as words */
156129250Scognet.Lmemset_loop4:
157129250Scognet	subs	r1, r1, #0x04
158129250Scognet	strge	r3, [ip], #0x04
159129250Scognet	bgt	.Lmemset_loop4
160137463Scognet	RETeq			/* Zero length so just exit */
161129250Scognet
	/*
	 * Tail: r1 is (bytes left - 4) after the loop.  The compare below
	 * sets "lt/eq/gt" for 1, 2 or 3 trailing bytes; the XScale variant
	 * first undoes the bias because the quad-align store above may have
	 * left nothing to write at all.
	 */
162129250Scognet#ifdef __XSCALE__
163129250Scognet	/* Compensate for 64-bit alignment check */
164129250Scognet	adds	r1, r1, #0x04
165137463Scognet	RETeq
166129250Scognet	cmp	r1, #2
167129250Scognet#else
168129250Scognet	cmp	r1, #-2
169129250Scognet#endif
170129250Scognet
171129250Scognet	strb	r3, [ip], #0x01		/* Set 1 byte */
172129250Scognet	strgeb	r3, [ip], #0x01		/* Set another byte */
173129250Scognet	strgtb	r3, [ip]		/* and a third */
174137463Scognet	RET			/* Exit */
175129250Scognet
176129250Scognet.Lmemset_wordunaligned:
	/*
	 * Entered with r2 = ip & 3 (non-zero) and r1 >= 4.  Writes the one
	 * to three bytes needed to reach a word boundary and charges them
	 * to the count.
	 */
177129250Scognet	rsb	r2, r2, #0x004		/* r2 = bytes up to the boundary */
178129250Scognet	strb	r3, [ip], #0x01		/* Set 1 byte */
179129250Scognet	cmp	r2, #0x02
180129250Scognet	strgeb	r3, [ip], #0x01		/* Set another byte */
181129250Scognet	sub	r1, r1, r2		/* Flags from the cmp stay intact */
182129250Scognet	strgtb	r3, [ip], #0x01		/* and a third */
183129250Scognet	cmp	r1, #0x04		/* More than 4 bytes left? */
184129250Scognet	bge	.Lmemset_wordaligned	/* Yup */
185129250Scognet
186129250Scognet.Lmemset_lessthanfour:
	/* Zero to three bytes left in r1; write them one at a time */
187129250Scognet	cmp	r1, #0x00
188137463Scognet	RETeq			/* Zero length so exit */
189129250Scognet	strb	r3, [ip], #0x01		/* Set 1 byte */
190129250Scognet	cmp	r1, #0x02
191129250Scognet	strgeb	r3, [ip], #0x01		/* Set another byte */
192129250Scognet	strgtb	r3, [ip]		/* and a third */
193137463Scognet	RET			/* Exit */
194129254Scognet
195144967ScognetENTRY(bcmp)
/*
 * bcmp: compare two byte ranges.
 *
 * On entry:
 *   r0 - first buffer
 *   r1 - second buffer
 *   r2 - length in bytes
 *
 * On exit:
 *   r0 - zero when the ranges match, when the length is zero or when
 *        both pointers are equal; otherwise the first buffer's byte
 *        minus the second buffer's byte at the first mismatch
 *
 * ip, r1, r2 and r3 are used as scratch.
 */
196129254Scognet	mov	ip, r0			/* ip = first buffer, r0 = result */
197129254Scognet	cmp	r2, #0x06
198129254Scognet	beq	.Lmemcmp_6bytes
199129254Scognet	mov	r0, #0x00
200129254Scognet
201129254Scognet	/* Are both addresses aligned the same way? */
202129254Scognet	cmp	r2, #0x00
203129254Scognet	eornes	r3, ip, r1		/* Skipped for len 0, so Z survives */
204137463Scognet	RETeq			/* len == 0, or same addresses! */
205129254Scognet	tst	r3, #0x03
206129254Scognet	subne	r2, r2, #0x01		/* bytewise2 takes the length minus 1 */
207129254Scognet	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */
208129254Scognet
209129254Scognet	/* Word-align the addresses, if necessary */
	/*
	 * Computed jump: r3 = ((r1 - 5) & 3) * 3, then pc += r3 * 8, i.e.
	 * 0, 24, 48 or 72 bytes.  Each 24 byte step skips one of the three
	 * six-instruction compare groups below, so the number of groups
	 * executed equals the number of bytes needed to reach a word
	 * boundary.  NOTE(review): this relies on RETne/RETeq assembling to
	 * exactly one instruction each - confirm against the RET macro.
	 */
210129254Scognet	sub	r3, r1, #0x05
211129254Scognet	ands	r3, r3, #0x03
212129254Scognet	add	r3, r3, r3, lsl #1
213129254Scognet	addne	pc, pc, r3, lsl #3
214129254Scognet	nop				/* Filler; pc above reads past it */
215129254Scognet
216129254Scognet	/* Compare up to 3 bytes */
217129254Scognet	ldrb	r0, [ip], #0x01
218129254Scognet	ldrb	r3, [r1], #0x01
219129254Scognet	subs	r0, r0, r3
220137463Scognet	RETne
221129254Scognet	subs	r2, r2, #0x01
222137463Scognet	RETeq
223129254Scognet
224129254Scognet	/* Compare up to 2 bytes */
225129254Scognet	ldrb	r0, [ip], #0x01
226129254Scognet	ldrb	r3, [r1], #0x01
227129254Scognet	subs	r0, r0, r3
228137463Scognet	RETne
229129254Scognet	subs	r2, r2, #0x01
230137463Scognet	RETeq
231129254Scognet
232129254Scognet	/* Compare 1 byte */
233129254Scognet	ldrb	r0, [ip], #0x01
234129254Scognet	ldrb	r3, [r1], #0x01
235129254Scognet	subs	r0, r0, r3
236137463Scognet	RETne
237129254Scognet	subs	r2, r2, #0x01
238137463Scognet	RETeq
239129254Scognet
240129254Scognet	/* Compare 4 bytes at a time, if possible */
241129254Scognet	subs	r2, r2, #0x04
242129254Scognet	bcc	.Lmemcmp_bytewise
243129254Scognet.Lmemcmp_word_aligned:
	/*
	 * The loop continues only while the count did not borrow and the
	 * two words are equal; cmpcs is skipped on a borrow, which leaves
	 * the non-zero flags from subs in place and ends the loop.
	 */
244129254Scognet	ldr	r0, [ip], #0x04
245129254Scognet	ldr	r3, [r1], #0x04
246129254Scognet	subs	r2, r2, #0x04
247129254Scognet	cmpcs	r0, r3
248129254Scognet	beq	.Lmemcmp_word_aligned
249129254Scognet	sub	r0, r0, r3
250129254Scognet
251129254Scognet	/* Correct for extra subtraction, and check if done */
252129254Scognet	adds	r2, r2, #0x04
253129254Scognet	cmpeq	r0, #0x00		/* If done, did all bytes match? */
254137463Scognet	RETeq			/* Yup. Just return */
255129254Scognet
256129254Scognet	/* Re-do the final word byte-wise */
257129254Scognet	sub	ip, ip, #0x04
258129254Scognet	sub	r1, r1, #0x04
259129254Scognet
260129254Scognet.Lmemcmp_bytewise:
261129254Scognet	add	r2, r2, #0x03		/* Rebias the count to length - 1 */
262129254Scognet.Lmemcmp_bytewise2:
	/* Same loop shape as the word loop, one byte per pass */
263129254Scognet	ldrb	r0, [ip], #0x01
264129254Scognet	ldrb	r3, [r1], #0x01
265129254Scognet	subs	r2, r2, #0x01
266129254Scognet	cmpcs	r0, r3
267129254Scognet	beq	.Lmemcmp_bytewise2
268129254Scognet	sub	r0, r0, r3
269137463Scognet	RET
270129254Scognet
271129254Scognet	/*
272129254Scognet	 * 6 byte compares are very common, thanks to the network stack.
273129254Scognet	 * This code is hand-scheduled to reduce the number of stalls for
274129254Scognet	 * load results. Everything else being equal, this will be ~32%
275129254Scognet	 * faster than a byte-wise memcmp.
276129254Scognet	 */
277129254Scognet	.align	5
278129254Scognet.Lmemcmp_6bytes:
	/*
	 * b1 is the buffer at ip, b2 the buffer at r1; "#n" is the byte
	 * index.  Loads for the next byte pair are issued before the result
	 * of the current subtraction is tested.
	 */
279129254Scognet	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
280129254Scognet	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
281129254Scognet	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
282129254Scognet	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
283129254Scognet	ldreqb	r3, [ip, #0x01]		/* r3 = b1#1 */
284137463Scognet	RETne			/* Return if mismatch on #0 */
285129254Scognet	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
286129254Scognet	ldreqb	r3, [r1, #0x02]		/* r3 = b2#2 */
287129254Scognet	ldreqb	r0, [ip, #0x02]		/* r0 = b1#2 */
288137463Scognet	RETne			/* Return if mismatch on #1 */
289129254Scognet	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
290129254Scognet	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
291129254Scognet	ldreqb	r3, [ip, #0x03]		/* r3 = b1#3 */
292137463Scognet	RETne			/* Return if mismatch on #2 */
293129254Scognet	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
294129254Scognet	ldreqb	r3, [r1, #0x04]		/* r3 = b2#4 */
295129254Scognet	ldreqb	r0, [ip, #0x04]		/* r0 = b1#4 */
296137463Scognet	RETne			/* Return if mismatch on #3 */
297129254Scognet	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
298129254Scognet	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
299129254Scognet	ldreqb	r3, [ip, #0x05]		/* r3 = b1#5 */
300137463Scognet	RETne			/* Return if mismatch on #4 */
301129254Scognet	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
302137463Scognet	RET
303129254Scognet
304129254ScognetENTRY(bcopy)
/*
 * bcopy: r0 = src, r1 = dst, r2 = len.  The first two arguments are
 * exchanged into memmove order and execution falls through into memmove.
 */
305143175Scognet	/* switch the source and destination registers */
306143175Scognet	eor     r0, r1, r0
307143175Scognet	eor     r1, r0, r1
308143175Scognet	eor     r0, r1, r0
309143175ScognetENTRY(memmove)
/*
 * memmove: r0 = dst, r1 = src, r2 = len.
 *
 * Requests whose buffers do not overlap are handed to memcpy with a tail
 * branch.  Overlapping requests are copied forwards when src > dst and
 * backwards (from the top down) when src < dst.  The forward path keeps
 * {r0, lr} on the stack and returns through it; lr is therefore free as
 * a data register on that path.
 */
310143175Scognet	/* Do the buffers overlap? */
311143175Scognet	cmp	r0, r1
312143175Scognet	RETeq		/* Bail now if src/dst are the same */
	/*
	 * After "cmp r0, r1" carry set means dst >= src (unsigned), so the
	 * cs/cc pair below yields the positive distance between the buffers.
	 */
313143175Scognet	subcs	r3, r0, r1	/* if (dst > src) r3 = dst - src */
314143175Scognet	subcc	r3, r1, r0	/* if (src > dst) r3 = src - dst */
315143175Scognet	cmp	r3, r2		/* if (r3 >= len) there is no overlap */
316143175Scognet	bcs	PIC_SYM(_C_LABEL(memcpy), PLT)
317143175Scognet
318143175Scognet	/* Determine copy direction */
319143175Scognet	cmp	r1, r0
320143175Scognet	bcc	.Lmemmove_backwards	/* src < dst: copy from the top down */
321143175Scognet
	/* src > dst from here on; src == dst already returned above */
325143175Scognet	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
326143175Scognet	subs	r2, r2, #4		/* r2 = len - 4 on the paths below */
327143175Scognet	blt	.Lmemmove_fl4		/* less than 4 bytes */
328143175Scognet	ands	r12, r0, #3
329143175Scognet	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
330143175Scognet	ands	r12, r1, #3
331143175Scognet	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */
332143175Scognet
333143175Scognet.Lmemmove_ft8:
	/*
	 * Forward copy with both pointers word aligned.
	 * r0 = dst cursor, r1 = src cursor, r2 = (bytes left - 4).
	 * The {r0, lr} pair pushed on the forward path is still on the
	 * stack, so lr is used as an ordinary data register here.
	 */
334143175Scognet	/* We have aligned source and destination */
335143175Scognet	subs	r2, r2, #8
336143175Scognet	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
337143175Scognet	subs	r2, r2, #0x14
338143175Scognet	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
339143175Scognet	stmdb	sp!, {r4}		/* borrow r4 */
340143175Scognet
341143175Scognet	/* blat 32 bytes at a time */
342143175Scognet	/* XXX for really big copies perhaps we should use more registers */
343143175Scognet.Lmemmove_floop32:
344143175Scognet	ldmia	r1!, {r3, r4, r12, lr}
345143175Scognet	stmia	r0!, {r3, r4, r12, lr}
346143175Scognet	ldmia	r1!, {r3, r4, r12, lr}
347143175Scognet	stmia	r0!, {r3, r4, r12, lr}
348143175Scognet	subs	r2, r2, #0x20
349143175Scognet	bge	.Lmemmove_floop32
350143175Scognet
	/* r2 = (bytes left - 32); "ge" after cmn means 16 or more remain */
351143175Scognet	cmn	r2, #0x10
352143175Scognet	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
353143175Scognet	stmgeia	r0!, {r3, r4, r12, lr}
354143175Scognet	subge	r2, r2, #0x10
355143175Scognet	ldmia	sp!, {r4}		/* return r4 */
356143175Scognet
357143175Scognet.Lmemmove_fl32:
358143175Scognet	adds	r2, r2, #0x14		/* back to (bytes left - 12) */
359143175Scognet
360143175Scognet	/* blat 12 bytes at a time */
361143175Scognet.Lmemmove_floop12:
362143175Scognet	ldmgeia	r1!, {r3, r12, lr}
363143175Scognet	stmgeia	r0!, {r3, r12, lr}
364143175Scognet	subges	r2, r2, #0x0c
365143175Scognet	bge	.Lmemmove_floop12
366143175Scognet
367143175Scognet.Lmemmove_fl12:
368143175Scognet	adds	r2, r2, #8		/* back to (bytes left - 4) */
369143175Scognet	blt	.Lmemmove_fl4
370143175Scognet
	/* 4 to 11 bytes left: move one word (lt) or two words (ge) */
371143175Scognet	subs	r2, r2, #4
372143175Scognet	ldrlt	r3, [r1], #4
373143175Scognet	strlt	r3, [r0], #4
374143175Scognet	ldmgeia	r1!, {r3, r12}
375143175Scognet	stmgeia	r0!, {r3, r12}
376143175Scognet	subge	r2, r2, #4
377143175Scognet
378143175Scognet.Lmemmove_fl4:
379143175Scognet	/* less than 4 bytes to go */
380143175Scognet	adds	r2, r2, #4		/* r2 = bytes left, 0 to 3 */
381143175Scognet	ldmeqia	sp!, {r0, pc}		/* done */
382143175Scognet
383143175Scognet	/* copy the crud byte at a time */
384143175Scognet	cmp	r2, #2
385143175Scognet	ldrb	r3, [r1], #1
386143175Scognet	strb	r3, [r0], #1
387143175Scognet	ldrgeb	r3, [r1], #1
388143175Scognet	strgeb	r3, [r0], #1
389143175Scognet	ldrgtb	r3, [r1], #1
390143175Scognet	strgtb	r3, [r0], #1
391143175Scognet	ldmia	sp!, {r0, pc}		/* restore dest as result, return */
392143175Scognet
393143175Scognet	/* erg - unaligned destination */
394143175Scognet.Lmemmove_fdestul:
	/*
	 * Forward copy, destination not word aligned.
	 * On entry r12 = dst & 3 (non-zero) and r2 = (bytes left - 4).
	 */
395143175Scognet	rsb	r12, r12, #4		/* r12 = bytes up to the boundary */
396143175Scognet	cmp	r12, #2
397143175Scognet
398143175Scognet	/* align destination with byte copies */
399143175Scognet	ldrb	r3, [r1], #1
400143175Scognet	strb	r3, [r0], #1
401143175Scognet	ldrgeb	r3, [r1], #1
402143175Scognet	strgeb	r3, [r0], #1
403143175Scognet	ldrgtb	r3, [r1], #1
404143175Scognet	strgtb	r3, [r0], #1
405143175Scognet	subs	r2, r2, r12
406143175Scognet	blt	.Lmemmove_fl4		/* less than 4 bytes */
407143175Scognet
408143175Scognet	ands	r12, r1, #3
409143175Scognet	beq	.Lmemmove_ft8		/* we have an aligned source */
410143175Scognet
411143175Scognet	/* erg - unaligned source */
412143175Scognet	/* This is where it gets nasty ... */
413143175Scognet.Lmemmove_fsrcul:
	/*
	 * Forward copy, destination aligned, source not.
	 * On entry r12 = src & 3 (1, 2 or 3) and r2 = (bytes left - 4).
	 * The source is read with aligned word loads only; lr always holds
	 * the most recently loaded word, whose leftover bytes are merged
	 * with the next word by shifting.  __ARMEB__ selects the mirrored
	 * shift directions for big-endian builds.
	 */
414143175Scognet	bic	r1, r1, #3		/* round src down to a word */
415143175Scognet	ldr	lr, [r1], #4
416143175Scognet	cmp	r12, #2
417143175Scognet	bgt	.Lmemmove_fsrcul3
418143175Scognet	beq	.Lmemmove_fsrcul2
	/* Source offset 1: three bytes of every loaded word are pending */
419143175Scognet	cmp	r2, #0x0c
420143175Scognet	blt	.Lmemmove_fsrcul1loop4	/* fewer than 16 bytes left */
421143175Scognet	sub	r2, r2, #0x0c		/* r2 = (bytes left - 16) */
422143175Scognet	stmdb	sp!, {r4, r5}
423143175Scognet
424143175Scognet.Lmemmove_fsrcul1loop16:
	/* Four words in, four merged words out per pass */
425143175Scognet#ifdef __ARMEB__
426143175Scognet	mov	r3, lr, lsl #8
427143175Scognet#else
428143175Scognet	mov	r3, lr, lsr #8
429143175Scognet#endif
430143175Scognet	ldmia	r1!, {r4, r5, r12, lr}
431143175Scognet#ifdef __ARMEB__
432143175Scognet	orr	r3, r3, r4, lsr #24
433143175Scognet	mov	r4, r4, lsl #8
434143175Scognet	orr	r4, r4, r5, lsr #24
435143175Scognet	mov	r5, r5, lsl #8
436143175Scognet	orr	r5, r5, r12, lsr #24
437143175Scognet	mov	r12, r12, lsl #8
438143175Scognet	orr	r12, r12, lr, lsr #24
439143175Scognet#else
440143175Scognet	orr	r3, r3, r4, lsl #24
441143175Scognet	mov	r4, r4, lsr #8
442143175Scognet	orr	r4, r4, r5, lsl #24
443143175Scognet	mov	r5, r5, lsr #8
444143175Scognet	orr	r5, r5, r12, lsl #24
445143175Scognet	mov	r12, r12, lsr #8
446143175Scognet	orr	r12, r12, lr, lsl #24
447143175Scognet#endif
448143175Scognet	stmia	r0!, {r3-r5, r12}
449143175Scognet	subs	r2, r2, #0x10
450143175Scognet	bge	.Lmemmove_fsrcul1loop16
451143175Scognet	ldmia	sp!, {r4, r5}
452143175Scognet	adds	r2, r2, #0x0c		/* back to (bytes left - 4) */
453143175Scognet	blt	.Lmemmove_fsrcul1l4
454143175Scognet
455143175Scognet.Lmemmove_fsrcul1loop4:
	/* Same merge, one word per pass */
456143175Scognet#ifdef __ARMEB__
457143175Scognet	mov	r12, lr, lsl #8
458143175Scognet#else
459143175Scognet	mov	r12, lr, lsr #8
460143175Scognet#endif
461143175Scognet	ldr	lr, [r1], #4
462143175Scognet#ifdef __ARMEB__
463143175Scognet	orr	r12, r12, lr, lsr #24
464143175Scognet#else
465143175Scognet	orr	r12, r12, lr, lsl #24
466143175Scognet#endif
467143175Scognet	str	r12, [r0], #4
468143175Scognet	subs	r2, r2, #4
469143175Scognet	bge	.Lmemmove_fsrcul1loop4
470143175Scognet
471143175Scognet.Lmemmove_fsrcul1l4:
472143175Scognet	sub	r1, r1, #3		/* step back over the 3 pending bytes */
473143175Scognet	b	.Lmemmove_fl4
474143175Scognet
475143175Scognet.Lmemmove_fsrcul2:
	/*
	 * Forward copy, source offset 2 within its word.  lr holds the last
	 * aligned word loaded, r1 points at the next aligned word and
	 * r2 = (bytes left - 4).  Every output word takes two bytes from the
	 * previous source word and two from the next one.
	 */
476143175Scognet	cmp	r2, #0x0c
477143175Scognet	blt	.Lmemmove_fsrcul2loop4	/* fewer than 16 bytes left */
478143175Scognet	sub	r2, r2, #0x0c		/* r2 = (bytes left - 16) */
479143175Scognet	stmdb	sp!, {r4, r5}
480143175Scognet
481143175Scognet.Lmemmove_fsrcul2loop16:
	/* Four words in, four merged words out per pass */
482143175Scognet#ifdef __ARMEB__
483143175Scognet	mov	r3, lr, lsl #16
484143175Scognet#else
485143175Scognet	mov	r3, lr, lsr #16
486143175Scognet#endif
487143175Scognet	ldmia	r1!, {r4, r5, r12, lr}
488143175Scognet#ifdef __ARMEB__
489143175Scognet	orr	r3, r3, r4, lsr #16
490143175Scognet	mov	r4, r4, lsl #16
491143175Scognet	orr	r4, r4, r5, lsr #16
492143175Scognet	mov	r5, r5, lsl #16
493143175Scognet	orr	r5, r5, r12, lsr #16
494143175Scognet	mov	r12, r12, lsl #16
495143175Scognet	orr	r12, r12, lr, lsr #16
496143175Scognet#else
497143175Scognet	orr	r3, r3, r4, lsl #16
498143175Scognet	mov	r4, r4, lsr #16
499143175Scognet	orr	r4, r4, r5, lsl #16
500143175Scognet	mov	r5, r5, lsr #16
501143175Scognet	orr	r5, r5, r12, lsl #16
502143175Scognet	mov	r12, r12, lsr #16
503143175Scognet	orr	r12, r12, lr, lsl #16
504143175Scognet#endif
505143175Scognet	stmia	r0!, {r3-r5, r12}
506143175Scognet	subs	r2, r2, #0x10
507143175Scognet	bge	.Lmemmove_fsrcul2loop16
508143175Scognet	ldmia	sp!, {r4, r5}
509143175Scognet	adds	r2, r2, #0x0c		/* back to (bytes left - 4) */
510143175Scognet	blt	.Lmemmove_fsrcul2l4
511143175Scognet
512143175Scognet.Lmemmove_fsrcul2loop4:
	/* Same merge, one word per pass */
513143175Scognet#ifdef __ARMEB__
514143175Scognet	mov	r12, lr, lsl #16
515143175Scognet#else
516143175Scognet	mov	r12, lr, lsr #16
517143175Scognet#endif
518143175Scognet	ldr	lr, [r1], #4
519143175Scognet#ifdef __ARMEB__
520143175Scognet	orr	r12, r12, lr, lsr #16
521143175Scognet#else
522143175Scognet	orr	r12, r12, lr, lsl #16
523143175Scognet#endif
524143175Scognet	str	r12, [r0], #4
525143175Scognet	subs	r2, r2, #4
526143175Scognet	bge	.Lmemmove_fsrcul2loop4
527143175Scognet
528143175Scognet.Lmemmove_fsrcul2l4:
529143175Scognet	sub	r1, r1, #2		/* step back over the 2 pending bytes */
530143175Scognet	b	.Lmemmove_fl4
531143175Scognet
532143175Scognet.Lmemmove_fsrcul3:
	/*
	 * Forward copy, source offset 3 within its word.  lr holds the last
	 * aligned word loaded, r1 points at the next aligned word and
	 * r2 = (bytes left - 4).  Every output word takes one byte from the
	 * previous source word and three from the next one.
	 */
533143175Scognet	cmp	r2, #0x0c
534143175Scognet	blt	.Lmemmove_fsrcul3loop4	/* fewer than 16 bytes left */
535143175Scognet	sub	r2, r2, #0x0c		/* r2 = (bytes left - 16) */
536143175Scognet	stmdb	sp!, {r4, r5}
537143175Scognet
538143175Scognet.Lmemmove_fsrcul3loop16:
	/* Four words in, four merged words out per pass */
539143175Scognet#ifdef __ARMEB__
540143175Scognet	mov	r3, lr, lsl #24
541143175Scognet#else
542143175Scognet	mov	r3, lr, lsr #24
543143175Scognet#endif
544143175Scognet	ldmia	r1!, {r4, r5, r12, lr}
545143175Scognet#ifdef __ARMEB__
546143175Scognet	orr	r3, r3, r4, lsr #8
547143175Scognet	mov	r4, r4, lsl #24
548143175Scognet	orr	r4, r4, r5, lsr #8
549143175Scognet	mov	r5, r5, lsl #24
550143175Scognet	orr	r5, r5, r12, lsr #8
551143175Scognet	mov	r12, r12, lsl #24
552143175Scognet	orr	r12, r12, lr, lsr #8
553143175Scognet#else
554143175Scognet	orr	r3, r3, r4, lsl #8
555143175Scognet	mov	r4, r4, lsr #24
556143175Scognet	orr	r4, r4, r5, lsl #8
557143175Scognet	mov	r5, r5, lsr #24
558143175Scognet	orr	r5, r5, r12, lsl #8
559143175Scognet	mov	r12, r12, lsr #24
560143175Scognet	orr	r12, r12, lr, lsl #8
561143175Scognet#endif
562143175Scognet	stmia	r0!, {r3-r5, r12}
563143175Scognet	subs	r2, r2, #0x10
564143175Scognet	bge	.Lmemmove_fsrcul3loop16
565143175Scognet	ldmia	sp!, {r4, r5}
566143175Scognet	adds	r2, r2, #0x0c		/* back to (bytes left - 4) */
567143175Scognet	blt	.Lmemmove_fsrcul3l4
568143175Scognet
569143175Scognet.Lmemmove_fsrcul3loop4:
	/* Same merge, one word per pass */
570143175Scognet#ifdef __ARMEB__
571143175Scognet	mov	r12, lr, lsl #24
572143175Scognet#else
573143175Scognet	mov	r12, lr, lsr #24
574143175Scognet#endif
575143175Scognet	ldr	lr, [r1], #4
576143175Scognet#ifdef __ARMEB__
577143175Scognet	orr	r12, r12, lr, lsr #8
578143175Scognet#else
579143175Scognet	orr	r12, r12, lr, lsl #8
580143175Scognet#endif
581143175Scognet	str	r12, [r0], #4
582143175Scognet	subs	r2, r2, #4
583143175Scognet	bge	.Lmemmove_fsrcul3loop4
584143175Scognet
585143175Scognet.Lmemmove_fsrcul3l4:
586143175Scognet	sub	r1, r1, #1		/* step back over the 1 pending byte */
587143175Scognet	b	.Lmemmove_fl4
588143175Scognet
589143175Scognet.Lmemmove_backwards:
	/*
	 * Backward copy, taken when src < dst.  Both cursors are moved to
	 * one past the end of their buffers and every transfer below
	 * pre-decrements, so r0 finishes back at the start of dst.  Nothing
	 * is pushed at entry on this path: lr is saved only around the
	 * sections that use it as a data register, and the return is
	 * through RET.
	 */
590143175Scognet	add	r1, r1, r2
591143175Scognet	add	r0, r0, r2
592143175Scognet	subs	r2, r2, #4		/* r2 = (bytes left - 4) */
593143175Scognet	blt	.Lmemmove_bl4		/* less than 4 bytes */
594143175Scognet	ands	r12, r0, #3
595143175Scognet	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
596143175Scognet	ands	r12, r1, #3
597143175Scognet	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */
598143175Scognet
599143175Scognet.Lmemmove_bt8:
600143175Scognet	/* We have aligned source and destination */
601143175Scognet	subs	r2, r2, #8
602143175Scognet	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
603143175Scognet	stmdb	sp!, {r4, lr}		/* both are used as data registers */
604143175Scognet	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
605143175Scognet	blt	.Lmemmove_bl32
606143175Scognet
607143175Scognet	/* blat 32 bytes at a time */
608143175Scognet	/* XXX for really big copies perhaps we should use more registers */
609143175Scognet.Lmemmove_bloop32:
610143175Scognet	ldmdb	r1!, {r3, r4, r12, lr}
611143175Scognet	stmdb	r0!, {r3, r4, r12, lr}
612143175Scognet	ldmdb	r1!, {r3, r4, r12, lr}
613143175Scognet	stmdb	r0!, {r3, r4, r12, lr}
614143175Scognet	subs	r2, r2, #0x20
615143175Scognet	bge	.Lmemmove_bloop32
616143175Scognet
617143175Scognet.Lmemmove_bl32:
	/* r2 = (bytes left - 32); "ge" after cmn means 16 or more remain */
618143175Scognet	cmn	r2, #0x10
619143175Scognet	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
620143175Scognet	stmgedb	r0!, {r3, r4, r12, lr}
621143175Scognet	subge	r2, r2, #0x10
622143175Scognet	adds	r2, r2, #0x14		/* back to (bytes left - 12) */
623143175Scognet	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
624143175Scognet	stmgedb	r0!, {r3, r12, lr}
625143175Scognet	subge	r2, r2, #0x0c
626143175Scognet	ldmia	sp!, {r4, lr}
627143175Scognet
628143175Scognet.Lmemmove_bl12:
629143175Scognet	adds	r2, r2, #8		/* back to (bytes left - 4) */
630143175Scognet	blt	.Lmemmove_bl4
	/* 4 to 11 bytes left: move one word (lt) or two words (ge) */
631143175Scognet	subs	r2, r2, #4
632143175Scognet	ldrlt	r3, [r1, #-4]!
633143175Scognet	strlt	r3, [r0, #-4]!
634143175Scognet	ldmgedb	r1!, {r3, r12}
635143175Scognet	stmgedb	r0!, {r3, r12}
636143175Scognet	subge	r2, r2, #4
637143175Scognet
638143175Scognet.Lmemmove_bl4:
639143175Scognet	/* less than 4 bytes to go */
640143175Scognet	adds	r2, r2, #4		/* r2 = bytes left, 0 to 3 */
641143175Scognet	RETeq			/* done */
642143175Scognet
643143175Scognet	/* copy the crud byte at a time */
644143175Scognet	cmp	r2, #2
645143175Scognet	ldrb	r3, [r1, #-1]!
646143175Scognet	strb	r3, [r0, #-1]!
647143175Scognet	ldrgeb	r3, [r1, #-1]!
648143175Scognet	strgeb	r3, [r0, #-1]!
649143175Scognet	ldrgtb	r3, [r1, #-1]!
650143175Scognet	strgtb	r3, [r0, #-1]!
651143175Scognet	RET
652143175Scognet
653143175Scognet	/* erg - unaligned destination */
654143175Scognet.Lmemmove_bdestul:
	/*
	 * Backward copy, destination end not word aligned.
	 * On entry r12 = r0 & 3 (non-zero), which is also the number of
	 * bytes to move downwards before r0 sits on a word boundary, and
	 * r2 = (bytes left - 4).
	 */
655143175Scognet	cmp	r12, #2
656143175Scognet
657143175Scognet	/* align destination with byte copies */
658143175Scognet	ldrb	r3, [r1, #-1]!
659143175Scognet	strb	r3, [r0, #-1]!
660143175Scognet	ldrgeb	r3, [r1, #-1]!
661143175Scognet	strgeb	r3, [r0, #-1]!
662143175Scognet	ldrgtb	r3, [r1, #-1]!
663143175Scognet	strgtb	r3, [r0, #-1]!
664143175Scognet	subs	r2, r2, r12
665143175Scognet	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
666143175Scognet	ands	r12, r1, #3
667143175Scognet	beq	.Lmemmove_bt8		/* we have an aligned source */
668143175Scognet
669143175Scognet	/* erg - unaligned source */
670143175Scognet	/* This is where it gets nasty ... */
671143175Scognet.Lmemmove_bsrcul:
	/*
	 * Backward copy, destination aligned, source not.
	 * On entry r12 = r1 & 3 (1, 2 or 3) and r2 = (bytes left - 4).
	 * The source is read with aligned word loads only; r3 always holds
	 * the most recently loaded word, whose leftover bytes are merged
	 * with the next lower word by shifting.  __ARMEB__ selects the
	 * mirrored shift directions for big-endian builds.
	 */
672143175Scognet	bic	r1, r1, #3		/* round src down to a word */
673143175Scognet	ldr	r3, [r1, #0]
674143175Scognet	cmp	r12, #2
675143175Scognet	blt	.Lmemmove_bsrcul1
676143175Scognet	beq	.Lmemmove_bsrcul2
	/* Source offset 3: three bytes of every loaded word are pending */
677143175Scognet	cmp	r2, #0x0c
678143175Scognet	blt	.Lmemmove_bsrcul3loop4	/* fewer than 16 bytes left */
679143175Scognet	sub	r2, r2, #0x0c		/* r2 = (bytes left - 16) */
680143175Scognet	stmdb	sp!, {r4, r5, lr}	/* lr is a data register below */
681143175Scognet
682143175Scognet.Lmemmove_bsrcul3loop16:
	/* Four words in, four merged words out per pass */
683143175Scognet#ifdef __ARMEB__
684143175Scognet	mov	lr, r3, lsr #8
685143175Scognet#else
686143175Scognet	mov	lr, r3, lsl #8
687143175Scognet#endif
688143175Scognet	ldmdb	r1!, {r3-r5, r12}
689143175Scognet#ifdef __ARMEB__
690143175Scognet	orr	lr, lr, r12, lsl #24
691143175Scognet	mov	r12, r12, lsr #8
692143175Scognet	orr	r12, r12, r5, lsl #24
693143175Scognet	mov	r5, r5, lsr #8
694143175Scognet	orr	r5, r5, r4, lsl #24
695143175Scognet	mov	r4, r4, lsr #8
696143175Scognet	orr	r4, r4, r3, lsl #24
697143175Scognet#else
698143175Scognet	orr	lr, lr, r12, lsr #24
699143175Scognet	mov	r12, r12, lsl #8
700143175Scognet	orr	r12, r12, r5, lsr #24
701143175Scognet	mov	r5, r5, lsl #8
702143175Scognet	orr	r5, r5, r4, lsr #24
703143175Scognet	mov	r4, r4, lsl #8
704143175Scognet	orr	r4, r4, r3, lsr #24
705143175Scognet#endif
706143175Scognet	stmdb	r0!, {r4, r5, r12, lr}
707143175Scognet	subs	r2, r2, #0x10
708143175Scognet	bge	.Lmemmove_bsrcul3loop16
709143175Scognet	ldmia	sp!, {r4, r5, lr}
710143175Scognet	adds	r2, r2, #0x0c		/* back to (bytes left - 4) */
711143175Scognet	blt	.Lmemmove_bsrcul3l4
712143175Scognet
713143175Scognet.Lmemmove_bsrcul3loop4:
	/* Same merge, one word per pass */
714143175Scognet#ifdef __ARMEB__
715143175Scognet	mov	r12, r3, lsr #8
716143175Scognet#else
717143175Scognet	mov	r12, r3, lsl #8
718143175Scognet#endif
719143175Scognet	ldr	r3, [r1, #-4]!
720143175Scognet#ifdef __ARMEB__
721143175Scognet	orr	r12, r12, r3, lsl #24
722143175Scognet#else
723143175Scognet	orr	r12, r12, r3, lsr #24
724143175Scognet#endif
725143175Scognet	str	r12, [r0, #-4]!
726143175Scognet	subs	r2, r2, #4
727143175Scognet	bge	.Lmemmove_bsrcul3loop4
728143175Scognet
729143175Scognet.Lmemmove_bsrcul3l4:
730143175Scognet	add	r1, r1, #3		/* step up past the 3 pending bytes */
731143175Scognet	b	.Lmemmove_bl4
732143175Scognet
733143175Scognet.Lmemmove_bsrcul2:
	/*
	 * Backward copy, source offset 2 within its word.  r3 holds the last
	 * aligned word loaded, r1 points at that word and
	 * r2 = (bytes left - 4).  Every output word takes two bytes from the
	 * previous source word and two from the next lower one.
	 */
734143175Scognet	cmp	r2, #0x0c
735143175Scognet	blt	.Lmemmove_bsrcul2loop4	/* fewer than 16 bytes left */
736143175Scognet	sub	r2, r2, #0x0c		/* r2 = (bytes left - 16) */
737143175Scognet	stmdb	sp!, {r4, r5, lr}	/* lr is a data register below */
738143175Scognet
739143175Scognet.Lmemmove_bsrcul2loop16:
	/* Four words in, four merged words out per pass */
740143175Scognet#ifdef __ARMEB__
741143175Scognet	mov	lr, r3, lsr #16
742143175Scognet#else
743143175Scognet	mov	lr, r3, lsl #16
744143175Scognet#endif
745143175Scognet	ldmdb	r1!, {r3-r5, r12}
746143175Scognet#ifdef __ARMEB__
747143175Scognet	orr	lr, lr, r12, lsl #16
748143175Scognet	mov	r12, r12, lsr #16
749143175Scognet	orr	r12, r12, r5, lsl #16
750143175Scognet	mov	r5, r5, lsr #16
751143175Scognet	orr	r5, r5, r4, lsl #16
752143175Scognet	mov	r4, r4, lsr #16
753143175Scognet	orr	r4, r4, r3, lsl #16
754143175Scognet#else
755143175Scognet	orr	lr, lr, r12, lsr #16
756143175Scognet	mov	r12, r12, lsl #16
757143175Scognet	orr	r12, r12, r5, lsr #16
758143175Scognet	mov	r5, r5, lsl #16
759143175Scognet	orr	r5, r5, r4, lsr #16
760143175Scognet	mov	r4, r4, lsl #16
761143175Scognet	orr	r4, r4, r3, lsr #16
762143175Scognet#endif
763143175Scognet	stmdb	r0!, {r4, r5, r12, lr}
764143175Scognet	subs	r2, r2, #0x10
765143175Scognet	bge	.Lmemmove_bsrcul2loop16
766143175Scognet	ldmia	sp!, {r4, r5, lr}
767143175Scognet	adds	r2, r2, #0x0c		/* back to (bytes left - 4) */
768143175Scognet	blt	.Lmemmove_bsrcul2l4
769143175Scognet
770143175Scognet.Lmemmove_bsrcul2loop4:
	/* Same merge, one word per pass */
771143175Scognet#ifdef __ARMEB__
772143175Scognet	mov	r12, r3, lsr #16
773143175Scognet#else
774143175Scognet	mov	r12, r3, lsl #16
775143175Scognet#endif
776143175Scognet	ldr	r3, [r1, #-4]!
777143175Scognet#ifdef __ARMEB__
778143175Scognet	orr	r12, r12, r3, lsl #16
779143175Scognet#else
780143175Scognet	orr	r12, r12, r3, lsr #16
781143175Scognet#endif
782143175Scognet	str	r12, [r0, #-4]!
783143175Scognet	subs	r2, r2, #4
784143175Scognet	bge	.Lmemmove_bsrcul2loop4
785143175Scognet
786143175Scognet.Lmemmove_bsrcul2l4:
787143175Scognet	add	r1, r1, #2		/* step up past the 2 pending bytes */
788143175Scognet	b	.Lmemmove_bl4
789143175Scognet
790143175Scognet.Lmemmove_bsrcul1:
	/*
	 * Backward copy, source offset 1 within its word.  r3 holds the last
	 * aligned word loaded, r1 points at that word and
	 * r2 = (bytes left - 4).  Every output word takes one byte from the
	 * previous source word and three from the next lower one.
	 */
791143175Scognet	cmp	r2, #0x0c
792143175Scognet	blt	.Lmemmove_bsrcul1loop4	/* fewer than 16 bytes left */
793143175Scognet	sub	r2, r2, #0x0c		/* r2 = (bytes left - 16) */
794143175Scognet	stmdb	sp!, {r4, r5, lr}	/* lr is a data register below */
795143175Scognet
796143175Scognet.Lmemmove_bsrcul1loop32:
	/*
	 * Despite the label name, each pass moves 16 bytes: four words in,
	 * four merged words out, and the count drops by 0x10.
	 */
797143175Scognet#ifdef __ARMEB__
798143175Scognet	mov	lr, r3, lsr #24
799143175Scognet#else
800143175Scognet	mov	lr, r3, lsl #24
801143175Scognet#endif
802143175Scognet	ldmdb	r1!, {r3-r5, r12}
803143175Scognet#ifdef __ARMEB__
804143175Scognet	orr	lr, lr, r12, lsl #8
805143175Scognet	mov	r12, r12, lsr #24
806143175Scognet	orr	r12, r12, r5, lsl #8
807143175Scognet	mov	r5, r5, lsr #24
808143175Scognet	orr	r5, r5, r4, lsl #8
809143175Scognet	mov	r4, r4, lsr #24
810143175Scognet	orr	r4, r4, r3, lsl #8
811143175Scognet#else
812143175Scognet	orr	lr, lr, r12, lsr #8
813143175Scognet	mov	r12, r12, lsl #24
814143175Scognet	orr	r12, r12, r5, lsr #8
815143175Scognet	mov	r5, r5, lsl #24
816143175Scognet	orr	r5, r5, r4, lsr #8
817143175Scognet	mov	r4, r4, lsl #24
818143175Scognet	orr	r4, r4, r3, lsr #8
819143175Scognet#endif
820143175Scognet	stmdb	r0!, {r4, r5, r12, lr}
821143175Scognet	subs	r2, r2, #0x10
822143175Scognet	bge	.Lmemmove_bsrcul1loop32
823143175Scognet	ldmia	sp!, {r4, r5, lr}
824143175Scognet	adds	r2, r2, #0x0c		/* back to (bytes left - 4) */
825143175Scognet	blt	.Lmemmove_bsrcul1l4
826143175Scognet
827143175Scognet.Lmemmove_bsrcul1loop4:
	/* Same merge, one word per pass */
828143175Scognet#ifdef __ARMEB__
829143175Scognet	mov	r12, r3, lsr #24
830143175Scognet#else
831143175Scognet	mov	r12, r3, lsl #24
832143175Scognet#endif
833143175Scognet	ldr	r3, [r1, #-4]!
834143175Scognet#ifdef __ARMEB__
835143175Scognet	orr	r12, r12, r3, lsl #8
836143175Scognet#else
837143175Scognet	orr	r12, r12, r3, lsr #8
838143175Scognet#endif
839143175Scognet	str	r12, [r0, #-4]!
840143175Scognet	subs	r2, r2, #4
841143175Scognet	bge	.Lmemmove_bsrcul1loop4
842143175Scognet
843143175Scognet.Lmemmove_bsrcul1l4:
844143175Scognet	add	r1, r1, #1		/* step up past the 1 pending byte */
845143175Scognet	b	.Lmemmove_bl4
846143175Scognet
847129254Scognet#if !defined(__XSCALE__)
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * Generic (non-XScale) forward copy.
 *
 *   In:      r0 = dst, r1 = src, r2 = len
 *   Out:     r0 = dst (pushed on entry, popped back on every return path)
 *   Scratch: r3, r12, lr.  r4 (aligned bulk loop) and r4/r5 (unaligned
 *            bulk loops) are saved on the stack around their use.
 *
 * r2 is kept biased throughout: each stage subtracts its chunk size up
 * front so that "r2 >= 0" means "at least one more chunk fits", and adds
 * the bias back before falling into the next, smaller stage.  On arrival
 * at .Lmemcpy_l4 r2 always holds (bytes remaining - 4).
 *
 * When the source cannot be word aligned, it is read as aligned words and
 * each output word is spliced from two neighbouring input words.  Which
 * end of a word holds the lowest-addressed byte depends on endianness, so
 * the shift directions are selected with __ARMEB__, in the same way as the
 * memmove and XScale memcpy code in this file.
 */
ENTRY(memcpy)
	/* Stash dst and the return address; exits pop them as {r0, pc} */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	/* r2 is now negative; r2 + 16 >= 0 means 16 more bytes still fit */
	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* back to the (remaining - 12) bias */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* back to the (remaining - 4) bias */
	blt	.Lmemcpy_l4

	/* 4..11 bytes left: copy one word, or two words if 8 or more remain */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* undo the bias: r2 = bytes remaining */
	/*
	 * NOTE(review): the macro below is spelled with a single trailing
	 * underscore; the 26-bit APCS macro is normally __APCS_26__.
	 * Left unchanged here -- confirm intent.
	 */
#ifdef __APCS_26_
	ldmeqia sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2			/* 1: first only, 2: +ge, 3: +ge +gt */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	/*
	 * r12 = src & 3 (1, 2 or 3).  Round src down to a word boundary and
	 * preload the first word into lr; from here on lr always holds the
	 * most recently loaded source word, part of which is still uncopied.
	 */
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2

	/* Source is 1 byte past a word boundary */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c		/* bias for 16-byte chunks */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* back to the (remaining - 4) bias */
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	/* r1 is one word ahead of lr; step back to the first uncopied byte */
	sub	r1, r1, #3
	b	.Lmemcpy_l4

	/* Source is 2 bytes past a word boundary */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c		/* bias for 16-byte chunks */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* back to the (remaining - 4) bias */
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	/* r1 is one word ahead of lr; step back to the first uncopied byte */
	sub	r1, r1, #2
	b	.Lmemcpy_l4

	/* Source is 3 bytes past a word boundary */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c		/* bias for 16-byte chunks */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* back to the (remaining - 4) bias */
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	/* r1 is one word ahead of lr; step back to the first uncopied byte */
	sub	r1, r1, #1
	b	.Lmemcpy_l4
1051129254Scognet#else
1052129254Scognet/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1053129254ScognetENTRY(memcpy)
1054129254Scognet	pld	[r1]
1055129254Scognet	cmp	r2, #0x0c
1056129254Scognet	ble	.Lmemcpy_short		/* <= 12 bytes */
1057129254Scognet	mov	r3, r0			/* We must not clobber r0 */
1058129254Scognet
1059129254Scognet	/* Word-align the destination buffer */
1060129254Scognet	ands	ip, r3, #0x03		/* Already word aligned? */
1061129254Scognet	beq	.Lmemcpy_wordaligned	/* Yup */
1062129254Scognet	cmp	ip, #0x02
1063129254Scognet	ldrb	ip, [r1], #0x01
1064129254Scognet	sub	r2, r2, #0x01
1065129254Scognet	strb	ip, [r3], #0x01
1066129254Scognet	ldrleb	ip, [r1], #0x01
1067129254Scognet	suble	r2, r2, #0x01
1068129254Scognet	strleb	ip, [r3], #0x01
1069129254Scognet	ldrltb	ip, [r1], #0x01
1070129254Scognet	sublt	r2, r2, #0x01
1071129254Scognet	strltb	ip, [r3], #0x01
1072129254Scognet
1073129254Scognet	/* Destination buffer is now word aligned */
1074129254Scognet.Lmemcpy_wordaligned:
1075129254Scognet	ands	ip, r1, #0x03		/* Is src also word-aligned? */
1076129254Scognet	bne	.Lmemcpy_bad_align	/* Nope. Things just got bad */
1077129254Scognet
1078129254Scognet	/* Quad-align the destination buffer */
1079129254Scognet	tst	r3, #0x07		/* Already quad aligned? */
1080129254Scognet	ldrne	ip, [r1], #0x04
1081129254Scognet	stmfd	sp!, {r4-r9}		/* Free up some registers */
1082129254Scognet	subne	r2, r2, #0x04
1083129254Scognet	strne	ip, [r3], #0x04
1084129254Scognet
1085129254Scognet	/* Destination buffer quad aligned, source is at least word aligned */
1086129254Scognet	subs	r2, r2, #0x80
1087129254Scognet	blt	.Lmemcpy_w_lessthan128
1088129254Scognet
1089129254Scognet	/* Copy 128 bytes at a time */
1090129254Scognet.Lmemcpy_w_loop128:
1091129254Scognet	ldr	r4, [r1], #0x04		/* LD:00-03 */
1092129254Scognet	ldr	r5, [r1], #0x04		/* LD:04-07 */
1093129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x20 */
1094129254Scognet	ldr	r6, [r1], #0x04		/* LD:08-0b */
1095129254Scognet	ldr	r7, [r1], #0x04		/* LD:0c-0f */
1096129254Scognet	ldr	r8, [r1], #0x04		/* LD:10-13 */
1097129254Scognet	ldr	r9, [r1], #0x04		/* LD:14-17 */
1098129254Scognet	strd	r4, [r3], #0x08		/* ST:00-07 */
1099129254Scognet	ldr	r4, [r1], #0x04		/* LD:18-1b */
1100129254Scognet	ldr	r5, [r1], #0x04		/* LD:1c-1f */
1101129254Scognet	strd	r6, [r3], #0x08		/* ST:08-0f */
1102129254Scognet	ldr	r6, [r1], #0x04		/* LD:20-23 */
1103129254Scognet	ldr	r7, [r1], #0x04		/* LD:24-27 */
1104129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x40 */
1105129254Scognet	strd	r8, [r3], #0x08		/* ST:10-17 */
1106129254Scognet	ldr	r8, [r1], #0x04		/* LD:28-2b */
1107129254Scognet	ldr	r9, [r1], #0x04		/* LD:2c-2f */
1108129254Scognet	strd	r4, [r3], #0x08		/* ST:18-1f */
1109129254Scognet	ldr	r4, [r1], #0x04		/* LD:30-33 */
1110129254Scognet	ldr	r5, [r1], #0x04		/* LD:34-37 */
1111129254Scognet	strd	r6, [r3], #0x08		/* ST:20-27 */
1112129254Scognet	ldr	r6, [r1], #0x04		/* LD:38-3b */
1113129254Scognet	ldr	r7, [r1], #0x04		/* LD:3c-3f */
1114129254Scognet	strd	r8, [r3], #0x08		/* ST:28-2f */
1115129254Scognet	ldr	r8, [r1], #0x04		/* LD:40-43 */
1116129254Scognet	ldr	r9, [r1], #0x04		/* LD:44-47 */
1117129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x60 */
1118129254Scognet	strd	r4, [r3], #0x08		/* ST:30-37 */
1119129254Scognet	ldr	r4, [r1], #0x04		/* LD:48-4b */
1120129254Scognet	ldr	r5, [r1], #0x04		/* LD:4c-4f */
1121129254Scognet	strd	r6, [r3], #0x08		/* ST:38-3f */
1122129254Scognet	ldr	r6, [r1], #0x04		/* LD:50-53 */
1123129254Scognet	ldr	r7, [r1], #0x04		/* LD:54-57 */
1124129254Scognet	strd	r8, [r3], #0x08		/* ST:40-47 */
1125129254Scognet	ldr	r8, [r1], #0x04		/* LD:58-5b */
1126129254Scognet	ldr	r9, [r1], #0x04		/* LD:5c-5f */
1127129254Scognet	strd	r4, [r3], #0x08		/* ST:48-4f */
1128129254Scognet	ldr	r4, [r1], #0x04		/* LD:60-63 */
1129129254Scognet	ldr	r5, [r1], #0x04		/* LD:64-67 */
1130129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x80 */
1131129254Scognet	strd	r6, [r3], #0x08		/* ST:50-57 */
1132129254Scognet	ldr	r6, [r1], #0x04		/* LD:68-6b */
1133129254Scognet	ldr	r7, [r1], #0x04		/* LD:6c-6f */
1134129254Scognet	strd	r8, [r3], #0x08		/* ST:58-5f */
1135129254Scognet	ldr	r8, [r1], #0x04		/* LD:70-73 */
1136129254Scognet	ldr	r9, [r1], #0x04		/* LD:74-77 */
1137129254Scognet	strd	r4, [r3], #0x08		/* ST:60-67 */
1138129254Scognet	ldr	r4, [r1], #0x04		/* LD:78-7b */
1139129254Scognet	ldr	r5, [r1], #0x04		/* LD:7c-7f */
1140129254Scognet	strd	r6, [r3], #0x08		/* ST:68-6f */
1141129254Scognet	strd	r8, [r3], #0x08		/* ST:70-77 */
1142129254Scognet	subs	r2, r2, #0x80
1143129254Scognet	strd	r4, [r3], #0x08		/* ST:78-7f */
1144129254Scognet	bge	.Lmemcpy_w_loop128
1145129254Scognet
1146129254Scognet.Lmemcpy_w_lessthan128:
1147129254Scognet	adds	r2, r2, #0x80		/* Adjust for extra sub */
1148129254Scognet	ldmeqfd	sp!, {r4-r9}
1149137463Scognet	RETeq			/* Return now if done */
1150129254Scognet	subs	r2, r2, #0x20
1151129254Scognet	blt	.Lmemcpy_w_lessthan32
1152129254Scognet
1153129254Scognet	/* Copy 32 bytes at a time */
1154129254Scognet.Lmemcpy_w_loop32:
1155129254Scognet	ldr	r4, [r1], #0x04
1156129254Scognet	ldr	r5, [r1], #0x04
1157129254Scognet	pld	[r1, #0x18]
1158129254Scognet	ldr	r6, [r1], #0x04
1159129254Scognet	ldr	r7, [r1], #0x04
1160129254Scognet	ldr	r8, [r1], #0x04
1161129254Scognet	ldr	r9, [r1], #0x04
1162129254Scognet	strd	r4, [r3], #0x08
1163129254Scognet	ldr	r4, [r1], #0x04
1164129254Scognet	ldr	r5, [r1], #0x04
1165129254Scognet	strd	r6, [r3], #0x08
1166129254Scognet	strd	r8, [r3], #0x08
1167129254Scognet	subs	r2, r2, #0x20
1168129254Scognet	strd	r4, [r3], #0x08
1169129254Scognet	bge	.Lmemcpy_w_loop32
1170129254Scognet
1171129254Scognet.Lmemcpy_w_lessthan32:
1172129254Scognet	adds	r2, r2, #0x20		/* Adjust for extra sub */
1173129254Scognet	ldmeqfd	sp!, {r4-r9}
1174137463Scognet	RETeq			/* Return now if done */
1175129254Scognet
1176129254Scognet	and	r4, r2, #0x18
1177129254Scognet	rsbs	r4, r4, #0x18
1178129254Scognet	addne	pc, pc, r4, lsl #1
1179129254Scognet	nop
1180129254Scognet
1181129254Scognet	/* At least 24 bytes remaining */
1182129254Scognet	ldr	r4, [r1], #0x04
1183129254Scognet	ldr	r5, [r1], #0x04
1184129254Scognet	sub	r2, r2, #0x08
1185129254Scognet	strd	r4, [r3], #0x08
1186129254Scognet
1187129254Scognet	/* At least 16 bytes remaining */
1188129254Scognet	ldr	r4, [r1], #0x04
1189129254Scognet	ldr	r5, [r1], #0x04
1190129254Scognet	sub	r2, r2, #0x08
1191129254Scognet	strd	r4, [r3], #0x08
1192129254Scognet
1193129254Scognet	/* At least 8 bytes remaining */
1194129254Scognet	ldr	r4, [r1], #0x04
1195129254Scognet	ldr	r5, [r1], #0x04
1196129254Scognet	subs	r2, r2, #0x08
1197129254Scognet	strd	r4, [r3], #0x08
1198129254Scognet
1199129254Scognet	/* Less than 8 bytes remaining */
1200129254Scognet	ldmfd	sp!, {r4-r9}
1201137463Scognet	RETeq			/* Return now if done */
1202129254Scognet	subs	r2, r2, #0x04
1203129254Scognet	ldrge	ip, [r1], #0x04
1204129254Scognet	strge	ip, [r3], #0x04
1205137463Scognet	RETeq			/* Return now if done */
1206129254Scognet	addlt	r2, r2, #0x04
1207129254Scognet	ldrb	ip, [r1], #0x01
1208129254Scognet	cmp	r2, #0x02
1209129254Scognet	ldrgeb	r2, [r1], #0x01
1210129254Scognet	strb	ip, [r3], #0x01
1211129254Scognet	ldrgtb	ip, [r1]
1212129254Scognet	strgeb	r2, [r3], #0x01
1213129254Scognet	strgtb	ip, [r3]
1214137463Scognet	RET
1215129254Scognet
1216129254Scognet
1217129254Scognet/*
1218129254Scognet * At this point, it has not been possible to word align both buffers.
1219129254Scognet * The destination buffer is word aligned, but the source buffer is not.
1220129254Scognet */
1221129254Scognet.Lmemcpy_bad_align:
1222129254Scognet	stmfd	sp!, {r4-r7}
1223129254Scognet	bic	r1, r1, #0x03
1224129254Scognet	cmp	ip, #2
1225129254Scognet	ldr	ip, [r1], #0x04
1226129254Scognet	bgt	.Lmemcpy_bad3
1227129254Scognet	beq	.Lmemcpy_bad2
1228129254Scognet	b	.Lmemcpy_bad1
1229129254Scognet
1230129254Scognet.Lmemcpy_bad1_loop16:
1231129254Scognet#ifdef __ARMEB__
1232129254Scognet	mov	r4, ip, lsl #8
1233129254Scognet#else
1234129254Scognet	mov	r4, ip, lsr #8
1235129254Scognet#endif
1236129254Scognet	ldr	r5, [r1], #0x04
1237129254Scognet	pld	[r1, #0x018]
1238129254Scognet	ldr	r6, [r1], #0x04
1239129254Scognet	ldr	r7, [r1], #0x04
1240129254Scognet	ldr	ip, [r1], #0x04
1241129254Scognet#ifdef __ARMEB__
1242129254Scognet	orr	r4, r4, r5, lsr #24
1243129254Scognet	mov	r5, r5, lsl #8
1244129254Scognet	orr	r5, r5, r6, lsr #24
1245129254Scognet	mov	r6, r6, lsl #8
1246129254Scognet	orr	r6, r6, r7, lsr #24
1247129254Scognet	mov	r7, r7, lsl #8
1248129254Scognet	orr	r7, r7, ip, lsr #24
1249129254Scognet#else
1250129254Scognet	orr	r4, r4, r5, lsl #24
1251129254Scognet	mov	r5, r5, lsr #8
1252129254Scognet	orr	r5, r5, r6, lsl #24
1253129254Scognet	mov	r6, r6, lsr #8
1254129254Scognet	orr	r6, r6, r7, lsl #24
1255129254Scognet	mov	r7, r7, lsr #8
1256129254Scognet	orr	r7, r7, ip, lsl #24
1257129254Scognet#endif
1258129254Scognet	str	r4, [r3], #0x04
1259129254Scognet	str	r5, [r3], #0x04
1260129254Scognet	str	r6, [r3], #0x04
1261129254Scognet	str	r7, [r3], #0x04
1262129254Scognet.Lmemcpy_bad1:
1263129254Scognet	subs	r2, r2, #0x10
1264129254Scognet	bge	.Lmemcpy_bad1_loop16
1265129254Scognet
1266129254Scognet	adds	r2, r2, #0x10
1267129254Scognet	ldmeqfd	sp!, {r4-r7}
1268137463Scognet	RETeq			/* Return now if done */
1269129254Scognet	subs	r2, r2, #0x04
1270129254Scognet	sublt	r1, r1, #0x03
1271129254Scognet	blt	.Lmemcpy_bad_done
1272129254Scognet
1273129254Scognet.Lmemcpy_bad1_loop4:
1274129254Scognet#ifdef __ARMEB__
1275129254Scognet	mov	r4, ip, lsl #8
1276129254Scognet#else
1277129254Scognet	mov	r4, ip, lsr #8
1278129254Scognet#endif
1279129254Scognet	ldr	ip, [r1], #0x04
1280129254Scognet	subs	r2, r2, #0x04
1281129254Scognet#ifdef __ARMEB__
1282129254Scognet	orr	r4, r4, ip, lsr #24
1283129254Scognet#else
1284129254Scognet	orr	r4, r4, ip, lsl #24
1285129254Scognet#endif
1286129254Scognet	str	r4, [r3], #0x04
1287129254Scognet	bge	.Lmemcpy_bad1_loop4
1288129254Scognet	sub	r1, r1, #0x03
1289129254Scognet	b	.Lmemcpy_bad_done
1290129254Scognet
1291129254Scognet.Lmemcpy_bad2_loop16:
1292129254Scognet#ifdef __ARMEB__
1293129254Scognet	mov	r4, ip, lsl #16
1294129254Scognet#else
1295129254Scognet	mov	r4, ip, lsr #16
1296129254Scognet#endif
1297129254Scognet	ldr	r5, [r1], #0x04
1298129254Scognet	pld	[r1, #0x018]
1299129254Scognet	ldr	r6, [r1], #0x04
1300129254Scognet	ldr	r7, [r1], #0x04
1301129254Scognet	ldr	ip, [r1], #0x04
1302129254Scognet#ifdef __ARMEB__
1303129254Scognet	orr	r4, r4, r5, lsr #16
1304129254Scognet	mov	r5, r5, lsl #16
1305129254Scognet	orr	r5, r5, r6, lsr #16
1306129254Scognet	mov	r6, r6, lsl #16
1307129254Scognet	orr	r6, r6, r7, lsr #16
1308129254Scognet	mov	r7, r7, lsl #16
1309129254Scognet	orr	r7, r7, ip, lsr #16
1310129254Scognet#else
1311129254Scognet	orr	r4, r4, r5, lsl #16
1312129254Scognet	mov	r5, r5, lsr #16
1313129254Scognet	orr	r5, r5, r6, lsl #16
1314129254Scognet	mov	r6, r6, lsr #16
1315129254Scognet	orr	r6, r6, r7, lsl #16
1316129254Scognet	mov	r7, r7, lsr #16
1317129254Scognet	orr	r7, r7, ip, lsl #16
1318129254Scognet#endif
1319129254Scognet	str	r4, [r3], #0x04
1320129254Scognet	str	r5, [r3], #0x04
1321129254Scognet	str	r6, [r3], #0x04
1322129254Scognet	str	r7, [r3], #0x04
1323129254Scognet.Lmemcpy_bad2:
1324129254Scognet	subs	r2, r2, #0x10
1325129254Scognet	bge	.Lmemcpy_bad2_loop16
1326129254Scognet
1327129254Scognet	adds	r2, r2, #0x10
1328129254Scognet	ldmeqfd	sp!, {r4-r7}
1329137463Scognet	RETeq			/* Return now if done */
1330129254Scognet	subs	r2, r2, #0x04
1331129254Scognet	sublt	r1, r1, #0x02
1332129254Scognet	blt	.Lmemcpy_bad_done
1333129254Scognet
1334129254Scognet.Lmemcpy_bad2_loop4:
1335129254Scognet#ifdef __ARMEB__
1336129254Scognet	mov	r4, ip, lsl #16
1337129254Scognet#else
1338129254Scognet	mov	r4, ip, lsr #16
1339129254Scognet#endif
1340129254Scognet	ldr	ip, [r1], #0x04
1341129254Scognet	subs	r2, r2, #0x04
1342129254Scognet#ifdef __ARMEB__
1343129254Scognet	orr	r4, r4, ip, lsr #16
1344129254Scognet#else
1345129254Scognet	orr	r4, r4, ip, lsl #16
1346129254Scognet#endif
1347129254Scognet	str	r4, [r3], #0x04
1348129254Scognet	bge	.Lmemcpy_bad2_loop4
1349129254Scognet	sub	r1, r1, #0x02
1350129254Scognet	b	.Lmemcpy_bad_done
1351129254Scognet
1352129254Scognet.Lmemcpy_bad3_loop16:
1353129254Scognet#ifdef __ARMEB__
1354129254Scognet	mov	r4, ip, lsl #24
1355129254Scognet#else
1356129254Scognet	mov	r4, ip, lsr #24
1357129254Scognet#endif
1358129254Scognet	ldr	r5, [r1], #0x04
1359129254Scognet	pld	[r1, #0x018]
1360129254Scognet	ldr	r6, [r1], #0x04
1361129254Scognet	ldr	r7, [r1], #0x04
1362129254Scognet	ldr	ip, [r1], #0x04
1363129254Scognet#ifdef __ARMEB__
1364129254Scognet	orr	r4, r4, r5, lsr #8
1365129254Scognet	mov	r5, r5, lsl #24
1366129254Scognet	orr	r5, r5, r6, lsr #8
1367129254Scognet	mov	r6, r6, lsl #24
1368129254Scognet	orr	r6, r6, r7, lsr #8
1369129254Scognet	mov	r7, r7, lsl #24
1370129254Scognet	orr	r7, r7, ip, lsr #8
1371129254Scognet#else
1372129254Scognet	orr	r4, r4, r5, lsl #8
1373129254Scognet	mov	r5, r5, lsr #24
1374129254Scognet	orr	r5, r5, r6, lsl #8
1375129254Scognet	mov	r6, r6, lsr #24
1376129254Scognet	orr	r6, r6, r7, lsl #8
1377129254Scognet	mov	r7, r7, lsr #24
1378129254Scognet	orr	r7, r7, ip, lsl #8
1379129254Scognet#endif
1380129254Scognet	str	r4, [r3], #0x04
1381129254Scognet	str	r5, [r3], #0x04
1382129254Scognet	str	r6, [r3], #0x04
1383129254Scognet	str	r7, [r3], #0x04
1384129254Scognet.Lmemcpy_bad3:
1385129254Scognet	subs	r2, r2, #0x10
1386129254Scognet	bge	.Lmemcpy_bad3_loop16
1387129254Scognet
1388129254Scognet	adds	r2, r2, #0x10
1389129254Scognet	ldmeqfd	sp!, {r4-r7}
1390137463Scognet	RETeq			/* Return now if done */
1391129254Scognet	subs	r2, r2, #0x04
1392129254Scognet	sublt	r1, r1, #0x01
1393129254Scognet	blt	.Lmemcpy_bad_done
1394129254Scognet
1395129254Scognet.Lmemcpy_bad3_loop4:
1396129254Scognet#ifdef __ARMEB__
1397129254Scognet	mov	r4, ip, lsl #24
1398129254Scognet#else
1399129254Scognet	mov	r4, ip, lsr #24
1400129254Scognet#endif
1401129254Scognet	ldr	ip, [r1], #0x04
1402129254Scognet	subs	r2, r2, #0x04
1403129254Scognet#ifdef __ARMEB__
1404129254Scognet	orr	r4, r4, ip, lsr #8
1405129254Scognet#else
1406129254Scognet	orr	r4, r4, ip, lsl #8
1407129254Scognet#endif
1408129254Scognet	str	r4, [r3], #0x04
1409129254Scognet	bge	.Lmemcpy_bad3_loop4
1410129254Scognet	sub	r1, r1, #0x01
1411129254Scognet
1412129254Scognet.Lmemcpy_bad_done:
1413129254Scognet	ldmfd	sp!, {r4-r7}
1414129254Scognet	adds	r2, r2, #0x04
1415137463Scognet	RETeq
1416129254Scognet	ldrb	ip, [r1], #0x01
1417129254Scognet	cmp	r2, #0x02
1418129254Scognet	ldrgeb	r2, [r1], #0x01
1419129254Scognet	strb	ip, [r3], #0x01
1420129254Scognet	ldrgtb	ip, [r1]
1421129254Scognet	strgeb	r2, [r3], #0x01
1422129254Scognet	strgtb	ip, [r3]
1423137463Scognet	RET
1424129254Scognet
1425129254Scognet
1426129254Scognet/*
1427129254Scognet * Handle short copies (12 bytes or fewer), possibly misaligned.
1428129254Scognet * Some of these are *very* common, thanks to the network stack,
1429129254Scognet * and so are handled specially.
1430129254Scognet */
1431129254Scognet.Lmemcpy_short:
1432129254Scognet	add	pc, pc, r2, lsl #2
1433129254Scognet	nop
1434137463Scognet	RET			/* 0x00 */
1435129254Scognet	b	.Lmemcpy_bytewise	/* 0x01 */
1436129254Scognet	b	.Lmemcpy_bytewise	/* 0x02 */
1437129254Scognet	b	.Lmemcpy_bytewise	/* 0x03 */
1438129254Scognet	b	.Lmemcpy_4		/* 0x04 */
1439129254Scognet	b	.Lmemcpy_bytewise	/* 0x05 */
1440129254Scognet	b	.Lmemcpy_6		/* 0x06 */
1441129254Scognet	b	.Lmemcpy_bytewise	/* 0x07 */
1442129254Scognet	b	.Lmemcpy_8		/* 0x08 */
1443129254Scognet	b	.Lmemcpy_bytewise	/* 0x09 */
1444129254Scognet	b	.Lmemcpy_bytewise	/* 0x0a */
1445129254Scognet	b	.Lmemcpy_bytewise	/* 0x0b */
1446129254Scognet	b	.Lmemcpy_c		/* 0x0c */
1447129254Scognet.Lmemcpy_bytewise:
1448129254Scognet	mov	r3, r0			/* We must not clobber r0 */
1449129254Scognet	ldrb	ip, [r1], #0x01
1450129254Scognet1:	subs	r2, r2, #0x01
1451129254Scognet	strb	ip, [r3], #0x01
1452129254Scognet	ldrneb	ip, [r1], #0x01
1453129254Scognet	bne	1b
1454137463Scognet	RET
1455129254Scognet
1456129254Scognet/******************************************************************************
1457129254Scognet * Special case for 4 byte copies
1458129254Scognet */
1459129254Scognet#define	LMEMCPY_4_LOG2	6	/* 64 bytes */
1460129254Scognet#define	LMEMCPY_4_PAD	.align LMEMCPY_4_LOG2
1461129254Scognet	LMEMCPY_4_PAD
1462129254Scognet.Lmemcpy_4:
1463129254Scognet	and	r2, r1, #0x03
1464129254Scognet	orr	r2, r2, r0, lsl #2
1465129254Scognet	ands	r2, r2, #0x0f
1466129254Scognet	sub	r3, pc, #0x14
1467129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_4_LOG2
1468129254Scognet
1469129254Scognet/*
1470129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
1471129254Scognet */
1472129254Scognet	ldr	r2, [r1]
1473129254Scognet	str	r2, [r0]
1474137463Scognet	RET
1475129254Scognet	LMEMCPY_4_PAD
1476129254Scognet
1477129254Scognet/*
1478129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned
1479129254Scognet */
1480129254Scognet	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
1481129254Scognet	ldr	r2, [r1, #3]		/* BE:r2 = 3xxx  LE:r2 = xxx3 */
1482129254Scognet#ifdef __ARMEB__
1483129254Scognet	mov	r3, r3, lsl #8		/* r3 = 012. */
1484129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
1485129254Scognet#else
1486129254Scognet	mov	r3, r3, lsr #8		/* r3 = .210 */
1487129254Scognet	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
1488129254Scognet#endif
1489129254Scognet	str	r3, [r0]
1490137463Scognet	RET
1491129254Scognet	LMEMCPY_4_PAD
1492129254Scognet
1493129254Scognet/*
1494129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned
1495129254Scognet */
1496129254Scognet#ifdef __ARMEB__
1497129254Scognet	ldrh	r3, [r1]
1498129254Scognet	ldrh	r2, [r1, #0x02]
1499129254Scognet#else
1500129254Scognet	ldrh	r3, [r1, #0x02]
1501129254Scognet	ldrh	r2, [r1]
1502129254Scognet#endif
1503129254Scognet	orr	r3, r2, r3, lsl #16
1504129254Scognet	str	r3, [r0]
1505137463Scognet	RET
1506129254Scognet	LMEMCPY_4_PAD
1507129254Scognet
1508129254Scognet/*
1509129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned
1510129254Scognet */
1511129254Scognet	ldr	r3, [r1, #-3]		/* BE:r3 = xxx0  LE:r3 = 0xxx */
1512129254Scognet	ldr	r2, [r1, #1]		/* BE:r2 = 123x  LE:r2 = x321 */
1513129254Scognet#ifdef __ARMEB__
1514129254Scognet	mov	r3, r3, lsl #24		/* r3 = 0... */
1515129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
1516129254Scognet#else
1517129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...0 */
1518129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
1519129254Scognet#endif
1520129254Scognet	str	r3, [r0]
1521137463Scognet	RET
1522129254Scognet	LMEMCPY_4_PAD
1523129254Scognet
1524129254Scognet/*
1525129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned
1526129254Scognet */
1527129254Scognet	ldr	r2, [r1]
1528129254Scognet#ifdef __ARMEB__
1529129254Scognet	strb	r2, [r0, #0x03]
1530129254Scognet	mov	r3, r2, lsr #8
1531129254Scognet	mov	r1, r2, lsr #24
1532129254Scognet	strb	r1, [r0]
1533129254Scognet#else
1534129254Scognet	strb	r2, [r0]
1535129254Scognet	mov	r3, r2, lsr #8
1536129254Scognet	mov	r1, r2, lsr #24
1537129254Scognet	strb	r1, [r0, #0x03]
1538129254Scognet#endif
1539129254Scognet	strh	r3, [r0, #0x01]
1540137463Scognet	RET
1541129254Scognet	LMEMCPY_4_PAD
1542129254Scognet
1543129254Scognet/*
1544129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned
1545129254Scognet */
1546129254Scognet	ldrb	r2, [r1]
1547129254Scognet	ldrh	r3, [r1, #0x01]
1548129254Scognet	ldrb	r1, [r1, #0x03]
1549129254Scognet	strb	r2, [r0]
1550129254Scognet	strh	r3, [r0, #0x01]
1551129254Scognet	strb	r1, [r0, #0x03]
1552137463Scognet	RET
1553129254Scognet	LMEMCPY_4_PAD
1554129254Scognet
1555129254Scognet/*
1556129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned
1557129254Scognet */
1558129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1559129254Scognet	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1560129254Scognet#ifdef __ARMEB__
1561129254Scognet	mov	r1, r2, lsr #8		/* r1 = ...0 */
1562129254Scognet	strb	r1, [r0]
1563129254Scognet	mov	r2, r2, lsl #8		/* r2 = .01. */
1564129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = .012 */
1565129254Scognet#else
1566129254Scognet	strb	r2, [r0]
1567129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
1568129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
1569129254Scognet	mov	r3, r3, lsr #8		/* r3 = ...3 */
1570129254Scognet#endif
1571129254Scognet	strh	r2, [r0, #0x01]
1572129254Scognet	strb	r3, [r0, #0x03]
1573137463Scognet	RET
1574129254Scognet	LMEMCPY_4_PAD
1575129254Scognet
1576129254Scognet/*
1577129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned
1578129254Scognet */
1579129254Scognet	ldrb	r2, [r1]
1580129254Scognet	ldrh	r3, [r1, #0x01]
1581129254Scognet	ldrb	r1, [r1, #0x03]
1582129254Scognet	strb	r2, [r0]
1583129254Scognet	strh	r3, [r0, #0x01]
1584129254Scognet	strb	r1, [r0, #0x03]
1585137463Scognet	RET
1586129254Scognet	LMEMCPY_4_PAD
1587129254Scognet
1588129254Scognet/*
1589129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned
1590129254Scognet */
1591129254Scognet	ldr	r2, [r1]
1592129254Scognet#ifdef __ARMEB__
1593129254Scognet	strh	r2, [r0, #0x02]
1594129254Scognet	mov	r3, r2, lsr #16
1595129254Scognet	strh	r3, [r0]
1596129254Scognet#else
1597129254Scognet	strh	r2, [r0]
1598129254Scognet	mov	r3, r2, lsr #16
1599129254Scognet	strh	r3, [r0, #0x02]
1600129254Scognet#endif
1601137463Scognet	RET
1602129254Scognet	LMEMCPY_4_PAD
1603129254Scognet
1604129254Scognet/*
1605129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned
1606129254Scognet */
1607129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
1608129254Scognet	ldr	r3, [r1, #3]		/* BE:r3 = 3xxx  LE:r3 = xxx3 */
1609129254Scognet	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
1610129254Scognet	strh	r1, [r0]
1611129254Scognet#ifdef __ARMEB__
1612129254Scognet	mov	r2, r2, lsl #8		/* r2 = 012. */
1613129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
1614129254Scognet#else
1615129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...2 */
1616129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = xx32 */
1617129254Scognet#endif
1618129254Scognet	strh	r2, [r0, #0x02]
1619137463Scognet	RET
1620129254Scognet	LMEMCPY_4_PAD
1621129254Scognet
1622129254Scognet/*
1623129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned
1624129254Scognet */
1625129254Scognet	ldrh	r2, [r1]
1626129254Scognet	ldrh	r3, [r1, #0x02]
1627129254Scognet	strh	r2, [r0]
1628129254Scognet	strh	r3, [r0, #0x02]
1629137463Scognet	RET
1630129254Scognet	LMEMCPY_4_PAD
1631129254Scognet
1632129254Scognet/*
1633129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned
1634129254Scognet */
1635129254Scognet	ldr	r3, [r1, #1]		/* BE:r3 = 123x  LE:r3 = x321 */
1636129254Scognet	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
1637129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .123  LE:r1 = .x32 */
1638129254Scognet	strh	r1, [r0, #0x02]
1639129254Scognet#ifdef __ARMEB__
1640129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...1 */
1641129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = xx01 */
1642129254Scognet#else
1643129254Scognet	mov	r3, r3, lsl #8		/* r3 = 321. */
1644129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = 3210 */
1645129254Scognet#endif
1646129254Scognet	strh	r3, [r0]
1647137463Scognet	RET
1648129254Scognet	LMEMCPY_4_PAD
1649129254Scognet
1650129254Scognet/*
1651129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * One word load.  The word is split into a byte, a halfword and a byte
 * that are stored at r0, r0+1 and r0+3.  Which register byte goes to
 * which address depends on endianness, hence the two store sequences.
1652129254Scognet */
1653129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1654129254Scognet#ifdef __ARMEB__
1655129254Scognet	strb	r2, [r0, #0x03]
1656129254Scognet	mov	r3, r2, lsr #8
1657129254Scognet	mov	r1, r2, lsr #24
1658129254Scognet	strh	r3, [r0, #0x01]
1659129254Scognet	strb	r1, [r0]
1660129254Scognet#else
1661129254Scognet	strb	r2, [r0]
1662129254Scognet	mov	r3, r2, lsr #8
1663129254Scognet	mov	r1, r2, lsr #24
1664129254Scognet	strh	r3, [r0, #0x01]
1665129254Scognet	strb	r1, [r0, #0x03]
1666129254Scognet#endif
1667137463Scognet	RET
1668129254Scognet	LMEMCPY_4_PAD
1669129254Scognet
1670129254Scognet/*
1671129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Byte, halfword, byte at offsets 0, 1 and 3 on both sides, so no
 * shifting is needed.  All loads are issued before the first store
 * (r1 is reused for the last byte).
1672129254Scognet */
1673129254Scognet	ldrb	r2, [r1]
1674129254Scognet	ldrh	r3, [r1, #0x01]
1675129254Scognet	ldrb	r1, [r1, #0x03]
1676129254Scognet	strb	r2, [r0]
1677129254Scognet	strh	r3, [r0, #0x01]
1678129254Scognet	strb	r1, [r0, #0x03]
1679137463Scognet	RET
1680129254Scognet	LMEMCPY_4_PAD
1681129254Scognet
1682129254Scognet/*
1683129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * Two halfword loads (bytes 0..1 and 2..3).  The output is a byte at
 * r0, a halfword at r0+1 and a byte at r0+3.  Bytes 1..2 straddle the
 * two loaded halfwords and are merged with a shift and an orr.
1684129254Scognet */
1685129254Scognet#ifdef __ARMEB__
1686129254Scognet	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1687129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1688129254Scognet	strb	r3, [r0, #0x03]
1689129254Scognet	mov	r3, r3, lsr #8		/* r3 = ...2 */
1690129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = .012 (strh stores the low 12) */
1691129254Scognet	strh	r3, [r0, #0x01]
1692129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...0 */
1693129254Scognet	strb	r2, [r0]
1694129254Scognet#else
1695129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1696129254Scognet	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1697129254Scognet	strb	r2, [r0]
1698129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
1699129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
1700129254Scognet	strh	r2, [r0, #0x01]
1701129254Scognet	mov	r3, r3, lsr #8		/* r3 = ...3 */
1702129254Scognet	strb	r3, [r0, #0x03]
1703129254Scognet#endif
1704137463Scognet	RET
1705129254Scognet	LMEMCPY_4_PAD
1706129254Scognet
1707129254Scognet/*
1708129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Byte, halfword, byte at offsets 0, 1 and 3 on both sides, so the data
 * is copied without any shifting.  All loads precede the first store.
1709129254Scognet */
1710129254Scognet	ldrb	r2, [r1]
1711129254Scognet	ldrh	r3, [r1, #0x01]
1712129254Scognet	ldrb	r1, [r1, #0x03]
1713129254Scognet	strb	r2, [r0]
1714129254Scognet	strh	r3, [r0, #0x01]
1715129254Scognet	strb	r1, [r0, #0x03]
1716137463Scognet	RET
1717129254Scognet	LMEMCPY_4_PAD
1718129254Scognet
1719129254Scognet
1720129254Scognet/******************************************************************************
1721129254Scognet * Special case for 6 byte copies
 *
 * r0 is the destination and r1 the source (every store below goes
 * through r0, every load through r1).  The entry code builds a 4-bit
 * index, ((r0 & 3) << 2) | (r1 & 3), and jumps to one of 16 handlers.
 * Each handler is padded by LMEMCPY_6_PAD to a 64-byte slot, so handler
 * N starts at .Lmemcpy_6 + (N << LMEMCPY_6_LOG2).  Index 0 does not
 * jump: it falls through into the 0000 handler, which shares the first
 * slot with the entry code.  Handlers must therefore never outgrow
 * their slot.
1722129254Scognet */
1723129254Scognet#define	LMEMCPY_6_LOG2	6	/* 64 bytes */
1724129254Scognet#define	LMEMCPY_6_PAD	.align LMEMCPY_6_LOG2
1725129254Scognet	LMEMCPY_6_PAD
1726129254Scognet.Lmemcpy_6:
1727129254Scognet	and	r2, r1, #0x03	/* r2 = src & 3 */
1728129254Scognet	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
1729129254Scognet	ands	r2, r2, #0x0f	/* keep the 4 index bits, Z set when index is 0 */
1730129254Scognet	sub	r3, pc, #0x14	/* r3 = .Lmemcpy_6 (pc reads as this insn + 8) */
1731129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_6_LOG2	/* index != 0: jump to its slot */
1732129254Scognet
1733129254Scognet/*
1734129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
 *
 * Copied as one word (bytes 0..3) followed by one halfword (bytes 4..5).
1735129254Scognet */
1736129254Scognet	ldr	r2, [r1]
1737129254Scognet	ldrh	r3, [r1, #0x04]
1738129254Scognet	str	r2, [r0]
1739129254Scognet	strh	r3, [r0, #0x04]
1740137463Scognet	RET
1741129254Scognet	LMEMCPY_6_PAD
1742129254Scognet
1743129254Scognet/*
1744129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned
 *
 * Register pictures list bytes most significant first: a digit is a
 * source byte index, x is an unrelated byte, . is a zero byte.
 * src is one byte past a word boundary, so the word loads at r1-1 and
 * r1+3 are aligned.  They bring in bytes 0..2 and 3..5 plus one
 * unrelated byte each, which is shifted out or left above the low
 * halfword that strh stores.
1745129254Scognet */
1746129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
1747129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 345x  LE:r3 = x543 */
1748129254Scognet#ifdef __ARMEB__
1749129254Scognet	mov	r2, r2, lsl #8		/* r2 = 012. */
1750129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
1751129254Scognet#else
1752129254Scognet	mov	r2, r2, lsr #8		/* r2 = .210 */
1753129254Scognet	orr	r2, r2, r3, lsl #24	/* r2 = 3210 */
1754129254Scognet#endif
1755129254Scognet	mov	r3, r3, lsr #8		/* BE:r3 = .345  LE:r3 = .x54 */
1756129254Scognet	str	r2, [r0]
1757129254Scognet	strh	r3, [r0, #0x04]
1758137463Scognet	RET
1759129254Scognet	LMEMCPY_6_PAD
1760129254Scognet
1761129254Scognet/*
1762129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned
 *
 * A word load at r1+2 (bytes 2..5) and a halfword load at r1 (bytes
 * 0..1) are recombined into word 0..3 for r0 and halfword 4..5 for
 * r0+4.  The big-endian path stores r3 directly as the halfword since
 * strh writes only its low 16 bits.
1763129254Scognet */
1764129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
1765129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1766129254Scognet#ifdef __ARMEB__
1767129254Scognet	mov	r1, r3, lsr #16		/* r1 = ..23 */
1768129254Scognet	orr	r1, r1, r2, lsl #16	/* r1 = 0123 */
1769129254Scognet	str	r1, [r0]
1770129254Scognet	strh	r3, [r0, #0x04]
1771129254Scognet#else
1772129254Scognet	mov	r1, r3, lsr #16		/* r1 = ..54 */
1773129254Scognet	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
1774129254Scognet	str	r2, [r0]
1775129254Scognet	strh	r1, [r0, #0x04]
1776129254Scognet#endif
1777137463Scognet	RET
1778129254Scognet	LMEMCPY_6_PAD
1779129254Scognet
1780129254Scognet/*
1781129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned
 *
 * src is three bytes past a word boundary.  Three aligned word loads
 * (r1-3, r1+1, r1+5) supply byte 0, bytes 1..4 and byte 5.  They are
 * merged into word 0..3 for r0 and a halfword 4..5 for r0+4.
1782129254Scognet */
1783129254Scognet	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
1784129254Scognet	ldr	r3, [r1, #1]		/* BE:r3 = 1234  LE:r3 = 4321 */
1785129254Scognet	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
1786129254Scognet#ifdef __ARMEB__
1787129254Scognet	mov	r2, r2, lsl #24		/* r2 = 0... */
1788129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
1789129254Scognet	mov	r3, r3, lsl #8		/* r3 = 234. */
1790129254Scognet	orr	r1, r3, r1, lsr #24	/* r1 = 2345 */
1791129254Scognet#else
1792129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...0 */
1793129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
1794129254Scognet	mov	r1, r1, lsl #8		/* r1 = xx5. */
1795129254Scognet	orr	r1, r1, r3, lsr #24	/* r1 = xx54 */
1796129254Scognet#endif
1797129254Scognet	str	r2, [r0]
1798129254Scognet	strh	r1, [r0, #0x04]
1799137463Scognet	RET
1800129254Scognet	LMEMCPY_6_PAD
1801129254Scognet
1802129254Scognet/*
1803129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * One word (bytes 0..3) and one halfword (bytes 4..5) are loaded.  The
 * six bytes are written as byte, halfword, halfword, byte at r0, r0+1,
 * r0+3 and r0+5.  Halfword 1..2 is stored before the endian-specific
 * part because the same shift yields it for both byte orders.
1804129254Scognet */
1805129254Scognet	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
1806129254Scognet	ldrh	r2, [r1, #0x04]		/* BE:r2 = ..45  LE:r2 = ..54 */
1807129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
1808129254Scognet	strh	r1, [r0, #0x01]
1809129254Scognet#ifdef __ARMEB__
1810129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...0 */
1811129254Scognet	strb	r1, [r0]
1812129254Scognet	mov	r3, r3, lsl #8		/* r3 = 123. */
1813129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 1234 */
1814129254Scognet#else
1815129254Scognet	strb	r3, [r0]
1816129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...3 */
1817129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = .543 */
1818129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...5 */
1819129254Scognet#endif
1820129254Scognet	strh	r3, [r0, #0x03]
1821129254Scognet	strb	r2, [r0, #0x05]
1822137463Scognet	RET
1823129254Scognet	LMEMCPY_6_PAD
1824129254Scognet
1825129254Scognet/*
1826129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Byte, halfword, halfword, byte at offsets 0, 1, 3 and 5 on both
 * sides, so no shifting is needed.  ip is used as a fourth scratch
 * register and all loads are issued before the first store.
1827129254Scognet */
1828129254Scognet	ldrb	r2, [r1]
1829129254Scognet	ldrh	r3, [r1, #0x01]
1830129254Scognet	ldrh	ip, [r1, #0x03]
1831129254Scognet	ldrb	r1, [r1, #0x05]
1832129254Scognet	strb	r2, [r0]
1833129254Scognet	strh	r3, [r0, #0x01]
1834129254Scognet	strh	ip, [r0, #0x03]
1835129254Scognet	strb	r1, [r0, #0x05]
1836137463Scognet	RET
1837129254Scognet	LMEMCPY_6_PAD
1838129254Scognet
1839129254Scognet/*
1840129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * A halfword load at r1 (bytes 0..1) and a word load at r1+2 (bytes
 * 2..5).  Output is byte, halfword, halfword, byte at r0, r0+1, r0+3
 * and r0+5.  r3 is a scratch register that is rebuilt for each store;
 * strh and strb only write the low 16 or 8 bits of it.
1841129254Scognet */
1842129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1843129254Scognet	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
1844129254Scognet#ifdef __ARMEB__
1845129254Scognet	mov	r3, r2, lsr #8		/* r3 = ...0 */
1846129254Scognet	strb	r3, [r0]
1847129254Scognet	strb	r1, [r0, #0x05]
1848129254Scognet	mov	r3, r1, lsr #8		/* r3 = .234 */
1849129254Scognet	strh	r3, [r0, #0x03]
1850129254Scognet	mov	r3, r2, lsl #8		/* r3 = .01. */
1851129254Scognet	orr	r3, r3, r1, lsr #24	/* r3 = .012 */
1852129254Scognet	strh	r3, [r0, #0x01]
1853129254Scognet#else
1854129254Scognet	strb	r2, [r0]
1855129254Scognet	mov	r3, r1, lsr #24
1856129254Scognet	strb	r3, [r0, #0x05]
1857129254Scognet	mov	r3, r1, lsr #8		/* r3 = .543 */
1858129254Scognet	strh	r3, [r0, #0x03]
1859129254Scognet	mov	r3, r2, lsr #8		/* r3 = ...1 */
1860129254Scognet	orr	r3, r3, r1, lsl #8	/* r3 = 4321 */
1861129254Scognet	strh	r3, [r0, #0x01]
1862129254Scognet#endif
1863137463Scognet	RET
1864129254Scognet	LMEMCPY_6_PAD
1865129254Scognet
1866129254Scognet/*
1867129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Byte, halfword, halfword, byte at offsets 0, 1, 3 and 5 on both
 * sides.  The pieces are copied unchanged, with every load issued
 * before the first store.
1868129254Scognet */
1869129254Scognet	ldrb	r2, [r1]
1870129254Scognet	ldrh	r3, [r1, #0x01]
1871129254Scognet	ldrh	ip, [r1, #0x03]
1872129254Scognet	ldrb	r1, [r1, #0x05]
1873129254Scognet	strb	r2, [r0]
1874129254Scognet	strh	r3, [r0, #0x01]
1875129254Scognet	strh	ip, [r0, #0x03]
1876129254Scognet	strb	r1, [r0, #0x05]
1877137463Scognet	RET
1878129254Scognet	LMEMCPY_6_PAD
1879129254Scognet
1880129254Scognet/*
1881129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned
 *
 * A word load at r1 (bytes 0..3) and a halfword load at r1+4 (bytes
 * 4..5) are recombined into halfword 0..1 for r0 and word 2..5 for
 * r0+2.  The two endian variants also issue the loads in a different
 * order.
1882129254Scognet */
1883129254Scognet#ifdef __ARMEB__
1884129254Scognet	ldr	r2, [r1]		/* r2 = 0123 */
1885129254Scognet	ldrh	r3, [r1, #0x04]		/* r3 = ..45 */
1886129254Scognet	mov	r1, r2, lsr #16		/* r1 = ..01 */
1887129254Scognet	orr	r3, r3, r2, lsl#16	/* r3 = 2345 */
1888129254Scognet	strh	r1, [r0]
1889129254Scognet	str	r3, [r0, #0x02]
1890129254Scognet#else
1891129254Scognet	ldrh	r2, [r1, #0x04]		/* r2 = ..54 */
1892129254Scognet	ldr	r3, [r1]		/* r3 = 3210 */
1893129254Scognet	mov	r2, r2, lsl #16		/* r2 = 54.. */
1894129254Scognet	orr	r2, r2, r3, lsr #16	/* r2 = 5432 */
1895129254Scognet	strh	r3, [r0]
1896129254Scognet	str	r2, [r0, #0x02]
1897129254Scognet#endif
1898137463Scognet	RET
1899129254Scognet	LMEMCPY_6_PAD
1900129254Scognet
1901129254Scognet/*
1902129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned
 *
 * Aligned word loads at r1-1 and r1+3 bring in bytes 0..2 and 3..5
 * (plus one unrelated byte each).  Halfword 0..1 is the low half of the
 * first word shifted right by 8, and word 2..5 is merged from both.
1903129254Scognet */
1904129254Scognet	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
1905129254Scognet	ldr	r2, [r1, #3]		/* BE:r2 = 345x  LE:r2 = x543 */
1906129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
1907129254Scognet#ifdef __ARMEB__
1908129254Scognet	mov	r2, r2, lsr #8		/* r2 = .345 */
1909129254Scognet	orr	r2, r2, r3, lsl #24	/* r2 = 2345 */
1910129254Scognet#else
1911129254Scognet	mov	r2, r2, lsl #8		/* r2 = 543. */
1912129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 5432 */
1913129254Scognet#endif
1914129254Scognet	strh	r1, [r0]
1915129254Scognet	str	r2, [r0, #0x02]
1916137463Scognet	RET
1917129254Scognet	LMEMCPY_6_PAD
1918129254Scognet
1919129254Scognet/*
1920129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned
 *
 * Halfword (bytes 0..1) then word (bytes 2..5) on both sides; the
 * pieces are copied unchanged.
1921129254Scognet */
1922129254Scognet	ldrh	r2, [r1]
1923129254Scognet	ldr	r3, [r1, #0x02]
1924129254Scognet	strh	r2, [r0]
1925129254Scognet	str	r3, [r0, #0x02]
1926137463Scognet	RET
1927129254Scognet	LMEMCPY_6_PAD
1928129254Scognet
1929129254Scognet/*
1930129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned
 *
 * A byte load at r1, a word load at r1+1 (bytes 1..4) and a byte load
 * at r1+5.  They are merged into halfword 0..1 for r0 and word 2..5
 * for r0+2.
1931129254Scognet */
1932129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
1933129254Scognet	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
1934129254Scognet	ldrb	r1, [r1, #0x05]		/* r1 = ...5 */
1935129254Scognet#ifdef __ARMEB__
1936129254Scognet	mov	r3, r3, lsl #8		/* r3 = ..0. */
1937129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = ..01 */
1938129254Scognet	orr	r1, r1, r2, lsl #8	/* r1 = 2345 */
1939129254Scognet#else
1940129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
1941129254Scognet	mov	r1, r1, lsl #24		/* r1 = 5... */
1942129254Scognet	orr	r1, r1, r2, lsr #8	/* r1 = 5432 */
1943129254Scognet#endif
1944129254Scognet	strh	r3, [r0]
1945129254Scognet	str	r1, [r0, #0x02]
1946137463Scognet	RET
1947129254Scognet	LMEMCPY_6_PAD
1948129254Scognet
1949129254Scognet/*
1950129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * A word load at r1 and a halfword load at r1+4.  Output is byte 0 at
 * r0, word 1..4 at r0+1 and byte 5 at r0+5.  On big-endian byte 5 is
 * already the low byte of r1, so only the little-endian path shifts it.
1951129254Scognet */
1952129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1953129254Scognet	ldrh	r1, [r1, #0x04]		/* BE:r1 = ..45  LE:r1 = ..54 */
1954129254Scognet#ifdef __ARMEB__
1955129254Scognet	mov	r3, r2, lsr #24		/* r3 = ...0 */
1956129254Scognet	strb	r3, [r0]
1957129254Scognet	mov	r2, r2, lsl #8		/* r2 = 123. */
1958129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
1959129254Scognet#else
1960129254Scognet	strb	r2, [r0]
1961129254Scognet	mov	r2, r2, lsr #8		/* r2 = .321 */
1962129254Scognet	orr	r2, r2, r1, lsl #24	/* r2 = 4321 */
1963129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...5 */
1964129254Scognet#endif
1965129254Scognet	str	r2, [r0, #0x01]
1966129254Scognet	strb	r1, [r0, #0x05]
1967137463Scognet	RET
1968129254Scognet	LMEMCPY_6_PAD
1969129254Scognet
1970129254Scognet/*
1971129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Byte, halfword, halfword, byte at offsets 0, 1, 3 and 5 on both
 * sides.  No shifting; all four loads are issued before the stores.
1972129254Scognet */
1973129254Scognet	ldrb	r2, [r1]
1974129254Scognet	ldrh	r3, [r1, #0x01]
1975129254Scognet	ldrh	ip, [r1, #0x03]
1976129254Scognet	ldrb	r1, [r1, #0x05]
1977129254Scognet	strb	r2, [r0]
1978129254Scognet	strh	r3, [r0, #0x01]
1979129254Scognet	strh	ip, [r0, #0x03]
1980129254Scognet	strb	r1, [r0, #0x05]
1981137463Scognet	RET
1982129254Scognet	LMEMCPY_6_PAD
1983129254Scognet
1984129254Scognet/*
1985129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * A halfword load at r1 (bytes 0..1) and a word load at r1+2 (bytes
 * 2..5).  Output is byte 0 at r0, word 1..4 at r0+1 and byte 5 at
 * r0+5.  On big-endian byte 5 is already the low byte of r1.
1986129254Scognet */
1987129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1988129254Scognet	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
1989129254Scognet#ifdef __ARMEB__
1990129254Scognet	mov	r3, r2, lsr #8		/* r3 = ...0 */
1991129254Scognet	strb	r3, [r0]
1992129254Scognet	mov	r2, r2, lsl #24		/* r2 = 1... */
1993129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
1994129254Scognet#else
1995129254Scognet	strb	r2, [r0]
1996129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
1997129254Scognet	orr	r2, r2, r1, lsl #8	/* r2 = 4321 */
1998129254Scognet	mov	r1, r1, lsr #24		/* r1 = ...5 */
1999129254Scognet#endif
2000129254Scognet	str	r2, [r0, #0x01]
2001129254Scognet	strb	r1, [r0, #0x05]
2002137463Scognet	RET
2003129254Scognet	LMEMCPY_6_PAD
2004129254Scognet
2005129254Scognet/*
2006129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Byte, word, byte at offsets 0, 1 and 5 on both sides; the pieces are
 * copied unchanged with all loads issued before the first store.
2007129254Scognet */
2008129254Scognet	ldrb	r2, [r1]
2009129254Scognet	ldr	r3, [r1, #0x01]
2010129254Scognet	ldrb	r1, [r1, #0x05]
2011129254Scognet	strb	r2, [r0]
2012129254Scognet	str	r3, [r0, #0x01]
2013129254Scognet	strb	r1, [r0, #0x05]
2014137463Scognet	RET
2015129254Scognet	LMEMCPY_6_PAD
2016129254Scognet
2017129254Scognet
2018129254Scognet/******************************************************************************
2019129254Scognet * Special case for 8 byte copies
 *
 * r0 is the destination and r1 the source (every store below goes
 * through r0, every load through r1).  The entry code builds a 4-bit
 * index, ((r0 & 3) << 2) | (r1 & 3), and jumps to one of 16 handlers.
 * Each handler is padded by LMEMCPY_8_PAD to a 64-byte slot, so handler
 * N starts at .Lmemcpy_8 + (N << LMEMCPY_8_LOG2).  Index 0 does not
 * jump: it falls through into the 0000 handler, which shares the first
 * slot with the entry code.  Handlers must therefore never outgrow
 * their slot.
2020129254Scognet */
2021129254Scognet#define	LMEMCPY_8_LOG2	6	/* 64 bytes */
2022129254Scognet#define	LMEMCPY_8_PAD	.align LMEMCPY_8_LOG2
2023129254Scognet	LMEMCPY_8_PAD
2024129254Scognet.Lmemcpy_8:
2025129254Scognet	and	r2, r1, #0x03	/* r2 = src & 3 */
2026129254Scognet	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
2027129254Scognet	ands	r2, r2, #0x0f	/* keep the 4 index bits, Z set when index is 0 */
2028129254Scognet	sub	r3, pc, #0x14	/* r3 = .Lmemcpy_8 (pc reads as this insn + 8) */
2029129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_8_LOG2	/* index != 0: jump to its slot */
2030129254Scognet
2031129254Scognet/*
2032129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
 *
 * Copied as two words.
2033129254Scognet */
2034129254Scognet	ldr	r2, [r1]
2035129254Scognet	ldr	r3, [r1, #0x04]
2036129254Scognet	str	r2, [r0]
2037129254Scognet	str	r3, [r0, #0x04]
2038137463Scognet	RET
2039129254Scognet	LMEMCPY_8_PAD
2040129254Scognet
2041129254Scognet/*
2042129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned
 *
 * Register pictures list bytes most significant first: a digit is a
 * source byte index, x is an unrelated byte, . is a zero byte.
 * src is one byte past a word boundary.  Aligned word loads at r1-1
 * and r1+3 plus a byte load at r1+7 are merged into words 0..3 and
 * 4..7.
2043129254Scognet */
2044129254Scognet	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
2045129254Scognet	ldr	r2, [r1, #0x03]		/* BE:r2 = 3456  LE:r2 = 6543 */
2046129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2047129254Scognet#ifdef __ARMEB__
2048129254Scognet	mov	r3, r3, lsl #8		/* r3 = 012. */
2049129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
2050129254Scognet	orr	r2, r1, r2, lsl #8	/* r2 = 4567 */
2051129254Scognet#else
2052129254Scognet	mov	r3, r3, lsr #8		/* r3 = .210 */
2053129254Scognet	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
2054129254Scognet	mov	r1, r1, lsl #24		/* r1 = 7... */
2055129254Scognet	orr	r2, r1, r2, lsr #8	/* r2 = 7654 */
2056129254Scognet#endif
2057129254Scognet	str	r3, [r0]
2058129254Scognet	str	r2, [r0, #0x04]
2059137463Scognet	RET
2060129254Scognet	LMEMCPY_8_PAD
2061129254Scognet
2062129254Scognet/*
2063129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned
 *
 * Halfword, word, halfword loads at r1, r1+2 and r1+6 are merged into
 * words 0..3 and 4..7, which are stored at r0 and r0+4.
2064129254Scognet */
2065129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2066129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2067129254Scognet	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2068129254Scognet#ifdef __ARMEB__
2069129254Scognet	mov	r2, r2, lsl #16		/* r2 = 01.. */
2070129254Scognet	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2071129254Scognet	orr	r3, r1, r3, lsl #16	/* r3 = 4567 */
2072129254Scognet#else
2073129254Scognet	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2074129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..54 */
2075129254Scognet	orr	r3, r3, r1, lsl #16	/* r3 = 7654 */
2076129254Scognet#endif
2077129254Scognet	str	r2, [r0]
2078129254Scognet	str	r3, [r0, #0x04]
2079137463Scognet	RET
2080129254Scognet	LMEMCPY_8_PAD
2081129254Scognet
2082129254Scognet/*
2083129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned
 *
 * src is three bytes past a word boundary.  A byte load at r1 and
 * aligned word loads at r1+1 and r1+5 are merged into words 0..3 and
 * 4..7.  The last load also brings in one unrelated byte (x) that is
 * shifted out.
2084129254Scognet */
2085129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2086129254Scognet	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2087129254Scognet	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x  LE:r1 = x765 */
2088129254Scognet#ifdef __ARMEB__
2089129254Scognet	mov	r3, r3, lsl #24		/* r3 = 0... */
2090129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
2091129254Scognet	mov	r2, r2, lsl #24		/* r2 = 4... */
2092129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 4567 */
2093129254Scognet#else
2094129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
2095129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...4 */
2096129254Scognet	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
2097129254Scognet#endif
2098129254Scognet	str	r3, [r0]
2099129254Scognet	str	r2, [r0, #0x04]
2100137463Scognet	RET
2101129254Scognet	LMEMCPY_8_PAD
2102129254Scognet
2103129254Scognet/*
2104129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * Two word loads.  Output is byte 0 at r0, halfword 1..2 at r0+1, word
 * 3..6 at r0+3 and byte 7 at r0+7.  The two byte stores are done inside
 * the endian-specific part; the halfword and word stores are shared.
2105129254Scognet */
2106129254Scognet	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
2107129254Scognet	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
2108129254Scognet#ifdef __ARMEB__
2109129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...0 */
2110129254Scognet	strb	r1, [r0]
2111129254Scognet	mov	r1, r3, lsr #8		/* r1 = .012 */
2112129254Scognet	strb	r2, [r0, #0x07]
2113129254Scognet	mov	r3, r3, lsl #24		/* r3 = 3... */
2114129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 3456 */
2115129254Scognet#else
2116129254Scognet	strb	r3, [r0]
2117129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...7 */
2118129254Scognet	strb	r1, [r0, #0x07]
2119129254Scognet	mov	r1, r3, lsr #8		/* r1 = .321 */
2120129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...3 */
2121129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
2122129254Scognet#endif
2123129254Scognet	strh	r1, [r0, #0x01]
2124129254Scognet	str	r3, [r0, #0x03]
2125137463Scognet	RET
2126129254Scognet	LMEMCPY_8_PAD
2127129254Scognet
2128129254Scognet/*
2129129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Byte, halfword, word, byte at offsets 0, 1, 3 and 7 on both sides,
 * so no shifting is needed.  All loads are issued before the stores.
2130129254Scognet */
2131129254Scognet	ldrb	r2, [r1]
2132129254Scognet	ldrh	r3, [r1, #0x01]
2133129254Scognet	ldr	ip, [r1, #0x03]
2134129254Scognet	ldrb	r1, [r1, #0x07]
2135129254Scognet	strb	r2, [r0]
2136129254Scognet	strh	r3, [r0, #0x01]
2137129254Scognet	str	ip, [r0, #0x03]
2138129254Scognet	strb	r1, [r0, #0x07]
2139137463Scognet	RET
2140129254Scognet	LMEMCPY_8_PAD
2141129254Scognet
2142129254Scognet/*
2143129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * Halfword, word, halfword loads at r1, r1+2 and r1+6.  Output is byte
 * 0 at r0, halfword 1..2 at r0+1, word 3..6 at r0+3 and byte 7 at r0+7.
 * ip carries first the byte being stored and then halfword 1..2.
2144129254Scognet */
2145129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2146129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2147129254Scognet	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2148129254Scognet#ifdef __ARMEB__
2149129254Scognet	mov	ip, r2, lsr #8		/* ip = ...0 */
2150129254Scognet	strb	ip, [r0]
2151129254Scognet	mov	ip, r2, lsl #8		/* ip = .01. */
2152129254Scognet	orr	ip, ip, r3, lsr #24	/* ip = .012 */
2153129254Scognet	strb	r1, [r0, #0x07]
2154129254Scognet	mov	r3, r3, lsl #8		/* r3 = 345. */
2155129254Scognet	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
2156129254Scognet#else
2157129254Scognet	strb	r2, [r0]		/* 0 */
2158129254Scognet	mov	ip, r1, lsr #8		/* ip = ...7 */
2159129254Scognet	strb	ip, [r0, #0x07]		/* 7 */
2160129254Scognet	mov	ip, r2, lsr #8		/* ip = ...1 */
2161129254Scognet	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2162129254Scognet	mov	r3, r3, lsr #8		/* r3 = .543 */
2163129254Scognet	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
2164129254Scognet#endif
2165129254Scognet	strh	ip, [r0, #0x01]
2166129254Scognet	str	r3, [r0, #0x03]
2167137463Scognet	RET
2168129254Scognet	LMEMCPY_8_PAD
2169129254Scognet
2170129254Scognet/*
2171129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * The source is read as byte, word, halfword, byte (offsets 0, 1, 5,
 * 7) but the destination needs byte, halfword, word, byte (offsets 0,
 * 1, 3, 7).  Word 1..4 and halfword 5..6 are therefore re-split into
 * halfword 1..2 and word 3..6.  Bytes 0 and 7 are copied as loaded.
2172129254Scognet */
2173129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2174129254Scognet	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2175129254Scognet	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
2176129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2177129254Scognet	strb	r3, [r0]
2178129254Scognet	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
2179129254Scognet#ifdef __ARMEB__
2180129254Scognet	strh	r3, [r0, #0x01]
2181129254Scognet	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
2182129254Scognet#else
2183129254Scognet	strh	ip, [r0, #0x01]
2184129254Scognet	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
2185129254Scognet#endif
2186129254Scognet	str	r2, [r0, #0x03]
2187129254Scognet	strb	r1, [r0, #0x07]
2188137463Scognet	RET
2189129254Scognet	LMEMCPY_8_PAD
2190129254Scognet
2191129254Scognet/*
2192129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned
 *
 * Two word loads.  Output is halfword 0..1 at r0, word 2..5 at r0+2
 * and halfword 6..7 at r0+6.  On big-endian halfword 6..7 is already
 * the low half of r3, so r3 is stored without shifting.
2193129254Scognet */
2194129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2195129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2196129254Scognet	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2197129254Scognet#ifdef __ARMEB__
2198129254Scognet	strh	r1, [r0]
2199129254Scognet	mov	r1, r3, lsr #16		/* r1 = ..45 */
2200129254Scognet	orr	r2, r1 ,r2, lsl #16	/* r2 = 2345 */
2201129254Scognet#else
2202129254Scognet	strh	r2, [r0]
2203129254Scognet	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
2204129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..76 */
2205129254Scognet#endif
2206129254Scognet	str	r2, [r0, #0x02]
2207129254Scognet	strh	r3, [r0, #0x06]
2208137463Scognet	RET
2209129254Scognet	LMEMCPY_8_PAD
2210129254Scognet
2211129254Scognet/*
2212129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned
 *
 * Aligned word loads at r1-1 and r1+3 plus a byte load at r1+7.
 * Halfword 0..1 is stored first, then word 2..5 and halfword 6..7 are
 * assembled and stored at r0+2 and r0+6.
2213129254Scognet */
2214129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2215129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2216129254Scognet	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
2217129254Scognet	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
2218129254Scognet	strh	r1, [r0]
2219129254Scognet#ifdef __ARMEB__
2220129254Scognet	mov	r1, r2, lsl #24		/* r1 = 2... */
2221129254Scognet	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
2222129254Scognet	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
2223129254Scognet#else
2224129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...2 */
2225129254Scognet	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
2226129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...6 */
2227129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
2228129254Scognet#endif
2229129254Scognet	str	r1, [r0, #0x02]
2230129254Scognet	strh	r3, [r0, #0x06]
2231137463Scognet	RET
2232129254Scognet	LMEMCPY_8_PAD
2233129254Scognet
2234129254Scognet/*
2235129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned
 *
 * Halfword, word, halfword at offsets 0, 2 and 6 on both sides; the
 * pieces are copied unchanged.
2236129254Scognet */
2237129254Scognet	ldrh	r2, [r1]
2238129254Scognet	ldr	ip, [r1, #0x02]
2239129254Scognet	ldrh	r3, [r1, #0x06]
2240129254Scognet	strh	r2, [r0]
2241129254Scognet	str	ip, [r0, #0x02]
2242129254Scognet	strh	r3, [r0, #0x06]
2243137463Scognet	RET
2244129254Scognet	LMEMCPY_8_PAD
2245129254Scognet
2246129254Scognet/*
2247129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned
 *
 * Aligned word loads at r1+5 and r1+1 plus a byte load at r1.  The
 * copy is written back to front: halfword 6..7 at r0+6 first, then
 * word 2..5 at r0+2 and finally halfword 0..1 at r0.
2248129254Scognet */
2249129254Scognet	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
2250129254Scognet	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2251129254Scognet	ldrb	ip, [r1]		/* ip = ...0 */
2252129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
2253129254Scognet	strh	r1, [r0, #0x06]
2254129254Scognet#ifdef __ARMEB__
2255129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...5 */
2256129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
2257129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...1 */
2258129254Scognet	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
2259129254Scognet#else
2260129254Scognet	mov	r3, r3, lsl #24		/* r3 = 5... */
2261129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
2262129254Scognet	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
2263129254Scognet#endif
2264129254Scognet	str	r3, [r0, #0x02]
2265129254Scognet	strh	r2, [r0]
2266137463Scognet	RET
2267129254Scognet	LMEMCPY_8_PAD
2268129254Scognet
2269129254Scognet/*
2270129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned
 *
 * Two word loads (r1+4 first, then r1).  Output is byte 0 at r0, word
 * 1..4 at r0+1, halfword 5..6 at r0+5 and byte 7 at r0+7.  Halfword
 * 5..6 is stored first because the same shift yields it for both byte
 * orders.
2271129254Scognet */
2272129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2273129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2274129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
2275129254Scognet	strh	r1, [r0, #0x05]
2276129254Scognet#ifdef __ARMEB__
2277129254Scognet	strb	r3, [r0, #0x07]
2278129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...0 */
2279129254Scognet	strb	r1, [r0]
2280129254Scognet	mov	r2, r2, lsl #8		/* r2 = 123. */
2281129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
2282129254Scognet	str	r2, [r0, #0x01]
2283129254Scognet#else
2284129254Scognet	strb	r2, [r0]
2285129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...7 */
2286129254Scognet	strb	r1, [r0, #0x07]
2287129254Scognet	mov	r2, r2, lsr #8		/* r2 = .321 */
2288129254Scognet	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
2289129254Scognet	str	r2, [r0, #0x01]
2290129254Scognet#endif
2291137463Scognet	RET
2292129254Scognet	LMEMCPY_8_PAD
2293129254Scognet
2294129254Scognet/*
2295129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned
 *
 * The source is read as byte, halfword, word, byte (offsets 0, 1, 3,
 * 7) but the destination needs byte, word, halfword, byte (offsets 0,
 * 1, 5, 7).  Halfword 1..2 and word 3..6 are therefore re-split into
 * word 1..4 and halfword 5..6.  Bytes 0 and 7 are copied as loaded.
2296129254Scognet */
2297129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2298129254Scognet	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
2299129254Scognet	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2300129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2301129254Scognet	strb	r3, [r0]
2302129254Scognet	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
2303129254Scognet#ifdef __ARMEB__
2304129254Scognet	strh	ip, [r0, #0x05]
2305129254Scognet	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
2306129254Scognet#else
2307129254Scognet	strh	r3, [r0, #0x05]
2308129254Scognet	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
2309129254Scognet#endif
2310129254Scognet	str	r2, [r0, #0x01]
2311129254Scognet	strb	r1, [r0, #0x07]
2312137463Scognet	RET
2313129254Scognet	LMEMCPY_8_PAD
2314129254Scognet
2315129254Scognet/*
2316129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned
 *
 * Halfword, word, halfword loads at r1, r1+2 and r1+6.  Output is byte
 * 0 at r0, word 1..4 at r0+1, halfword 5..6 at r0+5 and byte 7 at
 * r0+7.  The byte stores happen inside the endian-specific part; the
 * word and halfword stores are shared.
2317129254Scognet */
2318129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2319129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2320129254Scognet	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2321129254Scognet#ifdef __ARMEB__
2322129254Scognet	mov	ip, r2, lsr #8		/* ip = ...0 */
2323129254Scognet	strb	ip, [r0]
2324129254Scognet	mov	ip, r2, lsl #24		/* ip = 1... */
2325129254Scognet	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
2326129254Scognet	strb	r1, [r0, #0x07]
2327129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...6 */
2328129254Scognet	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
2329129254Scognet#else
2330129254Scognet	strb	r2, [r0]
2331129254Scognet	mov	ip, r2, lsr #8		/* ip = ...1 */
2332129254Scognet	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2333129254Scognet	mov	r2, r1, lsr #8		/* r2 = ...7 */
2334129254Scognet	strb	r2, [r0, #0x07]
2335129254Scognet	mov	r1, r1, lsl #8		/* r1 = .76. */
2336129254Scognet	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
2337129254Scognet#endif
2338129254Scognet	str	ip, [r0, #0x01]
2339129254Scognet	strh	r1, [r0, #0x05]
2340137463Scognet	RET
2341129254Scognet	LMEMCPY_8_PAD
2342129254Scognet
2343129254Scognet/*
2344129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned
 *
 * Byte, word, halfword, byte at offsets 0, 1, 5 and 7 on both sides;
 * the pieces are copied unchanged with all loads issued first.
2345129254Scognet */
2346129254Scognet	ldrb	r2, [r1]
2347129254Scognet	ldr	ip, [r1, #0x01]
2348129254Scognet	ldrh	r3, [r1, #0x05]
2349129254Scognet	ldrb	r1, [r1, #0x07]
2350129254Scognet	strb	r2, [r0]
2351129254Scognet	str	ip, [r0, #0x01]
2352129254Scognet	strh	r3, [r0, #0x05]
2353129254Scognet	strb	r1, [r0, #0x07]
2354137463Scognet	RET
2355129254Scognet	LMEMCPY_8_PAD
2356129254Scognet
2357129254Scognet/******************************************************************************
2358129254Scognet * Special case for 12 byte copies
 *
 * r0 is the destination and r1 the source (every store below goes
 * through r0, every load through r1).  The entry code builds a 4-bit
 * index, ((r0 & 3) << 2) | (r1 & 3), and jumps to one of 16 handlers.
 * Each handler is padded by LMEMCPY_C_PAD to a 128-byte slot, so
 * handler N starts at .Lmemcpy_c + (N << LMEMCPY_C_LOG2).  Index 0 does
 * not jump: it falls through into the 0000 handler, which shares the
 * first slot with the entry code.  Handlers must therefore never
 * outgrow their slot.
2359129254Scognet */
2360129254Scognet#define	LMEMCPY_C_LOG2	7	/* 128 bytes */
2361129254Scognet#define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
2362129254Scognet	LMEMCPY_C_PAD
2363129254Scognet.Lmemcpy_c:
2364129254Scognet	and	r2, r1, #0x03	/* r2 = src & 3 */
2365129254Scognet	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
2366129254Scognet	ands	r2, r2, #0x0f	/* keep the 4 index bits, Z set when index is 0 */
2367129254Scognet	sub	r3, pc, #0x14	/* r3 = .Lmemcpy_c (pc reads as this insn + 8) */
2368129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* index != 0: jump to its slot */
2369129254Scognet
2370129254Scognet/*
2371129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
 *
 * Copied as three words; r1 is reused for the last one.
2372129254Scognet */
2373129254Scognet	ldr	r2, [r1]
2374129254Scognet	ldr	r3, [r1, #0x04]
2375129254Scognet	ldr	r1, [r1, #0x08]
2376129254Scognet	str	r2, [r0]
2377129254Scognet	str	r3, [r0, #0x04]
2378129254Scognet	str	r1, [r0, #0x08]
2379137463Scognet	RET
2380129254Scognet	LMEMCPY_C_PAD
2381129254Scognet
2382129254Scognet/*
2383129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned
 *
 * Register pictures list bytes most significant first: a digit or
 * letter is a source byte index (0..9, A, B), x is an unrelated byte,
 * . is a zero byte.
 * src is one byte past a word boundary.  A byte load at r1+0xb and
 * aligned word loads at r1+7, r1+3 and r1-1 are merged into three
 * words, which are stored highest first (r0+8, r0+4, r0).
2384129254Scognet */
2385129254Scognet	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
2386129254Scognet	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2387129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2388129254Scognet	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2389129254Scognet#ifdef __ARMEB__
2390129254Scognet	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
2391129254Scognet	str	r2, [r0, #0x08]
2392129254Scognet	mov	r2, ip, lsr #24		/* r2 = ...7 */
2393129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
2394129254Scognet	mov	r1, r1, lsl #8		/* r1 = 012. */
2395129254Scognet	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
2396129254Scognet#else
2397129254Scognet	mov	r2, r2, lsl #24		/* r2 = B... */
2398129254Scognet	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
2399129254Scognet	str	r2, [r0, #0x08]
2400129254Scognet	mov	r2, ip, lsl #24		/* r2 = 7... */
2401129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
2402129254Scognet	mov	r1, r1, lsr #8		/* r1 = .210 */
2403129254Scognet	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
2404129254Scognet#endif
2405129254Scognet	str	r2, [r0, #0x04]
2406129254Scognet	str	r1, [r0]
2407137463Scognet	RET
2408129254Scognet	LMEMCPY_C_PAD
2409129254Scognet
2410129254Scognet/*
2411129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned
 *
 * Halfword, word, word, halfword loads at r1, r1+2, r1+6 and r1+0xa
 * are merged pairwise into three words.  Word 0..3 is stored inside
 * the endian-specific part, the other two after it.
2412129254Scognet */
2413129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2414129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2415129254Scognet	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2416129254Scognet	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2417129254Scognet#ifdef __ARMEB__
2418129254Scognet	mov	r2, r2, lsl #16		/* r2 = 01.. */
2419129254Scognet	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2420129254Scognet	str	r2, [r0]
2421129254Scognet	mov	r3, r3, lsl #16		/* r3 = 45.. */
2422129254Scognet	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
2423129254Scognet	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
2424129254Scognet#else
2425129254Scognet	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2426129254Scognet	str	r2, [r0]
2427129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..54 */
2428129254Scognet	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
2429129254Scognet	mov	r1, r1, lsl #16		/* r1 = BA.. */
2430129254Scognet	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
2431129254Scognet#endif
2432129254Scognet	str	r3, [r0, #0x04]
2433129254Scognet	str	r1, [r0, #0x08]
2434137463Scognet	RET
2435129254Scognet	LMEMCPY_C_PAD
2436129254Scognet
2437129254Scognet/*
2438129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned
 *
 * src is three bytes past a word boundary.  A byte load at r1 and
 * aligned word loads at r1+1, r1+5 and r1+9 are merged into three
 * words.  The last load also brings in one unrelated byte (x) that is
 * shifted out.
2439129254Scognet */
2440129254Scognet	ldrb	r2, [r1]		/* r2 = ...0 */
2441129254Scognet	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2442129254Scognet	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2443129254Scognet	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2444129254Scognet#ifdef __ARMEB__
2445129254Scognet	mov	r2, r2, lsl #24		/* r2 = 0... */
2446129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
2447129254Scognet	str	r2, [r0]
2448129254Scognet	mov	r3, r3, lsl #24		/* r3 = 4... */
2449129254Scognet	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
2450129254Scognet	mov	r1, r1, lsr #8		/* r1 = .9AB */
2451129254Scognet	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
2452129254Scognet#else
2453129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
2454129254Scognet	str	r2, [r0]
2455129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...4 */
2456129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
2457129254Scognet	mov	r1, r1, lsl #8		/* r1 = BA9. */
2458129254Scognet	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
2459129254Scognet#endif
2460129254Scognet	str	r3, [r0, #0x04]
2461129254Scognet	str	r1, [r0, #0x08]
2462137463Scognet	RET
2463129254Scognet	LMEMCPY_C_PAD
2464129254Scognet
2465129254Scognet/*
2466129254Scognet * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 *
 * Three word loads from [r1], then the data is written to [r0] as
 * byte (offset 0x00), halfword (0x01), word (0x03), word (0x07) and
 * byte (0x0b), using 8/24-bit shifts to realign the words.
 * Register pictures: one character per byte, most significant first;
 * 0-9, A, B are source bytes 0-11 and '.' is a zero byte.
 * r1 is reused as scratch once all three loads have been issued.
2467129254Scognet */
2468129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2469129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2470129254Scognet	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
2471129254Scognet	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
2472129254Scognet	strh	r1, [r0, #0x01]		/* dst bytes 1-2 (low halfword of r1) */
2473129254Scognet#ifdef __ARMEB__
2474129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...0 */
2475129254Scognet	strb	r1, [r0]		/* dst byte 0 */
2476129254Scognet	mov	r1, r2, lsl #24		/* r1 = 3... */
2477129254Scognet	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
2478129254Scognet	mov	r1, r3, lsl #24		/* r1 = 7... */
2479129254Scognet	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
2480129254Scognet#else
2481129254Scognet	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
2482129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...3 */
2483129254Scognet	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
2484129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...7 */
2485129254Scognet	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
2486129254Scognet	mov	ip, ip, lsr #24		/* ip = ...B */
2487129254Scognet#endif
2488129254Scognet	str	r2, [r0, #0x03]		/* dst bytes 3-6 */
2489129254Scognet	str	r1, [r0, #0x07]		/* dst bytes 7-A */
2490129254Scognet	strb	ip, [r0, #0x0b]		/* dst byte B (low byte of ip) */
2491137463Scognet	RET
2492129254Scognet	LMEMCPY_C_PAD
2493129254Scognet
2494129254Scognet/*
2495129254Scognet * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 *
 * Source and destination share the same offset within a word, so no
 * shifting or endian-specific code is needed: the 12 bytes are moved as
 * byte (0x00), halfword (0x01), word (0x03), word (0x07), byte (0x0b),
 * with loads from [r1] interleaved with stores to [r0].
 * r1 is overwritten by the final byte load.
2496129254Scognet */
2497129254Scognet	ldrb	r2, [r1]		/* src byte 0 */
2498129254Scognet	ldrh	r3, [r1, #0x01]		/* src bytes 1-2 */
2499129254Scognet	ldr	ip, [r1, #0x03]		/* src bytes 3-6 */
2500129254Scognet	strb	r2, [r0]		/* dst byte 0; frees r2 for the next load */
2501129254Scognet	ldr	r2, [r1, #0x07]		/* src bytes 7-A */
2502129254Scognet	ldrb	r1, [r1, #0x0b]		/* src byte B */
2503129254Scognet	strh	r3, [r0, #0x01]		/* dst bytes 1-2 */
2504129254Scognet	str	ip, [r0, #0x03]		/* dst bytes 3-6 */
2505129254Scognet	str	r2, [r0, #0x07]		/* dst bytes 7-A */
2506129254Scognet	strb	r1, [r0, #0x0b]		/* dst byte B */
2507137463Scognet	RET
2508129254Scognet	LMEMCPY_C_PAD
2509129254Scognet
2510129254Scognet/*
2511129254Scognet * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 *
 * The source is fetched from [r1] as halfword, word, word, halfword
 * (offsets 0x00, 0x02, 0x06, 0x0a) and written to [r0] as byte (0x00),
 * halfword (0x01), word (0x03), word (0x07) and byte (0x0b), using
 * 8/24-bit shifts (plus one rotate on big-endian) to realign the data.
 * Register pictures: one character per byte, most significant first;
 * 0-9, A, B are source bytes 0-11 and '.' is a zero byte.
 * r1 is overwritten by the last load.
2512129254Scognet */
2513129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2514129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2515129254Scognet	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2516129254Scognet	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2517129254Scognet#ifdef __ARMEB__
2518129254Scognet	mov	r2, r2, ror #8		/* r2 = 1..0 */
2519129254Scognet	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
2520129254Scognet	mov	r2, r2, lsr #16		/* r2 = ..1. */
2521129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
2522129254Scognet	strh	r2, [r0, #0x01]		/* dst bytes 1-2 */
2523129254Scognet	mov	r2, r3, lsl #8		/* r2 = 345. */
2524129254Scognet	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
2525129254Scognet	mov	r2, ip, lsl #8		/* r2 = 789. */
2526129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
2527129254Scognet#else
2528129254Scognet	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
2529129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
2530129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2531129254Scognet	strh	r2, [r0, #0x01]		/* dst bytes 1-2 (low halfword of r2) */
2532129254Scognet	mov	r2, r3, lsr #8		/* r2 = .543 */
2533129254Scognet	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
2534129254Scognet	mov	r2, ip, lsr #8		/* r2 = .987 */
2535129254Scognet	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
2536129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...B */
2537129254Scognet#endif
2538129254Scognet	str	r3, [r0, #0x03]		/* dst bytes 3-6 */
2539129254Scognet	str	r2, [r0, #0x07]		/* dst bytes 7-A */
2540129254Scognet	strb	r1, [r0, #0x0b]		/* dst byte B (low byte of r1) */
2541137463Scognet	RET
2542129254Scognet	LMEMCPY_C_PAD
2543129254Scognet
2544129254Scognet/*
2545129254Scognet * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 *
 * The source is fetched from [r1] as one byte plus three words at
 * offsets 0x01, 0x05 and 0x09, and written to [r0] as byte (0x00),
 * halfword (0x01), word (0x03), word (0x07) and byte (0x0b), using
 * 16-bit shifts to realign the words.
 * The word load at offset 0x09 covers source offset 0x0c, one byte past
 * the 12 being copied; that byte ('x' below) never reaches a store.
 * Register pictures: one character per byte, most significant first;
 * 0-9, A, B are source bytes 0-11 and '.' is a zero byte.
2546129254Scognet */
2547129254Scognet	ldrb	r2, [r1]		/* r2 = ...0 */
2548129254Scognet	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2549129254Scognet	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2550129254Scognet	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2551129254Scognet	strb	r2, [r0]		/* dst byte 0 */
2552129254Scognet#ifdef __ARMEB__
2553129254Scognet	mov	r2, r3, lsr #16		/* r2 = ..12 */
2554129254Scognet	strh	r2, [r0, #0x01]		/* dst bytes 1-2 */
2555129254Scognet	mov	r3, r3, lsl #16		/* r3 = 34.. */
2556129254Scognet	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
2557129254Scognet	mov	ip, ip, lsl #16		/* ip = 78.. */
2558129254Scognet	orr	ip, ip, r1, lsr #16	/* ip = 789A */
2559129254Scognet	mov	r1, r1, lsr #8		/* r1 = .9AB */
2560129254Scognet#else
2561129254Scognet	strh	r3, [r0, #0x01]		/* dst bytes 1-2 (low halfword of r3) */
2562129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..43 */
2563129254Scognet	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
2564129254Scognet	mov	ip, ip, lsr #16		/* ip = ..87 */
2565129254Scognet	orr	ip, ip, r1, lsl #16	/* ip = A987 */
2566129254Scognet	mov	r1, r1, lsr #16		/* r1 = ..xB */
2567129254Scognet#endif
2568129254Scognet	str	r3, [r0, #0x03]		/* dst bytes 3-6 */
2569129254Scognet	str	ip, [r0, #0x07]		/* dst bytes 7-A */
2570129254Scognet	strb	r1, [r0, #0x0b]		/* dst byte B (low byte of r1) */
2571137463Scognet	RET
2572129254Scognet	LMEMCPY_C_PAD
2573129254Scognet
2574129254Scognet/*
2575129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned
 *
 * Three word loads from [r1], then the data is written to [r0] as
 * halfword (offset 0x00), word (0x02), word (0x06) and halfword (0x0a),
 * using 16-bit shifts to realign the words.
 * Register pictures: one character per byte, most significant first;
 * 0-9, A, B are source bytes 0-11 and '.' is a zero byte.
 * r1 is reused as scratch once all three loads have been issued.
2576129254Scognet */
2577129254Scognet	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
2578129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2579129254Scognet	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
2580129254Scognet	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2581129254Scognet#ifdef __ARMEB__
2582129254Scognet	strh	r1, [r0]		/* dst bytes 0-1 */
2583129254Scognet	mov	r1, ip, lsl #16		/* r1 = 23.. */
2584129254Scognet	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
2585129254Scognet	mov	r3, r3, lsl #16		/* r3 = 67.. */
2586129254Scognet	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
2587129254Scognet#else
2588129254Scognet	strh	ip, [r0]		/* dst bytes 0-1 (low halfword of ip) */
2589129254Scognet	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
2590129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..76 */
2591129254Scognet	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
2592129254Scognet	mov	r2, r2, lsr #16		/* r2 = ..BA */
2593129254Scognet#endif
2594129254Scognet	str	r1, [r0, #0x02]		/* dst bytes 2-5 */
2595129254Scognet	str	r3, [r0, #0x06]		/* dst bytes 6-9 */
2596129254Scognet	strh	r2, [r0, #0x0a]		/* dst bytes A-B (low halfword of r2) */
2597137463Scognet	RET
2598129254Scognet	LMEMCPY_C_PAD
2599129254Scognet
2600129254Scognet/*
2601129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 *
 * The source is fetched from [r1] as three words at offsets -1, 0x03
 * and 0x07 plus a final byte at 0x0b, and written to [r0] as halfword
 * (0x00), word (0x02), word (0x06) and halfword (0x0a), using 8/24-bit
 * shifts to realign the data.
 * The word load at offset -1 covers the byte just before the source;
 * that byte ('x' below) never reaches memory: the halfword store keeps
 * only the low 16 bits and the later shifts drop it.
 * Register pictures: one character per byte, most significant first;
 * 0-9, A, B are source bytes 0-11 and '.' is a zero byte.
2602129254Scognet */
2603129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2604129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2605129254Scognet	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
2606129254Scognet	strh	ip, [r0]		/* dst bytes 0-1 (low halfword of ip) */
2607129254Scognet	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2608129254Scognet	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
2609129254Scognet#ifdef __ARMEB__
2610129254Scognet	mov	r2, r2, lsl #24		/* r2 = 2... */
2611129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
2612129254Scognet	mov	r3, r3, lsl #24		/* r3 = 6... */
2613129254Scognet	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
2614129254Scognet	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
2615129254Scognet#else
2616129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...2 */
2617129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
2618129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...6 */
2619129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
2620129254Scognet	mov	r1, r1, lsl #8		/* r1 = ..B. */
2621129254Scognet	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
2622129254Scognet#endif
2623129254Scognet	str	r2, [r0, #0x02]		/* dst bytes 2-5 */
2624129254Scognet	str	r3, [r0, #0x06]		/* dst bytes 6-9 */
2625129254Scognet	strh	r1, [r0, #0x0a]		/* dst bytes A-B (low halfword of r1) */
2626137463Scognet	RET
2627129254Scognet	LMEMCPY_C_PAD
2628129254Scognet
2629129254Scognet/*
2630129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned
 *
 * Source and destination share the same offset within a word, so no
 * shifting or endian-specific code is needed: the 12 bytes are loaded
 * from [r1] and stored to [r0] as halfword (0x00), word (0x02),
 * word (0x06), halfword (0x0a).  r1 is overwritten by the last load.
2631129254Scognet */
2632129254Scognet	ldrh	r2, [r1]		/* src bytes 0-1 */
2633129254Scognet	ldr	r3, [r1, #0x02]		/* src bytes 2-5 */
2634129254Scognet	ldr	ip, [r1, #0x06]		/* src bytes 6-9 */
2635129254Scognet	ldrh	r1, [r1, #0x0a]		/* src bytes A-B */
2636129254Scognet	strh	r2, [r0]		/* dst bytes 0-1 */
2637129254Scognet	str	r3, [r0, #0x02]		/* dst bytes 2-5 */
2638129254Scognet	str	ip, [r0, #0x06]		/* dst bytes 6-9 */
2639129254Scognet	strh	r1, [r0, #0x0a]		/* dst bytes A-B */
2640137463Scognet	RET
2641129254Scognet	LMEMCPY_C_PAD
2642129254Scognet
2643129254Scognet/*
2644129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 *
 * Works from the end of the buffer towards the start: the source is
 * fetched from [r1] as three words at offsets 0x09, 0x05 and 0x01 plus
 * a final byte at 0x00, and written to [r0] as halfword (0x0a),
 * word (0x06), word (0x02) and halfword (0x00).
 * The word load at offset 0x09 covers source offset 0x0c, one byte past
 * the 12 being copied; that byte ('x' below) never reaches memory: the
 * halfword store keeps only the low 16 bits and the later shifts drop it.
 * Register pictures: one character per byte, most significant first;
 * 0-9, A, B are source bytes 0-11 and '.' is a zero byte.
2645129254Scognet */
2646129254Scognet	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
2647129254Scognet	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
2648129254Scognet	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
2649129254Scognet	strh	ip, [r0, #0x0a]		/* dst bytes A-B (low halfword of ip) */
2650129254Scognet	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2651129254Scognet	ldrb	r1, [r1]		/* r1 = ...0 */
2652129254Scognet#ifdef __ARMEB__
2653129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...9 */
2654129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
2655129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...5 */
2656129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
2657129254Scognet	mov	r1, r1, lsl #8		/* r1 = ..0. */
2658129254Scognet	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
2659129254Scognet#else
2660129254Scognet	mov	r2, r2, lsl #24		/* r2 = 9... */
2661129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
2662129254Scognet	mov	r3, r3, lsl #24		/* r3 = 5... */
2663129254Scognet	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
2664129254Scognet	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
2665129254Scognet#endif
2666129254Scognet	str	r2, [r0, #0x06]		/* dst bytes 6-9 */
2667129254Scognet	str	r3, [r0, #0x02]		/* dst bytes 2-5 */
2668129254Scognet	strh	r1, [r0]		/* dst bytes 0-1 (low halfword of r1) */
2669137463Scognet	RET
2670129254Scognet	LMEMCPY_C_PAD
2671129254Scognet
2672129254Scognet/*
2673129254Scognet * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 *
 * Three word loads from [r1] (the last one overwrites r1), then the
 * data is written to [r0] as byte (offset 0x00), word (0x01),
 * word (0x05), halfword (0x09) and byte (0x0b), using 8/24-bit shifts
 * to realign the words.  All stores are inside the endian-specific
 * branches.
 * Register pictures: one character per byte, most significant first;
 * 0-9, A, B are source bytes 0-11 and '.' is a zero byte.
2674129254Scognet */
2675129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2676129254Scognet	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
2677129254Scognet	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
2678129254Scognet#ifdef __ARMEB__
2679129254Scognet	mov	r3, r2, lsr #24		/* r3 = ...0 */
2680129254Scognet	strb	r3, [r0]		/* dst byte 0 */
2681129254Scognet	mov	r2, r2, lsl #8		/* r2 = 123. */
2682129254Scognet	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
2683129254Scognet	str	r2, [r0, #0x01]		/* dst bytes 1-4 */
2684129254Scognet	mov	r2, ip, lsl #8		/* r2 = 567. */
2685129254Scognet	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
2686129254Scognet	str	r2, [r0, #0x05]		/* dst bytes 5-8 */
2687129254Scognet	mov	r2, r1, lsr #8		/* r2 = .89A */
2688129254Scognet	strh	r2, [r0, #0x09]		/* dst bytes 9-A (low halfword of r2) */
2689129254Scognet	strb	r1, [r0, #0x0b]		/* dst byte B (low byte of r1) */
2690129254Scognet#else
2691129254Scognet	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
2692129254Scognet	mov	r3, r2, lsr #8		/* r3 = .321 */
2693129254Scognet	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
2694129254Scognet	str	r3, [r0, #0x01]		/* dst bytes 1-4 */
2695129254Scognet	mov	r3, ip, lsr #8		/* r3 = .765 */
2696129254Scognet	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
2697129254Scognet	str	r3, [r0, #0x05]		/* dst bytes 5-8 */
2698129254Scognet	mov	r1, r1, lsr #8		/* r1 = .BA9 */
2699129254Scognet	strh	r1, [r0, #0x09]		/* dst bytes 9-A (low halfword of r1) */
2700129254Scognet	mov	r1, r1, lsr #16		/* r1 = ...B */
2701129254Scognet	strb	r1, [r0, #0x0b]		/* dst byte B */
2702129254Scognet#endif
2703137463Scognet	RET
2704129254Scognet	LMEMCPY_C_PAD
2705129254Scognet
2706129254Scognet/*
2707129254Scognet * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 *
 * Works from the end of the buffer towards the start: the source is
 * fetched from [r1] as a byte at 0x0b plus three words at offsets 0x07,
 * 0x03 and -1, and written to [r0] as byte (0x0b), halfword (0x09),
 * word (0x05), word (0x01) and byte (0x00), using 16-bit shifts to
 * realign the words.
 * The word load at offset -1 covers the byte just before the source;
 * that byte ('x' below) is shifted out or left above the stored byte and
 * never reaches memory.
 * Register pictures: one character per byte, most significant first;
 * 0-9, A, B are source bytes 0-11 and '.' is a zero byte.
2708129254Scognet */
2709129254Scognet	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
2710129254Scognet	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
2711129254Scognet	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2712129254Scognet	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2713129254Scognet	strb	r2, [r0, #0x0b]		/* dst byte B */
2714129254Scognet#ifdef __ARMEB__
2715129254Scognet	strh	r3, [r0, #0x09]		/* dst bytes 9-A (low halfword of r3) */
2716129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..78 */
2717129254Scognet	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
2718129254Scognet	mov	ip, ip, lsr #16		/* ip = ..34 */
2719129254Scognet	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
2720129254Scognet	mov	r1, r1, lsr #16		/* r1 = ..x0 */
2721129254Scognet#else
2722129254Scognet	mov	r2, r3, lsr #16		/* r2 = ..A9 */
2723129254Scognet	strh	r2, [r0, #0x09]		/* dst bytes 9-A */
2724129254Scognet	mov	r3, r3, lsl #16		/* r3 = 87.. */
2725129254Scognet	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
2726129254Scognet	mov	ip, ip, lsl #16		/* ip = 43.. */
2727129254Scognet	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
2728129254Scognet	mov	r1, r1, lsr #8		/* r1 = .210 */
2729129254Scognet#endif
2730129254Scognet	str	r3, [r0, #0x05]		/* dst bytes 5-8 */
2731129254Scognet	str	ip, [r0, #0x01]		/* dst bytes 1-4 */
2732129254Scognet	strb	r1, [r0]		/* dst byte 0 (low byte of r1) */
2733137463Scognet	RET
2734129254Scognet	LMEMCPY_C_PAD
2735129254Scognet
2736129254Scognet/*
2737129254Scognet * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 *
 * The source is fetched from [r1] as halfword, word, word, halfword
 * (offsets 0x00, 0x02, 0x06, 0x0a) and written to [r0] as byte (0x00),
 * word (0x01), word (0x05), halfword (0x09) and byte (0x0b).
 * The big-endian and little-endian variants are entirely separate
 * sequences: the big-endian one loads from the end of the buffer
 * backwards, the little-endian one from the start forwards.
 * Register pictures: one character per byte, most significant first;
 * 0-9, A, B are source bytes 0-11 and '.' is a zero byte.
2738129254Scognet */
2739129254Scognet#ifdef __ARMEB__
2740129254Scognet	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
2741129254Scognet	ldr	ip, [r1, #0x06]		/* ip = 6789 */
2742129254Scognet	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
2743129254Scognet	ldrh	r1, [r1]		/* r1 = ..01 */
2744129254Scognet	strb	r2, [r0, #0x0b]		/* dst byte B (low byte of r2) */
2745129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...A */
2746129254Scognet	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
2747129254Scognet	mov	ip, ip, lsr #8		/* ip = .678 */
2748129254Scognet	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
2749129254Scognet	mov	r3, r3, lsr #8		/* r3 = .234 */
2750129254Scognet	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
2751129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...0 */
2752129254Scognet	strb	r1, [r0]		/* dst byte 0 */
2753129254Scognet	str	r3, [r0, #0x01]		/* dst bytes 1-4 */
2754129254Scognet	str	ip, [r0, #0x05]		/* dst bytes 5-8 */
2755129254Scognet	strh	r2, [r0, #0x09]		/* dst bytes 9-A (low halfword of r2) */
2756129254Scognet#else
2757129254Scognet	ldrh	r2, [r1]		/* r2 = ..10 */
2758129254Scognet	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
2759129254Scognet	ldr	ip, [r1, #0x06]		/* ip = 9876 */
2760129254Scognet	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
2761129254Scognet	strb	r2, [r0]		/* dst byte 0 (low byte of r2) */
2762129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
2763129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2764129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...5 */
2765129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
2766129254Scognet	mov	ip, ip, lsr #24		/* ip = ...9 */
2767129254Scognet	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
2768129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...B */
2769129254Scognet	str	r2, [r0, #0x01]		/* dst bytes 1-4 */
2770129254Scognet	str	r3, [r0, #0x05]		/* dst bytes 5-8 */
2771129254Scognet	strh	ip, [r0, #0x09]		/* dst bytes 9-A (low halfword of ip) */
2772129254Scognet	strb	r1, [r0, #0x0b]		/* dst byte B */
2773129254Scognet#endif
2774137463Scognet	RET
2775129254Scognet	LMEMCPY_C_PAD
2776129254Scognet
2777129254Scognet/*
2778129254Scognet * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 *
 * Source and destination share the same offset within a word, so no
 * shifting or endian-specific code is needed: the 12 bytes are moved as
 * byte (0x00), word (0x01), word (0x05), halfword (0x09), byte (0x0b),
 * with loads from [r1] interleaved with stores to [r0].
 * r1 is overwritten by the final byte load.
2779129254Scognet */
2780129254Scognet	ldrb	r2, [r1]		/* src byte 0 */
2781129254Scognet	ldr	r3, [r1, #0x01]		/* src bytes 1-4 */
2782129254Scognet	ldr	ip, [r1, #0x05]		/* src bytes 5-8 */
2783129254Scognet	strb	r2, [r0]		/* dst byte 0; frees r2 for the next load */
2784129254Scognet	ldrh	r2, [r1, #0x09]		/* src bytes 9-A */
2785129254Scognet	ldrb	r1, [r1, #0x0b]		/* src byte B */
2786129254Scognet	str	r3, [r0, #0x01]		/* dst bytes 1-4 */
2787129254Scognet	str	ip, [r0, #0x05]		/* dst bytes 5-8 */
2788129254Scognet	strh	r2, [r0, #0x09]		/* dst bytes 9-A */
2789129254Scognet	strb	r1, [r0, #0x0b]		/* dst byte B */
2790137463Scognet	RET
2791129254Scognet#endif /* __XSCALE__ */
2792135654Scognet
2793135654Scognet#ifdef GPROF
/*
 * Assembled only when GPROF is defined: five ENTRY() symbols (user,
 * btrap, etrap, bintr, eintr), each followed by a single nop.
 * There is no return between them, so execution entering at any of
 * these labels would run straight through the nops that follow it.
 * ENTRY() is a macro that is not defined in this part of the file.
 * NOTE(review): these look like address markers for the kernel profiler
 * rather than callable routines -- confirm against the profiling code.
 */
2794135654Scognet
2795135654ScognetENTRY(user)
2796135654Scognet	nop
2797135654ScognetENTRY(btrap)
2798135654Scognet	nop
2799135654ScognetENTRY(etrap)
2800135654Scognet	nop
2801135654ScognetENTRY(bintr)
2802135654Scognet	nop
2803135654ScognetENTRY(eintr)
2804135654Scognet	nop
2805135654Scognet
2806135654Scognet#endif
2807