1129198Scognet/*-
2129198Scognet * Copyright (c) 2004 Olivier Houchard
3129198Scognet * All rights reserved.
4129198Scognet *
5129198Scognet * Redistribution and use in source and binary forms, with or without
6129198Scognet * modification, are permitted provided that the following conditions
7129198Scognet * are met:
8129198Scognet * 1. Redistributions of source code must retain the above copyright
9129198Scognet *    notice, this list of conditions and the following disclaimer.
10129198Scognet * 2. Redistributions in binary form must reproduce the above copyright
11129198Scognet *    notice, this list of conditions and the following disclaimer in the
12129198Scognet *    documentation and/or other materials provided with the distribution.
13129198Scognet *
14129198Scognet * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15129198Scognet * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16129198Scognet * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17129198Scognet * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18129198Scognet * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19129198Scognet * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20129198Scognet * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21129198Scognet * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22129198Scognet * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23129198Scognet * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24129198Scognet * SUCH DAMAGE.
25129198Scognet */
26175255Scognet/*
27175255Scognet * Copyright 2003 Wasabi Systems, Inc.
28175255Scognet * All rights reserved.
29175255Scognet *
30175255Scognet * Written by Steve C. Woodford for Wasabi Systems, Inc.
31175255Scognet *
32175255Scognet * Redistribution and use in source and binary forms, with or without
33175255Scognet * modification, are permitted provided that the following conditions
34175255Scognet * are met:
35175255Scognet * 1. Redistributions of source code must retain the above copyright
36175255Scognet *    notice, this list of conditions and the following disclaimer.
37175255Scognet * 2. Redistributions in binary form must reproduce the above copyright
38175255Scognet *    notice, this list of conditions and the following disclaimer in the
39175255Scognet *    documentation and/or other materials provided with the distribution.
40175255Scognet * 3. All advertising materials mentioning features or use of this software
41175255Scognet *    must display the following acknowledgement:
42175255Scognet *      This product includes software developed for the NetBSD Project by
43175255Scognet *      Wasabi Systems, Inc.
44175255Scognet * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45175255Scognet *    or promote products derived from this software without specific prior
46175255Scognet *    written permission.
47175255Scognet *
48175255Scognet * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49175255Scognet * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50175255Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51175255Scognet * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
52175255Scognet * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53175255Scognet * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54175255Scognet * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55175255Scognet * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56175255Scognet * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57175255Scognet * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58175255Scognet * POSSIBILITY OF SUCH DAMAGE.
59175255Scognet */
60175255Scognet/*
61175255Scognet * Copyright (c) 1997 The NetBSD Foundation, Inc.
62175255Scognet * All rights reserved.
63175255Scognet *
64175255Scognet * This code is derived from software contributed to The NetBSD Foundation
65175255Scognet * by Neil A. Carson and Mark Brinicombe
66175255Scognet *
67175255Scognet * Redistribution and use in source and binary forms, with or without
68175255Scognet * modification, are permitted provided that the following conditions
69175255Scognet * are met:
70175255Scognet * 1. Redistributions of source code must retain the above copyright
71175255Scognet *    notice, this list of conditions and the following disclaimer.
72175255Scognet * 2. Redistributions in binary form must reproduce the above copyright
73175255Scognet *    notice, this list of conditions and the following disclaimer in the
74175255Scognet *    documentation and/or other materials provided with the distribution.
75175255Scognet *
76175255Scognet * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77175255Scognet * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78175255Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79175255Scognet * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80175255Scognet * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81175255Scognet * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82175255Scognet * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83175255Scognet * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84175255Scognet * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85175255Scognet * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86175255Scognet * POSSIBILITY OF SUCH DAMAGE.
87175255Scognet */
88129198Scognet
89129198Scognet#include <machine/asm.h>
90129198Scognet__FBSDID("$FreeBSD$");
91129198Scognet
92129198Scognet#include "assym.s"
93129198Scognet
94275418Sandrew	.syntax	unified
95275418Sandrew
/*
 * Literal pool.  Each word holds the address of a symbol defined outside
 * this file view, so code below can fetch that address with a label-form
 * ldr and then dereference it.
 *
 * bzero reads .L_arm_bzero (the word it points at is tested against 0 and,
 * if non-zero, jumped to) and .L_min_bzero_size (the word it points at is
 * compared with the requested length).  The two memcpy entries are not
 * referenced in the part of the file reviewed here -- presumably they
 * serve memcpy in the same way; confirm against the rest of the file.
 */
96150864Scognet.L_arm_memcpy:
97150864Scognet	.word	_C_LABEL(_arm_memcpy)
98150864Scognet.L_arm_bzero:
99150864Scognet	.word	_C_LABEL(_arm_bzero)
100150864Scognet.L_min_memcpy_size:
101150864Scognet	.word	_C_LABEL(_min_memcpy_size)
102150864Scognet.L_min_bzero_size:
103150864Scognet	.word	_C_LABEL(_min_bzero_size)
104129198Scognet/*
105129250Scognet * memset: Sets a block of memory to the specified value
106129250Scognet *
107129250Scognet * On entry:
108129250Scognet *   r0 - dest address
109129250Scognet *   r1 - byte to write
110129250Scognet *   r2 - number of bytes to write
111129250Scognet *
112129250Scognet * On exit:
113129250Scognet *   r0 - dest address
114129250Scognet */
/*
 * bzero: clear a block of memory.  (The header above describes memset,
 * which follows bzero and shares its fill code.)
 *
 * On entry:
 *   r0 - dest address
 *   r1 - number of bytes to clear
 *
 * If the word stored in _arm_bzero is non-zero and the length is not
 * below the word stored in _min_bzero_size, the routine that _arm_bzero
 * points to is called first with r0 and r1 unchanged and r2 = 0.  When
 * that routine returns 0 in r0, bzero returns straight away.  In every
 * other case (nothing installed, short length, or a non-zero result)
 * bzero loads the fill value r3 = 0 and branches to do_memset, which
 * expects r0 = dest, r1 = byte count and r3 = fill byte.
 */
115129250Scognet/* LINTSTUB: Func: void bzero(void *, size_t) */
116129250ScognetENTRY(bzero)
117150864Scognet	ldr	r3, .L_arm_bzero	/* r3 = &_arm_bzero */
118150864Scognet	ldr	r3, [r3]		/* r3 = word stored there */
119150864Scognet	cmp	r3, #0
120150864Scognet	beq	.Lnormal0		/* Nothing installed: plain fill */
121150864Scognet	ldr	r2, .L_min_bzero_size
122150864Scognet	ldr	r2, [r2]		/* r2 = _min_bzero_size */
123150864Scognet	cmp	r1, r2
124150864Scognet	blt	.Lnormal0		/* len < minimum (signed compare) */
125150864Scognet	stmfd	sp!, {r0, r1, lr}	/* Keep dest, len and return address */
126150864Scognet	mov	r2, #0			/* Third argument for the routine */
127150864Scognet	mov	lr, pc			/* lr = insn after the mov pc below */
128150864Scognet	mov	pc, r3			/* Call the routine held in r3 */
129150864Scognet	cmp	r0, #0			/* Test result; ldmfd keeps the flags */
130150864Scognet	ldmfd	sp!, {r0, r1, lr}	/* Restore dest, len, return address */
131150864Scognet	RETeq				/* Routine returned 0: all done */
132150864Scognet.Lnormal0:
133129250Scognet	mov	r3, #0x00		/* Fill byte is zero */
134129250Scognet	b	do_memset		/* Continue in the shared fill code */
135275322SandrewEND(bzero)
136129250Scognet/* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * On entry r0 = dest, r1 = fill byte, r2 = length.  The body works with
 * ip = running store pointer, r1 = bytes remaining, r3 = fill pattern and,
 * in the bulk loops, r2 = a copy of r3 so that register pairs can be
 * stored.  r0 is never written, so it still holds dest on return.
 *
 * do_memset is the shared fill code; bzero branches to it with
 * r0 = dest, r1 = length and r3 = 0.
 *
 * Most stores are conditional on the flags of the preceding subs/adds/cmp,
 * so the instruction order in this function is significant.
 */
137129250ScognetENTRY(memset)
138129250Scognet	and	r3, r1, #0xff		/* We deal with bytes */
139129250Scognet	mov	r1, r2
140129250Scognetdo_memset:
141129250Scognet	cmp	r1, #0x04		/* Do we have less than 4 bytes */
142129250Scognet	mov	ip, r0
143129250Scognet	blt	.Lmemset_lessthanfour
144129250Scognet
145129250Scognet	/* Ok first we will word align the address */
146129250Scognet	ands	r2, ip, #0x03		/* Get the bottom two bits */
147129250Scognet	bne	.Lmemset_wordunaligned	/* The address is not word aligned */
148129250Scognet
149129250Scognet	/* We are now word aligned */
150129250Scognet.Lmemset_wordaligned:
151129250Scognet	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
152172614Scognet#ifdef _ARM_ARCH_5E
153172614Scognet	tst	ip, #0x04		/* Quad-align for armv5e */
154129250Scognet#else
155129250Scognet	cmp	r1, #0x10
156129250Scognet#endif
157129250Scognet	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
158172614Scognet#ifdef _ARM_ARCH_5E
159129250Scognet	subne	r1, r1, #0x04		/* Quad-align if necessary */
160129250Scognet	strne	r3, [ip], #0x04
161129250Scognet	cmp	r1, #0x10
162129250Scognet#endif
163129250Scognet	blt	.Lmemset_loop4		/* If less than 16 then use words */
164129250Scognet	mov	r2, r3			/* Duplicate data */
165129250Scognet	cmp	r1, #0x80		/* If < 128 then skip the big loop */
166129250Scognet	blt	.Lmemset_loop32
167129250Scognet
168129250Scognet	/* Do 128 bytes at a time */
169129250Scognet.Lmemset_loop128:
170129250Scognet	subs	r1, r1, #0x80
171172614Scognet#ifdef _ARM_ARCH_5E
172275418Sandrew	strdge	r2, [ip], #0x08
173275418Sandrew	strdge	r2, [ip], #0x08
174275418Sandrew	strdge	r2, [ip], #0x08
175275418Sandrew	strdge	r2, [ip], #0x08
176275418Sandrew	strdge	r2, [ip], #0x08
177275418Sandrew	strdge	r2, [ip], #0x08
178275418Sandrew	strdge	r2, [ip], #0x08
179275418Sandrew	strdge	r2, [ip], #0x08
180275418Sandrew	strdge	r2, [ip], #0x08
181275418Sandrew	strdge	r2, [ip], #0x08
182275418Sandrew	strdge	r2, [ip], #0x08
183275418Sandrew	strdge	r2, [ip], #0x08
184275418Sandrew	strdge	r2, [ip], #0x08
185275418Sandrew	strdge	r2, [ip], #0x08
186275418Sandrew	strdge	r2, [ip], #0x08
187275418Sandrew	strdge	r2, [ip], #0x08
188129250Scognet#else
189275418Sandrew	stmiage	ip!, {r2-r3}
190275418Sandrew	stmiage	ip!, {r2-r3}
191275418Sandrew	stmiage	ip!, {r2-r3}
192275418Sandrew	stmiage	ip!, {r2-r3}
193275418Sandrew	stmiage	ip!, {r2-r3}
194275418Sandrew	stmiage	ip!, {r2-r3}
195275418Sandrew	stmiage	ip!, {r2-r3}
196275418Sandrew	stmiage	ip!, {r2-r3}
197275418Sandrew	stmiage	ip!, {r2-r3}
198275418Sandrew	stmiage	ip!, {r2-r3}
199275418Sandrew	stmiage	ip!, {r2-r3}
200275418Sandrew	stmiage	ip!, {r2-r3}
201275418Sandrew	stmiage	ip!, {r2-r3}
202275418Sandrew	stmiage	ip!, {r2-r3}
203275418Sandrew	stmiage	ip!, {r2-r3}
204275418Sandrew	stmiage	ip!, {r2-r3}
205129250Scognet#endif
206129250Scognet	bgt	.Lmemset_loop128
207137463Scognet	RETeq			/* Zero length so just exit */
208129250Scognet
209129250Scognet	add	r1, r1, #0x80		/* Adjust for extra sub */
210129250Scognet
211129250Scognet	/* Do 32 bytes at a time */
212129250Scognet.Lmemset_loop32:
213129250Scognet	subs	r1, r1, #0x20
214172614Scognet#ifdef _ARM_ARCH_5E
215275418Sandrew	strdge	r2, [ip], #0x08
216275418Sandrew	strdge	r2, [ip], #0x08
217275418Sandrew	strdge	r2, [ip], #0x08
218275418Sandrew	strdge	r2, [ip], #0x08
219129250Scognet#else
220275418Sandrew	stmiage	ip!, {r2-r3}
221275418Sandrew	stmiage	ip!, {r2-r3}
222275418Sandrew	stmiage	ip!, {r2-r3}
223275418Sandrew	stmiage	ip!, {r2-r3}
224129250Scognet#endif
225129250Scognet	bgt	.Lmemset_loop32
226137463Scognet	RETeq			/* Zero length so just exit */
227129250Scognet
228129250Scognet	adds	r1, r1, #0x10		/* Partially adjust for extra sub */
229129250Scognet
230129250Scognet	/* Deal with 16 bytes or more */
231172614Scognet#ifdef _ARM_ARCH_5E
232275418Sandrew	strdge	r2, [ip], #0x08
233275418Sandrew	strdge	r2, [ip], #0x08
234129250Scognet#else
235275418Sandrew	stmiage	ip!, {r2-r3}
236275418Sandrew	stmiage	ip!, {r2-r3}
237129250Scognet#endif
238137463Scognet	RETeq			/* Zero length so just exit */
239129250Scognet
240129250Scognet	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */
241129250Scognet
242129250Scognet	/* We have at least 4 bytes so copy as words */
243129250Scognet.Lmemset_loop4:
244129250Scognet	subs	r1, r1, #0x04
245129250Scognet	strge	r3, [ip], #0x04
246129250Scognet	bgt	.Lmemset_loop4
247137463Scognet	RETeq			/* Zero length so just exit */
248129250Scognet
	/*
	 * r1 is now (bytes left - 4), i.e. negative.  The compare below
	 * sets the flags so that the unconditional strb writes the first
	 * trailing byte, strbge the second and strbgt the third.
	 */
249172614Scognet#ifdef _ARM_ARCH_5E
250129250Scognet	/* Compensate for 64-bit alignment check */
251129250Scognet	adds	r1, r1, #0x04
252137463Scognet	RETeq
253129250Scognet	cmp	r1, #2
254129250Scognet#else
255129250Scognet	cmp	r1, #-2
256129250Scognet#endif
257129250Scognet
258129250Scognet	strb	r3, [ip], #0x01		/* Set 1 byte */
259275418Sandrew	strbge	r3, [ip], #0x01		/* Set another byte */
260275418Sandrew	strbgt	r3, [ip]		/* and a third */
261137463Scognet	RET			/* Exit */
262129250Scognet
	/*
	 * Destination is not word aligned: r2 holds (ip & 3) on entry.
	 * Store 4 - r2 leading bytes (1 to 3) to reach a word boundary and
	 * take them off the remaining count.
	 */
263129250Scognet.Lmemset_wordunaligned:
264129250Scognet	rsb	r2, r2, #0x004
265129250Scognet	strb	r3, [ip], #0x01		/* Set 1 byte */
266129250Scognet	cmp	r2, #0x02
267275418Sandrew	strbge	r3, [ip], #0x01		/* Set another byte */
268129250Scognet	sub	r1, r1, r2
269275418Sandrew	strbgt	r3, [ip], #0x01		/* and a third */
270129250Scognet	cmp	r1, #0x04		/* More than 4 bytes left? */
271129250Scognet	bge	.Lmemset_wordaligned	/* Yup */
272129250Scognet
273129250Scognet.Lmemset_lessthanfour:
274129250Scognet	cmp	r1, #0x00
275137463Scognet	RETeq			/* Zero length so exit */
276129250Scognet	strb	r3, [ip], #0x01		/* Set 1 byte */
277129250Scognet	cmp	r1, #0x02
278275418Sandrew	strbge	r3, [ip], #0x01		/* Set another byte */
279275418Sandrew	strbgt	r3, [ip]		/* and a third */
280137463Scognet	RET			/* Exit */
	/* Pairs with ENTRY(memset) above; bzero has its own END(). */
281275418SandrewEND(memset)
283129254Scognet
/*
 * bcmp: compare two memory blocks.
 *
 * On entry:
 *   r0 - first buffer (b1)
 *   r1 - second buffer (b2)
 *   r2 - number of bytes to compare
 *
 * On exit:
 *   r0 - 0 when the length is 0, when both pointers are identical, or
 *        when all bytes match; otherwise the non-zero difference
 *        (b1 byte - b2 byte) of the first mismatching pair
 *
 * ip holds the running b1 pointer and r3 is scratch; r1 and r2 are
 * modified.  A length of exactly 6 takes a dedicated unrolled path.
 */
284144967ScognetENTRY(bcmp)
285129254Scognet	mov	ip, r0
286129254Scognet	cmp	r2, #0x06
287129254Scognet	beq	.Lmemcmp_6bytes
288129254Scognet	mov	r0, #0x00
289129254Scognet
290129254Scognet	/* Are both addresses aligned the same way? */
291129254Scognet	cmp	r2, #0x00
292275418Sandrew	eorsne	r3, ip, r1		/* Skipped if len == 0, keeping Z set */
293137463Scognet	RETeq			/* len == 0, or same addresses! */
294129254Scognet	tst	r3, #0x03
295129254Scognet	subne	r2, r2, #0x01		/* bytewise2 expects len - 1 */
296129254Scognet	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */
297129254Scognet
298129254Scognet	/* Word-align the addresses, if necessary */
	/*
	 * Computed jump into the three byte-compare blocks below.  With
	 * k = (b2 - 5) & 3, the addne skips k blocks of 24 bytes (six
	 * instructions each); pc reads as the address of the first ldrb,
	 * which is why the nop is there.  k == 0 falls through and compares
	 * 3 bytes, k == 3 (b2 already word aligned) lands on the word loop.
	 * NOTE(review): this relies on RETne/RETeq each expanding to exactly
	 * one instruction -- confirm against machine/asm.h.
	 */
299129254Scognet	sub	r3, r1, #0x05
300129254Scognet	ands	r3, r3, #0x03
301129254Scognet	add	r3, r3, r3, lsl #1	/* r3 *= 3; flags from ands kept */
302129254Scognet	addne	pc, pc, r3, lsl #3
303129254Scognet	nop
304129254Scognet
305129254Scognet	/* Compare up to 3 bytes */
306129254Scognet	ldrb	r0, [ip], #0x01
307129254Scognet	ldrb	r3, [r1], #0x01
308129254Scognet	subs	r0, r0, r3
309137463Scognet	RETne
310129254Scognet	subs	r2, r2, #0x01
311137463Scognet	RETeq
312129254Scognet
313129254Scognet	/* Compare up to 2 bytes */
314129254Scognet	ldrb	r0, [ip], #0x01
315129254Scognet	ldrb	r3, [r1], #0x01
316129254Scognet	subs	r0, r0, r3
317137463Scognet	RETne
318129254Scognet	subs	r2, r2, #0x01
319137463Scognet	RETeq
320129254Scognet
321129254Scognet	/* Compare 1 byte */
322129254Scognet	ldrb	r0, [ip], #0x01
323129254Scognet	ldrb	r3, [r1], #0x01
324129254Scognet	subs	r0, r0, r3
325137463Scognet	RETne
326129254Scognet	subs	r2, r2, #0x01
327137463Scognet	RETeq
328129254Scognet
329129254Scognet	/* Compare 4 bytes at a time, if possible */
330129254Scognet	subs	r2, r2, #0x04
331129254Scognet	bcc	.Lmemcmp_bytewise	/* Fewer than 4 bytes left */
332129254Scognet.Lmemcmp_word_aligned:
333129254Scognet	ldr	r0, [ip], #0x04
334129254Scognet	ldr	r3, [r1], #0x04
335129254Scognet	subs	r2, r2, #0x04
336129254Scognet	cmpcs	r0, r3			/* Only while >= 4 bytes remain after this word */
337129254Scognet	beq	.Lmemcmp_word_aligned
338129254Scognet	sub	r0, r0, r3
339129254Scognet
340129254Scognet	/* Correct for extra subtraction, and check if done */
341129254Scognet	adds	r2, r2, #0x04
342129254Scognet	cmpeq	r0, #0x00		/* If done, did all bytes match? */
343137463Scognet	RETeq			/* Yup. Just return */
344129254Scognet
345129254Scognet	/* Re-do the final word byte-wise */
346129254Scognet	sub	ip, ip, #0x04
347129254Scognet	sub	r1, r1, #0x04
348129254Scognet
349129254Scognet.Lmemcmp_bytewise:
350129254Scognet	add	r2, r2, #0x03		/* r2 = bytes still to compare - 1 */
351129254Scognet.Lmemcmp_bytewise2:
352129254Scognet	ldrb	r0, [ip], #0x01
353129254Scognet	ldrb	r3, [r1], #0x01
354129254Scognet	subs	r2, r2, #0x01
355129254Scognet	cmpcs	r0, r3			/* Skipped on the last byte; loop then ends */
356129254Scognet	beq	.Lmemcmp_bytewise2
357129254Scognet	sub	r0, r0, r3		/* Difference of the last pair loaded */
358137463Scognet	RET
359129254Scognet
360129254Scognet	/*
361129254Scognet	 * 6 byte compares are very common, thanks to the network stack.
362129254Scognet	 * This code is hand-scheduled to reduce the number of stalls for
363129254Scognet	 * load results. Everything else being equal, this will be ~32%
364129254Scognet	 * faster than a byte-wise memcmp.
365129254Scognet	 */
366129254Scognet	.align	5
367129254Scognet.Lmemcmp_6bytes:
368129254Scognet	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
369129254Scognet	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
370129254Scognet	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
371129254Scognet	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
372275418Sandrew	ldrbeq	r3, [ip, #0x01]		/* r3 = b1#1 */
373137463Scognet	RETne			/* Return if mismatch on #0 */
374129254Scognet	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
375275418Sandrew	ldrbeq	r3, [r1, #0x02]		/* r3 = b2#2 */
376275418Sandrew	ldrbeq	r0, [ip, #0x02]		/* r0 = b1#2 */
377137463Scognet	RETne			/* Return if mismatch on #1 */
378129254Scognet	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
379129254Scognet	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
380275418Sandrew	ldrbeq	r3, [ip, #0x03]		/* r3 = b1#3 */
381137463Scognet	RETne			/* Return if mismatch on #2 */
382129254Scognet	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
383275418Sandrew	ldrbeq	r3, [r1, #0x04]		/* r3 = b2#4 */
384275418Sandrew	ldrbeq	r0, [ip, #0x04]		/* r0 = b1#4 */
385137463Scognet	RETne			/* Return if mismatch on #3 */
386129254Scognet	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
387129254Scognet	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
388275418Sandrew	ldrbeq	r3, [ip, #0x05]		/* r3 = b1#5 */
389137463Scognet	RETne			/* Return if mismatch on #4 */
390129254Scognet	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
391137463Scognet	RET
392248361SandrewEND(bcmp)
393129254Scognet
394129254ScognetENTRY(bcopy)
395143175Scognet	/* switch the source and destination registers */
396236991Simp	eor     r0, r1, r0
397236991Simp	eor     r1, r0, r1
398236991Simp	eor     r0, r1, r0
399269390SianEENTRY(memmove)
400143175Scognet	/* Do the buffers overlap? */
401143175Scognet	cmp	r0, r1
402143175Scognet	RETeq		/* Bail now if src/dst are the same */
403143175Scognet	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
404143175Scognet	subcs	r3, r1, r0	/* if (src > dsr) r3 = src - dst */
405143175Scognet	cmp	r3, r2		/* if (r3 < len) we have an overlap */
406143175Scognet	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)
407143175Scognet
408143175Scognet	/* Determine copy direction */
409143175Scognet	cmp	r1, r0
410143175Scognet	bcc	.Lmemmove_backwards
411143175Scognet
412143175Scognet	moveq	r0, #0			/* Quick abort for len=0 */
413143175Scognet	RETeq
414143175Scognet
415143175Scognet	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
416143175Scognet	subs	r2, r2, #4
417143175Scognet	blt	.Lmemmove_fl4		/* less than 4 bytes */
418143175Scognet	ands	r12, r0, #3
419143175Scognet	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
420143175Scognet	ands	r12, r1, #3
421143175Scognet	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */
422143175Scognet
423143175Scognet.Lmemmove_ft8:
424143175Scognet	/* We have aligned source and destination */
425143175Scognet	subs	r2, r2, #8
426143175Scognet	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
427236991Simp	subs	r2, r2, #0x14
428143175Scognet	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
429143175Scognet	stmdb	sp!, {r4}		/* borrow r4 */
430143175Scognet
431143175Scognet	/* blat 32 bytes at a time */
432143175Scognet	/* XXX for really big copies perhaps we should use more registers */
433283366Sandrew.Lmemmove_floop32:
434143175Scognet	ldmia	r1!, {r3, r4, r12, lr}
435143175Scognet	stmia	r0!, {r3, r4, r12, lr}
436143175Scognet	ldmia	r1!, {r3, r4, r12, lr}
437143175Scognet	stmia	r0!, {r3, r4, r12, lr}
438236991Simp	subs	r2, r2, #0x20
439143175Scognet	bge	.Lmemmove_floop32
440143175Scognet
441143175Scognet	cmn	r2, #0x10
442275418Sandrew	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
443275418Sandrew	stmiage	r0!, {r3, r4, r12, lr}
444236991Simp	subge	r2, r2, #0x10
445143175Scognet	ldmia	sp!, {r4}		/* return r4 */
446143175Scognet
447143175Scognet.Lmemmove_fl32:
448236991Simp	adds	r2, r2, #0x14
449143175Scognet
450143175Scognet	/* blat 12 bytes at a time */
451143175Scognet.Lmemmove_floop12:
452275418Sandrew	ldmiage	r1!, {r3, r12, lr}
453275418Sandrew	stmiage	r0!, {r3, r12, lr}
454275418Sandrew	subsge	r2, r2, #0x0c
455143175Scognet	bge	.Lmemmove_floop12
456143175Scognet
457143175Scognet.Lmemmove_fl12:
458143175Scognet	adds	r2, r2, #8
459143175Scognet	blt	.Lmemmove_fl4
460143175Scognet
461143175Scognet	subs	r2, r2, #4
462143175Scognet	ldrlt	r3, [r1], #4
463143175Scognet	strlt	r3, [r0], #4
464275418Sandrew	ldmiage	r1!, {r3, r12}
465275418Sandrew	stmiage	r0!, {r3, r12}
466143175Scognet	subge	r2, r2, #4
467143175Scognet
468143175Scognet.Lmemmove_fl4:
469143175Scognet	/* less than 4 bytes to go */
470143175Scognet	adds	r2, r2, #4
471275418Sandrew	ldmiaeq	sp!, {r0, pc}		/* done */
472143175Scognet
473143175Scognet	/* copy the crud byte at a time */
474143175Scognet	cmp	r2, #2
475143175Scognet	ldrb	r3, [r1], #1
476143175Scognet	strb	r3, [r0], #1
477275418Sandrew	ldrbge	r3, [r1], #1
478275418Sandrew	strbge	r3, [r0], #1
479275418Sandrew	ldrbgt	r3, [r1], #1
480275418Sandrew	strbgt	r3, [r0], #1
481143175Scognet	ldmia	sp!, {r0, pc}
482143175Scognet
483143175Scognet	/* erg - unaligned destination */
484143175Scognet.Lmemmove_fdestul:
485143175Scognet	rsb	r12, r12, #4
486143175Scognet	cmp	r12, #2
487143175Scognet
488143175Scognet	/* align destination with byte copies */
489143175Scognet	ldrb	r3, [r1], #1
490143175Scognet	strb	r3, [r0], #1
491275418Sandrew	ldrbge	r3, [r1], #1
492275418Sandrew	strbge	r3, [r0], #1
493275418Sandrew	ldrbgt	r3, [r1], #1
494275418Sandrew	strbgt	r3, [r0], #1
495143175Scognet	subs	r2, r2, r12
496143175Scognet	blt	.Lmemmove_fl4		/* less the 4 bytes */
497143175Scognet
498143175Scognet	ands	r12, r1, #3
499143175Scognet	beq	.Lmemmove_ft8		/* we have an aligned source */
500143175Scognet
501143175Scognet	/* erg - unaligned source */
502143175Scognet	/* This is where it gets nasty ... */
503143175Scognet.Lmemmove_fsrcul:
504143175Scognet	bic	r1, r1, #3
505143175Scognet	ldr	lr, [r1], #4
506143175Scognet	cmp	r12, #2
507143175Scognet	bgt	.Lmemmove_fsrcul3
508143175Scognet	beq	.Lmemmove_fsrcul2
509236991Simp	cmp	r2, #0x0c
510143175Scognet	blt	.Lmemmove_fsrcul1loop4
511236991Simp	sub	r2, r2, #0x0c
512143175Scognet	stmdb	sp!, {r4, r5}
513143175Scognet
514143175Scognet.Lmemmove_fsrcul1loop16:
515143175Scognet#ifdef __ARMEB__
516143175Scognet	mov	r3, lr, lsl #8
517143175Scognet#else
518143175Scognet	mov	r3, lr, lsr #8
519143175Scognet#endif
520143175Scognet	ldmia	r1!, {r4, r5, r12, lr}
521143175Scognet#ifdef __ARMEB__
522143175Scognet	orr	r3, r3, r4, lsr #24
523143175Scognet	mov	r4, r4, lsl #8
524143175Scognet	orr	r4, r4, r5, lsr #24
525143175Scognet	mov	r5, r5, lsl #8
526143175Scognet	orr	r5, r5, r12, lsr #24
527143175Scognet	mov	r12, r12, lsl #8
528143175Scognet	orr	r12, r12, lr, lsr #24
529143175Scognet#else
530143175Scognet	orr	r3, r3, r4, lsl #24
531143175Scognet	mov	r4, r4, lsr #8
532143175Scognet	orr	r4, r4, r5, lsl #24
533143175Scognet	mov	r5, r5, lsr #8
534143175Scognet	orr	r5, r5, r12, lsl #24
535143175Scognet	mov	r12, r12, lsr #8
536143175Scognet	orr	r12, r12, lr, lsl #24
537143175Scognet#endif
538143175Scognet	stmia	r0!, {r3-r5, r12}
539236991Simp	subs	r2, r2, #0x10
540143175Scognet	bge	.Lmemmove_fsrcul1loop16
541143175Scognet	ldmia	sp!, {r4, r5}
542236991Simp	adds	r2, r2, #0x0c
543143175Scognet	blt	.Lmemmove_fsrcul1l4
544143175Scognet
545143175Scognet.Lmemmove_fsrcul1loop4:
546143175Scognet#ifdef __ARMEB__
547143175Scognet	mov	r12, lr, lsl #8
548143175Scognet#else
549143175Scognet	mov	r12, lr, lsr #8
550143175Scognet#endif
551143175Scognet	ldr	lr, [r1], #4
552143175Scognet#ifdef __ARMEB__
553143175Scognet	orr	r12, r12, lr, lsr #24
554143175Scognet#else
555143175Scognet	orr	r12, r12, lr, lsl #24
556143175Scognet#endif
557143175Scognet	str	r12, [r0], #4
558143175Scognet	subs	r2, r2, #4
559143175Scognet	bge	.Lmemmove_fsrcul1loop4
560143175Scognet
561143175Scognet.Lmemmove_fsrcul1l4:
562143175Scognet	sub	r1, r1, #3
563143175Scognet	b	.Lmemmove_fl4
564143175Scognet
565143175Scognet.Lmemmove_fsrcul2:
566236991Simp	cmp	r2, #0x0c
567143175Scognet	blt	.Lmemmove_fsrcul2loop4
568236991Simp	sub	r2, r2, #0x0c
569143175Scognet	stmdb	sp!, {r4, r5}
570143175Scognet
571143175Scognet.Lmemmove_fsrcul2loop16:
572143175Scognet#ifdef __ARMEB__
573143175Scognet	mov	r3, lr, lsl #16
574143175Scognet#else
575143175Scognet	mov	r3, lr, lsr #16
576143175Scognet#endif
577143175Scognet	ldmia	r1!, {r4, r5, r12, lr}
578143175Scognet#ifdef __ARMEB__
579143175Scognet	orr	r3, r3, r4, lsr #16
580143175Scognet	mov	r4, r4, lsl #16
581143175Scognet	orr	r4, r4, r5, lsr #16
582143175Scognet	mov	r5, r5, lsl #16
583143175Scognet	orr	r5, r5, r12, lsr #16
584143175Scognet	mov	r12, r12, lsl #16
585143175Scognet	orr	r12, r12, lr, lsr #16
586143175Scognet#else
587143175Scognet	orr	r3, r3, r4, lsl #16
588143175Scognet	mov	r4, r4, lsr #16
589143175Scognet	orr	r4, r4, r5, lsl #16
590143175Scognet	mov	r5, r5, lsr #16
591143175Scognet	orr	r5, r5, r12, lsl #16
592143175Scognet	mov	r12, r12, lsr #16
593143175Scognet	orr	r12, r12, lr, lsl #16
594143175Scognet#endif
595143175Scognet	stmia	r0!, {r3-r5, r12}
596236991Simp	subs	r2, r2, #0x10
597143175Scognet	bge	.Lmemmove_fsrcul2loop16
598143175Scognet	ldmia	sp!, {r4, r5}
599236991Simp	adds	r2, r2, #0x0c
600143175Scognet	blt	.Lmemmove_fsrcul2l4
601143175Scognet
602143175Scognet.Lmemmove_fsrcul2loop4:
603143175Scognet#ifdef __ARMEB__
604143175Scognet	mov	r12, lr, lsl #16
605143175Scognet#else
606143175Scognet	mov	r12, lr, lsr #16
607143175Scognet#endif
608143175Scognet	ldr	lr, [r1], #4
609143175Scognet#ifdef __ARMEB__
610143175Scognet	orr	r12, r12, lr, lsr #16
611143175Scognet#else
612143175Scognet	orr	r12, r12, lr, lsl #16
613143175Scognet#endif
614143175Scognet	str	r12, [r0], #4
615143175Scognet	subs	r2, r2, #4
616143175Scognet	bge	.Lmemmove_fsrcul2loop4
617143175Scognet
618143175Scognet.Lmemmove_fsrcul2l4:
619143175Scognet	sub	r1, r1, #2
620143175Scognet	b	.Lmemmove_fl4
621143175Scognet
622143175Scognet.Lmemmove_fsrcul3:
623236991Simp	cmp	r2, #0x0c
624143175Scognet	blt	.Lmemmove_fsrcul3loop4
625236991Simp	sub	r2, r2, #0x0c
626143175Scognet	stmdb	sp!, {r4, r5}
627143175Scognet
628143175Scognet.Lmemmove_fsrcul3loop16:
629143175Scognet#ifdef __ARMEB__
630143175Scognet	mov	r3, lr, lsl #24
631143175Scognet#else
632143175Scognet	mov	r3, lr, lsr #24
633143175Scognet#endif
634143175Scognet	ldmia	r1!, {r4, r5, r12, lr}
635143175Scognet#ifdef __ARMEB__
636143175Scognet	orr	r3, r3, r4, lsr #8
637143175Scognet	mov	r4, r4, lsl #24
638143175Scognet	orr	r4, r4, r5, lsr #8
639143175Scognet	mov	r5, r5, lsl #24
640143175Scognet	orr	r5, r5, r12, lsr #8
641143175Scognet	mov	r12, r12, lsl #24
642143175Scognet	orr	r12, r12, lr, lsr #8
643143175Scognet#else
644143175Scognet	orr	r3, r3, r4, lsl #8
645143175Scognet	mov	r4, r4, lsr #24
646143175Scognet	orr	r4, r4, r5, lsl #8
647143175Scognet	mov	r5, r5, lsr #24
648143175Scognet	orr	r5, r5, r12, lsl #8
649143175Scognet	mov	r12, r12, lsr #24
650143175Scognet	orr	r12, r12, lr, lsl #8
651143175Scognet#endif
652143175Scognet	stmia	r0!, {r3-r5, r12}
653236991Simp	subs	r2, r2, #0x10
654143175Scognet	bge	.Lmemmove_fsrcul3loop16
655143175Scognet	ldmia	sp!, {r4, r5}
656236991Simp	adds	r2, r2, #0x0c
657143175Scognet	blt	.Lmemmove_fsrcul3l4
658143175Scognet
659143175Scognet.Lmemmove_fsrcul3loop4:
660143175Scognet#ifdef __ARMEB__
661143175Scognet	mov	r12, lr, lsl #24
662143175Scognet#else
663143175Scognet	mov	r12, lr, lsr #24
664143175Scognet#endif
665143175Scognet	ldr	lr, [r1], #4
666143175Scognet#ifdef __ARMEB__
667143175Scognet	orr	r12, r12, lr, lsr #8
668143175Scognet#else
669143175Scognet	orr	r12, r12, lr, lsl #8
670143175Scognet#endif
671143175Scognet	str	r12, [r0], #4
672143175Scognet	subs	r2, r2, #4
673143175Scognet	bge	.Lmemmove_fsrcul3loop4
674143175Scognet
675143175Scognet.Lmemmove_fsrcul3l4:
676143175Scognet	sub	r1, r1, #1
677143175Scognet	b	.Lmemmove_fl4
678143175Scognet
679143175Scognet.Lmemmove_backwards:
680143175Scognet	add	r1, r1, r2
681143175Scognet	add	r0, r0, r2
682143175Scognet	subs	r2, r2, #4
683143175Scognet	blt	.Lmemmove_bl4		/* less than 4 bytes */
684143175Scognet	ands	r12, r0, #3
685143175Scognet	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
686143175Scognet	ands	r12, r1, #3
687143175Scognet	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */
688143175Scognet
689143175Scognet.Lmemmove_bt8:
690143175Scognet	/* We have aligned source and destination */
691143175Scognet	subs	r2, r2, #8
692143175Scognet	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
693143175Scognet	stmdb	sp!, {r4, lr}
694143175Scognet	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
695143175Scognet	blt	.Lmemmove_bl32
696143175Scognet
697143175Scognet	/* blat 32 bytes at a time */
698143175Scognet	/* XXX for really big copies perhaps we should use more registers */
699143175Scognet.Lmemmove_bloop32:
700143175Scognet	ldmdb	r1!, {r3, r4, r12, lr}
701143175Scognet	stmdb	r0!, {r3, r4, r12, lr}
702143175Scognet	ldmdb	r1!, {r3, r4, r12, lr}
703143175Scognet	stmdb	r0!, {r3, r4, r12, lr}
704236991Simp	subs	r2, r2, #0x20
705143175Scognet	bge	.Lmemmove_bloop32
706143175Scognet
707143175Scognet.Lmemmove_bl32:
708236991Simp	cmn	r2, #0x10
709275418Sandrew	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
710275418Sandrew	stmdbge	r0!, {r3, r4, r12, lr}
711236991Simp	subge	r2, r2, #0x10
712236991Simp	adds	r2, r2, #0x14
713275418Sandrew	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
714275418Sandrew	stmdbge	r0!, {r3, r12, lr}
715236991Simp	subge	r2, r2, #0x0c
716143175Scognet	ldmia	sp!, {r4, lr}
717143175Scognet
718143175Scognet.Lmemmove_bl12:
719143175Scognet	adds	r2, r2, #8
720143175Scognet	blt	.Lmemmove_bl4
721143175Scognet	subs	r2, r2, #4
722143175Scognet	ldrlt	r3, [r1, #-4]!
723143175Scognet	strlt	r3, [r0, #-4]!
724275418Sandrew	ldmdbge	r1!, {r3, r12}
725275418Sandrew	stmdbge	r0!, {r3, r12}
726143175Scognet	subge	r2, r2, #4
727143175Scognet
728143175Scognet.Lmemmove_bl4:
729143175Scognet	/* less than 4 bytes to go */
730143175Scognet	adds	r2, r2, #4
731143175Scognet	RETeq			/* done */
732143175Scognet
733143175Scognet	/* copy the crud byte at a time */
734143175Scognet	cmp	r2, #2
735143175Scognet	ldrb	r3, [r1, #-1]!
736143175Scognet	strb	r3, [r0, #-1]!
737275418Sandrew	ldrbge	r3, [r1, #-1]!
738275418Sandrew	strbge	r3, [r0, #-1]!
739275418Sandrew	ldrbgt	r3, [r1, #-1]!
740275418Sandrew	strbgt	r3, [r0, #-1]!
741143175Scognet	RET
742143175Scognet
743143175Scognet	/* erg - unaligned destination */
744143175Scognet.Lmemmove_bdestul:
745143175Scognet	cmp	r12, #2
746143175Scognet
747143175Scognet	/* align destination with byte copies */
748143175Scognet	ldrb	r3, [r1, #-1]!
749143175Scognet	strb	r3, [r0, #-1]!
750275418Sandrew	ldrbge	r3, [r1, #-1]!
751275418Sandrew	strbge	r3, [r0, #-1]!
752275418Sandrew	ldrbgt	r3, [r1, #-1]!
753275418Sandrew	strbgt	r3, [r0, #-1]!
754143175Scognet	subs	r2, r2, r12
755143175Scognet	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
756143175Scognet	ands	r12, r1, #3
757143175Scognet	beq	.Lmemmove_bt8		/* we have an aligned source */
758143175Scognet
759143175Scognet	/* erg - unaligned source */
760143175Scognet	/* This is where it gets nasty ... */
761143175Scognet.Lmemmove_bsrcul:
762143175Scognet	bic	r1, r1, #3
763143175Scognet	ldr	r3, [r1, #0]
764143175Scognet	cmp	r12, #2
765143175Scognet	blt	.Lmemmove_bsrcul1
766143175Scognet	beq	.Lmemmove_bsrcul2
767236991Simp	cmp	r2, #0x0c
768143175Scognet	blt	.Lmemmove_bsrcul3loop4
769236991Simp	sub	r2, r2, #0x0c
770143175Scognet	stmdb	sp!, {r4, r5, lr}
771143175Scognet
772143175Scognet.Lmemmove_bsrcul3loop16:
773143175Scognet#ifdef __ARMEB__
774143175Scognet	mov	lr, r3, lsr #8
775143175Scognet#else
776143175Scognet	mov	lr, r3, lsl #8
777143175Scognet#endif
778143175Scognet	ldmdb	r1!, {r3-r5, r12}
779143175Scognet#ifdef __ARMEB__
780143175Scognet	orr	lr, lr, r12, lsl #24
781143175Scognet	mov	r12, r12, lsr #8
782143175Scognet	orr	r12, r12, r5, lsl #24
783143175Scognet	mov	r5, r5, lsr #8
784143175Scognet	orr	r5, r5, r4, lsl #24
785143175Scognet	mov	r4, r4, lsr #8
786143175Scognet	orr	r4, r4, r3, lsl #24
787143175Scognet#else
788143175Scognet	orr	lr, lr, r12, lsr #24
789143175Scognet	mov	r12, r12, lsl #8
790143175Scognet	orr	r12, r12, r5, lsr #24
791143175Scognet	mov	r5, r5, lsl #8
792143175Scognet	orr	r5, r5, r4, lsr #24
793143175Scognet	mov	r4, r4, lsl #8
794143175Scognet	orr	r4, r4, r3, lsr #24
795143175Scognet#endif
796143175Scognet	stmdb	r0!, {r4, r5, r12, lr}
797236991Simp	subs	r2, r2, #0x10
798143175Scognet	bge	.Lmemmove_bsrcul3loop16
799143175Scognet	ldmia	sp!, {r4, r5, lr}
800236991Simp	adds	r2, r2, #0x0c
801143175Scognet	blt	.Lmemmove_bsrcul3l4
802143175Scognet
803143175Scognet.Lmemmove_bsrcul3loop4:
804143175Scognet#ifdef __ARMEB__
805143175Scognet	mov	r12, r3, lsr #8
806143175Scognet#else
807143175Scognet	mov	r12, r3, lsl #8
808143175Scognet#endif
809143175Scognet	ldr	r3, [r1, #-4]!
810143175Scognet#ifdef __ARMEB__
811143175Scognet	orr	r12, r12, r3, lsl #24
812143175Scognet#else
813143175Scognet	orr	r12, r12, r3, lsr #24
814143175Scognet#endif
815143175Scognet	str	r12, [r0, #-4]!
816143175Scognet	subs	r2, r2, #4
817143175Scognet	bge	.Lmemmove_bsrcul3loop4
818143175Scognet
819143175Scognet.Lmemmove_bsrcul3l4:
820143175Scognet	add	r1, r1, #3
821143175Scognet	b	.Lmemmove_bl4
822143175Scognet
823143175Scognet.Lmemmove_bsrcul2:
824236991Simp	cmp	r2, #0x0c
825143175Scognet	blt	.Lmemmove_bsrcul2loop4
826236991Simp	sub	r2, r2, #0x0c
827143175Scognet	stmdb	sp!, {r4, r5, lr}
828143175Scognet
829143175Scognet.Lmemmove_bsrcul2loop16:
830143175Scognet#ifdef __ARMEB__
831143175Scognet	mov	lr, r3, lsr #16
832143175Scognet#else
833143175Scognet	mov	lr, r3, lsl #16
834143175Scognet#endif
835143175Scognet	ldmdb	r1!, {r3-r5, r12}
836143175Scognet#ifdef __ARMEB__
837143175Scognet	orr	lr, lr, r12, lsl #16
838143175Scognet	mov	r12, r12, lsr #16
839143175Scognet	orr	r12, r12, r5, lsl #16
840143175Scognet	mov	r5, r5, lsr #16
841143175Scognet	orr	r5, r5, r4, lsl #16
842143175Scognet	mov	r4, r4, lsr #16
843143175Scognet	orr	r4, r4, r3, lsl #16
844143175Scognet#else
845143175Scognet	orr	lr, lr, r12, lsr #16
846143175Scognet	mov	r12, r12, lsl #16
847143175Scognet	orr	r12, r12, r5, lsr #16
848143175Scognet	mov	r5, r5, lsl #16
849143175Scognet	orr	r5, r5, r4, lsr #16
850143175Scognet	mov	r4, r4, lsl #16
851143175Scognet	orr	r4, r4, r3, lsr #16
852143175Scognet#endif
853143175Scognet	stmdb	r0!, {r4, r5, r12, lr}
854236991Simp	subs	r2, r2, #0x10
855143175Scognet	bge	.Lmemmove_bsrcul2loop16
856143175Scognet	ldmia	sp!, {r4, r5, lr}
857236991Simp	adds	r2, r2, #0x0c
858143175Scognet	blt	.Lmemmove_bsrcul2l4
859143175Scognet
860143175Scognet.Lmemmove_bsrcul2loop4:
861143175Scognet#ifdef __ARMEB__
862143175Scognet	mov	r12, r3, lsr #16
863143175Scognet#else
864143175Scognet	mov	r12, r3, lsl #16
865143175Scognet#endif
866143175Scognet	ldr	r3, [r1, #-4]!
867143175Scognet#ifdef __ARMEB__
868143175Scognet	orr	r12, r12, r3, lsl #16
869143175Scognet#else
870143175Scognet	orr	r12, r12, r3, lsr #16
871143175Scognet#endif
872143175Scognet	str	r12, [r0, #-4]!
873143175Scognet	subs	r2, r2, #4
874143175Scognet	bge	.Lmemmove_bsrcul2loop4
875143175Scognet
876143175Scognet.Lmemmove_bsrcul2l4:
877143175Scognet	add	r1, r1, #2
878143175Scognet	b	.Lmemmove_bl4
879143175Scognet
880143175Scognet.Lmemmove_bsrcul1:
881236991Simp	cmp	r2, #0x0c
882143175Scognet	blt	.Lmemmove_bsrcul1loop4
883236991Simp	sub	r2, r2, #0x0c
884143175Scognet	stmdb	sp!, {r4, r5, lr}
885143175Scognet
886143175Scognet.Lmemmove_bsrcul1loop32:
887143175Scognet#ifdef __ARMEB__
888143175Scognet	mov	lr, r3, lsr #24
889143175Scognet#else
890143175Scognet	mov	lr, r3, lsl #24
891143175Scognet#endif
892143175Scognet	ldmdb	r1!, {r3-r5, r12}
893143175Scognet#ifdef __ARMEB__
894143175Scognet	orr	lr, lr, r12, lsl #8
895143175Scognet	mov	r12, r12, lsr #24
896143175Scognet	orr	r12, r12, r5, lsl #8
897143175Scognet	mov	r5, r5, lsr #24
898143175Scognet	orr	r5, r5, r4, lsl #8
899143175Scognet	mov	r4, r4, lsr #24
900143175Scognet	orr	r4, r4, r3, lsl #8
901143175Scognet#else
902143175Scognet	orr	lr, lr, r12, lsr #8
903143175Scognet	mov	r12, r12, lsl #24
904143175Scognet	orr	r12, r12, r5, lsr #8
905143175Scognet	mov	r5, r5, lsl #24
906143175Scognet	orr	r5, r5, r4, lsr #8
907143175Scognet	mov	r4, r4, lsl #24
908143175Scognet	orr	r4, r4, r3, lsr #8
909143175Scognet#endif
910143175Scognet	stmdb	r0!, {r4, r5, r12, lr}
911236991Simp	subs	r2, r2, #0x10
912143175Scognet	bge	.Lmemmove_bsrcul1loop32
913143175Scognet	ldmia	sp!, {r4, r5, lr}
914236991Simp	adds	r2, r2, #0x0c
915143175Scognet	blt	.Lmemmove_bsrcul1l4
916143175Scognet
917143175Scognet.Lmemmove_bsrcul1loop4:
918143175Scognet#ifdef __ARMEB__
919143175Scognet	mov	r12, r3, lsr #24
920143175Scognet#else
921143175Scognet	mov	r12, r3, lsl #24
922143175Scognet#endif
923143175Scognet	ldr	r3, [r1, #-4]!
924143175Scognet#ifdef __ARMEB__
925143175Scognet	orr	r12, r12, r3, lsl #8
926143175Scognet#else
927143175Scognet	orr	r12, r12, r3, lsr #8
928143175Scognet#endif
929143175Scognet	str	r12, [r0, #-4]!
930143175Scognet	subs	r2, r2, #4
931143175Scognet	bge	.Lmemmove_bsrcul1loop4
932143175Scognet
933143175Scognet.Lmemmove_bsrcul1l4:
934143175Scognet	add	r1, r1, #1
935143175Scognet	b	.Lmemmove_bl4
936269390SianEEND(memmove)
937248361SandrewEND(bcopy)
938143175Scognet
939172614Scognet#if !defined(_ARM_ARCH_5E)
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * Generic implementation, assembled when _ARM_ARCH_5E is not defined.
 *
 * In:	r0 = dst, r1 = src, r2 = len
 * Out:	r0 = dst (pushed at .Lnormal, popped again on every exit path)
 * Scratch: r3 and r12.  lr is pushed together with r0 and then reused
 *	as a data register.  r4 and r5 are pushed/popped around the
 *	loops that borrow them.
 *
 * r2 is kept biased below the true remaining count (by 4, 12 or 32
 * depending on the stage) so that a plain "subs ... / bge" or "blt"
 * tells whether another chunk of that size is still available.  All
 * of these comparisons are signed.
 *
 * The unaligned-source paths read whole aligned words and splice two
 * neighbouring words together with shifts.  Which direction each word
 * is shifted depends on the byte order, hence the __ARMEB__ blocks.
 */
ENTRY(memcpy)
	/* Do not check arm_memcpy if we're running from flash */
#if defined(FLASHADDR) && defined(PHYSADDR)
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	/*
	 * Optional copy hook.  .L_arm_memcpy and .L_min_memcpy_size are
	 * literals defined outside this function; each holds the address
	 * of a variable.  The hook is used only when the function pointer
	 * is non-NULL and len is not below the stored minimum (signed
	 * compare).
	 */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal		/* no hook installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal		/* too short to bother the hook */
	stmfd	sp!, {r0-r2, r4, lr}	/* keep our arguments for a retry */
	mov	r3, #0			/* fourth argument to the hook */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* return address for the call below */
	ldr	pc, [r4]		/* hook(dst, src, len, 0) */
	cmp	r0, #0			/* flags survive the ldmfd below */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq				/* hook returned 0: return dst */

	/* Hook absent, skipped or returned non-zero: copy in software. */
.Lnormal:
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10		/* r2 + 16 >= 0: 16 bytes still left */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* back to r2 = remaining - 12 */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* back to r2 = remaining - 4 */
	blt	.Lmemcpy_l4

	/* 4..11 bytes left: copy one word (4..7) or two words (8..11) */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = true remaining count (0..3) */
#ifdef __APCS_26_
	ldmiaeq sp!, {r0, pc}^		/* done */
#else
	ldmiaeq	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1		/* always at least one byte here */
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1		/* second byte if r2 >= 2 */
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1		/* third byte if r2 == 3 */
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst (1..3) */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	/*
	 * r12 = src & 3 (1..3).  Round src down to a word boundary and
	 * preload the word that holds the first source bytes into lr.
	 * From here on r1 runs one aligned word ahead of the real source
	 * position; the .Lmemcpy_srcul?l4 stubs undo that before the byte
	 * tail is copied.
	 */
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2

	/* Source is 1 byte past a word boundary */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* r1 = true source position again */
	b	.Lmemcpy_l4

	/* Source is 2 bytes past a word boundary */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* r1 = true source position again */
	b	.Lmemcpy_l4

	/* Source is 3 bytes past a word boundary */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* r1 = true source position again */
	b	.Lmemcpy_l4
END(memcpy)
1174248361Sandrew
1175129254Scognet#else
1176129254Scognet/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1177129254ScognetENTRY(memcpy)
1178129254Scognet	pld	[r1]
1179129254Scognet	cmp	r2, #0x0c
1180129254Scognet	ble	.Lmemcpy_short		/* <= 12 bytes */
1181167003Scognet#ifdef FLASHADDR
1182167003Scognet#if FLASHADDR > PHYSADDR
1183167003Scognet	ldr	r3, =FLASHADDR
1184167003Scognet	cmp	r3, pc
1185167003Scognet	bls	.Lnormal
1186167003Scognet#else
1187167003Scognet	ldr	r3, =FLASHADDR
1188167003Scognet	cmp	r3, pc
1189167003Scognet	bhi	.Lnormal
1190167003Scognet#endif
1191167003Scognet#endif
1192150864Scognet	ldr	r3, .L_arm_memcpy
1193150864Scognet	ldr	r3, [r3]
1194150864Scognet	cmp	r3, #0
1195150864Scognet	beq	.Lnormal
1196150864Scognet	ldr	r3, .L_min_memcpy_size
1197150864Scognet	ldr	r3, [r3]
1198150864Scognet	cmp	r2, r3
1199150864Scognet	blt	.Lnormal
1200150864Scognet	stmfd	sp!, {r0-r2, r4, lr}
1201150864Scognet	mov	r3, #0
1202150864Scognet	ldr	r4, .L_arm_memcpy
1203150864Scognet	mov	lr, pc
1204150864Scognet	ldr	pc, [r4]
1205150864Scognet	cmp	r0, #0
1206150864Scognet	ldmfd	sp!, {r0-r2, r4, lr}
1207150864Scognet	RETeq
1208150864Scognet.Lnormal:
1209129254Scognet	mov	r3, r0			/* We must not clobber r0 */
1210129254Scognet
1211129254Scognet	/* Word-align the destination buffer */
1212129254Scognet	ands	ip, r3, #0x03		/* Already word aligned? */
1213129254Scognet	beq	.Lmemcpy_wordaligned	/* Yup */
1214129254Scognet	cmp	ip, #0x02
1215129254Scognet	ldrb	ip, [r1], #0x01
1216129254Scognet	sub	r2, r2, #0x01
1217129254Scognet	strb	ip, [r3], #0x01
1218275418Sandrew	ldrble	ip, [r1], #0x01
1219129254Scognet	suble	r2, r2, #0x01
1220275418Sandrew	strble	ip, [r3], #0x01
1221275418Sandrew	ldrblt	ip, [r1], #0x01
1222129254Scognet	sublt	r2, r2, #0x01
1223275418Sandrew	strblt	ip, [r3], #0x01
1224129254Scognet
1225129254Scognet	/* Destination buffer is now word aligned */
1226129254Scognet.Lmemcpy_wordaligned:
1227129254Scognet	ands	ip, r1, #0x03		/* Is src also word-aligned? */
1228129254Scognet	bne	.Lmemcpy_bad_align	/* Nope. Things just got bad */
1229129254Scognet
1230129254Scognet	/* Quad-align the destination buffer */
1231129254Scognet	tst	r3, #0x07		/* Already quad aligned? */
1232129254Scognet	ldrne	ip, [r1], #0x04
1233129254Scognet	stmfd	sp!, {r4-r9}		/* Free up some registers */
1234129254Scognet	subne	r2, r2, #0x04
1235129254Scognet	strne	ip, [r3], #0x04
1236129254Scognet
1237129254Scognet	/* Destination buffer quad aligned, source is at least word aligned */
1238129254Scognet	subs	r2, r2, #0x80
1239129254Scognet	blt	.Lmemcpy_w_lessthan128
1240129254Scognet
1241129254Scognet	/* Copy 128 bytes at a time */
1242129254Scognet.Lmemcpy_w_loop128:
1243129254Scognet	ldr	r4, [r1], #0x04		/* LD:00-03 */
1244129254Scognet	ldr	r5, [r1], #0x04		/* LD:04-07 */
1245129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x20 */
1246129254Scognet	ldr	r6, [r1], #0x04		/* LD:08-0b */
1247129254Scognet	ldr	r7, [r1], #0x04		/* LD:0c-0f */
1248129254Scognet	ldr	r8, [r1], #0x04		/* LD:10-13 */
1249129254Scognet	ldr	r9, [r1], #0x04		/* LD:14-17 */
1250129254Scognet	strd	r4, [r3], #0x08		/* ST:00-07 */
1251129254Scognet	ldr	r4, [r1], #0x04		/* LD:18-1b */
1252129254Scognet	ldr	r5, [r1], #0x04		/* LD:1c-1f */
1253129254Scognet	strd	r6, [r3], #0x08		/* ST:08-0f */
1254129254Scognet	ldr	r6, [r1], #0x04		/* LD:20-23 */
1255129254Scognet	ldr	r7, [r1], #0x04		/* LD:24-27 */
1256129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x40 */
1257129254Scognet	strd	r8, [r3], #0x08		/* ST:10-17 */
1258129254Scognet	ldr	r8, [r1], #0x04		/* LD:28-2b */
1259129254Scognet	ldr	r9, [r1], #0x04		/* LD:2c-2f */
1260129254Scognet	strd	r4, [r3], #0x08		/* ST:18-1f */
1261129254Scognet	ldr	r4, [r1], #0x04		/* LD:30-33 */
1262129254Scognet	ldr	r5, [r1], #0x04		/* LD:34-37 */
1263129254Scognet	strd	r6, [r3], #0x08		/* ST:20-27 */
1264129254Scognet	ldr	r6, [r1], #0x04		/* LD:38-3b */
1265129254Scognet	ldr	r7, [r1], #0x04		/* LD:3c-3f */
1266129254Scognet	strd	r8, [r3], #0x08		/* ST:28-2f */
1267129254Scognet	ldr	r8, [r1], #0x04		/* LD:40-43 */
1268129254Scognet	ldr	r9, [r1], #0x04		/* LD:44-47 */
1269129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x60 */
1270129254Scognet	strd	r4, [r3], #0x08		/* ST:30-37 */
1271129254Scognet	ldr	r4, [r1], #0x04		/* LD:48-4b */
1272129254Scognet	ldr	r5, [r1], #0x04		/* LD:4c-4f */
1273129254Scognet	strd	r6, [r3], #0x08		/* ST:38-3f */
1274129254Scognet	ldr	r6, [r1], #0x04		/* LD:50-53 */
1275129254Scognet	ldr	r7, [r1], #0x04		/* LD:54-57 */
1276129254Scognet	strd	r8, [r3], #0x08		/* ST:40-47 */
1277129254Scognet	ldr	r8, [r1], #0x04		/* LD:58-5b */
1278129254Scognet	ldr	r9, [r1], #0x04		/* LD:5c-5f */
1279129254Scognet	strd	r4, [r3], #0x08		/* ST:48-4f */
1280129254Scognet	ldr	r4, [r1], #0x04		/* LD:60-63 */
1281129254Scognet	ldr	r5, [r1], #0x04		/* LD:64-67 */
1282129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x80 */
1283129254Scognet	strd	r6, [r3], #0x08		/* ST:50-57 */
1284129254Scognet	ldr	r6, [r1], #0x04		/* LD:68-6b */
1285129254Scognet	ldr	r7, [r1], #0x04		/* LD:6c-6f */
1286129254Scognet	strd	r8, [r3], #0x08		/* ST:58-5f */
1287129254Scognet	ldr	r8, [r1], #0x04		/* LD:70-73 */
1288129254Scognet	ldr	r9, [r1], #0x04		/* LD:74-77 */
1289129254Scognet	strd	r4, [r3], #0x08		/* ST:60-67 */
1290129254Scognet	ldr	r4, [r1], #0x04		/* LD:78-7b */
1291129254Scognet	ldr	r5, [r1], #0x04		/* LD:7c-7f */
1292129254Scognet	strd	r6, [r3], #0x08		/* ST:68-6f */
1293129254Scognet	strd	r8, [r3], #0x08		/* ST:70-77 */
1294129254Scognet	subs	r2, r2, #0x80
1295129254Scognet	strd	r4, [r3], #0x08		/* ST:78-7f */
1296129254Scognet	bge	.Lmemcpy_w_loop128
1297129254Scognet
1298129254Scognet.Lmemcpy_w_lessthan128:
1299129254Scognet	adds	r2, r2, #0x80		/* Adjust for extra sub */
1300275418Sandrew	ldmfdeq	sp!, {r4-r9}
1301137463Scognet	RETeq			/* Return now if done */
1302129254Scognet	subs	r2, r2, #0x20
1303129254Scognet	blt	.Lmemcpy_w_lessthan32
1304129254Scognet
1305129254Scognet	/* Copy 32 bytes at a time */
1306129254Scognet.Lmemcpy_w_loop32:
1307129254Scognet	ldr	r4, [r1], #0x04
1308129254Scognet	ldr	r5, [r1], #0x04
1309129254Scognet	pld	[r1, #0x18]
1310129254Scognet	ldr	r6, [r1], #0x04
1311129254Scognet	ldr	r7, [r1], #0x04
1312129254Scognet	ldr	r8, [r1], #0x04
1313129254Scognet	ldr	r9, [r1], #0x04
1314129254Scognet	strd	r4, [r3], #0x08
1315129254Scognet	ldr	r4, [r1], #0x04
1316129254Scognet	ldr	r5, [r1], #0x04
1317129254Scognet	strd	r6, [r3], #0x08
1318129254Scognet	strd	r8, [r3], #0x08
1319129254Scognet	subs	r2, r2, #0x20
1320129254Scognet	strd	r4, [r3], #0x08
1321129254Scognet	bge	.Lmemcpy_w_loop32
1322129254Scognet
1323129254Scognet.Lmemcpy_w_lessthan32:
1324129254Scognet	adds	r2, r2, #0x20		/* Adjust for extra sub */
1325275418Sandrew	ldmfdeq	sp!, {r4-r9}
1326137463Scognet	RETeq			/* Return now if done */
1327129254Scognet
1328129254Scognet	and	r4, r2, #0x18
1329129254Scognet	rsbs	r4, r4, #0x18
1330129254Scognet	addne	pc, pc, r4, lsl #1
1331129254Scognet	nop
1332129254Scognet
1333129254Scognet	/* At least 24 bytes remaining */
1334129254Scognet	ldr	r4, [r1], #0x04
1335129254Scognet	ldr	r5, [r1], #0x04
1336129254Scognet	sub	r2, r2, #0x08
1337129254Scognet	strd	r4, [r3], #0x08
1338129254Scognet
1339129254Scognet	/* At least 16 bytes remaining */
1340129254Scognet	ldr	r4, [r1], #0x04
1341129254Scognet	ldr	r5, [r1], #0x04
1342129254Scognet	sub	r2, r2, #0x08
1343129254Scognet	strd	r4, [r3], #0x08
1344129254Scognet
1345129254Scognet	/* At least 8 bytes remaining */
1346129254Scognet	ldr	r4, [r1], #0x04
1347129254Scognet	ldr	r5, [r1], #0x04
1348129254Scognet	subs	r2, r2, #0x08
1349129254Scognet	strd	r4, [r3], #0x08
1350129254Scognet
1351129254Scognet	/* Less than 8 bytes remaining */
1352129254Scognet	ldmfd	sp!, {r4-r9}
1353137463Scognet	RETeq			/* Return now if done */
1354129254Scognet	subs	r2, r2, #0x04
1355129254Scognet	ldrge	ip, [r1], #0x04
1356129254Scognet	strge	ip, [r3], #0x04
1357137463Scognet	RETeq			/* Return now if done */
1358129254Scognet	addlt	r2, r2, #0x04
1359129254Scognet	ldrb	ip, [r1], #0x01
1360129254Scognet	cmp	r2, #0x02
1361275418Sandrew	ldrbge	r2, [r1], #0x01
1362129254Scognet	strb	ip, [r3], #0x01
1363275418Sandrew	ldrbgt	ip, [r1]
1364275418Sandrew	strbge	r2, [r3], #0x01
1365275418Sandrew	strbgt	ip, [r3]
1366137463Scognet	RET
1367275522Sandrew/* Place a literal pool here for the above ldr instructions to use */
1368275522Sandrew.ltorg
1369129254Scognet
1370129254Scognet
1371129254Scognet/*
1372129254Scognet * At this point, it has not been possible to word align both buffers.
1373129254Scognet * The destination buffer is word aligned, but the source buffer is not.
1374129254Scognet */
1375129254Scognet.Lmemcpy_bad_align:
1376129254Scognet	stmfd	sp!, {r4-r7}
1377129254Scognet	bic	r1, r1, #0x03
1378129254Scognet	cmp	ip, #2
1379129254Scognet	ldr	ip, [r1], #0x04
1380129254Scognet	bgt	.Lmemcpy_bad3
1381129254Scognet	beq	.Lmemcpy_bad2
1382129254Scognet	b	.Lmemcpy_bad1
1383129254Scognet
1384129254Scognet.Lmemcpy_bad1_loop16:
1385129254Scognet#ifdef __ARMEB__
1386129254Scognet	mov	r4, ip, lsl #8
1387129254Scognet#else
1388129254Scognet	mov	r4, ip, lsr #8
1389129254Scognet#endif
1390129254Scognet	ldr	r5, [r1], #0x04
1391129254Scognet	pld	[r1, #0x018]
1392129254Scognet	ldr	r6, [r1], #0x04
1393129254Scognet	ldr	r7, [r1], #0x04
1394129254Scognet	ldr	ip, [r1], #0x04
1395129254Scognet#ifdef __ARMEB__
1396129254Scognet	orr	r4, r4, r5, lsr #24
1397129254Scognet	mov	r5, r5, lsl #8
1398129254Scognet	orr	r5, r5, r6, lsr #24
1399129254Scognet	mov	r6, r6, lsl #8
1400129254Scognet	orr	r6, r6, r7, lsr #24
1401129254Scognet	mov	r7, r7, lsl #8
1402129254Scognet	orr	r7, r7, ip, lsr #24
1403129254Scognet#else
1404129254Scognet	orr	r4, r4, r5, lsl #24
1405129254Scognet	mov	r5, r5, lsr #8
1406129254Scognet	orr	r5, r5, r6, lsl #24
1407129254Scognet	mov	r6, r6, lsr #8
1408129254Scognet	orr	r6, r6, r7, lsl #24
1409129254Scognet	mov	r7, r7, lsr #8
1410129254Scognet	orr	r7, r7, ip, lsl #24
1411129254Scognet#endif
1412129254Scognet	str	r4, [r3], #0x04
1413129254Scognet	str	r5, [r3], #0x04
1414129254Scognet	str	r6, [r3], #0x04
1415129254Scognet	str	r7, [r3], #0x04
1416129254Scognet.Lmemcpy_bad1:
1417236991Simp	subs	r2, r2, #0x10
1418129254Scognet	bge	.Lmemcpy_bad1_loop16
1419129254Scognet
1420236991Simp	adds	r2, r2, #0x10
1421275418Sandrew	ldmfdeq	sp!, {r4-r7}
1422137463Scognet	RETeq			/* Return now if done */
1423129254Scognet	subs	r2, r2, #0x04
1424129254Scognet	sublt	r1, r1, #0x03
1425129254Scognet	blt	.Lmemcpy_bad_done
1426129254Scognet
1427129254Scognet.Lmemcpy_bad1_loop4:
1428129254Scognet#ifdef __ARMEB__
1429129254Scognet	mov	r4, ip, lsl #8
1430129254Scognet#else
1431129254Scognet	mov	r4, ip, lsr #8
1432129254Scognet#endif
1433129254Scognet	ldr	ip, [r1], #0x04
1434129254Scognet	subs	r2, r2, #0x04
1435129254Scognet#ifdef __ARMEB__
1436129254Scognet	orr	r4, r4, ip, lsr #24
1437129254Scognet#else
1438129254Scognet	orr	r4, r4, ip, lsl #24
1439129254Scognet#endif
1440129254Scognet	str	r4, [r3], #0x04
1441129254Scognet	bge	.Lmemcpy_bad1_loop4
1442129254Scognet	sub	r1, r1, #0x03
1443129254Scognet	b	.Lmemcpy_bad_done
1444129254Scognet
1445129254Scognet.Lmemcpy_bad2_loop16:
1446129254Scognet#ifdef __ARMEB__
1447129254Scognet	mov	r4, ip, lsl #16
1448129254Scognet#else
1449129254Scognet	mov	r4, ip, lsr #16
1450129254Scognet#endif
1451129254Scognet	ldr	r5, [r1], #0x04
1452129254Scognet	pld	[r1, #0x018]
1453129254Scognet	ldr	r6, [r1], #0x04
1454129254Scognet	ldr	r7, [r1], #0x04
1455129254Scognet	ldr	ip, [r1], #0x04
1456129254Scognet#ifdef __ARMEB__
1457129254Scognet	orr	r4, r4, r5, lsr #16
1458129254Scognet	mov	r5, r5, lsl #16
1459129254Scognet	orr	r5, r5, r6, lsr #16
1460129254Scognet	mov	r6, r6, lsl #16
1461129254Scognet	orr	r6, r6, r7, lsr #16
1462129254Scognet	mov	r7, r7, lsl #16
1463129254Scognet	orr	r7, r7, ip, lsr #16
1464129254Scognet#else
1465129254Scognet	orr	r4, r4, r5, lsl #16
1466129254Scognet	mov	r5, r5, lsr #16
1467129254Scognet	orr	r5, r5, r6, lsl #16
1468129254Scognet	mov	r6, r6, lsr #16
1469129254Scognet	orr	r6, r6, r7, lsl #16
1470129254Scognet	mov	r7, r7, lsr #16
1471129254Scognet	orr	r7, r7, ip, lsl #16
1472129254Scognet#endif
1473129254Scognet	str	r4, [r3], #0x04
1474129254Scognet	str	r5, [r3], #0x04
1475129254Scognet	str	r6, [r3], #0x04
1476129254Scognet	str	r7, [r3], #0x04
1477129254Scognet.Lmemcpy_bad2:
1478236991Simp	subs	r2, r2, #0x10
1479129254Scognet	bge	.Lmemcpy_bad2_loop16
1480129254Scognet
1481236991Simp	adds	r2, r2, #0x10
1482275418Sandrew	ldmfdeq	sp!, {r4-r7}
1483137463Scognet	RETeq			/* Return now if done */
1484129254Scognet	subs	r2, r2, #0x04
1485129254Scognet	sublt	r1, r1, #0x02
1486129254Scognet	blt	.Lmemcpy_bad_done
1487129254Scognet
1488129254Scognet.Lmemcpy_bad2_loop4:
1489129254Scognet#ifdef __ARMEB__
1490129254Scognet	mov	r4, ip, lsl #16
1491129254Scognet#else
1492129254Scognet	mov	r4, ip, lsr #16
1493129254Scognet#endif
1494129254Scognet	ldr	ip, [r1], #0x04
1495129254Scognet	subs	r2, r2, #0x04
1496129254Scognet#ifdef __ARMEB__
1497129254Scognet	orr	r4, r4, ip, lsr #16
1498129254Scognet#else
1499129254Scognet	orr	r4, r4, ip, lsl #16
1500129254Scognet#endif
1501129254Scognet	str	r4, [r3], #0x04
1502129254Scognet	bge	.Lmemcpy_bad2_loop4
1503129254Scognet	sub	r1, r1, #0x02
1504129254Scognet	b	.Lmemcpy_bad_done
1505129254Scognet
1506129254Scognet.Lmemcpy_bad3_loop16:
1507129254Scognet#ifdef __ARMEB__
1508129254Scognet	mov	r4, ip, lsl #24
1509129254Scognet#else
1510129254Scognet	mov	r4, ip, lsr #24
1511129254Scognet#endif
1512129254Scognet	ldr	r5, [r1], #0x04
1513129254Scognet	pld	[r1, #0x018]
1514129254Scognet	ldr	r6, [r1], #0x04
1515129254Scognet	ldr	r7, [r1], #0x04
1516129254Scognet	ldr	ip, [r1], #0x04
1517129254Scognet#ifdef __ARMEB__
1518129254Scognet	orr	r4, r4, r5, lsr #8
1519129254Scognet	mov	r5, r5, lsl #24
1520129254Scognet	orr	r5, r5, r6, lsr #8
1521129254Scognet	mov	r6, r6, lsl #24
1522129254Scognet	orr	r6, r6, r7, lsr #8
1523129254Scognet	mov	r7, r7, lsl #24
1524129254Scognet	orr	r7, r7, ip, lsr #8
1525129254Scognet#else
1526129254Scognet	orr	r4, r4, r5, lsl #8
1527129254Scognet	mov	r5, r5, lsr #24
1528129254Scognet	orr	r5, r5, r6, lsl #8
1529129254Scognet	mov	r6, r6, lsr #24
1530129254Scognet	orr	r6, r6, r7, lsl #8
1531129254Scognet	mov	r7, r7, lsr #24
1532129254Scognet	orr	r7, r7, ip, lsl #8
1533129254Scognet#endif
1534129254Scognet	str	r4, [r3], #0x04
1535129254Scognet	str	r5, [r3], #0x04
1536129254Scognet	str	r6, [r3], #0x04
1537129254Scognet	str	r7, [r3], #0x04
1538129254Scognet.Lmemcpy_bad3:
1539236991Simp	subs	r2, r2, #0x10
1540129254Scognet	bge	.Lmemcpy_bad3_loop16
1541129254Scognet
1542236991Simp	adds	r2, r2, #0x10
1543275418Sandrew	ldmfdeq	sp!, {r4-r7}
1544137463Scognet	RETeq			/* Return now if done */
1545129254Scognet	subs	r2, r2, #0x04
1546129254Scognet	sublt	r1, r1, #0x01
1547129254Scognet	blt	.Lmemcpy_bad_done
1548129254Scognet
1549129254Scognet.Lmemcpy_bad3_loop4:
1550129254Scognet#ifdef __ARMEB__
1551129254Scognet	mov	r4, ip, lsl #24
1552129254Scognet#else
1553129254Scognet	mov	r4, ip, lsr #24
1554129254Scognet#endif
1555129254Scognet	ldr	ip, [r1], #0x04
1556129254Scognet	subs	r2, r2, #0x04
1557129254Scognet#ifdef __ARMEB__
1558129254Scognet	orr	r4, r4, ip, lsr #8
1559129254Scognet#else
1560129254Scognet	orr	r4, r4, ip, lsl #8
1561129254Scognet#endif
1562129254Scognet	str	r4, [r3], #0x04
1563129254Scognet	bge	.Lmemcpy_bad3_loop4
1564129254Scognet	sub	r1, r1, #0x01
1565129254Scognet
1566129254Scognet.Lmemcpy_bad_done:
1567129254Scognet	ldmfd	sp!, {r4-r7}
1568129254Scognet	adds	r2, r2, #0x04
1569137463Scognet	RETeq
1570129254Scognet	ldrb	ip, [r1], #0x01
1571129254Scognet	cmp	r2, #0x02
1572275418Sandrew	ldrbge	r2, [r1], #0x01
1573129254Scognet	strb	ip, [r3], #0x01
1574275418Sandrew	ldrbgt	ip, [r1]
1575275418Sandrew	strbge	r2, [r3], #0x01
1576275418Sandrew	strbgt	ip, [r3]
1577137463Scognet	RET
1578129254Scognet
1579129254Scognet
/*
 * Handle short copies (12 bytes or fewer), possibly misaligned.
 * Some of these are *very* common, thanks to the network stack,
 * and so are handled specially.
 */
1585129254Scognet.Lmemcpy_short:
1586129254Scognet	add	pc, pc, r2, lsl #2
1587129254Scognet	nop
1588137463Scognet	RET			/* 0x00 */
1589129254Scognet	b	.Lmemcpy_bytewise	/* 0x01 */
1590129254Scognet	b	.Lmemcpy_bytewise	/* 0x02 */
1591129254Scognet	b	.Lmemcpy_bytewise	/* 0x03 */
1592129254Scognet	b	.Lmemcpy_4		/* 0x04 */
1593129254Scognet	b	.Lmemcpy_bytewise	/* 0x05 */
1594129254Scognet	b	.Lmemcpy_6		/* 0x06 */
1595129254Scognet	b	.Lmemcpy_bytewise	/* 0x07 */
1596129254Scognet	b	.Lmemcpy_8		/* 0x08 */
1597129254Scognet	b	.Lmemcpy_bytewise	/* 0x09 */
1598129254Scognet	b	.Lmemcpy_bytewise	/* 0x0a */
1599129254Scognet	b	.Lmemcpy_bytewise	/* 0x0b */
1600129254Scognet	b	.Lmemcpy_c		/* 0x0c */
1601129254Scognet.Lmemcpy_bytewise:
1602129254Scognet	mov	r3, r0			/* We must not clobber r0 */
1603129254Scognet	ldrb	ip, [r1], #0x01
1604129254Scognet1:	subs	r2, r2, #0x01
1605129254Scognet	strb	ip, [r3], #0x01
1606275418Sandrew	ldrbne	ip, [r1], #0x01
1607129254Scognet	bne	1b
1608137463Scognet	RET
1609129254Scognet
1610129254Scognet/******************************************************************************
1611129254Scognet * Special case for 4 byte copies
1612129254Scognet */
1613129254Scognet#define	LMEMCPY_4_LOG2	6	/* 64 bytes */
1614129254Scognet#define	LMEMCPY_4_PAD	.align LMEMCPY_4_LOG2
1615129254Scognet	LMEMCPY_4_PAD
1616129254Scognet.Lmemcpy_4:
1617129254Scognet	and	r2, r1, #0x03
1618129254Scognet	orr	r2, r2, r0, lsl #2
1619129254Scognet	ands	r2, r2, #0x0f
1620129254Scognet	sub	r3, pc, #0x14
1621129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_4_LOG2
1622129254Scognet
1623129254Scognet/*
1624129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
1625129254Scognet */
1626129254Scognet	ldr	r2, [r1]
1627129254Scognet	str	r2, [r0]
1628137463Scognet	RET
1629129254Scognet	LMEMCPY_4_PAD
1630129254Scognet
1631129254Scognet/*
1632129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned
1633129254Scognet */
1634129254Scognet	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
1635129254Scognet	ldr	r2, [r1, #3]		/* BE:r2 = 3xxx  LE:r2 = xxx3 */
1636129254Scognet#ifdef __ARMEB__
1637129254Scognet	mov	r3, r3, lsl #8		/* r3 = 012. */
1638129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
1639129254Scognet#else
1640129254Scognet	mov	r3, r3, lsr #8		/* r3 = .210 */
1641129254Scognet	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
1642129254Scognet#endif
1643129254Scognet	str	r3, [r0]
1644137463Scognet	RET
1645129254Scognet	LMEMCPY_4_PAD
1646129254Scognet
1647129254Scognet/*
1648129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned
1649129254Scognet */
1650129254Scognet#ifdef __ARMEB__
1651129254Scognet	ldrh	r3, [r1]
1652129254Scognet	ldrh	r2, [r1, #0x02]
1653129254Scognet#else
1654129254Scognet	ldrh	r3, [r1, #0x02]
1655129254Scognet	ldrh	r2, [r1]
1656129254Scognet#endif
1657129254Scognet	orr	r3, r2, r3, lsl #16
1658129254Scognet	str	r3, [r0]
1659137463Scognet	RET
1660129254Scognet	LMEMCPY_4_PAD
1661129254Scognet
1662129254Scognet/*
1663129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned
1664129254Scognet */
1665129254Scognet	ldr	r3, [r1, #-3]		/* BE:r3 = xxx0  LE:r3 = 0xxx */
1666129254Scognet	ldr	r2, [r1, #1]		/* BE:r2 = 123x  LE:r2 = x321 */
1667129254Scognet#ifdef __ARMEB__
1668129254Scognet	mov	r3, r3, lsl #24		/* r3 = 0... */
1669129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
1670129254Scognet#else
1671129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...0 */
1672129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
1673129254Scognet#endif
1674129254Scognet	str	r3, [r0]
1675137463Scognet	RET
1676129254Scognet	LMEMCPY_4_PAD
1677129254Scognet
1678129254Scognet/*
1679129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned
1680129254Scognet */
1681129254Scognet	ldr	r2, [r1]
1682129254Scognet#ifdef __ARMEB__
1683129254Scognet	strb	r2, [r0, #0x03]
1684129254Scognet	mov	r3, r2, lsr #8
1685129254Scognet	mov	r1, r2, lsr #24
1686129254Scognet	strb	r1, [r0]
1687129254Scognet#else
1688129254Scognet	strb	r2, [r0]
1689129254Scognet	mov	r3, r2, lsr #8
1690129254Scognet	mov	r1, r2, lsr #24
1691129254Scognet	strb	r1, [r0, #0x03]
1692129254Scognet#endif
1693129254Scognet	strh	r3, [r0, #0x01]
1694137463Scognet	RET
1695129254Scognet	LMEMCPY_4_PAD
1696129254Scognet
1697129254Scognet/*
1698129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned
1699129254Scognet */
1700129254Scognet	ldrb	r2, [r1]
1701129254Scognet	ldrh	r3, [r1, #0x01]
1702129254Scognet	ldrb	r1, [r1, #0x03]
1703129254Scognet	strb	r2, [r0]
1704129254Scognet	strh	r3, [r0, #0x01]
1705129254Scognet	strb	r1, [r0, #0x03]
1706137463Scognet	RET
1707129254Scognet	LMEMCPY_4_PAD
1708129254Scognet
1709129254Scognet/*
1710129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned
1711129254Scognet */
1712129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1713129254Scognet	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1714129254Scognet#ifdef __ARMEB__
1715129254Scognet	mov	r1, r2, lsr #8		/* r1 = ...0 */
1716129254Scognet	strb	r1, [r0]		/* dst[0] */
1717129254Scognet	mov	r2, r2, lsl #8		/* r2 = .01. */
1718129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = .012 */
1719129254Scognet#else
1720129254Scognet	strb	r2, [r0]		/* dst[0] */
1721129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
1722129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
1723129254Scognet	mov	r3, r3, lsr #8		/* r3 = ...3 */
1724129254Scognet#endif
1725129254Scognet	strh	r2, [r0, #0x01]		/* dst[1..2]: only the low halfword of r2 is stored */
1726129254Scognet	strb	r3, [r0, #0x03]		/* dst[3]: only the low byte of r3 is stored */
1727137463Scognet	RET
1728129254Scognet	LMEMCPY_4_PAD
1729129254Scognet
1730129254Scognet/*
1731129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned
1732129254Scognet */
1733129254Scognet	ldrb	r2, [r1]
1734129254Scognet	ldrh	r3, [r1, #0x01]
1735129254Scognet	ldrb	r1, [r1, #0x03]
1736129254Scognet	strb	r2, [r0]
1737129254Scognet	strh	r3, [r0, #0x01]
1738129254Scognet	strb	r1, [r0, #0x03]
1739137463Scognet	RET
1740129254Scognet	LMEMCPY_4_PAD
1741129254Scognet
1742129254Scognet/*
1743129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned
1744129254Scognet */
1745129254Scognet	ldr	r2, [r1]
1746129254Scognet#ifdef __ARMEB__
1747129254Scognet	strh	r2, [r0, #0x02]
1748129254Scognet	mov	r3, r2, lsr #16
1749129254Scognet	strh	r3, [r0]
1750129254Scognet#else
1751129254Scognet	strh	r2, [r0]
1752129254Scognet	mov	r3, r2, lsr #16
1753129254Scognet	strh	r3, [r0, #0x02]
1754129254Scognet#endif
1755137463Scognet	RET
1756129254Scognet	LMEMCPY_4_PAD
1757129254Scognet
1758129254Scognet/*
1759129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned
1760129254Scognet */
1761129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
1762129254Scognet	ldr	r3, [r1, #3]		/* BE:r3 = 3xxx  LE:r3 = xxx3 */
1763129254Scognet	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
1764129254Scognet	strh	r1, [r0]
1765129254Scognet#ifdef __ARMEB__
1766129254Scognet	mov	r2, r2, lsl #8		/* r2 = 012. */
1767129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
1768129254Scognet#else
1769129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...2 */
1770129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = xx32 */
1771129254Scognet#endif
1772129254Scognet	strh	r2, [r0, #0x02]
1773137463Scognet	RET
1774129254Scognet	LMEMCPY_4_PAD
1775129254Scognet
1776129254Scognet/*
1777129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned
1778129254Scognet */
1779129254Scognet	ldrh	r2, [r1]
1780129254Scognet	ldrh	r3, [r1, #0x02]
1781129254Scognet	strh	r2, [r0]
1782129254Scognet	strh	r3, [r0, #0x02]
1783137463Scognet	RET
1784129254Scognet	LMEMCPY_4_PAD
1785129254Scognet
1786129254Scognet/*
1787129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned
1788129254Scognet */
1789129254Scognet	ldr	r3, [r1, #1]		/* BE:r3 = 123x  LE:r3 = x321 */
1790129254Scognet	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
1791129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .123  LE:r1 = .x32 */
1792129254Scognet	strh	r1, [r0, #0x02]
1793129254Scognet#ifdef __ARMEB__
1794129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...1 */
1795129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = xx01 */
1796129254Scognet#else
1797129254Scognet	mov	r3, r3, lsl #8		/* r3 = 321. */
1798129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = 3210 */
1799129254Scognet#endif
1800129254Scognet	strh	r3, [r0]
1801137463Scognet	RET
1802129254Scognet	LMEMCPY_4_PAD
1803129254Scognet
1804129254Scognet/*
1805129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned
1806129254Scognet */
1807129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1808129254Scognet#ifdef __ARMEB__
1809129254Scognet	strb	r2, [r0, #0x03]
1810129254Scognet	mov	r3, r2, lsr #8
1811129254Scognet	mov	r1, r2, lsr #24
1812129254Scognet	strh	r3, [r0, #0x01]
1813129254Scognet	strb	r1, [r0]
1814129254Scognet#else
1815129254Scognet	strb	r2, [r0]
1816129254Scognet	mov	r3, r2, lsr #8
1817129254Scognet	mov	r1, r2, lsr #24
1818129254Scognet	strh	r3, [r0, #0x01]
1819129254Scognet	strb	r1, [r0, #0x03]
1820129254Scognet#endif
1821137463Scognet	RET
1822129254Scognet	LMEMCPY_4_PAD
1823129254Scognet
1824129254Scognet/*
1825129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned
1826129254Scognet */
1827129254Scognet	ldrb	r2, [r1]
1828129254Scognet	ldrh	r3, [r1, #0x01]
1829129254Scognet	ldrb	r1, [r1, #0x03]
1830129254Scognet	strb	r2, [r0]
1831129254Scognet	strh	r3, [r0, #0x01]
1832129254Scognet	strb	r1, [r0, #0x03]
1833137463Scognet	RET
1834129254Scognet	LMEMCPY_4_PAD
1835129254Scognet
1836129254Scognet/*
1837129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned
1838129254Scognet */
1839129254Scognet#ifdef __ARMEB__
1840129254Scognet	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1841129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1842129254Scognet	strb	r3, [r0, #0x03]		/* dst[3]: low byte of r3 */
1843129254Scognet	mov	r3, r3, lsr #8		/* r3 = ...2 */
1844129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = .012 */
1845129254Scognet	strh	r3, [r0, #0x01]		/* dst[1..2]: only the low halfword (12) is stored */
1846129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...0 */
1847129254Scognet	strb	r2, [r0]		/* dst[0] */
1848129254Scognet#else
1849129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1850129254Scognet	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1851129254Scognet	strb	r2, [r0]		/* dst[0]: low byte of r2 */
1852129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
1853129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
1854129254Scognet	strh	r2, [r0, #0x01]		/* dst[1..2]: only the low halfword (21) is stored */
1855129254Scognet	mov	r3, r3, lsr #8		/* r3 = ...3 */
1856129254Scognet	strb	r3, [r0, #0x03]		/* dst[3] */
1857129254Scognet#endif
1858137463Scognet	RET
1859129254Scognet	LMEMCPY_4_PAD
1860129254Scognet
1861129254Scognet/*
1862129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned
1863129254Scognet */
1864129254Scognet	ldrb	r2, [r1]
1865129254Scognet	ldrh	r3, [r1, #0x01]
1866129254Scognet	ldrb	r1, [r1, #0x03]
1867129254Scognet	strb	r2, [r0]
1868129254Scognet	strh	r3, [r0, #0x01]
1869129254Scognet	strb	r1, [r0, #0x03]
1870137463Scognet	RET
1871129254Scognet	LMEMCPY_4_PAD
1872129254Scognet
1873129254Scognet
1874129254Scognet/******************************************************************************
1875129254Scognet * Special case for 6 byte copies
1876129254Scognet */
1877129254Scognet#define	LMEMCPY_6_LOG2	6	/* 64 bytes */
1878129254Scognet#define	LMEMCPY_6_PAD	.align LMEMCPY_6_LOG2
1879129254Scognet	LMEMCPY_6_PAD
1880129254Scognet.Lmemcpy_6:
	/*
	 * Dispatch on the low two bits of both pointers.  The 4-bit index is
	 * ((dst & 3) << 2) | (src & 3), and the handler for index N starts
	 * at .Lmemcpy_6 + (N << LMEMCPY_6_LOG2) because the label itself and
	 * every handler are aligned with LMEMCPY_6_PAD.  Consequently this
	 * prologue plus handler 0000, and each later handler, must fit in
	 * 64 bytes; do not add or remove instructions in the prologue
	 * without adjusting the 0x14 offset below.
	 */
1881129254Scognet	and	r2, r1, #0x03		/* r2 = src & 3 */
1882129254Scognet	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
1883129254Scognet	ands	r2, r2, #0x0f		/* keep the 4 index bits; Z = both pointers word aligned */
1884129254Scognet	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_6 (pc reads as this insn + 8; this insn is at +0x0c); flags kept */
1885129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_6_LOG2	/* index != 0: jump to its handler, else fall into 0000 */
1886129254Scognet
1887129254Scognet/*
1888129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
1889129254Scognet */
1890129254Scognet	ldr	r2, [r1]
1891129254Scognet	ldrh	r3, [r1, #0x04]
1892129254Scognet	str	r2, [r0]
1893129254Scognet	strh	r3, [r0, #0x04]
1894137463Scognet	RET
1895129254Scognet	LMEMCPY_6_PAD
1896129254Scognet
1897129254Scognet/*
1898129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned
1899129254Scognet */
1900129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
1901129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 345x  LE:r3 = x543 */
1902129254Scognet#ifdef __ARMEB__
1903129254Scognet	mov	r2, r2, lsl #8		/* r2 = 012. */
1904129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
1905129254Scognet#else
1906129254Scognet	mov	r2, r2, lsr #8		/* r2 = .210 */
1907129254Scognet	orr	r2, r2, r3, lsl #24	/* r2 = 3210 */
1908129254Scognet#endif
1909129254Scognet	mov	r3, r3, lsr #8		/* BE:r3 = .345  LE:r3 = .x54 */
1910129254Scognet	str	r2, [r0]
1911129254Scognet	strh	r3, [r0, #0x04]
1912137463Scognet	RET
1913129254Scognet	LMEMCPY_6_PAD
1914129254Scognet
1915129254Scognet/*
1916129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned
1917129254Scognet */
1918129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
1919129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1920129254Scognet#ifdef __ARMEB__
1921129254Scognet	mov	r1, r3, lsr #16		/* r1 = ..23 */
1922129254Scognet	orr	r1, r1, r2, lsl #16	/* r1 = 0123 */
1923129254Scognet	str	r1, [r0]
1924129254Scognet	strh	r3, [r0, #0x04]
1925129254Scognet#else
1926129254Scognet	mov	r1, r3, lsr #16		/* r1 = ..54 */
1927129254Scognet	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
1928129254Scognet	str	r2, [r0]
1929129254Scognet	strh	r1, [r0, #0x04]
1930129254Scognet#endif
1931137463Scognet	RET
1932129254Scognet	LMEMCPY_6_PAD
1933129254Scognet
1934129254Scognet/*
1935129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned
1936129254Scognet */
1937129254Scognet	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
1938129254Scognet	ldr	r3, [r1, #1]		/* BE:r3 = 1234  LE:r3 = 4321 */
1939129254Scognet	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
1940129254Scognet#ifdef __ARMEB__
1941129254Scognet	mov	r2, r2, lsl #24		/* r2 = 0... */
1942129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
1943129254Scognet	mov	r3, r3, lsl #8		/* r3 = 234. */
1944129254Scognet	orr	r1, r3, r1, lsr #24	/* r1 = 2345 */
1945129254Scognet#else
1946129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...0 */
1947129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
1948129254Scognet	mov	r1, r1, lsl #8		/* r1 = xx5. */
1949129254Scognet	orr	r1, r1, r3, lsr #24	/* r1 = xx54 */
1950129254Scognet#endif
1951129254Scognet	str	r2, [r0]		/* dst[0..3] */
1952129254Scognet	strh	r1, [r0, #0x04]		/* dst[4..5]: only the low halfword of r1 is stored */
1953137463Scognet	RET
1954129254Scognet	LMEMCPY_6_PAD
1955129254Scognet
1956129254Scognet/*
1957129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned
1958129254Scognet */
1959129254Scognet	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
1960129254Scognet	ldrh	r2, [r1, #0x04]		/* BE:r2 = ..45  LE:r2 = ..54 */
1961129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
1962129254Scognet	strh	r1, [r0, #0x01]
1963129254Scognet#ifdef __ARMEB__
1964129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...0 */
1965129254Scognet	strb	r1, [r0]
1966129254Scognet	mov	r3, r3, lsl #8		/* r3 = 123. */
1967129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 1234 */
1968129254Scognet#else
1969129254Scognet	strb	r3, [r0]
1970129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...3 */
1971129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = .543 */
1972129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...5 */
1973129254Scognet#endif
1974129254Scognet	strh	r3, [r0, #0x03]
1975129254Scognet	strb	r2, [r0, #0x05]
1976137463Scognet	RET
1977129254Scognet	LMEMCPY_6_PAD
1978129254Scognet
1979129254Scognet/*
1980129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned
1981129254Scognet */
1982129254Scognet	ldrb	r2, [r1]
1983129254Scognet	ldrh	r3, [r1, #0x01]
1984129254Scognet	ldrh	ip, [r1, #0x03]
1985129254Scognet	ldrb	r1, [r1, #0x05]
1986129254Scognet	strb	r2, [r0]
1987129254Scognet	strh	r3, [r0, #0x01]
1988129254Scognet	strh	ip, [r0, #0x03]
1989129254Scognet	strb	r1, [r0, #0x05]
1990137463Scognet	RET
1991129254Scognet	LMEMCPY_6_PAD
1992129254Scognet
1993129254Scognet/*
1994129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned
1995129254Scognet */
1996129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1997129254Scognet	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
1998129254Scognet#ifdef __ARMEB__
1999129254Scognet	mov	r3, r2, lsr #8		/* r3 = ...0 */
2000129254Scognet	strb	r3, [r0]
2001129254Scognet	strb	r1, [r0, #0x05]
2002129254Scognet	mov	r3, r1, lsr #8		/* r3 = .234 */
2003129254Scognet	strh	r3, [r0, #0x03]
2004129254Scognet	mov	r3, r2, lsl #8		/* r3 = .01. */
2005129254Scognet	orr	r3, r3, r1, lsr #24	/* r3 = .012 */
2006129254Scognet	strh	r3, [r0, #0x01]
2007129254Scognet#else
2008129254Scognet	strb	r2, [r0]
2009129254Scognet	mov	r3, r1, lsr #24
2010129254Scognet	strb	r3, [r0, #0x05]
2011129254Scognet	mov	r3, r1, lsr #8		/* r3 = .543 */
2012129254Scognet	strh	r3, [r0, #0x03]
2013129254Scognet	mov	r3, r2, lsr #8		/* r3 = ...1 */
2014129254Scognet	orr	r3, r3, r1, lsl #8	/* r3 = 4321 */
2015129254Scognet	strh	r3, [r0, #0x01]
2016129254Scognet#endif
2017137463Scognet	RET
2018129254Scognet	LMEMCPY_6_PAD
2019129254Scognet
2020129254Scognet/*
2021129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned
2022129254Scognet */
2023129254Scognet	ldrb	r2, [r1]
2024129254Scognet	ldrh	r3, [r1, #0x01]
2025129254Scognet	ldrh	ip, [r1, #0x03]
2026129254Scognet	ldrb	r1, [r1, #0x05]
2027129254Scognet	strb	r2, [r0]
2028129254Scognet	strh	r3, [r0, #0x01]
2029129254Scognet	strh	ip, [r0, #0x03]
2030129254Scognet	strb	r1, [r0, #0x05]
2031137463Scognet	RET
2032129254Scognet	LMEMCPY_6_PAD
2033129254Scognet
2034129254Scognet/*
2035129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned
2036129254Scognet */
2037129254Scognet#ifdef __ARMEB__
2038129254Scognet	ldr	r2, [r1]		/* r2 = 0123 */
2039129254Scognet	ldrh	r3, [r1, #0x04]		/* r3 = ..45 */
2040129254Scognet	mov	r1, r2, lsr #16		/* r1 = ..01 */
2041129254Scognet	orr	r3, r3, r2, lsl#16	/* r3 = 2345 */
2042129254Scognet	strh	r1, [r0]
2043129254Scognet	str	r3, [r0, #0x02]
2044129254Scognet#else
2045129254Scognet	ldrh	r2, [r1, #0x04]		/* r2 = ..54 */
2046129254Scognet	ldr	r3, [r1]		/* r3 = 3210 */
2047129254Scognet	mov	r2, r2, lsl #16		/* r2 = 54.. */
2048129254Scognet	orr	r2, r2, r3, lsr #16	/* r2 = 5432 */
2049129254Scognet	strh	r3, [r0]
2050129254Scognet	str	r2, [r0, #0x02]
2051129254Scognet#endif
2052137463Scognet	RET
2053129254Scognet	LMEMCPY_6_PAD
2054129254Scognet
2055129254Scognet/*
2056129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned
2057129254Scognet */
2058129254Scognet	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
2059129254Scognet	ldr	r2, [r1, #3]		/* BE:r2 = 345x  LE:r2 = x543 */
2060129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
2061129254Scognet#ifdef __ARMEB__
2062129254Scognet	mov	r2, r2, lsr #8		/* r2 = .345 */
2063129254Scognet	orr	r2, r2, r3, lsl #24	/* r2 = 2345 */
2064129254Scognet#else
2065129254Scognet	mov	r2, r2, lsl #8		/* r2 = 543. */
2066129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 5432 */
2067129254Scognet#endif
2068129254Scognet	strh	r1, [r0]
2069129254Scognet	str	r2, [r0, #0x02]
2070137463Scognet	RET
2071129254Scognet	LMEMCPY_6_PAD
2072129254Scognet
2073129254Scognet/*
2074129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned
2075129254Scognet */
2076129254Scognet	ldrh	r2, [r1]
2077129254Scognet	ldr	r3, [r1, #0x02]
2078129254Scognet	strh	r2, [r0]
2079129254Scognet	str	r3, [r0, #0x02]
2080137463Scognet	RET
2081129254Scognet	LMEMCPY_6_PAD
2082129254Scognet
2083129254Scognet/*
2084129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned
2085129254Scognet */
2086129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2087129254Scognet	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2088129254Scognet	ldrb	r1, [r1, #0x05]		/* r1 = ...5 */
2089129254Scognet#ifdef __ARMEB__
2090129254Scognet	mov	r3, r3, lsl #8		/* r3 = ..0. */
2091129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = ..01 */
2092129254Scognet	orr	r1, r1, r2, lsl #8	/* r1 = 2345 */
2093129254Scognet#else
2094129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
2095129254Scognet	mov	r1, r1, lsl #24		/* r1 = 5... */
2096129254Scognet	orr	r1, r1, r2, lsr #8	/* r1 = 5432 */
2097129254Scognet#endif
2098129254Scognet	strh	r3, [r0]
2099129254Scognet	str	r1, [r0, #0x02]
2100137463Scognet	RET
2101129254Scognet	LMEMCPY_6_PAD
2102129254Scognet
2103129254Scognet/*
2104129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned
2105129254Scognet */
2106129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2107129254Scognet	ldrh	r1, [r1, #0x04]		/* BE:r1 = ..45  LE:r1 = ..54 */
2108129254Scognet#ifdef __ARMEB__
2109129254Scognet	mov	r3, r2, lsr #24		/* r3 = ...0 */
2110129254Scognet	strb	r3, [r0]
2111129254Scognet	mov	r2, r2, lsl #8		/* r2 = 123. */
2112129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
2113129254Scognet#else
2114129254Scognet	strb	r2, [r0]
2115129254Scognet	mov	r2, r2, lsr #8		/* r2 = .321 */
2116129254Scognet	orr	r2, r2, r1, lsl #24	/* r2 = 4321 */
2117129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...5 */
2118129254Scognet#endif
2119129254Scognet	str	r2, [r0, #0x01]
2120129254Scognet	strb	r1, [r0, #0x05]
2121137463Scognet	RET
2122129254Scognet	LMEMCPY_6_PAD
2123129254Scognet
2124129254Scognet/*
2125129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned
2126129254Scognet */
2127129254Scognet	ldrb	r2, [r1]
2128129254Scognet	ldrh	r3, [r1, #0x01]
2129129254Scognet	ldrh	ip, [r1, #0x03]
2130129254Scognet	ldrb	r1, [r1, #0x05]
2131129254Scognet	strb	r2, [r0]
2132129254Scognet	strh	r3, [r0, #0x01]
2133129254Scognet	strh	ip, [r0, #0x03]
2134129254Scognet	strb	r1, [r0, #0x05]
2135137463Scognet	RET
2136129254Scognet	LMEMCPY_6_PAD
2137129254Scognet
2138129254Scognet/*
2139129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned
2140129254Scognet */
2141129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2142129254Scognet	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
2143129254Scognet#ifdef __ARMEB__
2144129254Scognet	mov	r3, r2, lsr #8		/* r3 = ...0 */
2145129254Scognet	strb	r3, [r0]
2146129254Scognet	mov	r2, r2, lsl #24		/* r2 = 1... */
2147129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
2148129254Scognet#else
2149129254Scognet	strb	r2, [r0]
2150129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
2151129254Scognet	orr	r2, r2, r1, lsl #8	/* r2 = 4321 */
2152129254Scognet	mov	r1, r1, lsr #24		/* r1 = ...5 */
2153129254Scognet#endif
2154129254Scognet	str	r2, [r0, #0x01]
2155129254Scognet	strb	r1, [r0, #0x05]
2156137463Scognet	RET
2157129254Scognet	LMEMCPY_6_PAD
2158129254Scognet
2159129254Scognet/*
2160129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned
2161129254Scognet */
2162129254Scognet	ldrb	r2, [r1]
2163129254Scognet	ldr	r3, [r1, #0x01]
2164129254Scognet	ldrb	r1, [r1, #0x05]
2165129254Scognet	strb	r2, [r0]
2166129254Scognet	str	r3, [r0, #0x01]
2167129254Scognet	strb	r1, [r0, #0x05]
2168137463Scognet	RET
2169129254Scognet	LMEMCPY_6_PAD
2170129254Scognet
2171129254Scognet
2172129254Scognet/******************************************************************************
2173129254Scognet * Special case for 8 byte copies
2174129254Scognet */
2175129254Scognet#define	LMEMCPY_8_LOG2	6	/* 64 bytes */
2176129254Scognet#define	LMEMCPY_8_PAD	.align LMEMCPY_8_LOG2
2177129254Scognet	LMEMCPY_8_PAD
2178129254Scognet.Lmemcpy_8:
	/*
	 * Dispatch on the low two bits of both pointers.  The 4-bit index is
	 * ((dst & 3) << 2) | (src & 3), and the jump target for index N is
	 * .Lmemcpy_8 + (N << LMEMCPY_8_LOG2).  That matches the layout
	 * because the label and the handlers visible here are aligned with
	 * LMEMCPY_8_PAD, so this prologue plus handler 0000, and each later
	 * handler, must fit in 64 bytes.  Do not add or remove instructions
	 * in the prologue without adjusting the 0x14 offset below.
	 */
2179129254Scognet	and	r2, r1, #0x03		/* r2 = src & 3 */
2180129254Scognet	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
2181129254Scognet	ands	r2, r2, #0x0f		/* keep the 4 index bits; Z = both pointers word aligned */
2182129254Scognet	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_8 (pc reads as this insn + 8; this insn is at +0x0c); flags kept */
2183129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_8_LOG2	/* index != 0: jump to its handler, else fall into 0000 */
2184129254Scognet
2185129254Scognet/*
2186129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
2187129254Scognet */
2188129254Scognet	ldr	r2, [r1]
2189129254Scognet	ldr	r3, [r1, #0x04]
2190129254Scognet	str	r2, [r0]
2191129254Scognet	str	r3, [r0, #0x04]
2192137463Scognet	RET
2193129254Scognet	LMEMCPY_8_PAD
2194129254Scognet
2195129254Scognet/*
2196129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned
2197129254Scognet */
2198129254Scognet	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
2199129254Scognet	ldr	r2, [r1, #0x03]		/* BE:r2 = 3456  LE:r2 = 6543 */
2200129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2201129254Scognet#ifdef __ARMEB__
2202129254Scognet	mov	r3, r3, lsl #8		/* r3 = 012. */
2203129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
2204129254Scognet	orr	r2, r1, r2, lsl #8	/* r2 = 4567 */
2205129254Scognet#else
2206129254Scognet	mov	r3, r3, lsr #8		/* r3 = .210 */
2207129254Scognet	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
2208129254Scognet	mov	r1, r1, lsl #24		/* r1 = 7... */
2209129254Scognet	orr	r2, r1, r2, lsr #8	/* r2 = 7654 */
2210129254Scognet#endif
2211129254Scognet	str	r3, [r0]
2212129254Scognet	str	r2, [r0, #0x04]
2213137463Scognet	RET
2214129254Scognet	LMEMCPY_8_PAD
2215129254Scognet
2216129254Scognet/*
2217129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned
2218129254Scognet */
2219129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2220129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2221129254Scognet	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2222129254Scognet#ifdef __ARMEB__
2223129254Scognet	mov	r2, r2, lsl #16		/* r2 = 01.. */
2224129254Scognet	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2225129254Scognet	orr	r3, r1, r3, lsl #16	/* r3 = 4567 */
2226129254Scognet#else
2227129254Scognet	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2228129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..54 */
2229129254Scognet	orr	r3, r3, r1, lsl #16	/* r3 = 7654 */
2230129254Scognet#endif
2231129254Scognet	str	r2, [r0]
2232129254Scognet	str	r3, [r0, #0x04]
2233137463Scognet	RET
2234129254Scognet	LMEMCPY_8_PAD
2235129254Scognet
2236129254Scognet/*
2237129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned
2238129254Scognet */
2239129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2240129254Scognet	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2241129254Scognet	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x  LE:r1 = x765 */
2242129254Scognet#ifdef __ARMEB__
2243129254Scognet	mov	r3, r3, lsl #24		/* r3 = 0... */
2244129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
2245129254Scognet	mov	r2, r2, lsl #24		/* r2 = 4... */
2246129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 4567 */
2247129254Scognet#else
2248129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
2249129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...4 */
2250129254Scognet	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
2251129254Scognet#endif
2252129254Scognet	str	r3, [r0]
2253129254Scognet	str	r2, [r0, #0x04]
2254137463Scognet	RET
2255129254Scognet	LMEMCPY_8_PAD
2256129254Scognet
2257129254Scognet/*
2258129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned
2259129254Scognet */
2260129254Scognet	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
2261129254Scognet	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
2262129254Scognet#ifdef __ARMEB__
2263129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...0 */
2264129254Scognet	strb	r1, [r0]
2265129254Scognet	mov	r1, r3, lsr #8		/* r1 = .012 */
2266129254Scognet	strb	r2, [r0, #0x07]
2267129254Scognet	mov	r3, r3, lsl #24		/* r3 = 3... */
2268129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 3456 */
2269129254Scognet#else
2270129254Scognet	strb	r3, [r0]
2271129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...7 */
2272129254Scognet	strb	r1, [r0, #0x07]
2273129254Scognet	mov	r1, r3, lsr #8		/* r1 = .321 */
2274129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...3 */
2275129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
2276129254Scognet#endif
2277129254Scognet	strh	r1, [r0, #0x01]
2278129254Scognet	str	r3, [r0, #0x03]
2279137463Scognet	RET
2280129254Scognet	LMEMCPY_8_PAD
2281129254Scognet
2282129254Scognet/*
2283129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned
2284129254Scognet */
2285129254Scognet	ldrb	r2, [r1]
2286129254Scognet	ldrh	r3, [r1, #0x01]
2287129254Scognet	ldr	ip, [r1, #0x03]
2288129254Scognet	ldrb	r1, [r1, #0x07]
2289129254Scognet	strb	r2, [r0]
2290129254Scognet	strh	r3, [r0, #0x01]
2291129254Scognet	str	ip, [r0, #0x03]
2292129254Scognet	strb	r1, [r0, #0x07]
2293137463Scognet	RET
2294129254Scognet	LMEMCPY_8_PAD
2295129254Scognet
2296129254Scognet/*
2297129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned
2298129254Scognet */
2299129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2300129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2301129254Scognet	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2302129254Scognet#ifdef __ARMEB__
2303129254Scognet	mov	ip, r2, lsr #8		/* ip = ...0 */
2304129254Scognet	strb	ip, [r0]
2305129254Scognet	mov	ip, r2, lsl #8		/* ip = .01. */
2306129254Scognet	orr	ip, ip, r3, lsr #24	/* ip = .012 */
2307129254Scognet	strb	r1, [r0, #0x07]
2308129254Scognet	mov	r3, r3, lsl #8		/* r3 = 345. */
2309129254Scognet	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
2310129254Scognet#else
2311129254Scognet	strb	r2, [r0]		/* 0 */
2312129254Scognet	mov	ip, r1, lsr #8		/* ip = ...7 */
2313129254Scognet	strb	ip, [r0, #0x07]		/* 7 */
2314129254Scognet	mov	ip, r2, lsr #8		/* ip = ...1 */
2315129254Scognet	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2316129254Scognet	mov	r3, r3, lsr #8		/* r3 = .543 */
2317129254Scognet	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
2318129254Scognet#endif
2319129254Scognet	strh	ip, [r0, #0x01]
2320129254Scognet	str	r3, [r0, #0x03]
2321137463Scognet	RET
2322129254Scognet	LMEMCPY_8_PAD
2323129254Scognet
2324129254Scognet/*
2325129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned
2326129254Scognet */
2327129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2328129254Scognet	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2329129254Scognet	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
2330129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2331129254Scognet	strb	r3, [r0]
2332129254Scognet	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
2333129254Scognet#ifdef __ARMEB__
2334129254Scognet	strh	r3, [r0, #0x01]
2335129254Scognet	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
2336129254Scognet#else
2337129254Scognet	strh	ip, [r0, #0x01]
2338129254Scognet	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
2339129254Scognet#endif
2340129254Scognet	str	r2, [r0, #0x03]
2341129254Scognet	strb	r1, [r0, #0x07]
2342137463Scognet	RET
2343129254Scognet	LMEMCPY_8_PAD
2344129254Scognet
2345129254Scognet/*
2346129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned
2347129254Scognet */
2348129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2349129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2350129254Scognet	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2351129254Scognet#ifdef __ARMEB__
2352129254Scognet	strh	r1, [r0]
2353129254Scognet	mov	r1, r3, lsr #16		/* r1 = ..45 */
2354129254Scognet	orr	r2, r1 ,r2, lsl #16	/* r2 = 2345 */
2355129254Scognet#else
2356129254Scognet	strh	r2, [r0]
2357129254Scognet	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
2358129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..76 */
2359129254Scognet#endif
2360129254Scognet	str	r2, [r0, #0x02]
2361129254Scognet	strh	r3, [r0, #0x06]
2362137463Scognet	RET
2363129254Scognet	LMEMCPY_8_PAD
2364129254Scognet
2365129254Scognet/*
2366129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned
2367129254Scognet */
2368129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2369129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2370129254Scognet	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
2371129254Scognet	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
2372129254Scognet	strh	r1, [r0]
2373129254Scognet#ifdef __ARMEB__
2374129254Scognet	mov	r1, r2, lsl #24		/* r1 = 2... */
2375129254Scognet	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
2376129254Scognet	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
2377129254Scognet#else
2378129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...2 */
2379129254Scognet	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
2380129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...6 */
2381129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
2382129254Scognet#endif
2383129254Scognet	str	r1, [r0, #0x02]
2384129254Scognet	strh	r3, [r0, #0x06]
2385137463Scognet	RET
2386129254Scognet	LMEMCPY_8_PAD
2387129254Scognet
2388129254Scognet/*
2389129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned
2390129254Scognet */
2391129254Scognet	ldrh	r2, [r1]
2392129254Scognet	ldr	ip, [r1, #0x02]
2393129254Scognet	ldrh	r3, [r1, #0x06]
2394129254Scognet	strh	r2, [r0]
2395129254Scognet	str	ip, [r0, #0x02]
2396129254Scognet	strh	r3, [r0, #0x06]
2397137463Scognet	RET
2398129254Scognet	LMEMCPY_8_PAD
2399129254Scognet
2400129254Scognet/*
2401129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned
2402129254Scognet */
2403129254Scognet	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
2404129254Scognet	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2405129254Scognet	ldrb	ip, [r1]		/* ip = ...0 */
2406129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
2407129254Scognet	strh	r1, [r0, #0x06]
2408129254Scognet#ifdef __ARMEB__
2409129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...5 */
2410129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
2411129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...1 */
2412129254Scognet	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
2413129254Scognet#else
2414129254Scognet	mov	r3, r3, lsl #24		/* r3 = 5... */
2415129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
2416129254Scognet	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
2417129254Scognet#endif
2418129254Scognet	str	r3, [r0, #0x02]
2419129254Scognet	strh	r2, [r0]
2420137463Scognet	RET
2421129254Scognet	LMEMCPY_8_PAD
2422129254Scognet
2423129254Scognet/*
2424129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned (word store at dst+1 implies dst at byte 3 of a word)
2425129254Scognet */
2426129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2427129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2428129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
2429129254Scognet	strh	r1, [r0, #0x05]		/* low 16 bits -> bytes 5-6 */
2430129254Scognet#ifdef __ARMEB__
2431129254Scognet	strb	r3, [r0, #0x07]		/* low byte of r3 -> byte 7 */
2432129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...0 */
2433129254Scognet	strb	r1, [r0]		/* store byte 0 */
2434129254Scognet	mov	r2, r2, lsl #8		/* r2 = 123. */
2435129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
2436129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2437129254Scognet#else
2438129254Scognet	strb	r2, [r0]		/* low byte of r2 -> byte 0 */
2439129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...7 */
2440129254Scognet	strb	r1, [r0, #0x07]		/* store byte 7 */
2441129254Scognet	mov	r2, r2, lsr #8		/* r2 = .321 */
2442129254Scognet	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
2443129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2444129254Scognet#endif
2445137463Scognet	RET
2446129254Scognet	LMEMCPY_8_PAD
2447129254Scognet
2448129254Scognet/*
2449129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned (accesses imply dst at byte 3 and src at byte 1 of a word)
2450129254Scognet */
2451129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2452129254Scognet	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
2453129254Scognet	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2454129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2455129254Scognet	strb	r3, [r0]		/* store byte 0 */
2456129254Scognet	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
2457129254Scognet#ifdef __ARMEB__
2458129254Scognet	strh	ip, [r0, #0x05]		/* low 16 bits of ip -> bytes 5-6 */
2459129254Scognet	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
2460129254Scognet#else
2461129254Scognet	strh	r3, [r0, #0x05]		/* store bytes 5-6 */
2462129254Scognet	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
2463129254Scognet#endif
2464129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2465129254Scognet	strb	r1, [r0, #0x07]		/* store byte 7 */
2466137463Scognet	RET
2467129254Scognet	LMEMCPY_8_PAD
2468129254Scognet
2469129254Scognet/*
2470129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned (word store at dst+1 implies dst at byte 3 of a word)
2471129254Scognet */
2472129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2473129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2474129254Scognet	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2475129254Scognet#ifdef __ARMEB__
2476129254Scognet	mov	ip, r2, lsr #8		/* ip = ...0 */
2477129254Scognet	strb	ip, [r0]		/* store byte 0 */
2478129254Scognet	mov	ip, r2, lsl #24		/* ip = 1... */
2479129254Scognet	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
2480129254Scognet	strb	r1, [r0, #0x07]		/* low byte of r1 -> byte 7 */
2481129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...6 */
2482129254Scognet	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
2483129254Scognet#else
2484129254Scognet	strb	r2, [r0]		/* low byte of r2 -> byte 0 */
2485129254Scognet	mov	ip, r2, lsr #8		/* ip = ...1 */
2486129254Scognet	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2487129254Scognet	mov	r2, r1, lsr #8		/* r2 = ...7 */
2488129254Scognet	strb	r2, [r0, #0x07]		/* store byte 7 */
2489129254Scognet	mov	r1, r1, lsl #8		/* r1 = .76. */
2490129254Scognet	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
2491129254Scognet#endif
2492129254Scognet	str	ip, [r0, #0x01]		/* store bytes 1-4 */
2493129254Scognet	strh	r1, [r0, #0x05]		/* low 16 bits -> bytes 5-6 */
2494137463Scognet	RET
2495129254Scognet	LMEMCPY_8_PAD
2496129254Scognet
2497129254Scognet/*
2498129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned (same layout on both sides, so pieces are copied unshifted)
2499129254Scognet */
2500129254Scognet	ldrb	r2, [r1]		/* r2 = byte 0 */
2501129254Scognet	ldr	ip, [r1, #0x01]		/* ip = bytes 1-4 */
2502129254Scognet	ldrh	r3, [r1, #0x05]		/* r3 = bytes 5-6 */
2503129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = byte 7 (src pointer no longer needed) */
2504129254Scognet	strb	r2, [r0]		/* store byte 0 */
2505129254Scognet	str	ip, [r0, #0x01]		/* store bytes 1-4 */
2506129254Scognet	strh	r3, [r0, #0x05]		/* store bytes 5-6 */
2507129254Scognet	strb	r1, [r0, #0x07]		/* store byte 7 */
2508137463Scognet	RET
2509129254Scognet	LMEMCPY_8_PAD
2510129254Scognet
2511129254Scognet/******************************************************************************
2512129254Scognet * Special case for 12 byte copies: r0 = dst, r1 = src.  Dispatches to one of 16 handlers, one per 128-byte slot, indexed by ((dst & 3) << 2) | (src & 3).
2513129254Scognet */
2514129254Scognet#define	LMEMCPY_C_LOG2	7	/* 128 bytes */
2515129254Scognet#define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
2516129254Scognet	LMEMCPY_C_PAD
2517129254Scognet.Lmemcpy_c:
2518129254Scognet	and	r2, r1, #0x03		/* r2 = src & 3 */
2519129254Scognet	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 (only bits 2-3 survive the mask below) */
2520129254Scognet	ands	r2, r2, #0x0f		/* r2 = 4-bit case index; Z set when both are word aligned */
2521129254Scognet	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c: this insn is at +0x0c and pc reads as insn + 8 */
2522129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* index != 0: jump to slot; index == 0: fall through to case 0000 */
2523129254Scognet
2524129254Scognet/*
2525129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned (reached by fall-through from the dispatcher; three plain word copies)
2526129254Scognet */
2527129254Scognet	ldr	r2, [r1]		/* r2 = bytes 0-3 */
2528129254Scognet	ldr	r3, [r1, #0x04]		/* r3 = bytes 4-7 */
2529129254Scognet	ldr	r1, [r1, #0x08]		/* r1 = bytes 8-B (src pointer no longer needed) */
2530129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2531129254Scognet	str	r3, [r0, #0x04]		/* store bytes 4-7 */
2532129254Scognet	str	r1, [r0, #0x08]		/* store bytes 8-B */
2533137463Scognet	RET
2534129254Scognet	LMEMCPY_C_PAD
2535129254Scognet
2536129254Scognet/*
2537129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1)
2538129254Scognet */
2539129254Scognet	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
2540129254Scognet	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2541129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2542129254Scognet	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x (x = byte before src; shifted out below) */
2543129254Scognet#ifdef __ARMEB__
2544129254Scognet	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
2545129254Scognet	str	r2, [r0, #0x08]		/* store bytes 8-B */
2546129254Scognet	mov	r2, ip, lsr #24		/* r2 = ...7 */
2547129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
2548129254Scognet	mov	r1, r1, lsl #8		/* r1 = 012. */
2549129254Scognet	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
2550129254Scognet#else
2551129254Scognet	mov	r2, r2, lsl #24		/* r2 = B... */
2552129254Scognet	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
2553129254Scognet	str	r2, [r0, #0x08]		/* store bytes 8-B */
2554129254Scognet	mov	r2, ip, lsl #24		/* r2 = 7... */
2555129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
2556129254Scognet	mov	r1, r1, lsr #8		/* r1 = .210 */
2557129254Scognet	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
2558129254Scognet#endif
2559129254Scognet	str	r2, [r0, #0x04]		/* store bytes 4-7 */
2560129254Scognet	str	r1, [r0]		/* store bytes 0-3 */
2561137463Scognet	RET
2562129254Scognet	LMEMCPY_C_PAD
2563129254Scognet
2564129254Scognet/*
2565129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned (source halfwords/words are re-paired into three aligned words)
2566129254Scognet */
2567129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2568129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2569129254Scognet	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2570129254Scognet	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2571129254Scognet#ifdef __ARMEB__
2572129254Scognet	mov	r2, r2, lsl #16		/* r2 = 01.. */
2573129254Scognet	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2574129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2575129254Scognet	mov	r3, r3, lsl #16		/* r3 = 45.. */
2576129254Scognet	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
2577129254Scognet	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
2578129254Scognet#else
2579129254Scognet	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2580129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2581129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..54 */
2582129254Scognet	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
2583129254Scognet	mov	r1, r1, lsl #16		/* r1 = BA.. */
2584129254Scognet	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
2585129254Scognet#endif
2586129254Scognet	str	r3, [r0, #0x04]		/* store bytes 4-7 */
2587129254Scognet	str	r1, [r0, #0x08]		/* store bytes 8-B */
2588137463Scognet	RET
2589129254Scognet	LMEMCPY_C_PAD
2590129254Scognet
2591129254Scognet/*
2592129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3)
2593129254Scognet */
2594129254Scognet	ldrb	r2, [r1]		/* r2 = ...0 */
2595129254Scognet	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2596129254Scognet	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2597129254Scognet	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 (x = byte past the 12 copied; shifted out below) */
2598129254Scognet#ifdef __ARMEB__
2599129254Scognet	mov	r2, r2, lsl #24		/* r2 = 0... */
2600129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
2601129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2602129254Scognet	mov	r3, r3, lsl #24		/* r3 = 4... */
2603129254Scognet	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
2604129254Scognet	mov	r1, r1, lsr #8		/* r1 = .9AB */
2605129254Scognet	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
2606129254Scognet#else
2607129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
2608129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2609129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...4 */
2610129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
2611129254Scognet	mov	r1, r1, lsl #8		/* r1 = BA9. */
2612129254Scognet	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
2613129254Scognet#endif
2614129254Scognet	str	r3, [r0, #0x04]		/* store bytes 4-7 */
2615129254Scognet	str	r1, [r0, #0x08]		/* store bytes 8-B */
2616137463Scognet	RET
2617129254Scognet	LMEMCPY_C_PAD
2618129254Scognet
2619129254Scognet/*
2620129254Scognet * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2621129254Scognet */
2622129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2623129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2624129254Scognet	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
2625129254Scognet	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
2626129254Scognet	strh	r1, [r0, #0x01]		/* low 16 bits -> bytes 1-2 */
2627129254Scognet#ifdef __ARMEB__
2628129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...0 */
2629129254Scognet	strb	r1, [r0]		/* store byte 0 */
2630129254Scognet	mov	r1, r2, lsl #24		/* r1 = 3... */
2631129254Scognet	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
2632129254Scognet	mov	r1, r3, lsl #24		/* r1 = 7... */
2633129254Scognet	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
2634129254Scognet#else
2635129254Scognet	strb	r2, [r0]		/* low byte of r2 -> byte 0 */
2636129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...3 */
2637129254Scognet	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
2638129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...7 */
2639129254Scognet	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
2640129254Scognet	mov	ip, ip, lsr #24		/* ip = ...B */
2641129254Scognet#endif
2642129254Scognet	str	r2, [r0, #0x03]		/* store bytes 3-6 */
2643129254Scognet	str	r1, [r0, #0x07]		/* store bytes 7-A */
2644129254Scognet	strb	ip, [r0, #0x0b]		/* low byte of ip -> byte B (BE: ip is still 89AB) */
2645137463Scognet	RET
2646129254Scognet	LMEMCPY_C_PAD
2647129254Scognet
2648129254Scognet/*
2649129254Scognet * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2650129254Scognet */
2651129254Scognet	ldrb	r2, [r1]		/* r2 = byte 0 */
2652129254Scognet	ldrh	r3, [r1, #0x01]		/* r3 = bytes 1-2 */
2653129254Scognet	ldr	ip, [r1, #0x03]		/* ip = bytes 3-6 */
2654129254Scognet	strb	r2, [r0]		/* store byte 0 early so r2 can be reused */
2655129254Scognet	ldr	r2, [r1, #0x07]		/* r2 = bytes 7-A */
2656129254Scognet	ldrb	r1, [r1, #0x0b]		/* r1 = byte B (src pointer no longer needed) */
2657129254Scognet	strh	r3, [r0, #0x01]		/* store bytes 1-2 */
2658129254Scognet	str	ip, [r0, #0x03]		/* store bytes 3-6 */
2659129254Scognet	str	r2, [r0, #0x07]		/* store bytes 7-A */
2660129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B */
2661137463Scognet	RET
2662129254Scognet	LMEMCPY_C_PAD
2663129254Scognet
2664129254Scognet/*
2665129254Scognet * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2666129254Scognet */
2667129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2668129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2669129254Scognet	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2670129254Scognet	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2671129254Scognet#ifdef __ARMEB__
2672129254Scognet	mov	r2, r2, ror #8		/* r2 = 1..0 */
2673129254Scognet	strb	r2, [r0]		/* low byte -> byte 0 */
2674129254Scognet	mov	r2, r2, lsr #16		/* r2 = ..1. */
2675129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
2676129254Scognet	strh	r2, [r0, #0x01]		/* store bytes 1-2 */
2677129254Scognet	mov	r2, r3, lsl #8		/* r2 = 345. */
2678129254Scognet	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
2679129254Scognet	mov	r2, ip, lsl #8		/* r2 = 789. */
2680129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
2681129254Scognet#else
2682129254Scognet	strb	r2, [r0]		/* low byte -> byte 0 */
2683129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
2684129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2685129254Scognet	strh	r2, [r0, #0x01]		/* low 16 bits -> bytes 1-2 */
2686129254Scognet	mov	r2, r3, lsr #8		/* r2 = .543 */
2687129254Scognet	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
2688129254Scognet	mov	r2, ip, lsr #8		/* r2 = .987 */
2689129254Scognet	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
2690129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...B */
2691129254Scognet#endif
2692129254Scognet	str	r3, [r0, #0x03]		/* store bytes 3-6 */
2693129254Scognet	str	r2, [r0, #0x07]		/* store bytes 7-A */
2694129254Scognet	strb	r1, [r0, #0x0b]		/* low byte of r1 -> byte B (BE: r1 is still ..AB) */
2695137463Scognet	RET
2696129254Scognet	LMEMCPY_C_PAD
2697129254Scognet
2698129254Scognet/*
2699129254Scognet * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2700129254Scognet */
2701129254Scognet	ldrb	r2, [r1]		/* r2 = ...0 */
2702129254Scognet	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2703129254Scognet	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2704129254Scognet	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 (x = byte past the 12 copied; never stored) */
2705129254Scognet	strb	r2, [r0]		/* store byte 0 */
2706129254Scognet#ifdef __ARMEB__
2707129254Scognet	mov	r2, r3, lsr #16		/* r2 = ..12 */
2708129254Scognet	strh	r2, [r0, #0x01]		/* store bytes 1-2 */
2709129254Scognet	mov	r3, r3, lsl #16		/* r3 = 34.. */
2710129254Scognet	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
2711129254Scognet	mov	ip, ip, lsl #16		/* ip = 78.. */
2712129254Scognet	orr	ip, ip, r1, lsr #16	/* ip = 789A */
2713129254Scognet	mov	r1, r1, lsr #8		/* r1 = .9AB */
2714129254Scognet#else
2715129254Scognet	strh	r3, [r0, #0x01]		/* low 16 bits -> bytes 1-2 */
2716129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..43 */
2717129254Scognet	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
2718129254Scognet	mov	ip, ip, lsr #16		/* ip = ..87 */
2719129254Scognet	orr	ip, ip, r1, lsl #16	/* ip = A987 */
2720129254Scognet	mov	r1, r1, lsr #16		/* r1 = ..xB */
2721129254Scognet#endif
2722129254Scognet	str	r3, [r0, #0x03]		/* store bytes 3-6 */
2723129254Scognet	str	ip, [r0, #0x07]		/* store bytes 7-A */
2724129254Scognet	strb	r1, [r0, #0x0b]		/* low byte of r1 -> byte B */
2725137463Scognet	RET
2726129254Scognet	LMEMCPY_C_PAD
2727129254Scognet
2728129254Scognet/*
2729129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned (stored as halfword, word, word, halfword)
2730129254Scognet */
2731129254Scognet	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
2732129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2733129254Scognet	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
2734129254Scognet	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2735129254Scognet#ifdef __ARMEB__
2736129254Scognet	strh	r1, [r0]		/* store bytes 0-1 */
2737129254Scognet	mov	r1, ip, lsl #16		/* r1 = 23.. */
2738129254Scognet	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
2739129254Scognet	mov	r3, r3, lsl #16		/* r3 = 67.. */
2740129254Scognet	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
2741129254Scognet#else
2742129254Scognet	strh	ip, [r0]		/* low 16 bits of ip -> bytes 0-1 */
2743129254Scognet	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
2744129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..76 */
2745129254Scognet	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
2746129254Scognet	mov	r2, r2, lsr #16		/* r2 = ..BA */
2747129254Scognet#endif
2748129254Scognet	str	r1, [r0, #0x02]		/* store bytes 2-5 */
2749129254Scognet	str	r3, [r0, #0x06]		/* store bytes 6-9 */
2750129254Scognet	strh	r2, [r0, #0x0a]		/* low 16 bits -> bytes A-B (BE: r2 is still 89AB) */
2751137463Scognet	RET
2752129254Scognet	LMEMCPY_C_PAD
2753129254Scognet
2754129254Scognet/*
2755129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2756129254Scognet */
2757129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x (x = byte before src; never stored) */
2758129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2759129254Scognet	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
2760129254Scognet	strh	ip, [r0]		/* low 16 bits -> bytes 0-1 */
2761129254Scognet	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2762129254Scognet	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
2763129254Scognet#ifdef __ARMEB__
2764129254Scognet	mov	r2, r2, lsl #24		/* r2 = 2... */
2765129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
2766129254Scognet	mov	r3, r3, lsl #24		/* r3 = 6... */
2767129254Scognet	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
2768129254Scognet	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
2769129254Scognet#else
2770129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...2 */
2771129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
2772129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...6 */
2773129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
2774129254Scognet	mov	r1, r1, lsl #8		/* r1 = ..B. */
2775129254Scognet	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
2776129254Scognet#endif
2777129254Scognet	str	r2, [r0, #0x02]		/* store bytes 2-5 */
2778129254Scognet	str	r3, [r0, #0x06]		/* store bytes 6-9 */
2779129254Scognet	strh	r1, [r0, #0x0a]		/* low 16 bits -> bytes A-B */
2780137463Scognet	RET
2781129254Scognet	LMEMCPY_C_PAD
2782129254Scognet
2783129254Scognet/*
2784129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned (same layout on both sides, so pieces are copied unshifted)
2785129254Scognet */
2786129254Scognet	ldrh	r2, [r1]		/* r2 = bytes 0-1 */
2787129254Scognet	ldr	r3, [r1, #0x02]		/* r3 = bytes 2-5 */
2788129254Scognet	ldr	ip, [r1, #0x06]		/* ip = bytes 6-9 */
2789129254Scognet	ldrh	r1, [r1, #0x0a]		/* r1 = bytes A-B (src pointer no longer needed) */
2790129254Scognet	strh	r2, [r0]		/* store bytes 0-1 */
2791129254Scognet	str	r3, [r0, #0x02]		/* store bytes 2-5 */
2792129254Scognet	str	ip, [r0, #0x06]		/* store bytes 6-9 */
2793129254Scognet	strh	r1, [r0, #0x0a]		/* store bytes A-B */
2794137463Scognet	RET
2795129254Scognet	LMEMCPY_C_PAD
2796129254Scognet
2797129254Scognet/*
2798129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2799129254Scognet */
2800129254Scognet	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 (x = byte past the 12 copied; never stored) */
2801129254Scognet	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
2802129254Scognet	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
2803129254Scognet	strh	ip, [r0, #0x0a]		/* low 16 bits -> bytes A-B */
2804129254Scognet	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2805129254Scognet	ldrb	r1, [r1]		/* r1 = ...0 */
2806129254Scognet#ifdef __ARMEB__
2807129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...9 */
2808129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
2809129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...5 */
2810129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
2811129254Scognet	mov	r1, r1, lsl #8		/* r1 = ..0. */
2812129254Scognet	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
2813129254Scognet#else
2814129254Scognet	mov	r2, r2, lsl #24		/* r2 = 9... */
2815129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
2816129254Scognet	mov	r3, r3, lsl #24		/* r3 = 5... */
2817129254Scognet	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
2818129254Scognet	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
2819129254Scognet#endif
2820129254Scognet	str	r2, [r0, #0x06]		/* store bytes 6-9 */
2821129254Scognet	str	r3, [r0, #0x02]		/* store bytes 2-5 */
2822129254Scognet	strh	r1, [r0]		/* low 16 bits -> bytes 0-1 */
2823137463Scognet	RET
2824129254Scognet	LMEMCPY_C_PAD
2825129254Scognet
2826129254Scognet/*
2827129254Scognet * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2828129254Scognet */
2829129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2830129254Scognet	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
2831129254Scognet	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
2832129254Scognet#ifdef __ARMEB__
2833129254Scognet	mov	r3, r2, lsr #24		/* r3 = ...0 */
2834129254Scognet	strb	r3, [r0]		/* store byte 0 */
2835129254Scognet	mov	r2, r2, lsl #8		/* r2 = 123. */
2836129254Scognet	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
2837129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2838129254Scognet	mov	r2, ip, lsl #8		/* r2 = 567. */
2839129254Scognet	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
2840129254Scognet	str	r2, [r0, #0x05]		/* store bytes 5-8 */
2841129254Scognet	mov	r2, r1, lsr #8		/* r2 = .89A */
2842129254Scognet	strh	r2, [r0, #0x09]		/* low 16 bits -> bytes 9-A */
2843129254Scognet	strb	r1, [r0, #0x0b]		/* low byte of r1 -> byte B */
2844129254Scognet#else
2845129254Scognet	strb	r2, [r0]		/* low byte of r2 -> byte 0 */
2846129254Scognet	mov	r3, r2, lsr #8		/* r3 = .321 */
2847129254Scognet	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
2848129254Scognet	str	r3, [r0, #0x01]		/* store bytes 1-4 */
2849129254Scognet	mov	r3, ip, lsr #8		/* r3 = .765 */
2850129254Scognet	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
2851129254Scognet	str	r3, [r0, #0x05]		/* store bytes 5-8 */
2852129254Scognet	mov	r1, r1, lsr #8		/* r1 = .BA9 */
2853129254Scognet	strh	r1, [r0, #0x09]		/* low 16 bits -> bytes 9-A */
2854129254Scognet	mov	r1, r1, lsr #16		/* r1 = ...B */
2855129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B */
2856129254Scognet#endif
2857137463Scognet	RET
2858129254Scognet	LMEMCPY_C_PAD
2859129254Scognet
2860129254Scognet/*
2861129254Scognet * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2862129254Scognet */
2863129254Scognet	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
2864129254Scognet	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
2865129254Scognet	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2866129254Scognet	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x (x = byte before src; never stored) */
2867129254Scognet	strb	r2, [r0, #0x0b]		/* store byte B */
2868129254Scognet#ifdef __ARMEB__
2869129254Scognet	strh	r3, [r0, #0x09]		/* low 16 bits of r3 -> bytes 9-A */
2870129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..78 */
2871129254Scognet	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
2872129254Scognet	mov	ip, ip, lsr #16		/* ip = ..34 */
2873129254Scognet	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
2874129254Scognet	mov	r1, r1, lsr #16		/* r1 = ..x0 */
2875129254Scognet#else
2876129254Scognet	mov	r2, r3, lsr #16		/* r2 = ..A9 */
2877129254Scognet	strh	r2, [r0, #0x09]		/* store bytes 9-A */
2878129254Scognet	mov	r3, r3, lsl #16		/* r3 = 87.. */
2879129254Scognet	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
2880129254Scognet	mov	ip, ip, lsl #16		/* ip = 43.. */
2881129254Scognet	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
2882129254Scognet	mov	r1, r1, lsr #8		/* r1 = .210 */
2883129254Scognet#endif
2884129254Scognet	str	r3, [r0, #0x05]		/* store bytes 5-8 */
2885129254Scognet	str	ip, [r0, #0x01]		/* store bytes 1-4 */
2886129254Scognet	strb	r1, [r0]		/* low byte of r1 -> byte 0 */
2887137463Scognet	RET
2888129254Scognet	LMEMCPY_C_PAD
2889129254Scognet
2890129254Scognet/*
2891129254Scognet * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned (the two endian variants share no instructions; each has its own loads and stores)
2892129254Scognet */
2893129254Scognet#ifdef __ARMEB__
2894129254Scognet	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
2895129254Scognet	ldr	ip, [r1, #0x06]		/* ip = 6789 */
2896129254Scognet	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
2897129254Scognet	ldrh	r1, [r1]		/* r1 = ..01 */
2898129254Scognet	strb	r2, [r0, #0x0b]		/* low byte of r2 -> byte B */
2899129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...A */
2900129254Scognet	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
2901129254Scognet	mov	ip, ip, lsr #8		/* ip = .678 */
2902129254Scognet	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
2903129254Scognet	mov	r3, r3, lsr #8		/* r3 = .234 */
2904129254Scognet	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
2905129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...0 */
2906129254Scognet	strb	r1, [r0]		/* store byte 0 */
2907129254Scognet	str	r3, [r0, #0x01]		/* store bytes 1-4 */
2908129254Scognet	str	ip, [r0, #0x05]		/* store bytes 5-8 */
2909129254Scognet	strh	r2, [r0, #0x09]		/* low 16 bits -> bytes 9-A */
2910129254Scognet#else
2911129254Scognet	ldrh	r2, [r1]		/* r2 = ..10 */
2912129254Scognet	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
2913129254Scognet	ldr	ip, [r1, #0x06]		/* ip = 9876 */
2914129254Scognet	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
2915129254Scognet	strb	r2, [r0]		/* low byte of r2 -> byte 0 */
2916129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
2917129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2918129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...5 */
2919129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
2920129254Scognet	mov	ip, ip, lsr #24		/* ip = ...9 */
2921129254Scognet	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
2922129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...B */
2923129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2924129254Scognet	str	r3, [r0, #0x05]		/* store bytes 5-8 */
2925129254Scognet	strh	ip, [r0, #0x09]		/* low 16 bits -> bytes 9-A */
2926129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B */
2927129254Scognet#endif
2928137463Scognet	RET
2929129254Scognet	LMEMCPY_C_PAD
2930129254Scognet
2931129254Scognet/*
2932129254Scognet * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2933129254Scognet */
2934129254Scognet	ldrb	r2, [r1]		/* r2 = byte 0 */
2935129254Scognet	ldr	r3, [r1, #0x01]		/* r3 = bytes 1-4 */
2936129254Scognet	ldr	ip, [r1, #0x05]		/* ip = bytes 5-8 */
2937129254Scognet	strb	r2, [r0]		/* store byte 0 early so r2 can be reused */
2938129254Scognet	ldrh	r2, [r1, #0x09]		/* r2 = bytes 9-A */
2939129254Scognet	ldrb	r1, [r1, #0x0b]		/* r1 = byte B (src pointer no longer needed) */
2940129254Scognet	str	r3, [r0, #0x01]		/* store bytes 1-4 */
2941129254Scognet	str	ip, [r0, #0x05]		/* store bytes 5-8 */
2942129254Scognet	strh	r2, [r0, #0x09]		/* store bytes 9-A */
2943129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B */
2944137463Scognet	RET
2945248361SandrewEND(memcpy)
2946172614Scognet#endif /* _ARM_ARCH_5E */
2947135654Scognet
2948135654Scognet#ifdef GPROF
2949135654Scognet
2950135654ScognetENTRY(user)
2951135654Scognet	nop			/* GPROF-only symbol: body is one nop with no return; presumably a profiling marker label, not meant to be called -- TODO confirm */
2952269390SianEND(user)
2953135654ScognetENTRY(btrap)
2954135654Scognet	nop			/* GPROF-only symbol: body is one nop with no return; presumably a profiling marker label, not meant to be called -- TODO confirm */
2955269390SianEND(btrap)
2956135654ScognetENTRY(etrap)
2957135654Scognet	nop			/* GPROF-only symbol: body is one nop with no return; presumably a profiling marker label, not meant to be called -- TODO confirm */
2958269390SianEND(etrap)
2959135654ScognetENTRY(bintr)
2960135654Scognet	nop			/* GPROF-only symbol: body is one nop with no return; presumably a profiling marker label, not meant to be called -- TODO confirm */
2961269390SianEND(bintr)
2962135654ScognetENTRY(eintr)
2963135654Scognet	nop			/* GPROF-only symbol: body is one nop with no return; presumably a profiling marker label, not meant to be called -- TODO confirm */
2964269390SianEND(eintr)
2965135654Scognet#endif
2966