/*
 * Diff view: lib/libc/arm/string/memcpy_xscale.S
 * FreeBSD stable/10, r256281 (old) -> r271337 (new)
 */
1/* $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */
2
3/*
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <machine/asm.h>
1/* $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */
2
3/*
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <machine/asm.h>
39__FBSDID("$FreeBSD: stable/10/lib/libc/arm/string/memcpy_xscale.S 135683 2004-09-23 23:11:32Z cognet $");
39__FBSDID("$FreeBSD: stable/10/lib/libc/arm/string/memcpy_xscale.S 271337 2014-09-09 22:24:01Z ian $");
40
41/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
42ENTRY(memcpy)
43 pld [r1]
44 cmp r2, #0x0c
45 ble .Lmemcpy_short /* <= 12 bytes */
46 mov r3, r0 /* We must not clobber r0 */
47
48 /* Word-align the destination buffer */
49 ands ip, r3, #0x03 /* Already word aligned? */
50 beq .Lmemcpy_wordaligned /* Yup */
51 cmp ip, #0x02
52 ldrb ip, [r1], #0x01
53 sub r2, r2, #0x01
54 strb ip, [r3], #0x01
55 ldrleb ip, [r1], #0x01
56 suble r2, r2, #0x01
57 strleb ip, [r3], #0x01
58 ldrltb ip, [r1], #0x01
59 sublt r2, r2, #0x01
60 strltb ip, [r3], #0x01
61
62 /* Destination buffer is now word aligned */
63.Lmemcpy_wordaligned:
64 ands ip, r1, #0x03 /* Is src also word-aligned? */
65 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
66
67 /* Quad-align the destination buffer */
68 tst r3, #0x07 /* Already quad aligned? */
69 ldrne ip, [r1], #0x04
70 stmfd sp!, {r4-r9} /* Free up some registers */
71 subne r2, r2, #0x04
72 strne ip, [r3], #0x04
73
74 /* Destination buffer quad aligned, source is at least word aligned */
75 subs r2, r2, #0x80
76 blt .Lmemcpy_w_lessthan128
77
78 /* Copy 128 bytes at a time */
79.Lmemcpy_w_loop128:
80 ldr r4, [r1], #0x04 /* LD:00-03 */
81 ldr r5, [r1], #0x04 /* LD:04-07 */
82 pld [r1, #0x18] /* Prefetch 0x20 */
83 ldr r6, [r1], #0x04 /* LD:08-0b */
84 ldr r7, [r1], #0x04 /* LD:0c-0f */
85 ldr r8, [r1], #0x04 /* LD:10-13 */
86 ldr r9, [r1], #0x04 /* LD:14-17 */
87 strd r4, [r3], #0x08 /* ST:00-07 */
88 ldr r4, [r1], #0x04 /* LD:18-1b */
89 ldr r5, [r1], #0x04 /* LD:1c-1f */
90 strd r6, [r3], #0x08 /* ST:08-0f */
91 ldr r6, [r1], #0x04 /* LD:20-23 */
92 ldr r7, [r1], #0x04 /* LD:24-27 */
93 pld [r1, #0x18] /* Prefetch 0x40 */
94 strd r8, [r3], #0x08 /* ST:10-17 */
95 ldr r8, [r1], #0x04 /* LD:28-2b */
96 ldr r9, [r1], #0x04 /* LD:2c-2f */
97 strd r4, [r3], #0x08 /* ST:18-1f */
98 ldr r4, [r1], #0x04 /* LD:30-33 */
99 ldr r5, [r1], #0x04 /* LD:34-37 */
100 strd r6, [r3], #0x08 /* ST:20-27 */
101 ldr r6, [r1], #0x04 /* LD:38-3b */
102 ldr r7, [r1], #0x04 /* LD:3c-3f */
103 strd r8, [r3], #0x08 /* ST:28-2f */
104 ldr r8, [r1], #0x04 /* LD:40-43 */
105 ldr r9, [r1], #0x04 /* LD:44-47 */
106 pld [r1, #0x18] /* Prefetch 0x60 */
107 strd r4, [r3], #0x08 /* ST:30-37 */
108 ldr r4, [r1], #0x04 /* LD:48-4b */
109 ldr r5, [r1], #0x04 /* LD:4c-4f */
110 strd r6, [r3], #0x08 /* ST:38-3f */
111 ldr r6, [r1], #0x04 /* LD:50-53 */
112 ldr r7, [r1], #0x04 /* LD:54-57 */
113 strd r8, [r3], #0x08 /* ST:40-47 */
114 ldr r8, [r1], #0x04 /* LD:58-5b */
115 ldr r9, [r1], #0x04 /* LD:5c-5f */
116 strd r4, [r3], #0x08 /* ST:48-4f */
117 ldr r4, [r1], #0x04 /* LD:60-63 */
118 ldr r5, [r1], #0x04 /* LD:64-67 */
119 pld [r1, #0x18] /* Prefetch 0x80 */
120 strd r6, [r3], #0x08 /* ST:50-57 */
121 ldr r6, [r1], #0x04 /* LD:68-6b */
122 ldr r7, [r1], #0x04 /* LD:6c-6f */
123 strd r8, [r3], #0x08 /* ST:58-5f */
124 ldr r8, [r1], #0x04 /* LD:70-73 */
125 ldr r9, [r1], #0x04 /* LD:74-77 */
126 strd r4, [r3], #0x08 /* ST:60-67 */
127 ldr r4, [r1], #0x04 /* LD:78-7b */
128 ldr r5, [r1], #0x04 /* LD:7c-7f */
129 strd r6, [r3], #0x08 /* ST:68-6f */
130 strd r8, [r3], #0x08 /* ST:70-77 */
131 subs r2, r2, #0x80
132 strd r4, [r3], #0x08 /* ST:78-7f */
133 bge .Lmemcpy_w_loop128
134
135.Lmemcpy_w_lessthan128:
136 adds r2, r2, #0x80 /* Adjust for extra sub */
137 ldmeqfd sp!, {r4-r9}
138 bxeq lr /* Return now if done */
139 subs r2, r2, #0x20
140 blt .Lmemcpy_w_lessthan32
141
142 /* Copy 32 bytes at a time */
143.Lmemcpy_w_loop32:
144 ldr r4, [r1], #0x04
145 ldr r5, [r1], #0x04
146 pld [r1, #0x18]
147 ldr r6, [r1], #0x04
148 ldr r7, [r1], #0x04
149 ldr r8, [r1], #0x04
150 ldr r9, [r1], #0x04
151 strd r4, [r3], #0x08
152 ldr r4, [r1], #0x04
153 ldr r5, [r1], #0x04
154 strd r6, [r3], #0x08
155 strd r8, [r3], #0x08
156 subs r2, r2, #0x20
157 strd r4, [r3], #0x08
158 bge .Lmemcpy_w_loop32
159
160.Lmemcpy_w_lessthan32:
161 adds r2, r2, #0x20 /* Adjust for extra sub */
162 ldmeqfd sp!, {r4-r9}
163 bxeq lr /* Return now if done */
164
165 and r4, r2, #0x18
166 rsbs r4, r4, #0x18
167 addne pc, pc, r4, lsl #1
168 nop
169
170 /* At least 24 bytes remaining */
171 ldr r4, [r1], #0x04
172 ldr r5, [r1], #0x04
173 sub r2, r2, #0x08
174 strd r4, [r3], #0x08
175
176 /* At least 16 bytes remaining */
177 ldr r4, [r1], #0x04
178 ldr r5, [r1], #0x04
179 sub r2, r2, #0x08
180 strd r4, [r3], #0x08
181
182 /* At least 8 bytes remaining */
183 ldr r4, [r1], #0x04
184 ldr r5, [r1], #0x04
185 subs r2, r2, #0x08
186 strd r4, [r3], #0x08
187
188 /* Less than 8 bytes remaining */
189 ldmfd sp!, {r4-r9}
190 bxeq lr /* Return now if done */
191 subs r2, r2, #0x04
192 ldrge ip, [r1], #0x04
193 strge ip, [r3], #0x04
194 bxeq lr /* Return now if done */
195 addlt r2, r2, #0x04
196 ldrb ip, [r1], #0x01
197 cmp r2, #0x02
198 ldrgeb r2, [r1], #0x01
199 strb ip, [r3], #0x01
200 ldrgtb ip, [r1]
201 strgeb r2, [r3], #0x01
202 strgtb ip, [r3]
203 bx lr
204
205
206/*
207 * At this point, it has not been possible to word align both buffers.
208 * The destination buffer is word aligned, but the source buffer is not.
209 */
210.Lmemcpy_bad_align:
211 stmfd sp!, {r4-r7}
212 bic r1, r1, #0x03
213 cmp ip, #2
214 ldr ip, [r1], #0x04
215 bgt .Lmemcpy_bad3
216 beq .Lmemcpy_bad2
217 b .Lmemcpy_bad1
218
219.Lmemcpy_bad1_loop16:
220#ifdef __ARMEB__
221 mov r4, ip, lsl #8
222#else
223 mov r4, ip, lsr #8
224#endif
225 ldr r5, [r1], #0x04
226 pld [r1, #0x018]
227 ldr r6, [r1], #0x04
228 ldr r7, [r1], #0x04
229 ldr ip, [r1], #0x04
230#ifdef __ARMEB__
231 orr r4, r4, r5, lsr #24
232 mov r5, r5, lsl #8
233 orr r5, r5, r6, lsr #24
234 mov r6, r6, lsl #8
235 orr r6, r6, r7, lsr #24
236 mov r7, r7, lsl #8
237 orr r7, r7, ip, lsr #24
238#else
239 orr r4, r4, r5, lsl #24
240 mov r5, r5, lsr #8
241 orr r5, r5, r6, lsl #24
242 mov r6, r6, lsr #8
243 orr r6, r6, r7, lsl #24
244 mov r7, r7, lsr #8
245 orr r7, r7, ip, lsl #24
246#endif
247 str r4, [r3], #0x04
248 str r5, [r3], #0x04
249 str r6, [r3], #0x04
250 str r7, [r3], #0x04
251.Lmemcpy_bad1:
252 subs r2, r2, #0x10
253 bge .Lmemcpy_bad1_loop16
254
255 adds r2, r2, #0x10
256 ldmeqfd sp!, {r4-r7}
257 bxeq lr /* Return now if done */
258 subs r2, r2, #0x04
259 sublt r1, r1, #0x03
260 blt .Lmemcpy_bad_done
261
262.Lmemcpy_bad1_loop4:
263#ifdef __ARMEB__
264 mov r4, ip, lsl #8
265#else
266 mov r4, ip, lsr #8
267#endif
268 ldr ip, [r1], #0x04
269 subs r2, r2, #0x04
270#ifdef __ARMEB__
271 orr r4, r4, ip, lsr #24
272#else
273 orr r4, r4, ip, lsl #24
274#endif
275 str r4, [r3], #0x04
276 bge .Lmemcpy_bad1_loop4
277 sub r1, r1, #0x03
278 b .Lmemcpy_bad_done
279
280.Lmemcpy_bad2_loop16:
281#ifdef __ARMEB__
282 mov r4, ip, lsl #16
283#else
284 mov r4, ip, lsr #16
285#endif
286 ldr r5, [r1], #0x04
287 pld [r1, #0x018]
288 ldr r6, [r1], #0x04
289 ldr r7, [r1], #0x04
290 ldr ip, [r1], #0x04
291#ifdef __ARMEB__
292 orr r4, r4, r5, lsr #16
293 mov r5, r5, lsl #16
294 orr r5, r5, r6, lsr #16
295 mov r6, r6, lsl #16
296 orr r6, r6, r7, lsr #16
297 mov r7, r7, lsl #16
298 orr r7, r7, ip, lsr #16
299#else
300 orr r4, r4, r5, lsl #16
301 mov r5, r5, lsr #16
302 orr r5, r5, r6, lsl #16
303 mov r6, r6, lsr #16
304 orr r6, r6, r7, lsl #16
305 mov r7, r7, lsr #16
306 orr r7, r7, ip, lsl #16
307#endif
308 str r4, [r3], #0x04
309 str r5, [r3], #0x04
310 str r6, [r3], #0x04
311 str r7, [r3], #0x04
312.Lmemcpy_bad2:
313 subs r2, r2, #0x10
314 bge .Lmemcpy_bad2_loop16
315
316 adds r2, r2, #0x10
317 ldmeqfd sp!, {r4-r7}
318 bxeq lr /* Return now if done */
319 subs r2, r2, #0x04
320 sublt r1, r1, #0x02
321 blt .Lmemcpy_bad_done
322
323.Lmemcpy_bad2_loop4:
324#ifdef __ARMEB__
325 mov r4, ip, lsl #16
326#else
327 mov r4, ip, lsr #16
328#endif
329 ldr ip, [r1], #0x04
330 subs r2, r2, #0x04
331#ifdef __ARMEB__
332 orr r4, r4, ip, lsr #16
333#else
334 orr r4, r4, ip, lsl #16
335#endif
336 str r4, [r3], #0x04
337 bge .Lmemcpy_bad2_loop4
338 sub r1, r1, #0x02
339 b .Lmemcpy_bad_done
340
341.Lmemcpy_bad3_loop16:
342#ifdef __ARMEB__
343 mov r4, ip, lsl #24
344#else
345 mov r4, ip, lsr #24
346#endif
347 ldr r5, [r1], #0x04
348 pld [r1, #0x018]
349 ldr r6, [r1], #0x04
350 ldr r7, [r1], #0x04
351 ldr ip, [r1], #0x04
352#ifdef __ARMEB__
353 orr r4, r4, r5, lsr #8
354 mov r5, r5, lsl #24
355 orr r5, r5, r6, lsr #8
356 mov r6, r6, lsl #24
357 orr r6, r6, r7, lsr #8
358 mov r7, r7, lsl #24
359 orr r7, r7, ip, lsr #8
360#else
361 orr r4, r4, r5, lsl #8
362 mov r5, r5, lsr #24
363 orr r5, r5, r6, lsl #8
364 mov r6, r6, lsr #24
365 orr r6, r6, r7, lsl #8
366 mov r7, r7, lsr #24
367 orr r7, r7, ip, lsl #8
368#endif
369 str r4, [r3], #0x04
370 str r5, [r3], #0x04
371 str r6, [r3], #0x04
372 str r7, [r3], #0x04
373.Lmemcpy_bad3:
374 subs r2, r2, #0x10
375 bge .Lmemcpy_bad3_loop16
376
377 adds r2, r2, #0x10
378 ldmeqfd sp!, {r4-r7}
379 bxeq lr /* Return now if done */
380 subs r2, r2, #0x04
381 sublt r1, r1, #0x01
382 blt .Lmemcpy_bad_done
383
384.Lmemcpy_bad3_loop4:
385#ifdef __ARMEB__
386 mov r4, ip, lsl #24
387#else
388 mov r4, ip, lsr #24
389#endif
390 ldr ip, [r1], #0x04
391 subs r2, r2, #0x04
392#ifdef __ARMEB__
393 orr r4, r4, ip, lsr #8
394#else
395 orr r4, r4, ip, lsl #8
396#endif
397 str r4, [r3], #0x04
398 bge .Lmemcpy_bad3_loop4
399 sub r1, r1, #0x01
400
401.Lmemcpy_bad_done:
402 ldmfd sp!, {r4-r7}
403 adds r2, r2, #0x04
404 bxeq lr
405 ldrb ip, [r1], #0x01
406 cmp r2, #0x02
407 ldrgeb r2, [r1], #0x01
408 strb ip, [r3], #0x01
409 ldrgtb ip, [r1]
410 strgeb r2, [r3], #0x01
411 strgtb ip, [r3]
412 bx lr
413
414
415/*
416 * Handle short copies (less than 16 bytes), possibly misaligned.
417 * Some of these are *very* common, thanks to the network stack,
418 * and so are handled specially.
419 */
420.Lmemcpy_short:
421#ifndef _STANDALONE
422 add pc, pc, r2, lsl #2
423 nop
424 bx lr /* 0x00 */
425 b .Lmemcpy_bytewise /* 0x01 */
426 b .Lmemcpy_bytewise /* 0x02 */
427 b .Lmemcpy_bytewise /* 0x03 */
428 b .Lmemcpy_4 /* 0x04 */
429 b .Lmemcpy_bytewise /* 0x05 */
430 b .Lmemcpy_6 /* 0x06 */
431 b .Lmemcpy_bytewise /* 0x07 */
432 b .Lmemcpy_8 /* 0x08 */
433 b .Lmemcpy_bytewise /* 0x09 */
434 b .Lmemcpy_bytewise /* 0x0a */
435 b .Lmemcpy_bytewise /* 0x0b */
436 b .Lmemcpy_c /* 0x0c */
437#endif
438.Lmemcpy_bytewise:
439 mov r3, r0 /* We must not clobber r0 */
440 ldrb ip, [r1], #0x01
4411: subs r2, r2, #0x01
442 strb ip, [r3], #0x01
443 ldrneb ip, [r1], #0x01
444 bne 1b
445 bx lr
446
447#ifndef _STANDALONE
448/******************************************************************************
449 * Special case for 4 byte copies
450 */
451#define LMEMCPY_4_LOG2 6 /* 64 bytes */
452#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
453 LMEMCPY_4_PAD
454.Lmemcpy_4:
455 and r2, r1, #0x03
456 orr r2, r2, r0, lsl #2
457 ands r2, r2, #0x0f
458 sub r3, pc, #0x14
459 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
460
461/*
462 * 0000: dst is 32-bit aligned, src is 32-bit aligned
463 */
464 ldr r2, [r1]
465 str r2, [r0]
466 bx lr
467 LMEMCPY_4_PAD
468
469/*
470 * 0001: dst is 32-bit aligned, src is 8-bit aligned
471 */
472 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
473 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
474#ifdef __ARMEB__
475 mov r3, r3, lsl #8 /* r3 = 012. */
476 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
477#else
478 mov r3, r3, lsr #8 /* r3 = .210 */
479 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
480#endif
481 str r3, [r0]
482 bx lr
483 LMEMCPY_4_PAD
484
485/*
486 * 0010: dst is 32-bit aligned, src is 16-bit aligned
487 */
488#ifdef __ARMEB__
489 ldrh r3, [r1]
490 ldrh r2, [r1, #0x02]
491#else
492 ldrh r3, [r1, #0x02]
493 ldrh r2, [r1]
494#endif
495 orr r3, r2, r3, lsl #16
496 str r3, [r0]
497 bx lr
498 LMEMCPY_4_PAD
499
500/*
501 * 0011: dst is 32-bit aligned, src is 8-bit aligned
502 */
503 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
504 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
505#ifdef __ARMEB__
506 mov r3, r3, lsl #24 /* r3 = 0... */
507 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
508#else
509 mov r3, r3, lsr #24 /* r3 = ...0 */
510 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
511#endif
512 str r3, [r0]
513 bx lr
514 LMEMCPY_4_PAD
515
516/*
517 * 0100: dst is 8-bit aligned, src is 32-bit aligned
518 */
519 ldr r2, [r1]
520#ifdef __ARMEB__
521 strb r2, [r0, #0x03]
522 mov r3, r2, lsr #8
523 mov r1, r2, lsr #24
524 strb r1, [r0]
525#else
526 strb r2, [r0]
527 mov r3, r2, lsr #8
528 mov r1, r2, lsr #24
529 strb r1, [r0, #0x03]
530#endif
531 strh r3, [r0, #0x01]
532 bx lr
533 LMEMCPY_4_PAD
534
535/*
536 * 0101: dst is 8-bit aligned, src is 8-bit aligned
537 */
538 ldrb r2, [r1]
539 ldrh r3, [r1, #0x01]
540 ldrb r1, [r1, #0x03]
541 strb r2, [r0]
542 strh r3, [r0, #0x01]
543 strb r1, [r0, #0x03]
544 bx lr
545 LMEMCPY_4_PAD
546
547/*
548 * 0110: dst is 8-bit aligned, src is 16-bit aligned
549 */
550 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
 551 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
552#ifdef __ARMEB__
553 mov r1, r2, lsr #8 /* r1 = ...0 */
554 strb r1, [r0]
555 mov r2, r2, lsl #8 /* r2 = .01. */
556 orr r2, r2, r3, lsr #8 /* r2 = .012 */
557#else
558 strb r2, [r0]
559 mov r2, r2, lsr #8 /* r2 = ...1 */
560 orr r2, r2, r3, lsl #8 /* r2 = .321 */
561 mov r3, r3, lsr #8 /* r3 = ...3 */
562#endif
563 strh r2, [r0, #0x01]
564 strb r3, [r0, #0x03]
565 bx lr
566 LMEMCPY_4_PAD
567
568/*
569 * 0111: dst is 8-bit aligned, src is 8-bit aligned
570 */
571 ldrb r2, [r1]
572 ldrh r3, [r1, #0x01]
573 ldrb r1, [r1, #0x03]
574 strb r2, [r0]
575 strh r3, [r0, #0x01]
576 strb r1, [r0, #0x03]
577 bx lr
578 LMEMCPY_4_PAD
579
580/*
581 * 1000: dst is 16-bit aligned, src is 32-bit aligned
582 */
583 ldr r2, [r1]
584#ifdef __ARMEB__
585 strh r2, [r0, #0x02]
586 mov r3, r2, lsr #16
587 strh r3, [r0]
588#else
589 strh r2, [r0]
590 mov r3, r2, lsr #16
591 strh r3, [r0, #0x02]
592#endif
593 bx lr
594 LMEMCPY_4_PAD
595
596/*
597 * 1001: dst is 16-bit aligned, src is 8-bit aligned
598 */
599 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
600 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
601 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
602 strh r1, [r0]
603#ifdef __ARMEB__
604 mov r2, r2, lsl #8 /* r2 = 012. */
605 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
606#else
607 mov r2, r2, lsr #24 /* r2 = ...2 */
608 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
609#endif
610 strh r2, [r0, #0x02]
611 bx lr
612 LMEMCPY_4_PAD
613
614/*
615 * 1010: dst is 16-bit aligned, src is 16-bit aligned
616 */
617 ldrh r2, [r1]
618 ldrh r3, [r1, #0x02]
619 strh r2, [r0]
620 strh r3, [r0, #0x02]
621 bx lr
622 LMEMCPY_4_PAD
623
624/*
625 * 1011: dst is 16-bit aligned, src is 8-bit aligned
626 */
627 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
628 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
629 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
630 strh r1, [r0, #0x02]
631#ifdef __ARMEB__
632 mov r3, r3, lsr #24 /* r3 = ...1 */
633 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
634#else
635 mov r3, r3, lsl #8 /* r3 = 321. */
636 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
637#endif
638 strh r3, [r0]
639 bx lr
640 LMEMCPY_4_PAD
641
642/*
643 * 1100: dst is 8-bit aligned, src is 32-bit aligned
644 */
645 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
646#ifdef __ARMEB__
647 strb r2, [r0, #0x03]
648 mov r3, r2, lsr #8
649 mov r1, r2, lsr #24
650 strh r3, [r0, #0x01]
651 strb r1, [r0]
652#else
653 strb r2, [r0]
654 mov r3, r2, lsr #8
655 mov r1, r2, lsr #24
656 strh r3, [r0, #0x01]
657 strb r1, [r0, #0x03]
658#endif
659 bx lr
660 LMEMCPY_4_PAD
661
662/*
663 * 1101: dst is 8-bit aligned, src is 8-bit aligned
664 */
665 ldrb r2, [r1]
666 ldrh r3, [r1, #0x01]
667 ldrb r1, [r1, #0x03]
668 strb r2, [r0]
669 strh r3, [r0, #0x01]
670 strb r1, [r0, #0x03]
671 bx lr
672 LMEMCPY_4_PAD
673
674/*
675 * 1110: dst is 8-bit aligned, src is 16-bit aligned
676 */
677#ifdef __ARMEB__
678 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
679 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
680 strb r3, [r0, #0x03]
681 mov r3, r3, lsr #8 /* r3 = ...2 */
682 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
683 strh r3, [r0, #0x01]
684 mov r2, r2, lsr #8 /* r2 = ...0 */
685 strb r2, [r0]
686#else
687 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
688 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
689 strb r2, [r0]
690 mov r2, r2, lsr #8 /* r2 = ...1 */
691 orr r2, r2, r3, lsl #8 /* r2 = .321 */
692 strh r2, [r0, #0x01]
693 mov r3, r3, lsr #8 /* r3 = ...3 */
694 strb r3, [r0, #0x03]
695#endif
696 bx lr
697 LMEMCPY_4_PAD
698
699/*
700 * 1111: dst is 8-bit aligned, src is 8-bit aligned
701 */
702 ldrb r2, [r1]
703 ldrh r3, [r1, #0x01]
704 ldrb r1, [r1, #0x03]
705 strb r2, [r0]
706 strh r3, [r0, #0x01]
707 strb r1, [r0, #0x03]
708 bx lr
709 LMEMCPY_4_PAD
710
711
712/******************************************************************************
713 * Special case for 6 byte copies
714 */
715#define LMEMCPY_6_LOG2 6 /* 64 bytes */
716#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
717 LMEMCPY_6_PAD
718.Lmemcpy_6:
719 and r2, r1, #0x03
720 orr r2, r2, r0, lsl #2
721 ands r2, r2, #0x0f
722 sub r3, pc, #0x14
723 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
724
725/*
726 * 0000: dst is 32-bit aligned, src is 32-bit aligned
727 */
728 ldr r2, [r1]
729 ldrh r3, [r1, #0x04]
730 str r2, [r0]
731 strh r3, [r0, #0x04]
732 bx lr
733 LMEMCPY_6_PAD
734
735/*
736 * 0001: dst is 32-bit aligned, src is 8-bit aligned
737 */
738 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
739 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
740#ifdef __ARMEB__
741 mov r2, r2, lsl #8 /* r2 = 012. */
742 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
743#else
744 mov r2, r2, lsr #8 /* r2 = .210 */
745 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
746#endif
747 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
748 str r2, [r0]
749 strh r3, [r0, #0x04]
750 bx lr
751 LMEMCPY_6_PAD
752
753/*
754 * 0010: dst is 32-bit aligned, src is 16-bit aligned
755 */
756 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
757 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
758#ifdef __ARMEB__
759 mov r1, r3, lsr #16 /* r1 = ..23 */
760 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
761 str r1, [r0]
762 strh r3, [r0, #0x04]
763#else
764 mov r1, r3, lsr #16 /* r1 = ..54 */
765 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
766 str r2, [r0]
767 strh r1, [r0, #0x04]
768#endif
769 bx lr
770 LMEMCPY_6_PAD
771
772/*
773 * 0011: dst is 32-bit aligned, src is 8-bit aligned
774 */
775 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
776 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
 777 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
778#ifdef __ARMEB__
779 mov r2, r2, lsl #24 /* r2 = 0... */
780 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
781 mov r3, r3, lsl #8 /* r3 = 234. */
782 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
783#else
784 mov r2, r2, lsr #24 /* r2 = ...0 */
785 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
786 mov r1, r1, lsl #8 /* r1 = xx5. */
787 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
788#endif
789 str r2, [r0]
790 strh r1, [r0, #0x04]
791 bx lr
792 LMEMCPY_6_PAD
793
794/*
795 * 0100: dst is 8-bit aligned, src is 32-bit aligned
796 */
797 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
798 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
799 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
800 strh r1, [r0, #0x01]
801#ifdef __ARMEB__
802 mov r1, r3, lsr #24 /* r1 = ...0 */
803 strb r1, [r0]
804 mov r3, r3, lsl #8 /* r3 = 123. */
805 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
806#else
807 strb r3, [r0]
808 mov r3, r3, lsr #24 /* r3 = ...3 */
809 orr r3, r3, r2, lsl #8 /* r3 = .543 */
810 mov r2, r2, lsr #8 /* r2 = ...5 */
811#endif
812 strh r3, [r0, #0x03]
813 strb r2, [r0, #0x05]
814 bx lr
815 LMEMCPY_6_PAD
816
817/*
818 * 0101: dst is 8-bit aligned, src is 8-bit aligned
819 */
820 ldrb r2, [r1]
821 ldrh r3, [r1, #0x01]
822 ldrh ip, [r1, #0x03]
823 ldrb r1, [r1, #0x05]
824 strb r2, [r0]
825 strh r3, [r0, #0x01]
826 strh ip, [r0, #0x03]
827 strb r1, [r0, #0x05]
828 bx lr
829 LMEMCPY_6_PAD
830
831/*
832 * 0110: dst is 8-bit aligned, src is 16-bit aligned
833 */
834 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
835 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
836#ifdef __ARMEB__
837 mov r3, r2, lsr #8 /* r3 = ...0 */
838 strb r3, [r0]
839 strb r1, [r0, #0x05]
840 mov r3, r1, lsr #8 /* r3 = .234 */
841 strh r3, [r0, #0x03]
842 mov r3, r2, lsl #8 /* r3 = .01. */
843 orr r3, r3, r1, lsr #24 /* r3 = .012 */
844 strh r3, [r0, #0x01]
845#else
846 strb r2, [r0]
847 mov r3, r1, lsr #24
848 strb r3, [r0, #0x05]
849 mov r3, r1, lsr #8 /* r3 = .543 */
850 strh r3, [r0, #0x03]
851 mov r3, r2, lsr #8 /* r3 = ...1 */
852 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
853 strh r3, [r0, #0x01]
854#endif
855 bx lr
856 LMEMCPY_6_PAD
857
858/*
859 * 0111: dst is 8-bit aligned, src is 8-bit aligned
860 */
861 ldrb r2, [r1]
862 ldrh r3, [r1, #0x01]
863 ldrh ip, [r1, #0x03]
864 ldrb r1, [r1, #0x05]
865 strb r2, [r0]
866 strh r3, [r0, #0x01]
867 strh ip, [r0, #0x03]
868 strb r1, [r0, #0x05]
869 bx lr
870 LMEMCPY_6_PAD
871
872/*
873 * 1000: dst is 16-bit aligned, src is 32-bit aligned
874 */
875#ifdef __ARMEB__
876 ldr r2, [r1] /* r2 = 0123 */
877 ldrh r3, [r1, #0x04] /* r3 = ..45 */
878 mov r1, r2, lsr #16 /* r1 = ..01 */
879 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
880 strh r1, [r0]
881 str r3, [r0, #0x02]
882#else
883 ldrh r2, [r1, #0x04] /* r2 = ..54 */
884 ldr r3, [r1] /* r3 = 3210 */
885 mov r2, r2, lsl #16 /* r2 = 54.. */
886 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
887 strh r3, [r0]
888 str r2, [r0, #0x02]
889#endif
890 bx lr
891 LMEMCPY_6_PAD
892
893/*
894 * 1001: dst is 16-bit aligned, src is 8-bit aligned
895 */
896 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
897 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
898 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
899#ifdef __ARMEB__
900 mov r2, r2, lsr #8 /* r2 = .345 */
901 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
902#else
903 mov r2, r2, lsl #8 /* r2 = 543. */
904 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
905#endif
906 strh r1, [r0]
907 str r2, [r0, #0x02]
908 bx lr
909 LMEMCPY_6_PAD
910
911/*
912 * 1010: dst is 16-bit aligned, src is 16-bit aligned
913 */
914 ldrh r2, [r1]
915 ldr r3, [r1, #0x02]
916 strh r2, [r0]
917 str r3, [r0, #0x02]
918 bx lr
919 LMEMCPY_6_PAD
920
921/*
922 * 1011: dst is 16-bit aligned, src is 8-bit aligned
923 */
924 ldrb r3, [r1] /* r3 = ...0 */
925 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
926 ldrb r1, [r1, #0x05] /* r1 = ...5 */
927#ifdef __ARMEB__
928 mov r3, r3, lsl #8 /* r3 = ..0. */
929 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
930 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
931#else
932 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
933 mov r1, r1, lsl #24 /* r1 = 5... */
934 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
935#endif
936 strh r3, [r0]
937 str r1, [r0, #0x02]
938 bx lr
939 LMEMCPY_6_PAD
940
941/*
942 * 1100: dst is 8-bit aligned, src is 32-bit aligned
943 */
944 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
945 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
946#ifdef __ARMEB__
947 mov r3, r2, lsr #24 /* r3 = ...0 */
948 strb r3, [r0]
949 mov r2, r2, lsl #8 /* r2 = 123. */
950 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
951#else
952 strb r2, [r0]
953 mov r2, r2, lsr #8 /* r2 = .321 */
954 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
955 mov r1, r1, lsr #8 /* r1 = ...5 */
956#endif
957 str r2, [r0, #0x01]
958 strb r1, [r0, #0x05]
959 bx lr
960 LMEMCPY_6_PAD
961
962/*
963 * 1101: dst is 8-bit aligned, src is 8-bit aligned
964 */
965 ldrb r2, [r1]
966 ldrh r3, [r1, #0x01]
967 ldrh ip, [r1, #0x03]
968 ldrb r1, [r1, #0x05]
969 strb r2, [r0]
970 strh r3, [r0, #0x01]
971 strh ip, [r0, #0x03]
972 strb r1, [r0, #0x05]
973 bx lr
974 LMEMCPY_6_PAD
975
976/*
977 * 1110: dst is 8-bit aligned, src is 16-bit aligned
978 */
979 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
980 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
981#ifdef __ARMEB__
982 mov r3, r2, lsr #8 /* r3 = ...0 */
983 strb r3, [r0]
984 mov r2, r2, lsl #24 /* r2 = 1... */
985 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
986#else
987 strb r2, [r0]
988 mov r2, r2, lsr #8 /* r2 = ...1 */
989 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
990 mov r1, r1, lsr #24 /* r1 = ...5 */
991#endif
992 str r2, [r0, #0x01]
993 strb r1, [r0, #0x05]
994 bx lr
995 LMEMCPY_6_PAD
996
997/*
998 * 1111: dst is 8-bit aligned, src is 8-bit aligned
999 */
1000 ldrb r2, [r1]
1001 ldr r3, [r1, #0x01]
1002 ldrb r1, [r1, #0x05]
1003 strb r2, [r0]
1004 str r3, [r0, #0x01]
1005 strb r1, [r0, #0x05]
1006 bx lr
1007 LMEMCPY_6_PAD
1008
1009
1010/******************************************************************************
1011 * Special case for 8 byte copies
1012 */
1013#define LMEMCPY_8_LOG2 6 /* 64 bytes */
1014#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
1015 LMEMCPY_8_PAD
1016.Lmemcpy_8:
1017 and r2, r1, #0x03
1018 orr r2, r2, r0, lsl #2
1019 ands r2, r2, #0x0f
1020 sub r3, pc, #0x14
1021 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
1022
1023/*
1024 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1025 */
1026 ldr r2, [r1]
1027 ldr r3, [r1, #0x04]
1028 str r2, [r0]
1029 str r3, [r0, #0x04]
1030 bx lr
1031 LMEMCPY_8_PAD
1032
1033/*
1034 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1035 */
1036 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1037 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
1038 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1039#ifdef __ARMEB__
1040 mov r3, r3, lsl #8 /* r3 = 012. */
1041 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1042 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
1043#else
1044 mov r3, r3, lsr #8 /* r3 = .210 */
1045 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1046 mov r1, r1, lsl #24 /* r1 = 7... */
1047 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
1048#endif
1049 str r3, [r0]
1050 str r2, [r0, #0x04]
1051 bx lr
1052 LMEMCPY_8_PAD
1053
1054/*
1055 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1056 */
1057 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1058 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1059 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1060#ifdef __ARMEB__
1061 mov r2, r2, lsl #16 /* r2 = 01.. */
1062 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
1063 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
1064#else
1065 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1066 mov r3, r3, lsr #16 /* r3 = ..54 */
1067 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
1068#endif
1069 str r2, [r0]
1070 str r3, [r0, #0x04]
1071 bx lr
1072 LMEMCPY_8_PAD
1073
/*
 * Remaining alignment cases for the 8-byte special-case copy.
 * Each case below is entered via a computed branch and MUST fit within
 * 2^LMEMCPY_8_LOG2 bytes; LMEMCPY_8_PAD re-aligns to the next slot.
 * On entry: r0 = dst, r1 = src.  Byte digits in comments are source
 * byte indices (0..7); "x" marks a don't-care byte from an overlapping
 * word load.
 */
/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x  LE:r1 = x765 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #24		/* r3 = 0... */
	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
	mov	r2, r2, lsl #24		/* r2 = 4... */
	orr	r2, r2, r1, lsr #8	/* r2 = 4567 */
#else
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r2, r2, lsr #24		/* r2 = ...4 */
	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
#endif
	str	r3, [r0]
	str	r2, [r0, #0x04]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
#ifdef __ARMEB__
	mov	r1, r3, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r3, lsr #8		/* r1 = .012 */
	strb	r2, [r0, #0x07]
	mov	r3, r3, lsl #24		/* r3 = 3... */
	orr	r3, r3, r2, lsr #8	/* r3 = 3456 */
#else
	strb	r3, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r1, r3, lsr #8		/* r1 = .321 */
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
#endif
	strh	r1, [r0, #0x01]
	str	r3, [r0, #0x03]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #8		/* ip = .01. */
	orr	ip, ip, r3, lsr #24	/* ip = .012 */
	strb	r1, [r0, #0x07]
	mov	r3, r3, lsl #8		/* r3 = 345. */
	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
#else
	strb	r2, [r0]		/* 0 */
	mov	ip, r1, lsr #8		/* ip = ...7 */
	strb	ip, [r0, #0x07]		/* 7 */
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r3, r3, lsr #8		/* r3 = .543 */
	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
#endif
	strh	ip, [r0, #0x01]
	str	r3, [r0, #0x03]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
#ifdef __ARMEB__
	strh	r3, [r0, #0x01]
	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
#else
	strh	ip, [r0, #0x01]
	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
#endif
	str	r2, [r0, #0x03]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, r3, lsr #16		/* r1 = ..45 */
	orr	r2, r1, r2, lsl #16	/* r2 = 2345 */
#else
	strh	r2, [r0]
	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
#endif
	str	r2, [r0, #0x02]
	strh	r3, [r0, #0x06]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
#ifdef __ARMEB__
	mov	r1, r2, lsl #24		/* r1 = 2... */
	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
#else
	mov	r1, r2, lsr #24		/* r1 = ...2 */
	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
#endif
	str	r1, [r0, #0x02]
	strh	r3, [r0, #0x06]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	ip, [r1, #0x02]
	ldrh	r3, [r1, #0x06]
	strh	r2, [r0]
	str	ip, [r0, #0x02]
	strh	r3, [r0, #0x06]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	ip, [r1]		/* ip = ...0 */
	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
	strh	r1, [r0, #0x06]
#ifdef __ARMEB__
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
	mov	r2, r2, lsr #24		/* r2 = ...1 */
	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
#else
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
#endif
	str	r3, [r0, #0x02]
	strh	r2, [r0]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
	strh	r1, [r0, #0x05]
#ifdef __ARMEB__
	strb	r3, [r0, #0x07]
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
#else
	strb	r2, [r0]
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
	str	r2, [r0, #0x01]
#endif
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
#ifdef __ARMEB__
	strh	ip, [r0, #0x05]
	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
#else
	strh	r3, [r0, #0x05]
	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #24		/* ip = 1... */
	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
	strb	r1, [r0, #0x07]
	mov	r1, r1, lsr #8		/* r1 = ...6 */
	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
#else
	strb	r2, [r0]
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r2, r1, lsr #8		/* r2 = ...7 */
	strb	r2, [r0, #0x07]
	mov	r1, r1, lsl #8		/* r1 = .76. */
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
#endif
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD
1348
/******************************************************************************
 * Special case for 12 byte copies
 *
 * Dispatch: index = (src & 3) | ((dst & 3) << 2), giving 16 alignment
 * combinations.  Each handler is padded to exactly 2^LMEMCPY_C_LOG2 bytes
 * so the computed "addne pc" lands on the right one; a handler must never
 * grow past that size.  Case 0000 falls through directly (index 0).
 * Byte digits 0..B in comments are source byte offsets; "x" = don't-care.
 */
#define LMEMCPY_C_LOG2	7	/* 128 bytes */
#define LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
.Lmemcpy_c:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
#ifdef __ARMEB__
	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsr #24		/* r2 = ...7 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
	mov	r1, r1, lsl #8		/* r1 = 012. */
	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
#else
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x04]
	str	r1, [r0]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #16		/* r3 = 45.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #24		/* r3 = 4... */
	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r2, lsl #24		/* r1 = 3... */
	orr	r2, r1, r3, lsr #8	/* r1 = 3456 */
	mov	r1, r3, lsl #24		/* r1 = 7... */
	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
#else
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r1 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
#endif
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, ror #8		/* r2 = 1..0 */
	strb	r2, [r0]
	mov	r2, r2, lsr #16		/* r2 = ..1. */
	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsl #8		/* r2 = 345. */
	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
	mov	r2, ip, lsl #8		/* r2 = 789. */
	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
#endif
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	strb	r2, [r0]
#ifdef __ARMEB__
	mov	r2, r3, lsr #16		/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsl #16		/* r3 = 34.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
	mov	ip, ip, lsl #16		/* ip = 78.. */
	orr	ip, ip, r1, lsr #16	/* ip = 789A */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
#else
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
#endif
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, ip, lsl #16		/* r1 = 23.. */
	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
	mov	r3, r3, lsl #16		/* r3 = 67.. */
	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
#else
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
#endif
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 2... */
	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
	mov	r3, r3, lsl #24		/* r3 = 6... */
	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
#else
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
#endif
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #24		/* r2 = ...9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
	mov	r1, r1, lsl #8		/* r1 = ..0. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
#else
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
	mov	r2, ip, lsl #8		/* r2 = 567. */
	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
	str	r2, [r0, #0x05]
	mov	r2, r1, lsr #8		/* r2 = ..9A */
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
#endif
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	strb	r2, [r0, #0x0b]
#ifdef __ARMEB__
	strh	r3, [r0, #0x09]
	mov	r3, r3, lsr #16		/* r3 = ..78 */
	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
	mov	ip, ip, lsr #16		/* ip = ..34 */
	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
	mov	r1, r1, lsr #16		/* r1 = ..x0 */
#else
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43.. */
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
#endif
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
	ldr	ip, [r1, #0x06]		/* ip = 6789 */
	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
	ldrh	r1, [r1]		/* r1 = ..01 */
	strb	r2, [r0, #0x0b]
	mov	r2, r2, lsr #8		/* r2 = ...A */
	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
	mov	ip, ip, lsr #8		/* ip = .678 */
	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
	mov	r3, r3, lsr #8		/* r3 = .234 */
	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
	mov	r1, r1, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
#else
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#endif
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	bx	lr
#endif /* !_STANDALONE */

/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
/*
 * General-purpose memcpy tuned for XScale.
 * In:  r0 = dst, r1 = src, r2 = len.  Out: r0 = dst (unchanged).
 * Copies <= 12 bytes are handed to the .Lmemcpy_short dispatcher.
 * Larger copies word-align dst, then run 128-/32-byte LDR/STRD loops
 * (r4-r9 saved/restored around them); a misaligned src falls into
 * .Lmemcpy_bad_align.  PLD hints prefetch the source stream.
 */
ENTRY(memcpy)
	pld	[r1]
	cmp	r2, #0x0c
	ble	.Lmemcpy_short		/* <= 12 bytes */
	mov	r3, r0			/* We must not clobber r0 */

	/* Word-align the destination buffer */
	ands	ip, r3, #0x03		/* Already word aligned? */
	beq	.Lmemcpy_wordaligned	/* Yup */
	cmp	ip, #0x02
	ldrb	ip, [r1], #0x01
	sub	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrleb	ip, [r1], #0x01		/* conditional: copy 2nd byte if <= 2 */
	suble	r2, r2, #0x01
	strleb	ip, [r3], #0x01
	ldrltb	ip, [r1], #0x01		/* conditional: copy 3rd byte if < 2 */
	sublt	r2, r2, #0x01
	strltb	ip, [r3], #0x01

	/* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
	ands	ip, r1, #0x03		/* Is src also word-aligned? */
	bne	.Lmemcpy_bad_align	/* Nope. Things just got bad */

	/* Quad-align the destination buffer */
	tst	r3, #0x07		/* Already quad aligned? */
	ldrne	ip, [r1], #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	subne	r2, r2, #0x04
	strne	ip, [r3], #0x04

	/* Destination buffer quad aligned, source is at least word aligned */
	subs	r2, r2, #0x80
	blt	.Lmemcpy_w_lessthan128

	/* Copy 128 bytes at a time */
.Lmemcpy_w_loop128:
	ldr	r4, [r1], #0x04		/* LD:00-03 */
	ldr	r5, [r1], #0x04		/* LD:04-07 */
	pld	[r1, #0x18]		/* Prefetch 0x20 */
	ldr	r6, [r1], #0x04		/* LD:08-0b */
	ldr	r7, [r1], #0x04		/* LD:0c-0f */
	ldr	r8, [r1], #0x04		/* LD:10-13 */
	ldr	r9, [r1], #0x04		/* LD:14-17 */
	strd	r4, [r3], #0x08		/* ST:00-07 */
	ldr	r4, [r1], #0x04		/* LD:18-1b */
	ldr	r5, [r1], #0x04		/* LD:1c-1f */
	strd	r6, [r3], #0x08		/* ST:08-0f */
	ldr	r6, [r1], #0x04		/* LD:20-23 */
	ldr	r7, [r1], #0x04		/* LD:24-27 */
	pld	[r1, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r3], #0x08		/* ST:10-17 */
	ldr	r8, [r1], #0x04		/* LD:28-2b */
	ldr	r9, [r1], #0x04		/* LD:2c-2f */
	strd	r4, [r3], #0x08		/* ST:18-1f */
	ldr	r4, [r1], #0x04		/* LD:30-33 */
	ldr	r5, [r1], #0x04		/* LD:34-37 */
	strd	r6, [r3], #0x08		/* ST:20-27 */
	ldr	r6, [r1], #0x04		/* LD:38-3b */
	ldr	r7, [r1], #0x04		/* LD:3c-3f */
	strd	r8, [r3], #0x08		/* ST:28-2f */
	ldr	r8, [r1], #0x04		/* LD:40-43 */
	ldr	r9, [r1], #0x04		/* LD:44-47 */
	pld	[r1, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r3], #0x08		/* ST:30-37 */
	ldr	r4, [r1], #0x04		/* LD:48-4b */
	ldr	r5, [r1], #0x04		/* LD:4c-4f */
	strd	r6, [r3], #0x08		/* ST:38-3f */
	ldr	r6, [r1], #0x04		/* LD:50-53 */
	ldr	r7, [r1], #0x04		/* LD:54-57 */
	strd	r8, [r3], #0x08		/* ST:40-47 */
	ldr	r8, [r1], #0x04		/* LD:58-5b */
	ldr	r9, [r1], #0x04		/* LD:5c-5f */
	strd	r4, [r3], #0x08		/* ST:48-4f */
	ldr	r4, [r1], #0x04		/* LD:60-63 */
	ldr	r5, [r1], #0x04		/* LD:64-67 */
	pld	[r1, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r3], #0x08		/* ST:50-57 */
	ldr	r6, [r1], #0x04		/* LD:68-6b */
	ldr	r7, [r1], #0x04		/* LD:6c-6f */
	strd	r8, [r3], #0x08		/* ST:58-5f */
	ldr	r8, [r1], #0x04		/* LD:70-73 */
	ldr	r9, [r1], #0x04		/* LD:74-77 */
	strd	r4, [r3], #0x08		/* ST:60-67 */
	ldr	r4, [r1], #0x04		/* LD:78-7b */
	ldr	r5, [r1], #0x04		/* LD:7c-7f */
	strd	r6, [r3], #0x08		/* ST:68-6f */
	strd	r8, [r3], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r3], #0x08		/* ST:78-7f */
	bge	.Lmemcpy_w_loop128

.Lmemcpy_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lmemcpy_w_lessthan32

	/* Copy 32 bytes at a time */
.Lmemcpy_w_loop32:
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	pld	[r1, #0x18]
	ldr	r6, [r1], #0x04
	ldr	r7, [r1], #0x04
	ldr	r8, [r1], #0x04
	ldr	r9, [r1], #0x04
	strd	r4, [r3], #0x08
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	strd	r6, [r3], #0x08
	strd	r8, [r3], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r3], #0x08
	bge	.Lmemcpy_w_loop32

.Lmemcpy_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	bxeq	lr			/* Return now if done */

	/* Branch into straight-line code for the remaining 8-byte chunks */
	and	r4, r2, #0x18
	rsbs	r4, r4, #0x18
	addne	pc, pc, r4, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	sub	r2, r2, #0x08
	strd	r4, [r3], #0x08

	/* At least 16 bytes remaining */
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	sub	r2, r2, #0x08
	strd	r4, [r3], #0x08

	/* At least 8 bytes remaining */
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	subs	r2, r2, #0x08
	strd	r4, [r3], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x04
	ldrge	ip, [r1], #0x04
	strge	ip, [r3], #0x04
	bxeq	lr			/* Return now if done */
	addlt	r2, r2, #0x04
	ldrb	ip, [r1], #0x01
	cmp	r2, #0x02
	ldrgeb	r2, [r1], #0x01
	strb	ip, [r3], #0x01
	ldrgtb	ip, [r1]
	strgeb	r2, [r3], #0x01
	strgtb	ip, [r3]
	bx	lr

204
205
/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer is word aligned, but the source buffer is not.
 * Strategy: round src down to a word boundary, load whole words, and
 * reassemble the misaligned stream with shifts.  The shift amount
 * depends on src & 3 (1, 2 or 3), handled by the bad1/bad2/bad3 loops.
 * "ip" always carries the partially-consumed word across iterations.
 */
.Lmemcpy_bad_align:
	stmfd	sp!, {r4-r7}
	bic	r1, r1, #0x03
	cmp	ip, #2
	ldr	ip, [r1], #0x04
	bgt	.Lmemcpy_bad3
	beq	.Lmemcpy_bad2
	b	.Lmemcpy_bad1

.Lmemcpy_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r1], #0x04
	pld	[r1, #0x018]
	ldr	r6, [r1], #0x04
	ldr	r7, [r1], #0x04
	ldr	ip, [r1], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r3], #0x04
	str	r5, [r3], #0x04
	str	r6, [r3], #0x04
	str	r7, [r3], #0x04
.Lmemcpy_bad1:
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bad1_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r1, r1, #0x03		/* rewind to true (byte) src position */
	blt	.Lmemcpy_bad_done

.Lmemcpy_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r1], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r3], #0x04
	bge	.Lmemcpy_bad1_loop4
	sub	r1, r1, #0x03		/* rewind to true (byte) src position */
	b	.Lmemcpy_bad_done

.Lmemcpy_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r1], #0x04
	pld	[r1, #0x018]
	ldr	r6, [r1], #0x04
	ldr	r7, [r1], #0x04
	ldr	ip, [r1], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r3], #0x04
	str	r5, [r3], #0x04
	str	r6, [r3], #0x04
	str	r7, [r3], #0x04
.Lmemcpy_bad2:
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bad2_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r1, r1, #0x02		/* rewind to true (byte) src position */
	blt	.Lmemcpy_bad_done

.Lmemcpy_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r1], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r3], #0x04
	bge	.Lmemcpy_bad2_loop4
	sub	r1, r1, #0x02		/* rewind to true (byte) src position */
	b	.Lmemcpy_bad_done

.Lmemcpy_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r1], #0x04
	pld	[r1, #0x018]
	ldr	r6, [r1], #0x04
	ldr	r7, [r1], #0x04
	ldr	ip, [r1], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r3], #0x04
	str	r5, [r3], #0x04
	str	r6, [r3], #0x04
	str	r7, [r3], #0x04
.Lmemcpy_bad3:
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bad3_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r1, r1, #0x01		/* rewind to true (byte) src position */
	blt	.Lmemcpy_bad_done

.Lmemcpy_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r1], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r3], #0x04
	bge	.Lmemcpy_bad3_loop4
	sub	r1, r1, #0x01		/* rewind to true (byte) src position */

.Lmemcpy_bad_done:
	ldmfd	sp!, {r4-r7}
	adds	r2, r2, #0x04
	bxeq	lr
	/* 1, 2 or 3 trailing bytes, copied with conditional byte ops */
	ldrb	ip, [r1], #0x01
	cmp	r2, #0x02
	ldrgeb	r2, [r1], #0x01
	strb	ip, [r3], #0x01
	ldrgtb	ip, [r1]
	strgeb	r2, [r3], #0x01
	strgtb	ip, [r3]
	bx	lr
413
414
/*
 * Handle short copies (less than 16 bytes), possibly misaligned.
 * Some of these are *very* common, thanks to the network stack,
 * and so are handled specially.
 *
 * r2 (the length, 0..12 here) indexes a branch table of one 4-byte
 * branch per length; sizes with dedicated handlers jump to them,
 * everything else falls back to the byte-at-a-time loop.
 */
.Lmemcpy_short:
#ifndef _STANDALONE
	add	pc, pc, r2, lsl #2
	nop
	bx	lr			/* 0x00 */
	b	.Lmemcpy_bytewise	/* 0x01 */
	b	.Lmemcpy_bytewise	/* 0x02 */
	b	.Lmemcpy_bytewise	/* 0x03 */
	b	.Lmemcpy_4		/* 0x04 */
	b	.Lmemcpy_bytewise	/* 0x05 */
	b	.Lmemcpy_6		/* 0x06 */
	b	.Lmemcpy_bytewise	/* 0x07 */
	b	.Lmemcpy_8		/* 0x08 */
	b	.Lmemcpy_bytewise	/* 0x09 */
	b	.Lmemcpy_bytewise	/* 0x0a */
	b	.Lmemcpy_bytewise	/* 0x0b */
	b	.Lmemcpy_c		/* 0x0c */
#endif
.Lmemcpy_bytewise:
	mov	r3, r0			/* We must not clobber r0 */
	ldrb	ip, [r1], #0x01
1:	subs	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrneb	ip, [r1], #0x01
	bne	1b
	bx	lr
446
#ifndef _STANDALONE
/******************************************************************************
 * Special case for 4 byte copies
 *
 * Dispatch: index = (src & 3) | ((dst & 3) << 2).  Each handler is padded
 * to 2^LMEMCPY_4_LOG2 bytes for the computed branch and must not exceed
 * that size.  Byte digits 0..3 in comments are source byte offsets.
 */
#define LMEMCPY_4_LOG2	6	/* 64 bytes */
#define LMEMCPY_4_PAD	.align LMEMCPY_4_LOG2
	LMEMCPY_4_PAD
.Lmemcpy_4:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_4_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	str	r2, [r0]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
	ldr	r2, [r1, #3]		/* BE:r2 = 3xxx  LE:r2 = xxx3 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #8		/* r3 = 012. */
	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
#else
	mov	r3, r3, lsr #8		/* r3 = .210 */
	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
#endif
	str	r3, [r0]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r3, [r1]
	ldrh	r2, [r1, #0x02]
#else
	ldrh	r3, [r1, #0x02]
	ldrh	r2, [r1]
#endif
	orr	r3, r2, r3, lsl #16
	str	r3, [r0]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-3]		/* BE:r3 = xxx0  LE:r3 = 0xxx */
	ldr	r2, [r1, #1]		/* BE:r2 = 123x  LE:r2 = x321 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #24		/* r3 = 0... */
	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
#else
	mov	r3, r3, lsr #24		/* r3 = ...0 */
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
#endif
	str	r3, [r0]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
#ifdef __ARMEB__
	strb	r2, [r0, #0x03]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strb	r1, [r0]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strb	r1, [r0, #0x03]
#endif
	strh	r3, [r0, #0x01]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
#ifdef __ARMEB__
	mov	r1, r2, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r2, r2, lsl #8		/* r2 = .01. */
	orr	r2, r2, r3, lsr #8	/* r2 = .012 */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
	mov	r3, r3, lsr #8		/* r3 = ...3 */
#endif
	strh	r2, [r0, #0x01]
	strb	r3, [r0, #0x03]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
#ifdef __ARMEB__
	strh	r2, [r0, #0x02]
	mov	r3, r2, lsr #16
	strh	r3, [r0]
#else
	strh	r2, [r0]
	mov	r3, r2, lsr #16
	strh	r3, [r0, #0x02]
#endif
	bx	lr
	LMEMCPY_4_PAD
595
596/*
597 * 1001: dst is 16-bit aligned, src is 8-bit aligned
598 */
599 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
600 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
601 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
602 strh r1, [r0]
603#ifdef __ARMEB__
604 mov r2, r2, lsl #8 /* r2 = 012. */
605 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
606#else
607 mov r2, r2, lsr #24 /* r2 = ...2 */
608 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
609#endif
610 strh r2, [r0, #0x02]
611 bx lr
612 LMEMCPY_4_PAD
613
614/*
615 * 1010: dst is 16-bit aligned, src is 16-bit aligned
616 */
617 ldrh r2, [r1]
618 ldrh r3, [r1, #0x02]
619 strh r2, [r0]
620 strh r3, [r0, #0x02]
621 bx lr
622 LMEMCPY_4_PAD
623
624/*
625 * 1011: dst is 16-bit aligned, src is 8-bit aligned
626 */
627 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
628 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
629 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
630 strh r1, [r0, #0x02]
631#ifdef __ARMEB__
632 mov r3, r3, lsr #24 /* r3 = ...1 */
633 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
634#else
635 mov r3, r3, lsl #8 /* r3 = 321. */
636 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
637#endif
638 strh r3, [r0]
639 bx lr
640 LMEMCPY_4_PAD
641
642/*
643 * 1100: dst is 8-bit aligned, src is 32-bit aligned
644 */
645 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
646#ifdef __ARMEB__
647 strb r2, [r0, #0x03]
648 mov r3, r2, lsr #8
649 mov r1, r2, lsr #24
650 strh r3, [r0, #0x01]
651 strb r1, [r0]
652#else
653 strb r2, [r0]
654 mov r3, r2, lsr #8
655 mov r1, r2, lsr #24
656 strh r3, [r0, #0x01]
657 strb r1, [r0, #0x03]
658#endif
659 bx lr
660 LMEMCPY_4_PAD
661
662/*
663 * 1101: dst is 8-bit aligned, src is 8-bit aligned
664 */
665 ldrb r2, [r1]
666 ldrh r3, [r1, #0x01]
667 ldrb r1, [r1, #0x03]
668 strb r2, [r0]
669 strh r3, [r0, #0x01]
670 strb r1, [r0, #0x03]
671 bx lr
672 LMEMCPY_4_PAD
673
674/*
675 * 1110: dst is 8-bit aligned, src is 16-bit aligned
676 */
677#ifdef __ARMEB__
678 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
679 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
680 strb r3, [r0, #0x03]
681 mov r3, r3, lsr #8 /* r3 = ...2 */
682 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
683 strh r3, [r0, #0x01]
684 mov r2, r2, lsr #8 /* r2 = ...0 */
685 strb r2, [r0]
686#else
687 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
688 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
689 strb r2, [r0]
690 mov r2, r2, lsr #8 /* r2 = ...1 */
691 orr r2, r2, r3, lsl #8 /* r2 = .321 */
692 strh r2, [r0, #0x01]
693 mov r3, r3, lsr #8 /* r3 = ...3 */
694 strb r3, [r0, #0x03]
695#endif
696 bx lr
697 LMEMCPY_4_PAD
698
699/*
700 * 1111: dst is 8-bit aligned, src is 8-bit aligned
701 */
702 ldrb r2, [r1]
703 ldrh r3, [r1, #0x01]
704 ldrb r1, [r1, #0x03]
705 strb r2, [r0]
706 strh r3, [r0, #0x01]
707 strb r1, [r0, #0x03]
708 bx lr
709 LMEMCPY_4_PAD
710
711
712/******************************************************************************
713 * Special case for 6 byte copies
 *
 * Entered with r0 = dst, r1 = src; length is known to be exactly 6.
 * Same dispatch scheme as the other small-copy cases: a 4-bit
 * alignment index selects one of sixteen 64-byte code cells; index
 * 0000 falls through.  Cell sizes are fixed by LMEMCPY_6_PAD, so the
 * instruction count of each cell must not change.
714 */
715#define LMEMCPY_6_LOG2 6 /* 64 bytes */
716#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
717 LMEMCPY_6_PAD
718.Lmemcpy_6:
719 and r2, r1, #0x03 /* r2 = src & 3 */
720 orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 */
721 ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3) */
722 sub r3, pc, #0x14 /* r3 = .Lmemcpy_6 (pc reads as . + 8) */
723 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 /* nonzero index: jump to its cell */
724
725/*
726 * 0000: dst is 32-bit aligned, src is 32-bit aligned
727 */
728 ldr r2, [r1]
729 ldrh r3, [r1, #0x04]
730 str r2, [r0]
731 strh r3, [r0, #0x04]
732 bx lr
733 LMEMCPY_6_PAD
734
735/*
736 * 0001: dst is 32-bit aligned, src is 8-bit aligned
737 */
738 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
739 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
740#ifdef __ARMEB__
741 mov r2, r2, lsl #8 /* r2 = 012. */
742 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
743#else
744 mov r2, r2, lsr #8 /* r2 = .210 */
745 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
746#endif
747 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
748 str r2, [r0]
749 strh r3, [r0, #0x04]
750 bx lr
751 LMEMCPY_6_PAD
752
753/*
754 * 0010: dst is 32-bit aligned, src is 16-bit aligned
755 */
756 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
757 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
758#ifdef __ARMEB__
759 mov r1, r3, lsr #16 /* r1 = ..23 */
760 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
761 str r1, [r0]
762 strh r3, [r0, #0x04]
763#else
764 mov r1, r3, lsr #16 /* r1 = ..54 */
765 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
766 str r2, [r0]
767 strh r1, [r0, #0x04]
768#endif
769 bx lr
770 LMEMCPY_6_PAD
771
772/*
773 * 0011: dst is 32-bit aligned, src is 8-bit aligned
774 */
775 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
776 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
777 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
778#ifdef __ARMEB__
779 mov r2, r2, lsl #24 /* r2 = 0... */
780 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
781 mov r3, r3, lsl #8 /* r3 = 234. */
782 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
783#else
784 mov r2, r2, lsr #24 /* r2 = ...0 */
785 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
786 mov r1, r1, lsl #8 /* r1 = xx5. */
787 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
788#endif
789 str r2, [r0]
790 strh r1, [r0, #0x04]
791 bx lr
792 LMEMCPY_6_PAD
793
794/*
795 * 0100: dst is 8-bit aligned, src is 32-bit aligned
796 */
797 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
798 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
799 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
800 strh r1, [r0, #0x01]
801#ifdef __ARMEB__
802 mov r1, r3, lsr #24 /* r1 = ...0 */
803 strb r1, [r0]
804 mov r3, r3, lsl #8 /* r3 = 123. */
805 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
806#else
807 strb r3, [r0]
808 mov r3, r3, lsr #24 /* r3 = ...3 */
809 orr r3, r3, r2, lsl #8 /* r3 = .543 */
810 mov r2, r2, lsr #8 /* r2 = ...5 */
811#endif
812 strh r3, [r0, #0x03]
813 strb r2, [r0, #0x05]
814 bx lr
815 LMEMCPY_6_PAD
816
817/*
818 * 0101: dst is 8-bit aligned, src is 8-bit aligned
819 */
820 ldrb r2, [r1]
821 ldrh r3, [r1, #0x01]
822 ldrh ip, [r1, #0x03]
823 ldrb r1, [r1, #0x05]
824 strb r2, [r0]
825 strh r3, [r0, #0x01]
826 strh ip, [r0, #0x03]
827 strb r1, [r0, #0x05]
828 bx lr
829 LMEMCPY_6_PAD
830
831/*
832 * 0110: dst is 8-bit aligned, src is 16-bit aligned
833 */
834 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
835 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
836#ifdef __ARMEB__
837 mov r3, r2, lsr #8 /* r3 = ...0 */
838 strb r3, [r0]
839 strb r1, [r0, #0x05]
840 mov r3, r1, lsr #8 /* r3 = .234 */
841 strh r3, [r0, #0x03]
842 mov r3, r2, lsl #8 /* r3 = .01. */
843 orr r3, r3, r1, lsr #24 /* r3 = .012 */
844 strh r3, [r0, #0x01]
845#else
846 strb r2, [r0]
847 mov r3, r1, lsr #24
848 strb r3, [r0, #0x05]
849 mov r3, r1, lsr #8 /* r3 = .543 */
850 strh r3, [r0, #0x03]
851 mov r3, r2, lsr #8 /* r3 = ...1 */
852 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
853 strh r3, [r0, #0x01]
854#endif
855 bx lr
856 LMEMCPY_6_PAD
857
858/*
859 * 0111: dst is 8-bit aligned, src is 8-bit aligned
860 */
861 ldrb r2, [r1]
862 ldrh r3, [r1, #0x01]
863 ldrh ip, [r1, #0x03]
864 ldrb r1, [r1, #0x05]
865 strb r2, [r0]
866 strh r3, [r0, #0x01]
867 strh ip, [r0, #0x03]
868 strb r1, [r0, #0x05]
869 bx lr
870 LMEMCPY_6_PAD
871
872/*
873 * 1000: dst is 16-bit aligned, src is 32-bit aligned
874 */
875#ifdef __ARMEB__
876 ldr r2, [r1] /* r2 = 0123 */
877 ldrh r3, [r1, #0x04] /* r3 = ..45 */
878 mov r1, r2, lsr #16 /* r1 = ..01 */
879 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
880 strh r1, [r0]
881 str r3, [r0, #0x02]
882#else
883 ldrh r2, [r1, #0x04] /* r2 = ..54 */
884 ldr r3, [r1] /* r3 = 3210 */
885 mov r2, r2, lsl #16 /* r2 = 54.. */
886 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
887 strh r3, [r0]
888 str r2, [r0, #0x02]
889#endif
890 bx lr
891 LMEMCPY_6_PAD
892
893/*
894 * 1001: dst is 16-bit aligned, src is 8-bit aligned
895 */
896 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
897 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
898 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
899#ifdef __ARMEB__
900 mov r2, r2, lsr #8 /* r2 = .345 */
901 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
902#else
903 mov r2, r2, lsl #8 /* r2 = 543. */
904 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
905#endif
906 strh r1, [r0]
907 str r2, [r0, #0x02]
908 bx lr
909 LMEMCPY_6_PAD
910
911/*
912 * 1010: dst is 16-bit aligned, src is 16-bit aligned
913 */
914 ldrh r2, [r1]
915 ldr r3, [r1, #0x02]
916 strh r2, [r0]
917 str r3, [r0, #0x02]
918 bx lr
919 LMEMCPY_6_PAD
920
921/*
922 * 1011: dst is 16-bit aligned, src is 8-bit aligned
923 */
924 ldrb r3, [r1] /* r3 = ...0 */
925 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
926 ldrb r1, [r1, #0x05] /* r1 = ...5 */
927#ifdef __ARMEB__
928 mov r3, r3, lsl #8 /* r3 = ..0. */
929 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
930 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
931#else
932 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
933 mov r1, r1, lsl #24 /* r1 = 5... */
934 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
935#endif
936 strh r3, [r0]
937 str r1, [r0, #0x02]
938 bx lr
939 LMEMCPY_6_PAD
940
941/*
942 * 1100: dst is 8-bit aligned, src is 32-bit aligned
943 */
944 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
945 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
946#ifdef __ARMEB__
947 mov r3, r2, lsr #24 /* r3 = ...0 */
948 strb r3, [r0]
949 mov r2, r2, lsl #8 /* r2 = 123. */
950 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
951#else
952 strb r2, [r0]
953 mov r2, r2, lsr #8 /* r2 = .321 */
954 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
955 mov r1, r1, lsr #8 /* r1 = ...5 */
956#endif
957 str r2, [r0, #0x01]
958 strb r1, [r0, #0x05]
959 bx lr
960 LMEMCPY_6_PAD
961
962/*
963 * 1101: dst is 8-bit aligned, src is 8-bit aligned
964 */
965 ldrb r2, [r1]
966 ldrh r3, [r1, #0x01]
967 ldrh ip, [r1, #0x03]
968 ldrb r1, [r1, #0x05]
969 strb r2, [r0]
970 strh r3, [r0, #0x01]
971 strh ip, [r0, #0x03]
972 strb r1, [r0, #0x05]
973 bx lr
974 LMEMCPY_6_PAD
975
976/*
977 * 1110: dst is 8-bit aligned, src is 16-bit aligned
978 */
979 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
980 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
981#ifdef __ARMEB__
982 mov r3, r2, lsr #8 /* r3 = ...0 */
983 strb r3, [r0]
984 mov r2, r2, lsl #24 /* r2 = 1... */
985 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
986#else
987 strb r2, [r0]
988 mov r2, r2, lsr #8 /* r2 = ...1 */
989 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
990 mov r1, r1, lsr #24 /* r1 = ...5 */
991#endif
992 str r2, [r0, #0x01]
993 strb r1, [r0, #0x05]
994 bx lr
995 LMEMCPY_6_PAD
996
997/*
998 * 1111: dst is 8-bit aligned, src is 8-bit aligned
999 */
1000 ldrb r2, [r1]
1001 ldr r3, [r1, #0x01]
1002 ldrb r1, [r1, #0x05]
1003 strb r2, [r0]
1004 str r3, [r0, #0x01]
1005 strb r1, [r0, #0x05]
1006 bx lr
1007 LMEMCPY_6_PAD
1008
1009
1010/******************************************************************************
1011 * Special case for 8 byte copies
 *
 * Entered with r0 = dst, r1 = src; length is known to be exactly 8.
 * Same dispatch scheme as the other small-copy cases: a 4-bit
 * alignment index selects one of sixteen 64-byte code cells; index
 * 0000 falls through.  Cell sizes are fixed by LMEMCPY_8_PAD, so the
 * instruction count of each cell must not change.
1012 */
1013#define LMEMCPY_8_LOG2 6 /* 64 bytes */
1014#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
1015 LMEMCPY_8_PAD
1016.Lmemcpy_8:
1017 and r2, r1, #0x03 /* r2 = src & 3 */
1018 orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 */
1019 ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3) */
1020 sub r3, pc, #0x14 /* r3 = .Lmemcpy_8 (pc reads as . + 8) */
1021 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 /* nonzero index: jump to its cell */
1022
1023/*
1024 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1025 */
1026 ldr r2, [r1]
1027 ldr r3, [r1, #0x04]
1028 str r2, [r0]
1029 str r3, [r0, #0x04]
1030 bx lr
1031 LMEMCPY_8_PAD
1032
1033/*
1034 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1035 */
1036 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1037 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
1038 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1039#ifdef __ARMEB__
1040 mov r3, r3, lsl #8 /* r3 = 012. */
1041 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1042 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
1043#else
1044 mov r3, r3, lsr #8 /* r3 = .210 */
1045 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1046 mov r1, r1, lsl #24 /* r1 = 7... */
1047 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
1048#endif
1049 str r3, [r0]
1050 str r2, [r0, #0x04]
1051 bx lr
1052 LMEMCPY_8_PAD
1053
1054/*
1055 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1056 */
1057 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1058 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1059 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1060#ifdef __ARMEB__
1061 mov r2, r2, lsl #16 /* r2 = 01.. */
1062 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
1063 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
1064#else
1065 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1066 mov r3, r3, lsr #16 /* r3 = ..54 */
1067 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
1068#endif
1069 str r2, [r0]
1070 str r3, [r0, #0x04]
1071 bx lr
1072 LMEMCPY_8_PAD
1073
1074/*
1075 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1076 */
1077 ldrb r3, [r1] /* r3 = ...0 */
1078 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1079 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
1080#ifdef __ARMEB__
1081 mov r3, r3, lsl #24 /* r3 = 0... */
1082 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1083 mov r2, r2, lsl #24 /* r2 = 4... */
1084 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
1085#else
1086 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1087 mov r2, r2, lsr #24 /* r2 = ...4 */
1088 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
1089#endif
1090 str r3, [r0]
1091 str r2, [r0, #0x04]
1092 bx lr
1093 LMEMCPY_8_PAD
1094
1095/*
1096 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1097 */
1098 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1099 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
1100#ifdef __ARMEB__
1101 mov r1, r3, lsr #24 /* r1 = ...0 */
1102 strb r1, [r0]
1103 mov r1, r3, lsr #8 /* r1 = .012 */
1104 strb r2, [r0, #0x07]
1105 mov r3, r3, lsl #24 /* r3 = 3... */
1106 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
1107#else
1108 strb r3, [r0]
1109 mov r1, r2, lsr #24 /* r1 = ...7 */
1110 strb r1, [r0, #0x07]
1111 mov r1, r3, lsr #8 /* r1 = .321 */
1112 mov r3, r3, lsr #24 /* r3 = ...3 */
1113 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
1114#endif
1115 strh r1, [r0, #0x01]
1116 str r3, [r0, #0x03]
1117 bx lr
1118 LMEMCPY_8_PAD
1119
1120/*
1121 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1122 */
1123 ldrb r2, [r1]
1124 ldrh r3, [r1, #0x01]
1125 ldr ip, [r1, #0x03]
1126 ldrb r1, [r1, #0x07]
1127 strb r2, [r0]
1128 strh r3, [r0, #0x01]
1129 str ip, [r0, #0x03]
1130 strb r1, [r0, #0x07]
1131 bx lr
1132 LMEMCPY_8_PAD
1133
1134/*
1135 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1136 */
1137 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1138 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1139 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1140#ifdef __ARMEB__
1141 mov ip, r2, lsr #8 /* ip = ...0 */
1142 strb ip, [r0]
1143 mov ip, r2, lsl #8 /* ip = .01. */
1144 orr ip, ip, r3, lsr #24 /* ip = .012 */
1145 strb r1, [r0, #0x07]
1146 mov r3, r3, lsl #8 /* r3 = 345. */
1147 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
1148#else
1149 strb r2, [r0] /* 0 */
1150 mov ip, r1, lsr #8 /* ip = ...7 */
1151 strb ip, [r0, #0x07] /* 7 */
1152 mov ip, r2, lsr #8 /* ip = ...1 */
1153 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1154 mov r3, r3, lsr #8 /* r3 = .543 */
1155 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
1156#endif
1157 strh ip, [r0, #0x01]
1158 str r3, [r0, #0x03]
1159 bx lr
1160 LMEMCPY_8_PAD
1161
1162/*
1163 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1164 */
1165 ldrb r3, [r1] /* r3 = ...0 */
1166 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1167 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
1168 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1169 strb r3, [r0]
1170 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
1171#ifdef __ARMEB__
1172 strh r3, [r0, #0x01]
1173 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
1174#else
1175 strh ip, [r0, #0x01]
1176 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
1177#endif
1178 str r2, [r0, #0x03]
1179 strb r1, [r0, #0x07]
1180 bx lr
1181 LMEMCPY_8_PAD
1182
1183/*
1184 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1185 */
1186 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1187 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1188 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1189#ifdef __ARMEB__
1190 strh r1, [r0]
1191 mov r1, r3, lsr #16 /* r1 = ..45 */
1192 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */
1193#else
1194 strh r2, [r0]
1195 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
1196 mov r3, r3, lsr #16 /* r3 = ..76 */
1197#endif
1198 str r2, [r0, #0x02]
1199 strh r3, [r0, #0x06]
1200 bx lr
1201 LMEMCPY_8_PAD
1202
1203/*
1204 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1205 */
1206 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1207 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1208 ldrb ip, [r1, #0x07] /* ip = ...7 */
1209 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1210 strh r1, [r0]
1211#ifdef __ARMEB__
1212 mov r1, r2, lsl #24 /* r1 = 2... */
1213 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
1214 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
1215#else
1216 mov r1, r2, lsr #24 /* r1 = ...2 */
1217 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
1218 mov r3, r3, lsr #24 /* r3 = ...6 */
1219 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
1220#endif
1221 str r1, [r0, #0x02]
1222 strh r3, [r0, #0x06]
1223 bx lr
1224 LMEMCPY_8_PAD
1225
1226/*
1227 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1228 */
1229 ldrh r2, [r1]
1230 ldr ip, [r1, #0x02]
1231 ldrh r3, [r1, #0x06]
1232 strh r2, [r0]
1233 str ip, [r0, #0x02]
1234 strh r3, [r0, #0x06]
1235 bx lr
1236 LMEMCPY_8_PAD
1237
1238/*
1239 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1240 */
1241 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
1242 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
1243 ldrb ip, [r1] /* ip = ...0 */
1244 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
1245 strh r1, [r0, #0x06]
1246#ifdef __ARMEB__
1247 mov r3, r3, lsr #24 /* r3 = ...5 */
1248 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
1249 mov r2, r2, lsr #24 /* r2 = ...1 */
1250 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
1251#else
1252 mov r3, r3, lsl #24 /* r3 = 5... */
1253 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
1254 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
1255#endif
1256 str r3, [r0, #0x02]
1257 strh r2, [r0]
1258 bx lr
1259 LMEMCPY_8_PAD
1260
1261/*
1262 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1263 */
1264 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1265 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1266 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
1267 strh r1, [r0, #0x05]
1268#ifdef __ARMEB__
1269 strb r3, [r0, #0x07]
1270 mov r1, r2, lsr #24 /* r1 = ...0 */
1271 strb r1, [r0]
1272 mov r2, r2, lsl #8 /* r2 = 123. */
1273 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
1274 str r2, [r0, #0x01]
1275#else
1276 strb r2, [r0]
1277 mov r1, r3, lsr #24 /* r1 = ...7 */
1278 strb r1, [r0, #0x07]
1279 mov r2, r2, lsr #8 /* r2 = .321 */
1280 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
1281 str r2, [r0, #0x01]
1282#endif
1283 bx lr
1284 LMEMCPY_8_PAD
1285
1286/*
1287 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1288 */
1289 ldrb r3, [r1] /* r3 = ...0 */
1290 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
1291 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1292 ldrb r1, [r1, #0x07] /* r1 = ...7 */
1293 strb r3, [r0]
1294 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
1295#ifdef __ARMEB__
1296 strh ip, [r0, #0x05]
1297 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
1298#else
1299 strh r3, [r0, #0x05]
1300 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
1301#endif
1302 str r2, [r0, #0x01]
1303 strb r1, [r0, #0x07]
1304 bx lr
1305 LMEMCPY_8_PAD
1306
1307/*
1308 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1309 */
1310 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1311 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1312 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1313#ifdef __ARMEB__
1314 mov ip, r2, lsr #8 /* ip = ...0 */
1315 strb ip, [r0]
1316 mov ip, r2, lsl #24 /* ip = 1... */
1317 orr ip, ip, r3, lsr #8 /* ip = 1234 */
1318 strb r1, [r0, #0x07]
1319 mov r1, r1, lsr #8 /* r1 = ...6 */
1320 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
1321#else
1322 strb r2, [r0]
1323 mov ip, r2, lsr #8 /* ip = ...1 */
1324 orr ip, ip, r3, lsl #8 /* ip = 4321 */
1325 mov r2, r1, lsr #8 /* r2 = ...7 */
1326 strb r2, [r0, #0x07]
1327 mov r1, r1, lsl #8 /* r1 = .76. */
1328 orr r1, r1, r3, lsr #24 /* r1 = .765 */
1329#endif
1330 str ip, [r0, #0x01]
1331 strh r1, [r0, #0x05]
1332 bx lr
1333 LMEMCPY_8_PAD
1334
1335/*
1336 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1337 */
1338 ldrb r2, [r1]
1339 ldr ip, [r1, #0x01]
1340 ldrh r3, [r1, #0x05]
1341 ldrb r1, [r1, #0x07]
1342 strb r2, [r0]
1343 str ip, [r0, #0x01]
1344 strh r3, [r0, #0x05]
1345 strb r1, [r0, #0x07]
1346 bx lr
1347 LMEMCPY_8_PAD
1348
1349/******************************************************************************
1350 * Special case for 12 byte copies
1351 */
1352#define LMEMCPY_C_LOG2 7 /* 128 bytes */
1353#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
1354 LMEMCPY_C_PAD
1355.Lmemcpy_c:
1356 and r2, r1, #0x03
1357 orr r2, r2, r0, lsl #2
1358 ands r2, r2, #0x0f
1359 sub r3, pc, #0x14
1360 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
1361
1362/*
1363 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1364 */
1365 ldr r2, [r1]
1366 ldr r3, [r1, #0x04]
1367 ldr r1, [r1, #0x08]
1368 str r2, [r0]
1369 str r3, [r0, #0x04]
1370 str r1, [r0, #0x08]
1371 bx lr
1372 LMEMCPY_C_PAD
1373
1374/*
1375 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1376 */
1377 ldrb r2, [r1, #0xb] /* r2 = ...B */
1378 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1379 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1380 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1381#ifdef __ARMEB__
1382 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
1383 str r2, [r0, #0x08]
1384 mov r2, ip, lsr #24 /* r2 = ...7 */
1385 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
1386 mov r1, r1, lsl #8 /* r1 = 012. */
1387 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
1388#else
1389 mov r2, r2, lsl #24 /* r2 = B... */
1390 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
1391 str r2, [r0, #0x08]
1392 mov r2, ip, lsl #24 /* r2 = 7... */
1393 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
1394 mov r1, r1, lsr #8 /* r1 = .210 */
1395 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
1396#endif
1397 str r2, [r0, #0x04]
1398 str r1, [r0]
1399 bx lr
1400 LMEMCPY_C_PAD
1401
1402/*
1403 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1404 */
1405 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1406 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1407 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1408 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1409#ifdef __ARMEB__
1410 mov r2, r2, lsl #16 /* r2 = 01.. */
1411 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
1412 str r2, [r0]
1413 mov r3, r3, lsl #16 /* r3 = 45.. */
1414 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
1415 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
1416#else
1417 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1418 str r2, [r0]
1419 mov r3, r3, lsr #16 /* r3 = ..54 */
1420 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
1421 mov r1, r1, lsl #16 /* r1 = BA.. */
1422 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
1423#endif
1424 str r3, [r0, #0x04]
1425 str r1, [r0, #0x08]
1426 bx lr
1427 LMEMCPY_C_PAD
1428
1429/*
1430 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1431 */
1432 ldrb r2, [r1] /* r2 = ...0 */
1433 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1434 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1435 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1436#ifdef __ARMEB__
1437 mov r2, r2, lsl #24 /* r2 = 0... */
1438 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1439 str r2, [r0]
1440 mov r3, r3, lsl #24 /* r3 = 4... */
1441 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
1442 mov r1, r1, lsr #8 /* r1 = .9AB */
1443 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
1444#else
1445 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1446 str r2, [r0]
1447 mov r3, r3, lsr #24 /* r3 = ...4 */
1448 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
1449 mov r1, r1, lsl #8 /* r1 = BA9. */
1450 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
1451#endif
1452 str r3, [r0, #0x04]
1453 str r1, [r0, #0x08]
1454 bx lr
1455 LMEMCPY_C_PAD
1456
1457/*
1458 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1459 */
1460 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1461 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1462 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
1463 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1464 strh r1, [r0, #0x01]
1465#ifdef __ARMEB__
1466 mov r1, r2, lsr #24 /* r1 = ...0 */
1467 strb r1, [r0]
1468 mov r1, r2, lsl #24 /* r1 = 3... */
1469 orr r2, r1, r3, lsr #8 /* r1 = 3456 */
1470 mov r1, r3, lsl #24 /* r1 = 7... */
1471 orr r1, r1, ip, lsr #8 /* r1 = 789A */
1472#else
1473 strb r2, [r0]
1474 mov r1, r2, lsr #24 /* r1 = ...3 */
1475 orr r2, r1, r3, lsl #8 /* r1 = 6543 */
1476 mov r1, r3, lsr #24 /* r1 = ...7 */
1477 orr r1, r1, ip, lsl #8 /* r1 = A987 */
1478 mov ip, ip, lsr #24 /* ip = ...B */
1479#endif
1480 str r2, [r0, #0x03]
1481 str r1, [r0, #0x07]
1482 strb ip, [r0, #0x0b]
1483 bx lr
1484 LMEMCPY_C_PAD
1485
1486/*
1487 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1488 */
1489 ldrb r2, [r1]
1490 ldrh r3, [r1, #0x01]
1491 ldr ip, [r1, #0x03]
1492 strb r2, [r0]
1493 ldr r2, [r1, #0x07]
1494 ldrb r1, [r1, #0x0b]
1495 strh r3, [r0, #0x01]
1496 str ip, [r0, #0x03]
1497 str r2, [r0, #0x07]
1498 strb r1, [r0, #0x0b]
1499 bx lr
1500 LMEMCPY_C_PAD
1501
1502/*
1503 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1504 */
1505 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1506 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1507 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1508 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1509#ifdef __ARMEB__
1510 mov r2, r2, ror #8 /* r2 = 1..0 */
1511 strb r2, [r0]
1512 mov r2, r2, lsr #16 /* r2 = ..1. */
1513 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
1514 strh r2, [r0, #0x01]
1515 mov r2, r3, lsl #8 /* r2 = 345. */
1516 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
1517 mov r2, ip, lsl #8 /* r2 = 789. */
1518 orr r2, r2, r1, lsr #8 /* r2 = 789A */
1519#else
1520 strb r2, [r0]
1521 mov r2, r2, lsr #8 /* r2 = ...1 */
1522 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1523 strh r2, [r0, #0x01]
1524 mov r2, r3, lsr #8 /* r2 = .543 */
1525 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
1526 mov r2, ip, lsr #8 /* r2 = .987 */
1527 orr r2, r2, r1, lsl #24 /* r2 = A987 */
1528 mov r1, r1, lsr #8 /* r1 = ...B */
1529#endif
1530 str r3, [r0, #0x03]
1531 str r2, [r0, #0x07]
1532 strb r1, [r0, #0x0b]
1533 bx lr
1534 LMEMCPY_C_PAD
1535
1536/*
1537 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1538 */
1539 ldrb r2, [r1]
1540 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1541 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1542 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1543 strb r2, [r0]
1544#ifdef __ARMEB__
1545 mov r2, r3, lsr #16 /* r2 = ..12 */
1546 strh r2, [r0, #0x01]
1547 mov r3, r3, lsl #16 /* r3 = 34.. */
1548 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
1549 mov ip, ip, lsl #16 /* ip = 78.. */
1550 orr ip, ip, r1, lsr #16 /* ip = 789A */
1551 mov r1, r1, lsr #8 /* r1 = .9AB */
1552#else
1553 strh r3, [r0, #0x01]
1554 mov r3, r3, lsr #16 /* r3 = ..43 */
1555 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
1556 mov ip, ip, lsr #16 /* ip = ..87 */
1557 orr ip, ip, r1, lsl #16 /* ip = A987 */
1558 mov r1, r1, lsr #16 /* r1 = ..xB */
1559#endif
1560 str r3, [r0, #0x03]
1561 str ip, [r0, #0x07]
1562 strb r1, [r0, #0x0b]
1563 bx lr
1564 LMEMCPY_C_PAD
1565
1566/*
1567 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1568 */
1569 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
1570 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1571 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
1572 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1573#ifdef __ARMEB__
1574 strh r1, [r0]
1575 mov r1, ip, lsl #16 /* r1 = 23.. */
1576 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
1577 mov r3, r3, lsl #16 /* r3 = 67.. */
1578 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
1579#else
1580 strh ip, [r0]
1581 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
1582 mov r3, r3, lsr #16 /* r3 = ..76 */
1583 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
1584 mov r2, r2, lsr #16 /* r2 = ..BA */
1585#endif
1586 str r1, [r0, #0x02]
1587 str r3, [r0, #0x06]
1588 strh r2, [r0, #0x0a]
1589 bx lr
1590 LMEMCPY_C_PAD
1591
1592/*
1593 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1594 */
1595 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1596 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1597 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
1598 strh ip, [r0]
1599 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1600 ldrb r1, [r1, #0x0b] /* r1 = ...B */
1601#ifdef __ARMEB__
1602 mov r2, r2, lsl #24 /* r2 = 2... */
1603 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
1604 mov r3, r3, lsl #24 /* r3 = 6... */
1605 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
1606 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
1607#else
1608 mov r2, r2, lsr #24 /* r2 = ...2 */
1609 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
1610 mov r3, r3, lsr #24 /* r3 = ...6 */
1611 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
1612 mov r1, r1, lsl #8 /* r1 = ..B. */
1613 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
1614#endif
1615 str r2, [r0, #0x02]
1616 str r3, [r0, #0x06]
1617 strh r1, [r0, #0x0a]
1618 bx lr
1619 LMEMCPY_C_PAD
1620
1621/*
1622 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1623 */
1624 ldrh r2, [r1]
1625 ldr r3, [r1, #0x02]
1626 ldr ip, [r1, #0x06]
1627 ldrh r1, [r1, #0x0a]
1628 strh r2, [r0]
1629 str r3, [r0, #0x02]
1630 str ip, [r0, #0x06]
1631 strh r1, [r0, #0x0a]
1632 bx lr
1633 LMEMCPY_C_PAD
1634
1635/*
1636 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
1637 */
1638 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
1639 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
1640 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
1641 strh ip, [r0, #0x0a]
1642 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1643 ldrb r1, [r1] /* r1 = ...0 */
1644#ifdef __ARMEB__
1645 mov r2, r2, lsr #24 /* r2 = ...9 */
1646 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
1647 mov r3, r3, lsr #24 /* r3 = ...5 */
1648 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
1649 mov r1, r1, lsl #8 /* r1 = ..0. */
1650 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
1651#else
1652 mov r2, r2, lsl #24 /* r2 = 9... */
1653 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
1654 mov r3, r3, lsl #24 /* r3 = 5... */
1655 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
1656 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
1657#endif
1658 str r2, [r0, #0x06]
1659 str r3, [r0, #0x02]
1660 strh r1, [r0]
1661 bx lr
1662 LMEMCPY_C_PAD
1663
1664/*
1665 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 *
 * Three aligned word loads; dst == 3 (mod 4), so the result is written
 * out as byte + word + word + halfword + byte, with the word stores at
 * dst+1 and dst+5 falling on 4-byte boundaries.  The barrel-shifter
 * sequences repack the loaded words into those destination lanes; lane
 * comments name source bytes 0-B.
1666 */
1667 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1668 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
1669 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
1670#ifdef __ARMEB__
1671 mov r3, r2, lsr #24 /* r3 = ...0 */
1672 strb r3, [r0]
1673 mov r2, r2, lsl #8 /* r2 = 123. */
1674 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
1675 str r2, [r0, #0x01]
1676 mov r2, ip, lsl #8 /* r2 = 567. */
1677 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
1678 str r2, [r0, #0x05]
1679 mov r2, r1, lsr #8 /* r2 = ..9A */
1680 strh r2, [r0, #0x09]
1681 strb r1, [r0, #0x0b] /* low byte of r1 is B */
1682#else
1683 strb r2, [r0] /* low byte of r2 is 0 */
1684 mov r3, r2, lsr #8 /* r3 = .321 */
1685 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
1686 str r3, [r0, #0x01]
1687 mov r3, ip, lsr #8 /* r3 = .765 */
1688 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
1689 str r3, [r0, #0x05]
1690 mov r1, r1, lsr #8 /* r1 = .BA9 */
1691 strh r1, [r0, #0x09]
1692 mov r1, r1, lsr #16 /* r1 = ...B */
1693 strb r1, [r0, #0x0b]
1694#endif
1695 bx lr
1696 LMEMCPY_C_PAD
1697
1698/*
1699 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 *
 * src == 1 (mod 4): the word loads at offsets 7, 3 and -1 are all
 * word-aligned.  The load at src-1 fetches the byte *before* the
 * source buffer ("x" in the lane comments); it is discarded by the
 * shifts.  NOTE(review): 1-byte under-read contained within the same
 * aligned word as src[0] -- presumed intentional/safe for this
 * target; confirm.
1700 */
1701 ldrb r2, [r1, #0x0b] /* r2 = ...B */
1702 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
1703 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1704 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1705 strb r2, [r0, #0x0b] /* dst byte 11 */
1706#ifdef __ARMEB__
1707 strh r3, [r0, #0x09] /* dst bytes 9-10 = 9,A */
1708 mov r3, r3, lsr #16 /* r3 = ..78 */
1709 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
1710 mov ip, ip, lsr #16 /* ip = ..34 */
1711 orr ip, ip, r1, lsl #16 /* ip = 1234 */
1712 mov r1, r1, lsr #16 /* r1 = ..x0 */
1713#else
1714 mov r2, r3, lsr #16 /* r2 = ..A9 */
1715 strh r2, [r0, #0x09]
1716 mov r3, r3, lsl #16 /* r3 = 87.. */
1717 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
1718 mov ip, ip, lsl #16 /* ip = 43.. */
1719 orr ip, ip, r1, lsr #16 /* ip = 4321 */
1720 mov r1, r1, lsr #8 /* r1 = .210 */
1721#endif
1722 str r3, [r0, #0x05] /* dst bytes 5-8 */
1723 str ip, [r0, #0x01] /* dst bytes 1-4 */
1724 strb r1, [r0] /* dst byte 0 (low byte of r1) */
1725 bx lr
1726 LMEMCPY_C_PAD
1727
1728/*
1729 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 *
 * src == 2 (mod 4): the word loads at offsets 2 and 6 are aligned.
 * dst == 3 (mod 4): the copy is written out as byte + word + word +
 * halfword + byte, with the word stores at dst+1 and dst+5 aligned.
 * The two endian variants mirror each other, so the whole entry
 * (loads included) is duplicated per byte order; lane comments name
 * source bytes 0-B.
1730 */
1731#ifdef __ARMEB__
1732 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
1733 ldr ip, [r1, #0x06] /* ip = 6789 */
1734 ldr r3, [r1, #0x02] /* r3 = 2345 */
1735 ldrh r1, [r1] /* r1 = ..01 */
1736 strb r2, [r0, #0x0b] /* dst byte 11 = B */
1737 mov r2, r2, lsr #8 /* r2 = ...A */
1738 orr r2, r2, ip, lsl #8 /* r2 = 789A */
1739 mov ip, ip, lsr #8 /* ip = .678 */
1740 orr ip, ip, r3, lsl #24 /* ip = 5678 */
1741 mov r3, r3, lsr #8 /* r3 = .234 */
1742 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
1743 mov r1, r1, lsr #8 /* r1 = ...0 */
1744 strb r1, [r0]
1745 str r3, [r0, #0x01]
1746 str ip, [r0, #0x05]
1747 strh r2, [r0, #0x09]
1748#else
1749 ldrh r2, [r1] /* r2 = ..10 */
1750 ldr r3, [r1, #0x02] /* r3 = 5432 */
1751 ldr ip, [r1, #0x06] /* ip = 9876 */
1752 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
1753 strb r2, [r0] /* dst byte 0 */
1754 mov r2, r2, lsr #8 /* r2 = ...1 */
1755 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1756 mov r3, r3, lsr #24 /* r3 = ...5 */
1757 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
1758 mov ip, ip, lsr #24 /* ip = ...9 */
1759 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
1760 mov r1, r1, lsr #8 /* r1 = ...B */
1761 str r2, [r0, #0x01]
1762 str r3, [r0, #0x05]
1763 strh ip, [r0, #0x09]
1764 strb r1, [r0, #0x0b]
1765#endif
1766 bx lr
1767 LMEMCPY_C_PAD
1768
1769/*
1770 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 *
 * Both pointers == 3 (mod 4): byte + word + word + halfword + byte,
 * with every access naturally aligned (offsets 1 and 5 are on 4-byte
 * boundaries, offset 9 on a 2-byte boundary).  Endian-neutral; r2 is
 * reused for the trailing halfword once the leading byte is stored.
1771 */
1772 ldrb r2, [r1] /* src byte 0 */
1773 ldr r3, [r1, #0x01] /* src bytes 1-4 */
1774 ldr ip, [r1, #0x05] /* src bytes 5-8 */
1775 strb r2, [r0]
1776 ldrh r2, [r1, #0x09] /* src bytes 9-10 */
1777 ldrb r1, [r1, #0x0b] /* src byte 11 */
1778 str r3, [r0, #0x01]
1779 str ip, [r0, #0x05]
1780 strh r2, [r0, #0x09]
1781 strb r1, [r0, #0x0b]
1782 bx lr
1783#endif /* !_STANDALONE */
1784END(memcpy)