memcpy_xscale.S revision 270882
1/* $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */ 2 3/* 4 * Copyright 2003 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Steve C. Woodford for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 
36 */ 37 38#include <machine/asm.h> 39__FBSDID("$FreeBSD: head/lib/libc/arm/string/memcpy_xscale.S 270882 2014-08-31 17:21:51Z ian $"); 40 41/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 42ENTRY(memcpy) 43 pld [r1] 44 cmp r2, #0x0c 45 ble .Lmemcpy_short /* <= 12 bytes */ 46 mov r3, r0 /* We must not clobber r0 */ 47 48 /* Word-align the destination buffer */ 49 ands ip, r3, #0x03 /* Already word aligned? */ 50 beq .Lmemcpy_wordaligned /* Yup */ 51 cmp ip, #0x02 52 ldrb ip, [r1], #0x01 53 sub r2, r2, #0x01 54 strb ip, [r3], #0x01 55 ldrleb ip, [r1], #0x01 56 suble r2, r2, #0x01 57 strleb ip, [r3], #0x01 58 ldrltb ip, [r1], #0x01 59 sublt r2, r2, #0x01 60 strltb ip, [r3], #0x01 61 62 /* Destination buffer is now word aligned */ 63.Lmemcpy_wordaligned: 64 ands ip, r1, #0x03 /* Is src also word-aligned? */ 65 bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 66 67 /* Quad-align the destination buffer */ 68 tst r3, #0x07 /* Already quad aligned? */ 69 ldrne ip, [r1], #0x04 70 stmfd sp!, {r4-r9} /* Free up some registers */ 71 subne r2, r2, #0x04 72 strne ip, [r3], #0x04 73 74 /* Destination buffer quad aligned, source is at least word aligned */ 75 subs r2, r2, #0x80 76 blt .Lmemcpy_w_lessthan128 77 78 /* Copy 128 bytes at a time */ 79.Lmemcpy_w_loop128: 80 ldr r4, [r1], #0x04 /* LD:00-03 */ 81 ldr r5, [r1], #0x04 /* LD:04-07 */ 82 pld [r1, #0x18] /* Prefetch 0x20 */ 83 ldr r6, [r1], #0x04 /* LD:08-0b */ 84 ldr r7, [r1], #0x04 /* LD:0c-0f */ 85 ldr r8, [r1], #0x04 /* LD:10-13 */ 86 ldr r9, [r1], #0x04 /* LD:14-17 */ 87 strd r4, [r3], #0x08 /* ST:00-07 */ 88 ldr r4, [r1], #0x04 /* LD:18-1b */ 89 ldr r5, [r1], #0x04 /* LD:1c-1f */ 90 strd r6, [r3], #0x08 /* ST:08-0f */ 91 ldr r6, [r1], #0x04 /* LD:20-23 */ 92 ldr r7, [r1], #0x04 /* LD:24-27 */ 93 pld [r1, #0x18] /* Prefetch 0x40 */ 94 strd r8, [r3], #0x08 /* ST:10-17 */ 95 ldr r8, [r1], #0x04 /* LD:28-2b */ 96 ldr r9, [r1], #0x04 /* LD:2c-2f */ 97 strd r4, [r3], #0x08 /* ST:18-1f */ 98 
ldr r4, [r1], #0x04 /* LD:30-33 */ 99 ldr r5, [r1], #0x04 /* LD:34-37 */ 100 strd r6, [r3], #0x08 /* ST:20-27 */ 101 ldr r6, [r1], #0x04 /* LD:38-3b */ 102 ldr r7, [r1], #0x04 /* LD:3c-3f */ 103 strd r8, [r3], #0x08 /* ST:28-2f */ 104 ldr r8, [r1], #0x04 /* LD:40-43 */ 105 ldr r9, [r1], #0x04 /* LD:44-47 */ 106 pld [r1, #0x18] /* Prefetch 0x60 */ 107 strd r4, [r3], #0x08 /* ST:30-37 */ 108 ldr r4, [r1], #0x04 /* LD:48-4b */ 109 ldr r5, [r1], #0x04 /* LD:4c-4f */ 110 strd r6, [r3], #0x08 /* ST:38-3f */ 111 ldr r6, [r1], #0x04 /* LD:50-53 */ 112 ldr r7, [r1], #0x04 /* LD:54-57 */ 113 strd r8, [r3], #0x08 /* ST:40-47 */ 114 ldr r8, [r1], #0x04 /* LD:58-5b */ 115 ldr r9, [r1], #0x04 /* LD:5c-5f */ 116 strd r4, [r3], #0x08 /* ST:48-4f */ 117 ldr r4, [r1], #0x04 /* LD:60-63 */ 118 ldr r5, [r1], #0x04 /* LD:64-67 */ 119 pld [r1, #0x18] /* Prefetch 0x80 */ 120 strd r6, [r3], #0x08 /* ST:50-57 */ 121 ldr r6, [r1], #0x04 /* LD:68-6b */ 122 ldr r7, [r1], #0x04 /* LD:6c-6f */ 123 strd r8, [r3], #0x08 /* ST:58-5f */ 124 ldr r8, [r1], #0x04 /* LD:70-73 */ 125 ldr r9, [r1], #0x04 /* LD:74-77 */ 126 strd r4, [r3], #0x08 /* ST:60-67 */ 127 ldr r4, [r1], #0x04 /* LD:78-7b */ 128 ldr r5, [r1], #0x04 /* LD:7c-7f */ 129 strd r6, [r3], #0x08 /* ST:68-6f */ 130 strd r8, [r3], #0x08 /* ST:70-77 */ 131 subs r2, r2, #0x80 132 strd r4, [r3], #0x08 /* ST:78-7f */ 133 bge .Lmemcpy_w_loop128 134 135.Lmemcpy_w_lessthan128: 136 adds r2, r2, #0x80 /* Adjust for extra sub */ 137 ldmeqfd sp!, {r4-r9} 138 bxeq lr /* Return now if done */ 139 subs r2, r2, #0x20 140 blt .Lmemcpy_w_lessthan32 141 142 /* Copy 32 bytes at a time */ 143.Lmemcpy_w_loop32: 144 ldr r4, [r1], #0x04 145 ldr r5, [r1], #0x04 146 pld [r1, #0x18] 147 ldr r6, [r1], #0x04 148 ldr r7, [r1], #0x04 149 ldr r8, [r1], #0x04 150 ldr r9, [r1], #0x04 151 strd r4, [r3], #0x08 152 ldr r4, [r1], #0x04 153 ldr r5, [r1], #0x04 154 strd r6, [r3], #0x08 155 strd r8, [r3], #0x08 156 subs r2, r2, #0x20 157 strd r4, [r3], #0x08 158 bge 
.Lmemcpy_w_loop32 159 160.Lmemcpy_w_lessthan32: 161 adds r2, r2, #0x20 /* Adjust for extra sub */ 162 ldmeqfd sp!, {r4-r9} 163 bxeq lr /* Return now if done */ 164 165 and r4, r2, #0x18 166 rsbs r4, r4, #0x18 167 addne pc, pc, r4, lsl #1 168 nop 169 170 /* At least 24 bytes remaining */ 171 ldr r4, [r1], #0x04 172 ldr r5, [r1], #0x04 173 sub r2, r2, #0x08 174 strd r4, [r3], #0x08 175 176 /* At least 16 bytes remaining */ 177 ldr r4, [r1], #0x04 178 ldr r5, [r1], #0x04 179 sub r2, r2, #0x08 180 strd r4, [r3], #0x08 181 182 /* At least 8 bytes remaining */ 183 ldr r4, [r1], #0x04 184 ldr r5, [r1], #0x04 185 subs r2, r2, #0x08 186 strd r4, [r3], #0x08 187 188 /* Less than 8 bytes remaining */ 189 ldmfd sp!, {r4-r9} 190 bxeq lr /* Return now if done */ 191 subs r2, r2, #0x04 192 ldrge ip, [r1], #0x04 193 strge ip, [r3], #0x04 194 bxeq lr /* Return now if done */ 195 addlt r2, r2, #0x04 196 ldrb ip, [r1], #0x01 197 cmp r2, #0x02 198 ldrgeb r2, [r1], #0x01 199 strb ip, [r3], #0x01 200 ldrgtb ip, [r1] 201 strgeb r2, [r3], #0x01 202 strgtb ip, [r3] 203 bx lr 204 205 206/* 207 * At this point, it has not been possible to word align both buffers. 208 * The destination buffer is word aligned, but the source buffer is not. 
209 */ 210.Lmemcpy_bad_align: 211 stmfd sp!, {r4-r7} 212 bic r1, r1, #0x03 213 cmp ip, #2 214 ldr ip, [r1], #0x04 215 bgt .Lmemcpy_bad3 216 beq .Lmemcpy_bad2 217 b .Lmemcpy_bad1 218 219.Lmemcpy_bad1_loop16: 220#ifdef __ARMEB__ 221 mov r4, ip, lsl #8 222#else 223 mov r4, ip, lsr #8 224#endif 225 ldr r5, [r1], #0x04 226 pld [r1, #0x018] 227 ldr r6, [r1], #0x04 228 ldr r7, [r1], #0x04 229 ldr ip, [r1], #0x04 230#ifdef __ARMEB__ 231 orr r4, r4, r5, lsr #24 232 mov r5, r5, lsl #8 233 orr r5, r5, r6, lsr #24 234 mov r6, r6, lsl #8 235 orr r6, r6, r7, lsr #24 236 mov r7, r7, lsl #8 237 orr r7, r7, ip, lsr #24 238#else 239 orr r4, r4, r5, lsl #24 240 mov r5, r5, lsr #8 241 orr r5, r5, r6, lsl #24 242 mov r6, r6, lsr #8 243 orr r6, r6, r7, lsl #24 244 mov r7, r7, lsr #8 245 orr r7, r7, ip, lsl #24 246#endif 247 str r4, [r3], #0x04 248 str r5, [r3], #0x04 249 str r6, [r3], #0x04 250 str r7, [r3], #0x04 251.Lmemcpy_bad1: 252 subs r2, r2, #0x10 253 bge .Lmemcpy_bad1_loop16 254 255 adds r2, r2, #0x10 256 ldmeqfd sp!, {r4-r7} 257 bxeq lr /* Return now if done */ 258 subs r2, r2, #0x04 259 sublt r1, r1, #0x03 260 blt .Lmemcpy_bad_done 261 262.Lmemcpy_bad1_loop4: 263#ifdef __ARMEB__ 264 mov r4, ip, lsl #8 265#else 266 mov r4, ip, lsr #8 267#endif 268 ldr ip, [r1], #0x04 269 subs r2, r2, #0x04 270#ifdef __ARMEB__ 271 orr r4, r4, ip, lsr #24 272#else 273 orr r4, r4, ip, lsl #24 274#endif 275 str r4, [r3], #0x04 276 bge .Lmemcpy_bad1_loop4 277 sub r1, r1, #0x03 278 b .Lmemcpy_bad_done 279 280.Lmemcpy_bad2_loop16: 281#ifdef __ARMEB__ 282 mov r4, ip, lsl #16 283#else 284 mov r4, ip, lsr #16 285#endif 286 ldr r5, [r1], #0x04 287 pld [r1, #0x018] 288 ldr r6, [r1], #0x04 289 ldr r7, [r1], #0x04 290 ldr ip, [r1], #0x04 291#ifdef __ARMEB__ 292 orr r4, r4, r5, lsr #16 293 mov r5, r5, lsl #16 294 orr r5, r5, r6, lsr #16 295 mov r6, r6, lsl #16 296 orr r6, r6, r7, lsr #16 297 mov r7, r7, lsl #16 298 orr r7, r7, ip, lsr #16 299#else 300 orr r4, r4, r5, lsl #16 301 mov r5, r5, lsr #16 302 orr 
r5, r5, r6, lsl #16 303 mov r6, r6, lsr #16 304 orr r6, r6, r7, lsl #16 305 mov r7, r7, lsr #16 306 orr r7, r7, ip, lsl #16 307#endif 308 str r4, [r3], #0x04 309 str r5, [r3], #0x04 310 str r6, [r3], #0x04 311 str r7, [r3], #0x04 312.Lmemcpy_bad2: 313 subs r2, r2, #0x10 314 bge .Lmemcpy_bad2_loop16 315 316 adds r2, r2, #0x10 317 ldmeqfd sp!, {r4-r7} 318 bxeq lr /* Return now if done */ 319 subs r2, r2, #0x04 320 sublt r1, r1, #0x02 321 blt .Lmemcpy_bad_done 322 323.Lmemcpy_bad2_loop4: 324#ifdef __ARMEB__ 325 mov r4, ip, lsl #16 326#else 327 mov r4, ip, lsr #16 328#endif 329 ldr ip, [r1], #0x04 330 subs r2, r2, #0x04 331#ifdef __ARMEB__ 332 orr r4, r4, ip, lsr #16 333#else 334 orr r4, r4, ip, lsl #16 335#endif 336 str r4, [r3], #0x04 337 bge .Lmemcpy_bad2_loop4 338 sub r1, r1, #0x02 339 b .Lmemcpy_bad_done 340 341.Lmemcpy_bad3_loop16: 342#ifdef __ARMEB__ 343 mov r4, ip, lsl #24 344#else 345 mov r4, ip, lsr #24 346#endif 347 ldr r5, [r1], #0x04 348 pld [r1, #0x018] 349 ldr r6, [r1], #0x04 350 ldr r7, [r1], #0x04 351 ldr ip, [r1], #0x04 352#ifdef __ARMEB__ 353 orr r4, r4, r5, lsr #8 354 mov r5, r5, lsl #24 355 orr r5, r5, r6, lsr #8 356 mov r6, r6, lsl #24 357 orr r6, r6, r7, lsr #8 358 mov r7, r7, lsl #24 359 orr r7, r7, ip, lsr #8 360#else 361 orr r4, r4, r5, lsl #8 362 mov r5, r5, lsr #24 363 orr r5, r5, r6, lsl #8 364 mov r6, r6, lsr #24 365 orr r6, r6, r7, lsl #8 366 mov r7, r7, lsr #24 367 orr r7, r7, ip, lsl #8 368#endif 369 str r4, [r3], #0x04 370 str r5, [r3], #0x04 371 str r6, [r3], #0x04 372 str r7, [r3], #0x04 373.Lmemcpy_bad3: 374 subs r2, r2, #0x10 375 bge .Lmemcpy_bad3_loop16 376 377 adds r2, r2, #0x10 378 ldmeqfd sp!, {r4-r7} 379 bxeq lr /* Return now if done */ 380 subs r2, r2, #0x04 381 sublt r1, r1, #0x01 382 blt .Lmemcpy_bad_done 383 384.Lmemcpy_bad3_loop4: 385#ifdef __ARMEB__ 386 mov r4, ip, lsl #24 387#else 388 mov r4, ip, lsr #24 389#endif 390 ldr ip, [r1], #0x04 391 subs r2, r2, #0x04 392#ifdef __ARMEB__ 393 orr r4, r4, ip, lsr #8 394#else 395 
orr r4, r4, ip, lsl #8 396#endif 397 str r4, [r3], #0x04 398 bge .Lmemcpy_bad3_loop4 399 sub r1, r1, #0x01 400 401.Lmemcpy_bad_done: 402 ldmfd sp!, {r4-r7} 403 adds r2, r2, #0x04 404 bxeq lr 405 ldrb ip, [r1], #0x01 406 cmp r2, #0x02 407 ldrgeb r2, [r1], #0x01 408 strb ip, [r3], #0x01 409 ldrgtb ip, [r1] 410 strgeb r2, [r3], #0x01 411 strgtb ip, [r3] 412 bx lr 413 414 415/* 416 * Handle short copies (less than 16 bytes), possibly misaligned. 417 * Some of these are *very* common, thanks to the network stack, 418 * and so are handled specially. 419 */ 420.Lmemcpy_short: 421#ifndef _STANDALONE 422 add pc, pc, r2, lsl #2 423 nop 424 bx lr /* 0x00 */ 425 b .Lmemcpy_bytewise /* 0x01 */ 426 b .Lmemcpy_bytewise /* 0x02 */ 427 b .Lmemcpy_bytewise /* 0x03 */ 428 b .Lmemcpy_4 /* 0x04 */ 429 b .Lmemcpy_bytewise /* 0x05 */ 430 b .Lmemcpy_6 /* 0x06 */ 431 b .Lmemcpy_bytewise /* 0x07 */ 432 b .Lmemcpy_8 /* 0x08 */ 433 b .Lmemcpy_bytewise /* 0x09 */ 434 b .Lmemcpy_bytewise /* 0x0a */ 435 b .Lmemcpy_bytewise /* 0x0b */ 436 b .Lmemcpy_c /* 0x0c */ 437#endif 438.Lmemcpy_bytewise: 439 mov r3, r0 /* We must not clobber r0 */ 440 ldrb ip, [r1], #0x01 4411: subs r2, r2, #0x01 442 strb ip, [r3], #0x01 443 ldrneb ip, [r1], #0x01 444 bne 1b 445 bx lr 446 447#ifndef _STANDALONE 448/****************************************************************************** 449 * Special case for 4 byte copies 450 */ 451#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 452#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 453 LMEMCPY_4_PAD 454.Lmemcpy_4: 455 and r2, r1, #0x03 456 orr r2, r2, r0, lsl #2 457 ands r2, r2, #0x0f 458 sub r3, pc, #0x14 459 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 460 461/* 462 * 0000: dst is 32-bit aligned, src is 32-bit aligned 463 */ 464 ldr r2, [r1] 465 str r2, [r0] 466 bx lr 467 LMEMCPY_4_PAD 468 469/* 470 * 0001: dst is 32-bit aligned, src is 8-bit aligned 471 */ 472 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 473 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 474#ifdef __ARMEB__ 475 
mov r3, r3, lsl #8 /* r3 = 012. */ 476 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 477#else 478 mov r3, r3, lsr #8 /* r3 = .210 */ 479 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 480#endif 481 str r3, [r0] 482 bx lr 483 LMEMCPY_4_PAD 484 485/* 486 * 0010: dst is 32-bit aligned, src is 16-bit aligned 487 */ 488#ifdef __ARMEB__ 489 ldrh r3, [r1] 490 ldrh r2, [r1, #0x02] 491#else 492 ldrh r3, [r1, #0x02] 493 ldrh r2, [r1] 494#endif 495 orr r3, r2, r3, lsl #16 496 str r3, [r0] 497 bx lr 498 LMEMCPY_4_PAD 499 500/* 501 * 0011: dst is 32-bit aligned, src is 8-bit aligned 502 */ 503 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 504 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 505#ifdef __ARMEB__ 506 mov r3, r3, lsl #24 /* r3 = 0... */ 507 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 508#else 509 mov r3, r3, lsr #24 /* r3 = ...0 */ 510 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 511#endif 512 str r3, [r0] 513 bx lr 514 LMEMCPY_4_PAD 515 516/* 517 * 0100: dst is 8-bit aligned, src is 32-bit aligned 518 */ 519 ldr r2, [r1] 520#ifdef __ARMEB__ 521 strb r2, [r0, #0x03] 522 mov r3, r2, lsr #8 523 mov r1, r2, lsr #24 524 strb r1, [r0] 525#else 526 strb r2, [r0] 527 mov r3, r2, lsr #8 528 mov r1, r2, lsr #24 529 strb r1, [r0, #0x03] 530#endif 531 strh r3, [r0, #0x01] 532 bx lr 533 LMEMCPY_4_PAD 534 535/* 536 * 0101: dst is 8-bit aligned, src is 8-bit aligned 537 */ 538 ldrb r2, [r1] 539 ldrh r3, [r1, #0x01] 540 ldrb r1, [r1, #0x03] 541 strb r2, [r0] 542 strh r3, [r0, #0x01] 543 strb r1, [r0, #0x03] 544 bx lr 545 LMEMCPY_4_PAD 546 547/* 548 * 0110: dst is 8-bit aligned, src is 16-bit aligned 549 */ 550 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 551 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ 552#ifdef __ARMEB__ 553 mov r1, r2, lsr #8 /* r1 = ...0 */ 554 strb r1, [r0] 555 mov r2, r2, lsl #8 /* r2 = .01. 
*/ 556 orr r2, r2, r3, lsr #8 /* r2 = .012 */ 557#else 558 strb r2, [r0] 559 mov r2, r2, lsr #8 /* r2 = ...1 */ 560 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 561 mov r3, r3, lsr #8 /* r3 = ...3 */ 562#endif 563 strh r2, [r0, #0x01] 564 strb r3, [r0, #0x03] 565 bx lr 566 LMEMCPY_4_PAD 567 568/* 569 * 0111: dst is 8-bit aligned, src is 8-bit aligned 570 */ 571 ldrb r2, [r1] 572 ldrh r3, [r1, #0x01] 573 ldrb r1, [r1, #0x03] 574 strb r2, [r0] 575 strh r3, [r0, #0x01] 576 strb r1, [r0, #0x03] 577 bx lr 578 LMEMCPY_4_PAD 579 580/* 581 * 1000: dst is 16-bit aligned, src is 32-bit aligned 582 */ 583 ldr r2, [r1] 584#ifdef __ARMEB__ 585 strh r2, [r0, #0x02] 586 mov r3, r2, lsr #16 587 strh r3, [r0] 588#else 589 strh r2, [r0] 590 mov r3, r2, lsr #16 591 strh r3, [r0, #0x02] 592#endif 593 bx lr 594 LMEMCPY_4_PAD 595 596/* 597 * 1001: dst is 16-bit aligned, src is 8-bit aligned 598 */ 599 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 600 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 601 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 602 strh r1, [r0] 603#ifdef __ARMEB__ 604 mov r2, r2, lsl #8 /* r2 = 012. */ 605 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 606#else 607 mov r2, r2, lsr #24 /* r2 = ...2 */ 608 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 609#endif 610 strh r2, [r0, #0x02] 611 bx lr 612 LMEMCPY_4_PAD 613 614/* 615 * 1010: dst is 16-bit aligned, src is 16-bit aligned 616 */ 617 ldrh r2, [r1] 618 ldrh r3, [r1, #0x02] 619 strh r2, [r0] 620 strh r3, [r0, #0x02] 621 bx lr 622 LMEMCPY_4_PAD 623 624/* 625 * 1011: dst is 16-bit aligned, src is 8-bit aligned 626 */ 627 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 628 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 629 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 630 strh r1, [r0, #0x02] 631#ifdef __ARMEB__ 632 mov r3, r3, lsr #24 /* r3 = ...1 */ 633 orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 634#else 635 mov r3, r3, lsl #8 /* r3 = 321. 
*/ 636 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 637#endif 638 strh r3, [r0] 639 bx lr 640 LMEMCPY_4_PAD 641 642/* 643 * 1100: dst is 8-bit aligned, src is 32-bit aligned 644 */ 645 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 646#ifdef __ARMEB__ 647 strb r2, [r0, #0x03] 648 mov r3, r2, lsr #8 649 mov r1, r2, lsr #24 650 strh r3, [r0, #0x01] 651 strb r1, [r0] 652#else 653 strb r2, [r0] 654 mov r3, r2, lsr #8 655 mov r1, r2, lsr #24 656 strh r3, [r0, #0x01] 657 strb r1, [r0, #0x03] 658#endif 659 bx lr 660 LMEMCPY_4_PAD 661 662/* 663 * 1101: dst is 8-bit aligned, src is 8-bit aligned 664 */ 665 ldrb r2, [r1] 666 ldrh r3, [r1, #0x01] 667 ldrb r1, [r1, #0x03] 668 strb r2, [r0] 669 strh r3, [r0, #0x01] 670 strb r1, [r0, #0x03] 671 bx lr 672 LMEMCPY_4_PAD 673 674/* 675 * 1110: dst is 8-bit aligned, src is 16-bit aligned 676 */ 677#ifdef __ARMEB__ 678 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 679 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 680 strb r3, [r0, #0x03] 681 mov r3, r3, lsr #8 /* r3 = ...2 */ 682 orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 683 strh r3, [r0, #0x01] 684 mov r2, r2, lsr #8 /* r2 = ...0 */ 685 strb r2, [r0] 686#else 687 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 688 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 689 strb r2, [r0] 690 mov r2, r2, lsr #8 /* r2 = ...1 */ 691 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 692 strh r2, [r0, #0x01] 693 mov r3, r3, lsr #8 /* r3 = ...3 */ 694 strb r3, [r0, #0x03] 695#endif 696 bx lr 697 LMEMCPY_4_PAD 698 699/* 700 * 1111: dst is 8-bit aligned, src is 8-bit aligned 701 */ 702 ldrb r2, [r1] 703 ldrh r3, [r1, #0x01] 704 ldrb r1, [r1, #0x03] 705 strb r2, [r0] 706 strh r3, [r0, #0x01] 707 strb r1, [r0, #0x03] 708 bx lr 709 LMEMCPY_4_PAD 710 711 712/****************************************************************************** 713 * Special case for 6 byte copies 714 */ 715#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 716#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 717 LMEMCPY_6_PAD 718.Lmemcpy_6: 719 and r2, r1, 
#0x03 720 orr r2, r2, r0, lsl #2 721 ands r2, r2, #0x0f 722 sub r3, pc, #0x14 723 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 724 725/* 726 * 0000: dst is 32-bit aligned, src is 32-bit aligned 727 */ 728 ldr r2, [r1] 729 ldrh r3, [r1, #0x04] 730 str r2, [r0] 731 strh r3, [r0, #0x04] 732 bx lr 733 LMEMCPY_6_PAD 734 735/* 736 * 0001: dst is 32-bit aligned, src is 8-bit aligned 737 */ 738 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 739 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 740#ifdef __ARMEB__ 741 mov r2, r2, lsl #8 /* r2 = 012. */ 742 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 743#else 744 mov r2, r2, lsr #8 /* r2 = .210 */ 745 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 746#endif 747 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 748 str r2, [r0] 749 strh r3, [r0, #0x04] 750 bx lr 751 LMEMCPY_6_PAD 752 753/* 754 * 0010: dst is 32-bit aligned, src is 16-bit aligned 755 */ 756 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 757 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 758#ifdef __ARMEB__ 759 mov r1, r3, lsr #16 /* r1 = ..23 */ 760 orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 761 str r1, [r0] 762 strh r3, [r0, #0x04] 763#else 764 mov r1, r3, lsr #16 /* r1 = ..54 */ 765 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 766 str r2, [r0] 767 strh r1, [r0, #0x04] 768#endif 769 bx lr 770 LMEMCPY_6_PAD 771 772/* 773 * 0011: dst is 32-bit aligned, src is 8-bit aligned 774 */ 775 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 776 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 777 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */ 778#ifdef __ARMEB__ 779 mov r2, r2, lsl #24 /* r2 = 0... */ 780 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 781 mov r3, r3, lsl #8 /* r3 = 234. */ 782 orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 783#else 784 mov r2, r2, lsr #24 /* r2 = ...0 */ 785 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 786 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 787 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 788#endif 789 str r2, [r0] 790 strh r1, [r0, #0x04] 791 bx lr 792 LMEMCPY_6_PAD 793 794/* 795 * 0100: dst is 8-bit aligned, src is 32-bit aligned 796 */ 797 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 798 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 799 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 800 strh r1, [r0, #0x01] 801#ifdef __ARMEB__ 802 mov r1, r3, lsr #24 /* r1 = ...0 */ 803 strb r1, [r0] 804 mov r3, r3, lsl #8 /* r3 = 123. */ 805 orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 806#else 807 strb r3, [r0] 808 mov r3, r3, lsr #24 /* r3 = ...3 */ 809 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 810 mov r2, r2, lsr #8 /* r2 = ...5 */ 811#endif 812 strh r3, [r0, #0x03] 813 strb r2, [r0, #0x05] 814 bx lr 815 LMEMCPY_6_PAD 816 817/* 818 * 0101: dst is 8-bit aligned, src is 8-bit aligned 819 */ 820 ldrb r2, [r1] 821 ldrh r3, [r1, #0x01] 822 ldrh ip, [r1, #0x03] 823 ldrb r1, [r1, #0x05] 824 strb r2, [r0] 825 strh r3, [r0, #0x01] 826 strh ip, [r0, #0x03] 827 strb r1, [r0, #0x05] 828 bx lr 829 LMEMCPY_6_PAD 830 831/* 832 * 0110: dst is 8-bit aligned, src is 16-bit aligned 833 */ 834 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 835 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 836#ifdef __ARMEB__ 837 mov r3, r2, lsr #8 /* r3 = ...0 */ 838 strb r3, [r0] 839 strb r1, [r0, #0x05] 840 mov r3, r1, lsr #8 /* r3 = .234 */ 841 strh r3, [r0, #0x03] 842 mov r3, r2, lsl #8 /* r3 = .01. 
*/ 843 orr r3, r3, r1, lsr #24 /* r3 = .012 */ 844 strh r3, [r0, #0x01] 845#else 846 strb r2, [r0] 847 mov r3, r1, lsr #24 848 strb r3, [r0, #0x05] 849 mov r3, r1, lsr #8 /* r3 = .543 */ 850 strh r3, [r0, #0x03] 851 mov r3, r2, lsr #8 /* r3 = ...1 */ 852 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 853 strh r3, [r0, #0x01] 854#endif 855 bx lr 856 LMEMCPY_6_PAD 857 858/* 859 * 0111: dst is 8-bit aligned, src is 8-bit aligned 860 */ 861 ldrb r2, [r1] 862 ldrh r3, [r1, #0x01] 863 ldrh ip, [r1, #0x03] 864 ldrb r1, [r1, #0x05] 865 strb r2, [r0] 866 strh r3, [r0, #0x01] 867 strh ip, [r0, #0x03] 868 strb r1, [r0, #0x05] 869 bx lr 870 LMEMCPY_6_PAD 871 872/* 873 * 1000: dst is 16-bit aligned, src is 32-bit aligned 874 */ 875#ifdef __ARMEB__ 876 ldr r2, [r1] /* r2 = 0123 */ 877 ldrh r3, [r1, #0x04] /* r3 = ..45 */ 878 mov r1, r2, lsr #16 /* r1 = ..01 */ 879 orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 880 strh r1, [r0] 881 str r3, [r0, #0x02] 882#else 883 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 884 ldr r3, [r1] /* r3 = 3210 */ 885 mov r2, r2, lsl #16 /* r2 = 54.. */ 886 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 887 strh r3, [r0] 888 str r2, [r0, #0x02] 889#endif 890 bx lr 891 LMEMCPY_6_PAD 892 893/* 894 * 1001: dst is 16-bit aligned, src is 8-bit aligned 895 */ 896 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 897 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 898 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 899#ifdef __ARMEB__ 900 mov r2, r2, lsr #8 /* r2 = .345 */ 901 orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 902#else 903 mov r2, r2, lsl #8 /* r2 = 543. 
*/ 904 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 905#endif 906 strh r1, [r0] 907 str r2, [r0, #0x02] 908 bx lr 909 LMEMCPY_6_PAD 910 911/* 912 * 1010: dst is 16-bit aligned, src is 16-bit aligned 913 */ 914 ldrh r2, [r1] 915 ldr r3, [r1, #0x02] 916 strh r2, [r0] 917 str r3, [r0, #0x02] 918 bx lr 919 LMEMCPY_6_PAD 920 921/* 922 * 1011: dst is 16-bit aligned, src is 8-bit aligned 923 */ 924 ldrb r3, [r1] /* r3 = ...0 */ 925 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 926 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 927#ifdef __ARMEB__ 928 mov r3, r3, lsl #8 /* r3 = ..0. */ 929 orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 930 orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 931#else 932 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 933 mov r1, r1, lsl #24 /* r1 = 5... */ 934 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 935#endif 936 strh r3, [r0] 937 str r1, [r0, #0x02] 938 bx lr 939 LMEMCPY_6_PAD 940 941/* 942 * 1100: dst is 8-bit aligned, src is 32-bit aligned 943 */ 944 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 945 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 946#ifdef __ARMEB__ 947 mov r3, r2, lsr #24 /* r3 = ...0 */ 948 strb r3, [r0] 949 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 950 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 951#else 952 strb r2, [r0] 953 mov r2, r2, lsr #8 /* r2 = .321 */ 954 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 955 mov r1, r1, lsr #8 /* r1 = ...5 */ 956#endif 957 str r2, [r0, #0x01] 958 strb r1, [r0, #0x05] 959 bx lr 960 LMEMCPY_6_PAD 961 962/* 963 * 1101: dst is 8-bit aligned, src is 8-bit aligned 964 */ 965 ldrb r2, [r1] 966 ldrh r3, [r1, #0x01] 967 ldrh ip, [r1, #0x03] 968 ldrb r1, [r1, #0x05] 969 strb r2, [r0] 970 strh r3, [r0, #0x01] 971 strh ip, [r0, #0x03] 972 strb r1, [r0, #0x05] 973 bx lr 974 LMEMCPY_6_PAD 975 976/* 977 * 1110: dst is 8-bit aligned, src is 16-bit aligned 978 */ 979 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 980 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 981#ifdef __ARMEB__ 982 mov r3, r2, lsr #8 /* r3 = ...0 */ 983 strb r3, [r0] 984 mov r2, r2, lsl #24 /* r2 = 1... */ 985 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 986#else 987 strb r2, [r0] 988 mov r2, r2, lsr #8 /* r2 = ...1 */ 989 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 990 mov r1, r1, lsr #24 /* r1 = ...5 */ 991#endif 992 str r2, [r0, #0x01] 993 strb r1, [r0, #0x05] 994 bx lr 995 LMEMCPY_6_PAD 996 997/* 998 * 1111: dst is 8-bit aligned, src is 8-bit aligned 999 */ 1000 ldrb r2, [r1] 1001 ldr r3, [r1, #0x01] 1002 ldrb r1, [r1, #0x05] 1003 strb r2, [r0] 1004 str r3, [r0, #0x01] 1005 strb r1, [r0, #0x05] 1006 bx lr 1007 LMEMCPY_6_PAD 1008 1009 1010/****************************************************************************** 1011 * Special case for 8 byte copies 1012 */ 1013#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 1014#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 1015 LMEMCPY_8_PAD 1016.Lmemcpy_8: 1017 and r2, r1, #0x03 1018 orr r2, r2, r0, lsl #2 1019 ands r2, r2, #0x0f 1020 sub r3, pc, #0x14 1021 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 1022 1023/* 1024 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1025 */ 1026 ldr r2, [r1] 1027 ldr r3, [r1, #0x04] 1028 str r2, [r0] 1029 str r3, [r0, #0x04] 1030 bx lr 1031 LMEMCPY_8_PAD 1032 1033/* 
1034 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1035 */ 1036 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1037 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 1038 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1039#ifdef __ARMEB__ 1040 mov r3, r3, lsl #8 /* r3 = 012. */ 1041 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1042 orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 1043#else 1044 mov r3, r3, lsr #8 /* r3 = .210 */ 1045 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1046 mov r1, r1, lsl #24 /* r1 = 7... */ 1047 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 1048#endif 1049 str r3, [r0] 1050 str r2, [r0, #0x04] 1051 bx lr 1052 LMEMCPY_8_PAD 1053 1054/* 1055 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1056 */ 1057 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1058 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1059 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1060#ifdef __ARMEB__ 1061 mov r2, r2, lsl #16 /* r2 = 01.. */ 1062 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 1063 orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 1064#else 1065 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1066 mov r3, r3, lsr #16 /* r3 = ..54 */ 1067 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 1068#endif 1069 str r2, [r0] 1070 str r3, [r0, #0x04] 1071 bx lr 1072 LMEMCPY_8_PAD 1073 1074/* 1075 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1076 */ 1077 ldrb r3, [r1] /* r3 = ...0 */ 1078 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1079 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 1080#ifdef __ARMEB__ 1081 mov r3, r3, lsl #24 /* r3 = 0... */ 1082 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1083 mov r2, r2, lsl #24 /* r2 = 4... 
 */
	orr	r2, r2, r1, lsr #8	/* r2 = 4567 */
#else
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r2, r2, lsr #24		/* r2 = ...4 */
	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
#endif
	str	r3, [r0]
	str	r2, [r0, #0x04]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
#ifdef __ARMEB__
	mov	r1, r3, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r3, lsr #8		/* r1 = .012 */
	strb	r2, [r0, #0x07]
	mov	r3, r3, lsl #24		/* r3 = 3... */
	orr	r3, r3, r2, lsr #8	/* r3 = 3456 */
#else
	strb	r3, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r1, r3, lsr #8		/* r1 = .321 */
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
#endif
	strh	r1, [r0, #0x01]
	str	r3, [r0, #0x03]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #8		/* ip = .01. */
	orr	ip, ip, r3, lsr #24	/* ip = .012 */
	strb	r1, [r0, #0x07]
	mov	r3, r3, lsl #8		/* r3 = 345. */
	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
#else
	strb	r2, [r0]		/* 0 */
	mov	ip, r1, lsr #8		/* ip = ...7 */
	strb	ip, [r0, #0x07]		/* 7 */
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r3, r3, lsr #8		/* r3 = .543 */
	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
#endif
	strh	ip, [r0, #0x01]
	str	r3, [r0, #0x03]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
#ifdef __ARMEB__
	strh	r3, [r0, #0x01]
	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
#else
	strh	ip, [r0, #0x01]
	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
#endif
	str	r2, [r0, #0x03]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, r3, lsr #16		/* r1 = ..45 */
	orr	r2, r1, r2, lsl #16	/* r2 = 2345 */
#else
	strh	r2, [r0]
	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
#endif
	str	r2, [r0, #0x02]
	strh	r3, [r0, #0x06]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
#ifdef __ARMEB__
	mov	r1, r2, lsl #24		/* r1 = 2... */
	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
#else
	mov	r1, r2, lsr #24		/* r1 = ...2 */
	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
#endif
	str	r1, [r0, #0x02]
	strh	r3, [r0, #0x06]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	ip, [r1, #0x02]
	ldrh	r3, [r1, #0x06]
	strh	r2, [r0]
	str	ip, [r0, #0x02]
	strh	r3, [r0, #0x06]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	ip, [r1]		/* ip = ...0 */
	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
	strh	r1, [r0, #0x06]
#ifdef __ARMEB__
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
	mov	r2, r2, lsr #24		/* r2 = ...1 */
	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
#else
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
#endif
	str	r3, [r0, #0x02]
	strh	r2, [r0]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
	strh	r1, [r0, #0x05]
#ifdef __ARMEB__
	strb	r3, [r0, #0x07]
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
#else
	strb	r2, [r0]
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
	str	r2, [r0, #0x01]
#endif
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
#ifdef __ARMEB__
	strh	ip, [r0, #0x05]
	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
#else
	strh	r3, [r0, #0x05]
	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #24		/* ip = 1... */
	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
	strb	r1, [r0, #0x07]
	mov	r1, r1, lsr #8		/* r1 = ...6 */
	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
#else
	strb	r2, [r0]
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r2, r1, lsr #8		/* r2 = ...7 */
	strb	r2, [r0, #0x07]
	mov	r1, r1, lsl #8		/* r1 = .76. */
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
#endif
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/******************************************************************************
 * Special case for 12 byte copies
 *
 * Entered with r0 = dst, r1 = src and exactly 12 bytes to copy.  The low
 * two bits of each pointer select one of 16 handlers below; every handler
 * is padded to a fixed 1 << LMEMCPY_C_LOG2 (128) byte slot so the case
 * index can be converted directly into a branch target.  Because of this
 * padding, no handler may grow beyond 32 instructions.
 */
#define	LMEMCPY_C_LOG2	7	/* 128 bytes */
#define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
.Lmemcpy_c:
	and	r2, r1, #0x03		/* r2 = src & 3 */
	orr	r2, r2, r0, lsl #2	/* r2 = (dst << 2) | (src & 3) */
	ands	r2, r2, #0x0f		/* r2 = 4-bit case index; Z set for 0000 */
	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c (pc reads ahead by 8) */
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* jump to slot; case 0000 falls through */

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
#ifdef __ARMEB__
	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsr #24		/* r2 = ...7 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
	mov	r1, r1, lsl #8		/* r1 = 012. */
	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
#else
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x04]
	str	r1, [r0]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #16		/* r3 = 45.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #24		/* r3 = 4... */
	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r2, lsl #24		/* r1 = 3... */
	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
	mov	r1, r3, lsl #24		/* r1 = 7... */
	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
#else
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
#endif
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, ror #8		/* r2 = 1..0 */
	strb	r2, [r0]
	mov	r2, r2, lsr #16		/* r2 = ..1. */
	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsl #8		/* r2 = 345. */
	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
	mov	r2, ip, lsl #8		/* r2 = 789. */
	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
#endif
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	strb	r2, [r0]
#ifdef __ARMEB__
	mov	r2, r3, lsr #16		/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsl #16		/* r3 = 34.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
	mov	ip, ip, lsl #16		/* ip = 78.. */
	orr	ip, ip, r1, lsr #16	/* ip = 789A */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
#else
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
#endif
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, ip, lsl #16		/* r1 = 23.. */
	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
	mov	r3, r3, lsl #16		/* r3 = 67.. */
	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
#else
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
#endif
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 2... */
	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
	mov	r3, r3, lsl #24		/* r3 = 6... */
	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
#else
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
#endif
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #24		/* r2 = ...9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
	mov	r1, r1, lsl #8		/* r1 = ..0. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
#else
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
	mov	r2, ip, lsl #8		/* r2 = 567. */
	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
	str	r2, [r0, #0x05]
	mov	r2, r1, lsr #8		/* r2 = ..9A */
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
#endif
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	strb	r2, [r0, #0x0b]
#ifdef __ARMEB__
	strh	r3, [r0, #0x09]
	mov	r3, r3, lsr #16		/* r3 = ..78 */
	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
	mov	ip, ip, lsr #16		/* ip = ..34 */
	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
	mov	r1, r1, lsr #16		/* r1 = ..x0 */
#else
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43.. */
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
#endif
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
	ldr	ip, [r1, #0x06]		/* ip = 6789 */
	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
	ldrh	r1, [r1]		/* r1 = ..01 */
	strb	r2, [r0, #0x0b]
	mov	r2, r2, lsr #8		/* r2 = ...A */
	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
	mov	ip, ip, lsr #8		/* ip = .678 */
	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
	mov	r3, r3, lsr #8		/* r3 = .234 */
	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
	mov	r1, r1, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
#else
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#endif
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	bx	lr
#endif /* !_STANDALONE */
END(memcpy)