1/* $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */ 2 3/* 4 * Copyright 2003 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Steve C. Woodford for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 
36 */ 37 38#include <machine/asm.h> 39__FBSDID("$FreeBSD$"); 40 41.syntax unified 42 43/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 44ENTRY(memcpy) 45 pld [r1] 46 cmp r2, #0x0c 47 ble .Lmemcpy_short /* <= 12 bytes */ 48 mov r3, r0 /* We must not clobber r0 */ 49 50 /* Word-align the destination buffer */ 51 ands ip, r3, #0x03 /* Already word aligned? */ 52 beq .Lmemcpy_wordaligned /* Yup */ 53 cmp ip, #0x02 54 ldrb ip, [r1], #0x01 55 sub r2, r2, #0x01 56 strb ip, [r3], #0x01 57 ldrble ip, [r1], #0x01 58 suble r2, r2, #0x01 59 strble ip, [r3], #0x01 60 ldrblt ip, [r1], #0x01 61 sublt r2, r2, #0x01 62 strblt ip, [r3], #0x01 63 64 /* Destination buffer is now word aligned */ 65.Lmemcpy_wordaligned: 66 ands ip, r1, #0x03 /* Is src also word-aligned? */ 67 bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 68 69 /* Quad-align the destination buffer */ 70 tst r3, #0x07 /* Already quad aligned? */ 71 ldrne ip, [r1], #0x04 72 stmfd sp!, {r4-r9} /* Free up some registers */ 73 subne r2, r2, #0x04 74 strne ip, [r3], #0x04 75 76 /* Destination buffer quad aligned, source is at least word aligned */ 77 subs r2, r2, #0x80 78 blt .Lmemcpy_w_lessthan128 79 80 /* Copy 128 bytes at a time */ 81.Lmemcpy_w_loop128: 82 ldr r4, [r1], #0x04 /* LD:00-03 */ 83 ldr r5, [r1], #0x04 /* LD:04-07 */ 84 pld [r1, #0x18] /* Prefetch 0x20 */ 85 ldr r6, [r1], #0x04 /* LD:08-0b */ 86 ldr r7, [r1], #0x04 /* LD:0c-0f */ 87 ldr r8, [r1], #0x04 /* LD:10-13 */ 88 ldr r9, [r1], #0x04 /* LD:14-17 */ 89 strd r4, [r3], #0x08 /* ST:00-07 */ 90 ldr r4, [r1], #0x04 /* LD:18-1b */ 91 ldr r5, [r1], #0x04 /* LD:1c-1f */ 92 strd r6, [r3], #0x08 /* ST:08-0f */ 93 ldr r6, [r1], #0x04 /* LD:20-23 */ 94 ldr r7, [r1], #0x04 /* LD:24-27 */ 95 pld [r1, #0x18] /* Prefetch 0x40 */ 96 strd r8, [r3], #0x08 /* ST:10-17 */ 97 ldr r8, [r1], #0x04 /* LD:28-2b */ 98 ldr r9, [r1], #0x04 /* LD:2c-2f */ 99 strd r4, [r3], #0x08 /* ST:18-1f */ 100 ldr r4, [r1], #0x04 /* LD:30-33 */ 101 ldr r5, [r1], 
#0x04 /* LD:34-37 */ 102 strd r6, [r3], #0x08 /* ST:20-27 */ 103 ldr r6, [r1], #0x04 /* LD:38-3b */ 104 ldr r7, [r1], #0x04 /* LD:3c-3f */ 105 strd r8, [r3], #0x08 /* ST:28-2f */ 106 ldr r8, [r1], #0x04 /* LD:40-43 */ 107 ldr r9, [r1], #0x04 /* LD:44-47 */ 108 pld [r1, #0x18] /* Prefetch 0x60 */ 109 strd r4, [r3], #0x08 /* ST:30-37 */ 110 ldr r4, [r1], #0x04 /* LD:48-4b */ 111 ldr r5, [r1], #0x04 /* LD:4c-4f */ 112 strd r6, [r3], #0x08 /* ST:38-3f */ 113 ldr r6, [r1], #0x04 /* LD:50-53 */ 114 ldr r7, [r1], #0x04 /* LD:54-57 */ 115 strd r8, [r3], #0x08 /* ST:40-47 */ 116 ldr r8, [r1], #0x04 /* LD:58-5b */ 117 ldr r9, [r1], #0x04 /* LD:5c-5f */ 118 strd r4, [r3], #0x08 /* ST:48-4f */ 119 ldr r4, [r1], #0x04 /* LD:60-63 */ 120 ldr r5, [r1], #0x04 /* LD:64-67 */ 121 pld [r1, #0x18] /* Prefetch 0x80 */ 122 strd r6, [r3], #0x08 /* ST:50-57 */ 123 ldr r6, [r1], #0x04 /* LD:68-6b */ 124 ldr r7, [r1], #0x04 /* LD:6c-6f */ 125 strd r8, [r3], #0x08 /* ST:58-5f */ 126 ldr r8, [r1], #0x04 /* LD:70-73 */ 127 ldr r9, [r1], #0x04 /* LD:74-77 */ 128 strd r4, [r3], #0x08 /* ST:60-67 */ 129 ldr r4, [r1], #0x04 /* LD:78-7b */ 130 ldr r5, [r1], #0x04 /* LD:7c-7f */ 131 strd r6, [r3], #0x08 /* ST:68-6f */ 132 strd r8, [r3], #0x08 /* ST:70-77 */ 133 subs r2, r2, #0x80 134 strd r4, [r3], #0x08 /* ST:78-7f */ 135 bge .Lmemcpy_w_loop128 136 137.Lmemcpy_w_lessthan128: 138 adds r2, r2, #0x80 /* Adjust for extra sub */ 139 ldmfdeq sp!, {r4-r9} 140 bxeq lr /* Return now if done */ 141 subs r2, r2, #0x20 142 blt .Lmemcpy_w_lessthan32 143 144 /* Copy 32 bytes at a time */ 145.Lmemcpy_w_loop32: 146 ldr r4, [r1], #0x04 147 ldr r5, [r1], #0x04 148 pld [r1, #0x18] 149 ldr r6, [r1], #0x04 150 ldr r7, [r1], #0x04 151 ldr r8, [r1], #0x04 152 ldr r9, [r1], #0x04 153 strd r4, [r3], #0x08 154 ldr r4, [r1], #0x04 155 ldr r5, [r1], #0x04 156 strd r6, [r3], #0x08 157 strd r8, [r3], #0x08 158 subs r2, r2, #0x20 159 strd r4, [r3], #0x08 160 bge .Lmemcpy_w_loop32 161 162.Lmemcpy_w_lessthan32: 163 adds r2, r2, 
#0x20 /* Adjust for extra sub */ 164 ldmfdeq sp!, {r4-r9} 165 bxeq lr /* Return now if done */ 166 167 and r4, r2, #0x18 168 rsbs r4, r4, #0x18 169 addne pc, pc, r4, lsl #1 170 nop 171 172 /* At least 24 bytes remaining */ 173 ldr r4, [r1], #0x04 174 ldr r5, [r1], #0x04 175 sub r2, r2, #0x08 176 strd r4, [r3], #0x08 177 178 /* At least 16 bytes remaining */ 179 ldr r4, [r1], #0x04 180 ldr r5, [r1], #0x04 181 sub r2, r2, #0x08 182 strd r4, [r3], #0x08 183 184 /* At least 8 bytes remaining */ 185 ldr r4, [r1], #0x04 186 ldr r5, [r1], #0x04 187 subs r2, r2, #0x08 188 strd r4, [r3], #0x08 189 190 /* Less than 8 bytes remaining */ 191 ldmfd sp!, {r4-r9} 192 bxeq lr /* Return now if done */ 193 subs r2, r2, #0x04 194 ldrge ip, [r1], #0x04 195 strge ip, [r3], #0x04 196 bxeq lr /* Return now if done */ 197 addlt r2, r2, #0x04 198 ldrb ip, [r1], #0x01 199 cmp r2, #0x02 200 ldrbge r2, [r1], #0x01 201 strb ip, [r3], #0x01 202 ldrbgt ip, [r1] 203 strbge r2, [r3], #0x01 204 strbgt ip, [r3] 205 bx lr 206 207 208/* 209 * At this point, it has not been possible to word align both buffers. 210 * The destination buffer is word aligned, but the source buffer is not. 
211 */ 212.Lmemcpy_bad_align: 213 stmfd sp!, {r4-r7} 214 bic r1, r1, #0x03 215 cmp ip, #2 216 ldr ip, [r1], #0x04 217 bgt .Lmemcpy_bad3 218 beq .Lmemcpy_bad2 219 b .Lmemcpy_bad1 220 221.Lmemcpy_bad1_loop16: 222#ifdef __ARMEB__ 223 mov r4, ip, lsl #8 224#else 225 mov r4, ip, lsr #8 226#endif 227 ldr r5, [r1], #0x04 228 pld [r1, #0x018] 229 ldr r6, [r1], #0x04 230 ldr r7, [r1], #0x04 231 ldr ip, [r1], #0x04 232#ifdef __ARMEB__ 233 orr r4, r4, r5, lsr #24 234 mov r5, r5, lsl #8 235 orr r5, r5, r6, lsr #24 236 mov r6, r6, lsl #8 237 orr r6, r6, r7, lsr #24 238 mov r7, r7, lsl #8 239 orr r7, r7, ip, lsr #24 240#else 241 orr r4, r4, r5, lsl #24 242 mov r5, r5, lsr #8 243 orr r5, r5, r6, lsl #24 244 mov r6, r6, lsr #8 245 orr r6, r6, r7, lsl #24 246 mov r7, r7, lsr #8 247 orr r7, r7, ip, lsl #24 248#endif 249 str r4, [r3], #0x04 250 str r5, [r3], #0x04 251 str r6, [r3], #0x04 252 str r7, [r3], #0x04 253.Lmemcpy_bad1: 254 subs r2, r2, #0x10 255 bge .Lmemcpy_bad1_loop16 256 257 adds r2, r2, #0x10 258 ldmfdeq sp!, {r4-r7} 259 bxeq lr /* Return now if done */ 260 subs r2, r2, #0x04 261 sublt r1, r1, #0x03 262 blt .Lmemcpy_bad_done 263 264.Lmemcpy_bad1_loop4: 265#ifdef __ARMEB__ 266 mov r4, ip, lsl #8 267#else 268 mov r4, ip, lsr #8 269#endif 270 ldr ip, [r1], #0x04 271 subs r2, r2, #0x04 272#ifdef __ARMEB__ 273 orr r4, r4, ip, lsr #24 274#else 275 orr r4, r4, ip, lsl #24 276#endif 277 str r4, [r3], #0x04 278 bge .Lmemcpy_bad1_loop4 279 sub r1, r1, #0x03 280 b .Lmemcpy_bad_done 281 282.Lmemcpy_bad2_loop16: 283#ifdef __ARMEB__ 284 mov r4, ip, lsl #16 285#else 286 mov r4, ip, lsr #16 287#endif 288 ldr r5, [r1], #0x04 289 pld [r1, #0x018] 290 ldr r6, [r1], #0x04 291 ldr r7, [r1], #0x04 292 ldr ip, [r1], #0x04 293#ifdef __ARMEB__ 294 orr r4, r4, r5, lsr #16 295 mov r5, r5, lsl #16 296 orr r5, r5, r6, lsr #16 297 mov r6, r6, lsl #16 298 orr r6, r6, r7, lsr #16 299 mov r7, r7, lsl #16 300 orr r7, r7, ip, lsr #16 301#else 302 orr r4, r4, r5, lsl #16 303 mov r5, r5, lsr #16 304 orr 
r5, r5, r6, lsl #16 305 mov r6, r6, lsr #16 306 orr r6, r6, r7, lsl #16 307 mov r7, r7, lsr #16 308 orr r7, r7, ip, lsl #16 309#endif 310 str r4, [r3], #0x04 311 str r5, [r3], #0x04 312 str r6, [r3], #0x04 313 str r7, [r3], #0x04 314.Lmemcpy_bad2: 315 subs r2, r2, #0x10 316 bge .Lmemcpy_bad2_loop16 317 318 adds r2, r2, #0x10 319 ldmfdeq sp!, {r4-r7} 320 bxeq lr /* Return now if done */ 321 subs r2, r2, #0x04 322 sublt r1, r1, #0x02 323 blt .Lmemcpy_bad_done 324 325.Lmemcpy_bad2_loop4: 326#ifdef __ARMEB__ 327 mov r4, ip, lsl #16 328#else 329 mov r4, ip, lsr #16 330#endif 331 ldr ip, [r1], #0x04 332 subs r2, r2, #0x04 333#ifdef __ARMEB__ 334 orr r4, r4, ip, lsr #16 335#else 336 orr r4, r4, ip, lsl #16 337#endif 338 str r4, [r3], #0x04 339 bge .Lmemcpy_bad2_loop4 340 sub r1, r1, #0x02 341 b .Lmemcpy_bad_done 342 343.Lmemcpy_bad3_loop16: 344#ifdef __ARMEB__ 345 mov r4, ip, lsl #24 346#else 347 mov r4, ip, lsr #24 348#endif 349 ldr r5, [r1], #0x04 350 pld [r1, #0x018] 351 ldr r6, [r1], #0x04 352 ldr r7, [r1], #0x04 353 ldr ip, [r1], #0x04 354#ifdef __ARMEB__ 355 orr r4, r4, r5, lsr #8 356 mov r5, r5, lsl #24 357 orr r5, r5, r6, lsr #8 358 mov r6, r6, lsl #24 359 orr r6, r6, r7, lsr #8 360 mov r7, r7, lsl #24 361 orr r7, r7, ip, lsr #8 362#else 363 orr r4, r4, r5, lsl #8 364 mov r5, r5, lsr #24 365 orr r5, r5, r6, lsl #8 366 mov r6, r6, lsr #24 367 orr r6, r6, r7, lsl #8 368 mov r7, r7, lsr #24 369 orr r7, r7, ip, lsl #8 370#endif 371 str r4, [r3], #0x04 372 str r5, [r3], #0x04 373 str r6, [r3], #0x04 374 str r7, [r3], #0x04 375.Lmemcpy_bad3: 376 subs r2, r2, #0x10 377 bge .Lmemcpy_bad3_loop16 378 379 adds r2, r2, #0x10 380 ldmfdeq sp!, {r4-r7} 381 bxeq lr /* Return now if done */ 382 subs r2, r2, #0x04 383 sublt r1, r1, #0x01 384 blt .Lmemcpy_bad_done 385 386.Lmemcpy_bad3_loop4: 387#ifdef __ARMEB__ 388 mov r4, ip, lsl #24 389#else 390 mov r4, ip, lsr #24 391#endif 392 ldr ip, [r1], #0x04 393 subs r2, r2, #0x04 394#ifdef __ARMEB__ 395 orr r4, r4, ip, lsr #8 396#else 397 
orr r4, r4, ip, lsl #8 398#endif 399 str r4, [r3], #0x04 400 bge .Lmemcpy_bad3_loop4 401 sub r1, r1, #0x01 402 403.Lmemcpy_bad_done: 404 ldmfd sp!, {r4-r7} 405 adds r2, r2, #0x04 406 bxeq lr 407 ldrb ip, [r1], #0x01 408 cmp r2, #0x02 409 ldrbge r2, [r1], #0x01 410 strb ip, [r3], #0x01 411 ldrbgt ip, [r1] 412 strbge r2, [r3], #0x01 413 strbgt ip, [r3] 414 bx lr 415 416 417/* 418 * Handle short copies (less than 16 bytes), possibly misaligned. 419 * Some of these are *very* common, thanks to the network stack, 420 * and so are handled specially. 421 */ 422.Lmemcpy_short: 423#ifndef _STANDALONE 424 add pc, pc, r2, lsl #2 425 nop 426 bx lr /* 0x00 */ 427 b .Lmemcpy_bytewise /* 0x01 */ 428 b .Lmemcpy_bytewise /* 0x02 */ 429 b .Lmemcpy_bytewise /* 0x03 */ 430 b .Lmemcpy_4 /* 0x04 */ 431 b .Lmemcpy_bytewise /* 0x05 */ 432 b .Lmemcpy_6 /* 0x06 */ 433 b .Lmemcpy_bytewise /* 0x07 */ 434 b .Lmemcpy_8 /* 0x08 */ 435 b .Lmemcpy_bytewise /* 0x09 */ 436 b .Lmemcpy_bytewise /* 0x0a */ 437 b .Lmemcpy_bytewise /* 0x0b */ 438 b .Lmemcpy_c /* 0x0c */ 439#endif 440.Lmemcpy_bytewise: 441 mov r3, r0 /* We must not clobber r0 */ 442 ldrb ip, [r1], #0x01 4431: subs r2, r2, #0x01 444 strb ip, [r3], #0x01 445 ldrbne ip, [r1], #0x01 446 bne 1b 447 bx lr 448 449#ifndef _STANDALONE 450/****************************************************************************** 451 * Special case for 4 byte copies 452 */ 453#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 454#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 455 LMEMCPY_4_PAD 456.Lmemcpy_4: 457 and r2, r1, #0x03 458 orr r2, r2, r0, lsl #2 459 ands r2, r2, #0x0f 460 sub r3, pc, #0x14 461 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 462 463/* 464 * 0000: dst is 32-bit aligned, src is 32-bit aligned 465 */ 466 ldr r2, [r1] 467 str r2, [r0] 468 bx lr 469 LMEMCPY_4_PAD 470 471/* 472 * 0001: dst is 32-bit aligned, src is 8-bit aligned 473 */ 474 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 475 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 476#ifdef __ARMEB__ 477 
mov r3, r3, lsl #8 /* r3 = 012. */ 478 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 479#else 480 mov r3, r3, lsr #8 /* r3 = .210 */ 481 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 482#endif 483 str r3, [r0] 484 bx lr 485 LMEMCPY_4_PAD 486 487/* 488 * 0010: dst is 32-bit aligned, src is 16-bit aligned 489 */ 490#ifdef __ARMEB__ 491 ldrh r3, [r1] 492 ldrh r2, [r1, #0x02] 493#else 494 ldrh r3, [r1, #0x02] 495 ldrh r2, [r1] 496#endif 497 orr r3, r2, r3, lsl #16 498 str r3, [r0] 499 bx lr 500 LMEMCPY_4_PAD 501 502/* 503 * 0011: dst is 32-bit aligned, src is 8-bit aligned 504 */ 505 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 506 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 507#ifdef __ARMEB__ 508 mov r3, r3, lsl #24 /* r3 = 0... */ 509 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 510#else 511 mov r3, r3, lsr #24 /* r3 = ...0 */ 512 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 513#endif 514 str r3, [r0] 515 bx lr 516 LMEMCPY_4_PAD 517 518/* 519 * 0100: dst is 8-bit aligned, src is 32-bit aligned 520 */ 521 ldr r2, [r1] 522#ifdef __ARMEB__ 523 strb r2, [r0, #0x03] 524 mov r3, r2, lsr #8 525 mov r1, r2, lsr #24 526 strb r1, [r0] 527#else 528 strb r2, [r0] 529 mov r3, r2, lsr #8 530 mov r1, r2, lsr #24 531 strb r1, [r0, #0x03] 532#endif 533 strh r3, [r0, #0x01] 534 bx lr 535 LMEMCPY_4_PAD 536 537/* 538 * 0101: dst is 8-bit aligned, src is 8-bit aligned 539 */ 540 ldrb r2, [r1] 541 ldrh r3, [r1, #0x01] 542 ldrb r1, [r1, #0x03] 543 strb r2, [r0] 544 strh r3, [r0, #0x01] 545 strb r1, [r0, #0x03] 546 bx lr 547 LMEMCPY_4_PAD 548 549/* 550 * 0110: dst is 8-bit aligned, src is 16-bit aligned 551 */ 552 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 553 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ 554#ifdef __ARMEB__ 555 mov r1, r2, lsr #8 /* r1 = ...0 */ 556 strb r1, [r0] 557 mov r2, r2, lsl #8 /* r2 = .01. 
*/ 558 orr r2, r2, r3, lsr #8 /* r2 = .012 */ 559#else 560 strb r2, [r0] 561 mov r2, r2, lsr #8 /* r2 = ...1 */ 562 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 563 mov r3, r3, lsr #8 /* r3 = ...3 */ 564#endif 565 strh r2, [r0, #0x01] 566 strb r3, [r0, #0x03] 567 bx lr 568 LMEMCPY_4_PAD 569 570/* 571 * 0111: dst is 8-bit aligned, src is 8-bit aligned 572 */ 573 ldrb r2, [r1] 574 ldrh r3, [r1, #0x01] 575 ldrb r1, [r1, #0x03] 576 strb r2, [r0] 577 strh r3, [r0, #0x01] 578 strb r1, [r0, #0x03] 579 bx lr 580 LMEMCPY_4_PAD 581 582/* 583 * 1000: dst is 16-bit aligned, src is 32-bit aligned 584 */ 585 ldr r2, [r1] 586#ifdef __ARMEB__ 587 strh r2, [r0, #0x02] 588 mov r3, r2, lsr #16 589 strh r3, [r0] 590#else 591 strh r2, [r0] 592 mov r3, r2, lsr #16 593 strh r3, [r0, #0x02] 594#endif 595 bx lr 596 LMEMCPY_4_PAD 597 598/* 599 * 1001: dst is 16-bit aligned, src is 8-bit aligned 600 */ 601 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 602 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 603 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 604 strh r1, [r0] 605#ifdef __ARMEB__ 606 mov r2, r2, lsl #8 /* r2 = 012. */ 607 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 608#else 609 mov r2, r2, lsr #24 /* r2 = ...2 */ 610 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 611#endif 612 strh r2, [r0, #0x02] 613 bx lr 614 LMEMCPY_4_PAD 615 616/* 617 * 1010: dst is 16-bit aligned, src is 16-bit aligned 618 */ 619 ldrh r2, [r1] 620 ldrh r3, [r1, #0x02] 621 strh r2, [r0] 622 strh r3, [r0, #0x02] 623 bx lr 624 LMEMCPY_4_PAD 625 626/* 627 * 1011: dst is 16-bit aligned, src is 8-bit aligned 628 */ 629 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 630 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 631 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 632 strh r1, [r0, #0x02] 633#ifdef __ARMEB__ 634 mov r3, r3, lsr #24 /* r3 = ...1 */ 635 orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 636#else 637 mov r3, r3, lsl #8 /* r3 = 321. 
*/ 638 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 639#endif 640 strh r3, [r0] 641 bx lr 642 LMEMCPY_4_PAD 643 644/* 645 * 1100: dst is 8-bit aligned, src is 32-bit aligned 646 */ 647 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 648#ifdef __ARMEB__ 649 strb r2, [r0, #0x03] 650 mov r3, r2, lsr #8 651 mov r1, r2, lsr #24 652 strh r3, [r0, #0x01] 653 strb r1, [r0] 654#else 655 strb r2, [r0] 656 mov r3, r2, lsr #8 657 mov r1, r2, lsr #24 658 strh r3, [r0, #0x01] 659 strb r1, [r0, #0x03] 660#endif 661 bx lr 662 LMEMCPY_4_PAD 663 664/* 665 * 1101: dst is 8-bit aligned, src is 8-bit aligned 666 */ 667 ldrb r2, [r1] 668 ldrh r3, [r1, #0x01] 669 ldrb r1, [r1, #0x03] 670 strb r2, [r0] 671 strh r3, [r0, #0x01] 672 strb r1, [r0, #0x03] 673 bx lr 674 LMEMCPY_4_PAD 675 676/* 677 * 1110: dst is 8-bit aligned, src is 16-bit aligned 678 */ 679#ifdef __ARMEB__ 680 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 681 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 682 strb r3, [r0, #0x03] 683 mov r3, r3, lsr #8 /* r3 = ...2 */ 684 orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 685 strh r3, [r0, #0x01] 686 mov r2, r2, lsr #8 /* r2 = ...0 */ 687 strb r2, [r0] 688#else 689 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 690 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 691 strb r2, [r0] 692 mov r2, r2, lsr #8 /* r2 = ...1 */ 693 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 694 strh r2, [r0, #0x01] 695 mov r3, r3, lsr #8 /* r3 = ...3 */ 696 strb r3, [r0, #0x03] 697#endif 698 bx lr 699 LMEMCPY_4_PAD 700 701/* 702 * 1111: dst is 8-bit aligned, src is 8-bit aligned 703 */ 704 ldrb r2, [r1] 705 ldrh r3, [r1, #0x01] 706 ldrb r1, [r1, #0x03] 707 strb r2, [r0] 708 strh r3, [r0, #0x01] 709 strb r1, [r0, #0x03] 710 bx lr 711 LMEMCPY_4_PAD 712 713 714/****************************************************************************** 715 * Special case for 6 byte copies 716 */ 717#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 718#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 719 LMEMCPY_6_PAD 720.Lmemcpy_6: 721 and r2, r1, 
#0x03 722 orr r2, r2, r0, lsl #2 723 ands r2, r2, #0x0f 724 sub r3, pc, #0x14 725 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 726 727/* 728 * 0000: dst is 32-bit aligned, src is 32-bit aligned 729 */ 730 ldr r2, [r1] 731 ldrh r3, [r1, #0x04] 732 str r2, [r0] 733 strh r3, [r0, #0x04] 734 bx lr 735 LMEMCPY_6_PAD 736 737/* 738 * 0001: dst is 32-bit aligned, src is 8-bit aligned 739 */ 740 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 741 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 742#ifdef __ARMEB__ 743 mov r2, r2, lsl #8 /* r2 = 012. */ 744 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 745#else 746 mov r2, r2, lsr #8 /* r2 = .210 */ 747 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 748#endif 749 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 750 str r2, [r0] 751 strh r3, [r0, #0x04] 752 bx lr 753 LMEMCPY_6_PAD 754 755/* 756 * 0010: dst is 32-bit aligned, src is 16-bit aligned 757 */ 758 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 759 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 760#ifdef __ARMEB__ 761 mov r1, r3, lsr #16 /* r1 = ..23 */ 762 orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 763 str r1, [r0] 764 strh r3, [r0, #0x04] 765#else 766 mov r1, r3, lsr #16 /* r1 = ..54 */ 767 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 768 str r2, [r0] 769 strh r1, [r0, #0x04] 770#endif 771 bx lr 772 LMEMCPY_6_PAD 773 774/* 775 * 0011: dst is 32-bit aligned, src is 8-bit aligned 776 */ 777 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 778 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 779 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */ 780#ifdef __ARMEB__ 781 mov r2, r2, lsl #24 /* r2 = 0... */ 782 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 783 mov r3, r3, lsl #8 /* r3 = 234. */ 784 orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 785#else 786 mov r2, r2, lsr #24 /* r2 = ...0 */ 787 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 788 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 789 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 790#endif 791 str r2, [r0] 792 strh r1, [r0, #0x04] 793 bx lr 794 LMEMCPY_6_PAD 795 796/* 797 * 0100: dst is 8-bit aligned, src is 32-bit aligned 798 */ 799 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 800 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 801 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 802 strh r1, [r0, #0x01] 803#ifdef __ARMEB__ 804 mov r1, r3, lsr #24 /* r1 = ...0 */ 805 strb r1, [r0] 806 mov r3, r3, lsl #8 /* r3 = 123. */ 807 orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 808#else 809 strb r3, [r0] 810 mov r3, r3, lsr #24 /* r3 = ...3 */ 811 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 812 mov r2, r2, lsr #8 /* r2 = ...5 */ 813#endif 814 strh r3, [r0, #0x03] 815 strb r2, [r0, #0x05] 816 bx lr 817 LMEMCPY_6_PAD 818 819/* 820 * 0101: dst is 8-bit aligned, src is 8-bit aligned 821 */ 822 ldrb r2, [r1] 823 ldrh r3, [r1, #0x01] 824 ldrh ip, [r1, #0x03] 825 ldrb r1, [r1, #0x05] 826 strb r2, [r0] 827 strh r3, [r0, #0x01] 828 strh ip, [r0, #0x03] 829 strb r1, [r0, #0x05] 830 bx lr 831 LMEMCPY_6_PAD 832 833/* 834 * 0110: dst is 8-bit aligned, src is 16-bit aligned 835 */ 836 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 837 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 838#ifdef __ARMEB__ 839 mov r3, r2, lsr #8 /* r3 = ...0 */ 840 strb r3, [r0] 841 strb r1, [r0, #0x05] 842 mov r3, r1, lsr #8 /* r3 = .234 */ 843 strh r3, [r0, #0x03] 844 mov r3, r2, lsl #8 /* r3 = .01. 
*/ 845 orr r3, r3, r1, lsr #24 /* r3 = .012 */ 846 strh r3, [r0, #0x01] 847#else 848 strb r2, [r0] 849 mov r3, r1, lsr #24 850 strb r3, [r0, #0x05] 851 mov r3, r1, lsr #8 /* r3 = .543 */ 852 strh r3, [r0, #0x03] 853 mov r3, r2, lsr #8 /* r3 = ...1 */ 854 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 855 strh r3, [r0, #0x01] 856#endif 857 bx lr 858 LMEMCPY_6_PAD 859 860/* 861 * 0111: dst is 8-bit aligned, src is 8-bit aligned 862 */ 863 ldrb r2, [r1] 864 ldrh r3, [r1, #0x01] 865 ldrh ip, [r1, #0x03] 866 ldrb r1, [r1, #0x05] 867 strb r2, [r0] 868 strh r3, [r0, #0x01] 869 strh ip, [r0, #0x03] 870 strb r1, [r0, #0x05] 871 bx lr 872 LMEMCPY_6_PAD 873 874/* 875 * 1000: dst is 16-bit aligned, src is 32-bit aligned 876 */ 877#ifdef __ARMEB__ 878 ldr r2, [r1] /* r2 = 0123 */ 879 ldrh r3, [r1, #0x04] /* r3 = ..45 */ 880 mov r1, r2, lsr #16 /* r1 = ..01 */ 881 orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 882 strh r1, [r0] 883 str r3, [r0, #0x02] 884#else 885 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 886 ldr r3, [r1] /* r3 = 3210 */ 887 mov r2, r2, lsl #16 /* r2 = 54.. */ 888 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 889 strh r3, [r0] 890 str r2, [r0, #0x02] 891#endif 892 bx lr 893 LMEMCPY_6_PAD 894 895/* 896 * 1001: dst is 16-bit aligned, src is 8-bit aligned 897 */ 898 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 899 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 900 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 901#ifdef __ARMEB__ 902 mov r2, r2, lsr #8 /* r2 = .345 */ 903 orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 904#else 905 mov r2, r2, lsl #8 /* r2 = 543. 
*/ 906 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 907#endif 908 strh r1, [r0] 909 str r2, [r0, #0x02] 910 bx lr 911 LMEMCPY_6_PAD 912 913/* 914 * 1010: dst is 16-bit aligned, src is 16-bit aligned 915 */ 916 ldrh r2, [r1] 917 ldr r3, [r1, #0x02] 918 strh r2, [r0] 919 str r3, [r0, #0x02] 920 bx lr 921 LMEMCPY_6_PAD 922 923/* 924 * 1011: dst is 16-bit aligned, src is 8-bit aligned 925 */ 926 ldrb r3, [r1] /* r3 = ...0 */ 927 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 928 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 929#ifdef __ARMEB__ 930 mov r3, r3, lsl #8 /* r3 = ..0. */ 931 orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 932 orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 933#else 934 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 935 mov r1, r1, lsl #24 /* r1 = 5... */ 936 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 937#endif 938 strh r3, [r0] 939 str r1, [r0, #0x02] 940 bx lr 941 LMEMCPY_6_PAD 942 943/* 944 * 1100: dst is 8-bit aligned, src is 32-bit aligned 945 */ 946 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 947 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 948#ifdef __ARMEB__ 949 mov r3, r2, lsr #24 /* r3 = ...0 */ 950 strb r3, [r0] 951 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 952 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 953#else 954 strb r2, [r0] 955 mov r2, r2, lsr #8 /* r2 = .321 */ 956 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 957 mov r1, r1, lsr #8 /* r1 = ...5 */ 958#endif 959 str r2, [r0, #0x01] 960 strb r1, [r0, #0x05] 961 bx lr 962 LMEMCPY_6_PAD 963 964/* 965 * 1101: dst is 8-bit aligned, src is 8-bit aligned 966 */ 967 ldrb r2, [r1] 968 ldrh r3, [r1, #0x01] 969 ldrh ip, [r1, #0x03] 970 ldrb r1, [r1, #0x05] 971 strb r2, [r0] 972 strh r3, [r0, #0x01] 973 strh ip, [r0, #0x03] 974 strb r1, [r0, #0x05] 975 bx lr 976 LMEMCPY_6_PAD 977 978/* 979 * 1110: dst is 8-bit aligned, src is 16-bit aligned 980 */ 981 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 982 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 983#ifdef __ARMEB__ 984 mov r3, r2, lsr #8 /* r3 = ...0 */ 985 strb r3, [r0] 986 mov r2, r2, lsl #24 /* r2 = 1... */ 987 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 988#else 989 strb r2, [r0] 990 mov r2, r2, lsr #8 /* r2 = ...1 */ 991 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 992 mov r1, r1, lsr #24 /* r1 = ...5 */ 993#endif 994 str r2, [r0, #0x01] 995 strb r1, [r0, #0x05] 996 bx lr 997 LMEMCPY_6_PAD 998 999/* 1000 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1001 */ 1002 ldrb r2, [r1] 1003 ldr r3, [r1, #0x01] 1004 ldrb r1, [r1, #0x05] 1005 strb r2, [r0] 1006 str r3, [r0, #0x01] 1007 strb r1, [r0, #0x05] 1008 bx lr 1009 LMEMCPY_6_PAD 1010 1011 1012/****************************************************************************** 1013 * Special case for 8 byte copies 1014 */ 1015#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 1016#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 1017 LMEMCPY_8_PAD 1018.Lmemcpy_8: 1019 and r2, r1, #0x03 1020 orr r2, r2, r0, lsl #2 1021 ands r2, r2, #0x0f 1022 sub r3, pc, #0x14 1023 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 1024 1025/* 1026 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1027 */ 1028 ldr r2, [r1] 1029 ldr r3, [r1, #0x04] 1030 str r2, [r0] 1031 str r3, [r0, #0x04] 1032 bx lr 1033 LMEMCPY_8_PAD 1034 
1035/* 1036 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1037 */ 1038 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1039 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 1040 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1041#ifdef __ARMEB__ 1042 mov r3, r3, lsl #8 /* r3 = 012. */ 1043 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1044 orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 1045#else 1046 mov r3, r3, lsr #8 /* r3 = .210 */ 1047 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1048 mov r1, r1, lsl #24 /* r1 = 7... */ 1049 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 1050#endif 1051 str r3, [r0] 1052 str r2, [r0, #0x04] 1053 bx lr 1054 LMEMCPY_8_PAD 1055 1056/* 1057 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1058 */ 1059 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1060 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1061 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1062#ifdef __ARMEB__ 1063 mov r2, r2, lsl #16 /* r2 = 01.. */ 1064 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 1065 orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 1066#else 1067 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1068 mov r3, r3, lsr #16 /* r3 = ..54 */ 1069 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 1070#endif 1071 str r2, [r0] 1072 str r3, [r0, #0x04] 1073 bx lr 1074 LMEMCPY_8_PAD 1075 1076/* 1077 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1078 */ 1079 ldrb r3, [r1] /* r3 = ...0 */ 1080 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1081 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 1082#ifdef __ARMEB__ 1083 mov r3, r3, lsl #24 /* r3 = 0... */ 1084 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1085 mov r2, r2, lsl #24 /* r2 = 4... 
*/ 1086 orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 1087#else 1088 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1089 mov r2, r2, lsr #24 /* r2 = ...4 */ 1090 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 1091#endif 1092 str r3, [r0] 1093 str r2, [r0, #0x04] 1094 bx lr 1095 LMEMCPY_8_PAD 1096 1097/* 1098 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1099 */ 1100 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1101 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 1102#ifdef __ARMEB__ 1103 mov r1, r3, lsr #24 /* r1 = ...0 */ 1104 strb r1, [r0] 1105 mov r1, r3, lsr #8 /* r1 = .012 */ 1106 strb r2, [r0, #0x07] 1107 mov r3, r3, lsl #24 /* r3 = 3... */ 1108 orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 1109#else 1110 strb r3, [r0] 1111 mov r1, r2, lsr #24 /* r1 = ...7 */ 1112 strb r1, [r0, #0x07] 1113 mov r1, r3, lsr #8 /* r1 = .321 */ 1114 mov r3, r3, lsr #24 /* r3 = ...3 */ 1115 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 1116#endif 1117 strh r1, [r0, #0x01] 1118 str r3, [r0, #0x03] 1119 bx lr 1120 LMEMCPY_8_PAD 1121 1122/* 1123 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1124 */ 1125 ldrb r2, [r1] 1126 ldrh r3, [r1, #0x01] 1127 ldr ip, [r1, #0x03] 1128 ldrb r1, [r1, #0x07] 1129 strb r2, [r0] 1130 strh r3, [r0, #0x01] 1131 str ip, [r0, #0x03] 1132 strb r1, [r0, #0x07] 1133 bx lr 1134 LMEMCPY_8_PAD 1135 1136/* 1137 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1138 */ 1139 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1140 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1141 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1142#ifdef __ARMEB__ 1143 mov ip, r2, lsr #8 /* ip = ...0 */ 1144 strb ip, [r0] 1145 mov ip, r2, lsl #8 /* ip = .01. */ 1146 orr ip, ip, r3, lsr #24 /* ip = .012 */ 1147 strb r1, [r0, #0x07] 1148 mov r3, r3, lsl #8 /* r3 = 345. 
*/ 1149 orr r3, r3, r1, lsr #8 /* r3 = 3456 */ 1150#else 1151 strb r2, [r0] /* 0 */ 1152 mov ip, r1, lsr #8 /* ip = ...7 */ 1153 strb ip, [r0, #0x07] /* 7 */ 1154 mov ip, r2, lsr #8 /* ip = ...1 */ 1155 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 1156 mov r3, r3, lsr #8 /* r3 = .543 */ 1157 orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 1158#endif 1159 strh ip, [r0, #0x01] 1160 str r3, [r0, #0x03] 1161 bx lr 1162 LMEMCPY_8_PAD 1163 1164/* 1165 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1166 */ 1167 ldrb r3, [r1] /* r3 = ...0 */ 1168 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 1169 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 1170 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1171 strb r3, [r0] 1172 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 1173#ifdef __ARMEB__ 1174 strh r3, [r0, #0x01] 1175 orr r2, r2, ip, lsl #16 /* r2 = 3456 */ 1176#else 1177 strh ip, [r0, #0x01] 1178 orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 1179#endif 1180 str r2, [r0, #0x03] 1181 strb r1, [r0, #0x07] 1182 bx lr 1183 LMEMCPY_8_PAD 1184 1185/* 1186 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1187 */ 1188 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1189 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1190 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 1191#ifdef __ARMEB__ 1192 strh r1, [r0] 1193 mov r1, r3, lsr #16 /* r1 = ..45 */ 1194 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */ 1195#else 1196 strh r2, [r0] 1197 orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 1198 mov r3, r3, lsr #16 /* r3 = ..76 */ 1199#endif 1200 str r2, [r0, #0x02] 1201 strh r3, [r0, #0x06] 1202 bx lr 1203 LMEMCPY_8_PAD 1204 1205/* 1206 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1207 */ 1208 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1209 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1210 ldrb ip, [r1, #0x07] /* ip = ...7 */ 1211 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1212 strh r1, [r0] 1213#ifdef __ARMEB__ 1214 mov r1, r2, lsl #24 /* r1 = 2... 
*/ 1215 orr r1, r1, r3, lsr #8 /* r1 = 2345 */ 1216 orr r3, ip, r3, lsl #8 /* r3 = 4567 */ 1217#else 1218 mov r1, r2, lsr #24 /* r1 = ...2 */ 1219 orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 1220 mov r3, r3, lsr #24 /* r3 = ...6 */ 1221 orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 1222#endif 1223 str r1, [r0, #0x02] 1224 strh r3, [r0, #0x06] 1225 bx lr 1226 LMEMCPY_8_PAD 1227 1228/* 1229 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1230 */ 1231 ldrh r2, [r1] 1232 ldr ip, [r1, #0x02] 1233 ldrh r3, [r1, #0x06] 1234 strh r2, [r0] 1235 str ip, [r0, #0x02] 1236 strh r3, [r0, #0x06] 1237 bx lr 1238 LMEMCPY_8_PAD 1239 1240/* 1241 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1242 */ 1243 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 1244 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1245 ldrb ip, [r1] /* ip = ...0 */ 1246 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 1247 strh r1, [r0, #0x06] 1248#ifdef __ARMEB__ 1249 mov r3, r3, lsr #24 /* r3 = ...5 */ 1250 orr r3, r3, r2, lsl #8 /* r3 = 2345 */ 1251 mov r2, r2, lsr #24 /* r2 = ...1 */ 1252 orr r2, r2, ip, lsl #8 /* r2 = ..01 */ 1253#else 1254 mov r3, r3, lsl #24 /* r3 = 5... */ 1255 orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 1256 orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 1257#endif 1258 str r3, [r0, #0x02] 1259 strh r2, [r0] 1260 bx lr 1261 LMEMCPY_8_PAD 1262 1263/* 1264 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1265 */ 1266 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1267 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1268 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 1269 strh r1, [r0, #0x05] 1270#ifdef __ARMEB__ 1271 strb r3, [r0, #0x07] 1272 mov r1, r2, lsr #24 /* r1 = ...0 */ 1273 strb r1, [r0] 1274 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 1275 orr r2, r2, r3, lsr #24 /* r2 = 1234 */ 1276 str r2, [r0, #0x01] 1277#else 1278 strb r2, [r0] 1279 mov r1, r3, lsr #24 /* r1 = ...7 */ 1280 strb r1, [r0, #0x07] 1281 mov r2, r2, lsr #8 /* r2 = .321 */ 1282 orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 1283 str r2, [r0, #0x01] 1284#endif 1285 bx lr 1286 LMEMCPY_8_PAD 1287 1288/* 1289 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1290 */ 1291 ldrb r3, [r1] /* r3 = ...0 */ 1292 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 1293 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1294 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 1295 strb r3, [r0] 1296 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 1297#ifdef __ARMEB__ 1298 strh ip, [r0, #0x05] 1299 orr r2, r3, r2, lsl #16 /* r2 = 1234 */ 1300#else 1301 strh r3, [r0, #0x05] 1302 orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 1303#endif 1304 str r2, [r0, #0x01] 1305 strb r1, [r0, #0x07] 1306 bx lr 1307 LMEMCPY_8_PAD 1308 1309/* 1310 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1311 */ 1312 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1313 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1314 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 1315#ifdef __ARMEB__ 1316 mov ip, r2, lsr #8 /* ip = ...0 */ 1317 strb ip, [r0] 1318 mov ip, r2, lsl #24 /* ip = 1... */ 1319 orr ip, ip, r3, lsr #8 /* ip = 1234 */ 1320 strb r1, [r0, #0x07] 1321 mov r1, r1, lsr #8 /* r1 = ...6 */ 1322 orr r1, r1, r3, lsl #8 /* r1 = 3456 */ 1323#else 1324 strb r2, [r0] 1325 mov ip, r2, lsr #8 /* ip = ...1 */ 1326 orr ip, ip, r3, lsl #8 /* ip = 4321 */ 1327 mov r2, r1, lsr #8 /* r2 = ...7 */ 1328 strb r2, [r0, #0x07] 1329 mov r1, r1, lsl #8 /* r1 = .76. 
*/ 1330 orr r1, r1, r3, lsr #24 /* r1 = .765 */ 1331#endif 1332 str ip, [r0, #0x01] 1333 strh r1, [r0, #0x05] 1334 bx lr 1335 LMEMCPY_8_PAD 1336 1337/* 1338 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1339 */ 1340 ldrb r2, [r1] 1341 ldr ip, [r1, #0x01] 1342 ldrh r3, [r1, #0x05] 1343 ldrb r1, [r1, #0x07] 1344 strb r2, [r0] 1345 str ip, [r0, #0x01] 1346 strh r3, [r0, #0x05] 1347 strb r1, [r0, #0x07] 1348 bx lr 1349 LMEMCPY_8_PAD 1350 1351/****************************************************************************** 1352 * Special case for 12 byte copies 1353 */ 1354#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 1355#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 1356 LMEMCPY_C_PAD 1357.Lmemcpy_c: 1358 and r2, r1, #0x03 1359 orr r2, r2, r0, lsl #2 1360 ands r2, r2, #0x0f 1361 sub r3, pc, #0x14 1362 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 1363 1364/* 1365 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1366 */ 1367 ldr r2, [r1] 1368 ldr r3, [r1, #0x04] 1369 ldr r1, [r1, #0x08] 1370 str r2, [r0] 1371 str r3, [r0, #0x04] 1372 str r1, [r0, #0x08] 1373 bx lr 1374 LMEMCPY_C_PAD 1375 1376/* 1377 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1378 */ 1379 ldrb r2, [r1, #0xb] /* r2 = ...B */ 1380 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1381 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1382 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 1383#ifdef __ARMEB__ 1384 orr r2, r2, ip, lsl #8 /* r2 = 89AB */ 1385 str r2, [r0, #0x08] 1386 mov r2, ip, lsr #24 /* r2 = ...7 */ 1387 orr r2, r2, r3, lsl #8 /* r2 = 4567 */ 1388 mov r1, r1, lsl #8 /* r1 = 012. */ 1389 orr r1, r1, r3, lsr #24 /* r1 = 0123 */ 1390#else 1391 mov r2, r2, lsl #24 /* r2 = B... */ 1392 orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 1393 str r2, [r0, #0x08] 1394 mov r2, ip, lsl #24 /* r2 = 7... 
*/ 1395 orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 1396 mov r1, r1, lsr #8 /* r1 = .210 */ 1397 orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 1398#endif 1399 str r2, [r0, #0x04] 1400 str r1, [r0] 1401 bx lr 1402 LMEMCPY_C_PAD 1403 1404/* 1405 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1406 */ 1407 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1408 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1409 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1410 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1411#ifdef __ARMEB__ 1412 mov r2, r2, lsl #16 /* r2 = 01.. */ 1413 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 1414 str r2, [r0] 1415 mov r3, r3, lsl #16 /* r3 = 45.. */ 1416 orr r3, r3, ip, lsr #16 /* r3 = 4567 */ 1417 orr r1, r1, ip, lsl #16 /* r1 = 89AB */ 1418#else 1419 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1420 str r2, [r0] 1421 mov r3, r3, lsr #16 /* r3 = ..54 */ 1422 orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 1423 mov r1, r1, lsl #16 /* r1 = BA.. */ 1424 orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 1425#endif 1426 str r3, [r0, #0x04] 1427 str r1, [r0, #0x08] 1428 bx lr 1429 LMEMCPY_C_PAD 1430 1431/* 1432 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1433 */ 1434 ldrb r2, [r1] /* r2 = ...0 */ 1435 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1436 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1437 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1438#ifdef __ARMEB__ 1439 mov r2, r2, lsl #24 /* r2 = 0... */ 1440 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1441 str r2, [r0] 1442 mov r3, r3, lsl #24 /* r3 = 4... */ 1443 orr r3, r3, ip, lsr #8 /* r3 = 4567 */ 1444 mov r1, r1, lsr #8 /* r1 = .9AB */ 1445 orr r1, r1, ip, lsl #24 /* r1 = 89AB */ 1446#else 1447 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1448 str r2, [r0] 1449 mov r3, r3, lsr #24 /* r3 = ...4 */ 1450 orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 1451 mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 1452 orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 1453#endif 1454 str r3, [r0, #0x04] 1455 str r1, [r0, #0x08] 1456 bx lr 1457 LMEMCPY_C_PAD 1458 1459/* 1460 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 1461 */ 1462 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1463 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1464 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 1465 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1466 strh r1, [r0, #0x01] 1467#ifdef __ARMEB__ 1468 mov r1, r2, lsr #24 /* r1 = ...0 */ 1469 strb r1, [r0] 1470 mov r1, r2, lsl #24 /* r1 = 3... */ 1471 orr r2, r1, r3, lsr #8 /* r2 = 3456 */ 1472 mov r1, r3, lsl #24 /* r1 = 7... */ 1473 orr r1, r1, ip, lsr #8 /* r1 = 789A */ 1474#else 1475 strb r2, [r0] 1476 mov r1, r2, lsr #24 /* r1 = ...3 */ 1477 orr r2, r1, r3, lsl #8 /* r2 = 6543 */ 1478 mov r1, r3, lsr #24 /* r1 = ...7 */ 1479 orr r1, r1, ip, lsl #8 /* r1 = A987 */ 1480 mov ip, ip, lsr #24 /* ip = ...B */ 1481#endif 1482 str r2, [r0, #0x03] 1483 str r1, [r0, #0x07] 1484 strb ip, [r0, #0x0b] 1485 bx lr 1486 LMEMCPY_C_PAD 1487 1488/* 1489 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) 1490 */ 1491 ldrb r2, [r1] 1492 ldrh r3, [r1, #0x01] 1493 ldr ip, [r1, #0x03] 1494 strb r2, [r0] 1495 ldr r2, [r1, #0x07] 1496 ldrb r1, [r1, #0x0b] 1497 strh r3, [r0, #0x01] 1498 str ip, [r0, #0x03] 1499 str r2, [r0, #0x07] 1500 strb r1, [r0, #0x0b] 1501 bx lr 1502 LMEMCPY_C_PAD 1503 1504/* 1505 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned 1506 */ 1507 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1508 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1509 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 1510 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 1511#ifdef __ARMEB__ 1512 mov r2, r2, ror #8 /* r2 = 1..0 */ 1513 strb r2, [r0] 1514 mov r2, r2, lsr #16 /* r2 = ..1. */ 1515 orr r2, r2, r3, lsr #24 /* r2 = ..12 */ 1516 strh r2, [r0, #0x01] 1517 mov r2, r3, lsl #8 /* r2 = 345. 
*/ 1518 orr r3, r2, ip, lsr #24 /* r3 = 3456 */ 1519 mov r2, ip, lsl #8 /* r2 = 789. */ 1520 orr r2, r2, r1, lsr #8 /* r2 = 789A */ 1521#else 1522 strb r2, [r0] 1523 mov r2, r2, lsr #8 /* r2 = ...1 */ 1524 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1525 strh r2, [r0, #0x01] 1526 mov r2, r3, lsr #8 /* r2 = .543 */ 1527 orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 1528 mov r2, ip, lsr #8 /* r2 = .987 */ 1529 orr r2, r2, r1, lsl #24 /* r2 = A987 */ 1530 mov r1, r1, lsr #8 /* r1 = ...B */ 1531#endif 1532 str r3, [r0, #0x03] 1533 str r2, [r0, #0x07] 1534 strb r1, [r0, #0x0b] 1535 bx lr 1536 LMEMCPY_C_PAD 1537 1538/* 1539 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) 1540 */ 1541 ldrb r2, [r1] 1542 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 1543 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 1544 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 1545 strb r2, [r0] 1546#ifdef __ARMEB__ 1547 mov r2, r3, lsr #16 /* r2 = ..12 */ 1548 strh r2, [r0, #0x01] 1549 mov r3, r3, lsl #16 /* r3 = 34.. */ 1550 orr r3, r3, ip, lsr #16 /* r3 = 3456 */ 1551 mov ip, ip, lsl #16 /* ip = 78.. */ 1552 orr ip, ip, r1, lsr #16 /* ip = 789A */ 1553 mov r1, r1, lsr #8 /* r1 = .9AB */ 1554#else 1555 strh r3, [r0, #0x01] 1556 mov r3, r3, lsr #16 /* r3 = ..43 */ 1557 orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 1558 mov ip, ip, lsr #16 /* ip = ..87 */ 1559 orr ip, ip, r1, lsl #16 /* ip = A987 */ 1560 mov r1, r1, lsr #16 /* r1 = ..xB */ 1561#endif 1562 str r3, [r0, #0x03] 1563 str ip, [r0, #0x07] 1564 strb r1, [r0, #0x0b] 1565 bx lr 1566 LMEMCPY_C_PAD 1567 1568/* 1569 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1570 */ 1571 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 1572 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 1573 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 1574 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 1575#ifdef __ARMEB__ 1576 strh r1, [r0] 1577 mov r1, ip, lsl #16 /* r1 = 23.. 
*/ 1578 orr r1, r1, r3, lsr #16 /* r1 = 2345 */ 1579 mov r3, r3, lsl #16 /* r3 = 67.. */ 1580 orr r3, r3, r2, lsr #16 /* r3 = 6789 */ 1581#else 1582 strh ip, [r0] 1583 orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 1584 mov r3, r3, lsr #16 /* r3 = ..76 */ 1585 orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 1586 mov r2, r2, lsr #16 /* r2 = ..BA */ 1587#endif 1588 str r1, [r0, #0x02] 1589 str r3, [r0, #0x06] 1590 strh r2, [r0, #0x0a] 1591 bx lr 1592 LMEMCPY_C_PAD 1593 1594/* 1595 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 1596 */ 1597 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1598 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 1599 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 1600 strh ip, [r0] 1601 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 1602 ldrb r1, [r1, #0x0b] /* r1 = ...B */ 1603#ifdef __ARMEB__ 1604 mov r2, r2, lsl #24 /* r2 = 2... */ 1605 orr r2, r2, r3, lsr #8 /* r2 = 2345 */ 1606 mov r3, r3, lsl #24 /* r3 = 6... */ 1607 orr r3, r3, ip, lsr #8 /* r3 = 6789 */ 1608 orr r1, r1, ip, lsl #8 /* r1 = 89AB */ 1609#else 1610 mov r2, r2, lsr #24 /* r2 = ...2 */ 1611 orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 1612 mov r3, r3, lsr #24 /* r3 = ...6 */ 1613 orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 1614 mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 1615 orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 1616#endif 1617 str r2, [r0, #0x02] 1618 str r3, [r0, #0x06] 1619 strh r1, [r0, #0x0a] 1620 bx lr 1621 LMEMCPY_C_PAD 1622 1623/* 1624 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1625 */ 1626 ldrh r2, [r1] 1627 ldr r3, [r1, #0x02] 1628 ldr ip, [r1, #0x06] 1629 ldrh r1, [r1, #0x0a] 1630 strh r2, [r0] 1631 str r3, [r0, #0x02] 1632 str ip, [r0, #0x06] 1633 strh r1, [r0, #0x0a] 1634 bx lr 1635 LMEMCPY_C_PAD 1636 1637/* 1638 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 1639 */ 1640 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 1641 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 1642 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 1643 strh ip, [r0, #0x0a] 1644 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 1645 ldrb r1, [r1] /* r1 = ...0 */ 1646#ifdef __ARMEB__ 1647 mov r2, r2, lsr #24 /* r2 = ...9 */ 1648 orr r2, r2, r3, lsl #8 /* r2 = 6789 */ 1649 mov r3, r3, lsr #24 /* r3 = ...5 */ 1650 orr r3, r3, ip, lsl #8 /* r3 = 2345 */ 1651 mov r1, r1, lsl #8 /* r1 = ..0. */ 1652 orr r1, r1, ip, lsr #24 /* r1 = ..01 */ 1653#else 1654 mov r2, r2, lsl #24 /* r2 = 9... */ 1655 orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 1656 mov r3, r3, lsl #24 /* r3 = 5... */ 1657 orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 1658 orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 1659#endif 1660 str r2, [r0, #0x06] 1661 str r3, [r0, #0x02] 1662 strh r1, [r0] 1663 bx lr 1664 LMEMCPY_C_PAD 1665 1666/* 1667 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 1668 */ 1669 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1670 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 1671 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 1672#ifdef __ARMEB__ 1673 mov r3, r2, lsr #24 /* r3 = ...0 */ 1674 strb r3, [r0] 1675 mov r2, r2, lsl #8 /* r2 = 123. */ 1676 orr r2, r2, ip, lsr #24 /* r2 = 1234 */ 1677 str r2, [r0, #0x01] 1678 mov r2, ip, lsl #8 /* r2 = 567. 
*/ 1679 orr r2, r2, r1, lsr #24 /* r2 = 5678 */ 1680 str r2, [r0, #0x05] 1681 mov r2, r1, lsr #8 /* r2 = .89A */ 1682 strh r2, [r0, #0x09] 1683 strb r1, [r0, #0x0b] 1684#else 1685 strb r2, [r0] 1686 mov r3, r2, lsr #8 /* r3 = .321 */ 1687 orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 1688 str r3, [r0, #0x01] 1689 mov r3, ip, lsr #8 /* r3 = .765 */ 1690 orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 1691 str r3, [r0, #0x05] 1692 mov r1, r1, lsr #8 /* r1 = .BA9 */ 1693 strh r1, [r0, #0x09] 1694 mov r1, r1, lsr #16 /* r1 = ...B */ 1695 strb r1, [r0, #0x0b] 1696#endif 1697 bx lr 1698 LMEMCPY_C_PAD 1699 1700/* 1701 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 1702 */ 1703 ldrb r2, [r1, #0x0b] /* r2 = ...B */ 1704 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 1705 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 1706 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x (x = byte before src, never stored) */ 1707 strb r2, [r0, #0x0b] 1708#ifdef __ARMEB__ 1709 strh r3, [r0, #0x09] 1710 mov r3, r3, lsr #16 /* r3 = ..78 */ 1711 orr r3, r3, ip, lsl #16 /* r3 = 5678 */ 1712 mov ip, ip, lsr #16 /* ip = ..34 */ 1713 orr ip, ip, r1, lsl #16 /* ip = 1234 */ 1714 mov r1, r1, lsr #16 /* r1 = ..x0 */ 1715#else 1716 mov r2, r3, lsr #16 /* r2 = ..A9 */ 1717 strh r2, [r0, #0x09] 1718 mov r3, r3, lsl #16 /* r3 = 87.. */ 1719 orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 1720 mov ip, ip, lsl #16 /* ip = 43.. 
*/ 1721 orr ip, ip, r1, lsr #16 /* ip = 4321 */ 1722 mov r1, r1, lsr #8 /* r1 = .210 */ 1723#endif 1724 str r3, [r0, #0x05] 1725 str ip, [r0, #0x01] 1726 strb r1, [r0] 1727 bx lr 1728 LMEMCPY_C_PAD 1729 1730/* 1731 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 1732 */ 1733#ifdef __ARMEB__ 1734 ldrh r2, [r1, #0x0a] /* r2 = ..AB */ 1735 ldr ip, [r1, #0x06] /* ip = 6789 */ 1736 ldr r3, [r1, #0x02] /* r3 = 2345 */ 1737 ldrh r1, [r1] /* r1 = ..01 */ 1738 strb r2, [r0, #0x0b] 1739 mov r2, r2, lsr #8 /* r2 = ...A */ 1740 orr r2, r2, ip, lsl #8 /* r2 = 789A */ 1741 mov ip, ip, lsr #8 /* ip = .678 */ 1742 orr ip, ip, r3, lsl #24 /* ip = 5678 */ 1743 mov r3, r3, lsr #8 /* r3 = .234 */ 1744 orr r3, r3, r1, lsl #24 /* r3 = 1234 */ 1745 mov r1, r1, lsr #8 /* r1 = ...0 */ 1746 strb r1, [r0] 1747 str r3, [r0, #0x01] 1748 str ip, [r0, #0x05] 1749 strh r2, [r0, #0x09] 1750#else 1751 ldrh r2, [r1] /* r2 = ..10 */ 1752 ldr r3, [r1, #0x02] /* r3 = 5432 */ 1753 ldr ip, [r1, #0x06] /* ip = 9876 */ 1754 ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 1755 strb r2, [r0] 1756 mov r2, r2, lsr #8 /* r2 = ...1 */ 1757 orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 1758 mov r3, r3, lsr #24 /* r3 = ...5 */ 1759 orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 1760 mov ip, ip, lsr #24 /* ip = ...9 */ 1761 orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 1762 mov r1, r1, lsr #8 /* r1 = ...B */ 1763 str r2, [r0, #0x01] 1764 str r3, [r0, #0x05] 1765 strh ip, [r0, #0x09] 1766 strb r1, [r0, #0x0b] 1767#endif 1768 bx lr 1769 LMEMCPY_C_PAD 1770 1771/* 1772 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 1773 */ 1774 ldrb r2, [r1] 1775 ldr r3, [r1, #0x01] 1776 ldr ip, [r1, #0x05] 1777 strb r2, [r0] 1778 ldrh r2, [r1, #0x09] 1779 ldrb r1, [r1, #0x0b] 1780 str r3, [r0, #0x01] 1781 str ip, [r0, #0x05] 1782 strh r2, [r0, #0x09] 1783 strb r1, [r0, #0x0b] 1784 bx lr 1785#endif /* !_STANDALONE */ 1786END(memcpy) 1787