1/*- 2 * Copyright (c) 2004 Olivier Houchard 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * Copyright 2003 Wasabi Systems, Inc. 28 * All rights reserved. 29 * 30 * Written by Steve C. Woodford for Wasabi Systems, Inc. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed for the NetBSD Project by 43 * Wasabi Systems, Inc. 44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45 * or promote products derived from this software without specific prior 46 * written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * POSSIBILITY OF SUCH DAMAGE. 59 */ 60/* 61 * Copyright (c) 1997 The NetBSD Foundation, Inc. 62 * All rights reserved. 63 * 64 * This code is derived from software contributed to The NetBSD Foundation 65 * by Neil A. Carson and Mark Brinicombe 66 * 67 * Redistribution and use in source and binary forms, with or without 68 * modification, are permitted provided that the following conditions 69 * are met: 70 * 1. Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer. 72 * 2. 
Redistributions in binary form must reproduce the above copyright 73 * notice, this list of conditions and the following disclaimer in the 74 * documentation and/or other materials provided with the distribution. 75 * 76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 86 * POSSIBILITY OF SUCH DAMAGE. 
87 */ 88 89#include <machine/asm.h> 90#include <machine/asmacros.h> 91__FBSDID("$FreeBSD$"); 92 93#include "assym.s" 94 95.L_arm_memcpy: 96 .word _C_LABEL(_arm_memcpy) 97.L_arm_bzero: 98 .word _C_LABEL(_arm_bzero) 99.L_min_memcpy_size: 100 .word _C_LABEL(_min_memcpy_size) 101.L_min_bzero_size: 102 .word _C_LABEL(_min_bzero_size) 103/* 104 * memset: Sets a block of memory to the specified value 105 * 106 * On entry: 107 * r0 - dest address 108 * r1 - byte to write 109 * r2 - number of bytes to write 110 * 111 * On exit: 112 * r0 - dest address 113 */ 114/* LINTSTUB: Func: void bzero(void *, size_t) */ 115ENTRY(bzero) 116 ldr r3, .L_arm_bzero 117 ldr r3, [r3] 118 cmp r3, #0 119 beq .Lnormal0 120 ldr r2, .L_min_bzero_size 121 ldr r2, [r2] 122 cmp r1, r2 123 blt .Lnormal0 124 stmfd sp!, {r0, r1, lr} 125 mov r2, #0 126 mov lr, pc 127 mov pc, r3 128 cmp r0, #0 129 ldmfd sp!, {r0, r1, lr} 130 RETeq 131.Lnormal0: 132 mov r3, #0x00 133 b do_memset 134 135/* LINTSTUB: Func: void *memset(void *, int, size_t) */ 136ENTRY(memset) 137 and r3, r1, #0xff /* We deal with bytes */ 138 mov r1, r2 139do_memset: 140 cmp r1, #0x04 /* Do we have less than 4 bytes */ 141 mov ip, r0 142 blt .Lmemset_lessthanfour 143 144 /* Ok first we will word align the address */ 145 ands r2, ip, #0x03 /* Get the bottom two bits */ 146 bne .Lmemset_wordunaligned /* The address is not word aligned */ 147 148 /* We are now word aligned */ 149.Lmemset_wordaligned: 150 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */ 151#ifdef _ARM_ARCH_5E 152 tst ip, #0x04 /* Quad-align for armv5e */ 153#else 154 cmp r1, #0x10 155#endif 156 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */ 157#ifdef _ARM_ARCH_5E 158 subne r1, r1, #0x04 /* Quad-align if necessary */ 159 strne r3, [ip], #0x04 160 cmp r1, #0x10 161#endif 162 blt .Lmemset_loop4 /* If less than 16 then use words */ 163 mov r2, r3 /* Duplicate data */ 164 cmp r1, #0x80 /* If < 128 then skip the big loop */ 165 blt .Lmemset_loop32 166 167 /* Do 128 bytes 
at a time */ 168.Lmemset_loop128: 169 subs r1, r1, #0x80 170#ifdef _ARM_ARCH_5E 171 strged r2, [ip], #0x08 172 strged r2, [ip], #0x08 173 strged r2, [ip], #0x08 174 strged r2, [ip], #0x08 175 strged r2, [ip], #0x08 176 strged r2, [ip], #0x08 177 strged r2, [ip], #0x08 178 strged r2, [ip], #0x08 179 strged r2, [ip], #0x08 180 strged r2, [ip], #0x08 181 strged r2, [ip], #0x08 182 strged r2, [ip], #0x08 183 strged r2, [ip], #0x08 184 strged r2, [ip], #0x08 185 strged r2, [ip], #0x08 186 strged r2, [ip], #0x08 187#else 188 stmgeia ip!, {r2-r3} 189 stmgeia ip!, {r2-r3} 190 stmgeia ip!, {r2-r3} 191 stmgeia ip!, {r2-r3} 192 stmgeia ip!, {r2-r3} 193 stmgeia ip!, {r2-r3} 194 stmgeia ip!, {r2-r3} 195 stmgeia ip!, {r2-r3} 196 stmgeia ip!, {r2-r3} 197 stmgeia ip!, {r2-r3} 198 stmgeia ip!, {r2-r3} 199 stmgeia ip!, {r2-r3} 200 stmgeia ip!, {r2-r3} 201 stmgeia ip!, {r2-r3} 202 stmgeia ip!, {r2-r3} 203 stmgeia ip!, {r2-r3} 204#endif 205 bgt .Lmemset_loop128 206 RETeq /* Zero length so just exit */ 207 208 add r1, r1, #0x80 /* Adjust for extra sub */ 209 210 /* Do 32 bytes at a time */ 211.Lmemset_loop32: 212 subs r1, r1, #0x20 213#ifdef _ARM_ARCH_5E 214 strged r2, [ip], #0x08 215 strged r2, [ip], #0x08 216 strged r2, [ip], #0x08 217 strged r2, [ip], #0x08 218#else 219 stmgeia ip!, {r2-r3} 220 stmgeia ip!, {r2-r3} 221 stmgeia ip!, {r2-r3} 222 stmgeia ip!, {r2-r3} 223#endif 224 bgt .Lmemset_loop32 225 RETeq /* Zero length so just exit */ 226 227 adds r1, r1, #0x10 /* Partially adjust for extra sub */ 228 229 /* Deal with 16 bytes or more */ 230#ifdef _ARM_ARCH_5E 231 strged r2, [ip], #0x08 232 strged r2, [ip], #0x08 233#else 234 stmgeia ip!, {r2-r3} 235 stmgeia ip!, {r2-r3} 236#endif 237 RETeq /* Zero length so just exit */ 238 239 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */ 240 241 /* We have at least 4 bytes so copy as words */ 242.Lmemset_loop4: 243 subs r1, r1, #0x04 244 strge r3, [ip], #0x04 245 bgt .Lmemset_loop4 246 RETeq /* Zero length so just exit */ 247 
248#ifdef _ARM_ARCH_5E 249 /* Compensate for 64-bit alignment check */ 250 adds r1, r1, #0x04 251 RETeq 252 cmp r1, #2 253#else 254 cmp r1, #-2 255#endif 256 257 strb r3, [ip], #0x01 /* Set 1 byte */ 258 strgeb r3, [ip], #0x01 /* Set another byte */ 259 strgtb r3, [ip] /* and a third */ 260 RET /* Exit */ 261 262.Lmemset_wordunaligned: 263 rsb r2, r2, #0x004 264 strb r3, [ip], #0x01 /* Set 1 byte */ 265 cmp r2, #0x02 266 strgeb r3, [ip], #0x01 /* Set another byte */ 267 sub r1, r1, r2 268 strgtb r3, [ip], #0x01 /* and a third */ 269 cmp r1, #0x04 /* More than 4 bytes left? */ 270 bge .Lmemset_wordaligned /* Yup */ 271 272.Lmemset_lessthanfour: 273 cmp r1, #0x00 274 RETeq /* Zero length so exit */ 275 strb r3, [ip], #0x01 /* Set 1 byte */ 276 cmp r1, #0x02 277 strgeb r3, [ip], #0x01 /* Set another byte */ 278 strgtb r3, [ip] /* and a third */ 279 RET /* Exit */ 280END(bzero) 281END(memset) 282 283ENTRY(bcmp) 284 mov ip, r0 285 cmp r2, #0x06 286 beq .Lmemcmp_6bytes 287 mov r0, #0x00 288 289 /* Are both addresses aligned the same way? */ 290 cmp r2, #0x00 291 eornes r3, ip, r1 292 RETeq /* len == 0, or same addresses! */ 293 tst r3, #0x03 294 subne r2, r2, #0x01 295 bne .Lmemcmp_bytewise2 /* Badly aligned. 
Do it the slow way */ 296 297 /* Word-align the addresses, if necessary */ 298 sub r3, r1, #0x05 299 ands r3, r3, #0x03 300 add r3, r3, r3, lsl #1 301 addne pc, pc, r3, lsl #3 302 nop 303 304 /* Compare up to 3 bytes */ 305 ldrb r0, [ip], #0x01 306 ldrb r3, [r1], #0x01 307 subs r0, r0, r3 308 RETne 309 subs r2, r2, #0x01 310 RETeq 311 312 /* Compare up to 2 bytes */ 313 ldrb r0, [ip], #0x01 314 ldrb r3, [r1], #0x01 315 subs r0, r0, r3 316 RETne 317 subs r2, r2, #0x01 318 RETeq 319 320 /* Compare 1 byte */ 321 ldrb r0, [ip], #0x01 322 ldrb r3, [r1], #0x01 323 subs r0, r0, r3 324 RETne 325 subs r2, r2, #0x01 326 RETeq 327 328 /* Compare 4 bytes at a time, if possible */ 329 subs r2, r2, #0x04 330 bcc .Lmemcmp_bytewise 331.Lmemcmp_word_aligned: 332 ldr r0, [ip], #0x04 333 ldr r3, [r1], #0x04 334 subs r2, r2, #0x04 335 cmpcs r0, r3 336 beq .Lmemcmp_word_aligned 337 sub r0, r0, r3 338 339 /* Correct for extra subtraction, and check if done */ 340 adds r2, r2, #0x04 341 cmpeq r0, #0x00 /* If done, did all bytes match? */ 342 RETeq /* Yup. Just return */ 343 344 /* Re-do the final word byte-wise */ 345 sub ip, ip, #0x04 346 sub r1, r1, #0x04 347 348.Lmemcmp_bytewise: 349 add r2, r2, #0x03 350.Lmemcmp_bytewise2: 351 ldrb r0, [ip], #0x01 352 ldrb r3, [r1], #0x01 353 subs r2, r2, #0x01 354 cmpcs r0, r3 355 beq .Lmemcmp_bytewise2 356 sub r0, r0, r3 357 RET 358 359 /* 360 * 6 byte compares are very common, thanks to the network stack. 361 * This code is hand-scheduled to reduce the number of stalls for 362 * load results. Everything else being equal, this will be ~32% 363 * faster than a byte-wise memcmp. 
364 */ 365 .align 5 366.Lmemcmp_6bytes: 367 ldrb r3, [r1, #0x00] /* r3 = b2#0 */ 368 ldrb r0, [ip, #0x00] /* r0 = b1#0 */ 369 ldrb r2, [r1, #0x01] /* r2 = b2#1 */ 370 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */ 371 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */ 372 RETne /* Return if mismatch on #0 */ 373 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */ 374 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */ 375 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */ 376 RETne /* Return if mismatch on #1 */ 377 ldrb r2, [r1, #0x03] /* r2 = b2#3 */ 378 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */ 379 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */ 380 RETne /* Return if mismatch on #2 */ 381 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */ 382 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */ 383 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */ 384 RETne /* Return if mismatch on #3 */ 385 ldrb r2, [r1, #0x05] /* r2 = b2#5 */ 386 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */ 387 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */ 388 RETne /* Return if mismatch on #4 */ 389 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */ 390 RET 391END(bcmp) 392 393ENTRY(bcopy) 394 /* switch the source and destination registers */ 395 eor r0, r1, r0 396 eor r1, r0, r1 397 eor r0, r1, r0 398ENTRY(memmove) 399 /* Do the buffers overlap? 
*/ 400 cmp r0, r1 401 RETeq /* Bail now if src/dst are the same */ 402 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */ 403 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */ 404 cmp r3, r2 /* if (r3 < len) we have an overlap */ 405 bcc PIC_SYM(_C_LABEL(memcpy), PLT) 406 407 /* Determine copy direction */ 408 cmp r1, r0 409 bcc .Lmemmove_backwards 410 411 moveq r0, #0 /* Quick abort for len=0 */ 412 RETeq 413 414 stmdb sp!, {r0, lr} /* memmove() returns dest addr */ 415 subs r2, r2, #4 416 blt .Lmemmove_fl4 /* less than 4 bytes */ 417 ands r12, r0, #3 418 bne .Lmemmove_fdestul /* oh unaligned destination addr */ 419 ands r12, r1, #3 420 bne .Lmemmove_fsrcul /* oh unaligned source addr */ 421 422.Lmemmove_ft8: 423 /* We have aligned source and destination */ 424 subs r2, r2, #8 425 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */ 426 subs r2, r2, #0x14 427 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */ 428 stmdb sp!, {r4} /* borrow r4 */ 429 430 /* blat 32 bytes at a time */ 431 /* XXX for really big copies perhaps we should use more registers */ 432.Lmemmove_floop32: 433 ldmia r1!, {r3, r4, r12, lr} 434 stmia r0!, {r3, r4, r12, lr} 435 ldmia r1!, {r3, r4, r12, lr} 436 stmia r0!, {r3, r4, r12, lr} 437 subs r2, r2, #0x20 438 bge .Lmemmove_floop32 439 440 cmn r2, #0x10 441 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 442 stmgeia r0!, {r3, r4, r12, lr} 443 subge r2, r2, #0x10 444 ldmia sp!, {r4} /* return r4 */ 445 446.Lmemmove_fl32: 447 adds r2, r2, #0x14 448 449 /* blat 12 bytes at a time */ 450.Lmemmove_floop12: 451 ldmgeia r1!, {r3, r12, lr} 452 stmgeia r0!, {r3, r12, lr} 453 subges r2, r2, #0x0c 454 bge .Lmemmove_floop12 455 456.Lmemmove_fl12: 457 adds r2, r2, #8 458 blt .Lmemmove_fl4 459 460 subs r2, r2, #4 461 ldrlt r3, [r1], #4 462 strlt r3, [r0], #4 463 ldmgeia r1!, {r3, r12} 464 stmgeia r0!, {r3, r12} 465 subge r2, r2, #4 466 467.Lmemmove_fl4: 468 /* less than 4 bytes to go */ 469 adds r2, r2, #4 470 ldmeqia sp!, 
{r0, pc} /* done */ 471 472 /* copy the crud byte at a time */ 473 cmp r2, #2 474 ldrb r3, [r1], #1 475 strb r3, [r0], #1 476 ldrgeb r3, [r1], #1 477 strgeb r3, [r0], #1 478 ldrgtb r3, [r1], #1 479 strgtb r3, [r0], #1 480 ldmia sp!, {r0, pc} 481 482 /* erg - unaligned destination */ 483.Lmemmove_fdestul: 484 rsb r12, r12, #4 485 cmp r12, #2 486 487 /* align destination with byte copies */ 488 ldrb r3, [r1], #1 489 strb r3, [r0], #1 490 ldrgeb r3, [r1], #1 491 strgeb r3, [r0], #1 492 ldrgtb r3, [r1], #1 493 strgtb r3, [r0], #1 494 subs r2, r2, r12 495 blt .Lmemmove_fl4 /* less the 4 bytes */ 496 497 ands r12, r1, #3 498 beq .Lmemmove_ft8 /* we have an aligned source */ 499 500 /* erg - unaligned source */ 501 /* This is where it gets nasty ... */ 502.Lmemmove_fsrcul: 503 bic r1, r1, #3 504 ldr lr, [r1], #4 505 cmp r12, #2 506 bgt .Lmemmove_fsrcul3 507 beq .Lmemmove_fsrcul2 508 cmp r2, #0x0c 509 blt .Lmemmove_fsrcul1loop4 510 sub r2, r2, #0x0c 511 stmdb sp!, {r4, r5} 512 513.Lmemmove_fsrcul1loop16: 514#ifdef __ARMEB__ 515 mov r3, lr, lsl #8 516#else 517 mov r3, lr, lsr #8 518#endif 519 ldmia r1!, {r4, r5, r12, lr} 520#ifdef __ARMEB__ 521 orr r3, r3, r4, lsr #24 522 mov r4, r4, lsl #8 523 orr r4, r4, r5, lsr #24 524 mov r5, r5, lsl #8 525 orr r5, r5, r12, lsr #24 526 mov r12, r12, lsl #8 527 orr r12, r12, lr, lsr #24 528#else 529 orr r3, r3, r4, lsl #24 530 mov r4, r4, lsr #8 531 orr r4, r4, r5, lsl #24 532 mov r5, r5, lsr #8 533 orr r5, r5, r12, lsl #24 534 mov r12, r12, lsr #8 535 orr r12, r12, lr, lsl #24 536#endif 537 stmia r0!, {r3-r5, r12} 538 subs r2, r2, #0x10 539 bge .Lmemmove_fsrcul1loop16 540 ldmia sp!, {r4, r5} 541 adds r2, r2, #0x0c 542 blt .Lmemmove_fsrcul1l4 543 544.Lmemmove_fsrcul1loop4: 545#ifdef __ARMEB__ 546 mov r12, lr, lsl #8 547#else 548 mov r12, lr, lsr #8 549#endif 550 ldr lr, [r1], #4 551#ifdef __ARMEB__ 552 orr r12, r12, lr, lsr #24 553#else 554 orr r12, r12, lr, lsl #24 555#endif 556 str r12, [r0], #4 557 subs r2, r2, #4 558 bge 
.Lmemmove_fsrcul1loop4 559 560.Lmemmove_fsrcul1l4: 561 sub r1, r1, #3 562 b .Lmemmove_fl4 563 564.Lmemmove_fsrcul2: 565 cmp r2, #0x0c 566 blt .Lmemmove_fsrcul2loop4 567 sub r2, r2, #0x0c 568 stmdb sp!, {r4, r5} 569 570.Lmemmove_fsrcul2loop16: 571#ifdef __ARMEB__ 572 mov r3, lr, lsl #16 573#else 574 mov r3, lr, lsr #16 575#endif 576 ldmia r1!, {r4, r5, r12, lr} 577#ifdef __ARMEB__ 578 orr r3, r3, r4, lsr #16 579 mov r4, r4, lsl #16 580 orr r4, r4, r5, lsr #16 581 mov r5, r5, lsl #16 582 orr r5, r5, r12, lsr #16 583 mov r12, r12, lsl #16 584 orr r12, r12, lr, lsr #16 585#else 586 orr r3, r3, r4, lsl #16 587 mov r4, r4, lsr #16 588 orr r4, r4, r5, lsl #16 589 mov r5, r5, lsr #16 590 orr r5, r5, r12, lsl #16 591 mov r12, r12, lsr #16 592 orr r12, r12, lr, lsl #16 593#endif 594 stmia r0!, {r3-r5, r12} 595 subs r2, r2, #0x10 596 bge .Lmemmove_fsrcul2loop16 597 ldmia sp!, {r4, r5} 598 adds r2, r2, #0x0c 599 blt .Lmemmove_fsrcul2l4 600 601.Lmemmove_fsrcul2loop4: 602#ifdef __ARMEB__ 603 mov r12, lr, lsl #16 604#else 605 mov r12, lr, lsr #16 606#endif 607 ldr lr, [r1], #4 608#ifdef __ARMEB__ 609 orr r12, r12, lr, lsr #16 610#else 611 orr r12, r12, lr, lsl #16 612#endif 613 str r12, [r0], #4 614 subs r2, r2, #4 615 bge .Lmemmove_fsrcul2loop4 616 617.Lmemmove_fsrcul2l4: 618 sub r1, r1, #2 619 b .Lmemmove_fl4 620 621.Lmemmove_fsrcul3: 622 cmp r2, #0x0c 623 blt .Lmemmove_fsrcul3loop4 624 sub r2, r2, #0x0c 625 stmdb sp!, {r4, r5} 626 627.Lmemmove_fsrcul3loop16: 628#ifdef __ARMEB__ 629 mov r3, lr, lsl #24 630#else 631 mov r3, lr, lsr #24 632#endif 633 ldmia r1!, {r4, r5, r12, lr} 634#ifdef __ARMEB__ 635 orr r3, r3, r4, lsr #8 636 mov r4, r4, lsl #24 637 orr r4, r4, r5, lsr #8 638 mov r5, r5, lsl #24 639 orr r5, r5, r12, lsr #8 640 mov r12, r12, lsl #24 641 orr r12, r12, lr, lsr #8 642#else 643 orr r3, r3, r4, lsl #8 644 mov r4, r4, lsr #24 645 orr r4, r4, r5, lsl #8 646 mov r5, r5, lsr #24 647 orr r5, r5, r12, lsl #8 648 mov r12, r12, lsr #24 649 orr r12, r12, lr, lsl #8 650#endif 
651 stmia r0!, {r3-r5, r12} 652 subs r2, r2, #0x10 653 bge .Lmemmove_fsrcul3loop16 654 ldmia sp!, {r4, r5} 655 adds r2, r2, #0x0c 656 blt .Lmemmove_fsrcul3l4 657 658.Lmemmove_fsrcul3loop4: 659#ifdef __ARMEB__ 660 mov r12, lr, lsl #24 661#else 662 mov r12, lr, lsr #24 663#endif 664 ldr lr, [r1], #4 665#ifdef __ARMEB__ 666 orr r12, r12, lr, lsr #8 667#else 668 orr r12, r12, lr, lsl #8 669#endif 670 str r12, [r0], #4 671 subs r2, r2, #4 672 bge .Lmemmove_fsrcul3loop4 673 674.Lmemmove_fsrcul3l4: 675 sub r1, r1, #1 676 b .Lmemmove_fl4 677 678.Lmemmove_backwards: 679 add r1, r1, r2 680 add r0, r0, r2 681 subs r2, r2, #4 682 blt .Lmemmove_bl4 /* less than 4 bytes */ 683 ands r12, r0, #3 684 bne .Lmemmove_bdestul /* oh unaligned destination addr */ 685 ands r12, r1, #3 686 bne .Lmemmove_bsrcul /* oh unaligned source addr */ 687 688.Lmemmove_bt8: 689 /* We have aligned source and destination */ 690 subs r2, r2, #8 691 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */ 692 stmdb sp!, {r4, lr} 693 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ 694 blt .Lmemmove_bl32 695 696 /* blat 32 bytes at a time */ 697 /* XXX for really big copies perhaps we should use more registers */ 698.Lmemmove_bloop32: 699 ldmdb r1!, {r3, r4, r12, lr} 700 stmdb r0!, {r3, r4, r12, lr} 701 ldmdb r1!, {r3, r4, r12, lr} 702 stmdb r0!, {r3, r4, r12, lr} 703 subs r2, r2, #0x20 704 bge .Lmemmove_bloop32 705 706.Lmemmove_bl32: 707 cmn r2, #0x10 708 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 709 stmgedb r0!, {r3, r4, r12, lr} 710 subge r2, r2, #0x10 711 adds r2, r2, #0x14 712 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ 713 stmgedb r0!, {r3, r12, lr} 714 subge r2, r2, #0x0c 715 ldmia sp!, {r4, lr} 716 717.Lmemmove_bl12: 718 adds r2, r2, #8 719 blt .Lmemmove_bl4 720 subs r2, r2, #4 721 ldrlt r3, [r1, #-4]! 722 strlt r3, [r0, #-4]! 
723 ldmgedb r1!, {r3, r12} 724 stmgedb r0!, {r3, r12} 725 subge r2, r2, #4 726 727.Lmemmove_bl4: 728 /* less than 4 bytes to go */ 729 adds r2, r2, #4 730 RETeq /* done */ 731 732 /* copy the crud byte at a time */ 733 cmp r2, #2 734 ldrb r3, [r1, #-1]! 735 strb r3, [r0, #-1]! 736 ldrgeb r3, [r1, #-1]! 737 strgeb r3, [r0, #-1]! 738 ldrgtb r3, [r1, #-1]! 739 strgtb r3, [r0, #-1]! 740 RET 741 742 /* erg - unaligned destination */ 743.Lmemmove_bdestul: 744 cmp r12, #2 745 746 /* align destination with byte copies */ 747 ldrb r3, [r1, #-1]! 748 strb r3, [r0, #-1]! 749 ldrgeb r3, [r1, #-1]! 750 strgeb r3, [r0, #-1]! 751 ldrgtb r3, [r1, #-1]! 752 strgtb r3, [r0, #-1]! 753 subs r2, r2, r12 754 blt .Lmemmove_bl4 /* less than 4 bytes to go */ 755 ands r12, r1, #3 756 beq .Lmemmove_bt8 /* we have an aligned source */ 757 758 /* erg - unaligned source */ 759 /* This is where it gets nasty ... */ 760.Lmemmove_bsrcul: 761 bic r1, r1, #3 762 ldr r3, [r1, #0] 763 cmp r12, #2 764 blt .Lmemmove_bsrcul1 765 beq .Lmemmove_bsrcul2 766 cmp r2, #0x0c 767 blt .Lmemmove_bsrcul3loop4 768 sub r2, r2, #0x0c 769 stmdb sp!, {r4, r5, lr} 770 771.Lmemmove_bsrcul3loop16: 772#ifdef __ARMEB__ 773 mov lr, r3, lsr #8 774#else 775 mov lr, r3, lsl #8 776#endif 777 ldmdb r1!, {r3-r5, r12} 778#ifdef __ARMEB__ 779 orr lr, lr, r12, lsl #24 780 mov r12, r12, lsr #8 781 orr r12, r12, r5, lsl #24 782 mov r5, r5, lsr #8 783 orr r5, r5, r4, lsl #24 784 mov r4, r4, lsr #8 785 orr r4, r4, r3, lsl #24 786#else 787 orr lr, lr, r12, lsr #24 788 mov r12, r12, lsl #8 789 orr r12, r12, r5, lsr #24 790 mov r5, r5, lsl #8 791 orr r5, r5, r4, lsr #24 792 mov r4, r4, lsl #8 793 orr r4, r4, r3, lsr #24 794#endif 795 stmdb r0!, {r4, r5, r12, lr} 796 subs r2, r2, #0x10 797 bge .Lmemmove_bsrcul3loop16 798 ldmia sp!, {r4, r5, lr} 799 adds r2, r2, #0x0c 800 blt .Lmemmove_bsrcul3l4 801 802.Lmemmove_bsrcul3loop4: 803#ifdef __ARMEB__ 804 mov r12, r3, lsr #8 805#else 806 mov r12, r3, lsl #8 807#endif 808 ldr r3, [r1, #-4]! 
809#ifdef __ARMEB__ 810 orr r12, r12, r3, lsl #24 811#else 812 orr r12, r12, r3, lsr #24 813#endif 814 str r12, [r0, #-4]! 815 subs r2, r2, #4 816 bge .Lmemmove_bsrcul3loop4 817 818.Lmemmove_bsrcul3l4: 819 add r1, r1, #3 820 b .Lmemmove_bl4 821 822.Lmemmove_bsrcul2: 823 cmp r2, #0x0c 824 blt .Lmemmove_bsrcul2loop4 825 sub r2, r2, #0x0c 826 stmdb sp!, {r4, r5, lr} 827 828.Lmemmove_bsrcul2loop16: 829#ifdef __ARMEB__ 830 mov lr, r3, lsr #16 831#else 832 mov lr, r3, lsl #16 833#endif 834 ldmdb r1!, {r3-r5, r12} 835#ifdef __ARMEB__ 836 orr lr, lr, r12, lsl #16 837 mov r12, r12, lsr #16 838 orr r12, r12, r5, lsl #16 839 mov r5, r5, lsr #16 840 orr r5, r5, r4, lsl #16 841 mov r4, r4, lsr #16 842 orr r4, r4, r3, lsl #16 843#else 844 orr lr, lr, r12, lsr #16 845 mov r12, r12, lsl #16 846 orr r12, r12, r5, lsr #16 847 mov r5, r5, lsl #16 848 orr r5, r5, r4, lsr #16 849 mov r4, r4, lsl #16 850 orr r4, r4, r3, lsr #16 851#endif 852 stmdb r0!, {r4, r5, r12, lr} 853 subs r2, r2, #0x10 854 bge .Lmemmove_bsrcul2loop16 855 ldmia sp!, {r4, r5, lr} 856 adds r2, r2, #0x0c 857 blt .Lmemmove_bsrcul2l4 858 859.Lmemmove_bsrcul2loop4: 860#ifdef __ARMEB__ 861 mov r12, r3, lsr #16 862#else 863 mov r12, r3, lsl #16 864#endif 865 ldr r3, [r1, #-4]! 866#ifdef __ARMEB__ 867 orr r12, r12, r3, lsl #16 868#else 869 orr r12, r12, r3, lsr #16 870#endif 871 str r12, [r0, #-4]! 
872 subs r2, r2, #4 873 bge .Lmemmove_bsrcul2loop4 874 875.Lmemmove_bsrcul2l4: 876 add r1, r1, #2 877 b .Lmemmove_bl4 878 879.Lmemmove_bsrcul1: 880 cmp r2, #0x0c 881 blt .Lmemmove_bsrcul1loop4 882 sub r2, r2, #0x0c 883 stmdb sp!, {r4, r5, lr} 884 885.Lmemmove_bsrcul1loop32: 886#ifdef __ARMEB__ 887 mov lr, r3, lsr #24 888#else 889 mov lr, r3, lsl #24 890#endif 891 ldmdb r1!, {r3-r5, r12} 892#ifdef __ARMEB__ 893 orr lr, lr, r12, lsl #8 894 mov r12, r12, lsr #24 895 orr r12, r12, r5, lsl #8 896 mov r5, r5, lsr #24 897 orr r5, r5, r4, lsl #8 898 mov r4, r4, lsr #24 899 orr r4, r4, r3, lsl #8 900#else 901 orr lr, lr, r12, lsr #8 902 mov r12, r12, lsl #24 903 orr r12, r12, r5, lsr #8 904 mov r5, r5, lsl #24 905 orr r5, r5, r4, lsr #8 906 mov r4, r4, lsl #24 907 orr r4, r4, r3, lsr #8 908#endif 909 stmdb r0!, {r4, r5, r12, lr} 910 subs r2, r2, #0x10 911 bge .Lmemmove_bsrcul1loop32 912 ldmia sp!, {r4, r5, lr} 913 adds r2, r2, #0x0c 914 blt .Lmemmove_bsrcul1l4 915 916.Lmemmove_bsrcul1loop4: 917#ifdef __ARMEB__ 918 mov r12, r3, lsr #24 919#else 920 mov r12, r3, lsl #24 921#endif 922 ldr r3, [r1, #-4]! 923#ifdef __ARMEB__ 924 orr r12, r12, r3, lsl #8 925#else 926 orr r12, r12, r3, lsr #8 927#endif 928 str r12, [r0, #-4]! 
929 subs r2, r2, #4 930 bge .Lmemmove_bsrcul1loop4 931 932.Lmemmove_bsrcul1l4: 933 add r1, r1, #1 934 b .Lmemmove_bl4 935END(bcopy) 936END(memmove) 937 938#if !defined(_ARM_ARCH_5E) 939ENTRY(memcpy) 940 /* save leaf functions having to store this away */ 941 /* Do not check arm_memcpy if we're running from flash */ 942#ifdef FLASHADDR 943#if FLASHADDR > PHYSADDR 944 ldr r3, =FLASHADDR 945 cmp r3, pc 946 bls .Lnormal 947#else 948 ldr r3, =FLASHADDR 949 cmp r3, pc 950 bhi .Lnormal 951#endif 952#endif 953 ldr r3, .L_arm_memcpy 954 ldr r3, [r3] 955 cmp r3, #0 956 beq .Lnormal 957 ldr r3, .L_min_memcpy_size 958 ldr r3, [r3] 959 cmp r2, r3 960 blt .Lnormal 961 stmfd sp!, {r0-r2, r4, lr} 962 mov r3, #0 963 ldr r4, .L_arm_memcpy 964 mov lr, pc 965 ldr pc, [r4] 966 cmp r0, #0 967 ldmfd sp!, {r0-r2, r4, lr} 968 RETeq 969 970.Lnormal: 971 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */ 972 973 subs r2, r2, #4 974 blt .Lmemcpy_l4 /* less than 4 bytes */ 975 ands r12, r0, #3 976 bne .Lmemcpy_destul /* oh unaligned destination addr */ 977 ands r12, r1, #3 978 bne .Lmemcpy_srcul /* oh unaligned source addr */ 979 980.Lmemcpy_t8: 981 /* We have aligned source and destination */ 982 subs r2, r2, #8 983 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */ 984 subs r2, r2, #0x14 985 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */ 986 stmdb sp!, {r4} /* borrow r4 */ 987 988 /* blat 32 bytes at a time */ 989 /* XXX for really big copies perhaps we should use more registers */ 990.Lmemcpy_loop32: 991 ldmia r1!, {r3, r4, r12, lr} 992 stmia r0!, {r3, r4, r12, lr} 993 ldmia r1!, {r3, r4, r12, lr} 994 stmia r0!, {r3, r4, r12, lr} 995 subs r2, r2, #0x20 996 bge .Lmemcpy_loop32 997 998 cmn r2, #0x10 999 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 1000 stmgeia r0!, {r3, r4, r12, lr} 1001 subge r2, r2, #0x10 1002 ldmia sp!, {r4} /* return r4 */ 1003 1004.Lmemcpy_l32: 1005 adds r2, r2, #0x14 1006 1007 /* blat 12 bytes at a time */ 1008.Lmemcpy_loop12: 1009 
ldmgeia r1!, {r3, r12, lr} 1010 stmgeia r0!, {r3, r12, lr} 1011 subges r2, r2, #0x0c 1012 bge .Lmemcpy_loop12 1013 1014.Lmemcpy_l12: 1015 adds r2, r2, #8 1016 blt .Lmemcpy_l4 1017 1018 subs r2, r2, #4 1019 ldrlt r3, [r1], #4 1020 strlt r3, [r0], #4 1021 ldmgeia r1!, {r3, r12} 1022 stmgeia r0!, {r3, r12} 1023 subge r2, r2, #4 1024 1025.Lmemcpy_l4: 1026 /* less than 4 bytes to go */ 1027 adds r2, r2, #4 1028#ifdef __APCS_26_ 1029 ldmeqia sp!, {r0, pc}^ /* done */ 1030#else 1031 ldmeqia sp!, {r0, pc} /* done */ 1032#endif 1033 /* copy the crud byte at a time */ 1034 cmp r2, #2 1035 ldrb r3, [r1], #1 1036 strb r3, [r0], #1 1037 ldrgeb r3, [r1], #1 1038 strgeb r3, [r0], #1 1039 ldrgtb r3, [r1], #1 1040 strgtb r3, [r0], #1 1041 ldmia sp!, {r0, pc} 1042 1043 /* erg - unaligned destination */ 1044.Lmemcpy_destul: 1045 rsb r12, r12, #4 1046 cmp r12, #2 1047 1048 /* align destination with byte copies */ 1049 ldrb r3, [r1], #1 1050 strb r3, [r0], #1 1051 ldrgeb r3, [r1], #1 1052 strgeb r3, [r0], #1 1053 ldrgtb r3, [r1], #1 1054 strgtb r3, [r0], #1 1055 subs r2, r2, r12 1056 blt .Lmemcpy_l4 /* less the 4 bytes */ 1057 1058 ands r12, r1, #3 1059 beq .Lmemcpy_t8 /* we have an aligned source */ 1060 1061 /* erg - unaligned source */ 1062 /* This is where it gets nasty ... 
*/ 1063.Lmemcpy_srcul: 1064 bic r1, r1, #3 1065 ldr lr, [r1], #4 1066 cmp r12, #2 1067 bgt .Lmemcpy_srcul3 1068 beq .Lmemcpy_srcul2 1069 cmp r2, #0x0c 1070 blt .Lmemcpy_srcul1loop4 1071 sub r2, r2, #0x0c 1072 stmdb sp!, {r4, r5} 1073 1074.Lmemcpy_srcul1loop16: 1075 mov r3, lr, lsr #8 1076 ldmia r1!, {r4, r5, r12, lr} 1077 orr r3, r3, r4, lsl #24 1078 mov r4, r4, lsr #8 1079 orr r4, r4, r5, lsl #24 1080 mov r5, r5, lsr #8 1081 orr r5, r5, r12, lsl #24 1082 mov r12, r12, lsr #8 1083 orr r12, r12, lr, lsl #24 1084 stmia r0!, {r3-r5, r12} 1085 subs r2, r2, #0x10 1086 bge .Lmemcpy_srcul1loop16 1087 ldmia sp!, {r4, r5} 1088 adds r2, r2, #0x0c 1089 blt .Lmemcpy_srcul1l4 1090 1091.Lmemcpy_srcul1loop4: 1092 mov r12, lr, lsr #8 1093 ldr lr, [r1], #4 1094 orr r12, r12, lr, lsl #24 1095 str r12, [r0], #4 1096 subs r2, r2, #4 1097 bge .Lmemcpy_srcul1loop4 1098 1099.Lmemcpy_srcul1l4: 1100 sub r1, r1, #3 1101 b .Lmemcpy_l4 1102 1103.Lmemcpy_srcul2: 1104 cmp r2, #0x0c 1105 blt .Lmemcpy_srcul2loop4 1106 sub r2, r2, #0x0c 1107 stmdb sp!, {r4, r5} 1108 1109.Lmemcpy_srcul2loop16: 1110 mov r3, lr, lsr #16 1111 ldmia r1!, {r4, r5, r12, lr} 1112 orr r3, r3, r4, lsl #16 1113 mov r4, r4, lsr #16 1114 orr r4, r4, r5, lsl #16 1115 mov r5, r5, lsr #16 1116 orr r5, r5, r12, lsl #16 1117 mov r12, r12, lsr #16 1118 orr r12, r12, lr, lsl #16 1119 stmia r0!, {r3-r5, r12} 1120 subs r2, r2, #0x10 1121 bge .Lmemcpy_srcul2loop16 1122 ldmia sp!, {r4, r5} 1123 adds r2, r2, #0x0c 1124 blt .Lmemcpy_srcul2l4 1125 1126.Lmemcpy_srcul2loop4: 1127 mov r12, lr, lsr #16 1128 ldr lr, [r1], #4 1129 orr r12, r12, lr, lsl #16 1130 str r12, [r0], #4 1131 subs r2, r2, #4 1132 bge .Lmemcpy_srcul2loop4 1133 1134.Lmemcpy_srcul2l4: 1135 sub r1, r1, #2 1136 b .Lmemcpy_l4 1137 1138.Lmemcpy_srcul3: 1139 cmp r2, #0x0c 1140 blt .Lmemcpy_srcul3loop4 1141 sub r2, r2, #0x0c 1142 stmdb sp!, {r4, r5} 1143 1144.Lmemcpy_srcul3loop16: 1145 mov r3, lr, lsr #24 1146 ldmia r1!, {r4, r5, r12, lr} 1147 orr r3, r3, r4, lsl #8 1148 mov r4, 
r4, lsr #24 1149 orr r4, r4, r5, lsl #8 1150 mov r5, r5, lsr #24 1151 orr r5, r5, r12, lsl #8 1152 mov r12, r12, lsr #24 1153 orr r12, r12, lr, lsl #8 1154 stmia r0!, {r3-r5, r12} 1155 subs r2, r2, #0x10 1156 bge .Lmemcpy_srcul3loop16 1157 ldmia sp!, {r4, r5} 1158 adds r2, r2, #0x0c 1159 blt .Lmemcpy_srcul3l4 1160 1161.Lmemcpy_srcul3loop4: 1162 mov r12, lr, lsr #24 1163 ldr lr, [r1], #4 1164 orr r12, r12, lr, lsl #8 1165 str r12, [r0], #4 1166 subs r2, r2, #4 1167 bge .Lmemcpy_srcul3loop4 1168 1169.Lmemcpy_srcul3l4: 1170 sub r1, r1, #1 1171 b .Lmemcpy_l4 1172END(memcpy) 1173 1174#else 1175/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 1176ENTRY(memcpy) 1177 pld [r1] 1178 cmp r2, #0x0c 1179 ble .Lmemcpy_short /* <= 12 bytes */ 1180#ifdef FLASHADDR 1181#if FLASHADDR > PHYSADDR 1182 ldr r3, =FLASHADDR 1183 cmp r3, pc 1184 bls .Lnormal 1185#else 1186 ldr r3, =FLASHADDR 1187 cmp r3, pc 1188 bhi .Lnormal 1189#endif 1190#endif 1191 ldr r3, .L_arm_memcpy 1192 ldr r3, [r3] 1193 cmp r3, #0 1194 beq .Lnormal 1195 ldr r3, .L_min_memcpy_size 1196 ldr r3, [r3] 1197 cmp r2, r3 1198 blt .Lnormal 1199 stmfd sp!, {r0-r2, r4, lr} 1200 mov r3, #0 1201 ldr r4, .L_arm_memcpy 1202 mov lr, pc 1203 ldr pc, [r4] 1204 cmp r0, #0 1205 ldmfd sp!, {r0-r2, r4, lr} 1206 RETeq 1207.Lnormal: 1208 mov r3, r0 /* We must not clobber r0 */ 1209 1210 /* Word-align the destination buffer */ 1211 ands ip, r3, #0x03 /* Already word aligned? */ 1212 beq .Lmemcpy_wordaligned /* Yup */ 1213 cmp ip, #0x02 1214 ldrb ip, [r1], #0x01 1215 sub r2, r2, #0x01 1216 strb ip, [r3], #0x01 1217 ldrleb ip, [r1], #0x01 1218 suble r2, r2, #0x01 1219 strleb ip, [r3], #0x01 1220 ldrltb ip, [r1], #0x01 1221 sublt r2, r2, #0x01 1222 strltb ip, [r3], #0x01 1223 1224 /* Destination buffer is now word aligned */ 1225.Lmemcpy_wordaligned: 1226 ands ip, r1, #0x03 /* Is src also word-aligned? */ 1227 bne .Lmemcpy_bad_align /* Nope. 
Things just got bad */ 1228 1229 /* Quad-align the destination buffer */ 1230 tst r3, #0x07 /* Already quad aligned? */ 1231 ldrne ip, [r1], #0x04 1232 stmfd sp!, {r4-r9} /* Free up some registers */ 1233 subne r2, r2, #0x04 1234 strne ip, [r3], #0x04 1235 1236 /* Destination buffer quad aligned, source is at least word aligned */ 1237 subs r2, r2, #0x80 1238 blt .Lmemcpy_w_lessthan128 1239 1240 /* Copy 128 bytes at a time */ 1241.Lmemcpy_w_loop128: 1242 ldr r4, [r1], #0x04 /* LD:00-03 */ 1243 ldr r5, [r1], #0x04 /* LD:04-07 */ 1244 pld [r1, #0x18] /* Prefetch 0x20 */ 1245 ldr r6, [r1], #0x04 /* LD:08-0b */ 1246 ldr r7, [r1], #0x04 /* LD:0c-0f */ 1247 ldr r8, [r1], #0x04 /* LD:10-13 */ 1248 ldr r9, [r1], #0x04 /* LD:14-17 */ 1249 strd r4, [r3], #0x08 /* ST:00-07 */ 1250 ldr r4, [r1], #0x04 /* LD:18-1b */ 1251 ldr r5, [r1], #0x04 /* LD:1c-1f */ 1252 strd r6, [r3], #0x08 /* ST:08-0f */ 1253 ldr r6, [r1], #0x04 /* LD:20-23 */ 1254 ldr r7, [r1], #0x04 /* LD:24-27 */ 1255 pld [r1, #0x18] /* Prefetch 0x40 */ 1256 strd r8, [r3], #0x08 /* ST:10-17 */ 1257 ldr r8, [r1], #0x04 /* LD:28-2b */ 1258 ldr r9, [r1], #0x04 /* LD:2c-2f */ 1259 strd r4, [r3], #0x08 /* ST:18-1f */ 1260 ldr r4, [r1], #0x04 /* LD:30-33 */ 1261 ldr r5, [r1], #0x04 /* LD:34-37 */ 1262 strd r6, [r3], #0x08 /* ST:20-27 */ 1263 ldr r6, [r1], #0x04 /* LD:38-3b */ 1264 ldr r7, [r1], #0x04 /* LD:3c-3f */ 1265 strd r8, [r3], #0x08 /* ST:28-2f */ 1266 ldr r8, [r1], #0x04 /* LD:40-43 */ 1267 ldr r9, [r1], #0x04 /* LD:44-47 */ 1268 pld [r1, #0x18] /* Prefetch 0x60 */ 1269 strd r4, [r3], #0x08 /* ST:30-37 */ 1270 ldr r4, [r1], #0x04 /* LD:48-4b */ 1271 ldr r5, [r1], #0x04 /* LD:4c-4f */ 1272 strd r6, [r3], #0x08 /* ST:38-3f */ 1273 ldr r6, [r1], #0x04 /* LD:50-53 */ 1274 ldr r7, [r1], #0x04 /* LD:54-57 */ 1275 strd r8, [r3], #0x08 /* ST:40-47 */ 1276 ldr r8, [r1], #0x04 /* LD:58-5b */ 1277 ldr r9, [r1], #0x04 /* LD:5c-5f */ 1278 strd r4, [r3], #0x08 /* ST:48-4f */ 1279 ldr r4, [r1], #0x04 /* LD:60-63 */ 1280 ldr r5, 
[r1], #0x04 /* LD:64-67 */ 1281 pld [r1, #0x18] /* Prefetch 0x80 */ 1282 strd r6, [r3], #0x08 /* ST:50-57 */ 1283 ldr r6, [r1], #0x04 /* LD:68-6b */ 1284 ldr r7, [r1], #0x04 /* LD:6c-6f */ 1285 strd r8, [r3], #0x08 /* ST:58-5f */ 1286 ldr r8, [r1], #0x04 /* LD:70-73 */ 1287 ldr r9, [r1], #0x04 /* LD:74-77 */ 1288 strd r4, [r3], #0x08 /* ST:60-67 */ 1289 ldr r4, [r1], #0x04 /* LD:78-7b */ 1290 ldr r5, [r1], #0x04 /* LD:7c-7f */ 1291 strd r6, [r3], #0x08 /* ST:68-6f */ 1292 strd r8, [r3], #0x08 /* ST:70-77 */ 1293 subs r2, r2, #0x80 1294 strd r4, [r3], #0x08 /* ST:78-7f */ 1295 bge .Lmemcpy_w_loop128 1296 1297.Lmemcpy_w_lessthan128: 1298 adds r2, r2, #0x80 /* Adjust for extra sub */ 1299 ldmeqfd sp!, {r4-r9} 1300 RETeq /* Return now if done */ 1301 subs r2, r2, #0x20 1302 blt .Lmemcpy_w_lessthan32 1303 1304 /* Copy 32 bytes at a time */ 1305.Lmemcpy_w_loop32: 1306 ldr r4, [r1], #0x04 1307 ldr r5, [r1], #0x04 1308 pld [r1, #0x18] 1309 ldr r6, [r1], #0x04 1310 ldr r7, [r1], #0x04 1311 ldr r8, [r1], #0x04 1312 ldr r9, [r1], #0x04 1313 strd r4, [r3], #0x08 1314 ldr r4, [r1], #0x04 1315 ldr r5, [r1], #0x04 1316 strd r6, [r3], #0x08 1317 strd r8, [r3], #0x08 1318 subs r2, r2, #0x20 1319 strd r4, [r3], #0x08 1320 bge .Lmemcpy_w_loop32 1321 1322.Lmemcpy_w_lessthan32: 1323 adds r2, r2, #0x20 /* Adjust for extra sub */ 1324 ldmeqfd sp!, {r4-r9} 1325 RETeq /* Return now if done */ 1326 1327 and r4, r2, #0x18 1328 rsbs r4, r4, #0x18 1329 addne pc, pc, r4, lsl #1 1330 nop 1331 1332 /* At least 24 bytes remaining */ 1333 ldr r4, [r1], #0x04 1334 ldr r5, [r1], #0x04 1335 sub r2, r2, #0x08 1336 strd r4, [r3], #0x08 1337 1338 /* At least 16 bytes remaining */ 1339 ldr r4, [r1], #0x04 1340 ldr r5, [r1], #0x04 1341 sub r2, r2, #0x08 1342 strd r4, [r3], #0x08 1343 1344 /* At least 8 bytes remaining */ 1345 ldr r4, [r1], #0x04 1346 ldr r5, [r1], #0x04 1347 subs r2, r2, #0x08 1348 strd r4, [r3], #0x08 1349 1350 /* Less than 8 bytes remaining */ 1351 ldmfd sp!, {r4-r9} 1352 RETeq /* Return 
now if done */ 1353 subs r2, r2, #0x04 1354 ldrge ip, [r1], #0x04 1355 strge ip, [r3], #0x04 1356 RETeq /* Return now if done */ 1357 addlt r2, r2, #0x04 1358 ldrb ip, [r1], #0x01 1359 cmp r2, #0x02 1360 ldrgeb r2, [r1], #0x01 1361 strb ip, [r3], #0x01 1362 ldrgtb ip, [r1] 1363 strgeb r2, [r3], #0x01 1364 strgtb ip, [r3] 1365 RET 1366 1367 1368/* 1369 * At this point, it has not been possible to word align both buffers. 1370 * The destination buffer is word aligned, but the source buffer is not. 1371 */ 1372.Lmemcpy_bad_align: 1373 stmfd sp!, {r4-r7} 1374 bic r1, r1, #0x03 1375 cmp ip, #2 1376 ldr ip, [r1], #0x04 1377 bgt .Lmemcpy_bad3 1378 beq .Lmemcpy_bad2 1379 b .Lmemcpy_bad1 1380 1381.Lmemcpy_bad1_loop16: 1382#ifdef __ARMEB__ 1383 mov r4, ip, lsl #8 1384#else 1385 mov r4, ip, lsr #8 1386#endif 1387 ldr r5, [r1], #0x04 1388 pld [r1, #0x018] 1389 ldr r6, [r1], #0x04 1390 ldr r7, [r1], #0x04 1391 ldr ip, [r1], #0x04 1392#ifdef __ARMEB__ 1393 orr r4, r4, r5, lsr #24 1394 mov r5, r5, lsl #8 1395 orr r5, r5, r6, lsr #24 1396 mov r6, r6, lsl #8 1397 orr r6, r6, r7, lsr #24 1398 mov r7, r7, lsl #8 1399 orr r7, r7, ip, lsr #24 1400#else 1401 orr r4, r4, r5, lsl #24 1402 mov r5, r5, lsr #8 1403 orr r5, r5, r6, lsl #24 1404 mov r6, r6, lsr #8 1405 orr r6, r6, r7, lsl #24 1406 mov r7, r7, lsr #8 1407 orr r7, r7, ip, lsl #24 1408#endif 1409 str r4, [r3], #0x04 1410 str r5, [r3], #0x04 1411 str r6, [r3], #0x04 1412 str r7, [r3], #0x04 1413.Lmemcpy_bad1: 1414 subs r2, r2, #0x10 1415 bge .Lmemcpy_bad1_loop16 1416 1417 adds r2, r2, #0x10 1418 ldmeqfd sp!, {r4-r7} 1419 RETeq /* Return now if done */ 1420 subs r2, r2, #0x04 1421 sublt r1, r1, #0x03 1422 blt .Lmemcpy_bad_done 1423 1424.Lmemcpy_bad1_loop4: 1425#ifdef __ARMEB__ 1426 mov r4, ip, lsl #8 1427#else 1428 mov r4, ip, lsr #8 1429#endif 1430 ldr ip, [r1], #0x04 1431 subs r2, r2, #0x04 1432#ifdef __ARMEB__ 1433 orr r4, r4, ip, lsr #24 1434#else 1435 orr r4, r4, ip, lsl #24 1436#endif 1437 str r4, [r3], #0x04 1438 bge 
.Lmemcpy_bad1_loop4 1439 sub r1, r1, #0x03 1440 b .Lmemcpy_bad_done 1441 1442.Lmemcpy_bad2_loop16: 1443#ifdef __ARMEB__ 1444 mov r4, ip, lsl #16 1445#else 1446 mov r4, ip, lsr #16 1447#endif 1448 ldr r5, [r1], #0x04 1449 pld [r1, #0x018] 1450 ldr r6, [r1], #0x04 1451 ldr r7, [r1], #0x04 1452 ldr ip, [r1], #0x04 1453#ifdef __ARMEB__ 1454 orr r4, r4, r5, lsr #16 1455 mov r5, r5, lsl #16 1456 orr r5, r5, r6, lsr #16 1457 mov r6, r6, lsl #16 1458 orr r6, r6, r7, lsr #16 1459 mov r7, r7, lsl #16 1460 orr r7, r7, ip, lsr #16 1461#else 1462 orr r4, r4, r5, lsl #16 1463 mov r5, r5, lsr #16 1464 orr r5, r5, r6, lsl #16 1465 mov r6, r6, lsr #16 1466 orr r6, r6, r7, lsl #16 1467 mov r7, r7, lsr #16 1468 orr r7, r7, ip, lsl #16 1469#endif 1470 str r4, [r3], #0x04 1471 str r5, [r3], #0x04 1472 str r6, [r3], #0x04 1473 str r7, [r3], #0x04 1474.Lmemcpy_bad2: 1475 subs r2, r2, #0x10 1476 bge .Lmemcpy_bad2_loop16 1477 1478 adds r2, r2, #0x10 1479 ldmeqfd sp!, {r4-r7} 1480 RETeq /* Return now if done */ 1481 subs r2, r2, #0x04 1482 sublt r1, r1, #0x02 1483 blt .Lmemcpy_bad_done 1484 1485.Lmemcpy_bad2_loop4: 1486#ifdef __ARMEB__ 1487 mov r4, ip, lsl #16 1488#else 1489 mov r4, ip, lsr #16 1490#endif 1491 ldr ip, [r1], #0x04 1492 subs r2, r2, #0x04 1493#ifdef __ARMEB__ 1494 orr r4, r4, ip, lsr #16 1495#else 1496 orr r4, r4, ip, lsl #16 1497#endif 1498 str r4, [r3], #0x04 1499 bge .Lmemcpy_bad2_loop4 1500 sub r1, r1, #0x02 1501 b .Lmemcpy_bad_done 1502 1503.Lmemcpy_bad3_loop16: 1504#ifdef __ARMEB__ 1505 mov r4, ip, lsl #24 1506#else 1507 mov r4, ip, lsr #24 1508#endif 1509 ldr r5, [r1], #0x04 1510 pld [r1, #0x018] 1511 ldr r6, [r1], #0x04 1512 ldr r7, [r1], #0x04 1513 ldr ip, [r1], #0x04 1514#ifdef __ARMEB__ 1515 orr r4, r4, r5, lsr #8 1516 mov r5, r5, lsl #24 1517 orr r5, r5, r6, lsr #8 1518 mov r6, r6, lsl #24 1519 orr r6, r6, r7, lsr #8 1520 mov r7, r7, lsl #24 1521 orr r7, r7, ip, lsr #8 1522#else 1523 orr r4, r4, r5, lsl #8 1524 mov r5, r5, lsr #24 1525 orr r5, r5, r6, lsl #8 1526 
mov r6, r6, lsr #24 1527 orr r6, r6, r7, lsl #8 1528 mov r7, r7, lsr #24 1529 orr r7, r7, ip, lsl #8 1530#endif 1531 str r4, [r3], #0x04 1532 str r5, [r3], #0x04 1533 str r6, [r3], #0x04 1534 str r7, [r3], #0x04 1535.Lmemcpy_bad3: 1536 subs r2, r2, #0x10 1537 bge .Lmemcpy_bad3_loop16 1538 1539 adds r2, r2, #0x10 1540 ldmeqfd sp!, {r4-r7} 1541 RETeq /* Return now if done */ 1542 subs r2, r2, #0x04 1543 sublt r1, r1, #0x01 1544 blt .Lmemcpy_bad_done 1545 1546.Lmemcpy_bad3_loop4: 1547#ifdef __ARMEB__ 1548 mov r4, ip, lsl #24 1549#else 1550 mov r4, ip, lsr #24 1551#endif 1552 ldr ip, [r1], #0x04 1553 subs r2, r2, #0x04 1554#ifdef __ARMEB__ 1555 orr r4, r4, ip, lsr #8 1556#else 1557 orr r4, r4, ip, lsl #8 1558#endif 1559 str r4, [r3], #0x04 1560 bge .Lmemcpy_bad3_loop4 1561 sub r1, r1, #0x01 1562 1563.Lmemcpy_bad_done: 1564 ldmfd sp!, {r4-r7} 1565 adds r2, r2, #0x04 1566 RETeq 1567 ldrb ip, [r1], #0x01 1568 cmp r2, #0x02 1569 ldrgeb r2, [r1], #0x01 1570 strb ip, [r3], #0x01 1571 ldrgtb ip, [r1] 1572 strgeb r2, [r3], #0x01 1573 strgtb ip, [r3] 1574 RET 1575 1576 1577/* 1578 * Handle short copies (less than 16 bytes), possibly misaligned. 1579 * Some of these are *very* common, thanks to the network stack, 1580 * and so are handled specially. 
1581 */ 1582.Lmemcpy_short: 1583 add pc, pc, r2, lsl #2 1584 nop 1585 RET /* 0x00 */ 1586 b .Lmemcpy_bytewise /* 0x01 */ 1587 b .Lmemcpy_bytewise /* 0x02 */ 1588 b .Lmemcpy_bytewise /* 0x03 */ 1589 b .Lmemcpy_4 /* 0x04 */ 1590 b .Lmemcpy_bytewise /* 0x05 */ 1591 b .Lmemcpy_6 /* 0x06 */ 1592 b .Lmemcpy_bytewise /* 0x07 */ 1593 b .Lmemcpy_8 /* 0x08 */ 1594 b .Lmemcpy_bytewise /* 0x09 */ 1595 b .Lmemcpy_bytewise /* 0x0a */ 1596 b .Lmemcpy_bytewise /* 0x0b */ 1597 b .Lmemcpy_c /* 0x0c */ 1598.Lmemcpy_bytewise: 1599 mov r3, r0 /* We must not clobber r0 */ 1600 ldrb ip, [r1], #0x01 16011: subs r2, r2, #0x01 1602 strb ip, [r3], #0x01 1603 ldrneb ip, [r1], #0x01 1604 bne 1b 1605 RET 1606 1607/****************************************************************************** 1608 * Special case for 4 byte copies 1609 */ 1610#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1611#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1612 LMEMCPY_4_PAD 1613.Lmemcpy_4: 1614 and r2, r1, #0x03 1615 orr r2, r2, r0, lsl #2 1616 ands r2, r2, #0x0f 1617 sub r3, pc, #0x14 1618 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 1619 1620/* 1621 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1622 */ 1623 ldr r2, [r1] 1624 str r2, [r0] 1625 RET 1626 LMEMCPY_4_PAD 1627 1628/* 1629 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1630 */ 1631 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1632 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1633#ifdef __ARMEB__ 1634 mov r3, r3, lsl #8 /* r3 = 012. 
*/ 1635 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1636#else 1637 mov r3, r3, lsr #8 /* r3 = .210 */ 1638 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1639#endif 1640 str r3, [r0] 1641 RET 1642 LMEMCPY_4_PAD 1643 1644/* 1645 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1646 */ 1647#ifdef __ARMEB__ 1648 ldrh r3, [r1] 1649 ldrh r2, [r1, #0x02] 1650#else 1651 ldrh r3, [r1, #0x02] 1652 ldrh r2, [r1] 1653#endif 1654 orr r3, r2, r3, lsl #16 1655 str r3, [r0] 1656 RET 1657 LMEMCPY_4_PAD 1658 1659/* 1660 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1661 */ 1662 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1663 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1664#ifdef __ARMEB__ 1665 mov r3, r3, lsl #24 /* r3 = 0... */ 1666 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1667#else 1668 mov r3, r3, lsr #24 /* r3 = ...0 */ 1669 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1670#endif 1671 str r3, [r0] 1672 RET 1673 LMEMCPY_4_PAD 1674 1675/* 1676 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1677 */ 1678 ldr r2, [r1] 1679#ifdef __ARMEB__ 1680 strb r2, [r0, #0x03] 1681 mov r3, r2, lsr #8 1682 mov r1, r2, lsr #24 1683 strb r1, [r0] 1684#else 1685 strb r2, [r0] 1686 mov r3, r2, lsr #8 1687 mov r1, r2, lsr #24 1688 strb r1, [r0, #0x03] 1689#endif 1690 strh r3, [r0, #0x01] 1691 RET 1692 LMEMCPY_4_PAD 1693 1694/* 1695 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1696 */ 1697 ldrb r2, [r1] 1698 ldrh r3, [r1, #0x01] 1699 ldrb r1, [r1, #0x03] 1700 strb r2, [r0] 1701 strh r3, [r0, #0x01] 1702 strb r1, [r0, #0x03] 1703 RET 1704 LMEMCPY_4_PAD 1705 1706/* 1707 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1708 */ 1709 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1710 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ 1711#ifdef __ARMEB__ 1712 mov r1, r2, lsr #8 /* r1 = ...0 */ 1713 strb r1, [r0] 1714 mov r2, r2, lsl #8 /* r2 = .01. 
*/ 1715 orr r2, r2, r3, lsr #8 /* r2 = .012 */ 1716#else 1717 strb r2, [r0] 1718 mov r2, r2, lsr #8 /* r2 = ...1 */ 1719 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1720 mov r3, r3, lsr #8 /* r3 = ...3 */ 1721#endif 1722 strh r2, [r0, #0x01] 1723 strb r3, [r0, #0x03] 1724 RET 1725 LMEMCPY_4_PAD 1726 1727/* 1728 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1729 */ 1730 ldrb r2, [r1] 1731 ldrh r3, [r1, #0x01] 1732 ldrb r1, [r1, #0x03] 1733 strb r2, [r0] 1734 strh r3, [r0, #0x01] 1735 strb r1, [r0, #0x03] 1736 RET 1737 LMEMCPY_4_PAD 1738 1739/* 1740 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1741 */ 1742 ldr r2, [r1] 1743#ifdef __ARMEB__ 1744 strh r2, [r0, #0x02] 1745 mov r3, r2, lsr #16 1746 strh r3, [r0] 1747#else 1748 strh r2, [r0] 1749 mov r3, r2, lsr #16 1750 strh r3, [r0, #0x02] 1751#endif 1752 RET 1753 LMEMCPY_4_PAD 1754 1755/* 1756 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1757 */ 1758 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1759 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1760 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1761 strh r1, [r0] 1762#ifdef __ARMEB__ 1763 mov r2, r2, lsl #8 /* r2 = 012. */ 1764 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1765#else 1766 mov r2, r2, lsr #24 /* r2 = ...2 */ 1767 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1768#endif 1769 strh r2, [r0, #0x02] 1770 RET 1771 LMEMCPY_4_PAD 1772 1773/* 1774 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1775 */ 1776 ldrh r2, [r1] 1777 ldrh r3, [r1, #0x02] 1778 strh r2, [r0] 1779 strh r3, [r0, #0x02] 1780 RET 1781 LMEMCPY_4_PAD 1782 1783/* 1784 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1785 */ 1786 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1787 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1788 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1789 strh r1, [r0, #0x02] 1790#ifdef __ARMEB__ 1791 mov r3, r3, lsr #24 /* r3 = ...1 */ 1792 orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 1793#else 1794 mov r3, r3, lsl #8 /* r3 = 321. 
*/ 1795 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1796#endif 1797 strh r3, [r0] 1798 RET 1799 LMEMCPY_4_PAD 1800 1801/* 1802 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1803 */ 1804 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1805#ifdef __ARMEB__ 1806 strb r2, [r0, #0x03] 1807 mov r3, r2, lsr #8 1808 mov r1, r2, lsr #24 1809 strh r3, [r0, #0x01] 1810 strb r1, [r0] 1811#else 1812 strb r2, [r0] 1813 mov r3, r2, lsr #8 1814 mov r1, r2, lsr #24 1815 strh r3, [r0, #0x01] 1816 strb r1, [r0, #0x03] 1817#endif 1818 RET 1819 LMEMCPY_4_PAD 1820 1821/* 1822 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1823 */ 1824 ldrb r2, [r1] 1825 ldrh r3, [r1, #0x01] 1826 ldrb r1, [r1, #0x03] 1827 strb r2, [r0] 1828 strh r3, [r0, #0x01] 1829 strb r1, [r0, #0x03] 1830 RET 1831 LMEMCPY_4_PAD 1832 1833/* 1834 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1835 */ 1836#ifdef __ARMEB__ 1837 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1838 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1839 strb r3, [r0, #0x03] 1840 mov r3, r3, lsr #8 /* r3 = ...2 */ 1841 orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 1842 strh r3, [r0, #0x01] 1843 mov r2, r2, lsr #8 /* r2 = ...0 */ 1844 strb r2, [r0] 1845#else 1846 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1847 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1848 strb r2, [r0] 1849 mov r2, r2, lsr #8 /* r2 = ...1 */ 1850 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1851 strh r2, [r0, #0x01] 1852 mov r3, r3, lsr #8 /* r3 = ...3 */ 1853 strb r3, [r0, #0x03] 1854#endif 1855 RET 1856 LMEMCPY_4_PAD 1857 1858/* 1859 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1860 */ 1861 ldrb r2, [r1] 1862 ldrh r3, [r1, #0x01] 1863 ldrb r1, [r1, #0x03] 1864 strb r2, [r0] 1865 strh r3, [r0, #0x01] 1866 strb r1, [r0, #0x03] 1867 RET 1868 LMEMCPY_4_PAD 1869 1870 1871/****************************************************************************** 1872 * Special case for 6 byte copies 1873 */ 1874#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 1875#define LMEMCPY_6_PAD 
.align LMEMCPY_6_LOG2 1876 LMEMCPY_6_PAD 1877.Lmemcpy_6: 1878 and r2, r1, #0x03 1879 orr r2, r2, r0, lsl #2 1880 ands r2, r2, #0x0f 1881 sub r3, pc, #0x14 1882 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 1883 1884/* 1885 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1886 */ 1887 ldr r2, [r1] 1888 ldrh r3, [r1, #0x04] 1889 str r2, [r0] 1890 strh r3, [r0, #0x04] 1891 RET 1892 LMEMCPY_6_PAD 1893 1894/* 1895 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1896 */ 1897 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1898 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 1899#ifdef __ARMEB__ 1900 mov r2, r2, lsl #8 /* r2 = 012. */ 1901 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1902#else 1903 mov r2, r2, lsr #8 /* r2 = .210 */ 1904 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 1905#endif 1906 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 1907 str r2, [r0] 1908 strh r3, [r0, #0x04] 1909 RET 1910 LMEMCPY_6_PAD 1911 1912/* 1913 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1914 */ 1915 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1916 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1917#ifdef __ARMEB__ 1918 mov r1, r3, lsr #16 /* r1 = ..23 */ 1919 orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 1920 str r1, [r0] 1921 strh r3, [r0, #0x04] 1922#else 1923 mov r1, r3, lsr #16 /* r1 = ..54 */ 1924 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1925 str r2, [r0] 1926 strh r1, [r0, #0x04] 1927#endif 1928 RET 1929 LMEMCPY_6_PAD 1930 1931/* 1932 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1933 */ 1934 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1935 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 1936 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */ 1937#ifdef __ARMEB__ 1938 mov r2, r2, lsl #24 /* r2 = 0... */ 1939 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1940 mov r3, r3, lsl #8 /* r3 = 234. */ 1941 orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 1942#else 1943 mov r2, r2, lsr #24 /* r2 = ...0 */ 1944 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1945 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 1946 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 1947#endif 1948 str r2, [r0] 1949 strh r1, [r0, #0x04] 1950 RET 1951 LMEMCPY_6_PAD 1952 1953/* 1954 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1955 */ 1956 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1957 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 1958 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1959 strh r1, [r0, #0x01] 1960#ifdef __ARMEB__ 1961 mov r1, r3, lsr #24 /* r1 = ...0 */ 1962 strb r1, [r0] 1963 mov r3, r3, lsl #8 /* r3 = 123. */ 1964 orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 1965#else 1966 strb r3, [r0] 1967 mov r3, r3, lsr #24 /* r3 = ...3 */ 1968 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 1969 mov r2, r2, lsr #8 /* r2 = ...5 */ 1970#endif 1971 strh r3, [r0, #0x03] 1972 strb r2, [r0, #0x05] 1973 RET 1974 LMEMCPY_6_PAD 1975 1976/* 1977 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1978 */ 1979 ldrb r2, [r1] 1980 ldrh r3, [r1, #0x01] 1981 ldrh ip, [r1, #0x03] 1982 ldrb r1, [r1, #0x05] 1983 strb r2, [r0] 1984 strh r3, [r0, #0x01] 1985 strh ip, [r0, #0x03] 1986 strb r1, [r0, #0x05] 1987 RET 1988 LMEMCPY_6_PAD 1989 1990/* 1991 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1992 */ 1993 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1994 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1995#ifdef __ARMEB__ 1996 mov r3, r2, lsr #8 /* r3 = ...0 */ 1997 strb r3, [r0] 1998 strb r1, [r0, #0x05] 1999 mov r3, r1, lsr #8 /* r3 = .234 */ 2000 strh r3, [r0, #0x03] 2001 mov r3, r2, lsl #8 /* r3 = .01. 
*/ 2002 orr r3, r3, r1, lsr #24 /* r3 = .012 */ 2003 strh r3, [r0, #0x01] 2004#else 2005 strb r2, [r0] 2006 mov r3, r1, lsr #24 2007 strb r3, [r0, #0x05] 2008 mov r3, r1, lsr #8 /* r3 = .543 */ 2009 strh r3, [r0, #0x03] 2010 mov r3, r2, lsr #8 /* r3 = ...1 */ 2011 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 2012 strh r3, [r0, #0x01] 2013#endif 2014 RET 2015 LMEMCPY_6_PAD 2016 2017/* 2018 * 0111: dst is 8-bit aligned, src is 8-bit aligned 2019 */ 2020 ldrb r2, [r1] 2021 ldrh r3, [r1, #0x01] 2022 ldrh ip, [r1, #0x03] 2023 ldrb r1, [r1, #0x05] 2024 strb r2, [r0] 2025 strh r3, [r0, #0x01] 2026 strh ip, [r0, #0x03] 2027 strb r1, [r0, #0x05] 2028 RET 2029 LMEMCPY_6_PAD 2030 2031/* 2032 * 1000: dst is 16-bit aligned, src is 32-bit aligned 2033 */ 2034#ifdef __ARMEB__ 2035 ldr r2, [r1] /* r2 = 0123 */ 2036 ldrh r3, [r1, #0x04] /* r3 = ..45 */ 2037 mov r1, r2, lsr #16 /* r1 = ..01 */ 2038 orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 2039 strh r1, [r0] 2040 str r3, [r0, #0x02] 2041#else 2042 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 2043 ldr r3, [r1] /* r3 = 3210 */ 2044 mov r2, r2, lsl #16 /* r2 = 54.. */ 2045 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 2046 strh r3, [r0] 2047 str r2, [r0, #0x02] 2048#endif 2049 RET 2050 LMEMCPY_6_PAD 2051 2052/* 2053 * 1001: dst is 16-bit aligned, src is 8-bit aligned 2054 */ 2055 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2056 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 2057 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2058#ifdef __ARMEB__ 2059 mov r2, r2, lsr #8 /* r2 = .345 */ 2060 orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 2061#else 2062 mov r2, r2, lsl #8 /* r2 = 543. 
*/ 2063 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 2064#endif 2065 strh r1, [r0] 2066 str r2, [r0, #0x02] 2067 RET 2068 LMEMCPY_6_PAD 2069 2070/* 2071 * 1010: dst is 16-bit aligned, src is 16-bit aligned 2072 */ 2073 ldrh r2, [r1] 2074 ldr r3, [r1, #0x02] 2075 strh r2, [r0] 2076 str r3, [r0, #0x02] 2077 RET 2078 LMEMCPY_6_PAD 2079 2080/* 2081 * 1011: dst is 16-bit aligned, src is 8-bit aligned 2082 */ 2083 ldrb r3, [r1] /* r3 = ...0 */ 2084 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2085 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 2086#ifdef __ARMEB__ 2087 mov r3, r3, lsl #8 /* r3 = ..0. */ 2088 orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 2089 orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 2090#else 2091 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2092 mov r1, r1, lsl #24 /* r1 = 5... */ 2093 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 2094#endif 2095 strh r3, [r0] 2096 str r1, [r0, #0x02] 2097 RET 2098 LMEMCPY_6_PAD 2099 2100/* 2101 * 1100: dst is 8-bit aligned, src is 32-bit aligned 2102 */ 2103 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2104 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 2105#ifdef __ARMEB__ 2106 mov r3, r2, lsr #24 /* r3 = ...0 */ 2107 strb r3, [r0] 2108 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 2109 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2110#else 2111 strb r2, [r0] 2112 mov r2, r2, lsr #8 /* r2 = .321 */ 2113 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 2114 mov r1, r1, lsr #8 /* r1 = ...5 */ 2115#endif 2116 str r2, [r0, #0x01] 2117 strb r1, [r0, #0x05] 2118 RET 2119 LMEMCPY_6_PAD 2120 2121/* 2122 * 1101: dst is 8-bit aligned, src is 8-bit aligned 2123 */ 2124 ldrb r2, [r1] 2125 ldrh r3, [r1, #0x01] 2126 ldrh ip, [r1, #0x03] 2127 ldrb r1, [r1, #0x05] 2128 strb r2, [r0] 2129 strh r3, [r0, #0x01] 2130 strh ip, [r0, #0x03] 2131 strb r1, [r0, #0x05] 2132 RET 2133 LMEMCPY_6_PAD 2134 2135/* 2136 * 1110: dst is 8-bit aligned, src is 16-bit aligned 2137 */ 2138 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2139 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 2140#ifdef __ARMEB__ 2141 mov r3, r2, lsr #8 /* r3 = ...0 */ 2142 strb r3, [r0] 2143 mov r2, r2, lsl #24 /* r2 = 1... */ 2144 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2145#else 2146 strb r2, [r0] 2147 mov r2, r2, lsr #8 /* r2 = ...1 */ 2148 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 2149 mov r1, r1, lsr #24 /* r1 = ...5 */ 2150#endif 2151 str r2, [r0, #0x01] 2152 strb r1, [r0, #0x05] 2153 RET 2154 LMEMCPY_6_PAD 2155 2156/* 2157 * 1111: dst is 8-bit aligned, src is 8-bit aligned 2158 */ 2159 ldrb r2, [r1] 2160 ldr r3, [r1, #0x01] 2161 ldrb r1, [r1, #0x05] 2162 strb r2, [r0] 2163 str r3, [r0, #0x01] 2164 strb r1, [r0, #0x05] 2165 RET 2166 LMEMCPY_6_PAD 2167 2168 2169/****************************************************************************** 2170 * Special case for 8 byte copies 2171 */ 2172#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 2173#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 2174 LMEMCPY_8_PAD 2175.Lmemcpy_8: 2176 and r2, r1, #0x03 2177 orr r2, r2, r0, lsl #2 2178 ands r2, r2, #0x0f 2179 sub r3, pc, #0x14 2180 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 2181 2182/* 2183 * 0000: dst is 32-bit aligned, src is 32-bit aligned 2184 */ 2185 ldr r2, [r1] 2186 ldr r3, [r1, #0x04] 2187 str r2, [r0] 2188 str r3, [r0, #0x04] 
2189 RET 2190 LMEMCPY_8_PAD 2191 2192/* 2193 * 0001: dst is 32-bit aligned, src is 8-bit aligned 2194 */ 2195 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2196 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 2197 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2198#ifdef __ARMEB__ 2199 mov r3, r3, lsl #8 /* r3 = 012. */ 2200 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 2201 orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 2202#else 2203 mov r3, r3, lsr #8 /* r3 = .210 */ 2204 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 2205 mov r1, r1, lsl #24 /* r1 = 7... */ 2206 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 2207#endif 2208 str r3, [r0] 2209 str r2, [r0, #0x04] 2210 RET 2211 LMEMCPY_8_PAD 2212 2213/* 2214 * 0010: dst is 32-bit aligned, src is 16-bit aligned 2215 */ 2216 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2217 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2218 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2219#ifdef __ARMEB__ 2220 mov r2, r2, lsl #16 /* r2 = 01.. */ 2221 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2222 orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 2223#else 2224 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2225 mov r3, r3, lsr #16 /* r3 = ..54 */ 2226 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 2227#endif 2228 str r2, [r0] 2229 str r3, [r0, #0x04] 2230 RET 2231 LMEMCPY_8_PAD 2232 2233/* 2234 * 0011: dst is 32-bit aligned, src is 8-bit aligned 2235 */ 2236 ldrb r3, [r1] /* r3 = ...0 */ 2237 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2238 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 2239#ifdef __ARMEB__ 2240 mov r3, r3, lsl #24 /* r3 = 0... */ 2241 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 2242 mov r2, r2, lsl #24 /* r2 = 4... 
*/ 2243 orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 2244#else 2245 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2246 mov r2, r2, lsr #24 /* r2 = ...4 */ 2247 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 2248#endif 2249 str r3, [r0] 2250 str r2, [r0, #0x04] 2251 RET 2252 LMEMCPY_8_PAD 2253 2254/* 2255 * 0100: dst is 8-bit aligned, src is 32-bit aligned 2256 */ 2257 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 2258 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 2259#ifdef __ARMEB__ 2260 mov r1, r3, lsr #24 /* r1 = ...0 */ 2261 strb r1, [r0] 2262 mov r1, r3, lsr #8 /* r1 = .012 */ 2263 strb r2, [r0, #0x07] 2264 mov r3, r3, lsl #24 /* r3 = 3... */ 2265 orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 2266#else 2267 strb r3, [r0] 2268 mov r1, r2, lsr #24 /* r1 = ...7 */ 2269 strb r1, [r0, #0x07] 2270 mov r1, r3, lsr #8 /* r1 = .321 */ 2271 mov r3, r3, lsr #24 /* r3 = ...3 */ 2272 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 2273#endif 2274 strh r1, [r0, #0x01] 2275 str r3, [r0, #0x03] 2276 RET 2277 LMEMCPY_8_PAD 2278 2279/* 2280 * 0101: dst is 8-bit aligned, src is 8-bit aligned 2281 */ 2282 ldrb r2, [r1] 2283 ldrh r3, [r1, #0x01] 2284 ldr ip, [r1, #0x03] 2285 ldrb r1, [r1, #0x07] 2286 strb r2, [r0] 2287 strh r3, [r0, #0x01] 2288 str ip, [r0, #0x03] 2289 strb r1, [r0, #0x07] 2290 RET 2291 LMEMCPY_8_PAD 2292 2293/* 2294 * 0110: dst is 8-bit aligned, src is 16-bit aligned 2295 */ 2296 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2297 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2298 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2299#ifdef __ARMEB__ 2300 mov ip, r2, lsr #8 /* ip = ...0 */ 2301 strb ip, [r0] 2302 mov ip, r2, lsl #8 /* ip = .01. */ 2303 orr ip, ip, r3, lsr #24 /* ip = .012 */ 2304 strb r1, [r0, #0x07] 2305 mov r3, r3, lsl #8 /* r3 = 345. 
*/
/*
 * NOTE(review): this chunk resumes inside memcpy's 8-byte special-case
 * jump table (ENTRY(memcpy) and the dispatcher are above this view).
 *
 * Comment notation used throughout: digits 0-7 (0-B for the 12-byte
 * cases) name SOURCE bytes by index, so "r3 = 3456" means r3 holds
 * source bytes 3..6 packed into a word.  "x" marks a byte outside the
 * copied range, "." an undefined/zero byte position.
 *
 * Each case label "ddss" encodes (dst & 3) in the top two bits and
 * (src & 3) in the bottom two (see the .Lmemcpy_c dispatcher below);
 * every case is padded to a fixed power-of-two slot so the dispatcher
 * can reach it with a shift-and-add on pc.
 */
	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
#else
	strb	r2, [r0]		/* 0 */
	mov	ip, r1, lsr #8		/* ip = ...7 */
	strb	ip, [r0, #0x07]		/* 7 */
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r3, r3, lsr #8		/* r3 = .543 */
	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
#endif
	strh	ip, [r0, #0x01]
	str	r3, [r0, #0x03]
	RET
	LMEMCPY_8_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
#ifdef __ARMEB__
	strh	r3, [r0, #0x01]
	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
#else
	strh	ip, [r0, #0x01]
	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
#endif
	str	r2, [r0, #0x03]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, r3, lsr #16		/* r1 = ..45 */
	orr	r2, r1, r2, lsl #16	/* r2 = 2345 */
#else
	strh	r2, [r0]
	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
#endif
	str	r2, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	/*
	 * src sits at byte 1 of its word, so [r1, #-1] is a
	 * word-aligned load; the "x" byte is shifted away below.
	 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
#ifdef __ARMEB__
	mov	r1, r2, lsl #24		/* r1 = 2... */
	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
#else
	mov	r1, r2, lsr #24		/* r1 = ...2 */
	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
#endif
	str	r1, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	ip, [r1, #0x02]
	ldrh	r3, [r1, #0x06]
	strh	r2, [r0]
	str	ip, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	ip, [r1]		/* ip = ...0 */
	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
	strh	r1, [r0, #0x06]
#ifdef __ARMEB__
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
	mov	r2, r2, lsr #24		/* r2 = ...1 */
	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
#else
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
#endif
	str	r3, [r0, #0x02]
	strh	r2, [r0]
	RET
	LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
	strh	r1, [r0, #0x05]
#ifdef __ARMEB__
	strb	r3, [r0, #0x07]
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
#else
	strb	r2, [r0]
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
	str	r2, [r0, #0x01]
#endif
	RET
	LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
#ifdef __ARMEB__
	strh	ip, [r0, #0x05]
	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
#else
	strh	r3, [r0, #0x05]
	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #24		/* ip = 1... */
	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
	strb	r1, [r0, #0x07]
	mov	r1, r1, lsr #8		/* r1 = ...6 */
	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
#else
	strb	r2, [r0]
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r2, r1, lsr #8		/* r2 = ...7 */
	strb	r2, [r0, #0x07]
	mov	r1, r1, lsl #8		/* r1 = .76. */
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
#endif
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	RET
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/******************************************************************************
 * Special case for 12 byte copies
 */
#define	LMEMCPY_C_LOG2	7	/* 128 bytes: each case below fits one slot */
#define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
.Lmemcpy_c:
	/*
	 * Build a 4-bit case index "ddss" from the low alignment bits of
	 * dst (r0) and src (r1), then jump into the table of 128-byte
	 * slots that follows.  The 5 dispatch instructions occupy 0x14
	 * bytes, and pc reads as ".+8", so "pc - 0x14" is .Lmemcpy_c
	 * itself (the base of slot 0).  Case 0000 simply falls through.
	 */
	and	r2, r1, #0x03		/* r2 = src & 3 */
	orr	r2, r2, r0, lsl #2	/* r2 = (dst << 2) | (src & 3) */
	ands	r2, r2, #0x0f		/* r2 = case index "ddss" */
	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c (slot base) */
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x (aligned load) */
#ifdef __ARMEB__
	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsr #24		/* r2 = ...7 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
	mov	r1, r1, lsl #8		/* r1 = 012. */
	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
#else
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x04]
	str	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #16		/* r3 = 45.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #24		/* r3 = 4... */
	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r2, lsl #24		/* r1 = 3... */
	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
	mov	r1, r3, lsl #24		/* r1 = 7... */
	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
#else
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
#endif
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	/* Same alignment on both sides: byte/half/word pieces copy 1:1. */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, ror #8		/* r2 = 1..0 */
	strb	r2, [r0]
	mov	r2, r2, lsr #16		/* r2 = ..1. */
	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsl #8		/* r2 = 345. */
	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
	mov	r2, ip, lsl #8		/* r2 = 789. */
	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
#endif
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	strb	r2, [r0]
#ifdef __ARMEB__
	mov	r2, r3, lsr #16		/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsl #16		/* r3 = 34.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
	mov	ip, ip, lsl #16		/* ip = 78.. */
	orr	ip, ip, r1, lsr #16	/* ip = 789A */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
#else
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
#endif
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, ip, lsl #16		/* r1 = 23.. */
	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
	mov	r3, r3, lsl #16		/* r3 = 67.. */
	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
#else
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
#endif
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x (aligned load) */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 2... */
	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
	mov	r3, r3, lsl #24		/* r3 = 6... */
	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
#else
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
#endif
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #24		/* r2 = ...9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
	mov	r1, r1, lsl #8		/* r1 = ..0. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
#else
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
	mov	r2, ip, lsl #8		/* r2 = 567. */
	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
	str	r2, [r0, #0x05]
	mov	r2, r1, lsr #8		/* r2 = ..9A */
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x (aligned load) */
	strb	r2, [r0, #0x0b]
#ifdef __ARMEB__
	strh	r3, [r0, #0x09]
	mov	r3, r3, lsr #16		/* r3 = ..78 */
	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
	mov	ip, ip, lsr #16		/* ip = ..34 */
	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
	mov	r1, r1, lsr #16		/* r1 = ..x0 */
#else
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43.. */
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
#endif
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
	ldr	ip, [r1, #0x06]		/* ip = 6789 */
	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
	ldrh	r1, [r1]		/* r1 = ..01 */
	strb	r2, [r0, #0x0b]
	mov	r2, r2, lsr #8		/* r2 = ...A */
	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
	mov	ip, ip, lsr #8		/* ip = .678 */
	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
	mov	r3, r3, lsr #8		/* r3 = .234 */
	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
	mov	r1, r1, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
#else
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	/* Same alignment on both sides: byte/word/half pieces copy 1:1. */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	RET
END(memcpy)
#endif /* _ARM_ARCH_5E */

#ifdef GPROF

/*
 * NOTE(review): empty (nop-only) entry points emitted only under GPROF.
 * Presumably these exist solely as named symbols so profiling output can
 * attribute time to user/trap/interrupt boundaries -- confirm against
 * the platform's gprof support before relying on this description.
 */
ENTRY(user)
	nop
ENTRY(btrap)
	nop
ENTRY(etrap)
	nop
ENTRY(bintr)
	nop
ENTRY(eintr)
	nop

#endif