1/*- 2 * Copyright (c) 2004 Olivier Houchard 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * Copyright 2003 Wasabi Systems, Inc. 28 * All rights reserved. 29 * 30 * Written by Steve C. Woodford for Wasabi Systems, Inc. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed for the NetBSD Project by 43 * Wasabi Systems, Inc. 44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45 * or promote products derived from this software without specific prior 46 * written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * POSSIBILITY OF SUCH DAMAGE. 59 */ 60/* 61 * Copyright (c) 1997 The NetBSD Foundation, Inc. 62 * All rights reserved. 63 * 64 * This code is derived from software contributed to The NetBSD Foundation 65 * by Neil A. Carson and Mark Brinicombe 66 * 67 * Redistribution and use in source and binary forms, with or without 68 * modification, are permitted provided that the following conditions 69 * are met: 70 * 1. Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer. 72 * 2. 
Redistributions in binary form must reproduce the above copyright 73 * notice, this list of conditions and the following disclaimer in the 74 * documentation and/or other materials provided with the distribution. 75 * 76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 86 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#include "assym.s"

/*
 * Literal pool: addresses of tunable hook pointers.  When the machine-
 * dependent code installs a non-NULL _arm_memcpy/_arm_bzero hook, copies
 * and zero-fills at least as large as the corresponding _min_* threshold
 * are handed off to that hook instead of the generic loops below.
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_arm_bzero:
	.word	_C_LABEL(_arm_bzero)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)
.L_min_bzero_size:
	.word	_C_LABEL(_min_bzero_size)

/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 */
/* LINTSTUB: Func: void bzero(void *, size_t) */
ENTRY(bzero)
	/*
	 * bzero(dst=r0, len=r1).  If an _arm_bzero hook is installed and
	 * len reaches the minimum threshold, try the hook first; a zero
	 * return from the hook means it handled the request.
	 */
	ldr	r3, .L_arm_bzero
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal0		/* no hook installed */
	ldr	r2, .L_min_bzero_size
	ldr	r2, [r2]
	cmp	r1, r2
	blt	.Lnormal0		/* too small to be worth the hook */
	stmfd	sp!, {r0, r1, lr}
	mov	r2, #0			/* hook arg: flags = 0 */
	mov	lr, pc			/* indirect call to the hook */
	mov	pc, r3
	cmp	r0, #0			/* hook returns 0 on success */
	ldmfd	sp!, {r0, r1, lr}
	RETeq				/* hook did the work; done */
.Lnormal0:
	mov	r3, #0x00		/* fill value = 0; fall into memset */
	b	do_memset
EEND(bzero)
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2			/* r1 = length from here on */
do_memset:
	/* Common entry: ip = cursor, r3 = fill byte, r1 = length */
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0			/* preserve r0 as the return value */
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
#ifdef _ARM_ARCH_5E
	tst	ip, #0x04		/* Quad-align for armv5e (strd wants 8-byte alignment) */
#else
	cmp	r1, #0x10
#endif
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
#ifdef _ARM_ARCH_5E
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
#endif
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time */
.Lmemset_loop128:
	subs	r1, r1, #0x80
#ifdef _ARM_ARCH_5E
	/* 16 x 8-byte conditional stores (strd uses the r2:r3 pair) */
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop128
	RETeq				/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time */
.Lmemset_loop32:
	subs	r1, r1, #0x20
#ifdef _ARM_ARCH_5E
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop32
	RETeq				/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
#ifdef _ARM_ARCH_5E
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	RETeq				/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq				/* Zero length so just exit */

#ifdef _ARM_ARCH_5E
	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2
#else
	cmp	r1, #-2
#endif

	strb	r3, [ip], #0x01		/* Set 1 byte */
	strgeb	r3, [ip], #0x01		/* Set another byte */
	strgtb	r3, [ip]		/* and a third */
	RET				/* Exit */

.Lmemset_wordunaligned:
	/* r2 = misalignment (1..3); write 4-r2 bytes to reach a word edge */
	rsb	r2, r2, #0x004
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strgeb	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2
	strgtb	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq				/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strgeb	r3, [ip], #0x01		/* Set another byte */
	strgtb	r3, [ip]		/* and a third */
	RET				/* Exit */
END(memset)

/*
 * bcmp(b1=r0, b2=r1, len=r2)
 * Returns 0 in r0 when the buffers match; otherwise the difference of
 * the first mismatching byte pair (memcmp-style, which also satisfies
 * bcmp's zero/non-zero contract).
 */
ENTRY(bcmp)
	mov	ip, r0
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes		/* dedicated fast path, see below */
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eornes	r3, ip, r1
	RETeq				/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01
	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */

	/*
	 * Word-align the addresses, if necessary: computed branch into
	 * the compare ladder below based on the source misalignment.
	 */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03
	add	r3, r3, r3, lsl #1	/* r3 *= 3 (each unit is 6 insns) */
	addne	pc, pc, r3, lsl #3
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq				/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results. Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align	5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldreqb	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne				/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldreqb	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldreqb	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne				/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldreqb	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne				/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldreqb	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldreqb	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne				/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldreqb	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne				/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET
END(bcmp)

/*
 * bcopy(src=r0, dst=r1, len=r2): swap src/dst with the three-eor trick
 * and fall into memmove(dst=r0, src=r1, len=r2), which handles overlap.
 */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
EENTRY(memmove)
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq				/* Bail now if src/dst are the same */
	subcc	r3, r0, r1		/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0		/* if (src > dsr) r3 = src - dst */
	cmp	r3, r2			/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)	/* no overlap: plain memcpy */

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* src < dst: must copy descending */

	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4		/* lt: exactly 4..7 left: move a word */
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}		/* ge: move 8 */
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	/* Read whole words from the rounded-down source address and
	 * shift/merge them into aligned destination words.  r12 holds
	 * the source misalignment (1, 2 or 3). */
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* back up to the true byte position */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* back up to the true byte position */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* back up to the true byte position */
	b	.Lmemmove_fl4

.Lmemmove_backwards:
	/* Overlapping with dst above src: copy from the ends downward */
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq				/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	/* Backwards variant of the shift/merge copy; r12 = src misalignment */
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore the true byte position */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore the true byte position */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore the true byte position */
	b	.Lmemmove_bl4
EEND(memmove)
END(bcopy)

#if !defined(_ARM_ARCH_5E)
/*
 * memcpy(dst=r0, src=r1, len=r2) for pre-armv5e cores.
 * Returns dst in r0.  Buffers must not overlap (memmove handles that).
 */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	/* Do not check arm_memcpy if we're running from flash */
#if defined(FLASHADDR) && defined(PHYSADDR)
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal		/* no hook installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal		/* below hook threshold */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0			/* hook arg: flags = 0 */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* indirect call to the hook */
	ldr	pc, [r4]
	cmp	r0, #0			/* hook returns 0 on success */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq

.Lnormal:
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4		/* lt: exactly 4..7 left: move a word */
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}		/* ge: move 8 */
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#ifdef __APCS_26_
	ldmeqia	sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	/* Shift/merge copy; r12 = src misalignment (1, 2 or 3).
	 * NOTE(review): unlike memmove above, these loops have no
	 * __ARMEB__ variants — little-endian shifts only. */
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* back up to the true byte position */
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* back up to the true byte position */
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* back up to the true byte position */
	b	.Lmemcpy_l4
END(memcpy)

#else
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	pld	[r1]
	cmp	r2, #0x0c
	ble	.Lmemcpy_short		/* <= 12 bytes */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal		/* no hook installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal		/* below hook threshold */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0			/* hook arg: flags = 0 */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* indirect call to the hook */
	ldr	pc, [r4]
	cmp	r0, #0			/* hook returns 0 on success */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq
.Lnormal:
	mov	r3, r0			/* We must not clobber r0 */

	/* Word-align the destination buffer */
	ands	ip, r3, #0x03		/* Already word aligned? */
	beq	.Lmemcpy_wordaligned	/* Yup */
	cmp	ip, #0x02
	ldrb	ip, [r1], #0x01
	sub	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrleb	ip, [r1], #0x01
	suble	r2, r2, #0x01
	strleb	ip, [r3], #0x01
	ldrltb	ip, [r1], #0x01
	sublt	r2, r2, #0x01
	strltb	ip, [r3], #0x01

	/* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
	ands	ip, r1, #0x03		/* Is src also word-aligned? */
	bne	.Lmemcpy_bad_align	/* Nope.
Things just got bad */ 1226 1227 /* Quad-align the destination buffer */ 1228 tst r3, #0x07 /* Already quad aligned? */ 1229 ldrne ip, [r1], #0x04 1230 stmfd sp!, {r4-r9} /* Free up some registers */ 1231 subne r2, r2, #0x04 1232 strne ip, [r3], #0x04 1233 1234 /* Destination buffer quad aligned, source is at least word aligned */ 1235 subs r2, r2, #0x80 1236 blt .Lmemcpy_w_lessthan128 1237 1238 /* Copy 128 bytes at a time */ 1239.Lmemcpy_w_loop128: 1240 ldr r4, [r1], #0x04 /* LD:00-03 */ 1241 ldr r5, [r1], #0x04 /* LD:04-07 */ 1242 pld [r1, #0x18] /* Prefetch 0x20 */ 1243 ldr r6, [r1], #0x04 /* LD:08-0b */ 1244 ldr r7, [r1], #0x04 /* LD:0c-0f */ 1245 ldr r8, [r1], #0x04 /* LD:10-13 */ 1246 ldr r9, [r1], #0x04 /* LD:14-17 */ 1247 strd r4, [r3], #0x08 /* ST:00-07 */ 1248 ldr r4, [r1], #0x04 /* LD:18-1b */ 1249 ldr r5, [r1], #0x04 /* LD:1c-1f */ 1250 strd r6, [r3], #0x08 /* ST:08-0f */ 1251 ldr r6, [r1], #0x04 /* LD:20-23 */ 1252 ldr r7, [r1], #0x04 /* LD:24-27 */ 1253 pld [r1, #0x18] /* Prefetch 0x40 */ 1254 strd r8, [r3], #0x08 /* ST:10-17 */ 1255 ldr r8, [r1], #0x04 /* LD:28-2b */ 1256 ldr r9, [r1], #0x04 /* LD:2c-2f */ 1257 strd r4, [r3], #0x08 /* ST:18-1f */ 1258 ldr r4, [r1], #0x04 /* LD:30-33 */ 1259 ldr r5, [r1], #0x04 /* LD:34-37 */ 1260 strd r6, [r3], #0x08 /* ST:20-27 */ 1261 ldr r6, [r1], #0x04 /* LD:38-3b */ 1262 ldr r7, [r1], #0x04 /* LD:3c-3f */ 1263 strd r8, [r3], #0x08 /* ST:28-2f */ 1264 ldr r8, [r1], #0x04 /* LD:40-43 */ 1265 ldr r9, [r1], #0x04 /* LD:44-47 */ 1266 pld [r1, #0x18] /* Prefetch 0x60 */ 1267 strd r4, [r3], #0x08 /* ST:30-37 */ 1268 ldr r4, [r1], #0x04 /* LD:48-4b */ 1269 ldr r5, [r1], #0x04 /* LD:4c-4f */ 1270 strd r6, [r3], #0x08 /* ST:38-3f */ 1271 ldr r6, [r1], #0x04 /* LD:50-53 */ 1272 ldr r7, [r1], #0x04 /* LD:54-57 */ 1273 strd r8, [r3], #0x08 /* ST:40-47 */ 1274 ldr r8, [r1], #0x04 /* LD:58-5b */ 1275 ldr r9, [r1], #0x04 /* LD:5c-5f */ 1276 strd r4, [r3], #0x08 /* ST:48-4f */ 1277 ldr r4, [r1], #0x04 /* LD:60-63 */ 1278 ldr r5, 
[r1], #0x04 /* LD:64-67 */ 1279 pld [r1, #0x18] /* Prefetch 0x80 */ 1280 strd r6, [r3], #0x08 /* ST:50-57 */ 1281 ldr r6, [r1], #0x04 /* LD:68-6b */ 1282 ldr r7, [r1], #0x04 /* LD:6c-6f */ 1283 strd r8, [r3], #0x08 /* ST:58-5f */ 1284 ldr r8, [r1], #0x04 /* LD:70-73 */ 1285 ldr r9, [r1], #0x04 /* LD:74-77 */ 1286 strd r4, [r3], #0x08 /* ST:60-67 */ 1287 ldr r4, [r1], #0x04 /* LD:78-7b */ 1288 ldr r5, [r1], #0x04 /* LD:7c-7f */ 1289 strd r6, [r3], #0x08 /* ST:68-6f */ 1290 strd r8, [r3], #0x08 /* ST:70-77 */ 1291 subs r2, r2, #0x80 1292 strd r4, [r3], #0x08 /* ST:78-7f */ 1293 bge .Lmemcpy_w_loop128 1294 1295.Lmemcpy_w_lessthan128: 1296 adds r2, r2, #0x80 /* Adjust for extra sub */ 1297 ldmeqfd sp!, {r4-r9} 1298 RETeq /* Return now if done */ 1299 subs r2, r2, #0x20 1300 blt .Lmemcpy_w_lessthan32 1301 1302 /* Copy 32 bytes at a time */ 1303.Lmemcpy_w_loop32: 1304 ldr r4, [r1], #0x04 1305 ldr r5, [r1], #0x04 1306 pld [r1, #0x18] 1307 ldr r6, [r1], #0x04 1308 ldr r7, [r1], #0x04 1309 ldr r8, [r1], #0x04 1310 ldr r9, [r1], #0x04 1311 strd r4, [r3], #0x08 1312 ldr r4, [r1], #0x04 1313 ldr r5, [r1], #0x04 1314 strd r6, [r3], #0x08 1315 strd r8, [r3], #0x08 1316 subs r2, r2, #0x20 1317 strd r4, [r3], #0x08 1318 bge .Lmemcpy_w_loop32 1319 1320.Lmemcpy_w_lessthan32: 1321 adds r2, r2, #0x20 /* Adjust for extra sub */ 1322 ldmeqfd sp!, {r4-r9} 1323 RETeq /* Return now if done */ 1324 1325 and r4, r2, #0x18 1326 rsbs r4, r4, #0x18 1327 addne pc, pc, r4, lsl #1 1328 nop 1329 1330 /* At least 24 bytes remaining */ 1331 ldr r4, [r1], #0x04 1332 ldr r5, [r1], #0x04 1333 sub r2, r2, #0x08 1334 strd r4, [r3], #0x08 1335 1336 /* At least 16 bytes remaining */ 1337 ldr r4, [r1], #0x04 1338 ldr r5, [r1], #0x04 1339 sub r2, r2, #0x08 1340 strd r4, [r3], #0x08 1341 1342 /* At least 8 bytes remaining */ 1343 ldr r4, [r1], #0x04 1344 ldr r5, [r1], #0x04 1345 subs r2, r2, #0x08 1346 strd r4, [r3], #0x08 1347 1348 /* Less than 8 bytes remaining */ 1349 ldmfd sp!, {r4-r9} 1350 RETeq /* Return 
now if done */ 1351 subs r2, r2, #0x04 1352 ldrge ip, [r1], #0x04 1353 strge ip, [r3], #0x04 1354 RETeq /* Return now if done */ 1355 addlt r2, r2, #0x04 1356 ldrb ip, [r1], #0x01 1357 cmp r2, #0x02 1358 ldrgeb r2, [r1], #0x01 1359 strb ip, [r3], #0x01 1360 ldrgtb ip, [r1] 1361 strgeb r2, [r3], #0x01 1362 strgtb ip, [r3] 1363 RET 1364 1365 1366/* 1367 * At this point, it has not been possible to word align both buffers. 1368 * The destination buffer is word aligned, but the source buffer is not. 1369 */ 1370.Lmemcpy_bad_align: 1371 stmfd sp!, {r4-r7} 1372 bic r1, r1, #0x03 1373 cmp ip, #2 1374 ldr ip, [r1], #0x04 1375 bgt .Lmemcpy_bad3 1376 beq .Lmemcpy_bad2 1377 b .Lmemcpy_bad1 1378 1379.Lmemcpy_bad1_loop16: 1380#ifdef __ARMEB__ 1381 mov r4, ip, lsl #8 1382#else 1383 mov r4, ip, lsr #8 1384#endif 1385 ldr r5, [r1], #0x04 1386 pld [r1, #0x018] 1387 ldr r6, [r1], #0x04 1388 ldr r7, [r1], #0x04 1389 ldr ip, [r1], #0x04 1390#ifdef __ARMEB__ 1391 orr r4, r4, r5, lsr #24 1392 mov r5, r5, lsl #8 1393 orr r5, r5, r6, lsr #24 1394 mov r6, r6, lsl #8 1395 orr r6, r6, r7, lsr #24 1396 mov r7, r7, lsl #8 1397 orr r7, r7, ip, lsr #24 1398#else 1399 orr r4, r4, r5, lsl #24 1400 mov r5, r5, lsr #8 1401 orr r5, r5, r6, lsl #24 1402 mov r6, r6, lsr #8 1403 orr r6, r6, r7, lsl #24 1404 mov r7, r7, lsr #8 1405 orr r7, r7, ip, lsl #24 1406#endif 1407 str r4, [r3], #0x04 1408 str r5, [r3], #0x04 1409 str r6, [r3], #0x04 1410 str r7, [r3], #0x04 1411.Lmemcpy_bad1: 1412 subs r2, r2, #0x10 1413 bge .Lmemcpy_bad1_loop16 1414 1415 adds r2, r2, #0x10 1416 ldmeqfd sp!, {r4-r7} 1417 RETeq /* Return now if done */ 1418 subs r2, r2, #0x04 1419 sublt r1, r1, #0x03 1420 blt .Lmemcpy_bad_done 1421 1422.Lmemcpy_bad1_loop4: 1423#ifdef __ARMEB__ 1424 mov r4, ip, lsl #8 1425#else 1426 mov r4, ip, lsr #8 1427#endif 1428 ldr ip, [r1], #0x04 1429 subs r2, r2, #0x04 1430#ifdef __ARMEB__ 1431 orr r4, r4, ip, lsr #24 1432#else 1433 orr r4, r4, ip, lsl #24 1434#endif 1435 str r4, [r3], #0x04 1436 bge 
.Lmemcpy_bad1_loop4 1437 sub r1, r1, #0x03 1438 b .Lmemcpy_bad_done 1439 1440.Lmemcpy_bad2_loop16: 1441#ifdef __ARMEB__ 1442 mov r4, ip, lsl #16 1443#else 1444 mov r4, ip, lsr #16 1445#endif 1446 ldr r5, [r1], #0x04 1447 pld [r1, #0x018] 1448 ldr r6, [r1], #0x04 1449 ldr r7, [r1], #0x04 1450 ldr ip, [r1], #0x04 1451#ifdef __ARMEB__ 1452 orr r4, r4, r5, lsr #16 1453 mov r5, r5, lsl #16 1454 orr r5, r5, r6, lsr #16 1455 mov r6, r6, lsl #16 1456 orr r6, r6, r7, lsr #16 1457 mov r7, r7, lsl #16 1458 orr r7, r7, ip, lsr #16 1459#else 1460 orr r4, r4, r5, lsl #16 1461 mov r5, r5, lsr #16 1462 orr r5, r5, r6, lsl #16 1463 mov r6, r6, lsr #16 1464 orr r6, r6, r7, lsl #16 1465 mov r7, r7, lsr #16 1466 orr r7, r7, ip, lsl #16 1467#endif 1468 str r4, [r3], #0x04 1469 str r5, [r3], #0x04 1470 str r6, [r3], #0x04 1471 str r7, [r3], #0x04 1472.Lmemcpy_bad2: 1473 subs r2, r2, #0x10 1474 bge .Lmemcpy_bad2_loop16 1475 1476 adds r2, r2, #0x10 1477 ldmeqfd sp!, {r4-r7} 1478 RETeq /* Return now if done */ 1479 subs r2, r2, #0x04 1480 sublt r1, r1, #0x02 1481 blt .Lmemcpy_bad_done 1482 1483.Lmemcpy_bad2_loop4: 1484#ifdef __ARMEB__ 1485 mov r4, ip, lsl #16 1486#else 1487 mov r4, ip, lsr #16 1488#endif 1489 ldr ip, [r1], #0x04 1490 subs r2, r2, #0x04 1491#ifdef __ARMEB__ 1492 orr r4, r4, ip, lsr #16 1493#else 1494 orr r4, r4, ip, lsl #16 1495#endif 1496 str r4, [r3], #0x04 1497 bge .Lmemcpy_bad2_loop4 1498 sub r1, r1, #0x02 1499 b .Lmemcpy_bad_done 1500 1501.Lmemcpy_bad3_loop16: 1502#ifdef __ARMEB__ 1503 mov r4, ip, lsl #24 1504#else 1505 mov r4, ip, lsr #24 1506#endif 1507 ldr r5, [r1], #0x04 1508 pld [r1, #0x018] 1509 ldr r6, [r1], #0x04 1510 ldr r7, [r1], #0x04 1511 ldr ip, [r1], #0x04 1512#ifdef __ARMEB__ 1513 orr r4, r4, r5, lsr #8 1514 mov r5, r5, lsl #24 1515 orr r5, r5, r6, lsr #8 1516 mov r6, r6, lsl #24 1517 orr r6, r6, r7, lsr #8 1518 mov r7, r7, lsl #24 1519 orr r7, r7, ip, lsr #8 1520#else 1521 orr r4, r4, r5, lsl #8 1522 mov r5, r5, lsr #24 1523 orr r5, r5, r6, lsl #8 1524 
mov r6, r6, lsr #24 1525 orr r6, r6, r7, lsl #8 1526 mov r7, r7, lsr #24 1527 orr r7, r7, ip, lsl #8 1528#endif 1529 str r4, [r3], #0x04 1530 str r5, [r3], #0x04 1531 str r6, [r3], #0x04 1532 str r7, [r3], #0x04 1533.Lmemcpy_bad3: 1534 subs r2, r2, #0x10 1535 bge .Lmemcpy_bad3_loop16 1536 1537 adds r2, r2, #0x10 1538 ldmeqfd sp!, {r4-r7} 1539 RETeq /* Return now if done */ 1540 subs r2, r2, #0x04 1541 sublt r1, r1, #0x01 1542 blt .Lmemcpy_bad_done 1543 1544.Lmemcpy_bad3_loop4: 1545#ifdef __ARMEB__ 1546 mov r4, ip, lsl #24 1547#else 1548 mov r4, ip, lsr #24 1549#endif 1550 ldr ip, [r1], #0x04 1551 subs r2, r2, #0x04 1552#ifdef __ARMEB__ 1553 orr r4, r4, ip, lsr #8 1554#else 1555 orr r4, r4, ip, lsl #8 1556#endif 1557 str r4, [r3], #0x04 1558 bge .Lmemcpy_bad3_loop4 1559 sub r1, r1, #0x01 1560 1561.Lmemcpy_bad_done: 1562 ldmfd sp!, {r4-r7} 1563 adds r2, r2, #0x04 1564 RETeq 1565 ldrb ip, [r1], #0x01 1566 cmp r2, #0x02 1567 ldrgeb r2, [r1], #0x01 1568 strb ip, [r3], #0x01 1569 ldrgtb ip, [r1] 1570 strgeb r2, [r3], #0x01 1571 strgtb ip, [r3] 1572 RET 1573 1574 1575/* 1576 * Handle short copies (less than 16 bytes), possibly misaligned. 1577 * Some of these are *very* common, thanks to the network stack, 1578 * and so are handled specially. 
1579 */ 1580.Lmemcpy_short: 1581 add pc, pc, r2, lsl #2 1582 nop 1583 RET /* 0x00 */ 1584 b .Lmemcpy_bytewise /* 0x01 */ 1585 b .Lmemcpy_bytewise /* 0x02 */ 1586 b .Lmemcpy_bytewise /* 0x03 */ 1587 b .Lmemcpy_4 /* 0x04 */ 1588 b .Lmemcpy_bytewise /* 0x05 */ 1589 b .Lmemcpy_6 /* 0x06 */ 1590 b .Lmemcpy_bytewise /* 0x07 */ 1591 b .Lmemcpy_8 /* 0x08 */ 1592 b .Lmemcpy_bytewise /* 0x09 */ 1593 b .Lmemcpy_bytewise /* 0x0a */ 1594 b .Lmemcpy_bytewise /* 0x0b */ 1595 b .Lmemcpy_c /* 0x0c */ 1596.Lmemcpy_bytewise: 1597 mov r3, r0 /* We must not clobber r0 */ 1598 ldrb ip, [r1], #0x01 15991: subs r2, r2, #0x01 1600 strb ip, [r3], #0x01 1601 ldrneb ip, [r1], #0x01 1602 bne 1b 1603 RET 1604 1605/****************************************************************************** 1606 * Special case for 4 byte copies 1607 */ 1608#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1609#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1610 LMEMCPY_4_PAD 1611.Lmemcpy_4: 1612 and r2, r1, #0x03 1613 orr r2, r2, r0, lsl #2 1614 ands r2, r2, #0x0f 1615 sub r3, pc, #0x14 1616 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 1617 1618/* 1619 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1620 */ 1621 ldr r2, [r1] 1622 str r2, [r0] 1623 RET 1624 LMEMCPY_4_PAD 1625 1626/* 1627 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1628 */ 1629 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1630 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1631#ifdef __ARMEB__ 1632 mov r3, r3, lsl #8 /* r3 = 012. 
*/ 1633 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1634#else 1635 mov r3, r3, lsr #8 /* r3 = .210 */ 1636 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1637#endif 1638 str r3, [r0] 1639 RET 1640 LMEMCPY_4_PAD 1641 1642/* 1643 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1644 */ 1645#ifdef __ARMEB__ 1646 ldrh r3, [r1] 1647 ldrh r2, [r1, #0x02] 1648#else 1649 ldrh r3, [r1, #0x02] 1650 ldrh r2, [r1] 1651#endif 1652 orr r3, r2, r3, lsl #16 1653 str r3, [r0] 1654 RET 1655 LMEMCPY_4_PAD 1656 1657/* 1658 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1659 */ 1660 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1661 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1662#ifdef __ARMEB__ 1663 mov r3, r3, lsl #24 /* r3 = 0... */ 1664 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1665#else 1666 mov r3, r3, lsr #24 /* r3 = ...0 */ 1667 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1668#endif 1669 str r3, [r0] 1670 RET 1671 LMEMCPY_4_PAD 1672 1673/* 1674 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1675 */ 1676 ldr r2, [r1] 1677#ifdef __ARMEB__ 1678 strb r2, [r0, #0x03] 1679 mov r3, r2, lsr #8 1680 mov r1, r2, lsr #24 1681 strb r1, [r0] 1682#else 1683 strb r2, [r0] 1684 mov r3, r2, lsr #8 1685 mov r1, r2, lsr #24 1686 strb r1, [r0, #0x03] 1687#endif 1688 strh r3, [r0, #0x01] 1689 RET 1690 LMEMCPY_4_PAD 1691 1692/* 1693 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1694 */ 1695 ldrb r2, [r1] 1696 ldrh r3, [r1, #0x01] 1697 ldrb r1, [r1, #0x03] 1698 strb r2, [r0] 1699 strh r3, [r0, #0x01] 1700 strb r1, [r0, #0x03] 1701 RET 1702 LMEMCPY_4_PAD 1703 1704/* 1705 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1706 */ 1707 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1708 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ 1709#ifdef __ARMEB__ 1710 mov r1, r2, lsr #8 /* r1 = ...0 */ 1711 strb r1, [r0] 1712 mov r2, r2, lsl #8 /* r2 = .01. 
*/ 1713 orr r2, r2, r3, lsr #8 /* r2 = .012 */ 1714#else 1715 strb r2, [r0] 1716 mov r2, r2, lsr #8 /* r2 = ...1 */ 1717 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1718 mov r3, r3, lsr #8 /* r3 = ...3 */ 1719#endif 1720 strh r2, [r0, #0x01] 1721 strb r3, [r0, #0x03] 1722 RET 1723 LMEMCPY_4_PAD 1724 1725/* 1726 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1727 */ 1728 ldrb r2, [r1] 1729 ldrh r3, [r1, #0x01] 1730 ldrb r1, [r1, #0x03] 1731 strb r2, [r0] 1732 strh r3, [r0, #0x01] 1733 strb r1, [r0, #0x03] 1734 RET 1735 LMEMCPY_4_PAD 1736 1737/* 1738 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1739 */ 1740 ldr r2, [r1] 1741#ifdef __ARMEB__ 1742 strh r2, [r0, #0x02] 1743 mov r3, r2, lsr #16 1744 strh r3, [r0] 1745#else 1746 strh r2, [r0] 1747 mov r3, r2, lsr #16 1748 strh r3, [r0, #0x02] 1749#endif 1750 RET 1751 LMEMCPY_4_PAD 1752 1753/* 1754 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1755 */ 1756 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1757 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1758 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1759 strh r1, [r0] 1760#ifdef __ARMEB__ 1761 mov r2, r2, lsl #8 /* r2 = 012. */ 1762 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1763#else 1764 mov r2, r2, lsr #24 /* r2 = ...2 */ 1765 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1766#endif 1767 strh r2, [r0, #0x02] 1768 RET 1769 LMEMCPY_4_PAD 1770 1771/* 1772 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1773 */ 1774 ldrh r2, [r1] 1775 ldrh r3, [r1, #0x02] 1776 strh r2, [r0] 1777 strh r3, [r0, #0x02] 1778 RET 1779 LMEMCPY_4_PAD 1780 1781/* 1782 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1783 */ 1784 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1785 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1786 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1787 strh r1, [r0, #0x02] 1788#ifdef __ARMEB__ 1789 mov r3, r3, lsr #24 /* r3 = ...1 */ 1790 orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 1791#else 1792 mov r3, r3, lsl #8 /* r3 = 321. 
*/ 1793 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1794#endif 1795 strh r3, [r0] 1796 RET 1797 LMEMCPY_4_PAD 1798 1799/* 1800 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1801 */ 1802 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1803#ifdef __ARMEB__ 1804 strb r2, [r0, #0x03] 1805 mov r3, r2, lsr #8 1806 mov r1, r2, lsr #24 1807 strh r3, [r0, #0x01] 1808 strb r1, [r0] 1809#else 1810 strb r2, [r0] 1811 mov r3, r2, lsr #8 1812 mov r1, r2, lsr #24 1813 strh r3, [r0, #0x01] 1814 strb r1, [r0, #0x03] 1815#endif 1816 RET 1817 LMEMCPY_4_PAD 1818 1819/* 1820 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1821 */ 1822 ldrb r2, [r1] 1823 ldrh r3, [r1, #0x01] 1824 ldrb r1, [r1, #0x03] 1825 strb r2, [r0] 1826 strh r3, [r0, #0x01] 1827 strb r1, [r0, #0x03] 1828 RET 1829 LMEMCPY_4_PAD 1830 1831/* 1832 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1833 */ 1834#ifdef __ARMEB__ 1835 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1836 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1837 strb r3, [r0, #0x03] 1838 mov r3, r3, lsr #8 /* r3 = ...2 */ 1839 orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 1840 strh r3, [r0, #0x01] 1841 mov r2, r2, lsr #8 /* r2 = ...0 */ 1842 strb r2, [r0] 1843#else 1844 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1845 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1846 strb r2, [r0] 1847 mov r2, r2, lsr #8 /* r2 = ...1 */ 1848 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1849 strh r2, [r0, #0x01] 1850 mov r3, r3, lsr #8 /* r3 = ...3 */ 1851 strb r3, [r0, #0x03] 1852#endif 1853 RET 1854 LMEMCPY_4_PAD 1855 1856/* 1857 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1858 */ 1859 ldrb r2, [r1] 1860 ldrh r3, [r1, #0x01] 1861 ldrb r1, [r1, #0x03] 1862 strb r2, [r0] 1863 strh r3, [r0, #0x01] 1864 strb r1, [r0, #0x03] 1865 RET 1866 LMEMCPY_4_PAD 1867 1868 1869/****************************************************************************** 1870 * Special case for 6 byte copies 1871 */ 1872#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 1873#define LMEMCPY_6_PAD 
.align LMEMCPY_6_LOG2 1874 LMEMCPY_6_PAD 1875.Lmemcpy_6: 1876 and r2, r1, #0x03 1877 orr r2, r2, r0, lsl #2 1878 ands r2, r2, #0x0f 1879 sub r3, pc, #0x14 1880 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 1881 1882/* 1883 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1884 */ 1885 ldr r2, [r1] 1886 ldrh r3, [r1, #0x04] 1887 str r2, [r0] 1888 strh r3, [r0, #0x04] 1889 RET 1890 LMEMCPY_6_PAD 1891 1892/* 1893 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1894 */ 1895 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1896 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 1897#ifdef __ARMEB__ 1898 mov r2, r2, lsl #8 /* r2 = 012. */ 1899 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1900#else 1901 mov r2, r2, lsr #8 /* r2 = .210 */ 1902 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 1903#endif 1904 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 1905 str r2, [r0] 1906 strh r3, [r0, #0x04] 1907 RET 1908 LMEMCPY_6_PAD 1909 1910/* 1911 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1912 */ 1913 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1914 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1915#ifdef __ARMEB__ 1916 mov r1, r3, lsr #16 /* r1 = ..23 */ 1917 orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 1918 str r1, [r0] 1919 strh r3, [r0, #0x04] 1920#else 1921 mov r1, r3, lsr #16 /* r1 = ..54 */ 1922 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1923 str r2, [r0] 1924 strh r1, [r0, #0x04] 1925#endif 1926 RET 1927 LMEMCPY_6_PAD 1928 1929/* 1930 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1931 */ 1932 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1933 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 1934 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */ 1935#ifdef __ARMEB__ 1936 mov r2, r2, lsl #24 /* r2 = 0... */ 1937 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1938 mov r3, r3, lsl #8 /* r3 = 234. */ 1939 orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 1940#else 1941 mov r2, r2, lsr #24 /* r2 = ...0 */ 1942 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1943 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 1944 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 1945#endif 1946 str r2, [r0] 1947 strh r1, [r0, #0x04] 1948 RET 1949 LMEMCPY_6_PAD 1950 1951/* 1952 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1953 */ 1954 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1955 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 1956 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1957 strh r1, [r0, #0x01] 1958#ifdef __ARMEB__ 1959 mov r1, r3, lsr #24 /* r1 = ...0 */ 1960 strb r1, [r0] 1961 mov r3, r3, lsl #8 /* r3 = 123. */ 1962 orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 1963#else 1964 strb r3, [r0] 1965 mov r3, r3, lsr #24 /* r3 = ...3 */ 1966 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 1967 mov r2, r2, lsr #8 /* r2 = ...5 */ 1968#endif 1969 strh r3, [r0, #0x03] 1970 strb r2, [r0, #0x05] 1971 RET 1972 LMEMCPY_6_PAD 1973 1974/* 1975 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1976 */ 1977 ldrb r2, [r1] 1978 ldrh r3, [r1, #0x01] 1979 ldrh ip, [r1, #0x03] 1980 ldrb r1, [r1, #0x05] 1981 strb r2, [r0] 1982 strh r3, [r0, #0x01] 1983 strh ip, [r0, #0x03] 1984 strb r1, [r0, #0x05] 1985 RET 1986 LMEMCPY_6_PAD 1987 1988/* 1989 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1990 */ 1991 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1992 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1993#ifdef __ARMEB__ 1994 mov r3, r2, lsr #8 /* r3 = ...0 */ 1995 strb r3, [r0] 1996 strb r1, [r0, #0x05] 1997 mov r3, r1, lsr #8 /* r3 = .234 */ 1998 strh r3, [r0, #0x03] 1999 mov r3, r2, lsl #8 /* r3 = .01. 
*/ 2000 orr r3, r3, r1, lsr #24 /* r3 = .012 */ 2001 strh r3, [r0, #0x01] 2002#else 2003 strb r2, [r0] 2004 mov r3, r1, lsr #24 2005 strb r3, [r0, #0x05] 2006 mov r3, r1, lsr #8 /* r3 = .543 */ 2007 strh r3, [r0, #0x03] 2008 mov r3, r2, lsr #8 /* r3 = ...1 */ 2009 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 2010 strh r3, [r0, #0x01] 2011#endif 2012 RET 2013 LMEMCPY_6_PAD 2014 2015/* 2016 * 0111: dst is 8-bit aligned, src is 8-bit aligned 2017 */ 2018 ldrb r2, [r1] 2019 ldrh r3, [r1, #0x01] 2020 ldrh ip, [r1, #0x03] 2021 ldrb r1, [r1, #0x05] 2022 strb r2, [r0] 2023 strh r3, [r0, #0x01] 2024 strh ip, [r0, #0x03] 2025 strb r1, [r0, #0x05] 2026 RET 2027 LMEMCPY_6_PAD 2028 2029/* 2030 * 1000: dst is 16-bit aligned, src is 32-bit aligned 2031 */ 2032#ifdef __ARMEB__ 2033 ldr r2, [r1] /* r2 = 0123 */ 2034 ldrh r3, [r1, #0x04] /* r3 = ..45 */ 2035 mov r1, r2, lsr #16 /* r1 = ..01 */ 2036 orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 2037 strh r1, [r0] 2038 str r3, [r0, #0x02] 2039#else 2040 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 2041 ldr r3, [r1] /* r3 = 3210 */ 2042 mov r2, r2, lsl #16 /* r2 = 54.. */ 2043 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 2044 strh r3, [r0] 2045 str r2, [r0, #0x02] 2046#endif 2047 RET 2048 LMEMCPY_6_PAD 2049 2050/* 2051 * 1001: dst is 16-bit aligned, src is 8-bit aligned 2052 */ 2053 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2054 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 2055 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2056#ifdef __ARMEB__ 2057 mov r2, r2, lsr #8 /* r2 = .345 */ 2058 orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 2059#else 2060 mov r2, r2, lsl #8 /* r2 = 543. 
*/ 2061 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 2062#endif 2063 strh r1, [r0] 2064 str r2, [r0, #0x02] 2065 RET 2066 LMEMCPY_6_PAD 2067 2068/* 2069 * 1010: dst is 16-bit aligned, src is 16-bit aligned 2070 */ 2071 ldrh r2, [r1] 2072 ldr r3, [r1, #0x02] 2073 strh r2, [r0] 2074 str r3, [r0, #0x02] 2075 RET 2076 LMEMCPY_6_PAD 2077 2078/* 2079 * 1011: dst is 16-bit aligned, src is 8-bit aligned 2080 */ 2081 ldrb r3, [r1] /* r3 = ...0 */ 2082 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2083 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 2084#ifdef __ARMEB__ 2085 mov r3, r3, lsl #8 /* r3 = ..0. */ 2086 orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 2087 orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 2088#else 2089 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2090 mov r1, r1, lsl #24 /* r1 = 5... */ 2091 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 2092#endif 2093 strh r3, [r0] 2094 str r1, [r0, #0x02] 2095 RET 2096 LMEMCPY_6_PAD 2097 2098/* 2099 * 1100: dst is 8-bit aligned, src is 32-bit aligned 2100 */ 2101 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2102 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 2103#ifdef __ARMEB__ 2104 mov r3, r2, lsr #24 /* r3 = ...0 */ 2105 strb r3, [r0] 2106 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 2107 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2108#else 2109 strb r2, [r0] 2110 mov r2, r2, lsr #8 /* r2 = .321 */ 2111 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 2112 mov r1, r1, lsr #8 /* r1 = ...5 */ 2113#endif 2114 str r2, [r0, #0x01] 2115 strb r1, [r0, #0x05] 2116 RET 2117 LMEMCPY_6_PAD 2118 2119/* 2120 * 1101: dst is 8-bit aligned, src is 8-bit aligned 2121 */ 2122 ldrb r2, [r1] 2123 ldrh r3, [r1, #0x01] 2124 ldrh ip, [r1, #0x03] 2125 ldrb r1, [r1, #0x05] 2126 strb r2, [r0] 2127 strh r3, [r0, #0x01] 2128 strh ip, [r0, #0x03] 2129 strb r1, [r0, #0x05] 2130 RET 2131 LMEMCPY_6_PAD 2132 2133/* 2134 * 1110: dst is 8-bit aligned, src is 16-bit aligned 2135 */ 2136 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2137 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 2138#ifdef __ARMEB__ 2139 mov r3, r2, lsr #8 /* r3 = ...0 */ 2140 strb r3, [r0] 2141 mov r2, r2, lsl #24 /* r2 = 1... */ 2142 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2143#else 2144 strb r2, [r0] 2145 mov r2, r2, lsr #8 /* r2 = ...1 */ 2146 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 2147 mov r1, r1, lsr #24 /* r1 = ...5 */ 2148#endif 2149 str r2, [r0, #0x01] 2150 strb r1, [r0, #0x05] 2151 RET 2152 LMEMCPY_6_PAD 2153 2154/* 2155 * 1111: dst is 8-bit aligned, src is 8-bit aligned 2156 */ 2157 ldrb r2, [r1] 2158 ldr r3, [r1, #0x01] 2159 ldrb r1, [r1, #0x05] 2160 strb r2, [r0] 2161 str r3, [r0, #0x01] 2162 strb r1, [r0, #0x05] 2163 RET 2164 LMEMCPY_6_PAD 2165 2166 2167/****************************************************************************** 2168 * Special case for 8 byte copies 2169 */ 2170#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 2171#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 2172 LMEMCPY_8_PAD 2173.Lmemcpy_8: 2174 and r2, r1, #0x03 2175 orr r2, r2, r0, lsl #2 2176 ands r2, r2, #0x0f 2177 sub r3, pc, #0x14 2178 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 2179 2180/* 2181 * 0000: dst is 32-bit aligned, src is 32-bit aligned 2182 */ 2183 ldr r2, [r1] 2184 ldr r3, [r1, #0x04] 2185 str r2, [r0] 2186 str r3, [r0, #0x04] 
2187 RET 2188 LMEMCPY_8_PAD 2189 2190/* 2191 * 0001: dst is 32-bit aligned, src is 8-bit aligned 2192 */ 2193 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2194 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 2195 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2196#ifdef __ARMEB__ 2197 mov r3, r3, lsl #8 /* r3 = 012. */ 2198 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 2199 orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 2200#else 2201 mov r3, r3, lsr #8 /* r3 = .210 */ 2202 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 2203 mov r1, r1, lsl #24 /* r1 = 7... */ 2204 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 2205#endif 2206 str r3, [r0] 2207 str r2, [r0, #0x04] 2208 RET 2209 LMEMCPY_8_PAD 2210 2211/* 2212 * 0010: dst is 32-bit aligned, src is 16-bit aligned 2213 */ 2214 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2215 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2216 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2217#ifdef __ARMEB__ 2218 mov r2, r2, lsl #16 /* r2 = 01.. */ 2219 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2220 orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 2221#else 2222 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2223 mov r3, r3, lsr #16 /* r3 = ..54 */ 2224 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 2225#endif 2226 str r2, [r0] 2227 str r3, [r0, #0x04] 2228 RET 2229 LMEMCPY_8_PAD 2230 2231/* 2232 * 0011: dst is 32-bit aligned, src is 8-bit aligned 2233 */ 2234 ldrb r3, [r1] /* r3 = ...0 */ 2235 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2236 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 2237#ifdef __ARMEB__ 2238 mov r3, r3, lsl #24 /* r3 = 0... */ 2239 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 2240 mov r2, r2, lsl #24 /* r2 = 4... 
*/ 2241 orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 2242#else 2243 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2244 mov r2, r2, lsr #24 /* r2 = ...4 */ 2245 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 2246#endif 2247 str r3, [r0] 2248 str r2, [r0, #0x04] 2249 RET 2250 LMEMCPY_8_PAD 2251 2252/* 2253 * 0100: dst is 8-bit aligned, src is 32-bit aligned 2254 */ 2255 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 2256 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 2257#ifdef __ARMEB__ 2258 mov r1, r3, lsr #24 /* r1 = ...0 */ 2259 strb r1, [r0] 2260 mov r1, r3, lsr #8 /* r1 = .012 */ 2261 strb r2, [r0, #0x07] 2262 mov r3, r3, lsl #24 /* r3 = 3... */ 2263 orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 2264#else 2265 strb r3, [r0] 2266 mov r1, r2, lsr #24 /* r1 = ...7 */ 2267 strb r1, [r0, #0x07] 2268 mov r1, r3, lsr #8 /* r1 = .321 */ 2269 mov r3, r3, lsr #24 /* r3 = ...3 */ 2270 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 2271#endif 2272 strh r1, [r0, #0x01] 2273 str r3, [r0, #0x03] 2274 RET 2275 LMEMCPY_8_PAD 2276 2277/* 2278 * 0101: dst is 8-bit aligned, src is 8-bit aligned 2279 */ 2280 ldrb r2, [r1] 2281 ldrh r3, [r1, #0x01] 2282 ldr ip, [r1, #0x03] 2283 ldrb r1, [r1, #0x07] 2284 strb r2, [r0] 2285 strh r3, [r0, #0x01] 2286 str ip, [r0, #0x03] 2287 strb r1, [r0, #0x07] 2288 RET 2289 LMEMCPY_8_PAD 2290 2291/* 2292 * 0110: dst is 8-bit aligned, src is 16-bit aligned 2293 */ 2294 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2295 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2296 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2297#ifdef __ARMEB__ 2298 mov ip, r2, lsr #8 /* ip = ...0 */ 2299 strb ip, [r0] 2300 mov ip, r2, lsl #8 /* ip = .01. */ 2301 orr ip, ip, r3, lsr #24 /* ip = .012 */ 2302 strb r1, [r0, #0x07] 2303 mov r3, r3, lsl #8 /* r3 = 345. 
*/
	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
#else
	strb	r2, [r0]		/* 0 */
	mov	ip, r1, lsr #8		/* ip = ...7 */
	strb	ip, [r0, #0x07]		/* 7 */
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r3, r3, lsr #8		/* r3 = .543 */
	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
#endif
	strh	ip, [r0, #0x01]
	str	r3, [r0, #0x03]
	RET
	LMEMCPY_8_PAD

/*
 * NOTE(review): in the byte diagrams below, hex digits 0-7 name the eight
 * source bytes being copied, '.' marks a zero byte, 'x' marks a byte that
 * lies outside the copied region, and "BE:"/"LE:" show the register
 * contents for big- and little-endian builds respectively.  Each case is
 * padded (LMEMCPY_8_PAD) to a fixed power-of-two slot so the dispatcher
 * can jump to it by index.
 */

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
#ifdef __ARMEB__
	strh	r3, [r0, #0x01]
	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
#else
	strh	ip, [r0, #0x01]
	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
#endif
	str	r2, [r0, #0x03]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, r3, lsr #16		/* r1 = ..45 */
	orr	r2, r1, r2, lsl #16	/* r2 = 2345 */
#else
	strh	r2, [r0]
	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
#endif
	str	r2, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 *
 * NOTE(review): the [r1, #-1] load reads one byte before src; presumably
 * safe here because a byte-misaligned word load stays within the same
 * aligned word -- confirm against the dispatcher's alignment guarantee.
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
#ifdef __ARMEB__
	mov	r1, r2, lsl #24		/* r1 = 2... */
	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
#else
	mov	r1, r2, lsr #24		/* r1 = ...2 */
	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
#endif
	str	r1, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	ip, [r1, #0x02]
	ldrh	r3, [r1, #0x06]
	strh	r2, [r0]
	str	ip, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	ip, [r1]		/* ip = ...0 */
	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
	strh	r1, [r0, #0x06]
#ifdef __ARMEB__
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
	mov	r2, r2, lsr #24		/* r2 = ...1 */
	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
#else
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
#endif
	str	r3, [r0, #0x02]
	strh	r2, [r0]
	RET
	LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
	strh	r1, [r0, #0x05]
#ifdef __ARMEB__
	strb	r3, [r0, #0x07]
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
#else
	strb	r2, [r0]
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
	str	r2, [r0, #0x01]
#endif
	RET
	LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
#ifdef __ARMEB__
	strh	ip, [r0, #0x05]
	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
#else
	strh	r3, [r0, #0x05]
	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #24		/* ip = 1... */
	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
	strb	r1, [r0, #0x07]
	mov	r1, r1, lsr #8		/* r1 = ...6 */
	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
#else
	strb	r2, [r0]
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r2, r1, lsr #8		/* r2 = ...7 */
	strb	r2, [r0, #0x07]
	mov	r1, r1, lsl #8		/* r1 = .76.
*/
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
#endif
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	RET
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/******************************************************************************
 * Special case for 12 byte copies
 */
#define	LMEMCPY_C_LOG2	7	/* 128 bytes */
#define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
.Lmemcpy_c:
	/*
	 * Dispatch on the low alignment bits of dst (r0) and src (r1):
	 * index = ((dst & 3) << 2) | (src & 3).  Every handler below is
	 * padded by LMEMCPY_C_PAD to exactly 2^LMEMCPY_C_LOG2 (128) bytes,
	 * so handler N lives at .Lmemcpy_c + N * 128.  Index 0 falls
	 * straight through (ands sets Z, so addne is skipped).
	 *
	 * In the byte diagrams, hex digits 0-B name the twelve source
	 * bytes, '.' is a zero byte, 'x' a byte outside the copied region;
	 * "BE:"/"LE:" show big-/little-endian register contents.
	 */
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c (pc reads as . + 8) */
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
#ifdef __ARMEB__
	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsr #24		/* r2 = ...7 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
	mov	r1, r1, lsl #8		/* r1 = 012. */
	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
#else
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x04]
	str	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #16		/* r3 = 45.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #24		/* r3 = 4... */
	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r2, lsl #24		/* r1 = 3... */
	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
	mov	r1, r3, lsl #24		/* r1 = 7... */
	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
#else
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
#endif
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, ror #8		/* r2 = 1..0 */
	strb	r2, [r0]
	mov	r2, r2, lsr #16		/* r2 = ..1. */
	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsl #8		/* r2 = 345. */
	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
	mov	r2, ip, lsl #8		/* r2 = 789. */
	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
#endif
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	strb	r2, [r0]
#ifdef __ARMEB__
	mov	r2, r3, lsr #16		/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsl #16		/* r3 = 34.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
	mov	ip, ip, lsl #16		/* ip = 78.. */
	orr	ip, ip, r1, lsr #16	/* ip = 789A */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
#else
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
#endif
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, ip, lsl #16		/* r1 = 23.. */
	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
	mov	r3, r3, lsl #16		/* r3 = 67.. */
	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
#else
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
#endif
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 2... */
	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
	mov	r3, r3, lsl #24		/* r3 = 6... */
	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
#else
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
#endif
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #24		/* r2 = ...9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
	mov	r1, r1, lsl #8		/* r1 = ..0. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
#else
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
	mov	r2, ip, lsl #8		/* r2 = 567. */
	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
	str	r2, [r0, #0x05]
	mov	r2, r1, lsr #8		/* r2 = ..9A */
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	strb	r2, [r0, #0x0b]
#ifdef __ARMEB__
	strh	r3, [r0, #0x09]
	mov	r3, r3, lsr #16		/* r3 = ..78 */
	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
	mov	ip, ip, lsr #16		/* ip = ..34 */
	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
	mov	r1, r1, lsr #16		/* r1 = ..x0 */
#else
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43..
*/
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
#endif
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
	ldr	ip, [r1, #0x06]		/* ip = 6789 */
	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
	ldrh	r1, [r1]		/* r1 = ..01 */
	strb	r2, [r0, #0x0b]
	mov	r2, r2, lsr #8		/* r2 = ...A */
	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
	mov	ip, ip, lsr #8		/* ip = .678 */
	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
	mov	r3, r3, lsr #8		/* r3 = .234 */
	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
	mov	r1, r1, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
#else
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	RET
END(memcpy)
#endif /* _ARM_ARCH_5E */

#ifdef GPROF

/*
 * NOTE(review): empty (single nop) marker functions.  These look like the
 * traditional BSD gprof boundary symbols used to attribute user, trap and
 * interrupt time in profiles -- confirm against the kernel/libc profiling
 * machinery before relying on that.  They intentionally do no work and
 * must keep their distinct symbol names.
 */
ENTRY(user)
	nop
END(user)
ENTRY(btrap)
	nop
END(btrap)
ENTRY(etrap)
	nop
END(etrap)
ENTRY(bintr)
	nop
END(bintr)
ENTRY(eintr)
	nop
END(eintr)
#endif