support.S revision 175255
1/*- 2 * Copyright (c) 2004 Olivier Houchard 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26/* 27 * Copyright 2003 Wasabi Systems, Inc. 28 * All rights reserved. 29 * 30 * Written by Steve C. Woodford for Wasabi Systems, Inc. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed for the NetBSD Project by 43 * Wasabi Systems, Inc. 44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45 * or promote products derived from this software without specific prior 46 * written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58 * POSSIBILITY OF SUCH DAMAGE. 59 */ 60/* 61 * Copyright (c) 1997 The NetBSD Foundation, Inc. 62 * All rights reserved. 63 * 64 * This code is derived from software contributed to The NetBSD Foundation 65 * by Neil A. Carson and Mark Brinicombe 66 * 67 * Redistribution and use in source and binary forms, with or without 68 * modification, are permitted provided that the following conditions 69 * are met: 70 * 1. Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer. 72 * 2. 
Redistributions in binary form must reproduce the above copyright 73 * notice, this list of conditions and the following disclaimer in the 74 * documentation and/or other materials provided with the distribution. 75 * 3. All advertising materials mentioning features or use of this software 76 * must display the following acknowledgement: 77 * This product includes software developed by the NetBSD 78 * Foundation, Inc. and its contributors. 79 * 4. Neither the name of The NetBSD Foundation nor the names of its 80 * contributors may be used to endorse or promote products derived 81 * from this software without specific prior written permission. 82 * 83 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 84 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 85 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 86 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 87 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 88 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 89 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 90 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 91 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 92 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 93 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <machine/asm.h>
#include <machine/asmacros.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/support.S 175255 2008-01-12 21:11:43Z cognet $");

#include "assym.s"

/*
 * Literal pool: addresses of the (optional) platform-optimized block
 * operation hooks and their minimum-size thresholds.  Each .word holds
 * the address of a kernel variable; a NULL function pointer means "no
 * optimized routine available, use the generic code below".
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_arm_bzero:
	.word	_C_LABEL(_arm_bzero)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)
.L_min_bzero_size:
	.word	_C_LABEL(_min_bzero_size)

/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 */
/* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * bzero(void *buf, size_t len):
 * If a platform hook (_arm_bzero) is installed and len >= _min_bzero_size,
 * call it first; it signals success by returning 0 in r0, in which case we
 * return immediately.  Otherwise fall into the common memset path with a
 * fill value of zero.
 */
ENTRY(bzero)
	ldr	r3, .L_arm_bzero	/* r3 = &_arm_bzero */
	ldr	r3, [r3]		/* r3 = _arm_bzero (hook fn ptr) */
	cmp	r3, #0
	beq	.Lnormal0		/* no hook installed */
	ldr	r2, .L_min_bzero_size
	ldr	r2, [r2]
	cmp	r1, r2			/* len below hook threshold? */
	blt	.Lnormal0
	stmfd	sp!, {r0, r1, lr}
	mov	r2, #0			/* third argument to the hook */
	mov	lr, pc			/* old-style indirect call ... */
	mov	pc, r3			/* ... _arm_bzero(buf, len, 0) */
	cmp	r0, #0			/* hook returns 0 on success */
	ldmfd	sp!, {r0, r1, lr}
	RETeq				/* done if the hook handled it */
.Lnormal0:
	mov	r3, #0x00		/* fill byte = 0 */
	b	do_memset

/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2			/* r1 = count (shared with bzero path) */
do_memset:
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0			/* ip = cursor; r0 preserved for return */
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
#ifdef _ARM_ARCH_5E
	tst	ip, #0x04		/* Quad-align for armv5e */
#else
	cmp	r1, #0x10
#endif
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
#ifdef _ARM_ARCH_5E
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
#endif
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time */
.Lmemset_loop128:
	subs	r1, r1, #0x80
#ifdef _ARM_ARCH_5E
	/* armv5e: 16 conditional 8-byte strd stores (r2:r3 pair) */
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop128
	RETeq				/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time */
.Lmemset_loop32:
	subs	r1, r1, #0x20
#ifdef _ARM_ARCH_5E
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop32
	RETeq				/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
#ifdef _ARM_ARCH_5E
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	RETeq				/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq				/* Zero length so just exit */

#ifdef _ARM_ARCH_5E
	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2
#else
	cmp	r1, #-2			/* flags select how many of the 1-3 tail bytes to store */
#endif

	strb	r3, [ip], #0x01		/* Set 1 byte */
	strgeb	r3, [ip], #0x01		/* Set another byte */
	strgtb	r3, [ip]		/* and a third */
	RET				/* Exit */

.Lmemset_wordunaligned:
	rsb	r2, r2, #0x004		/* r2 = bytes needed to reach alignment (1-3) */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strgeb	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2
	strgtb	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq				/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strgeb	r3, [ip], #0x01		/* Set another byte */
	strgtb	r3, [ip]		/* and a third */
	RET				/* Exit */

/*
 * bcmp(const void *b1, const void *b2, size_t len):
 * Returns 0 if the regions match, else the difference of the first
 * mismatching bytes.  len == 6 is special-cased (common for MAC/network
 * comparisons); otherwise up to 3 bytes are compared to align b2, then
 * whole words, then a byte-wise tail.
 */
ENTRY(bcmp)
	mov	ip, r0			/* ip = b1 cursor; r0 becomes the result */
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eornes	r3, ip, r1		/* r3 = b1 ^ b2 (only if len != 0) */
	RETeq				/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01
	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */

	/* Word-align the addresses, if necessary */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03
	add	r3, r3, r3, lsl #1	/* r3 *= 3 (each stanza below is 3 pairs) */
	addne	pc, pc, r3, lsl #3	/* computed jump: skip r3*8 bytes forward */
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq				/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results. Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align	5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldreqb	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne				/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldreqb	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldreqb	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne				/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldreqb	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne				/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldreqb	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldreqb	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne				/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldreqb	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne				/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET

/*
 * bcopy(src, dst, len): swap the first two arguments with the 3-eor
 * trick (no scratch register needed) and fall straight through into
 * memmove(dst, src, len).
 */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
ENTRY(memmove)
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq			/* Bail now if src/dst are the same */
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dsr) r3 = src - dst */
	/*
	 * NOTE(review): the cc/cs comments above look swapped relative to
	 * the condition codes set by "cmp r0, r1" (cc = r0 < r1) — verify
	 * this overlap test against the upstream _memcpy sources.
	 */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* src < dst: must copy backwards */

	moveq	r0, #0		/* Quick abort for len=0 */
	RETeq			/* NOTE(review): eq case already returned above */

	stmdb	sp!, {r0, lr}	/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4	/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12	/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32	/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}	/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}	/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4	/* lt: exactly 4-7 bytes left, move one word */
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}	/* ge: 8-11 bytes left, move two words */
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}	/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4	/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8	/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = src misalignment (1-3).  Round src down to a word
	 * boundary, keep a partial word in lr, and merge each output
	 * word from two adjacent source words with shift/orr pairs.
	 * The shift amounts depend on byte order, hence the __ARMEB__
	 * variants throughout.
	 */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3	/* undo word rounding: restore byte cursor */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2	/* undo word rounding: restore byte cursor */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1	/* undo word rounding: restore byte cursor */
	b	.Lmemmove_fl4

	/*
	 * Backwards copy: start from the end of both buffers and walk
	 * down.  Mirrors the forward path above, using ldmdb/stmdb and
	 * pre-decrement byte accesses.
	 */
.Lmemmove_backwards:
	add	r1, r1, r2	/* point cursors one past the end */
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12	/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14	/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4	/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8	/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/* Same shift/merge scheme as the forward path, mirrored. */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3	/* undo word rounding: restore byte cursor */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2	/* undo word rounding: restore byte cursor */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1	/* undo word rounding: restore byte cursor */
	b	.Lmemmove_bl4

#if !defined(_ARM_ARCH_5E)
/*
 * void *memcpy(void *dst, const void *src, size_t len) — generic
 * (pre-armv5e) version.  Optionally dispatches to the _arm_memcpy
 * platform hook for large copies; note the hook is skipped when
 * executing from flash (pc inside the flash window), per the
 * FLASHADDR/PHYSADDR checks.
 */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	/* Do not check arm_memcpy if we're running from flash */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal		/* no hook installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3			/* len below hook threshold? */
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0			/* fourth argument to the hook */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* old-style indirect call ... */
	ldr	pc, [r4]		/* ... _arm_memcpy(dst, src, len, 0) */
	cmp	r0, #0			/* hook returns 0 on success */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq

.Lnormal:
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#ifdef __APCS_26_
	ldmeqia	sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/* Little-endian-only shift/merge (no __ARMEB__ variants here). */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3	/* undo word rounding: restore byte cursor */
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2	/* undo word rounding: restore byte cursor */
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1	/* undo word rounding: restore byte cursor */
	b	.Lmemcpy_l4
#else
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
/* armv5e version: uses pld prefetch and strd 64-bit stores. */
ENTRY(memcpy)
	pld	[r1]
	cmp	r2, #0x0c
	ble	.Lmemcpy_short		/* <= 12 bytes */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0			/* hook returns 0 on success */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq
.Lnormal:
	mov	r3, r0			/* We must not clobber r0 */

	/* Word-align the destination buffer */
	ands	ip, r3, #0x03		/* Already word aligned? */
	beq	.Lmemcpy_wordaligned	/* Yup */
	cmp	ip, #0x02
	ldrb	ip, [r1], #0x01
	sub	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrleb	ip, [r1], #0x01
	suble	r2, r2, #0x01
	strleb	ip, [r3], #0x01
	ldrltb	ip, [r1], #0x01
	sublt	r2, r2, #0x01
	strltb	ip, [r3], #0x01

	/* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
	ands	ip, r1, #0x03		/* Is src also word-aligned? */
	bne	.Lmemcpy_bad_align	/* Nope.
Things just got bad */ 1228 1229 /* Quad-align the destination buffer */ 1230 tst r3, #0x07 /* Already quad aligned? */ 1231 ldrne ip, [r1], #0x04 1232 stmfd sp!, {r4-r9} /* Free up some registers */ 1233 subne r2, r2, #0x04 1234 strne ip, [r3], #0x04 1235 1236 /* Destination buffer quad aligned, source is at least word aligned */ 1237 subs r2, r2, #0x80 1238 blt .Lmemcpy_w_lessthan128 1239 1240 /* Copy 128 bytes at a time */ 1241.Lmemcpy_w_loop128: 1242 ldr r4, [r1], #0x04 /* LD:00-03 */ 1243 ldr r5, [r1], #0x04 /* LD:04-07 */ 1244 pld [r1, #0x18] /* Prefetch 0x20 */ 1245 ldr r6, [r1], #0x04 /* LD:08-0b */ 1246 ldr r7, [r1], #0x04 /* LD:0c-0f */ 1247 ldr r8, [r1], #0x04 /* LD:10-13 */ 1248 ldr r9, [r1], #0x04 /* LD:14-17 */ 1249 strd r4, [r3], #0x08 /* ST:00-07 */ 1250 ldr r4, [r1], #0x04 /* LD:18-1b */ 1251 ldr r5, [r1], #0x04 /* LD:1c-1f */ 1252 strd r6, [r3], #0x08 /* ST:08-0f */ 1253 ldr r6, [r1], #0x04 /* LD:20-23 */ 1254 ldr r7, [r1], #0x04 /* LD:24-27 */ 1255 pld [r1, #0x18] /* Prefetch 0x40 */ 1256 strd r8, [r3], #0x08 /* ST:10-17 */ 1257 ldr r8, [r1], #0x04 /* LD:28-2b */ 1258 ldr r9, [r1], #0x04 /* LD:2c-2f */ 1259 strd r4, [r3], #0x08 /* ST:18-1f */ 1260 ldr r4, [r1], #0x04 /* LD:30-33 */ 1261 ldr r5, [r1], #0x04 /* LD:34-37 */ 1262 strd r6, [r3], #0x08 /* ST:20-27 */ 1263 ldr r6, [r1], #0x04 /* LD:38-3b */ 1264 ldr r7, [r1], #0x04 /* LD:3c-3f */ 1265 strd r8, [r3], #0x08 /* ST:28-2f */ 1266 ldr r8, [r1], #0x04 /* LD:40-43 */ 1267 ldr r9, [r1], #0x04 /* LD:44-47 */ 1268 pld [r1, #0x18] /* Prefetch 0x60 */ 1269 strd r4, [r3], #0x08 /* ST:30-37 */ 1270 ldr r4, [r1], #0x04 /* LD:48-4b */ 1271 ldr r5, [r1], #0x04 /* LD:4c-4f */ 1272 strd r6, [r3], #0x08 /* ST:38-3f */ 1273 ldr r6, [r1], #0x04 /* LD:50-53 */ 1274 ldr r7, [r1], #0x04 /* LD:54-57 */ 1275 strd r8, [r3], #0x08 /* ST:40-47 */ 1276 ldr r8, [r1], #0x04 /* LD:58-5b */ 1277 ldr r9, [r1], #0x04 /* LD:5c-5f */ 1278 strd r4, [r3], #0x08 /* ST:48-4f */ 1279 ldr r4, [r1], #0x04 /* LD:60-63 */ 1280 ldr r5, 
[r1], #0x04 /* LD:64-67 */ 1281 pld [r1, #0x18] /* Prefetch 0x80 */ 1282 strd r6, [r3], #0x08 /* ST:50-57 */ 1283 ldr r6, [r1], #0x04 /* LD:68-6b */ 1284 ldr r7, [r1], #0x04 /* LD:6c-6f */ 1285 strd r8, [r3], #0x08 /* ST:58-5f */ 1286 ldr r8, [r1], #0x04 /* LD:70-73 */ 1287 ldr r9, [r1], #0x04 /* LD:74-77 */ 1288 strd r4, [r3], #0x08 /* ST:60-67 */ 1289 ldr r4, [r1], #0x04 /* LD:78-7b */ 1290 ldr r5, [r1], #0x04 /* LD:7c-7f */ 1291 strd r6, [r3], #0x08 /* ST:68-6f */ 1292 strd r8, [r3], #0x08 /* ST:70-77 */ 1293 subs r2, r2, #0x80 1294 strd r4, [r3], #0x08 /* ST:78-7f */ 1295 bge .Lmemcpy_w_loop128 1296 1297.Lmemcpy_w_lessthan128: 1298 adds r2, r2, #0x80 /* Adjust for extra sub */ 1299 ldmeqfd sp!, {r4-r9} 1300 RETeq /* Return now if done */ 1301 subs r2, r2, #0x20 1302 blt .Lmemcpy_w_lessthan32 1303 1304 /* Copy 32 bytes at a time */ 1305.Lmemcpy_w_loop32: 1306 ldr r4, [r1], #0x04 1307 ldr r5, [r1], #0x04 1308 pld [r1, #0x18] 1309 ldr r6, [r1], #0x04 1310 ldr r7, [r1], #0x04 1311 ldr r8, [r1], #0x04 1312 ldr r9, [r1], #0x04 1313 strd r4, [r3], #0x08 1314 ldr r4, [r1], #0x04 1315 ldr r5, [r1], #0x04 1316 strd r6, [r3], #0x08 1317 strd r8, [r3], #0x08 1318 subs r2, r2, #0x20 1319 strd r4, [r3], #0x08 1320 bge .Lmemcpy_w_loop32 1321 1322.Lmemcpy_w_lessthan32: 1323 adds r2, r2, #0x20 /* Adjust for extra sub */ 1324 ldmeqfd sp!, {r4-r9} 1325 RETeq /* Return now if done */ 1326 1327 and r4, r2, #0x18 1328 rsbs r4, r4, #0x18 1329 addne pc, pc, r4, lsl #1 1330 nop 1331 1332 /* At least 24 bytes remaining */ 1333 ldr r4, [r1], #0x04 1334 ldr r5, [r1], #0x04 1335 sub r2, r2, #0x08 1336 strd r4, [r3], #0x08 1337 1338 /* At least 16 bytes remaining */ 1339 ldr r4, [r1], #0x04 1340 ldr r5, [r1], #0x04 1341 sub r2, r2, #0x08 1342 strd r4, [r3], #0x08 1343 1344 /* At least 8 bytes remaining */ 1345 ldr r4, [r1], #0x04 1346 ldr r5, [r1], #0x04 1347 subs r2, r2, #0x08 1348 strd r4, [r3], #0x08 1349 1350 /* Less than 8 bytes remaining */ 1351 ldmfd sp!, {r4-r9} 1352 RETeq /* Return 
now if done */ 1353 subs r2, r2, #0x04 1354 ldrge ip, [r1], #0x04 1355 strge ip, [r3], #0x04 1356 RETeq /* Return now if done */ 1357 addlt r2, r2, #0x04 1358 ldrb ip, [r1], #0x01 1359 cmp r2, #0x02 1360 ldrgeb r2, [r1], #0x01 1361 strb ip, [r3], #0x01 1362 ldrgtb ip, [r1] 1363 strgeb r2, [r3], #0x01 1364 strgtb ip, [r3] 1365 RET 1366 1367 1368/* 1369 * At this point, it has not been possible to word align both buffers. 1370 * The destination buffer is word aligned, but the source buffer is not. 1371 */ 1372.Lmemcpy_bad_align: 1373 stmfd sp!, {r4-r7} 1374 bic r1, r1, #0x03 1375 cmp ip, #2 1376 ldr ip, [r1], #0x04 1377 bgt .Lmemcpy_bad3 1378 beq .Lmemcpy_bad2 1379 b .Lmemcpy_bad1 1380 1381.Lmemcpy_bad1_loop16: 1382#ifdef __ARMEB__ 1383 mov r4, ip, lsl #8 1384#else 1385 mov r4, ip, lsr #8 1386#endif 1387 ldr r5, [r1], #0x04 1388 pld [r1, #0x018] 1389 ldr r6, [r1], #0x04 1390 ldr r7, [r1], #0x04 1391 ldr ip, [r1], #0x04 1392#ifdef __ARMEB__ 1393 orr r4, r4, r5, lsr #24 1394 mov r5, r5, lsl #8 1395 orr r5, r5, r6, lsr #24 1396 mov r6, r6, lsl #8 1397 orr r6, r6, r7, lsr #24 1398 mov r7, r7, lsl #8 1399 orr r7, r7, ip, lsr #24 1400#else 1401 orr r4, r4, r5, lsl #24 1402 mov r5, r5, lsr #8 1403 orr r5, r5, r6, lsl #24 1404 mov r6, r6, lsr #8 1405 orr r6, r6, r7, lsl #24 1406 mov r7, r7, lsr #8 1407 orr r7, r7, ip, lsl #24 1408#endif 1409 str r4, [r3], #0x04 1410 str r5, [r3], #0x04 1411 str r6, [r3], #0x04 1412 str r7, [r3], #0x04 1413.Lmemcpy_bad1: 1414 subs r2, r2, #0x10 1415 bge .Lmemcpy_bad1_loop16 1416 1417 adds r2, r2, #0x10 1418 ldmeqfd sp!, {r4-r7} 1419 RETeq /* Return now if done */ 1420 subs r2, r2, #0x04 1421 sublt r1, r1, #0x03 1422 blt .Lmemcpy_bad_done 1423 1424.Lmemcpy_bad1_loop4: 1425#ifdef __ARMEB__ 1426 mov r4, ip, lsl #8 1427#else 1428 mov r4, ip, lsr #8 1429#endif 1430 ldr ip, [r1], #0x04 1431 subs r2, r2, #0x04 1432#ifdef __ARMEB__ 1433 orr r4, r4, ip, lsr #24 1434#else 1435 orr r4, r4, ip, lsl #24 1436#endif 1437 str r4, [r3], #0x04 1438 bge 
.Lmemcpy_bad1_loop4 1439 sub r1, r1, #0x03 1440 b .Lmemcpy_bad_done 1441 1442.Lmemcpy_bad2_loop16: 1443#ifdef __ARMEB__ 1444 mov r4, ip, lsl #16 1445#else 1446 mov r4, ip, lsr #16 1447#endif 1448 ldr r5, [r1], #0x04 1449 pld [r1, #0x018] 1450 ldr r6, [r1], #0x04 1451 ldr r7, [r1], #0x04 1452 ldr ip, [r1], #0x04 1453#ifdef __ARMEB__ 1454 orr r4, r4, r5, lsr #16 1455 mov r5, r5, lsl #16 1456 orr r5, r5, r6, lsr #16 1457 mov r6, r6, lsl #16 1458 orr r6, r6, r7, lsr #16 1459 mov r7, r7, lsl #16 1460 orr r7, r7, ip, lsr #16 1461#else 1462 orr r4, r4, r5, lsl #16 1463 mov r5, r5, lsr #16 1464 orr r5, r5, r6, lsl #16 1465 mov r6, r6, lsr #16 1466 orr r6, r6, r7, lsl #16 1467 mov r7, r7, lsr #16 1468 orr r7, r7, ip, lsl #16 1469#endif 1470 str r4, [r3], #0x04 1471 str r5, [r3], #0x04 1472 str r6, [r3], #0x04 1473 str r7, [r3], #0x04 1474.Lmemcpy_bad2: 1475 subs r2, r2, #0x10 1476 bge .Lmemcpy_bad2_loop16 1477 1478 adds r2, r2, #0x10 1479 ldmeqfd sp!, {r4-r7} 1480 RETeq /* Return now if done */ 1481 subs r2, r2, #0x04 1482 sublt r1, r1, #0x02 1483 blt .Lmemcpy_bad_done 1484 1485.Lmemcpy_bad2_loop4: 1486#ifdef __ARMEB__ 1487 mov r4, ip, lsl #16 1488#else 1489 mov r4, ip, lsr #16 1490#endif 1491 ldr ip, [r1], #0x04 1492 subs r2, r2, #0x04 1493#ifdef __ARMEB__ 1494 orr r4, r4, ip, lsr #16 1495#else 1496 orr r4, r4, ip, lsl #16 1497#endif 1498 str r4, [r3], #0x04 1499 bge .Lmemcpy_bad2_loop4 1500 sub r1, r1, #0x02 1501 b .Lmemcpy_bad_done 1502 1503.Lmemcpy_bad3_loop16: 1504#ifdef __ARMEB__ 1505 mov r4, ip, lsl #24 1506#else 1507 mov r4, ip, lsr #24 1508#endif 1509 ldr r5, [r1], #0x04 1510 pld [r1, #0x018] 1511 ldr r6, [r1], #0x04 1512 ldr r7, [r1], #0x04 1513 ldr ip, [r1], #0x04 1514#ifdef __ARMEB__ 1515 orr r4, r4, r5, lsr #8 1516 mov r5, r5, lsl #24 1517 orr r5, r5, r6, lsr #8 1518 mov r6, r6, lsl #24 1519 orr r6, r6, r7, lsr #8 1520 mov r7, r7, lsl #24 1521 orr r7, r7, ip, lsr #8 1522#else 1523 orr r4, r4, r5, lsl #8 1524 mov r5, r5, lsr #24 1525 orr r5, r5, r6, lsl #8 1526 
mov r6, r6, lsr #24 1527 orr r6, r6, r7, lsl #8 1528 mov r7, r7, lsr #24 1529 orr r7, r7, ip, lsl #8 1530#endif 1531 str r4, [r3], #0x04 1532 str r5, [r3], #0x04 1533 str r6, [r3], #0x04 1534 str r7, [r3], #0x04 1535.Lmemcpy_bad3: 1536 subs r2, r2, #0x10 1537 bge .Lmemcpy_bad3_loop16 1538 1539 adds r2, r2, #0x10 1540 ldmeqfd sp!, {r4-r7} 1541 RETeq /* Return now if done */ 1542 subs r2, r2, #0x04 1543 sublt r1, r1, #0x01 1544 blt .Lmemcpy_bad_done 1545 1546.Lmemcpy_bad3_loop4: 1547#ifdef __ARMEB__ 1548 mov r4, ip, lsl #24 1549#else 1550 mov r4, ip, lsr #24 1551#endif 1552 ldr ip, [r1], #0x04 1553 subs r2, r2, #0x04 1554#ifdef __ARMEB__ 1555 orr r4, r4, ip, lsr #8 1556#else 1557 orr r4, r4, ip, lsl #8 1558#endif 1559 str r4, [r3], #0x04 1560 bge .Lmemcpy_bad3_loop4 1561 sub r1, r1, #0x01 1562 1563.Lmemcpy_bad_done: 1564 ldmfd sp!, {r4-r7} 1565 adds r2, r2, #0x04 1566 RETeq 1567 ldrb ip, [r1], #0x01 1568 cmp r2, #0x02 1569 ldrgeb r2, [r1], #0x01 1570 strb ip, [r3], #0x01 1571 ldrgtb ip, [r1] 1572 strgeb r2, [r3], #0x01 1573 strgtb ip, [r3] 1574 RET 1575 1576 1577/* 1578 * Handle short copies (less than 16 bytes), possibly misaligned. 1579 * Some of these are *very* common, thanks to the network stack, 1580 * and so are handled specially. 
1581 */ 1582.Lmemcpy_short: 1583 add pc, pc, r2, lsl #2 1584 nop 1585 RET /* 0x00 */ 1586 b .Lmemcpy_bytewise /* 0x01 */ 1587 b .Lmemcpy_bytewise /* 0x02 */ 1588 b .Lmemcpy_bytewise /* 0x03 */ 1589 b .Lmemcpy_4 /* 0x04 */ 1590 b .Lmemcpy_bytewise /* 0x05 */ 1591 b .Lmemcpy_6 /* 0x06 */ 1592 b .Lmemcpy_bytewise /* 0x07 */ 1593 b .Lmemcpy_8 /* 0x08 */ 1594 b .Lmemcpy_bytewise /* 0x09 */ 1595 b .Lmemcpy_bytewise /* 0x0a */ 1596 b .Lmemcpy_bytewise /* 0x0b */ 1597 b .Lmemcpy_c /* 0x0c */ 1598.Lmemcpy_bytewise: 1599 mov r3, r0 /* We must not clobber r0 */ 1600 ldrb ip, [r1], #0x01 16011: subs r2, r2, #0x01 1602 strb ip, [r3], #0x01 1603 ldrneb ip, [r1], #0x01 1604 bne 1b 1605 RET 1606 1607/****************************************************************************** 1608 * Special case for 4 byte copies 1609 */ 1610#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1611#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1612 LMEMCPY_4_PAD 1613.Lmemcpy_4: 1614 and r2, r1, #0x03 1615 orr r2, r2, r0, lsl #2 1616 ands r2, r2, #0x0f 1617 sub r3, pc, #0x14 1618 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 1619 1620/* 1621 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1622 */ 1623 ldr r2, [r1] 1624 str r2, [r0] 1625 RET 1626 LMEMCPY_4_PAD 1627 1628/* 1629 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1630 */ 1631 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1632 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1633#ifdef __ARMEB__ 1634 mov r3, r3, lsl #8 /* r3 = 012. 
*/ 1635 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1636#else 1637 mov r3, r3, lsr #8 /* r3 = .210 */ 1638 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1639#endif 1640 str r3, [r0] 1641 RET 1642 LMEMCPY_4_PAD 1643 1644/* 1645 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1646 */ 1647#ifdef __ARMEB__ 1648 ldrh r3, [r1] 1649 ldrh r2, [r1, #0x02] 1650#else 1651 ldrh r3, [r1, #0x02] 1652 ldrh r2, [r1] 1653#endif 1654 orr r3, r2, r3, lsl #16 1655 str r3, [r0] 1656 RET 1657 LMEMCPY_4_PAD 1658 1659/* 1660 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1661 */ 1662 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1663 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1664#ifdef __ARMEB__ 1665 mov r3, r3, lsl #24 /* r3 = 0... */ 1666 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1667#else 1668 mov r3, r3, lsr #24 /* r3 = ...0 */ 1669 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1670#endif 1671 str r3, [r0] 1672 RET 1673 LMEMCPY_4_PAD 1674 1675/* 1676 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1677 */ 1678 ldr r2, [r1] 1679#ifdef __ARMEB__ 1680 strb r2, [r0, #0x03] 1681 mov r3, r2, lsr #8 1682 mov r1, r2, lsr #24 1683 strb r1, [r0] 1684#else 1685 strb r2, [r0] 1686 mov r3, r2, lsr #8 1687 mov r1, r2, lsr #24 1688 strb r1, [r0, #0x03] 1689#endif 1690 strh r3, [r0, #0x01] 1691 RET 1692 LMEMCPY_4_PAD 1693 1694/* 1695 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1696 */ 1697 ldrb r2, [r1] 1698 ldrh r3, [r1, #0x01] 1699 ldrb r1, [r1, #0x03] 1700 strb r2, [r0] 1701 strh r3, [r0, #0x01] 1702 strb r1, [r0, #0x03] 1703 RET 1704 LMEMCPY_4_PAD 1705 1706/* 1707 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1708 */ 1709 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1710 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ 1711#ifdef __ARMEB__ 1712 mov r1, r2, lsr #8 /* r1 = ...0 */ 1713 strb r1, [r0] 1714 mov r2, r2, lsl #8 /* r2 = .01. 
*/ 1715 orr r2, r2, r3, lsr #8 /* r2 = .012 */ 1716#else 1717 strb r2, [r0] 1718 mov r2, r2, lsr #8 /* r2 = ...1 */ 1719 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1720 mov r3, r3, lsr #8 /* r3 = ...3 */ 1721#endif 1722 strh r2, [r0, #0x01] 1723 strb r3, [r0, #0x03] 1724 RET 1725 LMEMCPY_4_PAD 1726 1727/* 1728 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1729 */ 1730 ldrb r2, [r1] 1731 ldrh r3, [r1, #0x01] 1732 ldrb r1, [r1, #0x03] 1733 strb r2, [r0] 1734 strh r3, [r0, #0x01] 1735 strb r1, [r0, #0x03] 1736 RET 1737 LMEMCPY_4_PAD 1738 1739/* 1740 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1741 */ 1742 ldr r2, [r1] 1743#ifdef __ARMEB__ 1744 strh r2, [r0, #0x02] 1745 mov r3, r2, lsr #16 1746 strh r3, [r0] 1747#else 1748 strh r2, [r0] 1749 mov r3, r2, lsr #16 1750 strh r3, [r0, #0x02] 1751#endif 1752 RET 1753 LMEMCPY_4_PAD 1754 1755/* 1756 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1757 */ 1758 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1759 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1760 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1761 strh r1, [r0] 1762#ifdef __ARMEB__ 1763 mov r2, r2, lsl #8 /* r2 = 012. */ 1764 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1765#else 1766 mov r2, r2, lsr #24 /* r2 = ...2 */ 1767 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1768#endif 1769 strh r2, [r0, #0x02] 1770 RET 1771 LMEMCPY_4_PAD 1772 1773/* 1774 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1775 */ 1776 ldrh r2, [r1] 1777 ldrh r3, [r1, #0x02] 1778 strh r2, [r0] 1779 strh r3, [r0, #0x02] 1780 RET 1781 LMEMCPY_4_PAD 1782 1783/* 1784 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1785 */ 1786 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1787 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1788 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1789 strh r1, [r0, #0x02] 1790#ifdef __ARMEB__ 1791 mov r3, r3, lsr #24 /* r3 = ...1 */ 1792 orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 1793#else 1794 mov r3, r3, lsl #8 /* r3 = 321. 
*/ 1795 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1796#endif 1797 strh r3, [r0] 1798 RET 1799 LMEMCPY_4_PAD 1800 1801/* 1802 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1803 */ 1804 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1805#ifdef __ARMEB__ 1806 strb r2, [r0, #0x03] 1807 mov r3, r2, lsr #8 1808 mov r1, r2, lsr #24 1809 strh r3, [r0, #0x01] 1810 strb r1, [r0] 1811#else 1812 strb r2, [r0] 1813 mov r3, r2, lsr #8 1814 mov r1, r2, lsr #24 1815 strh r3, [r0, #0x01] 1816 strb r1, [r0, #0x03] 1817#endif 1818 RET 1819 LMEMCPY_4_PAD 1820 1821/* 1822 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1823 */ 1824 ldrb r2, [r1] 1825 ldrh r3, [r1, #0x01] 1826 ldrb r1, [r1, #0x03] 1827 strb r2, [r0] 1828 strh r3, [r0, #0x01] 1829 strb r1, [r0, #0x03] 1830 RET 1831 LMEMCPY_4_PAD 1832 1833/* 1834 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1835 */ 1836#ifdef __ARMEB__ 1837 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1838 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1839 strb r3, [r0, #0x03] 1840 mov r3, r3, lsr #8 /* r3 = ...2 */ 1841 orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 1842 strh r3, [r0, #0x01] 1843 mov r2, r2, lsr #8 /* r2 = ...0 */ 1844 strb r2, [r0] 1845#else 1846 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1847 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1848 strb r2, [r0] 1849 mov r2, r2, lsr #8 /* r2 = ...1 */ 1850 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1851 strh r2, [r0, #0x01] 1852 mov r3, r3, lsr #8 /* r3 = ...3 */ 1853 strb r3, [r0, #0x03] 1854#endif 1855 RET 1856 LMEMCPY_4_PAD 1857 1858/* 1859 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1860 */ 1861 ldrb r2, [r1] 1862 ldrh r3, [r1, #0x01] 1863 ldrb r1, [r1, #0x03] 1864 strb r2, [r0] 1865 strh r3, [r0, #0x01] 1866 strb r1, [r0, #0x03] 1867 RET 1868 LMEMCPY_4_PAD 1869 1870 1871/****************************************************************************** 1872 * Special case for 6 byte copies 1873 */ 1874#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 1875#define LMEMCPY_6_PAD 
.align LMEMCPY_6_LOG2 1876 LMEMCPY_6_PAD 1877.Lmemcpy_6: 1878 and r2, r1, #0x03 1879 orr r2, r2, r0, lsl #2 1880 ands r2, r2, #0x0f 1881 sub r3, pc, #0x14 1882 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 1883 1884/* 1885 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1886 */ 1887 ldr r2, [r1] 1888 ldrh r3, [r1, #0x04] 1889 str r2, [r0] 1890 strh r3, [r0, #0x04] 1891 RET 1892 LMEMCPY_6_PAD 1893 1894/* 1895 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1896 */ 1897 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1898 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 1899#ifdef __ARMEB__ 1900 mov r2, r2, lsl #8 /* r2 = 012. */ 1901 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1902#else 1903 mov r2, r2, lsr #8 /* r2 = .210 */ 1904 orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 1905#endif 1906 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 1907 str r2, [r0] 1908 strh r3, [r0, #0x04] 1909 RET 1910 LMEMCPY_6_PAD 1911 1912/* 1913 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1914 */ 1915 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1916 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1917#ifdef __ARMEB__ 1918 mov r1, r3, lsr #16 /* r1 = ..23 */ 1919 orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 1920 str r1, [r0] 1921 strh r3, [r0, #0x04] 1922#else 1923 mov r1, r3, lsr #16 /* r1 = ..54 */ 1924 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1925 str r2, [r0] 1926 strh r1, [r0, #0x04] 1927#endif 1928 RET 1929 LMEMCPY_6_PAD 1930 1931/* 1932 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1933 */ 1934 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1935 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 1936 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r3 = xxx5 */ 1937#ifdef __ARMEB__ 1938 mov r2, r2, lsl #24 /* r2 = 0... */ 1939 orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1940 mov r3, r3, lsl #8 /* r3 = 234. */ 1941 orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 1942#else 1943 mov r2, r2, lsr #24 /* r2 = ...0 */ 1944 orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1945 mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 1946 orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 1947#endif 1948 str r2, [r0] 1949 strh r1, [r0, #0x04] 1950 RET 1951 LMEMCPY_6_PAD 1952 1953/* 1954 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1955 */ 1956 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1957 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 1958 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1959 strh r1, [r0, #0x01] 1960#ifdef __ARMEB__ 1961 mov r1, r3, lsr #24 /* r1 = ...0 */ 1962 strb r1, [r0] 1963 mov r3, r3, lsl #8 /* r3 = 123. */ 1964 orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 1965#else 1966 strb r3, [r0] 1967 mov r3, r3, lsr #24 /* r3 = ...3 */ 1968 orr r3, r3, r2, lsl #8 /* r3 = .543 */ 1969 mov r2, r2, lsr #8 /* r2 = ...5 */ 1970#endif 1971 strh r3, [r0, #0x03] 1972 strb r2, [r0, #0x05] 1973 RET 1974 LMEMCPY_6_PAD 1975 1976/* 1977 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1978 */ 1979 ldrb r2, [r1] 1980 ldrh r3, [r1, #0x01] 1981 ldrh ip, [r1, #0x03] 1982 ldrb r1, [r1, #0x05] 1983 strb r2, [r0] 1984 strh r3, [r0, #0x01] 1985 strh ip, [r0, #0x03] 1986 strb r1, [r0, #0x05] 1987 RET 1988 LMEMCPY_6_PAD 1989 1990/* 1991 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1992 */ 1993 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1994 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1995#ifdef __ARMEB__ 1996 mov r3, r2, lsr #8 /* r3 = ...0 */ 1997 strb r3, [r0] 1998 strb r1, [r0, #0x05] 1999 mov r3, r1, lsr #8 /* r3 = .234 */ 2000 strh r3, [r0, #0x03] 2001 mov r3, r2, lsl #8 /* r3 = .01. 
*/ 2002 orr r3, r3, r1, lsr #24 /* r3 = .012 */ 2003 strh r3, [r0, #0x01] 2004#else 2005 strb r2, [r0] 2006 mov r3, r1, lsr #24 2007 strb r3, [r0, #0x05] 2008 mov r3, r1, lsr #8 /* r3 = .543 */ 2009 strh r3, [r0, #0x03] 2010 mov r3, r2, lsr #8 /* r3 = ...1 */ 2011 orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 2012 strh r3, [r0, #0x01] 2013#endif 2014 RET 2015 LMEMCPY_6_PAD 2016 2017/* 2018 * 0111: dst is 8-bit aligned, src is 8-bit aligned 2019 */ 2020 ldrb r2, [r1] 2021 ldrh r3, [r1, #0x01] 2022 ldrh ip, [r1, #0x03] 2023 ldrb r1, [r1, #0x05] 2024 strb r2, [r0] 2025 strh r3, [r0, #0x01] 2026 strh ip, [r0, #0x03] 2027 strb r1, [r0, #0x05] 2028 RET 2029 LMEMCPY_6_PAD 2030 2031/* 2032 * 1000: dst is 16-bit aligned, src is 32-bit aligned 2033 */ 2034#ifdef __ARMEB__ 2035 ldr r2, [r1] /* r2 = 0123 */ 2036 ldrh r3, [r1, #0x04] /* r3 = ..45 */ 2037 mov r1, r2, lsr #16 /* r1 = ..01 */ 2038 orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 2039 strh r1, [r0] 2040 str r3, [r0, #0x02] 2041#else 2042 ldrh r2, [r1, #0x04] /* r2 = ..54 */ 2043 ldr r3, [r1] /* r3 = 3210 */ 2044 mov r2, r2, lsl #16 /* r2 = 54.. */ 2045 orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 2046 strh r3, [r0] 2047 str r2, [r0, #0x02] 2048#endif 2049 RET 2050 LMEMCPY_6_PAD 2051 2052/* 2053 * 1001: dst is 16-bit aligned, src is 8-bit aligned 2054 */ 2055 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2056 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 2057 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2058#ifdef __ARMEB__ 2059 mov r2, r2, lsr #8 /* r2 = .345 */ 2060 orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 2061#else 2062 mov r2, r2, lsl #8 /* r2 = 543. 
*/ 2063 orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 2064#endif 2065 strh r1, [r0] 2066 str r2, [r0, #0x02] 2067 RET 2068 LMEMCPY_6_PAD 2069 2070/* 2071 * 1010: dst is 16-bit aligned, src is 16-bit aligned 2072 */ 2073 ldrh r2, [r1] 2074 ldr r3, [r1, #0x02] 2075 strh r2, [r0] 2076 str r3, [r0, #0x02] 2077 RET 2078 LMEMCPY_6_PAD 2079 2080/* 2081 * 1011: dst is 16-bit aligned, src is 8-bit aligned 2082 */ 2083 ldrb r3, [r1] /* r3 = ...0 */ 2084 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2085 ldrb r1, [r1, #0x05] /* r1 = ...5 */ 2086#ifdef __ARMEB__ 2087 mov r3, r3, lsl #8 /* r3 = ..0. */ 2088 orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 2089 orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 2090#else 2091 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2092 mov r1, r1, lsl #24 /* r1 = 5... */ 2093 orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 2094#endif 2095 strh r3, [r0] 2096 str r1, [r0, #0x02] 2097 RET 2098 LMEMCPY_6_PAD 2099 2100/* 2101 * 1100: dst is 8-bit aligned, src is 32-bit aligned 2102 */ 2103 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2104 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 2105#ifdef __ARMEB__ 2106 mov r3, r2, lsr #24 /* r3 = ...0 */ 2107 strb r3, [r0] 2108 mov r2, r2, lsl #8 /* r2 = 123. 
*/ 2109 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2110#else 2111 strb r2, [r0] 2112 mov r2, r2, lsr #8 /* r2 = .321 */ 2113 orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 2114 mov r1, r1, lsr #8 /* r1 = ...5 */ 2115#endif 2116 str r2, [r0, #0x01] 2117 strb r1, [r0, #0x05] 2118 RET 2119 LMEMCPY_6_PAD 2120 2121/* 2122 * 1101: dst is 8-bit aligned, src is 8-bit aligned 2123 */ 2124 ldrb r2, [r1] 2125 ldrh r3, [r1, #0x01] 2126 ldrh ip, [r1, #0x03] 2127 ldrb r1, [r1, #0x05] 2128 strb r2, [r0] 2129 strh r3, [r0, #0x01] 2130 strh ip, [r0, #0x03] 2131 strb r1, [r0, #0x05] 2132 RET 2133 LMEMCPY_6_PAD 2134 2135/* 2136 * 1110: dst is 8-bit aligned, src is 16-bit aligned 2137 */ 2138 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2139 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 2140#ifdef __ARMEB__ 2141 mov r3, r2, lsr #8 /* r3 = ...0 */ 2142 strb r3, [r0] 2143 mov r2, r2, lsl #24 /* r2 = 1... */ 2144 orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2145#else 2146 strb r2, [r0] 2147 mov r2, r2, lsr #8 /* r2 = ...1 */ 2148 orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 2149 mov r1, r1, lsr #24 /* r1 = ...5 */ 2150#endif 2151 str r2, [r0, #0x01] 2152 strb r1, [r0, #0x05] 2153 RET 2154 LMEMCPY_6_PAD 2155 2156/* 2157 * 1111: dst is 8-bit aligned, src is 8-bit aligned 2158 */ 2159 ldrb r2, [r1] 2160 ldr r3, [r1, #0x01] 2161 ldrb r1, [r1, #0x05] 2162 strb r2, [r0] 2163 str r3, [r0, #0x01] 2164 strb r1, [r0, #0x05] 2165 RET 2166 LMEMCPY_6_PAD 2167 2168 2169/****************************************************************************** 2170 * Special case for 8 byte copies 2171 */ 2172#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 2173#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 2174 LMEMCPY_8_PAD 2175.Lmemcpy_8: 2176 and r2, r1, #0x03 2177 orr r2, r2, r0, lsl #2 2178 ands r2, r2, #0x0f 2179 sub r3, pc, #0x14 2180 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 2181 2182/* 2183 * 0000: dst is 32-bit aligned, src is 32-bit aligned 2184 */ 2185 ldr r2, [r1] 2186 ldr r3, [r1, #0x04] 2187 str r2, [r0] 2188 str r3, [r0, #0x04] 
2189 RET 2190 LMEMCPY_8_PAD 2191 2192/* 2193 * 0001: dst is 32-bit aligned, src is 8-bit aligned 2194 */ 2195 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2196 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */ 2197 ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2198#ifdef __ARMEB__ 2199 mov r3, r3, lsl #8 /* r3 = 012. */ 2200 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 2201 orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 2202#else 2203 mov r3, r3, lsr #8 /* r3 = .210 */ 2204 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 2205 mov r1, r1, lsl #24 /* r1 = 7... */ 2206 orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 2207#endif 2208 str r3, [r0] 2209 str r2, [r0, #0x04] 2210 RET 2211 LMEMCPY_8_PAD 2212 2213/* 2214 * 0010: dst is 32-bit aligned, src is 16-bit aligned 2215 */ 2216 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2217 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2218 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2219#ifdef __ARMEB__ 2220 mov r2, r2, lsl #16 /* r2 = 01.. */ 2221 orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2222 orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 2223#else 2224 orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2225 mov r3, r3, lsr #16 /* r3 = ..54 */ 2226 orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 2227#endif 2228 str r2, [r0] 2229 str r3, [r0, #0x04] 2230 RET 2231 LMEMCPY_8_PAD 2232 2233/* 2234 * 0011: dst is 32-bit aligned, src is 8-bit aligned 2235 */ 2236 ldrb r3, [r1] /* r3 = ...0 */ 2237 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2238 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 2239#ifdef __ARMEB__ 2240 mov r3, r3, lsl #24 /* r3 = 0... */ 2241 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 2242 mov r2, r2, lsl #24 /* r2 = 4... 
*/ 2243 orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 2244#else 2245 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2246 mov r2, r2, lsr #24 /* r2 = ...4 */ 2247 orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 2248#endif 2249 str r3, [r0] 2250 str r2, [r0, #0x04] 2251 RET 2252 LMEMCPY_8_PAD 2253 2254/* 2255 * 0100: dst is 8-bit aligned, src is 32-bit aligned 2256 */ 2257 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 2258 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 2259#ifdef __ARMEB__ 2260 mov r1, r3, lsr #24 /* r1 = ...0 */ 2261 strb r1, [r0] 2262 mov r1, r3, lsr #8 /* r1 = .012 */ 2263 strb r2, [r0, #0x07] 2264 mov r3, r3, lsl #24 /* r3 = 3... */ 2265 orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 2266#else 2267 strb r3, [r0] 2268 mov r1, r2, lsr #24 /* r1 = ...7 */ 2269 strb r1, [r0, #0x07] 2270 mov r1, r3, lsr #8 /* r1 = .321 */ 2271 mov r3, r3, lsr #24 /* r3 = ...3 */ 2272 orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 2273#endif 2274 strh r1, [r0, #0x01] 2275 str r3, [r0, #0x03] 2276 RET 2277 LMEMCPY_8_PAD 2278 2279/* 2280 * 0101: dst is 8-bit aligned, src is 8-bit aligned 2281 */ 2282 ldrb r2, [r1] 2283 ldrh r3, [r1, #0x01] 2284 ldr ip, [r1, #0x03] 2285 ldrb r1, [r1, #0x07] 2286 strb r2, [r0] 2287 strh r3, [r0, #0x01] 2288 str ip, [r0, #0x03] 2289 strb r1, [r0, #0x07] 2290 RET 2291 LMEMCPY_8_PAD 2292 2293/* 2294 * 0110: dst is 8-bit aligned, src is 16-bit aligned 2295 */ 2296 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2297 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2298 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2299#ifdef __ARMEB__ 2300 mov ip, r2, lsr #8 /* ip = ...0 */ 2301 strb ip, [r0] 2302 mov ip, r2, lsl #8 /* ip = .01. */ 2303 orr ip, ip, r3, lsr #24 /* ip = .012 */ 2304 strb r1, [r0, #0x07] 2305 mov r3, r3, lsl #8 /* r3 = 345. 
*/
	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
#else
	strb	r2, [r0]		/* 0 */
	mov	ip, r1, lsr #8		/* ip = ...7 */
	strb	ip, [r0, #0x07]		/* 7 */
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r3, r3, lsr #8		/* r3 = .543 */
	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
#endif
	strh	ip, [r0, #0x01]
	str	r3, [r0, #0x03]
	RET
	LMEMCPY_8_PAD

/*
 * NOTE(review): in the register diagrams below, hex digits (0-7, or 0-B in
 * the 12-byte cases) name the source bytes in memory order, '.' marks a zero
 * byte and 'x' a don't-care byte.  Every aligned-case handler is padded to a
 * fixed power-of-two slot (LMEMCPY_8_PAD / LMEMCPY_C_PAD) and reached by an
 * indexed jump, so instructions must not be added or removed without
 * checking that the handler still fits its slot.
 */

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
#ifdef __ARMEB__
	strh	r3, [r0, #0x01]
	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
#else
	strh	ip, [r0, #0x01]
	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
#endif
	str	r2, [r0, #0x03]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, r3, lsr #16		/* r1 = ..45 */
	orr	r2, r1 ,r2, lsl #16	/* r2 = 2345 */
#else
	strh	r2, [r0]
	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
#endif
	str	r2, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 *
 * The word load at [r1, #-1] picks up one don't-care byte ('x') below the
 * buffer; only bytes 0-2 of that word are used.
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
#ifdef __ARMEB__
	mov	r1, r2, lsl #24		/* r1 = 2... */
	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
#else
	mov	r1, r2, lsr #24		/* r1 = ...2 */
	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
#endif
	str	r1, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	ip, [r1, #0x02]
	ldrh	r3, [r1, #0x06]
	strh	r2, [r0]
	str	ip, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	ip, [r1]		/* ip = ...0 */
	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
	strh	r1, [r0, #0x06]
#ifdef __ARMEB__
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
	mov	r2, r2, lsr #24		/* r2 = ...1 */
	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
#else
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
#endif
	str	r3, [r0, #0x02]
	strh	r2, [r0]
	RET
	LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
	strh	r1, [r0, #0x05]
#ifdef __ARMEB__
	strb	r3, [r0, #0x07]
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
#else
	strb	r2, [r0]
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
	str	r2, [r0, #0x01]
#endif
	RET
	LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
#ifdef __ARMEB__
	strh	ip, [r0, #0x05]
	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
#else
	strh	r3, [r0, #0x05]
	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #24		/* ip = 1... */
	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
	strb	r1, [r0, #0x07]
	mov	r1, r1, lsr #8		/* r1 = ...6 */
	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
#else
	strb	r2, [r0]
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r2, r1, lsr #8		/* r2 = ...7 */
	strb	r2, [r0, #0x07]
	mov	r1, r1, lsl #8		/* r1 = .76. */
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
#endif
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	RET
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/******************************************************************************
 * Special case for 12 byte copies
 *
 * Dispatch: index = ((dst & 3) << 2) | (src & 3) selects one of 16 handlers
 * named by their binary index below.  Each handler is padded by
 * LMEMCPY_C_PAD to exactly 2^LMEMCPY_C_LOG2 (128) bytes so the table can be
 * entered with a single shifted add to pc.
 */
#define LMEMCPY_C_LOG2	7	/* 128 bytes */
#define LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
.Lmemcpy_c:
	and	r2, r1, #0x03		/* r2 = src & 3 */
	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
	ands	r2, r2, #0x0f		/* r2 = handler index; Z set for 0000 */
	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c (pc reads as .+8) */
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* indexed jump; 0000 falls through */

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 *
 * The word load at [r1, #-1] picks up one don't-care byte ('x') below the
 * buffer; only bytes 0-2 of that word are used.
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
#ifdef __ARMEB__
	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsr #24		/* r2 = ...7 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
	mov	r1, r1, lsl #8		/* r1 = 012. */
	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
#else
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x04]
	str	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #16		/* r3 = 45.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 *
 * The word load at [r1, #0x09] picks up one don't-care byte ('x') past the
 * end of the buffer; only bytes 9-B of that word are used.
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #24		/* r3 = 4... */
	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r2, lsl #24		/* r1 = 3... */
	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
	mov	r1, r3, lsl #24		/* r1 = 7... */
	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
#else
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
#endif
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, ror #8		/* r2 = 1..0 */
	strb	r2, [r0]
	mov	r2, r2, lsr #16		/* r2 = ..1. */
	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsl #8		/* r2 = 345. */
	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
	mov	r2, ip, lsl #8		/* r2 = 789. */
	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
#endif
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 *
 * The word load at [r1, #0x09] picks up one don't-care byte ('x') past the
 * end of the buffer; only bytes 9-B of that word are used.
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	strb	r2, [r0]
#ifdef __ARMEB__
	mov	r2, r3, lsr #16		/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsl #16		/* r3 = 34.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
	mov	ip, ip, lsl #16		/* ip = 78.. */
	orr	ip, ip, r1, lsr #16	/* ip = 789A */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
#else
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
#endif
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, ip, lsl #16		/* r1 = 23.. */
	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
	mov	r3, r3, lsl #16		/* r3 = 67.. */
	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
#else
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
#endif
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 *
 * The word load at [r1, #-1] picks up one don't-care byte ('x') below the
 * buffer; only bytes 0-2 of that word are used.
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 2... */
	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
	mov	r3, r3, lsl #24		/* r3 = 6... */
	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
#else
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
#endif
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 *
 * The word load at [r1, #0x09] picks up one don't-care byte ('x') past the
 * end of the buffer; only bytes 9-B of that word are used.
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #24		/* r2 = ...9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
	mov	r1, r1, lsl #8		/* r1 = ..0. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
#else
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
	mov	r2, ip, lsl #8		/* r2 = 567. */
	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
	str	r2, [r0, #0x05]
	mov	r2, r1, lsr #8		/* r2 = ..9A */
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 *
 * The word load at [r1, #-1] picks up one don't-care byte ('x') below the
 * buffer; only bytes 0-2 of that word are used.
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	strb	r2, [r0, #0x0b]
#ifdef __ARMEB__
	strh	r3, [r0, #0x09]
	mov	r3, r3, lsr #16		/* r3 = ..78 */
	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
	mov	ip, ip, lsr #16		/* ip = ..34 */
	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
	mov	r1, r1, lsr #16		/* r1 = ..x0 */
#else
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43.. */
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
#endif
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
	ldr	ip, [r1, #0x06]		/* ip = 6789 */
	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
	ldrh	r1, [r1]		/* r1 = ..01 */
	strb	r2, [r0, #0x0b]
	mov	r2, r2, lsr #8		/* r2 = ...A */
	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
	mov	ip, ip, lsr #8		/* ip = .678 */
	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
	mov	r3, r3, lsr #8		/* r3 = .234 */
	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
	mov	r1, r1, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
#else
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	RET
#endif /* _ARM_ARCH_5E */

#ifdef GPROF

/*
 * NOTE(review): empty marker entry points, each a single nop.  Presumably
 * the boundary symbols a GPROF-enabled kernel profiler attributes samples
 * to (user / trap / interrupt regions) -- confirm against the GPROF
 * consumers before relying on their addresses.
 */
ENTRY(user)
	nop
ENTRY(btrap)
	nop
ENTRY(etrap)
	nop
ENTRY(bintr)
	nop
ENTRY(eintr)
	nop

#endif