/* support.S revision 144967 */
/*-
 * Copyright (c) 2004 Olivier Houchard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25129198Scognet */ 26129198Scognet 27129198Scognet#include <machine/asm.h> 28129198Scognet#include <machine/asmacros.h> 29129198Scognet__FBSDID("$FreeBSD: head/sys/arm/arm/support.S 144967 2005-04-12 22:46:09Z cognet $"); 30129198Scognet 31129198Scognet#include "assym.s" 32129198Scognet 33129198Scognet/* 34129250Scognet * memset: Sets a block of memory to the specified value 35129250Scognet * 36129250Scognet * On entry: 37129250Scognet * r0 - dest address 38129250Scognet * r1 - byte to write 39129250Scognet * r2 - number of bytes to write 40129250Scognet * 41129250Scognet * On exit: 42129250Scognet * r0 - dest address 43129250Scognet */ 44129250Scognet/* LINTSTUB: Func: void bzero(void *, size_t) */ 45129250ScognetENTRY(bzero) 46129250Scognet mov r3, #0x00 47129250Scognet b do_memset 48129250Scognet 49129250Scognet/* LINTSTUB: Func: void *memset(void *, int, size_t) */ 50129250ScognetENTRY(memset) 51129250Scognet and r3, r1, #0xff /* We deal with bytes */ 52129250Scognet mov r1, r2 53129250Scognetdo_memset: 54129250Scognet cmp r1, #0x04 /* Do we have less than 4 bytes */ 55129250Scognet mov ip, r0 56129250Scognet blt .Lmemset_lessthanfour 57129250Scognet 58129250Scognet /* Ok first we will word align the address */ 59129250Scognet ands r2, ip, #0x03 /* Get the bottom two bits */ 60129250Scognet bne .Lmemset_wordunaligned /* The address is not word aligned */ 61129250Scognet 62129250Scognet /* We are now word aligned */ 63129250Scognet.Lmemset_wordaligned: 64129250Scognet orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */ 65129250Scognet#ifdef __XSCALE__ 66129250Scognet tst ip, #0x04 /* Quad-align for Xscale */ 67129250Scognet#else 68129250Scognet cmp r1, #0x10 69129250Scognet#endif 70129250Scognet orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */ 71129250Scognet#ifdef __XSCALE__ 72129250Scognet subne r1, r1, #0x04 /* Quad-align if necessary */ 73129250Scognet strne r3, [ip], #0x04 74129250Scognet cmp r1, #0x10 75129250Scognet#endif 76129250Scognet blt 
.Lmemset_loop4 /* If less than 16 then use words */ 77129250Scognet mov r2, r3 /* Duplicate data */ 78129250Scognet cmp r1, #0x80 /* If < 128 then skip the big loop */ 79129250Scognet blt .Lmemset_loop32 80129250Scognet 81129250Scognet /* Do 128 bytes at a time */ 82129250Scognet.Lmemset_loop128: 83129250Scognet subs r1, r1, #0x80 84129250Scognet#ifdef __XSCALE__ 85129250Scognet strged r2, [ip], #0x08 86129250Scognet strged r2, [ip], #0x08 87129250Scognet strged r2, [ip], #0x08 88129250Scognet strged r2, [ip], #0x08 89129250Scognet strged r2, [ip], #0x08 90129250Scognet strged r2, [ip], #0x08 91129250Scognet strged r2, [ip], #0x08 92129250Scognet strged r2, [ip], #0x08 93129250Scognet strged r2, [ip], #0x08 94129250Scognet strged r2, [ip], #0x08 95129250Scognet strged r2, [ip], #0x08 96129250Scognet strged r2, [ip], #0x08 97129250Scognet strged r2, [ip], #0x08 98129250Scognet strged r2, [ip], #0x08 99129250Scognet strged r2, [ip], #0x08 100129250Scognet strged r2, [ip], #0x08 101129250Scognet#else 102129250Scognet stmgeia ip!, {r2-r3} 103129250Scognet stmgeia ip!, {r2-r3} 104129250Scognet stmgeia ip!, {r2-r3} 105129250Scognet stmgeia ip!, {r2-r3} 106129250Scognet stmgeia ip!, {r2-r3} 107129250Scognet stmgeia ip!, {r2-r3} 108129250Scognet stmgeia ip!, {r2-r3} 109129250Scognet stmgeia ip!, {r2-r3} 110129250Scognet stmgeia ip!, {r2-r3} 111129250Scognet stmgeia ip!, {r2-r3} 112129250Scognet stmgeia ip!, {r2-r3} 113129250Scognet stmgeia ip!, {r2-r3} 114129250Scognet stmgeia ip!, {r2-r3} 115129250Scognet stmgeia ip!, {r2-r3} 116129250Scognet stmgeia ip!, {r2-r3} 117129250Scognet stmgeia ip!, {r2-r3} 118129250Scognet#endif 119129250Scognet bgt .Lmemset_loop128 120137463Scognet RETeq /* Zero length so just exit */ 121129250Scognet 122129250Scognet add r1, r1, #0x80 /* Adjust for extra sub */ 123129250Scognet 124129250Scognet /* Do 32 bytes at a time */ 125129250Scognet.Lmemset_loop32: 126129250Scognet subs r1, r1, #0x20 127129250Scognet#ifdef __XSCALE__ 128129250Scognet 
strged r2, [ip], #0x08 129129250Scognet strged r2, [ip], #0x08 130129250Scognet strged r2, [ip], #0x08 131129250Scognet strged r2, [ip], #0x08 132129250Scognet#else 133129250Scognet stmgeia ip!, {r2-r3} 134129250Scognet stmgeia ip!, {r2-r3} 135129250Scognet stmgeia ip!, {r2-r3} 136129250Scognet stmgeia ip!, {r2-r3} 137129250Scognet#endif 138129250Scognet bgt .Lmemset_loop32 139137463Scognet RETeq /* Zero length so just exit */ 140129250Scognet 141129250Scognet adds r1, r1, #0x10 /* Partially adjust for extra sub */ 142129250Scognet 143129250Scognet /* Deal with 16 bytes or more */ 144129250Scognet#ifdef __XSCALE__ 145129250Scognet strged r2, [ip], #0x08 146129250Scognet strged r2, [ip], #0x08 147129250Scognet#else 148129250Scognet stmgeia ip!, {r2-r3} 149129250Scognet stmgeia ip!, {r2-r3} 150129250Scognet#endif 151137463Scognet RETeq /* Zero length so just exit */ 152129250Scognet 153129250Scognet addlt r1, r1, #0x10 /* Possibly adjust for extra sub */ 154129250Scognet 155129250Scognet /* We have at least 4 bytes so copy as words */ 156129250Scognet.Lmemset_loop4: 157129250Scognet subs r1, r1, #0x04 158129250Scognet strge r3, [ip], #0x04 159129250Scognet bgt .Lmemset_loop4 160137463Scognet RETeq /* Zero length so just exit */ 161129250Scognet 162129250Scognet#ifdef __XSCALE__ 163129250Scognet /* Compensate for 64-bit alignment check */ 164129250Scognet adds r1, r1, #0x04 165137463Scognet RETeq 166129250Scognet cmp r1, #2 167129250Scognet#else 168129250Scognet cmp r1, #-2 169129250Scognet#endif 170129250Scognet 171129250Scognet strb r3, [ip], #0x01 /* Set 1 byte */ 172129250Scognet strgeb r3, [ip], #0x01 /* Set another byte */ 173129250Scognet strgtb r3, [ip] /* and a third */ 174137463Scognet RET /* Exit */ 175129250Scognet 176129250Scognet.Lmemset_wordunaligned: 177129250Scognet rsb r2, r2, #0x004 178129250Scognet strb r3, [ip], #0x01 /* Set 1 byte */ 179129250Scognet cmp r2, #0x02 180129250Scognet strgeb r3, [ip], #0x01 /* Set another byte */ 181129250Scognet sub 
r1, r1, r2 182129250Scognet strgtb r3, [ip], #0x01 /* and a third */ 183129250Scognet cmp r1, #0x04 /* More than 4 bytes left? */ 184129250Scognet bge .Lmemset_wordaligned /* Yup */ 185129250Scognet 186129250Scognet.Lmemset_lessthanfour: 187129250Scognet cmp r1, #0x00 188137463Scognet RETeq /* Zero length so exit */ 189129250Scognet strb r3, [ip], #0x01 /* Set 1 byte */ 190129250Scognet cmp r1, #0x02 191129250Scognet strgeb r3, [ip], #0x01 /* Set another byte */ 192129250Scognet strgtb r3, [ip] /* and a third */ 193137463Scognet RET /* Exit */ 194129254Scognet 195144967ScognetENTRY(bcmp) 196129254Scognet mov ip, r0 197129254Scognet cmp r2, #0x06 198129254Scognet beq .Lmemcmp_6bytes 199129254Scognet mov r0, #0x00 200129254Scognet 201129254Scognet /* Are both addresses aligned the same way? */ 202129254Scognet cmp r2, #0x00 203129254Scognet eornes r3, ip, r1 204137463Scognet RETeq /* len == 0, or same addresses! */ 205129254Scognet tst r3, #0x03 206129254Scognet subne r2, r2, #0x01 207129254Scognet bne .Lmemcmp_bytewise2 /* Badly aligned. 
Do it the slow way */ 208129254Scognet 209129254Scognet /* Word-align the addresses, if necessary */ 210129254Scognet sub r3, r1, #0x05 211129254Scognet ands r3, r3, #0x03 212129254Scognet add r3, r3, r3, lsl #1 213129254Scognet addne pc, pc, r3, lsl #3 214129254Scognet nop 215129254Scognet 216129254Scognet /* Compare up to 3 bytes */ 217129254Scognet ldrb r0, [ip], #0x01 218129254Scognet ldrb r3, [r1], #0x01 219129254Scognet subs r0, r0, r3 220137463Scognet RETne 221129254Scognet subs r2, r2, #0x01 222137463Scognet RETeq 223129254Scognet 224129254Scognet /* Compare up to 2 bytes */ 225129254Scognet ldrb r0, [ip], #0x01 226129254Scognet ldrb r3, [r1], #0x01 227129254Scognet subs r0, r0, r3 228137463Scognet RETne 229129254Scognet subs r2, r2, #0x01 230137463Scognet RETeq 231129254Scognet 232129254Scognet /* Compare 1 byte */ 233129254Scognet ldrb r0, [ip], #0x01 234129254Scognet ldrb r3, [r1], #0x01 235129254Scognet subs r0, r0, r3 236137463Scognet RETne 237129254Scognet subs r2, r2, #0x01 238137463Scognet RETeq 239129254Scognet 240129254Scognet /* Compare 4 bytes at a time, if possible */ 241129254Scognet subs r2, r2, #0x04 242129254Scognet bcc .Lmemcmp_bytewise 243129254Scognet.Lmemcmp_word_aligned: 244129254Scognet ldr r0, [ip], #0x04 245129254Scognet ldr r3, [r1], #0x04 246129254Scognet subs r2, r2, #0x04 247129254Scognet cmpcs r0, r3 248129254Scognet beq .Lmemcmp_word_aligned 249129254Scognet sub r0, r0, r3 250129254Scognet 251129254Scognet /* Correct for extra subtraction, and check if done */ 252129254Scognet adds r2, r2, #0x04 253129254Scognet cmpeq r0, #0x00 /* If done, did all bytes match? */ 254137463Scognet RETeq /* Yup. 
Just return */ 255129254Scognet 256129254Scognet /* Re-do the final word byte-wise */ 257129254Scognet sub ip, ip, #0x04 258129254Scognet sub r1, r1, #0x04 259129254Scognet 260129254Scognet.Lmemcmp_bytewise: 261129254Scognet add r2, r2, #0x03 262129254Scognet.Lmemcmp_bytewise2: 263129254Scognet ldrb r0, [ip], #0x01 264129254Scognet ldrb r3, [r1], #0x01 265129254Scognet subs r2, r2, #0x01 266129254Scognet cmpcs r0, r3 267129254Scognet beq .Lmemcmp_bytewise2 268129254Scognet sub r0, r0, r3 269137463Scognet RET 270129254Scognet 271129254Scognet /* 272129254Scognet * 6 byte compares are very common, thanks to the network stack. 273129254Scognet * This code is hand-scheduled to reduce the number of stalls for 274129254Scognet * load results. Everything else being equal, this will be ~32% 275129254Scognet * faster than a byte-wise memcmp. 276129254Scognet */ 277129254Scognet .align 5 278129254Scognet.Lmemcmp_6bytes: 279129254Scognet ldrb r3, [r1, #0x00] /* r3 = b2#0 */ 280129254Scognet ldrb r0, [ip, #0x00] /* r0 = b1#0 */ 281129254Scognet ldrb r2, [r1, #0x01] /* r2 = b2#1 */ 282129254Scognet subs r0, r0, r3 /* r0 = b1#0 - b2#0 */ 283129254Scognet ldreqb r3, [ip, #0x01] /* r3 = b1#1 */ 284137463Scognet RETne /* Return if mismatch on #0 */ 285129254Scognet subs r0, r3, r2 /* r0 = b1#1 - b2#1 */ 286129254Scognet ldreqb r3, [r1, #0x02] /* r3 = b2#2 */ 287129254Scognet ldreqb r0, [ip, #0x02] /* r0 = b1#2 */ 288137463Scognet RETne /* Return if mismatch on #1 */ 289129254Scognet ldrb r2, [r1, #0x03] /* r2 = b2#3 */ 290129254Scognet subs r0, r0, r3 /* r0 = b1#2 - b2#2 */ 291129254Scognet ldreqb r3, [ip, #0x03] /* r3 = b1#3 */ 292137463Scognet RETne /* Return if mismatch on #2 */ 293129254Scognet subs r0, r3, r2 /* r0 = b1#3 - b2#3 */ 294129254Scognet ldreqb r3, [r1, #0x04] /* r3 = b2#4 */ 295129254Scognet ldreqb r0, [ip, #0x04] /* r0 = b1#4 */ 296137463Scognet RETne /* Return if mismatch on #3 */ 297129254Scognet ldrb r2, [r1, #0x05] /* r2 = b2#5 */ 298129254Scognet subs r0, r0, 
r3 /* r0 = b1#4 - b2#4 */ 299129254Scognet ldreqb r3, [ip, #0x05] /* r3 = b1#5 */ 300137463Scognet RETne /* Return if mismatch on #4 */ 301129254Scognet sub r0, r3, r2 /* r0 = b1#5 - b2#5 */ 302137463Scognet RET 303129254Scognet 304129254ScognetENTRY(bcopy) 305143175Scognet /* switch the source and destination registers */ 306143175Scognet eor r0, r1, r0 307143175Scognet eor r1, r0, r1 308143175Scognet eor r0, r1, r0 309143175ScognetENTRY(memmove) 310143175Scognet /* Do the buffers overlap? */ 311143175Scognet cmp r0, r1 312143175Scognet RETeq /* Bail now if src/dst are the same */ 313143175Scognet subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */ 314143175Scognet subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */ 315143175Scognet cmp r3, r2 /* if (r3 < len) we have an overlap */ 316143175Scognet bcc PIC_SYM(_C_LABEL(memcpy), PLT) 317143175Scognet 318143175Scognet /* Determine copy direction */ 319143175Scognet cmp r1, r0 320143175Scognet bcc .Lmemmove_backwards 321143175Scognet 322143175Scognet moveq r0, #0 /* Quick abort for len=0 */ 323143175Scognet RETeq 324143175Scognet 325143175Scognet stmdb sp!, {r0, lr} /* memmove() returns dest addr */ 326143175Scognet subs r2, r2, #4 327143175Scognet blt .Lmemmove_fl4 /* less than 4 bytes */ 328143175Scognet ands r12, r0, #3 329143175Scognet bne .Lmemmove_fdestul /* oh unaligned destination addr */ 330143175Scognet ands r12, r1, #3 331143175Scognet bne .Lmemmove_fsrcul /* oh unaligned source addr */ 332143175Scognet 333143175Scognet.Lmemmove_ft8: 334143175Scognet /* We have aligned source and destination */ 335143175Scognet subs r2, r2, #8 336143175Scognet blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */ 337143175Scognet subs r2, r2, #0x14 338143175Scognet blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */ 339143175Scognet stmdb sp!, {r4} /* borrow r4 */ 340143175Scognet 341143175Scognet /* blat 32 bytes at a time */ 342143175Scognet /* XXX for really big copies perhaps we should use more registers 
*/ 343143175Scognet.Lmemmove_floop32: 344143175Scognet ldmia r1!, {r3, r4, r12, lr} 345143175Scognet stmia r0!, {r3, r4, r12, lr} 346143175Scognet ldmia r1!, {r3, r4, r12, lr} 347143175Scognet stmia r0!, {r3, r4, r12, lr} 348143175Scognet subs r2, r2, #0x20 349143175Scognet bge .Lmemmove_floop32 350143175Scognet 351143175Scognet cmn r2, #0x10 352143175Scognet ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 353143175Scognet stmgeia r0!, {r3, r4, r12, lr} 354143175Scognet subge r2, r2, #0x10 355143175Scognet ldmia sp!, {r4} /* return r4 */ 356143175Scognet 357143175Scognet.Lmemmove_fl32: 358143175Scognet adds r2, r2, #0x14 359143175Scognet 360143175Scognet /* blat 12 bytes at a time */ 361143175Scognet.Lmemmove_floop12: 362143175Scognet ldmgeia r1!, {r3, r12, lr} 363143175Scognet stmgeia r0!, {r3, r12, lr} 364143175Scognet subges r2, r2, #0x0c 365143175Scognet bge .Lmemmove_floop12 366143175Scognet 367143175Scognet.Lmemmove_fl12: 368143175Scognet adds r2, r2, #8 369143175Scognet blt .Lmemmove_fl4 370143175Scognet 371143175Scognet subs r2, r2, #4 372143175Scognet ldrlt r3, [r1], #4 373143175Scognet strlt r3, [r0], #4 374143175Scognet ldmgeia r1!, {r3, r12} 375143175Scognet stmgeia r0!, {r3, r12} 376143175Scognet subge r2, r2, #4 377143175Scognet 378143175Scognet.Lmemmove_fl4: 379143175Scognet /* less than 4 bytes to go */ 380143175Scognet adds r2, r2, #4 381143175Scognet ldmeqia sp!, {r0, pc} /* done */ 382143175Scognet 383143175Scognet /* copy the crud byte at a time */ 384143175Scognet cmp r2, #2 385143175Scognet ldrb r3, [r1], #1 386143175Scognet strb r3, [r0], #1 387143175Scognet ldrgeb r3, [r1], #1 388143175Scognet strgeb r3, [r0], #1 389143175Scognet ldrgtb r3, [r1], #1 390143175Scognet strgtb r3, [r0], #1 391143175Scognet ldmia sp!, {r0, pc} 392143175Scognet 393143175Scognet /* erg - unaligned destination */ 394143175Scognet.Lmemmove_fdestul: 395143175Scognet rsb r12, r12, #4 396143175Scognet cmp r12, #2 397143175Scognet 398143175Scognet /* align 
destination with byte copies */ 399143175Scognet ldrb r3, [r1], #1 400143175Scognet strb r3, [r0], #1 401143175Scognet ldrgeb r3, [r1], #1 402143175Scognet strgeb r3, [r0], #1 403143175Scognet ldrgtb r3, [r1], #1 404143175Scognet strgtb r3, [r0], #1 405143175Scognet subs r2, r2, r12 406143175Scognet blt .Lmemmove_fl4 /* less the 4 bytes */ 407143175Scognet 408143175Scognet ands r12, r1, #3 409143175Scognet beq .Lmemmove_ft8 /* we have an aligned source */ 410143175Scognet 411143175Scognet /* erg - unaligned source */ 412143175Scognet /* This is where it gets nasty ... */ 413143175Scognet.Lmemmove_fsrcul: 414143175Scognet bic r1, r1, #3 415143175Scognet ldr lr, [r1], #4 416143175Scognet cmp r12, #2 417143175Scognet bgt .Lmemmove_fsrcul3 418143175Scognet beq .Lmemmove_fsrcul2 419143175Scognet cmp r2, #0x0c 420143175Scognet blt .Lmemmove_fsrcul1loop4 421143175Scognet sub r2, r2, #0x0c 422143175Scognet stmdb sp!, {r4, r5} 423143175Scognet 424143175Scognet.Lmemmove_fsrcul1loop16: 425143175Scognet#ifdef __ARMEB__ 426143175Scognet mov r3, lr, lsl #8 427143175Scognet#else 428143175Scognet mov r3, lr, lsr #8 429143175Scognet#endif 430143175Scognet ldmia r1!, {r4, r5, r12, lr} 431143175Scognet#ifdef __ARMEB__ 432143175Scognet orr r3, r3, r4, lsr #24 433143175Scognet mov r4, r4, lsl #8 434143175Scognet orr r4, r4, r5, lsr #24 435143175Scognet mov r5, r5, lsl #8 436143175Scognet orr r5, r5, r12, lsr #24 437143175Scognet mov r12, r12, lsl #8 438143175Scognet orr r12, r12, lr, lsr #24 439143175Scognet#else 440143175Scognet orr r3, r3, r4, lsl #24 441143175Scognet mov r4, r4, lsr #8 442143175Scognet orr r4, r4, r5, lsl #24 443143175Scognet mov r5, r5, lsr #8 444143175Scognet orr r5, r5, r12, lsl #24 445143175Scognet mov r12, r12, lsr #8 446143175Scognet orr r12, r12, lr, lsl #24 447143175Scognet#endif 448143175Scognet stmia r0!, {r3-r5, r12} 449143175Scognet subs r2, r2, #0x10 450143175Scognet bge .Lmemmove_fsrcul1loop16 451143175Scognet ldmia sp!, {r4, r5} 452143175Scognet adds 
r2, r2, #0x0c 453143175Scognet blt .Lmemmove_fsrcul1l4 454143175Scognet 455143175Scognet.Lmemmove_fsrcul1loop4: 456143175Scognet#ifdef __ARMEB__ 457143175Scognet mov r12, lr, lsl #8 458143175Scognet#else 459143175Scognet mov r12, lr, lsr #8 460143175Scognet#endif 461143175Scognet ldr lr, [r1], #4 462143175Scognet#ifdef __ARMEB__ 463143175Scognet orr r12, r12, lr, lsr #24 464143175Scognet#else 465143175Scognet orr r12, r12, lr, lsl #24 466143175Scognet#endif 467143175Scognet str r12, [r0], #4 468143175Scognet subs r2, r2, #4 469143175Scognet bge .Lmemmove_fsrcul1loop4 470143175Scognet 471143175Scognet.Lmemmove_fsrcul1l4: 472143175Scognet sub r1, r1, #3 473143175Scognet b .Lmemmove_fl4 474143175Scognet 475143175Scognet.Lmemmove_fsrcul2: 476143175Scognet cmp r2, #0x0c 477143175Scognet blt .Lmemmove_fsrcul2loop4 478143175Scognet sub r2, r2, #0x0c 479143175Scognet stmdb sp!, {r4, r5} 480143175Scognet 481143175Scognet.Lmemmove_fsrcul2loop16: 482143175Scognet#ifdef __ARMEB__ 483143175Scognet mov r3, lr, lsl #16 484143175Scognet#else 485143175Scognet mov r3, lr, lsr #16 486143175Scognet#endif 487143175Scognet ldmia r1!, {r4, r5, r12, lr} 488143175Scognet#ifdef __ARMEB__ 489143175Scognet orr r3, r3, r4, lsr #16 490143175Scognet mov r4, r4, lsl #16 491143175Scognet orr r4, r4, r5, lsr #16 492143175Scognet mov r5, r5, lsl #16 493143175Scognet orr r5, r5, r12, lsr #16 494143175Scognet mov r12, r12, lsl #16 495143175Scognet orr r12, r12, lr, lsr #16 496143175Scognet#else 497143175Scognet orr r3, r3, r4, lsl #16 498143175Scognet mov r4, r4, lsr #16 499143175Scognet orr r4, r4, r5, lsl #16 500143175Scognet mov r5, r5, lsr #16 501143175Scognet orr r5, r5, r12, lsl #16 502143175Scognet mov r12, r12, lsr #16 503143175Scognet orr r12, r12, lr, lsl #16 504143175Scognet#endif 505143175Scognet stmia r0!, {r3-r5, r12} 506143175Scognet subs r2, r2, #0x10 507143175Scognet bge .Lmemmove_fsrcul2loop16 508143175Scognet ldmia sp!, {r4, r5} 509143175Scognet adds r2, r2, #0x0c 510143175Scognet 
blt .Lmemmove_fsrcul2l4 511143175Scognet 512143175Scognet.Lmemmove_fsrcul2loop4: 513143175Scognet#ifdef __ARMEB__ 514143175Scognet mov r12, lr, lsl #16 515143175Scognet#else 516143175Scognet mov r12, lr, lsr #16 517143175Scognet#endif 518143175Scognet ldr lr, [r1], #4 519143175Scognet#ifdef __ARMEB__ 520143175Scognet orr r12, r12, lr, lsr #16 521143175Scognet#else 522143175Scognet orr r12, r12, lr, lsl #16 523143175Scognet#endif 524143175Scognet str r12, [r0], #4 525143175Scognet subs r2, r2, #4 526143175Scognet bge .Lmemmove_fsrcul2loop4 527143175Scognet 528143175Scognet.Lmemmove_fsrcul2l4: 529143175Scognet sub r1, r1, #2 530143175Scognet b .Lmemmove_fl4 531143175Scognet 532143175Scognet.Lmemmove_fsrcul3: 533143175Scognet cmp r2, #0x0c 534143175Scognet blt .Lmemmove_fsrcul3loop4 535143175Scognet sub r2, r2, #0x0c 536143175Scognet stmdb sp!, {r4, r5} 537143175Scognet 538143175Scognet.Lmemmove_fsrcul3loop16: 539143175Scognet#ifdef __ARMEB__ 540143175Scognet mov r3, lr, lsl #24 541143175Scognet#else 542143175Scognet mov r3, lr, lsr #24 543143175Scognet#endif 544143175Scognet ldmia r1!, {r4, r5, r12, lr} 545143175Scognet#ifdef __ARMEB__ 546143175Scognet orr r3, r3, r4, lsr #8 547143175Scognet mov r4, r4, lsl #24 548143175Scognet orr r4, r4, r5, lsr #8 549143175Scognet mov r5, r5, lsl #24 550143175Scognet orr r5, r5, r12, lsr #8 551143175Scognet mov r12, r12, lsl #24 552143175Scognet orr r12, r12, lr, lsr #8 553143175Scognet#else 554143175Scognet orr r3, r3, r4, lsl #8 555143175Scognet mov r4, r4, lsr #24 556143175Scognet orr r4, r4, r5, lsl #8 557143175Scognet mov r5, r5, lsr #24 558143175Scognet orr r5, r5, r12, lsl #8 559143175Scognet mov r12, r12, lsr #24 560143175Scognet orr r12, r12, lr, lsl #8 561143175Scognet#endif 562143175Scognet stmia r0!, {r3-r5, r12} 563143175Scognet subs r2, r2, #0x10 564143175Scognet bge .Lmemmove_fsrcul3loop16 565143175Scognet ldmia sp!, {r4, r5} 566143175Scognet adds r2, r2, #0x0c 567143175Scognet blt .Lmemmove_fsrcul3l4 
568143175Scognet 569143175Scognet.Lmemmove_fsrcul3loop4: 570143175Scognet#ifdef __ARMEB__ 571143175Scognet mov r12, lr, lsl #24 572143175Scognet#else 573143175Scognet mov r12, lr, lsr #24 574143175Scognet#endif 575143175Scognet ldr lr, [r1], #4 576143175Scognet#ifdef __ARMEB__ 577143175Scognet orr r12, r12, lr, lsr #8 578143175Scognet#else 579143175Scognet orr r12, r12, lr, lsl #8 580143175Scognet#endif 581143175Scognet str r12, [r0], #4 582143175Scognet subs r2, r2, #4 583143175Scognet bge .Lmemmove_fsrcul3loop4 584143175Scognet 585143175Scognet.Lmemmove_fsrcul3l4: 586143175Scognet sub r1, r1, #1 587143175Scognet b .Lmemmove_fl4 588143175Scognet 589143175Scognet.Lmemmove_backwards: 590143175Scognet add r1, r1, r2 591143175Scognet add r0, r0, r2 592143175Scognet subs r2, r2, #4 593143175Scognet blt .Lmemmove_bl4 /* less than 4 bytes */ 594143175Scognet ands r12, r0, #3 595143175Scognet bne .Lmemmove_bdestul /* oh unaligned destination addr */ 596143175Scognet ands r12, r1, #3 597143175Scognet bne .Lmemmove_bsrcul /* oh unaligned source addr */ 598143175Scognet 599143175Scognet.Lmemmove_bt8: 600143175Scognet /* We have aligned source and destination */ 601143175Scognet subs r2, r2, #8 602143175Scognet blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */ 603143175Scognet stmdb sp!, {r4, lr} 604143175Scognet subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ 605143175Scognet blt .Lmemmove_bl32 606143175Scognet 607143175Scognet /* blat 32 bytes at a time */ 608143175Scognet /* XXX for really big copies perhaps we should use more registers */ 609143175Scognet.Lmemmove_bloop32: 610143175Scognet ldmdb r1!, {r3, r4, r12, lr} 611143175Scognet stmdb r0!, {r3, r4, r12, lr} 612143175Scognet ldmdb r1!, {r3, r4, r12, lr} 613143175Scognet stmdb r0!, {r3, r4, r12, lr} 614143175Scognet subs r2, r2, #0x20 615143175Scognet bge .Lmemmove_bloop32 616143175Scognet 617143175Scognet.Lmemmove_bl32: 618143175Scognet cmn r2, #0x10 619143175Scognet ldmgedb r1!, {r3, r4, r12, 
lr} /* blat a remaining 16 bytes */ 620143175Scognet stmgedb r0!, {r3, r4, r12, lr} 621143175Scognet subge r2, r2, #0x10 622143175Scognet adds r2, r2, #0x14 623143175Scognet ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ 624143175Scognet stmgedb r0!, {r3, r12, lr} 625143175Scognet subge r2, r2, #0x0c 626143175Scognet ldmia sp!, {r4, lr} 627143175Scognet 628143175Scognet.Lmemmove_bl12: 629143175Scognet adds r2, r2, #8 630143175Scognet blt .Lmemmove_bl4 631143175Scognet subs r2, r2, #4 632143175Scognet ldrlt r3, [r1, #-4]! 633143175Scognet strlt r3, [r0, #-4]! 634143175Scognet ldmgedb r1!, {r3, r12} 635143175Scognet stmgedb r0!, {r3, r12} 636143175Scognet subge r2, r2, #4 637143175Scognet 638143175Scognet.Lmemmove_bl4: 639143175Scognet /* less than 4 bytes to go */ 640143175Scognet adds r2, r2, #4 641143175Scognet RETeq /* done */ 642143175Scognet 643143175Scognet /* copy the crud byte at a time */ 644143175Scognet cmp r2, #2 645143175Scognet ldrb r3, [r1, #-1]! 646143175Scognet strb r3, [r0, #-1]! 647143175Scognet ldrgeb r3, [r1, #-1]! 648143175Scognet strgeb r3, [r0, #-1]! 649143175Scognet ldrgtb r3, [r1, #-1]! 650143175Scognet strgtb r3, [r0, #-1]! 651143175Scognet RET 652143175Scognet 653143175Scognet /* erg - unaligned destination */ 654143175Scognet.Lmemmove_bdestul: 655143175Scognet cmp r12, #2 656143175Scognet 657143175Scognet /* align destination with byte copies */ 658143175Scognet ldrb r3, [r1, #-1]! 659143175Scognet strb r3, [r0, #-1]! 660143175Scognet ldrgeb r3, [r1, #-1]! 661143175Scognet strgeb r3, [r0, #-1]! 662143175Scognet ldrgtb r3, [r1, #-1]! 663143175Scognet strgtb r3, [r0, #-1]! 664143175Scognet subs r2, r2, r12 665143175Scognet blt .Lmemmove_bl4 /* less than 4 bytes to go */ 666143175Scognet ands r12, r1, #3 667143175Scognet beq .Lmemmove_bt8 /* we have an aligned source */ 668143175Scognet 669143175Scognet /* erg - unaligned source */ 670143175Scognet /* This is where it gets nasty ... 
*/ 671143175Scognet.Lmemmove_bsrcul: 672143175Scognet bic r1, r1, #3 673143175Scognet ldr r3, [r1, #0] 674143175Scognet cmp r12, #2 675143175Scognet blt .Lmemmove_bsrcul1 676143175Scognet beq .Lmemmove_bsrcul2 677143175Scognet cmp r2, #0x0c 678143175Scognet blt .Lmemmove_bsrcul3loop4 679143175Scognet sub r2, r2, #0x0c 680143175Scognet stmdb sp!, {r4, r5, lr} 681143175Scognet 682143175Scognet.Lmemmove_bsrcul3loop16: 683143175Scognet#ifdef __ARMEB__ 684143175Scognet mov lr, r3, lsr #8 685143175Scognet#else 686143175Scognet mov lr, r3, lsl #8 687143175Scognet#endif 688143175Scognet ldmdb r1!, {r3-r5, r12} 689143175Scognet#ifdef __ARMEB__ 690143175Scognet orr lr, lr, r12, lsl #24 691143175Scognet mov r12, r12, lsr #8 692143175Scognet orr r12, r12, r5, lsl #24 693143175Scognet mov r5, r5, lsr #8 694143175Scognet orr r5, r5, r4, lsl #24 695143175Scognet mov r4, r4, lsr #8 696143175Scognet orr r4, r4, r3, lsl #24 697143175Scognet#else 698143175Scognet orr lr, lr, r12, lsr #24 699143175Scognet mov r12, r12, lsl #8 700143175Scognet orr r12, r12, r5, lsr #24 701143175Scognet mov r5, r5, lsl #8 702143175Scognet orr r5, r5, r4, lsr #24 703143175Scognet mov r4, r4, lsl #8 704143175Scognet orr r4, r4, r3, lsr #24 705143175Scognet#endif 706143175Scognet stmdb r0!, {r4, r5, r12, lr} 707143175Scognet subs r2, r2, #0x10 708143175Scognet bge .Lmemmove_bsrcul3loop16 709143175Scognet ldmia sp!, {r4, r5, lr} 710143175Scognet adds r2, r2, #0x0c 711143175Scognet blt .Lmemmove_bsrcul3l4 712143175Scognet 713143175Scognet.Lmemmove_bsrcul3loop4: 714143175Scognet#ifdef __ARMEB__ 715143175Scognet mov r12, r3, lsr #8 716143175Scognet#else 717143175Scognet mov r12, r3, lsl #8 718143175Scognet#endif 719143175Scognet ldr r3, [r1, #-4]! 720143175Scognet#ifdef __ARMEB__ 721143175Scognet orr r12, r12, r3, lsl #24 722143175Scognet#else 723143175Scognet orr r12, r12, r3, lsr #24 724143175Scognet#endif 725143175Scognet str r12, [r0, #-4]! 
726143175Scognet subs r2, r2, #4 727143175Scognet bge .Lmemmove_bsrcul3loop4 728143175Scognet 729143175Scognet.Lmemmove_bsrcul3l4: 730143175Scognet add r1, r1, #3 731143175Scognet b .Lmemmove_bl4 732143175Scognet 733143175Scognet.Lmemmove_bsrcul2: 734143175Scognet cmp r2, #0x0c 735143175Scognet blt .Lmemmove_bsrcul2loop4 736143175Scognet sub r2, r2, #0x0c 737143175Scognet stmdb sp!, {r4, r5, lr} 738143175Scognet 739143175Scognet.Lmemmove_bsrcul2loop16: 740143175Scognet#ifdef __ARMEB__ 741143175Scognet mov lr, r3, lsr #16 742143175Scognet#else 743143175Scognet mov lr, r3, lsl #16 744143175Scognet#endif 745143175Scognet ldmdb r1!, {r3-r5, r12} 746143175Scognet#ifdef __ARMEB__ 747143175Scognet orr lr, lr, r12, lsl #16 748143175Scognet mov r12, r12, lsr #16 749143175Scognet orr r12, r12, r5, lsl #16 750143175Scognet mov r5, r5, lsr #16 751143175Scognet orr r5, r5, r4, lsl #16 752143175Scognet mov r4, r4, lsr #16 753143175Scognet orr r4, r4, r3, lsl #16 754143175Scognet#else 755143175Scognet orr lr, lr, r12, lsr #16 756143175Scognet mov r12, r12, lsl #16 757143175Scognet orr r12, r12, r5, lsr #16 758143175Scognet mov r5, r5, lsl #16 759143175Scognet orr r5, r5, r4, lsr #16 760143175Scognet mov r4, r4, lsl #16 761143175Scognet orr r4, r4, r3, lsr #16 762143175Scognet#endif 763143175Scognet stmdb r0!, {r4, r5, r12, lr} 764143175Scognet subs r2, r2, #0x10 765143175Scognet bge .Lmemmove_bsrcul2loop16 766143175Scognet ldmia sp!, {r4, r5, lr} 767143175Scognet adds r2, r2, #0x0c 768143175Scognet blt .Lmemmove_bsrcul2l4 769143175Scognet 770143175Scognet.Lmemmove_bsrcul2loop4: 771143175Scognet#ifdef __ARMEB__ 772143175Scognet mov r12, r3, lsr #16 773143175Scognet#else 774143175Scognet mov r12, r3, lsl #16 775143175Scognet#endif 776143175Scognet ldr r3, [r1, #-4]! 777143175Scognet#ifdef __ARMEB__ 778143175Scognet orr r12, r12, r3, lsl #16 779143175Scognet#else 780143175Scognet orr r12, r12, r3, lsr #16 781143175Scognet#endif 782143175Scognet str r12, [r0, #-4]! 
783143175Scognet subs r2, r2, #4 784143175Scognet bge .Lmemmove_bsrcul2loop4 785143175Scognet 786143175Scognet.Lmemmove_bsrcul2l4: 787143175Scognet add r1, r1, #2 788143175Scognet b .Lmemmove_bl4 789143175Scognet 790143175Scognet.Lmemmove_bsrcul1: 791143175Scognet cmp r2, #0x0c 792143175Scognet blt .Lmemmove_bsrcul1loop4 793143175Scognet sub r2, r2, #0x0c 794143175Scognet stmdb sp!, {r4, r5, lr} 795143175Scognet 796143175Scognet.Lmemmove_bsrcul1loop32: 797143175Scognet#ifdef __ARMEB__ 798143175Scognet mov lr, r3, lsr #24 799143175Scognet#else 800143175Scognet mov lr, r3, lsl #24 801143175Scognet#endif 802143175Scognet ldmdb r1!, {r3-r5, r12} 803143175Scognet#ifdef __ARMEB__ 804143175Scognet orr lr, lr, r12, lsl #8 805143175Scognet mov r12, r12, lsr #24 806143175Scognet orr r12, r12, r5, lsl #8 807143175Scognet mov r5, r5, lsr #24 808143175Scognet orr r5, r5, r4, lsl #8 809143175Scognet mov r4, r4, lsr #24 810143175Scognet orr r4, r4, r3, lsl #8 811143175Scognet#else 812143175Scognet orr lr, lr, r12, lsr #8 813143175Scognet mov r12, r12, lsl #24 814143175Scognet orr r12, r12, r5, lsr #8 815143175Scognet mov r5, r5, lsl #24 816143175Scognet orr r5, r5, r4, lsr #8 817143175Scognet mov r4, r4, lsl #24 818143175Scognet orr r4, r4, r3, lsr #8 819143175Scognet#endif 820143175Scognet stmdb r0!, {r4, r5, r12, lr} 821143175Scognet subs r2, r2, #0x10 822143175Scognet bge .Lmemmove_bsrcul1loop32 823143175Scognet ldmia sp!, {r4, r5, lr} 824143175Scognet adds r2, r2, #0x0c 825143175Scognet blt .Lmemmove_bsrcul1l4 826143175Scognet 827143175Scognet.Lmemmove_bsrcul1loop4: 828143175Scognet#ifdef __ARMEB__ 829143175Scognet mov r12, r3, lsr #24 830143175Scognet#else 831143175Scognet mov r12, r3, lsl #24 832143175Scognet#endif 833143175Scognet ldr r3, [r1, #-4]! 834143175Scognet#ifdef __ARMEB__ 835143175Scognet orr r12, r12, r3, lsl #8 836143175Scognet#else 837143175Scognet orr r12, r12, r3, lsr #8 838143175Scognet#endif 839143175Scognet str r12, [r0, #-4]! 
840143175Scognet subs r2, r2, #4 841143175Scognet bge .Lmemmove_bsrcul1loop4 842143175Scognet 843143175Scognet.Lmemmove_bsrcul1l4: 844143175Scognet add r1, r1, #1 845143175Scognet b .Lmemmove_bl4 846143175Scognet
#if !defined(__XSCALE__)
/*
 * memcpy: generic (non-XScale) forward copy of a block of memory
 *
 * On entry:
 *	r0 - dest address
 *	r1 - source address
 *	r2 - number of bytes to copy
 *
 * On exit:
 *	r0 - dest address (saved on entry, reloaded on return)
 *
 * r3, r12 and lr are used as scratch; lr is pushed together with r0 on
 * entry and every exit pops it straight into pc.  r4 (aligned path) and
 * r4/r5 (unaligned-source paths) are borrowed and restored around the
 * bulk loops.
 *
 * r2 is kept biased below the true remaining byte count (by 4, 12, 16
 * or 32 depending on the stage) so that the sign of each subs result
 * selects the next path.  .Lmemcpy_l4 adds the final bias of 4 back.
 */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10		/* r2 + 16 >= 0: 16 or more left */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* bias is now 12 */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* bias is now 4 */
	blt	.Lmemcpy_l4

	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = true remaining count (0..3) */
#ifdef __APCS_26__
	ldmeqia	sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1		/* always at least one byte here */
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1		/* second byte if r2 >= 2 */
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1		/* third byte if r2 == 3 */
	strgtb	r3, [r0], #1
	/*
	 * NOTE(review): this exit has no __APCS_26__ variant, unlike the
	 * early exit above - confirm whether that is intended.
	 */
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dest */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * On entry r12 = source offset within its word (1, 2 or 3) and the
	 * destination is word aligned.  r1 is rounded down and lr always
	 * holds the most recently loaded aligned source word; each output
	 * word is assembled from the bytes of lr not yet consumed and the
	 * leading bytes of the following word.  The shift directions
	 * depend on byte order, hence the __ARMEB__ variants.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* bias is now 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* bias back to 4 */
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* back to the next unread source byte */
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* bias is now 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* bias back to 4 */
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* back to the next unread source byte */
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* bias is now 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* bias back to 4 */
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* back to the next unread source byte */
	b	.Lmemcpy_l4
#else
1052129254Scognet/* LINTSTUB: Func: void *memcpy(void
*dst, const void *src, size_t len) */ 1053129254ScognetENTRY(memcpy) 1054129254Scognet pld [r1] 1055129254Scognet cmp r2, #0x0c 1056129254Scognet ble .Lmemcpy_short /* <= 12 bytes */ 1057129254Scognet mov r3, r0 /* We must not clobber r0 */ 1058129254Scognet 1059129254Scognet /* Word-align the destination buffer */ 1060129254Scognet ands ip, r3, #0x03 /* Already word aligned? */ 1061129254Scognet beq .Lmemcpy_wordaligned /* Yup */ 1062129254Scognet cmp ip, #0x02 1063129254Scognet ldrb ip, [r1], #0x01 1064129254Scognet sub r2, r2, #0x01 1065129254Scognet strb ip, [r3], #0x01 1066129254Scognet ldrleb ip, [r1], #0x01 1067129254Scognet suble r2, r2, #0x01 1068129254Scognet strleb ip, [r3], #0x01 1069129254Scognet ldrltb ip, [r1], #0x01 1070129254Scognet sublt r2, r2, #0x01 1071129254Scognet strltb ip, [r3], #0x01 1072129254Scognet 1073129254Scognet /* Destination buffer is now word aligned */ 1074129254Scognet.Lmemcpy_wordaligned: 1075129254Scognet ands ip, r1, #0x03 /* Is src also word-aligned? */ 1076129254Scognet bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 1077129254Scognet 1078129254Scognet /* Quad-align the destination buffer */ 1079129254Scognet tst r3, #0x07 /* Already quad aligned? 
*/ 1080129254Scognet ldrne ip, [r1], #0x04 1081129254Scognet stmfd sp!, {r4-r9} /* Free up some registers */ 1082129254Scognet subne r2, r2, #0x04 1083129254Scognet strne ip, [r3], #0x04 1084129254Scognet 1085129254Scognet /* Destination buffer quad aligned, source is at least word aligned */ 1086129254Scognet subs r2, r2, #0x80 1087129254Scognet blt .Lmemcpy_w_lessthan128 1088129254Scognet 1089129254Scognet /* Copy 128 bytes at a time */ 1090129254Scognet.Lmemcpy_w_loop128: 1091129254Scognet ldr r4, [r1], #0x04 /* LD:00-03 */ 1092129254Scognet ldr r5, [r1], #0x04 /* LD:04-07 */ 1093129254Scognet pld [r1, #0x18] /* Prefetch 0x20 */ 1094129254Scognet ldr r6, [r1], #0x04 /* LD:08-0b */ 1095129254Scognet ldr r7, [r1], #0x04 /* LD:0c-0f */ 1096129254Scognet ldr r8, [r1], #0x04 /* LD:10-13 */ 1097129254Scognet ldr r9, [r1], #0x04 /* LD:14-17 */ 1098129254Scognet strd r4, [r3], #0x08 /* ST:00-07 */ 1099129254Scognet ldr r4, [r1], #0x04 /* LD:18-1b */ 1100129254Scognet ldr r5, [r1], #0x04 /* LD:1c-1f */ 1101129254Scognet strd r6, [r3], #0x08 /* ST:08-0f */ 1102129254Scognet ldr r6, [r1], #0x04 /* LD:20-23 */ 1103129254Scognet ldr r7, [r1], #0x04 /* LD:24-27 */ 1104129254Scognet pld [r1, #0x18] /* Prefetch 0x40 */ 1105129254Scognet strd r8, [r3], #0x08 /* ST:10-17 */ 1106129254Scognet ldr r8, [r1], #0x04 /* LD:28-2b */ 1107129254Scognet ldr r9, [r1], #0x04 /* LD:2c-2f */ 1108129254Scognet strd r4, [r3], #0x08 /* ST:18-1f */ 1109129254Scognet ldr r4, [r1], #0x04 /* LD:30-33 */ 1110129254Scognet ldr r5, [r1], #0x04 /* LD:34-37 */ 1111129254Scognet strd r6, [r3], #0x08 /* ST:20-27 */ 1112129254Scognet ldr r6, [r1], #0x04 /* LD:38-3b */ 1113129254Scognet ldr r7, [r1], #0x04 /* LD:3c-3f */ 1114129254Scognet strd r8, [r3], #0x08 /* ST:28-2f */ 1115129254Scognet ldr r8, [r1], #0x04 /* LD:40-43 */ 1116129254Scognet ldr r9, [r1], #0x04 /* LD:44-47 */ 1117129254Scognet pld [r1, #0x18] /* Prefetch 0x60 */ 1118129254Scognet strd r4, [r3], #0x08 /* ST:30-37 */ 1119129254Scognet ldr r4, 
[r1], #0x04 /* LD:48-4b */ 1120129254Scognet ldr r5, [r1], #0x04 /* LD:4c-4f */ 1121129254Scognet strd r6, [r3], #0x08 /* ST:38-3f */ 1122129254Scognet ldr r6, [r1], #0x04 /* LD:50-53 */ 1123129254Scognet ldr r7, [r1], #0x04 /* LD:54-57 */ 1124129254Scognet strd r8, [r3], #0x08 /* ST:40-47 */ 1125129254Scognet ldr r8, [r1], #0x04 /* LD:58-5b */ 1126129254Scognet ldr r9, [r1], #0x04 /* LD:5c-5f */ 1127129254Scognet strd r4, [r3], #0x08 /* ST:48-4f */ 1128129254Scognet ldr r4, [r1], #0x04 /* LD:60-63 */ 1129129254Scognet ldr r5, [r1], #0x04 /* LD:64-67 */ 1130129254Scognet pld [r1, #0x18] /* Prefetch 0x80 */ 1131129254Scognet strd r6, [r3], #0x08 /* ST:50-57 */ 1132129254Scognet ldr r6, [r1], #0x04 /* LD:68-6b */ 1133129254Scognet ldr r7, [r1], #0x04 /* LD:6c-6f */ 1134129254Scognet strd r8, [r3], #0x08 /* ST:58-5f */ 1135129254Scognet ldr r8, [r1], #0x04 /* LD:70-73 */ 1136129254Scognet ldr r9, [r1], #0x04 /* LD:74-77 */ 1137129254Scognet strd r4, [r3], #0x08 /* ST:60-67 */ 1138129254Scognet ldr r4, [r1], #0x04 /* LD:78-7b */ 1139129254Scognet ldr r5, [r1], #0x04 /* LD:7c-7f */ 1140129254Scognet strd r6, [r3], #0x08 /* ST:68-6f */ 1141129254Scognet strd r8, [r3], #0x08 /* ST:70-77 */ 1142129254Scognet subs r2, r2, #0x80 1143129254Scognet strd r4, [r3], #0x08 /* ST:78-7f */ 1144129254Scognet bge .Lmemcpy_w_loop128 1145129254Scognet 1146129254Scognet.Lmemcpy_w_lessthan128: 1147129254Scognet adds r2, r2, #0x80 /* Adjust for extra sub */ 1148129254Scognet ldmeqfd sp!, {r4-r9} 1149137463Scognet RETeq /* Return now if done */ 1150129254Scognet subs r2, r2, #0x20 1151129254Scognet blt .Lmemcpy_w_lessthan32 1152129254Scognet 1153129254Scognet /* Copy 32 bytes at a time */ 1154129254Scognet.Lmemcpy_w_loop32: 1155129254Scognet ldr r4, [r1], #0x04 1156129254Scognet ldr r5, [r1], #0x04 1157129254Scognet pld [r1, #0x18] 1158129254Scognet ldr r6, [r1], #0x04 1159129254Scognet ldr r7, [r1], #0x04 1160129254Scognet ldr r8, [r1], #0x04 1161129254Scognet ldr r9, [r1], #0x04 
1162129254Scognet strd r4, [r3], #0x08 1163129254Scognet ldr r4, [r1], #0x04 1164129254Scognet ldr r5, [r1], #0x04 1165129254Scognet strd r6, [r3], #0x08 1166129254Scognet strd r8, [r3], #0x08 1167129254Scognet subs r2, r2, #0x20 1168129254Scognet strd r4, [r3], #0x08 1169129254Scognet bge .Lmemcpy_w_loop32 1170129254Scognet 1171129254Scognet.Lmemcpy_w_lessthan32: 1172129254Scognet adds r2, r2, #0x20 /* Adjust for extra sub */ 1173129254Scognet ldmeqfd sp!, {r4-r9} 1174137463Scognet RETeq /* Return now if done */ 1175129254Scognet 1176129254Scognet and r4, r2, #0x18 1177129254Scognet rsbs r4, r4, #0x18 1178129254Scognet addne pc, pc, r4, lsl #1 1179129254Scognet nop 1180129254Scognet 1181129254Scognet /* At least 24 bytes remaining */ 1182129254Scognet ldr r4, [r1], #0x04 1183129254Scognet ldr r5, [r1], #0x04 1184129254Scognet sub r2, r2, #0x08 1185129254Scognet strd r4, [r3], #0x08 1186129254Scognet 1187129254Scognet /* At least 16 bytes remaining */ 1188129254Scognet ldr r4, [r1], #0x04 1189129254Scognet ldr r5, [r1], #0x04 1190129254Scognet sub r2, r2, #0x08 1191129254Scognet strd r4, [r3], #0x08 1192129254Scognet 1193129254Scognet /* At least 8 bytes remaining */ 1194129254Scognet ldr r4, [r1], #0x04 1195129254Scognet ldr r5, [r1], #0x04 1196129254Scognet subs r2, r2, #0x08 1197129254Scognet strd r4, [r3], #0x08 1198129254Scognet 1199129254Scognet /* Less than 8 bytes remaining */ 1200129254Scognet ldmfd sp!, {r4-r9} 1201137463Scognet RETeq /* Return now if done */ 1202129254Scognet subs r2, r2, #0x04 1203129254Scognet ldrge ip, [r1], #0x04 1204129254Scognet strge ip, [r3], #0x04 1205137463Scognet RETeq /* Return now if done */ 1206129254Scognet addlt r2, r2, #0x04 1207129254Scognet ldrb ip, [r1], #0x01 1208129254Scognet cmp r2, #0x02 1209129254Scognet ldrgeb r2, [r1], #0x01 1210129254Scognet strb ip, [r3], #0x01 1211129254Scognet ldrgtb ip, [r1] 1212129254Scognet strgeb r2, [r3], #0x01 1213129254Scognet strgtb ip, [r3] 1214137463Scognet RET 1215129254Scognet 
1216129254Scognet 1217129254Scognet/* 1218129254Scognet * At this point, it has not been possible to word align both buffers. 1219129254Scognet * The destination buffer is word aligned, but the source buffer is not. 1220129254Scognet */ 1221129254Scognet.Lmemcpy_bad_align: 1222129254Scognet stmfd sp!, {r4-r7} 1223129254Scognet bic r1, r1, #0x03 1224129254Scognet cmp ip, #2 1225129254Scognet ldr ip, [r1], #0x04 1226129254Scognet bgt .Lmemcpy_bad3 1227129254Scognet beq .Lmemcpy_bad2 1228129254Scognet b .Lmemcpy_bad1 1229129254Scognet 1230129254Scognet.Lmemcpy_bad1_loop16: 1231129254Scognet#ifdef __ARMEB__ 1232129254Scognet mov r4, ip, lsl #8 1233129254Scognet#else 1234129254Scognet mov r4, ip, lsr #8 1235129254Scognet#endif 1236129254Scognet ldr r5, [r1], #0x04 1237129254Scognet pld [r1, #0x018] 1238129254Scognet ldr r6, [r1], #0x04 1239129254Scognet ldr r7, [r1], #0x04 1240129254Scognet ldr ip, [r1], #0x04 1241129254Scognet#ifdef __ARMEB__ 1242129254Scognet orr r4, r4, r5, lsr #24 1243129254Scognet mov r5, r5, lsl #8 1244129254Scognet orr r5, r5, r6, lsr #24 1245129254Scognet mov r6, r6, lsl #8 1246129254Scognet orr r6, r6, r7, lsr #24 1247129254Scognet mov r7, r7, lsl #8 1248129254Scognet orr r7, r7, ip, lsr #24 1249129254Scognet#else 1250129254Scognet orr r4, r4, r5, lsl #24 1251129254Scognet mov r5, r5, lsr #8 1252129254Scognet orr r5, r5, r6, lsl #24 1253129254Scognet mov r6, r6, lsr #8 1254129254Scognet orr r6, r6, r7, lsl #24 1255129254Scognet mov r7, r7, lsr #8 1256129254Scognet orr r7, r7, ip, lsl #24 1257129254Scognet#endif 1258129254Scognet str r4, [r3], #0x04 1259129254Scognet str r5, [r3], #0x04 1260129254Scognet str r6, [r3], #0x04 1261129254Scognet str r7, [r3], #0x04 1262129254Scognet.Lmemcpy_bad1: 1263129254Scognet subs r2, r2, #0x10 1264129254Scognet bge .Lmemcpy_bad1_loop16 1265129254Scognet 1266129254Scognet adds r2, r2, #0x10 1267129254Scognet ldmeqfd sp!, {r4-r7} 1268137463Scognet RETeq /* Return now if done */ 1269129254Scognet subs r2, r2, 
#0x04 1270129254Scognet sublt r1, r1, #0x03 1271129254Scognet blt .Lmemcpy_bad_done 1272129254Scognet 1273129254Scognet.Lmemcpy_bad1_loop4: 1274129254Scognet#ifdef __ARMEB__ 1275129254Scognet mov r4, ip, lsl #8 1276129254Scognet#else 1277129254Scognet mov r4, ip, lsr #8 1278129254Scognet#endif 1279129254Scognet ldr ip, [r1], #0x04 1280129254Scognet subs r2, r2, #0x04 1281129254Scognet#ifdef __ARMEB__ 1282129254Scognet orr r4, r4, ip, lsr #24 1283129254Scognet#else 1284129254Scognet orr r4, r4, ip, lsl #24 1285129254Scognet#endif 1286129254Scognet str r4, [r3], #0x04 1287129254Scognet bge .Lmemcpy_bad1_loop4 1288129254Scognet sub r1, r1, #0x03 1289129254Scognet b .Lmemcpy_bad_done 1290129254Scognet 1291129254Scognet.Lmemcpy_bad2_loop16: 1292129254Scognet#ifdef __ARMEB__ 1293129254Scognet mov r4, ip, lsl #16 1294129254Scognet#else 1295129254Scognet mov r4, ip, lsr #16 1296129254Scognet#endif 1297129254Scognet ldr r5, [r1], #0x04 1298129254Scognet pld [r1, #0x018] 1299129254Scognet ldr r6, [r1], #0x04 1300129254Scognet ldr r7, [r1], #0x04 1301129254Scognet ldr ip, [r1], #0x04 1302129254Scognet#ifdef __ARMEB__ 1303129254Scognet orr r4, r4, r5, lsr #16 1304129254Scognet mov r5, r5, lsl #16 1305129254Scognet orr r5, r5, r6, lsr #16 1306129254Scognet mov r6, r6, lsl #16 1307129254Scognet orr r6, r6, r7, lsr #16 1308129254Scognet mov r7, r7, lsl #16 1309129254Scognet orr r7, r7, ip, lsr #16 1310129254Scognet#else 1311129254Scognet orr r4, r4, r5, lsl #16 1312129254Scognet mov r5, r5, lsr #16 1313129254Scognet orr r5, r5, r6, lsl #16 1314129254Scognet mov r6, r6, lsr #16 1315129254Scognet orr r6, r6, r7, lsl #16 1316129254Scognet mov r7, r7, lsr #16 1317129254Scognet orr r7, r7, ip, lsl #16 1318129254Scognet#endif 1319129254Scognet str r4, [r3], #0x04 1320129254Scognet str r5, [r3], #0x04 1321129254Scognet str r6, [r3], #0x04 1322129254Scognet str r7, [r3], #0x04 1323129254Scognet.Lmemcpy_bad2: 1324129254Scognet subs r2, r2, #0x10 1325129254Scognet bge .Lmemcpy_bad2_loop16 
1326129254Scognet 1327129254Scognet adds r2, r2, #0x10 1328129254Scognet ldmeqfd sp!, {r4-r7} 1329137463Scognet RETeq /* Return now if done */ 1330129254Scognet subs r2, r2, #0x04 1331129254Scognet sublt r1, r1, #0x02 1332129254Scognet blt .Lmemcpy_bad_done 1333129254Scognet 1334129254Scognet.Lmemcpy_bad2_loop4: 1335129254Scognet#ifdef __ARMEB__ 1336129254Scognet mov r4, ip, lsl #16 1337129254Scognet#else 1338129254Scognet mov r4, ip, lsr #16 1339129254Scognet#endif 1340129254Scognet ldr ip, [r1], #0x04 1341129254Scognet subs r2, r2, #0x04 1342129254Scognet#ifdef __ARMEB__ 1343129254Scognet orr r4, r4, ip, lsr #16 1344129254Scognet#else 1345129254Scognet orr r4, r4, ip, lsl #16 1346129254Scognet#endif 1347129254Scognet str r4, [r3], #0x04 1348129254Scognet bge .Lmemcpy_bad2_loop4 1349129254Scognet sub r1, r1, #0x02 1350129254Scognet b .Lmemcpy_bad_done 1351129254Scognet 1352129254Scognet.Lmemcpy_bad3_loop16: 1353129254Scognet#ifdef __ARMEB__ 1354129254Scognet mov r4, ip, lsl #24 1355129254Scognet#else 1356129254Scognet mov r4, ip, lsr #24 1357129254Scognet#endif 1358129254Scognet ldr r5, [r1], #0x04 1359129254Scognet pld [r1, #0x018] 1360129254Scognet ldr r6, [r1], #0x04 1361129254Scognet ldr r7, [r1], #0x04 1362129254Scognet ldr ip, [r1], #0x04 1363129254Scognet#ifdef __ARMEB__ 1364129254Scognet orr r4, r4, r5, lsr #8 1365129254Scognet mov r5, r5, lsl #24 1366129254Scognet orr r5, r5, r6, lsr #8 1367129254Scognet mov r6, r6, lsl #24 1368129254Scognet orr r6, r6, r7, lsr #8 1369129254Scognet mov r7, r7, lsl #24 1370129254Scognet orr r7, r7, ip, lsr #8 1371129254Scognet#else 1372129254Scognet orr r4, r4, r5, lsl #8 1373129254Scognet mov r5, r5, lsr #24 1374129254Scognet orr r5, r5, r6, lsl #8 1375129254Scognet mov r6, r6, lsr #24 1376129254Scognet orr r6, r6, r7, lsl #8 1377129254Scognet mov r7, r7, lsr #24 1378129254Scognet orr r7, r7, ip, lsl #8 1379129254Scognet#endif 1380129254Scognet str r4, [r3], #0x04 1381129254Scognet str r5, [r3], #0x04 1382129254Scognet 
str r6, [r3], #0x04 1383129254Scognet str r7, [r3], #0x04 1384129254Scognet.Lmemcpy_bad3: 1385129254Scognet subs r2, r2, #0x10 1386129254Scognet bge .Lmemcpy_bad3_loop16 1387129254Scognet 1388129254Scognet adds r2, r2, #0x10 1389129254Scognet ldmeqfd sp!, {r4-r7} 1390137463Scognet RETeq /* Return now if done */ 1391129254Scognet subs r2, r2, #0x04 1392129254Scognet sublt r1, r1, #0x01 1393129254Scognet blt .Lmemcpy_bad_done 1394129254Scognet 1395129254Scognet.Lmemcpy_bad3_loop4: 1396129254Scognet#ifdef __ARMEB__ 1397129254Scognet mov r4, ip, lsl #24 1398129254Scognet#else 1399129254Scognet mov r4, ip, lsr #24 1400129254Scognet#endif 1401129254Scognet ldr ip, [r1], #0x04 1402129254Scognet subs r2, r2, #0x04 1403129254Scognet#ifdef __ARMEB__ 1404129254Scognet orr r4, r4, ip, lsr #8 1405129254Scognet#else 1406129254Scognet orr r4, r4, ip, lsl #8 1407129254Scognet#endif 1408129254Scognet str r4, [r3], #0x04 1409129254Scognet bge .Lmemcpy_bad3_loop4 1410129254Scognet sub r1, r1, #0x01 1411129254Scognet 1412129254Scognet.Lmemcpy_bad_done: 1413129254Scognet ldmfd sp!, {r4-r7} 1414129254Scognet adds r2, r2, #0x04 1415137463Scognet RETeq 1416129254Scognet ldrb ip, [r1], #0x01 1417129254Scognet cmp r2, #0x02 1418129254Scognet ldrgeb r2, [r1], #0x01 1419129254Scognet strb ip, [r3], #0x01 1420129254Scognet ldrgtb ip, [r1] 1421129254Scognet strgeb r2, [r3], #0x01 1422129254Scognet strgtb ip, [r3] 1423137463Scognet RET 1424129254Scognet 1425129254Scognet 1426129254Scognet/* 1427129254Scognet * Handle short copies (less than 16 bytes), possibly misaligned. 1428129254Scognet * Some of these are *very* common, thanks to the network stack, 1429129254Scognet * and so are handled specially. 
1430129254Scognet */ 1431129254Scognet.Lmemcpy_short: 1432129254Scognet add pc, pc, r2, lsl #2 1433129254Scognet nop 1434137463Scognet RET /* 0x00 */ 1435129254Scognet b .Lmemcpy_bytewise /* 0x01 */ 1436129254Scognet b .Lmemcpy_bytewise /* 0x02 */ 1437129254Scognet b .Lmemcpy_bytewise /* 0x03 */ 1438129254Scognet b .Lmemcpy_4 /* 0x04 */ 1439129254Scognet b .Lmemcpy_bytewise /* 0x05 */ 1440129254Scognet b .Lmemcpy_6 /* 0x06 */ 1441129254Scognet b .Lmemcpy_bytewise /* 0x07 */ 1442129254Scognet b .Lmemcpy_8 /* 0x08 */ 1443129254Scognet b .Lmemcpy_bytewise /* 0x09 */ 1444129254Scognet b .Lmemcpy_bytewise /* 0x0a */ 1445129254Scognet b .Lmemcpy_bytewise /* 0x0b */ 1446129254Scognet b .Lmemcpy_c /* 0x0c */ 1447129254Scognet.Lmemcpy_bytewise: 1448129254Scognet mov r3, r0 /* We must not clobber r0 */ 1449129254Scognet ldrb ip, [r1], #0x01 1450129254Scognet1: subs r2, r2, #0x01 1451129254Scognet strb ip, [r3], #0x01 1452129254Scognet ldrneb ip, [r1], #0x01 1453129254Scognet bne 1b 1454137463Scognet RET 1455129254Scognet 1456129254Scognet/****************************************************************************** 1457129254Scognet * Special case for 4 byte copies 1458129254Scognet */ 1459129254Scognet#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1460129254Scognet#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1461129254Scognet LMEMCPY_4_PAD 1462129254Scognet.Lmemcpy_4: 1463129254Scognet and r2, r1, #0x03 1464129254Scognet orr r2, r2, r0, lsl #2 1465129254Scognet ands r2, r2, #0x0f 1466129254Scognet sub r3, pc, #0x14 1467129254Scognet addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 1468129254Scognet 1469129254Scognet/* 1470129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 1471129254Scognet */ 1472129254Scognet ldr r2, [r1] 1473129254Scognet str r2, [r0] 1474137463Scognet RET 1475129254Scognet LMEMCPY_4_PAD 1476129254Scognet 1477129254Scognet/* 1478129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 1479129254Scognet */ 1480129254Scognet ldr r3, [r1, #-1] /* 
BE:r3 = x012 LE:r3 = 210x */ 1481129254Scognet ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1482129254Scognet#ifdef __ARMEB__ 1483129254Scognet mov r3, r3, lsl #8 /* r3 = 012. */ 1484129254Scognet orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1485129254Scognet#else 1486129254Scognet mov r3, r3, lsr #8 /* r3 = .210 */ 1487129254Scognet orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1488129254Scognet#endif 1489129254Scognet str r3, [r0] 1490137463Scognet RET 1491129254Scognet LMEMCPY_4_PAD 1492129254Scognet 1493129254Scognet/* 1494129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 1495129254Scognet */ 1496129254Scognet#ifdef __ARMEB__ 1497129254Scognet ldrh r3, [r1] 1498129254Scognet ldrh r2, [r1, #0x02] 1499129254Scognet#else 1500129254Scognet ldrh r3, [r1, #0x02] 1501129254Scognet ldrh r2, [r1] 1502129254Scognet#endif 1503129254Scognet orr r3, r2, r3, lsl #16 1504129254Scognet str r3, [r0] 1505137463Scognet RET 1506129254Scognet LMEMCPY_4_PAD 1507129254Scognet 1508129254Scognet/* 1509129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 1510129254Scognet */ 1511129254Scognet ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1512129254Scognet ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1513129254Scognet#ifdef __ARMEB__ 1514129254Scognet mov r3, r3, lsl #24 /* r3 = 0... 
*/ 1515129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1516129254Scognet#else 1517129254Scognet mov r3, r3, lsr #24 /* r3 = ...0 */ 1518129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1519129254Scognet#endif 1520129254Scognet str r3, [r0] 1521137463Scognet RET 1522129254Scognet LMEMCPY_4_PAD 1523129254Scognet 1524129254Scognet/* 1525129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned 1526129254Scognet */ 1527129254Scognet ldr r2, [r1] 1528129254Scognet#ifdef __ARMEB__ 1529129254Scognet strb r2, [r0, #0x03] 1530129254Scognet mov r3, r2, lsr #8 1531129254Scognet mov r1, r2, lsr #24 1532129254Scognet strb r1, [r0] 1533129254Scognet#else 1534129254Scognet strb r2, [r0] 1535129254Scognet mov r3, r2, lsr #8 1536129254Scognet mov r1, r2, lsr #24 1537129254Scognet strb r1, [r0, #0x03] 1538129254Scognet#endif 1539129254Scognet strh r3, [r0, #0x01] 1540137463Scognet RET 1541129254Scognet LMEMCPY_4_PAD 1542129254Scognet 1543129254Scognet/* 1544129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned 1545129254Scognet */ 1546129254Scognet ldrb r2, [r1] 1547129254Scognet ldrh r3, [r1, #0x01] 1548129254Scognet ldrb r1, [r1, #0x03] 1549129254Scognet strb r2, [r0] 1550129254Scognet strh r3, [r0, #0x01] 1551129254Scognet strb r1, [r0, #0x03] 1552137463Scognet RET 1553129254Scognet LMEMCPY_4_PAD 1554129254Scognet 1555129254Scognet/* 1556129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned 1557129254Scognet */ 1558129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1559129254Scognet ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1560129254Scognet#ifdef __ARMEB__ 1561129254Scognet mov r1, r2, lsr #8 /* r1 = ...0 */ 1562129254Scognet strb r1, [r0] 1563129254Scognet mov r2, r2, lsl #8 /* r2 = .01.
*/ 1564129254Scognet orr r2, r2, r3, lsr #8 /* r2 = .012 */ 1565129254Scognet#else 1566129254Scognet strb r2, [r0] 1567129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 1568129254Scognet orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1569129254Scognet mov r3, r3, lsr #8 /* r3 = ...3 */ 1570129254Scognet#endif 1571129254Scognet strh r2, [r0, #0x01] 1572129254Scognet strb r3, [r0, #0x03] 1573137463Scognet RET 1574129254Scognet LMEMCPY_4_PAD 1575129254Scognet 1576129254Scognet/* 1577129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned 1578129254Scognet */ 1579129254Scognet ldrb r2, [r1] 1580129254Scognet ldrh r3, [r1, #0x01] 1581129254Scognet ldrb r1, [r1, #0x03] 1582129254Scognet strb r2, [r0] 1583129254Scognet strh r3, [r0, #0x01] 1584129254Scognet strb r1, [r0, #0x03] 1585137463Scognet RET 1586129254Scognet LMEMCPY_4_PAD 1587129254Scognet 1588129254Scognet/* 1589129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 1590129254Scognet */ 1591129254Scognet ldr r2, [r1] 1592129254Scognet#ifdef __ARMEB__ 1593129254Scognet strh r2, [r0, #0x02] 1594129254Scognet mov r3, r2, lsr #16 1595129254Scognet strh r3, [r0] 1596129254Scognet#else 1597129254Scognet strh r2, [r0] 1598129254Scognet mov r3, r2, lsr #16 1599129254Scognet strh r3, [r0, #0x02] 1600129254Scognet#endif 1601137463Scognet RET 1602129254Scognet LMEMCPY_4_PAD 1603129254Scognet 1604129254Scognet/* 1605129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned 1606129254Scognet */ 1607129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1608129254Scognet ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1609129254Scognet mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1610129254Scognet strh r1, [r0] 1611129254Scognet#ifdef __ARMEB__ 1612129254Scognet mov r2, r2, lsl #8 /* r2 = 012. 
*/ 1613129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1614129254Scognet#else 1615129254Scognet mov r2, r2, lsr #24 /* r2 = ...2 */ 1616129254Scognet orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1617129254Scognet#endif 1618129254Scognet strh r2, [r0, #0x02] 1619137463Scognet RET 1620129254Scognet LMEMCPY_4_PAD 1621129254Scognet 1622129254Scognet/* 1623129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 1624129254Scognet */ 1625129254Scognet ldrh r2, [r1] 1626129254Scognet ldrh r3, [r1, #0x02] 1627129254Scognet strh r2, [r0] 1628129254Scognet strh r3, [r0, #0x02] 1629137463Scognet RET 1630129254Scognet LMEMCPY_4_PAD 1631129254Scognet 1632129254Scognet/* 1633129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned 1634129254Scognet */ 1635129254Scognet ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1636129254Scognet ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1637129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1638129254Scognet strh r1, [r0, #0x02] 1639129254Scognet#ifdef __ARMEB__ 1640129254Scognet mov r3, r3, lsr #24 /* r3 = ...1 */ 1641129254Scognet orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 1642129254Scognet#else 1643129254Scognet mov r3, r3, lsl #8 /* r3 = 321. 
*/ 1644129254Scognet orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1645129254Scognet#endif 1646129254Scognet strh r3, [r0] 1647137463Scognet RET 1648129254Scognet LMEMCPY_4_PAD 1649129254Scognet 1650129254Scognet/* 1651129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned 1652129254Scognet */ 1653129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1654129254Scognet#ifdef __ARMEB__ 1655129254Scognet strb r2, [r0, #0x03] 1656129254Scognet mov r3, r2, lsr #8 1657129254Scognet mov r1, r2, lsr #24 1658129254Scognet strh r3, [r0, #0x01] 1659129254Scognet strb r1, [r0] 1660129254Scognet#else 1661129254Scognet strb r2, [r0] 1662129254Scognet mov r3, r2, lsr #8 1663129254Scognet mov r1, r2, lsr #24 1664129254Scognet strh r3, [r0, #0x01] 1665129254Scognet strb r1, [r0, #0x03] 1666129254Scognet#endif 1667137463Scognet RET 1668129254Scognet LMEMCPY_4_PAD 1669129254Scognet 1670129254Scognet/* 1671129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned 1672129254Scognet */ 1673129254Scognet ldrb r2, [r1] 1674129254Scognet ldrh r3, [r1, #0x01] 1675129254Scognet ldrb r1, [r1, #0x03] 1676129254Scognet strb r2, [r0] 1677129254Scognet strh r3, [r0, #0x01] 1678129254Scognet strb r1, [r0, #0x03] 1679137463Scognet RET 1680129254Scognet LMEMCPY_4_PAD 1681129254Scognet 1682129254Scognet/* 1683129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned 1684129254Scognet */ 1685129254Scognet#ifdef __ARMEB__ 1686129254Scognet ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1687129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1688129254Scognet strb r3, [r0, #0x03] 1689129254Scognet mov r3, r3, lsr #8 /* r3 = ...2 */ 1690129254Scognet orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 1691129254Scognet strh r3, [r0, #0x01] 1692129254Scognet mov r2, r2, lsr #8 /* r2 = ...0 */ 1693129254Scognet strb r2, [r0] 1694129254Scognet#else 1695129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1696129254Scognet ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 
1697129254Scognet strb r2, [r0] 1698129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 1699129254Scognet orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1700129254Scognet strh r2, [r0, #0x01] 1701129254Scognet mov r3, r3, lsr #8 /* r3 = ...3 */ 1702129254Scognet strb r3, [r0, #0x03] 1703129254Scognet#endif 1704137463Scognet RET 1705129254Scognet LMEMCPY_4_PAD 1706129254Scognet 1707129254Scognet/* 1708129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned 1709129254Scognet */ 1710129254Scognet ldrb r2, [r1] 1711129254Scognet ldrh r3, [r1, #0x01] 1712129254Scognet ldrb r1, [r1, #0x03] 1713129254Scognet strb r2, [r0] 1714129254Scognet strh r3, [r0, #0x01] 1715129254Scognet strb r1, [r0, #0x03] 1716137463Scognet RET 1717129254Scognet LMEMCPY_4_PAD 1718129254Scognet 1719129254Scognet 1720129254Scognet/****************************************************************************** 1721129254Scognet * Special case for 6 byte copies 1722129254Scognet */ 1723129254Scognet#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 1724129254Scognet#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 1725129254Scognet LMEMCPY_6_PAD 1726129254Scognet.Lmemcpy_6: 1727129254Scognet and r2, r1, #0x03 /* r2 = src & 3 */ 1728129254Scognet orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 */ 1729129254Scognet ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3); Z set for case 0000 */ 1730129254Scognet sub r3, pc, #0x14 /* r3 = .Lmemcpy_6 (pc reads as this insn + 8) */ 1731129254Scognet addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 /* jump to case slot r2; case 0000 falls through */ 1732129254Scognet 1733129254Scognet/* 1734129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 1735129254Scognet */ 1736129254Scognet ldr r2, [r1] 1737129254Scognet ldrh r3, [r1, #0x04] 1738129254Scognet str r2, [r0] 1739129254Scognet strh r3, [r0, #0x04] 1740137463Scognet RET 1741129254Scognet LMEMCPY_6_PAD 1742129254Scognet 1743129254Scognet/* 1744129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 1745129254Scognet */ 1746129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1747129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 1748129254Scognet#ifdef __ARMEB__ 1749129254Scognet mov r2, r2,
lsl #8 /* r2 = 012. */ 1750129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1751129254Scognet#else 1752129254Scognet mov r2, r2, lsr #8 /* r2 = .210 */ 1753129254Scognet orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 1754129254Scognet#endif 1755129254Scognet mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 1756129254Scognet str r2, [r0] 1757129254Scognet strh r3, [r0, #0x04] 1758137463Scognet RET 1759129254Scognet LMEMCPY_6_PAD 1760129254Scognet 1761129254Scognet/* 1762129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 1763129254Scognet */ 1764129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1765129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1766129254Scognet#ifdef __ARMEB__ 1767129254Scognet mov r1, r3, lsr #16 /* r1 = ..23 */ 1768129254Scognet orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 1769129254Scognet str r1, [r0] 1770129254Scognet strh r3, [r0, #0x04] 1771129254Scognet#else 1772129254Scognet mov r1, r3, lsr #16 /* r1 = ..54 */ 1773129254Scognet orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1774129254Scognet str r2, [r0] 1775129254Scognet strh r1, [r0, #0x04] 1776129254Scognet#endif 1777137463Scognet RET 1778129254Scognet LMEMCPY_6_PAD 1779129254Scognet 1780129254Scognet/* 1781129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 1782129254Scognet */ 1783129254Scognet ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1784129254Scognet ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 1785129254Scognet ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */ 1786129254Scognet#ifdef __ARMEB__ 1787129254Scognet mov r2, r2, lsl #24 /* r2 = 0... */ 1788129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1789129254Scognet mov r3, r3, lsl #8 /* r3 = 234. */ 1790129254Scognet orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 1791129254Scognet#else 1792129254Scognet mov r2, r2, lsr #24 /* r2 = ...0 */ 1793129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1794129254Scognet mov r1, r1, lsl #8 /* r1 = xx5.
*/ 1795129254Scognet orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 1796129254Scognet#endif 1797129254Scognet str r2, [r0] 1798129254Scognet strh r1, [r0, #0x04] 1799137463Scognet RET 1800129254Scognet LMEMCPY_6_PAD 1801129254Scognet 1802129254Scognet/* 1803129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned 1804129254Scognet */ 1805129254Scognet ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1806129254Scognet ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 1807129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1808129254Scognet strh r1, [r0, #0x01] 1809129254Scognet#ifdef __ARMEB__ 1810129254Scognet mov r1, r3, lsr #24 /* r1 = ...0 */ 1811129254Scognet strb r1, [r0] 1812129254Scognet mov r3, r3, lsl #8 /* r3 = 123. */ 1813129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 1814129254Scognet#else 1815129254Scognet strb r3, [r0] 1816129254Scognet mov r3, r3, lsr #24 /* r3 = ...3 */ 1817129254Scognet orr r3, r3, r2, lsl #8 /* r3 = .543 */ 1818129254Scognet mov r2, r2, lsr #8 /* r2 = ...5 */ 1819129254Scognet#endif 1820129254Scognet strh r3, [r0, #0x03] 1821129254Scognet strb r2, [r0, #0x05] 1822137463Scognet RET 1823129254Scognet LMEMCPY_6_PAD 1824129254Scognet 1825129254Scognet/* 1826129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned 1827129254Scognet */ 1828129254Scognet ldrb r2, [r1] 1829129254Scognet ldrh r3, [r1, #0x01] 1830129254Scognet ldrh ip, [r1, #0x03] 1831129254Scognet ldrb r1, [r1, #0x05] 1832129254Scognet strb r2, [r0] 1833129254Scognet strh r3, [r0, #0x01] 1834129254Scognet strh ip, [r0, #0x03] 1835129254Scognet strb r1, [r0, #0x05] 1836137463Scognet RET 1837129254Scognet LMEMCPY_6_PAD 1838129254Scognet 1839129254Scognet/* 1840129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned 1841129254Scognet */ 1842129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1843129254Scognet ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1844129254Scognet#ifdef __ARMEB__ 1845129254Scognet mov r3, r2, lsr #8
/* r3 = ...0 */ 1846129254Scognet strb r3, [r0] 1847129254Scognet strb r1, [r0, #0x05] 1848129254Scognet mov r3, r1, lsr #8 /* r3 = .234 */ 1849129254Scognet strh r3, [r0, #0x03] 1850129254Scognet mov r3, r2, lsl #8 /* r3 = .01. */ 1851129254Scognet orr r3, r3, r1, lsr #24 /* r3 = .012 */ 1852129254Scognet strh r3, [r0, #0x01] 1853129254Scognet#else 1854129254Scognet strb r2, [r0] 1855129254Scognet mov r3, r1, lsr #24 1856129254Scognet strb r3, [r0, #0x05] 1857129254Scognet mov r3, r1, lsr #8 /* r3 = .543 */ 1858129254Scognet strh r3, [r0, #0x03] 1859129254Scognet mov r3, r2, lsr #8 /* r3 = ...1 */ 1860129254Scognet orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 1861129254Scognet strh r3, [r0, #0x01] 1862129254Scognet#endif 1863137463Scognet RET 1864129254Scognet LMEMCPY_6_PAD 1865129254Scognet 1866129254Scognet/* 1867129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned 1868129254Scognet */ 1869129254Scognet ldrb r2, [r1] 1870129254Scognet ldrh r3, [r1, #0x01] 1871129254Scognet ldrh ip, [r1, #0x03] 1872129254Scognet ldrb r1, [r1, #0x05] 1873129254Scognet strb r2, [r0] 1874129254Scognet strh r3, [r0, #0x01] 1875129254Scognet strh ip, [r0, #0x03] 1876129254Scognet strb r1, [r0, #0x05] 1877137463Scognet RET 1878129254Scognet LMEMCPY_6_PAD 1879129254Scognet 1880129254Scognet/* 1881129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 1882129254Scognet */ 1883129254Scognet#ifdef __ARMEB__ 1884129254Scognet ldr r2, [r1] /* r2 = 0123 */ 1885129254Scognet ldrh r3, [r1, #0x04] /* r3 = ..45 */ 1886129254Scognet mov r1, r2, lsr #16 /* r1 = ..01 */ 1887129254Scognet orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 1888129254Scognet strh r1, [r0] 1889129254Scognet str r3, [r0, #0x02] 1890129254Scognet#else 1891129254Scognet ldrh r2, [r1, #0x04] /* r2 = ..54 */ 1892129254Scognet ldr r3, [r1] /* r3 = 3210 */ 1893129254Scognet mov r2, r2, lsl #16 /* r2 = 54..
*/ 1894129254Scognet orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 1895129254Scognet strh r3, [r0] 1896129254Scognet str r2, [r0, #0x02] 1897129254Scognet#endif 1898137463Scognet RET 1899129254Scognet LMEMCPY_6_PAD 1900129254Scognet 1901129254Scognet/* 1902129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned 1903129254Scognet */ 1904129254Scognet ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1905129254Scognet ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 1906129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1907129254Scognet#ifdef __ARMEB__ 1908129254Scognet mov r2, r2, lsr #8 /* r2 = .345 */ 1909129254Scognet orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 1910129254Scognet#else 1911129254Scognet mov r2, r2, lsl #8 /* r2 = 543. */ 1912129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 1913129254Scognet#endif 1914129254Scognet strh r1, [r0] 1915129254Scognet str r2, [r0, #0x02] 1916137463Scognet RET 1917129254Scognet LMEMCPY_6_PAD 1918129254Scognet 1919129254Scognet/* 1920129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 1921129254Scognet */ 1922129254Scognet ldrh r2, [r1] 1923129254Scognet ldr r3, [r1, #0x02] 1924129254Scognet strh r2, [r0] 1925129254Scognet str r3, [r0, #0x02] 1926137463Scognet RET 1927129254Scognet LMEMCPY_6_PAD 1928129254Scognet 1929129254Scognet/* 1930129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned 1931129254Scognet */ 1932129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 1933129254Scognet ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 1934129254Scognet ldrb r1, [r1, #0x05] /* r1 = ...5 */ 1935129254Scognet#ifdef __ARMEB__ 1936129254Scognet mov r3, r3, lsl #8 /* r3 = ..0. */ 1937129254Scognet orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 1938129254Scognet orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 1939129254Scognet#else 1940129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1941129254Scognet mov r1, r1, lsl #24 /* r1 = 5...
*/ 1942129254Scognet orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 1943129254Scognet#endif 1944129254Scognet strh r3, [r0] 1945129254Scognet str r1, [r0, #0x02] 1946137463Scognet RET 1947129254Scognet LMEMCPY_6_PAD 1948129254Scognet 1949129254Scognet/* 1950129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned 1951129254Scognet */ 1952129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1953129254Scognet ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 1954129254Scognet#ifdef __ARMEB__ 1955129254Scognet mov r3, r2, lsr #24 /* r3 = ...0 */ 1956129254Scognet strb r3, [r0] 1957129254Scognet mov r2, r2, lsl #8 /* r2 = 123. */ 1958129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 1959129254Scognet#else 1960129254Scognet strb r2, [r0] 1961129254Scognet mov r2, r2, lsr #8 /* r2 = .321 */ 1962129254Scognet orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 1963129254Scognet mov r1, r1, lsr #8 /* r1 = ...5 */ 1964129254Scognet#endif 1965129254Scognet str r2, [r0, #0x01] 1966129254Scognet strb r1, [r0, #0x05] 1967137463Scognet RET 1968129254Scognet LMEMCPY_6_PAD 1969129254Scognet 1970129254Scognet/* 1971129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned 1972129254Scognet */ 1973129254Scognet ldrb r2, [r1] 1974129254Scognet ldrh r3, [r1, #0x01] 1975129254Scognet ldrh ip, [r1, #0x03] 1976129254Scognet ldrb r1, [r1, #0x05] 1977129254Scognet strb r2, [r0] 1978129254Scognet strh r3, [r0, #0x01] 1979129254Scognet strh ip, [r0, #0x03] 1980129254Scognet strb r1, [r0, #0x05] 1981137463Scognet RET 1982129254Scognet LMEMCPY_6_PAD 1983129254Scognet 1984129254Scognet/* 1985129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned 1986129254Scognet */ 1987129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1988129254Scognet ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1989129254Scognet#ifdef __ARMEB__ 1990129254Scognet mov r3, r2, lsr #8 /* r3 = ...0 */ 1991129254Scognet strb r3, [r0] 1992129254Scognet mov r2, r2, lsl #24 /* r2 = 1...
*/ 1993129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 1994129254Scognet#else 1995129254Scognet strb r2, [r0] 1996129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 1997129254Scognet orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 1998129254Scognet mov r1, r1, lsr #24 /* r1 = ...5 */ 1999129254Scognet#endif 2000129254Scognet str r2, [r0, #0x01] 2001129254Scognet strb r1, [r0, #0x05] 2002137463Scognet RET 2003129254Scognet LMEMCPY_6_PAD 2004129254Scognet 2005129254Scognet/* 2006129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned 2007129254Scognet */ 2008129254Scognet ldrb r2, [r1] 2009129254Scognet ldr r3, [r1, #0x01] 2010129254Scognet ldrb r1, [r1, #0x05] 2011129254Scognet strb r2, [r0] 2012129254Scognet str r3, [r0, #0x01] 2013129254Scognet strb r1, [r0, #0x05] 2014137463Scognet RET 2015129254Scognet LMEMCPY_6_PAD 2016129254Scognet 2017129254Scognet 2018129254Scognet/****************************************************************************** 2019129254Scognet * Special case for 8 byte copies 2020129254Scognet */ 2021129254Scognet#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 2022129254Scognet#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 2023129254Scognet LMEMCPY_8_PAD 2024129254Scognet.Lmemcpy_8: 2025129254Scognet and r2, r1, #0x03 /* r2 = src & 3 */ 2026129254Scognet orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 */ 2027129254Scognet ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3); Z set for case 0000 */ 2028129254Scognet sub r3, pc, #0x14 /* r3 = .Lmemcpy_8 (pc reads as this insn + 8) */ 2029129254Scognet addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 /* jump to case slot r2; case 0000 falls through */ 2030129254Scognet 2031129254Scognet/* 2032129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 2033129254Scognet */ 2034129254Scognet ldr r2, [r1] 2035129254Scognet ldr r3, [r1, #0x04] 2036129254Scognet str r2, [r0] 2037129254Scognet str r3, [r0, #0x04] 2038137463Scognet RET 2039129254Scognet LMEMCPY_8_PAD 2040129254Scognet 2041129254Scognet/* 2042129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 2043129254Scognet */ 2044129254Scognet ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2045129254Scognet ldr r2, [r1, #0x03] /* BE:r2 = 3456
LE:r2 = 6543 */ 2046129254Scognet ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2047129254Scognet#ifdef __ARMEB__ 2048129254Scognet mov r3, r3, lsl #8 /* r3 = 012. */ 2049129254Scognet orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 2050129254Scognet orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 2051129254Scognet#else 2052129254Scognet mov r3, r3, lsr #8 /* r3 = .210 */ 2053129254Scognet orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 2054129254Scognet mov r1, r1, lsl #24 /* r1 = 7... */ 2055129254Scognet orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 2056129254Scognet#endif 2057129254Scognet str r3, [r0] 2058129254Scognet str r2, [r0, #0x04] 2059137463Scognet RET 2060129254Scognet LMEMCPY_8_PAD 2061129254Scognet 2062129254Scognet/* 2063129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 2064129254Scognet */ 2065129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2066129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2067129254Scognet ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2068129254Scognet#ifdef __ARMEB__ 2069129254Scognet mov r2, r2, lsl #16 /* r2 = 01.. */ 2070129254Scognet orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2071129254Scognet orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 2072129254Scognet#else 2073129254Scognet orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2074129254Scognet mov r3, r3, lsr #16 /* r3 = ..54 */ 2075129254Scognet orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 2076129254Scognet#endif 2077129254Scognet str r2, [r0] 2078129254Scognet str r3, [r0, #0x04] 2079137463Scognet RET 2080129254Scognet LMEMCPY_8_PAD 2081129254Scognet 2082129254Scognet/* 2083129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 2084129254Scognet */ 2085129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2086129254Scognet ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2087129254Scognet ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 (x = byte past the copy, shifted out below) */ 2088129254Scognet#ifdef __ARMEB__ 2089129254Scognet mov r3, r3, lsl #24 /* r3 = 0...
*/ 2090129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 2091129254Scognet mov r2, r2, lsl #24 /* r2 = 4... */ 2092129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 2093129254Scognet#else 2094129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2095129254Scognet mov r2, r2, lsr #24 /* r2 = ...4 */ 2096129254Scognet orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 2097129254Scognet#endif 2098129254Scognet str r3, [r0] 2099129254Scognet str r2, [r0, #0x04] 2100137463Scognet RET 2101129254Scognet LMEMCPY_8_PAD 2102129254Scognet 2103129254Scognet/* 2104129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned 2105129254Scognet */ 2106129254Scognet ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 2107129254Scognet ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 2108129254Scognet#ifdef __ARMEB__ 2109129254Scognet mov r1, r3, lsr #24 /* r1 = ...0 */ 2110129254Scognet strb r1, [r0] 2111129254Scognet mov r1, r3, lsr #8 /* r1 = .012 */ 2112129254Scognet strb r2, [r0, #0x07] 2113129254Scognet mov r3, r3, lsl #24 /* r3 = 3...
*/ 2114129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 2115129254Scognet#else 2116129254Scognet strb r3, [r0] 2117129254Scognet mov r1, r2, lsr #24 /* r1 = ...7 */ 2118129254Scognet strb r1, [r0, #0x07] 2119129254Scognet mov r1, r3, lsr #8 /* r1 = .321 */ 2120129254Scognet mov r3, r3, lsr #24 /* r3 = ...3 */ 2121129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 2122129254Scognet#endif 2123129254Scognet strh r1, [r0, #0x01] 2124129254Scognet str r3, [r0, #0x03] 2125137463Scognet RET 2126129254Scognet LMEMCPY_8_PAD 2127129254Scognet 2128129254Scognet/* 2129129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned 2130129254Scognet */ 2131129254Scognet ldrb r2, [r1] 2132129254Scognet ldrh r3, [r1, #0x01] 2133129254Scognet ldr ip, [r1, #0x03] 2134129254Scognet ldrb r1, [r1, #0x07] 2135129254Scognet strb r2, [r0] 2136129254Scognet strh r3, [r0, #0x01] 2137129254Scognet str ip, [r0, #0x03] 2138129254Scognet strb r1, [r0, #0x07] 2139137463Scognet RET 2140129254Scognet LMEMCPY_8_PAD 2141129254Scognet 2142129254Scognet/* 2143129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned 2144129254Scognet */ 2145129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2146129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2147129254Scognet ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2148129254Scognet#ifdef __ARMEB__ 2149129254Scognet mov ip, r2, lsr #8 /* ip = ...0 */ 2150129254Scognet strb ip, [r0] 2151129254Scognet mov ip, r2, lsl #8 /* ip = .01. */ 2152129254Scognet orr ip, ip, r3, lsr #24 /* ip = .012 */ 2153129254Scognet strb r1, [r0, #0x07] 2154129254Scognet mov r3, r3, lsl #8 /* r3 = 345.
*/ 2155129254Scognet orr r3, r3, r1, lsr #8 /* r3 = 3456 */ 2156129254Scognet#else 2157129254Scognet strb r2, [r0] /* 0 */ 2158129254Scognet mov ip, r1, lsr #8 /* ip = ...7 */ 2159129254Scognet strb ip, [r0, #0x07] /* 7 */ 2160129254Scognet mov ip, r2, lsr #8 /* ip = ...1 */ 2161129254Scognet orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2162129254Scognet mov r3, r3, lsr #8 /* r3 = .543 */ 2163129254Scognet orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 2164129254Scognet#endif 2165129254Scognet strh ip, [r0, #0x01] 2166129254Scognet str r3, [r0, #0x03] 2167137463Scognet RET 2168129254Scognet LMEMCPY_8_PAD 2169129254Scognet 2170129254Scognet/* 2171129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned 2172129254Scognet */ 2173129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2174129254Scognet ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2175129254Scognet ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 2176129254Scognet ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2177129254Scognet strb r3, [r0] 2178129254Scognet mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 2179129254Scognet#ifdef __ARMEB__ 2180129254Scognet strh r3, [r0, #0x01] 2181129254Scognet orr r2, r2, ip, lsl #16 /* r2 = 3456 */ 2182129254Scognet#else 2183129254Scognet strh ip, [r0, #0x01] 2184129254Scognet orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 2185129254Scognet#endif 2186129254Scognet str r2, [r0, #0x03] 2187129254Scognet strb r1, [r0, #0x07] 2188137463Scognet RET 2189129254Scognet LMEMCPY_8_PAD 2190129254Scognet 2191129254Scognet/* 2192129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 2193129254Scognet */ 2194129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2195129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2196129254Scognet mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 2197129254Scognet#ifdef __ARMEB__ 2198129254Scognet strh r1, [r0] 2199129254Scognet mov r1, r3, lsr #16 /* r1 = ..45 */ 2200129254Scognet orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */
2201129254Scognet#else 2202129254Scognet strh r2, [r0] 2203129254Scognet orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 2204129254Scognet mov r3, r3, lsr #16 /* r3 = ..76 */ 2205129254Scognet#endif 2206129254Scognet str r2, [r0, #0x02] 2207129254Scognet strh r3, [r0, #0x06] 2208137463Scognet RET 2209129254Scognet LMEMCPY_8_PAD 2210129254Scognet 2211129254Scognet/* 2212129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned 2213129254Scognet */ 2214129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 2215129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2216129254Scognet ldrb ip, [r1, #0x07] /* ip = ...7 */ 2217129254Scognet mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2218129254Scognet strh r1, [r0] 2219129254Scognet#ifdef __ARMEB__ 2220129254Scognet mov r1, r2, lsl #24 /* r1 = 2... */ 2221129254Scognet orr r1, r1, r3, lsr #8 /* r1 = 2345 */ 2222129254Scognet orr r3, ip, r3, lsl #8 /* r3 = 4567 */ 2223129254Scognet#else 2224129254Scognet mov r1, r2, lsr #24 /* r1 = ...2 */ 2225129254Scognet orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 2226129254Scognet mov r3, r3, lsr #24 /* r3 = ...6 */ 2227129254Scognet orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 2228129254Scognet#endif 2229129254Scognet str r1, [r0, #0x02] 2230129254Scognet strh r3, [r0, #0x06] 2231137463Scognet RET 2232129254Scognet LMEMCPY_8_PAD 2233129254Scognet 2234129254Scognet/* 2235129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 2236129254Scognet */ 2237129254Scognet ldrh r2, [r1] 2238129254Scognet ldr ip, [r1, #0x02] 2239129254Scognet ldrh r3, [r1, #0x06] 2240129254Scognet strh r2, [r0] 2241129254Scognet str ip, [r0, #0x02] 2242129254Scognet strh r3, [r0, #0x06] 2243137463Scognet RET 2244129254Scognet LMEMCPY_8_PAD 2245129254Scognet 2246129254Scognet/* 2247129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned 2248129254Scognet */ 2249129254Scognet ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 2250129254Scognet ldr r2, [r1, #0x01] /* BE:r2 =
1234 LE:r2 = 4321 */ 2251129254Scognet ldrb ip, [r1] /* ip = ...0 */ 2252129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 2253129254Scognet strh r1, [r0, #0x06] 2254129254Scognet#ifdef __ARMEB__ 2255129254Scognet mov r3, r3, lsr #24 /* r3 = ...5 */ 2256129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 2345 */ 2257129254Scognet mov r2, r2, lsr #24 /* r2 = ...1 */ 2258129254Scognet orr r2, r2, ip, lsl #8 /* r2 = ..01 */ 2259129254Scognet#else 2260129254Scognet mov r3, r3, lsl #24 /* r3 = 5... */ 2261129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 2262129254Scognet orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 2263129254Scognet#endif 2264129254Scognet str r3, [r0, #0x02] 2265129254Scognet strh r2, [r0] 2266137463Scognet RET 2267129254Scognet LMEMCPY_8_PAD 2268129254Scognet 2269129254Scognet/* 2270129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned 2271129254Scognet */ 2272129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2273129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2274129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 2275129254Scognet strh r1, [r0, #0x05] 2276129254Scognet#ifdef __ARMEB__ 2277129254Scognet strb r3, [r0, #0x07] 2278129254Scognet mov r1, r2, lsr #24 /* r1 = ...0 */ 2279129254Scognet strb r1, [r0] 2280129254Scognet mov r2, r2, lsl #8 /* r2 = 123.
*/ 2281129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 1234 */ 2282129254Scognet str r2, [r0, #0x01] 2283129254Scognet#else 2284129254Scognet strb r2, [r0] 2285129254Scognet mov r1, r3, lsr #24 /* r1 = ...7 */ 2286129254Scognet strb r1, [r0, #0x07] 2287129254Scognet mov r2, r2, lsr #8 /* r2 = .321 */ 2288129254Scognet orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 2289129254Scognet str r2, [r0, #0x01] 2290129254Scognet#endif 2291137463Scognet RET 2292129254Scognet LMEMCPY_8_PAD 2293129254Scognet 2294129254Scognet/* 2295129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned 2296129254Scognet */ 2297129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2298129254Scognet ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 2299129254Scognet ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2300129254Scognet ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2301129254Scognet strb r3, [r0] 2302129254Scognet mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 2303129254Scognet#ifdef __ARMEB__ 2304129254Scognet strh ip, [r0, #0x05] 2305129254Scognet orr r2, r3, r2, lsl #16 /* r2 = 1234 */ 2306129254Scognet#else 2307129254Scognet strh r3, [r0, #0x05] 2308129254Scognet orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 2309129254Scognet#endif 2310129254Scognet str r2, [r0, #0x01] 2311129254Scognet strb r1, [r0, #0x07] 2312137463Scognet RET 2313129254Scognet LMEMCPY_8_PAD 2314129254Scognet 2315129254Scognet/* 2316129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned 2317129254Scognet */ 2318129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2319129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2320129254Scognet ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2321129254Scognet#ifdef __ARMEB__ 2322129254Scognet mov ip, r2, lsr #8 /* ip = ...0 */ 2323129254Scognet strb ip, [r0] 2324129254Scognet mov ip, r2, lsl #24 /* ip = 1...
*/ 2325129254Scognet orr ip, ip, r3, lsr #8 /* ip = 1234 */ 2326129254Scognet strb r1, [r0, #0x07] 2327129254Scognet mov r1, r1, lsr #8 /* r1 = ...6 */ 2328129254Scognet orr r1, r1, r3, lsl #8 /* r1 = 3456 */ 2329129254Scognet#else 2330129254Scognet strb r2, [r0] 2331129254Scognet mov ip, r2, lsr #8 /* ip = ...1 */ 2332129254Scognet orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2333129254Scognet mov r2, r1, lsr #8 /* r2 = ...7 */ 2334129254Scognet strb r2, [r0, #0x07] 2335129254Scognet mov r1, r1, lsl #8 /* r1 = .76. */ 2336129254Scognet orr r1, r1, r3, lsr #24 /* r1 = .765 */ 2337129254Scognet#endif 2338129254Scognet str ip, [r0, #0x01] 2339129254Scognet strh r1, [r0, #0x05] 2340137463Scognet RET 2341129254Scognet LMEMCPY_8_PAD 2342129254Scognet 2343129254Scognet/* 2344129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned 2345129254Scognet */ 2346129254Scognet ldrb r2, [r1] 2347129254Scognet ldr ip, [r1, #0x01] 2348129254Scognet ldrh r3, [r1, #0x05] 2349129254Scognet ldrb r1, [r1, #0x07] 2350129254Scognet strb r2, [r0] 2351129254Scognet str ip, [r0, #0x01] 2352129254Scognet strh r3, [r0, #0x05] 2353129254Scognet strb r1, [r0, #0x07] 2354137463Scognet RET 2355129254Scognet LMEMCPY_8_PAD 2356129254Scognet 2357129254Scognet/****************************************************************************** 2358129254Scognet * Special case for 12 byte copies 2359129254Scognet */ 2360129254Scognet#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 2361129254Scognet#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 2362129254Scognet LMEMCPY_C_PAD 2363129254Scognet.Lmemcpy_c: 2364129254Scognet and r2, r1, #0x03 /* r2 = src & 3 */ 2365129254Scognet orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 */ 2366129254Scognet ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3); Z set for case 0000 */ 2367129254Scognet sub r3, pc, #0x14 /* r3 = .Lmemcpy_c (pc reads as this insn + 8) */ 2368129254Scognet addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* jump to case slot r2; case 0000 falls through */ 2369129254Scognet 2370129254Scognet/* 2371129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 2372129254Scognet */ 2373129254Scognet ldr r2, [r1] 2374129254Scognet ldr r3, [r1, #0x04]
2375129254Scognet ldr r1, [r1, #0x08] 2376129254Scognet str r2, [r0] 2377129254Scognet str r3, [r0, #0x04] 2378129254Scognet str r1, [r0, #0x08] 2379137463Scognet RET 2380129254Scognet LMEMCPY_C_PAD 2381129254Scognet 2382129254Scognet/* 2383129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 2384129254Scognet */ 2385129254Scognet ldrb r2, [r1, #0xb] /* r2 = ...B */ 2386129254Scognet ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 2387129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2388129254Scognet ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 2389129254Scognet#ifdef __ARMEB__ 2390129254Scognet orr r2, r2, ip, lsl #8 /* r2 = 89AB */ 2391129254Scognet str r2, [r0, #0x08] 2392129254Scognet mov r2, ip, lsr #24 /* r2 = ...7 */ 2393129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 4567 */ 2394129254Scognet mov r1, r1, lsl #8 /* r1 = 012. */ 2395129254Scognet orr r1, r1, r3, lsr #24 /* r1 = 0123 */ 2396129254Scognet#else 2397129254Scognet mov r2, r2, lsl #24 /* r2 = B... */ 2398129254Scognet orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 2399129254Scognet str r2, [r0, #0x08] 2400129254Scognet mov r2, ip, lsl #24 /* r2 = 7... */ 2401129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 2402129254Scognet mov r1, r1, lsr #8 /* r1 = .210 */ 2403129254Scognet orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 2404129254Scognet#endif 2405129254Scognet str r2, [r0, #0x04] 2406129254Scognet str r1, [r0] 2407137463Scognet RET 2408129254Scognet LMEMCPY_C_PAD 2409129254Scognet 2410129254Scognet/* 2411129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 2412129254Scognet */ 2413129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2414129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2415129254Scognet ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 2416129254Scognet ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 2417129254Scognet#ifdef __ARMEB__ 2418129254Scognet mov r2, r2, lsl #16 /* r2 = 01.. 
*/ 2419129254Scognet orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2420129254Scognet str r2, [r0] 2421129254Scognet mov r3, r3, lsl #16 /* r3 = 45.. */ 2422129254Scognet orr r3, r3, ip, lsr #16 /* r3 = 4567 */ 2423129254Scognet orr r1, r1, ip, lsl #16 /* r1 = 89AB */ 2424129254Scognet#else 2425129254Scognet orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2426129254Scognet str r2, [r0] 2427129254Scognet mov r3, r3, lsr #16 /* r3 = ..54 */ 2428129254Scognet orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 2429129254Scognet mov r1, r1, lsl #16 /* r1 = BA.. */ 2430129254Scognet orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 2431129254Scognet#endif 2432129254Scognet str r3, [r0, #0x04] 2433129254Scognet str r1, [r0, #0x08] 2434137463Scognet RET 2435129254Scognet LMEMCPY_C_PAD 2436129254Scognet 2437129254Scognet/* 2438129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 2439129254Scognet */ 2440129254Scognet ldrb r2, [r1] /* r2 = ...0 */ 2441129254Scognet ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 2442129254Scognet ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 2443129254Scognet ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 2444129254Scognet#ifdef __ARMEB__ 2445129254Scognet mov r2, r2, lsl #24 /* r2 = 0... */ 2446129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 2447129254Scognet str r2, [r0] 2448129254Scognet mov r3, r3, lsl #24 /* r3 = 4... */ 2449129254Scognet orr r3, r3, ip, lsr #8 /* r3 = 4567 */ 2450129254Scognet mov r1, r1, lsr #8 /* r1 = .9AB */ 2451129254Scognet orr r1, r1, ip, lsl #24 /* r1 = 89AB */ 2452129254Scognet#else 2453129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 2454129254Scognet str r2, [r0] 2455129254Scognet mov r3, r3, lsr #24 /* r3 = ...4 */ 2456129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 2457129254Scognet mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 2458129254Scognet orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 2459129254Scognet#endif 2460129254Scognet str r3, [r0, #0x04] 2461129254Scognet str r1, [r0, #0x08] 2462137463Scognet RET 2463129254Scognet LMEMCPY_C_PAD 2464129254Scognet 2465129254Scognet/* 2466129254Scognet * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 2467129254Scognet */ 2468129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2469129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2470129254Scognet ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 2471129254Scognet mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 2472129254Scognet strh r1, [r0, #0x01] 2473129254Scognet#ifdef __ARMEB__ 2474129254Scognet mov r1, r2, lsr #24 /* r1 = ...0 */ 2475129254Scognet strb r1, [r0] 2476129254Scognet mov r1, r2, lsl #24 /* r1 = 3... */ 2477129254Scognet orr r2, r1, r3, lsr #8 /* r1 = 3456 */ 2478129254Scognet mov r1, r3, lsl #24 /* r1 = 7... */ 2479129254Scognet orr r1, r1, ip, lsr #8 /* r1 = 789A */ 2480129254Scognet#else 2481129254Scognet strb r2, [r0] 2482129254Scognet mov r1, r2, lsr #24 /* r1 = ...3 */ 2483129254Scognet orr r2, r1, r3, lsl #8 /* r1 = 6543 */ 2484129254Scognet mov r1, r3, lsr #24 /* r1 = ...7 */ 2485129254Scognet orr r1, r1, ip, lsl #8 /* r1 = A987 */ 2486129254Scognet mov ip, ip, lsr #24 /* ip = ...B */ 2487129254Scognet#endif 2488129254Scognet str r2, [r0, #0x03] 2489129254Scognet str r1, [r0, #0x07] 2490129254Scognet strb ip, [r0, #0x0b] 2491137463Scognet RET 2492129254Scognet LMEMCPY_C_PAD 2493129254Scognet 2494129254Scognet/* 2495129254Scognet * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) 2496129254Scognet */ 2497129254Scognet ldrb r2, [r1] 2498129254Scognet ldrh r3, [r1, #0x01] 2499129254Scognet ldr ip, [r1, #0x03] 2500129254Scognet strb r2, [r0] 2501129254Scognet ldr r2, [r1, #0x07] 2502129254Scognet ldrb r1, [r1, #0x0b] 2503129254Scognet strh r3, [r0, #0x01] 2504129254Scognet str ip, [r0, #0x03] 2505129254Scognet str r2, 
[r0, #0x07] 2506129254Scognet strb r1, [r0, #0x0b] 2507137463Scognet RET 2508129254Scognet LMEMCPY_C_PAD 2509129254Scognet 2510129254Scognet/* 2511129254Scognet * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned. Loads halfword/word/word/halfword from src and re-packs them into the byte/halfword/word/word/byte pattern stored at dst+0, +1, +3, +7 and +0x0b. 2512129254Scognet */ 2513129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2514129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2515129254Scognet ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 2516129254Scognet ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 2517129254Scognet#ifdef __ARMEB__ 2518129254Scognet mov r2, r2, ror #8 /* r2 = 1..0 */ 2519129254Scognet strb r2, [r0] 2520129254Scognet mov r2, r2, lsr #16 /* r2 = ..1. */ 2521129254Scognet orr r2, r2, r3, lsr #24 /* r2 = ..12 */ 2522129254Scognet strh r2, [r0, #0x01] 2523129254Scognet mov r2, r3, lsl #8 /* r2 = 345. */ 2524129254Scognet orr r3, r2, ip, lsr #24 /* r3 = 3456 */ 2525129254Scognet mov r2, ip, lsl #8 /* r2 = 789. */ 2526129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 789A */ 2527129254Scognet#else 2528129254Scognet strb r2, [r0] 2529129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 2530129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2531129254Scognet strh r2, [r0, #0x01] 2532129254Scognet mov r2, r3, lsr #8 /* r2 = .543 */ 2533129254Scognet orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 2534129254Scognet mov r2, ip, lsr #8 /* r2 = .987 */ 2535129254Scognet orr r2, r2, r1, lsl #24 /* r2 = A987 */ 2536129254Scognet mov r1, r1, lsr #8 /* r1 = ...B */ 2537129254Scognet#endif 2538129254Scognet str r3, [r0, #0x03] 2539129254Scognet str r2, [r0, #0x07] 2540129254Scognet strb r1, [r0, #0x0b] 2541137463Scognet RET 2542129254Scognet LMEMCPY_C_PAD 2543129254Scognet 2544129254Scognet/* 2545129254Scognet * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3). Loads a byte and the words at src+1, src+5 and src+9, then stores byte/halfword/word/word/byte at dst+0, +1, +3, +7 and +0x0b. The load at src+9 also fetches one byte past the 12 being copied (x); it is either shifted out or left in bits that the final strb does not store. 2546129254Scognet */ 2547129254Scognet ldrb r2, [r1] 2548129254Scognet ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 2549129254Scognet ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 
2550129254Scognet ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 (x = byte at src+12, outside the copy) */ 2551129254Scognet strb r2, [r0] 2552129254Scognet#ifdef __ARMEB__ 2553129254Scognet mov r2, r3, lsr #16 /* r2 = ..12 */ 2554129254Scognet strh r2, [r0, #0x01] 2555129254Scognet mov r3, r3, lsl #16 /* r3 = 34.. */ 2556129254Scognet orr r3, r3, ip, lsr #16 /* r3 = 3456 */ 2557129254Scognet mov ip, ip, lsl #16 /* ip = 78.. */ 2558129254Scognet orr ip, ip, r1, lsr #16 /* ip = 789A */ 2559129254Scognet mov r1, r1, lsr #8 /* r1 = .9AB */ 2560129254Scognet#else 2561129254Scognet strh r3, [r0, #0x01] 2562129254Scognet mov r3, r3, lsr #16 /* r3 = ..43 */ 2563129254Scognet orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 2564129254Scognet mov ip, ip, lsr #16 /* ip = ..87 */ 2565129254Scognet orr ip, ip, r1, lsl #16 /* ip = A987 */ 2566129254Scognet mov r1, r1, lsr #16 /* r1 = ..xB; the strb below stores only B */ 2567129254Scognet#endif 2568129254Scognet str r3, [r0, #0x03] 2569129254Scognet str ip, [r0, #0x07] 2570129254Scognet strb r1, [r0, #0x0b] 2571137463Scognet RET 2572129254Scognet LMEMCPY_C_PAD 2573129254Scognet 2574129254Scognet/* 2575129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned. Three source words are loaded and written out as a halfword at dst+0, words at dst+2 and dst+6, and a halfword at dst+0x0a. 2576129254Scognet */ 2577129254Scognet ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 2578129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2579129254Scognet ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 2580129254Scognet mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 2581129254Scognet#ifdef __ARMEB__ 2582129254Scognet strh r1, [r0] 2583129254Scognet mov r1, ip, lsl #16 /* r1 = 23.. */ 2584129254Scognet orr r1, r1, r3, lsr #16 /* r1 = 2345 */ 2585129254Scognet mov r3, r3, lsl #16 /* r3 = 67.. 
*/ 2586129254Scognet orr r3, r3, r2, lsr #16 /* r3 = 6789 */ 2587129254Scognet#else 2588129254Scognet strh ip, [r0] 2589129254Scognet orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 2590129254Scognet mov r3, r3, lsr #16 /* r3 = ..76 */ 2591129254Scognet orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 2592129254Scognet mov r2, r2, lsr #16 /* r2 = ..BA */ 2593129254Scognet#endif 2594129254Scognet str r1, [r0, #0x02] 2595129254Scognet str r3, [r0, #0x06] 2596129254Scognet strh r2, [r0, #0x0a] 2597137463Scognet RET 2598129254Scognet LMEMCPY_C_PAD 2599129254Scognet 2600129254Scognet/* 2601129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1). The first word is loaded from src-1, so it carries one byte that precedes the source (x in the register pictures). strh stores only the low 16 bits of ip and the later lsl/lsr #24 of r2 removes x, so it is never written to dst. Stores are a halfword at dst+0, words at dst+2 and dst+6, and a halfword at dst+0x0a. 2602129254Scognet */ 2603129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 2604129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2605129254Scognet mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 2606129254Scognet strh ip, [r0] 2607129254Scognet ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 2608129254Scognet ldrb r1, [r1, #0x0b] /* r1 = ...B */ 2609129254Scognet#ifdef __ARMEB__ 2610129254Scognet mov r2, r2, lsl #24 /* r2 = 2... */ 2611129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 2345 */ 2612129254Scognet mov r3, r3, lsl #24 /* r3 = 6... */ 2613129254Scognet orr r3, r3, ip, lsr #8 /* r3 = 6789 */ 2614129254Scognet orr r1, r1, ip, lsl #8 /* r1 = 89AB */ 2615129254Scognet#else 2616129254Scognet mov r2, r2, lsr #24 /* r2 = ...2 */ 2617129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 2618129254Scognet mov r3, r3, lsr #24 /* r3 = ...6 */ 2619129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 2620129254Scognet mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 2621129254Scognet orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 2622129254Scognet#endif 2623129254Scognet str r2, [r0, #0x02] 2624129254Scognet str r3, [r0, #0x06] 2625129254Scognet strh r1, [r0, #0x0a] 2626137463Scognet RET 2627129254Scognet LMEMCPY_C_PAD 2628129254Scognet 2629129254Scognet/* 2630129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned. Same alignment on both sides: halfword, word, word, halfword are stored exactly as loaded, with no shifting and no endian-specific code. 2631129254Scognet */ 2632129254Scognet ldrh r2, [r1] 2633129254Scognet ldr r3, [r1, #0x02] 2634129254Scognet ldr ip, [r1, #0x06] 2635129254Scognet ldrh r1, [r1, #0x0a] 2636129254Scognet strh r2, [r0] 2637129254Scognet str r3, [r0, #0x02] 2638129254Scognet str ip, [r0, #0x06] 2639129254Scognet strh r1, [r0, #0x0a] 2640137463Scognet RET 2641129254Scognet LMEMCPY_C_PAD 2642129254Scognet 2643129254Scognet/* 2644129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3). Works from the tail first: the words at src+9 and src+5 are loaded and the last halfword is stored before the first bytes are read. The word at src+9 includes one byte past the 12 being copied (x); strh stores only the low 16 bits of ip and the later shift of r2 drops x, so it is never written to dst. 2645129254Scognet */ 2646129254Scognet ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 2647129254Scognet ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 2648129254Scognet mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 2649129254Scognet strh ip, [r0, #0x0a] 2650129254Scognet ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2651129254Scognet ldrb r1, [r1] /* r1 = ...0 */ 2652129254Scognet#ifdef __ARMEB__ 2653129254Scognet mov r2, r2, lsr #24 /* r2 = ...9 */ 2654129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 6789 */ 2655129254Scognet mov r3, r3, lsr #24 /* r3 = ...5 */ 2656129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 2345 */ 2657129254Scognet mov r1, r1, lsl #8 /* r1 = ..0. */ 2658129254Scognet orr r1, r1, ip, lsr #24 /* r1 = ..01 */ 2659129254Scognet#else 2660129254Scognet mov r2, r2, lsl #24 /* r2 = 9... */ 2661129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 2662129254Scognet mov r3, r3, lsl #24 /* r3 = 5... 
*/ 2663129254Scognet orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 2664129254Scognet orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 2665129254Scognet#endif 2666129254Scognet str r2, [r0, #0x06] 2667129254Scognet str r3, [r0, #0x02] 2668129254Scognet strh r1, [r0] 2669137463Scognet RET 2670129254Scognet LMEMCPY_C_PAD 2671129254Scognet 2672129254Scognet/* 2673129254Scognet * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned. Three source words are loaded; each endian branch then issues its own stores: a byte at dst+0, words at dst+1 and dst+5, a halfword at dst+9 and a byte at dst+0x0b. 2674129254Scognet */ 2675129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2676129254Scognet ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 2677129254Scognet ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 2678129254Scognet#ifdef __ARMEB__ 2679129254Scognet mov r3, r2, lsr #24 /* r3 = ...0 */ 2680129254Scognet strb r3, [r0] 2681129254Scognet mov r2, r2, lsl #8 /* r2 = 123. */ 2682129254Scognet orr r2, r2, ip, lsr #24 /* r2 = 1234 */ 2683129254Scognet str r2, [r0, #0x01] 2684129254Scognet mov r2, ip, lsl #8 /* r2 = 567. */ 2685129254Scognet orr r2, r2, r1, lsr #24 /* r2 = 5678 */ 2686129254Scognet str r2, [r0, #0x05] 2687129254Scognet mov r2, r1, lsr #8 /* r2 = .89A; strh below stores only the low 9A */ 2688129254Scognet strh r2, [r0, #0x09] 2689129254Scognet strb r1, [r0, #0x0b] 2690129254Scognet#else 2691129254Scognet strb r2, [r0] 2692129254Scognet mov r3, r2, lsr #8 /* r3 = .321 */ 2693129254Scognet orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 2694129254Scognet str r3, [r0, #0x01] 2695129254Scognet mov r3, ip, lsr #8 /* r3 = .765 */ 2696129254Scognet orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 2697129254Scognet str r3, [r0, #0x05] 2698129254Scognet mov r1, r1, lsr #8 /* r1 = .BA9 */ 2699129254Scognet strh r1, [r0, #0x09] 2700129254Scognet mov r1, r1, lsr #16 /* r1 = ...B */ 2701129254Scognet strb r1, [r0, #0x0b] 2702129254Scognet#endif 2703137463Scognet RET 2704129254Scognet LMEMCPY_C_PAD 2705129254Scognet 2706129254Scognet/* 2707129254Scognet * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1). Loads run from the end of the source backwards; the word at src-1 carries one byte that precedes the source (x), which is shifted out or left above the single byte that the final strb stores. 2708129254Scognet */ 2709129254Scognet ldrb r2, [r1, #0x0b] /* r2 
= ...B */ 2710129254Scognet ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 2711129254Scognet ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2712129254Scognet ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x (x = byte at src-1, outside the copy) */ 2713129254Scognet strb r2, [r0, #0x0b] 2714129254Scognet#ifdef __ARMEB__ 2715129254Scognet strh r3, [r0, #0x09] 2716129254Scognet mov r3, r3, lsr #16 /* r3 = ..78 */ 2717129254Scognet orr r3, r3, ip, lsl #16 /* r3 = 5678 */ 2718129254Scognet mov ip, ip, lsr #16 /* ip = ..34 */ 2719129254Scognet orr ip, ip, r1, lsl #16 /* ip = 1234 */ 2720129254Scognet mov r1, r1, lsr #16 /* r1 = ..x0; the strb below stores only 0 */ 2721129254Scognet#else 2722129254Scognet mov r2, r3, lsr #16 /* r2 = ..A9 */ 2723129254Scognet strh r2, [r0, #0x09] 2724129254Scognet mov r3, r3, lsl #16 /* r3 = 87.. */ 2725129254Scognet orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 2726129254Scognet mov ip, ip, lsl #16 /* ip = 43.. */ 2727129254Scognet orr ip, ip, r1, lsr #16 /* ip = 4321 */ 2728129254Scognet mov r1, r1, lsr #8 /* r1 = .210 */ 2729129254Scognet#endif 2730129254Scognet str r3, [r0, #0x05] 2731129254Scognet str ip, [r0, #0x01] 2732129254Scognet strb r1, [r0] 2733137463Scognet RET 2734129254Scognet LMEMCPY_C_PAD 2735129254Scognet 2736129254Scognet/* 2737129254Scognet * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned. Unlike the other shifted cases, the big- and little-endian versions are written out in full, each with its own loads and stores: halfword/word/word/halfword in, byte/word/word/halfword/byte out. 2738129254Scognet */ 2739129254Scognet#ifdef __ARMEB__ 2740129254Scognet ldrh r2, [r1, #0x0a] /* r2 = ..AB */ 2741129254Scognet ldr ip, [r1, #0x06] /* ip = 6789 */ 2742129254Scognet ldr r3, [r1, #0x02] /* r3 = 2345 */ 2743129254Scognet ldrh r1, [r1] /* r1 = ..01 */ 2744129254Scognet strb r2, [r0, #0x0b] 2745129254Scognet mov r2, r2, lsr #8 /* r2 = ...A */ 2746129254Scognet orr r2, r2, ip, lsl #8 /* r2 = 789A */ 2747129254Scognet mov ip, ip, lsr #8 /* ip = .678 */ 2748129254Scognet orr ip, ip, r3, lsl #24 /* ip = 5678 */ 2749129254Scognet mov r3, r3, lsr #8 /* r3 = .234 */ 2750129254Scognet orr r3, r3, r1, lsl #24 /* r3 = 1234 */ 2751129254Scognet mov r1, r1, lsr #8 /* r1 = ...0 */ 
2752129254Scognet strb r1, [r0] 2753129254Scognet str r3, [r0, #0x01] 2754129254Scognet str ip, [r0, #0x05] 2755129254Scognet strh r2, [r0, #0x09] /* stores only the low halfword (9A) of r2 = 789A */ 2756129254Scognet#else 2757129254Scognet ldrh r2, [r1] /* r2 = ..10 */ 2758129254Scognet ldr r3, [r1, #0x02] /* r3 = 5432 */ 2759129254Scognet ldr ip, [r1, #0x06] /* ip = 9876 */ 2760129254Scognet ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 2761129254Scognet strb r2, [r0] 2762129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 2763129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2764129254Scognet mov r3, r3, lsr #24 /* r3 = ...5 */ 2765129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 2766129254Scognet mov ip, ip, lsr #24 /* ip = ...9 */ 2767129254Scognet orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 2768129254Scognet mov r1, r1, lsr #8 /* r1 = ...B */ 2769129254Scognet str r2, [r0, #0x01] 2770129254Scognet str r3, [r0, #0x05] 2771129254Scognet strh ip, [r0, #0x09] 2772129254Scognet strb r1, [r0, #0x0b] 2773129254Scognet#endif 2774137463Scognet RET 2775129254Scognet LMEMCPY_C_PAD 2776129254Scognet 2777129254Scognet/* 2778129254Scognet * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3). Same alignment on both sides: byte, word, word, halfword, byte are stored exactly as loaded, with no shifting and no endian-specific code. This slot is followed by the closing #endif rather than by LMEMCPY_C_PAD. 2779129254Scognet */ 2780129254Scognet ldrb r2, [r1] 2781129254Scognet ldr r3, [r1, #0x01] 2782129254Scognet ldr ip, [r1, #0x05] 2783129254Scognet strb r2, [r0] 2784129254Scognet ldrh r2, [r1, #0x09] 2785129254Scognet ldrb r1, [r1, #0x0b] 2786129254Scognet str r3, [r0, #0x01] 2787129254Scognet str ip, [r0, #0x05] 2788129254Scognet strh r2, [r0, #0x09] 2789129254Scognet strb r1, [r0, #0x0b] 2790137463Scognet RET 2791129254Scognet#endif /* __XSCALE__ */ 2792135654Scognet 2793135654Scognet#ifdef GPROF 2794135654Scognet /* Five ENTRY() symbols (user, btrap, etrap, bintr, eintr), each followed by a single nop and assembled only when GPROF is defined. Presumably these are address markers for kernel profiling rather than callable routines -- TODO confirm against the profiling code. NOTE(review): none of them ends in RET, so a call to any of them would fall through the remaining nops and past eintr. */ 2795135654ScognetENTRY(user) 2796135654Scognet nop 2797135654ScognetENTRY(btrap) 2798135654Scognet nop 2799135654ScognetENTRY(etrap) 2800135654Scognet nop 2801135654ScognetENTRY(bintr) 2802135654Scognet nop 2803135654ScognetENTRY(eintr) 2804135654Scognet nop 2805135654Scognet 2806135654Scognet#endif 2807