1129198Scognet/*- 2129198Scognet * Copyright (c) 2004 Olivier Houchard 3129198Scognet * All rights reserved. 4129198Scognet * 5129198Scognet * Redistribution and use in source and binary forms, with or without 6129198Scognet * modification, are permitted provided that the following conditions 7129198Scognet * are met: 8129198Scognet * 1. Redistributions of source code must retain the above copyright 9129198Scognet * notice, this list of conditions and the following disclaimer. 10129198Scognet * 2. Redistributions in binary form must reproduce the above copyright 11129198Scognet * notice, this list of conditions and the following disclaimer in the 12129198Scognet * documentation and/or other materials provided with the distribution. 13129198Scognet * 14129198Scognet * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15129198Scognet * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16129198Scognet * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17129198Scognet * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18129198Scognet * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19129198Scognet * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20129198Scognet * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21129198Scognet * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22129198Scognet * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23129198Scognet * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24129198Scognet * SUCH DAMAGE. 25129198Scognet */ 26175255Scognet/* 27175255Scognet * Copyright 2003 Wasabi Systems, Inc. 28175255Scognet * All rights reserved. 29175255Scognet * 30175255Scognet * Written by Steve C. Woodford for Wasabi Systems, Inc. 
31175255Scognet * 32175255Scognet * Redistribution and use in source and binary forms, with or without 33175255Scognet * modification, are permitted provided that the following conditions 34175255Scognet * are met: 35175255Scognet * 1. Redistributions of source code must retain the above copyright 36175255Scognet * notice, this list of conditions and the following disclaimer. 37175255Scognet * 2. Redistributions in binary form must reproduce the above copyright 38175255Scognet * notice, this list of conditions and the following disclaimer in the 39175255Scognet * documentation and/or other materials provided with the distribution. 40175255Scognet * 3. All advertising materials mentioning features or use of this software 41175255Scognet * must display the following acknowledgement: 42175255Scognet * This product includes software developed for the NetBSD Project by 43175255Scognet * Wasabi Systems, Inc. 44175255Scognet * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45175255Scognet * or promote products derived from this software without specific prior 46175255Scognet * written permission. 47175255Scognet * 48175255Scognet * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49175255Scognet * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50175255Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51175255Scognet * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC 52175255Scognet * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53175255Scognet * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54175255Scognet * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55175255Scognet * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56175255Scognet * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57175255Scognet * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58175255Scognet * POSSIBILITY OF SUCH DAMAGE. 59175255Scognet */ 60175255Scognet/* 61175255Scognet * Copyright (c) 1997 The NetBSD Foundation, Inc. 62175255Scognet * All rights reserved. 63175255Scognet * 64175255Scognet * This code is derived from software contributed to The NetBSD Foundation 65175255Scognet * by Neil A. Carson and Mark Brinicombe 66175255Scognet * 67175255Scognet * Redistribution and use in source and binary forms, with or without 68175255Scognet * modification, are permitted provided that the following conditions 69175255Scognet * are met: 70175255Scognet * 1. Redistributions of source code must retain the above copyright 71175255Scognet * notice, this list of conditions and the following disclaimer. 72175255Scognet * 2. Redistributions in binary form must reproduce the above copyright 73175255Scognet * notice, this list of conditions and the following disclaimer in the 74175255Scognet * documentation and/or other materials provided with the distribution. 75175255Scognet * 76175255Scognet * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 77175255Scognet * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 78175255Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 79175255Scognet * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#include "assym.s"

	.syntax	unified

/*
 * Literal pool.  Each word holds the address of an external variable so
 * that it can be reached with a pc-relative ldr followed by a second load.
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_arm_bzero:
	.word	_C_LABEL(_arm_bzero)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)
.L_min_bzero_size:
	.word	_C_LABEL(_min_bzero_size)

/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 *
 * bzero: Same fill with the value zero
 *
 * On entry:
 *   r0 - dest address
 *   r1 - number of bytes to write
 *
 * Both routines share the code at do_memset, which expects
 *   r0 - dest address
 *   r1 - number of bytes to write
 *   r3 - fill byte
 * and uses ip as the running store pointer, so r0 is never modified.
 */
/* LINTSTUB: Func: void bzero(void *, size_t) */
ENTRY(bzero)
	/*
	 * Optional hook: if the function pointer stored in _arm_bzero is
	 * non-NULL and the length is at least _min_bzero_size (signed
	 * compare), call it with r0/r1 unchanged and r2 = 0.  The meaning
	 * of that third argument is defined by the hook and is not visible
	 * here.  A zero return value ends bzero; any other value falls
	 * through to the generic fill below.
	 */
	ldr	r3, .L_arm_bzero
	ldr	r3, [r3]		/* r3 = _arm_bzero */
	cmp	r3, #0
	beq	.Lnormal0		/* No hook installed */
	ldr	r2, .L_min_bzero_size
	ldr	r2, [r2]		/* r2 = _min_bzero_size */
	cmp	r1, r2
	blt	.Lnormal0		/* Too short for the hook */
	stmfd	sp!, {r0, r1, lr}
	mov	r2, #0
	mov	lr, pc			/* pc reads two instructions ahead */
	mov	pc, r3			/* Indirect call to the hook */
	cmp	r0, #0
	ldmfd	sp!, {r0, r1, lr}	/* Does not alter the flags */
	RETeq
.Lnormal0:
	mov	r3, #0x00		/* Fill byte is zero */
	b	do_memset
END(bzero)
/*
 * NOTE(review): memset is opened with ENTRY() but closed with EEND(), and
 * bzero is closed with END() both above and again after memset.  Whether
 * this pairing is intended depends on how <machine/asm.h> defines these
 * macros - confirm before changing.
 */
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2
do_memset:
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	/*
	 * The flag-setting tst/cmp is placed between the two orr
	 * instructions; orr without the s suffix leaves the flags alone,
	 * so the conditional instructions that follow still see its result.
	 */
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
#ifdef _ARM_ARCH_5E
	tst	ip, #0x04		/* Quad-align for armv5e */
#else
	cmp	r1, #0x10
#endif
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
#ifdef _ARM_ARCH_5E
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
#endif
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time */
.Lmemset_loop128:
	subs	r1, r1, #0x80
#ifdef _ARM_ARCH_5E
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
#else
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop128
	RETeq			/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time */
.Lmemset_loop32:
	subs	r1, r1, #0x20
#ifdef _ARM_ARCH_5E
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
#else
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop32
	RETeq			/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
#ifdef _ARM_ARCH_5E
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
#else
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
#endif
	RETeq			/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq			/* Zero length so just exit */

	/*
	 * One to three bytes remain.  The compare below is arranged so that
	 * lt means one byte, eq means two and gt means three.
	 */
#ifdef _ARM_ARCH_5E
	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2
#else
	cmp	r1, #-2
#endif

	strb	r3, [ip], #0x01		/* Set 1 byte */
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET			/* Exit */

.Lmemset_wordunaligned:
	rsb	r2, r2, #0x004		/* r2 = bytes needed to reach alignment */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2		/* Flags from the cmp are preserved */
	strbgt	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq			/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET			/* Exit */
EEND(memset)
END(bzero)

/*
 * bcmp: Compares two blocks of memory
 *
 * On entry:
 *   r0 - address of first block (b1)
 *   r1 - address of second block (b2)
 *   r2 - number of bytes to compare
 *
 * On exit:
 *   r0 - 0 if the length is zero, both addresses are identical, or all
 *        bytes match; otherwise the b1 byte minus the b2 byte at the
 *        first position where they differ
 *
 * ip carries the b1 pointer so that r0 can hold the result.
 */
ENTRY(bcmp)
	mov	ip, r0
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eorsne	r3, ip, r1
	RETeq			/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01
	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */

	/*
	 * Word-align the addresses, if necessary.
	 *
	 * r3 = (r1 - 1) & 3 selects how many of the three byte-compare
	 * stanzas below are skipped: the computed branch adds r3 * 24
	 * bytes to pc, and pc reads as the address of the first stanza
	 * (two instructions ahead), which is why the nop is needed.
	 * This relies on every stanza being exactly six 4-byte
	 * instructions, i.e. it assumes ARM-state encoding and that
	 * RETne/RETeq each expand to a single instruction.
	 */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03
	add	r3, r3, r3, lsl #1
	addne	pc, pc, r3, lsl #3
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3			/* Only while the count has not borrowed */
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq			/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3			/* Skipped once the count runs out */
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results. Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align 5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldrbeq	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne			/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldrbeq	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldrbeq	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne			/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldrbeq	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne			/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldrbeq	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldrbeq	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne			/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldrbeq	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne			/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET
END(bcmp)

/*
 * bcopy: exchanges r0 and r1 with three EORs (no scratch register needed)
 * and then falls straight into memmove, which is declared as an alternate
 * entry point (EENTRY) inside this function.
 */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
EENTRY(memmove)
	/* Do the buffers overlap?
*/ 401143175Scognet cmp r0, r1 402143175Scognet RETeq /* Bail now if src/dst are the same */ 403143175Scognet subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */ 404143175Scognet subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */ 405143175Scognet cmp r3, r2 /* if (r3 < len) we have an overlap */ 406143175Scognet bcc PIC_SYM(_C_LABEL(memcpy), PLT) 407143175Scognet 408143175Scognet /* Determine copy direction */ 409143175Scognet cmp r1, r0 410143175Scognet bcc .Lmemmove_backwards 411143175Scognet 412143175Scognet moveq r0, #0 /* Quick abort for len=0 */ 413143175Scognet RETeq 414143175Scognet 415143175Scognet stmdb sp!, {r0, lr} /* memmove() returns dest addr */ 416143175Scognet subs r2, r2, #4 417143175Scognet blt .Lmemmove_fl4 /* less than 4 bytes */ 418143175Scognet ands r12, r0, #3 419143175Scognet bne .Lmemmove_fdestul /* oh unaligned destination addr */ 420143175Scognet ands r12, r1, #3 421143175Scognet bne .Lmemmove_fsrcul /* oh unaligned source addr */ 422143175Scognet 423143175Scognet.Lmemmove_ft8: 424143175Scognet /* We have aligned source and destination */ 425143175Scognet subs r2, r2, #8 426143175Scognet blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */ 427236991Simp subs r2, r2, #0x14 428143175Scognet blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */ 429143175Scognet stmdb sp!, {r4} /* borrow r4 */ 430143175Scognet 431143175Scognet /* blat 32 bytes at a time */ 432143175Scognet /* XXX for really big copies perhaps we should use more registers */ 433283366Sandrew.Lmemmove_floop32: 434143175Scognet ldmia r1!, {r3, r4, r12, lr} 435143175Scognet stmia r0!, {r3, r4, r12, lr} 436143175Scognet ldmia r1!, {r3, r4, r12, lr} 437143175Scognet stmia r0!, {r3, r4, r12, lr} 438236991Simp subs r2, r2, #0x20 439143175Scognet bge .Lmemmove_floop32 440143175Scognet 441143175Scognet cmn r2, #0x10 442275418Sandrew ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 443275418Sandrew stmiage r0!, {r3, r4, r12, lr} 444236991Simp subge r2, r2, 
#0x10 445143175Scognet ldmia sp!, {r4} /* return r4 */ 446143175Scognet 447143175Scognet.Lmemmove_fl32: 448236991Simp adds r2, r2, #0x14 449143175Scognet 450143175Scognet /* blat 12 bytes at a time */ 451143175Scognet.Lmemmove_floop12: 452275418Sandrew ldmiage r1!, {r3, r12, lr} 453275418Sandrew stmiage r0!, {r3, r12, lr} 454275418Sandrew subsge r2, r2, #0x0c 455143175Scognet bge .Lmemmove_floop12 456143175Scognet 457143175Scognet.Lmemmove_fl12: 458143175Scognet adds r2, r2, #8 459143175Scognet blt .Lmemmove_fl4 460143175Scognet 461143175Scognet subs r2, r2, #4 462143175Scognet ldrlt r3, [r1], #4 463143175Scognet strlt r3, [r0], #4 464275418Sandrew ldmiage r1!, {r3, r12} 465275418Sandrew stmiage r0!, {r3, r12} 466143175Scognet subge r2, r2, #4 467143175Scognet 468143175Scognet.Lmemmove_fl4: 469143175Scognet /* less than 4 bytes to go */ 470143175Scognet adds r2, r2, #4 471275418Sandrew ldmiaeq sp!, {r0, pc} /* done */ 472143175Scognet 473143175Scognet /* copy the crud byte at a time */ 474143175Scognet cmp r2, #2 475143175Scognet ldrb r3, [r1], #1 476143175Scognet strb r3, [r0], #1 477275418Sandrew ldrbge r3, [r1], #1 478275418Sandrew strbge r3, [r0], #1 479275418Sandrew ldrbgt r3, [r1], #1 480275418Sandrew strbgt r3, [r0], #1 481143175Scognet ldmia sp!, {r0, pc} 482143175Scognet 483143175Scognet /* erg - unaligned destination */ 484143175Scognet.Lmemmove_fdestul: 485143175Scognet rsb r12, r12, #4 486143175Scognet cmp r12, #2 487143175Scognet 488143175Scognet /* align destination with byte copies */ 489143175Scognet ldrb r3, [r1], #1 490143175Scognet strb r3, [r0], #1 491275418Sandrew ldrbge r3, [r1], #1 492275418Sandrew strbge r3, [r0], #1 493275418Sandrew ldrbgt r3, [r1], #1 494275418Sandrew strbgt r3, [r0], #1 495143175Scognet subs r2, r2, r12 496143175Scognet blt .Lmemmove_fl4 /* less the 4 bytes */ 497143175Scognet 498143175Scognet ands r12, r1, #3 499143175Scognet beq .Lmemmove_ft8 /* we have an aligned source */ 500143175Scognet 501143175Scognet /* erg - 
unaligned source */ 502143175Scognet /* This is where it gets nasty ... */ 503143175Scognet.Lmemmove_fsrcul: 504143175Scognet bic r1, r1, #3 505143175Scognet ldr lr, [r1], #4 506143175Scognet cmp r12, #2 507143175Scognet bgt .Lmemmove_fsrcul3 508143175Scognet beq .Lmemmove_fsrcul2 509236991Simp cmp r2, #0x0c 510143175Scognet blt .Lmemmove_fsrcul1loop4 511236991Simp sub r2, r2, #0x0c 512143175Scognet stmdb sp!, {r4, r5} 513143175Scognet 514143175Scognet.Lmemmove_fsrcul1loop16: 515143175Scognet#ifdef __ARMEB__ 516143175Scognet mov r3, lr, lsl #8 517143175Scognet#else 518143175Scognet mov r3, lr, lsr #8 519143175Scognet#endif 520143175Scognet ldmia r1!, {r4, r5, r12, lr} 521143175Scognet#ifdef __ARMEB__ 522143175Scognet orr r3, r3, r4, lsr #24 523143175Scognet mov r4, r4, lsl #8 524143175Scognet orr r4, r4, r5, lsr #24 525143175Scognet mov r5, r5, lsl #8 526143175Scognet orr r5, r5, r12, lsr #24 527143175Scognet mov r12, r12, lsl #8 528143175Scognet orr r12, r12, lr, lsr #24 529143175Scognet#else 530143175Scognet orr r3, r3, r4, lsl #24 531143175Scognet mov r4, r4, lsr #8 532143175Scognet orr r4, r4, r5, lsl #24 533143175Scognet mov r5, r5, lsr #8 534143175Scognet orr r5, r5, r12, lsl #24 535143175Scognet mov r12, r12, lsr #8 536143175Scognet orr r12, r12, lr, lsl #24 537143175Scognet#endif 538143175Scognet stmia r0!, {r3-r5, r12} 539236991Simp subs r2, r2, #0x10 540143175Scognet bge .Lmemmove_fsrcul1loop16 541143175Scognet ldmia sp!, {r4, r5} 542236991Simp adds r2, r2, #0x0c 543143175Scognet blt .Lmemmove_fsrcul1l4 544143175Scognet 545143175Scognet.Lmemmove_fsrcul1loop4: 546143175Scognet#ifdef __ARMEB__ 547143175Scognet mov r12, lr, lsl #8 548143175Scognet#else 549143175Scognet mov r12, lr, lsr #8 550143175Scognet#endif 551143175Scognet ldr lr, [r1], #4 552143175Scognet#ifdef __ARMEB__ 553143175Scognet orr r12, r12, lr, lsr #24 554143175Scognet#else 555143175Scognet orr r12, r12, lr, lsl #24 556143175Scognet#endif 557143175Scognet str r12, [r0], #4 558143175Scognet 
subs r2, r2, #4 559143175Scognet bge .Lmemmove_fsrcul1loop4 560143175Scognet 561143175Scognet.Lmemmove_fsrcul1l4: 562143175Scognet sub r1, r1, #3 563143175Scognet b .Lmemmove_fl4 564143175Scognet 565143175Scognet.Lmemmove_fsrcul2: 566236991Simp cmp r2, #0x0c 567143175Scognet blt .Lmemmove_fsrcul2loop4 568236991Simp sub r2, r2, #0x0c 569143175Scognet stmdb sp!, {r4, r5} 570143175Scognet 571143175Scognet.Lmemmove_fsrcul2loop16: 572143175Scognet#ifdef __ARMEB__ 573143175Scognet mov r3, lr, lsl #16 574143175Scognet#else 575143175Scognet mov r3, lr, lsr #16 576143175Scognet#endif 577143175Scognet ldmia r1!, {r4, r5, r12, lr} 578143175Scognet#ifdef __ARMEB__ 579143175Scognet orr r3, r3, r4, lsr #16 580143175Scognet mov r4, r4, lsl #16 581143175Scognet orr r4, r4, r5, lsr #16 582143175Scognet mov r5, r5, lsl #16 583143175Scognet orr r5, r5, r12, lsr #16 584143175Scognet mov r12, r12, lsl #16 585143175Scognet orr r12, r12, lr, lsr #16 586143175Scognet#else 587143175Scognet orr r3, r3, r4, lsl #16 588143175Scognet mov r4, r4, lsr #16 589143175Scognet orr r4, r4, r5, lsl #16 590143175Scognet mov r5, r5, lsr #16 591143175Scognet orr r5, r5, r12, lsl #16 592143175Scognet mov r12, r12, lsr #16 593143175Scognet orr r12, r12, lr, lsl #16 594143175Scognet#endif 595143175Scognet stmia r0!, {r3-r5, r12} 596236991Simp subs r2, r2, #0x10 597143175Scognet bge .Lmemmove_fsrcul2loop16 598143175Scognet ldmia sp!, {r4, r5} 599236991Simp adds r2, r2, #0x0c 600143175Scognet blt .Lmemmove_fsrcul2l4 601143175Scognet 602143175Scognet.Lmemmove_fsrcul2loop4: 603143175Scognet#ifdef __ARMEB__ 604143175Scognet mov r12, lr, lsl #16 605143175Scognet#else 606143175Scognet mov r12, lr, lsr #16 607143175Scognet#endif 608143175Scognet ldr lr, [r1], #4 609143175Scognet#ifdef __ARMEB__ 610143175Scognet orr r12, r12, lr, lsr #16 611143175Scognet#else 612143175Scognet orr r12, r12, lr, lsl #16 613143175Scognet#endif 614143175Scognet str r12, [r0], #4 615143175Scognet subs r2, r2, #4 616143175Scognet bge 
.Lmemmove_fsrcul2loop4 617143175Scognet 618143175Scognet.Lmemmove_fsrcul2l4: 619143175Scognet sub r1, r1, #2 620143175Scognet b .Lmemmove_fl4 621143175Scognet 622143175Scognet.Lmemmove_fsrcul3: 623236991Simp cmp r2, #0x0c 624143175Scognet blt .Lmemmove_fsrcul3loop4 625236991Simp sub r2, r2, #0x0c 626143175Scognet stmdb sp!, {r4, r5} 627143175Scognet 628143175Scognet.Lmemmove_fsrcul3loop16: 629143175Scognet#ifdef __ARMEB__ 630143175Scognet mov r3, lr, lsl #24 631143175Scognet#else 632143175Scognet mov r3, lr, lsr #24 633143175Scognet#endif 634143175Scognet ldmia r1!, {r4, r5, r12, lr} 635143175Scognet#ifdef __ARMEB__ 636143175Scognet orr r3, r3, r4, lsr #8 637143175Scognet mov r4, r4, lsl #24 638143175Scognet orr r4, r4, r5, lsr #8 639143175Scognet mov r5, r5, lsl #24 640143175Scognet orr r5, r5, r12, lsr #8 641143175Scognet mov r12, r12, lsl #24 642143175Scognet orr r12, r12, lr, lsr #8 643143175Scognet#else 644143175Scognet orr r3, r3, r4, lsl #8 645143175Scognet mov r4, r4, lsr #24 646143175Scognet orr r4, r4, r5, lsl #8 647143175Scognet mov r5, r5, lsr #24 648143175Scognet orr r5, r5, r12, lsl #8 649143175Scognet mov r12, r12, lsr #24 650143175Scognet orr r12, r12, lr, lsl #8 651143175Scognet#endif 652143175Scognet stmia r0!, {r3-r5, r12} 653236991Simp subs r2, r2, #0x10 654143175Scognet bge .Lmemmove_fsrcul3loop16 655143175Scognet ldmia sp!, {r4, r5} 656236991Simp adds r2, r2, #0x0c 657143175Scognet blt .Lmemmove_fsrcul3l4 658143175Scognet 659143175Scognet.Lmemmove_fsrcul3loop4: 660143175Scognet#ifdef __ARMEB__ 661143175Scognet mov r12, lr, lsl #24 662143175Scognet#else 663143175Scognet mov r12, lr, lsr #24 664143175Scognet#endif 665143175Scognet ldr lr, [r1], #4 666143175Scognet#ifdef __ARMEB__ 667143175Scognet orr r12, r12, lr, lsr #8 668143175Scognet#else 669143175Scognet orr r12, r12, lr, lsl #8 670143175Scognet#endif 671143175Scognet str r12, [r0], #4 672143175Scognet subs r2, r2, #4 673143175Scognet bge .Lmemmove_fsrcul3loop4 674143175Scognet 
675143175Scognet.Lmemmove_fsrcul3l4: 676143175Scognet sub r1, r1, #1 677143175Scognet b .Lmemmove_fl4 678143175Scognet 679143175Scognet.Lmemmove_backwards: 680143175Scognet add r1, r1, r2 681143175Scognet add r0, r0, r2 682143175Scognet subs r2, r2, #4 683143175Scognet blt .Lmemmove_bl4 /* less than 4 bytes */ 684143175Scognet ands r12, r0, #3 685143175Scognet bne .Lmemmove_bdestul /* oh unaligned destination addr */ 686143175Scognet ands r12, r1, #3 687143175Scognet bne .Lmemmove_bsrcul /* oh unaligned source addr */ 688143175Scognet 689143175Scognet.Lmemmove_bt8: 690143175Scognet /* We have aligned source and destination */ 691143175Scognet subs r2, r2, #8 692143175Scognet blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */ 693143175Scognet stmdb sp!, {r4, lr} 694143175Scognet subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ 695143175Scognet blt .Lmemmove_bl32 696143175Scognet 697143175Scognet /* blat 32 bytes at a time */ 698143175Scognet /* XXX for really big copies perhaps we should use more registers */ 699143175Scognet.Lmemmove_bloop32: 700143175Scognet ldmdb r1!, {r3, r4, r12, lr} 701143175Scognet stmdb r0!, {r3, r4, r12, lr} 702143175Scognet ldmdb r1!, {r3, r4, r12, lr} 703143175Scognet stmdb r0!, {r3, r4, r12, lr} 704236991Simp subs r2, r2, #0x20 705143175Scognet bge .Lmemmove_bloop32 706143175Scognet 707143175Scognet.Lmemmove_bl32: 708236991Simp cmn r2, #0x10 709275418Sandrew ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 710275418Sandrew stmdbge r0!, {r3, r4, r12, lr} 711236991Simp subge r2, r2, #0x10 712236991Simp adds r2, r2, #0x14 713275418Sandrew ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ 714275418Sandrew stmdbge r0!, {r3, r12, lr} 715236991Simp subge r2, r2, #0x0c 716143175Scognet ldmia sp!, {r4, lr} 717143175Scognet 718143175Scognet.Lmemmove_bl12: 719143175Scognet adds r2, r2, #8 720143175Scognet blt .Lmemmove_bl4 721143175Scognet subs r2, r2, #4 722143175Scognet ldrlt r3, [r1, #-4]! 
723143175Scognet strlt r3, [r0, #-4]! 724275418Sandrew ldmdbge r1!, {r3, r12} 725275418Sandrew stmdbge r0!, {r3, r12} 726143175Scognet subge r2, r2, #4 727143175Scognet 728143175Scognet.Lmemmove_bl4: 729143175Scognet /* less than 4 bytes to go */ 730143175Scognet adds r2, r2, #4 731143175Scognet RETeq /* done */ 732143175Scognet 733143175Scognet /* copy the crud byte at a time */ 734143175Scognet cmp r2, #2 735143175Scognet ldrb r3, [r1, #-1]! 736143175Scognet strb r3, [r0, #-1]! 737275418Sandrew ldrbge r3, [r1, #-1]! 738275418Sandrew strbge r3, [r0, #-1]! 739275418Sandrew ldrbgt r3, [r1, #-1]! 740275418Sandrew strbgt r3, [r0, #-1]! 741143175Scognet RET 742143175Scognet 743143175Scognet /* erg - unaligned destination */ 744143175Scognet.Lmemmove_bdestul: 745143175Scognet cmp r12, #2 746143175Scognet 747143175Scognet /* align destination with byte copies */ 748143175Scognet ldrb r3, [r1, #-1]! 749143175Scognet strb r3, [r0, #-1]! 750275418Sandrew ldrbge r3, [r1, #-1]! 751275418Sandrew strbge r3, [r0, #-1]! 752275418Sandrew ldrbgt r3, [r1, #-1]! 753275418Sandrew strbgt r3, [r0, #-1]! 754143175Scognet subs r2, r2, r12 755143175Scognet blt .Lmemmove_bl4 /* less than 4 bytes to go */ 756143175Scognet ands r12, r1, #3 757143175Scognet beq .Lmemmove_bt8 /* we have an aligned source */ 758143175Scognet 759143175Scognet /* erg - unaligned source */ 760143175Scognet /* This is where it gets nasty ... 
*/ 761143175Scognet.Lmemmove_bsrcul: 762143175Scognet bic r1, r1, #3 763143175Scognet ldr r3, [r1, #0] 764143175Scognet cmp r12, #2 765143175Scognet blt .Lmemmove_bsrcul1 766143175Scognet beq .Lmemmove_bsrcul2 767236991Simp cmp r2, #0x0c 768143175Scognet blt .Lmemmove_bsrcul3loop4 769236991Simp sub r2, r2, #0x0c 770143175Scognet stmdb sp!, {r4, r5, lr} 771143175Scognet 772143175Scognet.Lmemmove_bsrcul3loop16: 773143175Scognet#ifdef __ARMEB__ 774143175Scognet mov lr, r3, lsr #8 775143175Scognet#else 776143175Scognet mov lr, r3, lsl #8 777143175Scognet#endif 778143175Scognet ldmdb r1!, {r3-r5, r12} 779143175Scognet#ifdef __ARMEB__ 780143175Scognet orr lr, lr, r12, lsl #24 781143175Scognet mov r12, r12, lsr #8 782143175Scognet orr r12, r12, r5, lsl #24 783143175Scognet mov r5, r5, lsr #8 784143175Scognet orr r5, r5, r4, lsl #24 785143175Scognet mov r4, r4, lsr #8 786143175Scognet orr r4, r4, r3, lsl #24 787143175Scognet#else 788143175Scognet orr lr, lr, r12, lsr #24 789143175Scognet mov r12, r12, lsl #8 790143175Scognet orr r12, r12, r5, lsr #24 791143175Scognet mov r5, r5, lsl #8 792143175Scognet orr r5, r5, r4, lsr #24 793143175Scognet mov r4, r4, lsl #8 794143175Scognet orr r4, r4, r3, lsr #24 795143175Scognet#endif 796143175Scognet stmdb r0!, {r4, r5, r12, lr} 797236991Simp subs r2, r2, #0x10 798143175Scognet bge .Lmemmove_bsrcul3loop16 799143175Scognet ldmia sp!, {r4, r5, lr} 800236991Simp adds r2, r2, #0x0c 801143175Scognet blt .Lmemmove_bsrcul3l4 802143175Scognet 803143175Scognet.Lmemmove_bsrcul3loop4: 804143175Scognet#ifdef __ARMEB__ 805143175Scognet mov r12, r3, lsr #8 806143175Scognet#else 807143175Scognet mov r12, r3, lsl #8 808143175Scognet#endif 809143175Scognet ldr r3, [r1, #-4]! 810143175Scognet#ifdef __ARMEB__ 811143175Scognet orr r12, r12, r3, lsl #24 812143175Scognet#else 813143175Scognet orr r12, r12, r3, lsr #24 814143175Scognet#endif 815143175Scognet str r12, [r0, #-4]! 
816143175Scognet subs r2, r2, #4 817143175Scognet bge .Lmemmove_bsrcul3loop4 818143175Scognet 819143175Scognet.Lmemmove_bsrcul3l4: 820143175Scognet add r1, r1, #3 821143175Scognet b .Lmemmove_bl4 822143175Scognet 823143175Scognet.Lmemmove_bsrcul2: 824236991Simp cmp r2, #0x0c 825143175Scognet blt .Lmemmove_bsrcul2loop4 826236991Simp sub r2, r2, #0x0c 827143175Scognet stmdb sp!, {r4, r5, lr} 828143175Scognet 829143175Scognet.Lmemmove_bsrcul2loop16: 830143175Scognet#ifdef __ARMEB__ 831143175Scognet mov lr, r3, lsr #16 832143175Scognet#else 833143175Scognet mov lr, r3, lsl #16 834143175Scognet#endif 835143175Scognet ldmdb r1!, {r3-r5, r12} 836143175Scognet#ifdef __ARMEB__ 837143175Scognet orr lr, lr, r12, lsl #16 838143175Scognet mov r12, r12, lsr #16 839143175Scognet orr r12, r12, r5, lsl #16 840143175Scognet mov r5, r5, lsr #16 841143175Scognet orr r5, r5, r4, lsl #16 842143175Scognet mov r4, r4, lsr #16 843143175Scognet orr r4, r4, r3, lsl #16 844143175Scognet#else 845143175Scognet orr lr, lr, r12, lsr #16 846143175Scognet mov r12, r12, lsl #16 847143175Scognet orr r12, r12, r5, lsr #16 848143175Scognet mov r5, r5, lsl #16 849143175Scognet orr r5, r5, r4, lsr #16 850143175Scognet mov r4, r4, lsl #16 851143175Scognet orr r4, r4, r3, lsr #16 852143175Scognet#endif 853143175Scognet stmdb r0!, {r4, r5, r12, lr} 854236991Simp subs r2, r2, #0x10 855143175Scognet bge .Lmemmove_bsrcul2loop16 856143175Scognet ldmia sp!, {r4, r5, lr} 857236991Simp adds r2, r2, #0x0c 858143175Scognet blt .Lmemmove_bsrcul2l4 859143175Scognet 860143175Scognet.Lmemmove_bsrcul2loop4: 861143175Scognet#ifdef __ARMEB__ 862143175Scognet mov r12, r3, lsr #16 863143175Scognet#else 864143175Scognet mov r12, r3, lsl #16 865143175Scognet#endif 866143175Scognet ldr r3, [r1, #-4]! 867143175Scognet#ifdef __ARMEB__ 868143175Scognet orr r12, r12, r3, lsl #16 869143175Scognet#else 870143175Scognet orr r12, r12, r3, lsr #16 871143175Scognet#endif 872143175Scognet str r12, [r0, #-4]! 
873143175Scognet subs r2, r2, #4 874143175Scognet bge .Lmemmove_bsrcul2loop4 875143175Scognet 876143175Scognet.Lmemmove_bsrcul2l4: 877143175Scognet add r1, r1, #2 878143175Scognet b .Lmemmove_bl4 879143175Scognet 880143175Scognet.Lmemmove_bsrcul1: 881236991Simp cmp r2, #0x0c 882143175Scognet blt .Lmemmove_bsrcul1loop4 883236991Simp sub r2, r2, #0x0c 884143175Scognet stmdb sp!, {r4, r5, lr} 885143175Scognet 886143175Scognet.Lmemmove_bsrcul1loop32: 887143175Scognet#ifdef __ARMEB__ 888143175Scognet mov lr, r3, lsr #24 889143175Scognet#else 890143175Scognet mov lr, r3, lsl #24 891143175Scognet#endif 892143175Scognet ldmdb r1!, {r3-r5, r12} 893143175Scognet#ifdef __ARMEB__ 894143175Scognet orr lr, lr, r12, lsl #8 895143175Scognet mov r12, r12, lsr #24 896143175Scognet orr r12, r12, r5, lsl #8 897143175Scognet mov r5, r5, lsr #24 898143175Scognet orr r5, r5, r4, lsl #8 899143175Scognet mov r4, r4, lsr #24 900143175Scognet orr r4, r4, r3, lsl #8 901143175Scognet#else 902143175Scognet orr lr, lr, r12, lsr #8 903143175Scognet mov r12, r12, lsl #24 904143175Scognet orr r12, r12, r5, lsr #8 905143175Scognet mov r5, r5, lsl #24 906143175Scognet orr r5, r5, r4, lsr #8 907143175Scognet mov r4, r4, lsl #24 908143175Scognet orr r4, r4, r3, lsr #8 909143175Scognet#endif 910143175Scognet stmdb r0!, {r4, r5, r12, lr} 911236991Simp subs r2, r2, #0x10 912143175Scognet bge .Lmemmove_bsrcul1loop32 913143175Scognet ldmia sp!, {r4, r5, lr} 914236991Simp adds r2, r2, #0x0c 915143175Scognet blt .Lmemmove_bsrcul1l4 916143175Scognet 917143175Scognet.Lmemmove_bsrcul1loop4: 918143175Scognet#ifdef __ARMEB__ 919143175Scognet mov r12, r3, lsr #24 920143175Scognet#else 921143175Scognet mov r12, r3, lsl #24 922143175Scognet#endif 923143175Scognet ldr r3, [r1, #-4]! 924143175Scognet#ifdef __ARMEB__ 925143175Scognet orr r12, r12, r3, lsl #8 926143175Scognet#else 927143175Scognet orr r12, r12, r3, lsr #8 928143175Scognet#endif 929143175Scognet str r12, [r0, #-4]! 
/* 4 more bytes stored; repeat while r2 stays non-negative */
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
EEND(memmove)
END(bcopy)

#if !defined(_ARM_ARCH_5E)
/*
 * memcpy for CPUs without the ARMv5E extensions.
 *
 * In:	r0 = destination, r1 = source, r2 = byte count.
 * Out:	r0 = destination (the entry value is saved on the stack and
 *	reloaded on every return path).
 * Scratch: r1, r2, r3, r12 and flags; r4, r5 and lr are used internally
 *	and restored from the stack before returning.
 *
 * If the function pointer stored at the address held in .L_arm_memcpy is
 * non-NULL and the count is at least the value stored at the address held
 * in .L_min_memcpy_size, that function is called first with r0-r2
 * unchanged and r3 = 0.  A zero return from it ends the copy; any other
 * return value falls through to the software copy below.
 *
 * Throughout the software copy r2 is kept biased (4, 12 or 32 is
 * subtracted up front) so that the sign of r2 says whether another
 * chunk of that size is still available.
 */
ENTRY(memcpy)
	/* Do not check arm_memcpy if we're running from flash */
#if defined(FLASHADDR) && defined(PHYSADDR)
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal		/* no hook installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal		/* too small for the hook */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* return address for the indirect call */
	ldr	pc, [r4]
	cmp	r0, #0			/* hook returned 0? */
	ldmfd	sp!, {r0-r2, r4, lr}	/* ldm leaves the flags untouched */
	RETeq

.Lnormal:
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	/* 4 or 8 bytes can still be moved as whole words */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	/*
	 * NOTE(review): the guard below has a single trailing underscore;
	 * the toolchain macro is presumably __APCS_26__ - confirm before
	 * changing, since today the #else arm is what gets assembled unless
	 * __APCS_26_ is defined somewhere else.
	 */
#ifdef __APCS_26_
	ldmiaeq sp!, {r0, pc}^		/* done */
#else
	ldmiaeq	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time: 1 always, 2nd if >= 2, 3rd if > 2 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dest */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * The destination is word aligned but the source is r12 (1..3)
	 * bytes past a word boundary.  r1 is rounded down, whole source
	 * words are loaded, and each output word is stitched together from
	 * the tail of one source word (kept in lr) and the head of the
	 * next.  Which end of a register holds the lower-addressed bytes
	 * depends on byte order, so the shift directions are swapped for
	 * __ARMEB__.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	/* source is 1 byte past a word boundary */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* back to the first uncopied byte */
	b	.Lmemcpy_l4

	/* source is 2 bytes past a word boundary */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* back to the first uncopied byte */
	b	.Lmemcpy_l4

	/* source is 3 bytes past a word boundary */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* back to the first uncopied byte */
	b	.Lmemcpy_l4
END(memcpy)

1175129254Scognet#else 1176129254Scognet/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 1177129254ScognetENTRY(memcpy) 1178129254Scognet pld [r1] 1179129254Scognet cmp r2, #0x0c 1180129254Scognet ble .Lmemcpy_short /* <= 12 bytes */ 1181167003Scognet#ifdef FLASHADDR 1182167003Scognet#if FLASHADDR > PHYSADDR 1183167003Scognet ldr r3, =FLASHADDR 1184167003Scognet cmp r3, pc 1185167003Scognet bls .Lnormal 1186167003Scognet#else 1187167003Scognet ldr r3, =FLASHADDR 1188167003Scognet cmp r3, pc 1189167003Scognet bhi .Lnormal 1190167003Scognet#endif 1191167003Scognet#endif 1192150864Scognet ldr r3, .L_arm_memcpy 1193150864Scognet ldr r3, [r3] 1194150864Scognet cmp r3, #0 1195150864Scognet beq .Lnormal 1196150864Scognet ldr r3, .L_min_memcpy_size 1197150864Scognet ldr r3, [r3] 1198150864Scognet cmp r2, r3 1199150864Scognet blt .Lnormal 1200150864Scognet stmfd sp!, {r0-r2, r4, lr} 1201150864Scognet mov r3, #0 1202150864Scognet ldr r4, .L_arm_memcpy 1203150864Scognet mov lr, pc 1204150864Scognet ldr pc, [r4] 1205150864Scognet cmp r0, #0 1206150864Scognet ldmfd sp!, {r0-r2, r4, lr} 1207150864Scognet RETeq 1208150864Scognet.Lnormal: 1209129254Scognet mov r3, r0 /* We must not clobber r0 */ 1210129254Scognet 1211129254Scognet /* Word-align the destination buffer */ 1212129254Scognet ands ip, r3, #0x03 /* Already word aligned? */ 1213129254Scognet beq .Lmemcpy_wordaligned /* Yup */ 1214129254Scognet cmp ip, #0x02 1215129254Scognet ldrb ip, [r1], #0x01 1216129254Scognet sub r2, r2, #0x01 1217129254Scognet strb ip, [r3], #0x01 1218275418Sandrew ldrble ip, [r1], #0x01 1219129254Scognet suble r2, r2, #0x01 1220275418Sandrew strble ip, [r3], #0x01 1221275418Sandrew ldrblt ip, [r1], #0x01 1222129254Scognet sublt r2, r2, #0x01 1223275418Sandrew strblt ip, [r3], #0x01 1224129254Scognet 1225129254Scognet /* Destination buffer is now word aligned */ 1226129254Scognet.Lmemcpy_wordaligned: 1227129254Scognet ands ip, r1, #0x03 /* Is src also word-aligned? 
*/ 1228129254Scognet bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 1229129254Scognet 1230129254Scognet /* Quad-align the destination buffer */ 1231129254Scognet tst r3, #0x07 /* Already quad aligned? */ 1232129254Scognet ldrne ip, [r1], #0x04 1233129254Scognet stmfd sp!, {r4-r9} /* Free up some registers */ 1234129254Scognet subne r2, r2, #0x04 1235129254Scognet strne ip, [r3], #0x04 1236129254Scognet 1237129254Scognet /* Destination buffer quad aligned, source is at least word aligned */ 1238129254Scognet subs r2, r2, #0x80 1239129254Scognet blt .Lmemcpy_w_lessthan128 1240129254Scognet 1241129254Scognet /* Copy 128 bytes at a time */ 1242129254Scognet.Lmemcpy_w_loop128: 1243129254Scognet ldr r4, [r1], #0x04 /* LD:00-03 */ 1244129254Scognet ldr r5, [r1], #0x04 /* LD:04-07 */ 1245129254Scognet pld [r1, #0x18] /* Prefetch 0x20 */ 1246129254Scognet ldr r6, [r1], #0x04 /* LD:08-0b */ 1247129254Scognet ldr r7, [r1], #0x04 /* LD:0c-0f */ 1248129254Scognet ldr r8, [r1], #0x04 /* LD:10-13 */ 1249129254Scognet ldr r9, [r1], #0x04 /* LD:14-17 */ 1250129254Scognet strd r4, [r3], #0x08 /* ST:00-07 */ 1251129254Scognet ldr r4, [r1], #0x04 /* LD:18-1b */ 1252129254Scognet ldr r5, [r1], #0x04 /* LD:1c-1f */ 1253129254Scognet strd r6, [r3], #0x08 /* ST:08-0f */ 1254129254Scognet ldr r6, [r1], #0x04 /* LD:20-23 */ 1255129254Scognet ldr r7, [r1], #0x04 /* LD:24-27 */ 1256129254Scognet pld [r1, #0x18] /* Prefetch 0x40 */ 1257129254Scognet strd r8, [r3], #0x08 /* ST:10-17 */ 1258129254Scognet ldr r8, [r1], #0x04 /* LD:28-2b */ 1259129254Scognet ldr r9, [r1], #0x04 /* LD:2c-2f */ 1260129254Scognet strd r4, [r3], #0x08 /* ST:18-1f */ 1261129254Scognet ldr r4, [r1], #0x04 /* LD:30-33 */ 1262129254Scognet ldr r5, [r1], #0x04 /* LD:34-37 */ 1263129254Scognet strd r6, [r3], #0x08 /* ST:20-27 */ 1264129254Scognet ldr r6, [r1], #0x04 /* LD:38-3b */ 1265129254Scognet ldr r7, [r1], #0x04 /* LD:3c-3f */ 1266129254Scognet strd r8, [r3], #0x08 /* ST:28-2f */ 1267129254Scognet ldr r8, 
[r1], #0x04 /* LD:40-43 */ 1268129254Scognet ldr r9, [r1], #0x04 /* LD:44-47 */ 1269129254Scognet pld [r1, #0x18] /* Prefetch 0x60 */ 1270129254Scognet strd r4, [r3], #0x08 /* ST:30-37 */ 1271129254Scognet ldr r4, [r1], #0x04 /* LD:48-4b */ 1272129254Scognet ldr r5, [r1], #0x04 /* LD:4c-4f */ 1273129254Scognet strd r6, [r3], #0x08 /* ST:38-3f */ 1274129254Scognet ldr r6, [r1], #0x04 /* LD:50-53 */ 1275129254Scognet ldr r7, [r1], #0x04 /* LD:54-57 */ 1276129254Scognet strd r8, [r3], #0x08 /* ST:40-47 */ 1277129254Scognet ldr r8, [r1], #0x04 /* LD:58-5b */ 1278129254Scognet ldr r9, [r1], #0x04 /* LD:5c-5f */ 1279129254Scognet strd r4, [r3], #0x08 /* ST:48-4f */ 1280129254Scognet ldr r4, [r1], #0x04 /* LD:60-63 */ 1281129254Scognet ldr r5, [r1], #0x04 /* LD:64-67 */ 1282129254Scognet pld [r1, #0x18] /* Prefetch 0x80 */ 1283129254Scognet strd r6, [r3], #0x08 /* ST:50-57 */ 1284129254Scognet ldr r6, [r1], #0x04 /* LD:68-6b */ 1285129254Scognet ldr r7, [r1], #0x04 /* LD:6c-6f */ 1286129254Scognet strd r8, [r3], #0x08 /* ST:58-5f */ 1287129254Scognet ldr r8, [r1], #0x04 /* LD:70-73 */ 1288129254Scognet ldr r9, [r1], #0x04 /* LD:74-77 */ 1289129254Scognet strd r4, [r3], #0x08 /* ST:60-67 */ 1290129254Scognet ldr r4, [r1], #0x04 /* LD:78-7b */ 1291129254Scognet ldr r5, [r1], #0x04 /* LD:7c-7f */ 1292129254Scognet strd r6, [r3], #0x08 /* ST:68-6f */ 1293129254Scognet strd r8, [r3], #0x08 /* ST:70-77 */ 1294129254Scognet subs r2, r2, #0x80 1295129254Scognet strd r4, [r3], #0x08 /* ST:78-7f */ 1296129254Scognet bge .Lmemcpy_w_loop128 1297129254Scognet 1298129254Scognet.Lmemcpy_w_lessthan128: 1299129254Scognet adds r2, r2, #0x80 /* Adjust for extra sub */ 1300275418Sandrew ldmfdeq sp!, {r4-r9} 1301137463Scognet RETeq /* Return now if done */ 1302129254Scognet subs r2, r2, #0x20 1303129254Scognet blt .Lmemcpy_w_lessthan32 1304129254Scognet 1305129254Scognet /* Copy 32 bytes at a time */ 1306129254Scognet.Lmemcpy_w_loop32: 1307129254Scognet ldr r4, [r1], #0x04 1308129254Scognet 
ldr r5, [r1], #0x04 1309129254Scognet pld [r1, #0x18] 1310129254Scognet ldr r6, [r1], #0x04 1311129254Scognet ldr r7, [r1], #0x04 1312129254Scognet ldr r8, [r1], #0x04 1313129254Scognet ldr r9, [r1], #0x04 1314129254Scognet strd r4, [r3], #0x08 1315129254Scognet ldr r4, [r1], #0x04 1316129254Scognet ldr r5, [r1], #0x04 1317129254Scognet strd r6, [r3], #0x08 1318129254Scognet strd r8, [r3], #0x08 1319129254Scognet subs r2, r2, #0x20 1320129254Scognet strd r4, [r3], #0x08 1321129254Scognet bge .Lmemcpy_w_loop32 1322129254Scognet 1323129254Scognet.Lmemcpy_w_lessthan32: 1324129254Scognet adds r2, r2, #0x20 /* Adjust for extra sub */ 1325275418Sandrew ldmfdeq sp!, {r4-r9} 1326137463Scognet RETeq /* Return now if done */ 1327129254Scognet 1328129254Scognet and r4, r2, #0x18 1329129254Scognet rsbs r4, r4, #0x18 1330129254Scognet addne pc, pc, r4, lsl #1 1331129254Scognet nop 1332129254Scognet 1333129254Scognet /* At least 24 bytes remaining */ 1334129254Scognet ldr r4, [r1], #0x04 1335129254Scognet ldr r5, [r1], #0x04 1336129254Scognet sub r2, r2, #0x08 1337129254Scognet strd r4, [r3], #0x08 1338129254Scognet 1339129254Scognet /* At least 16 bytes remaining */ 1340129254Scognet ldr r4, [r1], #0x04 1341129254Scognet ldr r5, [r1], #0x04 1342129254Scognet sub r2, r2, #0x08 1343129254Scognet strd r4, [r3], #0x08 1344129254Scognet 1345129254Scognet /* At least 8 bytes remaining */ 1346129254Scognet ldr r4, [r1], #0x04 1347129254Scognet ldr r5, [r1], #0x04 1348129254Scognet subs r2, r2, #0x08 1349129254Scognet strd r4, [r3], #0x08 1350129254Scognet 1351129254Scognet /* Less than 8 bytes remaining */ 1352129254Scognet ldmfd sp!, {r4-r9} 1353137463Scognet RETeq /* Return now if done */ 1354129254Scognet subs r2, r2, #0x04 1355129254Scognet ldrge ip, [r1], #0x04 1356129254Scognet strge ip, [r3], #0x04 1357137463Scognet RETeq /* Return now if done */ 1358129254Scognet addlt r2, r2, #0x04 1359129254Scognet ldrb ip, [r1], #0x01 1360129254Scognet cmp r2, #0x02 1361275418Sandrew ldrbge 
r2, [r1], #0x01 1362129254Scognet strb ip, [r3], #0x01 1363275418Sandrew ldrbgt ip, [r1] 1364275418Sandrew strbge r2, [r3], #0x01 1365275418Sandrew strbgt ip, [r3] 1366137463Scognet RET 1367275522Sandrew/* Place a literal pool here for the above ldr instructions to use */ 1368275522Sandrew.ltorg 1369129254Scognet 1370129254Scognet 1371129254Scognet/* 1372129254Scognet * At this point, it has not been possible to word align both buffers. 1373129254Scognet * The destination buffer is word aligned, but the source buffer is not. 1374129254Scognet */ 1375129254Scognet.Lmemcpy_bad_align: 1376129254Scognet stmfd sp!, {r4-r7} 1377129254Scognet bic r1, r1, #0x03 1378129254Scognet cmp ip, #2 1379129254Scognet ldr ip, [r1], #0x04 1380129254Scognet bgt .Lmemcpy_bad3 1381129254Scognet beq .Lmemcpy_bad2 1382129254Scognet b .Lmemcpy_bad1 1383129254Scognet 1384129254Scognet.Lmemcpy_bad1_loop16: 1385129254Scognet#ifdef __ARMEB__ 1386129254Scognet mov r4, ip, lsl #8 1387129254Scognet#else 1388129254Scognet mov r4, ip, lsr #8 1389129254Scognet#endif 1390129254Scognet ldr r5, [r1], #0x04 1391129254Scognet pld [r1, #0x018] 1392129254Scognet ldr r6, [r1], #0x04 1393129254Scognet ldr r7, [r1], #0x04 1394129254Scognet ldr ip, [r1], #0x04 1395129254Scognet#ifdef __ARMEB__ 1396129254Scognet orr r4, r4, r5, lsr #24 1397129254Scognet mov r5, r5, lsl #8 1398129254Scognet orr r5, r5, r6, lsr #24 1399129254Scognet mov r6, r6, lsl #8 1400129254Scognet orr r6, r6, r7, lsr #24 1401129254Scognet mov r7, r7, lsl #8 1402129254Scognet orr r7, r7, ip, lsr #24 1403129254Scognet#else 1404129254Scognet orr r4, r4, r5, lsl #24 1405129254Scognet mov r5, r5, lsr #8 1406129254Scognet orr r5, r5, r6, lsl #24 1407129254Scognet mov r6, r6, lsr #8 1408129254Scognet orr r6, r6, r7, lsl #24 1409129254Scognet mov r7, r7, lsr #8 1410129254Scognet orr r7, r7, ip, lsl #24 1411129254Scognet#endif 1412129254Scognet str r4, [r3], #0x04 1413129254Scognet str r5, [r3], #0x04 1414129254Scognet str r6, [r3], #0x04 
1415129254Scognet str r7, [r3], #0x04 1416129254Scognet.Lmemcpy_bad1: 1417236991Simp subs r2, r2, #0x10 1418129254Scognet bge .Lmemcpy_bad1_loop16 1419129254Scognet 1420236991Simp adds r2, r2, #0x10 1421275418Sandrew ldmfdeq sp!, {r4-r7} 1422137463Scognet RETeq /* Return now if done */ 1423129254Scognet subs r2, r2, #0x04 1424129254Scognet sublt r1, r1, #0x03 1425129254Scognet blt .Lmemcpy_bad_done 1426129254Scognet 1427129254Scognet.Lmemcpy_bad1_loop4: 1428129254Scognet#ifdef __ARMEB__ 1429129254Scognet mov r4, ip, lsl #8 1430129254Scognet#else 1431129254Scognet mov r4, ip, lsr #8 1432129254Scognet#endif 1433129254Scognet ldr ip, [r1], #0x04 1434129254Scognet subs r2, r2, #0x04 1435129254Scognet#ifdef __ARMEB__ 1436129254Scognet orr r4, r4, ip, lsr #24 1437129254Scognet#else 1438129254Scognet orr r4, r4, ip, lsl #24 1439129254Scognet#endif 1440129254Scognet str r4, [r3], #0x04 1441129254Scognet bge .Lmemcpy_bad1_loop4 1442129254Scognet sub r1, r1, #0x03 1443129254Scognet b .Lmemcpy_bad_done 1444129254Scognet 1445129254Scognet.Lmemcpy_bad2_loop16: 1446129254Scognet#ifdef __ARMEB__ 1447129254Scognet mov r4, ip, lsl #16 1448129254Scognet#else 1449129254Scognet mov r4, ip, lsr #16 1450129254Scognet#endif 1451129254Scognet ldr r5, [r1], #0x04 1452129254Scognet pld [r1, #0x018] 1453129254Scognet ldr r6, [r1], #0x04 1454129254Scognet ldr r7, [r1], #0x04 1455129254Scognet ldr ip, [r1], #0x04 1456129254Scognet#ifdef __ARMEB__ 1457129254Scognet orr r4, r4, r5, lsr #16 1458129254Scognet mov r5, r5, lsl #16 1459129254Scognet orr r5, r5, r6, lsr #16 1460129254Scognet mov r6, r6, lsl #16 1461129254Scognet orr r6, r6, r7, lsr #16 1462129254Scognet mov r7, r7, lsl #16 1463129254Scognet orr r7, r7, ip, lsr #16 1464129254Scognet#else 1465129254Scognet orr r4, r4, r5, lsl #16 1466129254Scognet mov r5, r5, lsr #16 1467129254Scognet orr r5, r5, r6, lsl #16 1468129254Scognet mov r6, r6, lsr #16 1469129254Scognet orr r6, r6, r7, lsl #16 1470129254Scognet mov r7, r7, lsr #16 
1471129254Scognet orr r7, r7, ip, lsl #16 1472129254Scognet#endif 1473129254Scognet str r4, [r3], #0x04 1474129254Scognet str r5, [r3], #0x04 1475129254Scognet str r6, [r3], #0x04 1476129254Scognet str r7, [r3], #0x04 1477129254Scognet.Lmemcpy_bad2: 1478236991Simp subs r2, r2, #0x10 1479129254Scognet bge .Lmemcpy_bad2_loop16 1480129254Scognet 1481236991Simp adds r2, r2, #0x10 1482275418Sandrew ldmfdeq sp!, {r4-r7} 1483137463Scognet RETeq /* Return now if done */ 1484129254Scognet subs r2, r2, #0x04 1485129254Scognet sublt r1, r1, #0x02 1486129254Scognet blt .Lmemcpy_bad_done 1487129254Scognet 1488129254Scognet.Lmemcpy_bad2_loop4: 1489129254Scognet#ifdef __ARMEB__ 1490129254Scognet mov r4, ip, lsl #16 1491129254Scognet#else 1492129254Scognet mov r4, ip, lsr #16 1493129254Scognet#endif 1494129254Scognet ldr ip, [r1], #0x04 1495129254Scognet subs r2, r2, #0x04 1496129254Scognet#ifdef __ARMEB__ 1497129254Scognet orr r4, r4, ip, lsr #16 1498129254Scognet#else 1499129254Scognet orr r4, r4, ip, lsl #16 1500129254Scognet#endif 1501129254Scognet str r4, [r3], #0x04 1502129254Scognet bge .Lmemcpy_bad2_loop4 1503129254Scognet sub r1, r1, #0x02 1504129254Scognet b .Lmemcpy_bad_done 1505129254Scognet 1506129254Scognet.Lmemcpy_bad3_loop16: 1507129254Scognet#ifdef __ARMEB__ 1508129254Scognet mov r4, ip, lsl #24 1509129254Scognet#else 1510129254Scognet mov r4, ip, lsr #24 1511129254Scognet#endif 1512129254Scognet ldr r5, [r1], #0x04 1513129254Scognet pld [r1, #0x018] 1514129254Scognet ldr r6, [r1], #0x04 1515129254Scognet ldr r7, [r1], #0x04 1516129254Scognet ldr ip, [r1], #0x04 1517129254Scognet#ifdef __ARMEB__ 1518129254Scognet orr r4, r4, r5, lsr #8 1519129254Scognet mov r5, r5, lsl #24 1520129254Scognet orr r5, r5, r6, lsr #8 1521129254Scognet mov r6, r6, lsl #24 1522129254Scognet orr r6, r6, r7, lsr #8 1523129254Scognet mov r7, r7, lsl #24 1524129254Scognet orr r7, r7, ip, lsr #8 1525129254Scognet#else 1526129254Scognet orr r4, r4, r5, lsl #8 1527129254Scognet mov r5, r5, lsr 
#24 1528129254Scognet orr r5, r5, r6, lsl #8 1529129254Scognet mov r6, r6, lsr #24 1530129254Scognet orr r6, r6, r7, lsl #8 1531129254Scognet mov r7, r7, lsr #24 1532129254Scognet orr r7, r7, ip, lsl #8 1533129254Scognet#endif 1534129254Scognet str r4, [r3], #0x04 1535129254Scognet str r5, [r3], #0x04 1536129254Scognet str r6, [r3], #0x04 1537129254Scognet str r7, [r3], #0x04 1538129254Scognet.Lmemcpy_bad3: 1539236991Simp subs r2, r2, #0x10 1540129254Scognet bge .Lmemcpy_bad3_loop16 1541129254Scognet 1542236991Simp adds r2, r2, #0x10 1543275418Sandrew ldmfdeq sp!, {r4-r7} 1544137463Scognet RETeq /* Return now if done */ 1545129254Scognet subs r2, r2, #0x04 1546129254Scognet sublt r1, r1, #0x01 1547129254Scognet blt .Lmemcpy_bad_done 1548129254Scognet 1549129254Scognet.Lmemcpy_bad3_loop4: 1550129254Scognet#ifdef __ARMEB__ 1551129254Scognet mov r4, ip, lsl #24 1552129254Scognet#else 1553129254Scognet mov r4, ip, lsr #24 1554129254Scognet#endif 1555129254Scognet ldr ip, [r1], #0x04 1556129254Scognet subs r2, r2, #0x04 1557129254Scognet#ifdef __ARMEB__ 1558129254Scognet orr r4, r4, ip, lsr #8 1559129254Scognet#else 1560129254Scognet orr r4, r4, ip, lsl #8 1561129254Scognet#endif 1562129254Scognet str r4, [r3], #0x04 1563129254Scognet bge .Lmemcpy_bad3_loop4 1564129254Scognet sub r1, r1, #0x01 1565129254Scognet 1566129254Scognet.Lmemcpy_bad_done: 1567129254Scognet ldmfd sp!, {r4-r7} 1568129254Scognet adds r2, r2, #0x04 1569137463Scognet RETeq 1570129254Scognet ldrb ip, [r1], #0x01 1571129254Scognet cmp r2, #0x02 1572275418Sandrew ldrbge r2, [r1], #0x01 1573129254Scognet strb ip, [r3], #0x01 1574275418Sandrew ldrbgt ip, [r1] 1575275418Sandrew strbge r2, [r3], #0x01 1576275418Sandrew strbgt ip, [r3] 1577137463Scognet RET 1578129254Scognet 1579129254Scognet 1580129254Scognet/* 1581129254Scognet * Handle short copies (less than 16 bytes), possibly misaligned. 
1582129254Scognet * Some of these are *very* common, thanks to the network stack, 1583129254Scognet * and so are handled specially. 1584129254Scognet */ 1585129254Scognet.Lmemcpy_short: 1586129254Scognet add pc, pc, r2, lsl #2 /* Computed branch on the length in r2: pc reads as this instruction + 8, so 0 lands just past the nop and each table entry is one 4-byte instruction */ 1587129254Scognet nop 1588137463Scognet RET /* 0x00 */ 1589129254Scognet b .Lmemcpy_bytewise /* 0x01 */ 1590129254Scognet b .Lmemcpy_bytewise /* 0x02 */ 1591129254Scognet b .Lmemcpy_bytewise /* 0x03 */ 1592129254Scognet b .Lmemcpy_4 /* 0x04 */ 1593129254Scognet b .Lmemcpy_bytewise /* 0x05 */ 1594129254Scognet b .Lmemcpy_6 /* 0x06 */ 1595129254Scognet b .Lmemcpy_bytewise /* 0x07 */ 1596129254Scognet b .Lmemcpy_8 /* 0x08 */ 1597129254Scognet b .Lmemcpy_bytewise /* 0x09 */ 1598129254Scognet b .Lmemcpy_bytewise /* 0x0a */ 1599129254Scognet b .Lmemcpy_bytewise /* 0x0b */ 1600129254Scognet b .Lmemcpy_c /* 0x0c */ /* NOTE(review): the table ends at 0x0c although the header says "less than 16 bytes"; presumably callers only branch here with r2 <= 0x0c -- confirm */ 1601129254Scognet.Lmemcpy_bytewise: /* Byte-at-a-time copy of r2 bytes from [r1] to [r0]; r2 must be non-zero here (length 0 returns from the table above) */ 1602129254Scognet mov r3, r0 /* We must not clobber r0 */ 1603129254Scognet ldrb ip, [r1], #0x01 1604129254Scognet1: subs r2, r2, #0x01 /* Count down; these flags gate the conditional load and the loop branch below */ 1605129254Scognet strb ip, [r3], #0x01 1606275418Sandrew ldrbne ip, [r1], #0x01 1607129254Scognet bne 1b 1608137463Scognet RET 1609129254Scognet 1610129254Scognet/****************************************************************************** 1611129254Scognet * Special case for 4 byte copies 1612129254Scognet */ 1613129254Scognet#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1614129254Scognet#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1615129254Scognet LMEMCPY_4_PAD 1616129254Scognet.Lmemcpy_4: 1617129254Scognet and r2, r1, #0x03 1618129254Scognet orr r2, r2, r0, lsl #2 1619129254Scognet ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3); Z is set when both are 32-bit aligned */ 1620129254Scognet sub r3, pc, #0x14 /* r3 = address of .Lmemcpy_4: pc reads as this instruction + 8, which is 0x14 past the label */ 1621129254Scognet addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 /* Non-zero index: jump to that alignment case's 64-byte slot; index 0 falls through */ 1622129254Scognet 1623129254Scognet/* 1624129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 1625129254Scognet */ 1626129254Scognet ldr r2, [r1] 1627129254Scognet str r2, [r0] 1628137463Scognet RET 1629129254Scognet LMEMCPY_4_PAD 1630129254Scognet 1631129254Scognet/*
1632129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 1633129254Scognet */ 1634129254Scognet ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1635129254Scognet ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1636129254Scognet#ifdef __ARMEB__ 1637129254Scognet mov r3, r3, lsl #8 /* r3 = 012. */ 1638129254Scognet orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1639129254Scognet#else 1640129254Scognet mov r3, r3, lsr #8 /* r3 = .210 */ 1641129254Scognet orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1642129254Scognet#endif 1643129254Scognet str r3, [r0] 1644137463Scognet RET 1645129254Scognet LMEMCPY_4_PAD 1646129254Scognet 1647129254Scognet/* 1648129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 1649129254Scognet */ 1650129254Scognet#ifdef __ARMEB__ 1651129254Scognet ldrh r3, [r1] 1652129254Scognet ldrh r2, [r1, #0x02] 1653129254Scognet#else 1654129254Scognet ldrh r3, [r1, #0x02] 1655129254Scognet ldrh r2, [r1] 1656129254Scognet#endif 1657129254Scognet orr r3, r2, r3, lsl #16 1658129254Scognet str r3, [r0] 1659137463Scognet RET 1660129254Scognet LMEMCPY_4_PAD 1661129254Scognet 1662129254Scognet/* 1663129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 1664129254Scognet */ 1665129254Scognet ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1666129254Scognet ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1667129254Scognet#ifdef __ARMEB__ 1668129254Scognet mov r3, r3, lsl #24 /* r3 = 0... 
*/ 1669129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1670129254Scognet#else 1671129254Scognet mov r3, r3, lsr #24 /* r3 = ...0 */ 1672129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1673129254Scognet#endif 1674129254Scognet str r3, [r0] 1675137463Scognet RET 1676129254Scognet LMEMCPY_4_PAD 1677129254Scognet 1678129254Scognet/* 1679129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned 1680129254Scognet */ 1681129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1682129254Scognet#ifdef __ARMEB__ 1683129254Scognet strb r2, [r0, #0x03] 1684129254Scognet mov r3, r2, lsr #8 /* r3 = .012 */ 1685129254Scognet mov r1, r2, lsr #24 /* r1 = ...0 */ 1686129254Scognet strb r1, [r0] 1687129254Scognet#else 1688129254Scognet strb r2, [r0] 1689129254Scognet mov r3, r2, lsr #8 /* r3 = .321 */ 1690129254Scognet mov r1, r2, lsr #24 /* r1 = ...3 */ 1691129254Scognet strb r1, [r0, #0x03] 1692129254Scognet#endif 1693129254Scognet strh r3, [r0, #0x01] /* Low halfword of r3 supplies bytes 1 and 2 */ 1694137463Scognet RET 1695129254Scognet LMEMCPY_4_PAD 1696129254Scognet 1697129254Scognet/* 1698129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned 1699129254Scognet */ 1700129254Scognet ldrb r2, [r1] 1701129254Scognet ldrh r3, [r1, #0x01] 1702129254Scognet ldrb r1, [r1, #0x03] 1703129254Scognet strb r2, [r0] 1704129254Scognet strh r3, [r0, #0x01] 1705129254Scognet strb r1, [r0, #0x03] 1706137463Scognet RET 1707129254Scognet LMEMCPY_4_PAD 1708129254Scognet 1709129254Scognet/* 1710129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned 1711129254Scognet */ 1712129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1713129254Scognet ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1714129254Scognet#ifdef __ARMEB__ 1715129254Scognet mov r1, r2, lsr #8 /* r1 = ...0 */ 1716129254Scognet strb r1, [r0] 1717129254Scognet mov r2, r2, lsl #8 /* r2 = .01.
*/ 1718129254Scognet orr r2, r2, r3, lsr #8 /* r2 = .012 */ 1719129254Scognet#else 1720129254Scognet strb r2, [r0] 1721129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 1722129254Scognet orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1723129254Scognet mov r3, r3, lsr #8 /* r3 = ...3 */ 1724129254Scognet#endif 1725129254Scognet strh r2, [r0, #0x01] 1726129254Scognet strb r3, [r0, #0x03] 1727137463Scognet RET 1728129254Scognet LMEMCPY_4_PAD 1729129254Scognet 1730129254Scognet/* 1731129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned 1732129254Scognet */ 1733129254Scognet ldrb r2, [r1] 1734129254Scognet ldrh r3, [r1, #0x01] 1735129254Scognet ldrb r1, [r1, #0x03] 1736129254Scognet strb r2, [r0] 1737129254Scognet strh r3, [r0, #0x01] 1738129254Scognet strb r1, [r0, #0x03] 1739137463Scognet RET 1740129254Scognet LMEMCPY_4_PAD 1741129254Scognet 1742129254Scognet/* 1743129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 1744129254Scognet */ 1745129254Scognet ldr r2, [r1] 1746129254Scognet#ifdef __ARMEB__ 1747129254Scognet strh r2, [r0, #0x02] 1748129254Scognet mov r3, r2, lsr #16 1749129254Scognet strh r3, [r0] 1750129254Scognet#else 1751129254Scognet strh r2, [r0] 1752129254Scognet mov r3, r2, lsr #16 1753129254Scognet strh r3, [r0, #0x02] 1754129254Scognet#endif 1755137463Scognet RET 1756129254Scognet LMEMCPY_4_PAD 1757129254Scognet 1758129254Scognet/* 1759129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned 1760129254Scognet */ 1761129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1762129254Scognet ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1763129254Scognet mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1764129254Scognet strh r1, [r0] 1765129254Scognet#ifdef __ARMEB__ 1766129254Scognet mov r2, r2, lsl #8 /* r2 = 012. 
*/ 1767129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1768129254Scognet#else 1769129254Scognet mov r2, r2, lsr #24 /* r2 = ...2 */ 1770129254Scognet orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1771129254Scognet#endif 1772129254Scognet strh r2, [r0, #0x02] 1773137463Scognet RET 1774129254Scognet LMEMCPY_4_PAD 1775129254Scognet 1776129254Scognet/* 1777129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 1778129254Scognet */ 1779129254Scognet ldrh r2, [r1] 1780129254Scognet ldrh r3, [r1, #0x02] 1781129254Scognet strh r2, [r0] 1782129254Scognet strh r3, [r0, #0x02] 1783137463Scognet RET 1784129254Scognet LMEMCPY_4_PAD 1785129254Scognet 1786129254Scognet/* 1787129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned 1788129254Scognet */ 1789129254Scognet ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1790129254Scognet ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1791129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1792129254Scognet strh r1, [r0, #0x02] 1793129254Scognet#ifdef __ARMEB__ 1794129254Scognet mov r3, r3, lsr #24 /* r3 = ...1 */ 1795129254Scognet orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 1796129254Scognet#else 1797129254Scognet mov r3, r3, lsl #8 /* r3 = 321. 
*/ 1798129254Scognet orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1799129254Scognet#endif 1800129254Scognet strh r3, [r0] 1801137463Scognet RET 1802129254Scognet LMEMCPY_4_PAD 1803129254Scognet 1804129254Scognet/* 1805129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned 1806129254Scognet */ 1807129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1808129254Scognet#ifdef __ARMEB__ 1809129254Scognet strb r2, [r0, #0x03] 1810129254Scognet mov r3, r2, lsr #8 /* r3 = .012 */ 1811129254Scognet mov r1, r2, lsr #24 /* r1 = ...0 */ 1812129254Scognet strh r3, [r0, #0x01] 1813129254Scognet strb r1, [r0] 1814129254Scognet#else 1815129254Scognet strb r2, [r0] 1816129254Scognet mov r3, r2, lsr #8 /* r3 = .321 */ 1817129254Scognet mov r1, r2, lsr #24 /* r1 = ...3 */ 1818129254Scognet strh r3, [r0, #0x01] 1819129254Scognet strb r1, [r0, #0x03] 1820129254Scognet#endif 1821137463Scognet RET 1822129254Scognet LMEMCPY_4_PAD 1823129254Scognet 1824129254Scognet/* 1825129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned 1826129254Scognet */ 1827129254Scognet ldrb r2, [r1] 1828129254Scognet ldrh r3, [r1, #0x01] 1829129254Scognet ldrb r1, [r1, #0x03] 1830129254Scognet strb r2, [r0] 1831129254Scognet strh r3, [r0, #0x01] 1832129254Scognet strb r1, [r0, #0x03] 1833137463Scognet RET 1834129254Scognet LMEMCPY_4_PAD 1835129254Scognet 1836129254Scognet/* 1837129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned 1838129254Scognet */ 1839129254Scognet#ifdef __ARMEB__ 1840129254Scognet ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1841129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1842129254Scognet strb r3, [r0, #0x03] 1843129254Scognet mov r3, r3, lsr #8 /* r3 = ...2 */ 1844129254Scognet orr r3, r3, r2, lsl #8 /* r3 = .012 */ 1845129254Scognet strh r3, [r0, #0x01] 1846129254Scognet mov r2, r2, lsr #8 /* r2 = ...0 */ 1847129254Scognet strb r2, [r0] 1848129254Scognet#else 1849129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1850129254Scognet ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1851129254Scognet strb r2, [r0] 1852129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 1853129254Scognet orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1854129254Scognet strh r2, [r0, #0x01] 1855129254Scognet mov r3, r3, lsr #8 /* r3 = ...3 */ 1856129254Scognet strb r3, [r0, #0x03] 1857129254Scognet#endif 1858137463Scognet RET 1859129254Scognet LMEMCPY_4_PAD 1860129254Scognet 1861129254Scognet/* 1862129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned 1863129254Scognet */ 1864129254Scognet ldrb r2, [r1] 1865129254Scognet ldrh r3, [r1, #0x01] 1866129254Scognet ldrb r1, [r1, #0x03] 1867129254Scognet strb r2, [r0] 1868129254Scognet strh r3, [r0, #0x01] 1869129254Scognet strb r1, [r0, #0x03] 1870137463Scognet RET 1871129254Scognet LMEMCPY_4_PAD 1872129254Scognet 1873129254Scognet 1874129254Scognet/****************************************************************************** 1875129254Scognet * Special case for 6 byte copies 1876129254Scognet */ 1877129254Scognet#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 1878129254Scognet#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 1879129254Scognet LMEMCPY_6_PAD 1880129254Scognet.Lmemcpy_6: 1881129254Scognet and r2, r1, #0x03 1882129254Scognet orr r2, r2, r0, lsl #2 1883129254Scognet ands r2, r2, #0x0f 1884129254Scognet sub r3, pc, #0x14 1885129254Scognet addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 1886129254Scognet 1887129254Scognet/* 1888129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 1889129254Scognet */ 1890129254Scognet ldr r2, [r1] 1891129254Scognet ldrh r3, [r1, #0x04] 1892129254Scognet str r2, [r0] 1893129254Scognet strh r3, [r0, #0x04] 1894137463Scognet RET 1895129254Scognet LMEMCPY_6_PAD 1896129254Scognet 1897129254Scognet/* 1898129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 1899129254Scognet */ 1900129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1901129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 1902129254Scognet#ifdef __ARMEB__ 1903129254Scognet mov r2, r2, 
lsl #8 /* r2 = 012. */ 1904129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1905129254Scognet#else 1906129254Scognet mov r2, r2, lsr #8 /* r2 = .210 */ 1907129254Scognet orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 1908129254Scognet#endif 1909129254Scognet mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 1910129254Scognet str r2, [r0] 1911129254Scognet strh r3, [r0, #0x04] 1912137463Scognet RET 1913129254Scognet LMEMCPY_6_PAD 1914129254Scognet 1915129254Scognet/* 1916129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 1917129254Scognet */ 1918129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1919129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1920129254Scognet#ifdef __ARMEB__ 1921129254Scognet mov r1, r3, lsr #16 /* r1 = ..23 */ 1922129254Scognet orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 1923129254Scognet str r1, [r0] 1924129254Scognet strh r3, [r0, #0x04] /* Low halfword of r3 = 45 */ 1925129254Scognet#else 1926129254Scognet mov r1, r3, lsr #16 /* r1 = ..54 */ 1927129254Scognet orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1928129254Scognet str r2, [r0] 1929129254Scognet strh r1, [r0, #0x04] 1930129254Scognet#endif 1931137463Scognet RET 1932129254Scognet LMEMCPY_6_PAD 1933129254Scognet 1934129254Scognet/* 1935129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 1936129254Scognet */ 1937129254Scognet ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1938129254Scognet ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 1939129254Scognet ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */ 1940129254Scognet#ifdef __ARMEB__ 1941129254Scognet mov r2, r2, lsl #24 /* r2 = 0... */ 1942129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1943129254Scognet mov r3, r3, lsl #8 /* r3 = 234. */ 1944129254Scognet orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 1945129254Scognet#else 1946129254Scognet mov r2, r2, lsr #24 /* r2 = ...0 */ 1947129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1948129254Scognet mov r1, r1, lsl #8 /* r1 = xx5.
*/ 1949129254Scognet orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 1950129254Scognet#endif 1951129254Scognet str r2, [r0] 1952129254Scognet strh r1, [r0, #0x04] 1953137463Scognet RET 1954129254Scognet LMEMCPY_6_PAD 1955129254Scognet 1956129254Scognet/* 1957129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned 1958129254Scognet */ 1959129254Scognet ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1960129254Scognet ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 1961129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1962129254Scognet strh r1, [r0, #0x01] 1963129254Scognet#ifdef __ARMEB__ 1964129254Scognet mov r1, r3, lsr #24 /* r1 = ...0 */ 1965129254Scognet strb r1, [r0] 1966129254Scognet mov r3, r3, lsl #8 /* r3 = 123. */ 1967129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 1968129254Scognet#else 1969129254Scognet strb r3, [r0] 1970129254Scognet mov r3, r3, lsr #24 /* r3 = ...3 */ 1971129254Scognet orr r3, r3, r2, lsl #8 /* r3 = .543 */ 1972129254Scognet mov r2, r2, lsr #8 /* r2 = ...5 */ 1973129254Scognet#endif 1974129254Scognet strh r3, [r0, #0x03] 1975129254Scognet strb r2, [r0, #0x05] 1976137463Scognet RET 1977129254Scognet LMEMCPY_6_PAD 1978129254Scognet 1979129254Scognet/* 1980129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned 1981129254Scognet */ 1982129254Scognet ldrb r2, [r1] 1983129254Scognet ldrh r3, [r1, #0x01] 1984129254Scognet ldrh ip, [r1, #0x03] 1985129254Scognet ldrb r1, [r1, #0x05] 1986129254Scognet strb r2, [r0] 1987129254Scognet strh r3, [r0, #0x01] 1988129254Scognet strh ip, [r0, #0x03] 1989129254Scognet strb r1, [r0, #0x05] 1990137463Scognet RET 1991129254Scognet LMEMCPY_6_PAD 1992129254Scognet 1993129254Scognet/* 1994129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned 1995129254Scognet */ 1996129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1997129254Scognet ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1998129254Scognet#ifdef __ARMEB__ 1999129254Scognet mov r3, r2, lsr #8 
/* r3 = ...0 */ 2000129254Scognet strb r3, [r0] 2001129254Scognet strb r1, [r0, #0x05] 2002129254Scognet mov r3, r1, lsr #8 /* r3 = .234 */ 2003129254Scognet strh r3, [r0, #0x03] 2004129254Scognet mov r3, r2, lsl #8 /* r3 = .01. */ 2005129254Scognet orr r3, r3, r1, lsr #24 /* r3 = .012 */ 2006129254Scognet strh r3, [r0, #0x01] 2007129254Scognet#else 2008129254Scognet strb r2, [r0] 2009129254Scognet mov r3, r1, lsr #24 2010129254Scognet strb r3, [r0, #0x05] 2011129254Scognet mov r3, r1, lsr #8 /* r3 = .543 */ 2012129254Scognet strh r3, [r0, #0x03] 2013129254Scognet mov r3, r2, lsr #8 /* r3 = ...1 */ 2014129254Scognet orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 2015129254Scognet strh r3, [r0, #0x01] 2016129254Scognet#endif 2017137463Scognet RET 2018129254Scognet LMEMCPY_6_PAD 2019129254Scognet 2020129254Scognet/* 2021129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned 2022129254Scognet */ 2023129254Scognet ldrb r2, [r1] 2024129254Scognet ldrh r3, [r1, #0x01] 2025129254Scognet ldrh ip, [r1, #0x03] 2026129254Scognet ldrb r1, [r1, #0x05] 2027129254Scognet strb r2, [r0] 2028129254Scognet strh r3, [r0, #0x01] 2029129254Scognet strh ip, [r0, #0x03] 2030129254Scognet strb r1, [r0, #0x05] 2031137463Scognet RET 2032129254Scognet LMEMCPY_6_PAD 2033129254Scognet 2034129254Scognet/* 2035129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 2036129254Scognet */ 2037129254Scognet#ifdef __ARMEB__ 2038129254Scognet ldr r2, [r1] /* r2 = 0123 */ 2039129254Scognet ldrh r3, [r1, #0x04] /* r3 = ..45 */ 2040129254Scognet mov r1, r2, lsr #16 /* r1 = ..01 */ 2041129254Scognet orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 2042129254Scognet strh r1, [r0] 2043129254Scognet str r3, [r0, #0x02] 2044129254Scognet#else 2045129254Scognet ldrh r2, [r1, #0x04] /* r2 = ..54 */ 2046129254Scognet ldr r3, [r1] /* r3 = 3210 */ 2047129254Scognet mov r2, r2, lsl #16 /* r2 = 54.. 
*/ 2048129254Scognet orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 2049129254Scognet strh r3, [r0] 2050129254Scognet str r2, [r0, #0x02] 2051129254Scognet#endif 2052137463Scognet RET 2053129254Scognet LMEMCPY_6_PAD 2054129254Scognet 2055129254Scognet/* 2056129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned 2057129254Scognet */ 2058129254Scognet ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2059129254Scognet ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 2060129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2061129254Scognet#ifdef __ARMEB__ 2062129254Scognet mov r2, r2, lsr #8 /* r2 = .345 */ 2063129254Scognet orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 2064129254Scognet#else 2065129254Scognet mov r2, r2, lsl #8 /* r2 = 543. */ 2066129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 2067129254Scognet#endif 2068129254Scognet strh r1, [r0] 2069129254Scognet str r2, [r0, #0x02] 2070137463Scognet RET 2071129254Scognet LMEMCPY_6_PAD 2072129254Scognet 2073129254Scognet/* 2074129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 2075129254Scognet */ 2076129254Scognet ldrh r2, [r1] 2077129254Scognet ldr r3, [r1, #0x02] 2078129254Scognet strh r2, [r0] 2079129254Scognet str r3, [r0, #0x02] 2080137463Scognet RET 2081129254Scognet LMEMCPY_6_PAD 2082129254Scognet 2083129254Scognet/* 2084129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned 2085129254Scognet */ 2086129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2087129254Scognet ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2088129254Scognet ldrb r1, [r1, #0x05] /* r1 = ...5 */ 2089129254Scognet#ifdef __ARMEB__ 2090129254Scognet mov r3, r3, lsl #8 /* r3 = ..0. */ 2091129254Scognet orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 2092129254Scognet orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 2093129254Scognet#else 2094129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2095129254Scognet mov r1, r1, lsl #24 /* r1 = 5... 
*/ 2096129254Scognet orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 2097129254Scognet#endif 2098129254Scognet strh r3, [r0] 2099129254Scognet str r1, [r0, #0x02] 2100137463Scognet RET 2101129254Scognet LMEMCPY_6_PAD 2102129254Scognet 2103129254Scognet/* 2104129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned 2105129254Scognet */ 2106129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2107129254Scognet ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 2108129254Scognet#ifdef __ARMEB__ 2109129254Scognet mov r3, r2, lsr #24 /* r3 = ...0 */ 2110129254Scognet strb r3, [r0] 2111129254Scognet mov r2, r2, lsl #8 /* r2 = 123. */ 2112129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2113129254Scognet#else 2114129254Scognet strb r2, [r0] 2115129254Scognet mov r2, r2, lsr #8 /* r2 = .321 */ 2116129254Scognet orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 2117129254Scognet mov r1, r1, lsr #8 /* r1 = ...5 */ 2118129254Scognet#endif 2119129254Scognet str r2, [r0, #0x01] 2120129254Scognet strb r1, [r0, #0x05] 2121137463Scognet RET 2122129254Scognet LMEMCPY_6_PAD 2123129254Scognet 2124129254Scognet/* 2125129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned 2126129254Scognet */ 2127129254Scognet ldrb r2, [r1] 2128129254Scognet ldrh r3, [r1, #0x01] 2129129254Scognet ldrh ip, [r1, #0x03] 2130129254Scognet ldrb r1, [r1, #0x05] 2131129254Scognet strb r2, [r0] 2132129254Scognet strh r3, [r0, #0x01] 2133129254Scognet strh ip, [r0, #0x03] 2134129254Scognet strb r1, [r0, #0x05] 2135137463Scognet RET 2136129254Scognet LMEMCPY_6_PAD 2137129254Scognet 2138129254Scognet/* 2139129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned 2140129254Scognet */ 2141129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2142129254Scognet ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 2143129254Scognet#ifdef __ARMEB__ 2144129254Scognet mov r3, r2, lsr #8 /* r3 = ...0 */ 2145129254Scognet strb r3, [r0] 2146129254Scognet mov r2, r2, lsl #24 /* r2 = 1... 
*/ 2147129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2148129254Scognet#else 2149129254Scognet strb r2, [r0] 2150129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 2151129254Scognet orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 2152129254Scognet mov r1, r1, lsr #24 /* r1 = ...5 */ 2153129254Scognet#endif 2154129254Scognet str r2, [r0, #0x01] 2155129254Scognet strb r1, [r0, #0x05] 2156137463Scognet RET 2157129254Scognet LMEMCPY_6_PAD 2158129254Scognet 2159129254Scognet/* 2160129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned 2161129254Scognet */ 2162129254Scognet ldrb r2, [r1] 2163129254Scognet ldr r3, [r1, #0x01] 2164129254Scognet ldrb r1, [r1, #0x05] 2165129254Scognet strb r2, [r0] 2166129254Scognet str r3, [r0, #0x01] 2167129254Scognet strb r1, [r0, #0x05] 2168137463Scognet RET 2169129254Scognet LMEMCPY_6_PAD 2170129254Scognet 2171129254Scognet 2172129254Scognet/****************************************************************************** 2173129254Scognet * Special case for 8 byte copies 2174129254Scognet */ 2175129254Scognet#define LMEMCPY_8_LOG2 6 /* 64 bytes */ 2176129254Scognet#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 2177129254Scognet LMEMCPY_8_PAD 2178129254Scognet.Lmemcpy_8: 2179129254Scognet and r2, r1, #0x03 2180129254Scognet orr r2, r2, r0, lsl #2 2181129254Scognet ands r2, r2, #0x0f 2182129254Scognet sub r3, pc, #0x14 2183129254Scognet addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 2184129254Scognet 2185129254Scognet/* 2186129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 2187129254Scognet */ 2188129254Scognet ldr r2, [r1] 2189129254Scognet ldr r3, [r1, #0x04] 2190129254Scognet str r2, [r0] 2191129254Scognet str r3, [r0, #0x04] 2192137463Scognet RET 2193129254Scognet LMEMCPY_8_PAD 2194129254Scognet 2195129254Scognet/* 2196129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 2197129254Scognet */ 2198129254Scognet ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2199129254Scognet ldr r2, [r1, #0x03] /* BE:r2 = 3456 
LE:r2 = 6543 */ 2200129254Scognet ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2201129254Scognet#ifdef __ARMEB__ 2202129254Scognet mov r3, r3, lsl #8 /* r3 = 012. */ 2203129254Scognet orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 2204129254Scognet orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 2205129254Scognet#else 2206129254Scognet mov r3, r3, lsr #8 /* r3 = .210 */ 2207129254Scognet orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 2208129254Scognet mov r1, r1, lsl #24 /* r1 = 7... */ 2209129254Scognet orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 2210129254Scognet#endif 2211129254Scognet str r3, [r0] 2212129254Scognet str r2, [r0, #0x04] 2213137463Scognet RET 2214129254Scognet LMEMCPY_8_PAD 2215129254Scognet 2216129254Scognet/* 2217129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 2218129254Scognet */ 2219129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2220129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2221129254Scognet ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2222129254Scognet#ifdef __ARMEB__ 2223129254Scognet mov r2, r2, lsl #16 /* r2 = 01.. */ 2224129254Scognet orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2225129254Scognet orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 2226129254Scognet#else 2227129254Scognet orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2228129254Scognet mov r3, r3, lsr #16 /* r3 = ..54 */ 2229129254Scognet orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 2230129254Scognet#endif 2231129254Scognet str r2, [r0] 2232129254Scognet str r3, [r0, #0x04] 2233137463Scognet RET 2234129254Scognet LMEMCPY_8_PAD 2235129254Scognet 2236129254Scognet/* 2237129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 2238129254Scognet */ 2239129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2240129254Scognet ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2241129254Scognet ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 2242129254Scognet#ifdef __ARMEB__ 2243129254Scognet mov r3, r3, lsl #24 /* r3 = 0... 
*/ 2244129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 2245129254Scognet mov r2, r2, lsl #24 /* r2 = 4... */ 2246129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 2247129254Scognet#else 2248129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2249129254Scognet mov r2, r2, lsr #24 /* r2 = ...4 */ 2250129254Scognet orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 2251129254Scognet#endif 2252129254Scognet str r3, [r0] 2253129254Scognet str r2, [r0, #0x04] 2254137463Scognet RET 2255129254Scognet LMEMCPY_8_PAD 2256129254Scognet 2257129254Scognet/* 2258129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned 2259129254Scognet */ 2260129254Scognet ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 2261129254Scognet ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 2262129254Scognet#ifdef __ARMEB__ 2263129254Scognet mov r1, r3, lsr #24 /* r1 = ...0 */ 2264129254Scognet strb r1, [r0] 2265129254Scognet mov r1, r3, lsr #8 /* r1 = .012 */ 2266129254Scognet strb r2, [r0, #0x07] 2267129254Scognet mov r3, r3, lsl #24 /* r3 = 3... 
*/ 2268129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 2269129254Scognet#else 2270129254Scognet strb r3, [r0] 2271129254Scognet mov r1, r2, lsr #24 /* r1 = ...7 */ 2272129254Scognet strb r1, [r0, #0x07] 2273129254Scognet mov r1, r3, lsr #8 /* r1 = .321 */ 2274129254Scognet mov r3, r3, lsr #24 /* r3 = ...3 */ 2275129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 2276129254Scognet#endif 2277129254Scognet strh r1, [r0, #0x01] 2278129254Scognet str r3, [r0, #0x03] 2279137463Scognet RET 2280129254Scognet LMEMCPY_8_PAD 2281129254Scognet 2282129254Scognet/* 2283129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned 2284129254Scognet */ 2285129254Scognet ldrb r2, [r1] 2286129254Scognet ldrh r3, [r1, #0x01] 2287129254Scognet ldr ip, [r1, #0x03] 2288129254Scognet ldrb r1, [r1, #0x07] 2289129254Scognet strb r2, [r0] 2290129254Scognet strh r3, [r0, #0x01] 2291129254Scognet str ip, [r0, #0x03] 2292129254Scognet strb r1, [r0, #0x07] 2293137463Scognet RET 2294129254Scognet LMEMCPY_8_PAD 2295129254Scognet 2296129254Scognet/* 2297129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned 2298129254Scognet */ 2299129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2300129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2301129254Scognet ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2302129254Scognet#ifdef __ARMEB__ 2303129254Scognet mov ip, r2, lsr #8 /* ip = ...0 */ 2304129254Scognet strb ip, [r0] 2305129254Scognet mov ip, r2, lsl #8 /* ip = .01. */ 2306129254Scognet orr ip, ip, r3, lsr #24 /* ip = .012 */ 2307129254Scognet strb r1, [r0, #0x07] 2308129254Scognet mov r3, r3, lsl #8 /* r3 = 345. 
*/ 2309129254Scognet orr r3, r3, r1, lsr #8 /* r3 = 3456 */ 2310129254Scognet#else 2311129254Scognet strb r2, [r0] /* 0 */ 2312129254Scognet mov ip, r1, lsr #8 /* ip = ...7 */ 2313129254Scognet strb ip, [r0, #0x07] /* 7 */ 2314129254Scognet mov ip, r2, lsr #8 /* ip = ...1 */ 2315129254Scognet orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2316129254Scognet mov r3, r3, lsr #8 /* r3 = .543 */ 2317129254Scognet orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 2318129254Scognet#endif 2319129254Scognet strh ip, [r0, #0x01] 2320129254Scognet str r3, [r0, #0x03] 2321137463Scognet RET 2322129254Scognet LMEMCPY_8_PAD 2323129254Scognet 2324129254Scognet/* 2325129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned 2326129254Scognet */ 2327129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2328129254Scognet ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2329129254Scognet ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 2330129254Scognet ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2331129254Scognet strb r3, [r0] 2332129254Scognet mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 2333129254Scognet#ifdef __ARMEB__ 2334129254Scognet strh r3, [r0, #0x01] 2335129254Scognet orr r2, r2, ip, lsl #16 /* r2 = 3456 */ 2336129254Scognet#else 2337129254Scognet strh ip, [r0, #0x01] 2338129254Scognet orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 2339129254Scognet#endif 2340129254Scognet str r2, [r0, #0x03] 2341129254Scognet strb r1, [r0, #0x07] 2342137463Scognet RET 2343129254Scognet LMEMCPY_8_PAD 2344129254Scognet 2345129254Scognet/* 2346129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 2347129254Scognet */ 2348129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2349129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2350129254Scognet mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 2351129254Scognet#ifdef __ARMEB__ 2352129254Scognet strh r1, [r0] 2353129254Scognet mov r1, r3, lsr #16 /* r1 = ..45 */ 2354129254Scognet orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */ 
2355129254Scognet#else 2356129254Scognet strh r2, [r0] 2357129254Scognet orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 2358129254Scognet mov r3, r3, lsr #16 /* r3 = ..76 */ 2359129254Scognet#endif 2360129254Scognet str r2, [r0, #0x02] 2361129254Scognet strh r3, [r0, #0x06] 2362137463Scognet RET 2363129254Scognet LMEMCPY_8_PAD 2364129254Scognet 2365129254Scognet/* 2366129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned 2367129254Scognet */ 2368129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 2369129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2370129254Scognet ldrb ip, [r1, #0x07] /* ip = ...7 */ 2371129254Scognet mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2372129254Scognet strh r1, [r0] 2373129254Scognet#ifdef __ARMEB__ 2374129254Scognet mov r1, r2, lsl #24 /* r1 = 2... */ 2375129254Scognet orr r1, r1, r3, lsr #8 /* r1 = 2345 */ 2376129254Scognet orr r3, ip, r3, lsl #8 /* r3 = 4567 */ 2377129254Scognet#else 2378129254Scognet mov r1, r2, lsr #24 /* r1 = ...2 */ 2379129254Scognet orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 2380129254Scognet mov r3, r3, lsr #24 /* r3 = ...6 */ 2381129254Scognet orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 2382129254Scognet#endif 2383129254Scognet str r1, [r0, #0x02] 2384129254Scognet strh r3, [r0, #0x06] 2385137463Scognet RET 2386129254Scognet LMEMCPY_8_PAD 2387129254Scognet 2388129254Scognet/* 2389129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 2390129254Scognet */ 2391129254Scognet ldrh r2, [r1] 2392129254Scognet ldr ip, [r1, #0x02] 2393129254Scognet ldrh r3, [r1, #0x06] 2394129254Scognet strh r2, [r0] 2395129254Scognet str ip, [r0, #0x02] 2396129254Scognet strh r3, [r0, #0x06] 2397137463Scognet RET 2398129254Scognet LMEMCPY_8_PAD 2399129254Scognet 2400129254Scognet/* 2401129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned 2402129254Scognet */ 2403129254Scognet ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 2404129254Scognet ldr r2, [r1, #0x01] /* BE:r2 = 
1234 LE:r2 = 4321 */ 2405129254Scognet ldrb ip, [r1] /* ip = ...0 */ 2406129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 2407129254Scognet strh r1, [r0, #0x06] 2408129254Scognet#ifdef __ARMEB__ 2409129254Scognet mov r3, r3, lsr #24 /* r3 = ...5 */ 2410129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 2345 */ 2411129254Scognet mov r2, r2, lsr #24 /* r2 = ...1 */ 2412129254Scognet orr r2, r2, ip, lsl #8 /* r2 = ..01 */ 2413129254Scognet#else 2414129254Scognet mov r3, r3, lsl #24 /* r3 = 5... */ 2415129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 2416129254Scognet orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 2417129254Scognet#endif 2418129254Scognet str r3, [r0, #0x02] 2419129254Scognet strh r2, [r0] 2420137463Scognet RET 2421129254Scognet LMEMCPY_8_PAD 2422129254Scognet 2423129254Scognet/* 2424129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned 2425129254Scognet */ 2426129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2427129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2428129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 2429129254Scognet strh r1, [r0, #0x05] 2430129254Scognet#ifdef __ARMEB__ 2431129254Scognet strb r3, [r0, #0x07] 2432129254Scognet mov r1, r2, lsr #24 /* r1 = ...0 */ 2433129254Scognet strb r1, [r0] 2434129254Scognet mov r2, r2, lsl #8 /* r2 = 123. 
*/ 2435129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 1234 */ 2436129254Scognet str r2, [r0, #0x01] 2437129254Scognet#else 2438129254Scognet strb r2, [r0] 2439129254Scognet mov r1, r3, lsr #24 /* r1 = ...7 */ 2440129254Scognet strb r1, [r0, #0x07] 2441129254Scognet mov r2, r2, lsr #8 /* r2 = .321 */ 2442129254Scognet orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 2443129254Scognet str r2, [r0, #0x01] 2444129254Scognet#endif 2445137463Scognet RET 2446129254Scognet LMEMCPY_8_PAD 2447129254Scognet 2448129254Scognet/* 2449129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned 2450129254Scognet */ 2451129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2452129254Scognet ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 2453129254Scognet ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2454129254Scognet ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2455129254Scognet strb r3, [r0] 2456129254Scognet mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 2457129254Scognet#ifdef __ARMEB__ 2458129254Scognet strh ip, [r0, #0x05] 2459129254Scognet orr r2, r3, r2, lsl #16 /* r2 = 1234 */ 2460129254Scognet#else 2461129254Scognet strh r3, [r0, #0x05] 2462129254Scognet orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 2463129254Scognet#endif 2464129254Scognet str r2, [r0, #0x01] 2465129254Scognet strb r1, [r0, #0x07] 2466137463Scognet RET 2467129254Scognet LMEMCPY_8_PAD 2468129254Scognet 2469129254Scognet/* 2470129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned 2471129254Scognet */ 2472129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2473129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2474129254Scognet ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2475129254Scognet#ifdef __ARMEB__ 2476129254Scognet mov ip, r2, lsr #8 /* ip = ...0 */ 2477129254Scognet strb ip, [r0] 2478129254Scognet mov ip, r2, lsl #24 /* ip = 1... 
*/ 2479129254Scognet orr ip, ip, r3, lsr #8 /* ip = 1234 */ 2480129254Scognet strb r1, [r0, #0x07] 2481129254Scognet mov r1, r1, lsr #8 /* r1 = ...6 */ 2482129254Scognet orr r1, r1, r3, lsl #8 /* r1 = 3456 */ 2483129254Scognet#else 2484129254Scognet strb r2, [r0] 2485129254Scognet mov ip, r2, lsr #8 /* ip = ...1 */ 2486129254Scognet orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2487129254Scognet mov r2, r1, lsr #8 /* r2 = ...7 */ 2488129254Scognet strb r2, [r0, #0x07] 2489129254Scognet mov r1, r1, lsl #8 /* r1 = .76. */ 2490129254Scognet orr r1, r1, r3, lsr #24 /* r1 = .765 */ 2491129254Scognet#endif 2492129254Scognet str ip, [r0, #0x01] 2493129254Scognet strh r1, [r0, #0x05] 2494137463Scognet RET 2495129254Scognet LMEMCPY_8_PAD 2496129254Scognet 2497129254Scognet/* 2498129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned 2499129254Scognet */ 2500129254Scognet ldrb r2, [r1] 2501129254Scognet ldr ip, [r1, #0x01] 2502129254Scognet ldrh r3, [r1, #0x05] 2503129254Scognet ldrb r1, [r1, #0x07] 2504129254Scognet strb r2, [r0] 2505129254Scognet str ip, [r0, #0x01] 2506129254Scognet strh r3, [r0, #0x05] 2507129254Scognet strb r1, [r0, #0x07] 2508137463Scognet RET 2509129254Scognet LMEMCPY_8_PAD 2510129254Scognet 2511129254Scognet/****************************************************************************** 2512129254Scognet * Special case for 12 byte copies 2513129254Scognet */ 2514129254Scognet#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 2515129254Scognet#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 2516129254Scognet LMEMCPY_C_PAD 2517129254Scognet.Lmemcpy_c: 2518129254Scognet and r2, r1, #0x03 2519129254Scognet orr r2, r2, r0, lsl #2 2520129254Scognet ands r2, r2, #0x0f 2521129254Scognet sub r3, pc, #0x14 2522129254Scognet addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 2523129254Scognet 2524129254Scognet/* 2525129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 2526129254Scognet */ 2527129254Scognet ldr r2, [r1] 2528129254Scognet ldr r3, [r1, #0x04] 
2529129254Scognet ldr r1, [r1, #0x08] 2530129254Scognet str r2, [r0] 2531129254Scognet str r3, [r0, #0x04] 2532129254Scognet str r1, [r0, #0x08] 2533137463Scognet RET 2534129254Scognet LMEMCPY_C_PAD 2535129254Scognet 2536129254Scognet/* 2537129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 2538129254Scognet */ 2539129254Scognet ldrb r2, [r1, #0xb] /* r2 = ...B */ 2540129254Scognet ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 2541129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2542129254Scognet ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 2543129254Scognet#ifdef __ARMEB__ 2544129254Scognet orr r2, r2, ip, lsl #8 /* r2 = 89AB */ 2545129254Scognet str r2, [r0, #0x08] 2546129254Scognet mov r2, ip, lsr #24 /* r2 = ...7 */ 2547129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 4567 */ 2548129254Scognet mov r1, r1, lsl #8 /* r1 = 012. */ 2549129254Scognet orr r1, r1, r3, lsr #24 /* r1 = 0123 */ 2550129254Scognet#else 2551129254Scognet mov r2, r2, lsl #24 /* r2 = B... */ 2552129254Scognet orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 2553129254Scognet str r2, [r0, #0x08] 2554129254Scognet mov r2, ip, lsl #24 /* r2 = 7... */ 2555129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 2556129254Scognet mov r1, r1, lsr #8 /* r1 = .210 */ 2557129254Scognet orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 2558129254Scognet#endif 2559129254Scognet str r2, [r0, #0x04] 2560129254Scognet str r1, [r0] 2561137463Scognet RET 2562129254Scognet LMEMCPY_C_PAD 2563129254Scognet 2564129254Scognet/* 2565129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 2566129254Scognet */ 2567129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2568129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2569129254Scognet ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 2570129254Scognet ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 2571129254Scognet#ifdef __ARMEB__ 2572129254Scognet mov r2, r2, lsl #16 /* r2 = 01.. 
*/ 2573129254Scognet orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2574129254Scognet str r2, [r0] 2575129254Scognet mov r3, r3, lsl #16 /* r3 = 45.. */ 2576129254Scognet orr r3, r3, ip, lsr #16 /* r3 = 4567 */ 2577129254Scognet orr r1, r1, ip, lsl #16 /* r1 = 89AB */ 2578129254Scognet#else 2579129254Scognet orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2580129254Scognet str r2, [r0] 2581129254Scognet mov r3, r3, lsr #16 /* r3 = ..54 */ 2582129254Scognet orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 2583129254Scognet mov r1, r1, lsl #16 /* r1 = BA.. */ 2584129254Scognet orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 2585129254Scognet#endif 2586129254Scognet str r3, [r0, #0x04] 2587129254Scognet str r1, [r0, #0x08] 2588137463Scognet RET 2589129254Scognet LMEMCPY_C_PAD 2590129254Scognet 2591129254Scognet/* 2592129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 2593129254Scognet */ 2594129254Scognet ldrb r2, [r1] /* r2 = ...0 */ 2595129254Scognet ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 2596129254Scognet ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 2597129254Scognet ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 2598129254Scognet#ifdef __ARMEB__ 2599129254Scognet mov r2, r2, lsl #24 /* r2 = 0... */ 2600129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 2601129254Scognet str r2, [r0] 2602129254Scognet mov r3, r3, lsl #24 /* r3 = 4... */ 2603129254Scognet orr r3, r3, ip, lsr #8 /* r3 = 4567 */ 2604129254Scognet mov r1, r1, lsr #8 /* r1 = .9AB */ 2605129254Scognet orr r1, r1, ip, lsl #24 /* r1 = 89AB */ 2606129254Scognet#else 2607129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 2608129254Scognet str r2, [r0] 2609129254Scognet mov r3, r3, lsr #24 /* r3 = ...4 */ 2610129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 2611129254Scognet mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 2612129254Scognet orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 2613129254Scognet#endif 2614129254Scognet str r3, [r0, #0x04] 2615129254Scognet str r1, [r0, #0x08] 2616137463Scognet RET 2617129254Scognet LMEMCPY_C_PAD 2618129254Scognet 2619129254Scognet/* 2620129254Scognet * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 2621129254Scognet */ 2622129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2623129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2624129254Scognet ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 2625129254Scognet mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 2626129254Scognet strh r1, [r0, #0x01] 2627129254Scognet#ifdef __ARMEB__ 2628129254Scognet mov r1, r2, lsr #24 /* r1 = ...0 */ 2629129254Scognet strb r1, [r0] 2630129254Scognet mov r1, r2, lsl #24 /* r1 = 3... */ 2631129254Scognet orr r2, r1, r3, lsr #8 /* r2 = 3456 */ 2632129254Scognet mov r1, r3, lsl #24 /* r1 = 7... */ 2633129254Scognet orr r1, r1, ip, lsr #8 /* r1 = 789A */ 2634129254Scognet#else 2635129254Scognet strb r2, [r0] 2636129254Scognet mov r1, r2, lsr #24 /* r1 = ...3 */ 2637129254Scognet orr r2, r1, r3, lsl #8 /* r2 = 6543 */ 2638129254Scognet mov r1, r3, lsr #24 /* r1 = ...7 */ 2639129254Scognet orr r1, r1, ip, lsl #8 /* r1 = A987 */ 2640129254Scognet mov ip, ip, lsr #24 /* ip = ...B */ 2641129254Scognet#endif 2642129254Scognet str r2, [r0, #0x03] 2643129254Scognet str r1, [r0, #0x07] 2644129254Scognet strb ip, [r0, #0x0b] 2645137463Scognet RET 2646129254Scognet LMEMCPY_C_PAD 2647129254Scognet 2648129254Scognet/* 2649129254Scognet * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1) 2650129254Scognet */ 2651129254Scognet ldrb r2, [r1] 2652129254Scognet ldrh r3, [r1, #0x01] 2653129254Scognet ldr ip, [r1, #0x03] 2654129254Scognet strb r2, [r0] 2655129254Scognet ldr r2, [r1, #0x07] 2656129254Scognet ldrb r1, [r1, #0x0b] 2657129254Scognet strh r3, [r0, #0x01] 2658129254Scognet str ip, [r0, #0x03] 2659129254Scognet str r2,
[r0, #0x07] 2660129254Scognet strb r1, [r0, #0x0b] 2661137463Scognet RET 2662129254Scognet LMEMCPY_C_PAD 2663129254Scognet 2664129254Scognet/* 2665129254Scognet * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned 2666129254Scognet */ 2667129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2668129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2669129254Scognet ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 2670129254Scognet ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 2671129254Scognet#ifdef __ARMEB__ 2672129254Scognet mov r2, r2, ror #8 /* r2 = 1..0 */ 2673129254Scognet strb r2, [r0] 2674129254Scognet mov r2, r2, lsr #16 /* r2 = ..1. */ 2675129254Scognet orr r2, r2, r3, lsr #24 /* r2 = ..12 */ 2676129254Scognet strh r2, [r0, #0x01] 2677129254Scognet mov r2, r3, lsl #8 /* r2 = 345. */ 2678129254Scognet orr r3, r2, ip, lsr #24 /* r3 = 3456 */ 2679129254Scognet mov r2, ip, lsl #8 /* r2 = 789. */ 2680129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 789A */ 2681129254Scognet#else 2682129254Scognet strb r2, [r0] 2683129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 2684129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2685129254Scognet strh r2, [r0, #0x01] 2686129254Scognet mov r2, r3, lsr #8 /* r2 = .543 */ 2687129254Scognet orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 2688129254Scognet mov r2, ip, lsr #8 /* r2 = .987 */ 2689129254Scognet orr r2, r2, r1, lsl #24 /* r2 = A987 */ 2690129254Scognet mov r1, r1, lsr #8 /* r1 = ...B */ 2691129254Scognet#endif 2692129254Scognet str r3, [r0, #0x03] 2693129254Scognet str r2, [r0, #0x07] 2694129254Scognet strb r1, [r0, #0x0b] 2695137463Scognet RET 2696129254Scognet LMEMCPY_C_PAD 2697129254Scognet 2698129254Scognet/* 2699129254Scognet * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) 2700129254Scognet */ 2701129254Scognet ldrb r2, [r1] 2702129254Scognet ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 2703129254Scognet ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 
2704129254Scognet ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */ 2705129254Scognet strb r2, [r0] 2706129254Scognet#ifdef __ARMEB__ 2707129254Scognet mov r2, r3, lsr #16 /* r2 = ..12 */ 2708129254Scognet strh r2, [r0, #0x01] 2709129254Scognet mov r3, r3, lsl #16 /* r3 = 34.. */ 2710129254Scognet orr r3, r3, ip, lsr #16 /* r3 = 3456 */ 2711129254Scognet mov ip, ip, lsl #16 /* ip = 78.. */ 2712129254Scognet orr ip, ip, r1, lsr #16 /* ip = 789A */ 2713129254Scognet mov r1, r1, lsr #8 /* r1 = .9AB */ 2714129254Scognet#else 2715129254Scognet strh r3, [r0, #0x01] 2716129254Scognet mov r3, r3, lsr #16 /* r3 = ..43 */ 2717129254Scognet orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 2718129254Scognet mov ip, ip, lsr #16 /* ip = ..87 */ 2719129254Scognet orr ip, ip, r1, lsl #16 /* ip = A987 */ 2720129254Scognet mov r1, r1, lsr #16 /* r1 = ..xB */ 2721129254Scognet#endif 2722129254Scognet str r3, [r0, #0x03] 2723129254Scognet str ip, [r0, #0x07] 2724129254Scognet strb r1, [r0, #0x0b] 2725137463Scognet RET 2726129254Scognet LMEMCPY_C_PAD 2727129254Scognet 2728129254Scognet/* 2729129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 2730129254Scognet */ 2731129254Scognet ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 2732129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2733129254Scognet ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 2734129254Scognet mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 2735129254Scognet#ifdef __ARMEB__ 2736129254Scognet strh r1, [r0] 2737129254Scognet mov r1, ip, lsl #16 /* r1 = 23.. */ 2738129254Scognet orr r1, r1, r3, lsr #16 /* r1 = 2345 */ 2739129254Scognet mov r3, r3, lsl #16 /* r3 = 67.. 
*/ 2740129254Scognet orr r3, r3, r2, lsr #16 /* r3 = 6789 */ 2741129254Scognet#else 2742129254Scognet strh ip, [r0] 2743129254Scognet orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 2744129254Scognet mov r3, r3, lsr #16 /* r3 = ..76 */ 2745129254Scognet orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 2746129254Scognet mov r2, r2, lsr #16 /* r2 = ..BA */ 2747129254Scognet#endif 2748129254Scognet str r1, [r0, #0x02] 2749129254Scognet str r3, [r0, #0x06] 2750129254Scognet strh r2, [r0, #0x0a] 2751137463Scognet RET 2752129254Scognet LMEMCPY_C_PAD 2753129254Scognet 2754129254Scognet/* 2755129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 2756129254Scognet */ 2757129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 2758129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2759129254Scognet mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 2760129254Scognet strh ip, [r0] 2761129254Scognet ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 2762129254Scognet ldrb r1, [r1, #0x0b] /* r1 = ...B */ 2763129254Scognet#ifdef __ARMEB__ 2764129254Scognet mov r2, r2, lsl #24 /* r2 = 2... */ 2765129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 2345 */ 2766129254Scognet mov r3, r3, lsl #24 /* r3 = 6... */ 2767129254Scognet orr r3, r3, ip, lsr #8 /* r3 = 6789 */ 2768129254Scognet orr r1, r1, ip, lsl #8 /* r1 = 89AB */ 2769129254Scognet#else 2770129254Scognet mov r2, r2, lsr #24 /* r2 = ...2 */ 2771129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 2772129254Scognet mov r3, r3, lsr #24 /* r3 = ...6 */ 2773129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 2774129254Scognet mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 2775129254Scognet orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 2776129254Scognet#endif 2777129254Scognet str r2, [r0, #0x02] 2778129254Scognet str r3, [r0, #0x06] 2779129254Scognet strh r1, [r0, #0x0a] 2780137463Scognet RET 2781129254Scognet LMEMCPY_C_PAD 2782129254Scognet 2783129254Scognet/* 2784129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 2785129254Scognet */ 2786129254Scognet ldrh r2, [r1] 2787129254Scognet ldr r3, [r1, #0x02] 2788129254Scognet ldr ip, [r1, #0x06] 2789129254Scognet ldrh r1, [r1, #0x0a] 2790129254Scognet strh r2, [r0] 2791129254Scognet str r3, [r0, #0x02] 2792129254Scognet str ip, [r0, #0x06] 2793129254Scognet strh r1, [r0, #0x0a] 2794137463Scognet RET 2795129254Scognet LMEMCPY_C_PAD 2796129254Scognet 2797129254Scognet/* 2798129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 2799129254Scognet */ 2800129254Scognet ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 2801129254Scognet ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 2802129254Scognet mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 2803129254Scognet strh ip, [r0, #0x0a] 2804129254Scognet ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2805129254Scognet ldrb r1, [r1] /* r1 = ...0 */ 2806129254Scognet#ifdef __ARMEB__ 2807129254Scognet mov r2, r2, lsr #24 /* r2 = ...9 */ 2808129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 6789 */ 2809129254Scognet mov r3, r3, lsr #24 /* r3 = ...5 */ 2810129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 2345 */ 2811129254Scognet mov r1, r1, lsl #8 /* r1 = ..0. */ 2812129254Scognet orr r1, r1, ip, lsr #24 /* r1 = ..01 */ 2813129254Scognet#else 2814129254Scognet mov r2, r2, lsl #24 /* r2 = 9... */ 2815129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 2816129254Scognet mov r3, r3, lsl #24 /* r3 = 5... 
*/ 2817129254Scognet orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 2818129254Scognet orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 2819129254Scognet#endif 2820129254Scognet str r2, [r0, #0x06] 2821129254Scognet str r3, [r0, #0x02] 2822129254Scognet strh r1, [r0] 2823137463Scognet RET 2824129254Scognet LMEMCPY_C_PAD 2825129254Scognet 2826129254Scognet/* 2827129254Scognet * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 2828129254Scognet */ 2829129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2830129254Scognet ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 2831129254Scognet ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 2832129254Scognet#ifdef __ARMEB__ 2833129254Scognet mov r3, r2, lsr #24 /* r3 = ...0 */ 2834129254Scognet strb r3, [r0] 2835129254Scognet mov r2, r2, lsl #8 /* r2 = 123. */ 2836129254Scognet orr r2, r2, ip, lsr #24 /* r2 = 1234 */ 2837129254Scognet str r2, [r0, #0x01] 2838129254Scognet mov r2, ip, lsl #8 /* r2 = 567. */ 2839129254Scognet orr r2, r2, r1, lsr #24 /* r2 = 5678 */ 2840129254Scognet str r2, [r0, #0x05] 2841129254Scognet mov r2, r1, lsr #8 /* r2 = ..9A */ 2842129254Scognet strh r2, [r0, #0x09] 2843129254Scognet strb r1, [r0, #0x0b] 2844129254Scognet#else 2845129254Scognet strb r2, [r0] 2846129254Scognet mov r3, r2, lsr #8 /* r3 = .321 */ 2847129254Scognet orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 2848129254Scognet str r3, [r0, #0x01] 2849129254Scognet mov r3, ip, lsr #8 /* r3 = .765 */ 2850129254Scognet orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 2851129254Scognet str r3, [r0, #0x05] 2852129254Scognet mov r1, r1, lsr #8 /* r1 = .BA9 */ 2853129254Scognet strh r1, [r0, #0x09] 2854129254Scognet mov r1, r1, lsr #16 /* r1 = ...B */ 2855129254Scognet strb r1, [r0, #0x0b] 2856129254Scognet#endif 2857137463Scognet RET 2858129254Scognet LMEMCPY_C_PAD 2859129254Scognet 2860129254Scognet/* 2861129254Scognet * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 2862129254Scognet */ 2863129254Scognet ldrb r2, [r1, #0x0b] /* r2 
= ...B */ 2864129254Scognet ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 2865129254Scognet ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2866129254Scognet ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 2867129254Scognet strb r2, [r0, #0x0b] 2868129254Scognet#ifdef __ARMEB__ 2869129254Scognet strh r3, [r0, #0x09] 2870129254Scognet mov r3, r3, lsr #16 /* r3 = ..78 */ 2871129254Scognet orr r3, r3, ip, lsl #16 /* r3 = 5678 */ 2872129254Scognet mov ip, ip, lsr #16 /* ip = ..34 */ 2873129254Scognet orr ip, ip, r1, lsl #16 /* ip = 1234 */ 2874129254Scognet mov r1, r1, lsr #16 /* r1 = ..x0 */ 2875129254Scognet#else 2876129254Scognet mov r2, r3, lsr #16 /* r2 = ..A9 */ 2877129254Scognet strh r2, [r0, #0x09] 2878129254Scognet mov r3, r3, lsl #16 /* r3 = 87.. */ 2879129254Scognet orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 2880129254Scognet mov ip, ip, lsl #16 /* ip = 43.. */ 2881129254Scognet orr ip, ip, r1, lsr #16 /* ip = 4321 */ 2882129254Scognet mov r1, r1, lsr #8 /* r1 = .210 */ 2883129254Scognet#endif 2884129254Scognet str r3, [r0, #0x05] 2885129254Scognet str ip, [r0, #0x01] 2886129254Scognet strb r1, [r0] 2887137463Scognet RET 2888129254Scognet LMEMCPY_C_PAD 2889129254Scognet 2890129254Scognet/* 2891129254Scognet * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 2892129254Scognet */ 2893129254Scognet#ifdef __ARMEB__ 2894129254Scognet ldrh r2, [r1, #0x0a] /* r2 = ..AB */ 2895129254Scognet ldr ip, [r1, #0x06] /* ip = 6789 */ 2896129254Scognet ldr r3, [r1, #0x02] /* r3 = 2345 */ 2897129254Scognet ldrh r1, [r1] /* r1 = ..01 */ 2898129254Scognet strb r2, [r0, #0x0b] 2899129254Scognet mov r2, r2, lsr #8 /* r2 = ...A */ 2900129254Scognet orr r2, r2, ip, lsl #8 /* r2 = 789A */ 2901129254Scognet mov ip, ip, lsr #8 /* ip = .678 */ 2902129254Scognet orr ip, ip, r3, lsl #24 /* ip = 5678 */ 2903129254Scognet mov r3, r3, lsr #8 /* r3 = .234 */ 2904129254Scognet orr r3, r3, r1, lsl #24 /* r3 = 1234 */ 2905129254Scognet mov r1, r1, lsr #8 /* r1 = ...0 */ 
2906129254Scognet strb r1, [r0] 2907129254Scognet str r3, [r0, #0x01] 2908129254Scognet str ip, [r0, #0x05] 2909129254Scognet strh r2, [r0, #0x09] 2910129254Scognet#else 2911129254Scognet ldrh r2, [r1] /* r2 = ..10 */ 2912129254Scognet ldr r3, [r1, #0x02] /* r3 = 5432 */ 2913129254Scognet ldr ip, [r1, #0x06] /* ip = 9876 */ 2914129254Scognet ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 2915129254Scognet strb r2, [r0] 2916129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 2917129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2918129254Scognet mov r3, r3, lsr #24 /* r3 = ...5 */ 2919129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 2920129254Scognet mov ip, ip, lsr #24 /* ip = ...9 */ 2921129254Scognet orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 2922129254Scognet mov r1, r1, lsr #8 /* r1 = ...B */ 2923129254Scognet str r2, [r0, #0x01] 2924129254Scognet str r3, [r0, #0x05] 2925129254Scognet strh ip, [r0, #0x09] 2926129254Scognet strb r1, [r0, #0x0b] 2927129254Scognet#endif 2928137463Scognet RET 2929129254Scognet LMEMCPY_C_PAD 2930129254Scognet 2931129254Scognet/* 2932129254Scognet * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 2933129254Scognet */ 2934129254Scognet ldrb r2, [r1] 2935129254Scognet ldr r3, [r1, #0x01] 2936129254Scognet ldr ip, [r1, #0x05] 2937129254Scognet strb r2, [r0] 2938129254Scognet ldrh r2, [r1, #0x09] 2939129254Scognet ldrb r1, [r1, #0x0b] 2940129254Scognet str r3, [r0, #0x01] 2941129254Scognet str ip, [r0, #0x05] 2942129254Scognet strh r2, [r0, #0x09] 2943129254Scognet strb r1, [r0, #0x0b] 2944137463Scognet RET 2945248361SandrewEND(memcpy) 2946172614Scognet#endif /* _ARM_ARCH_5E */ 2947135654Scognet 2948135654Scognet#ifdef GPROF 2949135654Scognet /* GPROF-only stubs: user, btrap, etrap, bintr and eintr are each an ENTRY/END pair around a single nop, assembled only when GPROF is defined. None of them contains a RET, so they do not look like callable routines. NOTE(review): presumably these are address markers for the kernel profiler; confirm against the profiling code before reordering or removing them. */ 2950135654ScognetENTRY(user) 2951135654Scognet nop 2952269390SianEND(user) 2953135654ScognetENTRY(btrap) 2954135654Scognet nop 2955269390SianEND(btrap) 2956135654ScognetENTRY(etrap) 2957135654Scognet nop 2958269390SianEND(etrap) 2959135654ScognetENTRY(bintr) 2960135654Scognet nop
2961269390SianEND(bintr) 2962135654ScognetENTRY(eintr) 2963135654Scognet nop 2964269390SianEND(eintr) 2965135654Scognet#endif /* GPROF */ 2966