/* support.S revision 175255 */
/*-
 * Copyright (c) 2004 Olivier Houchard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the NetBSD
 *      Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
942742Swollman */ 95273718Sedwin 96273718Sedwin#include <machine/asm.h> 97273718Sedwin#include <machine/asmacros.h> 9819878Swollman__FBSDID("$FreeBSD: head/sys/arm/arm/support.S 175255 2008-01-12 21:11:43Z cognet $"); 992742Swollman 10019878Swollman#include "assym.s" 10119878Swollman 10219878Swollman.L_arm_memcpy: 10319878Swollman .word _C_LABEL(_arm_memcpy) 10419878Swollman.L_arm_bzero: 1052742Swollman .word _C_LABEL(_arm_bzero) 1062742Swollman.L_min_memcpy_size: 1072742Swollman .word _C_LABEL(_min_memcpy_size) 108273718Sedwin.L_min_bzero_size: 1092742Swollman .word _C_LABEL(_min_bzero_size) 1102742Swollman/* 111273718Sedwin * memset: Sets a block of memory to the specified value 1122742Swollman * 1132742Swollman * On entry: 114270728Spluknet * r0 - dest address 1152742Swollman * r1 - byte to write 1162742Swollman * r2 - number of bytes to write 117273718Sedwin * 1182742Swollman * On exit: 1192742Swollman * r0 - dest address 120273718Sedwin */ 1212742Swollman/* LINTSTUB: Func: void bzero(void *, size_t) */ 1222742SwollmanENTRY(bzero) 123273718Sedwin ldr r3, .L_arm_bzero 124273718Sedwin ldr r3, [r3] 125273718Sedwin cmp r3, #0 126273718Sedwin beq .Lnormal0 127273718Sedwin ldr r2, .L_min_bzero_size 128273718Sedwin ldr r2, [r2] 1292742Swollman cmp r1, r2 130273718Sedwin blt .Lnormal0 1312742Swollman stmfd sp!, {r0, r1, lr} 1322742Swollman mov r2, #0 133273718Sedwin mov lr, pc 13419878Swollman mov pc, r3 1352742Swollman cmp r0, #0 1362742Swollman ldmfd sp!, {r0, r1, lr} 137273718Sedwin RETeq 1382742Swollman.Lnormal0: 1392742Swollman mov r3, #0x00 1402742Swollman b do_memset 141273718Sedwin 14230711Swollman/* LINTSTUB: Func: void *memset(void *, int, size_t) */ 14330711SwollmanENTRY(memset) 14430711Swollman and r3, r1, #0xff /* We deal with bytes */ 1452742Swollman mov r1, r2 1462742Swollmando_memset: 147274559Sedwin cmp r1, #0x04 /* Do we have less than 4 bytes */ 1482742Swollman mov ip, r0 149273718Sedwin blt .Lmemset_lessthanfour 150273718Sedwin 15130711Swollman /* 
Ok first we will word align the address */ 15230711Swollman ands r2, ip, #0x03 /* Get the bottom two bits */ 153273718Sedwin bne .Lmemset_wordunaligned /* The address is not word aligned */ 1542742Swollman 155273718Sedwin /* We are now word aligned */ 1562742Swollman.Lmemset_wordaligned: 1572742Swollman orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */ 15830711Swollman#ifdef _ARM_ARCH_5E 159270728Spluknet tst ip, #0x04 /* Quad-align for armv5e */ 160270728Spluknet#else 161270728Spluknet cmp r1, #0x10 162270728Spluknet#endif 163270728Spluknet orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */ 164270728Spluknet#ifdef _ARM_ARCH_5E 165270728Spluknet subne r1, r1, #0x04 /* Quad-align if necessary */ 166270728Spluknet strne r3, [ip], #0x04 167270728Spluknet cmp r1, #0x10 168270728Spluknet#endif 1692742Swollman blt .Lmemset_loop4 /* If less than 16 then use words */ 1702742Swollman mov r2, r3 /* Duplicate data */ 171274559Sedwin cmp r1, #0x80 /* If < 128 then skip the big loop */ 1722742Swollman blt .Lmemset_loop32 1732742Swollman 1742742Swollman /* Do 128 bytes at a time */ 1752742Swollman.Lmemset_loop128: 1762742Swollman subs r1, r1, #0x80 177248307Sedwin#ifdef _ARM_ARCH_5E 178248307Sedwin strged r2, [ip], #0x08 179248307Sedwin strged r2, [ip], #0x08 180248307Sedwin strged r2, [ip], #0x08 181248307Sedwin strged r2, [ip], #0x08 1822742Swollman strged r2, [ip], #0x08 18319878Swollman strged r2, [ip], #0x08 1842742Swollman strged r2, [ip], #0x08 18519878Swollman strged r2, [ip], #0x08 1862742Swollman strged r2, [ip], #0x08 18719878Swollman strged r2, [ip], #0x08 1882742Swollman strged r2, [ip], #0x08 1892742Swollman strged r2, [ip], #0x08 19019878Swollman strged r2, [ip], #0x08 19119878Swollman strged r2, [ip], #0x08 1922742Swollman strged r2, [ip], #0x08 19319878Swollman strged r2, [ip], #0x08 19419878Swollman#else 1952742Swollman stmgeia ip!, {r2-r3} 19630711Swollman stmgeia ip!, {r2-r3} 19719878Swollman stmgeia ip!, {r2-r3} 19819878Swollman stmgeia ip!, {r2-r3} 
19919878Swollman stmgeia ip!, {r2-r3} 20019878Swollman stmgeia ip!, {r2-r3} 20130711Swollman stmgeia ip!, {r2-r3} 20243014Swollman stmgeia ip!, {r2-r3} 20343543Swollman stmgeia ip!, {r2-r3} 204221092Sedwin stmgeia ip!, {r2-r3} 205221092Sedwin stmgeia ip!, {r2-r3} 206221092Sedwin stmgeia ip!, {r2-r3} 207221092Sedwin stmgeia ip!, {r2-r3} 208221092Sedwin stmgeia ip!, {r2-r3} 209221092Sedwin stmgeia ip!, {r2-r3} 210221092Sedwin stmgeia ip!, {r2-r3} 211221092Sedwin#endif 212221092Sedwin bgt .Lmemset_loop128 213221092Sedwin RETeq /* Zero length so just exit */ 214221092Sedwin 215267473Sedwin add r1, r1, #0x80 /* Adjust for extra sub */ 216163302Sru 217163302Sru /* Do 32 bytes at a time */ 218163302Sru.Lmemset_loop32: 219163302Sru subs r1, r1, #0x20 220163302Sru#ifdef _ARM_ARCH_5E 221267473Sedwin strged r2, [ip], #0x08 222171948Sedwin strged r2, [ip], #0x08 223171948Sedwin strged r2, [ip], #0x08 224171948Sedwin strged r2, [ip], #0x08 225270728Spluknet#else 226240457Sedwin stmgeia ip!, {r2-r3} 227171948Sedwin stmgeia ip!, {r2-r3} 228172479Sedwin stmgeia ip!, {r2-r3} 229172479Sedwin stmgeia ip!, {r2-r3} 230267473Sedwin#endif 231172479Sedwin bgt .Lmemset_loop32 232172479Sedwin RETeq /* Zero length so just exit */ 233172479Sedwin 234172479Sedwin adds r1, r1, #0x10 /* Partially adjust for extra sub */ 235172479Sedwin 236172479Sedwin /* Deal with 16 bytes or more */ 237172479Sedwin#ifdef _ARM_ARCH_5E 238171948Sedwin strged r2, [ip], #0x08 239172479Sedwin strged r2, [ip], #0x08 24020094Swollman#else 241191618Sedwin stmgeia ip!, {r2-r3} 242191618Sedwin stmgeia ip!, {r2-r3} 243191618Sedwin#endif 244191618Sedwin RETeq /* Zero length so just exit */ 245192886Sedwin 246191618Sedwin addlt r1, r1, #0x10 /* Possibly adjust for extra sub */ 247192886Sedwin 248192886Sedwin /* We have at least 4 bytes so copy as words */ 249191618Sedwin.Lmemset_loop4: 250192886Sedwin subs r1, r1, #0x04 251192886Sedwin strge r3, [ip], #0x04 252191618Sedwin bgt .Lmemset_loop4 253192886Sedwin RETeq /* Zero 
length so just exit */ 254192886Sedwin 255191618Sedwin#ifdef _ARM_ARCH_5E 256192886Sedwin /* Compensate for 64-bit alignment check */ 257191618Sedwin adds r1, r1, #0x04 258191618Sedwin RETeq 259191618Sedwin cmp r1, #2 260191618Sedwin#else 261191618Sedwin cmp r1, #-2 262191618Sedwin#endif 263191618Sedwin 264270728Spluknet strb r3, [ip], #0x01 /* Set 1 byte */ 265270728Spluknet strgeb r3, [ip], #0x01 /* Set another byte */ 266270728Spluknet strgtb r3, [ip] /* and a third */ 267191618Sedwin RET /* Exit */ 268191618Sedwin 269191618Sedwin.Lmemset_wordunaligned: 270191618Sedwin rsb r2, r2, #0x004 271191618Sedwin strb r3, [ip], #0x01 /* Set 1 byte */ 272196582Sedwin cmp r2, #0x02 273196582Sedwin strgeb r3, [ip], #0x01 /* Set another byte */ 274240457Sedwin sub r1, r1, r2 275196582Sedwin strgtb r3, [ip], #0x01 /* and a third */ 276196582Sedwin cmp r1, #0x04 /* More than 4 bytes left? */ 277240457Sedwin bge .Lmemset_wordaligned /* Yup */ 278196582Sedwin 279196582Sedwin.Lmemset_lessthanfour: 280196582Sedwin cmp r1, #0x00 281240457Sedwin RETeq /* Zero length so exit */ 282196582Sedwin strb r3, [ip], #0x01 /* Set 1 byte */ 283196582Sedwin cmp r1, #0x02 284196582Sedwin strgeb r3, [ip], #0x01 /* Set another byte */ 285196582Sedwin strgtb r3, [ip] /* and a third */ 286210718Sedwin RET /* Exit */ 287270728Spluknet 288210718SedwinENTRY(bcmp) 289210718Sedwin mov ip, r0 290210718Sedwin cmp r2, #0x06 291210718Sedwin beq .Lmemcmp_6bytes 292270728Spluknet mov r0, #0x00 293210718Sedwin 294210718Sedwin /* Are both addresses aligned the same way? */ 295210718Sedwin cmp r2, #0x00 296265978Sedwin eornes r3, ip, r1 297265978Sedwin RETeq /* len == 0, or same addresses! */ 298265978Sedwin tst r3, #0x03 299265978Sedwin subne r2, r2, #0x01 300273718Sedwin bne .Lmemcmp_bytewise2 /* Badly aligned. 
Do it the slow way */ 301265978Sedwin 302265978Sedwin /* Word-align the addresses, if necessary */ 303267473Sedwin sub r3, r1, #0x05 304267473Sedwin ands r3, r3, #0x03 305267473Sedwin add r3, r3, r3, lsl #1 306267473Sedwin addne pc, pc, r3, lsl #3 307267473Sedwin nop 308267473Sedwin 309267473Sedwin /* Compare up to 3 bytes */ 310267473Sedwin ldrb r0, [ip], #0x01 311267473Sedwin ldrb r3, [r1], #0x01 312267473Sedwin subs r0, r0, r3 313267473Sedwin RETne 314267473Sedwin subs r2, r2, #0x01 315267473Sedwin RETeq 316267473Sedwin 317265978Sedwin /* Compare up to 2 bytes */ 318265978Sedwin ldrb r0, [ip], #0x01 319267473Sedwin ldrb r3, [r1], #0x01 320267473Sedwin subs r0, r0, r3 321265978Sedwin RETne 322283042Sedwin subs r2, r2, #0x01 323283042Sedwin RETeq 324283042Sedwin 325283042Sedwin /* Compare 1 byte */ 326283042Sedwin ldrb r0, [ip], #0x01 327283042Sedwin ldrb r3, [r1], #0x01 328283042Sedwin subs r0, r0, r3 329283042Sedwin RETne 330283042Sedwin subs r2, r2, #0x01 331283079Sedwin RETeq 332283079Sedwin 333283079Sedwin /* Compare 4 bytes at a time, if possible */ 334283079Sedwin subs r2, r2, #0x04 335283079Sedwin bcc .Lmemcmp_bytewise 336283079Sedwin.Lmemcmp_word_aligned: 337283079Sedwin ldr r0, [ip], #0x04 338283079Sedwin ldr r3, [r1], #0x04 339283079Sedwin subs r2, r2, #0x04 340283079Sedwin cmpcs r0, r3 341284397Sedwin beq .Lmemcmp_word_aligned 342284397Sedwin sub r0, r0, r3 343284397Sedwin 344284397Sedwin /* Correct for extra subtraction, and check if done */ 345283079Sedwin adds r2, r2, #0x04 346267473Sedwin cmpeq r0, #0x00 /* If done, did all bytes match? */ 347267473Sedwin RETeq /* Yup. 
Just return */ 348267473Sedwin 349267473Sedwin /* Re-do the final word byte-wise */ 350267473Sedwin sub ip, ip, #0x04 351265978Sedwin sub r1, r1, #0x04 352267473Sedwin 353267473Sedwin.Lmemcmp_bytewise: 354283079Sedwin add r2, r2, #0x03 355191618Sedwin.Lmemcmp_bytewise2: 3562742Swollman ldrb r0, [ip], #0x01 357248307Sedwin ldrb r3, [r1], #0x01 35819878Swollman subs r2, r2, #0x01 3592742Swollman cmpcs r0, r3 3602742Swollman beq .Lmemcmp_bytewise2 361273718Sedwin sub r0, r0, r3 3622742Swollman RET 3632742Swollman 3642742Swollman /* 365274559Sedwin * 6 byte compares are very common, thanks to the network stack. 3662742Swollman * This code is hand-scheduled to reduce the number of stalls for 3672742Swollman * load results. Everything else being equal, this will be ~32% 368273718Sedwin * faster than a byte-wise memcmp. 3692742Swollman */ 3702742Swollman .align 5 371270728Spluknet.Lmemcmp_6bytes: 3722742Swollman ldrb r3, [r1, #0x00] /* r3 = b2#0 */ 3732742Swollman ldrb r0, [ip, #0x00] /* r0 = b1#0 */ 3742742Swollman ldrb r2, [r1, #0x01] /* r2 = b2#1 */ 375270728Spluknet subs r0, r0, r3 /* r0 = b1#0 - b2#0 */ 376270728Spluknet ldreqb r3, [ip, #0x01] /* r3 = b1#1 */ 377270728Spluknet RETne /* Return if mismatch on #0 */ 378270728Spluknet subs r0, r3, r2 /* r0 = b1#1 - b2#1 */ 379270728Spluknet ldreqb r3, [r1, #0x02] /* r3 = b2#2 */ 380270728Spluknet ldreqb r0, [ip, #0x02] /* r0 = b1#2 */ 381270728Spluknet RETne /* Return if mismatch on #1 */ 382270728Spluknet ldrb r2, [r1, #0x03] /* r2 = b2#3 */ 3832742Swollman subs r0, r0, r3 /* r0 = b1#2 - b2#2 */ 3842742Swollman ldreqb r3, [ip, #0x03] /* r3 = b1#3 */ 3852742Swollman RETne /* Return if mismatch on #2 */ 3862742Swollman subs r0, r3, r2 /* r0 = b1#3 - b2#3 */ 3872742Swollman ldreqb r3, [r1, #0x04] /* r3 = b2#4 */ 388270728Spluknet ldreqb r0, [ip, #0x04] /* r0 = b1#4 */ 3892742Swollman RETne /* Return if mismatch on #3 */ 3902742Swollman ldrb r2, [r1, #0x05] /* r2 = b2#5 */ 391273718Sedwin subs r0, r0, r3 /* r0 = b1#4 - b2#4 
*/ 392273718Sedwin ldreqb r3, [ip, #0x05] /* r3 = b1#5 */ 393273718Sedwin RETne /* Return if mismatch on #4 */ 394273718Sedwin sub r0, r3, r2 /* r0 = b1#5 - b2#5 */ 395273718Sedwin RET 396273718Sedwin 3972742SwollmanENTRY(bcopy) 398273718Sedwin /* switch the source and destination registers */ 39930711Swollman eor r0, r1, r0 40030711Swollman eor r1, r0, r1 4012742Swollman eor r0, r1, r0 4022742SwollmanENTRY(memmove) 4032742Swollman /* Do the buffers overlap? */ 4042742Swollman cmp r0, r1 4052742Swollman RETeq /* Bail now if src/dst are the same */ 40630711Swollman subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */ 407226289Sedwin subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */ 4082742Swollman cmp r3, r2 /* if (r3 < len) we have an overlap */ 409274559Sedwin bcc PIC_SYM(_C_LABEL(memcpy), PLT) 410274559Sedwin 411274559Sedwin /* Determine copy direction */ 412274559Sedwin cmp r1, r0 413274559Sedwin bcc .Lmemmove_backwards 414274559Sedwin 415274559Sedwin moveq r0, #0 /* Quick abort for len=0 */ 416274559Sedwin RETeq 417274559Sedwin 4182742Swollman stmdb sp!, {r0, lr} /* memmove() returns dest addr */ 4192742Swollman subs r2, r2, #4 420273718Sedwin blt .Lmemmove_fl4 /* less than 4 bytes */ 4212742Swollman ands r12, r0, #3 4222742Swollman bne .Lmemmove_fdestul /* oh unaligned destination addr */ 423158421Swollman ands r12, r1, #3 4242742Swollman bne .Lmemmove_fsrcul /* oh unaligned source addr */ 42586222Swollman 42614343Swollman.Lmemmove_ft8: 427158421Swollman /* We have aligned source and destination */ 428158421Swollman subs r2, r2, #8 429158421Swollman blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */ 430158421Swollman subs r2, r2, #0x14 4312742Swollman blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */ 4322742Swollman stmdb sp!, {r4} /* borrow r4 */ 4332742Swollman 43419878Swollman /* blat 32 bytes at a time */ 43530711Swollman /* XXX for really big copies perhaps we should use more registers */ 4362742Swollman.Lmemmove_floop32: 
4372742Swollman ldmia r1!, {r3, r4, r12, lr} 4382742Swollman stmia r0!, {r3, r4, r12, lr} 4392742Swollman ldmia r1!, {r3, r4, r12, lr} 4402742Swollman stmia r0!, {r3, r4, r12, lr} 441243003Sedwin subs r2, r2, #0x20 442243003Sedwin bge .Lmemmove_floop32 443243003Sedwin 444243003Sedwin cmn r2, #0x10 445243003Sedwin ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 446243003Sedwin stmgeia r0!, {r3, r4, r12, lr} 447243003Sedwin subge r2, r2, #0x10 448243003Sedwin ldmia sp!, {r4} /* return r4 */ 449243003Sedwin 450243003Sedwin.Lmemmove_fl32: 451243003Sedwin adds r2, r2, #0x14 452243003Sedwin 453243003Sedwin /* blat 12 bytes at a time */ 454243003Sedwin.Lmemmove_floop12: 455257681Sedwin ldmgeia r1!, {r3, r12, lr} 456257681Sedwin stmgeia r0!, {r3, r12, lr} 457257681Sedwin subges r2, r2, #0x0c 458257681Sedwin bge .Lmemmove_floop12 459257681Sedwin 460257681Sedwin.Lmemmove_fl12: 461257681Sedwin adds r2, r2, #8 462257681Sedwin blt .Lmemmove_fl4 4632742Swollman 46419878Swollman subs r2, r2, #4 4652742Swollman ldrlt r3, [r1], #4 46619878Swollman strlt r3, [r0], #4 4672742Swollman ldmgeia r1!, {r3, r12} 46819878Swollman stmgeia r0!, {r3, r12} 4692742Swollman subge r2, r2, #4 47019878Swollman 4712742Swollman.Lmemmove_fl4: 47219878Swollman /* less than 4 bytes to go */ 47319878Swollman adds r2, r2, #4 4742742Swollman ldmeqia sp!, {r0, pc} /* done */ 47519878Swollman 476149514Swollman /* copy the crud byte at a time */ 477243003Sedwin cmp r2, #2 478243003Sedwin ldrb r3, [r1], #1 479257681Sedwin strb r3, [r0], #1 480257681Sedwin ldrgeb r3, [r1], #1 4812742Swollman strgeb r3, [r0], #1 4822742Swollman ldrgtb r3, [r1], #1 48319878Swollman strgtb r3, [r0], #1 4842742Swollman ldmia sp!, {r0, pc} 48558787Sru 486243003Sedwin /* erg - unaligned destination */ 487273718Sedwin.Lmemmove_fdestul: 48858787Sru rsb r12, r12, #4 489243003Sedwin cmp r12, #2 490273718Sedwin 491273718Sedwin /* align destination with byte copies */ 492257681Sedwin ldrb r3, [r1], #1 4932742Swollman strb r3, 
[r0], #1 4942742Swollman ldrgeb r3, [r1], #1 495274559Sedwin strgeb r3, [r0], #1 4962742Swollman ldrgtb r3, [r1], #1 4972742Swollman strgtb r3, [r0], #1 498273718Sedwin subs r2, r2, r12 4992742Swollman blt .Lmemmove_fl4 /* less the 4 bytes */ 5002742Swollman 5012742Swollman ands r12, r1, #3 502270728Spluknet beq .Lmemmove_ft8 /* we have an aligned source */ 5032742Swollman 5042742Swollman /* erg - unaligned source */ 505181421Sedwin /* This is where it gets nasty ... */ 506181421Sedwin.Lmemmove_fsrcul: 507181421Sedwin bic r1, r1, #3 508181421Sedwin ldr lr, [r1], #4 509240457Sedwin cmp r12, #2 510181421Sedwin bgt .Lmemmove_fsrcul3 511181421Sedwin beq .Lmemmove_fsrcul2 512181424Sedwin cmp r2, #0x0c 513181421Sedwin blt .Lmemmove_fsrcul1loop4 514181421Sedwin sub r2, r2, #0x0c 515181424Sedwin stmdb sp!, {r4, r5} 516181421Sedwin 517181421Sedwin.Lmemmove_fsrcul1loop16: 518181421Sedwin#ifdef __ARMEB__ 519181421Sedwin mov r3, lr, lsl #8 520181424Sedwin#else 521181421Sedwin mov r3, lr, lsr #8 522181421Sedwin#endif 523181421Sedwin ldmia r1!, {r4, r5, r12, lr} 524181424Sedwin#ifdef __ARMEB__ 525181424Sedwin orr r3, r3, r4, lsr #24 526181424Sedwin mov r4, r4, lsl #8 527181424Sedwin orr r4, r4, r5, lsr #24 528181424Sedwin mov r5, r5, lsl #8 529181424Sedwin orr r5, r5, r12, lsr #24 530181424Sedwin mov r12, r12, lsl #8 531240457Sedwin orr r12, r12, lr, lsr #24 532240457Sedwin#else 533240457Sedwin orr r3, r3, r4, lsl #24 534240457Sedwin mov r4, r4, lsr #8 535240457Sedwin orr r4, r4, r5, lsl #24 536240457Sedwin mov r5, r5, lsr #8 537181424Sedwin orr r5, r5, r12, lsl #24 538181424Sedwin mov r12, r12, lsr #8 539181424Sedwin orr r12, r12, lr, lsl #24 540181424Sedwin#endif 541286750Sedwin stmia r0!, {r3-r5, r12} 542181424Sedwin subs r2, r2, #0x10 543181424Sedwin bge .Lmemmove_fsrcul1loop16 544181424Sedwin ldmia sp!, {r4, r5} 545270728Spluknet adds r2, r2, #0x0c 546270728Spluknet blt .Lmemmove_fsrcul1l4 547181424Sedwin 548181424Sedwin.Lmemmove_fsrcul1loop4: 549183066Sedwin#ifdef 
__ARMEB__ 550183066Sedwin mov r12, lr, lsl #8 551192886Sedwin#else 552183066Sedwin mov r12, lr, lsr #8 553183066Sedwin#endif 554183066Sedwin ldr lr, [r1], #4 555183066Sedwin#ifdef __ARMEB__ 556183066Sedwin orr r12, r12, lr, lsr #24 557183066Sedwin#else 558183066Sedwin orr r12, r12, lr, lsl #24 559183066Sedwin#endif 560183066Sedwin str r12, [r0], #4 561270728Spluknet subs r2, r2, #4 562183066Sedwin bge .Lmemmove_fsrcul1loop4 563183066Sedwin 564183864Sedwin.Lmemmove_fsrcul1l4: 565183864Sedwin sub r1, r1, #3 566183864Sedwin b .Lmemmove_fl4 567183864Sedwin 568183864Sedwin.Lmemmove_fsrcul2: 569183864Sedwin cmp r2, #0x0c 570196581Sedwin blt .Lmemmove_fsrcul2loop4 571196581Sedwin sub r2, r2, #0x0c 572196581Sedwin stmdb sp!, {r4, r5} 573196581Sedwin 574196581Sedwin.Lmemmove_fsrcul2loop16: 575196581Sedwin#ifdef __ARMEB__ 576196581Sedwin mov r3, lr, lsl #16 577196581Sedwin#else 578196581Sedwin mov r3, lr, lsr #16 579196581Sedwin#endif 580196581Sedwin ldmia r1!, {r4, r5, r12, lr} 581196581Sedwin#ifdef __ARMEB__ 582240457Sedwin orr r3, r3, r4, lsr #16 583196581Sedwin mov r4, r4, lsl #16 584196581Sedwin orr r4, r4, r5, lsr #16 585196581Sedwin mov r5, r5, lsl #16 586181421Sedwin orr r5, r5, r12, lsr #16 587181421Sedwin mov r12, r12, lsl #16 588181421Sedwin orr r12, r12, lr, lsr #16 589196581Sedwin#else 590196581Sedwin orr r3, r3, r4, lsl #16 5912742Swollman mov r4, r4, lsr #16 592273718Sedwin orr r4, r4, r5, lsl #16 593181421Sedwin mov r5, r5, lsr #16 5942742Swollman orr r5, r5, r12, lsl #16 5952742Swollman mov r12, r12, lsr #16 5962742Swollman orr r12, r12, lr, lsl #16 5972742Swollman#endif 598274559Sedwin stmia r0!, {r3-r5, r12} 5992742Swollman subs r2, r2, #0x10 6002742Swollman bge .Lmemmove_fsrcul2loop16 601270728Spluknet ldmia sp!, {r4, r5} 602181418Sedwin adds r2, r2, #0x0c 603181418Sedwin blt .Lmemmove_fsrcul2l4 604181418Sedwin 605181418Sedwin.Lmemmove_fsrcul2loop4: 606181418Sedwin#ifdef __ARMEB__ 607181418Sedwin mov r12, lr, lsl #16 608181418Sedwin#else 609270728Spluknet 
mov r12, lr, lsr #16 610181418Sedwin#endif 611181418Sedwin ldr lr, [r1], #4 612181418Sedwin#ifdef __ARMEB__ 613181418Sedwin orr r12, r12, lr, lsr #16 614181418Sedwin#else 615273718Sedwin orr r12, r12, lr, lsl #16 616273718Sedwin#endif 617181418Sedwin str r12, [r0], #4 618181418Sedwin subs r2, r2, #4 619181418Sedwin bge .Lmemmove_fsrcul2loop4 620181418Sedwin 621181418Sedwin.Lmemmove_fsrcul2l4: 622181418Sedwin sub r1, r1, #2 623181418Sedwin b .Lmemmove_fl4 624181418Sedwin 625270728Spluknet.Lmemmove_fsrcul3: 626270728Spluknet cmp r2, #0x0c 627181418Sedwin blt .Lmemmove_fsrcul3loop4 628270728Spluknet sub r2, r2, #0x0c 629270728Spluknet stmdb sp!, {r4, r5} 630270728Spluknet 631181418Sedwin.Lmemmove_fsrcul3loop16: 632270728Spluknet#ifdef __ARMEB__ 633270728Spluknet mov r3, lr, lsl #24 634270728Spluknet#else 635270728Spluknet mov r3, lr, lsr #24 636181418Sedwin#endif 637181418Sedwin ldmia r1!, {r4, r5, r12, lr} 638181418Sedwin#ifdef __ARMEB__ 639181418Sedwin orr r3, r3, r4, lsr #8 640181418Sedwin mov r4, r4, lsl #24 641181418Sedwin orr r4, r4, r5, lsr #8 642270728Spluknet mov r5, r5, lsl #24 643270728Spluknet orr r5, r5, r12, lsr #8 644181418Sedwin mov r12, r12, lsl #24 645270728Spluknet orr r12, r12, lr, lsr #8 646181418Sedwin#else 647181418Sedwin orr r3, r3, r4, lsl #8 648183066Sedwin mov r4, r4, lsr #24 649240457Sedwin orr r4, r4, r5, lsl #8 650240457Sedwin mov r5, r5, lsr #24 651183066Sedwin orr r5, r5, r12, lsl #8 652183066Sedwin mov r12, r12, lsr #24 653183066Sedwin orr r12, r12, lr, lsl #8 654183066Sedwin#endif 655183066Sedwin stmia r0!, {r3-r5, r12} 656183066Sedwin subs r2, r2, #0x10 657183066Sedwin bge .Lmemmove_fsrcul3loop16 658190372Sedwin ldmia sp!, {r4, r5} 659190372Sedwin adds r2, r2, #0x0c 660190372Sedwin blt .Lmemmove_fsrcul3l4 661190372Sedwin 662190372Sedwin.Lmemmove_fsrcul3loop4: 663190372Sedwin#ifdef __ARMEB__ 664190372Sedwin mov r12, lr, lsl #24 665190372Sedwin#else 666190372Sedwin mov r12, lr, lsr #24 667190372Sedwin#endif 668190372Sedwin ldr lr, 
[r1], #4 669190372Sedwin#ifdef __ARMEB__ 670190372Sedwin orr r12, r12, lr, lsr #8 671270728Spluknet#else 672190372Sedwin orr r12, r12, lr, lsl #8 673286750Sedwin#endif 674190372Sedwin str r12, [r0], #4 675190372Sedwin subs r2, r2, #4 676190372Sedwin bge .Lmemmove_fsrcul3loop4 677190372Sedwin 678190372Sedwin.Lmemmove_fsrcul3l4: 679190372Sedwin sub r1, r1, #1 680190372Sedwin b .Lmemmove_fl4 681206868Sedwin 682206868Sedwin.Lmemmove_backwards: 683206868Sedwin add r1, r1, r2 684206868Sedwin add r0, r0, r2 685206868Sedwin subs r2, r2, #4 686206868Sedwin blt .Lmemmove_bl4 /* less than 4 bytes */ 687206868Sedwin ands r12, r0, #3 688206868Sedwin bne .Lmemmove_bdestul /* oh unaligned destination addr */ 689206868Sedwin ands r12, r1, #3 690206868Sedwin bne .Lmemmove_bsrcul /* oh unaligned source addr */ 691206868Sedwin 692220286Sedwin.Lmemmove_bt8: 693220286Sedwin /* We have aligned source and destination */ 694273718Sedwin subs r2, r2, #8 695273718Sedwin blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */ 696220286Sedwin stmdb sp!, {r4, lr} 697220286Sedwin subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ 698220286Sedwin blt .Lmemmove_bl32 699220286Sedwin 700220286Sedwin /* blat 32 bytes at a time */ 701220286Sedwin /* XXX for really big copies perhaps we should use more registers */ 702220286Sedwin.Lmemmove_bloop32: 703220286Sedwin ldmdb r1!, {r3, r4, r12, lr} 704220286Sedwin stmdb r0!, {r3, r4, r12, lr} 705220286Sedwin ldmdb r1!, {r3, r4, r12, lr} 706220286Sedwin stmdb r0!, {r3, r4, r12, lr} 707270728Spluknet subs r2, r2, #0x20 708220286Sedwin bge .Lmemmove_bloop32 709220286Sedwin 710220286Sedwin.Lmemmove_bl32: 711220286Sedwin cmn r2, #0x10 712220286Sedwin ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 713220286Sedwin stmgedb r0!, {r3, r4, r12, lr} 714220286Sedwin subge r2, r2, #0x10 715220286Sedwin adds r2, r2, #0x14 716220286Sedwin ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ 717220286Sedwin stmgedb r0!, {r3, r12, lr} 
718220286Sedwin subge r2, r2, #0x0c 719220286Sedwin ldmia sp!, {r4, lr} 720240457Sedwin 721270728Spluknet.Lmemmove_bl12: 722270728Spluknet adds r2, r2, #8 723270728Spluknet blt .Lmemmove_bl4 724240457Sedwin subs r2, r2, #4 725240457Sedwin ldrlt r3, [r1, #-4]! 726240457Sedwin strlt r3, [r0, #-4]! 727240457Sedwin ldmgedb r1!, {r3, r12} 728240457Sedwin stmgedb r0!, {r3, r12} 729240457Sedwin subge r2, r2, #4 730240457Sedwin 731240457Sedwin.Lmemmove_bl4: 732240457Sedwin /* less than 4 bytes to go */ 733240457Sedwin adds r2, r2, #4 734240457Sedwin RETeq /* done */ 735240457Sedwin 736240457Sedwin /* copy the crud byte at a time */ 737240457Sedwin cmp r2, #2 738240457Sedwin ldrb r3, [r1, #-1]! 739240457Sedwin strb r3, [r0, #-1]! 740240457Sedwin ldrgeb r3, [r1, #-1]! 741240457Sedwin strgeb r3, [r0, #-1]! 742270728Spluknet ldrgtb r3, [r1, #-1]! 743270728Spluknet strgtb r3, [r0, #-1]! 744240457Sedwin RET 745240457Sedwin 746240457Sedwin /* erg - unaligned destination */ 747240457Sedwin.Lmemmove_bdestul: 748240457Sedwin cmp r12, #2 749240457Sedwin 750240457Sedwin /* align destination with byte copies */ 751240457Sedwin ldrb r3, [r1, #-1]! 752240457Sedwin strb r3, [r0, #-1]! 753240457Sedwin ldrgeb r3, [r1, #-1]! 754240457Sedwin strgeb r3, [r0, #-1]! 755240457Sedwin ldrgtb r3, [r1, #-1]! 756240457Sedwin strgtb r3, [r0, #-1]! 757240457Sedwin subs r2, r2, r12 758240457Sedwin blt .Lmemmove_bl4 /* less than 4 bytes to go */ 759240457Sedwin ands r12, r1, #3 760248307Sedwin beq .Lmemmove_bt8 /* we have an aligned source */ 761248307Sedwin 762248307Sedwin /* erg - unaligned source */ 763248307Sedwin /* This is where it gets nasty ... 
*/ 764248307Sedwin.Lmemmove_bsrcul: 765273718Sedwin bic r1, r1, #3 766253009Sedwin ldr r3, [r1, #0] 767253009Sedwin cmp r12, #2 768253009Sedwin blt .Lmemmove_bsrcul1 769253009Sedwin beq .Lmemmove_bsrcul2 770253009Sedwin cmp r2, #0x0c 771253009Sedwin blt .Lmemmove_bsrcul3loop4 772257681Sedwin sub r2, r2, #0x0c 773257681Sedwin stmdb sp!, {r4, r5, lr} 774257681Sedwin 775257681Sedwin.Lmemmove_bsrcul3loop16: 776257681Sedwin#ifdef __ARMEB__ 777257681Sedwin mov lr, r3, lsr #8 778257681Sedwin#else 779257681Sedwin mov lr, r3, lsl #8 780257681Sedwin#endif 781257681Sedwin ldmdb r1!, {r3-r5, r12} 782263901Sedwin#ifdef __ARMEB__ 783263901Sedwin orr lr, lr, r12, lsl #24 784263901Sedwin mov r12, r12, lsr #8 785267473Sedwin orr r12, r12, r5, lsl #24 786267473Sedwin mov r5, r5, lsr #8 787267473Sedwin orr r5, r5, r4, lsl #24 788267473Sedwin mov r4, r4, lsr #8 789267473Sedwin orr r4, r4, r3, lsl #24 790267473Sedwin#else 791284397Sedwin orr lr, lr, r12, lsr #24 792284397Sedwin mov r12, r12, lsl #8 793284397Sedwin orr r12, r12, r5, lsr #24 794284397Sedwin mov r5, r5, lsl #8 795284397Sedwin orr r5, r5, r4, lsr #24 796284397Sedwin mov r4, r4, lsl #8 797284397Sedwin orr r4, r4, r3, lsr #24 798284397Sedwin#endif 799284397Sedwin stmdb r0!, {r4, r5, r12, lr} 800284397Sedwin subs r2, r2, #0x10 801284397Sedwin bge .Lmemmove_bsrcul3loop16 802284397Sedwin ldmia sp!, {r4, r5, lr} 803284397Sedwin adds r2, r2, #0x0c 804284397Sedwin blt .Lmemmove_bsrcul3l4 805284397Sedwin 806284397Sedwin.Lmemmove_bsrcul3loop4: 807284397Sedwin#ifdef __ARMEB__ 808284397Sedwin mov r12, r3, lsr #8 809284397Sedwin#else 810284397Sedwin mov r12, r3, lsl #8 811284397Sedwin#endif 812284397Sedwin ldr r3, [r1, #-4]! 813267473Sedwin#ifdef __ARMEB__ 814284397Sedwin orr r12, r12, r3, lsl #24 815284397Sedwin#else 816267473Sedwin orr r12, r12, r3, lsr #24 817267473Sedwin#endif 818284397Sedwin str r12, [r0, #-4]! 
819284397Sedwin subs r2, r2, #4 820267473Sedwin bge .Lmemmove_bsrcul3loop4 821267473Sedwin 822267473Sedwin.Lmemmove_bsrcul3l4: 823284397Sedwin add r1, r1, #3 824284397Sedwin b .Lmemmove_bl4 825284397Sedwin 826267473Sedwin.Lmemmove_bsrcul2: 827267473Sedwin cmp r2, #0x0c 828267473Sedwin blt .Lmemmove_bsrcul2loop4 829267473Sedwin sub r2, r2, #0x0c 830267473Sedwin stmdb sp!, {r4, r5, lr} 831267473Sedwin 832267473Sedwin.Lmemmove_bsrcul2loop16: 833267473Sedwin#ifdef __ARMEB__ 834267473Sedwin mov lr, r3, lsr #16 835248307Sedwin#else 836248307Sedwin mov lr, r3, lsl #16 83720094Swollman#endif 838183066Sedwin ldmdb r1!, {r3-r5, r12} 83919878Swollman#ifdef __ARMEB__ 8402742Swollman orr lr, lr, r12, lsl #16 84119878Swollman mov r12, r12, lsr #16 8422742Swollman orr r12, r12, r5, lsl #16 84319878Swollman mov r5, r5, lsr #16 8442742Swollman orr r5, r5, r4, lsl #16 84519878Swollman mov r4, r4, lsr #16 8462742Swollman orr r4, r4, r3, lsl #16 84719878Swollman#else 8482742Swollman orr lr, lr, r12, lsr #16 84919878Swollman mov r12, r12, lsl #16 8502742Swollman orr r12, r12, r5, lsr #16 8512742Swollman mov r5, r5, lsl #16 85219878Swollman orr r5, r5, r4, lsr #16 8532742Swollman mov r4, r4, lsl #16 854181418Sedwin orr r4, r4, r3, lsr #16 855183066Sedwin#endif 856190372Sedwin stmdb r0!, {r4, r5, r12, lr} 857267473Sedwin subs r2, r2, #0x10 858206868Sedwin bge .Lmemmove_bsrcul2loop16 859206868Sedwin ldmia sp!, {r4, r5, lr} 860220286Sedwin adds r2, r2, #0x0c 861267473Sedwin blt .Lmemmove_bsrcul2l4 862267473Sedwin 863267473Sedwin.Lmemmove_bsrcul2loop4: 864267473Sedwin#ifdef __ARMEB__ 865267473Sedwin mov r12, r3, lsr #16 866267473Sedwin#else 867267473Sedwin mov r12, r3, lsl #16 868267473Sedwin#endif 869284397Sedwin ldr r3, [r1, #-4]! 870267473Sedwin#ifdef __ARMEB__ 871267473Sedwin orr r12, r12, r3, lsl #16 872284397Sedwin#else 873284397Sedwin orr r12, r12, r3, lsr #16 874284397Sedwin#endif 875284397Sedwin str r12, [r0, #-4]! 
876284397Sedwin subs r2, r2, #4 877284397Sedwin bge .Lmemmove_bsrcul2loop4 878284397Sedwin 879284397Sedwin.Lmemmove_bsrcul2l4: 880284397Sedwin add r1, r1, #2 881284397Sedwin b .Lmemmove_bl4 882284397Sedwin 883284397Sedwin.Lmemmove_bsrcul1: 884284397Sedwin cmp r2, #0x0c 885284397Sedwin blt .Lmemmove_bsrcul1loop4 886284397Sedwin sub r2, r2, #0x0c 887284397Sedwin stmdb sp!, {r4, r5, lr} 888284397Sedwin 889284397Sedwin.Lmemmove_bsrcul1loop32: 890267473Sedwin#ifdef __ARMEB__ 891284397Sedwin mov lr, r3, lsr #24 892284397Sedwin#else 893240457Sedwin mov lr, r3, lsl #24 8942742Swollman#endif 8952742Swollman ldmdb r1!, {r3-r5, r12} 89619878Swollman#ifdef __ARMEB__ 89719878Swollman orr lr, lr, r12, lsl #8 898181418Sedwin mov r12, r12, lsr #24 899257681Sedwin orr r12, r12, r5, lsl #8 90019878Swollman mov r5, r5, lsr #24 901257681Sedwin orr r5, r5, r4, lsl #8 902257681Sedwin mov r4, r4, lsr #24 903257681Sedwin orr r4, r4, r3, lsl #8 904257681Sedwin#else 905257681Sedwin orr lr, lr, r12, lsr #8 906257681Sedwin mov r12, r12, lsl #24 907257681Sedwin orr r12, r12, r5, lsr #8 908257681Sedwin mov r5, r5, lsl #24 909257681Sedwin orr r5, r5, r4, lsr #8 910257681Sedwin mov r4, r4, lsl #24 911270728Spluknet orr r4, r4, r3, lsr #8 91230711Swollman#endif 913257681Sedwin stmdb r0!, {r4, r5, r12, lr} 9148029Swollman subs r2, r2, #0x10 9152742Swollman bge .Lmemmove_bsrcul1loop32 916273718Sedwin ldmia sp!, {r4, r5, lr} 917273718Sedwin adds r2, r2, #0x0c 918273718Sedwin blt .Lmemmove_bsrcul1l4 919273718Sedwin 920273718Sedwin.Lmemmove_bsrcul1loop4: 921273718Sedwin#ifdef __ARMEB__ 9222742Swollman mov r12, r3, lsr #24 9232742Swollman#else 92430711Swollman mov r12, r3, lsl #24 925273718Sedwin#endif 926273718Sedwin ldr r3, [r1, #-4]! 927273718Sedwin#ifdef __ARMEB__ 928273718Sedwin orr r12, r12, r3, lsl #8 929273718Sedwin#else 930273718Sedwin orr r12, r12, r3, lsr #8 931273718Sedwin#endif 9322742Swollman str r12, [r0, #-4]! 
9332742Swollman subs r2, r2, #4 934158421Swollman bge .Lmemmove_bsrcul1loop4 935158421Swollman 936169811Swollman.Lmemmove_bsrcul1l4: 937273718Sedwin add r1, r1, #1 938273718Sedwin b .Lmemmove_bl4 939169811Swollman 940169811Swollman#if !defined(_ARM_ARCH_5E) 941169811SwollmanENTRY(memcpy) 942169811Swollman /* save leaf functions having to store this away */ 943169811Swollman /* Do not check arm_memcpy if we're running from flash */ 944169811Swollman#ifdef FLASHADDR 945240457Sedwin#if FLASHADDR > PHYSADDR 946169811Swollman ldr r3, =FLASHADDR 947169811Swollman cmp r3, pc 948169811Swollman bls .Lnormal 949169811Swollman#else 95020094Swollman ldr r3, =FLASHADDR 95120094Swollman cmp r3, pc 95220094Swollman bhi .Lnormal 9532742Swollman#endif 9542742Swollman#endif 955273718Sedwin ldr r3, .L_arm_memcpy 956273718Sedwin ldr r3, [r3] 957273718Sedwin cmp r3, #0 95830711Swollman beq .Lnormal 95930711Swollman ldr r3, .L_min_memcpy_size 96030711Swollman ldr r3, [r3] 9612742Swollman cmp r2, r3 9622742Swollman blt .Lnormal 963273718Sedwin stmfd sp!, {r0-r2, r4, lr} 9642742Swollman mov r3, #0 9652742Swollman ldr r4, .L_arm_memcpy 9662742Swollman mov lr, pc 9672742Swollman ldr pc, [r4] 96830711Swollman cmp r0, #0 969273718Sedwin ldmfd sp!, {r0-r2, r4, lr} 970273718Sedwin RETeq 971273718Sedwin 972273718Sedwin.Lnormal: 973273718Sedwin stmdb sp!, {r0, lr} /* memcpy() returns dest addr */ 974273718Sedwin 975273718Sedwin subs r2, r2, #4 976273718Sedwin blt .Lmemcpy_l4 /* less than 4 bytes */ 977273718Sedwin ands r12, r0, #3 9782742Swollman bne .Lmemcpy_destul /* oh unaligned destination addr */ 979270728Spluknet ands r12, r1, #3 9802742Swollman bne .Lmemcpy_srcul /* oh unaligned source addr */ 981273718Sedwin 982270728Spluknet.Lmemcpy_t8: 98343014Swollman /* We have aligned source and destination */ 984270728Spluknet subs r2, r2, #8 985270728Spluknet blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */ 986270728Spluknet subs r2, r2, #0x14 98743014Swollman blt .Lmemcpy_l32 /* less than 
32 bytes (12 from above) */ 988270728Spluknet stmdb sp!, {r4} /* borrow r4 */ 989270728Spluknet 99058787Sru /* blat 32 bytes at a time */ 99143014Swollman /* XXX for really big copies perhaps we should use more registers */ 99243014Swollman.Lmemcpy_loop32: 99343014Swollman ldmia r1!, {r3, r4, r12, lr} 99443014Swollman stmia r0!, {r3, r4, r12, lr} 99543014Swollman ldmia r1!, {r3, r4, r12, lr} 99643014Swollman stmia r0!, {r3, r4, r12, lr} 9972742Swollman subs r2, r2, #0x20 9982742Swollman bge .Lmemcpy_loop32 999273718Sedwin 10002742Swollman cmn r2, #0x10 10012742Swollman ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 1002270728Spluknet stmgeia r0!, {r3, r4, r12, lr} 100314343Swollman subge r2, r2, #0x10 100430711Swollman ldmia sp!, {r4} /* return r4 */ 1005270728Spluknet 100658787Sru.Lmemcpy_l32: 100758787Sru adds r2, r2, #0x14 1008270728Spluknet 100914343Swollman /* blat 12 bytes at a time */ 1010270728Spluknet.Lmemcpy_loop12: 10112742Swollman ldmgeia r1!, {r3, r12, lr} 1012270728Spluknet stmgeia r0!, {r3, r12, lr} 10132742Swollman subges r2, r2, #0x0c 10142742Swollman bge .Lmemcpy_loop12 10152742Swollman 1016273718Sedwin.Lmemcpy_l12: 101719878Swollman adds r2, r2, #8 101886222Swollman blt .Lmemcpy_l4 101986222Swollman 102086222Swollman subs r2, r2, #4 102186222Swollman ldrlt r3, [r1], #4 102286222Swollman strlt r3, [r0], #4 102386222Swollman ldmgeia r1!, {r3, r12} 10242742Swollman stmgeia r0!, {r3, r12} 10252742Swollman subge r2, r2, #4 1026270728Spluknet 10272742Swollman.Lmemcpy_l4: 10282742Swollman /* less than 4 bytes to go */ 1029274559Sedwin adds r2, r2, #4 10302742Swollman#ifdef __APCS_26_ 10312742Swollman ldmeqia sp!, {r0, pc}^ /* done */ 10322742Swollman#else 103330711Swollman ldmeqia sp!, {r0, pc} /* done */ 10342742Swollman#endif 10352742Swollman /* copy the crud byte at a time */ 10362742Swollman cmp r2, #2 103730711Swollman ldrb r3, [r1], #1 103830711Swollman strb r3, [r0], #1 1039273718Sedwin ldrgeb r3, [r1], #1 1040273718Sedwin strgeb 
r3, [r0], #1 1041273718Sedwin ldrgtb r3, [r1], #1 104230711Swollman strgtb r3, [r0], #1 104358787Sru ldmia sp!, {r0, pc} 10442742Swollman 10452742Swollman /* erg - unaligned destination */ 10462742Swollman.Lmemcpy_destul: 104758787Sru rsb r12, r12, #4 1048270728Spluknet cmp r12, #2 1049270728Spluknet 1050270728Spluknet /* align destination with byte copies */ 105158787Sru ldrb r3, [r1], #1 105258787Sru strb r3, [r0], #1 105358787Sru ldrgeb r3, [r1], #1 105458787Sru strgeb r3, [r0], #1 10552742Swollman ldrgtb r3, [r1], #1 105619878Swollman strgtb r3, [r0], #1 105714343Swollman subs r2, r2, r12 105819878Swollman blt .Lmemcpy_l4 /* less the 4 bytes */ 105919878Swollman 10602742Swollman ands r12, r1, #3 10612742Swollman beq .Lmemcpy_t8 /* we have an aligned source */ 106258787Sru 106358787Sru /* erg - unaligned source */ 10642742Swollman /* This is where it gets nasty ... */ 1065226289Sedwin.Lmemcpy_srcul: 1066257681Sedwin bic r1, r1, #3 1067226289Sedwin ldr lr, [r1], #4 10682742Swollman cmp r12, #2 1069273718Sedwin bgt .Lmemcpy_srcul3 10702742Swollman beq .Lmemcpy_srcul2 10712742Swollman cmp r2, #0x0c 1072274559Sedwin blt .Lmemcpy_srcul1loop4 10732742Swollman sub r2, r2, #0x0c 10742742Swollman stmdb sp!, {r4, r5} 1075270728Spluknet 10762742Swollman.Lmemcpy_srcul1loop16: 10772742Swollman mov r3, lr, lsr #8 1078149514Swollman ldmia r1!, {r4, r5, r12, lr} 1079149514Swollman orr r3, r3, r4, lsl #24 1080270728Spluknet mov r4, r4, lsr #8 1081149514Swollman orr r4, r4, r5, lsl #24 1082149514Swollman mov r5, r5, lsr #8 1083149514Swollman orr r5, r5, r12, lsl #24 1084149514Swollman mov r12, r12, lsr #8 1085149514Swollman orr r12, r12, lr, lsl #24 1086149514Swollman stmia r0!, {r3-r5, r12} 1087149514Swollman subs r2, r2, #0x10 1088149514Swollman bge .Lmemcpy_srcul1loop16 1089270728Spluknet ldmia sp!, {r4, r5} 1090273718Sedwin adds r2, r2, #0x0c 1091149514Swollman blt .Lmemcpy_srcul1l4 1092149514Swollman 1093158421Swollman.Lmemcpy_srcul1loop4: 1094158421Swollman mov r12, lr, lsr 
#8 1095158421Swollman ldr lr, [r1], #4 1096158421Swollman orr r12, r12, lr, lsl #24 1097158421Swollman str r12, [r0], #4 1098158421Swollman subs r2, r2, #4 1099149514Swollman bge .Lmemcpy_srcul1loop4 1100190372Sedwin 1101190372Sedwin.Lmemcpy_srcul1l4: 1102190372Sedwin sub r1, r1, #3 1103190372Sedwin b .Lmemcpy_l4 1104190372Sedwin 1105190372Sedwin.Lmemcpy_srcul2: 1106190372Sedwin cmp r2, #0x0c 1107190372Sedwin blt .Lmemcpy_srcul2loop4 1108190372Sedwin sub r2, r2, #0x0c 1109190372Sedwin stmdb sp!, {r4, r5} 1110190372Sedwin 1111190372Sedwin.Lmemcpy_srcul2loop16: 1112190372Sedwin mov r3, lr, lsr #16 1113190372Sedwin ldmia r1!, {r4, r5, r12, lr} 1114190372Sedwin orr r3, r3, r4, lsl #16 1115190372Sedwin mov r4, r4, lsr #16 1116190372Sedwin orr r4, r4, r5, lsl #16 1117190372Sedwin mov r5, r5, lsr #16 1118190372Sedwin orr r5, r5, r12, lsl #16 1119190372Sedwin mov r12, r12, lsr #16 1120190372Sedwin orr r12, r12, lr, lsl #16 1121270728Spluknet stmia r0!, {r3-r5, r12} 1122270728Spluknet subs r2, r2, #0x10 1123270728Spluknet bge .Lmemcpy_srcul2loop16 1124270728Spluknet ldmia sp!, {r4, r5} 1125270728Spluknet adds r2, r2, #0x0c 1126270728Spluknet blt .Lmemcpy_srcul2l4 1127190372Sedwin 1128206868Sedwin.Lmemcpy_srcul2loop4: 1129206868Sedwin mov r12, lr, lsr #16 1130206868Sedwin ldr lr, [r1], #4 1131206868Sedwin orr r12, r12, lr, lsl #16 1132206868Sedwin str r12, [r0], #4 1133206868Sedwin subs r2, r2, #4 1134206868Sedwin bge .Lmemcpy_srcul2loop4 1135206868Sedwin 1136206868Sedwin.Lmemcpy_srcul2l4: 1137206868Sedwin sub r1, r1, #2 1138206868Sedwin b .Lmemcpy_l4 1139206868Sedwin 1140206868Sedwin.Lmemcpy_srcul3: 1141206868Sedwin cmp r2, #0x0c 11422742Swollman blt .Lmemcpy_srcul3loop4 114319878Swollman sub r2, r2, #0x0c 11442742Swollman stmdb sp!, {r4, r5} 114519878Swollman 11462742Swollman.Lmemcpy_srcul3loop16: 114719878Swollman mov r3, lr, lsr #24 11482742Swollman ldmia r1!, {r4, r5, r12, lr} 114919878Swollman orr r3, r3, r4, lsl #8 11502742Swollman mov r4, r4, lsr #24 
115119878Swollman orr r4, r4, r5, lsl #8 11522742Swollman mov r5, r5, lsr #24 115319878Swollman orr r5, r5, r12, lsl #8 11542742Swollman mov r12, r12, lsr #24 11552742Swollman orr r12, r12, lr, lsl #8 115619878Swollman stmia r0!, {r3-r5, r12} 11572742Swollman subs r2, r2, #0x10 115819878Swollman bge .Lmemcpy_srcul3loop16 11592742Swollman ldmia sp!, {r4, r5} 116019878Swollman adds r2, r2, #0x0c 116120094Swollman blt .Lmemcpy_srcul3l4 116219878Swollman 116319878Swollman.Lmemcpy_srcul3loop4: 1164149514Swollman mov r12, lr, lsr #24 1165149514Swollman ldr lr, [r1], #4 1166190372Sedwin orr r12, r12, lr, lsl #8 1167190372Sedwin str r12, [r0], #4 1168206868Sedwin subs r2, r2, #4 1169158421Swollman bge .Lmemcpy_srcul3loop4 1170158421Swollman 1171158421Swollman.Lmemcpy_srcul3l4: 11722742Swollman sub r1, r1, #1 11732742Swollman b .Lmemcpy_l4 1174273718Sedwin#else 117519878Swollman/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 11762742SwollmanENTRY(memcpy) 11772742Swollman pld [r1] 1178274559Sedwin cmp r2, #0x0c 11792742Swollman ble .Lmemcpy_short /* <= 12 bytes */ 11802742Swollman#ifdef FLASHADDR 11812742Swollman#if FLASHADDR > PHYSADDR 1182273718Sedwin ldr r3, =FLASHADDR 1183 cmp r3, pc 1184 bls .Lnormal 1185#else 1186 ldr r3, =FLASHADDR 1187 cmp r3, pc 1188 bhi .Lnormal 1189#endif 1190#endif 1191 ldr r3, .L_arm_memcpy 1192 ldr r3, [r3] 1193 cmp r3, #0 1194 beq .Lnormal 1195 ldr r3, .L_min_memcpy_size 1196 ldr r3, [r3] 1197 cmp r2, r3 1198 blt .Lnormal 1199 stmfd sp!, {r0-r2, r4, lr} 1200 mov r3, #0 1201 ldr r4, .L_arm_memcpy 1202 mov lr, pc 1203 ldr pc, [r4] 1204 cmp r0, #0 1205 ldmfd sp!, {r0-r2, r4, lr} 1206 RETeq 1207.Lnormal: 1208 mov r3, r0 /* We must not clobber r0 */ 1209 1210 /* Word-align the destination buffer */ 1211 ands ip, r3, #0x03 /* Already word aligned? 
*/ 1212 beq .Lmemcpy_wordaligned /* Yup */ 1213 cmp ip, #0x02 1214 ldrb ip, [r1], #0x01 1215 sub r2, r2, #0x01 1216 strb ip, [r3], #0x01 1217 ldrleb ip, [r1], #0x01 1218 suble r2, r2, #0x01 1219 strleb ip, [r3], #0x01 1220 ldrltb ip, [r1], #0x01 1221 sublt r2, r2, #0x01 1222 strltb ip, [r3], #0x01 1223 1224 /* Destination buffer is now word aligned */ 1225.Lmemcpy_wordaligned: 1226 ands ip, r1, #0x03 /* Is src also word-aligned? */ 1227 bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 1228 1229 /* Quad-align the destination buffer */ 1230 tst r3, #0x07 /* Already quad aligned? */ 1231 ldrne ip, [r1], #0x04 1232 stmfd sp!, {r4-r9} /* Free up some registers */ 1233 subne r2, r2, #0x04 1234 strne ip, [r3], #0x04 1235 1236 /* Destination buffer quad aligned, source is at least word aligned */ 1237 subs r2, r2, #0x80 1238 blt .Lmemcpy_w_lessthan128 1239 1240 /* Copy 128 bytes at a time */ 1241.Lmemcpy_w_loop128: 1242 ldr r4, [r1], #0x04 /* LD:00-03 */ 1243 ldr r5, [r1], #0x04 /* LD:04-07 */ 1244 pld [r1, #0x18] /* Prefetch 0x20 */ 1245 ldr r6, [r1], #0x04 /* LD:08-0b */ 1246 ldr r7, [r1], #0x04 /* LD:0c-0f */ 1247 ldr r8, [r1], #0x04 /* LD:10-13 */ 1248 ldr r9, [r1], #0x04 /* LD:14-17 */ 1249 strd r4, [r3], #0x08 /* ST:00-07 */ 1250 ldr r4, [r1], #0x04 /* LD:18-1b */ 1251 ldr r5, [r1], #0x04 /* LD:1c-1f */ 1252 strd r6, [r3], #0x08 /* ST:08-0f */ 1253 ldr r6, [r1], #0x04 /* LD:20-23 */ 1254 ldr r7, [r1], #0x04 /* LD:24-27 */ 1255 pld [r1, #0x18] /* Prefetch 0x40 */ 1256 strd r8, [r3], #0x08 /* ST:10-17 */ 1257 ldr r8, [r1], #0x04 /* LD:28-2b */ 1258 ldr r9, [r1], #0x04 /* LD:2c-2f */ 1259 strd r4, [r3], #0x08 /* ST:18-1f */ 1260 ldr r4, [r1], #0x04 /* LD:30-33 */ 1261 ldr r5, [r1], #0x04 /* LD:34-37 */ 1262 strd r6, [r3], #0x08 /* ST:20-27 */ 1263 ldr r6, [r1], #0x04 /* LD:38-3b */ 1264 ldr r7, [r1], #0x04 /* LD:3c-3f */ 1265 strd r8, [r3], #0x08 /* ST:28-2f */ 1266 ldr r8, [r1], #0x04 /* LD:40-43 */ 1267 ldr r9, [r1], #0x04 /* LD:44-47 */ 1268 pld [r1, #0x18] /* 
Prefetch 0x60 */ 1269 strd r4, [r3], #0x08 /* ST:30-37 */ 1270 ldr r4, [r1], #0x04 /* LD:48-4b */ 1271 ldr r5, [r1], #0x04 /* LD:4c-4f */ 1272 strd r6, [r3], #0x08 /* ST:38-3f */ 1273 ldr r6, [r1], #0x04 /* LD:50-53 */ 1274 ldr r7, [r1], #0x04 /* LD:54-57 */ 1275 strd r8, [r3], #0x08 /* ST:40-47 */ 1276 ldr r8, [r1], #0x04 /* LD:58-5b */ 1277 ldr r9, [r1], #0x04 /* LD:5c-5f */ 1278 strd r4, [r3], #0x08 /* ST:48-4f */ 1279 ldr r4, [r1], #0x04 /* LD:60-63 */ 1280 ldr r5, [r1], #0x04 /* LD:64-67 */ 1281 pld [r1, #0x18] /* Prefetch 0x80 */ 1282 strd r6, [r3], #0x08 /* ST:50-57 */ 1283 ldr r6, [r1], #0x04 /* LD:68-6b */ 1284 ldr r7, [r1], #0x04 /* LD:6c-6f */ 1285 strd r8, [r3], #0x08 /* ST:58-5f */ 1286 ldr r8, [r1], #0x04 /* LD:70-73 */ 1287 ldr r9, [r1], #0x04 /* LD:74-77 */ 1288 strd r4, [r3], #0x08 /* ST:60-67 */ 1289 ldr r4, [r1], #0x04 /* LD:78-7b */ 1290 ldr r5, [r1], #0x04 /* LD:7c-7f */ 1291 strd r6, [r3], #0x08 /* ST:68-6f */ 1292 strd r8, [r3], #0x08 /* ST:70-77 */ 1293 subs r2, r2, #0x80 1294 strd r4, [r3], #0x08 /* ST:78-7f */ 1295 bge .Lmemcpy_w_loop128 1296 1297.Lmemcpy_w_lessthan128: 1298 adds r2, r2, #0x80 /* Adjust for extra sub */ 1299 ldmeqfd sp!, {r4-r9} 1300 RETeq /* Return now if done */ 1301 subs r2, r2, #0x20 1302 blt .Lmemcpy_w_lessthan32 1303 1304 /* Copy 32 bytes at a time */ 1305.Lmemcpy_w_loop32: 1306 ldr r4, [r1], #0x04 1307 ldr r5, [r1], #0x04 1308 pld [r1, #0x18] 1309 ldr r6, [r1], #0x04 1310 ldr r7, [r1], #0x04 1311 ldr r8, [r1], #0x04 1312 ldr r9, [r1], #0x04 1313 strd r4, [r3], #0x08 1314 ldr r4, [r1], #0x04 1315 ldr r5, [r1], #0x04 1316 strd r6, [r3], #0x08 1317 strd r8, [r3], #0x08 1318 subs r2, r2, #0x20 1319 strd r4, [r3], #0x08 1320 bge .Lmemcpy_w_loop32 1321 1322.Lmemcpy_w_lessthan32: 1323 adds r2, r2, #0x20 /* Adjust for extra sub */ 1324 ldmeqfd sp!, {r4-r9} 1325 RETeq /* Return now if done */ 1326 1327 and r4, r2, #0x18 1328 rsbs r4, r4, #0x18 1329 addne pc, pc, r4, lsl #1 1330 nop 1331 1332 /* At least 24 bytes remaining */ 
1333 ldr r4, [r1], #0x04 1334 ldr r5, [r1], #0x04 1335 sub r2, r2, #0x08 1336 strd r4, [r3], #0x08 1337 1338 /* At least 16 bytes remaining */ 1339 ldr r4, [r1], #0x04 1340 ldr r5, [r1], #0x04 1341 sub r2, r2, #0x08 1342 strd r4, [r3], #0x08 1343 1344 /* At least 8 bytes remaining */ 1345 ldr r4, [r1], #0x04 1346 ldr r5, [r1], #0x04 1347 subs r2, r2, #0x08 1348 strd r4, [r3], #0x08 1349 1350 /* Less than 8 bytes remaining */ 1351 ldmfd sp!, {r4-r9} 1352 RETeq /* Return now if done */ 1353 subs r2, r2, #0x04 1354 ldrge ip, [r1], #0x04 1355 strge ip, [r3], #0x04 1356 RETeq /* Return now if done */ 1357 addlt r2, r2, #0x04 1358 ldrb ip, [r1], #0x01 1359 cmp r2, #0x02 1360 ldrgeb r2, [r1], #0x01 1361 strb ip, [r3], #0x01 1362 ldrgtb ip, [r1] 1363 strgeb r2, [r3], #0x01 1364 strgtb ip, [r3] 1365 RET 1366 1367 1368/* 1369 * At this point, it has not been possible to word align both buffers. 1370 * The destination buffer is word aligned, but the source buffer is not. 1371 */ 1372.Lmemcpy_bad_align: 1373 stmfd sp!, {r4-r7} 1374 bic r1, r1, #0x03 1375 cmp ip, #2 1376 ldr ip, [r1], #0x04 1377 bgt .Lmemcpy_bad3 1378 beq .Lmemcpy_bad2 1379 b .Lmemcpy_bad1 1380 1381.Lmemcpy_bad1_loop16: 1382#ifdef __ARMEB__ 1383 mov r4, ip, lsl #8 1384#else 1385 mov r4, ip, lsr #8 1386#endif 1387 ldr r5, [r1], #0x04 1388 pld [r1, #0x018] 1389 ldr r6, [r1], #0x04 1390 ldr r7, [r1], #0x04 1391 ldr ip, [r1], #0x04 1392#ifdef __ARMEB__ 1393 orr r4, r4, r5, lsr #24 1394 mov r5, r5, lsl #8 1395 orr r5, r5, r6, lsr #24 1396 mov r6, r6, lsl #8 1397 orr r6, r6, r7, lsr #24 1398 mov r7, r7, lsl #8 1399 orr r7, r7, ip, lsr #24 1400#else 1401 orr r4, r4, r5, lsl #24 1402 mov r5, r5, lsr #8 1403 orr r5, r5, r6, lsl #24 1404 mov r6, r6, lsr #8 1405 orr r6, r6, r7, lsl #24 1406 mov r7, r7, lsr #8 1407 orr r7, r7, ip, lsl #24 1408#endif 1409 str r4, [r3], #0x04 1410 str r5, [r3], #0x04 1411 str r6, [r3], #0x04 1412 str r7, [r3], #0x04 1413.Lmemcpy_bad1: 1414 subs r2, r2, #0x10 1415 bge .Lmemcpy_bad1_loop16 
1416 1417 adds r2, r2, #0x10 1418 ldmeqfd sp!, {r4-r7} 1419 RETeq /* Return now if done */ 1420 subs r2, r2, #0x04 1421 sublt r1, r1, #0x03 1422 blt .Lmemcpy_bad_done 1423 1424.Lmemcpy_bad1_loop4: 1425#ifdef __ARMEB__ 1426 mov r4, ip, lsl #8 1427#else 1428 mov r4, ip, lsr #8 1429#endif 1430 ldr ip, [r1], #0x04 1431 subs r2, r2, #0x04 1432#ifdef __ARMEB__ 1433 orr r4, r4, ip, lsr #24 1434#else 1435 orr r4, r4, ip, lsl #24 1436#endif 1437 str r4, [r3], #0x04 1438 bge .Lmemcpy_bad1_loop4 1439 sub r1, r1, #0x03 1440 b .Lmemcpy_bad_done 1441 1442.Lmemcpy_bad2_loop16: 1443#ifdef __ARMEB__ 1444 mov r4, ip, lsl #16 1445#else 1446 mov r4, ip, lsr #16 1447#endif 1448 ldr r5, [r1], #0x04 1449 pld [r1, #0x018] 1450 ldr r6, [r1], #0x04 1451 ldr r7, [r1], #0x04 1452 ldr ip, [r1], #0x04 1453#ifdef __ARMEB__ 1454 orr r4, r4, r5, lsr #16 1455 mov r5, r5, lsl #16 1456 orr r5, r5, r6, lsr #16 1457 mov r6, r6, lsl #16 1458 orr r6, r6, r7, lsr #16 1459 mov r7, r7, lsl #16 1460 orr r7, r7, ip, lsr #16 1461#else 1462 orr r4, r4, r5, lsl #16 1463 mov r5, r5, lsr #16 1464 orr r5, r5, r6, lsl #16 1465 mov r6, r6, lsr #16 1466 orr r6, r6, r7, lsl #16 1467 mov r7, r7, lsr #16 1468 orr r7, r7, ip, lsl #16 1469#endif 1470 str r4, [r3], #0x04 1471 str r5, [r3], #0x04 1472 str r6, [r3], #0x04 1473 str r7, [r3], #0x04 1474.Lmemcpy_bad2: 1475 subs r2, r2, #0x10 1476 bge .Lmemcpy_bad2_loop16 1477 1478 adds r2, r2, #0x10 1479 ldmeqfd sp!, {r4-r7} 1480 RETeq /* Return now if done */ 1481 subs r2, r2, #0x04 1482 sublt r1, r1, #0x02 1483 blt .Lmemcpy_bad_done 1484 1485.Lmemcpy_bad2_loop4: 1486#ifdef __ARMEB__ 1487 mov r4, ip, lsl #16 1488#else 1489 mov r4, ip, lsr #16 1490#endif 1491 ldr ip, [r1], #0x04 1492 subs r2, r2, #0x04 1493#ifdef __ARMEB__ 1494 orr r4, r4, ip, lsr #16 1495#else 1496 orr r4, r4, ip, lsl #16 1497#endif 1498 str r4, [r3], #0x04 1499 bge .Lmemcpy_bad2_loop4 1500 sub r1, r1, #0x02 1501 b .Lmemcpy_bad_done 1502 1503.Lmemcpy_bad3_loop16: 1504#ifdef __ARMEB__ 1505 mov r4, ip, lsl #24 
1506#else 1507 mov r4, ip, lsr #24 1508#endif 1509 ldr r5, [r1], #0x04 1510 pld [r1, #0x018] 1511 ldr r6, [r1], #0x04 1512 ldr r7, [r1], #0x04 1513 ldr ip, [r1], #0x04 1514#ifdef __ARMEB__ 1515 orr r4, r4, r5, lsr #8 1516 mov r5, r5, lsl #24 1517 orr r5, r5, r6, lsr #8 1518 mov r6, r6, lsl #24 1519 orr r6, r6, r7, lsr #8 1520 mov r7, r7, lsl #24 1521 orr r7, r7, ip, lsr #8 1522#else 1523 orr r4, r4, r5, lsl #8 1524 mov r5, r5, lsr #24 1525 orr r5, r5, r6, lsl #8 1526 mov r6, r6, lsr #24 1527 orr r6, r6, r7, lsl #8 1528 mov r7, r7, lsr #24 1529 orr r7, r7, ip, lsl #8 1530#endif 1531 str r4, [r3], #0x04 1532 str r5, [r3], #0x04 1533 str r6, [r3], #0x04 1534 str r7, [r3], #0x04 1535.Lmemcpy_bad3: 1536 subs r2, r2, #0x10 1537 bge .Lmemcpy_bad3_loop16 1538 1539 adds r2, r2, #0x10 1540 ldmeqfd sp!, {r4-r7} 1541 RETeq /* Return now if done */ 1542 subs r2, r2, #0x04 1543 sublt r1, r1, #0x01 1544 blt .Lmemcpy_bad_done 1545 1546.Lmemcpy_bad3_loop4: 1547#ifdef __ARMEB__ 1548 mov r4, ip, lsl #24 1549#else 1550 mov r4, ip, lsr #24 1551#endif 1552 ldr ip, [r1], #0x04 1553 subs r2, r2, #0x04 1554#ifdef __ARMEB__ 1555 orr r4, r4, ip, lsr #8 1556#else 1557 orr r4, r4, ip, lsl #8 1558#endif 1559 str r4, [r3], #0x04 1560 bge .Lmemcpy_bad3_loop4 1561 sub r1, r1, #0x01 1562 1563.Lmemcpy_bad_done: 1564 ldmfd sp!, {r4-r7} 1565 adds r2, r2, #0x04 1566 RETeq 1567 ldrb ip, [r1], #0x01 1568 cmp r2, #0x02 1569 ldrgeb r2, [r1], #0x01 1570 strb ip, [r3], #0x01 1571 ldrgtb ip, [r1] 1572 strgeb r2, [r3], #0x01 1573 strgtb ip, [r3] 1574 RET 1575 1576 1577/* 1578 * Handle short copies (less than 16 bytes), possibly misaligned. 1579 * Some of these are *very* common, thanks to the network stack, 1580 * and so are handled specially. 
1581 */ 1582.Lmemcpy_short: 1583 add pc, pc, r2, lsl #2 1584 nop 1585 RET /* 0x00 */ 1586 b .Lmemcpy_bytewise /* 0x01 */ 1587 b .Lmemcpy_bytewise /* 0x02 */ 1588 b .Lmemcpy_bytewise /* 0x03 */ 1589 b .Lmemcpy_4 /* 0x04 */ 1590 b .Lmemcpy_bytewise /* 0x05 */ 1591 b .Lmemcpy_6 /* 0x06 */ 1592 b .Lmemcpy_bytewise /* 0x07 */ 1593 b .Lmemcpy_8 /* 0x08 */ 1594 b .Lmemcpy_bytewise /* 0x09 */ 1595 b .Lmemcpy_bytewise /* 0x0a */ 1596 b .Lmemcpy_bytewise /* 0x0b */ 1597 b .Lmemcpy_c /* 0x0c */ 1598.Lmemcpy_bytewise: 1599 mov r3, r0 /* We must not clobber r0 */ 1600 ldrb ip, [r1], #0x01 16011: subs r2, r2, #0x01 1602 strb ip, [r3], #0x01 1603 ldrneb ip, [r1], #0x01 1604 bne 1b 1605 RET 1606 1607/****************************************************************************** 1608 * Special case for 4 byte copies 1609 */ 1610#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1611#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1612 LMEMCPY_4_PAD 1613.Lmemcpy_4: 1614 and r2, r1, #0x03 1615 orr r2, r2, r0, lsl #2 1616 ands r2, r2, #0x0f 1617 sub r3, pc, #0x14 1618 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 1619 1620/* 1621 * 0000: dst is 32-bit aligned, src is 32-bit aligned 1622 */ 1623 ldr r2, [r1] 1624 str r2, [r0] 1625 RET 1626 LMEMCPY_4_PAD 1627 1628/* 1629 * 0001: dst is 32-bit aligned, src is 8-bit aligned 1630 */ 1631 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 1632 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1633#ifdef __ARMEB__ 1634 mov r3, r3, lsl #8 /* r3 = 012. 
*/ 1635 orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1636#else 1637 mov r3, r3, lsr #8 /* r3 = .210 */ 1638 orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1639#endif 1640 str r3, [r0] 1641 RET 1642 LMEMCPY_4_PAD 1643 1644/* 1645 * 0010: dst is 32-bit aligned, src is 16-bit aligned 1646 */ 1647#ifdef __ARMEB__ 1648 ldrh r3, [r1] 1649 ldrh r2, [r1, #0x02] 1650#else 1651 ldrh r3, [r1, #0x02] 1652 ldrh r2, [r1] 1653#endif 1654 orr r3, r2, r3, lsl #16 1655 str r3, [r0] 1656 RET 1657 LMEMCPY_4_PAD 1658 1659/* 1660 * 0011: dst is 32-bit aligned, src is 8-bit aligned 1661 */ 1662 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1663 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1664#ifdef __ARMEB__ 1665 mov r3, r3, lsl #24 /* r3 = 0... */ 1666 orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1667#else 1668 mov r3, r3, lsr #24 /* r3 = ...0 */ 1669 orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1670#endif 1671 str r3, [r0] 1672 RET 1673 LMEMCPY_4_PAD 1674 1675/* 1676 * 0100: dst is 8-bit aligned, src is 32-bit aligned 1677 */ 1678 ldr r2, [r1] 1679#ifdef __ARMEB__ 1680 strb r2, [r0, #0x03] 1681 mov r3, r2, lsr #8 1682 mov r1, r2, lsr #24 1683 strb r1, [r0] 1684#else 1685 strb r2, [r0] 1686 mov r3, r2, lsr #8 1687 mov r1, r2, lsr #24 1688 strb r1, [r0, #0x03] 1689#endif 1690 strh r3, [r0, #0x01] 1691 RET 1692 LMEMCPY_4_PAD 1693 1694/* 1695 * 0101: dst is 8-bit aligned, src is 8-bit aligned 1696 */ 1697 ldrb r2, [r1] 1698 ldrh r3, [r1, #0x01] 1699 ldrb r1, [r1, #0x03] 1700 strb r2, [r0] 1701 strh r3, [r0, #0x01] 1702 strb r1, [r0, #0x03] 1703 RET 1704 LMEMCPY_4_PAD 1705 1706/* 1707 * 0110: dst is 8-bit aligned, src is 16-bit aligned 1708 */ 1709 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1710 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */ 1711#ifdef __ARMEB__ 1712 mov r1, r2, lsr #8 /* r1 = ...0 */ 1713 strb r1, [r0] 1714 mov r2, r2, lsl #8 /* r2 = .01. 
*/ 1715 orr r2, r2, r3, lsr #8 /* r2 = .012 */ 1716#else 1717 strb r2, [r0] 1718 mov r2, r2, lsr #8 /* r2 = ...1 */ 1719 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1720 mov r3, r3, lsr #8 /* r3 = ...3 */ 1721#endif 1722 strh r2, [r0, #0x01] 1723 strb r3, [r0, #0x03] 1724 RET 1725 LMEMCPY_4_PAD 1726 1727/* 1728 * 0111: dst is 8-bit aligned, src is 8-bit aligned 1729 */ 1730 ldrb r2, [r1] 1731 ldrh r3, [r1, #0x01] 1732 ldrb r1, [r1, #0x03] 1733 strb r2, [r0] 1734 strh r3, [r0, #0x01] 1735 strb r1, [r0, #0x03] 1736 RET 1737 LMEMCPY_4_PAD 1738 1739/* 1740 * 1000: dst is 16-bit aligned, src is 32-bit aligned 1741 */ 1742 ldr r2, [r1] 1743#ifdef __ARMEB__ 1744 strh r2, [r0, #0x02] 1745 mov r3, r2, lsr #16 1746 strh r3, [r0] 1747#else 1748 strh r2, [r0] 1749 mov r3, r2, lsr #16 1750 strh r3, [r0, #0x02] 1751#endif 1752 RET 1753 LMEMCPY_4_PAD 1754 1755/* 1756 * 1001: dst is 16-bit aligned, src is 8-bit aligned 1757 */ 1758 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1759 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1760 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1761 strh r1, [r0] 1762#ifdef __ARMEB__ 1763 mov r2, r2, lsl #8 /* r2 = 012. */ 1764 orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1765#else 1766 mov r2, r2, lsr #24 /* r2 = ...2 */ 1767 orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1768#endif 1769 strh r2, [r0, #0x02] 1770 RET 1771 LMEMCPY_4_PAD 1772 1773/* 1774 * 1010: dst is 16-bit aligned, src is 16-bit aligned 1775 */ 1776 ldrh r2, [r1] 1777 ldrh r3, [r1, #0x02] 1778 strh r2, [r0] 1779 strh r3, [r0, #0x02] 1780 RET 1781 LMEMCPY_4_PAD 1782 1783/* 1784 * 1011: dst is 16-bit aligned, src is 8-bit aligned 1785 */ 1786 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1787 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1788 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1789 strh r1, [r0, #0x02] 1790#ifdef __ARMEB__ 1791 mov r3, r3, lsr #24 /* r3 = ...1 */ 1792 orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 1793#else 1794 mov r3, r3, lsl #8 /* r3 = 321. 
*/ 1795 orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1796#endif 1797 strh r3, [r0] 1798 RET 1799 LMEMCPY_4_PAD 1800 1801/* 1802 * 1100: dst is 8-bit aligned, src is 32-bit aligned 1803 */ 1804 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1805#ifdef __ARMEB__ 1806 strb r2, [r0, #0x03] 1807 mov r3, r2, lsr #8 1808 mov r1, r2, lsr #24 1809 strh r3, [r0, #0x01] 1810 strb r1, [r0] 1811#else 1812 strb r2, [r0] 1813 mov r3, r2, lsr #8 1814 mov r1, r2, lsr #24 1815 strh r3, [r0, #0x01] 1816 strb r1, [r0, #0x03] 1817#endif 1818 RET 1819 LMEMCPY_4_PAD 1820 1821/* 1822 * 1101: dst is 8-bit aligned, src is 8-bit aligned 1823 */ 1824 ldrb r2, [r1] 1825 ldrh r3, [r1, #0x01] 1826 ldrb r1, [r1, #0x03] 1827 strb r2, [r0] 1828 strh r3, [r0, #0x01] 1829 strb r1, [r0, #0x03] 1830 RET 1831 LMEMCPY_4_PAD 1832 1833/* 1834 * 1110: dst is 8-bit aligned, src is 16-bit aligned 1835 */ 1836#ifdef __ARMEB__ 1837 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1838 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1839 strb r3, [r0, #0x03] 1840 mov r3, r3, lsr #8 /* r3 = ...2 */ 1841 orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 1842 strh r3, [r0, #0x01] 1843 mov r2, r2, lsr #8 /* r2 = ...0 */ 1844 strb r2, [r0] 1845#else 1846 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1847 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1848 strb r2, [r0] 1849 mov r2, r2, lsr #8 /* r2 = ...1 */ 1850 orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1851 strh r2, [r0, #0x01] 1852 mov r3, r3, lsr #8 /* r3 = ...3 */ 1853 strb r3, [r0, #0x03] 1854#endif 1855 RET 1856 LMEMCPY_4_PAD 1857 1858/* 1859 * 1111: dst is 8-bit aligned, src is 8-bit aligned 1860 */ 1861 ldrb r2, [r1] 1862 ldrh r3, [r1, #0x01] 1863 ldrb r1, [r1, #0x03] 1864 strb r2, [r0] 1865 strh r3, [r0, #0x01] 1866 strb r1, [r0, #0x03] 1867 RET 1868 LMEMCPY_4_PAD 1869 1870 1871/****************************************************************************** 1872 * Special case for 6 byte copies 1873 */ 1874#define LMEMCPY_6_LOG2 6 /* 64 bytes */ 1875#define LMEMCPY_6_PAD 
.align LMEMCPY_6_LOG2
	LMEMCPY_6_PAD
/*
 * Special case for 6 byte copies.
 * Entry: r0 = dst, r1 = src (both may be arbitrarily aligned, as the
 * loads/stores below show); r2, r3 are clobbered as scratch.
 * Dispatch: r2 = ((dst & 3) << 2) | (src & 3), a 4-bit alignment
 * selector.  Every case handler below is padded by LMEMCPY_6_PAD to
 * exactly 2^LMEMCPY_6_LOG2 bytes, so a single shift-and-add on pc
 * lands on the correct handler.  Case 0000 simply falls through.
 */
.Lmemcpy_6:
	and	r2, r1, #0x03		/* r2 = src & 3 */
	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
	ands	r2, r2, #0x0f		/* keep 4-bit selector; Z set for case 0000 */
	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_6 (pc reads as '.' + 8 in ARM state) */
	addne	pc, r3, r2, lsl #LMEMCPY_6_LOG2	/* computed jump into the handler table */

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldrh	r3, [r1, #0x04]
	str	r2, [r0]
	strh	r3, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 345x  LE:r3 = x543 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #8		/* r2 = 012. */
	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
#else
	mov	r2, r2, lsr #8		/* r2 = .210 */
	orr	r2, r2, r3, lsl #24	/* r2 = 3210 */
#endif
	mov	r3, r3, lsr #8		/* BE:r3 = .345  LE:r3 = .x54 */
	str	r2, [r0]
	strh	r3, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
#ifdef __ARMEB__
	mov	r1, r3, lsr #16		/* r1 = ..23 */
	orr	r1, r1, r2, lsl #16	/* r1 = 0123 */
	str	r1, [r0]
	strh	r3, [r0, #0x04]
#else
	mov	r1, r3, lsr #16		/* r1 = ..54 */
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	strh	r1, [r0, #0x04]
#endif
	RET
	LMEMCPY_6_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
	ldr	r3, [r1, #1]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	mov	r3, r3, lsl #8		/* r3 = 234. */
	orr	r1, r3, r1, lsr #24	/* r1 = 2345 */
#else
	mov	r2, r2, lsr #24		/* r2 = ...0 */
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	mov	r1, r1, lsl #8		/* r1 = xx5. */
	orr	r1, r1, r3, lsr #24	/* r1 = xx54 */
#endif
	str	r2, [r0]
	strh	r1, [r0, #0x04]
	RET
	LMEMCPY_6_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
	ldrh	r2, [r1, #0x04]		/* BE:r2 = ..45  LE:r2 = ..54 */
	mov	r1, r3, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r3, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r3, r3, lsl #8		/* r3 = 123. */
	orr	r3, r3, r2, lsr #8	/* r3 = 1234 */
#else
	strb	r3, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = .543 */
	mov	r2, r2, lsr #8		/* r2 = ...5 */
#endif
	strh	r3, [r0, #0x03]
	strb	r2, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #8		/* r3 = ...0 */
	strb	r3, [r0]
	strb	r1, [r0, #0x05]
	mov	r3, r1, lsr #8		/* r3 = .234 */
	strh	r3, [r0, #0x03]
	mov	r3, r2, lsl #8		/* r3 = .01. */
	orr	r3, r3, r1, lsr #24	/* r3 = .012 */
	strh	r3, [r0, #0x01]
#else
	strb	r2, [r0]
	mov	r3, r1, lsr #24		/* r3 = ...5 */
	strb	r3, [r0, #0x05]
	mov	r3, r1, lsr #8		/* r3 = .543 */
	strh	r3, [r0, #0x03]
	mov	r3, r2, lsr #8		/* r3 = ...1 */
	orr	r3, r3, r1, lsl #8	/* r3 = 4321 */
	strh	r3, [r0, #0x01]
#endif
	RET
	LMEMCPY_6_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
#ifdef __ARMEB__
	ldr	r2, [r1]		/* r2 = 0123 */
	ldrh	r3, [r1, #0x04]		/* r3 = ..45 */
	mov	r1, r2, lsr #16		/* r1 = ..01 */
	orr	r3, r3, r2, lsl#16	/* r3 = 2345 */
	strh	r1, [r0]
	str	r3, [r0, #0x02]
#else
	ldrh	r2, [r1, #0x04]		/* r2 = ..54 */
	ldr	r3, [r1]		/* r3 = 3210 */
	mov	r2, r2, lsl #16		/* r2 = 54.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 5432 */
	strh	r3, [r0]
	str	r2, [r0, #0x02]
#endif
	RET
	LMEMCPY_6_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
	ldr	r2, [r1, #3]		/* BE:r2 = 345x  LE:r2 = x543 */
	mov	r1, r3, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #8		/* r2 = .345 */
	orr	r2, r2, r3, lsl #24	/* r2 = 2345 */
#else
	mov	r2, r2, lsl #8		/* r2 = 543. */
	orr	r2, r2, r3, lsr #24	/* r2 = 5432 */
#endif
	strh	r1, [r0]
	str	r2, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	r1, [r1, #0x05]		/* r1 = ...5 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #8		/* r3 = ..0. */
	orr	r3, r3, r2, lsr #24	/* r3 = ..01 */
	orr	r1, r1, r2, lsl #8	/* r1 = 2345 */
#else
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r1, r1, lsl #24		/* r1 = 5... */
	orr	r1, r1, r2, lsr #8	/* r1 = 5432 */
#endif
	strh	r3, [r0]
	str	r1, [r0, #0x02]
	RET
	LMEMCPY_6_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldrh	r1, [r1, #0x04]		/* BE:r1 = ..45  LE:r1 = ..54 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r1, lsl #24	/* r2 = 4321 */
	mov	r1, r1, lsr #8		/* r1 = ...5 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #8		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #24		/* r2 = 1... */
	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r1, lsl #8	/* r2 = 4321 */
	mov	r1, r1, lsr #24		/* r1 = ...5 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	str	r3, [r0, #0x01]
	strb	r1, [r0, #0x05]
	RET
	LMEMCPY_6_PAD


/******************************************************************************
 * Special case for 8 byte copies
 */
#define	LMEMCPY_8_LOG2	6	/* 64 bytes */
#define	LMEMCPY_8_PAD	.align LMEMCPY_8_LOG2
	LMEMCPY_8_PAD
/*
 * 8-byte copy: same dispatch scheme as .Lmemcpy_6 above — index by
 * ((dst & 3) << 2) | (src & 3) into handlers padded to 2^LMEMCPY_8_LOG2
 * (64) bytes each; case 0000 falls through.
 */
.Lmemcpy_8:
	and	r2, r1, #0x03		/* r2 = src & 3 */
	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
	ands	r2, r2, #0x0f		/* 4-bit selector; Z set for case 0000 */
	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_8 (pc reads as '.' + 8) */
	addne	pc, r3, r2, lsl #LMEMCPY_8_LOG2	/* computed jump into handler table */

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
	ldr	r2, [r1, #0x03]		/* BE:r2 = 3456  LE:r2 = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #8		/* r3 = 012. */
	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
	orr	r2, r1, r2, lsl #8	/* r2 = 4567 */
#else
	mov	r3, r3, lsr #8		/* r3 = .210 */
	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
	mov	r1, r1, lsl #24		/* r1 = 7... */
	orr	r2, r1, r2, lsr #8	/* r2 = 7654 */
#endif
	str	r3, [r0]
	str	r2, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	orr	r3, r1, r3, lsl #16	/* r3 = 4567 */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, r1, lsl #16	/* r3 = 7654 */
#endif
	str	r2, [r0]
	str	r3, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x  LE:r1 = x765 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #24		/* r3 = 0... */
	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
	mov	r2, r2, lsl #24		/* r2 = 4... */
	orr	r2, r2, r1, lsr #8	/* r2 = 4567 */
#else
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r2, r2, lsr #24		/* r2 = ...4 */
	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
#endif
	str	r3, [r0]
	str	r2, [r0, #0x04]
	RET
	LMEMCPY_8_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
#ifdef __ARMEB__
	mov	r1, r3, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r3, lsr #8		/* r1 = .012 */
	strb	r2, [r0, #0x07]
	mov	r3, r3, lsl #24		/* r3 = 3... */
	orr	r3, r3, r2, lsr #8	/* r3 = 3456 */
#else
	strb	r3, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r1, r3, lsr #8		/* r1 = .321 */
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
#endif
	strh	r1, [r0, #0x01]
	str	r3, [r0, #0x03]
	RET
	LMEMCPY_8_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #8		/* ip = .01. */
	orr	ip, ip, r3, lsr #24	/* ip = .012 */
	strb	r1, [r0, #0x07]
	mov	r3, r3, lsl #8		/* r3 = 345. */
	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
#else
	strb	r2, [r0]		/* 0 */
	mov	ip, r1, lsr #8		/* ip = ...7 */
	strb	ip, [r0, #0x07]		/* 7 */
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r3, r3, lsr #8		/* r3 = .543 */
	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
#endif
	strh	ip, [r0, #0x01]
	str	r3, [r0, #0x03]
	RET
	LMEMCPY_8_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
#ifdef __ARMEB__
	strh	r3, [r0, #0x01]
	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
#else
	strh	ip, [r0, #0x01]
	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
#endif
	str	r2, [r0, #0x03]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, r3, lsr #16		/* r1 = ..45 */
	orr	r2, r1, r2, lsl #16	/* r2 = 2345 */
#else
	strh	r2, [r0]
	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
#endif
	str	r2, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
	strh	r1, [r0]
#ifdef __ARMEB__
	mov	r1, r2, lsl #24		/* r1 = 2... */
	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
#else
	mov	r1, r2, lsr #24		/* r1 = ...2 */
	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
#endif
	str	r1, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	ip, [r1, #0x02]
	ldrh	r3, [r1, #0x06]
	strh	r2, [r0]
	str	ip, [r0, #0x02]
	strh	r3, [r0, #0x06]
	RET
	LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
	ldrb	ip, [r1]		/* ip = ...0 */
	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
	strh	r1, [r0, #0x06]
#ifdef __ARMEB__
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
	mov	r2, r2, lsr #24		/* r2 = ...1 */
	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
#else
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
#endif
	str	r3, [r0, #0x02]
	strh	r2, [r0]
	RET
	LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
	strh	r1, [r0, #0x05]
#ifdef __ARMEB__
	strb	r3, [r0, #0x07]
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
#else
	strb	r2, [r0]
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
	str	r2, [r0, #0x01]
#endif
	RET
	LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
#ifdef __ARMEB__
	strh	ip, [r0, #0x05]
	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
#else
	strh	r3, [r0, #0x05]
	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #24		/* ip = 1... */
	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
	strb	r1, [r0, #0x07]
	mov	r1, r1, lsr #8		/* r1 = ...6 */
	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
#else
	strb	r2, [r0]
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r2, r1, lsr #8		/* r2 = ...7 */
	strb	r2, [r0, #0x07]
	mov	r1, r1, lsl #8		/* r1 = .76. */
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
#endif
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	RET
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	RET
	LMEMCPY_8_PAD

/******************************************************************************
 * Special case for 12 byte copies
 */
#define	LMEMCPY_C_LOG2	7	/* 128 bytes */
#define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
/*
 * 12-byte copy: same dispatch scheme as .Lmemcpy_6/.Lmemcpy_8 — index by
 * ((dst & 3) << 2) | (src & 3) into handlers padded to 2^LMEMCPY_C_LOG2
 * (128) bytes each; case 0000 falls through.
 */
.Lmemcpy_c:
	and	r2, r1, #0x03		/* r2 = src & 3 */
	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
	ands	r2, r2, #0x0f		/* 4-bit selector; Z set for case 0000 */
	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c (pc reads as '.' + 8) */
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* computed jump into handler table */

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
#ifdef __ARMEB__
	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsr #24		/* r2 = ...7 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
	mov	r1, r1, lsl #8		/* r1 = 012. */
	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
#else
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x04]
	str	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #16		/* r3 = 45.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #24		/* r3 = 4... */
	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	RET
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r2, lsl #24		/* r1 = 3... */
	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
	mov	r1, r3, lsl #24		/* r1 = 7... */
	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
#else
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
#endif
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, ror #8		/* r2 = 1..0 */
	strb	r2, [r0]
	mov	r2, r2, lsr #16		/* r2 = ..1. */
	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsl #8		/* r2 = 345. */
	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
	mov	r2, ip, lsl #8		/* r2 = 789. */
	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
#endif
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
	strb	r2, [r0]
#ifdef __ARMEB__
	mov	r2, r3, lsr #16		/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsl #16		/* r3 = 34.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
	mov	ip, ip, lsl #16		/* ip = 78.. */
	orr	ip, ip, r1, lsr #16	/* ip = 789A */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
#else
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
#endif
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	RET
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, ip, lsl #16		/* r1 = 23.. */
	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
	mov	r3, r3, lsl #16		/* r3 = 67.. */
	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
#else
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
#endif
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 2... */
	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
	mov	r3, r3, lsl #24		/* r3 = 6... */
	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
#else
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
#endif
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	RET
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #24		/* r2 = ...9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
	mov	r1, r1, lsl #8		/* r1 = ..0. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
#else
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
	mov	r2, ip, lsl #8		/* r2 = 567. */
	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
	str	r2, [r0, #0x05]
	mov	r2, r1, lsr #8		/* r2 = ..9A */
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
	strb	r2, [r0, #0x0b]
#ifdef __ARMEB__
	strh	r3, [r0, #0x09]
	mov	r3, r3, lsr #16		/* r3 = ..78 */
	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
	mov	ip, ip, lsr #16		/* ip = ..34 */
	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
	mov	r1, r1, lsr #16		/* r1 = ..x0 */
#else
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43.. */
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
#endif
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	RET
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
	ldr	ip, [r1, #0x06]		/* ip = 6789 */
	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
	ldrh	r1, [r1]		/* r1 = ..01 */
	strb	r2, [r0, #0x0b]
	mov	r2, r2, lsr #8		/* r2 = ...A */
	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
	mov	ip, ip, lsr #8		/* ip = .678 */
	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
	mov	r3, r3, lsr #8		/* r3 = .234 */
	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
	mov	r1, r1, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
#else
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#endif
	RET
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	RET
#endif /* _ARM_ARCH_5E */

#ifdef GPROF

/*
 * Profiling marker entry points; each is a single nop.
 * NOTE(review): presumably these labels delimit user/trap/interrupt
 * code regions for kernel gprof sampling — confirm against the
 * machine-dependent gprof support code that references them.
 */
ENTRY(user)
	nop
ENTRY(btrap)
	nop
ENTRY(etrap)
	nop
ENTRY(bintr)
	nop
ENTRY(eintr)
	nop

#endif /* GPROF */