/* memcpy_arm.S revision 129202 */
/*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD: head/lib/libc/arm/string/memcpy_arm.S 129202 2004-05-14 12:04:31Z cognet $");
/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * (The routine below only ever walks both pointers upwards; it contains
 * no backwards-copy labels.)
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Register roles inside memcpy():
 *	r0	destination pointer, post-incremented as bytes are stored
 *	r1	source pointer, post-incremented as bytes are loaded
 *	r2	remaining byte count MINUS a stage-dependent bias, so that
 *		the sign flag after each subs/adds answers "is there at
 *		least one more chunk?".  The bias is 4 on entry to
 *		.Lmemcpy_t8 and is removed again at .Lmemcpy_l4.
 *	r3, r12, lr	scratch (lr is saved on the stack at entry)
 *	r4, r5	scratch, pushed/popped around the loops that use them
 *
 * The original destination pointer is pushed at entry and popped into r0
 * on every return path, which is how memcpy() returns dst.
 *
 * Apologies for the state of the comments ;-)
 */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3		/* r12 = dst & 3 */
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3		/* r12 = src & 3 */
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10		/* ge <=> at least 16 bytes remain */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12; sets flags */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_l4

	/* 4..11 bytes remain: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	/*
	 * The guard was previously spelled __APCS_26_ (single trailing
	 * underscore), which does not match the __NAME__ form used by the
	 * other predefined macro tested in this file (__ARMEB__), so the
	 * '^' variant could never be selected.
	 */
#ifdef __APCS_26__
	ldmeqia sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2			/* 1 byte: lt, 2 bytes: eq, 3 bytes: gt */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	/* same return convention as the early exit above */
#ifdef __APCS_26__
	ldmia	sp!, {r0, pc}^
#else
	ldmia	sp!, {r0, pc}
#endif

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst (1..3) */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Here dst is word aligned and r12 = src & 3 (1..3).  src is rounded
	 * down to a word boundary and whole words are loaded; each output
	 * word is assembled from two neighbouring input words by shifting.
	 * lr always carries the most recently loaded input word.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	/* fall through: src & 3 == 1 */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* undo the word rounding: back to the true src */
	b	.Lmemcpy_l4

	/* src & 3 == 2 */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* undo the word rounding: back to the true src */
	b	.Lmemcpy_l4

	/* src & 3 == 3 */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* undo the word rounding: back to the true src */
	b	.Lmemcpy_l4