/*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with .Lmemcpy_
 * Every path in this file copies forwards (both pointers are only ever
 * post-incremented); there is no backward-copy path here.
 * Separate bits of code are used to deal with the following situations:
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes per
 * loop iteration where possible.
 *
 * Register usage:
 *	In:	r0 = dest, r1 = src, r2 = len
 *	Out:	r0 = dest (the original value, reloaded from the stack)
 *	Scratch: r1-r3, r12 (aka ip), lr and the condition flags
 *	Saved:	r4 and r5 are pushed before use and popped afterwards;
 *		lr is pushed on entry and popped straight into pc on return.
 *
 * For most of the routine r2 holds (bytes remaining - 4), so that a
 * negative value means "fewer than 4 bytes left"; .Lmemcpy_l4 undoes the
 * bias before the final byte copies.
 *
 * NOTE(review): every length test uses signed conditions (blt/bge), so a
 * len with the top bit set takes the "less than 4 bytes" path.  Presumably
 * such lengths cannot occur on a 32-bit address space -- confirm before
 * relying on it.
 *
 * Apologies for the state of the comments ;-)
 */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time (r2 = remaining - 32 in this loop) */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10		/* ge iff at least 16 bytes remain */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	/* flags come from the adds above or the subges below */
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_l4

	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* remove the bias: r2 = remaining */
	/*
	 * __APCS_26__ is the macro GCC defined for -mapcs-26; the "^" form
	 * of the load is only selected for such 26-bit builds.
	 */
#ifdef __APCS_26__
	ldmeqia	sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time: 1 always, 2nd if >= 2, 3rd if 3 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	/* same return as the early exit above, under the same guard */
#ifdef __APCS_26__
	ldmia	sp!, {r0, pc}^
#else
	ldmia	sp!, {r0, pc}
#endif

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dest */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12		/* account for the bytes just copied */
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	/*
	 * On entry r12 = src & 3 (1, 2 or 3) and dest is word aligned.
	 * Round src down to a word boundary and keep the most recently
	 * loaded source word in lr; each output word is built by shifting
	 * the leftover bytes of lr together with bytes of the next word.
	 * The shift directions are swapped for big-endian (__ARMEB__).
	 */
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2

	/* source is 1 byte past a word boundary */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	/* 3 bytes of the word in lr are still uncopied: back r1 up to them */
	sub	r1, r1, #3
	b	.Lmemcpy_l4

	/* source is 2 bytes past a word boundary */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	/* 2 bytes of the word in lr are still uncopied: back r1 up to them */
	sub	r1, r1, #2
	b	.Lmemcpy_l4

	/* source is 3 bytes past a word boundary */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* back to r2 = remaining - 4 */
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	/* 1 byte of the word in lr is still uncopied: back r1 up to it */
	sub	r1, r1, #1
	b	.Lmemcpy_l4
END(memcpy)