/*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

.syntax	unified

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 * unaligned source address
 * unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. src, dest, len throughout.
 * Additional registers are preserved prior to use i.e.
r4, r5 & lr
 *
 * Apologies for the state of the comments ;-)
 */

/*
 * memcpy: forward copy of len bytes from src to dst.
 *
 * In:	r0 = dst, r1 = src, r2 = len
 * Out:	r0 = original dst (reloaded from the stack on every return path)
 *
 * Register roles inside the routine:
 *	r0	running destination pointer (post-incremented by every store)
 *	r1	running source pointer (post-incremented by every load)
 *	r2	biased count of bytes remaining; each section subtracts the
 *		size it is about to test for, so a negative result means
 *		"fewer than that many bytes left".  The bias in force is
 *		noted at each label below.
 *	r3, r12	scratch data registers
 *	lr	scratch data register; the return address is pushed at entry
 *		and popped straight into pc on return
 *	r4, r5	extra data registers, pushed before use and popped after
 *
 * All length tests use signed conditions (blt/bge) on the biased count.
 */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3		/* r12 = dst & 3 */
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3		/* r12 = src & 3 */
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination; r2 = remaining - 4 */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	/* r2 = remaining - 32 (negative); cmn tests r2 + 16 >= 0 */
	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12, flags set */

	/* blat 12 bytes at a time */
	/*
	 * The first pass is predicated on the flags from the adds above;
	 * later passes on the flags from subsge.  When the condition fails
	 * the flags are left untouched, so the bge falls through.
	 */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	/* r2 = remaining - 12 on entry */
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_l4

	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4		/* r2 = remaining - 8 */
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4		/* account for the second word */

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	/* r2 = remaining - 4 on entry; after the adds r2 = remaining (0..3) */
	adds	r2, r2, #4
	/*
	 * NOTE(review): the guard below is spelled __APCS_26_ with a single
	 * trailing underscore; the compiler predefine appears to have been
	 * __APCS_26__, so this branch is presumably never selected.  Left
	 * unchanged here - confirm before altering.
	 */
#ifdef __APCS_26_
	ldmiaeq sp!, {r0, pc}^		/* done */
#else
	ldmiaeq	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2			/* 1 byte always, 2nd if >= 2, 3rd if > 2 */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
	/* entered with r12 = dst & 3 (non-zero) and r2 = len - 4 (>= 0) */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst (1..3) */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12		/* r2 = remaining - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3		/* r12 = src & 3 after the byte copies */
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * Entered with dst word aligned, r12 = src & 3 (1..3) and
	 * r2 = remaining - 4 (>= 0).  The source pointer is rounded down to
	 * a word boundary and whole words are loaded; each output word is
	 * assembled from the tail of the previously loaded word (kept in lr)
	 * and the head of the next one.  One variant per source offset.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	lr, [r1], #4		/* prime lr with the first source word */
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3		/* src & 3 == 3 */
	beq	.Lmemcpy_srcul2		/* src & 3 == 2 */
	/* src & 3 == 1: three bytes of each loaded word carry over */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4	/* fewer than 16 bytes remaining */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

	/* 16 bytes per pass; lr carries the last loaded word across passes */
.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul1l4

	/* one word per pass */
.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	/* r1 is one word past lr; step back over its 3 unconsumed bytes */
	sub	r1, r1, #3
	b	.Lmemcpy_l4

	/* src & 3 == 2: two bytes of each loaded word carry over */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4	/* fewer than 16 bytes remaining */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

	/* 16 bytes per pass; lr carries the last loaded word across passes */
.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul2l4

	/* one word per pass */
.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	/* r1 is one word past lr; step back over its 2 unconsumed bytes */
	sub	r1, r1, #2
	b	.Lmemcpy_l4

	/* src & 3 == 3: one byte of each loaded word carries over */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4	/* fewer than 16 bytes remaining */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

	/* 16 bytes per pass; lr carries the last loaded word across passes */
.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul3l4

	/* one word per pass */
.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	/* r1 is one word past lr; step back over its 1 unconsumed byte */
	sub	r1, r1, #1
	b	.Lmemcpy_l4
END(memcpy)