/*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

.syntax unified

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * NOTE(review): no f/b-prefixed labels and no backward copy path appear in
 * this file; every data load/store through r0/r1 below moves the pointer
 * upwards.  The description above presumably belongs to a related
 * memmove-style routine -- confirm before relying on it.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Apologies for the state of the comments ;-)
 */

/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * In:   r0 = dst, r1 = src, r2 = len
 * Out:  r0 = the dst value passed in (pushed on entry, popped on return)
 * Uses: r3, r12 and lr as scratch data registers; r4 and r5 are pushed
 *       before use and popped afterwards.  The return address lives on the
 *       stack for the whole routine, which is what frees lr for data.
 *
 * Byte-count convention: after the first instruction r2 no longer holds
 * the plain length.  At .Lmemcpy_t8, .Lmemcpy_srcul and on arrival at
 * .Lmemcpy_l4 it holds (bytes remaining - 4); other labels apply a further
 * bias that is undone before leaving them (noted at each label).
 *
 * The conditionally executed instructions (ge/lt/gt/eq suffixes) consume
 * flags produced by an earlier subs/adds/cmp/cmn, sometimes several
 * instructions back, so instruction order matters throughout.
 */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3		/* r12 = dst & 3 */
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3		/* r12 = src & 3 */
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	/* On entry r2 = remaining - 4 (and remaining >= 4) */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	/* Two 16-byte LDM/STM pairs per pass; r2 stays at remaining - 32 */
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10		/* flags for r2 + 16: ge if >= 16 left */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10		/* no 's': leaves the flags alone */
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	/* On entry r2 = remaining - 32; rebias to remaining - 12 */
	adds	r2, r2, #0x14		/* sets the flags tested by loop12 */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	/*
	 * Every instruction here is conditional on ge, so when fewer than
	 * 12 bytes remain the whole loop body is skipped and the branch
	 * falls through.
	 */
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	/* On entry r2 = remaining - 12 (negative); rebias to remaining - 4 */
	adds	r2, r2, #8
	blt	.Lmemcpy_l4		/* fewer than 4 bytes left */

	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4		/* r2 = remaining - 8 */
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4		/* either way r2 = remaining - 4 again */

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = bytes remaining (0..3) */
	/*
	 * NOTE(review): the macro tested below is spelled __APCS_26_ with a
	 * single trailing underscore; confirm whether __APCS_26__ was
	 * intended.  Also note that the unconditional return at the end of
	 * this block has no corresponding '^' variant.
	 */
#ifdef __APCS_26_
	ldmiaeq sp!, {r0, pc}^		/* done */
#else
	ldmiaeq	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2			/* 1 byte always, 2nd if ge, 3rd if gt */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}		/* restore dst into r0 and return */

	/* erg - unaligned destination */
.Lmemcpy_destul:
	/* On entry r12 = dst & 3 (non-zero) and r2 = len - 4 (>= 0) */
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst (1..3) */
	cmp	r12, #2			/* 1 byte always, 2nd if ge, 3rd if gt */

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12		/* r2 = remaining - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3		/* r12 = src & 3 */
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	/*
	 * On entry dst is word aligned, r12 = src & 3 (1, 2 or 3) and
	 * r2 = remaining - 4.  The source is read as whole aligned words;
	 * lr always holds the most recently loaded word, part of which has
	 * not been stored yet.  Each output word is built by shifting the
	 * leftover part of lr and OR-ing in the leading part of the next
	 * word.  The __ARMEB__ variants use the mirrored shift directions.
	 */
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	lr, [r1], #4		/* prime lr with the first source word */
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3		/* src & 3 == 3 */
	beq	.Lmemcpy_srcul2		/* src & 3 == 2 */
	/* fall through: src & 3 == 1 */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4	/* fewer than 16 bytes: word loop */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}		/* borrow r4, r5 */

.Lmemcpy_srcul1loop16:
	/* 16 bytes per pass, source offset 1 (shift pair 8/24) */
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}		/* return r4, r5 */
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
	/* One word per pass, source offset 1 */
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	/* 3 bytes of the last loaded word are unconsumed: back r1 up to them */
	sub	r1, r1, #3
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	/* Source offset 2; same structure as the offset-1 path above */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4	/* fewer than 16 bytes: word loop */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}		/* borrow r4, r5 */

.Lmemcpy_srcul2loop16:
	/* 16 bytes per pass, source offset 2 (shift pair 16/16) */
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}		/* return r4, r5 */
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
	/* One word per pass, source offset 2 */
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	/* 2 bytes of the last loaded word are unconsumed: back r1 up to them */
	sub	r1, r1, #2
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	/* Source offset 3; same structure as the offset-1 path above */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4	/* fewer than 16 bytes: word loop */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}		/* borrow r4, r5 */

.Lmemcpy_srcul3loop16:
	/* 16 bytes per pass, source offset 3 (shift pair 24/8) */
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}		/* return r4, r5 */
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
	/* One word per pass, source offset 3 */
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	/* 1 byte of the last loaded word is unconsumed: back r1 up to it */
	sub	r1, r1, #1
	b	.Lmemcpy_l4
END(memcpy)

	.section .note.GNU-stack,"",%progbits