/* memcpy_arm.S revision 129202 */
/*	$NetBSD: memcpy_arm.S,v 1.1 2003/10/14 07:51:45 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD: head/lib/libc/arm/string/memcpy_arm.S 129202 2004-05-14 12:04:31Z cognet $");

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Apologies for the state of the comments ;-)
 */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
/*-----------------------------------------------------------------------
 * void *memcpy(void *dst, const void *src, size_t len)
 * ABI:   ARM APCS/EABI (forward copy only in this view of the file)
 * In:    r0 = dst, r1 = src, r2 = len
 * Out:   r0 = original dst (saved on the stack at entry, reloaded on return)
 * Clobb: r1-r3, r12 (ip), flags; r4/r5 are saved/restored around the
 *        multi-word inner loops, lr is saved with r0 in the prologue.
 *-----------------------------------------------------------------------*/
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10		/* at least 16 bytes still owed? */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* undo the 0x14 bias; sets ge for loop12 */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* undo the 8 bias */
	blt	.Lmemcpy_l4

	subs	r2, r2, #4		/* lt: exactly one word left; ge: two */
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* undo the 4 bias; eq => nothing left */
#ifdef __APCS_26__			/* FIX: was misspelled __APCS_26_ */
	ldmeqia	sp!, {r0, pc}^		/* done (26-bit: ^ restores PSR) */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2			/* 1st byte always, ge => 2nd, gt => 3rd */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}		/* reload saved dst and return */

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = 4 - (dst & 3) bytes to align */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	bic	r1, r1, #3		/* word-align src; r12 = src offset 1..3 */
	ldr	lr, [r1], #4		/* prime lr with first aligned word */
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3		/* offset 3 */
	beq	.Lmemcpy_srcul2		/* offset 2; fall through for offset 1 */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* offset-1 source: merge words with 8/24-bit shifts */
.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* rewind src to first unconsumed byte */
	b	.Lmemcpy_l4

	/* offset-2 source: merge words with 16/16-bit shifts */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* rewind src to first unconsumed byte */
	b	.Lmemcpy_l4

	/* offset-3 source: merge words with 24/8-bit shifts */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* rewind src to first unconsumed byte */
	b	.Lmemcpy_l4