/*
 * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*****************************************************************************
 * ARMv5 and ARMv6 implementation, also used in dyld on later archs         *
 *****************************************************************************/

#include <arm/arch.h>

.text
.align 2

	.globl _memcpy
	.globl _bcopy
	.globl _memmove

_bcopy:		/* void bcopy(const void *src, void *dest, size_t len); */
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3

_memcpy:	/* void *memcpy(void *dest, const void *src, size_t len); */
_memmove:	/* void *memmove(void *dest, const void *src, size_t len); */
	/* check for zero len or if the pointers are the same */
	cmp	r2, #0
	cmpne	r0, r1
	bxeq	lr

	/* save r0 (return value), r4 (scratch), and r5 (scratch) */
	stmfd	sp!, { r0, r4, r5, r7, lr }
	add	r7, sp, #12

	/* check for overlap. r3 <- distance between src & dest */
	subhs	r3, r0, r1
	sublo	r3, r1, r0
	cmp	r3, r2			/* if distance(src, dest) < len, we have overlap */
	blo	Loverlap

Lnormalforwardcopy:
	/* are src and dest dissimilarly word aligned? */
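	/*
	 * A worked example of the alignment test below: lsl #30 keeps only
	 * the low two address bits in the comparison, so src = 0x1001 and
	 * dest = 0x2003 compare as 0x40000000 vs 0xC0000000. Unequal
	 * results mean the pointers can never reach word alignment
	 * together, and we must take the byte-merging path at
	 * Lnonwordaligned_forward.
	 */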
	mov	r12, r0, lsl #30
	cmp	r12, r1, lsl #30
	bne	Lnonwordaligned_forward

	/* if len < 64, do a quick forward copy */
	cmp	r2, #64
	blt	Lsmallforwardcopy

	/* check for 16 byte src/dest unalignment */
	tst	r0, #0xf
	bne	Lsimilarlyunaligned

	/* check for 32 byte dest unalignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32

Lmorethan64_aligned:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop:
	/* copy 64 bytes at a time */
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
	pld	[r1, #32]
#endif
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
#ifdef _ARM_ARCH_6
	pld	[r1, #32]
#endif
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (we previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Llessthan64_aligned:
	/* copy 16 bytes at a time until we have < 16 bytes */
	cmp	r2, #16
	ldmiage	r1!, { r3, r4, r5, r12 }
	stmiage	r0!, { r3, r4, r5, r12 }
	subsge	r2, r2, #16
	bgt	Llessthan64_aligned
	beq	Lexit

Llessthan16_aligned:
	/* load the remaining len into the flags: N=8, Z=4, C=2, and V=1 bytes left */
	mov	r2, r2, lsl #28
	msr	cpsr_f, r2

	ldmiami	r1!, { r2, r3 }
	ldreq	r4, [r1], #4
	ldrhcs	r5, [r1], #2
	ldrbvs	r12, [r1], #1

	stmiami	r0!, { r2, r3 }
	streq	r4, [r0], #4
	strhcs	r5, [r0], #2
	strbvs	r12, [r0], #1
	b	Lexit

Lsimilarlyunaligned:
	/* both src and dest are unaligned in similar ways, align dest to a 16 byte boundary, then fall through to finish 32 byte alignment */
	mov	r12, r0, lsl #28
	rsb	r12, r12, #0		/* r12[31:28] <- bytes needed to reach alignment */
	msr	cpsr_f, r12

	ldrbvs	r3, [r1], #1
	ldrhcs	r4, [r1], #2
	ldreq	r5, [r1], #4

	strbvs	r3, [r0], #1
	strhcs	r4, [r0], #2
	streq	r5, [r0], #4

	ldmiami	r1!, { r3, r4 }
	stmiami	r0!, { r3, r4 }

	subs	r2, r2, r12, lsr #28
	beq	Lexit

Lunaligned_32:
	/* bring dest up to 32 byte alignment */
	tst	r0, #(1 << 4)
	ldmiane	r1!, { r3, r4, r5, r12 }
	stmiane	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned
	b	Llessthan64_aligned

Lbytewise2:
	/* copy 2 bytes at a time */
	subs	r2, r2, #2

	ldrb	r3, [r1], #1
	ldrbpl	r4, [r1], #1

	strb	r3, [r0], #1
	strbpl	r4, [r0], #1

	bhi	Lbytewise2
	b	Lexit

Lbytewise:
	/* simple bytewise forward copy */
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bne	Lbytewise
	b	Lexit

Lsmallforwardcopy:
	/* src and dest are word aligned similarly, less than 64 bytes to copy */
	cmp	r2, #4
	blt	Lbytewise2

	/* bytewise copy until word aligned */
	tst	r1, #3
Lwordalignloop:
	ldrbne	r3, [r1], #1
	strbne	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop

	cmp	r2, #16
	bge	Llessthan64_aligned
	blt	Llessthan16_aligned

Loverlap:
	/* src and dest overlap in some way, len > 0 */
	cmp	r0, r1			/* if dest > src */
	bhi	Loverlap_srclower

Loverlap_destlower:
	/* dest < src, see if we can still do a fast forward copy or must fall back to a slow forward copy */
	cmp	r3, #64
	bge	Lnormalforwardcopy	/* overlap is greater than one stride of the copy, use normal copy */
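
	/*
	 * Why 64 is the safe threshold here: each trip through the forward
	 * block copy loads a 32 byte group before storing it, and two such
	 * groups make up one 64 byte stride. When dest trails src by at
	 * least a full stride, no store in one iteration can land on bytes
	 * a later load still needs, so the overlapping regions can take the
	 * normal forward path unchanged.
	 */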

	cmp	r3, #2
	bge	Lbytewise2
	b	Lbytewise

	/* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
	/* src < dest, with overlap */

	/* src += len; dest += len; */
	add	r0, r0, r2
	add	r1, r1, r2

	/* we have to copy in reverse no matter what, test whether we can use a large block reverse copy */
	cmp	r2, #64			/* less than 64 bytes to copy? */
	cmpgt	r3, #64			/* less than 64 bytes of nonoverlap? */
	blt	Lbytewise_reverse

	/* test if src and dest are nonword aligned differently */
	mov	r3, r0, lsl #30
	cmp	r3, r1, lsl #30
	bne	Lbytewise_reverse

	/* test if src and dest are non word aligned or dest is non 16 byte aligned */
	tst	r0, #0xf
	bne	Lunaligned_reverse_similarly

	/* test for dest 32 byte alignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32_reverse_similarly

	/* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop_reverse:
	/* copy 64 bytes at a time */
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
	pld	[r1, #-32]
#endif
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
#ifdef _ARM_ARCH_6
	pld	[r1, #-32]
#endif
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop_reverse

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (we previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Lbytewise_reverse:
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	subs	r2, r2, #1
	bne	Lbytewise_reverse
	b	Lexit

Lunaligned_reverse_similarly:
	/* both src and dest are unaligned in similar ways, align dest down to a 16 byte boundary */
	mov	r12, r0, lsl #28
	msr	cpsr_f, r12

	ldrbvs	r3, [r1, #-1]!
	ldrhcs	r4, [r1, #-2]!
	ldreq	r5, [r1, #-4]!

	strbvs	r3, [r0, #-1]!
	strhcs	r4, [r0, #-2]!
	streq	r5, [r0, #-4]!
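
	/*
	 * Each condition above and below consumes one bit of the dest
	 * misalignment that msr cpsr_f loaded into the flags: V covers the
	 * 1 byte case, C the 2 byte case, Z (eq) the 4 byte case, and N
	 * (mi, tested next) moves the remaining 8 bytes as a register pair.
	 */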

	ldmdbmi	r1!, { r3, r4 }
	stmdbmi	r0!, { r3, r4 }

	subs	r2, r2, r12, lsr #28
	beq	Lexit

Lunaligned_32_reverse_similarly:
	/* bring dest up to 32 byte alignment */
	tst	r0, #(1 << 4)
	ldmdbne	r1!, { r3, r4, r5, r12 }
	stmdbne	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned_reverse
	b	Lbytewise_reverse

	/* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
	cmp	r2, #8
	blt	Lbytewise2		/* not worth the effort with less than 8 bytes total */

	/* bytewise copy until src word aligned */
	tst	r1, #3
Lwordalignloop2:
	ldrbne	r3, [r1], #1
	strbne	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop2

	/* figure out how the src and dest are unaligned */
	and	r3, r0, #3
	cmp	r3, #2
	blt	Lalign1_forward
	beq	Lalign2_forward
	bgt	Lalign3_forward

Lalign1_forward:
	/* the dest pointer is 1 byte off from src */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #1

	/* prime the copy */
	ldrb	r4, [r0]		/* load D[7:0] */

Lalign1_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #8	/* D[31:8] = S[23:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #24		/* D[7:0] = S[31:24] */
	subs	r12, r12, #1
	bne	Lalign1_forward_loop

	/* finish the copy off */
	strb	r4, [r0], #1		/* save D[7:0] */

	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lalign2_forward:
	/* the dest pointer is 2 bytes off from src */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #2

	/* prime the copy */
	ldrh	r4, [r0]		/* load D[15:0] */

Lalign2_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #16	/* D[31:16] = S[15:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #16		/* D[15:0] = S[31:16] */
	subs	r12, r12, #1
	bne	Lalign2_forward_loop

	/* finish the copy off */
	strh	r4, [r0], #2		/* save D[15:0] */

	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lalign3_forward:
	/* the dest pointer is 3 bytes off from src */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #3

	/* prime the copy */
	ldr	r4, [r0]
	and	r4, r4, #0x00ffffff	/* load D[23:0] */

Lalign3_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #24	/* D[31:24] = S[7:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #8		/* D[23:0] = S[31:8] */
	subs	r12, r12, #1
	bne	Lalign3_forward_loop

	/* finish the copy off */
	strh	r4, [r0], #2		/* save D[15:0] */
	mov	r4, r4, lsr #16
	strb	r4, [r0], #1		/* save D[23:16] */

	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lexit:
	ldmfd	sp!, { r0, r4, r5, r7, pc }
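
/*
 * For reference, the C-level contracts these entry points implement, as
 * declared in <strings.h> and <string.h>:
 *
 *	void  bcopy(const void *src, void *dest, size_t len);
 *	void *memcpy(void *dest, const void *src, size_t len);
 *	void *memmove(void *dest, const void *src, size_t len);
 *
 * memcpy and memmove must return dest, which is why r0 is saved on entry
 * and restored from the stack in Lexit; bcopy discards the return value.
 */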