/*
 * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <arm/arch.h>

/*
 * A reasonably well-optimized bzero/memset. Should work equally well on
 * arm11- and arm9-based cores.
 *
 * The algorithm is to align the destination pointer on a 32-byte boundary
 * and then blast data 64 bytes at a time, in two stores of 32 bytes per loop.
 */
	.text
	.align 2

	.globl _memset
/* void *memset(void *ptr, int c, size_t len); */
_memset:
	/* stash len in r3, replicate the low byte of c across all four bytes of r2, then move len back into r1 */
	mov	r3, r2
	and	r1, r1, #0xff
	orr	r1, r1, r1, lsl #8
	orr	r2, r1, r1, lsl #16
	mov	r1, r3
	b	Lbzeroengine

	.globl _bzero
/* void bzero(void *ptr, size_t len); */
_bzero:
	/* zero out r2 so we behave just like memset(ptr, 0, len) */
	mov	r2, #0

Lbzeroengine:
	/* move the base pointer into r12 and leave r0 alone so that we return the original pointer */
	mov	r12, r0

	/* copy r2 into r3 for 64-bit stores */
	mov	r3, r2

	/* check for zero len */
	cmp	r1, #0
	bxeq	lr

	/* fall back to a bytewise store for less than 32 bytes */
	cmp	r1, #32
	blt	L_bytewise

	/* check for a ptr that is not 32-byte aligned */
	tst	r12, #0x1f
	bne	L_unaligned

	/* make sure we have at least 64 bytes to zero */
	cmp	r1, #64
	blt	L_lessthan64aligned

	/* >= 64 bytes of len, 32-byte aligned */
L_64ormorealigned:

	/* we need some registers; avoid r7 (frame pointer) and r9 (thread register) */
	stmfd	sp!, { r4-r6, r8, r10-r11 }
	mov	r4, r2
	mov	r5, r2
	mov	r6, r2
	mov	r8, r2
	mov	r10, r2
	mov	r11, r2

	/* pre-subtract 64 from the len to avoid an extra compare in the loop */
	sub	r1, r1, #64

L_64loop:
	stmia	r12!, { r2-r6, r8, r10-r11 }
	subs	r1, r1, #64
	stmia	r12!, { r2-r6, r8, r10-r11 }
	bge	L_64loop

	/* restore the saved regs */
	ldmfd	sp!, { r4-r6, r8, r10-r11 }

	/* check for completion (we had previously subtracted an extra 64 from len) */
	adds	r1, r1, #64
	bxeq	lr

L_lessthan64aligned:
	/* do we have 16 or more bytes left? */
	cmp	r1, #16
	stmiage	r12!, { r2-r3 }
	stmiage	r12!, { r2-r3 }
	subsge	r1, r1, #16
	bgt	L_lessthan64aligned
	bxeq	lr

L_lessthan16aligned:
	/* store 0 to 15 bytes */
	mov	r1, r1, lsl #28		/* move the remaining len bits [3:0] to the flags area of cpsr */
	msr	cpsr_f, r1

	stmiami	r12!, { r2-r3 }		/* n is set, store 8 bytes */
	streq	r2, [r12], #4		/* z is set, store 4 bytes */
	strhcs	r2, [r12], #2		/* c is set, store 2 bytes */
	strbvs	r2, [r12], #1		/* v is set, store 1 byte */
	bx	lr
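
/*
 * Worked example of the flag trick above (illustrative values of our own,
 * not from the original source): with 13 bytes left, r1 = 0b1101. The
 * lsl #28 moves those four bits into bits [31:28], so msr cpsr_f sets
 * N=1, Z=1, C=0, V=1. The predicated stores then write 8 + 4 + 0 + 1 = 13
 * bytes, one conditional store per set flag, with no branches.
 */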

L_bytewise:
	/* bytewise store, 2 bytes at a time; alignment not guaranteed */
	subs	r1, r1, #2
	strb	r2, [r12], #1
	strbpl	r2, [r12], #1
	bhi	L_bytewise
	bx	lr

L_unaligned:
	/* ptr is not 32-byte aligned; store 1-15 bytes until it is 16-byte aligned */
	mov	r3, r12, lsl #28
	rsb	r3, r3, #0x00000000
	msr	cpsr_f, r3

	strbvs	r2, [r12], #1		/* v is set, unaligned in the 1s column */
	strhcs	r2, [r12], #2		/* c is set, unaligned in the 2s column */
	streq	r2, [r12], #4		/* z is set, unaligned in the 4s column */
	strmi	r2, [r12], #4		/* n is set, unaligned in the 8s column */
	strmi	r2, [r12], #4

	subs	r1, r1, r3, lsr #28
	bxeq	lr

	/* we had previously trashed r3, restore it */
	mov	r3, r2

	/* now make sure we're 32-byte aligned */
	tst	r12, #(1 << 4)
	stmiane	r12!, { r2-r3 }
	stmiane	r12!, { r2-r3 }
	subsne	r1, r1, #16

	/* we're now aligned, check for >= 64 bytes left */
	cmp	r1, #64
	bge	L_64ormorealigned
	b	L_lessthan64aligned
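
/*
 * A sketch of the arithmetic in L_unaligned above (example values of our
 * own, not from the original source): rsb computes r3 = 0 - (r12 << 28),
 * whose top four bits are the byte count needed to reach 16-byte alignment.
 * For a pointer ending in 0b0101 (offset 5 within 16), r3 lsr #28 = 11 =
 * 0b1011, and msr cpsr_f sets N=1, Z=0, C=1, V=1: the predicated stores
 * write 1 + 2 + 8 = 11 bytes, and the subs deducts that same 11 from len.
 */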