/*
 * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <arm/arch.h>

/*
 * A reasonably well-optimized bzero/memset. Should work equally well on ARM11- and ARM9-based
 * cores.
 *
 * The algorithm aligns the destination pointer on a 32-byte boundary and then
 * writes 64 bytes per loop iteration, as two 32-byte store-multiples.
 */
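
/*
 * Roughly equivalent C for the strategy described above (an illustrative sketch only:
 * memset_sketch is a made-up name, this comment is not compiled, and the real code below
 * additionally special-cases fills shorter than 32 bytes and reaches alignment with
 * byte/halfword/word stores rather than a byte loop):
 *
 *	#include <stdint.h>
 *	#include <stddef.h>
 *
 *	void *memset_sketch(void *ptr, int c, size_t len)
 *	{
 *		uint8_t *p = ptr;
 *		uint32_t pattern = (uint8_t)c * 0x01010101u;	// byte replicated into a word
 *
 *		while (len && ((uintptr_t)p & 0x1f)) {		// step up to a 32-byte boundary
 *			*p++ = (uint8_t)c;
 *			len--;
 *		}
 *		while (len >= 64) {				// main loop: 64 bytes per pass
 *			for (int i = 0; i < 16; i++)
 *				((uint32_t *)p)[i] = pattern;
 *			p += 64;
 *			len -= 64;
 *		}
 *		while (len--)					// 0-63 trailing bytes
 *			*p++ = (uint8_t)c;
 *		return ptr;
 *	}
 */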
	.text
	.align 2

	.globl _memset
/* void *memset(void *ptr, int c, size_t len); */
_memset:
	/* move len into r1, unpack c into r2 */
	mov		r3, r2
	and		r1, r1, #0xff
	orr		r1, r1, r1, lsl #8
	orr		r2, r1, r1, lsl #16
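	/* r2 now holds c replicated into every byte, e.g. c = 0x3f yields 0x3f3f3f3f */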
	mov		r1, r3
	b		Lbzeroengine

	.globl _bzero
/* void bzero(void *ptr, size_t len); */
_bzero:
	/* zero out r2 so we can be just like memset(0) */
	mov		r2, #0

Lbzeroengine:
	/* move the base pointer into r12 and leave r0 alone so that we return the original pointer */
	mov		r12, r0

	/* copy r2 into r3 for 64-bit stores */
	mov		r3, r2

	/* check for zero len */
	cmp		r1, #0
	bxeq	lr

	/* fall back to a bytewise store for less than 32 bytes */
	cmp		r1, #32
	blt		L_bytewise

	/* check for 32 byte unaligned ptr */
	tst		r12, #0x1f
	bne		L_unaligned

	/* make sure we have at least 64 bytes to zero */
	cmp		r1, #64
	blt		L_lessthan64aligned

	/* >= 64 bytes of len, 32 byte aligned */
L_64ormorealigned:

	/* we need some registers, avoid r7 (frame pointer) and r9 (thread register) */
	stmfd	sp!, { r4-r6, r8, r10-r11 }
	mov		r4, r2
	mov		r5, r2
	mov		r6, r2
	mov		r8, r2
	mov		r10, r2
	mov		r11, r2

	/* pre-subtract 64 from the len to avoid an extra compare in the loop */
	sub		r1, r1, #64

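	/* each pass below stores 64 bytes as two 8-register stmias, with the subs scheduled
	   between them; thanks to the pre-subtraction, bge keeps looping while at least
	   another 64 bytes remain to be stored */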
L_64loop:
	stmia	r12!, { r2-r6, r8, r10-r11 }
	subs	r1, r1, #64
	stmia	r12!, { r2-r6, r8, r10-r11 }
	bge		L_64loop

	/* restore the saved regs */
	ldmfd	sp!, { r4-r6, r8, r10-r11 }

	/* check for completion (had previously subtracted an extra 64 from len) */
	adds	r1, r1, #64
	bxeq	lr

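	/* fewer than 64 bytes remain; store 16 bytes per pass while at least 16 are left.
	   Every instruction is conditional, so a short remainder falls straight through
	   into L_lessthan16aligned without an extra branch. */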
L_lessthan64aligned:
	/* do we have 16 or more bytes left */
	cmp		r1, #16
	stmiage	r12!, { r2-r3 }
	stmiage	r12!, { r2-r3 }
	subsge	r1, r1, #16
	bgt		L_lessthan64aligned
	bxeq	lr

L_lessthan16aligned:
	/* store 0 to 15 bytes */
	mov		r1, r1, lsl #28		/* move the remaining len bits [3:0] to the flags area of cpsr */
	msr		cpsr_f, r1

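	/* after the shift, len bit 3 sits in N, bit 2 in Z, bit 1 in C and bit 0 in V,
	   so each conditional store below handles exactly one power-of-two chunk */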
	stmiami	r12!, { r2-r3 }		/* n is set, store 8 bytes */
	streq	r2, [r12], #4		/* z is set, store 4 bytes */
	strhcs	r2, [r12], #2		/* c is set, store 2 bytes */
	strbvs	r2, [r12], #1		/* v is set, store 1 byte */
	bx		lr

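	/* short fills (1-31 bytes): subtract two per pass; the second strb is predicated on the
	   count not having gone negative, and bhi exits once two or fewer bytes were left at the
	   top of the pass */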
L_bytewise:
	/* bytewise fill, up to 2 bytes per pass, alignment not guaranteed */
	subs	r1, r1, #2
	strb	r2, [r12], #1
	strbpl	r2, [r12], #1
	bhi		L_bytewise
	bx		lr

L_unaligned:
	/* unaligned on a 32 byte boundary, store 0-15 bytes until we're 16 byte aligned */
	mov		r3, r12, lsl #28
	rsb		r3, r3, #0x00000000
	msr		cpsr_f, r3

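	/* negating the low address bits leaves (16 - (ptr & 0xf)) & 0xf in the top nibble of r3;
	   the conditional stores below consume it smallest chunk first, so each store stays
	   naturally aligned.  Two word stores cover the 8-byte case because r3 holds this
	   count at the moment, not the fill pattern. */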
	strbvs	r2, [r12], #1		/* v is set, unaligned in the 1s column */
	strhcs	r2, [r12], #2		/* c is set, unaligned in the 2s column */
	streq	r2, [r12], #4		/* z is set, unaligned in the 4s column */
	strmi	r2, [r12], #4		/* n is set, unaligned in the 8s column */
	strmi	r2, [r12], #4

	subs	r1, r1, r3, lsr #28
	bxeq	lr

	/* we had previously trashed r3, restore it */
	mov		r3, r2

	/* now make sure we're 32 byte aligned */
	tst		r12, #(1 << 4)
	stmiane	r12!, { r2-r3 }
	stmiane	r12!, { r2-r3 }
	subsne	r1, r1, #16

	/* we're now aligned, check for >= 64 bytes left */
	cmp		r1, #64
	bge		L_64ormorealigned
	b		L_lessthan64aligned
