/*	$NetBSD: bzero.S,v 1.10 2011/01/19 02:47:01 matt Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.10 2011/01/19 02:47:01 matt Exp $")
#endif /* LIBC_SCCS && !lint */

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't: stswx is slower than a trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/
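/*
 * Both entry points funnel into cb_memset below.  memset() must return
 * its first argument, so the original pointer is kept in %r8; bzero()
 * returns void, so the value of %r8 is irrelevant on that path.
 */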

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset
END(bzero)

ENTRY(memset)
		cmplwi	%cr1, %r5, 0
		mr.	%r0, %r4		/* r0 := c, and test c against 0 */
		mr	%r8, %r3		/* save original dst for return value */
		beqlr-	%cr1			/* Nothing to do */

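		/*
		 * %r0 holds c in its low byte; the two rlwimi below
		 * replicate that byte into all four bytes of %r0
		 * (e.g. 0x000000ab -> 0xabababab).
		 */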
		rlwimi	%r0, %r4, 8, 16, 23	/* replicate fill byte into low halfword */
		rlwimi	%r0, %r0, 16, 0, 15	/* ... and halfword into the full word */
		mr	%r4, %r5
		bne-	simple_fill		/* c != 0: use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
		mflr	%r9
#ifdef PIC
		PIC_GOTSETUP(%r10)
		mtlr	%r9
		lwz	%r5,cache_info@got(%r10)
#else
		lis	%r5,cache_info@h
		ori	%r5,%r5,cache_info@l
#endif
		lwz	%r6, 4(%r5)		/* cache_info[1]: D-cache line size */
		cmpwi	%r6, -1
		bne+	cb_cacheline_known	/* already probed? */

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define	CPU_CACHEINFO	5

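/*
 * Offsets into a temporary stack frame used for the sysctl(2) calls
 * below: MIB array, old-length word, and register save slots.
 */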
#define STKFRAME_SZ	64
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32
#define R31_SAVE	36
#ifdef PIC
#define R30_SAVE	40
#endif

		stw	%r9, 4(%r1)
		stwu	%r1, -STKFRAME_SZ(%r1)

		stw	%r31, R31_SAVE(%r1)
		mr	%r31, %r5		/* cache info */

#ifdef PIC
		stw	%r30, R30_SAVE(%r1)
		PIC_TOCSETUP(cb_memset,%r30)
#endif

		stw	%r8, R8_SAVE(%r1)
		stw	%r3, R3_SAVE(%r1)
		stw	%r4, R4_SAVE(%r1)
		stw	%r0, R0_SAVE(%r1)

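		/*
		 * Query machdep.cacheinfo via sysctl(2):
		 * mib = { CTL_MACHDEP, CPU_CACHEINFO }, oldp = cache_info
		 * (four words), oldlenp = OLDPLEN(%r1).
		 */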
		li	%r0, CTL_MACHDEP		/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHEINFO
		stw	%r0, MIB+4(%r1)

		li	%r0, 4*4			/* Oldlenp := 4*4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		/* %r5 already contains &cache_info */
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

		cmpwi	%r3, 0			/* Check result */
		beq	1f

		/* Failed; fall back to the older CPU_CACHELINE sysctl (line size only) */

		li	%r0, CTL_MACHDEP	/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHELINE
		stw	%r0, MIB+4(%r1)

		li	%r0, 4			/* Oldlenp := 4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		addi	%r5, %r31, 4		/* oldp = &cache_info[1] */
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
1:
		lwz	%r3, R3_SAVE(%r1)
		lwz	%r4, R4_SAVE(%r1)
		lwz	%r8, R8_SAVE(%r1)
		lwz	%r0, R0_SAVE(%r1)
		lwz	%r9, 4(%r31)		/* reload line size from cache_info[1] */
		lwz	%r31, R31_SAVE(%r1)
#ifdef PIC
		lwz	%r30, R30_SAVE(%r1)
#endif
		addi	%r1, %r1, STKFRAME_SZ
		lwz	%r7, 4(%r1)
		mtlr	%r7

		cntlzw	%r6, %r9		/* compute shift value: */
		li	%r5, 31
		subf	%r5, %r6, %r5		/* %r5 = 31 - cntlzw(size) = log2(size) */

#ifdef PIC
		mflr	%r9
		PIC_GOTSETUP(%r10)
		mtlr	%r9
		lwz	%r6, cache_sh@got(%r10)
		stw	%r5, 0(%r6)
#else
		lis	%r6, cache_sh@ha
		stw	%r5, cache_sh@l(%r6)
#endif
/*----------------------------------------------------------------------*/
/* The cache line size and shift value are now known; (re)load them into %r9 and %r10 */
cb_cacheline_known:
#ifdef PIC
		lwz	%r5, cache_info@got(%r10)
		lwz	%r9, 4(%r5)
		lwz	%r5, cache_sh@got(%r10)
		lwz	%r10, 0(%r5)
#else
		lis	%r9, cache_info+4@ha
		lwz	%r9, cache_info+4@l(%r9)
		lis	%r10, cache_sh@ha
		lwz	%r10, cache_sh@l(%r10)
#endif

#else /* _KERNEL */
#ifdef	MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
		cntlzw	%r10, %r9		/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
#endif /* _KERNEL */
		/* Back in memory filling business */
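		/* Here %r9 = D-cache line size in bytes, %r10 = log2 of it */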

		cmplwi	%cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9		/* %r5 = 2*CL */
		cmplw	r_len, %r5		/* < 2*CL bytes to move? */
		beqlr-	%cr1			/* then do nothing */

		blt+	simple_fill		/* yes, use a trivial fill routine */

		/* Word-align the block: fill bytewise until dst is word aligned */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache-block align: fill wordwise until dst is CL aligned */

		/* There is certainly work left to do, since we started with */
		/* more than 2*CL bytes, so no need to check for r_len == 0. */

		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6
		srwi	%r5, %r5, 2
		srwi	%r6, %r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10	/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

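		/*
		 * dcbz zeroes a whole cache block without fetching it from
		 * memory first, hence the cache-block alignment above.  This
		 * path is only ever taken for a zero fill: memset() with
		 * c != 0 branched to simple_fill earlier.
		 */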
1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	%cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	%cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03	/* bytes to fill to align4 */
		blt	%cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

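		/* each stswi stores 8 bytes from the register pair %r6/%r7 */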
1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise		/* leftover bytes remain? */

sf_return:	mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8		/* restore orig ptr */
		blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0

#endif
/*----------------------------------------------------------------------*/
