1/*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/appleapiopts.h>
30#include <ppc/asm.h>
31#include <machine/cpu_capabilities.h>
32#include <machine/commpage.h>
33
34        .text
35        .align	2
36
37
// *******************
// * B Z E R O _ 3 2 *
// *******************
//
// void bzero(void *b, size_t len);
//
// Zero len bytes starting at b.  This variant is for 32-bit processors with
// a 32-byte cache line: buffers of 32 bytes or more are 32-byte aligned and
// then cleared a line at a time with DCBZ; whatever is left (0..31 bytes) is
// finished with ordinary stores.
//
// Entry:
//		r3 = b, ptr to first byte to zero
//		r4 = len, count of bytes to zero
//
// Register use:
//		r0 = zero
//		r3 = original ptr, not changed since memset returns it
//		r4 = count of bytes left to set
//		r5 = scratch (#bytes needed to 32-byte align, then tail tests)
//		r8 = #32-byte chunks, copied to CTR for the DCBZ loop
//		r9 = working operand ptr
//		cr0, cr1, cr7 and (on the >= 32 byte path) CTR are modified
// We do not touch r2 and r10-r12, which some callers depend on.

        .align	5
bzero_32:						// void	bzero(void *b, size_t len);
        cmplwi	cr7,r4,32		// too short for DCBZ?
        li		r0,0			// get a 0
        neg		r5,r3			// start to compute #bytes to align
        mr		r9,r3			// make copy of operand ptr (can't change r3)
        blt		cr7,Ltail		// length < 32, too short for DCBZ

// At least 32 bytes long, so compute alignment and #cache blocks.
// The andi. result in cr0 is consumed by the "beq 1f" below; none of the
// instructions in between alter cr0.

        andi.	r5,r5,0x1F		// r5 <-  #bytes to 32-byte align (cr0 eq if none)
        sub		r4,r4,r5		// adjust length
        srwi	r8,r4,5			// r8 <- #32-byte chunks
        cmpwi	cr1,r8,0		// any chunks? (result kept in cr1)
        mtctr	r8				// set up loop count
        beq		1f				// skip if already 32-byte aligned (then r8!=0, as length >= 32)

// 32-byte align.  We just store 32 0s at the unaligned start, rather than
// test and use conditional branches.  This stays inside the buffer because
// at least 32 bytes were requested; bytes beyond the alignment point are
// simply zeroed a second time by the code below.

        stw		r0,0(r9)
        stw		r0,4(r9)
        stw		r0,8(r9)
        stw		r0,12(r9)
        stw		r0,16(r9)
        stw		r0,20(r9)
        stw		r0,24(r9)
        stw		r0,28(r9)
        add		r9,r9,r5		// now r9 is 32-byte aligned
        beq		cr1,Ltail		// skip if no 32-byte chunks

// Loop doing 32-byte version of DCBZ instruction.
// NB: we take alignment exceptions on cache-inhibited memory.
// The kernel could be changed to zero cr7 when emulating a
// dcbz (as it does on 64-bit processors), so we could avoid all
// but the first.

1:
        andi.	r5,r4,0x1F		// will there be trailing bytes? (cr0 tested by beqlr)
        b		2f				// jump over any padding the .align inserts
        .align	4
2:
        dcbz	0,r9			// zero another 32 bytes
        addi	r9,r9,32
        bdnz	2b

        beqlr					// no trailing bytes

// Store trailing bytes.  Only the low five bits of r4 matter here: bit 27
// (value 16) is tested through cr0, and bits 28-31 (values 8, 4, 2, 1) are
// copied into cr7 and tested one at a time.

Ltail:
        andi.	r5,r4,0x10		// test bit 27 separately
        mtcrf	0x01,r4			// low 4 bits of remaining byte count to cr7

        beq		2f				// no 16-byte chunks
        stw		r0,0(r9)
        stw		r0,4(r9)
        stw		r0,8(r9)
        stw		r0,12(r9)
        addi	r9,r9,16
2:
        bf		28,4f			// 8-byte chunk?
        stw		r0,0(r9)
        stw		r0,4(r9)
        addi	r9,r9,8
4:
        bf		29,5f			// word?
        stw		r0,0(r9)
        addi	r9,r9,4
5:
        bf		30,6f			// halfword?
        sth		r0,0(r9)
        addi	r9,r9,2
6:
        bflr	31				// byte?  return if not
        stb		r0,0(r9)
        blr
128
// Commpage descriptor for bzero_32.  Arguments, as written: the routine's
// label, _COMM_PAGE_BZERO, kCache32, 0, kCommPage32.
// NOTE(review): COMMPAGE_DESCRIPTOR is defined outside this file (presumably
// in <machine/commpage.h>).  Judging by the names, it publishes the routine at
// _COMM_PAGE_BZERO in the 32-bit commpage for processors with 32-byte cache
// lines -- confirm against the macro definition.
	COMMPAGE_DESCRIPTOR(bzero_32,_COMM_PAGE_BZERO,kCache32,0,kCommPage32)
130