1/*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* =======================================
29 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
30 * =======================================
31 *
32 * Version of 2/20/2003, tuned for G3.
33 *
34 * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
35 * environment.
36 *
37 *   r0  = "w7" or temp
38 *   r2  = "w8"
39 *   r3  = not used, as memcpy and memmove return 1st parameter as a value
40 *   r4  = source ptr ("rs")
41 *   r5  = count of bytes to move ("rc")
42 *   r6  = "w1"
43 *   r7  = "w2"
44 *   r8  = "w3"
45 *   r9  = "w4"
46 *   r10 = "w5"
47 *   r11 = "w6"
48 *   r12 = destination ptr ("rd")
49 * f0-f3 = used for moving data 8 bytes at a time when the operands are relatively word-aligned
50 */
// Symbolic register names (see the register-usage table in the header comment).
// rc is r5, the first register written by the "lswx r5,..." sequences, so the
// byte count is always copied to XER (mtxer) before one of those loads runs.
51#define rs	r4		// NB: we depend on rs==r4 in "lswx" instructions
52#define rd	r12		// destination ptr; r12 is the last register an "lswx r5" of up to 32 bytes loads
53#define rc	r5		// count of bytes to move
54
// w1-w8 hold the eight words of one 32-byte chunk in the word-copy loops.
55#define w1	r6
56#define w2	r7
57#define w3	r8
58#define	w4	r9
59#define	w5	r10
60#define	w6	r11
61#define	w7	r0		// r0 also serves as a general temp outside the word loops
62#define	w8	r2		// use of r2 is why this code cannot run in a PEF/CFM environment
63
64#include <sys/appleapiopts.h>
65#include <ppc/asm.h>
66#include <machine/cpu_capabilities.h>
67#include <machine/commpage.h>
68
69        .text
70
71
// Operands of kLong bytes or more take the LLong0/LLong1 paths; anything
// shorter (at most 32 bytes) is moved by one lswx/stswx pair through r5-r12.
72#define	kLong	33					// too long for string ops
73
74
75// Main entry points.
76
// bcopy_g3: void bcopy(const void *src, void *dst, size_t len)
//   In:   r3 = src, r4 = dst, r5 (rc) = len
//   Short operands (len < kLong) are loaded in full by lswx and only then
//   stored by stswx, so no direction check is needed on this path.
//   Long operands continue at LLong0 with w1 = dst - src and rd = dst.
77        .align 	5
78bcopy_g3:							// void bcopy(const void *src, void *dst, size_t len)
79        cmplwi	rc,kLong			// length > 32 bytes?
80        sub		w1,r4,r3			// w1 = dst - src; must move in reverse if (rd-rs)<rc
81        mr		rd,r4				// rd = dst; LLong0 then reloads rs (r4) with src from r3
82        bge		LLong0				// skip if long operand
83        mtxer	rc					// set length for string ops
84        lswx	r5,0,r3				// load bytes into r5-r12
85        stswx	r5,0,r4				// store them
86        blr
87
88// NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
89
// Lmemcpy_g3 / Lmemmove_g3: both labels name the same code, so memcpy gets the
// same overlap handling as memmove.
//   In:   r3 = dst, r4 (rs) = src, r5 (rc) = len
//   Out:  r3 is never written, so it still holds dst on return.
//   Short operands (len < kLong) are loaded in full by lswx and only then
//   stored by stswx.  Long operands continue at LLong1 with w1 = dst - src and
//   rd = dst.
90        .align	5
91Lmemcpy_g3:							// void* memcpy(void *dst, void *src, size_t len)
92Lmemmove_g3:						// void* memmove(void *dst, const void *src, size_t len)
93        cmplwi	rc,kLong			// length > 32 bytes?
94        sub		w1,r3,rs			// w1 = dst - src; must move in reverse if (rd-rs)<rc
95        mr		rd,r3				// must leave r3 alone, it is return value for memcpy etc
96        bge		LLong1				// longer than 32 bytes
97        mtxer	rc					// set length for string ops
98        lswx	r5,0,r4				// load bytes into r5-r12
99        stswx	r5,0,r3				// store them
100        blr
101
102// Long operands (more than 32 bytes.)
103//		w1  = (rd-rs), used to check for alignment
104
// On entry at LLong1: rs = source, rd = destination, rc = length (>= kLong),
// w1 = rd - rs.  This code picks one of four strategies:
//   (rd-rs) <  rc (unsigned)        -> LLongReverse (copy from the high end down)
//   otherwise, relatively aligned   -> LLongFloat   (forward, 8 bytes at a time)
//   otherwise                       -> fall through (forward, word loop below)
105LLong0:								// enter from bcopy()
106        mr		rs,r3				// rs = src (bcopy passes src in r3; rd was already set from r4)
107LLong1:								// enter from memcpy() and memmove()
108        cmplw	cr1,w1,rc			// set cr1 blt iff we must move reverse
109        rlwinm	r0,w1,0,0x3			// are operands relatively word-aligned?
110        neg		w2,rd				// prepare to align destination
111        cmpwi	cr5,r0,0			// set cr5 beq if relatively word aligned
112        blt		cr1,LLongReverse	// handle reverse move
113        andi.	w4,w2,3				// w4 <- #bytes to word align destination (also sets cr0)
114        beq		cr5,LLongFloat		// relatively aligned so use FPRs
115        sub		rc,rc,w4			// adjust count for alignment
116        srwi	r0,rc,5				// get #chunks to xfer (0 is possible only when w4 != 0; checked below)
117        rlwinm	rc,rc,0,0x1F		// mask down to leftover bytes
118        mtctr	r0					// set up loop count
119        beq		1f					// dest already word aligned (cr0 is still from the andi. above)
120
121// Word align the destination.
122
123        mtxer	w4					// byte count to xer
124        cmpwi	r0,0				// any chunks to xfer?
125        lswx	w1,0,rs				// move w4 bytes to align dest
126        add		rs,rs,w4
127        stswx	w1,0,rd
128        add		rd,rd,w4
129        beq-	2f					// pathologic case, no chunks to xfer
130
131// Forward, unaligned loop.
// Each pass loads a full 32-byte chunk into w1-w8 before storing any of it.
132
133	1:
134        lwz		w1,0(rs)
135        lwz		w2,4(rs)
136        lwz		w3,8(rs)
137        lwz		w4,12(rs)
138        lwz		w5,16(rs)
139        lwz		w6,20(rs)
140        lwz		w7,24(rs)
141        lwz		w8,28(rs)
142        addi	rs,rs,32
143        stw		w1,0(rd)
144        stw		w2,4(rd)
145        stw		w3,8(rd)
146        stw		w4,12(rd)
147        stw		w5,16(rd)
148        stw		w6,20(rd)
149        stw		w7,24(rd)
150        stw		w8,28(rd)
151        addi	rd,rd,32
152        bdnz	1b
153	2:									// rc = remaining bytes (0-31)
154        mtxer	rc					// set up count for string ops
155        mr		r0,rd				// move dest ptr out of the way (rd is r12, inside the r5-r12 load range)
156        lswx	r5,0,rs				// load xer bytes into r5-r12 (rs==r4)
157        stswx	r5,0,r0				// store them
158        blr
159
160
161
162// Forward, aligned loop.  We use FPRs.
// Entered from LLong1 when (rd-rs) is a multiple of 4.
//   In:  rs = source, rd = destination, rc = length (>= kLong), w2 = -rd
// The destination is first brought to an 8-byte boundary (0-7 bytes via
// lswx/stswx), then 32-byte chunks are moved through f0-f3, and finally the
// 0-31 leftover bytes are moved with lswx/stswx.
163
164LLongFloat:
165        andi.	w4,w2,7				// W4 <- #bytes to doubleword-align destination (also sets cr0)
166        sub		rc,rc,w4			// adjust count for alignment
167        srwi	r0,rc,5				// number of 32-byte chunks to xfer (0 is possible only when w4 != 0)
168        rlwinm	rc,rc,0,0x1F		// mask down to leftover bytes
169        mtctr	r0					// set up loop count
170        beq		1f					// dest already doubleword aligned (cr0 is still from the andi. above)
171
172// Doubleword align the destination.
173
174        mtxer	w4					// byte count to xer
175        cmpwi	r0,0				// any chunks to xfer?
176        lswx	w1,0,rs				// move w4 bytes to align dest
177        add		rs,rs,w4
178        stswx	w1,0,rd
179        add		rd,rd,w4
180        beq-	2f					// pathologic case, no chunks to xfer
181	1:									// loop over 32-byte chunks
182        lfd		f0,0(rs)
183        lfd		f1,8(rs)
184        lfd		f2,16(rs)
185        lfd		f3,24(rs)
186        addi	rs,rs,32
187        stfd	f0,0(rd)
188        stfd	f1,8(rd)
189        stfd	f2,16(rd)
190        stfd	f3,24(rd)
191        addi	rd,rd,32
192        bdnz	1b
193	2:									// rc = remaining bytes (0-31)
194        mtxer	rc					// set up count for string ops
195        mr		r0,rd				// move dest ptr out of the way (rd is r12, inside the r5-r12 load range)
196        lswx	r5,0,rs				// load xer bytes into r5-r12 (rs==r4)
197        stswx	r5,0,r0				// store them
198        blr
199
200
201// Long, reverse moves.
202//		cr5 = beq if relatively word aligned
// Entered from LLong1 when (rd-rs) < rc (unsigned).
//   In:  rs = source, rd = destination, rc = length (>= kLong)
// Both pointers are advanced to one past the end of their operands and the
// copy proceeds from high addresses to low.  Relatively aligned operands are
// handed to LReverseFloat.  Otherwise 32-byte chunks are moved with a word
// loop (the destination is not aligned first on this path), and the 0-31
// lowest-addressed bytes are moved last with lswx/stswx.
203
204LLongReverse:
205        add		rd,rd,rc			// point to end of operands + 1
206        add		rs,rs,rc
207        beq		cr5,LReverseFloat	// aligned operands so can use FPRs
208        srwi	r0,rc,5				// get chunk count (>= 1, since rc >= kLong here)
209        rlwinm	rc,rc,0,0x1F		// mask down to leftover bytes
210        mtctr	r0					// set up loop count
211        mtxer	rc					// set up for trailing bytes
212	1:
213        lwz		w1,-4(rs)
214        lwz		w2,-8(rs)
215        lwz		w3,-12(rs)
216        lwz		w4,-16(rs)
217        stw		w1,-4(rd)
218        lwz		w5,-20(rs)
219        stw		w2,-8(rd)
220        lwz		w6,-24(rs)
221        stw		w3,-12(rd)
222        lwz		w7,-28(rs)
223        stw		w4,-16(rd)
224        lwzu	w8,-32(rs)			// update form: rs -= 32
225        stw		w5,-20(rd)
226        stw		w6,-24(rd)
227        stw		w7,-28(rd)
228        stwu	w8,-32(rd)			// update form: rd -= 32
229        bdnz	1b
230
231        sub		r4,rs,rc			// point to 1st (leftmost) leftover byte (0..31)
232        sub		r0,rd,rc			// move dest ptr out of way
233        lswx	r5,0,r4				// load xer bytes into r5-r12
234        stswx	r5,0,r0				// store them
235        blr
236
237
238// Long, reverse aligned moves.  We use FPRs.
// Entered from LLongReverse when the operands are relatively word-aligned.
//   In:  rs, rd = one past the end of source / destination, rc = length (>= kLong)
// The end of the destination is first brought down to an 8-byte boundary
// (0-7 bytes via lswx/stswx), then 32-byte chunks are moved through f0-f3 from
// high addresses to low, and finally the 0-31 lowest-addressed bytes are moved
// with lswx/stswx.
239
240LReverseFloat:
241        andi.	w4,rd,7				// w4 <- #bytes to doubleword-align destination (also sets cr0)
242        sub		rc,rc,w4			// adjust count for alignment
243        srwi	r0,rc,5				// number of 32-byte chunks to xfer (0 is possible only when w4 != 0)
244        rlwinm	rc,rc,0,0x1F		// mask down to leftover bytes
245        mtctr	r0					// set up loop count
246        beq		1f					// dest already doubleword aligned (cr0 is still from the andi. above)
247
248// Doubleword align the destination.
249
250        mtxer	w4					// byte count to xer
251        cmpwi	r0,0				// any chunks to xfer?
252        sub		rs,rs,w4			// point to 1st bytes to xfer
253        sub		rd,rd,w4
254        lswx	w1,0,rs				// move w4 bytes to align dest
255        stswx	w1,0,rd
256        beq-	2f					// pathologic case, no chunks to xfer
257	1:
258        lfd		f0,-8(rs)
259        lfd		f1,-16(rs)
260        lfd		f2,-24(rs)
261        lfdu	f3,-32(rs)			// update form: rs -= 32
262        stfd	f0,-8(rd)
263        stfd	f1,-16(rd)
264        stfd	f2,-24(rd)
265        stfdu	f3,-32(rd)			// update form: rd -= 32
266        bdnz	1b
267	2:									// rc = remaining bytes (0-31)
268        mtxer	rc					// set up count for string ops
269        sub		r4,rs,rc			// point to 1st (leftmost) leftover byte (0..31)
270        sub		r0,rd,rc			// move dest ptr out of way
271        lswx	r5,0,r4				// load xer bytes into r5-r12
272        stswx	r5,0,r0				// store them
273        blr
274
// Describes the routine starting at bcopy_g3 for placement at _COMM_PAGE_BCOPY.
// NOTE(review): COMMPAGE_DESCRIPTOR is defined outside this file (presumably in
// <machine/commpage.h>); the remaining arguments look like required capability
// bits (0), excluded capability bits (k64Bit+kHasAltivec) and the target
// commpage (kCommPage32) -- confirm against the macro definition.
275	COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)
276