/*	$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $	*/

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	File:	mips_bcopy.s
 *	Author:	Chris Maeda
 *	Date:	June 1993
 *
 *	Fast copy routine.  Derived from aligned_block_copy.
 */


#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define _LOCORE		/* XXX not really, just assembly-code source */
#include <machine/endian.h>

/* Embed the version identification string only for SCCS-tagged libc builds. */
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	ASMSTR("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
#else
	ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/* Emit the .abicalls directive when the compiler driver defines __ABICALLS__. */
#ifdef __ABICALLS__
	.abicalls
#endif

/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	When MEMCOPY or MEMMOVE is defined the same body is assembled as
 *	memcpy() or memmove() instead.  In that case the destination is
 *	taken from a0 and the source from a1, and the original destination
 *	pointer is returned in v0.
 *
 *	The copy direction is chosen from the pointer order: forwards when
 *	src >= dst, backwards (starting from the end of both buffers) when
 *	src < dst.
 *
 *	Scratch registers: AT, v1, a3, t0-t3.  SRCREG, DSTREG and SIZEREG
 *	are modified as the copy advances.
 *
 *	The whole body runs under ".set noreorder": branch delay slots are
 *	filled by hand, so the instruction following each branch executes
 *	whether or not the branch is taken.  Do not reorder instructions
 *	around branches without re-checking the delay slots.
 */

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2

LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 *
	 *	The delay slot of this branch is the first andi below; that
	 *	is harmless on the backward path, which recomputes t1 after
	 *	moving DSTREG to the end of the buffer.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	An unaligned destination is always copied byte by byte.
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 */
	andi		t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne		t1,zero,3f		# dest unaligned
	andi		t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne		t0,zero,5f		# src unaligned, dest aligned
						# (delay slot is the li below)

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	(Label 98 is not referenced anywhere in this function.)
	 */
98:
	li		AT,-(SZREG*8)
	and		t0,SIZEREG,AT		# count truncated to multiples
	PTR_ADDU	a3,SRCREG,t0		# run fast loop up to this addr
	sltu		AT,SRCREG,a3		# any work to do?
	beq		AT,zero,2f
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body
	 *
	 *	Two groups of four registers each: the first group is loaded
	 *	before SRCREG is advanced and stored before DSTREG is advanced,
	 *	the second group uses negative offsets from the already
	 *	advanced pointers.
	 */
1:	# cp
	REG_L		t3,(0*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	PTR_ADDU	SRCREG,SZREG*8
	REG_S		t3,(0*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t3,(-4*SZREG)(SRCREG)
	PTR_ADDU	DSTREG,SZREG*8
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_ADDU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L		t3,0(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,t0,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy the remaining SIZEREG bytes one at a time.
	 *
	 *	NOTE(review): bgtz is a signed test, so a remaining count
	 *	with the sign bit set ends the loop early.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,0(SRCREG)
	PTR_ADDU	SRCREG,1
	sb		t3,0(DSTREG)
	PTR_SUBU	SIZEREG,1
	bgtz		SIZEREG,1b
	PTR_ADDU	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	Each iteration fetches one register-sized value from the
	 *	unaligned source with the REG_LHI/REG_LLO pair (defined in
	 *	<machine/asm.h>; presumably the endian-appropriate
	 *	load-left/load-right instructions -- confirm there) and
	 *	stores it with a single aligned REG_S.
	 */
5:	# destaligned
	andi		t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# number of words to transfer
	beq		a3,zero,3b
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_ADDU	a3,SRCREG,a3		# stop point

	/*
	 *	Pointer updates use PTR_ADDU, as everywhere else in this
	 *	file: the non-trapping add is the right one for address
	 *	arithmetic.
	 */
1:
	REG_LHI		t3,0(SRCREG)
	REG_LLO		t3,SZREG-1(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,a3,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the tail byte by byte
	nop

	/*
	 *	Backward copy, used when src < dst.  Both pointers are first
	 *	moved to one past the end of their buffers; every access
	 *	below therefore uses a negative offset or follows a
	 *	decrement.  The numeric labels 1-5 are reused with the same
	 *	roles as in the forward half.
	 */
6:	# backcopy -- based on above
	PTR_ADDU	SRCREG,SIZEREG
	PTR_ADDU	DSTREG,SIZEREG
	andi		t1,DSTREG,SZREG-1	# get last bits of dest end
	bne		t1,zero,3f		# dest end unaligned
	andi		t0,SRCREG,SZREG-1	# (delay slot) get last bits of src end
	bne		t0,zero,5f		# src end unaligned, dest end aligned
						# (delay slot is the li below)

	/*
	 *	Backward aligned->aligned copy, 8 words (8*SZREG bytes)
	 *	at a time.
	 */
	li		AT,(-8*SZREG)
	and		t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq		t0,zero,2f		# any work to do?
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU	a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body
	 *
	 *	Mirror image of the forward loop: the first group of four
	 *	uses negative offsets from the current pointers, the second
	 *	group uses non-negative offsets from the already decremented
	 *	pointers.
	 */
1:	# cp
	REG_L		t3,(-4*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	PTR_SUBU	SRCREG,8*SZREG
	REG_S		t3,(-4*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t3,(0*SZREG)(SRCREG)
	PTR_SUBU	DSTREG,8*SZREG
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_SUBU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L		t3,-SZREG(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,t0,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy the remaining SIZEREG bytes one at a time, highest
	 *	address first.
	 *
	 *	NOTE(review): bgtz is a signed test, so a remaining count
	 *	with the sign bit set ends the loop early.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,1
	sb		t3,-1(DSTREG)
	PTR_SUBU	SIZEREG,1
	bgtz		SIZEREG,1b
	PTR_SUBU	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest, backwards.
	 */
5:	# destaligned
	andi		t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# number of words to transfer
	beq		a3,zero,3b
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_SUBU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,-SZREG(SRCREG)
	REG_LLO		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,a3,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the tail byte by byte
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
298