1/*	$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $	*/
2
3/*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29/*
30 *	File:	mips_bcopy.s
31 *	Author:	Chris Maeda
32 *	Date:	June 1993
33 *
34 *	Fast copy routine.  Derived from aligned_block_copy.
35 */
36
37
38#include <machine/asm.h>
39__FBSDID("$FreeBSD$");
40
41#include <machine/endian.h>
42
43#if defined(LIBC_SCCS) && !defined(lint)
44#if 0
45	ASMSTR("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
46#else
47	ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
48#endif
49#endif /* LIBC_SCCS and not lint */
50
51#ifdef __ABICALLS__
52	.abicalls
53#endif
54
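/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	The memcpy entry point takes its first two arguments the other
 *	way round (a0 = dst, a1 = src); it swaps them and then falls
 *	through into bcopy.
 */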
62
/*
 * Symbolic names for the three argument registers, laid out as the
 * copy code expects them: source pointer, destination pointer and
 * byte count.  The copy code advances (or retreats) the two pointers
 * and counts the length down as it goes.
 */
#define	SRCREG	a0
#define	DSTREG	a1
#define	SIZEREG	a2
66
LEAF(memcpy)
	/*
	 *	memcpy(dst, src, len) entry point.
	 *
	 *	Arrives with a0 = dst, a1 = src, a2 = len.  The original dst
	 *	is left in v0 (nothing below writes v0 again, so it is still
	 *	there when the routine returns) and a0/a1 are swapped so that
	 *	execution can fall through into bcopy, which wants a0 = src
	 *	and a1 = dst.
	 *
	 *	noat:      AT is used explicitly as a scratch register below.
	 *	noreorder: branch delay slots are written out by hand.  The
	 *	           instruction that follows each branch or jump in
	 *	           this routine is its delay slot and executes whether
	 *	           or not the branch is taken.
	 */
	.set	noat
	.set	noreorder

	move	v0, a0			# v0 = dst; doubles as the swap temporary
	move	a0, a1			# a0 = src
	move	a1, v0			# a1 = dst

ALEAF(bcopy)
ALEAF(ovbcopy)
	/*
	 *	bcopy / ovbcopy entry point: a0 = src, a1 = dst, a2 = len.
	 *
	 *	SRCREG, DSTREG and SIZEREG are consumed by the copy; a3,
	 *	t0-t3, v1 and AT are used as scratch.
	 *
	 *	Make sure we can copy forwards.  When src < dst the copy is
	 *	done from the last byte down to the first (label 6 below), so
	 *	that an overlapping destination never overwrites source bytes
	 *	that have not been read yet.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	An unaligned destination is always copied a byte at a time.
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	The first andi below is the delay slot of the "copy
	 *	backwards" branch above; the backward path recomputes t1
	 *	after moving the pointers, so executing it there is harmless.
	 */
	andi		t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne		t1,zero,3f		# dest unaligned
	andi		t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne		t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *
	 *	The li below is the delay slot of the branch above.  It only
	 *	writes AT, which the unaligned-source path never reads.
	 */
98:
	li		AT,-(SZREG*8)
	and		t0,SIZEREG,AT		# count truncated to multiples
	PTR_ADDU	a3,SRCREG,t0		# run fast loop up to this addr
	sltu		AT,SRCREG,a3		# any work to do?
	beq		AT,zero,2f
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body
	 *
	 *	Words 0-3 are loaded and stored relative to the old pointers;
	 *	words 4-7 are addressed with negative offsets from the
	 *	pointers after they have been advanced by 8 words.  The last
	 *	store sits in the delay slot of the loop branch.
	 */
1:	# cp
	REG_L		t3,(0*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	PTR_ADDU	SRCREG,SZREG*8
	REG_S		t3,(0*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t3,(-4*SZREG)(SRCREG)
	PTR_ADDU	DSTREG,SZREG*8
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(-4*SZREG)(DSTREG)	# (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_ADDU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# trailing bytes for the byte loop
1:
	REG_L		t3,0(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,t0,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy whatever is left (or everything, when the destination
	 *	is unaligned) one byte at a time.
	 *
	 *	NOTE(review): bgtz is a signed test, so a remaining count
	 *	with its top bit set ends this loop after a single byte.  The
	 *	word loops compare pointers and have no such cut-off.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,0(SRCREG)
	PTR_ADDU	SRCREG,1
	sb		t3,0(DSTREG)
	PTR_SUBU	SIZEREG,1
	bgtz		SIZEREG,1b
	PTR_ADDU	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	Each destination word is put together in t3 by a
	 *	REG_LHI/REG_LLO pair aimed at the first and last byte of the
	 *	source word (presumably the partial-word loads provided by
	 *	machine/asm.h -- confirm there), then written with one
	 *	aligned store.
	 */
5:	# destaligned
	andi		t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# number of words to transfer
	beq		a3,zero,3b
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_ADDU	a3,SRCREG,a3		# stop point

	/*
	 *	The pointer updates use PTR_ADDU, like every other pointer
	 *	update in this routine.
	 */
1:
	REG_LHI		t3,0(SRCREG)
	REG_LLO		t3,SZREG-1(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,a3,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the tail in the byte loop
	nop

	/*
	 *	Backward copy, used when src < dst.  Both pointers are first
	 *	moved one past the end of their buffers; everything below
	 *	then addresses memory at negative offsets and walks
	 *	downwards.  The structure mirrors the forward code, and the
	 *	numeric labels are reused.
	 */
6:	# backcopy -- based on above
	PTR_ADDU	SRCREG,SIZEREG
	PTR_ADDU	DSTREG,SIZEREG
	andi		t1,DSTREG,SZREG-1	# get last bits of dest (end address)
	bne		t1,zero,3f		# dest end unaligned
	andi		t0,SRCREG,SZREG-1	# (delay slot) get last bits of src
	bne		t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 *
	 *	The li below is the delay slot of the branch above.  It only
	 *	writes AT, which the unaligned-source path never reads.
	 */
	li		AT,(-8*SZREG)
	and		t0,SIZEREG,AT		# count truncated to multiples
	beq		t0,zero,2f		# any work to do?
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU	a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body
	 *
	 *	The top four words of the chunk are moved relative to the old
	 *	pointers; the bottom four use non-negative offsets from the
	 *	pointers after they have been moved down by 8 words.  The
	 *	last store sits in the delay slot of the loop branch.
	 */
1:	# cp
	REG_L		t3,(-4*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	PTR_SUBU	SRCREG,8*SZREG
	REG_S		t3,(-4*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t3,(0*SZREG)(SRCREG)
	PTR_SUBU	DSTREG,8*SZREG
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(0*SZREG)(DSTREG)	# (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_SUBU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# leading bytes for the byte loop
1:
	REG_L		t3,-SZREG(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,t0,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy whatever is left (or everything, when the destination
	 *	end is unaligned) one byte at a time, moving downwards.
	 *
	 *	NOTE(review): bgtz is a signed test, so a remaining count
	 *	with its top bit set ends this loop after a single byte.  The
	 *	word loops compare pointers and have no such cut-off.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,1
	sb		t3,-1(DSTREG)
	PTR_SUBU	SIZEREG,1
	bgtz		SIZEREG,1b
	PTR_SUBU	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest, moving downwards.
	 *
	 *	Same scheme as the forward case: a REG_LHI/REG_LLO pair aimed
	 *	at the first and last byte of the source word just below
	 *	SRCREG fills t3, which is then written with one aligned
	 *	store.
	 */
5:	# destaligned
	andi		t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# number of words to transfer
	beq		a3,zero,3b
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_SUBU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,-SZREG(SRCREG)
	REG_LLO		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,a3,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the rest in the byte loop
	nop

	.set	reorder
	.set	at
END(memcpy)
287