1/*	$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $	*/
2
3/*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29/*
30 *	File:	mips_bcopy.s
31 *	Author:	Chris Maeda
32 *	Date:	June 1993
33 *
34 *	Fast copy routine.  Derived from aligned_block_copy.
35 */
36
37
38#include <machine/asm.h>
39__FBSDID("$FreeBSD: releng/10.2/lib/libc/mips/string/bcopy.S 209231 2010-06-16 12:55:14Z jchandra $");
40
41#define _LOCORE		/* XXX not really, just assembly-code source */
42#include <machine/endian.h>
43
44#if defined(LIBC_SCCS) && !defined(lint)
45#if 0
46	ASMSTR("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
47#else
48	ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
49#endif
50#endif /* LIBC_SCCS and not lint */
51
52#ifdef __ABICALLS__
53	.abicalls
54#endif
55
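/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	When this file is built with MEMCOPY or MEMMOVE defined, the entry
 *	point is memcpy or memmove instead and the first two arguments are
 *	swapped:
 *
 *	a0	dst address (also returned in v0)
 *	a1	src address
 *	a2	length
 */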
63
/*
 * Pick the exported symbol and the argument registers for this build.
 * memcpy and memmove take (dst, src, len); bcopy takes (src, dst, len).
 * MEMCOPY takes precedence if both MEMCOPY and MEMMOVE are defined.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	SRCREG		a1
#define	DSTREG		a0
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

/* The byte count is the third argument in every variant. */
#define	SIZEREG		a2
79
/*
 * FUNCTION -- overlap-safe block copy (bcopy, memcpy or memmove,
 * depending on which of MEMCOPY/MEMMOVE is defined at build time).
 *
 * In:	SRCREG	source address
 *	DSTREG	destination address
 *	SIZEREG	number of bytes to copy (treated as unsigned)
 * Out:	v0	original destination address (memcpy/memmove builds only)
 *
 * Clobbers: AT, v1, a0-a3, t0-t3 (plus v0 in the memcpy/memmove builds).
 *
 * If src < dst the copy runs backwards from the end of both buffers,
 * otherwise it runs forwards, so overlapping buffers are handled in
 * every build variant.
 *
 * The whole routine is assembled with ".set noreorder": the instruction
 * that textually follows each branch or jump is its delay slot and is
 * executed whether or not the branch is taken.  Do not reorder
 * instructions without re-checking every delay slot.
 *
 * SZREG, REG_L, REG_S, REG_LHI, REG_LLO, PTR_ADDU and PTR_SUBU are not
 * defined in this file; presumably they come from <machine/asm.h> and
 * select register-width loads/stores and pointer-width arithmetic.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.  When the source lies below
	 *	the destination a forward copy could overwrite source bytes
	 *	that have not been read yet, so go backwards instead.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards
	/* delay slot: the andi below (t1 is recomputed on the backward path) */

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned   -> unaligned		(almost never)
	 * 	unaligned -> unaligned		(almost never)
	 *
	 *	An unaligned destination is always copied one byte at a time.
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 */
	andi		t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne		t1,zero,3f		# dest unaligned
	andi		t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne		t0,zero,5f		# src unaligned, dest aligned
	/* delay slot: the li below */

	/*
	 *	Forward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 *	at a time.
	 */
98:
	li		AT,-(SZREG*8)
	and		t0,SIZEREG,AT		# count truncated to multiples
	PTR_ADDU	a3,SRCREG,t0		# run fast loop up to this addr
	sltu		AT,SRCREG,a3		# any work to do?
	beq		AT,zero,2f
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body: the first four registers are moved using offsets
	 *	from the old pointers, the last four using negative offsets
	 *	from the already advanced pointers.
	 */
1:	# cp
	REG_L		t3,(0*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	PTR_ADDU	SRCREG,SZREG*8
	REG_S		t3,(0*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t3,(-4*SZREG)(SRCREG)
	PTR_ADDU	DSTREG,SZREG*8
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a register at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_ADDU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L		t3,0(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,t0,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy the remaining SIZEREG bytes one at a time.  This is also
	 *	the whole copy when the destination is unaligned.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,0(SRCREG)
	PTR_ADDU	SRCREG,1
	sb		t3,0(DSTREG)
	PTR_SUBU	SIZEREG,1
	/*
	 * The length is unsigned, so test for "not yet zero" rather than
	 * "greater than zero": a signed test would end the loop after a
	 * single byte for a length with the top bit set.
	 */
	bne		SIZEREG,zero,1b
	PTR_ADDU	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi		t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# bytes to move as whole words
	beq		a3,zero,3b
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_ADDU	a3,SRCREG,a3		# stop point

	/*
	 * Each word is assembled from the unaligned source with the
	 * REG_LHI/REG_LLO pair and stored with one aligned store.
	 * The pointers are advanced with PTR_ADDU like everywhere else in
	 * this routine: an overflow-trapping add must not be used for
	 * address arithmetic.
	 */
1:
	REG_LHI		t3,0(SRCREG)
	REG_LLO		t3,SZREG-1(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,a3,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the tail bytewise
	nop

	/*
	 *	Backward copy.  Both pointers are first moved one past the
	 *	end of their buffers and everything below works downwards
	 *	with negative offsets.  Alignment is therefore tested on the
	 *	end addresses.
	 */
6:	# backcopy -- based on above
	PTR_ADDU	SRCREG,SIZEREG
	PTR_ADDU	DSTREG,SIZEREG
	andi		t1,DSTREG,SZREG-1	# get last bits of dest end
	bne		t1,zero,3f		# dest unaligned
	andi		t0,SRCREG,SZREG-1	# (delay slot) get last bits of src end
	bne		t0,zero,5f		# src unaligned, dest aligned
	/* delay slot: the li below */

	/*
	 *	Backward aligned->aligned copy, 8 registers (8*SZREG bytes)
	 *	at a time.
	 */
	li		AT,(-8*SZREG)
	and		t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq		t0,zero,2f		# any work to do?
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU	a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body: the top four registers are moved using negative
	 *	offsets from the old pointers, the next four using offsets
	 *	from the already lowered pointers.
	 */
1:	# cp
	REG_L		t3,(-4*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	PTR_SUBU	SRCREG,8*SZREG
	REG_S		t3,(-4*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t3,(0*SZREG)(SRCREG)
	PTR_SUBU	DSTREG,8*SZREG
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a register at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_SUBU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	REG_L		t3,-SZREG(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,t0,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	/*
	 *	Copy the remaining SIZEREG bytes one at a time, downwards.
	 *	This is also the whole copy when the destination end is
	 *	unaligned.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,1
	sb		t3,-1(DSTREG)
	PTR_SUBU	SIZEREG,1
	/* unsigned length: loop until the count reaches zero (see above) */
	bne		SIZEREG,zero,1b
	PTR_SUBU	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest, backwards.
	 */
5:	# destaligned
	andi		t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# bytes to move as whole words
	beq		a3,zero,3b
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_SUBU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,-SZREG(SRCREG)
	REG_LLO		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,a3,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish the tail bytewise
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
298