bcopy.S revision 1.4
1/*	$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $	*/
2
3/*
4 * Mach Operating System
5 * Copyright (c) 1993 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie Mellon
26 * the rights to redistribute these changes.
27 */
28
29/*
30 *	File:	mips_bcopy.s
31 *	Author:	Chris Maeda
32 *	Date:	June 1993
33 *
34 *	Fast copy routine.  Derived from aligned_block_copy.
35 */
36
37
38#include <mips/asm.h>
39#ifndef _LOCORE
40#define _LOCORE		/* XXX not really, just assembly-code source */
41#endif
42#include <machine/endian.h>
43
44
45#if defined(LIBC_SCCS) && !defined(lint)
46#if 0
47	RCSID("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
48#else
49	RCSID("$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $")
50#endif
51#endif /* LIBC_SCCS and not lint */
52
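/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	When this file is built with MEMCOPY or MEMMOVE defined the entry
 *	point is memcpy or memmove instead, and the two pointer arguments
 *	trade places: a0 is the dst address and a1 is the src address.
 */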
60
/*
 * Pick the entry-point name and the argument registers for this build.
 *
 *	MEMCOPY defined		memcpy,  dst in a0, src in a1
 *	MEMMOVE defined		memmove, dst in a0, src in a1
 *	neither defined		bcopy,   src in a0, dst in a1
 *
 * MEMCOPY takes precedence when both are defined.  The byte count is
 * in a2 for every variant.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	SRCREG		a1
#define	DSTREG		a0
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2
76
/*
 * Copy SIZEREG bytes from SRCREG to DSTREG.
 *
 * The copy runs forwards when SRCREG >= DSTREG and backwards (both
 * pointers are first moved one past the last byte) when SRCREG < DSTREG,
 * so a destination that overlaps the source is still copied correctly.
 *
 * Each direction has three paths, chosen by alignment to SZREG:
 *	dst and src aligned	eight registers per iteration, then one
 *				register per iteration, then single bytes
 *	only dst aligned	REG_LHI/REG_LLO loads with aligned stores,
 *				then single bytes
 *	dst unaligned		single bytes for the whole length
 *
 * The memcpy/memmove builds return the original dst pointer in v0.
 * Scratch registers: t0-t3, v1, a3 and AT.  SRCREG, DSTREG and SIZEREG
 * are modified.
 *
 * The body is assembled with .set noreorder: the instruction written
 * after each branch or jump is its delay slot and executes whether or
 * not the branch is taken.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	The first andi below sits in the delay slot of the
	 *	branch above; it only writes t1, which the backward
	 *	path recomputes.
	 */
	andi		t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne		t1,zero,3f		# dest unaligned: all bytes
	andi		t0,SRCREG,(SZREG-1)	# get last bits of src
	bne		t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	(The li is also the delay slot of the branch above.)
	 */
98:
	li		AT,-(SZREG*8)
	and		t0,SIZEREG,AT		# count truncated to multiples
	PTR_ADDU	a3,SRCREG,t0		# run fast loop up to this addr
	sltu		AT,SRCREG,a3		# any work to do?
	beq		AT,zero,2f
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body
	 *	Each half moves four registers; the pointer is advanced
	 *	between the halves, so the second half uses negative
	 *	offsets.
	 */
1:	# cp
	REG_L		t3,(0*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	PTR_ADDU	SRCREG,SZREG*8
	REG_S		t3,(0*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t3,(-4*SZREG)(SRCREG)
	PTR_ADDU	DSTREG,SZREG*8
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(-4*SZREG)(DSTREG)	# (delay slot) last store

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_ADDU	t0,SRCREG,t2		# stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# leftover bytes for bytecopy
1:
	REG_L		t3,0(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,t0,1b
	PTR_ADDU	DSTREG,SZREG

	/*
	 *	Copy a byte at a time.  The remaining count is an
	 *	unsigned quantity that is known to be non-zero when the
	 *	loop is entered, so the loop ends on equality with zero
	 *	rather than on a signed greater-than-zero test.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,0(SRCREG)
	PTR_ADDU	SRCREG,1
	sb		t3,0(DSTREG)
	PTR_SUBU	SIZEREG,1
	bne		SIZEREG,zero,1b
	PTR_ADDU	DSTREG,1

4:	# copydone
	.set at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy from unaligned source to aligned dest.
	 *	Each REG_LHI/REG_LLO pair fills t3 from the SZREG bytes
	 *	starting at the unaligned source address (the macros are
	 *	defined outside this file), and t3 is then stored with
	 *	an aligned REG_S.  Pointers are stepped with PTR_ADDU,
	 *	like every other pointer update in this routine.
	 */
5:	# destaligned
	andi		t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# bytes in whole words
	beq		a3,zero,3b		# less than one word: bytes only
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_ADDU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,0(SRCREG)
	REG_LLO		t3,SZREG-1(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,a3,1b
	PTR_ADDU	DSTREG,SZREG

	b		3b			# finish the tail bytes
	nop

	/*
	 *	Backward copy.  Both pointers are moved to one past the
	 *	end of their buffers and every access below uses a
	 *	negative offset or follows a pointer decrement.
	 */
6:	# backcopy -- based on above
	PTR_ADDU	SRCREG,SIZEREG
	PTR_ADDU	DSTREG,SIZEREG
	andi		t1,DSTREG,SZREG-1	# get low bits of dest end
	bne		t1,zero,3f		# dest unaligned: all bytes
	andi		t0,SRCREG,SZREG-1	# get low bits of src end
	bne		t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 *	(The li is also the delay slot of the branch above.)
	 */
	li		AT,(-8*SZREG)
	and		t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq		t0,zero,2f		# any work to do?
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU	a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body
	 *	Mirror image of the forward loop: the upper four
	 *	registers are moved first, the pointer is decremented,
	 *	then the lower four are moved with positive offsets.
	 */
1:	# cp
	REG_L		t3,(-4*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	PTR_SUBU	SRCREG,8*SZREG
	REG_S		t3,(-4*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t3,(0*SZREG)(SRCREG)
	PTR_SUBU	DSTREG,8*SZREG
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(0*SZREG)(DSTREG)	# (delay slot) last store

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_SUBU	t0,SRCREG,t2		# stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# leftover bytes for bytecopy
1:
	REG_L		t3,-SZREG(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,t0,1b
	PTR_SUBU	DSTREG,SZREG

	/*
	 *	Copy a byte at a time, highest address first.  As in the
	 *	forward loop the count is unsigned and non-zero on entry,
	 *	so the loop ends on equality with zero.
	 */
3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,1
	sb		t3,-1(DSTREG)
	PTR_SUBU	SIZEREG,1
	bne		SIZEREG,zero,1b
	PTR_SUBU	DSTREG,1

4:	# copydone
	.set at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy from unaligned source to aligned dest, backwards.
	 */
5:	# destaligned
	andi		t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# bytes in whole words
	beq		a3,zero,3b		# less than one word: bytes only
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_SUBU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,-SZREG(SRCREG)
	REG_LLO		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,a3,1b
	PTR_SUBU	DSTREG,SZREG

	b		3b			# finish the tail bytes
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
299