/*	$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <machine/asm.h>

#if defined(LIBC_SCCS) && !defined(lint)
	RCSID("$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $")
#endif

/*
 * Register assignments.  With -DBZERO this file builds bzero(dst, len);
 * otherwise it builds memset(dst, c, len).  The SH ABI passes the
 * arguments in r4, r5, r6, so the fill byte (REG_C) and the length
 * (REG_LEN) land in different registers for the two entry points.
 */
#define	REG_PTR				r0	/* store pointer (also mova/jmp scratch) */
#define	REG_TMP1			r1	/* general scratch */

#ifdef BZERO
# define	REG_C			r2	/* fill value (constant zero) */
# define	REG_DST			r4	/* arg 1: destination */
# define	REG_LEN			r5	/* arg 2: length */
#else
# define	REG_DST0		r3	/* saved dst, memset() return value */
# define	REG_DST			r4	/* arg 1: destination */
# define	REG_C			r5	/* arg 2: fill byte */
# define	REG_LEN			r6	/* arg 3: length */
#endif
/*
 * void bzero(void *dst, size_t len)		(-DBZERO)
 * void *memset(void *dst, int c, size_t len)	(otherwise)
 *
 * Strategy:
 *   len <  12 : byte store loop, unrolled 4x.
 *   len <  28 : computed jump ("mova 1f; jmp @r0") into a straight-line
 *               table of byte (or, when dst is 2-byte aligned, word)
 *               stores — a Duff's-device dispatch.
 *   len >= 28 : align dst/end to 4 bytes, then fill backwards with
 *               32-byte blocks of mov.l, finishing with 4-byte stores.
 *
 * Note: instructions immediately after bt/s, bf/s and jmp execute in the
 * branch delay slot.
 */
#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* if (len >= 12) goto small; */
	cmp/hs	REG_TMP1,REG_LEN
	bt/s	small
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN
	add	REG_DST,REG_LEN		/* REG_LEN = one past the end */
	bt/s	done			/* len == 0: nothing to do */
	add	#1,REG_DST		/* pre-bias: loop stops when LEN==DST+1 */

	/* byte store loop, unrolled 4x, filling backwards from the end */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* return original dst */
#endif


/* medium fill (12 ~ 27 bytes), dst odd: computed jump into byte stores */
small:
	mov	REG_DST,r0
	tst	#1,r0
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1
	shll	REG_LEN			/* each mov.b insn is 2 bytes wide */
	mova	1f,r0			/* 1f must be 4bytes aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0		/* skip 32-len table entries */
	jmp	@r0
	mov	REG_C,r0

	.align	2
	/* 32 byte stores covering offsets 31..0 (upper half via dst+16) */
	mov.b	r0,@(15,REG_TMP1)
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
#ifdef BZERO
	rts
1:	mov.b	r0,@REG_DST		/* last store lives in the delay slot */
#else
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0
#endif


/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* len is aligned? */
	bt/s	1f
	add	#-1,r0
	mov.b	REG_C,@(r0,REG_DST)	/* fill last a byte */
	mov	r0,REG_LEN		/* remaining length is now even */
1:

	mova	1f,r0			/* 1f must be 4bytes aligned! */
	sub	REG_LEN,r0		/* each mov.w stores len bytes per insn */
	jmp	@r0
	mov	REG_C,r0

	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
#ifdef BZERO
	rts
1:	mov.w	r0,@REG_DST		/* last store lives in the delay slot */
#else
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0
#endif



	.align	2
/* large fill (>= 28 bytes): 4-byte stores, 32 bytes per loop iteration */
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	/* replicate the fill byte into all four bytes of REG_C */
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN
	bf	unaligned_len		/* non-delayed: no slot insn here */

aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)
9:

	/* fill left 4*n bytes */
	cmp/eq	REG_DST,REG_PTR
	bt	9f
	add	#4,REG_DST		/* pre-bias: stop when PTR==DST+4 */
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0
#endif


/* bump dst up to the next 4-byte boundary (len >= 28 guarantees room) */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1
	bt/s	2f
	tst	REG_TMP1,REG_DST
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(uint16_t*)dst++ = c;    */
	add	#2,REG_DST
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/*                              */
unaligned_len:
	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
	bt/s	2f
	tst	#2,REG_PTR
	mov.b	REG_C,@-REG_PTR		/*     --ptr = c;               */
2:					/*   }                          */
					/*   if (ptr & 2) {             */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     *--(uint16_t*)ptr = c;  */
4:					/*   }                          */
					/* }                            */

	mov	REG_PTR,REG_LEN		/* recompute aligned length */
	bra	aligned
	sub	REG_DST,REG_LEN		/* REG_LEN = ptr - dst (delay slot) */

299