/* support.s revision 22975 */
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	$Id$
34 */
35
36#include "opt_cpu.h"
37
38#include <machine/asmacros.h>
39#include <machine/cputypes.h>
40#include <machine/pmap.h>
41#include <machine/specialreg.h>
42
43#include "assym.s"
44
#define KDSEL		0x10			/* kernel data selector */
#define IDXSHIFT	10			/* PTE index shift for the PTmap walks below */

/*
 * Indirect-dispatch vectors for the copy/zero families.  Each slot is
 * initialized to the generic routine; boot-time CPU identification may
 * re-point a slot at a CPU-specific version (e.g. the i586 FPU copies).
 * Note that _bzero itself is a function *pointer* here, not code:
 * callers call through it.
 */
	.data
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
/*
 * Lock byte guarding kernel use of the FPU by the i586 copy/zero
 * routines.  0xfe means unlocked: `sarb $1' turns it into 0xff and
 * sets CF only if it was already 0xff (locked), so a single
 * shift-and-test both tests and acquires the lock.
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
67
68	.text
69
70/*
71 * bcopy family
72 * void bzero(void *buf, u_int len)
73 */
74
/*
 * void generic_bzero(void *buf, u_int len)
 *
 * Default `rep stos' implementation, reached through the _bzero
 * function pointer.  Clobbers %eax, %ecx; leaves DF clear.
 */
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi			/* buf (offsets shifted by the push) */
	movl	12(%esp),%ecx			/* len */
	xorl	%eax,%eax			/* fill value = 0 */
	shrl	$2,%ecx				/* whole longwords first */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx				/* then the 0-3 residual bytes */
	rep
	stosb
	popl	%edi
	ret
90
#if defined(I486_CPU)
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * Integer-register version tuned for the 486: unrolled straight-line
 * stores in progressively smaller chunks (64/16/4 bytes), with the
 * final 0-3 bytes dispatched through a jump table.
 * In: 4(%esp) = buf, 8(%esp) = len.  Clobbers %eax, %ecx, %edx.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b				/* more left; recheck chunk size */
	ret
/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret
/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret
/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)			/* %ecx = residual count, 0-3 */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
192
#ifdef I586_CPU
/*
 * void i586_bzero(void *buf, u_int len)
 *
 * Pentium version: for large buffers, zero 8 bytes at a time through
 * an FPU register (fldz/fstl), which beats `rep stos' on the i586.
 * Small buffers fall through to the integer version below.
 * %ax carries the saved machine status word (smsw/lmsw) across the
 * FPU section so CR0_TS can be restored on exit.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	intreg_i586_bzero
	smsw	%ax			/* save MSW so CR0_TS can be restored */
	clts
	subl	$108,%esp		/* 108 bytes = fnsave area */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)			/* give the FPU back to the application */
	addl	$108,%esp
	lmsw	%ax			/* restore saved CR0_TS state */
	movb	$0xfe,kernel_fpu_lock	/* release lock */
	ret

i586_bz3:
	fstpl	%st(0)			/* pop the zero we loaded */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len (after push of %edi) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU */
334
/*
 * fillw(pat, base, cnt)
 *
 * Store `cnt' 16-bit words of `pat' starting at `base' (used e.g. for
 * text-mode screen fills).  Only the low 16 bits of `pat' are stored.
 */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax			/* pat */
	movl	12(%esp),%edi			/* base */
	movl	16(%esp),%ecx			/* cnt (in words) */
	cld
	rep
	stosw
	popl	%edi
	ret
346
/*
 * bcopyb(src, dst, cnt)
 *
 * Byte-at-a-time overlap-safe copy (no longword optimization; useful
 * where byte accesses are required, e.g. device memory).  Copies
 * backwards when the regions overlap with src < dst.  DF is left clear.
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld					/* restore the expected DF=0 */
	ret
378
/* bcopy(src, dst, cnt): tail-jump through the CPU-selected vector. */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector
382
/* ovbcopy(src, dst, cnt): overlap-safe bcopy, also vectored. */
ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector
386
/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 *
 * Longword `rep movs' copy; copies backwards when the regions overlap
 * with src < dst.  Clobbers %eax, %ecx; DF left clear.
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi				/* step back to last longword */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
435
#ifdef I586_CPU
/*
 * i586_bcopy(src, dst, cnt)
 *
 * Pentium version: large non-overlapping copies go 64 bytes at a time
 * through the FPU (fildq/fistpq pairs -- a 64-bit integer load/store
 * is an exact round trip through an FP register, so this is just a
 * fast 8-byte move).  %dx carries the saved MSW across the FPU
 * section.  Small or overlapping copies use the rep-movs paths below.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock		/* try to take the kernel FPU lock */
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx				/* save MSW; %ax is in use */
	clts
	subl	$108,%esp			/* fnsave area for the app's state */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx				/* total count; chunk is deducted below */
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx	/* limit chunk to about half the L1 */
2:
	subl	%ecx,0(%esp)			/* remaining = total - this chunk */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
/*
 * Prime the cache: dummy reads, one per 32-byte line, pull the source
 * chunk into L1 before the FPU copy loop below.
 */
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)			/* stores pop in reverse load order */
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax				/* remaining count from 4: */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* start the next cache-sized chunk */

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)				/* give the FPU back to the app */
	addl	$108,%esp
i586_bc2:
	lmsw	%dx				/* restore CR0_TS state */
	movb	$0xfe,kernel_fpu_lock		/* release lock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU */
573
/*
 * void *memcpy(void *dst, const void *src, size_t cnt)
 *
 * Note: memcpy does not support overlapping copies.
 * Returns dst in %eax per the C convention (saved before the copy
 * advances %edi).
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* dst */
	movl	16(%esp),%esi			/* src */
	movl	20(%esp),%ecx			/* cnt */
	movl	%edi,%eax			/* return value = dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
595
596
597/*****************************************************************************/
598/* copyout and fubyte family                                                 */
599/*****************************************************************************/
600/*
601 * Access user memory from inside the kernel. These routines and possibly
602 * the math- and DOS emulators should be the only places that do this.
603 *
604 * We have to access the memory with user's permissions, so use a segment
605 * selector with RPL 3. For writes to user space we have to additionally
606 * check the PTE for write permission, because the 386 does not check
607 * write permissions when we are executing with EPL 0. The 486 does check
608 * this if the WP bit is set in CR0, so we can use a simpler version here.
609 *
610 * These routines set curpcb->onfault for the time they execute. When a
611 * protection violation occurs inside the functions, the trap handler
612 * returns to *curpcb->onfault instead of the function.
613 */
614
/* copyout(from_kernel, to_user, len): dispatch through the CPU vector. */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector
619
/*
 * generic_copyout(from_kernel, to_user, len)
 *
 * Returns 0 on success, EFAULT on a bad user address.  Faults taken
 * during the copy are redirected by the trap handler to copyout_fault
 * via curpcb->pcb_onfault.  On a 386 (no CR0_WP) each destination PTE
 * is checked for user-writability by hand, simulating a write trap
 * with trapwrite() where needed.
 */
ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* longword-align the PTE offset */

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx			/* back to a virtual address */
	pushl	%edx
	call	_trapwrite			/* trapwrite(addr) */
	popl	%edx				/* discard the argument */
	popl	%ecx				/* restore saved %ecx, %edx */
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyout:					/* shared tail, also used by i586_copyout */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* clear the fault hook */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
745
#ifdef I586_CPU
/*
 * i586_copyout(from_kernel, to_user, len)
 *
 * Range checks as in generic_copyout, then uses fastmove() for large
 * copies; small ones jump into generic_copyout's slow_copyout tail.
 * (No PTE walk: i586 has CR0_WP, so the hardware enforces user write
 * permission.)
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU */
799
/* copyin(from_user, to_kernel, len): dispatch through the CPU vector. */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector
804
/*
 * generic_copyin(from_user, to_kernel, len)
 *
 * Returns 0 on success, EFAULT on a bad user address.  Reads need no
 * PTE walk (user pages are readable from the kernel); faults during
 * the copy go to copyin_fault via curpcb->pcb_onfault.
 */
ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault			/* wrapped */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyin:					/* shared tail, also used by i586_copyin */
#endif
	movb	%cl,%al				/* stash low byte of len */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU)
	ALIGN_TEXT
done_copyin:
#endif /* I586_CPU */
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
856
#ifdef I586_CPU
/*
 * i586_copyin(from_user, to_kernel, len)
 *
 * Range checks as in generic_copyin, then fastmove() for large copies;
 * small ones jump into generic_copyin's slow_copyin tail.  The extra
 * %ebx push matches the stack layout fastmove_tail_fault unwinds.
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU */
891
#if defined(I586_CPU)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * FPU-based bulk mover used by i586_copyin/i586_copyout.  Saves the
 * application's FPU state into curpcb->pcb_savefpu (stashing a copy in
 * a stack temporary around the move), takes FPU ownership, copies 64
 * bytes per iteration with fildq/fistpq, then restores everything.
 * On a user-address fault, fastmove_fault/fastmove_tail_fault unwind
 * both this frame and the caller's pushes and return EFAULT directly
 * to copyin/copyout's caller.
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 scratch slots */

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)			/* spill len/src/dst around the block copy */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx			/* chunk limit (fits in L1 with room to spare) */
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)			/* remaining = total - this chunk */
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
/* prime the cache: one dummy read per 32-byte line */
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)			/* stores pop in reverse load order */
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* next chunk */

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* restore the application's FPU state from the stack copy */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	/*
	 * Unwind our frame plus the caller's pushes (return address +
	 * pushed len, then the %ebx/%edi/%esi the copyin/copyout
	 * wrapper saved) and return EFAULT from copyin/copyout itself.
	 */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU */
1079
1080/*
1081 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
1082 */
/*
 * fuword(addr): fetch a 32-bit word from user memory.
 * Returns the word, or -1 (via fusufault) on a bad address.
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1094
/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later. Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax			/* unconditional failure for now */
	ret
1105
/*
 * fusword(addr): fetch a 16-bit word from user memory, zero-extended.
 * Returns -1 (via fusufault) on a bad address.
 */
ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1117
/*
 * fubyte(addr): fetch a byte from user memory, zero-extended.
 * Returns -1 (via fusufault) on a bad address.
 */
ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
/* Shared fault target for all fu*/su* routines: clear onfault, return -1. */
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax				/* %eax = -1 */
	ret
1137
1138/*
1139 * su{byte,sword,word}: write a byte (word, longword) to user memory
1140 */
/*
 * suword(addr, word): store a 32-bit word to user memory.
 * Returns 0 on success, -1 (via fusufault) on failure.  On a 386 the
 * destination PTE is checked for user-writability by hand (trapwrite()
 * simulating the write trap), since the 386 lacks CR0_WP.
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f				/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* longword-align the PTE offset */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload addr (%edx was trashed) */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax			/* return 0 */
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1189
/*
 * susword(addr, word): store a 16-bit word to user memory.
 * Same structure and return convention as suword; 386 PTE check via
 * trapwrite() as there.
 */
ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1238
/*
 * subyte(addr, byte) / suibyte(addr, byte): store a byte to user memory.
 * Same structure and return convention as suword; 386 PTE check via
 * trapwrite() as there.
 */
ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1287
1288/*
1289 * copyinstr(from, to, maxlen, int *lencopied)
1290 *	copy a string from from to to, stop when a 0 character is reached.
1291 *	return ENAMETOOLONG if string is longer than maxlen, and
1292 *	EFAULT on protection violations. If lencopied is non-zero,
1293 *	return the actual length in *lencopied.
1294 */
/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 *
 * %edx counts down from maxlen+1; maxlen is first clamped to the bytes
 * remaining below VM_MAXUSER_ADDRESS so hitting the clamp can be told
 * apart from a genuine ENAMETOOLONG at label 3.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)			/* clamped maxlen, used for *lencopied */
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al				/* NUL terminator? */
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi	/* stopped at the user-space limit? */
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* bytes copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1358
1359
1360/*
1361 * copystr(from, to, maxlen, int *lencopied)
1362 */
/*
 * copystr(from, to, maxlen, int *lencopied)
 *
 * Kernel-to-kernel variant of copyinstr: no bounds checks, no fault
 * handler.  Returns 0 or ENAMETOOLONG; stores the copied length
 * (including the NUL) through lencopied when it is non-NULL.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al				/* NUL terminator? */
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* bytes copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret
1400
/*
 * bcmp(b1, b2, len): compare two byte strings.
 * Returns 0 if equal, non-zero (1) otherwise.  Compares longword-wise
 * first, then the 0-3 residual bytes.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax			/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax				/* differ: return 1 */
2:
	popl	%esi
	popl	%edi
	ret
1427
1428
1429/*
1430 * Handling of special 386 registers and descriptor tables etc
1431 */
1432/* void lgdt(struct region_descriptor *rdp); */
/*
 * void lgdt(struct region_descriptor *rdp)
 *
 * Load a new GDT and force all segment registers to be reloaded from
 * it: data selectors by plain moves, %cs by converting the return into
 * a far (intersegment) return.
 */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
#	movl	$KCSEL,4(%esp)
	movl	$8,4(%esp)			/* XXX hard-coded KCSEL */
	lret
1454
1455/*
1456 * void lidt(struct region_descriptor *rdp);
1457 */
/* void lidt(struct region_descriptor *rdp): load the IDT register. */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret
1462
1463/*
1464 * void lldt(u_short sel)
1465 */
/* void lldt(u_short sel): load the LDT register from the word argument. */
ENTRY(lldt)
	lldt	4(%esp)
	ret
1469
1470/*
1471 * void ltr(u_short sel)
1472 */
/* void ltr(u_short sel): load the task register from the word argument. */
ENTRY(ltr)
	ltr	4(%esp)
	ret
1476
1477/* ssdtosd(*ssdp,*sdp) */
/*
 * ssdtosd(*ssdp, *sdp)
 *
 * Convert a software segment descriptor (flat base/limit/flags layout)
 * at *ssdp into the scrambled i386 hardware descriptor format, storing
 * the two result longwords at *sdp.  The rotates/shifts below shuffle
 * the base and limit fields into the positions the CPU expects.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax			/* limit bits 16-19 */
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret
1497
1498/* load_cr0(cr0) */
/* load_cr0(cr0): write the given value into CR0. */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret
1503
1504/* rcr0() */
/* rcr0(): return the current value of CR0. */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret
1508
1509/* rcr3() */
/* rcr3(): return the current value of CR3 (page directory base). */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret
1513
1514/* void load_cr3(caddr_t cr3) */
/* void load_cr3(caddr_t cr3): write CR3 (flushes the TLB as a side effect). */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret
1519
1520
1521/*****************************************************************************/
1522/* setjump, longjump                                                         */
1523/*****************************************************************************/
1524
/*
 * setjmp(jmp_buf): save callee-saved registers, %esp and the return
 * %eip into the 6-longword buffer; returns 0 on the direct call.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
1536
/*
 * longjmp(jmp_buf): restore the state saved by setjmp and resume at
 * the saved %eip; the resumed setjmp call returns 1.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1549
1550/*
1551 * Here for doing BB-profiling (gcc -a).
1552 * We rely on the "bbset" instead, but need a dummy function.
1553 */
/*
 * __bb_init_func(blocks): dummy for gcc -a basic-block profiling;
 * just marks the record as initialized (*arg = 1).
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3				/* avoid macro for `ret' */
1558