/* support.s, revision 21944 */
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	$FreeBSD: head/sys/i386/i386/support.s 21944 1997-01-22 06:15:27Z dyson $
34 */
35
36#include "opt_cpu.h"
37
38#include <machine/asmacros.h>
39#include <machine/cputypes.h>
40#include <machine/specialreg.h>
41
42#include "assym.s"
43
44#define KDSEL		0x10			/* kernel data selector */
45#define IDXSHIFT	10
46
	.data

/*
 * Function-pointer dispatch slots for the bcopy/bzero/copyin/copyout
 * family.  Each slot initially points at the generic i386 routine and
 * may be re-pointed at a CPU-specific implementation (e.g. the i586
 * FPU-based routines below) once the CPU type is known.  Note that
 * _bzero itself is a pointer variable called indirectly from C.
 */
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
/*
 * Byte lock guarding kernel use of the FPU by the i586 copy/zero
 * routines.  Acquired with `sarb $1': shifting 0xfe gives carry clear
 * (lock taken, value becomes 0xff); shifting 0xff gives carry set
 * (busy).  Released by storing 0xfe again.
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
67	.text
68
/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

/*
 * generic_bzero(buf, len): portable zero-fill using rep stos.
 * Stores len/4 longwords first, then the remaining 0-3 bytes.
 */
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi		/* buf (arg offsets shifted by push) */
	movl	12(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* fill pattern = 0 */
	shrl	$2,%ecx			/* longword count */
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len for the byte tail */
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret
89
#if defined(I486_CPU)
/*
 * i486_bzero(buf, len): zero-fill tuned for the 486, which prefers
 * unrolled movl stores over rep stos.  Reduces the count through 64-,
 * 16- and 4-byte chunk loops, then dispatches on the final 0-3 byte
 * remainder through a jump table.  Clobbers %eax, %ecx, %edx only.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* fill pattern = 0 */
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b			/* loop; count hit 0 exactly: done */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:					/* indexed by remaining count 0-3 */
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)		/* %ecx is 0..3 here */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
191
#ifdef I586_CPU
/*
 * i586_bzero(buf, len): Pentium zero-fill.  Large buffers are cleared
 * 8 bytes at a time with FPU stores of +0.0 (fstl); small ones fall
 * back to the rep-stos integer path at intreg_i586_bzero.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	intreg_i586_bzero	/* busy: use the integer path */
	smsw	%ax			/* remember CR0_TS state for lmsw */
	clts
	subl	$108,%esp		/* room for an 80x87 fnsave area */
	fnsave	0(%esp)			/* preserve the owner's FPU state */
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz				/* st(0) = +0.0: our 8-byte pattern */

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)			/* give the owner its state back */
	addl	$108,%esp
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the FPU lock */
	ret

i586_bz3:
	fstpl	%st(0)			/* just pop our zero; no owner */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len (8(%esp) + pushed %edi) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU */
333
/* fillw(pat, base, cnt): store cnt 16-bit copies of pat starting at base */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax		/* pat */
	movl	12(%esp),%edi		/* base */
	movl	16(%esp),%ecx		/* cnt, in 16-bit words */
	cld
	rep
	stosw
	popl	%edi
	ret
345
/*
 * bcopyb(src, dst, cnt): byte-wise overlap-safe copy.
 * Copies backwards when the regions overlap with src < dst.
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax		/* unsigned dst - src */
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld				/* leave the direction flag cleared */
	ret
377
/* bcopy(src, dst, cnt): tail-jump through the per-CPU dispatch vector */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

/* ovbcopy(src, dst, cnt): overlap-safe bcopy, also vectored */
ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector
385
/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 *
 * Overlap-safe: copies forwards longword-wise, or backwards (bytes
 * first, then longwords) when the ranges overlap with src < dst.
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax		/* unsigned dst - src */
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi				/* back up to last whole longword */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld					/* restore the direction flag */
	ret
434
#ifdef I586_CPU
/*
 * i586_bcopy(src, dst, cnt): Pentium copy.  Non-overlapping copies of
 * >= 1024 bytes go 64 bytes at a time through the FPU registers
 * (fildq/fistpq), preceded by a read-ahead pass over each chunk to
 * warm the cache; smaller or overlapping copies use the rep-movs
 * paths below, duplicated from generic_bcopy.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	small_i586_bcopy	/* busy: integer path */
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx			/* remember CR0_TS state for lmsw */
	clts
	subl	$108,%esp		/* preserve the owner's FPU state */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx			/* save total remaining count */
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx	/* limit chunk to ~half D-cache */
2:
	subl	%ecx,0(%esp)		/* saved count -= this chunk */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax		/* read-ahead: one load per 32 bytes */
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx			/* restore chunk count and src */
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)			/* move 64 bytes/iter via FPU regs */
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax			/* recover deferred count */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b			/* another cache-sized chunk */

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)			/* give the owner its state back */
	addl	$108,%esp
i586_bc2:
	lmsw	%dx			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the FPU lock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 * The FPU path above also falls through here to finish the <64-byte tail.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* overlapping: copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU */
572
/*
 * Note: memcpy does not support overlapping copies
 *
 * void *memcpy(dst, src, cnt): always copies forwards; returns dst.
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi		/* dst */
	movl	16(%esp),%esi		/* src */
	movl	20(%esp),%ecx		/* cnt */
	movl	%edi,%eax		/* return value = dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
594
595
596/*****************************************************************************/
597/* copyout and fubyte family                                                 */
598/*****************************************************************************/
599/*
600 * Access user memory from inside the kernel. These routines and possibly
601 * the math- and DOS emulators should be the only places that do this.
602 *
603 * We have to access the memory with user's permissions, so use a segment
604 * selector with RPL 3. For writes to user space we have to additionally
605 * check the PTE for write permission, because the 386 does not check
606 * write permissions when we are executing with EPL 0. The 486 does check
607 * this if the WP bit is set in CR0, so we can use a simpler version here.
608 *
609 * These routines set curpcb->onfault for the time they execute. When a
610 * protection violation occurs inside the functions, the trap handler
611 * returns to *curpcb->onfault instead of the function.
612 */
613
/* copyout(from_kernel, to_user, len): tail-jump via the per-CPU vector */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector
618
/*
 * generic_copyout: copy len bytes out to user space.  Returns 0 on
 * success or EFAULT.  Faults while copying resume at copyout_fault
 * via curpcb->pcb_onfault.  On a genuine 386 every destination PTE is
 * checked by hand (and _trapwrite called to simulate the fault) since
 * that CPU ignores write protection in kernel mode.
 */
ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault handler */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* from (kernel) */
	movl	20(%esp),%edi		/* to (user) */
	movl	24(%esp),%ebx		/* len */
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f			/* not a 386: skip the PTE walk */
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx	/* IDXSHIFT+2 == page shift */
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE index */

 1:	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$0x01,_PTmap(%eax)	/* PTE Page must be VALID */
	je	4f
 	movb	_PTmap(%edx),%al
 	andb	$0x07,%al	/* Pages must be VALID + USERACC + WRITABLE */
 	cmpb	$0x07,%al
 	je	2f

4:	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx		/* back to a virtual address */
	pushl	%edx
	call	_trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx			/* next PTE */
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyout:				/* i586_copyout joins here for <1K */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl			/* then the 0-3 odd bytes */
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm the fault handler */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
742
#ifdef I586_CPU
/*
 * i586_copyout: bounds checks duplicated from generic_copyout, then
 * copies >= 1024 bytes via fastmove(); smaller ones finish in
 * generic_copyout's slow path at slow_copyout.
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx			/* fastmove takes len on the stack */
	call	_fastmove
	addl	$4,%esp			/* pop the pushed length */
	jmp	done_copyout
#endif /* I586_CPU */
796
/* copyin(from_user, to_kernel, len): tail-jump via the per-CPU vector */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector
801
/*
 * generic_copyin: copy len bytes in from user space.  Returns 0 on
 * success or EFAULT; faults while copying resume at copyin_fault via
 * curpcb->pcb_onfault.  No per-PTE walk here, unlike the 386 write
 * path in generic_copyout.
 */
ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm fault handler */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault			/* source range wraps */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#ifdef I586_CPU
	ALIGN_TEXT
slow_copyin:				/* i586_copyin joins here for <1K */
#endif
	movb	%cl,%al			/* save low bits of len */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU)
	ALIGN_TEXT
done_copyin:				/* i586_copyin rejoins here */
#endif /* I586_CPU */
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm the fault handler */
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
853
#ifdef I586_CPU
/*
 * i586_copyin: validity checks duplicated from generic_copyin, then
 * copies >= 1024 bytes via fastmove(); smaller ones jump into
 * generic_copyin's slow path at slow_copyin.
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx			/* fastmove takes len on the stack */
	call	_fastmove
	addl	$8,%esp			/* pop len and the scratch %ebx */
	jmp	done_copyin
#endif /* I586_CPU */
888
#if defined(I586_CPU)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * Pentium FPU-based bulk mover used by i586_copyin/i586_copyout.
 * Expects %esi/%edi preloaded and len pushed by the caller; both
 * addresses must be 8-byte aligned or the whole move is done by the
 * rep-movs tail.  Any owner's FPU state is parked in the pcb, copied
 * into a stack temporary around our fildq/fistpq loop, and copied
 * back afterwards.  Frame locals at -12/-8/-4(%ebp) temporarily hold
 * %ecx/%esi/%edi across the state copies.  On a fault the handlers at
 * the bottom unwind the CALLER's frame and return EFAULT directly to
 * the caller's caller.
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 temps */

	movl	8(%ebp),%ecx		/* len argument */
	cmpl	$63,%ecx
	jbe	fastmove_tail		/* too small to pay FPU setup cost */

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)		/* spill len/src/dst around the copy */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi		/* tmp save area is at %esp */
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)		/* -12(%ebp) = total remaining */
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx		/* limit chunk size */
2:
	subl	%ecx,-12(%ebp)		/* ... minus this chunk */
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)		/* spill chunk count and src */
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	movl	0(%esi),%eax		/* read-ahead: one load per 32 bytes */
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx		/* restore chunk count and src */
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)			/* move 64 bytes/iter via FPU regs */
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax		/* add back the deferred remainder */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b			/* another chunk to do */

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al			/* save low bits of count */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/*
	 * Fault in the FPU loop: copy the parked FPU state from the
	 * stack temporary (still addressed by %esp here) back into the
	 * pcb, re-enable emulation and disown the FPU, then fall into
	 * the common fault exit.
	 */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp			/* skip our retaddr + pushed len */
	popl	%ebx			/* unwind the caller's pushes */
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret				/* returns from i586_copyin/copyout */
#endif /* I586_CPU */
1076
/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 */
/*
 * fuword(addr): fetch a 32-bit word from user space; returns the word
 * or -1 on fault.  (NB: a legitimately stored -1 is indistinguishable
 * from failure.)
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault handler */
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax			/* the fetch itself may fault */
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1091
/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later. Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax		/* unconditional failure (see above) */
	ret

/* fusword(addr): fetch a 16-bit word, zero-extended; -1 on fault */
ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1114
/* fubyte(addr): fetch one byte, zero-extended; -1 on fault */
ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	/* shared fault exit for the fetch/store routines: return -1 */
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)	/* disarm the fault handler */
	decl	%eax			/* %eax = -1 */
	ret
1134
/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 */
/*
 * suword(addr, value): store a 32-bit word to user space; returns 0
 * or -1 on fault.  On a genuine 386 the destination PTE is checked by
 * hand (and _trapwrite called) since that CPU ignores write
 * protection in kernel mode.
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault handler */
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f				/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE index */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$0x01,_PTmap(%ecx)	/* PTE Page must be VALID */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl		/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

	/* simulate a trap */
4:	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx		/* reload addr (%edx was trashed) */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax		/* value to store */
	movl	%eax,(%edx)
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)	/* disarm the fault handler */
	ret
1185
/* susword(addr, value): store a 16-bit word to user space; 0 or -1 */
ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault handler */
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* not a 386: h/w checks writability */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE index */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$0x01,_PTmap(%ecx)	/* PTE Page must be VALID */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl			/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

4:	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx		/* reload addr (%edx was trashed) */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax		/* value to store */
	movw	%ax,(%edx)
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1233
/* suibyte/subyte(addr, value): store one byte to user space; 0 or -1 */
ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault handler */
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f			/* not a 386: h/w checks writability */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE index */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$0x01,_PTmap(%ecx)	/* PTE Page must be VALID */
	je	4f

	movb	_PTmap(%edx),%dl
	andb	$0x7,%dl			/* must be VALID + USERACC + WRITE */
	cmpb	$0x7,%dl
	je	1f

4:	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx		/* reload addr (%edx was trashed) */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al		/* value to store */
	movb	%al,(%edx)
	xorl	%eax,%eax		/* success */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1282
/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)	/* faults yield EFAULT */

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)		/* clamped maxlen, re-read at exit */
1:
	incl	%edx			/* pre-increment for the decl below */
	cld

2:
	decl	%edx
	jz	3f			/* out of room */

	lodsb
	stosb
	orb	%al,%al			/* copied the NUL terminator? */
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/*
	 * edx is zero - return ENAMETOOLONG or EFAULT.  If we stopped
	 * exactly at the end of user space it was really a fault.
	 */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)	/* disarm the fault handler */
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f			/* lencopied may be NULL */
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1353
1354
/*
 * copystr(from, to, maxlen, int *lencopied)
 *
 * Kernel-to-kernel string copy: like copyinstr but with no bounds
 * checking and no fault recovery.  Returns 0 or ENAMETOOLONG.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx			/* pre-increment for the decl below */
	cld
1:
	decl	%edx
	jz	4f			/* out of room */
	lodsb
	stosb
	orb	%al,%al			/* copied the NUL terminator? */
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f			/* lencopied may be NULL */
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret
1395
/*
 * bcmp(b1, b2, len): returns 0 if the buffers are equal, 1 otherwise
 * (not the byte difference).  Compares longword-wise, then the 0-3
 * trailing bytes.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx		/* len, kept for the tail compare */
	xorl	%eax,%eax		/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx			/* compare the 0-3 odd bytes */
	repe
	cmpsb
	je	2f
1:
	incl	%eax			/* mismatch: return 1 */
2:
	popl	%esi
	popl	%edi
	ret
1422
1423
/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
#	movl	$KCSEL,4(%esp)
	movl	$8,4(%esp)		/* XXX hardwired kernel code selector */
	lret
1449
/*
 * void lidt(struct region_descriptor *rdp);
 * Load the interrupt descriptor table register.
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 * Load the local descriptor table register from the selector argument.
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 * Load the task register from the selector argument.
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret
1471
/* ssdtosd(*ssdp,*sdp) */
/*
 * Repack the fields of a software-format segment descriptor (*ssdp)
 * into the split base/limit/attribute layout the CPU expects and
 * store the two resulting longwords at *sdp.  (Field meanings follow
 * the i386 hardware descriptor format; confirm against
 * machine/segments.h.)
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx		/* ssdp (offsets shifted by push) */
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx		/* sdp */
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret
1492
/* load_cr0(cr0): write the machine status register %cr0 */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0(): read %cr0 */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3(): read %cr3 (page directory base register) */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3): write %cr3 (reloading it flushes the TLB) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret
1514
1515
/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

/*
 * setjmp(env): save ebx/esp/ebp/esi/edi and the return address into
 * env[0..5]; returns 0.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

/*
 * longjmp(env): restore the registers saved by setjmp and resume at
 * the saved eip, making the original setjmp call appear to return 1.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1544
/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
/* Flag the profiling struct (arg 1) as initialized: first word = 1. */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3				/* avoid macro for `ret' */
1553