/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$Id: support.s,v 1.53 1997/05/29 05:11:10 peter Exp $
 */

#include "npx.h"
#include "opt_cpu.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

#define KDSEL		0x10			/* kernel data selector */
#define KCSEL		0x8			/* kernel code selector */
#define IDXSHIFT	10

	.data
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
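/*
 * The *_vector words above are function pointers through which bcopy,
 * ovbcopy, copyin and copyout dispatch; they default to the generic
 * versions, and faster CPU-specific implementations (such as the i586
 * FPU copies below) can be installed in them during startup.
 */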
#if defined(I586_CPU) && NNPX > 0
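/*
 * kernel_fpu_lock is a recursion lock for the FPU-based routines below:
 * bit 0 clear (0xfe) means free.  `sarb $1' shifts the old bit 0 into
 * carry, so a set carry flag after the shift means the lock was already
 * held and the caller falls back to the integer version.
 */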
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

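/*
 * generic_bzero below, roughly in C:
 *
 *	for (i = 0; i < len / 4; i++) ((u_int *)buf)[i] = 0;	(rep stosl)
 *	for (i = len & ~3; i < len; i++) ((char *)buf)[i] = 0;	(rep stosb)
 */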
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)
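	/*
	 * i.e., switch (%ecx) for the 0-3 remaining bytes, dispatched
	 * through the jtab jump table with a scaled-index indirect jump.
	 */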

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application, to fastmove(),
	 * or to another invocation of bcopy() or of ourselves in a
	 * higher-level interrupt or trap handler.  Preserving the registers
	 * is complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
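	/*
	 * The lock/save protocol below, roughly in C:
	 *
	 *	if (npxproc != NULL) {			(cases 1 and 2)
	 *		if (len < 256 + 184 || !trylock(kernel_fpu_lock))
	 *			goto intreg_i586_bzero;
	 *		ts = smsw(); clts(); fnsave(save area on the stack);
	 *	} else {				(case 4)
	 *		if (!trylock(kernel_fpu_lock))
	 *			goto intreg_i586_bzero;
	 *		ts = smsw(); clts(); fninit();
	 *	}
	 */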
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx
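	/*
	 * Net effect of the two fstl's and the arithmetic above, roughly:
	 *
	 *	write 8 zero bytes at the original %edx and at the end;
	 *	%edx = (%edx + 8) & ~7;			(next 8-byte boundary)
	 *	%ecx = (end - %edx - 1) & ~7;		(whole 8-byte stores left)
	 */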

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or are
	 * done in parallel with it, so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstpl	%st(0)
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 */

/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret

ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
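	/*
	 * The unsigned compare above implements (u_int)(dst - src) < len,
	 * which is true exactly when dst lies inside [src, src + len), the
	 * only case where a forward copy would clobber source bytes that
	 * have not been copied yet.
	 */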
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld
	ret

ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

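/*
 * Backwards (descending) copy for the overlapping case: move the 0-3
 * unaligned tail bytes first with movsb, then step the pointers back
 * 3 bytes so that movsl can move the rest a longword at a time.
 */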
	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
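	/*
	 * Prime the cache: the loop below touches one longword in each
	 * 32-byte cache line of the source chunk so that the fildq loop
	 * further down runs out of the cache instead of stalling on
	 * memory for every load.
	 */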
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
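	/*
	 * The loop below moves 64 bytes per iteration through the FPU:
	 * fildq/fistpq load and store 8 bytes at a time, and since every
	 * 64-bit integer is exactly representable in extended precision,
	 * the data round-trips bit for bit.
	 */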
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 */

/*
 * Note: memcpy does not support overlapping copies.
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel.  These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with the user's permissions, so use a
 * segment selector with RPL 3.  For writes to user space we additionally
 * have to check the PTE for write permission, because the 386 does not
 * check write permissions when we are executing with EPL 0.  The 486 does
 * check this if the WP bit is set in CR0, so we can use a simpler version
 * here.
 *
 * These routines set curpcb->onfault for the time they execute.  When a
 * protection violation occurs inside one of them, the trap handler
 * returns to *curpcb->onfault instead of to the faulting instruction.
 */

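/*
 * The recovery protocol, roughly in C:
 *
 *	curpcb->pcb_onfault = copyout_fault;	(or copyin_fault, ...)
 *	... touch user memory; on a fault, trap() resumes execution
 *	    at the onfault label instead of the faulting instruction ...
 *	curpcb->pcb_onfault = NULL;
 *	return (0);			(the fault label returns EFAULT)
 */
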
/* copyout(from_kernel, to_user, len) */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault
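	/*
	 * i.e., if (to + len < to || to + len > VM_MAXUSER_ADDRESS)
	 *		return (EFAULT);
	 */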

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl
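	/*
	 * i.e., npages = ((to & PAGE_MASK) + len - 1) / PAGE_SIZE + 1,
	 * and %edx = (to >> PAGE_SHIFT) * 4, the byte offset of to's
	 * PTE within the recursive _PTmap mapping of the page tables.
	 */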

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	_trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 */

/* copyin(from_user, to_kernel, len) */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 */

#if defined(I586_CPU) && NNPX > 0
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to be passed on the stack for profiling
	uses %eax and %edx for temporary storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
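	/*
	 * Prime the cache, as in i586_bcopy above: touch one longword
	 * in each 32-byte line of the chunk before the FPU copy loop.
	 */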
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 */

/*
 * fu{byte,sword,word}: fetch a byte (word, longword) from user memory
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time.  If they fail, that's okay; good things will
 * happen later.  They always fail for now, until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f				/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from 'from' to 'to', stopping when a NUL character
 *	is reached.  Return ENAMETOOLONG if the string is longer than
 *	maxlen, and EFAULT on protection violations.  If lencopied is
 *	non-NULL, return the actual length in *lencopied.
 */
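/*
 * The loop below runs with %edx = maxlen + 1 and decrements before each
 * byte, so when %edx reaches zero we either exceeded maxlen
 * (ENAMETOOLONG) or walked off the end of the user address space
 * (EFAULT, detected by re-checking %esi against VM_MAXUSER_ADDRESS).
 */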
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

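/*
 * bcmp(b1, b2, len): return zero if the two byte strings are identical,
 * nonzero otherwise.
 */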
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%fs
	movl	%ax,%gs
	movl	%ax,%ss

	/* reload the code selector by turning the return into an intersegment return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/* ssdtosd(*ssdp,*sdp) */
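/*
 * Convert the machine-independent "soft" segment descriptor at *ssdp
 * into the packed i386 hardware descriptor layout at *sdp, interleaving
 * the base, limit and flag fields as the hardware expects them.
 */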
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret


/*****************************************************************************/
/* setjmp, longjmp                                                           */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3				/* avoid macro for `ret' */