/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$Id: support.s,v 1.52 1997/04/26 11:45:21 peter Exp $
 */

#include "npx.h"
#include "opt_cpu.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>
#include <machine/smpasm.h>

#include "assym.s"

#define KDSEL		0x10			/* kernel data selector */
#define KCSEL		0x8			/* kernel code selector */
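/*
 * IDXSHIFT is PAGE_SHIFT - 2: `(va >> IDXSHIFT) & ~3' is the byte offset
 * of va's PTE within the recursive _PTmap mapping, i.e.
 * (va >> PAGE_SHIFT) * sizeof(pt_entry_t).
 */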
#define IDXSHIFT	10

	.data
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
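/*
 * kernel_fpu_lock serializes kernel use of the FPU on the UP i586
 * paths below.  Encoding: 0xfe is unlocked, 0xff is locked.
 * `sarb $1,kernel_fpu_lock' shifts the sign bit back in, leaving the
 * byte 0xff either way, and moves the old low bit into CF: CF clear
 * means the lock was free and is now held, CF set means it was
 * already held.  A single read-modify-write instruction cannot be
 * interrupted, so this is interrupt-safe on a UP machine.  The lock
 * is dropped with a plain `movb $0xfe,kernel_fpu_lock'.
 */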
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
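	/*
	 * smsw/lmsw save and restore the low 16 bits of %cr0 (notably
	 * CR0_TS) around our use of the FPU, and clts clears TS so the
	 * FPU instructions below don't trap; smsw/lmsw are presumably
	 * cheaper here than mov to/from %cr0.
	 */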
	smsw	%ax
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstpl	%st(0)
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 && !SMP */

/* fillw(pat, base, cnt): store cnt copies of the 16-bit pattern pat at base */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret

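/*
 * bcopyb(src, dst, cnt) - byte-at-a-time bcopy; copies backwards when
 * src < dst and the regions overlap, so overlapping moves are safe.
 */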
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld
	ret

ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
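	/*
	 * Prime the L1 data cache: touch one longword in each 32-byte
	 * cache line of the chunk (the chunk size is capped above at
	 * (DCACHE_SIZE-512)/2 bytes, roughly half of the Pentium's 8K
	 * D-cache) so the FPU copy loop below runs out of the cache.
	 */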
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
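	/*
	 * Copy 64 bytes per iteration through the FPU stack.  fildq and
	 * fistpq move 8 bytes each; the int64 -> extended-real -> int64
	 * round trip is exact (the 64-bit significand holds any 64-bit
	 * integer), so arbitrary bit patterns survive.  The loads stack
	 * up, so the stores pop out in reverse order.
	 */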
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 && !SMP */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax			/* return dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3. For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0. The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */

/* copyout(from_kernel, to_user, len) */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

ENTRY(generic_copyout)
	GETCURPCB(%eax)
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

1:
	/* check PTE for each page */
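	/*
	 * _PTmap is the recursive page-table mapping, so _PTmap(%edx) is
	 * the address of the PTE for the target page.  Scaling that
	 * address the same way (shift and mask) gives the offset of the
	 * PTE mapping the PTE page itself, i.e. the PDE, which must be
	 * valid before the PTE may be examined.
	 */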
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	_trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	GETCURPCB(%edx)
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	GETCURPCB(%edx)
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 && !SMP */

/* copyin(from_user, to_kernel, len) */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

ENTRY(generic_copyin)
	GETCURPCB(%eax)
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	GETCURPCB(%edx)
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	GETCURPCB(%edx)
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 && !SMP */

#if defined(I586_CPU) && NNPX > 0 && !defined(SMP)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp
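	/*
	 * Frame layout: an FPU save area (PCB_SAVEFPU_SIZE bytes) at
	 * 0(%esp), plus three spill slots at -12(%ebp), -8(%ebp) and
	 * -4(%ebp) used to preserve %ecx, %esi and %edi around the
	 * rep movsl copies below.
	 */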

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
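	/*
	 * Same cache-priming and fildq/fistpq technique as in
	 * i586_bcopy above: 64 bytes per iteration through the FPU
	 * stack, with exact 64-bit integer round trips.
	 */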
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 && !SMP */

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 */
ENTRY(fuword)
	GETCURPCB(%ecx)
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later. Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fusword)
	GETCURPCB(%ecx)
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	GETCURPCB(%ecx)
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	GETCURPCB(%ecx)
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 */
ENTRY(suword)
	GETCURPCB(%ecx)
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f				/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	GETCURPCB(%ecx)
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(susword)
	GETCURPCB(%ecx)
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	GETCURPCB(%ecx)				/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ALTENTRY(suibyte)
ENTRY(subyte)
	GETCURPCB(%ecx)
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	GETCURPCB(%ecx)				/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from `from' to `to'; stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	GETCURPCB(%ecx)
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	GETCURPCB(%ecx)
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

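/*
 * bcmp(b1, b2, len): return zero if the two byte strings match,
 * nonzero otherwise.  Compares longword-wise, then any odd bytes.
 */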
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%fs
	movl	%ax,%gs
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/* ssdtosd(*ssdp,*sdp) */
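/*
 * Convert a machine-independent "soft" segment descriptor (separate
 * base, limit and flags fields) into the interleaved bit layout of a
 * hardware x86 segment descriptor; the shifts, rotates and byte moves
 * below scatter the base and limit into their architected positions.
 */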
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret


/*****************************************************************************/
/* setjmp, longjmp                                                           */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3				/* avoid macro for `ret' */