/* support.s revision 29041 */
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	$Id: support.s,v 1.56 1997/08/09 00:02:44 dyson Exp $
34 */
35
36#include "npx.h"
37
38#include <machine/asmacros.h>
39#include <machine/cputypes.h>
40#include <machine/pmap.h>
41#include <machine/specialreg.h>
42
43#include "assym.s"
44
45#define KDSEL		0x10			/* kernel data selector */
46#define KCSEL		0x8			/* kernel code selector */
47#define IDXSHIFT	10
48
	.data
/*
 * Indirect-call vectors for the copy/zero primitives.  They start out
 * pointing at the generic i386 implementations; NOTE(review): presumably
 * CPU-identification code repoints them at the i586_* FPU-assisted
 * versions below when that is profitable — confirm against machdep.c.
 */
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
/*
 * Byte lock serializing kernel use of the FPU: 0xfe = free, 0xff = held.
 * Acquired with `sarb $1' (the bit shifted out sets CF iff already held,
 * and the shift leaves 0xff = held); released by storing 0xfe back.
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
70
71	.text
72
/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 *
 * Generic version: zero the bulk with longword stores, then store the
 * 0-3 trailing bytes.
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi		/* buf (offsets include saved %edi) */
	movl	12(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* fill pattern = 0 */
	shrl	$2,%ecx			/* number of whole longwords */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx			/* 0-3 leftover bytes */
	rep
	stosb
	popl	%edi
	ret
93
#if defined(I486_CPU)
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * 486-tuned version: unrolled longword stores avoid `rep stos' startup
 * overhead.  Falls through successively smaller chunk sizes (64, 16, 4
 * bytes), finishing the last 0-3 bytes via a jump table.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* fill pattern = 0 */
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b			/* count hit exactly 0: all done */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:					/* indexed by remaining count, 0-3 */
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)		/* %ecx is 0-3 here */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)		/* 2 bytes + 1 byte */
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
195
#if defined(I586_CPU) && NNPX > 0
/*
 * void i586_bzero(void *buf, u_int len)
 *
 * Pentium version: large buffers are zeroed 8 bytes at a time with FPU
 * stores of +0.0 (fstl); small buffers fall back to `rep stos'.  %ax
 * carries the saved machine status word (low CR0, including CR0_TS) so
 * it can be restored with lmsw on the way out.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	intreg_i586_bzero	/* busy: use the integer method */
	smsw	%ax			/* save low CR0 (TS bit) */
	clts
	subl	$108,%esp		/* room for an 87-style FPU save area */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz				/* st(0) = +0.0, the fill pattern */

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)			/* restore the app's FPU state */
	addl	$108,%esp
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the lock */
	ret

i586_bz3:
	fstpl	%st(0)			/* pop the fill constant */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len (args shifted by push) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 */
337
/*
 * void fillw(u_short pat, void *base, size_t cnt)
 *
 * Store `cnt' copies of the 16-bit pattern `pat' starting at `base'.
 */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax		/* pat (low 16 bits used by stosw) */
	movl	12(%esp),%edi		/* base */
	movl	16(%esp),%ecx		/* cnt, in words */
	cld
	rep
	stosw
	popl	%edi
	ret
349
/*
 * void bcopyb(const void *src, void *dst, size_t len)
 *
 * Byte-wise copy that handles overlapping regions: copies backwards
 * when src < dst and the ranges overlap, forwards otherwise.
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* src */
	movl	16(%esp),%edi		/* dst */
	movl	20(%esp),%ecx		/* len */
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld					/* restore expected DF=0 */
	ret
381
/* bcopy(src, dst, len): dispatch through the per-CPU vector. */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector
385
/* ovbcopy(src, dst, len): overlap-safe bcopy, via the per-CPU vector. */
ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector
389
/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 *
 * Longword copy handling overlap: forwards normally, backwards (bytes
 * first, then longwords) when the ranges overlap with src < dst.
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* src */
	movl	16(%esp),%edi		/* dst */
	movl	20(%esp),%ecx		/* cnt */

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi				/* step back to longword start */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld					/* restore expected DF=0 */
	ret
438
#if defined(I586_CPU) && NNPX > 0
/*
 * i586_bcopy(src, dst, cnt)
 *
 * Pentium version: copies >= 1024 non-overlapping bytes 64 at a time
 * through the FPU (fildq/fistpq), pre-touching the source into the
 * D-cache one 32-byte line per 256 bytes.  Smaller or overlapping
 * copies use the generic rep movs paths below.  %dx holds the saved
 * machine status word so CR0_TS can be restored.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* src */
	movl	16(%esp),%edi		/* dst */
	movl	20(%esp),%ecx		/* cnt */

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	small_i586_bcopy	/* busy: fall back to rep movs */
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx			/* save low CR0 (TS bit) */
	clts
	subl	$108,%esp		/* room for an 87-style FPU save area */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	/* outer loop: process up to half the D-cache per pass */
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)		/* saved total -= this pass's chunk */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	/* touch one longword per cache line to pull src into the D-cache */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	/* 64 bytes per iteration via 8 FPU load/store pairs */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax			/* remaining total from outer loop */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)			/* restore the app's FPU state */
	addl	$108,%esp
i586_bc2:
	lmsw	%dx			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the lock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* overlapping: copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 */
576
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * Note: memcpy does not support overlapping copies.
 * Returns dst (kept in %eax across the copy, per the C contract).
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi		/* dst */
	movl	16(%esp),%esi		/* src */
	movl	20(%esp),%ecx		/* len */
	movl	%edi,%eax		/* return value = dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* always copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
598
599
600/*****************************************************************************/
601/* copyout and fubyte family                                                 */
602/*****************************************************************************/
603/*
604 * Access user memory from inside the kernel. These routines and possibly
605 * the math- and DOS emulators should be the only places that do this.
606 *
607 * We have to access the memory with user's permissions, so use a segment
608 * selector with RPL 3. For writes to user space we have to additionally
609 * check the PTE for write permission, because the 386 does not check
610 * write permissions when we are executing with EPL 0. The 486 does check
611 * this if the WP bit is set in CR0, so we can use a simpler version here.
612 *
613 * These routines set curpcb->onfault for the time they execute. When a
614 * protection violation occurs inside the functions, the trap handler
615 * returns to *curpcb->onfault instead of the function.
616 */
617
/*
 * copyout(from_kernel, to_user, len) — dispatch via the per-CPU vector.
 * Returns 0 on success, EFAULT on a bad user address (via copyout_fault,
 * reached either by explicit checks or through curpcb->onfault).
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* from (kernel) */
	movl	20(%esp),%edi		/* to (user) */
	movl	24(%esp),%ebx		/* len */
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f			/* non-386: h/w WP check suffices */
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE offset */

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx		/* back to a virtual address */
	pushl	%edx
	call	_trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyout:				/* shared entry from i586_copyout */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success: return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
748
#if defined(I586_CPU) && NNPX > 0
/*
 * i586_copyout(from_kernel, to_user, len)
 *
 * Validates the user range like generic_copyout, then uses fastmove()
 * (FPU 64-byte copy) for >= 1024 bytes; smaller copies jump into
 * generic_copyout's rep movs path (slow_copyout).
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* from (kernel) */
	movl	20(%esp),%edi		/* to (user) */
	movl	24(%esp),%ebx		/* len */
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 */
802
/*
 * copyin(from_user, to_kernel, len) — dispatch via the per-CPU vector.
 * Returns 0 on success, EFAULT on a bad user address (via copyin_fault,
 * reached either by the explicit check or through curpcb->onfault).
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid (no wrap, ends in user space)
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyin:				/* shared entry from i586_copyin */
#endif
	movb	%cl,%al			/* save low len bits across shrl */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
done_copyin:				/* shared exit from i586_copyin */
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* success: return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
859
#if defined(I586_CPU) && NNPX > 0
/*
 * i586_copyin(from_user, to_kernel, len)
 *
 * Validates the user range like generic_copyin, then uses fastmove()
 * for >= 1024 bytes; smaller copies jump into generic_copyin's
 * rep movs path (slow_copyin).
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 */
894
#if defined(I586_CPU) && NNPX > 0
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage

   FPU-based 64-bytes-at-a-time copy used by i586_copyin/i586_copyout.
   Requires src and dst to be 8-byte aligned and len > 63, else it just
   does the rep movs tail.  Saves the current FPU state into
   curpcb->pcb_savefpu (making the FPU "ours"), copies, then restores
   the saved state.  Faults unwind through fastmove_fault /
   fastmove_tail_fault back to the copyin/copyout fault frame.
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 spill slots */

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)		/* spill len/src/dst around the copy */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi		/* copy pcb_savefpu to our stack area */
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	/* outer loop: at most 1792 bytes per pass */
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)		/* -12(%ebp) = bytes left after pass */
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	/* touch one longword per cache line to pull src into the D-cache */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	/* 64 bytes per iteration via 8 FPU load/store pairs */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax		/* fold back the deferred remainder */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al			/* save low len bits across shrl */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* fault in the FPU loop: put pcb_savefpu back, re-enable TS */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	/* unwind our frame and the caller's, then fail like copy*_fault */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp			/* drop pushed len (+%ebx slot) */
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 */
1082
/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 *
 * Return the fetched value, or -1 on fault (via fusufault).
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1097
/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later. Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax		/* unconditionally report failure */
	ret
1108
/* fusword: fetch a 16-bit word from user memory; -1 on fault. */
ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax		/* zero-extend to the full return reg */
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1120
/* fubyte: fetch a byte from user memory; -1 on fault. */
ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax		/* zero-extend to the full return reg */
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	/* common fault exit for all fu*/su* routines: return -1 */
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)	/* disarm fault recovery */
	decl	%eax			/* %eax = -1 */
	ret
1140
/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 *
 * Return 0 on success, -1 on fault (via fusufault).  On a real 386,
 * the h/w does not enforce user write protection from kernel mode, so
 * the PTE must be checked (and trapwrite() called) by hand first.
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f				/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl		/* longword-align the PTE offset */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax		/* value to store */
	movl	%eax,(%edx)
	xorl	%eax,%eax		/* success: return 0 */
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1192
/* susword: store a 16-bit word to user memory; 0 on success, -1 on fault. */
ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax		/* value to store */
	movw	%ax,(%edx)
	xorl	%eax,%eax		/* success: return 0 */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1241
/* subyte/suibyte: store a byte to user memory; 0 on success, -1 on fault. */
ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al		/* value to store */
	movb	%al,(%edx)
	xorl	%eax,%eax		/* success: return 0 */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1290
/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)	/* arm fault recovery */

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx			/* pre-biased for the decl below */
	cld

2:
	decl	%edx
	jz	3f

	lodsb				/* copy one byte, stop at NUL */
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt		/* stopped because we hit the limit */
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)	/* disarm fault recovery */
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* bytes copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1361
1362
/*
 * copystr(from, to, maxlen, int *lencopied)
 *
 * Kernel-to-kernel string copy: like copyinstr but with no user-address
 * checks or fault recovery.  Returns 0 or ENAMETOOLONG.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx			/* pre-biased for the decl below */
	cld
1:
	decl	%edx
	jz	4f
	lodsb				/* copy one byte, stop at NUL */
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx		/* bytes copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret
1403
/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 *
 * Compare two byte strings: returns 0 if identical, nonzero otherwise.
 * Compares longword-wise first, then the 0-3 trailing bytes.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax		/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax			/* mismatch: return nonzero */
2:
	popl	%esi
	popl	%edi
	ret
1430
1431
1432/*
1433 * Handling of special 386 registers and descriptor tables etc
1434 */
/*
 * void lgdt(struct region_descriptor *rdp);
 *
 * Load a new GDT and reload every segment register so no stale
 * descriptors remain cached, including %cs via a far return.
 */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%fs
	movl	%ax,%gs
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	lret
1458
/*
 * void lidt(struct region_descriptor *rdp);
 *
 * Load the interrupt descriptor table register.
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret
1466
/*
 * void lldt(u_short sel)
 *
 * Load the local descriptor table register from the selector argument.
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret
1473
/*
 * void ltr(u_short sel)
 *
 * Load the task register from the selector argument.
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret
1480
/*
 * ssdtosd(*ssdp,*sdp)
 *
 * Repack a software segment descriptor (flat field layout at *ssdp)
 * into the interleaved hardware descriptor format at *sdp.
 * NOTE(review): exact field layout comes from the ssd/sd structs in
 * machine headers — confirm there before changing any shift/mask.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret
1501
/*
 * Control-register accessors: trivial read/write wrappers so C code
 * can get at %cr0, %cr3 and %cr4.
 */

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret

/* rcr4() */
ENTRY(rcr4)
	movl	%cr4,%eax
	ret

/* void load_cr4(caddr_t cr4) */
ENTRY(load_cr4)
	movl	4(%esp),%eax
	movl	%eax,%cr4
	ret
1534
1535/*****************************************************************************/
1536/* setjump, longjump                                                         */
1537/*****************************************************************************/
1538
/*
 * int setjmp(jmp_buf) — save callee-saved registers, %esp and the
 * return address into the 6-longword buffer; returns 0.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
1550
/*
 * void longjmp(jmp_buf) — restore the state saved by setjmp and
 * resume there; setjmp appears to return 1.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1563
/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 * Stores 1 through the argument pointer and returns.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3				/* avoid macro for `ret' */
1572