/* support.s revision 35933 */
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	$Id: support.s,v 1.58 1997/12/14 02:11:09 dyson Exp $
34 */
35
36#include "npx.h"
37
38#include <machine/asmacros.h>
39#include <machine/cputypes.h>
40#include <machine/pmap.h>
41#include <machine/specialreg.h>
42
43#include "assym.s"
44
45#define KDSEL		0x10			/* kernel data selector */
46#define KCSEL		0x8			/* kernel code selector */
47#define IDXSHIFT	10
48
	.data

/*
 * Indirect-function vectors for the copy/zero primitives.  Each vector
 * holds the address of the implementation to use; they are initialized
 * here to the generic versions and are presumably re-pointed at the
 * CPU-optimized i486/i586 versions during startup (the patching code is
 * not in this file -- TODO confirm against machdep initialization).
 */
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
/*
 * Byte spinlock protecting kernel use of the FPU.  Unlocked state is
 * 0xfe (bit 0 clear): `sarb $1' then shifts a 0 into CF and leaves the
 * byte as 0xff (locked); a second `sarb $1' shifts out a 1 (CF set =
 * busy).  Unlock by storing 0xfe again.
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
70
71	.text
72
73/*
74 * bcopy family
75 * void bzero(void *buf, u_int len)
76 */
77
/*
 * void generic_bzero(void *buf, u_int len)
 *
 * Zero `len' bytes at `buf': bulk with `rep stosl', then the
 * trailing 0-3 bytes with `rep stosb'.  Clobbers %eax, %ecx, flags.
 */
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi			/* buf (args shifted by the push) */
	movl	12(%esp),%ecx			/* len */
	xorl	%eax,%eax			/* fill value = 0 */
	shrl	$2,%ecx				/* longword count */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx				/* residual byte count */
	rep
	stosb
	popl	%edi
	ret
93
#if defined(I486_CPU)
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * 486-tuned bzero: straight-line stores in 64-byte, then 16-byte,
 * then 4-byte chunks, finishing the last 0-3 bytes through a jump
 * table.  Uses only %eax/%ecx/%edx; no registers are saved.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx			/* buf */
	movl	8(%esp),%ecx			/* len */
	xorl	%eax,%eax			/* fill value = 0 */
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b				/* %ecx == 0 means exactly done */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:						/* indexed by remaining count 0-3 */
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)			/* indirect jump through jtab[%ecx] */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)			/* 3 bytes = word + byte */
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
195
#if defined(I586_CPU) && NNPX > 0
/*
 * void i586_bzero(void *buf, u_int len)
 *
 * Pentium-tuned bzero.  Large buffers are zeroed 8 bytes at a time
 * with `fstl' of an FPU zero (faster on the P5 than integer stores);
 * small buffers fall back to `rep stos' (intreg_i586_bzero).  The FPU
 * state is saved/restored as described in the big comment below.
 * CR0's low bits (incl. CR0_TS) are preserved across the FPU use via
 * smsw/lmsw in %ax.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx			/* buf */
	movl	8(%esp),%ecx			/* len */

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock		/* try-lock: CF set if already held */
	jc	intreg_i586_bzero
	smsw	%ax				/* save machine status word (CR0_TS) */
	clts
	subl	$108,%esp			/* room for an FPU save area on stack */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock		/* try-lock: CF set if already held */
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz					/* st(0) = 0.0, stored 8 bytes at a time */

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc			/* did we save an app's FPU state? */
	je	i586_bz3
	frstor	0(%esp)				/* yes: restore it */
	addl	$108,%esp
	lmsw	%ax				/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock		/* unlock */
	ret

i586_bz3:
	fstpl	%st(0)				/* pop our zero; leave FPU stack empty */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock		/* unlock */
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi			/* buf */
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx			/* reload len (args shifted by push) */
	andl	$3,%ecx				/* residual bytes */
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 */
337
/*
 * void i686_pagezero(void *addr)
 *
 * Zero one 4K page (1024 longwords), but skip longwords that are
 * already zero: `repe scasl' scans for the first nonzero longword and
 * only runs of nonzero data are rewritten with `rep stosl'.  This
 * presumably avoids dirtying cache lines that are already zero --
 * TODO confirm intent against the commit log.
 */
ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp), %edi			/* addr (args shifted by 2 pushes) */
	movl	$1024, %ecx			/* longwords per page */
	cld

	ALIGN_TEXT
1:
	xorl	%eax, %eax
	repe
	scasl					/* scan while longwords == 0 */
	jnz	2f				/* found a nonzero longword */

	popl	%ebx
	popl	%edi
	ret

	ALIGN_TEXT

2:
	incl	%ecx				/* undo scasl's count decrement */
	subl	$4, %edi			/* back up to the nonzero longword */

	movl	%ecx, %edx			/* %edx = longwords remaining */
	cmpl	$16, %ecx

	jge	3f				/* >= 16 left: just zero the rest */

	/* < 16 left: zero up to the end of the current 64-byte line */
	movl	%edi, %ebx
	andl	$0x3f, %ebx			/* byte offset within 64-byte line */
	shrl	%ebx
	shrl	%ebx				/* -> longword offset within line */
	movl	$16, %ecx
	subl	%ebx, %ecx			/* longwords to the line's end */

3:
	subl	%ecx, %edx			/* %edx = longwords left after store */
	rep
	stosl

	movl	%edx, %ecx
	testl	%edx, %edx
	jnz	1b				/* more page left: resume scanning */

	popl	%ebx
	popl	%edi
	ret
387
/*
 * void fillw(int pat, void *base, size_t cnt)
 *
 * Store `cnt' copies of the 16-bit pattern `pat' at `base'
 * (e.g. for text-mode video memory).  Clobbers %eax, %ecx, flags.
 */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax			/* pat (args shifted by the push) */
	movl	12(%esp),%edi			/* base */
	movl	16(%esp),%ecx			/* cnt */
	cld
	rep
	stosw
	popl	%edi
	ret
399
/*
 * void bcopyb(const void *src, void *dst, size_t len)
 *
 * Byte-wise bcopy that handles overlapping regions: the unsigned
 * compare of (dst - src) against len detects overlap with src < dst,
 * in which case the copy runs backwards with the direction flag set.
 * DF is always left clear on return.
 */
ENTRY(bcopyb)
bcopyb:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* src (args shifted by 2 pushes) */
	movl	16(%esp),%edi			/* dst */
	movl	20(%esp),%ecx			/* len */
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi				/* start at last byte of each region */
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld					/* restore the expected DF=0 state */
	ret
431
/*
 * void bcopy(const void *src, void *dst, size_t len)
 *
 * Tail-jump through _bcopy_vector to the selected implementation;
 * the original stack frame (and its args) is passed through intact.
 */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector
435
/*
 * void ovbcopy(const void *src, void *dst, size_t len)
 *
 * Overlap-safe bcopy: tail-jump through _ovbcopy_vector (initialized
 * to _generic_bcopy, which handles overlap).
 */
ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector
439
440/*
441 * generic_bcopy(src, dst, cnt)
442 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
443 */
/*
 * void generic_bcopy(const void *src, void *dst, size_t len)
 *
 * Longword-wise bcopy with overlap handling.  Forward copies move
 * longwords then the 0-3 residual bytes; backward copies (needed
 * when the regions overlap with src < dst) move the residual bytes
 * first, then longwords.  DF is left clear on return.
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* src (args shifted by 2 pushes) */
	movl	16(%esp),%edi			/* dst */
	movl	20(%esp),%ecx			/* len */

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi				/* point at last byte of each region */
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi				/* step back to last full longword */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld					/* restore the expected DF=0 state */
	ret
488
#if defined(I586_CPU) && NNPX > 0
/*
 * void i586_bcopy(const void *src, void *dst, size_t len)
 *
 * Pentium-tuned bcopy.  Non-overlapping copies of >= 1024 bytes go
 * through the FPU, 64 bytes per iteration via fildq/fistpq; working
 * sets up to half the 8K D-cache are pre-read first to prime the
 * cache.  FPU state handling mirrors i586_bzero (kernel_fpu_lock,
 * fnsave/frstor, CR0_TS preserved in %dx via smsw/lmsw).  Small or
 * backward (overlapping) copies use the rep-movs paths duplicated
 * from generic_bcopy below.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* src (args shifted by 2 pushes) */
	movl	16(%esp),%edi			/* dst */
	movl	20(%esp),%ecx			/* len */

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx			/* FPU path only pays off when large */
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock		/* try-lock: CF set if already held */
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx				/* save CR0_TS state */
	clts
	subl	$108,%esp			/* save the app's FPU state on stack */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx				/* total remaining; chunked below */
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx	/* clamp chunk to half the D-cache */
2:
	subl	%ecx,0(%esp)			/* remaining -= this chunk */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	/* touch one longword per 32-byte line to pull src into the cache */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	/* 64 bytes per iteration through the FPU stack */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)			/* pops come off in reverse order */
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax				/* add back the unchunked remainder */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* next chunk */

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)				/* restore the app's FPU state */
	addl	$108,%esp
i586_bc2:
	lmsw	%dx				/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock		/* unlock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	/* overlapping with src < dst: copy backwards (see generic_bcopy) */
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 */
626
627/*
628 * Note: memcpy does not support overlapping copies
629 */
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * C-library-style copy: longwords then residual bytes, always
 * forwards.  Does NOT handle overlapping regions (see note above).
 * Returns the original dst pointer in %eax.
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* dst (args shifted by 2 pushes) */
	movl	16(%esp),%esi			/* src */
	movl	20(%esp),%ecx			/* len */
	movl	%edi,%eax			/* return value = dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
648
649
650/*****************************************************************************/
651/* copyout and fubyte family                                                 */
652/*****************************************************************************/
653/*
654 * Access user memory from inside the kernel. These routines and possibly
655 * the math- and DOS emulators should be the only places that do this.
656 *
657 * We have to access the memory with user's permissions, so use a segment
658 * selector with RPL 3. For writes to user space we have to additionally
659 * check the PTE for write permission, because the 386 does not check
660 * write permissions when we are executing with EPL 0. The 486 does check
661 * this if the WP bit is set in CR0, so we can use a simpler version here.
662 *
663 * These routines set curpcb->onfault for the time they execute. When a
664 * protection violation occurs inside the functions, the trap handler
665 * returns to *curpcb->onfault instead of the function.
666 */
667
668/* copyout(from_kernel, to_user, len) */
/*
 * int copyout(const void *from_kernel, void *to_user, size_t len)
 *
 * Tail-jump through _copyout_vector to the selected implementation.
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector
672
/*
 * int generic_copyout(const void *from_kernel, void *to_user, size_t len)
 *
 * Copy kernel memory out to user space.  Returns 0 on success or
 * EFAULT on a bad user address; faults taken during the copy vector
 * through curpcb->pcb_onfault to copyout_fault.  On a real 386
 * (no CR0_WP) each destination PTE is checked for user-writability
 * by hand, simulating the missing hardware write-protect trap via
 * trapwrite().
 */
ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm the fault handler */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi			/* from (args shifted by 3 pushes) */
	movl	20(%esp),%edi			/* to */
	movl	24(%esp),%ebx			/* len */
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f				/* 486+: hardware WP covers us */
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* longword-align the PTE offset */

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx			/* PTE offset back to virtual addr */
	pushl	%edx
	call	_trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx				/* next PTE */
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyout:					/* i586_copyout joins here for small copies */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl				/* residual bytes */
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* disarm the fault handler */
	ret

	ALIGN_TEXT
copyout_fault:
	/* reached via curpcb->pcb_onfault from the trap handler */
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
798
#if defined(I586_CPU) && NNPX > 0
/*
 * int i586_copyout(const void *from_kernel, void *to_user, size_t len)
 *
 * Pentium copyout: same validation as generic_copyout, then copies of
 * >= 1024 bytes go through fastmove() (FPU copy); smaller ones jump
 * into generic_copyout's slow_copyout path.  Faults vector through
 * copyout_fault via curpcb->pcb_onfault.
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi			/* from (args shifted by 3 pushes) */
	movl	20(%esp),%edi			/* to */
	movl	24(%esp),%ebx			/* len */
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx			/* fastmove only pays off when large */
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 */
852
853/* copyin(from_user, to_kernel, len) */
/*
 * int copyin(const void *from_user, void *to_kernel, size_t len)
 *
 * Tail-jump through _copyin_vector to the selected implementation.
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector
857
/*
 * int generic_copyin(const void *from_user, void *to_kernel, size_t len)
 *
 * Copy user memory into the kernel.  Returns 0 on success or EFAULT
 * on a bad user address; faults taken during the copy vector through
 * curpcb->pcb_onfault to copyin_fault.  No PTE walk is needed here:
 * reads of invalid user pages simply fault.
 */
ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm the fault handler */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault			/* source range wraps */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyin:					/* i586_copyin joins here for small copies */
#endif
	movb	%cl,%al				/* stash low len bits in %al */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
done_copyin:					/* i586_copyin joins here on success */
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* disarm the fault handler */
	ret

	ALIGN_TEXT
copyin_fault:
	/* reached via curpcb->pcb_onfault from the trap handler */
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
909
#if defined(I586_CPU) && NNPX > 0
/*
 * int i586_copyin(const void *from_user, void *to_kernel, size_t len)
 *
 * Pentium copyin: same validation as generic_copyin, then copies of
 * >= 1024 bytes go through fastmove() (FPU copy); smaller ones jump
 * into generic_copyin's slow_copyin path.  Faults vector through
 * copyin_fault via curpcb->pcb_onfault.
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx			/* fastmove only pays off when large */
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 */
944
#if defined(I586_CPU) && NNPX > 0
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * Pentium FPU block copy used by i586_copyin/i586_copyout.  Copies
 * 64 bytes per iteration with fildq/fistpq, priming the cache by
 * touching the source in 1792-byte chunks first.  The caller's FPU
 * context and this routine's own state are juggled through
 * curpcb->pcb_savefpu (see comments below); faults during the FPU
 * phase unwind through fastmove_fault, and faults during the rep-movs
 * tail through fastmove_tail_fault, both of which fall back into the
 * caller's (i586_copyin/copyout) stack cleanup and return EFAULT.
 * Requires 8-byte-aligned src and dst and len > 63 for the FPU path;
 * anything else uses the rep-movs tail only.
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 spill slots */

	movl	8(%ebp),%ecx			/* len (see XXX above: on stack) */
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)			/* spill len/src/dst around the */
	movl	%esi,-8(%ebp)			/* rep movsl that copies the pcb */
	movl	%edi,-4(%ebp)			/* FPU save area to our stack */
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)	/* arm FPU-phase handler */
4:
	movl	%ecx,-12(%ebp)			/* -12(%ebp) = total remaining */
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx			/* clamp chunk size */
2:
	subl	%ecx,-12(%ebp)			/* remaining -= this chunk */
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	/* touch one longword per 32-byte line to pull src into the cache */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	/* 64 bytes per iteration through the FPU stack */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)			/* pops come off in reverse order */
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax			/* add back the unchunked remainder */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* next chunk */

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)			/* spill again around rep movsl */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al			/* re-set TS so the next FPU use traps */
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	/* rep-movs path for the remainder (or for unaligned/small moves) */
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al				/* stash low len bits in %al */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* fault during the FPU phase: put the saved FPU state back first */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	/*
	 * Unwind our frame, then the caller's (i586_copyin/copyout)
	 * pushes, and return EFAULT on the caller's behalf.
	 */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp				/* drop caller's pushed len (+%ebx) */
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 */
1132
1133/*
1134 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
1135 */
/*
 * long fuword(const void *addr)
 *
 * Fetch a 32-bit word from user memory.  Returns the word, or -1
 * (via fusufault) on a bad address or fault.
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm the fault handler */
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm */
	ret
1147
1148/*
1149 * These two routines are called from the profiling code, potentially
1150 * at interrupt time. If they fail, that's okay, good things will
1151 * happen later. Fail all the time for now - until the trap code is
1152 * able to deal with this.
1153 */
/*
 * int fuswintr(addr) / int suswintr(addr, v)
 *
 * Interrupt-time user-memory access: always fail with -1 (see the
 * comment above -- the trap code can't handle this yet).
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret
1158
/*
 * int fusword(const void *addr)
 *
 * Fetch a 16-bit word from user memory, zero-extended.  Returns -1
 * (via fusufault) on a bad address or fault.
 */
ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1170
/*
 * int fubyte(const void *addr)
 *
 * Fetch a byte from user memory, zero-extended.  Returns -1
 * (via fusufault) on a bad address or fault.
 */
ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1182
	ALIGN_TEXT
/*
 * Common fault handler for the fu*/su* routines: disarm
 * curpcb->pcb_onfault and return -1.
 */
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax				/* return -1 */
	ret
1190
1191/*
1192 * su{byte,sword,word}: write a byte (word, longword) to user memory
1193 */
/*
 * int suword(void *addr, long v)
 *
 * Store a 32-bit word to user memory.  Returns 0 on success or -1
 * (via fusufault) on failure.  On a real 386 (no CR0_WP) the PTE is
 * checked for user-writability by hand, simulating the missing
 * write-protect trap via trapwrite().
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm the fault handler */
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f				/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* longword-align the PTE offset */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax			/* nonzero = trapwrite failed */
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload addr (clobbered above) */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax			/* v */
	movl	%eax,(%edx)
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm */
	ret
1242
/*
 * int susword(void *addr, int v)
 *
 * Store a 16-bit word to user memory.  Same structure as suword,
 * including the manual PTE write-permission check on a real 386.
 * Returns 0 on success or -1 (via fusufault) on failure.
 */
ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm the fault handler */
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* longword-align the PTE offset */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax			/* nonzero = trapwrite failed */
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload addr (clobbered above) */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax			/* v (low 16 bits) */
	movw	%ax,(%edx)
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm */
	ret
1291
/*
 * int subyte(void *addr, int v) / int suibyte(void *addr, int v)
 *
 * Store a byte to user memory (suibyte is an alias).  Same structure
 * as suword, including the manual PTE write-permission check on a
 * real 386.  Returns 0 on success or -1 (via fusufault) on failure.
 */
ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm the fault handler */
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* longword-align the PTE offset */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax			/* nonzero = trapwrite failed */
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload addr (clobbered above) */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al			/* v (low 8 bits) */
	movb	%al,(%edx)
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm */
	ret
1340
1341/*
1342 * copyinstr(from, to, maxlen, int *lencopied)
1343 *	copy a string from from to to, stop when a 0 character is reached.
1344 *	return ENAMETOOLONG if string is longer than maxlen, and
1345 *	EFAULT on protection violations. If lencopied is non-zero,
1346 *	return the actual length in *lencopied.
1347 */
/*
 * int copyinstr(const void *from, void *to, size_t maxlen, int *lencopied)
 *
 * Copy a NUL-terminated string from user space (see the comment
 * above).  Returns 0, ENAMETOOLONG, or EFAULT; faults during the
 * copy vector through curpcb->pcb_onfault to cpystrflt.  maxlen is
 * first clamped so the source scan cannot run past
 * VM_MAXUSER_ADDRESS.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)	/* arm the fault handler */

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)			/* clamped maxlen, used by cpystrflt_x */
1:
	incl	%edx				/* pre-increment for the decl loop */
	cld

2:
	decl	%edx
	jz	3f				/* maxlen exhausted */

	lodsb
	stosb
	orb	%al,%al				/* NUL terminator? */
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi	/* stopped at the user-space limit? */
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm */
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* bytes copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx			/* lencopied may be NULL */
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1411
1412
1413/*
1414 * copystr(from, to, maxlen, int *lencopied)
1415 */
1416ENTRY(copystr)
1417	pushl	%esi
1418	pushl	%edi
1419
1420	movl	12(%esp),%esi			/* %esi = from */
1421	movl	16(%esp),%edi			/* %edi = to */
1422	movl	20(%esp),%edx			/* %edx = maxlen */
1423	incl	%edx
1424	cld
14251:
1426	decl	%edx
1427	jz	4f
1428	lodsb
1429	stosb
1430	orb	%al,%al
1431	jnz	1b
1432
1433	/* Success -- 0 byte reached */
1434	decl	%edx
1435	xorl	%eax,%eax
1436	jmp	6f
14374:
1438	/* edx is zero -- return ENAMETOOLONG */
1439	movl	$ENAMETOOLONG,%eax
1440
14416:
1442	/* set *lencopied and return %eax */
1443	movl	20(%esp),%ecx
1444	subl	%edx,%ecx
1445	movl	24(%esp),%edx
1446	testl	%edx,%edx
1447	jz	7f
1448	movl	%ecx,(%edx)
14497:
1450	popl	%edi
1451	popl	%esi
1452	ret
1453
/*
 * bcmp(b1, b2, length)
 *	Returns 0 if the two byte strings are identical, 1 otherwise
 *	(unlike memcmp, no ordering information is returned).
 *	Compares a dword at a time, then the 0-3 remaining bytes.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx			/* %edx = length in bytes */
	xorl	%eax,%eax			/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx				/* dword count; shrl also sets ZF for jne below */
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx				/* leftover byte count; andl sets ZF for je below */
	repe
	cmpsb
	je	2f				/* ZF survives if cmpsb executed 0 times */
1:
	incl	%eax				/* mismatch: return 1 */
2:
	popl	%esi
	popl	%edi
	ret
1480
1481
1482/*
1483 * Handling of special 386 registers and descriptor tables etc
1484 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f				/* a near jump forces re-fetch after lgdt */
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax			/* kernel data selector */
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%fs
	movl	%ax,%gs
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax			/* fetch return address ... */
	pushl	%eax				/* ... and rebuild the frame as KCSEL:eip */
	movl	$KCSEL,4(%esp)
	lret					/* far return reloads %cs */
1508
1509/*
1510 * void lidt(struct region_descriptor *rdp);
1511 */
1512ENTRY(lidt)
1513	movl	4(%esp),%eax
1514	lidt	(%eax)
1515	ret
1516
1517/*
1518 * void lldt(u_short sel)
1519 */
1520ENTRY(lldt)
1521	lldt	4(%esp)
1522	ret
1523
1524/*
1525 * void ltr(u_short sel)
1526 */
1527ENTRY(ltr)
1528	ltr	4(%esp)
1529	ret
1530
/* ssdtosd(*ssdp,*sdp)
 *
 * Convert the machine-independent "soft" segment descriptor at *ssdp into
 * the packed i386 hardware descriptor format at *sdp (two 32-bit words).
 * NOTE(review): field meanings of offsets 0/4/8 in the soft descriptor
 * (base/limit/attribute words) come from machine/segments.h -- confirm there.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx			/* %ecx = ssdp */
	movl	8(%ecx),%ebx			/* attribute bits into high half */
	shll	$16,%ebx
	movl	(%ecx),%edx			/* first soft word */
	roll	$16,%edx			/* swap its halves */
	movb	%dh,%bl				/* interleave bytes into %ebx ... */
	movb	%dl,%bh
	rorl	$8,%ebx				/* ... and rotate into final position */
	movl	4(%ecx),%eax			/* second soft word */
	movw	%ax,%dx				/* low 16 bits join first hardware word */
	andl	$0xf0000,%eax			/* bits 16-19 join second hardware word */
	orl	%eax,%ebx
	movl	12(%esp),%ecx			/* %ecx = sdp */
	movl	%edx,(%ecx)			/* store hardware descriptor words */
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret
1551
/* load_cr0(cr0) -- write the argument into control register %cr0 */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret
1557
/* rcr0() -- return the current value of %cr0 */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret
1562
/* rcr3() -- return the current value of %cr3 (page directory base) */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret
1567
/* void load_cr3(caddr_t cr3)
 *	Write %cr3; as a side effect this flushes the (non-global) TLB,
 *	which the optional counter below tallies.
 */
ENTRY(load_cr3)
#if defined(SWTCH_OPTIM_STATS)
	incl	_tlb_flush_count
#endif
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret
1576
/* rcr4() -- return the current value of %cr4 */
ENTRY(rcr4)
	movl	%cr4,%eax
	ret
1581
/* void load_cr4(caddr_t cr4) -- write the argument into %cr4 */
ENTRY(load_cr4)
	movl	4(%esp),%eax
	movl	%eax,%cr4
	ret
1587
1588/*****************************************************************************/
/* setjmp, longjmp                                                           */
1590/*****************************************************************************/
1591
1592ENTRY(setjmp)
1593	movl	4(%esp),%eax
1594	movl	%ebx,(%eax)			/* save ebx */
1595	movl	%esp,4(%eax)			/* save esp */
1596	movl	%ebp,8(%eax)			/* save ebp */
1597	movl	%esi,12(%eax)			/* save esi */
1598	movl	%edi,16(%eax)			/* save edi */
1599	movl	(%esp),%edx			/* get rta */
1600	movl	%edx,20(%eax)			/* save eip */
1601	xorl	%eax,%eax			/* return(0); */
1602	ret
1603
/*
 * void longjmp(jmp_buf)
 *	Restore the context saved by setjmp above and resume at the saved
 *	return address with return value 1 (so the setjmp call appears to
 *	return a second time, non-zero).
 */
ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1616
1617/*
1618 * Here for doing BB-profiling (gcc -a).
1619 * We rely on the "bbset" instead, but need a dummy function.
1620 */
1621NON_GPROF_ENTRY(__bb_init_func)
1622	movl	4(%esp),%eax
1623	movl	$1,(%eax)
1624	.byte	0xc3				/* avoid macro for `ret' */
1625