/* support.s revision 51984 */
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/i386/i386/support.s 51984 1999-10-07 12:40:34Z marcel $
 */
35
#include "opt_smp.h"
#include "npx.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

#define IDXSHIFT	10
47
	.data
/*
 * Runtime-dispatch vectors.  bcopy(), bzero(), copyin(), copyout() and
 * ovbcopy() below jump indirectly through these words, which default to
 * the generic i386 implementations; presumably they are repointed at
 * startup to CPU-specific versions (e.g. i586_bcopy) — the selection
 * code itself is outside this file.
 */
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
/*
 * Lock byte guarding the kernel's private use of the FPU.
 * Acquire: `sarb $1,kernel_fpu_lock' — with the idle value 0xfe the
 * shifted-out bit (CF) is 0, meaning "got it", and the byte becomes
 * 0xff so any further sarb sets CF ("busy").
 * Release: store 0xfe again (see i586_bzero/i586_bcopy).
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
69
	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 *
 * generic_bzero: zero `len' bytes at `buf' with `rep stosl' for the
 * longword part and `rep stosb' for the 0-3 trailing bytes.
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi		/* buf */
	movl	12(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* store zeroes */
	shrl	$2,%ecx			/* longword count */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx			/* residual 0-3 bytes */
	rep
	stosb
	popl	%edi
	ret
92
#if defined(I486_CPU)
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * 486 variant using unrolled longword stores instead of `rep stos'.
 * Register roles: %edx = current store address, %ecx = bytes left,
 * %eax = 0.  The count is reduced in 64-, 16- and 4-byte steps; the
 * final 0-3 bytes go through a jump table.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret				/* count hit exactly zero */

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	jtab(,%ecx,4)		/* %ecx is 0..3 here */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)		/* store 2 bytes + 1 byte */
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
194
#if defined(I586_CPU) && NNPX > 0
/*
 * void i586_bzero(void *buf, u_int len)
 *
 * Pentium variant: for large buffers, zero 8 bytes per store through
 * the FPU (fldz/fstl) after saving or locking out the current FPU
 * state; small buffers use a plain `rep stos' loop instead.
 * %edx = buf, %ecx = len; %ax holds the saved machine status word
 * (low CR0, for the TS bit) across the FPU section.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the kernel FPU lock */
	jc	intreg_i586_bzero
	smsw	%ax			/* save CR0 low word (CR0_TS) */
	clts
	subl	$108,%esp		/* 108 bytes = fnsave image */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc		/* restore the saved app state? */
	je	i586_bz3
	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the lock */
	ret

i586_bz3:
	fstpl	%st(0)			/* pop our fldz */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* len (8(%esp) + our push) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 */
336
/*
 * void i686_pagezero(void *addr)
 *
 * Zero one 4096-byte page (1024 longwords).  The page is scanned with
 * `repe scasl' and longwords that are already zero are skipped, so a
 * page that is already clear is read but never written.  The $16/$0x3f
 * constants suggest chunking relative to 64-byte (16-longword) cache
 * lines — NOTE(review): confirm intent against the P6 cache geometry.
 */
ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp), %edi		/* page address */
	movl	$1024, %ecx		/* longwords per page */
	cld

	ALIGN_TEXT
1:
	xorl	%eax, %eax
	repe
	scasl				/* scan while already zero */
	jnz	2f			/* found a non-zero longword */

	popl	%ebx
	popl	%edi
	ret

	ALIGN_TEXT

2:
	incl	%ecx			/* scasl advanced past the hit; */
	subl	$4, %edi		/* back up to the non-zero longword */

	movl	%ecx, %edx		/* %edx = longwords remaining */
	cmpl	$16, %ecx

	jge	3f			/* plenty left: just store %ecx longs */

	movl	%edi, %ebx		/* near the end: clamp the store to */
	andl	$0x3f, %ebx		/* the next 64-byte boundary; */
	shrl	%ebx			/* %ebx = longword offset in line */
	shrl	%ebx
	movl	$16, %ecx
	subl	%ebx, %ecx

3:
	subl	%ecx, %edx		/* %edx = longwords left after store */
	rep
	stosl				/* zero %ecx longwords (%eax == 0) */

	movl	%edx, %ecx
	testl	%edx, %edx
	jnz	1b			/* rescan the rest of the page */

	popl	%ebx
	popl	%edi
	ret
386
/*
 * fillw(pat, base, cnt)
 *
 * Store `cnt' copies of the 16-bit pattern `pat' at `base'
 * (a simple `rep stosw').
 */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax		/* pat */
	movl	12(%esp),%edi		/* base */
	movl	16(%esp),%ecx		/* cnt (in words) */
	cld
	rep
	stosw
	popl	%edi
	ret
398
/*
 * bcopyb(src, dst, cnt)
 *
 * Byte-by-byte copy that handles overlapping regions: if the regions
 * overlap with src < dst (detected by unsigned dst-src < cnt), the
 * copy is done backwards with DF set, and DF is cleared again before
 * returning.
 */
ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi				/* start at the last byte */
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld					/* restore the direction flag */
	ret
429
/*
 * bcopy(src, dst, cnt) and ovbcopy(src, dst, cnt)
 *
 * Both dispatch through their runtime-selected vector (see the .data
 * section above); each defaults to generic_bcopy, which already
 * handles overlapping regions.
 */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector
437
/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 *
 * Longword copy with overlap handling.  Forwards: longwords first,
 * then the 0-3 trailing bytes.  Backwards (overlap with src < dst):
 * the trailing bytes first, then the longwords, with DF set and
 * cleared again before returning.
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi				/* step back to the last longword */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld					/* restore the direction flag */
	ret
486
#if defined(I586_CPU) && NNPX > 0
/*
 * i586_bcopy(src, dst, cnt)
 *
 * Pentium variant: non-overlapping copies of >= 1024 bytes go through
 * the FPU, 64 bytes per iteration (fildq/fistpq), after saving or
 * locking out the current FPU state.  Work is chunked by DCACHE_SIZE
 * and, per chunk, the source is first touched with one load every 32
 * bytes — apparently to pull it into the cache ahead of the FPU loop
 * (see the XXX prefetch comment).  Small or overlapping copies use
 * the same `rep movs' code as generic_bcopy.
 * %dx holds the saved machine status word (CR0_TS) across the FPU
 * section; the total remaining count is kept on the stack at 4:.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock		/* try to take the FPU lock */
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx				/* save CR0_TS */
	clts
	subl	$108,%esp			/* fnsave image is 108 bytes */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx				/* 0(%esp) = total remaining */
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx	/* clamp chunk size */
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)			/* remaining -= this chunk */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax			/* touch the source, one load */
	movl	32(%esi),%eax			/* per 32 bytes */
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)				/* load 64 bytes ... */
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)			/* ... and store them (stack */
	fistpq	48(%edi)			/* pops in reverse order) */
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax				/* total remaining */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* next chunk */

	cmpl	$0,_npxproc			/* restore saved app state? */
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx				/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock		/* release the lock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* overlap: copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 */
624
/*
 * Note: memcpy does not support overlapping copies
 *
 * memcpy(dst, src, cnt) — returns dst in %eax, per the C memcpy
 * contract.  Longwords first, then the 0-3 trailing bytes, always
 * forwards.
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* dst */
	movl	16(%esp),%esi			/* src */
	movl	20(%esp),%ecx			/* cnt */
	movl	%edi,%eax			/* return value = dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* always copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
646
647
648/*****************************************************************************/
649/* copyout and fubyte family                                                 */
650/*****************************************************************************/
651/*
652 * Access user memory from inside the kernel. These routines and possibly
653 * the math- and DOS emulators should be the only places that do this.
654 *
655 * We have to access the memory with user's permissions, so use a segment
656 * selector with RPL 3. For writes to user space we have to additionally
657 * check the PTE for write permission, because the 386 does not check
658 * write permissions when we are executing with EPL 0. The 486 does check
659 * this if the WP bit is set in CR0, so we can use a simpler version here.
660 *
661 * These routines set curpcb->onfault for the time they execute. When a
662 * protection violation occurs inside the functions, the trap handler
663 * returns to *curpcb->onfault instead of the function.
664 */
665
/* copyout(from_kernel, to_user, len) */
/* Dispatches through the runtime-selected implementation vector. */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector
670
/*
 * generic_copyout(from_kernel, to_user, len)
 *
 * Returns 0 on success, EFAULT on fault or on a non-user destination.
 * Faults during the copy are caught via curpcb->pcb_onfault, which the
 * trap handler redirects to copyout_fault.  Register roles after the
 * prologue: %esi = src, %edi = dst, %ebx = len.
 * The labels slow_copyout, done_copyout and copyout_fault are also
 * entered from i586_copyout below, which duplicates this prologue.
 */
ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* longword-align the PTE index */

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx			/* PTE index back to address */
	pushl	%edx
	call	_trapwrite			/* trapwrite(addr) */
	popl	%edx				/* discard the argument */
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx				/* longwords first */
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl				/* then the 0-3 trailing bytes */
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* clear the fault handler */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
796
#if defined(I586_CPU) && NNPX > 0
/*
 * i586_copyout(from_kernel, to_user, len)
 *
 * Pentium variant: after the same validity checks as generic_copyout,
 * copies of >= 1024 bytes go through _fastmove(); smaller ones jump
 * into generic_copyout at slow_copyout.  Shares done_copyout and
 * copyout_fault with generic_copyout.  Note the prologue pushes %ebx,
 * which fastmove_tail_fault's stack unwind relies on.
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 */
850
/* copyin(from_user, to_kernel, len) */
/* Dispatches through the runtime-selected implementation vector. */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector
855
/*
 * generic_copyin(from_user, to_kernel, len)
 *
 * Returns 0 on success, EFAULT on fault or on a non-user source.
 * Faults are caught via curpcb->pcb_onfault -> copyin_fault.  The
 * labels slow_copyin, done_copyin and copyin_fault are also entered
 * from i586_copyin below.
 */
ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault			/* address wrapped */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al				/* save len mod 4 in %al */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* clear the fault handler */
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
907
#if defined(I586_CPU) && NNPX > 0
/*
 * i586_copyin(from_user, to_kernel, len)
 *
 * Pentium variant: after the same validity checks as generic_copyin,
 * copies of >= 1024 bytes go through _fastmove(); smaller ones jump
 * into generic_copyin at slow_copyin.  The extra pushl %ebx before the
 * call balances the stack layout that fastmove_tail_fault unwinds
 * (i586_copyout gets its %ebx push from its own prologue).
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 */
942
#if defined(I586_CPU) && NNPX > 0
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * FPU-assisted bulk copy used by i586_copyin/i586_copyout.  Requires
 * 8-byte-aligned src and dst and len > 63, else it falls through to
 * the plain `rep movs' tail.  Takes over the FPU (saving any
 * application state into curpcb->pcb_savefpu and stashing a copy in
 * this frame), copies 64 bytes per fildq/fistpq iteration in
 * 1792-byte chunks with a cache-warming read pass, then restores the
 * application state and sets CR0_TS again.
 * Frame layout: [-PCB_SAVEFPU_SIZE..-13](%ebp) = saved FPU image,
 * -12/-8/-4(%ebp) = scratch slots for %ecx/%esi/%edi.
 * On fault the handlers below unwind this frame AND the caller's
 * (i586_copyin/i586_copyout) stack, returning EFAULT from the caller.
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp

	movl	8(%ebp),%ecx			/* len (stack arg) */
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)			/* spill regs around movsl */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx			/* reload regs */
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)			/* -12(%ebp) = total remaining */
	cmpl	$1792,%ecx			/* clamp chunk size */
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)			/* remaining -= this chunk */
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	movl	0(%esi),%eax			/* touch the source, one load */
	movl	32(%esi),%eax			/* per 32 bytes */
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)				/* load 64 bytes ... */
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)			/* ... and store them (stack */
	fistpq	48(%edi)			/* pops in reverse order) */
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax			/* total remaining */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* next chunk */

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax				/* set CR0_TS again via MSW */
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al				/* save len mod 4 */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* Put the saved application FPU state back and re-set CR0_TS. */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	/*
	 * Unwind our frame and then the caller's: skip the return address
	 * plus the pushed len, then pop the caller's saved %ebx/%edi/%esi
	 * (i586_copyout pushes %ebx in its prologue; i586_copyin pushes it
	 * just before the call) and return EFAULT from the caller.
	 */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 */
1130
/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 *
 * fuword: fetch a 32-bit word; returns the word, or -1 (via fusufault)
 * on a bad address.
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)		/* clear the fault handler */
	ret
1145
/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later. Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax			/* unconditional failure */
	ret
1156
/*
 * fusword: fetch a 16-bit word from user memory, zero-extended;
 * returns -1 (via fusufault) on a bad address.
 */
ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1168
/*
 * fubyte: fetch a byte from user memory, zero-extended; returns -1
 * (via fusufault) on a bad address.
 */
ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * Common fault handler for the fu*/su* routines: clear
 * curpcb->pcb_onfault and return -1.
 */
	ALIGN_TEXT
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax				/* return -1 */
	ret
1188
/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 *
 * suword(addr, value): store a 32-bit word; returns 0 on success, -1
 * (via fusufault) on failure.  On a real 386 (no CR0_WP) the PTE must
 * be checked for user-writability by hand, faulting in the page via
 * trapwrite() if needed.
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f				/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx			/* %edx = PTE offset */
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload trashed addr */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax			/* value */
	movl	%eax,(%edx)
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1240
/*
 * susword(addr, value): store a 16-bit word to user memory; returns 0
 * on success, -1 (via fusufault) on failure.  Same 386 PTE check as
 * suword.
 */
ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax			/* value */
	movw	%ax,(%edx)
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1289
/*
 * subyte(addr, value) / suibyte(addr, value): store a byte to user
 * memory; returns 0 on success, -1 (via fusufault) on failure.  Same
 * 386 PTE check as suword.
 */
ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al			/* value */
	movb	%al,(%edx)
	xorl	%eax,%eax			/* success */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1338
/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 *
 * Register roles: %esi = from, %edi = to, %edx = remaining count + 1.
 * Faults are caught via curpcb->pcb_onfault -> cpystrflt.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)			/* clamp the on-stack maxlen too */
1:
	incl	%edx				/* pre-bias for the decl below */
	cld

2:
	decl	%edx
	jz	3f				/* ran out of room */

	lodsb
	stosb
	orb	%al,%al				/* NUL terminator? */
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax			/* return 0 */
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi	/* stopped at the user limit? */
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)		/* clear the fault handler */
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx			/* lencopied wanted? */
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1409
1410
1411/*
1412 * copystr(from, to, maxlen, int *lencopied)
1413 */
1414ENTRY(copystr)
1415	pushl	%esi
1416	pushl	%edi
1417
1418	movl	12(%esp),%esi			/* %esi = from */
1419	movl	16(%esp),%edi			/* %edi = to */
1420	movl	20(%esp),%edx			/* %edx = maxlen */
1421	incl	%edx
1422	cld
14231:
1424	decl	%edx
1425	jz	4f
1426	lodsb
1427	stosb
1428	orb	%al,%al
1429	jnz	1b
1430
1431	/* Success -- 0 byte reached */
1432	decl	%edx
1433	xorl	%eax,%eax
1434	jmp	6f
14354:
1436	/* edx is zero -- return ENAMETOOLONG */
1437	movl	$ENAMETOOLONG,%eax
1438
14396:
1440	/* set *lencopied and return %eax */
1441	movl	20(%esp),%ecx
1442	subl	%edx,%ecx
1443	movl	24(%esp),%edx
1444	testl	%edx,%edx
1445	jz	7f
1446	movl	%ecx,(%edx)
14477:
1448	popl	%edi
1449	popl	%esi
1450	ret
1451
/*
 * bcmp(b1, b2, len)
 *	Compare two byte strings; return 0 if identical, non-zero
 *	otherwise.  Compares whole dwords first, then the 0-3
 *	trailing bytes.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* %edi = b1 */
	movl	16(%esp),%esi			/* %esi = b2 */
	movl	20(%esp),%edx			/* %edx = len */
	xorl	%eax,%eax			/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx				/* %ecx = number of whole dwords */
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f				/* if %ecx was 0, ZF was set by shrl */

	movl	%edx,%ecx
	andl	$3,%ecx				/* %ecx = number of leftover bytes */
	repe
	cmpsb
	je	2f				/* if %ecx was 0, ZF was set by andl */
1:
	incl	%eax				/* mismatch: return non-zero */
2:
	popl	%esi
	popl	%edi
	ret
1478
1479
1480/*
1481 * Handling of special 386 registers and descriptor tables etc
1482 */
1483/* void lgdt(struct region_descriptor *rdp); */
1484ENTRY(lgdt)
1485	/* reload the descriptor table */
1486	movl	4(%esp),%eax
1487	lgdt	(%eax)
1488
1489	/* flush the prefetch q */
1490	jmp	1f
1491	nop
14921:
1493	/* reload "stale" selectors */
1494	movl	$KDSEL,%eax
1495	movl	%ax,%ds
1496	movl	%ax,%es
1497	movl	%ax,%gs
1498	movl	%ax,%ss
1499#ifdef SMP
1500	movl	$KPSEL,%eax
1501#endif
1502	movl	%ax,%fs
1503
1504	/* reload code selector by turning return into intersegmental return */
1505	movl	(%esp),%eax
1506	pushl	%eax
1507	movl	$KCSEL,4(%esp)
1508	lret
1509
1510/*
1511 * void lidt(struct region_descriptor *rdp);
1512 */
1513ENTRY(lidt)
1514	movl	4(%esp),%eax
1515	lidt	(%eax)
1516	ret
1517
1518/*
1519 * void lldt(u_short sel)
1520 */
1521ENTRY(lldt)
1522	lldt	4(%esp)
1523	ret
1524
1525/*
1526 * void ltr(u_short sel)
1527 */
1528ENTRY(ltr)
1529	ltr	4(%esp)
1530	ret
1531
/*
 * ssdtosd(*ssdp,*sdp)
 *
 *	Convert a software segment descriptor (*ssdp) into the split-field
 *	hardware descriptor format and store the two resulting dwords in
 *	*sdp.  NOTE(review): the per-field comments below assume the usual
 *	soft_segment_descriptor layout (base at 0, limit at 4, flags at 8)
 *	-- confirm against <machine/segments.h>.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx			/* %ecx = ssdp */
	movl	8(%ecx),%ebx			/* flags/attribute word */
	shll	$16,%ebx
	movl	(%ecx),%edx			/* segment base */
	roll	$16,%edx			/* swap base halves: hi word now low */
	movb	%dh,%bl				/* base 23:16 */
	movb	%dl,%bh				/* base 31:24 */
	rorl	$8,%ebx
	movl	4(%ecx),%eax			/* segment limit */
	movw	%ax,%dx				/* limit 15:0 into low word */
	andl	$0xf0000,%eax			/* limit 19:16 */
	orl	%eax,%ebx
	movl	12(%esp),%ecx			/* %ecx = sdp */
	movl	%edx,(%ecx)			/* low dword of hardware descriptor */
	movl	%ebx,4(%ecx)			/* high dword of hardware descriptor */
	popl	%ebx
	ret
1552
/* load_cr0(cr0) -- write the given value into control register 0 */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret
1558
/* rcr0() -- return the current value of control register 0 */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret
1563
/* rcr3() -- return the current value of control register 3 (page directory base) */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret
1568
/*
 * void load_cr3(caddr_t cr3)
 *
 *	Load the page directory base register; as a side effect this
 *	flushes the TLB (hence the statistics counter below).
 */
ENTRY(load_cr3)
#if defined(SWTCH_OPTIM_STATS)
	incl	_tlb_flush_count		/* context-switch optimization statistics */
#endif
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret
1577
/* rcr4() -- return the current value of control register 4 */
ENTRY(rcr4)
	movl	%cr4,%eax
	ret
1582
/*
 * void load_cr4(caddr_t cr4)
 *
 *	Write the given value into control register 4.
 */
ENTRY(load_cr4)
	movl	4(%esp),%eax
	movl	%eax,%cr4
	ret
1588
/*
 * int load_gs_param(caddr_t gs)
 *
 *	Load %gs with the given selector, catching the fault that a
 *	bogus selector raises via the pcb's onfault hook.  Returns 0
 *	on success or EFAULT if the load faulted.
 */
ENTRY(load_gs_param)
	movl	_curpcb,%ecx
	movl	$load_gs_param_fault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%eax
	movl	%eax,%gs			/* may fault on an invalid selector */
	xorl	%eax,%eax			/* success: return 0 (was bare `xor'; suffixed for file consistency) */
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
load_gs_param_fault:
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	movl	$EFAULT,%eax
	ret
1603
1604/*****************************************************************************/
1605/* setjump, longjump                                                         */
1606/*****************************************************************************/
1607
/*
 * int setjmp(jmp_buf)
 *
 *	Save the callee-saved registers, stack pointer and return address
 *	into the jmp_buf; always returns 0.  Companion of longjmp below,
 *	which resumes here returning 1.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
1619
/*
 * void longjmp(jmp_buf)
 *
 *	Restore the context saved by setjmp above and resume execution
 *	there; the resumed setjmp call appears to return 1.  Note %esp
 *	is restored first so the saved eip can be planted in the new
 *	return frame.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1632
1633/*
1634 * Support for BB-profiling (gcc -a).  The kernbb program will extract
1635 * the data from the kernel.
1636 */
1637
	.data
	ALIGN_DATA
	.globl bbhead
bbhead:
	.long 0					/* head of the list of registered bb structs */
1643
	.text
/*
 * void __bb_init_func(struct bb *blk)
 *
 *	Called by gcc -a instrumentation: mark the bb struct initialized
 *	and push it onto the bbhead list.  NOTE(review): offset 16 is
 *	presumably the struct's `next' link -- confirm against gcc's
 *	struct bb layout.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)			/* mark initialized */
	movl	bbhead,%edx			/* old list head ... */
	movl	%edx,16(%eax)			/* ... linked after this struct */
	movl	%eax,bbhead			/* new list head */
	.byte	0xc3				/* avoid macro for `ret' */
1652