support.s — FreeBSD i386 kernel low-level support routines (bcopy/bzero/copyin/copyout family), revision 58717
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/sys/i386/i386/support.s 58717 2000-03-28 07:16:37Z dillon $
34 */
35
36#include "opt_smp.h"
37#include "npx.h"
38
39#include <machine/asmacros.h>
40#include <machine/cputypes.h>
41#include <machine/pmap.h>
42#include <machine/specialreg.h>
43
44#include "assym.s"
45
46#define IDXSHIFT	10
47
/*
 * Indirect dispatch vectors for the bcopy/bzero/copyin/copyout family.
 * Each slot holds the address of the implementation to use; they default
 * to the generic i386 versions and are presumably repointed at boot once
 * the CPU class is known (the i486/i586/i686 variants below) — the code
 * that rewrites them is outside this file.
 */
48	.data
49	.globl	_bcopy_vector
50_bcopy_vector:
51	.long	_generic_bcopy
52	.globl	_bzero
53_bzero:
54	.long	_generic_bzero
55	.globl	_copyin_vector
56_copyin_vector:
57	.long	_generic_copyin
58	.globl	_copyout_vector
59_copyout_vector:
60	.long	_generic_copyout
61	.globl	_ovbcopy_vector
62_ovbcopy_vector:
63	.long	_generic_bcopy
64#if defined(I586_CPU) && NNPX > 0
/*
 * Byte lock guarding kernel use of the FPU registers.  Protocol (see the
 * i586 routines below): `sarb $1,kernel_fpu_lock' acquires it — starting
 * from 0xfe the shift leaves 0xff and CF clear (acquired); if the byte is
 * already 0xff the shift sets CF (lock held, caller falls back to the
 * integer path).  Release is `movb $0xfe,kernel_fpu_lock'.
 */
65kernel_fpu_lock:
66	.byte	0xfe
67	.space	3
68#endif
69
70	.text
71
72/*
73 * bcopy family
74 * void bzero(void *buf, u_int len)
75 */
76
/*
 * void generic_bzero(void *buf, u_int len)
 *
 * Portable bzero: clear whole longwords with rep stosl, then the 0-3
 * byte tail with rep stosb.  Preserves %edi (saved/restored); clobbers
 * %eax and %ecx; leaves DF clear.
 */
77ENTRY(generic_bzero)
78	pushl	%edi
79	movl	8(%esp),%edi			/* %edi = buf (after push) */
80	movl	12(%esp),%ecx			/* %ecx = len */
81	xorl	%eax,%eax			/* store zeroes */
82	shrl	$2,%ecx				/* longword count */
83	cld
84	rep
85	stosl
86	movl	12(%esp),%ecx			/* reload len for the tail */
87	andl	$3,%ecx				/* 0-3 bytes remaining */
88	rep
89	stosb
90	popl	%edi
91	ret
92
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * i486-tuned bzero.  Range-reduces the count through unrolled 64-byte,
 * 16-byte and 4-byte store loops, then dispatches the final 0-3 bytes
 * through a jump table.  In each loop the `subl/jnz' falls through to
 * `ret' only when the count reaches exactly 0; otherwise control goes
 * back to the loop head, whose `cmpl/jb' redirects to the next smaller
 * chunk size.  Clobbers %eax, %ecx, %edx.
 */
93#if defined(I486_CPU)
94ENTRY(i486_bzero)
95	movl	4(%esp),%edx			/* %edx = buf */
96	movl	8(%esp),%ecx			/* %ecx = len */
97	xorl	%eax,%eax			/* store zeroes */
98/*
99 * do 64 byte chunks first
100 *
101 * XXX this is probably over-unrolled at least for DX2's
102 */
1032:
104	cmpl	$64,%ecx
105	jb	3f
106	movl	%eax,(%edx)
107	movl	%eax,4(%edx)
108	movl	%eax,8(%edx)
109	movl	%eax,12(%edx)
110	movl	%eax,16(%edx)
111	movl	%eax,20(%edx)
112	movl	%eax,24(%edx)
113	movl	%eax,28(%edx)
114	movl	%eax,32(%edx)
115	movl	%eax,36(%edx)
116	movl	%eax,40(%edx)
117	movl	%eax,44(%edx)
118	movl	%eax,48(%edx)
119	movl	%eax,52(%edx)
120	movl	%eax,56(%edx)
121	movl	%eax,60(%edx)
122	addl	$64,%edx
123	subl	$64,%ecx
124	jnz	2b				/* count == 0 falls through to ret */
125	ret
126
127/*
128 * do 16 byte chunks
129 */
130	SUPERALIGN_TEXT
1313:
132	cmpl	$16,%ecx
133	jb	4f
134	movl	%eax,(%edx)
135	movl	%eax,4(%edx)
136	movl	%eax,8(%edx)
137	movl	%eax,12(%edx)
138	addl	$16,%edx
139	subl	$16,%ecx
140	jnz	3b
141	ret
142
143/*
144 * do 4 byte chunks
145 */
146	SUPERALIGN_TEXT
1474:
148	cmpl	$4,%ecx
149	jb	5f
150	movl	%eax,(%edx)
151	addl	$4,%edx
152	subl	$4,%ecx
153	jnz	4b
154	ret
155
156/*
157 * do 1 byte chunks
158 * a jump table seems to be faster than a loop or more range reductions
159 *
160 * XXX need a const section for non-text
161 */
162	.data
/* Jump table indexed by the remaining byte count (0-3). */
163jtab:
164	.long	do0
165	.long	do1
166	.long	do2
167	.long	do3
168
169	.text
170	SUPERALIGN_TEXT
1715:
172	jmp	jtab(,%ecx,4)			/* dispatch on remaining 0-3 bytes */
173
174	SUPERALIGN_TEXT
175do3:
176	movw	%ax,(%edx)
177	movb	%al,2(%edx)
178	ret
179
180	SUPERALIGN_TEXT
181do2:
182	movw	%ax,(%edx)
183	ret
184
185	SUPERALIGN_TEXT
186do1:
187	movb	%al,(%edx)
188	ret
189
190	SUPERALIGN_TEXT
191do0:
192	ret
193#endif
194
/*
 * void i586_bzero(void *buf, u_int len)
 *
 * Pentium bzero using 8-byte FPU stores (fstl of +0.0) for large counts,
 * falling back to rep stos (intreg_i586_bzero) for small counts or when
 * the kernel FPU lock is already held.  CR0_TS is sampled with smsw and
 * restored with lmsw around the FPU use; a user FPU context, if any
 * (_npxproc != 0), is preserved with fnsave/frstor in a 108-byte stack
 * area.  Clobbers %eax, %ecx, %edx.
 */
195#if defined(I586_CPU) && NNPX > 0
196ENTRY(i586_bzero)
197	movl	4(%esp),%edx			/* %edx = buf */
198	movl	8(%esp),%ecx			/* %ecx = len */
199
200	/*
201	 * The FPU register method is twice as fast as the integer register
202	 * method unless the target is in the L1 cache and we pre-allocate a
203	 * cache line for it (then the integer register method is 4-5 times
204	 * faster).  However, we never pre-allocate cache lines, since that
205	 * would make the integer method 25% or more slower for the common
206	 * case when the target isn't in either the L1 cache or the L2 cache.
207	 * Thus we normally use the FPU register method unless the overhead
208	 * would be too large.
209	 */
210	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
211	jb	intreg_i586_bzero
212
213	/*
214	 * The FPU registers may belong to an application or to fastmove()
215	 * or to another invocation of bcopy() or ourself in a higher level
216	 * interrupt or trap handler.  Preserving the registers is
217	 * complicated since we avoid it if possible at all levels.  We
218	 * want to localize the complications even when that increases them.
219	 * Here the extra work involves preserving CR0_TS in TS.
220	 * `npxproc != NULL' is supposed to be the condition that all the
221	 * FPU resources belong to an application, but npxproc and CR0_TS
222	 * aren't set atomically enough for this condition to work in
223	 * interrupt handlers.
224	 *
225	 * Case 1: FPU registers belong to the application: we must preserve
226	 * the registers if we use them, so we only use the FPU register
227	 * method if the target size is large enough to amortize the extra
228	 * overhead for preserving them.  CR0_TS must be preserved although
229	 * it is very likely to end up as set.
230	 *
231	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
232	 * makes the registers look like they belong to an application so
233	 * that cpu_switch() and savectx() don't have to know about it, so
234	 * this case reduces to case 1.
235	 *
236	 * Case 3: FPU registers belong to the kernel: don't use the FPU
237	 * register method.  This case is unlikely, and supporting it would
238	 * be more complicated and might take too much stack.
239	 *
240	 * Case 4: FPU registers don't belong to anyone: the FPU registers
241	 * don't need to be preserved, so we always use the FPU register
242	 * method.  CR0_TS must be preserved although it is very likely to
243	 * always end up as clear.
244	 */
245	cmpl	$0,_npxproc
246	je	i586_bz1
247	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
248	jb	intreg_i586_bzero
249	sarb	$1,kernel_fpu_lock	/* try-lock; CF set => already held */
250	jc	intreg_i586_bzero
251	smsw	%ax			/* %ax = caller's CR0 low word (TS) */
252	clts
253	subl	$108,%esp		/* room for the user's FPU state */
254	fnsave	0(%esp)
255	jmp	i586_bz2
256
257i586_bz1:
258	sarb	$1,kernel_fpu_lock	/* try-lock; CF set => already held */
259	jc	intreg_i586_bzero
260	smsw	%ax
261	clts
262	fninit				/* XXX should avoid needing this */
263i586_bz2:
264	fldz				/* %st(0) = +0.0, the fill pattern */
265
266	/*
267	 * Align to an 8 byte boundary (misalignment in the main loop would
268	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
269	 * already aligned) by always zeroing 8 bytes and using the part up
270	 * to the _next_ alignment position.
271	 */
272	fstl	0(%edx)
273	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
274	addl	$8,%edx
275	andl	$~7,%edx
276	subl	%edx,%ecx
277
278	/*
279	 * Similarly align `len' to a multiple of 8.
280	 */
281	fstl	-8(%edx,%ecx)
282	decl	%ecx
283	andl	$~7,%ecx
284
285	/*
286	 * This wouldn't be any faster if it were unrolled, since the loop
287	 * control instructions are much faster than the fstl and/or done
288	 * in parallel with it so their overhead is insignificant.
289	 */
290fpureg_i586_bzero_loop:
291	fstl	0(%edx)
292	addl	$8,%edx
293	subl	$8,%ecx
294	cmpl	$8,%ecx
295	jae	fpureg_i586_bzero_loop
296
297	cmpl	$0,_npxproc
298	je	i586_bz3
299	frstor	0(%esp)			/* give the app its FPU state back */
300	addl	$108,%esp
301	lmsw	%ax			/* restore caller's CR0_TS */
302	movb	$0xfe,kernel_fpu_lock	/* release the lock */
303	ret
304
305i586_bz3:
306	fstpl	%st(0)			/* pop our fldz; leave stack empty */
307	lmsw	%ax
308	movb	$0xfe,kernel_fpu_lock
309	ret
310
311intreg_i586_bzero:
312	/*
313	 * `rep stos' seems to be the best method in practice for small
314	 * counts.  Fancy methods usually take too long to start up due
315	 * to cache and BTB misses.
316	 */
317	pushl	%edi
318	movl	%edx,%edi
319	xorl	%eax,%eax
320	shrl	$2,%ecx
321	cld
322	rep
323	stosl
324	movl	12(%esp),%ecx		/* reload len (8(%esp) + push) */
325	andl	$3,%ecx
326	jne	1f
327	popl	%edi
328	ret
329
3301:
331	rep
332	stosb
333	popl	%edi
334	ret
335#endif /* I586_CPU && NNPX > 0 */
336
/*
 * void i686_pagezero(void *page)
 *
 * Zero a page (1024 longwords) while avoiding stores to longwords that
 * are already zero: `repe scasl' with %eax == 0 scans forward until it
 * finds a non-zero longword, and only then does a bounded `rep stosl'
 * burst.  This reduces write traffic on mostly-zero pages.  Preserves
 * %ebx and %edi; clobbers %eax, %ecx, %edx; leaves DF clear.
 */
337ENTRY(i686_pagezero)
338	pushl	%edi
339	pushl	%ebx
340
341	movl	12(%esp), %edi			/* %edi = page */
342	movl	$1024, %ecx			/* longwords remaining */
343	cld
344
345	ALIGN_TEXT
3461:
347	xorl	%eax, %eax
348	repe
349	scasl					/* skip longwords already 0 */
350	jnz	2f
351
	/* Scanned to the end with everything zero -- done. */
352	popl	%ebx
353	popl	%edi
354	ret
355
356	ALIGN_TEXT
357
3582:
	/* scasl overshot the non-zero longword; back up to it. */
359	incl	%ecx
360	subl	$4, %edi
361
362	movl	%ecx, %edx			/* %edx = longwords left */
363	cmpl	$16, %ecx
364
365	jge	3f
366
	/*
	 * Near the end of the page: clamp the store burst so it stops at
	 * the next 64-byte (16-longword) boundary rather than overrunning.
	 */
367	movl	%edi, %ebx
368	andl	$0x3f, %ebx
369	shrl	%ebx
370	shrl	%ebx				/* byte offset -> longwords */
371	movl	$16, %ecx
372	subl	%ebx, %ecx
373
3743:
375	subl	%ecx, %edx			/* account for stored burst */
376	rep
377	stosl
378
379	movl	%edx, %ecx
380	testl	%edx, %edx
381	jnz	1b				/* more page left: rescan */
382
383	popl	%ebx
384	popl	%edi
385	ret
386
/*
 * void fillw(int pat, void *base, size_t cnt)
 *
 * Store `cnt' 16-bit words of `pat' (low word of the int argument) at
 * `base' with rep stosw.  Preserves %edi; clobbers %eax, %ecx; leaves
 * DF clear.
 */
387/* fillw(pat, base, cnt) */
388ENTRY(fillw)
389	pushl	%edi
390	movl	8(%esp),%eax			/* %ax = pattern */
391	movl	12(%esp),%edi			/* %edi = base */
392	movl	16(%esp),%ecx			/* %ecx = word count */
393	cld
394	rep
395	stosw
396	popl	%edi
397	ret
398
/*
 * void bcopyb(const void *src, void *dst, size_t len)
 *
 * Byte-at-a-time bcopy that handles overlapping regions: if dst lies
 * within [src, src+len) (unsigned dst-src < len), it copies backwards
 * with DF set, restoring DF before returning.  Preserves %esi/%edi;
 * clobbers %eax, %ecx.
 */
399ENTRY(bcopyb)
400	pushl	%esi
401	pushl	%edi
402	movl	12(%esp),%esi
403	movl	16(%esp),%edi
404	movl	20(%esp),%ecx
405	movl	%edi,%eax
406	subl	%esi,%eax
407	cmpl	%ecx,%eax			/* overlapping && src < dst? */
408	jb	1f
409	cld					/* nope, copy forwards */
410	rep
411	movsb
412	popl	%edi
413	popl	%esi
414	ret
415
416	ALIGN_TEXT
4171:
418	addl	%ecx,%edi			/* copy backwards. */
419	addl	%ecx,%esi
420	decl	%edi
421	decl	%esi
422	std
423	rep
424	movsb
425	popl	%edi
426	popl	%esi
427	cld					/* restore expected DF=0 */
428	ret
429
/*
 * bcopy(src, dst, len) / ovbcopy(src, dst, len)
 *
 * Tail-jump through the per-CPU dispatch vectors set up in .data above;
 * all arguments stay on the stack for the selected implementation.
 * MEXITCOUNT keeps the profiling exit counters consistent.
 */
430ENTRY(bcopy)
431	MEXITCOUNT
432	jmp	*_bcopy_vector
433
434ENTRY(ovbcopy)
435	MEXITCOUNT
436	jmp	*_ovbcopy_vector
437
/*
 * Overlap-safe bcopy: forward longword copy with a 0-3 byte tail, or,
 * when dst lies within [src, src+len), a backward copy that first moves
 * the fractional bytes bytewise and then the longwords with DF set.
 * Preserves %esi/%edi; clobbers %eax, %ecx; returns with DF clear.
 */
438/*
439 * generic_bcopy(src, dst, cnt)
440 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
441 */
442ENTRY(generic_bcopy)
443	pushl	%esi
444	pushl	%edi
445	movl	12(%esp),%esi
446	movl	16(%esp),%edi
447	movl	20(%esp),%ecx
448
449	movl	%edi,%eax
450	subl	%esi,%eax
451	cmpl	%ecx,%eax			/* overlapping && src < dst? */
452	jb	1f
453
454	shrl	$2,%ecx				/* copy by 32-bit words */
455	cld					/* nope, copy forwards */
456	rep
457	movsl
458	movl	20(%esp),%ecx
459	andl	$3,%ecx				/* any bytes left? */
460	rep
461	movsb
462	popl	%edi
463	popl	%esi
464	ret
465
466	ALIGN_TEXT
4671:
468	addl	%ecx,%edi			/* copy backwards */
469	addl	%ecx,%esi
470	decl	%edi
471	decl	%esi
472	andl	$3,%ecx				/* any fractional bytes? */
473	std
474	rep
475	movsb
476	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
477	shrl	$2,%ecx
478	subl	$3,%esi
479	subl	$3,%edi
480	rep
481	movsl
482	popl	%edi
483	popl	%esi
484	cld
485	ret
486
/*
 * void i586_bcopy(const void *src, void *dst, size_t len)
 *
 * Pentium bcopy.  Non-overlapping copies of >= 1024 bytes go through a
 * 64-byte fildq/fistpq FPU loop, with the source pre-read in 256-byte
 * strides (one load per 32-byte cache line) to warm the D-cache in
 * DCACHE_SIZE-bounded sections.  Smaller or FPU-lock-contended copies,
 * and the final < 64-byte residue, use the rep movs path duplicated
 * from generic_bcopy; backward overlapping copies mirror generic_bcopy.
 * CR0_TS is held in %dx across the FPU section; an application's FPU
 * context (_npxproc != 0) is preserved with fnsave/frstor on the stack.
 */
487#if defined(I586_CPU) && NNPX > 0
488ENTRY(i586_bcopy)
489	pushl	%esi
490	pushl	%edi
491	movl	12(%esp),%esi
492	movl	16(%esp),%edi
493	movl	20(%esp),%ecx
494
495	movl	%edi,%eax
496	subl	%esi,%eax
497	cmpl	%ecx,%eax			/* overlapping && src < dst? */
498	jb	1f
499
500	cmpl	$1024,%ecx
501	jb	small_i586_bcopy
502
503	sarb	$1,kernel_fpu_lock	/* try-lock; CF set => already held */
504	jc	small_i586_bcopy
505	cmpl	$0,_npxproc
506	je	i586_bc1
507	smsw	%dx			/* %dx = caller's CR0 low word (TS) */
508	clts
509	subl	$108,%esp		/* room for the user's FPU state */
510	fnsave	0(%esp)
511	jmp	4f
512
513i586_bc1:
514	smsw	%dx
515	clts
516	fninit				/* XXX should avoid needing this */
517
	/* Outer loop: process one D-cache-sized section per iteration. */
518	ALIGN_TEXT
5194:
520	pushl	%ecx
521#define	DCACHE_SIZE	8192
522	cmpl	$(DCACHE_SIZE-512)/2,%ecx
523	jbe	2f
524	movl	$(DCACHE_SIZE-512)/2,%ecx
5252:
526	subl	%ecx,0(%esp)		/* saved total -= this section */
527	cmpl	$256,%ecx
528	jb	5f			/* XXX should prefetch if %ecx >= 32 */
529	pushl	%esi
530	pushl	%ecx
	/* Pre-read the section: one touch per 32-byte cache line. */
531	ALIGN_TEXT
5323:
533	movl	0(%esi),%eax
534	movl	32(%esi),%eax
535	movl	64(%esi),%eax
536	movl	96(%esi),%eax
537	movl	128(%esi),%eax
538	movl	160(%esi),%eax
539	movl	192(%esi),%eax
540	movl	224(%esi),%eax
541	addl	$256,%esi
542	subl	$256,%ecx
543	cmpl	$256,%ecx
544	jae	3b
545	popl	%ecx
546	popl	%esi
5475:
	/* Main copy: 64 bytes per iteration through the FPU registers. */
548	ALIGN_TEXT
549large_i586_bcopy_loop:
550	fildq	0(%esi)
551	fildq	8(%esi)
552	fildq	16(%esi)
553	fildq	24(%esi)
554	fildq	32(%esi)
555	fildq	40(%esi)
556	fildq	48(%esi)
557	fildq	56(%esi)
558	fistpq	56(%edi)
559	fistpq	48(%edi)
560	fistpq	40(%edi)
561	fistpq	32(%edi)
562	fistpq	24(%edi)
563	fistpq	16(%edi)
564	fistpq	8(%edi)
565	fistpq	0(%edi)
566	addl	$64,%esi
567	addl	$64,%edi
568	subl	$64,%ecx
569	cmpl	$64,%ecx
570	jae	large_i586_bcopy_loop
571	popl	%eax			/* remaining total from outer loop */
572	addl	%eax,%ecx
573	cmpl	$64,%ecx
574	jae	4b
575
576	cmpl	$0,_npxproc
577	je	i586_bc2
578	frstor	0(%esp)			/* give the app its FPU state back */
579	addl	$108,%esp
580i586_bc2:
581	lmsw	%dx			/* restore caller's CR0_TS */
582	movb	$0xfe,kernel_fpu_lock	/* release the lock */
583
584/*
585 * This is a duplicate of the main part of generic_bcopy.  See the comments
586 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
587 * would mess up high resolution profiling.
588 */
589	ALIGN_TEXT
590small_i586_bcopy:
591	shrl	$2,%ecx
592	cld
593	rep
594	movsl
595	movl	20(%esp),%ecx
596	andl	$3,%ecx
597	rep
598	movsb
599	popl	%edi
600	popl	%esi
601	ret
602
603	ALIGN_TEXT
6041:
605	addl	%ecx,%edi
606	addl	%ecx,%esi
607	decl	%edi
608	decl	%esi
609	andl	$3,%ecx
610	std
611	rep
612	movsb
613	movl	20(%esp),%ecx
614	shrl	$2,%ecx
615	subl	$3,%esi
616	subl	$3,%edi
617	rep
618	movsl
619	popl	%edi
620	popl	%esi
621	cld
622	ret
623#endif /* I586_CPU && NNPX > 0 */
624
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * Forward-only longword copy with a 0-3 byte tail; returns dst in %eax
 * per the C memcpy contract.  Note the argument order is (dst, src),
 * the reverse of bcopy.  Preserves %esi/%edi; clobbers %ecx.
 */
625/*
626 * Note: memcpy does not support overlapping copies
627 */
628ENTRY(memcpy)
629	pushl	%edi
630	pushl	%esi
631	movl	12(%esp),%edi
632	movl	16(%esp),%esi
633	movl	20(%esp),%ecx
634	movl	%edi,%eax			/* return value = dst */
635	shrl	$2,%ecx				/* copy by 32-bit words */
636	cld					/* always copy forwards */
637	rep
638	movsl
639	movl	20(%esp),%ecx
640	andl	$3,%ecx				/* any bytes left? */
641	rep
642	movsb
643	popl	%esi
644	popl	%edi
645	ret
646
647
648/*****************************************************************************/
649/* copyout and fubyte family                                                 */
650/*****************************************************************************/
651/*
652 * Access user memory from inside the kernel. These routines and possibly
653 * the math- and DOS emulators should be the only places that do this.
654 *
655 * We have to access the memory with user's permissions, so use a segment
656 * selector with RPL 3. For writes to user space we have to additionally
657 * check the PTE for write permission, because the 386 does not check
658 * write permissions when we are executing with EPL 0. The 486 does check
659 * this if the WP bit is set in CR0, so we can use a simpler version here.
660 *
661 * These routines set curpcb->onfault for the time they execute. When a
662 * protection violation occurs inside the functions, the trap handler
663 * returns to *curpcb->onfault instead of the function.
664 */
665
/* Tail-jump to the CPU-specific copyout chosen at boot (see .data vectors). */
666/* copyout(from_kernel, to_user, len) */
667ENTRY(copyout)
668	MEXITCOUNT
669	jmp	*_copyout_vector
670
/*
 * int generic_copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * Copy len bytes from kernel space to user space.  Returns 0 on success
 * or EFAULT on a bad/unwritable user address.  curpcb->pcb_onfault is
 * pointed at copyout_fault for the duration, so a page fault inside the
 * copy unwinds through copyout_fault (which must pop exactly the three
 * registers pushed here).  On a real 386 (no CR0.WP honoring in ring 0)
 * every destination PTE is checked by hand, calling trapwrite() to
 * simulate the missing write fault.
 */
671ENTRY(generic_copyout)
672	movl	_curpcb,%eax
673	movl	$copyout_fault,PCB_ONFAULT(%eax)
674	pushl	%esi
675	pushl	%edi
676	pushl	%ebx
677	movl	16(%esp),%esi
678	movl	20(%esp),%edi
679	movl	24(%esp),%ebx
680	testl	%ebx,%ebx			/* anything to do? */
681	jz	done_copyout
682
683	/*
684	 * Check explicitly for non-user addresses.  If 486 write protection
685	 * is being used, this check is essential because we are in kernel
686	 * mode so the h/w does not provide any protection against writing
687	 * kernel addresses.
688	 */
689
690	/*
691	 * First, prevent address wrapping.
692	 */
693	movl	%edi,%eax
694	addl	%ebx,%eax
695	jc	copyout_fault
696/*
697 * XXX STOP USING VM_MAXUSER_ADDRESS.
698 * It is an end address, not a max, so every time it is used correctly it
699 * looks like there is an off by one error, and of course it caused an off
700 * by one error in several places.
701 */
702	cmpl	$VM_MAXUSER_ADDRESS,%eax
703	ja	copyout_fault
704
705#if defined(I386_CPU)
706
707#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
708	cmpl	$CPUCLASS_386,_cpu_class
709	jne	3f
710#endif
711/*
712 * We have to check each PTE for user write permission.
713 * The checking may cause a page fault, so it is important to set
714 * up everything for return via copyout_fault before here.
715 */
716	/* compute number of pages */
717	movl	%edi,%ecx
718	andl	$PAGE_MASK,%ecx
719	addl	%ebx,%ecx
720	decl	%ecx
721	shrl	$IDXSHIFT+2,%ecx
722	incl	%ecx
723
724	/* compute PTE offset for start address */
725	movl	%edi,%edx
726	shrl	$IDXSHIFT,%edx
727	andb	$0xfc,%dl
728
7291:
730	/* check PTE for each page */
731	leal	_PTmap(%edx),%eax
732	shrl	$IDXSHIFT,%eax
733	andb	$0xfc,%al
734	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
735	je	4f
736	movb	_PTmap(%edx),%al
737	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
738	cmpb	$PG_V|PG_RW|PG_U,%al
739	je	2f
740
7414:
742	/* simulate a trap */
743	pushl	%edx
744	pushl	%ecx
745	shll	$IDXSHIFT,%edx
746	pushl	%edx
747	call	_trapwrite			/* trapwrite(addr) */
748	popl	%edx				/* pop the addr argument */
749	popl	%ecx
750	popl	%edx
751
752	testl	%eax,%eax			/* if not ok, return EFAULT */
753	jnz	copyout_fault
754
7552:
756	addl	$4,%edx
757	decl	%ecx
758	jnz	1b				/* check next page */
759#endif /* I386_CPU */
760
761	/* bcopy(%esi, %edi, %ebx) */
7623:
763	movl	%ebx,%ecx
764
765#if defined(I586_CPU) && NNPX > 0
766	ALIGN_TEXT
767slow_copyout:				/* shared tail used by i586_copyout */
768#endif
769	shrl	$2,%ecx
770	cld
771	rep
772	movsl
773	movb	%bl,%cl
774	andb	$3,%cl
775	rep
776	movsb
777
778done_copyout:
779	popl	%ebx
780	popl	%edi
781	popl	%esi
782	xorl	%eax,%eax			/* success */
783	movl	_curpcb,%edx
784	movl	%eax,PCB_ONFAULT(%edx)		/* disarm fault handler */
785	ret
786
	/* Fault target: stack layout must match the three pushes above. */
787	ALIGN_TEXT
788copyout_fault:
789	popl	%ebx
790	popl	%edi
791	popl	%esi
792	movl	_curpcb,%edx
793	movl	$0,PCB_ONFAULT(%edx)
794	movl	$EFAULT,%eax
795	ret
796
/*
 * int i586_copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * Pentium copyout: validates the user range exactly like generic_copyout
 * (sharing its copyout_fault/done_copyout tails, so the register pushes
 * must match), then hands copies of >= 1024 bytes to fastmove() and
 * shorter ones to generic_copyout's slow_copyout path.
 */
797#if defined(I586_CPU) && NNPX > 0
798ENTRY(i586_copyout)
799	/*
800	 * Duplicated from generic_copyout.  Could be done a bit better.
801	 */
802	movl	_curpcb,%eax
803	movl	$copyout_fault,PCB_ONFAULT(%eax)
804	pushl	%esi
805	pushl	%edi
806	pushl	%ebx
807	movl	16(%esp),%esi
808	movl	20(%esp),%edi
809	movl	24(%esp),%ebx
810	testl	%ebx,%ebx			/* anything to do? */
811	jz	done_copyout
812
813	/*
814	 * Check explicitly for non-user addresses.  If 486 write protection
815	 * is being used, this check is essential because we are in kernel
816	 * mode so the h/w does not provide any protection against writing
817	 * kernel addresses.
818	 */
819
820	/*
821	 * First, prevent address wrapping.
822	 */
823	movl	%edi,%eax
824	addl	%ebx,%eax
825	jc	copyout_fault
826/*
827 * XXX STOP USING VM_MAXUSER_ADDRESS.
828 * It is an end address, not a max, so every time it is used correctly it
829 * looks like there is an off by one error, and of course it caused an off
830 * by one error in several places.
831 */
832	cmpl	$VM_MAXUSER_ADDRESS,%eax
833	ja	copyout_fault
834
835	/* bcopy(%esi, %edi, %ebx) */
8363:
837	movl	%ebx,%ecx
838	/*
839	 * End of duplicated code.
840	 */
841
842	cmpl	$1024,%ecx
843	jb	slow_copyout
844
845	pushl	%ecx				/* fastmove(len) arg */
846	call	_fastmove
847	addl	$4,%esp
848	jmp	done_copyout
849#endif /* I586_CPU && NNPX > 0 */
850
/* Tail-jump to the CPU-specific copyin chosen at boot (see .data vectors). */
851/*
852 * copyin(from_user, to_kernel, len)
853 *
854 * MPSAFE
855 */
856ENTRY(copyin)
857	MEXITCOUNT
858	jmp	*_copyin_vector
859
/*
 * int generic_copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * Copy len bytes from user space to kernel space.  Returns 0 on success
 * or EFAULT on a bad user address.  curpcb->pcb_onfault is armed with
 * copyin_fault so page faults unwind there (it must pop exactly the two
 * registers pushed here).  No PTE walk is needed: reads of unmapped user
 * memory always fault regardless of CPU class.
 */
860ENTRY(generic_copyin)
861	movl	_curpcb,%eax
862	movl	$copyin_fault,PCB_ONFAULT(%eax)
863	pushl	%esi
864	pushl	%edi
865	movl	12(%esp),%esi			/* caddr_t from */
866	movl	16(%esp),%edi			/* caddr_t to */
867	movl	20(%esp),%ecx			/* size_t  len */
868
869	/*
870	 * make sure address is valid
871	 */
872	movl	%esi,%edx
873	addl	%ecx,%edx
874	jc	copyin_fault			/* wrapped around */
875	cmpl	$VM_MAXUSER_ADDRESS,%edx
876	ja	copyin_fault
877
878#if defined(I586_CPU) && NNPX > 0
879	ALIGN_TEXT
880slow_copyin:				/* shared tail used by i586_copyin */
881#endif
882	movb	%cl,%al				/* stash low bits of len */
883	shrl	$2,%ecx				/* copy longword-wise */
884	cld
885	rep
886	movsl
887	movb	%al,%cl
888	andb	$3,%cl				/* copy remaining bytes */
889	rep
890	movsb
891
892#if defined(I586_CPU) && NNPX > 0
893	ALIGN_TEXT
894done_copyin:				/* shared tail used by i586_copyin */
895#endif
896	popl	%edi
897	popl	%esi
898	xorl	%eax,%eax			/* success */
899	movl	_curpcb,%edx
900	movl	%eax,PCB_ONFAULT(%edx)		/* disarm fault handler */
901	ret
902
	/* Fault target: stack layout must match the two pushes above. */
903	ALIGN_TEXT
904copyin_fault:
905	popl	%edi
906	popl	%esi
907	movl	_curpcb,%edx
908	movl	$0,PCB_ONFAULT(%edx)
909	movl	$EFAULT,%eax
910	ret
911
/*
 * int i586_copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * Pentium copyin: validates the user range exactly like generic_copyin
 * (sharing its copyin_fault/done_copyin tails), then hands copies of
 * >= 1024 bytes to fastmove() and shorter ones to slow_copyin.  The
 * extra %ebx push makes the stack match what fastmove_tail_fault
 * expects to unwind (see the XXX below).
 */
912#if defined(I586_CPU) && NNPX > 0
913ENTRY(i586_copyin)
914	/*
915	 * Duplicated from generic_copyin.  Could be done a bit better.
916	 */
917	movl	_curpcb,%eax
918	movl	$copyin_fault,PCB_ONFAULT(%eax)
919	pushl	%esi
920	pushl	%edi
921	movl	12(%esp),%esi			/* caddr_t from */
922	movl	16(%esp),%edi			/* caddr_t to */
923	movl	20(%esp),%ecx			/* size_t  len */
924
925	/*
926	 * make sure address is valid
927	 */
928	movl	%esi,%edx
929	addl	%ecx,%edx
930	jc	copyin_fault
931	cmpl	$VM_MAXUSER_ADDRESS,%edx
932	ja	copyin_fault
933	/*
934	 * End of duplicated code.
935	 */
936
937	cmpl	$1024,%ecx
938	jb	slow_copyin
939
940	pushl	%ebx			/* XXX prepare for fastmove_fault */
941	pushl	%ecx
942	call	_fastmove
943	addl	$8,%esp
944	jmp	done_copyin
945#endif /* I586_CPU && NNPX > 0 */
946
/*
 * fastmove(len) -- Pentium FPU block move helper for copyin/copyout.
 *
 * Source in %esi, destination in %edi, length on the stack (see the
 * original comment below).  Requires both addresses 8-byte aligned and
 * len > 63 to use the FPU path; otherwise falls through to a rep movs
 * tail.  Any application FPU context is fnsave'd into the pcb, the copy
 * runs in 1792-byte sections (cache pre-read + 64-byte fildq/fistpq
 * loop), then CR0_TS is re-set via smsw/lmsw and npxproc cleared.
 * Faults unwind through fastmove_fault / fastmove_tail_fault, whose
 * stack fixups must match this frame AND the caller's pushes in
 * i586_copyin/i586_copyout.  Uses -4/-8/-12(%ebp) as spill slots.
 */
947#if defined(I586_CPU) && NNPX > 0
948/* fastmove(src, dst, len)
949	src in %esi
950	dst in %edi
951	len in %ecx		XXX changed to on stack for profiling
952	uses %eax and %edx for tmp. storage
953 */
954/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
955ENTRY(fastmove)
956	pushl	%ebp
957	movl	%esp,%ebp
958	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 spills */
959
960	movl	8(%ebp),%ecx
961	cmpl	$63,%ecx
962	jbe	fastmove_tail
963
964	testl	$7,%esi	/* check if src addr is multiple of 8 */
965	jnz	fastmove_tail
966
967	testl	$7,%edi	/* check if dst addr is multiple of 8 */
968	jnz	fastmove_tail
969
970/* if (npxproc != NULL) { */
971	cmpl	$0,_npxproc
972	je	6f
973/*    fnsave(&curpcb->pcb_savefpu); */
974	movl	_curpcb,%eax
975	fnsave	PCB_SAVEFPU(%eax)
976/*   npxproc = NULL; */
977	movl	$0,_npxproc
978/* } */
9796:
980/* now we own the FPU. */
981
982/*
983 * The process' FP state is saved in the pcb, but if we get
984 * switched, the cpu_switch() will store our FP state in the
985 * pcb.  It should be possible to avoid all the copying for
986 * this, e.g., by setting a flag to tell cpu_switch() to
987 * save the state somewhere else.
988 */
989/* tmp = curpcb->pcb_savefpu; */
990	movl	%ecx,-12(%ebp)
991	movl	%esi,-8(%ebp)
992	movl	%edi,-4(%ebp)
993	movl	%esp,%edi
994	movl	_curpcb,%esi
995	addl	$PCB_SAVEFPU,%esi
996	cld
997	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
998	rep
999	movsl
1000	movl	-12(%ebp),%ecx
1001	movl	-8(%ebp),%esi
1002	movl	-4(%ebp),%edi
1003/* stop_emulating(); */
1004	clts
1005/* npxproc = curproc; */
1006	movl	_curproc,%eax
1007	movl	%eax,_npxproc
1008	movl	_curpcb,%eax
1009	movl	$fastmove_fault,PCB_ONFAULT(%eax)
/* Outer loop: one <= 1792-byte section per iteration; -12(%ebp) = rest. */
10104:
1011	movl	%ecx,-12(%ebp)
1012	cmpl	$1792,%ecx
1013	jbe	2f
1014	movl	$1792,%ecx
10152:
1016	subl	%ecx,-12(%ebp)
1017	cmpl	$256,%ecx
1018	jb	5f
1019	movl	%ecx,-8(%ebp)
1020	movl	%esi,-4(%ebp)
	/* Pre-read the section: one touch per 32-byte cache line. */
1021	ALIGN_TEXT
10223:
1023	movl	0(%esi),%eax
1024	movl	32(%esi),%eax
1025	movl	64(%esi),%eax
1026	movl	96(%esi),%eax
1027	movl	128(%esi),%eax
1028	movl	160(%esi),%eax
1029	movl	192(%esi),%eax
1030	movl	224(%esi),%eax
1031	addl	$256,%esi
1032	subl	$256,%ecx
1033	cmpl	$256,%ecx
1034	jae	3b
1035	movl	-8(%ebp),%ecx
1036	movl	-4(%ebp),%esi
10375:
	/* Main copy: 64 bytes per iteration through the FPU registers. */
1038	ALIGN_TEXT
1039fastmove_loop:
1040	fildq	0(%esi)
1041	fildq	8(%esi)
1042	fildq	16(%esi)
1043	fildq	24(%esi)
1044	fildq	32(%esi)
1045	fildq	40(%esi)
1046	fildq	48(%esi)
1047	fildq	56(%esi)
1048	fistpq	56(%edi)
1049	fistpq	48(%edi)
1050	fistpq	40(%edi)
1051	fistpq	32(%edi)
1052	fistpq	24(%edi)
1053	fistpq	16(%edi)
1054	fistpq	8(%edi)
1055	fistpq	0(%edi)
1056	addl	$-64,%ecx
1057	addl	$64,%esi
1058	addl	$64,%edi
1059	cmpl	$63,%ecx
1060	ja	fastmove_loop
1061	movl	-12(%ebp),%eax		/* add back the unprocessed rest */
1062	addl	%eax,%ecx
1063	cmpl	$64,%ecx
1064	jae	4b
1065
1066/* curpcb->pcb_savefpu = tmp; */
1067	movl	%ecx,-12(%ebp)
1068	movl	%esi,-8(%ebp)
1069	movl	%edi,-4(%ebp)
1070	movl	_curpcb,%edi
1071	addl	$PCB_SAVEFPU,%edi
1072	movl	%esp,%esi
1073	cld
1074	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1075	rep
1076	movsl
1077	movl	-12(%ebp),%ecx
1078	movl	-8(%ebp),%esi
1079	movl	-4(%ebp),%edi
1080
1081/* start_emulating(); */
1082	smsw	%ax
1083	orb	$CR0_TS,%al
1084	lmsw	%ax
1085/* npxproc = NULL; */
1086	movl	$0,_npxproc
1087
	/* Copy the final < 64 bytes (or everything, if unaligned/small). */
1088	ALIGN_TEXT
1089fastmove_tail:
1090	movl	_curpcb,%eax
1091	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)
1092
1093	movb	%cl,%al
1094	shrl	$2,%ecx				/* copy longword-wise */
1095	cld
1096	rep
1097	movsl
1098	movb	%al,%cl
1099	andb	$3,%cl				/* copy remaining bytes */
1100	rep
1101	movsb
1102
1103	movl	%ebp,%esp
1104	popl	%ebp
1105	ret
1106
/*
 * Fault during the FPU section: put the saved FPU image back into the
 * pcb, re-set CR0_TS, clear npxproc, then fall into the common unwind.
 */
1107	ALIGN_TEXT
1108fastmove_fault:
1109	movl	_curpcb,%edi
1110	addl	$PCB_SAVEFPU,%edi
1111	movl	%esp,%esi
1112	cld
1113	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1114	rep
1115	movsl
1116
1117	smsw	%ax
1118	orb	$CR0_TS,%al
1119	lmsw	%ax
1120	movl	$0,_npxproc
1121
/*
 * Common unwind: tear down our frame, then pop the caller's pushed args
 * and registers (the addl $8/popl sequence mirrors i586_copyin's
 * pushl %ebx; pushl %ecx; call plus its two register pushes) and return
 * EFAULT with pcb_onfault disarmed.
 */
1122fastmove_tail_fault:
1123	movl	%ebp,%esp
1124	popl	%ebp
1125	addl	$8,%esp
1126	popl	%ebx
1127	popl	%edi
1128	popl	%esi
1129	movl	_curpcb,%edx
1130	movl	$0,PCB_ONFAULT(%edx)
1131	movl	$EFAULT,%eax
1132	ret
1133#endif /* I586_CPU && NNPX > 0 */
1134
/*
 * long fuword(const void *addr)
 *
 * Fetch a 32-bit word from user memory.  Returns the word, or -1 (via
 * fusufault) on a bad address or page fault.  Note -1 is therefore
 * ambiguous with a legitimately stored 0xffffffff.
 */
1135/*
1136 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
1137 *
1138 * MP SAFE
1139 */
1140ENTRY(fuword)
1141	movl	_curpcb,%ecx
1142	movl	$fusufault,PCB_ONFAULT(%ecx)
1143	movl	4(%esp),%edx			/* from */
1144
1145	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
1146	ja	fusufault
1147
1148	movl	(%edx),%eax
1149	movl	$0,PCB_ONFAULT(%ecx)
1150	ret
1151
/*
 * fuswintr(addr) / suswintr(addr, word): interrupt-safe user-memory
 * word fetch/store stubs.  Deliberately always fail (return -1).
 */
1152/*
1153 * These two routines are called from the profiling code, potentially
1154 * at interrupt time. If they fail, that's okay, good things will
1155 * happen later. Fail all the time for now - until the trap code is
1156 * able to deal with this.
1157 */
1158ALTENTRY(suswintr)
1159ENTRY(fuswintr)
1160	movl	$-1,%eax
1161	ret
1162
/*
 * int fusword(const void *addr)
 *
 * Fetch a 16-bit word from user memory, zero-extended into %eax.
 * Returns -1 (via fusufault) on a bad address or page fault.
 */
1163/*
1164 * MP SAFE
1165 */
1166ENTRY(fusword)
1167	movl	_curpcb,%ecx
1168	movl	$fusufault,PCB_ONFAULT(%ecx)
1169	movl	4(%esp),%edx
1170
1171	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
1172	ja	fusufault
1173
1174	movzwl	(%edx),%eax
1175	movl	$0,PCB_ONFAULT(%ecx)
1176	ret
1177
/*
 * int fubyte(const void *addr)
 *
 * Fetch a byte from user memory, zero-extended into %eax.  Returns -1
 * (via fusufault) on a bad address or page fault.
 */
1178/*
1179 * MP SAFE
1180 */
1181ENTRY(fubyte)
1182	movl	_curpcb,%ecx
1183	movl	$fusufault,PCB_ONFAULT(%ecx)
1184	movl	4(%esp),%edx
1185
1186	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
1187	ja	fusufault
1188
1189	movzbl	(%edx),%eax
1190	movl	$0,PCB_ONFAULT(%ecx)
1191	ret
1192
/*
 * Common fault handler for the fu*/su* routines: disarm pcb_onfault
 * and return -1 (xor then decl leaves %eax == -1).
 */
1193	ALIGN_TEXT
1194fusufault:
1195	movl	_curpcb,%ecx
1196	xorl	%eax,%eax
1197	movl	%eax,PCB_ONFAULT(%ecx)
1198	decl	%eax
1199	ret
1200
/*
 * int suword(void *addr, long word)
 *
 * Store a 32-bit word to user memory.  Returns 0 on success, -1 (via
 * fusufault) on a bad or unwritable address.  On a real 386 the
 * destination PTE is checked by hand (with trapwrite() to simulate the
 * missing ring-0 write-protect fault); note the "page boundary crossing
 * is still not handled" XXX for that path.
 */
1201/*
1202 * su{byte,sword,word}: write a byte (word, longword) to user memory
1203 */
1204ENTRY(suword)
1205	movl	_curpcb,%ecx
1206	movl	$fusufault,PCB_ONFAULT(%ecx)
1207	movl	4(%esp),%edx
1208
1209#if defined(I386_CPU)
1210
1211#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1212	cmpl	$CPUCLASS_386,_cpu_class
1213	jne	2f				/* we only have to set the right segment selector */
1214#endif /* I486_CPU || I586_CPU || I686_CPU */
1215
1216	/* XXX - page boundary crossing is still not handled */
1217	movl	%edx,%eax
1218	shrl	$IDXSHIFT,%edx
1219	andb	$0xfc,%dl
1220
1221	leal	_PTmap(%edx),%ecx
1222	shrl	$IDXSHIFT,%ecx
1223	andb	$0xfc,%cl
1224	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
1225	je	4f
1226	movb	_PTmap(%edx),%dl
1227	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1228	cmpb	$PG_V|PG_RW|PG_U,%dl
1229	je	1f
1230
12314:
1232	/* simulate a trap */
1233	pushl	%eax
1234	call	_trapwrite
1235	popl	%edx				/* remove junk parameter from stack */
1236	testl	%eax,%eax
1237	jnz	fusufault
12381:
1239	movl	4(%esp),%edx
1240#endif
1241
12422:
1243	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
1244	ja	fusufault
1245
1246	movl	8(%esp),%eax
1247	movl	%eax,(%edx)
1248	xorl	%eax,%eax			/* success */
1249	movl	_curpcb,%ecx
1250	movl	%eax,PCB_ONFAULT(%ecx)
1251	ret
1252
/*
 * int susword(void *addr, int word)
 *
 * Store a 16-bit word to user memory.  Returns 0 on success, -1 (via
 * fusufault) on a bad or unwritable address.  Same structure and 386
 * PTE-check caveats as suword above.
 */
1253ENTRY(susword)
1254	movl	_curpcb,%ecx
1255	movl	$fusufault,PCB_ONFAULT(%ecx)
1256	movl	4(%esp),%edx
1257
1258#if defined(I386_CPU)
1259
1260#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1261	cmpl	$CPUCLASS_386,_cpu_class
1262	jne	2f
1263#endif /* I486_CPU || I586_CPU || I686_CPU */
1264
1265	/* XXX - page boundary crossing is still not handled */
1266	movl	%edx,%eax
1267	shrl	$IDXSHIFT,%edx
1268	andb	$0xfc,%dl
1269
1270	leal	_PTmap(%edx),%ecx
1271	shrl	$IDXSHIFT,%ecx
1272	andb	$0xfc,%cl
1273	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
1274	je	4f
1275	movb	_PTmap(%edx),%dl
1276	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1277	cmpb	$PG_V|PG_RW|PG_U,%dl
1278	je	1f
1279
12804:
1281	/* simulate a trap */
1282	pushl	%eax
1283	call	_trapwrite
1284	popl	%edx				/* remove junk parameter from stack */
1285	testl	%eax,%eax
1286	jnz	fusufault
12871:
1288	movl	4(%esp),%edx
1289#endif
1290
12912:
1292	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
1293	ja	fusufault
1294
1295	movw	8(%esp),%ax
1296	movw	%ax,(%edx)
1297	xorl	%eax,%eax			/* success */
1298	movl	_curpcb,%ecx			/* restore trashed register */
1299	movl	%eax,PCB_ONFAULT(%ecx)
1300	ret
1301
/*
 * int subyte(void *addr, int byte) / suibyte(...)
 *
 * Store a byte to user memory (suibyte is an alias entry point).
 * Returns 0 on success, -1 (via fusufault) on a bad or unwritable
 * address.  Same structure and 386 PTE-check caveats as suword above.
 */
1302ALTENTRY(suibyte)
1303ENTRY(subyte)
1304	movl	_curpcb,%ecx
1305	movl	$fusufault,PCB_ONFAULT(%ecx)
1306	movl	4(%esp),%edx
1307
1308#if defined(I386_CPU)
1309
1310#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1311	cmpl	$CPUCLASS_386,_cpu_class
1312	jne	2f
1313#endif /* I486_CPU || I586_CPU || I686_CPU */
1314
1315	movl	%edx,%eax
1316	shrl	$IDXSHIFT,%edx
1317	andb	$0xfc,%dl
1318
1319	leal	_PTmap(%edx),%ecx
1320	shrl	$IDXSHIFT,%ecx
1321	andb	$0xfc,%cl
1322	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
1323	je	4f
1324	movb	_PTmap(%edx),%dl
1325	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1326	cmpb	$PG_V|PG_RW|PG_U,%dl
1327	je	1f
1328
13294:
1330	/* simulate a trap */
1331	pushl	%eax
1332	call	_trapwrite
1333	popl	%edx				/* remove junk parameter from stack */
1334	testl	%eax,%eax
1335	jnz	fusufault
13361:
1337	movl	4(%esp),%edx
1338#endif
1339
13402:
1341	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
1342	ja	fusufault
1343
1344	movb	8(%esp),%al
1345	movb	%al,(%edx)
1346	xorl	%eax,%eax			/* success */
1347	movl	_curpcb,%ecx			/* restore trashed register */
1348	movl	%eax,PCB_ONFAULT(%ecx)
1349	ret
1350
/*
 * Throughout: %edx counts down from maxlen+1; on exit the copied length
 * is recovered as (saved maxlen at 20(%esp)) - %edx.  Faults during the
 * user reads vector to cpystrflt via pcb_onfault.
 */
1351/*
1352 * copyinstr(from, to, maxlen, int *lencopied)
1353 *	copy a string from from to to, stop when a 0 character is reached.
1354 *	return ENAMETOOLONG if string is longer than maxlen, and
1355 *	EFAULT on protection violations. If lencopied is non-zero,
1356 *	return the actual length in *lencopied.
1357 */
1358ENTRY(copyinstr)
1359	pushl	%esi
1360	pushl	%edi
1361	movl	_curpcb,%ecx
1362	movl	$cpystrflt,PCB_ONFAULT(%ecx)
1363
1364	movl	12(%esp),%esi			/* %esi = from */
1365	movl	16(%esp),%edi			/* %edi = to */
1366	movl	20(%esp),%edx			/* %edx = maxlen */
1367
1368	movl	$VM_MAXUSER_ADDRESS,%eax
1369
1370	/* make sure 'from' is within bounds */
1371	subl	%esi,%eax
1372	jbe	cpystrflt
1373
1374	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
1375	cmpl	%edx,%eax
1376	jae	1f
1377	movl	%eax,%edx
1378	movl	%eax,20(%esp)			/* clamp the saved maxlen too */
13791:
1380	incl	%edx
1381	cld
1382
13832:
1384	decl	%edx
1385	jz	3f
1386
1387	lodsb
1388	stosb
1389	orb	%al,%al				/* NUL terminator? */
1390	jnz	2b
1391
1392	/* Success -- 0 byte reached */
1393	decl	%edx
1394	xorl	%eax,%eax
1395	jmp	cpystrflt_x
13963:
1397	/* edx is zero - return ENAMETOOLONG or EFAULT */
1398	cmpl	$VM_MAXUSER_ADDRESS,%esi
1399	jae	cpystrflt			/* ran off the end of user VA */
14004:
1401	movl	$ENAMETOOLONG,%eax
1402	jmp	cpystrflt_x
1403
1404cpystrflt:
1405	movl	$EFAULT,%eax
1406
1407cpystrflt_x:
1408	/* set *lencopied and return %eax */
1409	movl	_curpcb,%ecx
1410	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault handler */
1411	movl	20(%esp),%ecx
1412	subl	%edx,%ecx			/* bytes actually copied */
1413	movl	24(%esp),%edx
1414	testl	%edx,%edx			/* lencopied pointer given? */
1415	jz	1f
1416	movl	%ecx,(%edx)
14171:
1418	popl	%edi
1419	popl	%esi
1420	ret
1421
1422
1423/*
1424 * copystr(from, to, maxlen, int *lencopied)
1425 */
1426ENTRY(copystr)
1427	pushl	%esi
1428	pushl	%edi
1429
1430	movl	12(%esp),%esi			/* %esi = from */
1431	movl	16(%esp),%edi			/* %edi = to */
1432	movl	20(%esp),%edx			/* %edx = maxlen */
1433	incl	%edx
1434	cld
14351:
1436	decl	%edx
1437	jz	4f
1438	lodsb
1439	stosb
1440	orb	%al,%al
1441	jnz	1b
1442
1443	/* Success -- 0 byte reached */
1444	decl	%edx
1445	xorl	%eax,%eax
1446	jmp	6f
14474:
1448	/* edx is zero -- return ENAMETOOLONG */
1449	movl	$ENAMETOOLONG,%eax
1450
14516:
1452	/* set *lencopied and return %eax */
1453	movl	20(%esp),%ecx
1454	subl	%edx,%ecx
1455	movl	24(%esp),%edx
1456	testl	%edx,%edx
1457	jz	7f
1458	movl	%ecx,(%edx)
14597:
1460	popl	%edi
1461	popl	%esi
1462	ret
1463
/*
 * bcmp(b1, b2, length)
 *
 *	Return 0 if the two buffers are byte-for-byte identical, nonzero
 *	otherwise.  Unlike memcmp(), the nonzero result carries no
 *	ordering information.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* %edi = b1 */
	movl	16(%esp),%esi			/* %esi = b2 */
	movl	20(%esp),%edx			/* %edx = length */
	xorl	%eax,%eax			/* assume equal (return 0) */

	movl	%edx,%ecx
	shrl	$2,%ecx				/* compare whole longwords first */
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx				/* then the 0-3 leftover bytes */
	repe					/* if %ecx == 0 this is skipped and ZF=1 from andl falls through */
	cmpsb
	je	2f
1:
	incl	%eax				/* mismatch: return nonzero */
2:
	popl	%esi
	popl	%edi
	ret
1490
1491
1492/*
1493 * Handling of special 386 registers and descriptor tables etc
1494 */
1495/* void lgdt(struct region_descriptor *rdp); */
1496ENTRY(lgdt)
1497	/* reload the descriptor table */
1498	movl	4(%esp),%eax
1499	lgdt	(%eax)
1500
1501	/* flush the prefetch q */
1502	jmp	1f
1503	nop
15041:
1505	/* reload "stale" selectors */
1506	movl	$KDSEL,%eax
1507	movl	%ax,%ds
1508	movl	%ax,%es
1509	movl	%ax,%gs
1510	movl	%ax,%ss
1511#ifdef SMP
1512	movl	$KPSEL,%eax
1513#endif
1514	movl	%ax,%fs
1515
1516	/* reload code selector by turning return into intersegmental return */
1517	movl	(%esp),%eax
1518	pushl	%eax
1519	movl	$KCSEL,4(%esp)
1520	lret
1521
1522/*
1523 * void lidt(struct region_descriptor *rdp);
1524 */
1525ENTRY(lidt)
1526	movl	4(%esp),%eax
1527	lidt	(%eax)
1528	ret
1529
1530/*
1531 * void lldt(u_short sel)
1532 */
1533ENTRY(lldt)
1534	lldt	4(%esp)
1535	ret
1536
1537/*
1538 * void ltr(u_short sel)
1539 */
1540ENTRY(ltr)
1541	ltr	4(%esp)
1542	ret
1543
/*
 * ssdtosd(*ssdp,*sdp)
 *
 * Convert a software segment descriptor (*ssdp: three 32-bit words,
 * presumably base / limit / attributes -- confirm against struct
 * soft_segment_descriptor) into the packed i386 hardware descriptor
 * format, storing the two 32-bit halves of the result at *sdp.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx			/* %ecx = ssdp (offset 8: %ebx was pushed) */
	movl	8(%ecx),%ebx			/* third word (attribute/flag bits) */
	shll	$16,%ebx
	movl	(%ecx),%edx			/* first word */
	roll	$16,%edx
	movb	%dh,%bl				/* shuffle bytes into hardware layout */
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax			/* second word */
	movw	%ax,%dx
	andl	$0xf0000,%eax			/* keep only bits 16-19 (descriptor limit 16-19 slot) */
	orl	%eax,%ebx
	movl	12(%esp),%ecx			/* %ecx = sdp (output) */
	movl	%edx,(%ecx)			/* low half of hardware descriptor */
	movl	%ebx,4(%ecx)			/* high half */
	popl	%ebx
	ret
1564
/* load_cr0(cr0) -- write the machine control word %cr0 */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret
1570
/* rcr0() -- return the current value of %cr0 */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret
1575
/* rcr3() -- return the current page directory base register %cr3 */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret
1580
/* void load_cr3(caddr_t cr3) -- write %cr3; this also flushes the TLB */
ENTRY(load_cr3)
#if defined(SWTCH_OPTIM_STATS)
	incl	_tlb_flush_count		/* count the implicit TLB flush */
#endif
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret
1589
/* rcr4() -- return the current value of %cr4 */
ENTRY(rcr4)
	movl	%cr4,%eax
	ret
1594
/* void load_cr4(caddr_t cr4) -- write the extended control register %cr4 */
ENTRY(load_cr4)
	movl	4(%esp),%eax
	movl	%eax,%cr4
	ret
1600
/* void load_dr6(u_int dr6) -- write the debug status register %dr6 */
ENTRY(load_dr6)
	movl    4(%esp),%eax
	movl    %eax,%dr6
	ret
1606
/* void reset_dbregs() -- clear all hardware debug registers */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0     /* then clear the four breakpoint address registers */
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6     /* and finally the debug status register */
	ret
1617
1618/*****************************************************************************/
1619/* setjump, longjump                                                         */
1620/*****************************************************************************/
1621
1622ENTRY(setjmp)
1623	movl	4(%esp),%eax
1624	movl	%ebx,(%eax)			/* save ebx */
1625	movl	%esp,4(%eax)			/* save esp */
1626	movl	%ebp,8(%eax)			/* save ebp */
1627	movl	%esi,12(%eax)			/* save esi */
1628	movl	%edi,16(%eax)			/* save edi */
1629	movl	(%esp),%edx			/* get rta */
1630	movl	%edx,20(%eax)			/* save eip */
1631	xorl	%eax,%eax			/* return(0); */
1632	ret
1633
/*
 * void longjmp(jmp_buf env)
 *
 * Restore the register state saved by setjmp() (same env layout) and
 * resume at the saved return address, so the original setjmp() call
 * appears to return a second time with value 1.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1646
1647/*
1648 * Support for BB-profiling (gcc -a).  The kernbb program will extract
1649 * the data from the kernel.
1650 */
1651
1652	.data
1653	ALIGN_DATA
1654	.globl bbhead
1655bbhead:
1656	.long 0
1657
1658	.text
1659NON_GPROF_ENTRY(__bb_init_func)
1660	movl	4(%esp),%eax
1661	movl	$1,(%eax)
1662	movl	bbhead,%edx
1663	movl	%edx,16(%eax)
1664	movl	%eax,bbhead
1665	.byte	0xc3				/* avoid macro for `ret' */
1666