/* support.s revision 60303 */
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/sys/i386/i386/support.s 60303 2000-05-10 01:24:23Z obrien $
34 */
35
36#include "opt_smp.h"
37#include "npx.h"
38
39#include <machine/asmacros.h>
40#include <machine/cputypes.h>
41#include <machine/pmap.h>
42#include <machine/specialreg.h>
43
44#include "assym.s"
45
46#define IDXSHIFT	10
47
	.data
/*
 * Per-CPU-class dispatch vectors: each slot holds the address of the
 * implementation to use.  They default to the generic routines here and
 * are presumably repointed at the i586/i686 variants once the CPU type
 * is known (done elsewhere — confirm in the CPU identification code).
 */
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
/*
 * One-byte test-and-set lock protecting in-kernel FPU use.
 * Acquire: `sarb $1,kernel_fpu_lock` — shifting 0xfe right gives CF=0
 * (lock acquired, byte becomes 0xff); shifting 0xff gives CF=1 (busy).
 * Release: store 0xfe back.  Padded to a longword with .space.
 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
69
70	.text
71
72/*
73 * bcopy family
74 * void bzero(void *buf, u_int len)
75 */
76
/*
 * void generic_bzero(void *buf, u_int len)
 *
 * Zero `len' bytes at `buf' using string stores: len/4 longwords with
 * `rep stosl', then the remaining len&3 bytes with `rep stosb'.
 * Clobbers %eax, %ecx; preserves %edi; leaves DF clear.
 */
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi			/* %edi = buf (after push) */
	movl	12(%esp),%ecx			/* %ecx = len */
	xorl	%eax,%eax			/* store value = 0 */
	shrl	$2,%ecx				/* longword count */
	cld
	rep
	stosl
	movl	12(%esp),%ecx			/* reload len for the tail */
	andl	$3,%ecx				/* 0..3 leftover bytes */
	rep
	stosb
	popl	%edi
	ret
92
#if defined(I486_CPU)
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * 486-tuned bzero: unrolled 64-byte stores, then 16-byte, then 4-byte
 * chunks, with the final 0..3 bytes dispatched through a jump table.
 * %edx = current dst, %ecx = bytes remaining, %eax = 0 throughout.
 * Clobbers %eax, %ecx, %edx only (no callee-saved registers used).
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx			/* %edx = buf */
	movl	8(%esp),%ecx			/* %ecx = len */
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b				/* count hit exactly 0: done */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:					/* indexed by remaining count 0..3 */
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)		/* %ecx is 0..3 here */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)		/* 2 bytes + 1 byte */
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
194
#if defined(I586_CPU) && NNPX > 0
/*
 * void i586_bzero(void *buf, u_int len)
 *
 * Pentium-tuned bzero that uses 8-byte FPU stores (fstl of 0.0) for
 * large buffers and falls back to `rep stos' for small ones or when
 * the FPU cannot be borrowed (kernel_fpu_lock busy).
 * %edx = buf, %ecx = len; %ax holds the saved machine status word
 * (CR0_TS) across FPU use and is restored with lmsw on exit.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the FPU lock (CF=1: busy) */
	jc	intreg_i586_bzero
	smsw	%ax			/* save CR0_TS for restore via lmsw */
	clts
	subl	$108,%esp		/* 108 bytes = FNSAVE image size */
	fnsave	0(%esp)			/* preserve the application's FPU state */
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz				/* st(0) = 0.0, the 8-byte store pattern */

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc		/* application state saved earlier? */
	je	i586_bz3
	frstor	0(%esp)			/* yes: restore it and pop the save area */
	addl	$108,%esp
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the FPU lock */
	ret

i586_bz3:
	fstp	%st(0)			/* pop our 0.0; leave FPU stack empty */
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi		/* %edi = buf; len at 12(%esp) after push */
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len */
	andl	$3,%ecx			/* 0..3 tail bytes */
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 */
336
/*
 * void i686_pagezero(void *page)
 *
 * Zero one 4096-byte page (1024 longwords), skipping runs that are
 * already zero: `repe scasl' scans forward over zero longwords and
 * only the mismatching region (rounded out to its 64-byte cache line)
 * is rewritten with `rep stosl'.  This avoids dirtying cache lines
 * that are already zero.
 * %edi = scan/store cursor, %ecx = longwords remaining,
 * %edx = remaining count saved across the partial store.
 */
ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp), %edi			/* %edi = page */
	movl	$1024, %ecx			/* 1024 longwords = 4096 bytes */
	cld

	ALIGN_TEXT
1:
	xorl	%eax, %eax
	repe
	scasl					/* skip longwords that are already 0 */
	jnz	2f				/* found a nonzero longword */

	popl	%ebx
	popl	%edi
	ret					/* whole page was already zero */

	ALIGN_TEXT

2:
	incl	%ecx				/* include the mismatched longword */
	subl	$4, %edi			/* back up to it */

	movl	%ecx, %edx			/* %edx = total longwords left */
	cmpl	$16, %ecx

	jge	3f				/* >= one cache line left: zero 16 now */

	/* Fewer than 16 left: zero only to the end of this 64-byte line. */
	movl	%edi, %ebx
	andl	$0x3f, %ebx			/* byte offset within cache line */
	shrl	%ebx
	shrl	%ebx				/* -> longword offset within line */
	movl	$16, %ecx
	subl	%ebx, %ecx			/* longwords to the line's end */

3:
	subl	%ecx, %edx			/* account for what we store now */
	rep
	stosl

	movl	%edx, %ecx			/* resume scanning the remainder */
	testl	%edx, %edx
	jnz	1b

	popl	%ebx
	popl	%edi
	ret
386
387/* fillw(pat, base, cnt) */
/*
 * void fillw(int pat, void *base, size_t cnt)
 *
 * Store `cnt' 16-bit words of `pat' starting at `base' (rep stosw).
 * Clobbers %eax, %ecx; preserves %edi; leaves DF clear.
 */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax			/* %ax = pattern */
	movl	12(%esp),%edi			/* %edi = base */
	movl	16(%esp),%ecx			/* %ecx = word count */
	cld
	rep
	stosw
	popl	%edi
	ret
398
/*
 * void bcopyb(const void *src, void *dst, size_t len)
 *
 * Byte-at-a-time copy that is safe for overlapping regions: copies
 * forwards normally, backwards (with DF set) when the destination
 * overlaps the source from above.  DF is cleared again before return.
 */
ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi				/* point at the last byte of each */
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld					/* restore the expected DF=0 */
	ret
429
/*
 * void bcopy(const void *src, void *dst, size_t len)
 *
 * Tail-jump through _bcopy_vector to the CPU-selected implementation;
 * the callee sees our caller's stack frame unchanged.
 */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector
433
/*
 * void ovbcopy(const void *src, void *dst, size_t len)
 *
 * Overlap-safe bcopy: tail-jump through _ovbcopy_vector (points at
 * _generic_bcopy by default, which handles overlap).
 */
ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector
437
438/*
439 * generic_bcopy(src, dst, cnt)
440 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
441 */
/*
 * void generic_bcopy(const void *src, void *dst, size_t len)
 *
 * Overlap-safe copy.  Forwards: len/4 longwords then len&3 bytes.
 * Backwards (dst overlaps src from above): the len&3 tail bytes first
 * (byte-wise, DF set), then the longwords.  DF is restored to 0.
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi				/* last byte of each region */
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi				/* step from last byte to last longword */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld					/* restore the expected DF=0 */
	ret
486
#if defined(I586_CPU) && NNPX > 0
/*
 * void i586_bcopy(const void *src, void *dst, size_t len)
 *
 * Pentium-tuned bcopy: for large (>= 1024 byte) non-overlapping copies,
 * borrow the FPU and move 64 bytes per iteration with fildq/fistpq
 * (integer load/store moves the raw 64-bit pattern exactly).  The copy
 * is chunked to half the data cache and each chunk's source is touched
 * one longword per 32 bytes first to prefetch it.  Small copies and the
 * backwards (overlap) case use `rep movs' as in generic_bcopy.
 * %dx holds the saved machine status word (CR0_TS) across FPU use.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx			/* FPU method only worth it when large */
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock		/* try to take the FPU lock (CF=1: busy) */
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx				/* save CR0_TS */
	clts
	subl	$108,%esp			/* 108 bytes = FNSAVE image size */
	fnsave	0(%esp)				/* preserve the application's FPU state */
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	/* Per-chunk loop: copy at most half the D-cache per pass. */
	pushl	%ecx				/* 0(%esp) = bytes beyond this chunk */
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)			/* leftover after this chunk */
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	/* Touch one longword per 32 bytes to pull the source into cache. */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	/* 64 bytes per iteration through the FPU stack (8 x 8 bytes). */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)			/* pop in reverse load order */
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax				/* bytes beyond this chunk */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* more full chunks to do */

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)				/* restore the application's FPU state */
	addl	$108,%esp
i586_bc2:
	lmsw	%dx				/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock		/* release the FPU lock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* overlap: copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 */
624
625/*
626 * Note: memcpy does not support overlapping copies
627 */
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * Forward-only copy (no overlap support, per the comment above):
 * len/4 longwords then len&3 bytes.  Returns dst in %eax per the C
 * memcpy contract (saved before %edi is advanced by the copy).
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* %edi = dst */
	movl	16(%esp),%esi			/* %esi = src */
	movl	20(%esp),%ecx			/* %ecx = len */
	movl	%edi,%eax			/* return value = dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
646
647
648/*****************************************************************************/
649/* copyout and fubyte family                                                 */
650/*****************************************************************************/
651/*
652 * Access user memory from inside the kernel. These routines and possibly
653 * the math- and DOS emulators should be the only places that do this.
654 *
655 * We have to access the memory with user's permissions, so use a segment
656 * selector with RPL 3. For writes to user space we have to additionally
657 * check the PTE for write permission, because the 386 does not check
658 * write permissions when we are executing with EPL 0. The 486 does check
659 * this if the WP bit is set in CR0, so we can use a simpler version here.
660 *
661 * These routines set curpcb->onfault for the time they execute. When a
662 * protection violation occurs inside the functions, the trap handler
663 * returns to *curpcb->onfault instead of the function.
664 */
665
666/*
667 * copyout(from_kernel, to_user, len)  - MP SAFE (if not I386_CPU)
668 */
/*
 * int copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * Tail-jump through _copyout_vector to the CPU-selected implementation.
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector
672
/*
 * int generic_copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * Copy len bytes from kernel space to user space.  Returns 0 on
 * success, EFAULT on any fault or bad user address.  Faults taken
 * while PCB_ONFAULT is set resume at copyout_fault via the trap
 * handler.  On a 386 (no CR0 WP in kernel mode) each destination
 * page's PTE is checked for user-write permission first, simulating
 * a write trap via _trapwrite when it isn't writable.
 * Register roles: %esi = src, %edi = dst, %ebx = len (live across the
 * possible _trapwrite calls since %ebx is callee-saved).
 */
ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f				/* >= 486: hardware WP covers us */
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edx,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx			/* PTE offset back to address */
	pushl	%edx
	call	_trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyout:				/* shared tail used by i586_copyout */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyout_fault:					/* trap handler resumes here */
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
798
#if defined(I586_CPU) && NNPX > 0
/*
 * int i586_copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * Pentium variant of copyout: after the same address validation as
 * generic_copyout, large (>= 1024 byte) copies go through _fastmove
 * (FPU-based), small ones jump into generic_copyout's slow_copyout
 * tail.  Returns 0 or EFAULT like generic_copyout.
 * NOTE(review): no I386_CPU PTE walk here — this path is presumably
 * never selected on a 386 (vector patched only on 586); confirm.
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout			/* small: rep movs in generic tail */

	pushl	%ecx
	call	_fastmove			/* FPU copy; src/dst in %esi/%edi */
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 */
852
853/*
854 * copyin(from_user, to_kernel, len) - MP SAFE
855 */
/*
 * int copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * Tail-jump through _copyin_vector to the CPU-selected implementation.
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector
859
/*
 * int generic_copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * Copy len bytes from user space to kernel space.  Returns 0 on
 * success, EFAULT on a fault or out-of-range user address.  Faults
 * taken while PCB_ONFAULT is set resume at copyin_fault.  No PTE
 * walk is needed: reads of invalid user pages simply fault.
 * %al caches len's low bits across the longword copy for the tail.
 */
ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault			/* wrapped: bogus range */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyin:				/* shared tail used by i586_copyin */
#endif
	movb	%cl,%al				/* save len&3 for the byte tail */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
done_copyin:				/* shared exit used by i586_copyin */
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* return 0 */
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyin_fault:					/* trap handler resumes here */
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
911
#if defined(I586_CPU) && NNPX > 0
/*
 * int i586_copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * Pentium variant of copyin: same validation as generic_copyin, then
 * large (>= 1024 byte) copies go through _fastmove, small ones jump
 * into generic_copyin's slow_copyin tail.  Returns 0 or EFAULT.
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin			/* small: rep movs in generic tail */

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove			/* FPU copy; src/dst in %esi/%edi */
	addl	$8,%esp				/* pop len and the %ebx pad */
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 */
946
#if defined(I586_CPU) && NNPX > 0
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * FPU-based bulk copy used by i586_copyin/i586_copyout.  Takes over
 * the FPU: any current owner's state (npxproc) is saved into its pcb,
 * curpcb->pcb_savefpu is stashed on our stack frame so cpu_switch()
 * can freely overwrite it while we run, and npxproc is pointed at
 * curproc so context switches treat our FPU use like an application's.
 * Copies in <= 1792-byte chunks, prefetching each chunk (one longword
 * read per 32 bytes) before moving 64 bytes/iteration via fildq/fistpq.
 * Only used when src and dst are both 8-byte aligned and len >= 64;
 * otherwise falls through to a plain `rep movs' tail.
 * Faults resume at fastmove_fault / fastmove_tail_fault, which unwind
 * both our frame and the i586_copyin/copyout caller's pushes.
 * The three dwords at -12/-8/-4(%ebp) are scratch slots for
 * len/src/dst across the pcb_savefpu block copies.
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save copy + 3 scratch dwords */

	movl	8(%ebp),%ecx			/* len (only arg passed on stack) */
	cmpl	$63,%ecx
	jbe	fastmove_tail			/* too small for the FPU method */

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)			/* spill len/src/dst around the copy */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi			/* copy pcb_savefpu to our frame */
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx			/* reload len/src/dst */
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
4:
	/* Per-chunk loop: at most 1792 bytes per pass. */
	movl	%ecx,-12(%ebp)			/* -12(%ebp) = bytes beyond chunk */
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f				/* chunk too small to prefetch */
	movl	%ecx,-8(%ebp)			/* save chunk len/src for after */
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	/* Touch one longword per 32 bytes to pull the source into cache. */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx			/* rewind to the chunk start */
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	/* 64 bytes per iteration through the FPU stack (8 x 8 bytes). */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)			/* pop in reverse load order */
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax			/* bytes beyond this chunk */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* more full chunks to do */

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)			/* spill len/src/dst again */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi			/* copy the saved image back */
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al			/* set TS so next FPU use traps */
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	/* Plain rep-movs copy for the remainder (or unaligned/small input). */
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al				/* save len&3 for the byte tail */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* Fault during the FPU copy: restore pcb_savefpu and FPU ownership. */
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	/*
	 * Unwind our frame AND the caller's: the extra 8 bytes and the
	 * %ebx/%edi/%esi pops belong to i586_copyin/i586_copyout's frame
	 * (presumably why i586_copyin pushes a pad %ebx — see the XXX
	 * there).  Returns EFAULT directly to *their* caller.
	 */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 */
1134
1135/*
1136 * fu{byte,sword,word} - MP SAFE
1137 *
1138 *	Fetch a byte (sword, word) from user memory
1139 */
/*
 * long fuword(const void *uaddr)
 *
 * Fetch a 32-bit word from user space.  Returns the word, or -1 on
 * fault / bad address (via fusufault).  Note -1 is thus ambiguous
 * with a stored value of -1, as is traditional for fuword.
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1151
1152/*
1153 * These two routines are called from the profiling code, potentially
1154 * at interrupt time. If they fail, that's okay, good things will
1155 * happen later. Fail all the time for now - until the trap code is
1156 * able to deal with this.
1157 */
/*
 * fuswintr() / suswintr(): interrupt-time user-memory access for the
 * profiler.  Deliberately always fail (return -1) until the trap code
 * can handle faults at interrupt time — see the comment above.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret
1162
1163/*
1164 * fusword - MP SAFE
1165 */
/*
 * int fusword(const void *uaddr)
 *
 * Fetch a 16-bit word from user space, zero-extended.  Returns the
 * word, or -1 on fault / bad address.
 */
ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1177
1178/*
1179 * fubyte - MP SAFE
1180 */
/*
 * int fubyte(const void *uaddr)
 *
 * Fetch a byte from user space, zero-extended.  Returns the byte, or
 * -1 on fault / bad address.
 */
ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret

	ALIGN_TEXT
/*
 * Common fault handler for the fu*/su* family: disarm PCB_ONFAULT and
 * return -1.  The trap handler resumes here on a protection violation.
 */
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax				/* %eax = -1 */
	ret
1200
1201/*
1202 * su{byte,sword,word} - MP SAFE (if not I386_CPU)
1203 *
1204 *	Write a byte (word, longword) to user memory
1205 */
/*
 * int suword(void *uaddr, long v)
 *
 * Store a 32-bit word to user space.  Returns 0 on success, -1 on
 * fault / bad address (via fusufault).  On a 386 (no kernel-mode WP)
 * the destination PTE is checked for user-write permission first,
 * simulating the trap via _trapwrite when needed.
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f				/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax			/* %eax = user address */
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* %edx = PTE offset */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax			/* nonzero: trapwrite failed */
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload the (clobbered) address */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax			/* value to store */
	movl	%eax,(%edx)
	xorl	%eax,%eax			/* return 0 */
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1254
1255/*
1256 * susword - MP SAFE (if not I386_CPU)
1257 */
/*
 * int susword(void *uaddr, int v)
 *
 * Store a 16-bit word to user space.  Returns 0 on success, -1 on
 * fault / bad address.  Same 386 PTE-check dance as suword.
 */
ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax			/* %eax = user address */
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* %edx = PTE offset */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax			/* nonzero: trapwrite failed */
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload the (clobbered) address */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax			/* value to store */
	movw	%ax,(%edx)
	xorl	%eax,%eax			/* return 0 */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1306
1307/*
1308 * su[i]byte - MP SAFE (if not I386_CPU)
1309 */
/*
 * int subyte(void *uaddr, int v) / int suibyte(void *uaddr, int v)
 *
 * Store a byte to user space (suibyte is an alias entry).  Returns 0
 * on success, -1 on fault / bad address.  Same 386 PTE-check dance
 * as suword.
 */
ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax			/* %eax = user address */
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* %edx = PTE offset */

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax			/* nonzero: trapwrite failed */
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload the (clobbered) address */
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al			/* value to store */
	movb	%al,(%edx)
	xorl	%eax,%eax			/* return 0 */
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1358
1359/*
1360 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
1361 *
1362 *	copy a string from from to to, stop when a 0 character is reached.
1363 *	return ENAMETOOLONG if string is longer than maxlen, and
1364 *	EFAULT on protection violations. If lencopied is non-zero,
1365 *	return the actual length in *lencopied.
1366 */
/*
 * int copyinstr(const void *uaddr, void *kaddr, size_t maxlen,
 *               size_t *lencopied)
 *
 * Copy a NUL-terminated string from user to kernel space, at most
 * maxlen bytes including the NUL.  Returns 0 on success,
 * ENAMETOOLONG if no NUL within maxlen, EFAULT on a fault or when
 * the source runs past VM_MAXUSER_ADDRESS.  If lencopied is non-NULL
 * the number of bytes copied (including the NUL) is stored there.
 * %edx counts the remaining budget; maxlen is clamped so the scan
 * never crosses the user/kernel boundary.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)	/* arm fault recovery */

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)			/* clamped value feeds *lencopied math */
1:
	incl	%edx				/* pre-bias for the leading decl */
	cld

2:
	decl	%edx
	jz	3f				/* budget exhausted */

	lodsb
	stosb
	orb	%al,%al				/* NUL terminator? */
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax			/* return 0 */
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi	/* stopped at the boundary? */
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:					/* trap handler resumes here */
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* bytes copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx			/* lencopied == NULL? */
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1430
1431
1432/*
1433 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
1434 */
1435ENTRY(copystr)
1436	pushl	%esi
1437	pushl	%edi
1438
1439	movl	12(%esp),%esi			/* %esi = from */
1440	movl	16(%esp),%edi			/* %edi = to */
1441	movl	20(%esp),%edx			/* %edx = maxlen */
1442	incl	%edx
1443	cld
14441:
1445	decl	%edx
1446	jz	4f
1447	lodsb
1448	stosb
1449	orb	%al,%al
1450	jnz	1b
1451
1452	/* Success -- 0 byte reached */
1453	decl	%edx
1454	xorl	%eax,%eax
1455	jmp	6f
14564:
1457	/* edx is zero -- return ENAMETOOLONG */
1458	movl	$ENAMETOOLONG,%eax
1459
14606:
1461	/* set *lencopied and return %eax */
1462	movl	20(%esp),%ecx
1463	subl	%edx,%ecx
1464	movl	24(%esp),%edx
1465	testl	%edx,%edx
1466	jz	7f
1467	movl	%ecx,(%edx)
14687:
1469	popl	%edi
1470	popl	%esi
1471	ret
1472
/*
 * bcmp(b1, b2, length)
 *
 *	Compare two byte ranges; returns 0 in %eax iff they are equal,
 *	nonzero (1) otherwise.  Compares a longword at a time, then
 *	mops up the 0-3 trailing bytes.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* %edi = b1 */
	movl	16(%esp),%esi			/* %esi = b2 */
	movl	20(%esp),%edx			/* %edx = length */
	xorl	%eax,%eax			/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx				/* longword count */
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx				/* 0-3 leftover bytes */
	repe
	cmpsb
	je	2f				/* equal: %eax still 0 */
1:
	incl	%eax				/* mismatch: return 1 */
2:
	popl	%esi
	popl	%edi
	ret
1499
1500
1501/*
1502 * Handling of special 386 registers and descriptor tables etc
1503 */
1504/* void lgdt(struct region_descriptor *rdp); */
1505ENTRY(lgdt)
1506	/* reload the descriptor table */
1507	movl	4(%esp),%eax
1508	lgdt	(%eax)
1509
1510	/* flush the prefetch q */
1511	jmp	1f
1512	nop
15131:
1514	/* reload "stale" selectors */
1515	movl	$KDSEL,%eax
1516	mov	%ax,%ds
1517	mov	%ax,%es
1518	mov	%ax,%gs
1519	mov	%ax,%ss
1520#ifdef SMP
1521	movl	$KPSEL,%eax
1522#endif
1523	mov	%ax,%fs
1524
1525	/* reload code selector by turning return into intersegmental return */
1526	movl	(%esp),%eax
1527	pushl	%eax
1528	movl	$KCSEL,4(%esp)
1529	lret
1530
1531/*
1532 * void lidt(struct region_descriptor *rdp);
1533 */
1534ENTRY(lidt)
1535	movl	4(%esp),%eax
1536	lidt	(%eax)
1537	ret
1538
1539/*
1540 * void lldt(u_short sel)
1541 */
1542ENTRY(lldt)
1543	lldt	4(%esp)
1544	ret
1545
1546/*
1547 * void ltr(u_short sel)
1548 */
1549ENTRY(ltr)
1550	ltr	4(%esp)
1551	ret
1552
/*
 * ssdtosd(*ssdp,*sdp)
 *
 *	Pack a software segment descriptor (*ssdp, three longwords) into
 *	the shuffled hardware i386 descriptor layout, storing the two
 *	resulting longwords at *sdp.  The exact field meanings follow the
 *	soft_segment_descriptor/segment_descriptor structs (presumably
 *	machine/segments.h -- confirm there for bit assignments).
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx			/* %ecx = ssdp */
	movl	8(%ecx),%ebx			/* third ssd longword */
	shll	$16,%ebx			/* into the high half */
	movl	(%ecx),%edx			/* first ssd longword */
	roll	$16,%edx			/* swap its halves */
	movb	%dh,%bl				/* interleave bytes into %ebx */
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax			/* second ssd longword */
	movw	%ax,%dx				/* low word -> descriptor word 0 */
	andl	$0xf0000,%eax			/* keep bits 16-19 (limit high) */
	orl	%eax,%ebx			/* merge into descriptor word 1 */
	movl	12(%esp),%ecx			/* %ecx = sdp */
	movl	%edx,(%ecx)			/* store hardware descriptor */
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret
1573
/* load_cr0(cr0) -- write the machine control register %cr0 */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret
1579
/* rcr0() -- return the current value of %cr0 */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret
1584
/* rcr3() -- return the current value of %cr3 (page directory base) */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret
1589
/*
 * void load_cr3(caddr_t cr3)
 *
 *	Write %cr3 (page directory base); on i386 this also flushes
 *	the non-global TLB entries, hence the optional flush counter.
 */
ENTRY(load_cr3)
#if defined(SWTCH_OPTIM_STATS)
	incl	_tlb_flush_count		/* statistics only */
#endif
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret
1598
/* rcr4() -- return the current value of %cr4 */
ENTRY(rcr4)
	movl	%cr4,%eax
	ret
1603
/* void load_cr4(caddr_t cr4) -- write the machine control register %cr4 */
ENTRY(load_cr4)
	movl	4(%esp),%eax
	movl	%eax,%cr4
	ret
1609
/* void load_dr6(u_int dr6) -- write the debug status register %dr6 */
ENTRY(load_dr6)
	movl    4(%esp),%eax
	movl    %eax,%dr6
	ret
1615
/*
 * void reset_dbregs()
 *
 *	Clear all hardware debug registers: control (%dr7) first so no
 *	breakpoint can fire while the address registers are reset.
 */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6
	ret
1626
1627/*****************************************************************************/
1628/* setjump, longjump                                                         */
1629/*****************************************************************************/
1630
1631ENTRY(setjmp)
1632	movl	4(%esp),%eax
1633	movl	%ebx,(%eax)			/* save ebx */
1634	movl	%esp,4(%eax)			/* save esp */
1635	movl	%ebp,8(%eax)			/* save ebp */
1636	movl	%esi,12(%eax)			/* save esi */
1637	movl	%edi,16(%eax)			/* save edi */
1638	movl	(%esp),%edx			/* get rta */
1639	movl	%edx,20(%eax)			/* save eip */
1640	xorl	%eax,%eax			/* return(0); */
1641	ret
1642
/*
 * void longjmp(jmp_buf)
 *
 *	Restore the context saved by setjmp and resume there; the
 *	resumed setjmp call appears to return 1.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1655
1656/*
1657 * Support for BB-profiling (gcc -a).  The kernbb program will extract
1658 * the data from the kernel.
1659 */
1660
1661	.data
1662	ALIGN_DATA
1663	.globl bbhead
1664bbhead:
1665	.long 0
1666
1667	.text
1668NON_GPROF_ENTRY(__bb_init_func)
1669	movl	4(%esp),%eax
1670	movl	$1,(%eax)
1671	movl	bbhead,%edx
1672	movl	%edx,16(%eax)
1673	movl	%eax,bbhead
1674	.byte	0xc3				/* avoid macro for `ret' */
1675