/* support.s — FreeBSD i386 kernel support routines, revision 58941 */
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/sys/i386/i386/support.s 58941 2000-04-02 17:52:43Z dillon $
34 */
35
36#include "opt_smp.h"
37#include "npx.h"
38
39#include <machine/asmacros.h>
40#include <machine/cputypes.h>
41#include <machine/pmap.h>
42#include <machine/specialreg.h>
43
44#include "assym.s"
45
46#define IDXSHIFT	10
47
48	.data
49	.globl	_bcopy_vector
50_bcopy_vector:
51	.long	_generic_bcopy
52	.globl	_bzero
53_bzero:
54	.long	_generic_bzero
55	.globl	_copyin_vector
56_copyin_vector:
57	.long	_generic_copyin
58	.globl	_copyout_vector
59_copyout_vector:
60	.long	_generic_copyout
61	.globl	_ovbcopy_vector
62_ovbcopy_vector:
63	.long	_generic_bcopy
64#if defined(I586_CPU) && NNPX > 0
65kernel_fpu_lock:
66	.byte	0xfe
67	.space	3
68#endif
69
70	.text
71
72/*
73 * bcopy family
74 * void bzero(void *buf, u_int len)
75 */
76
77ENTRY(generic_bzero)
78	pushl	%edi
79	movl	8(%esp),%edi
80	movl	12(%esp),%ecx
81	xorl	%eax,%eax
82	shrl	$2,%ecx
83	cld
84	rep
85	stosl
86	movl	12(%esp),%ecx
87	andl	$3,%ecx
88	rep
89	stosb
90	popl	%edi
91	ret
92
#if defined(I486_CPU)
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * Integer bzero tuned for the 486: unrolled 64-byte stores, then
 * 16-byte and 4-byte chunks, with a jump table dispatching the final
 * 0-3 bytes.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx		/* buf */
	movl	8(%esp),%ecx		/* len */
	xorl	%eax,%eax		/* fill value 0 */
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b			/* %ecx == 0: done */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	/*
	 * Indirect jump through jtab, indexed by the remaining count
	 * (0-3).  The `*' is required in AT&T syntax; without it this
	 * would be a direct jump to the label, not a dispatch through
	 * the table.
	 */
	jmp	*jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
194
195#if defined(I586_CPU) && NNPX > 0
196ENTRY(i586_bzero)
197	movl	4(%esp),%edx
198	movl	8(%esp),%ecx
199
200	/*
201	 * The FPU register method is twice as fast as the integer register
202	 * method unless the target is in the L1 cache and we pre-allocate a
203	 * cache line for it (then the integer register method is 4-5 times
204	 * faster).  However, we never pre-allocate cache lines, since that
205	 * would make the integer method 25% or more slower for the common
206	 * case when the target isn't in either the L1 cache or the L2 cache.
207	 * Thus we normally use the FPU register method unless the overhead
208	 * would be too large.
209	 */
210	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
211	jb	intreg_i586_bzero
212
213	/*
214	 * The FPU registers may belong to an application or to fastmove()
215	 * or to another invocation of bcopy() or ourself in a higher level
216	 * interrupt or trap handler.  Preserving the registers is
217	 * complicated since we avoid it if possible at all levels.  We
218	 * want to localize the complications even when that increases them.
219	 * Here the extra work involves preserving CR0_TS in TS.
220	 * `npxproc != NULL' is supposed to be the condition that all the
221	 * FPU resources belong to an application, but npxproc and CR0_TS
222	 * aren't set atomically enough for this condition to work in
223	 * interrupt handlers.
224	 *
225	 * Case 1: FPU registers belong to the application: we must preserve
226	 * the registers if we use them, so we only use the FPU register
227	 * method if the target size is large enough to amortize the extra
228	 * overhead for preserving them.  CR0_TS must be preserved although
229	 * it is very likely to end up as set.
230	 *
231	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
232	 * makes the registers look like they belong to an application so
233	 * that cpu_switch() and savectx() don't have to know about it, so
234	 * this case reduces to case 1.
235	 *
236	 * Case 3: FPU registers belong to the kernel: don't use the FPU
237	 * register method.  This case is unlikely, and supporting it would
238	 * be more complicated and might take too much stack.
239	 *
240	 * Case 4: FPU registers don't belong to anyone: the FPU registers
241	 * don't need to be preserved, so we always use the FPU register
242	 * method.  CR0_TS must be preserved although it is very likely to
243	 * always end up as clear.
244	 */
245	cmpl	$0,_npxproc
246	je	i586_bz1
247	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
248	jb	intreg_i586_bzero
249	sarb	$1,kernel_fpu_lock
250	jc	intreg_i586_bzero
251	smsw	%ax
252	clts
253	subl	$108,%esp
254	fnsave	0(%esp)
255	jmp	i586_bz2
256
257i586_bz1:
258	sarb	$1,kernel_fpu_lock
259	jc	intreg_i586_bzero
260	smsw	%ax
261	clts
262	fninit				/* XXX should avoid needing this */
263i586_bz2:
264	fldz
265
266	/*
267	 * Align to an 8 byte boundary (misalignment in the main loop would
268	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
269	 * already aligned) by always zeroing 8 bytes and using the part up
270	 * to the _next_ alignment position.
271	 */
272	fstl	0(%edx)
273	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
274	addl	$8,%edx
275	andl	$~7,%edx
276	subl	%edx,%ecx
277
278	/*
279	 * Similarly align `len' to a multiple of 8.
280	 */
281	fstl	-8(%edx,%ecx)
282	decl	%ecx
283	andl	$~7,%ecx
284
285	/*
286	 * This wouldn't be any faster if it were unrolled, since the loop
287	 * control instructions are much faster than the fstl and/or done
288	 * in parallel with it so their overhead is insignificant.
289	 */
290fpureg_i586_bzero_loop:
291	fstl	0(%edx)
292	addl	$8,%edx
293	subl	$8,%ecx
294	cmpl	$8,%ecx
295	jae	fpureg_i586_bzero_loop
296
297	cmpl	$0,_npxproc
298	je	i586_bz3
299	frstor	0(%esp)
300	addl	$108,%esp
301	lmsw	%ax
302	movb	$0xfe,kernel_fpu_lock
303	ret
304
305i586_bz3:
306	fstpl	%st(0)
307	lmsw	%ax
308	movb	$0xfe,kernel_fpu_lock
309	ret
310
311intreg_i586_bzero:
312	/*
313	 * `rep stos' seems to be the best method in practice for small
314	 * counts.  Fancy methods usually take too long to start up due
315	 * to cache and BTB misses.
316	 */
317	pushl	%edi
318	movl	%edx,%edi
319	xorl	%eax,%eax
320	shrl	$2,%ecx
321	cld
322	rep
323	stosl
324	movl	12(%esp),%ecx
325	andl	$3,%ecx
326	jne	1f
327	popl	%edi
328	ret
329
3301:
331	rep
332	stosb
333	popl	%edi
334	ret
335#endif /* I586_CPU && NNPX > 0 */
336
337ENTRY(i686_pagezero)
338	pushl	%edi
339	pushl	%ebx
340
341	movl	12(%esp), %edi
342	movl	$1024, %ecx
343	cld
344
345	ALIGN_TEXT
3461:
347	xorl	%eax, %eax
348	repe
349	scasl
350	jnz	2f
351
352	popl	%ebx
353	popl	%edi
354	ret
355
356	ALIGN_TEXT
357
3582:
359	incl	%ecx
360	subl	$4, %edi
361
362	movl	%ecx, %edx
363	cmpl	$16, %ecx
364
365	jge	3f
366
367	movl	%edi, %ebx
368	andl	$0x3f, %ebx
369	shrl	%ebx
370	shrl	%ebx
371	movl	$16, %ecx
372	subl	%ebx, %ecx
373
3743:
375	subl	%ecx, %edx
376	rep
377	stosl
378
379	movl	%edx, %ecx
380	testl	%edx, %edx
381	jnz	1b
382
383	popl	%ebx
384	popl	%edi
385	ret
386
387/* fillw(pat, base, cnt) */
388ENTRY(fillw)
389	pushl	%edi
390	movl	8(%esp),%eax
391	movl	12(%esp),%edi
392	movl	16(%esp),%ecx
393	cld
394	rep
395	stosw
396	popl	%edi
397	ret
398
399ENTRY(bcopyb)
400	pushl	%esi
401	pushl	%edi
402	movl	12(%esp),%esi
403	movl	16(%esp),%edi
404	movl	20(%esp),%ecx
405	movl	%edi,%eax
406	subl	%esi,%eax
407	cmpl	%ecx,%eax			/* overlapping && src < dst? */
408	jb	1f
409	cld					/* nope, copy forwards */
410	rep
411	movsb
412	popl	%edi
413	popl	%esi
414	ret
415
416	ALIGN_TEXT
4171:
418	addl	%ecx,%edi			/* copy backwards. */
419	addl	%ecx,%esi
420	decl	%edi
421	decl	%esi
422	std
423	rep
424	movsb
425	popl	%edi
426	popl	%esi
427	cld
428	ret
429
430ENTRY(bcopy)
431	MEXITCOUNT
432	jmp	*_bcopy_vector
433
434ENTRY(ovbcopy)
435	MEXITCOUNT
436	jmp	*_ovbcopy_vector
437
438/*
439 * generic_bcopy(src, dst, cnt)
440 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
441 */
442ENTRY(generic_bcopy)
443	pushl	%esi
444	pushl	%edi
445	movl	12(%esp),%esi
446	movl	16(%esp),%edi
447	movl	20(%esp),%ecx
448
449	movl	%edi,%eax
450	subl	%esi,%eax
451	cmpl	%ecx,%eax			/* overlapping && src < dst? */
452	jb	1f
453
454	shrl	$2,%ecx				/* copy by 32-bit words */
455	cld					/* nope, copy forwards */
456	rep
457	movsl
458	movl	20(%esp),%ecx
459	andl	$3,%ecx				/* any bytes left? */
460	rep
461	movsb
462	popl	%edi
463	popl	%esi
464	ret
465
466	ALIGN_TEXT
4671:
468	addl	%ecx,%edi			/* copy backwards */
469	addl	%ecx,%esi
470	decl	%edi
471	decl	%esi
472	andl	$3,%ecx				/* any fractional bytes? */
473	std
474	rep
475	movsb
476	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
477	shrl	$2,%ecx
478	subl	$3,%esi
479	subl	$3,%edi
480	rep
481	movsl
482	popl	%edi
483	popl	%esi
484	cld
485	ret
486
487#if defined(I586_CPU) && NNPX > 0
488ENTRY(i586_bcopy)
489	pushl	%esi
490	pushl	%edi
491	movl	12(%esp),%esi
492	movl	16(%esp),%edi
493	movl	20(%esp),%ecx
494
495	movl	%edi,%eax
496	subl	%esi,%eax
497	cmpl	%ecx,%eax			/* overlapping && src < dst? */
498	jb	1f
499
500	cmpl	$1024,%ecx
501	jb	small_i586_bcopy
502
503	sarb	$1,kernel_fpu_lock
504	jc	small_i586_bcopy
505	cmpl	$0,_npxproc
506	je	i586_bc1
507	smsw	%dx
508	clts
509	subl	$108,%esp
510	fnsave	0(%esp)
511	jmp	4f
512
513i586_bc1:
514	smsw	%dx
515	clts
516	fninit				/* XXX should avoid needing this */
517
518	ALIGN_TEXT
5194:
520	pushl	%ecx
521#define	DCACHE_SIZE	8192
522	cmpl	$(DCACHE_SIZE-512)/2,%ecx
523	jbe	2f
524	movl	$(DCACHE_SIZE-512)/2,%ecx
5252:
526	subl	%ecx,0(%esp)
527	cmpl	$256,%ecx
528	jb	5f			/* XXX should prefetch if %ecx >= 32 */
529	pushl	%esi
530	pushl	%ecx
531	ALIGN_TEXT
5323:
533	movl	0(%esi),%eax
534	movl	32(%esi),%eax
535	movl	64(%esi),%eax
536	movl	96(%esi),%eax
537	movl	128(%esi),%eax
538	movl	160(%esi),%eax
539	movl	192(%esi),%eax
540	movl	224(%esi),%eax
541	addl	$256,%esi
542	subl	$256,%ecx
543	cmpl	$256,%ecx
544	jae	3b
545	popl	%ecx
546	popl	%esi
5475:
548	ALIGN_TEXT
549large_i586_bcopy_loop:
550	fildq	0(%esi)
551	fildq	8(%esi)
552	fildq	16(%esi)
553	fildq	24(%esi)
554	fildq	32(%esi)
555	fildq	40(%esi)
556	fildq	48(%esi)
557	fildq	56(%esi)
558	fistpq	56(%edi)
559	fistpq	48(%edi)
560	fistpq	40(%edi)
561	fistpq	32(%edi)
562	fistpq	24(%edi)
563	fistpq	16(%edi)
564	fistpq	8(%edi)
565	fistpq	0(%edi)
566	addl	$64,%esi
567	addl	$64,%edi
568	subl	$64,%ecx
569	cmpl	$64,%ecx
570	jae	large_i586_bcopy_loop
571	popl	%eax
572	addl	%eax,%ecx
573	cmpl	$64,%ecx
574	jae	4b
575
576	cmpl	$0,_npxproc
577	je	i586_bc2
578	frstor	0(%esp)
579	addl	$108,%esp
580i586_bc2:
581	lmsw	%dx
582	movb	$0xfe,kernel_fpu_lock
583
584/*
585 * This is a duplicate of the main part of generic_bcopy.  See the comments
586 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
587 * would mess up high resolution profiling.
588 */
589	ALIGN_TEXT
590small_i586_bcopy:
591	shrl	$2,%ecx
592	cld
593	rep
594	movsl
595	movl	20(%esp),%ecx
596	andl	$3,%ecx
597	rep
598	movsb
599	popl	%edi
600	popl	%esi
601	ret
602
603	ALIGN_TEXT
6041:
605	addl	%ecx,%edi
606	addl	%ecx,%esi
607	decl	%edi
608	decl	%esi
609	andl	$3,%ecx
610	std
611	rep
612	movsb
613	movl	20(%esp),%ecx
614	shrl	$2,%ecx
615	subl	$3,%esi
616	subl	$3,%edi
617	rep
618	movsl
619	popl	%edi
620	popl	%esi
621	cld
622	ret
623#endif /* I586_CPU && NNPX > 0 */
624
625/*
626 * Note: memcpy does not support overlapping copies
627 */
628ENTRY(memcpy)
629	pushl	%edi
630	pushl	%esi
631	movl	12(%esp),%edi
632	movl	16(%esp),%esi
633	movl	20(%esp),%ecx
634	movl	%edi,%eax
635	shrl	$2,%ecx				/* copy by 32-bit words */
636	cld					/* nope, copy forwards */
637	rep
638	movsl
639	movl	20(%esp),%ecx
640	andl	$3,%ecx				/* any bytes left? */
641	rep
642	movsb
643	popl	%esi
644	popl	%edi
645	ret
646
647
648/*****************************************************************************/
649/* copyout and fubyte family                                                 */
650/*****************************************************************************/
651/*
652 * Access user memory from inside the kernel. These routines and possibly
653 * the math- and DOS emulators should be the only places that do this.
654 *
655 * We have to access the memory with user's permissions, so use a segment
656 * selector with RPL 3. For writes to user space we have to additionally
657 * check the PTE for write permission, because the 386 does not check
658 * write permissions when we are executing with EPL 0. The 486 does check
659 * this if the WP bit is set in CR0, so we can use a simpler version here.
660 *
661 * These routines set curpcb->onfault for the time they execute. When a
662 * protection violation occurs inside the functions, the trap handler
663 * returns to *curpcb->onfault instead of the function.
664 */
665
666/*
667 * copyout(from_kernel, to_user, len)  - MP SAFE (if not I386_CPU)
668 */
669ENTRY(copyout)
670	MEXITCOUNT
671	jmp	*_copyout_vector
672
673ENTRY(generic_copyout)
674	movl	_curpcb,%eax
675	movl	$copyout_fault,PCB_ONFAULT(%eax)
676	pushl	%esi
677	pushl	%edi
678	pushl	%ebx
679	movl	16(%esp),%esi
680	movl	20(%esp),%edi
681	movl	24(%esp),%ebx
682	testl	%ebx,%ebx			/* anything to do? */
683	jz	done_copyout
684
685	/*
686	 * Check explicitly for non-user addresses.  If 486 write protection
687	 * is being used, this check is essential because we are in kernel
688	 * mode so the h/w does not provide any protection against writing
689	 * kernel addresses.
690	 */
691
692	/*
693	 * First, prevent address wrapping.
694	 */
695	movl	%edi,%eax
696	addl	%ebx,%eax
697	jc	copyout_fault
698/*
699 * XXX STOP USING VM_MAXUSER_ADDRESS.
700 * It is an end address, not a max, so every time it is used correctly it
701 * looks like there is an off by one error, and of course it caused an off
702 * by one error in several places.
703 */
704	cmpl	$VM_MAXUSER_ADDRESS,%eax
705	ja	copyout_fault
706
707#if defined(I386_CPU)
708
709#if defined(SMP)
710#error I386_CPU option not supported if SMP
711#endif
712
713#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
714	cmpl	$CPUCLASS_386,_cpu_class
715	jne	3f
716#endif
717/*
718 * We have to check each PTE for user write permission.
719 * The checking may cause a page fault, so it is important to set
720 * up everything for return via copyout_fault before here.
721 */
722	/* compute number of pages */
723	movl	%edi,%ecx
724	andl	$PAGE_MASK,%ecx
725	addl	%ebx,%ecx
726	decl	%ecx
727	shrl	$IDXSHIFT+2,%ecx
728	incl	%ecx
729
730	/* compute PTE offset for start address */
731	movl	%edi,%edx
732	shrl	$IDXSHIFT,%edx
733	andb	$0xfc,%dl
734
7351:
736	/* check PTE for each page */
737	leal	_PTmap(%edx),%eax
738	shrl	$IDXSHIFT,%eax
739	andb	$0xfc,%al
740	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
741	je	4f
742	movb	_PTmap(%edx),%al
743	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
744	cmpb	$PG_V|PG_RW|PG_U,%al
745	je	2f
746
7474:
748	/* simulate a trap */
749	pushl	%edx
750	pushl	%ecx
751	shll	$IDXSHIFT,%edx
752	pushl	%edx
753	call	_trapwrite			/* trapwrite(addr) */
754	popl	%edx
755	popl	%ecx
756	popl	%edx
757
758	testl	%eax,%eax			/* if not ok, return EFAULT */
759	jnz	copyout_fault
760
7612:
762	addl	$4,%edx
763	decl	%ecx
764	jnz	1b				/* check next page */
765#endif /* I386_CPU */
766
767	/* bcopy(%esi, %edi, %ebx) */
7683:
769	movl	%ebx,%ecx
770
771#if defined(I586_CPU) && NNPX > 0
772	ALIGN_TEXT
773slow_copyout:
774#endif
775	shrl	$2,%ecx
776	cld
777	rep
778	movsl
779	movb	%bl,%cl
780	andb	$3,%cl
781	rep
782	movsb
783
784done_copyout:
785	popl	%ebx
786	popl	%edi
787	popl	%esi
788	xorl	%eax,%eax
789	movl	_curpcb,%edx
790	movl	%eax,PCB_ONFAULT(%edx)
791	ret
792
793	ALIGN_TEXT
794copyout_fault:
795	popl	%ebx
796	popl	%edi
797	popl	%esi
798	movl	_curpcb,%edx
799	movl	$0,PCB_ONFAULT(%edx)
800	movl	$EFAULT,%eax
801	ret
802
803#if defined(I586_CPU) && NNPX > 0
804ENTRY(i586_copyout)
805	/*
806	 * Duplicated from generic_copyout.  Could be done a bit better.
807	 */
808	movl	_curpcb,%eax
809	movl	$copyout_fault,PCB_ONFAULT(%eax)
810	pushl	%esi
811	pushl	%edi
812	pushl	%ebx
813	movl	16(%esp),%esi
814	movl	20(%esp),%edi
815	movl	24(%esp),%ebx
816	testl	%ebx,%ebx			/* anything to do? */
817	jz	done_copyout
818
819	/*
820	 * Check explicitly for non-user addresses.  If 486 write protection
821	 * is being used, this check is essential because we are in kernel
822	 * mode so the h/w does not provide any protection against writing
823	 * kernel addresses.
824	 */
825
826	/*
827	 * First, prevent address wrapping.
828	 */
829	movl	%edi,%eax
830	addl	%ebx,%eax
831	jc	copyout_fault
832/*
833 * XXX STOP USING VM_MAXUSER_ADDRESS.
834 * It is an end address, not a max, so every time it is used correctly it
835 * looks like there is an off by one error, and of course it caused an off
836 * by one error in several places.
837 */
838	cmpl	$VM_MAXUSER_ADDRESS,%eax
839	ja	copyout_fault
840
841	/* bcopy(%esi, %edi, %ebx) */
8423:
843	movl	%ebx,%ecx
844	/*
845	 * End of duplicated code.
846	 */
847
848	cmpl	$1024,%ecx
849	jb	slow_copyout
850
851	pushl	%ecx
852	call	_fastmove
853	addl	$4,%esp
854	jmp	done_copyout
855#endif /* I586_CPU && NNPX > 0 */
856
857/*
858 * copyin(from_user, to_kernel, len) - MP SAFE
859 */
860ENTRY(copyin)
861	MEXITCOUNT
862	jmp	*_copyin_vector
863
864ENTRY(generic_copyin)
865	movl	_curpcb,%eax
866	movl	$copyin_fault,PCB_ONFAULT(%eax)
867	pushl	%esi
868	pushl	%edi
869	movl	12(%esp),%esi			/* caddr_t from */
870	movl	16(%esp),%edi			/* caddr_t to */
871	movl	20(%esp),%ecx			/* size_t  len */
872
873	/*
874	 * make sure address is valid
875	 */
876	movl	%esi,%edx
877	addl	%ecx,%edx
878	jc	copyin_fault
879	cmpl	$VM_MAXUSER_ADDRESS,%edx
880	ja	copyin_fault
881
882#if defined(I586_CPU) && NNPX > 0
883	ALIGN_TEXT
884slow_copyin:
885#endif
886	movb	%cl,%al
887	shrl	$2,%ecx				/* copy longword-wise */
888	cld
889	rep
890	movsl
891	movb	%al,%cl
892	andb	$3,%cl				/* copy remaining bytes */
893	rep
894	movsb
895
896#if defined(I586_CPU) && NNPX > 0
897	ALIGN_TEXT
898done_copyin:
899#endif
900	popl	%edi
901	popl	%esi
902	xorl	%eax,%eax
903	movl	_curpcb,%edx
904	movl	%eax,PCB_ONFAULT(%edx)
905	ret
906
907	ALIGN_TEXT
908copyin_fault:
909	popl	%edi
910	popl	%esi
911	movl	_curpcb,%edx
912	movl	$0,PCB_ONFAULT(%edx)
913	movl	$EFAULT,%eax
914	ret
915
916#if defined(I586_CPU) && NNPX > 0
917ENTRY(i586_copyin)
918	/*
919	 * Duplicated from generic_copyin.  Could be done a bit better.
920	 */
921	movl	_curpcb,%eax
922	movl	$copyin_fault,PCB_ONFAULT(%eax)
923	pushl	%esi
924	pushl	%edi
925	movl	12(%esp),%esi			/* caddr_t from */
926	movl	16(%esp),%edi			/* caddr_t to */
927	movl	20(%esp),%ecx			/* size_t  len */
928
929	/*
930	 * make sure address is valid
931	 */
932	movl	%esi,%edx
933	addl	%ecx,%edx
934	jc	copyin_fault
935	cmpl	$VM_MAXUSER_ADDRESS,%edx
936	ja	copyin_fault
937	/*
938	 * End of duplicated code.
939	 */
940
941	cmpl	$1024,%ecx
942	jb	slow_copyin
943
944	pushl	%ebx			/* XXX prepare for fastmove_fault */
945	pushl	%ecx
946	call	_fastmove
947	addl	$8,%esp
948	jmp	done_copyin
949#endif /* I586_CPU && NNPX > 0 */
950
951#if defined(I586_CPU) && NNPX > 0
952/* fastmove(src, dst, len)
953	src in %esi
954	dst in %edi
955	len in %ecx		XXX changed to on stack for profiling
956	uses %eax and %edx for tmp. storage
957 */
958/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
959ENTRY(fastmove)
960	pushl	%ebp
961	movl	%esp,%ebp
962	subl	$PCB_SAVEFPU_SIZE+3*4,%esp
963
964	movl	8(%ebp),%ecx
965	cmpl	$63,%ecx
966	jbe	fastmove_tail
967
968	testl	$7,%esi	/* check if src addr is multiple of 8 */
969	jnz	fastmove_tail
970
971	testl	$7,%edi	/* check if dst addr is multiple of 8 */
972	jnz	fastmove_tail
973
974/* if (npxproc != NULL) { */
975	cmpl	$0,_npxproc
976	je	6f
977/*    fnsave(&curpcb->pcb_savefpu); */
978	movl	_curpcb,%eax
979	fnsave	PCB_SAVEFPU(%eax)
980/*   npxproc = NULL; */
981	movl	$0,_npxproc
982/* } */
9836:
984/* now we own the FPU. */
985
986/*
987 * The process' FP state is saved in the pcb, but if we get
988 * switched, the cpu_switch() will store our FP state in the
989 * pcb.  It should be possible to avoid all the copying for
990 * this, e.g., by setting a flag to tell cpu_switch() to
991 * save the state somewhere else.
992 */
993/* tmp = curpcb->pcb_savefpu; */
994	movl	%ecx,-12(%ebp)
995	movl	%esi,-8(%ebp)
996	movl	%edi,-4(%ebp)
997	movl	%esp,%edi
998	movl	_curpcb,%esi
999	addl	$PCB_SAVEFPU,%esi
1000	cld
1001	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1002	rep
1003	movsl
1004	movl	-12(%ebp),%ecx
1005	movl	-8(%ebp),%esi
1006	movl	-4(%ebp),%edi
1007/* stop_emulating(); */
1008	clts
1009/* npxproc = curproc; */
1010	movl	_curproc,%eax
1011	movl	%eax,_npxproc
1012	movl	_curpcb,%eax
1013	movl	$fastmove_fault,PCB_ONFAULT(%eax)
10144:
1015	movl	%ecx,-12(%ebp)
1016	cmpl	$1792,%ecx
1017	jbe	2f
1018	movl	$1792,%ecx
10192:
1020	subl	%ecx,-12(%ebp)
1021	cmpl	$256,%ecx
1022	jb	5f
1023	movl	%ecx,-8(%ebp)
1024	movl	%esi,-4(%ebp)
1025	ALIGN_TEXT
10263:
1027	movl	0(%esi),%eax
1028	movl	32(%esi),%eax
1029	movl	64(%esi),%eax
1030	movl	96(%esi),%eax
1031	movl	128(%esi),%eax
1032	movl	160(%esi),%eax
1033	movl	192(%esi),%eax
1034	movl	224(%esi),%eax
1035	addl	$256,%esi
1036	subl	$256,%ecx
1037	cmpl	$256,%ecx
1038	jae	3b
1039	movl	-8(%ebp),%ecx
1040	movl	-4(%ebp),%esi
10415:
1042	ALIGN_TEXT
1043fastmove_loop:
1044	fildq	0(%esi)
1045	fildq	8(%esi)
1046	fildq	16(%esi)
1047	fildq	24(%esi)
1048	fildq	32(%esi)
1049	fildq	40(%esi)
1050	fildq	48(%esi)
1051	fildq	56(%esi)
1052	fistpq	56(%edi)
1053	fistpq	48(%edi)
1054	fistpq	40(%edi)
1055	fistpq	32(%edi)
1056	fistpq	24(%edi)
1057	fistpq	16(%edi)
1058	fistpq	8(%edi)
1059	fistpq	0(%edi)
1060	addl	$-64,%ecx
1061	addl	$64,%esi
1062	addl	$64,%edi
1063	cmpl	$63,%ecx
1064	ja	fastmove_loop
1065	movl	-12(%ebp),%eax
1066	addl	%eax,%ecx
1067	cmpl	$64,%ecx
1068	jae	4b
1069
1070/* curpcb->pcb_savefpu = tmp; */
1071	movl	%ecx,-12(%ebp)
1072	movl	%esi,-8(%ebp)
1073	movl	%edi,-4(%ebp)
1074	movl	_curpcb,%edi
1075	addl	$PCB_SAVEFPU,%edi
1076	movl	%esp,%esi
1077	cld
1078	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1079	rep
1080	movsl
1081	movl	-12(%ebp),%ecx
1082	movl	-8(%ebp),%esi
1083	movl	-4(%ebp),%edi
1084
1085/* start_emulating(); */
1086	smsw	%ax
1087	orb	$CR0_TS,%al
1088	lmsw	%ax
1089/* npxproc = NULL; */
1090	movl	$0,_npxproc
1091
1092	ALIGN_TEXT
1093fastmove_tail:
1094	movl	_curpcb,%eax
1095	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)
1096
1097	movb	%cl,%al
1098	shrl	$2,%ecx				/* copy longword-wise */
1099	cld
1100	rep
1101	movsl
1102	movb	%al,%cl
1103	andb	$3,%cl				/* copy remaining bytes */
1104	rep
1105	movsb
1106
1107	movl	%ebp,%esp
1108	popl	%ebp
1109	ret
1110
1111	ALIGN_TEXT
1112fastmove_fault:
1113	movl	_curpcb,%edi
1114	addl	$PCB_SAVEFPU,%edi
1115	movl	%esp,%esi
1116	cld
1117	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1118	rep
1119	movsl
1120
1121	smsw	%ax
1122	orb	$CR0_TS,%al
1123	lmsw	%ax
1124	movl	$0,_npxproc
1125
1126fastmove_tail_fault:
1127	movl	%ebp,%esp
1128	popl	%ebp
1129	addl	$8,%esp
1130	popl	%ebx
1131	popl	%edi
1132	popl	%esi
1133	movl	_curpcb,%edx
1134	movl	$0,PCB_ONFAULT(%edx)
1135	movl	$EFAULT,%eax
1136	ret
1137#endif /* I586_CPU && NNPX > 0 */
1138
1139/*
1140 * fu{byte,sword,word} - MP SAFE
1141 *
1142 *	Fetch a byte (sword, word) from user memory
1143 */
1144ENTRY(fuword)
1145	movl	_curpcb,%ecx
1146	movl	$fusufault,PCB_ONFAULT(%ecx)
1147	movl	4(%esp),%edx			/* from */
1148
1149	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
1150	ja	fusufault
1151
1152	movl	(%edx),%eax
1153	movl	$0,PCB_ONFAULT(%ecx)
1154	ret
1155
1156/*
1157 * These two routines are called from the profiling code, potentially
1158 * at interrupt time. If they fail, that's okay, good things will
1159 * happen later. Fail all the time for now - until the trap code is
1160 * able to deal with this.
1161 */
1162ALTENTRY(suswintr)
1163ENTRY(fuswintr)
1164	movl	$-1,%eax
1165	ret
1166
1167/*
1168 * fusword - MP SAFE
1169 */
1170ENTRY(fusword)
1171	movl	_curpcb,%ecx
1172	movl	$fusufault,PCB_ONFAULT(%ecx)
1173	movl	4(%esp),%edx
1174
1175	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
1176	ja	fusufault
1177
1178	movzwl	(%edx),%eax
1179	movl	$0,PCB_ONFAULT(%ecx)
1180	ret
1181
1182/*
1183 * fubyte - MP SAFE
1184 */
1185ENTRY(fubyte)
1186	movl	_curpcb,%ecx
1187	movl	$fusufault,PCB_ONFAULT(%ecx)
1188	movl	4(%esp),%edx
1189
1190	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
1191	ja	fusufault
1192
1193	movzbl	(%edx),%eax
1194	movl	$0,PCB_ONFAULT(%ecx)
1195	ret
1196
1197	ALIGN_TEXT
1198fusufault:
1199	movl	_curpcb,%ecx
1200	xorl	%eax,%eax
1201	movl	%eax,PCB_ONFAULT(%ecx)
1202	decl	%eax
1203	ret
1204
1205/*
1206 * su{byte,sword,word} - MP SAFE (if not I386_CPU)
1207 *
1208 *	Write a byte (word, longword) to user memory
1209 */
1210ENTRY(suword)
1211	movl	_curpcb,%ecx
1212	movl	$fusufault,PCB_ONFAULT(%ecx)
1213	movl	4(%esp),%edx
1214
1215#if defined(I386_CPU)
1216
1217#if defined(SMP)
1218#error I386_CPU option not supported if SMP
1219#endif
1220
1221#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1222	cmpl	$CPUCLASS_386,_cpu_class
1223	jne	2f				/* we only have to set the right segment selector */
1224#endif /* I486_CPU || I586_CPU || I686_CPU */
1225
1226	/* XXX - page boundary crossing is still not handled */
1227	movl	%edx,%eax
1228	shrl	$IDXSHIFT,%edx
1229	andb	$0xfc,%dl
1230
1231	leal	_PTmap(%edx),%ecx
1232	shrl	$IDXSHIFT,%ecx
1233	andb	$0xfc,%cl
1234	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
1235	je	4f
1236	movb	_PTmap(%edx),%dl
1237	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1238	cmpb	$PG_V|PG_RW|PG_U,%dl
1239	je	1f
1240
12414:
1242	/* simulate a trap */
1243	pushl	%eax
1244	call	_trapwrite
1245	popl	%edx				/* remove junk parameter from stack */
1246	testl	%eax,%eax
1247	jnz	fusufault
12481:
1249	movl	4(%esp),%edx
1250#endif
1251
12522:
1253	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
1254	ja	fusufault
1255
1256	movl	8(%esp),%eax
1257	movl	%eax,(%edx)
1258	xorl	%eax,%eax
1259	movl	_curpcb,%ecx
1260	movl	%eax,PCB_ONFAULT(%ecx)
1261	ret
1262
1263/*
1264 * susword - MP SAFE (if not I386_CPU)
1265 */
1266ENTRY(susword)
1267	movl	_curpcb,%ecx
1268	movl	$fusufault,PCB_ONFAULT(%ecx)
1269	movl	4(%esp),%edx
1270
1271#if defined(I386_CPU)
1272
1273#if defined(SMP)
1274#error I386_CPU option not supported if SMP
1275#endif
1276
1277#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1278	cmpl	$CPUCLASS_386,_cpu_class
1279	jne	2f
1280#endif /* I486_CPU || I586_CPU || I686_CPU */
1281
1282	/* XXX - page boundary crossing is still not handled */
1283	movl	%edx,%eax
1284	shrl	$IDXSHIFT,%edx
1285	andb	$0xfc,%dl
1286
1287	leal	_PTmap(%edx),%ecx
1288	shrl	$IDXSHIFT,%ecx
1289	andb	$0xfc,%cl
1290	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
1291	je	4f
1292	movb	_PTmap(%edx),%dl
1293	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1294	cmpb	$PG_V|PG_RW|PG_U,%dl
1295	je	1f
1296
12974:
1298	/* simulate a trap */
1299	pushl	%eax
1300	call	_trapwrite
1301	popl	%edx				/* remove junk parameter from stack */
1302	testl	%eax,%eax
1303	jnz	fusufault
13041:
1305	movl	4(%esp),%edx
1306#endif
1307
13082:
1309	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
1310	ja	fusufault
1311
1312	movw	8(%esp),%ax
1313	movw	%ax,(%edx)
1314	xorl	%eax,%eax
1315	movl	_curpcb,%ecx			/* restore trashed register */
1316	movl	%eax,PCB_ONFAULT(%ecx)
1317	ret
1318
1319/*
1320 * su[i]byte - MP SAFE (if not I386_CPU)
1321 */
1322ALTENTRY(suibyte)
1323ENTRY(subyte)
1324	movl	_curpcb,%ecx
1325	movl	$fusufault,PCB_ONFAULT(%ecx)
1326	movl	4(%esp),%edx
1327
1328#if defined(I386_CPU)
1329
1330#if defined(SMP)
1331#error I386_CPU option not supported if SMP
1332#endif
1333
1334#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
1335	cmpl	$CPUCLASS_386,_cpu_class
1336	jne	2f
1337#endif /* I486_CPU || I586_CPU || I686_CPU */
1338
1339	movl	%edx,%eax
1340	shrl	$IDXSHIFT,%edx
1341	andb	$0xfc,%dl
1342
1343	leal	_PTmap(%edx),%ecx
1344	shrl	$IDXSHIFT,%ecx
1345	andb	$0xfc,%cl
1346	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
1347	je	4f
1348	movb	_PTmap(%edx),%dl
1349	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1350	cmpb	$PG_V|PG_RW|PG_U,%dl
1351	je	1f
1352
13534:
1354	/* simulate a trap */
1355	pushl	%eax
1356	call	_trapwrite
1357	popl	%edx				/* remove junk parameter from stack */
1358	testl	%eax,%eax
1359	jnz	fusufault
13601:
1361	movl	4(%esp),%edx
1362#endif
1363
13642:
1365	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
1366	ja	fusufault
1367
1368	movb	8(%esp),%al
1369	movb	%al,(%edx)
1370	xorl	%eax,%eax
1371	movl	_curpcb,%ecx			/* restore trashed register */
1372	movl	%eax,PCB_ONFAULT(%ecx)
1373	ret
1374
1375/*
1376 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
1377 *
1378 *	copy a string from from to to, stop when a 0 character is reached.
1379 *	return ENAMETOOLONG if string is longer than maxlen, and
1380 *	EFAULT on protection violations. If lencopied is non-zero,
1381 *	return the actual length in *lencopied.
1382 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)	/* recover user-space faults at cpystrflt */

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax			/* %eax = bytes of user space above 'from' */
	jbe	cpystrflt			/* from >= VM_MAXUSER_ADDRESS: EFAULT */

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)			/* clamp the saved maxlen too; used below for *lencopied */
1:
	incl	%edx				/* pre-bias: loop decrements before testing */
	cld

2:
	decl	%edx
	jz	3f				/* ran out of room before the NUL */

	lodsb
	stosb
	orb	%al,%al				/* copied byte was the NUL terminator? */
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx				/* extra decrement: *lencopied counts the NUL */
	xorl	%eax,%eax			/* return 0 */
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi	/* maxlen was clamped: did we stop at the VA limit? */
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm the fault handler */
	movl	20(%esp),%ecx			/* (possibly clamped) maxlen */
	subl	%edx,%ecx			/* bytes copied (incl. NUL on success) */
	movl	24(%esp),%edx			/* optional lencopied pointer */
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1446
1447
1448/*
1449 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
1450 */
ENTRY(copystr)
	/*
	 * Kernel-to-kernel variant of copyinstr: same loop, but no
	 * PCB_ONFAULT handler is installed, so both pointers must be
	 * valid kernel addresses.
	 */
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx				/* pre-bias: loop decrements before testing */
	cld
1:
	decl	%edx
	jz	4f				/* ran out of room before the NUL */
	lodsb
	stosb
	orb	%al,%al				/* copied byte was the NUL terminator? */
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx				/* extra decrement: *lencopied counts the NUL */
	xorl	%eax,%eax			/* return 0 */
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx			/* maxlen */
	subl	%edx,%ecx			/* bytes copied (incl. NUL on success) */
	movl	24(%esp),%edx			/* optional lencopied pointer */
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret
1488
/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 *
 * Returns 0 if the two buffers are identical, non-zero (1) otherwise.
 * Compares dword-at-a-time, then the 0-3 residual bytes.
 * Clobbers %eax, %ecx, %edx and flags; preserves %esi/%edi; leaves DF=0.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	cld					/* compare forwards */
	movl	12(%esp),%edi			/* %edi = b1 */
	movl	16(%esp),%esi			/* %esi = b2 */
	movl	20(%esp),%edx			/* %edx = len */

	movl	%edx,%ecx			/* whole dwords first */
	shrl	$2,%ecx
	repe
	cmpsl
	jne	1f				/* (ecx==0 falls through: ZF set by shrl) */

	movl	%edx,%ecx			/* then the 0-3 leftover bytes */
	andl	$3,%ecx
	repe
	cmpsb
	jne	1f				/* (ecx==0 falls through: ZF set by andl) */

	xorl	%eax,%eax			/* buffers equal */
	popl	%esi
	popl	%edi
	ret
1:
	movl	$1,%eax				/* mismatch */
	popl	%esi
	popl	%edi
	ret
1515
1516
1517/*
1518 * Handling of special 386 registers and descriptor tables etc
1519 */
1520/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax			/* %eax = rdp (region descriptor) */
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f				/* discard instructions fetched under the old GDT */
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax			/* kernel data selector */
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%gs
	movl	%ax,%ss
#ifdef SMP
	movl	$KPSEL,%eax			/* %fs gets the per-CPU private selector on SMP */
#endif
	movl	%ax,%fs

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax			/* duplicate the return address ... */
	pushl	%eax
	movl	$KCSEL,4(%esp)			/* ... and plant KCSEL under it for lret */
	lret
1546
1547/*
1548 * void lidt(struct region_descriptor *rdp);
1549 */
ENTRY(lidt)
	movl	4(%esp),%eax			/* %eax = rdp (region descriptor) */
	lidt	(%eax)				/* load new IDT base/limit */
	ret
1554
1555/*
1556 * void lldt(u_short sel)
1557 */
ENTRY(lldt)
	lldt	4(%esp)				/* load LDT register from the sel argument */
	ret
1561
1562/*
1563 * void ltr(u_short sel)
1564 */
ENTRY(ltr)
	ltr	4(%esp)				/* load task register from the sel argument */
	ret
1568
1569/* ssdtosd(*ssdp,*sdp) */
ENTRY(ssdtosd)
	/*
	 * Repack the three 32-bit words of the software segment
	 * descriptor at *ssdp into the two 32-bit words of a hardware
	 * i386 segment descriptor at *sdp, interleaving the base,
	 * limit and attribute bits into the hardware layout.
	 */
	pushl	%ebx
	movl	8(%esp),%ecx			/* %ecx = ssdp */
	movl	8(%ecx),%ebx			/* ssd word 2 (attribute/flag bits - TODO confirm field) */
	shll	$16,%ebx
	movl	(%ecx),%edx			/* ssd word 0 */
	roll	$16,%edx
	movb	%dh,%bl				/* shuffle base bytes into hardware positions */
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax			/* ssd word 1 */
	movw	%ax,%dx
	andl	$0xf0000,%eax			/* keep limit bits 19:16 */
	orl	%eax,%ebx
	movl	12(%esp),%ecx			/* %ecx = sdp */
	movl	%edx,(%ecx)			/* write hardware descriptor low word */
	movl	%ebx,4(%ecx)			/* write hardware descriptor high word */
	popl	%ebx
	ret
1589
1590/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax			/* %eax = new cr0 value */
	movl	%eax,%cr0
	ret
1595
1596/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax			/* return current %cr0 */
	ret
1600
1601/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax			/* return current %cr3 (page directory base) */
	ret
1605
1606/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
#if defined(SWTCH_OPTIM_STATS)
	incl	_tlb_flush_count		/* writing %cr3 flushes the TLB; count it */
#endif
	movl	4(%esp),%eax			/* %eax = new page directory base */
	movl	%eax,%cr3
	ret
1614
1615/* rcr4() */
ENTRY(rcr4)
	movl	%cr4,%eax			/* return current %cr4 */
	ret
1619
1620/* void load_cr4(caddr_t cr4) */
ENTRY(load_cr4)
	movl	4(%esp),%eax			/* %eax = new cr4 value */
	movl	%eax,%cr4
	ret
1625
1626/* void load_dr6(u_int dr6) */
ENTRY(load_dr6)
	movl    4(%esp),%eax			/* %eax = new dr6 (debug status) value */
	movl    %eax,%dr6
	ret
1631
1632/* void reset_dbregs() */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0     /* then clear the breakpoint address registers */
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6     /* finally clear the debug status register */
	ret
1642
1643/*****************************************************************************/
1644/* setjump, longjump                                                         */
1645/*****************************************************************************/
1646
/*
 * int setjmp(jmp_buf): save %ebx/%esp/%ebp/%esi/%edi and the return
 * address into the buffer; returns 0 on the direct call.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax			/* %eax = jmp_buf */
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
1658
/*
 * void longjmp(jmp_buf): restore the context saved by setjmp and
 * resume there with a return value of 1.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax			/* %eax = jmp_buf */
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1671
1672/*
1673 * Support for BB-profiling (gcc -a).  The kernbb program will extract
1674 * the data from the kernel.
1675 */
1676
	.data
	ALIGN_DATA
	.globl bbhead
bbhead:						/* head of the list of registered bb records */
	.long 0
	.text
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax			/* %eax = this object's bb record */
	movl	$1,(%eax)			/* mark it initialized (first word; presumably gcc's struct bb zero_word - TODO confirm) */
	movl	bbhead,%edx			/* push the record onto the bbhead list ... */
	movl	%edx,16(%eax)			/* ... through the link field at offset 16 */
	movl	%eax,bbhead
	.byte	0xc3				/* avoid macro for `ret' */
1691