/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/i386/i386/support.s 181430 2008-08-08 16:26:53Z stas $
 */

#include "opt_npx.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/intr_machdep.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

#define IDXSHIFT	10

	.data
	.globl	bcopy_vector
bcopy_vector:
	.long	generic_bcopy
	.globl	bzero_vector
bzero_vector:
	.long	generic_bzero
	.globl	copyin_vector
copyin_vector:
	.long	generic_copyin
	.globl	copyout_vector
copyout_vector:
	.long	generic_copyout
#if defined(I586_CPU) && defined(DEV_NPX)
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
	ALIGN_DATA
	.globl	intrcnt, eintrcnt
intrcnt:
	.space	INTRCNT_COUNT * 4
eintrcnt:

	.globl	intrnames, eintrnames
intrnames:
	.space	INTRCNT_COUNT * (MAXCOMLEN + 1)
eintrnames:

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(bzero)
	MEXITCOUNT
	jmp	*bzero_vector
END(bzero)
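
/*
 * The bzero/bcopy/copyin/copyout entry points dispatch through the
 * function-pointer vectors defined in the .data section above, which the
 * CPU identification code can repoint at a CPU-specific implementation.
 * A rough C picture of the mechanism (illustrative only; the kernel does
 * this in assembly):
 *
 *	void (*bzero_vector)(void *buf, u_int len) = generic_bzero;
 *
 *	void
 *	bzero(void *buf, u_int len)
 *	{
 *		(*bzero_vector)(buf, len);
 *	}
 *
 * The jmp (rather than call) reuses the caller's stack frame, so the
 * vectored routine sees the original arguments at the usual offsets.
 */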

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret
END(generic_bzero)
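
/*
 * generic_bzero in rough C terms (an illustrative sketch, not kernel
 * code): the length is split into 32-bit stores plus a 0-3 byte tail.
 *
 *	void
 *	generic_bzero(void *buf, u_int len)
 *	{
 *		uint32_t *p = buf;
 *		u_int i;
 *
 *		for (i = 0; i < len / 4; i++)	// rep stosl
 *			*p++ = 0;
 *		for (i = 0; i < (len & 3); i++)	// rep stosb
 *			((uint8_t *)p)[i] = 0;
 *	}
 */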

#ifdef I486_CPU
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
END(i486_bzero)
#endif

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `fpcurthread != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but fpcurthread and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz1

	/*
	 * XXX don't use the FPU for cases 1 and 2, since preemptive
	 * scheduling of ithreads broke these cases.  Note that we can
	 * no longer get here from an interrupt handler, since the
	 * context switch to the interrupt handler will have saved the
	 * FPU state.
	 */
	jmp	intreg_i586_bzero

	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx
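
	/*
	 * Worked example of the alignment trick above (illustrative): with
	 * %edx = 0x1003 and %ecx = 100, the end address is 0x1067.  The
	 * first fstl zeroes bytes 0x1003-0x100a; %edx is then rounded up
	 * to 0x1008 and %ecx becomes 0x1067 - 0x1008 = 95.  The second
	 * fstl zeroes the last 8 bytes (0x105f-0x1066), and %ecx is
	 * rounded down to 88, which the main loop below consumes 8 bytes
	 * at a time with no fix-up code.
	 */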

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz3

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bzero_oops:
	int	$3
	jmp	i586_bzero_oops

	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstp	%st(0)
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
END(i586_bzero)
#endif /* I586_CPU && defined(DEV_NPX) */

ENTRY(sse2_pagezero)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	%ecx,%eax
	addl	$4096,%eax
	xor	%ebx,%ebx
1:
	movnti	%ebx,(%ecx)
	addl	$4,%ecx
	cmpl	%ecx,%eax
	jne	1b
	sfence
	popl	%ebx
	ret
END(sse2_pagezero)
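
/*
 * sse2_pagezero in rough C terms (an illustrative sketch using compiler
 * intrinsics, not kernel code): the non-temporal stores (movnti) bypass
 * the cache so that zeroing a page does not evict useful cache lines,
 * and the trailing sfence orders the weakly-ordered stores.
 *
 *	#include <emmintrin.h>
 *
 *	void
 *	sse2_pagezero(void *page)
 *	{
 *		int *p;
 *
 *		for (p = page; p < (int *)page + 4096 / sizeof(int); p++)
 *			_mm_stream_si32(p, 0);	// movnti
 *		_mm_sfence();
 *	}
 */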

ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp),%edi
	movl	$1024,%ecx
	cld

	ALIGN_TEXT
1:
	xorl	%eax,%eax
	repe
	scasl
	jnz	2f

	popl	%ebx
	popl	%edi
	ret

	ALIGN_TEXT

2:
	incl	%ecx
	subl	$4,%edi

	movl	%ecx,%edx
	cmpl	$16,%ecx

	jge	3f

	movl	%edi,%ebx
	andl	$0x3f,%ebx
	shrl	%ebx
	shrl	%ebx
	movl	$16,%ecx
	subl	%ebx,%ecx

3:
	subl	%ecx,%edx
	rep
	stosl

	movl	%edx,%ecx
	testl	%edx,%edx
	jnz	1b

	popl	%ebx
	popl	%edi
	ret
END(i686_pagezero)

/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret
END(fillw)

ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld
	ret
END(bcopyb)

ENTRY(bcopy)
	MEXITCOUNT
	jmp	*bcopy_vector
END(bcopy)

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
END(generic_bcopy)
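
/*
 * The overlap test used above relies on unsigned wraparound: the single
 * comparison (dst - src) < len is true exactly when src < dst < src + len,
 * i.e. when a forward copy would clobber not-yet-copied source bytes.
 * Illustrative example: src = 100, dst = 110, len = 32 gives
 * dst - src = 10 < 32, so the copy must run backwards; with dst < src
 * the subtraction wraps to a huge unsigned value and the forward path
 * is taken.
 */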

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc1

	/* XXX turn off handling of cases 1-2, as above. */
	movb	$0xfe,kernel_fpu_lock
	jmp	small_i586_bcopy

	smsw	%dx
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc2

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bcopy_oops:
	int	$3
	jmp	i586_bcopy_oops

	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
END(i586_bcopy)
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
END(memcpy)

/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3. For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0. The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */
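
/*
 * Typical use of the onfault protocol from C (an illustrative sketch of
 * a hypothetical caller, not code from this file): the copy routines
 * return 0 on success or EFAULT if the user address was invalid, so
 * callers never fault on bad user pointers.
 *
 *	struct foo kbuf;
 *	int error;
 *
 *	error = copyin(uap->userp, &kbuf, sizeof(kbuf));
 *	if (error != 0)
 *		return (error);		// EFAULT on a bad user address
 *	...
 *	error = copyout(&kbuf, uap->userp, sizeof(kbuf));
 */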

/*
 * copyout(from_kernel, to_user, len)  - MP SAFE
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*copyout_vector
END(copyout)

ENTRY(generic_copyout)
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
	movl	%ebx,%ecx

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret
END(generic_copyout)

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	fastmove
	addl	$4,%esp
	jmp	done_copyout
END(i586_copyout)
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * copyin(from_user, to_kernel, len) - MP SAFE
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*copyin_vector
END(copyin)

ENTRY(generic_copyin)
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret
END(generic_copyin)

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	fastmove
	addl	$8,%esp
	jmp	done_copyin
END(i586_copyin)
#endif /* I586_CPU && defined(DEV_NPX) */

#if defined(I586_CPU) && defined(DEV_NPX)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

	/* XXX grab FPU context atomically. */
	cli

/* if (fpcurthread != NULL) { */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	PCPU(CURPCB),%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   FPCURTHREAD = NULL; */
	movl	$0,PCPU(FPCURTHREAD)
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	PCPU(CURPCB),%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* fpcurthread = curthread; */
	movl	PCPU(CURTHREAD),%eax
	movl	%eax,PCPU(FPCURTHREAD)
	movl	PCPU(CURPCB),%eax

	/* XXX end of atomic FPU context grab. */
	sti

	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	/* XXX ungrab FPU context atomically. */
	cli

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* fpcurthread = NULL; */
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

	ALIGN_TEXT
fastmove_tail:
	movl	PCPU(CURPCB),%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* XXX ungrab FPU context atomically. */
	cli

	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
END(fastmove)
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * casuword.  Compare and set user word.  Returns -1 or the current value.
 */

ALTENTRY(casuword32)
ENTRY(casuword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* dst */
	movl	8(%esp),%eax			/* old */
	movl	12(%esp),%ecx			/* new */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%edx)			/* Compare and set. */

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */

	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	$0,PCB_ONFAULT(%ecx)
	ret
END(casuword32)
END(casuword)
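
/*
 * casuword in rough C terms (illustrative): atomically replace a user
 * word if it still holds the expected value, returning what was there.
 * A sketch of a hypothetical caller (names are placeholders, not code
 * from this file):
 *
 *	u_long owner;
 *
 *	owner = casuword(&m->m_owner, UMTX_UNOWNED, tid);
 *	if (owner == (u_long)-1)
 *		return (EFAULT);	// faulted on the user address
 *	if (owner == UMTX_UNOWNED)
 *		...			// we won the race
 */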

/*
 * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
 * memory.  All these functions are MPSAFE.
 */

ALTENTRY(fuword32)
ENTRY(fuword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
END(fuword32)
END(fuword)

/*
 * fuswintr() and suswintr() are specialized variants of fuword16() and
 * suword16(), respectively.  They are called from the profiling code,
 * potentially at interrupt time.  If they fail, that's okay; good things
 * will happen later.  They always fail for now, until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret
END(suswintr)
END(fuswintr)

ENTRY(fuword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
END(fuword16)

ENTRY(fubyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret
END(fubyte)

	ALIGN_TEXT
fusufault:
	movl	PCPU(CURPCB),%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
 * All these functions are MPSAFE.
 */

ALTENTRY(suword32)
ENTRY(suword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
END(suword32)
END(suword)

ENTRY(suword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
END(suword16)

ENTRY(subyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
END(subyte)
/*
 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 *	Copy a string from 'from' (user space) to 'to' (kernel space),
 *	stopping when a 0 byte is reached.  Return ENAMETOOLONG if the
 *	string is longer than maxlen, and EFAULT on protection violations.
 *	If lencopied is non-NULL, return the actual length, including the
 *	terminating 0 byte, in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	PCPU(CURPCB),%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
END(copyinstr)
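
/*
 * Illustrative caller (a sketch, not code from this file): fetching a
 * pathname from user space into a kernel buffer.
 *
 *	char path[MAXPATHLEN];
 *	size_t len;
 *	int error;
 *
 *	error = copyinstr(uap->path, path, sizeof(path), &len);
 *	if (error != 0)
 *		return (error);		// EFAULT or ENAMETOOLONG
 *	// 'path' now holds a NUL-terminated string of 'len' bytes.
 */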

/*
 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret
END(copystr)

ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
1:
	setne	%al
	movsbl	%al,%eax
	popl	%esi
	popl	%edi
	ret
END(bcmp)

/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%gs
	movl	%eax,%ss
	movl	$KPSEL,%eax
	movl	%eax,%fs

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	MEXITCOUNT
	lret
END(lgdt)

/* ssdtosd(*ssdp,*sdp) */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret
END(ssdtosd)

/* void reset_dbregs() */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6
	ret
END(reset_dbregs)

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
END(setjmp)

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
END(longjmp)
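
/*
 * Kernel setjmp/longjmp save and restore only the callee-saved registers
 * plus %esp and the return address, so a longjmp unwinds straight back
 * to the matching setjmp call site.  Illustrative use (a sketch of a
 * hypothetical caller, e.g. a debugger attempting a risky access):
 *
 *	static jmp_buf jb;
 *
 *	if (setjmp(jb) == 0) {
 *		...			// direct path; risky code goes here
 *	} else {
 *		...			// resumed here after a longjmp on jb
 *	}
 *
 * Note that this longjmp always makes setjmp appear to return 1.
 */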

/*
 * Support for BB-profiling (gcc -a).  The kernbb program will extract
 * the data from the kernel.
 */

	.data
	ALIGN_DATA
	.globl bbhead
bbhead:
	.long 0

	.text
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	movl	bbhead,%edx
	movl	%edx,16(%eax)
	movl	%eax,bbhead
	NON_GPROF_RET

/*
 * Support for reading MSRs in the safe manner.
 */
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
	movl	PCPU(CURPCB),%ecx
	movl	$msr_onfault,PCB_ONFAULT(%ecx)

	movl	4(%esp),%ecx
	rdmsr
	movl	8(%esp),%ecx
	movl	%eax,(%ecx)
	movl	%edx,4(%ecx)
	xorl	%eax,%eax

	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)

	ret

/*
 * Support for writing MSRs in the safe manner.
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
	movl	PCPU(CURPCB),%ecx
	movl	$msr_onfault,PCB_ONFAULT(%ecx)

	movl	4(%esp),%ecx
	movl	8(%esp),%eax
	movl	12(%esp),%edx
	wrmsr
	xorl	%eax,%eax

	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)

	ret
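
/*
 * These wrappers use the same onfault protocol as the user-memory
 * routines: a #GP from rdmsr/wrmsr on an unimplemented MSR lands in
 * msr_onfault below, and the caller sees EFAULT instead of a panic.
 * Illustrative probe of a possibly-absent MSR (a sketch; MSR_FOO is a
 * placeholder, not a real register name):
 *
 *	uint64_t val;
 *
 *	if (rdmsr_safe(MSR_FOO, &val) != 0)
 *		return;			// CPU lacks this MSR
 */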

/*
 * MSR operations fault handler
 */
	ALIGN_TEXT
msr_onfault:
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	$EFAULT,%eax
	ret