support.s revision 161314
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/i386/i386/support.s 161314 2006-08-15 13:45:39Z netchild $
30 */
31
32#include "opt_npx.h"
33
34#include <machine/asmacros.h>
35#include <machine/cputypes.h>
36#include <machine/intr_machdep.h>
37#include <machine/pmap.h>
38#include <machine/specialreg.h>
39
40#include "opt_global.h"
41#include "assym.s"
42
43#define IDXSHIFT	10
44
	.data
/*
 * CPU-selected dispatch pointers for the bcopy/bzero/copyin/copyout
 * families.  They start out pointing at the generic routines and may
 * be repointed at the i486/i586 variants during CPU identification.
 */
	.globl	bcopy_vector
bcopy_vector:
	.long	generic_bcopy
	.globl	bzero_vector
bzero_vector:
	.long	generic_bzero
	.globl	copyin_vector
copyin_vector:
	.long	generic_copyin
	.globl	copyout_vector
copyout_vector:
	.long	generic_copyout
#if defined(I586_CPU) && defined(DEV_NPX)
/* 0xfe = unlocked; `sarb $1' acquires (CF set means already locked). */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
	ALIGN_DATA
	.globl	intrcnt, eintrcnt	/* per-source interrupt counters */
intrcnt:
	.space	INTRCNT_COUNT * 4
eintrcnt:

	.globl	intrnames, eintrnames	/* names matching the counters above */
intrnames:
	.space	INTRCNT_COUNT * (MAXCOMLEN + 1)
eintrnames:
73
74	.text
75
76/*
77 * bcopy family
78 * void bzero(void *buf, u_int len)
79 */
80
/*
 * void bzero(void *buf, u_int len)
 * Tail-jump through the CPU-selected implementation vector.
 */
ENTRY(bzero)
	MEXITCOUNT
	jmp	*bzero_vector
84
/*
 * void generic_bzero(void *buf, u_int len)
 * Zero len bytes: rep stosl for the dword part, rep stosb for the
 * 0-3 trailing bytes.
 */
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi		/* %edi = buf */
	movl	12(%esp),%ecx		/* %ecx = len */
	xorl	%eax,%eax		/* store pattern = 0 */
	shrl	$2,%ecx			/* dword count */
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len */
	andl	$3,%ecx			/* trailing byte count */
	rep
	stosb
	popl	%edi
	ret
100
#ifdef I486_CPU
/*
 * void i486_bzero(void *buf, u_int len)
 *
 * 486-tuned bzero: range reduction through 64-, 16- and 4-byte store
 * chunks, with the final 0-3 bytes dispatched through a jump table.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx		/* %edx = buf */
	movl	8(%esp),%ecx		/* %ecx = len */
	xorl	%eax,%eax		/* store pattern = 0 */
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b			/* exact multiple of 64: done */
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)		/* %ecx = 0..3 bytes remaining */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
202
#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * void i586_bzero(void *buf, u_int len)
 *
 * Pentium-tuned bzero: 8-byte FPU stores (fstl of 0.0) for large
 * buffers when the FPU is free, otherwise the integer rep-stos path
 * at intreg_i586_bzero.
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx		/* %edx = buf */
	movl	8(%esp),%ecx		/* %ecx = len */

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `fpcurthread != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but fpcurthread and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz1

	/*
	 * XXX don't use the FPU for cases 1 and 2, since preemptive
	 * scheduling of ithreads broke these cases.  Note that we can
	 * no longer get here from an interrupt handler, since the
	 * context switch to the interrupt handler will have saved the
	 * FPU state.
	 */
	jmp	intreg_i586_bzero

	/* NOTE(review): dead code below; skipped by the jmp above. */
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the FPU lock */
	jc	intreg_i586_bzero
	smsw	%ax			/* save CR0_TS state in %ax */
	clts
	subl	$108,%esp		/* room for an fnsave image */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock	/* try to take the FPU lock */
	jc	intreg_i586_bzero
	smsw	%ax			/* save CR0_TS state in %ax */
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz				/* st(0) = 0.0, our store pattern */

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz3

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bzero_oops:
	int	$3			/* should be unreachable; trap if not */
	jmp	i586_bzero_oops

	/* NOTE(review): dead code; only reachable from the disabled path. */
	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the FPU lock */
	ret

i586_bz3:
	fstp	%st(0)			/* pop our 0.0 */
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the FPU lock */
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* len: 8(%esp) arg + pushed %edi */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && defined(DEV_NPX) */
360
/*
 * void sse2_pagezero(void *addr)
 *
 * Zero one 4096-byte page with non-temporal stores (movnti) so the
 * target page is not dragged into the cache; the trailing sfence
 * orders the weakly-ordered NT stores before we return.
 */
ENTRY(sse2_pagezero)
	pushl	%ebx
	movl	8(%esp),%ecx		/* %ecx = store cursor */
	leal	4096(%ecx),%eax		/* %eax = first byte past the page */
	xorl	%ebx,%ebx		/* zero source operand */
1:
	movnti	%ebx,(%ecx)		/* 4-byte non-temporal store */
	addl	$4,%ecx
	cmpl	%ecx,%eax
	jne	1b
	sfence				/* make NT stores globally visible */
	popl	%ebx
	ret
375
/*
 * void i686_pagezero(void *addr)
 *
 * Zero one 4096-byte page, skipping dwords that are already zero:
 * `repe scasl' scans forward while dwords are zero, and `rep stosl'
 * rewrites a run starting at the first non-zero dword.
 */
ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp), %edi		/* %edi = addr */
	movl	$1024, %ecx		/* 1024 dwords = 4096 bytes */
	cld

	ALIGN_TEXT
1:
	xorl	%eax, %eax
	repe
	scasl				/* skip dwords that are already 0 */
	jnz	2f			/* found a non-zero dword */

	popl	%ebx
	popl	%edi
	ret

	ALIGN_TEXT

2:
	incl	%ecx			/* count the mismatched dword back in */
	subl	$4, %edi		/* ... and back %edi up to it */

	movl	%ecx, %edx		/* %edx = dwords remaining in page */
	cmpl	$16, %ecx

	jge	3f			/* >= 16 left: store them all */

	/*
	 * Fewer than 16 dwords left: store through the end of the
	 * current 64-byte line instead (page end is line-aligned, so
	 * this cannot run past the page).
	 */
	movl	%edi, %ebx
	andl	$0x3f, %ebx		/* byte offset within 64-byte line */
	shrl	%ebx
	shrl	%ebx			/* -> dword offset within the line */
	movl	$16, %ecx
	subl	%ebx, %ecx		/* dwords left in this line */

3:
	subl	%ecx, %edx		/* %edx = dwords left after this run */
	rep
	stosl

	movl	%edx, %ecx
	testl	%edx, %edx
	jnz	1b			/* resume scanning */

	popl	%ebx
	popl	%edi
	ret
425
426/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	12(%esp),%edi		/* base: destination pointer */
	movl	8(%esp),%eax		/* pat: 16-bit fill pattern in %ax */
	movl	16(%esp),%ecx		/* cnt: number of 16-bit words */
	cld				/* fill forwards */
	rep
	stosw
	popl	%edi
	ret
437
/*
 * void bcopyb(const void *from, void *to, size_t len)
 *
 * Overlap-safe copy done strictly one byte at a time (no dword
 * grouping): copies backwards when the regions overlap with src < dst.
 */
ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* %esi = from */
	movl	16(%esp),%edi		/* %edi = to */
	movl	20(%esp),%ecx		/* %ecx = len */
	movl	%edi,%eax
	subl	%esi,%eax		/* %eax = dst - src (unsigned) */
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi			/* point at the last byte of each */
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld				/* restore the expected direction flag */
	ret
468
/*
 * void bcopy(const void *from, void *to, size_t len)
 * Tail-jump through the CPU-selected implementation vector.
 */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*bcopy_vector
472
473/*
474 * generic_bcopy(src, dst, cnt)
475 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
476 */
/*
 * Overlap-safe copy: dword-wise rep movsl plus a 0-3 byte tail,
 * copying backwards (tail bytes first, then dwords) when the regions
 * overlap with src < dst.
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* %esi = src */
	movl	16(%esp),%edi		/* %edi = dst */
	movl	20(%esp),%ecx		/* %ecx = cnt */

	movl	%edi,%eax
	subl	%esi,%eax		/* %eax = dst - src (unsigned) */
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi			/* step back to the last full dword */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld				/* restore the expected direction flag */
	ret
521
#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * void i586_bcopy(const void *src, void *dst, size_t cnt)
 *
 * Pentium-tuned bcopy: for large non-overlapping copies, moves 64
 * bytes per iteration through the FPU (fildq/fistpq) after touching
 * the source to prime the cache; small or overlapping copies fall
 * back to the rep-movs paths below.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* %esi = src */
	movl	16(%esp),%edi		/* %edi = dst */
	movl	20(%esp),%ecx		/* %ecx = cnt */

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock	/* try to take the FPU lock */
	jc	small_i586_bcopy
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc1

	/* XXX turn off handling of cases 1-2, as above. */
	movb	$0xfe,kernel_fpu_lock	/* release lock again */
	jmp	small_i586_bcopy

	/* NOTE(review): dead code below; skipped by the jmp above. */
	smsw	%dx			/* save CR0_TS state in %dx */
	clts
	subl	$108,%esp		/* room for an fnsave image */
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx			/* save CR0_TS state in %dx */
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx			/* 0(%esp) = count not yet scheduled */
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx	/* clamp chunk to ~half L1 */
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
	/* Touch one dword per 32-byte line to pull the source into cache. */
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	/* 64 bytes per iteration via the 8 FPU registers. */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax			/* count still to be scheduled */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b			/* schedule the next chunk */

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc2

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bcopy_oops:
	int	$3			/* should be unreachable; trap if not */
	jmp	i586_bcopy_oops

	/* NOTE(review): dead code; only reachable from the disabled path. */
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release the FPU lock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi		/* overlapping: copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && defined(DEV_NPX) */
670
671/*
672 * Note: memcpy does not support overlapping copies
673 */
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 * Forward-only copy (overlap unsupported); returns dst in %eax.
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi		/* %edi = dst */
	movl	16(%esp),%esi		/* %esi = src */
	movl	20(%esp),%ecx		/* %ecx = len */
	movl	%edi,%eax		/* return value = dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* always copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
692
693
694/*****************************************************************************/
695/* copyout and fubyte family                                                 */
696/*****************************************************************************/
697/*
698 * Access user memory from inside the kernel. These routines and possibly
699 * the math- and DOS emulators should be the only places that do this.
700 *
701 * We have to access the memory with user's permissions, so use a segment
702 * selector with RPL 3. For writes to user space we have to additionally
703 * check the PTE for write permission, because the 386 does not check
704 * write permissions when we are executing with EPL 0. The 486 does check
705 * this if the WP bit is set in CR0, so we can use a simpler version here.
706 *
707 * These routines set curpcb->onfault for the time they execute. When a
708 * protection violation occurs inside the functions, the trap handler
709 * returns to *curpcb->onfault instead of the function.
710 */
711
712/*
713 * copyout(from_kernel, to_user, len)  - MP SAFE
714 */
/*
 * copyout(from_kernel, to_user, len)  - MP SAFE
 * Tail-jump through the CPU-selected implementation vector.
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*copyout_vector
718
/*
 * int generic_copyout(const void *kaddr, void *uaddr, size_t len)
 *
 * Copy len bytes from kernel space to user space.  Returns 0, or
 * EFAULT via the curpcb->pcb_onfault trap recovery mechanism.
 */
ENTRY(generic_copyout)
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* %esi = kaddr */
	movl	20(%esp),%edi		/* %edi = uaddr */
	movl	24(%esp),%ebx		/* %ebx = len */
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
	movl	%ebx,%ecx

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyout:				/* shared tail for i586_copyout */
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl			/* 0-3 trailing bytes */
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* return 0 */
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
787
#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * int i586_copyout(const void *kaddr, void *uaddr, size_t len)
 * As generic_copyout, but copies >= 1024 bytes via fastmove().
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi		/* %esi = kaddr */
	movl	20(%esp),%edi		/* %edi = uaddr */
	movl	24(%esp),%ebx		/* %ebx = len */
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout		/* small: rep-movs in generic_copyout */

	pushl	%ecx
	call	fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && defined(DEV_NPX) */
841
842/*
843 * copyin(from_user, to_kernel, len) - MP SAFE
844 */
/*
 * copyin(from_user, to_kernel, len) - MP SAFE
 * Tail-jump through the CPU-selected implementation vector.
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*copyin_vector
848
/*
 * int generic_copyin(const void *uaddr, void *kaddr, size_t len)
 *
 * Copy len bytes from user space to kernel space.  Returns 0, or
 * EFAULT via the curpcb->pcb_onfault trap recovery mechanism.
 */
ENTRY(generic_copyin)
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault			/* source range wraps */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyin:				/* shared tail for i586_copyin */
#endif
	movb	%cl,%al			/* save low bits of len */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
done_copyin:				/* shared exit for i586_copyin */
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax		/* return 0 */
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)	/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
900
#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * int i586_copyin(const void *uaddr, void *kaddr, size_t len)
 * As generic_copyin, but copies >= 1024 bytes via fastmove().
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin		/* small: rep-movs in generic_copyin */

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && defined(DEV_NPX) */
935
#if defined(I586_CPU) && defined(DEV_NPX)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * Bulk user<->kernel copy helper for i586_copyin/i586_copyout: takes
 * over the FPU (saving any application state into a stack buffer) and
 * moves 64 bytes per iteration via fildq/fistpq; misaligned or short
 * copies use the rep-movs tail at fastmove_tail.  On a user-memory
 * fault, fastmove_fault/fastmove_tail_fault unwind both this frame and
 * the i586_copy{in,out} caller frame and return EFAULT.
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 spill slots */

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

	/* XXX grab FPU context atomically. */
	cli

/* if (fpcurthread != NULL) { */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	PCPU(CURPCB),%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   FPCURTHREAD = NULL; */
	movl	$0,PCPU(FPCURTHREAD)
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)		/* spill len/src/dst around the copy */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	PCPU(CURPCB),%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* fpcurthread = curthread; */
	movl	PCPU(CURTHREAD),%eax
	movl	%eax,PCPU(FPCURTHREAD)
	movl	PCPU(CURPCB),%eax

	/* XXX end of atomic FPU context grab. */
	sti

	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)		/* -12(%ebp) = count not yet scheduled */
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx		/* clamp this chunk */
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
	/* Touch one dword per 32-byte line to pull the source into cache. */
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	/* 64 bytes per iteration via the 8 FPU registers. */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax		/* add back the unscheduled count */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	/* XXX ungrab FPU context atomically. */
	cli

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* fpcurthread = NULL; */
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

	ALIGN_TEXT
fastmove_tail:
	movl	PCPU(CURPCB),%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al			/* save low bits of len */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* XXX ungrab FPU context atomically. */
	cli

	/* Restore the application FPU state we stashed on our stack. */
	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

fastmove_tail_fault:
	/* Unwind our frame and the i586_copy{in,out} caller's frame. */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp			/* skip return addr + pushed len arg */
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && defined(DEV_NPX) */
1142
1143/*
1144 * casuptr.  Compare and set user pointer.  Returns -1 or the current value.
1145 */
/*
 * casuptr(intptr_t *dst, intptr_t old, intptr_t new)
 *
 * Compare-and-set a user-space pointer.  Returns the previous value
 * of *dst (== old iff the store succeeded), or -1 via fusufault if
 * the address is invalid or the access faults.
 *
 * Fix: dropped a dead store that re-armed PCB_ONFAULT with $fusufault
 * immediately before clearing it — nothing between the two stores can
 * fault, so the re-arm had no effect.
 */
ENTRY(casuptr)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx			/* dst */
	movl	8(%esp),%eax			/* old */
	movl	12(%esp),%ecx			/* new */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

#ifdef SMP
	lock
#endif
	cmpxchgl %ecx, (%edx)			/* Compare and set. */

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */

	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1171
1172/*
1173 * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
1174 * memory.  All these functions are MPSAFE.
1175 */
1176
/*
 * fuword(addr) / fuword32(addr): fetch a 32-bit word from user space.
 * Returns the word, or -1 via fusufault on a bad address.
 */
ALTENTRY(fuword32)
ENTRY(fuword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1189
1190/*
1191 * fuswintr() and suswintr() are specialized variants of fuword16() and
1192 * suword16(), respectively.  They are called from the profiling code,
1193 * potentially at interrupt time.  If they fail, that's okay; good things
1194 * will happen later.  They always fail for now, until the trap code is
1195 * able to deal with this.
1196 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax	/* always fail; see the comment above */
	ret
1201
/*
 * fuword16(addr): fetch a 16-bit word from user space (zero-extended).
 * Returns the word, or -1 via fusufault on a bad address.
 */
ENTRY(fuword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1213
/*
 * fubyte(addr): fetch a byte from user space (zero-extended).
 * Returns the byte, or -1 via fusufault on a bad address.
 */
ENTRY(fubyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1225
	ALIGN_TEXT
/* Common fault target for fu*/su* routines: clear onfault, return -1. */
fusufault:
	movl	PCPU(CURPCB),%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax			/* %eax = -1 */
	ret
1233
1234/*
1235 * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
1236 * All these functions are MPSAFE.
1237 */
1238
/*
 * suword(addr, val) / suword32(addr, val): store a 32-bit word to user
 * space.  Returns 0, or -1 via fusufault on a bad address.
 */
ALTENTRY(suword32)
ENTRY(suword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax			/* return 0 */
	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1254
/*
 * suword16(addr, val): store a 16-bit word to user space.
 * Returns 0, or -1 via fusufault on a bad address.
 */
ENTRY(suword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax			/* return 0 */
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1269
/*
 * subyte(addr, val): store a byte to user space.
 * Returns 0, or -1 via fusufault on a bad address.
 */
ENTRY(subyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax			/* return 0 */
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	ret
1284
1285/*
1286 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
1287 *
1288 *	copy a string from from to to, stop when a 0 character is reached.
1289 *	return ENAMETOOLONG if string is longer than maxlen, and
1290 *	EFAULT on protection violations. If lencopied is non-zero,
1291 *	return the actual length in *lencopied.
1292 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	PCPU(CURPCB),%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)	/* arm fault recovery */

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)			/* clamped maxlen, for *lencopied */
1:
	incl	%edx				/* pre-bias for the decl below */
	cld

2:
	decl	%edx
	jz	3f				/* maxlen exhausted */

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax			/* return 0 */
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi	/* stopped at the user limit? */
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* bytes copied = maxlen - left */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f				/* lencopied == NULL */
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1356
1357
1358/*
1359 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
1360 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx				/* pre-bias for the decl below */
	cld
1:
	decl	%edx
	jz	4f				/* maxlen exhausted */
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax			/* return 0 */
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* bytes copied = maxlen - left */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f				/* lencopied == NULL */
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret
1398
/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 * Returns 0 if the regions match, 1 otherwise (via setne).
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx		/* %edx = len */

	movl	%edx,%ecx
	shrl	$2,%ecx			/* compare dword-wise first */
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx			/* then the 0-3 trailing bytes */
	repe
	cmpsb
1:
	setne	%al			/* ZF clear -> mismatch -> 1 */
	movsbl	%al,%eax
	popl	%esi
	popl	%edi
	ret
1423
1424
1425/*
1426 * Handling of special 386 registers and descriptor tables etc
1427 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/*
	 * Reload "stale" selectors: the data/stack selectors get the
	 * kernel data selector, %fs gets the per-CPU private selector.
	 */
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%gs
	movl	%eax,%ss
	movl	$KPSEL,%eax
	movl	%eax,%fs

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)			/* far frame: KCSEL:return-eip */
	MEXITCOUNT
	lret
1453
/*
 * ssdtosd(*ssdp,*sdp)
 *
 * Convert a machine-independent "soft" segment descriptor (3 ints:
 * base/limit/flags at 0/4/8(%ecx)) into the i386 hardware segment
 * descriptor layout at *sdp, shuffling the split base/limit fields
 * into their architected bit positions.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx			/* %ecx = ssdp */
	movl	8(%ecx),%ebx			/* flags word */
	shll	$16,%ebx
	movl	(%ecx),%edx			/* first soft word */
	roll	$16,%edx
	movb	%dh,%bl				/* scatter bytes into the */
	movb	%dl,%bh				/* hardware split fields */
	rorl	$8,%ebx
	movl	4(%ecx),%eax			/* second soft word */
	movw	%ax,%dx
	andl	$0xf0000,%eax			/* keep limit bits 19:16 */
	orl	%eax,%ebx
	movl	12(%esp),%ecx			/* %ecx = sdp */
	movl	%edx,(%ecx)			/* low 32 bits of descriptor */
	movl	%ebx,4(%ecx)			/* high 32 bits of descriptor */
	popl	%ebx
	ret
1474
/* void reset_dbregs() */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0     /* then clear the address registers */
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6     /* and the debug status register */
	ret
1485
1486/*****************************************************************************/
1487/* setjump, longjump                                                         */
1488/*****************************************************************************/
1489
/*
 * setjmp(jmp_buf): save the callee-saved registers, stack pointer and
 * return address into the jmp_buf so longjmp() can resume here.
 * Returns 0 on the direct call; longjmp() makes it "return" 1.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax			/* %eax = jmp_buf */
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
1501
/*
 * longjmp(jmp_buf): restore the context saved by setjmp() and return
 * to just after the matching setjmp() call, which then returns 1.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax			/* %eax = jmp_buf */
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1514
1515/*
1516 * Support for BB-profiling (gcc -a).  The kernbb program will extract
1517 * the data from the kernel.
1518 */
1519
1520	.data
1521	ALIGN_DATA
1522	.globl bbhead
1523bbhead:
1524	.long 0
1525
/*
 * MPLOCKED expands to a "lock" prefix when another CPU may touch the
 * same memory (SMP kernel, or userland builds); on a UP kernel the
 * bus lock is unnecessary and is omitted.
 */
#if defined(SMP) || !defined(_KERNEL)
#define MPLOCKED        lock ;
#else
#define MPLOCKED
#endif
1531
	.text
/*
 * Called once per compiled object to register its BB-profile record:
 * mark the record initialized and link it onto the bbhead list.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax			/* %eax = profile record */
	movl	$1,(%eax)			/* mark it initialized */
	movl	bbhead,%edx
	movl	%edx,16(%eax)			/* record->next = bbhead */
	movl	%eax,bbhead			/* bbhead = record */
	NON_GPROF_RET
1540
1541/* necessary for linux_futex support */
1542	.text
1543
/*
 * Common fault recovery target for the futex_* helpers below: the
 * page-fault handler dispatches here via PCB_ONFAULT when a user
 * access faults.  Disarm the handler and fail with -EFAULT (Linux
 * negative-errno convention).
 */
futex_fault:
	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)		/* disarm fault recovery */
	movl	$-EFAULT, %eax
	ret
1549
/* int futex_xchgl(int oparg, caddr_t uaddr, int *oldval); */
/*
 * Atomically swap oparg into the user word *uaddr; the previous
 * contents are stored in *oldval.  Returns 0, or -EFAULT via
 * futex_fault on a bad user address.
 *
 * The bounds check uses VM_MAXUSER_ADDRESS-4 so that the entire
 * 32-bit access lies within user space; checking against
 * VM_MAXUSER_ADDRESS alone would let the tail of the word reach
 * into kernel memory.
 */
	.globl	futex_xchgl
futex_xchgl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)	/* arm fault recovery */
	movl	4(%esp), %eax			/* %eax = oparg */
	movl	8(%esp), %edx			/* %edx = uaddr */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	MPLOCKED xchgl	%eax, (%edx)		/* atomic; %eax = old *uaddr */
	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = previous contents */
	xorl	%eax, %eax			/* success */

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)		/* disarm fault recovery */
	ret
1568
/* int futex_addl(int oparg, caddr_t uaddr, int *oldval); */
/*
 * Atomically add oparg to the user word *uaddr; the previous
 * contents are stored in *oldval.  Returns 0, or -EFAULT via
 * futex_fault on a bad user address.
 *
 * Bounds-check against VM_MAXUSER_ADDRESS-4 so the whole 32-bit
 * access stays within user space.
 */
	.globl	futex_addl
futex_addl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)	/* arm fault recovery */
	movl	4(%esp), %eax			/* %eax = oparg */
	movl	8(%esp), %edx			/* %edx = uaddr */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	MPLOCKED xaddl	%eax, (%edx)		/* atomic; %eax = old *uaddr */
	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = previous contents */
	xorl	%eax, %eax			/* success */

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)		/* disarm fault recovery */
	ret
1587
/* int futex_orl(int oparg, caddr_t uaddr, int *oldval); */
/*
 * Atomically OR oparg into the user word *uaddr and store the
 * previous contents in *oldval.  Returns 0, or -EFAULT via
 * futex_fault on a bad user address.
 *
 * A plain "lock orl" does not yield the old value (the previous code
 * wrongly stored oparg into *oldval), so use a cmpxchg loop: cmpxchgl
 * commits %ecx only if *uaddr still equals %eax, else it reloads %eax
 * with the current value and we retry.  Bounds-check against
 * VM_MAXUSER_ADDRESS-4 so the whole 32-bit access stays in user space.
 */
	.globl	futex_orl
futex_orl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)	/* arm fault recovery */
	movl	8(%esp), %edx			/* %edx = uaddr */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	movl	(%edx), %eax			/* %eax = old *uaddr (may fault) */
1:
	movl	%eax, %ecx
	orl	4(%esp), %ecx			/* %ecx = oldval | oparg */
	MPLOCKED cmpxchgl %ecx, (%edx)		/* commit iff *uaddr still == %eax */
	jnz	1b				/* raced: %eax = new *uaddr, retry */

	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = value before the OR */
	xorl	%eax, %eax			/* success */

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)		/* disarm fault recovery */
	ret
1606
/* int futex_andnl(int oparg, caddr_t uaddr, int *oldval); */
/*
 * Atomically set *uaddr = *uaddr & ~oparg and store the previous
 * contents in *oldval.  Returns 0, or -EFAULT via futex_fault on a
 * bad user address.
 *
 * The previous code did "notl (%edx)" -- a non-atomic complement of
 * the USER word itself -- computing ~(*uaddr) & oparg and storing
 * oparg into *oldval, corrupting the user value.  Complement oparg
 * in a register instead and commit with a cmpxchg loop so the
 * read-modify-write is atomic and the true old value is returned.
 * Bounds-check against VM_MAXUSER_ADDRESS-4 so the whole 32-bit
 * access stays in user space.
 */
	.globl	futex_andnl
futex_andnl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)	/* arm fault recovery */
	movl	8(%esp), %edx			/* %edx = uaddr */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	movl	(%edx), %eax			/* %eax = old *uaddr (may fault) */
1:
	movl	4(%esp), %ecx			/* %ecx = oparg */
	notl	%ecx
	andl	%eax, %ecx			/* %ecx = oldval & ~oparg */
	MPLOCKED cmpxchgl %ecx, (%edx)		/* commit iff *uaddr still == %eax */
	jnz	1b				/* raced: %eax = new *uaddr, retry */

	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = value before the ANDN */
	xorl	%eax, %eax			/* success */

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)		/* disarm fault recovery */
	ret
1626
/* int futex_xorl(int oparg, caddr_t uaddr, int *oldval); */
/*
 * Atomically XOR oparg into the user word *uaddr and store the
 * previous contents in *oldval.  Returns 0, or -EFAULT via
 * futex_fault on a bad user address.
 *
 * As with futex_orl, "lock xorl" does not yield the old value (the
 * previous code wrongly stored oparg into *oldval), so use a cmpxchg
 * loop.  Bounds-check against VM_MAXUSER_ADDRESS-4 so the whole
 * 32-bit access stays in user space.
 */
	.globl	futex_xorl
futex_xorl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)	/* arm fault recovery */
	movl	8(%esp), %edx			/* %edx = uaddr */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	movl	(%edx), %eax			/* %eax = old *uaddr (may fault) */
1:
	movl	%eax, %ecx
	xorl	4(%esp), %ecx			/* %ecx = oldval ^ oparg */
	MPLOCKED cmpxchgl %ecx, (%edx)		/* commit iff *uaddr still == %eax */
	jnz	1b				/* raced: %eax = new *uaddr, retry */

	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = value before the XOR */
	xorl	%eax, %eax			/* success */

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)		/* disarm fault recovery */
	ret
1645