/* support.s revision 161303 */
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/i386/i386/support.s 161303 2006-08-15 12:14:36Z netchild $
30 */
31
32#include "opt_npx.h"
33
34#include <machine/asmacros.h>
35#include <machine/cputypes.h>
36#include <machine/intr_machdep.h>
37#include <machine/pmap.h>
38#include <machine/specialreg.h>
39
40#include "assym.s"
41
42#define IDXSHIFT	10
43
44	.data
45	.globl	bcopy_vector
46bcopy_vector:
47	.long	generic_bcopy
48	.globl	bzero_vector
49bzero_vector:
50	.long	generic_bzero
51	.globl	copyin_vector
52copyin_vector:
53	.long	generic_copyin
54	.globl	copyout_vector
55copyout_vector:
56	.long	generic_copyout
57#if defined(I586_CPU) && defined(DEV_NPX)
58kernel_fpu_lock:
59	.byte	0xfe
60	.space	3
61#endif
62	ALIGN_DATA
63	.globl	intrcnt, eintrcnt
64intrcnt:
65	.space	INTRCNT_COUNT * 4
66eintrcnt:
67
68	.globl	intrnames, eintrnames
69intrnames:
70	.space	INTRCNT_COUNT * (MAXCOMLEN + 1)
71eintrnames:
72
73	.text
74
75/*
76 * bcopy family
77 * void bzero(void *buf, u_int len)
78 */
79
80ENTRY(bzero)
81	MEXITCOUNT
82	jmp	*bzero_vector
83
84ENTRY(generic_bzero)
85	pushl	%edi
86	movl	8(%esp),%edi
87	movl	12(%esp),%ecx
88	xorl	%eax,%eax
89	shrl	$2,%ecx
90	cld
91	rep
92	stosl
93	movl	12(%esp),%ecx
94	andl	$3,%ecx
95	rep
96	stosb
97	popl	%edi
98	ret
99
100#ifdef I486_CPU
101ENTRY(i486_bzero)
102	movl	4(%esp),%edx
103	movl	8(%esp),%ecx
104	xorl	%eax,%eax
105/*
106 * do 64 byte chunks first
107 *
108 * XXX this is probably over-unrolled at least for DX2's
109 */
1102:
111	cmpl	$64,%ecx
112	jb	3f
113	movl	%eax,(%edx)
114	movl	%eax,4(%edx)
115	movl	%eax,8(%edx)
116	movl	%eax,12(%edx)
117	movl	%eax,16(%edx)
118	movl	%eax,20(%edx)
119	movl	%eax,24(%edx)
120	movl	%eax,28(%edx)
121	movl	%eax,32(%edx)
122	movl	%eax,36(%edx)
123	movl	%eax,40(%edx)
124	movl	%eax,44(%edx)
125	movl	%eax,48(%edx)
126	movl	%eax,52(%edx)
127	movl	%eax,56(%edx)
128	movl	%eax,60(%edx)
129	addl	$64,%edx
130	subl	$64,%ecx
131	jnz	2b
132	ret
133
134/*
135 * do 16 byte chunks
136 */
137	SUPERALIGN_TEXT
1383:
139	cmpl	$16,%ecx
140	jb	4f
141	movl	%eax,(%edx)
142	movl	%eax,4(%edx)
143	movl	%eax,8(%edx)
144	movl	%eax,12(%edx)
145	addl	$16,%edx
146	subl	$16,%ecx
147	jnz	3b
148	ret
149
150/*
151 * do 4 byte chunks
152 */
153	SUPERALIGN_TEXT
1544:
155	cmpl	$4,%ecx
156	jb	5f
157	movl	%eax,(%edx)
158	addl	$4,%edx
159	subl	$4,%ecx
160	jnz	4b
161	ret
162
163/*
164 * do 1 byte chunks
165 * a jump table seems to be faster than a loop or more range reductions
166 *
167 * XXX need a const section for non-text
168 */
169	.data
170jtab:
171	.long	do0
172	.long	do1
173	.long	do2
174	.long	do3
175
176	.text
177	SUPERALIGN_TEXT
1785:
179	jmp	*jtab(,%ecx,4)
180
181	SUPERALIGN_TEXT
182do3:
183	movw	%ax,(%edx)
184	movb	%al,2(%edx)
185	ret
186
187	SUPERALIGN_TEXT
188do2:
189	movw	%ax,(%edx)
190	ret
191
192	SUPERALIGN_TEXT
193do1:
194	movb	%al,(%edx)
195	ret
196
197	SUPERALIGN_TEXT
198do0:
199	ret
200#endif
201
202#if defined(I586_CPU) && defined(DEV_NPX)
203ENTRY(i586_bzero)
204	movl	4(%esp),%edx
205	movl	8(%esp),%ecx
206
207	/*
208	 * The FPU register method is twice as fast as the integer register
209	 * method unless the target is in the L1 cache and we pre-allocate a
210	 * cache line for it (then the integer register method is 4-5 times
211	 * faster).  However, we never pre-allocate cache lines, since that
212	 * would make the integer method 25% or more slower for the common
213	 * case when the target isn't in either the L1 cache or the L2 cache.
214	 * Thus we normally use the FPU register method unless the overhead
215	 * would be too large.
216	 */
217	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
218	jb	intreg_i586_bzero
219
220	/*
221	 * The FPU registers may belong to an application or to fastmove()
222	 * or to another invocation of bcopy() or ourself in a higher level
223	 * interrupt or trap handler.  Preserving the registers is
224	 * complicated since we avoid it if possible at all levels.  We
225	 * want to localize the complications even when that increases them.
226	 * Here the extra work involves preserving CR0_TS in TS.
227	 * `fpcurthread != NULL' is supposed to be the condition that all the
228	 * FPU resources belong to an application, but fpcurthread and CR0_TS
229	 * aren't set atomically enough for this condition to work in
230	 * interrupt handlers.
231	 *
232	 * Case 1: FPU registers belong to the application: we must preserve
233	 * the registers if we use them, so we only use the FPU register
234	 * method if the target size is large enough to amortize the extra
235	 * overhead for preserving them.  CR0_TS must be preserved although
236	 * it is very likely to end up as set.
237	 *
238	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
239	 * makes the registers look like they belong to an application so
240	 * that cpu_switch() and savectx() don't have to know about it, so
241	 * this case reduces to case 1.
242	 *
243	 * Case 3: FPU registers belong to the kernel: don't use the FPU
244	 * register method.  This case is unlikely, and supporting it would
245	 * be more complicated and might take too much stack.
246	 *
247	 * Case 4: FPU registers don't belong to anyone: the FPU registers
248	 * don't need to be preserved, so we always use the FPU register
249	 * method.  CR0_TS must be preserved although it is very likely to
250	 * always end up as clear.
251	 */
252	cmpl	$0,PCPU(FPCURTHREAD)
253	je	i586_bz1
254
255	/*
256	 * XXX don't use the FPU for cases 1 and 2, since preemptive
257	 * scheduling of ithreads broke these cases.  Note that we can
258	 * no longer get here from an interrupt handler, since the
259	 * context sitch to the interrupt handler will have saved the
260	 * FPU state.
261	 */
262	jmp	intreg_i586_bzero
263
264	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
265	jb	intreg_i586_bzero
266	sarb	$1,kernel_fpu_lock
267	jc	intreg_i586_bzero
268	smsw	%ax
269	clts
270	subl	$108,%esp
271	fnsave	0(%esp)
272	jmp	i586_bz2
273
274i586_bz1:
275	sarb	$1,kernel_fpu_lock
276	jc	intreg_i586_bzero
277	smsw	%ax
278	clts
279	fninit				/* XXX should avoid needing this */
280i586_bz2:
281	fldz
282
283	/*
284	 * Align to an 8 byte boundary (misalignment in the main loop would
285	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
286	 * already aligned) by always zeroing 8 bytes and using the part up
287	 * to the _next_ alignment position.
288	 */
289	fstl	0(%edx)
290	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
291	addl	$8,%edx
292	andl	$~7,%edx
293	subl	%edx,%ecx
294
295	/*
296	 * Similarly align `len' to a multiple of 8.
297	 */
298	fstl	-8(%edx,%ecx)
299	decl	%ecx
300	andl	$~7,%ecx
301
302	/*
303	 * This wouldn't be any faster if it were unrolled, since the loop
304	 * control instructions are much faster than the fstl and/or done
305	 * in parallel with it so their overhead is insignificant.
306	 */
307fpureg_i586_bzero_loop:
308	fstl	0(%edx)
309	addl	$8,%edx
310	subl	$8,%ecx
311	cmpl	$8,%ecx
312	jae	fpureg_i586_bzero_loop
313
314	cmpl	$0,PCPU(FPCURTHREAD)
315	je	i586_bz3
316
317	/* XXX check that the condition for cases 1-2 stayed false. */
318i586_bzero_oops:
319	int	$3
320	jmp	i586_bzero_oops
321
322	frstor	0(%esp)
323	addl	$108,%esp
324	lmsw	%ax
325	movb	$0xfe,kernel_fpu_lock
326	ret
327
328i586_bz3:
329	fstp	%st(0)
330	lmsw	%ax
331	movb	$0xfe,kernel_fpu_lock
332	ret
333
334intreg_i586_bzero:
335	/*
336	 * `rep stos' seems to be the best method in practice for small
337	 * counts.  Fancy methods usually take too long to start up due
338	 * to cache and BTB misses.
339	 */
340	pushl	%edi
341	movl	%edx,%edi
342	xorl	%eax,%eax
343	shrl	$2,%ecx
344	cld
345	rep
346	stosl
347	movl	12(%esp),%ecx
348	andl	$3,%ecx
349	jne	1f
350	popl	%edi
351	ret
352
3531:
354	rep
355	stosb
356	popl	%edi
357	ret
358#endif /* I586_CPU && defined(DEV_NPX) */
359
360ENTRY(sse2_pagezero)
361	pushl	%ebx
362	movl	8(%esp),%ecx
363	movl	%ecx,%eax
364	addl	$4096,%eax
365	xor	%ebx,%ebx
3661:
367	movnti	%ebx,(%ecx)
368	addl	$4,%ecx
369	cmpl	%ecx,%eax
370	jne	1b
371	sfence
372	popl	%ebx
373	ret
374
375ENTRY(i686_pagezero)
376	pushl	%edi
377	pushl	%ebx
378
379	movl	12(%esp), %edi
380	movl	$1024, %ecx
381	cld
382
383	ALIGN_TEXT
3841:
385	xorl	%eax, %eax
386	repe
387	scasl
388	jnz	2f
389
390	popl	%ebx
391	popl	%edi
392	ret
393
394	ALIGN_TEXT
395
3962:
397	incl	%ecx
398	subl	$4, %edi
399
400	movl	%ecx, %edx
401	cmpl	$16, %ecx
402
403	jge	3f
404
405	movl	%edi, %ebx
406	andl	$0x3f, %ebx
407	shrl	%ebx
408	shrl	%ebx
409	movl	$16, %ecx
410	subl	%ebx, %ecx
411
4123:
413	subl	%ecx, %edx
414	rep
415	stosl
416
417	movl	%edx, %ecx
418	testl	%edx, %edx
419	jnz	1b
420
421	popl	%ebx
422	popl	%edi
423	ret
424
425/* fillw(pat, base, cnt) */
426ENTRY(fillw)
427	pushl	%edi
428	movl	8(%esp),%eax
429	movl	12(%esp),%edi
430	movl	16(%esp),%ecx
431	cld
432	rep
433	stosw
434	popl	%edi
435	ret
436
437ENTRY(bcopyb)
438	pushl	%esi
439	pushl	%edi
440	movl	12(%esp),%esi
441	movl	16(%esp),%edi
442	movl	20(%esp),%ecx
443	movl	%edi,%eax
444	subl	%esi,%eax
445	cmpl	%ecx,%eax			/* overlapping && src < dst? */
446	jb	1f
447	cld					/* nope, copy forwards */
448	rep
449	movsb
450	popl	%edi
451	popl	%esi
452	ret
453
454	ALIGN_TEXT
4551:
456	addl	%ecx,%edi			/* copy backwards. */
457	addl	%ecx,%esi
458	decl	%edi
459	decl	%esi
460	std
461	rep
462	movsb
463	popl	%edi
464	popl	%esi
465	cld
466	ret
467
468ENTRY(bcopy)
469	MEXITCOUNT
470	jmp	*bcopy_vector
471
472/*
473 * generic_bcopy(src, dst, cnt)
474 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
475 */
476ENTRY(generic_bcopy)
477	pushl	%esi
478	pushl	%edi
479	movl	12(%esp),%esi
480	movl	16(%esp),%edi
481	movl	20(%esp),%ecx
482
483	movl	%edi,%eax
484	subl	%esi,%eax
485	cmpl	%ecx,%eax			/* overlapping && src < dst? */
486	jb	1f
487
488	shrl	$2,%ecx				/* copy by 32-bit words */
489	cld					/* nope, copy forwards */
490	rep
491	movsl
492	movl	20(%esp),%ecx
493	andl	$3,%ecx				/* any bytes left? */
494	rep
495	movsb
496	popl	%edi
497	popl	%esi
498	ret
499
500	ALIGN_TEXT
5011:
502	addl	%ecx,%edi			/* copy backwards */
503	addl	%ecx,%esi
504	decl	%edi
505	decl	%esi
506	andl	$3,%ecx				/* any fractional bytes? */
507	std
508	rep
509	movsb
510	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
511	shrl	$2,%ecx
512	subl	$3,%esi
513	subl	$3,%edi
514	rep
515	movsl
516	popl	%edi
517	popl	%esi
518	cld
519	ret
520
521#if defined(I586_CPU) && defined(DEV_NPX)
522ENTRY(i586_bcopy)
523	pushl	%esi
524	pushl	%edi
525	movl	12(%esp),%esi
526	movl	16(%esp),%edi
527	movl	20(%esp),%ecx
528
529	movl	%edi,%eax
530	subl	%esi,%eax
531	cmpl	%ecx,%eax			/* overlapping && src < dst? */
532	jb	1f
533
534	cmpl	$1024,%ecx
535	jb	small_i586_bcopy
536
537	sarb	$1,kernel_fpu_lock
538	jc	small_i586_bcopy
539	cmpl	$0,PCPU(FPCURTHREAD)
540	je	i586_bc1
541
542	/* XXX turn off handling of cases 1-2, as above. */
543	movb	$0xfe,kernel_fpu_lock
544	jmp	small_i586_bcopy
545
546	smsw	%dx
547	clts
548	subl	$108,%esp
549	fnsave	0(%esp)
550	jmp	4f
551
552i586_bc1:
553	smsw	%dx
554	clts
555	fninit				/* XXX should avoid needing this */
556
557	ALIGN_TEXT
5584:
559	pushl	%ecx
560#define	DCACHE_SIZE	8192
561	cmpl	$(DCACHE_SIZE-512)/2,%ecx
562	jbe	2f
563	movl	$(DCACHE_SIZE-512)/2,%ecx
5642:
565	subl	%ecx,0(%esp)
566	cmpl	$256,%ecx
567	jb	5f			/* XXX should prefetch if %ecx >= 32 */
568	pushl	%esi
569	pushl	%ecx
570	ALIGN_TEXT
5713:
572	movl	0(%esi),%eax
573	movl	32(%esi),%eax
574	movl	64(%esi),%eax
575	movl	96(%esi),%eax
576	movl	128(%esi),%eax
577	movl	160(%esi),%eax
578	movl	192(%esi),%eax
579	movl	224(%esi),%eax
580	addl	$256,%esi
581	subl	$256,%ecx
582	cmpl	$256,%ecx
583	jae	3b
584	popl	%ecx
585	popl	%esi
5865:
587	ALIGN_TEXT
588large_i586_bcopy_loop:
589	fildq	0(%esi)
590	fildq	8(%esi)
591	fildq	16(%esi)
592	fildq	24(%esi)
593	fildq	32(%esi)
594	fildq	40(%esi)
595	fildq	48(%esi)
596	fildq	56(%esi)
597	fistpq	56(%edi)
598	fistpq	48(%edi)
599	fistpq	40(%edi)
600	fistpq	32(%edi)
601	fistpq	24(%edi)
602	fistpq	16(%edi)
603	fistpq	8(%edi)
604	fistpq	0(%edi)
605	addl	$64,%esi
606	addl	$64,%edi
607	subl	$64,%ecx
608	cmpl	$64,%ecx
609	jae	large_i586_bcopy_loop
610	popl	%eax
611	addl	%eax,%ecx
612	cmpl	$64,%ecx
613	jae	4b
614
615	cmpl	$0,PCPU(FPCURTHREAD)
616	je	i586_bc2
617
618	/* XXX check that the condition for cases 1-2 stayed false. */
619i586_bcopy_oops:
620	int	$3
621	jmp	i586_bcopy_oops
622
623	frstor	0(%esp)
624	addl	$108,%esp
625i586_bc2:
626	lmsw	%dx
627	movb	$0xfe,kernel_fpu_lock
628
629/*
630 * This is a duplicate of the main part of generic_bcopy.  See the comments
631 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
632 * would mess up high resolution profiling.
633 */
634	ALIGN_TEXT
635small_i586_bcopy:
636	shrl	$2,%ecx
637	cld
638	rep
639	movsl
640	movl	20(%esp),%ecx
641	andl	$3,%ecx
642	rep
643	movsb
644	popl	%edi
645	popl	%esi
646	ret
647
648	ALIGN_TEXT
6491:
650	addl	%ecx,%edi
651	addl	%ecx,%esi
652	decl	%edi
653	decl	%esi
654	andl	$3,%ecx
655	std
656	rep
657	movsb
658	movl	20(%esp),%ecx
659	shrl	$2,%ecx
660	subl	$3,%esi
661	subl	$3,%edi
662	rep
663	movsl
664	popl	%edi
665	popl	%esi
666	cld
667	ret
668#endif /* I586_CPU && defined(DEV_NPX) */
669
670/*
671 * Note: memcpy does not support overlapping copies
672 */
673ENTRY(memcpy)
674	pushl	%edi
675	pushl	%esi
676	movl	12(%esp),%edi
677	movl	16(%esp),%esi
678	movl	20(%esp),%ecx
679	movl	%edi,%eax
680	shrl	$2,%ecx				/* copy by 32-bit words */
681	cld					/* nope, copy forwards */
682	rep
683	movsl
684	movl	20(%esp),%ecx
685	andl	$3,%ecx				/* any bytes left? */
686	rep
687	movsb
688	popl	%esi
689	popl	%edi
690	ret
691
692
693/*****************************************************************************/
694/* copyout and fubyte family                                                 */
695/*****************************************************************************/
696/*
697 * Access user memory from inside the kernel. These routines and possibly
698 * the math- and DOS emulators should be the only places that do this.
699 *
700 * We have to access the memory with user's permissions, so use a segment
701 * selector with RPL 3. For writes to user space we have to additionally
702 * check the PTE for write permission, because the 386 does not check
703 * write permissions when we are executing with EPL 0. The 486 does check
704 * this if the WP bit is set in CR0, so we can use a simpler version here.
705 *
706 * These routines set curpcb->onfault for the time they execute. When a
707 * protection violation occurs inside the functions, the trap handler
708 * returns to *curpcb->onfault instead of the function.
709 */
710
711/*
712 * copyout(from_kernel, to_user, len)  - MP SAFE
713 */
714ENTRY(copyout)
715	MEXITCOUNT
716	jmp	*copyout_vector
717
718ENTRY(generic_copyout)
719	movl	PCPU(CURPCB),%eax
720	movl	$copyout_fault,PCB_ONFAULT(%eax)
721	pushl	%esi
722	pushl	%edi
723	pushl	%ebx
724	movl	16(%esp),%esi
725	movl	20(%esp),%edi
726	movl	24(%esp),%ebx
727	testl	%ebx,%ebx			/* anything to do? */
728	jz	done_copyout
729
730	/*
731	 * Check explicitly for non-user addresses.  If 486 write protection
732	 * is being used, this check is essential because we are in kernel
733	 * mode so the h/w does not provide any protection against writing
734	 * kernel addresses.
735	 */
736
737	/*
738	 * First, prevent address wrapping.
739	 */
740	movl	%edi,%eax
741	addl	%ebx,%eax
742	jc	copyout_fault
743/*
744 * XXX STOP USING VM_MAXUSER_ADDRESS.
745 * It is an end address, not a max, so every time it is used correctly it
746 * looks like there is an off by one error, and of course it caused an off
747 * by one error in several places.
748 */
749	cmpl	$VM_MAXUSER_ADDRESS,%eax
750	ja	copyout_fault
751
752	/* bcopy(%esi, %edi, %ebx) */
753	movl	%ebx,%ecx
754
755#if defined(I586_CPU) && defined(DEV_NPX)
756	ALIGN_TEXT
757slow_copyout:
758#endif
759	shrl	$2,%ecx
760	cld
761	rep
762	movsl
763	movb	%bl,%cl
764	andb	$3,%cl
765	rep
766	movsb
767
768done_copyout:
769	popl	%ebx
770	popl	%edi
771	popl	%esi
772	xorl	%eax,%eax
773	movl	PCPU(CURPCB),%edx
774	movl	%eax,PCB_ONFAULT(%edx)
775	ret
776
777	ALIGN_TEXT
778copyout_fault:
779	popl	%ebx
780	popl	%edi
781	popl	%esi
782	movl	PCPU(CURPCB),%edx
783	movl	$0,PCB_ONFAULT(%edx)
784	movl	$EFAULT,%eax
785	ret
786
787#if defined(I586_CPU) && defined(DEV_NPX)
788ENTRY(i586_copyout)
789	/*
790	 * Duplicated from generic_copyout.  Could be done a bit better.
791	 */
792	movl	PCPU(CURPCB),%eax
793	movl	$copyout_fault,PCB_ONFAULT(%eax)
794	pushl	%esi
795	pushl	%edi
796	pushl	%ebx
797	movl	16(%esp),%esi
798	movl	20(%esp),%edi
799	movl	24(%esp),%ebx
800	testl	%ebx,%ebx			/* anything to do? */
801	jz	done_copyout
802
803	/*
804	 * Check explicitly for non-user addresses.  If 486 write protection
805	 * is being used, this check is essential because we are in kernel
806	 * mode so the h/w does not provide any protection against writing
807	 * kernel addresses.
808	 */
809
810	/*
811	 * First, prevent address wrapping.
812	 */
813	movl	%edi,%eax
814	addl	%ebx,%eax
815	jc	copyout_fault
816/*
817 * XXX STOP USING VM_MAXUSER_ADDRESS.
818 * It is an end address, not a max, so every time it is used correctly it
819 * looks like there is an off by one error, and of course it caused an off
820 * by one error in several places.
821 */
822	cmpl	$VM_MAXUSER_ADDRESS,%eax
823	ja	copyout_fault
824
825	/* bcopy(%esi, %edi, %ebx) */
8263:
827	movl	%ebx,%ecx
828	/*
829	 * End of duplicated code.
830	 */
831
832	cmpl	$1024,%ecx
833	jb	slow_copyout
834
835	pushl	%ecx
836	call	fastmove
837	addl	$4,%esp
838	jmp	done_copyout
839#endif /* I586_CPU && defined(DEV_NPX) */
840
841/*
842 * copyin(from_user, to_kernel, len) - MP SAFE
843 */
844ENTRY(copyin)
845	MEXITCOUNT
846	jmp	*copyin_vector
847
848ENTRY(generic_copyin)
849	movl	PCPU(CURPCB),%eax
850	movl	$copyin_fault,PCB_ONFAULT(%eax)
851	pushl	%esi
852	pushl	%edi
853	movl	12(%esp),%esi			/* caddr_t from */
854	movl	16(%esp),%edi			/* caddr_t to */
855	movl	20(%esp),%ecx			/* size_t  len */
856
857	/*
858	 * make sure address is valid
859	 */
860	movl	%esi,%edx
861	addl	%ecx,%edx
862	jc	copyin_fault
863	cmpl	$VM_MAXUSER_ADDRESS,%edx
864	ja	copyin_fault
865
866#if defined(I586_CPU) && defined(DEV_NPX)
867	ALIGN_TEXT
868slow_copyin:
869#endif
870	movb	%cl,%al
871	shrl	$2,%ecx				/* copy longword-wise */
872	cld
873	rep
874	movsl
875	movb	%al,%cl
876	andb	$3,%cl				/* copy remaining bytes */
877	rep
878	movsb
879
880#if defined(I586_CPU) && defined(DEV_NPX)
881	ALIGN_TEXT
882done_copyin:
883#endif
884	popl	%edi
885	popl	%esi
886	xorl	%eax,%eax
887	movl	PCPU(CURPCB),%edx
888	movl	%eax,PCB_ONFAULT(%edx)
889	ret
890
891	ALIGN_TEXT
892copyin_fault:
893	popl	%edi
894	popl	%esi
895	movl	PCPU(CURPCB),%edx
896	movl	$0,PCB_ONFAULT(%edx)
897	movl	$EFAULT,%eax
898	ret
899
900#if defined(I586_CPU) && defined(DEV_NPX)
901ENTRY(i586_copyin)
902	/*
903	 * Duplicated from generic_copyin.  Could be done a bit better.
904	 */
905	movl	PCPU(CURPCB),%eax
906	movl	$copyin_fault,PCB_ONFAULT(%eax)
907	pushl	%esi
908	pushl	%edi
909	movl	12(%esp),%esi			/* caddr_t from */
910	movl	16(%esp),%edi			/* caddr_t to */
911	movl	20(%esp),%ecx			/* size_t  len */
912
913	/*
914	 * make sure address is valid
915	 */
916	movl	%esi,%edx
917	addl	%ecx,%edx
918	jc	copyin_fault
919	cmpl	$VM_MAXUSER_ADDRESS,%edx
920	ja	copyin_fault
921	/*
922	 * End of duplicated code.
923	 */
924
925	cmpl	$1024,%ecx
926	jb	slow_copyin
927
928	pushl	%ebx			/* XXX prepare for fastmove_fault */
929	pushl	%ecx
930	call	fastmove
931	addl	$8,%esp
932	jmp	done_copyin
933#endif /* I586_CPU && defined(DEV_NPX) */
934
935#if defined(I586_CPU) && defined(DEV_NPX)
936/* fastmove(src, dst, len)
937	src in %esi
938	dst in %edi
939	len in %ecx		XXX changed to on stack for profiling
940	uses %eax and %edx for tmp. storage
941 */
942/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
943ENTRY(fastmove)
944	pushl	%ebp
945	movl	%esp,%ebp
946	subl	$PCB_SAVEFPU_SIZE+3*4,%esp
947
948	movl	8(%ebp),%ecx
949	cmpl	$63,%ecx
950	jbe	fastmove_tail
951
952	testl	$7,%esi	/* check if src addr is multiple of 8 */
953	jnz	fastmove_tail
954
955	testl	$7,%edi	/* check if dst addr is multiple of 8 */
956	jnz	fastmove_tail
957
958	/* XXX grab FPU context atomically. */
959	cli
960
961/* if (fpcurthread != NULL) { */
962	cmpl	$0,PCPU(FPCURTHREAD)
963	je	6f
964/*    fnsave(&curpcb->pcb_savefpu); */
965	movl	PCPU(CURPCB),%eax
966	fnsave	PCB_SAVEFPU(%eax)
967/*   FPCURTHREAD = NULL; */
968	movl	$0,PCPU(FPCURTHREAD)
969/* } */
9706:
971/* now we own the FPU. */
972
973/*
974 * The process' FP state is saved in the pcb, but if we get
975 * switched, the cpu_switch() will store our FP state in the
976 * pcb.  It should be possible to avoid all the copying for
977 * this, e.g., by setting a flag to tell cpu_switch() to
978 * save the state somewhere else.
979 */
980/* tmp = curpcb->pcb_savefpu; */
981	movl	%ecx,-12(%ebp)
982	movl	%esi,-8(%ebp)
983	movl	%edi,-4(%ebp)
984	movl	%esp,%edi
985	movl	PCPU(CURPCB),%esi
986	addl	$PCB_SAVEFPU,%esi
987	cld
988	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
989	rep
990	movsl
991	movl	-12(%ebp),%ecx
992	movl	-8(%ebp),%esi
993	movl	-4(%ebp),%edi
994/* stop_emulating(); */
995	clts
996/* fpcurthread = curthread; */
997	movl	PCPU(CURTHREAD),%eax
998	movl	%eax,PCPU(FPCURTHREAD)
999	movl	PCPU(CURPCB),%eax
1000
1001	/* XXX end of atomic FPU context grab. */
1002	sti
1003
1004	movl	$fastmove_fault,PCB_ONFAULT(%eax)
10054:
1006	movl	%ecx,-12(%ebp)
1007	cmpl	$1792,%ecx
1008	jbe	2f
1009	movl	$1792,%ecx
10102:
1011	subl	%ecx,-12(%ebp)
1012	cmpl	$256,%ecx
1013	jb	5f
1014	movl	%ecx,-8(%ebp)
1015	movl	%esi,-4(%ebp)
1016	ALIGN_TEXT
10173:
1018	movl	0(%esi),%eax
1019	movl	32(%esi),%eax
1020	movl	64(%esi),%eax
1021	movl	96(%esi),%eax
1022	movl	128(%esi),%eax
1023	movl	160(%esi),%eax
1024	movl	192(%esi),%eax
1025	movl	224(%esi),%eax
1026	addl	$256,%esi
1027	subl	$256,%ecx
1028	cmpl	$256,%ecx
1029	jae	3b
1030	movl	-8(%ebp),%ecx
1031	movl	-4(%ebp),%esi
10325:
1033	ALIGN_TEXT
1034fastmove_loop:
1035	fildq	0(%esi)
1036	fildq	8(%esi)
1037	fildq	16(%esi)
1038	fildq	24(%esi)
1039	fildq	32(%esi)
1040	fildq	40(%esi)
1041	fildq	48(%esi)
1042	fildq	56(%esi)
1043	fistpq	56(%edi)
1044	fistpq	48(%edi)
1045	fistpq	40(%edi)
1046	fistpq	32(%edi)
1047	fistpq	24(%edi)
1048	fistpq	16(%edi)
1049	fistpq	8(%edi)
1050	fistpq	0(%edi)
1051	addl	$-64,%ecx
1052	addl	$64,%esi
1053	addl	$64,%edi
1054	cmpl	$63,%ecx
1055	ja	fastmove_loop
1056	movl	-12(%ebp),%eax
1057	addl	%eax,%ecx
1058	cmpl	$64,%ecx
1059	jae	4b
1060
1061	/* XXX ungrab FPU context atomically. */
1062	cli
1063
1064/* curpcb->pcb_savefpu = tmp; */
1065	movl	%ecx,-12(%ebp)
1066	movl	%esi,-8(%ebp)
1067	movl	%edi,-4(%ebp)
1068	movl	PCPU(CURPCB),%edi
1069	addl	$PCB_SAVEFPU,%edi
1070	movl	%esp,%esi
1071	cld
1072	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1073	rep
1074	movsl
1075	movl	-12(%ebp),%ecx
1076	movl	-8(%ebp),%esi
1077	movl	-4(%ebp),%edi
1078
1079/* start_emulating(); */
1080	smsw	%ax
1081	orb	$CR0_TS,%al
1082	lmsw	%ax
1083/* fpcurthread = NULL; */
1084	movl	$0,PCPU(FPCURTHREAD)
1085
1086	/* XXX end of atomic FPU context ungrab. */
1087	sti
1088
1089	ALIGN_TEXT
1090fastmove_tail:
1091	movl	PCPU(CURPCB),%eax
1092	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)
1093
1094	movb	%cl,%al
1095	shrl	$2,%ecx				/* copy longword-wise */
1096	cld
1097	rep
1098	movsl
1099	movb	%al,%cl
1100	andb	$3,%cl				/* copy remaining bytes */
1101	rep
1102	movsb
1103
1104	movl	%ebp,%esp
1105	popl	%ebp
1106	ret
1107
1108	ALIGN_TEXT
1109fastmove_fault:
1110	/* XXX ungrab FPU context atomically. */
1111	cli
1112
1113	movl	PCPU(CURPCB),%edi
1114	addl	$PCB_SAVEFPU,%edi
1115	movl	%esp,%esi
1116	cld
1117	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1118	rep
1119	movsl
1120
1121	smsw	%ax
1122	orb	$CR0_TS,%al
1123	lmsw	%ax
1124	movl	$0,PCPU(FPCURTHREAD)
1125
1126	/* XXX end of atomic FPU context ungrab. */
1127	sti
1128
1129fastmove_tail_fault:
1130	movl	%ebp,%esp
1131	popl	%ebp
1132	addl	$8,%esp
1133	popl	%ebx
1134	popl	%edi
1135	popl	%esi
1136	movl	PCPU(CURPCB),%edx
1137	movl	$0,PCB_ONFAULT(%edx)
1138	movl	$EFAULT,%eax
1139	ret
1140#endif /* I586_CPU && defined(DEV_NPX) */
1141
1142/*
1143 * casuptr.  Compare and set user pointer.  Returns -1 or the current value.
1144 */
1145ENTRY(casuptr)
1146	movl	PCPU(CURPCB),%ecx
1147	movl	$fusufault,PCB_ONFAULT(%ecx)
1148	movl	4(%esp),%edx			/* dst */
1149	movl	8(%esp),%eax			/* old */
1150	movl	12(%esp),%ecx			/* new */
1151
1152	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
1153	ja	fusufault
1154
1155#ifdef SMP
1156	lock
1157#endif
1158	cmpxchgl %ecx, (%edx)			/* Compare and set. */
1159
1160	/*
1161	 * The old value is in %eax.  If the store succeeded it will be the
1162	 * value we expected (old) from before the store, otherwise it will
1163	 * be the current value.
1164	 */
1165
1166	movl	PCPU(CURPCB),%ecx
1167	movl	$fusufault,PCB_ONFAULT(%ecx)
1168	movl	$0,PCB_ONFAULT(%ecx)
1169	ret
1170
1171/*
1172 * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
1173 * memory.  All these functions are MPSAFE.
1174 */
1175
1176ALTENTRY(fuword32)
1177ENTRY(fuword)
1178	movl	PCPU(CURPCB),%ecx
1179	movl	$fusufault,PCB_ONFAULT(%ecx)
1180	movl	4(%esp),%edx			/* from */
1181
1182	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
1183	ja	fusufault
1184
1185	movl	(%edx),%eax
1186	movl	$0,PCB_ONFAULT(%ecx)
1187	ret
1188
1189/*
1190 * fuswintr() and suswintr() are specialized variants of fuword16() and
1191 * suword16(), respectively.  They are called from the profiling code,
1192 * potentially at interrupt time.  If they fail, that's okay; good things
1193 * will happen later.  They always fail for now, until the trap code is
1194 * able to deal with this.
1195 */
1196ALTENTRY(suswintr)
1197ENTRY(fuswintr)
1198	movl	$-1,%eax
1199	ret
1200
1201ENTRY(fuword16)
1202	movl	PCPU(CURPCB),%ecx
1203	movl	$fusufault,PCB_ONFAULT(%ecx)
1204	movl	4(%esp),%edx
1205
1206	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
1207	ja	fusufault
1208
1209	movzwl	(%edx),%eax
1210	movl	$0,PCB_ONFAULT(%ecx)
1211	ret
1212
1213ENTRY(fubyte)
1214	movl	PCPU(CURPCB),%ecx
1215	movl	$fusufault,PCB_ONFAULT(%ecx)
1216	movl	4(%esp),%edx
1217
1218	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
1219	ja	fusufault
1220
1221	movzbl	(%edx),%eax
1222	movl	$0,PCB_ONFAULT(%ecx)
1223	ret
1224
1225	ALIGN_TEXT
1226fusufault:
1227	movl	PCPU(CURPCB),%ecx
1228	xorl	%eax,%eax
1229	movl	%eax,PCB_ONFAULT(%ecx)
1230	decl	%eax
1231	ret
1232
1233/*
1234 * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
1235 * All these functions are MPSAFE.
1236 */
1237
1238ALTENTRY(suword32)
1239ENTRY(suword)
1240	movl	PCPU(CURPCB),%ecx
1241	movl	$fusufault,PCB_ONFAULT(%ecx)
1242	movl	4(%esp),%edx
1243
1244	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
1245	ja	fusufault
1246
1247	movl	8(%esp),%eax
1248	movl	%eax,(%edx)
1249	xorl	%eax,%eax
1250	movl	PCPU(CURPCB),%ecx
1251	movl	%eax,PCB_ONFAULT(%ecx)
1252	ret
1253
1254ENTRY(suword16)
1255	movl	PCPU(CURPCB),%ecx
1256	movl	$fusufault,PCB_ONFAULT(%ecx)
1257	movl	4(%esp),%edx
1258
1259	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
1260	ja	fusufault
1261
1262	movw	8(%esp),%ax
1263	movw	%ax,(%edx)
1264	xorl	%eax,%eax
1265	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
1266	movl	%eax,PCB_ONFAULT(%ecx)
1267	ret
1268
1269ENTRY(subyte)
1270	movl	PCPU(CURPCB),%ecx
1271	movl	$fusufault,PCB_ONFAULT(%ecx)
1272	movl	4(%esp),%edx
1273
1274	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
1275	ja	fusufault
1276
1277	movb	8(%esp),%al
1278	movb	%al,(%edx)
1279	xorl	%eax,%eax
1280	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
1281	movl	%eax,PCB_ONFAULT(%ecx)
1282	ret
1283
1284/*
1285 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
1286 *
1287 *	copy a string from from to to, stop when a 0 character is reached.
1288 *	return ENAMETOOLONG if string is longer than maxlen, and
1289 *	EFAULT on protection violations. If lencopied is non-zero,
1290 *	return the actual length in *lencopied.
1291 */
1292ENTRY(copyinstr)
1293	pushl	%esi
1294	pushl	%edi
1295	movl	PCPU(CURPCB),%ecx
1296	movl	$cpystrflt,PCB_ONFAULT(%ecx)
1297
1298	movl	12(%esp),%esi			/* %esi = from */
1299	movl	16(%esp),%edi			/* %edi = to */
1300	movl	20(%esp),%edx			/* %edx = maxlen */
1301
1302	movl	$VM_MAXUSER_ADDRESS,%eax
1303
1304	/* make sure 'from' is within bounds */
1305	subl	%esi,%eax
1306	jbe	cpystrflt
1307
1308	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
1309	cmpl	%edx,%eax
1310	jae	1f
1311	movl	%eax,%edx
1312	movl	%eax,20(%esp)
13131:
1314	incl	%edx
1315	cld
1316
13172:
1318	decl	%edx
1319	jz	3f
1320
1321	lodsb
1322	stosb
1323	orb	%al,%al
1324	jnz	2b
1325
1326	/* Success -- 0 byte reached */
1327	decl	%edx
1328	xorl	%eax,%eax
1329	jmp	cpystrflt_x
13303:
1331	/* edx is zero - return ENAMETOOLONG or EFAULT */
1332	cmpl	$VM_MAXUSER_ADDRESS,%esi
1333	jae	cpystrflt
13344:
1335	movl	$ENAMETOOLONG,%eax
1336	jmp	cpystrflt_x
1337
1338cpystrflt:
1339	movl	$EFAULT,%eax
1340
1341cpystrflt_x:
1342	/* set *lencopied and return %eax */
1343	movl	PCPU(CURPCB),%ecx
1344	movl	$0,PCB_ONFAULT(%ecx)
1345	movl	20(%esp),%ecx
1346	subl	%edx,%ecx
1347	movl	24(%esp),%edx
1348	testl	%edx,%edx
1349	jz	1f
1350	movl	%ecx,(%edx)
13511:
1352	popl	%edi
1353	popl	%esi
1354	ret
1355
1356
1357/*
1358 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
1359 */
1360ENTRY(copystr)
1361	pushl	%esi
1362	pushl	%edi
1363
1364	movl	12(%esp),%esi			/* %esi = from */
1365	movl	16(%esp),%edi			/* %edi = to */
1366	movl	20(%esp),%edx			/* %edx = maxlen */
1367	incl	%edx
1368	cld
13691:
1370	decl	%edx
1371	jz	4f
1372	lodsb
1373	stosb
1374	orb	%al,%al
1375	jnz	1b
1376
1377	/* Success -- 0 byte reached */
1378	decl	%edx
1379	xorl	%eax,%eax
1380	jmp	6f
13814:
1382	/* edx is zero -- return ENAMETOOLONG */
1383	movl	$ENAMETOOLONG,%eax
1384
13856:
1386	/* set *lencopied and return %eax */
1387	movl	20(%esp),%ecx
1388	subl	%edx,%ecx
1389	movl	24(%esp),%edx
1390	testl	%edx,%edx
1391	jz	7f
1392	movl	%ecx,(%edx)
13937:
1394	popl	%edi
1395	popl	%esi
1396	ret
1397
1398ENTRY(bcmp)
1399	pushl	%edi
1400	pushl	%esi
1401	movl	12(%esp),%edi
1402	movl	16(%esp),%esi
1403	movl	20(%esp),%edx
1404
1405	movl	%edx,%ecx
1406	shrl	$2,%ecx
1407	cld					/* compare forwards */
1408	repe
1409	cmpsl
1410	jne	1f
1411
1412	movl	%edx,%ecx
1413	andl	$3,%ecx
1414	repe
1415	cmpsb
14161:
1417	setne	%al
1418	movsbl	%al,%eax
1419	popl	%esi
1420	popl	%edi
1421	ret
1422
1423
/*
 * Handling of special 386 registers and descriptor tables etc
 */
/*
 * void lgdt(struct region_descriptor *rdp);
 *
 * Load a new GDT and force every segment register (including %cs, via a
 * far return) to be reloaded from it so no stale descriptors remain cached.
 */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch queue */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%gs
	movl	%eax,%ss
	movl	$KPSEL,%eax
	movl	%eax,%fs

	/*
	 * Reload %cs by turning the near return into an intersegment
	 * (far) return: push the return address again and insert KCSEL
	 * as the code selector word that lret pops.
	 */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	MEXITCOUNT
	lret
1452
/*
 * ssdtosd(*ssdp, *sdp)
 *
 * Convert the machine-independent "soft" segment descriptor at *ssdp
 * into the packed i386 hardware descriptor format at *sdp.  The byte
 * shuffling below rearranges the base/limit/attribute fields into the
 * split layout the CPU expects (base and limit are each scattered
 * across both descriptor words).
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx			/* %ecx = ssdp */
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax			/* keep limit bits 16-19 */
	orl	%eax,%ebx
	movl	12(%esp),%ecx			/* %ecx = sdp */
	movl	%edx,(%ecx)			/* hardware descriptor, low word */
	movl	%ebx,4(%ecx)			/* hardware descriptor, high word */
	popl	%ebx
	ret
1473
/*
 * void reset_dbregs(void)
 *
 * Clear all hardware debug registers: disable every breakpoint via
 * %dr7 first, then zero the address registers and status register.
 */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6     /* clear debug status */
	ret
1484
/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

/*
 * int setjmp(jmp_buf env)
 *
 * Save the callee-saved registers, stack pointer and return address
 * into env (6 longs: ebx, esp, ebp, esi, edi, eip) and return 0.
 * A later longjmp() on the same env returns here with value 1.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
1500
/*
 * void longjmp(jmp_buf env)
 *
 * Restore the context saved by setjmp() on env and resume execution
 * at the saved return address, making that setjmp() return 1.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1513
/*
 * Support for BB-profiling (gcc -a).  The kernbb program will extract
 * the data from the kernel.
 */

	.data
	ALIGN_DATA
	.globl bbhead
bbhead:						/* head of the basic-block record list */
	.long 0

#if defined(SMP) || !defined(_KERNEL)
#define MPLOCKED        lock ;
#else
#define MPLOCKED
#endif

	.text
/*
 * Called once per object file by gcc -a instrumentation: mark the bb
 * record initialized and push it onto the bbhead list.  The link field
 * is at offset 16 — presumably matching gcc's struct bb layout; confirm
 * against the compiler version in use.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)			/* bb->zero_word = 1 (initialized) */
	movl	bbhead,%edx
	movl	%edx,16(%eax)			/* bb->next = bbhead */
	movl	%eax,bbhead			/* bbhead = bb */
	NON_GPROF_RET
1539
/* necessary for linux_futex support */
	.text

/*
 * Common fault target for the futex_* operations below: reached via
 * PCB_ONFAULT when a user access traps.  Disarm the onfault handler
 * and return -EFAULT (Linux-style negative errno).
 */
futex_fault:
	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)
	movl	$-EFAULT, %eax
	ret
1548
/*
 * int futex_xchgl(int oparg, caddr_t uaddr, int *oldval);
 *
 * Atomically exchange oparg with the user word *uaddr; the previous
 * value is stored in *oldval (a kernel pointer).  Returns 0 on success
 * or -EFAULT (via futex_fault) on a bad user address.
 */
	.globl	futex_xchgl
futex_xchgl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)
	movl	4(%esp), %eax			/* %eax = oparg */
	movl	8(%esp), %edx			/* %edx = uaddr */
	/* the entire 4-byte word must lie below the user/kernel boundary */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	xchgl	%eax, (%edx)			/* implicitly locked; may fault */
	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = previous value */
	xorl	%eax, %eax

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)
	ret
1567
/*
 * int futex_addl(int oparg, caddr_t uaddr, int *oldval);
 *
 * Atomically add oparg to the user word *uaddr; the previous value is
 * stored in *oldval (a kernel pointer).  Returns 0 on success or
 * -EFAULT (via futex_fault) on a bad user address.
 */
	.globl	futex_addl
futex_addl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)
	movl	4(%esp), %eax			/* %eax = oparg */
	movl	8(%esp), %edx			/* %edx = uaddr */
	/* the entire 4-byte word must lie below the user/kernel boundary */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	MPLOCKED xaddl	%eax, (%edx)		/* %eax = old value; may fault */
	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = previous value */
	xorl	%eax, %eax

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)
	ret
1586
/*
 * int futex_orl(int oparg, caddr_t uaddr, int *oldval);
 *
 * Atomically *uaddr |= oparg; the previous value is stored in *oldval
 * (a kernel pointer), as FUTEX_OP comparisons require.  Implemented as
 * a lock-cmpxchg loop because "lock orl" does not return the old value.
 * Returns 0 on success or -EFAULT (via futex_fault) on a bad address.
 */
	.globl	futex_orl
futex_orl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)
	movl	8(%esp), %edx			/* %edx = uaddr */
	/* the entire 4-byte word must lie below the user/kernel boundary */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	movl	(%edx), %eax			/* current value; may fault */
1:
	movl	%eax, %ecx
	orl	4(%esp), %ecx			/* %ecx = old | oparg */
	MPLOCKED cmpxchgl %ecx, (%edx)		/* on failure reloads %eax */
	jnz	1b

	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = previous value */
	xorl	%eax, %eax

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)
	ret
1605
/*
 * int futex_andnl(int oparg, caddr_t uaddr, int *oldval);
 *
 * Atomically *uaddr &= ~oparg; the previous value is stored in *oldval
 * (a kernel pointer).  Implemented as a lock-cmpxchg loop: the old code
 * did a non-atomic "notl (%edx)" on user memory, which both raced with
 * userland and computed ~old & oparg instead of old & ~oparg.
 * Returns 0 on success or -EFAULT (via futex_fault) on a bad address.
 */
	.globl	futex_andnl
futex_andnl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)
	movl	8(%esp), %edx			/* %edx = uaddr */
	/* the entire 4-byte word must lie below the user/kernel boundary */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	movl	(%edx), %eax			/* current value; may fault */
1:
	movl	4(%esp), %ecx
	notl	%ecx				/* %ecx = ~oparg */
	andl	%eax, %ecx			/* %ecx = old & ~oparg */
	MPLOCKED cmpxchgl %ecx, (%edx)		/* on failure reloads %eax */
	jnz	1b

	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = previous value */
	xorl	%eax, %eax

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)
	ret
1625
/*
 * int futex_xorl(int oparg, caddr_t uaddr, int *oldval);
 *
 * Atomically *uaddr ^= oparg; the previous value is stored in *oldval
 * (a kernel pointer), as FUTEX_OP comparisons require.  Implemented as
 * a lock-cmpxchg loop because "lock xorl" does not return the old value.
 * Returns 0 on success or -EFAULT (via futex_fault) on a bad address.
 */
	.globl	futex_xorl
futex_xorl:
	movl	PCPU(CURPCB), %eax
	movl	$futex_fault, PCB_ONFAULT(%eax)
	movl	8(%esp), %edx			/* %edx = uaddr */
	/* the entire 4-byte word must lie below the user/kernel boundary */
	cmpl	$VM_MAXUSER_ADDRESS-4, %edx
	ja	futex_fault

	movl	(%edx), %eax			/* current value; may fault */
1:
	movl	%eax, %ecx
	xorl	4(%esp), %ecx			/* %ecx = old ^ oparg */
	MPLOCKED cmpxchgl %ecx, (%edx)		/* on failure reloads %eax */
	jnz	1b

	movl	0xc(%esp), %edx
	movl	%eax, (%edx)			/* *oldval = previous value */
	xorl	%eax, %eax

	movl	PCPU(CURPCB), %edx
	movl	$0, PCB_ONFAULT(%edx)
	ret
1644