support.s revision 118951
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/sys/i386/i386/support.s 118951 2003-08-15 15:19:21Z jhb $
34 */
35
36#include "opt_npx.h"
37
38#include <machine/asmacros.h>
39#include <machine/cputypes.h>
40#include <machine/pmap.h>
41#include <machine/specialreg.h>
42
43#include "assym.s"
44
45#define IDXSHIFT	10
46
/*
 * Run-time dispatch vectors for the bcopy/bzero/copyin/copyout families.
 * Each slot holds the address of the implementation selected for the CPU
 * at boot time (the generic_* versions by default); the ENTRY stubs below
 * simply jump through these.
 */
47	.data
48	.globl	bcopy_vector
49bcopy_vector:
50	.long	generic_bcopy
51	.globl	bzero_vector
52bzero_vector:
53	.long	generic_bzero
54	.globl	copyin_vector
55copyin_vector:
56	.long	generic_copyin
57	.globl	copyout_vector
58copyout_vector:
59	.long	generic_copyout
60#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * Byte lock guarding kernel-internal FPU use (i586_bzero/i586_bcopy/
 * fastmove).  0xfe means "unlocked": `sarb $1' shifts it to 0xff with
 * CF=0 (lock acquired); a second `sarb' leaves 0xff with CF=1 (busy).
 * Released by storing 0xfe again.
 */
61kernel_fpu_lock:
62	.byte	0xfe
63	.space	3
64#endif
65
66	.text
67
68/*
69 * bcopy family
70 * void bzero(void *buf, u_int len)
71 */
72
/* Dispatch to the implementation selected at boot via bzero_vector. */
73ENTRY(bzero)
74	MEXITCOUNT
75	jmp	*bzero_vector
76
/*
 * generic_bzero(buf, len):  portable rep-string version.
 * Zeroes len/4 longwords with rep stosl, then the remaining len&3
 * bytes with rep stosb.  Clobbers %eax, %ecx; preserves %edi.
 */
77ENTRY(generic_bzero)
78	pushl	%edi
79	movl	8(%esp),%edi
80	movl	12(%esp),%ecx
81	xorl	%eax,%eax
82	shrl	$2,%ecx
83	cld
84	rep
85	stosl
86	movl	12(%esp),%ecx
87	andl	$3,%ecx
88	rep
89	stosb
90	popl	%edi
91	ret
92
93#ifdef I486_CPU
/*
 * i486_bzero(buf, len):  unrolled-store version for the 80486.
 * %edx = buf, %ecx = len, %eax = 0.  Reduces the count in 64-, 16-
 * and 4-byte steps, then finishes the last 0-3 bytes through the
 * jtab jump table below.
 */
94ENTRY(i486_bzero)
95	movl	4(%esp),%edx
96	movl	8(%esp),%ecx
97	xorl	%eax,%eax
98/*
99 * do 64 byte chunks first
100 *
101 * XXX this is probably over-unrolled at least for DX2's
102 */
1032:
104	cmpl	$64,%ecx
105	jb	3f
106	movl	%eax,(%edx)
107	movl	%eax,4(%edx)
108	movl	%eax,8(%edx)
109	movl	%eax,12(%edx)
110	movl	%eax,16(%edx)
111	movl	%eax,20(%edx)
112	movl	%eax,24(%edx)
113	movl	%eax,28(%edx)
114	movl	%eax,32(%edx)
115	movl	%eax,36(%edx)
116	movl	%eax,40(%edx)
117	movl	%eax,44(%edx)
118	movl	%eax,48(%edx)
119	movl	%eax,52(%edx)
120	movl	%eax,56(%edx)
121	movl	%eax,60(%edx)
122	addl	$64,%edx
123	subl	$64,%ecx
124	jnz	2b
125	ret
126
127/*
128 * do 16 byte chunks
129 */
130	SUPERALIGN_TEXT
1313:
132	cmpl	$16,%ecx
133	jb	4f
134	movl	%eax,(%edx)
135	movl	%eax,4(%edx)
136	movl	%eax,8(%edx)
137	movl	%eax,12(%edx)
138	addl	$16,%edx
139	subl	$16,%ecx
140	jnz	3b
141	ret
142
143/*
144 * do 4 byte chunks
145 */
146	SUPERALIGN_TEXT
1474:
148	cmpl	$4,%ecx
149	jb	5f
150	movl	%eax,(%edx)
151	addl	$4,%edx
152	subl	$4,%ecx
153	jnz	4b
154	ret
155
156/*
157 * do 1 byte chunks
158 * a jump table seems to be faster than a loop or more range reductions
159 *
160 * XXX need a const section for non-text
161 */
162	.data
163jtab:
164	.long	do0
165	.long	do1
166	.long	do2
167	.long	do3
168
169	.text
170	SUPERALIGN_TEXT
/* %ecx is 0-3 here; dispatch on the byte remainder. */
1715:
172	jmp	*jtab(,%ecx,4)
173
174	SUPERALIGN_TEXT
175do3:
176	movw	%ax,(%edx)
177	movb	%al,2(%edx)
178	ret
179
180	SUPERALIGN_TEXT
181do2:
182	movw	%ax,(%edx)
183	ret
184
185	SUPERALIGN_TEXT
186do1:
187	movb	%al,(%edx)
188	ret
189
190	SUPERALIGN_TEXT
191do0:
192	ret
193#endif
194
195#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * i586_bzero(buf, len):  Pentium version that zeroes through the FPU
 * (8-byte fstl stores) for large counts, falling back to rep-string
 * stores (intreg_i586_bzero) for small counts or when the FPU is busy.
 * %edx = buf, %ecx = len throughout.
 */
196ENTRY(i586_bzero)
197	movl	4(%esp),%edx
198	movl	8(%esp),%ecx
199
200	/*
201	 * The FPU register method is twice as fast as the integer register
202	 * method unless the target is in the L1 cache and we pre-allocate a
203	 * cache line for it (then the integer register method is 4-5 times
204	 * faster).  However, we never pre-allocate cache lines, since that
205	 * would make the integer method 25% or more slower for the common
206	 * case when the target isn't in either the L1 cache or the L2 cache.
207	 * Thus we normally use the FPU register method unless the overhead
208	 * would be too large.
209	 */
210	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
211	jb	intreg_i586_bzero
212
213	/*
214	 * The FPU registers may belong to an application or to fastmove()
215	 * or to another invocation of bcopy() or ourself in a higher level
216	 * interrupt or trap handler.  Preserving the registers is
217	 * complicated since we avoid it if possible at all levels.  We
218	 * want to localize the complications even when that increases them.
219	 * Here the extra work involves preserving CR0_TS in TS.
220	 * `fpcurthread != NULL' is supposed to be the condition that all the
221	 * FPU resources belong to an application, but fpcurthread and CR0_TS
222	 * aren't set atomically enough for this condition to work in
223	 * interrupt handlers.
224	 *
225	 * Case 1: FPU registers belong to the application: we must preserve
226	 * the registers if we use them, so we only use the FPU register
227	 * method if the target size is large enough to amortize the extra
228	 * overhead for preserving them.  CR0_TS must be preserved although
229	 * it is very likely to end up as set.
230	 *
231	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
232	 * makes the registers look like they belong to an application so
233	 * that cpu_switch() and savectx() don't have to know about it, so
234	 * this case reduces to case 1.
235	 *
236	 * Case 3: FPU registers belong to the kernel: don't use the FPU
237	 * register method.  This case is unlikely, and supporting it would
238	 * be more complicated and might take too much stack.
239	 *
240	 * Case 4: FPU registers don't belong to anyone: the FPU registers
241	 * don't need to be preserved, so we always use the FPU register
242	 * method.  CR0_TS must be preserved although it is very likely to
243	 * always end up as clear.
244	 */
245	cmpl	$0,PCPU(FPCURTHREAD)
246	je	i586_bz1
247
248	/*
249	 * XXX don't use the FPU for cases 1 and 2, since preemptive
250	 * scheduling of ithreads broke these cases.  Note that we can
251	 * no longer get here from an interrupt handler, since the
252	 * context switch to the interrupt handler will have saved the
253	 * FPU state.
254	 */
255	jmp	intreg_i586_bzero
256
	/*
	 * NOTE: the instructions from here down to i586_bz1 are
	 * unreachable while the unconditional jmp above is in place
	 * (kept for when the cases 1-2 handling is re-enabled).
	 */
257	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
258	jb	intreg_i586_bzero
259	sarb	$1,kernel_fpu_lock
260	jc	intreg_i586_bzero
261	smsw	%ax
262	clts
263	subl	$108,%esp
264	fnsave	0(%esp)
265	jmp	i586_bz2
266
267i586_bz1:
268	sarb	$1,kernel_fpu_lock
269	jc	intreg_i586_bzero
270	smsw	%ax
271	clts
272	fninit				/* XXX should avoid needing this */
273i586_bz2:
274	fldz
275
276	/*
277	 * Align to an 8 byte boundary (misalignment in the main loop would
278	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
279	 * already aligned) by always zeroing 8 bytes and using the part up
280	 * to the _next_ alignment position.
281	 */
282	fstl	0(%edx)
283	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
284	addl	$8,%edx
285	andl	$~7,%edx
286	subl	%edx,%ecx
287
288	/*
289	 * Similarly align `len' to a multiple of 8.
290	 */
291	fstl	-8(%edx,%ecx)
292	decl	%ecx
293	andl	$~7,%ecx
294
295	/*
296	 * This wouldn't be any faster if it were unrolled, since the loop
297	 * control instructions are much faster than the fstl and/or done
298	 * in parallel with it so their overhead is insignificant.
299	 */
300fpureg_i586_bzero_loop:
301	fstl	0(%edx)
302	addl	$8,%edx
303	subl	$8,%ecx
304	cmpl	$8,%ecx
305	jae	fpureg_i586_bzero_loop
306
307	cmpl	$0,PCPU(FPCURTHREAD)
308	je	i586_bz3
309
310	/* XXX check that the condition for cases 1-2 stayed false. */
311i586_bzero_oops:
312	int	$3
313	jmp	i586_bzero_oops
314
	/* Unreachable while the int $3 loop above is in place. */
315	frstor	0(%esp)
316	addl	$108,%esp
317	lmsw	%ax
318	movb	$0xfe,kernel_fpu_lock
319	ret
320
321i586_bz3:
322	fstp	%st(0)
323	lmsw	%ax
324	movb	$0xfe,kernel_fpu_lock
325	ret
326
/* Integer fallback: %edx = buf, %ecx = len on entry. */
327intreg_i586_bzero:
328	/*
329	 * `rep stos' seems to be the best method in practice for small
330	 * counts.  Fancy methods usually take too long to start up due
331	 * to cache and BTB misses.
332	 */
333	pushl	%edi
334	movl	%edx,%edi
335	xorl	%eax,%eax
336	shrl	$2,%ecx
337	cld
338	rep
339	stosl
340	movl	12(%esp),%ecx
341	andl	$3,%ecx
342	jne	1f
343	popl	%edi
344	ret
345
3461:
347	rep
348	stosb
349	popl	%edi
350	ret
351#endif /* I586_CPU && defined(DEV_NPX) */
352
/*
 * i686_pagezero(void *page):
 *	Zero a 4096-byte page, scanning with `repe scasl' so that
 *	longwords which are already zero are not rewritten (avoids
 *	dirtying cache lines on mostly-zero pages).  %ecx counts
 *	remaining longwords (1024 initially).
 */
353ENTRY(i686_pagezero)
354	pushl	%edi
355	pushl	%ebx
356
357	movl	12(%esp), %edi
358	movl	$1024, %ecx
359	cld
360
361	ALIGN_TEXT
3621:
363	xorl	%eax, %eax
364	repe
365	scasl
366	jnz	2f
367
	/* Whole remainder was already zero -- done. */
368	popl	%ebx
369	popl	%edi
370	ret
371
372	ALIGN_TEXT
373
3742:
	/* scasl overshot: back up to the nonzero longword it found. */
375	incl	%ecx
376	subl	$4, %edi
377
378	movl	%ecx, %edx
379	cmpl	$16, %ecx
380
381	jge	3f
382
	/*
	 * Fewer than 16 longwords left: only write up to the end of the
	 * current 64-byte line (%ebx = longword offset within the line).
	 */
383	movl	%edi, %ebx
384	andl	$0x3f, %ebx
385	shrl	%ebx
386	shrl	%ebx
387	movl	$16, %ecx
388	subl	%ebx, %ecx
389
3903:
391	subl	%ecx, %edx
392	rep
393	stosl
394
395	movl	%edx, %ecx
396	testl	%edx, %edx
397	jnz	1b
398
399	popl	%ebx
400	popl	%edi
401	ret
402
403/* fillw(pat, base, cnt) */
/* Store cnt 16-bit words of pat at base with rep stosw. */
404ENTRY(fillw)
405	pushl	%edi
406	movl	8(%esp),%eax
407	movl	12(%esp),%edi
408	movl	16(%esp),%ecx
409	cld
410	rep
411	stosw
412	popl	%edi
413	ret
414
/*
 * bcopyb(src, dst, len):  byte-by-byte copy that handles overlap by
 * copying backwards (std) when dst lies within [src, src+len).
 */
415ENTRY(bcopyb)
416	pushl	%esi
417	pushl	%edi
418	movl	12(%esp),%esi
419	movl	16(%esp),%edi
420	movl	20(%esp),%ecx
421	movl	%edi,%eax
422	subl	%esi,%eax
423	cmpl	%ecx,%eax			/* overlapping && src < dst? */
424	jb	1f
425	cld					/* nope, copy forwards */
426	rep
427	movsb
428	popl	%edi
429	popl	%esi
430	ret
431
432	ALIGN_TEXT
4331:
434	addl	%ecx,%edi			/* copy backwards. */
435	addl	%ecx,%esi
436	decl	%edi
437	decl	%esi
438	std
439	rep
440	movsb
441	popl	%edi
442	popl	%esi
443	cld
444	ret
445
/* bcopy(src, dst, len): dispatch to the boot-time selected copy routine. */
446ENTRY(bcopy)
447	MEXITCOUNT
448	jmp	*bcopy_vector
449
450/*
451 * generic_bcopy(src, dst, cnt)
452 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
453 */
/*
 * Longword rep-movs copy; copies backwards when the regions overlap
 * with src < dst.  Always leaves DF clear on return.
 */
454ENTRY(generic_bcopy)
455	pushl	%esi
456	pushl	%edi
457	movl	12(%esp),%esi
458	movl	16(%esp),%edi
459	movl	20(%esp),%ecx
460
461	movl	%edi,%eax
462	subl	%esi,%eax
463	cmpl	%ecx,%eax			/* overlapping && src < dst? */
464	jb	1f
465
466	shrl	$2,%ecx				/* copy by 32-bit words */
467	cld					/* nope, copy forwards */
468	rep
469	movsl
470	movl	20(%esp),%ecx
471	andl	$3,%ecx				/* any bytes left? */
472	rep
473	movsb
474	popl	%edi
475	popl	%esi
476	ret
477
478	ALIGN_TEXT
4791:
480	addl	%ecx,%edi			/* copy backwards */
481	addl	%ecx,%esi
482	decl	%edi
483	decl	%esi
484	andl	$3,%ecx				/* any fractional bytes? */
485	std
486	rep
487	movsb
488	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
489	shrl	$2,%ecx
490	subl	$3,%esi
491	subl	$3,%edi
492	rep
493	movsl
494	popl	%edi
495	popl	%esi
496	cld
497	ret
498
499#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * i586_bcopy(src, dst, cnt):  Pentium version which streams large
 * forward copies through the FPU registers (fildq/fistpq, 64 bytes per
 * iteration), pre-touching the source in 256-byte strides to prime the
 * cache.  Falls back to small_i586_bcopy (rep movs) for small or
 * overlapping copies, or when kernel_fpu_lock/FPCURTHREAD say the FPU
 * is unavailable.
 */
500ENTRY(i586_bcopy)
501	pushl	%esi
502	pushl	%edi
503	movl	12(%esp),%esi
504	movl	16(%esp),%edi
505	movl	20(%esp),%ecx
506
507	movl	%edi,%eax
508	subl	%esi,%eax
509	cmpl	%ecx,%eax			/* overlapping && src < dst? */
510	jb	1f
511
512	cmpl	$1024,%ecx
513	jb	small_i586_bcopy
514
515	sarb	$1,kernel_fpu_lock
516	jc	small_i586_bcopy
517	cmpl	$0,PCPU(FPCURTHREAD)
518	je	i586_bc1
519
520	/* XXX turn off handling of cases 1-2, as above. */
521	movb	$0xfe,kernel_fpu_lock
522	jmp	small_i586_bcopy
523
	/* Unreachable while the jmp above is in place (cases 1-2 path). */
524	smsw	%dx
525	clts
526	subl	$108,%esp
527	fnsave	0(%esp)
528	jmp	4f
529
530i586_bc1:
531	smsw	%dx
532	clts
533	fninit				/* XXX should avoid needing this */
534
535	ALIGN_TEXT
/* Outer loop: copy in chunks of at most half the L1 data cache. */
5364:
537	pushl	%ecx
538#define	DCACHE_SIZE	8192
539	cmpl	$(DCACHE_SIZE-512)/2,%ecx
540	jbe	2f
541	movl	$(DCACHE_SIZE-512)/2,%ecx
5422:
543	subl	%ecx,0(%esp)
544	cmpl	$256,%ecx
545	jb	5f			/* XXX should prefetch if %ecx >= 32 */
546	pushl	%esi
547	pushl	%ecx
548	ALIGN_TEXT
/* Touch one longword per 32-byte line to pull the source into cache. */
5493:
550	movl	0(%esi),%eax
551	movl	32(%esi),%eax
552	movl	64(%esi),%eax
553	movl	96(%esi),%eax
554	movl	128(%esi),%eax
555	movl	160(%esi),%eax
556	movl	192(%esi),%eax
557	movl	224(%esi),%eax
558	addl	$256,%esi
559	subl	$256,%ecx
560	cmpl	$256,%ecx
561	jae	3b
562	popl	%ecx
563	popl	%esi
5645:
565	ALIGN_TEXT
566large_i586_bcopy_loop:
567	fildq	0(%esi)
568	fildq	8(%esi)
569	fildq	16(%esi)
570	fildq	24(%esi)
571	fildq	32(%esi)
572	fildq	40(%esi)
573	fildq	48(%esi)
574	fildq	56(%esi)
575	fistpq	56(%edi)
576	fistpq	48(%edi)
577	fistpq	40(%edi)
578	fistpq	32(%edi)
579	fistpq	24(%edi)
580	fistpq	16(%edi)
581	fistpq	8(%edi)
582	fistpq	0(%edi)
583	addl	$64,%esi
584	addl	$64,%edi
585	subl	$64,%ecx
586	cmpl	$64,%ecx
587	jae	large_i586_bcopy_loop
588	popl	%eax
589	addl	%eax,%ecx
590	cmpl	$64,%ecx
591	jae	4b
592
593	cmpl	$0,PCPU(FPCURTHREAD)
594	je	i586_bc2
595
596	/* XXX check that the condition for cases 1-2 stayed false. */
597i586_bcopy_oops:
598	int	$3
599	jmp	i586_bcopy_oops
600
	/* Unreachable while the int $3 loop above is in place. */
601	frstor	0(%esp)
602	addl	$108,%esp
603i586_bc2:
604	lmsw	%dx
605	movb	$0xfe,kernel_fpu_lock
606
607/*
608 * This is a duplicate of the main part of generic_bcopy.  See the comments
609 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
610 * would mess up high resolution profiling.
611 */
612	ALIGN_TEXT
613small_i586_bcopy:
614	shrl	$2,%ecx
615	cld
616	rep
617	movsl
618	movl	20(%esp),%ecx
619	andl	$3,%ecx
620	rep
621	movsb
622	popl	%edi
623	popl	%esi
624	ret
625
626	ALIGN_TEXT
/* Overlapping with src < dst: copy backwards, as in generic_bcopy. */
6271:
628	addl	%ecx,%edi
629	addl	%ecx,%esi
630	decl	%edi
631	decl	%esi
632	andl	$3,%ecx
633	std
634	rep
635	movsb
636	movl	20(%esp),%ecx
637	shrl	$2,%ecx
638	subl	$3,%esi
639	subl	$3,%edi
640	rep
641	movsl
642	popl	%edi
643	popl	%esi
644	cld
645	ret
646#endif /* I586_CPU && defined(DEV_NPX) */
647
648/*
649 * Note: memcpy does not support overlapping copies
650 */
651ENTRY(memcpy)
652	pushl	%edi
653	pushl	%esi
654	movl	12(%esp),%edi
655	movl	16(%esp),%esi
656	movl	20(%esp),%ecx
	/* C memcpy() returns the destination pointer. */
657	movl	%edi,%eax
658	shrl	$2,%ecx				/* copy by 32-bit words */
659	cld					/* nope, copy forwards */
660	rep
661	movsl
662	movl	20(%esp),%ecx
663	andl	$3,%ecx				/* any bytes left? */
664	rep
665	movsb
666	popl	%esi
667	popl	%edi
668	ret
669
670
671/*****************************************************************************/
672/* copyout and fubyte family                                                 */
673/*****************************************************************************/
674/*
675 * Access user memory from inside the kernel. These routines and possibly
676 * the math- and DOS emulators should be the only places that do this.
677 *
678 * We have to access the memory with user's permissions, so use a segment
679 * selector with RPL 3. For writes to user space we have to additionally
680 * check the PTE for write permission, because the 386 does not check
681 * write permissions when we are executing with EPL 0. The 486 does check
682 * this if the WP bit is set in CR0, so we can use a simpler version here.
683 *
684 * These routines set curpcb->onfault for the time they execute. When a
685 * protection violation occurs inside the functions, the trap handler
686 * returns to *curpcb->onfault instead of the function.
687 */
688
689/*
690 * copyout(from_kernel, to_user, len)  - MP SAFE (if not I386_CPU)
691 */
692ENTRY(copyout)
693	MEXITCOUNT
694	jmp	*copyout_vector
695
/*
 * generic_copyout(kaddr, uaddr, len):
 *	%esi = kaddr, %edi = uaddr, %ebx = len (kept in %ebx so the byte
 *	remainder survives the longword copy).  PCB_ONFAULT is armed with
 *	copyout_fault before any user access; a fault in the copy returns
 *	EFAULT through it.  Returns 0 on success.
 */
696ENTRY(generic_copyout)
697	movl	PCPU(CURPCB),%eax
698	movl	$copyout_fault,PCB_ONFAULT(%eax)
699	pushl	%esi
700	pushl	%edi
701	pushl	%ebx
702	movl	16(%esp),%esi
703	movl	20(%esp),%edi
704	movl	24(%esp),%ebx
705	testl	%ebx,%ebx			/* anything to do? */
706	jz	done_copyout
707
708	/*
709	 * Check explicitly for non-user addresses.  If 486 write protection
710	 * is being used, this check is essential because we are in kernel
711	 * mode so the h/w does not provide any protection against writing
712	 * kernel addresses.
713	 */
714
715	/*
716	 * First, prevent address wrapping.
717	 */
718	movl	%edi,%eax
719	addl	%ebx,%eax
720	jc	copyout_fault
721/*
722 * XXX STOP USING VM_MAXUSER_ADDRESS.
723 * It is an end address, not a max, so every time it is used correctly it
724 * looks like there is an off by one error, and of course it caused an off
725 * by one error in several places.
726 */
727	cmpl	$VM_MAXUSER_ADDRESS,%eax
728	ja	copyout_fault
729
730#ifdef I386_CPU
731
732/*
733 * We have to check each PTE for user write permission.
734 * The checking may cause a page fault, so it is important to set
735 * up everything for return via copyout_fault before here.
736 */
737	/* compute number of pages */
738	movl	%edi,%ecx
739	andl	$PAGE_MASK,%ecx
740	addl	%ebx,%ecx
741	decl	%ecx
742	shrl	$IDXSHIFT+2,%ecx
743	incl	%ecx
744
745	/* compute PTE offset for start address */
746	movl	%edi,%edx
747	shrl	$IDXSHIFT,%edx
748	andb	$0xfc,%dl
749
7501:
751	/* check PTE for each page */
752	leal	PTmap(%edx),%eax
753	shrl	$IDXSHIFT,%eax
754	andb	$0xfc,%al
755	testb	$PG_V,PTmap(%eax)		/* PTE page must be valid */
756	je	4f
757	movb	PTmap(%edx),%al
758	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
759	cmpb	$PG_V|PG_RW|PG_U,%al
760	je	2f
761
7624:
763	/* simulate a trap */
764	pushl	%edx
765	pushl	%ecx
766	shll	$IDXSHIFT,%edx
767	pushl	%edx
768	call	trapwrite			/* trapwrite(addr) */
769	popl	%edx
770	popl	%ecx
771	popl	%edx
772
773	testl	%eax,%eax			/* if not ok, return EFAULT */
774	jnz	copyout_fault
775
7762:
777	addl	$4,%edx
778	decl	%ecx
779	jnz	1b				/* check next page */
780#endif /* I386_CPU */
781
782	/* bcopy(%esi, %edi, %ebx) */
783	movl	%ebx,%ecx
784
785#if defined(I586_CPU) && defined(DEV_NPX)
786	ALIGN_TEXT
/* i586_copyout jumps here for small copies; stack layout must match. */
787slow_copyout:
788#endif
789	shrl	$2,%ecx
790	cld
791	rep
792	movsl
793	movb	%bl,%cl
794	andb	$3,%cl
795	rep
796	movsb
797
/* Common success exit: clear PCB_ONFAULT, return 0. */
798done_copyout:
799	popl	%ebx
800	popl	%edi
801	popl	%esi
802	xorl	%eax,%eax
803	movl	PCPU(CURPCB),%edx
804	movl	%eax,PCB_ONFAULT(%edx)
805	ret
806
807	ALIGN_TEXT
/* Fault/invalid-address exit: clear PCB_ONFAULT, return EFAULT. */
808copyout_fault:
809	popl	%ebx
810	popl	%edi
811	popl	%esi
812	movl	PCPU(CURPCB),%edx
813	movl	$0,PCB_ONFAULT(%edx)
814	movl	$EFAULT,%eax
815	ret
816
817#if defined(I586_CPU) && defined(DEV_NPX)
818ENTRY(i586_copyout)
819	/*
820	 * Duplicated from generic_copyout.  Could be done a bit better.
821	 */
822	movl	PCPU(CURPCB),%eax
823	movl	$copyout_fault,PCB_ONFAULT(%eax)
824	pushl	%esi
825	pushl	%edi
826	pushl	%ebx
827	movl	16(%esp),%esi
828	movl	20(%esp),%edi
829	movl	24(%esp),%ebx
830	testl	%ebx,%ebx			/* anything to do? */
831	jz	done_copyout
832
833	/*
834	 * Check explicitly for non-user addresses.  If 486 write protection
835	 * is being used, this check is essential because we are in kernel
836	 * mode so the h/w does not provide any protection against writing
837	 * kernel addresses.
838	 */
839
840	/*
841	 * First, prevent address wrapping.
842	 */
843	movl	%edi,%eax
844	addl	%ebx,%eax
845	jc	copyout_fault
846/*
847 * XXX STOP USING VM_MAXUSER_ADDRESS.
848 * It is an end address, not a max, so every time it is used correctly it
849 * looks like there is an off by one error, and of course it caused an off
850 * by one error in several places.
851 */
852	cmpl	$VM_MAXUSER_ADDRESS,%eax
853	ja	copyout_fault
854
855	/* bcopy(%esi, %edi, %ebx) */
8563:
857	movl	%ebx,%ecx
858	/*
859	 * End of duplicated code.
860	 */
861
862	cmpl	$1024,%ecx
863	jb	slow_copyout
864
865	pushl	%ecx
866	call	fastmove
867	addl	$4,%esp
868	jmp	done_copyout
869#endif /* I586_CPU && defined(DEV_NPX) */
870
871/*
872 * copyin(from_user, to_kernel, len) - MP SAFE
873 */
874ENTRY(copyin)
875	MEXITCOUNT
876	jmp	*copyin_vector
877
/*
 * generic_copyin(uaddr, kaddr, len):
 *	Arms PCB_ONFAULT with copyin_fault, range-checks the source
 *	against VM_MAXUSER_ADDRESS (with wrap check), then rep-movs the
 *	data.  Returns 0, or EFAULT via copyin_fault.
 */
878ENTRY(generic_copyin)
879	movl	PCPU(CURPCB),%eax
880	movl	$copyin_fault,PCB_ONFAULT(%eax)
881	pushl	%esi
882	pushl	%edi
883	movl	12(%esp),%esi			/* caddr_t from */
884	movl	16(%esp),%edi			/* caddr_t to */
885	movl	20(%esp),%ecx			/* size_t  len */
886
887	/*
888	 * make sure address is valid
889	 */
890	movl	%esi,%edx
891	addl	%ecx,%edx
892	jc	copyin_fault
893	cmpl	$VM_MAXUSER_ADDRESS,%edx
894	ja	copyin_fault
895
896#if defined(I586_CPU) && defined(DEV_NPX)
897	ALIGN_TEXT
/* i586_copyin jumps here for small copies; stack layout must match. */
898slow_copyin:
899#endif
	/* Save len&3 in %al across the longword copy. */
900	movb	%cl,%al
901	shrl	$2,%ecx				/* copy longword-wise */
902	cld
903	rep
904	movsl
905	movb	%al,%cl
906	andb	$3,%cl				/* copy remaining bytes */
907	rep
908	movsb
909
910#if defined(I586_CPU) && defined(DEV_NPX)
911	ALIGN_TEXT
912done_copyin:
913#endif
914	popl	%edi
915	popl	%esi
916	xorl	%eax,%eax
917	movl	PCPU(CURPCB),%edx
918	movl	%eax,PCB_ONFAULT(%edx)
919	ret
920
921	ALIGN_TEXT
/* Fault/invalid-address exit: clear PCB_ONFAULT, return EFAULT. */
922copyin_fault:
923	popl	%edi
924	popl	%esi
925	movl	PCPU(CURPCB),%edx
926	movl	$0,PCB_ONFAULT(%edx)
927	movl	$EFAULT,%eax
928	ret
929
930#if defined(I586_CPU) && defined(DEV_NPX)
931ENTRY(i586_copyin)
932	/*
933	 * Duplicated from generic_copyin.  Could be done a bit better.
934	 */
935	movl	PCPU(CURPCB),%eax
936	movl	$copyin_fault,PCB_ONFAULT(%eax)
937	pushl	%esi
938	pushl	%edi
939	movl	12(%esp),%esi			/* caddr_t from */
940	movl	16(%esp),%edi			/* caddr_t to */
941	movl	20(%esp),%ecx			/* size_t  len */
942
943	/*
944	 * make sure address is valid
945	 */
946	movl	%esi,%edx
947	addl	%ecx,%edx
948	jc	copyin_fault
949	cmpl	$VM_MAXUSER_ADDRESS,%edx
950	ja	copyin_fault
951	/*
952	 * End of duplicated code.
953	 */
954
955	cmpl	$1024,%ecx
956	jb	slow_copyin
957
	/* fastmove_tail_fault expects this %ebx push to be on the stack. */
958	pushl	%ebx			/* XXX prepare for fastmove_fault */
959	pushl	%ecx
960	call	fastmove
961	addl	$8,%esp
962	jmp	done_copyin
963#endif /* I586_CPU && defined(DEV_NPX) */
964
965#if defined(I586_CPU) && defined(DEV_NPX)
966/* fastmove(src, dst, len)
967	src in %esi
968	dst in %edi
969	len in %ecx		XXX changed to on stack for profiling
970	uses %eax and %edx for tmp. storage
971 */
972/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * FPU-based bulk mover used by i586_copyout/i586_copyin.  The frame
 * below %ebp holds three scratch longwords at -4/-8/-12(%ebp) (used to
 * spill %edi/%esi/%ecx around the rep movsl copies of the FPU save
 * area) followed by a PCB_SAVEFPU_SIZE buffer at %esp for the caller
 * thread's saved FPU state.  8(%ebp) is the pushed len argument.
 * On a fault, PCB_ONFAULT vectors to fastmove_fault (FPU path) or
 * fastmove_tail_fault (rep-movs tail), which unwind the caller too.
 */
973ENTRY(fastmove)
974	pushl	%ebp
975	movl	%esp,%ebp
976	subl	$PCB_SAVEFPU_SIZE+3*4,%esp
977
978	movl	8(%ebp),%ecx
979	cmpl	$63,%ecx
980	jbe	fastmove_tail
981
982	testl	$7,%esi	/* check if src addr is multiple of 8 */
983	jnz	fastmove_tail
984
985	testl	$7,%edi	/* check if dst addr is multiple of 8 */
986	jnz	fastmove_tail
987
988	/* XXX grab FPU context atomically. */
989	cli
990
991/* if (fpcurthread != NULL) { */
992	cmpl	$0,PCPU(FPCURTHREAD)
993	je	6f
994/*    fnsave(&curpcb->pcb_savefpu); */
995	movl	PCPU(CURPCB),%eax
996	fnsave	PCB_SAVEFPU(%eax)
997/*   FPCURTHREAD = NULL; */
998	movl	$0,PCPU(FPCURTHREAD)
999/* } */
10006:
1001/* now we own the FPU. */
1002
1003/*
1004 * The process' FP state is saved in the pcb, but if we get
1005 * switched, the cpu_switch() will store our FP state in the
1006 * pcb.  It should be possible to avoid all the copying for
1007 * this, e.g., by setting a flag to tell cpu_switch() to
1008 * save the state somewhere else.
1009 */
1010/* tmp = curpcb->pcb_savefpu; */
1011	movl	%ecx,-12(%ebp)
1012	movl	%esi,-8(%ebp)
1013	movl	%edi,-4(%ebp)
1014	movl	%esp,%edi
1015	movl	PCPU(CURPCB),%esi
1016	addl	$PCB_SAVEFPU,%esi
1017	cld
1018	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1019	rep
1020	movsl
1021	movl	-12(%ebp),%ecx
1022	movl	-8(%ebp),%esi
1023	movl	-4(%ebp),%edi
1024/* stop_emulating(); */
1025	clts
1026/* fpcurthread = curthread; */
1027	movl	PCPU(CURTHREAD),%eax
1028	movl	%eax,PCPU(FPCURTHREAD)
1029	movl	PCPU(CURPCB),%eax
1030
1031	/* XXX end of atomic FPU context grab. */
1032	sti
1033
1034	movl	$fastmove_fault,PCB_ONFAULT(%eax)
/* Outer loop: at most 1792 bytes per chunk; -12(%ebp) = bytes deferred. */
10354:
1036	movl	%ecx,-12(%ebp)
1037	cmpl	$1792,%ecx
1038	jbe	2f
1039	movl	$1792,%ecx
10402:
1041	subl	%ecx,-12(%ebp)
1042	cmpl	$256,%ecx
1043	jb	5f
1044	movl	%ecx,-8(%ebp)
1045	movl	%esi,-4(%ebp)
1046	ALIGN_TEXT
/* Touch one longword per 32-byte line to pull the source into cache. */
10473:
1048	movl	0(%esi),%eax
1049	movl	32(%esi),%eax
1050	movl	64(%esi),%eax
1051	movl	96(%esi),%eax
1052	movl	128(%esi),%eax
1053	movl	160(%esi),%eax
1054	movl	192(%esi),%eax
1055	movl	224(%esi),%eax
1056	addl	$256,%esi
1057	subl	$256,%ecx
1058	cmpl	$256,%ecx
1059	jae	3b
1060	movl	-8(%ebp),%ecx
1061	movl	-4(%ebp),%esi
10625:
1063	ALIGN_TEXT
/* Move 64 bytes per iteration through the 8 FPU registers. */
1064fastmove_loop:
1065	fildq	0(%esi)
1066	fildq	8(%esi)
1067	fildq	16(%esi)
1068	fildq	24(%esi)
1069	fildq	32(%esi)
1070	fildq	40(%esi)
1071	fildq	48(%esi)
1072	fildq	56(%esi)
1073	fistpq	56(%edi)
1074	fistpq	48(%edi)
1075	fistpq	40(%edi)
1076	fistpq	32(%edi)
1077	fistpq	24(%edi)
1078	fistpq	16(%edi)
1079	fistpq	8(%edi)
1080	fistpq	0(%edi)
1081	addl	$-64,%ecx
1082	addl	$64,%esi
1083	addl	$64,%edi
1084	cmpl	$63,%ecx
1085	ja	fastmove_loop
1086	movl	-12(%ebp),%eax
1087	addl	%eax,%ecx
1088	cmpl	$64,%ecx
1089	jae	4b
1090
1091	/* XXX ungrab FPU context atomically. */
1092	cli
1093
1094/* curpcb->pcb_savefpu = tmp; */
1095	movl	%ecx,-12(%ebp)
1096	movl	%esi,-8(%ebp)
1097	movl	%edi,-4(%ebp)
1098	movl	PCPU(CURPCB),%edi
1099	addl	$PCB_SAVEFPU,%edi
1100	movl	%esp,%esi
1101	cld
1102	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1103	rep
1104	movsl
1105	movl	-12(%ebp),%ecx
1106	movl	-8(%ebp),%esi
1107	movl	-4(%ebp),%edi
1108
1109/* start_emulating(); */
1110	smsw	%ax
1111	orb	$CR0_TS,%al
1112	lmsw	%ax
1113/* fpcurthread = NULL; */
1114	movl	$0,PCPU(FPCURTHREAD)
1115
1116	/* XXX end of atomic FPU context ungrab. */
1117	sti
1118
1119	ALIGN_TEXT
/* Copy the final <64 bytes (or everything, if unaligned) via rep movs. */
1120fastmove_tail:
1121	movl	PCPU(CURPCB),%eax
1122	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)
1123
1124	movb	%cl,%al
1125	shrl	$2,%ecx				/* copy longword-wise */
1126	cld
1127	rep
1128	movsl
1129	movb	%al,%cl
1130	andb	$3,%cl				/* copy remaining bytes */
1131	rep
1132	movsb
1133
1134	movl	%ebp,%esp
1135	popl	%ebp
1136	ret
1137
1138	ALIGN_TEXT
/* Fault during the FPU copy: restore the saved FPU state first. */
1139fastmove_fault:
1140	/* XXX ungrab FPU context atomically. */
1141	cli
1142
1143	movl	PCPU(CURPCB),%edi
1144	addl	$PCB_SAVEFPU,%edi
1145	movl	%esp,%esi
1146	cld
1147	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1148	rep
1149	movsl
1150
1151	smsw	%ax
1152	orb	$CR0_TS,%al
1153	lmsw	%ax
1154	movl	$0,PCPU(FPCURTHREAD)
1155
1156	/* XXX end of atomic FPU context ungrab. */
1157	sti
1158
1159fastmove_tail_fault:
	/*
	 * Unwind both fastmove's frame and the caller's pushes: addl $8
	 * skips the return address plus the pushed len; popl %ebx restores
	 * %ebx (saved at entry by i586_copyout, or pushed expressly for
	 * this path by i586_copyin), then %edi/%esi; return EFAULT.
	 */
1160	movl	%ebp,%esp
1161	popl	%ebp
1162	addl	$8,%esp
1163	popl	%ebx
1164	popl	%edi
1165	popl	%esi
1166	movl	PCPU(CURPCB),%edx
1167	movl	$0,PCB_ONFAULT(%edx)
1168	movl	$EFAULT,%eax
1169	ret
1170#endif /* I586_CPU && defined(DEV_NPX) */
1171
/*
 * casuptr(intptr_t *p, intptr_t old, intptr_t new)
 *	Compare and set a user-space pointer.  Returns the value that was
 *	at *p (== old iff the store succeeded), or -1 via fusufault on an
 *	invalid address or page fault.  PCB_ONFAULT is armed so a fault in
 *	the user access recovers instead of panicking.
 */
ENTRY(casuptr)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx			/* dst */
	movl	8(%esp),%eax			/* old */
	movl	12(%esp),%ecx			/* new */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

#ifdef SMP
	lock
#endif
	cmpxchgl %ecx, (%edx)			/* Compare and set. */

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 *
	 * The user access is done, so just disarm the fault handler.
	 * (A redundant re-arm of PCB_ONFAULT with $fusufault immediately
	 * before this clear was a dead store and has been removed.)
	 */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1200
1201/*
1202 * fu{byte,sword,word} - MP SAFE
1203 *
1204 *	Fetch a byte (sword, word) from user memory
1205 */
/* Returns the fetched value, or -1 via fusufault on a bad address. */
1206ENTRY(fuword)
1207	movl	PCPU(CURPCB),%ecx
1208	movl	$fusufault,PCB_ONFAULT(%ecx)
1209	movl	4(%esp),%edx			/* from */
1210
1211	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
1212	ja	fusufault
1213
1214	movl	(%edx),%eax
1215	movl	$0,PCB_ONFAULT(%ecx)
1216	ret
1217
1218ENTRY(fuword32)
1219	jmp	fuword
1220
1221/*
1222 * These two routines are called from the profiling code, potentially
1223 * at interrupt time. If they fail, that's okay, good things will
1224 * happen later. Fail all the time for now - until the trap code is
1225 * able to deal with this.
1226 */
1227ALTENTRY(suswintr)
1228ENTRY(fuswintr)
1229	movl	$-1,%eax
1230	ret
1231
1232/*
1233 * fuword16 - MP SAFE
1234 */
1235ENTRY(fuword16)
1236	movl	PCPU(CURPCB),%ecx
1237	movl	$fusufault,PCB_ONFAULT(%ecx)
1238	movl	4(%esp),%edx
1239
1240	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
1241	ja	fusufault
1242
1243	movzwl	(%edx),%eax
1244	movl	$0,PCB_ONFAULT(%ecx)
1245	ret
1246
1247/*
1248 * fubyte - MP SAFE
1249 */
1250ENTRY(fubyte)
1251	movl	PCPU(CURPCB),%ecx
1252	movl	$fusufault,PCB_ONFAULT(%ecx)
1253	movl	4(%esp),%edx
1254
1255	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
1256	ja	fusufault
1257
1258	movzbl	(%edx),%eax
1259	movl	$0,PCB_ONFAULT(%ecx)
1260	ret
1261
1262	ALIGN_TEXT
/*
 * Common fault handler for the fu*/su*/casuptr family: clears
 * PCB_ONFAULT and returns -1 in %eax.
 */
1263fusufault:
1264	movl	PCPU(CURPCB),%ecx
1265	xorl	%eax,%eax
1266	movl	%eax,PCB_ONFAULT(%ecx)
1267	decl	%eax
1268	ret
1269
1270/*
1271 * su{byte,sword,word} - MP SAFE (if not I386_CPU)
1272 *
1273 *	Write a byte (word, longword) to user memory
1274 */
/* Returns 0 on success, -1 via fusufault on a bad address or fault. */
1275ENTRY(suword)
1276	movl	PCPU(CURPCB),%ecx
1277	movl	$fusufault,PCB_ONFAULT(%ecx)
1278	movl	4(%esp),%edx
1279
1280#ifdef I386_CPU
1281
1282	/* XXX - page boundary crossing is still not handled */
1283	movl	%edx,%eax
1284	shrl	$IDXSHIFT,%edx
1285	andb	$0xfc,%dl
1286
1287	leal	PTmap(%edx),%ecx
1288	shrl	$IDXSHIFT,%ecx
1289	andb	$0xfc,%cl
1290	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
1291	je	4f
1292	movb	PTmap(%edx),%dl
1293	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1294	cmpb	$PG_V|PG_RW|PG_U,%dl
1295	je	1f
1296
12974:
1298	/* simulate a trap */
1299	pushl	%eax
1300	call	trapwrite
1301	popl	%edx				/* remove junk parameter from stack */
1302	testl	%eax,%eax
1303	jnz	fusufault
13041:
1305	movl	4(%esp),%edx
1306#endif
1307
1308	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
1309	ja	fusufault
1310
1311	movl	8(%esp),%eax
1312	movl	%eax,(%edx)
1313	xorl	%eax,%eax
1314	movl	PCPU(CURPCB),%ecx
1315	movl	%eax,PCB_ONFAULT(%ecx)
1316	ret
1317
1318ENTRY(suword32)
1319	jmp	suword
1320
1321/*
1322 * suword16 - MP SAFE (if not I386_CPU)
1323 */
1324ENTRY(suword16)
1325	movl	PCPU(CURPCB),%ecx
1326	movl	$fusufault,PCB_ONFAULT(%ecx)
1327	movl	4(%esp),%edx
1328
1329#ifdef I386_CPU
1330
1331	/* XXX - page boundary crossing is still not handled */
1332	movl	%edx,%eax
1333	shrl	$IDXSHIFT,%edx
1334	andb	$0xfc,%dl
1335
1336	leal	PTmap(%edx),%ecx
1337	shrl	$IDXSHIFT,%ecx
1338	andb	$0xfc,%cl
1339	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
1340	je	4f
1341	movb	PTmap(%edx),%dl
1342	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1343	cmpb	$PG_V|PG_RW|PG_U,%dl
1344	je	1f
1345
13464:
1347	/* simulate a trap */
1348	pushl	%eax
1349	call	trapwrite
1350	popl	%edx				/* remove junk parameter from stack */
1351	testl	%eax,%eax
1352	jnz	fusufault
13531:
1354	movl	4(%esp),%edx
1355#endif
1356
1357	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
1358	ja	fusufault
1359
1360	movw	8(%esp),%ax
1361	movw	%ax,(%edx)
1362	xorl	%eax,%eax
1363	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
1364	movl	%eax,PCB_ONFAULT(%ecx)
1365	ret
1366
1367/*
1368 * subyte - MP SAFE (if not I386_CPU)
1369 */
1370ENTRY(subyte)
1371	movl	PCPU(CURPCB),%ecx
1372	movl	$fusufault,PCB_ONFAULT(%ecx)
1373	movl	4(%esp),%edx
1374
1375#ifdef I386_CPU
1376
	/* XXX - page boundary crossing is still not handled */
1377	movl	%edx,%eax
1378	shrl	$IDXSHIFT,%edx
1379	andb	$0xfc,%dl
1380
1381	leal	PTmap(%edx),%ecx
1382	shrl	$IDXSHIFT,%ecx
1383	andb	$0xfc,%cl
1384	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
1385	je	4f
1386	movb	PTmap(%edx),%dl
1387	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1388	cmpb	$PG_V|PG_RW|PG_U,%dl
1389	je	1f
1390
13914:
1392	/* simulate a trap */
1393	pushl	%eax
1394	call	trapwrite
1395	popl	%edx				/* remove junk parameter from stack */
1396	testl	%eax,%eax
1397	jnz	fusufault
13981:
1399	movl	4(%esp),%edx
1400#endif
1401
1402	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
1403	ja	fusufault
1404
1405	movb	8(%esp),%al
1406	movb	%al,(%edx)
1407	xorl	%eax,%eax
1408	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
1409	movl	%eax,PCB_ONFAULT(%ecx)
1410	ret
1411
/*
 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 *
 *	Register use: %esi/%edi = source/destination cursors,
 *	%edx = bytes remaining (pre-biased by one so the loop can
 *	decrement before each copy), %eax = byte being copied / errno.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	PCPU(CURPCB),%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)	/* arm fault recovery */

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt			/* from >= VM_MAXUSER_ADDRESS: EFAULT */

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)			/* clamped maxlen, used for *lencopied */
1:
	incl	%edx				/* pre-bias for the leading decl below */
	cld

2:
	decl	%edx
	jz	3f				/* space exhausted: too long or fault */

	lodsb					/* *%edi++ = *%esi++ */
	stosb
	orb	%al,%al
	jnz	2b				/* loop until the NUL has been copied */

	/* Success -- 0 byte reached */
	decl	%edx				/* count the NUL in the copied length */
	xorl	%eax,%eax			/* return 0 */
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi	/* did we run off the end of user VA? */
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f				/* lencopied may be NULL */
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1483
1484
/*
 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 *	Kernel-to-kernel string copy: copy bytes from 'from' to 'to'
 *	until a NUL is copied or maxlen bytes have been moved.  Returns
 *	0 on success or ENAMETOOLONG.  Unlike copyinstr, no fault
 *	recovery is armed here, so both addresses must be valid.
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx				/* pre-bias for the leading decl below */
	cld
1:
	decl	%edx
	jz	4f				/* maxlen exhausted before the NUL */
	lodsb					/* *%edi++ = *%esi++ */
	stosb
	orb	%al,%al
	jnz	1b				/* loop until the NUL has been copied */

	/* Success -- 0 byte reached */
	decl	%edx				/* count the NUL in the copied length */
	xorl	%eax,%eax			/* return 0 */
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx			/* copied = maxlen - remaining */
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f				/* lencopied may be NULL */
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret
1525
/*
 * bcmp(b1, b2, length)
 *
 *	Compare two byte strings: returns 0 in %eax if they are
 *	identical, 1 otherwise.  Compares 32 bits at a time first,
 *	then the trailing 0-3 bytes.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* %edi = b1 */
	movl	16(%esp),%esi			/* %esi = b2 */
	movl	20(%esp),%edx			/* %edx = length in bytes */
	xorl	%eax,%eax			/* assume equal */

	movl	%edx,%ecx
	shrl	$2,%ecx				/* dword count */
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx				/* remaining 0-3 bytes */
	repe
	cmpsb
	je	2f
1:
	incl	%eax				/* mismatch: return 1 */
2:
	popl	%esi
	popl	%edi
	ret
1552
1553
/*
 * Handling of special 386 registers and descriptor tables etc
 */
/*
 * void lgdt(struct region_descriptor *rdp);
 *
 *	Load a new GDT from *rdp and reload every segment register so
 *	none is left caching a descriptor from the old table.  %cs is
 *	reloaded last by converting the return into a far return.
 */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax			/* kernel data selector */
	mov	%ax,%ds
	mov	%ax,%es
	mov	%ax,%gs
	mov	%ax,%ss
	movl	$KPSEL,%eax			/* %fs gets KPSEL (per-CPU selector, presumably) */
	mov	%ax,%fs

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax				/* stack now: rta, KCSEL slot */
	movl	$KCSEL,4(%esp)
	lret					/* far return reloads %cs:%eip */
1581
/*
 * ssdtosd(*ssdp,*sdp)
 *
 *	Pack the three 32-bit words of a software segment descriptor at
 *	*ssdp into the two-dword hardware descriptor format at *sdp.
 *	The shift/rotate sequence below shuffles base, limit and
 *	attribute fields into their architected bit positions
 *	(NOTE(review): exact field layout inferred from the i386
 *	descriptor format -- confirm against struct soft_segment_descriptor).
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx			/* %ecx = ssdp */
	movl	8(%ecx),%ebx			/* third ssd word */
	shll	$16,%ebx
	movl	(%ecx),%edx			/* first ssd word */
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax			/* second ssd word */
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx			/* %ecx = sdp */
	movl	%edx,(%ecx)			/* low dword of hardware descriptor */
	movl	%ebx,4(%ecx)			/* high dword of hardware descriptor */
	popl	%ebx
	ret
1602
/*
 * void reset_dbregs()
 *
 *	Clear all x86 debug registers.  %dr7 (the control register) is
 *	zeroed first so no hardware breakpoint can fire while the
 *	address registers are being cleared.
 */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0     /* then clear the four address registers */
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6     /* and the status register */
	ret
1613
/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

/*
 * int setjmp(jmp_buf)
 *
 *	Save the callee-saved registers, stack pointer and return
 *	address into the six-word buffer at 4(%esp), then return 0.
 *	A later longjmp() on the same buffer resumes after this call
 *	site with return value 1.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax			/* %eax = jmp_buf */
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
1629
/*
 * void longjmp(jmp_buf)
 *
 *	Restore the register state saved by setjmp() from the buffer at
 *	4(%esp), rewrite the return address on the restored stack, and
 *	return 1 as if from the original setjmp() call.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax			/* %eax = jmp_buf */
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1642
/*
 * Support for BB-profiling (gcc -a).  The kernbb program will extract
 * the data from the kernel.
 */

	.data
	ALIGN_DATA
	.globl bbhead
bbhead:				/* head of the linked list of bb records */
	.long 0

	.text
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax		/* %eax = new bb record */
	movl	$1,(%eax)		/* first word := 1 (presumably "initialized"
					 * per the gcc -a record layout -- confirm) */
	movl	bbhead,%edx
	movl	%edx,16(%eax)		/* link: record's word at offset 16 = old head */
	movl	%eax,bbhead		/* push record onto the bbhead list */
	NON_GPROF_RET
1662