support.s revision 113090
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/sys/i386/i386/support.s 113090 2003-04-04 17:29:55Z des $
34 */
35
36#include "opt_npx.h"
37
38#include <machine/asmacros.h>
39#include <machine/cputypes.h>
40#include <machine/pmap.h>
41#include <machine/specialreg.h>
42
43#include "assym.s"
44
45#define IDXSHIFT	10
46
	.data
	/*
	 * Indirect-dispatch vectors for the bcopy/bzero/copyin/copyout
	 * families.  They default to the generic implementations; CPU
	 * identification code may later install tuned variants
	 * (i486_bzero, i586_bcopy, i586_copyin/out, ...).
	 */
	.globl	bcopy_vector
bcopy_vector:
	.long	generic_bcopy
	.globl	bzero_vector
bzero_vector:
	.long	generic_bzero
	.globl	copyin_vector
copyin_vector:
	.long	generic_copyin
	.globl	copyout_vector
copyout_vector:
	.long	generic_copyout
#if defined(I586_CPU) && defined(DEV_NPX)
	/*
	 * Byte spinlock guarding in-kernel FPU use: 0xfe = free.
	 * Acquired with `sarb $1' (carry set => already held, since the
	 * held value is 0xff); released by storing 0xfe again.
	 */
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif

	.text
67
68/*
69 * bcopy family
70 * void bzero(void *buf, u_int len)
71 */
72
/*
 * void bzero(void *buf, u_int len)
 *
 * Tail-dispatch through bzero_vector to the installed implementation.
 */
ENTRY(bzero)
	MEXITCOUNT
	jmp	*bzero_vector
76
/*
 * generic_bzero(buf, len): plain `rep stos' zeroing.
 * Longword stores first, then the 0-3 tail bytes.
 */
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi			/* buf (offsets include saved %edi) */
	movl	12(%esp),%ecx			/* len */
	xorl	%eax,%eax			/* fill value 0 */
	shrl	$2,%ecx				/* len / 4 longwords */
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx				/* len % 4 tail bytes */
	rep
	stosb
	popl	%edi
	ret
92
#ifdef I486_CPU
/*
 * i486_bzero(buf, len): unrolled-store bzero tuned for the 486.
 * %edx = current pointer, %ecx = bytes remaining, %eax = 0.
 * Falls through 64-/16-/4-byte chunk loops, then finishes the last
 * 0-3 bytes via a jump table.
 */
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b				/* more left; retest chunk size */
	ret					/* count hit exactly 0 */

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)			/* %ecx is now 0-3 */

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif
194
#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * i586_bzero(buf, len): Pentium bzero that uses 8-byte FPU stores
 * (`fstl' of +0.0) for large buffers, falling back to `rep stos'
 * (intreg_i586_bzero) for small ones or when the FPU is unavailable.
 * %edx = buf, %ecx = len throughout; %ax holds the saved MSW (CR0_TS).
 */
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `fpcurthread != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but fpcurthread and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz1

	/*
	 * XXX don't use the FPU for cases 1 and 2, since preemptive
	 * scheduling of ithreads broke these cases.  Note that we can
	 * no longer get here from an interrupt handler, since the
	 * context switch to the interrupt handler will have saved the
	 * FPU state.
	 */
	jmp	intreg_i586_bzero

	/*
	 * NOTE(review): the fnsave path below is unreachable while the
	 * unconditional jmp above disables cases 1-2; kept so it can be
	 * re-enabled.
	 */
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock	/* try to take the FPU lock */
	jc	intreg_i586_bzero	/* already held: use integer method */
	smsw	%ax			/* remember CR0_TS state */
	clts
	subl	$108,%esp		/* room for an fnsave image */
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	/* Case 4: nobody owns the FPU; no register save needed. */
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz				/* %st(0) = +0.0, our fill pattern */

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz3

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bzero_oops:
	int	$3			/* should be impossible; trap forever */
	jmp	i586_bzero_oops

	/*
	 * NOTE(review): restore path for the disabled fnsave branch above;
	 * currently unreachable (the oops loop never falls through).
	 */
	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock	/* release FPU lock */
	ret

i586_bz3:
	fstp	%st(0)			/* pop our 0.0 */
	lmsw	%ax			/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx		/* reload len (args moved by push) */
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && defined(DEV_NPX) */
352
/*
 * i686_pagezero(void *page): zero one 4K page, but skip runs of
 * longwords that are already zero.  `repe scasl' with %eax == 0 scans
 * for the first nonzero longword; only dirty spans are rewritten,
 * which avoids needlessly touching clean cache lines.
 * %edi = scan/store pointer, %ecx = longwords remaining (starts at 1024).
 */
ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp), %edi
	movl	$1024, %ecx			/* 1024 longwords = 4096 bytes */
	cld

	ALIGN_TEXT
1:
	xorl	%eax, %eax
	repe
	scasl					/* scan while longwords == 0 */
	jnz	2f				/* hit a nonzero longword */

	popl	%ebx
	popl	%edi
	ret					/* page already clean */

	ALIGN_TEXT

2:
	incl	%ecx				/* back up over the mismatch */
	subl	$4, %edi

	movl	%ecx, %edx			/* %edx = longwords remaining */
	cmpl	$16, %ecx

	jge	3f				/* >= 16 left: zero them all */

	/* Short tail: zero up to the end of the current cache line. */
	movl	%edi, %ebx
	andl	$0x3f, %ebx			/* byte offset in 64-byte line */
	shrl	%ebx
	shrl	%ebx				/* -> longword offset */
	movl	$16, %ecx
	subl	%ebx, %ecx			/* longwords to end of line */

3:
	subl	%ecx, %edx			/* account for what we write */
	rep
	stosl

	movl	%edx, %ecx
	testl	%edx, %edx
	jnz	1b				/* resume scanning the rest */

	popl	%ebx
	popl	%edi
	ret
402
/*
 * fillw(pat, base, cnt): store `cnt' 16-bit copies of `pat' at `base'.
 */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax			/* pat (low 16 bits used) */
	movl	12(%esp),%edi			/* base */
	movl	16(%esp),%ecx			/* cnt (words) */
	cld
	rep
	stosw
	popl	%edi
	ret
414
/*
 * bcopyb(src, dst, cnt): overlap-safe byte-at-a-time copy.
 * Copies forward unless the regions overlap with src < dst, in which
 * case it copies backward (with DF set, restored before returning).
 */
ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld					/* ABI expects DF clear */
	ret
445
/*
 * bcopy(src, dst, cnt): tail-dispatch through bcopy_vector to the
 * installed implementation.
 */
ENTRY(bcopy)
	MEXITCOUNT
	jmp	*bcopy_vector
449
/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 *
 * Overlap-safe copy.  Forward: longwords then 0-3 tail bytes.
 * Backward (overlapping, src < dst): tail bytes first, then longwords,
 * with DF set and restored on exit.
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi				/* point at start of last longword */
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld					/* ABI expects DF clear */
	ret
498
#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * i586_bcopy(src, dst, cnt): Pentium copy using 64-bit fildq/fistpq
 * moves for large (>= 1024 byte) non-overlapping copies, with a
 * read-ahead pass to prime the cache.  Falls back to the generic
 * `rep movs' code (small_i586_bcopy / backward path at 1:) when the
 * buffer is small, overlapping, or the FPU lock is unavailable.
 * %dx holds the saved MSW (CR0_TS) across the FPU section.
 */
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock		/* try to take the FPU lock */
	jc	small_i586_bcopy
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc1

	/* XXX turn off handling of cases 1-2, as above. */
	movb	$0xfe,kernel_fpu_lock		/* release lock; use rep movs */
	jmp	small_i586_bcopy

	/*
	 * NOTE(review): unreachable while cases 1-2 are disabled above;
	 * kept so the fnsave path can be re-enabled.
	 */
	smsw	%dx
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx				/* remember CR0_TS state */
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx				/* (%esp) = bytes beyond this pass */
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx	/* cap pass at ~half the D-cache */
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	/* Touch one longword per 32-byte line to pull src into the cache. */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	/* 64 bytes per iteration through the FPU stack (8 x 64-bit). */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax				/* bytes left beyond this pass */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* another cache-sized pass */

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc2

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bcopy_oops:
	int	$3				/* should be impossible */
	jmp	i586_bcopy_oops

	/* NOTE(review): unreachable restore for the disabled fnsave path. */
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx				/* restore CR0_TS */
	movb	$0xfe,kernel_fpu_lock		/* release FPU lock */

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* overlapping: copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && defined(DEV_NPX) */
647
/*
 * memcpy(dst, src, cnt) - returns dst.
 * Note: memcpy does not support overlapping copies.
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* dst */
	movl	16(%esp),%esi			/* src */
	movl	20(%esp),%ecx			/* cnt */
	movl	%edi,%eax			/* return value = dst */
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret
669
670
671/*****************************************************************************/
672/* copyout and fubyte family                                                 */
673/*****************************************************************************/
674/*
675 * Access user memory from inside the kernel. These routines and possibly
676 * the math- and DOS emulators should be the only places that do this.
677 *
678 * We have to access the memory with user's permissions, so use a segment
679 * selector with RPL 3. For writes to user space we have to additionally
680 * check the PTE for write permission, because the 386 does not check
681 * write permissions when we are executing with EPL 0. The 486 does check
682 * this if the WP bit is set in CR0, so we can use a simpler version here.
683 *
684 * These routines set curpcb->onfault for the time they execute. When a
685 * protection violation occurs inside the functions, the trap handler
686 * returns to *curpcb->onfault instead of the function.
687 */
688
/*
 * copyout(from_kernel, to_user, len)  - MP SAFE (if not I386_CPU)
 *
 * Tail-dispatch through copyout_vector to the installed implementation.
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*copyout_vector
695
/*
 * generic_copyout(from_kernel, to_user, len): copy out to user space
 * with fault recovery via curpcb->pcb_onfault.  Returns 0 on success,
 * EFAULT on a bad user address.  On I386_CPU the 386 does not honor
 * page write protection in supervisor mode, so each destination PTE
 * is checked (via trapwrite()) before copying.
 * The `slow_copyout' label is also the rep-movs fallback entered from
 * i586_copyout.
 */
ENTRY(generic_copyout)
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi			/* from (kernel) */
	movl	20(%esp),%edi			/* to (user) */
	movl	24(%esp),%ebx			/* len */
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#ifdef I386_CPU

/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

1:
	/* check PTE for each page */
	leal	PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
	movl	%ebx,%ecx

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx				/* longwords first */
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl				/* then 0-3 tail bytes */
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* return 0 */
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyout_fault:
	/* Landed here from the trap handler via pcb_onfault. */
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
816
#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * i586_copyout(from_kernel, to_user, len): like generic_copyout but
 * hands large (>= 1024 byte) copies to fastmove().  Shares
 * done_copyout, copyout_fault and slow_copyout with generic_copyout.
 */
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout			/* small: rep movs path */

	pushl	%ecx
	call	fastmove			/* FPU-assisted bulk copy */
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && defined(DEV_NPX) */
870
/*
 * copyin(from_user, to_kernel, len) - MP SAFE
 *
 * Tail-dispatch through copyin_vector to the installed implementation.
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*copyin_vector
877
/*
 * generic_copyin(from_user, to_kernel, len): copy in from user space
 * with fault recovery via curpcb->pcb_onfault.  Returns 0 on success,
 * EFAULT on a bad user address.  `slow_copyin'/`done_copyin' are
 * shared with i586_copyin.
 */
ENTRY(generic_copyin)
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)		/* arm fault recovery */
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault			/* address wrap */
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al				/* save len % 4 source */
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax			/* return 0 */
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)		/* disarm fault recovery */
	ret

	ALIGN_TEXT
copyin_fault:
	/* Landed here from the trap handler via pcb_onfault. */
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
929
#if defined(I586_CPU) && defined(DEV_NPX)
/*
 * i586_copyin(from_user, to_kernel, len): like generic_copyin but
 * hands large (>= 1024 byte) copies to fastmove().  Shares
 * slow_copyin/done_copyin/copyin_fault with generic_copyin.
 */
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	fastmove
	addl	$8,%esp			/* pop arg and the XXX %ebx */
	jmp	done_copyin
#endif /* I586_CPU && defined(DEV_NPX) */
964
#if defined(I586_CPU) && defined(DEV_NPX)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
/*
 * FPU-assisted bulk copy used by i586_copyin/i586_copyout.  Takes over
 * the FPU (saving any owner's state into the pcb and a stack copy),
 * moves 64 bytes per loop with fildq/fistpq after a cache-priming read
 * pass, then restores the FPU state.  Faults unwind through
 * fastmove_fault / fastmove_tail_fault back to the copyin/copyout
 * fault protocol (stack layout there assumes the caller's frame).
 * Frame locals: -4/-8/-12(%ebp) are scratch spill slots; below them
 * sits a PCB_SAVEFPU_SIZE save area at (%esp).
 */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp	/* FPU save area + 3 spills */

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail			/* too small for FPU setup */

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

	/* XXX grab FPU context atomically. */
	cli

/* if (fpcurthread != NULL) { */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	PCPU(CURPCB),%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   FPCURTHREAD = NULL; */
	movl	$0,PCPU(FPCURTHREAD)
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)			/* spill len/src/dst */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi			/* copy pcb_savefpu -> stack */
	movl	PCPU(CURPCB),%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx			/* reload len/src/dst */
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* fpcurthread = curthread; */
	movl	PCPU(CURTHREAD),%eax
	movl	%eax,PCPU(FPCURTHREAD)
	movl	PCPU(CURPCB),%eax

	/* XXX end of atomic FPU context grab. */
	sti

	movl	$fastmove_fault,PCB_ONFAULT(%eax)	/* arm fault recovery */
4:
	movl	%ecx,-12(%ebp)			/* bytes beyond this pass */
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx			/* cap a pass at 1792 bytes */
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	/* Touch one longword per 32-byte line to prime the cache. */
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	/* 64 bytes per iteration through the FPU stack (8 x 64-bit). */
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax			/* bytes beyond this pass */
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b				/* start another pass */

	/* XXX ungrab FPU context atomically. */
	cli

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)			/* spill; copy stack -> pcb */
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* fpcurthread = NULL; */
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

	ALIGN_TEXT
fastmove_tail:
	/* Copy the remaining < 64 bytes (or everything, if unaligned). */
	movl	PCPU(CURPCB),%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* Fault while we owned the FPU: put the saved state back first. */
	/* XXX ungrab FPU context atomically. */
	cli

	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

fastmove_tail_fault:
	/*
	 * Unwind our frame, then the caller's (i586_copyin/out) stack:
	 * drop 8 bytes of call args/XXX slot, restore its pushed regs,
	 * and complete the copyin/copyout fault protocol.
	 */
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && defined(DEV_NPX) */
1171
/*
 * casuptr.  Compare and set user pointer.  Returns -1 or the current value.
 *
 * casuptr(dst, old, new): atomically set *dst = new iff *dst == old,
 * for a user-space dst.  Faults recover through fusufault (returns -1).
 */
ENTRY(casuptr)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx			/* dst */
	movl	8(%esp),%eax			/* old */
	movl	12(%esp),%ecx			/* new */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

#if defined(SMP)
	lock cmpxchgl %ecx, (%edx)		/* Compare and set. */
#else	/* !SMP */
	cmpxchgl %ecx, (%edx)
#endif	/* !SMP */

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */

	/*
	 * Nothing below can fault, so just disarm the fault handler.
	 * (The previous version redundantly re-armed it to fusufault and
	 * then immediately overwrote that with 0; the dead store has been
	 * removed.)
	 */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1201
/*
 * fu{byte,sword,word} - MP SAFE
 *
 *	Fetch a byte (sword, word) from user memory
 *
 * fuword(addr): fetch a 32-bit word from user space; returns the word,
 * or -1 (via fusufault) on a bad address.
 */
ENTRY(fuword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax			/* may fault -> fusufault */
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm */
	ret
1218
/* fuword32 is an alias for fuword (32-bit kernel: word == 32 bits). */
ENTRY(fuword32)
	jmp	fuword
1221
/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later. Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax			/* unconditional failure */
	ret
1232
/*
 * fuword16 - MP SAFE
 *
 * Fetch a 16-bit word from user space, zero-extended; -1 on fault.
 */
ENTRY(fuword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax			/* may fault -> fusufault */
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1247
/*
 * fubyte - MP SAFE
 *
 * Fetch a byte from user space, zero-extended; -1 on fault.
 */
ENTRY(fubyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax			/* may fault -> fusufault */
	movl	$0,PCB_ONFAULT(%ecx)
	ret
1262
	ALIGN_TEXT
/*
 * Common fault handler for the fu*/su* family: clear pcb_onfault and
 * return -1.
 */
fusufault:
	movl	PCPU(CURPCB),%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm */
	decl	%eax				/* return -1 */
	ret
1270
/*
 * su{byte,sword,word} - MP SAFE (if not I386_CPU)
 *
 *	Write a byte (word, longword) to user memory
 *
 * suword(addr, v): store a 32-bit word to user space; 0 on success,
 * -1 (via fusufault) on failure.  On I386_CPU the destination PTE must
 * be checked (trapwrite) because the 386 ignores page write protection
 * in supervisor mode.
 */
ENTRY(suword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

#ifdef I386_CPU

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl			/* %edx = PTE offset */

	leal	PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload addr (clobbered above) */
#endif

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax			/* value */
	movl	%eax,(%edx)			/* may fault -> fusufault */
	xorl	%eax,%eax			/* return 0 */
	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)		/* disarm */
	ret
1318
/* suword32 is an alias for suword (32-bit kernel: word == 32 bits). */
ENTRY(suword32)
	jmp	suword
1321
/*
 * suword16 - MP SAFE (if not I386_CPU)
 *
 * Store a 16-bit word to user space; 0 on success, -1 on failure.
 * Same I386_CPU PTE/trapwrite dance as suword.
 */
ENTRY(suword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

#ifdef I386_CPU

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload addr (clobbered above) */
#endif

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax			/* value */
	movw	%ax,(%edx)			/* may fault -> fusufault */
	xorl	%eax,%eax			/* return 0 */
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1367
/*
 * subyte - MP SAFE (if not I386_CPU)
 *
 * Store a byte to user space; 0 on success, -1 on failure.
 * Same I386_CPU PTE/trapwrite dance as suword.
 */
ENTRY(subyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)	/* arm fault recovery */
	movl	4(%esp),%edx

#ifdef I386_CPU

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx			/* reload addr (clobbered above) */
#endif

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al			/* value */
	movb	%al,(%edx)			/* may fault -> fusufault */
	xorl	%eax,%eax			/* return 0 */
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret
1412
/*
 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 *	copy a string from from to to, stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.  The returned length
 *	includes the terminating NUL (see the length calculation at
 *	cpystrflt_x below).
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	PCPU(CURPCB),%ecx		/* faults on user pages go to cpystrflt */
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt			/* from >= VM_MAXUSER_ADDRESS: EFAULT */

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)			/* clamped maxlen; reread at cpystrflt_x */
1:
	incl	%edx				/* bias so the decl/jz below fires after maxlen bytes */
	cld					/* copy forwards */

2:
	decl	%edx
	jz	3f				/* copied maxlen bytes without finding a NUL */

	lodsb					/* %al = *from++ */
	stosb					/* *to++ = %al */
	orb	%al,%al
	jnz	2b				/* loop until the NUL itself has been copied */

	/* Success -- 0 byte reached */
	decl	%edx				/* so length below counts the NUL too */
	xorl	%eax,%eax			/* return 0 */
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt			/* hit the end of user space, not maxlen */
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	/* also the PCB_ONFAULT target for faults during the copy */
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)		/* disarm fault recovery */
	movl	20(%esp),%ecx			/* (possibly clamped) maxlen */
	subl	%edx,%ecx			/* %ecx = bytes actually copied */
	movl	24(%esp),%edx			/* %edx = lencopied pointer */
	testl	%edx,%edx
	jz	1f				/* NULL pointer: caller doesn't want it */
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret
1484
1485
/*
 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 * Kernel-to-kernel version of copyinstr: copy a NUL-terminated string,
 * returning ENAMETOOLONG if no NUL is seen within maxlen bytes, else 0.
 * No fault recovery is armed since both addresses are kernel addresses.
 * If lencopied is non-NULL, *lencopied gets the number of bytes copied
 * (including the NUL on success).
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx				/* bias so decl/jz fires after maxlen bytes */
	cld					/* copy forwards */
1:
	decl	%edx
	jz	4f				/* copied maxlen bytes without finding a NUL */
	lodsb					/* %al = *from++ */
	stosb					/* *to++ = %al */
	orb	%al,%al
	jnz	1b				/* loop until the NUL itself has been copied */

	/* Success -- 0 byte reached */
	decl	%edx				/* so length below counts the NUL too */
	xorl	%eax,%eax			/* return 0 */
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx			/* %ecx = maxlen */
	subl	%edx,%ecx			/* %ecx = bytes actually copied */
	movl	24(%esp),%edx			/* %edx = lencopied pointer */
	testl	%edx,%edx
	jz	7f				/* NULL pointer: caller doesn't want it */
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret
1526
/*
 * bcmp(b1, b2, length)
 *
 * Compare two byte ranges; returns 0 in %eax if they are identical,
 * 1 otherwise.  Compares 32-bit words first, then the 0-3 trailing
 * bytes.
 */
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi			/* %edi = b1 */
	movl	16(%esp),%esi			/* %esi = b2 */
	movl	20(%esp),%edx			/* %edx = length in bytes */
	xorl	%eax,%eax			/* assume equal: return 0 */

	movl	%edx,%ecx
	shrl	$2,%ecx				/* %ecx = number of whole dwords */
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx				/* %ecx = leftover byte count */
	repe
	cmpsb
	je	2f
1:
	incl	%eax				/* mismatch: return 1 */
2:
	popl	%esi
	popl	%edi
	ret
1553
1554
1555/*
1556 * Handling of special 386 registers and descriptor tables etc
1557 */
/* void lgdt(struct region_descriptor *rdp); */
/*
 * Load a new GDT and force every segment register to be reloaded from
 * it, since the old cached descriptors are stale after the lgdt.
 * %cs cannot be written directly, so it is reloaded by converting the
 * normal return into a far (intersegment) return.
 */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax			/* kernel data selector */
	mov	%ax,%ds
	mov	%ax,%es
	mov	%ax,%gs
	mov	%ax,%ss
	movl	$KPSEL,%eax			/* per-CPU ("private") data selector */
	mov	%ax,%fs
	
	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax			/* original return address */
	pushl	%eax
	movl	$KCSEL,4(%esp)			/* build a (selector, offset) far pointer */
	lret					/* far return reloads %cs and returns */
1582
/* ssdtosd(*ssdp,*sdp) */
/*
 * Convert a machine-independent software segment descriptor (at ssdp)
 * into the split hardware segment-descriptor layout (stored at sdp,
 * 8 bytes).  The shifts/rotates below scatter the contiguous software
 * fields into the non-contiguous positions the CPU expects; see the
 * i386 segment-descriptor format for the exact bit layout.
 */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx			/* %ecx = ssdp (source) */
	movl	8(%ecx),%ebx			/* third software word (attributes) */
	shll	$16,%ebx
	movl	(%ecx),%edx			/* first software word */
	roll	$16,%edx
	movb	%dh,%bl				/* shuffle bytes into hardware order */
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax			/* second software word */
	movw	%ax,%dx
	andl	$0xf0000,%eax			/* keep limit bits 16-19 */
	orl	%eax,%ebx
	movl	12(%esp),%ecx			/* %ecx = sdp (destination) */
	movl	%edx,(%ecx)			/* low hardware dword */
	movl	%ebx,4(%ecx)			/* high hardware dword */
	popl	%ebx
	ret
1603
/* void reset_dbregs() */
/*
 * Clear all hardware debug registers: disable every breakpoint via
 * %dr7, then zero the four address registers and the status register.
 * Clobbers %eax and flags only (both caller-saved in the C ABI).
 */
ENTRY(reset_dbregs)
	xorl	%eax,%eax	/* standard zeroing idiom (shorter than mov $0) */
	movl	%eax,%dr7	/* disable all breakpoints first */
	movl	%eax,%dr0	/* then clear the breakpoint address registers */
	movl	%eax,%dr1
	movl	%eax,%dr2
	movl	%eax,%dr3
	movl	%eax,%dr6	/* and the debug status register */
	ret
1614
1615/*****************************************************************************/
1616/* setjump, longjump                                                         */
1617/*****************************************************************************/
1618
/*
 * int setjmp(jmp_buf); - kernel-internal non-local goto, save side.
 * Saves the callee-saved registers, %esp, and the return address into
 * the 6-slot buffer at 4(%esp), then returns 0.  longjmp restores them
 * in the same slot order.
 */
ENTRY(setjmp)
	movl	4(%esp),%eax			/* %eax = jmp_buf */
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret
1630
/*
 * void longjmp(jmp_buf); - kernel-internal non-local goto, restore side.
 * Restores the register state saved by setjmp from the buffer at
 * 4(%esp) and resumes at the saved return address, making the original
 * setjmp call appear to return 1.
 */
ENTRY(longjmp)
	movl	4(%esp),%eax			/* %eax = jmp_buf */
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
1643
1644/*
1645 * Support for BB-profiling (gcc -a).  The kernbb program will extract
1646 * the data from the kernel.
1647 */
1648
	.data
	ALIGN_DATA
	.globl bbhead
bbhead:
	.long 0			/* head of the linked list built by __bb_init_func */
1654
	.text
/*
 * Called once per compilation unit when built with gcc -a (basic-block
 * profiling).  Marks the unit's bb record (arg at 4(%esp)) initialized
 * and pushes it onto the bbhead list (link word at offset 16), where
 * kernbb can later find it.  NON_GPROF so profiling itself isn't
 * instrumented.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax			/* %eax = bb record */
	movl	$1,(%eax)			/* flag the record as initialized */
	movl	bbhead,%edx
	movl	%edx,16(%eax)			/* record->next = bbhead */
	movl	%eax,bbhead			/* push record onto the list */
	NON_GPROF_RET
1663