/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/i386/i386/support.s 128019 2004-04-07 20:46:16Z imp $
 */

#include "opt_npx.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/intr_machdep.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

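/*
 * IDXSHIFT: a PTE is 4 bytes and maps a 4K page, so the byte offset of
 * an address' PTE within the recursive PTmap is (addr >> 12) * 4, i.e.,
 * addr >> IDXSHIFT with the low 2 bits masked off.
 */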
#define IDXSHIFT	10

	.data
	.globl	bcopy_vector
bcopy_vector:
	.long	generic_bcopy
	.globl	bzero_vector
bzero_vector:
	.long	generic_bzero
	.globl	copyin_vector
copyin_vector:
	.long	generic_copyin
	.globl	copyout_vector
copyout_vector:
	.long	generic_copyout
#if defined(I586_CPU) && defined(DEV_NPX)
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
	ALIGN_DATA
	.globl	intrcnt, eintrcnt
intrcnt:
	.space	INTRCNT_COUNT * 4
eintrcnt:

	.globl	intrnames, eintrnames
intrnames:
	.space	INTRCNT_COUNT * (MAXCOMLEN + 1)
eintrnames:

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(bzero)
	MEXITCOUNT
	jmp	*bzero_vector

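/*
 * generic_bzero: clear len/4 longwords with rep stosl, then the
 * remaining len%4 bytes with rep stosb.  Roughly, in C (a sketch, not
 * the authoritative definition):
 *
 *	void generic_bzero(void *buf, u_int len)
 *	{
 *		u_char *p = buf;
 *
 *		while (len-- != 0)
 *			*p++ = 0;
 *	}
 */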
ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret

#ifdef I486_CPU
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `fpcurthread != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but fpcurthread and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz1

	/*
	 * XXX don't use the FPU for cases 1 and 2, since preemptive
	 * scheduling of ithreads broke these cases.  Note that we can
	 * no longer get here from an interrupt handler, since the
	 * context switch to the interrupt handler will have saved the
	 * FPU state.
	 */
	jmp	intreg_i586_bzero

	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz3

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bzero_oops:
	int	$3
	jmp	i586_bzero_oops

	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstp	%st(0)
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && defined(DEV_NPX) */

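/*
 * sse2_pagezero(void *addr):
 * Zero one 4096-byte page with movnti (non-temporal stores), so the
 * target is not pulled into the caches; the sfence makes the weakly
 * ordered stores globally visible before we return.
 */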
ENTRY(sse2_pagezero)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	%ecx,%eax
	addl	$4096,%eax
	xor	%ebx,%ebx
1:
	movnti	%ebx,(%ecx)
	addl	$4,%ecx
	cmpl	%ecx,%eax
	jne	1b
	sfence
	popl	%ebx
	ret

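/*
 * i686_pagezero(void *addr):
 * Zero one page, but first scan with repe scasl and skip over runs of
 * longwords that are already zero, so cache lines that need no write
 * are not dirtied.
 */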
ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp), %edi
	movl	$1024, %ecx
	cld

	ALIGN_TEXT
1:
	xorl	%eax, %eax
	repe
	scasl
	jnz	2f

	popl	%ebx
	popl	%edi
	ret

	ALIGN_TEXT

2:
	incl	%ecx
	subl	$4, %edi

	movl	%ecx, %edx
	cmpl	$16, %ecx

	jge	3f

	movl	%edi, %ebx
	andl	$0x3f, %ebx
	shrl	%ebx
	shrl	%ebx
	movl	$16, %ecx
	subl	%ebx, %ecx

3:
	subl	%ecx, %edx
	rep
	stosl

	movl	%edx, %ecx
	testl	%edx, %edx
	jnz	1b

	popl	%ebx
	popl	%edi
	ret

/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret

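/*
 * bcopyb(src, dst, cnt):
 * Byte-granular bcopy; copies backwards (std/movsb) when the buffers
 * overlap and src < dst, forwards otherwise.
 */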
ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld
	ret

ENTRY(bcopy)
	MEXITCOUNT
	jmp	*bcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
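/*
 * i586_bcopy(src, dst, cnt):
 * Non-overlapping copies of >= 1024 bytes go through the FPU, moving
 * 64 bits per fildq/fistpq pair; smaller or overlapping copies fall
 * back to the generic rep movsl code duplicated below.
 */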
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc1

	/* XXX turn off handling of cases 1-2, as above. */
	movb	$0xfe,kernel_fpu_lock
	jmp	small_i586_bcopy

	smsw	%dx
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc2

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bcopy_oops:
	int	$3
	jmp	i586_bcopy_oops

	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * Note: memcpy does not support overlapping copies
 */
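/*
 * %edi (dst) is saved in %eax up front so that memcpy() returns dst,
 * as the C interface requires; nothing below clobbers %eax.
 */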
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3. For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0. The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */
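/*
 * The on-fault protocol, roughly, in C (a sketch; the real fault
 * recovery lives in the trap code):
 *
 *	curpcb->pcb_onfault = copyout_fault;
 *	... touch user memory ...
 *	curpcb->pcb_onfault = NULL;
 *	return (0);
 *
 * and on a protection fault the trap handler effectively does
 *
 *	if (curpcb->pcb_onfault != NULL)
 *		frame->tf_eip = (int)curpcb->pcb_onfault;
 */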

/*
 * copyout(from_kernel, to_user, len)  - MP SAFE (if not I386_CPU)
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*copyout_vector

ENTRY(generic_copyout)
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#ifdef I386_CPU

/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

1:
	/* check PTE for each page */
	leal	PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
	movl	%ebx,%ecx

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * copyin(from_user, to_kernel, len) - MP SAFE
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*copyin_vector

ENTRY(generic_copyin)
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && defined(DEV_NPX) */

#if defined(I586_CPU) && defined(DEV_NPX)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

	/* XXX grab FPU context atomically. */
	cli

/* if (fpcurthread != NULL) { */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	PCPU(CURPCB),%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   FPCURTHREAD = NULL; */
	movl	$0,PCPU(FPCURTHREAD)
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	PCPU(CURPCB),%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* fpcurthread = curthread; */
	movl	PCPU(CURTHREAD),%eax
	movl	%eax,PCPU(FPCURTHREAD)
	movl	PCPU(CURPCB),%eax

	/* XXX end of atomic FPU context grab. */
	sti

	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	/* XXX ungrab FPU context atomically. */
	cli

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* fpcurthread = NULL; */
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

	ALIGN_TEXT
fastmove_tail:
	movl	PCPU(CURPCB),%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* XXX ungrab FPU context atomically. */
	cli

	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * casuptr.  Compare and set user pointer.  Returns -1 or the current value.
 */
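/*
 * Usage sketch (hypothetical caller):
 *
 *	old = casuptr(p, expect, new);
 *	if (old == -1)
 *		... fault: p was not a valid user address ...
 *	else if (old == expect)
 *		... the store of new succeeded ...
 */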
ENTRY(casuptr)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* dst */
	movl	8(%esp),%eax			/* old */
	movl	12(%esp),%ecx			/* new */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

#ifdef SMP
	lock
#endif
	cmpxchgl %ecx, (%edx)			/* Compare and set. */

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */

	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * fu{byte,sword,word} - MP SAFE
 *
 *	Fetch a byte (sword, word) from user memory
 */
ENTRY(fuword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fuword32)
	jmp	fuword

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later. Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

/*
 * fuword16 - MP SAFE
 */
ENTRY(fuword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * fubyte - MP SAFE
 */
ENTRY(fubyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	PCPU(CURPCB),%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * su{byte,sword,word} - MP SAFE (if not I386_CPU)
 *
 *	Write a byte (word, longword) to user memory
 */
ENTRY(suword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#ifdef I386_CPU

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(suword32)
	jmp	suword

/*
 * suword16 - MP SAFE (if not I386_CPU)
 */
ENTRY(suword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#ifdef I386_CPU

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * subyte - MP SAFE (if not I386_CPU)
 */
ENTRY(subyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#ifdef I386_CPU

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 *	copy a string from 'from' to 'to', stopping when a 0 character
 *	is reached.  return ENAMETOOLONG if the string is longer than
 *	maxlen, and EFAULT on protection violations.  If lencopied is
 *	non-zero, return the actual length in *lencopied.
 */
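/*
 * Loop sketch, in rough C:
 *
 *	len = min(maxlen, VM_MAXUSER_ADDRESS - from) + 1;
 *	while (--len != 0)
 *		if ((*to++ = *from++) == 0)
 *			return (0);
 *	... out of count: ENAMETOOLONG, or EFAULT if we stopped at the
 *	... end of the user address space
 */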
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	PCPU(CURPCB),%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

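/*
 * bcmp(b1, b2, len):
 * Compare longword-wise, then byte-wise; returns 0 if the buffers are
 * equal and nonzero otherwise (no memcmp-style ordering).
 */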
ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	mov	%ax,%ds
	mov	%ax,%es
	mov	%ax,%gs
	mov	%ax,%ss
	movl	$KPSEL,%eax
	mov	%ax,%fs

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	lret

/* ssdtosd(*ssdp, *sdp): convert a soft segment descriptor to machine format */
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* void reset_dbregs() */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6
	ret

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

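/*
 * setjmp(jmp_buf) / longjmp(jmp_buf):
 * The jmp_buf layout is %ebx, %esp, %ebp, %esi, %edi, %eip, one
 * longword each.  setjmp() returns 0; longjmp() returns 1 at the
 * saved call site.
 */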
ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret

/*
 * Support for BB-profiling (gcc -a).  The kernbb program will extract
 * the data from the kernel.
 */

	.data
	ALIGN_DATA
	.globl bbhead
bbhead:
	.long 0

	.text
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	movl	bbhead,%edx
	movl	%edx,16(%eax)
	movl	%eax,bbhead
	NON_GPROF_RET