/* support.s, revision 121983 */
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/sys/i386/i386/support.s 121983 2003-11-03 21:28:54Z jhb $
34 */
35
36#include "opt_npx.h"
37
38#include <machine/asmacros.h>
39#include <machine/cputypes.h>
40#include <machine/intr_machdep.h>
41#include <machine/pmap.h>
42#include <machine/specialreg.h>
43
44#include "assym.s"
45
46#define IDXSHIFT	10
47
48	.data
49	.globl	bcopy_vector
50bcopy_vector:
51	.long	generic_bcopy
52	.globl	bzero_vector
53bzero_vector:
54	.long	generic_bzero
55	.globl	copyin_vector
56copyin_vector:
57	.long	generic_copyin
58	.globl	copyout_vector
59copyout_vector:
60	.long	generic_copyout
61#if defined(I586_CPU) && defined(DEV_NPX)
62kernel_fpu_lock:
63	.byte	0xfe
64	.space	3
65#endif
66	ALIGN_DATA
67	.globl	intrcnt, eintrcnt
68intrcnt:
69	.space	INTRCNT_COUNT * 4
70eintrcnt:
71
72	.globl	intrnames, eintrnames
73intrnames:
74	.space	INTRCNT_COUNT * (MAXCOMLEN + 1)
75eintrnames:
76
77	.text
78
79/*
80 * bcopy family
81 * void bzero(void *buf, u_int len)
82 */
83
84ENTRY(bzero)
85	MEXITCOUNT
86	jmp	*bzero_vector
87
88ENTRY(generic_bzero)
89	pushl	%edi
90	movl	8(%esp),%edi
91	movl	12(%esp),%ecx
92	xorl	%eax,%eax
93	shrl	$2,%ecx
94	cld
95	rep
96	stosl
97	movl	12(%esp),%ecx
98	andl	$3,%ecx
99	rep
100	stosb
101	popl	%edi
102	ret
103
104#ifdef I486_CPU
105ENTRY(i486_bzero)
106	movl	4(%esp),%edx
107	movl	8(%esp),%ecx
108	xorl	%eax,%eax
109/*
110 * do 64 byte chunks first
111 *
112 * XXX this is probably over-unrolled at least for DX2's
113 */
1142:
115	cmpl	$64,%ecx
116	jb	3f
117	movl	%eax,(%edx)
118	movl	%eax,4(%edx)
119	movl	%eax,8(%edx)
120	movl	%eax,12(%edx)
121	movl	%eax,16(%edx)
122	movl	%eax,20(%edx)
123	movl	%eax,24(%edx)
124	movl	%eax,28(%edx)
125	movl	%eax,32(%edx)
126	movl	%eax,36(%edx)
127	movl	%eax,40(%edx)
128	movl	%eax,44(%edx)
129	movl	%eax,48(%edx)
130	movl	%eax,52(%edx)
131	movl	%eax,56(%edx)
132	movl	%eax,60(%edx)
133	addl	$64,%edx
134	subl	$64,%ecx
135	jnz	2b
136	ret
137
138/*
139 * do 16 byte chunks
140 */
141	SUPERALIGN_TEXT
1423:
143	cmpl	$16,%ecx
144	jb	4f
145	movl	%eax,(%edx)
146	movl	%eax,4(%edx)
147	movl	%eax,8(%edx)
148	movl	%eax,12(%edx)
149	addl	$16,%edx
150	subl	$16,%ecx
151	jnz	3b
152	ret
153
154/*
155 * do 4 byte chunks
156 */
157	SUPERALIGN_TEXT
1584:
159	cmpl	$4,%ecx
160	jb	5f
161	movl	%eax,(%edx)
162	addl	$4,%edx
163	subl	$4,%ecx
164	jnz	4b
165	ret
166
167/*
168 * do 1 byte chunks
169 * a jump table seems to be faster than a loop or more range reductions
170 *
171 * XXX need a const section for non-text
172 */
173	.data
174jtab:
175	.long	do0
176	.long	do1
177	.long	do2
178	.long	do3
179
180	.text
181	SUPERALIGN_TEXT
1825:
183	jmp	*jtab(,%ecx,4)
184
185	SUPERALIGN_TEXT
186do3:
187	movw	%ax,(%edx)
188	movb	%al,2(%edx)
189	ret
190
191	SUPERALIGN_TEXT
192do2:
193	movw	%ax,(%edx)
194	ret
195
196	SUPERALIGN_TEXT
197do1:
198	movb	%al,(%edx)
199	ret
200
201	SUPERALIGN_TEXT
202do0:
203	ret
204#endif
205
206#if defined(I586_CPU) && defined(DEV_NPX)
207ENTRY(i586_bzero)
208	movl	4(%esp),%edx
209	movl	8(%esp),%ecx
210
211	/*
212	 * The FPU register method is twice as fast as the integer register
213	 * method unless the target is in the L1 cache and we pre-allocate a
214	 * cache line for it (then the integer register method is 4-5 times
215	 * faster).  However, we never pre-allocate cache lines, since that
216	 * would make the integer method 25% or more slower for the common
217	 * case when the target isn't in either the L1 cache or the L2 cache.
218	 * Thus we normally use the FPU register method unless the overhead
219	 * would be too large.
220	 */
221	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
222	jb	intreg_i586_bzero
223
224	/*
225	 * The FPU registers may belong to an application or to fastmove()
226	 * or to another invocation of bcopy() or ourself in a higher level
227	 * interrupt or trap handler.  Preserving the registers is
228	 * complicated since we avoid it if possible at all levels.  We
229	 * want to localize the complications even when that increases them.
230	 * Here the extra work involves preserving CR0_TS in TS.
231	 * `fpcurthread != NULL' is supposed to be the condition that all the
232	 * FPU resources belong to an application, but fpcurthread and CR0_TS
233	 * aren't set atomically enough for this condition to work in
234	 * interrupt handlers.
235	 *
236	 * Case 1: FPU registers belong to the application: we must preserve
237	 * the registers if we use them, so we only use the FPU register
238	 * method if the target size is large enough to amortize the extra
239	 * overhead for preserving them.  CR0_TS must be preserved although
240	 * it is very likely to end up as set.
241	 *
242	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
243	 * makes the registers look like they belong to an application so
244	 * that cpu_switch() and savectx() don't have to know about it, so
245	 * this case reduces to case 1.
246	 *
247	 * Case 3: FPU registers belong to the kernel: don't use the FPU
248	 * register method.  This case is unlikely, and supporting it would
249	 * be more complicated and might take too much stack.
250	 *
251	 * Case 4: FPU registers don't belong to anyone: the FPU registers
252	 * don't need to be preserved, so we always use the FPU register
253	 * method.  CR0_TS must be preserved although it is very likely to
254	 * always end up as clear.
255	 */
256	cmpl	$0,PCPU(FPCURTHREAD)
257	je	i586_bz1
258
259	/*
260	 * XXX don't use the FPU for cases 1 and 2, since preemptive
261	 * scheduling of ithreads broke these cases.  Note that we can
262	 * no longer get here from an interrupt handler, since the
263	 * context sitch to the interrupt handler will have saved the
264	 * FPU state.
265	 */
266	jmp	intreg_i586_bzero
267
268	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
269	jb	intreg_i586_bzero
270	sarb	$1,kernel_fpu_lock
271	jc	intreg_i586_bzero
272	smsw	%ax
273	clts
274	subl	$108,%esp
275	fnsave	0(%esp)
276	jmp	i586_bz2
277
278i586_bz1:
279	sarb	$1,kernel_fpu_lock
280	jc	intreg_i586_bzero
281	smsw	%ax
282	clts
283	fninit				/* XXX should avoid needing this */
284i586_bz2:
285	fldz
286
287	/*
288	 * Align to an 8 byte boundary (misalignment in the main loop would
289	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
290	 * already aligned) by always zeroing 8 bytes and using the part up
291	 * to the _next_ alignment position.
292	 */
293	fstl	0(%edx)
294	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
295	addl	$8,%edx
296	andl	$~7,%edx
297	subl	%edx,%ecx
298
299	/*
300	 * Similarly align `len' to a multiple of 8.
301	 */
302	fstl	-8(%edx,%ecx)
303	decl	%ecx
304	andl	$~7,%ecx
305
306	/*
307	 * This wouldn't be any faster if it were unrolled, since the loop
308	 * control instructions are much faster than the fstl and/or done
309	 * in parallel with it so their overhead is insignificant.
310	 */
311fpureg_i586_bzero_loop:
312	fstl	0(%edx)
313	addl	$8,%edx
314	subl	$8,%ecx
315	cmpl	$8,%ecx
316	jae	fpureg_i586_bzero_loop
317
318	cmpl	$0,PCPU(FPCURTHREAD)
319	je	i586_bz3
320
321	/* XXX check that the condition for cases 1-2 stayed false. */
322i586_bzero_oops:
323	int	$3
324	jmp	i586_bzero_oops
325
326	frstor	0(%esp)
327	addl	$108,%esp
328	lmsw	%ax
329	movb	$0xfe,kernel_fpu_lock
330	ret
331
332i586_bz3:
333	fstp	%st(0)
334	lmsw	%ax
335	movb	$0xfe,kernel_fpu_lock
336	ret
337
338intreg_i586_bzero:
339	/*
340	 * `rep stos' seems to be the best method in practice for small
341	 * counts.  Fancy methods usually take too long to start up due
342	 * to cache and BTB misses.
343	 */
344	pushl	%edi
345	movl	%edx,%edi
346	xorl	%eax,%eax
347	shrl	$2,%ecx
348	cld
349	rep
350	stosl
351	movl	12(%esp),%ecx
352	andl	$3,%ecx
353	jne	1f
354	popl	%edi
355	ret
356
3571:
358	rep
359	stosb
360	popl	%edi
361	ret
362#endif /* I586_CPU && defined(DEV_NPX) */
363
364ENTRY(sse2_pagezero)
365	pushl	%ebx
366	movl	8(%esp),%ecx
367	movl	%ecx,%eax
368	addl	$4096,%eax
369	xor	%ebx,%ebx
3701:
371	movnti	%ebx,(%ecx)
372	addl	$4,%ecx
373	cmpl	%ecx,%eax
374	jne	1b
375	sfence
376	popl	%ebx
377	ret
378
379ENTRY(i686_pagezero)
380	pushl	%edi
381	pushl	%ebx
382
383	movl	12(%esp), %edi
384	movl	$1024, %ecx
385	cld
386
387	ALIGN_TEXT
3881:
389	xorl	%eax, %eax
390	repe
391	scasl
392	jnz	2f
393
394	popl	%ebx
395	popl	%edi
396	ret
397
398	ALIGN_TEXT
399
4002:
401	incl	%ecx
402	subl	$4, %edi
403
404	movl	%ecx, %edx
405	cmpl	$16, %ecx
406
407	jge	3f
408
409	movl	%edi, %ebx
410	andl	$0x3f, %ebx
411	shrl	%ebx
412	shrl	%ebx
413	movl	$16, %ecx
414	subl	%ebx, %ecx
415
4163:
417	subl	%ecx, %edx
418	rep
419	stosl
420
421	movl	%edx, %ecx
422	testl	%edx, %edx
423	jnz	1b
424
425	popl	%ebx
426	popl	%edi
427	ret
428
429/* fillw(pat, base, cnt) */
430ENTRY(fillw)
431	pushl	%edi
432	movl	8(%esp),%eax
433	movl	12(%esp),%edi
434	movl	16(%esp),%ecx
435	cld
436	rep
437	stosw
438	popl	%edi
439	ret
440
441ENTRY(bcopyb)
442	pushl	%esi
443	pushl	%edi
444	movl	12(%esp),%esi
445	movl	16(%esp),%edi
446	movl	20(%esp),%ecx
447	movl	%edi,%eax
448	subl	%esi,%eax
449	cmpl	%ecx,%eax			/* overlapping && src < dst? */
450	jb	1f
451	cld					/* nope, copy forwards */
452	rep
453	movsb
454	popl	%edi
455	popl	%esi
456	ret
457
458	ALIGN_TEXT
4591:
460	addl	%ecx,%edi			/* copy backwards. */
461	addl	%ecx,%esi
462	decl	%edi
463	decl	%esi
464	std
465	rep
466	movsb
467	popl	%edi
468	popl	%esi
469	cld
470	ret
471
472ENTRY(bcopy)
473	MEXITCOUNT
474	jmp	*bcopy_vector
475
476/*
477 * generic_bcopy(src, dst, cnt)
478 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
479 */
480ENTRY(generic_bcopy)
481	pushl	%esi
482	pushl	%edi
483	movl	12(%esp),%esi
484	movl	16(%esp),%edi
485	movl	20(%esp),%ecx
486
487	movl	%edi,%eax
488	subl	%esi,%eax
489	cmpl	%ecx,%eax			/* overlapping && src < dst? */
490	jb	1f
491
492	shrl	$2,%ecx				/* copy by 32-bit words */
493	cld					/* nope, copy forwards */
494	rep
495	movsl
496	movl	20(%esp),%ecx
497	andl	$3,%ecx				/* any bytes left? */
498	rep
499	movsb
500	popl	%edi
501	popl	%esi
502	ret
503
504	ALIGN_TEXT
5051:
506	addl	%ecx,%edi			/* copy backwards */
507	addl	%ecx,%esi
508	decl	%edi
509	decl	%esi
510	andl	$3,%ecx				/* any fractional bytes? */
511	std
512	rep
513	movsb
514	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
515	shrl	$2,%ecx
516	subl	$3,%esi
517	subl	$3,%edi
518	rep
519	movsl
520	popl	%edi
521	popl	%esi
522	cld
523	ret
524
525#if defined(I586_CPU) && defined(DEV_NPX)
526ENTRY(i586_bcopy)
527	pushl	%esi
528	pushl	%edi
529	movl	12(%esp),%esi
530	movl	16(%esp),%edi
531	movl	20(%esp),%ecx
532
533	movl	%edi,%eax
534	subl	%esi,%eax
535	cmpl	%ecx,%eax			/* overlapping && src < dst? */
536	jb	1f
537
538	cmpl	$1024,%ecx
539	jb	small_i586_bcopy
540
541	sarb	$1,kernel_fpu_lock
542	jc	small_i586_bcopy
543	cmpl	$0,PCPU(FPCURTHREAD)
544	je	i586_bc1
545
546	/* XXX turn off handling of cases 1-2, as above. */
547	movb	$0xfe,kernel_fpu_lock
548	jmp	small_i586_bcopy
549
550	smsw	%dx
551	clts
552	subl	$108,%esp
553	fnsave	0(%esp)
554	jmp	4f
555
556i586_bc1:
557	smsw	%dx
558	clts
559	fninit				/* XXX should avoid needing this */
560
561	ALIGN_TEXT
5624:
563	pushl	%ecx
564#define	DCACHE_SIZE	8192
565	cmpl	$(DCACHE_SIZE-512)/2,%ecx
566	jbe	2f
567	movl	$(DCACHE_SIZE-512)/2,%ecx
5682:
569	subl	%ecx,0(%esp)
570	cmpl	$256,%ecx
571	jb	5f			/* XXX should prefetch if %ecx >= 32 */
572	pushl	%esi
573	pushl	%ecx
574	ALIGN_TEXT
5753:
576	movl	0(%esi),%eax
577	movl	32(%esi),%eax
578	movl	64(%esi),%eax
579	movl	96(%esi),%eax
580	movl	128(%esi),%eax
581	movl	160(%esi),%eax
582	movl	192(%esi),%eax
583	movl	224(%esi),%eax
584	addl	$256,%esi
585	subl	$256,%ecx
586	cmpl	$256,%ecx
587	jae	3b
588	popl	%ecx
589	popl	%esi
5905:
591	ALIGN_TEXT
592large_i586_bcopy_loop:
593	fildq	0(%esi)
594	fildq	8(%esi)
595	fildq	16(%esi)
596	fildq	24(%esi)
597	fildq	32(%esi)
598	fildq	40(%esi)
599	fildq	48(%esi)
600	fildq	56(%esi)
601	fistpq	56(%edi)
602	fistpq	48(%edi)
603	fistpq	40(%edi)
604	fistpq	32(%edi)
605	fistpq	24(%edi)
606	fistpq	16(%edi)
607	fistpq	8(%edi)
608	fistpq	0(%edi)
609	addl	$64,%esi
610	addl	$64,%edi
611	subl	$64,%ecx
612	cmpl	$64,%ecx
613	jae	large_i586_bcopy_loop
614	popl	%eax
615	addl	%eax,%ecx
616	cmpl	$64,%ecx
617	jae	4b
618
619	cmpl	$0,PCPU(FPCURTHREAD)
620	je	i586_bc2
621
622	/* XXX check that the condition for cases 1-2 stayed false. */
623i586_bcopy_oops:
624	int	$3
625	jmp	i586_bcopy_oops
626
627	frstor	0(%esp)
628	addl	$108,%esp
629i586_bc2:
630	lmsw	%dx
631	movb	$0xfe,kernel_fpu_lock
632
633/*
634 * This is a duplicate of the main part of generic_bcopy.  See the comments
635 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
636 * would mess up high resolution profiling.
637 */
638	ALIGN_TEXT
639small_i586_bcopy:
640	shrl	$2,%ecx
641	cld
642	rep
643	movsl
644	movl	20(%esp),%ecx
645	andl	$3,%ecx
646	rep
647	movsb
648	popl	%edi
649	popl	%esi
650	ret
651
652	ALIGN_TEXT
6531:
654	addl	%ecx,%edi
655	addl	%ecx,%esi
656	decl	%edi
657	decl	%esi
658	andl	$3,%ecx
659	std
660	rep
661	movsb
662	movl	20(%esp),%ecx
663	shrl	$2,%ecx
664	subl	$3,%esi
665	subl	$3,%edi
666	rep
667	movsl
668	popl	%edi
669	popl	%esi
670	cld
671	ret
672#endif /* I586_CPU && defined(DEV_NPX) */
673
674/*
675 * Note: memcpy does not support overlapping copies
676 */
677ENTRY(memcpy)
678	pushl	%edi
679	pushl	%esi
680	movl	12(%esp),%edi
681	movl	16(%esp),%esi
682	movl	20(%esp),%ecx
683	movl	%edi,%eax
684	shrl	$2,%ecx				/* copy by 32-bit words */
685	cld					/* nope, copy forwards */
686	rep
687	movsl
688	movl	20(%esp),%ecx
689	andl	$3,%ecx				/* any bytes left? */
690	rep
691	movsb
692	popl	%esi
693	popl	%edi
694	ret
695
696
697/*****************************************************************************/
698/* copyout and fubyte family                                                 */
699/*****************************************************************************/
700/*
701 * Access user memory from inside the kernel. These routines and possibly
702 * the math- and DOS emulators should be the only places that do this.
703 *
704 * We have to access the memory with user's permissions, so use a segment
705 * selector with RPL 3. For writes to user space we have to additionally
706 * check the PTE for write permission, because the 386 does not check
707 * write permissions when we are executing with EPL 0. The 486 does check
708 * this if the WP bit is set in CR0, so we can use a simpler version here.
709 *
710 * These routines set curpcb->onfault for the time they execute. When a
711 * protection violation occurs inside the functions, the trap handler
712 * returns to *curpcb->onfault instead of the function.
713 */
714
715/*
716 * copyout(from_kernel, to_user, len)  - MP SAFE (if not I386_CPU)
717 */
718ENTRY(copyout)
719	MEXITCOUNT
720	jmp	*copyout_vector
721
722ENTRY(generic_copyout)
723	movl	PCPU(CURPCB),%eax
724	movl	$copyout_fault,PCB_ONFAULT(%eax)
725	pushl	%esi
726	pushl	%edi
727	pushl	%ebx
728	movl	16(%esp),%esi
729	movl	20(%esp),%edi
730	movl	24(%esp),%ebx
731	testl	%ebx,%ebx			/* anything to do? */
732	jz	done_copyout
733
734	/*
735	 * Check explicitly for non-user addresses.  If 486 write protection
736	 * is being used, this check is essential because we are in kernel
737	 * mode so the h/w does not provide any protection against writing
738	 * kernel addresses.
739	 */
740
741	/*
742	 * First, prevent address wrapping.
743	 */
744	movl	%edi,%eax
745	addl	%ebx,%eax
746	jc	copyout_fault
747/*
748 * XXX STOP USING VM_MAXUSER_ADDRESS.
749 * It is an end address, not a max, so every time it is used correctly it
750 * looks like there is an off by one error, and of course it caused an off
751 * by one error in several places.
752 */
753	cmpl	$VM_MAXUSER_ADDRESS,%eax
754	ja	copyout_fault
755
756#ifdef I386_CPU
757
758/*
759 * We have to check each PTE for user write permission.
760 * The checking may cause a page fault, so it is important to set
761 * up everything for return via copyout_fault before here.
762 */
763	/* compute number of pages */
764	movl	%edi,%ecx
765	andl	$PAGE_MASK,%ecx
766	addl	%ebx,%ecx
767	decl	%ecx
768	shrl	$IDXSHIFT+2,%ecx
769	incl	%ecx
770
771	/* compute PTE offset for start address */
772	movl	%edi,%edx
773	shrl	$IDXSHIFT,%edx
774	andb	$0xfc,%dl
775
7761:
777	/* check PTE for each page */
778	leal	PTmap(%edx),%eax
779	shrl	$IDXSHIFT,%eax
780	andb	$0xfc,%al
781	testb	$PG_V,PTmap(%eax)		/* PTE page must be valid */
782	je	4f
783	movb	PTmap(%edx),%al
784	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
785	cmpb	$PG_V|PG_RW|PG_U,%al
786	je	2f
787
7884:
789	/* simulate a trap */
790	pushl	%edx
791	pushl	%ecx
792	shll	$IDXSHIFT,%edx
793	pushl	%edx
794	call	trapwrite			/* trapwrite(addr) */
795	popl	%edx
796	popl	%ecx
797	popl	%edx
798
799	testl	%eax,%eax			/* if not ok, return EFAULT */
800	jnz	copyout_fault
801
8022:
803	addl	$4,%edx
804	decl	%ecx
805	jnz	1b				/* check next page */
806#endif /* I386_CPU */
807
808	/* bcopy(%esi, %edi, %ebx) */
809	movl	%ebx,%ecx
810
811#if defined(I586_CPU) && defined(DEV_NPX)
812	ALIGN_TEXT
813slow_copyout:
814#endif
815	shrl	$2,%ecx
816	cld
817	rep
818	movsl
819	movb	%bl,%cl
820	andb	$3,%cl
821	rep
822	movsb
823
824done_copyout:
825	popl	%ebx
826	popl	%edi
827	popl	%esi
828	xorl	%eax,%eax
829	movl	PCPU(CURPCB),%edx
830	movl	%eax,PCB_ONFAULT(%edx)
831	ret
832
833	ALIGN_TEXT
834copyout_fault:
835	popl	%ebx
836	popl	%edi
837	popl	%esi
838	movl	PCPU(CURPCB),%edx
839	movl	$0,PCB_ONFAULT(%edx)
840	movl	$EFAULT,%eax
841	ret
842
843#if defined(I586_CPU) && defined(DEV_NPX)
844ENTRY(i586_copyout)
845	/*
846	 * Duplicated from generic_copyout.  Could be done a bit better.
847	 */
848	movl	PCPU(CURPCB),%eax
849	movl	$copyout_fault,PCB_ONFAULT(%eax)
850	pushl	%esi
851	pushl	%edi
852	pushl	%ebx
853	movl	16(%esp),%esi
854	movl	20(%esp),%edi
855	movl	24(%esp),%ebx
856	testl	%ebx,%ebx			/* anything to do? */
857	jz	done_copyout
858
859	/*
860	 * Check explicitly for non-user addresses.  If 486 write protection
861	 * is being used, this check is essential because we are in kernel
862	 * mode so the h/w does not provide any protection against writing
863	 * kernel addresses.
864	 */
865
866	/*
867	 * First, prevent address wrapping.
868	 */
869	movl	%edi,%eax
870	addl	%ebx,%eax
871	jc	copyout_fault
872/*
873 * XXX STOP USING VM_MAXUSER_ADDRESS.
874 * It is an end address, not a max, so every time it is used correctly it
875 * looks like there is an off by one error, and of course it caused an off
876 * by one error in several places.
877 */
878	cmpl	$VM_MAXUSER_ADDRESS,%eax
879	ja	copyout_fault
880
881	/* bcopy(%esi, %edi, %ebx) */
8823:
883	movl	%ebx,%ecx
884	/*
885	 * End of duplicated code.
886	 */
887
888	cmpl	$1024,%ecx
889	jb	slow_copyout
890
891	pushl	%ecx
892	call	fastmove
893	addl	$4,%esp
894	jmp	done_copyout
895#endif /* I586_CPU && defined(DEV_NPX) */
896
897/*
898 * copyin(from_user, to_kernel, len) - MP SAFE
899 */
900ENTRY(copyin)
901	MEXITCOUNT
902	jmp	*copyin_vector
903
904ENTRY(generic_copyin)
905	movl	PCPU(CURPCB),%eax
906	movl	$copyin_fault,PCB_ONFAULT(%eax)
907	pushl	%esi
908	pushl	%edi
909	movl	12(%esp),%esi			/* caddr_t from */
910	movl	16(%esp),%edi			/* caddr_t to */
911	movl	20(%esp),%ecx			/* size_t  len */
912
913	/*
914	 * make sure address is valid
915	 */
916	movl	%esi,%edx
917	addl	%ecx,%edx
918	jc	copyin_fault
919	cmpl	$VM_MAXUSER_ADDRESS,%edx
920	ja	copyin_fault
921
922#if defined(I586_CPU) && defined(DEV_NPX)
923	ALIGN_TEXT
924slow_copyin:
925#endif
926	movb	%cl,%al
927	shrl	$2,%ecx				/* copy longword-wise */
928	cld
929	rep
930	movsl
931	movb	%al,%cl
932	andb	$3,%cl				/* copy remaining bytes */
933	rep
934	movsb
935
936#if defined(I586_CPU) && defined(DEV_NPX)
937	ALIGN_TEXT
938done_copyin:
939#endif
940	popl	%edi
941	popl	%esi
942	xorl	%eax,%eax
943	movl	PCPU(CURPCB),%edx
944	movl	%eax,PCB_ONFAULT(%edx)
945	ret
946
947	ALIGN_TEXT
948copyin_fault:
949	popl	%edi
950	popl	%esi
951	movl	PCPU(CURPCB),%edx
952	movl	$0,PCB_ONFAULT(%edx)
953	movl	$EFAULT,%eax
954	ret
955
956#if defined(I586_CPU) && defined(DEV_NPX)
957ENTRY(i586_copyin)
958	/*
959	 * Duplicated from generic_copyin.  Could be done a bit better.
960	 */
961	movl	PCPU(CURPCB),%eax
962	movl	$copyin_fault,PCB_ONFAULT(%eax)
963	pushl	%esi
964	pushl	%edi
965	movl	12(%esp),%esi			/* caddr_t from */
966	movl	16(%esp),%edi			/* caddr_t to */
967	movl	20(%esp),%ecx			/* size_t  len */
968
969	/*
970	 * make sure address is valid
971	 */
972	movl	%esi,%edx
973	addl	%ecx,%edx
974	jc	copyin_fault
975	cmpl	$VM_MAXUSER_ADDRESS,%edx
976	ja	copyin_fault
977	/*
978	 * End of duplicated code.
979	 */
980
981	cmpl	$1024,%ecx
982	jb	slow_copyin
983
984	pushl	%ebx			/* XXX prepare for fastmove_fault */
985	pushl	%ecx
986	call	fastmove
987	addl	$8,%esp
988	jmp	done_copyin
989#endif /* I586_CPU && defined(DEV_NPX) */
990
991#if defined(I586_CPU) && defined(DEV_NPX)
992/* fastmove(src, dst, len)
993	src in %esi
994	dst in %edi
995	len in %ecx		XXX changed to on stack for profiling
996	uses %eax and %edx for tmp. storage
997 */
998/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
999ENTRY(fastmove)
1000	pushl	%ebp
1001	movl	%esp,%ebp
1002	subl	$PCB_SAVEFPU_SIZE+3*4,%esp
1003
1004	movl	8(%ebp),%ecx
1005	cmpl	$63,%ecx
1006	jbe	fastmove_tail
1007
1008	testl	$7,%esi	/* check if src addr is multiple of 8 */
1009	jnz	fastmove_tail
1010
1011	testl	$7,%edi	/* check if dst addr is multiple of 8 */
1012	jnz	fastmove_tail
1013
1014	/* XXX grab FPU context atomically. */
1015	cli
1016
1017/* if (fpcurthread != NULL) { */
1018	cmpl	$0,PCPU(FPCURTHREAD)
1019	je	6f
1020/*    fnsave(&curpcb->pcb_savefpu); */
1021	movl	PCPU(CURPCB),%eax
1022	fnsave	PCB_SAVEFPU(%eax)
1023/*   FPCURTHREAD = NULL; */
1024	movl	$0,PCPU(FPCURTHREAD)
1025/* } */
10266:
1027/* now we own the FPU. */
1028
1029/*
1030 * The process' FP state is saved in the pcb, but if we get
1031 * switched, the cpu_switch() will store our FP state in the
1032 * pcb.  It should be possible to avoid all the copying for
1033 * this, e.g., by setting a flag to tell cpu_switch() to
1034 * save the state somewhere else.
1035 */
1036/* tmp = curpcb->pcb_savefpu; */
1037	movl	%ecx,-12(%ebp)
1038	movl	%esi,-8(%ebp)
1039	movl	%edi,-4(%ebp)
1040	movl	%esp,%edi
1041	movl	PCPU(CURPCB),%esi
1042	addl	$PCB_SAVEFPU,%esi
1043	cld
1044	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1045	rep
1046	movsl
1047	movl	-12(%ebp),%ecx
1048	movl	-8(%ebp),%esi
1049	movl	-4(%ebp),%edi
1050/* stop_emulating(); */
1051	clts
1052/* fpcurthread = curthread; */
1053	movl	PCPU(CURTHREAD),%eax
1054	movl	%eax,PCPU(FPCURTHREAD)
1055	movl	PCPU(CURPCB),%eax
1056
1057	/* XXX end of atomic FPU context grab. */
1058	sti
1059
1060	movl	$fastmove_fault,PCB_ONFAULT(%eax)
10614:
1062	movl	%ecx,-12(%ebp)
1063	cmpl	$1792,%ecx
1064	jbe	2f
1065	movl	$1792,%ecx
10662:
1067	subl	%ecx,-12(%ebp)
1068	cmpl	$256,%ecx
1069	jb	5f
1070	movl	%ecx,-8(%ebp)
1071	movl	%esi,-4(%ebp)
1072	ALIGN_TEXT
10733:
1074	movl	0(%esi),%eax
1075	movl	32(%esi),%eax
1076	movl	64(%esi),%eax
1077	movl	96(%esi),%eax
1078	movl	128(%esi),%eax
1079	movl	160(%esi),%eax
1080	movl	192(%esi),%eax
1081	movl	224(%esi),%eax
1082	addl	$256,%esi
1083	subl	$256,%ecx
1084	cmpl	$256,%ecx
1085	jae	3b
1086	movl	-8(%ebp),%ecx
1087	movl	-4(%ebp),%esi
10885:
1089	ALIGN_TEXT
1090fastmove_loop:
1091	fildq	0(%esi)
1092	fildq	8(%esi)
1093	fildq	16(%esi)
1094	fildq	24(%esi)
1095	fildq	32(%esi)
1096	fildq	40(%esi)
1097	fildq	48(%esi)
1098	fildq	56(%esi)
1099	fistpq	56(%edi)
1100	fistpq	48(%edi)
1101	fistpq	40(%edi)
1102	fistpq	32(%edi)
1103	fistpq	24(%edi)
1104	fistpq	16(%edi)
1105	fistpq	8(%edi)
1106	fistpq	0(%edi)
1107	addl	$-64,%ecx
1108	addl	$64,%esi
1109	addl	$64,%edi
1110	cmpl	$63,%ecx
1111	ja	fastmove_loop
1112	movl	-12(%ebp),%eax
1113	addl	%eax,%ecx
1114	cmpl	$64,%ecx
1115	jae	4b
1116
1117	/* XXX ungrab FPU context atomically. */
1118	cli
1119
1120/* curpcb->pcb_savefpu = tmp; */
1121	movl	%ecx,-12(%ebp)
1122	movl	%esi,-8(%ebp)
1123	movl	%edi,-4(%ebp)
1124	movl	PCPU(CURPCB),%edi
1125	addl	$PCB_SAVEFPU,%edi
1126	movl	%esp,%esi
1127	cld
1128	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1129	rep
1130	movsl
1131	movl	-12(%ebp),%ecx
1132	movl	-8(%ebp),%esi
1133	movl	-4(%ebp),%edi
1134
1135/* start_emulating(); */
1136	smsw	%ax
1137	orb	$CR0_TS,%al
1138	lmsw	%ax
1139/* fpcurthread = NULL; */
1140	movl	$0,PCPU(FPCURTHREAD)
1141
1142	/* XXX end of atomic FPU context ungrab. */
1143	sti
1144
1145	ALIGN_TEXT
1146fastmove_tail:
1147	movl	PCPU(CURPCB),%eax
1148	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)
1149
1150	movb	%cl,%al
1151	shrl	$2,%ecx				/* copy longword-wise */
1152	cld
1153	rep
1154	movsl
1155	movb	%al,%cl
1156	andb	$3,%cl				/* copy remaining bytes */
1157	rep
1158	movsb
1159
1160	movl	%ebp,%esp
1161	popl	%ebp
1162	ret
1163
1164	ALIGN_TEXT
1165fastmove_fault:
1166	/* XXX ungrab FPU context atomically. */
1167	cli
1168
1169	movl	PCPU(CURPCB),%edi
1170	addl	$PCB_SAVEFPU,%edi
1171	movl	%esp,%esi
1172	cld
1173	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
1174	rep
1175	movsl
1176
1177	smsw	%ax
1178	orb	$CR0_TS,%al
1179	lmsw	%ax
1180	movl	$0,PCPU(FPCURTHREAD)
1181
1182	/* XXX end of atomic FPU context ungrab. */
1183	sti
1184
1185fastmove_tail_fault:
1186	movl	%ebp,%esp
1187	popl	%ebp
1188	addl	$8,%esp
1189	popl	%ebx
1190	popl	%edi
1191	popl	%esi
1192	movl	PCPU(CURPCB),%edx
1193	movl	$0,PCB_ONFAULT(%edx)
1194	movl	$EFAULT,%eax
1195	ret
1196#endif /* I586_CPU && defined(DEV_NPX) */
1197
1198/*
1199 * casuptr.  Compare and set user pointer.  Returns -1 or the current value.
1200 */
1201ENTRY(casuptr)
1202	movl	PCPU(CURPCB),%ecx
1203	movl	$fusufault,PCB_ONFAULT(%ecx)
1204	movl	4(%esp),%edx			/* dst */
1205	movl	8(%esp),%eax			/* old */
1206	movl	12(%esp),%ecx			/* new */
1207
1208	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
1209	ja	fusufault
1210
1211#ifdef SMP
1212	lock
1213#endif
1214	cmpxchgl %ecx, (%edx)			/* Compare and set. */
1215
1216	/*
1217	 * The old value is in %eax.  If the store succeeded it will be the
1218	 * value we expected (old) from before the store, otherwise it will
1219	 * be the current value.
1220	 */
1221
1222	movl	PCPU(CURPCB),%ecx
1223	movl	$fusufault,PCB_ONFAULT(%ecx)
1224	movl	$0,PCB_ONFAULT(%ecx)
1225	ret
1226
1227/*
1228 * fu{byte,sword,word} - MP SAFE
1229 *
1230 *	Fetch a byte (sword, word) from user memory
1231 */
1232ENTRY(fuword)
1233	movl	PCPU(CURPCB),%ecx
1234	movl	$fusufault,PCB_ONFAULT(%ecx)
1235	movl	4(%esp),%edx			/* from */
1236
1237	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
1238	ja	fusufault
1239
1240	movl	(%edx),%eax
1241	movl	$0,PCB_ONFAULT(%ecx)
1242	ret
1243
1244ENTRY(fuword32)
1245	jmp	fuword
1246
1247/*
1248 * These two routines are called from the profiling code, potentially
1249 * at interrupt time. If they fail, that's okay, good things will
1250 * happen later. Fail all the time for now - until the trap code is
1251 * able to deal with this.
1252 */
1253ALTENTRY(suswintr)
1254ENTRY(fuswintr)
1255	movl	$-1,%eax
1256	ret
1257
1258/*
1259 * fuword16 - MP SAFE
1260 */
1261ENTRY(fuword16)
1262	movl	PCPU(CURPCB),%ecx
1263	movl	$fusufault,PCB_ONFAULT(%ecx)
1264	movl	4(%esp),%edx
1265
1266	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
1267	ja	fusufault
1268
1269	movzwl	(%edx),%eax
1270	movl	$0,PCB_ONFAULT(%ecx)
1271	ret
1272
1273/*
1274 * fubyte - MP SAFE
1275 */
1276ENTRY(fubyte)
1277	movl	PCPU(CURPCB),%ecx
1278	movl	$fusufault,PCB_ONFAULT(%ecx)
1279	movl	4(%esp),%edx
1280
1281	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
1282	ja	fusufault
1283
1284	movzbl	(%edx),%eax
1285	movl	$0,PCB_ONFAULT(%ecx)
1286	ret
1287
1288	ALIGN_TEXT
1289fusufault:
1290	movl	PCPU(CURPCB),%ecx
1291	xorl	%eax,%eax
1292	movl	%eax,PCB_ONFAULT(%ecx)
1293	decl	%eax
1294	ret
1295
1296/*
1297 * su{byte,sword,word} - MP SAFE (if not I386_CPU)
1298 *
1299 *	Write a byte (word, longword) to user memory
1300 */
1301ENTRY(suword)
1302	movl	PCPU(CURPCB),%ecx
1303	movl	$fusufault,PCB_ONFAULT(%ecx)
1304	movl	4(%esp),%edx
1305
1306#ifdef I386_CPU
1307
1308	/* XXX - page boundary crossing is still not handled */
1309	movl	%edx,%eax
1310	shrl	$IDXSHIFT,%edx
1311	andb	$0xfc,%dl
1312
1313	leal	PTmap(%edx),%ecx
1314	shrl	$IDXSHIFT,%ecx
1315	andb	$0xfc,%cl
1316	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
1317	je	4f
1318	movb	PTmap(%edx),%dl
1319	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1320	cmpb	$PG_V|PG_RW|PG_U,%dl
1321	je	1f
1322
13234:
1324	/* simulate a trap */
1325	pushl	%eax
1326	call	trapwrite
1327	popl	%edx				/* remove junk parameter from stack */
1328	testl	%eax,%eax
1329	jnz	fusufault
13301:
1331	movl	4(%esp),%edx
1332#endif
1333
1334	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
1335	ja	fusufault
1336
1337	movl	8(%esp),%eax
1338	movl	%eax,(%edx)
1339	xorl	%eax,%eax
1340	movl	PCPU(CURPCB),%ecx
1341	movl	%eax,PCB_ONFAULT(%ecx)
1342	ret
1343
1344ENTRY(suword32)
1345	jmp	suword
1346
1347/*
1348 * suword16 - MP SAFE (if not I386_CPU)
1349 */
1350ENTRY(suword16)
1351	movl	PCPU(CURPCB),%ecx
1352	movl	$fusufault,PCB_ONFAULT(%ecx)
1353	movl	4(%esp),%edx
1354
1355#ifdef I386_CPU
1356
1357	/* XXX - page boundary crossing is still not handled */
1358	movl	%edx,%eax
1359	shrl	$IDXSHIFT,%edx
1360	andb	$0xfc,%dl
1361
1362	leal	PTmap(%edx),%ecx
1363	shrl	$IDXSHIFT,%ecx
1364	andb	$0xfc,%cl
1365	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
1366	je	4f
1367	movb	PTmap(%edx),%dl
1368	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1369	cmpb	$PG_V|PG_RW|PG_U,%dl
1370	je	1f
1371
13724:
1373	/* simulate a trap */
1374	pushl	%eax
1375	call	trapwrite
1376	popl	%edx				/* remove junk parameter from stack */
1377	testl	%eax,%eax
1378	jnz	fusufault
13791:
1380	movl	4(%esp),%edx
1381#endif
1382
1383	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
1384	ja	fusufault
1385
1386	movw	8(%esp),%ax
1387	movw	%ax,(%edx)
1388	xorl	%eax,%eax
1389	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
1390	movl	%eax,PCB_ONFAULT(%ecx)
1391	ret
1392
1393/*
1394 * subyte - MP SAFE (if not I386_CPU)
 *
 * Store the byte at 8(%esp) to the user-space address at 4(%esp).
 * Returns 0 in %eax on success; -1 via fusufault on any fault
 * (recovery armed through curpcb->pcb_onfault).
 *
 * The I386_CPU path checks the PTE by hand because the 386 CPU
 * ignores page write protection in supervisor mode; a write fault is
 * simulated with trapwrite() when the page is not valid+user+writable.
1395 */
1396ENTRY(subyte)
	/* Arm fault recovery before any user-memory access. */
1397	movl	PCPU(CURPCB),%ecx
1398	movl	$fusufault,PCB_ONFAULT(%ecx)
1399	movl	4(%esp),%edx
1400
1401#ifdef I386_CPU
1402
	/* %eax = user address; %edx -> byte offset of its PTE within PTmap */
1403	movl	%edx,%eax
1404	shrl	$IDXSHIFT,%edx
1405	andb	$0xfc,%dl
1406
	/* %ecx -> PDE covering the PTE page (PTmap indexed twice) */
1407	leal	PTmap(%edx),%ecx
1408	shrl	$IDXSHIFT,%ecx
1409	andb	$0xfc,%cl
1410	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
1411	je	4f
1412	movb	PTmap(%edx),%dl
1413	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
1414	cmpb	$PG_V|PG_RW|PG_U,%dl
1415	je	1f
1416
14174:
1418	/* simulate a trap */
1419	pushl	%eax
1420	call	trapwrite
1421	popl	%edx				/* remove junk parameter from stack */
1422	testl	%eax,%eax
1423	jnz	fusufault			/* trapwrite() failed: report fault */
14241:
1425	movl	4(%esp),%edx			/* reload address (clobbered above) */
1426#endif
1427
1428	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
1429	ja	fusufault
1430
	/* Address OK: store the byte, then disarm fault recovery. */
1431	movb	8(%esp),%al
1432	movb	%al,(%edx)
1433	xorl	%eax,%eax			/* return 0 (success) */
1434	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
1435	movl	%eax,PCB_ONFAULT(%ecx)
1436	ret
1437
1438/*
1439 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
1440 *
1441 *	copy a string from from to to, stop when a 0 character is reached.
1442 *	return ENAMETOOLONG if string is longer than maxlen, and
1443 *	EFAULT on protection violations. If lencopied is non-zero,
1444 *	return the actual length in *lencopied.
 *
 *	Register roles after the prologue (two pushes shift the args to
 *	12/16/20/24(%esp)): %esi = source (user), %edi = destination
 *	(kernel), %edx = bytes remaining + 1.  Faults during the copy
 *	vector to cpystrflt via curpcb->pcb_onfault.
1445 */
1446ENTRY(copyinstr)
1447	pushl	%esi
1448	pushl	%edi
	/* Arm fault recovery for the user-space reads below. */
1449	movl	PCPU(CURPCB),%ecx
1450	movl	$cpystrflt,PCB_ONFAULT(%ecx)
1451
1452	movl	12(%esp),%esi			/* %esi = from */
1453	movl	16(%esp),%edi			/* %edi = to */
1454	movl	20(%esp),%edx			/* %edx = maxlen */
1455
1456	movl	$VM_MAXUSER_ADDRESS,%eax
1457
1458	/* make sure 'from' is within bounds */
1459	subl	%esi,%eax
1460	jbe	cpystrflt
1461
1462	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
1463	cmpl	%edx,%eax
1464	jae	1f
1465	movl	%eax,%edx
1466	movl	%eax,20(%esp)			/* clamped count, reread at exit */
14671:
1468	incl	%edx
1469	cld
1470
	/* Byte-copy loop: stop on NUL or when the count runs out. */
14712:
1472	decl	%edx
1473	jz	3f
1474
1475	lodsb
1476	stosb
1477	orb	%al,%al
1478	jnz	2b
1479
1480	/* Success -- 0 byte reached */
1481	decl	%edx
1482	xorl	%eax,%eax
1483	jmp	cpystrflt_x
14843:
1485	/* edx is zero - return ENAMETOOLONG or EFAULT */
1486	cmpl	$VM_MAXUSER_ADDRESS,%esi
1487	jae	cpystrflt			/* ran off the end of user space */
14884:
1489	movl	$ENAMETOOLONG,%eax
1490	jmp	cpystrflt_x
1491
	/* Fault path: also the PCB_ONFAULT target armed above. */
1492cpystrflt:
1493	movl	$EFAULT,%eax
1494
	/* Common exit: disarm recovery, report length, return %eax. */
1495cpystrflt_x:
1496	/* set *lencopied and return %eax */
1497	movl	PCPU(CURPCB),%ecx
1498	movl	$0,PCB_ONFAULT(%ecx)
1499	movl	20(%esp),%ecx			/* bytes copied = maxlen - remaining */
1500	subl	%edx,%ecx
1501	movl	24(%esp),%edx
1502	testl	%edx,%edx
1503	jz	1f				/* lencopied may be NULL */
1504	movl	%ecx,(%edx)
15051:
1506	popl	%edi
1507	popl	%esi
1508	ret
1509
1510
1511/*
1512 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 *	Kernel-to-kernel variant of copyinstr: copy a NUL-terminated
 *	string from 'from' to 'to', at most maxlen bytes.  Returns 0 on
 *	success or ENAMETOOLONG if no NUL was found within maxlen; if
 *	lencopied is non-NULL the number of bytes copied (including the
 *	NUL) is stored there.  No fault recovery is armed -- both
 *	addresses are expected to be valid kernel memory.
1513 */
1514ENTRY(copystr)
1515	pushl	%esi
1516	pushl	%edi
1517
	/* Args sit 8 bytes deeper after the two pushes above. */
1518	movl	12(%esp),%esi			/* %esi = from */
1519	movl	16(%esp),%edi			/* %edi = to */
1520	movl	20(%esp),%edx			/* %edx = maxlen */
1521	incl	%edx
1522	cld
	/* Byte-copy loop: stop on NUL or when the count runs out. */
15231:
1524	decl	%edx
1525	jz	4f
1526	lodsb
1527	stosb
1528	orb	%al,%al
1529	jnz	1b
1530
1531	/* Success -- 0 byte reached */
1532	decl	%edx
1533	xorl	%eax,%eax
1534	jmp	6f
15354:
1536	/* edx is zero -- return ENAMETOOLONG */
1537	movl	$ENAMETOOLONG,%eax
1538
15396:
1540	/* set *lencopied and return %eax */
1541	movl	20(%esp),%ecx			/* bytes copied = maxlen - remaining */
1542	subl	%edx,%ecx
1543	movl	24(%esp),%edx
1544	testl	%edx,%edx
1545	jz	7f				/* lencopied may be NULL */
1546	movl	%ecx,(%edx)
15477:
1548	popl	%edi
1549	popl	%esi
1550	ret
1551
/*
 * bcmp(b1, b2, length)
 *
 * Compare two byte strings.  Returns 0 in %eax if they are identical,
 * non-zero otherwise (bcmp's contract -- no ordering information).
 * Compares length/4 dwords with cmpsl, then the 0-3 trailing bytes
 * with cmpsb.
 */
1552ENTRY(bcmp)
1553	pushl	%edi
1554	pushl	%esi
1555	movl	12(%esp),%edi
1556	movl	16(%esp),%esi
1557	movl	20(%esp),%edx			/* %edx = length in bytes */
1558	xorl	%eax,%eax			/* assume equal */
1559
1560	movl	%edx,%ecx
1561	shrl	$2,%ecx				/* dword count */
1562	cld					/* compare forwards */
1563	repe
1564	cmpsl
1565	jne	1f
1566
1567	movl	%edx,%ecx
1568	andl	$3,%ecx				/* leftover byte count */
1569	repe
1570	cmpsb
1571	je	2f
15721:
1573	incl	%eax				/* mismatch: return non-zero */
15742:
1575	popl	%esi
1576	popl	%edi
1577	ret
1578
1579
1580/*
1581 * Handling of special 386 registers and descriptor tables etc
1582 */
1583/* void lgdt(struct region_descriptor *rdp); */
/*
 * Load a new GDT and force every segment register (including %cs via
 * the lret trick) to be reloaded from it, since the CPU caches the
 * old descriptors until the selectors are rewritten.
 */
1584ENTRY(lgdt)
1585	/* reload the descriptor table */
1586	movl	4(%esp),%eax
1587	lgdt	(%eax)
1588
1589	/* flush the prefetch q */
1590	jmp	1f
1591	nop
15921:
1593	/* reload "stale" selectors */
1594	movl	$KDSEL,%eax
1595	mov	%ax,%ds
1596	mov	%ax,%es
1597	mov	%ax,%gs
1598	mov	%ax,%ss
1599	movl	$KPSEL,%eax			/* %fs = per-CPU data selector */
1600	mov	%ax,%fs
1601
1602	/* reload code selector by turning return into intersegmental return */
	/* Push a KCSEL:return-address far pointer and lret through it. */
1603	movl	(%esp),%eax
1604	pushl	%eax
1605	movl	$KCSEL,4(%esp)
1606	lret
1607
1608/* ssdtosd(*ssdp,*sdp) */
/*
 * Convert a machine-independent software segment descriptor (ssd) at
 * 8(%esp) into the hardware segment-descriptor format at 12(%esp).
 * The i386 descriptor scatters base and limit fields, so this is pure
 * bit shuffling: %edx accumulates the low word, %ebx the high word.
 * %ebx is callee-saved, hence the push/pop.
 */
1609ENTRY(ssdtosd)
1610	pushl	%ebx
1611	movl	8(%esp),%ecx			/* %ecx = ssd (arg moved by push) */
1612	movl	8(%ecx),%ebx
1613	shll	$16,%ebx
1614	movl	(%ecx),%edx
1615	roll	$16,%edx
1616	movb	%dh,%bl
1617	movb	%dl,%bh
1618	rorl	$8,%ebx
1619	movl	4(%ecx),%eax
1620	movw	%ax,%dx
1621	andl	$0xf0000,%eax			/* limit bits 19:16 */
1622	orl	%eax,%ebx
1623	movl	12(%esp),%ecx			/* %ecx = sd (output) */
1624	movl	%edx,(%ecx)
1625	movl	%ebx,4(%ecx)
1626	popl	%ebx
1627	ret
1628
1629/* void reset_dbregs() */
/*
 * Clear all x86 debug registers: %dr7 first so every hardware
 * breakpoint is disabled before the address (%dr0-%dr3) and status
 * (%dr6) registers are zeroed.
 */
1630ENTRY(reset_dbregs)
1631	movl    $0,%eax
1632	movl    %eax,%dr7     /* disable all breakpoints first */
1633	movl    %eax,%dr0
1634	movl    %eax,%dr1
1635	movl    %eax,%dr2
1636	movl    %eax,%dr3
1637	movl    %eax,%dr6
1638	ret
1639
1640/*****************************************************************************/
1641/* setjmp, longjmp                                                           */
1642/*****************************************************************************/
1643
/*
 * int setjmp(jmp_buf env)  -- kernel-internal variant.
 *
 * Save the callee-saved register context (%ebx, %esp, %ebp, %esi,
 * %edi) plus the return address into the buffer at 4(%esp), in that
 * fixed order -- longjmp below restores from the same offsets.
 * Returns 0 on the direct call; longjmp resumes here returning 1.
 */
1644ENTRY(setjmp)
1645	movl	4(%esp),%eax
1646	movl	%ebx,(%eax)			/* save ebx */
1647	movl	%esp,4(%eax)			/* save esp */
1648	movl	%ebp,8(%eax)			/* save ebp */
1649	movl	%esi,12(%eax)			/* save esi */
1650	movl	%edi,16(%eax)			/* save edi */
1651	movl	(%esp),%edx			/* get rta */
1652	movl	%edx,20(%eax)			/* save eip */
1653	xorl	%eax,%eax			/* return(0); */
1654	ret
1655
/*
 * void longjmp(jmp_buf env)  -- kernel-internal variant.
 *
 * Restore the register context saved by setjmp above (same buffer
 * layout), patch the saved %eip into the return frame of the restored
 * stack, and return -- so control reappears in setjmp's caller with a
 * return value of 1.
 */
1656ENTRY(longjmp)
1657	movl	4(%esp),%eax
1658	movl	(%eax),%ebx			/* restore ebx */
1659	movl	4(%eax),%esp			/* restore esp */
1660	movl	8(%eax),%ebp			/* restore ebp */
1661	movl	12(%eax),%esi			/* restore esi */
1662	movl	16(%eax),%edi			/* restore edi */
1663	movl	20(%eax),%edx			/* get rta */
1664	movl	%edx,(%esp)			/* put in return frame */
1665	xorl	%eax,%eax			/* return(1); */
1666	incl	%eax
1667	ret
1668
1669/*
1670 * Support for BB-profiling (gcc -a).  The kernbb program will extract
1671 * the data from the kernel.
1672 */
1673
	/* bbhead: head pointer of the singly-linked list of registered
	 * basic-block profiling records, consumed by kernbb(8). */
1674	.data
1675	ALIGN_DATA
1676	.globl bbhead
1677bbhead:
1678	.long 0
1679
1680	.text
/*
 * __bb_init_func(struct bb *bb)
 *
 * gcc -a emits a call to this from each object's init path.  Marks
 * the record live (first field = 1) and prepends it to the bbhead
 * list (link field at offset 16 = old head; bbhead = this record).
 */
1681NON_GPROF_ENTRY(__bb_init_func)
1682	movl	4(%esp),%eax
1683	movl	$1,(%eax)
1684	movl	bbhead,%edx
1685	movl	%edx,16(%eax)
1686	movl	%eax,bbhead
1687	NON_GPROF_RET
1688