/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/i386/i386/support.s 129550 2004-05-21 16:08:26Z bde $
 */

#include "opt_npx.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/intr_machdep.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

#define IDXSHIFT	10
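/*
 * The "shrl $IDXSHIFT; andb $0xfc" sequences below compute
 * (addr >> PAGE_SHIFT) * 4, i.e. the byte offset of the 4-byte PTE
 * for an address within the recursive PTmap.
 */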

	.data
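/*
 * bcopy(), bzero(), copyin() and copyout() are reached through these
 * function pointers so that CPU-specific implementations (e.g. the
 * i586_* versions below) can be installed at runtime; they default to
 * the generic versions.
 */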
	.globl	bcopy_vector
bcopy_vector:
	.long	generic_bcopy
	.globl	bzero_vector
bzero_vector:
	.long	generic_bzero
	.globl	copyin_vector
copyin_vector:
	.long	generic_copyin
	.globl	copyout_vector
copyout_vector:
	.long	generic_copyout
#if defined(I586_CPU) && defined(DEV_NPX)
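/*
 * kernel_fpu_lock is a one-byte try-lock holding 0xfe when free.
 * "sarb $1,kernel_fpu_lock" shifts in a copy of the sign bit: on a
 * free lock this leaves 0xff with the carry clear, and once the byte
 * is 0xff further attempts shift out a 1, so the carry flag reports
 * the lock as busy.  Storing 0xfe releases it.
 */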
kernel_fpu_lock:
	.byte	0xfe
	.space	3
#endif
	ALIGN_DATA
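/*
 * Interrupt counters and their names, sized by INTRCNT_COUNT from
 * assym.s.  They are exported so that userland tools (e.g. vmstat(8))
 * can read them out of kernel memory.
 */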
	.globl	intrcnt, eintrcnt
intrcnt:
	.space	INTRCNT_COUNT * 4
eintrcnt:

	.globl	intrnames, eintrnames
intrnames:
	.space	INTRCNT_COUNT * (MAXCOMLEN + 1)
eintrnames:

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(bzero)
	MEXITCOUNT
	jmp	*bzero_vector

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret

#ifdef I486_CPU
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `fpcurthread != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but fpcurthread and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz1

	/*
	 * XXX don't use the FPU for cases 1 and 2, since preemptive
	 * scheduling of ithreads broke these cases.  Note that we can
	 * no longer get here from an interrupt handler, since the
	 * context switch to the interrupt handler will have saved the
	 * FPU state.
	 */
	jmp	intreg_i586_bzero

	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx
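	/*
	 * Worked example (illustrative): for buf = 0x1003, len = 256,
	 * the first fstl zeroed 0x1003-0x100a, %edx became
	 * (0x1003 + 8) & ~7 = 0x1008 and %ecx 256 - 5 = 251; here the
	 * second fstl zeroes the final 8 bytes and %ecx is rounded down
	 * to 248, a multiple of 8 for the main loop.
	 */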

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or done
	 * in parallel with it so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bz3

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bzero_oops:
	int	$3
	jmp	i586_bzero_oops

	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstp	%st(0)
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && defined(DEV_NPX) */

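/*
 * sse2_pagezero(void *page): zero a page with non-temporal stores.
 * movnti writes around the caches, so zeroing a page does not evict
 * useful data; the sfence makes the weakly-ordered stores globally
 * visible before we return.
 */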
ENTRY(sse2_pagezero)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	%ecx,%eax
	addl	$4096,%eax
	xor	%ebx,%ebx
1:
	movnti	%ebx,(%ecx)
	addl	$4,%ecx
	cmpl	%ecx,%eax
	jne	1b
	sfence
	popl	%ebx
	ret

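/*
 * i686_pagezero(void *page): scan the page with "repe scasl" and only
 * rewrite words that are not already zero, avoiding dirtying cache
 * lines for pages that are largely zero already.
 */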
ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp), %edi
	movl	$1024, %ecx
	cld

	ALIGN_TEXT
1:
	xorl	%eax, %eax
	repe
	scasl
	jnz	2f

	popl	%ebx
	popl	%edi
	ret

	ALIGN_TEXT

2:
	incl	%ecx
	subl	$4, %edi

	movl	%ecx, %edx
	cmpl	$16, %ecx

	jge	3f

	movl	%edi, %ebx
	andl	$0x3f, %ebx
	shrl	%ebx
	shrl	%ebx
	movl	$16, %ecx
	subl	%ebx, %ecx

3:
	subl	%ecx, %edx
	rep
	stosl

	movl	%edx, %ecx
	testl	%edx, %edx
	jnz	1b

	popl	%ebx
	popl	%edi
	ret

/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret

ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld
	ret

ENTRY(bcopy)
	MEXITCOUNT
	jmp	*bcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc1

	/* XXX turn off handling of cases 1-2, as above. */
	movb	$0xfe,kernel_fpu_lock
	jmp	small_i586_bcopy

	smsw	%dx
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
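	/*
	 * Prime the L1 cache: one dummy load per 32-byte cache line
	 * pulls the next 256-byte chunk of the source into the cache
	 * ahead of the FPU copy loop below.
	 */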
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
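/*
 * Copy 64 bytes per iteration through the FPU stack: fildq/fistpq move
 * 8 bytes per instruction, twice the width of movsl on the Pentium.
 */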
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,PCPU(FPCURTHREAD)
	je	i586_bc2

	/* XXX check that the condition for cases 1-2 stayed false. */
i586_bcopy_oops:
	int	$3
	jmp	i586_bcopy_oops

	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3. For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0. The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */

/*
 * copyout(from_kernel, to_user, len)  - MP SAFE (if not I386_CPU)
 */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*copyout_vector

ENTRY(generic_copyout)
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#ifdef I386_CPU

/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

1:
	/* check PTE for each page */
	leal	PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
	movl	%ebx,%ecx

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * copyin(from_user, to_kernel, len) - MP SAFE
 */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*copyin_vector

ENTRY(generic_copyin)
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && defined(DEV_NPX)
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && defined(DEV_NPX)
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	PCPU(CURPCB),%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && defined(DEV_NPX) */

#if defined(I586_CPU) && defined(DEV_NPX)
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

	/* XXX grab FPU context atomically. */
	cli

/* if (fpcurthread != NULL) { */
	cmpl	$0,PCPU(FPCURTHREAD)
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	PCPU(CURPCB),%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   FPCURTHREAD = NULL; */
	movl	$0,PCPU(FPCURTHREAD)
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	PCPU(CURPCB),%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* fpcurthread = curthread; */
	movl	PCPU(CURTHREAD),%eax
	movl	%eax,PCPU(FPCURTHREAD)
	movl	PCPU(CURPCB),%eax

	/* XXX end of atomic FPU context grab. */
	sti

	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
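	/*
	 * As in i586_bcopy: touch one word per 32-byte cache line to
	 * pre-read the next chunk of the source into the L1 cache.
	 */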
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	/* XXX ungrab FPU context atomically. */
	cli

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* fpcurthread = NULL; */
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

	ALIGN_TEXT
fastmove_tail:
	movl	PCPU(CURPCB),%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	/* XXX ungrab FPU context atomically. */
	cli

	movl	PCPU(CURPCB),%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,PCPU(FPCURTHREAD)

	/* XXX end of atomic FPU context ungrab. */
	sti

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	PCPU(CURPCB),%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && defined(DEV_NPX) */

/*
 * casuptr.  Compare and set user pointer.  Returns -1 or the current value.
 */
ENTRY(casuptr)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* dst */
	movl	8(%esp),%eax			/* old */
	movl	12(%esp),%ecx			/* new */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

#ifdef SMP
	lock
#endif
	cmpxchgl %ecx, (%edx)			/* Compare and set. */

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */

	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
 * memory.  All these functions are MPSAFE.
 */

ALTENTRY(fuword32)
ENTRY(fuword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * fuswintr() and suswintr() are specialized variants of fuword16() and
 * suword16(), respectively.  They are called from the profiling code,
 * potentially at interrupt time.  If they fail, that's okay; good things
 * will happen later.  They always fail for now, until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fuword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	PCPU(CURPCB),%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret

/*
 * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
 * All these functions are MPSAFE unless I386_CPU is configured.
 */

ALTENTRY(suword32)
ENTRY(suword)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#ifdef I386_CPU

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(suword16)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#ifdef I386_CPU

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(subyte)
	movl	PCPU(CURPCB),%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#ifdef I386_CPU

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	PCPU(CURPCB),%ecx		/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
 *
 *	copy a string from 'from' to 'to'; stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	PCPU(CURPCB),%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	PCPU(CURPCB),%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied) - MP SAFE
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%gs
	movl	%eax,%ss
	movl	$KPSEL,%eax
	movl	%eax,%fs

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	lret

/* ssdtosd(*ssdp,*sdp) */
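/*
 * Convert the machine-independent software segment descriptor at *ssdp
 * into the packed hardware descriptor layout at *sdp, shuffling the
 * split base, limit and flag fields into their architected positions.
 */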
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* void reset_dbregs() */
ENTRY(reset_dbregs)
	movl    $0,%eax
	movl    %eax,%dr7     /* disable all breakpoints first */
	movl    %eax,%dr0
	movl    %eax,%dr1
	movl    %eax,%dr2
	movl    %eax,%dr3
	movl    %eax,%dr6
	ret

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

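/*
 * These are for in-kernel use only: just the callee-saved registers
 * (%ebx, %esi, %edi, %ebp), the stack pointer and the return address
 * are saved, since everything else is caller-saved under the i386
 * calling convention.
 */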
ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret

/*
 * Support for BB-profiling (gcc -a).  The kernbb program will extract
 * the data from the kernel.
 */

	.data
	ALIGN_DATA
	.globl bbhead
bbhead:
	.long 0

	.text
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	movl	bbhead,%edx
	movl	%edx,16(%eax)
	movl	%eax,bbhead
	NON_GPROF_RET
1669