support.s revision 42440
/*-
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$Id: support.s,v 1.59 1998/05/11 02:13:43 dyson Exp $
 */

#include "npx.h"

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>

#include "assym.s"

#define KDSEL		0x10			/* kernel data selector */
#define KCSEL		0x8			/* kernel code selector */
#define IDXSHIFT	10
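/*
 * IDXSHIFT turns a virtual address into a byte index into the recursive
 * page table map: with 4K pages and 4-byte PTEs, the PTE for address `va'
 * sits at byte offset (va >> PAGE_SHIFT) * 4, which is (va >> IDXSHIFT)
 * with the low two bits masked off.  This is what the
 * `shrl $IDXSHIFT ; andb $0xfc' sequences below compute.
 */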

	.data
	.globl	_bcopy_vector
_bcopy_vector:
	.long	_generic_bcopy
	.globl	_bzero
_bzero:
	.long	_generic_bzero
	.globl	_copyin_vector
_copyin_vector:
	.long	_generic_copyin
	.globl	_copyout_vector
_copyout_vector:
	.long	_generic_copyout
	.globl	_ovbcopy_vector
_ovbcopy_vector:
	.long	_generic_bcopy
#if defined(I586_CPU) && NNPX > 0
kernel_fpu_lock:
	.byte	0xfe
	.space	3
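/*
 * kernel_fpu_lock is a tiny test-and-set lock built out of `sarb':
 * shifting the byte right by one copies bit 0 into the carry flag.
 * The byte starts as 0xfe (bit 0 clear), so the first `sarb $1' sees
 * CF=0 (lock acquired) and leaves the byte as 0xff; any further
 * `sarb $1' sees CF=1 (lock busy).  The lock is released by storing
 * 0xfe again.
 */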
#endif

	.text

/*
 * bcopy family
 * void bzero(void *buf, u_int len)
 */

ENTRY(generic_bzero)
	pushl	%edi
	movl	8(%esp),%edi
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	rep
	stosb
	popl	%edi
	ret

#if defined(I486_CPU)
ENTRY(i486_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx
	xorl	%eax,%eax
/*
 * do 64 byte chunks first
 *
 * XXX this is probably over-unrolled at least for DX2's
 */
2:
	cmpl	$64,%ecx
	jb	3f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	movl	%eax,16(%edx)
	movl	%eax,20(%edx)
	movl	%eax,24(%edx)
	movl	%eax,28(%edx)
	movl	%eax,32(%edx)
	movl	%eax,36(%edx)
	movl	%eax,40(%edx)
	movl	%eax,44(%edx)
	movl	%eax,48(%edx)
	movl	%eax,52(%edx)
	movl	%eax,56(%edx)
	movl	%eax,60(%edx)
	addl	$64,%edx
	subl	$64,%ecx
	jnz	2b
	ret

/*
 * do 16 byte chunks
 */
	SUPERALIGN_TEXT
3:
	cmpl	$16,%ecx
	jb	4f
	movl	%eax,(%edx)
	movl	%eax,4(%edx)
	movl	%eax,8(%edx)
	movl	%eax,12(%edx)
	addl	$16,%edx
	subl	$16,%ecx
	jnz	3b
	ret

/*
 * do 4 byte chunks
 */
	SUPERALIGN_TEXT
4:
	cmpl	$4,%ecx
	jb	5f
	movl	%eax,(%edx)
	addl	$4,%edx
	subl	$4,%ecx
	jnz	4b
	ret

/*
 * do 1 byte chunks
 * a jump table seems to be faster than a loop or more range reductions
 *
 * XXX need a const section for non-text
 */
	.data
jtab:
	.long	do0
	.long	do1
	.long	do2
	.long	do3

	.text
	SUPERALIGN_TEXT
5:
	jmp	*jtab(,%ecx,4)

	SUPERALIGN_TEXT
do3:
	movw	%ax,(%edx)
	movb	%al,2(%edx)
	ret

	SUPERALIGN_TEXT
do2:
	movw	%ax,(%edx)
	ret

	SUPERALIGN_TEXT
do1:
	movb	%al,(%edx)
	ret

	SUPERALIGN_TEXT
do0:
	ret
#endif

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_bzero)
	movl	4(%esp),%edx
	movl	8(%esp),%ecx

	/*
	 * The FPU register method is twice as fast as the integer register
	 * method unless the target is in the L1 cache and we pre-allocate a
	 * cache line for it (then the integer register method is 4-5 times
	 * faster).  However, we never pre-allocate cache lines, since that
	 * would make the integer method 25% or more slower for the common
	 * case when the target isn't in either the L1 cache or the L2 cache.
	 * Thus we normally use the FPU register method unless the overhead
	 * would be too large.
	 */
	cmpl	$256,%ecx	/* empirical; clts, fninit, smsw cost a lot */
	jb	intreg_i586_bzero

	/*
	 * The FPU registers may belong to an application or to fastmove()
	 * or to another invocation of bcopy() or ourself in a higher level
	 * interrupt or trap handler.  Preserving the registers is
	 * complicated since we avoid it if possible at all levels.  We
	 * want to localize the complications even when that increases them.
	 * Here the extra work involves preserving CR0_TS in TS.
	 * `npxproc != NULL' is supposed to be the condition that all the
	 * FPU resources belong to an application, but npxproc and CR0_TS
	 * aren't set atomically enough for this condition to work in
	 * interrupt handlers.
	 *
	 * Case 1: FPU registers belong to the application: we must preserve
	 * the registers if we use them, so we only use the FPU register
	 * method if the target size is large enough to amortize the extra
	 * overhead for preserving them.  CR0_TS must be preserved although
	 * it is very likely to end up as set.
	 *
	 * Case 2: FPU registers belong to fastmove(): fastmove() currently
	 * makes the registers look like they belong to an application so
	 * that cpu_switch() and savectx() don't have to know about it, so
	 * this case reduces to case 1.
	 *
	 * Case 3: FPU registers belong to the kernel: don't use the FPU
	 * register method.  This case is unlikely, and supporting it would
	 * be more complicated and might take too much stack.
	 *
	 * Case 4: FPU registers don't belong to anyone: the FPU registers
	 * don't need to be preserved, so we always use the FPU register
	 * method.  CR0_TS must be preserved although it is very likely to
	 * always end up as clear.
	 */
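	/*
	 * Sketch of the dispatch below, as pseudo-C (the helper names
	 * are made up; the labels are the real ones):
	 *
	 *	if (npxproc != NULL) {			// cases 1-3
	 *		if (len < 256 + 184 || !try_lock(kernel_fpu_lock))
	 *			goto intreg_i586_bzero;
	 *		msw = smsw(); clts(); fnsave(on stack);	// preserve
	 *	} else {				// case 4
	 *		if (!try_lock(kernel_fpu_lock))
	 *			goto intreg_i586_bzero;
	 *		msw = smsw(); clts(); fninit();
	 *	}
	 */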
	cmpl	$0,_npxproc
	je	i586_bz1
	cmpl	$256+184,%ecx		/* empirical; not quite 2*108 more */
	jb	intreg_i586_bzero
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	i586_bz2

i586_bz1:
	sarb	$1,kernel_fpu_lock
	jc	intreg_i586_bzero
	smsw	%ax
	clts
	fninit				/* XXX should avoid needing this */
i586_bz2:
	fldz

	/*
	 * Align to an 8 byte boundary (misalignment in the main loop would
	 * cost a factor of >= 2).  Avoid jumps (at little cost if it is
	 * already aligned) by always zeroing 8 bytes and using the part up
	 * to the _next_ alignment position.
	 */
	fstl	0(%edx)
	addl	%edx,%ecx		/* part of %ecx -= new_%edx - %edx */
	addl	$8,%edx
	andl	$~7,%edx
	subl	%edx,%ecx

	/*
	 * Similarly align `len' to a multiple of 8.
	 */
	fstl	-8(%edx,%ecx)
	decl	%ecx
	andl	$~7,%ecx
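	/*
	 * Worked example (made-up numbers): buf = 0x1005, len = 40, so
	 * the region is [0x1005, 0x102d).  The first fstl zeroes
	 * 0x1005..0x100c; %edx becomes (0x1005 + 8) & ~7 = 0x1008 and
	 * %ecx = 0x102d - 0x1008 = 37.  The second fstl zeroes the last
	 * 8 bytes, 0x1025..0x102c, and %ecx rounds down to 32, so the
	 * loop handles 0x1008..0x1027.  Every byte is covered, some twice.
	 */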

	/*
	 * This wouldn't be any faster if it were unrolled, since the loop
	 * control instructions are much faster than the fstl and/or execute
	 * in parallel with it, so their overhead is insignificant.
	 */
fpureg_i586_bzero_loop:
	fstl	0(%edx)
	addl	$8,%edx
	subl	$8,%ecx
	cmpl	$8,%ecx
	jae	fpureg_i586_bzero_loop

	cmpl	$0,_npxproc
	je	i586_bz3
	frstor	0(%esp)
	addl	$108,%esp
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

i586_bz3:
	fstpl	%st(0)
	lmsw	%ax
	movb	$0xfe,kernel_fpu_lock
	ret

intreg_i586_bzero:
	/*
	 * `rep stos' seems to be the best method in practice for small
	 * counts.  Fancy methods usually take too long to start up due
	 * to cache and BTB misses.
	 */
	pushl	%edi
	movl	%edx,%edi
	xorl	%eax,%eax
	shrl	$2,%ecx
	cld
	rep
	stosl
	movl	12(%esp),%ecx
	andl	$3,%ecx
	jne	1f
	popl	%edi
	ret

1:
	rep
	stosb
	popl	%edi
	ret
#endif /* I586_CPU && NNPX > 0 */

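/*
 * i686_pagezero zeroes a page, but first scans it with `repe scasl'
 * and only falls back to `rep stosl' from the first nonzero word on.
 * A page that is already zero is thus only read, never written, which
 * avoids dirtying cache lines in the common already-zero case.
 */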
ENTRY(i686_pagezero)
	pushl	%edi
	pushl	%ebx

	movl	12(%esp), %edi
	movl	$1024, %ecx
	cld

	ALIGN_TEXT
1:
	xorl	%eax, %eax
	repe
	scasl
	jnz	2f

	popl	%ebx
	popl	%edi
	ret

	ALIGN_TEXT

2:
	incl	%ecx
	subl	$4, %edi

	movl	%ecx, %edx
	cmpl	$16, %ecx

	jge	3f

	movl	%edi, %ebx
	andl	$0x3f, %ebx
	shrl	%ebx
	shrl	%ebx
	movl	$16, %ecx
	subl	%ebx, %ecx

3:
	subl	%ecx, %edx
	rep
	stosl

	movl	%edx, %ecx
	testl	%edx, %edx
	jnz	1b

	popl	%ebx
	popl	%edi
	ret

/* fillw(pat, base, cnt) */
ENTRY(fillw)
	pushl	%edi
	movl	8(%esp),%eax
	movl	12(%esp),%edi
	movl	16(%esp),%ecx
	cld
	rep
	stosw
	popl	%edi
	ret

ENTRY(bcopyb)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f
	cld					/* nope, copy forwards */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards. */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	std
	rep
	movsb
	popl	%edi
	popl	%esi
	cld
	ret

ENTRY(bcopy)
	MEXITCOUNT
	jmp	*_bcopy_vector

ENTRY(ovbcopy)
	MEXITCOUNT
	jmp	*_ovbcopy_vector

/*
 * generic_bcopy(src, dst, cnt)
 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
 */
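/*
 * The overlap test used by bcopyb above and by the bcopy variants below
 * relies on unsigned wraparound: `dst - src' is compared against `cnt'
 * as an unsigned number.  When dst is below src the subtraction wraps
 * to a huge value, so the forward copy is chosen (safe even if the
 * regions overlap); only when dst - src < cnt, i.e. dst points into the
 * source region, would a forward copy clobber source bytes before they
 * are read, and then we copy backwards instead.
 */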
ENTRY(generic_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* nope, copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi			/* copy backwards */
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx				/* any fractional bytes? */
	std
	rep
	movsb
	movl	20(%esp),%ecx			/* copy remainder by 32-bit words */
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_bcopy)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax			/* overlapping && src < dst? */
	jb	1f

	cmpl	$1024,%ecx
	jb	small_i586_bcopy

	sarb	$1,kernel_fpu_lock
	jc	small_i586_bcopy
	cmpl	$0,_npxproc
	je	i586_bc1
	smsw	%dx
	clts
	subl	$108,%esp
	fnsave	0(%esp)
	jmp	4f

i586_bc1:
	smsw	%dx
	clts
	fninit				/* XXX should avoid needing this */

	ALIGN_TEXT
4:
	pushl	%ecx
#define	DCACHE_SIZE	8192
	cmpl	$(DCACHE_SIZE-512)/2,%ecx
	jbe	2f
	movl	$(DCACHE_SIZE-512)/2,%ecx
2:
	subl	%ecx,0(%esp)
	cmpl	$256,%ecx
	jb	5f			/* XXX should prefetch if %ecx >= 32 */
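	/*
	 * The read loop below touches one word in each 32-byte cache
	 * line of the chunk, pulling the source into the L1 data cache
	 * before the FPU copy loop streams through it.
	 */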
	pushl	%esi
	pushl	%ecx
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	popl	%ecx
	popl	%esi
5:
	ALIGN_TEXT
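	/*
	 * Copy 64 bytes per iteration through the eight FPU registers.
	 * fildq/fistpq move 8 bytes at a time and round-trip any 64-bit
	 * integer pattern exactly, so this is safe for arbitrary data.
	 */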
large_i586_bcopy_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$64,%esi
	addl	$64,%edi
	subl	$64,%ecx
	cmpl	$64,%ecx
	jae	large_i586_bcopy_loop
	popl	%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

	cmpl	$0,_npxproc
	je	i586_bc2
	frstor	0(%esp)
	addl	$108,%esp
i586_bc2:
	lmsw	%dx
	movb	$0xfe,kernel_fpu_lock

/*
 * This is a duplicate of the main part of generic_bcopy.  See the comments
 * there.  Jumping into generic_bcopy would cost a whole 0-1 cycles and
 * would mess up high resolution profiling.
 */
	ALIGN_TEXT
small_i586_bcopy:
	shrl	$2,%ecx
	cld
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx
	rep
	movsb
	popl	%edi
	popl	%esi
	ret

	ALIGN_TEXT
1:
	addl	%ecx,%edi
	addl	%ecx,%esi
	decl	%edi
	decl	%esi
	andl	$3,%ecx
	std
	rep
	movsb
	movl	20(%esp),%ecx
	shrl	$2,%ecx
	subl	$3,%esi
	subl	$3,%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	cld
	ret
#endif /* I586_CPU && NNPX > 0 */

/*
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%ecx
	movl	%edi,%eax
	shrl	$2,%ecx				/* copy by 32-bit words */
	cld					/* copy forwards */
	rep
	movsl
	movl	20(%esp),%ecx
	andl	$3,%ecx				/* any bytes left? */
	rep
	movsb
	popl	%esi
	popl	%edi
	ret


/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines and possibly
 * the math- and DOS emulators should be the only places that do this.
 *
 * We have to access the memory with user's permissions, so use a segment
 * selector with RPL 3. For writes to user space we have to additionally
 * check the PTE for write permission, because the 386 does not check
 * write permissions when we are executing with EPL 0. The 486 does check
 * this if the WP bit is set in CR0, so we can use a simpler version here.
 *
 * These routines set curpcb->onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->onfault instead of the function.
 */

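/*
 * The onfault protocol, as (hypothetical) C pseudocode:
 *
 *	curpcb->pcb_onfault = copyout_fault;
 *	... touch user memory; if that faults, trap() sees the non-NULL
 *	    onfault and resumes execution at copyout_fault, which unwinds
 *	    the stack and returns EFAULT ...
 *	curpcb->pcb_onfault = NULL;
 *	return (0);
 */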
/* copyout(from_kernel, to_user, len) */
ENTRY(copyout)
	MEXITCOUNT
	jmp	*_copyout_vector

ENTRY(generic_copyout)
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	3f
#endif
/*
 * We have to check each PTE for user write permission.
 * The checking may cause a page fault, so it is important to set
 * up everything for return via copyout_fault before here.
 */
	/* compute number of pages */
	movl	%edi,%ecx
	andl	$PAGE_MASK,%ecx
	addl	%ebx,%ecx
	decl	%ecx
	shrl	$IDXSHIFT+2,%ecx
	incl	%ecx

	/* compute PTE offset for start address */
	movl	%edi,%edx
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

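	/*
	 * At this point %ecx is the number of pages the write spans
	 * ( ((dst & PAGE_MASK) + len - 1) / PAGE_SIZE + 1, since
	 * IDXSHIFT + 2 == PAGE_SHIFT ) and %edx is the byte offset of
	 * the first PTE within the recursive _PTmap.  The loop below
	 * first checks, again via _PTmap, that the page holding that
	 * PTE is itself valid before touching the PTE.
	 */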
1:
	/* check PTE for each page */
	leal	_PTmap(%edx),%eax
	shrl	$IDXSHIFT,%eax
	andb	$0xfc,%al
	testb	$PG_V,_PTmap(%eax)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%al
	andb	$PG_V|PG_RW|PG_U,%al		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%al
	je	2f

4:
	/* simulate a trap */
	pushl	%edx
	pushl	%ecx
	shll	$IDXSHIFT,%edx
	pushl	%edx
	call	_trapwrite			/* trapwrite(addr) */
	popl	%edx
	popl	%ecx
	popl	%edx

	testl	%eax,%eax			/* if not ok, return EFAULT */
	jnz	copyout_fault

2:
	addl	$4,%edx
	decl	%ecx
	jnz	1b				/* check next page */
#endif /* I386_CPU */

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyout:
#endif
	shrl	$2,%ecx
	cld
	rep
	movsl
	movb	%bl,%cl
	andb	$3,%cl
	rep
	movsb

done_copyout:
	popl	%ebx
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyout_fault:
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_copyout)
	/*
	 * Duplicated from generic_copyout.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyout_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	pushl	%ebx
	movl	16(%esp),%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	testl	%ebx,%ebx			/* anything to do? */
	jz	done_copyout

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movl	%edi,%eax
	addl	%ebx,%eax
	jc	copyout_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	cmpl	$VM_MAXUSER_ADDRESS,%eax
	ja	copyout_fault

	/* bcopy(%esi, %edi, %ebx) */
3:
	movl	%ebx,%ecx
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyout

	pushl	%ecx
	call	_fastmove
	addl	$4,%esp
	jmp	done_copyout
#endif /* I586_CPU && NNPX > 0 */

/* copyin(from_user, to_kernel, len) */
ENTRY(copyin)
	MEXITCOUNT
	jmp	*_copyin_vector

ENTRY(generic_copyin)
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
slow_copyin:
#endif
	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

#if defined(I586_CPU) && NNPX > 0
	ALIGN_TEXT
done_copyin:
#endif
	popl	%edi
	popl	%esi
	xorl	%eax,%eax
	movl	_curpcb,%edx
	movl	%eax,PCB_ONFAULT(%edx)
	ret

	ALIGN_TEXT
copyin_fault:
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret

#if defined(I586_CPU) && NNPX > 0
ENTRY(i586_copyin)
	/*
	 * Duplicated from generic_copyin.  Could be done a bit better.
	 */
	movl	_curpcb,%eax
	movl	$copyin_fault,PCB_ONFAULT(%eax)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi			/* caddr_t from */
	movl	16(%esp),%edi			/* caddr_t to */
	movl	20(%esp),%ecx			/* size_t  len */

	/*
	 * make sure address is valid
	 */
	movl	%esi,%edx
	addl	%ecx,%edx
	jc	copyin_fault
	cmpl	$VM_MAXUSER_ADDRESS,%edx
	ja	copyin_fault
	/*
	 * End of duplicated code.
	 */

	cmpl	$1024,%ecx
	jb	slow_copyin

	pushl	%ebx			/* XXX prepare for fastmove_fault */
	pushl	%ecx
	call	_fastmove
	addl	$8,%esp
	jmp	done_copyin
#endif /* I586_CPU && NNPX > 0 */

#if defined(I586_CPU) && NNPX > 0
/* fastmove(src, dst, len)
	src in %esi
	dst in %edi
	len in %ecx		XXX changed to on stack for profiling
	uses %eax and %edx for tmp. storage
 */
/* XXX use ENTRY() to get profiling.  fastmove() is actually a non-entry. */
ENTRY(fastmove)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$PCB_SAVEFPU_SIZE+3*4,%esp
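	/*
	 * Frame layout: PCB_SAVEFPU_SIZE bytes at 0(%esp) hold a copy of
	 * the process' FPU save area, and the three longwords at
	 * -12(%ebp), -8(%ebp) and -4(%ebp) are scratch slots used to
	 * spill %ecx, %esi and %edi around the `rep movsl' copies below.
	 */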

	movl	8(%ebp),%ecx
	cmpl	$63,%ecx
	jbe	fastmove_tail

	testl	$7,%esi	/* check if src addr is multiple of 8 */
	jnz	fastmove_tail

	testl	$7,%edi	/* check if dst addr is multiple of 8 */
	jnz	fastmove_tail

/* if (npxproc != NULL) { */
	cmpl	$0,_npxproc
	je	6f
/*    fnsave(&curpcb->pcb_savefpu); */
	movl	_curpcb,%eax
	fnsave	PCB_SAVEFPU(%eax)
/*   npxproc = NULL; */
	movl	$0,_npxproc
/* } */
6:
/* now we own the FPU. */

/*
 * The process' FP state is saved in the pcb, but if we get
 * switched, the cpu_switch() will store our FP state in the
 * pcb.  It should be possible to avoid all the copying for
 * this, e.g., by setting a flag to tell cpu_switch() to
 * save the state somewhere else.
 */
/* tmp = curpcb->pcb_savefpu; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	%esp,%edi
	movl	_curpcb,%esi
	addl	$PCB_SAVEFPU,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi
/* stop_emulating(); */
	clts
/* npxproc = curproc; */
	movl	_curproc,%eax
	movl	%eax,_npxproc
	movl	_curpcb,%eax
	movl	$fastmove_fault,PCB_ONFAULT(%eax)
4:
	movl	%ecx,-12(%ebp)
	cmpl	$1792,%ecx
	jbe	2f
	movl	$1792,%ecx
2:
	subl	%ecx,-12(%ebp)
	cmpl	$256,%ecx
	jb	5f
	movl	%ecx,-8(%ebp)
	movl	%esi,-4(%ebp)
	ALIGN_TEXT
3:
	movl	0(%esi),%eax
	movl	32(%esi),%eax
	movl	64(%esi),%eax
	movl	96(%esi),%eax
	movl	128(%esi),%eax
	movl	160(%esi),%eax
	movl	192(%esi),%eax
	movl	224(%esi),%eax
	addl	$256,%esi
	subl	$256,%ecx
	cmpl	$256,%ecx
	jae	3b
	movl	-8(%ebp),%ecx
	movl	-4(%ebp),%esi
5:
	ALIGN_TEXT
fastmove_loop:
	fildq	0(%esi)
	fildq	8(%esi)
	fildq	16(%esi)
	fildq	24(%esi)
	fildq	32(%esi)
	fildq	40(%esi)
	fildq	48(%esi)
	fildq	56(%esi)
	fistpq	56(%edi)
	fistpq	48(%edi)
	fistpq	40(%edi)
	fistpq	32(%edi)
	fistpq	24(%edi)
	fistpq	16(%edi)
	fistpq	8(%edi)
	fistpq	0(%edi)
	addl	$-64,%ecx
	addl	$64,%esi
	addl	$64,%edi
	cmpl	$63,%ecx
	ja	fastmove_loop
	movl	-12(%ebp),%eax
	addl	%eax,%ecx
	cmpl	$64,%ecx
	jae	4b

/* curpcb->pcb_savefpu = tmp; */
	movl	%ecx,-12(%ebp)
	movl	%esi,-8(%ebp)
	movl	%edi,-4(%ebp)
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl
	movl	-12(%ebp),%ecx
	movl	-8(%ebp),%esi
	movl	-4(%ebp),%edi

/* start_emulating(); */
	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
/* npxproc = NULL; */
	movl	$0,_npxproc

	ALIGN_TEXT
fastmove_tail:
	movl	_curpcb,%eax
	movl	$fastmove_tail_fault,PCB_ONFAULT(%eax)

	movb	%cl,%al
	shrl	$2,%ecx				/* copy longword-wise */
	cld
	rep
	movsl
	movb	%al,%cl
	andb	$3,%cl				/* copy remaining bytes */
	rep
	movsb

	movl	%ebp,%esp
	popl	%ebp
	ret

	ALIGN_TEXT
fastmove_fault:
	movl	_curpcb,%edi
	addl	$PCB_SAVEFPU,%edi
	movl	%esp,%esi
	cld
	movl	$PCB_SAVEFPU_SIZE>>2,%ecx
	rep
	movsl

	smsw	%ax
	orb	$CR0_TS,%al
	lmsw	%ax
	movl	$0,_npxproc

fastmove_tail_fault:
	movl	%ebp,%esp
	popl	%ebp
	addl	$8,%esp
	popl	%ebx
	popl	%edi
	popl	%esi
	movl	_curpcb,%edx
	movl	$0,PCB_ONFAULT(%edx)
	movl	$EFAULT,%eax
	ret
#endif /* I586_CPU && NNPX > 0 */

/*
 * fu{byte,sword,word} : fetch a byte (sword, word) from user memory
 */
ENTRY(fuword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx			/* from */

	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address is valid */
	ja	fusufault

	movl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

/*
 * These two routines are called from the profiling code, potentially
 * at interrupt time. If they fail, that's okay, good things will
 * happen later. Fail all the time for now - until the trap code is
 * able to deal with this.
 */
ALTENTRY(suswintr)
ENTRY(fuswintr)
	movl	$-1,%eax
	ret

ENTRY(fusword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-2,%edx
	ja	fusufault

	movzwl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

ENTRY(fubyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

	cmpl	$VM_MAXUSER_ADDRESS-1,%edx
	ja	fusufault

	movzbl	(%edx),%eax
	movl	$0,PCB_ONFAULT(%ecx)
	ret

	ALIGN_TEXT
fusufault:
	movl	_curpcb,%ecx
	xorl	%eax,%eax
	movl	%eax,PCB_ONFAULT(%ecx)
	decl	%eax
	ret
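/*
 * Note that fusufault returns -1, so a legitimately fetched value of
 * -1 is indistinguishable from a fault for the fu* routines.
 */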

/*
 * su{byte,sword,word}: write a byte (word, longword) to user memory
 */
ENTRY(suword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f				/* we only have to set the right segment selector */
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-4,%edx	/* verify address validity */
	ja	fusufault

	movl	8(%esp),%eax
	movl	%eax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ENTRY(susword)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	/* XXX - page boundary crossing is still not handled */
	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-2,%edx	/* verify address validity */
	ja	fusufault

	movw	8(%esp),%ax
	movw	%ax,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

ALTENTRY(suibyte)
ENTRY(subyte)
	movl	_curpcb,%ecx
	movl	$fusufault,PCB_ONFAULT(%ecx)
	movl	4(%esp),%edx

#if defined(I386_CPU)

#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
	cmpl	$CPUCLASS_386,_cpu_class
	jne	2f
#endif /* I486_CPU || I586_CPU || I686_CPU */

	movl	%edx,%eax
	shrl	$IDXSHIFT,%edx
	andb	$0xfc,%dl

	leal	_PTmap(%edx),%ecx
	shrl	$IDXSHIFT,%ecx
	andb	$0xfc,%cl
	testb	$PG_V,_PTmap(%ecx)		/* PTE page must be valid */
	je	4f
	movb	_PTmap(%edx),%dl
	andb	$PG_V|PG_RW|PG_U,%dl		/* page must be valid and user writable */
	cmpb	$PG_V|PG_RW|PG_U,%dl
	je	1f

4:
	/* simulate a trap */
	pushl	%eax
	call	_trapwrite
	popl	%edx				/* remove junk parameter from stack */
	testl	%eax,%eax
	jnz	fusufault
1:
	movl	4(%esp),%edx
#endif

2:
	cmpl	$VM_MAXUSER_ADDRESS-1,%edx	/* verify address validity */
	ja	fusufault

	movb	8(%esp),%al
	movb	%al,(%edx)
	xorl	%eax,%eax
	movl	_curpcb,%ecx			/* restore trashed register */
	movl	%eax,PCB_ONFAULT(%ecx)
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *	Copy a string from `from' to `to', stopping when a NUL character
 *	is reached.  Return ENAMETOOLONG if the string is longer than
 *	maxlen, and EFAULT on protection violations.  If lencopied is
 *	non-zero, return the actual length in *lencopied.
 */
ENTRY(copyinstr)
	pushl	%esi
	pushl	%edi
	movl	_curpcb,%ecx
	movl	$cpystrflt,PCB_ONFAULT(%ecx)

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */

	movl	$VM_MAXUSER_ADDRESS,%eax

	/* make sure 'from' is within bounds */
	subl	%esi,%eax
	jbe	cpystrflt

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpl	%edx,%eax
	jae	1f
	movl	%eax,%edx
	movl	%eax,20(%esp)
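	/*
	 * If maxlen was clamped above, running out of count in the loop
	 * may mean we hit the end of user address space rather than a
	 * genuinely overlong string; label 3 below tells the two cases
	 * apart by checking %esi against VM_MAXUSER_ADDRESS and returns
	 * EFAULT or ENAMETOOLONG accordingly.
	 */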
1:
	incl	%edx
	cld

2:
	decl	%edx
	jz	3f

	lodsb
	stosb
	orb	%al,%al
	jnz	2b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	cpystrflt_x
3:
	/* edx is zero - return ENAMETOOLONG or EFAULT */
	cmpl	$VM_MAXUSER_ADDRESS,%esi
	jae	cpystrflt
4:
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

cpystrflt:
	movl	$EFAULT,%eax

cpystrflt_x:
	/* set *lencopied and return %eax */
	movl	_curpcb,%ecx
	movl	$0,PCB_ONFAULT(%ecx)
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	1f
	movl	%ecx,(%edx)
1:
	popl	%edi
	popl	%esi
	ret


/*
 * copystr(from, to, maxlen, int *lencopied)
 */
ENTRY(copystr)
	pushl	%esi
	pushl	%edi

	movl	12(%esp),%esi			/* %esi = from */
	movl	16(%esp),%edi			/* %edi = to */
	movl	20(%esp),%edx			/* %edx = maxlen */
	incl	%edx
	cld
1:
	decl	%edx
	jz	4f
	lodsb
	stosb
	orb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decl	%edx
	xorl	%eax,%eax
	jmp	6f
4:
	/* edx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax

6:
	/* set *lencopied and return %eax */
	movl	20(%esp),%ecx
	subl	%edx,%ecx
	movl	24(%esp),%edx
	testl	%edx,%edx
	jz	7f
	movl	%ecx,(%edx)
7:
	popl	%edi
	popl	%esi
	ret

ENTRY(bcmp)
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi
	movl	16(%esp),%esi
	movl	20(%esp),%edx
	xorl	%eax,%eax

	movl	%edx,%ecx
	shrl	$2,%ecx
	cld					/* compare forwards */
	repe
	cmpsl
	jne	1f

	movl	%edx,%ecx
	andl	$3,%ecx
	repe
	cmpsb
	je	2f
1:
	incl	%eax
2:
	popl	%esi
	popl	%edi
	ret


/*
 * Handling of special 386 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	movl	4(%esp),%eax
	lgdt	(%eax)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	/* reload "stale" selectors */
	movl	$KDSEL,%eax
	movl	%ax,%ds
	movl	%ax,%es
	movl	%ax,%fs
	movl	%ax,%gs
	movl	%ax,%ss

	/* reload code selector by turning return into intersegmental return */
	movl	(%esp),%eax
	pushl	%eax
	movl	$KCSEL,4(%esp)
	lret

/*
 * void lidt(struct region_descriptor *rdp);
 */
ENTRY(lidt)
	movl	4(%esp),%eax
	lidt	(%eax)
	ret

/*
 * void lldt(u_short sel)
 */
ENTRY(lldt)
	lldt	4(%esp)
	ret

/*
 * void ltr(u_short sel)
 */
ENTRY(ltr)
	ltr	4(%esp)
	ret

/* ssdtosd(*ssdp,*sdp) */
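/*
 * Pack the fields of the software segment descriptor pointed to by
 * ssdp (a flat, unscrambled representation) into the interleaved bit
 * layout the hardware expects in a descriptor table entry at sdp.
 */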
ENTRY(ssdtosd)
	pushl	%ebx
	movl	8(%esp),%ecx
	movl	8(%ecx),%ebx
	shll	$16,%ebx
	movl	(%ecx),%edx
	roll	$16,%edx
	movb	%dh,%bl
	movb	%dl,%bh
	rorl	$8,%ebx
	movl	4(%ecx),%eax
	movw	%ax,%dx
	andl	$0xf0000,%eax
	orl	%eax,%ebx
	movl	12(%esp),%ecx
	movl	%edx,(%ecx)
	movl	%ebx,4(%ecx)
	popl	%ebx
	ret

/* load_cr0(cr0) */
ENTRY(load_cr0)
	movl	4(%esp),%eax
	movl	%eax,%cr0
	ret

/* rcr0() */
ENTRY(rcr0)
	movl	%cr0,%eax
	ret

/* rcr3() */
ENTRY(rcr3)
	movl	%cr3,%eax
	ret

/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
#if defined(SWTCH_OPTIM_STATS)
	incl	_tlb_flush_count
#endif
	movl	4(%esp),%eax
	movl	%eax,%cr3
	ret

/* rcr4() */
ENTRY(rcr4)
	movl	%cr4,%eax
	ret

/* void load_cr4(caddr_t cr4) */
ENTRY(load_cr4)
	movl	4(%esp),%eax
	movl	%eax,%cr4
	ret

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movl	4(%esp),%eax
	movl	%ebx,(%eax)			/* save ebx */
	movl	%esp,4(%eax)			/* save esp */
	movl	%ebp,8(%eax)			/* save ebp */
	movl	%esi,12(%eax)			/* save esi */
	movl	%edi,16(%eax)			/* save edi */
	movl	(%esp),%edx			/* get rta */
	movl	%edx,20(%eax)			/* save eip */
	xorl	%eax,%eax			/* return(0); */
	ret

ENTRY(longjmp)
	movl	4(%esp),%eax
	movl	(%eax),%ebx			/* restore ebx */
	movl	4(%eax),%esp			/* restore esp */
	movl	8(%eax),%ebp			/* restore ebp */
	movl	12(%eax),%esi			/* restore esi */
	movl	16(%eax),%edi			/* restore edi */
	movl	20(%eax),%edx			/* get rta */
	movl	%edx,(%esp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret

/*
 * Here for doing BB-profiling (gcc -a).
 * We rely on the "bbset" instead, but need a dummy function.
 */
NON_GPROF_ENTRY(__bb_init_func)
	movl	4(%esp),%eax
	movl	$1,(%eax)
	.byte	0xc3				/* avoid macro for `ret' */