subr_trap.c revision 981
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * the University of Utah, and William Jolitz.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
37 *	$Id: trap.c,v 1.14 1994/01/14 16:23:41 davidg Exp $
38 */
39
/*
 * 386 Trap and System call handling
 */
43
44#include "npx.h"
45#include "machine/cpu.h"
46#include "machine/psl.h"
47#include "machine/reg.h"
48
49#include "param.h"
50#include "systm.h"
51#include "proc.h"
52#include "user.h"
53#include "acct.h"
54#include "kernel.h"
55#ifdef KTRACE
56#include "ktrace.h"
57#endif
58
59#include "vm/vm_param.h"
60#include "vm/pmap.h"
61#include "vm/vm_map.h"
62#include "vm/vm_user.h"
63#include "vm/vm_page.h"
64#include "sys/vmmeter.h"
65
66#include "machine/trap.h"
67
#ifdef	__GNUC__

/*
 * Inline accessors for the %gs segment register.
 * The "r" constraint could be "rm" except for fatal bugs in gas.  As usual,
 * we omit the size from the mov instruction to avoid nonfatal bugs in gas.
 */
#define	read_gs()	({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; })
#define	write_gs(newgs)	__asm("mov %0,%%gs" : : "r" ((u_short) newgs))

#else	/* not __GNUC__ */

/* Non-GNU compilers use out-of-line assembly versions of these. */
u_short	read_gs		__P((void));
void	write_gs	__P((/* promoted u_short */ int gs));

#endif	/* __GNUC__ */
83
/* System call dispatch table and its size — defined elsewhere in the kernel. */
struct	sysent sysent[];
int	nsysent;
extern short cpl;		/* current interrupt priority level */

/* Highest trap number that has a message in trap_msg[] below. */
#define MAX_TRAP_MSG		27
char *trap_msg[] = {
	"reserved addressing fault",		/*  0 T_RESADFLT */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"reserved operand fault",		/*  2 T_RESOPFLT */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"system call trap",			/*  5 T_SYSCALL */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"segmentation (limit) fault",		/*  8 T_SEGFLT */
	"protection fault",			/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"page table fault",			/* 13 T_TABLEFLT */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"kernel stack pointer not valid",	/* 15 T_KSPNOTVAL */
	"bus error",				/* 16 T_BUSERR */
	"kernel debugger fault",		/* 17 T_KDBTRAP */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
};

/* Valid bit of the page directory entry mapping virtual address v.
 * NOTE(review): this macro is re-defined identically inside trap() below. */
#define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)
121
122/*
123 * trap(frame):
124 *	Exception, fault, and trap interface to BSD kernel. This
125 * common code is called from assembly language IDT gate entry
126 * routines that prepare a suitable stack frame, and restore this
127 * frame after the exception has been processed. Note that the
128 * effect is as if the arguments were passed call by reference.
129 */
130
/*ARGSUSED*/
void
trap(frame)
	struct trapframe frame;
{
	register int i;			/* signal number to post, if any */
	register struct proc *p = curproc;
	struct timeval syst;		/* system time at entry, for profiling */
	int ucode, type, code, eva;

	frame.tf_eflags &= ~PSL_NT;	/* clear nested trap XXX */
	type = frame.tf_trapno;
/* XXX mid-function include, apparently just to obtain the NDDB constant */
#include "ddb.h"
#if NDDB > 0
	/* Give the kernel debugger first crack at kernel breakpoints/traces. */
	if (curpcb && curpcb->pcb_onfault) {
		if (frame.tf_trapno == T_BPTFLT
		    || frame.tf_trapno == T_TRCTRAP)
			if (kdb_trap (type, 0, &frame))
				return;
	}
#endif

/*pg("trap type %d code = %x eip = %x cs = %x eva = %x esp %x",
			frame.tf_trapno, frame.tf_err, frame.tf_eip,
			frame.tf_cs, rcr2(), frame.tf_esp);*/
	/* Faults taken before curproc/curpcb exist skip the per-process setup. */
	if (curpcb == 0 || curproc == 0)
		goto skiptoswitch;
	if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) {
		extern int _udatasel;

		if (read_gs() != (u_short) _udatasel)
			/*
			 * Some user has corrupted %gs but we depend on it in
			 * copyout() etc.  Fix it up and retry.
			 *
			 * (We don't preserve %fs or %gs, so users can change
			 * them to either _ucodesel, _udatasel or a not-present
			 * selector, possibly ORed with 0 to 3, making them
			 * volatile for other users.  Not preserving them saves
			 * time and doesn't lose functionality or open security
			 * holes.)
			 */
			write_gs(_udatasel);
		else
copyfault:
			/* Resume at the fault-recovery handler set by copyin/copyout. */
			frame.tf_eip = (int)curpcb->pcb_onfault;
		return;
	}

	syst = p->p_stime;
	if (ISPL(frame.tf_cs) == SEL_UPL) {
		/* Trap came from user mode: tag it and publish the register frame. */
		type |= T_USER;
		p->p_regs = (int *)&frame;
	}

skiptoswitch:
	ucode=0;
	eva = rcr2();			/* faulting linear address, if page fault */
	code = frame.tf_err;

	if ((type & ~T_USER) == T_PAGEFLT)
		goto pfault;

	switch (type) {

	default:
	we_re_toast:
#ifdef KDB
		/* NOTE(review): `psl' is not declared in this file — the KDB
		 * variant presumably never compiles here; verify before enabling. */
		if (kdb_trap(&psl))
			return;
#endif
#if NDDB > 0
		if (kdb_trap (type, 0, &frame))
			return;
#endif

		if ((type & ~T_USER) <= MAX_TRAP_MSG)
			printf("\n\nFatal trap %d: %s while in %s mode\n",
				type & ~T_USER, trap_msg[type & ~T_USER],
				(type & T_USER) ? "user" : "kernel");

		printf("trap type = %d, code = %x\n     eip = %x, cs = %x, eflags = %x, ",
			frame.tf_trapno, frame.tf_err, frame.tf_eip,
			frame.tf_cs, frame.tf_eflags);
		eva = rcr2();
		printf("cr2 = %x, current priority = %x\n", eva, cpl);

		type &= ~T_USER;
		if (type <= MAX_TRAP_MSG)
			panic(trap_msg[type]);
		else
			panic("unknown/reserved trap");

		/*NOTREACHED*/

	case T_SEGNPFLT|T_USER:
	case T_STKFLT|T_USER:
	case T_PROTFLT|T_USER:		/* protection fault */
		ucode = code + BUS_SEGM_FAULT ;
		i = SIGBUS;
		break;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
	case T_RESADFLT|T_USER:		/* reserved addressing fault */
	case T_RESOPFLT|T_USER:		/* reserved operand fault */
	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		ucode = type &~ T_USER;
		i = SIGILL;
		break;

	case T_ASTFLT|T_USER:		/* Allow process switch */
		astoff();
		cnt.v_soft++;
		/* Deferred profiling tick requested by an earlier clock interrupt. */
		if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) {
			addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
			p->p_flag &= ~SOWEUPC;
		}
		goto out;

	case T_DNA|T_USER:
#if NNPX > 0
		/* if a transparent fault (due to context switch "late") */
		if (npxdna()) return;
#endif	/* NNPX > 0 */
#ifdef	MATH_EMULATE
		/* math_emulate() returns 0 on success or a signal number. */
		i = math_emulate(&frame);
		if (i == 0) return;
#else	/* MATH_EMULATE */
		panic("trap: math emulation necessary!");
#endif	/* MATH_EMULATE */
		ucode = FPE_FPU_NP_TRAP;
		break;

	case T_BOUND|T_USER:
		ucode = FPE_SUBRNG_TRAP;
		i = SIGFPE;
		break;

	case T_OFLOW|T_USER:
		ucode = FPE_INTOVF_TRAP;
		i = SIGFPE;
		break;

	case T_DIVIDE|T_USER:
		ucode = FPE_INTDIV_TRAP;
		i = SIGFPE;
		break;

	case T_ARITHTRAP|T_USER:
		ucode = code;
		i = SIGFPE;
		break;

	case T_PAGEFLT:			/* allow page faults in kernel mode */
#if 0
		/* XXX - check only applies to 386's and 486's with WP off */
		if (code & PGEX_P) goto we_re_toast;
#endif

	pfault:
		/* fall into */
	case T_PAGEFLT|T_USER:		/* page fault */
	    {
		register vm_offset_t va;
		register struct vmspace *vm;
		register vm_map_t map;
		int rv=0;
		vm_prot_t ftype;
		extern vm_map_t kernel_map;
		unsigned nss,v;
		int oldflags;

		va = trunc_page((vm_offset_t)eva);
		/*
		 * It is only a kernel address space fault iff:
		 * 	1. (type & T_USER) == 0  and
		 * 	2. pcb_onfault not set or
		 *	3. pcb_onfault set but supervisor space fault
		 * The last can occur during an exec() copyin where the
		 * argument space is lazy-allocated.
		 */

		if ((p == 0) || (type == T_PAGEFLT && va >= KERNBASE)) {
			vm = 0;
			map = kernel_map;
		} else {
			vm = p->p_vmspace;
			map = &vm->vm_map;
		}

		/* PGEX_W: the fault was caused by a write. */
		if (code & PGEX_W)
			ftype = VM_PROT_READ | VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

#ifdef DEBUG
		if (map == kernel_map && va == 0) {
			printf("trap: bad kernel access at %x\n", va);
			goto we_re_toast;
		}
#endif

/*
 * keep swapout from messing with us during this
 * critical time.
 */
		oldflags = p->p_flag;
		if (map != kernel_map) {
				p->p_flag |= SLOCK;
		}
		/*
		 * XXX: rude hack to make stack limits "work"
		 */

		nss = 0;
		if (map != kernel_map && (caddr_t)va >= vm->vm_maxsaddr
			&& (caddr_t)va < (caddr_t)USRSTACK) {
			caddr_t v;
			nss = roundup(USRSTACK - (unsigned)va, PAGE_SIZE);
			if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur) {
				/* Over the stack rlimit: restore SLOCK and fail. */
				rv = KERN_FAILURE;
				p->p_flag &= ~SLOCK;
				p->p_flag |= (oldflags & SLOCK);
				goto nogo;
			}

			if (vm->vm_ssize && roundup(vm->vm_ssize << PGSHIFT,
			    DFLSSIZ) < nss) {
				int grow_amount;
				/*
				 * If necessary, grow the VM that the stack occupies
				 * to allow for the rlimit. This allows us to not have
				 * to allocate all of the VM up-front in execve (which
				 * is expensive).
				 * Grow the VM by the amount requested rounded up to
				 * the nearest DFLSSIZ to provide for some hysteresis.
				 */
				grow_amount = roundup((nss - (vm->vm_ssize << PGSHIFT)), DFLSSIZ);
				v = (char *)USRSTACK - roundup(vm->vm_ssize << PGSHIFT,
				    DFLSSIZ) - grow_amount;
				/*
				 * If there isn't enough room to extend by DFLSSIZ, then
				 * just extend to the maximum size
				 */
				if (v < vm->vm_maxsaddr) {
					v = vm->vm_maxsaddr;
					grow_amount = MAXSSIZ - (vm->vm_ssize << PGSHIFT);
				}
				if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v,
						grow_amount, FALSE) !=
				    KERN_SUCCESS) {
					p->p_flag &= ~SLOCK;
					p->p_flag |= (oldflags & SLOCK);
					goto nogo;
				}
			}
		}


		/* check if page table is mapped, if not, fault it first */
/* NOTE(review): duplicate of the pde_v definition near the top of the file */
#define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)
		{

			if (map != kernel_map) {
				vm_offset_t pa;
				vm_offset_t v = (vm_offset_t) vtopte(va);

				/* Fault the pte only if needed: */
				*(volatile char *)v += 0;

				/* Get the physical address: */
				pa = pmap_extract(vm_map_pmap(map), v);

				/* And wire the pte page at system vm level: */
				vm_page_wire(PHYS_TO_VM_PAGE(pa));

				/* Fault in the user page: */
				rv = vm_fault(map, va, ftype, FALSE);

				/* Unwire the pte page: */
				vm_page_unwire(PHYS_TO_VM_PAGE(pa));

			} else {
				/*
				 * Since we know that kernel virtual address addresses
				 * always have pte pages mapped, we just have to fault
				 * the page.
				 */
				rv = vm_fault(map, va, ftype, FALSE);
			}

		}
		/* Restore the caller's SLOCK state now that the fault is done. */
		if (map != kernel_map) {
			p->p_flag &= ~SLOCK;
			p->p_flag |= (oldflags & SLOCK);
		}
		if (rv == KERN_SUCCESS) {
			/*
			 * XXX: continuation of rude stack hack
			 */
			nss = nss >> PGSHIFT;
			if (vm && nss > vm->vm_ssize) {
				vm->vm_ssize = nss;
			}
			/*
			 * NOTE(review): original comment was truncated here:
			 * "va could be a page table address, if the fault"
			 */
			if (type == T_PAGEFLT)
				return;
			goto out;
		}
nogo:
		if (type == T_PAGEFLT) {
			/* Kernel-mode fault: recover via pcb_onfault or die. */
			if (curpcb->pcb_onfault)
				goto copyfault;
			printf("vm_fault(%x, %x, %x, 0) -> %x\n",
			       map, va, ftype, rv);
			printf("  type %x, code %x\n",
			       type, code);
			goto we_re_toast;
		}
		i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;

		/* kludge to pass faulting virtual address to sendsig */
		ucode = type &~ T_USER;
		frame.tf_err = eva;

		break;
	    }

#if NDDB == 0
	case T_TRCTRAP:	 /* trace trap -- someone single stepping lcall's */
		frame.tf_eflags &= ~PSL_T;

			/* Q: how do we turn it on again? */
		return;
#endif

	case T_BPTFLT|T_USER:		/* bpt instruction fault */
	case T_TRCTRAP|T_USER:		/* trace trap */
		frame.tf_eflags &= ~PSL_T;
		i = SIGTRAP;
		break;

/* XXX another mid-function include, to obtain NISA */
#include "isa.h"
#if	NISA > 0
	case T_NMI:
	case T_NMI|T_USER:
#if NDDB > 0
		/* NMI can be hooked up to a pushbutton for debugging */
		printf ("NMI ... going to debugger\n");
		if (kdb_trap (type, 0, &frame))
			return;
#endif
		/* machine/parity/power fail/"kitchen sink" faults */
		if (isa_nmi(code) == 0) return;
		else goto we_re_toast;
#endif
	}

	/* Post the signal chosen above; kernel-mode traps stop here. */
	trapsignal(p, i, ucode);
	if ((type & T_USER) == 0)
		return;
out:
	/* Return-to-user path: deliver signals, reschedule, bill profiling time. */
	while (i = CURSIG(p))
		psig(i);
	p->p_pri = p->p_usrpri;
	if (want_resched) {
		int s;
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrq ourselves but before we
		 * swtch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		splx(s);
		while (i = CURSIG(p))
			psig(i);
	}
	if (p->p_stats->p_prof.pr_scale) {
		int ticks;
		struct timeval *tv = &p->p_stime;

		/* System time consumed while handling this trap, in clock ticks. */
		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpri = p->p_pri;
}
534
/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
 */
541int trapwrite(addr)
542	unsigned addr;
543{
544	unsigned nss;
545	struct proc *p;
546	vm_offset_t va;
547	struct vmspace *vm;
548	int oldflags;
549	int rv;
550
551	va = trunc_page((vm_offset_t)addr);
552	/*
553	 * XXX - MAX is END.  Changed > to >= for temp. fix.
554	 */
555	if (va >= VM_MAXUSER_ADDRESS)
556		return (1);
557	/*
558	 * XXX: rude stack hack adapted from trap().
559	 */
560	nss = 0;
561	p = curproc;
562	vm = p->p_vmspace;
563
564	oldflags = p->p_flag;
565	p->p_flag |= SLOCK;
566
567	if ((caddr_t)va >= vm->vm_maxsaddr
568	    && (caddr_t)va < (caddr_t)USRSTACK) {
569		nss = roundup(((unsigned)USRSTACK - (unsigned)va), PAGE_SIZE);
570		if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur) {
571			p->p_flag &= ~SLOCK;
572			p->p_flag |= (oldflags & SLOCK);
573			return (1);
574		}
575
576		if (vm->vm_ssize && roundup(vm->vm_ssize << PGSHIFT,
577			DFLSSIZ) < nss) {
578			caddr_t v;
579			int grow_amount;
580			/*
581			 * If necessary, grow the VM that the stack occupies
582			 * to allow for the rlimit. This allows us to not have
583			 * to allocate all of the VM up-front in execve (which
584			 * is expensive).
585			 * Grow the VM by the amount requested rounded up to
586			 * the nearest DFLSSIZ to provide for some hysteresis.
587			 */
588			grow_amount = roundup((nss - (vm->vm_ssize << PGSHIFT)), DFLSSIZ);
589			v = (char *)USRSTACK - roundup(vm->vm_ssize << PGSHIFT, DFLSSIZ) -
590				grow_amount;
591			/*
592			 * If there isn't enough room to extend by DFLSSIZ, then
593			 * just extend to the maximum size
594			 */
595			if (v < vm->vm_maxsaddr) {
596				v = vm->vm_maxsaddr;
597				grow_amount = MAXSSIZ - (vm->vm_ssize << PGSHIFT);
598			}
599			if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v,
600					grow_amount, FALSE)
601			    != KERN_SUCCESS) {
602				p->p_flag &= ~SLOCK;
603				p->p_flag |= (oldflags & SLOCK);
604				return(1);
605			}
606				printf("new stack growth: %lx, %d\n", v, grow_amount);
607		}
608	}
609
610
611	{
612		vm_offset_t v;
613		v = trunc_page(vtopte(va));
614		/*
615		 * wire the pte page
616		 */
617		if (va < USRSTACK) {
618			vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE);
619		}
620		/*
621		 * fault the data page
622		 */
623		rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE);
624		/*
625		 * unwire the pte page
626		 */
627		if (va < USRSTACK) {
628			vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE);
629		}
630	}
631	p->p_flag &= ~SLOCK;
632	p->p_flag |= (oldflags & SLOCK);
633
634	if (rv != KERN_SUCCESS)
635		return 1;
636	/*
637	 * XXX: continuation of rude stack hack
638	 */
639	nss >>= PGSHIFT;
640	if (nss > vm->vm_ssize) {
641		vm->vm_ssize = nss;
642	}
643	return (0);
644}
645
/*
 * syscall(frame):
 *	System call request from POSIX system call gate interface to kernel.
 * Like trap(), argument is call by reference.
 *	The syscall number arrives in %eax, arguments on the user stack;
 * results return in %eax/%edx with the carry flag indicating error.
 */
/*ARGSUSED*/
void
syscall(frame)
	volatile struct trapframe frame;
{
	register int *locr0 = ((int *)&frame);
	register caddr_t params;
	register int i;
	register struct sysent *callp;
	register struct proc *p = curproc;
	struct timeval syst;		/* system time at entry, for profiling */
	int error, opc;
	int args[8], rval[2];
	int code;

#ifdef lint
	r0 = 0; r0 = r0; r1 = 0; r1 = r1;
#endif
	syst = p->p_stime;
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("syscall");

	code = frame.tf_eax;
	p->p_regs = (int *)&frame;
	/* Arguments start just above the return address on the user stack. */
	params = (caddr_t)frame.tf_esp + sizeof (int) ;

	/*
	 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always.
	 */
	opc = frame.tf_eip - 7;
	if (code == 0) {
		/* Indirect syscall: the real code is the first argument.
		 * NOTE(review): a fuword() fault returns -1, which is not
		 * distinguished here; it falls into the nosys slot below. */
		code = fuword(params);
		params += sizeof (int);
	}
	/* Out-of-range codes dispatch to entry 0 (nosys). */
	if (code < 0 || code >= nsysent)
		callp = &sysent[0];
	else
		callp = &sysent[code];

	/* Copy in the declared number of argument words; error out on fault. */
	if ((i = callp->sy_narg * sizeof (int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
		frame.tf_eax = error;
		frame.tf_eflags |= PSL_C;	/* carry bit */
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
		goto done;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
	rval[0] = 0;
	rval[1] = frame.tf_edx;
/*pg("%d. s %d\n", p->p_pid, code);*/
	error = (*callp->sy_call)(p, args, rval);
	if (error == ERESTART)
		/* Back eip up over the lcall so the syscall is re-issued. */
		frame.tf_eip = opc;
	else if (error != EJUSTRETURN) {
		if (error) {
/*pg("error %d", error);*/
			frame.tf_eax = error;
			frame.tf_eflags |= PSL_C;	/* carry bit */
		} else {
			frame.tf_eax = rval[0];
			frame.tf_edx = rval[1];
			frame.tf_eflags &= ~PSL_C;	/* carry bit */
		}
	}
	/* else if (error == EJUSTRETURN) */
		/* nothing to do */
done:
	/*
	 * Reinitialize proc pointer `p' as it may be different
	 * if this is a child returning from fork syscall.
	 */
	p = curproc;
	/* Return-to-user path: deliver signals, reschedule, bill profiling. */
	while (i = CURSIG(p))
		psig(i);
	p->p_pri = p->p_usrpri;
	if (want_resched) {
		int s;
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrq ourselves but before we
		 * swtch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		splx(s);
		while (i = CURSIG(p))
			psig(i);
	}
	if (p->p_stats->p_prof.pr_scale) {
		int ticks;
		struct timeval *tv = &p->p_stime;

		/* System time consumed by this call, in clock ticks. */
		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpri = p->p_pri;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
#ifdef	DIAGNOSTICx
{ extern int _udatasel, _ucodesel;
	/* Sanity-check the user segment selectors and eip on the way out. */
	if (frame.tf_ss != _udatasel)
		printf("ss %x call %d\n", frame.tf_ss, code);
	if ((frame.tf_cs&0xffff) != _ucodesel)
		printf("cs %x call %d\n", frame.tf_cs, code);
	if (frame.tf_eip > VM_MAXUSER_ADDRESS) {
		printf("eip %x call %d\n", frame.tf_eip, code);
		frame.tf_eip = 0;
	}
}
#endif
}
784