subr_syscall.c revision 974
1/*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * the University of Utah, and William Jolitz.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
37 *	$Id: trap.c,v 1.13 1994/01/03 07:55:24 davidg Exp $
38 */
39
/*
 * 386 Trap and System call handling
 */
43
44#include "npx.h"
45#include "machine/cpu.h"
46#include "machine/psl.h"
47#include "machine/reg.h"
48
49#include "param.h"
50#include "systm.h"
51#include "proc.h"
52#include "user.h"
53#include "acct.h"
54#include "kernel.h"
55#ifdef KTRACE
56#include "ktrace.h"
57#endif
58
59#include "vm/vm_param.h"
60#include "vm/pmap.h"
61#include "vm/vm_map.h"
62#include "vm/vm_user.h"
63#include "vm/vm_page.h"
64#include "sys/vmmeter.h"
65
66#include "machine/trap.h"
67
#ifdef	__GNUC__

/*
 * Inline accessors for the %gs segment register, used below to detect
 * and repair user corruption of %gs before retrying a copyin/copyout.
 *
 * The "r" constraint could be "rm" except for fatal bugs in gas.  As usual,
 * we omit the size from the mov instruction to avoid nonfatal bugs in gas.
 */
#define	read_gs()	({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; })
#define	write_gs(newgs)	__asm("mov %0,%%gs" : : "r" ((u_short) newgs))

#else	/* not __GNUC__ */

/* Non-GNU compilers get out-of-line assembly implementations of these. */
u_short	read_gs		__P((void));
void	write_gs	__P((/* promoted u_short */ int gs));

#endif	/* __GNUC__ */
83
struct	sysent sysent[];	/* system call dispatch table, defined elsewhere */
int	nsysent;		/* number of entries in sysent[] */
extern short cpl;		/* current interrupt priority level (printed on panic) */

/*
 * Human-readable names for trap numbers, indexed by T_* value.
 * MAX_TRAP_MSG is the largest trap number with an entry below;
 * keep both in sync when adding traps.
 */
#define MAX_TRAP_MSG		27
char *trap_msg[] = {
	"reserved addressing fault",		/*  0 T_RESADFLT */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"reserved operand fault",		/*  2 T_RESOPFLT */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"system call trap",			/*  5 T_SYSCALL */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"segmentation (limit) fault",		/*  8 T_SEGFLT */
	"protection fault",			/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"page table fault",			/* 13 T_TABLEFLT */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"kernel stack pointer not valid",	/* 15 T_KSPNOTVAL */
	"bus error",				/* 16 T_BUSERR */
	"kernel debugger fault",		/* 17 T_KDBTRAP */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
};
119
/* True iff the page-directory entry covering virtual address `v' is valid. */
#define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)

/*
 * trap(frame):
 *	Exception, fault, and trap interface to BSD kernel. This
 * common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed. Note that the
 * effect is as if the arguments were passed call by reference.
 */

/*ARGSUSED*/
void
trap(frame)
	struct trapframe frame;
{
	register int i;				/* signal number to deliver, if any */
	register struct proc *p = curproc;
	struct timeval syst;			/* system time at entry, for profiling */
	int ucode, type, code, eva;

	frame.tf_eflags &= ~PSL_NT;	/* clear nested trap XXX */
	type = frame.tf_trapno;
#include "ddb.h"	/* XXX: include buried mid-function to pick up NDDB */
#if NDDB > 0
	/*
	 * Give the kernel debugger first crack at breakpoint/trace traps
	 * taken while a copyin/copyout fault handler is armed.
	 */
	if (curpcb && curpcb->pcb_onfault) {
		if (frame.tf_trapno == T_BPTFLT
		    || frame.tf_trapno == T_TRCTRAP)
			if (kdb_trap (type, 0, &frame))
				return;
	}
#endif

/*pg("trap type %d code = %x eip = %x cs = %x eva = %x esp %x",
			frame.tf_trapno, frame.tf_err, frame.tf_eip,
			frame.tf_cs, rcr2(), frame.tf_esp);*/
	/* Very early boot: no pcb/proc yet, skip straight to fault decoding. */
	if (curpcb == 0 || curproc == 0)
		goto skiptoswitch;
	/* Non-pagefault trap while a recovery handler is armed. */
	if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) {
		extern int _udatasel;

		if (read_gs() != (u_short) _udatasel)
			/*
			 * Some user has corrupted %gs but we depend on it in
			 * copyout() etc.  Fix it up and retry.
			 *
			 * (We don't preserve %fs or %gs, so users can change
			 * them to either _ucodesel, _udatasel or a not-present
			 * selector, possibly ORed with 0 to 3, making them
			 * volatile for other users.  Not preserving them saves
			 * time and doesn't lose functionality or open security
			 * holes.)
			 */
			write_gs(_udatasel);
		else
copyfault:
			/* Resume at the registered fault-recovery address. */
			frame.tf_eip = (int)curpcb->pcb_onfault;
		return;
	}

	syst = p->p_stime;
	if (ISPL(frame.tf_cs) == SEL_UPL) {
		/* Trap came from user mode; remember where the registers live. */
		type |= T_USER;
		p->p_regs = (int *)&frame;
	}

skiptoswitch:
	ucode=0;
	eva = rcr2();		/* %cr2 holds the faulting VA for page faults */
	code = frame.tf_err;

	if ((type & ~T_USER) == T_PAGEFLT)
		goto pfault;

	switch (type) {

	default:
	we_re_toast:
		/* Unhandled trap: try the debugger, then dump state and panic. */
#ifdef KDB
		if (kdb_trap(&psl))
			return;
#endif
#if NDDB > 0
		if (kdb_trap (type, 0, &frame))
			return;
#endif

		if ((type & ~T_USER) <= MAX_TRAP_MSG)
			printf("\n\nFatal trap %d: %s while in %s mode\n",
				type & ~T_USER, trap_msg[type & ~T_USER],
				(type & T_USER) ? "user" : "kernel");

		printf("trap type = %d, code = %x\n     eip = %x, cs = %x, eflags = %x, ",
			frame.tf_trapno, frame.tf_err, frame.tf_eip,
			frame.tf_cs, frame.tf_eflags);
		eva = rcr2();
		printf("cr2 = %x, current priority = %x\n", eva, cpl);

		type &= ~T_USER;
		if (type <= MAX_TRAP_MSG)
			panic(trap_msg[type]);
		else
			panic("unknown/reserved trap");

		/*NOTREACHED*/

	case T_SEGNPFLT|T_USER:
	case T_STKFLT|T_USER:
	case T_PROTFLT|T_USER:		/* protection fault */
		ucode = code + BUS_SEGM_FAULT ;
		i = SIGBUS;
		break;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
	case T_RESADFLT|T_USER:		/* reserved addressing fault */
	case T_RESOPFLT|T_USER:		/* reserved operand fault */
	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		ucode = type &~ T_USER;
		i = SIGILL;
		break;

	case T_ASTFLT|T_USER:		/* Allow process switch */
		astoff();
		cnt.v_soft++;
		/* Deferred profiling tick posted by the clock handler. */
		if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) {
			addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
			p->p_flag &= ~SOWEUPC;
		}
		goto out;

	case T_DNA|T_USER:		/* FPU device not available */
#if NNPX > 0
		/* if a transparent fault (due to context switch "late") */
		if (npxdna()) return;
#endif	/* NNPX > 0 */
#ifdef	MATH_EMULATE
		i = math_emulate(&frame);
		if (i == 0) return;
#else	/* MATH_EMULTATE */
		panic("trap: math emulation necessary!");
#endif	/* MATH_EMULTATE */
		ucode = FPE_FPU_NP_TRAP;
		break;

	case T_BOUND|T_USER:
		ucode = FPE_SUBRNG_TRAP;
		i = SIGFPE;
		break;

	case T_OFLOW|T_USER:
		ucode = FPE_INTOVF_TRAP;
		i = SIGFPE;
		break;

	case T_DIVIDE|T_USER:
		ucode = FPE_INTDIV_TRAP;
		i = SIGFPE;
		break;

	case T_ARITHTRAP|T_USER:
		ucode = code;
		i = SIGFPE;
		break;

	case T_PAGEFLT:			/* allow page faults in kernel mode */
#if 0
		/* XXX - check only applies to 386's and 486's with WP off */
		if (code & PGEX_P) goto we_re_toast;
#endif

	pfault:
		/* fall into */
	case T_PAGEFLT|T_USER:		/* page fault */
	    {
		register vm_offset_t va;
		register struct vmspace *vm;	/* faulting process vmspace, or 0 for kernel */
		register vm_map_t map;
		int rv=0;			/* vm_fault() result */
		vm_prot_t ftype;		/* access type derived from the error code */
		extern vm_map_t kernel_map;
		unsigned nss,v;			/* nss: new stack size in bytes (then pages) */
		int oldflags;			/* p_flag before we set SLOCK */

		va = trunc_page((vm_offset_t)eva);
		/*
		 * It is only a kernel address space fault iff:
		 * 	1. (type & T_USER) == 0  and
		 * 	2. pcb_onfault not set or
		 *	3. pcb_onfault set but supervisor space fault
		 * The last can occur during an exec() copyin where the
		 * argument space is lazy-allocated.
		 */

		if ((p == 0) || (type == T_PAGEFLT && va >= KERNBASE)) {
			vm = 0;
			map = kernel_map;
		} else {
			vm = p->p_vmspace;
			map = &vm->vm_map;
		}

		/* PGEX_W set means the fault was on a write access. */
		if (code & PGEX_W)
			ftype = VM_PROT_READ | VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

#ifdef DEBUG
		if (map == kernel_map && va == 0) {
			printf("trap: bad kernel access at %x\n", va);
			goto we_re_toast;
		}
#endif

/*
 * keep swapout from messing with us during this
 * critical time.
 */
		oldflags = p->p_flag;
		if (map != kernel_map) {
				p->p_flag |= SLOCK;
		}
		/*
		 * XXX: rude hack to make stack limits "work"
		 */

		nss = 0;
		if (map != kernel_map && (caddr_t)va >= vm->vm_maxsaddr
			&& (caddr_t)va < (caddr_t)USRSTACK) {
			caddr_t v;	/* NB: shadows the `unsigned v' declared above */
			nss = roundup(USRSTACK - (unsigned)va, PAGE_SIZE);
			if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur) {
				/* Over the stack rlimit: restore SLOCK state and fail. */
				rv = KERN_FAILURE;
				p->p_flag &= ~SLOCK;
				p->p_flag |= (oldflags & SLOCK);
				goto nogo;
			}

			if (vm->vm_ssize && roundup(vm->vm_ssize << PGSHIFT,
			    DFLSSIZ) < nss) {
				int grow_amount;
				/*
				 * If necessary, grow the VM that the stack occupies
				 * to allow for the rlimit. This allows us to not have
				 * to allocate all of the VM up-front in execve (which
				 * is expensive).
				 * Grow the VM by the amount requested rounded up to
				 * the nearest DFLSSIZ to provide for some hysteresis.
				 */
				grow_amount = roundup((nss - (vm->vm_ssize << PGSHIFT)), DFLSSIZ);
				v = (char *)USRSTACK - roundup(vm->vm_ssize << PGSHIFT,
				    DFLSSIZ) - grow_amount;
				/*
				 * If there isn't enough room to extend by DFLSSIZ, then
				 * just extend to the maximum size
				 */
				if (v < vm->vm_maxsaddr) {
					v = vm->vm_maxsaddr;
					grow_amount = MAXSSIZ - (vm->vm_ssize << PGSHIFT);
				}
				if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v,
						grow_amount, FALSE) !=
				    KERN_SUCCESS) {
					p->p_flag &= ~SLOCK;
					p->p_flag |= (oldflags & SLOCK);
					goto nogo;
				}
			}
		}


		/* check if page table is mapped, if not, fault it first */
#define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)	/* XXX duplicate of file-scope define */
		{
			vm_offset_t v = trunc_page(vtopte(va));

			if (map != kernel_map) {
				vm_offset_t pa;

				/* Fault the pte only if needed: */
				*(volatile char *)v += 0;

				/* Get the physical address: */
				pa = pmap_extract(vm_map_pmap(map), v);

				/* And wire the page at system vm level: */
				vm_page_wire(PHYS_TO_VM_PAGE(pa));

				/* Fault in the user page: */
				rv = vm_fault(map, va, ftype, FALSE);

				/* Unwire the pte page */
				vm_page_unwire(PHYS_TO_VM_PAGE(pa));

			} else {
				rv = vm_fault(map, va, ftype, FALSE);
			}

		}
		/* Fault handled (or not); drop the swapout hold. */
		if (map != kernel_map) {
			p->p_flag &= ~SLOCK;
			p->p_flag |= (oldflags & SLOCK);
		}
		if (rv == KERN_SUCCESS) {
			/*
			 * XXX: continuation of rude stack hack:
			 * record the grown stack size, in pages.
			 */
			nss = nss >> PGSHIFT;
			if (vm && nss > vm->vm_ssize) {
				vm->vm_ssize = nss;
			}
 			/*
 			 * va could be a page table address, if the fault
			 * (NOTE(review): original comment is truncated here)
			 */
			if (type == T_PAGEFLT)
				return;
			goto out;
		}
nogo:
		/* Unresolvable kernel-mode fault: recover via pcb_onfault or panic. */
		if (type == T_PAGEFLT) {
			if (curpcb->pcb_onfault)
				goto copyfault;
			printf("vm_fault(%x, %x, %x, 0) -> %x\n",
			       map, va, ftype, rv);
			printf("  type %x, code %x\n",
			       type, code);
			goto we_re_toast;
		}
		i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;

		/* kludge to pass faulting virtual address to sendsig */
		ucode = type &~ T_USER;
		frame.tf_err = eva;

		break;
	    }

#if NDDB == 0
	case T_TRCTRAP:	 /* trace trap -- someone single stepping lcall's */
		frame.tf_eflags &= ~PSL_T;

			/* Q: how do we turn it on again? */
		return;
#endif

	case T_BPTFLT|T_USER:		/* bpt instruction fault */
	case T_TRCTRAP|T_USER:		/* trace trap */
		frame.tf_eflags &= ~PSL_T;
		i = SIGTRAP;
		break;

#include "isa.h"	/* XXX: include buried mid-function to pick up NISA */
#if	NISA > 0
	case T_NMI:
	case T_NMI|T_USER:
#if NDDB > 0
		/* NMI can be hooked up to a pushbutton for debugging */
		printf ("NMI ... going to debugger\n");
		if (kdb_trap (type, 0, &frame))
			return;
#endif
		/* machine/parity/power fail/"kitchen sink" faults */
		if (isa_nmi(code) == 0) return;
		else goto we_re_toast;
#endif
	}

	/* Post the signal chosen above to the current process. */
	trapsignal(p, i, ucode);
	if ((type & T_USER) == 0)
		return;
out:
	/* Returning to user mode: run pending signals and reschedule if asked. */
	while (i = CURSIG(p))
		psig(i);
	p->p_pri = p->p_usrpri;
	if (want_resched) {
		int s;
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrq ourselves but before we
		 * swtch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		splx(s);
		while (i = CURSIG(p))
			psig(i);
	}
	if (p->p_stats->p_prof.pr_scale) {
		/* Charge profiling ticks for system time consumed by the trap. */
		int ticks;
		struct timeval *tv = &p->p_stime;

		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpri = p->p_pri;
}
529
/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
 *
 * Returns 0 if the target page was successfully made writable,
 * 1 on any failure (bad address, rlimit exceeded, fault failed).
 */
int trapwrite(addr)
	unsigned addr;
{
	unsigned nss;		/* new stack size, bytes then pages */
	struct proc *p;
	vm_offset_t va;
	struct vmspace *vm;
	int oldflags;		/* p_flag before we set SLOCK */
	int rv;			/* vm_fault() result */

	va = trunc_page((vm_offset_t)addr);
	/*
	 * XXX - MAX is END.  Changed > to >= for temp. fix.
	 */
	if (va >= VM_MAXUSER_ADDRESS)
		return (1);
	/*
	 * XXX: rude stack hack adapted from trap().
	 */
	nss = 0;
	p = curproc;
	vm = p->p_vmspace;

	/* Keep swapout away while we work on the address space. */
	oldflags = p->p_flag;
	p->p_flag |= SLOCK;

	if ((caddr_t)va >= vm->vm_maxsaddr
	    && (caddr_t)va < (caddr_t)USRSTACK) {
		nss = roundup(((unsigned)USRSTACK - (unsigned)va), PAGE_SIZE);
		if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur) {
			/* Would exceed the stack rlimit: restore SLOCK and fail. */
			p->p_flag &= ~SLOCK;
			p->p_flag |= (oldflags & SLOCK);
			return (1);
		}

		if (vm->vm_ssize && roundup(vm->vm_ssize << PGSHIFT,
			DFLSSIZ) < nss) {
			caddr_t v;
			int grow_amount;
			/*
			 * If necessary, grow the VM that the stack occupies
			 * to allow for the rlimit. This allows us to not have
			 * to allocate all of the VM up-front in execve (which
			 * is expensive).
			 * Grow the VM by the amount requested rounded up to
			 * the nearest DFLSSIZ to provide for some hysteresis.
			 */
			grow_amount = roundup((nss - (vm->vm_ssize << PGSHIFT)), DFLSSIZ);
			v = (char *)USRSTACK - roundup(vm->vm_ssize << PGSHIFT, DFLSSIZ) -
				grow_amount;
			/*
			 * If there isn't enough room to extend by DFLSSIZ, then
			 * just extend to the maximum size
			 */
			if (v < vm->vm_maxsaddr) {
				v = vm->vm_maxsaddr;
				grow_amount = MAXSSIZ - (vm->vm_ssize << PGSHIFT);
			}
			if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v,
					grow_amount, FALSE)
			    != KERN_SUCCESS) {
				p->p_flag &= ~SLOCK;
				p->p_flag |= (oldflags & SLOCK);
				return(1);
			}
				/* NOTE(review): debugging printf left enabled unconditionally */
				printf("new stack growth: %lx, %d\n", v, grow_amount);
		}
	}


	{
		/* Wire the pte page while vm_fault runs, as trap() does. */
		vm_offset_t v;
		v = trunc_page(vtopte(va));
		if (va < USRSTACK) {
			vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE);
		}
		rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE);
		if (va < USRSTACK) {
			vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE);
		}
	}
	/* Restore the caller's SLOCK state. */
	p->p_flag &= ~SLOCK;
	p->p_flag |= (oldflags & SLOCK);

	if (rv != KERN_SUCCESS)
		return 1;
	/*
	 * XXX: continuation of rude stack hack:
	 * record the grown stack size, in pages.
	 */
	nss >>= PGSHIFT;
	if (nss > vm->vm_ssize) {
		vm->vm_ssize = nss;
	}
	return (0);
}
631
/*
 * syscall(frame):
 *	System call request from POSIX system call gate interface to kernel.
 * Like trap(), argument is call by reference.
 */
/*ARGSUSED*/
void
syscall(frame)
	volatile struct trapframe frame;
{
	register int *locr0 = ((int *)&frame);	/* register block as int array (historical; unused below) */
	register caddr_t params;		/* user stack address of the argument words */
	register int i;
	register struct sysent *callp;		/* dispatch entry for this call */
	register struct proc *p = curproc;
	struct timeval syst;			/* system time at entry, for profiling */
	int error, opc;				/* opc: pc of the lcall, for ERESTART */
	int args[8], rval[2];
	int code;				/* system call number */

#ifdef lint
	r0 = 0; r0 = r0; r1 = 0; r1 = r1;
#endif
	syst = p->p_stime;
	/* The syscall gate is reachable from user mode only. */
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("syscall");

	code = frame.tf_eax;
	p->p_regs = (int *)&frame;
	/* Arguments start just above the return address on the user stack. */
	params = (caddr_t)frame.tf_esp + sizeof (int) ;

	/*
	 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always.
	 */
	opc = frame.tf_eip - 7;
	if (code == 0) {
		/* Indirect syscall: real number is the first argument word. */
		code = fuword(params);
		params += sizeof (int);
	}
	/* Out-of-range numbers fall back to entry 0 (the indirect/nosys slot). */
	if (code < 0 || code >= nsysent)
		callp = &sysent[0];
	else
		callp = &sysent[code];

	if ((i = callp->sy_narg * sizeof (int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
		/* Argument copyin failed: report the error to the user. */
		frame.tf_eax = error;
		frame.tf_eflags |= PSL_C;	/* carry bit */
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
		goto done;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
	rval[0] = 0;
	rval[1] = frame.tf_edx;
/*pg("%d. s %d\n", p->p_pid, code);*/
	error = (*callp->sy_call)(p, args, rval);
	if (error == ERESTART)
		/* Back up the pc so the lcall is re-executed after signal handling. */
		frame.tf_eip = opc;
	else if (error != EJUSTRETURN) {
		if (error) {
/*pg("error %d", error);*/
			frame.tf_eax = error;
			frame.tf_eflags |= PSL_C;	/* carry bit */
		} else {
			/* Success: results in %eax/%edx, carry clear. */
			frame.tf_eax = rval[0];
			frame.tf_edx = rval[1];
			frame.tf_eflags &= ~PSL_C;	/* carry bit */
		}
	}
	/* else if (error == EJUSTRETURN) */
		/* nothing to do */
done:
	/*
	 * Reinitialize proc pointer `p' as it may be different
	 * if this is a child returning from fork syscall.
	 */
	p = curproc;
	/* Returning to user mode: run pending signals and reschedule if asked. */
	while (i = CURSIG(p))
		psig(i);
	p->p_pri = p->p_usrpri;
	if (want_resched) {
		int s;
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrq ourselves but before we
		 * swtch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		splx(s);
		while (i = CURSIG(p))
			psig(i);
	}
	if (p->p_stats->p_prof.pr_scale) {
		/* Charge profiling ticks for system time consumed by the call. */
		int ticks;
		struct timeval *tv = &p->p_stime;

		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpri = p->p_pri;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
#ifdef	DIAGNOSTICx
{ extern int _udatasel, _ucodesel;
	/* Sanity-check the selectors and pc we are about to return to. */
	if (frame.tf_ss != _udatasel)
		printf("ss %x call %d\n", frame.tf_ss, code);
	if ((frame.tf_cs&0xffff) != _ucodesel)
		printf("cs %x call %d\n", frame.tf_cs, code);
	if (frame.tf_eip > VM_MAXUSER_ADDRESS) {
		printf("eip %x call %d\n", frame.tf_eip, code);
		frame.tf_eip = 0;
	}
}
#endif
}
770