/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
 *	$Id: trap.c,v 1.20 1994/03/24 23:12:34 davidg Exp $
 */

/*
 * 386 Trap and System call handling
 */

#include "isa.h"
#include "npx.h"
#include "ddb.h"
#include "machine/cpu.h"
#include "machine/psl.h"
#include "machine/reg.h"
#include "machine/eflags.h"

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "user.h"
#include "acct.h"
#include "kernel.h"
#ifdef KTRACE
#include "ktrace.h"
#endif

#include "vm/vm_param.h"
#include "vm/pmap.h"
#include "vm/vm_map.h"
#include "vm/vm_user.h"
#include "vm/vm_page.h"
#include "sys/vmmeter.h"

#include "machine/trap.h"

#ifdef	__GNUC__

/*
 * The "r" constraint could be "rm" except for fatal bugs in gas.  As usual,
 * we omit the size from the mov instruction to avoid nonfatal bugs in gas.
 */
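/*
 * read_gs()/write_gs() fetch and load the %gs selector; trap() uses them
 * below to repair a user-corrupted %gs before retrying a fault taken in
 * copyin()/copyout().
 */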
#define	read_gs()	({ u_short gs; __asm("mov %%gs,%0" : "=r" (gs)); gs; })
#define	write_gs(newgs)	__asm("mov %0,%%gs" : : "r" ((u_short) newgs))

#else	/* not __GNUC__ */

u_short	read_gs		__P((void));
void	write_gs	__P((/* promoted u_short */ int gs));

#endif	/* __GNUC__ */

extern int grow(struct proc *,int);

struct	sysent sysent[];
int	nsysent;

#define MAX_TRAP_MSG		27
char *trap_msg[] = {
	"reserved addressing fault",		/*  0 T_RESADFLT */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"reserved operand fault",		/*  2 T_RESOPFLT */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"system call trap",			/*  5 T_SYSCALL */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"segmentation (limit) fault",		/*  8 T_SEGFLT */
	"protection fault",			/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"page table fault",			/* 13 T_TABLEFLT */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"kernel stack pointer not valid",	/* 15 T_KSPNOTVAL */
	"bus error",				/* 16 T_BUSERR */
	"kernel debugger fault",		/* 17 T_KDBTRAP */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
};

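/* True iff the page directory entry mapping virtual address v is valid. */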
#define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)

/*
 * trap(frame):
 *	Exception, fault, and trap interface to BSD kernel. This
 * common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed. Note that the
 * effect is as if the arguments were passed call by reference.
 */
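/*
 * Traps taken from user mode have T_USER ORed into the trap type below,
 * so a single switch can dispatch both the kernel- and user-mode cases.
 */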

/*ARGSUSED*/
void
trap(frame)
	struct trapframe frame;
{
	register int i;
	register struct proc *p = curproc;
	struct timeval syst;
	int ucode, type, code, eva, fault_type;

	frame.tf_eflags &= ~PSL_NT;	/* clear nested trap XXX */
	type = frame.tf_trapno;
#if NDDB > 0
	if (curpcb && curpcb->pcb_onfault) {
		if (frame.tf_trapno == T_BPTFLT
		    || frame.tf_trapno == T_TRCTRAP)
			if (kdb_trap (type, 0, &frame))
				return;
	}
#endif

	if (curpcb == 0 || curproc == 0)
		goto skiptoswitch;
	if (curpcb->pcb_onfault && frame.tf_trapno != T_PAGEFLT) {
		extern int _udatasel;

		if (read_gs() != (u_short) _udatasel)
			/*
			 * Some user has corrupted %gs but we depend on it in
			 * copyout() etc.  Fix it up and retry.
			 *
			 * (We don't preserve %fs or %gs, so users can change
			 * them to either _ucodesel, _udatasel or a not-present
			 * selector, possibly ORed with 0 to 3, making them
			 * volatile for other users.  Not preserving them saves
			 * time and doesn't lose functionality or open security
			 * holes.)
			 */
			write_gs(_udatasel);
		else
copyfault:
			frame.tf_eip = (int)curpcb->pcb_onfault;
		return;
	}

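	/*
	 * Snapshot the system time so that profiling ticks accumulated while
	 * the trap is handled can be charged to the process on the way out.
	 */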
	syst = p->p_stime;
	if (ISPL(frame.tf_cs) == SEL_UPL) {
		type |= T_USER;
		p->p_regs = (int *)&frame;
	}

skiptoswitch:
	ucode = 0;
	eva = rcr2();
	code = frame.tf_err;

	if ((type & ~T_USER) == T_PAGEFLT)
		goto pfault;

	switch (type) {
	case T_SEGNPFLT|T_USER:
	case T_STKFLT|T_USER:
	case T_PROTFLT|T_USER:		/* protection fault */
		ucode = code + BUS_SEGM_FAULT;
		i = SIGBUS;
		break;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
	case T_RESADFLT|T_USER:		/* reserved addressing fault */
	case T_RESOPFLT|T_USER:		/* reserved operand fault */
	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		ucode = type &~ T_USER;
		i = SIGILL;
		break;

	case T_ASTFLT|T_USER:		/* Allow process switch */
		astoff();
		cnt.v_soft++;
		if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) {
			addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
			p->p_flag &= ~SOWEUPC;
		}
		goto out;

	case T_DNA|T_USER:
#if NNPX > 0
		/* if a transparent fault (due to context switch "late") */
		if (npxdna()) return;
#endif	/* NNPX > 0 */
#ifdef	MATH_EMULATE
		i = math_emulate(&frame);
		if (i == 0) return;
#else	/* MATH_EMULATE */
		panic("trap: math emulation necessary!");
#endif	/* MATH_EMULATE */
		ucode = FPE_FPU_NP_TRAP;
		break;

	case T_BOUND|T_USER:
		ucode = FPE_SUBRNG_TRAP;
		i = SIGFPE;
		break;

	case T_OFLOW|T_USER:
		ucode = FPE_INTOVF_TRAP;
		i = SIGFPE;
		break;

	case T_DIVIDE|T_USER:
		ucode = FPE_INTDIV_TRAP;
		i = SIGFPE;
		break;

	case T_ARITHTRAP|T_USER:
		ucode = code;
		i = SIGFPE;
		break;

	pfault:
	case T_PAGEFLT:			/* allow page faults in kernel mode */
	case T_PAGEFLT|T_USER:		/* page fault */
	    {
		vm_offset_t va;
		struct vmspace *vm;
		vm_map_t map = 0;
		int rv = 0, oldflags;
		vm_prot_t ftype;
		unsigned v;
		extern vm_map_t kernel_map;

		va = trunc_page((vm_offset_t)eva);

		/*
		 * Don't allow user-mode faults in kernel address space
		 */
		if ((type == (T_PAGEFLT|T_USER)) && (va >= KERNBASE)) {
			goto nogo;
		}

		if ((p == 0) || (type == T_PAGEFLT && va >= KERNBASE)) {
			vm = 0;
			map = kernel_map;
		} else {
			vm = p->p_vmspace;
			map = &vm->vm_map;
		}

		if (code & PGEX_W)
			ftype = VM_PROT_READ | VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

		oldflags = p->p_flag;
		if (map != kernel_map) {
			vm_offset_t pa;
			vm_offset_t v = (vm_offset_t) vtopte(va);
			vm_page_t ptepg;

			/*
			 * Keep swapout from messing with us during this
			 *	critical time.
			 */
			p->p_flag |= SLOCK;

			/*
			 * Grow the stack if necessary
			 */
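			/*
			 * Faults between vm_maxsaddr (the lowest address the
			 * stack may grow down to) and USRSTACK are treated as
			 * stack growth; grow() returns 0 if the stack cannot
			 * be extended to cover the faulting address.
			 */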
			if ((caddr_t)va > vm->vm_maxsaddr
			    && (caddr_t)va < (caddr_t)USRSTACK) {
				if (!grow(p, va)) {
					rv = KERN_FAILURE;
					p->p_flag &= ~SLOCK;
					p->p_flag |= (oldflags & SLOCK);
					goto nogo;
				}
			}

			/*
			 * Check if page table is mapped, if not,
			 *	fault it first
			 */

			/* Fault the pte only if needed: */
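			/*
			 * The volatile read-modify-write below touches the
			 * pte itself, so if the page table page is not
			 * resident it is faulted in here before we look it
			 * up and hold it.
			 */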
			*(volatile char *)v += 0;

			ptepg = (vm_page_t) pmap_pte_vm_page(vm_map_pmap(map), v);
			vm_page_hold(ptepg);

			/* Fault in the user page: */
			rv = vm_fault(map, va, ftype, FALSE);

			vm_page_unhold(ptepg);

			/*
			 * page table pages don't need to be kept if they
			 * are not held
			 */
			if( ptepg->hold_count == 0 && ptepg->wire_count == 0) {
				pmap_page_protect( VM_PAGE_TO_PHYS(ptepg),
					VM_PROT_NONE);
				if( ptepg->flags & PG_CLEAN)
					vm_page_free(ptepg);
			}


			p->p_flag &= ~SLOCK;
			p->p_flag |= (oldflags & SLOCK);
		} else {
			/*
			 * Since we know that kernel virtual addresses
			 * always have pte pages mapped, we just have to fault
			 * the page.
			 */
			rv = vm_fault(map, va, ftype, FALSE);
		}

		if (rv == KERN_SUCCESS) {
			if (type == T_PAGEFLT)
				return;
			goto out;
		}
nogo:
		if (type == T_PAGEFLT) {
			if (curpcb->pcb_onfault)
				goto copyfault;

			goto we_re_toast;
		}
		i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;

		/* kludge to pass faulting virtual address to sendsig */
		ucode = type &~ T_USER;
		frame.tf_err = eva;

		break;
	    }

#if NDDB == 0
	case T_TRCTRAP:	 /* trace trap -- someone single stepping lcall's */
		frame.tf_eflags &= ~PSL_T;

			/* Q: how do we turn it on again? */
		return;
#endif

	case T_BPTFLT|T_USER:		/* bpt instruction fault */
	case T_TRCTRAP|T_USER:		/* trace trap */
		frame.tf_eflags &= ~PSL_T;
		i = SIGTRAP;
		break;

#if NISA > 0
	case T_NMI:
	case T_NMI|T_USER:
#if NDDB > 0
		/* NMI can be hooked up to a pushbutton for debugging */
		printf ("NMI ... going to debugger\n");
		if (kdb_trap (type, 0, &frame))
			return;
#endif
		/* machine/parity/power fail/"kitchen sink" faults */
		if (isa_nmi(code) == 0) return;
		/* FALL THROUGH */
#endif
	default:
	we_re_toast:

		fault_type = type & ~T_USER;
		if (fault_type <= MAX_TRAP_MSG)
			printf("\n\nFatal trap %d: %s while in %s mode\n",
				fault_type, trap_msg[fault_type],
				ISPL(frame.tf_cs) == SEL_UPL ? "user" : "kernel");
		if (fault_type == T_PAGEFLT) {
			printf("fault virtual address	= 0x%x\n", eva);
			printf("fault code		= %s %s, %s\n",
				code & PGEX_U ? "user" : "supervisor",
				code & PGEX_W ? "write" : "read",
				code & PGEX_P ? "protection violation" : "page not present");
		}
		printf("instruction pointer	= 0x%x\n", frame.tf_eip);
		printf("processor eflags	= ");
		if (frame.tf_eflags & EFL_TF)
			printf("trace/trap, ");
		if (frame.tf_eflags & EFL_IF)
			printf("interrupt enabled, ");
		if (frame.tf_eflags & EFL_NT)
			printf("nested task, ");
		if (frame.tf_eflags & EFL_RF)
			printf("resume, ");
		if (frame.tf_eflags & EFL_VM)
			printf("vm86, ");
		printf("IOPL = %d\n", (frame.tf_eflags & EFL_IOPL) >> 12);
		printf("current process		= ");
		if (curproc) {
			printf("%d (%s)\n",
			    curproc->p_pid, curproc->p_comm ?
			    curproc->p_comm : "");
		} else {
			printf("Idle\n");
		}
		printf("interrupt mask		= ");
		if ((cpl & net_imask) == net_imask)
			printf("net ");
		if ((cpl & tty_imask) == tty_imask)
			printf("tty ");
		if ((cpl & bio_imask) == bio_imask)
			printf("bio ");
		if (cpl == 0)
			printf("none");
		printf("\n");

#ifdef KDB
		if (kdb_trap(&psl))
			return;
#endif
#if NDDB > 0
		if (kdb_trap (type, 0, &frame))
			return;
#endif
		if (fault_type <= MAX_TRAP_MSG)
			panic(trap_msg[fault_type]);
		else
			panic("unknown/reserved trap");

		/* NOTREACHED */
	}

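	/*
	 * Post the signal selected above.  Kernel-mode traps return here;
	 * user-mode traps continue through the signal, reschedule and
	 * profiling path below.
	 */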
	trapsignal(p, i, ucode);
	if ((type & T_USER) == 0)
		return;
out:
	while (i = CURSIG(p))
		psig(i);
	p->p_pri = p->p_usrpri;
	if (want_resched) {
		int s;
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrq ourselves but before we
		 * swtch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		splx(s);
		while (i = CURSIG(p))
			psig(i);
	}
	if (p->p_stats->p_prof.pr_scale) {
		int ticks;
		struct timeval *tv = &p->p_stime;

		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpri = p->p_pri;
}

/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
 */
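/*
 * The 80386 ignores page-level write protection while executing in
 * supervisor mode, so a kernel store into a read-only or copy-on-write
 * user page does not fault on its own; the copy routines are expected to
 * detect that case and call trapwrite() to simulate the user-mode fault.
 */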
int trapwrite(addr)
	unsigned addr;
{
	struct proc *p;
	vm_offset_t va, v;
	struct vmspace *vm;
	int oldflags;
	int rv;

	va = trunc_page((vm_offset_t)addr);
	/*
	 * XXX - MAX is END.  Changed > to >= for temp. fix.
	 */
	if (va >= VM_MAXUSER_ADDRESS)
		return (1);

	p = curproc;
	vm = p->p_vmspace;

	oldflags = p->p_flag;
	p->p_flag |= SLOCK;

	if ((caddr_t)va >= vm->vm_maxsaddr
	    && (caddr_t)va < (caddr_t)USRSTACK) {
		if (!grow(p, va)) {
			p->p_flag &= ~SLOCK;
			p->p_flag |= (oldflags & SLOCK);
			return (1);
		}
	}

	v = trunc_page(vtopte(va));

	/*
	 * wire the pte page
	 */
	if (va < USRSTACK) {
		vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE);
	}

	/*
	 * fault the data page
	 */
	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE);

	/*
	 * unwire the pte page
	 */
	if (va < USRSTACK) {
		vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE);
	}

	p->p_flag &= ~SLOCK;
	p->p_flag |= (oldflags & SLOCK);

	if (rv != KERN_SUCCESS)
		return 1;

	return (0);
}

/*
 * syscall(frame):
 *	System call request from POSIX system call gate interface to kernel.
 * Like trap(), argument is call by reference.
 */
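/*
 * Illustrative sketch (not taken from this tree) of the user-side calling
 * sequence a libc stub of this era uses to reach this handler: the system
 * call number goes in %eax, the arguments stay on the user stack, and the
 * gate is entered with a far call:
 *
 *	movl	$SYS_write,%eax		# system call number
 *	lcall	$7,$0			# call gate in the LDT
 *	jb	cerror			# on error, carry set, errno in %eax
 *
 * The selector and label names above are assumptions for illustration.
 */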
/*ARGSUSED*/
void
syscall(frame)
	volatile struct trapframe frame;
{
	register int *locr0 = ((int *)&frame);
	register caddr_t params;
	register int i;
	register struct sysent *callp;
	register struct proc *p = curproc;
	struct timeval syst;
	int error, opc;
	int args[8], rval[2];
	int code;

#ifdef lint
	r0 = 0; r0 = r0; r1 = 0; r1 = r1;
#endif
	syst = p->p_stime;
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("syscall");

	code = frame.tf_eax;
	p->p_regs = (int *)&frame;
	params = (caddr_t)frame.tf_esp + sizeof (int);

	/*
	 * Reconstruct pc, assuming the lcall $X,$y that got us here is 7 bytes
	 * (one opcode byte, a four-byte offset and a two-byte selector), as it
	 * always is.
	 */
	opc = frame.tf_eip - 7;
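	/*
	 * System call number 0 is the indirect syscall: the real call number
	 * is the first word on the user stack, ahead of the arguments.
	 */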
	if (code == 0) {
		code = fuword(params);
		params += sizeof (int);
	}
	if (code < 0 || code >= nsysent)
		callp = &sysent[0];
	else
		callp = &sysent[code];

	if ((i = callp->sy_narg * sizeof (int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
		frame.tf_eax = error;
		frame.tf_eflags |= PSL_C;	/* carry bit */
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
		goto done;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
	rval[0] = 0;
	rval[1] = frame.tf_edx;
/*pg("%d. s %d\n", p->p_pid, code);*/
	error = (*callp->sy_call)(p, args, rval);
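	/*
	 * ERESTART backs the pc up to the lcall so the call is reissued on
	 * return to user mode; EJUSTRETURN leaves the frame untouched.  Any
	 * other error is returned with the carry flag set and the error code
	 * in %eax.
	 */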
	if (error == ERESTART)
		frame.tf_eip = opc;
	else if (error != EJUSTRETURN) {
		if (error) {
/*pg("error %d", error);*/
			frame.tf_eax = error;
			frame.tf_eflags |= PSL_C;	/* carry bit */
		} else {
			frame.tf_eax = rval[0];
			frame.tf_edx = rval[1];
			frame.tf_eflags &= ~PSL_C;	/* carry bit */
		}
	}
	/* else if (error == EJUSTRETURN) */
		/* nothing to do */
done:
	/*
	 * Reinitialize proc pointer `p' as it may be different
	 * if this is a child returning from fork syscall.
	 */
	p = curproc;
	while (i = CURSIG(p))
		psig(i);
	p->p_pri = p->p_usrpri;
	if (want_resched) {
		int s;
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrq ourselves but before we
		 * swtch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splclock();
		setrq(p);
		p->p_stats->p_ru.ru_nivcsw++;
		swtch();
		splx(s);
		while (i = CURSIG(p))
			psig(i);
	}
	if (p->p_stats->p_prof.pr_scale) {
		int ticks;
		struct timeval *tv = &p->p_stime;

		ticks = ((tv->tv_sec - syst.tv_sec) * 1000 +
			(tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000);
		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpri = p->p_pri;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
#ifdef	DIAGNOSTICx
{ extern int _udatasel, _ucodesel;
	if (frame.tf_ss != _udatasel)
		printf("ss %x call %d\n", frame.tf_ss, code);
	if ((frame.tf_cs&0xffff) != _ucodesel)
		printf("cs %x call %d\n", frame.tf_cs, code);
	if (frame.tf_eip > VM_MAXUSER_ADDRESS) {
		printf("eip %x call %d\n", frame.tf_eip, code);
		frame.tf_eip = 0;
	}
}
#endif
}
711