/* subr_syscall.c revision 31544 */
12490Sjkh/*-
251862Sdcs * Copyright (C) 1994, David Greenman
32490Sjkh * Copyright (c) 1990, 1993
460992Shoek *	The Regents of the University of California.  All rights reserved.
560992Shoek *
62490Sjkh * This code is derived from software contributed to Berkeley by
72490Sjkh * the University of Utah, and William Jolitz.
852571Sjkh *
952571Sjkh * Redistribution and use in source and binary forms, with or without
1052571Sjkh * modification, are permitted provided that the following conditions
1152571Sjkh * are met:
1252571Sjkh * 1. Redistributions of source code must retain the above copyright
1340478Sbde *    notice, this list of conditions and the following disclaimer.
1440478Sbde * 2. Redistributions in binary form must reproduce the above copyright
152490Sjkh *    notice, this list of conditions and the following disclaimer in the
1640478Sbde *    documentation and/or other materials provided with the distribution.
1760992Shoek * 3. All advertising materials mentioning features or use of this software
1860992Shoek *    must display the following acknowledgement:
192490Sjkh *	This product includes software developed by the University of
202490Sjkh *	California, Berkeley and its contributors.
2140478Sbde * 4. Neither the name of the University nor the names of its contributors
222490Sjkh *    may be used to endorse or promote products derived from this software
232490Sjkh *    without specific prior written permission.
242490Sjkh *
252490Sjkh * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2640478Sbde * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2715930Sache * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2840478Sbde * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2922449Swosch * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3040478Sbde * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3122449Swosch * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3222449Swosch * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
332490Sjkh * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3460992Shoek * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3540478Sbde * SUCH DAMAGE.
3652888Sjoerg *
3752586Sdcs *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
3840478Sbde *	$Id: trap.c,v 1.117 1997/12/04 14:35:40 jkh Exp $
392490Sjkh */
4039479Sphk
4151909Sdcs/*
4252586Sdcs * 386 Trap and System call handling
432490Sjkh */
4439479Sphk
4555059Smarcel#include "opt_cpu.h"
462490Sjkh#include "opt_ddb.h"
472490Sjkh#include "opt_ktrace.h"
48#include "opt_vm86.h"
49
50#include <sys/param.h>
51#include <sys/systm.h>
52#include <sys/proc.h>
53#include <sys/kernel.h>
54#include <sys/resourcevar.h>
55#include <sys/signalvar.h>
56#include <sys/syscall.h>
57#include <sys/sysent.h>
58#include <sys/vmmeter.h>
59#ifdef KTRACE
60#include <sys/ktrace.h>
61#endif
62
63#include <vm/vm.h>
64#include <vm/vm_param.h>
65#include <vm/vm_prot.h>
66#include <sys/lock.h>
67#include <vm/pmap.h>
68#include <vm/vm_kern.h>
69#include <vm/vm_map.h>
70#include <vm/vm_page.h>
71#include <vm/vm_extern.h>
72
73#include <machine/cpu.h>
74#include <machine/ipl.h>
75#include <machine/md_var.h>
76#include <machine/pcb.h>
77#ifdef SMP
78#include <machine/smp.h>
79#endif
80#include <machine/tss.h>
81
82#include <i386/isa/intr_machdep.h>
83
84#ifdef POWERFAIL_NMI
85#include <sys/syslog.h>
86#include <machine/clock.h>
87#endif
88
89#ifdef VM86
90#include <machine/vm86.h>
91#endif
92
93#include "isa.h"
94#include "npx.h"
95
96extern struct i386tss common_tss;
97
98int (*pmath_emulate) __P((struct trapframe *));
99
100extern void trap __P((struct trapframe frame));
101extern int trapwrite __P((unsigned addr));
102extern void syscall __P((struct trapframe frame));
103
104static int trap_pfault __P((struct trapframe *, int));
105static void trap_fatal __P((struct trapframe *));
106void dblfault_handler __P((void));
107
108extern inthand_t IDTVEC(syscall);
109
/*
 * Printable names for the T_* trap codes, indexed by trap number.
 * MAX_TRAP_MSG is the largest valid index; entries marked "unused"
 * are codes with no printable name.
 */
#define MAX_TRAP_MSG		28
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
};
142
143static void userret __P((struct proc *p, struct trapframe *frame,
144			 u_quad_t oticks));
145
146#if defined(I586_CPU) && !defined(NO_F00F_HACK)
147extern struct gate_descriptor *t_idt;
148extern int has_f00f_bug;
149#endif
150
/*
 * Common return-to-user-mode processing, called on the way out of both
 * trap() and syscall(): deliver pending signals, honor a requested
 * reschedule, and charge profiling time accrued in the kernel.
 * `oticks' is the p_sticks value sampled on kernel entry.
 */
static inline void
userret(p, frame, oticks)
	struct proc *p;
	struct trapframe *frame;
	u_quad_t oticks;
{
	int sig, s;

	/* Post any signals that became pending while in the kernel. */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);

#if 0
	if (!want_resched &&
		(p->p_priority <= p->p_usrpri) &&
		(p->p_rtprio.type == RTP_PRIO_NORMAL)) {
		 int newpriority;
		 p->p_estcpu += 1;
		 newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
		 newpriority = min(newpriority, MAXPRI);
		 p->p_usrpri = newpriority;
	}
#endif

	p->p_priority = p->p_usrpri;
	if (want_resched) {
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrunqueue ourselves but before we
		 * mi_switch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splhigh();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		splx(s);
		/* Signals may have arrived while we were switched out. */
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}
	/*
	 * Charge system time if profiling.
	 */
	if (p->p_flag & P_PROFIL)
		addupc_task(p, frame->tf_eip,
			    (u_int)(p->p_sticks - oticks) * psratio);

	curpriority = p->p_priority;
}
201
202/*
203 * Exception, fault, and trap interface to the FreeBSD kernel.
204 * This common code is called from assembly language IDT gate entry
205 * routines that prepare a suitable stack frame, and restore this
206 * frame after the exception has been processed.
207 */
208
/*
 * Handle a trap taken from either user or kernel mode.  User-mode
 * traps are translated into a signal number `i' (with machine code
 * `ucode') and delivered via trapsignal(); a few cases are serviced
 * in place and jump straight to `out'.  Kernel-mode traps are
 * recovered where possible; anything unrecognized ends in
 * trap_fatal().
 */
void
trap(frame)
	struct trapframe frame;
{
	struct proc *p = curproc;
	u_quad_t sticks = 0;
	int i = 0, ucode = 0, type, code;
#ifdef DEBUG
	u_long eva;
#endif

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
	type = frame.tf_trapno;
	code = frame.tf_err;

	/* A vm86-mode trap is treated as a user trap regardless of CPL. */
        if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
		/* user trap */

		sticks = p->p_sticks;
		p->p_md.md_regs = &frame;

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			ucode = type;
			i = SIGILL;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			frame.tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
			break;

		case T_ASTFLT:		/* Allow process switch */
			astoff();
			cnt.v_soft++;
			/* Post any deferred profiling ticks before returning. */
			if (p->p_flag & P_OWEUPC) {
				p->p_flag &= ~P_OWEUPC;
				addupc_task(p, p->p_stats->p_prof.pr_addr,
					    p->p_stats->p_prof.pr_ticks);
			}
			goto out;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
#ifdef VM86
			if (frame.tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)&frame);
				if (i == 0)
					goto out;
				break;
			}
#endif /* VM86 */
			/* FALL THROUGH */

		case T_SEGNPFLT:	/* segment not present fault */
		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
		default:
			ucode = code + BUS_SEGM_FAULT ;
			i = SIGBUS;
			break;

		case T_PAGEFLT:		/* page fault */
			i = trap_pfault(&frame, TRUE);
			if (i == -1)
				return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
			/* F00F workaround rewrote tf_trapno; redispatch. */
			if (i == -2)
				goto restart;
#endif
			if (i == 0)
				goto out;

			ucode = T_PAGEFLT;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV_TRAP;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
#ifdef POWERFAIL_NMI
			goto handle_powerfail;
#else /* !POWERFAIL_NMI */
#ifdef DDB
			/* NMI can be hooked up to a pushbutton for debugging */
			printf ("NMI ... going to debugger\n");
			if (kdb_trap (type, 0, &frame))
				return;
#endif /* DDB */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) return;
			panic("NMI indicates hardware failure");
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF_TRAP;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_SUBRNG_TRAP;
			i = SIGFPE;
			break;

		case T_DNA:
#if NNPX > 0
			/* if a transparent fault (due to context switch "late") */
			if (npxdna())
				return;
#endif
			/* No FPU and no emulator loaded: signal the process. */
			if (!pmath_emulate) {
				i = SIGFPE;
				ucode = FPE_FPU_NP_TRAP;
				break;
			}
			i = (*pmath_emulate)(&frame);
			if (i == 0) {
				/* Emulation succeeded; honor single-stepping. */
				if (!(frame.tf_eflags & PSL_T))
					return;
				frame.tf_eflags &= ~PSL_T;
				i = SIGTRAP;
			}
			/* else ucode = emulator_only_knows() XXX */
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = T_FPOPFLT;
			i = SIGILL;
			break;
		}
	} else {
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(&frame, FALSE);
			return;

		case T_DNA:
#if NNPX > 0
			/*
			 * The kernel is apparently using npx for copying.
			 * XXX this should be fatal unless the kernel has
			 * registered such use.
			 */
			if (npxdna())
				return;
#endif
			break;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
#define	MAYBE_DORETI_FAULT(where, whereto)				\
	do {								\
		if (frame.tf_eip == (int)where) {			\
			frame.tf_eip = (int)whereto;			\
			return;						\
		}							\
	} while (0)

			if (intr_nesting_level == 0) {
				/*
				 * Invalid %fs's and %gs's can be created using
				 * procfs or PT_SETREGS or by invalidating the
				 * underlying LDT entry.  This causes a fault
				 * in kernel mode when the kernel attempts to
				 * switch contexts.  Lose the bad context
				 * (XXX) so that we can continue, and generate
				 * a signal.
				 */
				if (frame.tf_eip == (int)cpu_switch_load_fs) {
					curpcb->pcb_fs = 0;
					psignal(p, SIGBUS);
					return;
				}
				if (frame.tf_eip == (int)cpu_switch_load_gs) {
					curpcb->pcb_gs = 0;
					psignal(p, SIGBUS);
					return;
				}
				MAYBE_DORETI_FAULT(doreti_iret,
						   doreti_iret_fault);
				MAYBE_DORETI_FAULT(doreti_popl_ds,
						   doreti_popl_ds_fault);
				MAYBE_DORETI_FAULT(doreti_popl_es,
						   doreti_popl_es_fault);
				/* Last resort: copyin/copyout-style recovery. */
				if (curpcb && curpcb->pcb_onfault) {
					frame.tf_eip = (int)curpcb->pcb_onfault;
					return;
				}
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame.tf_eflags & PSL_NT) {
				frame.tf_eflags &= ~PSL_NT;
				return;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			if (frame.tf_eip == (int)IDTVEC(syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				return;
			}
			if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame.tf_eflags &= ~PSL_T;
				return;
			}
			/*
			 * Fall through.
			 */
		case T_BPTFLT:
			/*
			 * If DDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef DDB
			if (kdb_trap (type, 0, &frame))
				return;
#endif
			break;

#if NISA > 0
		case T_NMI:
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
#  define TIMER_FREQ 1193182
#endif
	handle_powerfail:
		{
		  static unsigned lastalert = 0;

		  /* Rate-limit power-fail alerts to one per 10 seconds. */
		  if(time.tv_sec - lastalert > 10)
		    {
		      log(LOG_WARNING, "NMI: power fail\n");
		      sysbeep(TIMER_FREQ/880, hz);
		      lastalert = time.tv_sec;
		    }
		  return;
		}
#else /* !POWERFAIL_NMI */
#ifdef DDB
			/* NMI can be hooked up to a pushbutton for debugging */
			printf ("NMI ... going to debugger\n");
			if (kdb_trap (type, 0, &frame))
				return;
#endif /* DDB */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) return;
			/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
		}

		/* Unhandled kernel trap: no recovery possible. */
		trap_fatal(&frame);
		return;
	}

	trapsignal(p, i, ucode);

#ifdef DEBUG
	eva = rcr2();
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%x", eva);
		uprintf("\n");
	}
#endif

out:
	userret(p, &frame, sticks);
}
529
#ifdef notyet
/*
 * NOTE(review): this alternate implementation is compiled out (`notyet'
 * is never defined) and kept only as a reference for a future, stricter
 * trap_pfault that refuses user-space faults taken from the kernel.
 *
 * This version doesn't allow a page fault to user space while
 * in the kernel. The rest of the kernel needs to be made "safe"
 * before this can be used. I think the only things remaining
 * to be made safe are the iBCS2 code and the process tracing/
 * debugging code.
 */
static int
trap_pfault(frame, usermode)
	struct trapframe *frame;
	int usermode;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	int eva;
	struct proc *p = curproc;

	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	eva = rcr2();
	va = trunc_page((vm_offset_t)eva);

	if (va < VM_MIN_KERNEL_ADDRESS) {
		/* XXX `v' and `mpte' are unused. */
		vm_offset_t v;
		vm_page_t mpte;

		/*
		 * A kernel-mode fault below VM_MAXUSER_ADDRESS is only
		 * tolerated when taken at base level with an onfault
		 * recovery handler installed; otherwise it is fatal.
		 */
		if (p == NULL ||
		    (!usermode && va < VM_MAXUSER_ADDRESS &&
		     (intr_nesting_level != 0 || curpcb == NULL ||
		      curpcb->pcb_onfault == NULL))) {
			trap_fatal(frame);
			return (-1);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		vm = p->p_vmspace;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;

		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		if ((caddr_t)va > vm->vm_maxsaddr
		    && (caddr_t)va < (caddr_t)USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype,
			(ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);

		--p->p_lock;
	} else {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		/*
		 * Since we know that kernel virtual address addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(kernel_map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/* Kernel-mode failure: try onfault recovery, else die. */
		if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
#endif
638
/*
 * Service a page fault taken in either mode.  Returns:
 *	 0  fault serviced (or recovered via pcb_onfault);
 *	-1  fatal, trap_fatal() has already been called;
 *	-2  Pentium F00F workaround fired; caller must redispatch
 *	    the trap (tf_trapno has been rewritten to T_PRIVINFLT);
 *	>0  a signal number (SIGBUS/SIGSEGV) for the caller to post.
 */
int
trap_pfault(frame, usermode)
	struct trapframe *frame;
	int usermode;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	int eva;
	struct proc *p = curproc;

	eva = rcr2();		/* CR2 holds the faulting linear address */
	va = trunc_page((vm_offset_t)eva);

	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 * An exception:  if the faulting address is the invalid
		 * instruction entry in the IDT, then the Intel Pentium
		 * F00F bug workaround was triggered, and we need to
		 * treat it is as an illegal instruction, and not a page
		 * fault.
		 */
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
		if ((eva == (unsigned int)&t_idt[6]) && has_f00f_bug) {
			frame->tf_trapno = T_PRIVINFLT;
			return -2;
		}
#endif
		if (usermode)
			goto nogo;

		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (p != NULL)
			vm = p->p_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	if (map != kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		if ((caddr_t)va > vm->vm_maxsaddr
		    && (caddr_t)va < (caddr_t)USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype,
			(ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);

		--p->p_lock;
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the kernel.
		 */
		rv = vm_fault(map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/* Kernel-mode failure: try onfault recovery, else die. */
		if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
742
743static void
744trap_fatal(frame)
745	struct trapframe *frame;
746{
747	int code, type, eva, ss, esp;
748	struct soft_segment_descriptor softseg;
749
750	code = frame->tf_err;
751	type = frame->tf_trapno;
752	eva = rcr2();
753	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
754
755	if (type <= MAX_TRAP_MSG)
756		printf("\n\nFatal trap %d: %s while in %s mode\n",
757			type, trap_msg[type],
758        		frame->tf_eflags & PSL_VM ? "vm86" :
759			ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
760#ifdef SMP
761	/* three seperate prints in case of a trap on an unmapped page */
762	printf("mp_lock = %08x; ", mp_lock);
763	printf("cpuid = %d; ", cpuid);
764	printf("lapic.id = %08x\n", lapic.id);
765#endif
766	if (type == T_PAGEFLT) {
767		printf("fault virtual address	= 0x%x\n", eva);
768		printf("fault code		= %s %s, %s\n",
769			code & PGEX_U ? "user" : "supervisor",
770			code & PGEX_W ? "write" : "read",
771			code & PGEX_P ? "protection violation" : "page not present");
772	}
773	printf("instruction pointer	= 0x%x:0x%x\n",
774	       frame->tf_cs & 0xffff, frame->tf_eip);
775        if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
776		ss = frame->tf_ss & 0xffff;
777		esp = frame->tf_esp;
778	} else {
779		ss = GSEL(GDATA_SEL, SEL_KPL);
780		esp = (int)&frame->tf_esp;
781	}
782	printf("stack pointer	        = 0x%x:0x%x\n", ss, esp);
783	printf("frame pointer	        = 0x%x:0x%x\n", ss, frame->tf_ebp);
784	printf("code segment		= base 0x%x, limit 0x%x, type 0x%x\n",
785	       softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
786	printf("			= DPL %d, pres %d, def32 %d, gran %d\n",
787	       softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
788	       softseg.ssd_gran);
789	printf("processor eflags	= ");
790	if (frame->tf_eflags & PSL_T)
791		printf("trace trap, ");
792	if (frame->tf_eflags & PSL_I)
793		printf("interrupt enabled, ");
794	if (frame->tf_eflags & PSL_NT)
795		printf("nested task, ");
796	if (frame->tf_eflags & PSL_RF)
797		printf("resume, ");
798	if (frame->tf_eflags & PSL_VM)
799		printf("vm86, ");
800	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
801	printf("current process		= ");
802	if (curproc) {
803		printf("%lu (%s)\n",
804		    (u_long)curproc->p_pid, curproc->p_comm ?
805		    curproc->p_comm : "");
806	} else {
807		printf("Idle\n");
808	}
809	printf("interrupt mask		= ");
810	if ((cpl & net_imask) == net_imask)
811		printf("net ");
812	if ((cpl & tty_imask) == tty_imask)
813		printf("tty ");
814	if ((cpl & bio_imask) == bio_imask)
815		printf("bio ");
816	if ((cpl & cam_imask) == cam_imask)
817		printf("cam ");
818	if (cpl == 0)
819		printf("none");
820#ifdef SMP
821/**
822 *  XXX FIXME:
823 *	we probably SHOULD have stopped the other CPUs before now!
824 *	another CPU COULD have been touching cpl at this moment...
825 */
826	printf(" <- SMP: XXX");
827#endif
828	printf("\n");
829
830#ifdef KDB
831	if (kdb_trap(&psl))
832		return;
833#endif
834#ifdef DDB
835	if (kdb_trap (type, 0, frame))
836		return;
837#endif
838	printf("trap number		= %d\n", type);
839	if (type <= MAX_TRAP_MSG)
840		panic(trap_msg[type]);
841	else
842		panic("unknown/reserved trap");
843}
844
/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs. This means that the stack that was active at
 * the time of the double fault is not available at <kstack> unless
 * the machine was idle when the double fault occurred. The downside
 * of this is that "trace <ebp>" in ddb won't work.
 */
void
dblfault_handler()
{
	/* The faulting register state lives in the shared TSS. */
	printf("\nFatal double fault:\n");
	printf("eip = 0x%x\n", common_tss.tss_eip);
	printf("esp = 0x%x\n", common_tss.tss_esp);
	printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	printf("mp_lock = %08x; ", mp_lock);
	printf("cpuid = %d; ", cpuid);
	printf("lapic.id = %08x\n", lapic.id);
#endif
	panic("double fault");
}
872
873/*
874 * Compensate for 386 brain damage (missing URKR).
875 * This is a little simpler than the pagefault handler in trap() because
876 * it the page tables have already been faulted in and high addresses
877 * are thrown out early for other reasons.
878 */
879int trapwrite(addr)
880	unsigned addr;
881{
882	struct proc *p;
883	vm_offset_t va;
884	struct vmspace *vm;
885	int rv;
886
887	va = trunc_page((vm_offset_t)addr);
888	/*
889	 * XXX - MAX is END.  Changed > to >= for temp. fix.
890	 */
891	if (va >= VM_MAXUSER_ADDRESS)
892		return (1);
893
894	p = curproc;
895	vm = p->p_vmspace;
896
897	++p->p_lock;
898
899	if ((caddr_t)va >= vm->vm_maxsaddr
900	    && (caddr_t)va < (caddr_t)USRSTACK) {
901		if (!grow(p, va)) {
902			--p->p_lock;
903			return (1);
904		}
905	}
906
907	/*
908	 * fault the data page
909	 */
910	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY);
911
912	--p->p_lock;
913
914	if (rv != KERN_SUCCESS)
915		return 1;
916
917	return (0);
918}
919
/*
 * System call request from POSIX system call gate interface to kernel.
 * Like trap(), argument is call by reference.  Decodes the syscall
 * number from %eax, copies in the arguments from the user stack,
 * dispatches through the process's sysent table, and stores the
 * result back into the frame per the i386 syscall ABI.
 */
void
syscall(frame)
	struct trapframe frame;
{
	caddr_t params;
	int i;
	struct sysent *callp;
	struct proc *p = curproc;
	u_quad_t sticks;
	int error;
	int args[8];
	u_int code;

#ifdef DIAGNOSTIC
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("syscall");
#endif
	sticks = p->p_sticks;
	p->p_md.md_regs = &frame;
	/*
	 * Arguments start one word above the user %esp (skipping,
	 * presumably, the return address pushed by the call stub —
	 * cf. the tf_err adjustment in the ERESTART case below).
	 */
	params = (caddr_t)frame.tf_esp + sizeof(int);
	code = frame.tf_eax;
	if (p->p_sysent->sv_prepsyscall) {
		/* Let the ABI emulation layer decode code/args itself. */
		(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
	} else {
		/*
		 * Need to check if this is a 32 bit or 64 bit syscall.
		 */
		if (code == SYS_syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = fuword(params);
			params += sizeof(int);
		} else if (code == SYS___syscall) {
			/*
			 * Like syscall, but code is a quad, so as to maintain
			 * quad alignment for the rest of the arguments.
			 */
			code = fuword(params);
			params += sizeof(quad_t);
		}
	}

 	if (p->p_sysent->sv_mask)
 		code &= p->p_sysent->sv_mask;

	/* Out-of-range syscall numbers dispatch to table entry 0. */
 	if (code >= p->p_sysent->sv_size)
 		callp = &p->p_sysent->sv_table[0];
  	else
 		callp = &p->p_sysent->sv_table[code];

	if (params && (i = callp->sy_narg * sizeof(int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
		/* Argument copyin failed; still trace the attempt. */
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
		goto bad;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
	p->p_retval[0] = 0;
	p->p_retval[1] = frame.tf_edx;

	error = (*callp->sy_call)(p, args);

	switch (error) {

	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame.tf_eax = p->p_retval[0];
		frame.tf_edx = p->p_retval[1];
		frame.tf_eflags &= ~PSL_C;
		break;

	case ERESTART:
		/*
		 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
		 * int 0x80 is 2 bytes. We saved this in tf_err.
		 */
		frame.tf_eip -= frame.tf_err;
		break;

	case EJUSTRETURN:
		break;

	default:
bad:
		/* Translate the error through the ABI's errno table. */
 		if (p->p_sysent->sv_errsize)
 			if (error >= p->p_sysent->sv_errsize)
  				error = -1;	/* XXX */
   			else
  				error = p->p_sysent->sv_errtbl[error];
		frame.tf_eax = error;
		frame.tf_eflags |= PSL_C;	/* carry set = failure */
		break;
	}

	if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
		/* Traced syscall. */
		frame.tf_eflags &= ~PSL_T;
		trapsignal(p, SIGTRAP, 0);
	}

	userret(p, &frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
#endif
}
1041
1042/*
1043 * Simplified back end of syscall(), used when returning from fork()
1044 * directly into user mode.
1045 */
1046void
1047fork_return(p, frame)
1048	struct proc *p;
1049	struct trapframe frame;
1050{
1051	frame.tf_eax = 0;		/* Child returns zero */
1052	frame.tf_eflags &= ~PSL_C;	/* success */
1053	frame.tf_edx = 1;
1054
1055	userret(p, &frame, 0);
1056#ifdef KTRACE
1057	if (KTRPOINT(p, KTR_SYSRET))
1058		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1059#endif
1060}
1061