subr_syscall.c revision 31389
14Srgrimes/*- 21690Sdg * Copyright (C) 1994, David Greenman 31690Sdg * Copyright (c) 1990, 1993 41690Sdg * The Regents of the University of California. All rights reserved. 54Srgrimes * 64Srgrimes * This code is derived from software contributed to Berkeley by 74Srgrimes * the University of Utah, and William Jolitz. 84Srgrimes * 94Srgrimes * Redistribution and use in source and binary forms, with or without 104Srgrimes * modification, are permitted provided that the following conditions 114Srgrimes * are met: 124Srgrimes * 1. Redistributions of source code must retain the above copyright 134Srgrimes * notice, this list of conditions and the following disclaimer. 144Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 154Srgrimes * notice, this list of conditions and the following disclaimer in the 164Srgrimes * documentation and/or other materials provided with the distribution. 174Srgrimes * 3. All advertising materials mentioning features or use of this software 184Srgrimes * must display the following acknowledgement: 194Srgrimes * This product includes software developed by the University of 204Srgrimes * California, Berkeley and its contributors. 214Srgrimes * 4. Neither the name of the University nor the names of its contributors 224Srgrimes * may be used to endorse or promote products derived from this software 234Srgrimes * without specific prior written permission. 244Srgrimes * 254Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 264Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 274Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 284Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 294Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 304Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 314Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 324Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 334Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 344Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 354Srgrimes * SUCH DAMAGE. 364Srgrimes * 37608Srgrimes * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 3831389Sbde * $Id: trap.c,v 1.114 1997/11/06 19:28:09 phk Exp $ 394Srgrimes */ 404Srgrimes 414Srgrimes/* 421704Sdg * 386 Trap and System call handling 434Srgrimes */ 444Srgrimes 4513203Swollman#include "opt_ktrace.h" 4613228Swollman#include "opt_ddb.h" 4730265Speter#include "opt_vm86.h" 4813203Swollman 491549Srgrimes#include <sys/param.h> 501549Srgrimes#include <sys/systm.h> 511549Srgrimes#include <sys/proc.h> 521549Srgrimes#include <sys/kernel.h> 5331389Sbde#include <sys/resourcevar.h> 5431389Sbde#include <sys/signalvar.h> 551549Srgrimes#include <sys/syscall.h> 562257Ssos#include <sys/sysent.h> 5712662Sdg#include <sys/vmmeter.h> 584Srgrimes#ifdef KTRACE 591549Srgrimes#include <sys/ktrace.h> 604Srgrimes#endif 614Srgrimes 6212662Sdg#include <vm/vm.h> 631549Srgrimes#include <vm/vm_param.h> 6412662Sdg#include <vm/vm_prot.h> 6522521Sdyson#include <sys/lock.h> 661549Srgrimes#include <vm/pmap.h> 677090Sbde#include <vm/vm_kern.h> 681549Srgrimes#include <vm/vm_map.h> 691549Srgrimes#include <vm/vm_page.h> 7012662Sdg#include <vm/vm_extern.h> 714Srgrimes 721549Srgrimes#include <machine/cpu.h> 7326309Speter#include <machine/ipl.h> 747090Sbde#include <machine/md_var.h> 7531389Sbde#include <machine/pcb.h> 7631389Sbde#ifdef SMP 7725164Speter#include <machine/smp.h> 7831389Sbde#endif 7930275Speter#include <machine/tss.h> 801549Srgrimes 8131389Sbde#include <i386/isa/intr_machdep.h> 8231389Sbde 839545Sjoerg#ifdef POWERFAIL_NMI 8418207Sbde#include <sys/syslog.h> 8518207Sbde#include <machine/clock.h> 869545Sjoerg#endif 879545Sjoerg 8830275Speter#ifdef VM86 8930275Speter#include <machine/vm86.h> 9030275Speter#endif 9130275Speter 921549Srgrimes#include "isa.h" 931549Srgrimes#include "npx.h" 941549Srgrimes 9524925Sbdeextern struct i386tss common_tss; 9624925Sbde 9712817Sphkint (*pmath_emulate) __P((struct trapframe *)); 9812817Sphk 9911343Sbdeextern void trap __P((struct trapframe frame)); 10011343Sbdeextern int trapwrite __P((unsigned addr)); 10111343Sbdeextern void syscall __P((struct trapframe frame)); 10211343Sbde 10312929Sdgstatic int trap_pfault __P((struct trapframe *, int)); 10412929Sdgstatic void trap_fatal __P((struct trapframe *)); 10512929Sdgvoid dblfault_handler __P((void)); 1064Srgrimes 10711163Sjulianextern inthand_t IDTVEC(syscall); 10811163Sjulian 10917521Sdg#define MAX_TRAP_MSG 28 11012702Sphkstatic char *trap_msg[] = { 1115603Sbde "", /* 0 unused */ 112757Sdg "privileged instruction fault", /* 1 T_PRIVINFLT */ 1135603Sbde "", /* 2 unused */ 114757Sdg "breakpoint instruction fault", /* 3 T_BPTFLT */ 115757Sdg "", /* 4 unused */ 1165603Sbde "", /* 5 unused */ 117757Sdg "arithmetic trap", /* 6 T_ARITHTRAP */ 118757Sdg "system forced exception", /* 7 T_ASTFLT */ 1195603Sbde "", /* 8 unused */ 1201690Sdg "general protection fault", /* 9 T_PROTFLT */ 121757Sdg "trace trap", /* 10 T_TRCTRAP */ 122757Sdg "", /* 11 unused */ 123757Sdg "page fault", /* 12 T_PAGEFLT */ 1245603Sbde "", /* 13 unused */ 125757Sdg "alignment fault", /* 14 T_ALIGNFLT */ 1265603Sbde "", /* 15 unused */ 1275603Sbde "", /* 16 unused */ 1285603Sbde "", /* 17 unused */ 129757Sdg "integer divide fault", /* 18 T_DIVIDE */ 130757Sdg "non-maskable interrupt trap", /* 19 T_NMI */ 131757Sdg "overflow trap", /* 20 T_OFLOW */ 132757Sdg "FPU bounds check fault", /* 21 T_BOUND */ 133757Sdg "FPU device not available", /* 22 T_DNA */ 134757Sdg "double fault", /* 23 T_DOUBLEFLT */ 135757Sdg "FPU operand fetch fault", /* 24 T_FPOPFLT */ 136757Sdg "invalid TSS fault", /* 25 T_TSSFLT */ 137757Sdg "segment not present fault", /* 26 T_SEGNPFLT */ 138757Sdg "stack fault", /* 27 T_STKFLT */ 13917521Sdg "machine check trap", /* 28 T_MCHK */ 140757Sdg}; 1414Srgrimes 14211343Sbdestatic void userret __P((struct proc *p, struct trapframe *frame, 14311343Sbde u_quad_t oticks)); 14411343Sbde 1451690Sdgstatic inline void 1461690Sdguserret(p, frame, oticks) 1471690Sdg struct proc *p; 1481690Sdg struct trapframe *frame; 1491690Sdg u_quad_t oticks; 1501690Sdg{ 1511690Sdg int sig, s; 152757Sdg 1533436Sphk while ((sig = CURSIG(p)) != 0) 1541690Sdg postsig(sig); 15528013Sdyson 15628124Sdyson#if 0 15728013Sdyson if (!want_resched && 15828013Sdyson (p->p_priority <= p->p_usrpri) && 15928013Sdyson (p->p_rtprio.type == RTP_PRIO_NORMAL)) { 16028013Sdyson int newpriority; 16128013Sdyson p->p_estcpu += 1; 16228013Sdyson newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; 16328013Sdyson newpriority = min(newpriority, MAXPRI); 16428013Sdyson p->p_usrpri = newpriority; 16528013Sdyson } 16628013Sdyson#endif 16728013Sdyson 1681690Sdg p->p_priority = p->p_usrpri; 1691690Sdg if (want_resched) { 1701690Sdg /* 1711690Sdg * Since we are curproc, clock will normally just change 1721690Sdg * our priority without moving us from one queue to another 1731690Sdg * (since the running process is not on a queue.) 1741690Sdg * If that happened after we setrunqueue ourselves but before we 1751690Sdg * mi_switch()'ed, we might not be on the queue indicated by 1761690Sdg * our priority. 1771690Sdg */ 17817986Sdg s = splhigh(); 1791690Sdg setrunqueue(p); 1801690Sdg p->p_stats->p_ru.ru_nivcsw++; 1811690Sdg mi_switch(); 1821690Sdg splx(s); 1833436Sphk while ((sig = CURSIG(p)) != 0) 1841690Sdg postsig(sig); 1851690Sdg } 1866296Sdg /* 1876296Sdg * Charge system time if profiling. 1886296Sdg */ 18916725Sbde if (p->p_flag & P_PROFIL) 19016725Sbde addupc_task(p, frame->tf_eip, 19116725Sbde (u_int)(p->p_sticks - oticks) * psratio); 1921690Sdg 1931690Sdg curpriority = p->p_priority; 1941690Sdg} 1951690Sdg 1964Srgrimes/* 19711343Sbde * Exception, fault, and trap interface to the FreeBSD kernel. 1981690Sdg * This common code is called from assembly language IDT gate entry 1994Srgrimes * routines that prepare a suitable stack frame, and restore this 2001690Sdg * frame after the exception has been processed. 2014Srgrimes */ 2024Srgrimes 203798Swollmanvoid 2044Srgrimestrap(frame) 2054Srgrimes struct trapframe frame; 2064Srgrimes{ 2071690Sdg struct proc *p = curproc; 2081549Srgrimes u_quad_t sticks = 0; 2093436Sphk int i = 0, ucode = 0, type, code; 21011872Sphk#ifdef DEBUG 2113744Swollman u_long eva; 2123744Swollman#endif 2134Srgrimes 2144Srgrimes type = frame.tf_trapno; 2151690Sdg code = frame.tf_err; 2168876Srgrimes 21728496Scharnier if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) { 2181690Sdg /* user trap */ 219200Sdg 2201690Sdg sticks = p->p_sticks; 22125555Speter p->p_md.md_regs = &frame; 2224Srgrimes 2231690Sdg switch (type) { 2241690Sdg case T_PRIVINFLT: /* privileged instruction fault */ 2251690Sdg ucode = type; 2261690Sdg i = SIGILL; 2271690Sdg break; 228974Sdg 2291690Sdg case T_BPTFLT: /* bpt instruction fault */ 2301690Sdg case T_TRCTRAP: /* trace trap */ 2311690Sdg frame.tf_eflags &= ~PSL_T; 2321690Sdg i = SIGTRAP; 2331690Sdg break; 234974Sdg 2351690Sdg case T_ARITHTRAP: /* arithmetic trap */ 2361690Sdg ucode = code; 2371690Sdg i = SIGFPE; 2381690Sdg break; 2394Srgrimes 2401690Sdg case T_ASTFLT: /* Allow process switch */ 2411690Sdg astoff(); 2421690Sdg cnt.v_soft++; 2436297Sdg if (p->p_flag & P_OWEUPC) { 2441690Sdg p->p_flag &= ~P_OWEUPC; 24516725Sbde addupc_task(p, p->p_stats->p_prof.pr_addr, 24616725Sbde p->p_stats->p_prof.pr_ticks); 2471690Sdg } 2481690Sdg goto out; 2494Srgrimes 25027993Sdyson /* 25127993Sdyson * The following two traps can happen in 25227993Sdyson * vm86 mode, and, if so, we want to handle 25327993Sdyson * them specially. 25427993Sdyson */ 2551690Sdg case T_PROTFLT: /* general protection fault */ 25627993Sdyson case T_STKFLT: /* stack fault */ 25728872Sjlemon#ifdef VM86 25828872Sjlemon if (frame.tf_eflags & PSL_VM) { 25928872Sjlemon i = vm86_emulate((struct vm86frame *)&frame); 26027993Sdyson if (i == 0) 26127993Sdyson goto out; 26227993Sdyson break; 26327993Sdyson } 26428872Sjlemon#endif /* VM86 */ 26527993Sdyson /* FALL THROUGH */ 26627993Sdyson 2671690Sdg case T_SEGNPFLT: /* segment not present fault */ 2685603Sbde case T_TSSFLT: /* invalid TSS fault */ 2695603Sbde case T_DOUBLEFLT: /* double fault */ 2705603Sbde default: 2711690Sdg ucode = code + BUS_SEGM_FAULT ; 2721690Sdg i = SIGBUS; 2731690Sdg break; 2744Srgrimes 2751690Sdg case T_PAGEFLT: /* page fault */ 2761690Sdg i = trap_pfault(&frame, TRUE); 2774014Sbde if (i == -1) 2784014Sbde return; 2791690Sdg if (i == 0) 2801690Sdg goto out; 2814Srgrimes 2821690Sdg ucode = T_PAGEFLT; 2831690Sdg break; 2844Srgrimes 2851690Sdg case T_DIVIDE: /* integer divide fault */ 2861690Sdg ucode = FPE_INTDIV_TRAP; 2871690Sdg i = SIGFPE; 2881690Sdg break; 2894Srgrimes 2901690Sdg#if NISA > 0 2911690Sdg case T_NMI: 2929545Sjoerg#ifdef POWERFAIL_NMI 2939545Sjoerg goto handle_powerfail; 2949545Sjoerg#else /* !POWERFAIL_NMI */ 2952320Sdg#ifdef DDB 2961690Sdg /* NMI can be hooked up to a pushbutton for debugging */ 2971690Sdg printf ("NMI ... going to debugger\n"); 2981690Sdg if (kdb_trap (type, 0, &frame)) 2991690Sdg return; 3009545Sjoerg#endif /* DDB */ 3011690Sdg /* machine/parity/power fail/"kitchen sink" faults */ 3021690Sdg if (isa_nmi(code) == 0) return; 3032001Swollman panic("NMI indicates hardware failure"); 3049545Sjoerg#endif /* POWERFAIL_NMI */ 3059545Sjoerg#endif /* NISA > 0 */ 3064Srgrimes 3071690Sdg case T_OFLOW: /* integer overflow fault */ 3081690Sdg ucode = FPE_INTOVF_TRAP; 3091690Sdg i = SIGFPE; 3101690Sdg break; 3114Srgrimes 3121690Sdg case T_BOUND: /* bounds check fault */ 3131690Sdg ucode = FPE_SUBRNG_TRAP; 3141690Sdg i = SIGFPE; 3151690Sdg break; 3164Srgrimes 3171690Sdg case T_DNA: 3181690Sdg#if NNPX > 0 3191690Sdg /* if a transparent fault (due to context switch "late") */ 3201690Sdg if (npxdna()) 3211690Sdg return; 32217117Sbde#endif 32312817Sphk if (!pmath_emulate) { 32412817Sphk i = SIGFPE; 32512817Sphk ucode = FPE_FPU_NP_TRAP; 32612817Sphk break; 32712817Sphk } 32812817Sphk i = (*pmath_emulate)(&frame); 3295220Sbde if (i == 0) { 3305220Sbde if (!(frame.tf_eflags & PSL_T)) 3315220Sbde return; 3325220Sbde frame.tf_eflags &= ~PSL_T; 3335220Sbde i = SIGTRAP; 3345220Sbde } 3355220Sbde /* else ucode = emulator_only_knows() XXX */ 3361690Sdg break; 337974Sdg 3381690Sdg case T_FPOPFLT: /* FPU operand fetch fault */ 3391690Sdg ucode = T_FPOPFLT; 3401690Sdg i = SIGILL; 3411690Sdg break; 342974Sdg } 3431690Sdg } else { 3441690Sdg /* kernel trap */ 345974Sdg 3461690Sdg switch (type) { 3471690Sdg case T_PAGEFLT: /* page fault */ 3481690Sdg (void) trap_pfault(&frame, FALSE); 3491690Sdg return; 3504Srgrimes 35116344Sasami case T_DNA: 35216344Sasami#if NNPX > 0 35317117Sbde /* 35417117Sbde * The kernel is apparently using npx for copying. 35517117Sbde * XXX this should be fatal unless the kernel has 35617117Sbde * registered such use. 35717117Sbde */ 35816344Sasami if (npxdna()) 35916344Sasami return; 36017117Sbde#endif 36116344Sasami break; 36216344Sasami 3631690Sdg case T_PROTFLT: /* general protection fault */ 3641690Sdg case T_SEGNPFLT: /* segment not present fault */ 3655603Sbde /* 3665603Sbde * Invalid segment selectors and out of bounds 3675603Sbde * %eip's and %esp's can be set up in user mode. 3685603Sbde * This causes a fault in kernel mode when the 3695603Sbde * kernel tries to return to user mode. We want 3705603Sbde * to get this fault so that we can fix the 3715603Sbde * problem here and not have to check all the 3725603Sbde * selectors and pointers when the user changes 3735603Sbde * them. 3745603Sbde */ 3755603Sbde#define MAYBE_DORETI_FAULT(where, whereto) \ 3765603Sbde do { \ 3775603Sbde if (frame.tf_eip == (int)where) { \ 3785603Sbde frame.tf_eip = (int)whereto; \ 3795603Sbde return; \ 3805603Sbde } \ 3815603Sbde } while (0) 3825603Sbde 3835603Sbde if (intr_nesting_level == 0) { 38426494Sbde /* 38526494Sbde * Invalid %fs's and %gs's can be created using 38626494Sbde * procfs or PT_SETREGS or by invalidating the 38726494Sbde * underlying LDT entry. This causes a fault 38826494Sbde * in kernel mode when the kernel attempts to 38926494Sbde * switch contexts. Lose the bad context 39026494Sbde * (XXX) so that we can continue, and generate 39126494Sbde * a signal. 39226494Sbde */ 39326494Sbde if (frame.tf_eip == (int)cpu_switch_load_fs) { 39426494Sbde curpcb->pcb_fs = 0; 39526494Sbde psignal(p, SIGBUS); 39626494Sbde return; 39726494Sbde } 39826494Sbde if (frame.tf_eip == (int)cpu_switch_load_gs) { 39926494Sbde curpcb->pcb_gs = 0; 40026494Sbde psignal(p, SIGBUS); 40126494Sbde return; 40226494Sbde } 4035603Sbde MAYBE_DORETI_FAULT(doreti_iret, 4045603Sbde doreti_iret_fault); 4055603Sbde MAYBE_DORETI_FAULT(doreti_popl_ds, 4065603Sbde doreti_popl_ds_fault); 4075603Sbde MAYBE_DORETI_FAULT(doreti_popl_es, 4085603Sbde doreti_popl_es_fault); 40920651Sbde if (curpcb && curpcb->pcb_onfault) { 41020651Sbde frame.tf_eip = (int)curpcb->pcb_onfault; 41120651Sbde return; 41220651Sbde } 4135603Sbde } 4141690Sdg break; 4154Srgrimes 4165603Sbde case T_TSSFLT: 4175603Sbde /* 4185603Sbde * PSL_NT can be set in user mode and isn't cleared 4195603Sbde * automatically when the kernel is entered. This 4205603Sbde * causes a TSS fault when the kernel attempts to 4215603Sbde * `iret' because the TSS link is uninitialized. We 4225603Sbde * want to get this fault so that we can fix the 4235603Sbde * problem here and not every time the kernel is 4245603Sbde * entered. 4255603Sbde */ 4265603Sbde if (frame.tf_eflags & PSL_NT) { 4275603Sbde frame.tf_eflags &= ~PSL_NT; 4285603Sbde return; 4295603Sbde } 4305603Sbde break; 4315603Sbde 43211343Sbde case T_TRCTRAP: /* trace trap */ 43311343Sbde if (frame.tf_eip == (int)IDTVEC(syscall)) { 43411343Sbde /* 43511343Sbde * We've just entered system mode via the 43611343Sbde * syscall lcall. Continue single stepping 43711343Sbde * silently until the syscall handler has 43811343Sbde * saved the flags. 43911343Sbde */ 44011343Sbde return; 44111343Sbde } 44211343Sbde if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { 44311343Sbde /* 44411343Sbde * The syscall handler has now saved the 44511343Sbde * flags. Stop single stepping it. 44611343Sbde */ 44711343Sbde frame.tf_eflags &= ~PSL_T; 44811343Sbde return; 44911343Sbde } 45011343Sbde /* 45111343Sbde * Fall through. 45211343Sbde */ 45311343Sbde case T_BPTFLT: 45411343Sbde /* 45511343Sbde * If DDB is enabled, let it handle the debugger trap. 45611343Sbde * Otherwise, debugger traps "can't happen". 45711343Sbde */ 4582320Sdg#ifdef DDB 4591690Sdg if (kdb_trap (type, 0, &frame)) 4604Srgrimes return; 46111343Sbde#endif 4621690Sdg break; 4638876Srgrimes 4641072Sdg#if NISA > 0 4651690Sdg case T_NMI: 4669545Sjoerg#ifdef POWERFAIL_NMI 4679545Sjoerg#ifndef TIMER_FREQ 4689545Sjoerg# define TIMER_FREQ 1193182 4699545Sjoerg#endif 4709545Sjoerg handle_powerfail: 4719545Sjoerg { 4729545Sjoerg static unsigned lastalert = 0; 4739545Sjoerg 4749545Sjoerg if(time.tv_sec - lastalert > 10) 4759545Sjoerg { 4769545Sjoerg log(LOG_WARNING, "NMI: power fail\n"); 4779545Sjoerg sysbeep(TIMER_FREQ/880, hz); 4789545Sjoerg lastalert = time.tv_sec; 4799545Sjoerg } 4809545Sjoerg return; 4819545Sjoerg } 4829545Sjoerg#else /* !POWERFAIL_NMI */ 4832320Sdg#ifdef DDB 4841690Sdg /* NMI can be hooked up to a pushbutton for debugging */ 4851690Sdg printf ("NMI ... going to debugger\n"); 4861342Sdg if (kdb_trap (type, 0, &frame)) 4871342Sdg return; 4889545Sjoerg#endif /* DDB */ 4891690Sdg /* machine/parity/power fail/"kitchen sink" faults */ 4901690Sdg if (isa_nmi(code) == 0) return; 4911690Sdg /* FALL THROUGH */ 4929545Sjoerg#endif /* POWERFAIL_NMI */ 4939545Sjoerg#endif /* NISA > 0 */ 4941072Sdg } 4951072Sdg 4961690Sdg trap_fatal(&frame); 4974014Sbde return; 4984Srgrimes } 4994Srgrimes 5004Srgrimes trapsignal(p, i, ucode); 5011342Sdg 5027213Sdg#ifdef DEBUG 5031690Sdg eva = rcr2(); 5041690Sdg if (type <= MAX_TRAP_MSG) { 5058876Srgrimes uprintf("fatal process exception: %s", 5061690Sdg trap_msg[type]); 5071690Sdg if ((type == T_PAGEFLT) || (type == T_PROTFLT)) 5081342Sdg uprintf(", fault VA = 0x%x", eva); 5091342Sdg uprintf("\n"); 5101342Sdg } 5111342Sdg#endif 5121342Sdg 5134Srgrimesout: 5141690Sdg userret(p, &frame, sticks); 5151690Sdg} 5161690Sdg 5177214Sdg#ifdef notyet 5187214Sdg/* 5197214Sdg * This version doesn't allow a page fault to user space while 5207214Sdg * in the kernel. The rest of the kernel needs to be made "safe" 5217214Sdg * before this can be used. I think the only things remaining 5227214Sdg * to be made safe are the iBCS2 code and the process tracing/ 5237214Sdg * debugging code. 5247214Sdg */ 52512702Sphkstatic int 5261690Sdgtrap_pfault(frame, usermode) 5271690Sdg struct trapframe *frame; 5281690Sdg int usermode; 5291690Sdg{ 5301690Sdg vm_offset_t va; 5312660Sdg struct vmspace *vm = NULL; 5321690Sdg vm_map_t map = 0; 5333436Sphk int rv = 0; 5341690Sdg vm_prot_t ftype; 5351690Sdg int eva; 5361690Sdg struct proc *p = curproc; 5371690Sdg 5387214Sdg if (frame->tf_err & PGEX_W) 5397214Sdg ftype = VM_PROT_READ | VM_PROT_WRITE; 5407214Sdg else 5417214Sdg ftype = VM_PROT_READ; 5427214Sdg 5431690Sdg eva = rcr2(); 5441690Sdg va = trunc_page((vm_offset_t)eva); 5451690Sdg 5467214Sdg if (va < VM_MIN_KERNEL_ADDRESS) { 5477214Sdg vm_offset_t v; 54814243Sdyson vm_page_t mpte; 5497214Sdg 5509799Sdg if (p == NULL || 5517214Sdg (!usermode && va < VM_MAXUSER_ADDRESS && 55220651Sbde (intr_nesting_level != 0 || curpcb == NULL || 55320651Sbde curpcb->pcb_onfault == NULL))) { 5547214Sdg trap_fatal(frame); 5557214Sdg return (-1); 5567214Sdg } 5577214Sdg 5587214Sdg /* 5597214Sdg * This is a fault on non-kernel virtual memory. 5607214Sdg * vm is initialized above to NULL. If curproc is NULL 5617214Sdg * or curproc->p_vmspace is NULL the fault is fatal. 5627214Sdg */ 5637214Sdg vm = p->p_vmspace; 5647214Sdg if (vm == NULL) 5657214Sdg goto nogo; 5667214Sdg 5677214Sdg map = &vm->vm_map; 5687214Sdg 5697214Sdg /* 5707214Sdg * Keep swapout from messing with us during this 5717214Sdg * critical time. 5727214Sdg */ 5737214Sdg ++p->p_lock; 5747214Sdg 5757214Sdg /* 5767214Sdg * Grow the stack if necessary 5777214Sdg */ 5787214Sdg if ((caddr_t)va > vm->vm_maxsaddr 5797214Sdg && (caddr_t)va < (caddr_t)USRSTACK) { 5807214Sdg if (!grow(p, va)) { 5817214Sdg rv = KERN_FAILURE; 5827214Sdg --p->p_lock; 5837214Sdg goto nogo; 5847214Sdg } 5857214Sdg } 5867214Sdg 5877214Sdg /* Fault in the user page: */ 58824666Sdyson rv = vm_fault(map, va, ftype, 58924666Sdyson (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0); 5907214Sdg 5917214Sdg --p->p_lock; 5927214Sdg } else { 5937214Sdg /* 5947214Sdg * Don't allow user-mode faults in kernel address space. 5957214Sdg */ 5967214Sdg if (usermode) 5977214Sdg goto nogo; 5987214Sdg 5997214Sdg /* 6007214Sdg * Since we know that kernel virtual address addresses 6017214Sdg * always have pte pages mapped, we just have to fault 6027214Sdg * the page. 6037214Sdg */ 6047214Sdg rv = vm_fault(kernel_map, va, ftype, FALSE); 6057214Sdg } 6067214Sdg 6077214Sdg if (rv == KERN_SUCCESS) 6087214Sdg return (0); 6097214Sdgnogo: 6107214Sdg if (!usermode) { 61120651Sbde if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) { 6127214Sdg frame->tf_eip = (int)curpcb->pcb_onfault; 6137214Sdg return (0); 6147214Sdg } 6157214Sdg trap_fatal(frame); 6167214Sdg return (-1); 6177214Sdg } 6187214Sdg 6197214Sdg /* kludge to pass faulting virtual address to sendsig */ 6207214Sdg frame->tf_err = eva; 6217214Sdg 6227214Sdg return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); 6237214Sdg} 6247214Sdg#endif 6257214Sdg 6267214Sdgint 6277214Sdgtrap_pfault(frame, usermode) 6287214Sdg struct trapframe *frame; 6297214Sdg int usermode; 6307214Sdg{ 6317214Sdg vm_offset_t va; 6327214Sdg struct vmspace *vm = NULL; 6337214Sdg vm_map_t map = 0; 6347214Sdg int rv = 0; 6357214Sdg vm_prot_t ftype; 6367214Sdg int eva; 6377214Sdg struct proc *p = curproc; 6387214Sdg 6397214Sdg eva = rcr2(); 6407214Sdg va = trunc_page((vm_offset_t)eva); 6417214Sdg 6422660Sdg if (va >= KERNBASE) { 6432660Sdg /* 6442660Sdg * Don't allow user-mode faults in kernel address space. 6452660Sdg */ 6462660Sdg if (usermode) 6472660Sdg goto nogo; 6481690Sdg 6491690Sdg map = kernel_map; 6501690Sdg } else { 6512660Sdg /* 6522660Sdg * This is a fault on non-kernel virtual memory. 6532660Sdg * vm is initialized above to NULL. If curproc is NULL 6542660Sdg * or curproc->p_vmspace is NULL the fault is fatal. 6552660Sdg */ 6562660Sdg if (p != NULL) 6572660Sdg vm = p->p_vmspace; 6582660Sdg 6592660Sdg if (vm == NULL) 6602660Sdg goto nogo; 6612660Sdg 6621690Sdg map = &vm->vm_map; 6631690Sdg } 6641690Sdg 6651690Sdg if (frame->tf_err & PGEX_W) 6661690Sdg ftype = VM_PROT_READ | VM_PROT_WRITE; 6671690Sdg else 6681690Sdg ftype = VM_PROT_READ; 6691690Sdg 6701690Sdg if (map != kernel_map) { 6714Srgrimes /* 6721690Sdg * Keep swapout from messing with us during this 6731690Sdg * critical time. 6744Srgrimes */ 6751690Sdg ++p->p_lock; 6761690Sdg 6771690Sdg /* 6781690Sdg * Grow the stack if necessary 6791690Sdg */ 6801690Sdg if ((caddr_t)va > vm->vm_maxsaddr 6811690Sdg && (caddr_t)va < (caddr_t)USRSTACK) { 6821690Sdg if (!grow(p, va)) { 6831690Sdg rv = KERN_FAILURE; 6841690Sdg --p->p_lock; 6851690Sdg goto nogo; 6861690Sdg } 6871690Sdg } 6881690Sdg 6891690Sdg /* Fault in the user page: */ 69024666Sdyson rv = vm_fault(map, va, ftype, 69124666Sdyson (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0); 6921690Sdg 6931690Sdg --p->p_lock; 6941690Sdg } else { 6951690Sdg /* 69624666Sdyson * Don't have to worry about process locking or stacks in the kernel. 6971690Sdg */ 6981690Sdg rv = vm_fault(map, va, ftype, FALSE); 6994Srgrimes } 7004Srgrimes 7011690Sdg if (rv == KERN_SUCCESS) 7021690Sdg return (0); 7031690Sdgnogo: 7041690Sdg if (!usermode) { 70520651Sbde if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) { 7061690Sdg frame->tf_eip = (int)curpcb->pcb_onfault; 7071690Sdg return (0); 7084Srgrimes } 7091690Sdg trap_fatal(frame); 7104014Sbde return (-1); 7114Srgrimes } 7121690Sdg 7131690Sdg /* kludge to pass faulting virtual address to sendsig */ 7141690Sdg frame->tf_err = eva; 7151690Sdg 7161690Sdg return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); 7174Srgrimes} 7184Srgrimes 71912702Sphkstatic void 7201690Sdgtrap_fatal(frame) 7211690Sdg struct trapframe *frame; 7221690Sdg{ 72314837Sbde int code, type, eva, ss, esp; 7243258Sdg struct soft_segment_descriptor softseg; 7251690Sdg 7261690Sdg code = frame->tf_err; 7271690Sdg type = frame->tf_trapno; 7281690Sdg eva = rcr2(); 7294014Sbde sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); 7301690Sdg 7311690Sdg if (type <= MAX_TRAP_MSG) 7321690Sdg printf("\n\nFatal trap %d: %s while in %s mode\n", 7331690Sdg type, trap_msg[type], 73427993Sdyson frame->tf_eflags & PSL_VM ? "vm86" : 73528496Scharnier ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); 73625164Speter#ifdef SMP 73729128Speter /* three seperate prints in case of a trap on an unmapped page */ 73829128Speter printf("mp_lock = %08x; ", mp_lock); 73929128Speter printf("cpuid = %d; ", cpuid); 74029128Speter printf("lapic.id = %08x\n", lapic.id); 74125164Speter#endif 7421690Sdg if (type == T_PAGEFLT) { 7431690Sdg printf("fault virtual address = 0x%x\n", eva); 7441690Sdg printf("fault code = %s %s, %s\n", 7451690Sdg code & PGEX_U ? "user" : "supervisor", 7461690Sdg code & PGEX_W ? "write" : "read", 7471690Sdg code & PGEX_P ? "protection violation" : "page not present"); 7481690Sdg } 74914837Sbde printf("instruction pointer = 0x%x:0x%x\n", 75014837Sbde frame->tf_cs & 0xffff, frame->tf_eip); 75128496Scharnier if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { 75214837Sbde ss = frame->tf_ss & 0xffff; 75314837Sbde esp = frame->tf_esp; 75414837Sbde } else { 75514837Sbde ss = GSEL(GDATA_SEL, SEL_KPL); 75614837Sbde esp = (int)&frame->tf_esp; 75714837Sbde } 75814837Sbde printf("stack pointer = 0x%x:0x%x\n", ss, esp); 75914837Sbde printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); 7603258Sdg printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", 76114837Sbde softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); 7623258Sdg printf(" = DPL %d, pres %d, def32 %d, gran %d\n", 76314837Sbde softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, 76414837Sbde softseg.ssd_gran); 7651690Sdg printf("processor eflags = "); 7662578Sbde if (frame->tf_eflags & PSL_T) 76714837Sbde printf("trace trap, "); 7682578Sbde if (frame->tf_eflags & PSL_I) 7691690Sdg printf("interrupt enabled, "); 7702578Sbde if (frame->tf_eflags & PSL_NT) 7711690Sdg printf("nested task, "); 7722578Sbde if (frame->tf_eflags & PSL_RF) 7731690Sdg printf("resume, "); 7742578Sbde if (frame->tf_eflags & PSL_VM) 7751690Sdg printf("vm86, "); 7762578Sbde printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); 7771690Sdg printf("current process = "); 7781690Sdg if (curproc) { 7793436Sphk printf("%lu (%s)\n", 7803436Sphk (u_long)curproc->p_pid, curproc->p_comm ? 7811690Sdg curproc->p_comm : ""); 7821690Sdg } else { 7831690Sdg printf("Idle\n"); 7841690Sdg } 7851690Sdg printf("interrupt mask = "); 7861690Sdg if ((cpl & net_imask) == net_imask) 7871690Sdg printf("net "); 7881690Sdg if ((cpl & tty_imask) == tty_imask) 7891690Sdg printf("tty "); 7901690Sdg if ((cpl & bio_imask) == bio_imask) 7911690Sdg printf("bio "); 79229675Sgibbs if ((cpl & cam_imask) == cam_imask) 79329675Sgibbs printf("cam "); 7941690Sdg if (cpl == 0) 7951690Sdg printf("none"); 79628442Sfsmp#ifdef SMP 79728442Sfsmp/** 79828442Sfsmp * XXX FIXME: 79928442Sfsmp * we probably SHOULD have stopped the other CPUs before now! 80028442Sfsmp * another CPU COULD have been touching cpl at this moment... 80128442Sfsmp */ 80228442Sfsmp printf(" <- SMP: XXX"); 80328442Sfsmp#endif 8041690Sdg printf("\n"); 8051690Sdg 8061690Sdg#ifdef KDB 8071690Sdg if (kdb_trap(&psl)) 8081690Sdg return; 8091690Sdg#endif 8102320Sdg#ifdef DDB 8111690Sdg if (kdb_trap (type, 0, frame)) 8121690Sdg return; 8131690Sdg#endif 81425164Speter printf("trap number = %d\n", type); 8151690Sdg if (type <= MAX_TRAP_MSG) 8161690Sdg panic(trap_msg[type]); 8171690Sdg else 8181690Sdg panic("unknown/reserved trap"); 8191690Sdg} 8201690Sdg 8214Srgrimes/* 82212929Sdg * Double fault handler. Called when a fault occurs while writing 82312929Sdg * a frame for a trap/exception onto the stack. This usually occurs 82412929Sdg * when the stack overflows (such is the case with infinite recursion, 82512929Sdg * for example). 82612929Sdg * 82712929Sdg * XXX Note that the current PTD gets replaced by IdlePTD when the 82812929Sdg * task switch occurs. This means that the stack that was active at 82912929Sdg * the time of the double fault is not available at <kstack> unless 83012930Sdg * the machine was idle when the double fault occurred. The downside 83112929Sdg * of this is that "trace <ebp>" in ddb won't work. 83212929Sdg */ 83312929Sdgvoid 83412929Sdgdblfault_handler() 83512929Sdg{ 83624925Sbde printf("\nFatal double fault:\n"); 83724925Sbde printf("eip = 0x%x\n", common_tss.tss_eip); 83824925Sbde printf("esp = 0x%x\n", common_tss.tss_esp); 83924925Sbde printf("ebp = 0x%x\n", common_tss.tss_ebp); 84026812Speter#ifdef SMP 84129128Speter /* three seperate prints in case of a trap on an unmapped page */ 84229128Speter printf("mp_lock = %08x; ", mp_lock); 84329128Speter printf("cpuid = %d; ", cpuid); 84429128Speter printf("lapic.id = %08x\n", lapic.id); 84525164Speter#endif 84612929Sdg panic("double fault"); 84712929Sdg} 84812929Sdg 84912929Sdg/* 850200Sdg * Compensate for 386 brain damage (missing URKR). 851200Sdg * This is a little simpler than the pagefault handler in trap() because 852200Sdg * it the page tables have already been faulted in and high addresses 853200Sdg * are thrown out early for other reasons. 8544Srgrimes */ 855200Sdgint trapwrite(addr) 856200Sdg unsigned addr; 857200Sdg{ 858200Sdg struct proc *p; 85921953Sdyson vm_offset_t va; 860200Sdg struct vmspace *vm; 861974Sdg int rv; 8624Srgrimes 8634Srgrimes va = trunc_page((vm_offset_t)addr); 864200Sdg /* 865200Sdg * XXX - MAX is END. Changed > to >= for temp. fix. 866200Sdg */ 867200Sdg if (va >= VM_MAXUSER_ADDRESS) 868200Sdg return (1); 8691127Sdg 870200Sdg p = curproc; 871200Sdg vm = p->p_vmspace; 872974Sdg 8731549Srgrimes ++p->p_lock; 874974Sdg 875806Sdg if ((caddr_t)va >= vm->vm_maxsaddr 876849Sdg && (caddr_t)va < (caddr_t)USRSTACK) { 8771127Sdg if (!grow(p, va)) { 8781549Srgrimes --p->p_lock; 879200Sdg return (1); 880974Sdg } 881200Sdg } 882200Sdg 8831127Sdg /* 8841127Sdg * fault the data page 8851127Sdg */ 88624666Sdyson rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY); 8871127Sdg 8881549Srgrimes --p->p_lock; 889974Sdg 890974Sdg if (rv != KERN_SUCCESS) 891974Sdg return 1; 8921127Sdg 893200Sdg return (0); 8944Srgrimes} 8954Srgrimes 8964Srgrimes/* 89711343Sbde * System call request from POSIX system call gate interface to kernel. 8984Srgrimes * Like trap(), argument is call by reference. 8994Srgrimes */ 900798Swollmanvoid 9014Srgrimessyscall(frame) 9021690Sdg struct trapframe frame; 9034Srgrimes{ 9041690Sdg caddr_t params; 9051690Sdg int i; 9061690Sdg struct sysent *callp; 9071690Sdg struct proc *p = curproc; 9081549Srgrimes u_quad_t sticks; 90910157Sdg int error; 91030994Sphk int args[8]; 9111549Srgrimes u_int code; 9124Srgrimes 91331389Sbde#ifdef DIAGNOSTIC 91428496Scharnier if (ISPL(frame.tf_cs) != SEL_UPL) 9154Srgrimes panic("syscall"); 91631389Sbde#endif 91731389Sbde sticks = p->p_sticks; 91825555Speter p->p_md.md_regs = &frame; 91910157Sdg params = (caddr_t)frame.tf_esp + sizeof(int); 920924Sdg code = frame.tf_eax; 92114331Speter if (p->p_sysent->sv_prepsyscall) { 92214331Speter (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); 92314331Speter } else { 9241549Srgrimes /* 92514331Speter * Need to check if this is a 32 bit or 64 bit syscall. 9261549Srgrimes */ 92714331Speter if (code == SYS_syscall) { 92814331Speter /* 92914331Speter * Code is first argument, followed by actual args. 93014331Speter */ 93114331Speter code = fuword(params); 93214331Speter params += sizeof(int); 93314331Speter } else if (code == SYS___syscall) { 93414331Speter /* 93514331Speter * Like syscall, but code is a quad, so as to maintain 93614331Speter * quad alignment for the rest of the arguments. 93714331Speter */ 93814331Speter code = fuword(params); 93914331Speter params += sizeof(quad_t); 94014331Speter } 9414Srgrimes } 9421549Srgrimes 9432257Ssos if (p->p_sysent->sv_mask) 94410157Sdg code &= p->p_sysent->sv_mask; 9458876Srgrimes 9462357Sbde if (code >= p->p_sysent->sv_size) 9472257Ssos callp = &p->p_sysent->sv_table[0]; 9482257Ssos else 9492257Ssos callp = &p->p_sysent->sv_table[code]; 9504Srgrimes 95114331Speter if (params && (i = callp->sy_narg * sizeof(int)) && 9524Srgrimes (error = copyin(params, (caddr_t)args, (u_int)i))) { 9534Srgrimes#ifdef KTRACE 9544Srgrimes if (KTRPOINT(p, KTR_SYSCALL)) 955879Swollman ktrsyscall(p->p_tracep, code, callp->sy_narg, args); 9564Srgrimes#endif 9571690Sdg goto bad; 9584Srgrimes } 9594Srgrimes#ifdef KTRACE 9604Srgrimes if (KTRPOINT(p, KTR_SYSCALL)) 961879Swollman ktrsyscall(p->p_tracep, code, callp->sy_narg, args); 9624Srgrimes#endif 96330994Sphk p->p_retval[0] = 0; 96430994Sphk p->p_retval[1] = frame.tf_edx; 9651690Sdg 96630994Sphk error = (*callp->sy_call)(p, args); 9671690Sdg 9681690Sdg switch (error) { 9691690Sdg 9701690Sdg case 0: 9714Srgrimes /* 9721690Sdg * Reinitialize proc pointer `p' as it may be different 9731690Sdg * if this is a child returning from fork syscall. 9744Srgrimes */ 9751690Sdg p = curproc; 97630994Sphk frame.tf_eax = p->p_retval[0]; 97730994Sphk frame.tf_edx = p->p_retval[1]; 97811343Sbde frame.tf_eflags &= ~PSL_C; 9791690Sdg break; 9801690Sdg 9811690Sdg case ERESTART: 98210157Sdg /* 98314331Speter * Reconstruct pc, assuming lcall $X,y is 7 bytes, 98414331Speter * int 0x80 is 2 bytes. We saved this in tf_err. 98510157Sdg */ 98614331Speter frame.tf_eip -= frame.tf_err; 9871690Sdg break; 9881690Sdg 9891690Sdg case EJUSTRETURN: 9901690Sdg break; 9911690Sdg 9921690Sdg default: 99310157Sdgbad: 9943495Ssos if (p->p_sysent->sv_errsize) 9953495Ssos if (error >= p->p_sysent->sv_errsize) 9963495Ssos error = -1; /* XXX */ 9978876Srgrimes else 9983495Ssos error = p->p_sysent->sv_errtbl[error]; 9991690Sdg frame.tf_eax = error; 100011343Sbde frame.tf_eflags |= PSL_C; 10011690Sdg break; 10024Srgrimes } 10034Srgrimes 100427993Sdyson if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) { 100511343Sbde /* Traced syscall. */ 100611163Sjulian frame.tf_eflags &= ~PSL_T; 100711343Sbde trapsignal(p, SIGTRAP, 0); 100811163Sjulian } 100911343Sbde 10101690Sdg userret(p, &frame, sticks); 10111690Sdg 10124Srgrimes#ifdef KTRACE 10134Srgrimes if (KTRPOINT(p, KTR_SYSRET)) 101430994Sphk ktrsysret(p->p_tracep, code, error, p->p_retval[0]); 10154Srgrimes#endif 10164Srgrimes} 101724691Speter 101824691Speter/* 101924691Speter * Simplified back end of syscall(), used when returning from fork() 102024691Speter * directly into user mode. 102124691Speter */ 102224691Spetervoid 102324691Speterfork_return(p, frame) 102424691Speter struct proc *p; 102524691Speter struct trapframe frame; 102624691Speter{ 102724691Speter frame.tf_eax = 0; /* Child returns zero */ 102824691Speter frame.tf_eflags &= ~PSL_C; /* success */ 102925472Sdyson frame.tf_edx = 1; 103024691Speter 103124691Speter userret(p, &frame, 0); 103224691Speter#ifdef KTRACE 103324691Speter if (KTRPOINT(p, KTR_SYSRET)) 103424691Speter ktrsysret(p->p_tracep, SYS_fork, 0, 0); 103524691Speter#endif 103624691Speter} 1037