subr_syscall.c revision 5
1/*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * the University of Utah, and William Jolitz. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)trap.c 7.4 (Berkeley) 5/13/91 37 * 38 * PATCHES MAGIC LEVEL PATCH THAT GOT US HERE 39 * -------------------- ----- ---------------------- 40 * CURRENT PATCH LEVEL: 1 00137 41 * -------------------- ----- ---------------------- 42 * 43 * 08 Apr 93 Bruce Evans Several VM system fixes 44 * Paul Kranenburg Add counter for vmstat 45 */ 46static char rcsid[] = "$Header: /usr/bill/working/sys/i386/i386/RCS/trap.c,v 1.2 92/01/21 14:22:13 william Exp $"; 47 48/* 49 * 386 Trap and System call handleing 50 */ 51 52#include "machine/cpu.h" 53#include "machine/psl.h" 54#include "machine/reg.h" 55 56#include "param.h" 57#include "systm.h" 58#include "proc.h" 59#include "user.h" 60#include "acct.h" 61#include "kernel.h" 62#ifdef KTRACE 63#include "ktrace.h" 64#endif 65 66#include "vm/vm_param.h" 67#include "vm/pmap.h" 68#include "vm/vm_map.h" 69#include "sys/vmmeter.h" 70 71#include "machine/trap.h" 72 73 74struct sysent sysent[]; 75int nsysent; 76int dostacklimits; 77unsigned rcr2(); 78extern short cpl; 79 80 81/* 82 * trap(frame): 83 * Exception, fault, and trap interface to BSD kernel. This 84 * common code is called from assembly language IDT gate entry 85 * routines that prepare a suitable stack frame, and restore this 86 * frame after the exception has been processed. Note that the 87 * effect is as if the arguments were passed call by reference. 88 */ 89 90/*ARGSUSED*/ 91trap(frame) 92 struct trapframe frame; 93{ 94 register int i; 95 register struct proc *p = curproc; 96 struct timeval syst; 97 int ucode, type, code, eva; 98 99 frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */ 100 type = frame.tf_trapno; 101#include "ddb.h" 102#if NDDB > 0 103 if (curpcb && curpcb->pcb_onfault) { 104 if (frame.tf_trapno == T_BPTFLT 105 || frame.tf_trapno == T_TRCTRAP) 106 if (kdb_trap (type, 0, &frame)) 107 return; 108 } 109#endif 110 111/*pg("trap type %d code = %x eip = %x cs = %x eva = %x esp %x", 112 frame.tf_trapno, frame.tf_err, frame.tf_eip, 113 frame.tf_cs, rcr2(), frame.tf_esp);*/ 114if(curpcb == 0 || curproc == 0) goto we_re_toast; 115 if (curpcb->pcb_onfault && frame.tf_trapno != 0xc) { 116copyfault: 117 frame.tf_eip = (int)curpcb->pcb_onfault; 118 return; 119 } 120 121 syst = p->p_stime; 122 if (ISPL(frame.tf_cs) == SEL_UPL) { 123 type |= T_USER; 124 p->p_regs = (int *)&frame; 125 curpcb->pcb_flags |= FM_TRAP; /* used by sendsig */ 126 } 127 128 ucode=0; 129 eva = rcr2(); 130 code = frame.tf_err; 131 switch (type) { 132 133 default: 134 we_re_toast: 135#ifdef KDB 136 if (kdb_trap(&psl)) 137 return; 138#endif 139#if NDDB > 0 140 if (kdb_trap (type, 0, &frame)) 141 return; 142#endif 143 144 printf("trap type %d code = %x eip = %x cs = %x eflags = %x ", 145 frame.tf_trapno, frame.tf_err, frame.tf_eip, 146 frame.tf_cs, frame.tf_eflags); 147 eva = rcr2(); 148 printf("cr2 %x cpl %x\n", eva, cpl); 149 /* type &= ~T_USER; */ /* XXX what the hell is this */ 150 panic("trap"); 151 /*NOTREACHED*/ 152 153 case T_SEGNPFLT|T_USER: 154 case T_STKFLT|T_USER: 155 case T_PROTFLT|T_USER: /* protection fault */ 156 ucode = code + BUS_SEGM_FAULT ; 157 i = SIGBUS; 158 break; 159 160 case T_PRIVINFLT|T_USER: /* privileged instruction fault */ 161 case T_RESADFLT|T_USER: /* reserved addressing fault */ 162 case T_RESOPFLT|T_USER: /* reserved operand fault */ 163 case T_FPOPFLT|T_USER: /* coprocessor operand fault */ 164 ucode = type &~ T_USER; 165 i = SIGILL; 166 break; 167 168 case T_ASTFLT|T_USER: /* Allow process switch */ 169 astoff(); 170 cnt.v_soft++; 171 if ((p->p_flag & SOWEUPC) && p->p_stats->p_prof.pr_scale) { 172 addupc(frame.tf_eip, &p->p_stats->p_prof, 1); 173 p->p_flag &= ~SOWEUPC; 174 } 175 goto out; 176 177 case T_DNA|T_USER: 178#ifdef NPX 179 /* if a transparent fault (due to context switch "late") */ 180 if (npxdna()) return; 181#endif 182 i = math_emulate(&frame); 183 if (i == 0) return; 184 ucode = FPE_FPU_NP_TRAP; 185 break; 186 187 case T_BOUND|T_USER: 188 ucode = FPE_SUBRNG_TRAP; 189 i = SIGFPE; 190 break; 191 192 case T_OFLOW|T_USER: 193 ucode = FPE_INTOVF_TRAP; 194 i = SIGFPE; 195 break; 196 197 case T_DIVIDE|T_USER: 198 ucode = FPE_INTDIV_TRAP; 199 i = SIGFPE; 200 break; 201 202 case T_ARITHTRAP|T_USER: 203 ucode = code; 204 i = SIGFPE; 205 break; 206 207 case T_PAGEFLT: /* allow page faults in kernel mode */ 208#if 0 209 /* XXX - check only applies to 386's and 486's with WP off */ 210 if (code & PGEX_P) goto we_re_toast; 211#endif 212 213 /* fall into */ 214 case T_PAGEFLT|T_USER: /* page fault */ 215 { 216 register vm_offset_t va; 217 register struct vmspace *vm = p->p_vmspace; 218 register vm_map_t map; 219 int rv; 220 vm_prot_t ftype; 221 extern vm_map_t kernel_map; 222 unsigned nss,v; 223 224 va = trunc_page((vm_offset_t)eva); 225 /* 226 * Avoid even looking at pde_v(va) for high va's. va's 227 * above VM_MAX_KERNEL_ADDRESS don't correspond to normal 228 * PDE's (half of them correspond to APDEpde and half to 229 * an unmapped kernel PDE). va's betweeen 0xFEC00000 and 230 * VM_MAX_KERNEL_ADDRESS correspond to unmapped kernel PDE's 231 * (XXX - why are only 3 initialized when 6 are required to 232 * reach VM_MAX_KERNEL_ADDRESS?). Faulting in an unmapped 233 * kernel page table would give inconsistent PTD's. 234 * 235 * XXX - faulting in unmapped page tables wastes a page if 236 * va turns out to be invalid. 237 * 238 * XXX - should "kernel address space" cover the kernel page 239 * tables? Might have same problem with PDEpde as with 240 * APDEpde (or there may be no problem with APDEpde). 241 */ 242 if (va > 0xFEBFF000) { 243 rv = KERN_FAILURE; /* becomes SIGBUS */ 244 goto nogo; 245 } 246 /* 247 * It is only a kernel address space fault iff: 248 * 1. (type & T_USER) == 0 and 249 * 2. pcb_onfault not set or 250 * 3. pcb_onfault set but supervisor space fault 251 * The last can occur during an exec() copyin where the 252 * argument space is lazy-allocated. 253 */ 254 if (type == T_PAGEFLT && va >= KERNBASE) 255 map = kernel_map; 256 else 257 map = &vm->vm_map; 258 if (code & PGEX_W) 259 ftype = VM_PROT_READ | VM_PROT_WRITE; 260 else 261 ftype = VM_PROT_READ; 262 263#ifdef DEBUG 264 if (map == kernel_map && va == 0) { 265 printf("trap: bad kernel access at %x\n", va); 266 goto we_re_toast; 267 } 268#endif 269 270 /* 271 * XXX: rude hack to make stack limits "work" 272 */ 273 nss = 0; 274 if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map 275 && dostacklimits) { 276 nss = clrnd(btoc((unsigned)vm->vm_maxsaddr 277 + MAXSSIZ - (unsigned)va)); 278 if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 279/*pg("trap rlimit %d, maxsaddr %x va %x ", nss, vm->vm_maxsaddr, va);*/ 280 rv = KERN_FAILURE; 281 goto nogo; 282 } 283 } 284 285 /* check if page table is mapped, if not, fault it first */ 286#define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v) 287 if (!pde_v(va)) { 288 v = trunc_page(vtopte(va)); 289 rv = vm_fault(map, v, ftype, FALSE); 290 if (rv != KERN_SUCCESS) goto nogo; 291 /* check if page table fault, increment wiring */ 292 vm_map_pageable(map, v, round_page(v+1), FALSE); 293 } else v=0; 294 rv = vm_fault(map, va, ftype, FALSE); 295 if (rv == KERN_SUCCESS) { 296 /* 297 * XXX: continuation of rude stack hack 298 */ 299 if (nss > vm->vm_ssize) 300 vm->vm_ssize = nss; 301 va = trunc_page(vtopte(va)); 302 /* for page table, increment wiring 303 as long as not a page table fault as well */ 304 if (!v && type != T_PAGEFLT) 305 vm_map_pageable(map, va, round_page(va+1), FALSE); 306 if (type == T_PAGEFLT) 307 return; 308 goto out; 309 } 310nogo: 311 if (type == T_PAGEFLT) { 312 if (curpcb->pcb_onfault) 313 goto copyfault; 314 printf("vm_fault(%x, %x, %x, 0) -> %x\n", 315 map, va, ftype, rv); 316 printf(" type %x, code %x\n", 317 type, code); 318 goto we_re_toast; 319 } 320 i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV; 321 break; 322 } 323 324#if NDDB == 0 325 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ 326 frame.tf_eflags &= ~PSL_T; 327 328 /* Q: how do we turn it on again? */ 329 return; 330#endif 331 332 case T_BPTFLT|T_USER: /* bpt instruction fault */ 333 case T_TRCTRAP|T_USER: /* trace trap */ 334 frame.tf_eflags &= ~PSL_T; 335 i = SIGTRAP; 336 break; 337 338#include "isa.h" 339#if NISA > 0 340 case T_NMI: 341 case T_NMI|T_USER: 342#if NDDB > 0 343 /* NMI can be hooked up to a pushbutton for debugging */ 344 printf ("NMI ... going to debugger\n"); 345 if (kdb_trap (type, 0, &frame)) 346 return; 347#endif 348 /* machine/parity/power fail/"kitchen sink" faults */ 349 if(isa_nmi(code) == 0) return; 350 else goto we_re_toast; 351#endif 352 } 353 354 trapsignal(p, i, ucode); 355 if ((type & T_USER) == 0) 356 return; 357out: 358 while (i = CURSIG(p)) 359 psig(i); 360 p->p_pri = p->p_usrpri; 361 if (want_resched) { 362 /* 363 * Since we are curproc, clock will normally just change 364 * our priority without moving us from one queue to another 365 * (since the running process is not on a queue.) 366 * If that happened after we setrq ourselves but before we 367 * swtch()'ed, we might not be on the queue indicated by 368 * our priority. 369 */ 370 (void) splclock(); 371 setrq(p); 372 p->p_stats->p_ru.ru_nivcsw++; 373 swtch(); 374 (void) splnone(); 375 while (i = CURSIG(p)) 376 psig(i); 377 } 378 if (p->p_stats->p_prof.pr_scale) { 379 int ticks; 380 struct timeval *tv = &p->p_stime; 381 382 ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + 383 (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); 384 if (ticks) { 385#ifdef PROFTIMER 386 extern int profscale; 387 addupc(frame.tf_eip, &p->p_stats->p_prof, 388 ticks * profscale); 389#else 390 addupc(frame.tf_eip, &p->p_stats->p_prof, ticks); 391#endif 392 } 393 } 394 curpri = p->p_pri; 395 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ 396} 397 398/* 399 * Compensate for 386 brain damage (missing URKR) 400 */ 401int trapwrite(unsigned addr) { 402 int rv; 403 vm_offset_t va; 404 405 va = trunc_page((vm_offset_t)addr); 406 if (va > VM_MAXUSER_ADDRESS) return(1); 407 rv = vm_fault(&curproc->p_vmspace->vm_map, va, 408 VM_PROT_READ | VM_PROT_WRITE, FALSE); 409 if (rv == KERN_SUCCESS) return(0); 410 else return(1); 411} 412 413/* 414 * syscall(frame): 415 * System call request from POSIX system call gate interface to kernel. 416 * Like trap(), argument is call by reference. 417 */ 418/*ARGSUSED*/ 419syscall(frame) 420 volatile struct syscframe frame; 421{ 422 register int *locr0 = ((int *)&frame); 423 register caddr_t params; 424 register int i; 425 register struct sysent *callp; 426 register struct proc *p = curproc; 427 struct timeval syst; 428 int error, opc; 429 int args[8], rval[2]; 430 int code; 431 432#ifdef lint 433 r0 = 0; r0 = r0; r1 = 0; r1 = r1; 434#endif 435 syst = p->p_stime; 436 if (ISPL(frame.sf_cs) != SEL_UPL) 437 panic("syscall"); 438 439 code = frame.sf_eax; 440 curpcb->pcb_flags &= ~FM_TRAP; /* used by sendsig */ 441 p->p_regs = (int *)&frame; 442 params = (caddr_t)frame.sf_esp + sizeof (int) ; 443 444 /* 445 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. 446 */ 447 opc = frame.sf_eip - 7; 448 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 449 if (callp == sysent) { 450 i = fuword(params); 451 params += sizeof (int); 452 callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; 453 } 454 455 if ((i = callp->sy_narg * sizeof (int)) && 456 (error = copyin(params, (caddr_t)args, (u_int)i))) { 457 frame.sf_eax = error; 458 frame.sf_eflags |= PSL_C; /* carry bit */ 459#ifdef KTRACE 460 if (KTRPOINT(p, KTR_SYSCALL)) 461 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 462#endif 463 goto done; 464 } 465#ifdef KTRACE 466 if (KTRPOINT(p, KTR_SYSCALL)) 467 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 468#endif 469 rval[0] = 0; 470 rval[1] = frame.sf_edx; 471/*pg("%d. s %d\n", p->p_pid, code);*/ 472 error = (*callp->sy_call)(p, args, rval); 473 if (error == ERESTART) 474 frame.sf_eip = opc; 475 else if (error != EJUSTRETURN) { 476 if (error) { 477/*pg("error %d", error);*/ 478 frame.sf_eax = error; 479 frame.sf_eflags |= PSL_C; /* carry bit */ 480 } else { 481 frame.sf_eax = rval[0]; 482 frame.sf_edx = rval[1]; 483 frame.sf_eflags &= ~PSL_C; /* carry bit */ 484 } 485 } 486 /* else if (error == EJUSTRETURN) */ 487 /* nothing to do */ 488done: 489 /* 490 * Reinitialize proc pointer `p' as it may be different 491 * if this is a child returning from fork syscall. 492 */ 493 p = curproc; 494 while (i = CURSIG(p)) 495 psig(i); 496 p->p_pri = p->p_usrpri; 497 if (want_resched) { 498 /* 499 * Since we are curproc, clock will normally just change 500 * our priority without moving us from one queue to another 501 * (since the running process is not on a queue.) 502 * If that happened after we setrq ourselves but before we 503 * swtch()'ed, we might not be on the queue indicated by 504 * our priority. 505 */ 506 (void) splclock(); 507 setrq(p); 508 p->p_stats->p_ru.ru_nivcsw++; 509 swtch(); 510 (void) splnone(); 511 while (i = CURSIG(p)) 512 psig(i); 513 } 514 if (p->p_stats->p_prof.pr_scale) { 515 int ticks; 516 struct timeval *tv = &p->p_stime; 517 518 ticks = ((tv->tv_sec - syst.tv_sec) * 1000 + 519 (tv->tv_usec - syst.tv_usec) / 1000) / (tick / 1000); 520 if (ticks) { 521#ifdef PROFTIMER 522 extern int profscale; 523 addupc(frame.sf_eip, &p->p_stats->p_prof, 524 ticks * profscale); 525#else 526 addupc(frame.sf_eip, &p->p_stats->p_prof, ticks); 527#endif 528 } 529 } 530 curpri = p->p_pri; 531#ifdef KTRACE 532 if (KTRPOINT(p, KTR_SYSRET)) 533 ktrsysret(p->p_tracep, code, error, rval[0]); 534#endif 535#ifdef DIAGNOSTICx 536{ extern int _udatasel, _ucodesel; 537 if (frame.sf_ss != _udatasel) 538 printf("ss %x call %d\n", frame.sf_ss, code); 539 if ((frame.sf_cs&0xffff) != _ucodesel) 540 printf("cs %x call %d\n", frame.sf_cs, code); 541 if (frame.sf_eip > VM_MAXUSER_ADDRESS) { 542 printf("eip %x call %d\n", frame.sf_eip, code); 543 frame.sf_eip = 0; 544 } 545} 546#endif 547} 548