/* subr_syscall.c revision 4014 */
1/*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 38 * $Id: trap.c,v 1.40 1994/10/21 01:18:38 wollman Exp $ 39 */ 40 41/* 42 * 386 Trap and System call handling 43 */ 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/proc.h> 48#include <sys/user.h> 49#include <sys/acct.h> 50#include <sys/kernel.h> 51#include <sys/syscall.h> 52#include <sys/sysent.h> 53#ifdef KTRACE 54#include <sys/ktrace.h> 55#endif 56 57#include <vm/vm_param.h> 58#include <vm/pmap.h> 59#include <vm/vm_map.h> 60#include <vm/vm_page.h> 61 62#include <machine/cpu.h> 63#include <machine/psl.h> 64#include <machine/reg.h> 65#include <machine/trap.h> 66#include <machine/../isa/isa_device.h> 67 68#include "isa.h" 69#include "npx.h" 70 71int trap_pfault __P((struct trapframe *, int)); 72void trap_fatal __P((struct trapframe *)); 73 74#define MAX_TRAP_MSG 27 75char *trap_msg[] = { 76 "reserved addressing fault", /* 0 T_RESADFLT */ 77 "privileged instruction fault", /* 1 T_PRIVINFLT */ 78 "reserved operand fault", /* 2 T_RESOPFLT */ 79 "breakpoint instruction fault", /* 3 T_BPTFLT */ 80 "", /* 4 unused */ 81 "system call trap", /* 5 T_SYSCALL */ 82 "arithmetic trap", /* 6 T_ARITHTRAP */ 83 "system forced exception", /* 7 T_ASTFLT */ 84 "segmentation (limit) fault", /* 8 T_SEGFLT */ 85 "general protection fault", /* 9 T_PROTFLT */ 86 "trace trap", /* 10 T_TRCTRAP */ 87 "", /* 11 unused */ 88 "page fault", /* 12 T_PAGEFLT */ 89 "page table fault", /* 13 
T_TABLEFLT */ 90 "alignment fault", /* 14 T_ALIGNFLT */ 91 "kernel stack pointer not valid", /* 15 T_KSPNOTVAL */ 92 "bus error", /* 16 T_BUSERR */ 93 "kernel debugger fault", /* 17 T_KDBTRAP */ 94 "integer divide fault", /* 18 T_DIVIDE */ 95 "non-maskable interrupt trap", /* 19 T_NMI */ 96 "overflow trap", /* 20 T_OFLOW */ 97 "FPU bounds check fault", /* 21 T_BOUND */ 98 "FPU device not available", /* 22 T_DNA */ 99 "double fault", /* 23 T_DOUBLEFLT */ 100 "FPU operand fetch fault", /* 24 T_FPOPFLT */ 101 "invalid TSS fault", /* 25 T_TSSFLT */ 102 "segment not present fault", /* 26 T_SEGNPFLT */ 103 "stack fault", /* 27 T_STKFLT */ 104}; 105 106static inline void 107userret(p, frame, oticks) 108 struct proc *p; 109 struct trapframe *frame; 110 u_quad_t oticks; 111{ 112 int sig, s; 113 114 while ((sig = CURSIG(p)) != 0) 115 postsig(sig); 116 p->p_priority = p->p_usrpri; 117 if (want_resched) { 118 /* 119 * Since we are curproc, clock will normally just change 120 * our priority without moving us from one queue to another 121 * (since the running process is not on a queue.) 122 * If that happened after we setrunqueue ourselves but before we 123 * mi_switch()'ed, we might not be on the queue indicated by 124 * our priority. 125 */ 126 s = splclock(); 127 setrunqueue(p); 128 p->p_stats->p_ru.ru_nivcsw++; 129 mi_switch(); 130 splx(s); 131 while ((sig = CURSIG(p)) != 0) 132 postsig(sig); 133 } 134 if (p->p_stats->p_prof.pr_scale) { 135 u_quad_t ticks = p->p_sticks - oticks; 136 137 if (ticks) { 138#ifdef PROFTIMER 139 extern int profscale; 140 addupc(frame->tf_eip, &p->p_stats->p_prof, 141 ticks * profscale); 142#else 143 addupc(frame->tf_eip, &p->p_stats->p_prof, ticks); 144#endif 145 } 146 } 147 curpriority = p->p_priority; 148} 149 150/* 151 * trap(frame): 152 * Exception, fault, and trap interface to the FreeBSD kernel. 
153 * This common code is called from assembly language IDT gate entry 154 * routines that prepare a suitable stack frame, and restore this 155 * frame after the exception has been processed. 156 */ 157 158/*ARGSUSED*/ 159void 160trap(frame) 161 struct trapframe frame; 162{ 163 struct proc *p = curproc; 164 u_quad_t sticks = 0; 165 int i = 0, ucode = 0, type, code; 166#ifdef DIAGNOSTIC 167 u_long eva; 168#endif 169 170 frame.tf_eflags &= ~PSL_NT; /* clear nested trap XXX */ 171 type = frame.tf_trapno; 172 code = frame.tf_err; 173 174 if (ISPL(frame.tf_cs) == SEL_UPL) { 175 /* user trap */ 176 177 sticks = p->p_sticks; 178 p->p_md.md_regs = (int *)&frame; 179 180 switch (type) { 181 case T_RESADFLT: /* reserved addressing fault */ 182 case T_PRIVINFLT: /* privileged instruction fault */ 183 case T_RESOPFLT: /* reserved operand fault */ 184 ucode = type; 185 i = SIGILL; 186 break; 187 188 case T_BPTFLT: /* bpt instruction fault */ 189 case T_TRCTRAP: /* trace trap */ 190 frame.tf_eflags &= ~PSL_T; 191 i = SIGTRAP; 192 break; 193 194 case T_ARITHTRAP: /* arithmetic trap */ 195 ucode = code; 196 i = SIGFPE; 197 break; 198 199 case T_ASTFLT: /* Allow process switch */ 200 astoff(); 201 cnt.v_soft++; 202 if ((p->p_flag & P_OWEUPC) && p->p_stats->p_prof.pr_scale) { 203 addupc(frame.tf_eip, &p->p_stats->p_prof, 1); 204 p->p_flag &= ~P_OWEUPC; 205 } 206 goto out; 207 208 case T_PROTFLT: /* general protection fault */ 209 case T_SEGNPFLT: /* segment not present fault */ 210 case T_STKFLT: /* stack fault */ 211 ucode = code + BUS_SEGM_FAULT ; 212 i = SIGBUS; 213 break; 214 215 case T_PAGEFLT: /* page fault */ 216 i = trap_pfault(&frame, TRUE); 217 if (i == -1) 218 return; 219 if (i == 0) 220 goto out; 221 222 ucode = T_PAGEFLT; 223 break; 224 225 case T_DIVIDE: /* integer divide fault */ 226 ucode = FPE_INTDIV_TRAP; 227 i = SIGFPE; 228 break; 229 230#if NISA > 0 231 case T_NMI: 232#ifdef DDB 233 /* NMI can be hooked up to a pushbutton for debugging */ 234 printf ("NMI ... 
going to debugger\n"); 235 if (kdb_trap (type, 0, &frame)) 236 return; 237#endif 238 /* machine/parity/power fail/"kitchen sink" faults */ 239 if (isa_nmi(code) == 0) return; 240 panic("NMI indicates hardware failure"); 241#endif 242 243 case T_OFLOW: /* integer overflow fault */ 244 ucode = FPE_INTOVF_TRAP; 245 i = SIGFPE; 246 break; 247 248 case T_BOUND: /* bounds check fault */ 249 ucode = FPE_SUBRNG_TRAP; 250 i = SIGFPE; 251 break; 252 253 case T_DNA: 254#if NNPX > 0 255 /* if a transparent fault (due to context switch "late") */ 256 if (npxdna()) 257 return; 258#endif /* NNPX > 0 */ 259 260#if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE) 261 i = math_emulate(&frame); 262 if (i == 0) return; 263#else /* MATH_EMULATE || GPL_MATH_EMULATE */ 264 panic("trap: math emulation necessary!"); 265#endif /* MATH_EMULATE || GPL_MATH_EMULATE */ 266 ucode = FPE_FPU_NP_TRAP; 267 break; 268 269 case T_FPOPFLT: /* FPU operand fetch fault */ 270 ucode = T_FPOPFLT; 271 i = SIGILL; 272 break; 273 274 default: 275 trap_fatal(&frame); 276 return; 277 } 278 } else { 279 /* kernel trap */ 280 281 switch (type) { 282 case T_PAGEFLT: /* page fault */ 283 (void) trap_pfault(&frame, FALSE); 284 return; 285 286 case T_PROTFLT: /* general protection fault */ 287 case T_SEGNPFLT: /* segment not present fault */ 288 if (curpcb && curpcb->pcb_onfault) { 289 frame.tf_eip = (int)curpcb->pcb_onfault; 290 return; 291 } 292 break; 293 294#ifdef DDB 295 case T_BPTFLT: 296 case T_TRCTRAP: 297 if (kdb_trap (type, 0, &frame)) 298 return; 299 break; 300#else 301 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ 302 /* Q: how do we turn it on again? */ 303 frame.tf_eflags &= ~PSL_T; 304 return; 305#endif 306 307#if NISA > 0 308 case T_NMI: 309#ifdef DDB 310 /* NMI can be hooked up to a pushbutton for debugging */ 311 printf ("NMI ... 
going to debugger\n"); 312 if (kdb_trap (type, 0, &frame)) 313 return; 314#endif 315 /* machine/parity/power fail/"kitchen sink" faults */ 316 if (isa_nmi(code) == 0) return; 317 /* FALL THROUGH */ 318#endif 319 } 320 321 trap_fatal(&frame); 322 return; 323 } 324 325 trapsignal(p, i, ucode); 326 327#ifdef DIAGNOSTIC 328 eva = rcr2(); 329 if (type <= MAX_TRAP_MSG) { 330 uprintf("fatal process exception: %s", 331 trap_msg[type]); 332 if ((type == T_PAGEFLT) || (type == T_PROTFLT)) 333 uprintf(", fault VA = 0x%x", eva); 334 uprintf("\n"); 335 } 336#endif 337 338out: 339 userret(p, &frame, sticks); 340} 341 342int 343trap_pfault(frame, usermode) 344 struct trapframe *frame; 345 int usermode; 346{ 347 vm_offset_t va; 348 struct vmspace *vm = NULL; 349 vm_map_t map = 0; 350 int rv = 0; 351 vm_prot_t ftype; 352 extern vm_map_t kernel_map; 353 int eva; 354 struct proc *p = curproc; 355 356 eva = rcr2(); 357 va = trunc_page((vm_offset_t)eva); 358 359 if (va >= KERNBASE) { 360 /* 361 * Don't allow user-mode faults in kernel address space. 362 */ 363 if (usermode) 364 goto nogo; 365 366 map = kernel_map; 367 } else { 368 /* 369 * This is a fault on non-kernel virtual memory. 370 * vm is initialized above to NULL. If curproc is NULL 371 * or curproc->p_vmspace is NULL the fault is fatal. 372 */ 373 if (p != NULL) 374 vm = p->p_vmspace; 375 376 if (vm == NULL) 377 goto nogo; 378 379 map = &vm->vm_map; 380 } 381 382 if (frame->tf_err & PGEX_W) 383 ftype = VM_PROT_READ | VM_PROT_WRITE; 384 else 385 ftype = VM_PROT_READ; 386 387 if (map != kernel_map) { 388 vm_offset_t v = (vm_offset_t) vtopte(va); 389 vm_page_t ptepg; 390 391 /* 392 * Keep swapout from messing with us during this 393 * critical time. 
394 */ 395 ++p->p_lock; 396 397 /* 398 * Grow the stack if necessary 399 */ 400 if ((caddr_t)va > vm->vm_maxsaddr 401 && (caddr_t)va < (caddr_t)USRSTACK) { 402 if (!grow(p, va)) { 403 rv = KERN_FAILURE; 404 --p->p_lock; 405 goto nogo; 406 } 407 } 408 409 /* 410 * Check if page table is mapped, if not, 411 * fault it first 412 */ 413 414 /* Fault the pte only if needed: */ 415 *(volatile char *)v += 0; 416 417 ptepg = (vm_page_t) pmap_pte_vm_page(vm_map_pmap(map), v); 418 if( ptepg->hold_count == 0) 419 ptepg->act_count += 3; 420 vm_page_hold(ptepg); 421 422 /* Fault in the user page: */ 423 rv = vm_fault(map, va, ftype, FALSE); 424 425 vm_page_unhold(ptepg); 426 427 /* 428 * page table pages don't need to be kept if they 429 * are not held 430 */ 431 if( ptepg->hold_count == 0 && ptepg->wire_count == 0) { 432 pmap_page_protect( VM_PAGE_TO_PHYS(ptepg), 433 VM_PROT_NONE); 434 vm_page_free(ptepg); 435 } 436 437 --p->p_lock; 438 } else { 439 /* 440 * Since we know that kernel virtual address addresses 441 * always have pte pages mapped, we just have to fault 442 * the page. 443 */ 444 rv = vm_fault(map, va, ftype, FALSE); 445 } 446 447 if (rv == KERN_SUCCESS) 448 return (0); 449nogo: 450 if (!usermode) { 451 if (curpcb && curpcb->pcb_onfault) { 452 frame->tf_eip = (int)curpcb->pcb_onfault; 453 return (0); 454 } 455 trap_fatal(frame); 456 return (-1); 457 } 458 459 /* kludge to pass faulting virtual address to sendsig */ 460 frame->tf_err = eva; 461 462 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); 463} 464 465void 466trap_fatal(frame) 467 struct trapframe *frame; 468{ 469 int code, type, eva; 470 struct soft_segment_descriptor softseg; 471 472 code = frame->tf_err; 473 type = frame->tf_trapno; 474 eva = rcr2(); 475 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); 476 477 if (type <= MAX_TRAP_MSG) 478 printf("\n\nFatal trap %d: %s while in %s mode\n", 479 type, trap_msg[type], 480 ISPL(frame->tf_cs) == SEL_UPL ? 
"user" : "kernel"); 481 if (type == T_PAGEFLT) { 482 printf("fault virtual address = 0x%x\n", eva); 483 printf("fault code = %s %s, %s\n", 484 code & PGEX_U ? "user" : "supervisor", 485 code & PGEX_W ? "write" : "read", 486 code & PGEX_P ? "protection violation" : "page not present"); 487 } 488 printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); 489 printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", 490 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); 491 printf(" = DPL %d, pres %d, def32 %d, gran %d\n", 492 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); 493 printf("processor eflags = "); 494 if (frame->tf_eflags & PSL_T) 495 printf("trace/trap, "); 496 if (frame->tf_eflags & PSL_I) 497 printf("interrupt enabled, "); 498 if (frame->tf_eflags & PSL_NT) 499 printf("nested task, "); 500 if (frame->tf_eflags & PSL_RF) 501 printf("resume, "); 502 if (frame->tf_eflags & PSL_VM) 503 printf("vm86, "); 504 printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); 505 printf("current process = "); 506 if (curproc) { 507 printf("%lu (%s)\n", 508 (u_long)curproc->p_pid, curproc->p_comm ? 509 curproc->p_comm : ""); 510 } else { 511 printf("Idle\n"); 512 } 513 printf("interrupt mask = "); 514 if ((cpl & net_imask) == net_imask) 515 printf("net "); 516 if ((cpl & tty_imask) == tty_imask) 517 printf("tty "); 518 if ((cpl & bio_imask) == bio_imask) 519 printf("bio "); 520 if (cpl == 0) 521 printf("none"); 522 printf("\n"); 523 524#ifdef KDB 525 if (kdb_trap(&psl)) 526 return; 527#endif 528#ifdef DDB 529 if (kdb_trap (type, 0, frame)) 530 return; 531#endif 532 if (type <= MAX_TRAP_MSG) 533 panic(trap_msg[type]); 534 else 535 panic("unknown/reserved trap"); 536} 537 538/* 539 * Compensate for 386 brain damage (missing URKR). 
540 * This is a little simpler than the pagefault handler in trap() because 541 * it the page tables have already been faulted in and high addresses 542 * are thrown out early for other reasons. 543 */ 544int trapwrite(addr) 545 unsigned addr; 546{ 547 struct proc *p; 548 vm_offset_t va, v; 549 struct vmspace *vm; 550 int rv; 551 552 va = trunc_page((vm_offset_t)addr); 553 /* 554 * XXX - MAX is END. Changed > to >= for temp. fix. 555 */ 556 if (va >= VM_MAXUSER_ADDRESS) 557 return (1); 558 559 p = curproc; 560 vm = p->p_vmspace; 561 562 ++p->p_lock; 563 564 if ((caddr_t)va >= vm->vm_maxsaddr 565 && (caddr_t)va < (caddr_t)USRSTACK) { 566 if (!grow(p, va)) { 567 --p->p_lock; 568 return (1); 569 } 570 } 571 572 v = trunc_page(vtopte(va)); 573 574 /* 575 * wire the pte page 576 */ 577 if (va < USRSTACK) { 578 vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE); 579 } 580 581 /* 582 * fault the data page 583 */ 584 rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE); 585 586 /* 587 * unwire the pte page 588 */ 589 if (va < USRSTACK) { 590 vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE); 591 } 592 593 --p->p_lock; 594 595 if (rv != KERN_SUCCESS) 596 return 1; 597 598 return (0); 599} 600 601/* 602 * syscall(frame): 603 * System call request from POSIX system call gate interface to kernel. 604 * Like trap(), argument is call by reference. 605 */ 606/*ARGSUSED*/ 607void 608syscall(frame) 609 struct trapframe frame; 610{ 611 caddr_t params; 612 int i; 613 struct sysent *callp; 614 struct proc *p = curproc; 615 u_quad_t sticks; 616 int error, opc; 617 int args[8], rval[2]; 618 u_int code; 619 620 sticks = p->p_sticks; 621 if (ISPL(frame.tf_cs) != SEL_UPL) 622 panic("syscall"); 623 624 code = frame.tf_eax; 625 p->p_md.md_regs = (int *)&frame; 626 params = (caddr_t)frame.tf_esp + sizeof (int) ; 627 628 /* 629 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. 
630 */ 631 opc = frame.tf_eip - 7; 632 /* 633 * Need to check if this is a 32 bit or 64 bit syscall. 634 */ 635 if (code == SYS_syscall) { 636 /* 637 * Code is first argument, followed by actual args. 638 */ 639 code = fuword(params); 640 params += sizeof (int); 641 } else if (code == SYS___syscall) { 642 /* 643 * Like syscall, but code is a quad, so as to maintain 644 * quad alignment for the rest of the arguments. 645 */ 646 code = fuword(params + _QUAD_LOWWORD * sizeof(int)); 647 params += sizeof(quad_t); 648 } 649 650 if (p->p_sysent->sv_mask) 651 code = code & p->p_sysent->sv_mask; 652 653 if (code >= p->p_sysent->sv_size) 654 callp = &p->p_sysent->sv_table[0]; 655 else 656 callp = &p->p_sysent->sv_table[code]; 657 658 if ((i = callp->sy_narg * sizeof (int)) && 659 (error = copyin(params, (caddr_t)args, (u_int)i))) { 660#ifdef KTRACE 661 if (KTRPOINT(p, KTR_SYSCALL)) 662 ktrsyscall(p->p_tracep, code, callp->sy_narg, args); 663#endif 664 goto bad; 665 } 666#ifdef KTRACE 667 if (KTRPOINT(p, KTR_SYSCALL)) 668 ktrsyscall(p->p_tracep, code, callp->sy_narg, args); 669#endif 670 rval[0] = 0; 671 rval[1] = frame.tf_edx; 672 673 error = (*callp->sy_call)(p, args, rval); 674 675 switch (error) { 676 677 case 0: 678 /* 679 * Reinitialize proc pointer `p' as it may be different 680 * if this is a child returning from fork syscall. 
681 */ 682 p = curproc; 683 frame.tf_eax = rval[0]; 684 frame.tf_edx = rval[1]; 685 frame.tf_eflags &= ~PSL_C; /* carry bit */ 686 break; 687 688 case ERESTART: 689 frame.tf_eip = opc; 690 break; 691 692 case EJUSTRETURN: 693 break; 694 695 default: 696 bad: 697 if (p->p_sysent->sv_errsize) 698 if (error >= p->p_sysent->sv_errsize) 699 error = -1; /* XXX */ 700 else 701 error = p->p_sysent->sv_errtbl[error]; 702 frame.tf_eax = error; 703 frame.tf_eflags |= PSL_C; /* carry bit */ 704 break; 705 } 706 707 userret(p, &frame, sticks); 708 709#ifdef KTRACE 710 if (KTRPOINT(p, KTR_SYSRET)) 711 ktrsysret(p->p_tracep, code, error, rval[0]); 712#endif 713} 714