subr_syscall.c revision 44611
1/*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 38 * $Id: trap.c,v 1.133 1999/01/06 23:05:36 julian Exp $ 39 */ 40 41/* 42 * 386 Trap and System call handling 43 */ 44 45#include "opt_cpu.h" 46#include "opt_ddb.h" 47#include "opt_ktrace.h" 48#include "opt_clock.h" 49#include "opt_trap.h" 50#include "opt_vm86.h" 51 52#include <sys/param.h> 53#include <sys/systm.h> 54#include <sys/proc.h> 55#include <sys/pioctl.h> 56#include <sys/kernel.h> 57#include <sys/resourcevar.h> 58#include <sys/signalvar.h> 59#include <sys/syscall.h> 60#include <sys/sysent.h> 61#include <sys/uio.h> 62#include <sys/vmmeter.h> 63#ifdef KTRACE 64#include <sys/ktrace.h> 65#endif 66 67#include <vm/vm.h> 68#include <vm/vm_param.h> 69#include <vm/vm_prot.h> 70#include <sys/lock.h> 71#include <vm/pmap.h> 72#include <vm/vm_kern.h> 73#include <vm/vm_map.h> 74#include <vm/vm_page.h> 75#include <vm/vm_extern.h> 76 77#include <machine/cpu.h> 78#include <machine/ipl.h> 79#include <machine/md_var.h> 80#include <machine/pcb.h> 81#ifdef SMP 82#include <machine/smp.h> 83#endif 84#include <machine/tss.h> 85 86#include <i386/isa/intr_machdep.h> 87 88#ifdef POWERFAIL_NMI 89#include <sys/syslog.h> 90#include <machine/clock.h> 91#endif 92 93#ifdef VM86 94#include <machine/vm86.h> 95#endif 96 97#ifdef DDB 98 extern int in_Debugger, debugger_on_panic; 99#endif 100 101#include "isa.h" 102#include "npx.h" 103 104extern struct i386tss common_tss; 105 106int (*pmath_emulate) __P((struct 
trapframe *));

extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
extern void syscall __P((struct trapframe frame));

static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
void dblfault_handler __P((void));

extern inthand_t IDTVEC(syscall);

/* Highest trap number with an entry in trap_msg[] below. */
#define MAX_TRAP_MSG		28
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
};

static __inline void userret __P((struct proc *p, struct trapframe *frame,
				  u_quad_t oticks));

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern struct gate_descriptor *t_idt;
extern int has_f00f_bug;
#endif

/*
 * Common work performed on every return to user mode: deliver pending
 * signals, honor a pending reschedule request, and charge profiling time.
 *
 * p      - the returning process (curproc).
 * frame  - trap frame about to be restored; tf_eip is used for profiling.
 * oticks - p->p_sticks sampled at kernel entry; the difference from the
 *          current p_sticks is the system time consumed by this trap or
 *          syscall, charged to the profile when P_PROFIL is set.
 */
static __inline void
userret(p, frame, oticks)
	struct proc *p;
	struct trapframe *frame;
	u_quad_t oticks;
{
	int sig, s;

	/* Deliver any signals that became pending while in the kernel. */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);

#if 0
	if (!want_resched &&
		(p->p_priority <= p->p_usrpri) &&
		(p->p_rtprio.type == RTP_PRIO_NORMAL)) {
		 int newpriority;
		 p->p_estcpu += 1;
		 newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
		 newpriority = min(newpriority, MAXPRI);
		 p->p_usrpri = newpriority;
	}
#endif

	p->p_priority = p->p_usrpri;
	if (want_resched) {
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrunqueue ourselves but before we
		 * mi_switch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splhigh();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		splx(s);
		/* Signals may have arrived while we were switched out. */
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}
	/*
	 * Charge system time if profiling.
	 */
	if (p->p_flag & P_PROFIL)
		addupc_task(p, frame->tf_eip,
			    (u_int)(p->p_sticks - oticks) * psratio);

	curpriority = p->p_priority;
}

/*
 * Exception, fault, and trap interface to the FreeBSD kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * The frame is passed by value so that the handler may modify the saved
 * register image (tf_eip, tf_eflags, ...) before it is restored on the
 * way back out.  User-mode traps are turned into signals via
 * trapsignal(); kernel-mode traps are either patched up in place or end
 * in trap_fatal().
 */

void
trap(frame)
	struct trapframe frame;
{
	struct proc *p = curproc;
	u_quad_t sticks = 0;
	int i = 0, ucode = 0, type, code;
	vm_offset_t eva;

	if (!(frame.tf_eflags & PSL_I)) {
		/*
		 * Buggy application or kernel code has disabled interrupts
		 * and then trapped.  Enabling interrupts now is wrong, but
		 * it is better than running with interrupts disabled until
		 * they are accidentally enabled later.
		 */
		type = frame.tf_trapno;
		if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
			printf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curproc->p_comm, type);
		else if (type != T_BPTFLT && type != T_TRCTRAP)
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);
		enable_intr();
	}

	eva = 0;
	if (frame.tf_trapno == T_PAGEFLT) {
		/*
		 * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
		 * This problem is worked around by using an interrupt
		 * gate for the pagefault handler.  We are finally ready
		 * to read %cr2 and then must reenable interrupts.
		 *
		 * XXX this should be in the switch statement, but the
		 * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
		 * flow of control too much for this to be obviously
		 * correct.
		 */
		eva = rcr2();
		enable_intr();
	}

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
	type = frame.tf_trapno;
	code = frame.tf_err;

#ifdef VM86
	if (in_vm86call) {
		/* Fault taken while the kernel itself is running vm86 code. */
		if (frame.tf_eflags & PSL_VM &&
		    (type == T_PROTFLT || type == T_STKFLT)) {
			i = vm86_emulate((struct vm86frame *)&frame);
			if (i != 0)
				/*
				 * returns to original process
				 */
				vm86_trap((struct vm86frame *)&frame);
			return;
		}
		switch (type) {
			/*
			 * these traps want either a process context, or
			 * assume a normal userspace trap.
			 */
		case T_PROTFLT:
		case T_SEGNPFLT:
			trap_fatal(&frame, eva);
			return;
		case T_TRCTRAP:
			type = T_BPTFLT;	/* kernel breakpoint */
			/* FALL THROUGH */
		}
		goto kernel_trap;	/* normal kernel trap handling */
	}
#endif

	if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
		/* user trap: `i' accumulates the signal to deliver */

		sticks = p->p_sticks;
		p->p_md.md_regs = &frame;

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			ucode = type;
			i = SIGILL;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			frame.tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
			break;

		case T_ASTFLT:		/* Allow process switch */
			astoff();
			cnt.v_soft++;
			if (p->p_flag & P_OWEUPC) {
				/* Pay the deferred profiling tick. */
				p->p_flag &= ~P_OWEUPC;
				addupc_task(p, p->p_stats->p_prof.pr_addr,
					    p->p_stats->p_prof.pr_ticks);
			}
			goto out;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
#ifdef VM86
			if (frame.tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)&frame);
				if (i == 0)
					goto out;
				break;
			}
#endif /* VM86 */
			/* FALL THROUGH */

		case T_SEGNPFLT:	/* segment not present fault */
		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
		default:
			ucode = code + BUS_SEGM_FAULT ;
			i = SIGBUS;
			break;

		case T_PAGEFLT:		/* page fault */
			/*
			 * trap_pfault(): 0 = resolved, -1 = already fatal,
			 * -2 = F00F workaround (retry trap decode), else
			 * the signal number to post.
			 */
			i = trap_pfault(&frame, TRUE, eva);
			if (i == -1)
				return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
			if (i == -2)
				goto restart;
#endif
			if (i == 0)
				goto out;

			ucode = T_PAGEFLT;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV_TRAP;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
#ifdef POWERFAIL_NMI
			goto handle_powerfail;
#else /* !POWERFAIL_NMI */
#ifdef DDB
			/* NMI can be hooked up to a pushbutton for debugging */
			printf ("NMI ... going to debugger\n");
			if (kdb_trap (type, 0, &frame))
				return;
#endif /* DDB */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) return;
			panic("NMI indicates hardware failure");
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF_TRAP;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_SUBRNG_TRAP;
			i = SIGFPE;
			break;

		case T_DNA:
#if NNPX > 0
			/* if a transparent fault (due to context switch "late") */
			if (npxdna())
				return;
#endif
			if (!pmath_emulate) {
				/* No FPU and no emulator configured. */
				i = SIGFPE;
				ucode = FPE_FPU_NP_TRAP;
				break;
			}
			i = (*pmath_emulate)(&frame);
			if (i == 0) {
				/* Emulated OK; honor single-step if set. */
				if (!(frame.tf_eflags & PSL_T))
					return;
				frame.tf_eflags &= ~PSL_T;
				i = SIGTRAP;
			}
			/* else ucode = emulator_only_knows() XXX */
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = T_FPOPFLT;
			i = SIGILL;
			break;
		}
	} else {
#ifdef VM86
kernel_trap:
#endif
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(&frame, FALSE, eva);
			return;

		case T_DNA:
#if NNPX > 0
			/*
			 * The kernel is apparently using npx for copying.
			 * XXX this should be fatal unless the kernel has
			 * registered such use.
			 */
			if (npxdna())
				return;
#endif
			break;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
#define	MAYBE_DORETI_FAULT(where, whereto)				\
	do {								\
		if (frame.tf_eip == (int)where) {			\
			frame.tf_eip = (int)whereto;			\
			return;						\
		}							\
	} while (0)

			if (intr_nesting_level == 0) {
				/*
				 * Invalid %fs's and %gs's can be created using
				 * procfs or PT_SETREGS or by invalidating the
				 * underlying LDT entry.  This causes a fault
				 * in kernel mode when the kernel attempts to
				 * switch contexts.  Lose the bad context
				 * (XXX) so that we can continue, and generate
				 * a signal.
				 */
				if (frame.tf_eip == (int)cpu_switch_load_fs) {
					curpcb->pcb_fs = 0;
					psignal(p, SIGBUS);
					return;
				}
				if (frame.tf_eip == (int)cpu_switch_load_gs) {
					curpcb->pcb_gs = 0;
					psignal(p, SIGBUS);
					return;
				}
				MAYBE_DORETI_FAULT(doreti_iret,
						   doreti_iret_fault);
				MAYBE_DORETI_FAULT(doreti_popl_ds,
						   doreti_popl_ds_fault);
				MAYBE_DORETI_FAULT(doreti_popl_es,
						   doreti_popl_es_fault);
				/* Copyin/copyout-style recovery hook. */
				if (curpcb && curpcb->pcb_onfault) {
					frame.tf_eip = (int)curpcb->pcb_onfault;
					return;
				}
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame.tf_eflags & PSL_NT) {
				frame.tf_eflags &= ~PSL_NT;
				return;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			if (frame.tf_eip == (int)IDTVEC(syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				return;
			}
			if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame.tf_eflags &= ~PSL_T;
				return;
			}
			/*
			 * Fall through.
			 */
		case T_BPTFLT:
			/*
			 * If DDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef DDB
			if (kdb_trap (type, 0, &frame))
				return;
#endif
			break;

#if NISA > 0
		case T_NMI:
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
#  define TIMER_FREQ 1193182
#endif
	handle_powerfail:
		{
		  static unsigned lastalert = 0;

		  /* Rate-limit the warning/beep to once every 10 seconds. */
		  if(time_second - lastalert > 10)
		    {
		      log(LOG_WARNING, "NMI: power fail\n");
		      sysbeep(TIMER_FREQ/880, hz);
		      lastalert = time_second;
		    }
		  return;
		}
#else /* !POWERFAIL_NMI */
#ifdef DDB
			/* NMI can be hooked up to a pushbutton for debugging */
			printf ("NMI ... going to debugger\n");
			if (kdb_trap (type, 0, &frame))
				return;
#endif /* DDB */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) return;
			/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
		}

		/* Unhandled kernel trap: dump state and panic. */
		trap_fatal(&frame, eva);
		return;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	trapsignal(p, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out:
	userret(p, &frame, sticks);
}

#ifdef notyet
/*
 * This version doesn't allow a page fault to user space while
 * in the kernel. The rest of the kernel needs to be made "safe"
 * before this can be used. I think the only things remaining
 * to be made safe are the iBCS2 code and the process tracing/
 * debugging code.
 */
static int
trap_pfault(frame, usermode, eva)
	struct trapframe *frame;
	int usermode;
	vm_offset_t eva;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	struct proc *p = curproc;

	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	va = trunc_page(eva);
	if (va < VM_MIN_KERNEL_ADDRESS) {
		vm_offset_t v;
		vm_page_t mpte;

		if (p == NULL ||
		    (!usermode && va < VM_MAXUSER_ADDRESS &&
		     (intr_nesting_level != 0 || curpcb == NULL ||
		      curpcb->pcb_onfault == NULL))) {
			trap_fatal(frame, eva);
			return (-1);
		}

		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		vm = p->p_vmspace;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
#ifndef VM_STACK
		if ((caddr_t)va > vm->vm_maxsaddr && va < USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

#else
		/* grow_stack returns false only if va falls into
		 * a growable stack region and the stack growth
		 * fails.  It returns true if va was not within
		 * a growable stack region, or if the stack
		 * growth succeeded.
		 */
		if (!grow_stack (p, va)) {
			rv = KERN_FAILURE;
			--p->p_lock;
			goto nogo;
		}
#endif

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype,
			(ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);

		--p->p_lock;
	} else {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		/*
		 * Since we know that kernel virtual address addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(kernel_map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, eva);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
#endif

/*
 * Resolve a page fault at virtual address `eva'.
 *
 * frame    - the trap frame (tf_err holds the hardware error code).
 * usermode - TRUE if the fault was taken in user mode.
 *
 * Returns:
 *	 0  fault resolved, resume the faulting context.
 *	-1  unrecoverable; trap_fatal() has already been called.
 *	-2  Pentium F00F workaround fired; tf_trapno was rewritten to
 *	    T_PRIVINFLT and the caller should re-dispatch the trap.
 *	else a signal number (SIGBUS or SIGSEGV) to post to the process;
 *	    tf_err is overwritten with `eva' so sendsig can see the
 *	    faulting address.
 */
int
trap_pfault(frame, usermode, eva)
	struct trapframe *frame;
	int usermode;
	vm_offset_t eva;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	struct proc *p = curproc;

	va = trunc_page(eva);
	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 * An exception: if the faulting address is the invalid
		 * instruction entry in the IDT, then the Intel Pentium
		 * F00F bug workaround was triggered, and we need to
		 * treat it is as an illegal instruction, and not a page
		 * fault.
		 */
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
		if ((eva == (unsigned int)&t_idt[6]) && has_f00f_bug) {
			frame->tf_trapno = T_PRIVINFLT;
			return -2;
		}
#endif
		if (usermode)
			goto nogo;

		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (p != NULL)
			vm = p->p_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	/* PGEX_W set means the faulting access was a write. */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	if (map != kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
#ifndef VM_STACK
		if ((caddr_t)va > vm->vm_maxsaddr && va < USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}
#else
		/* grow_stack returns false only if va falls into
		 * a growable stack region and the stack growth
		 * fails.  It returns true if va was not within
		 * a growable stack region, or if the stack
		 * growth succeeded.
		 */
		if (!grow_stack (p, va)) {
			rv = KERN_FAILURE;
			--p->p_lock;
			goto nogo;
		}
#endif

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype,
			(ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);

		--p->p_lock;
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the kernel.
		 */
		rv = vm_fault(map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/* Kernel fault: try the copyin/copyout recovery hook. */
		if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, eva);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/*
 * Print a full register/segment dump for an unrecoverable trap, then
 * panic.  Only returns if a configured kernel debugger (KDB/DDB)
 * handles the trap.  `eva' is the faulting address for T_PAGEFLT.
 */
static void
trap_fatal(frame, eva)
	struct trapframe *frame;
	vm_offset_t eva;
{
	int code, type, ss, esp;
	struct soft_segment_descriptor softseg;

	code = frame->tf_err;
	type = frame->tf_trapno;
	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);

	if (type <= MAX_TRAP_MSG)
		printf("\n\nFatal trap %d: %s while in %s mode\n",
			type, trap_msg[type],
			frame->tf_eflags & PSL_VM ? "vm86" :
			ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	printf("mp_lock = %08x; ", mp_lock);
	printf("cpuid = %d; ", cpuid);
	printf("lapic.id = %08x\n", lapic.id);
#endif
	if (type == T_PAGEFLT) {
		printf("fault virtual address = 0x%x\n", eva);
		printf("fault code = %s %s, %s\n",
			code & PGEX_U ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_P ? "protection violation" : "page not present");
	}
	printf("instruction pointer = 0x%x:0x%x\n",
	       frame->tf_cs & 0xffff, frame->tf_eip);
	if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
		/* Trap from user/vm86 mode: the frame holds the real ss:esp. */
		ss = frame->tf_ss & 0xffff;
		esp = frame->tf_esp;
	} else {
		/* Kernel trap: no ss/esp pushed; report the frame's address. */
		ss = GSEL(GDATA_SEL, SEL_KPL);
		esp = (int)&frame->tf_esp;
	}
	printf("stack pointer = 0x%x:0x%x\n", ss, esp);
	printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp);
	printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
	       softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
	printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
	       softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
	       softseg.ssd_gran);
	printf("processor eflags = ");
	if (frame->tf_eflags & PSL_T)
		printf("trace trap, ");
	if (frame->tf_eflags & PSL_I)
		printf("interrupt enabled, ");
	if (frame->tf_eflags & PSL_NT)
		printf("nested task, ");
	if (frame->tf_eflags & PSL_RF)
		printf("resume, ");
	if (frame->tf_eflags & PSL_VM)
		printf("vm86, ");
	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
	printf("current process = ");
	if (curproc) {
		printf("%lu (%s)\n",
		    (u_long)curproc->p_pid, curproc->p_comm ?
		    curproc->p_comm : "");
	} else {
		printf("Idle\n");
	}
	printf("interrupt mask = ");
	if ((cpl & net_imask) == net_imask)
		printf("net ");
	if ((cpl & tty_imask) == tty_imask)
		printf("tty ");
	if ((cpl & bio_imask) == bio_imask)
		printf("bio ");
	if ((cpl & cam_imask) == cam_imask)
		printf("cam ");
	if (cpl == 0)
		printf("none");
#ifdef SMP
/**
 *  XXX FIXME:
 *	we probably SHOULD have stopped the other CPUs before now!
 *	another CPU COULD have been touching cpl at this moment...
 */
	printf(" <- SMP: XXX");
#endif
	printf("\n");

#ifdef KDB
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	if ((debugger_on_panic || in_Debugger) && kdb_trap(type, 0, frame))
		return;
#endif
	printf("trap number = %d\n", type);
	if (type <= MAX_TRAP_MSG)
		panic(trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs. This means that the stack that was active at
 * the time of the double fault is not available at <kstack> unless
 * the machine was idle when the double fault occurred. The downside
 * of this is that "trace <ebp>" in ddb won't work.
 */
void
dblfault_handler()
{
	/* Registers come from the double-fault task's TSS, not a frame. */
	printf("\nFatal double fault:\n");
	printf("eip = 0x%x\n", common_tss.tss_eip);
	printf("esp = 0x%x\n", common_tss.tss_esp);
	printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	printf("mp_lock = %08x; ", mp_lock);
	printf("cpuid = %d; ", cpuid);
	printf("lapic.id = %08x\n", lapic.id);
#endif
	panic("double fault");
}

/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
 *
 * Returns 0 if the user page containing `addr' was made writable,
 * 1 on any failure (address beyond VM_MAXUSER_ADDRESS, stack grow
 * failure, or vm_fault() error).
 */
int trapwrite(addr)
	unsigned addr;
{
	struct proc *p;
	vm_offset_t va;
	struct vmspace *vm;
	int rv;

	va = trunc_page((vm_offset_t)addr);
	/*
	 * XXX - MAX is END.  Changed > to >= for temp. fix.
	 */
	if (va >= VM_MAXUSER_ADDRESS)
		return (1);

	p = curproc;
	vm = p->p_vmspace;

	/* Hold off swapout while we fault the page (see trap_pfault()). */
	++p->p_lock;

#ifndef VM_STACK
	if ((caddr_t)va >= vm->vm_maxsaddr && va < USRSTACK) {
		if (!grow(p, va)) {
			--p->p_lock;
			return (1);
		}
	}
#else
	if (!grow_stack (p, va)) {
		--p->p_lock;
		return (1);
	}
#endif

	/*
	 * fault the data page
	 */
	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY);

	--p->p_lock;

	if (rv != KERN_SUCCESS)
		return 1;

	return (0);
}

/*
 * System call request from POSIX system call gate interface to kernel.
 * Like trap(), argument is call by reference: the saved register image
 * is updated in place (eax/edx carry the return value, the carry flag
 * in eflags reports error/success) before being restored to user mode.
 */
void
syscall(frame)
	struct trapframe frame;
{
	caddr_t params;
	int i;
	struct sysent *callp;
	struct proc *p = curproc;
	u_quad_t sticks;
	int error;
	int args[8];
	u_int code;

#ifdef DIAGNOSTIC
	if (ISPL(frame.tf_cs) != SEL_UPL)
		panic("syscall");
#endif
	sticks = p->p_sticks;
	p->p_md.md_regs = &frame;
	/* Arguments live on the user stack just above the return address. */
	params = (caddr_t)frame.tf_esp + sizeof(int);
	code = frame.tf_eax;
	if (p->p_sysent->sv_prepsyscall) {
		/* Let the ABI emulation layer fetch code/args its own way. */
		(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
	} else {
		/*
		 * Need to check if this is a 32 bit or 64 bit syscall.
		 */
		if (code == SYS_syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = fuword(params);
			params += sizeof(int);
		} else if (code == SYS___syscall) {
			/*
			 * Like syscall, but code is a quad, so as to maintain
			 * quad alignment for the rest of the arguments.
			 */
			code = fuword(params);
			params += sizeof(quad_t);
		}
	}

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	/* Out-of-range codes dispatch to entry 0 (nosys). */
	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	/*
	 * Copy in the arguments.
	 * NOTE(review): assumes callp->sy_narg * sizeof(int) fits in
	 * args[8] — presumably guaranteed by the sysent tables; confirm.
	 */
	if (params && (i = callp->sy_narg * sizeof(int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
		goto bad;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
	p->p_retval[0] = 0;
	p->p_retval[1] = frame.tf_edx;

	STOPEVENT(p, S_SCE, callp->sy_narg);

	error = (*callp->sy_call)(p, args);

	switch (error) {

	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		frame.tf_eax = p->p_retval[0];
		frame.tf_edx = p->p_retval[1];
		frame.tf_eflags &= ~PSL_C;	/* carry clear = success */
		break;

	case ERESTART:
		/*
		 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
		 * int 0x80 is 2 bytes. We saved this in tf_err.
		 */
		frame.tf_eip -= frame.tf_err;
		break;

	case EJUSTRETURN:
		/* Handler already set up the frame (e.g. sigreturn). */
		break;

	default:
bad:
		/* Map the error through the emulation's errno table. */
		if (p->p_sysent->sv_errsize)
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		frame.tf_eax = error;
		frame.tf_eflags |= PSL_C;	/* carry set = error */
		break;
	}

	if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
		/* Traced syscall. */
		frame.tf_eflags &= ~PSL_T;
		trapsignal(p, SIGTRAP, 0);
	}

	userret(p, &frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

}

/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.
 */
void
fork_return(p, frame)
	struct proc *p;
	struct trapframe frame;
{
	frame.tf_eax = 0;		/* Child returns zero */
	frame.tf_eflags &= ~PSL_C;	/* success */
	/*
	 * NOTE(review): %edx = 1 appears to mark the child side for the
	 * userland fork stub — confirm against the libc fork() wrapper.
	 */
	frame.tf_edx = 1;

	userret(p, &frame, 0);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
#endif
}