subr_syscall.c revision 45821
/*-
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *    The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the University of
 *    California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)trap.c    7.4 (Berkeley) 5/13/91
 * $Id: trap.c,v 1.134 1999/03/09 20:20:09 phk Exp $
 */

/*
 * 386 Trap and System call handling
 */

#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_ktrace.h"
#include "opt_clock.h"
#include "opt_trap.h"
#include "opt_vm86.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/sysent.h>
#include <sys/uio.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_extern.h>

#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>

#include <i386/isa/intr_machdep.h>

#ifdef POWERFAIL_NMI
#include <sys/syslog.h>
#include <machine/clock.h>
#endif

#ifdef VM86
#include <machine/vm86.h>
#endif

#ifdef DDB
    extern int in_Debugger, debugger_on_panic;
#endif

#include "isa.h"
#include "npx.h"

extern struct i386tss common_tss;

/* Hook for an optional FPU instruction emulator (used on T_DNA below). */
int (*pmath_emulate) __P((struct trapframe *));

extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
extern void syscall __P((struct trapframe frame));

static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
void dblfault_handler __P((void));

extern inthand_t IDTVEC(syscall);

/* Human-readable names for trap numbers, indexed by T_* value. */
#define MAX_TRAP_MSG        28
static char *trap_msg[] = {
    "",                     /*  0 unused */
    "privileged instruction fault",     /*  1 T_PRIVINFLT */
    "",                     /*  2 unused */
    "breakpoint instruction fault",     /*  3 T_BPTFLT */
    "",                     /*  4 unused */
    "",                     /*  5 unused */
    "arithmetic trap",              /*  6 T_ARITHTRAP */
    "system forced exception",          /*  7 T_ASTFLT */
    "",                     /*  8 unused */
    "general protection fault",         /*  9 T_PROTFLT */
    "trace trap",               /* 10 T_TRCTRAP */
    "",                     /* 11 unused */
    "page fault",               /* 12 T_PAGEFLT */
    "",                     /* 13 unused */
    "alignment fault",              /* 14 T_ALIGNFLT */
    "",                     /* 15 unused */
    "",                     /* 16 unused */
    "",                     /* 17 unused */
    "integer divide fault",         /* 18 T_DIVIDE */
    "non-maskable interrupt trap",      /* 19 T_NMI */
    "overflow trap",                /* 20 T_OFLOW */
    "FPU bounds check fault",           /* 21 T_BOUND */
    "FPU device not available",         /* 22 T_DNA */
    "double fault",             /* 23 T_DOUBLEFLT */
    "FPU operand fetch fault",          /* 24 T_FPOPFLT */
    "invalid TSS fault",            /* 25 T_TSSFLT */
    "segment not present fault",        /* 26 T_SEGNPFLT */
    "stack fault",              /* 27 T_STKFLT */
    "machine check trap",           /* 28 T_MCHK */
};

static __inline void userret __P((struct proc *p, struct trapframe *frame,
                  u_quad_t oticks));

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern struct gate_descriptor *t_idt;
extern int has_f00f_bug;
#endif

/*
 * Common return-to-user-mode handling for trap() and syscall():
 * deliver pending signals, reschedule if a context switch was requested,
 * and charge profiling time to the process.
 */
static __inline void
userret(p, frame, oticks)
    struct proc *p;
    struct trapframe *frame;
    u_quad_t oticks;
{
    int sig, s;

    while ((sig = CURSIG(p)) != 0)
        postsig(sig);

#if 0
    if (!want_resched &&
        (p->p_priority <= p->p_usrpri) &&
        (p->p_rtprio.type == RTP_PRIO_NORMAL)) {
        int newpriority;
        p->p_estcpu += 1;
        newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
        newpriority = min(newpriority, MAXPRI);
        p->p_usrpri = newpriority;
    }
#endif

    p->p_priority = p->p_usrpri;
    if (want_resched) {
        /*
         * Since we are curproc, clock will normally just change
         * our priority without moving us from one queue to another
         * (since the running process is not on a queue.)
         * If that happened after we setrunqueue ourselves but before we
         * mi_switch()'ed, we might not be on the queue indicated by
         * our priority.
         */
        s = splhigh();
        setrunqueue(p);
        p->p_stats->p_ru.ru_nivcsw++;
        mi_switch();
        splx(s);
        /* Signals may have arrived while we were switched out. */
        while ((sig = CURSIG(p)) != 0)
            postsig(sig);
    }
    /*
     * Charge system time if profiling.
     */
    if (p->p_flag & P_PROFIL)
        addupc_task(p, frame->tf_eip,
                (u_int)(p->p_sticks - oticks) * psratio);

    curpriority = p->p_priority;
}

/*
 * Exception, fault, and trap interface to the FreeBSD kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 */

void
trap(frame)
    struct trapframe frame;
{
    struct proc *p = curproc;
    u_quad_t sticks = 0;
    int i = 0, ucode = 0, type, code;
    vm_offset_t eva;

    if (!(frame.tf_eflags & PSL_I)) {
        /*
         * Buggy application or kernel code has disabled interrupts
         * and then trapped.  Enabling interrupts now is wrong, but
         * it is better than running with interrupts disabled until
         * they are accidentally enabled later.
         */
        type = frame.tf_trapno;
        if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
            printf(
                "pid %ld (%s): trap %d with interrupts disabled\n",
                (long)curproc->p_pid, curproc->p_comm, type);
        else if (type != T_BPTFLT && type != T_TRCTRAP)
            /*
             * XXX not quite right, since this may be for a
             * multiple fault in user mode.
             */
            printf("kernel trap %d with interrupts disabled\n",
                type);
        enable_intr();
    }

    eva = 0;
    if (frame.tf_trapno == T_PAGEFLT) {
        /*
         * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
         * This problem is worked around by using an interrupt
         * gate for the pagefault handler.  We are finally ready
         * to read %cr2 and then must reenable interrupts.
         *
         * XXX this should be in the switch statement, but the
         * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
         * flow of control too much for this to be obviously
         * correct.
         */
        eva = rcr2();
        enable_intr();
    }

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
    type = frame.tf_trapno;
    code = frame.tf_err;

#ifdef VM86
    if (in_vm86call) {
        if (frame.tf_eflags & PSL_VM &&
            (type == T_PROTFLT || type == T_STKFLT)) {
            i = vm86_emulate((struct vm86frame *)&frame);
            if (i != 0)
                /*
                 * returns to original process
                 */
                vm86_trap((struct vm86frame *)&frame);
            return;
        }
        switch (type) {
            /*
             * these traps want either a process context, or
             * assume a normal userspace trap.
             */
        case T_PROTFLT:
        case T_SEGNPFLT:
            trap_fatal(&frame, eva);
            return;
        case T_TRCTRAP:
            type = T_BPTFLT;    /* kernel breakpoint */
            /* FALL THROUGH */
        }
        goto kernel_trap;   /* normal kernel trap handling */
    }
#endif

    if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
        /* user trap */

        sticks = p->p_sticks;
        p->p_md.md_regs = &frame;

        switch (type) {
        case T_PRIVINFLT:   /* privileged instruction fault */
            ucode = type;
            i = SIGILL;
            break;

        case T_BPTFLT:      /* bpt instruction fault */
        case T_TRCTRAP:     /* trace trap */
            frame.tf_eflags &= ~PSL_T;
            i = SIGTRAP;
            break;

        case T_ARITHTRAP:   /* arithmetic trap */
            ucode = code;
            i = SIGFPE;
            break;

        case T_ASTFLT:      /* Allow process switch */
            astoff();
            cnt.v_soft++;
            if (p->p_flag & P_OWEUPC) {
                p->p_flag &= ~P_OWEUPC;
                addupc_task(p, p->p_stats->p_prof.pr_addr,
                        p->p_stats->p_prof.pr_ticks);
            }
            goto out;

            /*
             * The following two traps can happen in
             * vm86 mode, and, if so, we want to handle
             * them specially.
             */
        case T_PROTFLT:     /* general protection fault */
        case T_STKFLT:      /* stack fault */
#ifdef VM86
            if (frame.tf_eflags & PSL_VM) {
                i = vm86_emulate((struct vm86frame *)&frame);
                if (i == 0)
                    goto out;
                break;
            }
#endif /* VM86 */
            /* FALL THROUGH */

        case T_SEGNPFLT:    /* segment not present fault */
        case T_TSSFLT:      /* invalid TSS fault */
        case T_DOUBLEFLT:   /* double fault */
        default:
            ucode = code + BUS_SEGM_FAULT ;
            i = SIGBUS;
            break;

        case T_PAGEFLT:     /* page fault */
            i = trap_pfault(&frame, TRUE, eva);
            if (i == -1)
                return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
            if (i == -2)
                goto restart;
#endif
            if (i == 0)
                goto out;

            ucode = T_PAGEFLT;
            break;

        case T_DIVIDE:      /* integer divide fault */
            ucode = FPE_INTDIV_TRAP;
            i = SIGFPE;
            break;

#if NISA > 0
        case T_NMI:
#ifdef POWERFAIL_NMI
            goto handle_powerfail;
#else /* !POWERFAIL_NMI */
#ifdef DDB
            /* NMI can be hooked up to a pushbutton for debugging */
            printf ("NMI ... going to debugger\n");
            if (kdb_trap (type, 0, &frame))
                return;
#endif /* DDB */
            /* machine/parity/power fail/"kitchen sink" faults */
            if (isa_nmi(code) == 0) return;
            panic("NMI indicates hardware failure");
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */

        case T_OFLOW:       /* integer overflow fault */
            ucode = FPE_INTOVF_TRAP;
            i = SIGFPE;
            break;

        case T_BOUND:       /* bounds check fault */
            ucode = FPE_SUBRNG_TRAP;
            i = SIGFPE;
            break;

        case T_DNA:
#if NNPX > 0
            /* if a transparent fault (due to context switch "late") */
            if (npxdna())
                return;
#endif
            if (!pmath_emulate) {
                i = SIGFPE;
                ucode = FPE_FPU_NP_TRAP;
                break;
            }
            i = (*pmath_emulate)(&frame);
            if (i == 0) {
                if (!(frame.tf_eflags & PSL_T))
                    return;
                frame.tf_eflags &= ~PSL_T;
                i = SIGTRAP;
            }
            /* else ucode = emulator_only_knows() XXX */
            break;

        case T_FPOPFLT:     /* FPU operand fetch fault */
            ucode = T_FPOPFLT;
            i = SIGILL;
            break;
        }
    } else {
#ifdef VM86
kernel_trap:
#endif
        /* kernel trap */

        switch (type) {
        case T_PAGEFLT:         /* page fault */
            (void) trap_pfault(&frame, FALSE, eva);
            return;

        case T_DNA:
#if NNPX > 0
            /*
             * The kernel is apparently using npx for copying.
             * XXX this should be fatal unless the kernel has
             * registered such use.
             */
            if (npxdna())
                return;
#endif
            break;

        case T_PROTFLT:     /* general protection fault */
        case T_SEGNPFLT:    /* segment not present fault */
            /*
             * Invalid segment selectors and out of bounds
             * %eip's and %esp's can be set up in user mode.
             * This causes a fault in kernel mode when the
             * kernel tries to return to user mode.  We want
             * to get this fault so that we can fix the
             * problem here and not have to check all the
             * selectors and pointers when the user changes
             * them.
             */
#define MAYBE_DORETI_FAULT(where, whereto) \
    do { \
        if (frame.tf_eip == (int)where) { \
            frame.tf_eip = (int)whereto; \
            return; \
        } \
    } while (0)

            if (intr_nesting_level == 0) {
                /*
                 * Invalid %fs's and %gs's can be created using
                 * procfs or PT_SETREGS or by invalidating the
                 * underlying LDT entry.  This causes a fault
                 * in kernel mode when the kernel attempts to
                 * switch contexts.  Lose the bad context
                 * (XXX) so that we can continue, and generate
                 * a signal.
                 */
                if (frame.tf_eip == (int)cpu_switch_load_fs) {
                    curpcb->pcb_fs = 0;
                    psignal(p, SIGBUS);
                    return;
                }
                if (frame.tf_eip == (int)cpu_switch_load_gs) {
                    curpcb->pcb_gs = 0;
                    psignal(p, SIGBUS);
                    return;
                }
                MAYBE_DORETI_FAULT(doreti_iret,
                           doreti_iret_fault);
                MAYBE_DORETI_FAULT(doreti_popl_ds,
                           doreti_popl_ds_fault);
                MAYBE_DORETI_FAULT(doreti_popl_es,
                           doreti_popl_es_fault);
                if (curpcb && curpcb->pcb_onfault) {
                    frame.tf_eip = (int)curpcb->pcb_onfault;
                    return;
                }
            }
            break;

        case T_TSSFLT:
            /*
             * PSL_NT can be set in user mode and isn't cleared
             * automatically when the kernel is entered.  This
             * causes a TSS fault when the kernel attempts to
             * `iret' because the TSS link is uninitialized.  We
             * want to get this fault so that we can fix the
             * problem here and not every time the kernel is
             * entered.
             */
            if (frame.tf_eflags & PSL_NT) {
                frame.tf_eflags &= ~PSL_NT;
                return;
            }
            break;

        case T_TRCTRAP:  /* trace trap */
            if (frame.tf_eip == (int)IDTVEC(syscall)) {
                /*
                 * We've just entered system mode via the
                 * syscall lcall.  Continue single stepping
                 * silently until the syscall handler has
                 * saved the flags.
                 */
                return;
            }
            if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
                /*
                 * The syscall handler has now saved the
                 * flags.  Stop single stepping it.
                 */
                frame.tf_eflags &= ~PSL_T;
                return;
            }
            /*
             * Fall through.
             */
        case T_BPTFLT:
            /*
             * If DDB is enabled, let it handle the debugger trap.
             * Otherwise, debugger traps "can't happen".
             */
#ifdef DDB
            if (kdb_trap (type, 0, &frame))
                return;
#endif
            break;

#if NISA > 0
        case T_NMI:
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
# define TIMER_FREQ 1193182
#endif
    handle_powerfail:
        {
          static unsigned lastalert = 0;

          /* Rate-limit the power-fail alert to once per 10 seconds. */
          if(time_second - lastalert > 10)
            {
              log(LOG_WARNING, "NMI: power fail\n");
              sysbeep(TIMER_FREQ/880, hz);
              lastalert = time_second;
            }
          return;
        }
#else /* !POWERFAIL_NMI */
#ifdef DDB
            /* NMI can be hooked up to a pushbutton for debugging */
            printf ("NMI ... going to debugger\n");
            if (kdb_trap (type, 0, &frame))
                return;
#endif /* DDB */
            /* machine/parity/power fail/"kitchen sink" faults */
            if (isa_nmi(code) == 0) return;
            /* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
        }

        trap_fatal(&frame, eva);
        return;
    }

    /* Translate fault for emulators (e.g. Linux) */
    if (*p->p_sysent->sv_transtrap)
        i = (*p->p_sysent->sv_transtrap)(i, type);

    trapsignal(p, i, ucode);

#ifdef DEBUG
    if (type <= MAX_TRAP_MSG) {
        uprintf("fatal process exception: %s",
            trap_msg[type]);
        if ((type == T_PAGEFLT) || (type == T_PROTFLT))
            uprintf(", fault VA = 0x%lx", (u_long)eva);
        uprintf("\n");
    }
#endif

out:
    userret(p, &frame, sticks);
}

#ifdef notyet
/*
 * This version doesn't allow a page fault to user space while
 * in the kernel. The rest of the kernel needs to be made "safe"
 * before this can be used. I think the only things remaining
 * to be made safe are the iBCS2 code and the process tracing/
 * debugging code.
 */
static int
trap_pfault(frame, usermode, eva)
    struct trapframe *frame;
    int usermode;
    vm_offset_t eva;
{
    vm_offset_t va;
    struct vmspace *vm = NULL;
    vm_map_t map = 0;
    int rv = 0;
    vm_prot_t ftype;
    struct proc *p = curproc;

    if (frame->tf_err & PGEX_W)
        ftype = VM_PROT_READ | VM_PROT_WRITE;
    else
        ftype = VM_PROT_READ;

    va = trunc_page(eva);
    if (va < VM_MIN_KERNEL_ADDRESS) {
        vm_offset_t v;
        vm_page_t mpte;

        if (p == NULL ||
            (!usermode && va < VM_MAXUSER_ADDRESS &&
             (intr_nesting_level != 0 || curpcb == NULL ||
              curpcb->pcb_onfault == NULL))) {
            trap_fatal(frame, eva);
            return (-1);
        }

        /*
         * This is a fault on non-kernel virtual memory.
         * vm is initialized above to NULL. If curproc is NULL
         * or curproc->p_vmspace is NULL the fault is fatal.
         */
        vm = p->p_vmspace;
        if (vm == NULL)
            goto nogo;

        map = &vm->vm_map;

        /*
         * Keep swapout from messing with us during this
         * critical time.
         */
        ++p->p_lock;

        /*
         * Grow the stack if necessary
         */
        /* grow_stack returns false only if va falls into
         * a growable stack region and the stack growth
         * fails.  It returns true if va was not within
         * a growable stack region, or if the stack
         * growth succeeded.
         */
        if (!grow_stack (p, va)) {
            rv = KERN_FAILURE;
            --p->p_lock;
            goto nogo;
        }

        /* Fault in the user page: */
        rv = vm_fault(map, va, ftype,
            (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);

        --p->p_lock;
    } else {
        /*
         * Don't allow user-mode faults in kernel address space.
         */
        if (usermode)
            goto nogo;

        /*
         * Since we know that kernel virtual address addresses
         * always have pte pages mapped, we just have to fault
         * the page.
         */
        rv = vm_fault(kernel_map, va, ftype, FALSE);
    }

    if (rv == KERN_SUCCESS)
        return (0);
nogo:
    if (!usermode) {
        if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
            frame->tf_eip = (int)curpcb->pcb_onfault;
            return (0);
        }
        trap_fatal(frame, eva);
        return (-1);
    }

    /* kludge to pass faulting virtual address to sendsig */
    frame->tf_err = eva;

    return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
#endif

/*
 * Resolve a page fault at address eva.
 * Returns 0 if the fault was handled, -1 if it was fatal (trap_fatal
 * called or onfault fixup applied), -2 to ask trap() to restart after
 * the Pentium F00F workaround rewrote the trap number, or a signal
 * number (SIGBUS/SIGSEGV) to deliver to the process.
 */
int
trap_pfault(frame, usermode, eva)
    struct trapframe *frame;
    int usermode;
    vm_offset_t eva;
{
    vm_offset_t va;
    struct vmspace *vm = NULL;
    vm_map_t map = 0;
    int rv = 0;
    vm_prot_t ftype;
    struct proc *p = curproc;

    va = trunc_page(eva);
    if (va >= KERNBASE) {
        /*
         * Don't allow user-mode faults in kernel address space.
         * An exception: if the faulting address is the invalid
         * instruction entry in the IDT, then the Intel Pentium
         * F00F bug workaround was triggered, and we need to
         * treat it is as an illegal instruction, and not a page
         * fault.
         */
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
        if ((eva == (unsigned int)&t_idt[6]) && has_f00f_bug) {
            frame->tf_trapno = T_PRIVINFLT;
            return -2;
        }
#endif
        if (usermode)
            goto nogo;

        map = kernel_map;
    } else {
        /*
         * This is a fault on non-kernel virtual memory.
         * vm is initialized above to NULL. If curproc is NULL
         * or curproc->p_vmspace is NULL the fault is fatal.
         */
        if (p != NULL)
            vm = p->p_vmspace;

        if (vm == NULL)
            goto nogo;

        map = &vm->vm_map;
    }

    if (frame->tf_err & PGEX_W)
        ftype = VM_PROT_READ | VM_PROT_WRITE;
    else
        ftype = VM_PROT_READ;

    if (map != kernel_map) {
        /*
         * Keep swapout from messing with us during this
         * critical time.
         */
        ++p->p_lock;

        /*
         * Grow the stack if necessary
         */
        /* grow_stack returns false only if va falls into
         * a growable stack region and the stack growth
         * fails.  It returns true if va was not within
         * a growable stack region, or if the stack
         * growth succeeded.
         */
        if (!grow_stack (p, va)) {
            rv = KERN_FAILURE;
            --p->p_lock;
            goto nogo;
        }

        /* Fault in the user page: */
        rv = vm_fault(map, va, ftype,
            (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0);

        --p->p_lock;
    } else {
        /*
         * Don't have to worry about process locking or stacks in the kernel.
         */
        rv = vm_fault(map, va, ftype, FALSE);
    }

    if (rv == KERN_SUCCESS)
        return (0);
nogo:
    if (!usermode) {
        if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
            frame->tf_eip = (int)curpcb->pcb_onfault;
            return (0);
        }
        trap_fatal(frame, eva);
        return (-1);
    }

    /* kludge to pass faulting virtual address to sendsig */
    frame->tf_err = eva;

    return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/*
 * Print a register/state dump for an unrecoverable trap, give the
 * kernel debugger (if configured) a chance, then panic.  Never returns
 * unless the debugger handles the trap.
 */
static void
trap_fatal(frame, eva)
    struct trapframe *frame;
    vm_offset_t eva;
{
    int code, type, ss, esp;
    struct soft_segment_descriptor softseg;

    code = frame->tf_err;
    type = frame->tf_trapno;
    sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);

    if (type <= MAX_TRAP_MSG)
        printf("\n\nFatal trap %d: %s while in %s mode\n",
            type, trap_msg[type],
            frame->tf_eflags & PSL_VM ? "vm86" :
            ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
    /* three seperate prints in case of a trap on an unmapped page */
    printf("mp_lock = %08x; ", mp_lock);
    printf("cpuid = %d; ", cpuid);
    printf("lapic.id = %08x\n", lapic.id);
#endif
    if (type == T_PAGEFLT) {
        printf("fault virtual address = 0x%x\n", eva);
        printf("fault code = %s %s, %s\n",
            code & PGEX_U ? "user" : "supervisor",
            code & PGEX_W ? "write" : "read",
            code & PGEX_P ? "protection violation" : "page not present");
    }
    printf("instruction pointer = 0x%x:0x%x\n",
           frame->tf_cs & 0xffff, frame->tf_eip);
    if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
        ss = frame->tf_ss & 0xffff;
        esp = frame->tf_esp;
    } else {
        /* Trap from kernel mode: %ss/%esp were not pushed; derive them. */
        ss = GSEL(GDATA_SEL, SEL_KPL);
        esp = (int)&frame->tf_esp;
    }
    printf("stack pointer = 0x%x:0x%x\n", ss, esp);
    printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp);
    printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
           softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
    printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
           softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
           softseg.ssd_gran);
    printf("processor eflags = ");
    if (frame->tf_eflags & PSL_T)
        printf("trace trap, ");
    if (frame->tf_eflags & PSL_I)
        printf("interrupt enabled, ");
    if (frame->tf_eflags & PSL_NT)
        printf("nested task, ");
    if (frame->tf_eflags & PSL_RF)
        printf("resume, ");
    if (frame->tf_eflags & PSL_VM)
        printf("vm86, ");
    printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
    printf("current process = ");
    if (curproc) {
        printf("%lu (%s)\n",
            (u_long)curproc->p_pid, curproc->p_comm ?
            curproc->p_comm : "");
    } else {
        printf("Idle\n");
    }
    printf("interrupt mask = ");
    if ((cpl & net_imask) == net_imask)
        printf("net ");
    if ((cpl & tty_imask) == tty_imask)
        printf("tty ");
    if ((cpl & bio_imask) == bio_imask)
        printf("bio ");
    if ((cpl & cam_imask) == cam_imask)
        printf("cam ");
    if (cpl == 0)
        printf("none");
#ifdef SMP
/**
 * XXX FIXME:
 *  we probably SHOULD have stopped the other CPUs before now!
 *  another CPU COULD have been touching cpl at this moment...
 */
    printf(" <- SMP: XXX");
#endif
    printf("\n");

#ifdef KDB
    if (kdb_trap(&psl))
        return;
#endif
#ifdef DDB
    if ((debugger_on_panic || in_Debugger) && kdb_trap(type, 0, frame))
        return;
#endif
    printf("trap number = %d\n", type);
    if (type <= MAX_TRAP_MSG)
        panic(trap_msg[type]);
    else
        panic("unknown/reserved trap");
}

/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs. This means that the stack that was active at
 * the time of the double fault is not available at <kstack> unless
 * the machine was idle when the double fault occurred. The downside
 * of this is that "trace <ebp>" in ddb won't work.
 */
void
dblfault_handler()
{
    printf("\nFatal double fault:\n");
    printf("eip = 0x%x\n", common_tss.tss_eip);
    printf("esp = 0x%x\n", common_tss.tss_esp);
    printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
    /* three seperate prints in case of a trap on an unmapped page */
    printf("mp_lock = %08x; ", mp_lock);
    printf("cpuid = %d; ", cpuid);
    printf("lapic.id = %08x\n", lapic.id);
#endif
    panic("double fault");
}

/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * it the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
 *
 * Returns 0 if the user page at addr was successfully made writable,
 * 1 on any failure (address out of range, stack growth failure, or
 * vm_fault error).
 */
int trapwrite(addr)
    unsigned addr;
{
    struct proc *p;
    vm_offset_t va;
    struct vmspace *vm;
    int rv;

    va = trunc_page((vm_offset_t)addr);
    /*
     * XXX - MAX is END.  Changed > to >= for temp. fix.
     */
    if (va >= VM_MAXUSER_ADDRESS)
        return (1);

    p = curproc;
    vm = p->p_vmspace;

    /* Keep swapout from messing with us during the fault. */
    ++p->p_lock;

    if (!grow_stack (p, va)) {
        --p->p_lock;
        return (1);
    }

    /*
     * fault the data page
     */
    rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY);

    --p->p_lock;

    if (rv != KERN_SUCCESS)
        return 1;

    return (0);
}

/*
 * System call request from POSIX system call gate interface to kernel.
 * Like trap(), argument is call by reference.
 */
void
syscall(frame)
    struct trapframe frame;
{
    caddr_t params;
    int i;
    struct sysent *callp;
    struct proc *p = curproc;
    u_quad_t sticks;
    int error;
    int args[8];
    u_int code;

#ifdef DIAGNOSTIC
    if (ISPL(frame.tf_cs) != SEL_UPL)
        panic("syscall");
#endif
    sticks = p->p_sticks;
    p->p_md.md_regs = &frame;
    /* Arguments live on the user stack, just above the return address. */
    params = (caddr_t)frame.tf_esp + sizeof(int);
    code = frame.tf_eax;
    if (p->p_sysent->sv_prepsyscall) {
        (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
    } else {
        /*
         * Need to check if this is a 32 bit or 64 bit syscall.
         */
        if (code == SYS_syscall) {
            /*
             * Code is first argument, followed by actual args.
             */
            code = fuword(params);
            params += sizeof(int);
        } else if (code == SYS___syscall) {
            /*
             * Like syscall, but code is a quad, so as to maintain
             * quad alignment for the rest of the arguments.
             */
            code = fuword(params);
            params += sizeof(quad_t);
        }
    }

    if (p->p_sysent->sv_mask)
        code &= p->p_sysent->sv_mask;

    /* Out-of-range codes dispatch to entry 0 (conventionally nosys). */
    if (code >= p->p_sysent->sv_size)
        callp = &p->p_sysent->sv_table[0];
    else
        callp = &p->p_sysent->sv_table[code];

    if (params && (i = callp->sy_narg * sizeof(int)) &&
        (error = copyin(params, (caddr_t)args, (u_int)i))) {
#ifdef KTRACE
        if (KTRPOINT(p, KTR_SYSCALL))
            ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
        goto bad;
    }
#ifdef KTRACE
    if (KTRPOINT(p, KTR_SYSCALL))
        ktrsyscall(p->p_tracep, code, callp->sy_narg, args);
#endif
    p->p_retval[0] = 0;
    p->p_retval[1] = frame.tf_edx;

    STOPEVENT(p, S_SCE, callp->sy_narg);

    error = (*callp->sy_call)(p, args);

    switch (error) {

    case 0:
        /*
         * Reinitialize proc pointer `p' as it may be different
         * if this is a child returning from fork syscall.
         */
        p = curproc;
        frame.tf_eax = p->p_retval[0];
        frame.tf_edx = p->p_retval[1];
        frame.tf_eflags &= ~PSL_C;
        break;

    case ERESTART:
        /*
         * Reconstruct pc, assuming lcall $X,y is 7 bytes,
         * int 0x80 is 2 bytes.  We saved this in tf_err.
         */
        frame.tf_eip -= frame.tf_err;
        break;

    case EJUSTRETURN:
        break;

    default:
bad:
        if (p->p_sysent->sv_errsize)
            if (error >= p->p_sysent->sv_errsize)
                error = -1; /* XXX */
            else
                error = p->p_sysent->sv_errtbl[error];
        frame.tf_eax = error;
        frame.tf_eflags |= PSL_C;
        break;
    }

    if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
        /* Traced syscall. */
        frame.tf_eflags &= ~PSL_T;
        trapsignal(p, SIGTRAP, 0);
    }

    userret(p, &frame, sticks);

#ifdef KTRACE
    if (KTRPOINT(p, KTR_SYSRET))
        ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
#endif

    /*
     * This works because errno is findable through the
     * register set.  If we ever support an emulation where this
     * is not the case, this code will need to be revisited.
     */
    STOPEVENT(p, S_SCX, code);

}

/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.
 */
void
fork_return(p, frame)
    struct proc *p;
    struct trapframe frame;
{
    frame.tf_eax = 0;       /* Child returns zero */
    frame.tf_eflags &= ~PSL_C;  /* success */
    frame.tf_edx = 1;

    userret(p, &frame, 0);
#ifdef KTRACE
    if (KTRPOINT(p, KTR_SYSRET))
        ktrsysret(p->p_tracep, SYS_fork, 0, 0);
#endif
}