subr_syscall.c revision 9533
1/*- 2 * Copyright (C) 1994, David Greenman 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the University of Utah, and William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
 *	$Id: trap.c,v 1.54 1995/07/14 09:25:51 davidg Exp $
 */

/*
 * 386 Trap and System call handling
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/syscall.h>
#include <sys/sysent.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/psl.h>
#include <machine/reg.h>
#include <machine/trap.h>
#include <machine/../isa/isa_device.h>

/* config(8)-generated device count headers (NISA, NNPX) — presumably; verify build */
#include "isa.h"
#include "npx.h"

int	trap_pfault __P((struct trapframe *, int));
void	trap_fatal __P((struct trapframe *));

/*
 * Printable names for the i386 trap numbers, indexed by tf_trapno.
 * Used by trap_fatal() and the DIAGNOSTIC uprintf in trap().
 */
#define MAX_TRAP_MSG		27
char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
};

/*
 * userret(p, frame, oticks):
 *	Common code run just before returning to user mode from trap()
 *	and syscall():  deliver pending signals, honor a requested
 *	reschedule, and charge profiling time accumulated since `oticks'
 *	(the p_sticks value sampled at kernel entry).
 */
static inline void
userret(p, frame, oticks)
	struct proc *p;
	struct trapframe *frame;
	u_quad_t oticks;
{
	int sig, s;

	/* Post any signals that became pending while we were in the kernel. */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);
	p->p_priority = p->p_usrpri;
	if (want_resched) {
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we setrunqueue ourselves but before we
		 * mi_switch()'ed, we might not be on the queue indicated by
		 * our priority.
		 */
		s = splclock();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;	/* involuntary context switch */
		mi_switch();
		splx(s);
		/* Signals may have arrived while another process ran. */
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}
	/*
	 * Charge system time if profiling.
	 */
	if (p->p_flag & P_PROFIL) {
		u_quad_t ticks = p->p_sticks - oticks;

		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame->tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame->tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpriority = p->p_priority;
}

/*
 * trap(frame):
 *	Exception, fault, and trap interface to the FreeBSD kernel.
 *	This common code is called from assembly language IDT gate entry
 *	routines that prepare a suitable stack frame, and restore this
 *	frame after the exception has been processed.
161 */ 162 163/*ARGSUSED*/ 164void 165trap(frame) 166 struct trapframe frame; 167{ 168 struct proc *p = curproc; 169 u_quad_t sticks = 0; 170 int i = 0, ucode = 0, type, code; 171#ifdef DIAGNOSTIC 172 u_long eva; 173#endif 174 175 type = frame.tf_trapno; 176 code = frame.tf_err; 177 178 if (ISPL(frame.tf_cs) == SEL_UPL) { 179 /* user trap */ 180 181 sticks = p->p_sticks; 182 p->p_md.md_regs = (int *)&frame; 183 184 switch (type) { 185 case T_PRIVINFLT: /* privileged instruction fault */ 186 ucode = type; 187 i = SIGILL; 188 break; 189 190 case T_BPTFLT: /* bpt instruction fault */ 191 case T_TRCTRAP: /* trace trap */ 192 frame.tf_eflags &= ~PSL_T; 193 i = SIGTRAP; 194 break; 195 196 case T_ARITHTRAP: /* arithmetic trap */ 197 ucode = code; 198 i = SIGFPE; 199 break; 200 201 case T_ASTFLT: /* Allow process switch */ 202 astoff(); 203 cnt.v_soft++; 204 if (p->p_flag & P_OWEUPC) { 205 addupc(frame.tf_eip, &p->p_stats->p_prof, 1); 206 p->p_flag &= ~P_OWEUPC; 207 } 208 goto out; 209 210 case T_PROTFLT: /* general protection fault */ 211 case T_SEGNPFLT: /* segment not present fault */ 212 case T_STKFLT: /* stack fault */ 213 case T_TSSFLT: /* invalid TSS fault */ 214 case T_DOUBLEFLT: /* double fault */ 215 default: 216 ucode = code + BUS_SEGM_FAULT ; 217 i = SIGBUS; 218 break; 219 220 case T_PAGEFLT: /* page fault */ 221 i = trap_pfault(&frame, TRUE); 222 if (i == -1) 223 return; 224 if (i == 0) 225 goto out; 226 227 ucode = T_PAGEFLT; 228 break; 229 230 case T_DIVIDE: /* integer divide fault */ 231 ucode = FPE_INTDIV_TRAP; 232 i = SIGFPE; 233 break; 234 235#if NISA > 0 236 case T_NMI: 237#ifdef DDB 238 /* NMI can be hooked up to a pushbutton for debugging */ 239 printf ("NMI ... 
going to debugger\n"); 240 if (kdb_trap (type, 0, &frame)) 241 return; 242#endif 243 /* machine/parity/power fail/"kitchen sink" faults */ 244 if (isa_nmi(code) == 0) return; 245 panic("NMI indicates hardware failure"); 246#endif 247 248 case T_OFLOW: /* integer overflow fault */ 249 ucode = FPE_INTOVF_TRAP; 250 i = SIGFPE; 251 break; 252 253 case T_BOUND: /* bounds check fault */ 254 ucode = FPE_SUBRNG_TRAP; 255 i = SIGFPE; 256 break; 257 258 case T_DNA: 259#if NNPX > 0 260 /* if a transparent fault (due to context switch "late") */ 261 if (npxdna()) 262 return; 263#endif /* NNPX > 0 */ 264 265#if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE) 266 i = math_emulate(&frame); 267 if (i == 0) { 268 if (!(frame.tf_eflags & PSL_T)) 269 return; 270 frame.tf_eflags &= ~PSL_T; 271 i = SIGTRAP; 272 } 273 /* else ucode = emulator_only_knows() XXX */ 274#else /* MATH_EMULATE || GPL_MATH_EMULATE */ 275 i = SIGFPE; 276 ucode = FPE_FPU_NP_TRAP; 277#endif /* MATH_EMULATE || GPL_MATH_EMULATE */ 278 break; 279 280 case T_FPOPFLT: /* FPU operand fetch fault */ 281 ucode = T_FPOPFLT; 282 i = SIGILL; 283 break; 284 } 285 } else { 286 /* kernel trap */ 287 288 switch (type) { 289 case T_PAGEFLT: /* page fault */ 290 (void) trap_pfault(&frame, FALSE); 291 return; 292 293 case T_PROTFLT: /* general protection fault */ 294 case T_SEGNPFLT: /* segment not present fault */ 295 /* 296 * Invalid segment selectors and out of bounds 297 * %eip's and %esp's can be set up in user mode. 298 * This causes a fault in kernel mode when the 299 * kernel tries to return to user mode. We want 300 * to get this fault so that we can fix the 301 * problem here and not have to check all the 302 * selectors and pointers when the user changes 303 * them. 
304 */ 305#define MAYBE_DORETI_FAULT(where, whereto) \ 306 do { \ 307 if (frame.tf_eip == (int)where) { \ 308 frame.tf_eip = (int)whereto; \ 309 return; \ 310 } \ 311 } while (0) 312 313 if (intr_nesting_level == 0) { 314 MAYBE_DORETI_FAULT(doreti_iret, 315 doreti_iret_fault); 316 MAYBE_DORETI_FAULT(doreti_popl_ds, 317 doreti_popl_ds_fault); 318 MAYBE_DORETI_FAULT(doreti_popl_es, 319 doreti_popl_es_fault); 320 } 321 if (curpcb && curpcb->pcb_onfault) { 322 frame.tf_eip = (int)curpcb->pcb_onfault; 323 return; 324 } 325 break; 326 327 case T_TSSFLT: 328 /* 329 * PSL_NT can be set in user mode and isn't cleared 330 * automatically when the kernel is entered. This 331 * causes a TSS fault when the kernel attempts to 332 * `iret' because the TSS link is uninitialized. We 333 * want to get this fault so that we can fix the 334 * problem here and not every time the kernel is 335 * entered. 336 */ 337 if (frame.tf_eflags & PSL_NT) { 338 frame.tf_eflags &= ~PSL_NT; 339 return; 340 } 341 break; 342 343#ifdef DDB 344 case T_BPTFLT: 345 case T_TRCTRAP: 346 if (kdb_trap (type, 0, &frame)) 347 return; 348 break; 349#else 350 case T_TRCTRAP: /* trace trap -- someone single stepping lcall's */ 351 /* Q: how do we turn it on again? */ 352 frame.tf_eflags &= ~PSL_T; 353 return; 354#endif 355 356#if NISA > 0 357 case T_NMI: 358#ifdef DDB 359 /* NMI can be hooked up to a pushbutton for debugging */ 360 printf ("NMI ... 
going to debugger\n"); 361 if (kdb_trap (type, 0, &frame)) 362 return; 363#endif 364 /* machine/parity/power fail/"kitchen sink" faults */ 365 if (isa_nmi(code) == 0) return; 366 /* FALL THROUGH */ 367#endif 368 } 369 370 trap_fatal(&frame); 371 return; 372 } 373 374 trapsignal(p, i, ucode); 375 376#ifdef DEBUG 377 eva = rcr2(); 378 if (type <= MAX_TRAP_MSG) { 379 uprintf("fatal process exception: %s", 380 trap_msg[type]); 381 if ((type == T_PAGEFLT) || (type == T_PROTFLT)) 382 uprintf(", fault VA = 0x%x", eva); 383 uprintf("\n"); 384 } 385#endif 386 387out: 388 userret(p, &frame, sticks); 389} 390 391#ifdef notyet 392/* 393 * This version doesn't allow a page fault to user space while 394 * in the kernel. The rest of the kernel needs to be made "safe" 395 * before this can be used. I think the only things remaining 396 * to be made safe are the iBCS2 code and the process tracing/ 397 * debugging code. 398 */ 399int 400trap_pfault(frame, usermode) 401 struct trapframe *frame; 402 int usermode; 403{ 404 vm_offset_t va; 405 struct vmspace *vm = NULL; 406 vm_map_t map = 0; 407 int rv = 0; 408 vm_prot_t ftype; 409 int eva; 410 struct proc *p = curproc; 411 412 if (frame->tf_err & PGEX_W) 413 ftype = VM_PROT_READ | VM_PROT_WRITE; 414 else 415 ftype = VM_PROT_READ; 416 417 eva = rcr2(); 418 va = trunc_page((vm_offset_t)eva); 419 420 if (va < VM_MIN_KERNEL_ADDRESS) { 421 vm_offset_t v; 422 vm_page_t ptepg; 423 424 if ((p == NULL) || 425 (!usermode && va < VM_MAXUSER_ADDRESS && 426 curpcb->pcb_onfault == NULL)) { 427 trap_fatal(frame); 428 return (-1); 429 } 430 431 /* 432 * This is a fault on non-kernel virtual memory. 433 * vm is initialized above to NULL. If curproc is NULL 434 * or curproc->p_vmspace is NULL the fault is fatal. 435 */ 436 vm = p->p_vmspace; 437 if (vm == NULL) 438 goto nogo; 439 440 map = &vm->vm_map; 441 442 /* 443 * Keep swapout from messing with us during this 444 * critical time. 
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		if ((caddr_t)va > vm->vm_maxsaddr
		    && (caddr_t)va < (caddr_t)USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

		/*
		 * Check if page table is mapped, if not,
		 * fault it first
		 */
		v = (vm_offset_t) vtopte(va);

		/* Fault the pte only if needed: */
		if (*((int *)vtopte(v)) == 0)
			(void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE);

		pmap_use_pt( vm_map_pmap(map), va);

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype, FALSE);

		pmap_unuse_pt( vm_map_pmap(map), va);

		--p->p_lock;
	} else {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		/*
		 * Since we know that kernel virtual address addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(kernel_map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		if (curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
#endif

/*
 * trap_pfault(frame, usermode):
 *	Resolve a page fault via vm_fault().  Returns -1 if the fault was
 *	fatal (trap_fatal() already called), 0 if resolved (possibly via
 *	the pcb_onfault recovery hook), or a signal number (SIGBUS/SIGSEGV)
 *	for the caller to post.  The faulting address is read from %cr2.
 */
int
trap_pfault(frame, usermode)
	struct trapframe *frame;
	int usermode;
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	vm_prot_t ftype;
	int eva;
	struct proc *p = curproc;

	eva = rcr2();
	va = trunc_page((vm_offset_t)eva);

	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (p != NULL)
			vm = p->p_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	/* PGEX_W in the error code means the access was a write. */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_READ | VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	if (map != kernel_map) {
		vm_offset_t v;
		vm_page_t ptepg;	/* XXX unused */

		/*
		 * Keep swapout from messing with us during this
		 * critical time.
		 */
		++p->p_lock;

		/*
		 * Grow the stack if necessary
		 */
		if ((caddr_t)va > vm->vm_maxsaddr
		    && (caddr_t)va < (caddr_t)USRSTACK) {
			if (!grow(p, va)) {
				rv = KERN_FAILURE;
				--p->p_lock;
				goto nogo;
			}
		}

		/*
		 * Check if page table is mapped, if not,
		 * fault it first
		 */
		v = (vm_offset_t) vtopte(va);

		/* Fault the pte only if needed: */
		if (*((int *)vtopte(v)) == 0)
			(void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE);

		pmap_use_pt( vm_map_pmap(map), va);

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype, FALSE);

		pmap_unuse_pt( vm_map_pmap(map), va);

		--p->p_lock;
	} else {
		/*
		 * Since we know that kernel virtual address addresses
		 * always have pte pages mapped, we just have to fault
		 * the page.
		 */
		rv = vm_fault(map, va, ftype, FALSE);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/* Kernel-mode fault: try the copyin/copyout recovery hook. */
		if (curpcb && curpcb->pcb_onfault) {
			frame->tf_eip = (int)curpcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/*
 * trap_fatal(frame):
 *	Print a register/segment/eflags dump for an unrecoverable trap,
 *	give the kernel debugger a chance if configured, then panic.
 */
void
trap_fatal(frame)
	struct trapframe *frame;
{
	int code, type, eva;
	struct soft_segment_descriptor softseg;

	code = frame->tf_err;
	type = frame->tf_trapno;
	eva = rcr2();
	/* Decode the faulting code segment's GDT descriptor for the dump. */
	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);

	if (type <= MAX_TRAP_MSG)
		printf("\n\nFatal trap %d: %s while in %s mode\n",
			type, trap_msg[type],
			ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
	if (type == T_PAGEFLT) {
		printf("fault virtual address = 0x%x\n", eva);
		printf("fault code = %s %s, %s\n",
			code & PGEX_U ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_P ? "protection violation" : "page not present");
	}
	printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip);
	printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
	    softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
	printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
	    softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran);
	printf("processor eflags = ");
	if (frame->tf_eflags & PSL_T)
		printf("trace/trap, ");
	if (frame->tf_eflags & PSL_I)
		printf("interrupt enabled, ");
	if (frame->tf_eflags & PSL_NT)
		printf("nested task, ");
	if (frame->tf_eflags & PSL_RF)
		printf("resume, ");
	if (frame->tf_eflags & PSL_VM)
		printf("vm86, ");
	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
	printf("current process = ");
	if (curproc) {
		printf("%lu (%s)\n",
		    (u_long)curproc->p_pid, curproc->p_comm ?
		    curproc->p_comm : "");
	} else {
		printf("Idle\n");
	}
	printf("interrupt mask = ");
	if ((cpl & net_imask) == net_imask)
		printf("net ");
	if ((cpl & tty_imask) == tty_imask)
		printf("tty ");
	if ((cpl & bio_imask) == bio_imask)
		printf("bio ");
	if (cpl == 0)
		printf("none");
	printf("\n");

#ifdef KDB
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	if (kdb_trap (type, 0, frame))
		return;
#endif
	if (type <= MAX_TRAP_MSG)
		panic(trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
 *
 * Returns 0 on success, 1 if the address cannot be made writable.
 */
int trapwrite(addr)
	unsigned addr;
{
	struct proc *p;
	vm_offset_t va, v;
	struct vmspace *vm;
	int rv;

	va = trunc_page((vm_offset_t)addr);
	/*
	 * XXX - MAX is END. Changed > to >= for temp. fix.
	 */
	if (va >= VM_MAXUSER_ADDRESS)
		return (1);

	p = curproc;
	vm = p->p_vmspace;

	/* Hold off swapout while we work on the address space. */
	++p->p_lock;

	if ((caddr_t)va >= vm->vm_maxsaddr
	    && (caddr_t)va < (caddr_t)USRSTACK) {
		if (!grow(p, va)) {
			--p->p_lock;
			return (1);
		}
	}

	v = trunc_page(vtopte(va));

	/*
	 * wire the pte page
	 */
	if (va < USRSTACK) {
		vm_map_pageable(&vm->vm_map, v, round_page(v+1), FALSE);
	}

	/*
	 * fault the data page
	 */
	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, FALSE);

	/*
	 * unwire the pte page
	 */
	if (va < USRSTACK) {
		vm_map_pageable(&vm->vm_map, v, round_page(v+1), TRUE);
	}

	--p->p_lock;

	if (rv != KERN_SUCCESS)
		return 1;

	return (0);
}

/*
 * syscall(frame):
 *	System call request from POSIX system call gate interface to kernel.
762 * Like trap(), argument is call by reference. 763 */ 764/*ARGSUSED*/ 765void 766syscall(frame) 767 struct trapframe frame; 768{ 769 caddr_t params; 770 int i; 771 struct sysent *callp; 772 struct proc *p = curproc; 773 u_quad_t sticks; 774 int error, opc; 775 int args[8], rval[2]; 776 u_int code; 777 778 sticks = p->p_sticks; 779 if (ISPL(frame.tf_cs) != SEL_UPL) 780 panic("syscall"); 781 782 code = frame.tf_eax; 783 p->p_md.md_regs = (int *)&frame; 784 params = (caddr_t)frame.tf_esp + sizeof (int) ; 785 786 /* 787 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always. 788 */ 789 opc = frame.tf_eip - 7; 790 /* 791 * Need to check if this is a 32 bit or 64 bit syscall. 792 */ 793 if (code == SYS_syscall) { 794 /* 795 * Code is first argument, followed by actual args. 796 */ 797 code = fuword(params); 798 params += sizeof (int); 799 } else if (code == SYS___syscall) { 800 /* 801 * Like syscall, but code is a quad, so as to maintain 802 * quad alignment for the rest of the arguments. 803 */ 804 code = fuword(params + _QUAD_LOWWORD * sizeof(int)); 805 params += sizeof(quad_t); 806 } 807 808 if (p->p_sysent->sv_mask) 809 code = code & p->p_sysent->sv_mask; 810 811 if (code >= p->p_sysent->sv_size) 812 callp = &p->p_sysent->sv_table[0]; 813 else 814 callp = &p->p_sysent->sv_table[code]; 815 816 if ((i = callp->sy_narg * sizeof (int)) && 817 (error = copyin(params, (caddr_t)args, (u_int)i))) { 818#ifdef KTRACE 819 if (KTRPOINT(p, KTR_SYSCALL)) 820 ktrsyscall(p->p_tracep, code, callp->sy_narg, args); 821#endif 822 goto bad; 823 } 824#ifdef KTRACE 825 if (KTRPOINT(p, KTR_SYSCALL)) 826 ktrsyscall(p->p_tracep, code, callp->sy_narg, args); 827#endif 828 rval[0] = 0; 829 rval[1] = frame.tf_edx; 830 831 error = (*callp->sy_call)(p, args, rval); 832 833 switch (error) { 834 835 case 0: 836 /* 837 * Reinitialize proc pointer `p' as it may be different 838 * if this is a child returning from fork syscall. 
839 */ 840 p = curproc; 841 frame.tf_eax = rval[0]; 842 frame.tf_edx = rval[1]; 843 frame.tf_eflags &= ~PSL_C; /* carry bit */ 844 break; 845 846 case ERESTART: 847 frame.tf_eip = opc; 848 break; 849 850 case EJUSTRETURN: 851 break; 852 853 default: 854 bad: 855 if (p->p_sysent->sv_errsize) 856 if (error >= p->p_sysent->sv_errsize) 857 error = -1; /* XXX */ 858 else 859 error = p->p_sysent->sv_errtbl[error]; 860 frame.tf_eax = error; 861 frame.tf_eflags |= PSL_C; /* carry bit */ 862 break; 863 } 864 865 userret(p, &frame, sticks); 866 867#ifdef KTRACE 868 if (KTRPOINT(p, KTR_SYSRET)) 869 ktrsysret(p->p_tracep, code, error, rval[0]); 870#endif 871} 872 873#ifdef COMPAT_LINUX 874/* 875 * linux_syscall(frame): 876 */ 877/*ARGSUSED*/ 878void 879linux_syscall(frame) 880 struct trapframe frame; 881{ 882 caddr_t params; 883 int i; 884 struct proc *p = curproc; 885 struct sysent *callp; 886 u_quad_t sticks; 887 int error, opc; 888 int rval[2]; 889 int code; 890 struct linux_syscall_args { 891 int arg1; 892 int arg2; 893 int arg3; 894 int arg4; 895 int arg5; 896 } args; 897 898 args.arg1 = frame.tf_ebx; 899 args.arg2 = frame.tf_ecx; 900 args.arg3 = frame.tf_edx; 901 args.arg4 = frame.tf_esi; 902 args.arg5 = frame.tf_edi; 903 904 sticks = p->p_sticks; 905 if (ISPL(frame.tf_cs) != SEL_UPL) 906 panic("linux syscall"); 907 908 code = frame.tf_eax; 909 p->p_md.md_regs = (int *)&frame; 910 params = (caddr_t)frame.tf_esp + sizeof (int) ; 911 912 /* Reconstruct pc, subtract size of int 0x80 */ 913 opc = frame.tf_eip - 2; 914 if (code == 0) { 915 code = fuword(params); 916 params += sizeof (int); 917 } 918 if (p->p_sysent->sv_mask) 919 code = code & p->p_sysent->sv_mask; 920 921 if (code < 0 || code >= p->p_sysent->sv_size) 922 callp = &p->p_sysent->sv_table[0]; 923 else 924 callp = &p->p_sysent->sv_table[code]; 925 926#ifdef KTRACE 927 if (KTRPOINT(p, KTR_SYSCALL)) 928 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 929#endif 930 931#ifdef KTRACE 932 if (KTRPOINT(p, 
KTR_SYSCALL)) 933 ktrsyscall(p->p_tracep, code, callp->sy_narg, &args); 934#endif 935 rval[0] = 0; 936 rval[1] = frame.tf_edx; 937 938 error = (*callp->sy_call)(p, &args, rval); 939 940 switch (error) { 941 942 case 0: 943 /* 944 * Reinitialize proc pointer `p' as it may be different 945 * if this is a child returning from fork syscall. 946 */ 947 p = curproc; 948 frame.tf_eax = rval[0]; 949 frame.tf_eflags &= ~PSL_C; /* carry bit */ 950 break; 951 952 case ERESTART: 953 frame.tf_eip = opc; 954 break; 955 956 case EJUSTRETURN: 957 break; 958 959 default: 960 bad: 961 if (p->p_sysent->sv_errsize) 962 if (error >= p->p_sysent->sv_errsize) 963 error = -1; /* XXX */ 964 else 965 error = p->p_sysent->sv_errtbl[error]; 966 frame.tf_eax = -error; 967 frame.tf_eflags |= PSL_C; /* carry bit */ 968 break; 969 } 970 971 userret(p, &frame, sticks); 972 973#ifdef KTRACE 974 if (KTRPOINT(p, KTR_SYSRET)) 975 ktrsysret(p->p_tracep, code, error, rval[0]); 976#endif 977} 978#endif /* COMPAT_LINUX */ 979