1/* 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 
25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28#include <kern/task.h> 29#include <kern/thread.h> 30#include <kern/assert.h> 31#include <kern/clock.h> 32#include <kern/locks.h> 33#include <kern/sched_prim.h> 34#include <mach/machine/thread_status.h> 35#include <mach/thread_act.h> 36 37#include <sys/kernel.h> 38#include <sys/vm.h> 39#include <sys/proc_internal.h> 40#include <sys/syscall.h> 41#include <sys/systm.h> 42#include <sys/user.h> 43#include <sys/errno.h> 44#include <sys/kdebug.h> 45#include <sys/sysent.h> 46#include <sys/sysproto.h> 47#include <sys/kauth.h> 48#include <sys/systm.h> 49 50#include <bsm/audit_kernel.h> 51 52#include <i386/seg.h> 53#include <i386/machine_routines.h> 54#include <mach/i386/syscall_sw.h> 55 56#if CONFIG_DTRACE 57extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *); 58extern void dtrace_systrace_syscall_return(unsigned short, int, int *); 59#endif 60 61extern void unix_syscall(x86_saved_state_t *); 62extern void unix_syscall64(x86_saved_state_t *); 63extern void *find_user_regs(thread_t); 64 65extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid); 66extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread); 67/* 68 * Function: unix_syscall 69 * 70 * Inputs: regs - pointer to i386 save area 71 * 72 * Outputs: none 73 */ 74void 75unix_syscall(x86_saved_state_t *state) 76{ 77 thread_t thread; 78 void *vt; 79 unsigned int code; 80 struct sysent *callp; 81 82 int error; 83 vm_offset_t params; 84 struct proc *p; 85 struct uthread *uthread; 86 x86_saved_state32_t *regs; 87 boolean_t args_in_uthread; 88 89 assert(is_saved_state32(state)); 90 regs = saved_state32(state); 91#if DEBUG 92 if (regs->eax == 0x800) 93 thread_exception_return(); 94#endif 95 thread = current_thread(); 96 uthread = get_bsdthread_info(thread); 97 98 /* Get the approriate proc; may be different from task's for vfork() */ 99 if (!(uthread->uu_flag & UT_VFORK)) 100 p = (struct proc *)get_bsdtask_info(current_task()); 
101 else 102 p = current_proc(); 103 104 /* Verify that we are not being called from a task without a proc */ 105 if (p == NULL) { 106 regs->eax = EPERM; 107 regs->efl |= EFL_CF; 108 task_terminate_internal(current_task()); 109 thread_exception_return(); 110 /* NOTREACHED */ 111 } 112 113 code = regs->eax & I386_SYSCALL_NUMBER_MASK; 114 args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread); 115 params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); 116 117 regs->efl &= ~(EFL_CF); 118 119 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; 120 121 if (callp == sysent) { 122 code = fuword(params); 123 params += sizeof(int); 124 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; 125 } 126 127 vt = (void *)uthread->uu_arg; 128 129 if (callp->sy_arg_bytes != 0) { 130 sy_munge_t *mungerp; 131 132 assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg)); 133 if (!args_in_uthread) 134 { 135 uint32_t nargs; 136 nargs = callp->sy_arg_bytes; 137 error = copyin((user_addr_t) params, (char *) vt, nargs); 138 if (error) { 139 regs->eax = error; 140 regs->efl |= EFL_CF; 141 thread_exception_return(); 142 /* NOTREACHED */ 143 } 144 } 145 146 if (code != 180) { 147 int *ip = (int *)vt; 148 149 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, 150 *ip, *(ip+1), *(ip+2), *(ip+3), 0); 151 } 152 mungerp = callp->sy_arg_munge32; 153 154 /* 155 * If non-NULL, then call the syscall argument munger to 156 * copy in arguments (see xnu/bsd/dev/i386/munge.s); the 157 * first argument is NULL because we are munging in place 158 * after a copyin because the ABI currently doesn't use 159 * registers to pass system call arguments. 
160 */ 161 if (mungerp != NULL) 162 (*mungerp)(NULL, vt); 163 } else 164 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, 165 0, 0, 0, 0, 0); 166 167 /* 168 * Delayed binding of thread credential to process credential, if we 169 * are not running with an explicitly set thread credential. 170 */ 171 kauth_cred_uthread_update(uthread, p); 172 173 uthread->uu_rval[0] = 0; 174 uthread->uu_rval[1] = regs->edx; 175 uthread->uu_flag |= UT_NOTCANCELPT; 176 177 178#ifdef JOE_DEBUG 179 uthread->uu_iocount = 0; 180 uthread->uu_vpindex = 0; 181#endif 182 183 AUDIT_SYSCALL_ENTER(code, p, uthread); 184 error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0])); 185 AUDIT_SYSCALL_EXIT(code, p, uthread, error); 186 187#ifdef JOE_DEBUG 188 if (uthread->uu_iocount) 189 joe_debug("system call returned with uu_iocount != 0"); 190#endif 191#if CONFIG_DTRACE 192 uthread->t_dtrace_errno = error; 193#endif /* CONFIG_DTRACE */ 194 195 if (error == ERESTART) { 196 /* 197 * Move the user's pc back to repeat the syscall: 198 * 5 bytes for a sysenter, or 2 for an int 8x. 199 * The SYSENTER_TF_CS covers single-stepping over a sysenter 200 * - see debug trap handler in idt.s/idt64.s 201 */ 202 if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) { 203 regs->eip -= 5; 204 } 205 else 206 regs->eip -= 2; 207 } 208 else if (error != EJUSTRETURN) { 209 if (error) { 210 regs->eax = error; 211 regs->efl |= EFL_CF; /* carry bit */ 212 } else { /* (not error) */ 213 regs->eax = uthread->uu_rval[0]; 214 regs->edx = uthread->uu_rval[1]; 215 } 216 } 217 218 uthread->uu_flag &= ~UT_NOTCANCELPT; 219#if DEBUG 220 /* 221 * if we're holding the funnel panic 222 */ 223 syscall_exit_funnelcheck(); 224#endif /* DEBUG */ 225 if (uthread->uu_lowpri_window) { 226 /* 227 * task is marked as a low priority I/O type 228 * and the I/O we issued while in this system call 229 * collided with normal I/O operations... 
we'll 230 * delay in order to mitigate the impact of this 231 * task on the normal operation of the system 232 */ 233 throttle_lowpri_io(TRUE); 234 } 235 if (code != 180) 236 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, 237 error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); 238 239 thread_exception_return(); 240 /* NOTREACHED */ 241} 242 243 244void 245unix_syscall64(x86_saved_state_t *state) 246{ 247 thread_t thread; 248 unsigned int code; 249 struct sysent *callp; 250 void *uargp; 251 int args_in_regs; 252 int error; 253 struct proc *p; 254 struct uthread *uthread; 255 x86_saved_state64_t *regs; 256 257 assert(is_saved_state64(state)); 258 regs = saved_state64(state); 259 260 if (regs->rax == 0x2000800) 261 thread_exception_return(); 262 263 thread = current_thread(); 264 uthread = get_bsdthread_info(thread); 265 266 /* Get the approriate proc; may be different from task's for vfork() */ 267 if (!(uthread->uu_flag & UT_VFORK)) 268 p = (struct proc *)get_bsdtask_info(current_task()); 269 else 270 p = current_proc(); 271 272 /* Verify that we are not being called from a task without a proc */ 273 if (p == NULL) { 274 regs->rax = EPERM; 275 regs->isf.rflags |= EFL_CF; 276 task_terminate_internal(current_task()); 277 thread_exception_return(); 278 /* NOTREACHED */ 279 } 280 args_in_regs = 6; 281 282 code = regs->rax & SYSCALL_NUMBER_MASK; 283 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; 284 uargp = (void *)(®s->rdi); 285 286 if (callp == sysent) { 287 /* 288 * indirect system call... system call number 289 * passed as 'arg0' 290 */ 291 code = regs->rdi; 292 callp = (code >= NUM_SYSENT) ? 
&sysent[63] : &sysent[code]; 293 uargp = (void *)(®s->rsi); 294 args_in_regs = 5; 295 } 296 297 if (callp->sy_narg != 0) { 298 if (code != 180) { 299 uint64_t *ip = (uint64_t *)uargp; 300 301 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, 302 (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0); 303 } 304 assert(callp->sy_narg <= 8); 305 306 if (callp->sy_narg > args_in_regs) { 307 int copyin_count; 308 309 copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t); 310 311 error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)®s->v_arg6, copyin_count); 312 if (error) { 313 regs->rax = error; 314 regs->isf.rflags |= EFL_CF; 315 thread_exception_return(); 316 /* NOTREACHED */ 317 } 318 } 319 /* 320 * XXX Turn 64 bit unsafe calls into nosys() 321 */ 322 if (callp->sy_flags & UNSAFE_64BIT) { 323 callp = &sysent[63]; 324 goto unsafe; 325 } 326 } else 327 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, 328 0, 0, 0, 0, 0); 329unsafe: 330 331 /* 332 * Delayed binding of thread credential to process credential, if we 333 * are not running with an explicitly set thread credential. 334 */ 335 kauth_cred_uthread_update(uthread, p); 336 337 uthread->uu_rval[0] = 0; 338 uthread->uu_rval[1] = 0; 339 340 341 uthread->uu_flag |= UT_NOTCANCELPT; 342 343 344 AUDIT_SYSCALL_ENTER(code, p, uthread); 345 error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0])); 346 AUDIT_SYSCALL_EXIT(code, p, uthread, error); 347 348#if CONFIG_DTRACE 349 uthread->t_dtrace_errno = error; 350#endif /* CONFIG_DTRACE */ 351 352 if (error == ERESTART) { 353 /* 354 * all system calls come through via the syscall instruction 355 * in 64 bit mode... 
its 2 bytes in length 356 * move the user's pc back to repeat the syscall: 357 */ 358 regs->isf.rip -= 2; 359 } 360 else if (error != EJUSTRETURN) { 361 if (error) { 362 regs->rax = error; 363 regs->isf.rflags |= EFL_CF; /* carry bit */ 364 } else { /* (not error) */ 365 366 switch (callp->sy_return_type) { 367 case _SYSCALL_RET_INT_T: 368 regs->rax = uthread->uu_rval[0]; 369 regs->rdx = uthread->uu_rval[1]; 370 break; 371 case _SYSCALL_RET_UINT_T: 372 regs->rax = ((u_int)uthread->uu_rval[0]); 373 regs->rdx = ((u_int)uthread->uu_rval[1]); 374 break; 375 case _SYSCALL_RET_OFF_T: 376 case _SYSCALL_RET_ADDR_T: 377 case _SYSCALL_RET_SIZE_T: 378 case _SYSCALL_RET_SSIZE_T: 379 regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); 380 regs->rdx = 0; 381 break; 382 case _SYSCALL_RET_NONE: 383 break; 384 default: 385 panic("unix_syscall: unknown return type"); 386 break; 387 } 388 regs->isf.rflags &= ~EFL_CF; 389 } 390 } 391 392 393 uthread->uu_flag &= ~UT_NOTCANCELPT; 394 395 /* 396 * if we're holding the funnel panic 397 */ 398 syscall_exit_funnelcheck(); 399 400 if (uthread->uu_lowpri_window) { 401 /* 402 * task is marked as a low priority I/O type 403 * and the I/O we issued while in this system call 404 * collided with normal I/O operations... 
we'll 405 * delay in order to mitigate the impact of this 406 * task on the normal operation of the system 407 */ 408 throttle_lowpri_io(TRUE); 409 } 410 if (code != 180) 411 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, 412 error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); 413 414 thread_exception_return(); 415 /* NOTREACHED */ 416} 417 418 419void 420unix_syscall_return(int error) 421{ 422 thread_t thread; 423 struct uthread *uthread; 424 struct proc *p; 425 unsigned int code; 426 vm_offset_t params; 427 struct sysent *callp; 428 429 thread = current_thread(); 430 uthread = get_bsdthread_info(thread); 431 432 p = current_proc(); 433 434 if (proc_is64bit(p)) { 435 x86_saved_state64_t *regs; 436 437 regs = saved_state64(find_user_regs(thread)); 438 439 /* reconstruct code for tracing before blasting rax */ 440 code = regs->rax & SYSCALL_NUMBER_MASK; 441 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; 442 443 if (callp == sysent) 444 /* 445 * indirect system call... system call number 446 * passed as 'arg0' 447 */ 448 code = regs->rdi; 449 450#if CONFIG_DTRACE 451 if (callp->sy_call == dtrace_systrace_syscall) 452 dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); 453#endif /* CONFIG_DTRACE */ 454 455 if (error == ERESTART) { 456 /* 457 * all system calls come through via the syscall instruction 458 * in 64 bit mode... 
its 2 bytes in length 459 * move the user's pc back to repeat the syscall: 460 */ 461 regs->isf.rip -= 2; 462 } 463 else if (error != EJUSTRETURN) { 464 if (error) { 465 regs->rax = error; 466 regs->isf.rflags |= EFL_CF; /* carry bit */ 467 } else { /* (not error) */ 468 469 switch (callp->sy_return_type) { 470 case _SYSCALL_RET_INT_T: 471 regs->rax = uthread->uu_rval[0]; 472 regs->rdx = uthread->uu_rval[1]; 473 break; 474 case _SYSCALL_RET_UINT_T: 475 regs->rax = ((u_int)uthread->uu_rval[0]); 476 regs->rdx = ((u_int)uthread->uu_rval[1]); 477 break; 478 case _SYSCALL_RET_OFF_T: 479 case _SYSCALL_RET_ADDR_T: 480 case _SYSCALL_RET_SIZE_T: 481 case _SYSCALL_RET_SSIZE_T: 482 regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); 483 regs->rdx = 0; 484 break; 485 case _SYSCALL_RET_NONE: 486 break; 487 default: 488 panic("unix_syscall: unknown return type"); 489 break; 490 } 491 regs->isf.rflags &= ~EFL_CF; 492 } 493 } 494 } else { 495 x86_saved_state32_t *regs; 496 497 regs = saved_state32(find_user_regs(thread)); 498 499 regs->efl &= ~(EFL_CF); 500 /* reconstruct code for tracing before blasting eax */ 501 code = regs->eax & I386_SYSCALL_NUMBER_MASK; 502 callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; 503 504#if CONFIG_DTRACE 505 if (callp->sy_call == dtrace_systrace_syscall) 506 dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); 507#endif /* CONFIG_DTRACE */ 508 509 if (callp == sysent) { 510 params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); 511 code = fuword(params); 512 } 513 if (error == ERESTART) { 514 regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 
5 : 2; 515 } 516 else if (error != EJUSTRETURN) { 517 if (error) { 518 regs->eax = error; 519 regs->efl |= EFL_CF; /* carry bit */ 520 } else { /* (not error) */ 521 regs->eax = uthread->uu_rval[0]; 522 regs->edx = uthread->uu_rval[1]; 523 } 524 } 525 } 526 527 528 uthread->uu_flag &= ~UT_NOTCANCELPT; 529 530 /* 531 * if we're holding the funnel panic 532 */ 533 syscall_exit_funnelcheck(); 534 535 if (uthread->uu_lowpri_window) { 536 /* 537 * task is marked as a low priority I/O type 538 * and the I/O we issued while in this system call 539 * collided with normal I/O operations... we'll 540 * delay in order to mitigate the impact of this 541 * task on the normal operation of the system 542 */ 543 throttle_lowpri_io(TRUE); 544 } 545 if (code != 180) 546 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, 547 error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); 548 549 thread_exception_return(); 550 /* NOTREACHED */ 551} 552 553void 554munge_wwwlww( 555 __unused const void *in32, 556 void *out64) 557{ 558 uint32_t *arg32; 559 uint64_t *arg64; 560 561 /* we convert in place in out64 */ 562 arg32 = (uint32_t *) out64; 563 arg64 = (uint64_t *) out64; 564 565 arg64[5] = arg32[6]; /* wwwlwW */ 566 arg64[4] = arg32[5]; /* wwwlWw */ 567 arg32[7] = arg32[4]; /* wwwLww (hi) */ 568 arg32[6] = arg32[3]; /* wwwLww (lo) */ 569 arg64[2] = arg32[2]; /* wwWlww */ 570 arg64[1] = arg32[1]; /* wWwlww */ 571 arg64[0] = arg32[0]; /* Wwwlww */ 572} 573 574 575void 576munge_wwlwww( 577 __unused const void *in32, 578 void *out64) 579{ 580 uint32_t *arg32; 581 uint64_t *arg64; 582 583 /* we convert in place in out64 */ 584 arg32 = (uint32_t *) out64; 585 arg64 = (uint64_t *) out64; 586 587 arg64[5] = arg32[6]; /* wwlwwW */ 588 arg64[4] = arg32[5]; /* wwlwWw */ 589 arg64[3] = arg32[4]; /* wwlWww */ 590 arg32[5] = arg32[3]; /* wwLwww (hi) */ 591 arg32[4] = arg32[2]; /* wwLwww (lo) */ 592 arg64[1] = arg32[1]; /* wWlwww */ 593 arg64[0] = arg32[0]; /* Wwlwww */ 594} 595 
#ifdef JOE_DEBUG
/*
 * joe_debug
 *
 * Debug-only helper, compiled only when JOE_DEBUG is defined: prints the
 * given message followed by a newline.
 *
 * The return type is spelled out as int (the definition previously relied
 * on implicit int, which is not valid in C99 and later) so that it stays
 * compatible with callers that use it without a prior prototype.
 *
 * Parameters:	p	NUL-terminated message to print
 *
 * Returns:	0 always
 */
int
joe_debug(char *p) {

	printf("%s\n", p);
	return 0;
}
#endif
