1/* 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/assert.h>
#include <kern/clock.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <mach/machine/thread_status.h>
#include <mach/thread_act.h>
#include <mach/branch_predicates.h>

#include <sys/kernel.h>
#include <sys/vm.h>
#include <sys/proc_internal.h>
#include <sys/syscall.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/kdebug.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/kauth.h>
#include <sys/systm.h>

#include <security/audit/audit.h>

#include <i386/seg.h>
#include <i386/machine_routines.h>
#include <mach/i386/syscall_sw.h>

#include <machine/pal_routines.h>

#if CONFIG_DTRACE
extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
#endif

extern void unix_syscall(x86_saved_state_t *);
extern void unix_syscall64(x86_saved_state_t *);
extern void *find_user_regs(thread_t);

/* dynamically generated at build time based on syscalls.master */
extern const char *syscallnames[];

/*
 * Function: unix_syscall
 *
 * BSD system call dispatcher for 32-bit user processes.  Decodes the
 * syscall number from the saved eax, copies the arguments in from the
 * user stack, dispatches through the sysent table, and writes the
 * result (or error + carry flag) back into the saved register state.
 * Never returns to its caller: every exit path ends in
 * thread_exception_return().
 *
 * Inputs:	regs - pointer to i386 save area
 *
 * Outputs:	none
 */
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t	thread;
	void		*vt;
	unsigned int	code;
	struct sysent	*callp;

	int		error;
	vm_offset_t	params;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state32_t	*regs;
	boolean_t	is_vfork;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
#if DEBUG
	/*
	 * DEBUG-kernel escape hatch: a trap with eax == 0x800 returns to
	 * user space immediately without dispatching anything.
	 * NOTE(review): purpose of the 0x800 sentinel is not visible here —
	 * presumably a test/diagnostic hook; confirm before relying on it.
	 */
	if (regs->eax == 0x800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the approriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else
		p = (struct proc *)get_bsdtask_info(current_task());

	/*
	 * Verify that we are not being called from a task without a proc.
	 * If so, fail the "syscall" with EPERM (carry flag set) and kill
	 * the offending task.
	 */
	if (__improbable(p == NULL)) {
		regs->eax = EPERM;
		regs->efl |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	/*
	 * NOTE(review): out-of-range codes index syscallnames[63] / sysent[63]
	 * below — presumably the reserved/nosys slot; confirm against the
	 * generated syscalls.master table.
	 */
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
	/* Arguments live on the user stack, just above the return address. */
	params = (vm_offset_t) (regs->uesp + sizeof (int));

	/* Clear carry up front; error paths below re-set it. */
	regs->efl &= ~(EFL_CF);

	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	if (__improbable(callp == sysent)) {
		/*
		 * Code 0 is the indirect syscall() — the real syscall number
		 * is the first argument on the user stack.
		 * NOTE(review): the fuword() return value is not checked for
		 * a fault (-1); a faulting address degenerates to the
		 * out-of-range clamp below.
		 */
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
	}

	/* Arguments are staged into the per-thread uu_arg buffer. */
	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
		sy_munge_t	*mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
		uint32_t	nargs;

		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
		nargs = callp->sy_arg_bytes;
		/* Copy the 32-bit argument words in from the user stack. */
		error = copyin((user_addr_t) params, (char *) vt, nargs);
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}

		/*
		 * Emit the syscall-entry kdebug tracepoint.
		 * NOTE(review): code 180 is excluded here and at DBG_FUNC_END
		 * below — presumably kdebug_trace itself, to avoid recursive
		 * trace events; confirm against syscalls.master.
		 */
		if (__probable(code != 180)) {
			int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}

#if CONFIG_REQUIRES_U32_MUNGING
		/* Widen/reorder 32-bit user args into the 64-bit kernel layout. */
		mungerp = callp->sy_arg_munge32;

		if (mungerp != NULL)
			(*mungerp)(vt);
#endif
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	/* Suppress cancellation points for the duration; cleared below. */
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	/* Audit bracketing must enclose the actual handler call. */
	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */

		pal_syscall_restart(thread, state);
	}
	else if (error != EJUSTRETURN) {
		/* EJUSTRETURN: handler already set up the register state. */
		if (__improbable(error)) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/*
			 * We split retval across two registers, in case the
			 * syscall had a 64-bit return value, in which case
			 * eax/edx matches the function call ABI.
			 */
			regs->eax = uthread->uu_rval[0];
			regs->edx = uthread->uu_rval[1];
		}
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall: error=%d retval=(%u,%u)\n",
		error, regs->eax, regs->edx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	/*
	 * A successful execve (not the vfork flavor) needs PAL fixup of the
	 * thread state before returning to the freshly-exec'd image.
	 */
	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		pal_execve_return(thread);
	}

	thread_exception_return();
	/* NOTREACHED */
}


/*
 * Function: unix_syscall64
 *
 * BSD system call dispatcher for 64-bit user processes.  Arguments
 * arrive in registers per the SYSCALL ABI (rdi, rsi, rdx, r10, r8, r9,
 * saved contiguously in the state), with any overflow arguments on the
 * user stack.  Like unix_syscall(), never returns to its caller.
 *
 * Inputs:	state - pointer to x86_64 save area
 *
 * Outputs:	none
 */
void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t	thread;
	void		*vt;
	unsigned int	code;
	struct sysent	*callp;
	int		args_in_regs;
	boolean_t	args_start_at_rdi;
	int		error;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state64_t *regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);
#if DEBUG
	/* DEBUG-kernel escape hatch; 64-bit analogue of the 0x800 check above. */
	if (regs->rax == 0x2000800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the approriate proc; may be different from task's for vfork() */
	if (__probable(!(uthread->uu_flag & UT_VFORK)))
		p = (struct proc *)get_bsdtask_info(current_task());
	else
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->rax = EPERM;
		regs->isf.rflags |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->rax & SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: code=%d(%s) rip=%llx\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	vt = (void *)uthread->uu_arg;

	if (__improbable(callp == sysent)) {
		/*
		 * indirect system call... system call number
		 * passed as 'arg0'
		 */
		code = regs->rdi;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
		/* rdi was consumed by the number, so args begin at rsi. */
		args_start_at_rdi = FALSE;
		args_in_regs = 5;
	} else {
		args_start_at_rdi = TRUE;
		args_in_regs = 6;
	}

	if (callp->sy_narg != 0) {
		assert(callp->sy_narg <= 8); /* size of uu_arg */

		/* Bulk-copy the register-resident args out of the save area. */
		args_in_regs = MIN(args_in_regs, callp->sy_narg);
		memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));


		/* See the code != 180 note in unix_syscall() above. */
		if (code != 180) {
			uint64_t *ip = (uint64_t *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
		}

		if (__improbable(callp->sy_narg > args_in_regs)) {
			int copyin_count;

			/* Remaining args spill to the user stack, above the return addr. */
			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);

			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	/* Suppress cancellation points for the duration; cleared below. */
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");
#endif

#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * all system calls come through via the syscall instruction
		 * in 64 bit mode... its 2 bytes in length
		 * move the user's pc back to repeat the syscall:
		 */
		pal_syscall_restart( thread, state );
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/*
			 * Marshal uu_rval into rax/rdx according to the
			 * declared return type of this syscall.
			 */
			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
			case _SYSCALL_RET_UINT64_T:
				/* uu_rval[0]/[1] reinterpreted as one 64-bit value. */
				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				regs->rdx = 0;
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
				break;
			}
			regs->isf.rflags &= ~EFL_CF;
		}
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: error=%d retval=(%llu,%llu)\n",
		error, regs->rax, regs->rdx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}


/*
 * Function: unix_syscall_return
 *
 * Completion path for a system call that returned to user space
 * asynchronously (e.g. after a continuation) rather than falling out of
 * unix_syscall()/unix_syscall64().  Re-derives the saved syscall code
 * from the uthread, performs the same error/retval register marshaling
 * as the direct paths above, and exits via thread_exception_return().
 *
 * Inputs:	error - errno-style result of the completed syscall
 *			(0, ERESTART, EJUSTRETURN, or an error code)
 *
 * Outputs:	none
 */
void
unix_syscall_return(int error)
{
	thread_t	thread;
	struct uthread	*uthread;
	struct proc	*p;
	unsigned int	code;
	struct sysent	*callp;

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Register state will be modified below; mark the cache dirty. */
	pal_register_cache_state(thread, DIRTY);

	p = current_proc();

	if (proc_is64bit(p)) {
		x86_saved_state64_t *regs;

		regs = saved_state64(find_user_regs(thread));

		/* Syscall code was stashed on the uthread at dispatch time. */
		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			/*
			 * repeat the syscall
			 */
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				/* Same return-type marshaling as unix_syscall64(). */
				switch (callp->sy_return_type) {
				case _SYSCALL_RET_INT_T:
					regs->rax = uthread->uu_rval[0];
					regs->rdx = uthread->uu_rval[1];
					break;
				case _SYSCALL_RET_UINT_T:
					regs->rax = ((u_int)uthread->uu_rval[0]);
					regs->rdx = ((u_int)uthread->uu_rval[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
				case _SYSCALL_RET_UINT64_T:
					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
					regs->rdx = 0;
					break;
				case _SYSCALL_RET_NONE:
					break;
				default:
					panic("unix_syscall: unknown return type");
					break;
				}
				regs->isf.rflags &= ~EFL_CF;
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%llu,%llu)\n",
			error, regs->rax, regs->rdx);
	} else {
		x86_saved_state32_t *regs;

		regs = saved_state32(find_user_regs(thread));

		/* Clear carry up front; error path below re-sets it. */
		regs->efl &= ~(EFL_CF);

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				regs->eax = uthread->uu_rval[0];
				regs->edx = uthread->uu_rval[1];
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%u,%u)\n",
			error, regs->eax, regs->edx);
	}


	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (uthread->uu_lowpri_window) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	/* See the code != 180 note in unix_syscall() above. */
	if (code != 180)
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}