1/* 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1982, 1986, 1989, 1991, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)kern_fork.c 8.8 (Berkeley) 2/14/95 67 */ 68/* 69 * NOTICE: This file was modified by McAfee Research in 2004 to introduce 70 * support for mandatory and extensible security protections. 
This notice 71 * is included in support of clause 2.2 (b) of the Apple Public License, 72 * Version 2.0. 73 */ 74/* 75 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 76 * support for mandatory and extensible security protections. This notice 77 * is included in support of clause 2.2 (b) of the Apple Public License, 78 * Version 2.0. 79 */ 80 81#include <kern/assert.h> 82#include <sys/param.h> 83#include <sys/systm.h> 84#include <sys/filedesc.h> 85#include <sys/kernel.h> 86#include <sys/malloc.h> 87#include <sys/proc_internal.h> 88#include <sys/kauth.h> 89#include <sys/user.h> 90#include <sys/resourcevar.h> 91#include <sys/vnode_internal.h> 92#include <sys/file_internal.h> 93#include <sys/acct.h> 94#include <sys/codesign.h> 95#include <sys/sysproto.h> 96#if CONFIG_DTRACE 97/* Do not include dtrace.h, it redefines kmem_[alloc/free] */ 98extern void dtrace_fasttrap_fork(proc_t, proc_t); 99extern void (*dtrace_helpers_fork)(proc_t, proc_t); 100extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t); 101 102#include <sys/dtrace_ptss.h> 103#endif 104 105#include <security/audit/audit.h> 106 107#include <mach/mach_types.h> 108#include <kern/kern_types.h> 109#include <kern/kalloc.h> 110#include <kern/mach_param.h> 111#include <kern/task.h> 112#include <kern/thread.h> 113#include <kern/thread_call.h> 114#include <kern/zalloc.h> 115 116#include <machine/spl.h> 117 118#if CONFIG_MACF 119#include <security/mac.h> 120#include <security/mac_mach_internal.h> 121#endif 122 123#include <vm/vm_map.h> 124#include <vm/vm_protos.h> 125#include <vm/vm_shared_region.h> 126 127#include <sys/shm_internal.h> /* for shmfork() */ 128#include <mach/task.h> /* for thread_create() */ 129#include <mach/thread_act.h> /* for thread_resume() */ 130 131#include <sys/sdt.h> 132 133#if CONFIG_MEMORYSTATUS 134#include <sys/kern_memorystatus.h> 135#endif 136 137/* XXX routines which should have Mach prototypes, but don't */ 138void thread_set_parent(thread_t parent, int pid); 
139extern void act_thread_catt(void *ctx); 140void thread_set_child(thread_t child, int pid); 141void *act_thread_csave(void); 142 143 144thread_t cloneproc(task_t, proc_t, int, int); 145proc_t forkproc(proc_t); 146void forkproc_free(proc_t); 147thread_t fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64bit); 148void proc_vfork_begin(proc_t parent_proc); 149void proc_vfork_end(proc_t parent_proc); 150 151#define DOFORK 0x1 /* fork() system call */ 152#define DOVFORK 0x2 /* vfork() system call */ 153 154/* 155 * proc_vfork_begin 156 * 157 * Description: start a vfork on a process 158 * 159 * Parameters: parent_proc process (re)entering vfork state 160 * 161 * Returns: (void) 162 * 163 * Notes: Although this function increments a count, a count in 164 * excess of 1 is not currently supported. According to the 165 * POSIX standard, calling anything other than execve() or 166 * _exit() following a vfork(), including calling vfork() 167 * itself again, will result in undefined behaviour 168 */ 169void 170proc_vfork_begin(proc_t parent_proc) 171{ 172 proc_lock(parent_proc); 173 parent_proc->p_lflag |= P_LVFORK; 174 parent_proc->p_vforkcnt++; 175 proc_unlock(parent_proc); 176} 177 178/* 179 * proc_vfork_end 180 * 181 * Description: stop a vfork on a process 182 * 183 * Parameters: parent_proc process leaving vfork state 184 * 185 * Returns: (void) 186 * 187 * Notes: Decrements the count; currently, reentrancy of vfork() 188 * is unsupported on the current process 189 */ 190void 191proc_vfork_end(proc_t parent_proc) 192{ 193 proc_lock(parent_proc); 194 parent_proc->p_vforkcnt--; 195 if (parent_proc->p_vforkcnt < 0) 196 panic("vfork cnt is -ve"); 197 if (parent_proc->p_vforkcnt == 0) 198 parent_proc->p_lflag &= ~P_LVFORK; 199 proc_unlock(parent_proc); 200} 201 202 203/* 204 * vfork 205 * 206 * Description: vfork system call 207 * 208 * Parameters: void [no arguments] 209 * 210 * Retval: 0 (to child process) 211 * !0 pid of child (to parent 
process)
 *		-1			error (see "Returns:")
 *
 * Returns:	EAGAIN			Administrative limit reached
 *		EINVAL			vfork() called during vfork()
 *		ENOMEM			Failed to allocate new process
 *
 * Note:	After a successful call to this function, the parent process
 *		has its task, thread, and uthread lent to the child process,
 *		and control is returned to the caller; if this function is
 *		invoked as a system call, the return is to user space, and
 *		is effectively running on the child process.
 *
 *		Subsequent calls that operate on process state are permitted,
 *		though discouraged, and will operate on the child process; any
 *		operations on the task, thread, or uthread will result in
 *		changes in the parent state, and, if inheritable, the child
 *		state, when a task, thread, and uthread are realized for the
 *		child process at execve() time, will also be affected.  Given
 *		this, it's recommended that people use the posix_spawn() call
 *		instead.
 *
 * BLOCK DIAGRAM OF VFORK
 *
 * Before:
 *
 *     ,----------------.         ,-------------.
 *     |                |   task  |             |
 *     | parent_thread  | ------> | parent_task |
 *     |                | <.list. |             |
 *     `----------------'         `-------------'
 *    uthread |  ^             bsd_info |  ^
 *            v  | vc_thread            v  | task
 *     ,----------------.         ,-------------.
 *     |                |         |             |
 *     | parent_uthread | <.list. | parent_proc | <-- current_proc()
 *     |                |         |             |
 *     `----------------'         `-------------'
 *    uu_proc |
 *            v
 *           NULL
 *
 * After:
 *
 *                 ,----------------.         ,-------------.
 *                 |                |   task  |             |
 *          ,----> | parent_thread  | ------> | parent_task |
 *          |      |                | <.list. |             |
 *          |      `----------------'         `-------------'
 *          |     uthread |  ^             bsd_info |  ^
 *          |             v  | vc_thread            v  | task
 *          |      ,----------------.         ,-------------.
 *          |      |                |         |             |
 *          |      | parent_uthread | <.list. | parent_proc |
 *          |      |                |         |             |
 *          |      `----------------'         `-------------'
 *          |     uu_proc |                         .
list 268 * | v v 269 * | ,----------------. 270 * `----- | | 271 * p_vforkact | child_proc | <-- current_proc() 272 * | | 273 * `----------------' 274 */ 275int 276vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) 277{ 278 thread_t child_thread; 279 int err; 280 281 if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK)) != 0) { 282 retval[1] = 0; 283 } else { 284 uthread_t ut = get_bsdthread_info(current_thread()); 285 proc_t child_proc = ut->uu_proc; 286 287 retval[0] = child_proc->p_pid; 288 retval[1] = 1; /* flag child return for user space */ 289 290 /* 291 * Drop the signal lock on the child which was taken on our 292 * behalf by forkproc()/cloneproc() to prevent signals being 293 * received by the child in a partially constructed state. 294 */ 295 proc_signalend(child_proc, 0); 296 proc_transend(child_proc, 0); 297 298 proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid); 299 DTRACE_PROC1(create, proc_t, child_proc); 300 ut->uu_flag &= ~UT_VFORKING; 301 } 302 303 return (err); 304} 305 306 307/* 308 * fork1 309 * 310 * Description: common code used by all new process creation other than the 311 * bootstrap of the initial process on the system 312 * 313 * Parameters: parent_proc parent process of the process being 314 * child_threadp pointer to location to receive the 315 * Mach thread_t of the child process 316 * breated 317 * kind kind of creation being requested 318 * 319 * Notes: Permissable values for 'kind': 320 * 321 * PROC_CREATE_FORK Create a complete process which will 322 * return actively running in both the 323 * parent and the child; the child copies 324 * the parent address space. 325 * PROC_CREATE_SPAWN Create a complete process which will 326 * return actively running in the parent 327 * only after returning actively running 328 * in the child; the child address space 329 * is newly created by an image activator, 330 * after which the child is run. 
*		PROC_CREATE_VFORK	Creates a partial process which will
 *					borrow the parent task, thread, and
 *					uthread to return running in the child;
 *					the child address space and other parts
 *					are lazily created at execve() time, or
 *					the child is terminated, and the parent
 *					does not actively run until that
 *					happens.
 *
 *		At first it may seem strange that we return the child thread
 *		address rather than process structure, since the process is
 *		the only part guaranteed to be "new"; however, since we do
 *		not actually adjust other references between Mach and BSD (see
 *		the block diagram above the implementation of vfork()), this
 *		is the only method which guarantees us the ability to get
 *		back to the other information.
 *
 *		In the PROC_CREATE_VFORK case no new thread is created, so
 *		NULL is stored through 'child_threadp'; the child process is
 *		instead reachable through the calling thread's uu_proc.
 *
 * Returns:	0			Success
 *		EAGAIN			Process limit reached, or cloneproc()
 *					failed
 *		EINVAL			PROC_CREATE_VFORK requested while the
 *					calling thread is already in vfork
 *		ENOMEM			forkproc() failed (vfork case)
 *		mac_proc_check_fork:???	(CONFIG_MACF only)
 */
int
fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
{
	thread_t parent_thread = (thread_t)current_thread();
	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
	proc_t child_proc = NULL;	/* set in switch, but compiler... */
	thread_t child_thread = NULL;
	uid_t uid;
	int count;
	int err = 0;
	int spawn = 0;

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.  Don't allow
	 * a nonprivileged user to use the last process; don't let root
	 * exceed the limit.  The variable nprocs is the current number of
	 * processes, maxproc is the limit.
	 *
	 * Note: this failure returns directly rather than going through
	 * 'bad', because the per-uid count has not been incremented yet.
	 */
	uid = kauth_getruid();
	proc_list_lock();
	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
		proc_list_unlock();
		tablefull("proc");
		return (EAGAIN);
	}
	proc_list_unlock();

	/*
	 * Increment the count of procs running with this uid.  Don't allow
	 * a nonprivileged user to exceed their current limit, which is
	 * always less than what an rlim_t can hold.
	 * (locking protection is provided by list lock held in chgproccnt)
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 &&
	    (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		err = EAGAIN;
		goto bad;
	}

#if CONFIG_MACF
	/*
	 * Determine if MAC policies applied to the process will allow
	 * it to fork.  This is an advisory-only check.
	 */
	err = mac_proc_check_fork(parent_proc);
	if (err != 0) {
		goto bad;
	}
#endif

	switch(kind) {
	case PROC_CREATE_VFORK:
		/*
		 * Prevent a vfork while we are in vfork(); we should
		 * also likely be preventing a fork here as well, and this
		 * check should then be outside the switch statement,
		 * since the proc struct contents will copy from the
		 * child and the task/thread/uthread from the parent in
		 * that case.  We do not support vfork() in vfork()
		 * because we don't have to; the same non-requirement
		 * is true of both fork() and posix_spawn() and any
		 * call other than execve() and _exit(), but we've
		 * been historically lenient, so we continue to be so
		 * (for now).
		 *
		 * <rdar://6640521> Probably a source of random panics
		 */
		if (parent_uthread->uu_flag & UT_VFORK) {
			printf("fork1 called within vfork by %s\n", parent_proc->p_comm);
			err = EINVAL;
			goto bad;
		}

		/*
		 * Flag us in progress; if we chose to support vfork() in
		 * vfork(), we would chain our parent at this point (in
		 * effect, a stack push).  We don't, since we actually want
		 * to disallow everything not specified in the standard
		 */
		proc_vfork_begin(parent_proc);

		/* The newly created process comes with signal lock held */
		if ((child_proc = forkproc(parent_proc)) == NULL) {
			/* Failed to allocate new process; undo the vfork mark */
			proc_vfork_end(parent_proc);
			err = ENOMEM;
			goto bad;
		}

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

// XXX END: wants to move to be common code (and safe)

		/*
		 * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD
		 *
		 * Note: this is where we would "push" state instead of setting
		 * it for nested vfork() support (see proc_vfork_end() for
		 * description of issues here).
		 */
		child_proc->task = parent_proc->task;

		child_proc->p_lflag |= P_LINVFORK;
		child_proc->p_vforkact = parent_thread;
		child_proc->p_stat = SRUN;

		/*
		 * Until UT_VFORKING is cleared at the end of the vfork
		 * syscall, the process identity of this thread is slightly
		 * murky.
		 *
		 * As long as UT_VFORK and its associated field (uu_proc)
		 * is set, current_proc() will always return the child process.
		 *
		 * However dtrace_proc_selfpid() returns the parent pid to
		 * ensure that e.g. the proc:::create probe actions accrue
		 * to the parent.  (Otherwise the child magically seems to
		 * have created itself!)
		 */
		parent_uthread->uu_flag |= UT_VFORK | UT_VFORKING;
		parent_uthread->uu_proc = child_proc;
		/* saved here; handed back to act_thread_catt() in vfork_return() */
		parent_uthread->uu_userstate = (void *)act_thread_csave();
		/* saved here; copied back to uu_sigmask in vfork_return() */
		parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask;

		/*
		 * temporarily drop thread-set-id state; UT_WASSETUID records
		 * that it has to be put back in vfork_return()
		 */
		if (parent_uthread->uu_flag & UT_SETUID) {
			parent_uthread->uu_flag |= UT_WASSETUID;
			parent_uthread->uu_flag &= ~UT_SETUID;
		}

		/* blow thread state information */
		/* XXX is this actually necessary, given syscall return? */
		thread_set_child(parent_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

		/*
		 * Preserve synchronization semantics of vfork.  If
		 * waiting for child to exec or exit, set P_PPWAIT
		 * on child, and sleep on our proc (in case of exit).
		 */
		child_proc->p_lflag |= P_LPPWAIT;
		pinsertchild(parent_proc, child_proc);	/* set visible */

		break;

	case PROC_CREATE_SPAWN:
		/*
		 * A spawned process differs from a forked process in that
		 * the spawned process does not carry around the parents
		 * baggage with regard to address space copying, dtrace,
		 * and so on.
		 */
		spawn = 1;

		/* FALLSTHROUGH */

	case PROC_CREATE_FORK:
		/*
		 * When we clone the parent process, we are going to inherit
		 * its task attributes and memory, since when we fork, we
		 * will, in effect, create a duplicate of it, with only minor
		 * differences.  Contrarily, spawned processes do not inherit.
		 */
		if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE, FALSE)) == NULL) {
			/* Failed to create thread */
			err = EAGAIN;
			goto bad;
		}

		/* copy current thread state into the child thread (only for fork) */
		if (!spawn) {
			thread_dup(child_thread);
		}

		/* child_proc = child_thread->task->proc; */
		child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread)));

// XXX BEGIN: wants to move to be common code (and safe)
#if CONFIG_MACF
		/*
		 * allow policies to associate the credential/label that
		 * we referenced from the parent ... with the child
		 * JMM - this really isn't safe, as we can drop that
		 *       association without informing the policy in other
		 *       situations (keep long enough to get policies changed)
		 */
		mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
#endif

		/*
		 * Propagate change of PID - may get new cred if auditing.
		 *
		 * NOTE: This has no effect in the vfork case, since
		 *	child_proc->task != current_task(), but we duplicate it
		 *	because this is probably, ultimately, wrong, since we
		 *	will be running in the "child" which is the parent task
		 *	with the wrong token until we get to the execve() or
		 *	_exit() call; a lot of "undefined" can happen before
		 *	that.
		 *
		 * <rdar://6640530> disallow everything but execve()/_exit()?
		 */
		set_security_token(child_proc);

		AUDIT_ARG(pid, child_proc->p_pid);

// XXX END: wants to move to be common code (and safe)

		/*
		 * Blow thread state information; this is what gives the child
		 * process its "return" value from a fork() call.
		 *
		 * Note: this should probably move to fork() proper, since it
		 * is not relevant to spawn, and the value won't matter
		 * until we resume the child there.  If you are in here
		 * refactoring code, consider doing this at the same time.
		 */
		thread_set_child(child_thread, child_proc->p_pid);

		child_proc->p_acflag = AFORK;	/* forked but not exec'ed */

// <rdar://6598155> dtrace code cleanup needed
#if CONFIG_DTRACE
		/*
		 * This code applies to new processes who are copying the task
		 * and thread state and address spaces of their parent process.
		 */
		if (!spawn) {
// <rdar://6598155> call dtrace specific function here instead of all this...
		/*
		 * APPLE NOTE: Solaris does a sprlock() and drops the
		 * proc_lock here. We're cheating a bit and only taking
		 * the p_dtrace_sprlock lock. A full sprlock would
		 * task_suspend the parent.
		 */
		lck_mtx_lock(&parent_proc->p_dtrace_sprlock);

		/*
		 * Remove all DTrace tracepoints from the child process. We
		 * need to do this _before_ duplicating USDT providers since
		 * any associated probes may be immediately enabled.
		 */
		if (parent_proc->p_dtrace_count > 0) {
			dtrace_fasttrap_fork(parent_proc, child_proc);
		}

		lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);

		/*
		 * Duplicate any lazy dof(s). This must be done while NOT
		 * holding the parent sprlock! Lock ordering is
		 * dtrace_dof_mode_lock, then sprlock.  It is imperative we
		 * always call dtrace_lazy_dofs_duplicate, rather than null
		 * check and call if !NULL. If we NULL test, during lazy dof
		 * faulting we can race with the faulting code and proceed
		 * from here to beyond the helpers copy. The lazy dof
		 * faulting will then fail to copy the helpers to the child
		 * process.
		 */
		dtrace_lazy_dofs_duplicate(parent_proc, child_proc);

		/*
		 * Duplicate any helper actions and providers. The SFORKING
		 * we set above informs the code to enable USDT probes that
		 * sprlock() may fail because the child is being forked.
		 */
		/*
		 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
		 * never fails to find the child. We do not set SFORKING.
		 */
		if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
			(*dtrace_helpers_fork)(parent_proc, child_proc);
		}

		}
#endif	/* CONFIG_DTRACE */

		break;

	default:
		panic("fork1 called with unknown kind %d", kind);
		break;
	}


	/*
	 * return the thread pointer to the caller (still NULL in the
	 * PROC_CREATE_VFORK case, which never assigns child_thread)
	 */
	*child_threadp = child_thread;

bad:
	/*
	 * In the error case, we return a 0 value for the returned pid (but
	 * it is ignored in the trampoline due to the error return); this
	 * is probably not necessary.
	 *
	 * Every path that reaches here with an error has already
	 * incremented the per-uid process count, so give it back.
	 */
	if (err) {
		(void)chgproccnt(uid, -1);
	}

	return (err);
}


/*
 * vfork_return
 *
 * Description:	"Return" to parent vfork thread() following execve/_exit;
 *		this is done by reassociating the parent process structure
 *		with the task, thread, and uthread.
 *
 *		Refer to the ASCII art above vfork() to figure out the
 *		state we're undoing.
 *
 * Parameters:	child_proc		Child process
 *		retval			System call return value array
 *		rval			Return value to present to parent
 *
 * Returns:	void
 *
 * Notes:	The caller resumes or exits the parent, as appropriate, after
 *		calling this function.
698 */ 699void 700vfork_return(proc_t child_proc, int32_t *retval, int rval) 701{ 702 task_t parent_task = get_threadtask(child_proc->p_vforkact); 703 proc_t parent_proc = get_bsdtask_info(parent_task); 704 thread_t th = current_thread(); 705 uthread_t uth = get_bsdthread_info(th); 706 707 act_thread_catt(uth->uu_userstate); 708 709 /* clear vfork state in parent proc structure */ 710 proc_vfork_end(parent_proc); 711 712 /* REPATRIATE PARENT TASK, THREAD, UTHREAD */ 713 uth->uu_userstate = 0; 714 uth->uu_flag &= ~UT_VFORK; 715 /* restore thread-set-id state */ 716 if (uth->uu_flag & UT_WASSETUID) { 717 uth->uu_flag |= UT_SETUID; 718 uth->uu_flag &= UT_WASSETUID; 719 } 720 uth->uu_proc = 0; 721 uth->uu_sigmask = uth->uu_vforkmask; 722 723 proc_lock(child_proc); 724 child_proc->p_lflag &= ~P_LINVFORK; 725 child_proc->p_vforkact = 0; 726 proc_unlock(child_proc); 727 728 thread_set_parent(th, rval); 729 730 if (retval) { 731 retval[0] = rval; 732 retval[1] = 0; /* mark parent */ 733 } 734} 735 736 737/* 738 * fork_create_child 739 * 740 * Description: Common operations associated with the creation of a child 741 * process 742 * 743 * Parameters: parent_task parent task 744 * child_proc child process 745 * inherit_memory TRUE, if the parents address space is 746 * to be inherited by the child 747 * is64bit TRUE, if the child being created will 748 * be associated with a 64 bit process 749 * rather than a 32 bit process 750 * 751 * Note: This code is called in the fork() case, from the execve() call 752 * graph, if implementing an execve() following a vfork(), from 753 * the posix_spawn() call graph (which implicitly includes a 754 * vfork() equivalent call, and in the system bootstrap case. 755 * 756 * It creates a new task and thread (and as a side effect of the 757 * thread creation, a uthread), which is then associated with the 758 * process 'child'. 
If the parent process address space is to 759 * be inherited, then a flag indicates that the newly created 760 * task should inherit this from the child task. 761 * 762 * As a special concession to bootstrapping the initial process 763 * in the system, it's possible for 'parent_task' to be TASK_NULL; 764 * in this case, 'inherit_memory' MUST be FALSE. 765 */ 766thread_t 767fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit) 768{ 769 thread_t child_thread = NULL; 770 task_t child_task; 771 kern_return_t result; 772 773 /* Create a new task for the child process */ 774 result = task_create_internal(parent_task, 775 inherit_memory, 776 is64bit, 777 &child_task); 778 if (result != KERN_SUCCESS) { 779 printf("%s: task_create_internal failed. Code: %d\n", 780 __func__, result); 781 goto bad; 782 } 783 784 /* Set the child process task to the new task */ 785 child_proc->task = child_task; 786 787 /* Set child task process to child proc */ 788 set_bsdtask_info(child_task, child_proc); 789 790 /* Propagate CPU limit timer from parent */ 791 if (timerisset(&child_proc->p_rlim_cpu)) 792 task_vtimer_set(child_task, TASK_VTIMER_RLIM); 793 794 /* Set/clear 64 bit vm_map flag */ 795 if (is64bit) 796 vm_map_set_64bit(get_task_map(child_task)); 797 else 798 vm_map_set_32bit(get_task_map(child_task)); 799 800#if CONFIG_MACF 801 /* Update task for MAC framework */ 802 /* valid to use p_ucred as child is still not running ... */ 803 mac_task_label_update_cred(child_proc->p_ucred, child_task); 804#endif 805 806 /* 807 * Set child process BSD visible scheduler priority if nice value 808 * inherited from parent 809 */ 810 if (child_proc->p_nice != 0) 811 resetpriority(child_proc); 812 813 /* Create a new thread for the child process */ 814 result = thread_create(child_task, &child_thread); 815 if (result != KERN_SUCCESS) { 816 printf("%s: thread_create failed. 
Code: %d\n", 817 __func__, result); 818 task_deallocate(child_task); 819 child_task = NULL; 820 } 821 822 /* 823 * Tag thread as being the first thread in its task. 824 */ 825 thread_set_tag(child_thread, THREAD_TAG_MAINTHREAD); 826 827bad: 828 thread_yield_internal(1); 829 830 return(child_thread); 831} 832 833 834/* 835 * fork 836 * 837 * Description: fork system call. 838 * 839 * Parameters: parent Parent process to fork 840 * uap (void) [unused] 841 * retval Return value 842 * 843 * Returns: 0 Success 844 * EAGAIN Resource unavailable, try again 845 * 846 * Notes: Attempts to create a new child process which inherits state 847 * from the parent process. If successful, the call returns 848 * having created an initially suspended child process with an 849 * extra Mach task and thread reference, for which the thread 850 * is initially suspended. Until we resume the child process, 851 * it is not yet running. 852 * 853 * The return information to the child is contained in the 854 * thread state structure of the new child, and does not 855 * become visible to the child through a normal return process, 856 * since it never made the call into the kernel itself in the 857 * first place. 858 * 859 * After resuming the thread, this function returns directly to 860 * the parent process which invoked the fork() system call. 861 * 862 * Important: The child thread_resume occurs before the parent returns; 863 * depending on scheduling latency, this means that it is not 864 * deterministic as to whether the parent or child is scheduled 865 * to run first. It is entirely possible that the child could 866 * run to completion prior to the parent running. 
867 */ 868int 869fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) 870{ 871 thread_t child_thread; 872 int err; 873 874 retval[1] = 0; /* flag parent return for user space */ 875 876 if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK)) == 0) { 877 task_t child_task; 878 proc_t child_proc; 879 880 /* Return to the parent */ 881 child_proc = (proc_t)get_bsdthreadtask_info(child_thread); 882 retval[0] = child_proc->p_pid; 883 884 /* 885 * Drop the signal lock on the child which was taken on our 886 * behalf by forkproc()/cloneproc() to prevent signals being 887 * received by the child in a partially constructed state. 888 */ 889 proc_signalend(child_proc, 0); 890 proc_transend(child_proc, 0); 891 892 /* flag the fork has occurred */ 893 proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid); 894 DTRACE_PROC1(create, proc_t, child_proc); 895 896 /* "Return" to the child */ 897 (void)thread_resume(child_thread); 898 899 /* drop the extra references we got during the creation */ 900 if ((child_task = (task_t)get_threadtask(child_thread)) != NULL) { 901 task_deallocate(child_task); 902 } 903 thread_deallocate(child_thread); 904 } 905 906 return(err); 907} 908 909 910/* 911 * cloneproc 912 * 913 * Description: Create a new process from a specified process. 
914 * 915 * Parameters: parent_task The parent task to be cloned, or 916 * TASK_NULL is task characteristics 917 * are not to be inherited 918 * be cloned, or TASK_NULL if the new 919 * task is not to inherit the VM 920 * characteristics of the parent 921 * parent_proc The parent process to be cloned 922 * inherit_memory True if the child is to inherit 923 * memory from the parent; if this is 924 * non-NULL, then the parent_task must 925 * also be non-NULL 926 * memstat_internal Whether to track the process in the 927 * jetsam priority list (if configured) 928 * 929 * Returns: !NULL pointer to new child thread 930 * NULL Failure (unspecified) 931 * 932 * Note: On return newly created child process has signal lock held 933 * to block delivery of signal to it if called with lock set. 934 * fork() code needs to explicity remove this lock before 935 * signals can be delivered 936 * 937 * In the case of bootstrap, this function can be called from 938 * bsd_utaskbootstrap() in order to bootstrap the first process; 939 * the net effect is to provide a uthread structure for the 940 * kernel process associated with the kernel task. 941 * 942 * XXX: Tristating using the value parent_task as the major key 943 * and inherit_memory as the minor key is something we should 944 * refactor later; we owe the current semantics, ultimately, 945 * to the semantics of task_create_internal. For now, we will 946 * live with this being somewhat awkward. 947 */ 948thread_t 949cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory, int memstat_internal) 950{ 951#if !CONFIG_MEMORYSTATUS 952#pragma unused(memstat_internal) 953#endif 954 task_t child_task; 955 proc_t child_proc; 956 thread_t child_thread = NULL; 957 958 if ((child_proc = forkproc(parent_proc)) == NULL) { 959 /* Failed to allocate new process */ 960 goto bad; 961 } 962 963 child_thread = fork_create_child(parent_task, child_proc, inherit_memory, (parent_task == TASK_NULL) ? 
FALSE : (parent_proc->p_flag & P_LP64)); 964 965 if (child_thread == NULL) { 966 /* 967 * Failed to create thread; now we must deconstruct the new 968 * process previously obtained from forkproc(). 969 */ 970 forkproc_free(child_proc); 971 goto bad; 972 } 973 974 child_task = get_threadtask(child_thread); 975 if (parent_proc->p_flag & P_LP64) { 976 task_set_64bit(child_task, TRUE); 977 OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag); 978 } else { 979 task_set_64bit(child_task, FALSE); 980 OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag); 981 } 982 983#if CONFIG_MEMORYSTATUS 984 if (memstat_internal) { 985 proc_list_lock(); 986 child_proc->p_memstat_state |= P_MEMSTAT_INTERNAL; 987 proc_list_unlock(); 988 } 989#endif 990 991 /* make child visible */ 992 pinsertchild(parent_proc, child_proc); 993 994 /* 995 * Make child runnable, set start time. 996 */ 997 child_proc->p_stat = SRUN; 998bad: 999 return(child_thread); 1000} 1001 1002 1003/* 1004 * Destroy a process structure that resulted from a call to forkproc(), but 1005 * which must be returned to the system because of a subsequent failure 1006 * preventing it from becoming active. 1007 * 1008 * Parameters: p The incomplete process from forkproc() 1009 * 1010 * Returns: (void) 1011 * 1012 * Note: This function should only be used in an error handler following 1013 * a call to forkproc(). 1014 * 1015 * Operations occur in reverse order of those in forkproc(). 1016 */ 1017void 1018forkproc_free(proc_t p) 1019{ 1020 1021 /* We held signal and a transition locks; drop them */ 1022 proc_signalend(p, 0); 1023 proc_transend(p, 0); 1024 1025 /* 1026 * If we have our own copy of the resource limits structure, we 1027 * need to free it. If it's a shared copy, we need to drop our 1028 * reference on it. 
1029 */ 1030 proc_limitdrop(p, 0); 1031 p->p_limit = NULL; 1032 1033#if SYSV_SHM 1034 /* Need to drop references to the shared memory segment(s), if any */ 1035 if (p->vm_shm) { 1036 /* 1037 * Use shmexec(): we have no address space, so no mappings 1038 * 1039 * XXX Yes, the routine is badly named. 1040 */ 1041 shmexec(p); 1042 } 1043#endif 1044 1045 /* Need to undo the effects of the fdcopy(), if any */ 1046 fdfree(p); 1047 1048 /* 1049 * Drop the reference on a text vnode pointer, if any 1050 * XXX This code is broken in forkproc(); see <rdar://4256419>; 1051 * XXX if anyone ever uses this field, we will be extremely unhappy. 1052 */ 1053 if (p->p_textvp) { 1054 vnode_rele(p->p_textvp); 1055 p->p_textvp = NULL; 1056 } 1057 1058 /* Stop the profiling clock */ 1059 stopprofclock(p); 1060 1061 /* Update the audit session proc count */ 1062 AUDIT_SESSION_PROCEXIT(p); 1063 1064 /* Release the credential reference */ 1065 kauth_cred_unref(&p->p_ucred); 1066 1067 proc_list_lock(); 1068 /* Decrement the count of processes in the system */ 1069 nprocs--; 1070 proc_list_unlock(); 1071 1072 thread_call_free(p->p_rcall); 1073 1074 /* Free allocated memory */ 1075 FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SIGACTS); 1076 FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_PSTATS); 1077 proc_checkdeadrefs(p); 1078 FREE_ZONE(p, sizeof *p, M_PROC); 1079} 1080 1081 1082/* 1083 * forkproc 1084 * 1085 * Description: Create a new process structure, given a parent process 1086 * structure. 1087 * 1088 * Parameters: parent_proc The parent process 1089 * 1090 * Returns: !NULL The new process structure 1091 * NULL Error (insufficient free memory) 1092 * 1093 * Note: When successful, the newly created process structure is 1094 * partially initialized; if a caller needs to deconstruct the 1095 * returned structure, they must call forkproc_free() to do so. 
1096 */ 1097proc_t 1098forkproc(proc_t parent_proc) 1099{ 1100 proc_t child_proc; /* Our new process */ 1101 static int nextpid = 0, pidwrap = 0, nextpidversion = 0; 1102 static uint64_t nextuniqueid = 0; 1103 int error = 0; 1104 struct session *sessp; 1105 uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread()); 1106 1107 MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK); 1108 if (child_proc == NULL) { 1109 printf("forkproc: M_PROC zone exhausted\n"); 1110 goto bad; 1111 } 1112 /* zero it out as we need to insert in hash */ 1113 bzero(child_proc, sizeof *child_proc); 1114 1115 MALLOC_ZONE(child_proc->p_stats, struct pstats *, 1116 sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK); 1117 if (child_proc->p_stats == NULL) { 1118 printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n"); 1119 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); 1120 child_proc = NULL; 1121 goto bad; 1122 } 1123 MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *, 1124 sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK); 1125 if (child_proc->p_sigacts == NULL) { 1126 printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n"); 1127 FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS); 1128 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); 1129 child_proc = NULL; 1130 goto bad; 1131 } 1132 1133 /* allocate a callout for use by interval timers */ 1134 child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc); 1135 if (child_proc->p_rcall == NULL) { 1136 FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS); 1137 FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS); 1138 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); 1139 child_proc = NULL; 1140 goto bad; 1141 } 1142 1143 1144 /* 1145 * Find an unused PID. 
1146 */ 1147 1148 proc_list_lock(); 1149 1150 nextpid++; 1151retry: 1152 /* 1153 * If the process ID prototype has wrapped around, 1154 * restart somewhat above 0, as the low-numbered procs 1155 * tend to include daemons that don't exit. 1156 */ 1157 if (nextpid >= PID_MAX) { 1158 nextpid = 100; 1159 pidwrap = 1; 1160 } 1161 if (pidwrap != 0) { 1162 1163 /* if the pid stays in hash both for zombie and runniing state */ 1164 if (pfind_locked(nextpid) != PROC_NULL) { 1165 nextpid++; 1166 goto retry; 1167 } 1168 1169 if (pgfind_internal(nextpid) != PGRP_NULL) { 1170 nextpid++; 1171 goto retry; 1172 } 1173 if (session_find_internal(nextpid) != SESSION_NULL) { 1174 nextpid++; 1175 goto retry; 1176 } 1177 } 1178 nprocs++; 1179 child_proc->p_pid = nextpid; 1180 child_proc->p_idversion = nextpidversion++; 1181 /* kernel process is handcrafted and not from fork, so start from 1 */ 1182 child_proc->p_uniqueid = ++nextuniqueid; 1183#if 1 1184 if (child_proc->p_pid != 0) { 1185 if (pfind_locked(child_proc->p_pid) != PROC_NULL) 1186 panic("proc in the list already\n"); 1187 } 1188#endif 1189 /* Insert in the hash */ 1190 child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE); 1191 LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash); 1192 proc_list_unlock(); 1193 1194 1195 /* 1196 * We've identified the PID we are going to use; initialize the new 1197 * process structure. 1198 */ 1199 child_proc->p_stat = SIDL; 1200 child_proc->p_pgrpid = PGRPID_DEAD; 1201 1202 /* 1203 * The zero'ing of the proc was at the allocation time due to need 1204 * for insertion to hash. Copy the section that is to be copied 1205 * directly from the parent. 1206 */ 1207 bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy, 1208 (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy)); 1209 1210 /* 1211 * Some flags are inherited from the parent. 1212 * Duplicate sub-structures as needed. 1213 * Increase reference counts on shared objects. 
1214 * The p_stats and p_sigacts substructs are set in vm_fork. 1215 */ 1216 child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR | P_DELAYIDLESLEEP)); 1217 if (parent_proc->p_flag & P_PROFIL) 1218 startprofclock(child_proc); 1219 1220 child_proc->p_vfs_iopolicy = (parent_proc->p_vfs_iopolicy & (P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY)); 1221 1222 /* 1223 * Note that if the current thread has an assumed identity, this 1224 * credential will be granted to the new process. 1225 */ 1226 child_proc->p_ucred = kauth_cred_get_with_ref(); 1227 /* update cred on proc */ 1228 PROC_UPDATE_CREDS_ONPROC(child_proc); 1229 /* update audit session proc count */ 1230 AUDIT_SESSION_PROCNEW(child_proc); 1231 1232#if CONFIG_FINE_LOCK_GROUPS 1233 lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr); 1234 lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr); 1235#if CONFIG_DTRACE 1236 lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr); 1237#endif 1238 lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr); 1239#else /* !CONFIG_FINE_LOCK_GROUPS */ 1240 lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr); 1241 lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr); 1242#if CONFIG_DTRACE 1243 lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr); 1244#endif 1245 lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr); 1246#endif /* !CONFIG_FINE_LOCK_GROUPS */ 1247 klist_init(&child_proc->p_klist); 1248 1249 if (child_proc->p_textvp != NULLVP) { 1250 /* bump references to the text vnode */ 1251 /* Need to hold iocount across the ref call */ 1252 if (vnode_getwithref(child_proc->p_textvp) == 0) { 1253 error = vnode_ref(child_proc->p_textvp); 1254 vnode_put(child_proc->p_textvp); 1255 if (error != 0) 1256 child_proc->p_textvp = NULLVP; 1257 } 1258 } 1259 1260 /* 1261 * Copy the parents per process open file table to the child; if 
1262 * there is a per-thread current working directory, set the childs 1263 * per-process current working directory to that instead of the 1264 * parents. 1265 * 1266 * XXX may fail to copy descriptors to child 1267 */ 1268 child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir); 1269 1270#if SYSV_SHM 1271 if (parent_proc->vm_shm) { 1272 /* XXX may fail to attach shm to child */ 1273 (void)shmfork(parent_proc, child_proc); 1274 } 1275#endif 1276 /* 1277 * inherit the limit structure to child 1278 */ 1279 proc_limitfork(parent_proc, child_proc); 1280 1281 if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { 1282 uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur; 1283 child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur; 1284 } 1285 1286 /* Intialize new process stats, including start time */ 1287 /* <rdar://6640543> non-zeroed portion contains garbage AFAICT */ 1288 bzero(child_proc->p_stats, sizeof(*child_proc->p_stats)); 1289 microtime_with_abstime(&child_proc->p_start, &child_proc->p_stats->ps_start); 1290 1291 if (parent_proc->p_sigacts != NULL) 1292 (void)memcpy(child_proc->p_sigacts, 1293 parent_proc->p_sigacts, sizeof *child_proc->p_sigacts); 1294 else 1295 (void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts); 1296 1297 sessp = proc_session(parent_proc); 1298 if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT) 1299 OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag); 1300 session_rele(sessp); 1301 1302 /* 1303 * block all signals to reach the process. 1304 * no transition race should be occuring with the child yet, 1305 * but indicate that the process is in (the creation) transition. 
1306 */ 1307 proc_signalstart(child_proc, 0); 1308 proc_transstart(child_proc, 0); 1309 1310 child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX; 1311 TAILQ_INIT(&child_proc->p_uthlist); 1312 TAILQ_INIT(&child_proc->p_aio_activeq); 1313 TAILQ_INIT(&child_proc->p_aio_doneq); 1314 1315 /* Inherit the parent flags for code sign */ 1316 child_proc->p_csflags = (parent_proc->p_csflags & ~CS_KILLED); 1317 1318 /* 1319 * All processes have work queue locks; cleaned up by 1320 * reap_child_locked() 1321 */ 1322 workqueue_init_lock(child_proc); 1323 1324 /* 1325 * Copy work queue information 1326 * 1327 * Note: This should probably only happen in the case where we are 1328 * creating a child that is a copy of the parent; since this 1329 * routine is called in the non-duplication case of vfork() 1330 * or posix_spawn(), then this information should likely not 1331 * be duplicated. 1332 * 1333 * <rdar://6640553> Work queue pointers that no longer point to code 1334 */ 1335 child_proc->p_wqthread = parent_proc->p_wqthread; 1336 child_proc->p_threadstart = parent_proc->p_threadstart; 1337 child_proc->p_pthsize = parent_proc->p_pthsize; 1338 child_proc->p_targconc = parent_proc->p_targconc; 1339 if ((parent_proc->p_lflag & P_LREGISTER) != 0) { 1340 child_proc->p_lflag |= P_LREGISTER; 1341 } 1342 child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset; 1343 child_proc->p_dispatchqueue_serialno_offset = parent_proc->p_dispatchqueue_serialno_offset; 1344#if PSYNCH 1345 pth_proc_hashinit(child_proc); 1346#endif /* PSYNCH */ 1347 1348#if CONFIG_LCTX 1349 child_proc->p_lctx = NULL; 1350 /* Add new process to login context (if any). */ 1351 if (parent_proc->p_lctx != NULL) { 1352 /* 1353 * <rdar://6640564> This should probably be delayed in the 1354 * vfork() or posix_spawn() cases. 
1355 */ 1356 LCTX_LOCK(parent_proc->p_lctx); 1357 enterlctx(child_proc, parent_proc->p_lctx, 0); 1358 } 1359#endif 1360 1361#if CONFIG_MEMORYSTATUS 1362 /* Memorystatus + jetsam init */ 1363 child_proc->p_memstat_state = 0; 1364 child_proc->p_memstat_effectivepriority = JETSAM_PRIORITY_DEFAULT; 1365 child_proc->p_memstat_requestedpriority = JETSAM_PRIORITY_DEFAULT; 1366 child_proc->p_memstat_userdata = 0; 1367#if CONFIG_FREEZE 1368 child_proc->p_memstat_suspendedfootprint = 0; 1369#endif 1370 child_proc->p_memstat_dirty = 0; 1371 child_proc->p_memstat_idledeadline = 0; 1372#endif /* CONFIG_MEMORYSTATUS */ 1373 1374bad: 1375 return(child_proc); 1376} 1377 1378void 1379proc_lock(proc_t p) 1380{ 1381 lck_mtx_lock(&p->p_mlock); 1382} 1383 1384void 1385proc_unlock(proc_t p) 1386{ 1387 lck_mtx_unlock(&p->p_mlock); 1388} 1389 1390void 1391proc_spinlock(proc_t p) 1392{ 1393 lck_spin_lock(&p->p_slock); 1394} 1395 1396void 1397proc_spinunlock(proc_t p) 1398{ 1399 lck_spin_unlock(&p->p_slock); 1400} 1401 1402void 1403proc_list_lock(void) 1404{ 1405 lck_mtx_lock(proc_list_mlock); 1406} 1407 1408void 1409proc_list_unlock(void) 1410{ 1411 lck_mtx_unlock(proc_list_mlock); 1412} 1413 1414#include <kern/zalloc.h> 1415 1416struct zone *uthread_zone; 1417static int uthread_zone_inited = 0; 1418 1419static void 1420uthread_zone_init(void) 1421{ 1422 if (!uthread_zone_inited) { 1423 uthread_zone = zinit(sizeof(struct uthread), 1424 thread_max * sizeof(struct uthread), 1425 THREAD_CHUNK * sizeof(struct uthread), 1426 "uthreads"); 1427 uthread_zone_inited = 1; 1428 } 1429} 1430 1431void * 1432uthread_alloc(task_t task, thread_t thread, int noinherit) 1433{ 1434 proc_t p; 1435 uthread_t uth; 1436 uthread_t uth_parent; 1437 void *ut; 1438 1439 if (!uthread_zone_inited) 1440 uthread_zone_init(); 1441 1442 ut = (void *)zalloc(uthread_zone); 1443 bzero(ut, sizeof(struct uthread)); 1444 1445 p = (proc_t) get_bsdtask_info(task); 1446 uth = (uthread_t)ut; 1447 uth->uu_thread = thread; 1448 1449 
/* 1450 * Thread inherits credential from the creating thread, if both 1451 * are in the same task. 1452 * 1453 * If the creating thread has no credential or is from another 1454 * task we can leave the new thread credential NULL. If it needs 1455 * one later, it will be lazily assigned from the task's process. 1456 */ 1457 uth_parent = (uthread_t)get_bsdthread_info(current_thread()); 1458 if ((noinherit == 0) && task == current_task() && 1459 uth_parent != NULL && 1460 IS_VALID_CRED(uth_parent->uu_ucred)) { 1461 /* 1462 * XXX The new thread is, in theory, being created in context 1463 * XXX of parent thread, so a direct reference to the parent 1464 * XXX is OK. 1465 */ 1466 kauth_cred_ref(uth_parent->uu_ucred); 1467 uth->uu_ucred = uth_parent->uu_ucred; 1468 /* the credential we just inherited is an assumed credential */ 1469 if (uth_parent->uu_flag & UT_SETUID) 1470 uth->uu_flag |= UT_SETUID; 1471 } else { 1472 /* sometimes workqueue threads are created out task context */ 1473 if ((task != kernel_task) && (p != PROC_NULL)) 1474 uth->uu_ucred = kauth_cred_proc_ref(p); 1475 else 1476 uth->uu_ucred = NOCRED; 1477 } 1478 1479 1480 if ((task != kernel_task) && p) { 1481 1482 proc_lock(p); 1483 if (noinherit != 0) { 1484 /* workq threads will not inherit masks */ 1485 uth->uu_sigmask = ~workq_threadmask; 1486 } else if (uth_parent) { 1487 if (uth_parent->uu_flag & UT_SAS_OLDMASK) 1488 uth->uu_sigmask = uth_parent->uu_oldmask; 1489 else 1490 uth->uu_sigmask = uth_parent->uu_sigmask; 1491 } 1492 uth->uu_context.vc_thread = thread; 1493 TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list); 1494 proc_unlock(p); 1495 1496#if CONFIG_DTRACE 1497 if (p->p_dtrace_ptss_pages != NULL) { 1498 uth->t_dtrace_scratch = dtrace_ptss_claim_entry(p); 1499 } 1500#endif 1501#if CONFIG_MACF 1502 mac_thread_label_init(uth); 1503#endif 1504 } 1505 1506 return (ut); 1507} 1508 1509 1510/* 1511 * This routine frees all the BSD context in uthread except the credential. 
1512 * It does not free the uthread structure as well 1513 */ 1514void 1515uthread_cleanup(task_t task, void *uthread, void * bsd_info) 1516{ 1517 struct _select *sel; 1518 uthread_t uth = (uthread_t)uthread; 1519 proc_t p = (proc_t)bsd_info; 1520 1521 1522 if (uth->uu_lowpri_window || uth->uu_throttle_info) { 1523 /* 1524 * task is marked as a low priority I/O type 1525 * and we've somehow managed to not dismiss the throttle 1526 * through the normal exit paths back to user space... 1527 * no need to throttle this thread since its going away 1528 * but we do need to update our bookeeping w/r to throttled threads 1529 * 1530 * Calling this routine will clean up any throttle info reference 1531 * still inuse by the thread. 1532 */ 1533 throttle_lowpri_io(0); 1534 } 1535 /* 1536 * Per-thread audit state should never last beyond system 1537 * call return. Since we don't audit the thread creation/ 1538 * removal, the thread state pointer should never be 1539 * non-NULL when we get here. 1540 */ 1541 assert(uth->uu_ar == NULL); 1542 1543 sel = &uth->uu_select; 1544 /* cleanup the select bit space */ 1545 if (sel->nbytes) { 1546 FREE(sel->ibits, M_TEMP); 1547 FREE(sel->obits, M_TEMP); 1548 sel->nbytes = 0; 1549 } 1550 1551 if (uth->uu_cdir) { 1552 vnode_rele(uth->uu_cdir); 1553 uth->uu_cdir = NULLVP; 1554 } 1555 1556 if (uth->uu_allocsize && uth->uu_wqset){ 1557 kfree(uth->uu_wqset, uth->uu_allocsize); 1558 uth->uu_allocsize = 0; 1559 uth->uu_wqset = 0; 1560 } 1561 1562 if(uth->pth_name != NULL) 1563 { 1564 kfree(uth->pth_name, MAXTHREADNAMESIZE); 1565 uth->pth_name = 0; 1566 } 1567 if ((task != kernel_task) && p) { 1568 1569 if (((uth->uu_flag & UT_VFORK) == UT_VFORK) && (uth->uu_proc != PROC_NULL)) { 1570 vfork_exit_internal(uth->uu_proc, 0, 1); 1571 } 1572 /* 1573 * Remove the thread from the process list and 1574 * transfer [appropriate] pending signals to the process. 
1575 */ 1576 if (get_bsdtask_info(task) == p) { 1577 proc_lock(p); 1578 TAILQ_REMOVE(&p->p_uthlist, uth, uu_list); 1579 p->p_siglist |= (uth->uu_siglist & execmask & (~p->p_sigignore | sigcantmask)); 1580 proc_unlock(p); 1581 } 1582#if CONFIG_DTRACE 1583 struct dtrace_ptss_page_entry *tmpptr = uth->t_dtrace_scratch; 1584 uth->t_dtrace_scratch = NULL; 1585 if (tmpptr != NULL) { 1586 dtrace_ptss_release_entry(p, tmpptr); 1587 } 1588#endif 1589#if CONFIG_MACF 1590 mac_thread_label_destroy(uth); 1591#endif 1592 } 1593} 1594 1595/* This routine releases the credential stored in uthread */ 1596void 1597uthread_cred_free(void *uthread) 1598{ 1599 uthread_t uth = (uthread_t)uthread; 1600 1601 /* and free the uthread itself */ 1602 if (IS_VALID_CRED(uth->uu_ucred)) { 1603 kauth_cred_t oldcred = uth->uu_ucred; 1604 uth->uu_ucred = NOCRED; 1605 kauth_cred_unref(&oldcred); 1606 } 1607} 1608 1609/* This routine frees the uthread structure held in thread structure */ 1610void 1611uthread_zone_free(void *uthread) 1612{ 1613 uthread_t uth = (uthread_t)uthread; 1614 1615 if (uth->t_tombstone) { 1616 kfree(uth->t_tombstone, sizeof(struct doc_tombstone)); 1617 uth->t_tombstone = NULL; 1618 } 1619 1620 /* and free the uthread itself */ 1621 zfree(uthread_zone, uthread); 1622} 1623