1/* 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Mach Operating System 31 * Copyright (c) 1987 Carnegie-Mellon University 32 * All rights reserved. The CMU software License Agreement specifies 33 * the terms and conditions for use and redistribution. 34 */ 35 36/*- 37 * Copyright (c) 1982, 1986, 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * (c) UNIX System Laboratories, Inc. 40 * All or some portions of this file are derived from material licensed 41 * to the University of California by American Telephone and Telegraph 42 * Co. 
or Unix System Laboratories, Inc. and are reproduced herein with 43 * the permission of UNIX System Laboratories, Inc. 44 * 45 * Redistribution and use in source and binary forms, with or without 46 * modification, are permitted provided that the following conditions 47 * are met: 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 3. All advertising materials mentioning features or use of this software 54 * must display the following acknowledgement: 55 * This product includes software developed by the University of 56 * California, Berkeley and its contributors. 57 * 4. Neither the name of the University nor the names of its contributors 58 * may be used to endorse or promote products derived from this software 59 * without specific prior written permission. 60 * 61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 71 * SUCH DAMAGE. 72 * 73 * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93 74 */ 75/* 76 * NOTICE: This file was modified by SPARTA, Inc. 
in 2005 to introduce 77 * support for mandatory and extensible security protections. This notice 78 * is included in support of clause 2.2 (b) of the Apple Public License, 79 * Version 2.0. 80 */ 81#include <machine/reg.h> 82#include <machine/cpu_capabilities.h> 83 84#include <sys/param.h> 85#include <sys/systm.h> 86#include <sys/filedesc.h> 87#include <sys/kernel.h> 88#include <sys/proc_internal.h> 89#include <sys/kauth.h> 90#include <sys/user.h> 91#include <sys/socketvar.h> 92#include <sys/malloc.h> 93#include <sys/namei.h> 94#include <sys/mount_internal.h> 95#include <sys/vnode_internal.h> 96#include <sys/file_internal.h> 97#include <sys/stat.h> 98#include <sys/uio_internal.h> 99#include <sys/acct.h> 100#include <sys/exec.h> 101#include <sys/kdebug.h> 102#include <sys/signal.h> 103#include <sys/aio_kern.h> 104#include <sys/sysproto.h> 105#if SYSV_SHM 106#include <sys/shm_internal.h> /* shmexec() */ 107#endif 108#include <sys/ubc_internal.h> /* ubc_map() */ 109#include <sys/spawn.h> 110#include <sys/spawn_internal.h> 111#include <sys/process_policy.h> 112#include <sys/codesign.h> 113#include <crypto/sha1.h> 114 115#include <libkern/libkern.h> 116 117#include <security/audit/audit.h> 118 119#include <ipc/ipc_types.h> 120 121#include <mach/mach_types.h> 122#include <mach/port.h> 123#include <mach/task.h> 124#include <mach/task_access.h> 125#include <mach/thread_act.h> 126#include <mach/vm_map.h> 127#include <mach/mach_vm.h> 128#include <mach/vm_param.h> 129 130#include <kern/sched_prim.h> /* thread_wakeup() */ 131#include <kern/affinity.h> 132#include <kern/assert.h> 133#include <kern/task.h> 134#include <kern/coalition.h> 135 136#if CONFIG_MACF 137#include <security/mac.h> 138#include <security/mac_mach_internal.h> 139#endif 140 141#include <vm/vm_map.h> 142#include <vm/vm_kern.h> 143#include <vm/vm_protos.h> 144#include <vm/vm_kern.h> 145#include <vm/vm_fault.h> 146#include <vm/vm_pageout.h> 147 148#include <kdp/kdp_dyld.h> 149 150#include 
<machine/pal_routines.h> 151 152#include <pexpert/pexpert.h> 153 154#if CONFIG_MEMORYSTATUS 155#include <sys/kern_memorystatus.h> 156#endif 157 158#if CONFIG_DTRACE 159/* Do not include dtrace.h, it redefines kmem_[alloc/free] */ 160extern void (*dtrace_fasttrap_exec_ptr)(proc_t); 161extern void (*dtrace_proc_waitfor_exec_ptr)(proc_t); 162extern void (*dtrace_helpers_cleanup)(proc_t); 163extern void dtrace_lazy_dofs_destroy(proc_t); 164 165/* 166 * Since dtrace_proc_waitfor_exec_ptr can be added/removed in dtrace_subr.c, 167 * we will store its value before actually calling it. 168 */ 169static void (*dtrace_proc_waitfor_hook)(proc_t) = NULL; 170 171#include <sys/dtrace_ptss.h> 172#endif 173 174/* support for child creation in exec after vfork */ 175thread_t fork_create_child(task_t parent_task, coalition_t parent_coalition, proc_t child_proc, int inherit_memory, int is64bit); 176void vfork_exit(proc_t p, int rv); 177int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart); 178extern void proc_apply_task_networkbg_internal(proc_t, thread_t); 179 180/* 181 * Mach things for which prototypes are unavailable from Mach headers 182 */ 183void ipc_task_reset( 184 task_t task); 185void ipc_thread_reset( 186 thread_t thread); 187kern_return_t ipc_object_copyin( 188 ipc_space_t space, 189 mach_port_name_t name, 190 mach_msg_type_name_t msgt_name, 191 ipc_object_t *objectp); 192void ipc_port_release_send(ipc_port_t); 193 194#if DEVELOPMENT || DEBUG 195void task_importance_update_owner_info(task_t); 196#endif 197 198extern struct savearea *get_user_regs(thread_t); 199 200__attribute__((noinline)) int __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid); 201 202#include <kern/thread.h> 203#include <kern/task.h> 204#include <kern/ast.h> 205#include <kern/mach_loader.h> 206#include <kern/mach_fat.h> 207#include <mach-o/fat.h> 208#include <mach-o/loader.h> 209#include <machine/vmparam.h> 210#include 
<sys/imgact.h> 211 212#include <sys/sdt.h> 213 214 215/* 216 * EAI_ITERLIMIT The maximum number of times to iterate an image 217 * activator in exec_activate_image() before treating 218 * it as malformed/corrupt. 219 */ 220#define EAI_ITERLIMIT 10 221 222/* 223 * For #! interpreter parsing 224 */ 225#define IS_WHITESPACE(ch) ((ch == ' ') || (ch == '\t')) 226#define IS_EOL(ch) ((ch == '#') || (ch == '\n')) 227 228extern vm_map_t bsd_pageable_map; 229extern const struct fileops vnops; 230 231#define ROUND_PTR(type, addr) \ 232 (type *)( ( (uintptr_t)(addr) + 16 - 1) \ 233 & ~(16 - 1) ) 234 235struct image_params; /* Forward */ 236static int exec_activate_image(struct image_params *imgp); 237static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp); 238static int load_return_to_errno(load_return_t lrtn); 239static int execargs_alloc(struct image_params *imgp); 240static int execargs_free(struct image_params *imgp); 241static int exec_check_permissions(struct image_params *imgp); 242static int exec_extract_strings(struct image_params *imgp); 243static int exec_add_apple_strings(struct image_params *imgp); 244static int exec_handle_sugid(struct image_params *imgp); 245static int sugid_scripts = 0; 246SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, ""); 247static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p); 248static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size); 249static void exec_resettextvp(proc_t, struct image_params *); 250static int check_for_signature(proc_t, struct image_params *); 251static void exec_prefault_data(proc_t, struct image_params *, load_result_t *); 252static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports); 253static errno_t exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, 254 ipc_port_t * portwatch_ports, int 
portwatch_count); 255 256/* 257 * exec_add_user_string 258 * 259 * Add the requested string to the string space area. 260 * 261 * Parameters; struct image_params * image parameter block 262 * user_addr_t string to add to strings area 263 * int segment from which string comes 264 * boolean_t TRUE if string contributes to NCARGS 265 * 266 * Returns: 0 Success 267 * !0 Failure errno from copyinstr() 268 * 269 * Implicit returns: 270 * (imgp->ip_strendp) updated location of next add, if any 271 * (imgp->ip_strspace) updated byte count of space remaining 272 * (imgp->ip_argspace) updated byte count of space in NCARGS 273 */ 274static int 275exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs) 276{ 277 int error = 0; 278 279 do { 280 size_t len = 0; 281 int space; 282 283 if (is_ncargs) 284 space = imgp->ip_argspace; /* by definition smaller than ip_strspace */ 285 else 286 space = imgp->ip_strspace; 287 288 if (space <= 0) { 289 error = E2BIG; 290 break; 291 } 292 293 if (!UIO_SEG_IS_USER_SPACE(seg)) { 294 char *kstr = CAST_DOWN(char *,str); /* SAFE */ 295 error = copystr(kstr, imgp->ip_strendp, space, &len); 296 } else { 297 error = copyinstr(str, imgp->ip_strendp, space, &len); 298 } 299 300 imgp->ip_strendp += len; 301 imgp->ip_strspace -= len; 302 if (is_ncargs) 303 imgp->ip_argspace -= len; 304 305 } while (error == ENAMETOOLONG); 306 307 return error; 308} 309 310/* 311 * exec_save_path 312 * 313 * To support new app package launching for Mac OS X, the dyld needs the 314 * first argument to execve() stored on the user stack. 315 * 316 * Save the executable path name at the bottom of the strings area and set 317 * the argument vector pointer to the location following that to indicate 318 * the start of the argument and environment tuples, setting the remaining 319 * string space count to the size of the string area minus the path length. 
320 * 321 * Parameters; struct image_params * image parameter block 322 * char * path used to invoke program 323 * int segment from which path comes 324 * 325 * Returns: int 0 Success 326 * EFAULT Bad address 327 * copy[in]str:EFAULT Bad address 328 * copy[in]str:ENAMETOOLONG Filename too long 329 * 330 * Implicit returns: 331 * (imgp->ip_strings) saved path 332 * (imgp->ip_strspace) space remaining in ip_strings 333 * (imgp->ip_strendp) start of remaining copy area 334 * (imgp->ip_argspace) space remaining of NCARGS 335 * (imgp->ip_applec) Initial applev[0] 336 * 337 * Note: We have to do this before the initial namei() since in the 338 * path contains symbolic links, namei() will overwrite the 339 * original path buffer contents. If the last symbolic link 340 * resolved was a relative pathname, we would lose the original 341 * "path", which could be an absolute pathname. This might be 342 * unacceptable for dyld. 343 */ 344static int 345exec_save_path(struct image_params *imgp, user_addr_t path, int seg) 346{ 347 int error; 348 size_t len; 349 char *kpath; 350 351 len = MIN(MAXPATHLEN, imgp->ip_strspace); 352 353 switch(seg) { 354 case UIO_USERSPACE32: 355 case UIO_USERSPACE64: /* Same for copyin()... */ 356 error = copyinstr(path, imgp->ip_strings, len, &len); 357 break; 358 case UIO_SYSSPACE: 359 kpath = CAST_DOWN(char *,path); /* SAFE */ 360 error = copystr(kpath, imgp->ip_strings, len, &len); 361 break; 362 default: 363 error = EFAULT; 364 break; 365 } 366 367 if (!error) { 368 imgp->ip_strendp += len; 369 imgp->ip_strspace -= len; 370 } 371 372 return(error); 373} 374 375/* 376 * exec_reset_save_path 377 * 378 * If we detect a shell script, we need to reset the string area 379 * state so that the interpreter can be saved onto the stack. 
380 381 * Parameters; struct image_params * image parameter block 382 * 383 * Returns: int 0 Success 384 * 385 * Implicit returns: 386 * (imgp->ip_strings) saved path 387 * (imgp->ip_strspace) space remaining in ip_strings 388 * (imgp->ip_strendp) start of remaining copy area 389 * (imgp->ip_argspace) space remaining of NCARGS 390 * 391 */ 392static int 393exec_reset_save_path(struct image_params *imgp) 394{ 395 imgp->ip_strendp = imgp->ip_strings; 396 imgp->ip_argspace = NCARGS; 397 imgp->ip_strspace = ( NCARGS + PAGE_SIZE ); 398 399 return (0); 400} 401 402/* 403 * exec_shell_imgact 404 * 405 * Image activator for interpreter scripts. If the image begins with the 406 * characters "#!", then it is an interpreter script. Verify that we are 407 * not already executing in PowerPC mode, and that the length of the script 408 * line indicating the interpreter is not in excess of the maximum allowed 409 * size. If this is the case, then break out the arguments, if any, which 410 * are separated by white space, and copy them into the argument save area 411 * as if they were provided on the command line before all other arguments. 412 * The line ends when we encounter a comment character ('#') or newline. 413 * 414 * Parameters; struct image_params * image parameter block 415 * 416 * Returns: -1 not an interpreter (keep looking) 417 * -3 Success: interpreter: relookup 418 * >0 Failure: interpreter: error number 419 * 420 * A return value other than -1 indicates subsequent image activators should 421 * not be given the opportunity to attempt to activate the image. 422 */ 423static int 424exec_shell_imgact(struct image_params *imgp) 425{ 426 char *vdata = imgp->ip_vdata; 427 char *ihp; 428 char *line_startp, *line_endp; 429 char *interp; 430 proc_t p; 431 struct fileproc *fp; 432 int fd; 433 int error; 434 435 /* 436 * Make sure it's a shell script. If we've already redirected 437 * from an interpreted file once, don't do it again. 
438 * 439 * Note: We disallow PowerPC, since the expectation is that we 440 * may run a PowerPC interpreter, but not an interpret a PowerPC 441 * image. This is consistent with historical behaviour. 442 */ 443 if (vdata[0] != '#' || 444 vdata[1] != '!' || 445 (imgp->ip_flags & IMGPF_INTERPRET) != 0) { 446 return (-1); 447 } 448 449 imgp->ip_flags |= IMGPF_INTERPRET; 450 imgp->ip_interp_sugid_fd = -1; 451 imgp->ip_interp_buffer[0] = '\0'; 452 453 /* Check to see if SUGID scripts are permitted. If they aren't then 454 * clear the SUGID bits. 455 * imgp->ip_vattr is known to be valid. 456 */ 457 if (sugid_scripts == 0) { 458 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID); 459 } 460 461 /* Try to find the first non-whitespace character */ 462 for( ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++ ) { 463 if (IS_EOL(*ihp)) { 464 /* Did not find interpreter, "#!\n" */ 465 return (ENOEXEC); 466 } else if (IS_WHITESPACE(*ihp)) { 467 /* Whitespace, like "#! /bin/sh\n", keep going. */ 468 } else { 469 /* Found start of interpreter */ 470 break; 471 } 472 } 473 474 if (ihp == &vdata[IMG_SHSIZE]) { 475 /* All whitespace, like "#! " */ 476 return (ENOEXEC); 477 } 478 479 line_startp = ihp; 480 481 /* Try to find the end of the interpreter+args string */ 482 for ( ; ihp < &vdata[IMG_SHSIZE]; ihp++ ) { 483 if (IS_EOL(*ihp)) { 484 /* Got it */ 485 break; 486 } else { 487 /* Still part of interpreter or args */ 488 } 489 } 490 491 if (ihp == &vdata[IMG_SHSIZE]) { 492 /* A long line, like "#! blah blah blah" without end */ 493 return (ENOEXEC); 494 } 495 496 /* Backtrack until we find the last non-whitespace */ 497 while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) { 498 ihp--; 499 } 500 501 /* The character after the last non-whitespace is our logical end of line */ 502 line_endp = ihp + 1; 503 504 /* 505 * Now we have pointers to the usable part of: 506 * 507 * "#! 
/usr/bin/int first second third \n" 508 * ^ line_startp ^ line_endp 509 */ 510 511 /* copy the interpreter name */ 512 interp = imgp->ip_interp_buffer; 513 for ( ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++) 514 *interp++ = *ihp; 515 *interp = '\0'; 516 517 exec_reset_save_path(imgp); 518 exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer), 519 UIO_SYSSPACE); 520 521 /* Copy the entire interpreter + args for later processing into argv[] */ 522 interp = imgp->ip_interp_buffer; 523 for ( ihp = line_startp; (ihp < line_endp); ihp++) 524 *interp++ = *ihp; 525 *interp = '\0'; 526 527 /* 528 * If we have a SUID oder SGID script, create a file descriptor 529 * from the vnode and pass /dev/fd/%d instead of the actual 530 * path name so that the script does not get opened twice 531 */ 532 if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) { 533 p = vfs_context_proc(imgp->ip_vfs_context); 534 error = falloc(p, &fp, &fd, imgp->ip_vfs_context); 535 if (error) 536 return(error); 537 538 fp->f_fglob->fg_flag = FREAD; 539 fp->f_fglob->fg_ops = &vnops; 540 fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp; 541 542 proc_fdlock(p); 543 procfdtbl_releasefd(p, fd, NULL); 544 fp_drop(p, fd, fp, 1); 545 proc_fdunlock(p); 546 vnode_ref(imgp->ip_vp); 547 548 imgp->ip_interp_sugid_fd = fd; 549 } 550 551 return (-3); 552} 553 554 555 556/* 557 * exec_fat_imgact 558 * 559 * Image activator for fat 1.0 binaries. If the binary is fat, then we 560 * need to select an image from it internally, and make that the image 561 * we are going to attempt to execute. At present, this consists of 562 * reloading the first page for the image with a first page from the 563 * offset location indicated by the fat header. 
564 * 565 * Parameters; struct image_params * image parameter block 566 * 567 * Returns: -1 not a fat binary (keep looking) 568 * -2 Success: encapsulated binary: reread 569 * >0 Failure: error number 570 * 571 * Important: This image activator is byte order neutral. 572 * 573 * Note: A return value other than -1 indicates subsequent image 574 * activators should not be given the opportunity to attempt 575 * to activate the image. 576 * 577 * If we find an encapsulated binary, we make no assertions 578 * about its validity; instead, we leave that up to a rescan 579 * for an activator to claim it, and, if it is claimed by one, 580 * that activator is responsible for determining validity. 581 */ 582static int 583exec_fat_imgact(struct image_params *imgp) 584{ 585 proc_t p = vfs_context_proc(imgp->ip_vfs_context); 586 kauth_cred_t cred = kauth_cred_proc_ref(p); 587 struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata; 588 struct _posix_spawnattr *psa = NULL; 589 struct fat_arch fat_arch; 590 int resid, error; 591 load_return_t lret; 592 593 /* Make sure it's a fat binary */ 594 if ((fat_header->magic != FAT_MAGIC) && 595 (fat_header->magic != FAT_CIGAM)) { 596 error = -1; 597 goto bad; 598 } 599 600#if DEVELOPMENT || DEBUG 601 if (cpu_type() == CPU_TYPE_ARM64) { 602 uint32_t fat_nfat_arch = OSSwapBigToHostInt32(fat_header->nfat_arch); 603 struct fat_arch *archs; 604 int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC); 605 int spawn = (imgp->ip_flags & IMGPF_SPAWN); 606 607 archs = (struct fat_arch *)(imgp->ip_vdata + sizeof(struct fat_header)); 608 609 /* ip_vdata always has PAGE_SIZE of data */ 610 if (PAGE_SIZE >= (sizeof(struct fat_header) + (fat_nfat_arch + 1) * sizeof(struct fat_arch))) { 611 if (fat_nfat_arch > 0 612 && OSSwapBigToHostInt32(archs[fat_nfat_arch].cputype) == CPU_TYPE_ARM64) { 613 614 /* rdar://problem/15001727 */ 615 printf("Attempt to execute malformed binary %s\n", imgp->ip_strings); 616 617 proc_lock(p); 618 p->p_csflags |= 
CS_KILLED; 619 proc_unlock(p); 620 621 /* 622 * We can't stop the system call, so make sure the child never executes 623 * For vfork exec, the current implementation has not set up the thread in the 624 * child process, so we cannot signal it. Return an error code in that case. 625 */ 626 if (!vfexec && !spawn) { 627 psignal(p, SIGKILL); 628 error = 0; 629 } else { 630 error = EBADEXEC; 631 } 632 goto bad; 633 } 634 } 635 } 636#endif 637 638 /* If posix_spawn binprefs exist, respect those prefs. */ 639 psa = (struct _posix_spawnattr *) imgp->ip_px_sa; 640 if (psa != NULL && psa->psa_binprefs[0] != 0) { 641 struct fat_arch *arches = (struct fat_arch *) (fat_header + 1); 642 int nfat_arch = 0, pr = 0, f = 0; 643 644 nfat_arch = OSSwapBigToHostInt32(fat_header->nfat_arch); 645 /* Check each preference listed against all arches in header */ 646 for (pr = 0; pr < NBINPREFS; pr++) { 647 cpu_type_t pref = psa->psa_binprefs[pr]; 648 if (pref == 0) { 649 /* No suitable arch in the pref list */ 650 error = EBADARCH; 651 goto bad; 652 } 653 654 if (pref == CPU_TYPE_ANY) { 655 /* Fall through to regular grading */ 656 break; 657 } 658 659 for (f = 0; f < nfat_arch; f++) { 660 cpu_type_t archtype = OSSwapBigToHostInt32( 661 arches[f].cputype); 662 cpu_type_t archsubtype = OSSwapBigToHostInt32( 663 arches[f].cpusubtype) & ~CPU_SUBTYPE_MASK; 664 if (pref == archtype && 665 grade_binary(archtype, archsubtype)) { 666 /* We have a winner! */ 667 fat_arch.cputype = archtype; 668 fat_arch.cpusubtype = archsubtype; 669 fat_arch.offset = OSSwapBigToHostInt32( 670 arches[f].offset); 671 fat_arch.size = OSSwapBigToHostInt32( 672 arches[f].size); 673 fat_arch.align = OSSwapBigToHostInt32( 674 arches[f].align); 675 goto use_arch; 676 } 677 } 678 } 679 } 680 681 /* Look up our preferred architecture in the fat file. 
*/ 682 lret = fatfile_getarch_affinity(imgp->ip_vp, 683 (vm_offset_t)fat_header, 684 &fat_arch, 685 (p->p_flag & P_AFFINITY)); 686 if (lret != LOAD_SUCCESS) { 687 error = load_return_to_errno(lret); 688 goto bad; 689 } 690 691use_arch: 692 /* Read the Mach-O header out of fat_arch */ 693 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, 694 PAGE_SIZE, fat_arch.offset, 695 UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED), 696 cred, &resid, p); 697 if (error) { 698 goto bad; 699 } 700 701 /* Did we read a complete header? */ 702 if (resid) { 703 error = EBADEXEC; 704 goto bad; 705 } 706 707 /* Success. Indicate we have identified an encapsulated binary */ 708 error = -2; 709 imgp->ip_arch_offset = (user_size_t)fat_arch.offset; 710 imgp->ip_arch_size = (user_size_t)fat_arch.size; 711 712bad: 713 kauth_cred_unref(&cred); 714 return (error); 715} 716 717/* 718 * exec_mach_imgact 719 * 720 * Image activator for mach-o 1.0 binaries. 721 * 722 * Parameters; struct image_params * image parameter block 723 * 724 * Returns: -1 not a fat binary (keep looking) 725 * -2 Success: encapsulated binary: reread 726 * >0 Failure: error number 727 * EBADARCH Mach-o binary, but with an unrecognized 728 * architecture 729 * ENOMEM No memory for child process after - 730 * can only happen after vfork() 731 * 732 * Important: This image activator is NOT byte order neutral. 733 * 734 * Note: A return value other than -1 indicates subsequent image 735 * activators should not be given the opportunity to attempt 736 * to activate the image. 
737 * 738 * TODO: More gracefully handle failures after vfork 739 */ 740static int 741exec_mach_imgact(struct image_params *imgp) 742{ 743 struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata; 744 proc_t p = vfs_context_proc(imgp->ip_vfs_context); 745 int error = 0; 746 task_t task; 747 task_t new_task = NULL; /* protected by vfexec */ 748 thread_t thread; 749 struct uthread *uthread; 750 vm_map_t old_map = VM_MAP_NULL; 751 vm_map_t map; 752 load_return_t lret; 753 load_result_t load_result; 754 struct _posix_spawnattr *psa = NULL; 755 int spawn = (imgp->ip_flags & IMGPF_SPAWN); 756 int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC); 757 758 /* 759 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference 760 * is a reserved field on the end, so for the most part, we can 761 * treat them as if they were identical. Reverse-endian Mach-O 762 * binaries are recognized but not compatible. 763 */ 764 if ((mach_header->magic == MH_CIGAM) || 765 (mach_header->magic == MH_CIGAM_64)) { 766 error = EBADARCH; 767 goto bad; 768 } 769 770 if ((mach_header->magic != MH_MAGIC) && 771 (mach_header->magic != MH_MAGIC_64)) { 772 error = -1; 773 goto bad; 774 } 775 776 switch (mach_header->filetype) { 777 case MH_DYLIB: 778 case MH_BUNDLE: 779 error = -1; 780 goto bad; 781 } 782 783 if (!imgp->ip_origcputype) { 784 imgp->ip_origcputype = mach_header->cputype; 785 imgp->ip_origcpusubtype = mach_header->cpusubtype; 786 } 787 788 task = current_task(); 789 thread = current_thread(); 790 uthread = get_bsdthread_info(thread); 791 792 if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64) 793 imgp->ip_flags |= IMGPF_IS_64BIT; 794 795 /* If posix_spawn binprefs exist, respect those prefs. 
*/ 796 psa = (struct _posix_spawnattr *) imgp->ip_px_sa; 797 if (psa != NULL && psa->psa_binprefs[0] != 0) { 798 int pr = 0; 799 for (pr = 0; pr < NBINPREFS; pr++) { 800 cpu_type_t pref = psa->psa_binprefs[pr]; 801 if (pref == 0) { 802 /* No suitable arch in the pref list */ 803 error = EBADARCH; 804 goto bad; 805 } 806 807 if (pref == CPU_TYPE_ANY) { 808 /* Jump to regular grading */ 809 goto grade; 810 } 811 812 if (pref == imgp->ip_origcputype) { 813 /* We have a match! */ 814 goto grade; 815 } 816 } 817 error = EBADARCH; 818 goto bad; 819 } 820grade: 821 if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) { 822 error = EBADARCH; 823 goto bad; 824 } 825 826 /* Copy in arguments/environment from the old process */ 827 error = exec_extract_strings(imgp); 828 if (error) 829 goto bad; 830 831 error = exec_add_apple_strings(imgp); 832 if (error) 833 goto bad; 834 835 AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc, 836 imgp->ip_endargv - imgp->ip_startargv); 837 AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc, 838 imgp->ip_endenvv - imgp->ip_endargv); 839 840 /* 841 * We are being called to activate an image subsequent to a vfork() 842 * operation; in this case, we know that our task, thread, and 843 * uthread are actually those of our parent, and our proc, which we 844 * obtained indirectly from the image_params vfs_context_t, is the 845 * new child process. 
846 */ 847 if (vfexec || spawn) { 848 if (vfexec) { 849 imgp->ip_new_thread = fork_create_child(task, COALITION_NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT)); 850 if (imgp->ip_new_thread == NULL) { 851 error = ENOMEM; 852 goto bad; 853 } 854 } 855 856 /* reset local idea of thread, uthread, task */ 857 thread = imgp->ip_new_thread; 858 uthread = get_bsdthread_info(thread); 859 task = new_task = get_threadtask(thread); 860 map = get_task_map(task); 861 } else { 862 map = VM_MAP_NULL; 863 } 864 865 /* 866 * We set these flags here; this is OK, since if we fail after 867 * this point, we have already destroyed the parent process anyway. 868 */ 869 task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0); 870 if (imgp->ip_flags & IMGPF_IS_64BIT) { 871 task_set_64bit(task, TRUE); 872 OSBitOrAtomic(P_LP64, &p->p_flag); 873 } else { 874 task_set_64bit(task, FALSE); 875 OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag); 876 } 877 878 /* 879 * Load the Mach-O file. 880 * 881 * NOTE: An error after this point indicates we have potentially 882 * destroyed or overwritten some process state while attempting an 883 * execve() following a vfork(), which is an unrecoverable condition. 884 * We send the new process an immediate SIGKILL to avoid it executing 885 * any instructions in the mutated address space. For true spawns, 886 * this is not the case, and "too late" is still not too late to 887 * return an error code to the parent process. 888 */ 889 890 /* 891 * Actually load the image file we previously decided to load. 
892 */ 893 lret = load_machfile(imgp, mach_header, thread, map, &load_result); 894 895 if (lret != LOAD_SUCCESS) { 896 error = load_return_to_errno(lret); 897 goto badtoolate; 898 } 899 900 proc_lock(p); 901 p->p_cputype = imgp->ip_origcputype; 902 p->p_cpusubtype = imgp->ip_origcpusubtype; 903 proc_unlock(p); 904 905 vm_map_set_user_wire_limit(get_task_map(task), p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur); 906 907 /* 908 * Set code-signing flags if this binary is signed, or if parent has 909 * requested them on exec. 910 */ 911 if (load_result.csflags & CS_VALID) { 912 imgp->ip_csflags |= load_result.csflags & 913 (CS_VALID| 914 CS_HARD|CS_KILL|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM| 915 CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT); 916 } else { 917 imgp->ip_csflags &= ~CS_VALID; 918 } 919 920 if (p->p_csflags & CS_EXEC_SET_HARD) 921 imgp->ip_csflags |= CS_HARD; 922 if (p->p_csflags & CS_EXEC_SET_KILL) 923 imgp->ip_csflags |= CS_KILL; 924 if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT) 925 imgp->ip_csflags |= CS_ENFORCEMENT; 926 if (p->p_csflags & CS_EXEC_SET_INSTALLER) 927 imgp->ip_csflags |= CS_INSTALLER; 928 929 930 /* 931 * Set up the system reserved areas in the new address space. 932 */ 933 vm_map_exec(get_task_map(task), 934 task, 935 (void *) p->p_fd->fd_rdir, 936 cpu_type()); 937 938 /* 939 * Close file descriptors which specify close-on-exec. 940 */ 941 fdexec(p, psa != NULL ? psa->psa_flags : 0); 942 943 /* 944 * deal with set[ug]id. 945 */ 946 error = exec_handle_sugid(imgp); 947 if (error) { 948 goto badtoolate; 949 } 950 951 /* 952 * deal with voucher on exec-calling thread. 
953 */ 954 if (imgp->ip_new_thread == NULL) 955 thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL); 956 957 /* Make sure we won't interrupt ourself signalling a partial process */ 958 if (!vfexec && !spawn && (p->p_lflag & P_LTRACED)) 959 psignal(p, SIGTRAP); 960 961 if (load_result.unixproc && 962 create_unix_stack(get_task_map(task), 963 &load_result, 964 p) != KERN_SUCCESS) { 965 error = load_return_to_errno(LOAD_NOSPACE); 966 goto badtoolate; 967 } 968 969 if (vfexec || spawn) { 970 old_map = vm_map_switch(get_task_map(task)); 971 } 972 973 if (load_result.unixproc) { 974 user_addr_t ap; 975 976 /* 977 * Copy the strings area out into the new process address 978 * space. 979 */ 980 ap = p->user_stack; 981 error = exec_copyout_strings(imgp, &ap); 982 if (error) { 983 if (vfexec || spawn) 984 vm_map_switch(old_map); 985 goto badtoolate; 986 } 987 /* Set the stack */ 988 thread_setuserstack(thread, ap); 989 } 990 991 if (load_result.dynlinker) { 992 uint64_t ap; 993 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4; 994 995 /* Adjust the stack */ 996 ap = thread_adjuserstack(thread, -new_ptr_size); 997 error = copyoutptr(load_result.mach_header, ap, new_ptr_size); 998 999 if (error) { 1000 if (vfexec || spawn) 1001 vm_map_switch(old_map); 1002 goto badtoolate; 1003 } 1004 task_set_dyld_info(task, load_result.all_image_info_addr, 1005 load_result.all_image_info_size); 1006 } 1007 1008 /* Avoid immediate VM faults back into kernel */ 1009 exec_prefault_data(p, imgp, &load_result); 1010 1011 if (vfexec || spawn) { 1012 vm_map_switch(old_map); 1013 } 1014 /* Set the entry point */ 1015 thread_setentrypoint(thread, load_result.entry_point); 1016 1017 /* Stop profiling */ 1018 stopprofclock(p); 1019 1020 /* 1021 * Reset signal state. 1022 */ 1023 execsigs(p, thread); 1024 1025 /* 1026 * need to cancel async IO requests that can be cancelled and wait for those 1027 * already active. MAY BLOCK! 
1028 */ 1029 _aio_exec( p ); 1030 1031#if SYSV_SHM 1032 /* FIXME: Till vmspace inherit is fixed: */ 1033 if (!vfexec && p->vm_shm) 1034 shmexec(p); 1035#endif 1036#if SYSV_SEM 1037 /* Clean up the semaphores */ 1038 semexit(p); 1039#endif 1040 1041 /* 1042 * Remember file name for accounting. 1043 */ 1044 p->p_acflag &= ~AFORK; 1045 /* If the translated name isn't NULL, then we want to use 1046 * that translated name as the name we show as the "real" name. 1047 * Otherwise, use the name passed into exec. 1048 */ 1049 if (0 != imgp->ip_p_comm[0]) { 1050 bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm, 1051 sizeof(p->p_comm)); 1052 } else { 1053 if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) 1054 imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN; 1055 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm, 1056 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen); 1057 p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0'; 1058 } 1059 1060 pal_dbg_set_task_name( p->task ); 1061 1062#if DEVELOPMENT || DEBUG 1063 /* 1064 * Update the pid an proc name for importance base if any 1065 */ 1066 task_importance_update_owner_info(p->task); 1067#endif 1068 1069 memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid)); 1070 1071// <rdar://6598155> dtrace code cleanup needed 1072#if CONFIG_DTRACE 1073 /* 1074 * Invalidate any predicate evaluation already cached for this thread by DTrace. 1075 * That's because we've just stored to p_comm and DTrace refers to that when it 1076 * evaluates the "execname" special variable. uid and gid may have changed as well. 1077 */ 1078 dtrace_set_thread_predcache(current_thread(), 0); 1079 1080 /* 1081 * Free any outstanding lazy dof entries. It is imperative we 1082 * always call dtrace_lazy_dofs_destroy, rather than null check 1083 * and call if !NULL. If we NULL test, during lazy dof faulting 1084 * we can race with the faulting code and proceed from here to 1085 * beyond the helpers cleanup. 
The lazy dof faulting will then 1086 * install new helpers which no longer belong to this process! 1087 */ 1088 dtrace_lazy_dofs_destroy(p); 1089 1090 1091 /* 1092 * Clean up any DTrace helpers for the process. 1093 */ 1094 if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) { 1095 (*dtrace_helpers_cleanup)(p); 1096 } 1097 1098 /* 1099 * Cleanup the DTrace provider associated with this process. 1100 */ 1101 proc_lock(p); 1102 if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) { 1103 (*dtrace_fasttrap_exec_ptr)(p); 1104 } 1105 proc_unlock(p); 1106#endif 1107 1108 if (kdebug_enable) { 1109 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4; 1110 1111 /* 1112 * Collect the pathname for tracing 1113 */ 1114 kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); 1115 1116 if (vfexec || spawn) { 1117 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, 1118 p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread)); 1119 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, 1120 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread)); 1121 } else { 1122 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, 1123 p->p_pid ,0,0,0,0); 1124 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, 1125 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0); 1126 } 1127 } 1128 1129 /* 1130 * Ensure the 'translated' and 'affinity' flags are cleared, since we 1131 * no longer run PowerPC binaries. 1132 */ 1133 OSBitAndAtomic(~((uint32_t)(P_TRANSLATED | P_AFFINITY)), &p->p_flag); 1134 1135 /* 1136 * If posix_spawned with the START_SUSPENDED flag, stop the 1137 * process before it runs. 
1138 */ 1139 if (imgp->ip_px_sa != NULL) { 1140 psa = (struct _posix_spawnattr *) imgp->ip_px_sa; 1141 if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) { 1142 proc_lock(p); 1143 p->p_stat = SSTOP; 1144 proc_unlock(p); 1145 (void) task_suspend(p->task); 1146 } 1147 } 1148 1149 /* 1150 * mark as execed, wakeup the process that vforked (if any) and tell 1151 * it that it now has its own resources back 1152 */ 1153 OSBitOrAtomic(P_EXEC, &p->p_flag); 1154 proc_resetregister(p); 1155 if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) { 1156 proc_lock(p); 1157 p->p_lflag &= ~P_LPPWAIT; 1158 proc_unlock(p); 1159 wakeup((caddr_t)p->p_pptr); 1160 } 1161 1162 /* 1163 * Pay for our earlier safety; deliver the delayed signals from 1164 * the incomplete vfexec process now that it's complete. 1165 */ 1166 if (vfexec && (p->p_lflag & P_LTRACED)) { 1167 psignal_vfork(p, new_task, thread, SIGTRAP); 1168 } 1169 1170 goto done; 1171 1172badtoolate: 1173 /* Don't allow child process to execute any instructions */ 1174 if (!spawn) { 1175 if (vfexec) { 1176 psignal_vfork(p, new_task, thread, SIGKILL); 1177 } else { 1178 psignal(p, SIGKILL); 1179 } 1180 1181 /* We can't stop this system call at this point, so just pretend we succeeded */ 1182 error = 0; 1183 } 1184 1185done: 1186 if (!spawn) { 1187 /* notify only if it has not failed due to FP Key error */ 1188 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0) 1189 proc_knote(p, NOTE_EXEC); 1190 } 1191 1192 /* Drop extra references for cases where we don't expect the caller to clean up */ 1193 if (vfexec || (spawn && error == 0)) { 1194 task_deallocate(new_task); 1195 thread_deallocate(thread); 1196 } 1197 1198bad: 1199 return(error); 1200} 1201 1202 1203 1204 1205/* 1206 * Our image activator table; this is the table of the image types we are 1207 * capable of loading. We list them in order of preference to ensure the 1208 * fastest image load speed. 
1209 * 1210 * XXX hardcoded, for now; should use linker sets 1211 */ 1212struct execsw { 1213 int (*ex_imgact)(struct image_params *); 1214 const char *ex_name; 1215} execsw[] = { 1216 { exec_mach_imgact, "Mach-o Binary" }, 1217 { exec_fat_imgact, "Fat Binary" }, 1218 { exec_shell_imgact, "Interpreter Script" }, 1219 { NULL, NULL} 1220}; 1221 1222 1223/* 1224 * exec_activate_image 1225 * 1226 * Description: Iterate through the available image activators, and activate 1227 * the image associated with the imgp structure. We start with 1228 * the 1229 * 1230 * Parameters: struct image_params * Image parameter block 1231 * 1232 * Returns: 0 Success 1233 * EBADEXEC The executable is corrupt/unknown 1234 * execargs_alloc:EINVAL Invalid argument 1235 * execargs_alloc:EACCES Permission denied 1236 * execargs_alloc:EINTR Interrupted function 1237 * execargs_alloc:ENOMEM Not enough space 1238 * exec_save_path:EFAULT Bad address 1239 * exec_save_path:ENAMETOOLONG Filename too long 1240 * exec_check_permissions:EACCES Permission denied 1241 * exec_check_permissions:ENOEXEC Executable file format error 1242 * exec_check_permissions:ETXTBSY Text file busy [misuse of error code] 1243 * exec_check_permissions:??? 1244 * namei:??? 1245 * vn_rdwr:??? [anything vn_rdwr can return] 1246 * <ex_imgact>:??? 
[anything an imgact can return] 1247 */ 1248static int 1249exec_activate_image(struct image_params *imgp) 1250{ 1251 struct nameidata *ndp = NULL; 1252 int error; 1253 int resid; 1254 int once = 1; /* save SGUID-ness for interpreted files */ 1255 int i; 1256 int iterlimit = EAI_ITERLIMIT; 1257 proc_t p = vfs_context_proc(imgp->ip_vfs_context); 1258 1259 error = execargs_alloc(imgp); 1260 if (error) 1261 goto bad_notrans; 1262 1263 error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg); 1264 if (error) { 1265 goto bad_notrans; 1266 } 1267 1268 /* Use imgp->ip_strings, which contains the copyin-ed exec path */ 1269 DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings); 1270 1271 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO); 1272 if (ndp == NULL) { 1273 error = ENOMEM; 1274 goto bad_notrans; 1275 } 1276 1277 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 1278 UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context); 1279 1280again: 1281 error = namei(ndp); 1282 if (error) 1283 goto bad_notrans; 1284 imgp->ip_ndp = ndp; /* successful namei(); call nameidone() later */ 1285 imgp->ip_vp = ndp->ni_vp; /* if set, need to vnode_put() at some point */ 1286 1287 /* 1288 * Before we start the transition from binary A to binary B, make 1289 * sure another thread hasn't started exiting the process. We grab 1290 * the proc lock to check p_lflag initially, and the transition 1291 * mechanism ensures that the value doesn't change after we release 1292 * the lock. 
1293 */ 1294 proc_lock(p); 1295 if (p->p_lflag & P_LEXIT) { 1296 proc_unlock(p); 1297 goto bad_notrans; 1298 } 1299 error = proc_transstart(p, 1, 0); 1300 proc_unlock(p); 1301 if (error) 1302 goto bad_notrans; 1303 1304 error = exec_check_permissions(imgp); 1305 if (error) 1306 goto bad; 1307 1308 /* Copy; avoid invocation of an interpreter overwriting the original */ 1309 if (once) { 1310 once = 0; 1311 *imgp->ip_origvattr = *imgp->ip_vattr; 1312 } 1313 1314 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0, 1315 UIO_SYSSPACE, IO_NODELOCKED, 1316 vfs_context_ucred(imgp->ip_vfs_context), 1317 &resid, vfs_context_proc(imgp->ip_vfs_context)); 1318 if (error) 1319 goto bad; 1320 1321encapsulated_binary: 1322 /* Limit the number of iterations we will attempt on each binary */ 1323 if (--iterlimit == 0) { 1324 error = EBADEXEC; 1325 goto bad; 1326 } 1327 error = -1; 1328 for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) { 1329 1330 error = (*execsw[i].ex_imgact)(imgp); 1331 1332 switch (error) { 1333 /* case -1: not claimed: continue */ 1334 case -2: /* Encapsulated binary */ 1335 goto encapsulated_binary; 1336 1337 case -3: /* Interpreter */ 1338#if CONFIG_MACF 1339 /* 1340 * Copy the script label for later use. Note that 1341 * the label can be different when the script is 1342 * actually read by the interpreter. 1343 */ 1344 if (imgp->ip_scriptlabelp) 1345 mac_vnode_label_free(imgp->ip_scriptlabelp); 1346 imgp->ip_scriptlabelp = mac_vnode_label_alloc(); 1347 if (imgp->ip_scriptlabelp == NULL) { 1348 error = ENOMEM; 1349 break; 1350 } 1351 mac_vnode_label_copy(imgp->ip_vp->v_label, 1352 imgp->ip_scriptlabelp); 1353 1354 /* 1355 * Take a ref of the script vnode for later use. 
1356 */ 1357 if (imgp->ip_scriptvp) 1358 vnode_put(imgp->ip_scriptvp); 1359 if (vnode_getwithref(imgp->ip_vp) == 0) 1360 imgp->ip_scriptvp = imgp->ip_vp; 1361#endif 1362 1363 nameidone(ndp); 1364 1365 vnode_put(imgp->ip_vp); 1366 imgp->ip_vp = NULL; /* already put */ 1367 imgp->ip_ndp = NULL; /* already nameidone */ 1368 1369 /* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */ 1370 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, 1371 UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context); 1372 1373 proc_transend(p, 0); 1374 goto again; 1375 1376 default: 1377 break; 1378 } 1379 } 1380 1381 /* 1382 * Call out to allow 3rd party notification of exec. 1383 * Ignore result of kauth_authorize_fileop call. 1384 */ 1385 if (error == 0 && kauth_authorize_fileop_has_listeners()) { 1386 kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context), 1387 KAUTH_FILEOP_EXEC, 1388 (uintptr_t)ndp->ni_vp, 0); 1389 } 1390 1391bad: 1392 proc_transend(p, 0); 1393 1394bad_notrans: 1395 if (imgp->ip_strings) 1396 execargs_free(imgp); 1397 if (imgp->ip_ndp) 1398 nameidone(imgp->ip_ndp); 1399 if (ndp) 1400 FREE(ndp, M_TEMP); 1401 1402 return (error); 1403} 1404 1405 1406/* 1407 * exec_handle_spawnattr_policy 1408 * 1409 * Description: Decode and apply the posix_spawn apptype, qos clamp, and watchport ports to the task. 
1410 * 1411 * Parameters: proc_t p process to apply attributes to 1412 * int psa_apptype posix spawn attribute apptype 1413 * 1414 * Returns: 0 Success 1415 */ 1416static errno_t 1417exec_handle_spawnattr_policy(proc_t p, int psa_apptype, uint64_t psa_qos_clamp, 1418 ipc_port_t * portwatch_ports, int portwatch_count) 1419{ 1420 int apptype = TASK_APPTYPE_NONE; 1421 int qos_clamp = THREAD_QOS_UNSPECIFIED; 1422 1423 if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) { 1424 int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK; 1425 1426 switch(proctype) { 1427 case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE: 1428 apptype = TASK_APPTYPE_DAEMON_INTERACTIVE; 1429 break; 1430 case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD: 1431 apptype = TASK_APPTYPE_DAEMON_STANDARD; 1432 break; 1433 case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE: 1434 apptype = TASK_APPTYPE_DAEMON_ADAPTIVE; 1435 break; 1436 case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND: 1437 apptype = TASK_APPTYPE_DAEMON_BACKGROUND; 1438 break; 1439 case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT: 1440 apptype = TASK_APPTYPE_APP_DEFAULT; 1441 break; 1442 case POSIX_SPAWN_PROC_TYPE_APP_TAL: 1443 apptype = TASK_APPTYPE_APP_TAL; 1444 break; 1445 default: 1446 apptype = TASK_APPTYPE_NONE; 1447 /* TODO: Should an invalid value here fail the spawn? */ 1448 break; 1449 } 1450 } 1451 1452 if (psa_qos_clamp != POSIX_SPAWN_PROC_CLAMP_NONE) { 1453 switch (psa_qos_clamp) { 1454 case POSIX_SPAWN_PROC_CLAMP_UTILITY: 1455 qos_clamp = THREAD_QOS_UTILITY; 1456 break; 1457 case POSIX_SPAWN_PROC_CLAMP_BACKGROUND: 1458 qos_clamp = THREAD_QOS_BACKGROUND; 1459 break; 1460 case POSIX_SPAWN_PROC_CLAMP_MAINTENANCE: 1461 qos_clamp = THREAD_QOS_MAINTENANCE; 1462 break; 1463 default: 1464 qos_clamp = THREAD_QOS_UNSPECIFIED; 1465 /* TODO: Should an invalid value here fail the spawn? 
*/ 1466 break; 1467 } 1468 } 1469 1470 if (psa_apptype != TASK_APPTYPE_NONE || qos_clamp != THREAD_QOS_UNSPECIFIED) { 1471 proc_set_task_spawnpolicy(p->task, apptype, qos_clamp, 1472 portwatch_ports, portwatch_count); 1473 } 1474 1475 return (0); 1476} 1477 1478 1479/* 1480 * exec_handle_port_actions 1481 * 1482 * Description: Go through the _posix_port_actions_t contents, 1483 * calling task_set_special_port, task_set_exception_ports 1484 * and/or audit_session_spawnjoin for the current task. 1485 * 1486 * Parameters: struct image_params * Image parameter block 1487 * short psa_flags posix spawn attribute flags 1488 * 1489 * Returns: 0 Success 1490 * EINVAL Failure 1491 * ENOTSUP Illegal posix_spawn attr flag was set 1492 */ 1493static errno_t 1494exec_handle_port_actions(struct image_params *imgp, short psa_flags, boolean_t * portwatch_present, ipc_port_t * portwatch_ports) 1495{ 1496 _posix_spawn_port_actions_t pacts = imgp->ip_px_spa; 1497 proc_t p = vfs_context_proc(imgp->ip_vfs_context); 1498 _ps_port_action_t *act = NULL; 1499 task_t task = p->task; 1500 ipc_port_t port = NULL; 1501 errno_t ret = 0; 1502 int i; 1503 1504 *portwatch_present = FALSE; 1505 1506 for (i = 0; i < pacts->pspa_count; i++) { 1507 act = &pacts->pspa_actions[i]; 1508 1509 if (ipc_object_copyin(get_task_ipcspace(current_task()), 1510 act->new_port, MACH_MSG_TYPE_COPY_SEND, 1511 (ipc_object_t *) &port) != KERN_SUCCESS) { 1512 ret = EINVAL; 1513 goto done; 1514 } 1515 1516 switch (act->port_type) { 1517 case PSPA_SPECIAL: 1518 /* Only allowed when not under vfork */ 1519 if (!(psa_flags & POSIX_SPAWN_SETEXEC)) 1520 ret = ENOTSUP; 1521 else if (task_set_special_port(task, 1522 act->which, port) != KERN_SUCCESS) 1523 ret = EINVAL; 1524 break; 1525 1526 case PSPA_EXCEPTION: 1527 /* Only allowed when not under vfork */ 1528 if (!(psa_flags & POSIX_SPAWN_SETEXEC)) 1529 ret = ENOTSUP; 1530 else if (task_set_exception_ports(task, 1531 act->mask, port, act->behavior, 1532 act->flavor) != 
KERN_SUCCESS) 1533 ret = EINVAL; 1534 break; 1535#if CONFIG_AUDIT 1536 case PSPA_AU_SESSION: 1537 ret = audit_session_spawnjoin(p, port); 1538 break; 1539#endif 1540 case PSPA_IMP_WATCHPORTS: 1541 if (portwatch_ports != NULL) { 1542 *portwatch_present = TRUE; 1543 /* hold on to this till end of spawn */ 1544 portwatch_ports[i] = port; 1545 ret = 0; 1546 } else 1547 ipc_port_release_send(port); 1548 break; 1549 default: 1550 ret = EINVAL; 1551 break; 1552 } 1553 1554 /* action failed, so release port resources */ 1555 1556 if (ret) { 1557 ipc_port_release_send(port); 1558 break; 1559 } 1560 } 1561 1562done: 1563 if (0 != ret) 1564 DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port); 1565 return (ret); 1566} 1567 1568/* 1569 * exec_handle_file_actions 1570 * 1571 * Description: Go through the _posix_file_actions_t contents applying the 1572 * open, close, and dup2 operations to the open file table for 1573 * the current process. 1574 * 1575 * Parameters: struct image_params * Image parameter block 1576 * 1577 * Returns: 0 Success 1578 * ??? 1579 * 1580 * Note: Actions are applied in the order specified, with the credential 1581 * of the parent process. This is done to permit the parent 1582 * process to utilize POSIX_SPAWN_RESETIDS to drop privilege in 1583 * the child following operations the child may in fact not be 1584 * normally permitted to perform. 
1585 */ 1586static int 1587exec_handle_file_actions(struct image_params *imgp, short psa_flags) 1588{ 1589 int error = 0; 1590 int action; 1591 proc_t p = vfs_context_proc(imgp->ip_vfs_context); 1592 _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa; 1593 int ival[2]; /* dummy retval for system calls) */ 1594 1595 for (action = 0; action < px_sfap->psfa_act_count; action++) { 1596 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action]; 1597 1598 switch(psfa->psfaa_type) { 1599 case PSFA_OPEN: { 1600 /* 1601 * Open is different, in that it requires the use of 1602 * a path argument, which is normally copied in from 1603 * user space; because of this, we have to support an 1604 * open from kernel space that passes an address space 1605 * context of UIO_SYSSPACE, and casts the address 1606 * argument to a user_addr_t. 1607 */ 1608 char *bufp = NULL; 1609 struct vnode_attr *vap; 1610 struct nameidata *ndp; 1611 int mode = psfa->psfaa_openargs.psfao_mode; 1612 struct dup2_args dup2a; 1613 struct close_nocancel_args ca; 1614 int origfd; 1615 1616 MALLOC(bufp, char *, sizeof(*vap) + sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO); 1617 if (bufp == NULL) { 1618 error = ENOMEM; 1619 break; 1620 } 1621 1622 vap = (struct vnode_attr *) bufp; 1623 ndp = (struct nameidata *) (bufp + sizeof(*vap)); 1624 1625 VATTR_INIT(vap); 1626 /* Mask off all but regular access permissions */ 1627 mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1628 VATTR_SET(vap, va_mode, mode & ACCESSPERMS); 1629 1630 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE, 1631 CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path), 1632 imgp->ip_vfs_context); 1633 1634 error = open1(imgp->ip_vfs_context, 1635 ndp, 1636 psfa->psfaa_openargs.psfao_oflag, 1637 vap, 1638 fileproc_alloc_init, NULL, 1639 ival); 1640 1641 FREE(bufp, M_TEMP); 1642 1643 /* 1644 * If there's an error, or we get the right fd by 1645 * accident, then drop out here. 
This is easier than 1646 * reworking all the open code to preallocate fd 1647 * slots, and internally taking one as an argument. 1648 */ 1649 if (error || ival[0] == psfa->psfaa_filedes) 1650 break; 1651 1652 origfd = ival[0]; 1653 /* 1654 * If we didn't fall out from an error, we ended up 1655 * with the wrong fd; so now we've got to try to dup2 1656 * it to the right one. 1657 */ 1658 dup2a.from = origfd; 1659 dup2a.to = psfa->psfaa_filedes; 1660 1661 /* 1662 * The dup2() system call implementation sets 1663 * ival to newfd in the success case, but we 1664 * can ignore that, since if we didn't get the 1665 * fd we wanted, the error will stop us. 1666 */ 1667 error = dup2(p, &dup2a, ival); 1668 if (error) 1669 break; 1670 1671 /* 1672 * Finally, close the original fd. 1673 */ 1674 ca.fd = origfd; 1675 1676 error = close_nocancel(p, &ca, ival); 1677 } 1678 break; 1679 1680 case PSFA_DUP2: { 1681 struct dup2_args dup2a; 1682 1683 dup2a.from = psfa->psfaa_filedes; 1684 dup2a.to = psfa->psfaa_openargs.psfao_oflag; 1685 1686 /* 1687 * The dup2() system call implementation sets 1688 * ival to newfd in the success case, but we 1689 * can ignore that, since if we didn't get the 1690 * fd we wanted, the error will stop us. 1691 */ 1692 error = dup2(p, &dup2a, ival); 1693 } 1694 break; 1695 1696 case PSFA_CLOSE: { 1697 struct close_nocancel_args ca; 1698 1699 ca.fd = psfa->psfaa_filedes; 1700 1701 error = close_nocancel(p, &ca, ival); 1702 } 1703 break; 1704 1705 case PSFA_INHERIT: { 1706 struct fcntl_nocancel_args fcntla; 1707 1708 /* 1709 * Check to see if the descriptor exists, and 1710 * ensure it's -not- marked as close-on-exec. 1711 * 1712 * Attempting to "inherit" a guarded fd will 1713 * result in a error. 
1714 */ 1715 fcntla.fd = psfa->psfaa_filedes; 1716 fcntla.cmd = F_GETFD; 1717 if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0) 1718 break; 1719 1720 if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) { 1721 fcntla.fd = psfa->psfaa_filedes; 1722 fcntla.cmd = F_SETFD; 1723 fcntla.arg = ival[0] & ~FD_CLOEXEC; 1724 error = fcntl_nocancel(p, &fcntla, ival); 1725 } 1726 1727 } 1728 break; 1729 1730 default: 1731 error = EINVAL; 1732 break; 1733 } 1734 1735 /* All file actions failures are considered fatal, per POSIX */ 1736 1737 if (error) { 1738 if (PSFA_OPEN == psfa->psfaa_type) { 1739 DTRACE_PROC1(spawn__open__failure, uintptr_t, 1740 psfa->psfaa_openargs.psfao_path); 1741 } else { 1742 DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes); 1743 } 1744 break; 1745 } 1746 } 1747 1748 if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0) 1749 return (error); 1750 1751 /* 1752 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during 1753 * this spawn only) as if "close on exec" is the default 1754 * disposition of all pre-existing file descriptors. In this case, 1755 * the list of file descriptors mentioned in the file actions 1756 * are the only ones that can be inherited, so mark them now. 1757 * 1758 * The actual closing part comes later, in fdexec(). 
1759 */ 1760 proc_fdlock(p); 1761 for (action = 0; action < px_sfap->psfa_act_count; action++) { 1762 _psfa_action_t *psfa = &px_sfap->psfa_act_acts[action]; 1763 int fd = psfa->psfaa_filedes; 1764 1765 switch (psfa->psfaa_type) { 1766 case PSFA_DUP2: 1767 fd = psfa->psfaa_openargs.psfao_oflag; 1768 /*FALLTHROUGH*/ 1769 case PSFA_OPEN: 1770 case PSFA_INHERIT: 1771 *fdflags(p, fd) |= UF_INHERIT; 1772 break; 1773 1774 case PSFA_CLOSE: 1775 break; 1776 } 1777 } 1778 proc_fdunlock(p); 1779 1780 return (0); 1781} 1782 1783#if CONFIG_MACF 1784/* 1785 * exec_spawnattr_getmacpolicyinfo 1786 */ 1787void * 1788exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp) 1789{ 1790 const struct _posix_spawn_mac_policy_extensions *psmx = macextensions; 1791 int i; 1792 1793 if (psmx == NULL) 1794 return NULL; 1795 1796 for (i = 0; i < psmx->psmx_count; i++) { 1797 const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i]; 1798 if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) { 1799 if (lenp != NULL) 1800 *lenp = extension->datalen; 1801 return extension->datap; 1802 } 1803 } 1804 1805 if (lenp != NULL) 1806 *lenp = 0; 1807 return NULL; 1808} 1809 1810static int 1811spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp) 1812{ 1813 _posix_spawn_mac_policy_extensions_t psmx = NULL; 1814 int error = 0; 1815 int copycnt = 0; 1816 int i = 0; 1817 1818 *psmxp = NULL; 1819 1820 if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) || 1821 px_args->mac_extensions_size > PAGE_SIZE) { 1822 error = EINVAL; 1823 goto bad; 1824 } 1825 1826 MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK); 1827 if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0) 1828 goto bad; 1829 1830 if (PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count) > px_args->mac_extensions_size) { 
1831 error = EINVAL; 1832 goto bad; 1833 } 1834 1835 for (i = 0; i < psmx->psmx_count; i++) { 1836 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i]; 1837 if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) { 1838 error = EINVAL; 1839 goto bad; 1840 } 1841 } 1842 1843 for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) { 1844 _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt]; 1845 void *data = NULL; 1846 1847 MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK); 1848 if ((error = copyin(extension->data, data, extension->datalen)) != 0) { 1849 FREE(data, M_TEMP); 1850 goto bad; 1851 } 1852 extension->datap = data; 1853 } 1854 1855 *psmxp = psmx; 1856 return 0; 1857 1858bad: 1859 if (psmx != NULL) { 1860 for (i = 0; i < copycnt; i++) 1861 FREE(psmx->psmx_extensions[i].datap, M_TEMP); 1862 FREE(psmx, M_TEMP); 1863 } 1864 return error; 1865} 1866 1867static void 1868spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx) 1869{ 1870 int i; 1871 1872 if (psmx == NULL) 1873 return; 1874 for (i = 0; i < psmx->psmx_count; i++) 1875 FREE(psmx->psmx_extensions[i].datap, M_TEMP); 1876 FREE(psmx, M_TEMP); 1877} 1878#endif /* CONFIG_MACF */ 1879 1880/* 1881 * posix_spawn 1882 * 1883 * Parameters: uap->pid Pointer to pid return area 1884 * uap->fname File name to exec 1885 * uap->argp Argument list 1886 * uap->envp Environment list 1887 * 1888 * Returns: 0 Success 1889 * EINVAL Invalid argument 1890 * ENOTSUP Not supported 1891 * ENOEXEC Executable file format error 1892 * exec_activate_image:EINVAL Invalid argument 1893 * exec_activate_image:EACCES Permission denied 1894 * exec_activate_image:EINTR Interrupted function 1895 * exec_activate_image:ENOMEM Not enough space 1896 * exec_activate_image:EFAULT Bad address 1897 * exec_activate_image:ENAMETOOLONG Filename too long 1898 * exec_activate_image:ENOEXEC Executable file format error 1899 * exec_activate_image:ETXTBSY Text file busy [misuse of error code] 
1900 * exec_activate_image:EBADEXEC The executable is corrupt/unknown 1901 * exec_activate_image:??? 1902 * mac_execve_enter:??? 1903 * 1904 * TODO: Expect to need __mac_posix_spawn() at some point... 1905 * Handle posix_spawnattr_t 1906 * Handle posix_spawn_file_actions_t 1907 */ 1908int 1909posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) 1910{ 1911 proc_t p = ap; /* quiet bogus GCC vfork() warning */ 1912 user_addr_t pid = uap->pid; 1913 int ival[2]; /* dummy retval for setpgid() */ 1914 char *bufp = NULL; 1915 struct image_params *imgp; 1916 struct vnode_attr *vap; 1917 struct vnode_attr *origvap; 1918 struct uthread *uthread = 0; /* compiler complains if not set to 0*/ 1919 int error, sig; 1920 char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */ 1921 int is_64 = IS_64BIT_PROCESS(p); 1922 struct vfs_context context; 1923 struct user__posix_spawn_args_desc px_args; 1924 struct _posix_spawnattr px_sa; 1925 _posix_spawn_file_actions_t px_sfap = NULL; 1926 _posix_spawn_port_actions_t px_spap = NULL; 1927 struct __kern_sigaction vec; 1928 boolean_t spawn_no_exec = FALSE; 1929 boolean_t proc_transit_set = TRUE; 1930 boolean_t exec_done = FALSE; 1931 int portwatch_count = 0; 1932 ipc_port_t * portwatch_ports = NULL; 1933 vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports); 1934 1935 /* 1936 * Allocate a big chunk for locals instead of using stack since these 1937 * structures are pretty big. 
1938 */ 1939 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO); 1940 imgp = (struct image_params *) bufp; 1941 if (bufp == NULL) { 1942 error = ENOMEM; 1943 goto bad; 1944 } 1945 vap = (struct vnode_attr *) (bufp + sizeof(*imgp)); 1946 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap)); 1947 1948 /* Initialize the common data in the image_params structure */ 1949 imgp->ip_user_fname = uap->path; 1950 imgp->ip_user_argv = uap->argv; 1951 imgp->ip_user_envv = uap->envp; 1952 imgp->ip_vattr = vap; 1953 imgp->ip_origvattr = origvap; 1954 imgp->ip_vfs_context = &context; 1955 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE); 1956 imgp->ip_p_comm = alt_p_comm; /* for PowerPC */ 1957 imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32); 1958 imgp->ip_mac_return = 0; 1959 1960 if (uap->adesc != USER_ADDR_NULL) { 1961 if(is_64) { 1962 error = copyin(uap->adesc, &px_args, sizeof(px_args)); 1963 } else { 1964 struct user32__posix_spawn_args_desc px_args32; 1965 1966 error = copyin(uap->adesc, &px_args32, sizeof(px_args32)); 1967 1968 /* 1969 * Convert arguments descriptor from external 32 bit 1970 * representation to internal 64 bit representation 1971 */ 1972 px_args.attr_size = px_args32.attr_size; 1973 px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp); 1974 px_args.file_actions_size = px_args32.file_actions_size; 1975 px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions); 1976 px_args.port_actions_size = px_args32.port_actions_size; 1977 px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions); 1978 px_args.mac_extensions_size = px_args32.mac_extensions_size; 1979 px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions); 1980 } 1981 if (error) 1982 goto bad; 1983 1984 if (px_args.attr_size != 0) { 1985 /* 1986 * We are not copying the port_actions pointer, 1987 * because we already have it from px_args. 
1988 * This is a bit fragile: <rdar://problem/16427422> 1989 */ 1990 1991 if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0)) 1992 goto bad; 1993 1994 bzero( (void *)( (unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset ); 1995 1996 imgp->ip_px_sa = &px_sa; 1997 } 1998 if (px_args.file_actions_size != 0) { 1999 /* Limit file_actions to allowed number of open files */ 2000 int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE); 2001 if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) || 2002 px_args.file_actions_size > PSF_ACTIONS_SIZE(maxfa)) { 2003 error = EINVAL; 2004 goto bad; 2005 } 2006 MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK); 2007 if (px_sfap == NULL) { 2008 error = ENOMEM; 2009 goto bad; 2010 } 2011 imgp->ip_px_sfa = px_sfap; 2012 2013 if ((error = copyin(px_args.file_actions, px_sfap, 2014 px_args.file_actions_size)) != 0) 2015 goto bad; 2016 2017 /* Verify that the action count matches the struct size */ 2018 if (PSF_ACTIONS_SIZE(px_sfap->psfa_act_count) != px_args.file_actions_size) { 2019 error = EINVAL; 2020 goto bad; 2021 } 2022 } 2023 if (px_args.port_actions_size != 0) { 2024 /* Limit port_actions to one page of data */ 2025 if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) || 2026 px_args.port_actions_size > PAGE_SIZE) { 2027 error = EINVAL; 2028 goto bad; 2029 } 2030 2031 MALLOC(px_spap, _posix_spawn_port_actions_t, 2032 px_args.port_actions_size, M_TEMP, M_WAITOK); 2033 if (px_spap == NULL) { 2034 error = ENOMEM; 2035 goto bad; 2036 } 2037 imgp->ip_px_spa = px_spap; 2038 2039 if ((error = copyin(px_args.port_actions, px_spap, 2040 px_args.port_actions_size)) != 0) 2041 goto bad; 2042 2043 /* Verify that the action count matches the struct size */ 2044 if (PS_PORT_ACTIONS_SIZE(px_spap->pspa_count) != px_args.port_actions_size) { 2045 error = EINVAL; 2046 goto bad; 2047 } 2048 } 2049#if CONFIG_MACF 2050 if (px_args.mac_extensions_size != 0) { 2051 
if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0) 2052 goto bad; 2053 } 2054#endif /* CONFIG_MACF */ 2055 } 2056 2057 /* set uthread to parent */ 2058 uthread = get_bsdthread_info(current_thread()); 2059 2060 /* 2061 * <rdar://6640530>; this does not result in a behaviour change 2062 * relative to Leopard, so there should not be any existing code 2063 * which depends on it. 2064 */ 2065 if (uthread->uu_flag & UT_VFORK) { 2066 error = EINVAL; 2067 goto bad; 2068 } 2069 2070 /* 2071 * If we don't have the extension flag that turns "posix_spawn()" 2072 * into "execve() with options", then we will be creating a new 2073 * process which does not inherit memory from the parent process, 2074 * which is one of the most expensive things about using fork() 2075 * and execve(). 2076 */ 2077 if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){ 2078 2079 /* 2080 * Set the new task's coalition, if it is requested. 2081 * TODO: privilege check - 15365900 2082 */ 2083 coalition_t coal = COALITION_NULL; 2084#if CONFIG_COALITIONS 2085 if (imgp->ip_px_sa) { 2086 uint64_t cid = px_sa.psa_coalitionid; 2087 if (cid != 0) { 2088#if COALITION_DEBUG 2089 printf("%s: searching for coalition ID %llu\n", __func__, cid); 2090#endif 2091 coal = coalition_find_and_activate_by_id(cid); 2092 if (coal == COALITION_NULL) { 2093#if COALITION_DEBUG 2094 printf("%s: could not find coalition ID %llu (perhaps it has been terminated or reaped)\n", __func__, cid); 2095#endif 2096 error = ESRCH; 2097 goto bad; 2098 } 2099 } 2100 } 2101#endif /* CONFIG_COALITIONS */ 2102 2103 error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN, coal); 2104 2105 if (error != 0) { 2106 if (coal != COALITION_NULL) { 2107#if CONFIG_COALITIONS 2108 coalition_remove_active(coal); 2109 coalition_release(coal); 2110#endif /* CONFIG_COALITIONS */ 2111 } 2112 goto bad; 2113 } 2114 imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */ 2115 
spawn_no_exec = TRUE; /* used in later tests */ 2116 2117 if (coal != COALITION_NULL) { 2118#if CONFIG_COALITIONS 2119 coalition_remove_active(coal); 2120 coalition_release(coal); 2121#endif /* CONFIG_COALITIONS */ 2122 } 2123 } 2124 2125 if (spawn_no_exec) { 2126 p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread); 2127 2128 /* 2129 * We had to wait until this point before firing the 2130 * proc:::create probe, otherwise p would not point to the 2131 * child process. 2132 */ 2133 DTRACE_PROC1(create, proc_t, p); 2134 } 2135 assert(p != NULL); 2136 2137 /* By default, the thread everyone plays with is the parent */ 2138 context.vc_thread = current_thread(); 2139 context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */ 2140 2141 /* 2142 * However, if we're not in the setexec case, redirect the context 2143 * to the newly created process instead 2144 */ 2145 if (spawn_no_exec) 2146 context.vc_thread = imgp->ip_new_thread; 2147 2148 /* 2149 * Post fdcopy(), pre exec_handle_sugid() - this is where we want 2150 * to handle the file_actions. Since vfork() also ends up setting 2151 * us into the parent process group, and saved off the signal flags, 2152 * this is also where we want to handle the spawn flags. 2153 */ 2154 2155 /* Has spawn file actions? */ 2156 if (imgp->ip_px_sfa != NULL) { 2157 /* 2158 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag 2159 * is handled in exec_handle_file_actions(). 2160 */ 2161 if ((error = exec_handle_file_actions(imgp, 2162 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0) 2163 goto bad; 2164 } 2165 2166 /* Has spawn port actions? */ 2167 if (imgp->ip_px_spa != NULL) { 2168 boolean_t is_adaptive = FALSE; 2169 boolean_t portwatch_present = FALSE; 2170 2171 /* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. 
*/ 2172 if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE) 2173 is_adaptive = TRUE; 2174 2175 /* 2176 * portwatch only: 2177 * Allocate a place to store the ports we want to bind to the new task 2178 * We can't bind them until after the apptype is set. 2179 */ 2180 if (px_spap->pspa_count != 0 && is_adaptive) { 2181 portwatch_count = px_spap->pspa_count; 2182 MALLOC(portwatch_ports, ipc_port_t *, (sizeof(ipc_port_t) * portwatch_count), M_TEMP, M_WAITOK | M_ZERO); 2183 } else { 2184 portwatch_ports = NULL; 2185 } 2186 2187 if ((error = exec_handle_port_actions(imgp, 2188 imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &portwatch_present, portwatch_ports)) != 0) 2189 goto bad; 2190 2191 if (portwatch_present == FALSE && portwatch_ports != NULL) { 2192 FREE(portwatch_ports, M_TEMP); 2193 portwatch_ports = NULL; 2194 portwatch_count = 0; 2195 } 2196 } 2197 2198 /* Has spawn attr? */ 2199 if (imgp->ip_px_sa != NULL) { 2200 /* 2201 * Set the process group ID of the child process; this has 2202 * to happen before the image activation. 2203 */ 2204 if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) { 2205 struct setpgid_args spga; 2206 spga.pid = p->p_pid; 2207 spga.pgid = px_sa.psa_pgroup; 2208 /* 2209 * Effectively, call setpgid() system call; works 2210 * because there are no pointer arguments. 2211 */ 2212 if((error = setpgid(p, &spga, ival)) != 0) 2213 goto bad; 2214 } 2215 2216 /* 2217 * Reset UID/GID to parent's RUID/RGID; This works only 2218 * because the operation occurs *after* the vfork() and 2219 * before the call to exec_handle_sugid() by the image 2220 * activator called from exec_activate_image(). POSIX 2221 * requires that any setuid/setgid bits on the process 2222 * image will take precedence over the spawn attributes 2223 * (re)setting them. 2224 * 2225 * The use of p_ucred is safe, since we are acting on the 2226 * new process, and it has no threads other than the one 2227 * we are creating for it. 
2228 */ 2229 if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) { 2230 kauth_cred_t my_cred = p->p_ucred; 2231 kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred)); 2232 if (my_new_cred != my_cred) { 2233 p->p_ucred = my_new_cred; 2234 /* update cred on proc */ 2235 PROC_UPDATE_CREDS_ONPROC(p); 2236 } 2237 } 2238 2239 /* 2240 * Disable ASLR for the spawned process. 2241 */ 2242 /* 2243 * But only do so if we are not embedded; embedded allows for a 2244 * boot-arg (-disable_aslr) to deal with this (which itself is 2245 * only honored on DEVELOPMENT or DEBUG builds of xnu). 2246 */ 2247 if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR) 2248 OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag); 2249 2250 /* 2251 * Forcibly disallow execution from data pages for the spawned process 2252 * even if it would otherwise be permitted by the architecture default. 2253 */ 2254 if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC) 2255 imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC; 2256 } 2257 2258 /* 2259 * Disable ASLR during image activation. This occurs either if the 2260 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if 2261 * P_DISABLE_ASLR was inherited from the parent process. 2262 */ 2263 if (p->p_flag & P_DISABLE_ASLR) 2264 imgp->ip_flags |= IMGPF_DISABLE_ASLR; 2265 2266 /* 2267 * Clear transition flag so we won't hang if exec_activate_image() causes 2268 * an automount (and launchd does a proc sysctl to service it). 2269 * 2270 * <rdar://problem/6848672>, <rdar://problem/5959568>. 
2271 */ 2272 if (spawn_no_exec) { 2273 proc_transend(p, 0); 2274 proc_transit_set = 0; 2275 } 2276 2277#if MAC_SPAWN /* XXX */ 2278 if (uap->mac_p != USER_ADDR_NULL) { 2279 error = mac_execve_enter(uap->mac_p, imgp); 2280 if (error) 2281 goto bad; 2282 } 2283#endif 2284 2285 /* 2286 * Activate the image 2287 */ 2288 error = exec_activate_image(imgp); 2289 2290 if (error == 0) { 2291 /* process completed the exec */ 2292 exec_done = TRUE; 2293 } else if (error == -1) { 2294 /* Image not claimed by any activator? */ 2295 error = ENOEXEC; 2296 } 2297 2298 /* 2299 * If we have a spawn attr, and it contains signal related flags, 2300 * the we need to process them in the "context" of the new child 2301 * process, so we have to process it following image activation, 2302 * prior to making the thread runnable in user space. This is 2303 * necessitated by some signal information being per-thread rather 2304 * than per-process, and we don't have the new allocation in hand 2305 * until after the image is activated. 2306 */ 2307 if (!error && imgp->ip_px_sa != NULL) { 2308 thread_t child_thread = current_thread(); 2309 uthread_t child_uthread = uthread; 2310 2311 /* 2312 * If we created a new child thread, then the thread and 2313 * uthread are different than the current ones; otherwise, 2314 * we leave them, since we are in the exec case instead. 2315 */ 2316 if (spawn_no_exec) { 2317 child_thread = imgp->ip_new_thread; 2318 child_uthread = get_bsdthread_info(child_thread); 2319 } 2320 2321 /* 2322 * Mask a list of signals, instead of them being unmasked, if 2323 * they were unmasked in the parent; note that some signals 2324 * are not maskable. 2325 */ 2326 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK) 2327 child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask); 2328 /* 2329 * Default a list of signals instead of ignoring them, if 2330 * they were ignored in the parent. 
Note that we pass 2331 * spawn_no_exec to setsigvec() to indicate that we called 2332 * fork1() and therefore do not need to call proc_signalstart() 2333 * internally. 2334 */ 2335 if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) { 2336 vec.sa_handler = SIG_DFL; 2337 vec.sa_tramp = 0; 2338 vec.sa_mask = 0; 2339 vec.sa_flags = 0; 2340 for (sig = 0; sig < NSIG; sig++) 2341 if (px_sa.psa_sigdefault & (1 << sig)) { 2342 error = setsigvec(p, child_thread, sig + 1, &vec, spawn_no_exec); 2343 } 2344 } 2345 2346 /* 2347 * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU 2348 * usage limit, which will generate a resource exceeded exception if any one thread exceeds the 2349 * limit. 2350 * 2351 * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds. 2352 */ 2353 if (px_sa.psa_cpumonitor_percent != 0) { 2354 /* 2355 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring 2356 * an entitlement to configure the monitor a certain way seems silly, since 2357 * whomever is turning it on could just as easily choose not to do so. 2358 * 2359 * XXX - Ignore the parameters that we get from userland. The spawnattr method of 2360 * activating the monitor always gets the system default parameters. Once we have 2361 * an explicit spawn SPI for configuring the defaults, we can revert this to 2362 * respect the params passed in from userland. 
2363 */ 2364 error = proc_set_task_ruse_cpu(p->task, 2365 TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC, 2366 PROC_POLICY_CPUMON_DEFAULTS, 0, 2367 0, TRUE); 2368 } 2369 } 2370 2371bad: 2372 2373 if (error == 0) { 2374 /* reset delay idle sleep status if set */ 2375 if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) 2376 OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag); 2377 /* upon successful spawn, re/set the proc control state */ 2378 if (imgp->ip_px_sa != NULL) { 2379 switch (px_sa.psa_pcontrol) { 2380 case POSIX_SPAWN_PCONTROL_THROTTLE: 2381 p->p_pcaction = P_PCTHROTTLE; 2382 break; 2383 case POSIX_SPAWN_PCONTROL_SUSPEND: 2384 p->p_pcaction = P_PCSUSP; 2385 break; 2386 case POSIX_SPAWN_PCONTROL_KILL: 2387 p->p_pcaction = P_PCKILL; 2388 break; 2389 case POSIX_SPAWN_PCONTROL_NONE: 2390 default: 2391 p->p_pcaction = 0; 2392 break; 2393 }; 2394 } 2395 exec_resettextvp(p, imgp); 2396 2397#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM 2398 /* Has jetsam attributes? */ 2399 if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) { 2400 memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY), 2401 TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND), 2402 (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_MEMLIMIT_FATAL)); 2403 } 2404#endif 2405 } 2406 2407 /* 2408 * If we successfully called fork1(), we always need to do this; 2409 * we identify this case by noting the IMGPF_SPAWN flag. This is 2410 * because we come back from that call with signals blocked in the 2411 * child, and we have to unblock them, but we want to wait until 2412 * after we've performed any spawn actions. This has to happen 2413 * before check_for_signature(), which uses psignal. 
2414 */ 2415 if (spawn_no_exec) { 2416 if (proc_transit_set) 2417 proc_transend(p, 0); 2418 2419 /* 2420 * Drop the signal lock on the child which was taken on our 2421 * behalf by forkproc()/cloneproc() to prevent signals being 2422 * received by the child in a partially constructed state. 2423 */ 2424 proc_signalend(p, 0); 2425 2426 /* flag the 'fork' has occurred */ 2427 proc_knote(p->p_pptr, NOTE_FORK | p->p_pid); 2428 /* then flag exec has occurred */ 2429 /* notify only if it has not failed due to FP Key error */ 2430 if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0) 2431 proc_knote(p, NOTE_EXEC); 2432 } else { 2433 /* reset the importance attribute from our previous life */ 2434 task_importance_reset(p->task); 2435 2436 /* reset atm context from task */ 2437 task_atm_reset(p->task); 2438 } 2439 2440 /* 2441 * Apply the spawnattr policy, apptype (which primes the task for importance donation), 2442 * and bind any portwatch ports to the new task. 2443 * This must be done after the exec so that the child's thread is ready, 2444 * and after the in transit state has been released, because priority is 2445 * dropped here so we need to be prepared for a potentially long preemption interval 2446 * 2447 * TODO: Consider splitting this up into separate phases 2448 */ 2449 if (error == 0 && imgp->ip_px_sa != NULL) { 2450 struct _posix_spawnattr *psa = (struct _posix_spawnattr *) imgp->ip_px_sa; 2451 2452 exec_handle_spawnattr_policy(p, psa->psa_apptype, psa->psa_qos_clamp, 2453 portwatch_ports, portwatch_count); 2454 } 2455 2456 /* Apply the main thread qos */ 2457 if (error == 0) { 2458 thread_t main_thread = (imgp->ip_new_thread != NULL) ? imgp->ip_new_thread : current_thread(); 2459 2460 task_set_main_thread_qos(p->task, main_thread); 2461 } 2462 2463 /* 2464 * Release any ports we kept around for binding to the new task 2465 * We need to release the rights even if the posix_spawn has failed. 
2466 */ 2467 if (portwatch_ports != NULL) { 2468 for (int i = 0; i < portwatch_count; i++) { 2469 ipc_port_t port = NULL; 2470 if ((port = portwatch_ports[i]) != NULL) { 2471 ipc_port_release_send(port); 2472 } 2473 } 2474 FREE(portwatch_ports, M_TEMP); 2475 portwatch_ports = NULL; 2476 portwatch_count = 0; 2477 } 2478 2479 /* 2480 * We have to delay operations which might throw a signal until after 2481 * the signals have been unblocked; however, we want that to happen 2482 * after exec_resettextvp() so that the textvp is correct when they 2483 * fire. 2484 */ 2485 if (error == 0) { 2486 error = check_for_signature(p, imgp); 2487 2488 /* 2489 * Pay for our earlier safety; deliver the delayed signals from 2490 * the incomplete spawn process now that it's complete. 2491 */ 2492 if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) { 2493 psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP); 2494 } 2495 } 2496 2497 2498 if (imgp != NULL) { 2499 if (imgp->ip_vp) 2500 vnode_put(imgp->ip_vp); 2501 if (imgp->ip_scriptvp) 2502 vnode_put(imgp->ip_scriptvp); 2503 if (imgp->ip_strings) 2504 execargs_free(imgp); 2505 if (imgp->ip_px_sfa != NULL) 2506 FREE(imgp->ip_px_sfa, M_TEMP); 2507 if (imgp->ip_px_spa != NULL) 2508 FREE(imgp->ip_px_spa, M_TEMP); 2509 2510#if CONFIG_MACF 2511 if (imgp->ip_px_smpx != NULL) 2512 spawn_free_macpolicyinfo(imgp->ip_px_smpx); 2513 if (imgp->ip_execlabelp) 2514 mac_cred_label_free(imgp->ip_execlabelp); 2515 if (imgp->ip_scriptlabelp) 2516 mac_vnode_label_free(imgp->ip_scriptlabelp); 2517#endif 2518 } 2519 2520#if CONFIG_DTRACE 2521 if (spawn_no_exec) { 2522 /* 2523 * In the original DTrace reference implementation, 2524 * posix_spawn() was a libc routine that just 2525 * did vfork(2) then exec(2). Thus the proc::: probes 2526 * are very fork/exec oriented. The details of this 2527 * in-kernel implementation of posix_spawn() is different 2528 * (while producing the same process-observable effects) 2529 * particularly w.r.t. 
errors, and which thread/process 2530 * is constructing what on behalf of whom. 2531 */ 2532 if (error) { 2533 DTRACE_PROC1(spawn__failure, int, error); 2534 } else { 2535 DTRACE_PROC(spawn__success); 2536 /* 2537 * Some DTrace scripts, e.g. newproc.d in 2538 * /usr/bin, rely on the the 'exec-success' 2539 * probe being fired in the child after the 2540 * new process image has been constructed 2541 * in order to determine the associated pid. 2542 * 2543 * So, even though the parent built the image 2544 * here, for compatibility, mark the new thread 2545 * so 'exec-success' fires on it as it leaves 2546 * the kernel. 2547 */ 2548 dtrace_thread_didexec(imgp->ip_new_thread); 2549 } 2550 } else { 2551 if (error) { 2552 DTRACE_PROC1(exec__failure, int, error); 2553 } else { 2554 DTRACE_PROC(exec__success); 2555 } 2556 } 2557 2558 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) 2559 (*dtrace_proc_waitfor_hook)(p); 2560#endif 2561 2562 /* Return to both the parent and the child? */ 2563 if (imgp != NULL && spawn_no_exec) { 2564 /* 2565 * If the parent wants the pid, copy it out 2566 */ 2567 if (pid != USER_ADDR_NULL) 2568 (void)suword(pid, p->p_pid); 2569 retval[0] = error; 2570 2571 /* 2572 * If we had an error, perform an internal reap ; this is 2573 * entirely safe, as we have a real process backing us. 2574 */ 2575 if (error) { 2576 proc_list_lock(); 2577 p->p_listflag |= P_LIST_DEADPARENT; 2578 proc_list_unlock(); 2579 proc_lock(p); 2580 /* make sure no one else has killed it off... 
*/ 2581 if (p->p_stat != SZOMB && p->exit_thread == NULL) { 2582 p->exit_thread = current_thread(); 2583 proc_unlock(p); 2584 exit1(p, 1, (int *)NULL); 2585 if (exec_done == FALSE) { 2586 task_deallocate(get_threadtask(imgp->ip_new_thread)); 2587 thread_deallocate(imgp->ip_new_thread); 2588 } 2589 } else { 2590 /* someone is doing it for us; just skip it */ 2591 proc_unlock(p); 2592 } 2593 } else { 2594 2595 /* 2596 * Return to the child 2597 * 2598 * Note: the image activator earlier dropped the 2599 * task/thread references to the newly spawned 2600 * process; this is OK, since we still have suspended 2601 * queue references on them, so we should be fine 2602 * with the delayed resume of the thread here. 2603 */ 2604 (void)thread_resume(imgp->ip_new_thread); 2605 } 2606 } 2607 if (bufp != NULL) { 2608 FREE(bufp, M_TEMP); 2609 } 2610 2611 return(error); 2612} 2613 2614 2615/* 2616 * execve 2617 * 2618 * Parameters: uap->fname File name to exec 2619 * uap->argp Argument list 2620 * uap->envp Environment list 2621 * 2622 * Returns: 0 Success 2623 * __mac_execve:EINVAL Invalid argument 2624 * __mac_execve:ENOTSUP Invalid argument 2625 * __mac_execve:EACCES Permission denied 2626 * __mac_execve:EINTR Interrupted function 2627 * __mac_execve:ENOMEM Not enough space 2628 * __mac_execve:EFAULT Bad address 2629 * __mac_execve:ENAMETOOLONG Filename too long 2630 * __mac_execve:ENOEXEC Executable file format error 2631 * __mac_execve:ETXTBSY Text file busy [misuse of error code] 2632 * __mac_execve:??? 
2633 * 2634 * TODO: Dynamic linker header address on stack is copied via suword() 2635 */ 2636/* ARGSUSED */ 2637int 2638execve(proc_t p, struct execve_args *uap, int32_t *retval) 2639{ 2640 struct __mac_execve_args muap; 2641 int err; 2642 2643 memoryshot(VM_EXECVE, DBG_FUNC_NONE); 2644 2645 muap.fname = uap->fname; 2646 muap.argp = uap->argp; 2647 muap.envp = uap->envp; 2648 muap.mac_p = USER_ADDR_NULL; 2649 err = __mac_execve(p, &muap, retval); 2650 2651 return(err); 2652} 2653 2654/* 2655 * __mac_execve 2656 * 2657 * Parameters: uap->fname File name to exec 2658 * uap->argp Argument list 2659 * uap->envp Environment list 2660 * uap->mac_p MAC label supplied by caller 2661 * 2662 * Returns: 0 Success 2663 * EINVAL Invalid argument 2664 * ENOTSUP Not supported 2665 * ENOEXEC Executable file format error 2666 * exec_activate_image:EINVAL Invalid argument 2667 * exec_activate_image:EACCES Permission denied 2668 * exec_activate_image:EINTR Interrupted function 2669 * exec_activate_image:ENOMEM Not enough space 2670 * exec_activate_image:EFAULT Bad address 2671 * exec_activate_image:ENAMETOOLONG Filename too long 2672 * exec_activate_image:ENOEXEC Executable file format error 2673 * exec_activate_image:ETXTBSY Text file busy [misuse of error code] 2674 * exec_activate_image:EBADEXEC The executable is corrupt/unknown 2675 * exec_activate_image:??? 2676 * mac_execve_enter:??? 
2677 * 2678 * TODO: Dynamic linker header address on stack is copied via suword() 2679 */ 2680int 2681__mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) 2682{ 2683 char *bufp = NULL; 2684 struct image_params *imgp; 2685 struct vnode_attr *vap; 2686 struct vnode_attr *origvap; 2687 int error; 2688 char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */ 2689 int is_64 = IS_64BIT_PROCESS(p); 2690 struct vfs_context context; 2691 struct uthread *uthread; 2692 2693 context.vc_thread = current_thread(); 2694 context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */ 2695 2696 /* Allocate a big chunk for locals instead of using stack since these 2697 * structures a pretty big. 2698 */ 2699 MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO); 2700 imgp = (struct image_params *) bufp; 2701 if (bufp == NULL) { 2702 error = ENOMEM; 2703 goto exit_with_error; 2704 } 2705 vap = (struct vnode_attr *) (bufp + sizeof(*imgp)); 2706 origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap)); 2707 2708 /* Initialize the common data in the image_params structure */ 2709 imgp->ip_user_fname = uap->fname; 2710 imgp->ip_user_argv = uap->argp; 2711 imgp->ip_user_envv = uap->envp; 2712 imgp->ip_vattr = vap; 2713 imgp->ip_origvattr = origvap; 2714 imgp->ip_vfs_context = &context; 2715 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE); 2716 imgp->ip_p_comm = alt_p_comm; /* for PowerPC */ 2717 imgp->ip_seg = (is_64 ? 
UIO_USERSPACE64 : UIO_USERSPACE32); 2718 imgp->ip_mac_return = 0; 2719 2720 uthread = get_bsdthread_info(current_thread()); 2721 if (uthread->uu_flag & UT_VFORK) { 2722 imgp->ip_flags |= IMGPF_VFORK_EXEC; 2723 } 2724 2725#if CONFIG_MACF 2726 if (uap->mac_p != USER_ADDR_NULL) { 2727 error = mac_execve_enter(uap->mac_p, imgp); 2728 if (error) { 2729 kauth_cred_unref(&context.vc_ucred); 2730 goto exit_with_error; 2731 } 2732 } 2733#endif 2734 2735 error = exec_activate_image(imgp); 2736 2737 kauth_cred_unref(&context.vc_ucred); 2738 2739 /* Image not claimed by any activator? */ 2740 if (error == -1) 2741 error = ENOEXEC; 2742 2743 if (error == 0) { 2744 exec_resettextvp(p, imgp); 2745 error = check_for_signature(p, imgp); 2746 } 2747 if (imgp->ip_vp != NULLVP) 2748 vnode_put(imgp->ip_vp); 2749 if (imgp->ip_scriptvp != NULLVP) 2750 vnode_put(imgp->ip_scriptvp); 2751 if (imgp->ip_strings) 2752 execargs_free(imgp); 2753#if CONFIG_MACF 2754 if (imgp->ip_execlabelp) 2755 mac_cred_label_free(imgp->ip_execlabelp); 2756 if (imgp->ip_scriptlabelp) 2757 mac_vnode_label_free(imgp->ip_scriptlabelp); 2758#endif 2759 if (!error) { 2760 /* Sever any extant thread affinity */ 2761 thread_affinity_exec(current_thread()); 2762 2763 thread_t main_thread = (imgp->ip_new_thread != NULL) ? 
imgp->ip_new_thread : current_thread(); 2764 2765 task_set_main_thread_qos(p->task, main_thread); 2766 2767 /* reset task importance */ 2768 task_importance_reset(p->task); 2769 2770 /* reset atm context from task */ 2771 task_atm_reset(p->task); 2772 2773 DTRACE_PROC(exec__success); 2774 2775#if CONFIG_DTRACE 2776 if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) 2777 (*dtrace_proc_waitfor_hook)(p); 2778#endif 2779 2780 if (imgp->ip_flags & IMGPF_VFORK_EXEC) { 2781 vfork_return(p, retval, p->p_pid); 2782 (void)thread_resume(imgp->ip_new_thread); 2783 } 2784 } else { 2785 DTRACE_PROC1(exec__failure, int, error); 2786 } 2787 2788exit_with_error: 2789 if (bufp != NULL) { 2790 FREE(bufp, M_TEMP); 2791 } 2792 2793 return(error); 2794} 2795 2796 2797/* 2798 * copyinptr 2799 * 2800 * Description: Copy a pointer in from user space to a user_addr_t in kernel 2801 * space, based on 32/64 bitness of the user space 2802 * 2803 * Parameters: froma User space address 2804 * toptr Address of kernel space user_addr_t 2805 * ptr_size 4/8, based on 'froma' address space 2806 * 2807 * Returns: 0 Success 2808 * EFAULT Bad 'froma' 2809 * 2810 * Implicit returns: 2811 * *ptr_size Modified 2812 */ 2813static int 2814copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size) 2815{ 2816 int error; 2817 2818 if (ptr_size == 4) { 2819 /* 64 bit value containing 32 bit address */ 2820 unsigned int i; 2821 2822 error = copyin(froma, &i, 4); 2823 *toptr = CAST_USER_ADDR_T(i); /* SAFE */ 2824 } else { 2825 error = copyin(froma, toptr, 8); 2826 } 2827 return (error); 2828} 2829 2830 2831/* 2832 * copyoutptr 2833 * 2834 * Description: Copy a pointer out from a user_addr_t in kernel space to 2835 * user space, based on 32/64 bitness of the user space 2836 * 2837 * Parameters: ua User space address to copy to 2838 * ptr Address of kernel space user_addr_t 2839 * ptr_size 4/8, based on 'ua' address space 2840 * 2841 * Returns: 0 Success 2842 * EFAULT Bad 'ua' 2843 * 2844 */ 
2845static int 2846copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size) 2847{ 2848 int error; 2849 2850 if (ptr_size == 4) { 2851 /* 64 bit value containing 32 bit address */ 2852 unsigned int i = CAST_DOWN_EXPLICIT(unsigned int,ua); /* SAFE */ 2853 2854 error = copyout(&i, ptr, 4); 2855 } else { 2856 error = copyout(&ua, ptr, 8); 2857 } 2858 return (error); 2859} 2860 2861 2862/* 2863 * exec_copyout_strings 2864 * 2865 * Copy out the strings segment to user space. The strings segment is put 2866 * on a preinitialized stack frame. 2867 * 2868 * Parameters: struct image_params * the image parameter block 2869 * int * a pointer to the stack offset variable 2870 * 2871 * Returns: 0 Success 2872 * !0 Faiure: errno 2873 * 2874 * Implicit returns: 2875 * (*stackp) The stack offset, modified 2876 * 2877 * Note: The strings segment layout is backward, from the beginning 2878 * of the top of the stack to consume the minimal amount of 2879 * space possible; the returned stack pointer points to the 2880 * end of the area consumed (stacks grow downward). 
2881 * 2882 * argc is an int; arg[i] are pointers; env[i] are pointers; 2883 * the 0's are (void *)NULL's 2884 * 2885 * The stack frame layout is: 2886 * 2887 * +-------------+ <- p->user_stack 2888 * | 16b | 2889 * +-------------+ 2890 * | STRING AREA | 2891 * | : | 2892 * | : | 2893 * | : | 2894 * +- -- -- -- --+ 2895 * | PATH AREA | 2896 * +-------------+ 2897 * | 0 | 2898 * +-------------+ 2899 * | applev[n] | 2900 * +-------------+ 2901 * : 2902 * : 2903 * +-------------+ 2904 * | applev[1] | 2905 * +-------------+ 2906 * | exec_path / | 2907 * | applev[0] | 2908 * +-------------+ 2909 * | 0 | 2910 * +-------------+ 2911 * | env[n] | 2912 * +-------------+ 2913 * : 2914 * : 2915 * +-------------+ 2916 * | env[0] | 2917 * +-------------+ 2918 * | 0 | 2919 * +-------------+ 2920 * | arg[argc-1] | 2921 * +-------------+ 2922 * : 2923 * : 2924 * +-------------+ 2925 * | arg[0] | 2926 * +-------------+ 2927 * | argc | 2928 * sp-> +-------------+ 2929 * 2930 * Although technically a part of the STRING AREA, we treat the PATH AREA as 2931 * a separate entity. This allows us to align the beginning of the PATH AREA 2932 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers 2933 * which preceed it on the stack are properly aligned. 2934 */ 2935 2936static int 2937exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) 2938{ 2939 proc_t p = vfs_context_proc(imgp->ip_vfs_context); 2940 int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 
8 : 4; 2941 int ptr_area_size; 2942 void *ptr_buffer_start, *ptr_buffer; 2943 int string_size; 2944 2945 user_addr_t string_area; /* *argv[], *env[] */ 2946 user_addr_t ptr_area; /* argv[], env[], applev[] */ 2947 user_addr_t argc_area; /* argc */ 2948 user_addr_t stack; 2949 int error; 2950 2951 unsigned i; 2952 struct copyout_desc { 2953 char *start_string; 2954 int count; 2955#if CONFIG_DTRACE 2956 user_addr_t *dtrace_cookie; 2957#endif 2958 boolean_t null_term; 2959 } descriptors[] = { 2960 { 2961 .start_string = imgp->ip_startargv, 2962 .count = imgp->ip_argc, 2963#if CONFIG_DTRACE 2964 .dtrace_cookie = &p->p_dtrace_argv, 2965#endif 2966 .null_term = TRUE 2967 }, 2968 { 2969 .start_string = imgp->ip_endargv, 2970 .count = imgp->ip_envc, 2971#if CONFIG_DTRACE 2972 .dtrace_cookie = &p->p_dtrace_envp, 2973#endif 2974 .null_term = TRUE 2975 }, 2976 { 2977 .start_string = imgp->ip_strings, 2978 .count = 1, 2979#if CONFIG_DTRACE 2980 .dtrace_cookie = NULL, 2981#endif 2982 .null_term = FALSE 2983 }, 2984 { 2985 .start_string = imgp->ip_endenvv, 2986 .count = imgp->ip_applec - 1, /* exec_path handled above */ 2987#if CONFIG_DTRACE 2988 .dtrace_cookie = NULL, 2989#endif 2990 .null_term = TRUE 2991 } 2992 }; 2993 2994 stack = *stackp; 2995 2996 /* 2997 * All previous contributors to the string area 2998 * should have aligned their sub-area 2999 */ 3000 if (imgp->ip_strspace % ptr_size != 0) { 3001 error = EINVAL; 3002 goto bad; 3003 } 3004 3005 /* Grow the stack down for the strings we've been building up */ 3006 string_size = imgp->ip_strendp - imgp->ip_strings; 3007 stack -= string_size; 3008 string_area = stack; 3009 3010 /* 3011 * Need room for one pointer for each string, plus 3012 * one for the NULLs terminating the argv, envv, and apple areas. 
3013 */ 3014 ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) * 3015 ptr_size; 3016 stack -= ptr_area_size; 3017 ptr_area = stack; 3018 3019 /* We'll construct all the pointer arrays in our string buffer, 3020 * which we already know is aligned properly, and ip_argspace 3021 * was used to verify we have enough space. 3022 */ 3023 ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp; 3024 3025 /* 3026 * Need room for pointer-aligned argc slot. 3027 */ 3028 stack -= ptr_size; 3029 argc_area = stack; 3030 3031 /* 3032 * Record the size of the arguments area so that sysctl_procargs() 3033 * can return the argument area without having to parse the arguments. 3034 */ 3035 proc_lock(p); 3036 p->p_argc = imgp->ip_argc; 3037 p->p_argslen = (int)(*stackp - string_area); 3038 proc_unlock(p); 3039 3040 /* Return the initial stack address: the location of argc */ 3041 *stackp = stack; 3042 3043 /* 3044 * Copy out the entire strings area. 3045 */ 3046 error = copyout(imgp->ip_strings, string_area, 3047 string_size); 3048 if (error) 3049 goto bad; 3050 3051 for (i = 0; i < sizeof(descriptors)/sizeof(descriptors[0]); i++) { 3052 char *cur_string = descriptors[i].start_string; 3053 int j; 3054 3055#if CONFIG_DTRACE 3056 if (descriptors[i].dtrace_cookie) { 3057 proc_lock(p); 3058 *descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */ 3059 proc_unlock(p); 3060 } 3061#endif /* CONFIG_DTRACE */ 3062 3063 /* 3064 * For each segment (argv, envv, applev), copy as many pointers as requested 3065 * to our pointer buffer. 3066 */ 3067 for (j = 0; j < descriptors[i].count; j++) { 3068 user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings); 3069 3070 /* Copy out the pointer to the current string. 
Alignment has been verified */ 3071 if (ptr_size == 8) { 3072 *(uint64_t *)ptr_buffer = (uint64_t)cur_address; 3073 } else { 3074 *(uint32_t *)ptr_buffer = (uint32_t)cur_address; 3075 } 3076 3077 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size); 3078 cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */ 3079 } 3080 3081 if (descriptors[i].null_term) { 3082 if (ptr_size == 8) { 3083 *(uint64_t *)ptr_buffer = 0ULL; 3084 } else { 3085 *(uint32_t *)ptr_buffer = 0; 3086 } 3087 3088 ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size); 3089 } 3090 } 3091 3092 /* 3093 * Copy out all our pointer arrays in bulk. 3094 */ 3095 error = copyout(ptr_buffer_start, ptr_area, 3096 ptr_area_size); 3097 if (error) 3098 goto bad; 3099 3100 /* argc (int32, stored in a ptr_size area) */ 3101 error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size); 3102 if (error) 3103 goto bad; 3104 3105bad: 3106 return(error); 3107} 3108 3109 3110/* 3111 * exec_extract_strings 3112 * 3113 * Copy arguments and environment from user space into work area; we may 3114 * have already copied some early arguments into the work area, and if 3115 * so, any arguments opied in are appended to those already there. 3116 * This function is the primary manipulator of ip_argspace, since 3117 * these are the arguments the client of execve(2) knows about. After 3118 * each argv[]/envv[] string is copied, we charge the string length 3119 * and argv[]/envv[] pointer slot to ip_argspace, so that we can 3120 * full preflight the arg list size. 
3121 * 3122 * Parameters: struct image_params * the image parameter block 3123 * 3124 * Returns: 0 Success 3125 * !0 Failure: errno 3126 * 3127 * Implicit returns; 3128 * (imgp->ip_argc) Count of arguments, updated 3129 * (imgp->ip_envc) Count of environment strings, updated 3130 * (imgp->ip_argspace) Count of remaining of NCARGS 3131 * (imgp->ip_interp_buffer) Interpreter and args (mutated in place) 3132 * 3133 * 3134 * Note: The argument and environment vectors are user space pointers 3135 * to arrays of user space pointers. 3136 */ 3137static int 3138exec_extract_strings(struct image_params *imgp) 3139{ 3140 int error = 0; 3141 int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4; 3142 int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4; 3143 user_addr_t argv = imgp->ip_user_argv; 3144 user_addr_t envv = imgp->ip_user_envv; 3145 3146 /* 3147 * Adjust space reserved for the path name by however much padding it 3148 * needs. Doing this here since we didn't know if this would be a 32- 3149 * or 64-bit process back in exec_save_path. 3150 */ 3151 while (imgp->ip_strspace % new_ptr_size != 0) { 3152 *imgp->ip_strendp++ = '\0'; 3153 imgp->ip_strspace--; 3154 /* imgp->ip_argspace--; not counted towards exec args total */ 3155 } 3156 3157 /* 3158 * From now on, we start attributing string space to ip_argspace 3159 */ 3160 imgp->ip_startargv = imgp->ip_strendp; 3161 imgp->ip_argc = 0; 3162 3163 if((imgp->ip_flags & IMGPF_INTERPRET) != 0) { 3164 user_addr_t arg; 3165 char *argstart, *ch; 3166 3167 /* First, the arguments in the "#!" string are tokenized and extracted. 
*/ 3168 argstart = imgp->ip_interp_buffer; 3169 while (argstart) { 3170 ch = argstart; 3171 while (*ch && !IS_WHITESPACE(*ch)) { 3172 ch++; 3173 } 3174 3175 if (*ch == '\0') { 3176 /* last argument, no need to NUL-terminate */ 3177 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE); 3178 argstart = NULL; 3179 } else { 3180 /* NUL-terminate */ 3181 *ch = '\0'; 3182 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE); 3183 3184 /* 3185 * Find the next string. We know spaces at the end of the string have already 3186 * been stripped. 3187 */ 3188 argstart = ch + 1; 3189 while (IS_WHITESPACE(*argstart)) { 3190 argstart++; 3191 } 3192 } 3193 3194 /* Error-check, regardless of whether this is the last interpreter arg or not */ 3195 if (error) 3196 goto bad; 3197 if (imgp->ip_argspace < new_ptr_size) { 3198 error = E2BIG; 3199 goto bad; 3200 } 3201 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */ 3202 imgp->ip_argc++; 3203 } 3204 3205 if (argv != 0LL) { 3206 /* 3207 * If we are running an interpreter, replace the av[0] that was 3208 * passed to execve() with the path name that was 3209 * passed to execve() for interpreters which do not use the PATH 3210 * to locate their script arguments. 
3211 */ 3212 error = copyinptr(argv, &arg, ptr_size); 3213 if (error) 3214 goto bad; 3215 if (arg != 0LL) { 3216 argv += ptr_size; /* consume without using */ 3217 } 3218 } 3219 3220 if (imgp->ip_interp_sugid_fd != -1) { 3221 char temp[19]; /* "/dev/fd/" + 10 digits + NUL */ 3222 snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd); 3223 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE); 3224 } else { 3225 error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE); 3226 } 3227 3228 if (error) 3229 goto bad; 3230 if (imgp->ip_argspace < new_ptr_size) { 3231 error = E2BIG; 3232 goto bad; 3233 } 3234 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */ 3235 imgp->ip_argc++; 3236 } 3237 3238 while (argv != 0LL) { 3239 user_addr_t arg; 3240 3241 error = copyinptr(argv, &arg, ptr_size); 3242 if (error) 3243 goto bad; 3244 3245 if (arg == 0LL) { 3246 break; 3247 } 3248 3249 argv += ptr_size; 3250 3251 /* 3252 * av[n...] = arg[n] 3253 */ 3254 error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE); 3255 if (error) 3256 goto bad; 3257 if (imgp->ip_argspace < new_ptr_size) { 3258 error = E2BIG; 3259 goto bad; 3260 } 3261 imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */ 3262 imgp->ip_argc++; 3263 } 3264 3265 /* Save space for argv[] NULL terminator */ 3266 if (imgp->ip_argspace < new_ptr_size) { 3267 error = E2BIG; 3268 goto bad; 3269 } 3270 imgp->ip_argspace -= new_ptr_size; 3271 3272 /* Note where the args ends and env begins. */ 3273 imgp->ip_endargv = imgp->ip_strendp; 3274 imgp->ip_envc = 0; 3275 3276 /* Now, get the environment */ 3277 while (envv != 0LL) { 3278 user_addr_t env; 3279 3280 error = copyinptr(envv, &env, ptr_size); 3281 if (error) 3282 goto bad; 3283 3284 envv += ptr_size; 3285 if (env == 0LL) { 3286 break; 3287 } 3288 /* 3289 * av[n...] 
= env[n] 3290 */ 3291 error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE); 3292 if (error) 3293 goto bad; 3294 if (imgp->ip_argspace < new_ptr_size) { 3295 error = E2BIG; 3296 goto bad; 3297 } 3298 imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */ 3299 imgp->ip_envc++; 3300 } 3301 3302 /* Save space for envv[] NULL terminator */ 3303 if (imgp->ip_argspace < new_ptr_size) { 3304 error = E2BIG; 3305 goto bad; 3306 } 3307 imgp->ip_argspace -= new_ptr_size; 3308 3309 /* Align the tail of the combined argv+envv area */ 3310 while (imgp->ip_strspace % new_ptr_size != 0) { 3311 if (imgp->ip_argspace < 1) { 3312 error = E2BIG; 3313 goto bad; 3314 } 3315 *imgp->ip_strendp++ = '\0'; 3316 imgp->ip_strspace--; 3317 imgp->ip_argspace--; 3318 } 3319 3320 /* Note where the envv ends and applev begins. */ 3321 imgp->ip_endenvv = imgp->ip_strendp; 3322 3323 /* 3324 * From now on, we are no longer charging argument 3325 * space to ip_argspace. 3326 */ 3327 3328bad: 3329 return error; 3330} 3331 3332static char * 3333random_hex_str(char *str, int len, boolean_t embedNUL) 3334{ 3335 uint64_t low, high, value; 3336 int idx; 3337 char digit; 3338 3339 /* A 64-bit value will only take 16 characters, plus '0x' and NULL. */ 3340 if (len > 19) 3341 len = 19; 3342 3343 /* We need enough room for at least 1 digit */ 3344 if (len < 4) 3345 return (NULL); 3346 3347 low = random(); 3348 high = random(); 3349 value = high << 32 | low; 3350 3351 if (embedNUL) { 3352 /* 3353 * Zero a byte to protect against C string vulnerabilities 3354 * e.g. for userland __stack_chk_guard. 3355 */ 3356 value &= ~(0xffull << 8); 3357 } 3358 3359 str[0] = '0'; 3360 str[1] = 'x'; 3361 for (idx = 2; idx < len - 1; idx++) { 3362 digit = value & 0xf; 3363 value = value >> 4; 3364 if (digit < 10) 3365 str[idx] = '0' + digit; 3366 else 3367 str[idx] = 'a' + (digit - 10); 3368 } 3369 str[idx] = '\0'; 3370 return (str); 3371} 3372 3373/* 3374 * Libc has an 8-element array set up for stack guard values. 
It only fills 3375 * in one of those entries, and both gcc and llvm seem to use only a single 3376 * 8-byte guard. Until somebody needs more than an 8-byte guard value, don't 3377 * do the work to construct them. 3378 */ 3379#define GUARD_VALUES 1 3380#define GUARD_KEY "stack_guard=" 3381 3382/* 3383 * System malloc needs some entropy when it is initialized. 3384 */ 3385#define ENTROPY_VALUES 2 3386#define ENTROPY_KEY "malloc_entropy=" 3387 3388/* 3389 * System malloc engages nanozone for UIAPP. 3390 */ 3391#define NANO_ENGAGE_KEY "MallocNanoZone=1" 3392 3393#define PFZ_KEY "pfz=" 3394extern user32_addr_t commpage_text32_location; 3395extern user64_addr_t commpage_text64_location; 3396/* 3397 * Build up the contents of the apple[] string vector 3398 */ 3399static int 3400exec_add_apple_strings(struct image_params *imgp) 3401{ 3402 int i, error; 3403 int new_ptr_size=4; 3404 char guard[19]; 3405 char guard_vec[strlen(GUARD_KEY) + 19 * GUARD_VALUES + 1]; 3406 3407 char entropy[19]; 3408 char entropy_vec[strlen(ENTROPY_KEY) + 19 * ENTROPY_VALUES + 1]; 3409 3410 char pfz_string[strlen(PFZ_KEY) + 16 + 4 +1]; 3411 3412 if( imgp->ip_flags & IMGPF_IS_64BIT) { 3413 new_ptr_size = 8; 3414 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%llx",commpage_text64_location); 3415 } else { 3416 snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%x",commpage_text32_location); 3417 } 3418 3419 /* exec_save_path stored the first string */ 3420 imgp->ip_applec = 1; 3421 3422 /* adding the pfz string */ 3423 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string),UIO_SYSSPACE,FALSE); 3424 if(error) 3425 goto bad; 3426 imgp->ip_applec++; 3427 3428 /* adding the NANO_ENGAGE_KEY key */ 3429 if (imgp->ip_px_sa) { 3430 int proc_flags = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_flags); 3431 3432 if ((proc_flags & _POSIX_SPAWN_NANO_ALLOCATOR) == _POSIX_SPAWN_NANO_ALLOCATOR) { 3433 char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1]; 3434 3435 snprintf(uiapp_string, 
sizeof(uiapp_string), NANO_ENGAGE_KEY); 3436 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE); 3437 if (error) 3438 goto bad; 3439 imgp->ip_applec++; 3440 } 3441 } 3442 3443 /* 3444 * Supply libc with a collection of random values to use when 3445 * implementing -fstack-protector. 3446 * 3447 * (The first random string always contains an embedded NUL so that 3448 * __stack_chk_guard also protects against C string vulnerabilities) 3449 */ 3450 (void)strlcpy(guard_vec, GUARD_KEY, sizeof (guard_vec)); 3451 for (i = 0; i < GUARD_VALUES; i++) { 3452 random_hex_str(guard, sizeof (guard), i == 0); 3453 if (i) 3454 (void)strlcat(guard_vec, ",", sizeof (guard_vec)); 3455 (void)strlcat(guard_vec, guard, sizeof (guard_vec)); 3456 } 3457 3458 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(guard_vec), UIO_SYSSPACE, FALSE); 3459 if (error) 3460 goto bad; 3461 imgp->ip_applec++; 3462 3463 /* 3464 * Supply libc with entropy for system malloc. 3465 */ 3466 (void)strlcpy(entropy_vec, ENTROPY_KEY, sizeof(entropy_vec)); 3467 for (i = 0; i < ENTROPY_VALUES; i++) { 3468 random_hex_str(entropy, sizeof (entropy), FALSE); 3469 if (i) 3470 (void)strlcat(entropy_vec, ",", sizeof (entropy_vec)); 3471 (void)strlcat(entropy_vec, entropy, sizeof (entropy_vec)); 3472 } 3473 3474 error = exec_add_user_string(imgp, CAST_USER_ADDR_T(entropy_vec), UIO_SYSSPACE, FALSE); 3475 if (error) 3476 goto bad; 3477 imgp->ip_applec++; 3478 3479 /* Align the tail of the combined applev area */ 3480 while (imgp->ip_strspace % new_ptr_size != 0) { 3481 *imgp->ip_strendp++ = '\0'; 3482 imgp->ip_strspace--; 3483 } 3484 3485bad: 3486 return error; 3487} 3488 3489#define unix_stack_size(p) (p->p_rlimit[RLIMIT_STACK].rlim_cur) 3490 3491/* 3492 * exec_check_permissions 3493 * 3494 * Description: Verify that the file that is being attempted to be executed 3495 * is in fact allowed to be executed based on it POSIX file 3496 * permissions and other access control criteria 3497 * 
3498 * Parameters: struct image_params * the image parameter block 3499 * 3500 * Returns: 0 Success 3501 * EACCES Permission denied 3502 * ENOEXEC Executable file format error 3503 * ETXTBSY Text file busy [misuse of error code] 3504 * vnode_getattr:??? 3505 * vnode_authorize:??? 3506 */ 3507static int 3508exec_check_permissions(struct image_params *imgp) 3509{ 3510 struct vnode *vp = imgp->ip_vp; 3511 struct vnode_attr *vap = imgp->ip_vattr; 3512 proc_t p = vfs_context_proc(imgp->ip_vfs_context); 3513 int error; 3514 kauth_action_t action; 3515 3516 /* Only allow execution of regular files */ 3517 if (!vnode_isreg(vp)) 3518 return (EACCES); 3519 3520 /* Get the file attributes that we will be using here and elsewhere */ 3521 VATTR_INIT(vap); 3522 VATTR_WANTED(vap, va_uid); 3523 VATTR_WANTED(vap, va_gid); 3524 VATTR_WANTED(vap, va_mode); 3525 VATTR_WANTED(vap, va_fsid); 3526 VATTR_WANTED(vap, va_fileid); 3527 VATTR_WANTED(vap, va_data_size); 3528 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0) 3529 return (error); 3530 3531 /* 3532 * Ensure that at least one execute bit is on - otherwise root 3533 * will always succeed, and we don't want to happen unless the 3534 * file really is executable. 3535 */ 3536 if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) 3537 return (EACCES); 3538 3539 /* Disallow zero length files */ 3540 if (vap->va_data_size == 0) 3541 return (ENOEXEC); 3542 3543 imgp->ip_arch_offset = (user_size_t)0; 3544 imgp->ip_arch_size = vap->va_data_size; 3545 3546 /* Disable setuid-ness for traced programs or if MNT_NOSUID */ 3547 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED)) 3548 vap->va_mode &= ~(VSUID | VSGID); 3549 3550 /* 3551 * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR 3552 * flags for setuid/setgid binaries. 
3553 */ 3554 if (vap->va_mode & (VSUID | VSGID)) 3555 imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR); 3556 3557#if CONFIG_MACF 3558 error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp); 3559 if (error) 3560 return (error); 3561#endif 3562 3563 /* Check for execute permission */ 3564 action = KAUTH_VNODE_EXECUTE; 3565 /* Traced images must also be readable */ 3566 if (p->p_lflag & P_LTRACED) 3567 action |= KAUTH_VNODE_READ_DATA; 3568 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0) 3569 return (error); 3570 3571#if 0 3572 /* Don't let it run if anyone had it open for writing */ 3573 vnode_lock(vp); 3574 if (vp->v_writecount) { 3575 panic("going to return ETXTBSY %x", vp); 3576 vnode_unlock(vp); 3577 return (ETXTBSY); 3578 } 3579 vnode_unlock(vp); 3580#endif 3581 3582 3583 /* XXX May want to indicate to underlying FS that vnode is open */ 3584 3585 return (error); 3586} 3587 3588 3589/* 3590 * exec_handle_sugid 3591 * 3592 * Initially clear the P_SUGID in the process flags; if an SUGID process is 3593 * exec'ing a non-SUGID image, then this is the point of no return. 3594 * 3595 * If the image being activated is SUGID, then replace the credential with a 3596 * copy, disable tracing (unless the tracing process is root), reset the 3597 * mach task port to revoke it, set the P_SUGID bit, 3598 * 3599 * If the saved user and group ID will be changing, then make sure it happens 3600 * to a new credential, rather than a shared one. 3601 * 3602 * Set the security token (this is probably obsolete, given that the token 3603 * should not technically be separate from the credential itself). 
3604 * 3605 * Parameters: struct image_params * the image parameter block 3606 * 3607 * Returns: void No failure indication 3608 * 3609 * Implicit returns: 3610 * <process credential> Potentially modified/replaced 3611 * <task port> Potentially revoked 3612 * <process flags> P_SUGID bit potentially modified 3613 * <security token> Potentially modified 3614 */ 3615static int 3616exec_handle_sugid(struct image_params *imgp) 3617{ 3618 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context); 3619 proc_t p = vfs_context_proc(imgp->ip_vfs_context); 3620 int i; 3621 int leave_sugid_clear = 0; 3622 int mac_reset_ipc = 0; 3623 int error = 0; 3624#if CONFIG_MACF 3625 int mac_transition, disjoint_cred = 0; 3626 int label_update_return = 0; 3627 3628 /* 3629 * Determine whether a call to update the MAC label will result in the 3630 * credential changing. 3631 * 3632 * Note: MAC policies which do not actually end up modifying 3633 * the label subsequently are strongly encouraged to 3634 * return 0 for this check, since a non-zero answer will 3635 * slow down the exec fast path for normal binaries. 3636 */ 3637 mac_transition = mac_cred_check_label_update_execve( 3638 imgp->ip_vfs_context, 3639 imgp->ip_vp, 3640 imgp->ip_arch_offset, 3641 imgp->ip_scriptvp, 3642 imgp->ip_scriptlabelp, 3643 imgp->ip_execlabelp, 3644 p, 3645 imgp->ip_px_smpx); 3646#endif 3647 3648 OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag); 3649 3650 /* 3651 * Order of the following is important; group checks must go last, 3652 * as we use the success of the 'ismember' check combined with the 3653 * failure of the explicit match to indicate that we will be setting 3654 * the egid of the process even though the new process did not 3655 * require VSUID/VSGID bits in order for it to set the new group as 3656 * its egid. 
3657 * 3658 * Note: Technically, by this we are implying a call to 3659 * setegid() in the new process, rather than implying 3660 * it used its VSGID bit to set the effective group, 3661 * even though there is no code in that process to make 3662 * such a call. 3663 */ 3664 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 && 3665 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) || 3666 ((imgp->ip_origvattr->va_mode & VSGID) != 0 && 3667 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) || 3668 (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) { 3669 3670#if CONFIG_MACF 3671/* label for MAC transition and neither VSUID nor VSGID */ 3672handle_mac_transition: 3673#endif 3674 3675 /* 3676 * Replace the credential with a copy of itself if euid or 3677 * egid change. 3678 * 3679 * Note: setuid binaries will automatically opt out of 3680 * group resolver participation as a side effect 3681 * of this operation. This is an intentional 3682 * part of the security model, which requires a 3683 * participating credential be established by 3684 * escalating privilege, setting up all other 3685 * aspects of the credential including whether 3686 * or not to participate in external group 3687 * membership resolution, then dropping their 3688 * effective privilege to that of the desired 3689 * final credential state. 
3690 */ 3691 if (imgp->ip_origvattr->va_mode & VSUID) { 3692 p->p_ucred = kauth_cred_setresuid(p->p_ucred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE); 3693 /* update cred on proc */ 3694 PROC_UPDATE_CREDS_ONPROC(p); 3695 } 3696 if (imgp->ip_origvattr->va_mode & VSGID) { 3697 p->p_ucred = kauth_cred_setresgid(p->p_ucred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid); 3698 /* update cred on proc */ 3699 PROC_UPDATE_CREDS_ONPROC(p); 3700 } 3701 3702#if CONFIG_MACF 3703 /* 3704 * If a policy has indicated that it will transition the label, 3705 * before making the call into the MAC policies, get a new 3706 * duplicate credential, so they can modify it without 3707 * modifying any others sharing it. 3708 */ 3709 if (mac_transition) { 3710 kauth_proc_label_update_execve(p, 3711 imgp->ip_vfs_context, 3712 imgp->ip_vp, 3713 imgp->ip_arch_offset, 3714 imgp->ip_scriptvp, 3715 imgp->ip_scriptlabelp, 3716 imgp->ip_execlabelp, 3717 &imgp->ip_csflags, 3718 imgp->ip_px_smpx, 3719 &disjoint_cred, /* will be non zero if disjoint */ 3720 &label_update_return); 3721 3722 if (disjoint_cred) { 3723 /* 3724 * If updating the MAC label resulted in a 3725 * disjoint credential, flag that we need to 3726 * set the P_SUGID bit. This protects 3727 * against debuggers being attached by an 3728 * insufficiently privileged process onto the 3729 * result of a transition to a more privileged 3730 * credential. 
3731 */ 3732 leave_sugid_clear = 0; 3733 } 3734 3735 imgp->ip_mac_return = label_update_return; 3736 } 3737 3738 mac_reset_ipc = mac_proc_check_inherit_ipc_ports(p, p->p_textvp, p->p_textoff, imgp->ip_vp, imgp->ip_arch_offset, imgp->ip_scriptvp); 3739 3740#endif /* CONFIG_MACF */ 3741 3742 /* 3743 * If 'leave_sugid_clear' is non-zero, then we passed the 3744 * VSUID and MACF checks, and successfully determined that 3745 * the previous cred was a member of the VSGID group, but 3746 * that it was not the default at the time of the execve, 3747 * and that the post-labelling credential was not disjoint. 3748 * So we don't set the P_SUGID or reset mach ports and fds 3749 * on the basis of simply running this code. 3750 */ 3751 if (mac_reset_ipc || !leave_sugid_clear) { 3752 /* 3753 * Have mach reset the task and thread ports. 3754 * We don't want anyone who had the ports before 3755 * a setuid exec to be able to access/control the 3756 * task/thread after. 3757 */ 3758 ipc_task_reset(p->task); 3759 ipc_thread_reset((imgp->ip_new_thread != NULL) ? 3760 imgp->ip_new_thread : current_thread()); 3761 } 3762 3763 if (!leave_sugid_clear) { 3764 /* 3765 * Flag the process as setuid. 3766 */ 3767 OSBitOrAtomic(P_SUGID, &p->p_flag); 3768 3769 /* 3770 * Radar 2261856; setuid security hole fix 3771 * XXX For setuid processes, attempt to ensure that 3772 * stdin, stdout, and stderr are already allocated. 3773 * We do not want userland to accidentally allocate 3774 * descriptors in this range which has implied meaning 3775 * to libc. 
3776 */ 3777 for (i = 0; i < 3; i++) { 3778 3779 if (p->p_fd->fd_ofiles[i] != NULL) 3780 continue; 3781 3782 /* 3783 * Do the kernel equivalent of 3784 * 3785 * if i == 0 3786 * (void) open("/dev/null", O_RDONLY); 3787 * else 3788 * (void) open("/dev/null", O_WRONLY); 3789 */ 3790 3791 struct fileproc *fp; 3792 int indx; 3793 int flag; 3794 struct nameidata *ndp = NULL; 3795 3796 if (i == 0) 3797 flag = FREAD; 3798 else 3799 flag = FWRITE; 3800 3801 if ((error = falloc(p, 3802 &fp, &indx, imgp->ip_vfs_context)) != 0) 3803 continue; 3804 3805 MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO); 3806 if (ndp == NULL) { 3807 error = ENOMEM; 3808 break; 3809 } 3810 3811 NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, 3812 CAST_USER_ADDR_T("/dev/null"), 3813 imgp->ip_vfs_context); 3814 3815 if ((error = vn_open(ndp, flag, 0)) != 0) { 3816 fp_free(p, indx, fp); 3817 break; 3818 } 3819 3820 struct fileglob *fg = fp->f_fglob; 3821 3822 fg->fg_flag = flag; 3823 fg->fg_ops = &vnops; 3824 fg->fg_data = ndp->ni_vp; 3825 3826 vnode_put(ndp->ni_vp); 3827 3828 proc_fdlock(p); 3829 procfdtbl_releasefd(p, indx, NULL); 3830 fp_drop(p, indx, fp, 1); 3831 proc_fdunlock(p); 3832 3833 FREE(ndp, M_TEMP); 3834 } 3835 } 3836 } 3837#if CONFIG_MACF 3838 else { 3839 /* 3840 * We are here because we were told that the MAC label will 3841 * be transitioned, and the binary is not VSUID or VSGID; to 3842 * deal with this case, we could either duplicate a lot of 3843 * code, or we can indicate we want to default the P_SUGID 3844 * bit clear and jump back up. 3845 */ 3846 if (mac_transition) { 3847 leave_sugid_clear = 1; 3848 goto handle_mac_transition; 3849 } 3850 } 3851 3852#endif /* CONFIG_MACF */ 3853 3854 /* 3855 * Implement the semantic where the effective user and group become 3856 * the saved user and group in exec'ed programs. 
3857 */ 3858 p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), kauth_cred_getgid(p->p_ucred)); 3859 /* update cred on proc */ 3860 PROC_UPDATE_CREDS_ONPROC(p); 3861 3862 /* Update the process' identity version and set the security token */ 3863 p->p_idversion++; 3864 set_security_token(p); 3865 3866 return(error); 3867} 3868 3869 3870/* 3871 * create_unix_stack 3872 * 3873 * Description: Set the user stack address for the process to the provided 3874 * address. If a custom stack was not set as a result of the 3875 * load process (i.e. as specified by the image file for the 3876 * executable), then allocate the stack in the provided map and 3877 * set up appropriate guard pages for enforcing administrative 3878 * limits on stack growth, if they end up being needed. 3879 * 3880 * Parameters: p Process to set stack on 3881 * load_result Information from mach-o load commands 3882 * map Address map in which to allocate the new stack 3883 * 3884 * Returns: KERN_SUCCESS Stack successfully created 3885 * !KERN_SUCCESS Mach failure code 3886 */ 3887static kern_return_t 3888create_unix_stack(vm_map_t map, load_result_t* load_result, 3889 proc_t p) 3890{ 3891 mach_vm_size_t size, prot_size; 3892 mach_vm_offset_t addr, prot_addr; 3893 kern_return_t kr; 3894 3895 mach_vm_address_t user_stack = load_result->user_stack; 3896 3897 proc_lock(p); 3898 p->user_stack = user_stack; 3899 proc_unlock(p); 3900 3901 if (!load_result->prog_allocated_stack) { 3902 /* 3903 * Allocate enough space for the maximum stack size we 3904 * will ever authorize and an extra page to act as 3905 * a guard page for stack overflows. For default stacks, 3906 * vm_initial_limit_stack takes care of the extra guard page. 3907 * Otherwise we must allocate it ourselves. 
3908 */ 3909 3910 size = mach_vm_round_page(load_result->user_stack_size); 3911 if (load_result->prog_stack_size) 3912 size += PAGE_SIZE; 3913 addr = mach_vm_trunc_page(load_result->user_stack - size); 3914 kr = mach_vm_allocate(map, &addr, size, 3915 VM_MAKE_TAG(VM_MEMORY_STACK) | 3916 VM_FLAGS_FIXED); 3917 if (kr != KERN_SUCCESS) { 3918 /* If can't allocate at default location, try anywhere */ 3919 addr = 0; 3920 kr = mach_vm_allocate(map, &addr, size, 3921 VM_MAKE_TAG(VM_MEMORY_STACK) | 3922 VM_FLAGS_ANYWHERE); 3923 if (kr != KERN_SUCCESS) 3924 return kr; 3925 3926 user_stack = addr + size; 3927 load_result->user_stack = user_stack; 3928 3929 proc_lock(p); 3930 p->user_stack = user_stack; 3931 proc_unlock(p); 3932 } 3933 3934 /* 3935 * And prevent access to what's above the current stack 3936 * size limit for this process. 3937 */ 3938 prot_addr = addr; 3939 if (load_result->prog_stack_size) 3940 prot_size = PAGE_SIZE; 3941 else 3942 prot_size = mach_vm_trunc_page(size - unix_stack_size(p)); 3943 kr = mach_vm_protect(map, 3944 prot_addr, 3945 prot_size, 3946 FALSE, 3947 VM_PROT_NONE); 3948 if (kr != KERN_SUCCESS) { 3949 (void) mach_vm_deallocate(map, addr, size); 3950 return kr; 3951 } 3952 } 3953 3954 return KERN_SUCCESS; 3955} 3956 3957#include <sys/reboot.h> 3958 3959static const char * init_programs[] = { 3960#if DEVELOPMENT || DEBUG 3961 "/usr/local/sbin/launchd.development", 3962#endif 3963 "/sbin/launchd", 3964}; 3965 3966/* 3967 * load_init_program 3968 * 3969 * Description: Load the "init" program; in most cases, this will be "launchd" 3970 * 3971 * Parameters: p Process to call execve() to create 3972 * the "init" program 3973 * 3974 * Returns: (void) 3975 * 3976 * Notes: The process that is passed in is the first manufactured 3977 * process on the system, and gets here via bsd_ast() firing 3978 * for the first time. This is done to ensure that bsd_init() 3979 * has run to completion. 
3980 */ 3981void 3982load_init_program(proc_t p) 3983{ 3984 vm_offset_t init_addr, addr; 3985 int argc; 3986 uint32_t argv[3]; 3987 unsigned int i; 3988 int error; 3989 int retval[2]; 3990 const char *init_program_name; 3991 struct execve_args init_exec_args; 3992 3993 init_addr = VM_MIN_ADDRESS; 3994 (void) vm_allocate(current_map(), &init_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE); 3995 if (init_addr == 0) 3996 init_addr++; 3997 3998 for (i = 0; i < sizeof(init_programs)/sizeof(init_programs[0]); i++) { 3999 4000 init_program_name = init_programs[i]; 4001 addr = init_addr; 4002 argc = 0; 4003 4004 /* 4005 * Copy out program name. 4006 */ 4007 (void) copyout(init_program_name, CAST_USER_ADDR_T(addr), strlen(init_program_name)+1); 4008 4009 argv[argc++] = (uint32_t)addr; 4010 addr += strlen(init_program_name)+1; 4011 addr = (vm_offset_t)ROUND_PTR(char, addr); 4012 4013 /* 4014 * Put out first (and only) argument, similarly. 4015 * Assumes everything fits in a page as allocated above. 4016 */ 4017 if (boothowto & RB_SINGLE) { 4018 const char *init_args = "-s"; 4019 4020 copyout(init_args, CAST_USER_ADDR_T(addr), strlen(init_args)+1); 4021 4022 argv[argc++] = (uint32_t)addr; 4023 addr += strlen(init_args)+1; 4024 addr = (vm_offset_t)ROUND_PTR(char, addr); 4025 } 4026 4027 /* 4028 * Null-end the argument list 4029 */ 4030 argv[argc] = 0; 4031 4032 /* 4033 * Copy out the argument list. 4034 */ 4035 (void) copyout(argv, CAST_USER_ADDR_T(addr), sizeof(argv)); 4036 4037 /* 4038 * Set up argument block for fake call to execve. 
4039 */ 4040 init_exec_args.fname = CAST_USER_ADDR_T(argv[0]); 4041 init_exec_args.argp = CAST_USER_ADDR_T((char **)addr); 4042 init_exec_args.envp = CAST_USER_ADDR_T(0); 4043 4044 /* 4045 * So that init task is set with uid,gid 0 token 4046 */ 4047 set_security_token(p); 4048 4049 error = execve(p, &init_exec_args, retval); 4050 if (!error) 4051 return; 4052 } 4053 4054 panic("Process 1 exec of %s failed, errno %d", init_program_name, error); 4055} 4056 4057/* 4058 * load_return_to_errno 4059 * 4060 * Description: Convert a load_return_t (Mach error) to an errno (BSD error) 4061 * 4062 * Parameters: lrtn Mach error number 4063 * 4064 * Returns: (int) BSD error number 4065 * 0 Success 4066 * EBADARCH Bad architecture 4067 * EBADMACHO Bad Mach object file 4068 * ESHLIBVERS Bad shared library version 4069 * ENOMEM Out of memory/resource shortage 4070 * EACCES Access denied 4071 * ENOENT Entry not found (usually "file does 4072 * does not exist") 4073 * EIO An I/O error occurred 4074 * EBADEXEC The executable is corrupt/unknown 4075 */ 4076static int 4077load_return_to_errno(load_return_t lrtn) 4078{ 4079 switch (lrtn) { 4080 case LOAD_SUCCESS: 4081 return 0; 4082 case LOAD_BADARCH: 4083 return EBADARCH; 4084 case LOAD_BADMACHO: 4085 return EBADMACHO; 4086 case LOAD_SHLIB: 4087 return ESHLIBVERS; 4088 case LOAD_NOSPACE: 4089 case LOAD_RESOURCE: 4090 return ENOMEM; 4091 case LOAD_PROTECT: 4092 return EACCES; 4093 case LOAD_ENOENT: 4094 return ENOENT; 4095 case LOAD_IOERROR: 4096 return EIO; 4097 case LOAD_FAILURE: 4098 case LOAD_DECRYPTFAIL: 4099 default: 4100 return EBADEXEC; 4101 } 4102} 4103 4104#include <mach/mach_types.h> 4105#include <mach/vm_prot.h> 4106#include <mach/semaphore.h> 4107#include <mach/sync_policy.h> 4108#include <kern/clock.h> 4109#include <mach/kern_return.h> 4110 4111/* 4112 * execargs_alloc 4113 * 4114 * Description: Allocate the block of memory used by the execve arguments. 
 *		At the same time, we allocate a page so that we can read in
 *		the first page of the image.
 *
 * Parameters:	struct image_params *	the image parameter block
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EACCES			Permission denied
 *		EINTR			Interrupted function
 *		ENOMEM			Not enough space
 *
 * Notes:	This is a temporary allocation into the kernel address space
 *		to enable us to copy arguments in from user space.  This is
 *		necessitated by not mapping the process calling execve() into
 *		the kernel address space during the execve() system call.
 *
 *		We assemble the argument and environment, etc., into this
 *		region before copying it as a single block into the child
 *		process address space (at the top or bottom of the stack,
 *		depending on which way the stack grows; see the function
 *		exec_copyout_strings() for details).
 *
 *		This ends up with a second (possibly unnecessary) copy compared
 *		with assembling the data directly into the child address space,
 *		instead, but since we cannot be guaranteed that the parent has
 *		not modified its environment, we can't really know that it's
 *		really a block there as well.
4142 */ 4143 4144 4145static int execargs_waiters = 0; 4146lck_mtx_t *execargs_cache_lock; 4147 4148static void 4149execargs_lock_lock(void) { 4150 lck_mtx_lock_spin(execargs_cache_lock); 4151} 4152 4153static void 4154execargs_lock_unlock(void) { 4155 lck_mtx_unlock(execargs_cache_lock); 4156} 4157 4158static wait_result_t 4159execargs_lock_sleep(void) { 4160 return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE)); 4161} 4162 4163static kern_return_t 4164execargs_purgeable_allocate(char **execarg_address) { 4165 kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE); 4166 assert(kr == KERN_SUCCESS); 4167 return kr; 4168} 4169 4170static kern_return_t 4171execargs_purgeable_reference(void *execarg_address) { 4172 int state = VM_PURGABLE_NONVOLATILE; 4173 kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state); 4174 4175 assert(kr == KERN_SUCCESS); 4176 return kr; 4177} 4178 4179static kern_return_t 4180execargs_purgeable_volatilize(void *execarg_address) { 4181 int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE; 4182 kern_return_t kr; 4183 kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state); 4184 4185 assert(kr == KERN_SUCCESS); 4186 4187 return kr; 4188} 4189 4190static void 4191execargs_wakeup_waiters(void) { 4192 thread_wakeup(&execargs_free_count); 4193} 4194 4195static int 4196execargs_alloc(struct image_params *imgp) 4197{ 4198 kern_return_t kret; 4199 wait_result_t res; 4200 int i, cache_index = -1; 4201 4202 execargs_lock_lock(); 4203 4204 while (execargs_free_count == 0) { 4205 execargs_waiters++; 4206 res = execargs_lock_sleep(); 4207 execargs_waiters--; 4208 if (res != THREAD_AWAKENED) { 4209 execargs_lock_unlock(); 4210 return (EINTR); 4211 } 4212 } 4213 4214 execargs_free_count--; 
4215 4216 for (i = 0; i < execargs_cache_size; i++) { 4217 vm_offset_t element = execargs_cache[i]; 4218 if (element) { 4219 cache_index = i; 4220 imgp->ip_strings = (char *)(execargs_cache[i]); 4221 execargs_cache[i] = 0; 4222 break; 4223 } 4224 } 4225 4226 assert(execargs_free_count >= 0); 4227 4228 execargs_lock_unlock(); 4229 4230 if (cache_index == -1) { 4231 kret = execargs_purgeable_allocate(&imgp->ip_strings); 4232 } 4233 else 4234 kret = execargs_purgeable_reference(imgp->ip_strings); 4235 4236 assert(kret == KERN_SUCCESS); 4237 if (kret != KERN_SUCCESS) { 4238 return (ENOMEM); 4239 } 4240 4241 /* last page used to read in file headers */ 4242 imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE ); 4243 imgp->ip_strendp = imgp->ip_strings; 4244 imgp->ip_argspace = NCARGS; 4245 imgp->ip_strspace = ( NCARGS + PAGE_SIZE ); 4246 4247 return (0); 4248} 4249 4250/* 4251 * execargs_free 4252 * 4253 * Description: Free the block of memory used by the execve arguments and the 4254 * first page of the executable by a previous call to the function 4255 * execargs_alloc(). 
4256 * 4257 * Parameters: struct image_params * the image parameter block 4258 * 4259 * Returns: 0 Success 4260 * EINVAL Invalid argument 4261 * EINTR Oeration interrupted 4262 */ 4263static int 4264execargs_free(struct image_params *imgp) 4265{ 4266 kern_return_t kret; 4267 int i; 4268 boolean_t needs_wakeup = FALSE; 4269 4270 kret = execargs_purgeable_volatilize(imgp->ip_strings); 4271 4272 execargs_lock_lock(); 4273 execargs_free_count++; 4274 4275 for (i = 0; i < execargs_cache_size; i++) { 4276 vm_offset_t element = execargs_cache[i]; 4277 if (element == 0) { 4278 execargs_cache[i] = (vm_offset_t) imgp->ip_strings; 4279 imgp->ip_strings = NULL; 4280 break; 4281 } 4282 } 4283 4284 assert(imgp->ip_strings == NULL); 4285 4286 if (execargs_waiters > 0) 4287 needs_wakeup = TRUE; 4288 4289 execargs_lock_unlock(); 4290 4291 if (needs_wakeup == TRUE) 4292 execargs_wakeup_waiters(); 4293 4294 return ((kret == KERN_SUCCESS ? 0 : EINVAL)); 4295} 4296 4297static void 4298exec_resettextvp(proc_t p, struct image_params *imgp) 4299{ 4300 vnode_t vp; 4301 off_t offset; 4302 vnode_t tvp = p->p_textvp; 4303 int ret; 4304 4305 vp = imgp->ip_vp; 4306 offset = imgp->ip_arch_offset; 4307 4308 if (vp == NULLVP) 4309 panic("exec_resettextvp: expected valid vp"); 4310 4311 ret = vnode_ref(vp); 4312 proc_lock(p); 4313 if (ret == 0) { 4314 p->p_textvp = vp; 4315 p->p_textoff = offset; 4316 } else { 4317 p->p_textvp = NULLVP; /* this is paranoia */ 4318 p->p_textoff = 0; 4319 } 4320 proc_unlock(p); 4321 4322 if ( tvp != NULLVP) { 4323 if (vnode_getwithref(tvp) == 0) { 4324 vnode_rele(tvp); 4325 vnode_put(tvp); 4326 } 4327 } 4328 4329} 4330 4331/* 4332 * If the process is not signed or if it contains entitlements, we 4333 * need to communicate through the task_access_port to taskgated. 4334 * 4335 * taskgated will provide a detached code signature if present, and 4336 * will enforce any restrictions on entitlements. 
4337 */ 4338 4339static boolean_t 4340taskgated_required(proc_t p, boolean_t *require_success) 4341{ 4342 size_t length; 4343 void *blob; 4344 int error; 4345 4346 if ((p->p_csflags & CS_VALID) == 0) { 4347 *require_success = FALSE; 4348 return TRUE; 4349 } 4350 4351 error = cs_entitlements_blob_get(p, &blob, &length); 4352 if (error == 0 && blob != NULL) { 4353 /* 4354 * fatal on the desktop when entitlements are present, 4355 * unless we started in single-user mode 4356 */ 4357 if ((boothowto & RB_SINGLE) == 0) 4358 *require_success = TRUE; 4359 /* 4360 * Allow initproc to run without causing taskgated to launch 4361 */ 4362 if (p == initproc) { 4363 *require_success = FALSE; 4364 return FALSE; 4365 } 4366 4367 return TRUE; 4368 } 4369 4370 *require_success = FALSE; 4371 return 0; 4372} 4373 4374/* 4375 * __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__ 4376 * 4377 * Description: Waits for the userspace daemon to respond to the request 4378 * we made. Function declared non inline to be visible in 4379 * stackshots and spindumps as well as debugging. 
4380 */ 4381__attribute__((noinline)) int 4382__EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(mach_port_t task_access_port, int32_t new_pid) 4383{ 4384 return find_code_signature(task_access_port, new_pid); 4385} 4386 4387static int 4388check_for_signature(proc_t p, struct image_params *imgp) 4389{ 4390 mach_port_t port = NULL; 4391 kern_return_t kr = KERN_FAILURE; 4392 int error = EACCES; 4393 boolean_t unexpected_failure = FALSE; 4394 unsigned char hash[SHA1_RESULTLEN]; 4395 boolean_t require_success = FALSE; 4396 int spawn = (imgp->ip_flags & IMGPF_SPAWN); 4397 int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC); 4398 4399 /* 4400 * Override inherited code signing flags with the 4401 * ones for the process that is being successfully 4402 * loaded 4403 */ 4404 proc_lock(p); 4405 p->p_csflags = imgp->ip_csflags; 4406 proc_unlock(p); 4407 4408 /* Set the switch_protect flag on the map */ 4409 if(p->p_csflags & (CS_HARD|CS_KILL)) { 4410 vm_map_switch_protect(get_task_map(p->task), TRUE); 4411 } 4412 4413 /* 4414 * image activation may be failed due to policy 4415 * which is unexpected but security framework does not 4416 * approve of exec, kill and return immediately. 4417 */ 4418 if (imgp->ip_mac_return != 0) { 4419 error = imgp->ip_mac_return; 4420 unexpected_failure = TRUE; 4421 goto done; 4422 } 4423 4424 /* check if callout to taskgated is needed */ 4425 if (!taskgated_required(p, &require_success)) { 4426 error = 0; 4427 goto done; 4428 } 4429 4430 kr = task_get_task_access_port(p->task, &port); 4431 if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) { 4432 error = 0; 4433 if (require_success) 4434 error = EACCES; 4435 goto done; 4436 } 4437 4438 /* 4439 * taskgated returns KERN_SUCCESS if it has completed its work 4440 * and the exec should continue, KERN_FAILURE if the exec should 4441 * fail, or it may error out with different error code in an 4442 * event of mig failure (e.g. 
process was signalled during the 4443 * rpc call, taskgated died, mig server died etc.). 4444 */ 4445 4446 kr = __EXEC_WAITING_ON_TASKGATED_CODE_SIGNATURE_UPCALL__(port, p->p_pid); 4447 switch (kr) { 4448 case KERN_SUCCESS: 4449 error = 0; 4450 break; 4451 case KERN_FAILURE: 4452 error = EACCES; 4453 goto done; 4454 default: 4455 error = EACCES; 4456 unexpected_failure = TRUE; 4457 goto done; 4458 } 4459 4460 /* Only do this if exec_resettextvp() did not fail */ 4461 if (p->p_textvp != NULLVP) { 4462 /* 4463 * If there's a new code directory, mark this process 4464 * as signed. 4465 */ 4466 if (0 == ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash)) { 4467 proc_lock(p); 4468 p->p_csflags |= CS_VALID; 4469 proc_unlock(p); 4470 } 4471 } 4472 4473done: 4474 if (0 != error) { 4475 if (!unexpected_failure) 4476 p->p_csflags |= CS_KILLED; 4477 /* make very sure execution fails */ 4478 if (vfexec || spawn) { 4479 psignal_vfork(p, p->task, imgp->ip_new_thread, SIGKILL); 4480 error = 0; 4481 } else { 4482 psignal(p, SIGKILL); 4483 } 4484 } 4485 return error; 4486} 4487 4488/* 4489 * Typically as soon as we start executing this process, the 4490 * first instruction will trigger a VM fault to bring the text 4491 * pages (as executable) into the address space, followed soon 4492 * thereafter by dyld data structures (for dynamic executable). 4493 * To optimize this, as well as improve support for hardware 4494 * debuggers that can only access resident pages present 4495 * in the process' page tables, we prefault some pages if 4496 * possible. Errors are non-fatal. 4497 */ 4498static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result) 4499{ 4500 int ret; 4501 size_t expected_all_image_infos_size; 4502 4503 /* 4504 * Prefault executable or dyld entry point. 
4505 */ 4506 vm_fault(current_map(), 4507 vm_map_trunc_page(load_result->entry_point, 4508 vm_map_page_mask(current_map())), 4509 VM_PROT_READ | VM_PROT_EXECUTE, 4510 FALSE, 4511 THREAD_UNINT, NULL, 0); 4512 4513 if (imgp->ip_flags & IMGPF_IS_64BIT) { 4514 expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos); 4515 } else { 4516 expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos); 4517 } 4518 4519 /* Decode dyld anchor structure from <mach-o/dyld_images.h> */ 4520 if (load_result->dynlinker && 4521 load_result->all_image_info_addr && 4522 load_result->all_image_info_size >= expected_all_image_infos_size) { 4523 union { 4524 struct user64_dyld_all_image_infos infos64; 4525 struct user32_dyld_all_image_infos infos32; 4526 } all_image_infos; 4527 4528 /* 4529 * Pre-fault to avoid copyin() going through the trap handler 4530 * and recovery path. 4531 */ 4532 vm_fault(current_map(), 4533 vm_map_trunc_page(load_result->all_image_info_addr, 4534 vm_map_page_mask(current_map())), 4535 VM_PROT_READ | VM_PROT_WRITE, 4536 FALSE, 4537 THREAD_UNINT, NULL, 0); 4538 if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) { 4539 /* all_image_infos straddles a page */ 4540 vm_fault(current_map(), 4541 vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1, 4542 vm_map_page_mask(current_map())), 4543 VM_PROT_READ | VM_PROT_WRITE, 4544 FALSE, 4545 THREAD_UNINT, NULL, 0); 4546 } 4547 4548 ret = copyin(load_result->all_image_info_addr, 4549 &all_image_infos, 4550 expected_all_image_infos_size); 4551 if (ret == 0 && all_image_infos.infos32.version >= 9) { 4552 4553 user_addr_t notification_address; 4554 user_addr_t dyld_image_address; 4555 user_addr_t dyld_version_address; 4556 user_addr_t dyld_all_image_infos_address; 4557 user_addr_t dyld_slide_amount; 4558 4559 if (imgp->ip_flags & IMGPF_IS_64BIT) { 4560 notification_address = all_image_infos.infos64.notification; 4561 
dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress; 4562 dyld_version_address = all_image_infos.infos64.dyldVersion; 4563 dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress; 4564 } else { 4565 notification_address = all_image_infos.infos32.notification; 4566 dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress; 4567 dyld_version_address = all_image_infos.infos32.dyldVersion; 4568 dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress; 4569 } 4570 4571 /* 4572 * dyld statically sets up the all_image_infos in its Mach-O 4573 * binary at static link time, with pointers relative to its default 4574 * load address. Since ASLR might slide dyld before its first 4575 * instruction is executed, "dyld_slide_amount" tells us how far 4576 * dyld was loaded compared to its default expected load address. 4577 * All other pointers into dyld's image should be adjusted by this 4578 * amount. At some point later, dyld will fix up pointers to take 4579 * into account the slide, at which point the all_image_infos_address 4580 * field in the structure will match the runtime load address, and 4581 * "dyld_slide_amount" will be 0, if we were to consult it again. 
4582 */ 4583 4584 dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address; 4585 4586#if 0 4587 kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", 4588 (uint64_t)load_result->all_image_info_addr, 4589 all_image_infos.infos32.version, 4590 (uint64_t)notification_address, 4591 (uint64_t)dyld_image_address, 4592 (uint64_t)dyld_version_address, 4593 (uint64_t)dyld_all_image_infos_address); 4594#endif 4595 4596 vm_fault(current_map(), 4597 vm_map_trunc_page(notification_address + dyld_slide_amount, 4598 vm_map_page_mask(current_map())), 4599 VM_PROT_READ | VM_PROT_EXECUTE, 4600 FALSE, 4601 THREAD_UNINT, NULL, 0); 4602 vm_fault(current_map(), 4603 vm_map_trunc_page(dyld_image_address + dyld_slide_amount, 4604 vm_map_page_mask(current_map())), 4605 VM_PROT_READ | VM_PROT_EXECUTE, 4606 FALSE, 4607 THREAD_UNINT, NULL, 0); 4608 vm_fault(current_map(), 4609 vm_map_trunc_page(dyld_version_address + dyld_slide_amount, 4610 vm_map_page_mask(current_map())), 4611 VM_PROT_READ, 4612 FALSE, 4613 THREAD_UNINT, NULL, 0); 4614 vm_fault(current_map(), 4615 vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount, 4616 vm_map_page_mask(current_map())), 4617 VM_PROT_READ | VM_PROT_WRITE, 4618 FALSE, 4619 THREAD_UNINT, NULL, 0); 4620 } 4621 } 4622} 4623