1/*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Mach Operating System
31 * Copyright (c) 1987 Carnegie-Mellon University
32 * All rights reserved.  The CMU software License Agreement specifies
33 * the terms and conditions for use and redistribution.
34 */
35
36#include <cputypes.h>
37
38/*-
39 * Copyright (c) 1982, 1986, 1991, 1993
40 *	The Regents of the University of California.  All rights reserved.
41 * (c) UNIX System Laboratories, Inc.
42 * All or some portions of this file are derived from material licensed
43 * to the University of California by American Telephone and Telegraph
44 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
45 * the permission of UNIX System Laboratories, Inc.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 *    notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 *    notice, this list of conditions and the following disclaimer in the
54 *    documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 *    must display the following acknowledgement:
57 *	This product includes software developed by the University of
58 *	California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 *    may be used to endorse or promote products derived from this software
61 *    without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 *	from: @(#)kern_exec.c	8.1 (Berkeley) 6/10/93
76 */
77/*
78 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
79 * support for mandatory and extensible security protections.  This notice
80 * is included in support of clause 2.2 (b) of the Apple Public License,
81 * Version 2.0.
82 */
83#include <machine/reg.h>
84#include <machine/cpu_capabilities.h>
85
86#include <sys/param.h>
87#include <sys/systm.h>
88#include <sys/filedesc.h>
89#include <sys/kernel.h>
90#include <sys/proc_internal.h>
91#include <sys/kauth.h>
92#include <sys/user.h>
93#include <sys/socketvar.h>
94#include <sys/malloc.h>
95#include <sys/namei.h>
96#include <sys/mount_internal.h>
97#include <sys/vnode_internal.h>
98#include <sys/file_internal.h>
99#include <sys/stat.h>
100#include <sys/uio_internal.h>
101#include <sys/acct.h>
102#include <sys/exec.h>
103#include <sys/kdebug.h>
104#include <sys/signal.h>
105#include <sys/aio_kern.h>
106#include <sys/sysproto.h>
107#if SYSV_SHM
108#include <sys/shm_internal.h>		/* shmexec() */
109#endif
110#include <sys/ubc_internal.h>		/* ubc_map() */
111#include <sys/spawn.h>
112#include <sys/spawn_internal.h>
113#include <sys/process_policy.h>
114#include <sys/codesign.h>
115#include <crypto/sha1.h>
116
117#include <libkern/libkern.h>
118
119#include <security/audit/audit.h>
120
121#include <ipc/ipc_types.h>
122
123#include <mach/mach_types.h>
124#include <mach/port.h>
125#include <mach/task.h>
126#include <mach/task_access.h>
127#include <mach/thread_act.h>
128#include <mach/vm_map.h>
129#include <mach/mach_vm.h>
130#include <mach/vm_param.h>
131
132#include <kern/sched_prim.h> /* thread_wakeup() */
133#include <kern/affinity.h>
134#include <kern/assert.h>
135#include <kern/task.h>
136
137#if CONFIG_MACF
138#include <security/mac.h>
139#include <security/mac_mach_internal.h>
140#endif
141
142#include <vm/vm_map.h>
143#include <vm/vm_kern.h>
144#include <vm/vm_protos.h>
145#include <vm/vm_kern.h>
146#include <vm/vm_fault.h>
147#include <vm/vm_pageout.h>
148
149#include <kdp/kdp_dyld.h>
150
151#include <machine/pal_routines.h>
152
153#include <pexpert/pexpert.h>
154
155#if CONFIG_MEMORYSTATUS
156#include <sys/kern_memorystatus.h>
157#endif
158
159#if CONFIG_DTRACE
160/* Do not include dtrace.h, it redefines kmem_[alloc/free] */
161extern void (*dtrace_fasttrap_exec_ptr)(proc_t);
162extern void (*dtrace_helpers_cleanup)(proc_t);
163extern void dtrace_lazy_dofs_destroy(proc_t);
164
165#include <sys/dtrace_ptss.h>
166#endif
167
/*
 * support for child creation in exec after vfork
 *
 * fork_create_child() is called by exec_mach_imgact() on the vfork path to
 * obtain the thread that will run the new image; a NULL return is reported
 * there as ENOMEM.  These routines are declared locally in this file
 * rather than being obtained from a header.
 */
thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit);
void vfork_exit(proc_t p, int rv);
int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart);
extern void proc_apply_task_networkbg_internal(proc_t, thread_t);

/*
 * Mach things for which prototypes are unavailable from Mach headers
 */
void		ipc_task_reset(
			task_t		task);
void		ipc_thread_reset(
			thread_t	thread);
kern_return_t ipc_object_copyin(
	ipc_space_t		space,
	mach_port_name_t	name,
	mach_msg_type_name_t	msgt_name,
	ipc_object_t		*objectp);
void ipc_port_release_send(ipc_port_t);

/* NOTE(review): machine-dependent register accessor; confirm it is still needed here. */
extern struct savearea *get_user_regs(thread_t);
189
190
191#include <kern/thread.h>
192#include <kern/task.h>
193#include <kern/ast.h>
194#include <kern/mach_loader.h>
195#include <kern/mach_fat.h>
196#include <mach-o/fat.h>
197#include <mach-o/loader.h>
198#include <machine/vmparam.h>
199#include <sys/imgact.h>
200
201#include <sys/sdt.h>
202
203
/*
 * EAI_ITERLIMIT	The maximum number of times to iterate an image
 *			activator in exec_activate_image() before treating
 *			it as malformed/corrupt.
 */
#define EAI_ITERLIMIT		10

/*
 * For #! interpreter parsing
 *
 * IS_WHITESPACE	true for a blank or a tab
 * IS_EOL		true for the characters that end the interpreter
 *			line: a comment character ('#') or a newline
 *
 * The argument is parenthesized at every use so that an expression
 * argument is classified as a whole.  It is evaluated more than once,
 * so it must not have side effects.
 */
#define IS_WHITESPACE(ch) (((ch) == ' ') || ((ch) == '\t'))
#define IS_EOL(ch) (((ch) == '#') || ((ch) == '\n'))
216
/* VM map defined outside this file (presumably backs the exec argument area -- TODO confirm). */
extern vm_map_t bsd_pageable_map;
/* File operations vector that exec_shell_imgact() installs on the descriptor it creates for set-id scripts. */
extern const struct fileops vnops;
219
/*
 * ROUND_PTR	Round 'addr' up to the next 16-byte boundary and yield the
 *		result as a 'type *'.  An address that is already a
 *		multiple of 16 is returned unchanged.
 *
 * The mask is built in uintptr_t so it covers the full pointer width, and
 * the whole expansion is parenthesized so that the result can be followed
 * directly by a postfix operator ([], ->).
 */
#define	ROUND_PTR(type, addr)	\
	((type *)( ( (uintptr_t)(addr) + 16 - 1) \
		  & ~((uintptr_t)16 - 1) ))
223
/*
 * Forward declarations for the file-local helpers used by the image
 * activators, plus the sugid_scripts tunable.
 */
struct image_params;	/* Forward */
static int exec_activate_image(struct image_params *imgp);
static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
static int load_return_to_errno(load_return_t lrtn);
static int execargs_alloc(struct image_params *imgp);
static int execargs_free(struct image_params *imgp);
static int exec_check_permissions(struct image_params *imgp);
static int exec_extract_strings(struct image_params *imgp);
static int exec_add_apple_strings(struct image_params *imgp);
static int exec_handle_sugid(struct image_params *imgp);
/*
 * When zero (the default), exec_shell_imgact() clears the set-uid and
 * set-gid bits from the script's attributes before handing it to the
 * interpreter.  Registered as a read-write integer under the _kern
 * sysctl node.
 */
static int sugid_scripts = 0;
SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
static kern_return_t create_unix_stack(vm_map_t map, load_result_t* load_result, proc_t p);
static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
static void exec_resettextvp(proc_t, struct image_params *);
static int check_for_signature(proc_t, struct image_params *);
static void exec_prefault_data(proc_t, struct image_params *, load_result_t *);
static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch);
static errno_t exec_handle_spawnattr_apptype(proc_t p, int psa_apptype);
244/*
245 * exec_add_user_string
246 *
247 * Add the requested string to the string space area.
248 *
249 * Parameters;	struct image_params *		image parameter block
250 *		user_addr_t			string to add to strings area
251 *		int				segment from which string comes
252 *		boolean_t			TRUE if string contributes to NCARGS
253 *
254 * Returns:	0			Success
255 *		!0			Failure errno from copyinstr()
256 *
257 * Implicit returns:
258 *		(imgp->ip_strendp)	updated location of next add, if any
259 *		(imgp->ip_strspace)	updated byte count of space remaining
260 *		(imgp->ip_argspace) updated byte count of space in NCARGS
261 */
262static int
263exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
264{
265	int error = 0;
266
267	do {
268		size_t len = 0;
269		int space;
270
271		if (is_ncargs)
272			space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
273		else
274			space = imgp->ip_strspace;
275
276		if (space <= 0) {
277			error = E2BIG;
278			break;
279		}
280
281		if (!UIO_SEG_IS_USER_SPACE(seg)) {
282			char *kstr = CAST_DOWN(char *,str);	/* SAFE */
283			error = copystr(kstr, imgp->ip_strendp, space, &len);
284		} else  {
285			error = copyinstr(str, imgp->ip_strendp, space, &len);
286		}
287
288		imgp->ip_strendp += len;
289		imgp->ip_strspace -= len;
290		if (is_ncargs)
291			imgp->ip_argspace -= len;
292
293	} while (error == ENAMETOOLONG);
294
295	return error;
296}
297
298/*
299 * exec_save_path
300 *
301 * To support new app package launching for Mac OS X, the dyld needs the
302 * first argument to execve() stored on the user stack.
303 *
304 * Save the executable path name at the bottom of the strings area and set
305 * the argument vector pointer to the location following that to indicate
306 * the start of the argument and environment tuples, setting the remaining
307 * string space count to the size of the string area minus the path length.
308 *
309 * Parameters;	struct image_params *		image parameter block
310 *		char *				path used to invoke program
311 *		int				segment from which path comes
312 *
313 * Returns:	int			0	Success
314 *		EFAULT				Bad address
315 *	copy[in]str:EFAULT			Bad address
316 *	copy[in]str:ENAMETOOLONG		Filename too long
317 *
318 * Implicit returns:
319 *		(imgp->ip_strings)		saved path
320 *		(imgp->ip_strspace)		space remaining in ip_strings
321 *		(imgp->ip_strendp)		start of remaining copy area
322 *		(imgp->ip_argspace)		space remaining of NCARGS
323 *		(imgp->ip_applec)		Initial applev[0]
324 *
325 * Note:	We have to do this before the initial namei() since in the
326 *		path contains symbolic links, namei() will overwrite the
327 *		original path buffer contents.  If the last symbolic link
328 *		resolved was a relative pathname, we would lose the original
329 *		"path", which could be an absolute pathname. This might be
330 *		unacceptable for dyld.
331 */
332static int
333exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
334{
335	int error;
336	size_t	len;
337	char *kpath;
338
339	len = MIN(MAXPATHLEN, imgp->ip_strspace);
340
341	switch(seg) {
342	case UIO_USERSPACE32:
343	case UIO_USERSPACE64:	/* Same for copyin()... */
344		error = copyinstr(path, imgp->ip_strings, len, &len);
345		break;
346	case UIO_SYSSPACE:
347		kpath = CAST_DOWN(char *,path);	/* SAFE */
348		error = copystr(kpath, imgp->ip_strings, len, &len);
349		break;
350	default:
351		error = EFAULT;
352		break;
353	}
354
355	if (!error) {
356		imgp->ip_strendp += len;
357		imgp->ip_strspace -= len;
358	}
359
360	return(error);
361}
362
363/*
364 * exec_reset_save_path
365 *
366 * If we detect a shell script, we need to reset the string area
367 * state so that the interpreter can be saved onto the stack.
368
369 * Parameters;	struct image_params *		image parameter block
370 *
371 * Returns:	int			0	Success
372 *
373 * Implicit returns:
374 *		(imgp->ip_strings)		saved path
375 *		(imgp->ip_strspace)		space remaining in ip_strings
376 *		(imgp->ip_strendp)		start of remaining copy area
377 *		(imgp->ip_argspace)		space remaining of NCARGS
378 *
379 */
380static int
381exec_reset_save_path(struct image_params *imgp)
382{
383	imgp->ip_strendp = imgp->ip_strings;
384	imgp->ip_argspace = NCARGS;
385	imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
386
387	return (0);
388}
389
390/*
391 * exec_shell_imgact
392 *
393 * Image activator for interpreter scripts.  If the image begins with the
394 * characters "#!", then it is an interpreter script.  Verify that we are
395 * not already executing in PowerPC mode, and that the length of the script
396 * line indicating the interpreter is not in excess of the maximum allowed
397 * size.  If this is the case, then break out the arguments, if any, which
398 * are separated by white space, and copy them into the argument save area
399 * as if they were provided on the command line before all other arguments.
400 * The line ends when we encounter a comment character ('#') or newline.
401 *
402 * Parameters;	struct image_params *	image parameter block
403 *
404 * Returns:	-1			not an interpreter (keep looking)
405 *		-3			Success: interpreter: relookup
406 *		>0			Failure: interpreter: error number
407 *
408 * A return value other than -1 indicates subsequent image activators should
409 * not be given the opportunity to attempt to activate the image.
410 */
411static int
412exec_shell_imgact(struct image_params *imgp)
413{
414	char *vdata = imgp->ip_vdata;
415	char *ihp;
416	char *line_startp, *line_endp;
417	char *interp;
418	proc_t p;
419	struct fileproc *fp;
420	int fd;
421	int error;
422
423	/*
424	 * Make sure it's a shell script.  If we've already redirected
425	 * from an interpreted file once, don't do it again.
426	 *
427	 * Note: We disallow PowerPC, since the expectation is that we
428	 * may run a PowerPC interpreter, but not an interpret a PowerPC
429	 * image.  This is consistent with historical behaviour.
430	 */
431	if (vdata[0] != '#' ||
432	    vdata[1] != '!' ||
433	    (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
434		return (-1);
435	}
436
437	imgp->ip_flags |= IMGPF_INTERPRET;
438	imgp->ip_interp_sugid_fd = -1;
439	imgp->ip_interp_buffer[0] = '\0';
440
441	/* Check to see if SUGID scripts are permitted.  If they aren't then
442	 * clear the SUGID bits.
443	 * imgp->ip_vattr is known to be valid.
444	 */
445	if (sugid_scripts == 0) {
446		imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
447	}
448
449	/* Try to find the first non-whitespace character */
450	for( ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
451		if (IS_EOL(*ihp)) {
452			/* Did not find interpreter, "#!\n" */
453			return (ENOEXEC);
454		} else if (IS_WHITESPACE(*ihp)) {
455			/* Whitespace, like "#!    /bin/sh\n", keep going. */
456		} else {
457			/* Found start of interpreter */
458			break;
459		}
460	}
461
462	if (ihp == &vdata[IMG_SHSIZE]) {
463		/* All whitespace, like "#!           " */
464		return (ENOEXEC);
465	}
466
467	line_startp = ihp;
468
469	/* Try to find the end of the interpreter+args string */
470	for ( ; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
471		if (IS_EOL(*ihp)) {
472			/* Got it */
473			break;
474		} else {
475			/* Still part of interpreter or args */
476		}
477	}
478
479	if (ihp == &vdata[IMG_SHSIZE]) {
480		/* A long line, like "#! blah blah blah" without end */
481		return (ENOEXEC);
482	}
483
484	/* Backtrack until we find the last non-whitespace */
485	while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
486		ihp--;
487	}
488
489	/* The character after the last non-whitespace is our logical end of line */
490	line_endp = ihp + 1;
491
492	/*
493	 * Now we have pointers to the usable part of:
494	 *
495	 * "#!  /usr/bin/int first    second   third    \n"
496	 *      ^ line_startp                       ^ line_endp
497	 */
498
499	/* copy the interpreter name */
500	interp = imgp->ip_interp_buffer;
501	for ( ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++)
502		*interp++ = *ihp;
503	*interp = '\0';
504
505	exec_reset_save_path(imgp);
506	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
507							UIO_SYSSPACE);
508
509	/* Copy the entire interpreter + args for later processing into argv[] */
510	interp = imgp->ip_interp_buffer;
511	for ( ihp = line_startp; (ihp < line_endp); ihp++)
512		*interp++ = *ihp;
513	*interp = '\0';
514
515	/*
516	 * If we have a SUID oder SGID script, create a file descriptor
517	 * from the vnode and pass /dev/fd/%d instead of the actual
518	 * path name so that the script does not get opened twice
519	 */
520	if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
521		p = vfs_context_proc(imgp->ip_vfs_context);
522		error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
523		if (error)
524			return(error);
525
526		fp->f_fglob->fg_flag = FREAD;
527		fp->f_fglob->fg_ops = &vnops;
528		fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;
529
530		proc_fdlock(p);
531		procfdtbl_releasefd(p, fd, NULL);
532		fp_drop(p, fd, fp, 1);
533		proc_fdunlock(p);
534		vnode_ref(imgp->ip_vp);
535
536		imgp->ip_interp_sugid_fd = fd;
537	}
538
539	return (-3);
540}
541
542
543
544/*
545 * exec_fat_imgact
546 *
547 * Image activator for fat 1.0 binaries.  If the binary is fat, then we
548 * need to select an image from it internally, and make that the image
549 * we are going to attempt to execute.  At present, this consists of
550 * reloading the first page for the image with a first page from the
551 * offset location indicated by the fat header.
552 *
553 * Parameters;	struct image_params *	image parameter block
554 *
555 * Returns:	-1			not a fat binary (keep looking)
556 *		-2			Success: encapsulated binary: reread
557 *		>0			Failure: error number
558 *
559 * Important:	This image activator is byte order neutral.
560 *
561 * Note:	A return value other than -1 indicates subsequent image
562 *		activators should not be given the opportunity to attempt
563 *		to activate the image.
564 *
565 * 		If we find an encapsulated binary, we make no assertions
566 *		about its  validity; instead, we leave that up to a rescan
567 *		for an activator to claim it, and, if it is claimed by one,
568 *		that activator is responsible for determining validity.
569 */
570static int
571exec_fat_imgact(struct image_params *imgp)
572{
573	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
574	kauth_cred_t cred = kauth_cred_proc_ref(p);
575	struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
576	struct _posix_spawnattr *psa = NULL;
577	struct fat_arch fat_arch;
578	int resid, error;
579	load_return_t lret;
580
581	/* Make sure it's a fat binary */
582	if ((fat_header->magic != FAT_MAGIC) &&
583            (fat_header->magic != FAT_CIGAM)) {
584	    	error = -1;
585		goto bad;
586	}
587
588	/* If posix_spawn binprefs exist, respect those prefs. */
589	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
590	if (psa != NULL && psa->psa_binprefs[0] != 0) {
591		struct fat_arch *arches = (struct fat_arch *) (fat_header + 1);
592		int nfat_arch = 0, pr = 0, f = 0;
593
594		nfat_arch = OSSwapBigToHostInt32(fat_header->nfat_arch);
595		/* Check each preference listed against all arches in header */
596		for (pr = 0; pr < NBINPREFS; pr++) {
597			cpu_type_t pref = psa->psa_binprefs[pr];
598			if (pref == 0) {
599				/* No suitable arch in the pref list */
600				error = EBADARCH;
601				goto bad;
602			}
603
604			if (pref == CPU_TYPE_ANY) {
605				/* Fall through to regular grading */
606				break;
607			}
608
609			for (f = 0; f < nfat_arch; f++) {
610				cpu_type_t archtype = OSSwapBigToHostInt32(
611						arches[f].cputype);
612				cpu_type_t archsubtype = OSSwapBigToHostInt32(
613						arches[f].cpusubtype) & ~CPU_SUBTYPE_MASK;
614				if (pref == archtype &&
615					grade_binary(archtype, archsubtype)) {
616					/* We have a winner! */
617					fat_arch.cputype = archtype;
618					fat_arch.cpusubtype = archsubtype;
619					fat_arch.offset = OSSwapBigToHostInt32(
620							arches[f].offset);
621					fat_arch.size = OSSwapBigToHostInt32(
622							arches[f].size);
623					fat_arch.align = OSSwapBigToHostInt32(
624							arches[f].align);
625					goto use_arch;
626				}
627			}
628		}
629	}
630
631	/* Look up our preferred architecture in the fat file. */
632	lret = fatfile_getarch_affinity(imgp->ip_vp,
633					(vm_offset_t)fat_header,
634					&fat_arch,
635					(p->p_flag & P_AFFINITY));
636	if (lret != LOAD_SUCCESS) {
637		error = load_return_to_errno(lret);
638		goto bad;
639	}
640
641use_arch:
642	/* Read the Mach-O header out of fat_arch */
643	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
644			PAGE_SIZE, fat_arch.offset,
645			UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED),
646			cred, &resid, p);
647	if (error) {
648		goto bad;
649	}
650
651	/* Did we read a complete header? */
652	if (resid) {
653		error = EBADEXEC;
654		goto bad;
655	}
656
657	/* Success.  Indicate we have identified an encapsulated binary */
658	error = -2;
659	imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
660	imgp->ip_arch_size = (user_size_t)fat_arch.size;
661
662bad:
663	kauth_cred_unref(&cred);
664	return (error);
665}
666
667/*
668 * exec_mach_imgact
669 *
670 * Image activator for mach-o 1.0 binaries.
671 *
672 * Parameters;	struct image_params *	image parameter block
673 *
 * Returns:	-1			not a Mach-O executable (keep looking)
675 *		-2			Success: encapsulated binary: reread
676 *		>0			Failure: error number
677 *		EBADARCH		Mach-o binary, but with an unrecognized
678 *					architecture
679 *		ENOMEM			No memory for child process after -
680 *					can only happen after vfork()
681 *
682 * Important:	This image activator is NOT byte order neutral.
683 *
684 * Note:	A return value other than -1 indicates subsequent image
685 *		activators should not be given the opportunity to attempt
686 *		to activate the image.
687 *
688 * TODO:	More gracefully handle failures after vfork
689 */
690static int
691exec_mach_imgact(struct image_params *imgp)
692{
693	struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
694	proc_t			p = vfs_context_proc(imgp->ip_vfs_context);
695	int			error = 0;
696	int			vfexec = 0;
697	task_t			task;
698	task_t			new_task = NULL; /* protected by vfexec */
699	thread_t		thread;
700	struct uthread		*uthread;
701	vm_map_t old_map = VM_MAP_NULL;
702	vm_map_t map;
703	load_return_t		lret;
704	load_result_t		load_result;
705	struct _posix_spawnattr *psa = NULL;
706	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
707
708	/*
709	 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
710	 * is a reserved field on the end, so for the most part, we can
711	 * treat them as if they were identical. Reverse-endian Mach-O
712	 * binaries are recognized but not compatible.
713 	 */
714	if ((mach_header->magic == MH_CIGAM) ||
715	    (mach_header->magic == MH_CIGAM_64)) {
716		error = EBADARCH;
717		goto bad;
718	}
719
720	if ((mach_header->magic != MH_MAGIC) &&
721	    (mach_header->magic != MH_MAGIC_64)) {
722		error = -1;
723		goto bad;
724	}
725
726	switch (mach_header->filetype) {
727	case MH_DYLIB:
728	case MH_BUNDLE:
729		error = -1;
730		goto bad;
731	}
732
733	if (!imgp->ip_origcputype) {
734		imgp->ip_origcputype = mach_header->cputype;
735		imgp->ip_origcpusubtype = mach_header->cpusubtype;
736	}
737
738	task = current_task();
739	thread = current_thread();
740	uthread = get_bsdthread_info(thread);
741
742	/*
743	 * Save off the vfexec state up front; we have to do this, because
744	 * we need to know if we were in this state initially subsequent to
745	 * creating the backing task, thread, and uthread for the child
746	 * process (from the vfs_context_t from in img_parms).
747	 */
748	if (uthread->uu_flag & UT_VFORK)
749		vfexec = 1;	 /* Mark in exec */
750
751	if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
752		imgp->ip_flags |= IMGPF_IS_64BIT;
753
754	/* If posix_spawn binprefs exist, respect those prefs. */
755	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
756	if (psa != NULL && psa->psa_binprefs[0] != 0) {
757		int pr = 0;
758		for (pr = 0; pr < NBINPREFS; pr++) {
759			cpu_type_t pref = psa->psa_binprefs[pr];
760			if (pref == 0) {
761				/* No suitable arch in the pref list */
762				error = EBADARCH;
763				goto bad;
764			}
765
766			if (pref == CPU_TYPE_ANY) {
767				/* Jump to regular grading */
768				goto grade;
769			}
770
771			if (pref == imgp->ip_origcputype) {
772				/* We have a match! */
773				goto grade;
774			}
775		}
776		error = EBADARCH;
777		goto bad;
778	}
779grade:
780	if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
781		error = EBADARCH;
782		goto bad;
783	}
784
785	/* Copy in arguments/environment from the old process */
786	error = exec_extract_strings(imgp);
787	if (error)
788		goto bad;
789
790	error = exec_add_apple_strings(imgp);
791	if (error)
792		goto bad;
793
794	AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
795	    imgp->ip_endargv - imgp->ip_startargv);
796	AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
797	    imgp->ip_endenvv - imgp->ip_endargv);
798
799	/*
800	 * We are being called to activate an image subsequent to a vfork()
801	 * operation; in this case, we know that our task, thread, and
802	 * uthread are actually those of our parent, and our proc, which we
803	 * obtained indirectly from the image_params vfs_context_t, is the
804	 * new child process.
805	 */
806	if (vfexec || spawn) {
807		if (vfexec) {
808			imgp->ip_new_thread = fork_create_child(task, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
809			if (imgp->ip_new_thread == NULL) {
810				error = ENOMEM;
811				goto bad;
812			}
813		}
814
815		/* reset local idea of thread, uthread, task */
816		thread = imgp->ip_new_thread;
817		uthread = get_bsdthread_info(thread);
818		task = new_task = get_threadtask(thread);
819		map = get_task_map(task);
820	} else {
821		map = VM_MAP_NULL;
822	}
823
824	/*
825	 * We set these flags here; this is OK, since if we fail after
826	 * this point, we have already destroyed the parent process anyway.
827	 */
828	task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
829	if (imgp->ip_flags & IMGPF_IS_64BIT) {
830		task_set_64bit(task, TRUE);
831		OSBitOrAtomic(P_LP64, &p->p_flag);
832	} else {
833		task_set_64bit(task, FALSE);
834		OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
835	}
836
837	/*
838	 *	Load the Mach-O file.
839	 *
840	 * NOTE: An error after this point  indicates we have potentially
841	 * destroyed or overwritten some process state while attempting an
842	 * execve() following a vfork(), which is an unrecoverable condition.
843	 */
844
845	/*
846	 * Actually load the image file we previously decided to load.
847	 */
848	lret = load_machfile(imgp, mach_header, thread, map, &load_result);
849
850	if (lret != LOAD_SUCCESS) {
851		error = load_return_to_errno(lret);
852		goto badtoolate;
853	}
854
855	vm_map_set_user_wire_limit(get_task_map(task), p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
856
857	/*
858	 * Set code-signing flags if this binary is signed, or if parent has
859	 * requested them on exec.
860	 */
861	if (load_result.csflags & CS_VALID) {
862		imgp->ip_csflags |= load_result.csflags &
863			(CS_VALID|
864			 CS_HARD|CS_KILL|CS_ENFORCEMENT|
865			 CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
866	} else {
867		imgp->ip_csflags &= ~CS_VALID;
868	}
869
870	if (p->p_csflags & CS_EXEC_SET_HARD)
871		imgp->ip_csflags |= CS_HARD;
872	if (p->p_csflags & CS_EXEC_SET_KILL)
873		imgp->ip_csflags |= CS_KILL;
874	if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
875		imgp->ip_csflags |= CS_ENFORCEMENT;
876
877
878	/*
879	 * Set up the system reserved areas in the new address space.
880	 */
881	vm_map_exec(get_task_map(task),
882		    task,
883		    (void *) p->p_fd->fd_rdir,
884		    cpu_type());
885
886	/*
887	 * Close file descriptors which specify close-on-exec.
888	 */
889	fdexec(p, psa != NULL ? psa->psa_flags : 0);
890
891	/*
892	 * deal with set[ug]id.
893	 */
894	error = exec_handle_sugid(imgp);
895
896	/* Make sure we won't interrupt ourself signalling a partial process */
897	if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
898		psignal(p, SIGTRAP);
899
900	if (error) {
901		goto badtoolate;
902	}
903
904	if (load_result.unixproc &&
905		create_unix_stack(get_task_map(task),
906				  &load_result,
907				  p) != KERN_SUCCESS) {
908		error = load_return_to_errno(LOAD_NOSPACE);
909		goto badtoolate;
910	}
911
912	if (vfexec || spawn) {
913		old_map = vm_map_switch(get_task_map(task));
914	}
915
916	if (load_result.unixproc) {
917		user_addr_t	ap;
918
919		/*
920		 * Copy the strings area out into the new process address
921		 * space.
922		 */
923		ap = p->user_stack;
924		error = exec_copyout_strings(imgp, &ap);
925		if (error) {
926			if (vfexec || spawn)
927				vm_map_switch(old_map);
928			goto badtoolate;
929		}
930		/* Set the stack */
931		thread_setuserstack(thread, ap);
932	}
933
934	if (load_result.dynlinker) {
935		uint64_t	ap;
936		int			new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
937
938		/* Adjust the stack */
939		ap = thread_adjuserstack(thread, -new_ptr_size);
940		error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
941
942		if (error) {
943		        if (vfexec || spawn)
944			        vm_map_switch(old_map);
945			goto badtoolate;
946		}
947		task_set_dyld_info(task, load_result.all_image_info_addr,
948		    load_result.all_image_info_size);
949	}
950
951	/* Avoid immediate VM faults back into kernel */
952	exec_prefault_data(p, imgp, &load_result);
953
954	if (vfexec || spawn) {
955		vm_map_switch(old_map);
956	}
957	/* Set the entry point */
958	thread_setentrypoint(thread, load_result.entry_point);
959
960	/* Stop profiling */
961	stopprofclock(p);
962
963	/*
964	 * Reset signal state.
965	 */
966	execsigs(p, thread);
967
968	/*
969	 * need to cancel async IO requests that can be cancelled and wait for those
970	 * already active.  MAY BLOCK!
971	 */
972	_aio_exec( p );
973
974#if SYSV_SHM
975	/* FIXME: Till vmspace inherit is fixed: */
976	if (!vfexec && p->vm_shm)
977		shmexec(p);
978#endif
979#if SYSV_SEM
980	/* Clean up the semaphores */
981	semexit(p);
982#endif
983
984	/*
985	 * Remember file name for accounting.
986	 */
987	p->p_acflag &= ~AFORK;
988	/* If the translated name isn't NULL, then we want to use
989	 * that translated name as the name we show as the "real" name.
990	 * Otherwise, use the name passed into exec.
991	 */
992	if (0 != imgp->ip_p_comm[0]) {
993		bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
994			sizeof(p->p_comm));
995	} else {
996		if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
997			imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
998		bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
999			(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
1000		p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
1001	}
1002
1003	pal_dbg_set_task_name( p->task );
1004
1005	memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
1006
1007// <rdar://6598155> dtrace code cleanup needed
1008#if CONFIG_DTRACE
1009	/*
1010	 * Invalidate any predicate evaluation already cached for this thread by DTrace.
1011	 * That's because we've just stored to p_comm and DTrace refers to that when it
1012	 * evaluates the "execname" special variable. uid and gid may have changed as well.
1013	 */
1014	dtrace_set_thread_predcache(current_thread(), 0);
1015
1016	/*
1017	 * Free any outstanding lazy dof entries. It is imperative we
1018	 * always call dtrace_lazy_dofs_destroy, rather than null check
1019	 * and call if !NULL. If we NULL test, during lazy dof faulting
1020	 * we can race with the faulting code and proceed from here to
1021	 * beyond the helpers cleanup. The lazy dof faulting will then
1022	 * install new helpers which no longer belong to this process!
1023	 */
1024	dtrace_lazy_dofs_destroy(p);
1025
1026
1027	/*
1028    	 * Clean up any DTrace helpers for the process.
1029    	 */
1030    	if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
1031    		(*dtrace_helpers_cleanup)(p);
1032    	}
1033
1034    	/*
1035    	 * Cleanup the DTrace provider associated with this process.
1036    	 */
1037	proc_lock(p);
1038	if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
1039    		(*dtrace_fasttrap_exec_ptr)(p);
1040    	}
1041	proc_unlock(p);
1042#endif
1043
1044	if (kdebug_enable) {
1045		long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
1046
1047		/*
1048		 * Collect the pathname for tracing
1049		 */
1050		kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
1051
1052		if (vfexec || spawn) {
1053			KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
1054					p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread));
1055			KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
1056					dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread));
1057		} else {
1058			KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
1059					p->p_pid ,0,0,0,0);
1060			KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
1061					dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
1062		}
1063	}
1064
1065	/*
1066	 * Ensure the 'translated' and 'affinity' flags are cleared, since we
1067	 * no longer run PowerPC binaries.
1068	 */
1069	OSBitAndAtomic(~((uint32_t)(P_TRANSLATED | P_AFFINITY)), &p->p_flag);
1070
1071	/*
1072	 * If posix_spawned with the START_SUSPENDED flag, stop the
1073	 * process before it runs.
1074	 */
1075	if (imgp->ip_px_sa != NULL) {
1076		psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1077		if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
1078			proc_lock(p);
1079			p->p_stat = SSTOP;
1080			proc_unlock(p);
1081			(void) task_suspend(p->task);
1082		}
1083	}
1084
1085	/*
1086	 * Apply the apptype state (which primes the task for importance donation)
1087	 * This must be done after the exec so that the child's thread is ready
1088	 */
1089	if (imgp->ip_px_sa != NULL) {
1090		psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1091		exec_handle_spawnattr_apptype(p, psa->psa_apptype);
1092	}
1093
1094	/*
1095	 * mark as execed, wakeup the process that vforked (if any) and tell
1096	 * it that it now has its own resources back
1097	 */
1098	OSBitOrAtomic(P_EXEC, &p->p_flag);
1099	proc_resetregister(p);
1100	if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
1101		proc_lock(p);
1102		p->p_lflag &= ~P_LPPWAIT;
1103		proc_unlock(p);
1104		wakeup((caddr_t)p->p_pptr);
1105	}
1106
1107	/*
1108	 * Pay for our earlier safety; deliver the delayed signals from
1109	 * the incomplete vfexec process now that it's complete.
1110	 */
1111	if (vfexec && (p->p_lflag & P_LTRACED)) {
1112		psignal_vfork(p, new_task, thread, SIGTRAP);
1113	}
1114
1115badtoolate:
1116if (!spawn)
1117	/* notify only if it has not failed due to FP Key error */
1118	if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
1119		proc_knote(p, NOTE_EXEC);
1120
1121	if (vfexec || spawn) {
1122		task_deallocate(new_task);
1123		thread_deallocate(thread);
1124		if (error)
1125			error = 0;
1126	}
1127
1128bad:
1129	return(error);
1130}
1131
1132
1133
1134
1135/*
1136 * Our image activator table; this is the table of the image types we are
1137 * capable of loading.  We list them in order of preference to ensure the
1138 * fastest image load speed.
1139 *
1140 * XXX hardcoded, for now; should use linker sets
1141 */
1142struct execsw {
1143	int (*ex_imgact)(struct image_params *);
1144	const char *ex_name;
1145} execsw[] = {
1146	{ exec_mach_imgact,		"Mach-o Binary" },
1147	{ exec_fat_imgact,		"Fat Binary" },
1148	{ exec_shell_imgact,		"Interpreter Script" },
1149	{ NULL, NULL}
1150};
1151
1152
1153/*
1154 * exec_activate_image
1155 *
1156 * Description:	Iterate through the available image activators, and activate
1157 *		the image associated with the imgp structure.  We start with
1158 *		the
1159 *
1160 * Parameters:	struct image_params *	Image parameter block
1161 *
1162 * Returns:	0			Success
1163 *		EBADEXEC		The executable is corrupt/unknown
1164 *	execargs_alloc:EINVAL		Invalid argument
1165 *	execargs_alloc:EACCES		Permission denied
1166 *	execargs_alloc:EINTR		Interrupted function
1167 *	execargs_alloc:ENOMEM		Not enough space
1168 *	exec_save_path:EFAULT		Bad address
1169 *	exec_save_path:ENAMETOOLONG	Filename too long
1170 *	exec_check_permissions:EACCES	Permission denied
1171 *	exec_check_permissions:ENOEXEC	Executable file format error
1172 *	exec_check_permissions:ETXTBSY	Text file busy [misuse of error code]
1173 *	exec_check_permissions:???
1174 *	namei:???
1175 *	vn_rdwr:???			[anything vn_rdwr can return]
1176 *	<ex_imgact>:???			[anything an imgact can return]
1177 */
1178static int
1179exec_activate_image(struct image_params *imgp)
1180{
1181	struct nameidata nd;
1182	int error;
1183	int resid;
1184	int once = 1;	/* save SGUID-ness for interpreted files */
1185	int i;
1186	int iterlimit = EAI_ITERLIMIT;
1187	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1188
1189	error = execargs_alloc(imgp);
1190	if (error)
1191		goto bad_notrans;
1192
1193	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
1194	if (error) {
1195		goto bad_notrans;
1196	}
1197
1198	/* Use imgp->ip_strings, which contains the copyin-ed exec path */
1199	DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
1200
1201	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
1202		   UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
1203
1204again:
1205	error = namei(&nd);
1206	if (error)
1207		goto bad_notrans;
1208	imgp->ip_ndp = &nd;	/* successful namei(); call nameidone() later */
1209	imgp->ip_vp = nd.ni_vp;	/* if set, need to vnode_put() at some point */
1210
1211	/*
1212	 * Before we start the transition from binary A to binary B, make
1213	 * sure another thread hasn't started exiting the process.  We grab
1214	 * the proc lock to check p_lflag initially, and the transition
1215	 * mechanism ensures that the value doesn't change after we release
1216	 * the lock.
1217	 */
1218	proc_lock(p);
1219	if (p->p_lflag & P_LEXIT) {
1220		proc_unlock(p);
1221		goto bad_notrans;
1222	}
1223	error = proc_transstart(p, 1);
1224	proc_unlock(p);
1225	if (error)
1226		goto bad_notrans;
1227
1228	error = exec_check_permissions(imgp);
1229	if (error)
1230		goto bad;
1231
1232	/* Copy; avoid invocation of an interpreter overwriting the original */
1233	if (once) {
1234		once = 0;
1235		*imgp->ip_origvattr = *imgp->ip_vattr;
1236	}
1237
1238	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
1239			UIO_SYSSPACE, IO_NODELOCKED,
1240			vfs_context_ucred(imgp->ip_vfs_context),
1241			&resid, vfs_context_proc(imgp->ip_vfs_context));
1242	if (error)
1243		goto bad;
1244
1245encapsulated_binary:
1246	/* Limit the number of iterations we will attempt on each binary */
1247	if (--iterlimit == 0) {
1248		error = EBADEXEC;
1249		goto bad;
1250	}
1251	error = -1;
1252	for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
1253
1254		error = (*execsw[i].ex_imgact)(imgp);
1255
1256		switch (error) {
1257		/* case -1: not claimed: continue */
1258		case -2:		/* Encapsulated binary */
1259			goto encapsulated_binary;
1260
1261		case -3:		/* Interpreter */
1262#if CONFIG_MACF
1263			/*
1264			 * Copy the script label for later use. Note that
1265			 * the label can be different when the script is
1266			 * actually read by the interpreter.
1267			 */
1268			if (imgp->ip_scriptlabelp)
1269				mac_vnode_label_free(imgp->ip_scriptlabelp);
1270			imgp->ip_scriptlabelp = mac_vnode_label_alloc();
1271			if (imgp->ip_scriptlabelp == NULL) {
1272				error = ENOMEM;
1273				break;
1274			}
1275			mac_vnode_label_copy(imgp->ip_vp->v_label,
1276					     imgp->ip_scriptlabelp);
1277
1278			/*
1279			 * Take a ref of the script vnode for later use.
1280			 */
1281			if (imgp->ip_scriptvp)
1282				vnode_put(imgp->ip_scriptvp);
1283			if (vnode_getwithref(imgp->ip_vp) == 0)
1284				imgp->ip_scriptvp = imgp->ip_vp;
1285#endif
1286
1287			nameidone(&nd);
1288
1289			vnode_put(imgp->ip_vp);
1290			imgp->ip_vp = NULL;	/* already put */
1291			imgp->ip_ndp = NULL; /* already nameidone */
1292
1293			/* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */
1294			NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
1295				   UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
1296
1297			proc_transend(p, 0);
1298			goto again;
1299
1300		default:
1301			break;
1302		}
1303	}
1304
1305	/*
1306	 * Call out to allow 3rd party notification of exec.
1307	 * Ignore result of kauth_authorize_fileop call.
1308	 */
1309	if (error == 0 && kauth_authorize_fileop_has_listeners()) {
1310		kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
1311					KAUTH_FILEOP_EXEC,
1312					(uintptr_t)nd.ni_vp, 0);
1313	}
1314
1315bad:
1316	proc_transend(p, 0);
1317
1318bad_notrans:
1319	if (imgp->ip_strings)
1320		execargs_free(imgp);
1321	if (imgp->ip_ndp)
1322		nameidone(imgp->ip_ndp);
1323
1324	return (error);
1325}
1326
1327
1328/*
1329 * exec_handle_spawnattr_apptype
1330 *
1331 * Description: Decode and apply the posix_spawn apptype to the task.
1332 *
1333 * Parameters:  proc_t p                process to apply attributes to
1334 *              int psa_apptype         posix spawn attribute apptype
1335 *
1336 * Returns:     0                       Success
1337 */
1338static errno_t
1339exec_handle_spawnattr_apptype(proc_t p, int psa_apptype)
1340{
1341	if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) {
1342		int apptype = TASK_APPTYPE_NONE;
1343		int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK;
1344
1345		switch(proctype) {
1346			case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE:
1347				apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
1348				break;
1349			case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD:
1350				apptype = TASK_APPTYPE_DAEMON_STANDARD;
1351				break;
1352			case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE:
1353				apptype = TASK_APPTYPE_DAEMON_ADAPTIVE;
1354				break;
1355			case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND:
1356				apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
1357				break;
1358			case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT:
1359				apptype = TASK_APPTYPE_APP_DEFAULT;
1360				break;
1361			case POSIX_SPAWN_PROC_TYPE_APP_TAL:
1362				apptype = TASK_APPTYPE_APP_TAL;
1363				break;
1364			default:
1365				apptype = TASK_APPTYPE_NONE;
1366				break;
1367		}
1368
1369		proc_set_task_apptype(p->task, apptype);
1370
1371		/* TODO: Should an invalid value here fail the spawn? */
1372		return (0);
1373	}
1374
1375	return (0);
1376}
1377
1378
1379/*
1380 * exec_handle_port_actions
1381 *
1382 * Description:	Go through the _posix_port_actions_t contents,
1383 * 		calling task_set_special_port, task_set_exception_ports
1384 * 		and/or audit_session_spawnjoin for the current task.
1385 *
1386 * Parameters:	struct image_params *	Image parameter block
1387 * 		short psa_flags		posix spawn attribute flags
1388 *
1389 * Returns:	0			Success
1390 * 		EINVAL			Failure
1391 * 		ENOTSUP			Illegal posix_spawn attr flag was set
1392 */
1393static errno_t
1394exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch_ports)
1395{
1396	_posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
1397	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1398	_ps_port_action_t *act = NULL;
1399	task_t task = p->task;
1400	ipc_port_t port = NULL;
1401	errno_t ret = 0;
1402	int i;
1403
1404	if (need_portwatch != NULL)
1405		*need_portwatch  = 0;
1406
1407	for (i = 0; i < pacts->pspa_count; i++) {
1408		act = &pacts->pspa_actions[i];
1409
1410		if (ipc_object_copyin(get_task_ipcspace(current_task()),
1411		    act->new_port, MACH_MSG_TYPE_COPY_SEND,
1412		    (ipc_object_t *) &port) != KERN_SUCCESS) {
1413			ret = EINVAL;
1414			goto done;
1415		}
1416
1417		switch (act->port_type) {
1418		case PSPA_SPECIAL:
1419			/* Only allowed when not under vfork */
1420			if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1421				ret = ENOTSUP;
1422			else if (task_set_special_port(task,
1423			act->which, port) != KERN_SUCCESS)
1424				ret = EINVAL;
1425			break;
1426
1427		case PSPA_EXCEPTION:
1428			/* Only allowed when not under vfork */
1429			if (!(psa_flags & POSIX_SPAWN_SETEXEC))
1430				ret = ENOTSUP;
1431			else if (task_set_exception_ports(task,
1432			act->mask, port, act->behavior,
1433			act->flavor) != KERN_SUCCESS)
1434				ret = EINVAL;
1435			break;
1436#if CONFIG_AUDIT
1437		case PSPA_AU_SESSION:
1438			ret = audit_session_spawnjoin(p, port);
1439			break;
1440#endif
1441		case PSPA_IMP_WATCHPORTS:
1442			if (portwatch_ports != NULL) {
1443				if (need_portwatch != NULL)
1444					*need_portwatch  = 1;
1445				/* hold on to this till end of spawn */
1446				portwatch_ports[i] = port;
1447				ret = 0;
1448			} else
1449				ipc_port_release_send(port);
1450			break;
1451		default:
1452			ret = EINVAL;
1453			break;
1454		}
1455
1456		/* action failed, so release port resources */
1457
1458		if (ret) {
1459			ipc_port_release_send(port);
1460			break;
1461		}
1462	}
1463
1464done:
1465	if (0 != ret)
1466		DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port);
1467	return (ret);
1468}
1469
1470/*
1471 * exec_handle_file_actions
1472 *
1473 * Description:	Go through the _posix_file_actions_t contents applying the
1474 *		open, close, and dup2 operations to the open file table for
1475 *		the current process.
1476 *
1477 * Parameters:	struct image_params *	Image parameter block
1478 *
1479 * Returns:	0			Success
1480 *		???
1481 *
1482 * Note:	Actions are applied in the order specified, with the credential
1483 *		of the parent process.  This is done to permit the parent
1484 *		process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
1485 *		the child following operations the child may in fact not be
1486 *		normally permitted to perform.
1487 */
1488static int
1489exec_handle_file_actions(struct image_params *imgp, short psa_flags)
1490{
1491	int error = 0;
1492	int action;
1493	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1494	_posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
1495	int ival[2];		/* dummy retval for system calls) */
1496
1497	for (action = 0; action < px_sfap->psfa_act_count; action++) {
1498		_psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action];
1499
1500		switch(psfa->psfaa_type) {
1501		case PSFA_OPEN: {
1502			/*
1503			 * Open is different, in that it requires the use of
1504			 * a path argument, which is normally copied in from
1505			 * user space; because of this, we have to support an
1506			 * open from kernel space that passes an address space
1507			 * context of UIO_SYSSPACE, and casts the address
1508			 * argument to a user_addr_t.
1509			 */
1510			struct vnode_attr va;
1511			struct nameidata nd;
1512			int mode = psfa->psfaa_openargs.psfao_mode;
1513			struct dup2_args dup2a;
1514			struct close_nocancel_args ca;
1515			int origfd;
1516
1517			VATTR_INIT(&va);
1518			/* Mask off all but regular access permissions */
1519			mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1520			VATTR_SET(&va, va_mode, mode & ACCESSPERMS);
1521
1522			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
1523			       CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
1524			       imgp->ip_vfs_context);
1525
1526			error = open1(imgp->ip_vfs_context,
1527					&nd,
1528					psfa->psfaa_openargs.psfao_oflag,
1529					&va,
1530					fileproc_alloc_init, NULL,
1531					ival);
1532
1533			/*
1534			 * If there's an error, or we get the right fd by
1535			 * accident, then drop out here.  This is easier than
1536			 * reworking all the open code to preallocate fd
1537			 * slots, and internally taking one as an argument.
1538			 */
1539			if (error || ival[0] == psfa->psfaa_filedes)
1540				break;
1541
1542			origfd = ival[0];
1543			/*
1544			 * If we didn't fall out from an error, we ended up
1545			 * with the wrong fd; so now we've got to try to dup2
1546			 * it to the right one.
1547			 */
1548			dup2a.from = origfd;
1549			dup2a.to = psfa->psfaa_filedes;
1550
1551			/*
1552			 * The dup2() system call implementation sets
1553			 * ival to newfd in the success case, but we
1554			 * can ignore that, since if we didn't get the
1555			 * fd we wanted, the error will stop us.
1556			 */
1557			error = dup2(p, &dup2a, ival);
1558			if (error)
1559				break;
1560
1561			/*
1562			 * Finally, close the original fd.
1563			 */
1564			ca.fd = origfd;
1565
1566			error = close_nocancel(p, &ca, ival);
1567			}
1568			break;
1569
1570		case PSFA_DUP2: {
1571			struct dup2_args dup2a;
1572
1573			dup2a.from = psfa->psfaa_filedes;
1574			dup2a.to = psfa->psfaa_openargs.psfao_oflag;
1575
1576			/*
1577			 * The dup2() system call implementation sets
1578			 * ival to newfd in the success case, but we
1579			 * can ignore that, since if we didn't get the
1580			 * fd we wanted, the error will stop us.
1581			 */
1582			error = dup2(p, &dup2a, ival);
1583			}
1584			break;
1585
1586		case PSFA_CLOSE: {
1587			struct close_nocancel_args ca;
1588
1589			ca.fd = psfa->psfaa_filedes;
1590
1591			error = close_nocancel(p, &ca, ival);
1592			}
1593			break;
1594
1595		case PSFA_INHERIT: {
1596			struct fcntl_nocancel_args fcntla;
1597
1598			/*
1599			 * Check to see if the descriptor exists, and
1600			 * ensure it's -not- marked as close-on-exec.
1601			 *
1602			 * Attempting to "inherit" a guarded fd will
1603			 * result in a error.
1604			 */
1605			fcntla.fd = psfa->psfaa_filedes;
1606			fcntla.cmd = F_GETFD;
1607			if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0)
1608				break;
1609
1610			if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) {
1611				fcntla.fd = psfa->psfaa_filedes;
1612				fcntla.cmd = F_SETFD;
1613				fcntla.arg = ival[0] & ~FD_CLOEXEC;
1614				error = fcntl_nocancel(p, &fcntla, ival);
1615			}
1616
1617			}
1618			break;
1619
1620		default:
1621			error = EINVAL;
1622			break;
1623		}
1624
1625		/* All file actions failures are considered fatal, per POSIX */
1626
1627		if (error) {
1628			if (PSFA_OPEN == psfa->psfaa_type) {
1629				DTRACE_PROC1(spawn__open__failure, uintptr_t,
1630			            psfa->psfaa_openargs.psfao_path);
1631			} else {
1632				DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes);
1633			}
1634			break;
1635		}
1636	}
1637
1638	if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0)
1639		return (error);
1640
1641	/*
1642	 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
1643	 * this spawn only) as if "close on exec" is the default
1644	 * disposition of all pre-existing file descriptors.  In this case,
1645	 * the list of file descriptors mentioned in the file actions
1646	 * are the only ones that can be inherited, so mark them now.
1647	 *
1648	 * The actual closing part comes later, in fdexec().
1649	 */
1650	proc_fdlock(p);
1651	for (action = 0; action < px_sfap->psfa_act_count; action++) {
1652		_psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
1653		int fd = psfa->psfaa_filedes;
1654
1655		switch (psfa->psfaa_type) {
1656		case PSFA_DUP2:
1657			fd = psfa->psfaa_openargs.psfao_oflag;
1658			/*FALLTHROUGH*/
1659		case PSFA_OPEN:
1660		case PSFA_INHERIT:
1661			*fdflags(p, fd) |= UF_INHERIT;
1662			break;
1663
1664		case PSFA_CLOSE:
1665			break;
1666		}
1667	}
1668	proc_fdunlock(p);
1669
1670	return (0);
1671}
1672
1673#if CONFIG_MACF
1674/*
1675 * exec_spawnattr_getmacpolicyinfo
1676 */
1677void *
1678exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp)
1679{
1680	const struct _posix_spawn_mac_policy_extensions *psmx = macextensions;
1681	int i;
1682
1683	if (psmx == NULL)
1684		return NULL;
1685
1686	for (i = 0; i < psmx->psmx_count; i++) {
1687		const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1688		if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) {
1689			if (lenp != NULL)
1690				*lenp = extension->datalen;
1691			return extension->datap;
1692		}
1693	}
1694
1695	if (lenp != NULL)
1696		*lenp = 0;
1697	return NULL;
1698}
1699
1700static int
1701spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp)
1702{
1703	_posix_spawn_mac_policy_extensions_t psmx = NULL;
1704	int error = 0;
1705	int copycnt = 0;
1706	int i = 0;
1707
1708	*psmxp = NULL;
1709
1710	if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) ||
1711	    px_args->mac_extensions_size > PAGE_SIZE) {
1712		error = EINVAL;
1713		goto bad;
1714	}
1715
1716	MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK);
1717	if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0)
1718		goto bad;
1719
1720	if (PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count) > px_args->mac_extensions_size) {
1721		error = EINVAL;
1722		goto bad;
1723	}
1724
1725	for (i = 0; i < psmx->psmx_count; i++) {
1726		_ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
1727		if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) {
1728			error = EINVAL;
1729			goto bad;
1730		}
1731	}
1732
1733	for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) {
1734		_ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt];
1735		void *data = NULL;
1736
1737		MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK);
1738		if ((error = copyin(extension->data, data, extension->datalen)) != 0) {
1739			FREE(data, M_TEMP);
1740			goto bad;
1741		}
1742		extension->datap = data;
1743	}
1744
1745	*psmxp = psmx;
1746	return 0;
1747
1748bad:
1749	if (psmx != NULL) {
1750		for (i = 0; i < copycnt; i++)
1751			FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1752		FREE(psmx, M_TEMP);
1753	}
1754	return error;
1755}
1756
1757static void
1758spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx)
1759{
1760	int i;
1761
1762	if (psmx == NULL)
1763		return;
1764	for (i = 0; i < psmx->psmx_count; i++)
1765		FREE(psmx->psmx_extensions[i].datap, M_TEMP);
1766	FREE(psmx, M_TEMP);
1767}
1768#endif /* CONFIG_MACF */
1769
1770/*
1771 * posix_spawn
1772 *
 * Parameters:	uap->pid		Pointer to pid return area
 *		uap->path		File name to exec
 *		uap->adesc		Spawn arguments descriptor (attributes,
 *					file/port actions, MAC extensions);
 *					may be USER_ADDR_NULL
 *		uap->argv		Argument list
 *		uap->envp		Environment list
1777 *
1778 * Returns:	0			Success
1779 *		EINVAL			Invalid argument
1780 *		ENOTSUP			Not supported
1781 *		ENOEXEC			Executable file format error
1782 *	exec_activate_image:EINVAL	Invalid argument
1783 *	exec_activate_image:EACCES	Permission denied
1784 *	exec_activate_image:EINTR	Interrupted function
1785 *	exec_activate_image:ENOMEM	Not enough space
1786 *	exec_activate_image:EFAULT	Bad address
1787 *	exec_activate_image:ENAMETOOLONG	Filename too long
1788 *	exec_activate_image:ENOEXEC	Executable file format error
1789 *	exec_activate_image:ETXTBSY	Text file busy [misuse of error code]
1790 *	exec_activate_image:EBADEXEC	The executable is corrupt/unknown
1791 *	exec_activate_image:???
1792 *	mac_execve_enter:???
1793 *
1794 * TODO:	Expect to need __mac_posix_spawn() at some point...
1795 *		Handle posix_spawnattr_t
1796 *		Handle posix_spawn_file_actions_t
1797 */
1798int
1799posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
1800{
1801	proc_t p = ap;		/* quiet bogus GCC vfork() warning */
1802	user_addr_t pid = uap->pid;
1803	int ival[2];		/* dummy retval for setpgid() */
1804	char *bufp = NULL;
1805	struct image_params *imgp;
1806	struct vnode_attr *vap;
1807	struct vnode_attr *origvap;
1808	struct uthread	*uthread = 0;	/* compiler complains if not set to 0*/
1809	int error, sig;
1810	char alt_p_comm[sizeof(p->p_comm)] = {0};	/* for PowerPC */
1811	int is_64 = IS_64BIT_PROCESS(p);
1812	struct vfs_context context;
1813	struct user__posix_spawn_args_desc px_args;
1814	struct _posix_spawnattr px_sa;
1815	_posix_spawn_file_actions_t px_sfap = NULL;
1816	_posix_spawn_port_actions_t px_spap = NULL;
1817	struct __kern_sigaction vec;
1818	boolean_t spawn_no_exec = FALSE;
1819	boolean_t proc_transit_set = TRUE;
1820	boolean_t exec_done = FALSE;
1821	int need_portwatch = 0, portwatch_count = 0;
1822	ipc_port_t * portwatch_ports = NULL;
1823	vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports);
1824
1825	/*
1826	 * Allocate a big chunk for locals instead of using stack since these
1827	 * structures are pretty big.
1828	 */
1829	MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
1830	imgp = (struct image_params *) bufp;
1831	if (bufp == NULL) {
1832		error = ENOMEM;
1833		goto bad;
1834	}
1835	vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
1836	origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
1837
1838	/* Initialize the common data in the image_params structure */
1839	imgp->ip_user_fname = uap->path;
1840	imgp->ip_user_argv = uap->argv;
1841	imgp->ip_user_envv = uap->envp;
1842	imgp->ip_vattr = vap;
1843	imgp->ip_origvattr = origvap;
1844	imgp->ip_vfs_context = &context;
1845	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
1846	imgp->ip_p_comm = alt_p_comm;		/* for PowerPC */
1847	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
1848
1849	if (uap->adesc != USER_ADDR_NULL) {
1850		if(is_64) {
1851			error = copyin(uap->adesc, &px_args, sizeof(px_args));
1852		} else {
1853			struct user32__posix_spawn_args_desc px_args32;
1854
1855			error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
1856
1857			/*
1858			 * Convert arguments descriptor from external 32 bit
1859			 * representation to internal 64 bit representation
1860			 */
1861			px_args.attr_size = px_args32.attr_size;
1862			px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
1863			px_args.file_actions_size = px_args32.file_actions_size;
1864			px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
1865			px_args.port_actions_size = px_args32.port_actions_size;
1866			px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
1867			px_args.mac_extensions_size = px_args32.mac_extensions_size;
1868			px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions);
1869		}
1870		if (error)
1871			goto bad;
1872
1873		if (px_args.attr_size != 0) {
1874			/*
1875			 * We are not copying the port_actions pointer,
1876			 * because we already have it from px_args.
1877			 */
1878
1879
1880			if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0))
1881			goto bad;
1882
1883			bzero( (void *)( (unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset );
1884
1885			imgp->ip_px_sa = &px_sa;
1886		}
1887		if (px_args.file_actions_size != 0) {
1888			/* Limit file_actions to allowed number of open files */
1889			int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
1890			if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
1891				px_args.file_actions_size > PSF_ACTIONS_SIZE(maxfa)) {
1892				error = EINVAL;
1893				goto bad;
1894			}
1895			MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
1896			if (px_sfap == NULL) {
1897				error = ENOMEM;
1898				goto bad;
1899			}
1900			imgp->ip_px_sfa = px_sfap;
1901
1902			if ((error = copyin(px_args.file_actions, px_sfap,
1903							px_args.file_actions_size)) != 0)
1904				goto bad;
1905
1906			/* Verify that the action count matches the struct size */
1907			if (PSF_ACTIONS_SIZE(px_sfap->psfa_act_count) != px_args.file_actions_size) {
1908				error = EINVAL;
1909				goto bad;
1910			}
1911		}
1912		if (px_args.port_actions_size != 0) {
1913			/* Limit port_actions to one page of data */
1914			if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
1915				px_args.port_actions_size > PAGE_SIZE) {
1916				error = EINVAL;
1917				goto bad;
1918			}
1919
1920			MALLOC(px_spap, _posix_spawn_port_actions_t,
1921					px_args.port_actions_size, M_TEMP, M_WAITOK);
1922			if (px_spap == NULL) {
1923				error = ENOMEM;
1924				goto bad;
1925			}
1926			imgp->ip_px_spa = px_spap;
1927
1928			if ((error = copyin(px_args.port_actions, px_spap,
1929							px_args.port_actions_size)) != 0)
1930				goto bad;
1931
1932			/* Verify that the action count matches the struct size */
1933			if (PS_PORT_ACTIONS_SIZE(px_spap->pspa_count) != px_args.port_actions_size) {
1934				error = EINVAL;
1935				goto bad;
1936			}
1937		}
1938#if CONFIG_MACF
1939		if (px_args.mac_extensions_size != 0) {
1940			if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0)
1941				goto bad;
1942		}
1943#endif /* CONFIG_MACF */
1944	}
1945
1946	/* set uthread to parent */
1947	uthread = get_bsdthread_info(current_thread());
1948
1949	/*
1950	 * <rdar://6640530>; this does not result in a behaviour change
1951	 * relative to Leopard, so there should not be any existing code
1952	 * which depends on it.
1953	 */
1954	if (uthread->uu_flag & UT_VFORK) {
1955	    error = EINVAL;
1956	    goto bad;
1957	}
1958
1959	/*
1960	 * If we don't have the extension flag that turns "posix_spawn()"
1961	 * into "execve() with options", then we will be creating a new
1962	 * process which does not inherit memory from the parent process,
1963	 * which is one of the most expensive things about using fork()
1964	 * and execve().
1965	 */
1966	if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
1967		if ((error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN)) != 0)
1968			goto bad;
1969		imgp->ip_flags |= IMGPF_SPAWN;	/* spawn w/o exec */
1970		spawn_no_exec = TRUE;		/* used in later tests */
1971		DTRACE_PROC1(create, proc_t, p);
1972	}
1973
1974	if (spawn_no_exec)
1975		p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
1976	assert(p != NULL);
1977
1978	/* By default, the thread everyone plays with is the parent */
1979	context.vc_thread = current_thread();
1980	context.vc_ucred = p->p_ucred;	/* XXX must NOT be kauth_cred_get() */
1981
1982	/*
1983	 * However, if we're not in the setexec case, redirect the context
1984	 * to the newly created process instead
1985	 */
1986	if (spawn_no_exec)
1987		context.vc_thread = imgp->ip_new_thread;
1988
1989	/*
1990	 * Post fdcopy(), pre exec_handle_sugid() - this is where we want
1991	 * to handle the file_actions.  Since vfork() also ends up setting
1992	 * us into the parent process group, and saved off the signal flags,
1993	 * this is also where we want to handle the spawn flags.
1994	 */
1995
1996	/* Has spawn file actions? */
1997	if (imgp->ip_px_sfa != NULL) {
1998		/*
1999		 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
2000		 * is handled in exec_handle_file_actions().
2001		 */
2002		if ((error = exec_handle_file_actions(imgp,
2003		    imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0)
2004			goto bad;
2005	}
2006
2007	/* Has spawn port actions? */
2008	if (imgp->ip_px_spa != NULL) {
2009		boolean_t is_adaptive = FALSE;
2010
2011		/* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */
2012		if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE)
2013			is_adaptive = TRUE;
2014
2015		/*
2016		 * portwatch only:
2017		 * Allocate a place to store the ports we want to bind to the new task
2018		 * We can't bind them until after the apptype is set.
2019		 */
2020		if (px_spap->pspa_count != 0 && is_adaptive) {
2021			portwatch_count = px_spap->pspa_count;
2022			MALLOC(portwatch_ports, ipc_port_t *, (sizeof(ipc_port_t) * portwatch_count), M_TEMP, M_WAITOK | M_ZERO);
2023		} else {
2024			portwatch_ports = NULL;
2025		}
2026
2027		if ((error = exec_handle_port_actions(imgp,
2028		    imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &need_portwatch, portwatch_ports)) != 0)
2029			goto bad;
2030	}
2031
2032	/* Has spawn attr? */
2033	if (imgp->ip_px_sa != NULL) {
2034		/*
2035		 * Set the process group ID of the child process; this has
2036		 * to happen before the image activation.
2037		 */
2038		if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
2039			struct setpgid_args spga;
2040			spga.pid = p->p_pid;
2041			spga.pgid = px_sa.psa_pgroup;
2042			/*
2043			 * Effectively, call setpgid() system call; works
2044			 * because there are no pointer arguments.
2045			 */
2046			if((error = setpgid(p, &spga, ival)) != 0)
2047				goto bad;
2048		}
2049
2050		/*
2051		 * Reset UID/GID to parent's RUID/RGID; This works only
2052		 * because the operation occurs *after* the vfork() and
2053		 * before the call to exec_handle_sugid() by the image
2054		 * activator called from exec_activate_image().  POSIX
2055		 * requires that any setuid/setgid bits on the process
2056		 * image will take precedence over the spawn attributes
2057		 * (re)setting them.
2058		 *
2059		 * The use of p_ucred is safe, since we are acting on the
2060		 * new process, and it has no threads other than the one
2061		 * we are creating for it.
2062		 */
2063		if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
2064			kauth_cred_t my_cred = p->p_ucred;
2065			kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred));
2066			if (my_new_cred != my_cred) {
2067				p->p_ucred = my_new_cred;
2068				/* update cred on proc */
2069				PROC_UPDATE_CREDS_ONPROC(p);
2070			}
2071		}
2072
2073		/*
2074		 * Disable ASLR for the spawned process.
2075		 */
2076		/*
2077		 * But only do so if we are not embedded; embedded allows for a
2078		 * boot-arg (-disable_aslr) to deal with this (which itself is
2079		 * only honored on DEVELOPMENT or DEBUG builds of xnu).
2080		 */
2081		if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
2082			OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
2083
		/*
		 * Allow execution from data pages for the spawned process when
		 * _POSIX_SPAWN_ALLOW_DATA_EXEC is requested, even if it would
		 * otherwise be disallowed by the architecture default.
		 */
2088		if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC)
2089			imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
2090	}
2091
2092	/*
2093	 * Disable ASLR during image activation.  This occurs either if the
2094	 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
2095	 * P_DISABLE_ASLR was inherited from the parent process.
2096	 */
2097	if (p->p_flag & P_DISABLE_ASLR)
2098		imgp->ip_flags |= IMGPF_DISABLE_ASLR;
2099
2100	/*
2101	 * Clear transition flag so we won't hang if exec_activate_image() causes
2102	 * an automount (and launchd does a proc sysctl to service it).
2103	 *
2104	 * <rdar://problem/6848672>, <rdar://problem/5959568>.
2105	 */
2106	if (spawn_no_exec) {
2107		proc_transend(p, 0);
2108		proc_transit_set = 0;
2109	}
2110
2111#if MAC_SPAWN	/* XXX */
2112	if (uap->mac_p != USER_ADDR_NULL) {
2113		error = mac_execve_enter(uap->mac_p, imgp);
2114		if (error)
2115			goto bad;
2116	}
2117#endif
2118
2119	/*
2120	 * Activate the image
2121	 */
2122	error = exec_activate_image(imgp);
2123
2124	if (error == 0) {
2125		/* process completed the exec */
2126		exec_done = TRUE;
2127	} else if (error == -1) {
2128		/* Image not claimed by any activator? */
2129		error = ENOEXEC;
2130	}
2131
	/*
	 * If we have a spawn attr, and it contains signal related flags,
	 * then we need to process them in the "context" of the new child
	 * process, so we have to process it following image activation,
	 * prior to making the thread runnable in user space.  This is
	 * necessitated by some signal information being per-thread rather
	 * than per-process, and we don't have the new allocation in hand
	 * until after the image is activated.
	 */
2141	if (!error && imgp->ip_px_sa != NULL) {
2142		thread_t child_thread = current_thread();
2143		uthread_t child_uthread = uthread;
2144
2145		/*
2146		 * If we created a new child thread, then the thread and
2147		 * uthread are different than the current ones; otherwise,
2148		 * we leave them, since we are in the exec case instead.
2149		 */
2150		if (spawn_no_exec) {
2151			child_thread = imgp->ip_new_thread;
2152			child_uthread = get_bsdthread_info(child_thread);
2153		}
2154
2155		/*
2156		 * Mask a list of signals, instead of them being unmasked, if
2157		 * they were unmasked in the parent; note that some signals
2158		 * are not maskable.
2159		 */
2160		if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK)
2161			child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
2162		/*
2163		 * Default a list of signals instead of ignoring them, if
2164		 * they were ignored in the parent.  Note that we pass
2165		 * spawn_no_exec to setsigvec() to indicate that we called
2166		 * fork1() and therefore do not need to call proc_signalstart()
2167		 * internally.
2168		 */
2169		if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
2170			vec.sa_handler = SIG_DFL;
2171			vec.sa_tramp = 0;
2172			vec.sa_mask = 0;
2173			vec.sa_flags = 0;
2174			for (sig = 0; sig < NSIG; sig++)
2175				if (px_sa.psa_sigdefault & (1 << sig)) {
2176					error = setsigvec(p, child_thread, sig + 1, &vec, spawn_no_exec);
2177			}
2178		}
2179
2180		/*
2181		 * Activate the CPU usage monitor, if requested. This is done via a task-wide, per-thread CPU
2182		 * usage limit, which will generate a resource exceeded exception if any one thread exceeds the
2183		 * limit.
2184		 *
2185		 * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds.
2186		 */
2187		if (px_sa.psa_cpumonitor_percent != 0) {
2188			/*
2189			 * Always treat a CPU monitor activation coming from spawn as entitled. Requiring
2190			 * an entitlement to configure the monitor a certain way seems silly, since
2191			 * whomever is turning it on could just as easily choose not to do so.
2192			 *
2193			 * XXX - Ignore the parameters that we get from userland. The spawnattr method of
2194			 * activating the monitor always gets the system default parameters. Once we have
2195			 * an explicit spawn SPI for configuring the defaults, we can revert this to
2196			 * respect the params passed in from userland.
2197			 */
2198			error = proc_set_task_ruse_cpu(p->task,
2199					TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
2200					PROC_POLICY_CPUMON_DEFAULTS, 0,
2201					0, TRUE);
2202		}
2203	}
2204
2205bad:
2206
2207	if (portwatch_ports != NULL) {
2208		int needboost = 0;
2209
2210		/*
2211		 * Mark the ports as destined to be handed off to the new task, and
2212		 * transfer any boosts to the new task.
2213		 * We need to release the rights even if the posix_spawn has failed.
2214		 */
2215		if (need_portwatch != 0) {
2216			for (int i = 0; i < portwatch_count; i++) {
2217				ipc_port_t port = NULL;
2218
2219				if ((port = portwatch_ports[i]) != NULL) {
2220					int boost = 0;
2221					if (error == 0)
2222						task_add_importance_watchport(p->task, p->p_pid, port, &boost);
2223					ipc_port_release_send(port);
2224					needboost += boost;
2225				}
2226			}
2227		}
2228
2229		if (needboost != 0) {
2230			/*
2231			 * Apply the boost count found on the ports, which will keep the
2232			 * newly created process out of background until it handles the incoming messages.
2233			 */
2234			task_hold_multiple_assertion(p->task, needboost);
2235		}
2236
2237		FREE(portwatch_ports, M_TEMP);
2238		portwatch_ports = NULL;
2239		portwatch_count = 0;
2240	}
2241
2242	if (error == 0) {
2243		/* reset delay idle sleep status if set */
2244		if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)
2245			OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag);
2246		/* upon  successful spawn, re/set the proc control state */
2247		if (imgp->ip_px_sa != NULL) {
2248			switch (px_sa.psa_pcontrol) {
2249				case POSIX_SPAWN_PCONTROL_THROTTLE:
2250					p->p_pcaction = P_PCTHROTTLE;
2251					break;
2252				case POSIX_SPAWN_PCONTROL_SUSPEND:
2253					p->p_pcaction = P_PCSUSP;
2254					break;
2255				case POSIX_SPAWN_PCONTROL_KILL:
2256					p->p_pcaction = P_PCKILL;
2257					break;
2258				case POSIX_SPAWN_PCONTROL_NONE:
2259				default:
2260					p->p_pcaction = 0;
2261					break;
2262			};
2263		}
2264		exec_resettextvp(p, imgp);
2265
2266#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
2267		/* Has jetsam attributes? */
2268		if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) {
2269			memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY),
2270			    TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND));
2271		}
2272#endif
2273	}
2274
2275	/*
2276	 * If we successfully called fork1(), we always need to do this;
2277	 * we identify this case by noting the IMGPF_SPAWN flag.  This is
2278	 * because we come back from that call with signals blocked in the
2279	 * child, and we have to unblock them, but we want to wait until
2280	 * after we've performed any spawn actions.  This has to happen
2281	 * before check_for_signature(), which uses psignal.
2282	 */
2283	if (spawn_no_exec) {
2284		if (proc_transit_set)
2285			proc_transend(p, 0);
2286
2287		/*
2288		 * Drop the signal lock on the child which was taken on our
2289		 * behalf by forkproc()/cloneproc() to prevent signals being
2290		 * received by the child in a partially constructed state.
2291		 */
2292		proc_signalend(p, 0);
2293
2294		/* flag the 'fork' has occurred */
2295		proc_knote(p->p_pptr, NOTE_FORK | p->p_pid);
2296		/* then flag exec has occurred */
2297		/* notify only if it has not failed due to FP Key error */
2298		if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
2299			proc_knote(p, NOTE_EXEC);
2300	}
2301
2302	/*
2303	 * We have to delay operations which might throw a signal until after
2304	 * the signals have been unblocked; however, we want that to happen
2305	 * after exec_resettextvp() so that the textvp is correct when they
2306	 * fire.
2307	 */
2308	if (error == 0) {
2309		error = check_for_signature(p, imgp);
2310
2311		/*
2312		 * Pay for our earlier safety; deliver the delayed signals from
2313		 * the incomplete spawn process now that it's complete.
2314		 */
2315		if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) {
2316			psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP);
2317		}
2318	}
2319
2320
2321	if (imgp != NULL) {
2322		if (imgp->ip_vp)
2323			vnode_put(imgp->ip_vp);
2324		if (imgp->ip_scriptvp)
2325			vnode_put(imgp->ip_scriptvp);
2326		if (imgp->ip_strings)
2327			execargs_free(imgp);
2328		if (imgp->ip_px_sfa != NULL)
2329			FREE(imgp->ip_px_sfa, M_TEMP);
2330		if (imgp->ip_px_spa != NULL)
2331			FREE(imgp->ip_px_spa, M_TEMP);
2332
2333#if CONFIG_MACF
2334		if (imgp->ip_px_smpx != NULL)
2335			spawn_free_macpolicyinfo(imgp->ip_px_smpx);
2336		if (imgp->ip_execlabelp)
2337			mac_cred_label_free(imgp->ip_execlabelp);
2338		if (imgp->ip_scriptlabelp)
2339			mac_vnode_label_free(imgp->ip_scriptlabelp);
2340#endif
2341	}
2342
2343#if CONFIG_DTRACE
2344	if (spawn_no_exec) {
2345		/*
2346		 * In the original DTrace reference implementation,
2347		 * posix_spawn() was a libc routine that just
2348		 * did vfork(2) then exec(2).  Thus the proc::: probes
2349		 * are very fork/exec oriented.  The details of this
2350		 * in-kernel implementation of posix_spawn() is different
2351		 * (while producing the same process-observable effects)
2352		 * particularly w.r.t. errors, and which thread/process
2353		 * is constructing what on behalf of whom.
2354		 */
2355		if (error) {
2356			DTRACE_PROC1(spawn__failure, int, error);
2357		} else {
2358			DTRACE_PROC(spawn__success);
			/*
			 * Some DTrace scripts, e.g. newproc.d in
			 * /usr/bin, rely on the 'exec-success'
			 * probe being fired in the child after the
			 * new process image has been constructed
			 * in order to determine the associated pid.
			 *
			 * So, even though the parent built the image
			 * here, for compatibility, mark the new thread
			 * so 'exec-success' fires on it as it leaves
			 * the kernel.
			 */
2371			dtrace_thread_didexec(imgp->ip_new_thread);
2372		}
2373	} else {
2374		if (error) {
2375			DTRACE_PROC1(exec__failure, int, error);
2376		} else {
2377			DTRACE_PROC(exec__success);
2378		}
2379	}
2380#endif
2381
2382	/* Return to both the parent and the child? */
2383	if (imgp != NULL && spawn_no_exec) {
2384		/*
2385		 * If the parent wants the pid, copy it out
2386		 */
2387		if (pid != USER_ADDR_NULL)
2388			(void)suword(pid, p->p_pid);
2389		retval[0] = error;
2390
2391		/*
2392		 * If we had an error, perform an internal reap ; this is
2393		 * entirely safe, as we have a real process backing us.
2394		 */
2395		if (error) {
2396			proc_list_lock();
2397			p->p_listflag |= P_LIST_DEADPARENT;
2398			proc_list_unlock();
2399			proc_lock(p);
2400			/* make sure no one else has killed it off... */
2401			if (p->p_stat != SZOMB && p->exit_thread == NULL) {
2402				p->exit_thread = current_thread();
2403				proc_unlock(p);
2404				exit1(p, 1, (int *)NULL);
2405				if (exec_done == FALSE) {
2406					task_deallocate(get_threadtask(imgp->ip_new_thread));
2407					thread_deallocate(imgp->ip_new_thread);
2408				}
2409			} else {
2410				/* someone is doing it for us; just skip it */
2411				proc_unlock(p);
2412			}
2413		} else {
2414
2415			/*
2416			 * Return to the child
2417			 *
2418			 * Note: the image activator earlier dropped the
2419			 * task/thread references to the newly spawned
2420			 * process; this is OK, since we still have suspended
2421			 * queue references on them, so we should be fine
2422			 * with the delayed resume of the thread here.
2423			 */
2424			(void)thread_resume(imgp->ip_new_thread);
2425		}
2426	}
2427	if (bufp != NULL) {
2428		FREE(bufp, M_TEMP);
2429	}
2430
2431	return(error);
2432}
2433
2434
2435/*
2436 * execve
2437 *
2438 * Parameters:	uap->fname		File name to exec
2439 *		uap->argp		Argument list
2440 *		uap->envp		Environment list
2441 *
2442 * Returns:	0			Success
2443 *	__mac_execve:EINVAL		Invalid argument
 *	__mac_execve:ENOTSUP		Not supported
2445 *	__mac_execve:EACCES		Permission denied
2446 *	__mac_execve:EINTR		Interrupted function
2447 *	__mac_execve:ENOMEM		Not enough space
2448 *	__mac_execve:EFAULT		Bad address
2449 *	__mac_execve:ENAMETOOLONG	Filename too long
2450 *	__mac_execve:ENOEXEC		Executable file format error
2451 *	__mac_execve:ETXTBSY		Text file busy [misuse of error code]
2452 *	__mac_execve:???
2453 *
2454 * TODO:	Dynamic linker header address on stack is copied via suword()
2455 */
2456/* ARGSUSED */
2457int
2458execve(proc_t p, struct execve_args *uap, int32_t *retval)
2459{
2460	struct __mac_execve_args muap;
2461	int err;
2462
2463	memoryshot(VM_EXECVE, DBG_FUNC_NONE);
2464
2465	muap.fname = uap->fname;
2466	muap.argp = uap->argp;
2467	muap.envp = uap->envp;
2468	muap.mac_p = USER_ADDR_NULL;
2469	err = __mac_execve(p, &muap, retval);
2470
2471	return(err);
2472}
2473
2474/*
2475 * __mac_execve
2476 *
2477 * Parameters:	uap->fname		File name to exec
2478 *		uap->argp		Argument list
2479 *		uap->envp		Environment list
2480 *		uap->mac_p		MAC label supplied by caller
2481 *
2482 * Returns:	0			Success
2483 *		EINVAL			Invalid argument
2484 *		ENOTSUP			Not supported
2485 *		ENOEXEC			Executable file format error
2486 *	exec_activate_image:EINVAL	Invalid argument
2487 *	exec_activate_image:EACCES	Permission denied
2488 *	exec_activate_image:EINTR	Interrupted function
2489 *	exec_activate_image:ENOMEM	Not enough space
2490 *	exec_activate_image:EFAULT	Bad address
2491 *	exec_activate_image:ENAMETOOLONG	Filename too long
2492 *	exec_activate_image:ENOEXEC	Executable file format error
2493 *	exec_activate_image:ETXTBSY	Text file busy [misuse of error code]
2494 *	exec_activate_image:EBADEXEC	The executable is corrupt/unknown
2495 *	exec_activate_image:???
2496 *	mac_execve_enter:???
2497 *
2498 * TODO:	Dynamic linker header address on stack is copied via suword()
2499 */
2500int
2501__mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
2502{
2503	char *bufp = NULL;
2504	struct image_params *imgp;
2505	struct vnode_attr *vap;
2506	struct vnode_attr *origvap;
2507	int error;
2508	char alt_p_comm[sizeof(p->p_comm)] = {0};	/* for PowerPC */
2509	int is_64 = IS_64BIT_PROCESS(p);
2510	struct vfs_context context;
2511
2512	context.vc_thread = current_thread();
2513	context.vc_ucred = kauth_cred_proc_ref(p);	/* XXX must NOT be kauth_cred_get() */
2514
2515	/* Allocate a big chunk for locals instead of using stack since these
2516	 * structures a pretty big.
2517	 */
2518	MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
2519	imgp = (struct image_params *) bufp;
2520	if (bufp == NULL) {
2521		error = ENOMEM;
2522		goto exit_with_error;
2523	}
2524	vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
2525	origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
2526
2527	/* Initialize the common data in the image_params structure */
2528	imgp->ip_user_fname = uap->fname;
2529	imgp->ip_user_argv = uap->argp;
2530	imgp->ip_user_envv = uap->envp;
2531	imgp->ip_vattr = vap;
2532	imgp->ip_origvattr = origvap;
2533	imgp->ip_vfs_context = &context;
2534	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
2535	imgp->ip_p_comm = alt_p_comm;		/* for PowerPC */
2536	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
2537
2538#if CONFIG_MACF
2539	if (uap->mac_p != USER_ADDR_NULL) {
2540		error = mac_execve_enter(uap->mac_p, imgp);
2541		if (error) {
2542			kauth_cred_unref(&context.vc_ucred);
2543			goto exit_with_error;
2544		}
2545	}
2546#endif
2547
2548	error = exec_activate_image(imgp);
2549
2550	kauth_cred_unref(&context.vc_ucred);
2551
2552	/* Image not claimed by any activator? */
2553	if (error == -1)
2554		error = ENOEXEC;
2555
2556	if (error == 0) {
2557		exec_resettextvp(p, imgp);
2558		error = check_for_signature(p, imgp);
2559	}
2560	if (imgp->ip_vp != NULLVP)
2561		vnode_put(imgp->ip_vp);
2562	if (imgp->ip_scriptvp != NULLVP)
2563		vnode_put(imgp->ip_scriptvp);
2564	if (imgp->ip_strings)
2565		execargs_free(imgp);
2566#if CONFIG_MACF
2567	if (imgp->ip_execlabelp)
2568		mac_cred_label_free(imgp->ip_execlabelp);
2569	if (imgp->ip_scriptlabelp)
2570		mac_vnode_label_free(imgp->ip_scriptlabelp);
2571#endif
2572	if (!error) {
2573		struct uthread	*uthread;
2574
2575		/* Sever any extant thread affinity */
2576		thread_affinity_exec(current_thread());
2577
2578		DTRACE_PROC(exec__success);
2579		uthread = get_bsdthread_info(current_thread());
2580		if (uthread->uu_flag & UT_VFORK) {
2581			vfork_return(p, retval, p->p_pid);
2582			(void)thread_resume(imgp->ip_new_thread);
2583		}
2584	} else {
2585		DTRACE_PROC1(exec__failure, int, error);
2586	}
2587
2588exit_with_error:
2589	if (bufp != NULL) {
2590		FREE(bufp, M_TEMP);
2591	}
2592
2593	return(error);
2594}
2595
2596
2597/*
2598 * copyinptr
2599 *
2600 * Description:	Copy a pointer in from user space to a user_addr_t in kernel
2601 *		space, based on 32/64 bitness of the user space
2602 *
2603 * Parameters:	froma			User space address
2604 *		toptr			Address of kernel space user_addr_t
2605 *		ptr_size		4/8, based on 'froma' address space
2606 *
2607 * Returns:	0			Success
2608 *		EFAULT			Bad 'froma'
2609 *
2610 * Implicit returns:
2611 *		*ptr_size		Modified
2612 */
2613static int
2614copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
2615{
2616	int error;
2617
2618	if (ptr_size == 4) {
2619		/* 64 bit value containing 32 bit address */
2620		unsigned int i;
2621
2622		error = copyin(froma, &i, 4);
2623		*toptr = CAST_USER_ADDR_T(i);	/* SAFE */
2624	} else {
2625		error = copyin(froma, toptr, 8);
2626	}
2627	return (error);
2628}
2629
2630
2631/*
2632 * copyoutptr
2633 *
2634 * Description:	Copy a pointer out from a user_addr_t in kernel space to
2635 *		user space, based on 32/64 bitness of the user space
2636 *
2637 * Parameters:	ua			User space address to copy to
2638 *		ptr			Address of kernel space user_addr_t
2639 *		ptr_size		4/8, based on 'ua' address space
2640 *
2641 * Returns:	0			Success
2642 *		EFAULT			Bad 'ua'
2643 *
2644 */
2645static int
2646copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
2647{
2648	int error;
2649
2650	if (ptr_size == 4) {
2651		/* 64 bit value containing 32 bit address */
2652		unsigned int i = CAST_DOWN_EXPLICIT(unsigned int,ua);	/* SAFE */
2653
2654		error = copyout(&i, ptr, 4);
2655	} else {
2656		error = copyout(&ua, ptr, 8);
2657	}
2658	return (error);
2659}
2660
2661
2662/*
2663 * exec_copyout_strings
2664 *
2665 * Copy out the strings segment to user space.  The strings segment is put
2666 * on a preinitialized stack frame.
2667 *
 * Parameters:	struct image_params *	the image parameter block
 *		user_addr_t *		a pointer to the stack offset variable
 *
 * Returns:	0			Success
 *		!0			Failure: errno
2673 *
2674 * Implicit returns:
2675 *		(*stackp)		The stack offset, modified
2676 *
2677 * Note:	The strings segment layout is backward, from the beginning
2678 *		of the top of the stack to consume the minimal amount of
2679 *		space possible; the returned stack pointer points to the
2680 *		end of the area consumed (stacks grow downward).
2681 *
2682 *		argc is an int; arg[i] are pointers; env[i] are pointers;
2683 *		the 0's are (void *)NULL's
2684 *
2685 * The stack frame layout is:
2686 *
2687 *      +-------------+ <- p->user_stack
2688 *      |     16b     |
2689 *      +-------------+
2690 *      | STRING AREA |
2691 *      |      :      |
2692 *      |      :      |
2693 *      |      :      |
2694 *      +- -- -- -- --+
2695 *      |  PATH AREA  |
2696 *      +-------------+
2697 *      |      0      |
2698 *      +-------------+
2699 *      |  applev[n]  |
2700 *      +-------------+
2701 *             :
2702 *             :
2703 *      +-------------+
2704 *      |  applev[1]  |
2705 *      +-------------+
2706 *      | exec_path / |
2707 *      |  applev[0]  |
2708 *      +-------------+
2709 *      |      0      |
2710 *      +-------------+
2711 *      |    env[n]   |
2712 *      +-------------+
2713 *             :
2714 *             :
2715 *      +-------------+
2716 *      |    env[0]   |
2717 *      +-------------+
2718 *      |      0      |
2719 *      +-------------+
2720 *      | arg[argc-1] |
2721 *      +-------------+
2722 *             :
2723 *             :
2724 *      +-------------+
2725 *      |    arg[0]   |
2726 *      +-------------+
2727 *      |     argc    |
2728 * sp-> +-------------+
2729 *
 * Although technically a part of the STRING AREA, we treat the PATH AREA as
 * a separate entity.  This allows us to align the beginning of the PATH AREA
 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
 * which precede it on the stack are properly aligned.
2734 */
2735
2736static int
2737exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
2738{
2739	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
2740	int	ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
2741	int	ptr_area_size;
2742	void *ptr_buffer_start, *ptr_buffer;
2743	int string_size;
2744
2745	user_addr_t	string_area;	/* *argv[], *env[] */
2746	user_addr_t	ptr_area;	/* argv[], env[], applev[] */
2747	user_addr_t argc_area;	/* argc */
2748	user_addr_t	stack;
2749	int error;
2750
2751	unsigned i;
2752	struct copyout_desc {
2753		char	*start_string;
2754		int		count;
2755#if CONFIG_DTRACE
2756		user_addr_t	*dtrace_cookie;
2757#endif
2758		boolean_t	null_term;
2759	} descriptors[] = {
2760		{
2761			.start_string = imgp->ip_startargv,
2762			.count = imgp->ip_argc,
2763#if CONFIG_DTRACE
2764			.dtrace_cookie = &p->p_dtrace_argv,
2765#endif
2766			.null_term = TRUE
2767		},
2768		{
2769			.start_string = imgp->ip_endargv,
2770			.count = imgp->ip_envc,
2771#if CONFIG_DTRACE
2772			.dtrace_cookie = &p->p_dtrace_envp,
2773#endif
2774			.null_term = TRUE
2775		},
2776		{
2777			.start_string = imgp->ip_strings,
2778			.count = 1,
2779#if CONFIG_DTRACE
2780			.dtrace_cookie = NULL,
2781#endif
2782			.null_term = FALSE
2783		},
2784		{
2785			.start_string = imgp->ip_endenvv,
2786			.count = imgp->ip_applec - 1, /* exec_path handled above */
2787#if CONFIG_DTRACE
2788			.dtrace_cookie = NULL,
2789#endif
2790			.null_term = TRUE
2791		}
2792	};
2793
2794	stack = *stackp;
2795
2796	/*
2797	 * All previous contributors to the string area
2798	 * should have aligned their sub-area
2799	 */
2800	if (imgp->ip_strspace % ptr_size != 0) {
2801		error = EINVAL;
2802		goto bad;
2803	}
2804
2805	/* Grow the stack down for the strings we've been building up */
2806	string_size = imgp->ip_strendp - imgp->ip_strings;
2807	stack -= string_size;
2808	string_area = stack;
2809
2810	/*
2811	 * Need room for one pointer for each string, plus
2812	 * one for the NULLs terminating the argv, envv, and apple areas.
2813	 */
2814	ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) *
2815	    ptr_size;
2816	stack -= ptr_area_size;
2817	ptr_area = stack;
2818
2819	/* We'll construct all the pointer arrays in our string buffer,
2820	 * which we already know is aligned properly, and ip_argspace
2821	 * was used to verify we have enough space.
2822	 */
2823	ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
2824
2825	/*
2826	 * Need room for pointer-aligned argc slot.
2827	 */
2828	stack -= ptr_size;
2829	argc_area = stack;
2830
2831	/*
2832	 * Record the size of the arguments area so that sysctl_procargs()
2833	 * can return the argument area without having to parse the arguments.
2834	 */
2835	proc_lock(p);
2836	p->p_argc = imgp->ip_argc;
2837	p->p_argslen = (int)(*stackp - string_area);
2838	proc_unlock(p);
2839
2840	/* Return the initial stack address: the location of argc */
2841	*stackp = stack;
2842
2843	/*
2844	 * Copy out the entire strings area.
2845	 */
2846	error = copyout(imgp->ip_strings, string_area,
2847						   string_size);
2848	if (error)
2849		goto bad;
2850
2851	for (i = 0; i < sizeof(descriptors)/sizeof(descriptors[0]); i++) {
2852		char *cur_string = descriptors[i].start_string;
2853		int j;
2854
2855#if CONFIG_DTRACE
2856		if (descriptors[i].dtrace_cookie) {
2857			proc_lock(p);
2858			*descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
2859			proc_unlock(p);
2860		}
2861#endif /* CONFIG_DTRACE */
2862
2863		/*
2864		 * For each segment (argv, envv, applev), copy as many pointers as requested
2865		 * to our pointer buffer.
2866		 */
2867		for (j = 0; j < descriptors[i].count; j++) {
2868			user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
2869
2870			/* Copy out the pointer to the current string. Alignment has been verified  */
2871			if (ptr_size == 8) {
2872				*(uint64_t *)ptr_buffer = (uint64_t)cur_address;
2873			} else {
2874				*(uint32_t *)ptr_buffer = (uint32_t)cur_address;
2875			}
2876
2877			ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
2878			cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
2879		}
2880
2881		if (descriptors[i].null_term) {
2882			if (ptr_size == 8) {
2883				*(uint64_t *)ptr_buffer = 0ULL;
2884			} else {
2885				*(uint32_t *)ptr_buffer = 0;
2886			}
2887
2888			ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
2889		}
2890	}
2891
2892	/*
2893	 * Copy out all our pointer arrays in bulk.
2894	 */
2895	error = copyout(ptr_buffer_start, ptr_area,
2896					ptr_area_size);
2897	if (error)
2898		goto bad;
2899
2900	/* argc (int32, stored in a ptr_size area) */
2901	error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
2902	if (error)
2903		goto bad;
2904
2905bad:
2906	return(error);
2907}
2908
2909
2910/*
2911 * exec_extract_strings
2912 *
 * Copy arguments and environment from user space into work area; we may
 * have already copied some early arguments into the work area, and if
 * so, any arguments copied in are appended to those already there.
 * This function is the primary manipulator of ip_argspace, since
 * these are the arguments the client of execve(2) knows about. After
 * each argv[]/envv[] string is copied, we charge the string length
 * and argv[]/envv[] pointer slot to ip_argspace, so that we can
 * fully preflight the arg list size.
2921 *
2922 * Parameters:	struct image_params *	the image parameter block
2923 *
2924 * Returns:	0			Success
2925 *		!0			Failure: errno
2926 *
2927 * Implicit returns;
2928 *		(imgp->ip_argc)		Count of arguments, updated
2929 *		(imgp->ip_envc)		Count of environment strings, updated
2930 *		(imgp->ip_argspace)	Count of remaining of NCARGS
2931 *		(imgp->ip_interp_buffer)	Interpreter and args (mutated in place)
2932 *
2933 *
2934 * Note:	The argument and environment vectors are user space pointers
2935 *		to arrays of user space pointers.
2936 */
static int
exec_extract_strings(struct image_params *imgp)
{
	/*
	 * Overview: copy the "#!" interpreter arguments (if any), the argv[]
	 * strings and then the envv[] strings into the image's string area
	 * with exec_add_user_string(), counting entries in ip_argc / ip_envc.
	 * Every stored entry, and the NULL terminator of each vector, also
	 * reserves one new_ptr_size slot from ip_argspace; when ip_argspace
	 * cannot cover a slot the function fails with E2BIG.  Errors from
	 * copyinptr() and exec_add_user_string() are returned unchanged.
	 */
	int error = 0;
	/* pointer width used when reading the caller's argv[]/envv[] arrays */
	int	ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
	/* pointer width used for slot accounting and alignment below */
	int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
	user_addr_t	argv = imgp->ip_user_argv;
	user_addr_t	envv = imgp->ip_user_envv;

	/*
	 * Adjust space reserved for the path name by however much padding it
	 * needs. Doing this here since we didn't know if this would be a 32-
	 * or 64-bit process back in exec_save_path.
	 */
	while (imgp->ip_strspace % new_ptr_size != 0) {
		*imgp->ip_strendp++ = '\0';
		imgp->ip_strspace--;
		/* imgp->ip_argspace--; not counted towards exec args total */
	}

	/*
	 * From now on, we start attributing string space to ip_argspace
	 */
	imgp->ip_startargv = imgp->ip_strendp;
	imgp->ip_argc = 0;

	if((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
		user_addr_t	arg;
		char *argstart, *ch;

		/* First, the arguments in the "#!" string are tokenized and extracted. */
		argstart = imgp->ip_interp_buffer;
		while (argstart) {
			/* scan to the end of the current whitespace-delimited token */
			ch = argstart;
			while (*ch && !IS_WHITESPACE(*ch)) {
				ch++;
			}

			if (*ch == '\0') {
				/* last argument, no need to NUL-terminate */
				error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
				/* NULL ends the outer loop after the checks below */
				argstart = NULL;
			} else {
				/* NUL-terminate */
				*ch = '\0';
				error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);

				/*
				 * Find the next string. We know spaces at the end of the string have already
				 * been stripped.
				 */
				argstart = ch + 1;
				while (IS_WHITESPACE(*argstart)) {
					argstart++;
				}
			}

			/* Error-check, regardless of whether this is the last interpreter arg or not */
			if (error)
				goto bad;
			if (imgp->ip_argspace < new_ptr_size) {
				error = E2BIG;
				goto bad;
			}
			imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
			imgp->ip_argc++;
		}

		if (argv != 0LL) {
			/*
			 * If we are running an interpreter, replace the av[0] that was
			 * passed to execve() with the path name that was
			 * passed to execve() for interpreters which do not use the PATH
			 * to locate their script arguments.
			 */
			error = copyinptr(argv, &arg, ptr_size);
			if (error)
				goto bad;
			if (arg != 0LL) {
				argv += ptr_size; /* consume without using */
			}
		}

		/*
		 * The script itself becomes the next argument: "/dev/fd/N" when
		 * ip_interp_sugid_fd holds a descriptor, otherwise the path
		 * name the caller passed in ip_user_fname.
		 */
		if (imgp->ip_interp_sugid_fd != -1) {
			char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
			snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
			error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
		} else {
			error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
		}

		if (error)
			goto bad;
		if (imgp->ip_argspace < new_ptr_size) {
			error = E2BIG;
			goto bad;
		}
		imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
		imgp->ip_argc++;
	}

	/*
	 * Walk the caller's argv[] array one pointer at a time until the
	 * NULL entry; a zero argv address skips the loop entirely.
	 */
	while (argv != 0LL) {
		user_addr_t	arg;

		error = copyinptr(argv, &arg, ptr_size);
		if (error)
			goto bad;

		if (arg == 0LL) {
			break;
		}

		argv += ptr_size;

		/*
		* av[n...] = arg[n]
		*/
		error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
		if (error)
			goto bad;
		if (imgp->ip_argspace < new_ptr_size) {
			error = E2BIG;
			goto bad;
		}
		imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
		imgp->ip_argc++;
	}

	/* Save space for argv[] NULL terminator */
	if (imgp->ip_argspace < new_ptr_size) {
		error = E2BIG;
		goto bad;
	}
	imgp->ip_argspace -= new_ptr_size;

	/* Note where the args ends and env begins. */
	imgp->ip_endargv = imgp->ip_strendp;
	imgp->ip_envc = 0;

	/* Now, get the environment */
	while (envv != 0LL) {
		user_addr_t	env;

		error = copyinptr(envv, &env, ptr_size);
		if (error)
			goto bad;

		/* unlike the argv loop, the cursor advances before the NULL test */
		envv += ptr_size;
		if (env == 0LL) {
			break;
		}
		/*
		* next environment string = env[n]
		*/
		error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
		if (error)
			goto bad;
		if (imgp->ip_argspace < new_ptr_size) {
			error = E2BIG;
			goto bad;
		}
		imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
		imgp->ip_envc++;
	}

	/* Save space for envv[] NULL terminator */
	if (imgp->ip_argspace < new_ptr_size) {
		error = E2BIG;
		goto bad;
	}
	imgp->ip_argspace -= new_ptr_size;

	/* Align the tail of the combined argv+envv area */
	while (imgp->ip_strspace % new_ptr_size != 0) {
		/* unlike the path-name padding above, this padding is charged to ip_argspace */
		if (imgp->ip_argspace < 1) {
			error = E2BIG;
			goto bad;
		}
		*imgp->ip_strendp++ = '\0';
		imgp->ip_strspace--;
		imgp->ip_argspace--;
	}

	/* Note where the envv ends and applev begins. */
	imgp->ip_endenvv = imgp->ip_strendp;

	/*
	 * From now on, we are no longer charging argument
	 * space to ip_argspace.
	 */

	/* common exit: 'error' is 0 on the success path */
bad:
	return error;
}
3131
3132static char *
3133random_hex_str(char *str, int len, boolean_t embedNUL)
3134{
3135	uint64_t low, high, value;
3136	int idx;
3137	char digit;
3138
3139	/* A 64-bit value will only take 16 characters, plus '0x' and NULL. */
3140	if (len > 19)
3141		len = 19;
3142
3143	/* We need enough room for at least 1 digit */
3144	if (len < 4)
3145		return (NULL);
3146
3147	low = random();
3148	high = random();
3149	value = high << 32 | low;
3150
3151	if (embedNUL) {
3152		/*
3153		 * Zero a byte to protect against C string vulnerabilities
3154		 * e.g. for userland __stack_chk_guard.
3155		 */
3156		value &= ~(0xffull << 8);
3157	}
3158
3159	str[0] = '0';
3160	str[1] = 'x';
3161	for (idx = 2; idx < len - 1; idx++) {
3162		digit = value & 0xf;
3163		value = value >> 4;
3164		if (digit < 10)
3165			str[idx] = '0' + digit;
3166		else
3167			str[idx] = 'a' + (digit - 10);
3168	}
3169	str[idx] = '\0';
3170	return (str);
3171}
3172
3173/*
3174 * Libc has an 8-element array set up for stack guard values.  It only fills
3175 * in one of those entries, and both gcc and llvm seem to use only a single
3176 * 8-byte guard.  Until somebody needs more than an 8-byte guard value, don't
3177 * do the work to construct them.
3178 */
/* Number of random values appended after GUARD_KEY in the apple[] string. */
#define	GUARD_VALUES 1
#define	GUARD_KEY "stack_guard="

/*
 * System malloc needs some entropy when it is initialized.
 * ENTROPY_VALUES comma-separated random values follow ENTROPY_KEY.
 */
#define	ENTROPY_VALUES 2
#define ENTROPY_KEY "malloc_entropy="

/*
 * System malloc engages nanozone for UIAPP.
 */
#define NANO_ENGAGE_KEY "MallocNanoZone=1"

/*
 * Prefix of the apple[] string that carries the commpage text location;
 * exec_add_apple_strings() formats the 32-bit or 64-bit value below after it.
 */
#define PFZ_KEY "pfz="
extern user32_addr_t commpage_text32_location;
extern user64_addr_t commpage_text64_location;
3196/*
3197 * Build up the contents of the apple[] string vector
3198 */
3199static int
3200exec_add_apple_strings(struct image_params *imgp)
3201{
3202	int i, error;
3203	int new_ptr_size=4;
3204	char guard[19];
3205	char guard_vec[strlen(GUARD_KEY) + 19 * GUARD_VALUES + 1];
3206
3207	char entropy[19];
3208	char entropy_vec[strlen(ENTROPY_KEY) + 19 * ENTROPY_VALUES + 1];
3209
3210	char pfz_string[strlen(PFZ_KEY) + 16 + 4 +1];
3211
3212	if( imgp->ip_flags & IMGPF_IS_64BIT) {
3213		new_ptr_size = 8;
3214		snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%llx",commpage_text64_location);
3215	} else {
3216		snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%x",commpage_text32_location);
3217	}
3218
3219	/* exec_save_path stored the first string */
3220	imgp->ip_applec = 1;
3221
3222	/* adding the pfz string */
3223	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(pfz_string),UIO_SYSSPACE,FALSE);
3224	if(error)
3225		goto bad;
3226	imgp->ip_applec++;
3227
3228	/* adding the NANO_ENGAGE_KEY key */
3229	if (imgp->ip_px_sa) {
3230	    int proc_type = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_apptype) & POSIX_SPAWN_PROC_TYPE_MASK;
3231
3232	    if (proc_type == POSIX_SPAWN_PROC_TYPE_APP_DEFAULT || proc_type == POSIX_SPAWN_PROC_TYPE_APP_TAL) {
3233		char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1];
3234
3235		snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY);
3236		error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE);
3237		if(error)
3238			goto bad;
3239		imgp->ip_applec++;
3240	    }
3241	}
3242
3243	/*
3244	 * Supply libc with a collection of random values to use when
3245	 * implementing -fstack-protector.
3246	 *
3247	 * (The first random string always contains an embedded NUL so that
3248	 * __stack_chk_guard also protects against C string vulnerabilities)
3249	 */
3250	(void)strlcpy(guard_vec, GUARD_KEY, sizeof (guard_vec));
3251	for (i = 0; i < GUARD_VALUES; i++) {
3252		random_hex_str(guard, sizeof (guard), i == 0);
3253		if (i)
3254			(void)strlcat(guard_vec, ",", sizeof (guard_vec));
3255		(void)strlcat(guard_vec, guard, sizeof (guard_vec));
3256	}
3257
3258	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(guard_vec), UIO_SYSSPACE, FALSE);
3259	if (error)
3260		goto bad;
3261	imgp->ip_applec++;
3262
3263	/*
3264	 * Supply libc with entropy for system malloc.
3265	 */
3266	(void)strlcpy(entropy_vec, ENTROPY_KEY, sizeof(entropy_vec));
3267	for (i = 0; i < ENTROPY_VALUES; i++) {
3268		random_hex_str(entropy, sizeof (entropy), FALSE);
3269		if (i)
3270			(void)strlcat(entropy_vec, ",", sizeof (entropy_vec));
3271		(void)strlcat(entropy_vec, entropy, sizeof (entropy_vec));
3272	}
3273
3274	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(entropy_vec), UIO_SYSSPACE, FALSE);
3275	if (error)
3276		goto bad;
3277	imgp->ip_applec++;
3278
3279	/* Align the tail of the combined applev area */
3280	while (imgp->ip_strspace % new_ptr_size != 0) {
3281		*imgp->ip_strendp++ = '\0';
3282		imgp->ip_strspace--;
3283	}
3284
3285bad:
3286	return error;
3287}
3288
/*
 * rlim_cur of the RLIMIT_STACK resource limit of process 'p'.  The argument
 * is parenthesized so that any pointer expression (e.g. '&proc' or 'p + 1'),
 * not just a plain identifier, expands to the intended member access.
 */
#define	unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
3290
3291/*
3292 * exec_check_permissions
3293 *
3294 * Description:	Verify that the file that is being attempted to be executed
 *		is in fact allowed to be executed based on its POSIX file
3296 *		permissions and other access control criteria
3297 *
3298 * Parameters:	struct image_params *	the image parameter block
3299 *
3300 * Returns:	0			Success
3301 *		EACCES			Permission denied
3302 *		ENOEXEC			Executable file format error
3303 *		ETXTBSY			Text file busy [misuse of error code]
3304 *	vnode_getattr:???
3305 *	vnode_authorize:???
3306 */
3307static int
3308exec_check_permissions(struct image_params *imgp)
3309{
3310	struct vnode *vp = imgp->ip_vp;
3311	struct vnode_attr *vap = imgp->ip_vattr;
3312	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
3313	int error;
3314	kauth_action_t action;
3315
3316	/* Only allow execution of regular files */
3317	if (!vnode_isreg(vp))
3318		return (EACCES);
3319
3320	/* Get the file attributes that we will be using here and elsewhere */
3321	VATTR_INIT(vap);
3322	VATTR_WANTED(vap, va_uid);
3323	VATTR_WANTED(vap, va_gid);
3324	VATTR_WANTED(vap, va_mode);
3325	VATTR_WANTED(vap, va_fsid);
3326	VATTR_WANTED(vap, va_fileid);
3327	VATTR_WANTED(vap, va_data_size);
3328	if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
3329		return (error);
3330
3331	/*
3332	 * Ensure that at least one execute bit is on - otherwise root
3333	 * will always succeed, and we don't want to happen unless the
3334	 * file really is executable.
3335	 */
3336	if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0))
3337		return (EACCES);
3338
3339	/* Disallow zero length files */
3340	if (vap->va_data_size == 0)
3341		return (ENOEXEC);
3342
3343	imgp->ip_arch_offset = (user_size_t)0;
3344	imgp->ip_arch_size = vap->va_data_size;
3345
3346	/* Disable setuid-ness for traced programs or if MNT_NOSUID */
3347	if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED))
3348		vap->va_mode &= ~(VSUID | VSGID);
3349
3350	/*
3351	 * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR
3352	 * flags for setuid/setgid binaries.
3353	 */
3354	if (vap->va_mode & (VSUID | VSGID))
3355		imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR);
3356
3357#if CONFIG_MACF
3358	error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
3359	if (error)
3360		return (error);
3361#endif
3362
3363  	/* Check for execute permission */
3364 	action = KAUTH_VNODE_EXECUTE;
3365  	/* Traced images must also be readable */
3366 	if (p->p_lflag & P_LTRACED)
3367 		action |= KAUTH_VNODE_READ_DATA;
3368 	if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
3369		return (error);
3370
3371#if 0
3372	/* Don't let it run if anyone had it open for writing */
3373	vnode_lock(vp);
3374	if (vp->v_writecount) {
3375		panic("going to return ETXTBSY %x", vp);
3376		vnode_unlock(vp);
3377		return (ETXTBSY);
3378	}
3379	vnode_unlock(vp);
3380#endif
3381
3382
3383	/* XXX May want to indicate to underlying FS that vnode is open */
3384
3385	return (error);
3386}
3387
3388
3389/*
3390 * exec_handle_sugid
3391 *
3392 * Initially clear the P_SUGID in the process flags; if an SUGID process is
3393 * exec'ing a non-SUGID image, then  this is the point of no return.
3394 *
3395 * If the image being activated is SUGID, then replace the credential with a
3396 * copy, disable tracing (unless the tracing process is root), reset the
3397 * mach task port to revoke it, set the P_SUGID bit,
3398 *
3399 * If the saved user and group ID will be changing, then make sure it happens
3400 * to a new credential, rather than a shared one.
3401 *
3402 * Set the security token (this is probably obsolete, given that the token
3403 * should not technically be separate from the credential itself).
3404 *
3405 * Parameters:	struct image_params *	the image parameter block
3406 *
 * Returns:	0			Success
 *		!0			Last error recorded from falloc() or
 *					vn_open() while filling descriptors
 *					0..2 of a set-id process
3408 *
3409 * Implicit returns:
3410 *		<process credential>	Potentially modified/replaced
3411 *		<task port>		Potentially revoked
3412 *		<process flags>		P_SUGID bit potentially modified
3413 *		<security token>	Potentially modified
3414 */
static int
exec_handle_sugid(struct image_params *imgp)
{
	/* credential captured at entry; the checks below compare against it */
	kauth_cred_t		cred = vfs_context_ucred(imgp->ip_vfs_context);
	proc_t			p = vfs_context_proc(imgp->ip_vfs_context);
	int			i;
	/* written by kauth_cred_ismember_gid() and by the MAC paths below */
	int			leave_sugid_clear = 0;
	/* only ever set by falloc()/vn_open() in the descriptor loop */
	int			error = 0;
#if CONFIG_MACF
	int			mac_transition;

	/*
	 * Determine whether a call to update the MAC label will result in the
	 * credential changing.
	 *
	 * Note:	MAC policies which do not actually end up modifying
	 *		the label subsequently are strongly encouraged to
	 *		return 0 for this check, since a non-zero answer will
	 *		slow down the exec fast path for normal binaries.
	 */
	mac_transition = mac_cred_check_label_update_execve(
							imgp->ip_vfs_context,
							imgp->ip_vp,
							imgp->ip_scriptvp,
							imgp->ip_scriptlabelp,
							imgp->ip_execlabelp,
							p,
							imgp->ip_px_smpx);
#endif

	/* Start with P_SUGID cleared; it is set again below when required. */
	OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag);

	/*
	 * Order of the following is important; group checks must go last,
	 * as we use the success of the 'ismember' check combined with the
	 * failure of the explicit match to indicate that we will be setting
	 * the egid of the process even though the new process did not
	 * require VSUID/VSGID bits in order for it to set the new group as
	 * its egid.
	 *
	 * Note:	Technically, by this we are implying a call to
	 *		setegid() in the new process, rather than implying
	 *		it used its VSGID bit to set the effective group,
	 *		even though there is no code in that process to make
	 *		such a call.
	 */
	if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
	     kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
	    ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
		 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
		 (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {

#if CONFIG_MACF
/* label for MAC transition and neither VSUID nor VSGID */
handle_mac_transition:
#endif

		/*
		 * Replace the credential with a copy of itself if euid or
		 * egid change.
		 *
		 * Note:	setuid binaries will automatically opt out of
		 *		group resolver participation as a side effect
		 *		of this operation.  This is an intentional
		 *		part of the security model, which requires a
		 *		participating credential be established by
		 *		escalating privilege, setting up all other
		 *		aspects of the credential including whether
		 *		or not to participate in external group
		 *		membership resolution, then dropping their
		 *		effective privilege to that of the desired
		 *		final credential state.
		 */
		if (imgp->ip_origvattr->va_mode & VSUID) {
			p->p_ucred  = kauth_cred_setresuid(p->p_ucred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
			/* update cred on proc */
			PROC_UPDATE_CREDS_ONPROC(p);
		}
		if (imgp->ip_origvattr->va_mode & VSGID) {
			p->p_ucred = kauth_cred_setresgid(p->p_ucred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
			/* update cred on proc */
			PROC_UPDATE_CREDS_ONPROC(p);
		}

#if CONFIG_MACF
		/*
		 * If a policy has indicated that it will transition the label,
		 * before making the call into the MAC policies, get a new
		 * duplicate credential, so they can modify it without
		 * modifying any others sharing it.
		 */
		if (mac_transition) {
			kauth_cred_t	my_cred;
			if (kauth_proc_label_update_execve(p,
						imgp->ip_vfs_context,
						imgp->ip_vp,
						imgp->ip_scriptvp,
						imgp->ip_scriptlabelp,
						imgp->ip_execlabelp,
						imgp->ip_px_smpx)) {
				/*
				 * If updating the MAC label resulted in a
				 * disjoint credential, flag that we need to
				 * set the P_SUGID bit.  This protects
				 * against debuggers being attached by an
				 * insufficiently privileged process onto the
				 * result of a transition to a more privileged
				 * credential.
				 */
				leave_sugid_clear = 0;
			}

			/* take a temporary reference for the task label update */
			my_cred = kauth_cred_proc_ref(p);
			mac_task_label_update_cred(my_cred, p->task);
			kauth_cred_unref(&my_cred);
		}
#endif	/* CONFIG_MACF */

		/*
		 * If 'leave_sugid_clear' is non-zero, then we passed the
		 * VSUID and MACF checks, and successfully determined that
		 * the previous cred was a member of the VSGID group, but
		 * that it was not the default at the time of the execve,
		 * and that the post-labelling credential was not disjoint.
		 * So we don't set the P_SUGID or reset mach ports and fds
		 * on the basis of simply running this code.
		 */
		if (!leave_sugid_clear) {
			/*
			 * Have mach reset the task and thread ports.
			 * We don't want anyone who had the ports before
			 * a setuid exec to be able to access/control the
			 * task/thread after.
			 */
			ipc_task_reset(p->task);
			ipc_thread_reset((imgp->ip_new_thread != NULL) ?
				 	 imgp->ip_new_thread : current_thread());

			/*
			 * Flag the process as setuid.
			 */
			OSBitOrAtomic(P_SUGID, &p->p_flag);

			/*
			 * Radar 2261856; setuid security hole fix
			 * XXX For setuid processes, attempt to ensure that
			 * stdin, stdout, and stderr are already allocated.
			 * We do not want userland to accidentally allocate
			 * descriptors in this range which has implied meaning
			 * to libc.
			 */
			for (i = 0; i < 3; i++) {

				/* descriptor already open; leave it alone */
				if (p->p_fd->fd_ofiles[i] != NULL)
					continue;

				/*
				 * Do the kernel equivalent of
				 *
				 * 	if i == 0
				 * 		(void) open("/dev/null", O_RDONLY);
				 * 	else
				 * 		(void) open("/dev/null", O_WRONLY);
				 */

				struct fileproc *fp;
				int indx;
				int flag;

				if (i == 0)
					flag = FREAD;
				else
					flag = FWRITE;

				/*
				 * A falloc() failure skips this descriptor but
				 * keeps looping; 'error' stays set unless a
				 * later call overwrites it.
				 */
				if ((error = falloc(p,
				    &fp, &indx, imgp->ip_vfs_context)) != 0)
					continue;

				struct nameidata nd1;

				NDINIT(&nd1, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
				    CAST_USER_ADDR_T("/dev/null"),
				    imgp->ip_vfs_context);

				/* a vn_open() failure abandons the remaining descriptors */
				if ((error = vn_open(&nd1, flag, 0)) != 0) {
					fp_free(p, indx, fp);
					break;
				}

				struct fileglob *fg = fp->f_fglob;

				/* attach the opened vnode to the new file structure */
				fg->fg_flag = flag;
				fg->fg_ops = &vnops;
				fg->fg_data = nd1.ni_vp;

				vnode_put(nd1.ni_vp);

				proc_fdlock(p);
				procfdtbl_releasefd(p, indx, NULL);
				fp_drop(p, indx, fp, 1);
				proc_fdunlock(p);
			}
		}
	}
#if CONFIG_MACF
	else {
		/*
		 * We are here because we were told that the MAC label will
		 * be transitioned, and the binary is not VSUID or VSGID; to
		 * deal with this case, we could either duplicate a lot of
		 * code, or we can indicate we want to default the P_SUGID
		 * bit clear and jump back up.
		 */
		if (mac_transition) {
			leave_sugid_clear = 1;
			goto handle_mac_transition;
		}
	}
#endif	/* CONFIG_MACF */

	/*
	 * Implement the semantic where the effective user and group become
	 * the saved user and group in exec'ed programs.
	 */
	p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred),  kauth_cred_getgid(p->p_ucred));
	/* update cred on proc */
	PROC_UPDATE_CREDS_ONPROC(p);

	/* Update the process' identity version and set the security token */
	p->p_idversion++;
	set_security_token(p);

	/* 0 unless falloc()/vn_open() failed in the descriptor loop above */
	return(error);
}
3649
3650
3651/*
3652 * create_unix_stack
3653 *
3654 * Description:	Set the user stack address for the process to the provided
3655 *		address.  If a custom stack was not set as a result of the
3656 *		load process (i.e. as specified by the image file for the
3657 *		executable), then allocate the stack in the provided map and
3658 *		set up appropriate guard pages for enforcing administrative
3659 *		limits on stack growth, if they end up being needed.
3660 *
3661 * Parameters:	p			Process to set stack on
3662 *		load_result		Information from mach-o load commands
3663 *		map			Address map in which to allocate the new stack
3664 *
3665 * Returns:	KERN_SUCCESS		Stack successfully created
3666 *		!KERN_SUCCESS		Mach failure code
3667 */
3668static kern_return_t
3669create_unix_stack(vm_map_t map, load_result_t* load_result,
3670			proc_t p)
3671{
3672	mach_vm_size_t		size, prot_size;
3673	mach_vm_offset_t	addr, prot_addr;
3674	kern_return_t		kr;
3675
3676	mach_vm_address_t	user_stack = load_result->user_stack;
3677
3678	proc_lock(p);
3679	p->user_stack = user_stack;
3680	proc_unlock(p);
3681
3682	if (!load_result->prog_allocated_stack) {
3683		/*
3684		 * Allocate enough space for the maximum stack size we
3685		 * will ever authorize and an extra page to act as
3686		 * a guard page for stack overflows. For default stacks,
3687		 * vm_initial_limit_stack takes care of the extra guard page.
3688		 * Otherwise we must allocate it ourselves.
3689		 */
3690
3691		size = mach_vm_round_page(load_result->user_stack_size);
3692		if (load_result->prog_stack_size)
3693			size += PAGE_SIZE;
3694		addr = mach_vm_trunc_page(load_result->user_stack - size);
3695		kr = mach_vm_allocate(map, &addr, size,
3696					VM_MAKE_TAG(VM_MEMORY_STACK) |
3697					VM_FLAGS_FIXED);
3698		if (kr != KERN_SUCCESS) {
3699			/* If can't allocate at default location, try anywhere */
3700			addr = 0;
3701			kr = mach_vm_allocate(map, &addr, size,
3702								  VM_MAKE_TAG(VM_MEMORY_STACK) |
3703								  VM_FLAGS_ANYWHERE);
3704			if (kr != KERN_SUCCESS)
3705				return kr;
3706
3707			user_stack = addr + size;
3708			load_result->user_stack = user_stack;
3709
3710			proc_lock(p);
3711			p->user_stack = user_stack;
3712			proc_unlock(p);
3713		}
3714
3715		/*
3716		 * And prevent access to what's above the current stack
3717		 * size limit for this process.
3718		 */
3719		prot_addr = addr;
3720		if (load_result->prog_stack_size)
3721			prot_size = PAGE_SIZE;
3722		else
3723			prot_size = mach_vm_trunc_page(size - unix_stack_size(p));
3724		kr = mach_vm_protect(map,
3725							 prot_addr,
3726							 prot_size,
3727							 FALSE,
3728							 VM_PROT_NONE);
3729		if (kr != KERN_SUCCESS) {
3730			(void) mach_vm_deallocate(map, addr, size);
3731			return kr;
3732		}
3733	}
3734
3735	return KERN_SUCCESS;
3736}
3737
3738#include <sys/reboot.h>
3739
/* Path of the first user program; load_init_program() copies it out and executes it. */
static char		init_program_name[128] = "/sbin/launchd";

/* Argument block that load_init_program() fills in for its call to execve(). */
struct execve_args	init_exec_args;
3743
3744/*
3745 * load_init_program
3746 *
3747 * Description:	Load the "init" program; in most cases, this will be "launchd"
3748 *
3749 * Parameters:	p			Process to call execve() to create
3750 *					the "init" program
3751 *
3752 * Returns:	(void)
3753 *
3754 * Notes:	The process that is passed in is the first manufactured
3755 *		process on the system, and gets here via bsd_ast() firing
3756 *		for the first time.  This is done to ensure that bsd_init()
3757 *		has run to completion.
3758 */
3759void
3760load_init_program(proc_t p)
3761{
3762	vm_offset_t	init_addr;
3763	int		argc = 0;
3764	uint32_t argv[3];
3765	int			error;
3766	int 		retval[2];
3767
3768	/*
3769	 * Copy out program name.
3770	 */
3771
3772	init_addr = VM_MIN_ADDRESS;
3773	(void) vm_allocate(current_map(), &init_addr, PAGE_SIZE,
3774				VM_FLAGS_ANYWHERE);
3775	if (init_addr == 0)
3776		init_addr++;
3777
3778	(void) copyout((caddr_t) init_program_name, CAST_USER_ADDR_T(init_addr),
3779			(unsigned) sizeof(init_program_name)+1);
3780
3781	argv[argc++] = (uint32_t)init_addr;
3782	init_addr += sizeof(init_program_name);
3783	init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
3784
3785	/*
3786	 * Put out first (and only) argument, similarly.
3787	 * Assumes everything fits in a page as allocated
3788	 * above.
3789	 */
3790	if (boothowto & RB_SINGLE) {
3791		const char *init_args = "-s";
3792
3793		copyout(init_args, CAST_USER_ADDR_T(init_addr),
3794			strlen(init_args));
3795
3796		argv[argc++] = (uint32_t)init_addr;
3797		init_addr += strlen(init_args);
3798		init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
3799
3800	}
3801
3802	/*
3803	 * Null-end the argument list
3804	 */
3805	argv[argc] = 0;
3806
3807	/*
3808	 * Copy out the argument list.
3809	 */
3810
3811	(void) copyout((caddr_t) argv, CAST_USER_ADDR_T(init_addr),
3812			(unsigned) sizeof(argv));
3813
3814	/*
3815	 * Set up argument block for fake call to execve.
3816	 */
3817
3818	init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
3819	init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr);
3820	init_exec_args.envp = CAST_USER_ADDR_T(0);
3821
3822	/*
3823	 * So that mach_init task is set with uid,gid 0 token
3824	 */
3825	set_security_token(p);
3826
3827	error = execve(p,&init_exec_args,retval);
3828	if (error)
3829		panic("Process 1 exec of %s failed, errno %d",
3830		      init_program_name, error);
3831}
3832
3833/*
3834 * load_return_to_errno
3835 *
3836 * Description:	Convert a load_return_t (Mach error) to an errno (BSD error)
3837 *
3838 * Parameters:	lrtn			Mach error number
3839 *
3840 * Returns:	(int)			BSD error number
3841 *		0			Success
3842 *		EBADARCH		Bad architecture
3843 *		EBADMACHO		Bad Mach object file
3844 *		ESHLIBVERS		Bad shared library version
3845 *		ENOMEM			Out of memory/resource shortage
3846 *		EACCES			Access denied
 *		ENOENT			Entry not found (usually "file does
 *					not exist")
3849 *		EIO			An I/O error occurred
3850 *		EBADEXEC		The executable is corrupt/unknown
3851 */
3852static int
3853load_return_to_errno(load_return_t lrtn)
3854{
3855	switch (lrtn) {
3856	case LOAD_SUCCESS:
3857		return 0;
3858	case LOAD_BADARCH:
3859		return EBADARCH;
3860	case LOAD_BADMACHO:
3861		return EBADMACHO;
3862	case LOAD_SHLIB:
3863		return ESHLIBVERS;
3864	case LOAD_NOSPACE:
3865	case LOAD_RESOURCE:
3866		return ENOMEM;
3867	case LOAD_PROTECT:
3868		return EACCES;
3869	case LOAD_ENOENT:
3870		return ENOENT;
3871	case LOAD_IOERROR:
3872		return EIO;
3873	case LOAD_FAILURE:
3874	case LOAD_DECRYPTFAIL:
3875	default:
3876		return EBADEXEC;
3877	}
3878}
3879
3880#include <mach/mach_types.h>
3881#include <mach/vm_prot.h>
3882#include <mach/semaphore.h>
3883#include <mach/sync_policy.h>
3884#include <kern/clock.h>
3885#include <mach/kern_return.h>
3886
3887/*
3888 * execargs_alloc
3889 *
3890 * Description:	Allocate the block of memory used by the execve arguments.
3891 *		At the same time, we allocate a page so that we can read in
3892 *		the first page of the image.
3893 *
3894 * Parameters:	struct image_params *	the image parameter block
3895 *
3896 * Returns:	0			Success
3897 *		EINVAL			Invalid argument
3898 *		EACCES			Permission denied
3899 *		EINTR			Interrupted function
3900 *		ENOMEM			Not enough space
3901 *
3902 * Notes:	This is a temporary allocation into the kernel address space
3903 *		to enable us to copy arguments in from user space.  This is
3904 *		necessitated by not mapping the process calling execve() into
3905 *		the kernel address space during the execve() system call.
3906 *
3907 *		We assemble the argument and environment, etc., into this
3908 *		region before copying it as a single block into the child
3909 *		process address space (at the top or bottom of the stack,
3910 *		depending on which way the stack grows; see the function
3911 *		exec_copyout_strings() for details).
3912 *
3913 *		This ends up with a second (possibly unnecessary) copy compared
 *		with assembling the data directly into the child address space,
3915 *		instead, but since we cannot be guaranteed that the parent has
3916 *		not modified its environment, we can't really know that it's
3917 *		really a block there as well.
3918 */
3919
3920
/*
 * Number of threads currently sleeping in execargs_alloc() for a free
 * buffer; execargs_free() reads it to decide whether a wakeup is needed.
 */
static int execargs_waiters = 0;
/* Mutex taken by execargs_lock_lock() around the free count, waiter count and cache. */
lck_mtx_t *execargs_cache_lock;
3923
/* Acquire execargs_cache_lock using lck_mtx_lock_spin(). */
static void
execargs_lock_lock(void) {
	lck_mtx_lock_spin(execargs_cache_lock);
}
3928
/* Release execargs_cache_lock. */
static void
execargs_lock_unlock(void) {
	lck_mtx_unlock(execargs_cache_lock);
}
3933
3934static wait_result_t
3935execargs_lock_sleep(void) {
3936	return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE));
3937}
3938
3939static kern_return_t
3940execargs_purgeable_allocate(char **execarg_address) {
3941	kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
3942	assert(kr == KERN_SUCCESS);
3943	return kr;
3944}
3945
3946static kern_return_t
3947execargs_purgeable_reference(void *execarg_address) {
3948	int state = VM_PURGABLE_NONVOLATILE;
3949	kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
3950
3951	assert(kr == KERN_SUCCESS);
3952	return kr;
3953}
3954
3955static kern_return_t
3956execargs_purgeable_volatilize(void *execarg_address) {
3957	int state = VM_PURGABLE_VOLATILE | VM_PURGABLE_ORDERING_OBSOLETE;
3958	kern_return_t kr;
3959	kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state);
3960
3961	assert(kr == KERN_SUCCESS);
3962
3963	return kr;
3964}
3965
/* Wake threads sleeping on the execargs_free_count event (see execargs_lock_sleep()). */
static void
execargs_wakeup_waiters(void) {
	thread_wakeup(&execargs_free_count);
}
3970
3971static int
3972execargs_alloc(struct image_params *imgp)
3973{
3974	kern_return_t kret;
3975	wait_result_t res;
3976	int i, cache_index = -1;
3977
3978	execargs_lock_lock();
3979
3980	while (execargs_free_count == 0) {
3981		execargs_waiters++;
3982		res = execargs_lock_sleep();
3983		execargs_waiters--;
3984		if (res != THREAD_AWAKENED) {
3985			execargs_lock_unlock();
3986			return (EINTR);
3987		}
3988	}
3989
3990	execargs_free_count--;
3991
3992	for (i = 0; i < execargs_cache_size; i++) {
3993		vm_offset_t element = execargs_cache[i];
3994		if (element) {
3995			cache_index = i;
3996			imgp->ip_strings = (char *)(execargs_cache[i]);
3997			execargs_cache[i] = 0;
3998			break;
3999		}
4000	}
4001
4002	assert(execargs_free_count >= 0);
4003
4004	execargs_lock_unlock();
4005
4006	if (cache_index == -1) {
4007		kret = execargs_purgeable_allocate(&imgp->ip_strings);
4008	}
4009	else
4010		kret = execargs_purgeable_reference(imgp->ip_strings);
4011
4012	assert(kret == KERN_SUCCESS);
4013	if (kret != KERN_SUCCESS) {
4014		return (ENOMEM);
4015	}
4016
4017	/* last page used to read in file headers */
4018	imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE );
4019	imgp->ip_strendp = imgp->ip_strings;
4020	imgp->ip_argspace = NCARGS;
4021	imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
4022
4023	return (0);
4024}
4025
4026/*
4027 * execargs_free
4028 *
4029 * Description:	Free the block of memory used by the execve arguments and the
4030 *		first page of the executable by a previous call to the function
4031 *		execargs_alloc().
4032 *
4033 * Parameters:	struct image_params *	the image parameter block
4034 *
4035 * Returns:	0			Success
4036 *		EINVAL			Invalid argument
 *		EINTR			Operation interrupted
4038 */
4039static int
4040execargs_free(struct image_params *imgp)
4041{
4042	kern_return_t kret;
4043	int i;
4044	boolean_t needs_wakeup = FALSE;
4045
4046	kret = execargs_purgeable_volatilize(imgp->ip_strings);
4047
4048	execargs_lock_lock();
4049	execargs_free_count++;
4050
4051	for (i = 0; i < execargs_cache_size; i++) {
4052		vm_offset_t element = execargs_cache[i];
4053		if (element == 0) {
4054			execargs_cache[i] = (vm_offset_t) imgp->ip_strings;
4055			imgp->ip_strings = NULL;
4056			break;
4057		}
4058	}
4059
4060	assert(imgp->ip_strings == NULL);
4061
4062	if (execargs_waiters > 0)
4063		needs_wakeup = TRUE;
4064
4065	execargs_lock_unlock();
4066
4067	if (needs_wakeup == TRUE)
4068		execargs_wakeup_waiters();
4069
4070	return ((kret == KERN_SUCCESS ? 0 : EINVAL));
4071}
4072
4073static void
4074exec_resettextvp(proc_t p, struct image_params *imgp)
4075{
4076	vnode_t vp;
4077	off_t offset;
4078	vnode_t tvp  = p->p_textvp;
4079	int ret;
4080
4081	vp = imgp->ip_vp;
4082	offset = imgp->ip_arch_offset;
4083
4084	if (vp == NULLVP)
4085		panic("exec_resettextvp: expected valid vp");
4086
4087	ret = vnode_ref(vp);
4088	proc_lock(p);
4089	if (ret == 0) {
4090		p->p_textvp = vp;
4091		p->p_textoff = offset;
4092	} else {
4093		p->p_textvp = NULLVP;	/* this is paranoia */
4094		p->p_textoff = 0;
4095	}
4096	proc_unlock(p);
4097
4098	if ( tvp != NULLVP) {
4099		if (vnode_getwithref(tvp) == 0) {
4100			vnode_rele(tvp);
4101			vnode_put(tvp);
4102		}
4103	}
4104
4105}
4106
4107/*
4108 * If the process is not signed or if it contains entitlements, we
4109 * need to communicate through the task_access_port to taskgated.
4110 *
4111 * taskgated will provide a detached code signature if present, and
4112 * will enforce any restrictions on entitlements.
4113 */
4114
4115static boolean_t
4116taskgated_required(proc_t p, boolean_t *require_success)
4117{
4118	size_t length;
4119	void *blob;
4120	int error;
4121
4122	if ((p->p_csflags & CS_VALID) == 0) {
4123		*require_success = FALSE;
4124		return TRUE;
4125	}
4126
4127	error = cs_entitlements_blob_get(p, &blob, &length);
4128	if (error == 0 && blob != NULL) {
4129		*require_success = TRUE; /* fatal on the desktop when entitlements are present */
4130		return TRUE;
4131	}
4132
4133	*require_success = FALSE;
4134	return 0;
4135}
4136
4137
4138static int
4139check_for_signature(proc_t p, struct image_params *imgp)
4140{
4141	mach_port_t port = NULL;
4142	kern_return_t kr = KERN_FAILURE;
4143	int error = EACCES;
4144	boolean_t unexpected_failure = FALSE;
4145	unsigned char hash[SHA1_RESULTLEN];
4146	boolean_t require_success = FALSE;
4147
4148	/*
4149	 * Override inherited code signing flags with the
4150	 * ones for the process that is being successfully
4151	 * loaded
4152	 */
4153	proc_lock(p);
4154	p->p_csflags = imgp->ip_csflags;
4155	proc_unlock(p);
4156
4157	/* Set the switch_protect flag on the map */
4158	if(p->p_csflags & (CS_HARD|CS_KILL)) {
4159		vm_map_switch_protect(get_task_map(p->task), TRUE);
4160	}
4161
4162	/* check if callout to taskgated is needed */
4163	if (!taskgated_required(p, &require_success)) {
4164		error = 0;
4165		goto done;
4166	}
4167
4168	kr = task_get_task_access_port(p->task, &port);
4169	if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
4170		error = 0;
4171		if (require_success)
4172			error = EACCES;
4173		goto done;
4174	}
4175
4176	/*
4177	 * taskgated returns KERN_SUCCESS if it has completed its work
4178	 * and the exec should continue, KERN_FAILURE if the exec should
4179	 * fail, or it may error out with different error code in an
4180	 * event of mig failure (e.g. process was signalled during the
4181	 * rpc call, taskgated died, mig server died etc.).
4182	 */
4183
4184	kr = find_code_signature(port, p->p_pid);
4185	switch (kr) {
4186	case KERN_SUCCESS:
4187		error = 0;
4188		break;
4189	case KERN_FAILURE:
4190		error = EACCES;
4191		goto done;
4192	default:
4193		error = EACCES;
4194		unexpected_failure = TRUE;
4195		goto done;
4196	}
4197
4198	/* Only do this if exec_resettextvp() did not fail */
4199	if (p->p_textvp != NULLVP) {
4200		/*
4201		 * If there's a new code directory, mark this process
4202		 * as signed.
4203		 */
4204		if (0 == ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash)) {
4205			proc_lock(p);
4206			p->p_csflags |= CS_VALID;
4207			proc_unlock(p);
4208		}
4209	}
4210
4211done:
4212	if (0 != error) {
4213		if (!unexpected_failure)
4214			p->p_csflags |= CS_KILLED;
4215		/* make very sure execution fails */
4216		psignal(p, SIGKILL);
4217	}
4218	return error;
4219}
4220
4221/*
4222 * Typically as soon as we start executing this process, the
4223 * first instruction will trigger a VM fault to bring the text
4224 * pages (as executable) into the address space, followed soon
4225 * thereafter by dyld data structures (for dynamic executable).
4226 * To optimize this, as well as improve support for hardware
4227 * debuggers that can only access resident pages present
4228 * in the process' page tables, we prefault some pages if
4229 * possible. Errors are non-fatal.
4230 */
4231static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, load_result_t *load_result)
4232{
4233	int ret;
4234	size_t expected_all_image_infos_size;
4235
4236	/*
4237	 * Prefault executable or dyld entry point.
4238	 */
4239	vm_fault(current_map(),
4240		 vm_map_trunc_page(load_result->entry_point,
4241				   vm_map_page_mask(current_map())),
4242		 VM_PROT_READ | VM_PROT_EXECUTE,
4243		 FALSE,
4244		 THREAD_UNINT, NULL, 0);
4245
4246	if (imgp->ip_flags & IMGPF_IS_64BIT) {
4247		expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos);
4248	} else {
4249		expected_all_image_infos_size = sizeof(struct user32_dyld_all_image_infos);
4250	}
4251
4252	/* Decode dyld anchor structure from <mach-o/dyld_images.h> */
4253	if (load_result->dynlinker &&
4254		load_result->all_image_info_addr &&
4255		load_result->all_image_info_size >= expected_all_image_infos_size) {
4256		union {
4257			struct user64_dyld_all_image_infos	infos64;
4258			struct user32_dyld_all_image_infos	infos32;
4259		} all_image_infos;
4260
4261		/*
4262		 * Pre-fault to avoid copyin() going through the trap handler
4263		 * and recovery path.
4264		 */
4265		vm_fault(current_map(),
4266			 vm_map_trunc_page(load_result->all_image_info_addr,
4267					   vm_map_page_mask(current_map())),
4268			 VM_PROT_READ | VM_PROT_WRITE,
4269			 FALSE,
4270			 THREAD_UNINT, NULL, 0);
4271		if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) {
4272			/* all_image_infos straddles a page */
4273			vm_fault(current_map(),
4274				 vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1,
4275						   vm_map_page_mask(current_map())),
4276				 VM_PROT_READ | VM_PROT_WRITE,
4277				 FALSE,
4278				 THREAD_UNINT, NULL, 0);
4279		}
4280
4281		ret = copyin(load_result->all_image_info_addr,
4282					 &all_image_infos,
4283					 expected_all_image_infos_size);
4284		if (ret == 0 && all_image_infos.infos32.version >= 9) {
4285
4286			user_addr_t notification_address;
4287			user_addr_t dyld_image_address;
4288			user_addr_t dyld_version_address;
4289			user_addr_t dyld_all_image_infos_address;
4290			user_addr_t dyld_slide_amount;
4291
4292			if (imgp->ip_flags & IMGPF_IS_64BIT) {
4293				notification_address = all_image_infos.infos64.notification;
4294				dyld_image_address = all_image_infos.infos64.dyldImageLoadAddress;
4295				dyld_version_address = all_image_infos.infos64.dyldVersion;
4296				dyld_all_image_infos_address = all_image_infos.infos64.dyldAllImageInfosAddress;
4297			} else {
4298				notification_address = all_image_infos.infos32.notification;
4299				dyld_image_address = all_image_infos.infos32.dyldImageLoadAddress;
4300				dyld_version_address = all_image_infos.infos32.dyldVersion;
4301				dyld_all_image_infos_address = all_image_infos.infos32.dyldAllImageInfosAddress;
4302			}
4303
4304			/*
4305			 * dyld statically sets up the all_image_infos in its Mach-O
4306			 * binary at static link time, with pointers relative to its default
4307			 * load address. Since ASLR might slide dyld before its first
4308			 * instruction is executed, "dyld_slide_amount" tells us how far
4309			 * dyld was loaded compared to its default expected load address.
4310			 * All other pointers into dyld's image should be adjusted by this
4311			 * amount. At some point later, dyld will fix up pointers to take
4312			 * into account the slide, at which point the all_image_infos_address
4313			 * field in the structure will match the runtime load address, and
4314			 * "dyld_slide_amount" will be 0, if we were to consult it again.
4315			 */
4316
4317			dyld_slide_amount = load_result->all_image_info_addr - dyld_all_image_infos_address;
4318
4319#if 0
4320			kprintf("exec_prefault: 0x%016llx 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
4321					(uint64_t)load_result->all_image_info_addr,
4322					all_image_infos.infos32.version,
4323					(uint64_t)notification_address,
4324					(uint64_t)dyld_image_address,
4325					(uint64_t)dyld_version_address,
4326					(uint64_t)dyld_all_image_infos_address);
4327#endif
4328
4329			vm_fault(current_map(),
4330				 vm_map_trunc_page(notification_address + dyld_slide_amount,
4331						   vm_map_page_mask(current_map())),
4332				 VM_PROT_READ | VM_PROT_EXECUTE,
4333				 FALSE,
4334				 THREAD_UNINT, NULL, 0);
4335			vm_fault(current_map(),
4336				 vm_map_trunc_page(dyld_image_address + dyld_slide_amount,
4337						   vm_map_page_mask(current_map())),
4338				 VM_PROT_READ | VM_PROT_EXECUTE,
4339				 FALSE,
4340				 THREAD_UNINT, NULL, 0);
4341			vm_fault(current_map(),
4342				 vm_map_trunc_page(dyld_version_address + dyld_slide_amount,
4343						   vm_map_page_mask(current_map())),
4344				 VM_PROT_READ,
4345				 FALSE,
4346				 THREAD_UNINT, NULL, 0);
4347			vm_fault(current_map(),
4348				 vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount,
4349						   vm_map_page_mask(current_map())),
4350				 VM_PROT_READ | VM_PROT_WRITE,
4351				 FALSE,
4352				 THREAD_UNINT, NULL, 0);
4353		}
4354	}
4355}
4356