1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Mach Operating System
31 * Copyright (c) 1987 Carnegie-Mellon University
32 * All rights reserved.  The CMU software License Agreement specifies
33 * the terms and conditions for use and redistribution.
34 */
35
36#include <cputypes.h>
37
38/*-
39 * Copyright (c) 1982, 1986, 1991, 1993
40 *	The Regents of the University of California.  All rights reserved.
41 * (c) UNIX System Laboratories, Inc.
42 * All or some portions of this file are derived from material licensed
43 * to the University of California by American Telephone and Telegraph
44 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
45 * the permission of UNIX System Laboratories, Inc.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 *    notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 *    notice, this list of conditions and the following disclaimer in the
54 *    documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 *    must display the following acknowledgement:
57 *	This product includes software developed by the University of
58 *	California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 *    may be used to endorse or promote products derived from this software
61 *    without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 *	from: @(#)kern_exec.c	8.1 (Berkeley) 6/10/93
76 */
77/*
78 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
79 * support for mandatory and extensible security protections.  This notice
80 * is included in support of clause 2.2 (b) of the Apple Public License,
81 * Version 2.0.
82 */
83#include <machine/reg.h>
84
85#include <sys/param.h>
86#include <sys/systm.h>
87#include <sys/filedesc.h>
88#include <sys/kernel.h>
89#include <sys/proc_internal.h>
90#include <sys/kauth.h>
91#include <sys/user.h>
92#include <sys/socketvar.h>
93#include <sys/malloc.h>
94#include <sys/namei.h>
95#include <sys/mount_internal.h>
96#include <sys/vnode_internal.h>
97#include <sys/file_internal.h>
98#include <sys/stat.h>
99#include <sys/uio_internal.h>
100#include <sys/acct.h>
101#include <sys/exec.h>
102#include <sys/kdebug.h>
103#include <sys/signal.h>
104#include <sys/aio_kern.h>
105#include <sys/sysproto.h>
106#if SYSV_SHM
107#include <sys/shm_internal.h>		/* shmexec() */
108#endif
109#include <sys/ubc_internal.h>		/* ubc_map() */
110#include <sys/spawn.h>
111#include <sys/spawn_internal.h>
112#include <sys/codesign.h>
113
114#include <bsm/audit_kernel.h>
115
116#include <ipc/ipc_types.h>
117
118#include <mach/mach_types.h>
119#include <mach/task.h>
120#include <mach/thread_act.h>
121#include <mach/vm_map.h>
122#include <mach/mach_vm.h>
123#include <mach/vm_param.h>
124
125#if CONFIG_MACF
126#include <security/mac.h>
127#include <security/mac_mach_internal.h>
128#endif
129
130#include <vm/vm_map.h>
131#include <vm/vm_kern.h>
132#include <vm/vm_protos.h>
133#include <vm/vm_kern.h>
134
135#if CONFIG_DTRACE
136/* Do not include dtrace.h, it redefines kmem_[alloc/free] */
137extern void (*dtrace_fasttrap_exec_ptr)(proc_t);
138extern void (*dtrace_helpers_cleanup)(proc_t);
139extern void dtrace_lazy_dofs_destroy(proc_t);
140
141#include <sys/dtrace_ptss.h>
142#endif
143
144/* support for child creation in exec after vfork */
145thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit);
146void vfork_exit(proc_t p, int rv);
147int setsigvec(proc_t, int, struct __user_sigaction *);
148
149/*
150 * Mach things for which prototypes are unavailable from Mach headers
151 */
152void		ipc_task_reset(
153			task_t		task);
154void		ipc_thread_reset(
155			thread_t	thread);
156kern_return_t ipc_object_copyin(
157	ipc_space_t		space,
158	mach_port_name_t	name,
159	mach_msg_type_name_t	msgt_name,
160	ipc_object_t		*objectp);
161void ipc_port_release_send(ipc_port_t);
162
163extern struct savearea *get_user_regs(thread_t);
164
165
166#include <kern/thread.h>
167#include <kern/task.h>
168#include <kern/ast.h>
169#include <kern/mach_loader.h>
170#include <mach-o/fat.h>
171#include <mach-o/loader.h>
172#include <machine/vmparam.h>
173#include <sys/imgact.h>
174
175#include <sys/sdt.h>
176
177
178/*
179 * SIZE_MAXPTR		The maximum size of a user space pointer, in bytes
180 * SIZE_IMG_STRSPACE	The available string space, minus two pointers; we
 *			define it in terms of the maximum, since we don't
182 *			know the pointer size going in, until after we've
183 *			parsed the executable image.
184 */
185#define	SIZE_MAXPTR		8				/* 64 bits */
186#define	SIZE_IMG_STRSPACE	(NCARGS - 2 * SIZE_MAXPTR)
187
188/*
189 * EAI_ITERLIMIT	The maximum number of times to iterate an image
190 *			activator in exec_activate_image() before treating
191 *			it as malformed/corrupt.
192 */
193#define EAI_ITERLIMIT		10
194
195extern vm_map_t bsd_pageable_map;
196extern struct fileops vnops;
197
/*
 * ROUND_PTR		Round 'addr' up to the next 16 byte boundary and
 *			yield the result as a 'type *'.  The arithmetic is
 *			done in uintptr_t so that no address bits are lost
 *			when pointers are wider than 'unsigned'.
 */
#define	ROUND_PTR(type, addr)	\
	((type *)(((uintptr_t)(addr) + 16 - 1) \
		  & ~(uintptr_t)(16 - 1)))
201
202struct image_params;	/* Forward */
203static int exec_activate_image(struct image_params *imgp);
204static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
205static int load_return_to_errno(load_return_t lrtn);
206static int execargs_alloc(struct image_params *imgp);
207static int execargs_free(struct image_params *imgp);
208static int exec_check_permissions(struct image_params *imgp);
209static int exec_extract_strings(struct image_params *imgp);
210static int exec_handle_sugid(struct image_params *imgp);
211static int sugid_scripts = 0;
212SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW, &sugid_scripts, 0, "");
213static kern_return_t create_unix_stack(vm_map_t map, user_addr_t user_stack,
214					int customstack, proc_t p);
215static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
216static void exec_resettextvp(proc_t, struct image_params *);
217
218/* We don't want this one exported */
219__private_extern__
220int  open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, register_t *);
221
222/*
223 * exec_add_string
224 *
225 * Add the requested string to the string space area.
226 *
227 * Parameters;	struct image_params *		image parameter block
228 *		user_addr_t			string to add to strings area
229 *
230 * Returns:	0			Success
231 *		!0			Failure errno from copyinstr()
232 *
233 * Implicit returns:
234 *		(imgp->ip_strendp)	updated location of next add, if any
235 *		(imgp->ip_strspace)	updated byte count of space remaining
236 */
237static int
238exec_add_string(struct image_params *imgp, user_addr_t str)
239{
240        int error = 0;
241
242        do {
243                size_t len = 0;
244		if (imgp->ip_strspace <= 0) {
245			error = E2BIG;
246			break;
247		}
248		if (IS_UIO_SYS_SPACE(imgp->ip_seg)) {
249			char *kstr = CAST_DOWN(char *,str);	/* SAFE */
250			error = copystr(kstr, imgp->ip_strendp, imgp->ip_strspace, &len);
251		} else  {
252			error = copyinstr(str, imgp->ip_strendp, imgp->ip_strspace,
253			    &len);
254		}
255		imgp->ip_strendp += len;
256		imgp->ip_strspace -= len;
257	} while (error == ENAMETOOLONG);
258
259	return error;
260}
261
262/*
263 * exec_save_path
264 *
265 * To support new app package launching for Mac OS X, the dyld needs the
266 * first argument to execve() stored on the user stack.
267 *
268 * Save the executable path name at the top of the strings area and set
269 * the argument vector pointer to the location following that to indicate
270 * the start of the argument and environment tuples, setting the remaining
271 * string space count to the size of the string area minus the path length
272 * and a reserve for two pointers.
273 *
274 * Parameters;	struct image_params *		image parameter block
275 *		char *				path used to invoke program
276 *		int				segment from which path comes
277 *
278 * Returns:	int			0	Success
279 *		EFAULT				Bad address
280 *	copy[in]str:EFAULT			Bad address
281 *	copy[in]str:ENAMETOOLONG		Filename too long
282 *
283 * Implicit returns:
284 *		(imgp->ip_strings)		saved path
285 *		(imgp->ip_strspace)		space remaining in ip_strings
286 *		(imgp->ip_argv)			beginning of argument list
287 *		(imgp->ip_strendp)		start of remaining copy area
288 *
289 * Note:	We have to do this before the initial namei() since in the
290 *		path contains symbolic links, namei() will overwrite the
291 *		original path buffer contents.  If the last symbolic link
292 *		resolved was a relative pathname, we would lose the original
293 *		"path", which could be an absolute pathname. This might be
294 *		unacceptable for dyld.
295 */
296static int
297exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
298{
299	int error;
300	size_t	len;
301	char *kpath = CAST_DOWN(char *,path);	/* SAFE */
302
303	imgp->ip_strendp = imgp->ip_strings;
304	imgp->ip_strspace = SIZE_IMG_STRSPACE;
305
306	len = MIN(MAXPATHLEN, imgp->ip_strspace);
307
308	switch(seg) {
309	case UIO_USERSPACE32:
310	case UIO_USERSPACE64:	/* Same for copyin()... */
311		error = copyinstr(path, imgp->ip_strings, len, &len);
312		break;
313	case UIO_SYSSPACE32:
314		error = copystr(kpath, imgp->ip_strings, len, &len);
315		break;
316	default:
317		error = EFAULT;
318		break;
319	}
320
321	if (!error) {
322		imgp->ip_strendp += len;
323		imgp->ip_strspace -= len;
324		imgp->ip_argv = imgp->ip_strendp;
325	}
326
327	return(error);
328}
329
330#ifdef IMGPF_POWERPC
331/*
332 * exec_powerpc32_imgact
333 *
334 * Implicitly invoke the PowerPC handler for a byte-swapped image magic
335 * number.  This may happen either as a result of an attempt to invoke a
336 * PowerPC image directly, or indirectly as the interpreter used in an
337 * interpreter script.
338 *
339 * Parameters;	struct image_params *	image parameter block
340 *
341 * Returns:	-1		not an PowerPC image (keep looking)
342 *		-3		Success: exec_archhandler_ppc: relookup
343 *		>0		Failure: exec_archhandler_ppc: error number
344 *
345 * Note:	This image activator does not handle the case of a direct
346 *		invocation of the exec_archhandler_ppc, since in that case, the
347 *		exec_archhandler_ppc itself is not a PowerPC binary; instead,
348 *		binary image activators must recognize the exec_archhandler_ppc;
349 *		This is managed in exec_check_permissions().
350 *
351 * Note:	This image activator is limited to 32 bit powerpc images;
352 *		if support for 64 bit powerpc images is desired, it would
353 *		be more in line with this design to write a separate 64 bit
354 *		image activator.
355 */
356static int
357exec_powerpc32_imgact(struct image_params *imgp)
358{
359	struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
360	int error;
361	size_t len = 0;
362
363	/*
364	 * Make sure it's a PowerPC binary.  If we've already redirected
365	 * from an interpreted file once, don't do it again.
366	 */
367	if (mach_header->magic != MH_CIGAM) {
368		/*
369		 * If it's a cross-architecture 64 bit binary, then claim
370		 * it, but refuse to run it.
371		 */
372		if (mach_header->magic == MH_CIGAM_64)
373			return (EBADARCH);
374		return (-1);
375	}
376
377	/* If there is no exec_archhandler_ppc, we can't run it */
378	if (exec_archhandler_ppc.path[0] == 0)
379		return (EBADARCH);
380
381	/* Remember the type of the original file for later grading */
382	if (!imgp->ip_origcputype) {
383		imgp->ip_origcputype =
384			OSSwapBigToHostInt32(mach_header->cputype);
385		imgp->ip_origcpusubtype =
386			OSSwapBigToHostInt32(mach_header->cpusubtype);
387	}
388
389	/*
390	 * The PowerPC flag will be set by the exec_check_permissions()
391	 * call anyway; however, we set this flag here so that the relookup
392	 * in execve() does not follow symbolic links, as a side effect.
393	 */
394	imgp->ip_flags |= IMGPF_POWERPC;
395
396	/* impute an interpreter */
397	error = copystr(exec_archhandler_ppc.path, imgp->ip_interp_name,
398			IMG_SHSIZE, &len);
399	if (error)
400		return (error);
401
402	/*
403	 * provide a replacement string for p->p_comm; we have to use an
404	 * an alternate buffer for this, rather than replacing it directly,
405	 * since the exec may fail and return to the parent.  In that case,
406	 * we would have erroneously changed the parent p->p_comm instead.
407	 */
408	strlcpy(imgp->ip_p_comm, imgp->ip_ndp->ni_cnd.cn_nameptr, MAXCOMLEN);
409
410	return (-3);
411}
412#endif	/* IMGPF_POWERPC */
413
414
415/*
416 * exec_shell_imgact
417 *
418 * Image activator for interpreter scripts.  If the image begins with the
419 * characters "#!", then it is an interpreter script.  Verify that we are
420 * not already executing in PowerPC mode, and that the length of the script
421 * line indicating the interpreter is not in excess of the maximum allowed
422 * size.  If this is the case, then break out the arguments, if any, which
423 * are separated by white space, and copy them into the argument save area
424 * as if they were provided on the command line before all other arguments.
425 * The line ends when we encounter a comment character ('#') or newline.
426 *
427 * Parameters;	struct image_params *	image parameter block
428 *
429 * Returns:	-1			not an interpreter (keep looking)
430 *		-3			Success: interpreter: relookup
431 *		>0			Failure: interpreter: error number
432 *
433 * A return value other than -1 indicates subsequent image activators should
434 * not be given the opportunity to attempt to activate the image.
435 */
436static int
437exec_shell_imgact(struct image_params *imgp)
438{
439	char *vdata = imgp->ip_vdata;
440	char *ihp;
441	char *line_endp;
442	char *interp;
443	char temp[16];
444	proc_t p;
445	struct fileproc *fp;
446	int fd;
447	int error;
448	size_t len;
449
450	/*
451	 * Make sure it's a shell script.  If we've already redirected
452	 * from an interpreted file once, don't do it again.
453	 *
454	 * Note: We disallow PowerPC, since the expectation is that we
455	 * may run a PowerPC interpreter, but not an interpret a PowerPC
456	 * image.  This is consistent with historical behaviour.
457	 */
458	if (vdata[0] != '#' ||
459	    vdata[1] != '!' ||
460	    (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
461		return (-1);
462	}
463
464#ifdef IMGPF_POWERPC
465	if ((imgp->ip_flags & IMGPF_POWERPC) != 0)
466		  return (EBADARCH);
467#endif	/* IMGPF_POWERPC */
468
469	imgp->ip_flags |= IMGPF_INTERPRET;
470
471        /* Check to see if SUGID scripts are permitted.  If they aren't then
472	 * clear the SUGID bits.
473	 * imgp->ip_vattr is known to be valid.
474         */
475        if (sugid_scripts == 0) {
476	   imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
477	}
478
479	/* Find the nominal end of the interpreter line */
480	for( ihp = &vdata[2]; *ihp != '\n' && *ihp != '#'; ihp++) {
481		if (ihp >= &vdata[IMG_SHSIZE])
482			return (ENOEXEC);
483	}
484
485	line_endp = ihp;
486	ihp = &vdata[2];
487	/* Skip over leading spaces - until the interpreter name */
488	while ( ihp < line_endp && ((*ihp == ' ') || (*ihp == '\t')))
489		ihp++;
490
491	/*
492	 * Find the last non-whitespace character before the end of line or
493	 * the beginning of a comment; this is our new end of line.
494	 */
495	for (;line_endp > ihp && ((*line_endp == ' ') || (*line_endp == '\t')); line_endp--)
496		continue;
497
498	/* Empty? */
499	if (line_endp == ihp)
500		return (ENOEXEC);
501
502	/* copy the interpreter name */
503	interp = imgp->ip_interp_name;
504	while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t'))
505		*interp++ = *ihp++;
506	*interp = '\0';
507
508	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_name),
509							UIO_SYSSPACE32);
510
511	ihp = &vdata[2];
512	while (ihp < line_endp) {
513		/* Skip leading whitespace before each argument */
514		while ((*ihp == ' ') || (*ihp == '\t'))
515			ihp++;
516
517		if (ihp >= line_endp)
518			break;
519
520		/* We have an argument; copy it */
521		while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t')) {
522			*imgp->ip_strendp++ = *ihp++;
523			imgp->ip_strspace--;
524		}
525		*imgp->ip_strendp++ = 0;
526		imgp->ip_strspace--;
527		imgp->ip_argc++;
528	}
529
530	/*
531	 * If we have a SUID oder SGID script, create a file descriptor
532	 * from the vnode and pass /dev/fd/%d instead of the actual
533	 * path name so that the script does not get opened twice
534	 */
535	if (imgp->ip_origvattr->va_mode & (VSUID | VSGID)) {
536		p = vfs_context_proc(imgp->ip_vfs_context);
537		error = falloc(p, &fp, &fd, imgp->ip_vfs_context);
538		if (error)
539			return(error);
540
541		fp->f_fglob->fg_flag = FREAD;
542		fp->f_fglob->fg_type = DTYPE_VNODE;
543		fp->f_fglob->fg_ops = &vnops;
544		fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp;
545
546		proc_fdlock(p);
547		procfdtbl_releasefd(p, fd, NULL);
548		fp_drop(p, fd, fp, 1);
549		proc_fdunlock(p);
550		vnode_ref(imgp->ip_vp);
551
552		snprintf(temp, sizeof(temp), "/dev/fd/%d", fd);
553		error = copyoutstr(temp, imgp->ip_user_fname, sizeof(temp), &len);
554		if (error)
555			return(error);
556	}
557
558	return (-3);
559}
560
561
562
563/*
564 * exec_fat_imgact
565 *
566 * Image activator for fat 1.0 binaries.  If the binary is fat, then we
567 * need to select an image from it internally, and make that the image
568 * we are going to attempt to execute.  At present, this consists of
569 * reloading the first page for the image with a first page from the
570 * offset location indicated by the fat header.
571 *
572 * Parameters;	struct image_params *	image parameter block
573 *
574 * Returns:	-1			not a fat binary (keep looking)
575 *		-2			Success: encapsulated binary: reread
576 *		>0			Failure: error number
577 *
578 * Important:	This image activator is byte order neutral.
579 *
580 * Note:	A return value other than -1 indicates subsequent image
581 *		activators should not be given the opportunity to attempt
582 *		to activate the image.
583 *
584 * 		If we find an encapsulated binary, we make no assertions
585 *		about its  validity; instead, we leave that up to a rescan
586 *		for an activator to claim it, and, if it is claimed by one,
587 *		that activator is responsible for determining validity.
588 */
589static int
590exec_fat_imgact(struct image_params *imgp)
591{
592	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
593	kauth_cred_t cred = kauth_cred_proc_ref(p);
594	struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
595	struct _posix_spawnattr *psa = NULL;
596	struct fat_arch fat_arch;
597	int resid, error;
598	load_return_t lret;
599
600	/* Make sure it's a fat binary */
601	if ((fat_header->magic != FAT_MAGIC) &&
602            (fat_header->magic != FAT_CIGAM)) {
603	    	error = -1;
604		goto bad;
605	}
606
607	/* If posix_spawn binprefs exist, respect those prefs. */
608	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
609	if (psa != NULL && psa->psa_binprefs[0] != 0) {
610		struct fat_arch *arches = (struct fat_arch *) (fat_header + 1);
611		int nfat_arch = 0, pr = 0, f = 0;
612
613		nfat_arch = OSSwapBigToHostInt32(fat_header->nfat_arch);
614		/* Check each preference listed against all arches in header */
615		for (pr = 0; pr < NBINPREFS; pr++) {
616			cpu_type_t pref = psa->psa_binprefs[pr];
617			if (pref == 0) {
618				/* No suitable arch in the pref list */
619				error = EBADARCH;
620				goto bad;
621			}
622
623			if (pref == CPU_TYPE_ANY) {
624				/* Fall through to regular grading */
625				break;
626			}
627
628			for (f = 0; f < nfat_arch; f++) {
629				cpu_type_t archtype = OSSwapBigToHostInt32(
630						arches[f].cputype);
631				cpu_type_t archsubtype = OSSwapBigToHostInt32(
632						arches[f].cpusubtype) & ~CPU_SUBTYPE_MASK;
633				if (pref == archtype &&
634					grade_binary(archtype, archsubtype)) {
635					/* We have a winner! */
636					fat_arch.cputype = archtype;
637					fat_arch.cpusubtype = archsubtype;
638					fat_arch.offset = OSSwapBigToHostInt32(
639							arches[f].offset);
640					fat_arch.size = OSSwapBigToHostInt32(
641							arches[f].size);
642					fat_arch.align = OSSwapBigToHostInt32(
643							arches[f].align);
644					goto use_arch;
645				}
646			}
647		}
648	}
649
650	/* Look up our preferred architecture in the fat file. */
651	lret = fatfile_getarch_affinity(imgp->ip_vp,
652					(vm_offset_t)fat_header,
653					&fat_arch,
654					(p->p_flag & P_AFFINITY));
655	if (lret != LOAD_SUCCESS) {
656		error = load_return_to_errno(lret);
657		goto bad;
658	}
659
660use_arch:
661	/* Read the Mach-O header out of fat_arch */
662	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
663			PAGE_SIZE, fat_arch.offset,
664			UIO_SYSSPACE32, (IO_UNIT|IO_NODELOCKED),
665			cred, &resid, p);
666	if (error) {
667		goto bad;
668	}
669
670	/* Did we read a complete header? */
671	if (resid) {
672		error = EBADEXEC;
673		goto bad;
674	}
675
676	/* Success.  Indicate we have identified an encapsulated binary */
677	error = -2;
678	imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
679	imgp->ip_arch_size = (user_size_t)fat_arch.size;
680
681bad:
682	kauth_cred_unref(&cred);
683	return (error);
684}
685
686/*
687 * exec_mach_imgact
688 *
689 * Image activator for mach-o 1.0 binaries.
690 *
691 * Parameters;	struct image_params *	image parameter block
692 *
 * Returns:	-1			not a Mach-O binary (keep looking)
694 *		-2			Success: encapsulated binary: reread
695 *		>0			Failure: error number
696 *		EBADARCH		Mach-o binary, but with an unrecognized
697 *					architecture
698 *		ENOMEM			No memory for child process after -
699 *					can only happen after vfork()
700 *
701 * Important:	This image activator is NOT byte order neutral.
702 *
703 * Note:	A return value other than -1 indicates subsequent image
704 *		activators should not be given the opportunity to attempt
705 *		to activate the image.
706 *
707 * TODO:	More gracefully handle failures after vfork
708 */
709static int
710exec_mach_imgact(struct image_params *imgp)
711{
712	struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
713	proc_t			p = vfs_context_proc(imgp->ip_vfs_context);
714	int			error = 0;
715	int			vfexec = 0;
716	task_t			task;
717	task_t			new_task = NULL; /* protected by vfexec */
718	thread_t		thread;
719	struct uthread		*uthread;
720	vm_map_t old_map = VM_MAP_NULL;
721	vm_map_t map;
722	load_return_t		lret;
723	load_result_t		load_result;
724	struct _posix_spawnattr *psa = NULL;
725
726	/*
727	 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
728	 * is a reserved field on the end, so for the most part, we can
729	 * treat them as if they were identical.
730	 */
731	if ((mach_header->magic != MH_MAGIC) &&
732	    (mach_header->magic != MH_MAGIC_64)) {
733		error = -1;
734		goto bad;
735	}
736
737	switch (mach_header->filetype) {
738	case MH_DYLIB:
739	case MH_BUNDLE:
740		error = -1;
741		goto bad;
742	}
743
744	if (!imgp->ip_origcputype) {
745		imgp->ip_origcputype = mach_header->cputype;
746		imgp->ip_origcpusubtype = mach_header->cpusubtype;
747	}
748
749	task = current_task();
750	thread = current_thread();
751	uthread = get_bsdthread_info(thread);
752
753	if (uthread->uu_flag & UT_VFORK)
754		vfexec = 1;	 /* Mark in exec */
755
756	if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
757		imgp->ip_flags |= IMGPF_IS_64BIT;
758
759	/* If posix_spawn binprefs exist, respect those prefs. */
760	psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
761	if (psa != NULL && psa->psa_binprefs[0] != 0) {
762		int pr = 0;
763		for (pr = 0; pr < NBINPREFS; pr++) {
764			cpu_type_t pref = psa->psa_binprefs[pr];
765			if (pref == 0) {
766				/* No suitable arch in the pref list */
767				error = EBADARCH;
768				goto bad;
769			}
770
771			if (pref == CPU_TYPE_ANY) {
772				/* Jump to regular grading */
773				goto grade;
774			}
775
776			if (pref == imgp->ip_origcputype) {
777				/* We have a match! */
778				goto grade;
779			}
780		}
781		error = EBADARCH;
782		goto bad;
783	}
784grade:
785	if (!grade_binary(imgp->ip_origcputype & ~CPU_SUBTYPE_LIB64,
786				imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
787		error = EBADARCH;
788		goto bad;
789	}
790
791	/* Copy in arguments/environment from the old process */
792	error = exec_extract_strings(imgp);
793	if (error)
794		goto bad;
795
796	/*
797	 * Hack for binary compatability; put three NULs on the end of the
798	 * string area, and round it up to the next word boundary.  This
799	 * ensures padding with NULs to the boundary.
800	 */
801	imgp->ip_strendp[0] = 0;
802	imgp->ip_strendp[1] = 0;
803	imgp->ip_strendp[2] = 0;
804	imgp->ip_strendp += (((imgp->ip_strendp - imgp->ip_strings) + NBPW-1) & ~(NBPW-1));
805
806#ifdef IMGPF_POWERPC
807	/*
808	 * XXX
809	 *
810	 * Should be factored out; this is here because we might be getting
811	 * invoked this way as the result of a shell script, and the check
812	 * in exec_check_permissions() is not interior to the jump back up
813	 * to the "encapsulated_binary:" label in exec_activate_image().
814	 */
815	if (imgp->ip_vattr->va_fsid == exec_archhandler_ppc.fsid &&
816		imgp->ip_vattr->va_fileid == (uint64_t)((u_long)exec_archhandler_ppc.fileid)) {
817		imgp->ip_flags |= IMGPF_POWERPC;
818	}
819#endif	/* IMGPF_POWERPC */
820
821	if (vfexec) {
822		imgp->ip_vfork_thread = fork_create_child(task, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
823		if (imgp->ip_vfork_thread == NULL) {
824			error = ENOMEM;
825			goto bad;
826		}
827		/* reset local idea of thread, uthread, task */
828		thread = imgp->ip_vfork_thread;
829		uthread = get_bsdthread_info(thread);
830		task = new_task = get_threadtask(thread);
831		map = get_task_map(task);
832	} else {
833		map = VM_MAP_NULL;
834	}
835
836	/*
837	 * We set these flags here; this is OK, since if we fail after
838	 * this point, we have already destroyed the parent process anyway.
839	 */
840	if (imgp->ip_flags & IMGPF_IS_64BIT) {
841		task_set_64bit(task, TRUE);
842		OSBitOrAtomic(P_LP64, (UInt32 *)&p->p_flag);
843	} else {
844		task_set_64bit(task, FALSE);
845		OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&p->p_flag);
846	}
847
848	/*
849	 *	Load the Mach-O file.
850	 */
851
852	/*
853	 * NOTE: An error after this point  indicates we have potentially
854	 * destroyed or overwrote some process state while attempting an
855	 * execve() following a vfork(), which is an unrecoverable condition.
856	 */
857
858	/*
859	 * We reset the task to 64-bit (or not) here.  It may have picked up
860	 * a new map, and we need that to reflect its true 64-bit nature.
861	 */
862
863	task_set_64bit(task,
864		       ((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT));
865
866	/*
867	 * Actually load the image file we previously decided to load.
868	 */
869	lret = load_machfile(imgp, mach_header, thread, map, &load_result);
870
871	if (lret != LOAD_SUCCESS) {
872		error = load_return_to_errno(lret);
873		goto badtoolate;
874	}
875
876	vm_map_set_user_wire_limit(get_task_map(task), p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
877
878	/*
879	 * Set code-signing flags if this binary is signed, or if parent has
880	 * requested them on exec.
881	 */
882	if (load_result.csflags & CS_VALID) {
883		imgp->ip_csflags |= load_result.csflags &
884			(CS_VALID|
885			 CS_HARD|CS_KILL|CS_EXEC_SET_HARD|CS_EXEC_SET_KILL);
886	} else {
887		imgp->ip_csflags &= ~CS_VALID;
888	}
889
890	if (p->p_csflags & CS_EXEC_SET_HARD)
891		imgp->ip_csflags |= CS_HARD;
892	if (p->p_csflags & CS_EXEC_SET_KILL)
893		imgp->ip_csflags |= CS_KILL;
894
895
896	/*
897	 * Set up the system reserved areas in the new address space.
898	 */
899	vm_map_exec(get_task_map(task),
900		    task,
901		    (void *) p->p_fd->fd_rdir,
902#ifdef IMGPF_POWERPC
903		    imgp->ip_flags & IMGPF_POWERPC ?
904		    CPU_TYPE_POWERPC :
905#endif
906		    cpu_type());
907
908	/*
909	 * Close file descriptors
910	 * which specify close-on-exec.
911	 */
912	fdexec(p);
913
914	/*
915	 * deal with set[ug]id.
916	 */
917	error = exec_handle_sugid(imgp);
918
919	if (!vfexec && (p->p_lflag & P_LTRACED))
920		psignal(p, SIGTRAP);
921
922	if (error) {
923		goto badtoolate;
924	}
925
926#if CONFIG_MACF
927	/* Determine if the map will allow VM_PROT_COPY */
928	error = mac_proc_check_map_prot_copy_allow(p);
929	vm_map_set_prot_copy_allow(get_task_map(task),
930				   error ? FALSE : TRUE);
931#endif
932
933	if (load_result.unixproc &&
934		create_unix_stack(get_task_map(task),
935				  load_result.user_stack,
936				  load_result.customstack,
937				  p) != KERN_SUCCESS) {
938		error = load_return_to_errno(LOAD_NOSPACE);
939		goto badtoolate;
940	}
941
942	if (vfexec) {
943		old_map = vm_map_switch(get_task_map(task));
944	}
945
946	if (load_result.unixproc) {
947		user_addr_t	ap;
948
949		/*
950		 * Copy the strings area out into the new process address
951		 * space.
952		 */
953		ap = p->user_stack;
954		error = exec_copyout_strings(imgp, &ap);
955		if (error) {
956			if (vfexec)
957				vm_map_switch(old_map);
958			goto badtoolate;
959		}
960		/* Set the stack */
961		thread_setuserstack(thread, ap);
962	}
963
964	if (load_result.dynlinker) {
965		uint64_t	ap;
966
967		/* Adjust the stack */
968		if (imgp->ip_flags & IMGPF_IS_64BIT) {
969			ap = thread_adjuserstack(thread, -8);
970			error = copyoutptr(load_result.mach_header, ap, 8);
971		} else {
972			ap = thread_adjuserstack(thread, -4);
973			error = suword(ap, load_result.mach_header);
974		}
975		if (error) {
976		        if (vfexec)
977			        vm_map_switch(old_map);
978			goto badtoolate;
979		}
980	}
981
982	if (vfexec) {
983		vm_map_switch(old_map);
984	}
985	/* Set the entry point */
986	thread_setentrypoint(thread, load_result.entry_point);
987
988	/* Stop profiling */
989	stopprofclock(p);
990
991	/*
992	 * Reset signal state.
993	 */
994	execsigs(p, thread);
995
996	/*
997	 * need to cancel async IO requests that can be cancelled and wait for those
998	 * already active.  MAY BLOCK!
999	 */
1000	_aio_exec( p );
1001
1002#if SYSV_SHM
1003	/* FIXME: Till vmspace inherit is fixed: */
1004	if (!vfexec && p->vm_shm)
1005		shmexec(p);
1006#endif
1007#if SYSV_SEM
1008	/* Clean up the semaphores */
1009	semexit(p);
1010#endif
1011
1012	/*
1013	 * Remember file name for accounting.
1014	 */
1015	p->p_acflag &= ~AFORK;
1016	/* If the translated name isn't NULL, then we want to use
1017	 * that translated name as the name we show as the "real" name.
1018	 * Otherwise, use the name passed into exec.
1019	 */
1020	if (0 != imgp->ip_p_comm[0]) {
1021		bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
1022			sizeof(p->p_comm));
1023	} else {
1024		if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
1025			imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
1026		bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
1027			(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
1028		p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
1029	}
1030
1031#if CONFIG_DTRACE
1032	/*
1033	 * Invalidate any predicate evaluation already cached for this thread by DTrace.
1034	 * That's because we've just stored to p_comm and DTrace refers to that when it
1035	 * evaluates the "execname" special variable. uid and gid may have changed as well.
1036	 */
1037	dtrace_set_thread_predcache(current_thread(), 0);
1038
1039	/*
1040	 * Free any outstanding lazy dof entries. It is imperative we
1041	 * always call dtrace_lazy_dofs_destroy, rather than null check
1042	 * and call if !NULL. If we NULL test, during lazy dof faulting
1043	 * we can race with the faulting code and proceed from here to
1044	 * beyond the helpers cleanup. The lazy dof faulting will then
1045	 * install new helpers which no longer belong to this process!
1046	 */
1047	dtrace_lazy_dofs_destroy(p);
1048
1049
1050	/*
1051    	 * Clean up any DTrace helpers for the process.
1052    	 */
1053    	if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
1054    		(*dtrace_helpers_cleanup)(p);
1055    	}
1056
1057    	/*
1058    	 * Cleanup the DTrace provider associated with this process.
1059    	 */
1060	proc_lock(p);
1061	if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
1062    		(*dtrace_fasttrap_exec_ptr)(p);
1063    	}
1064	proc_unlock(p);
1065#endif
1066
1067	if (kdebug_enable) {
1068		long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
1069
1070		/*
1071		 * Collect the pathname for tracing
1072		 */
1073		kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
1074
1075		if (vfexec) {
1076			KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
1077					p->p_pid ,0,0,0, (unsigned int)thread);
1078			KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
1079					dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thread);
1080		} else {
1081			KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
1082					p->p_pid ,0,0,0,0);
1083			KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
1084					dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
1085		}
1086	}
1087
1088#ifdef IMGPF_POWERPC
1089	/*
1090	 * Mark the process as powerpc or not.  If powerpc, set the affinity
1091	 * flag, which will be used for grading binaries in future exec's
1092	 * from the process.
1093	 */
1094	if (((imgp->ip_flags & IMGPF_POWERPC) != 0))
1095		OSBitOrAtomic(P_TRANSLATED, (UInt32 *)&p->p_flag);
1096	else
1097#endif	/* IMGPF_POWERPC */
1098		OSBitAndAtomic(~((uint32_t)P_TRANSLATED), (UInt32 *)&p->p_flag);
1099	OSBitAndAtomic(~((uint32_t)P_AFFINITY), (UInt32 *)&p->p_flag);
1100
1101	/*
1102	 * If posix_spawned with the START_SUSPENDED flag, stop the
1103	 * process before it runs.
1104	 */
1105	if (imgp->ip_px_sa != NULL) {
1106		psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
1107		if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
1108			proc_lock(p);
1109			p->p_stat = SSTOP;
1110			proc_unlock(p);
1111			(void) task_suspend(p->task);
1112		}
1113	}
1114
1115	/*
1116	 * mark as execed, wakeup the process that vforked (if any) and tell
1117	 * it that it now has it's own resources back
1118	 */
1119	OSBitOrAtomic(P_EXEC, (UInt32 *)&p->p_flag);
1120	if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
1121		proc_lock(p);
1122		p->p_lflag &= ~P_LPPWAIT;
1123		proc_unlock(p);
1124		wakeup((caddr_t)p->p_pptr);
1125	}
1126
1127	if (vfexec && (p->p_lflag & P_LTRACED)) {
1128		psignal_vfork(p, new_task, thread, SIGTRAP);
1129	}
1130
1131badtoolate:
1132	proc_knote(p, NOTE_EXEC);
1133
1134	if (vfexec) {
1135		task_deallocate(new_task);
1136		thread_deallocate(thread);
1137		if (error)
1138			error = 0;
1139	}
1140
1141bad:
1142	return(error);
1143}
1144
1145
1146
1147
1148/*
1149 * Our image activator table; this is the table of the image types we are
1150 * capable of loading.  We list them in order of preference to ensure the
1151 * fastest image load speed.
1152 *
1153 * XXX hardcoded, for now; should use linker sets
1154 */
1155struct execsw {
1156	int (*ex_imgact)(struct image_params *);
1157	const char *ex_name;
1158} execsw[] = {
1159	{ exec_mach_imgact,		"Mach-o Binary" },
1160	{ exec_fat_imgact,		"Fat Binary" },
1161#ifdef IMGPF_POWERPC
1162	{ exec_powerpc32_imgact,	"PowerPC binary" },
1163#endif	/* IMGPF_POWERPC */
1164	{ exec_shell_imgact,		"Interpreter Script" },
1165	{ NULL, NULL}
1166};
1167
1168
1169/*
1170 * exec_activate_image
1171 *
1172 * Description:	Iterate through the available image activators, and activate
1173 *		the image associated with the imgp structure.  We start with
1174 *		the
1175 *
1176 * Parameters:	struct image_params *	Image parameter block
1177 *
1178 * Returns:	0			Success
1179 *		EBADEXEC		The executable is corrupt/unknown
1180 *	execargs_alloc:EINVAL		Invalid argument
1181 *	execargs_alloc:EACCES		Permission denied
1182 *	execargs_alloc:EINTR		Interrupted function
1183 *	execargs_alloc:ENOMEM		Not enough space
1184 *	exec_save_path:EFAULT		Bad address
1185 *	exec_save_path:ENAMETOOLONG	Filename too long
1186 *	exec_check_permissions:EACCES	Permission denied
1187 *	exec_check_permissions:ENOEXEC	Executable file format error
1188 *	exec_check_permissions:ETXTBSY	Text file busy [misuse of error code]
1189 *	exec_check_permissions:???
1190 *	namei:???
1191 *	vn_rdwr:???			[anything vn_rdwr can return]
1192 *	<ex_imgact>:???			[anything an imgact can return]
1193 */
1194static int
1195exec_activate_image(struct image_params *imgp)
1196{
1197	struct nameidata nd;
1198	int error;
1199	int resid;
1200	int once = 1;	/* save SGUID-ness for interpreted files */
1201	int i;
1202	int iterlimit = EAI_ITERLIMIT;
1203	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1204
1205	error = execargs_alloc(imgp);
1206	if (error)
1207		goto bad;
1208
1209	/*
1210	 * XXXAUDIT: Note: the double copyin introduces an audit
1211	 * race.  To correct this race, we must use a single
1212	 * copyin(), e.g. by passing a flag to namei to indicate an
1213	 * external path buffer is being used.
1214	 */
1215	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
1216	if (error) {
1217		goto bad_notrans;
1218	}
1219
1220	DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
1221
1222	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
1223		imgp->ip_seg, imgp->ip_user_fname, imgp->ip_vfs_context);
1224
1225again:
1226	error = namei(&nd);
1227	if (error)
1228		goto bad_notrans;
1229	imgp->ip_ndp = &nd;	/* successful namei(); call nameidone() later */
1230	imgp->ip_vp = nd.ni_vp;	/* if set, need to vnode_put() at some point */
1231
1232	proc_transstart(p, 0);
1233
1234	error = exec_check_permissions(imgp);
1235	if (error)
1236		goto bad;
1237
1238	/* Copy; avoid invocation of an interpreter overwriting the original */
1239	if (once) {
1240		once = 0;
1241		*imgp->ip_origvattr = *imgp->ip_vattr;
1242	}
1243
1244	error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
1245			UIO_SYSSPACE32, IO_NODELOCKED,
1246			vfs_context_ucred(imgp->ip_vfs_context),
1247			&resid, vfs_context_proc(imgp->ip_vfs_context));
1248	if (error)
1249		goto bad;
1250
1251encapsulated_binary:
1252	/* Limit the number of iterations we will attempt on each binary */
1253	if (--iterlimit == 0) {
1254		error = EBADEXEC;
1255		goto bad;
1256	}
1257	error = -1;
1258	for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
1259
1260		error = (*execsw[i].ex_imgact)(imgp);
1261
1262		switch (error) {
1263		/* case -1: not claimed: continue */
1264		case -2:		/* Encapsulated binary */
1265			goto encapsulated_binary;
1266
1267		case -3:		/* Interpreter */
1268#if CONFIG_MACF
1269			/*
1270			 * Copy the script label for later use. Note that
1271			 * the label can be different when the script is
1272			 * actually read by the interpreter.
1273			 */
1274			if (imgp->ip_scriptlabelp)
1275				mac_vnode_label_free(imgp->ip_scriptlabelp);
1276			imgp->ip_scriptlabelp = mac_vnode_label_alloc();
1277			if (imgp->ip_scriptlabelp == NULL) {
1278				error = ENOMEM;
1279				break;
1280			}
1281			mac_vnode_label_copy(imgp->ip_vp->v_label,
1282				    imgp->ip_scriptlabelp);
1283#endif
1284			vnode_put(imgp->ip_vp);
1285			imgp->ip_vp = NULL;	/* already put */
1286			nd.ni_cnd.cn_nameiop = LOOKUP;
1287			nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) |
1288						(FOLLOW | LOCKLEAF);
1289
1290#ifdef IMGPF_POWERPC
1291			/*
1292			 * PowerPC does not follow symlinks because the
1293			 * code which sets exec_archhandler_ppc.fsid and
1294			 * exec_archhandler_ppc.fileid doesn't follow them.
1295			 */
1296			if (imgp->ip_flags & IMGPF_POWERPC)
1297				nd.ni_cnd.cn_flags &= ~FOLLOW;
1298#endif	/* IMGPF_POWERPC */
1299
1300			nd.ni_segflg = UIO_SYSSPACE32;
1301			nd.ni_dirp = CAST_USER_ADDR_T(imgp->ip_interp_name);
1302			proc_transend(p, 0);
1303			goto again;
1304
1305		default:
1306			break;
1307		}
1308	}
1309
1310	/*
1311	 * Call out to allow 3rd party notification of exec.
1312	 * Ignore result of kauth_authorize_fileop call.
1313	 */
1314	if (error == 0 && kauth_authorize_fileop_has_listeners()) {
1315		kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
1316					KAUTH_FILEOP_EXEC,
1317					(uintptr_t)nd.ni_vp, 0);
1318	}
1319
1320bad:
1321	proc_transend(p, 0);
1322
1323bad_notrans:
1324	if (imgp->ip_strings)
1325		execargs_free(imgp);
1326	if (imgp->ip_ndp)
1327		nameidone(imgp->ip_ndp);
1328
1329	return (error);
1330}
1331
1332/*
1333 * exec_handle_port_actions
1334 *
1335 * Description:	Go through the _posix_port_actions_t contents,
1336 * 		calling task_set_special_port and task_set_exception_ports
1337 * 		for the current task.
1338 *
1339 * Parameters:	struct image_params *	Image parameter block
1340 *
1341 * Returns:	0			Success
1342 * 		KERN_FAILURE		Failure
1343 */
1344static int
1345exec_handle_port_actions(struct image_params *imgp)
1346{
1347	_posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
1348	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1349	_ps_port_action_t *act = NULL;
1350	task_t task = p->task;
1351	ipc_port_t port = NULL;
1352	kern_return_t ret = KERN_SUCCESS;
1353	int i;
1354
1355	for (i = 0; i < pacts->pspa_count; i++) {
1356		act = &pacts->pspa_actions[i];
1357
1358		ret = ipc_object_copyin(get_task_ipcspace(current_task()),
1359				(mach_port_name_t) act->new_port,
1360				MACH_MSG_TYPE_COPY_SEND,
1361				(ipc_object_t *) &port);
1362
1363		if (ret)
1364			return ret;
1365
1366		switch (act->port_type) {
1367			case PSPA_SPECIAL:
1368				ret = task_set_special_port(task,
1369						act->which,
1370						port);
1371				break;
1372			case PSPA_EXCEPTION:
1373				ret = task_set_exception_ports(task,
1374						act->mask,
1375						port,
1376						act->behavior,
1377						act->flavor);
1378				break;
1379			default:
1380				ret = KERN_FAILURE;
1381		}
1382		/* action failed, so release port resources */
1383		if (ret) {
1384			ipc_port_release_send(port);
1385			return ret;
1386		}
1387	}
1388
1389	return ret;
1390}
1391
1392/*
1393 * exec_handle_file_actions
1394 *
1395 * Description:	Go through the _posix_file_actions_t contents applying the
1396 *		open, close, and dup2 operations to the open file table for
1397 *		the current process.
1398 *
1399 * Parameters:	struct image_params *	Image parameter block
1400 *
1401 * Returns:	0			Success
1402 *		???
1403 *
1404 * Note:	Actions are applied in the order specified, with the credential
1405 *		of the parent process.  This is done to permit the parent
1406 *		process to utilize POSIX_SPAWN_RESETIDS to drop privilege in
1407 *		the child following operations the child may in fact not be
1408 *		normally permitted to perform.
1409 */
1410static int
1411exec_handle_file_actions(struct image_params *imgp)
1412{
1413	int error = 0;
1414	int action;
1415	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
1416	_posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa;
1417	register_t ival[2];		/* dummy retval for system calls) */
1418
1419	for (action = 0; action < px_sfap->psfa_act_count; action++) {
1420		_psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action];
1421
1422		switch(psfa->psfaa_type) {
1423		case PSFA_OPEN: {
1424			/*
1425			 * Open is different, in that it requires the use of
1426			 * a path argument, which is normally copied in from
1427			 * user space; because of this, we have to support an
1428			 * open from kernel space that passes an address space
1429			 * context oof UIO_SYSSPACE, and casts the address
1430			 * argument to a user_addr_t.
1431			 */
1432			struct vnode_attr va;
1433			struct nameidata nd;
1434			int mode = psfa->psfaa_openargs.psfao_mode;
1435			struct dup2_args dup2a;
1436			struct close_nocancel_args ca;
1437			int origfd;
1438
1439			VATTR_INIT(&va);
1440			/* Mask off all but regular access permissions */
1441			mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
1442			VATTR_SET(&va, va_mode, mode & ACCESSPERMS);
1443
1444			NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
1445			       CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
1446			       imgp->ip_vfs_context);
1447
1448			error = open1(imgp->ip_vfs_context,
1449					&nd,
1450					psfa->psfaa_openargs.psfao_oflag,
1451					&va,
1452					ival);
1453
1454			/*
1455			 * If there's an error, or we get the right fd by
1456			 * accident, then drop out here.  This is easier that
1457			 * rearchitecting all the open code to preallocate fd
1458			 * slots, and internally taking one as an argument.
1459			 */
1460			if (error || ival[0] == psfa->psfaa_filedes)
1461				break;
1462
1463			origfd = ival[0];
1464			/*
1465			 * If we didn't fall out from an error, we ended up
1466			 * with the wrong fd; so now we've got to try to dup2
1467			 * it to the right one.
1468			 */
1469			dup2a.from = origfd;
1470			dup2a.to = psfa->psfaa_filedes;
1471
1472			/*
1473			 * The dup2() system call implementation sets
1474			 * ival to newfd in the success case, but we
1475			 * can ignore that, since if we didn't get the
1476			 * fd we wanted, the error will stop us.
1477			 */
1478			error = dup2(p, &dup2a, ival);
1479			if (error)
1480				break;
1481
1482			/*
1483			 * Finally, close the original fd.
1484			 */
1485			ca.fd = origfd;
1486
1487			error = close_nocancel(p, &ca, ival);
1488			}
1489			break;
1490
1491		case PSFA_DUP2: {
1492			struct dup2_args dup2a;
1493
1494			dup2a.from = psfa->psfaa_filedes;
1495			dup2a.to = psfa->psfaa_openargs.psfao_oflag;
1496
1497			/*
1498			 * The dup2() system call implementation sets
1499			 * ival to newfd in the success case, but we
1500			 * can ignore that, since if we didn't get the
1501			 * fd we wanted, the error will stop us.
1502			 */
1503			error = dup2(p, &dup2a, ival);
1504			}
1505			break;
1506
1507		case PSFA_CLOSE: {
1508			struct close_nocancel_args ca;
1509
1510			ca.fd = psfa->psfaa_filedes;
1511
1512			error = close_nocancel(p, &ca, ival);
1513			}
1514			break;
1515
1516		default:
1517			error = EINVAL;
1518			break;
1519		}
1520		/* All file actions failures are considered fatal, per POSIX */
1521		if (error)
1522			break;
1523	}
1524
1525	return (error);
1526}
1527
1528
1529/*
1530 * posix_spawn
1531 *
1532 * Parameters:	uap->pid		Pointer to pid return area
1533 *		uap->fname		File name to exec
1534 *		uap->argp		Argument list
1535 *		uap->envp		Environment list
1536 *
1537 * Returns:	0			Success
1538 *		EINVAL			Invalid argument
1539 *		ENOTSUP			Not supported
1540 *		ENOEXEC			Executable file format error
1541 *	exec_activate_image:EINVAL	Invalid argument
1542 *	exec_activate_image:EACCES	Permission denied
1543 *	exec_activate_image:EINTR	Interrupted function
1544 *	exec_activate_image:ENOMEM	Not enough space
1545 *	exec_activate_image:EFAULT	Bad address
1546 *	exec_activate_image:ENAMETOOLONG	Filename too long
1547 *	exec_activate_image:ENOEXEC	Executable file format error
1548 *	exec_activate_image:ETXTBSY	Text file busy [misuse of error code]
1549 *	exec_activate_image:EBADEXEC	The executable is corrupt/unknown
1550 *	exec_activate_image:???
1551 *	mac_execve_enter:???
1552 *
1553 * TODO:	More gracefully handle failures after vfork
1554 *		Expect to need __mac_posix_spawn() at some point...
1555 *		Handle posix_spawnattr_t
1556 *		Handle posix_spawn_file_actions_t
1557 */
1558int
1559posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval)
1560{
1561	proc_t p = ap;		/* quiet bogus GCC vfork() warning */
1562	user_addr_t pid = uap->pid;
1563	register_t ival[2];		/* dummy retval for vfork() */
1564	struct image_params image_params, *imgp;
1565	struct vnode_attr va;
1566	struct vnode_attr origva;
1567	struct uthread	*uthread = 0;	/* compiler complains if not set to 0*/
1568	int error, sig;
1569	task_t  task;
1570	int numthreads;
1571	char alt_p_comm[sizeof(p->p_comm)] = {0};	/* for PowerPC */
1572	int is_64 = IS_64BIT_PROCESS(p);
1573	int undo_vfork = 0;
1574	struct vfs_context context;
1575	struct user__posix_spawn_args_desc px_args;
1576	struct _posix_spawnattr px_sa;
1577	_posix_spawn_file_actions_t px_sfap = NULL;
1578	_posix_spawn_port_actions_t px_spap = NULL;
1579	struct __user_sigaction vec;
1580
1581	imgp = &image_params;
1582
1583	/* Initialize the common data in the image_params structure */
1584	bzero(imgp, sizeof(*imgp));
1585	imgp->ip_user_fname = uap->path;
1586	imgp->ip_user_argv = uap->argv;
1587	imgp->ip_user_envv = uap->envp;
1588	imgp->ip_vattr = &va;
1589	imgp->ip_origvattr = &origva;
1590	imgp->ip_vfs_context = &context;
1591	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
1592	imgp->ip_p_comm = alt_p_comm;		/* for PowerPC */
1593	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
1594
1595	if (uap->adesc != USER_ADDR_NULL) {
1596		if(is_64) {
1597			error = copyin(uap->adesc, &px_args, sizeof(px_args));
1598		} else {
1599			struct _posix_spawn_args_desc px_args32;
1600
1601			error = copyin(uap->adesc, &px_args32, sizeof(px_args32));
1602
1603			/*
1604			 * Convert arguments descriptor from external 32 bit
1605			 * representation to internal 64 bit representation
1606			 */
1607			px_args.attr_size = px_args32.attr_size;
1608			px_args.attrp = CAST_USER_ADDR_T(px_args32.attrp);
1609			px_args.file_actions_size = px_args32.file_actions_size;
1610			px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions);
1611			px_args.port_actions_size = px_args32.port_actions_size;
1612			px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions);
1613		}
1614		if (error)
1615			goto bad;
1616
1617		if (px_args.attr_size != 0) {
1618			/*
1619			 * This could lose some of the port_actions pointer,
1620			 * but we already have it from px_args.
1621			 */
1622			if ((error = copyin(px_args.attrp, &px_sa, sizeof(px_sa))) != 0)
1623			goto bad;
1624
1625			imgp->ip_px_sa = &px_sa;
1626		}
1627		if (px_args.file_actions_size != 0) {
1628			/* Limit file_actions to allowed number of open files */
1629			int maxfa = (p->p_limit ? p->p_rlimit[RLIMIT_NOFILE].rlim_cur : NOFILE);
1630			if (px_args.file_actions_size < PSF_ACTIONS_SIZE(1) ||
1631				px_args.file_actions_size > PSF_ACTIONS_SIZE(maxfa)) {
1632				error = EINVAL;
1633				goto bad;
1634			}
1635			MALLOC(px_sfap, _posix_spawn_file_actions_t, px_args.file_actions_size, M_TEMP, M_WAITOK);
1636			if (px_sfap == NULL) {
1637				error = ENOMEM;
1638				goto bad;
1639			}
1640			imgp->ip_px_sfa = px_sfap;
1641
1642			if ((error = copyin(px_args.file_actions, px_sfap,
1643							px_args.file_actions_size)) != 0)
1644				goto bad;
1645		}
1646		if (px_args.port_actions_size != 0) {
1647			/* Limit port_actions to one page of data */
1648			if (px_args.port_actions_size < PS_PORT_ACTIONS_SIZE(1) ||
1649				px_args.port_actions_size > PAGE_SIZE) {
1650				error = EINVAL;
1651				goto bad;
1652			}
1653
1654			MALLOC(px_spap, _posix_spawn_port_actions_t,
1655					px_args.port_actions_size, M_TEMP, M_WAITOK);
1656			if (px_spap == NULL) {
1657				error = ENOMEM;
1658				goto bad;
1659			}
1660			imgp->ip_px_spa = px_spap;
1661
1662			if ((error = copyin(px_args.port_actions, px_spap,
1663							px_args.port_actions_size)) != 0)
1664				goto bad;
1665		}
1666	}
1667
1668	if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){
1669		if ((error = vfork(p, NULL, ival)) != 0)
1670			goto bad;
1671		undo_vfork = 1;
1672	}
1673
1674	/* "reenter the kernel" on a new vfork()'ed process */
1675	uthread = get_bsdthread_info(current_thread());
1676	if (undo_vfork)
1677		p = uthread->uu_proc;
1678
1679	context.vc_thread = current_thread();
1680	context.vc_ucred = p->p_ucred;	/* XXX must NOT be kauth_cred_get() */
1681
1682	/*
1683	 * Post fdcopy(), pre exec_handle_sugid() - this is where we want
1684	 * to handle the file_actions.  Since vfork() also ends up setting
1685	 * us into the parent process group, and saved off the signal flags,
1686	 * this is also where we want to handle the spawn flags.
1687	 */
1688	/* Has spawn file actions? */
1689	if (imgp->ip_px_sfa != NULL &&
1690	    (error = exec_handle_file_actions(imgp)) != 0) {
1691		goto bad;
1692	}
1693
1694	/* Has spawn port actions? */
1695	if (imgp->ip_px_spa != NULL) {
1696		/* Only allowed when not under vfork */
1697		if (!(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)) {
1698			error = ENOTSUP;
1699			goto bad;
1700		}
1701		if((error = exec_handle_port_actions(imgp)) != 0)
1702			goto bad;
1703	}
1704
1705	/* Has spawn attr? */
1706	if (imgp->ip_px_sa != NULL) {
1707		/* Set the process group ID of the child process */
1708		if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) {
1709			struct setpgid_args spga;
1710			spga.pid = p->p_pid;
1711			spga.pgid = px_sa.psa_pgroup;
1712			/*
1713			 * Effectively, call setpgid() system call; works
1714			 * because there are no pointer arguments.
1715			 */
1716			if((error = setpgid(p, &spga, ival)) != 0)
1717				goto bad;
1718		}
1719		/*
1720		 * Reset UID/GID to parent's RUID/RGID; This works only
1721		 * because the operation occurs *after* the vfork() and
1722		 * before the call to exec_handle_sugid() by the image
1723		 * activator called from exec_activate_image().
1724		 *
1725		 * The use of p_ucred is safe, since we are acting on the
1726		 * new process, and it has no threads other than the one
1727		 * we are creating for it.
1728		 */
1729		if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
1730			kauth_cred_t my_cred = p->p_ucred;
1731			kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, my_cred->cr_ruid, my_cred->cr_rgid);
1732			if (my_new_cred != my_cred)
1733				p->p_ucred = my_new_cred;
1734		}
1735		/*
1736		 * Mask a list of signals, instead of them being unmasked, if
1737		 * they were unmasked in the parent; note that some signals
1738		 * are not maskable.
1739		 */
1740		if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK)
1741			uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask);
1742		/*
1743		 * Default a list of signals instead of ignoring them, if
1744		 * they were ignored in the parent.
1745		 */
1746		if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) {
1747			vec.sa_handler = SIG_DFL;
1748			vec.sa_tramp = 0;
1749			vec.sa_mask = 0;
1750			vec.sa_flags = 0;
1751			for (sig = 0; sig < NSIG; sig++)
1752				if (px_sa.psa_sigdefault && 1 << sig) {
1753					error = setsigvec(p, sig, &vec);
1754			}
1755		}
1756	}
1757
1758	/*
1759         * XXXAUDIT: Currently, we only audit the pathname of the binary.
1760         * There may also be poor interaction with dyld.
1761         */
1762
1763	task = current_task();
1764
1765	/* If we're not in vfork, don't permit a mutithreaded task to exec */
1766	if (!(uthread->uu_flag & UT_VFORK)) {
1767		if (task != kernel_task) {
1768			numthreads = get_task_numacts(task);
1769			if (numthreads <= 0 ) {
1770				error = EINVAL;
1771				goto bad;
1772			}
1773			if (numthreads > 1) {
1774				error = ENOTSUP;
1775				goto bad;
1776			}
1777		}
1778	}
1779
1780#if MAC_SPAWN	/* XXX */
1781	if (uap->mac_p != USER_ADDR_NULL) {
1782		error = mac_execve_enter(uap->mac_p, imgp);
1783		if (error)
1784			goto bad;
1785	}
1786#endif
1787
1788	if ((error = exec_activate_image(imgp)) != 0)
1789		goto bad;
1790bad:
1791	/* Image not claimed by any activator? */
1792	if (error == -1)
1793		error = ENOEXEC;
1794	if (error == 0) {
1795		exec_resettextvp(p, imgp);
1796	}
1797	if (imgp->ip_vp)
1798		vnode_put(imgp->ip_vp);
1799	if (imgp->ip_strings)
1800		execargs_free(imgp);
1801	if (imgp->ip_px_sfa != NULL)
1802		FREE(imgp->ip_px_sfa, M_TEMP);
1803	if (imgp->ip_px_spa != NULL)
1804		FREE(imgp->ip_px_spa, M_TEMP);
1805
1806#if CONFIG_MACF
1807	if (imgp->ip_execlabelp)
1808		mac_cred_label_free(imgp->ip_execlabelp);
1809	if (imgp->ip_scriptlabelp)
1810		mac_vnode_label_free(imgp->ip_scriptlabelp);
1811#endif
1812	if (undo_vfork) {
1813		if (error) {
1814			DTRACE_PROC1(exec__failure, int, error);
1815			vfork_exit(p, W_EXITCODE(-1, 0));
1816		} else {
1817			DTRACE_PROC(exec__success);
1818		}
1819		/*
1820		 * Returning to the parent process...
1821		 *
1822		 * If the parent wants the pid, copy it out
1823		 */
1824		if (pid != USER_ADDR_NULL)
1825			(void)suword(pid, p->p_pid);
1826		retval[0] = error;
1827		/*
1828		 * Override inherited code signing flags with the
1829		 * ones for the process that is being successfully
1830		 * loaded
1831		 */
1832		proc_lock(p);
1833		p->p_csflags = imgp->ip_csflags;
1834		proc_unlock(p);
1835		vfork_return(p, NULL, error);
1836		(void)thread_resume(imgp->ip_vfork_thread);
1837	}
1838
1839	if (!error) {
1840		/*
1841		 * Override inherited code signing flags with the
1842		 * ones for the process that is being successfully
1843		 * loaded
1844		 */
1845		proc_lock(p);
1846		p->p_csflags = imgp->ip_csflags;
1847		proc_unlock(p);
1848		DTRACE_PROC(exec__success);
1849	} else {
1850		DTRACE_PROC1(exec__failure, int, error);
1851	}
1852
1853	return(error);
1854}
1855
1856
1857/*
1858 * execve
1859 *
1860 * Parameters:	uap->fname		File name to exec
1861 *		uap->argp		Argument list
1862 *		uap->envp		Environment list
1863 *
1864 * Returns:	0			Success
1865 *	__mac_execve:EINVAL		Invalid argument
1866 *	__mac_execve:ENOTSUP		Invalid argument
1867 *	__mac_execve:EACCES		Permission denied
1868 *	__mac_execve:EINTR		Interrupted function
1869 *	__mac_execve:ENOMEM		Not enough space
1870 *	__mac_execve:EFAULT		Bad address
1871 *	__mac_execve:ENAMETOOLONG	Filename too long
1872 *	__mac_execve:ENOEXEC		Executable file format error
1873 *	__mac_execve:ETXTBSY		Text file busy [misuse of error code]
1874 *	__mac_execve:???
1875 *
1876 * TODO:	Dynamic linker header address on stack is copied via suword()
1877 */
1878/* ARGSUSED */
1879int
1880execve(proc_t p, struct execve_args *uap, register_t *retval)
1881{
1882	struct __mac_execve_args muap;
1883	int err;
1884
1885	muap.fname = uap->fname;
1886	muap.argp = uap->argp;
1887	muap.envp = uap->envp;
1888	muap.mac_p = USER_ADDR_NULL;
1889	err = __mac_execve(p, &muap, retval);
1890
1891	return(err);
1892}
1893
1894/*
1895 * __mac_execve
1896 *
1897 * Parameters:	uap->fname		File name to exec
1898 *		uap->argp		Argument list
1899 *		uap->envp		Environment list
1900 *		uap->mac_p		MAC label supplied by caller
1901 *
1902 * Returns:	0			Success
1903 *		EINVAL			Invalid argument
1904 *		ENOTSUP			Not supported
1905 *		ENOEXEC			Executable file format error
1906 *	exec_activate_image:EINVAL	Invalid argument
1907 *	exec_activate_image:EACCES	Permission denied
1908 *	exec_activate_image:EINTR	Interrupted function
1909 *	exec_activate_image:ENOMEM	Not enough space
1910 *	exec_activate_image:EFAULT	Bad address
1911 *	exec_activate_image:ENAMETOOLONG	Filename too long
1912 *	exec_activate_image:ENOEXEC	Executable file format error
1913 *	exec_activate_image:ETXTBSY	Text file busy [misuse of error code]
1914 *	exec_activate_image:EBADEXEC	The executable is corrupt/unknown
1915 *	exec_activate_image:???
1916 *	mac_execve_enter:???
1917 *
1918 * TODO:	Dynamic linker header address on stack is copied via suword()
1919 */
1920int
1921__mac_execve(proc_t p, struct __mac_execve_args *uap, register_t *retval)
1922{
1923	struct image_params image_params, *imgp;
1924	struct vnode_attr va;
1925	struct vnode_attr origva;
1926	struct uthread		*uthread;
1927	int error;
1928	task_t  task;
1929	int numthreads;
1930	char alt_p_comm[sizeof(p->p_comm)] = {0};	/* for PowerPC */
1931	int is_64 = IS_64BIT_PROCESS(p);
1932	struct vfs_context context;
1933
1934	context.vc_thread = current_thread();
1935	context.vc_ucred = kauth_cred_proc_ref(p);	/* XXX must NOT be kauth_cred_get() */
1936
1937	imgp = &image_params;
1938
1939	/* Initialize the common data in the image_params structure */
1940	bzero(imgp, sizeof(*imgp));
1941	imgp->ip_user_fname = uap->fname;
1942	imgp->ip_user_argv = uap->argp;
1943	imgp->ip_user_envv = uap->envp;
1944	imgp->ip_vattr = &va;
1945	imgp->ip_origvattr = &origva;
1946	imgp->ip_vfs_context = &context;
1947	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
1948	imgp->ip_p_comm = alt_p_comm;		/* for PowerPC */
1949	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
1950
1951	/*
1952         * XXXAUDIT: Currently, we only audit the pathname of the binary.
1953         * There may also be poor interaction with dyld.
1954         */
1955
1956	task = current_task();
1957	uthread = get_bsdthread_info(current_thread());
1958
1959	/* If we're not in vfork, don't permit a mutithreaded task to exec */
1960	if (!(uthread->uu_flag & UT_VFORK)) {
1961		if (task != kernel_task) {
1962			proc_lock(p);
1963			numthreads = get_task_numactivethreads(task);
1964			if (numthreads <= 0 ) {
1965				proc_unlock(p);
1966				kauth_cred_unref(&context.vc_ucred);
1967				return(EINVAL);
1968			}
1969			if (numthreads > 1) {
1970				proc_unlock(p);
1971				kauth_cred_unref(&context.vc_ucred);
1972				return(ENOTSUP);
1973			}
1974			proc_unlock(p);
1975		}
1976	}
1977
1978#if CONFIG_MACF
1979	if (uap->mac_p != USER_ADDR_NULL) {
1980		error = mac_execve_enter(uap->mac_p, imgp);
1981		if (error) {
1982			kauth_cred_unref(&context.vc_ucred);
1983			return (error);
1984		}
1985	}
1986#endif
1987
1988	error = exec_activate_image(imgp);
1989
1990	kauth_cred_unref(&context.vc_ucred);
1991
1992	/* Image not claimed by any activator? */
1993	if (error == -1)
1994		error = ENOEXEC;
1995
1996	if (error == 0) {
1997		exec_resettextvp(p, imgp);
1998	}
1999	if (imgp->ip_vp != NULLVP)
2000		vnode_put(imgp->ip_vp);
2001	if (imgp->ip_strings)
2002		execargs_free(imgp);
2003#if CONFIG_MACF
2004	if (imgp->ip_execlabelp)
2005		mac_cred_label_free(imgp->ip_execlabelp);
2006	if (imgp->ip_scriptlabelp)
2007		mac_vnode_label_free(imgp->ip_scriptlabelp);
2008#endif
2009	if (!error) {
2010		/*
2011		 * Override inherited code signing flags with the
2012		 * ones for the process that is being successfully
2013		 * loaded
2014		 */
2015		proc_lock(p);
2016		p->p_csflags = imgp->ip_csflags;
2017		proc_unlock(p);
2018		DTRACE_PROC(exec__success);
2019
2020		if (uthread->uu_flag & UT_VFORK) {
2021			vfork_return(p, retval, p->p_pid);
2022			(void)thread_resume(imgp->ip_vfork_thread);
2023		}
2024	} else {
2025		DTRACE_PROC1(exec__failure, int, error);
2026	}
2027
2028	return(error);
2029}
2030
2031
2032/*
2033 * copyinptr
2034 *
2035 * Description:	Copy a pointer in from user space to a user_addr_t in kernel
2036 *		space, based on 32/64 bitness of the user space
2037 *
2038 * Parameters:	froma			User space address
2039 *		toptr			Address of kernel space user_addr_t
2040 *		ptr_size		4/8, based on 'froma' address space
2041 *
2042 * Returns:	0			Success
2043 *		EFAULT			Bad 'froma'
2044 *
 * Implicit returns:
 *		*toptr			Modified
2047 */
2048static int
2049copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
2050{
2051	int error;
2052
2053	if (ptr_size == 4) {
2054		/* 64 bit value containing 32 bit address */
2055		unsigned int i;
2056
2057		error = copyin(froma, &i, 4);
2058		*toptr = CAST_USER_ADDR_T(i);	/* SAFE */
2059	} else {
2060		error = copyin(froma, toptr, 8);
2061	}
2062	return (error);
2063}
2064
2065
2066/*
2067 * copyoutptr
2068 *
2069 * Description:	Copy a pointer out from a user_addr_t in kernel space to
2070 *		user space, based on 32/64 bitness of the user space
2071 *
 * Parameters:	ua			Pointer value (a user_addr_t) to store
 *		ptr			User space address to copy to
 *		ptr_size		4/8, based on 'ptr' address space
 *
 * Returns:	0			Success
 *		EFAULT			Bad 'ptr'
 *
 * Implicit returns:
 *		<none>			All arguments are passed by value
2081 */
2082static int
2083copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
2084{
2085	int error;
2086
2087	if (ptr_size == 4) {
2088		/* 64 bit value containing 32 bit address */
2089		unsigned int i = CAST_DOWN(unsigned int,ua);	/* SAFE */
2090
2091		error = copyout(&i, ptr, 4);
2092	} else {
2093		error = copyout(&ua, ptr, 8);
2094	}
2095	return (error);
2096}
2097
2098
2099/*
2100 * exec_copyout_strings
2101 *
2102 * Copy out the strings segment to user space.  The strings segment is put
2103 * on a preinitialized stack frame.
2104 *
 * Parameters:	struct image_params *	the image parameter block
 *		user_addr_t *		a pointer to the stack offset variable
 *
 * Returns:	0			Success
 *		!0			Failure: errno
2110 *
2111 * Implicit returns:
2112 *		(*stackp)		The stack offset, modified
2113 *
2114 * Note:	The strings segment layout is backward, from the beginning
2115 *		of the top of the stack to consume the minimal amount of
2116 *		space possible; the returned stack pointer points to the
2117 *		end of the area consumed (stacks grow upward).
2118 *
2119 *		argc is an int; arg[i] are pointers; env[i] are pointers;
2120 *		exec_path is a pointer; the 0's are (void *)NULL's
2121 *
2122 * The stack frame layout is:
2123 *
2124 *	+-------------+
2125 * sp->	|     argc    |
2126 *	+-------------+
2127 *	|    arg[0]   |
2128 *	+-------------+
2129 *	       :
2130 *	       :
2131 *	+-------------+
2132 *	| arg[argc-1] |
2133 *	+-------------+
2134 *	|      0      |
2135 *	+-------------+
2136 *	|    env[0]   |
2137 *	+-------------+
2138 *	       :
2139 *	       :
2140 *	+-------------+
2141 *	|    env[n]   |
2142 *	+-------------+
2143 *	|      0      |
2144 *	+-------------+
2145 *	|  exec_path  |	In MacOS X PR2 Beaker2E the path passed to exec() is
2146 *	+-------------+	passed on the stack just after the trailing 0 of the
2147 *	|      0      | the envp[] array as a pointer to a string.
2148 *	+-------------+
2149 *	|  PATH AREA  |
2150 *	+-------------+
2151 *	| STRING AREA |
2152 *	       :
2153 *	       :
2154 *	|             | <- p->user_stack
2155 *	+-------------+
2156 *
2157 * Although technically a part of the STRING AREA, we treat the PATH AREA as
2158 * a separate entity.  This allows us to align the beginning of the PATH AREA
2159 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
 * which precede it on the stack are properly aligned.
2161 *
2162 * TODO:	argc copied with suword(), which takes a 64 bit address
2163 */
2164static int
2165exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
2166{
2167	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
2168	int	ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
2169	char	*argv = imgp->ip_argv;	/* modifiable copy of argv */
2170	user_addr_t	string_area;	/* *argv[], *env[] */
2171	user_addr_t	path_area;	/* package launch path */
2172	user_addr_t	ptr_area;	/* argv[], env[], exec_path */
2173	user_addr_t	stack;
2174	int	stringc = imgp->ip_argc + imgp->ip_envc;
2175	int len;
2176	int error;
2177	int strspace;
2178
2179	stack = *stackp;
2180
2181	unsigned patharea_len = imgp->ip_argv - imgp->ip_strings;
2182	int envc_add = 0;
2183
2184	/*
2185	 * Set up pointers to the beginning of the string area, the beginning
2186	 * of the path area, and the beginning of the pointer area (actually,
2187	 * the location of argc, an int, which may be smaller than a pointer,
2188	 * but we use ptr_size worth of space for it, for alignment).
2189	 */
2190	string_area = stack - (((imgp->ip_strendp - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1)) - ptr_size;
2191	path_area = string_area - ((patharea_len + ptr_size-1) & ~(ptr_size-1));
2192	ptr_area = path_area - ((imgp->ip_argc + imgp->ip_envc + 4 + envc_add) * ptr_size) - ptr_size /*argc*/;
2193
2194	/* Return the initial stack address: the location of argc */
2195	*stackp = ptr_area;
2196
2197	/*
2198	 * Record the size of the arguments area so that sysctl_procargs()
2199	 * can return the argument area without having to parse the arguments.
2200	 */
2201	proc_lock(p);
2202	p->p_argc = imgp->ip_argc;
2203	p->p_argslen = (int)(stack - path_area);
2204	proc_unlock(p);
2205
2206
2207	/*
2208	 * Support for new app package launching for Mac OS X allocates
2209	 * the "path" at the begining of the imgp->ip_strings buffer.
2210	 * copy it just before the string area.
2211	 */
2212	len = 0;
2213	error = copyoutstr(imgp->ip_strings, path_area,
2214						   patharea_len,
2215				(size_t *)&len);
2216	if (error)
2217		goto bad;
2218
2219
2220	/* Save a NULL pointer below it */
2221	(void)copyoutptr(0LL, path_area - ptr_size, ptr_size);
2222
2223	/* Save the pointer to "path" just below it */
2224	(void)copyoutptr(path_area, path_area - 2*ptr_size, ptr_size);
2225
2226	/*
2227	 * ptr_size for 2 NULL one each ofter arg[argc -1] and env[n]
2228	 * ptr_size for argc
2229	 * skip over saved path, ptr_size for pointer to path,
2230	 * and ptr_size for the NULL after pointer to path.
2231	 */
2232
2233	/* argc (int32, stored in a ptr_size area) */
2234	(void)suword(ptr_area, imgp->ip_argc);
2235	ptr_area += sizeof(int);
2236	/* pad to ptr_size, if 64 bit image, to ensure user stack alignment */
2237	if (imgp->ip_flags & IMGPF_IS_64BIT) {
2238		(void)suword(ptr_area, 0);	/* int, not long: ignored */
2239		ptr_area += sizeof(int);
2240	}
2241
2242#if CONFIG_DTRACE
2243	p->p_dtrace_argv = ptr_area; /* user_addr_t &argv[0] for dtrace convenience */
2244#endif /* CONFIG_DTRACE */
2245
2246	/*
2247	 * We use (string_area - path_area) here rather than the more
2248	 * intuitive (imgp->ip_argv - imgp->ip_strings) because we are
2249	 * interested in the length of the PATH_AREA in user space,
2250	 * rather than the actual length of the execution path, since
2251	 * it includes alignment padding of the PATH_AREA + STRING_AREA
2252	 * to a ptr_size boundary.
2253	 */
2254	strspace = SIZE_IMG_STRSPACE - (string_area - path_area);
2255	for (;;) {
2256		if (stringc == imgp->ip_envc) {
2257			/* argv[n] = NULL */
2258			(void)copyoutptr(0LL, ptr_area, ptr_size);
2259			ptr_area += ptr_size;
2260#if CONFIG_DTRACE
2261			p->p_dtrace_envp = ptr_area; /* user_addr_t &env[0] for dtrace convenience */
2262#endif /* CONFIG_DTRACE */
2263		}
2264		if (--stringc < 0)
2265			break;
2266
2267		/* pointer: argv[n]/env[n] */
2268		(void)copyoutptr(string_area, ptr_area, ptr_size);
2269
2270		/* string : argv[n][]/env[n][] */
2271		do {
2272			if (strspace <= 0) {
2273				error = E2BIG;
2274				break;
2275			}
2276			error = copyoutstr(argv, string_area,
2277						(unsigned)strspace,
2278						(size_t *)&len);
2279			string_area += len;
2280			argv += len;
2281			strspace -= len;
2282		} while (error == ENAMETOOLONG);
2283		if (error == EFAULT || error == E2BIG)
2284			break;	/* bad stack - user's problem */
2285		ptr_area += ptr_size;
2286	}
2287	/* env[n] = NULL */
2288	(void)copyoutptr(0LL, ptr_area, ptr_size);
2289
2290bad:
2291	return(error);
2292}
2293
2294
2295/*
2296 * exec_extract_strings
2297 *
2298 * Copy arguments and environment from user space into work area; we may
2299 * have already copied some early arguments into the work area, and if
 * so, any arguments copied in are appended to those already there.
2301 *
2302 * Parameters:	struct image_params *	the image parameter block
2303 *
2304 * Returns:	0			Success
2305 *		!0			Failure: errno
2306 *
 * Implicit returns:
2308 *		(imgp->ip_argc)		Count of arguments, updated
2309 *		(imgp->ip_envc)		Count of environment strings, updated
2310 *
2311 *
2312 * Note:	The argument and environment vectors are user space pointers
2313 *		to arrays of user space pointers.
2314 */
2315static int
2316exec_extract_strings(struct image_params *imgp)
2317{
2318	int error = 0;
2319	int	ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
2320	user_addr_t	argv = imgp->ip_user_argv;
2321	user_addr_t	envv = imgp->ip_user_envv;
2322
2323	/*
2324	 * If the argument vector is NULL, this is the system startup
2325	 * bootstrap from load_init_program(), and there's nothing to do
2326	 */
2327	if (imgp->ip_user_argv == 0LL)
2328		goto bad;
2329
2330	/* Now, get rest of arguments */
2331
2332	/*
2333	 * If we are running an interpreter, replace the av[0] that was
2334	 * passed to execve() with the fully qualified path name that was
2335	 * passed to execve() for interpreters which do not use the PATH
2336	 * to locate their script arguments.
2337	 */
2338	if((imgp->ip_flags & IMGPF_INTERPRET) != 0 && argv != 0LL) {
2339		user_addr_t	arg;
2340
2341		error = copyinptr(argv, &arg, ptr_size);
2342		if (error)
2343			goto bad;
2344		if (arg != 0LL && arg != (user_addr_t)-1) {
2345			argv += ptr_size;
2346			error = exec_add_string(imgp, imgp->ip_user_fname);
2347			if (error)
2348				goto bad;
2349			imgp->ip_argc++;
2350		}
2351	}
2352
2353	while (argv != 0LL) {
2354		user_addr_t	arg;
2355
2356		error = copyinptr(argv, &arg, ptr_size);
2357		if (error)
2358			goto bad;
2359
2360		argv += ptr_size;
2361		if (arg == 0LL) {
2362			break;
2363		} else if (arg == (user_addr_t)-1) {
2364			/* Um... why would it be -1? */
2365			error = EFAULT;
2366			goto bad;
2367		}
2368		/*
2369		* av[n...] = arg[n]
2370		*/
2371		error = exec_add_string(imgp, arg);
2372		if (error)
2373			goto bad;
2374		imgp->ip_argc++;
2375	}
2376
2377	/* Now, get the environment */
2378	while (envv != 0LL) {
2379		user_addr_t	env;
2380
2381		error = copyinptr(envv, &env, ptr_size);
2382		if (error)
2383			goto bad;
2384
2385		envv += ptr_size;
2386		if (env == 0LL) {
2387			break;
2388		} else if (env == (user_addr_t)-1) {
2389			error = EFAULT;
2390			goto bad;
2391		}
2392		/*
2393		* av[n...] = env[n]
2394		*/
2395		error = exec_add_string(imgp, env);
2396		if (error)
2397			goto bad;
2398		imgp->ip_envc++;
2399	}
2400bad:
2401	return error;
2402}
2403
2404
/*
 * unix_stack_size
 *
 * Yields the rlim_cur field of the RLIMIT_STACK entry in the p_rlimit[]
 * array of the process 'p' points to.  The argument is parenthesized so
 * that any pointer expression, not just a plain identifier, expands
 * correctly.
 */
#define	unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
2406
2407/*
2408 * exec_check_permissions
2409 *
 * Description:	Verify that the file that is being attempted to be executed
 *		is in fact allowed to be executed based on its POSIX file
 *		permissions and other access control criteria
2413 *
2414 * Parameters:	struct image_params *	the image parameter block
2415 *
2416 * Returns:	0			Success
2417 *		EACCES			Permission denied
2418 *		ENOEXEC			Executable file format error
2419 *		ETXTBSY			Text file busy [misuse of error code]
2420 *	vnode_getattr:???
2421 *	vnode_authorize:???
2422 */
2423static int
2424exec_check_permissions(struct image_params *imgp)
2425{
2426	struct vnode *vp = imgp->ip_vp;
2427	struct vnode_attr *vap = imgp->ip_vattr;
2428	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
2429	int error;
2430	kauth_action_t action;
2431
2432	/* Only allow execution of regular files */
2433	if (!vnode_isreg(vp))
2434		return (EACCES);
2435
2436	/* Get the file attributes that we will be using here and elsewhere */
2437	VATTR_INIT(vap);
2438	VATTR_WANTED(vap, va_uid);
2439	VATTR_WANTED(vap, va_gid);
2440	VATTR_WANTED(vap, va_mode);
2441	VATTR_WANTED(vap, va_fsid);
2442	VATTR_WANTED(vap, va_fileid);
2443	VATTR_WANTED(vap, va_data_size);
2444	if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
2445		return (error);
2446
2447	/*
2448	 * Ensure that at least one execute bit is on - otherwise root
2449	 * will always succeed, and we don't want to happen unless the
2450	 * file really is executable.
2451	 */
2452	if ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
2453		return (EACCES);
2454
2455	/* Disallow zero length files */
2456	if (vap->va_data_size == 0)
2457		return (ENOEXEC);
2458
2459	imgp->ip_arch_offset = (user_size_t)0;
2460	imgp->ip_arch_size = vap->va_data_size;
2461
2462	/* Disable setuid-ness for traced programs or if MNT_NOSUID */
2463	if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED)) {
2464		vap->va_mode &= ~(VSUID | VSGID);
2465#if CONFIG_MACF
2466		imgp->ip_no_trans = 1;
2467#endif
2468	}
2469
2470#if CONFIG_MACF
2471	error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp);
2472	if (error)
2473		return (error);
2474#endif
2475
2476  	/* Check for execute permission */
2477 	action = KAUTH_VNODE_EXECUTE;
2478  	/* Traced images must also be readable */
2479 	if (p->p_lflag & P_LTRACED)
2480 		action |= KAUTH_VNODE_READ_DATA;
2481 	if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
2482		return (error);
2483
2484#if 0
2485	/* Don't let it run if anyone had it open for writing */
2486	vnode_lock(vp);
2487	if (vp->v_writecount) {
2488		panic("going to return ETXTBSY %x", vp);
2489		vnode_unlock(vp);
2490		return (ETXTBSY);
2491	}
2492	vnode_unlock(vp);
2493#endif
2494
2495
2496#ifdef IMGPF_POWERPC
2497	/*
2498	 * If the file we are about to attempt to load is the exec_handler_ppc,
2499	 * which is determined by matching the vattr fields against previously
2500	 * cached values, then we set the PowerPC environment flag.
2501	 */
2502	if (vap->va_fsid == exec_archhandler_ppc.fsid &&
2503		vap->va_fileid == (uint64_t)((u_long)exec_archhandler_ppc.fileid)) {
2504		imgp->ip_flags |= IMGPF_POWERPC;
2505	}
2506#endif	/* IMGPF_POWERPC */
2507
2508	/* XXX May want to indicate to underlying FS that vnode is open */
2509
2510	return (error);
2511}
2512
2513
2514/*
2515 * exec_handle_sugid
2516 *
2517 * Initially clear the P_SUGID in the process flags; if an SUGID process is
2518 * exec'ing a non-SUGID image, then  this is the point of no return.
2519 *
 * If the image being activated is SUGID, then replace the credential with a
 * copy, disable tracing (unless the tracing process is root), reset the
 * mach task port to revoke it, and set the P_SUGID bit.
2523 *
2524 * If the saved user and group ID will be changing, then make sure it happens
2525 * to a new credential, rather than a shared one.
2526 *
2527 * Set the security token (this is probably obsolete, given that the token
2528 * should not technically be separate from the credential itself).
2529 *
2530 * Parameters:	struct image_params *	the image parameter block
2531 *
 * Returns:	0			Success
 *		!0			errno left in 'error' by vn_open(),
 *					falloc() or vnode_ref_ext() while
 *					filling in descriptors 0-2
2533 *
2534 * Implicit returns:
2535 *		<process credential>	Potentially modified/replaced
2536 *		<task port>		Potentially revoked
2537 *		<process flags>		P_SUGID bit potentially modified
2538 *		<security token>	Potentially modified
2539 */
static int
exec_handle_sugid(struct image_params *imgp)
{
	/*
	 * Applies the set-id and MAC label credential changes for the image
	 * described by 'imgp' to the process obtained from
	 * imgp->ip_vfs_context.
	 *
	 * The return value is 'error': it starts out as 0 and is only ever
	 * assigned by vn_open(), falloc() and vnode_ref_ext() in the
	 * /dev/null descriptor setup further down.
	 */
	kauth_cred_t		cred = vfs_context_ucred(imgp->ip_vfs_context);
	proc_t			p = vfs_context_proc(imgp->ip_vfs_context);
	int			i;
	int			leave_sugid_clear = 0;
	int			error = 0;
	struct vnode	*dev_null = NULLVP;
#if CONFIG_MACF
	int			mac_transition;

	/*
	 * Determine whether a call to update the MAC label will result in the
	 * credential changing.
	 *
	 * Note:	MAC policies which do not actually end up modifying
	 *		the label subsequently are strongly encouraged to
	 *		return 0 for this check, since a non-zero answer will
	 *		slow down the exec fast path for normal binaries.
	 */
	mac_transition = mac_cred_check_label_update_execve(
							imgp->ip_vfs_context,
							imgp->ip_vp,
							imgp->ip_scriptlabelp,
							imgp->ip_execlabelp, p);
#endif

	/* Start with P_SUGID clear; it is set again below when required */
	OSBitAndAtomic(~((uint32_t)P_SUGID), (UInt32 *)&p->p_flag);

	/*
	 * Order of the following is important; group checks must go last,
	 * as we use the success of the 'ismember' check combined with the
	 * failure of the explicit match to indicate that we will be setting
	 * the egid of the process even though the new process did not
	 * require VSUID/VSGID bits in order for it to set the new group as
	 * its egid.
	 *
	 * Note:	Technically, by this we are implying a call to
	 *		setegid() in the new process, rather than implying
	 *		it used its VSGID bit to set the effective group,
	 *		even though there is no code in that process to make
	 *		such a call.
	 */
	/*
	 * NOTE(review): kauth_cred_ismember_gid() stores its answer in
	 * leave_sugid_clear; a non-zero return presumably means the lookup
	 * itself failed - confirm against the kauth implementation.
	 */
	if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
	     kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
	    ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
		 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
		 (cred->cr_gid != imgp->ip_origvattr->va_gid)))) {

#if CONFIG_MACF
/* label for MAC transition and neither VSUID nor VSGID */
/*
 * Also reached by the goto in the CONFIG_MACF else-branch at the bottom of
 * this if statement, which sets leave_sugid_clear to 1 before jumping here.
 */
handle_mac_transition:
#endif

		/*
		 * Replace the credential with a copy of itself if euid or
		 * egid change.
		 *
		 * Note:	setuid binaries will automatically opt out of
		 *		group resolver participation as a side effect
		 *		of this operation.  This is an intentional
		 *		part of the security model, which requires a
		 *		participating credential be established by
		 *		escalating privilege, setting up all other
		 *		aspects of the credential including whether
		 *		or not to participate in external group
		 *		membership resolution, then dropping their
		 *		effective privilege to that of the desired
		 *		final credential state.
		 */
		if (imgp->ip_origvattr->va_mode & VSUID) {
			p->p_ucred  = kauth_cred_setresuid(p->p_ucred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
		}
		if (imgp->ip_origvattr->va_mode & VSGID) {
			p->p_ucred = kauth_cred_setresgid(p->p_ucred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
		}

#if CONFIG_MACF
		/*
		 * If a policy has indicated that it will transition the label,
		 * before making the call into the MAC policies, get a new
		 * duplicate credential, so they can modify it without
		 * modifying any others sharing it.
		 */
		if (mac_transition) {
			kauth_cred_t	my_cred;
			if (kauth_proc_label_update_execve(p,
						imgp->ip_vfs_context,
						imgp->ip_vp,
						imgp->ip_scriptlabelp,
						imgp->ip_execlabelp)) {
				/*
				 * If updating the MAC label resulted in a
				 * disjoint credential, flag that we need to
				 * set the P_SUGID bit.  This protects
				 * against debuggers being attached by an
				 * insufficiently privileged process onto the
				 * result of a transition to a more privileged
				 * credential.
				 */
				leave_sugid_clear = 0;
			}

			/*
			 * Take a reference on the process credential for
			 * the duration of the task label update, then drop it.
			 */
			my_cred = kauth_cred_proc_ref(p);
			mac_task_label_update_cred(my_cred, p->task);
			kauth_cred_unref(&my_cred);
		}
#endif	/* CONFIG_MACF */

		/*
		 * Have mach reset the task and thread ports.
		 * We don't want anyone who had the ports before
		 * a setuid exec to be able to access/control the
		 * task/thread after.
		 */
		if (current_task() == p->task) {
			ipc_task_reset(p->task);
			ipc_thread_reset(current_thread());
		}

		/*
		 * If 'leave_sugid_clear' is non-zero, then we passed the
		 * VSUID and MACF checks, and successfully determined that
		 * the previous cred was a member of the VSGID group, but
		 * that it was not the default at the time of the execve,
		 * and that the post-labelling credential was not disjoint.
		 * So we don't set the P_SUGID on the basis of simply
		 * running this code.
		 */
		if (!leave_sugid_clear)
			OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag);

		/* Cache the vnode for /dev/null the first time around */
		/*
		 * dev_null is an automatic variable initialised to NULLVP
		 * above and reset to NULLVP after use below, so this test is
		 * true whenever it is reached and /dev/null is looked up on
		 * each pass through here.
		 */
		if (dev_null == NULLVP) {
			struct nameidata nd1;

			NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE32,
			    CAST_USER_ADDR_T("/dev/null"),
			    imgp->ip_vfs_context);

			/* a failed open leaves dev_null NULLVP and its errno in 'error' */
			if ((error = vn_open(&nd1, FREAD, 0)) == 0) {
				dev_null = nd1.ni_vp;
				/*
				 * vn_open returns with both a use_count
				 * and an io_count on the found vnode
				 * drop the io_count, but keep the use_count
				 */
				vnode_put(nd1.ni_vp);
			}
		}

		/* Radar 2261856; setuid security hole fix */
		/* Patch from OpenBSD: A. Ramesh */
		/*
		 * XXX For setuid processes, attempt to ensure that
		 * stdin, stdout, and stderr are already allocated.
		 * We do not want userland to accidentally allocate
		 * descriptors in this range which has implied meaning
		 * to libc.
		 */
		if (dev_null != NULLVP) {
			for (i = 0; i < 3; i++) {
				struct fileproc *fp;
				int indx;

				/* descriptor already open: leave it alone */
				if (p->p_fd->fd_ofiles[i] != NULL)
					continue;

				/*
				 * On falloc() failure move on to the next
				 * descriptor; 'error' keeps the falloc()
				 * errno unless a later call overwrites it.
				 */
				if ((error = falloc(p, &fp, &indx, imgp->ip_vfs_context)) != 0)
					continue;

				/*
				 * Each descriptor gets its own reference on
				 * the vnode; if that fails, release the file
				 * just allocated and stop filling descriptors.
				 */
				if ((error = vnode_ref_ext(dev_null, FREAD)) != 0) {
					fp_free(p, indx, fp);
					break;
				}

				/* make the new file a read-only vnode file on /dev/null */
				fp->f_fglob->fg_flag = FREAD;
				fp->f_fglob->fg_type = DTYPE_VNODE;
				fp->f_fglob->fg_ops = &vnops;
				fp->f_fglob->fg_data = (caddr_t)dev_null;

				proc_fdlock(p);
				procfdtbl_releasefd(p, indx, NULL);
				fp_drop(p, indx, fp, 1);
				proc_fdunlock(p);
			}
			/*
			 * for now we need to drop the reference immediately
			 * since we don't have any mechanism in place to
			 * release it before starting to unmount "/dev"
			 * during a reboot/shutdown
			 */
			vnode_rele(dev_null);
			dev_null = NULLVP;
		}
	}
#if CONFIG_MACF
	else {
		/*
		 * We are here because we were told that the MAC label will
		 * be transitioned, and the binary is not VSUID or VSGID; to
		 * deal with this case, we could either duplicate a lot of
		 * code, or we can indicate we want to default the P_SUGID
		 * bit clear and jump back up.
		 */
		if (mac_transition) {
			leave_sugid_clear = 1;
			goto handle_mac_transition;
		}
	}
#endif	/* CONFIG_MACF */

	/*
	 * Implement the semantic where the effective user and group become
	 * the saved user and group in exec'ed programs.
	 */
	p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred),  p->p_ucred->cr_gid);

	/* Update the process' identity version and set the security token */
	p->p_idversion++;
	set_security_token(p);

	/* 0, or the errno recorded by the /dev/null descriptor setup above */
	return(error);
}
2765
2766
2767/*
2768 * create_unix_stack
2769 *
2770 * Description:	Set the user stack address for the process to the provided
2771 *		address.  If a custom stack was not set as a result of the
2772 *		load process (i.e. as specified by the image file for the
2773 *		executable), then allocate the stack in the provided map and
2774 *		set up appropriate guard pages for enforcing administrative
2775 *		limits on stack growth, if they end up being needed.
2776 *
2777 * Parameters:	p			Process to set stack on
2778 *		user_stack		Address to set stack for process to
2779 *		customstack		FALSE if no custom stack in binary
2780 *		map			Address map in which to allocate the
2781 *					new stack, if 'customstack' is FALSE
2782 *
2783 * Returns:	KERN_SUCCESS		Stack successfully created
2784 *		!KERN_SUCCESS		Mach failure code
2785 */
2786static kern_return_t
2787create_unix_stack(vm_map_t map, user_addr_t user_stack, int customstack,
2788			proc_t p)
2789{
2790	mach_vm_size_t		size, prot_size;
2791	mach_vm_offset_t	addr, prot_addr;
2792	kern_return_t		kr;
2793
2794	proc_lock(p);
2795	p->user_stack = user_stack;
2796	proc_unlock(p);
2797
2798	if (!customstack) {
2799		/*
2800		 * Allocate enough space for the maximum stack size we
2801		 * will ever authorize and an extra page to act as
2802		 * a guard page for stack overflows.
2803		 */
2804		size = mach_vm_round_page(MAXSSIZ);
2805#if STACK_GROWTH_UP
2806		addr = mach_vm_trunc_page(user_stack);
2807#else	/* STACK_GROWTH_UP */
2808		addr = mach_vm_trunc_page(user_stack - size);
2809#endif	/* STACK_GROWTH_UP */
2810		kr = mach_vm_allocate(map, &addr, size,
2811					VM_MAKE_TAG(VM_MEMORY_STACK) |
2812				      VM_FLAGS_FIXED);
2813		if (kr != KERN_SUCCESS) {
2814			return kr;
2815		}
2816		/*
2817		 * And prevent access to what's above the current stack
2818		 * size limit for this process.
2819		 */
2820		prot_addr = addr;
2821#if STACK_GROWTH_UP
2822		prot_addr += unix_stack_size(p);
2823#endif /* STACK_GROWTH_UP */
2824		prot_addr = mach_vm_round_page(prot_addr);
2825		prot_size = mach_vm_trunc_page(size - unix_stack_size(p));
2826		kr = mach_vm_protect(map,
2827				     prot_addr,
2828				     prot_size,
2829				     FALSE,
2830				     VM_PROT_NONE);
2831		if (kr != KERN_SUCCESS) {
2832			(void) mach_vm_deallocate(map, addr, size);
2833			return kr;
2834		}
2835	}
2836	return KERN_SUCCESS;
2837}
2838
2839#include <sys/reboot.h>
2840
2841static char		init_program_name[128] = "/sbin/launchd";
2842
2843struct execve_args	init_exec_args;
2844
2845/*
2846 * load_init_program
2847 *
2848 * Description:	Load the "init" program; in most cases, this will be "launchd"
2849 *
2850 * Parameters:	p			Process to call execve() to create
2851 *					the "init" program
2852 *
2853 * Returns:	(void)
2854 *
2855 * Notes:	The process that is passed in is the first manufactured
2856 *		process on the system, and gets here via bsd_ast() firing
2857 *		for the first time.  This is done to ensure that bsd_init()
2858 *		has run to completion.
2859 */
2860void
2861load_init_program(proc_t p)
2862{
2863	vm_offset_t	init_addr;
2864	int		argc = 0;
2865	char		*argv[3];
2866	int			error;
2867	register_t 	retval[2];
2868
2869	/*
2870	 * Copy out program name.
2871	 */
2872
2873	init_addr = VM_MIN_ADDRESS;
2874	(void) vm_allocate(current_map(), &init_addr, PAGE_SIZE,
2875				VM_FLAGS_ANYWHERE);
2876	if (init_addr == 0)
2877		init_addr++;
2878
2879	(void) copyout((caddr_t) init_program_name, CAST_USER_ADDR_T(init_addr),
2880			(unsigned) sizeof(init_program_name)+1);
2881
2882	argv[argc++] = (char *) init_addr;
2883	init_addr += sizeof(init_program_name);
2884	init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
2885
2886	/*
2887	 * Put out first (and only) argument, similarly.
2888	 * Assumes everything fits in a page as allocated
2889	 * above.
2890	 */
2891	if (boothowto & RB_SINGLE) {
2892		const char *init_args = "-s";
2893
2894		copyout(init_args, CAST_USER_ADDR_T(init_addr),
2895			strlen(init_args));
2896
2897		argv[argc++] = (char *)init_addr;
2898		init_addr += strlen(init_args);
2899		init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
2900
2901	}
2902
2903	/*
2904	 * Null-end the argument list
2905	 */
2906	argv[argc] = NULL;
2907
2908	/*
2909	 * Copy out the argument list.
2910	 */
2911
2912	(void) copyout((caddr_t) argv, CAST_USER_ADDR_T(init_addr),
2913			(unsigned) sizeof(argv));
2914
2915	/*
2916	 * Set up argument block for fake call to execve.
2917	 */
2918
2919	init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
2920	init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr);
2921	init_exec_args.envp = CAST_USER_ADDR_T(0);
2922
2923	/*
2924	 * So that mach_init task is set with uid,gid 0 token
2925	 */
2926	set_security_token(p);
2927
2928	error = execve(p,&init_exec_args,retval);
2929	if (error)
2930		panic("Process 1 exec of %s failed, errno %d\n",
2931		      init_program_name, error);
2932}
2933
2934/*
2935 * load_return_to_errno
2936 *
2937 * Description:	Convert a load_return_t (Mach error) to an errno (BSD error)
2938 *
2939 * Parameters:	lrtn			Mach error number
2940 *
2941 * Returns:	(int)			BSD error number
2942 *		0			Success
2943 *		EBADARCH		Bad architecture
2944 *		EBADMACHO		Bad Mach object file
2945 *		ESHLIBVERS		Bad shared library version
2946 *		ENOMEM			Out of memory/resource shortage
2947 *		EACCES			Access denied
2948 *		ENOENT			Entry not found (usually "file does
2949 *					does not exist")
2950 *		EIO			An I/O error occurred
2951 *		EBADEXEC		The executable is corrupt/unknown
2952 */
2953static int
2954load_return_to_errno(load_return_t lrtn)
2955{
2956	switch (lrtn) {
2957	case LOAD_SUCCESS:
2958		return 0;
2959	case LOAD_BADARCH:
2960		return EBADARCH;
2961	case LOAD_BADMACHO:
2962		return EBADMACHO;
2963	case LOAD_SHLIB:
2964		return ESHLIBVERS;
2965	case LOAD_NOSPACE:
2966	case LOAD_RESOURCE:
2967		return ENOMEM;
2968	case LOAD_PROTECT:
2969		return EACCES;
2970	case LOAD_ENOENT:
2971		return ENOENT;
2972	case LOAD_IOERROR:
2973		return EIO;
2974	case LOAD_FAILURE:
2975	default:
2976		return EBADEXEC;
2977	}
2978}
2979
2980#include <mach/mach_types.h>
2981#include <mach/vm_prot.h>
2982#include <mach/semaphore.h>
2983#include <mach/sync_policy.h>
2984#include <kern/clock.h>
2985#include <mach/kern_return.h>
2986
2987extern semaphore_t execve_semaphore;
2988
2989/*
2990 * execargs_alloc
2991 *
2992 * Description:	Allocate the block of memory used by the execve arguments.
2993 *		At the same time, we allocate a page so that we can read in
2994 *		the first page of the image.
2995 *
2996 * Parameters:	struct image_params *	the image parameter block
2997 *
2998 * Returns:	0			Success
2999 *		EINVAL			Invalid argument
3000 *		EACCES			Permission denied
3001 *		EINTR			Interrupted function
3002 *		ENOMEM			Not enough space
3003 *
3004 * Notes:	This is a temporary allocation into the kernel address space
3005 *		to enable us to copy arguments in from user space.  This is
3006 *		necessitated by not mapping the process calling execve() into
3007 *		the kernel address space during the execve() system call.
3008 *
3009 *		We assemble the argument and environment, etc., into this
3010 *		region before copying it as a single block into the child
3011 *		process address space (at the top or bottom of the stack,
3012 *		depending on which way the stack grows; see the function
3013 *		exec_copyout_strings() for details).
3014 *
3015 *		This ends up with a second (possibly unnecessary) copy compared
 *		with assembling the data directly into the child address space,
3017 *		instead, but since we cannot be guaranteed that the parent has
3018 *		not modified its environment, we can't really know that it's
3019 *		really a block there as well.
3020 */
3021static int
3022execargs_alloc(struct image_params *imgp)
3023{
3024	kern_return_t kret;
3025
3026	kret = semaphore_wait(execve_semaphore);
3027	if (kret != KERN_SUCCESS)
3028		switch (kret) {
3029		default:
3030			return (EINVAL);
3031		case KERN_INVALID_ADDRESS:
3032		case KERN_PROTECTION_FAILURE:
3033			return (EACCES);
3034		case KERN_ABORTED:
3035		case KERN_OPERATION_TIMED_OUT:
3036			return (EINTR);
3037		}
3038
3039	kret = kmem_alloc_pageable(bsd_pageable_map, (vm_offset_t *)&imgp->ip_strings, NCARGS + PAGE_SIZE);
3040	imgp->ip_vdata = imgp->ip_strings + NCARGS;
3041	if (kret != KERN_SUCCESS) {
3042	        semaphore_signal(execve_semaphore);
3043		return (ENOMEM);
3044	}
3045	return (0);
3046}
3047
3048/*
3049 * execargs_free
3050 *
3051 * Description:	Free the block of memory used by the execve arguments and the
3052 *		first page of the executable by a previous call to the function
3053 *		execargs_alloc().
3054 *
3055 * Parameters:	struct image_params *	the image parameter block
3056 *
3057 * Returns:	0			Success
3058 *		EINVAL			Invalid argument
3059 *		EINTR			Oeration interrupted
3060 */
3061static int
3062execargs_free(struct image_params *imgp)
3063{
3064	kern_return_t kret;
3065
3066	kmem_free(bsd_pageable_map, (vm_offset_t)imgp->ip_strings, NCARGS + PAGE_SIZE);
3067	imgp->ip_strings = NULL;
3068
3069	kret = semaphore_signal(execve_semaphore);
3070	switch (kret) {
3071	case KERN_INVALID_ADDRESS:
3072	case KERN_PROTECTION_FAILURE:
3073		return (EINVAL);
3074	case KERN_ABORTED:
3075	case KERN_OPERATION_TIMED_OUT:
3076		return (EINTR);
3077	case KERN_SUCCESS:
3078		return(0);
3079	default:
3080		return (EINVAL);
3081	}
3082}
3083
3084static void
3085exec_resettextvp(proc_t p, struct image_params *imgp)
3086{
3087	vnode_t vp;
3088	off_t offset;
3089	vnode_t tvp  = p->p_textvp;
3090	int ret;
3091
3092	vp = imgp->ip_vp;
3093	offset = imgp->ip_arch_offset;
3094
3095	if (vp == NULLVP)
3096		panic("exec_resettextvp: expected valid vp");
3097
3098	ret = vnode_ref(vp);
3099	proc_lock(p);
3100	if (ret == 0) {
3101		p->p_textvp = vp;
3102		p->p_textoff = offset;
3103	} else {
3104		p->p_textvp = NULLVP;	/* this is paranoia */
3105		p->p_textoff = 0;
3106	}
3107	proc_unlock(p);
3108
3109	if ( tvp != NULLVP) {
3110		if (vnode_getwithref(tvp) == 0) {
3111			vnode_rele(tvp);
3112			vnode_put(tvp);
3113		}
3114	}
3115
3116}
3117
3118