kern_sharedpage.c revision 197031
1/*-
2 * Copyright (c) 1993, David Greenman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/kern/kern_exec.c 197031 2009-09-09 10:52:36Z kib $");
29
30#include "opt_hwpmc_hooks.h"
31#include "opt_kdtrace.h"
32#include "opt_ktrace.h"
33#include "opt_vm.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/eventhandler.h>
38#include <sys/lock.h>
39#include <sys/mutex.h>
40#include <sys/sysproto.h>
41#include <sys/signalvar.h>
42#include <sys/kernel.h>
43#include <sys/mount.h>
44#include <sys/filedesc.h>
45#include <sys/fcntl.h>
46#include <sys/acct.h>
47#include <sys/exec.h>
48#include <sys/imgact.h>
49#include <sys/imgact_elf.h>
50#include <sys/wait.h>
51#include <sys/malloc.h>
52#include <sys/priv.h>
53#include <sys/proc.h>
54#include <sys/pioctl.h>
55#include <sys/namei.h>
56#include <sys/resourcevar.h>
57#include <sys/sdt.h>
58#include <sys/sf_buf.h>
59#include <sys/syscallsubr.h>
60#include <sys/sysent.h>
61#include <sys/shm.h>
62#include <sys/sysctl.h>
63#include <sys/vnode.h>
64#include <sys/stat.h>
65#ifdef KTRACE
66#include <sys/ktrace.h>
67#endif
68
69#include <vm/vm.h>
70#include <vm/vm_param.h>
71#include <vm/pmap.h>
72#include <vm/vm_page.h>
73#include <vm/vm_map.h>
74#include <vm/vm_kern.h>
75#include <vm/vm_extern.h>
76#include <vm/vm_object.h>
77#include <vm/vm_pager.h>
78
79#ifdef	HWPMC_HOOKS
80#include <sys/pmckern.h>
81#endif
82
83#include <machine/reg.h>
84
85#include <security/audit/audit.h>
86#include <security/mac/mac_framework.h>
87
#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>
/*
 * Optional hook; when it is non-NULL, do_execve() calls it with the
 * process that is exec'ing.
 */
dtrace_execexit_func_t	dtrace_fasttrap_exec;
#endif

/*
 * Statically defined tracing probes fired from do_execve():
 *   exec         - fired with args->fname before the image is looked up;
 *   exec_failure - fired with the error value on the failure path;
 *   exec_success - fired with args->fname once the new image is set up.
 * Each SDT_PROBE_ARGTYPE line declares the type of probe argument 0.
 */
SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE(proc, kernel, , exec);
SDT_PROBE_ARGTYPE(proc, kernel, , exec, 0, "char *");
SDT_PROBE_DEFINE(proc, kernel, , exec_failure);
SDT_PROBE_ARGTYPE(proc, kernel, , exec_failure, 0, "int");
SDT_PROBE_DEFINE(proc, kernel, , exec_success);
SDT_PROBE_ARGTYPE(proc, kernel, , exec_success, 0, "char *");

/* Malloc type labelled "proc-args" ("Process arguments"). */
MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
102
/* Forward declarations for the file-local handlers and helpers. */
static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
static int do_execve(struct thread *td, struct image_args *args,
    struct mac *mac_p);
static void exec_free_args(struct image_args *);

/*
 * kern.ps_strings: read-only (CTLFLAG_RD) node served by
 * sysctl_kern_ps_strings().
 */
/* XXX This should be vm_size_t. */
SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD,
    NULL, 0, sysctl_kern_ps_strings, "LU", "");

/*
 * kern.usrstack: read-only (CTLFLAG_RD) node served by
 * sysctl_kern_usrstack().
 */
/* XXX This should be vm_size_t. */
SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD,
    NULL, 0, sysctl_kern_usrstack, "LU", "");

/*
 * kern.stackprot: read-only (CTLFLAG_RD) node served by
 * sysctl_kern_stackprot().
 */
SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD,
    NULL, 0, sysctl_kern_stackprot, "I", "");

/*
 * Upper bound, in bytes, used by do_execve() when deciding whether to
 * cache a copy of the argument strings: they are cached only when their
 * length plus sizeof(struct pargs) does not exceed this value.  Exposed
 * read-write (CTLFLAG_RW) as kern.ps_arg_cache_limit.
 */
u_long ps_arg_cache_limit = PAGE_SIZE / 16;
SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW,
    &ps_arg_cache_limit, 0, "");
124
125static int
126sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
127{
128	struct proc *p;
129	int error;
130
131	p = curproc;
132#ifdef SCTL_MASK32
133	if (req->flags & SCTL_MASK32) {
134		unsigned int val;
135		val = (unsigned int)p->p_sysent->sv_psstrings;
136		error = SYSCTL_OUT(req, &val, sizeof(val));
137	} else
138#endif
139		error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings,
140		   sizeof(p->p_sysent->sv_psstrings));
141	return error;
142}
143
144static int
145sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
146{
147	struct proc *p;
148	int error;
149
150	p = curproc;
151#ifdef SCTL_MASK32
152	if (req->flags & SCTL_MASK32) {
153		unsigned int val;
154		val = (unsigned int)p->p_sysent->sv_usrstack;
155		error = SYSCTL_OUT(req, &val, sizeof(val));
156	} else
157#endif
158		error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
159		    sizeof(p->p_sysent->sv_usrstack));
160	return error;
161}
162
163static int
164sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS)
165{
166	struct proc *p;
167
168	p = curproc;
169	return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot,
170	    sizeof(p->p_sysent->sv_stackprot)));
171}
172
/*
 * Table of image activators.  Each of the items is a pointer to a
 * `const struct execsw', hence the double pointer here.  do_execve()
 * walks the table from index 0 until it reaches a NULL entry, skipping
 * entries whose ex_imgact member is NULL.
 */
static const struct execsw **execsw;
178
179#ifndef _SYS_SYSPROTO_H_
180struct execve_args {
181	char    *fname;
182	char    **argv;
183	char    **envv;
184};
185#endif
186
187int
188execve(td, uap)
189	struct thread *td;
190	struct execve_args /* {
191		char *fname;
192		char **argv;
193		char **envv;
194	} */ *uap;
195{
196	int error;
197	struct image_args args;
198
199	error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE,
200	    uap->argv, uap->envv);
201	if (error == 0)
202		error = kern_execve(td, &args, NULL);
203	return (error);
204}
205
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument layout of fexecve(), spelled out here for builds in which
 * _SYS_SYSPROTO_H_ is not defined:
 *   fd   - descriptor naming the image to execute;
 *   argv - argument vector;
 *   envv - environment vector.
 *
 * The definition must end with a semicolon; without it the struct runs
 * into the `int' of the function definition that follows and the file
 * no longer compiles when this branch is active.
 */
struct fexecve_args {
	int	fd;
	char	**argv;
	char	**envv;
};
#endif
213int
214fexecve(struct thread *td, struct fexecve_args *uap)
215{
216	int error;
217	struct image_args args;
218
219	error = exec_copyin_args(&args, NULL, UIO_SYSSPACE,
220	    uap->argv, uap->envv);
221	if (error == 0) {
222		args.fd = uap->fd;
223		error = kern_execve(td, &args, NULL);
224	}
225	return (error);
226}
227
228#ifndef _SYS_SYSPROTO_H_
229struct __mac_execve_args {
230	char	*fname;
231	char	**argv;
232	char	**envv;
233	struct mac	*mac_p;
234};
235#endif
236
237int
238__mac_execve(td, uap)
239	struct thread *td;
240	struct __mac_execve_args /* {
241		char *fname;
242		char **argv;
243		char **envv;
244		struct mac *mac_p;
245	} */ *uap;
246{
247#ifdef MAC
248	int error;
249	struct image_args args;
250
251	error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE,
252	    uap->argv, uap->envv);
253	if (error == 0)
254		error = kern_execve(td, &args, uap->mac_p);
255	return (error);
256#else
257	return (ENOSYS);
258#endif
259}
260
261/*
262 * XXX: kern_execve has the astonishing property of not always returning to
263 * the caller.  If sufficiently bad things happen during the call to
264 * do_execve(), it can end up calling exit1(); as a result, callers must
265 * avoid doing anything which they might need to undo (e.g., allocating
266 * memory).
267 */
268int
269kern_execve(td, args, mac_p)
270	struct thread *td;
271	struct image_args *args;
272	struct mac *mac_p;
273{
274	struct proc *p = td->td_proc;
275	int error;
276
277	AUDIT_ARG_ARGV(args->begin_argv, args->argc,
278	    args->begin_envv - args->begin_argv);
279	AUDIT_ARG_ENVV(args->begin_envv, args->envc,
280	    args->endp - args->begin_envv);
281	if (p->p_flag & P_HADTHREADS) {
282		PROC_LOCK(p);
283		if (thread_single(SINGLE_BOUNDARY)) {
284			PROC_UNLOCK(p);
285	       		exec_free_args(args);
286			return (ERESTART);	/* Try again later. */
287		}
288		PROC_UNLOCK(p);
289	}
290
291	error = do_execve(td, args, mac_p);
292
293	if (p->p_flag & P_HADTHREADS) {
294		PROC_LOCK(p);
295		/*
296		 * If success, we upgrade to SINGLE_EXIT state to
297		 * force other threads to suicide.
298		 */
299		if (error == 0)
300			thread_single(SINGLE_EXIT);
301		else
302			thread_single_end();
303		PROC_UNLOCK(p);
304	}
305
306	return (error);
307}
308
/*
 * In-kernel implementation of execve().
 *
 * The path name and the argument/environment strings have already been
 * gathered into the kernel buffer described by 'args'; the path lookup
 * below therefore uses UIO_SYSSPACE.  When args->fname is NULL the image
 * is taken from the descriptor args->fd instead of being looked up by
 * name.
 *
 * Returns 0 on success or an error value on failure.  Every path out of
 * this function releases the argument buffer with exec_free_args().  If
 * an error is reported after imgp->vmspace_destroyed has been set, the
 * function does not return: it calls exit1().
 */
static int
do_execve(td, args, mac_p)
	struct thread *td;
	struct image_args *args;
	struct mac *mac_p;
{
	struct proc *p = td->td_proc;
	struct nameidata nd;
	struct ucred *newcred = NULL, *oldcred;
	struct uidinfo *euip;
	register_t *stack_base;
	int error, len = 0, i;
	struct image_params image_params, *imgp;
	struct vattr attr;
	int (*img_first)(struct image_params *);
	struct pargs *oldargs = NULL, *newargs = NULL;
	struct sigacts *oldsigacts, *newsigacts;
#ifdef KTRACE
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;
#endif
	struct vnode *textvp = NULL, *binvp = NULL;
	int credential_changing;
	int vfslocked;
	int textset;
#ifdef MAC
	struct label *interpvplabel = NULL;
	int will_transition;
#endif
#ifdef HWPMC_HOOKS
	struct pmckern_procexec pe;
#endif
	/* Process name used when vn_commname() cannot supply one. */
	static const char fexecv_proc_title[] = "(fexecv)";

	vfslocked = 0;
	imgp = &image_params;

	/*
	 * Lock the process and set the P_INEXEC flag to indicate that
	 * it should be left alone until we're done here.  This is
	 * necessary to avoid race conditions - e.g. in ptrace() -
	 * that might allow a local user to illicitly obtain elevated
	 * privileges.
	 */
	PROC_LOCK(p);
	KASSERT((p->p_flag & P_INEXEC) == 0,
	    ("%s(): process already has P_INEXEC flag", __func__));
	p->p_flag |= P_INEXEC;
	PROC_UNLOCK(p);

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->execlabel = NULL;
	imgp->attr = &attr;
	imgp->entry_addr = 0;
	imgp->vmspace_destroyed = 0;
	imgp->interpreted = 0;
	imgp->opened = 0;
	/*
	 * The interpreter name is kept in the args buffer, PATH_MAX +
	 * ARG_MAX bytes past its start.
	 */
	imgp->interpreter_name = args->buf + PATH_MAX + ARG_MAX;
	imgp->auxargs = NULL;
	imgp->vp = NULL;
	imgp->object = NULL;
	imgp->firstpage = NULL;
	imgp->ps_strings = 0;
	imgp->auxarg_size = 0;
	imgp->args = args;
	imgp->execpath = imgp->freepath = NULL;
	imgp->execpathp = 0;

#ifdef MAC
	error = mac_execve_enter(imgp, mac_p);
	if (error)
		goto exec_fail;
#endif

	imgp->image_header = NULL;

	/*
	 * Translate the file name. namei() returns a vnode pointer
	 *	in ni_vp among other things.
	 *
	 * XXXAUDIT: It would be desirable to also audit the name of the
	 * interpreter if this is an interpreted binary.
	 */
	if (args->fname != NULL) {
		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME
		    | MPSAFE | AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
	}

	SDT_PROBE(proc, kernel, , exec, args->fname, 0, 0, 0, 0 );

	/*
	 * This label is re-entered from the "interpreted" case below with
	 * nd re-initialized and args->fname pointing at the interpreter
	 * name.
	 */
interpret:
	if (args->fname != NULL) {
		error = namei(&nd);
		if (error)
			goto exec_fail;

		vfslocked = NDHASGIANT(&nd);
		binvp  = nd.ni_vp;
		imgp->vp = binvp;
	} else {
		/* No path name: take the image vnode from args->fd. */
		AUDIT_ARG_FD(args->fd);
		error = fgetvp(td, args->fd, &binvp);
		if (error)
			goto exec_fail;
		vfslocked = VFS_LOCK_GIANT(binvp->v_mount);
		vn_lock(binvp, LK_EXCLUSIVE | LK_RETRY);
		AUDIT_ARG_VNODE1(binvp);
		imgp->vp = binvp;
	}

	/*
	 * Check file permissions (also 'opens' file)
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->object = imgp->vp->v_object;
	if (imgp->object != NULL)
		vm_object_reference(imgp->object);

	/*
	 * Set VV_TEXT now so no one can write to the executable while we're
	 * activating it.
	 *
	 * Remember if this was set before and unset it in case this is not
	 * actually an executable image.
	 */
	textset = imgp->vp->v_vflag & VV_TEXT;
	imgp->vp->v_vflag |= VV_TEXT;

	error = exec_map_first_page(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->proc->p_osrel = 0;
	/*
	 *	If the current process has a special image activator it
	 *	wants to try first, call it.   For example, emulating shell
	 *	scripts differently.
	 */
	/* -1 means that no activator has recognized the image yet. */
	error = -1;
	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
		error = img_first(imgp);

	/*
	 *	Loop through the list of image activators, calling each one.
	 *	An activator returns -1 if there is no match, 0 on success,
	 *	and an error otherwise.
	 */
	for (i = 0; error == -1 && execsw[i]; ++i) {
		if (execsw[i]->ex_imgact == NULL ||
		    execsw[i]->ex_imgact == img_first) {
			continue;
		}
		error = (*execsw[i]->ex_imgact)(imgp);
	}

	if (error) {
		if (error == -1) {
			/*
			 * Nothing recognized the image: drop VV_TEXT if
			 * it was not set before we got here and report
			 * ENOEXEC.
			 */
			if (textset == 0)
				imgp->vp->v_vflag &= ~VV_TEXT;
			error = ENOEXEC;
		}
		goto exec_fail_dealloc;
	}

	/*
	 * Special interpreter operation, cleanup and loop up to try to
	 * activate the interpreter.
	 */
	if (imgp->interpreted) {
		exec_unmap_first_page(imgp);
		/*
		 * VV_TEXT needs to be unset for scripts.  There is a short
		 * period before we determine that something is a script where
		 * VV_TEXT will be set. The vnode lock is held over this
		 * entire period so nothing should illegitimately be blocked.
		 */
		imgp->vp->v_vflag &= ~VV_TEXT;
		/* free name buffer and old vnode */
		if (args->fname != NULL)
			NDFREE(&nd, NDF_ONLY_PNBUF);
#ifdef MAC
		mac_execve_interpreter_enter(binvp, &interpvplabel);
#endif
		if (imgp->opened) {
			VOP_CLOSE(binvp, FREAD, td->td_ucred, td);
			imgp->opened = 0;
		}
		vput(binvp);
		vm_object_deallocate(imgp->object);
		imgp->object = NULL;
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
		/* set new name to that of the interpreter */
		NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME | MPSAFE,
		    UIO_SYSSPACE, imgp->interpreter_name, td);
		args->fname = imgp->interpreter_name;
		goto interpret;
	}

	/*
	 * NB: We unlock the vnode here because it is believed that none
	 * of the sv_copyout_strings/sv_fixup operations require the vnode.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	/*
	 * Do the best to calculate the full path to the image file.
	 * args->fname is used as the path when it is absolute or when
	 * vn_fullpath() fails; nothing is done when there are no auxargs.
	 */
	if (imgp->auxargs != NULL &&
	    ((args->fname != NULL && args->fname[0] == '/') ||
	     vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0))
		imgp->execpath = args->fname;

	/*
	 * Copy out strings (args and env) and initialize stack base
	 */
	if (p->p_sysent->sv_copyout_strings)
		stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
	else
		stack_base = exec_copyout_strings(imgp);

	/*
	 * If custom stack fixup routine present for this process
	 * let it do the stack setup.
	 * Else stuff argument count as first item on stack
	 */
	if (p->p_sysent->sv_fixup != NULL)
		(*p->p_sysent->sv_fixup)(&stack_base, imgp);
	else
		suword(--stack_base, imgp->args->argc);

	/*
	 * For security and other reasons, the file descriptor table cannot
	 * be shared after an exec.
	 */
	fdunshare(p, td);

	/*
	 * Malloc things before we need locks.
	 */
	newcred = crget();
	euip = uifind(attr.va_uid);
	/* i is the number of bytes occupied by the argument strings. */
	i = imgp->args->begin_envv - imgp->args->begin_argv;
	/* Cache arguments if they fit inside our allowance */
	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
		newargs = pargs_alloc(i);
		bcopy(imgp->args->begin_argv, newargs->ar_args, i);
	}

	/* close files on exec */
	fdcloseexec(td);
	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);

	/* Get a reference to the vnode prior to locking the proc */
	VREF(binvp);

	/*
	 * For security and other reasons, signal handlers cannot
	 * be shared after an exec. The new process gets a copy of the old
	 * handlers. In execsigs(), the new process will have its signals
	 * reset.
	 */
	PROC_LOCK(p);
	oldcred = crcopysafe(p, newcred);
	if (sigacts_shared(p->p_sigacts)) {
		oldsigacts = p->p_sigacts;
		/* The proc lock is dropped around the allocation and copy. */
		PROC_UNLOCK(p);
		newsigacts = sigacts_alloc();
		sigacts_copy(newsigacts, oldsigacts);
		PROC_LOCK(p);
		p->p_sigacts = newsigacts;
	} else
		oldsigacts = NULL;

	/* Stop profiling */
	stopprofclock(p);

	/* reset caught signals */
	execsigs(p);

	/* name this process - nameiexec(p, ndp) */
	if (args->fname) {
		len = min(nd.ni_cnd.cn_namelen,MAXCOMLEN);
		bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, len);
	} else {
		/*
		 * No path name is available: ask vn_commname() for a
		 * name and fall back to the fixed title if it fails.
		 */
		if (vn_commname(binvp, p->p_comm, MAXCOMLEN + 1) == 0)
			len = MAXCOMLEN;
		else {
			len = sizeof(fexecv_proc_title);
			bcopy(fexecv_proc_title, p->p_comm, len);
		}
	}
	p->p_comm[len] = 0;
	bcopy(p->p_comm, td->td_name, sizeof(td->td_name));

	/*
	 * mark as execed, wakeup the process that vforked (if any) and tell
	 * it that it now has its own resources back
	 */
	p->p_flag |= P_EXEC;
	if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
		p->p_flag &= ~P_PPWAIT;
		cv_broadcast(&p->p_pwait);
	}

	/*
	 * Implement image setuid/setgid.
	 *
	 * Don't honor setuid/setgid if the filesystem prohibits it or if
	 * the process is being traced.
	 *
	 * XXXMAC: For the time being, use NOSUID to also prohibit
	 * transitions on the file system.
	 */
	credential_changing = 0;
	credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid !=
	    attr.va_uid;
	credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid !=
	    attr.va_gid;
#ifdef MAC
	will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp,
	    interpvplabel, imgp);
	credential_changing |= will_transition;
#endif

	if (credential_changing &&
	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
	    (p->p_flag & P_TRACED) == 0) {
		/*
		 * Turn off syscall tracing for set-id programs, except for
		 * root.  Record any set-id flags first to make sure that
		 * we do not regain any tracing during a possible block.
		 */
		setsugid(p);

#ifdef KTRACE
		if (p->p_tracevp != NULL &&
		    priv_check_cred(oldcred, PRIV_DEBUG_DIFFCRED, 0)) {
			/*
			 * Detach the trace vnode and credential here;
			 * they are released after done1 below.
			 */
			mtx_lock(&ktrace_mtx);
			p->p_traceflag = 0;
			tracevp = p->p_tracevp;
			p->p_tracevp = NULL;
			tracecred = p->p_tracecred;
			p->p_tracecred = NULL;
			mtx_unlock(&ktrace_mtx);
		}
#endif
		/*
		 * Close any file descriptors 0..2 that reference procfs,
		 * then make sure file descriptors 0..2 are in use.
		 *
		 * setugidsafety() may call closef() and then pfind()
		 * which may grab the process lock.
		 * fdcheckstd() may call falloc() which may block to
		 * allocate memory, so temporarily drop the process lock.
		 */
		PROC_UNLOCK(p);
		VOP_UNLOCK(imgp->vp, 0);
		setugidsafety(td);
		error = fdcheckstd(td);
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
		/* On failure the proc lock is not re-taken before done1. */
		if (error != 0)
			goto done1;
		PROC_LOCK(p);
		/*
		 * Set the new credentials.
		 */
		if (attr.va_mode & S_ISUID)
			change_euid(newcred, euip);
		if (attr.va_mode & S_ISGID)
			change_egid(newcred, attr.va_gid);
#ifdef MAC
		if (will_transition) {
			mac_vnode_execve_transition(oldcred, newcred, imgp->vp,
			    interpvplabel, imgp);
		}
#endif
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXXMAC: Note that the current logic will save the
		 * uid and gid if a MAC domain transition occurs, even
		 * though maybe it shouldn't.
		 */
		change_svuid(newcred, newcred->cr_uid);
		change_svgid(newcred, newcred->cr_gid);
		p->p_ucred = newcred;
		/* newcred == NULL tells done1 to free oldcred instead. */
		newcred = NULL;
	} else {
		if (oldcred->cr_uid == oldcred->cr_ruid &&
		    oldcred->cr_gid == oldcred->cr_rgid)
			p->p_flag &= ~P_SUGID;
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXX: It's not clear that the existing behavior is
		 * POSIX-compliant.  A number of sources indicate that the
		 * saved uid/gid should only be updated if the new ruid is
		 * not equal to the old ruid, or the new euid is not equal
		 * to the old euid and the new euid is not equal to the old
		 * ruid.  The FreeBSD code always updates the saved uid/gid.
		 * Also, this code uses the new (replaced) euid and egid as
		 * the source, which may or may not be the right ones to use.
		 */
		if (oldcred->cr_svuid != oldcred->cr_uid ||
		    oldcred->cr_svgid != oldcred->cr_gid) {
			change_svuid(newcred, newcred->cr_uid);
			change_svgid(newcred, newcred->cr_gid);
			p->p_ucred = newcred;
			newcred = NULL;
		}
	}

	/*
	 * Store the vp for use in procfs.  This vnode was referenced prior
	 * to locking the proc lock.  The previous p_textvp is kept in
	 * textvp and released after done1.
	 */
	textvp = p->p_textvp;
	p->p_textvp = binvp;

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exec if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exec)
		dtrace_fasttrap_exec(p);
#endif

	/*
	 * Notify others that we exec'd, and clear the P_INEXEC flag
	 * as we're now a bona fide freshly-execed process.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXEC);
	p->p_flag &= ~P_INEXEC;

	/*
	 * If tracing the process, trap to debugger so breakpoints
	 * can be set before the program executes.
	 * Use tdsignal to deliver the signal to the current thread; using
	 * psignal may cause the signal to be delivered to the wrong thread
	 * because that thread will exit.  Remember that we are going to
	 * enter single thread mode.
	 */
	if (p->p_flag & P_TRACED)
		tdsignal(p, td, SIGTRAP, NULL);

	/* clear "fork but no exec" flag, as we _are_ execing */
	p->p_acflag &= ~AFORK;

	/*
	 * Free any previous argument cache and replace it with
	 * the new argument cache, if any.
	 */
	oldargs = p->p_args;
	p->p_args = newargs;
	newargs = NULL;

#ifdef	HWPMC_HOOKS
	/*
	 * Check if system-wide sampling is in effect or if the
	 * current process is using PMCs.  If so, do exec() time
	 * processing.  This processing needs to happen AFTER the
	 * P_INEXEC flag is cleared.
	 *
	 * The proc lock needs to be released before taking the PMC
	 * SX.
	 */
	if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) {
		PROC_UNLOCK(p);
		VOP_UNLOCK(imgp->vp, 0);
		pe.pm_credentialschanged = credential_changing;
		pe.pm_entryaddr = imgp->entry_addr;

		PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe);
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
	} else
		PROC_UNLOCK(p);
#else  /* !HWPMC_HOOKS */
	PROC_UNLOCK(p);
#endif

	/* Set values passed into the program in registers. */
	if (p->p_sysent->sv_setregs)
		(*p->p_sysent->sv_setregs)(td, imgp->entry_addr,
		    (u_long)(uintptr_t)stack_base, imgp->ps_strings);
	else
		exec_setregs(td, imgp->entry_addr,
		    (u_long)(uintptr_t)stack_base, imgp->ps_strings);

	vfs_mark_atime(imgp->vp, td->td_ucred);

	SDT_PROBE(proc, kernel, , exec_success, args->fname, 0, 0, 0, 0);

	/*
	 * Reached both on success and from the fdcheckstd() failure above;
	 * in either case the proc lock has been released and imgp->vp is
	 * locked.
	 */
done1:
	/*
	 * Free any resources malloc'd earlier that we didn't use.
	 */
	uifree(euip);
	if (newcred == NULL)
		crfree(oldcred);
	else
		crfree(newcred);
	VOP_UNLOCK(imgp->vp, 0);

	/*
	 * Handle deferred decrement of ref counts.
	 */
	if (textvp != NULL) {
		int tvfslocked;

		tvfslocked = VFS_LOCK_GIANT(textvp->v_mount);
		vrele(textvp);
		VFS_UNLOCK_GIANT(tvfslocked);
	}
	/* On failure, undo the VREF() taken for p_textvp above. */
	if (binvp && error != 0)
		vrele(binvp);
#ifdef KTRACE
	if (tracevp != NULL) {
		int tvfslocked;

		tvfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
		vrele(tracevp);
		VFS_UNLOCK_GIANT(tvfslocked);
	}
	if (tracecred != NULL)
		crfree(tracecred);
#endif
	/* Re-lock the vnode for the vput() in exec_fail_dealloc. */
	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
	pargs_drop(oldargs);
	pargs_drop(newargs);
	if (oldsigacts != NULL)
		sigacts_free(oldsigacts);

exec_fail_dealloc:

	/*
	 * free various allocated resources
	 */
	if (imgp->firstpage != NULL)
		exec_unmap_first_page(imgp);

	if (imgp->vp != NULL) {
		if (args->fname)
			NDFREE(&nd, NDF_ONLY_PNBUF);
		if (imgp->opened)
			VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
		vput(imgp->vp);
	}

	if (imgp->object != NULL)
		vm_object_deallocate(imgp->object);

	free(imgp->freepath, M_TEMP);

	if (error == 0) {
		/*
		 * Stop the process here if its stop event mask has
		 * the S_EXEC bit set.
		 */
		STOPEVENT(p, S_EXEC, 0);
		goto done2;
	}

exec_fail:
	/* we're done here, clear P_INEXEC */
	PROC_LOCK(p);
	p->p_flag &= ~P_INEXEC;
	PROC_UNLOCK(p);

	SDT_PROBE(proc, kernel, , exec_failure, error, 0, 0, 0, 0);

	/* Common exit for the success and failure paths. */
done2:
#ifdef MAC
	mac_execve_exit(imgp);
	mac_execve_interpreter_exit(interpvplabel);
#endif
	VFS_UNLOCK_GIANT(vfslocked);
	exec_free_args(args);

	if (error && imgp->vmspace_destroyed) {
		/* sorry, no more process anymore. exit gracefully */
		exit1(td, W_EXITCODE(0, SIGABRT));
		/* NOT REACHED */
	}
	return (error);
}
906
907int
908exec_map_first_page(imgp)
909	struct image_params *imgp;
910{
911	int rv, i;
912	int initial_pagein;
913	vm_page_t ma[VM_INITIAL_PAGEIN];
914	vm_object_t object;
915
916	if (imgp->firstpage != NULL)
917		exec_unmap_first_page(imgp);
918
919	object = imgp->vp->v_object;
920	if (object == NULL)
921		return (EACCES);
922	VM_OBJECT_LOCK(object);
923#if VM_NRESERVLEVEL > 0
924	if ((object->flags & OBJ_COLORED) == 0) {
925		object->flags |= OBJ_COLORED;
926		object->pg_color = 0;
927	}
928#endif
929	ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
930	if (ma[0]->valid != VM_PAGE_BITS_ALL) {
931		initial_pagein = VM_INITIAL_PAGEIN;
932		if (initial_pagein > object->size)
933			initial_pagein = object->size;
934		for (i = 1; i < initial_pagein; i++) {
935			if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
936				if (ma[i]->valid)
937					break;
938				if ((ma[i]->oflags & VPO_BUSY) || ma[i]->busy)
939					break;
940				vm_page_busy(ma[i]);
941			} else {
942				ma[i] = vm_page_alloc(object, i,
943				    VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
944				if (ma[i] == NULL)
945					break;
946			}
947		}
948		initial_pagein = i;
949		rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
950		ma[0] = vm_page_lookup(object, 0);
951		if ((rv != VM_PAGER_OK) || (ma[0] == NULL)) {
952			if (ma[0]) {
953				vm_page_lock_queues();
954				vm_page_free(ma[0]);
955				vm_page_unlock_queues();
956			}
957			VM_OBJECT_UNLOCK(object);
958			return (EIO);
959		}
960	}
961	vm_page_lock_queues();
962	vm_page_hold(ma[0]);
963	vm_page_unlock_queues();
964	vm_page_wakeup(ma[0]);
965	VM_OBJECT_UNLOCK(object);
966
967	imgp->firstpage = sf_buf_alloc(ma[0], 0);
968	imgp->image_header = (char *)sf_buf_kva(imgp->firstpage);
969
970	return (0);
971}
972
973void
974exec_unmap_first_page(imgp)
975	struct image_params *imgp;
976{
977	vm_page_t m;
978
979	if (imgp->firstpage != NULL) {
980		m = sf_buf_page(imgp->firstpage);
981		sf_buf_free(imgp->firstpage);
982		imgp->firstpage = NULL;
983		vm_page_lock_queues();
984		vm_page_unhold(m);
985		vm_page_unlock_queues();
986	}
987}
988
989/*
990 * Destroy old address space, and allocate a new stack
991 *	The new stack is only SGROWSIZ large because it is grown
992 *	automatically in trap.c.
993 */
994int
995exec_new_vmspace(imgp, sv)
996	struct image_params *imgp;
997	struct sysentvec *sv;
998{
999	int error;
1000	struct proc *p = imgp->proc;
1001	struct vmspace *vmspace = p->p_vmspace;
1002	vm_offset_t stack_addr;
1003	vm_map_t map;
1004	u_long ssiz;
1005
1006	imgp->vmspace_destroyed = 1;
1007	imgp->sysent = sv;
1008
1009	/* May be called with Giant held */
1010	EVENTHANDLER_INVOKE(process_exec, p, imgp);
1011
1012	/*
1013	 * Blow away entire process VM, if address space not shared,
1014	 * otherwise, create a new VM space so that other threads are
1015	 * not disrupted
1016	 */
1017	map = &vmspace->vm_map;
1018	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv->sv_minuser &&
1019	    vm_map_max(map) == sv->sv_maxuser) {
1020		shmexit(vmspace);
1021		pmap_remove_pages(vmspace_pmap(vmspace));
1022		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
1023	} else {
1024		error = vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
1025		if (error)
1026			return (error);
1027		vmspace = p->p_vmspace;
1028		map = &vmspace->vm_map;
1029	}
1030
1031	/* Allocate a new stack */
1032	if (sv->sv_maxssiz != NULL)
1033		ssiz = *sv->sv_maxssiz;
1034	else
1035		ssiz = maxssiz;
1036	stack_addr = sv->sv_usrstack - ssiz;
1037	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
1038	    sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
1039	if (error)
1040		return (error);
1041
1042#ifdef __ia64__
1043	/* Allocate a new register stack */
1044	stack_addr = IA64_BACKINGSTORE;
1045	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
1046	    sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP);
1047	if (error)
1048		return (error);
1049#endif
1050
1051	/* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
1052	 * VM_STACK case, but they are still used to monitor the size of the
1053	 * process stack so we can check the stack rlimit.
1054	 */
1055	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
1056	vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - ssiz;
1057
1058	return (0);
1059}
1060
1061/*
1062 * Copy out argument and environment strings from the old process address
1063 * space into the temporary string buffer.
1064 */
1065int
1066exec_copyin_args(struct image_args *args, char *fname,
1067    enum uio_seg segflg, char **argv, char **envv)
1068{
1069	char *argp, *envp;
1070	int error;
1071	size_t length;
1072
1073	bzero(args, sizeof(*args));
1074	if (argv == NULL)
1075		return (EFAULT);
1076	/*
1077	 * Allocate temporary demand zeroed space for argument and
1078	 *	environment strings:
1079	 *
1080	 * o ARG_MAX for argument and environment;
1081	 * o MAXSHELLCMDLEN for the name of interpreters.
1082	 */
1083	args->buf = (char *) kmem_alloc_wait(exec_map,
1084	    PATH_MAX + ARG_MAX + MAXSHELLCMDLEN);
1085	if (args->buf == NULL)
1086		return (ENOMEM);
1087	args->begin_argv = args->buf;
1088	args->endp = args->begin_argv;
1089	args->stringspace = ARG_MAX;
1090	/*
1091	 * Copy the file name.
1092	 */
1093	if (fname != NULL) {
1094		args->fname = args->buf + ARG_MAX;
1095		error = (segflg == UIO_SYSSPACE) ?
1096		    copystr(fname, args->fname, PATH_MAX, &length) :
1097		    copyinstr(fname, args->fname, PATH_MAX, &length);
1098		if (error != 0)
1099			goto err_exit;
1100	} else
1101		args->fname = NULL;
1102
1103	/*
1104	 * extract arguments first
1105	 */
1106	while ((argp = (caddr_t) (intptr_t) fuword(argv++))) {
1107		if (argp == (caddr_t) -1) {
1108			error = EFAULT;
1109			goto err_exit;
1110		}
1111		if ((error = copyinstr(argp, args->endp,
1112		    args->stringspace, &length))) {
1113			if (error == ENAMETOOLONG)
1114				error = E2BIG;
1115			goto err_exit;
1116		}
1117		args->stringspace -= length;
1118		args->endp += length;
1119		args->argc++;
1120	}
1121
1122	args->begin_envv = args->endp;
1123
1124	/*
1125	 * extract environment strings
1126	 */
1127	if (envv) {
1128		while ((envp = (caddr_t)(intptr_t)fuword(envv++))) {
1129			if (envp == (caddr_t)-1) {
1130				error = EFAULT;
1131				goto err_exit;
1132			}
1133			if ((error = copyinstr(envp, args->endp,
1134			    args->stringspace, &length))) {
1135				if (error == ENAMETOOLONG)
1136					error = E2BIG;
1137				goto err_exit;
1138			}
1139			args->stringspace -= length;
1140			args->endp += length;
1141			args->envc++;
1142		}
1143	}
1144
1145	return (0);
1146
1147err_exit:
1148	exec_free_args(args);
1149	return (error);
1150}
1151
1152static void
1153exec_free_args(struct image_args *args)
1154{
1155
1156	if (args->buf) {
1157		kmem_free_wakeup(exec_map, (vm_offset_t)args->buf,
1158		    PATH_MAX + ARG_MAX + MAXSHELLCMDLEN);
1159		args->buf = NULL;
1160	}
1161}
1162
1163/*
1164 * Copy strings out to the new process address space, constructing new arg
1165 * and env vector tables. Return a pointer to the base so that it can be used
1166 * as the initial stack pointer.
1167 */
1168register_t *
1169exec_copyout_strings(imgp)
1170	struct image_params *imgp;
1171{
1172	int argc, envc;
1173	char **vectp;
1174	char *stringp, *destp;
1175	register_t *stack_base;
1176	struct ps_strings *arginfo;
1177	struct proc *p;
1178	size_t execpath_len;
1179	int szsigcode;
1180
1181	/*
1182	 * Calculate string base and vector table pointers.
1183	 * Also deal with signal trampoline code for this exec type.
1184	 */
1185	if (imgp->execpath != NULL && imgp->auxargs != NULL)
1186		execpath_len = strlen(imgp->execpath) + 1;
1187	else
1188		execpath_len = 0;
1189	p = imgp->proc;
1190	szsigcode = 0;
1191	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
1192	if (p->p_sysent->sv_szsigcode != NULL)
1193		szsigcode = *(p->p_sysent->sv_szsigcode);
1194	destp =	(caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
1195	    roundup(execpath_len, sizeof(char *)) -
1196	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
1197
1198	/*
1199	 * install sigcode
1200	 */
1201	if (szsigcode)
1202		copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
1203		    szsigcode), szsigcode);
1204
1205	/*
1206	 * Copy the image path for the rtld.
1207	 */
1208	if (execpath_len != 0) {
1209		imgp->execpathp = (uintptr_t)arginfo - szsigcode - execpath_len;
1210		copyout(imgp->execpath, (void *)imgp->execpathp,
1211		    execpath_len);
1212	}
1213
1214	/*
1215	 * If we have a valid auxargs ptr, prepare some room
1216	 * on the stack.
1217	 */
1218	if (imgp->auxargs) {
1219		/*
1220		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
1221		 * lower compatibility.
1222		 */
1223		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
1224		    (AT_COUNT * 2);
1225		/*
1226		 * The '+ 2' is for the null pointers at the end of each of
1227		 * the arg and env vector sets,and imgp->auxarg_size is room
1228		 * for argument of Runtime loader.
1229		 */
1230		vectp = (char **)(destp - (imgp->args->argc +
1231		    imgp->args->envc + 2 + imgp->auxarg_size + execpath_len) *
1232		    sizeof(char *));
1233	} else {
1234		/*
1235		 * The '+ 2' is for the null pointers at the end of each of
1236		 * the arg and env vector sets
1237		 */
1238		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
1239		    sizeof(char *));
1240	}
1241
1242	/*
1243	 * vectp also becomes our initial stack base
1244	 */
1245	stack_base = (register_t *)vectp;
1246
1247	stringp = imgp->args->begin_argv;
1248	argc = imgp->args->argc;
1249	envc = imgp->args->envc;
1250
1251	/*
1252	 * Copy out strings - arguments and environment.
1253	 */
1254	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
1255
1256	/*
1257	 * Fill in "ps_strings" struct for ps, w, etc.
1258	 */
1259	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
1260	suword(&arginfo->ps_nargvstr, argc);
1261
1262	/*
1263	 * Fill in argument portion of vector table.
1264	 */
1265	for (; argc > 0; --argc) {
1266		suword(vectp++, (long)(intptr_t)destp);
1267		while (*stringp++ != 0)
1268			destp++;
1269		destp++;
1270	}
1271
1272	/* a null vector table pointer separates the argp's from the envp's */
1273	suword(vectp++, 0);
1274
1275	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
1276	suword(&arginfo->ps_nenvstr, envc);
1277
1278	/*
1279	 * Fill in environment portion of vector table.
1280	 */
1281	for (; envc > 0; --envc) {
1282		suword(vectp++, (long)(intptr_t)destp);
1283		while (*stringp++ != 0)
1284			destp++;
1285		destp++;
1286	}
1287
1288	/* end of vector table is a null pointer */
1289	suword(vectp, 0);
1290
1291	return (stack_base);
1292}
1293
1294/*
1295 * Check permissions of file to execute.
1296 *	Called with imgp->vp locked.
1297 *	Return 0 for success or error code on failure.
1298 */
1299int
1300exec_check_permissions(imgp)
1301	struct image_params *imgp;
1302{
1303	struct vnode *vp = imgp->vp;
1304	struct vattr *attr = imgp->attr;
1305	struct thread *td;
1306	int error;
1307
1308	td = curthread;
1309
1310	/* Get file attributes */
1311	error = VOP_GETATTR(vp, attr, td->td_ucred);
1312	if (error)
1313		return (error);
1314
1315#ifdef MAC
1316	error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp);
1317	if (error)
1318		return (error);
1319#endif
1320
1321	/*
1322	 * 1) Check if file execution is disabled for the filesystem that this
1323	 *	file resides on.
1324	 * 2) Insure that at least one execute bit is on - otherwise root
1325	 *	will always succeed, and we don't want to happen unless the
1326	 *	file really is executable.
1327	 * 3) Insure that the file is a regular file.
1328	 */
1329	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
1330	    ((attr->va_mode & 0111) == 0) ||
1331	    (attr->va_type != VREG))
1332		return (EACCES);
1333
1334	/*
1335	 * Zero length files can't be exec'd
1336	 */
1337	if (attr->va_size == 0)
1338		return (ENOEXEC);
1339
1340	/*
1341	 *  Check for execute permission to file based on current credentials.
1342	 */
1343	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1344	if (error)
1345		return (error);
1346
1347	/*
1348	 * Check number of open-for-writes on the file and deny execution
1349	 * if there are any.
1350	 */
1351	if (vp->v_writecount)
1352		return (ETXTBSY);
1353
1354	/*
1355	 * Call filesystem specific open routine (which does nothing in the
1356	 * general case).
1357	 */
1358	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
1359	if (error == 0)
1360		imgp->opened = 1;
1361	return (error);
1362}
1363
1364/*
1365 * Exec handler registration
1366 */
1367int
1368exec_register(execsw_arg)
1369	const struct execsw *execsw_arg;
1370{
1371	const struct execsw **es, **xs, **newexecsw;
1372	int count = 2;	/* New slot and trailing NULL */
1373
1374	if (execsw)
1375		for (es = execsw; *es; es++)
1376			count++;
1377	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1378	if (newexecsw == NULL)
1379		return (ENOMEM);
1380	xs = newexecsw;
1381	if (execsw)
1382		for (es = execsw; *es; es++)
1383			*xs++ = *es;
1384	*xs++ = execsw_arg;
1385	*xs = NULL;
1386	if (execsw)
1387		free(execsw, M_TEMP);
1388	execsw = newexecsw;
1389	return (0);
1390}
1391
1392int
1393exec_unregister(execsw_arg)
1394	const struct execsw *execsw_arg;
1395{
1396	const struct execsw **es, **xs, **newexecsw;
1397	int count = 1;
1398
1399	if (execsw == NULL)
1400		panic("unregister with no handlers left?\n");
1401
1402	for (es = execsw; *es; es++) {
1403		if (*es == execsw_arg)
1404			break;
1405	}
1406	if (*es == NULL)
1407		return (ENOENT);
1408	for (es = execsw; *es; es++)
1409		if (*es != execsw_arg)
1410			count++;
1411	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1412	if (newexecsw == NULL)
1413		return (ENOMEM);
1414	xs = newexecsw;
1415	for (es = execsw; *es; es++)
1416		if (*es != execsw_arg)
1417			*xs++ = *es;
1418	*xs = NULL;
1419	if (execsw)
1420		free(execsw, M_TEMP);
1421	execsw = newexecsw;
1422	return (0);
1423}
1424