Deleted Added
full compact
kern_exec.c (106459) kern_exec.c (106468)
1/*
2 * Copyright (c) 1993, David Greenman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
1/*
2 * Copyright (c) 1993, David Greenman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/kern/kern_exec.c 106459 2002-11-05 14:57:49Z rwatson $
26 * $FreeBSD: head/sys/kern/kern_exec.c 106468 2002-11-05 17:51:56Z rwatson $
27 */
28
29#include "opt_ktrace.h"
30#include "opt_mac.h"
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/lock.h>
35#include <sys/mutex.h>
36#include <sys/sysproto.h>
37#include <sys/signalvar.h>
38#include <sys/kernel.h>
39#include <sys/mac.h>
40#include <sys/mount.h>
41#include <sys/filedesc.h>
42#include <sys/fcntl.h>
43#include <sys/acct.h>
44#include <sys/exec.h>
45#include <sys/imgact.h>
46#include <sys/imgact_elf.h>
47#include <sys/wait.h>
48#include <sys/malloc.h>
49#include <sys/proc.h>
50#include <sys/pioctl.h>
51#include <sys/namei.h>
52#include <sys/sysent.h>
53#include <sys/shm.h>
54#include <sys/sysctl.h>
55#include <sys/user.h>
56#include <sys/vnode.h>
57#ifdef KTRACE
58#include <sys/ktrace.h>
59#endif
60
61#include <vm/vm.h>
62#include <vm/vm_param.h>
63#include <vm/pmap.h>
64#include <vm/vm_page.h>
65#include <vm/vm_map.h>
66#include <vm/vm_kern.h>
67#include <vm/vm_extern.h>
68#include <vm/vm_object.h>
69#include <vm/vm_pager.h>
70
71#include <machine/reg.h>
72
73MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
74
75static MALLOC_DEFINE(M_ATEXEC, "atexec", "atexec callback");
76
77static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
78static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
79static int kern_execve(struct thread *td, char *fname, char **argv,
27 */
28
29#include "opt_ktrace.h"
30#include "opt_mac.h"
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/lock.h>
35#include <sys/mutex.h>
36#include <sys/sysproto.h>
37#include <sys/signalvar.h>
38#include <sys/kernel.h>
39#include <sys/mac.h>
40#include <sys/mount.h>
41#include <sys/filedesc.h>
42#include <sys/fcntl.h>
43#include <sys/acct.h>
44#include <sys/exec.h>
45#include <sys/imgact.h>
46#include <sys/imgact_elf.h>
47#include <sys/wait.h>
48#include <sys/malloc.h>
49#include <sys/proc.h>
50#include <sys/pioctl.h>
51#include <sys/namei.h>
52#include <sys/sysent.h>
53#include <sys/shm.h>
54#include <sys/sysctl.h>
55#include <sys/user.h>
56#include <sys/vnode.h>
57#ifdef KTRACE
58#include <sys/ktrace.h>
59#endif
60
61#include <vm/vm.h>
62#include <vm/vm_param.h>
63#include <vm/pmap.h>
64#include <vm/vm_page.h>
65#include <vm/vm_map.h>
66#include <vm/vm_kern.h>
67#include <vm/vm_extern.h>
68#include <vm/vm_object.h>
69#include <vm/vm_pager.h>
70
71#include <machine/reg.h>
72
73MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
74
75static MALLOC_DEFINE(M_ATEXEC, "atexec", "atexec callback");
76
77static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
78static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
79static int kern_execve(struct thread *td, char *fname, char **argv,
80 char **envv);
80 char **envv, struct mac *mac_p);
81
82/*
83 * callout list for things to do at exec time
84 */
85struct execlist {
86 execlist_fn function; /* callback stored for this entry */
87 TAILQ_ENTRY(execlist) next; /* tail-queue linkage; presumably for exec_list -- confirm */
88};
89
90TAILQ_HEAD(exec_list_head, execlist);
91static struct exec_list_head exec_list = TAILQ_HEAD_INITIALIZER(exec_list);
92
93/* XXX This should be vm_size_t. */
94SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD,
95 NULL, 0, sysctl_kern_ps_strings, "LU", "");
96
97/* XXX This should be vm_size_t. */
98SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD,
99 NULL, 0, sysctl_kern_usrstack, "LU", "");
100
101u_long ps_arg_cache_limit = PAGE_SIZE / 16;
102SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW,
103 &ps_arg_cache_limit, 0, "");
104
105int ps_argsopen = 1;
106SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, "");
107
108#ifdef __ia64__
109/* XXX HACK */
110static int regstkpages = 256;
111SYSCTL_INT(_machdep, OID_AUTO, regstkpages, CTLFLAG_RW, &regstkpages, 0, "");
112#endif
113
/*
 * Sysctl handler registered for the kern ps_strings node.
 *
 * Copies out the sv_psstrings field of curproc's p_sysent via SYSCTL_OUT
 * and returns whatever SYSCTL_OUT returns.  The handler itself performs
 * no SYSCTL_IN, so it never consumes a new value from the request.
 */
114static int
115sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
116{
117 struct proc *p;
118
/* The value reported is taken from curproc, not from a handler argument. */
119 p = curproc;
120 return (SYSCTL_OUT(req, &p->p_sysent->sv_psstrings,
121 sizeof(p->p_sysent->sv_psstrings)));
122}
123
/*
 * Sysctl handler registered for the kern usrstack node.
 *
 * Copies out the sv_usrstack field of curproc's p_sysent via SYSCTL_OUT
 * and returns whatever SYSCTL_OUT returns.  The handler itself performs
 * no SYSCTL_IN, so it never consumes a new value from the request.
 */
124static int
125sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
126{
127 struct proc *p;
128
/* The value reported is taken from curproc, not from a handler argument. */
129 p = curproc;
130 return (SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
131 sizeof(p->p_sysent->sv_usrstack)));
132}
133
134/*
135 * Each of the items is a pointer to a `const struct execsw', hence the
136 * double pointer here.
137 */
138static const struct execsw **execsw;
139
140/*
141 * In-kernel implementation of execve(). All arguments are assumed to be
142 * userspace pointers from the passed thread.
143 *
144 * MPSAFE
145 */
146static int
81
82/*
83 * callout list for things to do at exec time
84 */
85struct execlist {
86 execlist_fn function; /* callback stored for this entry */
87 TAILQ_ENTRY(execlist) next; /* tail-queue linkage; presumably for exec_list -- confirm */
88};
89
90TAILQ_HEAD(exec_list_head, execlist);
91static struct exec_list_head exec_list = TAILQ_HEAD_INITIALIZER(exec_list);
92
93/* XXX This should be vm_size_t. */
94SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD,
95 NULL, 0, sysctl_kern_ps_strings, "LU", "");
96
97/* XXX This should be vm_size_t. */
98SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD,
99 NULL, 0, sysctl_kern_usrstack, "LU", "");
100
101u_long ps_arg_cache_limit = PAGE_SIZE / 16;
102SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW,
103 &ps_arg_cache_limit, 0, "");
104
105int ps_argsopen = 1;
106SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, "");
107
108#ifdef __ia64__
109/* XXX HACK */
110static int regstkpages = 256;
111SYSCTL_INT(_machdep, OID_AUTO, regstkpages, CTLFLAG_RW, &regstkpages, 0, "");
112#endif
113
/*
 * Sysctl handler registered for the kern ps_strings node.
 *
 * Copies out the sv_psstrings field of curproc's p_sysent via SYSCTL_OUT
 * and returns whatever SYSCTL_OUT returns.  The handler itself performs
 * no SYSCTL_IN, so it never consumes a new value from the request.
 */
114static int
115sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
116{
117 struct proc *p;
118
/* The value reported is taken from curproc, not from a handler argument. */
119 p = curproc;
120 return (SYSCTL_OUT(req, &p->p_sysent->sv_psstrings,
121 sizeof(p->p_sysent->sv_psstrings)));
122}
123
/*
 * Sysctl handler registered for the kern usrstack node.
 *
 * Copies out the sv_usrstack field of curproc's p_sysent via SYSCTL_OUT
 * and returns whatever SYSCTL_OUT returns.  The handler itself performs
 * no SYSCTL_IN, so it never consumes a new value from the request.
 */
124static int
125sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
126{
127 struct proc *p;
128
/* The value reported is taken from curproc, not from a handler argument. */
129 p = curproc;
130 return (SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
131 sizeof(p->p_sysent->sv_usrstack)));
132}
133
134/*
135 * Each of the items is a pointer to a `const struct execsw', hence the
136 * double pointer here.
137 */
138static const struct execsw **execsw;
139
140/*
141 * In-kernel implementation of execve(). All arguments are assumed to be
142 * userspace pointers from the passed thread.
143 *
144 * MPSAFE
145 */
146static int
147kern_execve(td, fname, argv, envv)
147kern_execve(td, fname, argv, envv, mac_p)
148 struct thread *td;
149 char *fname;
150 char **argv;
151 char **envv;
148 struct thread *td;
149 char *fname;
150 char **argv;
151 char **envv;
152 struct mac *mac_p;
152{
153 struct proc *p = td->td_proc;
154 struct nameidata nd, *ndp;
155 struct ucred *newcred = NULL, *oldcred;
156 struct uidinfo *euip;
157 register_t *stack_base;
158 int error, len, i;
159 struct image_params image_params, *imgp;
160 struct vattr attr;
161 int (*img_first)(struct image_params *);
162 struct pargs *oldargs = NULL, *newargs = NULL;
163 struct procsig *oldprocsig, *newprocsig;
164#ifdef KTRACE
165 struct vnode *tracevp = NULL;
166#endif
167 struct vnode *textvp = NULL;
168 int credential_changing;
169 int textset;
170#ifdef MAC
153{
154 struct proc *p = td->td_proc;
155 struct nameidata nd, *ndp;
156 struct ucred *newcred = NULL, *oldcred;
157 struct uidinfo *euip;
158 register_t *stack_base;
159 int error, len, i;
160 struct image_params image_params, *imgp;
161 struct vattr attr;
162 int (*img_first)(struct image_params *);
163 struct pargs *oldargs = NULL, *newargs = NULL;
164 struct procsig *oldprocsig, *newprocsig;
165#ifdef KTRACE
166 struct vnode *tracevp = NULL;
167#endif
168 struct vnode *textvp = NULL;
169 int credential_changing;
170 int textset;
171#ifdef MAC
171 int will_transition;
172 struct label interplabel; /* label of the interpreted vnode */
173 struct label execlabel; /* optional label argument */
174 int will_transition, interplabelvalid = 0;
172#endif
173
174 imgp = &image_params;
175
176 /*
177 * Lock the process and set the P_INEXEC flag to indicate that
178 * it should be left alone until we're done here. This is
179 * necessary to avoid race conditions - e.g. in ptrace() -
180 * that might allow a local user to illicitly obtain elevated
181 * privileges.
182 */
183 PROC_LOCK(p);
184 KASSERT((p->p_flag & P_INEXEC) == 0,
185 ("%s(): process already has P_INEXEC flag", __func__));
186 if (p->p_flag & P_KSES) {
187 if (thread_single(SINGLE_EXIT)) {
188 PROC_UNLOCK(p);
189 return (ERESTART); /* Try again later. */
190 }
191 /*
192 * If we get here all other threads are dead,
193 * so unset the associated flags and lose KSE mode.
194 */
195 p->p_flag &= ~P_KSES;
196 td->td_flags &= ~TDF_UNBOUND;
197 thread_single_end();
198 }
199 p->p_flag |= P_INEXEC;
200 PROC_UNLOCK(p);
201
202 /*
203 * Initialize part of the common data
204 */
205 imgp->proc = p;
206 imgp->userspace_argv = argv;
207 imgp->userspace_envv = envv;
175#endif
176
177 imgp = &image_params;
178
179 /*
180 * Lock the process and set the P_INEXEC flag to indicate that
181 * it should be left alone until we're done here. This is
182 * necessary to avoid race conditions - e.g. in ptrace() -
183 * that might allow a local user to illicitly obtain elevated
184 * privileges.
185 */
186 PROC_LOCK(p);
187 KASSERT((p->p_flag & P_INEXEC) == 0,
188 ("%s(): process already has P_INEXEC flag", __func__));
189 if (p->p_flag & P_KSES) {
190 if (thread_single(SINGLE_EXIT)) {
191 PROC_UNLOCK(p);
192 return (ERESTART); /* Try again later. */
193 }
194 /*
195 * If we get here all other threads are dead,
196 * so unset the associated flags and lose KSE mode.
197 */
198 p->p_flag &= ~P_KSES;
199 td->td_flags &= ~TDF_UNBOUND;
200 thread_single_end();
201 }
202 p->p_flag |= P_INEXEC;
203 PROC_UNLOCK(p);
204
205 /*
206 * Initialize part of the common data
207 */
208 imgp->proc = p;
209 imgp->userspace_argv = argv;
210 imgp->userspace_envv = envv;
211 imgp->execlabel = NULL;
208 imgp->attr = &attr;
209 imgp->argc = imgp->envc = 0;
210 imgp->argv0 = NULL;
211 imgp->entry_addr = 0;
212 imgp->vmspace_destroyed = 0;
213 imgp->interpreted = 0;
214 imgp->interpreter_name[0] = '\0';
215 imgp->auxargs = NULL;
216 imgp->vp = NULL;
217 imgp->object = NULL;
218 imgp->firstpage = NULL;
219 imgp->ps_strings = 0;
220 imgp->auxarg_size = 0;
221
212 imgp->attr = &attr;
213 imgp->argc = imgp->envc = 0;
214 imgp->argv0 = NULL;
215 imgp->entry_addr = 0;
216 imgp->vmspace_destroyed = 0;
217 imgp->interpreted = 0;
218 imgp->interpreter_name[0] = '\0';
219 imgp->auxargs = NULL;
220 imgp->vp = NULL;
221 imgp->object = NULL;
222 imgp->firstpage = NULL;
223 imgp->ps_strings = 0;
224 imgp->auxarg_size = 0;
225
226#ifdef MAC
227 error = mac_execve_enter(imgp, mac_p, &execlabel);
228 if (error) {
229 mtx_lock(&Giant);
230 goto exec_fail;
231 }
232#endif
233
222 /*
223 * Allocate temporary demand zeroed space for argument and
224 * environment strings
225 */
226 imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX +
227 PAGE_SIZE);
228 if (imgp->stringbase == NULL) {
229 error = ENOMEM;
230 mtx_lock(&Giant);
231 goto exec_fail;
232 }
233 imgp->stringp = imgp->stringbase;
234 imgp->stringspace = ARG_MAX;
235 imgp->image_header = imgp->stringbase + ARG_MAX;
236
237 /*
238 * Translate the file name. namei() returns a vnode pointer
239 * in ni_vp among other things.
240 */
241 ndp = &nd;
242 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
243 UIO_USERSPACE, fname, td);
244
245 mtx_lock(&Giant);
246interpret:
247
248 error = namei(ndp);
249 if (error) {
250 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
251 ARG_MAX + PAGE_SIZE);
252 goto exec_fail;
253 }
254
255 imgp->vp = ndp->ni_vp;
256 imgp->fname = fname;
257
258 /*
259 * Check file permissions (also 'opens' file)
260 */
261 error = exec_check_permissions(imgp);
262 if (error)
263 goto exec_fail_dealloc;
264
265 if (VOP_GETVOBJECT(imgp->vp, &imgp->object) == 0)
266 vm_object_reference(imgp->object);
267
268 /*
269 * Set VV_TEXT now so no one can write to the executable while we're
270 * activating it.
271 *
272 * Remember if this was set before and unset it in case this is not
273 * actually an executable image.
274 */
275 textset = imgp->vp->v_vflag & VV_TEXT;
276 imgp->vp->v_vflag |= VV_TEXT;
277
278 error = exec_map_first_page(imgp);
279 if (error)
280 goto exec_fail_dealloc;
281
282 /*
283 * If the current process has a special image activator it
284 * wants to try first, call it. For example, emulating shell
285 * scripts differently.
286 */
287 error = -1;
288 if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
289 error = img_first(imgp);
290
291 /*
292 * Loop through the list of image activators, calling each one.
293 * An activator returns -1 if there is no match, 0 on success,
294 * and an error otherwise.
295 */
296 for (i = 0; error == -1 && execsw[i]; ++i) {
297 if (execsw[i]->ex_imgact == NULL ||
298 execsw[i]->ex_imgact == img_first) {
299 continue;
300 }
301 error = (*execsw[i]->ex_imgact)(imgp);
302 }
303
304 if (error) {
305 if (error == -1) {
306 if (textset == 0)
307 imgp->vp->v_vflag &= ~VV_TEXT;
308 error = ENOEXEC;
309 }
310 goto exec_fail_dealloc;
311 }
312
313 /*
314 * Special interpreter operation, cleanup and loop up to try to
315 * activate the interpreter.
316 */
317 if (imgp->interpreted) {
318 exec_unmap_first_page(imgp);
319 /*
320 * VV_TEXT needs to be unset for scripts. There is a short
321 * period before we determine that something is a script where
322 * VV_TEXT will be set. The vnode lock is held over this
323 * entire period so nothing should illegitimately be blocked.
324 */
325 imgp->vp->v_vflag &= ~VV_TEXT;
326 /* free name buffer and old vnode */
327 NDFREE(ndp, NDF_ONLY_PNBUF);
234 /*
235 * Allocate temporary demand zeroed space for argument and
236 * environment strings
237 */
238 imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX +
239 PAGE_SIZE);
240 if (imgp->stringbase == NULL) {
241 error = ENOMEM;
242 mtx_lock(&Giant);
243 goto exec_fail;
244 }
245 imgp->stringp = imgp->stringbase;
246 imgp->stringspace = ARG_MAX;
247 imgp->image_header = imgp->stringbase + ARG_MAX;
248
249 /*
250 * Translate the file name. namei() returns a vnode pointer
251 * in ni_vp among other things.
252 */
253 ndp = &nd;
254 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
255 UIO_USERSPACE, fname, td);
256
257 mtx_lock(&Giant);
258interpret:
259
260 error = namei(ndp);
261 if (error) {
262 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
263 ARG_MAX + PAGE_SIZE);
264 goto exec_fail;
265 }
266
267 imgp->vp = ndp->ni_vp;
268 imgp->fname = fname;
269
270 /*
271 * Check file permissions (also 'opens' file)
272 */
273 error = exec_check_permissions(imgp);
274 if (error)
275 goto exec_fail_dealloc;
276
277 if (VOP_GETVOBJECT(imgp->vp, &imgp->object) == 0)
278 vm_object_reference(imgp->object);
279
280 /*
281 * Set VV_TEXT now so no one can write to the executable while we're
282 * activating it.
283 *
284 * Remember if this was set before and unset it in case this is not
285 * actually an executable image.
286 */
287 textset = imgp->vp->v_vflag & VV_TEXT;
288 imgp->vp->v_vflag |= VV_TEXT;
289
290 error = exec_map_first_page(imgp);
291 if (error)
292 goto exec_fail_dealloc;
293
294 /*
295 * If the current process has a special image activator it
296 * wants to try first, call it. For example, emulating shell
297 * scripts differently.
298 */
299 error = -1;
300 if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
301 error = img_first(imgp);
302
303 /*
304 * Loop through the list of image activators, calling each one.
305 * An activator returns -1 if there is no match, 0 on success,
306 * and an error otherwise.
307 */
308 for (i = 0; error == -1 && execsw[i]; ++i) {
309 if (execsw[i]->ex_imgact == NULL ||
310 execsw[i]->ex_imgact == img_first) {
311 continue;
312 }
313 error = (*execsw[i]->ex_imgact)(imgp);
314 }
315
316 if (error) {
317 if (error == -1) {
318 if (textset == 0)
319 imgp->vp->v_vflag &= ~VV_TEXT;
320 error = ENOEXEC;
321 }
322 goto exec_fail_dealloc;
323 }
324
325 /*
326 * Special interpreter operation, cleanup and loop up to try to
327 * activate the interpreter.
328 */
329 if (imgp->interpreted) {
330 exec_unmap_first_page(imgp);
331 /*
332 * VV_TEXT needs to be unset for scripts. There is a short
333 * period before we determine that something is a script where
334 * VV_TEXT will be set. The vnode lock is held over this
335 * entire period so nothing should illegitimately be blocked.
336 */
337 imgp->vp->v_vflag &= ~VV_TEXT;
338 /* free name buffer and old vnode */
339 NDFREE(ndp, NDF_ONLY_PNBUF);
340#ifdef MAC
341 mac_init_vnode_label(&interplabel);
342 mac_copy_vnode_label(&ndp->ni_vp->v_label, &interplabel);
343 interplabelvalid = 1;
344#endif
328 vput(ndp->ni_vp);
329 vm_object_deallocate(imgp->object);
330 imgp->object = NULL;
331 /* set new name to that of the interpreter */
332 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
333 UIO_SYSSPACE, imgp->interpreter_name, td);
334 goto interpret;
335 }
336
337 /*
338 * Copy out strings (args and env) and initialize stack base
339 */
340 if (p->p_sysent->sv_copyout_strings)
341 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
342 else
343 stack_base = exec_copyout_strings(imgp);
344
345 /*
346 * If custom stack fixup routine present for this process
347 * let it do the stack setup.
348 * Else stuff argument count as first item on stack
349 */
350 if (p->p_sysent->sv_fixup)
351 (*p->p_sysent->sv_fixup)(&stack_base, imgp);
352 else
353 suword(--stack_base, imgp->argc);
354
355 /*
356 * For security and other reasons, the file descriptor table cannot
357 * be shared after an exec.
358 */
359 FILEDESC_LOCK(p->p_fd);
360 if (p->p_fd->fd_refcnt > 1) {
361 struct filedesc *tmp;
362
363 tmp = fdcopy(td);
364 FILEDESC_UNLOCK(p->p_fd);
365 fdfree(td);
366 p->p_fd = tmp;
367 } else
368 FILEDESC_UNLOCK(p->p_fd);
369
370 /*
371 * Malloc things before we need locks.
372 */
373 newcred = crget();
374 euip = uifind(attr.va_uid);
375 i = imgp->endargs - imgp->stringbase;
376 if (ps_arg_cache_limit >= i + sizeof(struct pargs))
377 newargs = pargs_alloc(i);
378
379 /* close files on exec */
380 fdcloseexec(td);
381
382 /* Get a reference to the vnode prior to locking the proc */
383 VREF(ndp->ni_vp);
384
385 /*
386 * For security and other reasons, signal handlers cannot
387 * be shared after an exec. The new process gets a copy of the old
388 * handlers. In execsigs(), the new process will have its signals
389 * reset.
390 */
391 PROC_LOCK(p);
392 mp_fixme("procsig needs a lock");
393 if (p->p_procsig->ps_refcnt > 1) {
394 oldprocsig = p->p_procsig;
395 PROC_UNLOCK(p);
396 MALLOC(newprocsig, struct procsig *, sizeof(struct procsig),
397 M_SUBPROC, M_WAITOK);
398 bcopy(oldprocsig, newprocsig, sizeof(*newprocsig));
399 newprocsig->ps_refcnt = 1;
400 oldprocsig->ps_refcnt--;
401 PROC_LOCK(p);
402 p->p_procsig = newprocsig;
403 if (p->p_sigacts == &p->p_uarea->u_sigacts)
404 panic("shared procsig but private sigacts?");
405
406 p->p_uarea->u_sigacts = *p->p_sigacts;
407 p->p_sigacts = &p->p_uarea->u_sigacts;
408 }
409 /* Stop profiling */
410 stopprofclock(p);
411
412 /* reset caught signals */
413 execsigs(p);
414
415 /* name this process - nameiexec(p, ndp) */
416 len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN);
417 bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len);
418 p->p_comm[len] = 0;
419
420 /*
421 * mark as execed, wakeup the process that vforked (if any) and tell
422 * it that it now has its own resources back
423 */
424 p->p_flag |= P_EXEC;
425 if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
426 p->p_flag &= ~P_PPWAIT;
427 wakeup(p->p_pptr);
428 }
429
430 /*
431 * Implement image setuid/setgid.
432 *
433 * Don't honor setuid/setgid if the filesystem prohibits it or if
434 * the process is being traced.
345 vput(ndp->ni_vp);
346 vm_object_deallocate(imgp->object);
347 imgp->object = NULL;
348 /* set new name to that of the interpreter */
349 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
350 UIO_SYSSPACE, imgp->interpreter_name, td);
351 goto interpret;
352 }
353
354 /*
355 * Copy out strings (args and env) and initialize stack base
356 */
357 if (p->p_sysent->sv_copyout_strings)
358 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
359 else
360 stack_base = exec_copyout_strings(imgp);
361
362 /*
363 * If custom stack fixup routine present for this process
364 * let it do the stack setup.
365 * Else stuff argument count as first item on stack
366 */
367 if (p->p_sysent->sv_fixup)
368 (*p->p_sysent->sv_fixup)(&stack_base, imgp);
369 else
370 suword(--stack_base, imgp->argc);
371
372 /*
373 * For security and other reasons, the file descriptor table cannot
374 * be shared after an exec.
375 */
376 FILEDESC_LOCK(p->p_fd);
377 if (p->p_fd->fd_refcnt > 1) {
378 struct filedesc *tmp;
379
380 tmp = fdcopy(td);
381 FILEDESC_UNLOCK(p->p_fd);
382 fdfree(td);
383 p->p_fd = tmp;
384 } else
385 FILEDESC_UNLOCK(p->p_fd);
386
387 /*
388 * Malloc things before we need locks.
389 */
390 newcred = crget();
391 euip = uifind(attr.va_uid);
392 i = imgp->endargs - imgp->stringbase;
393 if (ps_arg_cache_limit >= i + sizeof(struct pargs))
394 newargs = pargs_alloc(i);
395
396 /* close files on exec */
397 fdcloseexec(td);
398
399 /* Get a reference to the vnode prior to locking the proc */
400 VREF(ndp->ni_vp);
401
402 /*
403 * For security and other reasons, signal handlers cannot
404 * be shared after an exec. The new process gets a copy of the old
405 * handlers. In execsigs(), the new process will have its signals
406 * reset.
407 */
408 PROC_LOCK(p);
409 mp_fixme("procsig needs a lock");
410 if (p->p_procsig->ps_refcnt > 1) {
411 oldprocsig = p->p_procsig;
412 PROC_UNLOCK(p);
413 MALLOC(newprocsig, struct procsig *, sizeof(struct procsig),
414 M_SUBPROC, M_WAITOK);
415 bcopy(oldprocsig, newprocsig, sizeof(*newprocsig));
416 newprocsig->ps_refcnt = 1;
417 oldprocsig->ps_refcnt--;
418 PROC_LOCK(p);
419 p->p_procsig = newprocsig;
420 if (p->p_sigacts == &p->p_uarea->u_sigacts)
421 panic("shared procsig but private sigacts?");
422
423 p->p_uarea->u_sigacts = *p->p_sigacts;
424 p->p_sigacts = &p->p_uarea->u_sigacts;
425 }
426 /* Stop profiling */
427 stopprofclock(p);
428
429 /* reset caught signals */
430 execsigs(p);
431
432 /* name this process - nameiexec(p, ndp) */
433 len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN);
434 bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len);
435 p->p_comm[len] = 0;
436
437 /*
438 * mark as execed, wakeup the process that vforked (if any) and tell
439 * it that it now has its own resources back
440 */
441 p->p_flag |= P_EXEC;
442 if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
443 p->p_flag &= ~P_PPWAIT;
444 wakeup(p->p_pptr);
445 }
446
447 /*
448 * Implement image setuid/setgid.
449 *
450 * Don't honor setuid/setgid if the filesystem prohibits it or if
451 * the process is being traced.
452 *
453 * XXXMAC: For the time being, use NOSUID to also prohibit
454 * transitions on the file system.
435 */
436 oldcred = p->p_ucred;
437 credential_changing = 0;
438 credential_changing |= (attr.va_mode & VSUID) && oldcred->cr_uid !=
439 attr.va_uid;
440 credential_changing |= (attr.va_mode & VSGID) && oldcred->cr_gid !=
441 attr.va_gid;
442#ifdef MAC
455 */
456 oldcred = p->p_ucred;
457 credential_changing = 0;
458 credential_changing |= (attr.va_mode & VSUID) && oldcred->cr_uid !=
459 attr.va_uid;
460 credential_changing |= (attr.va_mode & VSGID) && oldcred->cr_gid !=
461 attr.va_gid;
462#ifdef MAC
443 will_transition = mac_execve_will_transition(oldcred, imgp->vp);
463 will_transition = mac_execve_will_transition(oldcred, imgp->vp,
464 interplabelvalid ? &interplabel : NULL, imgp);
444 credential_changing |= will_transition;
445#endif
446
447 if (credential_changing &&
448 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
449 (p->p_flag & P_TRACED) == 0) {
450 /*
451 * Turn off syscall tracing for set-id programs, except for
452 * root. Record any set-id flags first to make sure that
453 * we do not regain any tracing during a possible block.
454 */
455 setsugid(p);
456#ifdef KTRACE
457 if (p->p_tracep && suser_cred(oldcred, PRISON_ROOT)) {
458 mtx_lock(&ktrace_mtx);
459 p->p_traceflag = 0;
460 tracevp = p->p_tracep;
461 p->p_tracep = NULL;
462 mtx_unlock(&ktrace_mtx);
463 }
464#endif
465 /*
466 * Close any file descriptors 0..2 that reference procfs,
467 * then make sure file descriptors 0..2 are in use.
468 *
469 * setugidsafety() may call closef() and then pfind()
470 * which may grab the process lock.
471 * fdcheckstd() may call falloc() which may block to
472 * allocate memory, so temporarily drop the process lock.
473 */
474 PROC_UNLOCK(p);
475 setugidsafety(td);
476 error = fdcheckstd(td);
477 if (error != 0)
478 goto done1;
479 PROC_LOCK(p);
480 /*
481 * Set the new credentials.
482 */
483 crcopy(newcred, oldcred);
484 if (attr.va_mode & VSUID)
485 change_euid(newcred, euip);
486 if (attr.va_mode & VSGID)
487 change_egid(newcred, attr.va_gid);
488#ifdef MAC
465 credential_changing |= will_transition;
466#endif
467
468 if (credential_changing &&
469 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
470 (p->p_flag & P_TRACED) == 0) {
471 /*
472 * Turn off syscall tracing for set-id programs, except for
473 * root. Record any set-id flags first to make sure that
474 * we do not regain any tracing during a possible block.
475 */
476 setsugid(p);
477#ifdef KTRACE
478 if (p->p_tracep && suser_cred(oldcred, PRISON_ROOT)) {
479 mtx_lock(&ktrace_mtx);
480 p->p_traceflag = 0;
481 tracevp = p->p_tracep;
482 p->p_tracep = NULL;
483 mtx_unlock(&ktrace_mtx);
484 }
485#endif
486 /*
487 * Close any file descriptors 0..2 that reference procfs,
488 * then make sure file descriptors 0..2 are in use.
489 *
490 * setugidsafety() may call closef() and then pfind()
491 * which may grab the process lock.
492 * fdcheckstd() may call falloc() which may block to
493 * allocate memory, so temporarily drop the process lock.
494 */
495 PROC_UNLOCK(p);
496 setugidsafety(td);
497 error = fdcheckstd(td);
498 if (error != 0)
499 goto done1;
500 PROC_LOCK(p);
501 /*
502 * Set the new credentials.
503 */
504 crcopy(newcred, oldcred);
505 if (attr.va_mode & VSUID)
506 change_euid(newcred, euip);
507 if (attr.va_mode & VSGID)
508 change_egid(newcred, attr.va_gid);
509#ifdef MAC
489 if (will_transition)
490 mac_execve_transition(oldcred, newcred, imgp->vp);
510 if (will_transition) {
511 mac_execve_transition(oldcred, newcred, imgp->vp,
512 interplabelvalid ? &interplabel : NULL, imgp);
513 }
491#endif
492 /*
493 * Implement correct POSIX saved-id behavior.
494 *
495 * XXXMAC: Note that the current logic will save the
496 * uid and gid if a MAC domain transition occurs, even
497 * though maybe it shouldn't.
498 */
499 change_svuid(newcred, newcred->cr_uid);
500 change_svgid(newcred, newcred->cr_gid);
501 p->p_ucred = newcred;
502 newcred = NULL;
503 } else {
504 if (oldcred->cr_uid == oldcred->cr_ruid &&
505 oldcred->cr_gid == oldcred->cr_rgid)
506 p->p_flag &= ~P_SUGID;
507 /*
508 * Implement correct POSIX saved-id behavior.
509 *
510 * XXX: It's not clear that the existing behavior is
511 * POSIX-compliant. A number of sources indicate that the
512 * saved uid/gid should only be updated if the new ruid is
513 * not equal to the old ruid, or the new euid is not equal
514 * to the old euid and the new euid is not equal to the old
515 * ruid. The FreeBSD code always updates the saved uid/gid.
516 * Also, this code uses the new (replaced) euid and egid as
517 * the source, which may or may not be the right ones to use.
518 */
519 if (oldcred->cr_svuid != oldcred->cr_uid ||
520 oldcred->cr_svgid != oldcred->cr_gid) {
521 crcopy(newcred, oldcred);
522 change_svuid(newcred, newcred->cr_uid);
523 change_svgid(newcred, newcred->cr_gid);
524 p->p_ucred = newcred;
525 newcred = NULL;
526 }
527 }
528
529 /*
530 * Store the vp for use in procfs. This vnode was referenced prior
531 * to locking the proc lock.
532 */
533 textvp = p->p_textvp;
534 p->p_textvp = ndp->ni_vp;
535
536 /*
537 * Notify others that we exec'd, and clear the P_INEXEC flag
538 * as we're now a bona fide freshly-execed process.
539 */
540 KNOTE(&p->p_klist, NOTE_EXEC);
541 p->p_flag &= ~P_INEXEC;
542
543 /*
544 * If tracing the process, trap to debugger so breakpoints
545 * can be set before the program executes.
546 */
547 _STOPEVENT(p, S_EXEC, 0);
548
549 if (p->p_flag & P_TRACED)
550 psignal(p, SIGTRAP);
551
552 /* clear "fork but no exec" flag, as we _are_ execing */
553 p->p_acflag &= ~AFORK;
554
555 /* Free any previous argument cache */
556 oldargs = p->p_args;
557 p->p_args = NULL;
558
559 /* Cache arguments if they fit inside our allowance */
560 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
561 bcopy(imgp->stringbase, newargs->ar_args, i);
562 p->p_args = newargs;
563 newargs = NULL;
564 }
565 PROC_UNLOCK(p);
566
567 /* Set values passed into the program in registers. */
568 if (p->p_sysent->sv_setregs)
569 (*p->p_sysent->sv_setregs)(td, imgp->entry_addr,
570 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
571 else
572 exec_setregs(td, imgp->entry_addr,
573 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
574
575done1:
576 /*
577 * Free any resources malloc'd earlier that we didn't use.
578 */
579 uifree(euip);
580 if (newcred == NULL)
581 crfree(oldcred);
582 else
583 crfree(newcred);
584 /*
585 * Handle deferred decrement of ref counts.
586 */
587 if (textvp != NULL)
588 vrele(textvp);
589 if (ndp->ni_vp && error != 0)
590 vrele(ndp->ni_vp);
591#ifdef KTRACE
592 if (tracevp != NULL)
593 vrele(tracevp);
594#endif
595 if (oldargs != NULL)
596 pargs_drop(oldargs);
597 if (newargs != NULL)
598 pargs_drop(newargs);
599
600exec_fail_dealloc:
601
602 /*
603 * free various allocated resources
604 */
605 if (imgp->firstpage)
606 exec_unmap_first_page(imgp);
607
608 if (imgp->stringbase != NULL)
609 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
610 ARG_MAX + PAGE_SIZE);
611
612 if (imgp->vp) {
613 NDFREE(ndp, NDF_ONLY_PNBUF);
614 vput(imgp->vp);
615 }
616
617 if (imgp->object)
618 vm_object_deallocate(imgp->object);
619
620 if (error == 0)
621 goto done2;
622
623exec_fail:
624 /* we're done here, clear P_INEXEC */
625 PROC_LOCK(p);
626 p->p_flag &= ~P_INEXEC;
627 PROC_UNLOCK(p);
628
629 if (imgp->vmspace_destroyed) {
630 /* sorry, no more process anymore. exit gracefully */
514#endif
515 /*
516 * Implement correct POSIX saved-id behavior.
517 *
518 * XXXMAC: Note that the current logic will save the
519 * uid and gid if a MAC domain transition occurs, even
520 * though maybe it shouldn't.
521 */
522 change_svuid(newcred, newcred->cr_uid);
523 change_svgid(newcred, newcred->cr_gid);
524 p->p_ucred = newcred;
525 newcred = NULL;
526 } else {
527 if (oldcred->cr_uid == oldcred->cr_ruid &&
528 oldcred->cr_gid == oldcred->cr_rgid)
529 p->p_flag &= ~P_SUGID;
530 /*
531 * Implement correct POSIX saved-id behavior.
532 *
533 * XXX: It's not clear that the existing behavior is
534 * POSIX-compliant. A number of sources indicate that the
535 * saved uid/gid should only be updated if the new ruid is
536 * not equal to the old ruid, or the new euid is not equal
537 * to the old euid and the new euid is not equal to the old
538 * ruid. The FreeBSD code always updates the saved uid/gid.
539 * Also, this code uses the new (replaced) euid and egid as
540 * the source, which may or may not be the right ones to use.
541 */
542 if (oldcred->cr_svuid != oldcred->cr_uid ||
543 oldcred->cr_svgid != oldcred->cr_gid) {
544 crcopy(newcred, oldcred);
545 change_svuid(newcred, newcred->cr_uid);
546 change_svgid(newcred, newcred->cr_gid);
547 p->p_ucred = newcred;
548 newcred = NULL;
549 }
550 }
551
552 /*
553 * Store the vp for use in procfs. This vnode was referenced prior
554 * to locking the proc lock.
555 */
556 textvp = p->p_textvp;
557 p->p_textvp = ndp->ni_vp;
558
559 /*
560 * Notify others that we exec'd, and clear the P_INEXEC flag
561 * as we're now a bona fide freshly-execed process.
562 */
563 KNOTE(&p->p_klist, NOTE_EXEC);
564 p->p_flag &= ~P_INEXEC;
565
566 /*
567 * If tracing the process, trap to debugger so breakpoints
568 * can be set before the program executes.
569 */
570 _STOPEVENT(p, S_EXEC, 0);
571
572 if (p->p_flag & P_TRACED)
573 psignal(p, SIGTRAP);
574
575 /* clear "fork but no exec" flag, as we _are_ execing */
576 p->p_acflag &= ~AFORK;
577
578 /* Free any previous argument cache */
579 oldargs = p->p_args;
580 p->p_args = NULL;
581
582 /* Cache arguments if they fit inside our allowance */
583 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
584 bcopy(imgp->stringbase, newargs->ar_args, i);
585 p->p_args = newargs;
586 newargs = NULL;
587 }
588 PROC_UNLOCK(p);
589
590 /* Set values passed into the program in registers. */
591 if (p->p_sysent->sv_setregs)
592 (*p->p_sysent->sv_setregs)(td, imgp->entry_addr,
593 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
594 else
595 exec_setregs(td, imgp->entry_addr,
596 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
597
598done1:
599 /*
600 * Free any resources malloc'd earlier that we didn't use.
601 */
602 uifree(euip);
603 if (newcred == NULL)
604 crfree(oldcred);
605 else
606 crfree(newcred);
607 /*
608 * Handle deferred decrement of ref counts.
609 */
610 if (textvp != NULL)
611 vrele(textvp);
612 if (ndp->ni_vp && error != 0)
613 vrele(ndp->ni_vp);
614#ifdef KTRACE
615 if (tracevp != NULL)
616 vrele(tracevp);
617#endif
618 if (oldargs != NULL)
619 pargs_drop(oldargs);
620 if (newargs != NULL)
621 pargs_drop(newargs);
622
623exec_fail_dealloc:
624
625 /*
626 * free various allocated resources
627 */
628 if (imgp->firstpage)
629 exec_unmap_first_page(imgp);
630
631 if (imgp->stringbase != NULL)
632 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
633 ARG_MAX + PAGE_SIZE);
634
635 if (imgp->vp) {
636 NDFREE(ndp, NDF_ONLY_PNBUF);
637 vput(imgp->vp);
638 }
639
640 if (imgp->object)
641 vm_object_deallocate(imgp->object);
642
643 if (error == 0)
644 goto done2;
645
646exec_fail:
647 /* we're done here, clear P_INEXEC */
648 PROC_LOCK(p);
649 p->p_flag &= ~P_INEXEC;
650 PROC_UNLOCK(p);
651
652 if (imgp->vmspace_destroyed) {
653 /* sorry, no more process anymore. exit gracefully */
654#ifdef MAC
655 mac_execve_exit(imgp);
656 if (interplabelvalid)
657 mac_destroy_vnode_label(&interplabel);
658#endif
631 exit1(td, W_EXITCODE(0, SIGABRT));
632 /* NOT REACHED */
633 error = 0;
634 }
635done2:
659 exit1(td, W_EXITCODE(0, SIGABRT));
660 /* NOT REACHED */
661 error = 0;
662 }
663done2:
664#ifdef MAC
665 mac_execve_exit(imgp);
666 if (interplabelvalid)
667 mac_destroy_vnode_label(&interplabel);
668#endif
636 mtx_unlock(&Giant);
637 return (error);
638}
639
/*
 * Argument block for the execve() system call.  execve() in this file
 * forwards fname, argv and envv unchanged to kern_execve().
 * The definition is compiled only when _SYS_SYSPROTO_H_ is undefined
 * (presumably the include guard of <sys/sysproto.h>, which would supply
 * its own copy -- TODO confirm).
 */
640#ifndef _SYS_SYSPROTO_H_
641struct execve_args {
642 char *fname;
643 char **argv;
644 char **envv;
645};
646#endif
647
648/*
649 * MPSAFE
650 */
651int
652execve(td, uap)
653 struct thread *td;
654 struct execve_args /* {
655 syscallarg(char *) fname;
656 syscallarg(char **) argv;
657 syscallarg(char **) envv;
658 } */ *uap;
659{
660
669 mtx_unlock(&Giant);
670 return (error);
671}
672
/*
 * Argument block for the execve() system call.  execve() in this file
 * forwards fname, argv and envv unchanged to kern_execve().
 * The definition is compiled only when _SYS_SYSPROTO_H_ is undefined
 * (presumably the include guard of <sys/sysproto.h>, which would supply
 * its own copy -- TODO confirm).
 */
673#ifndef _SYS_SYSPROTO_H_
674struct execve_args {
675 char *fname;
676 char **argv;
677 char **envv;
678};
679#endif
680
681/*
682 * MPSAFE
683 */
684int
685execve(td, uap)
686 struct thread *td;
687 struct execve_args /* {
688 syscallarg(char *) fname;
689 syscallarg(char **) argv;
690 syscallarg(char **) envv;
691 } */ *uap;
692{
693
661 return (kern_execve(td, uap->fname, uap->argv, uap->envv));
694#ifdef MAC
695 return (kern_execve(td, uap->fname, uap->argv, uap->envv, NULL));
696#else
697 return (ENOSYS);
698#endif
662}
663
699}
700
/*
 * Argument block for the __mac_execve() system call: the three execve()
 * arguments plus mac_p, a pointer to a struct mac.  __mac_execve() in
 * this file forwards all four to kern_execve().
 * The definition is compiled only when _SYS_SYSPROTO_H_ is undefined
 * (presumably the include guard of <sys/sysproto.h> -- TODO confirm).
 */
701#ifndef _SYS_SYSPROTO_H_
702struct __mac_execve_args {
703 char *fname;
704 char **argv;
705 char **envv;
706 struct mac *mac_p;
707};
708#endif
709
710/*
711 * MPSAFE
712 */
664int
713int
714__mac_execve(td, uap)
715 struct thread *td;
716 struct __mac_execve_args /* {
717 syscallarg(char *) fname;
718 syscallarg(char **) argv;
719 syscallarg(char **) envv;
720 syscallarg(struct mac *) mac_p;
721 } */ *uap;
722{
723
724 return (kern_execve(td, uap->fname, uap->argv, uap->envv,
725 uap->mac_p));
726}
727
728int
665exec_map_first_page(imgp)
666 struct image_params *imgp;
667{
 /*
  * Make page 0 of the image's VM object resident, wire it, and map it
  * at imgp->image_header.  The wired page is remembered in
  * imgp->firstpage so exec_unmap_first_page() can undo the mapping.
  *
  * Returns 0 on success, or EIO when the pager fails or page 0 ends up
  * missing or with no valid bits.
  */
668 int rv, i;
669 int initial_pagein;
670 vm_page_t ma[VM_INITIAL_PAGEIN];
671 vm_object_t object;
672
673 GIANT_REQUIRED;
674
 /* Drop any mapping left over from a previous call on this imgp. */
675 if (imgp->firstpage) {
676 exec_unmap_first_page(imgp);
677 }
678
679 VOP_GETVOBJECT(imgp->vp, &object);
680
681 ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
682
 /* Page 0 is not fully valid: read it in, with some read-ahead. */
683 if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
 /* Read at most VM_INITIAL_PAGEIN pages, capped at the object size. */
684 initial_pagein = VM_INITIAL_PAGEIN;
685 if (initial_pagein > object->size)
686 initial_pagein = object->size;
 /*
  * Collect the pages following page 0.  The run stops at the first
  * page that is busy, already has valid bits, or cannot be allocated.
  */
687 for (i = 1; i < initial_pagein; i++) {
688 if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
689 if ((ma[i]->flags & PG_BUSY) || ma[i]->busy)
690 break;
691 if (ma[i]->valid)
692 break;
693 vm_page_busy(ma[i]);
694 } else {
695 ma[i] = vm_page_alloc(object, i,
696 VM_ALLOC_NORMAL);
697 if (ma[i] == NULL)
698 break;
699 }
700 }
 /* i is the number of pages actually gathered in ma[]. */
701 initial_pagein = i;
702
703 rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
 /* Look page 0 up again rather than trusting the old ma[0] pointer. */
704 ma[0] = vm_page_lookup(object, 0);
705
706 if ((rv != VM_PAGER_OK) || (ma[0] == NULL) ||
707 (ma[0]->valid == 0)) {
 /* Read failed: free page 0 if it still exists, then report EIO. */
708 if (ma[0]) {
709 vm_page_lock_queues();
710 vm_page_protect(ma[0], VM_PROT_NONE);
711 vm_page_free(ma[0]);
712 vm_page_unlock_queues();
713 }
714 return (EIO);
715 }
716 }
 /* Wire page 0 and wake it up, under the page queues lock. */
717 vm_page_lock_queues();
718 vm_page_wire(ma[0]);
719 vm_page_wakeup(ma[0]);
720 vm_page_unlock_queues();
721
 /* Enter just this one page at the image_header address. */
722 pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
723 imgp->firstpage = ma[0];
724
725 return (0);
726}
727
728void
729exec_unmap_first_page(imgp)
730 struct image_params *imgp;
731{
732 GIANT_REQUIRED;
733
734 if (imgp->firstpage) {
735 pmap_qremove((vm_offset_t)imgp->image_header, 1);
736 vm_page_lock_queues();
737 vm_page_unwire(imgp->firstpage, 1);
738 vm_page_unlock_queues();
739 imgp->firstpage = NULL;
740 }
741}
742
743/*
744 * Destroy old address space, and allocate a new stack
745 * The new stack is only SGROWSIZ large because it is grown
746 * automatically in trap.c.
747 */
748int
749exec_new_vmspace(imgp, sv)
750 struct image_params *imgp;
751 struct sysentvec *sv;
752{
753 int error;
754 struct execlist *ep;
755 struct proc *p = imgp->proc;
756 struct vmspace *vmspace = p->p_vmspace;
757 vm_offset_t stack_addr;
758 vm_map_t map;
759
760 GIANT_REQUIRED;
761
762 stack_addr = sv->sv_usrstack - maxssiz;
763
764 imgp->vmspace_destroyed = 1;
765
766 /*
767 * Perform functions registered with at_exec().
768 */
769 TAILQ_FOREACH(ep, &exec_list, next)
770 (*ep->function)(p);
771
772 /*
773 * Blow away entire process VM, if address space not shared,
774 * otherwise, create a new VM space so that other threads are
775 * not disrupted
776 */
777 map = &vmspace->vm_map;
778 if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv->sv_minuser &&
779 vm_map_max(map) == sv->sv_maxuser) {
780 if (vmspace->vm_shm)
781 shmexit(p);
782 pmap_remove_pages(vmspace_pmap(vmspace), vm_map_min(map),
783 vm_map_max(map));
784 vm_map_remove(map, vm_map_min(map), vm_map_max(map));
785 } else {
786 vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
787 vmspace = p->p_vmspace;
788 map = &vmspace->vm_map;
789 }
790
791 /* Allocate a new stack */
792 error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
793 sv->sv_stackprot, VM_PROT_ALL, 0);
794 if (error)
795 return (error);
796
797#ifdef __ia64__
798 {
799 /*
800 * Allocate backing store. We really need something
801 * similar to vm_map_stack which can allow the backing
802 * store to grow upwards. This will do for now.
803 */
804 vm_offset_t bsaddr;
805 bsaddr = p->p_sysent->sv_usrstack - 2 * maxssiz;
806 error = vm_map_find(map, 0, 0, &bsaddr,
807 regstkpages * PAGE_SIZE, 0, VM_PROT_ALL, VM_PROT_ALL, 0);
808 FIRST_THREAD_IN_PROC(p)->td_md.md_bspstore = bsaddr;
809 }
810#endif
811
812 /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
813 * VM_STACK case, but they are still used to monitor the size of the
814 * process stack so we can check the stack rlimit.
815 */
816 vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
817 vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - maxssiz;
818
819 return (0);
820}
821
822/*
823 * Copy out argument and environment strings from the old process
824 * address space into the temporary string buffer.
825 */
826int
827exec_extract_strings(imgp)
828 struct image_params *imgp;
829{
830 char **argv, **envv;
831 char *argp, *envp;
832 int error;
833 size_t length;
834
835 /*
836 * extract arguments first
837 */
838
839 argv = imgp->userspace_argv;
840
841 if (argv) {
842 argp = (caddr_t)(intptr_t)fuword(argv);
843 if (argp == (caddr_t)-1)
844 return (EFAULT);
845 if (argp)
846 argv++;
847 if (imgp->argv0)
848 argp = imgp->argv0;
849 if (argp) {
850 do {
851 if (argp == (caddr_t)-1)
852 return (EFAULT);
853 if ((error = copyinstr(argp, imgp->stringp,
854 imgp->stringspace, &length))) {
855 if (error == ENAMETOOLONG)
856 return (E2BIG);
857 return (error);
858 }
859 imgp->stringspace -= length;
860 imgp->stringp += length;
861 imgp->argc++;
862 } while ((argp = (caddr_t)(intptr_t)fuword(argv++)));
863 }
864 }
865
866 imgp->endargs = imgp->stringp;
867
868 /*
869 * extract environment strings
870 */
871
872 envv = imgp->userspace_envv;
873
874 if (envv) {
875 while ((envp = (caddr_t)(intptr_t)fuword(envv++))) {
876 if (envp == (caddr_t)-1)
877 return (EFAULT);
878 if ((error = copyinstr(envp, imgp->stringp,
879 imgp->stringspace, &length))) {
880 if (error == ENAMETOOLONG)
881 return (E2BIG);
882 return (error);
883 }
884 imgp->stringspace -= length;
885 imgp->stringp += length;
886 imgp->envc++;
887 }
888 }
889
890 return (0);
891}
892
893/*
894 * Copy strings out to the new process address space, constructing
895 * new arg and env vector tables. Return a pointer to the base
896 * so that it can be used as the initial stack pointer.
897 */
898register_t *
899exec_copyout_strings(imgp)
900 struct image_params *imgp;
901{
902 int argc, envc;
903 char **vectp;
904 char *stringp, *destp;
905 register_t *stack_base;
906 struct ps_strings *arginfo;
907 struct proc *p;
908 int szsigcode;
909
910 /*
911 * Calculate string base and vector table pointers.
912 * Also deal with signal trampoline code for this exec type.
913 */
914 p = imgp->proc;
915 szsigcode = 0;
916 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
917 if (p->p_sysent->sv_szsigcode != NULL)
918 szsigcode = *(p->p_sysent->sv_szsigcode);
919 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
920 roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
921
922 /*
923 * install sigcode
924 */
925 if (szsigcode)
926 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
927 szsigcode), szsigcode);
928
929 /*
930 * If we have a valid auxargs ptr, prepare some room
931 * on the stack.
932 */
933 if (imgp->auxargs) {
934 /*
935 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
936 * lower compatibility.
937 */
938 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
939 (AT_COUNT * 2);
940 /*
941 * The '+ 2' is for the null pointers at the end of each of
942 * the arg and env vector sets,and imgp->auxarg_size is room
943 * for argument of Runtime loader.
944 */
945 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2 +
946 imgp->auxarg_size) * sizeof(char *));
947
948 } else
949 /*
950 * The '+ 2' is for the null pointers at the end of each of
951 * the arg and env vector sets
952 */
953 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2) *
954 sizeof(char *));
955
956 /*
957 * vectp also becomes our initial stack base
958 */
959 stack_base = (register_t *)vectp;
960
961 stringp = imgp->stringbase;
962 argc = imgp->argc;
963 envc = imgp->envc;
964
965 /*
966 * Copy out strings - arguments and environment.
967 */
968 copyout(stringp, destp, ARG_MAX - imgp->stringspace);
969
970 /*
971 * Fill in "ps_strings" struct for ps, w, etc.
972 */
973 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
974 suword(&arginfo->ps_nargvstr, argc);
975
976 /*
977 * Fill in argument portion of vector table.
978 */
979 for (; argc > 0; --argc) {
980 suword(vectp++, (long)(intptr_t)destp);
981 while (*stringp++ != 0)
982 destp++;
983 destp++;
984 }
985
986 /* a null vector table pointer separates the argp's from the envp's */
987 suword(vectp++, 0);
988
989 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
990 suword(&arginfo->ps_nenvstr, envc);
991
992 /*
993 * Fill in environment portion of vector table.
994 */
995 for (; envc > 0; --envc) {
996 suword(vectp++, (long)(intptr_t)destp);
997 while (*stringp++ != 0)
998 destp++;
999 destp++;
1000 }
1001
1002 /* end of vector table is a null pointer */
1003 suword(vectp, 0);
1004
1005 return (stack_base);
1006}
1007
1008/*
1009 * Check permissions of file to execute.
1010 * Called with imgp->vp locked.
1011 * Return 0 for success or error code on failure.
1012 */
1013int
1014exec_check_permissions(imgp)
1015 struct image_params *imgp;
1016{
1017 struct vnode *vp = imgp->vp;
1018 struct vattr *attr = imgp->attr;
1019 struct thread *td;
1020 int error;
1021
1022 td = curthread; /* XXXKSE */
1023
1024#ifdef MAC
729exec_map_first_page(imgp)
730 struct image_params *imgp;
731{
 /*
  * Make page 0 of the image's VM object resident, wire it, and map it
  * at imgp->image_header.  The wired page is remembered in
  * imgp->firstpage so exec_unmap_first_page() can undo the mapping.
  *
  * Returns 0 on success, or EIO when the pager fails or page 0 ends up
  * missing or with no valid bits.
  */
732 int rv, i;
733 int initial_pagein;
734 vm_page_t ma[VM_INITIAL_PAGEIN];
735 vm_object_t object;
736
737 GIANT_REQUIRED;
738
 /* Drop any mapping left over from a previous call on this imgp. */
739 if (imgp->firstpage) {
740 exec_unmap_first_page(imgp);
741 }
742
743 VOP_GETVOBJECT(imgp->vp, &object);
744
745 ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
746
 /* Page 0 is not fully valid: read it in, with some read-ahead. */
747 if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
 /* Read at most VM_INITIAL_PAGEIN pages, capped at the object size. */
748 initial_pagein = VM_INITIAL_PAGEIN;
749 if (initial_pagein > object->size)
750 initial_pagein = object->size;
 /*
  * Collect the pages following page 0.  The run stops at the first
  * page that is busy, already has valid bits, or cannot be allocated.
  */
751 for (i = 1; i < initial_pagein; i++) {
752 if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
753 if ((ma[i]->flags & PG_BUSY) || ma[i]->busy)
754 break;
755 if (ma[i]->valid)
756 break;
757 vm_page_busy(ma[i]);
758 } else {
759 ma[i] = vm_page_alloc(object, i,
760 VM_ALLOC_NORMAL);
761 if (ma[i] == NULL)
762 break;
763 }
764 }
 /* i is the number of pages actually gathered in ma[]. */
765 initial_pagein = i;
766
767 rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
 /* Look page 0 up again rather than trusting the old ma[0] pointer. */
768 ma[0] = vm_page_lookup(object, 0);
769
770 if ((rv != VM_PAGER_OK) || (ma[0] == NULL) ||
771 (ma[0]->valid == 0)) {
 /* Read failed: free page 0 if it still exists, then report EIO. */
772 if (ma[0]) {
773 vm_page_lock_queues();
774 vm_page_protect(ma[0], VM_PROT_NONE);
775 vm_page_free(ma[0]);
776 vm_page_unlock_queues();
777 }
778 return (EIO);
779 }
780 }
 /* Wire page 0 and wake it up, under the page queues lock. */
781 vm_page_lock_queues();
782 vm_page_wire(ma[0]);
783 vm_page_wakeup(ma[0]);
784 vm_page_unlock_queues();
785
 /* Enter just this one page at the image_header address. */
786 pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
787 imgp->firstpage = ma[0];
788
789 return (0);
790}
791
792void
793exec_unmap_first_page(imgp)
794 struct image_params *imgp;
795{
796 GIANT_REQUIRED;
797
798 if (imgp->firstpage) {
799 pmap_qremove((vm_offset_t)imgp->image_header, 1);
800 vm_page_lock_queues();
801 vm_page_unwire(imgp->firstpage, 1);
802 vm_page_unlock_queues();
803 imgp->firstpage = NULL;
804 }
805}
806
807/*
808 * Destroy old address space, and allocate a new stack
809 * The new stack is only SGROWSIZ large because it is grown
810 * automatically in trap.c.
811 */
812int
813exec_new_vmspace(imgp, sv)
814 struct image_params *imgp;
815 struct sysentvec *sv;
816{
817 int error;
818 struct execlist *ep;
819 struct proc *p = imgp->proc;
820 struct vmspace *vmspace = p->p_vmspace;
821 vm_offset_t stack_addr;
822 vm_map_t map;
823
824 GIANT_REQUIRED;
825
826 stack_addr = sv->sv_usrstack - maxssiz;
827
828 imgp->vmspace_destroyed = 1;
829
830 /*
831 * Perform functions registered with at_exec().
832 */
833 TAILQ_FOREACH(ep, &exec_list, next)
834 (*ep->function)(p);
835
836 /*
837 * Blow away entire process VM, if address space not shared,
838 * otherwise, create a new VM space so that other threads are
839 * not disrupted
840 */
841 map = &vmspace->vm_map;
842 if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv->sv_minuser &&
843 vm_map_max(map) == sv->sv_maxuser) {
844 if (vmspace->vm_shm)
845 shmexit(p);
846 pmap_remove_pages(vmspace_pmap(vmspace), vm_map_min(map),
847 vm_map_max(map));
848 vm_map_remove(map, vm_map_min(map), vm_map_max(map));
849 } else {
850 vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
851 vmspace = p->p_vmspace;
852 map = &vmspace->vm_map;
853 }
854
855 /* Allocate a new stack */
856 error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
857 sv->sv_stackprot, VM_PROT_ALL, 0);
858 if (error)
859 return (error);
860
861#ifdef __ia64__
862 {
863 /*
864 * Allocate backing store. We really need something
865 * similar to vm_map_stack which can allow the backing
866 * store to grow upwards. This will do for now.
867 */
868 vm_offset_t bsaddr;
869 bsaddr = p->p_sysent->sv_usrstack - 2 * maxssiz;
870 error = vm_map_find(map, 0, 0, &bsaddr,
871 regstkpages * PAGE_SIZE, 0, VM_PROT_ALL, VM_PROT_ALL, 0);
872 FIRST_THREAD_IN_PROC(p)->td_md.md_bspstore = bsaddr;
873 }
874#endif
875
876 /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
877 * VM_STACK case, but they are still used to monitor the size of the
878 * process stack so we can check the stack rlimit.
879 */
880 vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
881 vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - maxssiz;
882
883 return (0);
884}
885
886/*
887 * Copy out argument and environment strings from the old process
888 * address space into the temporary string buffer.
889 */
890int
891exec_extract_strings(imgp)
892 struct image_params *imgp;
893{
894 char **argv, **envv;
895 char *argp, *envp;
896 int error;
897 size_t length;
898
899 /*
900 * extract arguments first
901 */
902
903 argv = imgp->userspace_argv;
904
905 if (argv) {
906 argp = (caddr_t)(intptr_t)fuword(argv);
907 if (argp == (caddr_t)-1)
908 return (EFAULT);
909 if (argp)
910 argv++;
911 if (imgp->argv0)
912 argp = imgp->argv0;
913 if (argp) {
914 do {
915 if (argp == (caddr_t)-1)
916 return (EFAULT);
917 if ((error = copyinstr(argp, imgp->stringp,
918 imgp->stringspace, &length))) {
919 if (error == ENAMETOOLONG)
920 return (E2BIG);
921 return (error);
922 }
923 imgp->stringspace -= length;
924 imgp->stringp += length;
925 imgp->argc++;
926 } while ((argp = (caddr_t)(intptr_t)fuword(argv++)));
927 }
928 }
929
930 imgp->endargs = imgp->stringp;
931
932 /*
933 * extract environment strings
934 */
935
936 envv = imgp->userspace_envv;
937
938 if (envv) {
939 while ((envp = (caddr_t)(intptr_t)fuword(envv++))) {
940 if (envp == (caddr_t)-1)
941 return (EFAULT);
942 if ((error = copyinstr(envp, imgp->stringp,
943 imgp->stringspace, &length))) {
944 if (error == ENAMETOOLONG)
945 return (E2BIG);
946 return (error);
947 }
948 imgp->stringspace -= length;
949 imgp->stringp += length;
950 imgp->envc++;
951 }
952 }
953
954 return (0);
955}
956
957/*
958 * Copy strings out to the new process address space, constructing
959 * new arg and env vector tables. Return a pointer to the base
960 * so that it can be used as the initial stack pointer.
961 */
962register_t *
963exec_copyout_strings(imgp)
964 struct image_params *imgp;
965{
966 int argc, envc;
967 char **vectp;
968 char *stringp, *destp;
969 register_t *stack_base;
970 struct ps_strings *arginfo;
971 struct proc *p;
972 int szsigcode;
973
974 /*
975 * Calculate string base and vector table pointers.
976 * Also deal with signal trampoline code for this exec type.
977 */
978 p = imgp->proc;
979 szsigcode = 0;
980 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
981 if (p->p_sysent->sv_szsigcode != NULL)
982 szsigcode = *(p->p_sysent->sv_szsigcode);
983 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
984 roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
985
986 /*
987 * install sigcode
988 */
989 if (szsigcode)
990 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
991 szsigcode), szsigcode);
992
993 /*
994 * If we have a valid auxargs ptr, prepare some room
995 * on the stack.
996 */
997 if (imgp->auxargs) {
998 /*
999 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
1000 * lower compatibility.
1001 */
1002 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
1003 (AT_COUNT * 2);
1004 /*
1005 * The '+ 2' is for the null pointers at the end of each of
1006 * the arg and env vector sets,and imgp->auxarg_size is room
1007 * for argument of Runtime loader.
1008 */
1009 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2 +
1010 imgp->auxarg_size) * sizeof(char *));
1011
1012 } else
1013 /*
1014 * The '+ 2' is for the null pointers at the end of each of
1015 * the arg and env vector sets
1016 */
1017 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2) *
1018 sizeof(char *));
1019
1020 /*
1021 * vectp also becomes our initial stack base
1022 */
1023 stack_base = (register_t *)vectp;
1024
1025 stringp = imgp->stringbase;
1026 argc = imgp->argc;
1027 envc = imgp->envc;
1028
1029 /*
1030 * Copy out strings - arguments and environment.
1031 */
1032 copyout(stringp, destp, ARG_MAX - imgp->stringspace);
1033
1034 /*
1035 * Fill in "ps_strings" struct for ps, w, etc.
1036 */
1037 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
1038 suword(&arginfo->ps_nargvstr, argc);
1039
1040 /*
1041 * Fill in argument portion of vector table.
1042 */
1043 for (; argc > 0; --argc) {
1044 suword(vectp++, (long)(intptr_t)destp);
1045 while (*stringp++ != 0)
1046 destp++;
1047 destp++;
1048 }
1049
1050 /* a null vector table pointer separates the argp's from the envp's */
1051 suword(vectp++, 0);
1052
1053 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
1054 suword(&arginfo->ps_nenvstr, envc);
1055
1056 /*
1057 * Fill in environment portion of vector table.
1058 */
1059 for (; envc > 0; --envc) {
1060 suword(vectp++, (long)(intptr_t)destp);
1061 while (*stringp++ != 0)
1062 destp++;
1063 destp++;
1064 }
1065
1066 /* end of vector table is a null pointer */
1067 suword(vectp, 0);
1068
1069 return (stack_base);
1070}
1071
1072/*
1073 * Check permissions of file to execute.
1074 * Called with imgp->vp locked.
1075 * Return 0 for success or error code on failure.
1076 */
1077int
1078exec_check_permissions(imgp)
1079 struct image_params *imgp;
1080{
1081 struct vnode *vp = imgp->vp;
1082 struct vattr *attr = imgp->attr;
1083 struct thread *td;
1084 int error;
1085
1086 td = curthread; /* XXXKSE */
1087
1088#ifdef MAC
1025 error = mac_check_vnode_exec(td->td_ucred, imgp->vp);
1089 error = mac_check_vnode_exec(td->td_ucred, imgp->vp, imgp);
1026 if (error)
1027 return (error);
1028#endif
1029
1030 /* Get file attributes */
1031 error = VOP_GETATTR(vp, attr, td->td_ucred, td);
1032 if (error)
1033 return (error);
1034
1035 /*
1036 * 1) Check if file execution is disabled for the filesystem that this
1037 * file resides on.
1038 * 2) Insure that at least one execute bit is on - otherwise root
1039 * will always succeed, and we don't want to happen unless the
1040 * file really is executable.
1041 * 3) Insure that the file is a regular file.
1042 */
1043 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
1044 ((attr->va_mode & 0111) == 0) ||
1045 (attr->va_type != VREG))
1046 return (EACCES);
1047
1048 /*
1049 * Zero length files can't be exec'd
1050 */
1051 if (attr->va_size == 0)
1052 return (ENOEXEC);
1053
1054 /*
1055 * Check for execute permission to file based on current credentials.
1056 */
1057 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1058 if (error)
1059 return (error);
1060
1061 /*
1062 * Check number of open-for-writes on the file and deny execution
1063 * if there are any.
1064 */
1065 if (vp->v_writecount)
1066 return (ETXTBSY);
1067
1068 /*
1069 * Call filesystem specific open routine (which does nothing in the
1070 * general case).
1071 */
1072 error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
1073 return (error);
1074}
1075
1076/*
1077 * Exec handler registration
1078 */
1079int
1080exec_register(execsw_arg)
1081 const struct execsw *execsw_arg;
1082{
1083 const struct execsw **es, **xs, **newexecsw;
1084 int count = 2; /* New slot and trailing NULL */
1085
1086 if (execsw)
1087 for (es = execsw; *es; es++)
1088 count++;
1089 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1090 if (newexecsw == NULL)
1091 return (ENOMEM);
1092 xs = newexecsw;
1093 if (execsw)
1094 for (es = execsw; *es; es++)
1095 *xs++ = *es;
1096 *xs++ = execsw_arg;
1097 *xs = NULL;
1098 if (execsw)
1099 free(execsw, M_TEMP);
1100 execsw = newexecsw;
1101 return (0);
1102}
1103
1104int
1105exec_unregister(execsw_arg)
1106 const struct execsw *execsw_arg;
1107{
1108 const struct execsw **es, **xs, **newexecsw;
1109 int count = 1;
1110
1111 if (execsw == NULL)
1112 panic("unregister with no handlers left?\n");
1113
1114 for (es = execsw; *es; es++) {
1115 if (*es == execsw_arg)
1116 break;
1117 }
1118 if (*es == NULL)
1119 return (ENOENT);
1120 for (es = execsw; *es; es++)
1121 if (*es != execsw_arg)
1122 count++;
1123 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1124 if (newexecsw == NULL)
1125 return (ENOMEM);
1126 xs = newexecsw;
1127 for (es = execsw; *es; es++)
1128 if (*es != execsw_arg)
1129 *xs++ = *es;
1130 *xs = NULL;
1131 if (execsw)
1132 free(execsw, M_TEMP);
1133 execsw = newexecsw;
1134 return (0);
1135}
1136
1137int
1138at_exec(function)
1139 execlist_fn function;
1140{
1141 struct execlist *ep;
1142
1143#ifdef INVARIANTS
1144 /* Be noisy if the programmer has lost track of things */
1145 if (rm_at_exec(function))
1146 printf("WARNING: exec callout entry (%p) already present\n",
1147 function);
1148#endif
1149 ep = malloc(sizeof(*ep), M_ATEXEC, M_NOWAIT);
1150 if (ep == NULL)
1151 return (ENOMEM);
1152 ep->function = function;
1153 TAILQ_INSERT_TAIL(&exec_list, ep, next);
1154 return (0);
1155}
1156
1157/*
1158 * Scan the exec callout list for the given item and remove it.
1159 * Returns the number of items removed (0 or 1)
1160 */
1161int
1162rm_at_exec(function)
1163 execlist_fn function;
1164{
1165 struct execlist *ep;
1166
1167 TAILQ_FOREACH(ep, &exec_list, next) {
1168 if (ep->function == function) {
1169 TAILQ_REMOVE(&exec_list, ep, next);
1170 free(ep, M_ATEXEC);
1171 return (1);
1172 }
1173 }
1174 return (0);
1175}
1090 if (error)
1091 return (error);
1092#endif
1093
1094 /* Get file attributes */
1095 error = VOP_GETATTR(vp, attr, td->td_ucred, td);
1096 if (error)
1097 return (error);
1098
1099 /*
1100 * 1) Check if file execution is disabled for the filesystem that this
1101 * file resides on.
1102 * 2) Insure that at least one execute bit is on - otherwise root
1103 * will always succeed, and we don't want to happen unless the
1104 * file really is executable.
1105 * 3) Insure that the file is a regular file.
1106 */
1107 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
1108 ((attr->va_mode & 0111) == 0) ||
1109 (attr->va_type != VREG))
1110 return (EACCES);
1111
1112 /*
1113 * Zero length files can't be exec'd
1114 */
1115 if (attr->va_size == 0)
1116 return (ENOEXEC);
1117
1118 /*
1119 * Check for execute permission to file based on current credentials.
1120 */
1121 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1122 if (error)
1123 return (error);
1124
1125 /*
1126 * Check number of open-for-writes on the file and deny execution
1127 * if there are any.
1128 */
1129 if (vp->v_writecount)
1130 return (ETXTBSY);
1131
1132 /*
1133 * Call filesystem specific open routine (which does nothing in the
1134 * general case).
1135 */
1136 error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
1137 return (error);
1138}
1139
1140/*
1141 * Exec handler registration
1142 */
1143int
1144exec_register(execsw_arg)
1145 const struct execsw *execsw_arg;
1146{
1147 const struct execsw **es, **xs, **newexecsw;
1148 int count = 2; /* New slot and trailing NULL */
1149
1150 if (execsw)
1151 for (es = execsw; *es; es++)
1152 count++;
1153 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1154 if (newexecsw == NULL)
1155 return (ENOMEM);
1156 xs = newexecsw;
1157 if (execsw)
1158 for (es = execsw; *es; es++)
1159 *xs++ = *es;
1160 *xs++ = execsw_arg;
1161 *xs = NULL;
1162 if (execsw)
1163 free(execsw, M_TEMP);
1164 execsw = newexecsw;
1165 return (0);
1166}
1167
1168int
1169exec_unregister(execsw_arg)
1170 const struct execsw *execsw_arg;
1171{
1172 const struct execsw **es, **xs, **newexecsw;
1173 int count = 1;
1174
1175 if (execsw == NULL)
1176 panic("unregister with no handlers left?\n");
1177
1178 for (es = execsw; *es; es++) {
1179 if (*es == execsw_arg)
1180 break;
1181 }
1182 if (*es == NULL)
1183 return (ENOENT);
1184 for (es = execsw; *es; es++)
1185 if (*es != execsw_arg)
1186 count++;
1187 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1188 if (newexecsw == NULL)
1189 return (ENOMEM);
1190 xs = newexecsw;
1191 for (es = execsw; *es; es++)
1192 if (*es != execsw_arg)
1193 *xs++ = *es;
1194 *xs = NULL;
1195 if (execsw)
1196 free(execsw, M_TEMP);
1197 execsw = newexecsw;
1198 return (0);
1199}
1200
1201int
1202at_exec(function)
1203 execlist_fn function;
1204{
1205 struct execlist *ep;
1206
1207#ifdef INVARIANTS
1208 /* Be noisy if the programmer has lost track of things */
1209 if (rm_at_exec(function))
1210 printf("WARNING: exec callout entry (%p) already present\n",
1211 function);
1212#endif
1213 ep = malloc(sizeof(*ep), M_ATEXEC, M_NOWAIT);
1214 if (ep == NULL)
1215 return (ENOMEM);
1216 ep->function = function;
1217 TAILQ_INSERT_TAIL(&exec_list, ep, next);
1218 return (0);
1219}
1220
1221/*
1222 * Scan the exec callout list for the given item and remove it.
1223 * Returns the number of items removed (0 or 1)
1224 */
1225int
1226rm_at_exec(function)
1227 execlist_fn function;
1228{
1229 struct execlist *ep;
1230
1231 TAILQ_FOREACH(ep, &exec_list, next) {
1232 if (ep->function == function) {
1233 TAILQ_REMOVE(&exec_list, ep, next);
1234 free(ep, M_ATEXEC);
1235 return (1);
1236 }
1237 }
1238 return (0);
1239}