Deleted Added
full compact
kern_exec.c (120769) kern_exec.c (121268)
1/*
2 * Copyright (c) 1993, David Greenman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
1/*
2 * Copyright (c) 1993, David Greenman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/kern/kern_exec.c 120769 2003-10-04 22:47:20Z alc $");
28__FBSDID("$FreeBSD: head/sys/kern/kern_exec.c 121268 2003-10-20 05:34:10Z marcel $");
29
30#include "opt_ktrace.h"
31#include "opt_mac.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/eventhandler.h>
36#include <sys/lock.h>
37#include <sys/mutex.h>
38#include <sys/sysproto.h>
39#include <sys/signalvar.h>
40#include <sys/kernel.h>
41#include <sys/mac.h>
42#include <sys/mount.h>
43#include <sys/filedesc.h>
44#include <sys/fcntl.h>
45#include <sys/acct.h>
46#include <sys/exec.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/wait.h>
50#include <sys/malloc.h>
51#include <sys/proc.h>
52#include <sys/pioctl.h>
53#include <sys/namei.h>
54#include <sys/sysent.h>
55#include <sys/shm.h>
56#include <sys/sysctl.h>
57#include <sys/user.h>
58#include <sys/vnode.h>
59#ifdef KTRACE
60#include <sys/ktrace.h>
61#endif
62
63#include <vm/vm.h>
64#include <vm/vm_param.h>
65#include <vm/pmap.h>
66#include <vm/vm_page.h>
67#include <vm/vm_map.h>
68#include <vm/vm_kern.h>
69#include <vm/vm_extern.h>
70#include <vm/vm_object.h>
71#include <vm/vm_pager.h>
72
73#include <machine/reg.h>
74
/* Kernel malloc type "proc-args", described as "Process arguments". */
MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");

/* Forward declarations for the static functions defined in this file. */
static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
static int kern_execve(struct thread *td, char *fname, char **argv,
    char **envv, struct mac *mac_p);

/*
 * kern.ps_strings: read-only node whose value is produced by
 * sysctl_kern_ps_strings().
 */
/* XXX This should be vm_size_t. */
SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD,
    NULL, 0, sysctl_kern_ps_strings, "LU", "");

/*
 * kern.usrstack: read-only node whose value is produced by
 * sysctl_kern_usrstack().
 */
/* XXX This should be vm_size_t. */
SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD,
    NULL, 0, sysctl_kern_usrstack, "LU", "");

/*
 * kern.stackprot: read-only integer node whose value is produced by
 * sysctl_kern_stackprot().
 */
SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD,
    NULL, 0, sysctl_kern_stackprot, "I", "");

/*
 * Read-write tunable kern.ps_arg_cache_limit.  kern_execve() caches the
 * argument strings in p_args only when their length plus
 * sizeof(struct pargs) does not exceed this value.
 */
u_long ps_arg_cache_limit = PAGE_SIZE / 16;
SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW,
    &ps_arg_cache_limit, 0, "");

/*
 * Read-write integer tunable kern.ps_argsopen, defaulting to 1.
 * NOTE(review): its consumer is not in this part of the file.
 */
int ps_argsopen = 1;
SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, "");
100
101static int
102sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
103{
104 struct proc *p;
105
106 p = curproc;
107 return (SYSCTL_OUT(req, &p->p_sysent->sv_psstrings,
108 sizeof(p->p_sysent->sv_psstrings)));
109}
110
111static int
112sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
113{
114 struct proc *p;
115
116 p = curproc;
117 return (SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
118 sizeof(p->p_sysent->sv_usrstack)));
119}
120
121static int
122sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS)
123{
124 struct proc *p;
125
126 p = curproc;
127 return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot,
128 sizeof(p->p_sysent->sv_stackprot)));
129}
130
/*
 * Table of registered image activators.  Each of the items is a pointer
 * to a `const struct execsw', hence the double pointer here.
 * kern_execve() walks the table until it reaches a NULL entry, skipping
 * entries whose ex_imgact is NULL.
 */
static const struct execsw **execsw;
136
137/*
138 * In-kernel implementation of execve(). All arguments are assumed to be
139 * userspace pointers from the passed thread.
140 *
141 * MPSAFE
142 */
143static int
144kern_execve(td, fname, argv, envv, mac_p)
145 struct thread *td;
146 char *fname;
147 char **argv;
148 char **envv;
149 struct mac *mac_p;
150{
151 struct proc *p = td->td_proc;
152 struct nameidata nd, *ndp;
153 struct ucred *newcred = NULL, *oldcred;
154 struct uidinfo *euip;
155 register_t *stack_base;
156 int error, len, i;
157 struct image_params image_params, *imgp;
158 struct vattr attr;
159 int (*img_first)(struct image_params *);
160 struct pargs *oldargs = NULL, *newargs = NULL;
161 struct sigacts *oldsigacts, *newsigacts;
162#ifdef KTRACE
163 struct vnode *tracevp = NULL;
164 struct ucred *tracecred = NULL;
165#endif
166 struct vnode *textvp = NULL;
167 int credential_changing;
168 int textset;
169#ifdef MAC
170 struct label interplabel; /* label of the interpreted vnode */
171 struct label execlabel; /* optional label argument */
172 int will_transition, interplabelvalid = 0;
173#endif
174
175 imgp = &image_params;
176
177 /*
178 * Lock the process and set the P_INEXEC flag to indicate that
179 * it should be left alone until we're done here. This is
180 * necessary to avoid race conditions - e.g. in ptrace() -
181 * that might allow a local user to illicitly obtain elevated
182 * privileges.
183 */
184 PROC_LOCK(p);
185 KASSERT((p->p_flag & P_INEXEC) == 0,
186 ("%s(): process already has P_INEXEC flag", __func__));
187 if (p->p_flag & P_SA || p->p_numthreads > 1) {
188 if (thread_single(SINGLE_EXIT)) {
189 PROC_UNLOCK(p);
190 return (ERESTART); /* Try again later. */
191 }
192 /*
193 * If we get here all other threads are dead,
194 * so unset the associated flags and lose KSE mode.
195 */
196 p->p_flag &= ~P_SA;
197 td->td_mailbox = NULL;
198 thread_single_end();
199 }
200 p->p_flag |= P_INEXEC;
201 PROC_UNLOCK(p);
202
203 /*
204 * Initialize part of the common data
205 */
206 imgp->proc = p;
207 imgp->userspace_argv = argv;
208 imgp->userspace_envv = envv;
209 imgp->execlabel = NULL;
210 imgp->attr = &attr;
211 imgp->argc = imgp->envc = 0;
212 imgp->argv0 = NULL;
213 imgp->entry_addr = 0;
214 imgp->vmspace_destroyed = 0;
215 imgp->interpreted = 0;
216 imgp->interpreter_name[0] = '\0';
217 imgp->auxargs = NULL;
218 imgp->vp = NULL;
219 imgp->object = NULL;
220 imgp->firstpage = NULL;
221 imgp->ps_strings = 0;
222 imgp->auxarg_size = 0;
223
224#ifdef MAC
225 error = mac_execve_enter(imgp, mac_p, &execlabel);
226 if (error) {
227 mtx_lock(&Giant);
228 goto exec_fail;
229 }
230#endif
231
232 /*
233 * Allocate temporary demand zeroed space for argument and
234 * environment strings
235 */
236 imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX +
237 PAGE_SIZE);
238 if (imgp->stringbase == NULL) {
239 error = ENOMEM;
240 mtx_lock(&Giant);
241 goto exec_fail;
242 }
243 imgp->stringp = imgp->stringbase;
244 imgp->stringspace = ARG_MAX;
245 imgp->image_header = imgp->stringbase + ARG_MAX;
246
247 /*
248 * Translate the file name. namei() returns a vnode pointer
249 * in ni_vp amoung other things.
250 */
251 ndp = &nd;
252 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
253 UIO_USERSPACE, fname, td);
254
255 mtx_lock(&Giant);
256interpret:
257
258 error = namei(ndp);
259 if (error) {
260 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
261 ARG_MAX + PAGE_SIZE);
262 goto exec_fail;
263 }
264
265 imgp->vp = ndp->ni_vp;
266 imgp->fname = fname;
267
268 /*
269 * Check file permissions (also 'opens' file)
270 */
271 error = exec_check_permissions(imgp);
272 if (error)
273 goto exec_fail_dealloc;
274
275 if (VOP_GETVOBJECT(imgp->vp, &imgp->object) == 0)
276 vm_object_reference(imgp->object);
277
278 /*
279 * Set VV_TEXT now so no one can write to the executable while we're
280 * activating it.
281 *
282 * Remember if this was set before and unset it in case this is not
283 * actually an executable image.
284 */
285 textset = imgp->vp->v_vflag & VV_TEXT;
286 imgp->vp->v_vflag |= VV_TEXT;
287
288 error = exec_map_first_page(imgp);
289 if (error)
290 goto exec_fail_dealloc;
291
292 /*
293 * If the current process has a special image activator it
294 * wants to try first, call it. For example, emulating shell
295 * scripts differently.
296 */
297 error = -1;
298 if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
299 error = img_first(imgp);
300
301 /*
302 * Loop through the list of image activators, calling each one.
303 * An activator returns -1 if there is no match, 0 on success,
304 * and an error otherwise.
305 */
306 for (i = 0; error == -1 && execsw[i]; ++i) {
307 if (execsw[i]->ex_imgact == NULL ||
308 execsw[i]->ex_imgact == img_first) {
309 continue;
310 }
311 error = (*execsw[i]->ex_imgact)(imgp);
312 }
313
314 if (error) {
315 if (error == -1) {
316 if (textset == 0)
317 imgp->vp->v_vflag &= ~VV_TEXT;
318 error = ENOEXEC;
319 }
320 goto exec_fail_dealloc;
321 }
322
323 /*
324 * Special interpreter operation, cleanup and loop up to try to
325 * activate the interpreter.
326 */
327 if (imgp->interpreted) {
328 exec_unmap_first_page(imgp);
329 /*
330 * VV_TEXT needs to be unset for scripts. There is a short
331 * period before we determine that something is a script where
332 * VV_TEXT will be set. The vnode lock is held over this
333 * entire period so nothing should illegitimately be blocked.
334 */
335 imgp->vp->v_vflag &= ~VV_TEXT;
336 /* free name buffer and old vnode */
337 NDFREE(ndp, NDF_ONLY_PNBUF);
338#ifdef MAC
339 mac_init_vnode_label(&interplabel);
340 mac_copy_vnode_label(&ndp->ni_vp->v_label, &interplabel);
341 interplabelvalid = 1;
342#endif
343 vput(ndp->ni_vp);
344 vm_object_deallocate(imgp->object);
345 imgp->object = NULL;
346 /* set new name to that of the interpreter */
347 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
348 UIO_SYSSPACE, imgp->interpreter_name, td);
349 goto interpret;
350 }
351
352 /*
353 * Copy out strings (args and env) and initialize stack base
354 */
355 if (p->p_sysent->sv_copyout_strings)
356 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
357 else
358 stack_base = exec_copyout_strings(imgp);
359
360 /*
361 * If custom stack fixup routine present for this process
362 * let it do the stack setup.
363 * Else stuff argument count as first item on stack
364 */
365 if (p->p_sysent->sv_fixup)
366 (*p->p_sysent->sv_fixup)(&stack_base, imgp);
367 else
368 suword(--stack_base, imgp->argc);
369
370 /*
371 * For security and other reasons, the file descriptor table cannot
372 * be shared after an exec.
373 */
374 FILEDESC_LOCK(p->p_fd);
375 if (p->p_fd->fd_refcnt > 1) {
376 struct filedesc *tmp;
377
378 tmp = fdcopy(td->td_proc->p_fd);
379 FILEDESC_UNLOCK(p->p_fd);
380 fdfree(td);
381 p->p_fd = tmp;
382 } else
383 FILEDESC_UNLOCK(p->p_fd);
384
385 /*
386 * Malloc things before we need locks.
387 */
388 newcred = crget();
389 euip = uifind(attr.va_uid);
390 i = imgp->endargs - imgp->stringbase;
391 if (ps_arg_cache_limit >= i + sizeof(struct pargs))
392 newargs = pargs_alloc(i);
393
394 /* close files on exec */
395 fdcloseexec(td);
396
397 /* Get a reference to the vnode prior to locking the proc */
398 VREF(ndp->ni_vp);
399
400 /*
401 * For security and other reasons, signal handlers cannot
402 * be shared after an exec. The new process gets a copy of the old
403 * handlers. In execsigs(), the new process will have its signals
404 * reset.
405 */
406 PROC_LOCK(p);
407 if (sigacts_shared(p->p_sigacts)) {
408 oldsigacts = p->p_sigacts;
409 PROC_UNLOCK(p);
410 newsigacts = sigacts_alloc();
411 sigacts_copy(newsigacts, oldsigacts);
412 PROC_LOCK(p);
413 p->p_sigacts = newsigacts;
414 } else
415 oldsigacts = NULL;
416
417 /* Stop profiling */
418 stopprofclock(p);
419
420 /* reset caught signals */
421 execsigs(p);
422
423 /* name this process - nameiexec(p, ndp) */
424 len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN);
425 bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len);
426 p->p_comm[len] = 0;
427
428 /*
429 * mark as execed, wakeup the process that vforked (if any) and tell
430 * it that it now has its own resources back
431 */
432 p->p_flag |= P_EXEC;
433 if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
434 p->p_flag &= ~P_PPWAIT;
435 wakeup(p->p_pptr);
436 }
437
438 /*
439 * Implement image setuid/setgid.
440 *
441 * Don't honor setuid/setgid if the filesystem prohibits it or if
442 * the process is being traced.
443 *
444 * XXXMAC: For the time being, use NOSUID to also prohibit
445 * transitions on the file system.
446 */
447 oldcred = p->p_ucred;
448 credential_changing = 0;
449 credential_changing |= (attr.va_mode & VSUID) && oldcred->cr_uid !=
450 attr.va_uid;
451 credential_changing |= (attr.va_mode & VSGID) && oldcred->cr_gid !=
452 attr.va_gid;
453#ifdef MAC
454 will_transition = mac_execve_will_transition(oldcred, imgp->vp,
455 interplabelvalid ? &interplabel : NULL, imgp);
456 credential_changing |= will_transition;
457#endif
458
459 if (credential_changing &&
460 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
461 (p->p_flag & P_TRACED) == 0) {
462 /*
463 * Turn off syscall tracing for set-id programs, except for
464 * root. Record any set-id flags first to make sure that
465 * we do not regain any tracing during a possible block.
466 */
467 setsugid(p);
468#ifdef KTRACE
469 if (p->p_tracevp != NULL && suser_cred(oldcred, PRISON_ROOT)) {
470 mtx_lock(&ktrace_mtx);
471 p->p_traceflag = 0;
472 tracevp = p->p_tracevp;
473 p->p_tracevp = NULL;
474 tracecred = p->p_tracecred;
475 p->p_tracecred = NULL;
476 mtx_unlock(&ktrace_mtx);
477 }
478#endif
479 /*
480 * Close any file descriptors 0..2 that reference procfs,
481 * then make sure file descriptors 0..2 are in use.
482 *
483 * setugidsafety() may call closef() and then pfind()
484 * which may grab the process lock.
485 * fdcheckstd() may call falloc() which may block to
486 * allocate memory, so temporarily drop the process lock.
487 */
488 PROC_UNLOCK(p);
489 setugidsafety(td);
490 error = fdcheckstd(td);
491 if (error != 0)
492 goto done1;
493 PROC_LOCK(p);
494 /*
495 * Set the new credentials.
496 */
497 crcopy(newcred, oldcred);
498 if (attr.va_mode & VSUID)
499 change_euid(newcred, euip);
500 if (attr.va_mode & VSGID)
501 change_egid(newcred, attr.va_gid);
502#ifdef MAC
503 if (will_transition) {
504 mac_execve_transition(oldcred, newcred, imgp->vp,
505 interplabelvalid ? &interplabel : NULL, imgp);
506 }
507#endif
508 /*
509 * Implement correct POSIX saved-id behavior.
510 *
511 * XXXMAC: Note that the current logic will save the
512 * uid and gid if a MAC domain transition occurs, even
513 * though maybe it shouldn't.
514 */
515 change_svuid(newcred, newcred->cr_uid);
516 change_svgid(newcred, newcred->cr_gid);
517 p->p_ucred = newcred;
518 newcred = NULL;
519 } else {
520 if (oldcred->cr_uid == oldcred->cr_ruid &&
521 oldcred->cr_gid == oldcred->cr_rgid)
522 p->p_flag &= ~P_SUGID;
523 /*
524 * Implement correct POSIX saved-id behavior.
525 *
526 * XXX: It's not clear that the existing behavior is
527 * POSIX-compliant. A number of sources indicate that the
528 * saved uid/gid should only be updated if the new ruid is
529 * not equal to the old ruid, or the new euid is not equal
530 * to the old euid and the new euid is not equal to the old
531 * ruid. The FreeBSD code always updates the saved uid/gid.
532 * Also, this code uses the new (replaced) euid and egid as
533 * the source, which may or may not be the right ones to use.
534 */
535 if (oldcred->cr_svuid != oldcred->cr_uid ||
536 oldcred->cr_svgid != oldcred->cr_gid) {
537 crcopy(newcred, oldcred);
538 change_svuid(newcred, newcred->cr_uid);
539 change_svgid(newcred, newcred->cr_gid);
540 p->p_ucred = newcred;
541 newcred = NULL;
542 }
543 }
544
545 /*
546 * Store the vp for use in procfs. This vnode was referenced prior
547 * to locking the proc lock.
548 */
549 textvp = p->p_textvp;
550 p->p_textvp = ndp->ni_vp;
551
552 /*
553 * Notify others that we exec'd, and clear the P_INEXEC flag
554 * as we're now a bona fide freshly-execed process.
555 */
556 KNOTE(&p->p_klist, NOTE_EXEC);
557 p->p_flag &= ~P_INEXEC;
558
559 /*
560 * If tracing the process, trap to debugger so breakpoints
561 * can be set before the program executes.
562 */
563 if (p->p_flag & P_TRACED)
564 psignal(p, SIGTRAP);
565
566 /* clear "fork but no exec" flag, as we _are_ execing */
567 p->p_acflag &= ~AFORK;
568
569 /* Free any previous argument cache */
570 oldargs = p->p_args;
571 p->p_args = NULL;
572
573 /* Cache arguments if they fit inside our allowance */
574 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
575 bcopy(imgp->stringbase, newargs->ar_args, i);
576 p->p_args = newargs;
577 newargs = NULL;
578 }
579 PROC_UNLOCK(p);
580
581 /* Set values passed into the program in registers. */
582 if (p->p_sysent->sv_setregs)
583 (*p->p_sysent->sv_setregs)(td, imgp->entry_addr,
584 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
585 else
586 exec_setregs(td, imgp->entry_addr,
587 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
588
589done1:
590 /*
591 * Free any resources malloc'd earlier that we didn't use.
592 */
593 uifree(euip);
594 if (newcred == NULL)
595 crfree(oldcred);
596 else
597 crfree(newcred);
598 /*
599 * Handle deferred decrement of ref counts.
600 */
601 if (textvp != NULL)
602 vrele(textvp);
603 if (ndp->ni_vp && error != 0)
604 vrele(ndp->ni_vp);
605#ifdef KTRACE
606 if (tracevp != NULL)
607 vrele(tracevp);
608 if (tracecred != NULL)
609 crfree(tracecred);
610#endif
611 if (oldargs != NULL)
612 pargs_drop(oldargs);
613 if (newargs != NULL)
614 pargs_drop(newargs);
615 if (oldsigacts != NULL)
616 sigacts_free(oldsigacts);
617
618exec_fail_dealloc:
619
620 /*
621 * free various allocated resources
622 */
623 if (imgp->firstpage)
624 exec_unmap_first_page(imgp);
625
626 if (imgp->vp) {
627 NDFREE(ndp, NDF_ONLY_PNBUF);
628 vput(imgp->vp);
629 }
630
631 if (imgp->stringbase != NULL)
632 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
633 ARG_MAX + PAGE_SIZE);
634
635 if (imgp->object)
636 vm_object_deallocate(imgp->object);
637
638 if (error == 0) {
639 /*
640 * Stop the process here if its stop event mask has
641 * the S_EXEC bit set.
642 */
643 STOPEVENT(p, S_EXEC, 0);
644 goto done2;
645 }
646
647exec_fail:
648 /* we're done here, clear P_INEXEC */
649 PROC_LOCK(p);
650 p->p_flag &= ~P_INEXEC;
651 PROC_UNLOCK(p);
652
653 if (imgp->vmspace_destroyed) {
654 /* sorry, no more process anymore. exit gracefully */
655#ifdef MAC
656 mac_execve_exit(imgp);
657 if (interplabelvalid)
658 mac_destroy_vnode_label(&interplabel);
659#endif
660 exit1(td, W_EXITCODE(0, SIGABRT));
661 /* NOT REACHED */
662 error = 0;
663 }
664done2:
665#ifdef MAC
666 mac_execve_exit(imgp);
667 if (interplabelvalid)
668 mac_destroy_vnode_label(&interplabel);
669#endif
670 mtx_unlock(&Giant);
671 return (error);
672}
673
674#ifndef _SYS_SYSPROTO_H_
675struct execve_args {
676 char *fname;
677 char **argv;
678 char **envv;
679};
680#endif
681
682/*
683 * MPSAFE
684 */
685int
686execve(td, uap)
687 struct thread *td;
688 struct execve_args /* {
689 char *fname;
690 char **argv;
691 char **envv;
692 } */ *uap;
693{
694
695 return (kern_execve(td, uap->fname, uap->argv, uap->envv, NULL));
696}
697
698#ifndef _SYS_SYSPROTO_H_
699struct __mac_execve_args {
700 char *fname;
701 char **argv;
702 char **envv;
703 struct mac *mac_p;
704};
705#endif
706
707/*
708 * MPSAFE
709 */
710int
711__mac_execve(td, uap)
712 struct thread *td;
713 struct __mac_execve_args /* {
714 char *fname;
715 char **argv;
716 char **envv;
717 struct mac *mac_p;
718 } */ *uap;
719{
720
721#ifdef MAC
722 return (kern_execve(td, uap->fname, uap->argv, uap->envv,
723 uap->mac_p));
724#else
725 return (ENOSYS);
726#endif
727}
728
729int
730exec_map_first_page(imgp)
731 struct image_params *imgp;
732{
733 int rv, i;
734 int initial_pagein;
735 vm_page_t ma[VM_INITIAL_PAGEIN];
736 vm_object_t object;
737
738 GIANT_REQUIRED;
739
740 if (imgp->firstpage) {
741 exec_unmap_first_page(imgp);
742 }
743
744 VOP_GETVOBJECT(imgp->vp, &object);
745 VM_OBJECT_LOCK(object);
746 ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
747 if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
748 initial_pagein = VM_INITIAL_PAGEIN;
749 if (initial_pagein > object->size)
750 initial_pagein = object->size;
751 for (i = 1; i < initial_pagein; i++) {
752 if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
753 if (ma[i]->valid)
754 break;
755 vm_page_lock_queues();
756 if ((ma[i]->flags & PG_BUSY) || ma[i]->busy) {
757 vm_page_unlock_queues();
758 break;
759 }
760 vm_page_busy(ma[i]);
761 vm_page_unlock_queues();
762 } else {
763 ma[i] = vm_page_alloc(object, i,
764 VM_ALLOC_NORMAL);
765 if (ma[i] == NULL)
766 break;
767 }
768 }
769 initial_pagein = i;
770 rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
771 ma[0] = vm_page_lookup(object, 0);
772 if ((rv != VM_PAGER_OK) || (ma[0] == NULL) ||
773 (ma[0]->valid == 0)) {
774 if (ma[0]) {
775 vm_page_lock_queues();
776 pmap_remove_all(ma[0]);
777 vm_page_free(ma[0]);
778 vm_page_unlock_queues();
779 }
780 VM_OBJECT_UNLOCK(object);
781 return (EIO);
782 }
783 }
784 vm_page_lock_queues();
785 vm_page_wire(ma[0]);
786 vm_page_wakeup(ma[0]);
787 vm_page_unlock_queues();
788 VM_OBJECT_UNLOCK(object);
789
790 pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
791 imgp->firstpage = ma[0];
792
793 return (0);
794}
795
796void
797exec_unmap_first_page(imgp)
798 struct image_params *imgp;
799{
800 GIANT_REQUIRED;
801
802 if (imgp->firstpage) {
803 pmap_qremove((vm_offset_t)imgp->image_header, 1);
804 vm_page_lock_queues();
805 vm_page_unwire(imgp->firstpage, 1);
806 vm_page_unlock_queues();
807 imgp->firstpage = NULL;
808 }
809}
810
811/*
812 * Destroy old address space, and allocate a new stack
813 * The new stack is only SGROWSIZ large because it is grown
814 * automatically in trap.c.
815 */
816int
817exec_new_vmspace(imgp, sv)
818 struct image_params *imgp;
819 struct sysentvec *sv;
820{
821 int error;
822 struct proc *p = imgp->proc;
823 struct vmspace *vmspace = p->p_vmspace;
824 vm_offset_t stack_addr;
825 vm_map_t map;
826
827 GIANT_REQUIRED;
828
829 imgp->vmspace_destroyed = 1;
830
831 EVENTHANDLER_INVOKE(process_exec, p);
832
833 /*
834 * Here is as good a place as any to do any resource limit cleanups.
835 * This is needed if a 64 bit binary exec's a 32 bit binary - the
836 * data size limit may need to be changed to a value that makes
837 * sense for the 32 bit binary.
838 */
839 if (sv->sv_fixlimits)
840 sv->sv_fixlimits(imgp);
841
842 /*
843 * Blow away entire process VM, if address space not shared,
844 * otherwise, create a new VM space so that other threads are
845 * not disrupted
846 */
847 map = &vmspace->vm_map;
848 if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv->sv_minuser &&
849 vm_map_max(map) == sv->sv_maxuser) {
850 shmexit(vmspace);
851 vm_page_lock_queues();
852 pmap_remove_pages(vmspace_pmap(vmspace), vm_map_min(map),
853 vm_map_max(map));
854 vm_page_unlock_queues();
855 vm_map_remove(map, vm_map_min(map), vm_map_max(map));
856 } else {
857 vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
858 vmspace = p->p_vmspace;
859 map = &vmspace->vm_map;
860 }
861
862 /* Allocate a new stack */
863 stack_addr = sv->sv_usrstack - maxssiz;
864 error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
865 sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
866 if (error)
867 return (error);
868
869#ifdef __ia64__
870 /* Allocate a new register stack */
29
30#include "opt_ktrace.h"
31#include "opt_mac.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/eventhandler.h>
36#include <sys/lock.h>
37#include <sys/mutex.h>
38#include <sys/sysproto.h>
39#include <sys/signalvar.h>
40#include <sys/kernel.h>
41#include <sys/mac.h>
42#include <sys/mount.h>
43#include <sys/filedesc.h>
44#include <sys/fcntl.h>
45#include <sys/acct.h>
46#include <sys/exec.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/wait.h>
50#include <sys/malloc.h>
51#include <sys/proc.h>
52#include <sys/pioctl.h>
53#include <sys/namei.h>
54#include <sys/sysent.h>
55#include <sys/shm.h>
56#include <sys/sysctl.h>
57#include <sys/user.h>
58#include <sys/vnode.h>
59#ifdef KTRACE
60#include <sys/ktrace.h>
61#endif
62
63#include <vm/vm.h>
64#include <vm/vm_param.h>
65#include <vm/pmap.h>
66#include <vm/vm_page.h>
67#include <vm/vm_map.h>
68#include <vm/vm_kern.h>
69#include <vm/vm_extern.h>
70#include <vm/vm_object.h>
71#include <vm/vm_pager.h>
72
73#include <machine/reg.h>
74
/* Kernel malloc type "proc-args", described as "Process arguments". */
MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");

/* Forward declarations for the static functions defined in this file. */
static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
static int kern_execve(struct thread *td, char *fname, char **argv,
    char **envv, struct mac *mac_p);

/*
 * kern.ps_strings: read-only node whose value is produced by
 * sysctl_kern_ps_strings().
 */
/* XXX This should be vm_size_t. */
SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD,
    NULL, 0, sysctl_kern_ps_strings, "LU", "");

/*
 * kern.usrstack: read-only node whose value is produced by
 * sysctl_kern_usrstack().
 */
/* XXX This should be vm_size_t. */
SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD,
    NULL, 0, sysctl_kern_usrstack, "LU", "");

/*
 * kern.stackprot: read-only integer node whose value is produced by
 * sysctl_kern_stackprot().
 */
SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD,
    NULL, 0, sysctl_kern_stackprot, "I", "");

/*
 * Read-write tunable kern.ps_arg_cache_limit, defaulting to
 * PAGE_SIZE / 16.
 * NOTE(review): presumably bounds the size of the cached process
 * argument block; confirm against its use in kern_execve().
 */
u_long ps_arg_cache_limit = PAGE_SIZE / 16;
SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW,
    &ps_arg_cache_limit, 0, "");

/*
 * Read-write integer tunable kern.ps_argsopen, defaulting to 1.
 * NOTE(review): its consumer is not in this part of the file.
 */
int ps_argsopen = 1;
SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, "");
100
101static int
102sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
103{
104 struct proc *p;
105
106 p = curproc;
107 return (SYSCTL_OUT(req, &p->p_sysent->sv_psstrings,
108 sizeof(p->p_sysent->sv_psstrings)));
109}
110
111static int
112sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
113{
114 struct proc *p;
115
116 p = curproc;
117 return (SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
118 sizeof(p->p_sysent->sv_usrstack)));
119}
120
121static int
122sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS)
123{
124 struct proc *p;
125
126 p = curproc;
127 return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot,
128 sizeof(p->p_sysent->sv_stackprot)));
129}
130
/*
 * Table of registered image activators.  Each of the items is a pointer
 * to a `const struct execsw', hence the double pointer here.
 * kern_execve() walks the table until it reaches a NULL entry, skipping
 * entries whose ex_imgact is NULL.
 */
static const struct execsw **execsw;
136
137/*
138 * In-kernel implementation of execve(). All arguments are assumed to be
139 * userspace pointers from the passed thread.
140 *
141 * MPSAFE
142 */
143static int
144kern_execve(td, fname, argv, envv, mac_p)
145 struct thread *td;
146 char *fname;
147 char **argv;
148 char **envv;
149 struct mac *mac_p;
150{
151 struct proc *p = td->td_proc;
152 struct nameidata nd, *ndp;
153 struct ucred *newcred = NULL, *oldcred;
154 struct uidinfo *euip;
155 register_t *stack_base;
156 int error, len, i;
157 struct image_params image_params, *imgp;
158 struct vattr attr;
159 int (*img_first)(struct image_params *);
160 struct pargs *oldargs = NULL, *newargs = NULL;
161 struct sigacts *oldsigacts, *newsigacts;
162#ifdef KTRACE
163 struct vnode *tracevp = NULL;
164 struct ucred *tracecred = NULL;
165#endif
166 struct vnode *textvp = NULL;
167 int credential_changing;
168 int textset;
169#ifdef MAC
170 struct label interplabel; /* label of the interpreted vnode */
171 struct label execlabel; /* optional label argument */
172 int will_transition, interplabelvalid = 0;
173#endif
174
175 imgp = &image_params;
176
177 /*
178 * Lock the process and set the P_INEXEC flag to indicate that
179 * it should be left alone until we're done here. This is
180 * necessary to avoid race conditions - e.g. in ptrace() -
181 * that might allow a local user to illicitly obtain elevated
182 * privileges.
183 */
184 PROC_LOCK(p);
185 KASSERT((p->p_flag & P_INEXEC) == 0,
186 ("%s(): process already has P_INEXEC flag", __func__));
187 if (p->p_flag & P_SA || p->p_numthreads > 1) {
188 if (thread_single(SINGLE_EXIT)) {
189 PROC_UNLOCK(p);
190 return (ERESTART); /* Try again later. */
191 }
192 /*
193 * If we get here all other threads are dead,
194 * so unset the associated flags and lose KSE mode.
195 */
196 p->p_flag &= ~P_SA;
197 td->td_mailbox = NULL;
198 thread_single_end();
199 }
200 p->p_flag |= P_INEXEC;
201 PROC_UNLOCK(p);
202
203 /*
204 * Initialize part of the common data
205 */
206 imgp->proc = p;
207 imgp->userspace_argv = argv;
208 imgp->userspace_envv = envv;
209 imgp->execlabel = NULL;
210 imgp->attr = &attr;
211 imgp->argc = imgp->envc = 0;
212 imgp->argv0 = NULL;
213 imgp->entry_addr = 0;
214 imgp->vmspace_destroyed = 0;
215 imgp->interpreted = 0;
216 imgp->interpreter_name[0] = '\0';
217 imgp->auxargs = NULL;
218 imgp->vp = NULL;
219 imgp->object = NULL;
220 imgp->firstpage = NULL;
221 imgp->ps_strings = 0;
222 imgp->auxarg_size = 0;
223
224#ifdef MAC
225 error = mac_execve_enter(imgp, mac_p, &execlabel);
226 if (error) {
227 mtx_lock(&Giant);
228 goto exec_fail;
229 }
230#endif
231
232 /*
233 * Allocate temporary demand zeroed space for argument and
234 * environment strings
235 */
236 imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX +
237 PAGE_SIZE);
238 if (imgp->stringbase == NULL) {
239 error = ENOMEM;
240 mtx_lock(&Giant);
241 goto exec_fail;
242 }
243 imgp->stringp = imgp->stringbase;
244 imgp->stringspace = ARG_MAX;
245 imgp->image_header = imgp->stringbase + ARG_MAX;
246
247 /*
248 * Translate the file name. namei() returns a vnode pointer
249 * in ni_vp amoung other things.
250 */
251 ndp = &nd;
252 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
253 UIO_USERSPACE, fname, td);
254
255 mtx_lock(&Giant);
256interpret:
257
258 error = namei(ndp);
259 if (error) {
260 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
261 ARG_MAX + PAGE_SIZE);
262 goto exec_fail;
263 }
264
265 imgp->vp = ndp->ni_vp;
266 imgp->fname = fname;
267
268 /*
269 * Check file permissions (also 'opens' file)
270 */
271 error = exec_check_permissions(imgp);
272 if (error)
273 goto exec_fail_dealloc;
274
275 if (VOP_GETVOBJECT(imgp->vp, &imgp->object) == 0)
276 vm_object_reference(imgp->object);
277
278 /*
279 * Set VV_TEXT now so no one can write to the executable while we're
280 * activating it.
281 *
282 * Remember if this was set before and unset it in case this is not
283 * actually an executable image.
284 */
285 textset = imgp->vp->v_vflag & VV_TEXT;
286 imgp->vp->v_vflag |= VV_TEXT;
287
288 error = exec_map_first_page(imgp);
289 if (error)
290 goto exec_fail_dealloc;
291
292 /*
293 * If the current process has a special image activator it
294 * wants to try first, call it. For example, emulating shell
295 * scripts differently.
296 */
297 error = -1;
298 if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
299 error = img_first(imgp);
300
301 /*
302 * Loop through the list of image activators, calling each one.
303 * An activator returns -1 if there is no match, 0 on success,
304 * and an error otherwise.
305 */
306 for (i = 0; error == -1 && execsw[i]; ++i) {
307 if (execsw[i]->ex_imgact == NULL ||
308 execsw[i]->ex_imgact == img_first) {
309 continue;
310 }
311 error = (*execsw[i]->ex_imgact)(imgp);
312 }
313
314 if (error) {
315 if (error == -1) {
316 if (textset == 0)
317 imgp->vp->v_vflag &= ~VV_TEXT;
318 error = ENOEXEC;
319 }
320 goto exec_fail_dealloc;
321 }
322
323 /*
324 * Special interpreter operation, cleanup and loop up to try to
325 * activate the interpreter.
326 */
327 if (imgp->interpreted) {
328 exec_unmap_first_page(imgp);
329 /*
330 * VV_TEXT needs to be unset for scripts. There is a short
331 * period before we determine that something is a script where
332 * VV_TEXT will be set. The vnode lock is held over this
333 * entire period so nothing should illegitimately be blocked.
334 */
335 imgp->vp->v_vflag &= ~VV_TEXT;
336 /* free name buffer and old vnode */
337 NDFREE(ndp, NDF_ONLY_PNBUF);
338#ifdef MAC
339 mac_init_vnode_label(&interplabel);
340 mac_copy_vnode_label(&ndp->ni_vp->v_label, &interplabel);
341 interplabelvalid = 1;
342#endif
343 vput(ndp->ni_vp);
344 vm_object_deallocate(imgp->object);
345 imgp->object = NULL;
346 /* set new name to that of the interpreter */
347 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
348 UIO_SYSSPACE, imgp->interpreter_name, td);
349 goto interpret;
350 }
351
352 /*
353 * Copy out strings (args and env) and initialize stack base
354 */
355 if (p->p_sysent->sv_copyout_strings)
356 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
357 else
358 stack_base = exec_copyout_strings(imgp);
359
360 /*
361 * If custom stack fixup routine present for this process
362 * let it do the stack setup.
363 * Else stuff argument count as first item on stack
364 */
365 if (p->p_sysent->sv_fixup)
366 (*p->p_sysent->sv_fixup)(&stack_base, imgp);
367 else
368 suword(--stack_base, imgp->argc);
369
370 /*
371 * For security and other reasons, the file descriptor table cannot
372 * be shared after an exec.
373 */
374 FILEDESC_LOCK(p->p_fd);
375 if (p->p_fd->fd_refcnt > 1) {
376 struct filedesc *tmp;
377
378 tmp = fdcopy(td->td_proc->p_fd);
379 FILEDESC_UNLOCK(p->p_fd);
380 fdfree(td);
381 p->p_fd = tmp;
382 } else
383 FILEDESC_UNLOCK(p->p_fd);
384
385 /*
386 * Malloc things before we need locks.
387 */
388 newcred = crget();
389 euip = uifind(attr.va_uid);
390 i = imgp->endargs - imgp->stringbase;
391 if (ps_arg_cache_limit >= i + sizeof(struct pargs))
392 newargs = pargs_alloc(i);
393
394 /* close files on exec */
395 fdcloseexec(td);
396
397 /* Get a reference to the vnode prior to locking the proc */
398 VREF(ndp->ni_vp);
399
400 /*
401 * For security and other reasons, signal handlers cannot
402 * be shared after an exec. The new process gets a copy of the old
403 * handlers. In execsigs(), the new process will have its signals
404 * reset.
405 */
406 PROC_LOCK(p);
407 if (sigacts_shared(p->p_sigacts)) {
408 oldsigacts = p->p_sigacts;
409 PROC_UNLOCK(p);
410 newsigacts = sigacts_alloc();
411 sigacts_copy(newsigacts, oldsigacts);
412 PROC_LOCK(p);
413 p->p_sigacts = newsigacts;
414 } else
415 oldsigacts = NULL;
416
417 /* Stop profiling */
418 stopprofclock(p);
419
420 /* reset caught signals */
421 execsigs(p);
422
423 /* name this process - nameiexec(p, ndp) */
424 len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN);
425 bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len);
426 p->p_comm[len] = 0;
427
428 /*
429 * mark as execed, wakeup the process that vforked (if any) and tell
430 * it that it now has its own resources back
431 */
432 p->p_flag |= P_EXEC;
433 if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
434 p->p_flag &= ~P_PPWAIT;
435 wakeup(p->p_pptr);
436 }
437
438 /*
439 * Implement image setuid/setgid.
440 *
441 * Don't honor setuid/setgid if the filesystem prohibits it or if
442 * the process is being traced.
443 *
444 * XXXMAC: For the time being, use NOSUID to also prohibit
445 * transitions on the file system.
446 */
447 oldcred = p->p_ucred;
448 credential_changing = 0;
449 credential_changing |= (attr.va_mode & VSUID) && oldcred->cr_uid !=
450 attr.va_uid;
451 credential_changing |= (attr.va_mode & VSGID) && oldcred->cr_gid !=
452 attr.va_gid;
453#ifdef MAC
454 will_transition = mac_execve_will_transition(oldcred, imgp->vp,
455 interplabelvalid ? &interplabel : NULL, imgp);
456 credential_changing |= will_transition;
457#endif
458
459 if (credential_changing &&
460 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
461 (p->p_flag & P_TRACED) == 0) {
462 /*
463 * Turn off syscall tracing for set-id programs, except for
464 * root. Record any set-id flags first to make sure that
465 * we do not regain any tracing during a possible block.
466 */
467 setsugid(p);
468#ifdef KTRACE
469 if (p->p_tracevp != NULL && suser_cred(oldcred, PRISON_ROOT)) {
470 mtx_lock(&ktrace_mtx);
471 p->p_traceflag = 0;
472 tracevp = p->p_tracevp;
473 p->p_tracevp = NULL;
474 tracecred = p->p_tracecred;
475 p->p_tracecred = NULL;
476 mtx_unlock(&ktrace_mtx);
477 }
478#endif
479 /*
480 * Close any file descriptors 0..2 that reference procfs,
481 * then make sure file descriptors 0..2 are in use.
482 *
483 * setugidsafety() may call closef() and then pfind()
484 * which may grab the process lock.
485 * fdcheckstd() may call falloc() which may block to
486 * allocate memory, so temporarily drop the process lock.
487 */
488 PROC_UNLOCK(p);
489 setugidsafety(td);
490 error = fdcheckstd(td);
491 if (error != 0)
492 goto done1;
493 PROC_LOCK(p);
494 /*
495 * Set the new credentials.
496 */
497 crcopy(newcred, oldcred);
498 if (attr.va_mode & VSUID)
499 change_euid(newcred, euip);
500 if (attr.va_mode & VSGID)
501 change_egid(newcred, attr.va_gid);
502#ifdef MAC
503 if (will_transition) {
504 mac_execve_transition(oldcred, newcred, imgp->vp,
505 interplabelvalid ? &interplabel : NULL, imgp);
506 }
507#endif
508 /*
509 * Implement correct POSIX saved-id behavior.
510 *
511 * XXXMAC: Note that the current logic will save the
512 * uid and gid if a MAC domain transition occurs, even
513 * though maybe it shouldn't.
514 */
515 change_svuid(newcred, newcred->cr_uid);
516 change_svgid(newcred, newcred->cr_gid);
517 p->p_ucred = newcred;
518 newcred = NULL;
519 } else {
520 if (oldcred->cr_uid == oldcred->cr_ruid &&
521 oldcred->cr_gid == oldcred->cr_rgid)
522 p->p_flag &= ~P_SUGID;
523 /*
524 * Implement correct POSIX saved-id behavior.
525 *
526 * XXX: It's not clear that the existing behavior is
527 * POSIX-compliant. A number of sources indicate that the
528 * saved uid/gid should only be updated if the new ruid is
529 * not equal to the old ruid, or the new euid is not equal
530 * to the old euid and the new euid is not equal to the old
531 * ruid. The FreeBSD code always updates the saved uid/gid.
532 * Also, this code uses the new (replaced) euid and egid as
533 * the source, which may or may not be the right ones to use.
534 */
535 if (oldcred->cr_svuid != oldcred->cr_uid ||
536 oldcred->cr_svgid != oldcred->cr_gid) {
537 crcopy(newcred, oldcred);
538 change_svuid(newcred, newcred->cr_uid);
539 change_svgid(newcred, newcred->cr_gid);
540 p->p_ucred = newcred;
541 newcred = NULL;
542 }
543 }
544
545 /*
546 * Store the vp for use in procfs. This vnode was referenced prior
547 * to locking the proc lock.
548 */
549 textvp = p->p_textvp;
550 p->p_textvp = ndp->ni_vp;
551
552 /*
553 * Notify others that we exec'd, and clear the P_INEXEC flag
554 * as we're now a bona fide freshly-execed process.
555 */
556 KNOTE(&p->p_klist, NOTE_EXEC);
557 p->p_flag &= ~P_INEXEC;
558
559 /*
560 * If tracing the process, trap to debugger so breakpoints
561 * can be set before the program executes.
562 */
563 if (p->p_flag & P_TRACED)
564 psignal(p, SIGTRAP);
565
566 /* clear "fork but no exec" flag, as we _are_ execing */
567 p->p_acflag &= ~AFORK;
568
569 /* Free any previous argument cache */
570 oldargs = p->p_args;
571 p->p_args = NULL;
572
573 /* Cache arguments if they fit inside our allowance */
574 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
575 bcopy(imgp->stringbase, newargs->ar_args, i);
576 p->p_args = newargs;
577 newargs = NULL;
578 }
579 PROC_UNLOCK(p);
580
581 /* Set values passed into the program in registers. */
582 if (p->p_sysent->sv_setregs)
583 (*p->p_sysent->sv_setregs)(td, imgp->entry_addr,
584 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
585 else
586 exec_setregs(td, imgp->entry_addr,
587 (u_long)(uintptr_t)stack_base, imgp->ps_strings);
588
589done1:
590 /*
591 * Free any resources malloc'd earlier that we didn't use.
592 */
593 uifree(euip);
594 if (newcred == NULL)
595 crfree(oldcred);
596 else
597 crfree(newcred);
598 /*
599 * Handle deferred decrement of ref counts.
600 */
601 if (textvp != NULL)
602 vrele(textvp);
603 if (ndp->ni_vp && error != 0)
604 vrele(ndp->ni_vp);
605#ifdef KTRACE
606 if (tracevp != NULL)
607 vrele(tracevp);
608 if (tracecred != NULL)
609 crfree(tracecred);
610#endif
611 if (oldargs != NULL)
612 pargs_drop(oldargs);
613 if (newargs != NULL)
614 pargs_drop(newargs);
615 if (oldsigacts != NULL)
616 sigacts_free(oldsigacts);
617
618exec_fail_dealloc:
619
620 /*
621 * free various allocated resources
622 */
623 if (imgp->firstpage)
624 exec_unmap_first_page(imgp);
625
626 if (imgp->vp) {
627 NDFREE(ndp, NDF_ONLY_PNBUF);
628 vput(imgp->vp);
629 }
630
631 if (imgp->stringbase != NULL)
632 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
633 ARG_MAX + PAGE_SIZE);
634
635 if (imgp->object)
636 vm_object_deallocate(imgp->object);
637
638 if (error == 0) {
639 /*
640 * Stop the process here if its stop event mask has
641 * the S_EXEC bit set.
642 */
643 STOPEVENT(p, S_EXEC, 0);
644 goto done2;
645 }
646
647exec_fail:
648 /* we're done here, clear P_INEXEC */
649 PROC_LOCK(p);
650 p->p_flag &= ~P_INEXEC;
651 PROC_UNLOCK(p);
652
653 if (imgp->vmspace_destroyed) {
654 /* sorry, no more process anymore. exit gracefully */
655#ifdef MAC
656 mac_execve_exit(imgp);
657 if (interplabelvalid)
658 mac_destroy_vnode_label(&interplabel);
659#endif
660 exit1(td, W_EXITCODE(0, SIGABRT));
661 /* NOT REACHED */
662 error = 0;
663 }
664done2:
665#ifdef MAC
666 mac_execve_exit(imgp);
667 if (interplabelvalid)
668 mac_destroy_vnode_label(&interplabel);
669#endif
670 mtx_unlock(&Giant);
671 return (error);
672}
673
674#ifndef _SYS_SYSPROTO_H_
675struct execve_args {
676 char *fname;
677 char **argv;
678 char **envv;
679};
680#endif
681
682/*
683 * MPSAFE
684 */
685int
686execve(td, uap)
687 struct thread *td;
688 struct execve_args /* {
689 char *fname;
690 char **argv;
691 char **envv;
692 } */ *uap;
693{
694
695 return (kern_execve(td, uap->fname, uap->argv, uap->envv, NULL));
696}
697
698#ifndef _SYS_SYSPROTO_H_
699struct __mac_execve_args {
700 char *fname;
701 char **argv;
702 char **envv;
703 struct mac *mac_p;
704};
705#endif
706
707/*
708 * MPSAFE
709 */
710int
711__mac_execve(td, uap)
712 struct thread *td;
713 struct __mac_execve_args /* {
714 char *fname;
715 char **argv;
716 char **envv;
717 struct mac *mac_p;
718 } */ *uap;
719{
720
721#ifdef MAC
722 return (kern_execve(td, uap->fname, uap->argv, uap->envv,
723 uap->mac_p));
724#else
725 return (ENOSYS);
726#endif
727}
728
729int
730exec_map_first_page(imgp)
731 struct image_params *imgp;
732{
733 int rv, i;
734 int initial_pagein;
735 vm_page_t ma[VM_INITIAL_PAGEIN];
736 vm_object_t object;
737
738 GIANT_REQUIRED;
739
740 if (imgp->firstpage) {
741 exec_unmap_first_page(imgp);
742 }
743
744 VOP_GETVOBJECT(imgp->vp, &object);
745 VM_OBJECT_LOCK(object);
746 ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
747 if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
748 initial_pagein = VM_INITIAL_PAGEIN;
749 if (initial_pagein > object->size)
750 initial_pagein = object->size;
751 for (i = 1; i < initial_pagein; i++) {
752 if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
753 if (ma[i]->valid)
754 break;
755 vm_page_lock_queues();
756 if ((ma[i]->flags & PG_BUSY) || ma[i]->busy) {
757 vm_page_unlock_queues();
758 break;
759 }
760 vm_page_busy(ma[i]);
761 vm_page_unlock_queues();
762 } else {
763 ma[i] = vm_page_alloc(object, i,
764 VM_ALLOC_NORMAL);
765 if (ma[i] == NULL)
766 break;
767 }
768 }
769 initial_pagein = i;
770 rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
771 ma[0] = vm_page_lookup(object, 0);
772 if ((rv != VM_PAGER_OK) || (ma[0] == NULL) ||
773 (ma[0]->valid == 0)) {
774 if (ma[0]) {
775 vm_page_lock_queues();
776 pmap_remove_all(ma[0]);
777 vm_page_free(ma[0]);
778 vm_page_unlock_queues();
779 }
780 VM_OBJECT_UNLOCK(object);
781 return (EIO);
782 }
783 }
784 vm_page_lock_queues();
785 vm_page_wire(ma[0]);
786 vm_page_wakeup(ma[0]);
787 vm_page_unlock_queues();
788 VM_OBJECT_UNLOCK(object);
789
790 pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
791 imgp->firstpage = ma[0];
792
793 return (0);
794}
795
796void
797exec_unmap_first_page(imgp)
798 struct image_params *imgp;
799{
800 GIANT_REQUIRED;
801
802 if (imgp->firstpage) {
803 pmap_qremove((vm_offset_t)imgp->image_header, 1);
804 vm_page_lock_queues();
805 vm_page_unwire(imgp->firstpage, 1);
806 vm_page_unlock_queues();
807 imgp->firstpage = NULL;
808 }
809}
810
811/*
812 * Destroy old address space, and allocate a new stack
813 * The new stack is only SGROWSIZ large because it is grown
814 * automatically in trap.c.
815 */
816int
817exec_new_vmspace(imgp, sv)
818 struct image_params *imgp;
819 struct sysentvec *sv;
820{
821 int error;
822 struct proc *p = imgp->proc;
823 struct vmspace *vmspace = p->p_vmspace;
824 vm_offset_t stack_addr;
825 vm_map_t map;
826
827 GIANT_REQUIRED;
828
829 imgp->vmspace_destroyed = 1;
830
831 EVENTHANDLER_INVOKE(process_exec, p);
832
833 /*
834 * Here is as good a place as any to do any resource limit cleanups.
835 * This is needed if a 64 bit binary exec's a 32 bit binary - the
836 * data size limit may need to be changed to a value that makes
837 * sense for the 32 bit binary.
838 */
839 if (sv->sv_fixlimits)
840 sv->sv_fixlimits(imgp);
841
842 /*
843 * Blow away entire process VM, if address space not shared,
844 * otherwise, create a new VM space so that other threads are
845 * not disrupted
846 */
847 map = &vmspace->vm_map;
848 if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv->sv_minuser &&
849 vm_map_max(map) == sv->sv_maxuser) {
850 shmexit(vmspace);
851 vm_page_lock_queues();
852 pmap_remove_pages(vmspace_pmap(vmspace), vm_map_min(map),
853 vm_map_max(map));
854 vm_page_unlock_queues();
855 vm_map_remove(map, vm_map_min(map), vm_map_max(map));
856 } else {
857 vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
858 vmspace = p->p_vmspace;
859 map = &vmspace->vm_map;
860 }
861
862 /* Allocate a new stack */
863 stack_addr = sv->sv_usrstack - maxssiz;
864 error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
865 sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
866 if (error)
867 return (error);
868
869#ifdef __ia64__
870 /* Allocate a new register stack */
871 stack_addr = sv->sv_usrstack - 2 * maxssiz;
871 stack_addr = IA64_BACKINGSTORE;
872 error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
873 sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP);
874 if (error)
875 return (error);
876 FIRST_THREAD_IN_PROC(p)->td_md.md_bspstore = stack_addr;
877#endif
878
879 /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
880 * VM_STACK case, but they are still used to monitor the size of the
881 * process stack so we can check the stack rlimit.
882 */
883 vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
884 vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - maxssiz;
885
886 return (0);
887}
888
889/*
890 * Copy out argument and environment strings from the old process
891 * address space into the temporary string buffer.
892 */
893int
894exec_extract_strings(imgp)
895 struct image_params *imgp;
896{
897 char **argv, **envv;
898 char *argp, *envp;
899 int error;
900 size_t length;
901
902 /*
903 * extract arguments first
904 */
905
906 argv = imgp->userspace_argv;
907
908 if (argv) {
909 argp = (caddr_t)(intptr_t)fuword(argv);
910 if (argp == (caddr_t)-1)
911 return (EFAULT);
912 if (argp)
913 argv++;
914 if (imgp->argv0)
915 argp = imgp->argv0;
916 if (argp) {
917 do {
918 if (argp == (caddr_t)-1)
919 return (EFAULT);
920 if ((error = copyinstr(argp, imgp->stringp,
921 imgp->stringspace, &length))) {
922 if (error == ENAMETOOLONG)
923 return (E2BIG);
924 return (error);
925 }
926 imgp->stringspace -= length;
927 imgp->stringp += length;
928 imgp->argc++;
929 } while ((argp = (caddr_t)(intptr_t)fuword(argv++)));
930 }
931 }
932
933 imgp->endargs = imgp->stringp;
934
935 /*
936 * extract environment strings
937 */
938
939 envv = imgp->userspace_envv;
940
941 if (envv) {
942 while ((envp = (caddr_t)(intptr_t)fuword(envv++))) {
943 if (envp == (caddr_t)-1)
944 return (EFAULT);
945 if ((error = copyinstr(envp, imgp->stringp,
946 imgp->stringspace, &length))) {
947 if (error == ENAMETOOLONG)
948 return (E2BIG);
949 return (error);
950 }
951 imgp->stringspace -= length;
952 imgp->stringp += length;
953 imgp->envc++;
954 }
955 }
956
957 return (0);
958}
959
960/*
961 * Copy strings out to the new process address space, constructing
962 * new arg and env vector tables. Return a pointer to the base
963 * so that it can be used as the initial stack pointer.
964 */
965register_t *
966exec_copyout_strings(imgp)
967 struct image_params *imgp;
968{
969 int argc, envc;
970 char **vectp;
971 char *stringp, *destp;
972 register_t *stack_base;
973 struct ps_strings *arginfo;
974 struct proc *p;
975 int szsigcode;
976
977 /*
978 * Calculate string base and vector table pointers.
979 * Also deal with signal trampoline code for this exec type.
980 */
981 p = imgp->proc;
982 szsigcode = 0;
983 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
984 if (p->p_sysent->sv_szsigcode != NULL)
985 szsigcode = *(p->p_sysent->sv_szsigcode);
986 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
987 roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
988
989 /*
990 * install sigcode
991 */
992 if (szsigcode)
993 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
994 szsigcode), szsigcode);
995
996 /*
997 * If we have a valid auxargs ptr, prepare some room
998 * on the stack.
999 */
1000 if (imgp->auxargs) {
1001 /*
1002 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
1003 * lower compatibility.
1004 */
1005 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
1006 (AT_COUNT * 2);
1007 /*
1008 * The '+ 2' is for the null pointers at the end of each of
1009 * the arg and env vector sets,and imgp->auxarg_size is room
1010 * for argument of Runtime loader.
1011 */
1012 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2 +
1013 imgp->auxarg_size) * sizeof(char *));
1014
1015 } else
1016 /*
1017 * The '+ 2' is for the null pointers at the end of each of
1018 * the arg and env vector sets
1019 */
1020 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2) *
1021 sizeof(char *));
1022
1023 /*
1024 * vectp also becomes our initial stack base
1025 */
1026 stack_base = (register_t *)vectp;
1027
1028 stringp = imgp->stringbase;
1029 argc = imgp->argc;
1030 envc = imgp->envc;
1031
1032 /*
1033 * Copy out strings - arguments and environment.
1034 */
1035 copyout(stringp, destp, ARG_MAX - imgp->stringspace);
1036
1037 /*
1038 * Fill in "ps_strings" struct for ps, w, etc.
1039 */
1040 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
1041 suword(&arginfo->ps_nargvstr, argc);
1042
1043 /*
1044 * Fill in argument portion of vector table.
1045 */
1046 for (; argc > 0; --argc) {
1047 suword(vectp++, (long)(intptr_t)destp);
1048 while (*stringp++ != 0)
1049 destp++;
1050 destp++;
1051 }
1052
1053 /* a null vector table pointer separates the argp's from the envp's */
1054 suword(vectp++, 0);
1055
1056 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
1057 suword(&arginfo->ps_nenvstr, envc);
1058
1059 /*
1060 * Fill in environment portion of vector table.
1061 */
1062 for (; envc > 0; --envc) {
1063 suword(vectp++, (long)(intptr_t)destp);
1064 while (*stringp++ != 0)
1065 destp++;
1066 destp++;
1067 }
1068
1069 /* end of vector table is a null pointer */
1070 suword(vectp, 0);
1071
1072 return (stack_base);
1073}
1074
1075/*
1076 * Check permissions of file to execute.
1077 * Called with imgp->vp locked.
1078 * Return 0 for success or error code on failure.
1079 */
1080int
1081exec_check_permissions(imgp)
1082 struct image_params *imgp;
1083{
1084 struct vnode *vp = imgp->vp;
1085 struct vattr *attr = imgp->attr;
1086 struct thread *td;
1087 int error;
1088
1089 td = curthread; /* XXXKSE */
1090
1091 /* Get file attributes */
1092 error = VOP_GETATTR(vp, attr, td->td_ucred, td);
1093 if (error)
1094 return (error);
1095
1096#ifdef MAC
1097 error = mac_check_vnode_exec(td->td_ucred, imgp->vp, imgp);
1098 if (error)
1099 return (error);
1100#endif
1101
1102 /*
1103 * 1) Check if file execution is disabled for the filesystem that this
1104 * file resides on.
1105 * 2) Insure that at least one execute bit is on - otherwise root
1106 * will always succeed, and we don't want to happen unless the
1107 * file really is executable.
1108 * 3) Insure that the file is a regular file.
1109 */
1110 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
1111 ((attr->va_mode & 0111) == 0) ||
1112 (attr->va_type != VREG))
1113 return (EACCES);
1114
1115 /*
1116 * Zero length files can't be exec'd
1117 */
1118 if (attr->va_size == 0)
1119 return (ENOEXEC);
1120
1121 /*
1122 * Check for execute permission to file based on current credentials.
1123 */
1124 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1125 if (error)
1126 return (error);
1127
1128 /*
1129 * Check number of open-for-writes on the file and deny execution
1130 * if there are any.
1131 */
1132 if (vp->v_writecount)
1133 return (ETXTBSY);
1134
1135 /*
1136 * Call filesystem specific open routine (which does nothing in the
1137 * general case).
1138 */
1139 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1);
1140 return (error);
1141}
1142
1143/*
1144 * Exec handler registration
1145 */
1146int
1147exec_register(execsw_arg)
1148 const struct execsw *execsw_arg;
1149{
1150 const struct execsw **es, **xs, **newexecsw;
1151 int count = 2; /* New slot and trailing NULL */
1152
1153 if (execsw)
1154 for (es = execsw; *es; es++)
1155 count++;
1156 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1157 if (newexecsw == NULL)
1158 return (ENOMEM);
1159 xs = newexecsw;
1160 if (execsw)
1161 for (es = execsw; *es; es++)
1162 *xs++ = *es;
1163 *xs++ = execsw_arg;
1164 *xs = NULL;
1165 if (execsw)
1166 free(execsw, M_TEMP);
1167 execsw = newexecsw;
1168 return (0);
1169}
1170
1171int
1172exec_unregister(execsw_arg)
1173 const struct execsw *execsw_arg;
1174{
1175 const struct execsw **es, **xs, **newexecsw;
1176 int count = 1;
1177
1178 if (execsw == NULL)
1179 panic("unregister with no handlers left?\n");
1180
1181 for (es = execsw; *es; es++) {
1182 if (*es == execsw_arg)
1183 break;
1184 }
1185 if (*es == NULL)
1186 return (ENOENT);
1187 for (es = execsw; *es; es++)
1188 if (*es != execsw_arg)
1189 count++;
1190 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1191 if (newexecsw == NULL)
1192 return (ENOMEM);
1193 xs = newexecsw;
1194 for (es = execsw; *es; es++)
1195 if (*es != execsw_arg)
1196 *xs++ = *es;
1197 *xs = NULL;
1198 if (execsw)
1199 free(execsw, M_TEMP);
1200 execsw = newexecsw;
1201 return (0);
1202}
872 error = vm_map_stack(map, stack_addr, (vm_size_t)maxssiz,
873 sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP);
874 if (error)
875 return (error);
876 FIRST_THREAD_IN_PROC(p)->td_md.md_bspstore = stack_addr;
877#endif
878
879 /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
880 * VM_STACK case, but they are still used to monitor the size of the
881 * process stack so we can check the stack rlimit.
882 */
883 vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
884 vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - maxssiz;
885
886 return (0);
887}
888
889/*
890 * Copy out argument and environment strings from the old process
891 * address space into the temporary string buffer.
892 */
893int
894exec_extract_strings(imgp)
895 struct image_params *imgp;
896{
897 char **argv, **envv;
898 char *argp, *envp;
899 int error;
900 size_t length;
901
902 /*
903 * extract arguments first
904 */
905
906 argv = imgp->userspace_argv;
907
908 if (argv) {
909 argp = (caddr_t)(intptr_t)fuword(argv);
910 if (argp == (caddr_t)-1)
911 return (EFAULT);
912 if (argp)
913 argv++;
914 if (imgp->argv0)
915 argp = imgp->argv0;
916 if (argp) {
917 do {
918 if (argp == (caddr_t)-1)
919 return (EFAULT);
920 if ((error = copyinstr(argp, imgp->stringp,
921 imgp->stringspace, &length))) {
922 if (error == ENAMETOOLONG)
923 return (E2BIG);
924 return (error);
925 }
926 imgp->stringspace -= length;
927 imgp->stringp += length;
928 imgp->argc++;
929 } while ((argp = (caddr_t)(intptr_t)fuword(argv++)));
930 }
931 }
932
933 imgp->endargs = imgp->stringp;
934
935 /*
936 * extract environment strings
937 */
938
939 envv = imgp->userspace_envv;
940
941 if (envv) {
942 while ((envp = (caddr_t)(intptr_t)fuword(envv++))) {
943 if (envp == (caddr_t)-1)
944 return (EFAULT);
945 if ((error = copyinstr(envp, imgp->stringp,
946 imgp->stringspace, &length))) {
947 if (error == ENAMETOOLONG)
948 return (E2BIG);
949 return (error);
950 }
951 imgp->stringspace -= length;
952 imgp->stringp += length;
953 imgp->envc++;
954 }
955 }
956
957 return (0);
958}
959
960/*
961 * Copy strings out to the new process address space, constructing
962 * new arg and env vector tables. Return a pointer to the base
963 * so that it can be used as the initial stack pointer.
964 */
965register_t *
966exec_copyout_strings(imgp)
967 struct image_params *imgp;
968{
969 int argc, envc;
970 char **vectp;
971 char *stringp, *destp;
972 register_t *stack_base;
973 struct ps_strings *arginfo;
974 struct proc *p;
975 int szsigcode;
976
977 /*
978 * Calculate string base and vector table pointers.
979 * Also deal with signal trampoline code for this exec type.
980 */
981 p = imgp->proc;
982 szsigcode = 0;
983 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
984 if (p->p_sysent->sv_szsigcode != NULL)
985 szsigcode = *(p->p_sysent->sv_szsigcode);
986 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
987 roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
988
989 /*
990 * install sigcode
991 */
992 if (szsigcode)
993 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
994 szsigcode), szsigcode);
995
996 /*
997 * If we have a valid auxargs ptr, prepare some room
998 * on the stack.
999 */
1000 if (imgp->auxargs) {
1001 /*
1002 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
1003 * lower compatibility.
1004 */
1005 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
1006 (AT_COUNT * 2);
1007 /*
1008 * The '+ 2' is for the null pointers at the end of each of
1009 * the arg and env vector sets,and imgp->auxarg_size is room
1010 * for argument of Runtime loader.
1011 */
1012 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2 +
1013 imgp->auxarg_size) * sizeof(char *));
1014
1015 } else
1016 /*
1017 * The '+ 2' is for the null pointers at the end of each of
1018 * the arg and env vector sets
1019 */
1020 vectp = (char **)(destp - (imgp->argc + imgp->envc + 2) *
1021 sizeof(char *));
1022
1023 /*
1024 * vectp also becomes our initial stack base
1025 */
1026 stack_base = (register_t *)vectp;
1027
1028 stringp = imgp->stringbase;
1029 argc = imgp->argc;
1030 envc = imgp->envc;
1031
1032 /*
1033 * Copy out strings - arguments and environment.
1034 */
1035 copyout(stringp, destp, ARG_MAX - imgp->stringspace);
1036
1037 /*
1038 * Fill in "ps_strings" struct for ps, w, etc.
1039 */
1040 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
1041 suword(&arginfo->ps_nargvstr, argc);
1042
1043 /*
1044 * Fill in argument portion of vector table.
1045 */
1046 for (; argc > 0; --argc) {
1047 suword(vectp++, (long)(intptr_t)destp);
1048 while (*stringp++ != 0)
1049 destp++;
1050 destp++;
1051 }
1052
1053 /* a null vector table pointer separates the argp's from the envp's */
1054 suword(vectp++, 0);
1055
1056 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
1057 suword(&arginfo->ps_nenvstr, envc);
1058
1059 /*
1060 * Fill in environment portion of vector table.
1061 */
1062 for (; envc > 0; --envc) {
1063 suword(vectp++, (long)(intptr_t)destp);
1064 while (*stringp++ != 0)
1065 destp++;
1066 destp++;
1067 }
1068
1069 /* end of vector table is a null pointer */
1070 suword(vectp, 0);
1071
1072 return (stack_base);
1073}
1074
1075/*
1076 * Check permissions of file to execute.
1077 * Called with imgp->vp locked.
1078 * Return 0 for success or error code on failure.
1079 */
1080int
1081exec_check_permissions(imgp)
1082 struct image_params *imgp;
1083{
1084 struct vnode *vp = imgp->vp;
1085 struct vattr *attr = imgp->attr;
1086 struct thread *td;
1087 int error;
1088
1089 td = curthread; /* XXXKSE */
1090
1091 /* Get file attributes */
1092 error = VOP_GETATTR(vp, attr, td->td_ucred, td);
1093 if (error)
1094 return (error);
1095
1096#ifdef MAC
1097 error = mac_check_vnode_exec(td->td_ucred, imgp->vp, imgp);
1098 if (error)
1099 return (error);
1100#endif
1101
1102 /*
1103 * 1) Check if file execution is disabled for the filesystem that this
1104 * file resides on.
1105 * 2) Insure that at least one execute bit is on - otherwise root
1106 * will always succeed, and we don't want to happen unless the
1107 * file really is executable.
1108 * 3) Insure that the file is a regular file.
1109 */
1110 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
1111 ((attr->va_mode & 0111) == 0) ||
1112 (attr->va_type != VREG))
1113 return (EACCES);
1114
1115 /*
1116 * Zero length files can't be exec'd
1117 */
1118 if (attr->va_size == 0)
1119 return (ENOEXEC);
1120
1121 /*
1122 * Check for execute permission to file based on current credentials.
1123 */
1124 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1125 if (error)
1126 return (error);
1127
1128 /*
1129 * Check number of open-for-writes on the file and deny execution
1130 * if there are any.
1131 */
1132 if (vp->v_writecount)
1133 return (ETXTBSY);
1134
1135 /*
1136 * Call filesystem specific open routine (which does nothing in the
1137 * general case).
1138 */
1139 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1);
1140 return (error);
1141}
1142
1143/*
1144 * Exec handler registration
1145 */
1146int
1147exec_register(execsw_arg)
1148 const struct execsw *execsw_arg;
1149{
1150 const struct execsw **es, **xs, **newexecsw;
1151 int count = 2; /* New slot and trailing NULL */
1152
1153 if (execsw)
1154 for (es = execsw; *es; es++)
1155 count++;
1156 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1157 if (newexecsw == NULL)
1158 return (ENOMEM);
1159 xs = newexecsw;
1160 if (execsw)
1161 for (es = execsw; *es; es++)
1162 *xs++ = *es;
1163 *xs++ = execsw_arg;
1164 *xs = NULL;
1165 if (execsw)
1166 free(execsw, M_TEMP);
1167 execsw = newexecsw;
1168 return (0);
1169}
1170
1171int
1172exec_unregister(execsw_arg)
1173 const struct execsw *execsw_arg;
1174{
1175 const struct execsw **es, **xs, **newexecsw;
1176 int count = 1;
1177
1178 if (execsw == NULL)
1179 panic("unregister with no handlers left?\n");
1180
1181 for (es = execsw; *es; es++) {
1182 if (*es == execsw_arg)
1183 break;
1184 }
1185 if (*es == NULL)
1186 return (ENOENT);
1187 for (es = execsw; *es; es++)
1188 if (*es != execsw_arg)
1189 count++;
1190 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
1191 if (newexecsw == NULL)
1192 return (ENOMEM);
1193 xs = newexecsw;
1194 for (es = execsw; *es; es++)
1195 if (*es != execsw_arg)
1196 *xs++ = *es;
1197 *xs = NULL;
1198 if (execsw)
1199 free(execsw, M_TEMP);
1200 execsw = newexecsw;
1201 return (0);
1202}