/* kern_sharedpage.c revision 93240 */
/*
 * Copyright (c) 1993, David Greenman
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_exec.c 93240 2002-03-26 19:20:04Z alc $
 */
28
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/acct.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/wait.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/namei.h>
#include <sys/sysent.h>
#include <sys/shm.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>

#include <machine/reg.h>
65
66MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
67
68static MALLOC_DEFINE(M_ATEXEC, "atexec", "atexec callback");
69
70/*
71 * callout list for things to do at exec time
72 */
73struct execlist {
74	execlist_fn function;
75	TAILQ_ENTRY(execlist) next;
76};
77
78TAILQ_HEAD(exec_list_head, execlist);
79static struct exec_list_head exec_list = TAILQ_HEAD_INITIALIZER(exec_list);
80
81static register_t *exec_copyout_strings(struct image_params *);
82
83/* XXX This should be vm_size_t. */
84static u_long ps_strings = PS_STRINGS;
85SYSCTL_ULONG(_kern, KERN_PS_STRINGS, ps_strings, CTLFLAG_RD, &ps_strings, 0, "");
86
87/* XXX This should be vm_size_t. */
88static u_long usrstack = USRSTACK;
89SYSCTL_ULONG(_kern, KERN_USRSTACK, usrstack, CTLFLAG_RD, &usrstack, 0, "");
90
91u_long ps_arg_cache_limit = PAGE_SIZE / 16;
92SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW,
93    &ps_arg_cache_limit, 0, "");
94
95int ps_argsopen = 1;
96SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, "");
97
98/*
99 * Each of the items is a pointer to a `const struct execsw', hence the
100 * double pointer here.
101 */
102static const struct execsw **execsw;
103
104#ifndef _SYS_SYSPROTO_H_
105struct execve_args {
106        char    *fname;
107        char    **argv;
108        char    **envv;
109};
110#endif
111
112/*
113 * execve() system call.
114 *
115 * MPSAFE
116 */
117int
118execve(td, uap)
119	struct thread *td;
120	register struct execve_args *uap;
121{
122	struct proc *p = td->td_proc;
123	struct nameidata nd, *ndp;
124	struct ucred *newcred, *oldcred;
125	register_t *stack_base;
126	int error, len, i;
127	struct image_params image_params, *imgp;
128	struct vattr attr;
129	int (*img_first)(struct image_params *);
130	struct pargs *pa;
131
132	imgp = &image_params;
133
134	/*
135	 * Lock the process and set the P_INEXEC flag to indicate that
136	 * it should be left alone until we're done here.  This is
137	 * necessary to avoid race conditions - e.g. in ptrace() -
138	 * that might allow a local user to illicitly obtain elevated
139	 * privileges.
140	 */
141	mtx_lock(&Giant);
142	PROC_LOCK(p);
143	KASSERT((p->p_flag & P_INEXEC) == 0,
144	    ("%s(): process already has P_INEXEC flag", __func__));
145	p->p_flag |= P_INEXEC;
146	PROC_UNLOCK(p);
147
148/* XXXKSE */
149/* !!!!!!!! we need abort all the other threads of this process before we */
150/* proceed beyond his point! */
151
152	/*
153	 * Initialize part of the common data
154	 */
155	imgp->proc = p;
156	imgp->uap = uap;
157	imgp->attr = &attr;
158	imgp->argc = imgp->envc = 0;
159	imgp->argv0 = NULL;
160	imgp->entry_addr = 0;
161	imgp->vmspace_destroyed = 0;
162	imgp->interpreted = 0;
163	imgp->interpreter_name[0] = '\0';
164	imgp->auxargs = NULL;
165	imgp->vp = NULL;
166	imgp->firstpage = NULL;
167	imgp->ps_strings = 0;
168	imgp->auxarg_size = 0;
169
170	/*
171	 * Allocate temporary demand zeroed space for argument and
172	 *	environment strings
173	 */
174	imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX + PAGE_SIZE);
175	if (imgp->stringbase == NULL) {
176		error = ENOMEM;
177		goto exec_fail;
178	}
179	imgp->stringp = imgp->stringbase;
180	imgp->stringspace = ARG_MAX;
181	imgp->image_header = imgp->stringbase + ARG_MAX;
182
183	/*
184	 * Translate the file name. namei() returns a vnode pointer
185	 *	in ni_vp amoung other things.
186	 */
187	ndp = &nd;
188	NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
189	    UIO_USERSPACE, uap->fname, td);
190
191interpret:
192
193	error = namei(ndp);
194	if (error) {
195		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
196			ARG_MAX + PAGE_SIZE);
197		goto exec_fail;
198	}
199
200	imgp->vp = ndp->ni_vp;
201	imgp->fname = uap->fname;
202
203	/*
204	 * Check file permissions (also 'opens' file)
205	 */
206	error = exec_check_permissions(imgp);
207	if (error) {
208		VOP_UNLOCK(imgp->vp, 0, td);
209		goto exec_fail_dealloc;
210	}
211
212	error = exec_map_first_page(imgp);
213	VOP_UNLOCK(imgp->vp, 0, td);
214	if (error)
215		goto exec_fail_dealloc;
216
217	/*
218	 *	If the current process has a special image activator it
219	 *	wants to try first, call it.   For example, emulating shell
220	 *	scripts differently.
221	 */
222	error = -1;
223	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
224		error = img_first(imgp);
225
226	/*
227	 *	Loop through the list of image activators, calling each one.
228	 *	An activator returns -1 if there is no match, 0 on success,
229	 *	and an error otherwise.
230	 */
231	for (i = 0; error == -1 && execsw[i]; ++i) {
232		if (execsw[i]->ex_imgact == NULL ||
233		    execsw[i]->ex_imgact == img_first) {
234			continue;
235		}
236		error = (*execsw[i]->ex_imgact)(imgp);
237	}
238
239	if (error) {
240		if (error == -1)
241			error = ENOEXEC;
242		goto exec_fail_dealloc;
243	}
244
245	/*
246	 * Special interpreter operation, cleanup and loop up to try to
247	 * activate the interpreter.
248	 */
249	if (imgp->interpreted) {
250		exec_unmap_first_page(imgp);
251		/* free name buffer and old vnode */
252		NDFREE(ndp, NDF_ONLY_PNBUF);
253		vrele(ndp->ni_vp);
254		/* set new name to that of the interpreter */
255		NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
256		    UIO_SYSSPACE, imgp->interpreter_name, td);
257		goto interpret;
258	}
259
260	/*
261	 * Copy out strings (args and env) and initialize stack base
262	 */
263	stack_base = exec_copyout_strings(imgp);
264	p->p_vmspace->vm_minsaddr = (char *)stack_base;
265
266	/*
267	 * If custom stack fixup routine present for this process
268	 * let it do the stack setup.
269	 * Else stuff argument count as first item on stack
270	 */
271	if (p->p_sysent->sv_fixup)
272		(*p->p_sysent->sv_fixup)(&stack_base, imgp);
273	else
274		suword(--stack_base, imgp->argc);
275
276	/*
277	 * For security and other reasons, the file descriptor table cannot
278	 * be shared after an exec.
279	 */
280	FILEDESC_LOCK(p->p_fd);
281	if (p->p_fd->fd_refcnt > 1) {
282		struct filedesc *tmp;
283
284		tmp = fdcopy(td);
285		FILEDESC_UNLOCK(p->p_fd);
286		fdfree(td);
287		p->p_fd = tmp;
288	} else
289		FILEDESC_UNLOCK(p->p_fd);
290
291	/*
292	 * For security and other reasons, signal handlers cannot
293	 * be shared after an exec. The new process gets a copy of the old
294	 * handlers. In execsigs(), the new process will have its signals
295	 * reset.
296	 */
297	if (p->p_procsig->ps_refcnt > 1) {
298		struct procsig *newprocsig;
299
300		MALLOC(newprocsig, struct procsig *, sizeof(struct procsig),
301		       M_SUBPROC, M_WAITOK);
302		bcopy(p->p_procsig, newprocsig, sizeof(*newprocsig));
303		p->p_procsig->ps_refcnt--;
304		p->p_procsig = newprocsig;
305		p->p_procsig->ps_refcnt = 1;
306		if (p->p_sigacts == &p->p_uarea->u_sigacts)
307			panic("shared procsig but private sigacts?");
308
309		p->p_uarea->u_sigacts = *p->p_sigacts;
310		p->p_sigacts = &p->p_uarea->u_sigacts;
311	}
312	/* Stop profiling */
313	stopprofclock(p);
314
315	/* close files on exec */
316	fdcloseexec(td);
317
318	/* reset caught signals */
319	execsigs(p);
320
321	/* name this process - nameiexec(p, ndp) */
322	len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN);
323	bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len);
324	p->p_comm[len] = 0;
325
326	/*
327	 * mark as execed, wakeup the process that vforked (if any) and tell
328	 * it that it now has its own resources back
329	 */
330	PROC_LOCK(p);
331	p->p_flag |= P_EXEC;
332	if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
333		p->p_flag &= ~P_PPWAIT;
334		wakeup((caddr_t)p->p_pptr);
335	}
336
337	/*
338	 * Implement image setuid/setgid.
339	 *
340	 * Don't honor setuid/setgid if the filesystem prohibits it or if
341	 * the process is being traced.
342	 */
343	oldcred = p->p_ucred;
344	newcred = NULL;
345	if ((((attr.va_mode & VSUID) && oldcred->cr_uid != attr.va_uid) ||
346	     ((attr.va_mode & VSGID) && oldcred->cr_gid != attr.va_gid)) &&
347	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
348	    (p->p_flag & P_TRACED) == 0) {
349		PROC_UNLOCK(p);
350		/*
351		 * Turn off syscall tracing for set-id programs, except for
352		 * root.  Record any set-id flags first to make sure that
353		 * we do not regain any tracing during a possible block.
354		 */
355		setsugid(p);
356		if (p->p_tracep && suser_xxx(oldcred, NULL, PRISON_ROOT)) {
357			struct vnode *vtmp;
358
359			if ((vtmp = p->p_tracep) != NULL) {
360				p->p_tracep = NULL;
361				p->p_traceflag = 0;
362				vrele(vtmp);
363			}
364		}
365		/*
366		 * Set the new credentials.
367		 */
368		newcred = crdup(oldcred);
369		if (attr.va_mode & VSUID)
370			change_euid(newcred, attr.va_uid);
371		if (attr.va_mode & VSGID)
372			change_egid(newcred, attr.va_gid);
373		setugidsafety(td);
374	} else {
375		if (oldcred->cr_uid == oldcred->cr_ruid &&
376		    oldcred->cr_gid == oldcred->cr_rgid)
377			p->p_flag &= ~P_SUGID;
378		PROC_UNLOCK(p);
379	}
380
381	/*
382	 * Implement correct POSIX saved-id behavior.
383	 *
384	 * XXX: It's not clear that the existing behavior is
385	 * POSIX-compliant.  A number of sources indicate that the saved
386	 * uid/gid should only be updated if the new ruid is not equal to
387	 * the old ruid, or the new euid is not equal to the old euid and
388	 * the new euid is not equal to the old ruid.  The FreeBSD code
389	 * always updates the saved uid/gid.  Also, this code uses the new
390	 * (replaced) euid and egid as the source, which may or may not be
391	 * the right ones to use.
392	 */
393	if (newcred == NULL) {
394		if (oldcred->cr_svuid != oldcred->cr_uid ||
395		    oldcred->cr_svgid != oldcred->cr_gid) {
396			newcred = crdup(oldcred);
397			change_svuid(newcred, newcred->cr_uid);
398			change_svgid(newcred, newcred->cr_gid);
399		}
400	} else {
401		change_svuid(newcred, newcred->cr_uid);
402		change_svgid(newcred, newcred->cr_gid);
403	}
404
405	if (newcred != NULL) {
406		PROC_LOCK(p);
407		p->p_ucred = newcred;
408		PROC_UNLOCK(p);
409		crfree(oldcred);
410	}
411
412	/*
413	 * Store the vp for use in procfs
414	 */
415	if (p->p_textvp)		/* release old reference */
416		vrele(p->p_textvp);
417	VREF(ndp->ni_vp);
418	p->p_textvp = ndp->ni_vp;
419
420	/*
421	 * Notify others that we exec'd, and clear the P_INEXEC flag
422	 * as we're now a bona fide freshly-execed process.
423	 */
424	PROC_LOCK(p);
425	KNOTE(&p->p_klist, NOTE_EXEC);
426	p->p_flag &= ~P_INEXEC;
427
428	/*
429	 * If tracing the process, trap to debugger so breakpoints
430	 * can be set before the program executes.
431	 */
432	_STOPEVENT(p, S_EXEC, 0);
433
434	if (p->p_flag & P_TRACED)
435		psignal(p, SIGTRAP);
436
437	/* clear "fork but no exec" flag, as we _are_ execing */
438	p->p_acflag &= ~AFORK;
439
440	/* Free any previous argument cache */
441	pa = p->p_args;
442	p->p_args = NULL;
443	PROC_UNLOCK(p);
444	if (pa != NULL && --pa->ar_ref == 0)
445		FREE(pa, M_PARGS);
446
447	/* Set values passed into the program in registers. */
448	setregs(td, imgp->entry_addr, (u_long)(uintptr_t)stack_base,
449	    imgp->ps_strings);
450
451	/* Cache arguments if they fit inside our allowance */
452	i = imgp->endargs - imgp->stringbase;
453	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
454		MALLOC(pa, struct pargs *, sizeof(struct pargs) + i,
455		    M_PARGS, M_WAITOK);
456		pa->ar_ref = 1;
457		pa->ar_length = i;
458		bcopy(imgp->stringbase, pa->ar_args, i);
459		PROC_LOCK(p);
460		p->p_args = pa;
461		PROC_UNLOCK(p);
462	}
463
464exec_fail_dealloc:
465
466	/*
467	 * free various allocated resources
468	 */
469	if (imgp->firstpage)
470		exec_unmap_first_page(imgp);
471
472	if (imgp->stringbase != NULL)
473		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase,
474			ARG_MAX + PAGE_SIZE);
475
476	if (imgp->vp) {
477		NDFREE(ndp, NDF_ONLY_PNBUF);
478		vrele(imgp->vp);
479	}
480
481	if (error == 0)
482		goto done2;
483
484exec_fail:
485	/* we're done here, clear P_INEXEC */
486	PROC_LOCK(p);
487	p->p_flag &= ~P_INEXEC;
488	PROC_UNLOCK(p);
489
490	if (imgp->vmspace_destroyed) {
491		/* sorry, no more process anymore. exit gracefully */
492		exit1(td, W_EXITCODE(0, SIGABRT));
493		/* NOT REACHED */
494		error = 0;
495	}
496done2:
497	mtx_unlock(&Giant);
498	return (error);
499}
500
501int
502exec_map_first_page(imgp)
503	struct image_params *imgp;
504{
505	int rv, i;
506	int initial_pagein;
507	vm_page_t ma[VM_INITIAL_PAGEIN];
508	vm_object_t object;
509
510	GIANT_REQUIRED;
511
512	if (imgp->firstpage) {
513		exec_unmap_first_page(imgp);
514	}
515
516	VOP_GETVOBJECT(imgp->vp, &object);
517
518	ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
519
520	if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
521		initial_pagein = VM_INITIAL_PAGEIN;
522		if (initial_pagein > object->size)
523			initial_pagein = object->size;
524		for (i = 1; i < initial_pagein; i++) {
525			if ((ma[i] = vm_page_lookup(object, i)) != NULL) {
526				if ((ma[i]->flags & PG_BUSY) || ma[i]->busy)
527					break;
528				if (ma[i]->valid)
529					break;
530				vm_page_busy(ma[i]);
531			} else {
532				ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL);
533				if (ma[i] == NULL)
534					break;
535			}
536		}
537		initial_pagein = i;
538
539		rv = vm_pager_get_pages(object, ma, initial_pagein, 0);
540		ma[0] = vm_page_lookup(object, 0);
541
542		if ((rv != VM_PAGER_OK) || (ma[0] == NULL) || (ma[0]->valid == 0)) {
543			if (ma[0]) {
544				vm_page_protect(ma[0], VM_PROT_NONE);
545				vm_page_free(ma[0]);
546			}
547			return EIO;
548		}
549	}
550
551	vm_page_wire(ma[0]);
552	vm_page_wakeup(ma[0]);
553
554	pmap_qenter((vm_offset_t)imgp->image_header, ma, 1);
555	imgp->firstpage = ma[0];
556
557	return 0;
558}
559
560void
561exec_unmap_first_page(imgp)
562	struct image_params *imgp;
563{
564	GIANT_REQUIRED;
565
566	if (imgp->firstpage) {
567		pmap_qremove((vm_offset_t)imgp->image_header, 1);
568		vm_page_unwire(imgp->firstpage, 1);
569		imgp->firstpage = NULL;
570	}
571}
572
573/*
574 * Destroy old address space, and allocate a new stack
575 *	The new stack is only SGROWSIZ large because it is grown
576 *	automatically in trap.c.
577 */
578int
579exec_new_vmspace(imgp)
580	struct image_params *imgp;
581{
582	int error;
583	struct execlist *ep;
584	struct vmspace *vmspace = imgp->proc->p_vmspace;
585	vm_offset_t stack_addr = USRSTACK - maxssiz;
586
587	GIANT_REQUIRED;
588
589	imgp->vmspace_destroyed = 1;
590
591	/*
592	 * Perform functions registered with at_exec().
593	 */
594	TAILQ_FOREACH(ep, &exec_list, next)
595		(*ep->function)(imgp->proc);
596
597	/*
598	 * Blow away entire process VM, if address space not shared,
599	 * otherwise, create a new VM space so that other threads are
600	 * not disrupted
601	 */
602	if (vmspace->vm_refcnt == 1) {
603		if (vmspace->vm_shm)
604			shmexit(imgp->proc);
605		pmap_remove_pages(vmspace_pmap(vmspace), 0, VM_MAXUSER_ADDRESS);
606		vm_map_remove(&vmspace->vm_map, 0, VM_MAXUSER_ADDRESS);
607	} else {
608		vmspace_exec(imgp->proc);
609		vmspace = imgp->proc->p_vmspace;
610	}
611
612	/* Allocate a new stack */
613	error = vm_map_stack(&vmspace->vm_map, stack_addr, (vm_size_t)maxssiz,
614	    VM_PROT_ALL, VM_PROT_ALL, 0);
615	if (error)
616		return (error);
617
618#ifdef __ia64__
619	{
620		/*
621		 * Allocate backing store. We really need something
622		 * similar to vm_map_stack which can allow the backing
623		 * store to grow upwards. This will do for now.
624		 */
625		vm_offset_t bsaddr;
626		bsaddr = USRSTACK - 2*maxssiz;
627		error = vm_map_find(&vmspace->vm_map, 0, 0, &bsaddr,
628				    4*PAGE_SIZE, 0,
629				    VM_PROT_ALL, VM_PROT_ALL, 0);
630		FIRST_THREAD_IN_PROC(imgp->proc)->td_md.md_bspstore = bsaddr;
631	}
632#endif
633
634	/* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the
635	 * VM_STACK case, but they are still used to monitor the size of the
636	 * process stack so we can check the stack rlimit.
637	 */
638	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
639	vmspace->vm_maxsaddr = (char *)USRSTACK - maxssiz;
640
641	return(0);
642}
643
644/*
645 * Copy out argument and environment strings from the old process
646 *	address space into the temporary string buffer.
647 */
648int
649exec_extract_strings(imgp)
650	struct image_params *imgp;
651{
652	char	**argv, **envv;
653	char	*argp, *envp;
654	int	error;
655	size_t	length;
656
657	/*
658	 * extract arguments first
659	 */
660
661	argv = imgp->uap->argv;
662
663	if (argv) {
664		argp = (caddr_t) (intptr_t) fuword(argv);
665		if (argp == (caddr_t) -1)
666			return (EFAULT);
667		if (argp)
668			argv++;
669		if (imgp->argv0)
670			argp = imgp->argv0;
671		if (argp) {
672			do {
673				if (argp == (caddr_t) -1)
674					return (EFAULT);
675				if ((error = copyinstr(argp, imgp->stringp,
676				    imgp->stringspace, &length))) {
677					if (error == ENAMETOOLONG)
678						return(E2BIG);
679					return (error);
680				}
681				imgp->stringspace -= length;
682				imgp->stringp += length;
683				imgp->argc++;
684			} while ((argp = (caddr_t) (intptr_t) fuword(argv++)));
685		}
686	}
687
688	imgp->endargs = imgp->stringp;
689
690	/*
691	 * extract environment strings
692	 */
693
694	envv = imgp->uap->envv;
695
696	if (envv) {
697		while ((envp = (caddr_t) (intptr_t) fuword(envv++))) {
698			if (envp == (caddr_t) -1)
699				return (EFAULT);
700			if ((error = copyinstr(envp, imgp->stringp,
701			    imgp->stringspace, &length))) {
702				if (error == ENAMETOOLONG)
703					return(E2BIG);
704				return (error);
705			}
706			imgp->stringspace -= length;
707			imgp->stringp += length;
708			imgp->envc++;
709		}
710	}
711
712	return (0);
713}
714
715/*
716 * Copy strings out to the new process address space, constructing
717 *	new arg and env vector tables. Return a pointer to the base
718 *	so that it can be used as the initial stack pointer.
719 */
720register_t *
721exec_copyout_strings(imgp)
722	struct image_params *imgp;
723{
724	int argc, envc;
725	char **vectp;
726	char *stringp, *destp;
727	register_t *stack_base;
728	struct ps_strings *arginfo;
729	int szsigcode;
730
731	/*
732	 * Calculate string base and vector table pointers.
733	 * Also deal with signal trampoline code for this exec type.
734	 */
735	arginfo = (struct ps_strings *)PS_STRINGS;
736	szsigcode = *(imgp->proc->p_sysent->sv_szsigcode);
737	destp =	(caddr_t)arginfo - szsigcode - SPARE_USRSPACE -
738		roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
739
740	/*
741	 * install sigcode
742	 */
743	if (szsigcode)
744		copyout(imgp->proc->p_sysent->sv_sigcode,
745			((caddr_t)arginfo - szsigcode), szsigcode);
746
747	/*
748	 * If we have a valid auxargs ptr, prepare some room
749	 * on the stack.
750	 */
751	if (imgp->auxargs) {
752		/*
753		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
754		 * lower compatibility.
755		 */
756		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
757			: (AT_COUNT * 2);
758		/*
759		 * The '+ 2' is for the null pointers at the end of each of
760		 * the arg and env vector sets,and imgp->auxarg_size is room
761		 * for argument of Runtime loader.
762		 */
763		vectp = (char **) (destp - (imgp->argc + imgp->envc + 2 +
764				       imgp->auxarg_size) * sizeof(char *));
765
766	} else
767		/*
768		 * The '+ 2' is for the null pointers at the end of each of
769		 * the arg and env vector sets
770		 */
771		vectp = (char **)
772			(destp - (imgp->argc + imgp->envc + 2) * sizeof(char *));
773
774	/*
775	 * vectp also becomes our initial stack base
776	 */
777	stack_base = (register_t *)vectp;
778
779	stringp = imgp->stringbase;
780	argc = imgp->argc;
781	envc = imgp->envc;
782
783	/*
784	 * Copy out strings - arguments and environment.
785	 */
786	copyout(stringp, destp, ARG_MAX - imgp->stringspace);
787
788	/*
789	 * Fill in "ps_strings" struct for ps, w, etc.
790	 */
791	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
792	suword(&arginfo->ps_nargvstr, argc);
793
794	/*
795	 * Fill in argument portion of vector table.
796	 */
797	for (; argc > 0; --argc) {
798		suword(vectp++, (long)(intptr_t)destp);
799		while (*stringp++ != 0)
800			destp++;
801		destp++;
802	}
803
804	/* a null vector table pointer separates the argp's from the envp's */
805	suword(vectp++, 0);
806
807	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
808	suword(&arginfo->ps_nenvstr, envc);
809
810	/*
811	 * Fill in environment portion of vector table.
812	 */
813	for (; envc > 0; --envc) {
814		suword(vectp++, (long)(intptr_t)destp);
815		while (*stringp++ != 0)
816			destp++;
817		destp++;
818	}
819
820	/* end of vector table is a null pointer */
821	suword(vectp, 0);
822
823	return (stack_base);
824}
825
826/*
827 * Check permissions of file to execute.
828 *	Called with imgp->vp locked.
829 *	Return 0 for success or error code on failure.
830 */
831int
832exec_check_permissions(imgp)
833	struct image_params *imgp;
834{
835	struct vnode *vp = imgp->vp;
836	struct vattr *attr = imgp->attr;
837	struct thread *td;
838	int error;
839
840	td = curthread;			/* XXXKSE */
841	/* Get file attributes */
842	error = VOP_GETATTR(vp, attr, td->td_ucred, td);
843	if (error)
844		return (error);
845
846	/*
847	 * 1) Check if file execution is disabled for the filesystem that this
848	 *	file resides on.
849	 * 2) Insure that at least one execute bit is on - otherwise root
850	 *	will always succeed, and we don't want to happen unless the
851	 *	file really is executable.
852	 * 3) Insure that the file is a regular file.
853	 */
854	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
855	    ((attr->va_mode & 0111) == 0) ||
856	    (attr->va_type != VREG))
857		return (EACCES);
858
859	/*
860	 * Zero length files can't be exec'd
861	 */
862	if (attr->va_size == 0)
863		return (ENOEXEC);
864
865	/*
866	 *  Check for execute permission to file based on current credentials.
867	 */
868	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
869	if (error)
870		return (error);
871
872	/*
873	 * Check number of open-for-writes on the file and deny execution
874	 * if there are any.
875	 */
876	if (vp->v_writecount)
877		return (ETXTBSY);
878
879	/*
880	 * Call filesystem specific open routine (which does nothing in the
881	 * general case).
882	 */
883	error = VOP_OPEN(vp, FREAD, td->td_ucred, td);
884	return (error);
885}
886
887/*
888 * Exec handler registration
889 */
890int
891exec_register(execsw_arg)
892	const struct execsw *execsw_arg;
893{
894	const struct execsw **es, **xs, **newexecsw;
895	int count = 2;	/* New slot and trailing NULL */
896
897	if (execsw)
898		for (es = execsw; *es; es++)
899			count++;
900	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
901	if (newexecsw == NULL)
902		return ENOMEM;
903	xs = newexecsw;
904	if (execsw)
905		for (es = execsw; *es; es++)
906			*xs++ = *es;
907	*xs++ = execsw_arg;
908	*xs = NULL;
909	if (execsw)
910		free(execsw, M_TEMP);
911	execsw = newexecsw;
912	return 0;
913}
914
915int
916exec_unregister(execsw_arg)
917	const struct execsw *execsw_arg;
918{
919	const struct execsw **es, **xs, **newexecsw;
920	int count = 1;
921
922	if (execsw == NULL)
923		panic("unregister with no handlers left?\n");
924
925	for (es = execsw; *es; es++) {
926		if (*es == execsw_arg)
927			break;
928	}
929	if (*es == NULL)
930		return ENOENT;
931	for (es = execsw; *es; es++)
932		if (*es != execsw_arg)
933			count++;
934	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
935	if (newexecsw == NULL)
936		return ENOMEM;
937	xs = newexecsw;
938	for (es = execsw; *es; es++)
939		if (*es != execsw_arg)
940			*xs++ = *es;
941	*xs = NULL;
942	if (execsw)
943		free(execsw, M_TEMP);
944	execsw = newexecsw;
945	return 0;
946}
947
948int
949at_exec(function)
950	execlist_fn function;
951{
952	struct execlist *ep;
953
954#ifdef INVARIANTS
955	/* Be noisy if the programmer has lost track of things */
956	if (rm_at_exec(function))
957		printf("WARNING: exec callout entry (%p) already present\n",
958		    function);
959#endif
960	ep = malloc(sizeof(*ep), M_ATEXEC, M_NOWAIT);
961	if (ep == NULL)
962		return (ENOMEM);
963	ep->function = function;
964	TAILQ_INSERT_TAIL(&exec_list, ep, next);
965	return (0);
966}
967
968/*
969 * Scan the exec callout list for the given item and remove it.
970 * Returns the number of items removed (0 or 1)
971 */
972int
973rm_at_exec(function)
974	execlist_fn function;
975{
976	struct execlist *ep;
977
978	TAILQ_FOREACH(ep, &exec_list, next) {
979		if (ep->function == function) {
980			TAILQ_REMOVE(&exec_list, ep, next);
981			free(ep, M_ATEXEC);
982			return(1);
983		}
984	}
985	return (0);
986}
987
988