linux_exec.c revision 1.28
1/*	$NetBSD: linux_exec.c,v 1.28 1998/08/09 20:37:54 perry Exp $	*/
2
3/*
4 * Copyright (c) 1995 Frank van der Linden
5 * Copyright (c) 1994 Christos Zoulas
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * based on exec_aout.c, sunos_exec.c and svr4_exec.c
31 */
32
33#define	ELFSIZE		32				/* XXX should die */
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/proc.h>
39#include <sys/malloc.h>
40#include <sys/namei.h>
41#include <sys/vnode.h>
42#include <sys/mount.h>
43#include <sys/exec.h>
44#include <sys/exec_elf.h>
45
46#include <sys/mman.h>
47#include <sys/syscallargs.h>
48
49#include <vm/vm.h>
50#include <vm/vm_param.h>
51#include <vm/vm_map.h>
52
53#include <machine/cpu.h>
54#include <machine/reg.h>
55#include <machine/linux_machdep.h>
56
57#include <compat/linux/linux_types.h>
58#include <compat/linux/linux_syscall.h>
59#include <compat/linux/linux_signal.h>
60#include <compat/linux/linux_syscallargs.h>
61#include <compat/linux/linux_util.h>
62#include <compat/linux/linux_exec.h>
63
64static void *linux_aout_copyargs __P((struct exec_package *,
65    struct ps_strings *, void *, void *));
66static int linux_elf32_signature __P((struct proc *p, struct exec_package *,
67    Elf32_Ehdr *));
68
69#define	LINUX_AOUT_AUX_ARGSIZ	2
70#define LINUX_ELF_AUX_ARGSIZ (sizeof(AuxInfo) * 8 / sizeof(char *))
71
72
73const char linux_emul_path[] = "/emul/linux";
74extern int linux_error[];
75extern char linux_sigcode[], linux_esigcode[];
76extern struct sysent linux_sysent[];
77extern char *linux_syscallnames[];
78
79int exec_linux_aout_prep_zmagic __P((struct proc *, struct exec_package *));
80int exec_linux_aout_prep_nmagic __P((struct proc *, struct exec_package *));
81int exec_linux_aout_prep_omagic __P((struct proc *, struct exec_package *));
82int exec_linux_aout_prep_qmagic __P((struct proc *, struct exec_package *));
83
84struct emul emul_linux_aout = {
85	"linux",
86	linux_error,
87	linux_sendsig,
88	LINUX_SYS_syscall,
89	LINUX_SYS_MAXSYSCALL,
90	linux_sysent,
91	linux_syscallnames,
92	LINUX_AOUT_AUX_ARGSIZ,
93	linux_aout_copyargs,
94	linux_setregs,
95	linux_sigcode,
96	linux_esigcode,
97};
98
99struct emul emul_linux_elf = {
100	"linux",
101	linux_error,
102	linux_sendsig,
103	LINUX_SYS_syscall,
104	LINUX_SYS_MAXSYSCALL,
105	linux_sysent,
106	linux_syscallnames,
107	LINUX_ELF_AUX_ARGSIZ,
108	elf32_copyargs,
109	linux_setregs,
110	linux_sigcode,
111	linux_esigcode,
112};
113
114
115static void *
116linux_aout_copyargs(pack, arginfo, stack, argp)
117	struct exec_package *pack;
118	struct ps_strings *arginfo;
119	void *stack;
120	void *argp;
121{
122	char **cpp = stack;
123	char **stk = stack;
124	char *dp, *sp;
125	size_t len;
126	void *nullp = NULL;
127	int argc = arginfo->ps_nargvstr;
128	int envc = arginfo->ps_nenvstr;
129
130	if (copyout(&argc, cpp++, sizeof(argc)))
131		return NULL;
132
133	/* leave room for envp and argv */
134	cpp += 2;
135	if (copyout(&cpp, &stk[1], sizeof (cpp)))
136		return NULL;
137
138	dp = (char *) (cpp + argc + envc + 2);
139	sp = argp;
140
141	/* XXX don't copy them out, remap them! */
142	arginfo->ps_argvstr = cpp; /* remember location of argv for later */
143
144	for (; --argc >= 0; sp += len, dp += len)
145		if (copyout(&dp, cpp++, sizeof(dp)) ||
146		    copyoutstr(sp, dp, ARG_MAX, &len))
147			return NULL;
148
149	if (copyout(&nullp, cpp++, sizeof(nullp)))
150		return NULL;
151
152	if (copyout(&cpp, &stk[2], sizeof (cpp)))
153		return NULL;
154
155	arginfo->ps_envstr = cpp; /* remember location of envp for later */
156
157	for (; --envc >= 0; sp += len, dp += len)
158		if (copyout(&dp, cpp++, sizeof(dp)) ||
159		    copyoutstr(sp, dp, ARG_MAX, &len))
160			return NULL;
161
162	if (copyout(&nullp, cpp++, sizeof(nullp)))
163		return NULL;
164
165	return cpp;
166}
167
168int
169exec_linux_aout_makecmds(p, epp)
170	struct proc *p;
171	struct exec_package *epp;
172{
173	struct exec *linux_ep = epp->ep_hdr;
174	int machtype, magic;
175	int error = ENOEXEC;
176
177	magic = LINUX_N_MAGIC(linux_ep);
178	machtype = LINUX_N_MACHTYPE(linux_ep);
179
180
181	if (machtype != LINUX_MID_MACHINE)
182		return (ENOEXEC);
183
184	switch (magic) {
185	case QMAGIC:
186		error = exec_linux_aout_prep_qmagic(p, epp);
187		break;
188	case ZMAGIC:
189		error = exec_linux_aout_prep_zmagic(p, epp);
190		break;
191	case NMAGIC:
192		error = exec_linux_aout_prep_nmagic(p, epp);
193		break;
194	case OMAGIC:
195		error = exec_linux_aout_prep_omagic(p, epp);
196		break;
197	}
198	if (error == 0)
199		epp->ep_emul = &emul_linux_aout;
200	return error;
201}
202
203/*
204 * Since text starts at 0x400 in Linux ZMAGIC executables, and 0x400
205 * is very likely not page aligned on most architectures, it is treated
206 * as an NMAGIC here. XXX
207 */
208
209int
210exec_linux_aout_prep_zmagic(p, epp)
211	struct proc *p;
212	struct exec_package *epp;
213{
214	struct exec *execp = epp->ep_hdr;
215
216	epp->ep_taddr = LINUX_N_TXTADDR(*execp, ZMAGIC);
217	epp->ep_tsize = execp->a_text;
218	epp->ep_daddr = LINUX_N_DATADDR(*execp, ZMAGIC);
219	epp->ep_dsize = execp->a_data + execp->a_bss;
220	epp->ep_entry = execp->a_entry;
221
222	/* set up command for text segment */
223	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
224	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, ZMAGIC),
225	    VM_PROT_READ|VM_PROT_EXECUTE);
226
227	/* set up command for data segment */
228	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
229	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, ZMAGIC),
230	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
231
232	/* set up command for bss segment */
233	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
234	    epp->ep_daddr + execp->a_data, NULLVP, 0,
235	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
236
237	return exec_aout_setup_stack(p, epp);
238}
239
/*
 * exec_linux_aout_prep_nmagic(): Prepare Linux NMAGIC package.
 * Not different from the normal stuff.
 */
244
245int
246exec_linux_aout_prep_nmagic(p, epp)
247	struct proc *p;
248	struct exec_package *epp;
249{
250	struct exec *execp = epp->ep_hdr;
251	long bsize, baddr;
252
253	epp->ep_taddr = LINUX_N_TXTADDR(*execp, NMAGIC);
254	epp->ep_tsize = execp->a_text;
255	epp->ep_daddr = LINUX_N_DATADDR(*execp, NMAGIC);
256	epp->ep_dsize = execp->a_data + execp->a_bss;
257	epp->ep_entry = execp->a_entry;
258
259	/* set up command for text segment */
260	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
261	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, NMAGIC),
262	    VM_PROT_READ|VM_PROT_EXECUTE);
263
264	/* set up command for data segment */
265	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
266	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, NMAGIC),
267	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
268
269	/* set up command for bss segment */
270	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
271	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
272	if (bsize > 0)
273		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
274		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
275
276	return exec_aout_setup_stack(p, epp);
277}
278
/*
 * exec_linux_aout_prep_omagic(): Prepare Linux OMAGIC package.
 * Business as usual.
 */
283
284int
285exec_linux_aout_prep_omagic(p, epp)
286	struct proc *p;
287	struct exec_package *epp;
288{
289	struct exec *execp = epp->ep_hdr;
290	long dsize, bsize, baddr;
291
292	epp->ep_taddr = LINUX_N_TXTADDR(*execp, OMAGIC);
293	epp->ep_tsize = execp->a_text;
294	epp->ep_daddr = LINUX_N_DATADDR(*execp, OMAGIC);
295	epp->ep_dsize = execp->a_data + execp->a_bss;
296	epp->ep_entry = execp->a_entry;
297
298	/* set up command for text and data segments */
299	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn,
300	    execp->a_text + execp->a_data, epp->ep_taddr, epp->ep_vp,
301	    LINUX_N_TXTOFF(*execp, OMAGIC), VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
302
303	/* set up command for bss segment */
304	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
305	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
306	if (bsize > 0)
307		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
308		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
309
310	/*
311	 * Make sure (# of pages) mapped above equals (vm_tsize + vm_dsize);
312	 * obreak(2) relies on this fact. Both `vm_tsize' and `vm_dsize' are
313	 * computed (in execve(2)) by rounding *up* `ep_tsize' and `ep_dsize'
314	 * respectively to page boundaries.
315	 * Compensate `ep_dsize' for the amount of data covered by the last
316	 * text page.
317	 */
318	dsize = epp->ep_dsize + execp->a_text - roundup(execp->a_text, NBPG);
319	epp->ep_dsize = (dsize > 0) ? dsize : 0;
320	return exec_aout_setup_stack(p, epp);
321}
322
323int
324exec_linux_aout_prep_qmagic(p, epp)
325	struct proc *p;
326	struct exec_package *epp;
327{
328	struct exec *execp = epp->ep_hdr;
329
330	epp->ep_taddr = LINUX_N_TXTADDR(*execp, QMAGIC);
331	epp->ep_tsize = execp->a_text;
332	epp->ep_daddr = LINUX_N_DATADDR(*execp, QMAGIC);
333	epp->ep_dsize = execp->a_data + execp->a_bss;
334	epp->ep_entry = execp->a_entry;
335
336	/*
337	 * check if vnode is in open for writing, because we want to
338	 * demand-page out of it.  if it is, don't do it, for various
339	 * reasons
340	 */
341	if ((execp->a_text != 0 || execp->a_data != 0) &&
342	    epp->ep_vp->v_writecount != 0) {
343#ifdef DIAGNOSTIC
344		if (epp->ep_vp->v_flag & VTEXT)
345			panic("exec: a VTEXT vnode has writecount != 0\n");
346#endif
347		return ETXTBSY;
348	}
349	epp->ep_vp->v_flag |= VTEXT;
350
351	/* set up command for text segment */
352	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_text,
353	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, QMAGIC),
354	    VM_PROT_READ|VM_PROT_EXECUTE);
355
356	/* set up command for data segment */
357	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_data,
358	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, QMAGIC),
359	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
360
361	/* set up command for bss segment */
362	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
363	    epp->ep_daddr + execp->a_data, NULLVP, 0,
364	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
365
366	return exec_aout_setup_stack(p, epp);
367}
368
/*
 * Take advantage of the fact that all Linux binaries are compiled
 * with gcc, and that gcc puts a signature in the comment section. On
 * SVR4 binaries the gcc signature follows the OS name signature, so
 * that will not be a problem. We don't bother to read in the string
 * table, but we check all the progbits headers.
 */
376static int
377linux_elf32_signature(p, epp, eh)
378	struct proc *p;
379	struct exec_package *epp;
380	Elf32_Ehdr *eh;
381{
382	size_t shsize = sizeof(Elf32_Shdr) * eh->e_shnum;
383	size_t i;
384	static const char signature[] = "\0GCC: (GNU) ";
385	char buf[sizeof(signature) - 1];
386	Elf32_Shdr *sh;
387	int error;
388
389	sh = (Elf32_Shdr *) malloc(shsize, M_TEMP, M_WAITOK);
390
391	if ((error = elf32_read_from(p, epp->ep_vp, eh->e_shoff,
392	    (caddr_t) sh, shsize)) != 0)
393		goto out;
394
395	for (i = 0; i < eh->e_shnum; i++) {
396		Elf32_Shdr *s = &sh[i];
397
398		/*
399		 * Identify candidates for the comment header;
400		 * Header cannot have a load address, or flags and
401		 * it must be large enough.
402		 */
403		if (s->sh_type != Elf_sht_progbits ||
404		    s->sh_addr != 0 ||
405		    s->sh_flags != 0 ||
406		    s->sh_size < sizeof(signature) - 1)
407			continue;
408
409		if ((error = elf32_read_from(p, epp->ep_vp, s->sh_offset,
410		    (caddr_t) buf, sizeof(signature) - 1)) != 0)
411			goto out;
412
413		/*
414		 * error is 0, if the signatures match we are done.
415		 */
416		if (memcmp(buf, signature, sizeof(signature) - 1) == 0)
417			goto out;
418	}
419	error = EFTYPE;
420
421out:
422	free(sh, M_TEMP);
423	return error;
424}
425
426int
427linux_elf32_probe(p, epp, eh, itp, pos)
428	struct proc *p;
429	struct exec_package *epp;
430	Elf32_Ehdr *eh;
431	char *itp;
432	Elf32_Addr *pos;
433{
434	char *bp;
435	int error;
436	size_t len;
437
438	if ((error = linux_elf32_signature(p, epp, eh)) != 0)
439		return error;
440
441	if (itp[0]) {
442		if ((error = emul_find(p, NULL, linux_emul_path, itp, &bp, 0)))
443			return error;
444		if ((error = copystr(bp, itp, MAXPATHLEN, &len)))
445			return error;
446		free(bp, M_TEMP);
447	}
448	epp->ep_emul = &emul_linux_elf;
449	*pos = ELF32_NO_ADDR;
450	return 0;
451}
452
/*
 * The Linux system call to load shared libraries, a.out version. The
 * a.out shared libs are just files that are mapped onto a fixed
 * address in the process' address space. The address is given in
 * a_entry. Read in the header, set up some VM commands and run them.
 *
 * Yes, both text and data are mapped at once, so we're left with
 * writable text for the shared libs. The Linux crt0 seemed to break
 * sometimes when data was mapped separately. It munmapped a uselib()
 * of ld.so by hand, which failed with shared text and data for ld.so.
 * Yuck.
 *
 * Because of the problem with ZMAGIC executables (text starts
 * at 0x400 in the file, but needs to be mapped at 0), ZMAGIC
 * shared libs are not handled very efficiently :-(
 */
469
470int
471linux_sys_uselib(p, v, retval)
472	struct proc *p;
473	void *v;
474	register_t *retval;
475{
476	struct linux_sys_uselib_args /* {
477		syscallarg(char *) path;
478	} */ *uap = v;
479	caddr_t sg;
480	long bsize, dsize, tsize, taddr, baddr, daddr;
481	struct nameidata ni;
482	struct vnode *vp;
483	struct exec hdr;
484	struct exec_vmcmd_set vcset;
485	int i, magic, error;
486	size_t rem;
487
488	sg = stackgap_init(p->p_emul);
489	LINUX_CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
490
491	NDINIT(&ni, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
492
493	if ((error = namei(&ni)))
494		return error;
495
496	vp = ni.ni_vp;
497
498	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t) &hdr, LINUX_AOUT_HDR_SIZE,
499			     0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
500			     &rem, p))) {
501		vrele(vp);
502		return error;
503	}
504
505	if (rem != 0) {
506		vrele(vp);
507		return ENOEXEC;
508	}
509
510	if (LINUX_N_MACHTYPE(&hdr) != LINUX_MID_MACHINE)
511		return ENOEXEC;
512
513	magic = LINUX_N_MAGIC(&hdr);
514	taddr = hdr.a_entry & (~(NBPG - 1));
515	tsize = hdr.a_text;
516	daddr = taddr + tsize;
517	dsize = hdr.a_data + hdr.a_bss;
518
519	if ((hdr.a_text != 0 || hdr.a_data != 0) && vp->v_writecount != 0) {
520		vrele(vp);
521                return ETXTBSY;
522        }
523	vp->v_flag |= VTEXT;
524
525	vcset.evs_cnt = 0;
526	vcset.evs_used = 0;
527
528	NEW_VMCMD(&vcset,
529		  magic == ZMAGIC ? vmcmd_map_readvn : vmcmd_map_pagedvn,
530		  hdr.a_text + hdr.a_data, taddr,
531		  vp, LINUX_N_TXTOFF(hdr, magic),
532		  VM_PROT_READ|VM_PROT_EXECUTE|VM_PROT_WRITE);
533
534	baddr = roundup(daddr + hdr.a_data, NBPG);
535	bsize = daddr + dsize - baddr;
536        if (bsize > 0) {
537                NEW_VMCMD(&vcset, vmcmd_map_zero, bsize, baddr,
538                    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
539	}
540
541	for (i = 0; i < vcset.evs_used && !error; i++) {
542		struct exec_vmcmd *vcp;
543
544		vcp = &vcset.evs_cmds[i];
545		error = (*vcp->ev_proc)(p, vcp);
546	}
547
548	kill_vmcmds(&vcset);
549
550	vrele(vp);
551
552	return error;
553}
554
555/*
556 * Execve(2). Just check the alternate emulation path, and pass it on
557 * to the NetBSD execve().
558 */
559int
560linux_sys_execve(p, v, retval)
561	struct proc *p;
562	void *v;
563	register_t *retval;
564{
565	struct linux_sys_execve_args /* {
566		syscallarg(char *) path;
567		syscallarg(char **) argv;
568		syscallarg(char **) envp;
569	} */ *uap = v;
570	struct sys_execve_args ap;
571	caddr_t sg;
572
573	sg = stackgap_init(p->p_emul);
574	LINUX_CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
575
576	SCARG(&ap, path) = SCARG(uap, path);
577	SCARG(&ap, argp) = SCARG(uap, argp);
578	SCARG(&ap, envp) = SCARG(uap, envp);
579
580	return sys_execve(p, &ap, retval);
581}
582