linux_exec.c revision 1.6
1/*	$NetBSD: linux_exec.c,v 1.6 1995/06/11 14:56:47 fvdl Exp $	*/
2
3/*
4 * Copyright (c) 1995 Frank van der Linden
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *      This product includes software developed for the NetBSD Project
18 *      by Frank van der Linden
19 * 4. The name of the author may not be used to endorse or promote products
20 *    derived from this software without specific prior written permission
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * based on kern/exec_aout.c and compat/sunos/sunos_exec.c
34 */
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/filedesc.h>
39#include <sys/kernel.h>
40#include <sys/proc.h>
41#include <sys/mount.h>
42#include <sys/malloc.h>
43#include <sys/namei.h>
44#include <sys/vnode.h>
45#include <sys/file.h>
46#include <sys/resourcevar.h>
47#include <sys/wait.h>
48
49#include <sys/mman.h>
50#include <vm/vm.h>
51#include <vm/vm_param.h>
52#include <vm/vm_map.h>
53#include <vm/vm_kern.h>
54#include <vm/vm_pager.h>
55
56#include <machine/cpu.h>
57#include <machine/reg.h>
58#include <machine/exec.h>
59#include <machine/linux_machdep.h>
60
61#include <compat/linux/linux_types.h>
62#include <compat/linux/linux_syscall.h>
63#include <compat/linux/linux_syscallargs.h>
64#include <compat/linux/linux_util.h>
65#include <compat/linux/linux_exec.h>
66
67struct elf_args {
68	u_long  arg_entry;	/* progran entry point */
69	u_long  arg_interp;	/* Interpreter load address */
70	u_long  arg_phaddr;	/* program header address */
71	u_long  arg_phentsize;	/* Size of program header */
72	u_long  arg_phnum;	/* Number of program headers */
73};
74
75static void *linux_aout_copyargs __P((struct exec_package *,
76	struct ps_strings *, void *, void *));
77static void *linux_elf_copyargs __P((struct exec_package *, struct ps_strings *,
78	void *, void *));
79static int linux_elf_check_header __P((Elf32_Ehdr *, int));
80static void linux_elf_load_psection __P((struct exec_vmcmd_set *,
81	struct vnode *, Elf32_Phdr *, u_long *, u_long *, int *));
82static int linux_elf_set_segment __P((struct exec_package *, u_long, u_long,
83	int));
84static int linux_elf_read_from __P((struct vnode *, u_long, struct proc *,
85	caddr_t, int));
86static int linux_elf_load_file __P((struct proc *, char *,
87	struct exec_vmcmd_set *, u_long *, struct elf_args *, u_long *));
88
/*
 * Debug tracing.  DPRINTF((fmt, ...)) becomes a printf call when
 * DEBUG_EXEC_LINUX_ELF is defined and vanishes otherwise; the double
 * parentheses at the call site are required.
 */
#ifdef DEBUG_EXEC_LINUX_ELF
#define DPRINTF(x) printf x
#else
#define DPRINTF(x)
#endif

/*
 * Clear the low bits of a selected by (b - 1), i.e. round a down to a
 * multiple of b.  Only meaningful when b is a non-zero power of two.
 */
#define LINUX_ELF_ALIGN(a, b) ((a) & ~((b) - 1))

/* Room for eight AuxInfo entries, expressed in units of (char *). */
#define LINUX_ELF_AUX_ARGSIZ (sizeof(AuxInfo) * 8 / sizeof(char *))

/*
 * Extra slot count for a.out images.
 * NOTE(review): presumably the two pointer slots (argv, envp) that
 * linux_aout_copyargs() fills in after argc -- confirm.
 */
#define	LINUX_AOUT_AUX_ARGSIZ	2
98
99extern int linux_error[];
100extern struct sysent linux_sysent[];
101extern char *linux_syscallnames[];
102
103struct emul emul_linux_aout = {
104	"linux",
105	linux_error,
106	linux_sendsig,
107	LINUX_SYS_syscall,
108	LINUX_SYS_MAXSYSCALL,
109	linux_sysent,
110	linux_syscallnames,
111	LINUX_AOUT_AUX_ARGSIZ,
112	linux_aout_copyargs,
113	setregs,
114	linux_sigcode,
115	linux_esigcode,
116};
117
118struct emul emul_linux_elf = {
119	"linux",
120	linux_error,
121	linux_sendsig,
122	LINUX_SYS_syscall,
123	LINUX_SYS_MAXSYSCALL,
124	linux_sysent,
125	linux_syscallnames,
126	LINUX_ELF_AUX_ARGSIZ,
127	linux_elf_copyargs,
128	setregs,
129	linux_sigcode,
130	linux_esigcode,
131};
132
133
134static void *
135linux_aout_copyargs(pack, arginfo, stack, argp)
136	struct exec_package *pack;
137	struct ps_strings *arginfo;
138	void *stack;
139	void *argp;
140{
141	char **cpp = stack;
142	char **stk = stack;
143	char *dp, *sp;
144	size_t len;
145	void *nullp = NULL;
146	int argc = arginfo->ps_nargvstr;
147	int envc = arginfo->ps_nenvstr;
148
149	if (copyout(&argc, cpp++, sizeof(argc)))
150		return NULL;
151
152	/* leave room for envp and argv */
153	cpp += 2;
154	if (copyout(&cpp, &stk[1], sizeof (cpp)))
155		return NULL;
156
157	dp = (char *) (cpp + argc + envc + 2);
158	sp = argp;
159
160	/* XXX don't copy them out, remap them! */
161	arginfo->ps_argvstr = cpp; /* remember location of argv for later */
162
163	for (; --argc >= 0; sp += len, dp += len)
164		if (copyout(&dp, cpp++, sizeof(dp)) ||
165		    copyoutstr(sp, dp, ARG_MAX, &len))
166			return NULL;
167
168	if (copyout(&nullp, cpp++, sizeof(nullp)))
169		return NULL;
170
171	if (copyout(&cpp, &stk[2], sizeof (cpp)))
172		return NULL;
173
174	arginfo->ps_envstr = cpp; /* remember location of envp for later */
175
176	for (; --envc >= 0; sp += len, dp += len)
177		if (copyout(&dp, cpp++, sizeof(dp)) ||
178		    copyoutstr(sp, dp, ARG_MAX, &len))
179			return NULL;
180
181	if (copyout(&nullp, cpp++, sizeof(nullp)))
182		return NULL;
183
184	return cpp;
185}
186
187static void *
188linux_elf_copyargs(pack, arginfo, stack, argp)
189	struct exec_package *pack;
190	struct ps_strings *arginfo;
191	void *stack;
192	void *argp;
193{
194	char **cpp = stack;
195	char *dp, *sp;
196	size_t len;
197	void *nullp = NULL;
198	int argc = arginfo->ps_nargvstr;
199	int envc = arginfo->ps_nenvstr;
200	AuxInfo *a;
201	struct elf_args *ap;
202
203	if (copyout(&argc, cpp++, sizeof(argc)))
204		return NULL;
205
206	dp = (char *) (cpp + argc + envc + 2 + pack->ep_emul->e_arglen);
207	sp = argp;
208
209	/* XXX don't copy them out, remap them! */
210	arginfo->ps_argvstr = cpp; /* remember location of argv for later */
211
212	for (; --argc >= 0; sp += len, dp += len)
213		if (copyout(&dp, cpp++, sizeof(dp)) ||
214		    copyoutstr(sp, dp, ARG_MAX, &len))
215			return NULL;
216
217	if (copyout(&nullp, cpp++, sizeof(nullp)))
218		return NULL;
219
220	arginfo->ps_envstr = cpp; /* remember location of envp for later */
221
222	for (; --envc >= 0; sp += len, dp += len)
223		if (copyout(&dp, cpp++, sizeof(dp)) ||
224		    copyoutstr(sp, dp, ARG_MAX, &len))
225			return NULL;
226
227	if (copyout(&nullp, cpp++, sizeof(nullp)))
228		return NULL;
229
230	/*
231	 * Push extra arguments on the stack needed by dynamically
232	 * linked binaries
233	 */
234	a = (AuxInfo *) cpp;
235	if ((ap = (struct elf_args *) pack->ep_emul_arg)) {
236
237		DPRINTF(("phaddr=0x%x, phsize=%d, phnum=%d, interp=0x%x, ",
238			 ap->arg_phaddr, ap->arg_phentsize, ap->arg_phnum,
239			 ap->arg_interp));
240		DPRINTF((" entry=0x%x\n", ap->arg_entry));
241
242		a->au_id = AUX_phdr;
243		a->au_v = ap->arg_phaddr;
244		a++;
245
246		a->au_id = AUX_phent;
247		a->au_v = ap->arg_phentsize;
248		a++;
249
250		a->au_id = AUX_phnum;
251		a->au_v = ap->arg_phnum;
252		a++;
253
254		a->au_id = AUX_pagesz;
255		a->au_v = NBPG;
256		a++;
257
258		a->au_id = AUX_base;
259		a->au_v = ap->arg_interp;
260		a++;
261
262		a->au_id = AUX_flags;
263		a->au_v = 0;
264		a++;
265
266		a->au_id = AUX_entry;
267		a->au_v = ap->arg_entry;
268		a++;
269
270		a->au_id = AUX_null;
271		a->au_v = 0;
272		a++;
273
274		free((char *) ap, M_TEMP);
275	}
276	return a;
277}
278
279#ifdef DEBUG_EXEC_LINUX_ELF
280static void
281print_Ehdr(e)
282	Elf32_Ehdr     *e;
283{
284	printf("e_ident %s, ", e->e_ident);
285	printf("e_type %d, ", e->e_type);
286	printf("e_machine %d, ", e->e_machine);
287	printf("e_version %ld, ", e->e_version);
288	printf("e_entry %lx, ", e->e_entry);
289	printf("e_phoff %lx, ", e->e_phoff);
290	printf("e_shoff %lx, ", e->e_shoff);
291	printf("e_flags %lx, ", e->e_flags);
292	printf("e_ehsize %d, ", e->e_ehsize);
293	printf("e_phentsize %d, ", e->e_phentsize);
294	printf("e_phnum %d, ", e->e_phnum);
295	printf("e_shentsize %d, ", e->e_shentsize);
296	printf("e_shnum %d, ", e->e_shnum);
297	printf("e_shstrndx %d\n", e->e_shstrndx);
298}
299
300
301static void
302print_Phdr(p)
303	Elf32_Phdr     *p;
304{
305	static char    *types[] =
306	{
307		"null", "load", "dynamic", "interp",
308		"note", "shlib", "phdr", "entry7"
309	};
310
311	printf("p_type %ld [%s], ", p->p_type, types[p->p_type & 7]);
312	printf("p_offset %lx, ", p->p_offset);
313	printf("p_vaddr %lx, ", p->p_vaddr);
314	printf("p_paddr %lx, ", p->p_paddr);
315	printf("p_filesz %ld, ", p->p_filesz);
316	printf("p_memsz %ld, ", p->p_memsz);
317	printf("p_flags %lx, ", p->p_flags);
318	printf("p_align %ld\n", p->p_align);
319}
320#endif
321
322int
323exec_linux_aout_makecmds(p, epp)
324	struct proc *p;
325	struct exec_package *epp;
326{
327	struct exec *linux_ep = epp->ep_hdr;
328	int machtype, magic;
329	int error = ENOEXEC;
330
331	magic = LINUX_N_MAGIC(linux_ep);
332	machtype = LINUX_N_MACHTYPE(linux_ep);
333
334
335	if (machtype != LINUX_MID_MACHINE)
336		return (ENOEXEC);
337
338	switch (magic) {
339	case QMAGIC:
340		error = exec_linux_aout_prep_qmagic(p, epp);
341		break;
342	case ZMAGIC:
343		error = exec_linux_aout_prep_zmagic(p, epp);
344		break;
345	case NMAGIC:
346		error = exec_linux_aout_prep_nmagic(p, epp);
347		break;
348	case OMAGIC:
349		error = exec_linux_aout_prep_omagic(p, epp);
350		break;
351	}
352	if (error == 0)
353		epp->ep_emul = &emul_linux_aout;
354	return error;
355}
356
357/*
358 * Since text starts at 0x400 in Linux ZMAGIC executables, and 0x400
359 * is very likely not page aligned on most architectures, it is treated
360 * as an NMAGIC here. XXX
361 */
362
363int
364exec_linux_aout_prep_zmagic(p, epp)
365	struct proc *p;
366	struct exec_package *epp;
367{
368	struct exec *execp = epp->ep_hdr;
369
370	epp->ep_taddr = LINUX_N_TXTADDR(*execp, ZMAGIC);
371	epp->ep_tsize = execp->a_text;
372	epp->ep_daddr = LINUX_N_DATADDR(*execp, ZMAGIC);
373	epp->ep_dsize = execp->a_data + execp->a_bss;
374	epp->ep_entry = execp->a_entry;
375
376	/* set up command for text segment */
377	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
378	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, ZMAGIC),
379	    VM_PROT_READ|VM_PROT_EXECUTE);
380
381	/* set up command for data segment */
382	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
383	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, ZMAGIC),
384	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
385
386	/* set up command for bss segment */
387	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
388	    epp->ep_daddr + execp->a_data, NULLVP, 0,
389	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
390
391	return exec_aout_setup_stack(p, epp);
392}
393
394/*
395 * exec_aout_prep_nmagic(): Prepare Linux NMAGIC package.
396 * Not different from the normal stuff.
397 */
398
399int
400exec_linux_aout_prep_nmagic(p, epp)
401	struct proc *p;
402	struct exec_package *epp;
403{
404	struct exec *execp = epp->ep_hdr;
405	long bsize, baddr;
406
407	epp->ep_taddr = LINUX_N_TXTADDR(*execp, NMAGIC);
408	epp->ep_tsize = execp->a_text;
409	epp->ep_daddr = LINUX_N_DATADDR(*execp, NMAGIC);
410	epp->ep_dsize = execp->a_data + execp->a_bss;
411	epp->ep_entry = execp->a_entry;
412
413	/* set up command for text segment */
414	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
415	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, NMAGIC),
416	    VM_PROT_READ|VM_PROT_EXECUTE);
417
418	/* set up command for data segment */
419	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
420	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, NMAGIC),
421	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
422
423	/* set up command for bss segment */
424	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
425	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
426	if (bsize > 0)
427		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
428		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
429
430	return exec_aout_setup_stack(p, epp);
431}
432
433/*
434 * exec_aout_prep_omagic(): Prepare Linux OMAGIC package.
435 * Business as usual.
436 */
437
438int
439exec_linux_aout_prep_omagic(p, epp)
440	struct proc *p;
441	struct exec_package *epp;
442{
443	struct exec *execp = epp->ep_hdr;
444	long dsize, bsize, baddr;
445
446	epp->ep_taddr = LINUX_N_TXTADDR(*execp, OMAGIC);
447	epp->ep_tsize = execp->a_text;
448	epp->ep_daddr = LINUX_N_DATADDR(*execp, OMAGIC);
449	epp->ep_dsize = execp->a_data + execp->a_bss;
450	epp->ep_entry = execp->a_entry;
451
452	/* set up command for text and data segments */
453	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn,
454	    execp->a_text + execp->a_data, epp->ep_taddr, epp->ep_vp,
455	    LINUX_N_TXTOFF(*execp, OMAGIC), VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
456
457	/* set up command for bss segment */
458	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
459	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
460	if (bsize > 0)
461		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
462		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
463
464	/*
465	 * Make sure (# of pages) mapped above equals (vm_tsize + vm_dsize);
466	 * obreak(2) relies on this fact. Both `vm_tsize' and `vm_dsize' are
467	 * computed (in execve(2)) by rounding *up* `ep_tsize' and `ep_dsize'
468	 * respectively to page boundaries.
469	 * Compensate `ep_dsize' for the amount of data covered by the last
470	 * text page.
471	 */
472	dsize = epp->ep_dsize + execp->a_text - roundup(execp->a_text, NBPG);
473	epp->ep_dsize = (dsize > 0) ? dsize : 0;
474	return exec_aout_setup_stack(p, epp);
475}
476
477int
478exec_linux_aout_prep_qmagic(p, epp)
479	struct proc *p;
480	struct exec_package *epp;
481{
482	struct exec *execp = epp->ep_hdr;
483
484	epp->ep_taddr = LINUX_N_TXTADDR(*execp, QMAGIC);
485	epp->ep_tsize = execp->a_text;
486	epp->ep_daddr = LINUX_N_DATADDR(*execp, QMAGIC);
487	epp->ep_dsize = execp->a_data + execp->a_bss;
488	epp->ep_entry = execp->a_entry;
489
490	/*
491	 * check if vnode is in open for writing, because we want to
492	 * demand-page out of it.  if it is, don't do it, for various
493	 * reasons
494	 */
495	if ((execp->a_text != 0 || execp->a_data != 0) &&
496	    epp->ep_vp->v_writecount != 0) {
497#ifdef DIAGNOSTIC
498		if (epp->ep_vp->v_flag & VTEXT)
499			panic("exec: a VTEXT vnode has writecount != 0\n");
500#endif
501		return ETXTBSY;
502	}
503	epp->ep_vp->v_flag |= VTEXT;
504
505	/* set up command for text segment */
506	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_text,
507	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, QMAGIC),
508	    VM_PROT_READ|VM_PROT_EXECUTE);
509
510	/* set up command for data segment */
511	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_data,
512	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, QMAGIC),
513	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
514
515	/* set up command for bss segment */
516	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
517	    epp->ep_daddr + execp->a_data, NULLVP, 0,
518	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
519
520	return exec_aout_setup_stack(p, epp);
521}
522
523/*
524 * linux_elf_check_header():
525 *
526 * Check header for validity; return 0 of ok ENOEXEC if error
527 */
528static int
529linux_elf_check_header(eh, type)
530	Elf32_Ehdr     *eh;
531	int             type;
532{
533#ifdef sparc
534  /* #$%@#$%@#$%! */
535# define memcmp bcmp
536#endif
537	if (memcmp(eh->e_ident, Elf32_e_ident, Elf32_e_siz) != 0) {
538		DPRINTF(("Not an elf file\n"));
539		return ENOEXEC;
540	}
541
542	switch (eh->e_machine) {
543#ifdef i386
544	case Elf32_em_386:
545	case Elf32_em_486:
546#endif
547#ifdef sparc
548	case Elf32_em_sparc:
549#endif
550		break;
551
552	default:
553		DPRINTF(("Unsupported elf machine type %d\n", eh->e_machine));
554		return ENOEXEC;
555	}
556
557	if (eh->e_type != type) {
558		DPRINTF(("Not an elf executable\n"));
559		return ENOEXEC;
560	}
561
562	return 0;
563}
564
565
566/*
567 * linux_elf_load_psection():
568 *
569 * Load a psection at the appropriate address
570 */
571static void
572linux_elf_load_psection(vcset, vp, ph, addr, size, prot)
573	struct exec_vmcmd_set   *vcset;
574	struct vnode		*vp;
575	Elf32_Phdr		*ph;
576	u_long			*addr;
577	u_long			*size;
578	int			*prot;
579{
580	u_long	uaddr;
581	long	diff;
582	long	offset;
583	u_long	msize;
584
585	/*
586         * If the user specified an address, then we load there.
587         */
588	if (*addr != ~0) {
589		uaddr = *addr + ph->p_align;
590		*addr = LINUX_ELF_ALIGN(uaddr, ph->p_align);
591		uaddr = LINUX_ELF_ALIGN(ph->p_vaddr, ph->p_align);
592		diff = ph->p_vaddr - uaddr;
593	} else {
594		uaddr = ph->p_vaddr;
595		*addr = LINUX_ELF_ALIGN(uaddr, ph->p_align);
596		diff = uaddr - *addr;
597	}
598
599	*prot |= (ph->p_flags & Elf32_pf_r) ? VM_PROT_READ : 0;
600	*prot |= (ph->p_flags & Elf32_pf_w) ? VM_PROT_WRITE : 0;
601	*prot |= (ph->p_flags & Elf32_pf_x) ? VM_PROT_EXECUTE : 0;
602
603	offset = ph->p_offset - diff;
604	*size = ph->p_filesz + diff;
605	msize = ph->p_memsz + diff;
606
607	DPRINTF(("Elf Seg@ 0x%x/0x%x sz %d/%d off 0x%x/0x%x[%d] algn 0x%x\n",
608		 ph->p_vaddr, *addr, *size, msize, ph->p_offset, offset,
609		 diff, ph->p_align));
610
611	NEW_VMCMD(vcset, vmcmd_map_readvn, *size,
612		  *addr, vp, offset, *prot);
613
614	/*
615         * Check if we need to extend the size of the segment
616         */
617	{
618		u_long	rm = round_page(*addr + msize);
619		u_long	rf = round_page(*addr + *size);
620		if (rm != rf) {
621			DPRINTF(("zeropad 0x%x-0x%x\n", rf, rm));
622			NEW_VMCMD(vcset, vmcmd_map_zero, rm - rf,
623				  rf, NULLVP, 0, *prot);
624			*size = msize;
625		}
626	}
627}
628
629
630/*
631 * linux_elf_set_segment():
632 *
633 * Decide if the segment is text or data, depending on the protection
634 * and set it appropriately
635 */
636static int
637linux_elf_set_segment(epp, vaddr, size, prot)
638	struct exec_package	*epp;
639	u_long			 vaddr;
640	u_long			 size;
641	int			 prot;
642{
643	/*
644         * Kludge: Unfortunately the current implementation of
645         * exec package assumes a single text and data segment.
646         * In Elf we can have more, but here we limit ourselves
647         * to two and hope :-(
648         * We also assume that the text is r-x, and data is rwx.
649         */
650	switch (prot) {
651	case (VM_PROT_READ | VM_PROT_EXECUTE):
652		if (epp->ep_tsize != ~0) {
653			DPRINTF(("More than one text segment\n"));
654			return ENOEXEC;
655		}
656		epp->ep_taddr = vaddr;
657		epp->ep_tsize = size;
658		DPRINTF(("Elf Text@ 0x%x, size %d\n", vaddr, size));
659		break;
660
661	case (VM_PROT_READ | VM_PROT_WRITE):
662	case (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE):
663		if (epp->ep_dsize != ~0) {
664			DPRINTF(("More than one data segment\n"));
665			return ENOEXEC;
666		}
667		epp->ep_daddr = vaddr;
668		epp->ep_dsize = size;
669
670		DPRINTF(("Elf Data@ 0x%x, size %d\n", vaddr, size));
671		break;
672
673	default:
674		DPRINTF(("Bad protection 0%o\n", prot));
675		return ENOEXEC;
676	}
677	return 0;
678}
679
680
681/*
682 * linux_elf_read_from():
683 *
684 *	Read from vnode into buffer at offset.
685 */
686static int
687linux_elf_read_from(vp, off, p, buf, size)
688	struct vnode	*vp;
689	u_long		 off;
690	struct proc	*p;
691	caddr_t		 buf;
692	int		 size;
693{
694	int	error;
695	int	resid;
696
697	DPRINTF(("read from 0x%x to 0x%x size %d\n",
698		 off, buf, size));
699	if ((error = vn_rdwr(UIO_READ, vp, buf, size,
700			     off, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
701			     &resid, p)) != 0) {
702		DPRINTF(("Bad read error %d\n", error));
703		return error;
704	}
705	/*
706         * See if we got all of it
707         */
708	if (resid != 0) {
709		DPRINTF(("Incomplete read for header ask=%d, rem=%d\n",
710			 size, resid));
711		return error;
712	}
713	return 0;
714}
715
716
717/*
718 * linux_elf_load_file():
719 *
720 * Load a file (interpreter/library) pointed to by path
721 * [stolen from coff_load_shlib()]. Made slightly more generic than
722 * the svr4 version, for possible later use in linux_uselib().
723 */
724static int
725linux_elf_load_file(p, path, vcset, entry, ap, last)
726	struct proc		*p;
727	char			*path;
728	struct exec_vmcmd_set   *vcset;
729	u_long			*entry;
730	struct elf_args		*ap;
731	u_long			*last;
732{
733	int			 error, i;
734	struct nameidata	 nd;
735	Elf32_Ehdr		 eh;
736	Elf32_Phdr		*ph = NULL;
737	u_long			 phsize;
738	char			*bp = NULL;
739	u_long			 addr = *last;
740
741	DPRINTF(("Loading file %s @ %x\n", path, addr));
742
743	if ((error = linux_emul_find(p, NULL, linux_emul_path, path, &bp, 0)) != 0)
744		bp = NULL;
745	else
746		path = bp;
747	/*
748         * 1. open file
749         * 2. read filehdr
750         * 3. map text, data, and bss out of it using VM_*
751         */
752	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
753	/* first get the vnode */
754	if ((error = namei(&nd)) != 0) {
755		if (bp != NULL)
756			free((char *) bp, M_TEMP);
757		return error;
758	}
759	if ((error = linux_elf_read_from(nd.ni_vp, 0, p, (caddr_t) &eh,
760				    sizeof(eh))) != 0)
761		goto bad;
762
763#ifdef DEBUG_EXEC_LINUX_ELF
764	print_Ehdr(&eh);
765#endif
766
767	if ((error = linux_elf_check_header(&eh, Elf32_et_dyn)) != 0)
768		goto bad;
769
770	phsize = eh.e_phnum * sizeof(Elf32_Phdr);
771	ph = (Elf32_Phdr *) malloc(phsize, M_TEMP, M_WAITOK);
772
773	if ((error = linux_elf_read_from(nd.ni_vp, eh.e_phoff, p,
774				    (caddr_t) ph, phsize)) != 0)
775		goto bad;
776
777	/*
778         * Load all the necessary sections
779         */
780	for (i = 0; i < eh.e_phnum; i++) {
781		u_long	size = 0;
782		int	prot = 0;
783#ifdef DEBUG_EXEC_LINUX_ELF
784		print_Phdr(&ph[i]);
785#endif
786
787		switch (ph[i].p_type) {
788		case Elf32_pt_load:
789			linux_elf_load_psection(vcset, nd.ni_vp, &ph[i], &addr,
790						&size, &prot);
791			/* Assume that the text segment is r-x only */
792			if ((prot & PROT_WRITE) == 0) {
793				*entry = addr + eh.e_entry;
794				ap->arg_interp = addr;
795				DPRINTF(("Interpreter@ 0x%x\n", addr));
796			}
797			addr += size;
798			break;
799
800		case Elf32_pt_dynamic:
801		case Elf32_pt_phdr:
802		case Elf32_pt_note:
803			break;
804
805		default:
806			DPRINTF(("interp: Unexpected program header type %d\n",
807				 ph[i].p_type));
808			break;
809		}
810	}
811
812bad:
813	if (ph != NULL)
814		free((char *) ph, M_TEMP);
815	if (bp != NULL)
816		free((char *) bp, M_TEMP);
817
818	*last = addr;
819	vrele(nd.ni_vp);
820	return error;
821}
822
823
824/*
825 * exec_linux_elf_makecmds(): Prepare an Elf binary's exec package
826 *
827 * First, set of the various offsets/lengths in the exec package.
828 *
829 * Then, mark the text image busy (so it can be demand paged) or error
830 * out if this is not possible.  Finally, set up vmcmds for the
831 * text, data, bss, and stack segments.
832 */
833int
834exec_linux_elf_makecmds(p, epp)
835	struct proc		*p;
836	struct exec_package	*epp;
837{
838	Elf32_Ehdr     *eh = epp->ep_hdr;
839	Elf32_Phdr     *ph, *pp;
840	int             error;
841	int             i;
842	char            interp[MAXPATHLEN];
843	u_long          pos = 0;
844	u_long          phsize;
845
846#ifdef DEBUG_EXEC_LINUX_ELF
847	print_Ehdr(eh);
848#endif
849	if (epp->ep_hdrvalid < sizeof(Elf32_Ehdr))
850		return ENOEXEC;
851
852	if (linux_elf_check_header(eh, Elf32_et_exec))
853		return ENOEXEC;
854
855	/*
856         * check if vnode is in open for writing, because we want to
857         * demand-page out of it.  if it is, don't do it, for various
858         * reasons
859         */
860	if (epp->ep_vp->v_writecount != 0) {
861#ifdef DIAGNOSTIC
862		if (epp->ep_vp->v_flag & VTEXT)
863			panic("exec: a VTEXT vnode has writecount != 0\n");
864#endif
865		return ETXTBSY;
866	}
867	/*
868         * Allocate space to hold all the program headers, and read them
869         * from the file
870         */
871	phsize = eh->e_phnum * sizeof(Elf32_Phdr);
872	ph = (Elf32_Phdr *) malloc(phsize, M_TEMP, M_WAITOK);
873
874	if ((error = linux_elf_read_from(epp->ep_vp, eh->e_phoff, p,
875				    (caddr_t) ph, phsize)) != 0)
876		goto bad;
877
878	epp->ep_tsize = ~0;
879	epp->ep_dsize = ~0;
880
881	interp[0] = '\0';
882
883	/*
884         * Load all the necessary sections
885         */
886	for (i = 0; i < eh->e_phnum; i++) {
887		u_long          addr = ~0, size = 0;
888		int             prot = 0;
889
890		pp = &ph[i];
891#ifdef DEBUG_EXEC_LINUX_ELF
892		print_Phdr(pp);
893#endif
894
895		switch (ph[i].p_type) {
896		case Elf32_pt_load:
897			linux_elf_load_psection(&epp->ep_vmcmds, epp->ep_vp,
898				&ph[i], &addr, &size, &prot);
899			if ((error = linux_elf_set_segment(epp, addr, size,
900						      prot)) != 0)
901				goto bad;
902			break;
903
904		case Elf32_pt_shlib:
905			DPRINTF(("No support for COFF libraries (yet)\n"));
906			error = ENOEXEC;
907			goto bad;
908
909		case Elf32_pt_interp:
910			if (pp->p_filesz >= sizeof(interp)) {
911				DPRINTF(("Interpreter path too long %d\n",
912					 pp->p_filesz));
913				goto bad;
914			}
915			if ((error = linux_elf_read_from(epp->ep_vp, pp->p_offset, p,
916				      (caddr_t) interp, pp->p_filesz)) != 0)
917				goto bad;
918			break;
919
920		case Elf32_pt_dynamic:
921		case Elf32_pt_phdr:
922		case Elf32_pt_note:
923			break;
924
925		default:
926			/*
927			 * Not fatal, we don't need to understand everything
928			 * :-)
929			 */
930			DPRINTF(("Unsupported program header type %d\n",
931				 pp->p_type));
932			break;
933		}
934	}
935
936	/*
937         * Check if we found a dynamically linked binary and arrange to load
938         * it's interpreter
939         */
940	if (interp[0]) {
941		struct elf_args *ap;
942		pos = ~0;
943
944		ap = (struct elf_args *) malloc(sizeof(struct elf_args),
945						 M_TEMP, M_WAITOK);
946		if ((error = linux_elf_load_file(p, interp, &epp->ep_vmcmds,
947				&epp->ep_entry, ap, &pos)) != 0) {
948			free((char *) ap, M_TEMP);
949			goto bad;
950		}
951		/* Arrange to load the program headers. */
952		pos = LINUX_ELF_ALIGN(pos + NBPG, NBPG);
953		DPRINTF(("Program header @0x%x\n", pos));
954		ap->arg_phaddr = pos;
955		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, phsize,
956			  pos, epp->ep_vp, eh->e_phoff,
957			  VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE);
958		pos += phsize;
959
960		ap->arg_phentsize = eh->e_phentsize;
961		ap->arg_phnum = eh->e_phnum;
962		ap->arg_entry = eh->e_entry;
963
964		epp->ep_emul_arg = ap;
965	} else
966		epp->ep_entry = eh->e_entry;
967
968	DPRINTF(("taddr 0x%x tsize 0x%x daddr 0x%x dsize 0x%x\n",
969	       epp->ep_taddr, epp->ep_tsize, epp->ep_daddr, epp->ep_dsize));
970
971	free((char *) ph, M_TEMP);
972
973	DPRINTF(("Elf entry@ 0x%x\n", epp->ep_entry));
974	epp->ep_vp->v_flag |= VTEXT;
975
976	epp->ep_emul = &emul_linux_elf;
977
978	return exec_aout_setup_stack(p, epp);
979
980bad:
981	free((char *) ph, M_TEMP);
982	kill_vmcmds(&epp->ep_vmcmds);
983	return ENOEXEC;
984}
985/*
986 * The Linux system call to load shared libraries, a.out version. The
987 * a.out shared libs are just files that are mapped onto a fixed
988 * address in the process' address space. The address is given in
989 * a_entry. Read in the header, set up some VM commands and run them.
990 *
991 * Yes, both text and data are mapped at once, so we're left with
992 * writeable text for the shared libs. The Linux crt0 seemed to break
993 * sometimes when data was mapped seperately. It munmapped a uselib()
994 * of ld.so by hand, which failed with shared text and data for ld.so
995 * Yuck.
996 *
997 * Because of the problem with ZMAGIC executables (text starts
998 * at 0x400 in the file, but needs to be mapped at 0), ZMAGIC
999 * shared libs are not handled very efficiently :-(
1000 */
1001
1002int
1003linux_uselib(p, uap, retval)
1004	struct proc *p;
1005	struct linux_uselib_args /* {
1006		syscallarg(char *) path;
1007	} */ *uap;
1008	register_t *retval;
1009{
1010	caddr_t sg;
1011	long bsize, dsize, tsize, taddr, baddr, daddr;
1012	struct nameidata ni;
1013	struct vnode *vp;
1014	struct exec hdr;
1015	struct exec_vmcmd_set vcset;
1016	int rem, i, magic, error;
1017
1018	sg = stackgap_init();
1019	CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
1020
1021	NDINIT(&ni, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
1022
1023	if ((error = namei(&ni)))
1024		return error;
1025
1026	vp = ni.ni_vp;
1027
1028	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t) &hdr, LINUX_AOUT_HDR_SIZE,
1029			     0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
1030			     &rem, p))) {
1031		vrele(vp);
1032		return error;
1033	}
1034
1035	if (rem != 0) {
1036		vrele(vp);
1037		return ENOEXEC;
1038	}
1039
1040	if (LINUX_N_MACHTYPE(&hdr) != LINUX_MID_MACHINE)
1041		return ENOEXEC;
1042
1043	magic = LINUX_N_MAGIC(&hdr);
1044	taddr = hdr.a_entry & (~(NBPG - 1));
1045	tsize = hdr.a_text;
1046	daddr = taddr + tsize;
1047	dsize = hdr.a_data + hdr.a_bss;
1048
1049	if ((hdr.a_text != 0 || hdr.a_data != 0) && vp->v_writecount != 0) {
1050		vrele(vp);
1051                return ETXTBSY;
1052        }
1053	vp->v_flag |= VTEXT;
1054
1055	vcset.evs_cnt = 0;
1056	vcset.evs_used = 0;
1057
1058	NEW_VMCMD(&vcset,
1059		  magic == ZMAGIC ? vmcmd_map_readvn : vmcmd_map_pagedvn,
1060		  hdr.a_text + hdr.a_data, taddr,
1061		  vp, LINUX_N_TXTOFF(hdr, magic),
1062		  VM_PROT_READ|VM_PROT_EXECUTE|VM_PROT_WRITE);
1063
1064	baddr = roundup(daddr + hdr.a_data, NBPG);
1065	bsize = daddr + dsize - baddr;
1066        if (bsize > 0) {
1067                NEW_VMCMD(&vcset, vmcmd_map_zero, bsize, baddr,
1068                    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
1069	}
1070
1071	for (i = 0; i < vcset.evs_used && !error; i++) {
1072		struct exec_vmcmd *vcp;
1073
1074		vcp = &vcset.evs_cmds[i];
1075		error = (*vcp->ev_proc)(p, vcp);
1076	}
1077
1078	kill_vmcmds(&vcset);
1079
1080	vrele(vp);
1081
1082	return error;
1083}
1084
1085/*
1086 * Execve(2). Just check the alternate emulation path, and pass it on
1087 * to the NetBSD execve().
1088 */
1089int
1090linux_execve(p, uap, retval)
1091	struct proc *p;
1092	struct linux_execve_args /* {
1093		syscallarg(char *) path;
1094		syscallarg(char **) argv;
1095		syscallarg(char **) envp;
1096	} */ *uap;
1097	register_t *retval;
1098{
1099	caddr_t sg;
1100
1101	sg = stackgap_init();
1102	CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
1103
1104	return execve(p, uap, retval);
1105}
1106