linux_exec.c revision 1.1
1/*	$NetBSD: linux_exec.c,v 1.1 1995/02/28 23:24:46 fvdl Exp $	*/
2
3/*
4 * Copyright (c) 1995 Frank van der Linden
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *      This product includes software developed for the NetBSD Project
18 *      by Frank van der Linden
19 * 4. The name of the author may not be used to endorse or promote products
20 *    derived from this software without specific prior written permission
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 * based on kern/exec_aout.c and compat/sunos/sunos_exec.c
34 */
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/filedesc.h>
39#include <sys/kernel.h>
40#include <sys/proc.h>
41#include <sys/mount.h>
42#include <sys/malloc.h>
43#include <sys/namei.h>
44#include <sys/vnode.h>
45#include <sys/file.h>
46#include <sys/resourcevar.h>
47#include <sys/wait.h>
48
49#include <sys/mman.h>
50#include <vm/vm.h>
51#include <vm/vm_param.h>
52#include <vm/vm_map.h>
53#include <vm/vm_kern.h>
54#include <vm/vm_pager.h>
55
56#include <machine/cpu.h>
57#include <machine/reg.h>
58#include <machine/exec.h>
59
60#include <compat/linux/linux_types.h>
61#include <compat/linux/linux_syscallargs.h>
62#include <compat/linux/linux_util.h>
63#include <compat/linux/linux_exec.h>
64
65int
66exec_linux_aout_makecmds(p, epp)
67	struct proc *p;
68	struct exec_package *epp;
69{
70	struct exec *linux_ep = epp->ep_hdr;
71	int machtype, magic;
72	int error = ENOEXEC;
73
74	magic = LINUX_N_MAGIC(linux_ep);
75	machtype = LINUX_N_MACHTYPE(linux_ep);
76
77
78	if (machtype != LINUX_MID_MACHINE)
79		return (ENOEXEC);
80
81	switch (magic) {
82	case QMAGIC:
83		error = exec_linux_aout_prep_qmagic(p, epp);
84		break;
85	case ZMAGIC:
86		error = exec_linux_aout_prep_zmagic(p, epp);
87		break;
88	case NMAGIC:
89		error = exec_linux_aout_prep_nmagic(p, epp);
90		break;
91	case OMAGIC:
92		error = exec_linux_aout_prep_omagic(p, epp);
93		break;
94	}
95	if (error == 0)
96		epp->ep_emul = EMUL_LINUX;
97	return error;
98}
99
100/*
101 * Since text starts at 0x400 in Linux ZMAGIC executables, and 0x400
102 * is very likely not page aligned on most architectures, it is treated
103 * as an NMAGIC here. XXX
104 */
105
106int
107exec_linux_aout_prep_zmagic(p, epp)
108	struct proc *p;
109	struct exec_package *epp;
110{
111	struct exec *execp = epp->ep_hdr;
112
113	epp->ep_taddr = LINUX_N_TXTADDR(*execp, ZMAGIC);
114	epp->ep_tsize = execp->a_text;
115	epp->ep_daddr = LINUX_N_DATADDR(*execp, ZMAGIC);
116	epp->ep_dsize = execp->a_data + execp->a_bss;
117	epp->ep_entry = execp->a_entry;
118
119	/* set up command for text segment */
120	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
121	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, ZMAGIC),
122	    VM_PROT_READ|VM_PROT_EXECUTE);
123
124	/* set up command for data segment */
125	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
126	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, ZMAGIC),
127	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
128
129	/* set up command for bss segment */
130	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
131	    epp->ep_daddr + execp->a_data, NULLVP, 0,
132	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
133
134	return exec_aout_setup_stack(p, epp);
135}
136
137/*
138 * exec_aout_prep_nmagic(): Prepare Linux NMAGIC package.
139 * Not different from the normal stuff.
140 */
141
142int
143exec_linux_aout_prep_nmagic(p, epp)
144	struct proc *p;
145	struct exec_package *epp;
146{
147	struct exec *execp = epp->ep_hdr;
148	long bsize, baddr;
149
150	epp->ep_taddr = LINUX_N_TXTADDR(*execp, NMAGIC);
151	epp->ep_tsize = execp->a_text;
152	epp->ep_daddr = LINUX_N_DATADDR(*execp, NMAGIC);
153	epp->ep_dsize = execp->a_data + execp->a_bss;
154	epp->ep_entry = execp->a_entry;
155
156	/* set up command for text segment */
157	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
158	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, NMAGIC),
159	    VM_PROT_READ|VM_PROT_EXECUTE);
160
161	/* set up command for data segment */
162	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
163	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, NMAGIC),
164	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
165
166	/* set up command for bss segment */
167	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
168	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
169	if (bsize > 0)
170		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
171		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
172
173	return exec_aout_setup_stack(p, epp);
174}
175
176/*
177 * exec_aout_prep_omagic(): Prepare Linux OMAGIC package.
178 * Business as usual.
179 */
180
181int
182exec_linux_aout_prep_omagic(p, epp)
183	struct proc *p;
184	struct exec_package *epp;
185{
186	struct exec *execp = epp->ep_hdr;
187	long dsize, bsize, baddr;
188
189	epp->ep_taddr = LINUX_N_TXTADDR(*execp, OMAGIC);
190	epp->ep_tsize = execp->a_text;
191	epp->ep_daddr = LINUX_N_DATADDR(*execp, OMAGIC);
192	epp->ep_dsize = execp->a_data + execp->a_bss;
193	epp->ep_entry = execp->a_entry;
194
195	/* set up command for text and data segments */
196	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn,
197	    execp->a_text + execp->a_data, epp->ep_taddr, epp->ep_vp,
198	    LINUX_N_TXTOFF(*execp, OMAGIC), VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
199
200	/* set up command for bss segment */
201	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
202	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
203	if (bsize > 0)
204		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
205		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
206
207	/*
208	 * Make sure (# of pages) mapped above equals (vm_tsize + vm_dsize);
209	 * obreak(2) relies on this fact. Both `vm_tsize' and `vm_dsize' are
210	 * computed (in execve(2)) by rounding *up* `ep_tsize' and `ep_dsize'
211	 * respectively to page boundaries.
212	 * Compensate `ep_dsize' for the amount of data covered by the last
213	 * text page.
214	 */
215	dsize = epp->ep_dsize + execp->a_text - roundup(execp->a_text, NBPG);
216	epp->ep_dsize = (dsize > 0) ? dsize : 0;
217	return exec_aout_setup_stack(p, epp);
218}
219
220int
221exec_linux_aout_prep_qmagic(p, epp)
222	struct proc *p;
223	struct exec_package *epp;
224{
225	struct exec *execp = epp->ep_hdr;
226
227	epp->ep_taddr = LINUX_N_TXTADDR(*execp, QMAGIC);
228	epp->ep_tsize = execp->a_text;
229	epp->ep_daddr = LINUX_N_DATADDR(*execp, QMAGIC);
230	epp->ep_dsize = execp->a_data + execp->a_bss;
231	epp->ep_entry = execp->a_entry;
232
233	/*
234	 * check if vnode is in open for writing, because we want to
235	 * demand-page out of it.  if it is, don't do it, for various
236	 * reasons
237	 */
238	if ((execp->a_text != 0 || execp->a_data != 0) &&
239	    epp->ep_vp->v_writecount != 0) {
240#ifdef DIAGNOSTIC
241		if (epp->ep_vp->v_flag & VTEXT)
242			panic("exec: a VTEXT vnode has writecount != 0\n");
243#endif
244		return ETXTBSY;
245	}
246	epp->ep_vp->v_flag |= VTEXT;
247
248	/* set up command for text segment */
249	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_text,
250	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, QMAGIC),
251	    VM_PROT_READ|VM_PROT_EXECUTE);
252
253	/* set up command for data segment */
254	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_data,
255	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, QMAGIC),
256	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
257
258	/* set up command for bss segment */
259	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
260	    epp->ep_daddr + execp->a_data, NULLVP, 0,
261	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
262
263	return exec_aout_setup_stack(p, epp);
264}
265
266/*
267 * The Linux system call to load shared libraries. The current shared
268 * libraries are just (QMAGIC) a.out files that are mapped onto a fixed
269 * address * in the process' address space. The address is given in
270 * a_entry. Read in the header, set up some VM commands and run them.
271 *
272 * Yes, both text and data are mapped at once, so we're left with
273 * writeable text for the shared libs. The Linux crt0 seemed to break
274 * sometimes when data was mapped seperately. It munmapped a uselib()
275 * of ld.so by hand, which failed with shared text and data for ld.so
276 * Yuck.
277 *
278 * Because of the problem with ZMAGIC executables (text starts
279 * at 0x400 in the file, but needs t be mapped at 0), ZMAGIC
280 * shared libs are not handled very efficiently :-(
281 */
282
283int
284linux_uselib(p, uap, retval)
285	struct proc *p;
286	struct linux_uselib_args /* {
287		syscallarg(char *) path;
288	} */ *uap;
289	register_t *retval;
290{
291	caddr_t sg;
292	long bsize, dsize, tsize, taddr, baddr, daddr;
293	struct nameidata ni;
294	struct vnode *vp;
295	struct exec hdr;
296	struct exec_vmcmd_set vcset;
297	int rem, i, magic, error;
298
299	sg = stackgap_init();
300	CHECK_ALT(p, &sg, SCARG(uap, path));
301
302	NDINIT(&ni, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
303
304	if ((error = namei(&ni)))
305		return error;
306
307	vp = ni.ni_vp;
308
309	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t) &hdr, LINUX_AOUT_HDR_SIZE,
310			     0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
311			     &rem, p))) {
312		vrele(vp);
313		return error;
314	}
315
316	if (rem != 0) {
317		vrele(vp);
318		return ENOEXEC;
319	}
320
321	magic = LINUX_N_MAGIC(&hdr);
322	taddr = hdr.a_entry & (~(NBPG - 1));
323	tsize = hdr.a_text;
324	daddr = taddr + tsize;
325	dsize = hdr.a_data + hdr.a_bss;
326
327	if ((hdr.a_text != 0 || hdr.a_data != 0) && vp->v_writecount != 0) {
328		vrele(vp);
329                return ETXTBSY;
330        }
331	vp->v_flag |= VTEXT;
332
333	vcset.evs_cnt = 0;
334	vcset.evs_used = 0;
335
336	NEW_VMCMD(&vcset,
337		  magic == ZMAGIC ? vmcmd_map_readvn : vmcmd_map_pagedvn,
338		  hdr.a_text + hdr.a_data, taddr,
339		  vp, LINUX_N_TXTOFF(hdr, magic),
340		  VM_PROT_READ|VM_PROT_EXECUTE|VM_PROT_WRITE);
341
342	baddr = roundup(daddr + hdr.a_data, NBPG);
343	bsize = daddr + dsize - baddr;
344        if (bsize > 0) {
345                NEW_VMCMD(&vcset, vmcmd_map_zero, bsize, baddr,
346                    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
347	}
348
349	for (i = 0; i < vcset.evs_used && !error; i++) {
350		struct exec_vmcmd *vcp;
351
352		vcp = &vcset.evs_cmds[i];
353		error = (*vcp->ev_proc)(p, vcp);
354	}
355
356	kill_vmcmds(&vcset);
357
358	vrele(vp);
359
360	return error;
361}
362
363/*
364 * Execve(2). Just check the alternate emulation path, and pass it on
365 * to the NetBSD execve().
366 */
367int
368linux_execve(p, uap, retval)
369	struct proc *p;
370	struct linux_execve_args /* {
371		syscallarg(char *) path;
372		syscallarg(char **) argv;
373		syscallarg(char **) envp;
374	} */ *uap;
375	register_t *retval;
376{
377	caddr_t sg;
378
379	sg = stackgap_init();
380	CHECK_ALT(p, &sg, SCARG(uap, path));
381
382	return execve(p, uap, retval);
383}
384