/*	$NetBSD: linux_exec.c,v 1.99 2007/12/04 18:40:15 dsl Exp $	*/

/*-
 * Copyright (c) 1994, 1995, 1998, 2000, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Christos Zoulas, Frank van der Linden, Eric Haszlakiewicz and
 * Thor Lancelot Simon.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

4036054Sbde#include <sys/cdefs.h>
411844Swollman__KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.99 2007/12/04 18:40:15 dsl Exp $");
421638Srgrimes
4394518Sru#include <sys/param.h>
4494518Sru#include <sys/systm.h>
4594518Sru#include <sys/kernel.h>
4694518Sru#include <sys/proc.h>
4794518Sru#include <sys/malloc.h>
4894518Sru#include <sys/namei.h>
4994518Sru#include <sys/vnode.h>
508321Sbde#include <sys/mount.h>
511844Swollman#include <sys/exec.h>
5294518Sru#include <sys/exec_elf.h>
5394424Sru
5494424Sru#include <sys/mman.h>
5594424Sru#include <sys/syscallargs.h>
562351Sbde
5794424Sru#include <sys/ptrace.h>	/* For proc_reparent() */
581638Srgrimes
592351Sbde#include <uvm/uvm_extern.h>
602351Sbde
612351Sbde#include <sys/cpu.h>
622351Sbde#include <machine/reg.h>
632351Sbde
642351Sbde#include <compat/linux/common/linux_types.h>
6533624Seivind#include <compat/linux/common/linux_signal.h>
6634081Sbde#include <compat/linux/common/linux_util.h>
6794497Sru#include <compat/linux/common/linux_sched.h>
6894497Sru#include <compat/linux/common/linux_machdep.h>
6994497Sru#include <compat/linux/common/linux_exec.h>
7094410Sru#include <compat/linux/common/linux_futex.h>
7133624Seivind#include <compat/linux/common/linux_ipc.h>
7294518Sru#include <compat/linux/common/linux_sem.h>
731638Srgrimes
741638Srgrimes#include <compat/linux/linux_syscallargs.h>
751638Srgrimes#include <compat/linux/linux_syscall.h>
7674806Sru#include <compat/linux/common/linux_misc.h>
7774806Sru#include <compat/linux/common/linux_errno.h>
781638Srgrimes#include <compat/linux/common/linux_emuldata.h>
7958493Sru
8074806Sruextern struct sysent linux_sysent[];
8174941Sruextern const char * const linux_syscallnames[];
8274941Sruextern char linux_sigcode[], linux_esigcode[];
831638Srgrimes
841638Srgrimesstatic void linux_e_proc_exec(struct proc *, struct exec_package *);
851638Srgrimesstatic void linux_e_proc_fork(struct proc *, struct proc *, int);
8697769Srustatic void linux_e_proc_exit(struct proc *);
8774806Srustatic void linux_e_proc_init(struct proc *, struct proc *, int);
8896164Sru
8974806Sru#ifdef LINUX_NPTL
901638Srgrimesvoid linux_userret(void);
91119607Sru#endif
92119607Sru
93119607Sru/*
941638Srgrimes * Emulation switch.
95119607Sru */
96119607Sru
97119607Srustruct uvm_object *emul_linux_object;
98119607Sru
9955670Sbdeconst struct emul emul_linux = {
10024750Sbde	"linux",
10146541Sbde	"/emul/linux",
10294497Sru#ifndef __HAVE_MINIMAL_EMUL
10394497Sru	0,
10424750Sbde	(const int *)native_to_linux_errno,
10528945Speter	LINUX_SYS_syscall,
10624750Sbde	LINUX_SYS_NSYSENT,
1071638Srgrimes#endif
1081638Srgrimes	linux_sysent,
10949328Shoek	linux_syscallnames,
11049328Shoek	linux_sendsig,
11149328Shoek	linux_trapsignal,
11249328Shoek	NULL,
11349328Shoek	linux_sigcode,
11495734Sru	linux_esigcode,
11596163Sru	&emul_linux_object,
11699343Sru	linux_setregs,
11796163Sru	linux_e_proc_exec,
1181638Srgrimes	linux_e_proc_fork,
11975083Sru	linux_e_proc_exit,
120100872Sru	NULL,
12175083Sru	NULL,
12275083Sru#ifdef __HAVE_SYSCALL_INTERN
123100872Sru	linux_syscall_intern,
12449328Shoek#else
1251638Srgrimes#error Implement __HAVE_SYSCALL_INTERN for this platform
12675083Sru#endif
12795734Sru	NULL,
1281638Srgrimes	NULL,
12975284Sru
13075284Sru	uvm_default_mapaddr,
13199343Sru
13275284Sru	linux_usertrap,
13375284Sru	0,
13475284Sru	NULL,		/* e_startlwp */
13575284Sru};
13675284Sru
13775284Srustatic void
13875284Srulinux_e_proc_init(p, parent, forkflags)
13975284Sru	struct proc *p, *parent;
14075284Sru	int forkflags;
14175284Sru{
14275284Sru	struct linux_emuldata *e = p->p_emuldata;
14375284Sru	struct linux_emuldata_shared *s;
14475284Sru	struct linux_emuldata *ep = NULL;
14575284Sru
14675284Sru	if (!e) {
14775284Sru		/* allocate new Linux emuldata */
14888055Sru		MALLOC(e, void *, sizeof(struct linux_emuldata),
14988055Sru			M_EMULDATA, M_WAITOK);
150100872Sru	} else  {
15175284Sru		e->s->refs--;
15294954Sru		if (e->s->refs == 0)
15375284Sru			FREE(e->s, M_EMULDATA);
15475284Sru	}
15575284Sru
15675284Sru	memset(e, '\0', sizeof(struct linux_emuldata));
15799257Sru
15899257Sru	e->proc = p;
15999257Sru
16097769Sru	if (parent)
16196668Sru		ep = parent->p_emuldata;
16299256Sru
16396462Sru	if (forkflags & FORK_SHAREVM) {
16496163Sru#ifdef DIAGNOSTIC
16596164Sru		if (ep == NULL) {
16699343Sru			killproc(p, "FORK_SHAREVM while emuldata is NULL\n");
16796163Sru			FREE(e, M_EMULDATA);
16896163Sru			return;
1691844Swollman		}
1701638Srgrimes#endif
1711638Srgrimes		s = ep->s;
172103713Smarkm		s->refs++;
1731638Srgrimes	} else {
174103713Smarkm		struct vmspace *vm;
1751638Srgrimes
1761638Srgrimes		MALLOC(s, void *, sizeof(struct linux_emuldata_shared),
1771638Srgrimes			M_EMULDATA, M_WAITOK);
1781638Srgrimes		s->refs = 1;
1791638Srgrimes
18074842Sru		/*
1811844Swollman		 * Set the process idea of the break to the real value.
18228945Speter		 * For fork, we use parent's vmspace since our's
18328945Speter		 * is not setup at the time of this call and is going
1841844Swollman		 * to be copy of parent's anyway. For exec, just
18534081Sbde		 * use our own vmspace.
18694113Sru		 */
18734087Sbde		vm = (parent) ? parent->p_vmspace : p->p_vmspace;
18834081Sbde		s->p_break = (char *)vm->vm_daddr + ctob(vm->vm_dsize);
18934081Sbde
19016663Sjkh		/*
19176861Skris		 * Linux threads are emulated as NetBSD processes (not lwp)
19276861Skris		 * We use native PID for Linux TID. The Linux TID is the
193		 * PID of the first process in the group. It is stored
194		 * here
195		 */
196		s->group_pid = p->p_pid;
197
198		/*
199		 * Initialize the list of threads in the group
200		 */
201		LIST_INIT(&s->threads);
202
203		s->xstat = 0;
204		s->flags = 0;
205	}
206
207	e->s = s;
208
209	/*
210	 * Add this thread in the group thread list
211	 */
212	LIST_INSERT_HEAD(&s->threads, e, threads);
213
214#ifdef LINUX_NPTL
215	if (ep != NULL) {
216		e->parent_tidptr = ep->parent_tidptr;
217		e->child_tidptr = ep->child_tidptr;
218		e->clone_flags = ep->clone_flags;
219	}
220#endif /* LINUX_NPTL */
221
222	p->p_emuldata = e;
223}
224
/*
 * Emulation exec hook: (re)initialise the per-process structures when a
 * Linux binary is executed.  An existing emuldata is reused - if it is
 * not NULL, the executed process is of the same emulation as the one it
 * was forked from.  The exec package argument is not used.
 */
static void
linux_e_proc_exec(struct proc *p, struct exec_package *epp)
{
	/* exec: no parent and no fork flags, so our own vmspace is used */
	linux_e_proc_init(p, NULL, 0);
}

237/*
238 * Emulation per-process exit hook.
239 */
240static void
241linux_e_proc_exit(p)
242	struct proc *p;
243{
244	struct linux_emuldata *e = p->p_emuldata;
245
246#ifdef LINUX_NPTL
247	linux_nptl_proc_exit(p);
248#endif
249	/* Remove the thread for the group thread list */
250	LIST_REMOVE(e, threads);
251
252	/* free Linux emuldata and set the pointer to null */
253	e->s->refs--;
254	if (e->s->refs == 0)
255		FREE(e->s, M_EMULDATA);
256	FREE(e, M_EMULDATA);
257	p->p_emuldata = NULL;
258}
259
260/*
261 * Emulation fork hook.
262 */
263static void
264linux_e_proc_fork(p, parent, forkflags)
265	struct proc *p, *parent;
266	int forkflags;
267{
268	/*
269	 * The new process might share some vmspace-related stuff
270	 * with parent, depending on fork flags (CLONE_VM et.al).
271	 * Force allocation of new base emuldata, and share the
272	 * VM-related parts only if necessary.
273	 */
274	p->p_emuldata = NULL;
275	linux_e_proc_init(p, parent, forkflags);
276
277#ifdef LINUX_NPTL
278	linux_nptl_proc_fork(p, parent, linux_userret);
279#endif
280
281	return;
282}
283
#ifdef LINUX_NPTL
285void
286linux_userret(void)
287{
288	struct lwp *l = curlwp;
289	struct proc *p = l->l_proc;
290	struct linux_emuldata *led = p->p_emuldata;
291	int error;
292
293	/* LINUX_CLONE_CHILD_SETTID: copy child's TID to child's memory  */
294	if (led->clone_flags & LINUX_CLONE_CHILD_SETTID) {
295		if ((error = copyout(&l->l_proc->p_pid,
296		    led->child_tidptr,  sizeof(l->l_proc->p_pid))) != 0)
297			printf("linux_userret: LINUX_CLONE_CHILD_SETTID "
298			    "failed (led->child_tidptr = %p, p->p_pid = %d)\n",
299			    led->child_tidptr, p->p_pid);
300	}
301
302	/* LINUX_CLONE_SETTLS: allocate a new TLS */
303	if (led->clone_flags & LINUX_CLONE_SETTLS) {
304		if (linux_set_newtls(l, linux_get_newtls(l)) != 0)
305			printf("linux_userret: linux_set_tls failed");
306	}
307
308	return;
309}
310
311void
312linux_nptl_proc_exit(p)
313	struct proc *p;
314{
315	struct linux_emuldata *e = p->p_emuldata;
316
317	mutex_enter(&proclist_lock);
318
319	/*
320	 * Check if we are a thread group leader victim of another
321	 * thread doing exit_group(). If we are, change the exit code.
322	 */
323	if ((e->s->group_pid == p->p_pid) &&
324	    (e->s->flags & LINUX_LES_INEXITGROUP)) {
325		p->p_xstat = e->s->xstat;
326	}
327
328	/*
329	 * Members of the thread groups others than the leader should
330	 * exit quietely: no zombie stage, no signal. We do that by
331	 * reparenting to init. init will collect us and nobody will
332	 * notice what happened.
333	 */
334#ifdef DEBUG_LINUX
335	printf("%s:%d e->s->group_pid = %d, p->p_pid = %d, flags = 0x%x\n",
336	    __func__, __LINE__, e->s->group_pid, p->p_pid, e->s->flags);
337#endif
338	if (e->s->group_pid != p->p_pid) {
339		proc_reparent(p, initproc);
340		cv_broadcast(&initproc->p_waitcv);
341	}
342
343	mutex_exit(&proclist_lock);
344
345	/* Emulate LINUX_CLONE_CHILD_CLEARTID */
346	if (e->clear_tid != NULL) {
347		int error;
348		int null = 0;
349		struct linux_sys_futex_args cup;
350		register_t retval;
351
352		error = copyout(&null, e->clear_tid, sizeof(null));
353#ifdef DEBUG_LINUX
354		if (error != 0)
355			printf("%s: cannot clear TID\n", __func__);
356#endif
357
358		SCARG(&cup, uaddr) = e->clear_tid;
359		SCARG(&cup, op) = LINUX_FUTEX_WAKE;
360		SCARG(&cup, val) = 0x7fffffff; /* Awake everyone */
361		SCARG(&cup, timeout) = NULL;
362		SCARG(&cup, uaddr2) = NULL;
363		SCARG(&cup, val3) = 0;
364		if ((error = linux_sys_futex(curlwp, &cup, &retval)) != 0)
365			printf("%s: linux_sys_futex failed\n", __func__);
366	}
367
368	return;
369}
370
371void
372linux_nptl_proc_fork(p, parent, luserret)
373	struct proc *p;
374	struct proc *parent;
375	void (*luserret)(void);
376{
377#ifdef LINUX_NPTL
378	struct linux_emuldata *e;
379#endif
380
381	e = p->p_emuldata;
382
383	/* LINUX_CLONE_CHILD_CLEARTID: clear TID in child's memory on exit() */
384	if (e->clone_flags & LINUX_CLONE_CHILD_CLEARTID)
385		e->clear_tid = e->child_tidptr;
386
387	/* LINUX_CLONE_PARENT_SETTID: set child's TID in parent's memory */
388	if (e->clone_flags & LINUX_CLONE_PARENT_SETTID) {
389		if (copyout_proc(parent, &p->p_pid,
390		    e->parent_tidptr,  sizeof(p->p_pid)) != 0)
391			printf("%s: LINUX_CLONE_PARENT_SETTID "
392			    "failed (e->parent_tidptr = %p, "
393			    "parent->p_pid = %d, p->p_pid = %d)\n",
394			    __func__, e->parent_tidptr,
395			    parent->p_pid, p->p_pid);
396	}
397
398	/*
399	 * CLONE_CHILD_SETTID and LINUX_CLONE_SETTLS require child's VM
400	 * setup to be completed, we postpone them until userret time.
401	 */
402	if (e->clone_flags &
403	    (LINUX_CLONE_CHILD_CLEARTID | LINUX_CLONE_SETTLS))
404		p->p_userret = luserret;
405
406	return;
407}
408
409void
410linux_nptl_proc_init(p, parent)
411	struct proc *p;
412	struct proc *parent;
413{
414	struct linux_emuldata *e = p->p_emuldata;
415	struct linux_emuldata *ep;
416
417	if ((parent != NULL) && (parent->p_emuldata != NULL)) {
418		ep = parent->p_emuldata;
419
420		e->parent_tidptr = ep->parent_tidptr;
421		e->child_tidptr = ep->child_tidptr;
422		e->clone_flags = ep->clone_flags;
423	}
424
425	return;
426}


#endif /* LINUX_NPTL */
