linux_emul.c revision 178976
1/*-
2 * Copyright (c) 2006 Roman Divacky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/compat/linux/linux_emul.c 178976 2008-05-13 20:01:27Z rdivacky $");
31
32#include "opt_compat.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/imgact.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/mutex.h>
41#include <sys/sx.h>
42#include <sys/proc.h>
43#include <sys/syscallsubr.h>
44#include <sys/sysproto.h>
45#include <sys/unistd.h>
46
47#ifdef COMPAT_LINUX32
48#include <machine/../linux32/linux.h>
49#include <machine/../linux32/linux32_proto.h>
50#else
51#include <machine/../linux/linux.h>
52#include <machine/../linux/linux_proto.h>
53#endif
54
55#include <compat/linux/linux_emul.h>
56#include <compat/linux/linux_futex.h>
57
/*
 * Locks used by the Linux emulation data in this file.
 *
 * emul_shared_lock is write-locked (EMUL_SHARED_WLOCK/EMUL_SHARED_WUNLOCK)
 * around updates of a shared structure's reference count and thread list.
 * emul_lock is taken by em_find(..., EMUL_DOLOCK) and dropped by the caller
 * with EMUL_UNLOCK once it is done with the returned emuldata.
 */
struct	sx	emul_shared_lock;
struct	mtx	emul_lock;
60
61/* this returns locked reference to the emuldata entry (if found) */
62struct linux_emuldata *
63em_find(struct proc *p, int locked)
64{
65	struct linux_emuldata *em;
66
67	if (locked == EMUL_DOLOCK)
68		EMUL_LOCK(&emul_lock);
69
70	em = p->p_emuldata;
71
72	if (em == NULL && locked == EMUL_DOLOCK)
73		EMUL_UNLOCK(&emul_lock);
74
75	return (em);
76}
77
78int
79linux_proc_init(struct thread *td, pid_t child, int flags)
80{
81	struct linux_emuldata *em, *p_em;
82	struct proc *p;
83
84	if (child != 0) {
85		/* non-exec call */
86		em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO);
87		em->pid = child;
88		em->pdeath_signal = 0;
89		em->robust_futexes = NULL;
90		if (flags & LINUX_CLONE_THREAD) {
91			/* handled later in the code */
92		} else {
93			struct linux_emuldata_shared *s;
94
95			s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO);
96			s->refs = 1;
97			s->group_pid = child;
98
99			LIST_INIT(&s->threads);
100			em->shared = s;
101		}
102	} else {
103		/* lookup the old one */
104		em = em_find(td->td_proc, EMUL_DOLOCK);
105		KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
106	}
107
108	em->child_clear_tid = NULL;
109	em->child_set_tid = NULL;
110
111	/*
112	 * allocate the shared struct only in clone()/fork cases in the case
113	 * of clone() td = calling proc and child = pid of the newly created
114	 * proc
115	 */
116	if (child != 0) {
117		if (flags & LINUX_CLONE_THREAD) {
118			/* lookup the parent */
119			/*
120			 * we dont have to lock the p_em because
121			 * its waiting for us in linux_clone so
122			 * there is no chance of it changing the
123			 * p_em->shared address
124			 */
125			p_em = em_find(td->td_proc, EMUL_DONTLOCK);
126			KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n"));
127			em->shared = p_em->shared;
128			EMUL_SHARED_WLOCK(&emul_shared_lock);
129			em->shared->refs++;
130			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
131		} else {
132			/*
133			 * handled earlier to avoid malloc(M_WAITOK) with
134			 * rwlock held
135			 */
136		}
137	}
138	if (child != 0) {
139		EMUL_SHARED_WLOCK(&emul_shared_lock);
140		LIST_INSERT_HEAD(&em->shared->threads, em, threads);
141		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
142
143		p = pfind(child);
144		KASSERT(p != NULL, ("process not found in proc_init\n"));
145		p->p_emuldata = em;
146		PROC_UNLOCK(p);
147	} else
148		EMUL_UNLOCK(&emul_lock);
149
150	return (0);
151}
152
153void
154linux_proc_exit(void *arg __unused, struct proc *p)
155{
156	struct linux_emuldata *em;
157	int error;
158	struct thread *td = FIRST_THREAD_IN_PROC(p);
159	int *child_clear_tid;
160	struct proc *q, *nq;
161
162	if (__predict_true(p->p_sysent != &elf_linux_sysvec))
163		return;
164
165	release_futexes(p);
166
167	/* find the emuldata */
168	em = em_find(p, EMUL_DOLOCK);
169
170	KASSERT(em != NULL, ("proc_exit: emuldata not found.\n"));
171
172	/* reparent all procs that are not a thread leader to initproc */
173	if (em->shared->group_pid != p->p_pid) {
174		child_clear_tid = em->child_clear_tid;
175		EMUL_UNLOCK(&emul_lock);
176		sx_xlock(&proctree_lock);
177		wakeup(initproc);
178		PROC_LOCK(p);
179		proc_reparent(p, initproc);
180		p->p_sigparent = SIGCHLD;
181		PROC_UNLOCK(p);
182		sx_xunlock(&proctree_lock);
183	} else {
184		child_clear_tid = em->child_clear_tid;
185		EMUL_UNLOCK(&emul_lock);
186	}
187
188	EMUL_SHARED_WLOCK(&emul_shared_lock);
189	LIST_REMOVE(em, threads);
190
191	em->shared->refs--;
192	if (em->shared->refs == 0) {
193		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
194		free(em->shared, M_LINUX);
195	} else
196		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
197
198	if (child_clear_tid != NULL) {
199		struct linux_sys_futex_args cup;
200		int null = 0;
201
202		error = copyout(&null, child_clear_tid, sizeof(null));
203		if (error) {
204			free(em, M_LINUX);
205			return;
206		}
207
208		/* futexes stuff */
209		cup.uaddr = child_clear_tid;
210		cup.op = LINUX_FUTEX_WAKE;
211		cup.val = 0x7fffffff;	/* Awake everyone */
212		cup.timeout = NULL;
213		cup.uaddr2 = NULL;
214		cup.val3 = 0;
215		error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup);
216		/*
217		 * this cannot happen at the moment and if this happens it
218		 * probably means there is a user space bug
219		 */
220		if (error)
221			printf(LMSG("futex stuff in proc_exit failed.\n"));
222	}
223
224	/* clean the stuff up */
225	free(em, M_LINUX);
226
227	/* this is a little weird but rewritten from exit1() */
228	sx_xlock(&proctree_lock);
229	q = LIST_FIRST(&p->p_children);
230	for (; q != NULL; q = nq) {
231		nq = LIST_NEXT(q, p_sibling);
232		if (q->p_flag & P_WEXIT)
233			continue;
234		if (__predict_false(q->p_sysent != &elf_linux_sysvec))
235			continue;
236		em = em_find(q, EMUL_DOLOCK);
237		KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid));
238		if (em->pdeath_signal != 0) {
239			PROC_LOCK(q);
240			psignal(q, em->pdeath_signal);
241			PROC_UNLOCK(q);
242		}
243		EMUL_UNLOCK(&emul_lock);
244	}
245	sx_xunlock(&proctree_lock);
246}
247
248/*
249 * This is used in a case of transition from FreeBSD binary execing to linux binary
250 * in this case we create linux emuldata proc entry with the pid of the currently running
251 * process.
252 */
253void
254linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
255{
256	if (__predict_false(imgp->sysent == &elf_linux_sysvec
257	    && p->p_sysent != &elf_linux_sysvec))
258		linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0);
259	if (__predict_false(imgp->sysent != &elf_linux_sysvec
260	    && p->p_sysent == &elf_linux_sysvec)) {
261		struct linux_emuldata *em;
262
263		/*
264		 * XXX:There's a race because here we assign p->p_emuldata NULL
265		 * but the process is still counted as linux one for a short
266 		 * time so some other process might reference it and try to
267 		 * access its p->p_emuldata and panicing on a NULL reference.
268		 */
269		em = em_find(p, EMUL_DONTLOCK);
270
271		KASSERT(em != NULL, ("proc_exec: emuldata not found.\n"));
272
273		EMUL_SHARED_WLOCK(&emul_shared_lock);
274		LIST_REMOVE(em, threads);
275
276		PROC_LOCK(p);
277		p->p_emuldata = NULL;
278		PROC_UNLOCK(p);
279
280		em->shared->refs--;
281		if (em->shared->refs == 0) {
282			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
283			free(em->shared, M_LINUX);
284		} else
285			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
286
287		free(em, M_LINUX);
288	}
289}
290
291void
292linux_schedtail(void *arg __unused, struct proc *p)
293{
294	struct linux_emuldata *em;
295	int error = 0;
296	int *child_set_tid;
297
298	if (__predict_true(p->p_sysent != &elf_linux_sysvec))
299		return;
300
301	/* find the emuldata */
302	em = em_find(p, EMUL_DOLOCK);
303
304	KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n"));
305	child_set_tid = em->child_set_tid;
306	EMUL_UNLOCK(&emul_lock);
307
308	if (child_set_tid != NULL)
309		error = copyout(&p->p_pid, (int *)child_set_tid,
310		    sizeof(p->p_pid));
311
312	return;
313}
314
315int
316linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
317{
318	struct linux_emuldata *em;
319
320#ifdef DEBUG
321	if (ldebug(set_tid_address))
322		printf(ARGS(set_tid_address, "%p"), args->tidptr);
323#endif
324
325	/* find the emuldata */
326	em = em_find(td->td_proc, EMUL_DOLOCK);
327
328	KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
329
330	em->child_clear_tid = args->tidptr;
331	td->td_retval[0] = td->td_proc->p_pid;
332
333	EMUL_UNLOCK(&emul_lock);
334	return 0;
335}
336