1/*-
2 * Copyright (c) 2006 Roman Divacky
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include "opt_compat.h"
33#include "opt_kdtrace.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/imgact.h>
38#include <sys/kernel.h>
39#include <sys/lock.h>
40#include <sys/malloc.h>
41#include <sys/mutex.h>
42#include <sys/sdt.h>
43#include <sys/sx.h>
44#include <sys/proc.h>
45#include <sys/syscallsubr.h>
46#include <sys/sysent.h>
47#include <sys/sysproto.h>
48#include <sys/unistd.h>
49
50#ifdef COMPAT_LINUX32
51#include <machine/../linux32/linux.h>
52#include <machine/../linux32/linux32_proto.h>
53#else
54#include <machine/../linux/linux.h>
55#include <machine/../linux/linux_proto.h>
56#endif
57
58#include <compat/linux/linux_dtrace.h>
59#include <compat/linux/linux_emul.h>
60#include <compat/linux/linux_futex.h>
61#include <compat/linux/linux_misc.h>
62
/**
 * DTrace provider of the linuxulator.
 *
 * The provider is defined exactly once, here, and is shared by all
 * modules (= source files) of the linuxulator.
 *
 * Its name depends on the emulated bitsize, see
 * ../../<ARCH>/linux{,32}/linux.h, e.g.:
 *      native bitsize          = linuxulator
 *      amd64, 32bit emulation  = linuxulator32
 */
LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);
75
76/**
77 * Special DTrace module "locks", it covers some linuxulator internal
78 * locks.
79 */
80LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *");
81LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *");
82LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *");
83LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *");
84LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *");
85LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *");
86
87/**
88 * DTrace probes in this module.
89 */
90LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int");
91LIN_SDT_PROBE_DEFINE0(emul, em_find, return);
92LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t",
93    "int");
94LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread);
95LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork);
96LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec);
97LIN_SDT_PROBE_DEFINE0(emul, proc_init, return);
98LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *");
99LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed);
100LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t",
101    "struct proc *");
102LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int");
103LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return);
104LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *",
105    "struct image_params *");
106LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return);
107LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, entry);
108LIN_SDT_PROBE_DEFINE1(emul, linux_schedtail, copyout_error, "int");
109LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return);
110LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *");
111LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return);
112LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *",
113    "int");
114LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t");
115LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return);
116
117struct sx	emul_shared_lock;
118struct mtx	emul_lock;
119
120/* this returns locked reference to the emuldata entry (if found) */
121struct linux_emuldata *
122em_find(struct proc *p, int locked)
123{
124	struct linux_emuldata *em;
125
126	LIN_SDT_PROBE2(emul, em_find, entry, p, locked);
127
128	if (locked == EMUL_DOLOCK)
129		EMUL_LOCK(&emul_lock);
130
131	em = p->p_emuldata;
132
133	if (em == NULL && locked == EMUL_DOLOCK)
134		EMUL_UNLOCK(&emul_lock);
135
136	LIN_SDT_PROBE1(emul, em_find, return, em);
137	return (em);
138}
139
140int
141linux_proc_init(struct thread *td, pid_t child, int flags)
142{
143	struct linux_emuldata *em, *p_em;
144	struct proc *p;
145
146	LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags);
147
148	if (child != 0) {
149		/* fork or create a thread */
150		em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO);
151		em->pid = child;
152		em->pdeath_signal = 0;
153		em->flags = 0;
154		em->robust_futexes = NULL;
155		if (flags & LINUX_CLONE_THREAD) {
156			/* handled later in the code */
157			LIN_SDT_PROBE0(emul, proc_init, create_thread);
158		} else {
159			struct linux_emuldata_shared *s;
160
161			LIN_SDT_PROBE0(emul, proc_init, fork);
162
163			s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO);
164			s->refs = 1;
165			s->group_pid = child;
166
167			LIST_INIT(&s->threads);
168			em->shared = s;
169		}
170	} else {
171		/* exec */
172		LIN_SDT_PROBE0(emul, proc_init, exec);
173
174		/* lookup the old one */
175		em = em_find(td->td_proc, EMUL_DOLOCK);
176		KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
177	}
178
179	em->child_clear_tid = NULL;
180	em->child_set_tid = NULL;
181
182	/*
183	 * allocate the shared struct only in clone()/fork cases in the case
184	 * of clone() td = calling proc and child = pid of the newly created
185	 * proc
186	 */
187	if (child != 0) {
188		if (flags & LINUX_CLONE_THREAD) {
189			/* lookup the parent */
190			/*
191			 * we dont have to lock the p_em because
192			 * its waiting for us in linux_clone so
193			 * there is no chance of it changing the
194			 * p_em->shared address
195			 */
196			p_em = em_find(td->td_proc, EMUL_DONTLOCK);
197			KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n"));
198			em->shared = p_em->shared;
199			EMUL_SHARED_WLOCK(&emul_shared_lock);
200			em->shared->refs++;
201			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
202		} else {
203			/*
204			 * handled earlier to avoid malloc(M_WAITOK) with
205			 * rwlock held
206			 */
207		}
208
209		EMUL_SHARED_WLOCK(&emul_shared_lock);
210		LIST_INSERT_HEAD(&em->shared->threads, em, threads);
211		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
212
213		p = pfind(child);
214		KASSERT(p != NULL, ("process not found in proc_init\n"));
215		p->p_emuldata = em;
216		PROC_UNLOCK(p);
217	} else
218		EMUL_UNLOCK(&emul_lock);
219
220	LIN_SDT_PROBE0(emul, proc_init, return);
221	return (0);
222}
223
224void
225linux_proc_exit(void *arg __unused, struct proc *p)
226{
227	struct linux_emuldata *em;
228	int error, shared_flags, shared_xstat;
229	struct thread *td = FIRST_THREAD_IN_PROC(p);
230	int *child_clear_tid;
231	struct proc *q, *nq;
232
233	if (__predict_true(p->p_sysent != &elf_linux_sysvec))
234		return;
235
236	LIN_SDT_PROBE1(emul, proc_exit, entry, p);
237
238	release_futexes(p);
239
240	/* find the emuldata */
241	em = em_find(p, EMUL_DOLOCK);
242
243	KASSERT(em != NULL, ("proc_exit: emuldata not found.\n"));
244
245	/* reparent all procs that are not a thread leader to initproc */
246	if (em->shared->group_pid != p->p_pid) {
247		LIN_SDT_PROBE3(emul, proc_exit, reparent,
248		    em->shared->group_pid, p->p_pid, p);
249
250		child_clear_tid = em->child_clear_tid;
251		EMUL_UNLOCK(&emul_lock);
252		sx_xlock(&proctree_lock);
253		wakeup(initproc);
254		PROC_LOCK(p);
255		proc_reparent(p, initproc);
256		p->p_sigparent = SIGCHLD;
257		PROC_UNLOCK(p);
258		sx_xunlock(&proctree_lock);
259	} else {
260		child_clear_tid = em->child_clear_tid;
261		EMUL_UNLOCK(&emul_lock);
262	}
263
264	EMUL_SHARED_WLOCK(&emul_shared_lock);
265	shared_flags = em->shared->flags;
266	shared_xstat = em->shared->xstat;
267	LIST_REMOVE(em, threads);
268
269	em->shared->refs--;
270	if (em->shared->refs == 0) {
271		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
272		free(em->shared, M_LINUX);
273	} else
274		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
275
276	if ((shared_flags & EMUL_SHARED_HASXSTAT) != 0)
277		p->p_xstat = shared_xstat;
278
279	if (child_clear_tid != NULL) {
280		struct linux_sys_futex_args cup;
281		int null = 0;
282
283		error = copyout(&null, child_clear_tid, sizeof(null));
284		if (error) {
285			LIN_SDT_PROBE1(emul, proc_exit,
286			    child_clear_tid_error, error);
287
288			free(em, M_LINUX);
289
290			LIN_SDT_PROBE0(emul, proc_exit, return);
291			return;
292		}
293
294		/* futexes stuff */
295		cup.uaddr = child_clear_tid;
296		cup.op = LINUX_FUTEX_WAKE;
297		cup.val = 0x7fffffff;	/* Awake everyone */
298		cup.timeout = NULL;
299		cup.uaddr2 = NULL;
300		cup.val3 = 0;
301		error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup);
302		/*
303		 * this cannot happen at the moment and if this happens it
304		 * probably means there is a user space bug
305		 */
306		if (error) {
307			LIN_SDT_PROBE0(emul, proc_exit, futex_failed);
308			printf(LMSG("futex stuff in proc_exit failed.\n"));
309		}
310	}
311
312	/* clean the stuff up */
313	free(em, M_LINUX);
314
315	/* this is a little weird but rewritten from exit1() */
316	sx_xlock(&proctree_lock);
317	q = LIST_FIRST(&p->p_children);
318	for (; q != NULL; q = nq) {
319		nq = LIST_NEXT(q, p_sibling);
320		if (q->p_flag & P_WEXIT)
321			continue;
322		if (__predict_false(q->p_sysent != &elf_linux_sysvec))
323			continue;
324		em = em_find(q, EMUL_DOLOCK);
325		KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid));
326		PROC_LOCK(q);
327		if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) {
328			kern_psignal(q, em->pdeath_signal);
329		}
330		PROC_UNLOCK(q);
331		EMUL_UNLOCK(&emul_lock);
332	}
333	sx_xunlock(&proctree_lock);
334
335	LIN_SDT_PROBE0(emul, proc_exit, return);
336}
337
338/*
339 * This is used in a case of transition from FreeBSD binary execing to linux binary
340 * in this case we create linux emuldata proc entry with the pid of the currently running
341 * process.
342 */
343void
344linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
345{
346	if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
347		LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp);
348	}
349	if (__predict_false(imgp->sysent == &elf_linux_sysvec
350	    && p->p_sysent != &elf_linux_sysvec))
351		linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0);
352	if (__predict_false((p->p_sysent->sv_flags & SV_ABI_MASK) ==
353	    SV_ABI_LINUX))
354		/* Kill threads regardless of imgp->sysent value */
355		linux_kill_threads(FIRST_THREAD_IN_PROC(p), SIGKILL);
356	if (__predict_false(imgp->sysent != &elf_linux_sysvec
357	    && p->p_sysent == &elf_linux_sysvec)) {
358		struct linux_emuldata *em;
359
360		/*
361		 * XXX:There's a race because here we assign p->p_emuldata NULL
362		 * but the process is still counted as linux one for a short
363 		 * time so some other process might reference it and try to
364 		 * access its p->p_emuldata and panicing on a NULL reference.
365		 */
366		em = em_find(p, EMUL_DONTLOCK);
367
368		KASSERT(em != NULL, ("proc_exec: emuldata not found.\n"));
369
370		EMUL_SHARED_WLOCK(&emul_shared_lock);
371		LIST_REMOVE(em, threads);
372
373		PROC_LOCK(p);
374		p->p_emuldata = NULL;
375		PROC_UNLOCK(p);
376
377		em->shared->refs--;
378		if (em->shared->refs == 0) {
379			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
380			free(em->shared, M_LINUX);
381		} else
382			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
383
384		free(em, M_LINUX);
385	}
386
387	if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
388		LIN_SDT_PROBE0(emul, proc_exec, return);
389	}
390}
391
392void
393linux_schedtail(struct thread *td)
394{
395	struct linux_emuldata *em;
396	struct proc *p;
397	int error = 0;
398	int *child_set_tid;
399
400	p = td->td_proc;
401
402	LIN_SDT_PROBE1(emul, linux_schedtail, entry, p);
403
404	/* find the emuldata */
405	em = em_find(p, EMUL_DOLOCK);
406
407	KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n"));
408	child_set_tid = em->child_set_tid;
409	EMUL_UNLOCK(&emul_lock);
410
411	if (child_set_tid != NULL) {
412		error = copyout(&p->p_pid, (int *)child_set_tid,
413		    sizeof(p->p_pid));
414
415		if (error != 0) {
416			LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error,
417			    error);
418		}
419	}
420
421	LIN_SDT_PROBE0(emul, linux_schedtail, return);
422
423	return;
424}
425
426int
427linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
428{
429	struct linux_emuldata *em;
430
431	LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr);
432
433	/* find the emuldata */
434	em = em_find(td->td_proc, EMUL_DOLOCK);
435
436	KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
437
438	em->child_clear_tid = args->tidptr;
439	td->td_retval[0] = td->td_proc->p_pid;
440
441	EMUL_UNLOCK(&emul_lock);
442
443	LIN_SDT_PROBE0(emul, linux_set_tid_address, return);
444	return 0;
445}
446
447void
448linux_kill_threads(struct thread *td, int sig)
449{
450	struct linux_emuldata *em, *td_em, *tmp_em;
451	struct proc *sp;
452
453	LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig);
454
455	td_em = em_find(td->td_proc, EMUL_DONTLOCK);
456
457	KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n"));
458
459	EMUL_SHARED_RLOCK(&emul_shared_lock);
460	LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) {
461		if (em->pid == td_em->pid)
462			continue;
463
464		sp = pfind(em->pid);
465		if ((sp->p_flag & P_WEXIT) == 0)
466			kern_psignal(sp, sig);
467		PROC_UNLOCK(sp);
468
469		LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid);
470	}
471	EMUL_SHARED_RUNLOCK(&emul_shared_lock);
472
473	LIN_SDT_PROBE0(emul, linux_kill_threads, return);
474}
475