1218616Sdchagin/*-
2218616Sdchagin * Copyright (c) 2004 Tim J. Robbins
3218616Sdchagin * Copyright (c) 2002 Doug Rabson
4218616Sdchagin * Copyright (c) 2000 Marcel Moolenaar
5218616Sdchagin * All rights reserved.
6218616Sdchagin *
7218616Sdchagin * Redistribution and use in source and binary forms, with or without
8218616Sdchagin * modification, are permitted provided that the following conditions
9218616Sdchagin * are met:
10218616Sdchagin * 1. Redistributions of source code must retain the above copyright
11218616Sdchagin *    notice, this list of conditions and the following disclaimer
12218616Sdchagin *    in this position and unchanged.
13218616Sdchagin * 2. Redistributions in binary form must reproduce the above copyright
14218616Sdchagin *    notice, this list of conditions and the following disclaimer in the
15218616Sdchagin *    documentation and/or other materials provided with the distribution.
16218616Sdchagin *
17218616Sdchagin * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18218616Sdchagin * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19218616Sdchagin * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20218616Sdchagin * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21218616Sdchagin * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22218616Sdchagin * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23218616Sdchagin * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24218616Sdchagin * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25218616Sdchagin * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26218616Sdchagin * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27218616Sdchagin */
28218616Sdchagin
29218616Sdchagin#include <sys/cdefs.h>
30218616Sdchagin__FBSDID("$FreeBSD$");
31218616Sdchagin
32218616Sdchagin#include "opt_compat.h"
33246290Sdchagin#include "opt_kdtrace.h"
34218616Sdchagin
35218616Sdchagin#include <sys/param.h>
36218616Sdchagin#include <sys/systm.h>
37218616Sdchagin#include <sys/imgact.h>
38218616Sdchagin#include <sys/lock.h>
39218616Sdchagin#include <sys/mutex.h>
40218616Sdchagin#include <sys/proc.h>
41218616Sdchagin#include <sys/sched.h>
42246290Sdchagin#include <sys/sdt.h>
43218616Sdchagin#include <sys/sx.h>
44218616Sdchagin#include <sys/unistd.h>
45218616Sdchagin
46218616Sdchagin#ifdef COMPAT_LINUX32
47218616Sdchagin#include <machine/../linux32/linux.h>
48218616Sdchagin#include <machine/../linux32/linux32_proto.h>
49218616Sdchagin#else
50218616Sdchagin#include <machine/../linux/linux.h>
51218616Sdchagin#include <machine/../linux/linux_proto.h>
52218616Sdchagin#endif
53246290Sdchagin#include <compat/linux/linux_dtrace.h>
54218616Sdchagin#include <compat/linux/linux_signal.h>
55218616Sdchagin#include <compat/linux/linux_emul.h>
56218616Sdchagin
57246290Sdchagin/* DTrace init */
58246290SdchaginLIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
59218616Sdchagin
60246290Sdchagin/* Linuxulator-global DTrace probes */
61246290SdchaginLIN_SDT_PROBE_DECLARE(locks, emul_lock, locked);
62246290SdchaginLIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock);
63246290Sdchagin
64246290Sdchagin
65218616Sdchaginint
66218616Sdchaginlinux_fork(struct thread *td, struct linux_fork_args *args)
67218616Sdchagin{
68218616Sdchagin	int error;
69218616Sdchagin	struct proc *p2;
70218616Sdchagin	struct thread *td2;
71218616Sdchagin
72218616Sdchagin#ifdef DEBUG
73218616Sdchagin	if (ldebug(fork))
74218616Sdchagin		printf(ARGS(fork, ""));
75218616Sdchagin#endif
76218616Sdchagin
77224987Sjonathan	if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2, NULL, 0))
78224987Sjonathan	    != 0)
79218616Sdchagin		return (error);
80218616Sdchagin
81218618Sdchagin	td->td_retval[0] = p2->p_pid;
82218618Sdchagin	td->td_retval[1] = 0;
83218616Sdchagin
84218616Sdchagin	error = linux_proc_init(td, td->td_retval[0], 0);
85218616Sdchagin	if (error)
86218616Sdchagin		return (error);
87218616Sdchagin
88218616Sdchagin	td2 = FIRST_THREAD_IN_PROC(p2);
89218616Sdchagin
90218616Sdchagin	/*
91218616Sdchagin	 * Make this runnable after we are finished with it.
92218616Sdchagin	 */
93218616Sdchagin	thread_lock(td2);
94218616Sdchagin	TD_SET_CAN_RUN(td2);
95218616Sdchagin	sched_add(td2, SRQ_BORING);
96218616Sdchagin	thread_unlock(td2);
97218616Sdchagin
98218616Sdchagin	return (0);
99218616Sdchagin}
100218616Sdchagin
101218616Sdchaginint
102218616Sdchaginlinux_vfork(struct thread *td, struct linux_vfork_args *args)
103218616Sdchagin{
104218616Sdchagin	int error;
105218616Sdchagin	struct proc *p2;
106218616Sdchagin	struct thread *td2;
107218616Sdchagin
108218616Sdchagin#ifdef DEBUG
109218616Sdchagin	if (ldebug(vfork))
110218616Sdchagin		printf(ARGS(vfork, ""));
111218616Sdchagin#endif
112218616Sdchagin
113218616Sdchagin	/* Exclude RFPPWAIT */
114224987Sjonathan	if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2,
115224987Sjonathan	    NULL, 0)) != 0)
116218616Sdchagin		return (error);
117218618Sdchagin
118218618Sdchagin   	td->td_retval[0] = p2->p_pid;
119218618Sdchagin
120218616Sdchagin	error = linux_proc_init(td, td->td_retval[0], 0);
121218616Sdchagin	if (error)
122218616Sdchagin		return (error);
123218616Sdchagin
124218616Sdchagin	PROC_LOCK(p2);
125218616Sdchagin	p2->p_flag |= P_PPWAIT;
126218616Sdchagin	PROC_UNLOCK(p2);
127218616Sdchagin
128218616Sdchagin	td2 = FIRST_THREAD_IN_PROC(p2);
129218616Sdchagin
130218616Sdchagin	/*
131218616Sdchagin	 * Make this runnable after we are finished with it.
132218616Sdchagin	 */
133218616Sdchagin	thread_lock(td2);
134218616Sdchagin	TD_SET_CAN_RUN(td2);
135218616Sdchagin	sched_add(td2, SRQ_BORING);
136218616Sdchagin	thread_unlock(td2);
137218616Sdchagin
138218616Sdchagin	/* wait for the children to exit, ie. emulate vfork */
139218616Sdchagin	PROC_LOCK(p2);
140218616Sdchagin	while (p2->p_flag & P_PPWAIT)
141218616Sdchagin		cv_wait(&p2->p_pwait, &p2->p_mtx);
142218616Sdchagin	PROC_UNLOCK(p2);
143218616Sdchagin
144218616Sdchagin	return (0);
145218616Sdchagin}
146218616Sdchagin
147218616Sdchaginint
148218616Sdchaginlinux_clone(struct thread *td, struct linux_clone_args *args)
149218616Sdchagin{
150218616Sdchagin	int error, ff = RFPROC | RFSTOPPED;
151218616Sdchagin	struct proc *p2;
152218616Sdchagin	struct thread *td2;
153218616Sdchagin	int exit_signal;
154218616Sdchagin	struct linux_emuldata *em;
155218616Sdchagin
156218616Sdchagin#ifdef DEBUG
157218616Sdchagin	if (ldebug(clone)) {
158218616Sdchagin		printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, "
159218616Sdchagin		    "child tid: %p"), (unsigned)args->flags,
160218616Sdchagin		    args->stack, args->parent_tidptr, args->child_tidptr);
161218616Sdchagin	}
162218616Sdchagin#endif
163218616Sdchagin
164218616Sdchagin	exit_signal = args->flags & 0x000000ff;
165218616Sdchagin	if (LINUX_SIG_VALID(exit_signal)) {
166218616Sdchagin		if (exit_signal <= LINUX_SIGTBLSZ)
167218616Sdchagin			exit_signal =
168218616Sdchagin			    linux_to_bsd_signal[_SIG_IDX(exit_signal)];
169218616Sdchagin	} else if (exit_signal != 0)
170218616Sdchagin		return (EINVAL);
171218616Sdchagin
172218616Sdchagin	if (args->flags & LINUX_CLONE_VM)
173218616Sdchagin		ff |= RFMEM;
174218616Sdchagin	if (args->flags & LINUX_CLONE_SIGHAND)
175218616Sdchagin		ff |= RFSIGSHARE;
176218616Sdchagin	/*
177218616Sdchagin	 * XXX: In Linux, sharing of fs info (chroot/cwd/umask)
178218616Sdchagin	 * and open files is independant.  In FreeBSD, its in one
179218616Sdchagin	 * structure but in reality it does not cause any problems
180218616Sdchagin	 * because both of these flags are usually set together.
181218616Sdchagin	 */
182218616Sdchagin	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
183218616Sdchagin		ff |= RFFDG;
184218616Sdchagin
185218616Sdchagin	/*
186218616Sdchagin	 * Attempt to detect when linux_clone(2) is used for creating
187218616Sdchagin	 * kernel threads. Unfortunately despite the existence of the
188218616Sdchagin	 * CLONE_THREAD flag, version of linuxthreads package used in
189218616Sdchagin	 * most popular distros as of beginning of 2005 doesn't make
190218616Sdchagin	 * any use of it. Therefore, this detection relies on
191218616Sdchagin	 * empirical observation that linuxthreads sets certain
192218616Sdchagin	 * combination of flags, so that we can make more or less
193218616Sdchagin	 * precise detection and notify the FreeBSD kernel that several
194218616Sdchagin	 * processes are in fact part of the same threading group, so
195218616Sdchagin	 * that special treatment is necessary for signal delivery
196218616Sdchagin	 * between those processes and fd locking.
197218616Sdchagin	 */
198218616Sdchagin	if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS)
199218616Sdchagin		ff |= RFTHREAD;
200218616Sdchagin
201218616Sdchagin	if (args->flags & LINUX_CLONE_PARENT_SETTID)
202218616Sdchagin		if (args->parent_tidptr == NULL)
203218616Sdchagin			return (EINVAL);
204218616Sdchagin
205224987Sjonathan	error = fork1(td, ff, 0, &p2, NULL, 0);
206218616Sdchagin	if (error)
207218616Sdchagin		return (error);
208218616Sdchagin
209218616Sdchagin	if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) {
210218616Sdchagin	   	sx_xlock(&proctree_lock);
211218616Sdchagin		PROC_LOCK(p2);
212218616Sdchagin		proc_reparent(p2, td->td_proc->p_pptr);
213218616Sdchagin		PROC_UNLOCK(p2);
214218616Sdchagin		sx_xunlock(&proctree_lock);
215218616Sdchagin	}
216218616Sdchagin
217218616Sdchagin	/* create the emuldata */
218218616Sdchagin	error = linux_proc_init(td, p2->p_pid, args->flags);
219218616Sdchagin	/* reference it - no need to check this */
220218616Sdchagin	em = em_find(p2, EMUL_DOLOCK);
221218616Sdchagin	KASSERT(em != NULL, ("clone: emuldata not found."));
222218616Sdchagin	/* and adjust it */
223218616Sdchagin
224218616Sdchagin	if (args->flags & LINUX_CLONE_THREAD) {
225218616Sdchagin#ifdef notyet
226218616Sdchagin	   	PROC_LOCK(p2);
227218616Sdchagin	   	p2->p_pgrp = td->td_proc->p_pgrp;
228218616Sdchagin	   	PROC_UNLOCK(p2);
229218616Sdchagin#endif
230218616Sdchagin		exit_signal = 0;
231218616Sdchagin	}
232218616Sdchagin
233218616Sdchagin	if (args->flags & LINUX_CLONE_CHILD_SETTID)
234218616Sdchagin		em->child_set_tid = args->child_tidptr;
235218616Sdchagin	else
236218616Sdchagin	   	em->child_set_tid = NULL;
237218616Sdchagin
238218616Sdchagin	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
239218616Sdchagin		em->child_clear_tid = args->child_tidptr;
240218616Sdchagin	else
241218616Sdchagin	   	em->child_clear_tid = NULL;
242218616Sdchagin
243218616Sdchagin	EMUL_UNLOCK(&emul_lock);
244218616Sdchagin
245218616Sdchagin	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
246218616Sdchagin		error = copyout(&p2->p_pid, args->parent_tidptr,
247218616Sdchagin		    sizeof(p2->p_pid));
248218616Sdchagin		if (error)
249218616Sdchagin			printf(LMSG("copyout failed!"));
250218616Sdchagin	}
251218616Sdchagin
252218616Sdchagin	PROC_LOCK(p2);
253218616Sdchagin	p2->p_sigparent = exit_signal;
254218616Sdchagin	PROC_UNLOCK(p2);
255218616Sdchagin	td2 = FIRST_THREAD_IN_PROC(p2);
256218616Sdchagin	/*
257218616Sdchagin	 * In a case of stack = NULL, we are supposed to COW calling process
258218616Sdchagin	 * stack. This is what normal fork() does, so we just keep tf_rsp arg
259218616Sdchagin	 * intact.
260218616Sdchagin	 */
261218616Sdchagin	if (args->stack)
262218616Sdchagin		linux_set_upcall_kse(td2, PTROUT(args->stack));
263218616Sdchagin
264218616Sdchagin	if (args->flags & LINUX_CLONE_SETTLS)
265218616Sdchagin		linux_set_cloned_tls(td2, args->tls);
266218616Sdchagin
267218616Sdchagin#ifdef DEBUG
268218616Sdchagin	if (ldebug(clone))
269218616Sdchagin		printf(LMSG("clone: successful rfork to %d, "
270218616Sdchagin		    "stack %p sig = %d"), (int)p2->p_pid, args->stack,
271218616Sdchagin		    exit_signal);
272218616Sdchagin#endif
273218616Sdchagin	if (args->flags & LINUX_CLONE_VFORK) {
274218616Sdchagin	   	PROC_LOCK(p2);
275218616Sdchagin	   	p2->p_flag |= P_PPWAIT;
276218616Sdchagin	   	PROC_UNLOCK(p2);
277218616Sdchagin	}
278218616Sdchagin
279218616Sdchagin	/*
280218616Sdchagin	 * Make this runnable after we are finished with it.
281218616Sdchagin	 */
282218616Sdchagin	thread_lock(td2);
283218616Sdchagin	TD_SET_CAN_RUN(td2);
284218616Sdchagin	sched_add(td2, SRQ_BORING);
285218616Sdchagin	thread_unlock(td2);
286218616Sdchagin
287218616Sdchagin	td->td_retval[0] = p2->p_pid;
288218616Sdchagin	td->td_retval[1] = 0;
289218616Sdchagin
290218616Sdchagin	if (args->flags & LINUX_CLONE_VFORK) {
291218616Sdchagin		/* wait for the children to exit, ie. emulate vfork */
292218616Sdchagin		PROC_LOCK(p2);
293218616Sdchagin		while (p2->p_flag & P_PPWAIT)
294218616Sdchagin			cv_wait(&p2->p_pwait, &p2->p_mtx);
295218616Sdchagin		PROC_UNLOCK(p2);
296218616Sdchagin	}
297218616Sdchagin
298218616Sdchagin	return (0);
299218616Sdchagin}
300