linux_fork.c revision 293607
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2002 Doug Rabson
4 * Copyright (c) 2000 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/compat/linux/linux_fork.c 293607 2016-01-09 18:24:53Z dchagin $");
31
32#include "opt_compat.h"
33#include "opt_kdtrace.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/imgact.h>
38#include <sys/ktr.h>
39#include <sys/lock.h>
40#include <sys/mutex.h>
41#include <sys/proc.h>
42#include <sys/racct.h>
43#include <sys/sched.h>
44#include <sys/syscallsubr.h>
45#include <sys/sx.h>
46#include <sys/unistd.h>
47#include <sys/wait.h>
48
49#include <vm/vm.h>
50#include <vm/pmap.h>
51#include <vm/vm_map.h>
52
53#ifdef COMPAT_LINUX32
54#include <machine/../linux32/linux.h>
55#include <machine/../linux32/linux32_proto.h>
56#else
57#include <machine/../linux/linux.h>
58#include <machine/../linux/linux_proto.h>
59#endif
60#include <compat/linux/linux_emul.h>
61#include <compat/linux/linux_futex.h>
62#include <compat/linux/linux_misc.h>
63#include <compat/linux/linux_util.h>
64
65int
66linux_fork(struct thread *td, struct linux_fork_args *args)
67{
68	int error;
69	struct proc *p2;
70	struct thread *td2;
71
72#ifdef DEBUG
73	if (ldebug(fork))
74		printf(ARGS(fork, ""));
75#endif
76
77	if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2, NULL, 0))
78	    != 0)
79		return (error);
80
81	td2 = FIRST_THREAD_IN_PROC(p2);
82
83	linux_proc_init(td, td2, 0);
84
85	td->td_retval[0] = p2->p_pid;
86
87	/*
88	 * Make this runnable after we are finished with it.
89	 */
90	thread_lock(td2);
91	TD_SET_CAN_RUN(td2);
92	sched_add(td2, SRQ_BORING);
93	thread_unlock(td2);
94
95	return (0);
96}
97
98int
99linux_vfork(struct thread *td, struct linux_vfork_args *args)
100{
101	int error;
102	struct proc *p2;
103	struct thread *td2;
104
105#ifdef DEBUG
106	if (ldebug(vfork))
107		printf(ARGS(vfork, ""));
108#endif
109
110	/* Exclude RFPPWAIT */
111	if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2,
112	    NULL, 0)) != 0)
113		return (error);
114
115
116	td2 = FIRST_THREAD_IN_PROC(p2);
117
118	linux_proc_init(td, td2, 0);
119
120	PROC_LOCK(p2);
121	p2->p_flag |= P_PPWAIT;
122	PROC_UNLOCK(p2);
123
124   	td->td_retval[0] = p2->p_pid;
125
126	/*
127	 * Make this runnable after we are finished with it.
128	 */
129	thread_lock(td2);
130	TD_SET_CAN_RUN(td2);
131	sched_add(td2, SRQ_BORING);
132	thread_unlock(td2);
133
134	/* wait for the children to exit, ie. emulate vfork */
135	PROC_LOCK(p2);
136	while (p2->p_flag & P_PPWAIT)
137		cv_wait(&p2->p_pwait, &p2->p_mtx);
138	PROC_UNLOCK(p2);
139
140	return (0);
141}
142
143static int
144linux_clone_proc(struct thread *td, struct linux_clone_args *args)
145{
146	int error, ff = RFPROC | RFSTOPPED;
147	struct proc *p2;
148	struct thread *td2;
149	int exit_signal;
150	struct linux_emuldata *em;
151
152#ifdef DEBUG
153	if (ldebug(clone)) {
154		printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, "
155		    "child tid: %p"), (unsigned)args->flags,
156		    args->stack, args->parent_tidptr, args->child_tidptr);
157	}
158#endif
159
160	exit_signal = args->flags & 0x000000ff;
161	if (LINUX_SIG_VALID(exit_signal)) {
162		exit_signal = linux_to_bsd_signal(exit_signal);
163	} else if (exit_signal != 0)
164		return (EINVAL);
165
166	if (args->flags & LINUX_CLONE_VM)
167		ff |= RFMEM;
168	if (args->flags & LINUX_CLONE_SIGHAND)
169		ff |= RFSIGSHARE;
170	/*
171	 * XXX: In Linux, sharing of fs info (chroot/cwd/umask)
172	 * and open files is independant.  In FreeBSD, its in one
173	 * structure but in reality it does not cause any problems
174	 * because both of these flags are usually set together.
175	 */
176	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
177		ff |= RFFDG;
178
179	if (args->flags & LINUX_CLONE_PARENT_SETTID)
180		if (args->parent_tidptr == NULL)
181			return (EINVAL);
182
183	error = fork1(td, ff, 0, &p2, NULL, 0);
184	if (error)
185		return (error);
186
187	td2 = FIRST_THREAD_IN_PROC(p2);
188
189	/* create the emuldata */
190	linux_proc_init(td, td2, args->flags);
191
192	em = em_find(td2);
193	KASSERT(em != NULL, ("clone_proc: emuldata not found.\n"));
194
195	if (args->flags & LINUX_CLONE_CHILD_SETTID)
196		em->child_set_tid = args->child_tidptr;
197	else
198	   	em->child_set_tid = NULL;
199
200	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
201		em->child_clear_tid = args->child_tidptr;
202	else
203	   	em->child_clear_tid = NULL;
204
205	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
206		error = copyout(&p2->p_pid, args->parent_tidptr,
207		    sizeof(p2->p_pid));
208		if (error)
209			printf(LMSG("copyout failed!"));
210	}
211
212	PROC_LOCK(p2);
213	p2->p_sigparent = exit_signal;
214	PROC_UNLOCK(p2);
215	/*
216	 * In a case of stack = NULL, we are supposed to COW calling process
217	 * stack. This is what normal fork() does, so we just keep tf_rsp arg
218	 * intact.
219	 */
220	linux_set_upcall_kse(td2, PTROUT(args->stack));
221
222	if (args->flags & LINUX_CLONE_SETTLS)
223		linux_set_cloned_tls(td2, args->tls);
224
225#ifdef DEBUG
226	if (ldebug(clone))
227		printf(LMSG("clone: successful rfork to %d, "
228		    "stack %p sig = %d"), (int)p2->p_pid, args->stack,
229		    exit_signal);
230#endif
231
232	if (args->flags & LINUX_CLONE_VFORK) {
233	   	PROC_LOCK(p2);
234	   	p2->p_flag |= P_PPWAIT;
235	   	PROC_UNLOCK(p2);
236	}
237
238	/*
239	 * Make this runnable after we are finished with it.
240	 */
241	thread_lock(td2);
242	TD_SET_CAN_RUN(td2);
243	sched_add(td2, SRQ_BORING);
244	thread_unlock(td2);
245
246	td->td_retval[0] = p2->p_pid;
247
248	if (args->flags & LINUX_CLONE_VFORK) {
249		/* wait for the children to exit, ie. emulate vfork */
250		PROC_LOCK(p2);
251		while (p2->p_flag & P_PPWAIT)
252			cv_wait(&p2->p_pwait, &p2->p_mtx);
253		PROC_UNLOCK(p2);
254	}
255
256	return (0);
257}
258
259static int
260linux_clone_thread(struct thread *td, struct linux_clone_args *args)
261{
262	struct linux_emuldata *em;
263	struct thread *newtd;
264	struct proc *p;
265	int error;
266
267#ifdef DEBUG
268	if (ldebug(clone)) {
269		printf(ARGS(clone, "thread: flags %x, stack %p, parent tid: %p, "
270		    "child tid: %p"), (unsigned)args->flags,
271		    args->stack, args->parent_tidptr, args->child_tidptr);
272	}
273#endif
274
275	LINUX_CTR4(clone_thread, "thread(%d) flags %x ptid %p ctid %p",
276	    td->td_tid, (unsigned)args->flags,
277	    args->parent_tidptr, args->child_tidptr);
278
279	if (args->flags & LINUX_CLONE_PARENT_SETTID)
280		if (args->parent_tidptr == NULL)
281			return (EINVAL);
282
283	/* Threads should be created with own stack */
284	if (args->stack == NULL)
285		return (EINVAL);
286
287	p = td->td_proc;
288
289#ifdef RACCT
290	if (racct_enable) {
291		PROC_LOCK(p);
292		error = racct_add(p, RACCT_NTHR, 1);
293		PROC_UNLOCK(p);
294		if (error != 0)
295			return (EPROCLIM);
296	}
297#endif
298
299	/* Initialize our td */
300	error = kern_thr_alloc(p, 0, &newtd);
301	if (error)
302		goto fail;
303
304	cpu_set_upcall(newtd, td);
305
306	bzero(&newtd->td_startzero,
307	    __rangeof(struct thread, td_startzero, td_endzero));
308	bcopy(&td->td_startcopy, &newtd->td_startcopy,
309	    __rangeof(struct thread, td_startcopy, td_endcopy));
310
311	newtd->td_proc = p;
312	newtd->td_ucred = crhold(td->td_ucred);
313
314	/* create the emuldata */
315	linux_proc_init(td, newtd, args->flags);
316
317	em = em_find(newtd);
318	KASSERT(em != NULL, ("clone_thread: emuldata not found.\n"));
319
320	if (args->flags & LINUX_CLONE_SETTLS)
321		linux_set_cloned_tls(newtd, args->tls);
322
323	if (args->flags & LINUX_CLONE_CHILD_SETTID)
324		em->child_set_tid = args->child_tidptr;
325	else
326	   	em->child_set_tid = NULL;
327
328	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
329		em->child_clear_tid = args->child_tidptr;
330	else
331	   	em->child_clear_tid = NULL;
332
333	cpu_thread_clean(newtd);
334
335	linux_set_upcall_kse(newtd, PTROUT(args->stack));
336
337	PROC_LOCK(p);
338	p->p_flag |= P_HADTHREADS;
339	bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
340
341	if (args->flags & LINUX_CLONE_PARENT)
342		thread_link(newtd, p->p_pptr);
343	else
344		thread_link(newtd, p);
345
346	thread_lock(td);
347	/* let the scheduler know about these things. */
348	sched_fork_thread(td, newtd);
349	thread_unlock(td);
350	if (P_SHOULDSTOP(p))
351		newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
352	PROC_UNLOCK(p);
353
354	tidhash_add(newtd);
355
356#ifdef DEBUG
357	if (ldebug(clone))
358		printf(ARGS(clone, "successful clone to %d, stack %p"),
359		(int)newtd->td_tid, args->stack);
360#endif
361
362	LINUX_CTR2(clone_thread, "thread(%d) successful clone to %d",
363	    td->td_tid, newtd->td_tid);
364
365	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
366		error = copyout(&newtd->td_tid, args->parent_tidptr,
367		    sizeof(newtd->td_tid));
368		if (error)
369			printf(LMSG("clone_thread: copyout failed!"));
370	}
371
372	/*
373	 * Make this runnable after we are finished with it.
374	 */
375	thread_lock(newtd);
376	TD_SET_CAN_RUN(newtd);
377	sched_add(newtd, SRQ_BORING);
378	thread_unlock(newtd);
379
380	td->td_retval[0] = newtd->td_tid;
381
382	return (0);
383
384fail:
385#ifdef RACCT
386	if (racct_enable) {
387		PROC_LOCK(p);
388		racct_sub(p, RACCT_NTHR, 1);
389		PROC_UNLOCK(p);
390	}
391#endif
392	return (error);
393}
394
395int
396linux_clone(struct thread *td, struct linux_clone_args *args)
397{
398
399	if (args->flags & LINUX_CLONE_THREAD)
400		return (linux_clone_thread(td, args));
401	else
402		return (linux_clone_proc(td, args));
403}
404
405int
406linux_exit(struct thread *td, struct linux_exit_args *args)
407{
408	struct linux_emuldata *em;
409
410	em = em_find(td);
411	KASSERT(em != NULL, ("exit: emuldata not found.\n"));
412
413	LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval);
414
415	linux_thread_detach(td);
416
417	/*
418	 * XXX. When the last two threads of a process
419	 * exit via pthread_exit() try thr_exit() first.
420	 */
421	kern_thr_exit(td);
422	exit1(td, W_EXITCODE(args->rval, 0));
423		/* NOTREACHED */
424}
425
426int
427linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
428{
429	struct linux_emuldata *em;
430
431	em = em_find(td);
432	KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
433
434	em->child_clear_tid = args->tidptr;
435
436	td->td_retval[0] = em->em_tid;
437
438	LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d",
439	    em->em_tid, args->tidptr, td->td_retval[0]);
440
441	return (0);
442}
443
444void
445linux_thread_detach(struct thread *td)
446{
447	struct linux_sys_futex_args cup;
448	struct linux_emuldata *em;
449	int *child_clear_tid;
450	int error;
451
452	em = em_find(td);
453	KASSERT(em != NULL, ("thread_detach: emuldata not found.\n"));
454
455	LINUX_CTR1(thread_detach, "thread(%d)", em->em_tid);
456
457	release_futexes(td, em);
458
459	child_clear_tid = em->child_clear_tid;
460
461	if (child_clear_tid != NULL) {
462
463		LINUX_CTR2(thread_detach, "thread(%d) %p",
464		    em->em_tid, child_clear_tid);
465
466		error = suword32(child_clear_tid, 0);
467		if (error != 0)
468			return;
469
470		cup.uaddr = child_clear_tid;
471		cup.op = LINUX_FUTEX_WAKE;
472		cup.val = 1;		/* wake one */
473		cup.timeout = NULL;
474		cup.uaddr2 = NULL;
475		cup.val3 = 0;
476		error = linux_sys_futex(td, &cup);
477		/*
478		 * this cannot happen at the moment and if this happens it
479		 * probably means there is a user space bug
480		 */
481		if (error != 0)
482			linux_msg(td, "futex stuff in thread_detach failed.");
483	}
484}
485