1218616Sdchagin/*- 2218616Sdchagin * Copyright (c) 2004 Tim J. Robbins 3218616Sdchagin * Copyright (c) 2002 Doug Rabson 4218616Sdchagin * Copyright (c) 2000 Marcel Moolenaar 5218616Sdchagin * All rights reserved. 6218616Sdchagin * 7218616Sdchagin * Redistribution and use in source and binary forms, with or without 8218616Sdchagin * modification, are permitted provided that the following conditions 9218616Sdchagin * are met: 10218616Sdchagin * 1. Redistributions of source code must retain the above copyright 11218616Sdchagin * notice, this list of conditions and the following disclaimer 12218616Sdchagin * in this position and unchanged. 13218616Sdchagin * 2. Redistributions in binary form must reproduce the above copyright 14218616Sdchagin * notice, this list of conditions and the following disclaimer in the 15218616Sdchagin * documentation and/or other materials provided with the distribution. 16218616Sdchagin * 17218616Sdchagin * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18218616Sdchagin * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19218616Sdchagin * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20218616Sdchagin * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21218616Sdchagin * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22218616Sdchagin * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23218616Sdchagin * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24218616Sdchagin * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25218616Sdchagin * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26218616Sdchagin * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27218616Sdchagin */ 28218616Sdchagin 29218616Sdchagin#include <sys/cdefs.h> 30218616Sdchagin__FBSDID("$FreeBSD$"); 31218616Sdchagin 32218616Sdchagin#include "opt_compat.h" 33246290Sdchagin#include "opt_kdtrace.h" 34218616Sdchagin 35218616Sdchagin#include <sys/param.h> 36218616Sdchagin#include <sys/systm.h> 37218616Sdchagin#include <sys/imgact.h> 38218616Sdchagin#include <sys/lock.h> 39218616Sdchagin#include <sys/mutex.h> 40218616Sdchagin#include <sys/proc.h> 41218616Sdchagin#include <sys/sched.h> 42246290Sdchagin#include <sys/sdt.h> 43218616Sdchagin#include <sys/sx.h> 44218616Sdchagin#include <sys/unistd.h> 45218616Sdchagin 46218616Sdchagin#ifdef COMPAT_LINUX32 47218616Sdchagin#include <machine/../linux32/linux.h> 48218616Sdchagin#include <machine/../linux32/linux32_proto.h> 49218616Sdchagin#else 50218616Sdchagin#include <machine/../linux/linux.h> 51218616Sdchagin#include <machine/../linux/linux_proto.h> 52218616Sdchagin#endif 53246290Sdchagin#include <compat/linux/linux_dtrace.h> 54218616Sdchagin#include <compat/linux/linux_signal.h> 55218616Sdchagin#include <compat/linux/linux_emul.h> 56218616Sdchagin 57246290Sdchagin/* DTrace init */ 58246290SdchaginLIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); 59218616Sdchagin 60246290Sdchagin/* Linuxulator-global DTrace probes */ 61246290SdchaginLIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); 62246290SdchaginLIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); 63246290Sdchagin 64246290Sdchagin 65218616Sdchaginint 66218616Sdchaginlinux_fork(struct thread *td, struct linux_fork_args *args) 67218616Sdchagin{ 68218616Sdchagin int error; 69218616Sdchagin struct proc *p2; 70218616Sdchagin struct thread *td2; 71218616Sdchagin 72218616Sdchagin#ifdef DEBUG 73218616Sdchagin if (ldebug(fork)) 74218616Sdchagin printf(ARGS(fork, "")); 75218616Sdchagin#endif 76218616Sdchagin 77224987Sjonathan if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2, NULL, 0)) 78224987Sjonathan != 0) 79218616Sdchagin return (error); 80218616Sdchagin 81218618Sdchagin td->td_retval[0] = p2->p_pid; 82218618Sdchagin td->td_retval[1] = 0; 83218616Sdchagin 84218616Sdchagin error = linux_proc_init(td, td->td_retval[0], 0); 85218616Sdchagin if (error) 86218616Sdchagin return (error); 87218616Sdchagin 88218616Sdchagin td2 = FIRST_THREAD_IN_PROC(p2); 89218616Sdchagin 90218616Sdchagin /* 91218616Sdchagin * Make this runnable after we are finished with it. 92218616Sdchagin */ 93218616Sdchagin thread_lock(td2); 94218616Sdchagin TD_SET_CAN_RUN(td2); 95218616Sdchagin sched_add(td2, SRQ_BORING); 96218616Sdchagin thread_unlock(td2); 97218616Sdchagin 98218616Sdchagin return (0); 99218616Sdchagin} 100218616Sdchagin 101218616Sdchaginint 102218616Sdchaginlinux_vfork(struct thread *td, struct linux_vfork_args *args) 103218616Sdchagin{ 104218616Sdchagin int error; 105218616Sdchagin struct proc *p2; 106218616Sdchagin struct thread *td2; 107218616Sdchagin 108218616Sdchagin#ifdef DEBUG 109218616Sdchagin if (ldebug(vfork)) 110218616Sdchagin printf(ARGS(vfork, "")); 111218616Sdchagin#endif 112218616Sdchagin 113218616Sdchagin /* Exclude RFPPWAIT */ 114224987Sjonathan if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2, 115224987Sjonathan NULL, 0)) != 0) 116218616Sdchagin return (error); 117218618Sdchagin 118218618Sdchagin td->td_retval[0] = p2->p_pid; 119218618Sdchagin 120218616Sdchagin error = linux_proc_init(td, td->td_retval[0], 0); 121218616Sdchagin if (error) 122218616Sdchagin return (error); 123218616Sdchagin 124218616Sdchagin PROC_LOCK(p2); 125218616Sdchagin p2->p_flag |= P_PPWAIT; 126218616Sdchagin PROC_UNLOCK(p2); 127218616Sdchagin 128218616Sdchagin td2 = FIRST_THREAD_IN_PROC(p2); 129218616Sdchagin 130218616Sdchagin /* 131218616Sdchagin * Make this runnable after we are finished with it. 132218616Sdchagin */ 133218616Sdchagin thread_lock(td2); 134218616Sdchagin TD_SET_CAN_RUN(td2); 135218616Sdchagin sched_add(td2, SRQ_BORING); 136218616Sdchagin thread_unlock(td2); 137218616Sdchagin 138218616Sdchagin /* wait for the children to exit, ie. emulate vfork */ 139218616Sdchagin PROC_LOCK(p2); 140218616Sdchagin while (p2->p_flag & P_PPWAIT) 141218616Sdchagin cv_wait(&p2->p_pwait, &p2->p_mtx); 142218616Sdchagin PROC_UNLOCK(p2); 143218616Sdchagin 144218616Sdchagin return (0); 145218616Sdchagin} 146218616Sdchagin 147218616Sdchaginint 148218616Sdchaginlinux_clone(struct thread *td, struct linux_clone_args *args) 149218616Sdchagin{ 150218616Sdchagin int error, ff = RFPROC | RFSTOPPED; 151218616Sdchagin struct proc *p2; 152218616Sdchagin struct thread *td2; 153218616Sdchagin int exit_signal; 154218616Sdchagin struct linux_emuldata *em; 155218616Sdchagin 156218616Sdchagin#ifdef DEBUG 157218616Sdchagin if (ldebug(clone)) { 158218616Sdchagin printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, " 159218616Sdchagin "child tid: %p"), (unsigned)args->flags, 160218616Sdchagin args->stack, args->parent_tidptr, args->child_tidptr); 161218616Sdchagin } 162218616Sdchagin#endif 163218616Sdchagin 164218616Sdchagin exit_signal = args->flags & 0x000000ff; 165218616Sdchagin if (LINUX_SIG_VALID(exit_signal)) { 166218616Sdchagin if (exit_signal <= LINUX_SIGTBLSZ) 167218616Sdchagin exit_signal = 168218616Sdchagin linux_to_bsd_signal[_SIG_IDX(exit_signal)]; 169218616Sdchagin } else if (exit_signal != 0) 170218616Sdchagin return (EINVAL); 171218616Sdchagin 172218616Sdchagin if (args->flags & LINUX_CLONE_VM) 173218616Sdchagin ff |= RFMEM; 174218616Sdchagin if (args->flags & LINUX_CLONE_SIGHAND) 175218616Sdchagin ff |= RFSIGSHARE; 176218616Sdchagin /* 177218616Sdchagin * XXX: In Linux, sharing of fs info (chroot/cwd/umask) 178218616Sdchagin * and open files is independant. In FreeBSD, its in one 179218616Sdchagin * structure but in reality it does not cause any problems 180218616Sdchagin * because both of these flags are usually set together. 181218616Sdchagin */ 182218616Sdchagin if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS))) 183218616Sdchagin ff |= RFFDG; 184218616Sdchagin 185218616Sdchagin /* 186218616Sdchagin * Attempt to detect when linux_clone(2) is used for creating 187218616Sdchagin * kernel threads. Unfortunately despite the existence of the 188218616Sdchagin * CLONE_THREAD flag, version of linuxthreads package used in 189218616Sdchagin * most popular distros as of beginning of 2005 doesn't make 190218616Sdchagin * any use of it. Therefore, this detection relies on 191218616Sdchagin * empirical observation that linuxthreads sets certain 192218616Sdchagin * combination of flags, so that we can make more or less 193218616Sdchagin * precise detection and notify the FreeBSD kernel that several 194218616Sdchagin * processes are in fact part of the same threading group, so 195218616Sdchagin * that special treatment is necessary for signal delivery 196218616Sdchagin * between those processes and fd locking. 197218616Sdchagin */ 198218616Sdchagin if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS) 199218616Sdchagin ff |= RFTHREAD; 200218616Sdchagin 201218616Sdchagin if (args->flags & LINUX_CLONE_PARENT_SETTID) 202218616Sdchagin if (args->parent_tidptr == NULL) 203218616Sdchagin return (EINVAL); 204218616Sdchagin 205224987Sjonathan error = fork1(td, ff, 0, &p2, NULL, 0); 206218616Sdchagin if (error) 207218616Sdchagin return (error); 208218616Sdchagin 209218616Sdchagin if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) { 210218616Sdchagin sx_xlock(&proctree_lock); 211218616Sdchagin PROC_LOCK(p2); 212218616Sdchagin proc_reparent(p2, td->td_proc->p_pptr); 213218616Sdchagin PROC_UNLOCK(p2); 214218616Sdchagin sx_xunlock(&proctree_lock); 215218616Sdchagin } 216218616Sdchagin 217218616Sdchagin /* create the emuldata */ 218218616Sdchagin error = linux_proc_init(td, p2->p_pid, args->flags); 219218616Sdchagin /* reference it - no need to check this */ 220218616Sdchagin em = em_find(p2, EMUL_DOLOCK); 221218616Sdchagin KASSERT(em != NULL, ("clone: emuldata not found.")); 222218616Sdchagin /* and adjust it */ 223218616Sdchagin 224218616Sdchagin if (args->flags & LINUX_CLONE_THREAD) { 225218616Sdchagin#ifdef notyet 226218616Sdchagin PROC_LOCK(p2); 227218616Sdchagin p2->p_pgrp = td->td_proc->p_pgrp; 228218616Sdchagin PROC_UNLOCK(p2); 229218616Sdchagin#endif 230218616Sdchagin exit_signal = 0; 231218616Sdchagin } 232218616Sdchagin 233218616Sdchagin if (args->flags & LINUX_CLONE_CHILD_SETTID) 234218616Sdchagin em->child_set_tid = args->child_tidptr; 235218616Sdchagin else 236218616Sdchagin em->child_set_tid = NULL; 237218616Sdchagin 238218616Sdchagin if (args->flags & LINUX_CLONE_CHILD_CLEARTID) 239218616Sdchagin em->child_clear_tid = args->child_tidptr; 240218616Sdchagin else 241218616Sdchagin em->child_clear_tid = NULL; 242218616Sdchagin 243218616Sdchagin EMUL_UNLOCK(&emul_lock); 244218616Sdchagin 245218616Sdchagin if (args->flags & LINUX_CLONE_PARENT_SETTID) { 246218616Sdchagin error = copyout(&p2->p_pid, args->parent_tidptr, 247218616Sdchagin sizeof(p2->p_pid)); 248218616Sdchagin if (error) 249218616Sdchagin printf(LMSG("copyout failed!")); 250218616Sdchagin } 251218616Sdchagin 252218616Sdchagin PROC_LOCK(p2); 253218616Sdchagin p2->p_sigparent = exit_signal; 254218616Sdchagin PROC_UNLOCK(p2); 255218616Sdchagin td2 = FIRST_THREAD_IN_PROC(p2); 256218616Sdchagin /* 257218616Sdchagin * In a case of stack = NULL, we are supposed to COW calling process 258218616Sdchagin * stack. This is what normal fork() does, so we just keep tf_rsp arg 259218616Sdchagin * intact. 260218616Sdchagin */ 261218616Sdchagin if (args->stack) 262218616Sdchagin linux_set_upcall_kse(td2, PTROUT(args->stack)); 263218616Sdchagin 264218616Sdchagin if (args->flags & LINUX_CLONE_SETTLS) 265218616Sdchagin linux_set_cloned_tls(td2, args->tls); 266218616Sdchagin 267218616Sdchagin#ifdef DEBUG 268218616Sdchagin if (ldebug(clone)) 269218616Sdchagin printf(LMSG("clone: successful rfork to %d, " 270218616Sdchagin "stack %p sig = %d"), (int)p2->p_pid, args->stack, 271218616Sdchagin exit_signal); 272218616Sdchagin#endif 273218616Sdchagin if (args->flags & LINUX_CLONE_VFORK) { 274218616Sdchagin PROC_LOCK(p2); 275218616Sdchagin p2->p_flag |= P_PPWAIT; 276218616Sdchagin PROC_UNLOCK(p2); 277218616Sdchagin } 278218616Sdchagin 279218616Sdchagin /* 280218616Sdchagin * Make this runnable after we are finished with it. 281218616Sdchagin */ 282218616Sdchagin thread_lock(td2); 283218616Sdchagin TD_SET_CAN_RUN(td2); 284218616Sdchagin sched_add(td2, SRQ_BORING); 285218616Sdchagin thread_unlock(td2); 286218616Sdchagin 287218616Sdchagin td->td_retval[0] = p2->p_pid; 288218616Sdchagin td->td_retval[1] = 0; 289218616Sdchagin 290218616Sdchagin if (args->flags & LINUX_CLONE_VFORK) { 291218616Sdchagin /* wait for the children to exit, ie. emulate vfork */ 292218616Sdchagin PROC_LOCK(p2); 293218616Sdchagin while (p2->p_flag & P_PPWAIT) 294218616Sdchagin cv_wait(&p2->p_pwait, &p2->p_mtx); 295218616Sdchagin PROC_UNLOCK(p2); 296218616Sdchagin } 297218616Sdchagin 298218616Sdchagin return (0); 299218616Sdchagin} 300