lwproc.c revision 1.25
1254721Semaste/* $NetBSD: lwproc.c,v 1.25 2013/12/09 16:21:15 pooka Exp $ */ 2254721Semaste 3254721Semaste/* 4254721Semaste * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. 5254721Semaste * 6254721Semaste * Redistribution and use in source and binary forms, with or without 7254721Semaste * modification, are permitted provided that the following conditions 8254721Semaste * are met: 9254721Semaste * 1. Redistributions of source code must retain the above copyright 10254721Semaste * notice, this list of conditions and the following disclaimer. 11254721Semaste * 2. Redistributions in binary form must reproduce the above copyright 12254721Semaste * notice, this list of conditions and the following disclaimer in the 13296417Sdim * documentation and/or other materials provided with the distribution. 14254721Semaste * 15254721Semaste * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16296417Sdim * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17296417Sdim * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18296417Sdim * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19296417Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20296417Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21296417Sdim * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22296417Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23254721Semaste * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24254721Semaste * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25288943Sdim * SUCH DAMAGE. 26262528Semaste */ 27254721Semaste 28254721Semaste#include <sys/cdefs.h> 29254721Semaste__KERNEL_RCSID(0, "$NetBSD: lwproc.c,v 1.25 2013/12/09 16:21:15 pooka Exp $"); 30254721Semaste 31280031Sdim#include <sys/param.h> 32254721Semaste#include <sys/atomic.h> 33254721Semaste#include <sys/filedesc.h> 34254721Semaste#include <sys/kauth.h> 35254721Semaste#include <sys/kmem.h> 36276479Sdim#include <sys/lwp.h> 37276479Sdim#include <sys/ktrace.h> 38276479Sdim#include <sys/pool.h> 39276479Sdim#include <sys/proc.h> 40276479Sdim#include <sys/queue.h> 41296417Sdim#include <sys/resourcevar.h> 42296417Sdim#include <sys/uidinfo.h> 43276479Sdim 44254721Semaste#include <rump/rumpuser.h> 45254721Semaste 46254721Semaste#include "rump_private.h" 47254721Semaste 48254721Semastestruct emul *emul_default = &emul_netbsd; 49254721Semaste 50254721Semastestatic void 51254721Semastelwproc_proc_free(struct proc *p) 52254721Semaste{ 53254721Semaste kauth_cred_t cred; 54254721Semaste 55254721Semaste KASSERT(p->p_stat == SDYING || p->p_stat == SDEAD); 56254721Semaste 57254721Semaste#ifdef KTRACE 58254721Semaste if (p->p_tracep) { 59254721Semaste mutex_enter(&ktrace_lock); 60254721Semaste ktrderef(p); 61254721Semaste mutex_exit(&ktrace_lock); 62296417Sdim } 63254721Semaste#endif 64254721Semaste 65296417Sdim mutex_enter(proc_lock); 66254721Semaste 67254721Semaste KASSERT(p->p_nlwps == 0); 68254721Semaste KASSERT(LIST_EMPTY(&p->p_lwps)); 69254721Semaste 70254721Semaste LIST_REMOVE(p, p_list); 71254721Semaste LIST_REMOVE(p, p_sibling); 72254721Semaste proc_free_pid(p->p_pid); /* decrements nprocs */ 73254721Semaste proc_leavepgrp(p); /* releases proc_lock */ 74288943Sdim 75254721Semaste cred = p->p_cred; 76288943Sdim chgproccnt(kauth_cred_getuid(cred), -1); 77288943Sdim if (rump_proc_vfs_release) 78254721Semaste rump_proc_vfs_release(p); 79254721Semaste 80254721Semaste lim_free(p->p_limit); 81254721Semaste pstatsfree(p->p_stats); 82254721Semaste kauth_cred_free(p->p_cred); 83254721Semaste proc_finispecific(p); 84254721Semaste 85254721Semaste mutex_obj_free(p->p_lock); 86254721Semaste mutex_destroy(&p->p_stmutex); 87254721Semaste mutex_destroy(&p->p_auxlock); 88296417Sdim rw_destroy(&p->p_reflock); 89296417Sdim cv_destroy(&p->p_waitcv); 90254721Semaste cv_destroy(&p->p_lwpcv); 91296417Sdim 92296417Sdim /* non-kernel vmspaces are not shared */ 93296417Sdim if (!RUMP_LOCALPROC_P(p)) { 94296417Sdim KASSERT(p->p_vmspace->vm_refcnt == 1); 95296417Sdim kmem_free(p->p_vmspace, sizeof(*p->p_vmspace)); 96296417Sdim } 97296417Sdim 98296417Sdim proc_free_mem(p); 99296417Sdim} 100296417Sdim 101296417Sdim/* 102296417Sdim * Allocate a new process. Mostly mimic fork by 103296417Sdim * copying the properties of the parent. However, there are some 104296417Sdim * differences. 105296417Sdim * 106296417Sdim * Switch to the new lwp and return a pointer to it. 107296417Sdim */ 108254721Semastestatic struct proc * 109254721Semastelwproc_newproc(struct proc *parent, int flags) 110254721Semaste{ 111254721Semaste uid_t uid = kauth_cred_getuid(parent->p_cred); 112254721Semaste struct proc *p; 113254721Semaste 114254721Semaste /* maxproc not enforced */ 115254721Semaste atomic_inc_uint(&nprocs); 116262528Semaste 117254721Semaste /* allocate process */ 118254721Semaste p = proc_alloc(); 119262528Semaste memset(&p->p_startzero, 0, 120254721Semaste offsetof(struct proc, p_endzero) 121254721Semaste - offsetof(struct proc, p_startzero)); 122262528Semaste memcpy(&p->p_startcopy, &parent->p_startcopy, 123254721Semaste offsetof(struct proc, p_endcopy) 124254721Semaste - offsetof(struct proc, p_startcopy)); 125262528Semaste 126254721Semaste /* some other garbage we need to zero */ 127254721Semaste p->p_sigacts = NULL; 128262528Semaste p->p_aio = NULL; 129254721Semaste p->p_dtrace = NULL; 130254721Semaste p->p_mqueue_cnt = p->p_exitsig = 0; 131262528Semaste p->p_flag = p->p_sflag = p->p_slflag = p->p_lflag = p->p_stflag = 0; 132254721Semaste p->p_trace_enabled = 0; 133262528Semaste p->p_xstat = p->p_acflag = 0; 134254721Semaste p->p_stackbase = 0; 135254721Semaste 136254721Semaste p->p_stats = pstatscopy(parent->p_stats); 137254721Semaste 138254721Semaste p->p_vmspace = vmspace_kernel(); 139254721Semaste p->p_emul = emul_default; 140254721Semaste if (*parent->p_comm) 141254721Semaste strcpy(p->p_comm, parent->p_comm); 142254721Semaste else 143254721Semaste strcpy(p->p_comm, "rumproc"); 144254721Semaste 145254721Semaste if ((flags & RUMP_RFCFDG) == 0) 146254721Semaste KASSERT(parent == curproc); 147254721Semaste if (flags & RUMP_RFFDG) 148254721Semaste p->p_fd = fd_copy(); 149254721Semaste else if (flags & RUMP_RFCFDG) 150254721Semaste p->p_fd = fd_init(NULL); 151254721Semaste else 152254721Semaste fd_share(p); 153254721Semaste 154254721Semaste lim_addref(parent->p_limit); 155254721Semaste p->p_limit = parent->p_limit; 156254721Semaste 157254721Semaste LIST_INIT(&p->p_lwps); 158254721Semaste LIST_INIT(&p->p_children); 159254721Semaste 160254721Semaste p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 161254721Semaste mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH); 162254721Semaste mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE); 163254721Semaste rw_init(&p->p_reflock); 164254721Semaste cv_init(&p->p_waitcv, "pwait"); 165254721Semaste cv_init(&p->p_lwpcv, "plwp"); 166254721Semaste 167254721Semaste p->p_pptr = parent; 168254721Semaste p->p_ppid = parent->p_pid; 169254721Semaste p->p_stat = SACTIVE; 170254721Semaste 171254721Semaste kauth_proc_fork(parent, p); 172254721Semaste 173254721Semaste /* initialize cwd in rump kernels with vfs */ 174254721Semaste if (rump_proc_vfs_init) 175254721Semaste rump_proc_vfs_init(p); 176254721Semaste 177254721Semaste chgproccnt(uid, 1); /* not enforced */ 178254721Semaste 179254721Semaste /* publish proc various proc lists */ 180254721Semaste mutex_enter(proc_lock); 181254721Semaste LIST_INSERT_HEAD(&allproc, p, p_list); 182254721Semaste LIST_INSERT_HEAD(&parent->p_children, p, p_sibling); 183254721Semaste LIST_INSERT_AFTER(parent, p, p_pglist); 184254721Semaste mutex_exit(proc_lock); 185254721Semaste 186254721Semaste return p; 187254721Semaste} 188254721Semaste 189254721Semastestatic void 190254721Semastelwproc_freelwp(struct lwp *l) 191254721Semaste{ 192254721Semaste struct proc *p; 193254721Semaste 194254721Semaste p = l->l_proc; 195254721Semaste mutex_enter(p->p_lock); 196254721Semaste 197254721Semaste KASSERT(l->l_flag & LW_WEXIT); 198254721Semaste KASSERT(l->l_refcnt == 0); 199254721Semaste 200254721Semaste /* ok, zero references, continue with nuke */ 201254721Semaste LIST_REMOVE(l, l_sibling); 202254721Semaste KASSERT(p->p_nlwps >= 1); 203254721Semaste if (--p->p_nlwps == 0) { 204254721Semaste KASSERT(p != &proc0); 205254721Semaste p->p_stat = SDEAD; 206254721Semaste } 207254721Semaste cv_broadcast(&p->p_lwpcv); /* nobody sleeps on this in a rump kernel? */ 208254721Semaste kauth_cred_free(l->l_cred); 209254721Semaste mutex_exit(p->p_lock); 210254721Semaste 211262528Semaste mutex_enter(proc_lock); 212262528Semaste LIST_REMOVE(l, l_list); 213262528Semaste mutex_exit(proc_lock); 214262528Semaste 215262528Semaste if (l->l_name) 216254721Semaste kmem_free(l->l_name, MAXCOMLEN); 217262528Semaste lwp_finispecific(l); 218262528Semaste 219262528Semaste rumpuser_curlwpop(RUMPUSER_LWP_DESTROY, l); 220254721Semaste membar_exit(); 221254721Semaste kmem_free(l, sizeof(*l)); 222262528Semaste 223254721Semaste if (p->p_stat == SDEAD) 224262528Semaste lwproc_proc_free(p); 225262528Semaste} 226262528Semaste 227262528Semasteextern kmutex_t unruntime_lock; 228254721Semaste 229262528Semaste/* 230262528Semaste * called with p_lock held, releases lock before return 231262528Semaste */ 232262528Semastestatic void 233296417Sdimlwproc_makelwp(struct proc *p, struct lwp *l, bool doswitch, bool procmake) 234296417Sdim{ 235296417Sdim 236296417Sdim p->p_nlwps++; 237254721Semaste l->l_refcnt = 1; 238288943Sdim l->l_proc = p; 239288943Sdim 240288943Sdim l->l_lid = p->p_nlwpid++; 241262528Semaste LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling); 242262528Semaste 243262528Semaste l->l_fd = p->p_fd; 244288943Sdim l->l_cpu = rump_cpu; 245288943Sdim l->l_target_cpu = rump_cpu; /* Initial target CPU always the same */ 246254721Semaste l->l_stat = LSRUN; 247288943Sdim l->l_mutex = &unruntime_lock; 248288943Sdim TAILQ_INIT(&l->l_ld_locks); 249254721Semaste mutex_exit(p->p_lock); 250254721Semaste 251262528Semaste lwp_update_creds(l); 252254721Semaste lwp_initspecific(l); 253254721Semaste 254254721Semaste membar_enter(); 255254721Semaste rumpuser_curlwpop(RUMPUSER_LWP_CREATE, l); 256254721Semaste if (doswitch) { 257254721Semaste rump_lwproc_switch(l); 258254721Semaste } 259254721Semaste 260254721Semaste /* filedesc already has refcount 1 when process is created */ 261254721Semaste if (!procmake) { 262254721Semaste fd_hold(l); 263254721Semaste } 264254721Semaste 265254721Semaste mutex_enter(proc_lock); 266254721Semaste LIST_INSERT_HEAD(&alllwp, l, l_list); 267254721Semaste mutex_exit(proc_lock); 268254721Semaste} 269254721Semaste 270254721Semastestruct lwp * 271296417Sdimrump__lwproc_alloclwp(struct proc *p) 272254721Semaste{ 273254721Semaste struct lwp *l; 274254721Semaste bool newproc = false; 275254721Semaste 276296417Sdim if (p == NULL) { 277296417Sdim p = lwproc_newproc(&proc0, 0); 278296417Sdim newproc = true; 279296417Sdim } 280296417Sdim 281254721Semaste l = kmem_zalloc(sizeof(*l), KM_SLEEP); 282254721Semaste 283254721Semaste mutex_enter(p->p_lock); 284280031Sdim KASSERT((p->p_sflag & PS_RUMP_LWPEXIT) == 0); 285288943Sdim lwproc_makelwp(p, l, false, newproc); 286280031Sdim 287280031Sdim return l; 288288943Sdim} 289254721Semaste 290280031Sdimint 291288943Sdimrump_lwproc_newlwp(pid_t pid) 292254721Semaste{ 293254721Semaste struct proc *p; 294254721Semaste struct lwp *l; 295254721Semaste 296254721Semaste l = kmem_zalloc(sizeof(*l), KM_SLEEP); 297254721Semaste mutex_enter(proc_lock); 298254721Semaste p = proc_find_raw(pid); 299254721Semaste if (p == NULL) { 300254721Semaste mutex_exit(proc_lock); 301254721Semaste kmem_free(l, sizeof(*l)); 302254721Semaste return ESRCH; 303254721Semaste } 304254721Semaste mutex_enter(p->p_lock); 305254721Semaste if (p->p_sflag & PS_RUMP_LWPEXIT) { 306254721Semaste mutex_exit(proc_lock); 307254721Semaste mutex_exit(p->p_lock); 308254721Semaste kmem_free(l, sizeof(*l)); 309254721Semaste return EBUSY; 310254721Semaste } 311254721Semaste mutex_exit(proc_lock); 312254721Semaste lwproc_makelwp(p, l, true, false); 313254721Semaste 314254721Semaste return 0; 315254721Semaste} 316254721Semaste 317254721Semasteint 318254721Semasterump_lwproc_rfork(int flags) 319254721Semaste{ 320254721Semaste struct proc *p; 321254721Semaste struct lwp *l; 322254721Semaste 323254721Semaste if (flags & ~(RUMP_RFFDG|RUMP_RFCFDG) || 324254721Semaste (~flags & (RUMP_RFFDG|RUMP_RFCFDG)) == 0) 325254721Semaste return EINVAL; 326254721Semaste 327254721Semaste p = lwproc_newproc(curproc, flags); 328254721Semaste l = kmem_zalloc(sizeof(*l), KM_SLEEP); 329254721Semaste mutex_enter(p->p_lock); 330254721Semaste KASSERT((p->p_sflag & PS_RUMP_LWPEXIT) == 0); 331254721Semaste lwproc_makelwp(p, l, true, true); 332254721Semaste 333254721Semaste return 0; 334258054Semaste} 335258054Semaste 336258054Semaste/* 337296417Sdim * Switch to a new process/thread. Release previous one if 338296417Sdim * deemed to be exiting. This is considered a slow path for 339296417Sdim * rump kernel entry. 340296417Sdim */ 341296417Sdimvoid 342296417Sdimrump_lwproc_switch(struct lwp *newlwp) 343296417Sdim{ 344296417Sdim struct lwp *l = curlwp; 345296417Sdim 346296417Sdim KASSERT(!(l->l_flag & LW_WEXIT) || newlwp); 347296417Sdim 348296417Sdim if (__predict_false(newlwp && (newlwp->l_pflag & LP_RUNNING))) 349296417Sdim panic("lwp %p (%d:%d) already running", 350296417Sdim newlwp, newlwp->l_proc->p_pid, newlwp->l_lid); 351296417Sdim 352296417Sdim if (newlwp == NULL) { 353296417Sdim l->l_pflag &= ~LP_RUNNING; 354296417Sdim l->l_flag |= LW_RUMP_CLEAR; 355280031Sdim return; 356280031Sdim } 357280031Sdim 358254721Semaste /* fd_free() must be called from curlwp context. talk about ugh */ 359254721Semaste if (l->l_flag & LW_WEXIT) { 360254721Semaste fd_free(); 361254721Semaste } 362254721Semaste 363254721Semaste KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks); 364254721Semaste rumpuser_curlwpop(RUMPUSER_LWP_CLEAR, l); 365258884Semaste 366254721Semaste newlwp->l_cpu = newlwp->l_target_cpu = l->l_cpu; 367254721Semaste newlwp->l_mutex = l->l_mutex; 368254721Semaste newlwp->l_pflag |= LP_RUNNING; 369262528Semaste 370288943Sdim rumpuser_curlwpop(RUMPUSER_LWP_SET, newlwp); 371262528Semaste curcpu()->ci_curlwp = newlwp; 372262528Semaste KERNEL_LOCK(newlwp->l_biglocks, NULL); 373262528Semaste 374262528Semaste /* 375262528Semaste * Check if the thread should get a signal. This is 376262528Semaste * mostly to satisfy the "record" rump sigmodel. 377262528Semaste */ 378262528Semaste mutex_enter(newlwp->l_proc->p_lock); 379262528Semaste if (sigispending(newlwp, 0)) { 380276479Sdim newlwp->l_flag |= LW_PENDSIG; 381276479Sdim } 382276479Sdim mutex_exit(newlwp->l_proc->p_lock); 383276479Sdim 384280031Sdim l->l_mutex = &unruntime_lock; 385276479Sdim l->l_pflag &= ~LP_RUNNING; 386296417Sdim l->l_flag &= ~LW_PENDSIG; 387296417Sdim l->l_stat = LSRUN; 388296417Sdim 389276479Sdim if (l->l_flag & LW_WEXIT) { 390280031Sdim lwproc_freelwp(l); 391280031Sdim } 392280031Sdim} 393280031Sdim 394280031Sdim/* 395254721Semaste * Mark the current thread to be released upon return from 396262528Semaste * kernel. 397296417Sdim */ 398254721Semastevoid 399262528Semasterump_lwproc_releaselwp(void) 400262528Semaste{ 401262528Semaste struct lwp *l = curlwp; 402254721Semaste 403262528Semaste if (l->l_refcnt == 0 || l->l_flag & LW_WEXIT) 404254721Semaste panic("releasing non-pertinent lwp"); 405262528Semaste 406262528Semaste rump__lwproc_lwprele(); 407262528Semaste KASSERT(l->l_refcnt == 0 && (l->l_flag & LW_WEXIT)); 408254721Semaste} 409296417Sdim 410296417Sdim/* 411296417Sdim * In-kernel routines used to add and remove references for the 412262528Semaste * current thread. The main purpose is to make it possible for 413254721Semaste * implicit threads to persist over scheduling operations in 414262528Semaste * rump kernel drivers. Note that we don't need p_lock in a 415262528Semaste * rump kernel, since we do refcounting only for curlwp. 416262528Semaste */ 417296417Sdimvoid 418296417Sdimrump__lwproc_lwphold(void) 419296417Sdim{ 420262528Semaste struct lwp *l = curlwp; 421262528Semaste 422262528Semaste l->l_refcnt++; 423262528Semaste l->l_flag &= ~LW_WEXIT; 424262528Semaste} 425262528Semaste 426262528Semastevoid 427262528Semasterump__lwproc_lwprele(void) 428262528Semaste{ 429262528Semaste struct lwp *l = curlwp; 430262528Semaste 431262528Semaste l->l_refcnt--; 432262528Semaste if (l->l_refcnt == 0) 433262528Semaste l->l_flag |= LW_WEXIT; 434262528Semaste} 435262528Semaste 436262528Semastestruct lwp * 437262528Semasterump_lwproc_curlwp(void) 438262528Semaste{ 439262528Semaste struct lwp *l = curlwp; 440262528Semaste 441254721Semaste if (l->l_flag & LW_WEXIT) 442254721Semaste return NULL; 443254721Semaste return l; 444254721Semaste} 445254721Semaste 446280031Sdim/* this interface is under construction (like the proverbial 90's web page) */ 447280031Sdimint rump_i_know_what_i_am_doing_with_sysents = 0; 448280031Sdimvoid 449280031Sdimrump_lwproc_sysent_usenative() 450262528Semaste{ 451262528Semaste 452262528Semaste if (!rump_i_know_what_i_am_doing_with_sysents) 453254721Semaste panic("don't use rump_lwproc_sysent_usenative()"); 454254721Semaste curproc->p_emul = &emul_netbsd; 455280031Sdim} 456254721Semaste