/*-
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_exit.c	8.7 (Berkeley) 2/12/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_exit.c 170407 2007-06-07 22:27:15Z rwatson $");

#include "opt_compat.h"
#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/tty.h>
#include <sys/wait.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/signalvar.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/syslog.h>
#include <sys/ptrace.h>
#include <sys/acct.h>		/* for acct_process() function prototype */
#include <sys/filedesc.h>
#include <sys/shm.h>
#include <sys/sem.h>
70212904Sdim#ifdef KTRACE 71212904Sdim#include <sys/ktrace.h> 72212904Sdim#endif 73249423Sdim 74212904Sdim#include <security/audit/audit.h> 75212904Sdim#include <security/mac/mac_framework.h> 76218893Sdim 77218893Sdim#include <vm/vm.h> 78212904Sdim#include <vm/vm_extern.h> 79212904Sdim#include <vm/vm_param.h> 80212904Sdim#include <vm/pmap.h> 81212904Sdim#include <vm/vm_map.h> 82212904Sdim#include <vm/vm_page.h> 83212904Sdim#include <vm/uma.h> 84212904Sdim 85212904Sdim/* Required to be non-static for SysVR4 emulator */ 86212904SdimMALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status"); 87212904Sdim 88212904Sdim/* Hook for NFS teardown procedure. */ 89212904Sdimvoid (*nlminfo_release_p)(struct proc *p); 90212904Sdim 91212904Sdim/* 92212904Sdim * exit -- death of process. 93212904Sdim */ 94212904Sdimvoid 95212904Sdimsys_exit(struct thread *td, struct sys_exit_args *uap) 96218893Sdim{ 97212904Sdim 98218893Sdim exit1(td, W_EXITCODE(uap->rval, 0)); 99212904Sdim /* NOTREACHED */ 100212904Sdim} 101249423Sdim 102212904Sdim/* 103212904Sdim * Exit: deallocate address space and other resources, change proc state to 104212904Sdim * zombie, and unlink proc from allproc and parent's lists. Save exit status 105212904Sdim * and rusage for wait(). Check for child processes and orphan them. 106212904Sdim */ 107249423Sdimvoid 108212904Sdimexit1(struct thread *td, int rv) 109212904Sdim{ 110212904Sdim struct proc *p, *nq, *q; 111212904Sdim struct tty *tp; 112212904Sdim struct vnode *ttyvp; 113212904Sdim struct vnode *vtmp; 114212904Sdim#ifdef KTRACE 115212904Sdim struct vnode *tracevp; 116249423Sdim struct ucred *tracecred; 117212904Sdim#endif 118212904Sdim struct plimit *plim; 119212904Sdim struct rusage *ru; 120212904Sdim int locked; 121212904Sdim 122212904Sdim /* 123212904Sdim * Drop Giant if caller has it. Eventually we should warn about 124212904Sdim * being called with Giant held. 
125218893Sdim */ 126218893Sdim while (mtx_owned(&Giant)) 127212904Sdim mtx_unlock(&Giant); 128234353Sdim 129234353Sdim p = td->td_proc; 130212904Sdim if (p == initproc) { 131212904Sdim printf("init died (signal %d, exit %d)\n", 132212904Sdim WTERMSIG(rv), WEXITSTATUS(rv)); 133212904Sdim panic("Going nowhere without my init!"); 134212904Sdim } 135212904Sdim 136212904Sdim /* 137212904Sdim * MUST abort all other threads before proceeding past here. 138212904Sdim */ 139212904Sdim PROC_LOCK(p); 140234353Sdim if (p->p_flag & P_HADTHREADS) { 141263508Sdimretry: 142212904Sdim /* 143218893Sdim * First check if some other thread got here before us.. 144218893Sdim * if so, act apropriatly, (exit or suspend); 145218893Sdim */ 146212904Sdim thread_suspend_check(0); 147212904Sdim 148249423Sdim /* 149212904Sdim * Kill off the other threads. This requires 150212904Sdim * some co-operation from other parts of the kernel 151263508Sdim * so it may not be instantaneous. With this state set 152263508Sdim * any thread entering the kernel from userspace will 153212904Sdim * thread_exit() in trap(). Any thread attempting to 154218893Sdim * sleep will return immediately with EINTR or EWOULDBLOCK 155218893Sdim * which will hopefully force them to back out to userland 156218893Sdim * freeing resources as they go. Any thread attempting 157212904Sdim * to return to userland will thread_exit() from userret(). 158212904Sdim * thread_exit() will unsuspend us when the last of the 159218893Sdim * other threads exits. 160218893Sdim * If there is already a thread singler after resumption, 161212904Sdim * calling thread_single will fail; in that case, we just 162212904Sdim * re-check all suspension request, the thread should 163212904Sdim * either be suspended there or exit. 164212904Sdim */ 165212904Sdim if (thread_single(SINGLE_EXIT)) 166212904Sdim goto retry; 167212904Sdim 168212904Sdim /* 169212904Sdim * All other activity in this process is now stopped. 
170212904Sdim * Threading support has been turned off. 171218893Sdim */ 172212904Sdim } 173212904Sdim KASSERT(p->p_numthreads == 1, 174212904Sdim ("exit1: proc %p exiting with %d threads", p, p->p_numthreads)); 175212904Sdim /* 176212904Sdim * Wakeup anyone in procfs' PIOCWAIT. They should have a hold 177212904Sdim * on our vmspace, so we should block below until they have 178212904Sdim * released their reference to us. Note that if they have 179212904Sdim * requested S_EXIT stops we will block here until they ack 180218893Sdim * via PIOCCONT. 181212904Sdim */ 182210006Srdivacky _STOPEVENT(p, S_EXIT, rv); 183210006Srdivacky 184212904Sdim /* 185212904Sdim * Note that we are exiting and do another wakeup of anyone in 186212904Sdim * PIOCWAIT in case they aren't listening for S_EXIT stops or 187212904Sdim * decided to wait again after we told them we are exiting. 188212904Sdim */ 189212904Sdim p->p_flag |= P_WEXIT; 190210006Srdivacky wakeup(&p->p_stype); 191210006Srdivacky 192212904Sdim /* 193212904Sdim * Wait for any processes that have a hold on our vmspace to 194212904Sdim * release their reference. 195249423Sdim */ 196218893Sdim while (p->p_lock > 0) 197218893Sdim msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); 198218893Sdim 199218893Sdim PROC_UNLOCK(p); 200218893Sdim /* Drain the limit callout while we don't have the proc locked */ 201212904Sdim callout_drain(&p->p_limco); 202212904Sdim 203212904Sdim#ifdef AUDIT 204212904Sdim /* 205212904Sdim * The Sun BSM exit token contains two components: an exit status as 206212904Sdim * passed to exit(), and a return value to indicate what sort of exit 207212904Sdim * it was. The exit status is WEXITSTATUS(rv), but it's not clear 208212904Sdim * what the return value is. 209212904Sdim */ 210212904Sdim AUDIT_ARG(exit, WEXITSTATUS(rv), 0); 211212904Sdim AUDIT_SYSCALL_EXIT(0, td); 212212904Sdim#endif 213212904Sdim 214212904Sdim /* Are we a task leader? 
*/ 215212904Sdim if (p == p->p_leader) { 216218893Sdim mtx_lock(&ppeers_lock); 217218893Sdim q = p->p_peers; 218218893Sdim while (q != NULL) { 219218893Sdim PROC_LOCK(q); 220218893Sdim psignal(q, SIGKILL); 221218893Sdim PROC_UNLOCK(q); 222218893Sdim q = q->p_peers; 223218893Sdim } 224218893Sdim while (p->p_peers != NULL) 225218893Sdim msleep(p, &ppeers_lock, PWAIT, "exit1", 0); 226218893Sdim mtx_unlock(&ppeers_lock); 227218893Sdim } 228218893Sdim 229218893Sdim /* 230218893Sdim * Check if any loadable modules need anything done at process exit. 231218893Sdim * E.g. SYSV IPC stuff 232218893Sdim * XXX what if one of these generates an error? 233218893Sdim */ 234218893Sdim EVENTHANDLER_INVOKE(process_exit, p); 235218893Sdim 236218893Sdim MALLOC(ru, struct rusage *, sizeof(struct rusage), 237218893Sdim M_ZOMBIE, M_WAITOK); 238218893Sdim /* 239218893Sdim * If parent is waiting for us to exit or exec, 240218893Sdim * P_PPWAIT is set; we will wakeup the parent below. 241218893Sdim */ 242218893Sdim PROC_LOCK(p); 243218893Sdim stopprofclock(p); 244212904Sdim p->p_flag &= ~(P_TRACED | P_PPWAIT); 245212904Sdim 246212904Sdim /* 247249423Sdim * Stop the real interval timer. If the handler is currently 248218893Sdim * executing, prevent it from rearming itself and let it finish. 249218893Sdim */ 250218893Sdim if (timevalisset(&p->p_realtimer.it_value) && 251212904Sdim callout_stop(&p->p_itcallout) == 0) { 252212904Sdim timevalclear(&p->p_realtimer.it_interval); 253212904Sdim msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0); 254212904Sdim KASSERT(!timevalisset(&p->p_realtimer.it_value), 255212904Sdim ("realtime timer is still armed")); 256212904Sdim } 257212904Sdim PROC_UNLOCK(p); 258212904Sdim 259212904Sdim /* 260212904Sdim * Reset any sigio structures pointing to us as a result of 261212904Sdim * F_SETOWN with our pid. 
262212904Sdim */ 263212904Sdim funsetownlst(&p->p_sigiolst); 264212904Sdim 265212904Sdim /* 266212904Sdim * If this process has an nlminfo data area (for lockd), release it 267212904Sdim */ 268212904Sdim if (nlminfo_release_p != NULL && p->p_nlminfo != NULL) 269212904Sdim (*nlminfo_release_p)(p); 270212904Sdim 271234353Sdim /* 272212904Sdim * Close open files and release open-file table. 273212904Sdim * This may block! 274212904Sdim */ 275212904Sdim fdfree(td); 276212904Sdim 277212904Sdim /* 278212904Sdim * If this thread tickled GEOM, we need to wait for the giggling to 279212904Sdim * stop before we return to userland 280249423Sdim */ 281212904Sdim if (td->td_pflags & TDP_GEOM) 282212904Sdim g_waitidle(); 283212904Sdim 284212904Sdim /* 285212904Sdim * Remove ourself from our leader's peer list and wake our leader. 286212904Sdim */ 287212904Sdim mtx_lock(&ppeers_lock); 288212904Sdim if (p->p_leader->p_peers) { 289212904Sdim q = p->p_leader; 290212904Sdim while (q->p_peers != p) 291212904Sdim q = q->p_peers; 292212904Sdim q->p_peers = p->p_peers; 293212904Sdim wakeup(p->p_leader); 294212904Sdim } 295212904Sdim mtx_unlock(&ppeers_lock); 296212904Sdim 297212904Sdim vmspace_exit(td); 298212904Sdim 299212904Sdim mtx_lock(&Giant); /* XXX TTY */ 300212904Sdim sx_xlock(&proctree_lock); 301212904Sdim if (SESS_LEADER(p)) { 302212904Sdim struct session *sp; 303212904Sdim 304234353Sdim sp = p->p_session; 305234353Sdim if (sp->s_ttyvp) { 306212904Sdim /* 307234353Sdim * Controlling process. 308212904Sdim * Signal foreground pgrp, 309212904Sdim * drain controlling terminal 310234353Sdim * and revoke access to controlling terminal. 
311212904Sdim */ 312212904Sdim if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) { 313212904Sdim tp = sp->s_ttyp; 314212904Sdim if (sp->s_ttyp->t_pgrp) { 315212904Sdim PGRP_LOCK(sp->s_ttyp->t_pgrp); 316212904Sdim pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1); 317212904Sdim PGRP_UNLOCK(sp->s_ttyp->t_pgrp); 318212904Sdim } 319212904Sdim /* XXX tp should be locked. */ 320218893Sdim sx_xunlock(&proctree_lock); 321212904Sdim (void) ttywait(tp); 322212904Sdim sx_xlock(&proctree_lock); 323212904Sdim /* 324218893Sdim * The tty could have been revoked 325218893Sdim * if we blocked. 326218893Sdim */ 327218893Sdim if (sp->s_ttyvp) { 328218893Sdim ttyvp = sp->s_ttyvp; 329218893Sdim SESS_LOCK(p->p_session); 330218893Sdim sp->s_ttyvp = NULL; 331218893Sdim SESS_UNLOCK(p->p_session); 332218893Sdim sx_xunlock(&proctree_lock); 333218893Sdim VOP_LOCK(ttyvp, LK_EXCLUSIVE, td); 334249423Sdim VOP_REVOKE(ttyvp, REVOKEALL); 335212904Sdim vput(ttyvp); 336212904Sdim sx_xlock(&proctree_lock); 337212904Sdim } 338212904Sdim } 339212904Sdim if (sp->s_ttyvp) { 340212904Sdim ttyvp = sp->s_ttyvp; 341249423Sdim SESS_LOCK(p->p_session); 342212904Sdim sp->s_ttyvp = NULL; 343218893Sdim SESS_UNLOCK(p->p_session); 344212904Sdim vrele(ttyvp); 345212904Sdim } 346212904Sdim /* 347212904Sdim * s_ttyp is not zero'd; we use this to indicate 348212904Sdim * that the session once had a controlling terminal. 349212904Sdim * (for logging and informational purposes) 350212904Sdim */ 351212904Sdim } 352212904Sdim SESS_LOCK(p->p_session); 353218893Sdim sp->s_leader = NULL; 354218893Sdim SESS_UNLOCK(p->p_session); 355212904Sdim } 356212904Sdim fixjobc(p, p->p_pgrp, 0); 357212904Sdim sx_xunlock(&proctree_lock); 358212904Sdim (void)acct_process(td); 359212904Sdim mtx_unlock(&Giant); 360212904Sdim#ifdef KTRACE 361212904Sdim /* 362218893Sdim * Drain any pending records on the thread and release the trace 363218893Sdim * file. It might be better if drain-and-clear were atomic. 
364212904Sdim */ 365212904Sdim ktrprocexit(td); 366212904Sdim PROC_LOCK(p); 367212904Sdim mtx_lock(&ktrace_mtx); 368212904Sdim p->p_traceflag = 0; /* don't trace the vrele() */ 369212904Sdim tracevp = p->p_tracevp; 370212904Sdim p->p_tracevp = NULL; 371212904Sdim tracecred = p->p_tracecred; 372212904Sdim p->p_tracecred = NULL; 373212904Sdim mtx_unlock(&ktrace_mtx); 374212904Sdim PROC_UNLOCK(p); 375212904Sdim if (tracevp != NULL) { 376212904Sdim locked = VFS_LOCK_GIANT(tracevp->v_mount); 377212904Sdim vrele(tracevp); 378212904Sdim VFS_UNLOCK_GIANT(locked); 379212904Sdim } 380212904Sdim if (tracecred != NULL) 381212904Sdim crfree(tracecred); 382212904Sdim#endif 383212904Sdim /* 384212904Sdim * Release reference to text vnode 385212904Sdim */ 386212904Sdim if ((vtmp = p->p_textvp) != NULL) { 387212904Sdim p->p_textvp = NULL; 388212904Sdim locked = VFS_LOCK_GIANT(vtmp->v_mount); 389212904Sdim vrele(vtmp); 390212904Sdim VFS_UNLOCK_GIANT(locked); 391212904Sdim } 392212904Sdim 393212904Sdim /* 394212904Sdim * Release our limits structure. 395212904Sdim */ 396218893Sdim PROC_LOCK(p); 397212904Sdim plim = p->p_limit; 398212904Sdim p->p_limit = NULL; 399212904Sdim PROC_UNLOCK(p); 400212904Sdim lim_free(plim); 401251662Sdim 402263508Sdim /* 403263508Sdim * Remove proc from allproc queue and pidhash chain. 404251662Sdim * Place onto zombproc. Unlink from parent's child list. 405249423Sdim */ 406251662Sdim sx_xlock(&allproc_lock); 407212904Sdim LIST_REMOVE(p, p_list); 408218893Sdim LIST_INSERT_HEAD(&zombproc, p, p_list); 409212904Sdim LIST_REMOVE(p, p_hash); 410212904Sdim sx_xunlock(&allproc_lock); 411249423Sdim 412251662Sdim /* 413251662Sdim * Call machine-dependent code to release any 414212904Sdim * machine-dependent resources other than the address space. 415251662Sdim * The address space is released by "vmspace_exitfree(p)" in 416251662Sdim * vm_waitproc(). 
417212904Sdim */ 418251662Sdim cpu_exit(td); 419218893Sdim 420218893Sdim WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid); 421249423Sdim 422218893Sdim /* 423218893Sdim * Reparent all of our children to init. 424218893Sdim */ 425218893Sdim sx_xlock(&proctree_lock); 426218893Sdim q = LIST_FIRST(&p->p_children); 427218893Sdim if (q != NULL) /* only need this if any child is S_ZOMB */ 428218893Sdim wakeup(initproc); 429218893Sdim for (; q != NULL; q = nq) { 430212904Sdim nq = LIST_NEXT(q, p_sibling); 431212904Sdim PROC_LOCK(q); 432212904Sdim proc_reparent(q, initproc); 433212904Sdim q->p_sigparent = SIGCHLD; 434212904Sdim /* 435212904Sdim * Traced processes are killed 436212904Sdim * since their existence means someone is screwing up. 437218893Sdim */ 438212904Sdim if (q->p_flag & P_TRACED) { 439251662Sdim q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE); 440251662Sdim psignal(q, SIGKILL); 441251662Sdim } 442251662Sdim PROC_UNLOCK(q); 443218893Sdim } 444263508Sdim 445263508Sdim /* Save exit status. */ 446263508Sdim PROC_LOCK(p); 447263508Sdim p->p_xstat = rv; 448263508Sdim p->p_xthread = td; 449263508Sdim /* 450251662Sdim * All statistics have been aggregated into the final td_ru by 451251662Sdim * thread_exit(). Copy these into the proc here where wait*() 452218893Sdim * can find them. 453251662Sdim * XXX We will miss any statistics gathered between here and 454251662Sdim * thread_exit() except for those related to clock ticks. 455251662Sdim */ 456218893Sdim *ru = td->td_ru; 457251662Sdim ru->ru_nvcsw++; 458251662Sdim p->p_ru = ru; 459251662Sdim /* 460251662Sdim * Notify interested parties of our demise. 461263508Sdim */ 462263508Sdim KNOTE_LOCKED(&p->p_klist, NOTE_EXIT); 463263508Sdim 464251662Sdim /* 465251662Sdim * Just delete all entries in the p_klist. At this point we won't 466251662Sdim * report any more events, and there are nasty race conditions that 467251662Sdim * can beat us if we don't. 
468251662Sdim */ 469212904Sdim knlist_clear(&p->p_klist, 1); 470212904Sdim 471218893Sdim /* 472218893Sdim * Notify parent that we're gone. If parent has the PS_NOCLDWAIT 473218893Sdim * flag set, or if the handler is set to SIG_IGN, notify process 474218893Sdim * 1 instead (and hope it will handle this situation). 475263508Sdim */ 476263508Sdim PROC_LOCK(p->p_pptr); 477263508Sdim mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); 478218893Sdim if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) { 479263508Sdim struct proc *pp; 480263508Sdim 481263508Sdim mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); 482263508Sdim pp = p->p_pptr; 483263508Sdim PROC_UNLOCK(pp); 484263508Sdim proc_reparent(p, initproc); 485263508Sdim p->p_sigparent = SIGCHLD; 486263508Sdim PROC_LOCK(p->p_pptr); 487263508Sdim /* 488263508Sdim * If this was the last child of our parent, notify 489263508Sdim * parent, so in case he was wait(2)ing, he will 490218893Sdim * continue. 491218893Sdim */ 492218893Sdim if (LIST_EMPTY(&pp->p_children)) 493218893Sdim wakeup(pp); 494218893Sdim } else 495218893Sdim mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); 496212904Sdim 497212904Sdim if (p->p_pptr == initproc) 498218893Sdim psignal(p->p_pptr, SIGCHLD); 499218893Sdim else if (p->p_sigparent != 0) { 500218893Sdim if (p->p_sigparent == SIGCHLD) 501218893Sdim childproc_exited(p); 502218893Sdim else /* LINUX thread */ 503218893Sdim psignal(p->p_pptr, p->p_sigparent); 504218893Sdim } 505218893Sdim sx_xunlock(&proctree_lock); 506218893Sdim 507218893Sdim /* 508212904Sdim * The state PRS_ZOMBIE prevents other proesses from sending 509212904Sdim * signal to the process, to avoid memory leak, we free memory 510212904Sdim * for signal queue at the time when the state is set. 511212904Sdim */ 512212904Sdim sigqueue_flush(&p->p_sigqueue); 513212904Sdim sigqueue_flush(&td->td_sigqueue); 514212904Sdim 515218893Sdim /* 516218893Sdim * We have to wait until after acquiring all locks before 517218893Sdim * changing p_state. 
We need to avoid all possible context 518212904Sdim * switches (including ones from blocking on a mutex) while 519212904Sdim * marked as a zombie. We also have to set the zombie state 520218893Sdim * before we release the parent process' proc lock to avoid 521218893Sdim * a lost wakeup. So, we first call wakeup, then we grab the 522218893Sdim * sched lock, update the state, and release the parent process' 523218893Sdim * proc lock. 524218893Sdim */ 525212904Sdim wakeup(p->p_pptr); 526212904Sdim PROC_SLOCK(p->p_pptr); 527212904Sdim sched_exit(p->p_pptr, td); 528212904Sdim PROC_SUNLOCK(p->p_pptr); 529212904Sdim PROC_SLOCK(p); 530212904Sdim p->p_state = PRS_ZOMBIE; 531212904Sdim PROC_UNLOCK(p->p_pptr); 532212904Sdim 533212904Sdim /* 534212904Sdim * Hopefully no one will try to deliver a signal to the process this 535212904Sdim * late in the game. 536212904Sdim */ 537212904Sdim knlist_destroy(&p->p_klist); 538212904Sdim 539212904Sdim /* 540212904Sdim * Make sure the scheduler takes this thread out of its tables etc. 541212904Sdim * This will also release this thread's reference to the ucred. 542212904Sdim * Other thread parts to release include pcb bits and such. 543212904Sdim */ 544212904Sdim thread_exit(); 545212904Sdim} 546212904Sdim 547212904Sdim 548212904Sdim#ifndef _SYS_SYSPROTO_H_ 549212904Sdimstruct abort2_args { 550212904Sdim char *why; 551212904Sdim int nargs; 552212904Sdim void **args; 553212904Sdim}; 554212904Sdim#endif 555212904Sdim 556212904Sdimint 557212904Sdimabort2(struct thread *td, struct abort2_args *uap) 558212904Sdim{ 559212904Sdim struct proc *p = td->td_proc; 560212904Sdim struct sbuf *sb; 561212904Sdim void *uargs[16]; 562212904Sdim int error, i, sig; 563212904Sdim 564212904Sdim error = 0; /* satisfy compiler */ 565212904Sdim 566212904Sdim /* 567212904Sdim * Do it right now so we can log either proper call of abort2(), or 568212904Sdim * note, that invalid argument was passed. 
512 is big enough to 569212904Sdim * handle 16 arguments' descriptions with additional comments. 570212904Sdim */ 571212904Sdim sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN); 572212904Sdim sbuf_clear(sb); 573212904Sdim sbuf_printf(sb, "%s(pid %d uid %d) aborted: ", 574212904Sdim p->p_comm, p->p_pid, td->td_ucred->cr_uid); 575212904Sdim /* 576212904Sdim * Since we can't return from abort2(), send SIGKILL in cases, where 577212904Sdim * abort2() was called improperly 578212904Sdim */ 579212904Sdim sig = SIGKILL; 580212904Sdim /* Prevent from DoSes from user-space. */ 581212904Sdim if (uap->nargs < 0 || uap->nargs > 16) 582212904Sdim goto out; 583212904Sdim if (uap->args == NULL) 584212904Sdim goto out; 585212904Sdim error = copyin(uap->args, uargs, uap->nargs * sizeof(void *)); 586212904Sdim if (error != 0) 587212904Sdim goto out; 588212904Sdim /* 589212904Sdim * Limit size of 'reason' string to 128. Will fit even when 590212904Sdim * maximal number of arguments was chosen to be logged. 591212904Sdim */ 592212904Sdim if (uap->why != NULL) { 593212904Sdim error = sbuf_copyin(sb, uap->why, 128); 594212904Sdim if (error < 0) 595212904Sdim goto out; 596212904Sdim } else { 597212904Sdim sbuf_printf(sb, "(null)"); 598212904Sdim } 599212904Sdim if (uap->nargs) { 600212904Sdim sbuf_printf(sb, "("); 601212904Sdim for (i = 0;i < uap->nargs; i++) 602212904Sdim sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]); 603212904Sdim sbuf_printf(sb, ")"); 604212904Sdim } 605212904Sdim /* 606212904Sdim * Final stage: arguments were proper, string has been 607212904Sdim * successfully copied from userspace, and copying pointers 608212904Sdim * from user-space succeed. 
609210006Srdivacky */ 610210006Srdivacky sig = SIGABRT; 611210006Srdivackyout: 612218893Sdim if (sig == SIGKILL) { 613218893Sdim sbuf_trim(sb); 614212904Sdim sbuf_printf(sb, " (Reason text inaccessible)"); 615218893Sdim } 616212904Sdim sbuf_cat(sb, "\n"); 617212904Sdim sbuf_finish(sb); 618212904Sdim log(LOG_INFO, "%s", sbuf_data(sb)); 619212904Sdim sbuf_delete(sb); 620212904Sdim exit1(td, W_EXITCODE(0, sig)); 621263508Sdim return (0); 622263508Sdim} 623249423Sdim 624263508Sdim 625249423Sdim#ifdef COMPAT_43 626212904Sdim/* 627210006Srdivacky * The dirty work is handled by kern_wait(). 628210006Srdivacky */ 629210006Srdivackyint 630210006Srdivackyowait(struct thread *td, struct owait_args *uap __unused) 631210006Srdivacky{ 632210006Srdivacky int error, status; 633210006Srdivacky 634210006Srdivacky error = kern_wait(td, WAIT_ANY, &status, 0, NULL); 635212904Sdim if (error == 0) 636212904Sdim td->td_retval[1] = status; 637263508Sdim return (error); 638263508Sdim} 639263508Sdim#endif /* COMPAT_43 */ 640212904Sdim 641212904Sdim/* 642212904Sdim * The dirty work is handled by kern_wait(). 
643212904Sdim */ 644218893Sdimint 645212904Sdimwait4(struct thread *td, struct wait_args *uap) 646218893Sdim{ 647212904Sdim struct rusage ru, *rup; 648212904Sdim int error, status; 649218893Sdim 650218893Sdim if (uap->rusage != NULL) 651221345Sdim rup = &ru; 652221345Sdim else 653221345Sdim rup = NULL; 654221345Sdim error = kern_wait(td, uap->pid, &status, uap->options, rup); 655212904Sdim if (uap->status != NULL && error == 0) 656212904Sdim error = copyout(&status, uap->status, sizeof(status)); 657212904Sdim if (uap->rusage != NULL && error == 0) 658212904Sdim error = copyout(&ru, uap->rusage, sizeof(struct rusage)); 659212904Sdim return (error); 660221345Sdim} 661221345Sdim 662212904Sdimint 663221345Sdimkern_wait(struct thread *td, pid_t pid, int *status, int options, 664221345Sdim struct rusage *rusage) 665221345Sdim{ 666221345Sdim struct proc *p, *q, *t; 667212904Sdim int error, nfound; 668212904Sdim 669212904Sdim AUDIT_ARG(pid, pid); 670221345Sdim 671221345Sdim q = td->td_proc; 672212904Sdim if (pid == 0) { 673212904Sdim PROC_LOCK(q); 674212904Sdim pid = -q->p_pgid; 675212904Sdim PROC_UNLOCK(q); 676212904Sdim } 677212904Sdim if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WLINUXCLONE)) 678212904Sdim return (EINVAL); 679212904Sdimloop: 680212904Sdim if (q->p_flag & P_STATCHILD) { 681218893Sdim PROC_LOCK(q); 682221345Sdim q->p_flag &= ~P_STATCHILD; 683218893Sdim PROC_UNLOCK(q); 684218893Sdim } 685218893Sdim nfound = 0; 686218893Sdim sx_xlock(&proctree_lock); 687218893Sdim LIST_FOREACH(p, &q->p_children, p_sibling) { 688218893Sdim PROC_LOCK(p); 689218893Sdim if (pid != WAIT_ANY && 690218893Sdim p->p_pid != pid && p->p_pgid != -pid) { 691212904Sdim PROC_UNLOCK(p); 692251662Sdim continue; 693251662Sdim } 694212904Sdim if (p_canwait(td, p)) { 695234353Sdim PROC_UNLOCK(p); 696234353Sdim continue; 697234353Sdim } 698234353Sdim 699212904Sdim /* 700212904Sdim * This special case handles a kthread spawned by linux_clone 701212904Sdim * (see linux_misc.c). 
The linux_wait4 and linux_waitpid 702210006Srdivacky * functions need to be able to distinguish between waiting 703210006Srdivacky * on a process and waiting on a thread. It is a thread if 704218893Sdim * p_sigparent is not SIGCHLD, and the WLINUXCLONE option 705210006Srdivacky * signifies we want to wait for threads and not processes. 706212904Sdim */ 707218893Sdim if ((p->p_sigparent != SIGCHLD) ^ 708212904Sdim ((options & WLINUXCLONE) != 0)) { 709263508Sdim PROC_UNLOCK(p); 710218893Sdim continue; 711218893Sdim } 712218893Sdim 713263508Sdim nfound++; 714263508Sdim if (p->p_state == PRS_ZOMBIE) { 715263508Sdim 716218893Sdim /* 717218893Sdim * It is possible that the last thread of this 718218893Sdim * process is still running on another CPU 719218893Sdim * in thread_exit() after having dropped the process 720218893Sdim * lock via PROC_UNLOCK() but before it has completed 721212904Sdim * cpu_throw(). In that case, the other thread must 722212904Sdim * still hold the proc slock, so simply by acquiring 723212904Sdim * proc slock once we will wait long enough for the 724212904Sdim * thread to exit in that case. 725212904Sdim * XXX This is questionable. 726212904Sdim */ 727212904Sdim PROC_SLOCK(p); 728212904Sdim PROC_SUNLOCK(p); 729218893Sdim 730212904Sdim td->td_retval[0] = p->p_pid; 731218893Sdim if (status) 732212904Sdim *status = p->p_xstat; /* convert to int */ 733212904Sdim if (rusage) { 734212904Sdim *rusage = *p->p_ru; 735212904Sdim calcru(p, &rusage->ru_utime, &rusage->ru_stime); 736218893Sdim } 737218893Sdim 738218893Sdim PROC_LOCK(q); 739218893Sdim sigqueue_take(p->p_ksi); 740218893Sdim PROC_UNLOCK(q); 741218893Sdim 742218893Sdim /* 743218893Sdim * If we got the child via a ptrace 'attach', 744212904Sdim * we need to give it back to the old parent. 
745212904Sdim */ 746212904Sdim PROC_UNLOCK(p); 747212904Sdim if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) { 748218893Sdim PROC_LOCK(p); 749218893Sdim p->p_oppid = 0; 750218893Sdim proc_reparent(p, t); 751218893Sdim PROC_UNLOCK(p); 752212904Sdim tdsignal(t, NULL, SIGCHLD, p->p_ksi); 753218893Sdim wakeup(t); 754212904Sdim PROC_UNLOCK(t); 755218893Sdim sx_xunlock(&proctree_lock); 756212904Sdim return (0); 757212904Sdim } 758212904Sdim 759263508Sdim /* 760263508Sdim * Remove other references to this process to ensure 761263508Sdim * we have an exclusive reference. 762263508Sdim */ 763263508Sdim sx_xlock(&allproc_lock); 764263508Sdim LIST_REMOVE(p, p_list); /* off zombproc */ 765263508Sdim sx_xunlock(&allproc_lock); 766263508Sdim LIST_REMOVE(p, p_sibling); 767263508Sdim leavepgrp(p); 768263508Sdim sx_xunlock(&proctree_lock); 769263508Sdim 770263508Sdim /* 771263508Sdim * As a side effect of this lock, we know that 772263508Sdim * all other writes to this proc are visible now, so 773263508Sdim * no more locking is needed for p. 774263508Sdim */ 775263508Sdim PROC_LOCK(p); 776263508Sdim p->p_xstat = 0; /* XXX: why? */ 777263508Sdim PROC_UNLOCK(p); 778263508Sdim PROC_LOCK(q); 779263508Sdim ruadd(&q->p_stats->p_cru, &q->p_crux, p->p_ru, 780263508Sdim &p->p_rux); 781263508Sdim PROC_UNLOCK(q); 782263508Sdim FREE(p->p_ru, M_ZOMBIE); 783263508Sdim p->p_ru = NULL; 784212904Sdim 785212904Sdim /* 786212904Sdim * Decrement the count of procs running with this uid. 787212904Sdim */ 788212904Sdim (void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0); 789218893Sdim 790212904Sdim /* 791212904Sdim * Free credentials, arguments, and sigacts. 792212904Sdim */ 793212904Sdim crfree(p->p_ucred); 794218893Sdim p->p_ucred = NULL; 795212904Sdim pargs_drop(p->p_args); 796218893Sdim p->p_args = NULL; 797218893Sdim sigacts_free(p->p_sigacts); 798212904Sdim p->p_sigacts = NULL; 799218893Sdim 800218893Sdim /* 801218893Sdim * Do any thread-system specific cleanups. 
802212904Sdim */ 803212904Sdim thread_wait(p); 804212904Sdim 805212904Sdim /* 806212904Sdim * Give vm and machine-dependent layer a chance 807212904Sdim * to free anything that cpu_exit couldn't 808234353Sdim * release while still running in process context. 809234353Sdim */ 810234353Sdim vm_waitproc(p); 811234353Sdim#ifdef MAC 812234353Sdim mac_destroy_proc(p); 813234353Sdim#endif 814234353Sdim KASSERT(FIRST_THREAD_IN_PROC(p), 815234353Sdim ("kern_wait: no residual thread!")); 816234353Sdim uma_zfree(proc_zone, p); 817212904Sdim sx_xlock(&allproc_lock); 818212904Sdim nprocs--; 819234353Sdim sx_xunlock(&allproc_lock); 820234353Sdim return (0); 821234353Sdim } 822234353Sdim PROC_SLOCK(p); 823234353Sdim if ((p->p_flag & P_STOPPED_SIG) && 824212904Sdim (p->p_suspcount == p->p_numthreads) && 825212904Sdim (p->p_flag & P_WAITED) == 0 && 826212904Sdim (p->p_flag & P_TRACED || options & WUNTRACED)) { 827212904Sdim PROC_SUNLOCK(p); 828218893Sdim p->p_flag |= P_WAITED; 829218893Sdim sx_xunlock(&proctree_lock); 830212904Sdim td->td_retval[0] = p->p_pid; 831212904Sdim if (status) 832212904Sdim *status = W_STOPCODE(p->p_xstat); 833212904Sdim 834218893Sdim PROC_LOCK(q); 835218893Sdim sigqueue_take(p->p_ksi); 836218893Sdim PROC_UNLOCK(q); 837212904Sdim PROC_UNLOCK(p); 838212904Sdim 839212904Sdim return (0); 840212904Sdim } 841212904Sdim PROC_SUNLOCK(p); 842212904Sdim if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) { 843212904Sdim sx_xunlock(&proctree_lock); 844212904Sdim td->td_retval[0] = p->p_pid; 845212904Sdim p->p_flag &= ~P_CONTINUED; 846212904Sdim 847212904Sdim PROC_LOCK(q); 848212904Sdim sigqueue_take(p->p_ksi); 849212904Sdim PROC_UNLOCK(q); 850212904Sdim PROC_UNLOCK(p); 851212904Sdim 852212904Sdim if (status) 853212904Sdim *status = SIGCONT; 854212904Sdim return (0); 855212904Sdim } 856212904Sdim PROC_UNLOCK(p); 857212904Sdim } 858234353Sdim if (nfound == 0) { 859234353Sdim sx_xunlock(&proctree_lock); 860234353Sdim return (ECHILD); 861234353Sdim } 862218893Sdim if 
(options & WNOHANG) { 863234353Sdim sx_xunlock(&proctree_lock); 864212904Sdim td->td_retval[0] = 0; 865212904Sdim return (0); 866212904Sdim } 867218893Sdim PROC_LOCK(q); 868218893Sdim sx_xunlock(&proctree_lock); 869218893Sdim if (q->p_flag & P_STATCHILD) { 870218893Sdim q->p_flag &= ~P_STATCHILD; 871218893Sdim error = 0; 872212904Sdim } else 873212904Sdim error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0); 874212904Sdim PROC_UNLOCK(q); 875212904Sdim if (error) 876234353Sdim return (error); 877212904Sdim goto loop; 878212904Sdim} 879212904Sdim 880212904Sdim/* 881212904Sdim * Make process 'parent' the new parent of process 'child'. 882212904Sdim * Must be called with an exclusive hold of proctree lock. 883234353Sdim */ 884234353Sdimvoid 885234353Sdimproc_reparent(struct proc *child, struct proc *parent) 886234353Sdim{ 887234353Sdim 888234353Sdim sx_assert(&proctree_lock, SX_XLOCKED); 889234353Sdim PROC_LOCK_ASSERT(child, MA_OWNED); 890234353Sdim if (child->p_pptr == parent) 891234353Sdim return; 892234353Sdim 893212904Sdim PROC_LOCK(child->p_pptr); 894212904Sdim sigqueue_take(child->p_ksi); 895212904Sdim PROC_UNLOCK(child->p_pptr); 896212904Sdim LIST_REMOVE(child, p_sibling); 897212904Sdim LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); 898212904Sdim child->p_pptr = parent; 899212904Sdim} 900212904Sdim