kern_exit.c revision 155198
1139804Simp/*-
23332Sphk * Copyright (c) 1982, 1986, 1989, 1991, 1993
33332Sphk *	The Regents of the University of California.  All rights reserved.
493149Sphk * (c) UNIX System Laboratories, Inc.
53332Sphk * All or some portions of this file are derived from material licensed
63332Sphk * to the University of California by American Telephone and Telegraph
73332Sphk * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8139804Simp * the permission of UNIX System Laboratories, Inc.
9139804Simp *
10139804Simp * Redistribution and use in source and binary forms, with or without
113332Sphk * modification, are permitted provided that the following conditions
123784Sphk * are met:
133332Sphk * 1. Redistributions of source code must retain the above copyright
143332Sphk *    notice, this list of conditions and the following disclaimer.
153332Sphk * 2. Redistributions in binary form must reproduce the above copyright
163332Sphk *    notice, this list of conditions and the following disclaimer in the
173332Sphk *    documentation and/or other materials provided with the distribution.
183332Sphk * 4. Neither the name of the University nor the names of its contributors
193332Sphk *    may be used to endorse or promote products derived from this software
203332Sphk *    without specific prior written permission.
21249583Sgabor *
223332Sphk * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
233332Sphk * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24116182Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25116182Sobrien * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26116182Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
273784Sphk * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
283332Sphk * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
293332Sphk * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
303332Sphk * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
313332Sphk * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3276166Smarkm * SUCH DAMAGE.
333507Scsgr *
3476166Smarkm *	@(#)kern_exit.c	8.7 (Berkeley) 2/12/94
3515494Sbde */
36220373Strasz
373507Scsgr#include <sys/cdefs.h>
383332Sphk__FBSDID("$FreeBSD: head/sys/kern/kern_exit.c 155198 2006-02-02 01:32:58Z rwatson $");
393507Scsgr
4015494Sbde#include "opt_compat.h"
413784Sphk#include "opt_ktrace.h"
423332Sphk#include "opt_mac.h"
433332Sphk
4412662Sdg#include <sys/param.h>
4512662Sdg#include <sys/systm.h>
4612662Sdg#include <sys/sysproto.h>
473332Sphk#include <sys/eventhandler.h>
4812662Sdg#include <sys/kernel.h>
493332Sphk#include <sys/malloc.h>
503784Sphk#include <sys/lock.h>
513784Sphk#include <sys/mutex.h>
523784Sphk#include <sys/proc.h>
533784Sphk#include <sys/pioctl.h>
5448079Shoek#include <sys/tty.h>
553784Sphk#include <sys/wait.h>
563784Sphk#include <sys/vmmeter.h>
573784Sphk#include <sys/vnode.h>
583784Sphk#include <sys/resourcevar.h>
593784Sphk#include <sys/sbuf.h>
603784Sphk#include <sys/signalvar.h>
613784Sphk#include <sys/sched.h>
623784Sphk#include <sys/sx.h>
633784Sphk#include <sys/syscallsubr.h>
6492723Salfred#include <sys/syslog.h>
6592723Salfred#include <sys/ptrace.h>
6692723Salfred#include <sys/acct.h>		/* for acct_process() function prototype */
6792723Salfred#include <sys/filedesc.h>
683784Sphk#include <sys/mac.h>
6912568Sbde#include <sys/shm.h>
7012130Sdg#include <sys/sem.h>
7112130Sdg#ifdef KTRACE
723332Sphk#include <sys/ktrace.h>
73231885Skib#endif
7417974Sbde
753784Sphk#include <security/audit/audit.h>
763784Sphk
7717386Sphk#include <vm/vm.h>
783332Sphk#include <vm/vm_extern.h>
793348Sphk#include <vm/vm_param.h>
803784Sphk#include <vm/pmap.h>
813784Sphk#include <vm/vm_map.h>
823784Sphk#include <vm/vm_page.h>
833784Sphk#include <vm/uma.h>
843784Sphk
/* Required to be non-static for SysVR4 emulator */
MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status");

/* Hook for NFS teardown procedure.  NULL unless the nlm module is loaded. */
void (*nlminfo_release_p)(struct proc *p);
903784Sphk
913784Sphk/*
923348Sphk * exit --
933784Sphk *	Death of process.
943784Sphk *
953332Sphk * MPSAFE
963348Sphk */
973784Sphkvoid
983784Sphksys_exit(struct thread *td, struct sys_exit_args *uap)
993348Sphk{
1003784Sphk
1013784Sphk	exit1(td, W_EXITCODE(uap->rval, 0));
1023784Sphk	/* NOTREACHED */
1033784Sphk}
1043784Sphk
/*
 * Exit: deallocate address space and other resources, change proc state
 * to zombie, and unlink proc from allproc and parent's lists.  Save exit
 * status and rusage for wait().  Check for child processes and orphan them.
 *
 * This function never returns: it finishes by calling thread_exit().
 * 'rv' is a wait(2)-style status word (see W_EXITCODE/WTERMSIG/WEXITSTATUS).
 */
void
exit1(struct thread *td, int rv)
{
	struct bintime new_switchtime;
	struct proc *p, *nq, *q;
	struct tty *tp;
	struct vnode *ttyvp;
	struct vmspace *vm;
	struct vnode *vtmp;
#ifdef KTRACE
	struct vnode *tracevp;
	struct ucred *tracecred;
#endif
	struct plimit *plim;
	int locked, refcnt;

	/*
	 * Drop Giant if caller has it.  Eventually we should warn about
	 * being called with Giant held.
	 */
	while (mtx_owned(&Giant))
		mtx_unlock(&Giant);

	p = td->td_proc;
	if (p == initproc) {
		/* init exiting is unrecoverable; report the status and stop. */
		printf("init died (signal %d, exit %d)\n",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("Going nowhere without my init!");
	}

	/*
	 * MUST abort all other threads before proceeding past here.
	 */
	PROC_LOCK(p);
	if (p->p_flag & P_HADTHREADS) {
retry:
		/*
		 * First check if some other thread got here before us;
		 * if so, act appropriately (exit or suspend).
		 */
		thread_suspend_check(0);

		/*
		 * Kill off the other threads.  This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instantaneous.  With this state set
		 * any thread entering the kernel from userspace will
		 * thread_exit() in trap().  Any thread attempting to
		 * sleep will return immediately with EINTR or EWOULDBLOCK
		 * which will hopefully force them to back out to userland
		 * freeing resources as they go.  Any thread attempting
		 * to return to userland will thread_exit() from userret().
		 * thread_exit() will unsuspend us when the last of the
		 * other threads exits.
		 * If there is already a single-threading request in
		 * progress after resumption, thread_single() will fail;
		 * in that case we just re-check all suspension requests:
		 * this thread will either be suspended there or exit.
		 */
		if (thread_single(SINGLE_EXIT))
			goto retry;

		/*
		 * All other activity in this process is now stopped.
		 * Threading support has been turned off.
		 */
	}

	p->p_flag |= P_WEXIT;

	/* Pull our pending SIGCHLD-style notification off the parent. */
	PROC_LOCK(p->p_pptr);
	sigqueue_take(p->p_ksi);
	PROC_UNLOCK(p->p_pptr);

	PROC_UNLOCK(p);

	/* Are we a task leader? */
	if (p == p->p_leader) {
		/* Kill all peers and wait for them to report in. */
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		while (q != NULL) {
			PROC_LOCK(q);
			psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

	PROC_LOCK(p);
	_STOPEVENT(p, S_EXIT, rv);
	wakeup(&p->p_stype);	/* Wakeup anyone in procfs' PIOCWAIT */
	PROC_UNLOCK(p);

	/*
	 * Check if any loadable modules need anything done at process exit.
	 * E.g. SYSV IPC stuff
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_INVOKE(process_exit, p);

	/* Allocated now (may sleep); freed by the reaper in kern_wait(). */
	MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
		M_ZOMBIE, M_WAITOK);
	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	PROC_LOCK(p);
	stopprofclock(p);
	p->p_flag &= ~(P_TRACED | P_PPWAIT);

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 */
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
		KASSERT(!timevalisset(&p->p_realtimer.it_value),
		    ("realtime timer is still armed"));
	}
	/* Discard any signals still queued to the process and this thread. */
	sigqueue_flush(&p->p_sigqueue);
	sigqueue_flush(&td->td_sigqueue);
	PROC_UNLOCK(p);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.
	 */
	mtx_lock(&Giant);	/* XXX: not sure if needed */
	funsetownlst(&p->p_sigiolst);
	mtx_unlock(&Giant);

	/*
	 * If this process has an nlminfo data area (for lockd), release it
	 */
	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
		(*nlminfo_release_p)(p);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdfree(td);

	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Remove ourself from our leader's peer list and wake our leader.
	 */
	mtx_lock(&ppeers_lock);
	if (p->p_leader->p_peers) {
		q = p->p_leader;
		while (q->p_peers != p)
			q = q->p_peers;
		q->p_peers = p->p_peers;
		wakeup(p->p_leader);
	}
	mtx_unlock(&ppeers_lock);

	/* The next two chunks should probably be moved to vmspace_exit. */
	vm = p->p_vmspace;
	/*
	 * Release user portion of address space.
	 * This releases references to vnodes,
	 * which could cause I/O if the file has been unlinked.
	 * Need to do this early enough that we can still sleep.
	 * Can't free the entire vmspace as the kernel stack
	 * may be mapped within that space also.
	 *
	 * Processes sharing the same vmspace may exit in one order, and
	 * get cleaned up by vmspace_exit() in a different order.  The
	 * last exiting process to reach this point releases as much of
	 * the environment as it can, and the last process cleaned up
	 * by vmspace_exit() (which decrements exitingcnt) cleans up the
	 * remainder.
	 */
	atomic_add_int(&vm->vm_exitingcnt, 1);
	/* Lock-free decrement of vm_refcnt via compare-and-swap loop. */
	do
		refcnt = vm->vm_refcnt;
	while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
	if (refcnt == 1) {
		/* We held the last reference: tear down the user map now. */
		shmexit(vm);
		pmap_remove_pages(vmspace_pmap(vm), vm_map_min(&vm->vm_map),
		    vm_map_max(&vm->vm_map));
		(void) vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map),
		    vm_map_max(&vm->vm_map));
	}

	sx_xlock(&proctree_lock);
	if (SESS_LEADER(p)) {
		struct session *sp;

		sp = p->p_session;
		if (sp->s_ttyvp) {
			locked = VFS_LOCK_GIANT(sp->s_ttyvp->v_mount);
			/*
			 * Controlling process.
			 * Signal foreground pgrp,
			 * drain controlling terminal
			 * and revoke access to controlling terminal.
			 */
			if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) {
				tp = sp->s_ttyp;
				if (sp->s_ttyp->t_pgrp) {
					PGRP_LOCK(sp->s_ttyp->t_pgrp);
					pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
					PGRP_UNLOCK(sp->s_ttyp->t_pgrp);
				}
				/* XXX tp should be locked. */
				sx_xunlock(&proctree_lock);
				(void) ttywait(tp);
				sx_xlock(&proctree_lock);
				/*
				 * The tty could have been revoked
				 * if we blocked.
				 */
				if (sp->s_ttyvp) {
					ttyvp = sp->s_ttyvp;
					SESS_LOCK(p->p_session);
					sp->s_ttyvp = NULL;
					SESS_UNLOCK(p->p_session);
					sx_xunlock(&proctree_lock);
					VOP_LOCK(ttyvp, LK_EXCLUSIVE, td);
					VOP_REVOKE(ttyvp, REVOKEALL);
					vput(ttyvp);
					sx_xlock(&proctree_lock);
				}
			}
			if (sp->s_ttyvp) {
				ttyvp = sp->s_ttyvp;
				SESS_LOCK(p->p_session);
				sp->s_ttyvp = NULL;
				SESS_UNLOCK(p->p_session);
				vrele(ttyvp);
			}
			/*
			 * s_ttyp is not zero'd; we use this to indicate
			 * that the session once had a controlling terminal.
			 * (for logging and informational purposes)
			 */
			VFS_UNLOCK_GIANT(locked);
		}
		SESS_LOCK(p->p_session);
		sp->s_leader = NULL;
		SESS_UNLOCK(p->p_session);
	}
	fixjobc(p, p->p_pgrp, 0);
	sx_xunlock(&proctree_lock);
	(void)acct_process(td);
#ifdef KTRACE
	/*
	 * Drain any pending records on the thread and release the trace
	 * file.  It might be better if drain-and-clear were atomic.
	 */
	ktrprocexit(td);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	p->p_traceflag = 0;	/* don't trace the vrele() */
	tracevp = p->p_tracevp;
	p->p_tracevp = NULL;
	tracecred = p->p_tracecred;
	p->p_tracecred = NULL;
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (tracevp != NULL) {
		locked = VFS_LOCK_GIANT(tracevp->v_mount);
		vrele(tracevp);
		VFS_UNLOCK_GIANT(locked);
	}
	if (tracecred != NULL)
		crfree(tracecred);
#endif
	/*
	 * Release reference to text vnode
	 */
	if ((vtmp = p->p_textvp) != NULL) {
		p->p_textvp = NULL;
		locked = VFS_LOCK_GIANT(vtmp->v_mount);
		vrele(vtmp);
		VFS_UNLOCK_GIANT(locked);
	}

	/*
	 * Release our limits structure.
	 */
	PROC_LOCK(p);
	plim = p->p_limit;
	p->p_limit = NULL;
	PROC_UNLOCK(p);
	lim_free(plim);

	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	LIST_REMOVE(p, p_hash);
	sx_xunlock(&allproc_lock);

	sx_xlock(&proctree_lock);
	q = LIST_FIRST(&p->p_children);
	if (q != NULL)		/* only need this if any child is S_ZOMB */
		wakeup(initproc);
	/* Hand every child over to init, killing any that are being traced. */
	for (; q != NULL; q = nq) {
		nq = LIST_NEXT(q, p_sibling);
		PROC_LOCK(q);
		proc_reparent(q, initproc);
		q->p_sigparent = SIGCHLD;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag & P_TRACED) {
			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
			psignal(q, SIGKILL);
		}
		PROC_UNLOCK(q);
	}

	/*
	 * Save exit status and finalize rusage info except for times,
	 * adding in child rusage info later when our time is locked.
	 */
	PROC_LOCK(p);
	p->p_xstat = rv;
	p->p_xthread = td;
	p->p_stats->p_ru.ru_nvcsw++;
	*p->p_ru = p->p_stats->p_ru;

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);

	/*
	 * Just delete all entries in the p_klist. At this point we won't
	 * report any more events, and there are nasty race conditions that
	 * can beat us if we don't.
	 */
	knlist_clear(&p->p_klist, 1);

	/*
	 * Notify parent that we're gone.  If parent has the PS_NOCLDWAIT
	 * flag set, or if the handler is set to SIG_IGN, notify process
	 * 1 instead (and hope it will handle this situation).
	 */
	PROC_LOCK(p->p_pptr);
	mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
	if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
		struct proc *pp;

		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
		pp = p->p_pptr;
		PROC_UNLOCK(pp);
		proc_reparent(p, initproc);
		p->p_sigparent = SIGCHLD;
		PROC_LOCK(p->p_pptr);
		/*
		 * If this was the last child of our parent, notify
		 * parent, so in case he was wait(2)ing, he will
		 * continue.
		 */
		if (LIST_EMPTY(&pp->p_children))
			wakeup(pp);
	} else
		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

	if (p->p_pptr == initproc)
		psignal(p->p_pptr, SIGCHLD);
	else if (p->p_sigparent != 0) {
		if (p->p_sigparent == SIGCHLD)
			childproc_exited(p);
		else	/* LINUX thread */
			psignal(p->p_pptr, p->p_sigparent);
	}
	PROC_UNLOCK(p->p_pptr);

	/*
	 * If this is a kthread, then wakeup anyone waiting for it to exit.
	 */
	if (p->p_flag & P_KTHREAD)
		wakeup(p);
	PROC_UNLOCK(p);

	/*
	 * Finally, call machine-dependent code to release the remaining
	 * resources including address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	WITNESS_WARN(WARN_PANIC, &proctree_lock.sx_object,
	    "process (pid %d) exiting", p->p_pid);

	PROC_LOCK(p);
	PROC_LOCK(p->p_pptr);
	sx_xunlock(&proctree_lock);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid all possible context
	 * switches (including ones from blocking on a mutex) while
	 * marked as a zombie.  We also have to set the zombie state
	 * before we release the parent process' proc lock to avoid
	 * a lost wakeup.  So, we first call wakeup, then we grab the
	 * sched lock, update the state, and release the parent process'
	 * proc lock.
	 */
	wakeup(p->p_pptr);
	mtx_lock_spin(&sched_lock);
	p->p_state = PRS_ZOMBIE;
	PROC_UNLOCK(p->p_pptr);

	/* Fold our accumulated child rusage into our own final rusage. */
	ruadd(p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);

	/* Do the same timestamp bookkeeping that mi_switch() would do. */
	binuptime(&new_switchtime);
	bintime_add(&p->p_rux.rux_runtime, &new_switchtime);
	bintime_sub(&p->p_rux.rux_runtime, PCPU_PTR(switchtime));
	PCPU_SET(switchtime, new_switchtime);
	PCPU_SET(switchticks, ticks);
	cnt.v_swtch++;

	sched_exit(p->p_pptr, td);

	/*
	 * Hopefully no one will try to deliver a signal to the process this
	 * late in the game.
	 */
	knlist_destroy(&p->p_klist);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}
560
561
#ifndef _SYS_SYSPROTO_H_
/* Userland argument layout for the abort2(2) system call. */
struct abort2_args {
	char *why;	/* user-space reason string (may be NULL) */
	int nargs;	/* number of entries in 'args'; valid range 0..16 */
	void **args;	/* user-space array of opaque pointers to log */
};
#endif
569
570/*
571 * MPSAFE.
572 */
573int
574abort2(struct thread *td, struct abort2_args *uap)
575{
576	struct proc *p = td->td_proc;
577	struct sbuf *sb;
578	void *uargs[16];
579	int error, i, sig;
580
581	error = 0;	/* satisfy compiler */
582
583	/*
584	 * Do it right now so we can log either proper call of abort2(), or
585	 * note, that invalid argument was passed. 512 is big enough to
586	 * handle 16 arguments' descriptions with additional comments.
587	 */
588	sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN);
589	sbuf_clear(sb);
590	sbuf_printf(sb, "%s(pid %d uid %d) aborted: ",
591	    p->p_comm, p->p_pid, td->td_ucred->cr_uid);
592	/*
593	 * Since we can't return from abort2(), send SIGKILL in cases, where
594	 * abort2() was called improperly
595	 */
596	sig = SIGKILL;
597	/* Prevent from DoSes from user-space. */
598	if (uap->nargs < 0 || uap->nargs > 16)
599		goto out;
600	if (uap->args == NULL)
601		goto out;
602	error = copyin(uap->args, uargs, uap->nargs * sizeof(void *));
603	if (error != 0)
604		goto out;
605	/*
606	 * Limit size of 'reason' string to 128. Will fit even when
607	 * maximal number of arguments was chosen to be logged.
608	 */
609	if (uap->why != NULL) {
610		error = sbuf_copyin(sb, uap->why, 128);
611		if (error < 0)
612			goto out;
613	} else {
614		sbuf_printf(sb, "(null)");
615	}
616	if (uap->nargs) {
617		sbuf_printf(sb, "(");
618		for (i = 0;i < uap->nargs; i++)
619			sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]);
620		sbuf_printf(sb, ")");
621	}
622	/*
623	 * Final stage: arguments were proper, string has been
624	 * successfully copied from userspace, and copying pointers
625	 * from user-space succeed.
626	 */
627	sig = SIGABRT;
628out:
629	if (sig == SIGKILL) {
630		sbuf_trim(sb);
631		sbuf_printf(sb, " (Reason text inaccessible)");
632	}
633	sbuf_cat(sb, "\n");
634	sbuf_finish(sb);
635	log(LOG_INFO, "%s", sbuf_data(sb));
636	sbuf_delete(sb);
637	exit1(td, W_EXITCODE(0, sig));
638	return (0);
639}
640
641
#ifdef COMPAT_43
/*
 * Old 4.3BSD-style wait(2) entry point; the dirty work is handled
 * by kern_wait().
 *
 * MPSAFE.
 */
int
owait(struct thread *td, struct owait_args *uap __unused)
{
	int error;
	int status;

	error = kern_wait(td, WAIT_ANY, &status, 0, NULL);
	if (error != 0)
		return (error);
	/* Old-style wait returns the status word in the second register. */
	td->td_retval[1] = status;
	return (error);
}
#endif /* COMPAT_43 */
659
660/*
661 * The dirty work is handled by kern_wait().
662 *
663 * MPSAFE.
664 */
665int
666wait4(struct thread *td, struct wait_args *uap)
667{
668	struct rusage ru, *rup;
669	int error, status;
670
671	if (uap->rusage != NULL)
672		rup = &ru;
673	else
674		rup = NULL;
675	error = kern_wait(td, uap->pid, &status, uap->options, rup);
676	if (uap->status != NULL && error == 0)
677		error = copyout(&status, uap->status, sizeof(status));
678	if (uap->rusage != NULL && error == 0)
679		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
680	return (error);
681}
682
/*
 * Common backend for wait4(2) and its compatibility wrappers: scan our
 * children for one matching 'pid'/'options', reap a zombie (freeing its
 * remaining resources), or report a stopped/continued child.  Blocks
 * (interruptibly) unless WNOHANG is given.
 */
int
kern_wait(struct thread *td, pid_t pid, int *status, int options,
    struct rusage *rusage)
{
	struct proc *p, *q, *t;
	int error, nfound;

	q = td->td_proc;	/* q is the waiting (parent) process */
	if (pid == 0) {
		/* pid 0 means "any child in our own process group". */
		PROC_LOCK(q);
		pid = -q->p_pgid;
		PROC_UNLOCK(q);
	}
	if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WLINUXCLONE))
		return (EINVAL);
loop:
	/* Consume any stale child-status notification before rescanning. */
	if (q->p_flag & P_STATCHILD) {
		PROC_LOCK(q);
		q->p_flag &= ~P_STATCHILD;
		PROC_UNLOCK(q);
	}
	nfound = 0;
	sx_xlock(&proctree_lock);
	LIST_FOREACH(p, &q->p_children, p_sibling) {
		PROC_LOCK(p);
		/* Match a specific pid, or a process group when pid < -1. */
		if (pid != WAIT_ANY &&
		    p->p_pid != pid && p->p_pgid != -pid) {
			PROC_UNLOCK(p);
			continue;
		}
		/* Skip children we are not allowed to wait for. */
		if (p_canwait(td, p)) {
			PROC_UNLOCK(p);
			continue;
		}

		/*
		 * This special case handles a kthread spawned by linux_clone
		 * (see linux_misc.c).  The linux_wait4 and linux_waitpid
		 * functions need to be able to distinguish between waiting
		 * on a process and waiting on a thread.  It is a thread if
		 * p_sigparent is not SIGCHLD, and the WLINUXCLONE option
		 * signifies we want to wait for threads and not processes.
		 */
		if ((p->p_sigparent != SIGCHLD) ^
		    ((options & WLINUXCLONE) != 0)) {
			PROC_UNLOCK(p);
			continue;
		}

		nfound++;
		if (p->p_state == PRS_ZOMBIE) {

			/*
			 * It is possible that the last thread of this
			 * process is still running on another CPU
			 * in thread_exit() after having dropped the process
			 * lock via PROC_UNLOCK() but before it has completed
			 * cpu_throw().  In that case, the other thread must
			 * still hold sched_lock, so simply by acquiring
			 * sched_lock once we will wait long enough for the
			 * thread to exit in that case.
			 */
			mtx_lock_spin(&sched_lock);
			mtx_unlock_spin(&sched_lock);

			td->td_retval[0] = p->p_pid;
			if (status)
				*status = p->p_xstat;	/* convert to int */
			if (rusage) {
				*rusage = *p->p_ru;
				calcru(p, &rusage->ru_utime, &rusage->ru_stime);
			}

			PROC_LOCK(q);
			sigqueue_take(p->p_ksi);
			PROC_UNLOCK(q);

			/*
			 * If we got the child via a ptrace 'attach',
			 * we need to give it back to the old parent.
			 */
			/*
			 * NOTE(review): p_oppid is read here after
			 * PROC_UNLOCK(p); presumably safe because the
			 * zombie can no longer be changed by anyone
			 * else — confirm against proc locking rules.
			 */
			PROC_UNLOCK(p);
			if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) {
				PROC_LOCK(p);
				p->p_oppid = 0;
				proc_reparent(p, t);
				PROC_UNLOCK(p);
				tdsignal(t, NULL, SIGCHLD, p->p_ksi);
				wakeup(t);
				/* pfind() presumably returned t locked. */
				PROC_UNLOCK(t);
				sx_xunlock(&proctree_lock);
				return (0);
			}

			/*
			 * Remove other references to this process to ensure
			 * we have an exclusive reference.
			 */
			sx_xlock(&allproc_lock);
			LIST_REMOVE(p, p_list);	/* off zombproc */
			sx_xunlock(&allproc_lock);
			LIST_REMOVE(p, p_sibling);
			leavepgrp(p);
			sx_xunlock(&proctree_lock);

			/*
			 * As a side effect of this lock, we know that
			 * all other writes to this proc are visible now, so
			 * no more locking is needed for p.
			 */
			PROC_LOCK(p);
			p->p_xstat = 0;		/* XXX: why? */
			PROC_UNLOCK(p);
			PROC_LOCK(q);
			/* Fold the child's rusage into our child totals. */
			ruadd(&q->p_stats->p_cru, &q->p_crux, p->p_ru,
			    &p->p_rux);
			PROC_UNLOCK(q);
			FREE(p->p_ru, M_ZOMBIE);
			p->p_ru = NULL;

			/*
			 * Decrement the count of procs running with this uid.
			 */
			(void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);

			/*
			 * Free credentials, arguments, and sigacts.
			 */
			crfree(p->p_ucred);
			p->p_ucred = NULL;
			pargs_drop(p->p_args);
			p->p_args = NULL;
			sigacts_free(p->p_sigacts);
			p->p_sigacts = NULL;

			/*
			 * Do any thread-system specific cleanups.
			 */
			thread_wait(p);

			/*
			 * Give vm and machine-dependent layer a chance
			 * to free anything that cpu_exit couldn't
			 * release while still running in process context.
			 */
			vm_waitproc(p);
#ifdef MAC
			mac_destroy_proc(p);
#endif
#ifdef AUDIT
			audit_proc_free(p);
#endif
			KASSERT(FIRST_THREAD_IN_PROC(p),
			    ("kern_wait: no residual thread!"));
			uma_zfree(proc_zone, p);
			sx_xlock(&allproc_lock);
			nprocs--;
			sx_xunlock(&allproc_lock);
			return (0);
		}
		/* Not a zombie: check for a reportable stop or continue. */
		mtx_lock_spin(&sched_lock);
		if ((p->p_flag & P_STOPPED_SIG) &&
		    (p->p_suspcount == p->p_numthreads) &&
		    (p->p_flag & P_WAITED) == 0 &&
		    (p->p_flag & P_TRACED || options & WUNTRACED)) {
			mtx_unlock_spin(&sched_lock);
			/* P_WAITED latches so each stop is reported once. */
			p->p_flag |= P_WAITED;
			sx_xunlock(&proctree_lock);
			td->td_retval[0] = p->p_pid;
			if (status)
				*status = W_STOPCODE(p->p_xstat);
			PROC_UNLOCK(p);

			PROC_LOCK(q);
			sigqueue_take(p->p_ksi);
			PROC_UNLOCK(q);

			return (0);
		}
		mtx_unlock_spin(&sched_lock);
		if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) {
			sx_xunlock(&proctree_lock);
			td->td_retval[0] = p->p_pid;
			p->p_flag &= ~P_CONTINUED;
			PROC_UNLOCK(p);

			PROC_LOCK(q);
			sigqueue_take(p->p_ksi);
			PROC_UNLOCK(q);

			if (status)
				*status = SIGCONT;
			return (0);
		}
		PROC_UNLOCK(p);
	}
	if (nfound == 0) {
		sx_xunlock(&proctree_lock);
		return (ECHILD);
	}
	if (options & WNOHANG) {
		sx_xunlock(&proctree_lock);
		td->td_retval[0] = 0;
		return (0);
	}
	PROC_LOCK(q);
	sx_xunlock(&proctree_lock);
	if (q->p_flag & P_STATCHILD) {
		/* A child changed state during the scan; rescan at once. */
		q->p_flag &= ~P_STATCHILD;
		error = 0;
	} else
		error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0);
	PROC_UNLOCK(q);
	if (error)
		return (error);
	goto loop;
}
900
901/*
902 * Make process 'parent' the new parent of process 'child'.
903 * Must be called with an exclusive hold of proctree lock.
904 */
905void
906proc_reparent(struct proc *child, struct proc *parent)
907{
908
909	sx_assert(&proctree_lock, SX_XLOCKED);
910	PROC_LOCK_ASSERT(child, MA_OWNED);
911	if (child->p_pptr == parent)
912		return;
913
914	LIST_REMOVE(child, p_sibling);
915	LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
916	child->p_pptr = parent;
917}
918