kern_exit.c revision 126325
1199482Srdivacky/*
2199482Srdivacky * Copyright (c) 1982, 1986, 1989, 1991, 1993
3199482Srdivacky *	The Regents of the University of California.  All rights reserved.
4199482Srdivacky * (c) UNIX System Laboratories, Inc.
5199482Srdivacky * All or some portions of this file are derived from material licensed
6199482Srdivacky * to the University of California by American Telephone and Telegraph
7199482Srdivacky * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8199482Srdivacky * the permission of UNIX System Laboratories, Inc.
9199482Srdivacky *
10199482Srdivacky * Redistribution and use in source and binary forms, with or without
11199482Srdivacky * modification, are permitted provided that the following conditions
12199482Srdivacky * are met:
13234353Sdim * 1. Redistributions of source code must retain the above copyright
14199482Srdivacky *    notice, this list of conditions and the following disclaimer.
15199482Srdivacky * 2. Redistributions in binary form must reproduce the above copyright
16199482Srdivacky *    notice, this list of conditions and the following disclaimer in the
17199482Srdivacky *    documentation and/or other materials provided with the distribution.
18202379Srdivacky * 3. All advertising materials mentioning features or use of this software
19226633Sdim *    must display the following acknowledgement:
20202379Srdivacky *	This product includes software developed by the University of
21226633Sdim *	California, Berkeley and its contributors.
22199482Srdivacky * 4. Neither the name of the University nor the names of its contributors
23221345Sdim *    may be used to endorse or promote products derived from this software
24234353Sdim *    without specific prior written permission.
25199482Srdivacky *
26249423Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27226633Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28249423Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29249423Sdim * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30249423Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31249423Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32249423Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33212904Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34249423Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35249423Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36249423Sdim * SUCH DAMAGE.
37218893Sdim *
38218893Sdim *	@(#)kern_exit.c	8.7 (Berkeley) 2/12/94
39234353Sdim */
40249423Sdim
41218893Sdim#include <sys/cdefs.h>
42218893Sdim__FBSDID("$FreeBSD: head/sys/kern/kern_exit.c 126325 2004-02-27 18:39:09Z jhb $");
43218893Sdim
44249423Sdim#include "opt_compat.h"
45249423Sdim#include "opt_ktrace.h"
46218893Sdim#include "opt_mac.h"
47249423Sdim
48249423Sdim#include <sys/param.h>
49226633Sdim#include <sys/systm.h>
50199482Srdivacky#include <sys/sysproto.h>
51199482Srdivacky#include <sys/eventhandler.h>
52203955Srdivacky#include <sys/kernel.h>
53249423Sdim#include <sys/malloc.h>
54249423Sdim#include <sys/lock.h>
55203955Srdivacky#include <sys/mutex.h>
56199482Srdivacky#include <sys/proc.h>
57199482Srdivacky#include <sys/pioctl.h>
58243830Sdim#include <sys/tty.h>
59199482Srdivacky#include <sys/wait.h>
60199482Srdivacky#include <sys/vmmeter.h>
61203955Srdivacky#include <sys/vnode.h>
62221345Sdim#include <sys/resourcevar.h>
63203955Srdivacky#include <sys/signalvar.h>
64203955Srdivacky#include <sys/sched.h>
65249423Sdim#include <sys/sx.h>
66249423Sdim#include <sys/ptrace.h>
67249423Sdim#include <sys/acct.h>		/* for acct_process() function prototype */
68249423Sdim#include <sys/filedesc.h>
69249423Sdim#include <sys/mac.h>
70249423Sdim#include <sys/shm.h>
71249423Sdim#include <sys/sem.h>
72226633Sdim#ifdef KTRACE
73206275Srdivacky#include <sys/ktrace.h>
74199482Srdivacky#endif
75199482Srdivacky
76199482Srdivacky#include <vm/vm.h>
77221345Sdim#include <vm/vm_extern.h>
78199482Srdivacky#include <vm/vm_param.h>
79199482Srdivacky#include <vm/pmap.h>
80199482Srdivacky#include <vm/vm_map.h>
81221345Sdim#include <vm/vm_page.h>
82199482Srdivacky#include <vm/uma.h>
83199482Srdivacky#include <sys/user.h>
84226633Sdim
/*
 * Malloc type for the rusage record kept while a process is a zombie:
 * exit1() allocates p_ru from it and wait1() frees it.
 * Required to be non-static for SysVR4 emulator.
 */
MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status");

/*
 * Common implementation behind owait() and wait4(); the trailing int is
 * the "compat" flag selecting the old wait() result convention.
 */
static int wait1(struct thread *, struct wait_args *, int);
89199482Srdivacky
90221345Sdim/*
91199482Srdivacky * exit --
92212904Sdim *	Death of process.
93212904Sdim *
94212904Sdim * MPSAFE
95212904Sdim */
96199482Srdivackyvoid
97199482Srdivackysys_exit(struct thread *td, struct sys_exit_args *uap)
98199482Srdivacky{
99199482Srdivacky
100199482Srdivacky	mtx_lock(&Giant);
101199482Srdivacky	exit1(td, W_EXITCODE(uap->rval, 0));
102199482Srdivacky	/* NOTREACHED */
103199482Srdivacky}
104199482Srdivacky
/*
 * Exit: deallocate address space and other resources, change proc state
 * to zombie, and unlink proc from allproc and parent's lists.  Save exit
 * status and rusage for wait().  Check for child processes and orphan them.
 *
 * td is the exiting thread and rv is the status word that is stored in
 * p_xstat (it is decoded with WTERMSIG()/WEXITSTATUS() below).
 *
 * Giant must be held on entry (GIANT_REQUIRED); it is released here just
 * before the process is marked PRS_ZOMBIE.  The function finishes by
 * calling thread_exit(); sys_exit() treats the call as NOTREACHED.
 */
void
exit1(struct thread *td, int rv)
{
	struct proc *p, *nq, *q;
	struct tty *tp;
	struct vnode *ttyvp;
	struct vmspace *vm;
	struct vnode *vtmp;
#ifdef KTRACE
	struct vnode *tracevp;
	struct ucred *tracecred;
#endif
	struct plimit *plim;

	GIANT_REQUIRED;

	p = td->td_proc;
	/* init must never exit: report how it died and panic. */
	if (p == initproc) {
		printf("init died (signal %d, exit %d)\n",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("Going nowhere without my init!");
	}

	/*
	 * MUST abort all other threads before proceeding past here.
	 */
	PROC_LOCK(p);
	if (p->p_flag & P_SA || p->p_numthreads > 1) {
		/*
		 * First check if some other thread got here before us.
		 * If so, act appropriately (exit or suspend).
		 */
		thread_suspend_check(0);

		/*
		 * Kill off the other threads.  This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instant.
		 * With this state set:
		 * Any thread entering the kernel from userspace will
		 * thread_exit() in trap().  Any thread attempting to
		 * sleep will return immediately
		 * with EINTR or EWOULDBLOCK, which will hopefully force them
		 * to back out to userland, freeing resources as they go, and
		 * anything attempting to return to userland will thread_exit()
		 * from userret().  thread_exit() will unsuspend us
		 * when the last other thread exits.
		 */
		if (thread_single(SINGLE_EXIT)) {
			panic ("Exit: Single threading fouled up");
		}
		/*
		 * All other activity in this process is now stopped.
		 * Remove excess KSEs and KSEGRPS. XXXKSE (when we have them)
		 * ...
		 * Turn off threading support.
		 */
		p->p_flag &= ~P_SA;
		thread_single_end();	/* Don't need this any more. */
	}
	/*
	 * With this state set:
	 * Any thread entering the kernel from userspace will thread_exit()
	 * in trap().  Any thread attempting to sleep will return immediately
	 * with EINTR or EWOULDBLOCK, which will hopefully force them
	 * to back out to userland, freeing resources as they go, and
	 * anything attempting to return to userland will thread_exit()
	 * from userret().  thread_exit() will do a wakeup on p->p_numthreads
	 * if it transitions to 1.
	 */

	p->p_flag |= P_WEXIT;
	PROC_UNLOCK(p);

	/*
	 * Are we a task leader?  If so, SIGKILL every process on our peer
	 * list and sleep until the list is empty.  Each peer unlinks itself
	 * and does wakeup(p_leader) in the "remove ourself from our leader's
	 * peer list" step further down in this function.
	 */
	if (p == p->p_leader) {
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		while (q != NULL) {
			PROC_LOCK(q);
			psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

#ifdef PGINPROF
	vmsizmon();
#endif
	STOPEVENT(p, S_EXIT, rv);
	wakeup(&p->p_stype);	/* Wakeup anyone in procfs' PIOCWAIT */

	/*
	 * Check if any loadable modules need anything done at process exit.
	 * e.g. SYSV IPC stuff
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_INVOKE(process_exit, p);

	/*
	 * Allocate the rusage record now (M_WAITOK); it is filled in below
	 * once the final statistics are known and is freed by wait1().
	 */
	MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
		M_ZOMBIE, M_WAITOK);
	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 * Clear P_TRACED and P_PPWAIT and empty both the process and the
	 * thread signal sets (p_siglist, td_siglist).
	 */
	PROC_LOCK(p);
	stopprofclock(p);
	p->p_flag &= ~(P_TRACED | P_PPWAIT);
	SIGEMPTYSET(p->p_siglist);
	SIGEMPTYSET(td->td_siglist);

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 * (callout_stop() returning 0 is taken to mean the callout could
	 * not be stopped; clearing it_interval is what prevents rearming.)
	 */
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
		KASSERT(!timevalisset(&p->p_realtimer.it_value),
		    ("realtime timer is still armed"));
	}
	PROC_UNLOCK(p);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.
	 */
	funsetownlst(&p->p_sigiolst);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdfree(td);

	/*
	 * Remove ourself from our leader's peer list and wake our leader.
	 * The list is singly linked through p_peers starting at the leader,
	 * so walk it to find our predecessor.  A leader reaches this point
	 * with an empty list (see the wait loop above) and skips the body.
	 */
	mtx_lock(&ppeers_lock);
	if (p->p_leader->p_peers) {
		q = p->p_leader;
		while (q->p_peers != p)
			q = q->p_peers;
		q->p_peers = p->p_peers;
		wakeup(p->p_leader);
	}
	mtx_unlock(&ppeers_lock);

	/* The next two chunks should probably be moved to vmspace_exit. */
	vm = p->p_vmspace;
	/*
	 * Release user portion of address space.
	 * This releases references to vnodes,
	 * which could cause I/O if the file has been unlinked.
	 * Need to do this early enough that we can still sleep.
	 * Can't free the entire vmspace as the kernel stack
	 * may be mapped within that space also.
	 *
	 * Processes sharing the same vmspace may exit in one order, and
	 * get cleaned up by vmspace_exit() in a different order.  The
	 * last exiting process to reach this point releases as much of
	 * the environment as it can, and the last process cleaned up
	 * by vmspace_exit() (which decrements exitingcnt) cleans up the
	 * remainder.
	 */
	++vm->vm_exitingcnt;
	if (--vm->vm_refcnt == 0) {
		shmexit(vm);
		vm_page_lock_queues();
		pmap_remove_pages(vmspace_pmap(vm), vm_map_min(&vm->vm_map),
		    vm_map_max(&vm->vm_map));
		vm_page_unlock_queues();
		(void) vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map),
		    vm_map_max(&vm->vm_map));
	}

	/*
	 * Session and job-control teardown, done under proctree_lock.
	 * The lock is dropped temporarily around ttywait() and
	 * VOP_REVOKE() below and then re-acquired.
	 */
	sx_xlock(&proctree_lock);
	if (SESS_LEADER(p)) {
		struct session *sp;

		sp = p->p_session;
		if (sp->s_ttyvp) {
			/*
			 * Controlling process.
			 * Signal foreground pgrp,
			 * drain controlling terminal
			 * and revoke access to controlling terminal.
			 */
			if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) {
				tp = sp->s_ttyp;
				if (sp->s_ttyp->t_pgrp) {
					PGRP_LOCK(sp->s_ttyp->t_pgrp);
					pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
					PGRP_UNLOCK(sp->s_ttyp->t_pgrp);
				}
				/* XXX tp should be locked. */
				sx_xunlock(&proctree_lock);
				(void) ttywait(tp);
				sx_xlock(&proctree_lock);
				/*
				 * The tty could have been revoked
				 * if we blocked.
				 */
				if (sp->s_ttyvp) {
					ttyvp = sp->s_ttyvp;
					SESS_LOCK(p->p_session);
					sp->s_ttyvp = NULL;
					SESS_UNLOCK(p->p_session);
					sx_xunlock(&proctree_lock);
					VOP_REVOKE(ttyvp, REVOKEALL);
					vrele(ttyvp);
					sx_xlock(&proctree_lock);
				}
			}
			/*
			 * s_ttyvp is still set here only when the block
			 * above was skipped; in that case just drop the
			 * vnode reference without revoking.
			 */
			if (sp->s_ttyvp) {
				ttyvp = sp->s_ttyvp;
				SESS_LOCK(p->p_session);
				sp->s_ttyvp = NULL;
				SESS_UNLOCK(p->p_session);
				vrele(ttyvp);
			}
			/*
			 * s_ttyp is not zero'd; we use this to indicate
			 * that the session once had a controlling terminal.
			 * (for logging and informational purposes)
			 */
		}
		/* The session no longer has a leader. */
		SESS_LOCK(p->p_session);
		sp->s_leader = NULL;
		SESS_UNLOCK(p->p_session);
	}
	fixjobc(p, p->p_pgrp, 0);
	sx_xunlock(&proctree_lock);
	(void)acct_process(td);
#ifdef KTRACE
	/*
	 * Release the trace file.  The vnode and credential pointers are
	 * detached from the proc under the locks and only released after
	 * both locks have been dropped.
	 */
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	p->p_traceflag = 0;	/* don't trace the vrele() */
	tracevp = p->p_tracevp;
	p->p_tracevp = NULL;
	tracecred = p->p_tracecred;
	p->p_tracecred = NULL;
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (tracevp != NULL)
		vrele(tracevp);
	if (tracecred != NULL)
		crfree(tracecred);
#endif
	/*
	 * Release reference to text vnode.
	 */
	if ((vtmp = p->p_textvp) != NULL) {
		p->p_textvp = NULL;
		vrele(vtmp);
	}

	/*
	 * Release our limits structure.  The pointer is cleared under the
	 * proc lock and the structure is freed after the lock is dropped.
	 */
	PROC_LOCK(p);
	plim = p->p_limit;
	p->p_limit = NULL;
	PROC_UNLOCK(p);
	lim_free(plim);

	/*
	 * Release this thread's reference to the ucred.  The actual proc
	 * reference will stay around until the proc is harvested by
	 * wait().  At this point the ucred is immutable (no other threads
	 * from this proc are around that can change it) so we leave the
	 * per-thread ucred pointer intact in case it is needed although
	 * in theory nothing should be using it at this point.
	 */
	crfree(td->td_ucred);

	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	LIST_REMOVE(p, p_hash);
	sx_xunlock(&allproc_lock);

	/*
	 * Hand all of our children to init.  proctree_lock is taken here
	 * and stays held until after cpu_exit() below.  The next sibling
	 * is fetched before proc_reparent() because reparenting moves q
	 * onto init's child list.
	 */
	sx_xlock(&proctree_lock);
	q = LIST_FIRST(&p->p_children);
	if (q != NULL)		/* only need this if any child is S_ZOMB */
		wakeup(initproc);
	for (; q != NULL; q = nq) {
		nq = LIST_NEXT(q, p_sibling);
		PROC_LOCK(q);
		proc_reparent(q, initproc);
		q->p_sigparent = SIGCHLD;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag & P_TRACED) {
			q->p_flag &= ~P_TRACED;
			psignal(q, SIGKILL);
		}
		PROC_UNLOCK(q);
	}

	/*
	 * Save exit status and final rusage info, adding in child rusage
	 * info and self times.
	 */
	PROC_LOCK(p);
	p->p_xstat = rv;
	*p->p_ru = p->p_stats->p_ru;
	mtx_lock_spin(&sched_lock);
	calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
	mtx_unlock_spin(&sched_lock);
	ruadd(p->p_ru, &p->p_stats->p_cru);

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE(&p->p_klist, NOTE_EXIT);
	/*
	 * Just delete all entries in the p_klist. At this point we won't
	 * report any more events, and there are nasty race conditions that
	 * can beat us if we don't.
	 */
	while (SLIST_FIRST(&p->p_klist))
		SLIST_REMOVE_HEAD(&p->p_klist, kn_selnext);

	/*
	 * Notify parent that we're gone.  If parent has the PS_NOCLDWAIT
	 * flag set, or if the handler is set to SIG_IGN, notify process
	 * 1 instead (and hope it will handle this situation).
	 */
	PROC_LOCK(p->p_pptr);
	mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
	if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
		struct proc *pp;

		/*
		 * Remember the old parent in pp, unlock it, and reparent
		 * ourselves to init.  After proc_reparent() p->p_pptr is
		 * init, so the PROC_LOCK() below locks the new parent.
		 */
		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
		pp = p->p_pptr;
		PROC_UNLOCK(pp);
		proc_reparent(p, initproc);
		p->p_sigparent = SIGCHLD;
		PROC_LOCK(p->p_pptr);
		/*
		 * If this was the last child of our parent, notify
		 * parent, so in case he was wait(2)ing, he will
		 * continue.
		 */
		if (LIST_EMPTY(&pp->p_children))
			wakeup(pp);
	} else
		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

	/*
	 * init always gets SIGCHLD; any other parent gets p_sigparent,
	 * unless that is zero, in which case no signal is sent.
	 */
	if (p->p_pptr == initproc)
		psignal(p->p_pptr, SIGCHLD);
	else if (p->p_sigparent != 0)
		psignal(p->p_pptr, p->p_sigparent);
	PROC_UNLOCK(p->p_pptr);

	/*
	 * If this is a kthread, then wakeup anyone waiting for it to exit.
	 */
	if (p->p_flag & P_KTHREAD)
		wakeup(p);
	PROC_UNLOCK(p);

	/*
	 * Finally, call machine-dependent code to release the remaining
	 * resources including address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	/*
	 * Take our own and our parent's proc locks before dropping
	 * proctree_lock.
	 */
	PROC_LOCK(p);
	PROC_LOCK(p->p_pptr);
	sx_xunlock(&proctree_lock);

	/* Giant may be held recursively; release every level. */
	while (mtx_owned(&Giant))
		mtx_unlock(&Giant);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid any possible context
	 * switches while marked as a zombie including blocking on
	 * a mutex.  The critical section entered here is left again
	 * only after sched_lock has been re-acquired below.
	 */
	mtx_lock_spin(&sched_lock);
	p->p_state = PRS_ZOMBIE;
	critical_enter();
	mtx_unlock_spin(&sched_lock);

	/* Wake a parent sleeping in wait1(), then drop its lock. */
	wakeup(p->p_pptr);
	PROC_UNLOCK(p->p_pptr);

	/*
	 * Re-take sched_lock and update the context-switch accounting.
	 * Note that our own proc lock is still held at this point.
	 */
	mtx_lock_spin(&sched_lock);
	critical_exit();
	cnt.v_swtch++;
	binuptime(PCPU_PTR(switchtime));
	PCPU_SET(switchticks, ticks);

	cpu_sched_exit(td); /* XXXKSE check if this should be in thread_exit */
	/*
	 * Allow the scheduler to adjust the priority of the
	 * parent when a kseg is exiting.
	 */
	if (p->p_pid != 1)
		sched_exit(p->p_pptr, p);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}
536234353Sdim
537234353Sdim#ifdef COMPAT_43
538218893Sdim/*
539234353Sdim * MPSAFE.  The dirty work is handled by wait1().
540226633Sdim */
541226633Sdimint
542226633Sdimowait(struct thread *td, struct owait_args *uap __unused)
543226633Sdim{
544236260Sdim	struct wait_args w;
545236260Sdim
546226633Sdim	w.options = 0;
547226633Sdim	w.rusage = NULL;
548226633Sdim	w.pid = WAIT_ANY;
549218893Sdim	w.status = NULL;
550218893Sdim	return (wait1(td, &w, 1));
551199482Srdivacky}
552226633Sdim#endif /* COMPAT_43 */
553226633Sdim
/*
 * wait4() system call: pass the user's arguments straight to wait1()
 * with the compat flag clear.
 *
 * MPSAFE
 */
int
wait4(struct thread *td, struct wait_args *uap)
{
	int error;

	error = wait1(td, uap, 0);
	return (error);
}
563218893Sdim
/*
 * Common implementation of owait() and wait4().
 *
 * Scans the calling process's children for one that matches uap->pid and
 * has something to report:
 *   - a zombie, which is reaped and freed (or, if it was obtained via a
 *     ptrace attach, handed back to its original parent instead);
 *   - a stopped child that has not been reported yet, when it is traced
 *     or WUNTRACED was requested;
 *   - a continued child, when WCONTINUED was requested.
 * If matching children exist but none is reportable, the caller sleeps
 * on its own proc and rescans, unless WNOHANG was given.
 *
 * uap->pid selects children: WAIT_ANY matches all, otherwise a child
 * matches when its pid equals uap->pid or its pgid equals -uap->pid.
 * A pid of 0 is rewritten in place to minus the caller's pgid.
 *
 * compat is nonzero only for owait(); under COMPAT_43 it makes the status
 * come back in td_retval[1] instead of being copied out to uap->status.
 *
 * On success td_retval[0] holds the child's pid (0 for WNOHANG with
 * nothing to report) and 0 is returned.  Otherwise returns EINVAL for
 * unknown option bits, ECHILD when no child matches, or the error from
 * copyout() or msleep().
 *
 * MPSAFE
 */
static int
wait1(struct thread *td, struct wait_args *uap, int compat)
{
	struct rusage ru;
	int nfound;
	struct proc *p, *q, *t;
	int status, error;

	q = td->td_proc;
	/* pid 0 means "any child in the caller's own process group". */
	if (uap->pid == 0) {
		PROC_LOCK(q);
		uap->pid = -q->p_pgid;
		PROC_UNLOCK(q);
	}
	if (uap->options &~ (WUNTRACED|WNOHANG|WCONTINUED|WLINUXCLONE))
		return (EINVAL);
	mtx_lock(&Giant);
loop:
	nfound = 0;
	sx_xlock(&proctree_lock);
	/* Each child is examined with its proc lock held. */
	LIST_FOREACH(p, &q->p_children, p_sibling) {
		PROC_LOCK(p);
		if (uap->pid != WAIT_ANY &&
		    p->p_pid != uap->pid && p->p_pgid != -uap->pid) {
			PROC_UNLOCK(p);
			continue;
		}

		/*
		 * This special case handles a kthread spawned by linux_clone
		 * (see linux_misc.c).  The linux_wait4 and linux_waitpid
		 * functions need to be able to distinguish between waiting
		 * on a process and waiting on a thread.  It is a thread if
		 * p_sigparent is not SIGCHLD, and the WLINUXCLONE option
		 * signifies we want to wait for threads and not processes.
		 */
		if ((p->p_sigparent != SIGCHLD) ^
		    ((uap->options & WLINUXCLONE) != 0)) {
			PROC_UNLOCK(p);
			continue;
		}

		nfound++;
		/* Case 1: the child has exited. */
		if (p->p_state == PRS_ZOMBIE) {
			td->td_retval[0] = p->p_pid;
#ifdef COMPAT_43
			if (compat)
				td->td_retval[1] = p->p_xstat;
			else
#endif
			if (uap->status) {
				status = p->p_xstat;	/* convert to int */
				/*
				 * The proc lock is dropped around copyout()
				 * and re-taken afterwards.
				 */
				PROC_UNLOCK(p);
				if ((error = copyout(&status,
				    uap->status, sizeof(status)))) {
					sx_xunlock(&proctree_lock);
					mtx_unlock(&Giant);
					return (error);
				}
				PROC_LOCK(p);
			}
			if (uap->rusage) {
				/*
				 * Snapshot the rusage under the proc lock,
				 * then copy it out unlocked.
				 */
				bcopy(p->p_ru, &ru, sizeof(ru));
				PROC_UNLOCK(p);
				if ((error = copyout(&ru,
				    uap->rusage, sizeof (struct rusage)))) {
					sx_xunlock(&proctree_lock);
					mtx_unlock(&Giant);
					return (error);
				}
			} else
				PROC_UNLOCK(p);
			/*
			 * If we got the child via a ptrace 'attach',
			 * we need to give it back to the old parent.
			 * The old parent is signalled and woken; the child
			 * is not reaped here.  pfind() is expected to return
			 * t with its proc lock held, hence the
			 * PROC_UNLOCK(t) below.
			 */
			if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) {
				PROC_LOCK(p);
				p->p_oppid = 0;
				proc_reparent(p, t);
				PROC_UNLOCK(p);
				psignal(t, SIGCHLD);
				wakeup(t);
				PROC_UNLOCK(t);
				sx_xunlock(&proctree_lock);
				mtx_unlock(&Giant);
				return (0);
			}

			/*
			 * Remove other references to this process to ensure
			 * we have an exclusive reference.
			 */
			sx_xlock(&allproc_lock);
			LIST_REMOVE(p, p_list);	/* off zombproc */
			sx_xunlock(&allproc_lock);
			LIST_REMOVE(p, p_sibling);
			leavepgrp(p);
			sx_xunlock(&proctree_lock);

			/*
			 * As a side effect of this lock, we know that
			 * all other writes to this proc are visible now, so
			 * no more locking is needed for p.
			 */
			PROC_LOCK(p);
			p->p_xstat = 0;		/* XXX: why? */
			PROC_UNLOCK(p);
			/*
			 * Fold the child's rusage into the caller's
			 * accumulated child rusage, then free the record
			 * that exit1() allocated.
			 */
			PROC_LOCK(q);
			ruadd(&q->p_stats->p_cru, p->p_ru);
			PROC_UNLOCK(q);
			FREE(p->p_ru, M_ZOMBIE);
			p->p_ru = NULL;

			/*
			 * Decrement the count of procs running with this uid.
			 */
			(void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);

			/*
			 * Free credentials, arguments, and sigacts
			 */
			crfree(p->p_ucred);
			p->p_ucred = NULL;
			pargs_drop(p->p_args);
			p->p_args = NULL;
			sigacts_free(p->p_sigacts);
			p->p_sigacts = NULL;

			/*
			 * do any thread-system specific cleanups
			 */
			thread_wait(p);

			/*
			 * Give vm and machine-dependent layer a chance
			 * to free anything that cpu_exit couldn't
			 * release while still running in process context.
			 */
			vm_waitproc(p);
#ifdef MAC
			mac_destroy_proc(p);
#endif
			KASSERT(FIRST_THREAD_IN_PROC(p),
			    ("wait1: no residual thread!"));
			/* Return the proc structure to its zone. */
			uma_zfree(proc_zone, p);
			sx_xlock(&allproc_lock);
			nprocs--;
			sx_xunlock(&allproc_lock);
			mtx_unlock(&Giant);
			return (0);
		}
		/*
		 * Case 2: the child is stopped with every thread suspended
		 * and has not been reported yet (P_WAITED clear).  Report
		 * it if it is traced or the caller asked for WUNTRACED.
		 */
		mtx_lock_spin(&sched_lock);
		if (P_SHOULDSTOP(p) && (p->p_suspcount == p->p_numthreads) &&
		    ((p->p_flag & P_WAITED) == 0) &&
		    (p->p_flag & P_TRACED || uap->options & WUNTRACED)) {
			mtx_unlock_spin(&sched_lock);
			/* Mark it reported so it is not returned twice. */
			p->p_flag |= P_WAITED;
			sx_xunlock(&proctree_lock);
			td->td_retval[0] = p->p_pid;
#ifdef COMPAT_43
			if (compat) {
				td->td_retval[1] = W_STOPCODE(p->p_xstat);
				PROC_UNLOCK(p);
				error = 0;
			} else
#endif
			if (uap->status) {
				status = W_STOPCODE(p->p_xstat);
				PROC_UNLOCK(p);
				error = copyout(&status,
					uap->status, sizeof(status));
			} else {
				PROC_UNLOCK(p);
				error = 0;
			}
			mtx_unlock(&Giant);
			return (error);
		}
		mtx_unlock_spin(&sched_lock);
		/*
		 * Case 3: the child has been continued and the caller asked
		 * for WCONTINUED.  P_CONTINUED is cleared so the event is
		 * reported once; the status handed back is SIGCONT.
		 */
		if (uap->options & WCONTINUED && (p->p_flag & P_CONTINUED)) {
			sx_xunlock(&proctree_lock);
			td->td_retval[0] = p->p_pid;
			p->p_flag &= ~P_CONTINUED;
			PROC_UNLOCK(p);

			if (uap->status) {
				status = SIGCONT;
				error = copyout(&status,
				    uap->status, sizeof(status));
			} else
				error = 0;

			mtx_unlock(&Giant);
			return (error);
		}
		PROC_UNLOCK(p);
	}
	/* No child matched the selection at all. */
	if (nfound == 0) {
		sx_xunlock(&proctree_lock);
		mtx_unlock(&Giant);
		return (ECHILD);
	}
	/* Matching children exist but none is reportable. */
	if (uap->options & WNOHANG) {
		sx_xunlock(&proctree_lock);
		td->td_retval[0] = 0;
		mtx_unlock(&Giant);
		return (0);
	}
	/*
	 * Sleep on our own proc until woken (exit1() does wakeup() on the
	 * parent), then rescan.  Our proc lock is taken before
	 * proctree_lock is released and is the lock handed to msleep().
	 * PCATCH is set, so msleep() may return an error, which is passed
	 * back to the caller.
	 */
	PROC_LOCK(q);
	sx_xunlock(&proctree_lock);
	error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0);
	PROC_UNLOCK(q);
	if (error) {
		mtx_unlock(&Giant);
		return (error);
	}
	goto loop;
}
786226633Sdim
787234353Sdim/*
788226633Sdim * Make process 'parent' the new parent of process 'child'.
789226633Sdim * Must be called with an exclusive hold of proctree lock.
790234353Sdim */
791234353Sdimvoid
792226633Sdimproc_reparent(struct proc *child, struct proc *parent)
793226633Sdim{
794234353Sdim
795234353Sdim	sx_assert(&proctree_lock, SX_XLOCKED);
796226633Sdim	PROC_LOCK_ASSERT(child, MA_OWNED);
797249423Sdim	if (child->p_pptr == parent)
798249423Sdim		return;
799249423Sdim
800249423Sdim	LIST_REMOVE(child, p_sibling);
801249423Sdim	LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
802249423Sdim	child->p_pptr = parent;
803249423Sdim}
804249423Sdim