kern_exit.c revision 303975
/*-
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_exit.c	8.7 (Berkeley) 2/12/94
 */
36296417Sdim
37296417Sdim#include <sys/cdefs.h>
38254721Semaste__FBSDID("$FreeBSD: releng/11.0/sys/kern/kern_exit.c 303141 2016-07-21 12:50:23Z kib $");
39254721Semaste
40254721Semaste#include "opt_compat.h"
41254721Semaste#include "opt_ktrace.h"
42254721Semaste
43296417Sdim#include <sys/param.h>
44254721Semaste#include <sys/systm.h>
45254721Semaste#include <sys/sysproto.h>
46254721Semaste#include <sys/capsicum.h>
47254721Semaste#include <sys/eventhandler.h>
48296417Sdim#include <sys/kernel.h>
49254721Semaste#include <sys/malloc.h>
50296417Sdim#include <sys/lock.h>
51296417Sdim#include <sys/mutex.h>
52296417Sdim#include <sys/proc.h>
53296417Sdim#include <sys/procdesc.h>
54296417Sdim#include <sys/pioctl.h>
55296417Sdim#include <sys/jail.h>
56296417Sdim#include <sys/tty.h>
57296417Sdim#include <sys/wait.h>
58254721Semaste#include <sys/vmmeter.h>
59254721Semaste#include <sys/vnode.h>
60296417Sdim#include <sys/racct.h>
61254721Semaste#include <sys/resourcevar.h>
62296417Sdim#include <sys/sbuf.h>
63296417Sdim#include <sys/signalvar.h>
64296417Sdim#include <sys/sched.h>
65296417Sdim#include <sys/sx.h>
66296417Sdim#include <sys/syscallsubr.h>
67296417Sdim#include <sys/syslog.h>
68296417Sdim#include <sys/ptrace.h>
69296417Sdim#include <sys/acct.h>		/* for acct_process() function prototype */
70296417Sdim#include <sys/filedesc.h>
71296417Sdim#include <sys/sdt.h>
72296417Sdim#include <sys/shm.h>
73296417Sdim#include <sys/sem.h>
74296417Sdim#include <sys/umtx.h>
75296417Sdim#ifdef KTRACE
76296417Sdim#include <sys/ktrace.h>
77296417Sdim#endif
78276479Sdim
79296417Sdim#include <security/audit/audit.h>
80296417Sdim#include <security/mac/mac_framework.h>
81296417Sdim
82296417Sdim#include <vm/vm.h>
83296417Sdim#include <vm/vm_extern.h>
84296417Sdim#include <vm/vm_param.h>
85296417Sdim#include <vm/pmap.h>
86296417Sdim#include <vm/vm_map.h>
87296417Sdim#include <vm/vm_page.h>
88276479Sdim#include <vm/uma.h>
89276479Sdim#include <vm/vm_domain.h>
90254721Semaste
91254721Semaste#ifdef KDTRACE_HOOKS
92254721Semaste#include <sys/dtrace_bsd.h>
93296417Sdimdtrace_execexit_func_t	dtrace_fasttrap_exit;
94296417Sdim#endif
95296417Sdim
96254721SemasteSDT_PROVIDER_DECLARE(proc);
97254721SemasteSDT_PROBE_DEFINE1(proc, , , exit, "int");
98254721Semaste
99254721Semaste/* Hook for NFS teardown procedure. */
100254721Semastevoid (*nlminfo_release_p)(struct proc *p);
101254721Semaste
102254721Semastestruct proc *
103254721Semasteproc_realparent(struct proc *child)
104254721Semaste{
105254721Semaste	struct proc *p, *parent;
106254721Semaste
107254721Semaste	sx_assert(&proctree_lock, SX_LOCKED);
108254721Semaste	if ((child->p_treeflag & P_TREE_ORPHANED) == 0) {
109254721Semaste		if (child->p_oppid == 0 ||
110254721Semaste		    child->p_pptr->p_pid == child->p_oppid)
111254721Semaste			parent = child->p_pptr;
112254721Semaste		else
113254721Semaste			parent = initproc;
114254721Semaste		return (parent);
115254721Semaste	}
116254721Semaste	for (p = child; (p->p_treeflag & P_TREE_FIRST_ORPHAN) == 0;) {
117254721Semaste		/* Cannot use LIST_PREV(), since the list head is not known. */
118254721Semaste		p = __containerof(p->p_orphan.le_prev, struct proc,
119254721Semaste		    p_orphan.le_next);
120296417Sdim		KASSERT((p->p_treeflag & P_TREE_ORPHANED) != 0,
121296417Sdim		    ("missing P_ORPHAN %p", p));
122296417Sdim	}
123276479Sdim	parent = __containerof(p->p_orphan.le_prev, struct proc,
124254721Semaste	    p_orphans.lh_first);
125254721Semaste	return (parent);
126254721Semaste}
127254721Semaste
128254721Semastevoid
129254721Semastereaper_abandon_children(struct proc *p, bool exiting)
130254721Semaste{
131254721Semaste	struct proc *p1, *p2, *ptmp;
132254721Semaste
133254721Semaste	sx_assert(&proctree_lock, SX_LOCKED);
134254721Semaste	KASSERT(p != initproc, ("reaper_abandon_children for initproc"));
135254721Semaste	if ((p->p_treeflag & P_TREE_REAPER) == 0)
136254721Semaste		return;
137254721Semaste	p1 = p->p_reaper;
138254721Semaste	LIST_FOREACH_SAFE(p2, &p->p_reaplist, p_reapsibling, ptmp) {
139254721Semaste		LIST_REMOVE(p2, p_reapsibling);
140254721Semaste		p2->p_reaper = p1;
141254721Semaste		p2->p_reapsubtree = p->p_reapsubtree;
142254721Semaste		LIST_INSERT_HEAD(&p1->p_reaplist, p2, p_reapsibling);
143254721Semaste		if (exiting && p2->p_pptr == p) {
144276479Sdim			PROC_LOCK(p2);
145254721Semaste			proc_reparent(p2, p1);
146254721Semaste			PROC_UNLOCK(p2);
147254721Semaste		}
148254721Semaste	}
149254721Semaste	KASSERT(LIST_EMPTY(&p->p_reaplist), ("p_reaplist not empty"));
150254721Semaste	p->p_treeflag &= ~P_TREE_REAPER;
151254721Semaste}
152254721Semaste
153254721Semastestatic void
154296417Sdimclear_orphan(struct proc *p)
155296417Sdim{
156296417Sdim	struct proc *p1;
157296417Sdim
158296417Sdim	sx_assert(&proctree_lock, SA_XLOCKED);
159296417Sdim	if ((p->p_treeflag & P_TREE_ORPHANED) == 0)
160296417Sdim		return;
161254721Semaste	if ((p->p_treeflag & P_TREE_FIRST_ORPHAN) != 0) {
162296417Sdim		p1 = LIST_NEXT(p, p_orphan);
163296417Sdim		if (p1 != NULL)
164296417Sdim			p1->p_treeflag |= P_TREE_FIRST_ORPHAN;
165254721Semaste		p->p_treeflag &= ~P_TREE_FIRST_ORPHAN;
166254721Semaste	}
167254721Semaste	LIST_REMOVE(p, p_orphan);
168254721Semaste	p->p_treeflag &= ~P_TREE_ORPHANED;
169254721Semaste}
170254721Semaste
171254721Semaste/*
172254721Semaste * exit -- death of process.
173254721Semaste */
174254721Semastevoid
175254721Semastesys_sys_exit(struct thread *td, struct sys_exit_args *uap)
176254721Semaste{
177254721Semaste
178254721Semaste	exit1(td, uap->rval, 0);
179254721Semaste	/* NOTREACHED */
180254721Semaste}
181254721Semaste
182254721Semaste/*
183254721Semaste * Exit: deallocate address space and other resources, change proc state to
184254721Semaste * zombie, and unlink proc from allproc and parent's lists.  Save exit status
185254721Semaste * and rusage for wait().  Check for child processes and orphan them.
186254721Semaste */
187254721Semastevoid
188254721Semasteexit1(struct thread *td, int rval, int signo)
189254721Semaste{
190254721Semaste	struct proc *p, *nq, *q, *t;
191254721Semaste	struct thread *tdt;
192254721Semaste
193254721Semaste	mtx_assert(&Giant, MA_NOTOWNED);
194254721Semaste	KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo));
195296417Sdim
196254721Semaste	p = td->td_proc;
197296417Sdim	/*
198254721Semaste	 * XXX in case we're rebooting we just let init die in order to
199296417Sdim	 * work around an unsolved stack overflow seen very late during
200254721Semaste	 * shutdown on sparc64 when the gmirror worker process exists.
201254721Semaste	 */
202254721Semaste	if (p == initproc && rebooting == 0) {
203254721Semaste		printf("init died (signal %d, exit %d)\n", signo, rval);
204296417Sdim		panic("Going nowhere without my init!");
205254721Semaste	}
206254721Semaste
207296417Sdim	/*
208254721Semaste	 * Deref SU mp, since the thread does not return to userspace.
209254721Semaste	 */
210296417Sdim	if (softdep_ast_cleanup != NULL)
211254721Semaste		softdep_ast_cleanup();
212254721Semaste
213254721Semaste	/*
214254721Semaste	 * MUST abort all other threads before proceeding past here.
215254721Semaste	 */
216296417Sdim	PROC_LOCK(p);
217254721Semaste	/*
218254721Semaste	 * First check if some other thread or external request got
219254721Semaste	 * here before us.  If so, act appropriately: exit or suspend.
220254721Semaste	 * We must ensure that stop requests are handled before we set
221254721Semaste	 * P_WEXIT.
222296417Sdim	 */
223254721Semaste	thread_suspend_check(0);
224254721Semaste	while (p->p_flag & P_HADTHREADS) {
225296417Sdim		/*
226254721Semaste		 * Kill off the other threads. This requires
227254721Semaste		 * some co-operation from other parts of the kernel
228296417Sdim		 * so it may not be instantaneous.  With this state set
229254721Semaste		 * any thread entering the kernel from userspace will
230254721Semaste		 * thread_exit() in trap().  Any thread attempting to
231254721Semaste		 * sleep will return immediately with EINTR or EWOULDBLOCK
232254721Semaste		 * which will hopefully force them to back out to userland
233254721Semaste		 * freeing resources as they go.  Any thread attempting
234296417Sdim		 * to return to userland will thread_exit() from userret().
235296417Sdim		 * thread_exit() will unsuspend us when the last of the
236296417Sdim		 * other threads exits.
237296417Sdim		 * If there is already a thread singler after resumption,
238296417Sdim		 * calling thread_single will fail; in that case, we just
239296417Sdim		 * re-check all suspension request, the thread should
240254721Semaste		 * either be suspended there or exit.
241296417Sdim		 */
242254721Semaste		if (!thread_single(p, SINGLE_EXIT))
243254721Semaste			/*
244254721Semaste			 * All other activity in this process is now
245254721Semaste			 * stopped.  Threading support has been turned
246254721Semaste			 * off.
247254721Semaste			 */
248254721Semaste			break;
249254721Semaste		/*
250254721Semaste		 * Recheck for new stop or suspend requests which
251254721Semaste		 * might appear while process lock was dropped in
252254721Semaste		 * thread_single().
253254721Semaste		 */
254254721Semaste		thread_suspend_check(0);
255254721Semaste	}
256254721Semaste	KASSERT(p->p_numthreads == 1,
257254721Semaste	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
258296417Sdim	racct_sub(p, RACCT_NTHR, 1);
259296417Sdim
260254721Semaste	/* Let event handler change exit status */
261254721Semaste	p->p_xexit = rval;
262296417Sdim	p->p_xsig = signo;
263280031Sdim
264280031Sdim	/*
265296417Sdim	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
266254721Semaste	 * on our vmspace, so we should block below until they have
267254721Semaste	 * released their reference to us.  Note that if they have
268296417Sdim	 * requested S_EXIT stops we will block here until they ack
269280031Sdim	 * via PIOCCONT.
270280031Sdim	 */
271280031Sdim	_STOPEVENT(p, S_EXIT, 0);
272296417Sdim
273280031Sdim	/*
274280031Sdim	 * Ignore any pending request to stop due to a stop signal.
275296417Sdim	 * Once P_WEXIT is set, future requests will be ignored as
276280031Sdim	 * well.
277280031Sdim	 */
278280031Sdim	p->p_flag &= ~P_STOPPED_SIG;
279280031Sdim	KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped"));
280280031Sdim
281280031Sdim	/*
282280031Sdim	 * Note that we are exiting and do another wakeup of anyone in
283280031Sdim	 * PIOCWAIT in case they aren't listening for S_EXIT stops or
284280031Sdim	 * decided to wait again after we told them we are exiting.
285288943Sdim	 */
286280031Sdim	p->p_flag |= P_WEXIT;
287280031Sdim	wakeup(&p->p_stype);
288280031Sdim
289280031Sdim	/*
290280031Sdim	 * Wait for any processes that have a hold on our vmspace to
291296417Sdim	 * release their reference.
292280031Sdim	 */
293280031Sdim	while (p->p_lock > 0)
294280031Sdim		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);
295280031Sdim
296296417Sdim	PROC_UNLOCK(p);
297280031Sdim	/* Drain the limit callout while we don't have the proc locked */
298280031Sdim	callout_drain(&p->p_limco);
299296417Sdim
300280031Sdim#ifdef AUDIT
301296417Sdim	/*
302280031Sdim	 * The Sun BSM exit token contains two components: an exit status as
303254721Semaste	 * passed to exit(), and a return value to indicate what sort of exit
304254721Semaste	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
305254721Semaste	 * what the return value is.
306254721Semaste	 */
307254721Semaste	AUDIT_ARG_EXIT(rval, 0);
308254721Semaste	AUDIT_SYSCALL_EXIT(0, td);
309254721Semaste#endif
310254721Semaste
311254721Semaste	/* Are we a task leader with peers? */
312254721Semaste	if (p->p_peers != NULL && p == p->p_leader) {
313254721Semaste		mtx_lock(&ppeers_lock);
314254721Semaste		q = p->p_peers;
315254721Semaste		while (q != NULL) {
316254721Semaste			PROC_LOCK(q);
317254721Semaste			kern_psignal(q, SIGKILL);
318296417Sdim			PROC_UNLOCK(q);
319296417Sdim			q = q->p_peers;
320296417Sdim		}
321296417Sdim		while (p->p_peers != NULL)
322296417Sdim			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
323296417Sdim		mtx_unlock(&ppeers_lock);
324296417Sdim	}
325254721Semaste
326254721Semaste	/*
327254721Semaste	 * Check if any loadable modules need anything done at process exit.
328254721Semaste	 * E.g. SYSV IPC stuff.
329254721Semaste	 * Event handler could change exit status.
330254721Semaste	 * XXX what if one of these generates an error?
331254721Semaste	 */
332254721Semaste	EVENTHANDLER_INVOKE(process_exit, p);
333254721Semaste
334254721Semaste	/*
335254721Semaste	 * If parent is waiting for us to exit or exec,
336254721Semaste	 * P_PPWAIT is set; we will wakeup the parent below.
337254721Semaste	 */
338254721Semaste	PROC_LOCK(p);
339254721Semaste	stopprofclock(p);
340254721Semaste	p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE);
341254721Semaste
342254721Semaste	/*
343254721Semaste	 * Stop the real interval timer.  If the handler is currently
344254721Semaste	 * executing, prevent it from rearming itself and let it finish.
345276479Sdim	 */
346276479Sdim	if (timevalisset(&p->p_realtimer.it_value) &&
347254721Semaste	    _callout_stop_safe(&p->p_itcallout, CS_EXECUTING, NULL) == 0) {
348254721Semaste		timevalclear(&p->p_realtimer.it_interval);
349254721Semaste		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
350254721Semaste		KASSERT(!timevalisset(&p->p_realtimer.it_value),
351254721Semaste		    ("realtime timer is still armed"));
352254721Semaste	}
353254721Semaste
354254721Semaste	PROC_UNLOCK(p);
355254721Semaste
356254721Semaste	umtx_thread_exit(td);
357254721Semaste
358254721Semaste	/*
359254721Semaste	 * Reset any sigio structures pointing to us as a result of
360254721Semaste	 * F_SETOWN with our pid.
361254721Semaste	 */
362254721Semaste	funsetownlst(&p->p_sigiolst);
363254721Semaste
364254721Semaste	/*
365254721Semaste	 * If this process has an nlminfo data area (for lockd), release it
366254721Semaste	 */
367254721Semaste	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
368254721Semaste		(*nlminfo_release_p)(p);
369254721Semaste
370254721Semaste	/*
371254721Semaste	 * Close open files and release open-file table.
372254721Semaste	 * This may block!
373296417Sdim	 */
374254721Semaste	fdescfree(td);
375254721Semaste
376254721Semaste	/*
377254721Semaste	 * If this thread tickled GEOM, we need to wait for the giggling to
378254721Semaste	 * stop before we return to userland
379254721Semaste	 */
380254721Semaste	if (td->td_pflags & TDP_GEOM)
381254721Semaste		g_waitidle();
382254721Semaste
383254721Semaste	/*
384254721Semaste	 * Remove ourself from our leader's peer list and wake our leader.
385254721Semaste	 */
386254721Semaste	if (p->p_leader->p_peers != NULL) {
387254721Semaste		mtx_lock(&ppeers_lock);
388254721Semaste		if (p->p_leader->p_peers != NULL) {
389254721Semaste			q = p->p_leader;
390254721Semaste			while (q->p_peers != p)
391254721Semaste				q = q->p_peers;
392254721Semaste			q->p_peers = p->p_peers;
393254721Semaste			wakeup(p->p_leader);
394254721Semaste		}
395254721Semaste		mtx_unlock(&ppeers_lock);
396254721Semaste	}
397254721Semaste
398254721Semaste	vmspace_exit(td);
399296417Sdim	killjobc();
400296417Sdim	(void)acct_process(td);
401296417Sdim
402296417Sdim#ifdef KTRACE
403296417Sdim	ktrprocexit(td);
404296417Sdim#endif
405296417Sdim	/*
406296417Sdim	 * Release reference to text vnode
407296417Sdim	 */
408296417Sdim	if (p->p_textvp != NULL) {
409296417Sdim		vrele(p->p_textvp);
410296417Sdim		p->p_textvp = NULL;
411296417Sdim	}
412296417Sdim
413296417Sdim	/*
414254721Semaste	 * Release our limits structure.
415254721Semaste	 */
416254721Semaste	lim_free(p->p_limit);
417254721Semaste	p->p_limit = NULL;
418254721Semaste
419254721Semaste	tidhash_remove(td);
420254721Semaste
421254721Semaste	/*
422254721Semaste	 * Remove proc from allproc queue and pidhash chain.
423254721Semaste	 * Place onto zombproc.  Unlink from parent's child list.
424254721Semaste	 */
425254721Semaste	sx_xlock(&allproc_lock);
426254721Semaste	LIST_REMOVE(p, p_list);
427254721Semaste	LIST_INSERT_HEAD(&zombproc, p, p_list);
428254721Semaste	LIST_REMOVE(p, p_hash);
429254721Semaste	sx_xunlock(&allproc_lock);
430254721Semaste
431254721Semaste	/*
432296417Sdim	 * Call machine-dependent code to release any
433254721Semaste	 * machine-dependent resources other than the address space.
434254721Semaste	 * The address space is released by "vmspace_exitfree(p)" in
435254721Semaste	 * vm_waitproc().
436296417Sdim	 */
437254721Semaste	cpu_exit(td);
438296417Sdim
439296417Sdim	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);
440254721Semaste
441254721Semaste	/*
442254721Semaste	 * Reparent all children processes:
443254721Semaste	 * - traced ones to the original parent (or init if we are that parent)
444296417Sdim	 * - the rest to init
445296417Sdim	 */
446296417Sdim	sx_xlock(&proctree_lock);
447254721Semaste	q = LIST_FIRST(&p->p_children);
448254721Semaste	if (q != NULL)		/* only need this if any child is S_ZOMB */
449254721Semaste		wakeup(q->p_reaper);
450254721Semaste	for (; q != NULL; q = nq) {
451254721Semaste		nq = LIST_NEXT(q, p_sibling);
452254721Semaste		PROC_LOCK(q);
453254721Semaste		q->p_sigparent = SIGCHLD;
454254721Semaste
455254721Semaste		if (!(q->p_flag & P_TRACED)) {
456254721Semaste			proc_reparent(q, q->p_reaper);
457254721Semaste		} else {
458254721Semaste			/*
459254721Semaste			 * Traced processes are killed since their existence
460254721Semaste			 * means someone is screwing up.
461296417Sdim			 */
462254721Semaste			t = proc_realparent(q);
463254721Semaste			if (t == p) {
464254721Semaste				proc_reparent(q, q->p_reaper);
465254721Semaste			} else {
466254721Semaste				PROC_LOCK(t);
467254721Semaste				proc_reparent(q, t);
468254721Semaste				PROC_UNLOCK(t);
469254721Semaste			}
470254721Semaste			/*
471254721Semaste			 * Since q was found on our children list, the
472254721Semaste			 * proc_reparent() call moved q to the orphan
473296417Sdim			 * list due to present P_TRACED flag. Clear
474296417Sdim			 * orphan link for q now while q is locked.
475254721Semaste			 */
476254721Semaste			clear_orphan(q);
477254721Semaste			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
478254721Semaste			FOREACH_THREAD_IN_PROC(q, tdt)
479254721Semaste				tdt->td_dbgflags &= ~TDB_SUSPEND;
480254721Semaste			kern_psignal(q, SIGKILL);
481296417Sdim		}
482254721Semaste		PROC_UNLOCK(q);
483254721Semaste	}
484254721Semaste
485296417Sdim	/*
486254721Semaste	 * Also get rid of our orphans.
487254721Semaste	 */
488276479Sdim	while ((q = LIST_FIRST(&p->p_orphans)) != NULL) {
489276479Sdim		PROC_LOCK(q);
490296417Sdim		CTR2(KTR_PTRACE, "exit: pid %d, clearing orphan %d", p->p_pid,
491296417Sdim		    q->p_pid);
492276479Sdim		clear_orphan(q);
493276479Sdim		PROC_UNLOCK(q);
494276479Sdim	}
495296417Sdim
496276479Sdim	/* Save exit status. */
497276479Sdim	PROC_LOCK(p);
498276479Sdim	p->p_xthread = td;
499276479Sdim
500276479Sdim	/* Tell the prison that we are gone. */
501296417Sdim	prison_proc_free(p->p_ucred->cr_prison);
502276479Sdim
503276479Sdim#ifdef KDTRACE_HOOKS
504276479Sdim	/*
505254721Semaste	 * Tell the DTrace fasttrap provider about the exit if it
506254721Semaste	 * has declared an interest.
507296417Sdim	 */
508296417Sdim	if (dtrace_fasttrap_exit)
509296417Sdim		dtrace_fasttrap_exit(p);
510296417Sdim#endif
511296417Sdim
512296417Sdim	/*
513296417Sdim	 * Notify interested parties of our demise.
514296417Sdim	 */
515296417Sdim	KNOTE_LOCKED(p->p_klist, NOTE_EXIT);
516296417Sdim
517296417Sdim#ifdef KDTRACE_HOOKS
518296417Sdim	int reason = CLD_EXITED;
519296417Sdim	if (WCOREDUMP(signo))
520296417Sdim		reason = CLD_DUMPED;
521296417Sdim	else if (WIFSIGNALED(signo))
522296417Sdim		reason = CLD_KILLED;
523296417Sdim	SDT_PROBE1(proc, , , exit, reason);
524296417Sdim#endif
525296417Sdim
526296417Sdim	/*
527296417Sdim	 * If this is a process with a descriptor, we may not need to deliver
528296417Sdim	 * a signal to the parent.  proctree_lock is held over
529296417Sdim	 * procdesc_exit() to serialize concurrent calls to close() and
530296417Sdim	 * exit().
531296417Sdim	 */
532296417Sdim	if (p->p_procdesc == NULL || procdesc_exit(p)) {
533296417Sdim		/*
534296417Sdim		 * Notify parent that we're gone.  If parent has the
535296417Sdim		 * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN,
536296417Sdim		 * notify process 1 instead (and hope it will handle this
537296417Sdim		 * situation).
538296417Sdim		 */
539296417Sdim		PROC_LOCK(p->p_pptr);
540296417Sdim		mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
541296417Sdim		if (p->p_pptr->p_sigacts->ps_flag &
542296417Sdim		    (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
543296417Sdim			struct proc *pp;
544254721Semaste
545296417Sdim			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
546296417Sdim			pp = p->p_pptr;
547296417Sdim			PROC_UNLOCK(pp);
548296417Sdim			proc_reparent(p, p->p_reaper);
549296417Sdim			p->p_sigparent = SIGCHLD;
550296417Sdim			PROC_LOCK(p->p_pptr);
551296417Sdim
552296417Sdim			/*
553296417Sdim			 * Notify parent, so in case he was wait(2)ing or
554296417Sdim			 * executing waitpid(2) with our pid, he will
555296417Sdim			 * continue.
556296417Sdim			 */
557296417Sdim			wakeup(pp);
558296417Sdim		} else
559296417Sdim			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
560296417Sdim
561296417Sdim		if (p->p_pptr == p->p_reaper || p->p_pptr == initproc)
562296417Sdim			childproc_exited(p);
563296417Sdim		else if (p->p_sigparent != 0) {
564296417Sdim			if (p->p_sigparent == SIGCHLD)
565296417Sdim				childproc_exited(p);
566296417Sdim			else	/* LINUX thread */
567296417Sdim				kern_psignal(p->p_pptr, p->p_sigparent);
568296417Sdim		}
569296417Sdim	} else
570296417Sdim		PROC_LOCK(p->p_pptr);
571296417Sdim	sx_xunlock(&proctree_lock);
572296417Sdim
573296417Sdim	/*
574296417Sdim	 * The state PRS_ZOMBIE prevents other proesses from sending
575296417Sdim	 * signal to the process, to avoid memory leak, we free memory
576296417Sdim	 * for signal queue at the time when the state is set.
577296417Sdim	 */
578296417Sdim	sigqueue_flush(&p->p_sigqueue);
579296417Sdim	sigqueue_flush(&td->td_sigqueue);
580296417Sdim
581296417Sdim	/*
582296417Sdim	 * We have to wait until after acquiring all locks before
583296417Sdim	 * changing p_state.  We need to avoid all possible context
584296417Sdim	 * switches (including ones from blocking on a mutex) while
585296417Sdim	 * marked as a zombie.  We also have to set the zombie state
586296417Sdim	 * before we release the parent process' proc lock to avoid
587296417Sdim	 * a lost wakeup.  So, we first call wakeup, then we grab the
588296417Sdim	 * sched lock, update the state, and release the parent process'
589296417Sdim	 * proc lock.
590296417Sdim	 */
591254721Semaste	wakeup(p->p_pptr);
592296417Sdim	cv_broadcast(&p->p_pwait);
593254721Semaste	sched_exit(p->p_pptr, td);
594254721Semaste	PROC_SLOCK(p);
595296417Sdim	p->p_state = PRS_ZOMBIE;
596296417Sdim	PROC_UNLOCK(p->p_pptr);
597296417Sdim
598296417Sdim	/*
599296417Sdim	 * Save our children's rusage information in our exit rusage.
600296417Sdim	 */
601296417Sdim	PROC_STATLOCK(p);
602296417Sdim	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);
603296417Sdim	PROC_STATUNLOCK(p);
604296417Sdim
605296417Sdim	/*
606296417Sdim	 * Make sure the scheduler takes this thread out of its tables etc.
607296417Sdim	 * This will also release this thread's reference to the ucred.
608296417Sdim	 * Other thread parts to release include pcb bits and such.
609296417Sdim	 */
610296417Sdim	thread_exit();
611296417Sdim}
612296417Sdim
613296417Sdim
#ifndef _SYS_SYSPROTO_H_
/* Arguments of the abort2() system call, as consumed by sys_abort2(). */
struct abort2_args {
	char *why;	/* user-space reason string; may be NULL */
	int nargs;	/* number of entries in args */
	void **args;	/* user-space array of nargs pointers to log */
};
#endif
621296417Sdim
622296417Sdimint
623296417Sdimsys_abort2(struct thread *td, struct abort2_args *uap)
624296417Sdim{
625296417Sdim	struct proc *p = td->td_proc;
626296417Sdim	struct sbuf *sb;
627296417Sdim	void *uargs[16];
628296417Sdim	int error, i, sig;
629296417Sdim
630296417Sdim	/*
631296417Sdim	 * Do it right now so we can log either proper call of abort2(), or
632296417Sdim	 * note, that invalid argument was passed. 512 is big enough to
633296417Sdim	 * handle 16 arguments' descriptions with additional comments.
634296417Sdim	 */
635296417Sdim	sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN);
636296417Sdim	sbuf_clear(sb);
637296417Sdim	sbuf_printf(sb, "%s(pid %d uid %d) aborted: ",
638296417Sdim	    p->p_comm, p->p_pid, td->td_ucred->cr_uid);
639296417Sdim	/*
640296417Sdim	 * Since we can't return from abort2(), send SIGKILL in cases, where
641296417Sdim	 * abort2() was called improperly
642296417Sdim	 */
643296417Sdim	sig = SIGKILL;
644296417Sdim	/* Prevent from DoSes from user-space. */
645296417Sdim	if (uap->nargs < 0 || uap->nargs > 16)
646296417Sdim		goto out;
647296417Sdim	if (uap->nargs > 0) {
648296417Sdim		if (uap->args == NULL)
649296417Sdim			goto out;
650296417Sdim		error = copyin(uap->args, uargs, uap->nargs * sizeof(void *));
651296417Sdim		if (error != 0)
652296417Sdim			goto out;
653296417Sdim	}
654296417Sdim	/*
655296417Sdim	 * Limit size of 'reason' string to 128. Will fit even when
656296417Sdim	 * maximal number of arguments was chosen to be logged.
657296417Sdim	 */
658296417Sdim	if (uap->why != NULL) {
659296417Sdim		error = sbuf_copyin(sb, uap->why, 128);
660296417Sdim		if (error < 0)
661296417Sdim			goto out;
662296417Sdim	} else {
663296417Sdim		sbuf_printf(sb, "(null)");
664296417Sdim	}
665296417Sdim	if (uap->nargs > 0) {
666296417Sdim		sbuf_printf(sb, "(");
667296417Sdim		for (i = 0;i < uap->nargs; i++)
668296417Sdim			sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]);
669296417Sdim		sbuf_printf(sb, ")");
670296417Sdim	}
671296417Sdim	/*
672296417Sdim	 * Final stage: arguments were proper, string has been
673296417Sdim	 * successfully copied from userspace, and copying pointers
674296417Sdim	 * from user-space succeed.
675296417Sdim	 */
676296417Sdim	sig = SIGABRT;
677296417Sdimout:
678296417Sdim	if (sig == SIGKILL) {
679296417Sdim		sbuf_trim(sb);
680296417Sdim		sbuf_printf(sb, " (Reason text inaccessible)");
681296417Sdim	}
682296417Sdim	sbuf_cat(sb, "\n");
683296417Sdim	sbuf_finish(sb);
684296417Sdim	log(LOG_INFO, "%s", sbuf_data(sb));
685296417Sdim	sbuf_delete(sb);
686296417Sdim	exit1(td, 0, sig);
687296417Sdim	return (0);
688296417Sdim}
689296417Sdim
690296417Sdim
#ifdef COMPAT_43
/*
 * Old wait() entry point, built only with COMPAT_43.
 *
 * The dirty work is handled by kern_wait(): wait for any child with no
 * options and no rusage.  On success the status is handed back through
 * td->td_retval[1] instead of being copied out to a user pointer.
 */
int
owait(struct thread *td, struct owait_args *uap __unused)
{
	int error, status;

	error = kern_wait(td, WAIT_ANY, &status, 0, NULL);
	if (error == 0)
		td->td_retval[1] = status;
	return (error);
}
#endif /* COMPAT_43 */
706296417Sdim
707296417Sdim/*
708296417Sdim * The dirty work is handled by kern_wait().
709296417Sdim */
710296417Sdimint
711296417Sdimsys_wait4(struct thread *td, struct wait4_args *uap)
712296417Sdim{
713296417Sdim	struct rusage ru, *rup;
714296417Sdim	int error, status;
715296417Sdim
716296417Sdim	if (uap->rusage != NULL)
717296417Sdim		rup = &ru;
718296417Sdim	else
719296417Sdim		rup = NULL;
720296417Sdim	error = kern_wait(td, uap->pid, &status, uap->options, rup);
721296417Sdim	if (uap->status != NULL && error == 0)
722296417Sdim		error = copyout(&status, uap->status, sizeof(status));
723296417Sdim	if (uap->rusage != NULL && error == 0)
724296417Sdim		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
725296417Sdim	return (error);
726296417Sdim}
727296417Sdim
728296417Sdimint
729296417Sdimsys_wait6(struct thread *td, struct wait6_args *uap)
730296417Sdim{
731296417Sdim	struct __wrusage wru, *wrup;
732296417Sdim	siginfo_t si, *sip;
733296417Sdim	idtype_t idtype;
734296417Sdim	id_t id;
735296417Sdim	int error, status;
736296417Sdim
737296417Sdim	idtype = uap->idtype;
738296417Sdim	id = uap->id;
739296417Sdim
740296417Sdim	if (uap->wrusage != NULL)
741296417Sdim		wrup = &wru;
742296417Sdim	else
743296417Sdim		wrup = NULL;
744296417Sdim
745296417Sdim	if (uap->info != NULL) {
746296417Sdim		sip = &si;
747296417Sdim		bzero(sip, sizeof(*sip));
748296417Sdim	} else
749296417Sdim		sip = NULL;
750296417Sdim
751296417Sdim	/*
752296417Sdim	 *  We expect all callers of wait6() to know about WEXITED and
753296417Sdim	 *  WTRAPPED.
754296417Sdim	 */
755296417Sdim	error = kern_wait6(td, idtype, id, &status, uap->options, wrup, sip);
756296417Sdim
757296417Sdim	if (uap->status != NULL && error == 0)
758296417Sdim		error = copyout(&status, uap->status, sizeof(status));
759296417Sdim	if (uap->wrusage != NULL && error == 0)
760296417Sdim		error = copyout(&wru, uap->wrusage, sizeof(wru));
761296417Sdim	if (uap->info != NULL && error == 0)
762296417Sdim		error = copyout(&si, uap->info, sizeof(si));
763296417Sdim	return (error);
764296417Sdim}
765296417Sdim
766296417Sdim/*
767296417Sdim * Reap the remains of a zombie process and optionally return status and
768296417Sdim * rusage.  Asserts and will release both the proctree_lock and the process
769296417Sdim * lock as part of its work.
770296417Sdim */
771296417Sdimvoid
772296417Sdimproc_reap(struct thread *td, struct proc *p, int *status, int options)
773296417Sdim{
774296417Sdim	struct proc *q, *t;
775296417Sdim
776296417Sdim	sx_assert(&proctree_lock, SA_XLOCKED);
777296417Sdim	PROC_LOCK_ASSERT(p, MA_OWNED);
778296417Sdim	PROC_SLOCK_ASSERT(p, MA_OWNED);
779296417Sdim	KASSERT(p->p_state == PRS_ZOMBIE, ("proc_reap: !PRS_ZOMBIE"));
780296417Sdim
781296417Sdim	q = td->td_proc;
782296417Sdim
783296417Sdim	PROC_SUNLOCK(p);
784296417Sdim	if (status)
785296417Sdim		*status = KW_EXITCODE(p->p_xexit, p->p_xsig);
786296417Sdim	if (options & WNOWAIT) {
787296417Sdim		/*
788296417Sdim		 *  Only poll, returning the status.  Caller does not wish to
789296417Sdim		 * release the proc struct just yet.
790296417Sdim		 */
791296417Sdim		PROC_UNLOCK(p);
792296417Sdim		sx_xunlock(&proctree_lock);
793296417Sdim		return;
794296417Sdim	}
795296417Sdim
796296417Sdim	PROC_LOCK(q);
797296417Sdim	sigqueue_take(p->p_ksi);
798296417Sdim	PROC_UNLOCK(q);
799296417Sdim
800296417Sdim	/*
801296417Sdim	 * If we got the child via a ptrace 'attach', we need to give it back
802296417Sdim	 * to the old parent.
803296417Sdim	 */
804296417Sdim	if (p->p_oppid != 0 && p->p_oppid != p->p_pptr->p_pid) {
805296417Sdim		PROC_UNLOCK(p);
806296417Sdim		t = proc_realparent(p);
807296417Sdim		PROC_LOCK(t);
808296417Sdim		PROC_LOCK(p);
809296417Sdim		CTR2(KTR_PTRACE,
810296417Sdim		    "wait: traced child %d moved back to parent %d", p->p_pid,
811296417Sdim		    t->p_pid);
812296417Sdim		proc_reparent(p, t);
813296417Sdim		p->p_oppid = 0;
814296417Sdim		PROC_UNLOCK(p);
815296417Sdim		pksignal(t, SIGCHLD, p->p_ksi);
816296417Sdim		wakeup(t);
817296417Sdim		cv_broadcast(&p->p_pwait);
818296417Sdim		PROC_UNLOCK(t);
819296417Sdim		sx_xunlock(&proctree_lock);
820296417Sdim		return;
821296417Sdim	}
822296417Sdim	p->p_oppid = 0;
823296417Sdim	PROC_UNLOCK(p);
824296417Sdim
825296417Sdim	/*
826296417Sdim	 * Remove other references to this process to ensure we have an
827296417Sdim	 * exclusive reference.
828296417Sdim	 */
829296417Sdim	sx_xlock(&allproc_lock);
830296417Sdim	LIST_REMOVE(p, p_list);	/* off zombproc */
831296417Sdim	sx_xunlock(&allproc_lock);
832296417Sdim	LIST_REMOVE(p, p_sibling);
833296417Sdim	reaper_abandon_children(p, true);
834296417Sdim	LIST_REMOVE(p, p_reapsibling);
835296417Sdim	PROC_LOCK(p);
836296417Sdim	clear_orphan(p);
837296417Sdim	PROC_UNLOCK(p);
838296417Sdim	leavepgrp(p);
839296417Sdim	if (p->p_procdesc != NULL)
840296417Sdim		procdesc_reap(p);
841296417Sdim	sx_xunlock(&proctree_lock);
842296417Sdim
843296417Sdim	PROC_LOCK(p);
844296417Sdim	knlist_detach(p->p_klist);
845296417Sdim	p->p_klist = NULL;
846296417Sdim	PROC_UNLOCK(p);
847296417Sdim
848296417Sdim	/*
849296417Sdim	 * Removal from allproc list and process group list paired with
850296417Sdim	 * PROC_LOCK which was executed during that time should guarantee
851296417Sdim	 * nothing can reach this process anymore. As such further locking
852296417Sdim	 * is unnecessary.
853296417Sdim	 */
854296417Sdim	p->p_xexit = p->p_xsig = 0;		/* XXX: why? */
855296417Sdim
856296417Sdim	PROC_LOCK(q);
857296417Sdim	ruadd(&q->p_stats->p_cru, &q->p_crux, &p->p_ru, &p->p_rux);
858296417Sdim	PROC_UNLOCK(q);
859296417Sdim
860296417Sdim	/*
861296417Sdim	 * Decrement the count of procs running with this uid.
862296417Sdim	 */
863296417Sdim	(void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);
864296417Sdim
865296417Sdim	/*
866296417Sdim	 * Destroy resource accounting information associated with the process.
867296417Sdim	 */
868296417Sdim#ifdef RACCT
869296417Sdim	if (racct_enable) {
870296417Sdim		PROC_LOCK(p);
871296417Sdim		racct_sub(p, RACCT_NPROC, 1);
872296417Sdim		PROC_UNLOCK(p);
873296417Sdim	}
874296417Sdim#endif
875296417Sdim	racct_proc_exit(p);
876296417Sdim
877296417Sdim	/*
878296417Sdim	 * Free credentials, arguments, and sigacts.
879296417Sdim	 */
880296417Sdim	crfree(p->p_ucred);
881296417Sdim	proc_set_cred(p, NULL);
882296417Sdim	pargs_drop(p->p_args);
883296417Sdim	p->p_args = NULL;
884296417Sdim	sigacts_free(p->p_sigacts);
885296417Sdim	p->p_sigacts = NULL;
886296417Sdim
887296417Sdim	/*
888296417Sdim	 * Do any thread-system specific cleanups.
889296417Sdim	 */
890296417Sdim	thread_wait(p);
891296417Sdim
892296417Sdim	/*
893296417Sdim	 * Give vm and machine-dependent layer a chance to free anything that
894296417Sdim	 * cpu_exit couldn't release while still running in process context.
895296417Sdim	 */
896296417Sdim	vm_waitproc(p);
897296417Sdim#ifdef MAC
898296417Sdim	mac_proc_destroy(p);
899296417Sdim#endif
900296417Sdim	/*
901296417Sdim	 * Free any domain policy that's still hiding around.
902296417Sdim	 */
903296417Sdim	vm_domain_policy_cleanup(&p->p_vm_dom_policy);
904296417Sdim
905296417Sdim	KASSERT(FIRST_THREAD_IN_PROC(p),
906296417Sdim	    ("proc_reap: no residual thread!"));
907296417Sdim	uma_zfree(proc_zone, p);
908296417Sdim	atomic_add_int(&nprocs, -1);
909296417Sdim}
910296417Sdim
911296417Sdimstatic int
912296417Sdimproc_to_reap(struct thread *td, struct proc *p, idtype_t idtype, id_t id,
913296417Sdim    int *status, int options, struct __wrusage *wrusage, siginfo_t *siginfo,
914296417Sdim    int check_only)
915296417Sdim{
916296417Sdim	struct rusage *rup;
917296417Sdim
918296417Sdim	sx_assert(&proctree_lock, SA_XLOCKED);
919296417Sdim
920296417Sdim	PROC_LOCK(p);
921296417Sdim
922296417Sdim	switch (idtype) {
923296417Sdim	case P_ALL:
924296417Sdim		if (p->p_procdesc != NULL) {
925296417Sdim			PROC_UNLOCK(p);
926296417Sdim			return (0);
927296417Sdim		}
928296417Sdim		break;
929296417Sdim	case P_PID:
930296417Sdim		if (p->p_pid != (pid_t)id) {
931296417Sdim			PROC_UNLOCK(p);
932296417Sdim			return (0);
933296417Sdim		}
934296417Sdim		break;
935296417Sdim	case P_PGID:
936296417Sdim		if (p->p_pgid != (pid_t)id) {
937296417Sdim			PROC_UNLOCK(p);
938296417Sdim			return (0);
939296417Sdim		}
940296417Sdim		break;
941296417Sdim	case P_SID:
942296417Sdim		if (p->p_session->s_sid != (pid_t)id) {
943296417Sdim			PROC_UNLOCK(p);
944296417Sdim			return (0);
945296417Sdim		}
946296417Sdim		break;
947296417Sdim	case P_UID:
948296417Sdim		if (p->p_ucred->cr_uid != (uid_t)id) {
949296417Sdim			PROC_UNLOCK(p);
950296417Sdim			return (0);
951296417Sdim		}
952296417Sdim		break;
953296417Sdim	case P_GID:
954296417Sdim		if (p->p_ucred->cr_gid != (gid_t)id) {
955296417Sdim			PROC_UNLOCK(p);
956296417Sdim			return (0);
957296417Sdim		}
958296417Sdim		break;
959296417Sdim	case P_JAILID:
960296417Sdim		if (p->p_ucred->cr_prison->pr_id != (int)id) {
961296417Sdim			PROC_UNLOCK(p);
962296417Sdim			return (0);
963296417Sdim		}
964296417Sdim		break;
965296417Sdim	/*
966296417Sdim	 * It seems that the thread structures get zeroed out
967296417Sdim	 * at process exit.  This makes it impossible to
968296417Sdim	 * support P_SETID, P_CID or P_CPUID.
969296417Sdim	 */
970296417Sdim	default:
971296417Sdim		PROC_UNLOCK(p);
972296417Sdim		return (0);
973296417Sdim	}
974296417Sdim
975296417Sdim	if (p_canwait(td, p)) {
976296417Sdim		PROC_UNLOCK(p);
977296417Sdim		return (0);
978296417Sdim	}
979296417Sdim
980296417Sdim	if (((options & WEXITED) == 0) && (p->p_state == PRS_ZOMBIE)) {
981296417Sdim		PROC_UNLOCK(p);
982296417Sdim		return (0);
983296417Sdim	}
984296417Sdim
985296417Sdim	/*
986296417Sdim	 * This special case handles a kthread spawned by linux_clone
987296417Sdim	 * (see linux_misc.c).  The linux_wait4 and linux_waitpid
988296417Sdim	 * functions need to be able to distinguish between waiting
989296417Sdim	 * on a process and waiting on a thread.  It is a thread if
990296417Sdim	 * p_sigparent is not SIGCHLD, and the WLINUXCLONE option
991296417Sdim	 * signifies we want to wait for threads and not processes.
992296417Sdim	 */
993296417Sdim	if ((p->p_sigparent != SIGCHLD) ^
994296417Sdim	    ((options & WLINUXCLONE) != 0)) {
995296417Sdim		PROC_UNLOCK(p);
996296417Sdim		return (0);
997296417Sdim	}
998296417Sdim
999296417Sdim	if (siginfo != NULL) {
1000296417Sdim		bzero(siginfo, sizeof(*siginfo));
1001296417Sdim		siginfo->si_errno = 0;
1002296417Sdim
1003296417Sdim		/*
1004296417Sdim		 * SUSv4 requires that the si_signo value is always
1005296417Sdim		 * SIGCHLD. Obey it despite the rfork(2) interface
1006296417Sdim		 * allows to request other signal for child exit
1007296417Sdim		 * notification.
1008296417Sdim		 */
1009296417Sdim		siginfo->si_signo = SIGCHLD;
1010296417Sdim
1011296417Sdim		/*
1012296417Sdim		 *  This is still a rough estimate.  We will fix the
1013296417Sdim		 *  cases TRAPPED, STOPPED, and CONTINUED later.
1014296417Sdim		 */
1015296417Sdim		if (WCOREDUMP(p->p_xsig)) {
1016296417Sdim			siginfo->si_code = CLD_DUMPED;
1017296417Sdim			siginfo->si_status = WTERMSIG(p->p_xsig);
1018296417Sdim		} else if (WIFSIGNALED(p->p_xsig)) {
1019296417Sdim			siginfo->si_code = CLD_KILLED;
1020296417Sdim			siginfo->si_status = WTERMSIG(p->p_xsig);
1021296417Sdim		} else {
1022296417Sdim			siginfo->si_code = CLD_EXITED;
1023296417Sdim			siginfo->si_status = p->p_xexit;
1024296417Sdim		}
1025296417Sdim
1026296417Sdim		siginfo->si_pid = p->p_pid;
1027296417Sdim		siginfo->si_uid = p->p_ucred->cr_uid;
1028296417Sdim
1029296417Sdim		/*
1030296417Sdim		 * The si_addr field would be useful additional
1031296417Sdim		 * detail, but apparently the PC value may be lost
1032296417Sdim		 * when we reach this point.  bzero() above sets
1033296417Sdim		 * siginfo->si_addr to NULL.
1034296417Sdim		 */
1035296417Sdim	}
1036296417Sdim
1037296417Sdim	/*
1038296417Sdim	 * There should be no reason to limit resources usage info to
1039296417Sdim	 * exited processes only.  A snapshot about any resources used
1040296417Sdim	 * by a stopped process may be exactly what is needed.
1041296417Sdim	 */
1042296417Sdim	if (wrusage != NULL) {
1043296417Sdim		rup = &wrusage->wru_self;
1044296417Sdim		*rup = p->p_ru;
1045296417Sdim		PROC_STATLOCK(p);
1046296417Sdim		calcru(p, &rup->ru_utime, &rup->ru_stime);
1047296417Sdim		PROC_STATUNLOCK(p);
1048296417Sdim
1049296417Sdim		rup = &wrusage->wru_children;
1050296417Sdim		*rup = p->p_stats->p_cru;
1051296417Sdim		calccru(p, &rup->ru_utime, &rup->ru_stime);
1052296417Sdim	}
1053296417Sdim
1054296417Sdim	if (p->p_state == PRS_ZOMBIE && !check_only) {
1055296417Sdim		PROC_SLOCK(p);
1056296417Sdim		proc_reap(td, p, status, options);
1057296417Sdim		return (-1);
1058296417Sdim	}
1059296417Sdim	PROC_UNLOCK(p);
1060296417Sdim	return (1);
1061296417Sdim}
1062296417Sdim
1063296417Sdimint
1064296417Sdimkern_wait(struct thread *td, pid_t pid, int *status, int options,
1065296417Sdim    struct rusage *rusage)
1066296417Sdim{
1067296417Sdim	struct __wrusage wru, *wrup;
1068296417Sdim	idtype_t idtype;
1069296417Sdim	id_t id;
1070296417Sdim	int ret;
1071296417Sdim
1072296417Sdim	/*
1073296417Sdim	 * Translate the special pid values into the (idtype, pid)
1074296417Sdim	 * pair for kern_wait6.  The WAIT_MYPGRP case is handled by
1075296417Sdim	 * kern_wait6() on its own.
1076296417Sdim	 */
1077296417Sdim	if (pid == WAIT_ANY) {
1078296417Sdim		idtype = P_ALL;
1079296417Sdim		id = 0;
1080296417Sdim	} else if (pid < 0) {
1081296417Sdim		idtype = P_PGID;
1082296417Sdim		id = (id_t)-pid;
1083296417Sdim	} else {
1084296417Sdim		idtype = P_PID;
1085296417Sdim		id = (id_t)pid;
1086296417Sdim	}
1087296417Sdim
1088296417Sdim	if (rusage != NULL)
1089296417Sdim		wrup = &wru;
1090296417Sdim	else
1091296417Sdim		wrup = NULL;
1092296417Sdim
1093296417Sdim	/*
1094296417Sdim	 * For backward compatibility we implicitly add flags WEXITED
1095296417Sdim	 * and WTRAPPED here.
1096296417Sdim	 */
1097296417Sdim	options |= WEXITED | WTRAPPED;
1098296417Sdim	ret = kern_wait6(td, idtype, id, status, options, wrup, NULL);
1099296417Sdim	if (rusage != NULL)
1100296417Sdim		*rusage = wru.wru_self;
1101296417Sdim	return (ret);
1102296417Sdim}
1103296417Sdim
1104296417Sdimint
1105296417Sdimkern_wait6(struct thread *td, idtype_t idtype, id_t id, int *status,
1106296417Sdim    int options, struct __wrusage *wrusage, siginfo_t *siginfo)
1107296417Sdim{
1108296417Sdim	struct proc *p, *q;
1109296417Sdim	pid_t pid;
1110296417Sdim	int error, nfound, ret;
1111296417Sdim
1112296417Sdim	AUDIT_ARG_VALUE((int)idtype);	/* XXX - This is likely wrong! */
1113296417Sdim	AUDIT_ARG_PID((pid_t)id);	/* XXX - This may be wrong! */
1114296417Sdim	AUDIT_ARG_VALUE(options);
1115296417Sdim
1116296417Sdim	q = td->td_proc;
1117296417Sdim
1118296417Sdim	if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) {
1119296417Sdim		PROC_LOCK(q);
1120296417Sdim		id = (id_t)q->p_pgid;
1121296417Sdim		PROC_UNLOCK(q);
1122296417Sdim		idtype = P_PGID;
1123296417Sdim	}
1124296417Sdim
1125296417Sdim	/* If we don't know the option, just return. */
1126296417Sdim	if ((options & ~(WUNTRACED | WNOHANG | WCONTINUED | WNOWAIT |
1127296417Sdim	    WEXITED | WTRAPPED | WLINUXCLONE)) != 0)
1128296417Sdim		return (EINVAL);
1129296417Sdim	if ((options & (WEXITED | WUNTRACED | WCONTINUED | WTRAPPED)) == 0) {
1130296417Sdim		/*
1131296417Sdim		 * We will be unable to find any matching processes,
1132296417Sdim		 * because there are no known events to look for.
1133296417Sdim		 * Prefer to return error instead of blocking
1134296417Sdim		 * indefinitely.
1135296417Sdim		 */
1136296417Sdim		return (EINVAL);
1137296417Sdim	}
1138296417Sdim
1139296417Sdimloop:
1140296417Sdim	if (q->p_flag & P_STATCHILD) {
1141296417Sdim		PROC_LOCK(q);
1142296417Sdim		q->p_flag &= ~P_STATCHILD;
1143296417Sdim		PROC_UNLOCK(q);
1144296417Sdim	}
1145296417Sdim	nfound = 0;
1146296417Sdim	sx_xlock(&proctree_lock);
1147296417Sdim	LIST_FOREACH(p, &q->p_children, p_sibling) {
1148296417Sdim		pid = p->p_pid;
1149296417Sdim		ret = proc_to_reap(td, p, idtype, id, status, options,
1150296417Sdim		    wrusage, siginfo, 0);
1151296417Sdim		if (ret == 0)
1152296417Sdim			continue;
1153296417Sdim		else if (ret == 1)
1154296417Sdim			nfound++;
1155296417Sdim		else {
1156296417Sdim			td->td_retval[0] = pid;
1157296417Sdim			return (0);
1158296417Sdim		}
1159296417Sdim
1160296417Sdim		PROC_LOCK(p);
1161296417Sdim		PROC_SLOCK(p);
1162296417Sdim
1163296417Sdim		if ((options & WTRAPPED) != 0 &&
1164296417Sdim		    (p->p_flag & P_TRACED) != 0 &&
1165296417Sdim		    (p->p_flag & (P_STOPPED_TRACE | P_STOPPED_SIG)) != 0 &&
1166296417Sdim		    (p->p_suspcount == p->p_numthreads) &&
1167296417Sdim		    ((p->p_flag & P_WAITED) == 0)) {
1168296417Sdim			PROC_SUNLOCK(p);
1169296417Sdim			if ((options & WNOWAIT) == 0)
1170296417Sdim				p->p_flag |= P_WAITED;
1171296417Sdim			sx_xunlock(&proctree_lock);
1172296417Sdim
1173296417Sdim			if (status != NULL)
1174296417Sdim				*status = W_STOPCODE(p->p_xsig);
1175296417Sdim			if (siginfo != NULL) {
1176296417Sdim				siginfo->si_status = p->p_xsig;
1177296417Sdim				siginfo->si_code = CLD_TRAPPED;
1178296417Sdim			}
1179296417Sdim			if ((options & WNOWAIT) == 0) {
1180296417Sdim				PROC_LOCK(q);
1181296417Sdim				sigqueue_take(p->p_ksi);
1182296417Sdim				PROC_UNLOCK(q);
1183296417Sdim			}
1184296417Sdim
1185254721Semaste			CTR4(KTR_PTRACE,
1186296417Sdim	    "wait: returning trapped pid %d status %#x (xstat %d) xthread %d",
1187296417Sdim			    p->p_pid, W_STOPCODE(p->p_xsig), p->p_xsig,
1188296417Sdim			    p->p_xthread != NULL ? p->p_xthread->td_tid : -1);
1189296417Sdim			PROC_UNLOCK(p);
1190296417Sdim			td->td_retval[0] = pid;
1191296417Sdim			return (0);
1192296417Sdim		}
1193296417Sdim		if ((options & WUNTRACED) != 0 &&
1194296417Sdim		    (p->p_flag & P_STOPPED_SIG) != 0 &&
1195296417Sdim		    (p->p_suspcount == p->p_numthreads) &&
1196296417Sdim		    ((p->p_flag & P_WAITED) == 0)) {
1197296417Sdim			PROC_SUNLOCK(p);
1198296417Sdim			if ((options & WNOWAIT) == 0)
1199296417Sdim				p->p_flag |= P_WAITED;
1200296417Sdim			sx_xunlock(&proctree_lock);
1201296417Sdim
1202254721Semaste			if (status != NULL)
1203254721Semaste				*status = W_STOPCODE(p->p_xsig);
1204254721Semaste			if (siginfo != NULL) {
1205254721Semaste				siginfo->si_status = p->p_xsig;
1206254721Semaste				siginfo->si_code = CLD_STOPPED;
1207254721Semaste			}
1208254721Semaste			if ((options & WNOWAIT) == 0) {
1209254721Semaste				PROC_LOCK(q);
1210254721Semaste				sigqueue_take(p->p_ksi);
1211254721Semaste				PROC_UNLOCK(q);
1212254721Semaste			}
1213276479Sdim
1214254721Semaste			PROC_UNLOCK(p);
1215254721Semaste			td->td_retval[0] = pid;
1216254721Semaste			return (0);
1217254721Semaste		}
1218296417Sdim		PROC_SUNLOCK(p);
1219296417Sdim		if ((options & WCONTINUED) != 0 &&
1220296417Sdim		    (p->p_flag & P_CONTINUED) != 0) {
1221254721Semaste			sx_xunlock(&proctree_lock);
1222254721Semaste			if ((options & WNOWAIT) == 0) {
1223254721Semaste				p->p_flag &= ~P_CONTINUED;
1224254721Semaste				PROC_LOCK(q);
1225296417Sdim				sigqueue_take(p->p_ksi);
1226296417Sdim				PROC_UNLOCK(q);
1227296417Sdim			}
1228296417Sdim			PROC_UNLOCK(p);
1229254721Semaste
1230254721Semaste			if (status != NULL)
1231254721Semaste				*status = SIGCONT;
1232254721Semaste			if (siginfo != NULL) {
1233254721Semaste				siginfo->si_status = SIGCONT;
1234254721Semaste				siginfo->si_code = CLD_CONTINUED;
1235254721Semaste			}
1236254721Semaste			td->td_retval[0] = pid;
1237296417Sdim			return (0);
1238296417Sdim		}
1239296417Sdim		PROC_UNLOCK(p);
1240296417Sdim	}
1241296417Sdim
1242296417Sdim	/*
1243296417Sdim	 * Look in the orphans list too, to allow the parent to
1244296417Sdim	 * collect it's child exit status even if child is being
1245296417Sdim	 * debugged.
1246296417Sdim	 *
1247296417Sdim	 * Debugger detaches from the parent upon successful
1248296417Sdim	 * switch-over from parent to child.  At this point due to
1249296417Sdim	 * re-parenting the parent loses the child to debugger and a
1250296417Sdim	 * wait4(2) call would report that it has no children to wait
1251296417Sdim	 * for.  By maintaining a list of orphans we allow the parent
1252296417Sdim	 * to successfully wait until the child becomes a zombie.
1253296417Sdim	 */
1254296417Sdim	if (nfound == 0) {
1255296417Sdim		LIST_FOREACH(p, &q->p_orphans, p_orphan) {
1256296417Sdim			ret = proc_to_reap(td, p, idtype, id, NULL, options,
1257296417Sdim			    NULL, NULL, 1);
1258296417Sdim			if (ret != 0) {
1259296417Sdim				KASSERT(ret != -1, ("reaped an orphan (pid %d)",
1260296417Sdim				    (int)td->td_retval[0]));
1261296417Sdim				nfound++;
1262296417Sdim				break;
1263296417Sdim			}
1264296417Sdim		}
1265296417Sdim	}
1266296417Sdim	if (nfound == 0) {
1267254721Semaste		sx_xunlock(&proctree_lock);
1268254721Semaste		return (ECHILD);
1269296417Sdim	}
1270	if (options & WNOHANG) {
1271		sx_xunlock(&proctree_lock);
1272		td->td_retval[0] = 0;
1273		return (0);
1274	}
1275	PROC_LOCK(q);
1276	sx_xunlock(&proctree_lock);
1277	if (q->p_flag & P_STATCHILD) {
1278		q->p_flag &= ~P_STATCHILD;
1279		error = 0;
1280	} else
1281		error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0);
1282	PROC_UNLOCK(q);
1283	if (error)
1284		return (error);
1285	goto loop;
1286}
1287
1288/*
1289 * Make process 'parent' the new parent of process 'child'.
1290 * Must be called with an exclusive hold of proctree lock.
1291 */
1292void
1293proc_reparent(struct proc *child, struct proc *parent)
1294{
1295
1296	sx_assert(&proctree_lock, SX_XLOCKED);
1297	PROC_LOCK_ASSERT(child, MA_OWNED);
1298	if (child->p_pptr == parent)
1299		return;
1300
1301	PROC_LOCK(child->p_pptr);
1302	sigqueue_take(child->p_ksi);
1303	PROC_UNLOCK(child->p_pptr);
1304	LIST_REMOVE(child, p_sibling);
1305	LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
1306
1307	clear_orphan(child);
1308	if (child->p_flag & P_TRACED) {
1309		if (LIST_EMPTY(&child->p_pptr->p_orphans)) {
1310			child->p_treeflag |= P_TREE_FIRST_ORPHAN;
1311			LIST_INSERT_HEAD(&child->p_pptr->p_orphans, child,
1312			    p_orphan);
1313		} else {
1314			LIST_INSERT_AFTER(LIST_FIRST(&child->p_pptr->p_orphans),
1315			    child, p_orphan);
1316		}
1317		child->p_treeflag |= P_TREE_ORPHANED;
1318	}
1319
1320	child->p_pptr = parent;
1321}
1322