kern_proc.c revision 287645
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/kern_proc.c 287645 2015-09-11 03:54:37Z markj $");
34
35#include "opt_compat.h"
36#include "opt_ddb.h"
37#include "opt_ktrace.h"
38#include "opt_kstack_pages.h"
39#include "opt_stack.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/elf.h>
44#include <sys/eventhandler.h>
45#include <sys/exec.h>
46#include <sys/jail.h>
47#include <sys/kernel.h>
48#include <sys/limits.h>
49#include <sys/lock.h>
50#include <sys/loginclass.h>
51#include <sys/malloc.h>
52#include <sys/mman.h>
53#include <sys/mount.h>
54#include <sys/mutex.h>
55#include <sys/proc.h>
56#include <sys/ptrace.h>
57#include <sys/refcount.h>
58#include <sys/resourcevar.h>
59#include <sys/rwlock.h>
60#include <sys/sbuf.h>
61#include <sys/sysent.h>
62#include <sys/sched.h>
63#include <sys/smp.h>
64#include <sys/stack.h>
65#include <sys/stat.h>
66#include <sys/sysctl.h>
67#include <sys/filedesc.h>
68#include <sys/tty.h>
69#include <sys/signalvar.h>
70#include <sys/sdt.h>
71#include <sys/sx.h>
72#include <sys/user.h>
73#include <sys/vnode.h>
74#include <sys/wait.h>
75
76#ifdef DDB
77#include <ddb/ddb.h>
78#endif
79
80#include <vm/vm.h>
81#include <vm/vm_param.h>
82#include <vm/vm_extern.h>
83#include <vm/pmap.h>
84#include <vm/vm_map.h>
85#include <vm/vm_object.h>
86#include <vm/vm_page.h>
87#include <vm/uma.h>
88
89#ifdef COMPAT_FREEBSD32
90#include <compat/freebsd32/freebsd32.h>
91#include <compat/freebsd32/freebsd32_util.h>
92#endif
93
94SDT_PROVIDER_DEFINE(proc);
95SDT_PROBE_DEFINE4(proc, kernel, ctor, entry, "struct proc *", "int",
96    "void *", "int");
97SDT_PROBE_DEFINE4(proc, kernel, ctor, return, "struct proc *", "int",
98    "void *", "int");
99SDT_PROBE_DEFINE4(proc, kernel, dtor, entry, "struct proc *", "int",
100    "void *", "struct thread *");
101SDT_PROBE_DEFINE3(proc, kernel, dtor, return, "struct proc *", "int",
102    "void *");
103SDT_PROBE_DEFINE3(proc, kernel, init, entry, "struct proc *", "int",
104    "int");
105SDT_PROBE_DEFINE3(proc, kernel, init, return, "struct proc *", "int",
106    "int");
107
108MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
109MALLOC_DEFINE(M_SESSION, "session", "session header");
110static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
111MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
112
113static void doenterpgrp(struct proc *, struct pgrp *);
114static void orphanpg(struct pgrp *pg);
115static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp);
116static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp);
117static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
118    int preferthread);
119static void pgadjustjobc(struct pgrp *pgrp, int entering);
120static void pgdelete(struct pgrp *);
121static int proc_ctor(void *mem, int size, void *arg, int flags);
122static void proc_dtor(void *mem, int size, void *arg);
123static int proc_init(void *mem, int size, int flags);
124static void proc_fini(void *mem, int size);
125static void pargs_free(struct pargs *pa);
126static struct proc *zpfind_locked(pid_t pid);
127
128/*
129 * Other process lists
130 */
131struct pidhashhead *pidhashtbl;
132u_long pidhash;
133struct pgrphashhead *pgrphashtbl;
134u_long pgrphash;
135struct proclist allproc;
136struct proclist zombproc;
137struct sx allproc_lock;
138struct sx proctree_lock;
139struct mtx ppeers_lock;
140uma_zone_t proc_zone;
141
142int kstack_pages = KSTACK_PAGES;
143SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
144    "Kernel stack size in pages");
145static int vmmap_skip_res_cnt = 0;
146SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
147    &vmmap_skip_res_cnt, 0,
148    "Skip calculation of the pages resident count in kern.proc.vmmap");
149
150CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
151#ifdef COMPAT_FREEBSD32
152CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
153#endif
154
155/*
156 * Initialize global process hashing structures.
157 */
158void
159procinit()
160{
161
162	sx_init(&allproc_lock, "allproc");
163	sx_init(&proctree_lock, "proctree");
164	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
165	LIST_INIT(&allproc);
166	LIST_INIT(&zombproc);
167	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
168	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
169	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
170	    proc_ctor, proc_dtor, proc_init, proc_fini,
171	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
172	uihashinit();
173}
174
175/*
176 * Prepare a proc for use.
177 */
178static int
179proc_ctor(void *mem, int size, void *arg, int flags)
180{
181	struct proc *p;
182
183	p = (struct proc *)mem;
184	SDT_PROBE(proc, kernel, ctor , entry, p, size, arg, flags, 0);
185	EVENTHANDLER_INVOKE(process_ctor, p);
186	SDT_PROBE(proc, kernel, ctor , return, p, size, arg, flags, 0);
187	return (0);
188}
189
190/*
191 * Reclaim a proc after use.
192 */
193static void
194proc_dtor(void *mem, int size, void *arg)
195{
196	struct proc *p;
197	struct thread *td;
198
199	/* INVARIANTS checks go here */
200	p = (struct proc *)mem;
201	td = FIRST_THREAD_IN_PROC(p);
202	SDT_PROBE(proc, kernel, dtor, entry, p, size, arg, td, 0);
203	if (td != NULL) {
204#ifdef INVARIANTS
205		KASSERT((p->p_numthreads == 1),
206		    ("bad number of threads in exiting process"));
207		KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr"));
208#endif
209		/* Free all OSD associated to this thread. */
210		osd_thread_exit(td);
211	}
212	EVENTHANDLER_INVOKE(process_dtor, p);
213	if (p->p_ksi != NULL)
214		KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue"));
215	SDT_PROBE(proc, kernel, dtor, return, p, size, arg, 0, 0);
216}
217
218/*
219 * Initialize type-stable parts of a proc (when newly created).
220 */
221static int
222proc_init(void *mem, int size, int flags)
223{
224	struct proc *p;
225
226	p = (struct proc *)mem;
227	SDT_PROBE(proc, kernel, init, entry, p, size, flags, 0, 0);
228	p->p_sched = (struct p_sched *)&p[1];
229	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW);
230	mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW);
231	mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW);
232	mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW);
233	mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW);
234	cv_init(&p->p_pwait, "ppwait");
235	cv_init(&p->p_dbgwait, "dbgwait");
236	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
237	EVENTHANDLER_INVOKE(process_init, p);
238	p->p_stats = pstats_alloc();
239	SDT_PROBE(proc, kernel, init, return, p, size, flags, 0, 0);
240	return (0);
241}
242
/*
 * UMA should ensure that this function is never called.
 * Freeing a proc structure would violate type stability.
 *
 * Unless "notnow" is defined the function simply panics; the teardown
 * code is kept only under that macro.
 */
static void
proc_fini(void *mem, int size)
{
#ifndef notnow
	panic("proc reclaimed");
#else
	struct proc *deadp = mem;

	EVENTHANDLER_INVOKE(process_fini, deadp);
	pstats_free(deadp->p_stats);
	thread_free(FIRST_THREAD_IN_PROC(deadp));
	mtx_destroy(&deadp->p_mtx);
	if (deadp->p_ksi != NULL)
		ksiginfo_free(deadp->p_ksi);
#endif
}
264
265/*
266 * Is p an inferior of the current process?
267 */
268int
269inferior(struct proc *p)
270{
271
272	sx_assert(&proctree_lock, SX_LOCKED);
273	PROC_LOCK_ASSERT(p, MA_OWNED);
274	for (; p != curproc; p = proc_realparent(p)) {
275		if (p->p_pid == 0)
276			return (0);
277	}
278	return (1);
279}
280
281struct proc *
282pfind_locked(pid_t pid)
283{
284	struct proc *p;
285
286	sx_assert(&allproc_lock, SX_LOCKED);
287	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
288		if (p->p_pid == pid) {
289			PROC_LOCK(p);
290			if (p->p_state == PRS_NEW) {
291				PROC_UNLOCK(p);
292				p = NULL;
293			}
294			break;
295		}
296	}
297	return (p);
298}
299
300/*
301 * Locate a process by number; return only "live" processes -- i.e., neither
302 * zombies nor newly born but incompletely initialized processes.  By not
303 * returning processes in the PRS_NEW state, we allow callers to avoid
304 * testing for that condition to avoid dereferencing p_ucred, et al.
305 */
306struct proc *
307pfind(pid_t pid)
308{
309	struct proc *p;
310
311	sx_slock(&allproc_lock);
312	p = pfind_locked(pid);
313	sx_sunlock(&allproc_lock);
314	return (p);
315}
316
317static struct proc *
318pfind_tid_locked(pid_t tid)
319{
320	struct proc *p;
321	struct thread *td;
322
323	sx_assert(&allproc_lock, SX_LOCKED);
324	FOREACH_PROC_IN_SYSTEM(p) {
325		PROC_LOCK(p);
326		if (p->p_state == PRS_NEW) {
327			PROC_UNLOCK(p);
328			continue;
329		}
330		FOREACH_THREAD_IN_PROC(p, td) {
331			if (td->td_tid == tid)
332				goto found;
333		}
334		PROC_UNLOCK(p);
335	}
336found:
337	return (p);
338}
339
340/*
341 * Locate a process group by number.
342 * The caller must hold proctree_lock.
343 */
344struct pgrp *
345pgfind(pgid)
346	register pid_t pgid;
347{
348	register struct pgrp *pgrp;
349
350	sx_assert(&proctree_lock, SX_LOCKED);
351
352	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
353		if (pgrp->pg_id == pgid) {
354			PGRP_LOCK(pgrp);
355			return (pgrp);
356		}
357	}
358	return (NULL);
359}
360
361/*
362 * Locate process and do additional manipulations, depending on flags.
363 */
364int
365pget(pid_t pid, int flags, struct proc **pp)
366{
367	struct proc *p;
368	int error;
369
370	sx_slock(&allproc_lock);
371	if (pid <= PID_MAX) {
372		p = pfind_locked(pid);
373		if (p == NULL && (flags & PGET_NOTWEXIT) == 0)
374			p = zpfind_locked(pid);
375	} else if ((flags & PGET_NOTID) == 0) {
376		p = pfind_tid_locked(pid);
377	} else {
378		p = NULL;
379	}
380	sx_sunlock(&allproc_lock);
381	if (p == NULL)
382		return (ESRCH);
383	if ((flags & PGET_CANSEE) != 0) {
384		error = p_cansee(curthread, p);
385		if (error != 0)
386			goto errout;
387	}
388	if ((flags & PGET_CANDEBUG) != 0) {
389		error = p_candebug(curthread, p);
390		if (error != 0)
391			goto errout;
392	}
393	if ((flags & PGET_ISCURRENT) != 0 && curproc != p) {
394		error = EPERM;
395		goto errout;
396	}
397	if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) {
398		error = ESRCH;
399		goto errout;
400	}
401	if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) {
402		/*
403		 * XXXRW: Not clear ESRCH is the right error during proc
404		 * execve().
405		 */
406		error = ESRCH;
407		goto errout;
408	}
409	if ((flags & PGET_HOLD) != 0) {
410		_PHOLD(p);
411		PROC_UNLOCK(p);
412	}
413	*pp = p;
414	return (0);
415errout:
416	PROC_UNLOCK(p);
417	return (error);
418}
419
420/*
421 * Create a new process group.
422 * pgid must be equal to the pid of p.
423 * Begin a new session if required.
424 */
425int
426enterpgrp(p, pgid, pgrp, sess)
427	register struct proc *p;
428	pid_t pgid;
429	struct pgrp *pgrp;
430	struct session *sess;
431{
432
433	sx_assert(&proctree_lock, SX_XLOCKED);
434
435	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
436	KASSERT(p->p_pid == pgid,
437	    ("enterpgrp: new pgrp and pid != pgid"));
438	KASSERT(pgfind(pgid) == NULL,
439	    ("enterpgrp: pgrp with pgid exists"));
440	KASSERT(!SESS_LEADER(p),
441	    ("enterpgrp: session leader attempted setpgrp"));
442
443	mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
444
445	if (sess != NULL) {
446		/*
447		 * new session
448		 */
449		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
450		PROC_LOCK(p);
451		p->p_flag &= ~P_CONTROLT;
452		PROC_UNLOCK(p);
453		PGRP_LOCK(pgrp);
454		sess->s_leader = p;
455		sess->s_sid = p->p_pid;
456		refcount_init(&sess->s_count, 1);
457		sess->s_ttyvp = NULL;
458		sess->s_ttydp = NULL;
459		sess->s_ttyp = NULL;
460		bcopy(p->p_session->s_login, sess->s_login,
461			    sizeof(sess->s_login));
462		pgrp->pg_session = sess;
463		KASSERT(p == curproc,
464		    ("enterpgrp: mksession and p != curproc"));
465	} else {
466		pgrp->pg_session = p->p_session;
467		sess_hold(pgrp->pg_session);
468		PGRP_LOCK(pgrp);
469	}
470	pgrp->pg_id = pgid;
471	LIST_INIT(&pgrp->pg_members);
472
473	/*
474	 * As we have an exclusive lock of proctree_lock,
475	 * this should not deadlock.
476	 */
477	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
478	pgrp->pg_jobc = 0;
479	SLIST_INIT(&pgrp->pg_sigiolst);
480	PGRP_UNLOCK(pgrp);
481
482	doenterpgrp(p, pgrp);
483
484	return (0);
485}
486
487/*
488 * Move p to an existing process group
489 */
490int
491enterthispgrp(p, pgrp)
492	register struct proc *p;
493	struct pgrp *pgrp;
494{
495
496	sx_assert(&proctree_lock, SX_XLOCKED);
497	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
498	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
499	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
500	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
501	KASSERT(pgrp->pg_session == p->p_session,
502		("%s: pgrp's session %p, p->p_session %p.\n",
503		__func__,
504		pgrp->pg_session,
505		p->p_session));
506	KASSERT(pgrp != p->p_pgrp,
507		("%s: p belongs to pgrp.", __func__));
508
509	doenterpgrp(p, pgrp);
510
511	return (0);
512}
513
514/*
515 * Move p to a process group
516 */
517static void
518doenterpgrp(p, pgrp)
519	struct proc *p;
520	struct pgrp *pgrp;
521{
522	struct pgrp *savepgrp;
523
524	sx_assert(&proctree_lock, SX_XLOCKED);
525	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
526	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
527	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
528	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
529
530	savepgrp = p->p_pgrp;
531
532	/*
533	 * Adjust eligibility of affected pgrps to participate in job control.
534	 * Increment eligibility counts before decrementing, otherwise we
535	 * could reach 0 spuriously during the first call.
536	 */
537	fixjobc(p, pgrp, 1);
538	fixjobc(p, p->p_pgrp, 0);
539
540	PGRP_LOCK(pgrp);
541	PGRP_LOCK(savepgrp);
542	PROC_LOCK(p);
543	LIST_REMOVE(p, p_pglist);
544	p->p_pgrp = pgrp;
545	PROC_UNLOCK(p);
546	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
547	PGRP_UNLOCK(savepgrp);
548	PGRP_UNLOCK(pgrp);
549	if (LIST_EMPTY(&savepgrp->pg_members))
550		pgdelete(savepgrp);
551}
552
553/*
554 * remove process from process group
555 */
556int
557leavepgrp(p)
558	register struct proc *p;
559{
560	struct pgrp *savepgrp;
561
562	sx_assert(&proctree_lock, SX_XLOCKED);
563	savepgrp = p->p_pgrp;
564	PGRP_LOCK(savepgrp);
565	PROC_LOCK(p);
566	LIST_REMOVE(p, p_pglist);
567	p->p_pgrp = NULL;
568	PROC_UNLOCK(p);
569	PGRP_UNLOCK(savepgrp);
570	if (LIST_EMPTY(&savepgrp->pg_members))
571		pgdelete(savepgrp);
572	return (0);
573}
574
575/*
576 * delete a process group
577 */
578static void
579pgdelete(pgrp)
580	register struct pgrp *pgrp;
581{
582	struct session *savesess;
583	struct tty *tp;
584
585	sx_assert(&proctree_lock, SX_XLOCKED);
586	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
587	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
588
589	/*
590	 * Reset any sigio structures pointing to us as a result of
591	 * F_SETOWN with our pgid.
592	 */
593	funsetownlst(&pgrp->pg_sigiolst);
594
595	PGRP_LOCK(pgrp);
596	tp = pgrp->pg_session->s_ttyp;
597	LIST_REMOVE(pgrp, pg_hash);
598	savesess = pgrp->pg_session;
599	PGRP_UNLOCK(pgrp);
600
601	/* Remove the reference to the pgrp before deallocating it. */
602	if (tp != NULL) {
603		tty_lock(tp);
604		tty_rel_pgrp(tp, pgrp);
605	}
606
607	mtx_destroy(&pgrp->pg_mtx);
608	free(pgrp, M_PGRP);
609	sess_release(savesess);
610}
611
612static void
613pgadjustjobc(pgrp, entering)
614	struct pgrp *pgrp;
615	int entering;
616{
617
618	PGRP_LOCK(pgrp);
619	if (entering)
620		pgrp->pg_jobc++;
621	else {
622		--pgrp->pg_jobc;
623		if (pgrp->pg_jobc == 0)
624			orphanpg(pgrp);
625	}
626	PGRP_UNLOCK(pgrp);
627}
628
629/*
630 * Adjust pgrp jobc counters when specified process changes process group.
631 * We count the number of processes in each process group that "qualify"
632 * the group for terminal job control (those with a parent in a different
633 * process group of the same session).  If that count reaches zero, the
634 * process group becomes orphaned.  Check both the specified process'
635 * process group and that of its children.
636 * entering == 0 => p is leaving specified group.
637 * entering == 1 => p is entering specified group.
638 */
639void
640fixjobc(p, pgrp, entering)
641	register struct proc *p;
642	register struct pgrp *pgrp;
643	int entering;
644{
645	register struct pgrp *hispgrp;
646	register struct session *mysession;
647
648	sx_assert(&proctree_lock, SX_LOCKED);
649	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
650	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
651	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
652
653	/*
654	 * Check p's parent to see whether p qualifies its own process
655	 * group; if so, adjust count for p's process group.
656	 */
657	mysession = pgrp->pg_session;
658	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
659	    hispgrp->pg_session == mysession)
660		pgadjustjobc(pgrp, entering);
661
662	/*
663	 * Check this process' children to see whether they qualify
664	 * their process groups; if so, adjust counts for children's
665	 * process groups.
666	 */
667	LIST_FOREACH(p, &p->p_children, p_sibling) {
668		hispgrp = p->p_pgrp;
669		if (hispgrp == pgrp ||
670		    hispgrp->pg_session != mysession)
671			continue;
672		PROC_LOCK(p);
673		if (p->p_state == PRS_ZOMBIE) {
674			PROC_UNLOCK(p);
675			continue;
676		}
677		PROC_UNLOCK(p);
678		pgadjustjobc(hispgrp, entering);
679	}
680}
681
682/*
683 * A process group has become orphaned;
684 * if there are any stopped processes in the group,
685 * hang-up all process in that group.
686 */
687static void
688orphanpg(pg)
689	struct pgrp *pg;
690{
691	register struct proc *p;
692
693	PGRP_LOCK_ASSERT(pg, MA_OWNED);
694
695	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
696		PROC_LOCK(p);
697		if (P_SHOULDSTOP(p)) {
698			PROC_UNLOCK(p);
699			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
700				PROC_LOCK(p);
701				kern_psignal(p, SIGHUP);
702				kern_psignal(p, SIGCONT);
703				PROC_UNLOCK(p);
704			}
705			return;
706		}
707		PROC_UNLOCK(p);
708	}
709}
710
711void
712sess_hold(struct session *s)
713{
714
715	refcount_acquire(&s->s_count);
716}
717
718void
719sess_release(struct session *s)
720{
721
722	if (refcount_release(&s->s_count)) {
723		if (s->s_ttyp != NULL) {
724			tty_lock(s->s_ttyp);
725			tty_rel_sess(s->s_ttyp, s);
726		}
727		mtx_destroy(&s->s_mtx);
728		free(s, M_SESSION);
729	}
730}
731
#ifdef DDB

/*
 * DDB "show pgrpdump" command: walk every bucket of the process group
 * hash and print each group (id, session, session reference count, first
 * member) followed by the pid, address and pgrp pointer of each member.
 *
 * Output goes through db_printf() so that it is handled by the debugger's
 * own output path rather than the regular kernel console printf().
 */
DB_SHOW_COMMAND(pgrpdump, pgrpdump)
{
	struct pgrp *pgrp;
	struct proc *p;
	int i;

	/* pgrphash is used as the highest valid bucket index. */
	for (i = 0; i <= pgrphash; i++) {
		if (LIST_EMPTY(&pgrphashtbl[i]))
			continue;
		db_printf("\tindx %d\n", i);
		LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
			db_printf(
		    "\tpgrp %p, pgid %ld, sess %p, sesscnt %u, mem %p\n",
			    (void *)pgrp, (long)pgrp->pg_id,
			    (void *)pgrp->pg_session,
			    pgrp->pg_session->s_count,
			    (void *)LIST_FIRST(&pgrp->pg_members));
			LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
				db_printf("\t\tpid %ld addr %p pgrp %p\n",
				    (long)p->p_pid, (void *)p,
				    (void *)p->p_pgrp);
			}
		}
	}
}
#endif /* DDB */
760
761/*
762 * Calculate the kinfo_proc members which contain process-wide
763 * informations.
764 * Must be called with the target process locked.
765 */
766static void
767fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp)
768{
769	struct thread *td;
770
771	PROC_LOCK_ASSERT(p, MA_OWNED);
772
773	kp->ki_estcpu = 0;
774	kp->ki_pctcpu = 0;
775	FOREACH_THREAD_IN_PROC(p, td) {
776		thread_lock(td);
777		kp->ki_pctcpu += sched_pctcpu(td);
778		kp->ki_estcpu += td->td_estcpu;
779		thread_unlock(td);
780	}
781}
782
783/*
784 * Clear kinfo_proc and fill in any information that is common
785 * to all threads in the process.
786 * Must be called with the target process locked.
787 */
788static void
789fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
790{
791	struct thread *td0;
792	struct tty *tp;
793	struct session *sp;
794	struct ucred *cred;
795	struct sigacts *ps;
796
797	/* For proc_realparent. */
798	sx_assert(&proctree_lock, SX_LOCKED);
799	PROC_LOCK_ASSERT(p, MA_OWNED);
800	bzero(kp, sizeof(*kp));
801
802	kp->ki_structsize = sizeof(*kp);
803	kp->ki_paddr = p;
804	kp->ki_addr =/* p->p_addr; */0; /* XXX */
805	kp->ki_args = p->p_args;
806	kp->ki_textvp = p->p_textvp;
807#ifdef KTRACE
808	kp->ki_tracep = p->p_tracevp;
809	kp->ki_traceflag = p->p_traceflag;
810#endif
811	kp->ki_fd = p->p_fd;
812	kp->ki_vmspace = p->p_vmspace;
813	kp->ki_flag = p->p_flag;
814	kp->ki_flag2 = p->p_flag2;
815	cred = p->p_ucred;
816	if (cred) {
817		kp->ki_uid = cred->cr_uid;
818		kp->ki_ruid = cred->cr_ruid;
819		kp->ki_svuid = cred->cr_svuid;
820		kp->ki_cr_flags = 0;
821		if (cred->cr_flags & CRED_FLAG_CAPMODE)
822			kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
823		/* XXX bde doesn't like KI_NGROUPS */
824		if (cred->cr_ngroups > KI_NGROUPS) {
825			kp->ki_ngroups = KI_NGROUPS;
826			kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
827		} else
828			kp->ki_ngroups = cred->cr_ngroups;
829		bcopy(cred->cr_groups, kp->ki_groups,
830		    kp->ki_ngroups * sizeof(gid_t));
831		kp->ki_rgid = cred->cr_rgid;
832		kp->ki_svgid = cred->cr_svgid;
833		/* If jailed(cred), emulate the old P_JAILED flag. */
834		if (jailed(cred)) {
835			kp->ki_flag |= P_JAILED;
836			/* If inside the jail, use 0 as a jail ID. */
837			if (cred->cr_prison != curthread->td_ucred->cr_prison)
838				kp->ki_jid = cred->cr_prison->pr_id;
839		}
840		strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name,
841		    sizeof(kp->ki_loginclass));
842	}
843	ps = p->p_sigacts;
844	if (ps) {
845		mtx_lock(&ps->ps_mtx);
846		kp->ki_sigignore = ps->ps_sigignore;
847		kp->ki_sigcatch = ps->ps_sigcatch;
848		mtx_unlock(&ps->ps_mtx);
849	}
850	if (p->p_state != PRS_NEW &&
851	    p->p_state != PRS_ZOMBIE &&
852	    p->p_vmspace != NULL) {
853		struct vmspace *vm = p->p_vmspace;
854
855		kp->ki_size = vm->vm_map.size;
856		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
857		FOREACH_THREAD_IN_PROC(p, td0) {
858			if (!TD_IS_SWAPPED(td0))
859				kp->ki_rssize += td0->td_kstack_pages;
860		}
861		kp->ki_swrss = vm->vm_swrss;
862		kp->ki_tsize = vm->vm_tsize;
863		kp->ki_dsize = vm->vm_dsize;
864		kp->ki_ssize = vm->vm_ssize;
865	} else if (p->p_state == PRS_ZOMBIE)
866		kp->ki_stat = SZOMB;
867	if (kp->ki_flag & P_INMEM)
868		kp->ki_sflag = PS_INMEM;
869	else
870		kp->ki_sflag = 0;
871	/* Calculate legacy swtime as seconds since 'swtick'. */
872	kp->ki_swtime = (ticks - p->p_swtick) / hz;
873	kp->ki_pid = p->p_pid;
874	kp->ki_nice = p->p_nice;
875	kp->ki_fibnum = p->p_fibnum;
876	kp->ki_start = p->p_stats->p_start;
877	timevaladd(&kp->ki_start, &boottime);
878	PROC_STATLOCK(p);
879	rufetch(p, &kp->ki_rusage);
880	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
881	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
882	PROC_STATUNLOCK(p);
883	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
884	/* Some callers want child times in a single value. */
885	kp->ki_childtime = kp->ki_childstime;
886	timevaladd(&kp->ki_childtime, &kp->ki_childutime);
887
888	FOREACH_THREAD_IN_PROC(p, td0)
889		kp->ki_cow += td0->td_cow;
890
891	tp = NULL;
892	if (p->p_pgrp) {
893		kp->ki_pgid = p->p_pgrp->pg_id;
894		kp->ki_jobc = p->p_pgrp->pg_jobc;
895		sp = p->p_pgrp->pg_session;
896
897		if (sp != NULL) {
898			kp->ki_sid = sp->s_sid;
899			SESS_LOCK(sp);
900			strlcpy(kp->ki_login, sp->s_login,
901			    sizeof(kp->ki_login));
902			if (sp->s_ttyvp)
903				kp->ki_kiflag |= KI_CTTY;
904			if (SESS_LEADER(p))
905				kp->ki_kiflag |= KI_SLEADER;
906			/* XXX proctree_lock */
907			tp = sp->s_ttyp;
908			SESS_UNLOCK(sp);
909		}
910	}
911	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
912		kp->ki_tdev = tty_udev(tp);
913		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
914		if (tp->t_session)
915			kp->ki_tsid = tp->t_session->s_sid;
916	} else
917		kp->ki_tdev = NODEV;
918	if (p->p_comm[0] != '\0')
919		strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm));
920	if (p->p_sysent && p->p_sysent->sv_name != NULL &&
921	    p->p_sysent->sv_name[0] != '\0')
922		strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul));
923	kp->ki_siglist = p->p_siglist;
924	kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig);
925	kp->ki_acflag = p->p_acflag;
926	kp->ki_lock = p->p_lock;
927	if (p->p_pptr) {
928		kp->ki_ppid = proc_realparent(p)->p_pid;
929		if (p->p_flag & P_TRACED)
930			kp->ki_tracer = p->p_pptr->p_pid;
931	}
932}
933
934/*
935 * Fill in information that is thread specific.  Must be called with
936 * target process locked.  If 'preferthread' is set, overwrite certain
937 * process-related fields that are maintained for both threads and
938 * processes.
939 */
940static void
941fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
942{
943	struct proc *p;
944
945	p = td->td_proc;
946	kp->ki_tdaddr = td;
947	PROC_LOCK_ASSERT(p, MA_OWNED);
948
949	if (preferthread)
950		PROC_STATLOCK(p);
951	thread_lock(td);
952	if (td->td_wmesg != NULL)
953		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
954	else
955		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
956	strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname));
957	if (TD_ON_LOCK(td)) {
958		kp->ki_kiflag |= KI_LOCKBLOCK;
959		strlcpy(kp->ki_lockname, td->td_lockname,
960		    sizeof(kp->ki_lockname));
961	} else {
962		kp->ki_kiflag &= ~KI_LOCKBLOCK;
963		bzero(kp->ki_lockname, sizeof(kp->ki_lockname));
964	}
965
966	if (p->p_state == PRS_NORMAL) { /* approximate. */
967		if (TD_ON_RUNQ(td) ||
968		    TD_CAN_RUN(td) ||
969		    TD_IS_RUNNING(td)) {
970			kp->ki_stat = SRUN;
971		} else if (P_SHOULDSTOP(p)) {
972			kp->ki_stat = SSTOP;
973		} else if (TD_IS_SLEEPING(td)) {
974			kp->ki_stat = SSLEEP;
975		} else if (TD_ON_LOCK(td)) {
976			kp->ki_stat = SLOCK;
977		} else {
978			kp->ki_stat = SWAIT;
979		}
980	} else if (p->p_state == PRS_ZOMBIE) {
981		kp->ki_stat = SZOMB;
982	} else {
983		kp->ki_stat = SIDL;
984	}
985
986	/* Things in the thread */
987	kp->ki_wchan = td->td_wchan;
988	kp->ki_pri.pri_level = td->td_priority;
989	kp->ki_pri.pri_native = td->td_base_pri;
990
991	/*
992	 * Note: legacy fields; clamp at the old NOCPU value and/or
993	 * the maximum u_char CPU value.
994	 */
995	if (td->td_lastcpu == NOCPU)
996		kp->ki_lastcpu_old = NOCPU_OLD;
997	else if (td->td_lastcpu > MAXCPU_OLD)
998		kp->ki_lastcpu_old = MAXCPU_OLD;
999	else
1000		kp->ki_lastcpu_old = td->td_lastcpu;
1001
1002	if (td->td_oncpu == NOCPU)
1003		kp->ki_oncpu_old = NOCPU_OLD;
1004	else if (td->td_oncpu > MAXCPU_OLD)
1005		kp->ki_oncpu_old = MAXCPU_OLD;
1006	else
1007		kp->ki_oncpu_old = td->td_oncpu;
1008
1009	kp->ki_lastcpu = td->td_lastcpu;
1010	kp->ki_oncpu = td->td_oncpu;
1011	kp->ki_tdflags = td->td_flags;
1012	kp->ki_tid = td->td_tid;
1013	kp->ki_numthreads = p->p_numthreads;
1014	kp->ki_pcb = td->td_pcb;
1015	kp->ki_kstack = (void *)td->td_kstack;
1016	kp->ki_slptime = (ticks - td->td_slptick) / hz;
1017	kp->ki_pri.pri_class = td->td_pri_class;
1018	kp->ki_pri.pri_user = td->td_user_pri;
1019
1020	if (preferthread) {
1021		rufetchtd(td, &kp->ki_rusage);
1022		kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
1023		kp->ki_pctcpu = sched_pctcpu(td);
1024		kp->ki_estcpu = td->td_estcpu;
1025		kp->ki_cow = td->td_cow;
1026	}
1027
1028	/* We can't get this anymore but ps etc never used it anyway. */
1029	kp->ki_rqindex = 0;
1030
1031	if (preferthread)
1032		kp->ki_siglist = td->td_siglist;
1033	kp->ki_sigmask = td->td_sigmask;
1034	thread_unlock(td);
1035	if (preferthread)
1036		PROC_STATUNLOCK(p);
1037}
1038
1039/*
1040 * Fill in a kinfo_proc structure for the specified process.
1041 * Must be called with the target process locked.
1042 */
1043void
1044fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
1045{
1046
1047	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1048
1049	fill_kinfo_proc_only(p, kp);
1050	fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0);
1051	fill_kinfo_aggregate(p, kp);
1052}
1053
1054struct pstats *
1055pstats_alloc(void)
1056{
1057
1058	return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK));
1059}
1060
1061/*
1062 * Copy parts of p_stats; zero the rest of p_stats (statistics).
1063 */
1064void
1065pstats_fork(struct pstats *src, struct pstats *dst)
1066{
1067
1068	bzero(&dst->pstat_startzero,
1069	    __rangeof(struct pstats, pstat_startzero, pstat_endzero));
1070	bcopy(&src->pstat_startcopy, &dst->pstat_startcopy,
1071	    __rangeof(struct pstats, pstat_startcopy, pstat_endcopy));
1072}
1073
1074void
1075pstats_free(struct pstats *ps)
1076{
1077
1078	free(ps, M_SUBPROC);
1079}
1080
1081static struct proc *
1082zpfind_locked(pid_t pid)
1083{
1084	struct proc *p;
1085
1086	sx_assert(&allproc_lock, SX_LOCKED);
1087	LIST_FOREACH(p, &zombproc, p_list) {
1088		if (p->p_pid == pid) {
1089			PROC_LOCK(p);
1090			break;
1091		}
1092	}
1093	return (p);
1094}
1095
1096/*
1097 * Locate a zombie process by number
1098 */
1099struct proc *
1100zpfind(pid_t pid)
1101{
1102	struct proc *p;
1103
1104	sx_slock(&allproc_lock);
1105	p = zpfind_locked(pid);
1106	sx_sunlock(&allproc_lock);
1107	return (p);
1108}
1109
1110#ifdef COMPAT_FREEBSD32
1111
/*
 * Narrow a pointer to 32 bits.  A value that does not fit (greater than
 * UINT_MAX) is reported as 0 instead of being truncated.
 *
 * This function is typically used to copy out the kernel address, so
 * it can be replaced by assignment of zero.
 */
static inline uint32_t
ptr32_trim(void *ptr)
{
	const uintptr_t addr = (uintptr_t)ptr;

	if (addr > UINT_MAX)
		return (0);
	return (addr);
}
1124
/* Copy pointer member fld from src to dst, narrowed by ptr32_trim(). */
#define	PTRTRIM_CP(src, dst, fld) do {					\
	(dst).fld = ptr32_trim((src).fld);				\
} while (0)
1127
/*
 * Convert a native struct kinfo_proc into the 32-bit compat layout.
 *
 * The destination is zeroed first, so any member that is not explicitly
 * copied below is exported as zero.  ki_structsize is set to the size of
 * the 32-bit structure rather than copied from the source.  Pointer
 * members go through PTRTRIM_CP(), which stores zero for any pointer
 * that does not fit in 32 bits.  CP() and TV_CP() are member-copy macros
 * defined outside this part of the file.
 */
static void
freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
{
	int i;

	bzero(ki32, sizeof(struct kinfo_proc32));
	ki32->ki_structsize = sizeof(struct kinfo_proc32);
	CP(*ki, *ki32, ki_layout);
	/* Pointer-valued members: trimmed to 32 bits (or zeroed). */
	PTRTRIM_CP(*ki, *ki32, ki_args);
	PTRTRIM_CP(*ki, *ki32, ki_paddr);
	PTRTRIM_CP(*ki, *ki32, ki_addr);
	PTRTRIM_CP(*ki, *ki32, ki_tracep);
	PTRTRIM_CP(*ki, *ki32, ki_textvp);
	PTRTRIM_CP(*ki, *ki32, ki_fd);
	PTRTRIM_CP(*ki, *ki32, ki_vmspace);
	PTRTRIM_CP(*ki, *ki32, ki_wchan);
	/* Identifiers, signal sets and credentials. */
	CP(*ki, *ki32, ki_pid);
	CP(*ki, *ki32, ki_ppid);
	CP(*ki, *ki32, ki_pgid);
	CP(*ki, *ki32, ki_tpgid);
	CP(*ki, *ki32, ki_sid);
	CP(*ki, *ki32, ki_tsid);
	CP(*ki, *ki32, ki_jobc);
	CP(*ki, *ki32, ki_tdev);
	CP(*ki, *ki32, ki_siglist);
	CP(*ki, *ki32, ki_sigmask);
	CP(*ki, *ki32, ki_sigignore);
	CP(*ki, *ki32, ki_sigcatch);
	CP(*ki, *ki32, ki_uid);
	CP(*ki, *ki32, ki_ruid);
	CP(*ki, *ki32, ki_svuid);
	CP(*ki, *ki32, ki_rgid);
	CP(*ki, *ki32, ki_svgid);
	CP(*ki, *ki32, ki_ngroups);
	/* All KI_NGROUPS slots are copied, regardless of ki_ngroups. */
	for (i = 0; i < KI_NGROUPS; i++)
		CP(*ki, *ki32, ki_groups[i]);
	/* Sizes, accounting and scheduling state. */
	CP(*ki, *ki32, ki_size);
	CP(*ki, *ki32, ki_rssize);
	CP(*ki, *ki32, ki_swrss);
	CP(*ki, *ki32, ki_tsize);
	CP(*ki, *ki32, ki_dsize);
	CP(*ki, *ki32, ki_ssize);
	CP(*ki, *ki32, ki_xstat);
	CP(*ki, *ki32, ki_acflag);
	CP(*ki, *ki32, ki_pctcpu);
	CP(*ki, *ki32, ki_estcpu);
	CP(*ki, *ki32, ki_slptime);
	CP(*ki, *ki32, ki_swtime);
	CP(*ki, *ki32, ki_cow);
	CP(*ki, *ki32, ki_runtime);
	TV_CP(*ki, *ki32, ki_start);
	TV_CP(*ki, *ki32, ki_childtime);
	CP(*ki, *ki32, ki_flag);
	CP(*ki, *ki32, ki_kiflag);
	CP(*ki, *ki32, ki_traceflag);
	CP(*ki, *ki32, ki_stat);
	CP(*ki, *ki32, ki_nice);
	CP(*ki, *ki32, ki_lock);
	CP(*ki, *ki32, ki_rqindex);
	CP(*ki, *ki32, ki_oncpu);
	CP(*ki, *ki32, ki_lastcpu);

	/* XXX TODO: wrap cpu value as appropriate */
	CP(*ki, *ki32, ki_oncpu_old);
	CP(*ki, *ki32, ki_lastcpu_old);

	/* Fixed-size name buffers are copied whole, terminator included. */
	bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1);
	bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1);
	bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1);
	bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1);
	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
	CP(*ki, *ki32, ki_tracer);
	CP(*ki, *ki32, ki_flag2);
	CP(*ki, *ki32, ki_fibnum);
	CP(*ki, *ki32, ki_cr_flags);
	CP(*ki, *ki32, ki_jid);
	CP(*ki, *ki32, ki_numthreads);
	CP(*ki, *ki32, ki_tid);
	CP(*ki, *ki32, ki_pri);
	/* rusage has its own 32-bit layout and conversion routine. */
	freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage);
	freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch);
	PTRTRIM_CP(*ki, *ki32, ki_pcb);
	PTRTRIM_CP(*ki, *ki32, ki_kstack);
	PTRTRIM_CP(*ki, *ki32, ki_udata);
	CP(*ki, *ki32, ki_sflag);
	CP(*ki, *ki32, ki_tdflags);
}
1217#endif
1218
1219int
1220kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
1221{
1222	struct thread *td;
1223	struct kinfo_proc ki;
1224#ifdef COMPAT_FREEBSD32
1225	struct kinfo_proc32 ki32;
1226#endif
1227	int error;
1228
1229	PROC_LOCK_ASSERT(p, MA_OWNED);
1230	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1231
1232	error = 0;
1233	fill_kinfo_proc(p, &ki);
1234	if ((flags & KERN_PROC_NOTHREADS) != 0) {
1235#ifdef COMPAT_FREEBSD32
1236		if ((flags & KERN_PROC_MASK32) != 0) {
1237			freebsd32_kinfo_proc_out(&ki, &ki32);
1238			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1239				error = ENOMEM;
1240		} else
1241#endif
1242			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1243				error = ENOMEM;
1244	} else {
1245		FOREACH_THREAD_IN_PROC(p, td) {
1246			fill_kinfo_thread(td, &ki, 1);
1247#ifdef COMPAT_FREEBSD32
1248			if ((flags & KERN_PROC_MASK32) != 0) {
1249				freebsd32_kinfo_proc_out(&ki, &ki32);
1250				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1251					error = ENOMEM;
1252			} else
1253#endif
1254				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1255					error = ENOMEM;
1256			if (error != 0)
1257				break;
1258		}
1259	}
1260	PROC_UNLOCK(p);
1261	return (error);
1262}
1263
1264static int
1265sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags,
1266    int doingzomb)
1267{
1268	struct sbuf sb;
1269	struct kinfo_proc ki;
1270	struct proc *np;
1271	int error, error2;
1272	pid_t pid;
1273
1274	pid = p->p_pid;
1275	sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req);
1276	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1277	error = kern_proc_out(p, &sb, flags);
1278	error2 = sbuf_finish(&sb);
1279	sbuf_delete(&sb);
1280	if (error != 0)
1281		return (error);
1282	else if (error2 != 0)
1283		return (error2);
1284	if (doingzomb)
1285		np = zpfind(pid);
1286	else {
1287		if (pid == 0)
1288			return (0);
1289		np = pfind(pid);
1290	}
1291	if (np == NULL)
1292		return (ESRCH);
1293	if (np != p) {
1294		PROC_UNLOCK(np);
1295		return (ESRCH);
1296	}
1297	PROC_UNLOCK(np);
1298	return (0);
1299}
1300
/*
 * Sysctl handler that exports kinfo_proc records.
 *
 * The OID number selects the filter (KERN_PROC_ALL, KERN_PROC_PID,
 * KERN_PROC_PGRP, ...) and name[0], when present, carries the value to
 * match.  Unless the OID is KERN_PROC_ALL or has KERN_PROC_INC_THREAD
 * set, only one record per process is produced (KERN_PROC_NOTHREADS).
 *
 * KERN_PROC_PID is served by a direct pget() lookup.  Every other
 * selector walks allproc and then zombproc with proctree_lock and
 * allproc_lock held shared, skipping embryonic processes and processes
 * the caller may not see.
 *
 * Returns 0 on success, EINVAL for a malformed name, or the first error
 * reported while wiring the buffer or writing a record.
 */
static int
sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct proc *p;
	int flags, doingzomb, oid_number;
	int error = 0;

	oid_number = oidp->oid_number;
	if (oid_number != KERN_PROC_ALL &&
	    (oid_number & KERN_PROC_INC_THREAD) == 0)
		flags = KERN_PROC_NOTHREADS;
	else {
		flags = 0;
		/* Strip the thread bit so the selector compares cleanly. */
		oid_number &= ~KERN_PROC_INC_THREAD;
	}
#ifdef COMPAT_FREEBSD32
	/* 32-bit requests get the kinfo_proc32 layout. */
	if (req->flags & SCTL_MASK32)
		flags |= KERN_PROC_MASK32;
#endif
	if (oid_number == KERN_PROC_PID) {
		if (namelen != 1)
			return (EINVAL);
		error = sysctl_wire_old_buffer(req, 0);
		if (error)
			return (error);
		sx_slock(&proctree_lock);
		error = pget((pid_t)name[0], PGET_CANSEE, &p);
		if (error == 0)
			error = sysctl_out_proc(p, req, flags, 0);
		sx_sunlock(&proctree_lock);
		return (error);
	}

	/* Validate the number of name components for the list selectors. */
	switch (oid_number) {
	case KERN_PROC_ALL:
		if (namelen != 0)
			return (EINVAL);
		break;
	case KERN_PROC_PROC:
		if (namelen != 0 && namelen != 1)
			return (EINVAL);
		break;
	default:
		if (namelen != 1)
			return (EINVAL);
		break;
	}

	if (!req->oldptr) {
		/* overestimate by 5 procs */
		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
		if (error)
			return (error);
	}
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sx_slock(&proctree_lock);
	sx_slock(&allproc_lock);
	/* Pass 0 walks allproc, pass 1 walks zombproc. */
	for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
		if (!doingzomb)
			p = LIST_FIRST(&allproc);
		else
			p = LIST_FIRST(&zombproc);
		for (; p != 0; p = LIST_NEXT(p, p_list)) {
			/*
			 * Skip embryonic processes.
			 */
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW) {
				PROC_UNLOCK(p);
				continue;
			}
			KASSERT(p->p_ucred != NULL,
			    ("process credential is NULL for non-NEW proc"));
			/*
			 * Show a user only appropriate processes.
			 */
			if (p_cansee(curthread, p)) {
				PROC_UNLOCK(p);
				continue;
			}
			/*
			 * TODO - make more efficient (see notes below).
			 * do by session.
			 */
			/*
			 * Apply the selector.  A process that does not match
			 * is unlocked and skipped; a match falls out of the
			 * switch with the process still locked.
			 */
			switch (oid_number) {

			case KERN_PROC_GID:
				if (p->p_ucred->cr_gid != (gid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_PGRP:
				/* could do this by traversing pgrp */
				if (p->p_pgrp == NULL ||
				    p->p_pgrp->pg_id != (pid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_RGID:
				if (p->p_ucred->cr_rgid != (gid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_SESSION:
				if (p->p_session == NULL ||
				    p->p_session->s_sid != (pid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_TTY:
				if ((p->p_flag & P_CONTROLT) == 0 ||
				    p->p_session == NULL) {
					PROC_UNLOCK(p);
					continue;
				}
				/* XXX proctree_lock */
				SESS_LOCK(p->p_session);
				if (p->p_session->s_ttyp == NULL ||
				    tty_udev(p->p_session->s_ttyp) !=
				    (dev_t)name[0]) {
					SESS_UNLOCK(p->p_session);
					PROC_UNLOCK(p);
					continue;
				}
				SESS_UNLOCK(p->p_session);
				break;

			case KERN_PROC_UID:
				if (p->p_ucred->cr_uid != (uid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_RUID:
				if (p->p_ucred->cr_ruid != (uid_t)name[0]) {
					PROC_UNLOCK(p);
					continue;
				}
				break;

			case KERN_PROC_PROC:
				break;

			default:
				break;

			}

			/* sysctl_out_proc() consumes the process lock. */
			error = sysctl_out_proc(p, req, flags, doingzomb);
			if (error) {
				sx_sunlock(&allproc_lock);
				sx_sunlock(&proctree_lock);
				return (error);
			}
		}
	}
	sx_sunlock(&allproc_lock);
	sx_sunlock(&proctree_lock);
	return (0);
}
1474
1475struct pargs *
1476pargs_alloc(int len)
1477{
1478	struct pargs *pa;
1479
1480	pa = malloc(sizeof(struct pargs) + len, M_PARGS,
1481		M_WAITOK);
1482	refcount_init(&pa->ar_ref, 1);
1483	pa->ar_length = len;
1484	return (pa);
1485}
1486
/*
 * Free a pargs structure unconditionally, without looking at its
 * reference count.
 */
static void
pargs_free(struct pargs *pa)
{

	free(pa, M_PARGS);
}
1493
1494void
1495pargs_hold(struct pargs *pa)
1496{
1497
1498	if (pa == NULL)
1499		return;
1500	refcount_acquire(&pa->ar_ref);
1501}
1502
1503void
1504pargs_drop(struct pargs *pa)
1505{
1506
1507	if (pa == NULL)
1508		return;
1509	if (refcount_release(&pa->ar_ref))
1510		pargs_free(pa);
1511}
1512
1513static int
1514proc_read_mem(struct thread *td, struct proc *p, vm_offset_t offset, void* buf,
1515    size_t len)
1516{
1517	struct iovec iov;
1518	struct uio uio;
1519
1520	iov.iov_base = (caddr_t)buf;
1521	iov.iov_len = len;
1522	uio.uio_iov = &iov;
1523	uio.uio_iovcnt = 1;
1524	uio.uio_offset = offset;
1525	uio.uio_resid = (ssize_t)len;
1526	uio.uio_segflg = UIO_SYSSPACE;
1527	uio.uio_rw = UIO_READ;
1528	uio.uio_td = td;
1529
1530	return (proc_rwmem(p, &uio));
1531}
1532
1533static int
1534proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf,
1535    size_t len)
1536{
1537	size_t i;
1538	int error;
1539
1540	error = proc_read_mem(td, p, (vm_offset_t)sptr, buf, len);
1541	/*
1542	 * Reading the chunk may validly return EFAULT if the string is shorter
1543	 * than the chunk and is aligned at the end of the page, assuming the
1544	 * next page is not mapped.  So if EFAULT is returned do a fallback to
1545	 * one byte read loop.
1546	 */
1547	if (error == EFAULT) {
1548		for (i = 0; i < len; i++, buf++, sptr++) {
1549			error = proc_read_mem(td, p, (vm_offset_t)sptr, buf, 1);
1550			if (error != 0)
1551				return (error);
1552			if (*buf == '\0')
1553				break;
1554		}
1555		error = 0;
1556	}
1557	return (error);
1558}
1559
#define PROC_AUXV_MAX	256	/* Safety limit on auxv size. */

/*
 * Selects which per-process vector get_proc_vector() and
 * get_proc_vector32() fetch: the argument pointers, the environment
 * pointers, or the ELF auxiliary vector.
 */
enum proc_vector_type {
	PROC_ARG,
	PROC_ENV,
	PROC_AUX,
};
1567
1568#ifdef COMPAT_FREEBSD32
1569static int
1570get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp,
1571    size_t *vsizep, enum proc_vector_type type)
1572{
1573	struct freebsd32_ps_strings pss;
1574	Elf32_Auxinfo aux;
1575	vm_offset_t vptr, ptr;
1576	uint32_t *proc_vector32;
1577	char **proc_vector;
1578	size_t vsize, size;
1579	int i, error;
1580
1581	error = proc_read_mem(td, p, (vm_offset_t)(p->p_sysent->sv_psstrings),
1582	    &pss, sizeof(pss));
1583	if (error != 0)
1584		return (error);
1585	switch (type) {
1586	case PROC_ARG:
1587		vptr = (vm_offset_t)PTRIN(pss.ps_argvstr);
1588		vsize = pss.ps_nargvstr;
1589		if (vsize > ARG_MAX)
1590			return (ENOEXEC);
1591		size = vsize * sizeof(int32_t);
1592		break;
1593	case PROC_ENV:
1594		vptr = (vm_offset_t)PTRIN(pss.ps_envstr);
1595		vsize = pss.ps_nenvstr;
1596		if (vsize > ARG_MAX)
1597			return (ENOEXEC);
1598		size = vsize * sizeof(int32_t);
1599		break;
1600	case PROC_AUX:
1601		vptr = (vm_offset_t)PTRIN(pss.ps_envstr) +
1602		    (pss.ps_nenvstr + 1) * sizeof(int32_t);
1603		if (vptr % 4 != 0)
1604			return (ENOEXEC);
1605		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
1606			error = proc_read_mem(td, p, ptr, &aux, sizeof(aux));
1607			if (error != 0)
1608				return (error);
1609			if (aux.a_type == AT_NULL)
1610				break;
1611			ptr += sizeof(aux);
1612		}
1613		if (aux.a_type != AT_NULL)
1614			return (ENOEXEC);
1615		vsize = i + 1;
1616		size = vsize * sizeof(aux);
1617		break;
1618	default:
1619		KASSERT(0, ("Wrong proc vector type: %d", type));
1620		return (EINVAL);
1621	}
1622	proc_vector32 = malloc(size, M_TEMP, M_WAITOK);
1623	error = proc_read_mem(td, p, vptr, proc_vector32, size);
1624	if (error != 0)
1625		goto done;
1626	if (type == PROC_AUX) {
1627		*proc_vectorp = (char **)proc_vector32;
1628		*vsizep = vsize;
1629		return (0);
1630	}
1631	proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK);
1632	for (i = 0; i < (int)vsize; i++)
1633		proc_vector[i] = PTRIN(proc_vector32[i]);
1634	*proc_vectorp = proc_vector;
1635	*vsizep = vsize;
1636done:
1637	free(proc_vector32, M_TEMP);
1638	return (error);
1639}
1640#endif
1641
1642static int
1643get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp,
1644    size_t *vsizep, enum proc_vector_type type)
1645{
1646	struct ps_strings pss;
1647	Elf_Auxinfo aux;
1648	vm_offset_t vptr, ptr;
1649	char **proc_vector;
1650	size_t vsize, size;
1651	int error, i;
1652
1653#ifdef COMPAT_FREEBSD32
1654	if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1655		return (get_proc_vector32(td, p, proc_vectorp, vsizep, type));
1656#endif
1657	error = proc_read_mem(td, p, (vm_offset_t)(p->p_sysent->sv_psstrings),
1658	    &pss, sizeof(pss));
1659	if (error != 0)
1660		return (error);
1661	switch (type) {
1662	case PROC_ARG:
1663		vptr = (vm_offset_t)pss.ps_argvstr;
1664		vsize = pss.ps_nargvstr;
1665		if (vsize > ARG_MAX)
1666			return (ENOEXEC);
1667		size = vsize * sizeof(char *);
1668		break;
1669	case PROC_ENV:
1670		vptr = (vm_offset_t)pss.ps_envstr;
1671		vsize = pss.ps_nenvstr;
1672		if (vsize > ARG_MAX)
1673			return (ENOEXEC);
1674		size = vsize * sizeof(char *);
1675		break;
1676	case PROC_AUX:
1677		/*
1678		 * The aux array is just above env array on the stack. Check
1679		 * that the address is naturally aligned.
1680		 */
1681		vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1)
1682		    * sizeof(char *);
1683#if __ELF_WORD_SIZE == 64
1684		if (vptr % sizeof(uint64_t) != 0)
1685#else
1686		if (vptr % sizeof(uint32_t) != 0)
1687#endif
1688			return (ENOEXEC);
1689		/*
1690		 * We count the array size reading the aux vectors from the
1691		 * stack until AT_NULL vector is returned.  So (to keep the code
1692		 * simple) we read the process stack twice: the first time here
1693		 * to find the size and the second time when copying the vectors
1694		 * to the allocated proc_vector.
1695		 */
1696		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
1697			error = proc_read_mem(td, p, ptr, &aux, sizeof(aux));
1698			if (error != 0)
1699				return (error);
1700			if (aux.a_type == AT_NULL)
1701				break;
1702			ptr += sizeof(aux);
1703		}
1704		/*
1705		 * If the PROC_AUXV_MAX entries are iterated over, and we have
1706		 * not reached AT_NULL, it is most likely we are reading wrong
1707		 * data: either the process doesn't have auxv array or data has
1708		 * been modified. Return the error in this case.
1709		 */
1710		if (aux.a_type != AT_NULL)
1711			return (ENOEXEC);
1712		vsize = i + 1;
1713		size = vsize * sizeof(aux);
1714		break;
1715	default:
1716		KASSERT(0, ("Wrong proc vector type: %d", type));
1717		return (EINVAL); /* In case we are built without INVARIANTS. */
1718	}
1719	proc_vector = malloc(size, M_TEMP, M_WAITOK);
1720	if (proc_vector == NULL)
1721		return (ENOMEM);
1722	error = proc_read_mem(td, p, vptr, proc_vector, size);
1723	if (error != 0) {
1724		free(proc_vector, M_TEMP);
1725		return (error);
1726	}
1727	*proc_vectorp = proc_vector;
1728	*vsizep = vsize;
1729
1730	return (0);
1731}
1732
1733#define GET_PS_STRINGS_CHUNK_SZ	256	/* Chunk size (bytes) for ps_strings operations. */
1734
1735static int
1736get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb,
1737    enum proc_vector_type type)
1738{
1739	size_t done, len, nchr, vsize;
1740	int error, i;
1741	char **proc_vector, *sptr;
1742	char pss_string[GET_PS_STRINGS_CHUNK_SZ];
1743
1744	PROC_ASSERT_HELD(p);
1745
1746	/*
1747	 * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes.
1748	 */
1749	nchr = 2 * (PATH_MAX + ARG_MAX);
1750
1751	error = get_proc_vector(td, p, &proc_vector, &vsize, type);
1752	if (error != 0)
1753		return (error);
1754	for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) {
1755		/*
1756		 * The program may have scribbled into its argv array, e.g. to
1757		 * remove some arguments.  If that has happened, break out
1758		 * before trying to read from NULL.
1759		 */
1760		if (proc_vector[i] == NULL)
1761			break;
1762		for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) {
1763			error = proc_read_string(td, p, sptr, pss_string,
1764			    sizeof(pss_string));
1765			if (error != 0)
1766				goto done;
1767			len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ);
1768			if (done + len >= nchr)
1769				len = nchr - done - 1;
1770			sbuf_bcat(sb, pss_string, len);
1771			if (len != GET_PS_STRINGS_CHUNK_SZ)
1772				break;
1773			done += GET_PS_STRINGS_CHUNK_SZ;
1774		}
1775		sbuf_bcat(sb, "", 1);
1776		done += len + 1;
1777	}
1778done:
1779	free(proc_vector, M_TEMP);
1780	return (error);
1781}
1782
/*
 * Write the NUL-separated argument strings of process p to sb.  See
 * get_ps_strings() for the details and the return value.
 *
 * NOTE(review): the td argument is not used; curthread is passed to
 * get_ps_strings() instead.
 */
int
proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb)
{

	return (get_ps_strings(curthread, p, sb, PROC_ARG));
}
1789
/*
 * Write the NUL-separated environment strings of process p to sb.  See
 * get_ps_strings() for the details and the return value.
 *
 * NOTE(review): the td argument is not used; curthread is passed to
 * get_ps_strings() instead.
 */
int
proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb)
{

	return (get_ps_strings(curthread, p, sb, PROC_ENV));
}
1796
1797int
1798proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
1799{
1800	size_t vsize, size;
1801	char **auxv;
1802	int error;
1803
1804	error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX);
1805	if (error == 0) {
1806#ifdef COMPAT_FREEBSD32
1807		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1808			size = vsize * sizeof(Elf32_Auxinfo);
1809		else
1810#endif
1811			size = vsize * sizeof(Elf_Auxinfo);
1812		if (sbuf_bcat(sb, auxv, size) != 0)
1813			error = ENOMEM;
1814		free(auxv, M_TEMP);
1815	}
1816	return (error);
1817}
1818
1819/*
1820 * This sysctl allows a process to retrieve the argument list or process
1821 * title for another process without groping around in the address space
1822 * of the other process.  It also allow a process to set its own "process
1823 * title to a string of its own choice.
1824 */
1825static int
1826sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
1827{
1828	int *name = (int *)arg1;
1829	u_int namelen = arg2;
1830	struct pargs *newpa, *pa;
1831	struct proc *p;
1832	struct sbuf sb;
1833	int flags, error = 0, error2;
1834
1835	if (namelen != 1)
1836		return (EINVAL);
1837
1838	flags = PGET_CANSEE;
1839	if (req->newptr != NULL)
1840		flags |= PGET_ISCURRENT;
1841	error = pget((pid_t)name[0], flags, &p);
1842	if (error)
1843		return (error);
1844
1845	pa = p->p_args;
1846	if (pa != NULL) {
1847		pargs_hold(pa);
1848		PROC_UNLOCK(p);
1849		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
1850		pargs_drop(pa);
1851	} else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) {
1852		_PHOLD(p);
1853		PROC_UNLOCK(p);
1854		sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1855		sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1856		error = proc_getargv(curthread, p, &sb);
1857		error2 = sbuf_finish(&sb);
1858		PRELE(p);
1859		sbuf_delete(&sb);
1860		if (error == 0 && error2 != 0)
1861			error = error2;
1862	} else {
1863		PROC_UNLOCK(p);
1864	}
1865	if (error != 0 || req->newptr == NULL)
1866		return (error);
1867
1868	if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit)
1869		return (ENOMEM);
1870	newpa = pargs_alloc(req->newlen);
1871	error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
1872	if (error != 0) {
1873		pargs_free(newpa);
1874		return (error);
1875	}
1876	PROC_LOCK(p);
1877	pa = p->p_args;
1878	p->p_args = newpa;
1879	PROC_UNLOCK(p);
1880	pargs_drop(pa);
1881	return (0);
1882}
1883
1884/*
1885 * This sysctl allows a process to retrieve environment of another process.
1886 */
1887static int
1888sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS)
1889{
1890	int *name = (int *)arg1;
1891	u_int namelen = arg2;
1892	struct proc *p;
1893	struct sbuf sb;
1894	int error, error2;
1895
1896	if (namelen != 1)
1897		return (EINVAL);
1898
1899	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
1900	if (error != 0)
1901		return (error);
1902	if ((p->p_flag & P_SYSTEM) != 0) {
1903		PRELE(p);
1904		return (0);
1905	}
1906
1907	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1908	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1909	error = proc_getenvv(curthread, p, &sb);
1910	error2 = sbuf_finish(&sb);
1911	PRELE(p);
1912	sbuf_delete(&sb);
1913	return (error != 0 ? error : error2);
1914}
1915
1916/*
1917 * This sysctl allows a process to retrieve ELF auxiliary vector of
1918 * another process.
1919 */
1920static int
1921sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS)
1922{
1923	int *name = (int *)arg1;
1924	u_int namelen = arg2;
1925	struct proc *p;
1926	struct sbuf sb;
1927	int error, error2;
1928
1929	if (namelen != 1)
1930		return (EINVAL);
1931
1932	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
1933	if (error != 0)
1934		return (error);
1935	if ((p->p_flag & P_SYSTEM) != 0) {
1936		PRELE(p);
1937		return (0);
1938	}
1939	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1940	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1941	error = proc_getauxv(curthread, p, &sb);
1942	error2 = sbuf_finish(&sb);
1943	PRELE(p);
1944	sbuf_delete(&sb);
1945	return (error != 0 ? error : error2);
1946}
1947
1948/*
1949 * This sysctl allows a process to retrieve the path of the executable for
1950 * itself or another process.
1951 */
1952static int
1953sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
1954{
1955	pid_t *pidp = (pid_t *)arg1;
1956	unsigned int arglen = arg2;
1957	struct proc *p;
1958	struct vnode *vp;
1959	char *retbuf, *freebuf;
1960	int error;
1961
1962	if (arglen != 1)
1963		return (EINVAL);
1964	if (*pidp == -1) {	/* -1 means this process */
1965		p = req->td->td_proc;
1966	} else {
1967		error = pget(*pidp, PGET_CANSEE, &p);
1968		if (error != 0)
1969			return (error);
1970	}
1971
1972	vp = p->p_textvp;
1973	if (vp == NULL) {
1974		if (*pidp != -1)
1975			PROC_UNLOCK(p);
1976		return (0);
1977	}
1978	vref(vp);
1979	if (*pidp != -1)
1980		PROC_UNLOCK(p);
1981	error = vn_fullpath(req->td, vp, &retbuf, &freebuf);
1982	vrele(vp);
1983	if (error)
1984		return (error);
1985	error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
1986	free(freebuf, M_TEMP);
1987	return (error);
1988}
1989
1990static int
1991sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
1992{
1993	struct proc *p;
1994	char *sv_name;
1995	int *name;
1996	int namelen;
1997	int error;
1998
1999	namelen = arg2;
2000	if (namelen != 1)
2001		return (EINVAL);
2002
2003	name = (int *)arg1;
2004	error = pget((pid_t)name[0], PGET_CANSEE, &p);
2005	if (error != 0)
2006		return (error);
2007	sv_name = p->p_sysent->sv_name;
2008	PROC_UNLOCK(p);
2009	return (sysctl_handle_string(oidp, sv_name, 0, req));
2010}
2011
/*
 * When KINFO_OVMENTRY_SIZE is defined, fail the build if
 * struct kinfo_ovmentry does not have exactly that size.
 */
#ifdef KINFO_OVMENTRY_SIZE
CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE);
#endif
2015
2016#ifdef COMPAT_FREEBSD7
2017static int
2018sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
2019{
2020	vm_map_entry_t entry, tmp_entry;
2021	unsigned int last_timestamp;
2022	char *fullpath, *freepath;
2023	struct kinfo_ovmentry *kve;
2024	struct vattr va;
2025	struct ucred *cred;
2026	int error, *name;
2027	struct vnode *vp;
2028	struct proc *p;
2029	vm_map_t map;
2030	struct vmspace *vm;
2031
2032	name = (int *)arg1;
2033	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2034	if (error != 0)
2035		return (error);
2036	vm = vmspace_acquire_ref(p);
2037	if (vm == NULL) {
2038		PRELE(p);
2039		return (ESRCH);
2040	}
2041	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
2042
2043	map = &vm->vm_map;
2044	vm_map_lock_read(map);
2045	for (entry = map->header.next; entry != &map->header;
2046	    entry = entry->next) {
2047		vm_object_t obj, tobj, lobj;
2048		vm_offset_t addr;
2049
2050		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2051			continue;
2052
2053		bzero(kve, sizeof(*kve));
2054		kve->kve_structsize = sizeof(*kve);
2055
2056		kve->kve_private_resident = 0;
2057		obj = entry->object.vm_object;
2058		if (obj != NULL) {
2059			VM_OBJECT_RLOCK(obj);
2060			if (obj->shadow_count == 1)
2061				kve->kve_private_resident =
2062				    obj->resident_page_count;
2063		}
2064		kve->kve_resident = 0;
2065		addr = entry->start;
2066		while (addr < entry->end) {
2067			if (pmap_extract(map->pmap, addr))
2068				kve->kve_resident++;
2069			addr += PAGE_SIZE;
2070		}
2071
2072		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
2073			if (tobj != obj)
2074				VM_OBJECT_RLOCK(tobj);
2075			if (lobj != obj)
2076				VM_OBJECT_RUNLOCK(lobj);
2077			lobj = tobj;
2078		}
2079
2080		kve->kve_start = (void*)entry->start;
2081		kve->kve_end = (void*)entry->end;
2082		kve->kve_offset = (off_t)entry->offset;
2083
2084		if (entry->protection & VM_PROT_READ)
2085			kve->kve_protection |= KVME_PROT_READ;
2086		if (entry->protection & VM_PROT_WRITE)
2087			kve->kve_protection |= KVME_PROT_WRITE;
2088		if (entry->protection & VM_PROT_EXECUTE)
2089			kve->kve_protection |= KVME_PROT_EXEC;
2090
2091		if (entry->eflags & MAP_ENTRY_COW)
2092			kve->kve_flags |= KVME_FLAG_COW;
2093		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
2094			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
2095		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
2096			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
2097
2098		last_timestamp = map->timestamp;
2099		vm_map_unlock_read(map);
2100
2101		kve->kve_fileid = 0;
2102		kve->kve_fsid = 0;
2103		freepath = NULL;
2104		fullpath = "";
2105		if (lobj) {
2106			vp = NULL;
2107			switch (lobj->type) {
2108			case OBJT_DEFAULT:
2109				kve->kve_type = KVME_TYPE_DEFAULT;
2110				break;
2111			case OBJT_VNODE:
2112				kve->kve_type = KVME_TYPE_VNODE;
2113				vp = lobj->handle;
2114				vref(vp);
2115				break;
2116			case OBJT_SWAP:
2117				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
2118					kve->kve_type = KVME_TYPE_VNODE;
2119					if ((lobj->flags & OBJ_TMPFS) != 0) {
2120						vp = lobj->un_pager.swp.swp_tmpfs;
2121						vref(vp);
2122					}
2123				} else {
2124					kve->kve_type = KVME_TYPE_SWAP;
2125				}
2126				break;
2127			case OBJT_DEVICE:
2128				kve->kve_type = KVME_TYPE_DEVICE;
2129				break;
2130			case OBJT_PHYS:
2131				kve->kve_type = KVME_TYPE_PHYS;
2132				break;
2133			case OBJT_DEAD:
2134				kve->kve_type = KVME_TYPE_DEAD;
2135				break;
2136			case OBJT_SG:
2137				kve->kve_type = KVME_TYPE_SG;
2138				break;
2139			default:
2140				kve->kve_type = KVME_TYPE_UNKNOWN;
2141				break;
2142			}
2143			if (lobj != obj)
2144				VM_OBJECT_RUNLOCK(lobj);
2145
2146			kve->kve_ref_count = obj->ref_count;
2147			kve->kve_shadow_count = obj->shadow_count;
2148			VM_OBJECT_RUNLOCK(obj);
2149			if (vp != NULL) {
2150				vn_fullpath(curthread, vp, &fullpath,
2151				    &freepath);
2152				cred = curthread->td_ucred;
2153				vn_lock(vp, LK_SHARED | LK_RETRY);
2154				if (VOP_GETATTR(vp, &va, cred) == 0) {
2155					kve->kve_fileid = va.va_fileid;
2156					kve->kve_fsid = va.va_fsid;
2157				}
2158				vput(vp);
2159			}
2160		} else {
2161			kve->kve_type = KVME_TYPE_NONE;
2162			kve->kve_ref_count = 0;
2163			kve->kve_shadow_count = 0;
2164		}
2165
2166		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
2167		if (freepath != NULL)
2168			free(freepath, M_TEMP);
2169
2170		error = SYSCTL_OUT(req, kve, sizeof(*kve));
2171		vm_map_lock_read(map);
2172		if (error)
2173			break;
2174		if (last_timestamp != map->timestamp) {
2175			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
2176			entry = tmp_entry;
2177		}
2178	}
2179	vm_map_unlock_read(map);
2180	vmspace_free(vm);
2181	PRELE(p);
2182	free(kve, M_TEMP);
2183	return (error);
2184}
2185#endif	/* COMPAT_FREEBSD7 */
2186
/*
 * When KINFO_VMENTRY_SIZE is defined, fail the build if
 * struct kinfo_vmentry does not have exactly that size.
 */
#ifdef KINFO_VMENTRY_SIZE
CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
#endif
2190
/*
 * Add the number of resident pages backing a map entry to
 * kve->kve_resident, and set KVME_FLAG_SUPER in kve->kve_flags when a
 * superpage mapping is found.
 *
 * The entry's address range is walked by page index.  For each index
 * the backing object chain is searched for a page; when none is found
 * the walk skips ahead to the nearest following index at which some
 * object in the chain has a page.
 *
 * NOTE(review): the objects of the chain are not locked here; the
 * caller visible in this file read-locks the whole chain before calling.
 */
static void
kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
    struct kinfo_vmentry *kve)
{
	vm_object_t obj, tobj;
	vm_page_t m, m_adv;
	vm_offset_t addr;
	vm_paddr_t locked_pa;
	vm_pindex_t pi, pi_adv, pindex;

	locked_pa = 0;
	obj = entry->object.vm_object;
	addr = entry->start;
	m_adv = NULL;
	pi = OFF_TO_IDX(entry->offset);
	/* pi_adv is the number of pages to step over on each iteration. */
	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
		if (m_adv != NULL) {
			/*
			 * The previous iteration skipped ahead to this page,
			 * so there is no need to search for it again.
			 */
			m = m_adv;
		} else {
			/* Start by assuming the rest of the entry is empty. */
			pi_adv = OFF_TO_IDX(entry->end - addr);
			pindex = pi;
			for (tobj = obj;; tobj = tobj->backing_object) {
				/*
				 * Presumably the page of tobj with the
				 * smallest index >= pindex, if any.
				 */
				m = vm_page_find_least(tobj, pindex);
				if (m != NULL) {
					if (m->pindex == pindex)
						break;
					/* Remember the nearest later page. */
					if (pi_adv > m->pindex - pindex) {
						pi_adv = m->pindex - pindex;
						m_adv = m;
					}
				}
				/* Bottom of the chain: nothing at this index. */
				if (tobj->backing_object == NULL)
					goto next;
				pindex += OFF_TO_IDX(tobj->
				    backing_object_offset);
			}
		}
		m_adv = NULL;
		/*
		 * Count a whole superpage when the page belongs to one, the
		 * address is superpage aligned and within the entry, and the
		 * pmap reports a superpage mapping at this address.
		 */
		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
		    (addr & (pagesizes[1] - 1)) == 0 &&
		    (pmap_mincore(map->pmap, addr, &locked_pa) &
		    MINCORE_SUPER) != 0) {
			kve->kve_flags |= KVME_FLAG_SUPER;
			pi_adv = OFF_TO_IDX(pagesizes[1]);
		} else {
			/*
			 * We do not test the found page on validity.
			 * Either the page is busy and being paged in,
			 * or it was invalidated.  The first case
			 * should be counted as resident, the second
			 * is not so clear; we do account both.
			 */
			pi_adv = 1;
		}
		kve->kve_resident += pi_adv;
next:;
	}
	PA_UNLOCK_COND(locked_pa);
}
2250
2251/*
2252 * Must be called with the process locked and will return unlocked.
2253 */
2254int
2255kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
2256{
2257	vm_map_entry_t entry, tmp_entry;
2258	struct vattr va;
2259	vm_map_t map;
2260	vm_object_t obj, tobj, lobj;
2261	char *fullpath, *freepath;
2262	struct kinfo_vmentry *kve;
2263	struct ucred *cred;
2264	struct vnode *vp;
2265	struct vmspace *vm;
2266	vm_offset_t addr;
2267	unsigned int last_timestamp;
2268	int error;
2269
2270	PROC_LOCK_ASSERT(p, MA_OWNED);
2271
2272	_PHOLD(p);
2273	PROC_UNLOCK(p);
2274	vm = vmspace_acquire_ref(p);
2275	if (vm == NULL) {
2276		PRELE(p);
2277		return (ESRCH);
2278	}
2279	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
2280
2281	error = 0;
2282	map = &vm->vm_map;
2283	vm_map_lock_read(map);
2284	for (entry = map->header.next; entry != &map->header;
2285	    entry = entry->next) {
2286		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2287			continue;
2288
2289		addr = entry->end;
2290		bzero(kve, sizeof(*kve));
2291		obj = entry->object.vm_object;
2292		if (obj != NULL) {
2293			for (tobj = obj; tobj != NULL;
2294			    tobj = tobj->backing_object) {
2295				VM_OBJECT_RLOCK(tobj);
2296				lobj = tobj;
2297			}
2298			if (obj->backing_object == NULL)
2299				kve->kve_private_resident =
2300				    obj->resident_page_count;
2301			if (!vmmap_skip_res_cnt)
2302				kern_proc_vmmap_resident(map, entry, kve);
2303			for (tobj = obj; tobj != NULL;
2304			    tobj = tobj->backing_object) {
2305				if (tobj != obj && tobj != lobj)
2306					VM_OBJECT_RUNLOCK(tobj);
2307			}
2308		} else {
2309			lobj = NULL;
2310		}
2311
2312		kve->kve_start = entry->start;
2313		kve->kve_end = entry->end;
2314		kve->kve_offset = entry->offset;
2315
2316		if (entry->protection & VM_PROT_READ)
2317			kve->kve_protection |= KVME_PROT_READ;
2318		if (entry->protection & VM_PROT_WRITE)
2319			kve->kve_protection |= KVME_PROT_WRITE;
2320		if (entry->protection & VM_PROT_EXECUTE)
2321			kve->kve_protection |= KVME_PROT_EXEC;
2322
2323		if (entry->eflags & MAP_ENTRY_COW)
2324			kve->kve_flags |= KVME_FLAG_COW;
2325		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
2326			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
2327		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
2328			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
2329		if (entry->eflags & MAP_ENTRY_GROWS_UP)
2330			kve->kve_flags |= KVME_FLAG_GROWS_UP;
2331		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
2332			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;
2333
2334		last_timestamp = map->timestamp;
2335		vm_map_unlock_read(map);
2336
2337		freepath = NULL;
2338		fullpath = "";
2339		if (lobj != NULL) {
2340			vp = NULL;
2341			switch (lobj->type) {
2342			case OBJT_DEFAULT:
2343				kve->kve_type = KVME_TYPE_DEFAULT;
2344				break;
2345			case OBJT_VNODE:
2346				kve->kve_type = KVME_TYPE_VNODE;
2347				vp = lobj->handle;
2348				vref(vp);
2349				break;
2350			case OBJT_SWAP:
2351				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
2352					kve->kve_type = KVME_TYPE_VNODE;
2353					if ((lobj->flags & OBJ_TMPFS) != 0) {
2354						vp = lobj->un_pager.swp.swp_tmpfs;
2355						vref(vp);
2356					}
2357				} else {
2358					kve->kve_type = KVME_TYPE_SWAP;
2359				}
2360				break;
2361			case OBJT_DEVICE:
2362				kve->kve_type = KVME_TYPE_DEVICE;
2363				break;
2364			case OBJT_PHYS:
2365				kve->kve_type = KVME_TYPE_PHYS;
2366				break;
2367			case OBJT_DEAD:
2368				kve->kve_type = KVME_TYPE_DEAD;
2369				break;
2370			case OBJT_SG:
2371				kve->kve_type = KVME_TYPE_SG;
2372				break;
2373			case OBJT_MGTDEVICE:
2374				kve->kve_type = KVME_TYPE_MGTDEVICE;
2375				break;
2376			default:
2377				kve->kve_type = KVME_TYPE_UNKNOWN;
2378				break;
2379			}
2380			if (lobj != obj)
2381				VM_OBJECT_RUNLOCK(lobj);
2382
2383			kve->kve_ref_count = obj->ref_count;
2384			kve->kve_shadow_count = obj->shadow_count;
2385			VM_OBJECT_RUNLOCK(obj);
2386			if (vp != NULL) {
2387				vn_fullpath(curthread, vp, &fullpath,
2388				    &freepath);
2389				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
2390				cred = curthread->td_ucred;
2391				vn_lock(vp, LK_SHARED | LK_RETRY);
2392				if (VOP_GETATTR(vp, &va, cred) == 0) {
2393					kve->kve_vn_fileid = va.va_fileid;
2394					kve->kve_vn_fsid = va.va_fsid;
2395					kve->kve_vn_mode =
2396					    MAKEIMODE(va.va_type, va.va_mode);
2397					kve->kve_vn_size = va.va_size;
2398					kve->kve_vn_rdev = va.va_rdev;
2399					kve->kve_status = KF_ATTR_VALID;
2400				}
2401				vput(vp);
2402			}
2403		} else {
2404			kve->kve_type = KVME_TYPE_NONE;
2405			kve->kve_ref_count = 0;
2406			kve->kve_shadow_count = 0;
2407		}
2408
2409		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
2410		if (freepath != NULL)
2411			free(freepath, M_TEMP);
2412
2413		/* Pack record size down */
2414		kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) +
2415		    strlen(kve->kve_path) + 1;
2416		kve->kve_structsize = roundup(kve->kve_structsize,
2417		    sizeof(uint64_t));
2418		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
2419			error = ENOMEM;
2420		vm_map_lock_read(map);
2421		if (error != 0)
2422			break;
2423		if (last_timestamp != map->timestamp) {
2424			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
2425			entry = tmp_entry;
2426		}
2427	}
2428	vm_map_unlock_read(map);
2429	vmspace_free(vm);
2430	PRELE(p);
2431	free(kve, M_TEMP);
2432	return (error);
2433}
2434
2435static int
2436sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
2437{
2438	struct proc *p;
2439	struct sbuf sb;
2440	int error, error2, *name;
2441
2442	name = (int *)arg1;
2443	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req);
2444	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2445	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
2446	if (error != 0) {
2447		sbuf_delete(&sb);
2448		return (error);
2449	}
2450	error = kern_proc_vmmap_out(p, &sb);
2451	error2 = sbuf_finish(&sb);
2452	sbuf_delete(&sb);
2453	return (error != 0 ? error : error2);
2454}
2455
#if defined(STACK) || defined(DDB)
/*
 * Sysctl handler exporting the kernel stack of every thread of a process
 * as a sequence of struct kinfo_kstack records, one per thread.
 *
 * The trailing MIB (arg1/arg2) must consist of exactly one component, the
 * pid of the target process.
 *
 * Returns EINVAL for a malformed name, the pget() error if the process
 * cannot be obtained, otherwise the result of the last SYSCTL_OUT()
 * performed (0 if no record was written).
 */
static int
sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
{
	struct kinfo_kstack *kkstp;
	int error, i, *name, numthreads;
	lwpid_t *lwpidarray;
	struct thread *td;
	struct stack *st;
	struct sbuf sb;
	struct proc *p;
	u_int namelen;

	/*
	 * Insist on exactly one name component, as the other per-process
	 * handlers in this file do.  Without this check an empty trailing
	 * name would make name[0] below read a component the caller never
	 * supplied.  Checking before pget() means there is nothing to
	 * release on this path.
	 */
	namelen = arg2;
	if (namelen != 1)
		return (EINVAL);

	name = (int *)arg1;
	error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p);
	if (error != 0)
		return (error);

	kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK);
	st = stack_create();

	lwpidarray = NULL;
	numthreads = 0;
	PROC_LOCK(p);
repeat:
	/*
	 * Size the tid array for the current thread count.  The process
	 * lock is dropped around the M_WAITOK allocation, so the count is
	 * re-checked afterwards and the array regrown if more threads
	 * appeared in the meantime.
	 */
	if (numthreads < p->p_numthreads) {
		if (lwpidarray != NULL) {
			free(lwpidarray, M_TEMP);
			lwpidarray = NULL;
		}
		numthreads = p->p_numthreads;
		PROC_UNLOCK(p);
		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
		    M_WAITOK | M_ZERO);
		PROC_LOCK(p);
		goto repeat;
	}
	i = 0;

	/*
	 * XXXRW: During the below loop, execve(2) and countless other sorts
	 * of changes could have taken place.  Should we check to see if the
	 * vmspace has been replaced, or the like, in order to prevent
	 * giving a snapshot that spans, say, execve(2), with some threads
	 * before and some after?  Among other things, the credentials could
	 * have changed, in which case the right to extract debug info might
	 * no longer be assured.
	 */
	/* Snapshot the thread ids while the process lock is held. */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(i < numthreads,
		    ("sysctl_kern_proc_kstack: numthreads"));
		lwpidarray[i] = td->td_tid;
		i++;
	}
	numthreads = i;
	for (i = 0; i < numthreads; i++) {
		/*
		 * The process lock is released for each copy-out below, so
		 * look every thread up again by id and skip the ids that no
		 * longer resolve.
		 */
		td = thread_find(p, lwpidarray[i]);
		if (td == NULL) {
			continue;
		}
		bzero(kkstp, sizeof(*kkstp));
		/* Format the trace directly into the record's text buffer. */
		(void)sbuf_new(&sb, kkstp->kkst_trace,
		    sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
		thread_lock(td);
		kkstp->kkst_tid = td->td_tid;
		if (TD_IS_SWAPPED(td)) {
			kkstp->kkst_state = KKST_STATE_SWAPPED;
		} else if (TD_IS_RUNNING(td)) {
			if (stack_save_td_running(st, td) == 0)
				kkstp->kkst_state = KKST_STATE_STACKOK;
			else
				kkstp->kkst_state = KKST_STATE_RUNNING;
		} else {
			kkstp->kkst_state = KKST_STATE_STACKOK;
			stack_save_td(st, td);
		}
		thread_unlock(td);
		/* Print and copy out without the process lock held. */
		PROC_UNLOCK(p);
		stack_sbuf_print(&sb, st);
		sbuf_finish(&sb);
		sbuf_delete(&sb);
		error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp));
		PROC_LOCK(p);
		if (error)
			break;
	}
	/* The process lock is held here on every path out of the loop. */
	_PRELE(p);
	PROC_UNLOCK(p);
	if (lwpidarray != NULL)
		free(lwpidarray, M_TEMP);
	stack_destroy(st);
	free(kkstp, M_TEMP);
	return (error);
}
#endif
2550
2551/*
2552 * This sysctl allows a process to retrieve the full list of groups from
2553 * itself or another process.
2554 */
2555static int
2556sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
2557{
2558	pid_t *pidp = (pid_t *)arg1;
2559	unsigned int arglen = arg2;
2560	struct proc *p;
2561	struct ucred *cred;
2562	int error;
2563
2564	if (arglen != 1)
2565		return (EINVAL);
2566	if (*pidp == -1) {	/* -1 means this process */
2567		p = req->td->td_proc;
2568		PROC_LOCK(p);
2569	} else {
2570		error = pget(*pidp, PGET_CANSEE, &p);
2571		if (error != 0)
2572			return (error);
2573	}
2574
2575	cred = crhold(p->p_ucred);
2576	PROC_UNLOCK(p);
2577
2578	error = SYSCTL_OUT(req, cred->cr_groups,
2579	    cred->cr_ngroups * sizeof(gid_t));
2580	crfree(cred);
2581	return (error);
2582}
2583
2584/*
2585 * This sysctl allows a process to retrieve or/and set the resource limit for
2586 * another process.
2587 */
2588static int
2589sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS)
2590{
2591	int *name = (int *)arg1;
2592	u_int namelen = arg2;
2593	struct rlimit rlim;
2594	struct proc *p;
2595	u_int which;
2596	int flags, error;
2597
2598	if (namelen != 2)
2599		return (EINVAL);
2600
2601	which = (u_int)name[1];
2602	if (which >= RLIM_NLIMITS)
2603		return (EINVAL);
2604
2605	if (req->newptr != NULL && req->newlen != sizeof(rlim))
2606		return (EINVAL);
2607
2608	flags = PGET_HOLD | PGET_NOTWEXIT;
2609	if (req->newptr != NULL)
2610		flags |= PGET_CANDEBUG;
2611	else
2612		flags |= PGET_CANSEE;
2613	error = pget((pid_t)name[0], flags, &p);
2614	if (error != 0)
2615		return (error);
2616
2617	/*
2618	 * Retrieve limit.
2619	 */
2620	if (req->oldptr != NULL) {
2621		PROC_LOCK(p);
2622		lim_rlimit_proc(p, which, &rlim);
2623		PROC_UNLOCK(p);
2624	}
2625	error = SYSCTL_OUT(req, &rlim, sizeof(rlim));
2626	if (error != 0)
2627		goto errout;
2628
2629	/*
2630	 * Set limit.
2631	 */
2632	if (req->newptr != NULL) {
2633		error = SYSCTL_IN(req, &rlim, sizeof(rlim));
2634		if (error == 0)
2635			error = kern_proc_setrlimit(curthread, p, which, &rlim);
2636	}
2637
2638errout:
2639	PRELE(p);
2640	return (error);
2641}
2642
2643/*
2644 * This sysctl allows a process to retrieve ps_strings structure location of
2645 * another process.
2646 */
2647static int
2648sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS)
2649{
2650	int *name = (int *)arg1;
2651	u_int namelen = arg2;
2652	struct proc *p;
2653	vm_offset_t ps_strings;
2654	int error;
2655#ifdef COMPAT_FREEBSD32
2656	uint32_t ps_strings32;
2657#endif
2658
2659	if (namelen != 1)
2660		return (EINVAL);
2661
2662	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2663	if (error != 0)
2664		return (error);
2665#ifdef COMPAT_FREEBSD32
2666	if ((req->flags & SCTL_MASK32) != 0) {
2667		/*
2668		 * We return 0 if the 32 bit emulation request is for a 64 bit
2669		 * process.
2670		 */
2671		ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ?
2672		    PTROUT(p->p_sysent->sv_psstrings) : 0;
2673		PROC_UNLOCK(p);
2674		error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32));
2675		return (error);
2676	}
2677#endif
2678	ps_strings = p->p_sysent->sv_psstrings;
2679	PROC_UNLOCK(p);
2680	error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings));
2681	return (error);
2682}
2683
2684/*
2685 * This sysctl allows a process to retrieve umask of another process.
2686 */
2687static int
2688sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS)
2689{
2690	int *name = (int *)arg1;
2691	u_int namelen = arg2;
2692	struct proc *p;
2693	int error;
2694	u_short fd_cmask;
2695
2696	if (namelen != 1)
2697		return (EINVAL);
2698
2699	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2700	if (error != 0)
2701		return (error);
2702
2703	FILEDESC_SLOCK(p->p_fd);
2704	fd_cmask = p->p_fd->fd_cmask;
2705	FILEDESC_SUNLOCK(p->p_fd);
2706	PRELE(p);
2707	error = SYSCTL_OUT(req, &fd_cmask, sizeof(fd_cmask));
2708	return (error);
2709}
2710
2711/*
2712 * This sysctl allows a process to set and retrieve binary osreldate of
2713 * another process.
2714 */
2715static int
2716sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS)
2717{
2718	int *name = (int *)arg1;
2719	u_int namelen = arg2;
2720	struct proc *p;
2721	int flags, error, osrel;
2722
2723	if (namelen != 1)
2724		return (EINVAL);
2725
2726	if (req->newptr != NULL && req->newlen != sizeof(osrel))
2727		return (EINVAL);
2728
2729	flags = PGET_HOLD | PGET_NOTWEXIT;
2730	if (req->newptr != NULL)
2731		flags |= PGET_CANDEBUG;
2732	else
2733		flags |= PGET_CANSEE;
2734	error = pget((pid_t)name[0], flags, &p);
2735	if (error != 0)
2736		return (error);
2737
2738	error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel));
2739	if (error != 0)
2740		goto errout;
2741
2742	if (req->newptr != NULL) {
2743		error = SYSCTL_IN(req, &osrel, sizeof(osrel));
2744		if (error != 0)
2745			goto errout;
2746		if (osrel < 0) {
2747			error = EINVAL;
2748			goto errout;
2749		}
2750		p->p_osrel = osrel;
2751	}
2752errout:
2753	PRELE(p);
2754	return (error);
2755}
2756
2757static int
2758sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
2759{
2760	int *name = (int *)arg1;
2761	u_int namelen = arg2;
2762	struct proc *p;
2763	struct kinfo_sigtramp kst;
2764	const struct sysentvec *sv;
2765	int error;
2766#ifdef COMPAT_FREEBSD32
2767	struct kinfo_sigtramp32 kst32;
2768#endif
2769
2770	if (namelen != 1)
2771		return (EINVAL);
2772
2773	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2774	if (error != 0)
2775		return (error);
2776	sv = p->p_sysent;
2777#ifdef COMPAT_FREEBSD32
2778	if ((req->flags & SCTL_MASK32) != 0) {
2779		bzero(&kst32, sizeof(kst32));
2780		if (SV_PROC_FLAG(p, SV_ILP32)) {
2781			if (sv->sv_sigcode_base != 0) {
2782				kst32.ksigtramp_start = sv->sv_sigcode_base;
2783				kst32.ksigtramp_end = sv->sv_sigcode_base +
2784				    *sv->sv_szsigcode;
2785			} else {
2786				kst32.ksigtramp_start = sv->sv_psstrings -
2787				    *sv->sv_szsigcode;
2788				kst32.ksigtramp_end = sv->sv_psstrings;
2789			}
2790		}
2791		PROC_UNLOCK(p);
2792		error = SYSCTL_OUT(req, &kst32, sizeof(kst32));
2793		return (error);
2794	}
2795#endif
2796	bzero(&kst, sizeof(kst));
2797	if (sv->sv_sigcode_base != 0) {
2798		kst.ksigtramp_start = (char *)sv->sv_sigcode_base;
2799		kst.ksigtramp_end = (char *)sv->sv_sigcode_base +
2800		    *sv->sv_szsigcode;
2801	} else {
2802		kst.ksigtramp_start = (char *)sv->sv_psstrings -
2803		    *sv->sv_szsigcode;
2804		kst.ksigtramp_end = (char *)sv->sv_psstrings;
2805	}
2806	PROC_UNLOCK(p);
2807	error = SYSCTL_OUT(req, &kst, sizeof(kst));
2808	return (error);
2809}
2810
2811SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
2812
2813SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
2814	CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc",
2815	"Return entire process table");
2816
2817static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2818	sysctl_kern_proc, "Process table");
2819
2820static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE,
2821	sysctl_kern_proc, "Process table");
2822
2823static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2824	sysctl_kern_proc, "Process table");
2825
2826static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD |
2827	CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2828
2829static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE,
2830	sysctl_kern_proc, "Process table");
2831
2832static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2833	sysctl_kern_proc, "Process table");
2834
2835static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2836	sysctl_kern_proc, "Process table");
2837
2838static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2839	sysctl_kern_proc, "Process table");
2840
2841static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,
2842	sysctl_kern_proc, "Return process table, no threads");
2843
2844static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
2845	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
2846	sysctl_kern_proc_args, "Process argument list");
2847
2848static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
2849	sysctl_kern_proc_env, "Process environment");
2850
2851static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD |
2852	CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector");
2853
2854static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD |
2855	CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path");
2856
2857static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD |
2858	CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name,
2859	"Process syscall vector name (ABI type)");
2860
2861static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td,
2862	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2863
2864static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td,
2865	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2866
2867static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td,
2868	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2869
2870static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD),
2871	sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2872
2873static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td,
2874	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2875
2876static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td,
2877	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2878
2879static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td,
2880	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2881
2882static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
2883	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2884
2885static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
2886	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc,
2887	"Return process table, no threads");
2888
2889#ifdef COMPAT_FREEBSD7
2890static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD |
2891	CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries");
2892#endif
2893
2894static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD |
2895	CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries");
2896
2897#if defined(STACK) || defined(DDB)
2898static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD |
2899	CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks");
2900#endif
2901
2902static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD |
2903	CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups");
2904
2905static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW |
2906	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit,
2907	"Process resource limits");
2908
2909static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD |
2910	CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings,
2911	"Process ps_strings location");
2912
2913static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD |
2914	CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask");
2915
2916static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
2917	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
2918	"Process binary osreldate");
2919
2920static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
2921	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
2922	"Process signal trampoline location");
2923
2924int allproc_gen;
2925
2926void
2927stop_all_proc(void)
2928{
2929	struct proc *cp, *p;
2930	int r, gen;
2931	bool restart, seen_stopped, seen_exiting, stopped_some;
2932
2933	cp = curproc;
2934	/*
2935	 * stop_all_proc() assumes that all process which have
2936	 * usermode must be stopped, except current process, for
2937	 * obvious reasons.  Since other threads in the process
2938	 * establishing global stop could unstop something, disable
2939	 * calls from multithreaded processes as precaution.  The
2940	 * service must not be user-callable anyway.
2941	 */
2942	KASSERT((cp->p_flag & P_HADTHREADS) == 0 ||
2943	    (cp->p_flag & P_KTHREAD) != 0, ("mt stop_all_proc"));
2944
2945allproc_loop:
2946	sx_xlock(&allproc_lock);
2947	gen = allproc_gen;
2948	seen_exiting = seen_stopped = stopped_some = restart = false;
2949	LIST_REMOVE(cp, p_list);
2950	LIST_INSERT_HEAD(&allproc, cp, p_list);
2951	for (;;) {
2952		p = LIST_NEXT(cp, p_list);
2953		if (p == NULL)
2954			break;
2955		LIST_REMOVE(cp, p_list);
2956		LIST_INSERT_AFTER(p, cp, p_list);
2957		PROC_LOCK(p);
2958		if ((p->p_flag & (P_KTHREAD | P_SYSTEM |
2959		    P_TOTAL_STOP)) != 0) {
2960			PROC_UNLOCK(p);
2961			continue;
2962		}
2963		if ((p->p_flag & P_WEXIT) != 0) {
2964			seen_exiting = true;
2965			PROC_UNLOCK(p);
2966			continue;
2967		}
2968		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
2969			/*
2970			 * Stopped processes are tolerated when there
2971			 * are no other processes which might continue
2972			 * them.  P_STOPPED_SINGLE but not
2973			 * P_TOTAL_STOP process still has at least one
2974			 * thread running.
2975			 */
2976			seen_stopped = true;
2977			PROC_UNLOCK(p);
2978			continue;
2979		}
2980		_PHOLD(p);
2981		sx_xunlock(&allproc_lock);
2982		r = thread_single(p, SINGLE_ALLPROC);
2983		if (r != 0)
2984			restart = true;
2985		else
2986			stopped_some = true;
2987		_PRELE(p);
2988		PROC_UNLOCK(p);
2989		sx_xlock(&allproc_lock);
2990	}
2991	/* Catch forked children we did not see in iteration. */
2992	if (gen != allproc_gen)
2993		restart = true;
2994	sx_xunlock(&allproc_lock);
2995	if (restart || stopped_some || seen_exiting || seen_stopped) {
2996		kern_yield(PRI_USER);
2997		goto allproc_loop;
2998	}
2999}
3000
3001void
3002resume_all_proc(void)
3003{
3004	struct proc *cp, *p;
3005
3006	cp = curproc;
3007	sx_xlock(&allproc_lock);
3008	LIST_REMOVE(cp, p_list);
3009	LIST_INSERT_HEAD(&allproc, cp, p_list);
3010	for (;;) {
3011		p = LIST_NEXT(cp, p_list);
3012		if (p == NULL)
3013			break;
3014		LIST_REMOVE(cp, p_list);
3015		LIST_INSERT_AFTER(p, cp, p_list);
3016		PROC_LOCK(p);
3017		if ((p->p_flag & P_TOTAL_STOP) != 0) {
3018			sx_xunlock(&allproc_lock);
3019			_PHOLD(p);
3020			thread_single_end(p, SINGLE_ALLPROC);
3021			_PRELE(p);
3022			PROC_UNLOCK(p);
3023			sx_xlock(&allproc_lock);
3024		} else {
3025			PROC_UNLOCK(p);
3026		}
3027	}
3028	sx_xunlock(&allproc_lock);
3029}
3030
3031#define	TOTAL_STOP_DEBUG	1
3032#ifdef TOTAL_STOP_DEBUG
3033volatile static int ap_resume;
3034#include <sys/mount.h>
3035
3036static int
3037sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
3038{
3039	int error, val;
3040
3041	val = 0;
3042	ap_resume = 0;
3043	error = sysctl_handle_int(oidp, &val, 0, req);
3044	if (error != 0 || req->newptr == NULL)
3045		return (error);
3046	if (val != 0) {
3047		stop_all_proc();
3048		syncer_suspend();
3049		while (ap_resume == 0)
3050			;
3051		syncer_resume();
3052		resume_all_proc();
3053	}
3054	return (0);
3055}
3056
3057SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW |
3058    CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0,
3059    sysctl_debug_stop_all_proc, "I",
3060    "");
3061#endif
3062