kern_proc.c revision 284215
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/kern_proc.c 284215 2015-06-10 10:48:12Z mjg $");
34
35#include "opt_compat.h"
36#include "opt_ddb.h"
37#include "opt_ktrace.h"
38#include "opt_kstack_pages.h"
39#include "opt_stack.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/elf.h>
44#include <sys/exec.h>
45#include <sys/kernel.h>
46#include <sys/limits.h>
47#include <sys/lock.h>
48#include <sys/loginclass.h>
49#include <sys/malloc.h>
50#include <sys/mman.h>
51#include <sys/mount.h>
52#include <sys/mutex.h>
53#include <sys/proc.h>
54#include <sys/ptrace.h>
55#include <sys/refcount.h>
56#include <sys/resourcevar.h>
57#include <sys/rwlock.h>
58#include <sys/sbuf.h>
59#include <sys/sysent.h>
60#include <sys/sched.h>
61#include <sys/smp.h>
62#include <sys/stack.h>
63#include <sys/stat.h>
64#include <sys/sysctl.h>
65#include <sys/filedesc.h>
66#include <sys/tty.h>
67#include <sys/signalvar.h>
68#include <sys/sdt.h>
69#include <sys/sx.h>
70#include <sys/user.h>
71#include <sys/jail.h>
72#include <sys/vnode.h>
73#include <sys/eventhandler.h>
74
75#ifdef DDB
76#include <ddb/ddb.h>
77#endif
78
79#include <vm/vm.h>
80#include <vm/vm_param.h>
81#include <vm/vm_extern.h>
82#include <vm/pmap.h>
83#include <vm/vm_map.h>
84#include <vm/vm_object.h>
85#include <vm/vm_page.h>
86#include <vm/uma.h>
87
88#ifdef COMPAT_FREEBSD32
89#include <compat/freebsd32/freebsd32.h>
90#include <compat/freebsd32/freebsd32_util.h>
91#endif
92
/*
 * Statically defined tracing (SDT) probes for the "proc" provider.
 * Each proc zone callback in this file (proc_ctor, proc_dtor, proc_init)
 * fires its "entry" probe first and its "return" probe last; the strings
 * below name the C types of the probe arguments.
 */
SDT_PROVIDER_DEFINE(proc);
/* proc_ctor(): proc pointer, size, constructor argument, flags. */
SDT_PROBE_DEFINE4(proc, kernel, ctor, entry, "struct proc *", "int",
    "void *", "int");
SDT_PROBE_DEFINE4(proc, kernel, ctor, return, "struct proc *", "int",
    "void *", "int");
/* proc_dtor(): the entry probe also carries the process' first thread. */
SDT_PROBE_DEFINE4(proc, kernel, dtor, entry, "struct proc *", "int",
    "void *", "struct thread *");
SDT_PROBE_DEFINE3(proc, kernel, dtor, return, "struct proc *", "int",
    "void *");
/* proc_init(): proc pointer, size, flags. */
SDT_PROBE_DEFINE3(proc, kernel, init, entry, "struct proc *", "int",
    "int");
SDT_PROBE_DEFINE3(proc, kernel, init, return, "struct proc *", "int",
    "int");
106
/*
 * malloc types declared by this file; the two strings are the short name
 * and the description of each type.
 *
 *   M_PGRP     process group headers (released in pgdelete())
 *   M_SESSION  session headers (released in sess_release())
 *   M_PROC     static, so private to this file; procinit() passes it to
 *              hashinit() for the pid and pgrp hash tables
 *   M_SUBPROC  proc sub-structures, e.g. the pstats handled by
 *              pstats_alloc() and pstats_free()
 */
MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
MALLOC_DEFINE(M_SESSION, "session", "session header");
static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
111
112static void doenterpgrp(struct proc *, struct pgrp *);
113static void orphanpg(struct pgrp *pg);
114static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp);
115static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp);
116static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
117    int preferthread);
118static void pgadjustjobc(struct pgrp *pgrp, int entering);
119static void pgdelete(struct pgrp *);
120static int proc_ctor(void *mem, int size, void *arg, int flags);
121static void proc_dtor(void *mem, int size, void *arg);
122static int proc_init(void *mem, int size, int flags);
123static void proc_fini(void *mem, int size);
124static void pargs_free(struct pargs *pa);
125static struct proc *zpfind_locked(pid_t pid);
126
/*
 * Global process bookkeeping: hash tables, process lists, the locks used
 * with them, and the allocator zone for proc structures.  All of these are
 * set up by procinit().
 */
struct pidhashhead *pidhashtbl;		/* pid hash buckets (see PIDHASH()) */
u_long pidhash;				/* filled in by hashinit() for pidhashtbl */
struct pgrphashhead *pgrphashtbl;	/* pgrp hash buckets (see PGRPHASH()) */
u_long pgrphash;			/* filled in by hashinit(); highest bucket
					   index visited by the pgrpdump command */
struct proclist allproc;		/* list walked by FOREACH_PROC_IN_SYSTEM
					   users such as pfind_tid_locked() */
struct proclist zombproc;		/* list searched by zpfind_locked() */
struct sx allproc_lock;			/* sx lock "allproc" */
struct sx proctree_lock;		/* sx lock "proctree" */
struct mtx ppeers_lock;			/* mutex "p_peers" */
uma_zone_t proc_zone;			/* UMA zone "PROC" for struct proc */
140
/* Kernel stack size in pages; starts out as the KSTACK_PAGES constant. */
int kstack_pages = KSTACK_PAGES;
/* Exported read-only (CTLFLAG_RD) under the kern sysctl node. */
SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
    "Kernel stack size in pages");
/*
 * Read-write knob, off (0) by default.  Per its description it makes
 * kern.proc.vmmap skip the resident page count calculation; the code that
 * consults it is not in this part of the file.
 */
static int vmmap_skip_res_cnt = 0;
SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
    &vmmap_skip_res_cnt, 0,
    "Skip calculation of the pages resident count in kern.proc.vmmap");
148
/*
 * Compile-time checks: the build fails if struct kinfo_proc (or its 32-bit
 * compat counterpart) stops matching the size recorded in KINFO_PROC_SIZE
 * (KINFO_PROC32_SIZE).
 * NOTE(review): presumably this guards the layout exported to userland --
 * confirm against the definitions of those constants.
 */
CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
#ifdef COMPAT_FREEBSD32
CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
#endif
153
154/*
155 * Initialize global process hashing structures.
156 */
157void
158procinit()
159{
160
161	sx_init(&allproc_lock, "allproc");
162	sx_init(&proctree_lock, "proctree");
163	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
164	LIST_INIT(&allproc);
165	LIST_INIT(&zombproc);
166	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
167	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
168	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
169	    proc_ctor, proc_dtor, proc_init, proc_fini,
170	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
171	uihashinit();
172}
173
174/*
175 * Prepare a proc for use.
176 */
177static int
178proc_ctor(void *mem, int size, void *arg, int flags)
179{
180	struct proc *p;
181
182	p = (struct proc *)mem;
183	SDT_PROBE(proc, kernel, ctor , entry, p, size, arg, flags, 0);
184	EVENTHANDLER_INVOKE(process_ctor, p);
185	SDT_PROBE(proc, kernel, ctor , return, p, size, arg, flags, 0);
186	return (0);
187}
188
189/*
190 * Reclaim a proc after use.
191 */
192static void
193proc_dtor(void *mem, int size, void *arg)
194{
195	struct proc *p;
196	struct thread *td;
197
198	/* INVARIANTS checks go here */
199	p = (struct proc *)mem;
200	td = FIRST_THREAD_IN_PROC(p);
201	SDT_PROBE(proc, kernel, dtor, entry, p, size, arg, td, 0);
202	if (td != NULL) {
203#ifdef INVARIANTS
204		KASSERT((p->p_numthreads == 1),
205		    ("bad number of threads in exiting process"));
206		KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr"));
207#endif
208		/* Free all OSD associated to this thread. */
209		osd_thread_exit(td);
210	}
211	EVENTHANDLER_INVOKE(process_dtor, p);
212	if (p->p_ksi != NULL)
213		KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue"));
214	SDT_PROBE(proc, kernel, dtor, return, p, size, arg, 0, 0);
215}
216
217/*
218 * Initialize type-stable parts of a proc (when newly created).
219 */
220static int
221proc_init(void *mem, int size, int flags)
222{
223	struct proc *p;
224
225	p = (struct proc *)mem;
226	SDT_PROBE(proc, kernel, init, entry, p, size, flags, 0, 0);
227	p->p_sched = (struct p_sched *)&p[1];
228	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW);
229	mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW);
230	mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW);
231	mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW);
232	mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW);
233	cv_init(&p->p_pwait, "ppwait");
234	cv_init(&p->p_dbgwait, "dbgwait");
235	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
236	EVENTHANDLER_INVOKE(process_init, p);
237	p->p_stats = pstats_alloc();
238	SDT_PROBE(proc, kernel, init, return, p, size, flags, 0, 0);
239	return (0);
240}
241
/*
 * Fini callback of the proc zone.  UMA is expected never to call it:
 * freeing a proc structure would violate type stability, so the compiled
 * variant simply panics.  The teardown sequence is kept under "notnow"
 * for reference.
 */
static void
proc_fini(void *mem, int size)
{
#ifndef notnow
	panic("proc reclaimed");
#else
	struct proc *p;

	p = (struct proc *)mem;
	EVENTHANDLER_INVOKE(process_fini, p);
	pstats_free(p->p_stats);
	thread_free(FIRST_THREAD_IN_PROC(p));
	mtx_destroy(&p->p_mtx);
	if (p->p_ksi != NULL)
		ksiginfo_free(p->p_ksi);
#endif
}
263
264/*
265 * Is p an inferior of the current process?
266 */
267int
268inferior(struct proc *p)
269{
270
271	sx_assert(&proctree_lock, SX_LOCKED);
272	PROC_LOCK_ASSERT(p, MA_OWNED);
273	for (; p != curproc; p = proc_realparent(p)) {
274		if (p->p_pid == 0)
275			return (0);
276	}
277	return (1);
278}
279
280struct proc *
281pfind_locked(pid_t pid)
282{
283	struct proc *p;
284
285	sx_assert(&allproc_lock, SX_LOCKED);
286	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
287		if (p->p_pid == pid) {
288			PROC_LOCK(p);
289			if (p->p_state == PRS_NEW) {
290				PROC_UNLOCK(p);
291				p = NULL;
292			}
293			break;
294		}
295	}
296	return (p);
297}
298
299/*
300 * Locate a process by number; return only "live" processes -- i.e., neither
301 * zombies nor newly born but incompletely initialized processes.  By not
302 * returning processes in the PRS_NEW state, we allow callers to avoid
303 * testing for that condition to avoid dereferencing p_ucred, et al.
304 */
305struct proc *
306pfind(pid_t pid)
307{
308	struct proc *p;
309
310	sx_slock(&allproc_lock);
311	p = pfind_locked(pid);
312	sx_sunlock(&allproc_lock);
313	return (p);
314}
315
316static struct proc *
317pfind_tid_locked(pid_t tid)
318{
319	struct proc *p;
320	struct thread *td;
321
322	sx_assert(&allproc_lock, SX_LOCKED);
323	FOREACH_PROC_IN_SYSTEM(p) {
324		PROC_LOCK(p);
325		if (p->p_state == PRS_NEW) {
326			PROC_UNLOCK(p);
327			continue;
328		}
329		FOREACH_THREAD_IN_PROC(p, td) {
330			if (td->td_tid == tid)
331				goto found;
332		}
333		PROC_UNLOCK(p);
334	}
335found:
336	return (p);
337}
338
339/*
340 * Locate a process group by number.
341 * The caller must hold proctree_lock.
342 */
343struct pgrp *
344pgfind(pgid)
345	register pid_t pgid;
346{
347	register struct pgrp *pgrp;
348
349	sx_assert(&proctree_lock, SX_LOCKED);
350
351	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
352		if (pgrp->pg_id == pgid) {
353			PGRP_LOCK(pgrp);
354			return (pgrp);
355		}
356	}
357	return (NULL);
358}
359
360/*
361 * Locate process and do additional manipulations, depending on flags.
362 */
363int
364pget(pid_t pid, int flags, struct proc **pp)
365{
366	struct proc *p;
367	int error;
368
369	sx_slock(&allproc_lock);
370	if (pid <= PID_MAX) {
371		p = pfind_locked(pid);
372		if (p == NULL && (flags & PGET_NOTWEXIT) == 0)
373			p = zpfind_locked(pid);
374	} else if ((flags & PGET_NOTID) == 0) {
375		p = pfind_tid_locked(pid);
376	} else {
377		p = NULL;
378	}
379	sx_sunlock(&allproc_lock);
380	if (p == NULL)
381		return (ESRCH);
382	if ((flags & PGET_CANSEE) != 0) {
383		error = p_cansee(curthread, p);
384		if (error != 0)
385			goto errout;
386	}
387	if ((flags & PGET_CANDEBUG) != 0) {
388		error = p_candebug(curthread, p);
389		if (error != 0)
390			goto errout;
391	}
392	if ((flags & PGET_ISCURRENT) != 0 && curproc != p) {
393		error = EPERM;
394		goto errout;
395	}
396	if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) {
397		error = ESRCH;
398		goto errout;
399	}
400	if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) {
401		/*
402		 * XXXRW: Not clear ESRCH is the right error during proc
403		 * execve().
404		 */
405		error = ESRCH;
406		goto errout;
407	}
408	if ((flags & PGET_HOLD) != 0) {
409		_PHOLD(p);
410		PROC_UNLOCK(p);
411	}
412	*pp = p;
413	return (0);
414errout:
415	PROC_UNLOCK(p);
416	return (error);
417}
418
419/*
420 * Create a new process group.
421 * pgid must be equal to the pid of p.
422 * Begin a new session if required.
423 */
424int
425enterpgrp(p, pgid, pgrp, sess)
426	register struct proc *p;
427	pid_t pgid;
428	struct pgrp *pgrp;
429	struct session *sess;
430{
431
432	sx_assert(&proctree_lock, SX_XLOCKED);
433
434	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
435	KASSERT(p->p_pid == pgid,
436	    ("enterpgrp: new pgrp and pid != pgid"));
437	KASSERT(pgfind(pgid) == NULL,
438	    ("enterpgrp: pgrp with pgid exists"));
439	KASSERT(!SESS_LEADER(p),
440	    ("enterpgrp: session leader attempted setpgrp"));
441
442	mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
443
444	if (sess != NULL) {
445		/*
446		 * new session
447		 */
448		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
449		PROC_LOCK(p);
450		p->p_flag &= ~P_CONTROLT;
451		PROC_UNLOCK(p);
452		PGRP_LOCK(pgrp);
453		sess->s_leader = p;
454		sess->s_sid = p->p_pid;
455		refcount_init(&sess->s_count, 1);
456		sess->s_ttyvp = NULL;
457		sess->s_ttydp = NULL;
458		sess->s_ttyp = NULL;
459		bcopy(p->p_session->s_login, sess->s_login,
460			    sizeof(sess->s_login));
461		pgrp->pg_session = sess;
462		KASSERT(p == curproc,
463		    ("enterpgrp: mksession and p != curproc"));
464	} else {
465		pgrp->pg_session = p->p_session;
466		sess_hold(pgrp->pg_session);
467		PGRP_LOCK(pgrp);
468	}
469	pgrp->pg_id = pgid;
470	LIST_INIT(&pgrp->pg_members);
471
472	/*
473	 * As we have an exclusive lock of proctree_lock,
474	 * this should not deadlock.
475	 */
476	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
477	pgrp->pg_jobc = 0;
478	SLIST_INIT(&pgrp->pg_sigiolst);
479	PGRP_UNLOCK(pgrp);
480
481	doenterpgrp(p, pgrp);
482
483	return (0);
484}
485
486/*
487 * Move p to an existing process group
488 */
489int
490enterthispgrp(p, pgrp)
491	register struct proc *p;
492	struct pgrp *pgrp;
493{
494
495	sx_assert(&proctree_lock, SX_XLOCKED);
496	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
497	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
498	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
499	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
500	KASSERT(pgrp->pg_session == p->p_session,
501		("%s: pgrp's session %p, p->p_session %p.\n",
502		__func__,
503		pgrp->pg_session,
504		p->p_session));
505	KASSERT(pgrp != p->p_pgrp,
506		("%s: p belongs to pgrp.", __func__));
507
508	doenterpgrp(p, pgrp);
509
510	return (0);
511}
512
513/*
514 * Move p to a process group
515 */
516static void
517doenterpgrp(p, pgrp)
518	struct proc *p;
519	struct pgrp *pgrp;
520{
521	struct pgrp *savepgrp;
522
523	sx_assert(&proctree_lock, SX_XLOCKED);
524	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
525	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
526	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
527	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
528
529	savepgrp = p->p_pgrp;
530
531	/*
532	 * Adjust eligibility of affected pgrps to participate in job control.
533	 * Increment eligibility counts before decrementing, otherwise we
534	 * could reach 0 spuriously during the first call.
535	 */
536	fixjobc(p, pgrp, 1);
537	fixjobc(p, p->p_pgrp, 0);
538
539	PGRP_LOCK(pgrp);
540	PGRP_LOCK(savepgrp);
541	PROC_LOCK(p);
542	LIST_REMOVE(p, p_pglist);
543	p->p_pgrp = pgrp;
544	PROC_UNLOCK(p);
545	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
546	PGRP_UNLOCK(savepgrp);
547	PGRP_UNLOCK(pgrp);
548	if (LIST_EMPTY(&savepgrp->pg_members))
549		pgdelete(savepgrp);
550}
551
552/*
553 * remove process from process group
554 */
555int
556leavepgrp(p)
557	register struct proc *p;
558{
559	struct pgrp *savepgrp;
560
561	sx_assert(&proctree_lock, SX_XLOCKED);
562	savepgrp = p->p_pgrp;
563	PGRP_LOCK(savepgrp);
564	PROC_LOCK(p);
565	LIST_REMOVE(p, p_pglist);
566	p->p_pgrp = NULL;
567	PROC_UNLOCK(p);
568	PGRP_UNLOCK(savepgrp);
569	if (LIST_EMPTY(&savepgrp->pg_members))
570		pgdelete(savepgrp);
571	return (0);
572}
573
574/*
575 * delete a process group
576 */
577static void
578pgdelete(pgrp)
579	register struct pgrp *pgrp;
580{
581	struct session *savesess;
582	struct tty *tp;
583
584	sx_assert(&proctree_lock, SX_XLOCKED);
585	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
586	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
587
588	/*
589	 * Reset any sigio structures pointing to us as a result of
590	 * F_SETOWN with our pgid.
591	 */
592	funsetownlst(&pgrp->pg_sigiolst);
593
594	PGRP_LOCK(pgrp);
595	tp = pgrp->pg_session->s_ttyp;
596	LIST_REMOVE(pgrp, pg_hash);
597	savesess = pgrp->pg_session;
598	PGRP_UNLOCK(pgrp);
599
600	/* Remove the reference to the pgrp before deallocating it. */
601	if (tp != NULL) {
602		tty_lock(tp);
603		tty_rel_pgrp(tp, pgrp);
604	}
605
606	mtx_destroy(&pgrp->pg_mtx);
607	free(pgrp, M_PGRP);
608	sess_release(savesess);
609}
610
611static void
612pgadjustjobc(pgrp, entering)
613	struct pgrp *pgrp;
614	int entering;
615{
616
617	PGRP_LOCK(pgrp);
618	if (entering)
619		pgrp->pg_jobc++;
620	else {
621		--pgrp->pg_jobc;
622		if (pgrp->pg_jobc == 0)
623			orphanpg(pgrp);
624	}
625	PGRP_UNLOCK(pgrp);
626}
627
628/*
629 * Adjust pgrp jobc counters when specified process changes process group.
630 * We count the number of processes in each process group that "qualify"
631 * the group for terminal job control (those with a parent in a different
632 * process group of the same session).  If that count reaches zero, the
633 * process group becomes orphaned.  Check both the specified process'
634 * process group and that of its children.
635 * entering == 0 => p is leaving specified group.
636 * entering == 1 => p is entering specified group.
637 */
638void
639fixjobc(p, pgrp, entering)
640	register struct proc *p;
641	register struct pgrp *pgrp;
642	int entering;
643{
644	register struct pgrp *hispgrp;
645	register struct session *mysession;
646
647	sx_assert(&proctree_lock, SX_LOCKED);
648	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
649	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
650	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
651
652	/*
653	 * Check p's parent to see whether p qualifies its own process
654	 * group; if so, adjust count for p's process group.
655	 */
656	mysession = pgrp->pg_session;
657	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
658	    hispgrp->pg_session == mysession)
659		pgadjustjobc(pgrp, entering);
660
661	/*
662	 * Check this process' children to see whether they qualify
663	 * their process groups; if so, adjust counts for children's
664	 * process groups.
665	 */
666	LIST_FOREACH(p, &p->p_children, p_sibling) {
667		hispgrp = p->p_pgrp;
668		if (hispgrp == pgrp ||
669		    hispgrp->pg_session != mysession)
670			continue;
671		PROC_LOCK(p);
672		if (p->p_state == PRS_ZOMBIE) {
673			PROC_UNLOCK(p);
674			continue;
675		}
676		PROC_UNLOCK(p);
677		pgadjustjobc(hispgrp, entering);
678	}
679}
680
681/*
682 * A process group has become orphaned;
683 * if there are any stopped processes in the group,
684 * hang-up all process in that group.
685 */
686static void
687orphanpg(pg)
688	struct pgrp *pg;
689{
690	register struct proc *p;
691
692	PGRP_LOCK_ASSERT(pg, MA_OWNED);
693
694	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
695		PROC_LOCK(p);
696		if (P_SHOULDSTOP(p)) {
697			PROC_UNLOCK(p);
698			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
699				PROC_LOCK(p);
700				kern_psignal(p, SIGHUP);
701				kern_psignal(p, SIGCONT);
702				PROC_UNLOCK(p);
703			}
704			return;
705		}
706		PROC_UNLOCK(p);
707	}
708}
709
/*
 * Take an additional reference on session s by bumping s->s_count.
 * The matching drop is sess_release().
 */
void
sess_hold(struct session *s)
{

	refcount_acquire(&s->s_count);
}
716
717void
718sess_release(struct session *s)
719{
720
721	if (refcount_release(&s->s_count)) {
722		if (s->s_ttyp != NULL) {
723			tty_lock(s->s_ttyp);
724			tty_rel_sess(s->s_ttyp, s);
725		}
726		mtx_destroy(&s->s_mtx);
727		free(s, M_SESSION);
728	}
729}
730
731#ifdef DDB
732
733DB_SHOW_COMMAND(pgrpdump, pgrpdump)
734{
735	register struct pgrp *pgrp;
736	register struct proc *p;
737	register int i;
738
739	for (i = 0; i <= pgrphash; i++) {
740		if (!LIST_EMPTY(&pgrphashtbl[i])) {
741			printf("\tindx %d\n", i);
742			LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
743				printf(
744			"\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
745				    (void *)pgrp, (long)pgrp->pg_id,
746				    (void *)pgrp->pg_session,
747				    pgrp->pg_session->s_count,
748				    (void *)LIST_FIRST(&pgrp->pg_members));
749				LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
750					printf("\t\tpid %ld addr %p pgrp %p\n",
751					    (long)p->p_pid, (void *)p,
752					    (void *)p->p_pgrp);
753				}
754			}
755		}
756	}
757}
758#endif /* DDB */
759
760/*
761 * Calculate the kinfo_proc members which contain process-wide
762 * informations.
763 * Must be called with the target process locked.
764 */
765static void
766fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp)
767{
768	struct thread *td;
769
770	PROC_LOCK_ASSERT(p, MA_OWNED);
771
772	kp->ki_estcpu = 0;
773	kp->ki_pctcpu = 0;
774	FOREACH_THREAD_IN_PROC(p, td) {
775		thread_lock(td);
776		kp->ki_pctcpu += sched_pctcpu(td);
777		kp->ki_estcpu += td->td_estcpu;
778		thread_unlock(td);
779	}
780}
781
782/*
783 * Clear kinfo_proc and fill in any information that is common
784 * to all threads in the process.
785 * Must be called with the target process locked.
786 */
787static void
788fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
789{
790	struct thread *td0;
791	struct tty *tp;
792	struct session *sp;
793	struct ucred *cred;
794	struct sigacts *ps;
795
796	/* For proc_realparent. */
797	sx_assert(&proctree_lock, SX_LOCKED);
798	PROC_LOCK_ASSERT(p, MA_OWNED);
799	bzero(kp, sizeof(*kp));
800
801	kp->ki_structsize = sizeof(*kp);
802	kp->ki_paddr = p;
803	kp->ki_addr =/* p->p_addr; */0; /* XXX */
804	kp->ki_args = p->p_args;
805	kp->ki_textvp = p->p_textvp;
806#ifdef KTRACE
807	kp->ki_tracep = p->p_tracevp;
808	kp->ki_traceflag = p->p_traceflag;
809#endif
810	kp->ki_fd = p->p_fd;
811	kp->ki_vmspace = p->p_vmspace;
812	kp->ki_flag = p->p_flag;
813	kp->ki_flag2 = p->p_flag2;
814	cred = p->p_ucred;
815	if (cred) {
816		kp->ki_uid = cred->cr_uid;
817		kp->ki_ruid = cred->cr_ruid;
818		kp->ki_svuid = cred->cr_svuid;
819		kp->ki_cr_flags = 0;
820		if (cred->cr_flags & CRED_FLAG_CAPMODE)
821			kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
822		/* XXX bde doesn't like KI_NGROUPS */
823		if (cred->cr_ngroups > KI_NGROUPS) {
824			kp->ki_ngroups = KI_NGROUPS;
825			kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
826		} else
827			kp->ki_ngroups = cred->cr_ngroups;
828		bcopy(cred->cr_groups, kp->ki_groups,
829		    kp->ki_ngroups * sizeof(gid_t));
830		kp->ki_rgid = cred->cr_rgid;
831		kp->ki_svgid = cred->cr_svgid;
832		/* If jailed(cred), emulate the old P_JAILED flag. */
833		if (jailed(cred)) {
834			kp->ki_flag |= P_JAILED;
835			/* If inside the jail, use 0 as a jail ID. */
836			if (cred->cr_prison != curthread->td_ucred->cr_prison)
837				kp->ki_jid = cred->cr_prison->pr_id;
838		}
839		strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name,
840		    sizeof(kp->ki_loginclass));
841	}
842	ps = p->p_sigacts;
843	if (ps) {
844		mtx_lock(&ps->ps_mtx);
845		kp->ki_sigignore = ps->ps_sigignore;
846		kp->ki_sigcatch = ps->ps_sigcatch;
847		mtx_unlock(&ps->ps_mtx);
848	}
849	if (p->p_state != PRS_NEW &&
850	    p->p_state != PRS_ZOMBIE &&
851	    p->p_vmspace != NULL) {
852		struct vmspace *vm = p->p_vmspace;
853
854		kp->ki_size = vm->vm_map.size;
855		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
856		FOREACH_THREAD_IN_PROC(p, td0) {
857			if (!TD_IS_SWAPPED(td0))
858				kp->ki_rssize += td0->td_kstack_pages;
859		}
860		kp->ki_swrss = vm->vm_swrss;
861		kp->ki_tsize = vm->vm_tsize;
862		kp->ki_dsize = vm->vm_dsize;
863		kp->ki_ssize = vm->vm_ssize;
864	} else if (p->p_state == PRS_ZOMBIE)
865		kp->ki_stat = SZOMB;
866	if (kp->ki_flag & P_INMEM)
867		kp->ki_sflag = PS_INMEM;
868	else
869		kp->ki_sflag = 0;
870	/* Calculate legacy swtime as seconds since 'swtick'. */
871	kp->ki_swtime = (ticks - p->p_swtick) / hz;
872	kp->ki_pid = p->p_pid;
873	kp->ki_nice = p->p_nice;
874	kp->ki_fibnum = p->p_fibnum;
875	kp->ki_start = p->p_stats->p_start;
876	timevaladd(&kp->ki_start, &boottime);
877	PROC_STATLOCK(p);
878	rufetch(p, &kp->ki_rusage);
879	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
880	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
881	PROC_STATUNLOCK(p);
882	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
883	/* Some callers want child times in a single value. */
884	kp->ki_childtime = kp->ki_childstime;
885	timevaladd(&kp->ki_childtime, &kp->ki_childutime);
886
887	FOREACH_THREAD_IN_PROC(p, td0)
888		kp->ki_cow += td0->td_cow;
889
890	tp = NULL;
891	if (p->p_pgrp) {
892		kp->ki_pgid = p->p_pgrp->pg_id;
893		kp->ki_jobc = p->p_pgrp->pg_jobc;
894		sp = p->p_pgrp->pg_session;
895
896		if (sp != NULL) {
897			kp->ki_sid = sp->s_sid;
898			SESS_LOCK(sp);
899			strlcpy(kp->ki_login, sp->s_login,
900			    sizeof(kp->ki_login));
901			if (sp->s_ttyvp)
902				kp->ki_kiflag |= KI_CTTY;
903			if (SESS_LEADER(p))
904				kp->ki_kiflag |= KI_SLEADER;
905			/* XXX proctree_lock */
906			tp = sp->s_ttyp;
907			SESS_UNLOCK(sp);
908		}
909	}
910	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
911		kp->ki_tdev = tty_udev(tp);
912		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
913		if (tp->t_session)
914			kp->ki_tsid = tp->t_session->s_sid;
915	} else
916		kp->ki_tdev = NODEV;
917	if (p->p_comm[0] != '\0')
918		strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm));
919	if (p->p_sysent && p->p_sysent->sv_name != NULL &&
920	    p->p_sysent->sv_name[0] != '\0')
921		strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul));
922	kp->ki_siglist = p->p_siglist;
923	kp->ki_xstat = p->p_xstat;
924	kp->ki_acflag = p->p_acflag;
925	kp->ki_lock = p->p_lock;
926	if (p->p_pptr) {
927		kp->ki_ppid = proc_realparent(p)->p_pid;
928		if (p->p_flag & P_TRACED)
929			kp->ki_tracer = p->p_pptr->p_pid;
930	}
931}
932
933/*
934 * Fill in information that is thread specific.  Must be called with
935 * target process locked.  If 'preferthread' is set, overwrite certain
936 * process-related fields that are maintained for both threads and
937 * processes.
938 */
939static void
940fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
941{
942	struct proc *p;
943
944	p = td->td_proc;
945	kp->ki_tdaddr = td;
946	PROC_LOCK_ASSERT(p, MA_OWNED);
947
948	if (preferthread)
949		PROC_STATLOCK(p);
950	thread_lock(td);
951	if (td->td_wmesg != NULL)
952		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
953	else
954		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
955	strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname));
956	if (TD_ON_LOCK(td)) {
957		kp->ki_kiflag |= KI_LOCKBLOCK;
958		strlcpy(kp->ki_lockname, td->td_lockname,
959		    sizeof(kp->ki_lockname));
960	} else {
961		kp->ki_kiflag &= ~KI_LOCKBLOCK;
962		bzero(kp->ki_lockname, sizeof(kp->ki_lockname));
963	}
964
965	if (p->p_state == PRS_NORMAL) { /* approximate. */
966		if (TD_ON_RUNQ(td) ||
967		    TD_CAN_RUN(td) ||
968		    TD_IS_RUNNING(td)) {
969			kp->ki_stat = SRUN;
970		} else if (P_SHOULDSTOP(p)) {
971			kp->ki_stat = SSTOP;
972		} else if (TD_IS_SLEEPING(td)) {
973			kp->ki_stat = SSLEEP;
974		} else if (TD_ON_LOCK(td)) {
975			kp->ki_stat = SLOCK;
976		} else {
977			kp->ki_stat = SWAIT;
978		}
979	} else if (p->p_state == PRS_ZOMBIE) {
980		kp->ki_stat = SZOMB;
981	} else {
982		kp->ki_stat = SIDL;
983	}
984
985	/* Things in the thread */
986	kp->ki_wchan = td->td_wchan;
987	kp->ki_pri.pri_level = td->td_priority;
988	kp->ki_pri.pri_native = td->td_base_pri;
989
990	/*
991	 * Note: legacy fields; clamp at the old NOCPU value and/or
992	 * the maximum u_char CPU value.
993	 */
994	if (td->td_lastcpu == NOCPU)
995		kp->ki_lastcpu_old = NOCPU_OLD;
996	else if (td->td_lastcpu > MAXCPU_OLD)
997		kp->ki_lastcpu_old = MAXCPU_OLD;
998	else
999		kp->ki_lastcpu_old = td->td_lastcpu;
1000
1001	if (td->td_oncpu == NOCPU)
1002		kp->ki_oncpu_old = NOCPU_OLD;
1003	else if (td->td_oncpu > MAXCPU_OLD)
1004		kp->ki_oncpu_old = MAXCPU_OLD;
1005	else
1006		kp->ki_oncpu_old = td->td_oncpu;
1007
1008	kp->ki_lastcpu = td->td_lastcpu;
1009	kp->ki_oncpu = td->td_oncpu;
1010	kp->ki_tdflags = td->td_flags;
1011	kp->ki_tid = td->td_tid;
1012	kp->ki_numthreads = p->p_numthreads;
1013	kp->ki_pcb = td->td_pcb;
1014	kp->ki_kstack = (void *)td->td_kstack;
1015	kp->ki_slptime = (ticks - td->td_slptick) / hz;
1016	kp->ki_pri.pri_class = td->td_pri_class;
1017	kp->ki_pri.pri_user = td->td_user_pri;
1018
1019	if (preferthread) {
1020		rufetchtd(td, &kp->ki_rusage);
1021		kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
1022		kp->ki_pctcpu = sched_pctcpu(td);
1023		kp->ki_estcpu = td->td_estcpu;
1024		kp->ki_cow = td->td_cow;
1025	}
1026
1027	/* We can't get this anymore but ps etc never used it anyway. */
1028	kp->ki_rqindex = 0;
1029
1030	if (preferthread)
1031		kp->ki_siglist = td->td_siglist;
1032	kp->ki_sigmask = td->td_sigmask;
1033	thread_unlock(td);
1034	if (preferthread)
1035		PROC_STATUNLOCK(p);
1036}
1037
1038/*
1039 * Fill in a kinfo_proc structure for the specified process.
1040 * Must be called with the target process locked.
1041 */
1042void
1043fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
1044{
1045
1046	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1047
1048	fill_kinfo_proc_only(p, kp);
1049	fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0);
1050	fill_kinfo_aggregate(p, kp);
1051}
1052
1053struct pstats *
1054pstats_alloc(void)
1055{
1056
1057	return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK));
1058}
1059
1060/*
1061 * Copy parts of p_stats; zero the rest of p_stats (statistics).
1062 */
1063void
1064pstats_fork(struct pstats *src, struct pstats *dst)
1065{
1066
1067	bzero(&dst->pstat_startzero,
1068	    __rangeof(struct pstats, pstat_startzero, pstat_endzero));
1069	bcopy(&src->pstat_startcopy, &dst->pstat_startcopy,
1070	    __rangeof(struct pstats, pstat_startcopy, pstat_endcopy));
1071}
1072
/*
 * Release a struct pstats back to M_SUBPROC, the malloc type that
 * pstats_alloc() allocates from.
 */
void
pstats_free(struct pstats *ps)
{

	free(ps, M_SUBPROC);
}
1079
1080static struct proc *
1081zpfind_locked(pid_t pid)
1082{
1083	struct proc *p;
1084
1085	sx_assert(&allproc_lock, SX_LOCKED);
1086	LIST_FOREACH(p, &zombproc, p_list) {
1087		if (p->p_pid == pid) {
1088			PROC_LOCK(p);
1089			break;
1090		}
1091	}
1092	return (p);
1093}
1094
1095/*
1096 * Locate a zombie process by number
1097 */
1098struct proc *
1099zpfind(pid_t pid)
1100{
1101	struct proc *p;
1102
1103	sx_slock(&allproc_lock);
1104	p = zpfind_locked(pid);
1105	sx_sunlock(&allproc_lock);
1106	return (p);
1107}
1108
1109#ifdef COMPAT_FREEBSD32
1110
/*
 * Squeeze a pointer into 32 bits: a value that fits in an unsigned int is
 * returned unchanged, anything larger becomes zero.
 *
 * This function is typically used to copy out the kernel address, so
 * it can be replaced by assignment of zero.
 */
static inline uint32_t
ptr32_trim(void *ptr)
{
	const uintptr_t addr = (uintptr_t)ptr;

	if (addr > UINT_MAX)
		return (0);
	return (addr);
}
1123
/*
 * Copy the pointer member 'fld' from structure 'src' to structure 'dst',
 * passing it through ptr32_trim() on the way.
 */
#define	PTRTRIM_CP(src, dst, fld) do {					\
	(dst).fld = ptr32_trim((src).fld);				\
} while (0)
1126
1127static void
1128freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
1129{
1130	int i;
1131
1132	bzero(ki32, sizeof(struct kinfo_proc32));
1133	ki32->ki_structsize = sizeof(struct kinfo_proc32);
1134	CP(*ki, *ki32, ki_layout);
1135	PTRTRIM_CP(*ki, *ki32, ki_args);
1136	PTRTRIM_CP(*ki, *ki32, ki_paddr);
1137	PTRTRIM_CP(*ki, *ki32, ki_addr);
1138	PTRTRIM_CP(*ki, *ki32, ki_tracep);
1139	PTRTRIM_CP(*ki, *ki32, ki_textvp);
1140	PTRTRIM_CP(*ki, *ki32, ki_fd);
1141	PTRTRIM_CP(*ki, *ki32, ki_vmspace);
1142	PTRTRIM_CP(*ki, *ki32, ki_wchan);
1143	CP(*ki, *ki32, ki_pid);
1144	CP(*ki, *ki32, ki_ppid);
1145	CP(*ki, *ki32, ki_pgid);
1146	CP(*ki, *ki32, ki_tpgid);
1147	CP(*ki, *ki32, ki_sid);
1148	CP(*ki, *ki32, ki_tsid);
1149	CP(*ki, *ki32, ki_jobc);
1150	CP(*ki, *ki32, ki_tdev);
1151	CP(*ki, *ki32, ki_siglist);
1152	CP(*ki, *ki32, ki_sigmask);
1153	CP(*ki, *ki32, ki_sigignore);
1154	CP(*ki, *ki32, ki_sigcatch);
1155	CP(*ki, *ki32, ki_uid);
1156	CP(*ki, *ki32, ki_ruid);
1157	CP(*ki, *ki32, ki_svuid);
1158	CP(*ki, *ki32, ki_rgid);
1159	CP(*ki, *ki32, ki_svgid);
1160	CP(*ki, *ki32, ki_ngroups);
1161	for (i = 0; i < KI_NGROUPS; i++)
1162		CP(*ki, *ki32, ki_groups[i]);
1163	CP(*ki, *ki32, ki_size);
1164	CP(*ki, *ki32, ki_rssize);
1165	CP(*ki, *ki32, ki_swrss);
1166	CP(*ki, *ki32, ki_tsize);
1167	CP(*ki, *ki32, ki_dsize);
1168	CP(*ki, *ki32, ki_ssize);
1169	CP(*ki, *ki32, ki_xstat);
1170	CP(*ki, *ki32, ki_acflag);
1171	CP(*ki, *ki32, ki_pctcpu);
1172	CP(*ki, *ki32, ki_estcpu);
1173	CP(*ki, *ki32, ki_slptime);
1174	CP(*ki, *ki32, ki_swtime);
1175	CP(*ki, *ki32, ki_cow);
1176	CP(*ki, *ki32, ki_runtime);
1177	TV_CP(*ki, *ki32, ki_start);
1178	TV_CP(*ki, *ki32, ki_childtime);
1179	CP(*ki, *ki32, ki_flag);
1180	CP(*ki, *ki32, ki_kiflag);
1181	CP(*ki, *ki32, ki_traceflag);
1182	CP(*ki, *ki32, ki_stat);
1183	CP(*ki, *ki32, ki_nice);
1184	CP(*ki, *ki32, ki_lock);
1185	CP(*ki, *ki32, ki_rqindex);
1186	CP(*ki, *ki32, ki_oncpu);
1187	CP(*ki, *ki32, ki_lastcpu);
1188
1189	/* XXX TODO: wrap cpu value as appropriate */
1190	CP(*ki, *ki32, ki_oncpu_old);
1191	CP(*ki, *ki32, ki_lastcpu_old);
1192
1193	bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1);
1194	bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1);
1195	bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1);
1196	bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1);
1197	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
1198	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
1199	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
1200	CP(*ki, *ki32, ki_tracer);
1201	CP(*ki, *ki32, ki_flag2);
1202	CP(*ki, *ki32, ki_fibnum);
1203	CP(*ki, *ki32, ki_cr_flags);
1204	CP(*ki, *ki32, ki_jid);
1205	CP(*ki, *ki32, ki_numthreads);
1206	CP(*ki, *ki32, ki_tid);
1207	CP(*ki, *ki32, ki_pri);
1208	freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage);
1209	freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch);
1210	PTRTRIM_CP(*ki, *ki32, ki_pcb);
1211	PTRTRIM_CP(*ki, *ki32, ki_kstack);
1212	PTRTRIM_CP(*ki, *ki32, ki_udata);
1213	CP(*ki, *ki32, ki_sflag);
1214	CP(*ki, *ki32, ki_tdflags);
1215}
1216#endif
1217
1218int
1219kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
1220{
1221	struct thread *td;
1222	struct kinfo_proc ki;
1223#ifdef COMPAT_FREEBSD32
1224	struct kinfo_proc32 ki32;
1225#endif
1226	int error;
1227
1228	PROC_LOCK_ASSERT(p, MA_OWNED);
1229	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
1230
1231	error = 0;
1232	fill_kinfo_proc(p, &ki);
1233	if ((flags & KERN_PROC_NOTHREADS) != 0) {
1234#ifdef COMPAT_FREEBSD32
1235		if ((flags & KERN_PROC_MASK32) != 0) {
1236			freebsd32_kinfo_proc_out(&ki, &ki32);
1237			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1238				error = ENOMEM;
1239		} else
1240#endif
1241			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1242				error = ENOMEM;
1243	} else {
1244		FOREACH_THREAD_IN_PROC(p, td) {
1245			fill_kinfo_thread(td, &ki, 1);
1246#ifdef COMPAT_FREEBSD32
1247			if ((flags & KERN_PROC_MASK32) != 0) {
1248				freebsd32_kinfo_proc_out(&ki, &ki32);
1249				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
1250					error = ENOMEM;
1251			} else
1252#endif
1253				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
1254					error = ENOMEM;
1255			if (error != 0)
1256				break;
1257		}
1258	}
1259	PROC_UNLOCK(p);
1260	return (error);
1261}
1262
1263static int
1264sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags,
1265    int doingzomb)
1266{
1267	struct sbuf sb;
1268	struct kinfo_proc ki;
1269	struct proc *np;
1270	int error, error2;
1271	pid_t pid;
1272
1273	pid = p->p_pid;
1274	sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req);
1275	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1276	error = kern_proc_out(p, &sb, flags);
1277	error2 = sbuf_finish(&sb);
1278	sbuf_delete(&sb);
1279	if (error != 0)
1280		return (error);
1281	else if (error2 != 0)
1282		return (error2);
1283	if (doingzomb)
1284		np = zpfind(pid);
1285	else {
1286		if (pid == 0)
1287			return (0);
1288		np = pfind(pid);
1289	}
1290	if (np == NULL)
1291		return (ESRCH);
1292	if (np != p) {
1293		PROC_UNLOCK(np);
1294		return (ESRCH);
1295	}
1296	PROC_UNLOCK(np);
1297	return (0);
1298}
1299
1300static int
1301sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
1302{
1303	int *name = (int *)arg1;
1304	u_int namelen = arg2;
1305	struct proc *p;
1306	int flags, doingzomb, oid_number;
1307	int error = 0;
1308
1309	oid_number = oidp->oid_number;
1310	if (oid_number != KERN_PROC_ALL &&
1311	    (oid_number & KERN_PROC_INC_THREAD) == 0)
1312		flags = KERN_PROC_NOTHREADS;
1313	else {
1314		flags = 0;
1315		oid_number &= ~KERN_PROC_INC_THREAD;
1316	}
1317#ifdef COMPAT_FREEBSD32
1318	if (req->flags & SCTL_MASK32)
1319		flags |= KERN_PROC_MASK32;
1320#endif
1321	if (oid_number == KERN_PROC_PID) {
1322		if (namelen != 1)
1323			return (EINVAL);
1324		error = sysctl_wire_old_buffer(req, 0);
1325		if (error)
1326			return (error);
1327		sx_slock(&proctree_lock);
1328		error = pget((pid_t)name[0], PGET_CANSEE, &p);
1329		if (error == 0)
1330			error = sysctl_out_proc(p, req, flags, 0);
1331		sx_sunlock(&proctree_lock);
1332		return (error);
1333	}
1334
1335	switch (oid_number) {
1336	case KERN_PROC_ALL:
1337		if (namelen != 0)
1338			return (EINVAL);
1339		break;
1340	case KERN_PROC_PROC:
1341		if (namelen != 0 && namelen != 1)
1342			return (EINVAL);
1343		break;
1344	default:
1345		if (namelen != 1)
1346			return (EINVAL);
1347		break;
1348	}
1349
1350	if (!req->oldptr) {
1351		/* overestimate by 5 procs */
1352		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
1353		if (error)
1354			return (error);
1355	}
1356	error = sysctl_wire_old_buffer(req, 0);
1357	if (error != 0)
1358		return (error);
1359	sx_slock(&proctree_lock);
1360	sx_slock(&allproc_lock);
1361	for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
1362		if (!doingzomb)
1363			p = LIST_FIRST(&allproc);
1364		else
1365			p = LIST_FIRST(&zombproc);
1366		for (; p != 0; p = LIST_NEXT(p, p_list)) {
1367			/*
1368			 * Skip embryonic processes.
1369			 */
1370			PROC_LOCK(p);
1371			if (p->p_state == PRS_NEW) {
1372				PROC_UNLOCK(p);
1373				continue;
1374			}
1375			KASSERT(p->p_ucred != NULL,
1376			    ("process credential is NULL for non-NEW proc"));
1377			/*
1378			 * Show a user only appropriate processes.
1379			 */
1380			if (p_cansee(curthread, p)) {
1381				PROC_UNLOCK(p);
1382				continue;
1383			}
1384			/*
1385			 * TODO - make more efficient (see notes below).
1386			 * do by session.
1387			 */
1388			switch (oid_number) {
1389
1390			case KERN_PROC_GID:
1391				if (p->p_ucred->cr_gid != (gid_t)name[0]) {
1392					PROC_UNLOCK(p);
1393					continue;
1394				}
1395				break;
1396
1397			case KERN_PROC_PGRP:
1398				/* could do this by traversing pgrp */
1399				if (p->p_pgrp == NULL ||
1400				    p->p_pgrp->pg_id != (pid_t)name[0]) {
1401					PROC_UNLOCK(p);
1402					continue;
1403				}
1404				break;
1405
1406			case KERN_PROC_RGID:
1407				if (p->p_ucred->cr_rgid != (gid_t)name[0]) {
1408					PROC_UNLOCK(p);
1409					continue;
1410				}
1411				break;
1412
1413			case KERN_PROC_SESSION:
1414				if (p->p_session == NULL ||
1415				    p->p_session->s_sid != (pid_t)name[0]) {
1416					PROC_UNLOCK(p);
1417					continue;
1418				}
1419				break;
1420
1421			case KERN_PROC_TTY:
1422				if ((p->p_flag & P_CONTROLT) == 0 ||
1423				    p->p_session == NULL) {
1424					PROC_UNLOCK(p);
1425					continue;
1426				}
1427				/* XXX proctree_lock */
1428				SESS_LOCK(p->p_session);
1429				if (p->p_session->s_ttyp == NULL ||
1430				    tty_udev(p->p_session->s_ttyp) !=
1431				    (dev_t)name[0]) {
1432					SESS_UNLOCK(p->p_session);
1433					PROC_UNLOCK(p);
1434					continue;
1435				}
1436				SESS_UNLOCK(p->p_session);
1437				break;
1438
1439			case KERN_PROC_UID:
1440				if (p->p_ucred->cr_uid != (uid_t)name[0]) {
1441					PROC_UNLOCK(p);
1442					continue;
1443				}
1444				break;
1445
1446			case KERN_PROC_RUID:
1447				if (p->p_ucred->cr_ruid != (uid_t)name[0]) {
1448					PROC_UNLOCK(p);
1449					continue;
1450				}
1451				break;
1452
1453			case KERN_PROC_PROC:
1454				break;
1455
1456			default:
1457				break;
1458
1459			}
1460
1461			error = sysctl_out_proc(p, req, flags, doingzomb);
1462			if (error) {
1463				sx_sunlock(&allproc_lock);
1464				sx_sunlock(&proctree_lock);
1465				return (error);
1466			}
1467		}
1468	}
1469	sx_sunlock(&allproc_lock);
1470	sx_sunlock(&proctree_lock);
1471	return (0);
1472}
1473
1474struct pargs *
1475pargs_alloc(int len)
1476{
1477	struct pargs *pa;
1478
1479	pa = malloc(sizeof(struct pargs) + len, M_PARGS,
1480		M_WAITOK);
1481	refcount_init(&pa->ar_ref, 1);
1482	pa->ar_length = len;
1483	return (pa);
1484}
1485
/*
 * Return a pargs structure to the M_PARGS malloc type.  The reference
 * count is not consulted here; pargs_drop() is the counted release.
 */
static void
pargs_free(struct pargs *pa)
{

	free(pa, M_PARGS);
}
1492
1493void
1494pargs_hold(struct pargs *pa)
1495{
1496
1497	if (pa == NULL)
1498		return;
1499	refcount_acquire(&pa->ar_ref);
1500}
1501
1502void
1503pargs_drop(struct pargs *pa)
1504{
1505
1506	if (pa == NULL)
1507		return;
1508	if (refcount_release(&pa->ar_ref))
1509		pargs_free(pa);
1510}
1511
1512static int
1513proc_read_mem(struct thread *td, struct proc *p, vm_offset_t offset, void* buf,
1514    size_t len)
1515{
1516	struct iovec iov;
1517	struct uio uio;
1518
1519	iov.iov_base = (caddr_t)buf;
1520	iov.iov_len = len;
1521	uio.uio_iov = &iov;
1522	uio.uio_iovcnt = 1;
1523	uio.uio_offset = offset;
1524	uio.uio_resid = (ssize_t)len;
1525	uio.uio_segflg = UIO_SYSSPACE;
1526	uio.uio_rw = UIO_READ;
1527	uio.uio_td = td;
1528
1529	return (proc_rwmem(p, &uio));
1530}
1531
1532static int
1533proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf,
1534    size_t len)
1535{
1536	size_t i;
1537	int error;
1538
1539	error = proc_read_mem(td, p, (vm_offset_t)sptr, buf, len);
1540	/*
1541	 * Reading the chunk may validly return EFAULT if the string is shorter
1542	 * than the chunk and is aligned at the end of the page, assuming the
1543	 * next page is not mapped.  So if EFAULT is returned do a fallback to
1544	 * one byte read loop.
1545	 */
1546	if (error == EFAULT) {
1547		for (i = 0; i < len; i++, buf++, sptr++) {
1548			error = proc_read_mem(td, p, (vm_offset_t)sptr, buf, 1);
1549			if (error != 0)
1550				return (error);
1551			if (*buf == '\0')
1552				break;
1553		}
1554		error = 0;
1555	}
1556	return (error);
1557}
1558
#define PROC_AUXV_MAX	256	/* Safety limit on auxv size. */

/*
 * Selects which per-process vector get_proc_vector() and its 32-bit
 * counterpart fetch from the target process.
 */
enum proc_vector_type {
	PROC_ARG,	/* argument vector (ps_argvstr) */
	PROC_ENV,	/* environment vector (ps_envstr) */
	PROC_AUX,	/* ELF auxiliary vector */
};
1566
#ifdef COMPAT_FREEBSD32
/*
 * Fetch the argument, environment or auxiliary vector of a 32-bit
 * process.
 *
 * The freebsd32_ps_strings structure is read from the address given by
 * the process' sysentvec, the requested vector is located through it
 * and copied into a freshly allocated M_TEMP buffer.  For PROC_ARG and
 * PROC_ENV the 32-bit pointers are widened into a second, native
 * "char *" array and the raw copy is freed.  For PROC_AUX the raw
 * Elf32_Auxinfo array is returned as is (including the AT_NULL entry).
 *
 * On success *proc_vectorp holds the buffer, which the caller frees
 * with free(..., M_TEMP), and *vsizep the number of entries.  Returns
 * ENOEXEC for implausible counts, a misaligned auxv address or a
 * missing AT_NULL terminator, EINVAL for an unknown type, or the error
 * from reading the target's memory.
 */
static int
get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp,
    size_t *vsizep, enum proc_vector_type type)
{
	struct freebsd32_ps_strings pss;
	Elf32_Auxinfo aux;
	vm_offset_t vptr, ptr;
	uint32_t *vec32;
	char **vec;
	size_t vsize, size;
	int i, error;

	error = proc_read_mem(td, p, (vm_offset_t)(p->p_sysent->sv_psstrings),
	    &pss, sizeof(pss));
	if (error != 0)
		return (error);

	switch (type) {
	case PROC_ARG:
		vptr = (vm_offset_t)PTRIN(pss.ps_argvstr);
		vsize = pss.ps_nargvstr;
		if (vsize > ARG_MAX)
			return (ENOEXEC);
		size = vsize * sizeof(int32_t);
		break;
	case PROC_ENV:
		vptr = (vm_offset_t)PTRIN(pss.ps_envstr);
		vsize = pss.ps_nenvstr;
		if (vsize > ARG_MAX)
			return (ENOEXEC);
		size = vsize * sizeof(int32_t);
		break;
	case PROC_AUX:
		/* The aux array follows the env array and its NULL slot. */
		vptr = (vm_offset_t)PTRIN(pss.ps_envstr) +
		    (pss.ps_nenvstr + 1) * sizeof(int32_t);
		if (vptr % 4 != 0)
			return (ENOEXEC);
		/* First pass: count entries up to and including AT_NULL. */
		ptr = vptr;
		for (i = 0; i < PROC_AUXV_MAX; i++) {
			error = proc_read_mem(td, p, ptr, &aux, sizeof(aux));
			if (error != 0)
				return (error);
			if (aux.a_type == AT_NULL)
				break;
			ptr += sizeof(aux);
		}
		if (aux.a_type != AT_NULL)
			return (ENOEXEC);
		vsize = i + 1;
		size = vsize * sizeof(aux);
		break;
	default:
		KASSERT(0, ("Wrong proc vector type: %d", type));
		return (EINVAL);
	}

	vec32 = malloc(size, M_TEMP, M_WAITOK);
	error = proc_read_mem(td, p, vptr, vec32, size);
	if (error != 0) {
		free(vec32, M_TEMP);
		return (error);
	}
	if (type == PROC_AUX) {
		/* The raw copy is the result; ownership moves to the caller. */
		*proc_vectorp = (char **)vec32;
		*vsizep = vsize;
		return (0);
	}

	/* Widen each 32-bit pointer into a native one. */
	vec = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK);
	for (i = 0; i < (int)vsize; i++)
		vec[i] = PTRIN(vec32[i]);
	free(vec32, M_TEMP);
	*proc_vectorp = vec;
	*vsizep = vsize;
	return (0);
}
#endif
1640
1641static int
1642get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp,
1643    size_t *vsizep, enum proc_vector_type type)
1644{
1645	struct ps_strings pss;
1646	Elf_Auxinfo aux;
1647	vm_offset_t vptr, ptr;
1648	char **proc_vector;
1649	size_t vsize, size;
1650	int error, i;
1651
1652#ifdef COMPAT_FREEBSD32
1653	if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1654		return (get_proc_vector32(td, p, proc_vectorp, vsizep, type));
1655#endif
1656	error = proc_read_mem(td, p, (vm_offset_t)(p->p_sysent->sv_psstrings),
1657	    &pss, sizeof(pss));
1658	if (error != 0)
1659		return (error);
1660	switch (type) {
1661	case PROC_ARG:
1662		vptr = (vm_offset_t)pss.ps_argvstr;
1663		vsize = pss.ps_nargvstr;
1664		if (vsize > ARG_MAX)
1665			return (ENOEXEC);
1666		size = vsize * sizeof(char *);
1667		break;
1668	case PROC_ENV:
1669		vptr = (vm_offset_t)pss.ps_envstr;
1670		vsize = pss.ps_nenvstr;
1671		if (vsize > ARG_MAX)
1672			return (ENOEXEC);
1673		size = vsize * sizeof(char *);
1674		break;
1675	case PROC_AUX:
1676		/*
1677		 * The aux array is just above env array on the stack. Check
1678		 * that the address is naturally aligned.
1679		 */
1680		vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1)
1681		    * sizeof(char *);
1682#if __ELF_WORD_SIZE == 64
1683		if (vptr % sizeof(uint64_t) != 0)
1684#else
1685		if (vptr % sizeof(uint32_t) != 0)
1686#endif
1687			return (ENOEXEC);
1688		/*
1689		 * We count the array size reading the aux vectors from the
1690		 * stack until AT_NULL vector is returned.  So (to keep the code
1691		 * simple) we read the process stack twice: the first time here
1692		 * to find the size and the second time when copying the vectors
1693		 * to the allocated proc_vector.
1694		 */
1695		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
1696			error = proc_read_mem(td, p, ptr, &aux, sizeof(aux));
1697			if (error != 0)
1698				return (error);
1699			if (aux.a_type == AT_NULL)
1700				break;
1701			ptr += sizeof(aux);
1702		}
1703		/*
1704		 * If the PROC_AUXV_MAX entries are iterated over, and we have
1705		 * not reached AT_NULL, it is most likely we are reading wrong
1706		 * data: either the process doesn't have auxv array or data has
1707		 * been modified. Return the error in this case.
1708		 */
1709		if (aux.a_type != AT_NULL)
1710			return (ENOEXEC);
1711		vsize = i + 1;
1712		size = vsize * sizeof(aux);
1713		break;
1714	default:
1715		KASSERT(0, ("Wrong proc vector type: %d", type));
1716		return (EINVAL); /* In case we are built without INVARIANTS. */
1717	}
1718	proc_vector = malloc(size, M_TEMP, M_WAITOK);
1719	if (proc_vector == NULL)
1720		return (ENOMEM);
1721	error = proc_read_mem(td, p, vptr, proc_vector, size);
1722	if (error != 0) {
1723		free(proc_vector, M_TEMP);
1724		return (error);
1725	}
1726	*proc_vectorp = proc_vector;
1727	*vsizep = vsize;
1728
1729	return (0);
1730}
1731
1732#define GET_PS_STRINGS_CHUNK_SZ	256	/* Chunk size (bytes) for ps_strings operations. */
1733
1734static int
1735get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb,
1736    enum proc_vector_type type)
1737{
1738	size_t done, len, nchr, vsize;
1739	int error, i;
1740	char **proc_vector, *sptr;
1741	char pss_string[GET_PS_STRINGS_CHUNK_SZ];
1742
1743	PROC_ASSERT_HELD(p);
1744
1745	/*
1746	 * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes.
1747	 */
1748	nchr = 2 * (PATH_MAX + ARG_MAX);
1749
1750	error = get_proc_vector(td, p, &proc_vector, &vsize, type);
1751	if (error != 0)
1752		return (error);
1753	for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) {
1754		/*
1755		 * The program may have scribbled into its argv array, e.g. to
1756		 * remove some arguments.  If that has happened, break out
1757		 * before trying to read from NULL.
1758		 */
1759		if (proc_vector[i] == NULL)
1760			break;
1761		for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) {
1762			error = proc_read_string(td, p, sptr, pss_string,
1763			    sizeof(pss_string));
1764			if (error != 0)
1765				goto done;
1766			len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ);
1767			if (done + len >= nchr)
1768				len = nchr - done - 1;
1769			sbuf_bcat(sb, pss_string, len);
1770			if (len != GET_PS_STRINGS_CHUNK_SZ)
1771				break;
1772			done += GET_PS_STRINGS_CHUNK_SZ;
1773		}
1774		sbuf_bcat(sb, "", 1);
1775		done += len + 1;
1776	}
1777done:
1778	free(proc_vector, M_TEMP);
1779	return (error);
1780}
1781
/*
 * Append the argument strings of process p to sb; see get_ps_strings().
 * Note that the td argument is not used: curthread is passed down
 * instead.
 */
int
proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb)
{

	return (get_ps_strings(curthread, p, sb, PROC_ARG));
}
1788
/*
 * Append the environment strings of process p to sb; see
 * get_ps_strings().  Note that the td argument is not used: curthread
 * is passed down instead.
 */
int
proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb)
{

	return (get_ps_strings(curthread, p, sb, PROC_ENV));
}
1795
1796int
1797proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
1798{
1799	size_t vsize, size;
1800	char **auxv;
1801	int error;
1802
1803	error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX);
1804	if (error == 0) {
1805#ifdef COMPAT_FREEBSD32
1806		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
1807			size = vsize * sizeof(Elf32_Auxinfo);
1808		else
1809#endif
1810			size = vsize * sizeof(Elf_Auxinfo);
1811		if (sbuf_bcat(sb, auxv, size) != 0)
1812			error = ENOMEM;
1813		free(auxv, M_TEMP);
1814	}
1815	return (error);
1816}
1817
1818/*
1819 * This sysctl allows a process to retrieve the argument list or process
1820 * title for another process without groping around in the address space
1821 * of the other process.  It also allow a process to set its own "process
1822 * title to a string of its own choice.
1823 */
1824static int
1825sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
1826{
1827	int *name = (int *)arg1;
1828	u_int namelen = arg2;
1829	struct pargs *newpa, *pa;
1830	struct proc *p;
1831	struct sbuf sb;
1832	int flags, error = 0, error2;
1833
1834	if (namelen != 1)
1835		return (EINVAL);
1836
1837	flags = PGET_CANSEE;
1838	if (req->newptr != NULL)
1839		flags |= PGET_ISCURRENT;
1840	error = pget((pid_t)name[0], flags, &p);
1841	if (error)
1842		return (error);
1843
1844	pa = p->p_args;
1845	if (pa != NULL) {
1846		pargs_hold(pa);
1847		PROC_UNLOCK(p);
1848		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
1849		pargs_drop(pa);
1850	} else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) {
1851		_PHOLD(p);
1852		PROC_UNLOCK(p);
1853		sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1854		sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1855		error = proc_getargv(curthread, p, &sb);
1856		error2 = sbuf_finish(&sb);
1857		PRELE(p);
1858		sbuf_delete(&sb);
1859		if (error == 0 && error2 != 0)
1860			error = error2;
1861	} else {
1862		PROC_UNLOCK(p);
1863	}
1864	if (error != 0 || req->newptr == NULL)
1865		return (error);
1866
1867	if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit)
1868		return (ENOMEM);
1869	newpa = pargs_alloc(req->newlen);
1870	error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
1871	if (error != 0) {
1872		pargs_free(newpa);
1873		return (error);
1874	}
1875	PROC_LOCK(p);
1876	pa = p->p_args;
1877	p->p_args = newpa;
1878	PROC_UNLOCK(p);
1879	pargs_drop(pa);
1880	return (0);
1881}
1882
1883/*
1884 * This sysctl allows a process to retrieve environment of another process.
1885 */
1886static int
1887sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS)
1888{
1889	int *name = (int *)arg1;
1890	u_int namelen = arg2;
1891	struct proc *p;
1892	struct sbuf sb;
1893	int error, error2;
1894
1895	if (namelen != 1)
1896		return (EINVAL);
1897
1898	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
1899	if (error != 0)
1900		return (error);
1901	if ((p->p_flag & P_SYSTEM) != 0) {
1902		PRELE(p);
1903		return (0);
1904	}
1905
1906	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1907	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1908	error = proc_getenvv(curthread, p, &sb);
1909	error2 = sbuf_finish(&sb);
1910	PRELE(p);
1911	sbuf_delete(&sb);
1912	return (error != 0 ? error : error2);
1913}
1914
1915/*
1916 * This sysctl allows a process to retrieve ELF auxiliary vector of
1917 * another process.
1918 */
1919static int
1920sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS)
1921{
1922	int *name = (int *)arg1;
1923	u_int namelen = arg2;
1924	struct proc *p;
1925	struct sbuf sb;
1926	int error, error2;
1927
1928	if (namelen != 1)
1929		return (EINVAL);
1930
1931	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
1932	if (error != 0)
1933		return (error);
1934	if ((p->p_flag & P_SYSTEM) != 0) {
1935		PRELE(p);
1936		return (0);
1937	}
1938	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
1939	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
1940	error = proc_getauxv(curthread, p, &sb);
1941	error2 = sbuf_finish(&sb);
1942	PRELE(p);
1943	sbuf_delete(&sb);
1944	return (error != 0 ? error : error2);
1945}
1946
1947/*
1948 * This sysctl allows a process to retrieve the path of the executable for
1949 * itself or another process.
1950 */
1951static int
1952sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
1953{
1954	pid_t *pidp = (pid_t *)arg1;
1955	unsigned int arglen = arg2;
1956	struct proc *p;
1957	struct vnode *vp;
1958	char *retbuf, *freebuf;
1959	int error;
1960
1961	if (arglen != 1)
1962		return (EINVAL);
1963	if (*pidp == -1) {	/* -1 means this process */
1964		p = req->td->td_proc;
1965	} else {
1966		error = pget(*pidp, PGET_CANSEE, &p);
1967		if (error != 0)
1968			return (error);
1969	}
1970
1971	vp = p->p_textvp;
1972	if (vp == NULL) {
1973		if (*pidp != -1)
1974			PROC_UNLOCK(p);
1975		return (0);
1976	}
1977	vref(vp);
1978	if (*pidp != -1)
1979		PROC_UNLOCK(p);
1980	error = vn_fullpath(req->td, vp, &retbuf, &freebuf);
1981	vrele(vp);
1982	if (error)
1983		return (error);
1984	error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
1985	free(freebuf, M_TEMP);
1986	return (error);
1987}
1988
1989static int
1990sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
1991{
1992	struct proc *p;
1993	char *sv_name;
1994	int *name;
1995	int namelen;
1996	int error;
1997
1998	namelen = arg2;
1999	if (namelen != 1)
2000		return (EINVAL);
2001
2002	name = (int *)arg1;
2003	error = pget((pid_t)name[0], PGET_CANSEE, &p);
2004	if (error != 0)
2005		return (error);
2006	sv_name = p->p_sysent->sv_name;
2007	PROC_UNLOCK(p);
2008	return (sysctl_handle_string(oidp, sv_name, 0, req));
2009}
2010
2011#ifdef KINFO_OVMENTRY_SIZE
2012CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE);
2013#endif
2014
#ifdef COMPAT_FREEBSD7
/*
 * Old-format (kinfo_ovmentry) VM map export, kept for FreeBSD 7
 * compatibility.
 *
 * Exactly one name component (the pid) is required.  The process is
 * looked up with PGET_WANTREAD and a reference on its vmspace is taken.
 * One kinfo_ovmentry is copied out per map entry, submaps excluded.
 * The map read lock is dropped around the path lookup and copyout of
 * each entry; if the map timestamp changed in the meantime, the walk
 * resumes from the entry found by looking up the last address covered.
 *
 * Returns EINVAL for a malformed name, the error from pget(), ESRCH if
 * the vmspace is gone, or the first copyout error.
 */
static int
sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
{
	vm_map_entry_t entry, tmp_entry;
	unsigned int last_timestamp;
	char *fullpath, *freepath;
	struct kinfo_ovmentry *kve;
	struct vattr va;
	struct ucred *cred;
	int error, *name;
	struct vnode *vp;
	struct proc *p;
	vm_map_t map;
	struct vmspace *vm;
	u_int namelen;

	/* name[0] below is only meaningful if a pid was supplied. */
	namelen = arg2;
	if (namelen != 1)
		return (EINVAL);

	name = (int *)arg1;
	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
	if (error != 0)
		return (error);
	vm = vmspace_acquire_ref(p);
	if (vm == NULL) {
		PRELE(p);
		return (ESRCH);
	}
	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);

	map = &vm->vm_map;
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		vm_offset_t addr;

		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		bzero(kve, sizeof(*kve));
		kve->kve_structsize = sizeof(*kve);

		kve->kve_private_resident = 0;
		obj = entry->object.vm_object;
		if (obj != NULL) {
			VM_OBJECT_RLOCK(obj);
			if (obj->shadow_count == 1)
				kve->kve_private_resident =
				    obj->resident_page_count;
		}
		/* Count pages with a translation in the pmap. */
		kve->kve_resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(map->pmap, addr))
				kve->kve_resident++;
			addr += PAGE_SIZE;
		}

		/*
		 * Walk to the bottom of the backing chain, locking hand
		 * over hand.  Afterwards obj (if any) and lobj, the last
		 * object in the chain, are read-locked.
		 */
		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
			if (tobj != obj)
				VM_OBJECT_RLOCK(tobj);
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);
			lobj = tobj;
		}

		kve->kve_start = (void*)entry->start;
		kve->kve_end = (void*)entry->end;
		kve->kve_offset = (off_t)entry->offset;

		if (entry->protection & VM_PROT_READ)
			kve->kve_protection |= KVME_PROT_READ;
		if (entry->protection & VM_PROT_WRITE)
			kve->kve_protection |= KVME_PROT_WRITE;
		if (entry->protection & VM_PROT_EXECUTE)
			kve->kve_protection |= KVME_PROT_EXEC;

		if (entry->eflags & MAP_ENTRY_COW)
			kve->kve_flags |= KVME_FLAG_COW;
		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;

		/* Remember the map generation before dropping the lock. */
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		kve->kve_fileid = 0;
		kve->kve_fsid = 0;
		freepath = NULL;
		fullpath = "";
		if (lobj) {
			vp = NULL;
			switch (lobj->type) {
			case OBJT_DEFAULT:
				kve->kve_type = KVME_TYPE_DEFAULT;
				break;
			case OBJT_VNODE:
				kve->kve_type = KVME_TYPE_VNODE;
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
					kve->kve_type = KVME_TYPE_VNODE;
					if ((lobj->flags & OBJ_TMPFS) != 0) {
						vp = lobj->un_pager.swp.swp_tmpfs;
						vref(vp);
					}
				} else {
					kve->kve_type = KVME_TYPE_SWAP;
				}
				break;
			case OBJT_DEVICE:
				kve->kve_type = KVME_TYPE_DEVICE;
				break;
			case OBJT_PHYS:
				kve->kve_type = KVME_TYPE_PHYS;
				break;
			case OBJT_DEAD:
				kve->kve_type = KVME_TYPE_DEAD;
				break;
			case OBJT_SG:
				kve->kve_type = KVME_TYPE_SG;
				break;
			default:
				kve->kve_type = KVME_TYPE_UNKNOWN;
				break;
			}
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);

			kve->kve_ref_count = obj->ref_count;
			kve->kve_shadow_count = obj->shadow_count;
			VM_OBJECT_RUNLOCK(obj);
			if (vp != NULL) {
				vn_fullpath(curthread, vp, &fullpath,
				    &freepath);
				cred = curthread->td_ucred;
				vn_lock(vp, LK_SHARED | LK_RETRY);
				if (VOP_GETATTR(vp, &va, cred) == 0) {
					kve->kve_fileid = va.va_fileid;
					kve->kve_fsid = va.va_fsid;
				}
				vput(vp);
			}
		} else {
			kve->kve_type = KVME_TYPE_NONE;
			kve->kve_ref_count = 0;
			kve->kve_shadow_count = 0;
		}

		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
		if (freepath != NULL)
			free(freepath, M_TEMP);

		error = SYSCTL_OUT(req, kve, sizeof(*kve));
		vm_map_lock_read(map);
		if (error)
			break;
		if (last_timestamp != map->timestamp) {
			/*
			 * The map changed while unlocked; addr is the end
			 * of the entry just reported, so look up its last
			 * byte to find where to continue.
			 */
			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	vmspace_free(vm);
	PRELE(p);
	free(kve, M_TEMP);
	return (error);
}
#endif	/* COMPAT_FREEBSD7 */
2185
2186#ifdef KINFO_VMENTRY_SIZE
2187CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
2188#endif
2189
/*
 * Add the number of resident pages backing a map entry to
 * kve->kve_resident, and set KVME_FLAG_SUPER in kve->kve_flags when
 * pmap_mincore() reports a superpage mapping.
 *
 * For each page index of the entry the chain of backing objects is
 * searched, starting at the entry's own object.  The entry must have an
 * object: the chain walk dereferences it unconditionally.
 * kern_proc_vmmap_out() read-locks every object in the chain before
 * calling this function.
 */
static void
kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
    struct kinfo_vmentry *kve)
{
	vm_object_t obj, tobj;
	vm_page_t m, m_adv;
	vm_offset_t addr;
	vm_paddr_t locked_pa;
	vm_pindex_t pi, pi_adv, pindex;

	locked_pa = 0;
	obj = entry->object.vm_object;
	addr = entry->start;
	m_adv = NULL;
	pi = OFF_TO_IDX(entry->offset);
	/* Each pass advances by pi_adv pages, as set by the loop body. */
	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
		if (m_adv != NULL) {
			/* The previous pass skipped ahead to this page. */
			m = m_adv;
		} else {
			/* Default: skip the rest of the entry. */
			pi_adv = OFF_TO_IDX(entry->end - addr);
			pindex = pi;
			for (tobj = obj;; tobj = tobj->backing_object) {
				m = vm_page_find_least(tobj, pindex);
				if (m != NULL) {
					/* Exact hit: page is resident. */
					if (m->pindex == pindex)
						break;
					/*
					 * Otherwise remember the nearest
					 * later page seen so far in the
					 * chain as the skip target.
					 */
					if (pi_adv > m->pindex - pindex) {
						pi_adv = m->pindex - pindex;
						m_adv = m;
					}
				}
				/* End of chain without a hit: skip ahead. */
				if (tobj->backing_object == NULL)
					goto next;
				pindex += OFF_TO_IDX(tobj->
				    backing_object_offset);
			}
		}
		m_adv = NULL;
		/*
		 * Count a whole superpage at once when the page has a
		 * non-zero psind, the address is aligned to pagesizes[1]
		 * and fits in the entry, and the pmap reports MINCORE_SUPER.
		 */
		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
		    (addr & (pagesizes[1] - 1)) == 0 &&
		    (pmap_mincore(map->pmap, addr, &locked_pa) &
		    MINCORE_SUPER) != 0) {
			kve->kve_flags |= KVME_FLAG_SUPER;
			pi_adv = OFF_TO_IDX(pagesizes[1]);
		} else {
			/*
			 * We do not test the found page on validity.
			 * Either the page is busy and being paged in,
			 * or it was invalidated.  The first case
			 * should be counted as resident, the second
			 * is not so clear; we do account both.
			 */
			pi_adv = 1;
		}
		kve->kve_resident += pi_adv;
next:;
	}
	PA_UNLOCK_COND(locked_pa);
}
2249
2250/*
2251 * Must be called with the process locked and will return unlocked.
2252 */
2253int
2254kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
2255{
2256	vm_map_entry_t entry, tmp_entry;
2257	struct vattr va;
2258	vm_map_t map;
2259	vm_object_t obj, tobj, lobj;
2260	char *fullpath, *freepath;
2261	struct kinfo_vmentry *kve;
2262	struct ucred *cred;
2263	struct vnode *vp;
2264	struct vmspace *vm;
2265	vm_offset_t addr;
2266	unsigned int last_timestamp;
2267	int error;
2268
2269	PROC_LOCK_ASSERT(p, MA_OWNED);
2270
2271	_PHOLD(p);
2272	PROC_UNLOCK(p);
2273	vm = vmspace_acquire_ref(p);
2274	if (vm == NULL) {
2275		PRELE(p);
2276		return (ESRCH);
2277	}
2278	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
2279
2280	error = 0;
2281	map = &vm->vm_map;
2282	vm_map_lock_read(map);
2283	for (entry = map->header.next; entry != &map->header;
2284	    entry = entry->next) {
2285		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
2286			continue;
2287
2288		addr = entry->end;
2289		bzero(kve, sizeof(*kve));
2290		obj = entry->object.vm_object;
2291		if (obj != NULL) {
2292			for (tobj = obj; tobj != NULL;
2293			    tobj = tobj->backing_object) {
2294				VM_OBJECT_RLOCK(tobj);
2295				lobj = tobj;
2296			}
2297			if (obj->backing_object == NULL)
2298				kve->kve_private_resident =
2299				    obj->resident_page_count;
2300			if (!vmmap_skip_res_cnt)
2301				kern_proc_vmmap_resident(map, entry, kve);
2302			for (tobj = obj; tobj != NULL;
2303			    tobj = tobj->backing_object) {
2304				if (tobj != obj && tobj != lobj)
2305					VM_OBJECT_RUNLOCK(tobj);
2306			}
2307		} else {
2308			lobj = NULL;
2309		}
2310
2311		kve->kve_start = entry->start;
2312		kve->kve_end = entry->end;
2313		kve->kve_offset = entry->offset;
2314
2315		if (entry->protection & VM_PROT_READ)
2316			kve->kve_protection |= KVME_PROT_READ;
2317		if (entry->protection & VM_PROT_WRITE)
2318			kve->kve_protection |= KVME_PROT_WRITE;
2319		if (entry->protection & VM_PROT_EXECUTE)
2320			kve->kve_protection |= KVME_PROT_EXEC;
2321
2322		if (entry->eflags & MAP_ENTRY_COW)
2323			kve->kve_flags |= KVME_FLAG_COW;
2324		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
2325			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
2326		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
2327			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
2328		if (entry->eflags & MAP_ENTRY_GROWS_UP)
2329			kve->kve_flags |= KVME_FLAG_GROWS_UP;
2330		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
2331			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;
2332
2333		last_timestamp = map->timestamp;
2334		vm_map_unlock_read(map);
2335
2336		freepath = NULL;
2337		fullpath = "";
2338		if (lobj != NULL) {
2339			vp = NULL;
2340			switch (lobj->type) {
2341			case OBJT_DEFAULT:
2342				kve->kve_type = KVME_TYPE_DEFAULT;
2343				break;
2344			case OBJT_VNODE:
2345				kve->kve_type = KVME_TYPE_VNODE;
2346				vp = lobj->handle;
2347				vref(vp);
2348				break;
2349			case OBJT_SWAP:
2350				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
2351					kve->kve_type = KVME_TYPE_VNODE;
2352					if ((lobj->flags & OBJ_TMPFS) != 0) {
2353						vp = lobj->un_pager.swp.swp_tmpfs;
2354						vref(vp);
2355					}
2356				} else {
2357					kve->kve_type = KVME_TYPE_SWAP;
2358				}
2359				break;
2360			case OBJT_DEVICE:
2361				kve->kve_type = KVME_TYPE_DEVICE;
2362				break;
2363			case OBJT_PHYS:
2364				kve->kve_type = KVME_TYPE_PHYS;
2365				break;
2366			case OBJT_DEAD:
2367				kve->kve_type = KVME_TYPE_DEAD;
2368				break;
2369			case OBJT_SG:
2370				kve->kve_type = KVME_TYPE_SG;
2371				break;
2372			case OBJT_MGTDEVICE:
2373				kve->kve_type = KVME_TYPE_MGTDEVICE;
2374				break;
2375			default:
2376				kve->kve_type = KVME_TYPE_UNKNOWN;
2377				break;
2378			}
2379			if (lobj != obj)
2380				VM_OBJECT_RUNLOCK(lobj);
2381
2382			kve->kve_ref_count = obj->ref_count;
2383			kve->kve_shadow_count = obj->shadow_count;
2384			VM_OBJECT_RUNLOCK(obj);
2385			if (vp != NULL) {
2386				vn_fullpath(curthread, vp, &fullpath,
2387				    &freepath);
2388				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
2389				cred = curthread->td_ucred;
2390				vn_lock(vp, LK_SHARED | LK_RETRY);
2391				if (VOP_GETATTR(vp, &va, cred) == 0) {
2392					kve->kve_vn_fileid = va.va_fileid;
2393					kve->kve_vn_fsid = va.va_fsid;
2394					kve->kve_vn_mode =
2395					    MAKEIMODE(va.va_type, va.va_mode);
2396					kve->kve_vn_size = va.va_size;
2397					kve->kve_vn_rdev = va.va_rdev;
2398					kve->kve_status = KF_ATTR_VALID;
2399				}
2400				vput(vp);
2401			}
2402		} else {
2403			kve->kve_type = KVME_TYPE_NONE;
2404			kve->kve_ref_count = 0;
2405			kve->kve_shadow_count = 0;
2406		}
2407
2408		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
2409		if (freepath != NULL)
2410			free(freepath, M_TEMP);
2411
2412		/* Pack record size down */
2413		kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) +
2414		    strlen(kve->kve_path) + 1;
2415		kve->kve_structsize = roundup(kve->kve_structsize,
2416		    sizeof(uint64_t));
2417		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
2418			error = ENOMEM;
2419		vm_map_lock_read(map);
2420		if (error != 0)
2421			break;
2422		if (last_timestamp != map->timestamp) {
2423			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
2424			entry = tmp_entry;
2425		}
2426	}
2427	vm_map_unlock_read(map);
2428	vmspace_free(vm);
2429	PRELE(p);
2430	free(kve, M_TEMP);
2431	return (error);
2432}
2433
2434static int
2435sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
2436{
2437	struct proc *p;
2438	struct sbuf sb;
2439	int error, error2, *name;
2440
2441	name = (int *)arg1;
2442	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req);
2443	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
2444	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
2445	if (error != 0) {
2446		sbuf_delete(&sb);
2447		return (error);
2448	}
2449	error = kern_proc_vmmap_out(p, &sb);
2450	error2 = sbuf_finish(&sb);
2451	sbuf_delete(&sb);
2452	return (error != 0 ? error : error2);
2453}
2454
#if defined(STACK) || defined(DDB)
/*
 * Sysctl handler for kern.proc.kstack: emit one struct kinfo_kstack per
 * thread of the process whose pid is the first trailing MIB component.
 *
 * The thread ids are snapshotted under the process lock first; each thread is
 * then looked up again by id, because the process lock is dropped around
 * every SYSCTL_OUT() and a thread may have gone away in the meantime.
 *
 * Returns 0 on success, EINVAL when no pid component was supplied, or the
 * error reported by pget() or SYSCTL_OUT().
 */
static int
sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
{
	struct kinfo_kstack *kkstp;
	int error, i, *name, numthreads;
	u_int namelen;
	lwpid_t *lwpidarray;
	struct thread *td;
	struct stack *st;
	struct sbuf sb;
	struct proc *p;

	name = (int *)arg1;
	namelen = arg2;

	/* name[0] is read below, so insist that the caller passed a pid. */
	if (namelen < 1)
		return (EINVAL);

	error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p);
	if (error != 0)
		return (error);

	kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK);
	st = stack_create();

	/*
	 * Size the tid array to the thread count.  The allocation may sleep,
	 * so the process lock is dropped around it and the count rechecked
	 * afterwards; grow again if more threads appeared.
	 */
	lwpidarray = NULL;
	numthreads = 0;
	PROC_LOCK(p);
repeat:
	if (numthreads < p->p_numthreads) {
		if (lwpidarray != NULL) {
			free(lwpidarray, M_TEMP);
			lwpidarray = NULL;
		}
		numthreads = p->p_numthreads;
		PROC_UNLOCK(p);
		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
		    M_WAITOK | M_ZERO);
		PROC_LOCK(p);
		goto repeat;
	}
	i = 0;

	/*
	 * XXXRW: During the below loop, execve(2) and countless other sorts
	 * of changes could have taken place.  Should we check to see if the
	 * vmspace has been replaced, or the like, in order to prevent
	 * giving a snapshot that spans, say, execve(2), with some threads
	 * before and some after?  Among other things, the credentials could
	 * have changed, in which case the right to extract debug info might
	 * no longer be assured.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(i < numthreads,
		    ("sysctl_kern_proc_kstack: numthreads"));
		lwpidarray[i] = td->td_tid;
		i++;
	}
	numthreads = i;
	for (i = 0; i < numthreads; i++) {
		td = thread_find(p, lwpidarray[i]);
		if (td == NULL) {
			continue;
		}
		bzero(kkstp, sizeof(*kkstp));
		(void)sbuf_new(&sb, kkstp->kkst_trace,
		    sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
		/*
		 * Start every record from an empty stack.  No trace is
		 * captured for a swapped-out or running thread, and without
		 * this reset the frames saved for the previous thread would
		 * be printed into this thread's record.
		 */
		stack_zero(st);
		thread_lock(td);
		kkstp->kkst_tid = td->td_tid;
		if (TD_IS_SWAPPED(td))
			kkstp->kkst_state = KKST_STATE_SWAPPED;
		else if (TD_IS_RUNNING(td))
			kkstp->kkst_state = KKST_STATE_RUNNING;
		else {
			kkstp->kkst_state = KKST_STATE_STACKOK;
			stack_save_td(st, td);
		}
		thread_unlock(td);
		/* Format and copy out without holding the process lock. */
		PROC_UNLOCK(p);
		stack_sbuf_print(&sb, st);
		sbuf_finish(&sb);
		sbuf_delete(&sb);
		error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp));
		PROC_LOCK(p);
		if (error)
			break;
	}
	/* Drop the hold that pget() presumably took for PGET_WANTREAD. */
	_PRELE(p);
	PROC_UNLOCK(p);
	if (lwpidarray != NULL)
		free(lwpidarray, M_TEMP);
	stack_destroy(st);
	free(kkstp, M_TEMP);
	return (error);
}
#endif
2546
2547/*
2548 * This sysctl allows a process to retrieve the full list of groups from
2549 * itself or another process.
2550 */
2551static int
2552sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
2553{
2554	pid_t *pidp = (pid_t *)arg1;
2555	unsigned int arglen = arg2;
2556	struct proc *p;
2557	struct ucred *cred;
2558	int error;
2559
2560	if (arglen != 1)
2561		return (EINVAL);
2562	if (*pidp == -1) {	/* -1 means this process */
2563		p = req->td->td_proc;
2564		PROC_LOCK(p);
2565	} else {
2566		error = pget(*pidp, PGET_CANSEE, &p);
2567		if (error != 0)
2568			return (error);
2569	}
2570
2571	cred = crhold(p->p_ucred);
2572	PROC_UNLOCK(p);
2573
2574	error = SYSCTL_OUT(req, cred->cr_groups,
2575	    cred->cr_ngroups * sizeof(gid_t));
2576	crfree(cred);
2577	return (error);
2578}
2579
2580/*
2581 * This sysctl allows a process to retrieve or/and set the resource limit for
2582 * another process.
2583 */
2584static int
2585sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS)
2586{
2587	int *name = (int *)arg1;
2588	u_int namelen = arg2;
2589	struct rlimit rlim;
2590	struct proc *p;
2591	u_int which;
2592	int flags, error;
2593
2594	if (namelen != 2)
2595		return (EINVAL);
2596
2597	which = (u_int)name[1];
2598	if (which >= RLIM_NLIMITS)
2599		return (EINVAL);
2600
2601	if (req->newptr != NULL && req->newlen != sizeof(rlim))
2602		return (EINVAL);
2603
2604	flags = PGET_HOLD | PGET_NOTWEXIT;
2605	if (req->newptr != NULL)
2606		flags |= PGET_CANDEBUG;
2607	else
2608		flags |= PGET_CANSEE;
2609	error = pget((pid_t)name[0], flags, &p);
2610	if (error != 0)
2611		return (error);
2612
2613	/*
2614	 * Retrieve limit.
2615	 */
2616	if (req->oldptr != NULL) {
2617		PROC_LOCK(p);
2618		lim_rlimit_proc(p, which, &rlim);
2619		PROC_UNLOCK(p);
2620	}
2621	error = SYSCTL_OUT(req, &rlim, sizeof(rlim));
2622	if (error != 0)
2623		goto errout;
2624
2625	/*
2626	 * Set limit.
2627	 */
2628	if (req->newptr != NULL) {
2629		error = SYSCTL_IN(req, &rlim, sizeof(rlim));
2630		if (error == 0)
2631			error = kern_proc_setrlimit(curthread, p, which, &rlim);
2632	}
2633
2634errout:
2635	PRELE(p);
2636	return (error);
2637}
2638
2639/*
2640 * This sysctl allows a process to retrieve ps_strings structure location of
2641 * another process.
2642 */
2643static int
2644sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS)
2645{
2646	int *name = (int *)arg1;
2647	u_int namelen = arg2;
2648	struct proc *p;
2649	vm_offset_t ps_strings;
2650	int error;
2651#ifdef COMPAT_FREEBSD32
2652	uint32_t ps_strings32;
2653#endif
2654
2655	if (namelen != 1)
2656		return (EINVAL);
2657
2658	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2659	if (error != 0)
2660		return (error);
2661#ifdef COMPAT_FREEBSD32
2662	if ((req->flags & SCTL_MASK32) != 0) {
2663		/*
2664		 * We return 0 if the 32 bit emulation request is for a 64 bit
2665		 * process.
2666		 */
2667		ps_strings32 = SV_PROC_FLAG(p, SV_ILP32) != 0 ?
2668		    PTROUT(p->p_sysent->sv_psstrings) : 0;
2669		PROC_UNLOCK(p);
2670		error = SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32));
2671		return (error);
2672	}
2673#endif
2674	ps_strings = p->p_sysent->sv_psstrings;
2675	PROC_UNLOCK(p);
2676	error = SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings));
2677	return (error);
2678}
2679
2680/*
2681 * This sysctl allows a process to retrieve umask of another process.
2682 */
2683static int
2684sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS)
2685{
2686	int *name = (int *)arg1;
2687	u_int namelen = arg2;
2688	struct proc *p;
2689	int error;
2690	u_short fd_cmask;
2691
2692	if (namelen != 1)
2693		return (EINVAL);
2694
2695	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
2696	if (error != 0)
2697		return (error);
2698
2699	FILEDESC_SLOCK(p->p_fd);
2700	fd_cmask = p->p_fd->fd_cmask;
2701	FILEDESC_SUNLOCK(p->p_fd);
2702	PRELE(p);
2703	error = SYSCTL_OUT(req, &fd_cmask, sizeof(fd_cmask));
2704	return (error);
2705}
2706
2707/*
2708 * This sysctl allows a process to set and retrieve binary osreldate of
2709 * another process.
2710 */
2711static int
2712sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS)
2713{
2714	int *name = (int *)arg1;
2715	u_int namelen = arg2;
2716	struct proc *p;
2717	int flags, error, osrel;
2718
2719	if (namelen != 1)
2720		return (EINVAL);
2721
2722	if (req->newptr != NULL && req->newlen != sizeof(osrel))
2723		return (EINVAL);
2724
2725	flags = PGET_HOLD | PGET_NOTWEXIT;
2726	if (req->newptr != NULL)
2727		flags |= PGET_CANDEBUG;
2728	else
2729		flags |= PGET_CANSEE;
2730	error = pget((pid_t)name[0], flags, &p);
2731	if (error != 0)
2732		return (error);
2733
2734	error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel));
2735	if (error != 0)
2736		goto errout;
2737
2738	if (req->newptr != NULL) {
2739		error = SYSCTL_IN(req, &osrel, sizeof(osrel));
2740		if (error != 0)
2741			goto errout;
2742		if (osrel < 0) {
2743			error = EINVAL;
2744			goto errout;
2745		}
2746		p->p_osrel = osrel;
2747	}
2748errout:
2749	PRELE(p);
2750	return (error);
2751}
2752
2753static int
2754sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
2755{
2756	int *name = (int *)arg1;
2757	u_int namelen = arg2;
2758	struct proc *p;
2759	struct kinfo_sigtramp kst;
2760	const struct sysentvec *sv;
2761	int error;
2762#ifdef COMPAT_FREEBSD32
2763	struct kinfo_sigtramp32 kst32;
2764#endif
2765
2766	if (namelen != 1)
2767		return (EINVAL);
2768
2769	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
2770	if (error != 0)
2771		return (error);
2772	sv = p->p_sysent;
2773#ifdef COMPAT_FREEBSD32
2774	if ((req->flags & SCTL_MASK32) != 0) {
2775		bzero(&kst32, sizeof(kst32));
2776		if (SV_PROC_FLAG(p, SV_ILP32)) {
2777			if (sv->sv_sigcode_base != 0) {
2778				kst32.ksigtramp_start = sv->sv_sigcode_base;
2779				kst32.ksigtramp_end = sv->sv_sigcode_base +
2780				    *sv->sv_szsigcode;
2781			} else {
2782				kst32.ksigtramp_start = sv->sv_psstrings -
2783				    *sv->sv_szsigcode;
2784				kst32.ksigtramp_end = sv->sv_psstrings;
2785			}
2786		}
2787		PROC_UNLOCK(p);
2788		error = SYSCTL_OUT(req, &kst32, sizeof(kst32));
2789		return (error);
2790	}
2791#endif
2792	bzero(&kst, sizeof(kst));
2793	if (sv->sv_sigcode_base != 0) {
2794		kst.ksigtramp_start = (char *)sv->sv_sigcode_base;
2795		kst.ksigtramp_end = (char *)sv->sv_sigcode_base +
2796		    *sv->sv_szsigcode;
2797	} else {
2798		kst.ksigtramp_start = (char *)sv->sv_psstrings -
2799		    *sv->sv_szsigcode;
2800		kst.ksigtramp_end = (char *)sv->sv_psstrings;
2801	}
2802	PROC_UNLOCK(p);
2803	error = SYSCTL_OUT(req, &kst, sizeof(kst));
2804	return (error);
2805}
2806
/*
 * Registration of the kern.proc sysctl subtree.  The parent node is declared
 * first; every entry below binds one KERN_PROC_* identifier to the handler
 * that serves it.
 */
2807SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
2808
2809SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
2810	CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc",
2811	"Return entire process table");
2812
/* Process table selectors, all served by sysctl_kern_proc. */
2813static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2814	sysctl_kern_proc, "Process table");
2815
2816static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE,
2817	sysctl_kern_proc, "Process table");
2818
2819static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2820	sysctl_kern_proc, "Process table");
2821
2822static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD |
2823	CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2824
2825static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE,
2826	sysctl_kern_proc, "Process table");
2827
2828static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2829	sysctl_kern_proc, "Process table");
2830
2831static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2832	sysctl_kern_proc, "Process table");
2833
2834static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE,
2835	sysctl_kern_proc, "Process table");
2836
2837static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,
2838	sysctl_kern_proc, "Return process table, no threads");
2839
/*
 * Per-process string and vector data.  "args" is the only node in this group
 * registered read-write and open to any user (CTLFLAG_ANYBODY).
 */
2840static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
2841	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
2842	sysctl_kern_proc_args, "Process argument list");
2843
2844static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
2845	sysctl_kern_proc_env, "Process environment");
2846
2847static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD |
2848	CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector");
2849
2850static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD |
2851	CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path");
2852
2853static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD |
2854	CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name,
2855	"Process syscall vector name (ABI type)");
2856
/*
 * "_td" variants of the process table selectors: the same identifiers with
 * KERN_PROC_INC_THREAD or'ed in, served by the same sysctl_kern_proc handler.
 */
2857static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td,
2858	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2859
2860static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td,
2861	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2862
2863static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td,
2864	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2865
2866static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD),
2867	sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2868
2869static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td,
2870	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2871
2872static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td,
2873	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2874
2875static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td,
2876	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2877
2878static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
2879	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
2880
/*
 * NOTE(review): the description below still says "no threads" although this
 * node is the KERN_PROC_INC_THREAD variant -- looks copied from the "proc"
 * node above; confirm before changing the user-visible string.
 */
2881static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
2882	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc,
2883	"Return process table, no threads");
2884
/* The old-format vm map node exists only when COMPAT_FREEBSD7 is defined. */
2885#ifdef COMPAT_FREEBSD7
2886static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD |
2887	CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries");
2888#endif
2889
2890static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD |
2891	CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries");
2892
/* Guarded by the same condition as the sysctl_kern_proc_kstack definition. */
2893#if defined(STACK) || defined(DDB)
2894static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD |
2895	CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks");
2896#endif
2897
2898static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD |
2899	CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups");
2900
/* Read-write: sysctl_kern_proc_rlimit also accepts a new limit. */
2901static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW |
2902	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit,
2903	"Process resource limits");
2904
2905static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD |
2906	CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings,
2907	"Process ps_strings location");
2908
2909static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD |
2910	CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask");
2911
/* Read-write: sysctl_kern_proc_osrel also accepts a new osreldate. */
2912static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
2913	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
2914	"Process binary osreldate");
2915
2916static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
2917	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
2918	"Process signal trampoline location");
2919
/*
 * Generation counter sampled by stop_all_proc() before its scan of allproc
 * and compared afterwards to notice processes it did not see.  Presumably
 * bumped elsewhere whenever a process is added -- confirm against the fork
 * path.
 */
2920int allproc_gen;
2921
2922void
2923stop_all_proc(void)
2924{
2925	struct proc *cp, *p;
2926	int r, gen;
2927	bool restart, seen_stopped, seen_exiting, stopped_some;
2928
2929	cp = curproc;
2930	/*
2931	 * stop_all_proc() assumes that all process which have
2932	 * usermode must be stopped, except current process, for
2933	 * obvious reasons.  Since other threads in the process
2934	 * establishing global stop could unstop something, disable
2935	 * calls from multithreaded processes as precaution.  The
2936	 * service must not be user-callable anyway.
2937	 */
2938	KASSERT((cp->p_flag & P_HADTHREADS) == 0 ||
2939	    (cp->p_flag & P_KTHREAD) != 0, ("mt stop_all_proc"));
2940
2941allproc_loop:
2942	sx_xlock(&allproc_lock);
2943	gen = allproc_gen;
2944	seen_exiting = seen_stopped = stopped_some = restart = false;
2945	LIST_REMOVE(cp, p_list);
2946	LIST_INSERT_HEAD(&allproc, cp, p_list);
2947	for (;;) {
2948		p = LIST_NEXT(cp, p_list);
2949		if (p == NULL)
2950			break;
2951		LIST_REMOVE(cp, p_list);
2952		LIST_INSERT_AFTER(p, cp, p_list);
2953		PROC_LOCK(p);
2954		if ((p->p_flag & (P_KTHREAD | P_SYSTEM |
2955		    P_TOTAL_STOP)) != 0) {
2956			PROC_UNLOCK(p);
2957			continue;
2958		}
2959		if ((p->p_flag & P_WEXIT) != 0) {
2960			seen_exiting = true;
2961			PROC_UNLOCK(p);
2962			continue;
2963		}
2964		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
2965			/*
2966			 * Stopped processes are tolerated when there
2967			 * are no other processes which might continue
2968			 * them.  P_STOPPED_SINGLE but not
2969			 * P_TOTAL_STOP process still has at least one
2970			 * thread running.
2971			 */
2972			seen_stopped = true;
2973			PROC_UNLOCK(p);
2974			continue;
2975		}
2976		_PHOLD(p);
2977		sx_xunlock(&allproc_lock);
2978		r = thread_single(p, SINGLE_ALLPROC);
2979		if (r != 0)
2980			restart = true;
2981		else
2982			stopped_some = true;
2983		_PRELE(p);
2984		PROC_UNLOCK(p);
2985		sx_xlock(&allproc_lock);
2986	}
2987	/* Catch forked children we did not see in iteration. */
2988	if (gen != allproc_gen)
2989		restart = true;
2990	sx_xunlock(&allproc_lock);
2991	if (restart || stopped_some || seen_exiting || seen_stopped) {
2992		kern_yield(PRI_USER);
2993		goto allproc_loop;
2994	}
2995}
2996
2997void
2998resume_all_proc(void)
2999{
3000	struct proc *cp, *p;
3001
3002	cp = curproc;
3003	sx_xlock(&allproc_lock);
3004	LIST_REMOVE(cp, p_list);
3005	LIST_INSERT_HEAD(&allproc, cp, p_list);
3006	for (;;) {
3007		p = LIST_NEXT(cp, p_list);
3008		if (p == NULL)
3009			break;
3010		LIST_REMOVE(cp, p_list);
3011		LIST_INSERT_AFTER(p, cp, p_list);
3012		PROC_LOCK(p);
3013		if ((p->p_flag & P_TOTAL_STOP) != 0) {
3014			sx_xunlock(&allproc_lock);
3015			_PHOLD(p);
3016			thread_single_end(p, SINGLE_ALLPROC);
3017			_PRELE(p);
3018			PROC_UNLOCK(p);
3019			sx_xlock(&allproc_lock);
3020		} else {
3021			PROC_UNLOCK(p);
3022		}
3023	}
3024	sx_xunlock(&allproc_lock);
3025}
3026
3027#define	TOTAL_STOP_DEBUG	1
3028#ifdef TOTAL_STOP_DEBUG
3029volatile static int ap_resume;
3030#include <sys/mount.h>
3031
3032static int
3033sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
3034{
3035	int error, val;
3036
3037	val = 0;
3038	ap_resume = 0;
3039	error = sysctl_handle_int(oidp, &val, 0, req);
3040	if (error != 0 || req->newptr == NULL)
3041		return (error);
3042	if (val != 0) {
3043		stop_all_proc();
3044		syncer_suspend();
3045		while (ap_resume == 0)
3046			;
3047		syncer_resume();
3048		resume_all_proc();
3049	}
3050	return (0);
3051}
3052
3053SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW |
3054    CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0,
3055    sysctl_debug_stop_all_proc, "I",
3056    "");
3057#endif
3058