kern_proc.c revision 100831
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
34 * $FreeBSD: head/sys/kern/kern_proc.c 100831 2002-07-28 19:59:31Z truckman $
35 */
36
37#include "opt_ktrace.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mutex.h>
45#include <sys/proc.h>
46#include <sys/sysproto.h>
47#include <sys/kse.h>
48#include <sys/sysctl.h>
49#include <sys/filedesc.h>
50#include <sys/tty.h>
51#include <sys/signalvar.h>
52#include <sys/sx.h>
53#include <sys/user.h>
54#include <sys/jail.h>
55#ifdef KTRACE
56#include <sys/uio.h>
57#include <sys/ktrace.h>
58#endif
59
60#include <vm/vm.h>
61#include <vm/vm_extern.h>
62#include <vm/pmap.h>
63#include <vm/vm_map.h>
64#include <vm/uma.h>
65#include <machine/critical.h>
66
67MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
68MALLOC_DEFINE(M_SESSION, "session", "session header");
69static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
70MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
71
72static struct proc *dopfind(register pid_t);
73
74static void doenterpgrp(struct proc *, struct pgrp *);
75
76static void pgdelete(struct pgrp *);
77
78static void orphanpg(struct pgrp *pg);
79
80static void proc_ctor(void *mem, int size, void *arg);
81static void proc_dtor(void *mem, int size, void *arg);
82static void proc_init(void *mem, int size);
83static void proc_fini(void *mem, int size);
84
85/*
86 * Other process lists
87 */
88struct pidhashhead *pidhashtbl;
89u_long pidhash;
90struct pgrphashhead *pgrphashtbl;
91u_long pgrphash;
92struct proclist allproc;
93struct proclist zombproc;
94struct sx allproc_lock;
95struct sx proctree_lock;
96struct mtx pargs_ref_lock;
97uma_zone_t proc_zone;
98uma_zone_t ithread_zone;
99
100static int active_procs;
101static int cached_procs;
102static int allocated_procs;
103
104#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
105
106CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
107
108/*
109 * Initialize global process hashing structures.
110 */
111void
112procinit()
113{
114
115	sx_init(&allproc_lock, "allproc");
116	sx_init(&proctree_lock, "proctree");
117	mtx_init(&pargs_ref_lock, "struct pargs.ref", NULL, MTX_DEF);
118	LIST_INIT(&allproc);
119	LIST_INIT(&zombproc);
120	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
121	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
122	proc_zone = uma_zcreate("PROC", sizeof (struct proc),
123	    proc_ctor, proc_dtor, proc_init, proc_fini,
124	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
125	uihashinit();
126}
127
128/*
129 * Prepare a proc for use.
130 */
131static void
132proc_ctor(void *mem, int size, void *arg)
133{
134	struct proc *p;
135
136	KASSERT((size == sizeof(struct proc)),
137	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct proc)));
138	p = (struct proc *)mem;
139#if 0
140	/*
141	 * Maybe move these from process creation, but maybe not.
142	 * Moving them here takes them away from their "natural" place
143	 * in the fork process.
144	 */
145	bzero(&p->p_startzero,
146	    (unsigned) RANGEOF(struct proc, p_startzero, p_endzero));
147	p->p_state = PRS_NEW;
148	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
149	LIST_INIT(&p->p_children);
150	callout_init(&p->p_itcallout, 0);
151#endif
152	cached_procs--;
153	active_procs++;
154}
155
156/*
157 * Reclaim a proc after use.
158 */
159static void
160proc_dtor(void *mem, int size, void *arg)
161{
162	struct proc *p;
163
164	KASSERT((size == sizeof(struct proc)),
165	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct proc)));
166	p = (struct proc *)mem;
167	/* INVARIANTS checks go here */
168#if 0	/* See comment in proc_ctor about seperating things */
169	mtx_destroy(&p->p_mtx);
170#endif
171	active_procs--;
172	cached_procs++;
173}
174
175/*
176 * Initialize type-stable parts of a proc (when newly created).
177 */
178static void
179proc_init(void *mem, int size)
180{
181	struct proc *p;
182
183	KASSERT((size == sizeof(struct proc)),
184	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct proc)));
185	p = (struct proc *)mem;
186	vm_proc_new(p);
187	cached_procs++;
188	allocated_procs++;
189}
190
191/*
192 * Tear down type-stable parts of a proc (just before being discarded)
193 */
194static void
195proc_fini(void *mem, int size)
196{
197	struct proc *p;
198
199	KASSERT((size == sizeof(struct proc)),
200	    ("size mismatch: %d != %d\n", size, (int)sizeof(struct proc)));
201	p = (struct proc *)mem;
202	vm_proc_dispose(p);
203	cached_procs--;
204	allocated_procs--;
205}
206
207/*
208 * KSE is linked onto the idle queue.
209 */
210void
211kse_link(struct kse *ke, struct ksegrp *kg)
212{
213	struct proc *p = kg->kg_proc;
214
215	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
216	kg->kg_kses++;
217	ke->ke_state = KES_IDLE;
218	TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
219	kg->kg_idle_kses++;
220	ke->ke_proc	= p;
221	ke->ke_ksegrp	= kg;
222	ke->ke_thread	= NULL;
223	ke->ke_oncpu = NOCPU;
224}
225
226void
227ksegrp_link(struct ksegrp *kg, struct proc *p)
228{
229
230	TAILQ_INIT(&kg->kg_threads);
231	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
232	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
233	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
234	TAILQ_INIT(&kg->kg_iq);		/* all kses in ksegrp */
235	kg->kg_proc	= p;
236/* the following counters are in the -zero- section and may not need clearing */
237	kg->kg_numthreads = 0;
238	kg->kg_runnable = 0;
239	kg->kg_kses = 0;
240	kg->kg_idle_kses = 0;
241	kg->kg_runq_kses = 0; /* XXXKSE change name */
242/* link it in now that it's consitant */
243	p->p_numksegrps++;
244	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
245}
246
247/*
248 * for a newly created process,
249 * link up a the structure and its initial threads etc.
250 */
251void
252proc_linkup(struct proc *p, struct ksegrp *kg,
253			struct kse *ke, struct thread *td)
254{
255
256	TAILQ_INIT(&p->p_ksegrps);	     /* all ksegrps in proc */
257	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
258	TAILQ_INIT(&p->p_suspended);	     /* Threads suspended */
259
260	ksegrp_link(kg, p);
261	kse_link(ke, kg);
262	thread_link(td, kg);
263}
264
265int
266thread_wakeup(struct thread *td, struct  thread_wakeup_args *uap)
267{
268
269	return(ENOSYS);
270}
271
272int
273kse_exit(struct thread *td, struct kse_exit_args *uap)
274{
275
276	return(ENOSYS);
277}
278
279int
280kse_yield(struct thread *td, struct kse_yield_args *uap)
281{
282
283	PROC_LOCK(td->td_proc);
284	mtx_lock_spin(&sched_lock);
285	thread_exit();
286	/* NOTREACHED */
287	return(0);
288}
289
290int kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
291{
292
293	return(ENOSYS);
294}
295
296/*
297 * No new KSEG: first call: use current KSE, don't schedule an upcall
298 * All other situations, do alloate a new KSE and schedule an upcall on it.
299 */
300/* struct kse_new_args {
301	struct kse_mailbox *mbx;
302	int	new_grp_flag;
303}; */
304int
305kse_new(struct thread *td, struct kse_new_args *uap)
306{
307	struct kse *newkse;
308	struct proc *p;
309	struct kse_mailbox mbx;
310	int err;
311
312	p = td->td_proc;
313	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
314		return (err);
315	PROC_LOCK(p);
316	/*
317	 * If we have no KSE mode set, just set it, and skip KSE and KSEGRP
318	 * creation.  You cannot request a new group with the first one as
319	 * you are effectively getting one. Instead, go directly to saving
320	 * the upcall info.
321	 */
322	if ((td->td_proc->p_flag & P_KSES) || (uap->new_grp_flag)) {
323
324		return (EINVAL);	/* XXX */
325		/*
326		 * If newgroup then create the new group.
327		 * Check we have the resources for this.
328		 */
329		/* Copy lots of fields from the current KSEGRP.  */
330		/* Create the new KSE */
331		/* Copy lots of fields from the current KSE.  */
332	} else {
333		/*
334		 * We are switching to KSEs so just
335		 * use the preallocated ones for this call.
336		 * XXXKSE if we have to initialise any fields for KSE
337		 * mode operation, do it here.
338		 */
339		newkse = td->td_kse;
340	}
341	/*
342	 * Fill out the KSE-mode specific fields of the new kse.
343	 */
344	PROC_UNLOCK(p);
345	mtx_lock_spin(&sched_lock);
346	mi_switch();	/* Save current registers to PCB. */
347	mtx_unlock_spin(&sched_lock);
348	newkse->ke_upcall = mbx.kmbx_upcall;
349	newkse->ke_stackbase  = mbx.kmbx_stackbase;
350	newkse->ke_stacksize = mbx.kmbx_stacksize;
351	newkse->ke_mailbox = uap->mbx;
352	cpu_save_upcall(td, newkse);
353	/* Note that we are the returning syscall */
354	td->td_retval[0] = 0;
355	td->td_retval[1] = 0;
356
357	if ((td->td_proc->p_flag & P_KSES) || (uap->new_grp_flag)) {
358		thread_schedule_upcall(td, newkse);
359	} else {
360		/*
361		 * Don't set this until we are truely ready, because
362		 * things will start acting differently.  Return to the
363		 * calling code for the first time.  Assuming we set up
364		 * the mailboxes right, all syscalls after this will be
365		 * asynchronous.
366		 */
367		td->td_proc->p_flag |= P_KSES;
368	}
369	return (0);
370}
371
372/*
373 * Is p an inferior of the current process?
374 */
375int
376inferior(p)
377	register struct proc *p;
378{
379
380	sx_assert(&proctree_lock, SX_LOCKED);
381	for (; p != curproc; p = p->p_pptr)
382		if (p->p_pid == 0)
383			return (0);
384	return (1);
385}
386
387/*
388 * Locate a process by number
389 */
390struct proc *
391pfind(pid)
392	register pid_t pid;
393{
394	register struct proc *p;
395
396	sx_slock(&allproc_lock);
397	p = dopfind(pid);
398	sx_sunlock(&allproc_lock);
399	return (p);
400}
401
402static struct proc *
403dopfind(pid)
404	register pid_t pid;
405{
406	register struct proc *p;
407
408	sx_assert(&allproc_lock, SX_LOCKED);
409
410	LIST_FOREACH(p, PIDHASH(pid), p_hash)
411		if (p->p_pid == pid) {
412			PROC_LOCK(p);
413			break;
414		}
415	return (p);
416}
417
418/*
419 * Locate a process group by number.
420 * The caller must hold proctree_lock.
421 */
422struct pgrp *
423pgfind(pgid)
424	register pid_t pgid;
425{
426	register struct pgrp *pgrp;
427
428	sx_assert(&proctree_lock, SX_LOCKED);
429
430	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
431		if (pgrp->pg_id == pgid) {
432			PGRP_LOCK(pgrp);
433			return (pgrp);
434		}
435	}
436	return (NULL);
437}
438
439/*
440 * Create a new process group.
441 * pgid must be equal to the pid of p.
442 * Begin a new session if required.
443 */
444int
445enterpgrp(p, pgid, pgrp, sess)
446	register struct proc *p;
447	pid_t pgid;
448	struct pgrp *pgrp;
449	struct session *sess;
450{
451	struct pgrp *pgrp2;
452
453	sx_assert(&proctree_lock, SX_XLOCKED);
454
455	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
456	KASSERT(p->p_pid == pgid,
457	    ("enterpgrp: new pgrp and pid != pgid"));
458
459	pgrp2 = pgfind(pgid);
460
461	KASSERT(pgrp2 == NULL,
462	    ("enterpgrp: pgrp with pgid exists"));
463	KASSERT(!SESS_LEADER(p),
464	    ("enterpgrp: session leader attempted setpgrp"));
465
466	mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
467
468	if (sess != NULL) {
469		/*
470		 * new session
471		 */
472		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
473		PROC_LOCK(p);
474		p->p_flag &= ~P_CONTROLT;
475		PROC_UNLOCK(p);
476		PGRP_LOCK(pgrp);
477		sess->s_leader = p;
478		sess->s_sid = p->p_pid;
479		sess->s_count = 1;
480		sess->s_ttyvp = NULL;
481		sess->s_ttyp = NULL;
482		bcopy(p->p_session->s_login, sess->s_login,
483			    sizeof(sess->s_login));
484		pgrp->pg_session = sess;
485		KASSERT(p == curproc,
486		    ("enterpgrp: mksession and p != curproc"));
487	} else {
488		pgrp->pg_session = p->p_session;
489		SESS_LOCK(pgrp->pg_session);
490		pgrp->pg_session->s_count++;
491		SESS_UNLOCK(pgrp->pg_session);
492		PGRP_LOCK(pgrp);
493	}
494	pgrp->pg_id = pgid;
495	LIST_INIT(&pgrp->pg_members);
496
497	/*
498	 * As we have an exclusive lock of proctree_lock,
499	 * this should not deadlock.
500	 */
501	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
502	pgrp->pg_jobc = 0;
503	SLIST_INIT(&pgrp->pg_sigiolst);
504	PGRP_UNLOCK(pgrp);
505
506	doenterpgrp(p, pgrp);
507
508	return (0);
509}
510
511/*
512 * Move p to an existing process group
513 */
514int
515enterthispgrp(p, pgrp)
516	register struct proc *p;
517	struct pgrp *pgrp;
518{
519
520	sx_assert(&proctree_lock, SX_XLOCKED);
521	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
522	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
523	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
524	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
525	KASSERT(pgrp->pg_session == p->p_session,
526		("%s: pgrp's session %p, p->p_session %p.\n",
527		__func__,
528		pgrp->pg_session,
529		p->p_session));
530	KASSERT(pgrp != p->p_pgrp,
531		("%s: p belongs to pgrp.", __func__));
532
533	doenterpgrp(p, pgrp);
534
535	return (0);
536}
537
538/*
539 * Move p to a process group
540 */
541static void
542doenterpgrp(p, pgrp)
543	struct proc *p;
544	struct pgrp *pgrp;
545{
546	struct pgrp *savepgrp;
547
548	sx_assert(&proctree_lock, SX_XLOCKED);
549	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
550	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
551	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
552	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
553
554	savepgrp = p->p_pgrp;
555
556	/*
557	 * Adjust eligibility of affected pgrps to participate in job control.
558	 * Increment eligibility counts before decrementing, otherwise we
559	 * could reach 0 spuriously during the first call.
560	 */
561	fixjobc(p, pgrp, 1);
562	fixjobc(p, p->p_pgrp, 0);
563
564	PGRP_LOCK(pgrp);
565	PGRP_LOCK(savepgrp);
566	PROC_LOCK(p);
567	LIST_REMOVE(p, p_pglist);
568	p->p_pgrp = pgrp;
569	PROC_UNLOCK(p);
570	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
571	PGRP_UNLOCK(savepgrp);
572	PGRP_UNLOCK(pgrp);
573	if (LIST_EMPTY(&savepgrp->pg_members))
574		pgdelete(savepgrp);
575}
576
577/*
578 * remove process from process group
579 */
580int
581leavepgrp(p)
582	register struct proc *p;
583{
584	struct pgrp *savepgrp;
585
586	sx_assert(&proctree_lock, SX_XLOCKED);
587	savepgrp = p->p_pgrp;
588	PGRP_LOCK(savepgrp);
589	PROC_LOCK(p);
590	LIST_REMOVE(p, p_pglist);
591	p->p_pgrp = NULL;
592	PROC_UNLOCK(p);
593	PGRP_UNLOCK(savepgrp);
594	if (LIST_EMPTY(&savepgrp->pg_members))
595		pgdelete(savepgrp);
596	return (0);
597}
598
599/*
600 * delete a process group
601 */
602static void
603pgdelete(pgrp)
604	register struct pgrp *pgrp;
605{
606	struct session *savesess;
607
608	sx_assert(&proctree_lock, SX_XLOCKED);
609	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
610	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
611
612	/*
613	 * Reset any sigio structures pointing to us as a result of
614	 * F_SETOWN with our pgid.
615	 */
616	funsetownlst(&pgrp->pg_sigiolst);
617
618	PGRP_LOCK(pgrp);
619	if (pgrp->pg_session->s_ttyp != NULL &&
620	    pgrp->pg_session->s_ttyp->t_pgrp == pgrp)
621		pgrp->pg_session->s_ttyp->t_pgrp = NULL;
622	LIST_REMOVE(pgrp, pg_hash);
623	savesess = pgrp->pg_session;
624	SESS_LOCK(savesess);
625	savesess->s_count--;
626	SESS_UNLOCK(savesess);
627	PGRP_UNLOCK(pgrp);
628	if (savesess->s_count == 0) {
629		mtx_destroy(&savesess->s_mtx);
630		FREE(pgrp->pg_session, M_SESSION);
631	}
632	mtx_destroy(&pgrp->pg_mtx);
633	FREE(pgrp, M_PGRP);
634}
635
636/*
637 * Adjust pgrp jobc counters when specified process changes process group.
638 * We count the number of processes in each process group that "qualify"
639 * the group for terminal job control (those with a parent in a different
640 * process group of the same session).  If that count reaches zero, the
641 * process group becomes orphaned.  Check both the specified process'
642 * process group and that of its children.
643 * entering == 0 => p is leaving specified group.
644 * entering == 1 => p is entering specified group.
645 */
646void
647fixjobc(p, pgrp, entering)
648	register struct proc *p;
649	register struct pgrp *pgrp;
650	int entering;
651{
652	register struct pgrp *hispgrp;
653	register struct session *mysession;
654
655	sx_assert(&proctree_lock, SX_LOCKED);
656	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
657	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
658	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
659
660	/*
661	 * Check p's parent to see whether p qualifies its own process
662	 * group; if so, adjust count for p's process group.
663	 */
664	mysession = pgrp->pg_session;
665	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
666	    hispgrp->pg_session == mysession) {
667		PGRP_LOCK(pgrp);
668		if (entering)
669			pgrp->pg_jobc++;
670		else {
671			--pgrp->pg_jobc;
672			if (pgrp->pg_jobc == 0)
673				orphanpg(pgrp);
674		}
675		PGRP_UNLOCK(pgrp);
676	}
677
678	/*
679	 * Check this process' children to see whether they qualify
680	 * their process groups; if so, adjust counts for children's
681	 * process groups.
682	 */
683	LIST_FOREACH(p, &p->p_children, p_sibling) {
684		if ((hispgrp = p->p_pgrp) != pgrp &&
685		    hispgrp->pg_session == mysession &&
686		    p->p_state != PRS_ZOMBIE) {
687			PGRP_LOCK(hispgrp);
688			if (entering)
689				hispgrp->pg_jobc++;
690			else {
691				--hispgrp->pg_jobc;
692				if (hispgrp->pg_jobc == 0)
693					orphanpg(hispgrp);
694			}
695			PGRP_UNLOCK(hispgrp);
696		}
697	}
698}
699
700/*
701 * A process group has become orphaned;
702 * if there are any stopped processes in the group,
703 * hang-up all process in that group.
704 */
705static void
706orphanpg(pg)
707	struct pgrp *pg;
708{
709	register struct proc *p;
710
711	PGRP_LOCK_ASSERT(pg, MA_OWNED);
712
713	mtx_lock_spin(&sched_lock);
714	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
715		if (P_SHOULDSTOP(p)) {
716			mtx_unlock_spin(&sched_lock);
717			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
718				PROC_LOCK(p);
719				psignal(p, SIGHUP);
720				psignal(p, SIGCONT);
721				PROC_UNLOCK(p);
722			}
723			return;
724		}
725	}
726	mtx_unlock_spin(&sched_lock);
727}
728
729#include "opt_ddb.h"
730#ifdef DDB
731#include <ddb/ddb.h>
732
733DB_SHOW_COMMAND(pgrpdump, pgrpdump)
734{
735	register struct pgrp *pgrp;
736	register struct proc *p;
737	register int i;
738
739	for (i = 0; i <= pgrphash; i++) {
740		if (!LIST_EMPTY(&pgrphashtbl[i])) {
741			printf("\tindx %d\n", i);
742			LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
743				printf(
744			"\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
745				    (void *)pgrp, (long)pgrp->pg_id,
746				    (void *)pgrp->pg_session,
747				    pgrp->pg_session->s_count,
748				    (void *)LIST_FIRST(&pgrp->pg_members));
749				LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
750					printf("\t\tpid %ld addr %p pgrp %p\n",
751					    (long)p->p_pid, (void *)p,
752					    (void *)p->p_pgrp);
753				}
754			}
755		}
756	}
757}
758#endif /* DDB */
759
760/*
761 * Fill in an kinfo_proc structure for the specified process.
762 * Must be called with the target process locked.
763 */
764void
765fill_kinfo_proc(p, kp)
766	struct proc *p;
767	struct kinfo_proc *kp;
768{
769	struct thread *td;
770	struct tty *tp;
771	struct session *sp;
772	struct timeval tv;
773
774	bzero(kp, sizeof(*kp));
775
776	kp->ki_structsize = sizeof(*kp);
777	kp->ki_paddr = p;
778	PROC_LOCK_ASSERT(p, MA_OWNED);
779	kp->ki_addr =/* p->p_addr; */0; /* XXXKSE */
780	kp->ki_args = p->p_args;
781	kp->ki_textvp = p->p_textvp;
782#ifdef KTRACE
783	kp->ki_tracep = p->p_tracep;
784	mtx_lock(&ktrace_mtx);
785	kp->ki_traceflag = p->p_traceflag;
786	mtx_unlock(&ktrace_mtx);
787#endif
788	kp->ki_fd = p->p_fd;
789	kp->ki_vmspace = p->p_vmspace;
790	if (p->p_ucred) {
791		kp->ki_uid = p->p_ucred->cr_uid;
792		kp->ki_ruid = p->p_ucred->cr_ruid;
793		kp->ki_svuid = p->p_ucred->cr_svuid;
794		/* XXX bde doesn't like KI_NGROUPS */
795		kp->ki_ngroups = min(p->p_ucred->cr_ngroups, KI_NGROUPS);
796		bcopy(p->p_ucred->cr_groups, kp->ki_groups,
797		    kp->ki_ngroups * sizeof(gid_t));
798		kp->ki_rgid = p->p_ucred->cr_rgid;
799		kp->ki_svgid = p->p_ucred->cr_svgid;
800	}
801	if (p->p_procsig) {
802		kp->ki_sigignore = p->p_procsig->ps_sigignore;
803		kp->ki_sigcatch = p->p_procsig->ps_sigcatch;
804	}
805	mtx_lock_spin(&sched_lock);
806	if (p->p_state != PRS_NEW &&
807	    p->p_state != PRS_ZOMBIE &&
808	    p->p_vmspace != NULL) {
809		struct vmspace *vm = p->p_vmspace;
810
811		kp->ki_size = vm->vm_map.size;
812		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
813		if (p->p_sflag & PS_INMEM)
814			kp->ki_rssize += UAREA_PAGES;
815		FOREACH_THREAD_IN_PROC(p, td) /* XXXKSE: thread swapout check */
816			kp->ki_rssize += KSTACK_PAGES;
817		kp->ki_swrss = vm->vm_swrss;
818		kp->ki_tsize = vm->vm_tsize;
819		kp->ki_dsize = vm->vm_dsize;
820		kp->ki_ssize = vm->vm_ssize;
821	}
822	if ((p->p_sflag & PS_INMEM) && p->p_stats) {
823		kp->ki_start = p->p_stats->p_start;
824		kp->ki_rusage = p->p_stats->p_ru;
825		kp->ki_childtime.tv_sec = p->p_stats->p_cru.ru_utime.tv_sec +
826		    p->p_stats->p_cru.ru_stime.tv_sec;
827		kp->ki_childtime.tv_usec = p->p_stats->p_cru.ru_utime.tv_usec +
828		    p->p_stats->p_cru.ru_stime.tv_usec;
829	}
830	if (p->p_state != PRS_ZOMBIE) {
831		td = FIRST_THREAD_IN_PROC(p);
832		if (!(p->p_flag & P_KSES)) {
833			if (td->td_wmesg != NULL) {
834				strncpy(kp->ki_wmesg, td->td_wmesg,
835				    sizeof(kp->ki_wmesg) - 1);
836			}
837			if (td->td_state == TDS_MTX) {
838				kp->ki_kiflag |= KI_MTXBLOCK;
839				strncpy(kp->ki_mtxname, td->td_mtxname,
840				    sizeof(kp->ki_mtxname) - 1);
841			}
842		}
843
844		if (p->p_state == PRS_NORMAL) { /*  XXXKSE very aproximate */
845			if ((td->td_state == TDS_RUNQ) ||
846			    (td->td_state == TDS_RUNNING)) {
847				kp->ki_stat = SRUN;
848			} else if (td->td_state == TDS_SLP) {
849				kp->ki_stat = SSLEEP;
850			} else if (P_SHOULDSTOP(p)) {
851				kp->ki_stat = SSTOP;
852			} else if (td->td_state == TDS_MTX) {
853				kp->ki_stat = SMTX;
854			} else {
855				kp->ki_stat = SWAIT;
856			}
857		} else {
858			kp->ki_stat = SIDL;
859		}
860
861		kp->ki_sflag = p->p_sflag;
862		kp->ki_swtime = p->p_swtime;
863		kp->ki_pid = p->p_pid;
864		/* vvv XXXKSE */
865		if (!(p->p_flag & P_KSES)) {
866			bintime2timeval(&p->p_runtime, &tv);
867			kp->ki_runtime = tv.tv_sec * (u_int64_t)1000000 + tv.tv_usec;
868			kp->ki_pctcpu = p->p_kse.ke_pctcpu;
869			kp->ki_estcpu = p->p_ksegrp.kg_estcpu;
870			kp->ki_slptime = p->p_ksegrp.kg_slptime;
871			kp->ki_wchan = td->td_wchan;
872			kp->ki_pri.pri_level = td->td_priority;
873			kp->ki_pri.pri_user = p->p_ksegrp.kg_user_pri;
874			kp->ki_pri.pri_class = p->p_ksegrp.kg_pri_class;
875			kp->ki_pri.pri_native = td->td_base_pri;
876			kp->ki_nice = p->p_ksegrp.kg_nice;
877			kp->ki_rqindex = p->p_kse.ke_rqindex;
878			kp->ki_oncpu = p->p_kse.ke_oncpu;
879			kp->ki_lastcpu = td->td_lastcpu;
880			kp->ki_tdflags = td->td_flags;
881			kp->ki_pcb = td->td_pcb;
882			kp->ki_kstack = (void *)td->td_kstack;
883		} else {
884			kp->ki_oncpu = -1;
885			kp->ki_lastcpu = -1;
886			kp->ki_tdflags = -1;
887			/* All the reast are 0 */
888		}
889	} else {
890		kp->ki_stat = SZOMB;
891	}
892	/* ^^^ XXXKSE */
893	mtx_unlock_spin(&sched_lock);
894	sp = NULL;
895	tp = NULL;
896	if (p->p_pgrp) {
897		kp->ki_pgid = p->p_pgrp->pg_id;
898		kp->ki_jobc = p->p_pgrp->pg_jobc;
899		sp = p->p_pgrp->pg_session;
900
901		if (sp != NULL) {
902			kp->ki_sid = sp->s_sid;
903			SESS_LOCK(sp);
904			strncpy(kp->ki_login, sp->s_login,
905			    sizeof(kp->ki_login) - 1);
906			if (sp->s_ttyvp)
907				kp->ki_kiflag |= KI_CTTY;
908			if (SESS_LEADER(p))
909				kp->ki_kiflag |= KI_SLEADER;
910			tp = sp->s_ttyp;
911			SESS_UNLOCK(sp);
912		}
913	}
914	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
915		kp->ki_tdev = dev2udev(tp->t_dev);
916		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
917		if (tp->t_session)
918			kp->ki_tsid = tp->t_session->s_sid;
919	} else
920		kp->ki_tdev = NOUDEV;
921	if (p->p_comm[0] != '\0') {
922		strncpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm) - 1);
923		strncpy(kp->ki_ocomm, p->p_comm, sizeof(kp->ki_ocomm) - 1);
924	}
925	kp->ki_siglist = p->p_siglist;
926	kp->ki_sigmask = p->p_sigmask;
927	kp->ki_xstat = p->p_xstat;
928	kp->ki_acflag = p->p_acflag;
929	kp->ki_flag = p->p_flag;
930	/* If jailed(p->p_ucred), emulate the old P_JAILED flag. */
931	if (jailed(p->p_ucred))
932		kp->ki_flag |= P_JAILED;
933	kp->ki_lock = p->p_lock;
934	if (p->p_pptr)
935		kp->ki_ppid = p->p_pptr->p_pid;
936}
937
938/*
939 * Locate a zombie process by number
940 */
941struct proc *
942zpfind(pid_t pid)
943{
944	struct proc *p;
945
946	sx_slock(&allproc_lock);
947	LIST_FOREACH(p, &zombproc, p_list)
948		if (p->p_pid == pid) {
949			PROC_LOCK(p);
950			break;
951		}
952	sx_sunlock(&allproc_lock);
953	return (p);
954}
955
956
957/*
958 * Must be called with the process locked and will return with it unlocked.
959 */
960static int
961sysctl_out_proc(struct proc *p, struct sysctl_req *req, int doingzomb)
962{
963	struct kinfo_proc kinfo_proc;
964	int error;
965	struct proc *np;
966	pid_t pid = p->p_pid;
967
968	PROC_LOCK_ASSERT(p, MA_OWNED);
969	fill_kinfo_proc(p, &kinfo_proc);
970	PROC_UNLOCK(p);
971	error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc));
972	if (error)
973		return (error);
974	if (doingzomb)
975		np = zpfind(pid);
976	else {
977		if (pid == 0)
978			return (0);
979		np = pfind(pid);
980	}
981	if (np == NULL)
982		return EAGAIN;
983	if (np != p) {
984		PROC_UNLOCK(np);
985		return EAGAIN;
986	}
987	PROC_UNLOCK(np);
988	return (0);
989}
990
991static int
992sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
993{
994	int *name = (int*) arg1;
995	u_int namelen = arg2;
996	struct proc *p;
997	int doingzomb;
998	int error = 0;
999
1000	if (oidp->oid_number == KERN_PROC_PID) {
1001		if (namelen != 1)
1002			return (EINVAL);
1003		p = pfind((pid_t)name[0]);
1004		if (!p)
1005			return (0);
1006		if (p_cansee(curthread, p)) {
1007			PROC_UNLOCK(p);
1008			return (0);
1009		}
1010		error = sysctl_out_proc(p, req, 0);
1011		return (error);
1012	}
1013	if (oidp->oid_number == KERN_PROC_ALL && !namelen)
1014		;
1015	else if (oidp->oid_number != KERN_PROC_ALL && namelen == 1)
1016		;
1017	else
1018		return (EINVAL);
1019
1020	if (!req->oldptr) {
1021		/* overestimate by 5 procs */
1022		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
1023		if (error)
1024			return (error);
1025	}
1026	sysctl_wire_old_buffer(req, 0);
1027	sx_slock(&allproc_lock);
1028	for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
1029		if (!doingzomb)
1030			p = LIST_FIRST(&allproc);
1031		else
1032			p = LIST_FIRST(&zombproc);
1033		for (; p != 0; p = LIST_NEXT(p, p_list)) {
1034			PROC_LOCK(p);
1035			/*
1036			 * Show a user only appropriate processes.
1037			 */
1038			if (p_cansee(curthread, p)) {
1039				PROC_UNLOCK(p);
1040				continue;
1041			}
1042			/*
1043			 * Skip embryonic processes.
1044			 */
1045			if (p->p_state == PRS_NEW) {
1046				PROC_UNLOCK(p);
1047				continue;
1048			}
1049			/*
1050			 * TODO - make more efficient (see notes below).
1051			 * do by session.
1052			 */
1053			switch (oidp->oid_number) {
1054
1055			case KERN_PROC_PGRP:
1056				/* could do this by traversing pgrp */
1057				if (p->p_pgrp == NULL ||
1058				    p->p_pgrp->pg_id != (pid_t)name[0]) {
1059					PROC_UNLOCK(p);
1060					continue;
1061				}
1062				break;
1063
1064			case KERN_PROC_TTY:
1065				if ((p->p_flag & P_CONTROLT) == 0 ||
1066				    p->p_session == NULL) {
1067					PROC_UNLOCK(p);
1068					continue;
1069				}
1070				SESS_LOCK(p->p_session);
1071				if (p->p_session->s_ttyp == NULL ||
1072				    dev2udev(p->p_session->s_ttyp->t_dev) !=
1073				    (udev_t)name[0]) {
1074					SESS_UNLOCK(p->p_session);
1075					PROC_UNLOCK(p);
1076					continue;
1077				}
1078				SESS_UNLOCK(p->p_session);
1079				break;
1080
1081			case KERN_PROC_UID:
1082				if (p->p_ucred == NULL ||
1083				    p->p_ucred->cr_uid != (uid_t)name[0]) {
1084					PROC_UNLOCK(p);
1085					continue;
1086				}
1087				break;
1088
1089			case KERN_PROC_RUID:
1090				if (p->p_ucred == NULL ||
1091				    p->p_ucred->cr_ruid != (uid_t)name[0]) {
1092					PROC_UNLOCK(p);
1093					continue;
1094				}
1095				break;
1096			}
1097
1098			error = sysctl_out_proc(p, req, doingzomb);
1099			if (error) {
1100				sx_sunlock(&allproc_lock);
1101				return (error);
1102			}
1103		}
1104	}
1105	sx_sunlock(&allproc_lock);
1106	return (0);
1107}
1108
1109struct pargs *
1110pargs_alloc(int len)
1111{
1112	struct pargs *pa;
1113
1114	MALLOC(pa, struct pargs *, sizeof(struct pargs) + len, M_PARGS,
1115		M_WAITOK);
1116	pa->ar_ref = 1;
1117	pa->ar_length = len;
1118	return (pa);
1119}
1120
1121void
1122pargs_free(struct pargs *pa)
1123{
1124
1125	FREE(pa, M_PARGS);
1126}
1127
1128void
1129pargs_hold(struct pargs *pa)
1130{
1131
1132	if (pa == NULL)
1133		return;
1134	PARGS_LOCK(pa);
1135	pa->ar_ref++;
1136	PARGS_UNLOCK(pa);
1137}
1138
1139void
1140pargs_drop(struct pargs *pa)
1141{
1142
1143	if (pa == NULL)
1144		return;
1145	PARGS_LOCK(pa);
1146	if (--pa->ar_ref == 0) {
1147		PARGS_UNLOCK(pa);
1148		pargs_free(pa);
1149	} else
1150		PARGS_UNLOCK(pa);
1151}
1152
1153/*
1154 * This sysctl allows a process to retrieve the argument list or process
1155 * title for another process without groping around in the address space
1156 * of the other process.  It also allow a process to set its own "process
1157 * title to a string of its own choice.
1158 */
1159static int
1160sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
1161{
1162	int *name = (int*) arg1;
1163	u_int namelen = arg2;
1164	struct proc *p;
1165	struct pargs *pa;
1166	int error = 0;
1167
1168	if (namelen != 1)
1169		return (EINVAL);
1170
1171	p = pfind((pid_t)name[0]);
1172	if (!p)
1173		return (0);
1174
1175	if ((!ps_argsopen) && p_cansee(curthread, p)) {
1176		PROC_UNLOCK(p);
1177		return (0);
1178	}
1179	PROC_UNLOCK(p);
1180
1181	if (req->newptr && curproc != p)
1182		return (EPERM);
1183
1184	PROC_LOCK(p);
1185	pa = p->p_args;
1186	pargs_hold(pa);
1187	PROC_UNLOCK(p);
1188	if (req->oldptr && pa != NULL) {
1189		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
1190	}
1191	pargs_drop(pa);
1192	if (req->newptr == NULL)
1193		return (error);
1194
1195	PROC_LOCK(p);
1196	pa = p->p_args;
1197	p->p_args = NULL;
1198	PROC_UNLOCK(p);
1199	pargs_drop(pa);
1200
1201	if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit)
1202		return (error);
1203
1204	pa = pargs_alloc(req->newlen);
1205	error = SYSCTL_IN(req, pa->ar_args, req->newlen);
1206	if (!error) {
1207		PROC_LOCK(p);
1208		p->p_args = pa;
1209		PROC_UNLOCK(p);
1210	} else
1211		pargs_free(pa);
1212	return (error);
1213}
1214
1215SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
1216
1217SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT,
1218	0, 0, sysctl_kern_proc, "S,proc", "Return entire process table");
1219
1220SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD,
1221	sysctl_kern_proc, "Process table");
1222
1223SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD,
1224	sysctl_kern_proc, "Process table");
1225
1226SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD,
1227	sysctl_kern_proc, "Process table");
1228
1229SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD,
1230	sysctl_kern_proc, "Process table");
1231
1232SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD,
1233	sysctl_kern_proc, "Process table");
1234
1235SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_ANYBODY,
1236	sysctl_kern_proc_args, "Process argument list");
1237
1238SYSCTL_INT(_kern_proc, OID_AUTO, active, CTLFLAG_RD,
1239	&active_procs, 0, "Number of active procs in system.");
1240
1241SYSCTL_INT(_kern_proc, OID_AUTO, cached, CTLFLAG_RD,
1242	&cached_procs, 0, "Number of procs in proc cache.");
1243
1244SYSCTL_INT(_kern_proc, OID_AUTO, allocated, CTLFLAG_RD,
1245	&allocated_procs, 0, "Number of procs in zone.");
1246