prsubr.c revision 10169:116daeae7223
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28/*	  All Rights Reserved  	*/
29
30#include <sys/types.h>
31#include <sys/t_lock.h>
32#include <sys/param.h>
33#include <sys/cmn_err.h>
34#include <sys/cred.h>
35#include <sys/priv.h>
36#include <sys/debug.h>
37#include <sys/errno.h>
38#include <sys/inline.h>
39#include <sys/kmem.h>
40#include <sys/mman.h>
41#include <sys/proc.h>
42#include <sys/sobject.h>
43#include <sys/sysmacros.h>
44#include <sys/systm.h>
45#include <sys/uio.h>
46#include <sys/var.h>
47#include <sys/vfs.h>
48#include <sys/vnode.h>
49#include <sys/session.h>
50#include <sys/pcb.h>
51#include <sys/signal.h>
52#include <sys/user.h>
53#include <sys/disp.h>
54#include <sys/class.h>
55#include <sys/ts.h>
56#include <sys/bitmap.h>
57#include <sys/poll.h>
58#include <sys/shm_impl.h>
59#include <sys/fault.h>
60#include <sys/syscall.h>
61#include <sys/procfs.h>
62#include <sys/processor.h>
63#include <sys/cpuvar.h>
64#include <sys/copyops.h>
65#include <sys/time.h>
66#include <sys/msacct.h>
67#include <vm/as.h>
68#include <vm/rm.h>
69#include <vm/seg.h>
70#include <vm/seg_vn.h>
71#include <vm/seg_dev.h>
72#include <vm/seg_spt.h>
73#include <vm/page.h>
74#include <sys/vmparam.h>
75#include <sys/swap.h>
76#include <fs/proc/prdata.h>
77#include <sys/task.h>
78#include <sys/project.h>
79#include <sys/contract_impl.h>
80#include <sys/contract/process.h>
81#include <sys/contract/process_impl.h>
82#include <sys/schedctl.h>
83#include <sys/pool.h>
84#include <sys/zone.h>
85#include <sys/atomic.h>
86#include <sys/sdt.h>
87
88#define	MAX_ITERS_SPIN	5
89
90typedef struct prpagev {
91	uint_t *pg_protv;	/* vector of page permissions */
92	char *pg_incore;	/* vector of incore flags */
93	size_t pg_npages;	/* number of pages in protv and incore */
94	ulong_t pg_pnbase;	/* pn within segment of first protv element */
95} prpagev_t;
96
97size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */
98
99extern struct seg_ops segdev_ops;	/* needs a header file */
100extern struct seg_ops segspt_shmops;	/* needs a header file */
101
102static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
103static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
104
105/*
106 * Choose an lwp from the complete set of lwps for the process.
107 * This is called for any operation applied to the process
108 * file descriptor that requires an lwp to operate upon.
109 *
110 * Returns a pointer to the thread for the selected LWP,
111 * and with the dispatcher lock held for the thread.
112 *
113 * The algorithm for choosing an lwp is critical for /proc semantics;
114 * don't touch this code unless you know all of the implications.
115 */
116kthread_t *
117prchoose(proc_t *p)
118{
119	kthread_t *t;
120	kthread_t *t_onproc = NULL;	/* running on processor */
121	kthread_t *t_run = NULL;	/* runnable, on disp queue */
122	kthread_t *t_sleep = NULL;	/* sleeping */
123	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
124	kthread_t *t_susp = NULL;	/* suspended stop */
125	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
126	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
127	kthread_t *t_req = NULL;	/* requested stop */
128	kthread_t *t_istop = NULL;	/* event-of-interest stop */
129	kthread_t *t_dtrace = NULL;	/* DTrace stop */
130
131	ASSERT(MUTEX_HELD(&p->p_lock));
132
133	/*
134	 * If the agent lwp exists, it takes precedence over all others.
135	 */
136	if ((t = p->p_agenttp) != NULL) {
137		thread_lock(t);
138		return (t);
139	}
140
141	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
142		return (t);
143	do {		/* for eacn lwp in the process */
144		if (VSTOPPED(t)) {	/* virtually stopped */
145			if (t_req == NULL)
146				t_req = t;
147			continue;
148		}
149
150		thread_lock(t);		/* make sure thread is in good state */
151		switch (t->t_state) {
152		default:
153			panic("prchoose: bad thread state %d, thread 0x%p",
154			    t->t_state, (void *)t);
155			/*NOTREACHED*/
156		case TS_SLEEP:
157			/* this is filthy */
158			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
159			    t->t_wchan0 == NULL) {
160				if (t_hold == NULL)
161					t_hold = t;
162			} else {
163				if (t_sleep == NULL)
164					t_sleep = t;
165			}
166			break;
167		case TS_RUN:
168		case TS_WAIT:
169			if (t_run == NULL)
170				t_run = t;
171			break;
172		case TS_ONPROC:
173			if (t_onproc == NULL)
174				t_onproc = t;
175			break;
176		case TS_ZOMB:		/* last possible choice */
177			break;
178		case TS_STOPPED:
179			switch (t->t_whystop) {
180			case PR_SUSPENDED:
181				if (t_susp == NULL)
182					t_susp = t;
183				break;
184			case PR_JOBCONTROL:
185				if (t->t_proc_flag & TP_PRSTOP) {
186					if (t_jdstop == NULL)
187						t_jdstop = t;
188				} else {
189					if (t_jstop == NULL)
190						t_jstop = t;
191				}
192				break;
193			case PR_REQUESTED:
194				if (t->t_dtrace_stop && t_dtrace == NULL)
195					t_dtrace = t;
196				else if (t_req == NULL)
197					t_req = t;
198				break;
199			case PR_SYSENTRY:
200			case PR_SYSEXIT:
201			case PR_SIGNALLED:
202			case PR_FAULTED:
203				/*
204				 * Make an lwp calling exit() be the
205				 * last lwp seen in the process.
206				 */
207				if (t_istop == NULL ||
208				    (t_istop->t_whystop == PR_SYSENTRY &&
209				    t_istop->t_whatstop == SYS_exit))
210					t_istop = t;
211				break;
212			case PR_CHECKPOINT:	/* can't happen? */
213				break;
214			default:
215				panic("prchoose: bad t_whystop %d, thread 0x%p",
216				    t->t_whystop, (void *)t);
217				/*NOTREACHED*/
218			}
219			break;
220		}
221		thread_unlock(t);
222	} while ((t = t->t_forw) != p->p_tlist);
223
224	if (t_onproc)
225		t = t_onproc;
226	else if (t_run)
227		t = t_run;
228	else if (t_sleep)
229		t = t_sleep;
230	else if (t_jstop)
231		t = t_jstop;
232	else if (t_jdstop)
233		t = t_jdstop;
234	else if (t_istop)
235		t = t_istop;
236	else if (t_dtrace)
237		t = t_dtrace;
238	else if (t_req)
239		t = t_req;
240	else if (t_hold)
241		t = t_hold;
242	else if (t_susp)
243		t = t_susp;
244	else			/* TS_ZOMB */
245		t = p->p_tlist;
246
247	if (t != NULL)
248		thread_lock(t);
249	return (t);
250}
251
252/*
253 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
254 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
255 * on the /proc file descriptor.  Called from stop() when a traced
256 * process stops on an event of interest.  Also called from exit()
257 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
258 */
259void
260prnotify(struct vnode *vp)
261{
262	prcommon_t *pcp = VTOP(vp)->pr_common;
263
264	mutex_enter(&pcp->prc_mutex);
265	cv_broadcast(&pcp->prc_wait);
266	mutex_exit(&pcp->prc_mutex);
267	if (pcp->prc_flags & PRC_POLL) {
268		/*
269		 * We call pollwakeup() with POLLHUP to ensure that
270		 * the pollers are awakened even if they are polling
271		 * for nothing (i.e., waiting for the process to exit).
272		 * This enables the use of the PRC_POLL flag for optimization
273		 * (we can turn off PRC_POLL only if we know no pollers remain).
274		 */
275		pcp->prc_flags &= ~PRC_POLL;
276		pollwakeup(&pcp->prc_pollhead, POLLHUP);
277	}
278}
279
280/* called immediately below, in prfree() */
281static void
282prfreenotify(vnode_t *vp)
283{
284	prnode_t *pnp;
285	prcommon_t *pcp;
286
287	while (vp != NULL) {
288		pnp = VTOP(vp);
289		pcp = pnp->pr_common;
290		ASSERT(pcp->prc_thread == NULL);
291		pcp->prc_proc = NULL;
292		/*
293		 * We can't call prnotify() here because we are holding
294		 * pidlock.  We assert that there is no need to.
295		 */
296		mutex_enter(&pcp->prc_mutex);
297		cv_broadcast(&pcp->prc_wait);
298		mutex_exit(&pcp->prc_mutex);
299		ASSERT(!(pcp->prc_flags & PRC_POLL));
300
301		vp = pnp->pr_next;
302		pnp->pr_next = NULL;
303	}
304}
305
306/*
307 * Called from a hook in freeproc() when a traced process is removed
308 * from the process table.  The proc-table pointers of all associated
309 * /proc vnodes are cleared to indicate that the process has gone away.
310 */
311void
312prfree(proc_t *p)
313{
314	uint_t slot = p->p_slot;
315
316	ASSERT(MUTEX_HELD(&pidlock));
317
318	/*
319	 * Block the process against /proc so it can be freed.
320	 * It cannot be freed while locked by some controlling process.
321	 * Lock ordering:
322	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
323	 */
324	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
325	mutex_enter(&p->p_lock);
326	while (p->p_proc_flag & P_PR_LOCK) {
327		mutex_exit(&pr_pidlock);
328		cv_wait(&pr_pid_cv[slot], &p->p_lock);
329		mutex_exit(&p->p_lock);
330		mutex_enter(&pr_pidlock);
331		mutex_enter(&p->p_lock);
332	}
333
334	ASSERT(p->p_tlist == NULL);
335
336	prfreenotify(p->p_plist);
337	p->p_plist = NULL;
338
339	prfreenotify(p->p_trace);
340	p->p_trace = NULL;
341
342	/*
343	 * We broadcast to wake up everyone waiting for this process.
344	 * No one can reach this process from this point on.
345	 */
346	cv_broadcast(&pr_pid_cv[slot]);
347
348	mutex_exit(&p->p_lock);
349	mutex_exit(&pr_pidlock);
350}
351
352/*
353 * Called from a hook in exit() when a traced process is becoming a zombie.
354 */
355void
356prexit(proc_t *p)
357{
358	ASSERT(MUTEX_HELD(&p->p_lock));
359
360	if (pr_watch_active(p)) {
361		pr_free_watchpoints(p);
362		watch_disable(curthread);
363	}
364	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
365	if (p->p_trace) {
366		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
367		prnotify(p->p_trace);
368	}
369	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
370}
371
372/*
373 * Called when a thread calls lwp_exit().
374 */
375void
376prlwpexit(kthread_t *t)
377{
378	vnode_t *vp;
379	prnode_t *pnp;
380	prcommon_t *pcp;
381	proc_t *p = ttoproc(t);
382	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
383
384	ASSERT(t == curthread);
385	ASSERT(MUTEX_HELD(&p->p_lock));
386
387	/*
388	 * The process must be blocked against /proc to do this safely.
389	 * The lwp must not disappear while the process is marked P_PR_LOCK.
390	 * It is the caller's responsibility to have called prbarrier(p).
391	 */
392	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
393
394	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
395		pnp = VTOP(vp);
396		pcp = pnp->pr_common;
397		if (pcp->prc_thread == t) {
398			pcp->prc_thread = NULL;
399			pcp->prc_flags |= PRC_DESTROY;
400		}
401	}
402
403	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
404		pnp = VTOP(vp);
405		pcp = pnp->pr_common;
406		pcp->prc_thread = NULL;
407		pcp->prc_flags |= PRC_DESTROY;
408		prnotify(vp);
409	}
410
411	if (p->p_trace)
412		prnotify(p->p_trace);
413}
414
415/*
416 * Called when a zombie thread is joined or when a
417 * detached lwp exits.  Called from lwp_hash_out().
418 */
419void
420prlwpfree(proc_t *p, lwpent_t *lep)
421{
422	vnode_t *vp;
423	prnode_t *pnp;
424	prcommon_t *pcp;
425
426	ASSERT(MUTEX_HELD(&p->p_lock));
427
428	/*
429	 * The process must be blocked against /proc to do this safely.
430	 * The lwp must not disappear while the process is marked P_PR_LOCK.
431	 * It is the caller's responsibility to have called prbarrier(p).
432	 */
433	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
434
435	vp = lep->le_trace;
436	lep->le_trace = NULL;
437	while (vp) {
438		prnotify(vp);
439		pnp = VTOP(vp);
440		pcp = pnp->pr_common;
441		ASSERT(pcp->prc_thread == NULL &&
442		    (pcp->prc_flags & PRC_DESTROY));
443		pcp->prc_tslot = -1;
444		vp = pnp->pr_next;
445		pnp->pr_next = NULL;
446	}
447
448	if (p->p_trace)
449		prnotify(p->p_trace);
450}
451
452/*
453 * Called from a hook in exec() when a thread starts exec().
454 */
455void
456prexecstart(void)
457{
458	proc_t *p = ttoproc(curthread);
459	klwp_t *lwp = ttolwp(curthread);
460
461	/*
462	 * The P_PR_EXEC flag blocks /proc operations for
463	 * the duration of the exec().
464	 * We can't start exec() while the process is
465	 * locked by /proc, so we call prbarrier().
466	 * lwp_nostop keeps the process from being stopped
467	 * via job control for the duration of the exec().
468	 */
469
470	ASSERT(MUTEX_HELD(&p->p_lock));
471	prbarrier(p);
472	lwp->lwp_nostop++;
473	p->p_proc_flag |= P_PR_EXEC;
474}
475
476/*
477 * Called from a hook in exec() when a thread finishes exec().
478 * The thread may or may not have succeeded.  Some other thread
479 * may have beat it to the punch.
480 */
481void
482prexecend(void)
483{
484	proc_t *p = ttoproc(curthread);
485	klwp_t *lwp = ttolwp(curthread);
486	vnode_t *vp;
487	prnode_t *pnp;
488	prcommon_t *pcp;
489	model_t model = p->p_model;
490	id_t tid = curthread->t_tid;
491	int tslot = curthread->t_dslot;
492
493	ASSERT(MUTEX_HELD(&p->p_lock));
494
495	lwp->lwp_nostop--;
496	if (p->p_flag & SEXITLWPS) {
497		/*
498		 * We are on our way to exiting because some
499		 * other thread beat us in the race to exec().
500		 * Don't clear the P_PR_EXEC flag in this case.
501		 */
502		return;
503	}
504
505	/*
506	 * Wake up anyone waiting in /proc for the process to complete exec().
507	 */
508	p->p_proc_flag &= ~P_PR_EXEC;
509	if ((vp = p->p_trace) != NULL) {
510		pcp = VTOP(vp)->pr_common;
511		mutex_enter(&pcp->prc_mutex);
512		cv_broadcast(&pcp->prc_wait);
513		mutex_exit(&pcp->prc_mutex);
514		for (; vp != NULL; vp = pnp->pr_next) {
515			pnp = VTOP(vp);
516			pnp->pr_common->prc_datamodel = model;
517		}
518	}
519	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
520		/*
521		 * We dealt with the process common above.
522		 */
523		ASSERT(p->p_trace != NULL);
524		pcp = VTOP(vp)->pr_common;
525		mutex_enter(&pcp->prc_mutex);
526		cv_broadcast(&pcp->prc_wait);
527		mutex_exit(&pcp->prc_mutex);
528		for (; vp != NULL; vp = pnp->pr_next) {
529			pnp = VTOP(vp);
530			pcp = pnp->pr_common;
531			pcp->prc_datamodel = model;
532			pcp->prc_tid = tid;
533			pcp->prc_tslot = tslot;
534		}
535	}
536}
537
538/*
539 * Called from a hook in relvm() just before freeing the address space.
540 * We free all the watched areas now.
541 */
542void
543prrelvm(void)
544{
545	proc_t *p = ttoproc(curthread);
546
547	mutex_enter(&p->p_lock);
548	prbarrier(p);	/* block all other /proc operations */
549	if (pr_watch_active(p)) {
550		pr_free_watchpoints(p);
551		watch_disable(curthread);
552	}
553	mutex_exit(&p->p_lock);
554	pr_free_watched_pages(p);
555}
556
557/*
558 * Called from hooks in exec-related code when a traced process
559 * attempts to exec(2) a setuid/setgid program or an unreadable
560 * file.  Rather than fail the exec we invalidate the associated
561 * /proc vnodes so that subsequent attempts to use them will fail.
562 *
563 * All /proc vnodes, except directory vnodes, are retained on a linked
564 * list (rooted at p_plist in the process structure) until last close.
565 *
566 * A controlling process must re-open the /proc files in order to
567 * regain control.
568 */
569void
570prinvalidate(struct user *up)
571{
572	kthread_t *t = curthread;
573	proc_t *p = ttoproc(t);
574	vnode_t *vp;
575	prnode_t *pnp;
576	int writers = 0;
577
578	mutex_enter(&p->p_lock);
579	prbarrier(p);	/* block all other /proc operations */
580
581	/*
582	 * At this moment, there can be only one lwp in the process.
583	 */
584	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
585
586	/*
587	 * Invalidate any currently active /proc vnodes.
588	 */
589	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
590		pnp = VTOP(vp);
591		switch (pnp->pr_type) {
592		case PR_PSINFO:		/* these files can read by anyone */
593		case PR_LPSINFO:
594		case PR_LWPSINFO:
595		case PR_LWPDIR:
596		case PR_LWPIDDIR:
597		case PR_USAGE:
598		case PR_LUSAGE:
599		case PR_LWPUSAGE:
600			break;
601		default:
602			pnp->pr_flags |= PR_INVAL;
603			break;
604		}
605	}
606	/*
607	 * Wake up anyone waiting for the process or lwp.
608	 * p->p_trace is guaranteed to be non-NULL if there
609	 * are any open /proc files for this process.
610	 */
611	if ((vp = p->p_trace) != NULL) {
612		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
613
614		prnotify(vp);
615		/*
616		 * Are there any writers?
617		 */
618		if ((writers = pcp->prc_writers) != 0) {
619			/*
620			 * Clear the exclusive open flag (old /proc interface).
621			 * Set prc_selfopens equal to prc_writers so that
622			 * the next O_EXCL|O_WRITE open will succeed
623			 * even with existing (though invalid) writers.
624			 * prclose() must decrement prc_selfopens when
625			 * the invalid files are closed.
626			 */
627			pcp->prc_flags &= ~PRC_EXCL;
628			ASSERT(pcp->prc_selfopens <= writers);
629			pcp->prc_selfopens = writers;
630		}
631	}
632	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
633	while (vp != NULL) {
634		/*
635		 * We should not invalidate the lwpiddir vnodes,
636		 * but the necessities of maintaining the old
637		 * ioctl()-based version of /proc require it.
638		 */
639		pnp = VTOP(vp);
640		pnp->pr_flags |= PR_INVAL;
641		prnotify(vp);
642		vp = pnp->pr_next;
643	}
644
645	/*
646	 * If any tracing flags are in effect and any vnodes are open for
647	 * writing then set the requested-stop and run-on-last-close flags.
648	 * Otherwise, clear all tracing flags.
649	 */
650	t->t_proc_flag &= ~TP_PAUSE;
651	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
652		t->t_proc_flag |= TP_PRSTOP;
653		aston(t);		/* so ISSIG will see the flag */
654		p->p_proc_flag |= P_PR_RUNLCL;
655	} else {
656		premptyset(&up->u_entrymask);		/* syscalls */
657		premptyset(&up->u_exitmask);
658		up->u_systrap = 0;
659		premptyset(&p->p_sigmask);		/* signals */
660		premptyset(&p->p_fltmask);		/* faults */
661		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
662		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
663		prnostep(ttolwp(t));
664	}
665
666	mutex_exit(&p->p_lock);
667}
668
669/*
670 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
671 * Return with pr_pidlock held in all cases.
672 * Return with p_lock held if the the process still exists.
673 * Return value is the process pointer if the process still exists, else NULL.
674 * If we lock the process, give ourself kernel priority to avoid deadlocks;
675 * this is undone in prunlock().
676 */
677proc_t *
678pr_p_lock(prnode_t *pnp)
679{
680	proc_t *p;
681	prcommon_t *pcp;
682
683	mutex_enter(&pr_pidlock);
684	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
685		return (NULL);
686	mutex_enter(&p->p_lock);
687	while (p->p_proc_flag & P_PR_LOCK) {
688		/*
689		 * This cv/mutex pair is persistent even if
690		 * the process disappears while we sleep.
691		 */
692		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
693		kmutex_t *mp = &p->p_lock;
694
695		mutex_exit(&pr_pidlock);
696		cv_wait(cv, mp);
697		mutex_exit(mp);
698		mutex_enter(&pr_pidlock);
699		if (pcp->prc_proc == NULL)
700			return (NULL);
701		ASSERT(p == pcp->prc_proc);
702		mutex_enter(&p->p_lock);
703	}
704	p->p_proc_flag |= P_PR_LOCK;
705	THREAD_KPRI_REQUEST();
706	return (p);
707}
708
709/*
710 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
711 * This prevents any lwp of the process from disappearing and
712 * blocks most operations that a process can perform on itself.
713 * Returns 0 on success, a non-zero error number on failure.
714 *
715 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
716 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
717 *
718 * error returns:
719 *	ENOENT: process or lwp has disappeared or process is exiting
720 *		(or has become a zombie and zdisp == ZNO).
721 *	EAGAIN: procfs vnode has become invalid.
722 *	EINTR:  signal arrived while waiting for exec to complete.
723 */
724int
725prlock(prnode_t *pnp, int zdisp)
726{
727	prcommon_t *pcp;
728	proc_t *p;
729
730again:
731	pcp = pnp->pr_common;
732	p = pr_p_lock(pnp);
733	mutex_exit(&pr_pidlock);
734
735	/*
736	 * Return ENOENT immediately if there is no process.
737	 */
738	if (p == NULL)
739		return (ENOENT);
740
741	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
742
743	/*
744	 * Return ENOENT if process entered zombie state or is exiting
745	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
746	 */
747	if (zdisp == ZNO &&
748	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
749		prunlock(pnp);
750		return (ENOENT);
751	}
752
753	/*
754	 * If lwp-specific, check to see if lwp has disappeared.
755	 */
756	if (pcp->prc_flags & PRC_LWP) {
757		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
758		    pcp->prc_tslot == -1) {
759			prunlock(pnp);
760			return (ENOENT);
761		}
762	}
763
764	/*
765	 * Return EAGAIN if we have encountered a security violation.
766	 * (The process exec'd a set-id or unreadable executable file.)
767	 */
768	if (pnp->pr_flags & PR_INVAL) {
769		prunlock(pnp);
770		return (EAGAIN);
771	}
772
773	/*
774	 * If process is undergoing an exec(), wait for
775	 * completion and then start all over again.
776	 */
777	if (p->p_proc_flag & P_PR_EXEC) {
778		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
779		mutex_enter(&pcp->prc_mutex);
780		prunlock(pnp);
781		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
782			mutex_exit(&pcp->prc_mutex);
783			return (EINTR);
784		}
785		mutex_exit(&pcp->prc_mutex);
786		goto again;
787	}
788
789	/*
790	 * We return holding p->p_lock.
791	 */
792	return (0);
793}
794
795/*
796 * Undo prlock() and pr_p_lock().
797 * p->p_lock is still held; pr_pidlock is no longer held.
798 *
799 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
800 * if any, waiting for the flag to be dropped; it retains p->p_lock.
801 *
802 * prunlock() calls prunmark() and then drops p->p_lock.
803 */
804void
805prunmark(proc_t *p)
806{
807	ASSERT(p->p_proc_flag & P_PR_LOCK);
808	ASSERT(MUTEX_HELD(&p->p_lock));
809
810	cv_signal(&pr_pid_cv[p->p_slot]);
811	p->p_proc_flag &= ~P_PR_LOCK;
812	THREAD_KPRI_RELEASE();
813}
814
815void
816prunlock(prnode_t *pnp)
817{
818	prcommon_t *pcp = pnp->pr_common;
819	proc_t *p = pcp->prc_proc;
820
821	/*
822	 * If we (or someone) gave it a SIGKILL, and it is not
823	 * already a zombie, set it running unconditionally.
824	 */
825	if ((p->p_flag & SKILLED) &&
826	    !(p->p_flag & SEXITING) &&
827	    !(pcp->prc_flags & PRC_DESTROY) &&
828	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
829		(void) pr_setrun(pnp, 0);
830	prunmark(p);
831	mutex_exit(&p->p_lock);
832}
833
834/*
835 * Called while holding p->p_lock to delay until the process is unlocked.
836 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
837 * The process cannot become locked again until p->p_lock is dropped.
838 */
839void
840prbarrier(proc_t *p)
841{
842	ASSERT(MUTEX_HELD(&p->p_lock));
843
844	if (p->p_proc_flag & P_PR_LOCK) {
845		/* The process is locked; delay until not locked */
846		uint_t slot = p->p_slot;
847
848		while (p->p_proc_flag & P_PR_LOCK)
849			cv_wait(&pr_pid_cv[slot], &p->p_lock);
850		cv_signal(&pr_pid_cv[slot]);
851	}
852}
853
854/*
855 * Return process/lwp status.
856 * The u-block is mapped in by this routine and unmapped at the end.
857 */
858void
859prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
860{
861	kthread_t *t;
862
863	ASSERT(MUTEX_HELD(&p->p_lock));
864
865	t = prchoose(p);	/* returns locked thread */
866	ASSERT(t != NULL);
867	thread_unlock(t);
868
869	/* just bzero the process part, prgetlwpstatus() does the rest */
870	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
871	sp->pr_nlwp = p->p_lwpcnt;
872	sp->pr_nzomb = p->p_zombcnt;
873	prassignset(&sp->pr_sigpend, &p->p_sig);
874	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
875	sp->pr_brksize = p->p_brksize;
876	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
877	sp->pr_stksize = p->p_stksize;
878	sp->pr_pid = p->p_pid;
879	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
880	    (p->p_flag & SZONETOP)) {
881		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
882		/*
883		 * Inside local zones, fake zsched's pid as parent pids for
884		 * processes which reference processes outside of the zone.
885		 */
886		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
887	} else {
888		sp->pr_ppid = p->p_ppid;
889	}
890	sp->pr_pgid  = p->p_pgrp;
891	sp->pr_sid   = p->p_sessp->s_sid;
892	sp->pr_taskid = p->p_task->tk_tkid;
893	sp->pr_projid = p->p_task->tk_proj->kpj_id;
894	sp->pr_zoneid = p->p_zone->zone_id;
895	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
896	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
897	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
898	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
899	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
900	prassignset(&sp->pr_flttrace, &p->p_fltmask);
901	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
902	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
903	switch (p->p_model) {
904	case DATAMODEL_ILP32:
905		sp->pr_dmodel = PR_MODEL_ILP32;
906		break;
907	case DATAMODEL_LP64:
908		sp->pr_dmodel = PR_MODEL_LP64;
909		break;
910	}
911	if (p->p_agenttp)
912		sp->pr_agentid = p->p_agenttp->t_tid;
913
914	/* get the chosen lwp's status */
915	prgetlwpstatus(t, &sp->pr_lwp, zp);
916
917	/* replicate the flags */
918	sp->pr_flags = sp->pr_lwp.pr_flags;
919}
920
921#ifdef _SYSCALL32_IMPL
922void
923prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
924{
925	proc_t *p = ttoproc(t);
926	klwp_t *lwp = ttolwp(t);
927	struct mstate *ms = &lwp->lwp_mstate;
928	hrtime_t usr, sys;
929	int flags;
930	ulong_t instr;
931
932	ASSERT(MUTEX_HELD(&p->p_lock));
933
934	bzero(sp, sizeof (*sp));
935	flags = 0L;
936	if (t->t_state == TS_STOPPED) {
937		flags |= PR_STOPPED;
938		if ((t->t_schedflag & TS_PSTART) == 0)
939			flags |= PR_ISTOP;
940	} else if (VSTOPPED(t)) {
941		flags |= PR_STOPPED|PR_ISTOP;
942	}
943	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
944		flags |= PR_DSTOP;
945	if (lwp->lwp_asleep)
946		flags |= PR_ASLEEP;
947	if (t == p->p_agenttp)
948		flags |= PR_AGENT;
949	if (!(t->t_proc_flag & TP_TWAIT))
950		flags |= PR_DETACH;
951	if (t->t_proc_flag & TP_DAEMON)
952		flags |= PR_DAEMON;
953	if (p->p_proc_flag & P_PR_FORK)
954		flags |= PR_FORK;
955	if (p->p_proc_flag & P_PR_RUNLCL)
956		flags |= PR_RLC;
957	if (p->p_proc_flag & P_PR_KILLCL)
958		flags |= PR_KLC;
959	if (p->p_proc_flag & P_PR_ASYNC)
960		flags |= PR_ASYNC;
961	if (p->p_proc_flag & P_PR_BPTADJ)
962		flags |= PR_BPTADJ;
963	if (p->p_proc_flag & P_PR_PTRACE)
964		flags |= PR_PTRACE;
965	if (p->p_flag & SMSACCT)
966		flags |= PR_MSACCT;
967	if (p->p_flag & SMSFORK)
968		flags |= PR_MSFORK;
969	if (p->p_flag & SVFWAIT)
970		flags |= PR_VFORKP;
971	sp->pr_flags = flags;
972	if (VSTOPPED(t)) {
973		sp->pr_why   = PR_REQUESTED;
974		sp->pr_what  = 0;
975	} else {
976		sp->pr_why   = t->t_whystop;
977		sp->pr_what  = t->t_whatstop;
978	}
979	sp->pr_lwpid = t->t_tid;
980	sp->pr_cursig  = lwp->lwp_cursig;
981	prassignset(&sp->pr_lwppend, &t->t_sig);
982	schedctl_finish_sigblock(t);
983	prassignset(&sp->pr_lwphold, &t->t_hold);
984	if (t->t_whystop == PR_FAULTED) {
985		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
986		if (t->t_whatstop == FLTPAGE)
987			sp->pr_info.si_addr =
988			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
989	} else if (lwp->lwp_curinfo)
990		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
991	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
992	    sp->pr_info.si_zoneid != zp->zone_id) {
993		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
994		sp->pr_info.si_uid = 0;
995		sp->pr_info.si_ctid = -1;
996		sp->pr_info.si_zoneid = zp->zone_id;
997	}
998	sp->pr_altstack.ss_sp =
999	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
1000	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1001	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1002	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1003	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1004	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1005	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1006	    sizeof (sp->pr_clname) - 1);
1007	if (flags & PR_STOPPED)
1008		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1009	usr = ms->ms_acct[LMS_USER];
1010	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1011	scalehrtime(&usr);
1012	scalehrtime(&sys);
1013	hrt2ts32(usr, &sp->pr_utime);
1014	hrt2ts32(sys, &sp->pr_stime);
1015
1016	/*
1017	 * Fetch the current instruction, if not a system process.
1018	 * We don't attempt this unless the lwp is stopped.
1019	 */
1020	if ((p->p_flag & SSYS) || p->p_as == &kas)
1021		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1022	else if (!(flags & PR_STOPPED))
1023		sp->pr_flags |= PR_PCINVAL;
1024	else if (!prfetchinstr(lwp, &instr))
1025		sp->pr_flags |= PR_PCINVAL;
1026	else
1027		sp->pr_instr = (uint32_t)instr;
1028
1029	/*
1030	 * Drop p_lock while touching the lwp's stack.
1031	 */
1032	mutex_exit(&p->p_lock);
1033	if (prisstep(lwp))
1034		sp->pr_flags |= PR_STEP;
1035	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1036		int i;
1037
1038		sp->pr_syscall = get_syscall32_args(lwp,
1039		    (int *)sp->pr_sysarg, &i);
1040		sp->pr_nsysarg = (ushort_t)i;
1041	}
1042	if ((flags & PR_STOPPED) || t == curthread)
1043		prgetprregs32(lwp, sp->pr_reg);
1044	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1045	    (flags & PR_VFORKP)) {
1046		long r1, r2;
1047		user_t *up;
1048		auxv_t *auxp;
1049		int i;
1050
1051		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1052		if (sp->pr_errno == 0) {
1053			sp->pr_rval1 = (int32_t)r1;
1054			sp->pr_rval2 = (int32_t)r2;
1055			sp->pr_errpriv = PRIV_NONE;
1056		} else
1057			sp->pr_errpriv = lwp->lwp_badpriv;
1058
1059		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
1060			up = PTOU(p);
1061			sp->pr_sysarg[0] = 0;
1062			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1063			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1064			for (i = 0, auxp = up->u_auxv;
1065			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1066			    i++, auxp++) {
1067				if (auxp->a_type == AT_SUN_EXECNAME) {
1068					sp->pr_sysarg[0] =
1069					    (caddr32_t)
1070					    (uintptr_t)auxp->a_un.a_ptr;
1071					break;
1072				}
1073			}
1074		}
1075	}
1076	if (prhasfp())
1077		prgetprfpregs32(lwp, &sp->pr_fpreg);
1078	mutex_enter(&p->p_lock);
1079}
1080
1081void
1082prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1083{
1084	kthread_t *t;
1085
1086	ASSERT(MUTEX_HELD(&p->p_lock));
1087
1088	t = prchoose(p);	/* returns locked thread */
1089	ASSERT(t != NULL);
1090	thread_unlock(t);
1091
1092	/* just bzero the process part, prgetlwpstatus32() does the rest */
1093	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1094	sp->pr_nlwp = p->p_lwpcnt;
1095	sp->pr_nzomb = p->p_zombcnt;
1096	prassignset(&sp->pr_sigpend, &p->p_sig);
1097	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1098	sp->pr_brksize = (uint32_t)p->p_brksize;
1099	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1100	sp->pr_stksize = (uint32_t)p->p_stksize;
1101	sp->pr_pid   = p->p_pid;
1102	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1103	    (p->p_flag & SZONETOP)) {
1104		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1105		/*
1106		 * Inside local zones, fake zsched's pid as parent pids for
1107		 * processes which reference processes outside of the zone.
1108		 */
1109		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1110	} else {
1111		sp->pr_ppid = p->p_ppid;
1112	}
1113	sp->pr_pgid  = p->p_pgrp;
1114	sp->pr_sid   = p->p_sessp->s_sid;
1115	sp->pr_taskid = p->p_task->tk_tkid;
1116	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1117	sp->pr_zoneid = p->p_zone->zone_id;
1118	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1119	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1120	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1121	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1122	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1123	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1124	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1125	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1126	switch (p->p_model) {
1127	case DATAMODEL_ILP32:
1128		sp->pr_dmodel = PR_MODEL_ILP32;
1129		break;
1130	case DATAMODEL_LP64:
1131		sp->pr_dmodel = PR_MODEL_LP64;
1132		break;
1133	}
1134	if (p->p_agenttp)
1135		sp->pr_agentid = p->p_agenttp->t_tid;
1136
1137	/* get the chosen lwp's status */
1138	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1139
1140	/* replicate the flags */
1141	sp->pr_flags = sp->pr_lwp.pr_flags;
1142}
1143#endif	/* _SYSCALL32_IMPL */
1144
1145/*
1146 * Return lwp status.
1147 */
1148void
1149prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1150{
1151	proc_t *p = ttoproc(t);
1152	klwp_t *lwp = ttolwp(t);
1153	struct mstate *ms = &lwp->lwp_mstate;
1154	hrtime_t usr, sys;
1155	int flags;
1156	ulong_t instr;
1157
1158	ASSERT(MUTEX_HELD(&p->p_lock));
1159
1160	bzero(sp, sizeof (*sp));
1161	flags = 0L;
1162	if (t->t_state == TS_STOPPED) {
1163		flags |= PR_STOPPED;
1164		if ((t->t_schedflag & TS_PSTART) == 0)
1165			flags |= PR_ISTOP;
1166	} else if (VSTOPPED(t)) {
1167		flags |= PR_STOPPED|PR_ISTOP;
1168	}
1169	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1170		flags |= PR_DSTOP;
1171	if (lwp->lwp_asleep)
1172		flags |= PR_ASLEEP;
1173	if (t == p->p_agenttp)
1174		flags |= PR_AGENT;
1175	if (!(t->t_proc_flag & TP_TWAIT))
1176		flags |= PR_DETACH;
1177	if (t->t_proc_flag & TP_DAEMON)
1178		flags |= PR_DAEMON;
1179	if (p->p_proc_flag & P_PR_FORK)
1180		flags |= PR_FORK;
1181	if (p->p_proc_flag & P_PR_RUNLCL)
1182		flags |= PR_RLC;
1183	if (p->p_proc_flag & P_PR_KILLCL)
1184		flags |= PR_KLC;
1185	if (p->p_proc_flag & P_PR_ASYNC)
1186		flags |= PR_ASYNC;
1187	if (p->p_proc_flag & P_PR_BPTADJ)
1188		flags |= PR_BPTADJ;
1189	if (p->p_proc_flag & P_PR_PTRACE)
1190		flags |= PR_PTRACE;
1191	if (p->p_flag & SMSACCT)
1192		flags |= PR_MSACCT;
1193	if (p->p_flag & SMSFORK)
1194		flags |= PR_MSFORK;
1195	if (p->p_flag & SVFWAIT)
1196		flags |= PR_VFORKP;
1197	if (p->p_pgidp->pid_pgorphaned)
1198		flags |= PR_ORPHAN;
1199	if (p->p_pidflag & CLDNOSIGCHLD)
1200		flags |= PR_NOSIGCHLD;
1201	if (p->p_pidflag & CLDWAITPID)
1202		flags |= PR_WAITPID;
1203	sp->pr_flags = flags;
1204	if (VSTOPPED(t)) {
1205		sp->pr_why   = PR_REQUESTED;
1206		sp->pr_what  = 0;
1207	} else {
1208		sp->pr_why   = t->t_whystop;
1209		sp->pr_what  = t->t_whatstop;
1210	}
1211	sp->pr_lwpid = t->t_tid;
1212	sp->pr_cursig  = lwp->lwp_cursig;
1213	prassignset(&sp->pr_lwppend, &t->t_sig);
1214	schedctl_finish_sigblock(t);
1215	prassignset(&sp->pr_lwphold, &t->t_hold);
1216	if (t->t_whystop == PR_FAULTED)
1217		bcopy(&lwp->lwp_siginfo,
1218		    &sp->pr_info, sizeof (k_siginfo_t));
1219	else if (lwp->lwp_curinfo)
1220		bcopy(&lwp->lwp_curinfo->sq_info,
1221		    &sp->pr_info, sizeof (k_siginfo_t));
1222	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1223	    sp->pr_info.si_zoneid != zp->zone_id) {
1224		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1225		sp->pr_info.si_uid = 0;
1226		sp->pr_info.si_ctid = -1;
1227		sp->pr_info.si_zoneid = zp->zone_id;
1228	}
1229	sp->pr_altstack = lwp->lwp_sigaltstack;
1230	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1231	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1232	sp->pr_ustack = lwp->lwp_ustack;
1233	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1234	    sizeof (sp->pr_clname) - 1);
1235	if (flags & PR_STOPPED)
1236		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1237	usr = ms->ms_acct[LMS_USER];
1238	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1239	scalehrtime(&usr);
1240	scalehrtime(&sys);
1241	hrt2ts(usr, &sp->pr_utime);
1242	hrt2ts(sys, &sp->pr_stime);
1243
1244	/*
1245	 * Fetch the current instruction, if not a system process.
1246	 * We don't attempt this unless the lwp is stopped.
1247	 */
1248	if ((p->p_flag & SSYS) || p->p_as == &kas)
1249		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1250	else if (!(flags & PR_STOPPED))
1251		sp->pr_flags |= PR_PCINVAL;
1252	else if (!prfetchinstr(lwp, &instr))
1253		sp->pr_flags |= PR_PCINVAL;
1254	else
1255		sp->pr_instr = instr;
1256
1257	/*
1258	 * Drop p_lock while touching the lwp's stack.
1259	 */
1260	mutex_exit(&p->p_lock);
1261	if (prisstep(lwp))
1262		sp->pr_flags |= PR_STEP;
1263	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1264		int i;
1265
1266		sp->pr_syscall = get_syscall_args(lwp,
1267		    (long *)sp->pr_sysarg, &i);
1268		sp->pr_nsysarg = (ushort_t)i;
1269	}
1270	if ((flags & PR_STOPPED) || t == curthread)
1271		prgetprregs(lwp, sp->pr_reg);
1272	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1273	    (flags & PR_VFORKP)) {
1274		user_t *up;
1275		auxv_t *auxp;
1276		int i;
1277
1278		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1279		if (sp->pr_errno == 0)
1280			sp->pr_errpriv = PRIV_NONE;
1281		else
1282			sp->pr_errpriv = lwp->lwp_badpriv;
1283
1284		if (t->t_sysnum == SYS_exec || t->t_sysnum == SYS_execve) {
1285			up = PTOU(p);
1286			sp->pr_sysarg[0] = 0;
1287			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1288			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1289			for (i = 0, auxp = up->u_auxv;
1290			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1291			    i++, auxp++) {
1292				if (auxp->a_type == AT_SUN_EXECNAME) {
1293					sp->pr_sysarg[0] =
1294					    (uintptr_t)auxp->a_un.a_ptr;
1295					break;
1296				}
1297			}
1298		}
1299	}
1300	if (prhasfp())
1301		prgetprfpregs(lwp, &sp->pr_fpreg);
1302	mutex_enter(&p->p_lock);
1303}
1304
1305/*
1306 * Get the sigaction structure for the specified signal.  The u-block
1307 * must already have been mapped in by the caller.
1308 */
1309void
1310prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1311{
1312	bzero(sp, sizeof (*sp));
1313
1314	if (sig != 0 && (unsigned)sig < NSIG) {
1315		sp->sa_handler = up->u_signal[sig-1];
1316		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1317		if (sigismember(&up->u_sigonstack, sig))
1318			sp->sa_flags |= SA_ONSTACK;
1319		if (sigismember(&up->u_sigresethand, sig))
1320			sp->sa_flags |= SA_RESETHAND;
1321		if (sigismember(&up->u_sigrestart, sig))
1322			sp->sa_flags |= SA_RESTART;
1323		if (sigismember(&p->p_siginfo, sig))
1324			sp->sa_flags |= SA_SIGINFO;
1325		if (sigismember(&up->u_signodefer, sig))
1326			sp->sa_flags |= SA_NODEFER;
1327		if (sig == SIGCLD) {
1328			if (p->p_flag & SNOWAIT)
1329				sp->sa_flags |= SA_NOCLDWAIT;
1330			if ((p->p_flag & SJCTL) == 0)
1331				sp->sa_flags |= SA_NOCLDSTOP;
1332		}
1333	}
1334}
1335
#ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prgetaction(): fill in a struct sigaction32 for
 * 'sig'.  *sp is always zeroed first; it is left zeroed when 'sig' is 0
 * or is not less than NSIG.
 */
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
	int saflags = 0;

	bzero(sp, sizeof (*sp));

	if (sig == 0 || (unsigned)sig >= NSIG)
		return;

	/* the per-signal arrays in the u-block are indexed from zero */
	sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig - 1];
	prassignset(&sp->sa_mask, &up->u_sigmask[sig - 1]);

	if (sigismember(&up->u_sigonstack, sig))
		saflags |= SA_ONSTACK;
	if (sigismember(&up->u_sigresethand, sig))
		saflags |= SA_RESETHAND;
	if (sigismember(&up->u_sigrestart, sig))
		saflags |= SA_RESTART;
	if (sigismember(&p->p_siginfo, sig))
		saflags |= SA_SIGINFO;
	if (sigismember(&up->u_signodefer, sig))
		saflags |= SA_NODEFER;

	/* the SIGCLD-only flags come from process-wide p_flag bits */
	if (sig == SIGCLD) {
		if ((p->p_flag & SNOWAIT) != 0)
			saflags |= SA_NOCLDWAIT;
		if ((p->p_flag & SJCTL) == 0)
			saflags |= SA_NOCLDSTOP;
	}

	sp->sa_flags |= saflags;
}
#endif	/* _SYSCALL32_IMPL */
1364
1365/*
1366 * Count the number of segments in this process's address space.
1367 */
1368int
1369prnsegs(struct as *as, int reserved)
1370{
1371	int n = 0;
1372	struct seg *seg;
1373
1374	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1375
1376	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1377		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1378		caddr_t saddr, naddr;
1379		void *tmp = NULL;
1380
1381		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1382			(void) pr_getprot(seg, reserved, &tmp,
1383			    &saddr, &naddr, eaddr);
1384			if (saddr != naddr)
1385				n++;
1386		}
1387
1388		ASSERT(tmp == NULL);
1389	}
1390
1391	return (n);
1392}
1393
/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 *
 * Note that 'len' only controls padding: all digits are always stored,
 * and no terminating null is written unless 'len' exceeds the number
 * of digits.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* 32-bit unsigned integer fits in 10 digits */
	char *limit = s + len;
	int ndigits = 0;
	int i;

	/* generate the digits least-significant first */
	do {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = ndigits; i > 0; i--)
		*s++ = digits[i - 1];

	/* optional pad */
	while (s < limit)
		*s++ = '\0';

	return (ndigits);
}
1422
/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 *
 * No terminating null is stored; the caller appends one if needed.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* 64-bit unsigned integer fits in 20 digits */
	int ndigits = 0;
	int i;

	/* generate the digits least-significant first */
	do {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* emit them most-significant first */
	for (i = ndigits; i > 0; i--)
		*s++ = digits[i - 1];

	return (ndigits);
}
1447
1448void
1449pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1450{
1451	char *s = name;
1452	struct vfs *vfsp;
1453	struct vfssw *vfsswp;
1454
1455	if ((vfsp = vp->v_vfsp) != NULL &&
1456	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1457	    *vfsswp->vsw_name) {
1458		(void) strcpy(s, vfsswp->vsw_name);
1459		s += strlen(s);
1460		*s++ = '.';
1461	}
1462	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1463	*s++ = '.';
1464	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1465	*s++ = '.';
1466	s += pr_u64tos(vattr->va_nodeid, s);
1467	*s++ = '\0';
1468}
1469
1470struct seg *
1471break_seg(proc_t *p)
1472{
1473	caddr_t addr = p->p_brkbase;
1474	struct seg *seg;
1475	struct vnode *vp;
1476
1477	if (p->p_brksize != 0)
1478		addr += p->p_brksize - 1;
1479	seg = as_segat(p->p_as, addr);
1480	if (seg != NULL && seg->s_ops == &segvn_ops &&
1481	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1482		return (seg);
1483	return (NULL);
1484}
1485
1486/*
1487 * Implementation of service functions to handle procfs generic chained
1488 * copyout buffers.
1489 */
1490typedef struct pr_iobuf_list {
1491	list_node_t	piol_link;	/* buffer linkage */
1492	size_t		piol_size;	/* total size (header + data) */
1493	size_t		piol_usedsize;	/* amount to copy out from this buf */
1494} piol_t;
1495
1496#define	MAPSIZE	(64 * 1024)
1497#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1498
1499void
1500pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1501{
1502	piol_t	*iol;
1503	size_t	initial_size = MIN(1, n) * itemsize;
1504
1505	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1506
1507	ASSERT(list_head(iolhead) == NULL);
1508	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1509	ASSERT(initial_size > 0);
1510
1511	/*
1512	 * Someone creating chained copyout buffers may ask for less than
1513	 * MAPSIZE if the amount of data to be buffered is known to be
1514	 * smaller than that.
1515	 * But in order to prevent involuntary self-denial of service,
1516	 * the requested input size is clamped at MAPSIZE.
1517	 */
1518	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1519	iol = kmem_alloc(initial_size, KM_SLEEP);
1520	list_insert_head(iolhead, iol);
1521	iol->piol_usedsize = 0;
1522	iol->piol_size = initial_size;
1523}
1524
1525void *
1526pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1527{
1528	piol_t	*iol;
1529	char	*new;
1530
1531	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1532	ASSERT(list_head(iolhead) != NULL);
1533
1534	iol = (piol_t *)list_tail(iolhead);
1535
1536	if (iol->piol_size <
1537	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1538		/*
1539		 * Out of space in the current buffer. Allocate more.
1540		 */
1541		piol_t *newiol;
1542
1543		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1544		newiol->piol_size = MAPSIZE;
1545		newiol->piol_usedsize = 0;
1546
1547		list_insert_after(iolhead, iol, newiol);
1548		iol = list_next(iolhead, iol);
1549		ASSERT(iol == newiol);
1550	}
1551	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1552	iol->piol_usedsize += itemsize;
1553	bzero(new, itemsize);
1554	return (new);
1555}
1556
1557int
1558pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1559{
1560	int error = errin;
1561	piol_t	*iol;
1562
1563	while ((iol = list_head(iolhead)) != NULL) {
1564		list_remove(iolhead, iol);
1565		if (!error) {
1566			if (copyout(PIOL_DATABUF(iol), *tgt,
1567			    iol->piol_usedsize))
1568				error = EFAULT;
1569			*tgt += iol->piol_usedsize;
1570		}
1571		kmem_free(iol, iol->piol_size);
1572	}
1573	list_destroy(iolhead);
1574
1575	return (error);
1576}
1577
1578int
1579pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1580{
1581	offset_t	off = uiop->uio_offset;
1582	char		*base;
1583	size_t		size;
1584	piol_t		*iol;
1585	int		error = errin;
1586
1587	while ((iol = list_head(iolhead)) != NULL) {
1588		list_remove(iolhead, iol);
1589		base = PIOL_DATABUF(iol);
1590		size = iol->piol_usedsize;
1591		if (off <= size && error == 0 && uiop->uio_resid > 0)
1592			error = uiomove(base + off, size - off,
1593			    UIO_READ, uiop);
1594		off = MAX(0, off - (offset_t)size);
1595		kmem_free(iol, iol->piol_size);
1596	}
1597	list_destroy(iolhead);
1598
1599	return (error);
1600}
1601
1602/*
1603 * Return an array of structures with memory map information.
1604 * We allocate here; the caller must deallocate.
1605 */
1606int
1607prgetmap(proc_t *p, int reserved, list_t *iolhead)
1608{
1609	struct as *as = p->p_as;
1610	prmap_t *mp;
1611	struct seg *seg;
1612	struct seg *brkseg, *stkseg;
1613	struct vnode *vp;
1614	struct vattr vattr;
1615	uint_t prot;
1616
1617	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1618
1619	/*
1620	 * Request an initial buffer size that doesn't waste memory
1621	 * if the address space has only a small number of segments.
1622	 */
1623	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1624
1625	if ((seg = AS_SEGFIRST(as)) == NULL)
1626		return (0);
1627
1628	brkseg = break_seg(p);
1629	stkseg = as_segat(as, prgetstackbase(p));
1630
1631	do {
1632		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1633		caddr_t saddr, naddr;
1634		void *tmp = NULL;
1635
1636		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1637			prot = pr_getprot(seg, reserved, &tmp,
1638			    &saddr, &naddr, eaddr);
1639			if (saddr == naddr)
1640				continue;
1641
1642			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1643
1644			mp->pr_vaddr = (uintptr_t)saddr;
1645			mp->pr_size = naddr - saddr;
1646			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1647			mp->pr_mflags = 0;
1648			if (prot & PROT_READ)
1649				mp->pr_mflags |= MA_READ;
1650			if (prot & PROT_WRITE)
1651				mp->pr_mflags |= MA_WRITE;
1652			if (prot & PROT_EXEC)
1653				mp->pr_mflags |= MA_EXEC;
1654			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1655				mp->pr_mflags |= MA_SHARED;
1656			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1657				mp->pr_mflags |= MA_NORESERVE;
1658			if (seg->s_ops == &segspt_shmops ||
1659			    (seg->s_ops == &segvn_ops &&
1660			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1661				mp->pr_mflags |= MA_ANON;
1662			if (seg == brkseg)
1663				mp->pr_mflags |= MA_BREAK;
1664			else if (seg == stkseg) {
1665				mp->pr_mflags |= MA_STACK;
1666				if (reserved) {
1667					size_t maxstack =
1668					    ((size_t)p->p_stk_ctl +
1669					    PAGEOFFSET) & PAGEMASK;
1670					mp->pr_vaddr =
1671					    (uintptr_t)prgetstackbase(p) +
1672					    p->p_stksize - maxstack;
1673					mp->pr_size = (uintptr_t)naddr -
1674					    mp->pr_vaddr;
1675				}
1676			}
1677			if (seg->s_ops == &segspt_shmops)
1678				mp->pr_mflags |= MA_ISM | MA_SHM;
1679			mp->pr_pagesize = PAGESIZE;
1680
1681			/*
1682			 * Manufacture a filename for the "object" directory.
1683			 */
1684			vattr.va_mask = AT_FSID|AT_NODEID;
1685			if (seg->s_ops == &segvn_ops &&
1686			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1687			    vp != NULL && vp->v_type == VREG &&
1688			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1689				if (vp == p->p_exec)
1690					(void) strcpy(mp->pr_mapname, "a.out");
1691				else
1692					pr_object_name(mp->pr_mapname,
1693					    vp, &vattr);
1694			}
1695
1696			/*
1697			 * Get the SysV shared memory id, if any.
1698			 */
1699			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1700			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1701			    SHMID_NONE) {
1702				if (mp->pr_shmid == SHMID_FREE)
1703					mp->pr_shmid = -1;
1704
1705				mp->pr_mflags |= MA_SHM;
1706			} else {
1707				mp->pr_shmid = -1;
1708			}
1709		}
1710		ASSERT(tmp == NULL);
1711	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1712
1713	return (0);
1714}
1715
#ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prgetmap(): append one prmap32_t to the chained
 * buffer list 'iolhead' for each non-empty range reported by
 * pr_getprot(), narrowing addresses and sizes to 32 bits.  We allocate
 * here; the caller must deallocate.  The address space lock must be
 * held as writer.  Always returns 0.
 */
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	struct seg *seg, *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (prmap32_t),
	    avl_numnodes(&as->a_segtree));

	seg = AS_SEGFIRST(as);
	if (seg == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	for (; seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prmap32_t *mp;
			uint_t prot;
			int mflags = 0;
			int shmid = SHMID_NONE;

			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_pagesize = PAGESIZE;

			/* protections and mapping type */
			if (prot & PROT_READ)
				mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mflags |= MA_NORESERVE;

			/* ISM segments are also reported as anon and shm */
			if (seg->s_ops == &segspt_shmops) {
				mflags |= MA_ANON | MA_ISM | MA_SHM;
			} else if (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
			    vp == NULL)) {
				mflags |= MA_ANON;
			}

			if (seg == brkseg) {
				mflags |= MA_BREAK;
			} else if (seg == stkseg) {
				mflags |= MA_STACK;
				if (reserved) {
					/* p_stk_ctl rounded up to a page */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t stkstart =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;

					mp->pr_vaddr = (caddr32_t)stkstart;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - stkstart);
				}
			}

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mflags & MA_SHARED) && p->p_segacct)
				shmid = shmgetid(p, seg->s_base);
			if (shmid != SHMID_NONE) {
				mflags |= MA_SHM;
				mp->pr_shmid =
				    (shmid == SHMID_FREE) ? -1 : shmid;
			} else {
				mp->pr_shmid = -1;
			}

			mp->pr_mflags = mflags;
		}
		ASSERT(tmp == NULL);
	}

	return (0);
}
#endif	/* _SYSCALL32_IMPL */
1828
1829/*
1830 * Return the size of the /proc page data file.
1831 */
1832size_t
1833prpdsize(struct as *as)
1834{
1835	struct seg *seg;
1836	size_t size;
1837
1838	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1839
1840	if ((seg = AS_SEGFIRST(as)) == NULL)
1841		return (0);
1842
1843	size = sizeof (prpageheader_t);
1844	do {
1845		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1846		caddr_t saddr, naddr;
1847		void *tmp = NULL;
1848		size_t npage;
1849
1850		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1851			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1852			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1853				size += sizeof (prasmap_t) + round8(npage);
1854		}
1855		ASSERT(tmp == NULL);
1856	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1857
1858	return (size);
1859}
1860
#ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prpdsize(): return the size of the /proc page data
 * file as laid out with prpageheader32_t and prasmap32_t.  Returns 0
 * for an address space with no segments.  The address space lock must
 * be held as writer.
 */
size_t
prpdsize32(struct as *as)
{
	struct seg *seg;
	size_t total;

	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));

	seg = AS_SEGFIRST(as);
	if (seg == NULL)
		return (0);

	total = sizeof (prpageheader32_t);
	for (; seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr = seg->s_base;
		caddr_t naddr;
		void *tmp = NULL;

		while (saddr < eaddr) {
			size_t npage;

			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			npage = (naddr - saddr) / PAGESIZE;
			if (npage != 0)
				total += sizeof (prasmap32_t) + round8(npage);
			saddr = naddr;
		}
		ASSERT(tmp == NULL);
	}

	return (total);
}
#endif	/* _SYSCALL32_IMPL */
1891
1892/*
1893 * Read page data information.
1894 */
1895int
1896prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1897{
1898	struct as *as = p->p_as;
1899	caddr_t buf;
1900	size_t size;
1901	prpageheader_t *php;
1902	prasmap_t *pmp;
1903	struct seg *seg;
1904	int error;
1905
1906again:
1907	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1908
1909	if ((seg = AS_SEGFIRST(as)) == NULL) {
1910		AS_LOCK_EXIT(as, &as->a_lock);
1911		return (0);
1912	}
1913	size = prpdsize(as);
1914	if (uiop->uio_resid < size) {
1915		AS_LOCK_EXIT(as, &as->a_lock);
1916		return (E2BIG);
1917	}
1918
1919	buf = kmem_zalloc(size, KM_SLEEP);
1920	php = (prpageheader_t *)buf;
1921	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1922
1923	hrt2ts(gethrtime(), &php->pr_tstamp);
1924	php->pr_nmap = 0;
1925	php->pr_npage = 0;
1926	do {
1927		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1928		caddr_t saddr, naddr;
1929		void *tmp = NULL;
1930
1931		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1932			struct vnode *vp;
1933			struct vattr vattr;
1934			size_t len;
1935			size_t npage;
1936			uint_t prot;
1937			uintptr_t next;
1938
1939			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1940			if ((len = (size_t)(naddr - saddr)) == 0)
1941				continue;
1942			npage = len / PAGESIZE;
1943			next = (uintptr_t)(pmp + 1) + round8(npage);
1944			/*
1945			 * It's possible that the address space can change
1946			 * subtlely even though we're holding as->a_lock
1947			 * due to the nondeterminism of page_exists() in
1948			 * the presence of asychronously flushed pages or
1949			 * mapped files whose sizes are changing.
1950			 * page_exists() may be called indirectly from
1951			 * pr_getprot() by a SEGOP_INCORE() routine.
1952			 * If this happens we need to make sure we don't
1953			 * overrun the buffer whose size we computed based
1954			 * on the initial iteration through the segments.
1955			 * Once we've detected an overflow, we need to clean
1956			 * up the temporary memory allocated in pr_getprot()
1957			 * and retry. If there's a pending signal, we return
1958			 * EINTR so that this thread can be dislodged if
1959			 * a latent bug causes us to spin indefinitely.
1960			 */
1961			if (next > (uintptr_t)buf + size) {
1962				pr_getprot_done(&tmp);
1963				AS_LOCK_EXIT(as, &as->a_lock);
1964
1965				kmem_free(buf, size);
1966
1967				if (ISSIG(curthread, JUSTLOOKING))
1968					return (EINTR);
1969
1970				goto again;
1971			}
1972
1973			php->pr_nmap++;
1974			php->pr_npage += npage;
1975			pmp->pr_vaddr = (uintptr_t)saddr;
1976			pmp->pr_npage = npage;
1977			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1978			pmp->pr_mflags = 0;
1979			if (prot & PROT_READ)
1980				pmp->pr_mflags |= MA_READ;
1981			if (prot & PROT_WRITE)
1982				pmp->pr_mflags |= MA_WRITE;
1983			if (prot & PROT_EXEC)
1984				pmp->pr_mflags |= MA_EXEC;
1985			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1986				pmp->pr_mflags |= MA_SHARED;
1987			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1988				pmp->pr_mflags |= MA_NORESERVE;
1989			if (seg->s_ops == &segspt_shmops ||
1990			    (seg->s_ops == &segvn_ops &&
1991			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1992				pmp->pr_mflags |= MA_ANON;
1993			if (seg->s_ops == &segspt_shmops)
1994				pmp->pr_mflags |= MA_ISM | MA_SHM;
1995			pmp->pr_pagesize = PAGESIZE;
1996			/*
1997			 * Manufacture a filename for the "object" directory.
1998			 */
1999			vattr.va_mask = AT_FSID|AT_NODEID;
2000			if (seg->s_ops == &segvn_ops &&
2001			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2002			    vp != NULL && vp->v_type == VREG &&
2003			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2004				if (vp == p->p_exec)
2005					(void) strcpy(pmp->pr_mapname, "a.out");
2006				else
2007					pr_object_name(pmp->pr_mapname,
2008					    vp, &vattr);
2009			}
2010
2011			/*
2012			 * Get the SysV shared memory id, if any.
2013			 */
2014			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2015			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2016			    SHMID_NONE) {
2017				if (pmp->pr_shmid == SHMID_FREE)
2018					pmp->pr_shmid = -1;
2019
2020				pmp->pr_mflags |= MA_SHM;
2021			} else {
2022				pmp->pr_shmid = -1;
2023			}
2024
2025			hat_getstat(as, saddr, len, hatid,
2026			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2027			pmp = (prasmap_t *)next;
2028		}
2029		ASSERT(tmp == NULL);
2030	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2031
2032	AS_LOCK_EXIT(as, &as->a_lock);
2033
2034	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2035	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2036	kmem_free(buf, size);
2037
2038	return (error);
2039}
2040
#ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavor of prpdread(): build the page data image for process
 * 'p' using prpageheader32_t and prasmap32_t and move it to 'uiop'.
 * Returns 0 for an address space with no segments, E2BIG when the
 * request is smaller than the computed image, EINTR when a retry is
 * abandoned because a signal is pending, or the result of uiomove().
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t bufsize;
	prpageheader32_t *hdr;
	prasmap32_t *map;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	seg = AS_SEGFIRST(as);
	if (seg == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (0);
	}

	bufsize = prpdsize32(as);
	if (uiop->uio_resid < bufsize) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (E2BIG);
	}

	buf = kmem_zalloc(bufsize, KM_SLEEP);
	hdr = (prpageheader32_t *)buf;
	map = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &hdr->pr_tstamp);
	hdr->pr_nmap = 0;
	hdr->pr_npage = 0;

	for (; seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;
			int mflags = 0;
			int shmid = SHMID_NONE;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			len = (size_t)(naddr - saddr);
			if (len == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(map + 1) + round8(npage);

			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + bufsize) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as, &as->a_lock);

				kmem_free(buf, bufsize);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			hdr->pr_nmap++;
			hdr->pr_npage += npage;

			map->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			map->pr_npage = (size32_t)npage;
			map->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			map->pr_pagesize = PAGESIZE;

			/* protections and mapping type */
			if (prot & PROT_READ)
				mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mflags |= MA_NORESERVE;

			/* ISM segments are also reported as anon and shm */
			if (seg->s_ops == &segspt_shmops) {
				mflags |= MA_ANON | MA_ISM | MA_SHM;
			} else if (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
			    vp == NULL)) {
				mflags |= MA_ANON;
			}

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(map->pr_mapname, "a.out");
				else
					pr_object_name(map->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mflags & MA_SHARED) && p->p_segacct)
				shmid = shmgetid(p, seg->s_base);
			if (shmid != SHMID_NONE) {
				mflags |= MA_SHM;
				map->pr_shmid =
				    (shmid == SHMID_FREE) ? -1 : shmid;
			} else {
				map->pr_shmid = -1;
			}

			map->pr_mflags = mflags;

			/* the per-page bytes directly follow the map entry */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(map + 1), HAT_SYNC_ZERORM);
			map = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	}

	AS_LOCK_EXIT(as, &as->a_lock);

	ASSERT((uintptr_t)map <= (uintptr_t)buf + bufsize);
	error = uiomove(buf, (caddr_t)map - buf, UIO_READ, uiop);
	kmem_free(buf, bufsize);

	return (error);
}
#endif	/* _SYSCALL32_IMPL */
2188
2189ushort_t
2190prgetpctcpu(uint64_t pct)
2191{
2192	/*
2193	 * The value returned will be relevant in the zone of the examiner,
2194	 * which may not be the same as the zone which performed the procfs
2195	 * mount.
2196	 */
2197	int nonline = zone_ncpus_online_get(curproc->p_zone);
2198
2199	/*
2200	 * Prorate over online cpus so we don't exceed 100%
2201	 */
2202	if (nonline > 1)
2203		pct /= nonline;
2204	pct >>= 16;		/* convert to 16-bit scaled integer */
2205	if (pct > 0x8000)	/* might happen, due to rounding */
2206		pct = 0x8000;
2207	return ((ushort_t)pct);
2208}
2209
2210/*
2211 * Return information used by ps(1).
2212 */
2213void
2214prgetpsinfo(proc_t *p, psinfo_t *psp)
2215{
2216	kthread_t *t;
2217	struct cred *cred;
2218	hrtime_t hrutime, hrstime;
2219
2220	ASSERT(MUTEX_HELD(&p->p_lock));
2221
2222	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2223		bzero(psp, sizeof (*psp));
2224	else {
2225		thread_unlock(t);
2226		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2227	}
2228
2229	/*
2230	 * only export SSYS and SMSACCT; everything else is off-limits to
2231	 * userland apps.
2232	 */
2233	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2234	psp->pr_nlwp = p->p_lwpcnt;
2235	psp->pr_nzomb = p->p_zombcnt;
2236	mutex_enter(&p->p_crlock);
2237	cred = p->p_cred;
2238	psp->pr_uid = crgetruid(cred);
2239	psp->pr_euid = crgetuid(cred);
2240	psp->pr_gid = crgetrgid(cred);
2241	psp->pr_egid = crgetgid(cred);
2242	mutex_exit(&p->p_crlock);
2243	psp->pr_pid = p->p_pid;
2244	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2245	    (p->p_flag & SZONETOP)) {
2246		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2247		/*
2248		 * Inside local zones, fake zsched's pid as parent pids for
2249		 * processes which reference processes outside of the zone.
2250		 */
2251		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2252	} else {
2253		psp->pr_ppid = p->p_ppid;
2254	}
2255	psp->pr_pgid = p->p_pgrp;
2256	psp->pr_sid = p->p_sessp->s_sid;
2257	psp->pr_taskid = p->p_task->tk_tkid;
2258	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2259	psp->pr_poolid = p->p_pool->pool_id;
2260	psp->pr_zoneid = p->p_zone->zone_id;
2261	if ((psp->pr_contract = PRCTID(p)) == 0)
2262		psp->pr_contract = -1;
2263	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2264	switch (p->p_model) {
2265	case DATAMODEL_ILP32:
2266		psp->pr_dmodel = PR_MODEL_ILP32;
2267		break;
2268	case DATAMODEL_LP64:
2269		psp->pr_dmodel = PR_MODEL_LP64;
2270		break;
2271	}
2272	hrutime = mstate_aggr_state(p, LMS_USER);
2273	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2274	hrt2ts((hrutime + hrstime), &psp->pr_time);
2275	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2276
2277	if (t == NULL) {
2278		int wcode = p->p_wcode;		/* must be atomic read */
2279
2280		if (wcode)
2281			psp->pr_wstat = wstat(wcode, p->p_wdata);
2282		psp->pr_ttydev = PRNODEV;
2283		psp->pr_lwp.pr_state = SZOMB;
2284		psp->pr_lwp.pr_sname = 'Z';
2285		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2286		psp->pr_lwp.pr_bindpset = PS_NONE;
2287	} else {
2288		user_t *up = PTOU(p);
2289		struct as *as;
2290		dev_t d;
2291		extern dev_t rwsconsdev, rconsdev, uconsdev;
2292
2293		d = cttydev(p);
2294		/*
2295		 * If the controlling terminal is the real
2296		 * or workstation console device, map to what the
2297		 * user thinks is the console device. Handle case when
2298		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2299		 */
2300		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2301			d = uconsdev;
2302		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2303		psp->pr_start = up->u_start;
2304		bcopy(up->u_comm, psp->pr_fname,
2305		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2306		bcopy(up->u_psargs, psp->pr_psargs,
2307		    MIN(PRARGSZ-1, PSARGSZ));
2308		psp->pr_argc = up->u_argc;
2309		psp->pr_argv = up->u_argv;
2310		psp->pr_envp = up->u_envp;
2311
2312		/* get the chosen lwp's lwpsinfo */
2313		prgetlwpsinfo(t, &psp->pr_lwp);
2314
2315		/* compute %cpu for the process */
2316		if (p->p_lwpcnt == 1)
2317			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2318		else {
2319			uint64_t pct = 0;
2320			hrtime_t cur_time = gethrtime_unscaled();
2321
2322			t = p->p_tlist;
2323			do {
2324				pct += cpu_update_pct(t, cur_time);
2325			} while ((t = t->t_forw) != p->p_tlist);
2326
2327			psp->pr_pctcpu = prgetpctcpu(pct);
2328		}
2329		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2330			psp->pr_size = 0;
2331			psp->pr_rssize = 0;
2332		} else {
2333			mutex_exit(&p->p_lock);
2334			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2335			psp->pr_size = btopr(as->a_resvsize) *
2336			    (PAGESIZE / 1024);
2337			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2338			psp->pr_pctmem = rm_pctmemory(as);
2339			AS_LOCK_EXIT(as, &as->a_lock);
2340			mutex_enter(&p->p_lock);
2341		}
2342	}
2343}
2344
2345#ifdef _SYSCALL32_IMPL
2346void
2347prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2348{
2349	kthread_t *t;
2350	struct cred *cred;
2351	hrtime_t hrutime, hrstime;
2352
2353	ASSERT(MUTEX_HELD(&p->p_lock));
2354
2355	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2356		bzero(psp, sizeof (*psp));
2357	else {
2358		thread_unlock(t);
2359		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2360	}
2361
2362	/*
2363	 * only export SSYS and SMSACCT; everything else is off-limits to
2364	 * userland apps.
2365	 */
2366	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2367	psp->pr_nlwp = p->p_lwpcnt;
2368	psp->pr_nzomb = p->p_zombcnt;
2369	mutex_enter(&p->p_crlock);
2370	cred = p->p_cred;
2371	psp->pr_uid = crgetruid(cred);
2372	psp->pr_euid = crgetuid(cred);
2373	psp->pr_gid = crgetrgid(cred);
2374	psp->pr_egid = crgetgid(cred);
2375	mutex_exit(&p->p_crlock);
2376	psp->pr_pid = p->p_pid;
2377	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2378	    (p->p_flag & SZONETOP)) {
2379		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2380		/*
2381		 * Inside local zones, fake zsched's pid as parent pids for
2382		 * processes which reference processes outside of the zone.
2383		 */
2384		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2385	} else {
2386		psp->pr_ppid = p->p_ppid;
2387	}
2388	psp->pr_pgid = p->p_pgrp;
2389	psp->pr_sid = p->p_sessp->s_sid;
2390	psp->pr_taskid = p->p_task->tk_tkid;
2391	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2392	psp->pr_poolid = p->p_pool->pool_id;
2393	psp->pr_zoneid = p->p_zone->zone_id;
2394	if ((psp->pr_contract = PRCTID(p)) == 0)
2395		psp->pr_contract = -1;
2396	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2397	switch (p->p_model) {
2398	case DATAMODEL_ILP32:
2399		psp->pr_dmodel = PR_MODEL_ILP32;
2400		break;
2401	case DATAMODEL_LP64:
2402		psp->pr_dmodel = PR_MODEL_LP64;
2403		break;
2404	}
2405	hrutime = mstate_aggr_state(p, LMS_USER);
2406	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2407	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2408	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2409
2410	if (t == NULL) {
2411		extern int wstat(int, int);	/* needs a header file */
2412		int wcode = p->p_wcode;		/* must be atomic read */
2413
2414		if (wcode)
2415			psp->pr_wstat = wstat(wcode, p->p_wdata);
2416		psp->pr_ttydev = PRNODEV32;
2417		psp->pr_lwp.pr_state = SZOMB;
2418		psp->pr_lwp.pr_sname = 'Z';
2419	} else {
2420		user_t *up = PTOU(p);
2421		struct as *as;
2422		dev_t d;
2423		extern dev_t rwsconsdev, rconsdev, uconsdev;
2424
2425		d = cttydev(p);
2426		/*
2427		 * If the controlling terminal is the real
2428		 * or workstation console device, map to what the
2429		 * user thinks is the console device. Handle case when
2430		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2431		 */
2432		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2433			d = uconsdev;
2434		(void) cmpldev(&psp->pr_ttydev, d);
2435		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
2436		bcopy(up->u_comm, psp->pr_fname,
2437		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2438		bcopy(up->u_psargs, psp->pr_psargs,
2439		    MIN(PRARGSZ-1, PSARGSZ));
2440		psp->pr_argc = up->u_argc;
2441		psp->pr_argv = (caddr32_t)up->u_argv;
2442		psp->pr_envp = (caddr32_t)up->u_envp;
2443
2444		/* get the chosen lwp's lwpsinfo */
2445		prgetlwpsinfo32(t, &psp->pr_lwp);
2446
2447		/* compute %cpu for the process */
2448		if (p->p_lwpcnt == 1)
2449			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2450		else {
2451			uint64_t pct = 0;
2452			hrtime_t cur_time;
2453
2454			t = p->p_tlist;
2455			cur_time = gethrtime_unscaled();
2456			do {
2457				pct += cpu_update_pct(t, cur_time);
2458			} while ((t = t->t_forw) != p->p_tlist);
2459
2460			psp->pr_pctcpu = prgetpctcpu(pct);
2461		}
2462		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2463			psp->pr_size = 0;
2464			psp->pr_rssize = 0;
2465		} else {
2466			mutex_exit(&p->p_lock);
2467			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2468			psp->pr_size = (size32_t)
2469			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2470			psp->pr_rssize = (size32_t)
2471			    (rm_asrss(as) * (PAGESIZE / 1024));
2472			psp->pr_pctmem = rm_pctmemory(as);
2473			AS_LOCK_EXIT(as, &as->a_lock);
2474			mutex_enter(&p->p_lock);
2475		}
2476	}
2477
2478	/*
2479	 * If we are looking at an LP64 process, zero out
2480	 * the fields that cannot be represented in ILP32.
2481	 */
2482	if (p->p_model != DATAMODEL_ILP32) {
2483		psp->pr_size = 0;
2484		psp->pr_rssize = 0;
2485		psp->pr_argv = 0;
2486		psp->pr_envp = 0;
2487	}
2488}
2489#endif	/* _SYSCALL32_IMPL */
2490
2491void
2492prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2493{
2494	klwp_t *lwp = ttolwp(t);
2495	sobj_ops_t *sobj;
2496	char c, state;
2497	uint64_t pct;
2498	int retval, niceval;
2499	hrtime_t hrutime, hrstime;
2500
2501	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2502
2503	bzero(psp, sizeof (*psp));
2504
2505	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2506	psp->pr_lwpid = t->t_tid;
2507	psp->pr_addr = (uintptr_t)t;
2508	psp->pr_wchan = (uintptr_t)t->t_wchan;
2509
2510	/* map the thread state enum into a process state enum */
2511	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2512	switch (state) {
2513	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2514	case TS_RUN:		state = SRUN;		c = 'R';	break;
2515	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2516	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2517	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2518	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2519	default:		state = 0;		c = '?';	break;
2520	}
2521	psp->pr_state = state;
2522	psp->pr_sname = c;
2523	if ((sobj = t->t_sobj_ops) != NULL)
2524		psp->pr_stype = SOBJ_TYPE(sobj);
2525	retval = CL_DONICE(t, NULL, 0, &niceval);
2526	if (retval == 0) {
2527		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2528		psp->pr_nice = niceval + NZERO;
2529	}
2530	psp->pr_syscall = t->t_sysnum;
2531	psp->pr_pri = t->t_pri;
2532	psp->pr_start.tv_sec = t->t_start;
2533	psp->pr_start.tv_nsec = 0L;
2534	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2535	scalehrtime(&hrutime);
2536	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2537	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2538	scalehrtime(&hrstime);
2539	hrt2ts(hrutime + hrstime, &psp->pr_time);
2540	/* compute %cpu for the lwp */
2541	pct = cpu_update_pct(t, gethrtime_unscaled());
2542	psp->pr_pctcpu = prgetpctcpu(pct);
2543	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2544	if (psp->pr_cpu > 99)
2545		psp->pr_cpu = 99;
2546
2547	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2548	    sizeof (psp->pr_clname) - 1);
2549	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2550	psp->pr_onpro = t->t_cpu->cpu_id;
2551	psp->pr_bindpro = t->t_bind_cpu;
2552	psp->pr_bindpset = t->t_bind_pset;
2553	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2554}
2555
2556#ifdef _SYSCALL32_IMPL
2557void
2558prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2559{
2560	proc_t *p = ttoproc(t);
2561	klwp_t *lwp = ttolwp(t);
2562	sobj_ops_t *sobj;
2563	char c, state;
2564	uint64_t pct;
2565	int retval, niceval;
2566	hrtime_t hrutime, hrstime;
2567
2568	ASSERT(MUTEX_HELD(&p->p_lock));
2569
2570	bzero(psp, sizeof (*psp));
2571
2572	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2573	psp->pr_lwpid = t->t_tid;
2574	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2575	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
2576
2577	/* map the thread state enum into a process state enum */
2578	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2579	switch (state) {
2580	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2581	case TS_RUN:		state = SRUN;		c = 'R';	break;
2582	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2583	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2584	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2585	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2586	default:		state = 0;		c = '?';	break;
2587	}
2588	psp->pr_state = state;
2589	psp->pr_sname = c;
2590	if ((sobj = t->t_sobj_ops) != NULL)
2591		psp->pr_stype = SOBJ_TYPE(sobj);
2592	retval = CL_DONICE(t, NULL, 0, &niceval);
2593	if (retval == 0) {
2594		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2595		psp->pr_nice = niceval + NZERO;
2596	} else {
2597		psp->pr_oldpri = 0;
2598		psp->pr_nice = 0;
2599	}
2600	psp->pr_syscall = t->t_sysnum;
2601	psp->pr_pri = t->t_pri;
2602	psp->pr_start.tv_sec = (time32_t)t->t_start;
2603	psp->pr_start.tv_nsec = 0L;
2604	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2605	scalehrtime(&hrutime);
2606	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2607	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2608	scalehrtime(&hrstime);
2609	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2610	/* compute %cpu for the lwp */
2611	pct = cpu_update_pct(t, gethrtime_unscaled());
2612	psp->pr_pctcpu = prgetpctcpu(pct);
2613	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2614	if (psp->pr_cpu > 99)
2615		psp->pr_cpu = 99;
2616
2617	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2618	    sizeof (psp->pr_clname) - 1);
2619	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2620	psp->pr_onpro = t->t_cpu->cpu_id;
2621	psp->pr_bindpro = t->t_bind_cpu;
2622	psp->pr_bindpset = t->t_bind_pset;
2623	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2624}
2625#endif	/* _SYSCALL32_IMPL */
2626
2627/*
2628 * This used to get called when microstate accounting was disabled but
2629 * microstate information was requested.  Since Microstate accounting is on
2630 * regardless of the proc flags, this simply makes it appear to procfs that
2631 * microstate accounting is on.  This is relatively meaningless since you
2632 * can't turn it off, but this is here for the sake of appearances.
2633 */
2634
2635/*ARGSUSED*/
2636void
2637estimate_msacct(kthread_t *t, hrtime_t curtime)
2638{
2639	proc_t *p;
2640
2641	if (t == NULL)
2642		return;
2643
2644	p = ttoproc(t);
2645	ASSERT(MUTEX_HELD(&p->p_lock));
2646
2647	/*
2648	 * A system process (p0) could be referenced if the thread is
2649	 * in the process of exiting.  Don't turn on microstate accounting
2650	 * in that case.
2651	 */
2652	if (p->p_flag & SSYS)
2653		return;
2654
2655	/*
2656	 * Loop through all the LWPs (kernel threads) in the process.
2657	 */
2658	t = p->p_tlist;
2659	do {
2660		t->t_proc_flag |= TP_MSACCT;
2661	} while ((t = t->t_forw) != p->p_tlist);
2662
2663	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
2664}
2665
2666/*
2667 * It's not really possible to disable microstate accounting anymore.
2668 * However, this routine simply turns off the ms accounting flags in a process
2669 * This way procfs can still pretend to turn microstate accounting on and
2670 * off for a process, but it actually doesn't do anything.  This is
2671 * a neutered form of preemptive idiot-proofing.
2672 */
2673void
2674disable_msacct(proc_t *p)
2675{
2676	kthread_t *t;
2677
2678	ASSERT(MUTEX_HELD(&p->p_lock));
2679
2680	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
2681	/*
2682	 * Loop through all the LWPs (kernel threads) in the process.
2683	 */
2684	if ((t = p->p_tlist) != NULL) {
2685		do {
2686			/* clear per-thread flag */
2687			t->t_proc_flag &= ~TP_MSACCT;
2688		} while ((t = t->t_forw) != p->p_tlist);
2689	}
2690}
2691
2692/*
2693 * Return resource usage information.
2694 */
2695void
2696prgetusage(kthread_t *t, prhusage_t *pup)
2697{
2698	klwp_t *lwp = ttolwp(t);
2699	hrtime_t *mstimep;
2700	struct mstate *ms = &lwp->lwp_mstate;
2701	int state;
2702	int i;
2703	hrtime_t curtime;
2704	hrtime_t waitrq;
2705	hrtime_t tmp1;
2706
2707	curtime = gethrtime_unscaled();
2708
2709	pup->pr_lwpid	= t->t_tid;
2710	pup->pr_count	= 1;
2711	pup->pr_create	= ms->ms_start;
2712	pup->pr_term    = ms->ms_term;
2713	scalehrtime(&pup->pr_create);
2714	scalehrtime(&pup->pr_term);
2715	if (ms->ms_term == 0) {
2716		pup->pr_rtime = curtime - ms->ms_start;
2717		scalehrtime(&pup->pr_rtime);
2718	} else {
2719		pup->pr_rtime = ms->ms_term - ms->ms_start;
2720		scalehrtime(&pup->pr_rtime);
2721	}
2722
2723
2724	pup->pr_utime    = ms->ms_acct[LMS_USER];
2725	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
2726	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
2727	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
2728	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
2729	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
2730	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
2731	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
2732	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
2733	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2734
2735	prscaleusage(pup);
2736
2737	/*
2738	 * Adjust for time waiting in the dispatcher queue.
2739	 */
2740	waitrq = t->t_waitrq;	/* hopefully atomic */
2741	if (waitrq != 0) {
2742		tmp1 = curtime - waitrq;
2743		scalehrtime(&tmp1);
2744		pup->pr_wtime += tmp1;
2745		curtime = waitrq;
2746	}
2747
2748	/*
2749	 * Adjust for time spent in current microstate.
2750	 */
2751	if (ms->ms_state_start > curtime) {
2752		curtime = gethrtime_unscaled();
2753	}
2754
2755	i = 0;
2756	do {
2757		switch (state = t->t_mstate) {
2758		case LMS_SLEEP:
2759			/*
2760			 * Update the timer for the current sleep state.
2761			 */
2762			switch (state = ms->ms_prev) {
2763			case LMS_TFAULT:
2764			case LMS_DFAULT:
2765			case LMS_KFAULT:
2766			case LMS_USER_LOCK:
2767				break;
2768			default:
2769				state = LMS_SLEEP;
2770				break;
2771			}
2772			break;
2773		case LMS_TFAULT:
2774		case LMS_DFAULT:
2775		case LMS_KFAULT:
2776		case LMS_USER_LOCK:
2777			state = LMS_SYSTEM;
2778			break;
2779		}
2780		switch (state) {
2781		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2782		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2783		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2784		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2785		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2786		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2787		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2788		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2789		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2790		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2791		default:		panic("prgetusage: unknown microstate");
2792		}
2793		tmp1 = curtime - ms->ms_state_start;
2794		if (tmp1 < 0) {
2795			curtime = gethrtime_unscaled();
2796			i++;
2797			continue;
2798		}
2799		scalehrtime(&tmp1);
2800	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2801
2802	*mstimep += tmp1;
2803
2804	/* update pup timestamp */
2805	pup->pr_tstamp = curtime;
2806	scalehrtime(&pup->pr_tstamp);
2807
2808	/*
2809	 * Resource usage counters.
2810	 */
2811	pup->pr_minf  = lwp->lwp_ru.minflt;
2812	pup->pr_majf  = lwp->lwp_ru.majflt;
2813	pup->pr_nswap = lwp->lwp_ru.nswap;
2814	pup->pr_inblk = lwp->lwp_ru.inblock;
2815	pup->pr_oublk = lwp->lwp_ru.oublock;
2816	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
2817	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
2818	pup->pr_sigs  = lwp->lwp_ru.nsignals;
2819	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
2820	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
2821	pup->pr_sysc  = lwp->lwp_ru.sysc;
2822	pup->pr_ioch  = lwp->lwp_ru.ioch;
2823}
2824
2825/*
2826 * Convert ms_acct stats from unscaled high-res time to nanoseconds
2827 */
2828void
2829prscaleusage(prhusage_t *usg)
2830{
2831	scalehrtime(&usg->pr_utime);
2832	scalehrtime(&usg->pr_stime);
2833	scalehrtime(&usg->pr_ttime);
2834	scalehrtime(&usg->pr_tftime);
2835	scalehrtime(&usg->pr_dftime);
2836	scalehrtime(&usg->pr_kftime);
2837	scalehrtime(&usg->pr_ltime);
2838	scalehrtime(&usg->pr_slptime);
2839	scalehrtime(&usg->pr_wtime);
2840	scalehrtime(&usg->pr_stoptime);
2841}
2842
2843
2844/*
2845 * Sum resource usage information.
2846 */
2847void
2848praddusage(kthread_t *t, prhusage_t *pup)
2849{
2850	klwp_t *lwp = ttolwp(t);
2851	hrtime_t *mstimep;
2852	struct mstate *ms = &lwp->lwp_mstate;
2853	int state;
2854	int i;
2855	hrtime_t curtime;
2856	hrtime_t waitrq;
2857	hrtime_t tmp;
2858	prhusage_t conv;
2859
2860	curtime = gethrtime_unscaled();
2861
2862	if (ms->ms_term == 0) {
2863		tmp = curtime - ms->ms_start;
2864		scalehrtime(&tmp);
2865		pup->pr_rtime += tmp;
2866	} else {
2867		tmp = ms->ms_term - ms->ms_start;
2868		scalehrtime(&tmp);
2869		pup->pr_rtime += tmp;
2870	}
2871
2872	conv.pr_utime = ms->ms_acct[LMS_USER];
2873	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2874	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2875	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2876	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2877	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2878	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2879	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2880	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2881	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2882
2883	prscaleusage(&conv);
2884
2885	pup->pr_utime	+= conv.pr_utime;
2886	pup->pr_stime	+= conv.pr_stime;
2887	pup->pr_ttime	+= conv.pr_ttime;
2888	pup->pr_tftime	+= conv.pr_tftime;
2889	pup->pr_dftime	+= conv.pr_dftime;
2890	pup->pr_kftime	+= conv.pr_kftime;
2891	pup->pr_ltime	+= conv.pr_ltime;
2892	pup->pr_slptime	+= conv.pr_slptime;
2893	pup->pr_wtime	+= conv.pr_wtime;
2894	pup->pr_stoptime += conv.pr_stoptime;
2895
2896	/*
2897	 * Adjust for time waiting in the dispatcher queue.
2898	 */
2899	waitrq = t->t_waitrq;	/* hopefully atomic */
2900	if (waitrq != 0) {
2901		tmp = curtime - waitrq;
2902		scalehrtime(&tmp);
2903		pup->pr_wtime += tmp;
2904		curtime = waitrq;
2905	}
2906
2907	/*
2908	 * Adjust for time spent in current microstate.
2909	 */
2910	if (ms->ms_state_start > curtime) {
2911		curtime = gethrtime_unscaled();
2912	}
2913
2914	i = 0;
2915	do {
2916		switch (state = t->t_mstate) {
2917		case LMS_SLEEP:
2918			/*
2919			 * Update the timer for the current sleep state.
2920			 */
2921			switch (state = ms->ms_prev) {
2922			case LMS_TFAULT:
2923			case LMS_DFAULT:
2924			case LMS_KFAULT:
2925			case LMS_USER_LOCK:
2926				break;
2927			default:
2928				state = LMS_SLEEP;
2929				break;
2930			}
2931			break;
2932		case LMS_TFAULT:
2933		case LMS_DFAULT:
2934		case LMS_KFAULT:
2935		case LMS_USER_LOCK:
2936			state = LMS_SYSTEM;
2937			break;
2938		}
2939		switch (state) {
2940		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2941		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2942		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2943		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2944		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2945		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2946		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2947		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2948		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2949		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2950		default:		panic("praddusage: unknown microstate");
2951		}
2952		tmp = curtime - ms->ms_state_start;
2953		if (tmp < 0) {
2954			curtime = gethrtime_unscaled();
2955			i++;
2956			continue;
2957		}
2958		scalehrtime(&tmp);
2959	} while (tmp < 0 && i < MAX_ITERS_SPIN);
2960
2961	*mstimep += tmp;
2962
2963	/* update pup timestamp */
2964	pup->pr_tstamp = curtime;
2965	scalehrtime(&pup->pr_tstamp);
2966
2967	/*
2968	 * Resource usage counters.
2969	 */
2970	pup->pr_minf  += lwp->lwp_ru.minflt;
2971	pup->pr_majf  += lwp->lwp_ru.majflt;
2972	pup->pr_nswap += lwp->lwp_ru.nswap;
2973	pup->pr_inblk += lwp->lwp_ru.inblock;
2974	pup->pr_oublk += lwp->lwp_ru.oublock;
2975	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
2976	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
2977	pup->pr_sigs  += lwp->lwp_ru.nsignals;
2978	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
2979	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
2980	pup->pr_sysc  += lwp->lwp_ru.sysc;
2981	pup->pr_ioch  += lwp->lwp_ru.ioch;
2982}
2983
2984/*
2985 * Convert a prhusage_t to a prusage_t.
2986 * This means convert each hrtime_t to a timestruc_t
2987 * and copy the count fields uint64_t => ulong_t.
2988 */
2989void
2990prcvtusage(prhusage_t *pup, prusage_t *upup)
2991{
2992	uint64_t *ullp;
2993	ulong_t *ulp;
2994	int i;
2995
2996	upup->pr_lwpid = pup->pr_lwpid;
2997	upup->pr_count = pup->pr_count;
2998
2999	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3000	hrt2ts(pup->pr_create,	&upup->pr_create);
3001	hrt2ts(pup->pr_term,	&upup->pr_term);
3002	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3003	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3004	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3005	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3006	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3007	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3008	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3009	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3010	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3011	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3012	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3013	bzero(upup->filltime, sizeof (upup->filltime));
3014
3015	ullp = &pup->pr_minf;
3016	ulp = &upup->pr_minf;
3017	for (i = 0; i < 22; i++)
3018		*ulp++ = (ulong_t)*ullp++;
3019}
3020
3021#ifdef _SYSCALL32_IMPL
3022void
3023prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3024{
3025	uint64_t *ullp;
3026	uint32_t *ulp;
3027	int i;
3028
3029	upup->pr_lwpid = pup->pr_lwpid;
3030	upup->pr_count = pup->pr_count;
3031
3032	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
3033	hrt2ts32(pup->pr_create,	&upup->pr_create);
3034	hrt2ts32(pup->pr_term,		&upup->pr_term);
3035	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
3036	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
3037	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
3038	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
3039	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
3040	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
3041	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
3042	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
3043	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
3044	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
3045	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
3046	bzero(upup->filltime, sizeof (upup->filltime));
3047
3048	ullp = &pup->pr_minf;
3049	ulp = &upup->pr_minf;
3050	for (i = 0; i < 22; i++)
3051		*ulp++ = (uint32_t)*ullp++;
3052}
3053#endif	/* _SYSCALL32_IMPL */
3054
3055/*
3056 * Determine whether a set is empty.
3057 */
3058int
3059setisempty(uint32_t *sp, uint_t n)
3060{
3061	while (n--)
3062		if (*sp++)
3063			return (0);
3064	return (1);
3065}
3066
3067/*
3068 * Utility routine for establishing a watched area in the process.
3069 * Keep the list of watched areas sorted by virtual address.
3070 */
3071int
3072set_watched_area(proc_t *p, struct watched_area *pwa)
3073{
3074	caddr_t vaddr = pwa->wa_vaddr;
3075	caddr_t eaddr = pwa->wa_eaddr;
3076	ulong_t flags = pwa->wa_flags;
3077	struct watched_area *target;
3078	avl_index_t where;
3079	int error = 0;
3080
3081	/* we must not be holding p->p_lock, but the process must be locked */
3082	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3083	ASSERT(p->p_proc_flag & P_PR_LOCK);
3084
3085	/*
3086	 * If this is our first watchpoint, enable watchpoints for the process.
3087	 */
3088	if (!pr_watch_active(p)) {
3089		kthread_t *t;
3090
3091		mutex_enter(&p->p_lock);
3092		if ((t = p->p_tlist) != NULL) {
3093			do {
3094				watch_enable(t);
3095			} while ((t = t->t_forw) != p->p_tlist);
3096		}
3097		mutex_exit(&p->p_lock);
3098	}
3099
3100	target = pr_find_watched_area(p, pwa, &where);
3101	if (target != NULL) {
3102		/*
3103		 * We discovered an existing, overlapping watched area.
3104		 * Allow it only if it is an exact match.
3105		 */
3106		if (target->wa_vaddr != vaddr ||
3107		    target->wa_eaddr != eaddr)
3108			error = EINVAL;
3109		else if (target->wa_flags != flags) {
3110			error = set_watched_page(p, vaddr, eaddr,
3111			    flags, target->wa_flags);
3112			target->wa_flags = flags;
3113		}
3114		kmem_free(pwa, sizeof (struct watched_area));
3115	} else {
3116		avl_insert(&p->p_warea, pwa, where);
3117		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3118	}
3119
3120	return (error);
3121}
3122
3123/*
3124 * Utility routine for clearing a watched area in the process.
3125 * Must be an exact match of the virtual address.
3126 * size and flags don't matter.
3127 */
3128int
3129clear_watched_area(proc_t *p, struct watched_area *pwa)
3130{
3131	struct watched_area *found;
3132
3133	/* we must not be holding p->p_lock, but the process must be locked */
3134	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3135	ASSERT(p->p_proc_flag & P_PR_LOCK);
3136
3137
3138	if (!pr_watch_active(p)) {
3139		kmem_free(pwa, sizeof (struct watched_area));
3140		return (0);
3141	}
3142
3143	/*
3144	 * Look for a matching address in the watched areas.  If a match is
3145	 * found, clear the old watched area and adjust the watched page(s).  It
3146	 * is not an error if there is no match.
3147	 */
3148	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3149	    found->wa_vaddr == pwa->wa_vaddr) {
3150		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3151		    found->wa_flags);
3152		avl_remove(&p->p_warea, found);
3153		kmem_free(found, sizeof (struct watched_area));
3154	}
3155
3156	kmem_free(pwa, sizeof (struct watched_area));
3157
3158	/*
3159	 * If we removed the last watched area from the process, disable
3160	 * watchpoints.
3161	 */
3162	if (!pr_watch_active(p)) {
3163		kthread_t *t;
3164
3165		mutex_enter(&p->p_lock);
3166		if ((t = p->p_tlist) != NULL) {
3167			do {
3168				watch_disable(t);
3169			} while ((t = t->t_forw) != p->p_tlist);
3170		}
3171		mutex_exit(&p->p_lock);
3172	}
3173
3174	return (0);
3175}
3176
3177/*
3178 * Frees all the watched_area structures
3179 */
3180void
3181pr_free_watchpoints(proc_t *p)
3182{
3183	struct watched_area *delp;
3184	void *cookie;
3185
3186	cookie = NULL;
3187	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3188		kmem_free(delp, sizeof (struct watched_area));
3189
3190	avl_destroy(&p->p_warea);
3191}
3192
3193/*
3194 * This one is called by the traced process to unwatch all the
3195 * pages while deallocating the list of watched_page structs.
3196 */
3197void
3198pr_free_watched_pages(proc_t *p)
3199{
3200	struct as *as = p->p_as;
3201	struct watched_page *pwp;
3202	uint_t prot;
3203	int    retrycnt, err;
3204	void *cookie;
3205
3206	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3207		return;
3208
3209	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3210	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3211
3212	pwp = avl_first(&as->a_wpage);
3213
3214	cookie = NULL;
3215	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3216		retrycnt = 0;
3217		if ((prot = pwp->wp_oprot) != 0) {
3218			caddr_t addr = pwp->wp_vaddr;
3219			struct seg *seg;
3220		retry:
3221
3222			if ((pwp->wp_prot != prot ||
3223			    (pwp->wp_flags & WP_NOWATCH)) &&
3224			    (seg = as_segat(as, addr)) != NULL) {
3225				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3226				if (err == IE_RETRY) {
3227					ASSERT(retrycnt == 0);
3228					retrycnt++;
3229					goto retry;
3230				}
3231			}
3232		}
3233		kmem_free(pwp, sizeof (struct watched_page));
3234	}
3235
3236	avl_destroy(&as->a_wpage);
3237	p->p_wprot = NULL;
3238
3239	AS_LOCK_EXIT(as, &as->a_lock);
3240}
3241
/*
 * Insert a watched area into the list of watched pages.
 * If oflags is zero then we are adding a new watched area.
 * Otherwise we are changing the flags of an existing watched area.
 *
 * p		process whose watched pages are being updated
 * vaddr	first address of the watched area
 * eaddr	end address; every page whose base lies below it is visited
 * flags	WA_READ/WA_WRITE/WA_EXEC bits whose per-page counts are bumped
 * oflags	WA_* bits whose per-page counts are dropped first
 *
 * Returns 0 on success, or E2BIG when the watched-page tree holds more
 * than prnwatch nodes at the start of an iteration.  On E2BIG, pages
 * already handled by earlier iterations of this call are not rolled back
 * here.
 */
static int
set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
	ulong_t flags, ulong_t oflags)
{
	struct as *as = p->p_as;
	avl_tree_t *pwp_tree;
	struct watched_page *pwp, *newpwp;
	struct watched_page tpw;
	avl_index_t where;
	struct seg *seg;
	uint_t prot;
	caddr_t addr;

	/*
	 * We need to pre-allocate a list of structures before we grab the
	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
	 * held.  One zeroed structure is allocated per page spanned by
	 * [vaddr, eaddr) and chained through wp_list.
	 */
	newpwp = NULL;
	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	    addr < eaddr; addr += PAGESIZE) {
		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
		pwp->wp_list = newpwp;
		newpwp = pwp;
	}

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	/*
	 * Search for an existing watched page to contain the watched area.
	 * If none is found, grab a new one from the available list
	 * and insert it in the active list, keeping the list sorted
	 * by user-level virtual address.
	 *
	 * While SVFWAIT is set on the process the per-process tree
	 * (p_wpage) is used instead of the address space's tree (a_wpage).
	 */
	if (p->p_flag & SVFWAIT)
		pwp_tree = &p->p_wpage;
	else
		pwp_tree = &as->a_wpage;

again:
	/*
	 * Enforce the limit on watched pages.  Any pre-allocated
	 * structures not yet consumed are released before failing.
	 */
	if (avl_numnodes(pwp_tree) > prnwatch) {
		AS_LOCK_EXIT(as, &as->a_lock);
		while (newpwp != NULL) {
			pwp = newpwp->wp_list;
			kmem_free(newpwp, sizeof (struct watched_page));
			newpwp = pwp;
		}
		return (E2BIG);
	}

	/* Look the page up by its page-aligned base address. */
	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
		/* Not yet watched: take one structure off the spare list. */
		pwp = newpwp;
		newpwp = newpwp->wp_list;
		pwp->wp_list = NULL;
		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);
		avl_insert(pwp_tree, pwp, where);
	}

	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);

	/* Drop the reference counts contributed by the old flags ... */
	if (oflags & WA_READ)
		pwp->wp_read--;
	if (oflags & WA_WRITE)
		pwp->wp_write--;
	if (oflags & WA_EXEC)
		pwp->wp_exec--;

	ASSERT(pwp->wp_read >= 0);
	ASSERT(pwp->wp_write >= 0);
	ASSERT(pwp->wp_exec >= 0);

	/* ... and add the counts for the new flags. */
	if (flags & WA_READ)
		pwp->wp_read++;
	if (flags & WA_WRITE)
		pwp->wp_write++;
	if (flags & WA_EXEC)
		pwp->wp_exec++;

	/* Protections are only computed when SVFWAIT is not set. */
	if (!(p->p_flag & SVFWAIT)) {
		vaddr = pwp->wp_vaddr;
		/*
		 * First time this page is seen with a backing segment:
		 * remember the segment's protections as both the original
		 * and the current protections.
		 */
		if (pwp->wp_oprot == 0 &&
		    (seg = as_segat(as, vaddr)) != NULL) {
			SEGOP_GETPROT(seg, vaddr, 0, &prot);
			pwp->wp_oprot = (uchar_t)prot;
			pwp->wp_prot = (uchar_t)prot;
		}
		if (pwp->wp_oprot != 0) {
			/*
			 * Derive the protections to apply: a read or exec
			 * watch strips all access, a write watch strips
			 * only write access.
			 */
			prot = pwp->wp_oprot;
			if (pwp->wp_read)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (pwp->wp_write)
				prot &= ~PROT_WRITE;
			if (pwp->wp_exec)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			/*
			 * If the protections change and the page is not
			 * already queued (WP_SETPROT) or marked WP_NOWATCH,
			 * push it onto the process's p_wprot list.
			 */
			if (!(pwp->wp_flags & WP_NOWATCH) &&
			    pwp->wp_prot != prot &&
			    (pwp->wp_flags & WP_SETPROT) == 0) {
				pwp->wp_flags |= WP_SETPROT;
				pwp->wp_list = p->p_wprot;
				p->p_wprot = pwp;
			}
			pwp->wp_prot = (uchar_t)prot;
		}
	}

	/*
	 * If the watched area extends into the next page then do
	 * it over again with the virtual address of the next page.
	 */
	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
		goto again;

	AS_LOCK_EXIT(as, &as->a_lock);

	/*
	 * Free any pages we may have over-allocated
	 * (structures left on the spare list because their pages were
	 * already present in the tree).
	 */
	while (newpwp != NULL) {
		pwp = newpwp->wp_list;
		kmem_free(newpwp, sizeof (struct watched_page));
		newpwp = pwp;
	}

	return (0);
}
3374
3375/*
3376 * Remove a watched area from the list of watched pages.
3377 * A watched area may extend over more than one page.
3378 */
3379static void
3380clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3381{
3382	struct as *as = p->p_as;
3383	struct watched_page *pwp;
3384	struct watched_page tpw;
3385	avl_tree_t *tree;
3386	avl_index_t where;
3387
3388	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3389
3390	if (p->p_flag & SVFWAIT)
3391		tree = &p->p_wpage;
3392	else
3393		tree = &as->a_wpage;
3394
3395	tpw.wp_vaddr = vaddr =
3396	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3397	pwp = avl_find(tree, &tpw, &where);
3398	if (pwp == NULL)
3399		pwp = avl_nearest(tree, where, AVL_AFTER);
3400
3401	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3402		ASSERT(vaddr <=  pwp->wp_vaddr);
3403
3404		if (flags & WA_READ)
3405			pwp->wp_read--;
3406		if (flags & WA_WRITE)
3407			pwp->wp_write--;
3408		if (flags & WA_EXEC)
3409			pwp->wp_exec--;
3410
3411		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3412			/*
3413			 * Reset the hat layer's protections on this page.
3414			 */
3415			if (pwp->wp_oprot != 0) {
3416				uint_t prot = pwp->wp_oprot;
3417
3418				if (pwp->wp_read)
3419					prot &=
3420					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3421				if (pwp->wp_write)
3422					prot &= ~PROT_WRITE;
3423				if (pwp->wp_exec)
3424					prot &=
3425					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3426				if (!(pwp->wp_flags & WP_NOWATCH) &&
3427				    pwp->wp_prot != prot &&
3428				    (pwp->wp_flags & WP_SETPROT) == 0) {
3429					pwp->wp_flags |= WP_SETPROT;
3430					pwp->wp_list = p->p_wprot;
3431					p->p_wprot = pwp;
3432				}
3433				pwp->wp_prot = (uchar_t)prot;
3434			}
3435		} else {
3436			/*
3437			 * No watched areas remain in this page.
3438			 * Reset everything to normal.
3439			 */
3440			if (pwp->wp_oprot != 0) {
3441				pwp->wp_prot = pwp->wp_oprot;
3442				if ((pwp->wp_flags & WP_SETPROT) == 0) {
3443					pwp->wp_flags |= WP_SETPROT;
3444					pwp->wp_list = p->p_wprot;
3445					p->p_wprot = pwp;
3446				}
3447			}
3448		}
3449
3450		pwp = AVL_NEXT(tree, pwp);
3451	}
3452
3453	AS_LOCK_EXIT(as, &as->a_lock);
3454}
3455
3456/*
3457 * Return the original protections for the specified page.
3458 */
3459static void
3460getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3461{
3462	struct watched_page *pwp;
3463	struct watched_page tpw;
3464
3465	ASSERT(AS_LOCK_HELD(as, &as->a_lock));
3466
3467	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3468	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3469		*prot = pwp->wp_oprot;
3470}
3471
3472static prpagev_t *
3473pr_pagev_create(struct seg *seg, int check_noreserve)
3474{
3475	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3476	size_t total_pages = seg_pages(seg);
3477
3478	/*
3479	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
3480	 * 4 or 5 bytes of storage per page, so this means we limit ourself
3481	 * to about a megabyte of kernel heap by default.
3482	 */
3483	pagev->pg_npages = MIN(total_pages, pagev_lim);
3484	pagev->pg_pnbase = 0;
3485
3486	pagev->pg_protv =
3487	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3488
3489	if (check_noreserve)
3490		pagev->pg_incore =
3491		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3492	else
3493		pagev->pg_incore = NULL;
3494
3495	return (pagev);
3496}
3497
3498static void
3499pr_pagev_destroy(prpagev_t *pagev)
3500{
3501	if (pagev->pg_incore != NULL)
3502		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3503
3504	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3505	kmem_free(pagev, sizeof (prpagev_t));
3506}
3507
/*
 * Load the page vector with information for a window of at most
 * pg_npages pages of seg beginning at addr, never reaching past eaddr.
 *
 * When the vector has an incore array, the window is advanced (refilled)
 * until a page with a non-zero incore entry is found or eaddr is reached.
 * The protection array is then filled for the window that was current
 * when the scan stopped.
 *
 * Returns eaddr immediately if addr == eaddr.  Otherwise returns addr
 * unchanged when there is no incore array, or the address at which the
 * incore scan stopped (the first page with a non-zero incore entry, or
 * the point where the scan ran into eaddr).
 */
static caddr_t
pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
{
	ulong_t lastpg = seg_page(seg, eaddr - 1);
	ulong_t pn, pnlim;
	caddr_t saddr;
	size_t len;

	ASSERT(addr >= seg->s_base && addr <= eaddr);

	if (addr == eaddr)
		return (eaddr);

refill:
	ASSERT(addr < eaddr);
	/* Re-base the window at addr; pnlim is one past its last page. */
	pagev->pg_pnbase = seg_page(seg, addr);
	pnlim = pagev->pg_pnbase + pagev->pg_npages;
	saddr = addr;

	/* Clip the window length so that it does not run past eaddr. */
	if (lastpg < pnlim)
		len = (size_t)(eaddr - addr);
	else
		len = pagev->pg_npages * PAGESIZE;

	if (pagev->pg_incore != NULL) {
		/*
		 * INCORE cleverly has different semantics than GETPROT:
		 * it returns info on pages up to but NOT including addr + len.
		 */
		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
		pn = pagev->pg_pnbase;

		do {
			/*
			 * Guilty knowledge here:  We know that segvn_incore
			 * returns more than just the low-order bit that
			 * indicates the page is actually in memory.  If any
			 * bits are set, then the page has backing store.
			 */
			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
				goto out;

		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);

		/*
		 * If we examined all the pages in the vector but we're not
		 * at the end of the segment, take another lap.
		 */
		if (addr < eaddr)
			goto refill;
	}

	/*
	 * Need to take len - 1 because addr + len is the address of the
	 * first byte of the page just past the end of what we want.
	 * Note that the protections are fetched from saddr, the start of
	 * the current window, not from the (possibly advanced) addr.
	 */
out:
	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
	return (addr);
}
3568
/*
 * Find the next run of pages in seg, at or after *saddrp and below
 * eaddr, that share the same protections.
 *
 * On return *saddrp holds the start of the run, *protp its protections,
 * and the return value is the address at which the run ends.  Watched
 * pages report their original protections (see getwatchprot()).  When
 * the vector tracks incore state and no backed page is left before
 * eaddr, *saddrp and the return value are both eaddr and *protp is 0.
 */
static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev.  If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			/* Ran off the end without finding a backed page. */
			if ((addr += PAGESIZE) == eaddr) {
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			/* Window exhausted: refill it starting at addr. */
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn.  If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing.  If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different than those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	/* addr is now the end of the run that starts at *saddrp. */
	*protp = prot;
	return (addr);
}
3661
3662size_t
3663pr_getsegsize(struct seg *seg, int reserved)
3664{
3665	size_t size = seg->s_size;
3666
3667	/*
3668	 * If we're interested in the reserved space, return the size of the
3669	 * segment itself.  Everything else in this function is a special case
3670	 * to determine the actual underlying size of various segment types.
3671	 */
3672	if (reserved)
3673		return (size);
3674
3675	/*
3676	 * If this is a segvn mapping of a regular file, return the smaller
3677	 * of the segment size and the remaining size of the file beyond
3678	 * the file offset corresponding to seg->s_base.
3679	 */
3680	if (seg->s_ops == &segvn_ops) {
3681		vattr_t vattr;
3682		vnode_t *vp;
3683
3684		vattr.va_mask = AT_SIZE;
3685
3686		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3687		    vp != NULL && vp->v_type == VREG &&
3688		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3689
3690			u_offset_t fsize = vattr.va_size;
3691			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3692
3693			if (fsize < offset)
3694				fsize = 0;
3695			else
3696				fsize -= offset;
3697
3698			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3699
3700			if (fsize < (u_offset_t)size)
3701				size = (size_t)fsize;
3702		}
3703
3704		return (size);
3705	}
3706
3707	/*
3708	 * If this is an ISM shared segment, don't include pages that are
3709	 * beyond the real size of the spt segment that backs it.
3710	 */
3711	if (seg->s_ops == &segspt_shmops)
3712		return (MIN(spt_realsize(seg), size));
3713
3714	/*
3715	 * If this is segment is a mapping from /dev/null, then this is a
3716	 * reservation of virtual address space and has no actual size.
3717	 * Such segments are backed by segdev and have type set to neither
3718	 * MAP_SHARED nor MAP_PRIVATE.
3719	 */
3720	if (seg->s_ops == &segdev_ops &&
3721	    ((SEGOP_GETTYPE(seg, seg->s_base) &
3722	    (MAP_SHARED | MAP_PRIVATE)) == 0))
3723		return (0);
3724
3725	/*
3726	 * If this segment doesn't match one of the special types we handle,
3727	 * just return the size of the segment itself.
3728	 */
3729	return (size);
3730}
3731
3732uint_t
3733pr_getprot(struct seg *seg, int reserved, void **tmp,
3734	caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3735{
3736	struct as *as = seg->s_as;
3737
3738	caddr_t saddr = *saddrp;
3739	caddr_t naddr;
3740
3741	int check_noreserve;
3742	uint_t prot;
3743
3744	union {
3745		struct segvn_data *svd;
3746		struct segdev_data *sdp;
3747		void *data;
3748	} s;
3749
3750	s.data = seg->s_data;
3751
3752	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3753	ASSERT(saddr >= seg->s_base && saddr < eaddr);
3754	ASSERT(eaddr <= seg->s_base + seg->s_size);
3755
3756	/*
3757	 * Don't include MAP_NORESERVE pages in the address range
3758	 * unless their mappings have actually materialized.
3759	 * We cheat by knowing that segvn is the only segment
3760	 * driver that supports MAP_NORESERVE.
3761	 */
3762	check_noreserve =
3763	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3764	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3765	    (s.svd->flags & MAP_NORESERVE));
3766
3767	/*
3768	 * Examine every page only as a last resort.  We use guilty knowledge
3769	 * of segvn and segdev to avoid this: if there are no per-page
3770	 * protections present in the segment and we don't care about
3771	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3772	 */
3773	if (!check_noreserve && saddr == seg->s_base &&
3774	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3775		prot = s.svd->prot;
3776		getwatchprot(as, saddr, &prot);
3777		naddr = eaddr;
3778
3779	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3780	    s.sdp != NULL && s.sdp->pageprot == 0) {
3781		prot = s.sdp->prot;
3782		getwatchprot(as, saddr, &prot);
3783		naddr = eaddr;
3784
3785	} else {
3786		prpagev_t *pagev;
3787
3788		/*
3789		 * If addr is sitting at the start of the segment, then
3790		 * create a page vector to store protection and incore
3791		 * information for pages in the segment, and fill it.
3792		 * Otherwise, we expect *tmp to address the prpagev_t
3793		 * allocated by a previous call to this function.
3794		 */
3795		if (saddr == seg->s_base) {
3796			pagev = pr_pagev_create(seg, check_noreserve);
3797			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3798
3799			ASSERT(*tmp == NULL);
3800			*tmp = pagev;
3801
3802			ASSERT(saddr <= eaddr);
3803			*saddrp = saddr;
3804
3805			if (saddr == eaddr) {
3806				naddr = saddr;
3807				prot = 0;
3808				goto out;
3809			}
3810
3811		} else {
3812			ASSERT(*tmp != NULL);
3813			pagev = (prpagev_t *)*tmp;
3814		}
3815
3816		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3817		ASSERT(naddr <= eaddr);
3818	}
3819
3820out:
3821	if (naddr == eaddr)
3822		pr_getprot_done(tmp);
3823	*naddrp = naddr;
3824	return (prot);
3825}
3826
3827void
3828pr_getprot_done(void **tmp)
3829{
3830	if (*tmp != NULL) {
3831		pr_pagev_destroy((prpagev_t *)*tmp);
3832		*tmp = NULL;
3833	}
3834}
3835
3836/*
3837 * Return true iff the vnode is a /proc file from the object directory.
3838 */
3839int
3840pr_isobject(vnode_t *vp)
3841{
3842	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3843}
3844
3845/*
3846 * Return true iff the vnode is a /proc file opened by the process itself.
3847 */
3848int
3849pr_isself(vnode_t *vp)
3850{
3851	/*
3852	 * XXX: To retain binary compatibility with the old
3853	 * ioctl()-based version of /proc, we exempt self-opens
3854	 * of /proc/<pid> from being marked close-on-exec.
3855	 */
3856	return (vn_matchops(vp, prvnodeops) &&
3857	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
3858	    VTOP(vp)->pr_type != PR_PIDDIR);
3859}
3860
3861static ssize_t
3862pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3863{
3864	ssize_t pagesize, hatsize;
3865
3866	ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
3867	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3868	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3869	ASSERT(saddr < eaddr);
3870
3871	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3872	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3873	ASSERT(pagesize != 0);
3874
3875	if (pagesize == -1)
3876		pagesize = PAGESIZE;
3877
3878	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3879
3880	while (saddr < eaddr) {
3881		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3882			break;
3883		ASSERT(IS_P2ALIGNED(saddr, pagesize));
3884		saddr += pagesize;
3885	}
3886
3887	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
3888	return (hatsize);
3889}
3890
3891/*
3892 * Return an array of structures with extended memory map information.
3893 * We allocate here; the caller must deallocate.
3894 */
3895int
3896prgetxmap(proc_t *p, list_t *iolhead)
3897{
3898	struct as *as = p->p_as;
3899	prxmap_t *mp;
3900	struct seg *seg;
3901	struct seg *brkseg, *stkseg;
3902	struct vnode *vp;
3903	struct vattr vattr;
3904	uint_t prot;
3905
3906	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
3907
3908	/*
3909	 * Request an initial buffer size that doesn't waste memory
3910	 * if the address space has only a small number of segments.
3911	 */
3912	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
3913
3914	if ((seg = AS_SEGFIRST(as)) == NULL)
3915		return (0);
3916
3917	brkseg = break_seg(p);
3918	stkseg = as_segat(as, prgetstackbase(p));
3919
3920	do {
3921		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
3922		caddr_t saddr, naddr, baddr;
3923		void *tmp = NULL;
3924		ssize_t psz;
3925		char *parr;
3926		uint64_t npages;
3927		uint64_t pagenum;
3928
3929		/*
3930		 * Segment loop part one: iterate from the base of the segment
3931		 * to its end, pausing at each address boundary (baddr) between
3932		 * ranges that have different virtual memory protections.
3933		 */
3934		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
3935			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
3936			ASSERT(baddr >= saddr && baddr <= eaddr);
3937
3938			/*
3939			 * Segment loop part two: iterate from the current
3940			 * position to the end of the protection boundary,
3941			 * pausing at each address boundary (naddr) between
3942			 * ranges that have different underlying page sizes.
3943			 */
3944			for (; saddr < baddr; saddr = naddr) {
3945				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
3946				ASSERT(naddr >= saddr && naddr <= baddr);
3947
3948				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
3949
3950				mp->pr_vaddr = (uintptr_t)saddr;
3951				mp->pr_size = naddr - saddr;
3952				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
3953				mp->pr_mflags = 0;
3954				if (prot & PROT_READ)
3955					mp->pr_mflags |= MA_READ;
3956				if (prot & PROT_WRITE)
3957					mp->pr_mflags |= MA_WRITE;
3958				if (prot & PROT_EXEC)
3959					mp->pr_mflags |= MA_EXEC;
3960				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
3961					mp->pr_mflags |= MA_SHARED;
3962				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
3963					mp->pr_mflags |= MA_NORESERVE;
3964				if (seg->s_ops == &segspt_shmops ||
3965				    (seg->s_ops == &segvn_ops &&
3966				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
3967				    vp == NULL)))
3968					mp->pr_mflags |= MA_ANON;
3969				if (seg == brkseg)
3970					mp->pr_mflags |= MA_BREAK;
3971				else if (seg == stkseg)
3972					mp->pr_mflags |= MA_STACK;
3973				if (seg->s_ops == &segspt_shmops)
3974					mp->pr_mflags |= MA_ISM | MA_SHM;
3975
3976				mp->pr_pagesize = PAGESIZE;
3977				if (psz == -1) {
3978					mp->pr_hatpagesize = 0;
3979				} else {
3980					mp->pr_hatpagesize = psz;
3981				}
3982
3983				/*
3984				 * Manufacture a filename for the "object" dir.
3985				 */
3986				mp->pr_dev = PRNODEV;
3987				vattr.va_mask = AT_FSID|AT_NODEID;
3988				if (seg->s_ops == &segvn_ops &&
3989				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
3990				    vp != NULL && vp->v_type == VREG &&
3991				    VOP_GETATTR(vp, &vattr, 0, CRED(),
3992				    NULL) == 0) {
3993					mp->pr_dev = vattr.va_fsid;
3994					mp->pr_ino = vattr.va_nodeid;
3995					if (vp == p->p_exec)
3996						(void) strcpy(mp->pr_mapname,
3997						    "a.out");
3998					else
3999						pr_object_name(mp->pr_mapname,
4000						    vp, &vattr);
4001				}
4002
4003				/*
4004				 * Get the SysV shared memory id, if any.
4005				 */
4006				if ((mp->pr_mflags & MA_SHARED) &&
4007				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4008				    seg->s_base)) != SHMID_NONE) {
4009					if (mp->pr_shmid == SHMID_FREE)
4010						mp->pr_shmid = -1;
4011
4012					mp->pr_mflags |= MA_SHM;
4013				} else {
4014					mp->pr_shmid = -1;
4015				}
4016
4017				npages = ((uintptr_t)(naddr - saddr)) >>
4018				    PAGESHIFT;
4019				parr = kmem_zalloc(npages, KM_SLEEP);
4020
4021				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4022
4023				for (pagenum = 0; pagenum < npages; pagenum++) {
4024					if (parr[pagenum] & SEG_PAGE_INCORE)
4025						mp->pr_rss++;
4026					if (parr[pagenum] & SEG_PAGE_ANON)
4027						mp->pr_anon++;
4028					if (parr[pagenum] & SEG_PAGE_LOCKED)
4029						mp->pr_locked++;
4030				}
4031				kmem_free(parr, npages);
4032			}
4033		}
4034		ASSERT(tmp == NULL);
4035	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4036
4037	return (0);
4038}
4039
4040/*
4041 * Return the process's credentials.  We don't need a 32-bit equivalent of
4042 * this function because prcred_t and prcred32_t are actually the same.
4043 */
4044void
4045prgetcred(proc_t *p, prcred_t *pcrp)
4046{
4047	mutex_enter(&p->p_crlock);
4048	cred2prcred(p->p_cred, pcrp);
4049	mutex_exit(&p->p_crlock);
4050}
4051
4052/*
4053 * Compute actual size of the prpriv_t structure.
4054 */
4055
4056size_t
4057prgetprivsize(void)
4058{
4059	return (priv_prgetprivsize(NULL));
4060}
4061
4062/*
4063 * Return the process's privileges.  We don't need a 32-bit equivalent of
4064 * this function because prpriv_t and prpriv32_t are actually the same.
4065 */
4066void
4067prgetpriv(proc_t *p, prpriv_t *pprp)
4068{
4069	mutex_enter(&p->p_crlock);
4070	cred2prpriv(p->p_cred, pprp);
4071	mutex_exit(&p->p_crlock);
4072}
4073
4074#ifdef _SYSCALL32_IMPL
4075/*
4076 * Return an array of structures with HAT memory map information.
4077 * We allocate here; the caller must deallocate.
4078 */
4079int
4080prgetxmap32(proc_t *p, list_t *iolhead)
4081{
4082	struct as *as = p->p_as;
4083	prxmap32_t *mp;
4084	struct seg *seg;
4085	struct seg *brkseg, *stkseg;
4086	struct vnode *vp;
4087	struct vattr vattr;
4088	uint_t prot;
4089
4090	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4091
4092	/*
4093	 * Request an initial buffer size that doesn't waste memory
4094	 * if the address space has only a small number of segments.
4095	 */
4096	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4097
4098	if ((seg = AS_SEGFIRST(as)) == NULL)
4099		return (0);
4100
4101	brkseg = break_seg(p);
4102	stkseg = as_segat(as, prgetstackbase(p));
4103
4104	do {
4105		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4106		caddr_t saddr, naddr, baddr;
4107		void *tmp = NULL;
4108		ssize_t psz;
4109		char *parr;
4110		uint64_t npages;
4111		uint64_t pagenum;
4112
4113		/*
4114		 * Segment loop part one: iterate from the base of the segment
4115		 * to its end, pausing at each address boundary (baddr) between
4116		 * ranges that have different virtual memory protections.
4117		 */
4118		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4119			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4120			ASSERT(baddr >= saddr && baddr <= eaddr);
4121
4122			/*
4123			 * Segment loop part two: iterate from the current
4124			 * position to the end of the protection boundary,
4125			 * pausing at each address boundary (naddr) between
4126			 * ranges that have different underlying page sizes.
4127			 */
4128			for (; saddr < baddr; saddr = naddr) {
4129				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4130				ASSERT(naddr >= saddr && naddr <= baddr);
4131
4132				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4133
4134				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4135				mp->pr_size = (size32_t)(naddr - saddr);
4136				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4137				mp->pr_mflags = 0;
4138				if (prot & PROT_READ)
4139					mp->pr_mflags |= MA_READ;
4140				if (prot & PROT_WRITE)
4141					mp->pr_mflags |= MA_WRITE;
4142				if (prot & PROT_EXEC)
4143					mp->pr_mflags |= MA_EXEC;
4144				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4145					mp->pr_mflags |= MA_SHARED;
4146				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4147					mp->pr_mflags |= MA_NORESERVE;
4148				if (seg->s_ops == &segspt_shmops ||
4149				    (seg->s_ops == &segvn_ops &&
4150				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4151				    vp == NULL)))
4152					mp->pr_mflags |= MA_ANON;
4153				if (seg == brkseg)
4154					mp->pr_mflags |= MA_BREAK;
4155				else if (seg == stkseg)
4156					mp->pr_mflags |= MA_STACK;
4157				if (seg->s_ops == &segspt_shmops)
4158					mp->pr_mflags |= MA_ISM | MA_SHM;
4159
4160				mp->pr_pagesize = PAGESIZE;
4161				if (psz == -1) {
4162					mp->pr_hatpagesize = 0;
4163				} else {
4164					mp->pr_hatpagesize = psz;
4165				}
4166
4167				/*
4168				 * Manufacture a filename for the "object" dir.
4169				 */
4170				mp->pr_dev = PRNODEV32;
4171				vattr.va_mask = AT_FSID|AT_NODEID;
4172				if (seg->s_ops == &segvn_ops &&
4173				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4174				    vp != NULL && vp->v_type == VREG &&
4175				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4176				    NULL) == 0) {
4177					(void) cmpldev(&mp->pr_dev,
4178					    vattr.va_fsid);
4179					mp->pr_ino = vattr.va_nodeid;
4180					if (vp == p->p_exec)
4181						(void) strcpy(mp->pr_mapname,
4182						    "a.out");
4183					else
4184						pr_object_name(mp->pr_mapname,
4185						    vp, &vattr);
4186				}
4187
4188				/*
4189				 * Get the SysV shared memory id, if any.
4190				 */
4191				if ((mp->pr_mflags & MA_SHARED) &&
4192				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4193				    seg->s_base)) != SHMID_NONE) {
4194					if (mp->pr_shmid == SHMID_FREE)
4195						mp->pr_shmid = -1;
4196
4197					mp->pr_mflags |= MA_SHM;
4198				} else {
4199					mp->pr_shmid = -1;
4200				}
4201
4202				npages = ((uintptr_t)(naddr - saddr)) >>
4203				    PAGESHIFT;
4204				parr = kmem_zalloc(npages, KM_SLEEP);
4205
4206				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4207
4208				for (pagenum = 0; pagenum < npages; pagenum++) {
4209					if (parr[pagenum] & SEG_PAGE_INCORE)
4210						mp->pr_rss++;
4211					if (parr[pagenum] & SEG_PAGE_ANON)
4212						mp->pr_anon++;
4213					if (parr[pagenum] & SEG_PAGE_LOCKED)
4214						mp->pr_locked++;
4215				}
4216				kmem_free(parr, npages);
4217			}
4218		}
4219		ASSERT(tmp == NULL);
4220	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4221
4222	return (0);
4223}
4224#endif	/* _SYSCALL32_IMPL */
4225