prsubr.c revision 11940:98e0550abcac
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28/*	  All Rights Reserved  	*/
29
30#include <sys/types.h>
31#include <sys/t_lock.h>
32#include <sys/param.h>
33#include <sys/cmn_err.h>
34#include <sys/cred.h>
35#include <sys/priv.h>
36#include <sys/debug.h>
37#include <sys/errno.h>
38#include <sys/inline.h>
39#include <sys/kmem.h>
40#include <sys/mman.h>
41#include <sys/proc.h>
42#include <sys/brand.h>
43#include <sys/sobject.h>
44#include <sys/sysmacros.h>
45#include <sys/systm.h>
46#include <sys/uio.h>
47#include <sys/var.h>
48#include <sys/vfs.h>
49#include <sys/vnode.h>
50#include <sys/session.h>
51#include <sys/pcb.h>
52#include <sys/signal.h>
53#include <sys/user.h>
54#include <sys/disp.h>
55#include <sys/class.h>
56#include <sys/ts.h>
57#include <sys/bitmap.h>
58#include <sys/poll.h>
59#include <sys/shm_impl.h>
60#include <sys/fault.h>
61#include <sys/syscall.h>
62#include <sys/procfs.h>
63#include <sys/processor.h>
64#include <sys/cpuvar.h>
65#include <sys/copyops.h>
66#include <sys/time.h>
67#include <sys/msacct.h>
68#include <vm/as.h>
69#include <vm/rm.h>
70#include <vm/seg.h>
71#include <vm/seg_vn.h>
72#include <vm/seg_dev.h>
73#include <vm/seg_spt.h>
74#include <vm/page.h>
75#include <sys/vmparam.h>
76#include <sys/swap.h>
77#include <fs/proc/prdata.h>
78#include <sys/task.h>
79#include <sys/project.h>
80#include <sys/contract_impl.h>
81#include <sys/contract/process.h>
82#include <sys/contract/process_impl.h>
83#include <sys/schedctl.h>
84#include <sys/pool.h>
85#include <sys/zone.h>
86#include <sys/atomic.h>
87#include <sys/sdt.h>
88
89#define	MAX_ITERS_SPIN	5
90
91typedef struct prpagev {
92	uint_t *pg_protv;	/* vector of page permissions */
93	char *pg_incore;	/* vector of incore flags */
94	size_t pg_npages;	/* number of pages in protv and incore */
95	ulong_t pg_pnbase;	/* pn within segment of first protv element */
96} prpagev_t;
97
98size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */
99
100extern struct seg_ops segdev_ops;	/* needs a header file */
101extern struct seg_ops segspt_shmops;	/* needs a header file */
102
103static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
104static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
105
106/*
107 * Choose an lwp from the complete set of lwps for the process.
108 * This is called for any operation applied to the process
109 * file descriptor that requires an lwp to operate upon.
110 *
111 * Returns a pointer to the thread for the selected LWP,
112 * and with the dispatcher lock held for the thread.
113 *
114 * The algorithm for choosing an lwp is critical for /proc semantics;
115 * don't touch this code unless you know all of the implications.
116 */
117kthread_t *
118prchoose(proc_t *p)
119{
120	kthread_t *t;
121	kthread_t *t_onproc = NULL;	/* running on processor */
122	kthread_t *t_run = NULL;	/* runnable, on disp queue */
123	kthread_t *t_sleep = NULL;	/* sleeping */
124	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
125	kthread_t *t_susp = NULL;	/* suspended stop */
126	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
127	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
128	kthread_t *t_req = NULL;	/* requested stop */
129	kthread_t *t_istop = NULL;	/* event-of-interest stop */
130	kthread_t *t_dtrace = NULL;	/* DTrace stop */
131
132	ASSERT(MUTEX_HELD(&p->p_lock));
133
134	/*
135	 * If the agent lwp exists, it takes precedence over all others.
136	 */
137	if ((t = p->p_agenttp) != NULL) {
138		thread_lock(t);
139		return (t);
140	}
141
142	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
143		return (t);
144	do {		/* for eacn lwp in the process */
145		if (VSTOPPED(t)) {	/* virtually stopped */
146			if (t_req == NULL)
147				t_req = t;
148			continue;
149		}
150
151		thread_lock(t);		/* make sure thread is in good state */
152		switch (t->t_state) {
153		default:
154			panic("prchoose: bad thread state %d, thread 0x%p",
155			    t->t_state, (void *)t);
156			/*NOTREACHED*/
157		case TS_SLEEP:
158			/* this is filthy */
159			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
160			    t->t_wchan0 == NULL) {
161				if (t_hold == NULL)
162					t_hold = t;
163			} else {
164				if (t_sleep == NULL)
165					t_sleep = t;
166			}
167			break;
168		case TS_RUN:
169		case TS_WAIT:
170			if (t_run == NULL)
171				t_run = t;
172			break;
173		case TS_ONPROC:
174			if (t_onproc == NULL)
175				t_onproc = t;
176			break;
177		case TS_ZOMB:		/* last possible choice */
178			break;
179		case TS_STOPPED:
180			switch (t->t_whystop) {
181			case PR_SUSPENDED:
182				if (t_susp == NULL)
183					t_susp = t;
184				break;
185			case PR_JOBCONTROL:
186				if (t->t_proc_flag & TP_PRSTOP) {
187					if (t_jdstop == NULL)
188						t_jdstop = t;
189				} else {
190					if (t_jstop == NULL)
191						t_jstop = t;
192				}
193				break;
194			case PR_REQUESTED:
195				if (t->t_dtrace_stop && t_dtrace == NULL)
196					t_dtrace = t;
197				else if (t_req == NULL)
198					t_req = t;
199				break;
200			case PR_SYSENTRY:
201			case PR_SYSEXIT:
202			case PR_SIGNALLED:
203			case PR_FAULTED:
204				/*
205				 * Make an lwp calling exit() be the
206				 * last lwp seen in the process.
207				 */
208				if (t_istop == NULL ||
209				    (t_istop->t_whystop == PR_SYSENTRY &&
210				    t_istop->t_whatstop == SYS_exit))
211					t_istop = t;
212				break;
213			case PR_CHECKPOINT:	/* can't happen? */
214				break;
215			default:
216				panic("prchoose: bad t_whystop %d, thread 0x%p",
217				    t->t_whystop, (void *)t);
218				/*NOTREACHED*/
219			}
220			break;
221		}
222		thread_unlock(t);
223	} while ((t = t->t_forw) != p->p_tlist);
224
225	if (t_onproc)
226		t = t_onproc;
227	else if (t_run)
228		t = t_run;
229	else if (t_sleep)
230		t = t_sleep;
231	else if (t_jstop)
232		t = t_jstop;
233	else if (t_jdstop)
234		t = t_jdstop;
235	else if (t_istop)
236		t = t_istop;
237	else if (t_dtrace)
238		t = t_dtrace;
239	else if (t_req)
240		t = t_req;
241	else if (t_hold)
242		t = t_hold;
243	else if (t_susp)
244		t = t_susp;
245	else			/* TS_ZOMB */
246		t = p->p_tlist;
247
248	if (t != NULL)
249		thread_lock(t);
250	return (t);
251}
252
253/*
254 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
255 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
256 * on the /proc file descriptor.  Called from stop() when a traced
257 * process stops on an event of interest.  Also called from exit()
258 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
259 */
260void
261prnotify(struct vnode *vp)
262{
263	prcommon_t *pcp = VTOP(vp)->pr_common;
264
265	mutex_enter(&pcp->prc_mutex);
266	cv_broadcast(&pcp->prc_wait);
267	mutex_exit(&pcp->prc_mutex);
268	if (pcp->prc_flags & PRC_POLL) {
269		/*
270		 * We call pollwakeup() with POLLHUP to ensure that
271		 * the pollers are awakened even if they are polling
272		 * for nothing (i.e., waiting for the process to exit).
273		 * This enables the use of the PRC_POLL flag for optimization
274		 * (we can turn off PRC_POLL only if we know no pollers remain).
275		 */
276		pcp->prc_flags &= ~PRC_POLL;
277		pollwakeup(&pcp->prc_pollhead, POLLHUP);
278	}
279}
280
281/* called immediately below, in prfree() */
282static void
283prfreenotify(vnode_t *vp)
284{
285	prnode_t *pnp;
286	prcommon_t *pcp;
287
288	while (vp != NULL) {
289		pnp = VTOP(vp);
290		pcp = pnp->pr_common;
291		ASSERT(pcp->prc_thread == NULL);
292		pcp->prc_proc = NULL;
293		/*
294		 * We can't call prnotify() here because we are holding
295		 * pidlock.  We assert that there is no need to.
296		 */
297		mutex_enter(&pcp->prc_mutex);
298		cv_broadcast(&pcp->prc_wait);
299		mutex_exit(&pcp->prc_mutex);
300		ASSERT(!(pcp->prc_flags & PRC_POLL));
301
302		vp = pnp->pr_next;
303		pnp->pr_next = NULL;
304	}
305}
306
307/*
308 * Called from a hook in freeproc() when a traced process is removed
309 * from the process table.  The proc-table pointers of all associated
310 * /proc vnodes are cleared to indicate that the process has gone away.
311 */
312void
313prfree(proc_t *p)
314{
315	uint_t slot = p->p_slot;
316
317	ASSERT(MUTEX_HELD(&pidlock));
318
319	/*
320	 * Block the process against /proc so it can be freed.
321	 * It cannot be freed while locked by some controlling process.
322	 * Lock ordering:
323	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
324	 */
325	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
326	mutex_enter(&p->p_lock);
327	while (p->p_proc_flag & P_PR_LOCK) {
328		mutex_exit(&pr_pidlock);
329		cv_wait(&pr_pid_cv[slot], &p->p_lock);
330		mutex_exit(&p->p_lock);
331		mutex_enter(&pr_pidlock);
332		mutex_enter(&p->p_lock);
333	}
334
335	ASSERT(p->p_tlist == NULL);
336
337	prfreenotify(p->p_plist);
338	p->p_plist = NULL;
339
340	prfreenotify(p->p_trace);
341	p->p_trace = NULL;
342
343	/*
344	 * We broadcast to wake up everyone waiting for this process.
345	 * No one can reach this process from this point on.
346	 */
347	cv_broadcast(&pr_pid_cv[slot]);
348
349	mutex_exit(&p->p_lock);
350	mutex_exit(&pr_pidlock);
351}
352
353/*
354 * Called from a hook in exit() when a traced process is becoming a zombie.
355 */
356void
357prexit(proc_t *p)
358{
359	ASSERT(MUTEX_HELD(&p->p_lock));
360
361	if (pr_watch_active(p)) {
362		pr_free_watchpoints(p);
363		watch_disable(curthread);
364	}
365	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
366	if (p->p_trace) {
367		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
368		prnotify(p->p_trace);
369	}
370	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
371}
372
373/*
374 * Called when a thread calls lwp_exit().
375 */
376void
377prlwpexit(kthread_t *t)
378{
379	vnode_t *vp;
380	prnode_t *pnp;
381	prcommon_t *pcp;
382	proc_t *p = ttoproc(t);
383	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
384
385	ASSERT(t == curthread);
386	ASSERT(MUTEX_HELD(&p->p_lock));
387
388	/*
389	 * The process must be blocked against /proc to do this safely.
390	 * The lwp must not disappear while the process is marked P_PR_LOCK.
391	 * It is the caller's responsibility to have called prbarrier(p).
392	 */
393	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
394
395	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
396		pnp = VTOP(vp);
397		pcp = pnp->pr_common;
398		if (pcp->prc_thread == t) {
399			pcp->prc_thread = NULL;
400			pcp->prc_flags |= PRC_DESTROY;
401		}
402	}
403
404	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
405		pnp = VTOP(vp);
406		pcp = pnp->pr_common;
407		pcp->prc_thread = NULL;
408		pcp->prc_flags |= PRC_DESTROY;
409		prnotify(vp);
410	}
411
412	if (p->p_trace)
413		prnotify(p->p_trace);
414}
415
416/*
417 * Called when a zombie thread is joined or when a
418 * detached lwp exits.  Called from lwp_hash_out().
419 */
420void
421prlwpfree(proc_t *p, lwpent_t *lep)
422{
423	vnode_t *vp;
424	prnode_t *pnp;
425	prcommon_t *pcp;
426
427	ASSERT(MUTEX_HELD(&p->p_lock));
428
429	/*
430	 * The process must be blocked against /proc to do this safely.
431	 * The lwp must not disappear while the process is marked P_PR_LOCK.
432	 * It is the caller's responsibility to have called prbarrier(p).
433	 */
434	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
435
436	vp = lep->le_trace;
437	lep->le_trace = NULL;
438	while (vp) {
439		prnotify(vp);
440		pnp = VTOP(vp);
441		pcp = pnp->pr_common;
442		ASSERT(pcp->prc_thread == NULL &&
443		    (pcp->prc_flags & PRC_DESTROY));
444		pcp->prc_tslot = -1;
445		vp = pnp->pr_next;
446		pnp->pr_next = NULL;
447	}
448
449	if (p->p_trace)
450		prnotify(p->p_trace);
451}
452
453/*
454 * Called from a hook in exec() when a thread starts exec().
455 */
456void
457prexecstart(void)
458{
459	proc_t *p = ttoproc(curthread);
460	klwp_t *lwp = ttolwp(curthread);
461
462	/*
463	 * The P_PR_EXEC flag blocks /proc operations for
464	 * the duration of the exec().
465	 * We can't start exec() while the process is
466	 * locked by /proc, so we call prbarrier().
467	 * lwp_nostop keeps the process from being stopped
468	 * via job control for the duration of the exec().
469	 */
470
471	ASSERT(MUTEX_HELD(&p->p_lock));
472	prbarrier(p);
473	lwp->lwp_nostop++;
474	p->p_proc_flag |= P_PR_EXEC;
475}
476
477/*
478 * Called from a hook in exec() when a thread finishes exec().
479 * The thread may or may not have succeeded.  Some other thread
480 * may have beat it to the punch.
481 */
482void
483prexecend(void)
484{
485	proc_t *p = ttoproc(curthread);
486	klwp_t *lwp = ttolwp(curthread);
487	vnode_t *vp;
488	prnode_t *pnp;
489	prcommon_t *pcp;
490	model_t model = p->p_model;
491	id_t tid = curthread->t_tid;
492	int tslot = curthread->t_dslot;
493
494	ASSERT(MUTEX_HELD(&p->p_lock));
495
496	lwp->lwp_nostop--;
497	if (p->p_flag & SEXITLWPS) {
498		/*
499		 * We are on our way to exiting because some
500		 * other thread beat us in the race to exec().
501		 * Don't clear the P_PR_EXEC flag in this case.
502		 */
503		return;
504	}
505
506	/*
507	 * Wake up anyone waiting in /proc for the process to complete exec().
508	 */
509	p->p_proc_flag &= ~P_PR_EXEC;
510	if ((vp = p->p_trace) != NULL) {
511		pcp = VTOP(vp)->pr_common;
512		mutex_enter(&pcp->prc_mutex);
513		cv_broadcast(&pcp->prc_wait);
514		mutex_exit(&pcp->prc_mutex);
515		for (; vp != NULL; vp = pnp->pr_next) {
516			pnp = VTOP(vp);
517			pnp->pr_common->prc_datamodel = model;
518		}
519	}
520	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
521		/*
522		 * We dealt with the process common above.
523		 */
524		ASSERT(p->p_trace != NULL);
525		pcp = VTOP(vp)->pr_common;
526		mutex_enter(&pcp->prc_mutex);
527		cv_broadcast(&pcp->prc_wait);
528		mutex_exit(&pcp->prc_mutex);
529		for (; vp != NULL; vp = pnp->pr_next) {
530			pnp = VTOP(vp);
531			pcp = pnp->pr_common;
532			pcp->prc_datamodel = model;
533			pcp->prc_tid = tid;
534			pcp->prc_tslot = tslot;
535		}
536	}
537}
538
539/*
540 * Called from a hook in relvm() just before freeing the address space.
541 * We free all the watched areas now.
542 */
543void
544prrelvm(void)
545{
546	proc_t *p = ttoproc(curthread);
547
548	mutex_enter(&p->p_lock);
549	prbarrier(p);	/* block all other /proc operations */
550	if (pr_watch_active(p)) {
551		pr_free_watchpoints(p);
552		watch_disable(curthread);
553	}
554	mutex_exit(&p->p_lock);
555	pr_free_watched_pages(p);
556}
557
558/*
559 * Called from hooks in exec-related code when a traced process
560 * attempts to exec(2) a setuid/setgid program or an unreadable
561 * file.  Rather than fail the exec we invalidate the associated
562 * /proc vnodes so that subsequent attempts to use them will fail.
563 *
564 * All /proc vnodes, except directory vnodes, are retained on a linked
565 * list (rooted at p_plist in the process structure) until last close.
566 *
567 * A controlling process must re-open the /proc files in order to
568 * regain control.
569 */
570void
571prinvalidate(struct user *up)
572{
573	kthread_t *t = curthread;
574	proc_t *p = ttoproc(t);
575	vnode_t *vp;
576	prnode_t *pnp;
577	int writers = 0;
578
579	mutex_enter(&p->p_lock);
580	prbarrier(p);	/* block all other /proc operations */
581
582	/*
583	 * At this moment, there can be only one lwp in the process.
584	 */
585	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
586
587	/*
588	 * Invalidate any currently active /proc vnodes.
589	 */
590	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
591		pnp = VTOP(vp);
592		switch (pnp->pr_type) {
593		case PR_PSINFO:		/* these files can read by anyone */
594		case PR_LPSINFO:
595		case PR_LWPSINFO:
596		case PR_LWPDIR:
597		case PR_LWPIDDIR:
598		case PR_USAGE:
599		case PR_LUSAGE:
600		case PR_LWPUSAGE:
601			break;
602		default:
603			pnp->pr_flags |= PR_INVAL;
604			break;
605		}
606	}
607	/*
608	 * Wake up anyone waiting for the process or lwp.
609	 * p->p_trace is guaranteed to be non-NULL if there
610	 * are any open /proc files for this process.
611	 */
612	if ((vp = p->p_trace) != NULL) {
613		prcommon_t *pcp = VTOP(vp)->pr_pcommon;
614
615		prnotify(vp);
616		/*
617		 * Are there any writers?
618		 */
619		if ((writers = pcp->prc_writers) != 0) {
620			/*
621			 * Clear the exclusive open flag (old /proc interface).
622			 * Set prc_selfopens equal to prc_writers so that
623			 * the next O_EXCL|O_WRITE open will succeed
624			 * even with existing (though invalid) writers.
625			 * prclose() must decrement prc_selfopens when
626			 * the invalid files are closed.
627			 */
628			pcp->prc_flags &= ~PRC_EXCL;
629			ASSERT(pcp->prc_selfopens <= writers);
630			pcp->prc_selfopens = writers;
631		}
632	}
633	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
634	while (vp != NULL) {
635		/*
636		 * We should not invalidate the lwpiddir vnodes,
637		 * but the necessities of maintaining the old
638		 * ioctl()-based version of /proc require it.
639		 */
640		pnp = VTOP(vp);
641		pnp->pr_flags |= PR_INVAL;
642		prnotify(vp);
643		vp = pnp->pr_next;
644	}
645
646	/*
647	 * If any tracing flags are in effect and any vnodes are open for
648	 * writing then set the requested-stop and run-on-last-close flags.
649	 * Otherwise, clear all tracing flags.
650	 */
651	t->t_proc_flag &= ~TP_PAUSE;
652	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
653		t->t_proc_flag |= TP_PRSTOP;
654		aston(t);		/* so ISSIG will see the flag */
655		p->p_proc_flag |= P_PR_RUNLCL;
656	} else {
657		premptyset(&up->u_entrymask);		/* syscalls */
658		premptyset(&up->u_exitmask);
659		up->u_systrap = 0;
660		premptyset(&p->p_sigmask);		/* signals */
661		premptyset(&p->p_fltmask);		/* faults */
662		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
663		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
664		prnostep(ttolwp(t));
665	}
666
667	mutex_exit(&p->p_lock);
668}
669
670/*
671 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
672 * Return with pr_pidlock held in all cases.
673 * Return with p_lock held if the the process still exists.
674 * Return value is the process pointer if the process still exists, else NULL.
675 * If we lock the process, give ourself kernel priority to avoid deadlocks;
676 * this is undone in prunlock().
677 */
678proc_t *
679pr_p_lock(prnode_t *pnp)
680{
681	proc_t *p;
682	prcommon_t *pcp;
683
684	mutex_enter(&pr_pidlock);
685	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
686		return (NULL);
687	mutex_enter(&p->p_lock);
688	while (p->p_proc_flag & P_PR_LOCK) {
689		/*
690		 * This cv/mutex pair is persistent even if
691		 * the process disappears while we sleep.
692		 */
693		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
694		kmutex_t *mp = &p->p_lock;
695
696		mutex_exit(&pr_pidlock);
697		cv_wait(cv, mp);
698		mutex_exit(mp);
699		mutex_enter(&pr_pidlock);
700		if (pcp->prc_proc == NULL)
701			return (NULL);
702		ASSERT(p == pcp->prc_proc);
703		mutex_enter(&p->p_lock);
704	}
705	p->p_proc_flag |= P_PR_LOCK;
706	THREAD_KPRI_REQUEST();
707	return (p);
708}
709
710/*
711 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
712 * This prevents any lwp of the process from disappearing and
713 * blocks most operations that a process can perform on itself.
714 * Returns 0 on success, a non-zero error number on failure.
715 *
716 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
717 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
718 *
719 * error returns:
720 *	ENOENT: process or lwp has disappeared or process is exiting
721 *		(or has become a zombie and zdisp == ZNO).
722 *	EAGAIN: procfs vnode has become invalid.
723 *	EINTR:  signal arrived while waiting for exec to complete.
724 */
725int
726prlock(prnode_t *pnp, int zdisp)
727{
728	prcommon_t *pcp;
729	proc_t *p;
730
731again:
732	pcp = pnp->pr_common;
733	p = pr_p_lock(pnp);
734	mutex_exit(&pr_pidlock);
735
736	/*
737	 * Return ENOENT immediately if there is no process.
738	 */
739	if (p == NULL)
740		return (ENOENT);
741
742	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
743
744	/*
745	 * Return ENOENT if process entered zombie state or is exiting
746	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
747	 */
748	if (zdisp == ZNO &&
749	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
750		prunlock(pnp);
751		return (ENOENT);
752	}
753
754	/*
755	 * If lwp-specific, check to see if lwp has disappeared.
756	 */
757	if (pcp->prc_flags & PRC_LWP) {
758		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
759		    pcp->prc_tslot == -1) {
760			prunlock(pnp);
761			return (ENOENT);
762		}
763	}
764
765	/*
766	 * Return EAGAIN if we have encountered a security violation.
767	 * (The process exec'd a set-id or unreadable executable file.)
768	 */
769	if (pnp->pr_flags & PR_INVAL) {
770		prunlock(pnp);
771		return (EAGAIN);
772	}
773
774	/*
775	 * If process is undergoing an exec(), wait for
776	 * completion and then start all over again.
777	 */
778	if (p->p_proc_flag & P_PR_EXEC) {
779		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
780		mutex_enter(&pcp->prc_mutex);
781		prunlock(pnp);
782		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
783			mutex_exit(&pcp->prc_mutex);
784			return (EINTR);
785		}
786		mutex_exit(&pcp->prc_mutex);
787		goto again;
788	}
789
790	/*
791	 * We return holding p->p_lock.
792	 */
793	return (0);
794}
795
796/*
797 * Undo prlock() and pr_p_lock().
798 * p->p_lock is still held; pr_pidlock is no longer held.
799 *
800 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
801 * if any, waiting for the flag to be dropped; it retains p->p_lock.
802 *
803 * prunlock() calls prunmark() and then drops p->p_lock.
804 */
805void
806prunmark(proc_t *p)
807{
808	ASSERT(p->p_proc_flag & P_PR_LOCK);
809	ASSERT(MUTEX_HELD(&p->p_lock));
810
811	cv_signal(&pr_pid_cv[p->p_slot]);
812	p->p_proc_flag &= ~P_PR_LOCK;
813	THREAD_KPRI_RELEASE();
814}
815
816void
817prunlock(prnode_t *pnp)
818{
819	prcommon_t *pcp = pnp->pr_common;
820	proc_t *p = pcp->prc_proc;
821
822	/*
823	 * If we (or someone) gave it a SIGKILL, and it is not
824	 * already a zombie, set it running unconditionally.
825	 */
826	if ((p->p_flag & SKILLED) &&
827	    !(p->p_flag & SEXITING) &&
828	    !(pcp->prc_flags & PRC_DESTROY) &&
829	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
830		(void) pr_setrun(pnp, 0);
831	prunmark(p);
832	mutex_exit(&p->p_lock);
833}
834
835/*
836 * Called while holding p->p_lock to delay until the process is unlocked.
837 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
838 * The process cannot become locked again until p->p_lock is dropped.
839 */
840void
841prbarrier(proc_t *p)
842{
843	ASSERT(MUTEX_HELD(&p->p_lock));
844
845	if (p->p_proc_flag & P_PR_LOCK) {
846		/* The process is locked; delay until not locked */
847		uint_t slot = p->p_slot;
848
849		while (p->p_proc_flag & P_PR_LOCK)
850			cv_wait(&pr_pid_cv[slot], &p->p_lock);
851		cv_signal(&pr_pid_cv[slot]);
852	}
853}
854
855/*
856 * Return process/lwp status.
857 * The u-block is mapped in by this routine and unmapped at the end.
858 */
859void
860prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
861{
862	kthread_t *t;
863
864	ASSERT(MUTEX_HELD(&p->p_lock));
865
866	t = prchoose(p);	/* returns locked thread */
867	ASSERT(t != NULL);
868	thread_unlock(t);
869
870	/* just bzero the process part, prgetlwpstatus() does the rest */
871	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
872	sp->pr_nlwp = p->p_lwpcnt;
873	sp->pr_nzomb = p->p_zombcnt;
874	prassignset(&sp->pr_sigpend, &p->p_sig);
875	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
876	sp->pr_brksize = p->p_brksize;
877	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
878	sp->pr_stksize = p->p_stksize;
879	sp->pr_pid = p->p_pid;
880	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
881	    (p->p_flag & SZONETOP)) {
882		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
883		/*
884		 * Inside local zones, fake zsched's pid as parent pids for
885		 * processes which reference processes outside of the zone.
886		 */
887		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
888	} else {
889		sp->pr_ppid = p->p_ppid;
890	}
891	sp->pr_pgid  = p->p_pgrp;
892	sp->pr_sid   = p->p_sessp->s_sid;
893	sp->pr_taskid = p->p_task->tk_tkid;
894	sp->pr_projid = p->p_task->tk_proj->kpj_id;
895	sp->pr_zoneid = p->p_zone->zone_id;
896	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
897	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
898	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
899	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
900	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
901	prassignset(&sp->pr_flttrace, &p->p_fltmask);
902	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
903	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
904	switch (p->p_model) {
905	case DATAMODEL_ILP32:
906		sp->pr_dmodel = PR_MODEL_ILP32;
907		break;
908	case DATAMODEL_LP64:
909		sp->pr_dmodel = PR_MODEL_LP64;
910		break;
911	}
912	if (p->p_agenttp)
913		sp->pr_agentid = p->p_agenttp->t_tid;
914
915	/* get the chosen lwp's status */
916	prgetlwpstatus(t, &sp->pr_lwp, zp);
917
918	/* replicate the flags */
919	sp->pr_flags = sp->pr_lwp.pr_flags;
920}
921
922#ifdef _SYSCALL32_IMPL
923void
924prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
925{
926	proc_t *p = ttoproc(t);
927	klwp_t *lwp = ttolwp(t);
928	struct mstate *ms = &lwp->lwp_mstate;
929	hrtime_t usr, sys;
930	int flags;
931	ulong_t instr;
932
933	ASSERT(MUTEX_HELD(&p->p_lock));
934
935	bzero(sp, sizeof (*sp));
936	flags = 0L;
937	if (t->t_state == TS_STOPPED) {
938		flags |= PR_STOPPED;
939		if ((t->t_schedflag & TS_PSTART) == 0)
940			flags |= PR_ISTOP;
941	} else if (VSTOPPED(t)) {
942		flags |= PR_STOPPED|PR_ISTOP;
943	}
944	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
945		flags |= PR_DSTOP;
946	if (lwp->lwp_asleep)
947		flags |= PR_ASLEEP;
948	if (t == p->p_agenttp)
949		flags |= PR_AGENT;
950	if (!(t->t_proc_flag & TP_TWAIT))
951		flags |= PR_DETACH;
952	if (t->t_proc_flag & TP_DAEMON)
953		flags |= PR_DAEMON;
954	if (p->p_proc_flag & P_PR_FORK)
955		flags |= PR_FORK;
956	if (p->p_proc_flag & P_PR_RUNLCL)
957		flags |= PR_RLC;
958	if (p->p_proc_flag & P_PR_KILLCL)
959		flags |= PR_KLC;
960	if (p->p_proc_flag & P_PR_ASYNC)
961		flags |= PR_ASYNC;
962	if (p->p_proc_flag & P_PR_BPTADJ)
963		flags |= PR_BPTADJ;
964	if (p->p_proc_flag & P_PR_PTRACE)
965		flags |= PR_PTRACE;
966	if (p->p_flag & SMSACCT)
967		flags |= PR_MSACCT;
968	if (p->p_flag & SMSFORK)
969		flags |= PR_MSFORK;
970	if (p->p_flag & SVFWAIT)
971		flags |= PR_VFORKP;
972	sp->pr_flags = flags;
973	if (VSTOPPED(t)) {
974		sp->pr_why   = PR_REQUESTED;
975		sp->pr_what  = 0;
976	} else {
977		sp->pr_why   = t->t_whystop;
978		sp->pr_what  = t->t_whatstop;
979	}
980	sp->pr_lwpid = t->t_tid;
981	sp->pr_cursig  = lwp->lwp_cursig;
982	prassignset(&sp->pr_lwppend, &t->t_sig);
983	schedctl_finish_sigblock(t);
984	prassignset(&sp->pr_lwphold, &t->t_hold);
985	if (t->t_whystop == PR_FAULTED) {
986		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
987		if (t->t_whatstop == FLTPAGE)
988			sp->pr_info.si_addr =
989			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
990	} else if (lwp->lwp_curinfo)
991		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
992	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
993	    sp->pr_info.si_zoneid != zp->zone_id) {
994		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
995		sp->pr_info.si_uid = 0;
996		sp->pr_info.si_ctid = -1;
997		sp->pr_info.si_zoneid = zp->zone_id;
998	}
999	sp->pr_altstack.ss_sp =
1000	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
1001	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1002	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1003	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1004	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1005	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1006	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1007	    sizeof (sp->pr_clname) - 1);
1008	if (flags & PR_STOPPED)
1009		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1010	usr = ms->ms_acct[LMS_USER];
1011	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1012	scalehrtime(&usr);
1013	scalehrtime(&sys);
1014	hrt2ts32(usr, &sp->pr_utime);
1015	hrt2ts32(sys, &sp->pr_stime);
1016
1017	/*
1018	 * Fetch the current instruction, if not a system process.
1019	 * We don't attempt this unless the lwp is stopped.
1020	 */
1021	if ((p->p_flag & SSYS) || p->p_as == &kas)
1022		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1023	else if (!(flags & PR_STOPPED))
1024		sp->pr_flags |= PR_PCINVAL;
1025	else if (!prfetchinstr(lwp, &instr))
1026		sp->pr_flags |= PR_PCINVAL;
1027	else
1028		sp->pr_instr = (uint32_t)instr;
1029
1030	/*
1031	 * Drop p_lock while touching the lwp's stack.
1032	 */
1033	mutex_exit(&p->p_lock);
1034	if (prisstep(lwp))
1035		sp->pr_flags |= PR_STEP;
1036	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1037		int i;
1038
1039		sp->pr_syscall = get_syscall32_args(lwp,
1040		    (int *)sp->pr_sysarg, &i);
1041		sp->pr_nsysarg = (ushort_t)i;
1042	}
1043	if ((flags & PR_STOPPED) || t == curthread)
1044		prgetprregs32(lwp, sp->pr_reg);
1045	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1046	    (flags & PR_VFORKP)) {
1047		long r1, r2;
1048		user_t *up;
1049		auxv_t *auxp;
1050		int i;
1051
1052		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1053		if (sp->pr_errno == 0) {
1054			sp->pr_rval1 = (int32_t)r1;
1055			sp->pr_rval2 = (int32_t)r2;
1056			sp->pr_errpriv = PRIV_NONE;
1057		} else
1058			sp->pr_errpriv = lwp->lwp_badpriv;
1059
1060		if (t->t_sysnum == SYS_execve) {
1061			up = PTOU(p);
1062			sp->pr_sysarg[0] = 0;
1063			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1064			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1065			for (i = 0, auxp = up->u_auxv;
1066			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1067			    i++, auxp++) {
1068				if (auxp->a_type == AT_SUN_EXECNAME) {
1069					sp->pr_sysarg[0] =
1070					    (caddr32_t)
1071					    (uintptr_t)auxp->a_un.a_ptr;
1072					break;
1073				}
1074			}
1075		}
1076	}
1077	if (prhasfp())
1078		prgetprfpregs32(lwp, &sp->pr_fpreg);
1079	mutex_enter(&p->p_lock);
1080}
1081
1082void
1083prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1084{
1085	kthread_t *t;
1086
1087	ASSERT(MUTEX_HELD(&p->p_lock));
1088
1089	t = prchoose(p);	/* returns locked thread */
1090	ASSERT(t != NULL);
1091	thread_unlock(t);
1092
1093	/* just bzero the process part, prgetlwpstatus32() does the rest */
1094	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1095	sp->pr_nlwp = p->p_lwpcnt;
1096	sp->pr_nzomb = p->p_zombcnt;
1097	prassignset(&sp->pr_sigpend, &p->p_sig);
1098	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1099	sp->pr_brksize = (uint32_t)p->p_brksize;
1100	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1101	sp->pr_stksize = (uint32_t)p->p_stksize;
1102	sp->pr_pid   = p->p_pid;
1103	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1104	    (p->p_flag & SZONETOP)) {
1105		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1106		/*
1107		 * Inside local zones, fake zsched's pid as parent pids for
1108		 * processes which reference processes outside of the zone.
1109		 */
1110		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1111	} else {
1112		sp->pr_ppid = p->p_ppid;
1113	}
1114	sp->pr_pgid  = p->p_pgrp;
1115	sp->pr_sid   = p->p_sessp->s_sid;
1116	sp->pr_taskid = p->p_task->tk_tkid;
1117	sp->pr_projid = p->p_task->tk_proj->kpj_id;
1118	sp->pr_zoneid = p->p_zone->zone_id;
1119	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1120	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1121	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1122	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1123	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1124	prassignset(&sp->pr_flttrace, &p->p_fltmask);
1125	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1126	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1127	switch (p->p_model) {
1128	case DATAMODEL_ILP32:
1129		sp->pr_dmodel = PR_MODEL_ILP32;
1130		break;
1131	case DATAMODEL_LP64:
1132		sp->pr_dmodel = PR_MODEL_LP64;
1133		break;
1134	}
1135	if (p->p_agenttp)
1136		sp->pr_agentid = p->p_agenttp->t_tid;
1137
1138	/* get the chosen lwp's status */
1139	prgetlwpstatus32(t, &sp->pr_lwp, zp);
1140
1141	/* replicate the flags */
1142	sp->pr_flags = sp->pr_lwp.pr_flags;
1143}
1144#endif	/* _SYSCALL32_IMPL */
1145
1146/*
1147 * Return lwp status.
1148 */
1149void
1150prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1151{
1152	proc_t *p = ttoproc(t);
1153	klwp_t *lwp = ttolwp(t);
1154	struct mstate *ms = &lwp->lwp_mstate;
1155	hrtime_t usr, sys;
1156	int flags;
1157	ulong_t instr;
1158
1159	ASSERT(MUTEX_HELD(&p->p_lock));
1160
1161	bzero(sp, sizeof (*sp));
1162	flags = 0L;
1163	if (t->t_state == TS_STOPPED) {
1164		flags |= PR_STOPPED;
1165		if ((t->t_schedflag & TS_PSTART) == 0)
1166			flags |= PR_ISTOP;
1167	} else if (VSTOPPED(t)) {
1168		flags |= PR_STOPPED|PR_ISTOP;
1169	}
1170	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1171		flags |= PR_DSTOP;
1172	if (lwp->lwp_asleep)
1173		flags |= PR_ASLEEP;
1174	if (t == p->p_agenttp)
1175		flags |= PR_AGENT;
1176	if (!(t->t_proc_flag & TP_TWAIT))
1177		flags |= PR_DETACH;
1178	if (t->t_proc_flag & TP_DAEMON)
1179		flags |= PR_DAEMON;
1180	if (p->p_proc_flag & P_PR_FORK)
1181		flags |= PR_FORK;
1182	if (p->p_proc_flag & P_PR_RUNLCL)
1183		flags |= PR_RLC;
1184	if (p->p_proc_flag & P_PR_KILLCL)
1185		flags |= PR_KLC;
1186	if (p->p_proc_flag & P_PR_ASYNC)
1187		flags |= PR_ASYNC;
1188	if (p->p_proc_flag & P_PR_BPTADJ)
1189		flags |= PR_BPTADJ;
1190	if (p->p_proc_flag & P_PR_PTRACE)
1191		flags |= PR_PTRACE;
1192	if (p->p_flag & SMSACCT)
1193		flags |= PR_MSACCT;
1194	if (p->p_flag & SMSFORK)
1195		flags |= PR_MSFORK;
1196	if (p->p_flag & SVFWAIT)
1197		flags |= PR_VFORKP;
1198	if (p->p_pgidp->pid_pgorphaned)
1199		flags |= PR_ORPHAN;
1200	if (p->p_pidflag & CLDNOSIGCHLD)
1201		flags |= PR_NOSIGCHLD;
1202	if (p->p_pidflag & CLDWAITPID)
1203		flags |= PR_WAITPID;
1204	sp->pr_flags = flags;
1205	if (VSTOPPED(t)) {
1206		sp->pr_why   = PR_REQUESTED;
1207		sp->pr_what  = 0;
1208	} else {
1209		sp->pr_why   = t->t_whystop;
1210		sp->pr_what  = t->t_whatstop;
1211	}
1212	sp->pr_lwpid = t->t_tid;
1213	sp->pr_cursig  = lwp->lwp_cursig;
1214	prassignset(&sp->pr_lwppend, &t->t_sig);
1215	schedctl_finish_sigblock(t);
1216	prassignset(&sp->pr_lwphold, &t->t_hold);
1217	if (t->t_whystop == PR_FAULTED)
1218		bcopy(&lwp->lwp_siginfo,
1219		    &sp->pr_info, sizeof (k_siginfo_t));
1220	else if (lwp->lwp_curinfo)
1221		bcopy(&lwp->lwp_curinfo->sq_info,
1222		    &sp->pr_info, sizeof (k_siginfo_t));
1223	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1224	    sp->pr_info.si_zoneid != zp->zone_id) {
1225		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1226		sp->pr_info.si_uid = 0;
1227		sp->pr_info.si_ctid = -1;
1228		sp->pr_info.si_zoneid = zp->zone_id;
1229	}
1230	sp->pr_altstack = lwp->lwp_sigaltstack;
1231	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1232	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1233	sp->pr_ustack = lwp->lwp_ustack;
1234	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1235	    sizeof (sp->pr_clname) - 1);
1236	if (flags & PR_STOPPED)
1237		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1238	usr = ms->ms_acct[LMS_USER];
1239	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1240	scalehrtime(&usr);
1241	scalehrtime(&sys);
1242	hrt2ts(usr, &sp->pr_utime);
1243	hrt2ts(sys, &sp->pr_stime);
1244
1245	/*
1246	 * Fetch the current instruction, if not a system process.
1247	 * We don't attempt this unless the lwp is stopped.
1248	 */
1249	if ((p->p_flag & SSYS) || p->p_as == &kas)
1250		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1251	else if (!(flags & PR_STOPPED))
1252		sp->pr_flags |= PR_PCINVAL;
1253	else if (!prfetchinstr(lwp, &instr))
1254		sp->pr_flags |= PR_PCINVAL;
1255	else
1256		sp->pr_instr = instr;
1257
1258	/*
1259	 * Drop p_lock while touching the lwp's stack.
1260	 */
1261	mutex_exit(&p->p_lock);
1262	if (prisstep(lwp))
1263		sp->pr_flags |= PR_STEP;
1264	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1265		int i;
1266
1267		sp->pr_syscall = get_syscall_args(lwp,
1268		    (long *)sp->pr_sysarg, &i);
1269		sp->pr_nsysarg = (ushort_t)i;
1270	}
1271	if ((flags & PR_STOPPED) || t == curthread)
1272		prgetprregs(lwp, sp->pr_reg);
1273	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1274	    (flags & PR_VFORKP)) {
1275		user_t *up;
1276		auxv_t *auxp;
1277		int i;
1278
1279		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1280		if (sp->pr_errno == 0)
1281			sp->pr_errpriv = PRIV_NONE;
1282		else
1283			sp->pr_errpriv = lwp->lwp_badpriv;
1284
1285		if (t->t_sysnum == SYS_execve) {
1286			up = PTOU(p);
1287			sp->pr_sysarg[0] = 0;
1288			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1289			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1290			for (i = 0, auxp = up->u_auxv;
1291			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1292			    i++, auxp++) {
1293				if (auxp->a_type == AT_SUN_EXECNAME) {
1294					sp->pr_sysarg[0] =
1295					    (uintptr_t)auxp->a_un.a_ptr;
1296					break;
1297				}
1298			}
1299		}
1300	}
1301	if (prhasfp())
1302		prgetprfpregs(lwp, &sp->pr_fpreg);
1303	mutex_enter(&p->p_lock);
1304}
1305
1306/*
1307 * Get the sigaction structure for the specified signal.  The u-block
1308 * must already have been mapped in by the caller.
1309 */
1310void
1311prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1312{
1313	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1314
1315	bzero(sp, sizeof (*sp));
1316
1317	if (sig != 0 && (unsigned)sig < nsig) {
1318		sp->sa_handler = up->u_signal[sig-1];
1319		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1320		if (sigismember(&up->u_sigonstack, sig))
1321			sp->sa_flags |= SA_ONSTACK;
1322		if (sigismember(&up->u_sigresethand, sig))
1323			sp->sa_flags |= SA_RESETHAND;
1324		if (sigismember(&up->u_sigrestart, sig))
1325			sp->sa_flags |= SA_RESTART;
1326		if (sigismember(&p->p_siginfo, sig))
1327			sp->sa_flags |= SA_SIGINFO;
1328		if (sigismember(&up->u_signodefer, sig))
1329			sp->sa_flags |= SA_NODEFER;
1330		if (sig == SIGCLD) {
1331			if (p->p_flag & SNOWAIT)
1332				sp->sa_flags |= SA_NOCLDWAIT;
1333			if ((p->p_flag & SJCTL) == 0)
1334				sp->sa_flags |= SA_NOCLDSTOP;
1335		}
1336	}
1337}
1338
1339#ifdef _SYSCALL32_IMPL
1340void
1341prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1342{
1343	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1344
1345	bzero(sp, sizeof (*sp));
1346
1347	if (sig != 0 && (unsigned)sig < nsig) {
1348		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1349		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1350		if (sigismember(&up->u_sigonstack, sig))
1351			sp->sa_flags |= SA_ONSTACK;
1352		if (sigismember(&up->u_sigresethand, sig))
1353			sp->sa_flags |= SA_RESETHAND;
1354		if (sigismember(&up->u_sigrestart, sig))
1355			sp->sa_flags |= SA_RESTART;
1356		if (sigismember(&p->p_siginfo, sig))
1357			sp->sa_flags |= SA_SIGINFO;
1358		if (sigismember(&up->u_signodefer, sig))
1359			sp->sa_flags |= SA_NODEFER;
1360		if (sig == SIGCLD) {
1361			if (p->p_flag & SNOWAIT)
1362				sp->sa_flags |= SA_NOCLDWAIT;
1363			if ((p->p_flag & SJCTL) == 0)
1364				sp->sa_flags |= SA_NOCLDSTOP;
1365		}
1366	}
1367}
1368#endif	/* _SYSCALL32_IMPL */
1369
1370/*
1371 * Count the number of segments in this process's address space.
1372 */
1373int
1374prnsegs(struct as *as, int reserved)
1375{
1376	int n = 0;
1377	struct seg *seg;
1378
1379	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1380
1381	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1382		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1383		caddr_t saddr, naddr;
1384		void *tmp = NULL;
1385
1386		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1387			(void) pr_getprot(seg, reserved, &tmp,
1388			    &saddr, &naddr, eaddr);
1389			if (saddr != naddr)
1390				n++;
1391		}
1392
1393		ASSERT(tmp == NULL);
1394	}
1395
1396	return (n);
1397}
1398
1399/*
1400 * Convert uint32_t to decimal string w/o leading zeros.
1401 * Add trailing null characters if 'len' is greater than string length.
1402 * Return the string length.
1403 */
1404int
1405pr_u32tos(uint32_t n, char *s, int len)
1406{
1407	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
1408	char *cp = cbuf;
1409	char *end = s + len;
1410
1411	do {
1412		*cp++ = (char)(n % 10 + '0');
1413		n /= 10;
1414	} while (n);
1415
1416	len = (int)(cp - cbuf);
1417
1418	do {
1419		*s++ = *--cp;
1420	} while (cp > cbuf);
1421
1422	while (s < end)		/* optional pad */
1423		*s++ = '\0';
1424
1425	return (len);
1426}
1427
1428/*
1429 * Convert uint64_t to decimal string w/o leading zeros.
1430 * Return the string length.
1431 */
1432static int
1433pr_u64tos(uint64_t n, char *s)
1434{
1435	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
1436	char *cp = cbuf;
1437	int len;
1438
1439	do {
1440		*cp++ = (char)(n % 10 + '0');
1441		n /= 10;
1442	} while (n);
1443
1444	len = (int)(cp - cbuf);
1445
1446	do {
1447		*s++ = *--cp;
1448	} while (cp > cbuf);
1449
1450	return (len);
1451}
1452
1453void
1454pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1455{
1456	char *s = name;
1457	struct vfs *vfsp;
1458	struct vfssw *vfsswp;
1459
1460	if ((vfsp = vp->v_vfsp) != NULL &&
1461	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1462	    *vfsswp->vsw_name) {
1463		(void) strcpy(s, vfsswp->vsw_name);
1464		s += strlen(s);
1465		*s++ = '.';
1466	}
1467	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1468	*s++ = '.';
1469	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1470	*s++ = '.';
1471	s += pr_u64tos(vattr->va_nodeid, s);
1472	*s++ = '\0';
1473}
1474
1475struct seg *
1476break_seg(proc_t *p)
1477{
1478	caddr_t addr = p->p_brkbase;
1479	struct seg *seg;
1480	struct vnode *vp;
1481
1482	if (p->p_brksize != 0)
1483		addr += p->p_brksize - 1;
1484	seg = as_segat(p->p_as, addr);
1485	if (seg != NULL && seg->s_ops == &segvn_ops &&
1486	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1487		return (seg);
1488	return (NULL);
1489}
1490
1491/*
1492 * Implementation of service functions to handle procfs generic chained
1493 * copyout buffers.
1494 */
1495typedef struct pr_iobuf_list {
1496	list_node_t	piol_link;	/* buffer linkage */
1497	size_t		piol_size;	/* total size (header + data) */
1498	size_t		piol_usedsize;	/* amount to copy out from this buf */
1499} piol_t;
1500
1501#define	MAPSIZE	(64 * 1024)
1502#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1503
1504void
1505pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1506{
1507	piol_t	*iol;
1508	size_t	initial_size = MIN(1, n) * itemsize;
1509
1510	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1511
1512	ASSERT(list_head(iolhead) == NULL);
1513	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1514	ASSERT(initial_size > 0);
1515
1516	/*
1517	 * Someone creating chained copyout buffers may ask for less than
1518	 * MAPSIZE if the amount of data to be buffered is known to be
1519	 * smaller than that.
1520	 * But in order to prevent involuntary self-denial of service,
1521	 * the requested input size is clamped at MAPSIZE.
1522	 */
1523	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1524	iol = kmem_alloc(initial_size, KM_SLEEP);
1525	list_insert_head(iolhead, iol);
1526	iol->piol_usedsize = 0;
1527	iol->piol_size = initial_size;
1528}
1529
1530void *
1531pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1532{
1533	piol_t	*iol;
1534	char	*new;
1535
1536	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1537	ASSERT(list_head(iolhead) != NULL);
1538
1539	iol = (piol_t *)list_tail(iolhead);
1540
1541	if (iol->piol_size <
1542	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1543		/*
1544		 * Out of space in the current buffer. Allocate more.
1545		 */
1546		piol_t *newiol;
1547
1548		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1549		newiol->piol_size = MAPSIZE;
1550		newiol->piol_usedsize = 0;
1551
1552		list_insert_after(iolhead, iol, newiol);
1553		iol = list_next(iolhead, iol);
1554		ASSERT(iol == newiol);
1555	}
1556	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1557	iol->piol_usedsize += itemsize;
1558	bzero(new, itemsize);
1559	return (new);
1560}
1561
1562int
1563pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1564{
1565	int error = errin;
1566	piol_t	*iol;
1567
1568	while ((iol = list_head(iolhead)) != NULL) {
1569		list_remove(iolhead, iol);
1570		if (!error) {
1571			if (copyout(PIOL_DATABUF(iol), *tgt,
1572			    iol->piol_usedsize))
1573				error = EFAULT;
1574			*tgt += iol->piol_usedsize;
1575		}
1576		kmem_free(iol, iol->piol_size);
1577	}
1578	list_destroy(iolhead);
1579
1580	return (error);
1581}
1582
1583int
1584pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1585{
1586	offset_t	off = uiop->uio_offset;
1587	char		*base;
1588	size_t		size;
1589	piol_t		*iol;
1590	int		error = errin;
1591
1592	while ((iol = list_head(iolhead)) != NULL) {
1593		list_remove(iolhead, iol);
1594		base = PIOL_DATABUF(iol);
1595		size = iol->piol_usedsize;
1596		if (off <= size && error == 0 && uiop->uio_resid > 0)
1597			error = uiomove(base + off, size - off,
1598			    UIO_READ, uiop);
1599		off = MAX(0, off - (offset_t)size);
1600		kmem_free(iol, iol->piol_size);
1601	}
1602	list_destroy(iolhead);
1603
1604	return (error);
1605}
1606
1607/*
1608 * Return an array of structures with memory map information.
1609 * We allocate here; the caller must deallocate.
1610 */
1611int
1612prgetmap(proc_t *p, int reserved, list_t *iolhead)
1613{
1614	struct as *as = p->p_as;
1615	prmap_t *mp;
1616	struct seg *seg;
1617	struct seg *brkseg, *stkseg;
1618	struct vnode *vp;
1619	struct vattr vattr;
1620	uint_t prot;
1621
1622	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1623
1624	/*
1625	 * Request an initial buffer size that doesn't waste memory
1626	 * if the address space has only a small number of segments.
1627	 */
1628	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1629
1630	if ((seg = AS_SEGFIRST(as)) == NULL)
1631		return (0);
1632
1633	brkseg = break_seg(p);
1634	stkseg = as_segat(as, prgetstackbase(p));
1635
1636	do {
1637		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1638		caddr_t saddr, naddr;
1639		void *tmp = NULL;
1640
1641		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1642			prot = pr_getprot(seg, reserved, &tmp,
1643			    &saddr, &naddr, eaddr);
1644			if (saddr == naddr)
1645				continue;
1646
1647			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1648
1649			mp->pr_vaddr = (uintptr_t)saddr;
1650			mp->pr_size = naddr - saddr;
1651			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1652			mp->pr_mflags = 0;
1653			if (prot & PROT_READ)
1654				mp->pr_mflags |= MA_READ;
1655			if (prot & PROT_WRITE)
1656				mp->pr_mflags |= MA_WRITE;
1657			if (prot & PROT_EXEC)
1658				mp->pr_mflags |= MA_EXEC;
1659			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1660				mp->pr_mflags |= MA_SHARED;
1661			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1662				mp->pr_mflags |= MA_NORESERVE;
1663			if (seg->s_ops == &segspt_shmops ||
1664			    (seg->s_ops == &segvn_ops &&
1665			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1666				mp->pr_mflags |= MA_ANON;
1667			if (seg == brkseg)
1668				mp->pr_mflags |= MA_BREAK;
1669			else if (seg == stkseg) {
1670				mp->pr_mflags |= MA_STACK;
1671				if (reserved) {
1672					size_t maxstack =
1673					    ((size_t)p->p_stk_ctl +
1674					    PAGEOFFSET) & PAGEMASK;
1675					mp->pr_vaddr =
1676					    (uintptr_t)prgetstackbase(p) +
1677					    p->p_stksize - maxstack;
1678					mp->pr_size = (uintptr_t)naddr -
1679					    mp->pr_vaddr;
1680				}
1681			}
1682			if (seg->s_ops == &segspt_shmops)
1683				mp->pr_mflags |= MA_ISM | MA_SHM;
1684			mp->pr_pagesize = PAGESIZE;
1685
1686			/*
1687			 * Manufacture a filename for the "object" directory.
1688			 */
1689			vattr.va_mask = AT_FSID|AT_NODEID;
1690			if (seg->s_ops == &segvn_ops &&
1691			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1692			    vp != NULL && vp->v_type == VREG &&
1693			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1694				if (vp == p->p_exec)
1695					(void) strcpy(mp->pr_mapname, "a.out");
1696				else
1697					pr_object_name(mp->pr_mapname,
1698					    vp, &vattr);
1699			}
1700
1701			/*
1702			 * Get the SysV shared memory id, if any.
1703			 */
1704			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1705			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1706			    SHMID_NONE) {
1707				if (mp->pr_shmid == SHMID_FREE)
1708					mp->pr_shmid = -1;
1709
1710				mp->pr_mflags |= MA_SHM;
1711			} else {
1712				mp->pr_shmid = -1;
1713			}
1714		}
1715		ASSERT(tmp == NULL);
1716	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1717
1718	return (0);
1719}
1720
1721#ifdef _SYSCALL32_IMPL
1722int
1723prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1724{
1725	struct as *as = p->p_as;
1726	prmap32_t *mp;
1727	struct seg *seg;
1728	struct seg *brkseg, *stkseg;
1729	struct vnode *vp;
1730	struct vattr vattr;
1731	uint_t prot;
1732
1733	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1734
1735	/*
1736	 * Request an initial buffer size that doesn't waste memory
1737	 * if the address space has only a small number of segments.
1738	 */
1739	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1740
1741	if ((seg = AS_SEGFIRST(as)) == NULL)
1742		return (0);
1743
1744	brkseg = break_seg(p);
1745	stkseg = as_segat(as, prgetstackbase(p));
1746
1747	do {
1748		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1749		caddr_t saddr, naddr;
1750		void *tmp = NULL;
1751
1752		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1753			prot = pr_getprot(seg, reserved, &tmp,
1754			    &saddr, &naddr, eaddr);
1755			if (saddr == naddr)
1756				continue;
1757
1758			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1759
1760			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1761			mp->pr_size = (size32_t)(naddr - saddr);
1762			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1763			mp->pr_mflags = 0;
1764			if (prot & PROT_READ)
1765				mp->pr_mflags |= MA_READ;
1766			if (prot & PROT_WRITE)
1767				mp->pr_mflags |= MA_WRITE;
1768			if (prot & PROT_EXEC)
1769				mp->pr_mflags |= MA_EXEC;
1770			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1771				mp->pr_mflags |= MA_SHARED;
1772			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1773				mp->pr_mflags |= MA_NORESERVE;
1774			if (seg->s_ops == &segspt_shmops ||
1775			    (seg->s_ops == &segvn_ops &&
1776			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1777				mp->pr_mflags |= MA_ANON;
1778			if (seg == brkseg)
1779				mp->pr_mflags |= MA_BREAK;
1780			else if (seg == stkseg) {
1781				mp->pr_mflags |= MA_STACK;
1782				if (reserved) {
1783					size_t maxstack =
1784					    ((size_t)p->p_stk_ctl +
1785					    PAGEOFFSET) & PAGEMASK;
1786					uintptr_t vaddr =
1787					    (uintptr_t)prgetstackbase(p) +
1788					    p->p_stksize - maxstack;
1789					mp->pr_vaddr = (caddr32_t)vaddr;
1790					mp->pr_size = (size32_t)
1791					    ((uintptr_t)naddr - vaddr);
1792				}
1793			}
1794			if (seg->s_ops == &segspt_shmops)
1795				mp->pr_mflags |= MA_ISM | MA_SHM;
1796			mp->pr_pagesize = PAGESIZE;
1797
1798			/*
1799			 * Manufacture a filename for the "object" directory.
1800			 */
1801			vattr.va_mask = AT_FSID|AT_NODEID;
1802			if (seg->s_ops == &segvn_ops &&
1803			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1804			    vp != NULL && vp->v_type == VREG &&
1805			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1806				if (vp == p->p_exec)
1807					(void) strcpy(mp->pr_mapname, "a.out");
1808				else
1809					pr_object_name(mp->pr_mapname,
1810					    vp, &vattr);
1811			}
1812
1813			/*
1814			 * Get the SysV shared memory id, if any.
1815			 */
1816			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1817			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1818			    SHMID_NONE) {
1819				if (mp->pr_shmid == SHMID_FREE)
1820					mp->pr_shmid = -1;
1821
1822				mp->pr_mflags |= MA_SHM;
1823			} else {
1824				mp->pr_shmid = -1;
1825			}
1826		}
1827		ASSERT(tmp == NULL);
1828	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1829
1830	return (0);
1831}
1832#endif	/* _SYSCALL32_IMPL */
1833
1834/*
1835 * Return the size of the /proc page data file.
1836 */
1837size_t
1838prpdsize(struct as *as)
1839{
1840	struct seg *seg;
1841	size_t size;
1842
1843	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1844
1845	if ((seg = AS_SEGFIRST(as)) == NULL)
1846		return (0);
1847
1848	size = sizeof (prpageheader_t);
1849	do {
1850		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1851		caddr_t saddr, naddr;
1852		void *tmp = NULL;
1853		size_t npage;
1854
1855		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1856			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1857			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1858				size += sizeof (prasmap_t) + round8(npage);
1859		}
1860		ASSERT(tmp == NULL);
1861	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1862
1863	return (size);
1864}
1865
1866#ifdef _SYSCALL32_IMPL
1867size_t
1868prpdsize32(struct as *as)
1869{
1870	struct seg *seg;
1871	size_t size;
1872
1873	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1874
1875	if ((seg = AS_SEGFIRST(as)) == NULL)
1876		return (0);
1877
1878	size = sizeof (prpageheader32_t);
1879	do {
1880		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1881		caddr_t saddr, naddr;
1882		void *tmp = NULL;
1883		size_t npage;
1884
1885		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1886			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1887			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1888				size += sizeof (prasmap32_t) + round8(npage);
1889		}
1890		ASSERT(tmp == NULL);
1891	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1892
1893	return (size);
1894}
1895#endif	/* _SYSCALL32_IMPL */
1896
1897/*
1898 * Read page data information.
1899 */
1900int
1901prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1902{
1903	struct as *as = p->p_as;
1904	caddr_t buf;
1905	size_t size;
1906	prpageheader_t *php;
1907	prasmap_t *pmp;
1908	struct seg *seg;
1909	int error;
1910
1911again:
1912	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1913
1914	if ((seg = AS_SEGFIRST(as)) == NULL) {
1915		AS_LOCK_EXIT(as, &as->a_lock);
1916		return (0);
1917	}
1918	size = prpdsize(as);
1919	if (uiop->uio_resid < size) {
1920		AS_LOCK_EXIT(as, &as->a_lock);
1921		return (E2BIG);
1922	}
1923
1924	buf = kmem_zalloc(size, KM_SLEEP);
1925	php = (prpageheader_t *)buf;
1926	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1927
1928	hrt2ts(gethrtime(), &php->pr_tstamp);
1929	php->pr_nmap = 0;
1930	php->pr_npage = 0;
1931	do {
1932		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1933		caddr_t saddr, naddr;
1934		void *tmp = NULL;
1935
1936		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1937			struct vnode *vp;
1938			struct vattr vattr;
1939			size_t len;
1940			size_t npage;
1941			uint_t prot;
1942			uintptr_t next;
1943
1944			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1945			if ((len = (size_t)(naddr - saddr)) == 0)
1946				continue;
1947			npage = len / PAGESIZE;
1948			next = (uintptr_t)(pmp + 1) + round8(npage);
1949			/*
1950			 * It's possible that the address space can change
1951			 * subtlely even though we're holding as->a_lock
1952			 * due to the nondeterminism of page_exists() in
1953			 * the presence of asychronously flushed pages or
1954			 * mapped files whose sizes are changing.
1955			 * page_exists() may be called indirectly from
1956			 * pr_getprot() by a SEGOP_INCORE() routine.
1957			 * If this happens we need to make sure we don't
1958			 * overrun the buffer whose size we computed based
1959			 * on the initial iteration through the segments.
1960			 * Once we've detected an overflow, we need to clean
1961			 * up the temporary memory allocated in pr_getprot()
1962			 * and retry. If there's a pending signal, we return
1963			 * EINTR so that this thread can be dislodged if
1964			 * a latent bug causes us to spin indefinitely.
1965			 */
1966			if (next > (uintptr_t)buf + size) {
1967				pr_getprot_done(&tmp);
1968				AS_LOCK_EXIT(as, &as->a_lock);
1969
1970				kmem_free(buf, size);
1971
1972				if (ISSIG(curthread, JUSTLOOKING))
1973					return (EINTR);
1974
1975				goto again;
1976			}
1977
1978			php->pr_nmap++;
1979			php->pr_npage += npage;
1980			pmp->pr_vaddr = (uintptr_t)saddr;
1981			pmp->pr_npage = npage;
1982			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1983			pmp->pr_mflags = 0;
1984			if (prot & PROT_READ)
1985				pmp->pr_mflags |= MA_READ;
1986			if (prot & PROT_WRITE)
1987				pmp->pr_mflags |= MA_WRITE;
1988			if (prot & PROT_EXEC)
1989				pmp->pr_mflags |= MA_EXEC;
1990			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1991				pmp->pr_mflags |= MA_SHARED;
1992			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1993				pmp->pr_mflags |= MA_NORESERVE;
1994			if (seg->s_ops == &segspt_shmops ||
1995			    (seg->s_ops == &segvn_ops &&
1996			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1997				pmp->pr_mflags |= MA_ANON;
1998			if (seg->s_ops == &segspt_shmops)
1999				pmp->pr_mflags |= MA_ISM | MA_SHM;
2000			pmp->pr_pagesize = PAGESIZE;
2001			/*
2002			 * Manufacture a filename for the "object" directory.
2003			 */
2004			vattr.va_mask = AT_FSID|AT_NODEID;
2005			if (seg->s_ops == &segvn_ops &&
2006			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2007			    vp != NULL && vp->v_type == VREG &&
2008			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2009				if (vp == p->p_exec)
2010					(void) strcpy(pmp->pr_mapname, "a.out");
2011				else
2012					pr_object_name(pmp->pr_mapname,
2013					    vp, &vattr);
2014			}
2015
2016			/*
2017			 * Get the SysV shared memory id, if any.
2018			 */
2019			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2020			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2021			    SHMID_NONE) {
2022				if (pmp->pr_shmid == SHMID_FREE)
2023					pmp->pr_shmid = -1;
2024
2025				pmp->pr_mflags |= MA_SHM;
2026			} else {
2027				pmp->pr_shmid = -1;
2028			}
2029
2030			hat_getstat(as, saddr, len, hatid,
2031			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2032			pmp = (prasmap_t *)next;
2033		}
2034		ASSERT(tmp == NULL);
2035	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2036
2037	AS_LOCK_EXIT(as, &as->a_lock);
2038
2039	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2040	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2041	kmem_free(buf, size);
2042
2043	return (error);
2044}
2045
2046#ifdef _SYSCALL32_IMPL
2047int
2048prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2049{
2050	struct as *as = p->p_as;
2051	caddr_t buf;
2052	size_t size;
2053	prpageheader32_t *php;
2054	prasmap32_t *pmp;
2055	struct seg *seg;
2056	int error;
2057
2058again:
2059	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2060
2061	if ((seg = AS_SEGFIRST(as)) == NULL) {
2062		AS_LOCK_EXIT(as, &as->a_lock);
2063		return (0);
2064	}
2065	size = prpdsize32(as);
2066	if (uiop->uio_resid < size) {
2067		AS_LOCK_EXIT(as, &as->a_lock);
2068		return (E2BIG);
2069	}
2070
2071	buf = kmem_zalloc(size, KM_SLEEP);
2072	php = (prpageheader32_t *)buf;
2073	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2074
2075	hrt2ts32(gethrtime(), &php->pr_tstamp);
2076	php->pr_nmap = 0;
2077	php->pr_npage = 0;
2078	do {
2079		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2080		caddr_t saddr, naddr;
2081		void *tmp = NULL;
2082
2083		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2084			struct vnode *vp;
2085			struct vattr vattr;
2086			size_t len;
2087			size_t npage;
2088			uint_t prot;
2089			uintptr_t next;
2090
2091			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2092			if ((len = (size_t)(naddr - saddr)) == 0)
2093				continue;
2094			npage = len / PAGESIZE;
2095			next = (uintptr_t)(pmp + 1) + round8(npage);
2096			/*
2097			 * It's possible that the address space can change
2098			 * subtlely even though we're holding as->a_lock
2099			 * due to the nondeterminism of page_exists() in
2100			 * the presence of asychronously flushed pages or
2101			 * mapped files whose sizes are changing.
2102			 * page_exists() may be called indirectly from
2103			 * pr_getprot() by a SEGOP_INCORE() routine.
2104			 * If this happens we need to make sure we don't
2105			 * overrun the buffer whose size we computed based
2106			 * on the initial iteration through the segments.
2107			 * Once we've detected an overflow, we need to clean
2108			 * up the temporary memory allocated in pr_getprot()
2109			 * and retry. If there's a pending signal, we return
2110			 * EINTR so that this thread can be dislodged if
2111			 * a latent bug causes us to spin indefinitely.
2112			 */
2113			if (next > (uintptr_t)buf + size) {
2114				pr_getprot_done(&tmp);
2115				AS_LOCK_EXIT(as, &as->a_lock);
2116
2117				kmem_free(buf, size);
2118
2119				if (ISSIG(curthread, JUSTLOOKING))
2120					return (EINTR);
2121
2122				goto again;
2123			}
2124
2125			php->pr_nmap++;
2126			php->pr_npage += npage;
2127			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2128			pmp->pr_npage = (size32_t)npage;
2129			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2130			pmp->pr_mflags = 0;
2131			if (prot & PROT_READ)
2132				pmp->pr_mflags |= MA_READ;
2133			if (prot & PROT_WRITE)
2134				pmp->pr_mflags |= MA_WRITE;
2135			if (prot & PROT_EXEC)
2136				pmp->pr_mflags |= MA_EXEC;
2137			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2138				pmp->pr_mflags |= MA_SHARED;
2139			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2140				pmp->pr_mflags |= MA_NORESERVE;
2141			if (seg->s_ops == &segspt_shmops ||
2142			    (seg->s_ops == &segvn_ops &&
2143			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2144				pmp->pr_mflags |= MA_ANON;
2145			if (seg->s_ops == &segspt_shmops)
2146				pmp->pr_mflags |= MA_ISM | MA_SHM;
2147			pmp->pr_pagesize = PAGESIZE;
2148			/*
2149			 * Manufacture a filename for the "object" directory.
2150			 */
2151			vattr.va_mask = AT_FSID|AT_NODEID;
2152			if (seg->s_ops == &segvn_ops &&
2153			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2154			    vp != NULL && vp->v_type == VREG &&
2155			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2156				if (vp == p->p_exec)
2157					(void) strcpy(pmp->pr_mapname, "a.out");
2158				else
2159					pr_object_name(pmp->pr_mapname,
2160					    vp, &vattr);
2161			}
2162
2163			/*
2164			 * Get the SysV shared memory id, if any.
2165			 */
2166			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2167			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2168			    SHMID_NONE) {
2169				if (pmp->pr_shmid == SHMID_FREE)
2170					pmp->pr_shmid = -1;
2171
2172				pmp->pr_mflags |= MA_SHM;
2173			} else {
2174				pmp->pr_shmid = -1;
2175			}
2176
2177			hat_getstat(as, saddr, len, hatid,
2178			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2179			pmp = (prasmap32_t *)next;
2180		}
2181		ASSERT(tmp == NULL);
2182	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2183
2184	AS_LOCK_EXIT(as, &as->a_lock);
2185
2186	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2187	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2188	kmem_free(buf, size);
2189
2190	return (error);
2191}
2192#endif	/* _SYSCALL32_IMPL */
2193
2194ushort_t
2195prgetpctcpu(uint64_t pct)
2196{
2197	/*
2198	 * The value returned will be relevant in the zone of the examiner,
2199	 * which may not be the same as the zone which performed the procfs
2200	 * mount.
2201	 */
2202	int nonline = zone_ncpus_online_get(curproc->p_zone);
2203
2204	/*
2205	 * Prorate over online cpus so we don't exceed 100%
2206	 */
2207	if (nonline > 1)
2208		pct /= nonline;
2209	pct >>= 16;		/* convert to 16-bit scaled integer */
2210	if (pct > 0x8000)	/* might happen, due to rounding */
2211		pct = 0x8000;
2212	return ((ushort_t)pct);
2213}
2214
2215/*
2216 * Return information used by ps(1).
2217 */
2218void
2219prgetpsinfo(proc_t *p, psinfo_t *psp)
2220{
2221	kthread_t *t;
2222	struct cred *cred;
2223	hrtime_t hrutime, hrstime;
2224
2225	ASSERT(MUTEX_HELD(&p->p_lock));
2226
2227	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2228		bzero(psp, sizeof (*psp));
2229	else {
2230		thread_unlock(t);
2231		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2232	}
2233
2234	/*
2235	 * only export SSYS and SMSACCT; everything else is off-limits to
2236	 * userland apps.
2237	 */
2238	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2239	psp->pr_nlwp = p->p_lwpcnt;
2240	psp->pr_nzomb = p->p_zombcnt;
2241	mutex_enter(&p->p_crlock);
2242	cred = p->p_cred;
2243	psp->pr_uid = crgetruid(cred);
2244	psp->pr_euid = crgetuid(cred);
2245	psp->pr_gid = crgetrgid(cred);
2246	psp->pr_egid = crgetgid(cred);
2247	mutex_exit(&p->p_crlock);
2248	psp->pr_pid = p->p_pid;
2249	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2250	    (p->p_flag & SZONETOP)) {
2251		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2252		/*
2253		 * Inside local zones, fake zsched's pid as parent pids for
2254		 * processes which reference processes outside of the zone.
2255		 */
2256		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2257	} else {
2258		psp->pr_ppid = p->p_ppid;
2259	}
2260	psp->pr_pgid = p->p_pgrp;
2261	psp->pr_sid = p->p_sessp->s_sid;
2262	psp->pr_taskid = p->p_task->tk_tkid;
2263	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2264	psp->pr_poolid = p->p_pool->pool_id;
2265	psp->pr_zoneid = p->p_zone->zone_id;
2266	if ((psp->pr_contract = PRCTID(p)) == 0)
2267		psp->pr_contract = -1;
2268	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2269	switch (p->p_model) {
2270	case DATAMODEL_ILP32:
2271		psp->pr_dmodel = PR_MODEL_ILP32;
2272		break;
2273	case DATAMODEL_LP64:
2274		psp->pr_dmodel = PR_MODEL_LP64;
2275		break;
2276	}
2277	hrutime = mstate_aggr_state(p, LMS_USER);
2278	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2279	hrt2ts((hrutime + hrstime), &psp->pr_time);
2280	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2281
2282	if (t == NULL) {
2283		int wcode = p->p_wcode;		/* must be atomic read */
2284
2285		if (wcode)
2286			psp->pr_wstat = wstat(wcode, p->p_wdata);
2287		psp->pr_ttydev = PRNODEV;
2288		psp->pr_lwp.pr_state = SZOMB;
2289		psp->pr_lwp.pr_sname = 'Z';
2290		psp->pr_lwp.pr_bindpro = PBIND_NONE;
2291		psp->pr_lwp.pr_bindpset = PS_NONE;
2292	} else {
2293		user_t *up = PTOU(p);
2294		struct as *as;
2295		dev_t d;
2296		extern dev_t rwsconsdev, rconsdev, uconsdev;
2297
2298		d = cttydev(p);
2299		/*
2300		 * If the controlling terminal is the real
2301		 * or workstation console device, map to what the
2302		 * user thinks is the console device. Handle case when
2303		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2304		 */
2305		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2306			d = uconsdev;
2307		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2308		psp->pr_start = up->u_start;
2309		bcopy(up->u_comm, psp->pr_fname,
2310		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2311		bcopy(up->u_psargs, psp->pr_psargs,
2312		    MIN(PRARGSZ-1, PSARGSZ));
2313		psp->pr_argc = up->u_argc;
2314		psp->pr_argv = up->u_argv;
2315		psp->pr_envp = up->u_envp;
2316
2317		/* get the chosen lwp's lwpsinfo */
2318		prgetlwpsinfo(t, &psp->pr_lwp);
2319
2320		/* compute %cpu for the process */
2321		if (p->p_lwpcnt == 1)
2322			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2323		else {
2324			uint64_t pct = 0;
2325			hrtime_t cur_time = gethrtime_unscaled();
2326
2327			t = p->p_tlist;
2328			do {
2329				pct += cpu_update_pct(t, cur_time);
2330			} while ((t = t->t_forw) != p->p_tlist);
2331
2332			psp->pr_pctcpu = prgetpctcpu(pct);
2333		}
2334		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2335			psp->pr_size = 0;
2336			psp->pr_rssize = 0;
2337		} else {
2338			mutex_exit(&p->p_lock);
2339			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2340			psp->pr_size = btopr(as->a_resvsize) *
2341			    (PAGESIZE / 1024);
2342			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2343			psp->pr_pctmem = rm_pctmemory(as);
2344			AS_LOCK_EXIT(as, &as->a_lock);
2345			mutex_enter(&p->p_lock);
2346		}
2347	}
2348}
2349
2350#ifdef _SYSCALL32_IMPL
2351void
2352prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2353{
2354	kthread_t *t;
2355	struct cred *cred;
2356	hrtime_t hrutime, hrstime;
2357
2358	ASSERT(MUTEX_HELD(&p->p_lock));
2359
2360	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
2361		bzero(psp, sizeof (*psp));
2362	else {
2363		thread_unlock(t);
2364		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2365	}
2366
2367	/*
2368	 * only export SSYS and SMSACCT; everything else is off-limits to
2369	 * userland apps.
2370	 */
2371	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2372	psp->pr_nlwp = p->p_lwpcnt;
2373	psp->pr_nzomb = p->p_zombcnt;
2374	mutex_enter(&p->p_crlock);
2375	cred = p->p_cred;
2376	psp->pr_uid = crgetruid(cred);
2377	psp->pr_euid = crgetuid(cred);
2378	psp->pr_gid = crgetrgid(cred);
2379	psp->pr_egid = crgetgid(cred);
2380	mutex_exit(&p->p_crlock);
2381	psp->pr_pid = p->p_pid;
2382	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2383	    (p->p_flag & SZONETOP)) {
2384		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2385		/*
2386		 * Inside local zones, fake zsched's pid as parent pids for
2387		 * processes which reference processes outside of the zone.
2388		 */
2389		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2390	} else {
2391		psp->pr_ppid = p->p_ppid;
2392	}
2393	psp->pr_pgid = p->p_pgrp;
2394	psp->pr_sid = p->p_sessp->s_sid;
2395	psp->pr_taskid = p->p_task->tk_tkid;
2396	psp->pr_projid = p->p_task->tk_proj->kpj_id;
2397	psp->pr_poolid = p->p_pool->pool_id;
2398	psp->pr_zoneid = p->p_zone->zone_id;
2399	if ((psp->pr_contract = PRCTID(p)) == 0)
2400		psp->pr_contract = -1;
2401	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2402	switch (p->p_model) {
2403	case DATAMODEL_ILP32:
2404		psp->pr_dmodel = PR_MODEL_ILP32;
2405		break;
2406	case DATAMODEL_LP64:
2407		psp->pr_dmodel = PR_MODEL_LP64;
2408		break;
2409	}
2410	hrutime = mstate_aggr_state(p, LMS_USER);
2411	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2412	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2413	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2414
2415	if (t == NULL) {
2416		extern int wstat(int, int);	/* needs a header file */
2417		int wcode = p->p_wcode;		/* must be atomic read */
2418
2419		if (wcode)
2420			psp->pr_wstat = wstat(wcode, p->p_wdata);
2421		psp->pr_ttydev = PRNODEV32;
2422		psp->pr_lwp.pr_state = SZOMB;
2423		psp->pr_lwp.pr_sname = 'Z';
2424	} else {
2425		user_t *up = PTOU(p);
2426		struct as *as;
2427		dev_t d;
2428		extern dev_t rwsconsdev, rconsdev, uconsdev;
2429
2430		d = cttydev(p);
2431		/*
2432		 * If the controlling terminal is the real
2433		 * or workstation console device, map to what the
2434		 * user thinks is the console device. Handle case when
2435		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2436		 */
2437		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2438			d = uconsdev;
2439		(void) cmpldev(&psp->pr_ttydev, d);
2440		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
2441		bcopy(up->u_comm, psp->pr_fname,
2442		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2443		bcopy(up->u_psargs, psp->pr_psargs,
2444		    MIN(PRARGSZ-1, PSARGSZ));
2445		psp->pr_argc = up->u_argc;
2446		psp->pr_argv = (caddr32_t)up->u_argv;
2447		psp->pr_envp = (caddr32_t)up->u_envp;
2448
2449		/* get the chosen lwp's lwpsinfo */
2450		prgetlwpsinfo32(t, &psp->pr_lwp);
2451
2452		/* compute %cpu for the process */
2453		if (p->p_lwpcnt == 1)
2454			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2455		else {
2456			uint64_t pct = 0;
2457			hrtime_t cur_time;
2458
2459			t = p->p_tlist;
2460			cur_time = gethrtime_unscaled();
2461			do {
2462				pct += cpu_update_pct(t, cur_time);
2463			} while ((t = t->t_forw) != p->p_tlist);
2464
2465			psp->pr_pctcpu = prgetpctcpu(pct);
2466		}
2467		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2468			psp->pr_size = 0;
2469			psp->pr_rssize = 0;
2470		} else {
2471			mutex_exit(&p->p_lock);
2472			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2473			psp->pr_size = (size32_t)
2474			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2475			psp->pr_rssize = (size32_t)
2476			    (rm_asrss(as) * (PAGESIZE / 1024));
2477			psp->pr_pctmem = rm_pctmemory(as);
2478			AS_LOCK_EXIT(as, &as->a_lock);
2479			mutex_enter(&p->p_lock);
2480		}
2481	}
2482
2483	/*
2484	 * If we are looking at an LP64 process, zero out
2485	 * the fields that cannot be represented in ILP32.
2486	 */
2487	if (p->p_model != DATAMODEL_ILP32) {
2488		psp->pr_size = 0;
2489		psp->pr_rssize = 0;
2490		psp->pr_argv = 0;
2491		psp->pr_envp = 0;
2492	}
2493}
2494#endif	/* _SYSCALL32_IMPL */
2495
2496void
2497prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2498{
2499	klwp_t *lwp = ttolwp(t);
2500	sobj_ops_t *sobj;
2501	char c, state;
2502	uint64_t pct;
2503	int retval, niceval;
2504	hrtime_t hrutime, hrstime;
2505
2506	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2507
2508	bzero(psp, sizeof (*psp));
2509
2510	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2511	psp->pr_lwpid = t->t_tid;
2512	psp->pr_addr = (uintptr_t)t;
2513	psp->pr_wchan = (uintptr_t)t->t_wchan;
2514
2515	/* map the thread state enum into a process state enum */
2516	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2517	switch (state) {
2518	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2519	case TS_RUN:		state = SRUN;		c = 'R';	break;
2520	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2521	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2522	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2523	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2524	default:		state = 0;		c = '?';	break;
2525	}
2526	psp->pr_state = state;
2527	psp->pr_sname = c;
2528	if ((sobj = t->t_sobj_ops) != NULL)
2529		psp->pr_stype = SOBJ_TYPE(sobj);
2530	retval = CL_DONICE(t, NULL, 0, &niceval);
2531	if (retval == 0) {
2532		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2533		psp->pr_nice = niceval + NZERO;
2534	}
2535	psp->pr_syscall = t->t_sysnum;
2536	psp->pr_pri = t->t_pri;
2537	psp->pr_start.tv_sec = t->t_start;
2538	psp->pr_start.tv_nsec = 0L;
2539	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2540	scalehrtime(&hrutime);
2541	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2542	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2543	scalehrtime(&hrstime);
2544	hrt2ts(hrutime + hrstime, &psp->pr_time);
2545	/* compute %cpu for the lwp */
2546	pct = cpu_update_pct(t, gethrtime_unscaled());
2547	psp->pr_pctcpu = prgetpctcpu(pct);
2548	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2549	if (psp->pr_cpu > 99)
2550		psp->pr_cpu = 99;
2551
2552	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2553	    sizeof (psp->pr_clname) - 1);
2554	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2555	psp->pr_onpro = t->t_cpu->cpu_id;
2556	psp->pr_bindpro = t->t_bind_cpu;
2557	psp->pr_bindpset = t->t_bind_pset;
2558	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2559}
2560
2561#ifdef _SYSCALL32_IMPL
2562void
2563prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2564{
2565	proc_t *p = ttoproc(t);
2566	klwp_t *lwp = ttolwp(t);
2567	sobj_ops_t *sobj;
2568	char c, state;
2569	uint64_t pct;
2570	int retval, niceval;
2571	hrtime_t hrutime, hrstime;
2572
2573	ASSERT(MUTEX_HELD(&p->p_lock));
2574
2575	bzero(psp, sizeof (*psp));
2576
2577	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
2578	psp->pr_lwpid = t->t_tid;
2579	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
2580	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */
2581
2582	/* map the thread state enum into a process state enum */
2583	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2584	switch (state) {
2585	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
2586	case TS_RUN:		state = SRUN;		c = 'R';	break;
2587	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
2588	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
2589	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
2590	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
2591	default:		state = 0;		c = '?';	break;
2592	}
2593	psp->pr_state = state;
2594	psp->pr_sname = c;
2595	if ((sobj = t->t_sobj_ops) != NULL)
2596		psp->pr_stype = SOBJ_TYPE(sobj);
2597	retval = CL_DONICE(t, NULL, 0, &niceval);
2598	if (retval == 0) {
2599		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2600		psp->pr_nice = niceval + NZERO;
2601	} else {
2602		psp->pr_oldpri = 0;
2603		psp->pr_nice = 0;
2604	}
2605	psp->pr_syscall = t->t_sysnum;
2606	psp->pr_pri = t->t_pri;
2607	psp->pr_start.tv_sec = (time32_t)t->t_start;
2608	psp->pr_start.tv_nsec = 0L;
2609	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2610	scalehrtime(&hrutime);
2611	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2612	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
2613	scalehrtime(&hrstime);
2614	hrt2ts32(hrutime + hrstime, &psp->pr_time);
2615	/* compute %cpu for the lwp */
2616	pct = cpu_update_pct(t, gethrtime_unscaled());
2617	psp->pr_pctcpu = prgetpctcpu(pct);
2618	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
2619	if (psp->pr_cpu > 99)
2620		psp->pr_cpu = 99;
2621
2622	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2623	    sizeof (psp->pr_clname) - 1);
2624	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
2625	psp->pr_onpro = t->t_cpu->cpu_id;
2626	psp->pr_bindpro = t->t_bind_cpu;
2627	psp->pr_bindpset = t->t_bind_pset;
2628	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2629}
2630#endif	/* _SYSCALL32_IMPL */
2631
2632/*
2633 * This used to get called when microstate accounting was disabled but
2634 * microstate information was requested.  Since Microstate accounting is on
2635 * regardless of the proc flags, this simply makes it appear to procfs that
2636 * microstate accounting is on.  This is relatively meaningless since you
2637 * can't turn it off, but this is here for the sake of appearances.
2638 */
2639
2640/*ARGSUSED*/
2641void
2642estimate_msacct(kthread_t *t, hrtime_t curtime)
2643{
2644	proc_t *p;
2645
2646	if (t == NULL)
2647		return;
2648
2649	p = ttoproc(t);
2650	ASSERT(MUTEX_HELD(&p->p_lock));
2651
2652	/*
2653	 * A system process (p0) could be referenced if the thread is
2654	 * in the process of exiting.  Don't turn on microstate accounting
2655	 * in that case.
2656	 */
2657	if (p->p_flag & SSYS)
2658		return;
2659
2660	/*
2661	 * Loop through all the LWPs (kernel threads) in the process.
2662	 */
2663	t = p->p_tlist;
2664	do {
2665		t->t_proc_flag |= TP_MSACCT;
2666	} while ((t = t->t_forw) != p->p_tlist);
2667
2668	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
2669}
2670
2671/*
2672 * It's not really possible to disable microstate accounting anymore.
2673 * However, this routine simply turns off the ms accounting flags in a process
2674 * This way procfs can still pretend to turn microstate accounting on and
2675 * off for a process, but it actually doesn't do anything.  This is
2676 * a neutered form of preemptive idiot-proofing.
2677 */
2678void
2679disable_msacct(proc_t *p)
2680{
2681	kthread_t *t;
2682
2683	ASSERT(MUTEX_HELD(&p->p_lock));
2684
2685	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
2686	/*
2687	 * Loop through all the LWPs (kernel threads) in the process.
2688	 */
2689	if ((t = p->p_tlist) != NULL) {
2690		do {
2691			/* clear per-thread flag */
2692			t->t_proc_flag &= ~TP_MSACCT;
2693		} while ((t = t->t_forw) != p->p_tlist);
2694	}
2695}
2696
2697/*
2698 * Return resource usage information.
2699 */
2700void
2701prgetusage(kthread_t *t, prhusage_t *pup)
2702{
2703	klwp_t *lwp = ttolwp(t);
2704	hrtime_t *mstimep;
2705	struct mstate *ms = &lwp->lwp_mstate;
2706	int state;
2707	int i;
2708	hrtime_t curtime;
2709	hrtime_t waitrq;
2710	hrtime_t tmp1;
2711
2712	curtime = gethrtime_unscaled();
2713
2714	pup->pr_lwpid	= t->t_tid;
2715	pup->pr_count	= 1;
2716	pup->pr_create	= ms->ms_start;
2717	pup->pr_term    = ms->ms_term;
2718	scalehrtime(&pup->pr_create);
2719	scalehrtime(&pup->pr_term);
2720	if (ms->ms_term == 0) {
2721		pup->pr_rtime = curtime - ms->ms_start;
2722		scalehrtime(&pup->pr_rtime);
2723	} else {
2724		pup->pr_rtime = ms->ms_term - ms->ms_start;
2725		scalehrtime(&pup->pr_rtime);
2726	}
2727
2728
2729	pup->pr_utime    = ms->ms_acct[LMS_USER];
2730	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
2731	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
2732	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
2733	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
2734	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
2735	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
2736	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
2737	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
2738	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2739
2740	prscaleusage(pup);
2741
2742	/*
2743	 * Adjust for time waiting in the dispatcher queue.
2744	 */
2745	waitrq = t->t_waitrq;	/* hopefully atomic */
2746	if (waitrq != 0) {
2747		tmp1 = curtime - waitrq;
2748		scalehrtime(&tmp1);
2749		pup->pr_wtime += tmp1;
2750		curtime = waitrq;
2751	}
2752
2753	/*
2754	 * Adjust for time spent in current microstate.
2755	 */
2756	if (ms->ms_state_start > curtime) {
2757		curtime = gethrtime_unscaled();
2758	}
2759
2760	i = 0;
2761	do {
2762		switch (state = t->t_mstate) {
2763		case LMS_SLEEP:
2764			/*
2765			 * Update the timer for the current sleep state.
2766			 */
2767			switch (state = ms->ms_prev) {
2768			case LMS_TFAULT:
2769			case LMS_DFAULT:
2770			case LMS_KFAULT:
2771			case LMS_USER_LOCK:
2772				break;
2773			default:
2774				state = LMS_SLEEP;
2775				break;
2776			}
2777			break;
2778		case LMS_TFAULT:
2779		case LMS_DFAULT:
2780		case LMS_KFAULT:
2781		case LMS_USER_LOCK:
2782			state = LMS_SYSTEM;
2783			break;
2784		}
2785		switch (state) {
2786		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2787		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2788		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2789		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2790		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2791		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2792		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2793		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2794		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2795		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2796		default:		panic("prgetusage: unknown microstate");
2797		}
2798		tmp1 = curtime - ms->ms_state_start;
2799		if (tmp1 < 0) {
2800			curtime = gethrtime_unscaled();
2801			i++;
2802			continue;
2803		}
2804		scalehrtime(&tmp1);
2805	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2806
2807	*mstimep += tmp1;
2808
2809	/* update pup timestamp */
2810	pup->pr_tstamp = curtime;
2811	scalehrtime(&pup->pr_tstamp);
2812
2813	/*
2814	 * Resource usage counters.
2815	 */
2816	pup->pr_minf  = lwp->lwp_ru.minflt;
2817	pup->pr_majf  = lwp->lwp_ru.majflt;
2818	pup->pr_nswap = lwp->lwp_ru.nswap;
2819	pup->pr_inblk = lwp->lwp_ru.inblock;
2820	pup->pr_oublk = lwp->lwp_ru.oublock;
2821	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
2822	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
2823	pup->pr_sigs  = lwp->lwp_ru.nsignals;
2824	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
2825	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
2826	pup->pr_sysc  = lwp->lwp_ru.sysc;
2827	pup->pr_ioch  = lwp->lwp_ru.ioch;
2828}
2829
2830/*
2831 * Convert ms_acct stats from unscaled high-res time to nanoseconds
2832 */
2833void
2834prscaleusage(prhusage_t *usg)
2835{
2836	scalehrtime(&usg->pr_utime);
2837	scalehrtime(&usg->pr_stime);
2838	scalehrtime(&usg->pr_ttime);
2839	scalehrtime(&usg->pr_tftime);
2840	scalehrtime(&usg->pr_dftime);
2841	scalehrtime(&usg->pr_kftime);
2842	scalehrtime(&usg->pr_ltime);
2843	scalehrtime(&usg->pr_slptime);
2844	scalehrtime(&usg->pr_wtime);
2845	scalehrtime(&usg->pr_stoptime);
2846}
2847
2848
2849/*
2850 * Sum resource usage information.
2851 */
2852void
2853praddusage(kthread_t *t, prhusage_t *pup)
2854{
2855	klwp_t *lwp = ttolwp(t);
2856	hrtime_t *mstimep;
2857	struct mstate *ms = &lwp->lwp_mstate;
2858	int state;
2859	int i;
2860	hrtime_t curtime;
2861	hrtime_t waitrq;
2862	hrtime_t tmp;
2863	prhusage_t conv;
2864
2865	curtime = gethrtime_unscaled();
2866
2867	if (ms->ms_term == 0) {
2868		tmp = curtime - ms->ms_start;
2869		scalehrtime(&tmp);
2870		pup->pr_rtime += tmp;
2871	} else {
2872		tmp = ms->ms_term - ms->ms_start;
2873		scalehrtime(&tmp);
2874		pup->pr_rtime += tmp;
2875	}
2876
2877	conv.pr_utime = ms->ms_acct[LMS_USER];
2878	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2879	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2880	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2881	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2882	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2883	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2884	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2885	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2886	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2887
2888	prscaleusage(&conv);
2889
2890	pup->pr_utime	+= conv.pr_utime;
2891	pup->pr_stime	+= conv.pr_stime;
2892	pup->pr_ttime	+= conv.pr_ttime;
2893	pup->pr_tftime	+= conv.pr_tftime;
2894	pup->pr_dftime	+= conv.pr_dftime;
2895	pup->pr_kftime	+= conv.pr_kftime;
2896	pup->pr_ltime	+= conv.pr_ltime;
2897	pup->pr_slptime	+= conv.pr_slptime;
2898	pup->pr_wtime	+= conv.pr_wtime;
2899	pup->pr_stoptime += conv.pr_stoptime;
2900
2901	/*
2902	 * Adjust for time waiting in the dispatcher queue.
2903	 */
2904	waitrq = t->t_waitrq;	/* hopefully atomic */
2905	if (waitrq != 0) {
2906		tmp = curtime - waitrq;
2907		scalehrtime(&tmp);
2908		pup->pr_wtime += tmp;
2909		curtime = waitrq;
2910	}
2911
2912	/*
2913	 * Adjust for time spent in current microstate.
2914	 */
2915	if (ms->ms_state_start > curtime) {
2916		curtime = gethrtime_unscaled();
2917	}
2918
2919	i = 0;
2920	do {
2921		switch (state = t->t_mstate) {
2922		case LMS_SLEEP:
2923			/*
2924			 * Update the timer for the current sleep state.
2925			 */
2926			switch (state = ms->ms_prev) {
2927			case LMS_TFAULT:
2928			case LMS_DFAULT:
2929			case LMS_KFAULT:
2930			case LMS_USER_LOCK:
2931				break;
2932			default:
2933				state = LMS_SLEEP;
2934				break;
2935			}
2936			break;
2937		case LMS_TFAULT:
2938		case LMS_DFAULT:
2939		case LMS_KFAULT:
2940		case LMS_USER_LOCK:
2941			state = LMS_SYSTEM;
2942			break;
2943		}
2944		switch (state) {
2945		case LMS_USER:		mstimep = &pup->pr_utime;	break;
2946		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
2947		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
2948		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
2949		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
2950		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
2951		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
2952		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
2953		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
2954		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
2955		default:		panic("praddusage: unknown microstate");
2956		}
2957		tmp = curtime - ms->ms_state_start;
2958		if (tmp < 0) {
2959			curtime = gethrtime_unscaled();
2960			i++;
2961			continue;
2962		}
2963		scalehrtime(&tmp);
2964	} while (tmp < 0 && i < MAX_ITERS_SPIN);
2965
2966	*mstimep += tmp;
2967
2968	/* update pup timestamp */
2969	pup->pr_tstamp = curtime;
2970	scalehrtime(&pup->pr_tstamp);
2971
2972	/*
2973	 * Resource usage counters.
2974	 */
2975	pup->pr_minf  += lwp->lwp_ru.minflt;
2976	pup->pr_majf  += lwp->lwp_ru.majflt;
2977	pup->pr_nswap += lwp->lwp_ru.nswap;
2978	pup->pr_inblk += lwp->lwp_ru.inblock;
2979	pup->pr_oublk += lwp->lwp_ru.oublock;
2980	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
2981	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
2982	pup->pr_sigs  += lwp->lwp_ru.nsignals;
2983	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
2984	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
2985	pup->pr_sysc  += lwp->lwp_ru.sysc;
2986	pup->pr_ioch  += lwp->lwp_ru.ioch;
2987}
2988
2989/*
2990 * Convert a prhusage_t to a prusage_t.
2991 * This means convert each hrtime_t to a timestruc_t
2992 * and copy the count fields uint64_t => ulong_t.
2993 */
2994void
2995prcvtusage(prhusage_t *pup, prusage_t *upup)
2996{
2997	uint64_t *ullp;
2998	ulong_t *ulp;
2999	int i;
3000
3001	upup->pr_lwpid = pup->pr_lwpid;
3002	upup->pr_count = pup->pr_count;
3003
3004	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3005	hrt2ts(pup->pr_create,	&upup->pr_create);
3006	hrt2ts(pup->pr_term,	&upup->pr_term);
3007	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3008	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3009	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3010	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3011	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3012	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3013	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3014	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3015	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3016	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3017	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3018	bzero(upup->filltime, sizeof (upup->filltime));
3019
3020	ullp = &pup->pr_minf;
3021	ulp = &upup->pr_minf;
3022	for (i = 0; i < 22; i++)
3023		*ulp++ = (ulong_t)*ullp++;
3024}
3025
3026#ifdef _SYSCALL32_IMPL
3027void
3028prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3029{
3030	uint64_t *ullp;
3031	uint32_t *ulp;
3032	int i;
3033
3034	upup->pr_lwpid = pup->pr_lwpid;
3035	upup->pr_count = pup->pr_count;
3036
3037	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
3038	hrt2ts32(pup->pr_create,	&upup->pr_create);
3039	hrt2ts32(pup->pr_term,		&upup->pr_term);
3040	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
3041	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
3042	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
3043	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
3044	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
3045	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
3046	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
3047	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
3048	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
3049	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
3050	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
3051	bzero(upup->filltime, sizeof (upup->filltime));
3052
3053	ullp = &pup->pr_minf;
3054	ulp = &upup->pr_minf;
3055	for (i = 0; i < 22; i++)
3056		*ulp++ = (uint32_t)*ullp++;
3057}
3058#endif	/* _SYSCALL32_IMPL */
3059
3060/*
3061 * Determine whether a set is empty.
3062 */
3063int
3064setisempty(uint32_t *sp, uint_t n)
3065{
3066	while (n--)
3067		if (*sp++)
3068			return (0);
3069	return (1);
3070}
3071
3072/*
3073 * Utility routine for establishing a watched area in the process.
3074 * Keep the list of watched areas sorted by virtual address.
3075 */
3076int
3077set_watched_area(proc_t *p, struct watched_area *pwa)
3078{
3079	caddr_t vaddr = pwa->wa_vaddr;
3080	caddr_t eaddr = pwa->wa_eaddr;
3081	ulong_t flags = pwa->wa_flags;
3082	struct watched_area *target;
3083	avl_index_t where;
3084	int error = 0;
3085
3086	/* we must not be holding p->p_lock, but the process must be locked */
3087	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3088	ASSERT(p->p_proc_flag & P_PR_LOCK);
3089
3090	/*
3091	 * If this is our first watchpoint, enable watchpoints for the process.
3092	 */
3093	if (!pr_watch_active(p)) {
3094		kthread_t *t;
3095
3096		mutex_enter(&p->p_lock);
3097		if ((t = p->p_tlist) != NULL) {
3098			do {
3099				watch_enable(t);
3100			} while ((t = t->t_forw) != p->p_tlist);
3101		}
3102		mutex_exit(&p->p_lock);
3103	}
3104
3105	target = pr_find_watched_area(p, pwa, &where);
3106	if (target != NULL) {
3107		/*
3108		 * We discovered an existing, overlapping watched area.
3109		 * Allow it only if it is an exact match.
3110		 */
3111		if (target->wa_vaddr != vaddr ||
3112		    target->wa_eaddr != eaddr)
3113			error = EINVAL;
3114		else if (target->wa_flags != flags) {
3115			error = set_watched_page(p, vaddr, eaddr,
3116			    flags, target->wa_flags);
3117			target->wa_flags = flags;
3118		}
3119		kmem_free(pwa, sizeof (struct watched_area));
3120	} else {
3121		avl_insert(&p->p_warea, pwa, where);
3122		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3123	}
3124
3125	return (error);
3126}
3127
3128/*
3129 * Utility routine for clearing a watched area in the process.
3130 * Must be an exact match of the virtual address.
3131 * size and flags don't matter.
3132 */
3133int
3134clear_watched_area(proc_t *p, struct watched_area *pwa)
3135{
3136	struct watched_area *found;
3137
3138	/* we must not be holding p->p_lock, but the process must be locked */
3139	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3140	ASSERT(p->p_proc_flag & P_PR_LOCK);
3141
3142
3143	if (!pr_watch_active(p)) {
3144		kmem_free(pwa, sizeof (struct watched_area));
3145		return (0);
3146	}
3147
3148	/*
3149	 * Look for a matching address in the watched areas.  If a match is
3150	 * found, clear the old watched area and adjust the watched page(s).  It
3151	 * is not an error if there is no match.
3152	 */
3153	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3154	    found->wa_vaddr == pwa->wa_vaddr) {
3155		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3156		    found->wa_flags);
3157		avl_remove(&p->p_warea, found);
3158		kmem_free(found, sizeof (struct watched_area));
3159	}
3160
3161	kmem_free(pwa, sizeof (struct watched_area));
3162
3163	/*
3164	 * If we removed the last watched area from the process, disable
3165	 * watchpoints.
3166	 */
3167	if (!pr_watch_active(p)) {
3168		kthread_t *t;
3169
3170		mutex_enter(&p->p_lock);
3171		if ((t = p->p_tlist) != NULL) {
3172			do {
3173				watch_disable(t);
3174			} while ((t = t->t_forw) != p->p_tlist);
3175		}
3176		mutex_exit(&p->p_lock);
3177	}
3178
3179	return (0);
3180}
3181
3182/*
3183 * Frees all the watched_area structures
3184 */
3185void
3186pr_free_watchpoints(proc_t *p)
3187{
3188	struct watched_area *delp;
3189	void *cookie;
3190
3191	cookie = NULL;
3192	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3193		kmem_free(delp, sizeof (struct watched_area));
3194
3195	avl_destroy(&p->p_warea);
3196}
3197
3198/*
3199 * This one is called by the traced process to unwatch all the
3200 * pages while deallocating the list of watched_page structs.
3201 */
3202void
3203pr_free_watched_pages(proc_t *p)
3204{
3205	struct as *as = p->p_as;
3206	struct watched_page *pwp;
3207	uint_t prot;
3208	int    retrycnt, err;
3209	void *cookie;
3210
3211	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3212		return;
3213
3214	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3215	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3216
3217	pwp = avl_first(&as->a_wpage);
3218
3219	cookie = NULL;
3220	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3221		retrycnt = 0;
3222		if ((prot = pwp->wp_oprot) != 0) {
3223			caddr_t addr = pwp->wp_vaddr;
3224			struct seg *seg;
3225		retry:
3226
3227			if ((pwp->wp_prot != prot ||
3228			    (pwp->wp_flags & WP_NOWATCH)) &&
3229			    (seg = as_segat(as, addr)) != NULL) {
3230				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3231				if (err == IE_RETRY) {
3232					ASSERT(retrycnt == 0);
3233					retrycnt++;
3234					goto retry;
3235				}
3236			}
3237		}
3238		kmem_free(pwp, sizeof (struct watched_page));
3239	}
3240
3241	avl_destroy(&as->a_wpage);
3242	p->p_wprot = NULL;
3243
3244	AS_LOCK_EXIT(as, &as->a_lock);
3245}
3246
3247/*
3248 * Insert a watched area into the list of watched pages.
3249 * If oflags is zero then we are adding a new watched area.
3250 * Otherwise we are changing the flags of an existing watched area.
3251 */
3252static int
3253set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3254	ulong_t flags, ulong_t oflags)
3255{
3256	struct as *as = p->p_as;
3257	avl_tree_t *pwp_tree;
3258	struct watched_page *pwp, *newpwp;
3259	struct watched_page tpw;
3260	avl_index_t where;
3261	struct seg *seg;
3262	uint_t prot;
3263	caddr_t addr;
3264
3265	/*
3266	 * We need to pre-allocate a list of structures before we grab the
3267	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3268	 * held.
3269	 */
3270	newpwp = NULL;
3271	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3272	    addr < eaddr; addr += PAGESIZE) {
3273		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3274		pwp->wp_list = newpwp;
3275		newpwp = pwp;
3276	}
3277
3278	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3279
3280	/*
3281	 * Search for an existing watched page to contain the watched area.
3282	 * If none is found, grab a new one from the available list
3283	 * and insert it in the active list, keeping the list sorted
3284	 * by user-level virtual address.
3285	 */
3286	if (p->p_flag & SVFWAIT)
3287		pwp_tree = &p->p_wpage;
3288	else
3289		pwp_tree = &as->a_wpage;
3290
3291again:
3292	if (avl_numnodes(pwp_tree) > prnwatch) {
3293		AS_LOCK_EXIT(as, &as->a_lock);
3294		while (newpwp != NULL) {
3295			pwp = newpwp->wp_list;
3296			kmem_free(newpwp, sizeof (struct watched_page));
3297			newpwp = pwp;
3298		}
3299		return (E2BIG);
3300	}
3301
3302	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3303	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3304		pwp = newpwp;
3305		newpwp = newpwp->wp_list;
3306		pwp->wp_list = NULL;
3307		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3308		    (uintptr_t)PAGEMASK);
3309		avl_insert(pwp_tree, pwp, where);
3310	}
3311
3312	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3313
3314	if (oflags & WA_READ)
3315		pwp->wp_read--;
3316	if (oflags & WA_WRITE)
3317		pwp->wp_write--;
3318	if (oflags & WA_EXEC)
3319		pwp->wp_exec--;
3320
3321	ASSERT(pwp->wp_read >= 0);
3322	ASSERT(pwp->wp_write >= 0);
3323	ASSERT(pwp->wp_exec >= 0);
3324
3325	if (flags & WA_READ)
3326		pwp->wp_read++;
3327	if (flags & WA_WRITE)
3328		pwp->wp_write++;
3329	if (flags & WA_EXEC)
3330		pwp->wp_exec++;
3331
3332	if (!(p->p_flag & SVFWAIT)) {
3333		vaddr = pwp->wp_vaddr;
3334		if (pwp->wp_oprot == 0 &&
3335		    (seg = as_segat(as, vaddr)) != NULL) {
3336			SEGOP_GETPROT(seg, vaddr, 0, &prot);
3337			pwp->wp_oprot = (uchar_t)prot;
3338			pwp->wp_prot = (uchar_t)prot;
3339		}
3340		if (pwp->wp_oprot != 0) {
3341			prot = pwp->wp_oprot;
3342			if (pwp->wp_read)
3343				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3344			if (pwp->wp_write)
3345				prot &= ~PROT_WRITE;
3346			if (pwp->wp_exec)
3347				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3348			if (!(pwp->wp_flags & WP_NOWATCH) &&
3349			    pwp->wp_prot != prot &&
3350			    (pwp->wp_flags & WP_SETPROT) == 0) {
3351				pwp->wp_flags |= WP_SETPROT;
3352				pwp->wp_list = p->p_wprot;
3353				p->p_wprot = pwp;
3354			}
3355			pwp->wp_prot = (uchar_t)prot;
3356		}
3357	}
3358
3359	/*
3360	 * If the watched area extends into the next page then do
3361	 * it over again with the virtual address of the next page.
3362	 */
3363	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3364		goto again;
3365
3366	AS_LOCK_EXIT(as, &as->a_lock);
3367
3368	/*
3369	 * Free any pages we may have over-allocated
3370	 */
3371	while (newpwp != NULL) {
3372		pwp = newpwp->wp_list;
3373		kmem_free(newpwp, sizeof (struct watched_page));
3374		newpwp = pwp;
3375	}
3376
3377	return (0);
3378}
3379
3380/*
3381 * Remove a watched area from the list of watched pages.
3382 * A watched area may extend over more than one page.
3383 */
3384static void
3385clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3386{
3387	struct as *as = p->p_as;
3388	struct watched_page *pwp;
3389	struct watched_page tpw;
3390	avl_tree_t *tree;
3391	avl_index_t where;
3392
3393	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3394
3395	if (p->p_flag & SVFWAIT)
3396		tree = &p->p_wpage;
3397	else
3398		tree = &as->a_wpage;
3399
3400	tpw.wp_vaddr = vaddr =
3401	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3402	pwp = avl_find(tree, &tpw, &where);
3403	if (pwp == NULL)
3404		pwp = avl_nearest(tree, where, AVL_AFTER);
3405
3406	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3407		ASSERT(vaddr <=  pwp->wp_vaddr);
3408
3409		if (flags & WA_READ)
3410			pwp->wp_read--;
3411		if (flags & WA_WRITE)
3412			pwp->wp_write--;
3413		if (flags & WA_EXEC)
3414			pwp->wp_exec--;
3415
3416		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3417			/*
3418			 * Reset the hat layer's protections on this page.
3419			 */
3420			if (pwp->wp_oprot != 0) {
3421				uint_t prot = pwp->wp_oprot;
3422
3423				if (pwp->wp_read)
3424					prot &=
3425					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3426				if (pwp->wp_write)
3427					prot &= ~PROT_WRITE;
3428				if (pwp->wp_exec)
3429					prot &=
3430					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3431				if (!(pwp->wp_flags & WP_NOWATCH) &&
3432				    pwp->wp_prot != prot &&
3433				    (pwp->wp_flags & WP_SETPROT) == 0) {
3434					pwp->wp_flags |= WP_SETPROT;
3435					pwp->wp_list = p->p_wprot;
3436					p->p_wprot = pwp;
3437				}
3438				pwp->wp_prot = (uchar_t)prot;
3439			}
3440		} else {
3441			/*
3442			 * No watched areas remain in this page.
3443			 * Reset everything to normal.
3444			 */
3445			if (pwp->wp_oprot != 0) {
3446				pwp->wp_prot = pwp->wp_oprot;
3447				if ((pwp->wp_flags & WP_SETPROT) == 0) {
3448					pwp->wp_flags |= WP_SETPROT;
3449					pwp->wp_list = p->p_wprot;
3450					p->p_wprot = pwp;
3451				}
3452			}
3453		}
3454
3455		pwp = AVL_NEXT(tree, pwp);
3456	}
3457
3458	AS_LOCK_EXIT(as, &as->a_lock);
3459}
3460
3461/*
3462 * Return the original protections for the specified page.
3463 */
3464static void
3465getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3466{
3467	struct watched_page *pwp;
3468	struct watched_page tpw;
3469
3470	ASSERT(AS_LOCK_HELD(as, &as->a_lock));
3471
3472	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3473	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3474		*prot = pwp->wp_oprot;
3475}
3476
3477static prpagev_t *
3478pr_pagev_create(struct seg *seg, int check_noreserve)
3479{
3480	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3481	size_t total_pages = seg_pages(seg);
3482
3483	/*
3484	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
3485	 * 4 or 5 bytes of storage per page, so this means we limit ourself
3486	 * to about a megabyte of kernel heap by default.
3487	 */
3488	pagev->pg_npages = MIN(total_pages, pagev_lim);
3489	pagev->pg_pnbase = 0;
3490
3491	pagev->pg_protv =
3492	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3493
3494	if (check_noreserve)
3495		pagev->pg_incore =
3496		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3497	else
3498		pagev->pg_incore = NULL;
3499
3500	return (pagev);
3501}
3502
3503static void
3504pr_pagev_destroy(prpagev_t *pagev)
3505{
3506	if (pagev->pg_incore != NULL)
3507		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3508
3509	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3510	kmem_free(pagev, sizeof (prpagev_t));
3511}
3512
3513static caddr_t
3514pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3515{
3516	ulong_t lastpg = seg_page(seg, eaddr - 1);
3517	ulong_t pn, pnlim;
3518	caddr_t saddr;
3519	size_t len;
3520
3521	ASSERT(addr >= seg->s_base && addr <= eaddr);
3522
3523	if (addr == eaddr)
3524		return (eaddr);
3525
3526refill:
3527	ASSERT(addr < eaddr);
3528	pagev->pg_pnbase = seg_page(seg, addr);
3529	pnlim = pagev->pg_pnbase + pagev->pg_npages;
3530	saddr = addr;
3531
3532	if (lastpg < pnlim)
3533		len = (size_t)(eaddr - addr);
3534	else
3535		len = pagev->pg_npages * PAGESIZE;
3536
3537	if (pagev->pg_incore != NULL) {
3538		/*
3539		 * INCORE cleverly has different semantics than GETPROT:
3540		 * it returns info on pages up to but NOT including addr + len.
3541		 */
3542		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3543		pn = pagev->pg_pnbase;
3544
3545		do {
3546			/*
3547			 * Guilty knowledge here:  We know that segvn_incore
3548			 * returns more than just the low-order bit that
3549			 * indicates the page is actually in memory.  If any
3550			 * bits are set, then the page has backing store.
3551			 */
3552			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3553				goto out;
3554
3555		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3556
3557		/*
3558		 * If we examined all the pages in the vector but we're not
3559		 * at the end of the segment, take another lap.
3560		 */
3561		if (addr < eaddr)
3562			goto refill;
3563	}
3564
3565	/*
3566	 * Need to take len - 1 because addr + len is the address of the
3567	 * first byte of the page just past the end of what we want.
3568	 */
3569out:
3570	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3571	return (addr);
3572}
3573
3574static caddr_t
3575pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3576    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3577{
3578	/*
3579	 * Our starting address is either the specified address, or the base
3580	 * address from the start of the pagev.  If the latter is greater,
3581	 * this means a previous call to pr_pagev_fill has already scanned
3582	 * further than the end of the previous mapping.
3583	 */
3584	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3585	caddr_t addr = MAX(*saddrp, base);
3586	ulong_t pn = seg_page(seg, addr);
3587	uint_t prot, nprot;
3588
3589	/*
3590	 * If we're dealing with noreserve pages, then advance addr to
3591	 * the address of the next page which has backing store.
3592	 */
3593	if (pagev->pg_incore != NULL) {
3594		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3595			if ((addr += PAGESIZE) == eaddr) {
3596				*saddrp = addr;
3597				prot = 0;
3598				goto out;
3599			}
3600			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3601				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3602				if (addr == eaddr) {
3603					*saddrp = addr;
3604					prot = 0;
3605					goto out;
3606				}
3607				pn = seg_page(seg, addr);
3608			}
3609		}
3610	}
3611
3612	/*
3613	 * Get the protections on the page corresponding to addr.
3614	 */
3615	pn = seg_page(seg, addr);
3616	ASSERT(pn >= pagev->pg_pnbase);
3617	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3618
3619	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3620	getwatchprot(seg->s_as, addr, &prot);
3621	*saddrp = addr;
3622
3623	/*
3624	 * Now loop until we find a backed page with different protections
3625	 * or we reach the end of this segment.
3626	 */
3627	while ((addr += PAGESIZE) < eaddr) {
3628		/*
3629		 * If pn has advanced to the page number following what we
3630		 * have information on, refill the page vector and reset
3631		 * addr and pn.  If pr_pagev_fill does not return the
3632		 * address of the next page, we have a discontiguity and
3633		 * thus have reached the end of the current mapping.
3634		 */
3635		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3636			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3637			if (naddr != addr)
3638				goto out;
3639			pn = seg_page(seg, addr);
3640		}
3641
3642		/*
3643		 * The previous page's protections are in prot, and it has
3644		 * backing.  If this page is MAP_NORESERVE and has no backing,
3645		 * then end this mapping and return the previous protections.
3646		 */
3647		if (pagev->pg_incore != NULL &&
3648		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3649			break;
3650
3651		/*
3652		 * Otherwise end the mapping if this page's protections (nprot)
3653		 * are different than those in the previous page (prot).
3654		 */
3655		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3656		getwatchprot(seg->s_as, addr, &nprot);
3657
3658		if (nprot != prot)
3659			break;
3660	}
3661
3662out:
3663	*protp = prot;
3664	return (addr);
3665}
3666
3667size_t
3668pr_getsegsize(struct seg *seg, int reserved)
3669{
3670	size_t size = seg->s_size;
3671
3672	/*
3673	 * If we're interested in the reserved space, return the size of the
3674	 * segment itself.  Everything else in this function is a special case
3675	 * to determine the actual underlying size of various segment types.
3676	 */
3677	if (reserved)
3678		return (size);
3679
3680	/*
3681	 * If this is a segvn mapping of a regular file, return the smaller
3682	 * of the segment size and the remaining size of the file beyond
3683	 * the file offset corresponding to seg->s_base.
3684	 */
3685	if (seg->s_ops == &segvn_ops) {
3686		vattr_t vattr;
3687		vnode_t *vp;
3688
3689		vattr.va_mask = AT_SIZE;
3690
3691		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3692		    vp != NULL && vp->v_type == VREG &&
3693		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3694
3695			u_offset_t fsize = vattr.va_size;
3696			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3697
3698			if (fsize < offset)
3699				fsize = 0;
3700			else
3701				fsize -= offset;
3702
3703			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3704
3705			if (fsize < (u_offset_t)size)
3706				size = (size_t)fsize;
3707		}
3708
3709		return (size);
3710	}
3711
3712	/*
3713	 * If this is an ISM shared segment, don't include pages that are
3714	 * beyond the real size of the spt segment that backs it.
3715	 */
3716	if (seg->s_ops == &segspt_shmops)
3717		return (MIN(spt_realsize(seg), size));
3718
3719	/*
3720	 * If this is segment is a mapping from /dev/null, then this is a
3721	 * reservation of virtual address space and has no actual size.
3722	 * Such segments are backed by segdev and have type set to neither
3723	 * MAP_SHARED nor MAP_PRIVATE.
3724	 */
3725	if (seg->s_ops == &segdev_ops &&
3726	    ((SEGOP_GETTYPE(seg, seg->s_base) &
3727	    (MAP_SHARED | MAP_PRIVATE)) == 0))
3728		return (0);
3729
3730	/*
3731	 * If this segment doesn't match one of the special types we handle,
3732	 * just return the size of the segment itself.
3733	 */
3734	return (size);
3735}
3736
3737uint_t
3738pr_getprot(struct seg *seg, int reserved, void **tmp,
3739	caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3740{
3741	struct as *as = seg->s_as;
3742
3743	caddr_t saddr = *saddrp;
3744	caddr_t naddr;
3745
3746	int check_noreserve;
3747	uint_t prot;
3748
3749	union {
3750		struct segvn_data *svd;
3751		struct segdev_data *sdp;
3752		void *data;
3753	} s;
3754
3755	s.data = seg->s_data;
3756
3757	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3758	ASSERT(saddr >= seg->s_base && saddr < eaddr);
3759	ASSERT(eaddr <= seg->s_base + seg->s_size);
3760
3761	/*
3762	 * Don't include MAP_NORESERVE pages in the address range
3763	 * unless their mappings have actually materialized.
3764	 * We cheat by knowing that segvn is the only segment
3765	 * driver that supports MAP_NORESERVE.
3766	 */
3767	check_noreserve =
3768	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3769	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3770	    (s.svd->flags & MAP_NORESERVE));
3771
3772	/*
3773	 * Examine every page only as a last resort.  We use guilty knowledge
3774	 * of segvn and segdev to avoid this: if there are no per-page
3775	 * protections present in the segment and we don't care about
3776	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3777	 */
3778	if (!check_noreserve && saddr == seg->s_base &&
3779	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3780		prot = s.svd->prot;
3781		getwatchprot(as, saddr, &prot);
3782		naddr = eaddr;
3783
3784	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3785	    s.sdp != NULL && s.sdp->pageprot == 0) {
3786		prot = s.sdp->prot;
3787		getwatchprot(as, saddr, &prot);
3788		naddr = eaddr;
3789
3790	} else {
3791		prpagev_t *pagev;
3792
3793		/*
3794		 * If addr is sitting at the start of the segment, then
3795		 * create a page vector to store protection and incore
3796		 * information for pages in the segment, and fill it.
3797		 * Otherwise, we expect *tmp to address the prpagev_t
3798		 * allocated by a previous call to this function.
3799		 */
3800		if (saddr == seg->s_base) {
3801			pagev = pr_pagev_create(seg, check_noreserve);
3802			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3803
3804			ASSERT(*tmp == NULL);
3805			*tmp = pagev;
3806
3807			ASSERT(saddr <= eaddr);
3808			*saddrp = saddr;
3809
3810			if (saddr == eaddr) {
3811				naddr = saddr;
3812				prot = 0;
3813				goto out;
3814			}
3815
3816		} else {
3817			ASSERT(*tmp != NULL);
3818			pagev = (prpagev_t *)*tmp;
3819		}
3820
3821		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3822		ASSERT(naddr <= eaddr);
3823	}
3824
3825out:
3826	if (naddr == eaddr)
3827		pr_getprot_done(tmp);
3828	*naddrp = naddr;
3829	return (prot);
3830}
3831
3832void
3833pr_getprot_done(void **tmp)
3834{
3835	if (*tmp != NULL) {
3836		pr_pagev_destroy((prpagev_t *)*tmp);
3837		*tmp = NULL;
3838	}
3839}
3840
3841/*
3842 * Return true iff the vnode is a /proc file from the object directory.
3843 */
3844int
3845pr_isobject(vnode_t *vp)
3846{
3847	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3848}
3849
3850/*
3851 * Return true iff the vnode is a /proc file opened by the process itself.
3852 */
3853int
3854pr_isself(vnode_t *vp)
3855{
3856	/*
3857	 * XXX: To retain binary compatibility with the old
3858	 * ioctl()-based version of /proc, we exempt self-opens
3859	 * of /proc/<pid> from being marked close-on-exec.
3860	 */
3861	return (vn_matchops(vp, prvnodeops) &&
3862	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
3863	    VTOP(vp)->pr_type != PR_PIDDIR);
3864}
3865
3866static ssize_t
3867pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3868{
3869	ssize_t pagesize, hatsize;
3870
3871	ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
3872	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3873	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3874	ASSERT(saddr < eaddr);
3875
3876	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3877	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3878	ASSERT(pagesize != 0);
3879
3880	if (pagesize == -1)
3881		pagesize = PAGESIZE;
3882
3883	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3884
3885	while (saddr < eaddr) {
3886		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3887			break;
3888		ASSERT(IS_P2ALIGNED(saddr, pagesize));
3889		saddr += pagesize;
3890	}
3891
3892	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
3893	return (hatsize);
3894}
3895
3896/*
3897 * Return an array of structures with extended memory map information.
3898 * We allocate here; the caller must deallocate.
3899 */
3900int
3901prgetxmap(proc_t *p, list_t *iolhead)
3902{
3903	struct as *as = p->p_as;
3904	prxmap_t *mp;
3905	struct seg *seg;
3906	struct seg *brkseg, *stkseg;
3907	struct vnode *vp;
3908	struct vattr vattr;
3909	uint_t prot;
3910
3911	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
3912
3913	/*
3914	 * Request an initial buffer size that doesn't waste memory
3915	 * if the address space has only a small number of segments.
3916	 */
3917	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
3918
3919	if ((seg = AS_SEGFIRST(as)) == NULL)
3920		return (0);
3921
3922	brkseg = break_seg(p);
3923	stkseg = as_segat(as, prgetstackbase(p));
3924
3925	do {
3926		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
3927		caddr_t saddr, naddr, baddr;
3928		void *tmp = NULL;
3929		ssize_t psz;
3930		char *parr;
3931		uint64_t npages;
3932		uint64_t pagenum;
3933
3934		/*
3935		 * Segment loop part one: iterate from the base of the segment
3936		 * to its end, pausing at each address boundary (baddr) between
3937		 * ranges that have different virtual memory protections.
3938		 */
3939		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
3940			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
3941			ASSERT(baddr >= saddr && baddr <= eaddr);
3942
3943			/*
3944			 * Segment loop part two: iterate from the current
3945			 * position to the end of the protection boundary,
3946			 * pausing at each address boundary (naddr) between
3947			 * ranges that have different underlying page sizes.
3948			 */
3949			for (; saddr < baddr; saddr = naddr) {
3950				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
3951				ASSERT(naddr >= saddr && naddr <= baddr);
3952
3953				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
3954
3955				mp->pr_vaddr = (uintptr_t)saddr;
3956				mp->pr_size = naddr - saddr;
3957				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
3958				mp->pr_mflags = 0;
3959				if (prot & PROT_READ)
3960					mp->pr_mflags |= MA_READ;
3961				if (prot & PROT_WRITE)
3962					mp->pr_mflags |= MA_WRITE;
3963				if (prot & PROT_EXEC)
3964					mp->pr_mflags |= MA_EXEC;
3965				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
3966					mp->pr_mflags |= MA_SHARED;
3967				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
3968					mp->pr_mflags |= MA_NORESERVE;
3969				if (seg->s_ops == &segspt_shmops ||
3970				    (seg->s_ops == &segvn_ops &&
3971				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
3972				    vp == NULL)))
3973					mp->pr_mflags |= MA_ANON;
3974				if (seg == brkseg)
3975					mp->pr_mflags |= MA_BREAK;
3976				else if (seg == stkseg)
3977					mp->pr_mflags |= MA_STACK;
3978				if (seg->s_ops == &segspt_shmops)
3979					mp->pr_mflags |= MA_ISM | MA_SHM;
3980
3981				mp->pr_pagesize = PAGESIZE;
3982				if (psz == -1) {
3983					mp->pr_hatpagesize = 0;
3984				} else {
3985					mp->pr_hatpagesize = psz;
3986				}
3987
3988				/*
3989				 * Manufacture a filename for the "object" dir.
3990				 */
3991				mp->pr_dev = PRNODEV;
3992				vattr.va_mask = AT_FSID|AT_NODEID;
3993				if (seg->s_ops == &segvn_ops &&
3994				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
3995				    vp != NULL && vp->v_type == VREG &&
3996				    VOP_GETATTR(vp, &vattr, 0, CRED(),
3997				    NULL) == 0) {
3998					mp->pr_dev = vattr.va_fsid;
3999					mp->pr_ino = vattr.va_nodeid;
4000					if (vp == p->p_exec)
4001						(void) strcpy(mp->pr_mapname,
4002						    "a.out");
4003					else
4004						pr_object_name(mp->pr_mapname,
4005						    vp, &vattr);
4006				}
4007
4008				/*
4009				 * Get the SysV shared memory id, if any.
4010				 */
4011				if ((mp->pr_mflags & MA_SHARED) &&
4012				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4013				    seg->s_base)) != SHMID_NONE) {
4014					if (mp->pr_shmid == SHMID_FREE)
4015						mp->pr_shmid = -1;
4016
4017					mp->pr_mflags |= MA_SHM;
4018				} else {
4019					mp->pr_shmid = -1;
4020				}
4021
4022				npages = ((uintptr_t)(naddr - saddr)) >>
4023				    PAGESHIFT;
4024				parr = kmem_zalloc(npages, KM_SLEEP);
4025
4026				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4027
4028				for (pagenum = 0; pagenum < npages; pagenum++) {
4029					if (parr[pagenum] & SEG_PAGE_INCORE)
4030						mp->pr_rss++;
4031					if (parr[pagenum] & SEG_PAGE_ANON)
4032						mp->pr_anon++;
4033					if (parr[pagenum] & SEG_PAGE_LOCKED)
4034						mp->pr_locked++;
4035				}
4036				kmem_free(parr, npages);
4037			}
4038		}
4039		ASSERT(tmp == NULL);
4040	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4041
4042	return (0);
4043}
4044
4045/*
4046 * Return the process's credentials.  We don't need a 32-bit equivalent of
4047 * this function because prcred_t and prcred32_t are actually the same.
4048 */
4049void
4050prgetcred(proc_t *p, prcred_t *pcrp)
4051{
4052	mutex_enter(&p->p_crlock);
4053	cred2prcred(p->p_cred, pcrp);
4054	mutex_exit(&p->p_crlock);
4055}
4056
4057/*
4058 * Compute actual size of the prpriv_t structure.
4059 */
4060
4061size_t
4062prgetprivsize(void)
4063{
4064	return (priv_prgetprivsize(NULL));
4065}
4066
4067/*
4068 * Return the process's privileges.  We don't need a 32-bit equivalent of
4069 * this function because prpriv_t and prpriv32_t are actually the same.
4070 */
4071void
4072prgetpriv(proc_t *p, prpriv_t *pprp)
4073{
4074	mutex_enter(&p->p_crlock);
4075	cred2prpriv(p->p_cred, pprp);
4076	mutex_exit(&p->p_crlock);
4077}
4078
4079#ifdef _SYSCALL32_IMPL
4080/*
4081 * Return an array of structures with HAT memory map information.
4082 * We allocate here; the caller must deallocate.
4083 */
4084int
4085prgetxmap32(proc_t *p, list_t *iolhead)
4086{
4087	struct as *as = p->p_as;
4088	prxmap32_t *mp;
4089	struct seg *seg;
4090	struct seg *brkseg, *stkseg;
4091	struct vnode *vp;
4092	struct vattr vattr;
4093	uint_t prot;
4094
4095	ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4096
4097	/*
4098	 * Request an initial buffer size that doesn't waste memory
4099	 * if the address space has only a small number of segments.
4100	 */
4101	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4102
4103	if ((seg = AS_SEGFIRST(as)) == NULL)
4104		return (0);
4105
4106	brkseg = break_seg(p);
4107	stkseg = as_segat(as, prgetstackbase(p));
4108
4109	do {
4110		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4111		caddr_t saddr, naddr, baddr;
4112		void *tmp = NULL;
4113		ssize_t psz;
4114		char *parr;
4115		uint64_t npages;
4116		uint64_t pagenum;
4117
4118		/*
4119		 * Segment loop part one: iterate from the base of the segment
4120		 * to its end, pausing at each address boundary (baddr) between
4121		 * ranges that have different virtual memory protections.
4122		 */
4123		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4124			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4125			ASSERT(baddr >= saddr && baddr <= eaddr);
4126
4127			/*
4128			 * Segment loop part two: iterate from the current
4129			 * position to the end of the protection boundary,
4130			 * pausing at each address boundary (naddr) between
4131			 * ranges that have different underlying page sizes.
4132			 */
4133			for (; saddr < baddr; saddr = naddr) {
4134				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4135				ASSERT(naddr >= saddr && naddr <= baddr);
4136
4137				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4138
4139				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4140				mp->pr_size = (size32_t)(naddr - saddr);
4141				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4142				mp->pr_mflags = 0;
4143				if (prot & PROT_READ)
4144					mp->pr_mflags |= MA_READ;
4145				if (prot & PROT_WRITE)
4146					mp->pr_mflags |= MA_WRITE;
4147				if (prot & PROT_EXEC)
4148					mp->pr_mflags |= MA_EXEC;
4149				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4150					mp->pr_mflags |= MA_SHARED;
4151				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4152					mp->pr_mflags |= MA_NORESERVE;
4153				if (seg->s_ops == &segspt_shmops ||
4154				    (seg->s_ops == &segvn_ops &&
4155				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4156				    vp == NULL)))
4157					mp->pr_mflags |= MA_ANON;
4158				if (seg == brkseg)
4159					mp->pr_mflags |= MA_BREAK;
4160				else if (seg == stkseg)
4161					mp->pr_mflags |= MA_STACK;
4162				if (seg->s_ops == &segspt_shmops)
4163					mp->pr_mflags |= MA_ISM | MA_SHM;
4164
4165				mp->pr_pagesize = PAGESIZE;
4166				if (psz == -1) {
4167					mp->pr_hatpagesize = 0;
4168				} else {
4169					mp->pr_hatpagesize = psz;
4170				}
4171
4172				/*
4173				 * Manufacture a filename for the "object" dir.
4174				 */
4175				mp->pr_dev = PRNODEV32;
4176				vattr.va_mask = AT_FSID|AT_NODEID;
4177				if (seg->s_ops == &segvn_ops &&
4178				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4179				    vp != NULL && vp->v_type == VREG &&
4180				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4181				    NULL) == 0) {
4182					(void) cmpldev(&mp->pr_dev,
4183					    vattr.va_fsid);
4184					mp->pr_ino = vattr.va_nodeid;
4185					if (vp == p->p_exec)
4186						(void) strcpy(mp->pr_mapname,
4187						    "a.out");
4188					else
4189						pr_object_name(mp->pr_mapname,
4190						    vp, &vattr);
4191				}
4192
4193				/*
4194				 * Get the SysV shared memory id, if any.
4195				 */
4196				if ((mp->pr_mflags & MA_SHARED) &&
4197				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4198				    seg->s_base)) != SHMID_NONE) {
4199					if (mp->pr_shmid == SHMID_FREE)
4200						mp->pr_shmid = -1;
4201
4202					mp->pr_mflags |= MA_SHM;
4203				} else {
4204					mp->pr_shmid = -1;
4205				}
4206
4207				npages = ((uintptr_t)(naddr - saddr)) >>
4208				    PAGESHIFT;
4209				parr = kmem_zalloc(npages, KM_SLEEP);
4210
4211				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4212
4213				for (pagenum = 0; pagenum < npages; pagenum++) {
4214					if (parr[pagenum] & SEG_PAGE_INCORE)
4215						mp->pr_rss++;
4216					if (parr[pagenum] & SEG_PAGE_ANON)
4217						mp->pr_anon++;
4218					if (parr[pagenum] & SEG_PAGE_LOCKED)
4219						mp->pr_locked++;
4220				}
4221				kmem_free(parr, npages);
4222			}
4223		}
4224		ASSERT(tmp == NULL);
4225	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4226
4227	return (0);
4228}
4229#endif	/* _SYSCALL32_IMPL */
4230