1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*	Copyright (c) 1984,	 1986, 1987, 1988, 1989 AT&T	*/
27/*	  All Rights Reserved  	*/
28
29#include <sys/types.h>
30#include <sys/param.h>
31#include <sys/time.h>
32#include <sys/cred.h>
33#include <sys/policy.h>
34#include <sys/debug.h>
35#include <sys/dirent.h>
36#include <sys/errno.h>
37#include <sys/file.h>
38#include <sys/inline.h>
39#include <sys/kmem.h>
40#include <sys/pathname.h>
41#include <sys/proc.h>
42#include <sys/brand.h>
43#include <sys/signal.h>
44#include <sys/stat.h>
45#include <sys/sysmacros.h>
46#include <sys/systm.h>
47#include <sys/zone.h>
48#include <sys/uio.h>
49#include <sys/var.h>
50#include <sys/mode.h>
51#include <sys/poll.h>
52#include <sys/user.h>
53#include <sys/vfs.h>
54#include <sys/vfs_opreg.h>
55#include <sys/gfs.h>
56#include <sys/vnode.h>
57#include <sys/fault.h>
58#include <sys/syscall.h>
59#include <sys/procfs.h>
60#include <sys/atomic.h>
61#include <sys/cmn_err.h>
62#include <sys/contract_impl.h>
63#include <sys/ctfs.h>
64#include <sys/avl.h>
65#include <fs/fs_subr.h>
66#include <vm/rm.h>
67#include <vm/as.h>
68#include <vm/seg.h>
69#include <vm/seg_vn.h>
70#include <vm/hat.h>
71#include <fs/proc/prdata.h>
72#if defined(__sparc)
73#include <sys/regset.h>
74#endif
75#if defined(__x86)
76#include <sys/sysi86.h>
77#endif
78
79/*
80 * Created by prinit.
81 */
82vnodeops_t *prvnodeops;
83
84/*
85 * Directory characteristics (patterned after the s5 file system).
86 */
87#define	PRROOTINO	2
88
89#define	PRDIRSIZE	14
90struct prdirect {
91	ushort_t	d_ino;
92	char		d_name[PRDIRSIZE];
93};
94
95#define	PRSDSIZE	(sizeof (struct prdirect))
96
97/*
98 * Directory characteristics.
99 */
100typedef struct prdirent {
101	ino64_t		d_ino;		/* "inode number" of entry */
102	off64_t		d_off;		/* offset of disk directory entry */
103	unsigned short	d_reclen;	/* length of this record */
104	char		d_name[14];	/* name of file */
105} prdirent_t;
106
107/*
108 * Contents of a /proc/<pid> directory.
109 * Reuse d_ino field for the /proc file type.
110 */
111static prdirent_t piddir[] = {
112	{ PR_PIDDIR,	 1 * sizeof (prdirent_t), sizeof (prdirent_t),
113		"." },
114	{ PR_PROCDIR,	 2 * sizeof (prdirent_t), sizeof (prdirent_t),
115		".." },
116	{ PR_AS,	 3 * sizeof (prdirent_t), sizeof (prdirent_t),
117		"as" },
118	{ PR_CTL,	 4 * sizeof (prdirent_t), sizeof (prdirent_t),
119		"ctl" },
120	{ PR_STATUS,	 5 * sizeof (prdirent_t), sizeof (prdirent_t),
121		"status" },
122	{ PR_LSTATUS,	 6 * sizeof (prdirent_t), sizeof (prdirent_t),
123		"lstatus" },
124	{ PR_PSINFO,	 7 * sizeof (prdirent_t), sizeof (prdirent_t),
125		"psinfo" },
126	{ PR_LPSINFO,	 8 * sizeof (prdirent_t), sizeof (prdirent_t),
127		"lpsinfo" },
128	{ PR_MAP,	 9 * sizeof (prdirent_t), sizeof (prdirent_t),
129		"map" },
130	{ PR_RMAP,	10 * sizeof (prdirent_t), sizeof (prdirent_t),
131		"rmap" },
132	{ PR_XMAP,	11 * sizeof (prdirent_t), sizeof (prdirent_t),
133		"xmap" },
134	{ PR_CRED,	12 * sizeof (prdirent_t), sizeof (prdirent_t),
135		"cred" },
136	{ PR_SIGACT,	13 * sizeof (prdirent_t), sizeof (prdirent_t),
137		"sigact" },
138	{ PR_AUXV,	14 * sizeof (prdirent_t), sizeof (prdirent_t),
139		"auxv" },
140	{ PR_USAGE,	15 * sizeof (prdirent_t), sizeof (prdirent_t),
141		"usage" },
142	{ PR_LUSAGE,	16 * sizeof (prdirent_t), sizeof (prdirent_t),
143		"lusage" },
144	{ PR_PAGEDATA,	17 * sizeof (prdirent_t), sizeof (prdirent_t),
145		"pagedata" },
146	{ PR_WATCH,	18 * sizeof (prdirent_t), sizeof (prdirent_t),
147		"watch" },
148	{ PR_CURDIR,	19 * sizeof (prdirent_t), sizeof (prdirent_t),
149		"cwd" },
150	{ PR_ROOTDIR,	20 * sizeof (prdirent_t), sizeof (prdirent_t),
151		"root" },
152	{ PR_FDDIR,	21 * sizeof (prdirent_t), sizeof (prdirent_t),
153		"fd" },
154	{ PR_OBJECTDIR,	22 * sizeof (prdirent_t), sizeof (prdirent_t),
155		"object" },
156	{ PR_LWPDIR,	23 * sizeof (prdirent_t), sizeof (prdirent_t),
157		"lwp" },
158	{ PR_PRIV,	24 * sizeof (prdirent_t), sizeof (prdirent_t),
159		"priv" },
160	{ PR_PATHDIR,	25 * sizeof (prdirent_t), sizeof (prdirent_t),
161		"path" },
162	{ PR_CTDIR,	26 * sizeof (prdirent_t), sizeof (prdirent_t),
163		"contracts" },
164#if defined(__x86)
165	{ PR_LDT,	27 * sizeof (prdirent_t), sizeof (prdirent_t),
166		"ldt" },
167#endif
168};
169
170#define	NPIDDIRFILES	(sizeof (piddir) / sizeof (piddir[0]) - 2)
171
172/*
173 * Contents of a /proc/<pid>/lwp/<lwpid> directory.
174 */
175static prdirent_t lwpiddir[] = {
176	{ PR_LWPIDDIR,	 1 * sizeof (prdirent_t), sizeof (prdirent_t),
177		"." },
178	{ PR_LWPDIR,	 2 * sizeof (prdirent_t), sizeof (prdirent_t),
179		".." },
180	{ PR_LWPCTL,	 3 * sizeof (prdirent_t), sizeof (prdirent_t),
181		"lwpctl" },
182	{ PR_LWPSTATUS,	 4 * sizeof (prdirent_t), sizeof (prdirent_t),
183		"lwpstatus" },
184	{ PR_LWPSINFO,	 5 * sizeof (prdirent_t), sizeof (prdirent_t),
185		"lwpsinfo" },
186	{ PR_LWPUSAGE,	 6 * sizeof (prdirent_t), sizeof (prdirent_t),
187		"lwpusage" },
188	{ PR_XREGS,	 7 * sizeof (prdirent_t), sizeof (prdirent_t),
189		"xregs" },
190	{ PR_TMPLDIR,	 8 * sizeof (prdirent_t), sizeof (prdirent_t),
191		"templates" },
192#if defined(__sparc)
193	{ PR_GWINDOWS,	 9 * sizeof (prdirent_t), sizeof (prdirent_t),
194		"gwindows" },
195	{ PR_ASRS,	10 * sizeof (prdirent_t), sizeof (prdirent_t),
196		"asrs" },
197#endif
198};
199
200#define	NLWPIDDIRFILES	(sizeof (lwpiddir) / sizeof (lwpiddir[0]) - 2)
201
/*
 * Span of entries in the array files (lstatus, lpsinfo, lusage).
 * The span is deliberately larger than the structure itself so that
 * programs cannot get away with using the structure size by mistake.
 * _ILP32 structures are aligned at 8 bytes, _LP64 structures at 16 bytes;
 * LSPAN32() exists only in the _LP64 build.
 */
#ifndef _LP64
#define	LSPAN(type)	(round8(sizeof (type)) + 8)
#else
#define	LSPAN(type)	(round16(sizeof (type)) + 16)
#define	LSPAN32(type)	(round8(sizeof (type)) + 8)
#endif
214
215static void rebuild_objdir(struct as *);
216static void prfreecommon(prcommon_t *);
217static int praccess(vnode_t *, int, int, cred_t *, caller_context_t *);
218
219static int
220propen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
221{
222	vnode_t *vp = *vpp;
223	prnode_t *pnp = VTOP(vp);
224	prcommon_t *pcp = pnp->pr_pcommon;
225	prnodetype_t type = pnp->pr_type;
226	vnode_t *rvp;
227	vtype_t vtype;
228	proc_t *p;
229	int error = 0;
230	prnode_t *npnp = NULL;
231
232	/*
233	 * Nothing to do for the /proc directory itself.
234	 */
235	if (type == PR_PROCDIR)
236		return (0);
237
238	/*
239	 * If we are opening an underlying mapped object, reject opens
240	 * for writing regardless of the objects's access modes.
241	 * If we are opening a file in the /proc/pid/fd directory,
242	 * reject the open for any but a regular file or directory.
243	 * Just do it if we are opening the current or root directory.
244	 */
245	switch (type) {
246	case PR_OBJECT:
247	case PR_FD:
248	case PR_CURDIR:
249	case PR_ROOTDIR:
250		rvp = pnp->pr_realvp;
251		vtype = rvp->v_type;
252		if ((type == PR_OBJECT && (flag & FWRITE)) ||
253		    (type == PR_FD && vtype != VREG && vtype != VDIR))
254			error = EACCES;
255		else {
256			/*
257			 * Need to hold rvp since VOP_OPEN() may release it.
258			 */
259			VN_HOLD(rvp);
260			error = VOP_OPEN(&rvp, flag, cr, ct);
261			if (error) {
262				VN_RELE(rvp);
263			} else {
264				*vpp = rvp;
265				VN_RELE(vp);
266			}
267		}
268		return (error);
269	default:
270		break;
271	}
272
273	/*
274	 * If we are opening the pagedata file, allocate a prnode now
275	 * to avoid calling kmem_alloc() while holding p->p_lock.
276	 */
277	if (type == PR_PAGEDATA || type == PR_OPAGEDATA)
278		npnp = prgetnode(vp, type);
279
280	/*
281	 * If the process exists, lock it now.
282	 * Otherwise we have a race condition with prclose().
283	 */
284	p = pr_p_lock(pnp);
285	mutex_exit(&pr_pidlock);
286	if (p == NULL) {
287		if (npnp != NULL)
288			prfreenode(npnp);
289		return (ENOENT);
290	}
291	ASSERT(p == pcp->prc_proc);
292	ASSERT(p->p_proc_flag & P_PR_LOCK);
293
294	/*
295	 * Maintain a count of opens for write.  Allow exactly one
296	 * O_WRITE|O_EXCL request and fail subsequent ones.
297	 * Don't fail opens of old (bletch!) /proc lwp files.
298	 * Special case for open by the process itself:
299	 * Always allow the open by self and discount this
300	 * open for other opens for writing.
301	 */
302	if (flag & FWRITE) {
303		if (p == curproc) {
304			pcp->prc_selfopens++;
305			pnp->pr_flags |= PR_ISSELF;
306		} else if (type == PR_LWPIDFILE) {
307			/* EMPTY */;
308		} else if (flag & FEXCL) {
309			if (pcp->prc_writers > pcp->prc_selfopens) {
310				error = EBUSY;
311				goto out;
312			}
313			/* semantic for old /proc interface */
314			if (type == PR_PIDDIR)
315				pcp->prc_flags |= PRC_EXCL;
316		} else if (pcp->prc_flags & PRC_EXCL) {
317			ASSERT(pcp->prc_writers > pcp->prc_selfopens);
318			error = secpolicy_proc_excl_open(cr);
319			if (error)
320				goto out;
321		}
322		pcp->prc_writers++;
323		/*
324		 * The vnode may have become invalid between the
325		 * VOP_LOOKUP() of the /proc vnode and the VOP_OPEN().
326		 * If so, do now what prinvalidate() should have done.
327		 */
328		if ((pnp->pr_flags & PR_INVAL) ||
329		    (type == PR_PIDDIR &&
330		    (VTOP(pnp->pr_pidfile)->pr_flags & PR_INVAL))) {
331			if (p != curproc)
332				pcp->prc_selfopens++;
333			ASSERT(pcp->prc_selfopens <= pcp->prc_writers);
334			if (pcp->prc_selfopens == pcp->prc_writers)
335				pcp->prc_flags &= ~PRC_EXCL;
336		}
337	}
338
339	/*
340	 * Do file-specific things.
341	 */
342	switch (type) {
343	default:
344		break;
345	case PR_PAGEDATA:
346	case PR_OPAGEDATA:
347		/*
348		 * Enable data collection for page data file;
349		 * get unique id from the hat layer.
350		 */
351		{
352			int id;
353
354			/*
355			 * Drop p->p_lock to call hat_startstat()
356			 */
357			mutex_exit(&p->p_lock);
358			if ((p->p_flag & SSYS) || p->p_as == &kas ||
359			    (id = hat_startstat(p->p_as)) == -1) {
360				mutex_enter(&p->p_lock);
361				error = ENOMEM;
362			} else if (pnp->pr_hatid == 0) {
363				mutex_enter(&p->p_lock);
364				pnp->pr_hatid = (uint_t)id;
365			} else {
366				mutex_enter(&p->p_lock);
367				/*
368				 * Use our newly allocated prnode.
369				 */
370				npnp->pr_hatid = (uint_t)id;
371				/*
372				 * prgetnode() initialized most of the prnode.
373				 * Duplicate the remainder.
374				 */
375				npnp->pr_ino = pnp->pr_ino;
376				npnp->pr_common = pnp->pr_common;
377				npnp->pr_pcommon = pnp->pr_pcommon;
378				npnp->pr_parent = pnp->pr_parent;
379				VN_HOLD(npnp->pr_parent);
380				npnp->pr_index = pnp->pr_index;
381
382				npnp->pr_next = p->p_plist;
383				p->p_plist = PTOV(npnp);
384
385				VN_RELE(PTOV(pnp));
386				pnp = npnp;
387				npnp = NULL;
388				*vpp = PTOV(pnp);
389			}
390		}
391		break;
392	}
393
394out:
395	prunlock(pnp);
396
397	if (npnp != NULL)
398		prfreenode(npnp);
399	return (error);
400}
401
402/* ARGSUSED */
403static int
404prclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
405	caller_context_t *ct)
406{
407	prnode_t *pnp = VTOP(vp);
408	prcommon_t *pcp = pnp->pr_pcommon;
409	prnodetype_t type = pnp->pr_type;
410	proc_t *p;
411	kthread_t *t;
412	user_t *up;
413
414	/*
415	 * Nothing to do for the /proc directory itself.
416	 */
417	if (type == PR_PROCDIR)
418		return (0);
419
420	ASSERT(type != PR_OBJECT && type != PR_FD &&
421	    type != PR_CURDIR && type != PR_ROOTDIR);
422
423	/*
424	 * If the process exists, lock it now.
425	 * Otherwise we have a race condition with propen().
426	 * Hold pr_pidlock across the reference to prc_selfopens,
427	 * and prc_writers in case there is no process anymore,
428	 * to cover the case of concurrent calls to prclose()
429	 * after the process has been reaped by freeproc().
430	 */
431	p = pr_p_lock(pnp);
432
433	/*
434	 * There is nothing more to do until the last close of
435	 * the file table entry except to clear the pr_owner
436	 * field of the prnode and notify any waiters
437	 * (their file descriptor may have just been closed).
438	 */
439	if (count > 1) {
440		mutex_exit(&pr_pidlock);
441		if (pnp->pr_owner == curproc && !fisopen(vp))
442			pnp->pr_owner = NULL;
443		if (p != NULL) {
444			prnotify(vp);
445			prunlock(pnp);
446		}
447		return (0);
448	}
449
450	/*
451	 * Decrement the count of self-opens for writing.
452	 * Decrement the total count of opens for writing.
453	 * Cancel exclusive opens when only self-opens remain.
454	 */
455	if (flag & FWRITE) {
456		/*
457		 * prc_selfopens also contains the count of
458		 * invalid writers.  See prinvalidate().
459		 */
460		if ((pnp->pr_flags & (PR_ISSELF|PR_INVAL)) ||
461		    (type == PR_PIDDIR &&
462		    (VTOP(pnp->pr_pidfile)->pr_flags & PR_INVAL))) {
463			ASSERT(pcp->prc_selfopens != 0);
464			--pcp->prc_selfopens;
465		}
466		ASSERT(pcp->prc_writers != 0);
467		if (--pcp->prc_writers == pcp->prc_selfopens)
468			pcp->prc_flags &= ~PRC_EXCL;
469	}
470	ASSERT(pcp->prc_writers >= pcp->prc_selfopens);
471	mutex_exit(&pr_pidlock);
472	if (pnp->pr_owner == curproc && !fisopen(vp))
473		pnp->pr_owner = NULL;
474
475	/*
476	 * If there is no process, there is nothing more to do.
477	 */
478	if (p == NULL)
479		return (0);
480
481	ASSERT(p == pcp->prc_proc);
482	prnotify(vp);	/* notify waiters */
483
484	/*
485	 * Do file-specific things.
486	 */
487	switch (type) {
488	default:
489		break;
490	case PR_PAGEDATA:
491	case PR_OPAGEDATA:
492		/*
493		 * This is a page data file.
494		 * Free the hat level statistics.
495		 * Drop p->p_lock before calling hat_freestat().
496		 */
497		mutex_exit(&p->p_lock);
498		if (p->p_as != &kas && pnp->pr_hatid != 0)
499			hat_freestat(p->p_as, pnp->pr_hatid);
500		mutex_enter(&p->p_lock);
501		pnp->pr_hatid = 0;
502		break;
503	}
504
505	/*
506	 * On last close of all writable file descriptors,
507	 * perform run-on-last-close and/or kill-on-last-close logic.
508	 * Can't do this is the /proc agent lwp still exists.
509	 */
510	if (pcp->prc_writers == 0 &&
511	    p->p_agenttp == NULL &&
512	    !(pcp->prc_flags & PRC_DESTROY) &&
513	    p->p_stat != SZOMB &&
514	    (p->p_proc_flag & (P_PR_RUNLCL|P_PR_KILLCL))) {
515		int killproc;
516
517		/*
518		 * Cancel any watchpoints currently in effect.
519		 * The process might disappear during this operation.
520		 */
521		if (pr_cancel_watch(pnp) == NULL)
522			return (0);
523		/*
524		 * If any tracing flags are set, clear them.
525		 */
526		if (p->p_proc_flag & P_PR_TRACE) {
527			up = PTOU(p);
528			premptyset(&up->u_entrymask);
529			premptyset(&up->u_exitmask);
530			up->u_systrap = 0;
531		}
532		premptyset(&p->p_sigmask);
533		premptyset(&p->p_fltmask);
534		killproc = (p->p_proc_flag & P_PR_KILLCL);
535		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
536		/*
537		 * Cancel any outstanding single-step requests.
538		 */
539		if ((t = p->p_tlist) != NULL) {
540			/*
541			 * Drop p_lock because prnostep() touches the stack.
542			 * The loop is safe because the process is P_PR_LOCK'd.
543			 */
544			mutex_exit(&p->p_lock);
545			do {
546				prnostep(ttolwp(t));
547			} while ((t = t->t_forw) != p->p_tlist);
548			mutex_enter(&p->p_lock);
549		}
550		/*
551		 * Set runnable all lwps stopped by /proc.
552		 */
553		if (killproc)
554			sigtoproc(p, NULL, SIGKILL);
555		else
556			allsetrun(p);
557	}
558
559	prunlock(pnp);
560	return (0);
561}
562
563/*
564 * Array of read functions, indexed by /proc file type.
565 */
566static int pr_read_inval(), pr_read_as(), pr_read_status(),
567	pr_read_lstatus(), pr_read_psinfo(), pr_read_lpsinfo(),
568	pr_read_map(), pr_read_rmap(), pr_read_xmap(),
569	pr_read_cred(), pr_read_sigact(), pr_read_auxv(),
570#if defined(__x86)
571	pr_read_ldt(),
572#endif
573	pr_read_usage(), pr_read_lusage(), pr_read_pagedata(),
574	pr_read_watch(), pr_read_lwpstatus(), pr_read_lwpsinfo(),
575	pr_read_lwpusage(), pr_read_xregs(), pr_read_priv(),
576#if defined(__sparc)
577	pr_read_gwindows(), pr_read_asrs(),
578#endif
579	pr_read_piddir(), pr_read_pidfile(), pr_read_opagedata();
580
581static int (*pr_read_function[PR_NFILES])() = {
582	pr_read_inval,		/* /proc				*/
583	pr_read_inval,		/* /proc/self				*/
584	pr_read_piddir,		/* /proc/<pid> (old /proc read())	*/
585	pr_read_as,		/* /proc/<pid>/as			*/
586	pr_read_inval,		/* /proc/<pid>/ctl			*/
587	pr_read_status,		/* /proc/<pid>/status			*/
588	pr_read_lstatus,	/* /proc/<pid>/lstatus			*/
589	pr_read_psinfo,		/* /proc/<pid>/psinfo			*/
590	pr_read_lpsinfo,	/* /proc/<pid>/lpsinfo			*/
591	pr_read_map,		/* /proc/<pid>/map			*/
592	pr_read_rmap,		/* /proc/<pid>/rmap			*/
593	pr_read_xmap,		/* /proc/<pid>/xmap			*/
594	pr_read_cred,		/* /proc/<pid>/cred			*/
595	pr_read_sigact,		/* /proc/<pid>/sigact			*/
596	pr_read_auxv,		/* /proc/<pid>/auxv			*/
597#if defined(__x86)
598	pr_read_ldt,		/* /proc/<pid>/ldt			*/
599#endif
600	pr_read_usage,		/* /proc/<pid>/usage			*/
601	pr_read_lusage,		/* /proc/<pid>/lusage			*/
602	pr_read_pagedata,	/* /proc/<pid>/pagedata			*/
603	pr_read_watch,		/* /proc/<pid>/watch			*/
604	pr_read_inval,		/* /proc/<pid>/cwd			*/
605	pr_read_inval,		/* /proc/<pid>/root			*/
606	pr_read_inval,		/* /proc/<pid>/fd			*/
607	pr_read_inval,		/* /proc/<pid>/fd/nn			*/
608	pr_read_inval,		/* /proc/<pid>/object			*/
609	pr_read_inval,		/* /proc/<pid>/object/xxx		*/
610	pr_read_inval,		/* /proc/<pid>/lwp			*/
611	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>		*/
612	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/lwpctl	*/
613	pr_read_lwpstatus,	/* /proc/<pid>/lwp/<lwpid>/lwpstatus	*/
614	pr_read_lwpsinfo,	/* /proc/<pid>/lwp/<lwpid>/lwpsinfo	*/
615	pr_read_lwpusage,	/* /proc/<pid>/lwp/<lwpid>/lwpusage	*/
616	pr_read_xregs,		/* /proc/<pid>/lwp/<lwpid>/xregs	*/
617	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/templates	*/
618	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/templates/<id> */
619#if defined(__sparc)
620	pr_read_gwindows,	/* /proc/<pid>/lwp/<lwpid>/gwindows	*/
621	pr_read_asrs,		/* /proc/<pid>/lwp/<lwpid>/asrs		*/
622#endif
623	pr_read_priv,		/* /proc/<pid>/priv			*/
624	pr_read_inval,		/* /proc/<pid>/path			*/
625	pr_read_inval,		/* /proc/<pid>/path/xxx			*/
626	pr_read_inval,		/* /proc/<pid>/contracts		*/
627	pr_read_inval,		/* /proc/<pid>/contracts/<ctid>		*/
628	pr_read_pidfile,	/* old process file			*/
629	pr_read_pidfile,	/* old lwp file				*/
630	pr_read_opagedata,	/* old pagedata file			*/
631};
632
633/* ARGSUSED */
634static int
635pr_read_inval(prnode_t *pnp, uio_t *uiop)
636{
637	/*
638	 * No read() on any /proc directory, use getdents(2) instead.
639	 * Cannot read a control file either.
640	 * An underlying mapped object file cannot get here.
641	 */
642	return (EINVAL);
643}
644
645static int
646pr_uioread(void *base, long count, uio_t *uiop)
647{
648	int error = 0;
649
650	ASSERT(count >= 0);
651	count -= uiop->uio_offset;
652	if (count > 0 && uiop->uio_offset >= 0) {
653		error = uiomove((char *)base + uiop->uio_offset,
654		    count, UIO_READ, uiop);
655	}
656
657	return (error);
658}
659
660static int
661pr_read_as(prnode_t *pnp, uio_t *uiop)
662{
663	int error;
664
665	ASSERT(pnp->pr_type == PR_AS);
666
667	if ((error = prlock(pnp, ZNO)) == 0) {
668		proc_t *p = pnp->pr_common->prc_proc;
669		struct as *as = p->p_as;
670
671		/*
672		 * /proc I/O cannot be done to a system process.
673		 * A 32-bit process cannot read a 64-bit process.
674		 */
675		if ((p->p_flag & SSYS) || as == &kas) {
676			error = 0;
677#ifdef _SYSCALL32_IMPL
678		} else if (curproc->p_model == DATAMODEL_ILP32 &&
679		    PROCESS_NOT_32BIT(p)) {
680			error = EOVERFLOW;
681#endif
682		} else {
683			/*
684			 * We don't hold p_lock over an i/o operation because
685			 * that could lead to deadlock with the clock thread.
686			 */
687			mutex_exit(&p->p_lock);
688			error = prusrio(p, UIO_READ, uiop, 0);
689			mutex_enter(&p->p_lock);
690		}
691		prunlock(pnp);
692	}
693
694	return (error);
695}
696
697static int
698pr_read_status(prnode_t *pnp, uio_t *uiop)
699{
700	pstatus_t *sp;
701	int error;
702
703	ASSERT(pnp->pr_type == PR_STATUS);
704
705	/*
706	 * We kmem_alloc() the pstatus structure because
707	 * it is so big it might blow the kernel stack.
708	 */
709	sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
710	if ((error = prlock(pnp, ZNO)) == 0) {
711		prgetstatus(pnp->pr_common->prc_proc, sp, VTOZONE(PTOV(pnp)));
712		prunlock(pnp);
713		error = pr_uioread(sp, sizeof (*sp), uiop);
714	}
715	kmem_free(sp, sizeof (*sp));
716	return (error);
717}
718
719static int
720pr_read_lstatus(prnode_t *pnp, uio_t *uiop)
721{
722	proc_t *p;
723	kthread_t *t;
724	lwpdir_t *ldp;
725	size_t size;
726	prheader_t *php;
727	lwpstatus_t *sp;
728	int error;
729	int nlwp;
730	int i;
731
732	ASSERT(pnp->pr_type == PR_LSTATUS);
733
734	if ((error = prlock(pnp, ZNO)) != 0)
735		return (error);
736	p = pnp->pr_common->prc_proc;
737	nlwp = p->p_lwpcnt;
738	size = sizeof (prheader_t) + nlwp * LSPAN(lwpstatus_t);
739
740	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
741	mutex_exit(&p->p_lock);
742	php = kmem_zalloc(size, KM_SLEEP);
743	mutex_enter(&p->p_lock);
744	/* p->p_lwpcnt can't change while process is locked */
745	ASSERT(nlwp == p->p_lwpcnt);
746
747	php->pr_nent = nlwp;
748	php->pr_entsize = LSPAN(lwpstatus_t);
749
750	sp = (lwpstatus_t *)(php + 1);
751	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
752		if (ldp->ld_entry == NULL ||
753		    (t = ldp->ld_entry->le_thread) == NULL)
754			continue;
755		prgetlwpstatus(t, sp, VTOZONE(PTOV(pnp)));
756		sp = (lwpstatus_t *)((caddr_t)sp + LSPAN(lwpstatus_t));
757	}
758	prunlock(pnp);
759
760	error = pr_uioread(php, size, uiop);
761	kmem_free(php, size);
762	return (error);
763}
764
765static int
766pr_read_psinfo(prnode_t *pnp, uio_t *uiop)
767{
768	psinfo_t psinfo;
769	proc_t *p;
770	int error = 0;
771
772	ASSERT(pnp->pr_type == PR_PSINFO);
773
774	/*
775	 * We don't want the full treatment of prlock(pnp) here.
776	 * This file is world-readable and never goes invalid.
777	 * It doesn't matter if we are in the middle of an exec().
778	 */
779	p = pr_p_lock(pnp);
780	mutex_exit(&pr_pidlock);
781	if (p == NULL)
782		error = ENOENT;
783	else {
784		ASSERT(p == pnp->pr_common->prc_proc);
785		prgetpsinfo(p, &psinfo);
786		prunlock(pnp);
787		error = pr_uioread(&psinfo, sizeof (psinfo), uiop);
788	}
789	return (error);
790}
791
792static int
793pr_read_lpsinfo(prnode_t *pnp, uio_t *uiop)
794{
795	proc_t *p;
796	kthread_t *t;
797	lwpdir_t *ldp;
798	lwpent_t *lep;
799	size_t size;
800	prheader_t *php;
801	lwpsinfo_t *sp;
802	int error;
803	int nlwp;
804	int i;
805
806	ASSERT(pnp->pr_type == PR_LPSINFO);
807
808	/*
809	 * We don't want the full treatment of prlock(pnp) here.
810	 * This file is world-readable and never goes invalid.
811	 * It doesn't matter if we are in the middle of an exec().
812	 */
813	p = pr_p_lock(pnp);
814	mutex_exit(&pr_pidlock);
815	if (p == NULL)
816		return (ENOENT);
817	ASSERT(p == pnp->pr_common->prc_proc);
818	if ((nlwp = p->p_lwpcnt + p->p_zombcnt) == 0) {
819		prunlock(pnp);
820		return (ENOENT);
821	}
822	size = sizeof (prheader_t) + nlwp * LSPAN(lwpsinfo_t);
823
824	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
825	mutex_exit(&p->p_lock);
826	php = kmem_zalloc(size, KM_SLEEP);
827	mutex_enter(&p->p_lock);
828	/* p->p_lwpcnt can't change while process is locked */
829	ASSERT(nlwp == p->p_lwpcnt + p->p_zombcnt);
830
831	php->pr_nent = nlwp;
832	php->pr_entsize = LSPAN(lwpsinfo_t);
833
834	sp = (lwpsinfo_t *)(php + 1);
835	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
836		if ((lep = ldp->ld_entry) == NULL)
837			continue;
838		if ((t = lep->le_thread) != NULL)
839			prgetlwpsinfo(t, sp);
840		else {
841			bzero(sp, sizeof (*sp));
842			sp->pr_lwpid = lep->le_lwpid;
843			sp->pr_state = SZOMB;
844			sp->pr_sname = 'Z';
845			sp->pr_start.tv_sec = lep->le_start;
846			sp->pr_bindpro = PBIND_NONE;
847			sp->pr_bindpset = PS_NONE;
848		}
849		sp = (lwpsinfo_t *)((caddr_t)sp + LSPAN(lwpsinfo_t));
850	}
851	prunlock(pnp);
852
853	error = pr_uioread(php, size, uiop);
854	kmem_free(php, size);
855	return (error);
856}
857
858static int
859pr_read_map_common(prnode_t *pnp, uio_t *uiop, prnodetype_t type)
860{
861	proc_t *p;
862	struct as *as;
863	list_t iolhead;
864	int error;
865
866readmap_common:
867	if ((error = prlock(pnp, ZNO)) != 0)
868		return (error);
869
870	p = pnp->pr_common->prc_proc;
871	as = p->p_as;
872
873	if ((p->p_flag & SSYS) || as == &kas) {
874		prunlock(pnp);
875		return (0);
876	}
877
878	if (!AS_LOCK_TRYENTER(as, &as->a_lock, RW_WRITER)) {
879		prunlock(pnp);
880		delay(1);
881		goto readmap_common;
882	}
883	mutex_exit(&p->p_lock);
884
885	switch (type) {
886	case PR_XMAP:
887		error = prgetxmap(p, &iolhead);
888		break;
889	case PR_RMAP:
890		error = prgetmap(p, 1, &iolhead);
891		break;
892	case PR_MAP:
893		error = prgetmap(p, 0, &iolhead);
894		break;
895	}
896
897	AS_LOCK_EXIT(as, &as->a_lock);
898	mutex_enter(&p->p_lock);
899	prunlock(pnp);
900
901	error = pr_iol_uiomove_and_free(&iolhead, uiop, error);
902
903	return (error);
904}
905
906static int
907pr_read_map(prnode_t *pnp, uio_t *uiop)
908{
909	ASSERT(pnp->pr_type == PR_MAP);
910	return (pr_read_map_common(pnp, uiop, pnp->pr_type));
911}
912
913static int
914pr_read_rmap(prnode_t *pnp, uio_t *uiop)
915{
916	ASSERT(pnp->pr_type == PR_RMAP);
917	return (pr_read_map_common(pnp, uiop, pnp->pr_type));
918}
919
920static int
921pr_read_xmap(prnode_t *pnp, uio_t *uiop)
922{
923	ASSERT(pnp->pr_type == PR_XMAP);
924	return (pr_read_map_common(pnp, uiop, pnp->pr_type));
925}
926
927static int
928pr_read_cred(prnode_t *pnp, uio_t *uiop)
929{
930	proc_t *p;
931	prcred_t *pcrp;
932	int error;
933	size_t count;
934
935	ASSERT(pnp->pr_type == PR_CRED);
936
937	/*
938	 * We kmem_alloc() the prcred_t structure because
939	 * the number of supplementary groups is variable.
940	 */
941	pcrp =
942	    kmem_alloc(sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1),
943	    KM_SLEEP);
944
945	if ((error = prlock(pnp, ZNO)) != 0)
946		goto out;
947	p = pnp->pr_common->prc_proc;
948	ASSERT(p != NULL);
949
950	prgetcred(p, pcrp);
951	prunlock(pnp);
952
953	count = sizeof (prcred_t);
954	if (pcrp->pr_ngroups > 1)
955		count += sizeof (gid_t) * (pcrp->pr_ngroups - 1);
956	error = pr_uioread(pcrp, count, uiop);
957out:
958	kmem_free(pcrp, sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1));
959	return (error);
960}
961
962static int
963pr_read_priv(prnode_t *pnp, uio_t *uiop)
964{
965	proc_t *p;
966	size_t psize = prgetprivsize();
967	prpriv_t *ppriv = kmem_alloc(psize, KM_SLEEP);
968	int error;
969
970	ASSERT(pnp->pr_type == PR_PRIV);
971
972	if ((error = prlock(pnp, ZNO)) != 0)
973		goto out;
974	p = pnp->pr_common->prc_proc;
975	ASSERT(p != NULL);
976
977	prgetpriv(p, ppriv);
978	prunlock(pnp);
979
980	error = pr_uioread(ppriv, psize, uiop);
981out:
982	kmem_free(ppriv, psize);
983	return (error);
984}
985
986static int
987pr_read_sigact(prnode_t *pnp, uio_t *uiop)
988{
989	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
990	proc_t *p;
991	struct sigaction *sap;
992	int sig;
993	int error;
994	user_t *up;
995
996	ASSERT(pnp->pr_type == PR_SIGACT);
997
998	/*
999	 * We kmem_alloc() the sigaction array because
1000	 * it is so big it might blow the kernel stack.
1001	 */
1002	sap = kmem_alloc((nsig-1) * sizeof (struct sigaction), KM_SLEEP);
1003
1004	if ((error = prlock(pnp, ZNO)) != 0)
1005		goto out;
1006	p = pnp->pr_common->prc_proc;
1007	ASSERT(p != NULL);
1008
1009	if (uiop->uio_offset >= (nsig-1)*sizeof (struct sigaction)) {
1010		prunlock(pnp);
1011		goto out;
1012	}
1013
1014	up = PTOU(p);
1015	for (sig = 1; sig < nsig; sig++)
1016		prgetaction(p, up, sig, &sap[sig-1]);
1017	prunlock(pnp);
1018
1019	error = pr_uioread(sap, (nsig - 1) * sizeof (struct sigaction), uiop);
1020out:
1021	kmem_free(sap, (nsig-1) * sizeof (struct sigaction));
1022	return (error);
1023}
1024
1025static int
1026pr_read_auxv(prnode_t *pnp, uio_t *uiop)
1027{
1028	auxv_t auxv[__KERN_NAUXV_IMPL];
1029	proc_t *p;
1030	user_t *up;
1031	int error;
1032
1033	ASSERT(pnp->pr_type == PR_AUXV);
1034
1035	if ((error = prlock(pnp, ZNO)) != 0)
1036		return (error);
1037
1038	if (uiop->uio_offset >= sizeof (auxv)) {
1039		prunlock(pnp);
1040		return (0);
1041	}
1042
1043	p = pnp->pr_common->prc_proc;
1044	up = PTOU(p);
1045	bcopy(up->u_auxv, auxv, sizeof (auxv));
1046	prunlock(pnp);
1047
1048	return (pr_uioread(auxv, sizeof (auxv), uiop));
1049}
1050
#if defined(__x86)
/*
 * Read function for /proc/<pid>/ldt.
 *
 * XX64
 *	This is almost certainly broken for the amd64 kernel, because
 *	there are two kinds of LDT structures to export -- one for
 *	compatibility mode, and one for long mode, sigh.
 *
 *	For now lets just have a ldt of size 0 for 64-bit processes.
 *
 * Returns the prlock() error if locking fails, 0 when the offset is at
 * or past the end of the table, and otherwise the result of pr_uioread().
 */
static int
pr_read_ldt(prnode_t *pnp, uio_t *uiop)
{
	struct ssd *ssd = NULL;
	proc_t *p;
	size_t size;
	int error;

	ASSERT(pnp->pr_type == PR_LDT);

	error = prlock(pnp, ZNO);
	if (error != 0)
		return (error);
	p = pnp->pr_common->prc_proc;

	/* Swap p_lock for p_ldtlock while the LDT is examined. */
	mutex_exit(&p->p_lock);
	mutex_enter(&p->p_ldtlock);

	/* Collect the descriptors only if the read starts inside them. */
	size = prnldt(p) * sizeof (struct ssd);
	if (uiop->uio_offset < size) {
		ssd = kmem_alloc(size, KM_SLEEP);
		prgetldt(p, ssd);
	}

	mutex_exit(&p->p_ldtlock);
	mutex_enter(&p->p_lock);
	prunlock(pnp);

	if (ssd == NULL)
		return (0);

	error = pr_uioread(ssd, size, uiop);
	kmem_free(ssd, size);
	return (error);
}
#endif	/* __x86 */
1095
1096static int
1097pr_read_usage(prnode_t *pnp, uio_t *uiop)
1098{
1099	prhusage_t *pup;
1100	prusage_t *upup;
1101	proc_t *p;
1102	kthread_t *t;
1103	int error;
1104
1105	ASSERT(pnp->pr_type == PR_USAGE);
1106
1107	/* allocate now, before locking the process */
1108	pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
1109	upup = kmem_alloc(sizeof (*upup), KM_SLEEP);
1110
1111	/*
1112	 * We don't want the full treatment of prlock(pnp) here.
1113	 * This file is world-readable and never goes invalid.
1114	 * It doesn't matter if we are in the middle of an exec().
1115	 */
1116	p = pr_p_lock(pnp);
1117	mutex_exit(&pr_pidlock);
1118	if (p == NULL) {
1119		error = ENOENT;
1120		goto out;
1121	}
1122	ASSERT(p == pnp->pr_common->prc_proc);
1123
1124	if (uiop->uio_offset >= sizeof (prusage_t)) {
1125		prunlock(pnp);
1126		error = 0;
1127		goto out;
1128	}
1129
1130	pup->pr_tstamp = gethrtime();
1131
1132	pup->pr_count  = p->p_defunct;
1133	pup->pr_create = p->p_mstart;
1134	pup->pr_term   = p->p_mterm;
1135
1136	pup->pr_rtime    = p->p_mlreal;
1137	pup->pr_utime    = p->p_acct[LMS_USER];
1138	pup->pr_stime    = p->p_acct[LMS_SYSTEM];
1139	pup->pr_ttime    = p->p_acct[LMS_TRAP];
1140	pup->pr_tftime   = p->p_acct[LMS_TFAULT];
1141	pup->pr_dftime   = p->p_acct[LMS_DFAULT];
1142	pup->pr_kftime   = p->p_acct[LMS_KFAULT];
1143	pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
1144	pup->pr_slptime  = p->p_acct[LMS_SLEEP];
1145	pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
1146	pup->pr_stoptime = p->p_acct[LMS_STOPPED];
1147
1148	pup->pr_minf  = p->p_ru.minflt;
1149	pup->pr_majf  = p->p_ru.majflt;
1150	pup->pr_nswap = p->p_ru.nswap;
1151	pup->pr_inblk = p->p_ru.inblock;
1152	pup->pr_oublk = p->p_ru.oublock;
1153	pup->pr_msnd  = p->p_ru.msgsnd;
1154	pup->pr_mrcv  = p->p_ru.msgrcv;
1155	pup->pr_sigs  = p->p_ru.nsignals;
1156	pup->pr_vctx  = p->p_ru.nvcsw;
1157	pup->pr_ictx  = p->p_ru.nivcsw;
1158	pup->pr_sysc  = p->p_ru.sysc;
1159	pup->pr_ioch  = p->p_ru.ioch;
1160
1161	/*
1162	 * Add the usage information for each active lwp.
1163	 */
1164	if ((t = p->p_tlist) != NULL &&
1165	    !(pnp->pr_pcommon->prc_flags & PRC_DESTROY)) {
1166		do {
1167			if (t->t_proc_flag & TP_LWPEXIT)
1168				continue;
1169			pup->pr_count++;
1170			praddusage(t, pup);
1171		} while ((t = t->t_forw) != p->p_tlist);
1172	}
1173
1174	prunlock(pnp);
1175
1176	prcvtusage(pup, upup);
1177
1178	error = pr_uioread(upup, sizeof (prusage_t), uiop);
1179out:
1180	kmem_free(pup, sizeof (*pup));
1181	kmem_free(upup, sizeof (*upup));
1182	return (error);
1183}
1184
1185static int
1186pr_read_lusage(prnode_t *pnp, uio_t *uiop)
1187{
1188	int nlwp;
1189	prhusage_t *pup;
1190	prheader_t *php;
1191	prusage_t *upup;
1192	size_t size;
1193	hrtime_t curtime;
1194	proc_t *p;
1195	kthread_t *t;
1196	lwpdir_t *ldp;
1197	int error;
1198	int i;
1199
1200	ASSERT(pnp->pr_type == PR_LUSAGE);
1201
1202	/*
1203	 * We don't want the full treatment of prlock(pnp) here.
1204	 * This file is world-readable and never goes invalid.
1205	 * It doesn't matter if we are in the middle of an exec().
1206	 */
1207	p = pr_p_lock(pnp);
1208	mutex_exit(&pr_pidlock);
1209	if (p == NULL)
1210		return (ENOENT);
1211	ASSERT(p == pnp->pr_common->prc_proc);
1212	if ((nlwp = p->p_lwpcnt) == 0) {
1213		prunlock(pnp);
1214		return (ENOENT);
1215	}
1216
1217	size = sizeof (prheader_t) + (nlwp + 1) * LSPAN(prusage_t);
1218	if (uiop->uio_offset >= size) {
1219		prunlock(pnp);
1220		return (0);
1221	}
1222
1223	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
1224	mutex_exit(&p->p_lock);
1225	pup = kmem_zalloc(size + sizeof (prhusage_t), KM_SLEEP);
1226	mutex_enter(&p->p_lock);
1227	/* p->p_lwpcnt can't change while process is locked */
1228	ASSERT(nlwp == p->p_lwpcnt);
1229
1230	php = (prheader_t *)(pup + 1);
1231	upup = (prusage_t *)(php + 1);
1232
1233	php->pr_nent = nlwp + 1;
1234	php->pr_entsize = LSPAN(prusage_t);
1235
1236	curtime = gethrtime();
1237
1238	/*
1239	 * First the summation over defunct lwps.
1240	 */
1241	pup->pr_count  = p->p_defunct;
1242	pup->pr_tstamp = curtime;
1243	pup->pr_create = p->p_mstart;
1244	pup->pr_term   = p->p_mterm;
1245
1246	pup->pr_rtime    = p->p_mlreal;
1247	pup->pr_utime    = p->p_acct[LMS_USER];
1248	pup->pr_stime    = p->p_acct[LMS_SYSTEM];
1249	pup->pr_ttime    = p->p_acct[LMS_TRAP];
1250	pup->pr_tftime   = p->p_acct[LMS_TFAULT];
1251	pup->pr_dftime   = p->p_acct[LMS_DFAULT];
1252	pup->pr_kftime   = p->p_acct[LMS_KFAULT];
1253	pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
1254	pup->pr_slptime  = p->p_acct[LMS_SLEEP];
1255	pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
1256	pup->pr_stoptime = p->p_acct[LMS_STOPPED];
1257
1258	pup->pr_minf  = p->p_ru.minflt;
1259	pup->pr_majf  = p->p_ru.majflt;
1260	pup->pr_nswap = p->p_ru.nswap;
1261	pup->pr_inblk = p->p_ru.inblock;
1262	pup->pr_oublk = p->p_ru.oublock;
1263	pup->pr_msnd  = p->p_ru.msgsnd;
1264	pup->pr_mrcv  = p->p_ru.msgrcv;
1265	pup->pr_sigs  = p->p_ru.nsignals;
1266	pup->pr_vctx  = p->p_ru.nvcsw;
1267	pup->pr_ictx  = p->p_ru.nivcsw;
1268	pup->pr_sysc  = p->p_ru.sysc;
1269	pup->pr_ioch  = p->p_ru.ioch;
1270
1271	prcvtusage(pup, upup);
1272
1273	/*
1274	 * Fill one prusage struct for each active lwp.
1275	 */
1276	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
1277		if (ldp->ld_entry == NULL ||
1278		    (t = ldp->ld_entry->le_thread) == NULL)
1279			continue;
1280		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1281		ASSERT(nlwp > 0);
1282		--nlwp;
1283		upup = (prusage_t *)((caddr_t)upup + LSPAN(prusage_t));
1284		prgetusage(t, pup);
1285		prcvtusage(pup, upup);
1286	}
1287	ASSERT(nlwp == 0);
1288
1289	prunlock(pnp);
1290
1291	error = pr_uioread(php, size, uiop);
1292	kmem_free(pup, size + sizeof (prhusage_t));
1293	return (error);
1294}
1295
1296static int
1297pr_read_pagedata(prnode_t *pnp, uio_t *uiop)
1298{
1299	proc_t *p;
1300	int error;
1301
1302	ASSERT(pnp->pr_type == PR_PAGEDATA);
1303
1304	if ((error = prlock(pnp, ZNO)) != 0)
1305		return (error);
1306
1307	p = pnp->pr_common->prc_proc;
1308	if ((p->p_flag & SSYS) || p->p_as == &kas) {
1309		prunlock(pnp);
1310		return (0);
1311	}
1312
1313	mutex_exit(&p->p_lock);
1314	error = prpdread(p, pnp->pr_hatid, uiop);
1315	mutex_enter(&p->p_lock);
1316
1317	prunlock(pnp);
1318	return (error);
1319}
1320
1321static int
1322pr_read_opagedata(prnode_t *pnp, uio_t *uiop)
1323{
1324	proc_t *p;
1325	struct as *as;
1326	int error;
1327
1328	ASSERT(pnp->pr_type == PR_OPAGEDATA);
1329
1330	if ((error = prlock(pnp, ZNO)) != 0)
1331		return (error);
1332
1333	p = pnp->pr_common->prc_proc;
1334	as = p->p_as;
1335	if ((p->p_flag & SSYS) || as == &kas) {
1336		prunlock(pnp);
1337		return (0);
1338	}
1339
1340	mutex_exit(&p->p_lock);
1341	error = oprpdread(as, pnp->pr_hatid, uiop);
1342	mutex_enter(&p->p_lock);
1343
1344	prunlock(pnp);
1345	return (error);
1346}
1347
1348static int
1349pr_read_watch(prnode_t *pnp, uio_t *uiop)
1350{
1351	proc_t *p;
1352	int error;
1353	prwatch_t *Bpwp;
1354	size_t size;
1355	prwatch_t *pwp;
1356	int nwarea;
1357	struct watched_area *pwarea;
1358
1359	ASSERT(pnp->pr_type == PR_WATCH);
1360
1361	if ((error = prlock(pnp, ZNO)) != 0)
1362		return (error);
1363
1364	p = pnp->pr_common->prc_proc;
1365	nwarea = avl_numnodes(&p->p_warea);
1366	size = nwarea * sizeof (prwatch_t);
1367	if (uiop->uio_offset >= size) {
1368		prunlock(pnp);
1369		return (0);
1370	}
1371
1372	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
1373	mutex_exit(&p->p_lock);
1374	Bpwp = pwp = kmem_zalloc(size, KM_SLEEP);
1375	mutex_enter(&p->p_lock);
1376	/* p->p_nwarea can't change while process is locked */
1377	ASSERT(nwarea == avl_numnodes(&p->p_warea));
1378
1379	/* gather the watched areas */
1380	for (pwarea = avl_first(&p->p_warea); pwarea != NULL;
1381	    pwarea = AVL_NEXT(&p->p_warea, pwarea), pwp++) {
1382		pwp->pr_vaddr = (uintptr_t)pwarea->wa_vaddr;
1383		pwp->pr_size = pwarea->wa_eaddr - pwarea->wa_vaddr;
1384		pwp->pr_wflags = (int)pwarea->wa_flags;
1385	}
1386
1387	prunlock(pnp);
1388
1389	error = pr_uioread(Bpwp, size, uiop);
1390	kmem_free(Bpwp, size);
1391	return (error);
1392}
1393
1394static int
1395pr_read_lwpstatus(prnode_t *pnp, uio_t *uiop)
1396{
1397	lwpstatus_t *sp;
1398	int error;
1399
1400	ASSERT(pnp->pr_type == PR_LWPSTATUS);
1401
1402	/*
1403	 * We kmem_alloc() the lwpstatus structure because
1404	 * it is so big it might blow the kernel stack.
1405	 */
1406	sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
1407
1408	if ((error = prlock(pnp, ZNO)) != 0)
1409		goto out;
1410
1411	if (uiop->uio_offset >= sizeof (*sp)) {
1412		prunlock(pnp);
1413		goto out;
1414	}
1415
1416	prgetlwpstatus(pnp->pr_common->prc_thread, sp, VTOZONE(PTOV(pnp)));
1417	prunlock(pnp);
1418
1419	error = pr_uioread(sp, sizeof (*sp), uiop);
1420out:
1421	kmem_free(sp, sizeof (*sp));
1422	return (error);
1423}
1424
1425static int
1426pr_read_lwpsinfo(prnode_t *pnp, uio_t *uiop)
1427{
1428	lwpsinfo_t lwpsinfo;
1429	proc_t *p;
1430	kthread_t *t;
1431	lwpent_t *lep;
1432
1433	ASSERT(pnp->pr_type == PR_LWPSINFO);
1434
1435	/*
1436	 * We don't want the full treatment of prlock(pnp) here.
1437	 * This file is world-readable and never goes invalid.
1438	 * It doesn't matter if we are in the middle of an exec().
1439	 */
1440	p = pr_p_lock(pnp);
1441	mutex_exit(&pr_pidlock);
1442	if (p == NULL)
1443		return (ENOENT);
1444	ASSERT(p == pnp->pr_common->prc_proc);
1445	if (pnp->pr_common->prc_tslot == -1) {
1446		prunlock(pnp);
1447		return (ENOENT);
1448	}
1449
1450	if (uiop->uio_offset >= sizeof (lwpsinfo)) {
1451		prunlock(pnp);
1452		return (0);
1453	}
1454
1455	if ((t = pnp->pr_common->prc_thread) != NULL)
1456		prgetlwpsinfo(t, &lwpsinfo);
1457	else {
1458		lep = p->p_lwpdir[pnp->pr_common->prc_tslot].ld_entry;
1459		bzero(&lwpsinfo, sizeof (lwpsinfo));
1460		lwpsinfo.pr_lwpid = lep->le_lwpid;
1461		lwpsinfo.pr_state = SZOMB;
1462		lwpsinfo.pr_sname = 'Z';
1463		lwpsinfo.pr_start.tv_sec = lep->le_start;
1464		lwpsinfo.pr_bindpro = PBIND_NONE;
1465		lwpsinfo.pr_bindpset = PS_NONE;
1466	}
1467	prunlock(pnp);
1468
1469	return (pr_uioread(&lwpsinfo, sizeof (lwpsinfo), uiop));
1470}
1471
1472static int
1473pr_read_lwpusage(prnode_t *pnp, uio_t *uiop)
1474{
1475	prhusage_t *pup;
1476	prusage_t *upup;
1477	proc_t *p;
1478	int error;
1479
1480	ASSERT(pnp->pr_type == PR_LWPUSAGE);
1481
1482	/* allocate now, before locking the process */
1483	pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
1484	upup = kmem_alloc(sizeof (*upup), KM_SLEEP);
1485
1486	/*
1487	 * We don't want the full treatment of prlock(pnp) here.
1488	 * This file is world-readable and never goes invalid.
1489	 * It doesn't matter if we are in the middle of an exec().
1490	 */
1491	p = pr_p_lock(pnp);
1492	mutex_exit(&pr_pidlock);
1493	if (p == NULL) {
1494		error = ENOENT;
1495		goto out;
1496	}
1497	ASSERT(p == pnp->pr_common->prc_proc);
1498	if (pnp->pr_common->prc_thread == NULL) {
1499		prunlock(pnp);
1500		error = ENOENT;
1501		goto out;
1502	}
1503	if (uiop->uio_offset >= sizeof (prusage_t)) {
1504		prunlock(pnp);
1505		error = 0;
1506		goto out;
1507	}
1508
1509	pup->pr_tstamp = gethrtime();
1510	prgetusage(pnp->pr_common->prc_thread, pup);
1511
1512	prunlock(pnp);
1513
1514	prcvtusage(pup, upup);
1515
1516	error = pr_uioread(upup, sizeof (prusage_t), uiop);
1517out:
1518	kmem_free(pup, sizeof (*pup));
1519	kmem_free(upup, sizeof (*upup));
1520	return (error);
1521}
1522
1523/* ARGSUSED */
1524static int
1525pr_read_xregs(prnode_t *pnp, uio_t *uiop)
1526{
1527#if defined(__sparc)
1528	proc_t *p;
1529	kthread_t *t;
1530	int error;
1531	char *xreg;
1532	size_t size;
1533
1534	ASSERT(pnp->pr_type == PR_XREGS);
1535
1536	xreg = kmem_zalloc(sizeof (prxregset_t), KM_SLEEP);
1537
1538	if ((error = prlock(pnp, ZNO)) != 0)
1539		goto out;
1540
1541	p = pnp->pr_common->prc_proc;
1542	t = pnp->pr_common->prc_thread;
1543
1544	size = prhasx(p)? prgetprxregsize(p) : 0;
1545	if (uiop->uio_offset >= size) {
1546		prunlock(pnp);
1547		goto out;
1548	}
1549
1550	/* drop p->p_lock while (possibly) touching the stack */
1551	mutex_exit(&p->p_lock);
1552	prgetprxregs(ttolwp(t), xreg);
1553	mutex_enter(&p->p_lock);
1554	prunlock(pnp);
1555
1556	error = pr_uioread(xreg, size, uiop);
1557out:
1558	kmem_free(xreg, sizeof (prxregset_t));
1559	return (error);
1560#else
1561	return (0);
1562#endif
1563}
1564
1565#if defined(__sparc)
1566
1567static int
1568pr_read_gwindows(prnode_t *pnp, uio_t *uiop)
1569{
1570	proc_t *p;
1571	kthread_t *t;
1572	gwindows_t *gwp;
1573	int error;
1574	size_t size;
1575
1576	ASSERT(pnp->pr_type == PR_GWINDOWS);
1577
1578	gwp = kmem_zalloc(sizeof (gwindows_t), KM_SLEEP);
1579
1580	if ((error = prlock(pnp, ZNO)) != 0)
1581		goto out;
1582
1583	p = pnp->pr_common->prc_proc;
1584	t = pnp->pr_common->prc_thread;
1585
1586	/*
1587	 * Drop p->p_lock while touching the stack.
1588	 * The P_PR_LOCK flag prevents the lwp from
1589	 * disappearing while we do this.
1590	 */
1591	mutex_exit(&p->p_lock);
1592	if ((size = prnwindows(ttolwp(t))) != 0)
1593		size = sizeof (gwindows_t) -
1594		    (SPARC_MAXREGWINDOW - size) * sizeof (struct rwindow);
1595	if (uiop->uio_offset >= size) {
1596		mutex_enter(&p->p_lock);
1597		prunlock(pnp);
1598		goto out;
1599	}
1600	prgetwindows(ttolwp(t), gwp);
1601	mutex_enter(&p->p_lock);
1602	prunlock(pnp);
1603
1604	error = pr_uioread(gwp, size, uiop);
1605out:
1606	kmem_free(gwp, sizeof (gwindows_t));
1607	return (error);
1608}
1609
1610/* ARGSUSED */
1611static int
1612pr_read_asrs(prnode_t *pnp, uio_t *uiop)
1613{
1614	int error;
1615
1616	ASSERT(pnp->pr_type == PR_ASRS);
1617
1618	/* the asrs file exists only for sparc v9 _LP64 processes */
1619	if ((error = prlock(pnp, ZNO)) == 0) {
1620		proc_t *p = pnp->pr_common->prc_proc;
1621		kthread_t *t = pnp->pr_common->prc_thread;
1622		asrset_t asrset;
1623
1624		if (p->p_model != DATAMODEL_LP64 ||
1625		    uiop->uio_offset >= sizeof (asrset_t)) {
1626			prunlock(pnp);
1627			return (0);
1628		}
1629
1630		/*
1631		 * Drop p->p_lock while touching the stack.
1632		 * The P_PR_LOCK flag prevents the lwp from
1633		 * disappearing while we do this.
1634		 */
1635		mutex_exit(&p->p_lock);
1636		prgetasregs(ttolwp(t), asrset);
1637		mutex_enter(&p->p_lock);
1638		prunlock(pnp);
1639
1640		error = pr_uioread(&asrset[0], sizeof (asrset_t), uiop);
1641	}
1642
1643	return (error);
1644}
1645
1646#endif	/* __sparc */
1647
1648static int
1649pr_read_piddir(prnode_t *pnp, uio_t *uiop)
1650{
1651	ASSERT(pnp->pr_type == PR_PIDDIR);
1652	ASSERT(pnp->pr_pidfile != NULL);
1653
1654	/* use the underlying PR_PIDFILE to read the process */
1655	pnp = VTOP(pnp->pr_pidfile);
1656	ASSERT(pnp->pr_type == PR_PIDFILE);
1657
1658	return (pr_read_pidfile(pnp, uiop));
1659}
1660
1661static int
1662pr_read_pidfile(prnode_t *pnp, uio_t *uiop)
1663{
1664	int error;
1665
1666	ASSERT(pnp->pr_type == PR_PIDFILE || pnp->pr_type == PR_LWPIDFILE);
1667
1668	if ((error = prlock(pnp, ZNO)) == 0) {
1669		proc_t *p = pnp->pr_common->prc_proc;
1670		struct as *as = p->p_as;
1671
1672		if ((p->p_flag & SSYS) || as == &kas) {
1673			/*
1674			 * /proc I/O cannot be done to a system process.
1675			 */
1676			error = EIO;	/* old /proc semantics */
1677		} else {
1678			/*
1679			 * We drop p_lock because we don't want to hold
1680			 * it over an I/O operation because that could
1681			 * lead to deadlock with the clock thread.
1682			 * The process will not disappear and its address
1683			 * space will not change because it is marked P_PR_LOCK.
1684			 */
1685			mutex_exit(&p->p_lock);
1686			error = prusrio(p, UIO_READ, uiop, 1);
1687			mutex_enter(&p->p_lock);
1688		}
1689		prunlock(pnp);
1690	}
1691
1692	return (error);
1693}
1694
1695#ifdef _SYSCALL32_IMPL
1696
1697/*
1698 * Array of ILP32 read functions, indexed by /proc file type.
1699 */
/*
 * Forward declarations of the ILP32 read functions that are referenced
 * by the pr_read_function_32[] table before their definitions appear.
 * They are deliberately left without parameter lists, matching the
 * element type of that table.
 */
static int pr_read_status_32();
static int pr_read_lstatus_32();
static int pr_read_psinfo_32();
static int pr_read_lpsinfo_32();
static int pr_read_map_32();
static int pr_read_rmap_32();
static int pr_read_xmap_32();
static int pr_read_sigact_32();
static int pr_read_auxv_32();
static int pr_read_usage_32();
static int pr_read_lusage_32();
static int pr_read_pagedata_32();
static int pr_read_watch_32();
static int pr_read_lwpstatus_32();
static int pr_read_lwpsinfo_32();
static int pr_read_lwpusage_32();
#if defined(__sparc)
static int pr_read_gwindows_32();
#endif
static int pr_read_opagedata_32();
1711
/*
 * ILP32 read dispatch table: PR_NFILES function pointers, indexed by
 * /proc file type.  The comment on each entry names the /proc path that
 * slot serves.
 *
 * Entries are positional, so the order presumably has to match the
 * /proc file type enumeration exactly -- confirm against its definition
 * before adding, removing or reordering entries.  Note that the ldt slot
 * exists only on __x86 builds and the gwindows/asrs slots only on
 * __sparc builds.
 *
 * pr_read_inval fills the slots of nodes that are not readable through
 * this path; several slots (as, cred, ldt, xregs, asrs, priv and the old
 * process/lwp files) use handlers without a _32 suffix.
 */
static int (*pr_read_function_32[PR_NFILES])() = {
	pr_read_inval,		/* /proc				*/
	pr_read_inval,		/* /proc/self				*/
	pr_read_piddir,		/* /proc/<pid> (old /proc read())	*/
	pr_read_as,		/* /proc/<pid>/as			*/
	pr_read_inval,		/* /proc/<pid>/ctl			*/
	pr_read_status_32,	/* /proc/<pid>/status			*/
	pr_read_lstatus_32,	/* /proc/<pid>/lstatus			*/
	pr_read_psinfo_32,	/* /proc/<pid>/psinfo			*/
	pr_read_lpsinfo_32,	/* /proc/<pid>/lpsinfo			*/
	pr_read_map_32,		/* /proc/<pid>/map			*/
	pr_read_rmap_32,	/* /proc/<pid>/rmap			*/
	pr_read_xmap_32,	/* /proc/<pid>/xmap			*/
	pr_read_cred,		/* /proc/<pid>/cred			*/
	pr_read_sigact_32,	/* /proc/<pid>/sigact			*/
	pr_read_auxv_32,	/* /proc/<pid>/auxv			*/
#if defined(__x86)
	pr_read_ldt,		/* /proc/<pid>/ldt			*/
#endif
	pr_read_usage_32,	/* /proc/<pid>/usage			*/
	pr_read_lusage_32,	/* /proc/<pid>/lusage			*/
	pr_read_pagedata_32,	/* /proc/<pid>/pagedata			*/
	pr_read_watch_32,	/* /proc/<pid>/watch			*/
	pr_read_inval,		/* /proc/<pid>/cwd			*/
	pr_read_inval,		/* /proc/<pid>/root			*/
	pr_read_inval,		/* /proc/<pid>/fd			*/
	pr_read_inval,		/* /proc/<pid>/fd/nn			*/
	pr_read_inval,		/* /proc/<pid>/object			*/
	pr_read_inval,		/* /proc/<pid>/object/xxx		*/
	pr_read_inval,		/* /proc/<pid>/lwp			*/
	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>		*/
	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/lwpctl	*/
	pr_read_lwpstatus_32,	/* /proc/<pid>/lwp/<lwpid>/lwpstatus	*/
	pr_read_lwpsinfo_32,	/* /proc/<pid>/lwp/<lwpid>/lwpsinfo	*/
	pr_read_lwpusage_32,	/* /proc/<pid>/lwp/<lwpid>/lwpusage	*/
	pr_read_xregs,		/* /proc/<pid>/lwp/<lwpid>/xregs	*/
	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/templates	*/
	pr_read_inval,		/* /proc/<pid>/lwp/<lwpid>/templates/<id> */
#if defined(__sparc)
	pr_read_gwindows_32,	/* /proc/<pid>/lwp/<lwpid>/gwindows	*/
	pr_read_asrs,		/* /proc/<pid>/lwp/<lwpid>/asrs		*/
#endif
	pr_read_priv,		/* /proc/<pid>/priv			*/
	pr_read_inval,		/* /proc/<pid>/path			*/
	pr_read_inval,		/* /proc/<pid>/path/xxx			*/
	pr_read_inval,		/* /proc/<pid>/contracts		*/
	pr_read_inval,		/* /proc/<pid>/contracts/<ctid>		*/
	pr_read_pidfile,	/* old process file			*/
	pr_read_pidfile,	/* old lwp file				*/
	pr_read_opagedata_32,	/* old pagedata file			*/
};
1763
1764static int
1765pr_read_status_32(prnode_t *pnp, uio_t *uiop)
1766{
1767	pstatus32_t *sp;
1768	proc_t *p;
1769	int error;
1770
1771	ASSERT(pnp->pr_type == PR_STATUS);
1772
1773	/*
1774	 * We kmem_alloc() the pstatus structure because
1775	 * it is so big it might blow the kernel stack.
1776	 */
1777	sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
1778	if ((error = prlock(pnp, ZNO)) == 0) {
1779		/*
1780		 * A 32-bit process cannot get the status of a 64-bit process.
1781		 * The fields for the 64-bit quantities are not large enough.
1782		 */
1783		p = pnp->pr_common->prc_proc;
1784		if (PROCESS_NOT_32BIT(p)) {
1785			prunlock(pnp);
1786			error = EOVERFLOW;
1787		} else {
1788			prgetstatus32(pnp->pr_common->prc_proc, sp,
1789			    VTOZONE(PTOV(pnp)));
1790			prunlock(pnp);
1791			error = pr_uioread(sp, sizeof (*sp), uiop);
1792		}
1793	}
1794	kmem_free((caddr_t)sp, sizeof (*sp));
1795	return (error);
1796}
1797
1798static int
1799pr_read_lstatus_32(prnode_t *pnp, uio_t *uiop)
1800{
1801	proc_t *p;
1802	kthread_t *t;
1803	lwpdir_t *ldp;
1804	size_t size;
1805	prheader32_t *php;
1806	lwpstatus32_t *sp;
1807	int error;
1808	int nlwp;
1809	int i;
1810
1811	ASSERT(pnp->pr_type == PR_LSTATUS);
1812
1813	if ((error = prlock(pnp, ZNO)) != 0)
1814		return (error);
1815	p = pnp->pr_common->prc_proc;
1816	/*
1817	 * A 32-bit process cannot get the status of a 64-bit process.
1818	 * The fields for the 64-bit quantities are not large enough.
1819	 */
1820	if (PROCESS_NOT_32BIT(p)) {
1821		prunlock(pnp);
1822		return (EOVERFLOW);
1823	}
1824	nlwp = p->p_lwpcnt;
1825	size = sizeof (prheader32_t) + nlwp * LSPAN32(lwpstatus32_t);
1826
1827	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
1828	mutex_exit(&p->p_lock);
1829	php = kmem_zalloc(size, KM_SLEEP);
1830	mutex_enter(&p->p_lock);
1831	/* p->p_lwpcnt can't change while process is locked */
1832	ASSERT(nlwp == p->p_lwpcnt);
1833
1834	php->pr_nent = nlwp;
1835	php->pr_entsize = LSPAN32(lwpstatus32_t);
1836
1837	sp = (lwpstatus32_t *)(php + 1);
1838	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
1839		if (ldp->ld_entry == NULL ||
1840		    (t = ldp->ld_entry->le_thread) == NULL)
1841			continue;
1842		prgetlwpstatus32(t, sp, VTOZONE(PTOV(pnp)));
1843		sp = (lwpstatus32_t *)((caddr_t)sp + LSPAN32(lwpstatus32_t));
1844	}
1845	prunlock(pnp);
1846
1847	error = pr_uioread(php, size, uiop);
1848	kmem_free(php, size);
1849	return (error);
1850}
1851
1852static int
1853pr_read_psinfo_32(prnode_t *pnp, uio_t *uiop)
1854{
1855	psinfo32_t psinfo;
1856	proc_t *p;
1857	int error = 0;
1858
1859	ASSERT(pnp->pr_type == PR_PSINFO);
1860
1861	/*
1862	 * We don't want the full treatment of prlock(pnp) here.
1863	 * This file is world-readable and never goes invalid.
1864	 * It doesn't matter if we are in the middle of an exec().
1865	 */
1866	p = pr_p_lock(pnp);
1867	mutex_exit(&pr_pidlock);
1868	if (p == NULL)
1869		error = ENOENT;
1870	else {
1871		ASSERT(p == pnp->pr_common->prc_proc);
1872		prgetpsinfo32(p, &psinfo);
1873		prunlock(pnp);
1874		error = pr_uioread(&psinfo, sizeof (psinfo), uiop);
1875	}
1876	return (error);
1877}
1878
1879static int
1880pr_read_lpsinfo_32(prnode_t *pnp, uio_t *uiop)
1881{
1882	proc_t *p;
1883	kthread_t *t;
1884	lwpdir_t *ldp;
1885	lwpent_t *lep;
1886	size_t size;
1887	prheader32_t *php;
1888	lwpsinfo32_t *sp;
1889	int error;
1890	int nlwp;
1891	int i;
1892
1893	ASSERT(pnp->pr_type == PR_LPSINFO);
1894
1895	/*
1896	 * We don't want the full treatment of prlock(pnp) here.
1897	 * This file is world-readable and never goes invalid.
1898	 * It doesn't matter if we are in the middle of an exec().
1899	 */
1900	p = pr_p_lock(pnp);
1901	mutex_exit(&pr_pidlock);
1902	if (p == NULL)
1903		return (ENOENT);
1904	ASSERT(p == pnp->pr_common->prc_proc);
1905	if ((nlwp = p->p_lwpcnt + p->p_zombcnt) == 0) {
1906		prunlock(pnp);
1907		return (ENOENT);
1908	}
1909	size = sizeof (prheader32_t) + nlwp * LSPAN32(lwpsinfo32_t);
1910
1911	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
1912	mutex_exit(&p->p_lock);
1913	php = kmem_zalloc(size, KM_SLEEP);
1914	mutex_enter(&p->p_lock);
1915	/* p->p_lwpcnt can't change while process is locked */
1916	ASSERT(nlwp == p->p_lwpcnt + p->p_zombcnt);
1917
1918	php->pr_nent = nlwp;
1919	php->pr_entsize = LSPAN32(lwpsinfo32_t);
1920
1921	sp = (lwpsinfo32_t *)(php + 1);
1922	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
1923		if ((lep = ldp->ld_entry) == NULL)
1924			continue;
1925		if ((t = lep->le_thread) != NULL)
1926			prgetlwpsinfo32(t, sp);
1927		else {
1928			bzero(sp, sizeof (*sp));
1929			sp->pr_lwpid = lep->le_lwpid;
1930			sp->pr_state = SZOMB;
1931			sp->pr_sname = 'Z';
1932			sp->pr_start.tv_sec = (time32_t)lep->le_start;
1933		}
1934		sp = (lwpsinfo32_t *)((caddr_t)sp + LSPAN32(lwpsinfo32_t));
1935	}
1936	prunlock(pnp);
1937
1938	error = pr_uioread(php, size, uiop);
1939	kmem_free(php, size);
1940	return (error);
1941}
1942
1943static int
1944pr_read_map_common_32(prnode_t *pnp, uio_t *uiop, prnodetype_t type)
1945{
1946	proc_t *p;
1947	struct as *as;
1948	list_t	iolhead;
1949	int error;
1950
1951readmap32_common:
1952	if ((error = prlock(pnp, ZNO)) != 0)
1953		return (error);
1954
1955	p = pnp->pr_common->prc_proc;
1956	as = p->p_as;
1957
1958	if ((p->p_flag & SSYS) || as == &kas) {
1959		prunlock(pnp);
1960		return (0);
1961	}
1962
1963	if (PROCESS_NOT_32BIT(p)) {
1964		prunlock(pnp);
1965		return (EOVERFLOW);
1966	}
1967
1968	if (!AS_LOCK_TRYENTER(as, &as->a_lock, RW_WRITER)) {
1969		prunlock(pnp);
1970		delay(1);
1971		goto readmap32_common;
1972	}
1973	mutex_exit(&p->p_lock);
1974
1975	switch (type) {
1976	case PR_XMAP:
1977		error = prgetxmap32(p, &iolhead);
1978		break;
1979	case PR_RMAP:
1980		error = prgetmap32(p, 1, &iolhead);
1981		break;
1982	case PR_MAP:
1983		error = prgetmap32(p, 0, &iolhead);
1984		break;
1985	}
1986	AS_LOCK_EXIT(as, &as->a_lock);
1987	mutex_enter(&p->p_lock);
1988	prunlock(pnp);
1989
1990	error = pr_iol_uiomove_and_free(&iolhead, uiop, error);
1991
1992	return (error);
1993}
1994
1995static int
1996pr_read_map_32(prnode_t *pnp, uio_t *uiop)
1997{
1998	ASSERT(pnp->pr_type == PR_MAP);
1999	return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
2000}
2001
2002static int
2003pr_read_rmap_32(prnode_t *pnp, uio_t *uiop)
2004{
2005	ASSERT(pnp->pr_type == PR_RMAP);
2006	return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
2007}
2008
2009static int
2010pr_read_xmap_32(prnode_t *pnp, uio_t *uiop)
2011{
2012	ASSERT(pnp->pr_type == PR_XMAP);
2013	return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
2014}
2015
2016static int
2017pr_read_sigact_32(prnode_t *pnp, uio_t *uiop)
2018{
2019	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
2020	proc_t *p;
2021	struct sigaction32 *sap;
2022	int sig;
2023	int error;
2024	user_t *up;
2025
2026	ASSERT(pnp->pr_type == PR_SIGACT);
2027
2028	/*
2029	 * We kmem_alloc() the sigaction32 array because
2030	 * it is so big it might blow the kernel stack.
2031	 */
2032	sap = kmem_alloc((nsig-1) * sizeof (struct sigaction32), KM_SLEEP);
2033
2034	if ((error = prlock(pnp, ZNO)) != 0)
2035		goto out;
2036	p = pnp->pr_common->prc_proc;
2037
2038	if (PROCESS_NOT_32BIT(p)) {
2039		prunlock(pnp);
2040		error = EOVERFLOW;
2041		goto out;
2042	}
2043
2044	if (uiop->uio_offset >= (nsig-1) * sizeof (struct sigaction32)) {
2045		prunlock(pnp);
2046		goto out;
2047	}
2048
2049	up = PTOU(p);
2050	for (sig = 1; sig < nsig; sig++)
2051		prgetaction32(p, up, sig, &sap[sig-1]);
2052	prunlock(pnp);
2053
2054	error = pr_uioread(sap, (nsig - 1) * sizeof (struct sigaction32), uiop);
2055out:
2056	kmem_free(sap, (nsig-1) * sizeof (struct sigaction32));
2057	return (error);
2058}
2059
2060static int
2061pr_read_auxv_32(prnode_t *pnp, uio_t *uiop)
2062{
2063	auxv32_t auxv[__KERN_NAUXV_IMPL];
2064	proc_t *p;
2065	user_t *up;
2066	int error;
2067	int i;
2068
2069	ASSERT(pnp->pr_type == PR_AUXV);
2070
2071	if ((error = prlock(pnp, ZNO)) != 0)
2072		return (error);
2073	p = pnp->pr_common->prc_proc;
2074
2075	if (PROCESS_NOT_32BIT(p)) {
2076		prunlock(pnp);
2077		return (EOVERFLOW);
2078	}
2079
2080	if (uiop->uio_offset >= sizeof (auxv)) {
2081		prunlock(pnp);
2082		return (0);
2083	}
2084
2085	up = PTOU(p);
2086	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
2087		auxv[i].a_type = (int32_t)up->u_auxv[i].a_type;
2088		auxv[i].a_un.a_val = (int32_t)up->u_auxv[i].a_un.a_val;
2089	}
2090	prunlock(pnp);
2091
2092	return (pr_uioread(auxv, sizeof (auxv), uiop));
2093}
2094
2095static int
2096pr_read_usage_32(prnode_t *pnp, uio_t *uiop)
2097{
2098	prhusage_t *pup;
2099	prusage32_t *upup;
2100	proc_t *p;
2101	kthread_t *t;
2102	int error;
2103
2104	ASSERT(pnp->pr_type == PR_USAGE);
2105
2106	/* allocate now, before locking the process */
2107	pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
2108	upup = kmem_alloc(sizeof (*upup), KM_SLEEP);
2109
2110	/*
2111	 * We don't want the full treatment of prlock(pnp) here.
2112	 * This file is world-readable and never goes invalid.
2113	 * It doesn't matter if we are in the middle of an exec().
2114	 */
2115	p = pr_p_lock(pnp);
2116	mutex_exit(&pr_pidlock);
2117	if (p == NULL) {
2118		error = ENOENT;
2119		goto out;
2120	}
2121	ASSERT(p == pnp->pr_common->prc_proc);
2122
2123	if (uiop->uio_offset >= sizeof (prusage32_t)) {
2124		prunlock(pnp);
2125		error = 0;
2126		goto out;
2127	}
2128
2129	pup->pr_tstamp = gethrtime();
2130
2131	pup->pr_count  = p->p_defunct;
2132	pup->pr_create = p->p_mstart;
2133	pup->pr_term   = p->p_mterm;
2134
2135	pup->pr_rtime    = p->p_mlreal;
2136	pup->pr_utime    = p->p_acct[LMS_USER];
2137	pup->pr_stime    = p->p_acct[LMS_SYSTEM];
2138	pup->pr_ttime    = p->p_acct[LMS_TRAP];
2139	pup->pr_tftime   = p->p_acct[LMS_TFAULT];
2140	pup->pr_dftime   = p->p_acct[LMS_DFAULT];
2141	pup->pr_kftime   = p->p_acct[LMS_KFAULT];
2142	pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
2143	pup->pr_slptime  = p->p_acct[LMS_SLEEP];
2144	pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
2145	pup->pr_stoptime = p->p_acct[LMS_STOPPED];
2146
2147	pup->pr_minf  = p->p_ru.minflt;
2148	pup->pr_majf  = p->p_ru.majflt;
2149	pup->pr_nswap = p->p_ru.nswap;
2150	pup->pr_inblk = p->p_ru.inblock;
2151	pup->pr_oublk = p->p_ru.oublock;
2152	pup->pr_msnd  = p->p_ru.msgsnd;
2153	pup->pr_mrcv  = p->p_ru.msgrcv;
2154	pup->pr_sigs  = p->p_ru.nsignals;
2155	pup->pr_vctx  = p->p_ru.nvcsw;
2156	pup->pr_ictx  = p->p_ru.nivcsw;
2157	pup->pr_sysc  = p->p_ru.sysc;
2158	pup->pr_ioch  = p->p_ru.ioch;
2159
2160	/*
2161	 * Add the usage information for each active lwp.
2162	 */
2163	if ((t = p->p_tlist) != NULL &&
2164	    !(pnp->pr_pcommon->prc_flags & PRC_DESTROY)) {
2165		do {
2166			if (t->t_proc_flag & TP_LWPEXIT)
2167				continue;
2168			pup->pr_count++;
2169			praddusage(t, pup);
2170		} while ((t = t->t_forw) != p->p_tlist);
2171	}
2172
2173	prunlock(pnp);
2174
2175	prcvtusage32(pup, upup);
2176
2177	error = pr_uioread(upup, sizeof (prusage32_t), uiop);
2178out:
2179	kmem_free(pup, sizeof (*pup));
2180	kmem_free(upup, sizeof (*upup));
2181	return (error);
2182}
2183
2184static int
2185pr_read_lusage_32(prnode_t *pnp, uio_t *uiop)
2186{
2187	int nlwp;
2188	prhusage_t *pup;
2189	prheader32_t *php;
2190	prusage32_t *upup;
2191	size_t size;
2192	hrtime_t curtime;
2193	proc_t *p;
2194	kthread_t *t;
2195	lwpdir_t *ldp;
2196	int error;
2197	int i;
2198
2199	ASSERT(pnp->pr_type == PR_LUSAGE);
2200
2201	/*
2202	 * We don't want the full treatment of prlock(pnp) here.
2203	 * This file is world-readable and never goes invalid.
2204	 * It doesn't matter if we are in the middle of an exec().
2205	 */
2206	p = pr_p_lock(pnp);
2207	mutex_exit(&pr_pidlock);
2208	if (p == NULL)
2209		return (ENOENT);
2210	ASSERT(p == pnp->pr_common->prc_proc);
2211	if ((nlwp = p->p_lwpcnt) == 0) {
2212		prunlock(pnp);
2213		return (ENOENT);
2214	}
2215
2216	size = sizeof (prheader32_t) + (nlwp + 1) * LSPAN32(prusage32_t);
2217	if (uiop->uio_offset >= size) {
2218		prunlock(pnp);
2219		return (0);
2220	}
2221
2222	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
2223	mutex_exit(&p->p_lock);
2224	pup = kmem_zalloc(size + sizeof (prhusage_t), KM_SLEEP);
2225	mutex_enter(&p->p_lock);
2226	/* p->p_lwpcnt can't change while process is locked */
2227	ASSERT(nlwp == p->p_lwpcnt);
2228
2229	php = (prheader32_t *)(pup + 1);
2230	upup = (prusage32_t *)(php + 1);
2231
2232	php->pr_nent = nlwp + 1;
2233	php->pr_entsize = LSPAN32(prusage32_t);
2234
2235	curtime = gethrtime();
2236
2237	/*
2238	 * First the summation over defunct lwps.
2239	 */
2240	pup->pr_count  = p->p_defunct;
2241	pup->pr_tstamp = curtime;
2242	pup->pr_create = p->p_mstart;
2243	pup->pr_term   = p->p_mterm;
2244
2245	pup->pr_rtime    = p->p_mlreal;
2246	pup->pr_utime    = p->p_acct[LMS_USER];
2247	pup->pr_stime    = p->p_acct[LMS_SYSTEM];
2248	pup->pr_ttime    = p->p_acct[LMS_TRAP];
2249	pup->pr_tftime   = p->p_acct[LMS_TFAULT];
2250	pup->pr_dftime   = p->p_acct[LMS_DFAULT];
2251	pup->pr_kftime   = p->p_acct[LMS_KFAULT];
2252	pup->pr_ltime    = p->p_acct[LMS_USER_LOCK];
2253	pup->pr_slptime  = p->p_acct[LMS_SLEEP];
2254	pup->pr_wtime    = p->p_acct[LMS_WAIT_CPU];
2255	pup->pr_stoptime = p->p_acct[LMS_STOPPED];
2256
2257	pup->pr_minf  = p->p_ru.minflt;
2258	pup->pr_majf  = p->p_ru.majflt;
2259	pup->pr_nswap = p->p_ru.nswap;
2260	pup->pr_inblk = p->p_ru.inblock;
2261	pup->pr_oublk = p->p_ru.oublock;
2262	pup->pr_msnd  = p->p_ru.msgsnd;
2263	pup->pr_mrcv  = p->p_ru.msgrcv;
2264	pup->pr_sigs  = p->p_ru.nsignals;
2265	pup->pr_vctx  = p->p_ru.nvcsw;
2266	pup->pr_ictx  = p->p_ru.nivcsw;
2267	pup->pr_sysc  = p->p_ru.sysc;
2268	pup->pr_ioch  = p->p_ru.ioch;
2269
2270	prcvtusage32(pup, upup);
2271
2272	/*
2273	 * Fill one prusage struct for each active lwp.
2274	 */
2275	for (ldp = p->p_lwpdir, i = 0; i < p->p_lwpdir_sz; i++, ldp++) {
2276		if (ldp->ld_entry == NULL ||
2277		    (t = ldp->ld_entry->le_thread) == NULL)
2278			continue;
2279		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
2280		ASSERT(nlwp > 0);
2281		--nlwp;
2282		upup = (prusage32_t *)
2283		    ((caddr_t)upup + LSPAN32(prusage32_t));
2284		prgetusage(t, pup);
2285		prcvtusage32(pup, upup);
2286	}
2287	ASSERT(nlwp == 0);
2288
2289	prunlock(pnp);
2290
2291	error = pr_uioread(php, size, uiop);
2292	kmem_free(pup, size + sizeof (prhusage_t));
2293	return (error);
2294}
2295
2296static int
2297pr_read_pagedata_32(prnode_t *pnp, uio_t *uiop)
2298{
2299	proc_t *p;
2300	int error;
2301
2302	ASSERT(pnp->pr_type == PR_PAGEDATA);
2303
2304	if ((error = prlock(pnp, ZNO)) != 0)
2305		return (error);
2306
2307	p = pnp->pr_common->prc_proc;
2308	if ((p->p_flag & SSYS) || p->p_as == &kas) {
2309		prunlock(pnp);
2310		return (0);
2311	}
2312
2313	if (PROCESS_NOT_32BIT(p)) {
2314		prunlock(pnp);
2315		return (EOVERFLOW);
2316	}
2317
2318	mutex_exit(&p->p_lock);
2319	error = prpdread32(p, pnp->pr_hatid, uiop);
2320	mutex_enter(&p->p_lock);
2321
2322	prunlock(pnp);
2323	return (error);
2324}
2325
2326static int
2327pr_read_opagedata_32(prnode_t *pnp, uio_t *uiop)
2328{
2329	proc_t *p;
2330	struct as *as;
2331	int error;
2332
2333	ASSERT(pnp->pr_type == PR_OPAGEDATA);
2334
2335	if ((error = prlock(pnp, ZNO)) != 0)
2336		return (error);
2337
2338	p = pnp->pr_common->prc_proc;
2339	as = p->p_as;
2340
2341	if ((p->p_flag & SSYS) || as == &kas) {
2342		prunlock(pnp);
2343		return (0);
2344	}
2345
2346	if (PROCESS_NOT_32BIT(p)) {
2347		prunlock(pnp);
2348		return (EOVERFLOW);
2349	}
2350
2351	mutex_exit(&p->p_lock);
2352	error = oprpdread32(as, pnp->pr_hatid, uiop);
2353	mutex_enter(&p->p_lock);
2354
2355	prunlock(pnp);
2356	return (error);
2357}
2358
2359static int
2360pr_read_watch_32(prnode_t *pnp, uio_t *uiop)
2361{
2362	proc_t *p;
2363	int error;
2364	prwatch32_t *Bpwp;
2365	size_t size;
2366	prwatch32_t *pwp;
2367	int nwarea;
2368	struct watched_area *pwarea;
2369
2370	ASSERT(pnp->pr_type == PR_WATCH);
2371
2372	if ((error = prlock(pnp, ZNO)) != 0)
2373		return (error);
2374
2375	p = pnp->pr_common->prc_proc;
2376	if (PROCESS_NOT_32BIT(p)) {
2377		prunlock(pnp);
2378		return (EOVERFLOW);
2379	}
2380	nwarea = avl_numnodes(&p->p_warea);
2381	size = nwarea * sizeof (prwatch32_t);
2382	if (uiop->uio_offset >= size) {
2383		prunlock(pnp);
2384		return (0);
2385	}
2386
2387	/* drop p->p_lock to do kmem_alloc(KM_SLEEP) */
2388	mutex_exit(&p->p_lock);
2389	Bpwp = pwp = kmem_zalloc(size, KM_SLEEP);
2390	mutex_enter(&p->p_lock);
2391	/* p->p_nwarea can't change while process is locked */
2392	ASSERT(nwarea == avl_numnodes(&p->p_warea));
2393
2394	/* gather the watched areas */
2395	for (pwarea = avl_first(&p->p_warea); pwarea != NULL;
2396	    pwarea = AVL_NEXT(&p->p_warea, pwarea), pwp++) {
2397		pwp->pr_vaddr = (caddr32_t)(uintptr_t)pwarea->wa_vaddr;
2398		pwp->pr_size = (size32_t)(pwarea->wa_eaddr - pwarea->wa_vaddr);
2399		pwp->pr_wflags = (int)pwarea->wa_flags;
2400	}
2401
2402	prunlock(pnp);
2403
2404	error = pr_uioread(Bpwp, size, uiop);
2405	kmem_free(Bpwp, size);
2406	return (error);
2407}
2408
2409static int
2410pr_read_lwpstatus_32(prnode_t *pnp, uio_t *uiop)
2411{
2412	lwpstatus32_t *sp;
2413	proc_t *p;
2414	int error;
2415
2416	ASSERT(pnp->pr_type == PR_LWPSTATUS);
2417
2418	/*
2419	 * We kmem_alloc() the lwpstatus structure because
2420	 * it is so big it might blow the kernel stack.
2421	 */
2422	sp = kmem_alloc(sizeof (*sp), KM_SLEEP);
2423
2424	if ((error = prlock(pnp, ZNO)) != 0)
2425		goto out;
2426
2427	/*
2428	 * A 32-bit process cannot get the status of a 64-bit process.
2429	 * The fields for the 64-bit quantities are not large enough.
2430	 */
2431	p = pnp->pr_common->prc_proc;
2432	if (PROCESS_NOT_32BIT(p)) {
2433		prunlock(pnp);
2434		error = EOVERFLOW;
2435		goto out;
2436	}
2437
2438	if (uiop->uio_offset >= sizeof (*sp)) {
2439		prunlock(pnp);
2440		goto out;
2441	}
2442
2443	prgetlwpstatus32(pnp->pr_common->prc_thread, sp, VTOZONE(PTOV(pnp)));
2444	prunlock(pnp);
2445
2446	error = pr_uioread(sp, sizeof (*sp), uiop);
2447out:
2448	kmem_free(sp, sizeof (*sp));
2449	return (error);
2450}
2451
2452static int
2453pr_read_lwpsinfo_32(prnode_t *pnp, uio_t *uiop)
2454{
2455	lwpsinfo32_t lwpsinfo;
2456	proc_t *p;
2457	kthread_t *t;
2458	lwpent_t *lep;
2459
2460	ASSERT(pnp->pr_type == PR_LWPSINFO);
2461
2462	/*
2463	 * We don't want the full treatment of prlock(pnp) here.
2464	 * This file is world-readable and never goes invalid.
2465	 * It doesn't matter if we are in the middle of an exec().
2466	 */
2467	p = pr_p_lock(pnp);
2468	mutex_exit(&pr_pidlock);
2469	if (p == NULL)
2470		return (ENOENT);
2471	ASSERT(p == pnp->pr_common->prc_proc);
2472	if (pnp->pr_common->prc_tslot == -1) {
2473		prunlock(pnp);
2474		return (ENOENT);
2475	}
2476
2477	if (uiop->uio_offset >= sizeof (lwpsinfo)) {
2478		prunlock(pnp);
2479		return (0);
2480	}
2481
2482	if ((t = pnp->pr_common->prc_thread) != NULL)
2483		prgetlwpsinfo32(t, &lwpsinfo);
2484	else {
2485		lep = p->p_lwpdir[pnp->pr_common->prc_tslot].ld_entry;
2486		bzero(&lwpsinfo, sizeof (lwpsinfo));
2487		lwpsinfo.pr_lwpid = lep->le_lwpid;
2488		lwpsinfo.pr_state = SZOMB;
2489		lwpsinfo.pr_sname = 'Z';
2490		lwpsinfo.pr_start.tv_sec = (time32_t)lep->le_start;
2491	}
2492	prunlock(pnp);
2493
2494	return (pr_uioread(&lwpsinfo, sizeof (lwpsinfo), uiop));
2495}
2496
2497static int
2498pr_read_lwpusage_32(prnode_t *pnp, uio_t *uiop)
2499{
2500	prhusage_t *pup;
2501	prusage32_t *upup;
2502	proc_t *p;
2503	int error;
2504
2505	ASSERT(pnp->pr_type == PR_LWPUSAGE);
2506
2507	/* allocate now, before locking the process */
2508	pup = kmem_zalloc(sizeof (*pup), KM_SLEEP);
2509	upup = kmem_alloc(sizeof (*upup), KM_SLEEP);
2510
2511	/*
2512	 * We don't want the full treatment of prlock(pnp) here.
2513	 * This file is world-readable and never goes invalid.
2514	 * It doesn't matter if we are in the middle of an exec().
2515	 */
2516	p = pr_p_lock(pnp);
2517	mutex_exit(&pr_pidlock);
2518	if (p == NULL) {
2519		error = ENOENT;
2520		goto out;
2521	}
2522	ASSERT(p == pnp->pr_common->prc_proc);
2523	if (pnp->pr_common->prc_thread == NULL) {
2524		prunlock(pnp);
2525		error = ENOENT;
2526		goto out;
2527	}
2528	if (uiop->uio_offset >= sizeof (prusage32_t)) {
2529		prunlock(pnp);
2530		error = 0;
2531		goto out;
2532	}
2533
2534	pup->pr_tstamp = gethrtime();
2535	prgetusage(pnp->pr_common->prc_thread, pup);
2536
2537	prunlock(pnp);
2538
2539	prcvtusage32(pup, upup);
2540
2541	error = pr_uioread(upup, sizeof (prusage32_t), uiop);
2542out:
2543	kmem_free(pup, sizeof (*pup));
2544	kmem_free(upup, sizeof (*upup));
2545	return (error);
2546}
2547
2548#if defined(__sparc)
2549static int
2550pr_read_gwindows_32(prnode_t *pnp, uio_t *uiop)
2551{
2552	proc_t *p;
2553	kthread_t *t;
2554	gwindows32_t *gwp;
2555	int error;
2556	size_t size;
2557
2558	ASSERT(pnp->pr_type == PR_GWINDOWS);
2559
2560	gwp = kmem_zalloc(sizeof (gwindows32_t), KM_SLEEP);
2561
2562	if ((error = prlock(pnp, ZNO)) != 0)
2563		goto out;
2564
2565	p = pnp->pr_common->prc_proc;
2566	t = pnp->pr_common->prc_thread;
2567
2568	if (PROCESS_NOT_32BIT(p)) {
2569		prunlock(pnp);
2570		error = EOVERFLOW;
2571		goto out;
2572	}
2573
2574	/*
2575	 * Drop p->p_lock while touching the stack.
2576	 * The P_PR_LOCK flag prevents the lwp from
2577	 * disappearing while we do this.
2578	 */
2579	mutex_exit(&p->p_lock);
2580	if ((size = prnwindows(ttolwp(t))) != 0)
2581		size = sizeof (gwindows32_t) -
2582		    (SPARC_MAXREGWINDOW - size) * sizeof (struct rwindow32);
2583	if (uiop->uio_offset >= size) {
2584		mutex_enter(&p->p_lock);
2585		prunlock(pnp);
2586		goto out;
2587	}
2588	prgetwindows32(ttolwp(t), gwp);
2589	mutex_enter(&p->p_lock);
2590	prunlock(pnp);
2591
2592	error = pr_uioread(gwp, size, uiop);
2593out:
2594	kmem_free(gwp, sizeof (gwindows32_t));
2595	return (error);
2596}
2597#endif	/* __sparc */
2598
2599#endif	/* _SYSCALL32_IMPL */
2600
2601/* ARGSUSED */
2602static int
2603prread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
2604{
2605	prnode_t *pnp = VTOP(vp);
2606
2607	ASSERT(pnp->pr_type < PR_NFILES);
2608
2609#ifdef _SYSCALL32_IMPL
2610	/*
2611	 * What is read from the /proc files depends on the data
2612	 * model of the caller.  An LP64 process will see LP64
2613	 * data.  An ILP32 process will see ILP32 data.
2614	 */
2615	if (curproc->p_model == DATAMODEL_LP64)
2616		return (pr_read_function[pnp->pr_type](pnp, uiop));
2617	else
2618		return (pr_read_function_32[pnp->pr_type](pnp, uiop));
2619#else
2620	return (pr_read_function[pnp->pr_type](pnp, uiop));
2621#endif
2622}
2623
2624/* ARGSUSED */
2625static int
2626prwrite(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
2627{
2628	prnode_t *pnp = VTOP(vp);
2629	int old = 0;
2630	int error;
2631	ssize_t resid;
2632
2633	ASSERT(pnp->pr_type < PR_NFILES);
2634
2635	/*
2636	 * Only a handful of /proc files are writable, enumerate them here.
2637	 */
2638	switch (pnp->pr_type) {
2639	case PR_PIDDIR:		/* directory write()s: visceral revulsion. */
2640		ASSERT(pnp->pr_pidfile != NULL);
2641		/* use the underlying PR_PIDFILE to write the process */
2642		vp = pnp->pr_pidfile;
2643		pnp = VTOP(vp);
2644		ASSERT(pnp->pr_type == PR_PIDFILE);
2645		/* FALLTHROUGH */
2646	case PR_PIDFILE:
2647	case PR_LWPIDFILE:
2648		old = 1;
2649		/* FALLTHROUGH */
2650	case PR_AS:
2651		if ((error = prlock(pnp, ZNO)) == 0) {
2652			proc_t *p = pnp->pr_common->prc_proc;
2653			struct as *as = p->p_as;
2654
2655			if ((p->p_flag & SSYS) || as == &kas) {
2656				/*
2657				 * /proc I/O cannot be done to a system process.
2658				 */
2659				error = EIO;
2660#ifdef _SYSCALL32_IMPL
2661			} else if (curproc->p_model == DATAMODEL_ILP32 &&
2662			    PROCESS_NOT_32BIT(p)) {
2663				error = EOVERFLOW;
2664#endif
2665			} else {
2666				/*
2667				 * See comments above (pr_read_pidfile)
2668				 * about this locking dance.
2669				 */
2670				mutex_exit(&p->p_lock);
2671				error = prusrio(p, UIO_WRITE, uiop, old);
2672				mutex_enter(&p->p_lock);
2673			}
2674			prunlock(pnp);
2675		}
2676		return (error);
2677
2678	case PR_CTL:
2679	case PR_LWPCTL:
2680		resid = uiop->uio_resid;
2681		/*
2682		 * Perform the action on the control file
2683		 * by passing curthreads credentials
2684		 * and not target process's credentials.
2685		 */
2686#ifdef _SYSCALL32_IMPL
2687		if (curproc->p_model == DATAMODEL_ILP32)
2688			error = prwritectl32(vp, uiop, CRED());
2689		else
2690			error = prwritectl(vp, uiop, CRED());
2691#else
2692		error = prwritectl(vp, uiop, CRED());
2693#endif
2694		/*
2695		 * This hack makes sure that the EINTR is passed
2696		 * all the way back to the caller's write() call.
2697		 */
2698		if (error == EINTR)
2699			uiop->uio_resid = resid;
2700		return (error);
2701
2702	default:
2703		return ((vp->v_type == VDIR)? EISDIR : EBADF);
2704	}
2705	/* NOTREACHED */
2706}
2707
/*
 * Fill in the attributes (*vap) of a /proc vnode.
 *
 * vp	 the /proc vnode being queried
 * vap	 attribute structure to fill in
 * flags only ATTR_REAL is examined here; flags is otherwise passed through
 *	 to the underlying vnode's VOP_GETATTR()
 * cr/ct passed through to praccess() and VOP_GETATTR()
 *
 * PR_OBJECT and PR_FD nodes (and PR_CURDIR/PR_ROOTDIR when ATTR_REAL is
 * set and praccess() permits) report the attributes of pr_realvp, with
 * the mode adjusted.  Every other node gets synthesized attributes;
 * va_size in particular is computed per file type and, on _LP64 kernels,
 * depends on whether the *calling* process is ILP32.
 *
 * Returns 0 on success, an error from praccess()/VOP_GETATTR(), or
 * ENOENT if pr_p_lock() cannot find the process.
 *
 * NOTE(review): the PR_OBJSIZE/PR_OBJSPAN helper macros defined below
 * are never #undef'd, so they remain defined for the rest of the file.
 */
static int
prgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
	caller_context_t *ct)
{
	prnode_t *pnp = VTOP(vp);
	prnodetype_t type = pnp->pr_type;
	prcommon_t *pcp;
	proc_t *p;
	struct as *as;
	int error;
	vnode_t *rvp;
	timestruc_t now;
	extern uint_t nproc;
	int ngroups;
	int nsig;

	/*
	 * This ugly bit of code allows us to keep both versions of this
	 * function from the same source.
	 */
#ifdef _LP64
	int iam32bit = (curproc->p_model == DATAMODEL_ILP32);
#define	PR_OBJSIZE(obj32, obj64)	\
	(iam32bit ? sizeof (obj32) : sizeof (obj64))
#define	PR_OBJSPAN(obj32, obj64)	\
	(iam32bit ? LSPAN32(obj32) : LSPAN(obj64))
#else
#define	PR_OBJSIZE(obj32, obj64)	\
	(sizeof (obj64))
#define	PR_OBJSPAN(obj32, obj64)	\
	(LSPAN(obj64))
#endif

	/*
	 * Return all the attributes.  Should be refined
	 * so that it returns only those asked for.
	 * Most of this is complete fakery anyway.
	 */

	/*
	 * For files in the /proc/<pid>/object directory,
	 * return the attributes of the underlying object.
	 * For files in the /proc/<pid>/fd directory,
	 * return the attributes of the underlying file, but
	 * make it look inaccessible if it is not a regular file.
	 * Make directories look like symlinks.
	 */
	switch (type) {
	case PR_CURDIR:
	case PR_ROOTDIR:
		/* without ATTR_REAL these are reported as symlinks below */
		if (!(flags & ATTR_REAL))
			break;
		/* restrict full knowledge of the attributes to owner or root */
		if ((error = praccess(vp, 0, 0, cr, ct)) != 0)
			return (error);
		/* FALLTHROUGH */
	case PR_OBJECT:
	case PR_FD:
		rvp = pnp->pr_realvp;
		error = VOP_GETATTR(rvp, vap, flags, cr, ct);
		if (error)
			return (error);
		if (type == PR_FD) {
			/* fds that are neither files nor directories get mode 0 */
			if (rvp->v_type != VREG && rvp->v_type != VDIR)
				vap->va_mode = 0;
			else
				vap->va_mode &= pnp->pr_mode;
		}
		/* strip all write permission bits from object files */
		if (type == PR_OBJECT)
			vap->va_mode &= 07555;
		if (rvp->v_type == VDIR && !(flags & ATTR_REAL)) {
			vap->va_type = VLNK;
			vap->va_size = 0;
			vap->va_nlink = 1;
		}
		return (0);
	default:
		break;
	}

	/* From here on every attribute is synthesized. */
	bzero(vap, sizeof (*vap));
	/*
	 * Large Files: Internally proc now uses VPROC to indicate
	 * a proc file. Since we have been returning VREG through
	 * VOP_GETATTR() until now, we continue to do this so as
	 * not to break apps depending on this return value.
	 */
	vap->va_type = (vp->v_type == VPROC) ? VREG : vp->v_type;
	vap->va_mode = pnp->pr_mode;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_blksize = DEV_BSIZE;
	vap->va_rdev = 0;
	vap->va_seq = 0;

	/* The /proc root directory needs no process; handle it and return. */
	if (type == PR_PROCDIR) {
		vap->va_uid = 0;
		vap->va_gid = 0;
		vap->va_nlink = nproc + 2;
		vap->va_nodeid = (ino64_t)PRROOTINO;
		gethrestime(&now);
		vap->va_atime = vap->va_mtime = vap->va_ctime = now;
		vap->va_size = (v.v_proc + 2) * PRSDSIZE;
		vap->va_nblocks = btod(vap->va_size);
		return (0);
	}

	/*
	 * /proc/<pid>/self is a symbolic link, and has no prcommon member
	 */
	if (type == PR_SELF) {
		vap->va_uid = crgetruid(CRED());
		vap->va_gid = crgetrgid(CRED());
		vap->va_nodeid = (ino64_t)PR_SELF;
		gethrestime(&now);
		vap->va_atime = vap->va_mtime = vap->va_ctime = now;
		vap->va_nlink = 1;
		vap->va_type = VLNK;
		vap->va_size = 0;
		return (0);
	}

	/*
	 * Everything else describes a particular process.  pr_pidlock is
	 * dropped right after pr_p_lock() whether or not it found one;
	 * on success the matching prunlock() is at the end of the function.
	 */
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);
	if (p == NULL)
		return (ENOENT);
	pcp = pnp->pr_common;

	/* owner and group are the real uid/gid of the process credential */
	mutex_enter(&p->p_crlock);
	vap->va_uid = crgetruid(p->p_cred);
	vap->va_gid = crgetrgid(p->p_cred);
	mutex_exit(&p->p_crlock);

	vap->va_nlink = 1;
	vap->va_nodeid = pnp->pr_ino? pnp->pr_ino :
	    pmkino(pcp->prc_tslot, pcp->prc_slot, pnp->pr_type);
	/*
	 * Timestamps: lwp nodes with a valid slot use the lwp's start
	 * time (whole seconds only); all others use the process start time.
	 */
	if ((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot != -1) {
		vap->va_atime.tv_sec = vap->va_mtime.tv_sec =
		    vap->va_ctime.tv_sec =
		    p->p_lwpdir[pcp->prc_tslot].ld_entry->le_start;
		vap->va_atime.tv_nsec = vap->va_mtime.tv_nsec =
		    vap->va_ctime.tv_nsec = 0;
	} else {
		user_t *up = PTOU(p);
		vap->va_atime.tv_sec = vap->va_mtime.tv_sec =
		    vap->va_ctime.tv_sec = up->u_start.tv_sec;
		vap->va_atime.tv_nsec = vap->va_mtime.tv_nsec =
		    vap->va_ctime.tv_nsec = up->u_start.tv_nsec;
	}

	/*
	 * Per-type size (and, for directories, link count).  Cases that
	 * need the address space lock drop p->p_lock first and retake it
	 * afterwards, so p->p_lock is held again when the switch is left.
	 */
	switch (type) {
	case PR_PIDDIR:
		/* va_nlink: count 'lwp', 'object' and 'fd' directory links */
		vap->va_nlink = 5;
		vap->va_size = sizeof (piddir);
		break;
	case PR_OBJECTDIR:
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
			vap->va_size = 2 * PRSDSIZE;
		else {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
			if (as->a_updatedir)
				rebuild_objdir(as);
			vap->va_size = (as->a_sizedir + 2) * PRSDSIZE;
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		vap->va_nlink = 2;
		break;
	case PR_PATHDIR:
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
			vap->va_size = (P_FINFO(p)->fi_nfiles + 4) * PRSDSIZE;
		else {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
			if (as->a_updatedir)
				rebuild_objdir(as);
			vap->va_size = (as->a_sizedir + 4 +
			    P_FINFO(p)->fi_nfiles) * PRSDSIZE;
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		vap->va_nlink = 2;
		break;
	case PR_PATH:
	case PR_CURDIR:
	case PR_ROOTDIR:
	case PR_CT:
		/* these are presented as zero-length symbolic links */
		vap->va_type = VLNK;
		vap->va_size = 0;
		break;
	case PR_FDDIR:
		vap->va_nlink = 2;
		vap->va_size = (P_FINFO(p)->fi_nfiles + 2) * PRSDSIZE;
		break;
	case PR_LWPDIR:
		/*
		 * va_nlink: count each lwp as a directory link.
		 * va_size: size of p_lwpdir + 2
		 */
		vap->va_nlink = p->p_lwpcnt + p->p_zombcnt + 2;
		vap->va_size = (p->p_lwpdir_sz + 2) * PRSDSIZE;
		break;
	case PR_LWPIDDIR:
		vap->va_nlink = 2;
		vap->va_size = sizeof (lwpiddir);
		break;
	case PR_CTDIR:
		vap->va_nlink = 2;
		vap->va_size = (avl_numnodes(&p->p_ct_held) + 2) * PRSDSIZE;
		break;
	case PR_TMPLDIR:
		vap->va_nlink = 2;
		vap->va_size = (ct_ntypes + 2) * PRSDSIZE;
		break;
	case PR_AS:
	case PR_PIDFILE:
	case PR_LWPIDFILE:
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
			vap->va_size = 0;
		else
			vap->va_size = as->a_resvsize;
		break;
	case PR_STATUS:
		vap->va_size = PR_OBJSIZE(pstatus32_t, pstatus_t);
		break;
	case PR_LSTATUS:
		/* one header followed by one entry per lwp */
		vap->va_size = PR_OBJSIZE(prheader32_t, prheader_t) +
		    p->p_lwpcnt * PR_OBJSPAN(lwpstatus32_t, lwpstatus_t);
		break;
	case PR_PSINFO:
		vap->va_size = PR_OBJSIZE(psinfo32_t, psinfo_t);
		break;
	case PR_LPSINFO:
		/* one header followed by one entry per lwp, zombies included */
		vap->va_size = PR_OBJSIZE(prheader32_t, prheader_t) +
		    (p->p_lwpcnt + p->p_zombcnt) *
		    PR_OBJSPAN(lwpsinfo32_t, lwpsinfo_t);
		break;
	case PR_MAP:
	case PR_RMAP:
	case PR_XMAP:
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
			vap->va_size = 0;
		else {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
			/* only the map file reports the as update time */
			if (type == PR_MAP)
				vap->va_mtime = as->a_updatetime;
			if (type == PR_XMAP)
				vap->va_size = prnsegs(as, 0) *
				    PR_OBJSIZE(prxmap32_t, prxmap_t);
			else
				vap->va_size = prnsegs(as, type == PR_RMAP) *
				    PR_OBJSIZE(prmap32_t, prmap_t);
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		break;
	case PR_CRED:
		/* prcred_t plus one gid_t for each group beyond the first */
		mutex_enter(&p->p_crlock);
		vap->va_size = sizeof (prcred_t);
		ngroups = crgetngroups(p->p_cred);
		if (ngroups > 1)
			vap->va_size += (ngroups - 1) * sizeof (gid_t);
		mutex_exit(&p->p_crlock);
		break;
	case PR_PRIV:
		vap->va_size = prgetprivsize();
		break;
	case PR_SIGACT:
		/*
		 * NOTE(review): the signal count is taken from the brand of
		 * the calling process (curproc), not of the target p.
		 */
		nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
		vap->va_size = (nsig-1) *
		    PR_OBJSIZE(struct sigaction32, struct sigaction);
		break;
	case PR_AUXV:
		vap->va_size = __KERN_NAUXV_IMPL * PR_OBJSIZE(auxv32_t, auxv_t);
		break;
#if defined(__x86)
	case PR_LDT:
		/* p_ldtlock is taken with p->p_lock dropped */
		mutex_exit(&p->p_lock);
		mutex_enter(&p->p_ldtlock);
		vap->va_size = prnldt(p) * sizeof (struct ssd);
		mutex_exit(&p->p_ldtlock);
		mutex_enter(&p->p_lock);
		break;
#endif
	case PR_USAGE:
		vap->va_size = PR_OBJSIZE(prusage32_t, prusage_t);
		break;
	case PR_LUSAGE:
		/* one header, then (number of lwps + 1) usage entries */
		vap->va_size = PR_OBJSIZE(prheader32_t, prheader_t) +
		    (p->p_lwpcnt + 1) * PR_OBJSPAN(prusage32_t, prusage_t);
		break;
	case PR_PAGEDATA:
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
			vap->va_size = 0;
		else {
			/*
			 * We can drop p->p_lock before grabbing the
			 * address space lock because p->p_as will not
			 * change while the process is marked P_PR_LOCK.
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
#ifdef _LP64
			vap->va_size = iam32bit?
			    prpdsize32(as) : prpdsize(as);
#else
			vap->va_size = prpdsize(as);
#endif
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		break;
	case PR_OPAGEDATA:
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas)
			vap->va_size = 0;
		else {
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
#ifdef _LP64
			vap->va_size = iam32bit?
			    oprpdsize32(as) : oprpdsize(as);
#else
			vap->va_size = oprpdsize(as);
#endif
			AS_LOCK_EXIT(as, &as->a_lock);
			mutex_enter(&p->p_lock);
		}
		break;
	case PR_WATCH:
		vap->va_size = avl_numnodes(&p->p_warea) *
		    PR_OBJSIZE(prwatch32_t, prwatch_t);
		break;
	case PR_LWPSTATUS:
		vap->va_size = PR_OBJSIZE(lwpstatus32_t, lwpstatus_t);
		break;
	case PR_LWPSINFO:
		vap->va_size = PR_OBJSIZE(lwpsinfo32_t, lwpsinfo_t);
		break;
	case PR_LWPUSAGE:
		vap->va_size = PR_OBJSIZE(prusage32_t, prusage_t);
		break;
	case PR_XREGS:
		if (prhasx(p))
			vap->va_size = prgetprxregsize(p);
		else
			vap->va_size = 0;
		break;
#if defined(__sparc)
	case PR_GWINDOWS:
	{
		kthread_t *t;
		int n;

		/*
		 * If there is no lwp then just make the size zero.
		 * This can happen if the lwp exits between the VOP_LOOKUP()
		 * of the /proc/<pid>/lwp/<lwpid>/gwindows file and the
		 * VOP_GETATTR() of the resulting vnode.
		 */
		if ((t = pcp->prc_thread) == NULL) {
			vap->va_size = 0;
			break;
		}
		/*
		 * Drop p->p_lock while touching the stack.
		 * The P_PR_LOCK flag prevents the lwp from
		 * disappearing while we do this.
		 */
		mutex_exit(&p->p_lock);
		/* full structure minus the space of the unused window slots */
		if ((n = prnwindows(ttolwp(t))) == 0)
			vap->va_size = 0;
		else
			vap->va_size = PR_OBJSIZE(gwindows32_t, gwindows_t) -
			    (SPARC_MAXREGWINDOW - n) *
			    PR_OBJSIZE(struct rwindow32, struct rwindow);
		mutex_enter(&p->p_lock);
		break;
	}
	case PR_ASRS:
#ifdef _LP64
		if (p->p_model == DATAMODEL_LP64)
			vap->va_size = sizeof (asrset_t);
		else
#endif
			vap->va_size = 0;
		break;
#endif
	case PR_CTL:
	case PR_LWPCTL:
	default:
		vap->va_size = 0;
		break;
	}

	/* releases what pr_p_lock() acquired above */
	prunlock(pnp);
	vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
	return (0);
}
3108
3109static int
3110praccess(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
3111{
3112	prnode_t *pnp = VTOP(vp);
3113	prnodetype_t type = pnp->pr_type;
3114	int vmode;
3115	vtype_t vtype;
3116	proc_t *p;
3117	int error = 0;
3118	vnode_t *rvp;
3119	vnode_t *xvp;
3120
3121	if ((mode & VWRITE) && vn_is_readonly(vp))
3122		return (EROFS);
3123
3124	switch (type) {
3125	case PR_PROCDIR:
3126		break;
3127
3128	case PR_OBJECT:
3129	case PR_FD:
3130		/*
3131		 * Disallow write access to the underlying objects.
3132		 * Disallow access to underlying non-regular-file fds.
3133		 * Disallow access to fds with other than existing open modes.
3134		 */
3135		rvp = pnp->pr_realvp;
3136		vtype = rvp->v_type;
3137		vmode = pnp->pr_mode;
3138		if ((type == PR_OBJECT && (mode & VWRITE)) ||
3139		    (type == PR_FD && vtype != VREG && vtype != VDIR) ||
3140		    (type == PR_FD && (vmode & mode) != mode &&
3141		    secpolicy_proc_access(cr) != 0))
3142			return (EACCES);
3143		return (VOP_ACCESS(rvp, mode, flags, cr, ct));
3144
3145	case PR_PSINFO:		/* these files can be read by anyone */
3146	case PR_LPSINFO:
3147	case PR_LWPSINFO:
3148	case PR_LWPDIR:
3149	case PR_LWPIDDIR:
3150	case PR_USAGE:
3151	case PR_LUSAGE:
3152	case PR_LWPUSAGE:
3153		p = pr_p_lock(pnp);
3154		mutex_exit(&pr_pidlock);
3155		if (p == NULL)
3156			return (ENOENT);
3157		prunlock(pnp);
3158		break;
3159
3160	default:
3161		/*
3162		 * Except for the world-readable files above,
3163		 * only /proc/pid exists if the process is a zombie.
3164		 */
3165		if ((error = prlock(pnp,
3166		    (type == PR_PIDDIR)? ZYES : ZNO)) != 0)
3167			return (error);
3168		p = pnp->pr_common->prc_proc;
3169		if (p != curproc)
3170			error = priv_proc_cred_perm(cr, p, NULL, mode);
3171
3172		if (error != 0 || p == curproc || (p->p_flag & SSYS) ||
3173		    p->p_as == &kas || (xvp = p->p_exec) == NULL) {
3174			prunlock(pnp);
3175		} else {
3176			/*
3177			 * Determine if the process's executable is readable.
3178			 * We have to drop p->p_lock before the secpolicy
3179			 * and VOP operation.
3180			 */
3181			VN_HOLD(xvp);
3182			prunlock(pnp);
3183			if (secpolicy_proc_access(cr) != 0)
3184				error = VOP_ACCESS(xvp, VREAD, 0, cr, ct);
3185			VN_RELE(xvp);
3186		}
3187		if (error)
3188			return (error);
3189		break;
3190	}
3191
3192	if (type == PR_CURDIR || type == PR_ROOTDIR) {
3193		/*
3194		 * Final access check on the underlying directory vnode.
3195		 */
3196		return (VOP_ACCESS(pnp->pr_realvp, mode, flags, cr, ct));
3197	}
3198
3199	/*
3200	 * Visceral revulsion:  For compatibility with old /proc,
3201	 * allow the /proc/<pid> directory to be opened for writing.
3202	 */
3203	vmode = pnp->pr_mode;
3204	if (type == PR_PIDDIR)
3205		vmode |= VWRITE;
3206	if ((vmode & mode) != mode)
3207		error = secpolicy_proc_access(cr);
3208	return (error);
3209}
3210
/*
 * Array of lookup functions, indexed by /proc file type.
 *
 * prlookup() calls pr_lookup_function[type](dp, comp) with the directory
 * node's pr_type as the index, so the entries must stay in the same order
 * as the numeric file type values, and the conditional (__x86, __sparc)
 * entries must track the corresponding conditional file types.
 * Non-directory file types map to pr_lookup_notdir(), which always
 * returns NULL.
 *
 * The functions are declared without prototypes (old-style declarations);
 * each is called with a directory vnode and a component name.
 */
static vnode_t *pr_lookup_notdir(), *pr_lookup_procdir(), *pr_lookup_piddir(),
	*pr_lookup_objectdir(), *pr_lookup_lwpdir(), *pr_lookup_lwpiddir(),
	*pr_lookup_fddir(), *pr_lookup_pathdir(), *pr_lookup_tmpldir(),
	*pr_lookup_ctdir();

static vnode_t *(*pr_lookup_function[PR_NFILES])() = {
	pr_lookup_procdir,	/* /proc				*/
	pr_lookup_notdir,	/* /proc/self				*/
	pr_lookup_piddir,	/* /proc/<pid>				*/
	pr_lookup_notdir,	/* /proc/<pid>/as			*/
	pr_lookup_notdir,	/* /proc/<pid>/ctl			*/
	pr_lookup_notdir,	/* /proc/<pid>/status			*/
	pr_lookup_notdir,	/* /proc/<pid>/lstatus			*/
	pr_lookup_notdir,	/* /proc/<pid>/psinfo			*/
	pr_lookup_notdir,	/* /proc/<pid>/lpsinfo			*/
	pr_lookup_notdir,	/* /proc/<pid>/map			*/
	pr_lookup_notdir,	/* /proc/<pid>/rmap			*/
	pr_lookup_notdir,	/* /proc/<pid>/xmap			*/
	pr_lookup_notdir,	/* /proc/<pid>/cred			*/
	pr_lookup_notdir,	/* /proc/<pid>/sigact			*/
	pr_lookup_notdir,	/* /proc/<pid>/auxv			*/
#if defined(__x86)
	pr_lookup_notdir,	/* /proc/<pid>/ldt			*/
#endif
	pr_lookup_notdir,	/* /proc/<pid>/usage			*/
	pr_lookup_notdir,	/* /proc/<pid>/lusage			*/
	pr_lookup_notdir,	/* /proc/<pid>/pagedata			*/
	pr_lookup_notdir,	/* /proc/<pid>/watch			*/
	pr_lookup_notdir,	/* /proc/<pid>/cwd			*/
	pr_lookup_notdir,	/* /proc/<pid>/root			*/
	pr_lookup_fddir,	/* /proc/<pid>/fd			*/
	pr_lookup_notdir,	/* /proc/<pid>/fd/nn			*/
	pr_lookup_objectdir,	/* /proc/<pid>/object			*/
	pr_lookup_notdir,	/* /proc/<pid>/object/xxx		*/
	pr_lookup_lwpdir,	/* /proc/<pid>/lwp			*/
	pr_lookup_lwpiddir,	/* /proc/<pid>/lwp/<lwpid>		*/
	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpctl	*/
	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpstatus	*/
	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpsinfo	*/
	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpusage	*/
	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/xregs	*/
	pr_lookup_tmpldir,	/* /proc/<pid>/lwp/<lwpid>/templates	*/
	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/templates/<id> */
#if defined(__sparc)
	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/gwindows	*/
	pr_lookup_notdir,	/* /proc/<pid>/lwp/<lwpid>/asrs		*/
#endif
	pr_lookup_notdir,	/* /proc/<pid>/priv			*/
	pr_lookup_pathdir,	/* /proc/<pid>/path			*/
	pr_lookup_notdir,	/* /proc/<pid>/path/xxx			*/
	pr_lookup_ctdir,	/* /proc/<pid>/contracts		*/
	pr_lookup_notdir,	/* /proc/<pid>/contracts/<ctid>		*/
	pr_lookup_notdir,	/* old process file			*/
	pr_lookup_notdir,	/* old lwp file				*/
	pr_lookup_notdir,	/* old pagedata file			*/
};
3270
3271static int
3272prlookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
3273	int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
3274	int *direntflags, pathname_t *realpnp)
3275{
3276	prnode_t *pnp = VTOP(dp);
3277	prnodetype_t type = pnp->pr_type;
3278	int error;
3279
3280	ASSERT(dp->v_type == VDIR);
3281	ASSERT(type < PR_NFILES);
3282
3283	if (type != PR_PROCDIR && strcmp(comp, "..") == 0) {
3284		VN_HOLD(pnp->pr_parent);
3285		*vpp = pnp->pr_parent;
3286		return (0);
3287	}
3288
3289	if (*comp == '\0' ||
3290	    strcmp(comp, ".") == 0 || strcmp(comp, "..") == 0) {
3291		VN_HOLD(dp);
3292		*vpp = dp;
3293		return (0);
3294	}
3295
3296	switch (type) {
3297	case PR_CURDIR:
3298	case PR_ROOTDIR:
3299		/* restrict lookup permission to owner or root */
3300		if ((error = praccess(dp, VEXEC, 0, cr, ct)) != 0)
3301			return (error);
3302		/* FALLTHROUGH */
3303	case PR_FD:
3304		dp = pnp->pr_realvp;
3305		return (VOP_LOOKUP(dp, comp, vpp, pathp, flags, rdir, cr, ct,
3306		    direntflags, realpnp));
3307	default:
3308		break;
3309	}
3310
3311	if ((type == PR_OBJECTDIR || type == PR_FDDIR || type == PR_PATHDIR) &&
3312	    (error = praccess(dp, VEXEC, 0, cr, ct)) != 0)
3313		return (error);
3314
3315	/* XXX - Do we need to pass ct, direntflags, or realpnp? */
3316	*vpp = (pr_lookup_function[type](dp, comp));
3317
3318	return ((*vpp == NULL) ? ENOENT : 0);
3319}
3320
3321/* ARGSUSED */
3322static int
3323prcreate(vnode_t *dp, char *comp, vattr_t *vap, vcexcl_t excl,
3324	int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
3325	vsecattr_t *vsecp)
3326{
3327	int error;
3328
3329	if ((error = prlookup(dp, comp, vpp, NULL, 0, NULL, cr,
3330	    ct, NULL, NULL)) != 0) {
3331		if (error == ENOENT)	/* can't O_CREAT nonexistent files */
3332			error = EACCES;		/* unwriteable directories */
3333	} else {
3334		if (excl == EXCL)			/* O_EXCL */
3335			error = EEXIST;
3336		else if (vap->va_mask & AT_SIZE) {	/* O_TRUNC */
3337			vnode_t *vp = *vpp;
3338			uint_t mask;
3339
3340			if (vp->v_type == VDIR)
3341				error = EISDIR;
3342			else if (vp->v_type != VPROC ||
3343			    VTOP(vp)->pr_type != PR_FD)
3344				error = EACCES;
3345			else {		/* /proc/<pid>/fd/<n> */
3346				vp = VTOP(vp)->pr_realvp;
3347				mask = vap->va_mask;
3348				vap->va_mask = AT_SIZE;
3349				error = VOP_SETATTR(vp, vap, 0, cr, ct);
3350				vap->va_mask = mask;
3351			}
3352		}
3353		if (error) {
3354			VN_RELE(*vpp);
3355			*vpp = NULL;
3356		}
3357	}
3358	return (error);
3359}
3360
/*
 * Lookup routine for /proc file types that are not directories: there is
 * nothing to find beneath them, so the result is always NULL (which
 * prlookup() turns into ENOENT).  Both arguments are ignored.
 */
/* ARGSUSED */
static vnode_t *
pr_lookup_notdir(vnode_t *dp, char *comp)
{
	return (NULL);
}
3367
/*
 * Find or construct a process vnode for the given pid.
 *
 * dp	the /proc root directory vnode (asserted to be PR_PROCDIR)
 * comp	the component name: either "self" or a decimal pid
 *
 * Returns the vnode for /proc/self or /proc/<pid>, or NULL if comp is
 * not a valid pid, no such process is found, the process is still in
 * state SIDL, or secpolicy_basic_procinfo() refuses the caller.
 *
 * A prnode is allocated up front with prgetnode(), before any lock is
 * taken, and is freed again on every path that does not end up using it.
 */
static vnode_t *
pr_lookup_procdir(vnode_t *dp, char *comp)
{
	pid_t pid;
	prnode_t *pnp;
	prcommon_t *pcp;
	vnode_t *vp;
	proc_t *p;
	int c;

	ASSERT(VTOP(dp)->pr_type == PR_PROCDIR);

	if (strcmp(comp, "self") == 0) {
		pnp = prgetnode(dp, PR_SELF);
		return (PTOV(pnp));
	} else {
		/*
		 * Parse a decimal pid: any non-digit fails the lookup,
		 * as does a value that grows past maxpid.
		 */
		pid = 0;
		while ((c = *comp++) != '\0') {
			if (c < '0' || c > '9')
				return (NULL);
			pid = 10*pid + c - '0';
			if (pid > maxpid)
				return (NULL);
		}
	}

	pnp = prgetnode(dp, PR_PIDDIR);

	mutex_enter(&pidlock);
	if ((p = prfind(pid)) == NULL || p->p_stat == SIDL) {
		mutex_exit(&pidlock);
		prfreenode(pnp);
		return (NULL);
	}
	ASSERT(p->p_stat != 0);

	/* NOTE: we're holding pidlock across the policy call. */
	if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
		mutex_exit(&pidlock);
		prfreenode(pnp);
		return (NULL);
	}

	/* p->p_lock is taken before pidlock is released */
	mutex_enter(&p->p_lock);
	mutex_exit(&pidlock);

	/*
	 * If a process vnode already exists and it is not invalid
	 * and it was created by the current process and it belongs
	 * to the same /proc mount point as our parent vnode, then
	 * just use it and discard the newly-allocated prnode.
	 */
	for (vp = p->p_trace; vp != NULL; vp = VTOP(vp)->pr_next) {
		if (!(VTOP(VTOP(vp)->pr_pidfile)->pr_flags & PR_INVAL) &&
		    VTOP(vp)->pr_owner == curproc &&
		    vp->v_vfsp == dp->v_vfsp) {
			ASSERT(!(VTOP(vp)->pr_flags & PR_INVAL));
			VN_HOLD(vp);
			prfreenode(pnp);
			mutex_exit(&p->p_lock);
			return (vp);
		}
	}
	/* no reusable vnode: the new prnode belongs to the caller */
	pnp->pr_owner = curproc;

	/*
	 * prgetnode() initialized most of the prnode.
	 * Finish the job.
	 */
	pcp = pnp->pr_common;	/* the newly-allocated prcommon struct */
	if ((vp = p->p_trace) != NULL) {
		/* discard the new prcommon and use the existing prcommon */
		prfreecommon(pcp);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		ASSERT(pcp->prc_refcnt > 0);
		pcp->prc_refcnt++;
		mutex_exit(&pcp->prc_mutex);
		pnp->pr_common = pcp;
	} else {
		/* initialize the new prcommon struct */
		if ((p->p_flag & SSYS) || p->p_as == &kas)
			pcp->prc_flags |= PRC_SYS;
		if (p->p_stat == SZOMB)
			pcp->prc_flags |= PRC_DESTROY;
		pcp->prc_proc = p;
		pcp->prc_datamodel = p->p_model;
		pcp->prc_pid = p->p_pid;
		pcp->prc_slot = p->p_slot;
	}
	pnp->pr_pcommon = pcp;
	/* the new node keeps a hold on its parent, the /proc root */
	pnp->pr_parent = dp;
	VN_HOLD(dp);
	/*
	 * Link in the old, invalid directory vnode so we
	 * can later determine the last close of the file.
	 */
	pnp->pr_next = p->p_trace;
	/* from here on dp refers to the new /proc/<pid> vnode */
	p->p_trace = dp = PTOV(pnp);

	/*
	 * Kludge for old /proc: initialize the PR_PIDFILE as well.
	 */
	vp = pnp->pr_pidfile;
	pnp = VTOP(vp);
	pnp->pr_ino = ptoi(pcp->prc_pid);
	pnp->pr_common = pcp;
	pnp->pr_pcommon = pcp;
	pnp->pr_parent = dp;
	/* the pidfile node goes at the head of the process's p_plist */
	pnp->pr_next = p->p_plist;
	p->p_plist = vp;

	mutex_exit(&p->p_lock);
	return (dp);
}
3486
3487static vnode_t *
3488pr_lookup_piddir(vnode_t *dp, char *comp)
3489{
3490	prnode_t *dpnp = VTOP(dp);
3491	vnode_t *vp;
3492	prnode_t *pnp;
3493	proc_t *p;
3494	user_t *up;
3495	prdirent_t *dirp;
3496	int i;
3497	enum prnodetype type;
3498
3499	ASSERT(dpnp->pr_type == PR_PIDDIR);
3500
3501	for (i = 0; i < NPIDDIRFILES; i++) {
3502		/* Skip "." and ".." */
3503		dirp = &piddir[i+2];
3504		if (strcmp(comp, dirp->d_name) == 0)
3505			break;
3506	}
3507
3508	if (i >= NPIDDIRFILES)
3509		return (NULL);
3510
3511	type = (int)dirp->d_ino;
3512	pnp = prgetnode(dp, type);
3513
3514	p = pr_p_lock(dpnp);
3515	mutex_exit(&pr_pidlock);
3516	if (p == NULL) {
3517		prfreenode(pnp);
3518		return (NULL);
3519	}
3520	if (dpnp->pr_pcommon->prc_flags & PRC_DESTROY) {
3521		switch (type) {
3522		case PR_PSINFO:
3523		case PR_USAGE:
3524			break;
3525		default:
3526			prunlock(dpnp);
3527			prfreenode(pnp);
3528			return (NULL);
3529		}
3530	}
3531
3532	switch (type) {
3533	case PR_CURDIR:
3534	case PR_ROOTDIR:
3535		up = PTOU(p);
3536		vp = (type == PR_CURDIR)? up->u_cdir :
3537		    (up->u_rdir? up->u_rdir : rootdir);
3538
3539		if (vp == NULL) {	/* can't happen? */
3540			prunlock(dpnp);
3541			prfreenode(pnp);
3542			return (NULL);
3543		}
3544		/*
3545		 * Fill in the prnode so future references will
3546		 * be able to find the underlying object's vnode.
3547		 */
3548		VN_HOLD(vp);
3549		pnp->pr_realvp = vp;
3550		break;
3551	default:
3552		break;
3553	}
3554
3555	mutex_enter(&dpnp->pr_mutex);
3556
3557	if ((vp = dpnp->pr_files[i]) != NULL &&
3558	    !(VTOP(vp)->pr_flags & PR_INVAL)) {
3559		VN_HOLD(vp);
3560		mutex_exit(&dpnp->pr_mutex);
3561		prunlock(dpnp);
3562		prfreenode(pnp);
3563		return (vp);
3564	}
3565
3566	/*
3567	 * prgetnode() initialized most of the prnode.
3568	 * Finish the job.
3569	 */
3570	pnp->pr_common = dpnp->pr_common;
3571	pnp->pr_pcommon = dpnp->pr_pcommon;
3572	pnp->pr_parent = dp;
3573	VN_HOLD(dp);
3574	pnp->pr_index = i;
3575
3576	dpnp->pr_files[i] = vp = PTOV(pnp);
3577
3578	/*
3579	 * Link new vnode into list of all /proc vnodes for the process.
3580	 */
3581	if (vp->v_type == VPROC) {
3582		pnp->pr_next = p->p_plist;
3583		p->p_plist = vp;
3584	}
3585	mutex_exit(&dpnp->pr_mutex);
3586	prunlock(dpnp);
3587	return (vp);
3588}
3589
3590static vnode_t *
3591pr_lookup_objectdir(vnode_t *dp, char *comp)
3592{
3593	prnode_t *dpnp = VTOP(dp);
3594	prnode_t *pnp;
3595	proc_t *p;
3596	struct seg *seg;
3597	struct as *as;
3598	vnode_t *vp;
3599	vattr_t vattr;
3600
3601	ASSERT(dpnp->pr_type == PR_OBJECTDIR);
3602
3603	pnp = prgetnode(dp, PR_OBJECT);
3604
3605	if (prlock(dpnp, ZNO) != 0) {
3606		prfreenode(pnp);
3607		return (NULL);
3608	}
3609	p = dpnp->pr_common->prc_proc;
3610	if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
3611		prunlock(dpnp);
3612		prfreenode(pnp);
3613		return (NULL);
3614	}
3615
3616	/*
3617	 * We drop p_lock before grabbing the address space lock
3618	 * in order to avoid a deadlock with the clock thread.
3619	 * The process will not disappear and its address space
3620	 * will not change because it is marked P_PR_LOCK.
3621	 */
3622	mutex_exit(&p->p_lock);
3623	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
3624	if ((seg = AS_SEGFIRST(as)) == NULL) {
3625		vp = NULL;
3626		goto out;
3627	}
3628	if (strcmp(comp, "a.out") == 0) {
3629		vp = p->p_exec;
3630		goto out;
3631	}
3632	do {
3633		/*
3634		 * Manufacture a filename for the "object" directory.
3635		 */
3636		vattr.va_mask = AT_FSID|AT_NODEID;
3637		if (seg->s_ops == &segvn_ops &&
3638		    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3639		    vp != NULL && vp->v_type == VREG &&
3640		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3641			char name[64];
3642
3643			if (vp == p->p_exec)	/* "a.out" */
3644				continue;
3645			pr_object_name(name, vp, &vattr);
3646			if (strcmp(name, comp) == 0)
3647				goto out;
3648		}
3649	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
3650
3651	vp = NULL;
3652out:
3653	if (vp != NULL) {
3654		VN_HOLD(vp);
3655	}
3656	AS_LOCK_EXIT(as, &as->a_lock);
3657	mutex_enter(&p->p_lock);
3658	prunlock(dpnp);
3659
3660	if (vp == NULL)
3661		prfreenode(pnp);
3662	else {
3663		/*
3664		 * Fill in the prnode so future references will
3665		 * be able to find the underlying object's vnode.
3666		 * Don't link this prnode into the list of all
3667		 * prnodes for the process; this is a one-use node.
3668		 * Its use is entirely to catch and fail opens for writing.
3669		 */
3670		pnp->pr_realvp = vp;
3671		vp = PTOV(pnp);
3672	}
3673
3674	return (vp);
3675}
3676
3677/*
3678 * Find or construct an lwp vnode for the given lwpid.
3679 */
3680static vnode_t *
3681pr_lookup_lwpdir(vnode_t *dp, char *comp)
3682{
3683	id_t tid;	/* same type as t->t_tid */
3684	int want_agent;
3685	prnode_t *dpnp = VTOP(dp);
3686	prnode_t *pnp;
3687	prcommon_t *pcp;
3688	vnode_t *vp;
3689	proc_t *p;
3690	kthread_t *t;
3691	lwpdir_t *ldp;
3692	lwpent_t *lep;
3693	int tslot;
3694	int c;
3695
3696	ASSERT(dpnp->pr_type == PR_LWPDIR);
3697
3698	tid = 0;
3699	if (strcmp(comp, "agent") == 0)
3700		want_agent = 1;
3701	else {
3702		want_agent = 0;
3703		while ((c = *comp++) != '\0') {
3704			id_t otid;
3705
3706			if (c < '0' || c > '9')
3707				return (NULL);
3708			otid = tid;
3709			tid = 10*tid + c - '0';
3710			if (tid/10 != otid)	/* integer overflow */
3711				return (NULL);
3712		}
3713	}
3714
3715	pnp = prgetnode(dp, PR_LWPIDDIR);
3716
3717	p = pr_p_lock(dpnp);
3718	mutex_exit(&pr_pidlock);
3719	if (p == NULL) {
3720		prfreenode(pnp);
3721		return (NULL);
3722	}
3723
3724	if (want_agent) {
3725		if ((t = p->p_agenttp) == NULL)
3726			lep = NULL;
3727		else {
3728			tid = t->t_tid;
3729			tslot = t->t_dslot;
3730			lep = p->p_lwpdir[tslot].ld_entry;
3731		}
3732	} else {
3733		if ((ldp = lwp_hash_lookup(p, tid)) == NULL)
3734			lep = NULL;
3735		else {
3736			tslot = (int)(ldp - p->p_lwpdir);
3737			lep = ldp->ld_entry;
3738		}
3739	}
3740
3741	if (lep == NULL) {
3742		prunlock(dpnp);
3743		prfreenode(pnp);
3744		return (NULL);
3745	}
3746
3747	/*
3748	 * If an lwp vnode already exists and it is not invalid
3749	 * and it was created by the current process and it belongs
3750	 * to the same /proc mount point as our parent vnode, then
3751	 * just use it and discard the newly-allocated prnode.
3752	 */
3753	for (vp = lep->le_trace; vp != NULL; vp = VTOP(vp)->pr_next) {
3754		if (!(VTOP(vp)->pr_flags & PR_INVAL) &&
3755		    VTOP(vp)->pr_owner == curproc &&
3756		    vp->v_vfsp == dp->v_vfsp) {
3757			VN_HOLD(vp);
3758			prunlock(dpnp);
3759			prfreenode(pnp);
3760			return (vp);
3761		}
3762	}
3763	pnp->pr_owner = curproc;
3764
3765	/*
3766	 * prgetnode() initialized most of the prnode.
3767	 * Finish the job.
3768	 */
3769	pcp = pnp->pr_common;	/* the newly-allocated prcommon struct */
3770	if ((vp = lep->le_trace) != NULL) {
3771		/* discard the new prcommon and use the existing prcommon */
3772		prfreecommon(pcp);
3773		pcp = VTOP(vp)->pr_common;
3774		mutex_enter(&pcp->prc_mutex);
3775		ASSERT(pcp->prc_refcnt > 0);
3776		pcp->prc_refcnt++;
3777		mutex_exit(&pcp->prc_mutex);
3778		pnp->pr_common = pcp;
3779	} else {
3780		/* initialize the new prcommon struct */
3781		pcp->prc_flags |= PRC_LWP;
3782		if ((p->p_flag & SSYS) || p->p_as == &kas)
3783			pcp->prc_flags |= PRC_SYS;
3784		if ((t = lep->le_thread) == NULL)
3785			pcp->prc_flags |= PRC_DESTROY;
3786		pcp->prc_proc = p;
3787		pcp->prc_datamodel = dpnp->pr_pcommon->prc_datamodel;
3788		pcp->prc_pid = p->p_pid;
3789		pcp->prc_slot = p->p_slot;
3790		pcp->prc_thread = t;
3791		pcp->prc_tid = tid;
3792		pcp->prc_tslot = tslot;
3793	}
3794	pnp->pr_pcommon = dpnp->pr_pcommon;
3795	pnp->pr_parent = dp;
3796	VN_HOLD(dp);
3797	/*
3798	 * Link in the old, invalid directory vnode so we
3799	 * can later determine the last close of the file.
3800	 */
3801	pnp->pr_next = lep->le_trace;
3802	lep->le_trace = vp = PTOV(pnp);
3803	prunlock(dpnp);
3804	return (vp);
3805}
3806
3807static vnode_t *
3808pr_lookup_lwpiddir(vnode_t *dp, char *comp)
3809{
3810	prnode_t *dpnp = VTOP(dp);
3811	vnode_t *vp;
3812	prnode_t *pnp;
3813	proc_t *p;
3814	prdirent_t *dirp;
3815	int i;
3816	enum prnodetype type;
3817
3818	ASSERT(dpnp->pr_type == PR_LWPIDDIR);
3819
3820	for (i = 0; i < NLWPIDDIRFILES; i++) {
3821		/* Skip "." and ".." */
3822		dirp = &lwpiddir[i+2];
3823		if (strcmp(comp, dirp->d_name) == 0)
3824			break;
3825	}
3826
3827	if (i >= NLWPIDDIRFILES)
3828		return (NULL);
3829
3830	type = (int)dirp->d_ino;
3831	pnp = prgetnode(dp, type);
3832
3833	p = pr_p_lock(dpnp);
3834	mutex_exit(&pr_pidlock);
3835	if (p == NULL) {
3836		prfreenode(pnp);
3837		return (NULL);
3838	}
3839	if (dpnp->pr_common->prc_flags & PRC_DESTROY) {
3840		/*
3841		 * Only the lwpsinfo file is present for zombie lwps.
3842		 * Nothing is present if the lwp has been reaped.
3843		 */
3844		if (dpnp->pr_common->prc_tslot == -1 ||
3845		    type != PR_LWPSINFO) {
3846			prunlock(dpnp);
3847			prfreenode(pnp);
3848			return (NULL);
3849		}
3850	}
3851
3852#if defined(__sparc)
3853	/* the asrs file exists only for sparc v9 _LP64 processes */
3854	if (type == PR_ASRS && p->p_model != DATAMODEL_LP64) {
3855		prunlock(dpnp);
3856		prfreenode(pnp);
3857		return (NULL);
3858	}
3859#endif
3860
3861	mutex_enter(&dpnp->pr_mutex);
3862
3863	if ((vp = dpnp->pr_files[i]) != NULL &&
3864	    !(VTOP(vp)->pr_flags & PR_INVAL)) {
3865		VN_HOLD(vp);
3866		mutex_exit(&dpnp->pr_mutex);
3867		prunlock(dpnp);
3868		prfreenode(pnp);
3869		return (vp);
3870	}
3871
3872	/*
3873	 * prgetnode() initialized most of the prnode.
3874	 * Finish the job.
3875	 */
3876	pnp->pr_common = dpnp->pr_common;
3877	pnp->pr_pcommon = dpnp->pr_pcommon;
3878	pnp->pr_parent = dp;
3879	VN_HOLD(dp);
3880	pnp->pr_index = i;
3881
3882	dpnp->pr_files[i] = vp = PTOV(pnp);
3883
3884	/*
3885	 * Link new vnode into list of all /proc vnodes for the process.
3886	 */
3887	if (vp->v_type == VPROC) {
3888		pnp->pr_next = p->p_plist;
3889		p->p_plist = vp;
3890	}
3891	mutex_exit(&dpnp->pr_mutex);
3892	prunlock(dpnp);
3893	return (vp);
3894}
3895
3896/*
3897 * Lookup one of the process's open files.
3898 */
3899static vnode_t *
3900pr_lookup_fddir(vnode_t *dp, char *comp)
3901{
3902	prnode_t *dpnp = VTOP(dp);
3903	prnode_t *pnp;
3904	vnode_t *vp = NULL;
3905	proc_t *p;
3906	file_t *fp;
3907	uint_t fd;
3908	int c;
3909	uf_entry_t *ufp;
3910	uf_info_t *fip;
3911
3912	ASSERT(dpnp->pr_type == PR_FDDIR);
3913
3914	fd = 0;
3915	while ((c = *comp++) != '\0') {
3916		int ofd;
3917		if (c < '0' || c > '9')
3918			return (NULL);
3919		ofd = fd;
3920		fd = 10*fd + c - '0';
3921		if (fd/10 != ofd)	/* integer overflow */
3922			return (NULL);
3923	}
3924
3925	pnp = prgetnode(dp, PR_FD);
3926
3927	if (prlock(dpnp, ZNO) != 0) {
3928		prfreenode(pnp);
3929		return (NULL);
3930	}
3931	p = dpnp->pr_common->prc_proc;
3932	if ((p->p_flag & SSYS) || p->p_as == &kas) {
3933		prunlock(dpnp);
3934		prfreenode(pnp);
3935		return (NULL);
3936	}
3937
3938	fip = P_FINFO(p);
3939	mutex_exit(&p->p_lock);
3940	mutex_enter(&fip->fi_lock);
3941	if (fd < fip->fi_nfiles) {
3942		UF_ENTER(ufp, fip, fd);
3943		if ((fp = ufp->uf_file) != NULL) {
3944			pnp->pr_mode = 07111;
3945			if (fp->f_flag & FREAD)
3946				pnp->pr_mode |= 0444;
3947			if (fp->f_flag & FWRITE)
3948				pnp->pr_mode |= 0222;
3949			vp = fp->f_vnode;
3950			VN_HOLD(vp);
3951		}
3952		UF_EXIT(ufp);
3953	}
3954	mutex_exit(&fip->fi_lock);
3955	mutex_enter(&p->p_lock);
3956	prunlock(dpnp);
3957
3958	if (vp == NULL)
3959		prfreenode(pnp);
3960	else {
3961		/*
3962		 * Fill in the prnode so future references will
3963		 * be able to find the underlying object's vnode.
3964		 * Don't link this prnode into the list of all
3965		 * prnodes for the process; this is a one-use node.
3966		 */
3967		pnp->pr_realvp = vp;
3968		pnp->pr_parent = dp;		/* needed for prlookup */
3969		VN_HOLD(dp);
3970		vp = PTOV(pnp);
3971		if (pnp->pr_realvp->v_type == VDIR)
3972			vp->v_type = VDIR;
3973	}
3974
3975	return (vp);
3976}
3977
3978static vnode_t *
3979pr_lookup_pathdir(vnode_t *dp, char *comp)
3980{
3981	prnode_t *dpnp = VTOP(dp);
3982	prnode_t *pnp;
3983	vnode_t *vp = NULL;
3984	proc_t *p;
3985	uint_t fd, flags = 0;
3986	int c;
3987	uf_entry_t *ufp;
3988	uf_info_t *fip;
3989	enum { NAME_FD, NAME_OBJECT, NAME_ROOT, NAME_CWD, NAME_UNKNOWN } type;
3990	char *tmp;
3991	int idx;
3992	struct seg *seg;
3993	struct as *as = NULL;
3994	vattr_t vattr;
3995
3996	ASSERT(dpnp->pr_type == PR_PATHDIR);
3997
3998	/*
3999	 * First, check if this is a numeric entry, in which case we have a
4000	 * file descriptor.
4001	 */
4002	fd = 0;
4003	type = NAME_FD;
4004	tmp = comp;
4005	while ((c = *tmp++) != '\0') {
4006		int ofd;
4007		if (c < '0' || c > '9') {
4008			type = NAME_UNKNOWN;
4009			break;
4010		}
4011		ofd = fd;
4012		fd = 10*fd + c - '0';
4013		if (fd/10 != ofd) {	/* integer overflow */
4014			type = NAME_UNKNOWN;
4015			break;
4016		}
4017	}
4018
4019	/*
4020	 * Next, see if it is one of the special values {root, cwd}.
4021	 */
4022	if (type == NAME_UNKNOWN) {
4023		if (strcmp(comp, "root") == 0)
4024			type = NAME_ROOT;
4025		else if (strcmp(comp, "cwd") == 0)
4026			type = NAME_CWD;
4027	}
4028
4029	/*
4030	 * Grab the necessary data from the process
4031	 */
4032	if (prlock(dpnp, ZNO) != 0)
4033		return (NULL);
4034	p = dpnp->pr_common->prc_proc;
4035
4036	fip = P_FINFO(p);
4037
4038	switch (type) {
4039	case NAME_ROOT:
4040		if ((vp = PTOU(p)->u_rdir) == NULL)
4041			vp = p->p_zone->zone_rootvp;
4042		VN_HOLD(vp);
4043		break;
4044	case NAME_CWD:
4045		vp = PTOU(p)->u_cdir;
4046		VN_HOLD(vp);
4047		break;
4048	default:
4049		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
4050			prunlock(dpnp);
4051			return (NULL);
4052		}
4053	}
4054	mutex_exit(&p->p_lock);
4055
4056	/*
4057	 * Determine if this is an object entry
4058	 */
4059	if (type == NAME_UNKNOWN) {
4060		/*
4061		 * Start with the inode index immediately after the number of
4062		 * files.
4063		 */
4064		mutex_enter(&fip->fi_lock);
4065		idx = fip->fi_nfiles + 4;
4066		mutex_exit(&fip->fi_lock);
4067
4068		if (strcmp(comp, "a.out") == 0) {
4069			if (p->p_execdir != NULL) {
4070				vp = p->p_execdir;
4071				VN_HOLD(vp);
4072				type = NAME_OBJECT;
4073				flags |= PR_AOUT;
4074			} else {
4075				vp = p->p_exec;
4076				VN_HOLD(vp);
4077				type = NAME_OBJECT;
4078			}
4079		} else {
4080			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
4081			if ((seg = AS_SEGFIRST(as)) != NULL) {
4082				do {
4083					/*
4084					 * Manufacture a filename for the
4085					 * "object" directory.
4086					 */
4087					vattr.va_mask = AT_FSID|AT_NODEID;
4088					if (seg->s_ops == &segvn_ops &&
4089					    SEGOP_GETVP(seg, seg->s_base, &vp)
4090					    == 0 &&
4091					    vp != NULL && vp->v_type == VREG &&
4092					    VOP_GETATTR(vp, &vattr, 0, CRED(),
4093					    NULL) == 0) {
4094						char name[64];
4095
4096						if (vp == p->p_exec)
4097							continue;
4098						idx++;
4099						pr_object_name(name, vp,
4100						    &vattr);
4101						if (strcmp(name, comp) == 0)
4102							break;
4103					}
4104				} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4105			}
4106
4107			if (seg == NULL) {
4108				vp = NULL;
4109			} else {
4110				VN_HOLD(vp);
4111				type = NAME_OBJECT;
4112			}
4113
4114			AS_LOCK_EXIT(as, &as->a_lock);
4115		}
4116	}
4117
4118
4119	switch (type) {
4120	case NAME_FD:
4121		mutex_enter(&fip->fi_lock);
4122		if (fd < fip->fi_nfiles) {
4123			UF_ENTER(ufp, fip, fd);
4124			if (ufp->uf_file != NULL) {
4125				vp = ufp->uf_file->f_vnode;
4126				VN_HOLD(vp);
4127			}
4128			UF_EXIT(ufp);
4129		}
4130		mutex_exit(&fip->fi_lock);
4131		idx = fd + 4;
4132		break;
4133	case NAME_ROOT:
4134		idx = 2;
4135		break;
4136	case NAME_CWD:
4137		idx = 3;
4138		break;
4139	case NAME_OBJECT:
4140	case NAME_UNKNOWN:
4141		/* Nothing to do */
4142		break;
4143	}
4144
4145	mutex_enter(&p->p_lock);
4146	prunlock(dpnp);
4147
4148	if (vp != NULL) {
4149		pnp = prgetnode(dp, PR_PATH);
4150
4151		pnp->pr_flags |= flags;
4152		pnp->pr_common = dpnp->pr_common;
4153		pnp->pr_pcommon = dpnp->pr_pcommon;
4154		pnp->pr_realvp = vp;
4155		pnp->pr_parent = dp;		/* needed for prlookup */
4156		pnp->pr_ino = pmkino(idx, dpnp->pr_common->prc_slot, PR_PATH);
4157		VN_HOLD(dp);
4158		vp = PTOV(pnp);
4159		vp->v_type = VLNK;
4160	}
4161
4162	return (vp);
4163}
4164
4165/*
4166 * Look up one of the process's active templates.
4167 */
4168static vnode_t *
4169pr_lookup_tmpldir(vnode_t *dp, char *comp)
4170{
4171	prnode_t *dpnp = VTOP(dp);
4172	prnode_t *pnp;
4173	vnode_t *vp = NULL;
4174	proc_t *p;
4175	int i;
4176
4177	ASSERT(dpnp->pr_type == PR_TMPLDIR);
4178
4179	for (i = 0; i < ct_ntypes; i++)
4180		if (strcmp(comp, ct_types[i]->ct_type_name) == 0)
4181			break;
4182	if (i == ct_ntypes)
4183		return (NULL);
4184
4185	pnp = prgetnode(dp, PR_TMPL);
4186
4187	if (prlock(dpnp, ZNO) != 0) {
4188		prfreenode(pnp);
4189		return (NULL);
4190	}
4191	p = dpnp->pr_common->prc_proc;
4192	if ((p->p_flag & SSYS) || p->p_as == &kas ||
4193	    (dpnp->pr_common->prc_flags & (PRC_DESTROY | PRC_LWP)) != PRC_LWP) {
4194		prunlock(dpnp);
4195		prfreenode(pnp);
4196		return (NULL);
4197	}
4198	if (ttolwp(dpnp->pr_common->prc_thread)->lwp_ct_active[i] != NULL) {
4199		pnp->pr_common = dpnp->pr_common;
4200		pnp->pr_pcommon = dpnp->pr_pcommon;
4201		pnp->pr_parent = dp;
4202		pnp->pr_cttype = i;
4203		VN_HOLD(dp);
4204		vp = PTOV(pnp);
4205	} else {
4206		prfreenode(pnp);
4207	}
4208	prunlock(dpnp);
4209
4210	return (vp);
4211}
4212
4213/*
4214 * Look up one of the contracts owned by the process.
4215 */
4216static vnode_t *
4217pr_lookup_ctdir(vnode_t *dp, char *comp)
4218{
4219	prnode_t *dpnp = VTOP(dp);
4220	prnode_t *pnp;
4221	vnode_t *vp = NULL;
4222	proc_t *p;
4223	id_t id = 0;
4224	contract_t *ct;
4225	int c;
4226
4227	ASSERT(dpnp->pr_type == PR_CTDIR);
4228
4229	while ((c = *comp++) != '\0') {
4230		id_t oid;
4231		if (c < '0' || c > '9')
4232			return (NULL);
4233		oid = id;
4234		id = 10 * id + c - '0';
4235		if (id / 10 != oid)	/* integer overflow */
4236			return (NULL);
4237	}
4238
4239	/*
4240	 * Search all contracts; we'll filter below.
4241	 */
4242	ct = contract_ptr(id, GLOBAL_ZONEUNIQID);
4243	if (ct == NULL)
4244		return (NULL);
4245
4246	pnp = prgetnode(dp, PR_CT);
4247
4248	if (prlock(dpnp, ZNO) != 0) {
4249		prfreenode(pnp);
4250		contract_rele(ct);
4251		return (NULL);
4252	}
4253	p = dpnp->pr_common->prc_proc;
4254	/*
4255	 * We only allow lookups of contracts owned by this process, or,
4256	 * if we are zsched and this is a zone's procfs, contracts on
4257	 * stuff in the zone which are held by processes or contracts
4258	 * outside the zone.  (see logic in contract_status_common)
4259	 */
4260	if ((ct->ct_owner != p) &&
4261	    !(p == VTOZONE(dp)->zone_zsched && ct->ct_state < CTS_ORPHAN &&
4262	    VTOZONE(dp)->zone_uniqid == contract_getzuniqid(ct) &&
4263	    VTOZONE(dp)->zone_uniqid != GLOBAL_ZONEUNIQID &&
4264	    ct->ct_czuniqid == GLOBAL_ZONEUNIQID)) {
4265		prunlock(dpnp);
4266		prfreenode(pnp);
4267		contract_rele(ct);
4268		return (NULL);
4269	}
4270	pnp->pr_common = dpnp->pr_common;
4271	pnp->pr_pcommon = dpnp->pr_pcommon;
4272	pnp->pr_contract = ct;
4273	pnp->pr_parent = dp;
4274	pnp->pr_ino = pmkino(id, pnp->pr_common->prc_slot, PR_CT);
4275	VN_HOLD(dp);
4276	prunlock(dpnp);
4277	vp = PTOV(pnp);
4278
4279	return (vp);
4280}
4281
4282/*
4283 * Construct an lwp vnode for the old /proc interface.
4284 * We stand on our head to make the /proc plumbing correct.
4285 */
4286vnode_t *
4287prlwpnode(prnode_t *pnp, uint_t tid)
4288{
4289	char comp[12];
4290	vnode_t *dp;
4291	vnode_t *vp;
4292	prcommon_t *pcp;
4293	proc_t *p;
4294
4295	/*
4296	 * Lookup the /proc/<pid>/lwp/<lwpid> directory vnode.
4297	 */
4298	if (pnp->pr_type == PR_PIDFILE) {
4299		dp = pnp->pr_parent;		/* /proc/<pid> */
4300		VN_HOLD(dp);
4301		vp = pr_lookup_piddir(dp, "lwp");
4302		VN_RELE(dp);
4303		if ((dp = vp) == NULL)		/* /proc/<pid>/lwp */
4304			return (NULL);
4305	} else if (pnp->pr_type == PR_LWPIDFILE) {
4306		dp = pnp->pr_parent;		/* /proc/<pid>/lwp/<lwpid> */
4307		dp = VTOP(dp)->pr_parent;	/* /proc/<pid>/lwp */
4308		VN_HOLD(dp);
4309	} else {
4310		return (NULL);
4311	}
4312
4313	(void) pr_u32tos(tid, comp, sizeof (comp));
4314	vp = pr_lookup_lwpdir(dp, comp);
4315	VN_RELE(dp);
4316	if ((dp = vp) == NULL)
4317		return (NULL);
4318
4319	pnp = prgetnode(dp, PR_LWPIDFILE);
4320	vp = PTOV(pnp);
4321
4322	/*
4323	 * prgetnode() initialized most of the prnode.
4324	 * Finish the job.
4325	 */
4326	pcp = VTOP(dp)->pr_common;
4327	pnp->pr_ino = ptoi(pcp->prc_pid);
4328	pnp->pr_common = pcp;
4329	pnp->pr_pcommon = VTOP(dp)->pr_pcommon;
4330	pnp->pr_parent = dp;
4331	/*
4332	 * Link new vnode into list of all /proc vnodes for the process.
4333	 */
4334	p = pr_p_lock(pnp);
4335	mutex_exit(&pr_pidlock);
4336	if (p == NULL) {
4337		VN_RELE(dp);
4338		prfreenode(pnp);
4339		vp = NULL;
4340	} else if (pcp->prc_thread == NULL) {
4341		prunlock(pnp);
4342		VN_RELE(dp);
4343		prfreenode(pnp);
4344		vp = NULL;
4345	} else {
4346		pnp->pr_next = p->p_plist;
4347		p->p_plist = vp;
4348		prunlock(pnp);
4349	}
4350
4351	return (vp);
4352}
4353
/*
 * Allocation counters for prnode and prcommon structures.  They exist
 * only in DEBUG builds; otherwise INCREMENT() and DECREMENT() expand
 * to nothing.  Note that the DEBUG expansions already end in a
 * semicolon.
 */
#ifdef DEBUG

static	uint32_t nprnode;	/* prnodes currently allocated */
static	uint32_t nprcommon;	/* prcommons currently allocated */

#define	INCREMENT(x)	atomic_add_32(&x, 1);
#define	DECREMENT(x)	atomic_add_32(&x, -1);

#else	/* DEBUG */

#define	INCREMENT(x)
#define	DECREMENT(x)

#endif	/* DEBUG */
4368
4369/*
4370 * New /proc vnode required; allocate it and fill in most of the fields.
4371 */
4372prnode_t *
4373prgetnode(vnode_t *dp, prnodetype_t type)
4374{
4375	prnode_t *pnp;
4376	prcommon_t *pcp;
4377	vnode_t *vp;
4378	ulong_t nfiles;
4379
4380	INCREMENT(nprnode);
4381	pnp = kmem_zalloc(sizeof (prnode_t), KM_SLEEP);
4382
4383	mutex_init(&pnp->pr_mutex, NULL, MUTEX_DEFAULT, NULL);
4384	pnp->pr_type = type;
4385
4386	pnp->pr_vnode = vn_alloc(KM_SLEEP);
4387
4388	vp = PTOV(pnp);
4389	vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
4390	vn_setops(vp, prvnodeops);
4391	vp->v_vfsp = dp->v_vfsp;
4392	vp->v_type = VPROC;
4393	vp->v_data = (caddr_t)pnp;
4394
4395	switch (type) {
4396	case PR_PIDDIR:
4397	case PR_LWPIDDIR:
4398		/*
4399		 * We need a prcommon and a files array for each of these.
4400		 */
4401		INCREMENT(nprcommon);
4402
4403		pcp = kmem_zalloc(sizeof (prcommon_t), KM_SLEEP);
4404		pcp->prc_refcnt = 1;
4405		pnp->pr_common = pcp;
4406		mutex_init(&pcp->prc_mutex, NULL, MUTEX_DEFAULT, NULL);
4407		cv_init(&pcp->prc_wait, NULL, CV_DEFAULT, NULL);
4408
4409		nfiles = (type == PR_PIDDIR)? NPIDDIRFILES : NLWPIDDIRFILES;
4410		pnp->pr_files =
4411		    kmem_zalloc(nfiles * sizeof (vnode_t *), KM_SLEEP);
4412
4413		vp->v_type = VDIR;
4414		/*
4415		 * Mode should be read-search by all, but we cannot so long
4416		 * as we must support compatibility mode with old /proc.
4417		 * Make /proc/<pid> be read by owner only, search by all.
4418		 * Make /proc/<pid>/lwp/<lwpid> read-search by all.  Also,
4419		 * set VDIROPEN on /proc/<pid> so it can be opened for writing.
4420		 */
4421		if (type == PR_PIDDIR) {
4422			/* kludge for old /proc interface */
4423			prnode_t *xpnp = prgetnode(dp, PR_PIDFILE);
4424			pnp->pr_pidfile = PTOV(xpnp);
4425			pnp->pr_mode = 0511;
4426			vp->v_flag |= VDIROPEN;
4427		} else {
4428			pnp->pr_mode = 0555;
4429		}
4430
4431		break;
4432
4433	case PR_CURDIR:
4434	case PR_ROOTDIR:
4435	case PR_FDDIR:
4436	case PR_OBJECTDIR:
4437	case PR_PATHDIR:
4438	case PR_CTDIR:
4439	case PR_TMPLDIR:
4440		vp->v_type = VDIR;
4441		pnp->pr_mode = 0500;	/* read-search by owner only */
4442		break;
4443
4444	case PR_CT:
4445		vp->v_type = VLNK;
4446		pnp->pr_mode = 0500;	/* read-search by owner only */
4447		break;
4448
4449	case PR_PATH:
4450	case PR_SELF:
4451		vp->v_type = VLNK;
4452		pnp->pr_mode = 0777;
4453		break;
4454
4455	case PR_LWPDIR:
4456		vp->v_type = VDIR;
4457		pnp->pr_mode = 0555;	/* read-search by all */
4458		break;
4459
4460	case PR_AS:
4461	case PR_TMPL:
4462		pnp->pr_mode = 0600;	/* read-write by owner only */
4463		break;
4464
4465	case PR_CTL:
4466	case PR_LWPCTL:
4467		pnp->pr_mode = 0200;	/* write-only by owner only */
4468		break;
4469
4470	case PR_PIDFILE:
4471	case PR_LWPIDFILE:
4472		pnp->pr_mode = 0600;	/* read-write by owner only */
4473		break;
4474
4475	case PR_PSINFO:
4476	case PR_LPSINFO:
4477	case PR_LWPSINFO:
4478	case PR_USAGE:
4479	case PR_LUSAGE:
4480	case PR_LWPUSAGE:
4481		pnp->pr_mode = 0444;	/* read-only by all */
4482		break;
4483
4484	default:
4485		pnp->pr_mode = 0400;	/* read-only by owner only */
4486		break;
4487	}
4488	vn_exists(vp);
4489	return (pnp);
4490}
4491
4492/*
4493 * Free the storage obtained from prgetnode().
4494 */
4495void
4496prfreenode(prnode_t *pnp)
4497{
4498	vnode_t *vp;
4499	ulong_t nfiles;
4500
4501	vn_invalid(PTOV(pnp));
4502	vn_free(PTOV(pnp));
4503	mutex_destroy(&pnp->pr_mutex);
4504
4505	switch (pnp->pr_type) {
4506	case PR_PIDDIR:
4507		/* kludge for old /proc interface */
4508		if (pnp->pr_pidfile != NULL) {
4509			prfreenode(VTOP(pnp->pr_pidfile));
4510			pnp->pr_pidfile = NULL;
4511		}
4512		/* FALLTHROUGH */
4513	case PR_LWPIDDIR:
4514		/*
4515		 * We allocated a prcommon and a files array for each of these.
4516		 */
4517		prfreecommon(pnp->pr_common);
4518		nfiles = (pnp->pr_type == PR_PIDDIR)?
4519		    NPIDDIRFILES : NLWPIDDIRFILES;
4520		kmem_free(pnp->pr_files, nfiles * sizeof (vnode_t *));
4521		break;
4522	default:
4523		break;
4524	}
4525	/*
4526	 * If there is an underlying vnode, be sure
4527	 * to release it after freeing the prnode.
4528	 */
4529	vp = pnp->pr_realvp;
4530	kmem_free(pnp, sizeof (*pnp));
4531	DECREMENT(nprnode);
4532	if (vp != NULL) {
4533		VN_RELE(vp);
4534	}
4535}
4536
4537/*
4538 * Free a prcommon structure, if the reference count reaches zero.
4539 */
4540static void
4541prfreecommon(prcommon_t *pcp)
4542{
4543	mutex_enter(&pcp->prc_mutex);
4544	ASSERT(pcp->prc_refcnt > 0);
4545	if (--pcp->prc_refcnt != 0)
4546		mutex_exit(&pcp->prc_mutex);
4547	else {
4548		mutex_exit(&pcp->prc_mutex);
4549		ASSERT(pcp->prc_pollhead.ph_list == NULL);
4550		ASSERT(pcp->prc_refcnt == 0);
4551		ASSERT(pcp->prc_selfopens == 0 && pcp->prc_writers == 0);
4552		mutex_destroy(&pcp->prc_mutex);
4553		cv_destroy(&pcp->prc_wait);
4554		kmem_free(pcp, sizeof (prcommon_t));
4555		DECREMENT(nprcommon);
4556	}
4557}
4558
4559/*
4560 * Array of readdir functions, indexed by /proc file type.
4561 */
4562static int pr_readdir_notdir(), pr_readdir_procdir(), pr_readdir_piddir(),
4563	pr_readdir_objectdir(), pr_readdir_lwpdir(), pr_readdir_lwpiddir(),
4564	pr_readdir_fddir(), pr_readdir_pathdir(), pr_readdir_tmpldir(),
4565	pr_readdir_ctdir();
4566
4567static int (*pr_readdir_function[PR_NFILES])() = {
4568	pr_readdir_procdir,	/* /proc				*/
4569	pr_readdir_notdir,	/* /proc/self				*/
4570	pr_readdir_piddir,	/* /proc/<pid>				*/
4571	pr_readdir_notdir,	/* /proc/<pid>/as			*/
4572	pr_readdir_notdir,	/* /proc/<pid>/ctl			*/
4573	pr_readdir_notdir,	/* /proc/<pid>/status			*/
4574	pr_readdir_notdir,	/* /proc/<pid>/lstatus			*/
4575	pr_readdir_notdir,	/* /proc/<pid>/psinfo			*/
4576	pr_readdir_notdir,	/* /proc/<pid>/lpsinfo			*/
4577	pr_readdir_notdir,	/* /proc/<pid>/map			*/
4578	pr_readdir_notdir,	/* /proc/<pid>/rmap			*/
4579	pr_readdir_notdir,	/* /proc/<pid>/xmap			*/
4580	pr_readdir_notdir,	/* /proc/<pid>/cred			*/
4581	pr_readdir_notdir,	/* /proc/<pid>/sigact			*/
4582	pr_readdir_notdir,	/* /proc/<pid>/auxv			*/
4583#if defined(__x86)
4584	pr_readdir_notdir,	/* /proc/<pid>/ldt			*/
4585#endif
4586	pr_readdir_notdir,	/* /proc/<pid>/usage			*/
4587	pr_readdir_notdir,	/* /proc/<pid>/lusage			*/
4588	pr_readdir_notdir,	/* /proc/<pid>/pagedata			*/
4589	pr_readdir_notdir,	/* /proc/<pid>/watch			*/
4590	pr_readdir_notdir,	/* /proc/<pid>/cwd			*/
4591	pr_readdir_notdir,	/* /proc/<pid>/root			*/
4592	pr_readdir_fddir,	/* /proc/<pid>/fd			*/
4593	pr_readdir_notdir,	/* /proc/<pid>/fd/nn			*/
4594	pr_readdir_objectdir,	/* /proc/<pid>/object			*/
4595	pr_readdir_notdir,	/* /proc/<pid>/object/xxx		*/
4596	pr_readdir_lwpdir,	/* /proc/<pid>/lwp			*/
4597	pr_readdir_lwpiddir,	/* /proc/<pid>/lwp/<lwpid>		*/
4598	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpctl	*/
4599	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpstatus	*/
4600	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpsinfo	*/
4601	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/lwpusage	*/
4602	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/xregs	*/
4603	pr_readdir_tmpldir,	/* /proc/<pid>/lwp/<lwpid>/templates	*/
4604	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/templates/<id> */
4605#if defined(__sparc)
4606	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/gwindows	*/
4607	pr_readdir_notdir,	/* /proc/<pid>/lwp/<lwpid>/asrs		*/
4608#endif
4609	pr_readdir_notdir,	/* /proc/<pid>/priv			*/
4610	pr_readdir_pathdir,	/* /proc/<pid>/path			*/
4611	pr_readdir_notdir,	/* /proc/<pid>/path/xxx			*/
4612	pr_readdir_ctdir,	/* /proc/<pid>/contracts		*/
4613	pr_readdir_notdir,	/* /proc/<pid>/contracts/<ctid>		*/
4614	pr_readdir_notdir,	/* old process file			*/
4615	pr_readdir_notdir,	/* old lwp file				*/
4616	pr_readdir_notdir,	/* old pagedata file			*/
4617};
4618
4619/* ARGSUSED */
4620static int
4621prreaddir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
4622	caller_context_t *ct, int flags)
4623{
4624	prnode_t *pnp = VTOP(vp);
4625
4626	ASSERT(pnp->pr_type < PR_NFILES);
4627
4628	/* XXX - Do we need to pass ct and flags? */
4629	return (pr_readdir_function[pnp->pr_type](pnp, uiop, eofp));
4630}
4631
4632/* ARGSUSED */
4633static int
4634pr_readdir_notdir(prnode_t *pnp, uio_t *uiop, int *eofp)
4635{
4636	return (ENOTDIR);
4637}
4638
4639/* ARGSUSED */
4640static int
4641pr_readdir_procdir(prnode_t *pnp, uio_t *uiop, int *eofp)
4642{
4643	zoneid_t zoneid;
4644	gfs_readdir_state_t gstate;
4645	int error, eof = 0;
4646	offset_t n;
4647
4648	ASSERT(pnp->pr_type == PR_PROCDIR);
4649
4650	zoneid = VTOZONE(PTOV(pnp))->zone_id;
4651
4652	if ((error = gfs_readdir_init(&gstate, PNSIZ, PRSDSIZE, uiop,
4653	    PRROOTINO, PRROOTINO, 0)) != 0)
4654		return (error);
4655
4656	/*
4657	 * Loop until user's request is satisfied or until all processes
4658	 * have been examined.
4659	 */
4660	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
4661		uint_t pid;
4662		int pslot;
4663		proc_t *p;
4664
4665		/*
4666		 * Find next entry.  Skip processes not visible where
4667		 * this /proc was mounted.
4668		 */
4669		mutex_enter(&pidlock);
4670		while (n < v.v_proc &&
4671		    ((p = pid_entry(n)) == NULL || p->p_stat == SIDL ||
4672		    (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) ||
4673		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0))
4674			n++;
4675
4676		/*
4677		 * Stop when entire proc table has been examined.
4678		 */
4679		if (n >= v.v_proc) {
4680			mutex_exit(&pidlock);
4681			eof = 1;
4682			break;
4683		}
4684
4685		ASSERT(p->p_stat != 0);
4686		pid = p->p_pid;
4687		pslot = p->p_slot;
4688		mutex_exit(&pidlock);
4689		error = gfs_readdir_emitn(&gstate, uiop, n,
4690		    pmkino(0, pslot, PR_PIDDIR), pid);
4691		if (error)
4692			break;
4693	}
4694
4695	return (gfs_readdir_fini(&gstate, error, eofp, eof));
4696}
4697
/*
 * Read a /proc/<pid> directory.  Entries come straight from the static
 * piddir[] table of prdirent_t records, so the directory offset is a byte
 * offset into that table and must be a multiple of sizeof (prdirent_t).
 *
 * Returns EINVAL for a negative or misaligned offset or a buffer smaller
 * than one record, ENOENT if the node no longer refers to a process, an
 * error from uiomove(), or 0.  On the 0 return, *eofp (if non-NULL) is
 * set when the offset has reached the end of the table.
 */
/* ARGSUSED */
static int
pr_readdir_piddir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
	int zombie = ((pnp->pr_pcommon->prc_flags & PRC_DESTROY) != 0);
	prdirent_t dirent;
	prdirent_t *dirp;
	offset_t off;
	int error;

	ASSERT(pnp->pr_type == PR_PIDDIR);

	if (uiop->uio_offset < 0 ||
	    uiop->uio_offset % sizeof (prdirent_t) != 0 ||
	    uiop->uio_resid < sizeof (prdirent_t))
		return (EINVAL);
	if (pnp->pr_pcommon->prc_proc == NULL)
		return (ENOENT);
	if (uiop->uio_offset >= sizeof (piddir))
		goto out;

	/*
	 * Loop until user's request is satisfied, omitting some
	 * files along the way if the process is a zombie.
	 *
	 * The for-increment rewrites uio_offset from the saved 'off', so
	 * entries skipped with 'continue' still advance the offset by one
	 * record even though nothing was copied out for them.
	 */
	for (dirp = &piddir[uiop->uio_offset / sizeof (prdirent_t)];
	    uiop->uio_resid >= sizeof (prdirent_t) &&
	    dirp < &piddir[NPIDDIRFILES+2];
	    uiop->uio_offset = off + sizeof (prdirent_t), dirp++) {
		off = uiop->uio_offset;
		if (zombie) {
			/* Only these table entries are reported for zombies. */
			switch (dirp->d_ino) {
			case PR_PIDDIR:
			case PR_PROCDIR:
			case PR_PSINFO:
			case PR_USAGE:
				break;
			default:
				continue;
			}
		}
		/*
		 * The table stores node types in d_ino; work on a copy and
		 * turn the type into the inode number for this process.
		 */
		bcopy(dirp, &dirent, sizeof (prdirent_t));
		if (dirent.d_ino == PR_PROCDIR)
			dirent.d_ino = PRROOTINO;
		else
			dirent.d_ino = pmkino(0, pnp->pr_pcommon->prc_slot,
			    dirent.d_ino);
		if ((error = uiomove((caddr_t)&dirent, sizeof (prdirent_t),
		    UIO_READ, uiop)) != 0)
			return (error);
	}
out:
	if (eofp)
		*eofp = (uiop->uio_offset >= sizeof (piddir));
	return (0);
}
4754
/*
 * Bring an address space's object directory (as->a_objectdir, with
 * as->a_sizedir slots) up to date.  The directory is an array of the
 * distinct regular-file vnodes backing the segvn segments of 'as'.
 *
 * The caller must hold as->a_lock as writer.  Nothing is done when the
 * directory exists and a_updatedir is clear.  Otherwise a fresh list is
 * built from the segment tree and merged into the existing array:
 * vnodes that are still mapped keep their current slot, vnodes no longer
 * mapped have their slot set to NULL, and newly mapped vnodes are placed
 * in NULL slots, growing the array first if it is too small.
 */
static void
rebuild_objdir(struct as *as)
{
	struct seg *seg;
	vnode_t *vp;
	vattr_t vattr;
	vnode_t **dir;
	ulong_t nalloc;
	ulong_t nentries;
	int i, j;
	ulong_t nold, nnew;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	if (as->a_updatedir == 0 && as->a_objectdir != NULL)
		return;
	as->a_updatedir = 0;

	if ((nalloc = avl_numnodes(&as->a_segtree)) == 0 ||
	    (seg = AS_SEGFIRST(as)) == NULL)	/* can't happen? */
		return;

	/*
	 * Allocate space for the new object directory.
	 * (This is usually about two times too many entries.)
	 */
	nalloc = (nalloc + 0xf) & ~0xf;		/* multiple of 16 */
	dir = kmem_zalloc(nalloc * sizeof (vnode_t *), KM_SLEEP);

	/* fill in the new directory with desired entries */
	nentries = 0;
	do {
		vattr.va_mask = AT_FSID|AT_NODEID;
		if (seg->s_ops == &segvn_ops &&
		    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
		    vp != NULL && vp->v_type == VREG &&
		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
			/* record each vnode only once */
			for (i = 0; i < nentries; i++)
				if (vp == dir[i])
					break;
			if (i == nentries) {
				ASSERT(nentries < nalloc);
				dir[nentries++] = vp;
			}
		}
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	if (as->a_objectdir == NULL) {	/* first time */
		as->a_objectdir = dir;
		as->a_sizedir = nalloc;
		return;
	}

	/*
	 * Null out all of the defunct entries in the old directory.
	 * Entries found in both lists are cleared from 'dir' so that only
	 * genuinely new vnodes remain there; nold counts survivors in the
	 * old directory and nnew counts what is left in 'dir'.
	 */
	nold = 0;
	nnew = nentries;
	for (i = 0; i < as->a_sizedir; i++) {
		if ((vp = as->a_objectdir[i]) != NULL) {
			for (j = 0; j < nentries; j++) {
				if (vp == dir[j]) {
					dir[j] = NULL;
					nnew--;
					break;
				}
			}
			if (j == nentries)
				as->a_objectdir[i] = NULL;
			else
				nold++;
		}
	}

	if (nold + nnew > as->a_sizedir) {
		/*
		 * Reallocate the old directory to have enough
		 * space for the old and new entries combined.
		 * Round up to the next multiple of 16.
		 */
		ulong_t newsize = (nold + nnew + 0xf) & ~0xf;
		vnode_t **newdir = kmem_zalloc(newsize * sizeof (vnode_t *),
		    KM_SLEEP);
		bcopy(as->a_objectdir, newdir,
		    as->a_sizedir * sizeof (vnode_t *));
		kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
		as->a_objectdir = newdir;
		as->a_sizedir = newsize;
	}

	/*
	 * Move all new entries to the old directory and
	 * deallocate the space used by the new directory.
	 * 'j' is not reset between new entries, so each one takes the
	 * next free slot after the previously filled one.
	 */
	if (nnew) {
		for (i = 0, j = 0; i < nentries; i++) {
			if ((vp = dir[i]) == NULL)
				continue;
			for (; j < as->a_sizedir; j++) {
				if (as->a_objectdir[j] != NULL)
					continue;
				as->a_objectdir[j++] = vp;
				break;
			}
		}
	}
	kmem_free(dir, nalloc * sizeof (vnode_t *));
}
4863
4864/*
4865 * Return the vnode from a slot in the process's object directory.
4866 * The caller must have locked the process's address space.
4867 * The only caller is below, in pr_readdir_objectdir().
4868 */
4869static vnode_t *
4870obj_entry(struct as *as, int slot)
4871{
4872	ASSERT(AS_LOCK_HELD(as, &as->a_lock));
4873	if (as->a_objectdir == NULL)
4874		return (NULL);
4875	ASSERT(slot < as->a_sizedir);
4876	return (as->a_objectdir[slot]);
4877}
4878
/*
 * Read a /proc/<pid>/object directory: one entry per non-NULL slot of the
 * process's object directory whose vnode attributes can be fetched.  The
 * executable's vnode (p->p_exec) is reported as "a.out"; every other name
 * comes from pr_object_name().  The object directory index serves as the
 * directory offset.
 *
 * Returns an error from prlock() or gfs_readdir_init(), otherwise the
 * result of gfs_readdir_fini().
 */
/* ARGSUSED */
static int
pr_readdir_objectdir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
	gfs_readdir_state_t gstate;
	int error, eof = 0;
	offset_t n;
	int pslot;
	size_t objdirsize;
	proc_t *p;
	struct as *as;
	vnode_t *vp;

	ASSERT(pnp->pr_type == PR_OBJECTDIR);

	if ((error = prlock(pnp, ZNO)) != 0)
		return (error);
	p = pnp->pr_common->prc_proc;
	pslot = p->p_slot;

	/*
	 * We drop p_lock before grabbing the address space lock
	 * in order to avoid a deadlock with the clock thread.
	 * The process will not disappear and its address space
	 * will not change because it is marked P_PR_LOCK.
	 */
	mutex_exit(&p->p_lock);

	if ((error = gfs_readdir_init(&gstate, 64, PRSDSIZE, uiop,
	    pmkino(0, pslot, PR_PIDDIR),
	    pmkino(0, pslot, PR_OBJECTDIR), 0)) != 0) {
		/* prunlock() is called with p_lock held, so retake it */
		mutex_enter(&p->p_lock);
		prunlock(pnp);
		return (error);
	}

	/*
	 * System processes and processes running on the kernel address
	 * space are shown with an empty object directory.
	 */
	if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
		as = NULL;
		objdirsize = 0;
	}

	/*
	 * Loop until user's request is satisfied or until
	 * all mapped objects have been examined. Cannot hold
	 * the address space lock for the following call as
	 * gfs_readdir_pred() ultimately causes a call to uiomove().
	 */
	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
		vattr_t vattr;
		char str[64];

		/*
		 * Set the correct size of the directory just
		 * in case the process has changed its address
		 * space via mmap/munmap calls.
		 */
		if (as != NULL) {
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
			if (as->a_updatedir)
				rebuild_objdir(as);
			objdirsize = as->a_sizedir;
		}

		/*
		 * Find next object.
		 * va_mask is set again after each VOP_GETATTR() attempt
		 * before the next slot is tried.
		 */
		vattr.va_mask = AT_FSID | AT_NODEID;
		while (n < objdirsize && (((vp = obj_entry(as, n)) == NULL) ||
		    (VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)
		    != 0))) {
			vattr.va_mask = AT_FSID | AT_NODEID;
			n++;
		}

		if (as != NULL)
			AS_LOCK_EXIT(as, &as->a_lock);

		/*
		 * Stop when all objects have been reported.
		 */
		if (n >= objdirsize) {
			eof = 1;
			break;
		}

		if (vp == p->p_exec)
			(void) strcpy(str, "a.out");
		else
			pr_object_name(str, vp, &vattr);

		error = gfs_readdir_emit(&gstate, uiop, n, vattr.va_nodeid,
		    str, 0);

		if (error)
			break;
	}

	mutex_enter(&p->p_lock);
	prunlock(pnp);

	return (gfs_readdir_fini(&gstate, error, eofp, eof));
}
4981
4982/* ARGSUSED */
4983static int
4984pr_readdir_lwpdir(prnode_t *pnp, uio_t *uiop, int *eofp)
4985{
4986	gfs_readdir_state_t gstate;
4987	int error, eof = 0;
4988	offset_t tslot;
4989	proc_t *p;
4990	int pslot;
4991	lwpdir_t *lwpdir;
4992	int lwpdirsize;
4993
4994	ASSERT(pnp->pr_type == PR_LWPDIR);
4995
4996	p = pr_p_lock(pnp);
4997	mutex_exit(&pr_pidlock);
4998	if (p == NULL)
4999		return (ENOENT);
5000	ASSERT(p == pnp->pr_common->prc_proc);
5001	pslot = p->p_slot;
5002	lwpdir = p->p_lwpdir;
5003	lwpdirsize = p->p_lwpdir_sz;
5004
5005	/*
5006	 * Drop p->p_lock so we can safely do uiomove().
5007	 * The lwp directory will not change because
5008	 * we have the process locked with P_PR_LOCK.
5009	 */
5010	mutex_exit(&p->p_lock);
5011
5012
5013	if ((error = gfs_readdir_init(&gstate, PLNSIZ, PRSDSIZE, uiop,
5014	    pmkino(0, pslot, PR_PIDDIR),
5015	    pmkino(0, pslot, PR_LWPDIR), 0)) != 0) {
5016		mutex_enter(&p->p_lock);
5017		prunlock(pnp);
5018		return (error);
5019	}
5020
5021	/*
5022	 * Loop until user's request is satisfied or until all lwps
5023	 * have been examined.
5024	 */
5025	while ((error = gfs_readdir_pred(&gstate, uiop, &tslot)) == 0) {
5026		lwpent_t *lep;
5027		uint_t tid;
5028
5029		/*
5030		 * Find next LWP.
5031		 */
5032		while (tslot < lwpdirsize &&
5033		    ((lep = lwpdir[tslot].ld_entry) == NULL))
5034			tslot++;
5035		/*
5036		 * Stop when all lwps have been reported.
5037		 */
5038		if (tslot >= lwpdirsize) {
5039			eof = 1;
5040			break;
5041		}
5042
5043		tid = lep->le_lwpid;
5044		error = gfs_readdir_emitn(&gstate, uiop, tslot,
5045		    pmkino(tslot, pslot, PR_LWPIDDIR), tid);
5046		if (error)
5047			break;
5048	}
5049
5050	mutex_enter(&p->p_lock);
5051	prunlock(pnp);
5052
5053	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5054}
5055
/*
 * Read a /proc/<pid>/lwp/<lwpid> directory.  Entries come from the static
 * lwpiddir[] table of prdirent_t records, so the directory offset is a
 * byte offset into that table and must be a multiple of
 * sizeof (prdirent_t).
 *
 * Returns EINVAL for a negative or misaligned offset or a buffer smaller
 * than one record, ENOENT if the node no longer refers to a process or
 * its lwp slot is -1, an error from uiomove(), or 0.  On the 0 return,
 * *eofp (if non-NULL) is set when the offset has reached the end of the
 * table.
 */
/* ARGSUSED */
static int
pr_readdir_lwpiddir(prnode_t *pnp, uio_t *uiop, int *eofp)
{
	prcommon_t *pcp = pnp->pr_common;
	int zombie = ((pcp->prc_flags & PRC_DESTROY) != 0);
	prdirent_t dirent;
	prdirent_t *dirp;
	offset_t off;
	int error;
	int pslot;
	int tslot;

	ASSERT(pnp->pr_type == PR_LWPIDDIR);

	if (uiop->uio_offset < 0 ||
	    uiop->uio_offset % sizeof (prdirent_t) != 0 ||
	    uiop->uio_resid < sizeof (prdirent_t))
		return (EINVAL);
	if (pcp->prc_proc == NULL || pcp->prc_tslot == -1)
		return (ENOENT);
	if (uiop->uio_offset >= sizeof (lwpiddir))
		goto out;

	/*
	 * Loop until user's request is satisfied, omitting some files
	 * along the way if the lwp is a zombie and also depending
	 * on the data model of the process.
	 *
	 * The for-increment rewrites uio_offset from the saved 'off', so
	 * entries skipped with 'continue' still advance the offset by one
	 * record even though nothing was copied out for them.
	 */
	pslot = pcp->prc_slot;
	tslot = pcp->prc_tslot;
	for (dirp = &lwpiddir[uiop->uio_offset / sizeof (prdirent_t)];
	    uiop->uio_resid >= sizeof (prdirent_t) &&
	    dirp < &lwpiddir[NLWPIDDIRFILES+2];
	    uiop->uio_offset = off + sizeof (prdirent_t), dirp++) {
		off = uiop->uio_offset;
		if (zombie) {
			/* Only these table entries are reported for zombies. */
			switch (dirp->d_ino) {
			case PR_LWPIDDIR:
			case PR_LWPDIR:
			case PR_LWPSINFO:
				break;
			default:
				continue;
			}
		}
#if defined(__sparc)
		/* the asrs file exists only for sparc v9 _LP64 processes */
		if (dirp->d_ino == PR_ASRS &&
		    pcp->prc_datamodel != DATAMODEL_LP64)
			continue;
#endif
		/*
		 * The table stores node types in d_ino; work on a copy and
		 * turn the type into an inode number.  The PR_LWPDIR entry
		 * is numbered with lwp slot 0, everything else with this
		 * lwp's slot.
		 */
		bcopy(dirp, &dirent, sizeof (prdirent_t));
		if (dirent.d_ino == PR_LWPDIR)
			dirent.d_ino = pmkino(0, pslot, dirp->d_ino);
		else
			dirent.d_ino = pmkino(tslot, pslot, dirp->d_ino);
		if ((error = uiomove((caddr_t)&dirent, sizeof (prdirent_t),
		    UIO_READ, uiop)) != 0)
			return (error);
	}
out:
	if (eofp)
		*eofp = (uiop->uio_offset >= sizeof (lwpiddir));
	return (0);
}
5122
5123/* ARGSUSED */
5124static int
5125pr_readdir_fddir(prnode_t *pnp, uio_t *uiop, int *eofp)
5126{
5127	gfs_readdir_state_t gstate;
5128	int error, eof = 0;
5129	offset_t n;
5130	proc_t *p;
5131	int pslot;
5132	int fddirsize;
5133	uf_info_t *fip;
5134
5135	ASSERT(pnp->pr_type == PR_FDDIR);
5136
5137	if ((error = prlock(pnp, ZNO)) != 0)
5138		return (error);
5139	p = pnp->pr_common->prc_proc;
5140	pslot = p->p_slot;
5141	fip = P_FINFO(p);
5142	mutex_exit(&p->p_lock);
5143
5144	if ((error = gfs_readdir_init(&gstate, PLNSIZ, PRSDSIZE, uiop,
5145	    pmkino(0, pslot, PR_PIDDIR), pmkino(0, pslot, PR_FDDIR), 0)) != 0) {
5146		mutex_enter(&p->p_lock);
5147		prunlock(pnp);
5148		return (error);
5149	}
5150
5151	mutex_enter(&fip->fi_lock);
5152	if ((p->p_flag & SSYS) || p->p_as == &kas)
5153		fddirsize = 0;
5154	else
5155		fddirsize = fip->fi_nfiles;
5156
5157	/*
5158	 * Loop until user's request is satisfied or until
5159	 * all file descriptors have been examined.
5160	 */
5161	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
5162		/*
5163		 * Find next fd.
5164		 */
5165		while (n < fddirsize && fip->fi_list[n].uf_file == NULL)
5166			n++;
5167		/*
5168		 * Stop when all fds have been reported.
5169		 */
5170		if (n >= fddirsize) {
5171			eof = 1;
5172			break;
5173		}
5174
5175		error = gfs_readdir_emitn(&gstate, uiop, n,
5176		    pmkino(n, pslot, PR_FD), n);
5177		if (error)
5178			break;
5179	}
5180
5181	mutex_exit(&fip->fi_lock);
5182	mutex_enter(&p->p_lock);
5183	prunlock(pnp);
5184
5185	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5186}
5187
5188/* ARGSUSED */
5189static int
5190pr_readdir_pathdir(prnode_t *pnp, uio_t *uiop, int *eofp)
5191{
5192	longlong_t bp[DIRENT64_RECLEN(64) / sizeof (longlong_t)];
5193	dirent64_t *dirent = (dirent64_t *)bp;
5194	int reclen;
5195	ssize_t oresid;
5196	offset_t off, idx;
5197	int error = 0;
5198	proc_t *p;
5199	int fd, obj;
5200	int pslot;
5201	int fddirsize;
5202	uf_info_t *fip;
5203	struct as *as = NULL;
5204	size_t objdirsize;
5205	vattr_t vattr;
5206	vnode_t *vp;
5207
5208	ASSERT(pnp->pr_type == PR_PATHDIR);
5209
5210	if (uiop->uio_offset < 0 ||
5211	    uiop->uio_resid <= 0 ||
5212	    (uiop->uio_offset % PRSDSIZE) != 0)
5213		return (EINVAL);
5214	oresid = uiop->uio_resid;
5215	bzero(bp, sizeof (bp));
5216
5217	if ((error = prlock(pnp, ZNO)) != 0)
5218		return (error);
5219	p = pnp->pr_common->prc_proc;
5220	fip = P_FINFO(p);
5221	pslot = p->p_slot;
5222	mutex_exit(&p->p_lock);
5223
5224	if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
5225		as = NULL;
5226		objdirsize = 0;
5227	} else {
5228		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
5229		if (as->a_updatedir)
5230			rebuild_objdir(as);
5231		objdirsize = as->a_sizedir;
5232		AS_LOCK_EXIT(as, &as->a_lock);
5233		as = NULL;
5234	}
5235
5236	mutex_enter(&fip->fi_lock);
5237	if ((p->p_flag & SSYS) || p->p_as == &kas)
5238		fddirsize = 0;
5239	else
5240		fddirsize = fip->fi_nfiles;
5241
5242	for (; uiop->uio_resid > 0; uiop->uio_offset = off + PRSDSIZE) {
5243		/*
5244		 * There are 4 special files in the path directory: ".", "..",
5245		 * "root", and "cwd".  We handle those specially here.
5246		 */
5247		off = uiop->uio_offset;
5248		idx = off / PRSDSIZE;
5249		if (off == 0) {				/* "." */
5250			dirent->d_ino = pmkino(0, pslot, PR_PATHDIR);
5251			dirent->d_name[0] = '.';
5252			dirent->d_name[1] = '\0';
5253			reclen = DIRENT64_RECLEN(1);
5254		} else if (idx == 1) {			/* ".." */
5255			dirent->d_ino = pmkino(0, pslot, PR_PIDDIR);
5256			dirent->d_name[0] = '.';
5257			dirent->d_name[1] = '.';
5258			dirent->d_name[2] = '\0';
5259			reclen = DIRENT64_RECLEN(2);
5260		} else if (idx == 2) {			/* "root" */
5261			dirent->d_ino = pmkino(idx, pslot, PR_PATH);
5262			(void) strcpy(dirent->d_name, "root");
5263			reclen = DIRENT64_RECLEN(4);
5264		} else if (idx == 3) {			/* "cwd" */
5265			dirent->d_ino = pmkino(idx, pslot, PR_PATH);
5266			(void) strcpy(dirent->d_name, "cwd");
5267			reclen = DIRENT64_RECLEN(3);
5268		} else if (idx < 4 + fddirsize) {
5269			/*
5270			 * In this case, we have one of the file descriptors.
5271			 */
5272			fd = idx - 4;
5273			if (fip->fi_list[fd].uf_file == NULL)
5274				continue;
5275			dirent->d_ino = pmkino(idx, pslot, PR_PATH);
5276			(void) pr_u32tos(fd, dirent->d_name, PLNSIZ+1);
5277			reclen = DIRENT64_RECLEN(PLNSIZ);
5278		} else if (idx < 4 + fddirsize + objdirsize) {
5279			if (fip != NULL) {
5280				mutex_exit(&fip->fi_lock);
5281				fip = NULL;
5282			}
5283
5284			/*
5285			 * We drop p_lock before grabbing the address space lock
5286			 * in order to avoid a deadlock with the clock thread.
5287			 * The process will not disappear and its address space
5288			 * will not change because it is marked P_PR_LOCK.
5289			 */
5290			if (as == NULL) {
5291				as = p->p_as;
5292				AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
5293			}
5294
5295			if (as->a_updatedir) {
5296				rebuild_objdir(as);
5297				objdirsize = as->a_sizedir;
5298			}
5299
5300			obj = idx - 4 - fddirsize;
5301			if ((vp = obj_entry(as, obj)) == NULL)
5302				continue;
5303			vattr.va_mask = AT_FSID|AT_NODEID;
5304			if (VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) != 0)
5305				continue;
5306			if (vp == p->p_exec)
5307				(void) strcpy(dirent->d_name, "a.out");
5308			else
5309				pr_object_name(dirent->d_name, vp, &vattr);
5310			dirent->d_ino = pmkino(idx, pslot, PR_PATH);
5311			reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
5312		} else {
5313			break;
5314		}
5315
5316		dirent->d_off = uiop->uio_offset + PRSDSIZE;
5317		dirent->d_reclen = (ushort_t)reclen;
5318		if (reclen > uiop->uio_resid) {
5319			/*
5320			 * Error if no entries have been returned yet.
5321			 */
5322			if (uiop->uio_resid == oresid)
5323				error = EINVAL;
5324			break;
5325		}
5326		/*
5327		 * Drop the address space lock to do the uiomove().
5328		 */
5329		if (as != NULL)
5330			AS_LOCK_EXIT(as, &as->a_lock);
5331
5332		error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop);
5333		if (as != NULL)
5334			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
5335
5336		if (error)
5337			break;
5338	}
5339
5340	if (error == 0 && eofp)
5341		*eofp = (uiop->uio_offset >= (fddirsize + 2) * PRSDSIZE);
5342
5343	if (fip != NULL)
5344		mutex_exit(&fip->fi_lock);
5345	if (as != NULL)
5346		AS_LOCK_EXIT(as, &as->a_lock);
5347	mutex_enter(&p->p_lock);
5348	prunlock(pnp);
5349	return (error);
5350}
5351
5352static int
5353pr_readdir_tmpldir(prnode_t *pnp, uio_t *uiop, int *eofp)
5354{
5355	proc_t *p;
5356	int pslot, tslot;
5357	gfs_readdir_state_t gstate;
5358	int error, eof = 0;
5359	offset_t n;
5360
5361	ASSERT(pnp->pr_type == PR_TMPLDIR);
5362
5363	if ((error = prlock(pnp, ZNO)) != 0)
5364		return (error);
5365	p = pnp->pr_common->prc_proc;
5366	pslot = pnp->pr_common->prc_slot;
5367	tslot = pnp->pr_common->prc_tslot;
5368	mutex_exit(&p->p_lock);
5369
5370	if ((error = gfs_readdir_init(&gstate, PRDIRSIZE, PRSDSIZE, uiop,
5371	    pmkino(tslot, pslot, PR_LWPDIR),
5372	    pmkino(tslot, pslot, PR_TMPLDIR), 0)) != 0) {
5373		mutex_enter(&p->p_lock);
5374		prunlock(pnp);
5375		return (error);
5376	}
5377
5378	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
5379		/*
5380		 * Check for an active template.  Reading a directory's
5381		 * contents is already racy, so we don't bother taking
5382		 * any locks.
5383		 */
5384		while (n < ct_ntypes &&
5385		    pnp->pr_common->prc_thread->t_lwp->lwp_ct_active[n] == NULL)
5386			n++;
5387		/*
5388		 * Stop when all types have been reported.
5389		 */
5390		if (n >= ct_ntypes) {
5391			eof = 1;
5392			break;
5393		}
5394		/*
5395		 * The pmkino invocation below will need to be updated
5396		 * when we create our fifth contract type.
5397		 */
5398		ASSERT(ct_ntypes <= 4);
5399		error = gfs_readdir_emit(&gstate, uiop, n,
5400		    pmkino((tslot << 2) | n, pslot, PR_TMPL),
5401		    ct_types[n]->ct_type_name, 0);
5402		if (error)
5403			break;
5404	}
5405
5406	mutex_enter(&p->p_lock);
5407	prunlock(pnp);
5408
5409	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5410}
5411
5412static int
5413pr_readdir_ctdir(prnode_t *pnp, uio_t *uiop, int *eofp)
5414{
5415	proc_t *p;
5416	int pslot;
5417	gfs_readdir_state_t gstate;
5418	int error, eof = 0;
5419	offset_t n;
5420	uint64_t zid;
5421
5422	ASSERT(pnp->pr_type == PR_CTDIR);
5423
5424	if ((error = prlock(pnp, ZNO)) != 0)
5425		return (error);
5426	p = pnp->pr_common->prc_proc;
5427	pslot = p->p_slot;
5428	mutex_exit(&p->p_lock);
5429
5430	if ((error = gfs_readdir_init(&gstate, PRDIRSIZE, PRSDSIZE, uiop,
5431	    pmkino(0, pslot, PR_PIDDIR), pmkino(0, pslot, PR_CTDIR), 0)) != 0) {
5432		mutex_enter(&p->p_lock);
5433		prunlock(pnp);
5434		return (error);
5435	}
5436
5437	zid = VTOZONE(pnp->pr_vnode)->zone_uniqid;
5438	while ((error = gfs_readdir_pred(&gstate, uiop, &n)) == 0) {
5439		id_t next = contract_plookup(p, n, zid);
5440		if (next == -1) {
5441			eof = 1;
5442			break;
5443		}
5444		error = gfs_readdir_emitn(&gstate, uiop, next,
5445		    pmkino(next, pslot, PR_CT), next);
5446		if (error)
5447			break;
5448	}
5449
5450	mutex_enter(&p->p_lock);
5451	prunlock(pnp);
5452
5453	return (gfs_readdir_fini(&gstate, error, eofp, eof));
5454}
5455
/*
 * fsync handler for /proc vnodes.  Does nothing and always reports
 * success (0); all arguments are ignored.
 */
/* ARGSUSED */
static int
prfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	return (0);
}
5462
5463/*
5464 * Utility: remove a /proc vnode from a linked list, threaded through pr_next.
5465 */
5466static void
5467pr_list_unlink(vnode_t *pvp, vnode_t **listp)
5468{
5469	vnode_t *vp;
5470	prnode_t *pnp;
5471
5472	while ((vp = *listp) != NULL) {
5473		pnp = VTOP(vp);
5474		if (vp == pvp) {
5475			*listp = pnp->pr_next;
5476			pnp->pr_next = NULL;
5477			break;
5478		}
5479		listp = &pnp->pr_next;
5480	}
5481}
5482
/*
 * Inactive handler for /proc vnodes.
 *
 * PR_OBJECT, PR_FD, PR_SELF and PR_PATH nodes are on none of the
 * per-process lists: their parent is released and the prnode is freed
 * straight away.
 *
 * For every other type, pr_pidlock, the process's p_lock (when the node
 * still refers to a process) and the vnode's v_lock are taken in that
 * order.  If the node is the /proc root or other holds remain, only
 * v_count is decremented.  Otherwise the node is detached from its
 * parent's pr_files[] cache, removed from the process's lists of /proc
 * vnodes, its contract reference (PR_CT) dropped, and the prnode freed
 * together with any attached old-style pid-file node.  The parent is
 * released last.
 */
/* ARGSUSED */
static void
prinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	prnode_t *pnp = VTOP(vp);
	prnodetype_t type = pnp->pr_type;
	proc_t *p;
	vnode_t *dp;
	vnode_t *ovp = NULL;
	prnode_t *opnp = NULL;

	switch (type) {
	case PR_OBJECT:
	case PR_FD:
	case PR_SELF:
	case PR_PATH:
		/* These are not linked into the usual lists */
		ASSERT(vp->v_count == 1);
		if ((dp = pnp->pr_parent) != NULL)
			VN_RELE(dp);
		prfreenode(pnp);
		return;
	default:
		break;
	}

	mutex_enter(&pr_pidlock);
	if (pnp->pr_pcommon == NULL)
		p = NULL;
	else if ((p = pnp->pr_pcommon->prc_proc) != NULL)
		mutex_enter(&p->p_lock);
	mutex_enter(&vp->v_lock);

	/*
	 * The root directory node is never torn down here, and a node
	 * with other holds outstanding only loses this one.
	 */
	if (type == PR_PROCDIR || vp->v_count > 1) {
		vp->v_count--;
		mutex_exit(&vp->v_lock);
		if (p != NULL)
			mutex_exit(&p->p_lock);
		mutex_exit(&pr_pidlock);
		return;
	}

	if ((dp = pnp->pr_parent) != NULL) {
		prnode_t *dpnp;

		switch (type) {
		case PR_PIDFILE:
		case PR_LWPIDFILE:
		case PR_OPAGEDATA:
			break;
		default:
			/*
			 * Clear the parent's cached pointer to this vnode,
			 * but only if the slot still refers to it.
			 */
			dpnp = VTOP(dp);
			mutex_enter(&dpnp->pr_mutex);
			if (dpnp->pr_files != NULL &&
			    dpnp->pr_files[pnp->pr_index] == vp)
				dpnp->pr_files[pnp->pr_index] = NULL;
			mutex_exit(&dpnp->pr_mutex);
			break;
		}
		/* 'dp' is kept so the parent can be released at the end */
		pnp->pr_parent = NULL;
	}

	ASSERT(vp->v_count == 1);

	/*
	 * If we allocated an old /proc/pid node, free it too.
	 */
	if (pnp->pr_pidfile != NULL) {
		ASSERT(type == PR_PIDDIR);
		ovp = pnp->pr_pidfile;
		opnp = VTOP(ovp);
		ASSERT(opnp->pr_type == PR_PIDFILE);
		pnp->pr_pidfile = NULL;
	}

	mutex_exit(&pr_pidlock);

	if (p != NULL) {
		/*
		 * Remove the vnodes from the lists of
		 * /proc vnodes for the process.
		 */
		int slot;

		switch (type) {
		case PR_PIDDIR:
			pr_list_unlink(vp, &p->p_trace);
			break;
		case PR_LWPIDDIR:
			/* a slot of -1 means there is no lwp entry to unlink from */
			if ((slot = pnp->pr_common->prc_tslot) != -1) {
				lwpent_t *lep = p->p_lwpdir[slot].ld_entry;
				pr_list_unlink(vp, &lep->le_trace);
			}
			break;
		default:
			pr_list_unlink(vp, &p->p_plist);
			break;
		}
		if (ovp != NULL)
			pr_list_unlink(ovp, &p->p_plist);
		mutex_exit(&p->p_lock);
	}

	mutex_exit(&vp->v_lock);

	if (type == PR_CT && pnp->pr_contract != NULL) {
		contract_rele(pnp->pr_contract);
		pnp->pr_contract = NULL;
	}

	if (opnp != NULL)
		prfreenode(opnp);
	prfreenode(pnp);
	if (dp != NULL) {
		VN_RELE(dp);
	}
}
5600
/*
 * Seek handler for /proc vnodes.  Accepts any offset: returns 0 without
 * examining 'ooff' or modifying *noffp.
 */
/* ARGSUSED */
static int
prseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
{
	return (0);
}
5607
/*
 * We use the p_execdir member of proc_t to expand the %d token in core file
 * paths (the directory path for the executable that dumped core; see
 * coreadm(1M) for details). We'd like gcore(1) to be able to expand %d in
 * the same way as core dumping from the kernel, but there's no convenient
 * and comprehensible way to export the path name for p_execdir. To solve
 * this, we try to find the actual path to the executable that was used. In
 * pr_lookup_pathdir(), we mark the a.out path name vnode with the PR_AOUT
 * flag, and use that here to indicate that more work is needed beyond the
 * call to vnodetopath().
 *
 * Arguments:
 *	pnp	the /proc path node being resolved
 *	buf	output buffer that receives the NUL-terminated path
 *	size	size of 'buf' in bytes
 *	cr	credentials passed to vnodetopath() and dirfindvp()
 *
 * The path is resolved relative to the calling process's root directory
 * (u_rdir, or rootdir when that is not set).
 *
 * Returns 0 with the path in 'buf', EIO if the target process cannot be
 * locked, or the error from the last vnodetopath() attempt.
 */
static int
prreadlink_lookup(prnode_t *pnp, char *buf, size_t size, cred_t *cr)
{
	proc_t *p;
	vnode_t *vp, *execvp, *vrootp;
	int ret;
	size_t len;
	dirent64_t *dp;
	size_t dlen = DIRENT64_RECLEN(MAXPATHLEN);
	char *dbuf;

	/* Take a hold on the caller's root so it stays valid below. */
	p = curproc;
	mutex_enter(&p->p_lock);
	if ((vrootp = PTOU(p)->u_rdir) == NULL)
		vrootp = rootdir;
	VN_HOLD(vrootp);
	mutex_exit(&p->p_lock);

	ret = vnodetopath(vrootp, pnp->pr_realvp, buf, size, cr);

	/*
	 * If PR_AOUT isn't set, then we looked up the path for the vnode;
	 * otherwise, we looked up the path for (what we believe to be) the
	 * containing directory.
	 */
	if ((pnp->pr_flags & PR_AOUT) == 0) {
		VN_RELE(vrootp);
		return (ret);
	}

	/*
	 * Fail if there's a problem locking the process. This will only
	 * occur if the process is changing so the information we would
	 * report would already be invalid.
	 */
	if (prlock(pnp, ZNO) != 0) {
		VN_RELE(vrootp);
		return (EIO);
	}

	/* From here on 'p' is the target process, not the caller. */
	p = pnp->pr_common->prc_proc;
	mutex_exit(&p->p_lock);

	/* Hold the executable's vnode so it can be used after prunlock(). */
	execvp = p->p_exec;
	VN_HOLD(execvp);

	/*
	 * If our initial lookup of the directory failed, fall back to
	 * the path name information for p_exec.
	 */
	if (ret != 0) {
		mutex_enter(&p->p_lock);
		prunlock(pnp);
		ret = vnodetopath(vrootp, execvp, buf, size, cr);
		VN_RELE(execvp);
		VN_RELE(vrootp);
		return (ret);
	}

	/* 'buf' holds the directory path; remember where it ends. */
	len = strlen(buf);

	/*
	 * We use u_comm as a guess for the last component of the full
	 * executable path name. If there isn't going to be enough space
	 * we fall back to using the p_exec so that we can have _an_
	 * answer even if it's not perfect.
	 */
	if (strlen(PTOU(p)->u_comm) + len + 1 < size) {
		buf[len] = '/';
		(void) strcpy(buf + len + 1, PTOU(p)->u_comm);
		mutex_enter(&p->p_lock);
		prunlock(pnp);

		/*
		 * Do a forward lookup of our u_comm guess.
		 * Only the appended component is looked up, relative to
		 * the directory vnode; the guess is accepted when it
		 * resolves to the same vnode as the executable.
		 */
		if (lookupnameat(buf + len + 1, UIO_SYSSPACE, FOLLOW, NULLVPP,
		    &vp, pnp->pr_realvp) == 0) {
			if (vn_compare(vp, execvp)) {
				VN_RELE(vp);
				VN_RELE(execvp);
				VN_RELE(vrootp);
				return (0);
			}

			VN_RELE(vp);
		}
	} else {
		mutex_enter(&p->p_lock);
		prunlock(pnp);
	}

	dbuf = kmem_alloc(dlen, KM_SLEEP);

	/*
	 * Try to find a matching vnode by iterating through the directory's
	 * entries. If that fails, fall back to the path information for
	 * p_exec.
	 * Writing at buf[len] overwrites any u_comm guess appended above.
	 */
	if ((ret = dirfindvp(vrootp, pnp->pr_realvp, execvp, cr, dbuf,
	    dlen, &dp)) == 0 && strlen(dp->d_name) + len + 1 < size) {
		buf[len] = '/';
		(void) strcpy(buf + len + 1, dp->d_name);
	} else {
		ret = vnodetopath(vrootp, execvp, buf, size, cr);
	}

	kmem_free(dbuf, dlen);
	VN_RELE(execvp);
	VN_RELE(vrootp);

	return (ret);
}
5732
5733/* ARGSUSED */
5734static int
5735prreadlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ctp)
5736{
5737	prnode_t *pnp = VTOP(vp);
5738	char *buf;
5739	int ret = EINVAL;
5740	char idbuf[16];
5741	int length, rlength;
5742	contract_t *ct;
5743
5744	switch (pnp->pr_type) {
5745	case PR_SELF:
5746		(void) snprintf(idbuf, sizeof (idbuf), "%d", curproc->p_pid);
5747		ret = uiomove(idbuf, strlen(idbuf), UIO_READ, uiop);
5748		break;
5749	case PR_OBJECT:
5750	case PR_FD:
5751	case PR_CURDIR:
5752	case PR_ROOTDIR:
5753		if (pnp->pr_realvp->v_type == VDIR)
5754			ret = 0;
5755		break;
5756	case PR_PATH:
5757		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
5758
5759		if ((ret = prreadlink_lookup(pnp, buf, MAXPATHLEN, cr)) == 0)
5760			ret = uiomove(buf, strlen(buf), UIO_READ, uiop);
5761
5762		kmem_free(buf, MAXPATHLEN);
5763		break;
5764	case PR_CT:
5765		ASSERT(pnp->pr_contract != NULL);
5766		ct = pnp->pr_contract;
5767		length = sizeof (CTFS_ROOT "//") + sizeof (idbuf) +
5768		    strlen(ct->ct_type->ct_type_name);
5769		buf = kmem_alloc(length, KM_SLEEP);
5770		rlength = snprintf(buf, length, CTFS_ROOT "/%s/%d",
5771		    ct->ct_type->ct_type_name, ct->ct_id);
5772		ASSERT(rlength < length);
5773		ret = uiomove(buf, rlength, UIO_READ, uiop);
5774		kmem_free(buf, length);
5775		break;
5776	default:
5777		break;
5778	}
5779
5780	return (ret);
5781}
5782
5783/*ARGSUSED2*/
5784static int
5785prcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
5786{
5787	prnode_t *pp1, *pp2;
5788
5789	if (vp1 == vp2)
5790		return (1);
5791
5792	if (!vn_matchops(vp1, prvnodeops) || !vn_matchops(vp2, prvnodeops))
5793		return (0);
5794
5795	pp1 = VTOP(vp1);
5796	pp2 = VTOP(vp2);
5797
5798	if (pp1->pr_type != pp2->pr_type)
5799		return (0);
5800	if (pp1->pr_type == PR_PROCDIR)
5801		return (1);
5802	if (pp1->pr_ino || pp2->pr_ino)
5803		return (pp2->pr_ino == pp1->pr_ino);
5804
5805	if (pp1->pr_common == NULL || pp2->pr_common == NULL)
5806		return (0);
5807
5808	return (pp1->pr_common->prc_slot == pp2->pr_common->prc_slot &&
5809	    pp1->pr_common->prc_tslot == pp2->pr_common->prc_tslot);
5810}
5811
5812static int
5813prrealvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
5814{
5815	vnode_t *rvp;
5816
5817	if ((rvp = VTOP(vp)->pr_realvp) != NULL) {
5818		vp = rvp;
5819		if (VOP_REALVP(vp, &rvp, ct) == 0)
5820			vp = rvp;
5821	}
5822
5823	*vpp = vp;
5824	return (0);
5825}
5826
5827/*
5828 * Return the answer requested to poll().
5829 * POLLIN, POLLRDNORM, and POLLOUT are recognized as in fs_poll().
5830 * In addition, these have special meaning for /proc files:
5831 *	POLLPRI		process or lwp stopped on an event of interest
5832 *	POLLERR		/proc file descriptor is invalid
5833 *	POLLHUP		process or lwp has terminated
5834 */
5835/*ARGSUSED5*/
5836static int
5837prpoll(vnode_t *vp, short events, int anyyet, short *reventsp,
5838	pollhead_t **phpp, caller_context_t *ct)
5839{
5840	prnode_t *pnp = VTOP(vp);
5841	prcommon_t *pcp = pnp->pr_common;
5842	pollhead_t *php = &pcp->prc_pollhead;
5843	proc_t *p;
5844	short revents;
5845	int error;
5846	int lockstate;
5847
5848	ASSERT(pnp->pr_type < PR_NFILES);
5849
5850	/*
5851	 * Support for old /proc interface.
5852	 */
5853	if (pnp->pr_pidfile != NULL) {
5854		vp = pnp->pr_pidfile;
5855		pnp = VTOP(vp);
5856		ASSERT(pnp->pr_type == PR_PIDFILE);
5857		ASSERT(pnp->pr_common == pcp);
5858	}
5859
5860	*reventsp = revents = 0;
5861	*phpp = (pollhead_t *)NULL;
5862
5863	if (vp->v_type == VDIR) {
5864		*reventsp |= POLLNVAL;
5865		return (0);
5866	}
5867
5868	lockstate = pollunlock();	/* avoid deadlock with prnotify() */
5869
5870	if ((error = prlock(pnp, ZNO)) != 0) {
5871		pollrelock(lockstate);
5872		switch (error) {
5873		case ENOENT:		/* process or lwp died */
5874			*reventsp = POLLHUP;
5875			error = 0;
5876			break;
5877		case EAGAIN:		/* invalidated */
5878			*reventsp = POLLERR;
5879			error = 0;
5880			break;
5881		}
5882		return (error);
5883	}
5884
5885	/*
5886	 * We have the process marked locked (P_PR_LOCK) and we are holding
5887	 * its p->p_lock.  We want to unmark the process but retain
5888	 * exclusive control w.r.t. other /proc controlling processes
5889	 * before reacquiring the polling locks.
5890	 *
5891	 * prunmark() does this for us.  It unmarks the process
5892	 * but retains p->p_lock so we still have exclusive control.
5893	 * We will drop p->p_lock at the end to relinquish control.
5894	 *
5895	 * We cannot call prunlock() at the end to relinquish control
5896	 * because prunlock(), like prunmark(), may drop and reacquire
5897	 * p->p_lock and that would lead to a lock order violation
5898	 * w.r.t. the polling locks we are about to reacquire.
5899	 */
5900	p = pcp->prc_proc;
5901	ASSERT(p != NULL);
5902	prunmark(p);
5903
5904	pollrelock(lockstate);		/* reacquire dropped poll locks */
5905
5906	if ((p->p_flag & SSYS) || p->p_as == &kas)
5907		revents = POLLNVAL;
5908	else {
5909		short ev;
5910
5911		if ((ev = (events & (POLLIN|POLLRDNORM))) != 0)
5912			revents |= ev;
5913		/*
5914		 * POLLWRNORM (same as POLLOUT) really should not be
5915		 * used to indicate that the process or lwp stopped.
5916		 * However, USL chose to use POLLWRNORM rather than
5917		 * POLLPRI to indicate this, so we just accept either
5918		 * requested event to indicate stopped.  (grr...)
5919		 */
5920		if ((ev = (events & (POLLPRI|POLLOUT|POLLWRNORM))) != 0) {
5921			kthread_t *t;
5922
5923			if (pcp->prc_flags & PRC_LWP) {
5924				t = pcp->prc_thread;
5925				ASSERT(t != NULL);
5926				thread_lock(t);
5927			} else {
5928				t = prchoose(p);	/* returns locked t */
5929				ASSERT(t != NULL);
5930			}
5931
5932			if (ISTOPPED(t) || VSTOPPED(t))
5933				revents |= ev;
5934			thread_unlock(t);
5935		}
5936	}
5937
5938	*reventsp = revents;
5939	if (!anyyet && revents == 0) {
5940		/*
5941		 * Arrange to wake up the polling lwp when
5942		 * the target process/lwp stops or terminates
5943		 * or when the file descriptor becomes invalid.
5944		 */
5945		pcp->prc_flags |= PRC_POLL;
5946		*phpp = php;
5947	}
5948	mutex_exit(&p->p_lock);
5949	return (0);
5950}
5951
5952/* in prioctl.c */
5953extern int prioctl(vnode_t *, int, intptr_t, int, cred_t *, int *,
5954	caller_context_t *);
5955
5956/*
5957 * /proc vnode operations vector
5958 */
5959const fs_operation_def_t pr_vnodeops_template[] = {
5960	VOPNAME_OPEN,		{ .vop_open = propen },
5961	VOPNAME_CLOSE,		{ .vop_close = prclose },
5962	VOPNAME_READ,		{ .vop_read = prread },
5963	VOPNAME_WRITE,		{ .vop_write = prwrite },
5964	VOPNAME_IOCTL,		{ .vop_ioctl = prioctl },
5965	VOPNAME_GETATTR,	{ .vop_getattr = prgetattr },
5966	VOPNAME_ACCESS,		{ .vop_access = praccess },
5967	VOPNAME_LOOKUP,		{ .vop_lookup = prlookup },
5968	VOPNAME_CREATE,		{ .vop_create = prcreate },
5969	VOPNAME_READDIR,	{ .vop_readdir = prreaddir },
5970	VOPNAME_READLINK,	{ .vop_readlink = prreadlink },
5971	VOPNAME_FSYNC,		{ .vop_fsync = prfsync },
5972	VOPNAME_INACTIVE,	{ .vop_inactive = prinactive },
5973	VOPNAME_SEEK,		{ .vop_seek = prseek },
5974	VOPNAME_CMP,		{ .vop_cmp = prcmp },
5975	VOPNAME_FRLOCK,		{ .error = fs_error },
5976	VOPNAME_REALVP,		{ .vop_realvp = prrealvp },
5977	VOPNAME_POLL,		{ .vop_poll = prpoll },
5978	VOPNAME_DISPOSE,	{ .error = fs_error },
5979	VOPNAME_SHRLOCK,	{ .error = fs_error },
5980	NULL,			NULL
5981};
5982