1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/uio.h>
29#include <sys/param.h>
30#include <sys/cmn_err.h>
31#include <sys/cred.h>
32#include <sys/policy.h>
33#include <sys/debug.h>
34#include <sys/errno.h>
35#include <sys/file.h>
36#include <sys/inline.h>
37#include <sys/kmem.h>
38#include <sys/proc.h>
39#include <sys/brand.h>
40#include <sys/regset.h>
41#include <sys/sysmacros.h>
42#include <sys/systm.h>
43#include <sys/vfs.h>
44#include <sys/vnode.h>
45#include <sys/signal.h>
46#include <sys/auxv.h>
47#include <sys/user.h>
48#include <sys/class.h>
49#include <sys/fault.h>
50#include <sys/syscall.h>
51#include <sys/procfs.h>
52#include <sys/zone.h>
53#include <sys/copyops.h>
54#include <sys/schedctl.h>
55#include <vm/as.h>
56#include <vm/seg.h>
57#include <fs/proc/prdata.h>
58#include <sys/contract/process_impl.h>
59
60static	void	pr_settrace(proc_t *, sigset_t *);
61static	int	pr_setfpregs(prnode_t *, prfpregset_t *);
62#if defined(__sparc)
63static	int	pr_setxregs(prnode_t *, prxregset_t *);
64static	int	pr_setasrs(prnode_t *, asrset_t);
65#endif
66static	int	pr_setvaddr(prnode_t *, caddr_t);
67static	int	pr_clearsig(prnode_t *);
68static	int	pr_clearflt(prnode_t *);
69static	int	pr_watch(prnode_t *, prwatch_t *, int *);
70static	int	pr_agent(prnode_t *, prgregset_t, int *);
71static	int	pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
72static	int	pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
73static	int	pr_spriv(proc_t *, prpriv_t *, cred_t *);
74static	int	pr_szoneid(proc_t *, zoneid_t, cred_t *);
75static	void	pauselwps(proc_t *);
76static	void	unpauselwps(proc_t *);
77
78typedef union {
79	long		sig;		/* PCKILL, PCUNKILL */
80	long		nice;		/* PCNICE */
81	long		timeo;		/* PCTWSTOP */
82	ulong_t		flags;		/* PCRUN, PCSET, PCUNSET */
83	caddr_t		vaddr;		/* PCSVADDR */
84	siginfo_t	siginfo;	/* PCSSIG */
85	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
86	fltset_t	fltset;		/* PCSFAULT */
87	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
88	prgregset_t	prgregset;	/* PCSREG, PCAGENT */
89	prfpregset_t	prfpregset;	/* PCSFPREG */
90#if defined(__sparc)
91	prxregset_t	prxregset;	/* PCSXREG */
92	asrset_t	asrset;		/* PCSASRS */
93#endif
94	prwatch_t	prwatch;	/* PCWATCH */
95	priovec_t	priovec;	/* PCREAD, PCWRITE */
96	prcred_t	prcred;		/* PCSCRED */
97	prpriv_t	prpriv;		/* PCSPRIV */
98	long		przoneid;	/* PCSZONE */
99} arg_t;
100
101static	int	pr_control(long, arg_t *, prnode_t *, cred_t *);
102
103static size_t
104ctlsize(long cmd, size_t resid, arg_t *argp)
105{
106	size_t size = sizeof (long);
107	size_t rnd;
108	int ngrp;
109
110	switch (cmd) {
111	case PCNULL:
112	case PCSTOP:
113	case PCDSTOP:
114	case PCWSTOP:
115	case PCCSIG:
116	case PCCFAULT:
117		break;
118	case PCSSIG:
119		size += sizeof (siginfo_t);
120		break;
121	case PCTWSTOP:
122		size += sizeof (long);
123		break;
124	case PCKILL:
125	case PCUNKILL:
126	case PCNICE:
127		size += sizeof (long);
128		break;
129	case PCRUN:
130	case PCSET:
131	case PCUNSET:
132		size += sizeof (ulong_t);
133		break;
134	case PCSVADDR:
135		size += sizeof (caddr_t);
136		break;
137	case PCSTRACE:
138	case PCSHOLD:
139		size += sizeof (sigset_t);
140		break;
141	case PCSFAULT:
142		size += sizeof (fltset_t);
143		break;
144	case PCSENTRY:
145	case PCSEXIT:
146		size += sizeof (sysset_t);
147		break;
148	case PCSREG:
149	case PCAGENT:
150		size += sizeof (prgregset_t);
151		break;
152	case PCSFPREG:
153		size += sizeof (prfpregset_t);
154		break;
155#if defined(__sparc)
156	case PCSXREG:
157		size += sizeof (prxregset_t);
158		break;
159	case PCSASRS:
160		size += sizeof (asrset_t);
161		break;
162#endif
163	case PCWATCH:
164		size += sizeof (prwatch_t);
165		break;
166	case PCREAD:
167	case PCWRITE:
168		size += sizeof (priovec_t);
169		break;
170	case PCSCRED:
171		size += sizeof (prcred_t);
172		break;
173	case PCSCREDX:
174		/*
175		 * We cannot derefence the pr_ngroups fields if it
176		 * we don't have enough data.
177		 */
178		if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
179			return (0);
180		ngrp = argp->prcred.pr_ngroups;
181		if (ngrp < 0 || ngrp > ngroups_max)
182			return (0);
183
184		/* The result can be smaller than sizeof (prcred_t) */
185		size += sizeof (prcred_t) - sizeof (gid_t);
186		size += ngrp * sizeof (gid_t);
187		break;
188	case PCSPRIV:
189		if (resid >= size + sizeof (prpriv_t))
190			size += priv_prgetprivsize(&argp->prpriv);
191		else
192			return (0);
193		break;
194	case PCSZONE:
195		size += sizeof (long);
196		break;
197	default:
198		return (0);
199	}
200
201	/* Round up to a multiple of long, unless exact amount written */
202	if (size < resid) {
203		rnd = size & (sizeof (long) - 1);
204
205		if (rnd != 0)
206			size += sizeof (long) - rnd;
207	}
208
209	if (size > resid)
210		return (0);
211	return (size);
212}
213
214/*
215 * Control operations (lots).
216 */
217int
218prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
219{
220#define	MY_BUFFER_SIZE \
221		100 > 1 + sizeof (arg_t) / sizeof (long) ? \
222		100 : 1 + sizeof (arg_t) / sizeof (long)
223	long buf[MY_BUFFER_SIZE];
224	long *bufp;
225	size_t resid = 0;
226	size_t size;
227	prnode_t *pnp = VTOP(vp);
228	int error;
229	int locked = 0;
230
231	while (uiop->uio_resid) {
232		/*
233		 * Read several commands in one gulp.
234		 */
235		bufp = buf;
236		if (resid) {	/* move incomplete command to front of buffer */
237			long *tail;
238
239			if (resid >= sizeof (buf))
240				break;
241			tail = (long *)((char *)buf + sizeof (buf) - resid);
242			do {
243				*bufp++ = *tail++;
244			} while ((resid -= sizeof (long)) != 0);
245		}
246		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
247		if (resid > uiop->uio_resid)
248			resid = uiop->uio_resid;
249		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
250			return (error);
251		resid += (char *)bufp - (char *)buf;
252		bufp = buf;
253
254		do {		/* loop over commands in buffer */
255			long cmd = bufp[0];
256			arg_t *argp = (arg_t *)&bufp[1];
257
258			size = ctlsize(cmd, resid, argp);
259			if (size == 0)	/* incomplete or invalid command */
260				break;
261			/*
262			 * Perform the specified control operation.
263			 */
264			if (!locked) {
265				if ((error = prlock(pnp, ZNO)) != 0)
266					return (error);
267				locked = 1;
268			}
269			if (error = pr_control(cmd, argp, pnp, cr)) {
270				if (error == -1)	/* -1 is timeout */
271					locked = 0;
272				else
273					return (error);
274			}
275			bufp = (long *)((char *)bufp + size);
276		} while ((resid -= size) != 0);
277
278		if (locked) {
279			prunlock(pnp);
280			locked = 0;
281		}
282	}
283	return (resid? EINVAL : 0);
284}
285
286static int
287pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
288{
289	prcommon_t *pcp;
290	proc_t *p;
291	int unlocked;
292	int error = 0;
293
294	if (cmd == PCNULL)
295		return (0);
296
297	pcp = pnp->pr_common;
298	p = pcp->prc_proc;
299	ASSERT(p != NULL);
300
301	/* System processes defy control. */
302	if (p->p_flag & SSYS) {
303		prunlock(pnp);
304		return (EBUSY);
305	}
306
307	switch (cmd) {
308
309	default:
310		error = EINVAL;
311		break;
312
313	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
314	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
315	case PCWSTOP:	/* wait for process or lwp to stop */
316	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
317		{
318			time_t timeo;
319
320			/*
321			 * Can't apply to a system process.
322			 */
323			if (p->p_as == &kas) {
324				error = EBUSY;
325				break;
326			}
327
328			if (cmd == PCSTOP || cmd == PCDSTOP)
329				pr_stop(pnp);
330
331			if (cmd == PCDSTOP)
332				break;
333
334			/*
335			 * If an lwp is waiting for itself or its process,
336			 * don't wait. The stopped lwp would never see the
337			 * fact that it is stopped.
338			 */
339			if ((pcp->prc_flags & PRC_LWP)?
340			    (pcp->prc_thread == curthread) : (p == curproc)) {
341				if (cmd == PCWSTOP || cmd == PCTWSTOP)
342					error = EBUSY;
343				break;
344			}
345
346			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
347			if ((error = pr_wait_stop(pnp, timeo)) != 0)
348				return (error);
349
350			break;
351		}
352
353	case PCRUN:	/* make lwp or process runnable */
354		error = pr_setrun(pnp, argp->flags);
355		break;
356
357	case PCSTRACE:	/* set signal trace mask */
358		pr_settrace(p,  &argp->sigset);
359		break;
360
361	case PCSSIG:	/* set current signal */
362		error = pr_setsig(pnp, &argp->siginfo);
363		if (argp->siginfo.si_signo == SIGKILL && error == 0) {
364			prunlock(pnp);
365			pr_wait_die(pnp);
366			return (-1);
367		}
368		break;
369
370	case PCKILL:	/* send signal */
371		error = pr_kill(pnp, (int)argp->sig, cr);
372		if (error == 0 && argp->sig == SIGKILL) {
373			prunlock(pnp);
374			pr_wait_die(pnp);
375			return (-1);
376		}
377		break;
378
379	case PCUNKILL:	/* delete a pending signal */
380		error = pr_unkill(pnp, (int)argp->sig);
381		break;
382
383	case PCNICE:	/* set nice priority */
384		error = pr_nice(p, (int)argp->nice, cr);
385		break;
386
387	case PCSENTRY:	/* set syscall entry bit mask */
388	case PCSEXIT:	/* set syscall exit bit mask */
389		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
390		break;
391
392	case PCSET:	/* set process flags */
393		error = pr_set(p, argp->flags);
394		break;
395
396	case PCUNSET:	/* unset process flags */
397		error = pr_unset(p, argp->flags);
398		break;
399
400	case PCSREG:	/* set general registers */
401		{
402			kthread_t *t = pr_thread(pnp);
403
404			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
405				thread_unlock(t);
406				error = EBUSY;
407			} else {
408				thread_unlock(t);
409				mutex_exit(&p->p_lock);
410				prsetprregs(ttolwp(t), argp->prgregset, 0);
411				mutex_enter(&p->p_lock);
412			}
413			break;
414		}
415
416	case PCSFPREG:	/* set floating-point registers */
417		error = pr_setfpregs(pnp, &argp->prfpregset);
418		break;
419
420	case PCSXREG:	/* set extra registers */
421#if defined(__sparc)
422		error = pr_setxregs(pnp, &argp->prxregset);
423#else
424		error = EINVAL;
425#endif
426		break;
427
428#if defined(__sparc)
429	case PCSASRS:	/* set ancillary state registers */
430		error = pr_setasrs(pnp, argp->asrset);
431		break;
432#endif
433
434	case PCSVADDR:	/* set virtual address at which to resume */
435		error = pr_setvaddr(pnp, argp->vaddr);
436		break;
437
438	case PCSHOLD:	/* set signal-hold mask */
439		pr_sethold(pnp, &argp->sigset);
440		break;
441
442	case PCSFAULT:	/* set mask of traced faults */
443		pr_setfault(p, &argp->fltset);
444		break;
445
446	case PCCSIG:	/* clear current signal */
447		error = pr_clearsig(pnp);
448		break;
449
450	case PCCFAULT:	/* clear current fault */
451		error = pr_clearflt(pnp);
452		break;
453
454	case PCWATCH:	/* set or clear watched areas */
455		error = pr_watch(pnp, &argp->prwatch, &unlocked);
456		if (error && unlocked)
457			return (error);
458		break;
459
460	case PCAGENT:	/* create the /proc agent lwp in the target process */
461		error = pr_agent(pnp, argp->prgregset, &unlocked);
462		if (error && unlocked)
463			return (error);
464		break;
465
466	case PCREAD:	/* read from the address space */
467		error = pr_rdwr(p, UIO_READ, &argp->priovec);
468		break;
469
470	case PCWRITE:	/* write to the address space */
471		error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
472		break;
473
474	case PCSCRED:	/* set the process credentials */
475	case PCSCREDX:
476		error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
477		break;
478
479	case PCSPRIV:	/* set the process privileges */
480		error = pr_spriv(p, &argp->prpriv, cr);
481		break;
482	case PCSZONE:	/* set the process's zoneid credentials */
483		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
484		break;
485	}
486
487	if (error)
488		prunlock(pnp);
489	return (error);
490}
491
492#ifdef _SYSCALL32_IMPL
493
494typedef union {
495	int32_t		sig;		/* PCKILL, PCUNKILL */
496	int32_t		nice;		/* PCNICE */
497	int32_t		timeo;		/* PCTWSTOP */
498	uint32_t	flags;		/* PCRUN, PCSET, PCUNSET */
499	caddr32_t	vaddr;		/* PCSVADDR */
500	siginfo32_t	siginfo;	/* PCSSIG */
501	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
502	fltset_t	fltset;		/* PCSFAULT */
503	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
504	prgregset32_t	prgregset;	/* PCSREG, PCAGENT */
505	prfpregset32_t	prfpregset;	/* PCSFPREG */
506#if defined(__sparc)
507	prxregset_t	prxregset;	/* PCSXREG */
508#endif
509	prwatch32_t	prwatch;	/* PCWATCH */
510	priovec32_t	priovec;	/* PCREAD, PCWRITE */
511	prcred32_t	prcred;		/* PCSCRED */
512	prpriv_t	prpriv;		/* PCSPRIV */
513	int32_t		przoneid;	/* PCSZONE */
514} arg32_t;
515
516static	int	pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
517static	int	pr_setfpregs32(prnode_t *, prfpregset32_t *);
518
519/*
520 * Note that while ctlsize32() can use argp, it must do so only in a way
521 * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
522 * to an array of 32-bit values and only 32-bit alignment is ensured.
523 */
524static size_t
525ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
526{
527	size_t size = sizeof (int32_t);
528	size_t rnd;
529	int ngrp;
530
531	switch (cmd) {
532	case PCNULL:
533	case PCSTOP:
534	case PCDSTOP:
535	case PCWSTOP:
536	case PCCSIG:
537	case PCCFAULT:
538		break;
539	case PCSSIG:
540		size += sizeof (siginfo32_t);
541		break;
542	case PCTWSTOP:
543		size += sizeof (int32_t);
544		break;
545	case PCKILL:
546	case PCUNKILL:
547	case PCNICE:
548		size += sizeof (int32_t);
549		break;
550	case PCRUN:
551	case PCSET:
552	case PCUNSET:
553		size += sizeof (uint32_t);
554		break;
555	case PCSVADDR:
556		size += sizeof (caddr32_t);
557		break;
558	case PCSTRACE:
559	case PCSHOLD:
560		size += sizeof (sigset_t);
561		break;
562	case PCSFAULT:
563		size += sizeof (fltset_t);
564		break;
565	case PCSENTRY:
566	case PCSEXIT:
567		size += sizeof (sysset_t);
568		break;
569	case PCSREG:
570	case PCAGENT:
571		size += sizeof (prgregset32_t);
572		break;
573	case PCSFPREG:
574		size += sizeof (prfpregset32_t);
575		break;
576#if defined(__sparc)
577	case PCSXREG:
578		size += sizeof (prxregset_t);
579		break;
580#endif
581	case PCWATCH:
582		size += sizeof (prwatch32_t);
583		break;
584	case PCREAD:
585	case PCWRITE:
586		size += sizeof (priovec32_t);
587		break;
588	case PCSCRED:
589		size += sizeof (prcred32_t);
590		break;
591	case PCSCREDX:
592		/*
593		 * We cannot derefence the pr_ngroups fields if it
594		 * we don't have enough data.
595		 */
596		if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
597			return (0);
598		ngrp = argp->prcred.pr_ngroups;
599		if (ngrp < 0 || ngrp > ngroups_max)
600			return (0);
601
602		/* The result can be smaller than sizeof (prcred32_t) */
603		size += sizeof (prcred32_t) - sizeof (gid32_t);
604		size += ngrp * sizeof (gid32_t);
605		break;
606	case PCSPRIV:
607		if (resid >= size + sizeof (prpriv_t))
608			size += priv_prgetprivsize(&argp->prpriv);
609		else
610			return (0);
611		break;
612	case PCSZONE:
613		size += sizeof (int32_t);
614		break;
615	default:
616		return (0);
617	}
618
619	/* Round up to a multiple of int32_t */
620	rnd = size & (sizeof (int32_t) - 1);
621
622	if (rnd != 0)
623		size += sizeof (int32_t) - rnd;
624
625	if (size > resid)
626		return (0);
627	return (size);
628}
629
630/*
631 * Control operations (lots).
632 */
633int
634prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
635{
636#define	MY_BUFFER_SIZE32 \
637		100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
638		100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
639	int32_t buf[MY_BUFFER_SIZE32];
640	int32_t *bufp;
641	arg32_t arg;
642	size_t resid = 0;
643	size_t size;
644	prnode_t *pnp = VTOP(vp);
645	int error;
646	int locked = 0;
647
648	while (uiop->uio_resid) {
649		/*
650		 * Read several commands in one gulp.
651		 */
652		bufp = buf;
653		if (resid) {	/* move incomplete command to front of buffer */
654			int32_t *tail;
655
656			if (resid >= sizeof (buf))
657				break;
658			tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
659			do {
660				*bufp++ = *tail++;
661			} while ((resid -= sizeof (int32_t)) != 0);
662		}
663		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
664		if (resid > uiop->uio_resid)
665			resid = uiop->uio_resid;
666		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
667			return (error);
668		resid += (char *)bufp - (char *)buf;
669		bufp = buf;
670
671		do {		/* loop over commands in buffer */
672			int32_t cmd = bufp[0];
673			arg32_t *argp = (arg32_t *)&bufp[1];
674
675			size = ctlsize32(cmd, resid, argp);
676			if (size == 0)	/* incomplete or invalid command */
677				break;
678			/*
679			 * Perform the specified control operation.
680			 */
681			if (!locked) {
682				if ((error = prlock(pnp, ZNO)) != 0)
683					return (error);
684				locked = 1;
685			}
686
687			/*
688			 * Since some members of the arg32_t union contain
689			 * 64-bit values (which must be 64-bit aligned), we
690			 * can't simply pass a pointer to the structure as
691			 * it may be unaligned. Note that we do pass the
692			 * potentially unaligned structure to ctlsize32()
693			 * above, but that uses it a way that makes no
694			 * assumptions about alignment.
695			 */
696			ASSERT(size - sizeof (cmd) <= sizeof (arg));
697			bcopy(argp, &arg, size - sizeof (cmd));
698
699			if (error = pr_control32(cmd, &arg, pnp, cr)) {
700				if (error == -1)	/* -1 is timeout */
701					locked = 0;
702				else
703					return (error);
704			}
705			bufp = (int32_t *)((char *)bufp + size);
706		} while ((resid -= size) != 0);
707
708		if (locked) {
709			prunlock(pnp);
710			locked = 0;
711		}
712	}
713	return (resid? EINVAL : 0);
714}
715
716static int
717pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
718{
719	prcommon_t *pcp;
720	proc_t *p;
721	int unlocked;
722	int error = 0;
723
724	if (cmd == PCNULL)
725		return (0);
726
727	pcp = pnp->pr_common;
728	p = pcp->prc_proc;
729	ASSERT(p != NULL);
730
731	if (p->p_flag & SSYS) {
732		prunlock(pnp);
733		return (EBUSY);
734	}
735
736	switch (cmd) {
737
738	default:
739		error = EINVAL;
740		break;
741
742	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
743	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
744	case PCWSTOP:	/* wait for process or lwp to stop */
745	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
746		{
747			time_t timeo;
748
749			/*
750			 * Can't apply to a system process.
751			 */
752			if (p->p_as == &kas) {
753				error = EBUSY;
754				break;
755			}
756
757			if (cmd == PCSTOP || cmd == PCDSTOP)
758				pr_stop(pnp);
759
760			if (cmd == PCDSTOP)
761				break;
762
763			/*
764			 * If an lwp is waiting for itself or its process,
765			 * don't wait. The lwp will never see the fact that
766			 * itself is stopped.
767			 */
768			if ((pcp->prc_flags & PRC_LWP)?
769			    (pcp->prc_thread == curthread) : (p == curproc)) {
770				if (cmd == PCWSTOP || cmd == PCTWSTOP)
771					error = EBUSY;
772				break;
773			}
774
775			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
776			if ((error = pr_wait_stop(pnp, timeo)) != 0)
777				return (error);
778
779			break;
780		}
781
782	case PCRUN:	/* make lwp or process runnable */
783		error = pr_setrun(pnp, (ulong_t)argp->flags);
784		break;
785
786	case PCSTRACE:	/* set signal trace mask */
787		pr_settrace(p,  &argp->sigset);
788		break;
789
790	case PCSSIG:	/* set current signal */
791		if (PROCESS_NOT_32BIT(p))
792			error = EOVERFLOW;
793		else {
794			int sig = (int)argp->siginfo.si_signo;
795			siginfo_t siginfo;
796
797			bzero(&siginfo, sizeof (siginfo));
798			siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
799			error = pr_setsig(pnp, &siginfo);
800			if (sig == SIGKILL && error == 0) {
801				prunlock(pnp);
802				pr_wait_die(pnp);
803				return (-1);
804			}
805		}
806		break;
807
808	case PCKILL:	/* send signal */
809		error = pr_kill(pnp, (int)argp->sig, cr);
810		if (error == 0 && argp->sig == SIGKILL) {
811			prunlock(pnp);
812			pr_wait_die(pnp);
813			return (-1);
814		}
815		break;
816
817	case PCUNKILL:	/* delete a pending signal */
818		error = pr_unkill(pnp, (int)argp->sig);
819		break;
820
821	case PCNICE:	/* set nice priority */
822		error = pr_nice(p, (int)argp->nice, cr);
823		break;
824
825	case PCSENTRY:	/* set syscall entry bit mask */
826	case PCSEXIT:	/* set syscall exit bit mask */
827		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
828		break;
829
830	case PCSET:	/* set process flags */
831		error = pr_set(p, (long)argp->flags);
832		break;
833
834	case PCUNSET:	/* unset process flags */
835		error = pr_unset(p, (long)argp->flags);
836		break;
837
838	case PCSREG:	/* set general registers */
839		if (PROCESS_NOT_32BIT(p))
840			error = EOVERFLOW;
841		else {
842			kthread_t *t = pr_thread(pnp);
843
844			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
845				thread_unlock(t);
846				error = EBUSY;
847			} else {
848				prgregset_t prgregset;
849				klwp_t *lwp = ttolwp(t);
850
851				thread_unlock(t);
852				mutex_exit(&p->p_lock);
853				prgregset_32ton(lwp, argp->prgregset,
854				    prgregset);
855				prsetprregs(lwp, prgregset, 0);
856				mutex_enter(&p->p_lock);
857			}
858		}
859		break;
860
861	case PCSFPREG:	/* set floating-point registers */
862		if (PROCESS_NOT_32BIT(p))
863			error = EOVERFLOW;
864		else
865			error = pr_setfpregs32(pnp, &argp->prfpregset);
866		break;
867
868	case PCSXREG:	/* set extra registers */
869#if defined(__sparc)
870		if (PROCESS_NOT_32BIT(p))
871			error = EOVERFLOW;
872		else
873			error = pr_setxregs(pnp, &argp->prxregset);
874#else
875		error = EINVAL;
876#endif
877		break;
878
879	case PCSVADDR:	/* set virtual address at which to resume */
880		if (PROCESS_NOT_32BIT(p))
881			error = EOVERFLOW;
882		else
883			error = pr_setvaddr(pnp,
884			    (caddr_t)(uintptr_t)argp->vaddr);
885		break;
886
887	case PCSHOLD:	/* set signal-hold mask */
888		pr_sethold(pnp, &argp->sigset);
889		break;
890
891	case PCSFAULT:	/* set mask of traced faults */
892		pr_setfault(p, &argp->fltset);
893		break;
894
895	case PCCSIG:	/* clear current signal */
896		error = pr_clearsig(pnp);
897		break;
898
899	case PCCFAULT:	/* clear current fault */
900		error = pr_clearflt(pnp);
901		break;
902
903	case PCWATCH:	/* set or clear watched areas */
904		if (PROCESS_NOT_32BIT(p))
905			error = EOVERFLOW;
906		else {
907			prwatch_t prwatch;
908
909			prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
910			prwatch.pr_size = argp->prwatch.pr_size;
911			prwatch.pr_wflags = argp->prwatch.pr_wflags;
912			prwatch.pr_pad = argp->prwatch.pr_pad;
913			error = pr_watch(pnp, &prwatch, &unlocked);
914			if (error && unlocked)
915				return (error);
916		}
917		break;
918
919	case PCAGENT:	/* create the /proc agent lwp in the target process */
920		if (PROCESS_NOT_32BIT(p))
921			error = EOVERFLOW;
922		else {
923			prgregset_t prgregset;
924			kthread_t *t = pr_thread(pnp);
925			klwp_t *lwp = ttolwp(t);
926			thread_unlock(t);
927			mutex_exit(&p->p_lock);
928			prgregset_32ton(lwp, argp->prgregset, prgregset);
929			mutex_enter(&p->p_lock);
930			error = pr_agent(pnp, prgregset, &unlocked);
931			if (error && unlocked)
932				return (error);
933		}
934		break;
935
936	case PCREAD:	/* read from the address space */
937	case PCWRITE:	/* write to the address space */
938		if (PROCESS_NOT_32BIT(p))
939			error = EOVERFLOW;
940		else {
941			enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
942			priovec_t priovec;
943
944			priovec.pio_base =
945			    (void *)(uintptr_t)argp->priovec.pio_base;
946			priovec.pio_len = (size_t)argp->priovec.pio_len;
947			priovec.pio_offset = (off_t)
948			    (uint32_t)argp->priovec.pio_offset;
949			error = pr_rdwr(p, rw, &priovec);
950		}
951		break;
952
953	case PCSCRED:	/* set the process credentials */
954	case PCSCREDX:
955		{
956			/*
957			 * All the fields in these structures are exactly the
958			 * same and so the structures are compatible.  In case
959			 * this ever changes, we catch this with the ASSERT
960			 * below.
961			 */
962			prcred_t *prcred = (prcred_t *)&argp->prcred;
963
964#ifndef __lint
965			ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
966#endif
967
968			error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
969			break;
970		}
971
972	case PCSPRIV:	/* set the process privileges */
973		error = pr_spriv(p, &argp->prpriv, cr);
974		break;
975
976	case PCSZONE:	/* set the process's zoneid */
977		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
978		break;
979	}
980
981	if (error)
982		prunlock(pnp);
983	return (error);
984}
985
986#endif	/* _SYSCALL32_IMPL */
987
988/*
989 * Return the specific or chosen thread/lwp for a control operation.
990 * Returns with the thread locked via thread_lock(t).
991 */
992kthread_t *
993pr_thread(prnode_t *pnp)
994{
995	prcommon_t *pcp = pnp->pr_common;
996	kthread_t *t;
997
998	if (pcp->prc_flags & PRC_LWP) {
999		t = pcp->prc_thread;
1000		ASSERT(t != NULL);
1001		thread_lock(t);
1002	} else {
1003		proc_t *p = pcp->prc_proc;
1004		t = prchoose(p);	/* returns locked thread */
1005		ASSERT(t != NULL);
1006	}
1007
1008	return (t);
1009}
1010
1011/*
1012 * Direct the process or lwp to stop.
1013 */
1014void
1015pr_stop(prnode_t *pnp)
1016{
1017	prcommon_t *pcp = pnp->pr_common;
1018	proc_t *p = pcp->prc_proc;
1019	kthread_t *t;
1020	vnode_t *vp;
1021
1022	/*
1023	 * If already stopped, do nothing; otherwise flag
1024	 * it to be stopped the next time it tries to run.
1025	 * If sleeping at interruptible priority, set it
1026	 * running so it will stop within cv_wait_sig().
1027	 *
1028	 * Take care to cooperate with jobcontrol: if an lwp
1029	 * is stopped due to the default action of a jobcontrol
1030	 * stop signal, flag it to be stopped the next time it
1031	 * starts due to a SIGCONT signal.
1032	 */
1033	if (pcp->prc_flags & PRC_LWP)
1034		t = pcp->prc_thread;
1035	else
1036		t = p->p_tlist;
1037	ASSERT(t != NULL);
1038
1039	do {
1040		int notify;
1041
1042		notify = 0;
1043		thread_lock(t);
1044		if (!ISTOPPED(t)) {
1045			t->t_proc_flag |= TP_PRSTOP;
1046			t->t_sig_check = 1;	/* do ISSIG */
1047		}
1048
1049		/* Move the thread from wait queue to run queue */
1050		if (ISWAITING(t))
1051			setrun_locked(t);
1052
1053		if (ISWAKEABLE(t)) {
1054			if (t->t_wchan0 == NULL)
1055				setrun_locked(t);
1056			else if (!VSTOPPED(t)) {
1057				/*
1058				 * Mark it virtually stopped.
1059				 */
1060				t->t_proc_flag |= TP_PRVSTOP;
1061				notify = 1;
1062			}
1063		}
1064		/*
1065		 * force the thread into the kernel
1066		 * if it is not already there.
1067		 */
1068		prpokethread(t);
1069		thread_unlock(t);
1070		if (notify &&
1071		    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
1072			prnotify(vp);
1073		if (pcp->prc_flags & PRC_LWP)
1074			break;
1075	} while ((t = t->t_forw) != p->p_tlist);
1076
1077	/*
1078	 * We do this just in case the thread we asked
1079	 * to stop is in holdlwps() (called from cfork()).
1080	 */
1081	cv_broadcast(&p->p_holdlwps);
1082}
1083
1084/*
1085 * Sleep until the lwp stops, but cooperate with
1086 * jobcontrol:  Don't wake up if the lwp is stopped
1087 * due to the default action of a jobcontrol stop signal.
1088 * If this is the process file descriptor, sleep
1089 * until all of the process's lwps stop.
1090 */
1091int
1092pr_wait_stop(prnode_t *pnp, time_t timeo)
1093{
1094	prcommon_t *pcp = pnp->pr_common;
1095	proc_t *p = pcp->prc_proc;
1096	timestruc_t rqtime;
1097	timestruc_t *rqtp = NULL;
1098	int timecheck = 0;
1099	kthread_t *t;
1100	int error;
1101
1102	if (timeo > 0) {	/* millisecond timeout */
1103		/*
1104		 * Determine the precise future time of the requested timeout.
1105		 */
1106		timestruc_t now;
1107
1108		timecheck = timechanged;
1109		gethrestime(&now);
1110		rqtp = &rqtime;
1111		rqtp->tv_sec = timeo / MILLISEC;
1112		rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
1113		timespecadd(rqtp, &now);
1114	}
1115
1116	if (pcp->prc_flags & PRC_LWP) {	/* lwp file descriptor */
1117		t = pcp->prc_thread;
1118		ASSERT(t != NULL);
1119		thread_lock(t);
1120		while (!ISTOPPED(t) && !VSTOPPED(t)) {
1121			thread_unlock(t);
1122			mutex_enter(&pcp->prc_mutex);
1123			prunlock(pnp);
1124			error = pr_wait(pcp, rqtp, timecheck);
1125			if (error)	/* -1 is timeout */
1126				return (error);
1127			if ((error = prlock(pnp, ZNO)) != 0)
1128				return (error);
1129			ASSERT(p == pcp->prc_proc);
1130			ASSERT(t == pcp->prc_thread);
1131			thread_lock(t);
1132		}
1133		thread_unlock(t);
1134	} else {			/* process file descriptor */
1135		t = prchoose(p);	/* returns locked thread */
1136		ASSERT(t != NULL);
1137		ASSERT(MUTEX_HELD(&p->p_lock));
1138		while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
1139		    (p->p_flag & SEXITLWPS)) {
1140			thread_unlock(t);
1141			mutex_enter(&pcp->prc_mutex);
1142			prunlock(pnp);
1143			error = pr_wait(pcp, rqtp, timecheck);
1144			if (error)	/* -1 is timeout */
1145				return (error);
1146			if ((error = prlock(pnp, ZNO)) != 0)
1147				return (error);
1148			ASSERT(p == pcp->prc_proc);
1149			t = prchoose(p);	/* returns locked t */
1150			ASSERT(t != NULL);
1151		}
1152		thread_unlock(t);
1153	}
1154
1155	ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
1156	    t != NULL && t->t_state != TS_ZOMB);
1157
1158	return (0);
1159}
1160
1161int
1162pr_setrun(prnode_t *pnp, ulong_t flags)
1163{
1164	prcommon_t *pcp = pnp->pr_common;
1165	proc_t *p = pcp->prc_proc;
1166	kthread_t *t;
1167	klwp_t *lwp;
1168
1169	/*
1170	 * Cannot set an lwp running if it is not stopped.
1171	 * Also, no lwp other than the /proc agent lwp can
1172	 * be set running so long as the /proc agent lwp exists.
1173	 */
1174	t = pr_thread(pnp);	/* returns locked thread */
1175	if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1176	    !(t->t_proc_flag & TP_PRSTOP)) ||
1177	    (p->p_agenttp != NULL &&
1178	    (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1179		thread_unlock(t);
1180		return (EBUSY);
1181	}
1182	thread_unlock(t);
1183	if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1184		return (EINVAL);
1185	lwp = ttolwp(t);
1186	if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1187		/*
1188		 * Discard current siginfo_t, if any.
1189		 */
1190		lwp->lwp_cursig = 0;
1191		lwp->lwp_extsig = 0;
1192		if (lwp->lwp_curinfo) {
1193			siginfofree(lwp->lwp_curinfo);
1194			lwp->lwp_curinfo = NULL;
1195		}
1196	}
1197	if (flags & PRCFAULT)
1198		lwp->lwp_curflt = 0;
1199	/*
1200	 * We can't hold p->p_lock when we touch the lwp's registers.
1201	 * It may be swapped out and we will get a page fault.
1202	 */
1203	if (flags & PRSTEP) {
1204		mutex_exit(&p->p_lock);
1205		prstep(lwp, 0);
1206		mutex_enter(&p->p_lock);
1207	}
1208	if (flags & PRSTOP) {
1209		t->t_proc_flag |= TP_PRSTOP;
1210		t->t_sig_check = 1;	/* do ISSIG */
1211	}
1212	if (flags & PRSABORT)
1213		lwp->lwp_sysabort = 1;
1214	thread_lock(t);
1215	if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1216		/*
1217		 * Here, we are dealing with a single lwp.
1218		 */
1219		if (ISTOPPED(t)) {
1220			t->t_schedflag |= TS_PSTART;
1221			t->t_dtrace_stop = 0;
1222			setrun_locked(t);
1223		} else if (flags & PRSABORT) {
1224			t->t_proc_flag &=
1225			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1226			setrun_locked(t);
1227		} else if (!(flags & PRSTOP)) {
1228			t->t_proc_flag &=
1229			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1230		}
1231		thread_unlock(t);
1232	} else {
1233		/*
1234		 * Here, we are dealing with the whole process.
1235		 */
1236		if (ISTOPPED(t)) {
1237			/*
1238			 * The representative lwp is stopped on an event
1239			 * of interest.  We demote it to PR_REQUESTED and
1240			 * choose another representative lwp.  If the new
1241			 * representative lwp is not stopped on an event of
1242			 * interest (other than PR_REQUESTED), we set the
1243			 * whole process running, else we leave the process
1244			 * stopped showing the next event of interest.
1245			 */
1246			kthread_t *tx = NULL;
1247
1248			if (!(flags & PRSABORT) &&
1249			    t->t_whystop == PR_SYSENTRY &&
1250			    t->t_whatstop == SYS_lwp_exit)
1251				tx = t;		/* remember the exiting lwp */
1252			t->t_whystop = PR_REQUESTED;
1253			t->t_whatstop = 0;
1254			thread_unlock(t);
1255			t = prchoose(p);	/* returns locked t */
1256			ASSERT(ISTOPPED(t) || VSTOPPED(t));
1257			if (VSTOPPED(t) ||
1258			    t->t_whystop == PR_REQUESTED) {
1259				thread_unlock(t);
1260				allsetrun(p);
1261			} else {
1262				thread_unlock(t);
1263				/*
1264				 * As a special case, if the old representative
1265				 * lwp was stopped on entry to _lwp_exit()
1266				 * (and we are not aborting the system call),
1267				 * we set the old representative lwp running.
1268				 * We do this so that the next process stop
1269				 * will find the exiting lwp gone.
1270				 */
1271				if (tx != NULL) {
1272					thread_lock(tx);
1273					tx->t_schedflag |= TS_PSTART;
1274					t->t_dtrace_stop = 0;
1275					setrun_locked(tx);
1276					thread_unlock(tx);
1277				}
1278			}
1279		} else {
1280			/*
1281			 * No event of interest; set all of the lwps running.
1282			 */
1283			if (flags & PRSABORT) {
1284				t->t_proc_flag &=
1285				    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1286				setrun_locked(t);
1287			}
1288			thread_unlock(t);
1289			allsetrun(p);
1290		}
1291	}
1292	return (0);
1293}
1294
1295/*
1296 * Wait until process/lwp stops or until timer expires.
1297 * Return EINTR for an interruption, -1 for timeout, else 0.
1298 */
1299int
1300pr_wait(prcommon_t *pcp,	/* prcommon referring to process/lwp */
1301	timestruc_t *ts,	/* absolute time of timeout, if any */
1302	int timecheck)
1303{
1304	int rval;
1305
1306	ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1307	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1308	mutex_exit(&pcp->prc_mutex);
1309	switch (rval) {
1310	case 0:
1311		return (EINTR);
1312	case -1:
1313		return (-1);
1314	default:
1315		return (0);
1316	}
1317}
1318
1319/*
1320 * Make all threads in the process runnable.
1321 */
1322void
1323allsetrun(proc_t *p)
1324{
1325	kthread_t *t;
1326
1327	ASSERT(MUTEX_HELD(&p->p_lock));
1328
1329	if ((t = p->p_tlist) != NULL) {
1330		do {
1331			thread_lock(t);
1332			ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1333			t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1334			if (ISTOPPED(t)) {
1335				t->t_schedflag |= TS_PSTART;
1336				t->t_dtrace_stop = 0;
1337				setrun_locked(t);
1338			}
1339			thread_unlock(t);
1340		} while ((t = t->t_forw) != p->p_tlist);
1341	}
1342}
1343
1344/*
1345 * Wait for the process to die.
1346 * We do this after sending SIGKILL because we know it will
1347 * die soon and we want subsequent operations to return ENOENT.
1348 */
1349void
1350pr_wait_die(prnode_t *pnp)
1351{
1352	proc_t *p;
1353
1354	mutex_enter(&pidlock);
1355	while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1356		if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1357			break;
1358	}
1359	mutex_exit(&pidlock);
1360}
1361
1362static void
1363pr_settrace(proc_t *p, sigset_t *sp)
1364{
1365	prdelset(sp, SIGKILL);
1366	prassignset(&p->p_sigmask, sp);
1367	if (!sigisempty(&p->p_sigmask))
1368		p->p_proc_flag |= P_PR_TRACE;
1369	else if (prisempty(&p->p_fltmask)) {
1370		user_t *up = PTOU(p);
1371		if (up->u_systrap == 0)
1372			p->p_proc_flag &= ~P_PR_TRACE;
1373	}
1374}
1375
1376int
1377pr_setsig(prnode_t *pnp, siginfo_t *sip)
1378{
1379	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1380	int sig = sip->si_signo;
1381	prcommon_t *pcp = pnp->pr_common;
1382	proc_t *p = pcp->prc_proc;
1383	kthread_t *t;
1384	klwp_t *lwp;
1385	int error = 0;
1386
1387	t = pr_thread(pnp);	/* returns locked thread */
1388	thread_unlock(t);
1389	lwp = ttolwp(t);
1390	if (sig < 0 || sig >= nsig)
1391		/* Zero allowed here */
1392		error = EINVAL;
1393	else if (lwp->lwp_cursig == SIGKILL)
1394		/* "can't happen", but just in case */
1395		error = EBUSY;
1396	else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
1397		lwp->lwp_extsig = 0;
1398		/*
1399		 * Discard current siginfo_t, if any.
1400		 */
1401		if (lwp->lwp_curinfo) {
1402			siginfofree(lwp->lwp_curinfo);
1403			lwp->lwp_curinfo = NULL;
1404		}
1405	} else {
1406		kthread_t *tx;
1407		sigqueue_t *sqp;
1408
1409		/* drop p_lock to do kmem_alloc(KM_SLEEP) */
1410		mutex_exit(&p->p_lock);
1411		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
1412		mutex_enter(&p->p_lock);
1413
1414		if (lwp->lwp_curinfo == NULL)
1415			lwp->lwp_curinfo = sqp;
1416		else
1417			kmem_free(sqp, sizeof (sigqueue_t));
1418		/*
1419		 * Copy contents of info to current siginfo_t.
1420		 */
1421		bcopy(sip, &lwp->lwp_curinfo->sq_info,
1422		    sizeof (lwp->lwp_curinfo->sq_info));
1423		/*
1424		 * Prevent contents published by si_zoneid-unaware /proc
1425		 * consumers from being incorrectly filtered.  Because
1426		 * an uninitialized si_zoneid is the same as
1427		 * GLOBAL_ZONEID, this means that you can't pr_setsig a
1428		 * process in a non-global zone with a siginfo which
1429		 * appears to come from the global zone.
1430		 */
1431		if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
1432			lwp->lwp_curinfo->sq_info.si_zoneid =
1433			    p->p_zone->zone_id;
1434		/*
1435		 * Side-effects for SIGKILL and jobcontrol signals.
1436		 */
1437		if (sig == SIGKILL) {
1438			p->p_flag |= SKILLED;
1439			p->p_flag &= ~SEXTKILLED;
1440		} else if (sig == SIGCONT) {
1441			p->p_flag |= SSCONT;
1442			sigdelq(p, NULL, SIGSTOP);
1443			sigdelq(p, NULL, SIGTSTP);
1444			sigdelq(p, NULL, SIGTTOU);
1445			sigdelq(p, NULL, SIGTTIN);
1446			sigdiffset(&p->p_sig, &stopdefault);
1447			sigdiffset(&p->p_extsig, &stopdefault);
1448			if ((tx = p->p_tlist) != NULL) {
1449				do {
1450					sigdelq(p, tx, SIGSTOP);
1451					sigdelq(p, tx, SIGTSTP);
1452					sigdelq(p, tx, SIGTTOU);
1453					sigdelq(p, tx, SIGTTIN);
1454					sigdiffset(&tx->t_sig, &stopdefault);
1455					sigdiffset(&tx->t_extsig, &stopdefault);
1456				} while ((tx = tx->t_forw) != p->p_tlist);
1457			}
1458		} else if (sigismember(&stopdefault, sig)) {
1459			if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
1460			    (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
1461				p->p_flag &= ~SSCONT;
1462			sigdelq(p, NULL, SIGCONT);
1463			sigdelset(&p->p_sig, SIGCONT);
1464			sigdelset(&p->p_extsig, SIGCONT);
1465			if ((tx = p->p_tlist) != NULL) {
1466				do {
1467					sigdelq(p, tx, SIGCONT);
1468					sigdelset(&tx->t_sig, SIGCONT);
1469					sigdelset(&tx->t_extsig, SIGCONT);
1470				} while ((tx = tx->t_forw) != p->p_tlist);
1471			}
1472		}
1473		thread_lock(t);
1474		if (ISWAKEABLE(t) || ISWAITING(t)) {
1475			/* Set signaled sleeping/waiting lwp running */
1476			setrun_locked(t);
1477		} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
1478			/* If SIGKILL, set stopped lwp running */
1479			p->p_stopsig = 0;
1480			t->t_schedflag |= TS_XSTART | TS_PSTART;
1481			t->t_dtrace_stop = 0;
1482			setrun_locked(t);
1483		}
1484		t->t_sig_check = 1;	/* so ISSIG will be done */
1485		thread_unlock(t);
1486		/*
1487		 * More jobcontrol side-effects.
1488		 */
1489		if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
1490			p->p_stopsig = 0;
1491			do {
1492				thread_lock(tx);
1493				if (tx->t_state == TS_STOPPED &&
1494				    tx->t_whystop == PR_JOBCONTROL) {
1495					tx->t_schedflag |= TS_XSTART;
1496					setrun_locked(tx);
1497				}
1498				thread_unlock(tx);
1499			} while ((tx = tx->t_forw) != p->p_tlist);
1500		}
1501	}
1502	return (error);
1503}
1504
1505int
1506pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1507{
1508	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1509	prcommon_t *pcp = pnp->pr_common;
1510	proc_t *p = pcp->prc_proc;
1511	k_siginfo_t info;
1512
1513	if (sig <= 0 || sig >= nsig)
1514		return (EINVAL);
1515
1516	bzero(&info, sizeof (info));
1517	info.si_signo = sig;
1518	info.si_code = SI_USER;
1519	info.si_pid = curproc->p_pid;
1520	info.si_ctid = PRCTID(curproc);
1521	info.si_zoneid = getzoneid();
1522	info.si_uid = crgetruid(cr);
1523	sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1524	    pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1525
1526	return (0);
1527}
1528
1529int
1530pr_unkill(prnode_t *pnp, int sig)
1531{
1532	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1533	prcommon_t *pcp = pnp->pr_common;
1534	proc_t *p = pcp->prc_proc;
1535	sigqueue_t *infop = NULL;
1536
1537	if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1538		return (EINVAL);
1539
1540	if (pcp->prc_flags & PRC_LWP)
1541		sigdeq(p, pcp->prc_thread, sig, &infop);
1542	else
1543		sigdeq(p, NULL, sig, &infop);
1544
1545	if (infop)
1546		siginfofree(infop);
1547
1548	return (0);
1549}
1550
1551int
1552pr_nice(proc_t *p, int nice, cred_t *cr)
1553{
1554	kthread_t *t;
1555	int err;
1556	int error = 0;
1557
1558	t = p->p_tlist;
1559	do {
1560		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1561		err = CL_DONICE(t, cr, nice, (int *)NULL);
1562		schedctl_set_cidpri(t);
1563		if (error == 0)
1564			error = err;
1565	} while ((t = t->t_forw) != p->p_tlist);
1566
1567	return (error);
1568}
1569
1570void
1571pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1572{
1573	user_t *up = PTOU(p);
1574
1575	if (entry) {
1576		prassignset(&up->u_entrymask, sysset);
1577	} else {
1578		prassignset(&up->u_exitmask, sysset);
1579	}
1580	if (!prisempty(&up->u_entrymask) ||
1581	    !prisempty(&up->u_exitmask)) {
1582		up->u_systrap = 1;
1583		p->p_proc_flag |= P_PR_TRACE;
1584		set_proc_sys(p);	/* set pre and post-sys flags */
1585	} else {
1586		up->u_systrap = 0;
1587		if (sigisempty(&p->p_sigmask) &&
1588		    prisempty(&p->p_fltmask))
1589			p->p_proc_flag &= ~P_PR_TRACE;
1590	}
1591}
1592
1593#define	ALLFLAGS	\
1594	(PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1595
1596int
1597pr_set(proc_t *p, long flags)
1598{
1599	if ((p->p_flag & SSYS) || p->p_as == &kas)
1600		return (EBUSY);
1601
1602	if (flags & ~ALLFLAGS)
1603		return (EINVAL);
1604
1605	if (flags & PR_FORK)
1606		p->p_proc_flag |= P_PR_FORK;
1607	if (flags & PR_RLC)
1608		p->p_proc_flag |= P_PR_RUNLCL;
1609	if (flags & PR_KLC)
1610		p->p_proc_flag |= P_PR_KILLCL;
1611	if (flags & PR_ASYNC)
1612		p->p_proc_flag |= P_PR_ASYNC;
1613	if (flags & PR_BPTADJ)
1614		p->p_proc_flag |= P_PR_BPTADJ;
1615	if (flags & PR_MSACCT)
1616		if ((p->p_flag & SMSACCT) == 0)
1617			estimate_msacct(p->p_tlist, gethrtime());
1618	if (flags & PR_MSFORK)
1619		p->p_flag |= SMSFORK;
1620	if (flags & PR_PTRACE) {
1621		p->p_proc_flag |= P_PR_PTRACE;
1622		/* ptraced process must die if parent dead */
1623		if (p->p_ppid == 1)
1624			sigtoproc(p, NULL, SIGKILL);
1625	}
1626
1627	return (0);
1628}
1629
1630int
1631pr_unset(proc_t *p, long flags)
1632{
1633	if ((p->p_flag & SSYS) || p->p_as == &kas)
1634		return (EBUSY);
1635
1636	if (flags & ~ALLFLAGS)
1637		return (EINVAL);
1638
1639	if (flags & PR_FORK)
1640		p->p_proc_flag &= ~P_PR_FORK;
1641	if (flags & PR_RLC)
1642		p->p_proc_flag &= ~P_PR_RUNLCL;
1643	if (flags & PR_KLC)
1644		p->p_proc_flag &= ~P_PR_KILLCL;
1645	if (flags & PR_ASYNC)
1646		p->p_proc_flag &= ~P_PR_ASYNC;
1647	if (flags & PR_BPTADJ)
1648		p->p_proc_flag &= ~P_PR_BPTADJ;
1649	if (flags & PR_MSACCT)
1650		disable_msacct(p);
1651	if (flags & PR_MSFORK)
1652		p->p_flag &= ~SMSFORK;
1653	if (flags & PR_PTRACE)
1654		p->p_proc_flag &= ~P_PR_PTRACE;
1655
1656	return (0);
1657}
1658
1659static int
1660pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1661{
1662	proc_t *p = pnp->pr_common->prc_proc;
1663	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1664
1665	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1666		thread_unlock(t);
1667		return (EBUSY);
1668	}
1669	if (!prhasfp()) {
1670		thread_unlock(t);
1671		return (EINVAL);	/* No FP support */
1672	}
1673
1674	/* drop p_lock while touching the lwp's stack */
1675	thread_unlock(t);
1676	mutex_exit(&p->p_lock);
1677	prsetprfpregs(ttolwp(t), prfpregset);
1678	mutex_enter(&p->p_lock);
1679
1680	return (0);
1681}
1682
1683#ifdef	_SYSCALL32_IMPL
1684static int
1685pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
1686{
1687	proc_t *p = pnp->pr_common->prc_proc;
1688	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1689
1690	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1691		thread_unlock(t);
1692		return (EBUSY);
1693	}
1694	if (!prhasfp()) {
1695		thread_unlock(t);
1696		return (EINVAL);	/* No FP support */
1697	}
1698
1699	/* drop p_lock while touching the lwp's stack */
1700	thread_unlock(t);
1701	mutex_exit(&p->p_lock);
1702	prsetprfpregs32(ttolwp(t), prfpregset);
1703	mutex_enter(&p->p_lock);
1704
1705	return (0);
1706}
1707#endif	/* _SYSCALL32_IMPL */
1708
1709#if defined(__sparc)
1710/* ARGSUSED */
1711static int
1712pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1713{
1714	proc_t *p = pnp->pr_common->prc_proc;
1715	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1716
1717	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1718		thread_unlock(t);
1719		return (EBUSY);
1720	}
1721	thread_unlock(t);
1722
1723	if (!prhasx(p))
1724		return (EINVAL);	/* No extra register support */
1725
1726	/* drop p_lock while touching the lwp's stack */
1727	mutex_exit(&p->p_lock);
1728	prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1729	mutex_enter(&p->p_lock);
1730
1731	return (0);
1732}
1733
1734static int
1735pr_setasrs(prnode_t *pnp, asrset_t asrset)
1736{
1737	proc_t *p = pnp->pr_common->prc_proc;
1738	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1739
1740	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1741		thread_unlock(t);
1742		return (EBUSY);
1743	}
1744	thread_unlock(t);
1745
1746	/* drop p_lock while touching the lwp's stack */
1747	mutex_exit(&p->p_lock);
1748	prsetasregs(ttolwp(t), asrset);
1749	mutex_enter(&p->p_lock);
1750
1751	return (0);
1752}
1753#endif
1754
1755static int
1756pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1757{
1758	proc_t *p = pnp->pr_common->prc_proc;
1759	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1760
1761	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1762		thread_unlock(t);
1763		return (EBUSY);
1764	}
1765
1766	/* drop p_lock while touching the lwp's stack */
1767	thread_unlock(t);
1768	mutex_exit(&p->p_lock);
1769	prsvaddr(ttolwp(t), vaddr);
1770	mutex_enter(&p->p_lock);
1771
1772	return (0);
1773}
1774
1775void
1776pr_sethold(prnode_t *pnp, sigset_t *sp)
1777{
1778	proc_t *p = pnp->pr_common->prc_proc;
1779	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1780
1781	schedctl_finish_sigblock(t);
1782	sigutok(sp, &t->t_hold);
1783	if (ISWAKEABLE(t) &&
1784	    (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1785		setrun_locked(t);
1786	t->t_sig_check = 1;	/* so thread will see new holdmask */
1787	thread_unlock(t);
1788}
1789
1790void
1791pr_setfault(proc_t *p, fltset_t *fltp)
1792{
1793	prassignset(&p->p_fltmask, fltp);
1794	if (!prisempty(&p->p_fltmask))
1795		p->p_proc_flag |= P_PR_TRACE;
1796	else if (sigisempty(&p->p_sigmask)) {
1797		user_t *up = PTOU(p);
1798		if (up->u_systrap == 0)
1799			p->p_proc_flag &= ~P_PR_TRACE;
1800	}
1801}
1802
1803static int
1804pr_clearsig(prnode_t *pnp)
1805{
1806	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1807	klwp_t *lwp = ttolwp(t);
1808
1809	thread_unlock(t);
1810	if (lwp->lwp_cursig == SIGKILL)
1811		return (EBUSY);
1812
1813	/*
1814	 * Discard current siginfo_t, if any.
1815	 */
1816	lwp->lwp_cursig = 0;
1817	lwp->lwp_extsig = 0;
1818	if (lwp->lwp_curinfo) {
1819		siginfofree(lwp->lwp_curinfo);
1820		lwp->lwp_curinfo = NULL;
1821	}
1822
1823	return (0);
1824}
1825
1826static int
1827pr_clearflt(prnode_t *pnp)
1828{
1829	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1830
1831	thread_unlock(t);
1832	ttolwp(t)->lwp_curflt = 0;
1833
1834	return (0);
1835}
1836
1837static int
1838pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
1839{
1840	proc_t *p = pnp->pr_common->prc_proc;
1841	struct as *as = p->p_as;
1842	uintptr_t vaddr = pwp->pr_vaddr;
1843	size_t size = pwp->pr_size;
1844	int wflags = pwp->pr_wflags;
1845	ulong_t newpage = 0;
1846	struct watched_area *pwa;
1847	int error;
1848
1849	*unlocked = 0;
1850
1851	/*
1852	 * Can't apply to a system process.
1853	 */
1854	if ((p->p_flag & SSYS) || p->p_as == &kas)
1855		return (EBUSY);
1856
1857	/*
1858	 * Verify that the address range does not wrap
1859	 * and that only the proper flags were specified.
1860	 */
1861	if ((wflags & ~WA_TRAPAFTER) == 0)
1862		size = 0;
1863	if (vaddr + size < vaddr ||
1864	    (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
1865	    ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
1866		return (EINVAL);
1867
1868	/*
1869	 * Don't let the address range go above as->a_userlimit.
1870	 * There is no error here, just a limitation.
1871	 */
1872	if (vaddr >= (uintptr_t)as->a_userlimit)
1873		return (0);
1874	if (vaddr + size > (uintptr_t)as->a_userlimit)
1875		size = (uintptr_t)as->a_userlimit - vaddr;
1876
1877	/*
1878	 * Compute maximum number of pages this will add.
1879	 */
1880	if ((wflags & ~WA_TRAPAFTER) != 0) {
1881		ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
1882		newpage = btopr(pagespan);
1883		if (newpage > 2 * prnwatch)
1884			return (E2BIG);
1885	}
1886
1887	/*
1888	 * Force the process to be fully stopped.
1889	 */
1890	if (p == curproc) {
1891		prunlock(pnp);
1892		while (holdwatch() != 0)
1893			continue;
1894		if ((error = prlock(pnp, ZNO)) != 0) {
1895			continuelwps(p);
1896			*unlocked = 1;
1897			return (error);
1898		}
1899	} else {
1900		pauselwps(p);
1901		while (pr_allstopped(p, 0) > 0) {
1902			/*
1903			 * This cv/mutex pair is persistent even
1904			 * if the process disappears after we
1905			 * unmark it and drop p->p_lock.
1906			 */
1907			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
1908			kmutex_t *mp = &p->p_lock;
1909
1910			prunmark(p);
1911			(void) cv_wait(cv, mp);
1912			mutex_exit(mp);
1913			if ((error = prlock(pnp, ZNO)) != 0) {
1914				/*
1915				 * Unpause the process if it exists.
1916				 */
1917				p = pr_p_lock(pnp);
1918				mutex_exit(&pr_pidlock);
1919				if (p != NULL) {
1920					unpauselwps(p);
1921					prunlock(pnp);
1922				}
1923				*unlocked = 1;
1924				return (error);
1925			}
1926		}
1927	}
1928
1929	/*
1930	 * Drop p->p_lock in order to perform the rest of this.
1931	 * The process is still locked with the P_PR_LOCK flag.
1932	 */
1933	mutex_exit(&p->p_lock);
1934
1935	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
1936	pwa->wa_vaddr = (caddr_t)vaddr;
1937	pwa->wa_eaddr = (caddr_t)vaddr + size;
1938	pwa->wa_flags = (ulong_t)wflags;
1939
1940	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
1941	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);
1942
1943	if (p == curproc) {
1944		setallwatch();
1945		mutex_enter(&p->p_lock);
1946		continuelwps(p);
1947	} else {
1948		mutex_enter(&p->p_lock);
1949		unpauselwps(p);
1950	}
1951
1952	return (error);
1953}
1954
1955/* jobcontrol stopped, but with a /proc directed stop in effect */
1956#define	JDSTOPPED(t)	\
1957	((t)->t_state == TS_STOPPED && \
1958	(t)->t_whystop == PR_JOBCONTROL && \
1959	((t)->t_proc_flag & TP_PRSTOP))
1960
1961/*
1962 * pr_agent() creates the agent lwp. If the process is exiting while
1963 * we are creating an agent lwp, then exitlwps() waits until the
1964 * agent has been created using prbarrier().
1965 */
1966static int
1967pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
1968{
1969	proc_t *p = pnp->pr_common->prc_proc;
1970	prcommon_t *pcp;
1971	kthread_t *t;
1972	kthread_t *ct;
1973	klwp_t *clwp;
1974	k_sigset_t smask;
1975	int cid;
1976	void *bufp = NULL;
1977	int error;
1978
1979	*unlocked = 0;
1980
1981	/*
1982	 * Cannot create the /proc agent lwp if :-
1983	 * - the process is not fully stopped or directed to stop.
1984	 * - there is an agent lwp already.
1985	 * - the process has been killed.
1986	 * - the process is exiting.
1987	 * - it's a vfork(2) parent.
1988	 */
1989	t = prchoose(p);	/* returns locked thread */
1990	ASSERT(t != NULL);
1991
1992	if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
1993	    p->p_agenttp != NULL ||
1994	    (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
1995		thread_unlock(t);
1996		return (EBUSY);
1997	}
1998
1999	thread_unlock(t);
2000	mutex_exit(&p->p_lock);
2001
2002	sigfillset(&smask);
2003	sigdiffset(&smask, &cantmask);
2004	clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
2005	    t->t_pri, &smask, NOCLASS, 0);
2006	if (clwp == NULL) {
2007		mutex_enter(&p->p_lock);
2008		return (ENOMEM);
2009	}
2010	prsetprregs(clwp, prgregset, 1);
2011retry:
2012	cid = t->t_cid;
2013	(void) CL_ALLOC(&bufp, cid, KM_SLEEP);
2014	mutex_enter(&p->p_lock);
2015	if (cid != t->t_cid) {
2016		/*
2017		 * Someone just changed this thread's scheduling class,
2018		 * so try pre-allocating the buffer again.  Hopefully we
2019		 * don't hit this often.
2020		 */
2021		mutex_exit(&p->p_lock);
2022		CL_FREE(cid, bufp);
2023		goto retry;
2024	}
2025
2026	clwp->lwp_ap = clwp->lwp_arg;
2027	clwp->lwp_eosys = NORMALRETURN;
2028	ct = lwptot(clwp);
2029	ct->t_clfuncs = t->t_clfuncs;
2030	CL_FORK(t, ct, bufp);
2031	ct->t_cid = t->t_cid;
2032	ct->t_proc_flag |= TP_PRSTOP;
2033	/*
2034	 * Setting t_sysnum to zero causes post_syscall()
2035	 * to bypass all syscall checks and go directly to
2036	 *	if (issig()) psig();
2037	 * so that the agent lwp will stop in issig_forreal()
2038	 * showing PR_REQUESTED.
2039	 */
2040	ct->t_sysnum = 0;
2041	ct->t_post_sys = 1;
2042	ct->t_sig_check = 1;
2043	p->p_agenttp = ct;
2044	ct->t_proc_flag &= ~TP_HOLDLWP;
2045
2046	pcp = pnp->pr_pcommon;
2047	mutex_enter(&pcp->prc_mutex);
2048
2049	lwp_create_done(ct);
2050
2051	/*
2052	 * Don't return until the agent is stopped on PR_REQUESTED.
2053	 */
2054
2055	for (;;) {
2056		prunlock(pnp);
2057		*unlocked = 1;
2058
2059		/*
2060		 * Wait for the agent to stop and notify us.
2061		 * If we've been interrupted, return that information.
2062		 */
2063		error = pr_wait(pcp, NULL, 0);
2064		if (error == EINTR) {
2065			error = 0;
2066			break;
2067		}
2068
2069		/*
2070		 * Confirm that the agent LWP has stopped.
2071		 */
2072
2073		if ((error = prlock(pnp, ZNO)) != 0)
2074			break;
2075		*unlocked = 0;
2076
2077		/*
2078		 * Since we dropped the lock on the process, the agent
2079		 * may have disappeared or changed. Grab the current
2080		 * agent and check fail if it has disappeared.
2081		 */
2082		if ((ct = p->p_agenttp) == NULL) {
2083			error = ENOENT;
2084			break;
2085		}
2086
2087		mutex_enter(&pcp->prc_mutex);
2088		thread_lock(ct);
2089
2090		if (ISTOPPED(ct)) {
2091			thread_unlock(ct);
2092			mutex_exit(&pcp->prc_mutex);
2093			break;
2094		}
2095
2096		thread_unlock(ct);
2097	}
2098
2099	return (error ? error : -1);
2100}
2101
2102static int
2103pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2104{
2105	caddr_t base = (caddr_t)pio->pio_base;
2106	size_t cnt = pio->pio_len;
2107	uintptr_t offset = (uintptr_t)pio->pio_offset;
2108	struct uio auio;
2109	struct iovec aiov;
2110	int error = 0;
2111
2112	if ((p->p_flag & SSYS) || p->p_as == &kas)
2113		error = EIO;
2114	else if ((base + cnt) < base || (offset + cnt) < offset)
2115		error = EINVAL;
2116	else if (cnt != 0) {
2117		aiov.iov_base = base;
2118		aiov.iov_len = cnt;
2119
2120		auio.uio_loffset = offset;
2121		auio.uio_iov = &aiov;
2122		auio.uio_iovcnt = 1;
2123		auio.uio_resid = cnt;
2124		auio.uio_segflg = UIO_USERSPACE;
2125		auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2126		auio.uio_fmode = FREAD|FWRITE;
2127		auio.uio_extflg = UIO_COPY_DEFAULT;
2128
2129		mutex_exit(&p->p_lock);
2130		error = prusrio(p, rw, &auio, 0);
2131		mutex_enter(&p->p_lock);
2132
2133		/*
2134		 * We have no way to return the i/o count,
2135		 * like read() or write() would do, so we
2136		 * return an error if the i/o was truncated.
2137		 */
2138		if (auio.uio_resid != 0 && error == 0)
2139			error = EIO;
2140	}
2141
2142	return (error);
2143}
2144
2145static int
2146pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2147{
2148	kthread_t *t;
2149	cred_t *oldcred;
2150	cred_t *newcred;
2151	uid_t oldruid;
2152	int error;
2153	zone_t *zone = crgetzone(cr);
2154
2155	if (!VALID_UID(prcred->pr_euid, zone) ||
2156	    !VALID_UID(prcred->pr_ruid, zone) ||
2157	    !VALID_UID(prcred->pr_suid, zone) ||
2158	    !VALID_GID(prcred->pr_egid, zone) ||
2159	    !VALID_GID(prcred->pr_rgid, zone) ||
2160	    !VALID_GID(prcred->pr_sgid, zone))
2161		return (EINVAL);
2162
2163	if (dogrps) {
2164		int ngrp = prcred->pr_ngroups;
2165		int i;
2166
2167		if (ngrp < 0 || ngrp > ngroups_max)
2168			return (EINVAL);
2169
2170		for (i = 0; i < ngrp; i++) {
2171			if (!VALID_GID(prcred->pr_groups[i], zone))
2172				return (EINVAL);
2173		}
2174	}
2175
2176	error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2177
2178	if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2179		error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2180
2181	if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2182	    prcred->pr_suid != prcred->pr_ruid)
2183		error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2184
2185	if (error)
2186		return (error);
2187
2188	mutex_exit(&p->p_lock);
2189
2190	/* hold old cred so it doesn't disappear while we dup it */
2191	mutex_enter(&p->p_crlock);
2192	crhold(oldcred = p->p_cred);
2193	mutex_exit(&p->p_crlock);
2194	newcred = crdup(oldcred);
2195	oldruid = crgetruid(oldcred);
2196	crfree(oldcred);
2197
2198	/* Error checking done above */
2199	(void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2200	    prcred->pr_suid);
2201	(void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2202	    prcred->pr_sgid);
2203
2204	if (dogrps) {
2205		(void) crsetgroups(newcred, prcred->pr_ngroups,
2206		    prcred->pr_groups);
2207
2208	}
2209
2210	mutex_enter(&p->p_crlock);
2211	oldcred = p->p_cred;
2212	p->p_cred = newcred;
2213	mutex_exit(&p->p_crlock);
2214	crfree(oldcred);
2215
2216	/*
2217	 * Keep count of processes per uid consistent.
2218	 */
2219	if (oldruid != prcred->pr_ruid) {
2220		zoneid_t zoneid = crgetzoneid(newcred);
2221
2222		mutex_enter(&pidlock);
2223		upcount_dec(oldruid, zoneid);
2224		upcount_inc(prcred->pr_ruid, zoneid);
2225		mutex_exit(&pidlock);
2226	}
2227
2228	/*
2229	 * Broadcast the cred change to the threads.
2230	 */
2231	mutex_enter(&p->p_lock);
2232	t = p->p_tlist;
2233	do {
2234		t->t_pre_sys = 1; /* so syscall will get new cred */
2235	} while ((t = t->t_forw) != p->p_tlist);
2236
2237	return (0);
2238}
2239
2240/*
2241 * Change process credentials to specified zone.  Used to temporarily
2242 * set a process to run in the global zone; only transitions between
2243 * the process's actual zone and the global zone are allowed.
2244 */
2245static int
2246pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2247{
2248	kthread_t *t;
2249	cred_t *oldcred;
2250	cred_t *newcred;
2251	zone_t *zptr;
2252	zoneid_t oldzoneid;
2253
2254	if (secpolicy_zone_config(cr) != 0)
2255		return (EPERM);
2256	if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2257		return (EINVAL);
2258	if ((zptr = zone_find_by_id(zoneid)) == NULL)
2259		return (EINVAL);
2260	mutex_exit(&p->p_lock);
2261	mutex_enter(&p->p_crlock);
2262	oldcred = p->p_cred;
2263	crhold(oldcred);
2264	mutex_exit(&p->p_crlock);
2265	newcred = crdup(oldcred);
2266	oldzoneid = crgetzoneid(oldcred);
2267	crfree(oldcred);
2268
2269	crsetzone(newcred, zptr);
2270	zone_rele(zptr);
2271
2272	mutex_enter(&p->p_crlock);
2273	oldcred = p->p_cred;
2274	p->p_cred = newcred;
2275	mutex_exit(&p->p_crlock);
2276	crfree(oldcred);
2277
2278	/*
2279	 * The target process is changing zones (according to its cred), so
2280	 * update the per-zone upcounts, which are based on process creds.
2281	 */
2282	if (oldzoneid != zoneid) {
2283		uid_t ruid = crgetruid(newcred);
2284
2285		mutex_enter(&pidlock);
2286		upcount_dec(ruid, oldzoneid);
2287		upcount_inc(ruid, zoneid);
2288		mutex_exit(&pidlock);
2289	}
2290	/*
2291	 * Broadcast the cred change to the threads.
2292	 */
2293	mutex_enter(&p->p_lock);
2294	t = p->p_tlist;
2295	do {
2296		t->t_pre_sys = 1;	/* so syscall will get new cred */
2297	} while ((t = t->t_forw) != p->p_tlist);
2298
2299	return (0);
2300}
2301
2302static int
2303pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2304{
2305	kthread_t *t;
2306	int err;
2307
2308	ASSERT(MUTEX_HELD(&p->p_lock));
2309
2310	if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2311		/*
2312		 * Broadcast the cred change to the threads.
2313		 */
2314		t = p->p_tlist;
2315		do {
2316			t->t_pre_sys = 1; /* so syscall will get new cred */
2317		} while ((t = t->t_forw) != p->p_tlist);
2318	}
2319
2320	return (err);
2321}
2322
2323/*
2324 * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2325 * terminate or perform an exec(2).
2326 *
2327 * Returns 0 if the process is fully stopped except for the current thread (if
2328 * we are operating on our own process), 1 otherwise.
2329 *
2330 * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2331 * See holdwatch() for details.
2332 */
2333int
2334pr_allstopped(proc_t *p, int watchstop)
2335{
2336	kthread_t *t;
2337	int rv = 0;
2338
2339	ASSERT(MUTEX_HELD(&p->p_lock));
2340
2341	if (p->p_flag & SVFWAIT)	/* waiting for vfork'd child to exec */
2342		return (-1);
2343
2344	if ((t = p->p_tlist) != NULL) {
2345		do {
2346			if (t == curthread || VSTOPPED(t) ||
2347			    (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2348				continue;
2349			thread_lock(t);
2350			switch (t->t_state) {
2351			case TS_ZOMB:
2352			case TS_STOPPED:
2353				break;
2354			case TS_SLEEP:
2355				if (!(t->t_flag & T_WAKEABLE) ||
2356				    t->t_wchan0 == NULL)
2357					rv = 1;
2358				break;
2359			default:
2360				rv = 1;
2361				break;
2362			}
2363			thread_unlock(t);
2364		} while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2365	}
2366
2367	return (rv);
2368}
2369
2370/*
2371 * Cause all lwps in the process to pause (for watchpoint operations).
2372 */
2373static void
2374pauselwps(proc_t *p)
2375{
2376	kthread_t *t;
2377
2378	ASSERT(MUTEX_HELD(&p->p_lock));
2379	ASSERT(p != curproc);
2380
2381	if ((t = p->p_tlist) != NULL) {
2382		do {
2383			thread_lock(t);
2384			t->t_proc_flag |= TP_PAUSE;
2385			aston(t);
2386			if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2387			    ISWAITING(t)) {
2388				setrun_locked(t);
2389			}
2390			prpokethread(t);
2391			thread_unlock(t);
2392		} while ((t = t->t_forw) != p->p_tlist);
2393	}
2394}
2395
2396/*
2397 * undo the effects of pauselwps()
2398 */
2399static void
2400unpauselwps(proc_t *p)
2401{
2402	kthread_t *t;
2403
2404	ASSERT(MUTEX_HELD(&p->p_lock));
2405	ASSERT(p != curproc);
2406
2407	if ((t = p->p_tlist) != NULL) {
2408		do {
2409			thread_lock(t);
2410			t->t_proc_flag &= ~TP_PAUSE;
2411			if (t->t_state == TS_STOPPED) {
2412				t->t_schedflag |= TS_UNPAUSE;
2413				t->t_dtrace_stop = 0;
2414				setrun_locked(t);
2415			}
2416			thread_unlock(t);
2417		} while ((t = t->t_forw) != p->p_tlist);
2418	}
2419}
2420
2421/*
2422 * Cancel all watched areas.  Called from prclose().
2423 */
2424proc_t *
2425pr_cancel_watch(prnode_t *pnp)
2426{
2427	proc_t *p = pnp->pr_pcommon->prc_proc;
2428	struct as *as;
2429	kthread_t *t;
2430
2431	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2432
2433	if (!pr_watch_active(p))
2434		return (p);
2435
2436	/*
2437	 * Pause the process before dealing with the watchpoints.
2438	 */
2439	if (p == curproc) {
2440		prunlock(pnp);
2441		while (holdwatch() != 0)
2442			continue;
2443		p = pr_p_lock(pnp);
2444		mutex_exit(&pr_pidlock);
2445		ASSERT(p == curproc);
2446	} else {
2447		pauselwps(p);
2448		while (p != NULL && pr_allstopped(p, 0) > 0) {
2449			/*
2450			 * This cv/mutex pair is persistent even
2451			 * if the process disappears after we
2452			 * unmark it and drop p->p_lock.
2453			 */
2454			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2455			kmutex_t *mp = &p->p_lock;
2456
2457			prunmark(p);
2458			(void) cv_wait(cv, mp);
2459			mutex_exit(mp);
2460			p = pr_p_lock(pnp);  /* NULL if process disappeared */
2461			mutex_exit(&pr_pidlock);
2462		}
2463	}
2464
2465	if (p == NULL)		/* the process disappeared */
2466		return (NULL);
2467
2468	ASSERT(p == pnp->pr_pcommon->prc_proc);
2469	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2470
2471	if (pr_watch_active(p)) {
2472		pr_free_watchpoints(p);
2473		if ((t = p->p_tlist) != NULL) {
2474			do {
2475				watch_disable(t);
2476
2477			} while ((t = t->t_forw) != p->p_tlist);
2478		}
2479	}
2480
2481	if ((as = p->p_as) != NULL) {
2482		avl_tree_t *tree;
2483		struct watched_page *pwp;
2484
2485		/*
2486		 * If this is the parent of a vfork, the watched page
2487		 * list has been moved temporarily to p->p_wpage.
2488		 */
2489		if (avl_numnodes(&p->p_wpage) != 0)
2490			tree = &p->p_wpage;
2491		else
2492			tree = &as->a_wpage;
2493
2494		mutex_exit(&p->p_lock);
2495		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2496
2497		for (pwp = avl_first(tree); pwp != NULL;
2498		    pwp = AVL_NEXT(tree, pwp)) {
2499			pwp->wp_read = 0;
2500			pwp->wp_write = 0;
2501			pwp->wp_exec = 0;
2502			if ((pwp->wp_flags & WP_SETPROT) == 0) {
2503				pwp->wp_flags |= WP_SETPROT;
2504				pwp->wp_prot = pwp->wp_oprot;
2505				pwp->wp_list = p->p_wprot;
2506				p->p_wprot = pwp;
2507			}
2508		}
2509
2510		AS_LOCK_EXIT(as, &as->a_lock);
2511		mutex_enter(&p->p_lock);
2512	}
2513
2514	/*
2515	 * Unpause the process now.
2516	 */
2517	if (p == curproc)
2518		continuelwps(p);
2519	else
2520		unpauselwps(p);
2521
2522	return (p);
2523}
2524