1178479Sjb/*
2178479Sjb * CDDL HEADER START
3178479Sjb *
4178479Sjb * The contents of this file are subject to the terms of the
5178479Sjb * Common Development and Distribution License (the "License").
6178479Sjb * You may not use this file except in compliance with the License.
7178479Sjb *
8178479Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9178479Sjb * or http://www.opensolaris.org/os/licensing.
10178479Sjb * See the License for the specific language governing permissions
11178479Sjb * and limitations under the License.
12178479Sjb *
13178479Sjb * When distributing Covered Code, include this CDDL HEADER in each
14178479Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15178479Sjb * If applicable, add the following below this CDDL HEADER, with the
16178479Sjb * fields enclosed by brackets "[]" replaced with your own identifying
17178479Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
18178479Sjb *
19178479Sjb * CDDL HEADER END
20178479Sjb */
21178479Sjb
22178479Sjb/*
23210767Srpaulo * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24178479Sjb * Use is subject to license terms.
25178479Sjb */
26178479Sjb
27178479Sjb/*
28178479Sjb * DTrace Process Control
29178479Sjb *
30178479Sjb * This file provides a set of routines that permit libdtrace and its clients
31178479Sjb * to create and grab process handles using libproc, and to share these handles
32178479Sjb * between library mechanisms that need libproc access, such as ustack(), and
33178479Sjb * client mechanisms that need libproc access, such as dtrace(1M) -c and -p.
34178479Sjb * The library provides several mechanisms in the libproc control layer:
35178479Sjb *
36178479Sjb * Reference Counting: The library code and client code can independently grab
37178479Sjb * the same process handles without interfering with one another.  Only when
38178479Sjb * the reference count drops to zero and the handle is not being cached (see
39178479Sjb * below for more information on caching) will Prelease() be called on it.
40178479Sjb *
41178479Sjb * Handle Caching: If a handle is grabbed PGRAB_RDONLY (e.g. by ustack()) and
42178479Sjb * the reference count drops to zero, the handle is not immediately released.
43178479Sjb * Instead, libproc handles are maintained on dph_lrulist in order from most-
44178479Sjb * recently accessed to least-recently accessed.  Idle handles are maintained
45178479Sjb * until a pre-defined LRU cache limit is exceeded, permitting repeated calls
46178479Sjb * to ustack() to avoid the overhead of releasing and re-grabbing processes.
47178479Sjb *
48178479Sjb * Process Control: For processes that are grabbed for control (~PGRAB_RDONLY)
49178479Sjb * or created by dt_proc_create(), a control thread is created to provide
50178479Sjb * callbacks on process exit and symbol table caching on dlopen()s.
51178479Sjb *
52178479Sjb * MT-Safety: Libproc is not MT-Safe, so dt_proc_lock() and dt_proc_unlock()
53178479Sjb * are provided to synchronize access to the libproc handle between libdtrace
54178479Sjb * code and client code and the control thread's use of the ps_prochandle.
55178479Sjb *
56178479Sjb * NOTE: MT-Safety is NOT provided for libdtrace itself, or for use of the
57178479Sjb * dtrace_proc_grab/dtrace_proc_create mechanisms.  Like all exported libdtrace
58178479Sjb * calls, these are assumed to be MT-Unsafe.  MT-Safety is ONLY provided for
59178479Sjb * synchronization between libdtrace control threads and the client thread.
60178479Sjb *
61178479Sjb * The ps_prochandles themselves are maintained along with a dt_proc_t struct
62178479Sjb * in a hash table indexed by PID.  This provides basic locking and reference
63178479Sjb * counting.  The dt_proc_t is also maintained in LRU order on dph_lrulist.
64178479Sjb * The dph_lrucnt and dph_lrulim count the number of cacheable processes and
65178479Sjb * the current limit on the number of actively cached entries.
66178479Sjb *
67178479Sjb * The control thread for a process establishes breakpoints at the rtld_db
68178479Sjb * locations of interest, updates mappings and symbol tables at these points,
69178479Sjb * and handles exec and fork (by always following the parent).  The control
70178479Sjb * thread automatically exits when the process dies or control is lost.
71178479Sjb *
72178479Sjb * A simple notification mechanism is provided for libdtrace clients using
73178479Sjb * dtrace_handle_proc() for notification of PS_UNDEAD or PS_LOST events.  If
74178479Sjb * such an event occurs, the dt_proc_t itself is enqueued on a notification
75178479Sjb * list and the control thread broadcasts to dph_cv.  dtrace_sleep() will wake
76178479Sjb * up using this condition and will then call the client handler as necessary.
77178479Sjb */
78178479Sjb
79178479Sjb#include <sys/wait.h>
80277300Ssmh#ifdef illumos
81178479Sjb#include <sys/lwp.h>
82178565Sjb#endif
83178479Sjb#include <strings.h>
84178479Sjb#include <signal.h>
85178479Sjb#include <assert.h>
86178479Sjb#include <errno.h>
87178479Sjb
88178479Sjb#include <dt_proc.h>
89178479Sjb#include <dt_pid.h>
90178479Sjb#include <dt_impl.h>
91178479Sjb
92277300Ssmh#ifndef illumos
93211554Srpaulo#include <sys/syscall.h>
94211554Srpaulo#include <libproc_compat.h>
95211554Srpaulo#define	SYS_forksys SYS_fork
96211554Srpaulo#endif
97211554Srpaulo
/*
 * Classify a system call number reported in a syscall entry/exit stop.
 * The argument is parenthesized so that an arbitrary expression (for
 * example one using '&' or '?:') can be passed without it rebinding to
 * the '==' comparison.
 */
#define	IS_SYS_EXEC(w)	((w) == SYS_execve)
#define	IS_SYS_FORK(w)	((w) == SYS_vfork || (w) == SYS_forksys)
100178479Sjb
101178479Sjbstatic dt_bkpt_t *
102178479Sjbdt_proc_bpcreate(dt_proc_t *dpr, uintptr_t addr, dt_bkpt_f *func, void *data)
103178479Sjb{
104178479Sjb	struct ps_prochandle *P = dpr->dpr_proc;
105178479Sjb	dt_bkpt_t *dbp;
106178479Sjb
107210775Srpaulo	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
108178479Sjb
109178479Sjb	if ((dbp = dt_zalloc(dpr->dpr_hdl, sizeof (dt_bkpt_t))) != NULL) {
110178479Sjb		dbp->dbp_func = func;
111178479Sjb		dbp->dbp_data = data;
112178479Sjb		dbp->dbp_addr = addr;
113178479Sjb
114178479Sjb		if (Psetbkpt(P, dbp->dbp_addr, &dbp->dbp_instr) == 0)
115178479Sjb			dbp->dbp_active = B_TRUE;
116178479Sjb
117178479Sjb		dt_list_append(&dpr->dpr_bps, dbp);
118178479Sjb	}
119178479Sjb
120178479Sjb	return (dbp);
121178479Sjb}
122178479Sjb
123178479Sjbstatic void
124178479Sjbdt_proc_bpdestroy(dt_proc_t *dpr, int delbkpts)
125178479Sjb{
126178479Sjb	int state = Pstate(dpr->dpr_proc);
127178479Sjb	dt_bkpt_t *dbp, *nbp;
128178479Sjb
129210775Srpaulo	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
130178479Sjb
131178479Sjb	for (dbp = dt_list_next(&dpr->dpr_bps); dbp != NULL; dbp = nbp) {
132178479Sjb		if (delbkpts && dbp->dbp_active &&
133178479Sjb		    state != PS_LOST && state != PS_UNDEAD) {
134178479Sjb			(void) Pdelbkpt(dpr->dpr_proc,
135178479Sjb			    dbp->dbp_addr, dbp->dbp_instr);
136178479Sjb		}
137178479Sjb		nbp = dt_list_next(dbp);
138178479Sjb		dt_list_delete(&dpr->dpr_bps, dbp);
139178479Sjb		dt_free(dpr->dpr_hdl, dbp);
140178479Sjb	}
141178479Sjb}
142178479Sjb
143178479Sjbstatic void
144178479Sjbdt_proc_bpmatch(dtrace_hdl_t *dtp, dt_proc_t *dpr)
145178479Sjb{
146277300Ssmh#ifdef illumos
147178479Sjb	const lwpstatus_t *psp = &Pstatus(dpr->dpr_proc)->pr_lwp;
148211554Srpaulo#else
149211554Srpaulo	unsigned long pc;
150211554Srpaulo#endif
151178479Sjb	dt_bkpt_t *dbp;
152178479Sjb
153210775Srpaulo	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
154178479Sjb
155277300Ssmh#ifndef illumos
156211554Srpaulo	proc_regget(dpr->dpr_proc, REG_PC, &pc);
157211554Srpaulo	proc_bkptregadj(&pc);
158211554Srpaulo#endif
159211554Srpaulo
160178479Sjb	for (dbp = dt_list_next(&dpr->dpr_bps);
161178479Sjb	    dbp != NULL; dbp = dt_list_next(dbp)) {
162277300Ssmh#ifdef illumos
163178479Sjb		if (psp->pr_reg[R_PC] == dbp->dbp_addr)
164178479Sjb			break;
165211554Srpaulo#else
166211554Srpaulo		if (pc == dbp->dbp_addr)
167211554Srpaulo			break;
168211554Srpaulo#endif
169178479Sjb	}
170178479Sjb
171178479Sjb	if (dbp == NULL) {
172178479Sjb		dt_dprintf("pid %d: spurious breakpoint wakeup for %lx\n",
173277300Ssmh#ifdef illumos
174178479Sjb		    (int)dpr->dpr_pid, (ulong_t)psp->pr_reg[R_PC]);
175211554Srpaulo#else
176211554Srpaulo		    (int)dpr->dpr_pid, pc);
177211554Srpaulo#endif
178178479Sjb		return;
179178479Sjb	}
180178479Sjb
181178479Sjb	dt_dprintf("pid %d: hit breakpoint at %lx (%lu)\n",
182178479Sjb	    (int)dpr->dpr_pid, (ulong_t)dbp->dbp_addr, ++dbp->dbp_hits);
183178479Sjb
184178479Sjb	dbp->dbp_func(dtp, dpr, dbp->dbp_data);
185178479Sjb	(void) Pxecbkpt(dpr->dpr_proc, dbp->dbp_instr);
186178479Sjb}
187178479Sjb
188178479Sjbstatic void
189178479Sjbdt_proc_bpenable(dt_proc_t *dpr)
190178479Sjb{
191178479Sjb	dt_bkpt_t *dbp;
192178479Sjb
193210775Srpaulo	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
194178479Sjb
195178479Sjb	for (dbp = dt_list_next(&dpr->dpr_bps);
196178479Sjb	    dbp != NULL; dbp = dt_list_next(dbp)) {
197178479Sjb		if (!dbp->dbp_active && Psetbkpt(dpr->dpr_proc,
198178479Sjb		    dbp->dbp_addr, &dbp->dbp_instr) == 0)
199178479Sjb			dbp->dbp_active = B_TRUE;
200178479Sjb	}
201178479Sjb
202178479Sjb	dt_dprintf("breakpoints enabled\n");
203178479Sjb}
204178479Sjb
205178479Sjbstatic void
206178479Sjbdt_proc_bpdisable(dt_proc_t *dpr)
207178479Sjb{
208178479Sjb	dt_bkpt_t *dbp;
209178479Sjb
210210775Srpaulo	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
211178479Sjb
212178479Sjb	for (dbp = dt_list_next(&dpr->dpr_bps);
213178479Sjb	    dbp != NULL; dbp = dt_list_next(dbp)) {
214178479Sjb		if (dbp->dbp_active && Pdelbkpt(dpr->dpr_proc,
215178479Sjb		    dbp->dbp_addr, dbp->dbp_instr) == 0)
216178479Sjb			dbp->dbp_active = B_FALSE;
217178479Sjb	}
218178479Sjb
219178479Sjb	dt_dprintf("breakpoints disabled\n");
220178479Sjb}
221178479Sjb
222178479Sjbstatic void
223178479Sjbdt_proc_notify(dtrace_hdl_t *dtp, dt_proc_hash_t *dph, dt_proc_t *dpr,
224178479Sjb    const char *msg)
225178479Sjb{
226178479Sjb	dt_proc_notify_t *dprn = dt_alloc(dtp, sizeof (dt_proc_notify_t));
227178479Sjb
228178479Sjb	if (dprn == NULL) {
229178479Sjb		dt_dprintf("failed to allocate notification for %d %s\n",
230178479Sjb		    (int)dpr->dpr_pid, msg);
231178479Sjb	} else {
232178479Sjb		dprn->dprn_dpr = dpr;
233178479Sjb		if (msg == NULL)
234178479Sjb			dprn->dprn_errmsg[0] = '\0';
235178479Sjb		else
236178479Sjb			(void) strlcpy(dprn->dprn_errmsg, msg,
237178479Sjb			    sizeof (dprn->dprn_errmsg));
238178479Sjb
239178479Sjb		(void) pthread_mutex_lock(&dph->dph_lock);
240178479Sjb
241178479Sjb		dprn->dprn_next = dph->dph_notify;
242178479Sjb		dph->dph_notify = dprn;
243178479Sjb
244178479Sjb		(void) pthread_cond_broadcast(&dph->dph_cv);
245178479Sjb		(void) pthread_mutex_unlock(&dph->dph_lock);
246178479Sjb	}
247178479Sjb}
248178479Sjb
249178479Sjb/*
250178479Sjb * Check to see if the control thread was requested to stop when the victim
251178479Sjb * process reached a particular event (why) rather than continuing the victim.
252178479Sjb * If 'why' is set in the stop mask, we wait on dpr_cv for dt_proc_continue().
253178479Sjb * If 'why' is not set, this function returns immediately and does nothing.
254178479Sjb */
255178479Sjbstatic void
256178479Sjbdt_proc_stop(dt_proc_t *dpr, uint8_t why)
257178479Sjb{
258210775Srpaulo	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
259178479Sjb	assert(why != DT_PROC_STOP_IDLE);
260178479Sjb
261178479Sjb	if (dpr->dpr_stop & why) {
262178479Sjb		dpr->dpr_stop |= DT_PROC_STOP_IDLE;
263178479Sjb		dpr->dpr_stop &= ~why;
264178479Sjb
265178479Sjb		(void) pthread_cond_broadcast(&dpr->dpr_cv);
266178479Sjb
267178479Sjb		/*
268178479Sjb		 * We disable breakpoints while stopped to preserve the
269178479Sjb		 * integrity of the program text for both our own disassembly
270178479Sjb		 * and that of the kernel.
271178479Sjb		 */
272178479Sjb		dt_proc_bpdisable(dpr);
273178479Sjb
274178479Sjb		while (dpr->dpr_stop & DT_PROC_STOP_IDLE)
275178479Sjb			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
276178479Sjb
277178479Sjb		dt_proc_bpenable(dpr);
278178479Sjb	}
279178479Sjb}
280178479Sjb
281178479Sjb/*ARGSUSED*/
282178479Sjbstatic void
283178479Sjbdt_proc_bpmain(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *fname)
284178479Sjb{
285178479Sjb	dt_dprintf("pid %d: breakpoint at %s()\n", (int)dpr->dpr_pid, fname);
286178479Sjb	dt_proc_stop(dpr, DT_PROC_STOP_MAIN);
287178479Sjb}
288178479Sjb
289178479Sjbstatic void
290178479Sjbdt_proc_rdevent(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *evname)
291178479Sjb{
292178479Sjb	rd_event_msg_t rdm;
293178479Sjb	rd_err_e err;
294178479Sjb
295178479Sjb	if ((err = rd_event_getmsg(dpr->dpr_rtld, &rdm)) != RD_OK) {
296178479Sjb		dt_dprintf("pid %d: failed to get %s event message: %s\n",
297178479Sjb		    (int)dpr->dpr_pid, evname, rd_errstr(err));
298178479Sjb		return;
299178479Sjb	}
300178479Sjb
301178479Sjb	dt_dprintf("pid %d: rtld event %s type=%d state %d\n",
302178479Sjb	    (int)dpr->dpr_pid, evname, rdm.type, rdm.u.state);
303178479Sjb
304178479Sjb	switch (rdm.type) {
305178479Sjb	case RD_DLACTIVITY:
306178479Sjb		if (rdm.u.state != RD_CONSISTENT)
307178479Sjb			break;
308178479Sjb
309178479Sjb		Pupdate_syms(dpr->dpr_proc);
310178479Sjb		if (dt_pid_create_probes_module(dtp, dpr) != 0)
311178479Sjb			dt_proc_notify(dtp, dtp->dt_procs, dpr,
312178479Sjb			    dpr->dpr_errmsg);
313178479Sjb
314178479Sjb		break;
315178479Sjb	case RD_PREINIT:
316178479Sjb		Pupdate_syms(dpr->dpr_proc);
317178479Sjb		dt_proc_stop(dpr, DT_PROC_STOP_PREINIT);
318178479Sjb		break;
319178479Sjb	case RD_POSTINIT:
320178479Sjb		Pupdate_syms(dpr->dpr_proc);
321178479Sjb		dt_proc_stop(dpr, DT_PROC_STOP_POSTINIT);
322178479Sjb		break;
323178479Sjb	}
324178479Sjb}
325178479Sjb
326178479Sjbstatic void
327178479Sjbdt_proc_rdwatch(dt_proc_t *dpr, rd_event_e event, const char *evname)
328178479Sjb{
329178479Sjb	rd_notify_t rdn;
330178479Sjb	rd_err_e err;
331178479Sjb
332178479Sjb	if ((err = rd_event_addr(dpr->dpr_rtld, event, &rdn)) != RD_OK) {
333178479Sjb		dt_dprintf("pid %d: failed to get event address for %s: %s\n",
334178479Sjb		    (int)dpr->dpr_pid, evname, rd_errstr(err));
335178479Sjb		return;
336178479Sjb	}
337178479Sjb
338178479Sjb	if (rdn.type != RD_NOTIFY_BPT) {
339178479Sjb		dt_dprintf("pid %d: event %s has unexpected type %d\n",
340178479Sjb		    (int)dpr->dpr_pid, evname, rdn.type);
341178479Sjb		return;
342178479Sjb	}
343178479Sjb
344178479Sjb	(void) dt_proc_bpcreate(dpr, rdn.u.bptaddr,
345277300Ssmh#ifdef illumos
346178479Sjb	    (dt_bkpt_f *)dt_proc_rdevent, (void *)evname);
347211554Srpaulo#else
348211554Srpaulo	    /* XXX ugly */
349211554Srpaulo	    (dt_bkpt_f *)dt_proc_rdevent, __DECONST(void *, evname));
350211554Srpaulo#endif
351178479Sjb}
352178479Sjb
353178479Sjb/*
354178479Sjb * Common code for enabling events associated with the run-time linker after
355178479Sjb * attaching to a process or after a victim process completes an exec(2).
356178479Sjb */
357178479Sjbstatic void
358178479Sjbdt_proc_attach(dt_proc_t *dpr, int exec)
359178479Sjb{
360277300Ssmh#ifdef illumos
361178479Sjb	const pstatus_t *psp = Pstatus(dpr->dpr_proc);
362211554Srpaulo#endif
363178479Sjb	rd_err_e err;
364178479Sjb	GElf_Sym sym;
365178479Sjb
366210775Srpaulo	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
367178479Sjb
368178479Sjb	if (exec) {
369277300Ssmh#ifdef illumos
370178479Sjb		if (psp->pr_lwp.pr_errno != 0)
371178479Sjb			return; /* exec failed: nothing needs to be done */
372211554Srpaulo#endif
373178479Sjb
374178479Sjb		dt_proc_bpdestroy(dpr, B_FALSE);
375277300Ssmh#ifdef illumos
376178479Sjb		Preset_maps(dpr->dpr_proc);
377211554Srpaulo#endif
378178479Sjb	}
379178479Sjb	if ((dpr->dpr_rtld = Prd_agent(dpr->dpr_proc)) != NULL &&
380178479Sjb	    (err = rd_event_enable(dpr->dpr_rtld, B_TRUE)) == RD_OK) {
381277300Ssmh#ifdef illumos
382178479Sjb		dt_proc_rdwatch(dpr, RD_PREINIT, "RD_PREINIT");
383211554Srpaulo#endif
384178479Sjb		dt_proc_rdwatch(dpr, RD_POSTINIT, "RD_POSTINIT");
385277300Ssmh#ifdef illumos
386178479Sjb		dt_proc_rdwatch(dpr, RD_DLACTIVITY, "RD_DLACTIVITY");
387211554Srpaulo#endif
388178479Sjb	} else {
389178479Sjb		dt_dprintf("pid %d: failed to enable rtld events: %s\n",
390178479Sjb		    (int)dpr->dpr_pid, dpr->dpr_rtld ? rd_errstr(err) :
391178479Sjb		    "rtld_db agent initialization failed");
392178479Sjb	}
393178479Sjb
394178479Sjb	Pupdate_maps(dpr->dpr_proc);
395178479Sjb
396178479Sjb	if (Pxlookup_by_name(dpr->dpr_proc, LM_ID_BASE,
397178479Sjb	    "a.out", "main", &sym, NULL) == 0) {
398178479Sjb		(void) dt_proc_bpcreate(dpr, (uintptr_t)sym.st_value,
399178479Sjb		    (dt_bkpt_f *)dt_proc_bpmain, "a.out`main");
400178479Sjb	} else {
401178479Sjb		dt_dprintf("pid %d: failed to find a.out`main: %s\n",
402178479Sjb		    (int)dpr->dpr_pid, strerror(errno));
403178479Sjb	}
404178479Sjb}
405178479Sjb
406178479Sjb/*
407178479Sjb * Wait for a stopped process to be set running again by some other debugger.
408178479Sjb * This is typically not required by /proc-based debuggers, since the usual
409178479Sjb * model is that one debugger controls one victim.  But DTrace, as usual, has
410178479Sjb * its own needs: the stop() action assumes that prun(1) or some other tool
411178479Sjb * will be applied to resume the victim process.  This could be solved by
412178479Sjb * adding a PCWRUN directive to /proc, but that seems like overkill unless
413178479Sjb * other debuggers end up needing this functionality, so we implement a cheap
414178479Sjb * equivalent to PCWRUN using the set of existing kernel mechanisms.
415178479Sjb *
416178479Sjb * Our intent is really not just to wait for the victim to run, but rather to
417178479Sjb * wait for it to run and then stop again for a reason other than the current
418178479Sjb * PR_REQUESTED stop.  Since PCWSTOP/Pstopstatus() can be applied repeatedly
419178479Sjb * to a stopped process and will return the same result without affecting the
420178479Sjb * victim, we can just perform these operations repeatedly until Pstate()
421178479Sjb * changes, the representative LWP ID changes, or the stop timestamp advances.
422178479Sjb * dt_proc_control() will then rediscover the new state and continue as usual.
423178479Sjb * When the process is still stopped in the same exact state, we sleep for a
424178479Sjb * brief interval before waiting again so as not to spin consuming CPU cycles.
425178479Sjb */
426178479Sjbstatic void
427178479Sjbdt_proc_waitrun(dt_proc_t *dpr)
428178479Sjb{
429326302Semaste	printf("%s:%s(%d): not implemented\n", __FUNCTION__, __FILE__,
430326302Semaste	    __LINE__);
431211554Srpaulo#ifdef DOODAD
432178479Sjb	struct ps_prochandle *P = dpr->dpr_proc;
433178479Sjb	const lwpstatus_t *psp = &Pstatus(P)->pr_lwp;
434178479Sjb
435178479Sjb	int krflag = psp->pr_flags & (PR_KLC | PR_RLC);
436178479Sjb	timestruc_t tstamp = psp->pr_tstamp;
437178479Sjb	lwpid_t lwpid = psp->pr_lwpid;
438178479Sjb
439178479Sjb	const long wstop = PCWSTOP;
440178479Sjb	int pfd = Pctlfd(P);
441178479Sjb
442210775Srpaulo	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
443178479Sjb	assert(psp->pr_flags & PR_STOPPED);
444178479Sjb	assert(Pstate(P) == PS_STOP);
445178479Sjb
446178479Sjb	/*
447178479Sjb	 * While we are waiting for the victim to run, clear PR_KLC and PR_RLC
448178479Sjb	 * so that if the libdtrace client is killed, the victim stays stopped.
449178479Sjb	 * dt_proc_destroy() will also observe this and perform PRELEASE_HANG.
450178479Sjb	 */
451178479Sjb	(void) Punsetflags(P, krflag);
452178479Sjb	Psync(P);
453178479Sjb
454178479Sjb	(void) pthread_mutex_unlock(&dpr->dpr_lock);
455178479Sjb
456178479Sjb	while (!dpr->dpr_quit) {
457178479Sjb		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
458178479Sjb			continue; /* check dpr_quit and continue waiting */
459178479Sjb
460178479Sjb		(void) pthread_mutex_lock(&dpr->dpr_lock);
461178479Sjb		(void) Pstopstatus(P, PCNULL, 0);
462178479Sjb		psp = &Pstatus(P)->pr_lwp;
463178479Sjb
464178479Sjb		/*
465178479Sjb		 * If we've reached a new state, found a new representative, or
466178479Sjb		 * the stop timestamp has changed, restore PR_KLC/PR_RLC to its
467178479Sjb		 * original setting and then return with dpr_lock held.
468178479Sjb		 */
469178479Sjb		if (Pstate(P) != PS_STOP || psp->pr_lwpid != lwpid ||
470178479Sjb		    bcmp(&psp->pr_tstamp, &tstamp, sizeof (tstamp)) != 0) {
471178479Sjb			(void) Psetflags(P, krflag);
472178479Sjb			Psync(P);
473178479Sjb			return;
474178479Sjb		}
475178479Sjb
476178479Sjb		(void) pthread_mutex_unlock(&dpr->dpr_lock);
477178479Sjb		(void) poll(NULL, 0, MILLISEC / 2);
478178479Sjb	}
479178479Sjb
480178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
481211554Srpaulo#endif
482178479Sjb}
483178479Sjb
484178479Sjbtypedef struct dt_proc_control_data {
485178479Sjb	dtrace_hdl_t *dpcd_hdl;			/* DTrace handle */
486178479Sjb	dt_proc_t *dpcd_proc;			/* proccess to control */
487178479Sjb} dt_proc_control_data_t;
488178479Sjb
489178479Sjb/*
490178479Sjb * Main loop for all victim process control threads.  We initialize all the
491178479Sjb * appropriate /proc control mechanisms, and then enter a loop waiting for
492178479Sjb * the process to stop on an event or die.  We process any events by calling
493178479Sjb * appropriate subroutines, and exit when the victim dies or we lose control.
494178479Sjb *
495178479Sjb * The control thread synchronizes the use of dpr_proc with other libdtrace
496178479Sjb * threads using dpr_lock.  We hold the lock for all of our operations except
497178479Sjb * waiting while the process is running: this is accomplished by writing a
498178479Sjb * PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.  If the
499178479Sjb * libdtrace client wishes to exit or abort our wait, SIGCANCEL can be used.
500178479Sjb */
501178479Sjbstatic void *
502178479Sjbdt_proc_control(void *arg)
503178479Sjb{
504178479Sjb	dt_proc_control_data_t *datap = arg;
505178479Sjb	dtrace_hdl_t *dtp = datap->dpcd_hdl;
506178479Sjb	dt_proc_t *dpr = datap->dpcd_proc;
507249573Spfg	dt_proc_hash_t *dph = dpr->dpr_hdl->dt_procs;
508178479Sjb	struct ps_prochandle *P = dpr->dpr_proc;
509178565Sjb	int pid = dpr->dpr_pid;
510178479Sjb
511277300Ssmh#ifdef illumos
512178479Sjb	int pfd = Pctlfd(P);
513178479Sjb
514178479Sjb	const long wstop = PCWSTOP;
515178565Sjb#endif
516178479Sjb	int notify = B_FALSE;
517178479Sjb
518178479Sjb	/*
519178479Sjb	 * We disable the POSIX thread cancellation mechanism so that the
520178479Sjb	 * client program using libdtrace can't accidentally cancel our thread.
521178479Sjb	 * dt_proc_destroy() uses SIGCANCEL explicitly to simply poke us out
522178479Sjb	 * of PCWSTOP with EINTR, at which point we will see dpr_quit and exit.
523178479Sjb	 */
524178479Sjb	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
525178479Sjb
526178479Sjb	/*
527178479Sjb	 * Set up the corresponding process for tracing by libdtrace.  We want
528178479Sjb	 * to be able to catch breakpoints and efficiently single-step over
529178479Sjb	 * them, and we need to enable librtld_db to watch libdl activity.
530178479Sjb	 */
531178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
532178479Sjb
533277300Ssmh#ifdef illumos
534178479Sjb	(void) Punsetflags(P, PR_ASYNC);	/* require synchronous mode */
535178479Sjb	(void) Psetflags(P, PR_BPTADJ);		/* always adjust eip on x86 */
536178479Sjb	(void) Punsetflags(P, PR_FORK);		/* do not inherit on fork */
537178479Sjb
538178479Sjb	(void) Pfault(P, FLTBPT, B_TRUE);	/* always trace breakpoints */
539178479Sjb	(void) Pfault(P, FLTTRACE, B_TRUE);	/* always trace single-step */
540178479Sjb
541178479Sjb	/*
542178479Sjb	 * We must trace exit from exec() system calls so that if the exec is
543178479Sjb	 * successful, we can reset our breakpoints and re-initialize libproc.
544178479Sjb	 */
545178479Sjb	(void) Psysexit(P, SYS_execve, B_TRUE);
546178479Sjb
547178479Sjb	/*
548178479Sjb	 * We must trace entry and exit for fork() system calls in order to
549178479Sjb	 * disable our breakpoints temporarily during the fork.  We do not set
550178479Sjb	 * the PR_FORK flag, so if fork succeeds the child begins executing and
551178479Sjb	 * does not inherit any other tracing behaviors or a control thread.
552178479Sjb	 */
553178479Sjb	(void) Psysentry(P, SYS_vfork, B_TRUE);
554178479Sjb	(void) Psysexit(P, SYS_vfork, B_TRUE);
555178479Sjb	(void) Psysentry(P, SYS_forksys, B_TRUE);
556178479Sjb	(void) Psysexit(P, SYS_forksys, B_TRUE);
557178479Sjb
558178479Sjb	Psync(P);				/* enable all /proc changes */
559211554Srpaulo#endif
560178479Sjb	dt_proc_attach(dpr, B_FALSE);		/* enable rtld breakpoints */
561178479Sjb
562178479Sjb	/*
563178479Sjb	 * If PR_KLC is set, we created the process; otherwise we grabbed it.
564178479Sjb	 * Check for an appropriate stop request and wait for dt_proc_continue.
565178479Sjb	 */
566277300Ssmh#ifdef illumos
567178479Sjb	if (Pstatus(P)->pr_flags & PR_KLC)
568211554Srpaulo#else
569211554Srpaulo	if (proc_getflags(P) & PR_KLC)
570211554Srpaulo#endif
571178479Sjb		dt_proc_stop(dpr, DT_PROC_STOP_CREATE);
572178479Sjb	else
573178479Sjb		dt_proc_stop(dpr, DT_PROC_STOP_GRAB);
574178479Sjb
575178479Sjb	if (Psetrun(P, 0, 0) == -1) {
576178479Sjb		dt_dprintf("pid %d: failed to set running: %s\n",
577178479Sjb		    (int)dpr->dpr_pid, strerror(errno));
578178479Sjb	}
579178479Sjb
580178479Sjb	(void) pthread_mutex_unlock(&dpr->dpr_lock);
581178479Sjb
582178479Sjb	/*
583178479Sjb	 * Wait for the process corresponding to this control thread to stop,
584178479Sjb	 * process the event, and then set it running again.  We want to sleep
585178479Sjb	 * with dpr_lock *unheld* so that other parts of libdtrace can use the
586178479Sjb	 * ps_prochandle in the meantime (e.g. ustack()).  To do this, we write
587178479Sjb	 * a PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.
588178479Sjb	 * Once the process stops, we wake up, grab dpr_lock, and then call
589178479Sjb	 * Pwait() (which will return immediately) and do our processing.
590178479Sjb	 */
591178479Sjb	while (!dpr->dpr_quit) {
592178479Sjb		const lwpstatus_t *psp;
593178479Sjb
594277300Ssmh#ifdef illumos
595178479Sjb		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
596178479Sjb			continue; /* check dpr_quit and continue waiting */
597178565Sjb#else
598178565Sjb		/* Wait for the process to report status. */
599210695Srpaulo		proc_wstatus(P);
600211554Srpaulo		if (errno == EINTR)
601211554Srpaulo			continue; /* check dpr_quit and continue waiting */
602178565Sjb#endif
603178479Sjb
604178479Sjb		(void) pthread_mutex_lock(&dpr->dpr_lock);
605178565Sjb
606277300Ssmh#ifdef illumos
607178479Sjbpwait_locked:
608178479Sjb		if (Pstopstatus(P, PCNULL, 0) == -1 && errno == EINTR) {
609178479Sjb			(void) pthread_mutex_unlock(&dpr->dpr_lock);
610178479Sjb			continue; /* check dpr_quit and continue waiting */
611178479Sjb		}
612178565Sjb#endif
613178479Sjb
614211554Srpaulo		switch (Pstate(P)) {
615211554Srpaulo		case PS_STOP:
616277300Ssmh#ifdef illumos
617211554Srpaulo			psp = &Pstatus(P)->pr_lwp;
618178565Sjb#else
619211554Srpaulo			psp = proc_getlwpstatus(P);
620178565Sjb#endif
621178479Sjb
622178479Sjb			dt_dprintf("pid %d: proc stopped showing %d/%d\n",
623178479Sjb			    pid, psp->pr_why, psp->pr_what);
624178479Sjb
625178479Sjb			/*
626178479Sjb			 * If the process stops showing PR_REQUESTED, then the
627178479Sjb			 * DTrace stop() action was applied to it or another
628178479Sjb			 * debugging utility (e.g. pstop(1)) asked it to stop.
629178479Sjb			 * In either case, the user's intention is for the
630178479Sjb			 * process to remain stopped until another external
631178479Sjb			 * mechanism (e.g. prun(1)) is applied.  So instead of
632178479Sjb			 * setting the process running ourself, we wait for
633178479Sjb			 * someone else to do so.  Once that happens, we return
634178479Sjb			 * to our normal loop waiting for an event of interest.
635178479Sjb			 */
636178479Sjb			if (psp->pr_why == PR_REQUESTED) {
637178479Sjb				dt_proc_waitrun(dpr);
638178479Sjb				(void) pthread_mutex_unlock(&dpr->dpr_lock);
639178479Sjb				continue;
640178479Sjb			}
641178479Sjb
642178479Sjb			/*
643178479Sjb			 * If the process stops showing one of the events that
644178479Sjb			 * we are tracing, perform the appropriate response.
645178479Sjb			 * Note that we ignore PR_SUSPENDED, PR_CHECKPOINT, and
646178479Sjb			 * PR_JOBCONTROL by design: if one of these conditions
647178479Sjb			 * occurs, we will fall through to Psetrun() but the
648178479Sjb			 * process will remain stopped in the kernel by the
649178479Sjb			 * corresponding mechanism (e.g. job control stop).
650178479Sjb			 */
651178479Sjb			if (psp->pr_why == PR_FAULTED && psp->pr_what == FLTBPT)
652178479Sjb				dt_proc_bpmatch(dtp, dpr);
653178479Sjb			else if (psp->pr_why == PR_SYSENTRY &&
654178479Sjb			    IS_SYS_FORK(psp->pr_what))
655178479Sjb				dt_proc_bpdisable(dpr);
656178479Sjb			else if (psp->pr_why == PR_SYSEXIT &&
657178479Sjb			    IS_SYS_FORK(psp->pr_what))
658178479Sjb				dt_proc_bpenable(dpr);
659178479Sjb			else if (psp->pr_why == PR_SYSEXIT &&
660178479Sjb			    IS_SYS_EXEC(psp->pr_what))
661178479Sjb				dt_proc_attach(dpr, B_TRUE);
662178479Sjb			break;
663178479Sjb
664178479Sjb		case PS_LOST:
665277300Ssmh#ifdef illumos
666178479Sjb			if (Preopen(P) == 0)
667178479Sjb				goto pwait_locked;
668178565Sjb#endif
669178479Sjb
670178479Sjb			dt_dprintf("pid %d: proc lost: %s\n",
671178479Sjb			    pid, strerror(errno));
672178479Sjb
673178479Sjb			dpr->dpr_quit = B_TRUE;
674178479Sjb			notify = B_TRUE;
675178479Sjb			break;
676178479Sjb
677178479Sjb		case PS_UNDEAD:
678178479Sjb			dt_dprintf("pid %d: proc died\n", pid);
679178479Sjb			dpr->dpr_quit = B_TRUE;
680178479Sjb			notify = B_TRUE;
681178479Sjb			break;
682178479Sjb		}
683178479Sjb
684178479Sjb		if (Pstate(P) != PS_UNDEAD && Psetrun(P, 0, 0) == -1) {
685178479Sjb			dt_dprintf("pid %d: failed to set running: %s\n",
686178479Sjb			    (int)dpr->dpr_pid, strerror(errno));
687178479Sjb		}
688178479Sjb
689178479Sjb		(void) pthread_mutex_unlock(&dpr->dpr_lock);
690178479Sjb	}
691178479Sjb
692178479Sjb	/*
693178479Sjb	 * If the control thread detected PS_UNDEAD or PS_LOST, then enqueue
694178479Sjb	 * the dt_proc_t structure on the dt_proc_hash_t notification list.
695178479Sjb	 */
696178479Sjb	if (notify)
697178479Sjb		dt_proc_notify(dtp, dph, dpr, NULL);
698178479Sjb
699178479Sjb	/*
700178479Sjb	 * Destroy and remove any remaining breakpoints, set dpr_done and clear
701178479Sjb	 * dpr_tid to indicate the control thread has exited, and notify any
702178479Sjb	 * waiting thread in dt_proc_destroy() that we have succesfully exited.
703178479Sjb	 */
704178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
705178479Sjb
706178479Sjb	dt_proc_bpdestroy(dpr, B_TRUE);
707178479Sjb	dpr->dpr_done = B_TRUE;
708178479Sjb	dpr->dpr_tid = 0;
709178479Sjb
710178479Sjb	(void) pthread_cond_broadcast(&dpr->dpr_cv);
711178479Sjb	(void) pthread_mutex_unlock(&dpr->dpr_lock);
712178479Sjb
713178479Sjb	return (NULL);
714178479Sjb}
715178479Sjb
716178479Sjb/*PRINTFLIKE3*/
717178479Sjbstatic struct ps_prochandle *
718178479Sjbdt_proc_error(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *format, ...)
719178479Sjb{
720178479Sjb	va_list ap;
721178479Sjb
722178479Sjb	va_start(ap, format);
723178479Sjb	dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap);
724178479Sjb	va_end(ap);
725178479Sjb
726178479Sjb	if (dpr->dpr_proc != NULL)
727178479Sjb		Prelease(dpr->dpr_proc, 0);
728178479Sjb
729178479Sjb	dt_free(dtp, dpr);
730178479Sjb	(void) dt_set_errno(dtp, EDT_COMPILER);
731178479Sjb	return (NULL);
732178479Sjb}
733178479Sjb
734178479Sjbdt_proc_t *
735178479Sjbdt_proc_lookup(dtrace_hdl_t *dtp, struct ps_prochandle *P, int remove)
736178479Sjb{
737178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
738277300Ssmh#ifdef illumos
739178479Sjb	pid_t pid = Pstatus(P)->pr_pid;
740178565Sjb#else
741178565Sjb	pid_t pid = proc_getpid(P);
742178565Sjb#endif
743178479Sjb	dt_proc_t *dpr, **dpp = &dph->dph_hash[pid & (dph->dph_hashlen - 1)];
744178479Sjb
745178479Sjb	for (dpr = *dpp; dpr != NULL; dpr = dpr->dpr_hash) {
746178479Sjb		if (dpr->dpr_pid == pid)
747178479Sjb			break;
748178479Sjb		else
749178479Sjb			dpp = &dpr->dpr_hash;
750178479Sjb	}
751178479Sjb
752178479Sjb	assert(dpr != NULL);
753178479Sjb	assert(dpr->dpr_proc == P);
754178479Sjb
755178479Sjb	if (remove)
756178479Sjb		*dpp = dpr->dpr_hash; /* remove from pid hash chain */
757178479Sjb
758178479Sjb	return (dpr);
759178479Sjb}
760178479Sjb
761178479Sjbstatic void
762178479Sjbdt_proc_destroy(dtrace_hdl_t *dtp, struct ps_prochandle *P)
763178479Sjb{
764178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
765178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
766178479Sjb	dt_proc_notify_t *npr, **npp;
767178479Sjb	int rflag;
768178479Sjb
769178479Sjb	assert(dpr != NULL);
770178479Sjb
771178479Sjb	/*
772178479Sjb	 * If neither PR_KLC nor PR_RLC is set, then the process is stopped by
773178479Sjb	 * an external debugger and we were waiting in dt_proc_waitrun().
774178479Sjb	 * Leave the process in this condition using PRELEASE_HANG.
775178479Sjb	 */
776277300Ssmh#ifdef illumos
777178479Sjb	if (!(Pstatus(dpr->dpr_proc)->pr_flags & (PR_KLC | PR_RLC))) {
778178565Sjb#else
779178565Sjb	if (!(proc_getflags(dpr->dpr_proc) & (PR_KLC | PR_RLC))) {
780178565Sjb#endif
781178479Sjb		dt_dprintf("abandoning pid %d\n", (int)dpr->dpr_pid);
782210767Srpaulo		rflag = PRELEASE_HANG;
783277300Ssmh#ifdef illumos
784210767Srpaulo	} else if (Pstatus(dpr->dpr_proc)->pr_flags & PR_KLC) {
785178565Sjb#else
786210767Srpaulo	} else if (proc_getflags(dpr->dpr_proc) & PR_KLC) {
787178565Sjb#endif
788210767Srpaulo		dt_dprintf("killing pid %d\n", (int)dpr->dpr_pid);
789210767Srpaulo		rflag = PRELEASE_KILL; /* apply kill-on-last-close */
790178479Sjb	} else {
791178479Sjb		dt_dprintf("releasing pid %d\n", (int)dpr->dpr_pid);
792210767Srpaulo		rflag = 0; /* apply run-on-last-close */
793178479Sjb	}
794178479Sjb
795178479Sjb	if (dpr->dpr_tid) {
796178479Sjb		/*
797178479Sjb		 * Set the dpr_quit flag to tell the daemon thread to exit.  We
798178479Sjb		 * send it a SIGCANCEL to poke it out of PCWSTOP or any other
799178479Sjb		 * long-term /proc system call.  Our daemon threads have POSIX
800178479Sjb		 * cancellation disabled, so EINTR will be the only effect.  We
801178479Sjb		 * then wait for dpr_done to indicate the thread has exited.
802178479Sjb		 *
803178479Sjb		 * We can't use pthread_kill() to send SIGCANCEL because the
804178479Sjb		 * interface forbids it and we can't use pthread_cancel()
805178479Sjb		 * because with cancellation disabled it won't actually
806178479Sjb		 * send SIGCANCEL to the target thread, so we use _lwp_kill()
807178479Sjb		 * to do the job.  This is all built on evil knowledge of
808178479Sjb		 * the details of the cancellation mechanism in libc.
809178479Sjb		 */
810178479Sjb		(void) pthread_mutex_lock(&dpr->dpr_lock);
811178479Sjb		dpr->dpr_quit = B_TRUE;
812277300Ssmh#ifdef illumos
813178479Sjb		(void) _lwp_kill(dpr->dpr_tid, SIGCANCEL);
814178565Sjb#else
815234234Sgnn		pthread_kill(dpr->dpr_tid, SIGTHR);
816178565Sjb#endif
817178479Sjb
818178479Sjb		/*
819178479Sjb		 * If the process is currently idling in dt_proc_stop(), re-
820178479Sjb		 * enable breakpoints and poke it into running again.
821178479Sjb		 */
822178479Sjb		if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
823178479Sjb			dt_proc_bpenable(dpr);
824178479Sjb			dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
825178479Sjb			(void) pthread_cond_broadcast(&dpr->dpr_cv);
826178479Sjb		}
827178479Sjb
828178479Sjb		while (!dpr->dpr_done)
829178479Sjb			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
830178479Sjb
831178479Sjb		(void) pthread_mutex_unlock(&dpr->dpr_lock);
832178479Sjb	}
833178479Sjb
834178479Sjb	/*
835178479Sjb	 * Before we free the process structure, remove this dt_proc_t from the
836178479Sjb	 * lookup hash, and then walk the dt_proc_hash_t's notification list
837178479Sjb	 * and remove this dt_proc_t if it is enqueued.
838178479Sjb	 */
839178479Sjb	(void) pthread_mutex_lock(&dph->dph_lock);
840178479Sjb	(void) dt_proc_lookup(dtp, P, B_TRUE);
841178479Sjb	npp = &dph->dph_notify;
842178479Sjb
843178479Sjb	while ((npr = *npp) != NULL) {
844178479Sjb		if (npr->dprn_dpr == dpr) {
845178479Sjb			*npp = npr->dprn_next;
846178479Sjb			dt_free(dtp, npr);
847178479Sjb		} else {
848178479Sjb			npp = &npr->dprn_next;
849178479Sjb		}
850178479Sjb	}
851178479Sjb
852178479Sjb	(void) pthread_mutex_unlock(&dph->dph_lock);
853178479Sjb
854178479Sjb	/*
855178479Sjb	 * Remove the dt_proc_list from the LRU list, release the underlying
856178479Sjb	 * libproc handle, and free our dt_proc_t data structure.
857178479Sjb	 */
858178479Sjb	if (dpr->dpr_cacheable) {
859178479Sjb		assert(dph->dph_lrucnt != 0);
860178479Sjb		dph->dph_lrucnt--;
861178479Sjb	}
862178479Sjb
863178479Sjb	dt_list_delete(&dph->dph_lrulist, dpr);
864178479Sjb	Prelease(dpr->dpr_proc, rflag);
865178479Sjb	dt_free(dtp, dpr);
866178479Sjb}
867178479Sjb
868178479Sjbstatic int
869178479Sjbdt_proc_create_thread(dtrace_hdl_t *dtp, dt_proc_t *dpr, uint_t stop)
870178479Sjb{
871178479Sjb	dt_proc_control_data_t data;
872178479Sjb	sigset_t nset, oset;
873178479Sjb	pthread_attr_t a;
874178479Sjb	int err;
875178479Sjb
876178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
877178479Sjb	dpr->dpr_stop |= stop; /* set bit for initial rendezvous */
878178479Sjb
879178479Sjb	(void) pthread_attr_init(&a);
880178479Sjb	(void) pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
881178479Sjb
882178479Sjb	(void) sigfillset(&nset);
883178479Sjb	(void) sigdelset(&nset, SIGABRT);	/* unblocked for assert() */
884277300Ssmh#ifdef illumos
885178479Sjb	(void) sigdelset(&nset, SIGCANCEL);	/* see dt_proc_destroy() */
886178565Sjb#else
887178565Sjb	(void) sigdelset(&nset, SIGUSR1);	/* see dt_proc_destroy() */
888178565Sjb#endif
889178479Sjb
890178479Sjb	data.dpcd_hdl = dtp;
891178479Sjb	data.dpcd_proc = dpr;
892178479Sjb
893178479Sjb	(void) pthread_sigmask(SIG_SETMASK, &nset, &oset);
894178479Sjb	err = pthread_create(&dpr->dpr_tid, &a, dt_proc_control, &data);
895178479Sjb	(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);
896178479Sjb
897178479Sjb	/*
898178479Sjb	 * If the control thread was created, then wait on dpr_cv for either
899178479Sjb	 * dpr_done to be set (the victim died or the control thread failed)
900178479Sjb	 * or DT_PROC_STOP_IDLE to be set, indicating that the victim is now
901178479Sjb	 * stopped by /proc and the control thread is at the rendezvous event.
902178479Sjb	 * On success, we return with the process and control thread stopped:
903178479Sjb	 * the caller can then apply dt_proc_continue() to resume both.
904178479Sjb	 */
905178479Sjb	if (err == 0) {
906178479Sjb		while (!dpr->dpr_done && !(dpr->dpr_stop & DT_PROC_STOP_IDLE))
907178479Sjb			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
908178479Sjb
909178479Sjb		/*
910178479Sjb		 * If dpr_done is set, the control thread aborted before it
911178479Sjb		 * reached the rendezvous event.  This is either due to PS_LOST
912178479Sjb		 * or PS_UNDEAD (i.e. the process died).  We try to provide a
913178479Sjb		 * small amount of useful information to help figure it out.
914178479Sjb		 */
915178479Sjb		if (dpr->dpr_done) {
916277300Ssmh#ifdef illumos
917178479Sjb			const psinfo_t *prp = Ppsinfo(dpr->dpr_proc);
918178479Sjb			int stat = prp ? prp->pr_wstat : 0;
919178479Sjb			int pid = dpr->dpr_pid;
920178565Sjb#else
921211554Srpaulo			int stat = proc_getwstat(dpr->dpr_proc);
922211554Srpaulo			int pid = proc_getpid(dpr->dpr_proc);
923211554Srpaulo#endif
924178565Sjb			if (proc_state(dpr->dpr_proc) == PS_LOST) {
925178479Sjb				(void) dt_proc_error(dpr->dpr_hdl, dpr,
926178479Sjb				    "failed to control pid %d: process exec'd "
927178479Sjb				    "set-id or unobservable program\n", pid);
928178479Sjb			} else if (WIFSIGNALED(stat)) {
929178479Sjb				(void) dt_proc_error(dpr->dpr_hdl, dpr,
930178479Sjb				    "failed to control pid %d: process died "
931178479Sjb				    "from signal %d\n", pid, WTERMSIG(stat));
932178479Sjb			} else {
933178479Sjb				(void) dt_proc_error(dpr->dpr_hdl, dpr,
934178479Sjb				    "failed to control pid %d: process exited "
935178479Sjb				    "with status %d\n", pid, WEXITSTATUS(stat));
936178479Sjb			}
937178479Sjb
938178479Sjb			err = ESRCH; /* cause grab() or create() to fail */
939178479Sjb		}
940178479Sjb	} else {
941178479Sjb		(void) dt_proc_error(dpr->dpr_hdl, dpr,
942178479Sjb		    "failed to create control thread for process-id %d: %s\n",
943178479Sjb		    (int)dpr->dpr_pid, strerror(err));
944178479Sjb	}
945178479Sjb
946238979Sgnn	if (err == 0)
947238979Sgnn		(void) pthread_mutex_unlock(&dpr->dpr_lock);
948178479Sjb	(void) pthread_attr_destroy(&a);
949178479Sjb
950178479Sjb	return (err);
951178479Sjb}
952178479Sjb
953178479Sjbstruct ps_prochandle *
954184696Srodrigcdt_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv,
955184696Srodrigc    proc_child_func *pcf, void *child_arg)
956178479Sjb{
957178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
958178479Sjb	dt_proc_t *dpr;
959178479Sjb	int err;
960178479Sjb
961178479Sjb	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
962178479Sjb		return (NULL); /* errno is set for us */
963178479Sjb
964178479Sjb	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
965178479Sjb	(void) pthread_cond_init(&dpr->dpr_cv, NULL);
966178479Sjb
967277300Ssmh#ifdef illumos
968249573Spfg	if ((dpr->dpr_proc = Pcreate(file, argv, &err, NULL, 0)) == NULL) {
969211554Srpaulo#else
970211554Srpaulo	if ((err = proc_create(file, argv, pcf, child_arg,
971211554Srpaulo	    &dpr->dpr_proc)) != 0) {
972211554Srpaulo#endif
973178479Sjb		return (dt_proc_error(dtp, dpr,
974178479Sjb		    "failed to execute %s: %s\n", file, Pcreate_error(err)));
975178479Sjb	}
976178479Sjb
977178479Sjb	dpr->dpr_hdl = dtp;
978277300Ssmh#ifdef illumos
979178479Sjb	dpr->dpr_pid = Pstatus(dpr->dpr_proc)->pr_pid;
980178565Sjb#else
981178565Sjb	dpr->dpr_pid = proc_getpid(dpr->dpr_proc);
982178565Sjb#endif
983178479Sjb
984211554Srpaulo	(void) Punsetflags(dpr->dpr_proc, PR_RLC);
985211554Srpaulo	(void) Psetflags(dpr->dpr_proc, PR_KLC);
986211554Srpaulo
987178479Sjb	if (dt_proc_create_thread(dtp, dpr, dtp->dt_prcmode) != 0)
988178479Sjb		return (NULL); /* dt_proc_error() has been called for us */
989178479Sjb
990178479Sjb	dpr->dpr_hash = dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)];
991178479Sjb	dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)] = dpr;
992178479Sjb	dt_list_prepend(&dph->dph_lrulist, dpr);
993178479Sjb
994178479Sjb	dt_dprintf("created pid %d\n", (int)dpr->dpr_pid);
995178479Sjb	dpr->dpr_refs++;
996178479Sjb
997178479Sjb	return (dpr->dpr_proc);
998178479Sjb}
999178479Sjb
1000178479Sjbstruct ps_prochandle *
1001178479Sjbdt_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags, int nomonitor)
1002178479Sjb{
1003178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
1004178479Sjb	uint_t h = pid & (dph->dph_hashlen - 1);
1005178479Sjb	dt_proc_t *dpr, *opr;
1006178479Sjb	int err;
1007178479Sjb
1008178479Sjb	/*
1009178479Sjb	 * Search the hash table for the pid.  If it is already grabbed or
1010178479Sjb	 * created, move the handle to the front of the lrulist, increment
1011178479Sjb	 * the reference count, and return the existing ps_prochandle.
1012178479Sjb	 */
1013178479Sjb	for (dpr = dph->dph_hash[h]; dpr != NULL; dpr = dpr->dpr_hash) {
1014178479Sjb		if (dpr->dpr_pid == pid && !dpr->dpr_stale) {
1015178479Sjb			/*
1016178479Sjb			 * If the cached handle was opened read-only and
1017178479Sjb			 * this request is for a writeable handle, mark
1018178479Sjb			 * the cached handle as stale and open a new handle.
1019178479Sjb			 * Since it's stale, unmark it as cacheable.
1020178479Sjb			 */
1021178479Sjb			if (dpr->dpr_rdonly && !(flags & PGRAB_RDONLY)) {
1022178479Sjb				dt_dprintf("upgrading pid %d\n", (int)pid);
1023178479Sjb				dpr->dpr_stale = B_TRUE;
1024178479Sjb				dpr->dpr_cacheable = B_FALSE;
1025178479Sjb				dph->dph_lrucnt--;
1026178479Sjb				break;
1027178479Sjb			}
1028178479Sjb
1029178479Sjb			dt_dprintf("grabbed pid %d (cached)\n", (int)pid);
1030178479Sjb			dt_list_delete(&dph->dph_lrulist, dpr);
1031178479Sjb			dt_list_prepend(&dph->dph_lrulist, dpr);
1032178479Sjb			dpr->dpr_refs++;
1033178479Sjb			return (dpr->dpr_proc);
1034178479Sjb		}
1035178479Sjb	}
1036178479Sjb
1037178479Sjb	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
1038178479Sjb		return (NULL); /* errno is set for us */
1039178479Sjb
1040178479Sjb	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
1041178479Sjb	(void) pthread_cond_init(&dpr->dpr_cv, NULL);
1042178479Sjb
1043277300Ssmh#ifdef illumos
1044178479Sjb	if ((dpr->dpr_proc = Pgrab(pid, flags, &err)) == NULL) {
1045211554Srpaulo#else
1046211554Srpaulo	if ((err = proc_attach(pid, flags, &dpr->dpr_proc)) != 0) {
1047211554Srpaulo#endif
1048178479Sjb		return (dt_proc_error(dtp, dpr,
1049178479Sjb		    "failed to grab pid %d: %s\n", (int)pid, Pgrab_error(err)));
1050178479Sjb	}
1051178479Sjb
1052178479Sjb	dpr->dpr_hdl = dtp;
1053178479Sjb	dpr->dpr_pid = pid;
1054178479Sjb
1055178479Sjb	(void) Punsetflags(dpr->dpr_proc, PR_KLC);
1056178479Sjb	(void) Psetflags(dpr->dpr_proc, PR_RLC);
1057178479Sjb
1058178479Sjb	/*
1059178479Sjb	 * If we are attempting to grab the process without a monitor
1060178479Sjb	 * thread, then mark the process cacheable only if it's being
1061178479Sjb	 * grabbed read-only.  If we're currently caching more process
1062178479Sjb	 * handles than dph_lrulim permits, attempt to find the
1063178479Sjb	 * least-recently-used handle that is currently unreferenced and
1064178479Sjb	 * release it from the cache.  Otherwise we are grabbing the process
1065178479Sjb	 * for control: create a control thread for this process and store
1066178479Sjb	 * its ID in dpr->dpr_tid.
1067178479Sjb	 */
1068178479Sjb	if (nomonitor || (flags & PGRAB_RDONLY)) {
1069178479Sjb		if (dph->dph_lrucnt >= dph->dph_lrulim) {
1070178479Sjb			for (opr = dt_list_prev(&dph->dph_lrulist);
1071178479Sjb			    opr != NULL; opr = dt_list_prev(opr)) {
1072178479Sjb				if (opr->dpr_cacheable && opr->dpr_refs == 0) {
1073178479Sjb					dt_proc_destroy(dtp, opr->dpr_proc);
1074178479Sjb					break;
1075178479Sjb				}
1076178479Sjb			}
1077178479Sjb		}
1078178479Sjb
1079178479Sjb		if (flags & PGRAB_RDONLY) {
1080178479Sjb			dpr->dpr_cacheable = B_TRUE;
1081178479Sjb			dpr->dpr_rdonly = B_TRUE;
1082178479Sjb			dph->dph_lrucnt++;
1083178479Sjb		}
1084178479Sjb
1085178479Sjb	} else if (dt_proc_create_thread(dtp, dpr, DT_PROC_STOP_GRAB) != 0)
1086178479Sjb		return (NULL); /* dt_proc_error() has been called for us */
1087178479Sjb
1088178479Sjb	dpr->dpr_hash = dph->dph_hash[h];
1089178479Sjb	dph->dph_hash[h] = dpr;
1090178479Sjb	dt_list_prepend(&dph->dph_lrulist, dpr);
1091178479Sjb
1092178479Sjb	dt_dprintf("grabbed pid %d\n", (int)pid);
1093178479Sjb	dpr->dpr_refs++;
1094178479Sjb
1095178479Sjb	return (dpr->dpr_proc);
1096178479Sjb}
1097178479Sjb
1098178479Sjbvoid
1099178479Sjbdt_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1100178479Sjb{
1101178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1102178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
1103178479Sjb
1104178479Sjb	assert(dpr != NULL);
1105178479Sjb	assert(dpr->dpr_refs != 0);
1106178479Sjb
1107178479Sjb	if (--dpr->dpr_refs == 0 &&
1108178479Sjb	    (!dpr->dpr_cacheable || dph->dph_lrucnt > dph->dph_lrulim))
1109178479Sjb		dt_proc_destroy(dtp, P);
1110178479Sjb}
1111178479Sjb
1112178479Sjbvoid
1113178479Sjbdt_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1114178479Sjb{
1115178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1116178479Sjb
1117178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
1118178479Sjb
1119178479Sjb	if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
1120178479Sjb		dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
1121178479Sjb		(void) pthread_cond_broadcast(&dpr->dpr_cv);
1122178479Sjb	}
1123178479Sjb
1124178479Sjb	(void) pthread_mutex_unlock(&dpr->dpr_lock);
1125178479Sjb}
1126178479Sjb
1127178479Sjbvoid
1128178479Sjbdt_proc_lock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1129178479Sjb{
1130178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1131178479Sjb	int err = pthread_mutex_lock(&dpr->dpr_lock);
1132178479Sjb	assert(err == 0); /* check for recursion */
1133178479Sjb}
1134178479Sjb
1135178479Sjbvoid
1136178479Sjbdt_proc_unlock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1137178479Sjb{
1138178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1139178479Sjb	int err = pthread_mutex_unlock(&dpr->dpr_lock);
1140178479Sjb	assert(err == 0); /* check for unheld lock */
1141178479Sjb}
1142178479Sjb
1143178479Sjbvoid
1144249573Spfgdt_proc_hash_create(dtrace_hdl_t *dtp)
1145178479Sjb{
1146178479Sjb	if ((dtp->dt_procs = dt_zalloc(dtp, sizeof (dt_proc_hash_t) +
1147249573Spfg	    sizeof (dt_proc_t *) * _dtrace_pidbuckets - 1)) != NULL) {
1148178479Sjb
1149249573Spfg		(void) pthread_mutex_init(&dtp->dt_procs->dph_lock, NULL);
1150249573Spfg		(void) pthread_cond_init(&dtp->dt_procs->dph_cv, NULL);
1151178479Sjb
1152249573Spfg		dtp->dt_procs->dph_hashlen = _dtrace_pidbuckets;
1153249573Spfg		dtp->dt_procs->dph_lrulim = _dtrace_pidlrulim;
1154178479Sjb	}
1155178479Sjb}
1156178479Sjb
1157178479Sjbvoid
1158249573Spfgdt_proc_hash_destroy(dtrace_hdl_t *dtp)
1159178479Sjb{
1160178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
1161178479Sjb	dt_proc_t *dpr;
1162178479Sjb
1163178479Sjb	while ((dpr = dt_list_next(&dph->dph_lrulist)) != NULL)
1164178479Sjb		dt_proc_destroy(dtp, dpr->dpr_proc);
1165178479Sjb
1166178479Sjb	dtp->dt_procs = NULL;
1167178479Sjb	dt_free(dtp, dph);
1168178479Sjb}
1169178479Sjb
1170178479Sjbstruct ps_prochandle *
1171184696Srodrigcdtrace_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv,
1172184696Srodrigc    proc_child_func *pcf, void *child_arg)
1173178479Sjb{
1174178479Sjb	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
1175184696Srodrigc	struct ps_prochandle *P = dt_proc_create(dtp, file, argv, pcf, child_arg);
1176178479Sjb
1177211554Srpaulo	if (P != NULL && idp != NULL && idp->di_id == 0) {
1178277300Ssmh#ifdef illumos
1179178479Sjb		idp->di_id = Pstatus(P)->pr_pid; /* $target = created pid */
1180178565Sjb#else
1181178565Sjb		idp->di_id = proc_getpid(P); /* $target = created pid */
1182178565Sjb#endif
1183211554Srpaulo	}
1184178479Sjb
1185178479Sjb	return (P);
1186178479Sjb}
1187178479Sjb
1188178479Sjbstruct ps_prochandle *
1189178479Sjbdtrace_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags)
1190178479Sjb{
1191178479Sjb	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
1192178479Sjb	struct ps_prochandle *P = dt_proc_grab(dtp, pid, flags, 0);
1193178479Sjb
1194178479Sjb	if (P != NULL && idp != NULL && idp->di_id == 0)
1195178479Sjb		idp->di_id = pid; /* $target = grabbed pid */
1196178479Sjb
1197178479Sjb	return (P);
1198178479Sjb}
1199178479Sjb
1200178479Sjbvoid
1201178479Sjbdtrace_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1202178479Sjb{
1203178479Sjb	dt_proc_release(dtp, P);
1204178479Sjb}
1205178479Sjb
1206178479Sjbvoid
1207178479Sjbdtrace_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1208178479Sjb{
1209178479Sjb	dt_proc_continue(dtp, P);
1210178479Sjb}
1211