dt_proc.c revision 210695
1178479Sjb/*
2178479Sjb * CDDL HEADER START
3178479Sjb *
4178479Sjb * The contents of this file are subject to the terms of the
5178479Sjb * Common Development and Distribution License (the "License").
6178479Sjb * You may not use this file except in compliance with the License.
7178479Sjb *
8178479Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9178479Sjb * or http://www.opensolaris.org/os/licensing.
10178479Sjb * See the License for the specific language governing permissions
11178479Sjb * and limitations under the License.
12178479Sjb *
13178479Sjb * When distributing Covered Code, include this CDDL HEADER in each
14178479Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15178479Sjb * If applicable, add the following below this CDDL HEADER, with the
16178479Sjb * fields enclosed by brackets "[]" replaced with your own identifying
17178479Sjb * information: Portions Copyright [yyyy] [name of copyright owner]
18178479Sjb *
19178479Sjb * CDDL HEADER END
20178479Sjb */
21178479Sjb
22178479Sjb/*
23178479Sjb * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24178479Sjb * Use is subject to license terms.
25178479Sjb */
26178479Sjb
27178479Sjb#pragma ident	"%Z%%M%	%I%	%E% SMI"
28178479Sjb
29178479Sjb/*
30178479Sjb * DTrace Process Control
31178479Sjb *
32178479Sjb * This file provides a set of routines that permit libdtrace and its clients
33178479Sjb * to create and grab process handles using libproc, and to share these handles
34178479Sjb * between library mechanisms that need libproc access, such as ustack(), and
35178479Sjb * client mechanisms that need libproc access, such as dtrace(1M) -c and -p.
36178479Sjb * The library provides several mechanisms in the libproc control layer:
37178479Sjb *
38178479Sjb * Reference Counting: The library code and client code can independently grab
39178479Sjb * the same process handles without interfering with one another.  Only when
40178479Sjb * the reference count drops to zero and the handle is not being cached (see
41178479Sjb * below for more information on caching) will Prelease() be called on it.
42178479Sjb *
43178479Sjb * Handle Caching: If a handle is grabbed PGRAB_RDONLY (e.g. by ustack()) and
44178479Sjb * the reference count drops to zero, the handle is not immediately released.
45178479Sjb * Instead, libproc handles are maintained on dph_lrulist in order from most-
46178479Sjb * recently accessed to least-recently accessed.  Idle handles are maintained
47178479Sjb * until a pre-defined LRU cache limit is exceeded, permitting repeated calls
48178479Sjb * to ustack() to avoid the overhead of releasing and re-grabbing processes.
49178479Sjb *
50178479Sjb * Process Control: For processes that are grabbed for control (~PGRAB_RDONLY)
51178479Sjb * or created by dt_proc_create(), a control thread is created to provide
52178479Sjb * callbacks on process exit and symbol table caching on dlopen()s.
53178479Sjb *
54178479Sjb * MT-Safety: Libproc is not MT-Safe, so dt_proc_lock() and dt_proc_unlock()
55178479Sjb * are provided to synchronize access to the libproc handle between libdtrace
56178479Sjb * code and client code and the control thread's use of the ps_prochandle.
57178479Sjb *
58178479Sjb * NOTE: MT-Safety is NOT provided for libdtrace itself, or for use of the
59178479Sjb * dtrace_proc_grab/dtrace_proc_create mechanisms.  Like all exported libdtrace
60178479Sjb * calls, these are assumed to be MT-Unsafe.  MT-Safety is ONLY provided for
61178479Sjb * synchronization between libdtrace control threads and the client thread.
62178479Sjb *
63178479Sjb * The ps_prochandles themselves are maintained along with a dt_proc_t struct
64178479Sjb * in a hash table indexed by PID.  This provides basic locking and reference
65178479Sjb * counting.  The dt_proc_t is also maintained in LRU order on dph_lrulist.
66178479Sjb * The dph_lrucnt and dph_lrulim count the number of cacheable processes and
67178479Sjb * the current limit on the number of actively cached entries.
68178479Sjb *
69178479Sjb * The control thread for a process establishes breakpoints at the rtld_db
70178479Sjb * locations of interest, updates mappings and symbol tables at these points,
71178479Sjb * and handles exec and fork (by always following the parent).  The control
72178479Sjb * thread automatically exits when the process dies or control is lost.
73178479Sjb *
74178479Sjb * A simple notification mechanism is provided for libdtrace clients using
75178479Sjb * dtrace_handle_proc() for notification of PS_UNDEAD or PS_LOST events.  If
76178479Sjb * such an event occurs, the dt_proc_t itself is enqueued on a notification
77178479Sjb * list and the control thread broadcasts to dph_cv.  dtrace_sleep() will wake
78178479Sjb * up using this condition and will then call the client handler as necessary.
79178479Sjb */
80178479Sjb
81178479Sjb#include <sys/wait.h>
82178565Sjb#if defined(sun)
83178479Sjb#include <sys/lwp.h>
84178565Sjb#endif
85178479Sjb#include <strings.h>
86178479Sjb#include <signal.h>
87178479Sjb#include <assert.h>
88178479Sjb#include <errno.h>
89178479Sjb
90178479Sjb#include <dt_proc.h>
91178479Sjb#include <dt_pid.h>
92178479Sjb#include <dt_impl.h>
93178479Sjb
94178479Sjb#define	IS_SYS_EXEC(w)	(w == SYS_exec || w == SYS_execve)
95178479Sjb#define	IS_SYS_FORK(w)	(w == SYS_vfork || w == SYS_fork1 ||	\
96178479Sjb			w == SYS_forkall || w == SYS_forksys)
97178479Sjb
#ifdef DOODAD
/*
 * Allocate a breakpoint record for 'addr', attempt to plant the breakpoint in
 * the victim process, and append the record to dpr_bps.  The record is kept
 * on the list whether or not Psetbkpt() succeeds; dbp_active is only set to
 * B_TRUE on success.  Returns the new record, or NULL if allocation failed.
 * The caller must hold dpr_lock.
 */
static dt_bkpt_t *
dt_proc_bpcreate(dt_proc_t *dpr, uintptr_t addr, dt_bkpt_f *func, void *data)
{
	dt_bkpt_t *bkpt;

	assert(DT_MUTEX_HELD(&dpr->dpr_lock));

	bkpt = dt_zalloc(dpr->dpr_hdl, sizeof (dt_bkpt_t));
	if (bkpt == NULL)
		return (NULL);

	bkpt->dbp_func = func;
	bkpt->dbp_data = data;
	bkpt->dbp_addr = addr;

	if (Psetbkpt(dpr->dpr_proc, bkpt->dbp_addr, &bkpt->dbp_instr) == 0)
		bkpt->dbp_active = B_TRUE;

	dt_list_append(&dpr->dpr_bps, bkpt);
	return (bkpt);
}
#endif
121178479Sjb
122178479Sjbstatic void
123178479Sjbdt_proc_bpdestroy(dt_proc_t *dpr, int delbkpts)
124178479Sjb{
125178565Sjb#if defined(sun)
126178479Sjb	int state = Pstate(dpr->dpr_proc);
127178565Sjb#else
128178565Sjb	int state = proc_state(dpr->dpr_proc);
129178565Sjb#endif
130178479Sjb	dt_bkpt_t *dbp, *nbp;
131178479Sjb
132178479Sjb	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
133178479Sjb
134178479Sjb	for (dbp = dt_list_next(&dpr->dpr_bps); dbp != NULL; dbp = nbp) {
135178565Sjbprintf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
136178565Sjb#ifdef DOODAD
137178479Sjb		if (delbkpts && dbp->dbp_active &&
138178479Sjb		    state != PS_LOST && state != PS_UNDEAD) {
139178479Sjb			(void) Pdelbkpt(dpr->dpr_proc,
140178479Sjb			    dbp->dbp_addr, dbp->dbp_instr);
141178479Sjb		}
142178565Sjb#endif
143178479Sjb		nbp = dt_list_next(dbp);
144178479Sjb		dt_list_delete(&dpr->dpr_bps, dbp);
145178479Sjb		dt_free(dpr->dpr_hdl, dbp);
146178479Sjb	}
147178479Sjb}
148178479Sjb
#ifdef DOODAD
/*
 * Find the breakpoint record whose address equals the representative LWP's
 * program counter, bump its hit count, invoke its handler, and then step the
 * victim over the breakpoint with Pxecbkpt().  If no record matches, the
 * wakeup is logged as spurious and nothing else is done.  The caller must
 * hold dpr_lock.
 */
static void
dt_proc_bpmatch(dtrace_hdl_t *dtp, dt_proc_t *dpr)
{
	const lwpstatus_t *lwp = &Pstatus(dpr->dpr_proc)->pr_lwp;
	dt_bkpt_t *hit;

	assert(DT_MUTEX_HELD(&dpr->dpr_lock));

	hit = dt_list_next(&dpr->dpr_bps);
	while (hit != NULL && lwp->pr_reg[R_PC] != hit->dbp_addr)
		hit = dt_list_next(hit);

	if (hit == NULL) {
		dt_dprintf("pid %d: spurious breakpoint wakeup for %lx\n",
		    (int)dpr->dpr_pid, (ulong_t)lwp->pr_reg[R_PC]);
		return;
	}

	++hit->dbp_hits;
	dt_dprintf("pid %d: hit breakpoint at %lx (%lu)\n",
	    (int)dpr->dpr_pid, (ulong_t)hit->dbp_addr, hit->dbp_hits);

	hit->dbp_func(dtp, dpr, hit->dbp_data);
	(void) Pxecbkpt(dpr->dpr_proc, hit->dbp_instr);
}
#endif
177178479Sjb
178178479Sjbstatic void
179178479Sjbdt_proc_bpenable(dt_proc_t *dpr)
180178479Sjb{
181178479Sjb	dt_bkpt_t *dbp;
182178479Sjb
183178479Sjb	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
184178479Sjb
185178479Sjb	for (dbp = dt_list_next(&dpr->dpr_bps);
186178479Sjb	    dbp != NULL; dbp = dt_list_next(dbp)) {
187178565Sjbprintf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
188178565Sjb#ifdef DOODAD
189178479Sjb		if (!dbp->dbp_active && Psetbkpt(dpr->dpr_proc,
190178479Sjb		    dbp->dbp_addr, &dbp->dbp_instr) == 0)
191178479Sjb			dbp->dbp_active = B_TRUE;
192178565Sjb#endif
193178479Sjb	}
194178479Sjb
195178479Sjb	dt_dprintf("breakpoints enabled\n");
196178479Sjb}
197178479Sjb
198178479Sjbstatic void
199178479Sjbdt_proc_bpdisable(dt_proc_t *dpr)
200178479Sjb{
201178479Sjb	dt_bkpt_t *dbp;
202178479Sjb
203178479Sjb	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
204178479Sjb
205178479Sjb	for (dbp = dt_list_next(&dpr->dpr_bps);
206178479Sjb	    dbp != NULL; dbp = dt_list_next(dbp)) {
207178565Sjbprintf("%s:%s(%d): DOODAD\n",__FUNCTION__,__FILE__,__LINE__);
208178565Sjb#ifdef DOODAD
209178479Sjb		if (dbp->dbp_active && Pdelbkpt(dpr->dpr_proc,
210178479Sjb		    dbp->dbp_addr, dbp->dbp_instr) == 0)
211178479Sjb			dbp->dbp_active = B_FALSE;
212178565Sjb#endif
213178479Sjb	}
214178479Sjb
215178479Sjb	dt_dprintf("breakpoints disabled\n");
216178479Sjb}
217178479Sjb
218178479Sjbstatic void
219178479Sjbdt_proc_notify(dtrace_hdl_t *dtp, dt_proc_hash_t *dph, dt_proc_t *dpr,
220178479Sjb    const char *msg)
221178479Sjb{
222178479Sjb	dt_proc_notify_t *dprn = dt_alloc(dtp, sizeof (dt_proc_notify_t));
223178479Sjb
224178479Sjb	if (dprn == NULL) {
225178479Sjb		dt_dprintf("failed to allocate notification for %d %s\n",
226178479Sjb		    (int)dpr->dpr_pid, msg);
227178479Sjb	} else {
228178479Sjb		dprn->dprn_dpr = dpr;
229178479Sjb		if (msg == NULL)
230178479Sjb			dprn->dprn_errmsg[0] = '\0';
231178479Sjb		else
232178479Sjb			(void) strlcpy(dprn->dprn_errmsg, msg,
233178479Sjb			    sizeof (dprn->dprn_errmsg));
234178479Sjb
235178479Sjb		(void) pthread_mutex_lock(&dph->dph_lock);
236178479Sjb
237178479Sjb		dprn->dprn_next = dph->dph_notify;
238178479Sjb		dph->dph_notify = dprn;
239178479Sjb
240178479Sjb		(void) pthread_cond_broadcast(&dph->dph_cv);
241178479Sjb		(void) pthread_mutex_unlock(&dph->dph_lock);
242178479Sjb	}
243178479Sjb}
244178479Sjb
245178479Sjb/*
246178479Sjb * Check to see if the control thread was requested to stop when the victim
247178479Sjb * process reached a particular event (why) rather than continuing the victim.
248178479Sjb * If 'why' is set in the stop mask, we wait on dpr_cv for dt_proc_continue().
249178479Sjb * If 'why' is not set, this function returns immediately and does nothing.
250178479Sjb */
251178479Sjbstatic void
252178479Sjbdt_proc_stop(dt_proc_t *dpr, uint8_t why)
253178479Sjb{
254178479Sjb	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
255178479Sjb	assert(why != DT_PROC_STOP_IDLE);
256178479Sjb
257178479Sjb	if (dpr->dpr_stop & why) {
258178479Sjb		dpr->dpr_stop |= DT_PROC_STOP_IDLE;
259178479Sjb		dpr->dpr_stop &= ~why;
260178479Sjb
261178479Sjb		(void) pthread_cond_broadcast(&dpr->dpr_cv);
262178479Sjb
263178479Sjb		/*
264178479Sjb		 * We disable breakpoints while stopped to preserve the
265178479Sjb		 * integrity of the program text for both our own disassembly
266178479Sjb		 * and that of the kernel.
267178479Sjb		 */
268178479Sjb		dt_proc_bpdisable(dpr);
269178479Sjb
270178479Sjb		while (dpr->dpr_stop & DT_PROC_STOP_IDLE)
271178479Sjb			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
272178479Sjb
273178479Sjb		dt_proc_bpenable(dpr);
274178479Sjb	}
275178479Sjb}
276178479Sjb
277178479Sjb/*ARGSUSED*/
278178479Sjbstatic void
279178479Sjbdt_proc_bpmain(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *fname)
280178479Sjb{
281178479Sjb	dt_dprintf("pid %d: breakpoint at %s()\n", (int)dpr->dpr_pid, fname);
282178479Sjb	dt_proc_stop(dpr, DT_PROC_STOP_MAIN);
283178479Sjb}
284178479Sjb
285178565Sjb#if defined(sun)
286178479Sjbstatic void
287178479Sjbdt_proc_rdevent(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *evname)
288178479Sjb{
289178479Sjb	rd_event_msg_t rdm;
290178479Sjb	rd_err_e err;
291178479Sjb
292178479Sjb	if ((err = rd_event_getmsg(dpr->dpr_rtld, &rdm)) != RD_OK) {
293178479Sjb		dt_dprintf("pid %d: failed to get %s event message: %s\n",
294178479Sjb		    (int)dpr->dpr_pid, evname, rd_errstr(err));
295178479Sjb		return;
296178479Sjb	}
297178479Sjb
298178479Sjb	dt_dprintf("pid %d: rtld event %s type=%d state %d\n",
299178479Sjb	    (int)dpr->dpr_pid, evname, rdm.type, rdm.u.state);
300178479Sjb
301178479Sjb	switch (rdm.type) {
302178479Sjb	case RD_DLACTIVITY:
303178479Sjb		if (rdm.u.state != RD_CONSISTENT)
304178479Sjb			break;
305178479Sjb
306178479Sjb		Pupdate_syms(dpr->dpr_proc);
307178479Sjb		if (dt_pid_create_probes_module(dtp, dpr) != 0)
308178479Sjb			dt_proc_notify(dtp, dtp->dt_procs, dpr,
309178479Sjb			    dpr->dpr_errmsg);
310178479Sjb
311178479Sjb		break;
312178479Sjb	case RD_PREINIT:
313178479Sjb		Pupdate_syms(dpr->dpr_proc);
314178479Sjb		dt_proc_stop(dpr, DT_PROC_STOP_PREINIT);
315178479Sjb		break;
316178479Sjb	case RD_POSTINIT:
317178479Sjb		Pupdate_syms(dpr->dpr_proc);
318178479Sjb		dt_proc_stop(dpr, DT_PROC_STOP_POSTINIT);
319178479Sjb		break;
320178479Sjb	}
321178479Sjb}
322178479Sjb
323178479Sjbstatic void
324178479Sjbdt_proc_rdwatch(dt_proc_t *dpr, rd_event_e event, const char *evname)
325178479Sjb{
326178479Sjb	rd_notify_t rdn;
327178479Sjb	rd_err_e err;
328178479Sjb
329178479Sjb	if ((err = rd_event_addr(dpr->dpr_rtld, event, &rdn)) != RD_OK) {
330178479Sjb		dt_dprintf("pid %d: failed to get event address for %s: %s\n",
331178479Sjb		    (int)dpr->dpr_pid, evname, rd_errstr(err));
332178479Sjb		return;
333178479Sjb	}
334178479Sjb
335178479Sjb	if (rdn.type != RD_NOTIFY_BPT) {
336178479Sjb		dt_dprintf("pid %d: event %s has unexpected type %d\n",
337178479Sjb		    (int)dpr->dpr_pid, evname, rdn.type);
338178479Sjb		return;
339178479Sjb	}
340178479Sjb
341178479Sjb	(void) dt_proc_bpcreate(dpr, rdn.u.bptaddr,
342178479Sjb	    (dt_bkpt_f *)dt_proc_rdevent, (void *)evname);
343178479Sjb}
344178479Sjb
345178479Sjb/*
346178479Sjb * Common code for enabling events associated with the run-time linker after
347178479Sjb * attaching to a process or after a victim process completes an exec(2).
348178479Sjb */
349178479Sjbstatic void
350178479Sjbdt_proc_attach(dt_proc_t *dpr, int exec)
351178479Sjb{
352178479Sjb	const pstatus_t *psp = Pstatus(dpr->dpr_proc);
353178479Sjb	rd_err_e err;
354178479Sjb	GElf_Sym sym;
355178479Sjb
356178479Sjb	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
357178479Sjb
358178479Sjb	if (exec) {
359178479Sjb		if (psp->pr_lwp.pr_errno != 0)
360178479Sjb			return; /* exec failed: nothing needs to be done */
361178479Sjb
362178479Sjb		dt_proc_bpdestroy(dpr, B_FALSE);
363178479Sjb		Preset_maps(dpr->dpr_proc);
364178479Sjb	}
365178479Sjb
366178479Sjb	if ((dpr->dpr_rtld = Prd_agent(dpr->dpr_proc)) != NULL &&
367178479Sjb	    (err = rd_event_enable(dpr->dpr_rtld, B_TRUE)) == RD_OK) {
368178479Sjb		dt_proc_rdwatch(dpr, RD_PREINIT, "RD_PREINIT");
369178479Sjb		dt_proc_rdwatch(dpr, RD_POSTINIT, "RD_POSTINIT");
370178479Sjb		dt_proc_rdwatch(dpr, RD_DLACTIVITY, "RD_DLACTIVITY");
371178479Sjb	} else {
372178479Sjb		dt_dprintf("pid %d: failed to enable rtld events: %s\n",
373178479Sjb		    (int)dpr->dpr_pid, dpr->dpr_rtld ? rd_errstr(err) :
374178479Sjb		    "rtld_db agent initialization failed");
375178479Sjb	}
376178479Sjb
377178479Sjb	Pupdate_maps(dpr->dpr_proc);
378178479Sjb
379178479Sjb	if (Pxlookup_by_name(dpr->dpr_proc, LM_ID_BASE,
380178479Sjb	    "a.out", "main", &sym, NULL) == 0) {
381178479Sjb		(void) dt_proc_bpcreate(dpr, (uintptr_t)sym.st_value,
382178479Sjb		    (dt_bkpt_f *)dt_proc_bpmain, "a.out`main");
383178479Sjb	} else {
384178479Sjb		dt_dprintf("pid %d: failed to find a.out`main: %s\n",
385178479Sjb		    (int)dpr->dpr_pid, strerror(errno));
386178479Sjb	}
387178479Sjb}
388178479Sjb
389178479Sjb/*
390178479Sjb * Wait for a stopped process to be set running again by some other debugger.
391178479Sjb * This is typically not required by /proc-based debuggers, since the usual
392178479Sjb * model is that one debugger controls one victim.  But DTrace, as usual, has
393178479Sjb * its own needs: the stop() action assumes that prun(1) or some other tool
394178479Sjb * will be applied to resume the victim process.  This could be solved by
395178479Sjb * adding a PCWRUN directive to /proc, but that seems like overkill unless
396178479Sjb * other debuggers end up needing this functionality, so we implement a cheap
397178479Sjb * equivalent to PCWRUN using the set of existing kernel mechanisms.
398178479Sjb *
399178479Sjb * Our intent is really not just to wait for the victim to run, but rather to
400178479Sjb * wait for it to run and then stop again for a reason other than the current
401178479Sjb * PR_REQUESTED stop.  Since PCWSTOP/Pstopstatus() can be applied repeatedly
402178479Sjb * to a stopped process and will return the same result without affecting the
403178479Sjb * victim, we can just perform these operations repeatedly until Pstate()
404178479Sjb * changes, the representative LWP ID changes, or the stop timestamp advances.
405178479Sjb * dt_proc_control() will then rediscover the new state and continue as usual.
406178479Sjb * When the process is still stopped in the same exact state, we sleep for a
407178479Sjb * brief interval before waiting again so as not to spin consuming CPU cycles.
408178479Sjb */
409178479Sjbstatic void
410178479Sjbdt_proc_waitrun(dt_proc_t *dpr)
411178479Sjb{
412178479Sjb	struct ps_prochandle *P = dpr->dpr_proc;
413178479Sjb	const lwpstatus_t *psp = &Pstatus(P)->pr_lwp;
414178479Sjb
	/*
	 * Snapshot the stop we are currently looking at (flags, timestamp and
	 * representative LWP) so the loop below can tell when it has changed.
	 */
415178479Sjb	int krflag = psp->pr_flags & (PR_KLC | PR_RLC);
416178479Sjb	timestruc_t tstamp = psp->pr_tstamp;
417178479Sjb	lwpid_t lwpid = psp->pr_lwpid;
418178479Sjb
419178479Sjb	const long wstop = PCWSTOP;
420178479Sjb	int pfd = Pctlfd(P);
421178479Sjb
	/* Entered with dpr_lock held and the victim already stopped. */
422178479Sjb	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
423178479Sjb	assert(psp->pr_flags & PR_STOPPED);
424178479Sjb	assert(Pstate(P) == PS_STOP);
425178479Sjb
426178479Sjb	/*
427178479Sjb	 * While we are waiting for the victim to run, clear PR_KLC and PR_RLC
428178479Sjb	 * so that if the libdtrace client is killed, the victim stays stopped.
429178479Sjb	 * dt_proc_destroy() will also observe this and perform PRELEASE_HANG.
430178479Sjb	 */
431178479Sjb	(void) Punsetflags(P, krflag);
432178479Sjb	Psync(P);
433178479Sjb
	/* Drop dpr_lock while blocked in the PCWSTOP write below. */
434178479Sjb	(void) pthread_mutex_unlock(&dpr->dpr_lock);
435178479Sjb
	/*
	 * NOTE(review): dpr_quit is tested here without dpr_lock held --
	 * confirm the field's declaration makes that safe.
	 */
436178479Sjb	while (!dpr->dpr_quit) {
437178479Sjb		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
438178479Sjb			continue; /* check dpr_quit and continue waiting */
439178479Sjb
440178479Sjb		(void) pthread_mutex_lock(&dpr->dpr_lock);
441178479Sjb		(void) Pstopstatus(P, PCNULL, 0);
442178479Sjb		psp = &Pstatus(P)->pr_lwp;
443178479Sjb
444178479Sjb		/*
445178479Sjb		 * If we've reached a new state, found a new representative, or
446178479Sjb		 * the stop timestamp has changed, restore PR_KLC/PR_RLC to its
447178479Sjb		 * original setting and then return with dpr_lock held.
448178479Sjb		 */
449178479Sjb		if (Pstate(P) != PS_STOP || psp->pr_lwpid != lwpid ||
450178479Sjb		    bcmp(&psp->pr_tstamp, &tstamp, sizeof (tstamp)) != 0) {
451178479Sjb			(void) Psetflags(P, krflag);
452178479Sjb			Psync(P);
453178479Sjb			return;
454178479Sjb		}
455178479Sjb
		/* Same stop as before: release the lock and back off briefly. */
456178479Sjb		(void) pthread_mutex_unlock(&dpr->dpr_lock);
457178479Sjb		(void) poll(NULL, 0, MILLISEC / 2);
458178479Sjb	}
459178479Sjb
	/*
	 * dpr_quit was seen: re-take dpr_lock so that this exit path also
	 * returns with the lock held.  PR_KLC/PR_RLC are not restored here.
	 */
460178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
461178479Sjb}
462178565Sjb#endif
463178479Sjb
/* Argument bundle handed to dt_proc_control() through its void *arg. */
464178479Sjbtypedef struct dt_proc_control_data {
465178479Sjb	dtrace_hdl_t *dpcd_hdl;			/* DTrace handle */
466178479Sjb	dt_proc_t *dpcd_proc;			/* process to control */
467178479Sjb} dt_proc_control_data_t;
468178479Sjb
469178479Sjb/*
470178479Sjb * Main loop for all victim process control threads.  We initialize all the
471178479Sjb * appropriate /proc control mechanisms, and then enter a loop waiting for
472178479Sjb * the process to stop on an event or die.  We process any events by calling
473178479Sjb * appropriate subroutines, and exit when the victim dies or we lose control.
474178479Sjb *
475178479Sjb * The control thread synchronizes the use of dpr_proc with other libdtrace
476178479Sjb * threads using dpr_lock.  We hold the lock for all of our operations except
477178479Sjb * waiting while the process is running: this is accomplished by writing a
478178479Sjb * PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.  If the
479178479Sjb * libdtrace client wishes to exit or abort our wait, SIGCANCEL can be used.
480178479Sjb */
481178479Sjbstatic void *
482178479Sjbdt_proc_control(void *arg)
483178479Sjb{
	/* 'arg' is a dt_proc_control_data_t naming the handle and process. */
484178479Sjb	dt_proc_control_data_t *datap = arg;
485178479Sjb	dtrace_hdl_t *dtp = datap->dpcd_hdl;
486178479Sjb	dt_proc_t *dpr = datap->dpcd_proc;
487178479Sjb	dt_proc_hash_t *dph = dpr->dpr_hdl->dt_procs;
488178479Sjb	struct ps_prochandle *P = dpr->dpr_proc;
489178565Sjb	int pid = dpr->dpr_pid;
490178479Sjb
491178565Sjb#if defined(sun)
492178479Sjb	int pfd = Pctlfd(P);
493178479Sjb
494178479Sjb	const long wstop = PCWSTOP;
495178565Sjb#endif
	/* Set when the loop ends because the victim died or was lost. */
496178479Sjb	int notify = B_FALSE;
497178479Sjb
498178479Sjb	/*
499178479Sjb	 * We disable the POSIX thread cancellation mechanism so that the
500178479Sjb	 * client program using libdtrace can't accidentally cancel our thread.
501178479Sjb	 * dt_proc_destroy() uses SIGCANCEL explicitly to simply poke us out
502178479Sjb	 * of PCWSTOP with EINTR, at which point we will see dpr_quit and exit.
503178479Sjb	 */
504178479Sjb	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
505178479Sjb
506178479Sjb	/*
507178479Sjb	 * Set up the corresponding process for tracing by libdtrace.  We want
508178479Sjb	 * to be able to catch breakpoints and efficiently single-step over
509178479Sjb	 * them, and we need to enable librtld_db to watch libdl activity.
510178479Sjb	 */
511178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
512178479Sjb
513178565Sjb#if defined(sun)
514178479Sjb	(void) Punsetflags(P, PR_ASYNC);	/* require synchronous mode */
515178479Sjb	(void) Psetflags(P, PR_BPTADJ);		/* always adjust eip on x86 */
516178479Sjb	(void) Punsetflags(P, PR_FORK);		/* do not inherit on fork */
517178479Sjb
518178479Sjb	(void) Pfault(P, FLTBPT, B_TRUE);	/* always trace breakpoints */
519178479Sjb	(void) Pfault(P, FLTTRACE, B_TRUE);	/* always trace single-step */
520178479Sjb
521178479Sjb	/*
522178479Sjb	 * We must trace exit from exec() system calls so that if the exec is
523178479Sjb	 * successful, we can reset our breakpoints and re-initialize libproc.
524178479Sjb	 */
525178479Sjb	(void) Psysexit(P, SYS_exec, B_TRUE);
526178479Sjb	(void) Psysexit(P, SYS_execve, B_TRUE);
527178479Sjb
528178479Sjb	/*
529178479Sjb	 * We must trace entry and exit for fork() system calls in order to
530178479Sjb	 * disable our breakpoints temporarily during the fork.  We do not set
531178479Sjb	 * the PR_FORK flag, so if fork succeeds the child begins executing and
532178479Sjb	 * does not inherit any other tracing behaviors or a control thread.
533178479Sjb	 */
534178479Sjb	(void) Psysentry(P, SYS_vfork, B_TRUE);
535178479Sjb	(void) Psysexit(P, SYS_vfork, B_TRUE);
536178479Sjb	(void) Psysentry(P, SYS_fork1, B_TRUE);
537178479Sjb	(void) Psysexit(P, SYS_fork1, B_TRUE);
538178479Sjb	(void) Psysentry(P, SYS_forkall, B_TRUE);
539178479Sjb	(void) Psysexit(P, SYS_forkall, B_TRUE);
540178479Sjb	(void) Psysentry(P, SYS_forksys, B_TRUE);
541178479Sjb	(void) Psysexit(P, SYS_forksys, B_TRUE);
542178479Sjb
543178479Sjb	Psync(P);				/* enable all /proc changes */
544178479Sjb	dt_proc_attach(dpr, B_FALSE);		/* enable rtld breakpoints */
545178479Sjb
546178479Sjb	/*
547178479Sjb	 * If PR_KLC is set, we created the process; otherwise we grabbed it.
548178479Sjb	 * Check for an appropriate stop request and wait for dt_proc_continue.
549178479Sjb	 */
550178479Sjb	if (Pstatus(P)->pr_flags & PR_KLC)
551178479Sjb		dt_proc_stop(dpr, DT_PROC_STOP_CREATE);
552178479Sjb	else
553178479Sjb		dt_proc_stop(dpr, DT_PROC_STOP_GRAB);
554178479Sjb
555178479Sjb	if (Psetrun(P, 0, 0) == -1) {
556178479Sjb		dt_dprintf("pid %d: failed to set running: %s\n",
557178479Sjb		    (int)dpr->dpr_pid, strerror(errno));
558178479Sjb	}
559178565Sjb#else
	/*
	 * Non-sun build: none of the /proc tracing setup above is performed;
	 * only the stop request is honored and the process is continued.
	 */
560178565Sjb	/*
561178565Sjb	 * If PR_KLC is set, we created the process; otherwise we grabbed it.
562178565Sjb	 * Check for an appropriate stop request and wait for dt_proc_continue.
563178565Sjb	 */
564178565Sjb	if (proc_getflags(P) & PR_KLC)
565178565Sjb		dt_proc_stop(dpr, DT_PROC_STOP_CREATE);
566178565Sjb	else
567178565Sjb		dt_proc_stop(dpr, DT_PROC_STOP_GRAB);
568178479Sjb
569178565Sjb	if (proc_continue(P) != 0)
570178565Sjb		dt_dprintf("pid %d: failed to set running: %s\n",
571178565Sjb		    (int)dpr->dpr_pid, strerror(errno));
572178565Sjb#endif
573178565Sjb
574178479Sjb	(void) pthread_mutex_unlock(&dpr->dpr_lock);
575178479Sjb
576178479Sjb	/*
577178479Sjb	 * Wait for the process corresponding to this control thread to stop,
578178479Sjb	 * process the event, and then set it running again.  We want to sleep
579178479Sjb	 * with dpr_lock *unheld* so that other parts of libdtrace can use the
580178479Sjb	 * ps_prochandle in the meantime (e.g. ustack()).  To do this, we write
581178479Sjb	 * a PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.
582178479Sjb	 * Once the process stops, we wake up, grab dpr_lock, and then call
583178479Sjb	 * Pwait() (which will return immediately) and do our processing.
584178479Sjb	 */
	/*
	 * NOTE(review): dpr_quit is tested here without dpr_lock held --
	 * confirm the field's declaration makes that safe.
	 */
585178479Sjb	while (!dpr->dpr_quit) {
586178565Sjb#if defined(sun)
587178479Sjb		const lwpstatus_t *psp;
588178479Sjb
589178479Sjb		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
590178479Sjb			continue; /* check dpr_quit and continue waiting */
591178565Sjb#else
592178565Sjb		/* Wait for the process to report status. */
		/* The return value of proc_wstatus() is not examined here. */
593210695Srpaulo		proc_wstatus(P);
594178565Sjb#endif
595178479Sjb
596178479Sjb		(void) pthread_mutex_lock(&dpr->dpr_lock);
597178565Sjb
598178565Sjb#if defined(sun)
		/* Re-entered via goto after a successful Preopen() below. */
599178479Sjbpwait_locked:
600178479Sjb		if (Pstopstatus(P, PCNULL, 0) == -1 && errno == EINTR) {
601178479Sjb			(void) pthread_mutex_unlock(&dpr->dpr_lock);
602178479Sjb			continue; /* check dpr_quit and continue waiting */
603178479Sjb		}
604178565Sjb#endif
605178479Sjb
		/*
		 * Dispatch on the victim's state.  States other than the three
		 * cases below take no action in the switch.
		 */
606178565Sjb#if defined(sun)
607178479Sjb		switch (Pstate(P)) {
608178565Sjb#else
609178565Sjb		switch (proc_state(P)) {
610178565Sjb#endif
611178479Sjb		case PS_STOP:
			/*
			 * The stop-event handling below is compiled only when
			 * DOODAD is defined; without it PS_STOP does nothing
			 * inside the switch.
			 */
612178565Sjb#ifdef DOODAD
613178479Sjb			psp = &Pstatus(P)->pr_lwp;
614178479Sjb
615178479Sjb			dt_dprintf("pid %d: proc stopped showing %d/%d\n",
616178479Sjb			    pid, psp->pr_why, psp->pr_what);
617178479Sjb
618178479Sjb			/*
619178479Sjb			 * If the process stops showing PR_REQUESTED, then the
620178479Sjb			 * DTrace stop() action was applied to it or another
621178479Sjb			 * debugging utility (e.g. pstop(1)) asked it to stop.
622178479Sjb			 * In either case, the user's intention is for the
623178479Sjb			 * process to remain stopped until another external
624178479Sjb			 * mechanism (e.g. prun(1)) is applied.  So instead of
625178479Sjb			 * setting the process running ourself, we wait for
626178479Sjb			 * someone else to do so.  Once that happens, we return
627178479Sjb			 * to our normal loop waiting for an event of interest.
628178479Sjb			 */
629178479Sjb			if (psp->pr_why == PR_REQUESTED) {
630178479Sjb				dt_proc_waitrun(dpr);
631178479Sjb				(void) pthread_mutex_unlock(&dpr->dpr_lock);
632178479Sjb				continue;
633178479Sjb			}
634178479Sjb
635178479Sjb			/*
636178479Sjb			 * If the process stops showing one of the events that
637178479Sjb			 * we are tracing, perform the appropriate response.
638178479Sjb			 * Note that we ignore PR_SUSPENDED, PR_CHECKPOINT, and
639178479Sjb			 * PR_JOBCONTROL by design: if one of these conditions
640178479Sjb			 * occurs, we will fall through to Psetrun() but the
641178479Sjb			 * process will remain stopped in the kernel by the
642178479Sjb			 * corresponding mechanism (e.g. job control stop).
643178479Sjb			 */
644178479Sjb			if (psp->pr_why == PR_FAULTED && psp->pr_what == FLTBPT)
645178479Sjb				dt_proc_bpmatch(dtp, dpr);
646178479Sjb			else if (psp->pr_why == PR_SYSENTRY &&
647178479Sjb			    IS_SYS_FORK(psp->pr_what))
648178479Sjb				dt_proc_bpdisable(dpr);
649178479Sjb			else if (psp->pr_why == PR_SYSEXIT &&
650178479Sjb			    IS_SYS_FORK(psp->pr_what))
651178479Sjb				dt_proc_bpenable(dpr);
652178479Sjb			else if (psp->pr_why == PR_SYSEXIT &&
653178479Sjb			    IS_SYS_EXEC(psp->pr_what))
654178479Sjb				dt_proc_attach(dpr, B_TRUE);
655178565Sjb#endif
656178479Sjb			break;
657178479Sjb
658178479Sjb		case PS_LOST:
659178565Sjb#if defined(sun)
660178479Sjb			if (Preopen(P) == 0)
661178479Sjb				goto pwait_locked;
662178565Sjb#endif
663178479Sjb
			/*
			 * NOTE(review): on the non-sun path no call visible
			 * here sets errno before it is reported -- confirm the
			 * strerror() text is meaningful in that build.
			 */
664178479Sjb			dt_dprintf("pid %d: proc lost: %s\n",
665178479Sjb			    pid, strerror(errno));
666178479Sjb
667178479Sjb			dpr->dpr_quit = B_TRUE;
668178479Sjb			notify = B_TRUE;
669178479Sjb			break;
670178479Sjb
671178479Sjb		case PS_UNDEAD:
672178479Sjb			dt_dprintf("pid %d: proc died\n", pid);
673178479Sjb			dpr->dpr_quit = B_TRUE;
674178479Sjb			notify = B_TRUE;
675178479Sjb			break;
676178479Sjb		}
677178479Sjb
		/* Only the sun build sets the victim running again here. */
678178565Sjb#if defined(sun)
679178479Sjb		if (Pstate(P) != PS_UNDEAD && Psetrun(P, 0, 0) == -1) {
680178479Sjb			dt_dprintf("pid %d: failed to set running: %s\n",
681178479Sjb			    (int)dpr->dpr_pid, strerror(errno));
682178479Sjb		}
683178565Sjb#endif
684178479Sjb
685178479Sjb		(void) pthread_mutex_unlock(&dpr->dpr_lock);
686178479Sjb	}
687178479Sjb
688178479Sjb	/*
689178479Sjb	 * If the control thread detected PS_UNDEAD or PS_LOST, then enqueue
690178479Sjb	 * the dt_proc_t structure on the dt_proc_hash_t notification list.
691178479Sjb	 */
692178479Sjb	if (notify)
693178479Sjb		dt_proc_notify(dtp, dph, dpr, NULL);
694178479Sjb
695178479Sjb	/*
696178479Sjb	 * Destroy and remove any remaining breakpoints, set dpr_done and clear
697178479Sjb	 * dpr_tid to indicate the control thread has exited, and notify any
698178479Sjb	 * waiting thread in dt_proc_destroy() that we have successfully exited.
699178479Sjb	 */
700178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
701178479Sjb
702178479Sjb	dt_proc_bpdestroy(dpr, B_TRUE);
703178479Sjb	dpr->dpr_done = B_TRUE;
704178479Sjb	dpr->dpr_tid = 0;
705178479Sjb
706178479Sjb	(void) pthread_cond_broadcast(&dpr->dpr_cv);
707178479Sjb	(void) pthread_mutex_unlock(&dpr->dpr_lock);
708178479Sjb
709178479Sjb	return (NULL);
710178479Sjb}
711178479Sjb
712178479Sjb/*PRINTFLIKE3*/
713178479Sjbstatic struct ps_prochandle *
714178479Sjbdt_proc_error(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *format, ...)
715178479Sjb{
716178479Sjb	va_list ap;
717178479Sjb
718178479Sjb	va_start(ap, format);
719178479Sjb	dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap);
720178479Sjb	va_end(ap);
721178479Sjb
722178479Sjb	if (dpr->dpr_proc != NULL)
723178565Sjb#if defined(sun)
724178479Sjb		Prelease(dpr->dpr_proc, 0);
725178565Sjb#else
726210692Srpaulo		proc_detach(dpr->dpr_proc, 0);
727178565Sjb#endif
728178479Sjb
729178479Sjb	dt_free(dtp, dpr);
730178479Sjb	(void) dt_set_errno(dtp, EDT_COMPILER);
731178479Sjb	return (NULL);
732178479Sjb}
733178479Sjb
734178479Sjbdt_proc_t *
735178479Sjbdt_proc_lookup(dtrace_hdl_t *dtp, struct ps_prochandle *P, int remove)
736178479Sjb{
737178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
738178565Sjb#if defined(sun)
739178479Sjb	pid_t pid = Pstatus(P)->pr_pid;
740178565Sjb#else
741178565Sjb	pid_t pid = proc_getpid(P);
742178565Sjb#endif
743178479Sjb	dt_proc_t *dpr, **dpp = &dph->dph_hash[pid & (dph->dph_hashlen - 1)];
744178479Sjb
745178479Sjb	for (dpr = *dpp; dpr != NULL; dpr = dpr->dpr_hash) {
746178479Sjb		if (dpr->dpr_pid == pid)
747178479Sjb			break;
748178479Sjb		else
749178479Sjb			dpp = &dpr->dpr_hash;
750178479Sjb	}
751178479Sjb
752178479Sjb	assert(dpr != NULL);
753178479Sjb	assert(dpr->dpr_proc == P);
754178479Sjb
755178479Sjb	if (remove)
756178479Sjb		*dpp = dpr->dpr_hash; /* remove from pid hash chain */
757178479Sjb
758178479Sjb	return (dpr);
759178479Sjb}
760178479Sjb
761178479Sjbstatic void
762178479Sjbdt_proc_destroy(dtrace_hdl_t *dtp, struct ps_prochandle *P)
763178479Sjb{
764178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
765178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
766178479Sjb	dt_proc_notify_t *npr, **npp;
767178479Sjb	int rflag;
768178479Sjb
769178479Sjb	assert(dpr != NULL);
770178479Sjb
771178479Sjb	/*
772178479Sjb	 * If neither PR_KLC nor PR_RLC is set, then the process is stopped by
773178479Sjb	 * an external debugger and we were waiting in dt_proc_waitrun().
774178479Sjb	 * Leave the process in this condition using PRELEASE_HANG.
775178479Sjb	 */
776178565Sjb#if defined(sun)
777178479Sjb	if (!(Pstatus(dpr->dpr_proc)->pr_flags & (PR_KLC | PR_RLC))) {
778178565Sjb#else
779178565Sjb	if (!(proc_getflags(dpr->dpr_proc) & (PR_KLC | PR_RLC))) {
780178565Sjb#endif
781178479Sjb		dt_dprintf("abandoning pid %d\n", (int)dpr->dpr_pid);
782178565Sjb#if defined(sun)
783178479Sjb		rflag = PRELEASE_HANG;
784178565Sjb#else
785178565Sjb		rflag = 0 /* XXX */;
786178565Sjb#endif
787178479Sjb	} else {
788178479Sjb		dt_dprintf("releasing pid %d\n", (int)dpr->dpr_pid);
789178479Sjb		rflag = 0; /* apply kill or run-on-last-close */
790178479Sjb	}
791178479Sjb
792178479Sjb	if (dpr->dpr_tid) {
793178479Sjb		/*
794178479Sjb		 * Set the dpr_quit flag to tell the daemon thread to exit.  We
795178479Sjb		 * send it a SIGCANCEL to poke it out of PCWSTOP or any other
796178479Sjb		 * long-term /proc system call.  Our daemon threads have POSIX
797178479Sjb		 * cancellation disabled, so EINTR will be the only effect.  We
798178479Sjb		 * then wait for dpr_done to indicate the thread has exited.
799178479Sjb		 *
800178479Sjb		 * We can't use pthread_kill() to send SIGCANCEL because the
801178479Sjb		 * interface forbids it and we can't use pthread_cancel()
802178479Sjb		 * because with cancellation disabled it won't actually
803178479Sjb		 * send SIGCANCEL to the target thread, so we use _lwp_kill()
804178479Sjb		 * to do the job.  This is all built on evil knowledge of
805178479Sjb		 * the details of the cancellation mechanism in libc.
806178479Sjb		 */
807178479Sjb		(void) pthread_mutex_lock(&dpr->dpr_lock);
808178479Sjb		dpr->dpr_quit = B_TRUE;
809178565Sjb#if defined(sun)
810178479Sjb		(void) _lwp_kill(dpr->dpr_tid, SIGCANCEL);
811178565Sjb#else
812178565Sjb		(void) pthread_kill(dpr->dpr_tid, SIGUSR1);
813178565Sjb#endif
814178479Sjb
815178479Sjb		/*
816178479Sjb		 * If the process is currently idling in dt_proc_stop(), re-
817178479Sjb		 * enable breakpoints and poke it into running again.
818178479Sjb		 */
819178479Sjb		if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
820178479Sjb			dt_proc_bpenable(dpr);
821178479Sjb			dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
822178479Sjb			(void) pthread_cond_broadcast(&dpr->dpr_cv);
823178479Sjb		}
824178479Sjb
825178479Sjb		while (!dpr->dpr_done)
826178479Sjb			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
827178479Sjb
828178479Sjb		(void) pthread_mutex_unlock(&dpr->dpr_lock);
829178479Sjb	}
830178479Sjb
831178479Sjb	/*
832178479Sjb	 * Before we free the process structure, remove this dt_proc_t from the
833178479Sjb	 * lookup hash, and then walk the dt_proc_hash_t's notification list
834178479Sjb	 * and remove this dt_proc_t if it is enqueued.
835178479Sjb	 */
836178479Sjb	(void) pthread_mutex_lock(&dph->dph_lock);
837178479Sjb	(void) dt_proc_lookup(dtp, P, B_TRUE);
838178479Sjb	npp = &dph->dph_notify;
839178479Sjb
840178479Sjb	while ((npr = *npp) != NULL) {
841178479Sjb		if (npr->dprn_dpr == dpr) {
842178479Sjb			*npp = npr->dprn_next;
843178479Sjb			dt_free(dtp, npr);
844178479Sjb		} else {
845178479Sjb			npp = &npr->dprn_next;
846178479Sjb		}
847178479Sjb	}
848178479Sjb
849178479Sjb	(void) pthread_mutex_unlock(&dph->dph_lock);
850178479Sjb
851178479Sjb	/*
852178479Sjb	 * Remove the dt_proc_list from the LRU list, release the underlying
853178479Sjb	 * libproc handle, and free our dt_proc_t data structure.
854178479Sjb	 */
855178479Sjb	if (dpr->dpr_cacheable) {
856178479Sjb		assert(dph->dph_lrucnt != 0);
857178479Sjb		dph->dph_lrucnt--;
858178479Sjb	}
859178479Sjb
860178479Sjb	dt_list_delete(&dph->dph_lrulist, dpr);
861178565Sjb#if defined(sun)
862178479Sjb	Prelease(dpr->dpr_proc, rflag);
863178565Sjb#else
864210692Srpaulo	proc_detach(dpr->dpr_proc, rflag);
865178565Sjb#endif
866178479Sjb	dt_free(dtp, dpr);
867178479Sjb}
868178479Sjb
869178479Sjbstatic int
870178479Sjbdt_proc_create_thread(dtrace_hdl_t *dtp, dt_proc_t *dpr, uint_t stop)
871178479Sjb{
872178479Sjb	dt_proc_control_data_t data;
873178479Sjb	sigset_t nset, oset;
874178479Sjb	pthread_attr_t a;
875178479Sjb	int err;
876178479Sjb
877178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
878178479Sjb	dpr->dpr_stop |= stop; /* set bit for initial rendezvous */
879178479Sjb
880178479Sjb	(void) pthread_attr_init(&a);
881178479Sjb	(void) pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
882178479Sjb
883178479Sjb	(void) sigfillset(&nset);
884178479Sjb	(void) sigdelset(&nset, SIGABRT);	/* unblocked for assert() */
885178565Sjb#if defined(sun)
886178479Sjb	(void) sigdelset(&nset, SIGCANCEL);	/* see dt_proc_destroy() */
887178565Sjb#else
888178565Sjb	(void) sigdelset(&nset, SIGUSR1);	/* see dt_proc_destroy() */
889178565Sjb#endif
890178479Sjb
891178479Sjb	data.dpcd_hdl = dtp;
892178479Sjb	data.dpcd_proc = dpr;
893178479Sjb
894178479Sjb	(void) pthread_sigmask(SIG_SETMASK, &nset, &oset);
895178479Sjb	err = pthread_create(&dpr->dpr_tid, &a, dt_proc_control, &data);
896178479Sjb	(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);
897178479Sjb
898178479Sjb	/*
899178479Sjb	 * If the control thread was created, then wait on dpr_cv for either
900178479Sjb	 * dpr_done to be set (the victim died or the control thread failed)
901178479Sjb	 * or DT_PROC_STOP_IDLE to be set, indicating that the victim is now
902178479Sjb	 * stopped by /proc and the control thread is at the rendezvous event.
903178479Sjb	 * On success, we return with the process and control thread stopped:
904178479Sjb	 * the caller can then apply dt_proc_continue() to resume both.
905178479Sjb	 */
906178479Sjb	if (err == 0) {
907178479Sjb		while (!dpr->dpr_done && !(dpr->dpr_stop & DT_PROC_STOP_IDLE))
908178479Sjb			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
909178479Sjb
910178479Sjb		/*
911178479Sjb		 * If dpr_done is set, the control thread aborted before it
912178479Sjb		 * reached the rendezvous event.  This is either due to PS_LOST
913178479Sjb		 * or PS_UNDEAD (i.e. the process died).  We try to provide a
914178479Sjb		 * small amount of useful information to help figure it out.
915178479Sjb		 */
916178479Sjb		if (dpr->dpr_done) {
917178565Sjb#if defined(sun)
918178479Sjb			const psinfo_t *prp = Ppsinfo(dpr->dpr_proc);
919178479Sjb			int stat = prp ? prp->pr_wstat : 0;
920178565Sjb#endif
921178479Sjb			int pid = dpr->dpr_pid;
922178479Sjb
923178565Sjb#if defined(sun)
924178479Sjb			if (Pstate(dpr->dpr_proc) == PS_LOST) {
925178565Sjb#else
926178565Sjb			if (proc_state(dpr->dpr_proc) == PS_LOST) {
927178565Sjb#endif
928178479Sjb				(void) dt_proc_error(dpr->dpr_hdl, dpr,
929178479Sjb				    "failed to control pid %d: process exec'd "
930178479Sjb				    "set-id or unobservable program\n", pid);
931178565Sjb#if defined(sun)
932178479Sjb			} else if (WIFSIGNALED(stat)) {
933178479Sjb				(void) dt_proc_error(dpr->dpr_hdl, dpr,
934178479Sjb				    "failed to control pid %d: process died "
935178479Sjb				    "from signal %d\n", pid, WTERMSIG(stat));
936178479Sjb			} else {
937178479Sjb				(void) dt_proc_error(dpr->dpr_hdl, dpr,
938178479Sjb				    "failed to control pid %d: process exited "
939178479Sjb				    "with status %d\n", pid, WEXITSTATUS(stat));
940178565Sjb#endif
941178479Sjb			}
942178479Sjb
943178479Sjb			err = ESRCH; /* cause grab() or create() to fail */
944178479Sjb		}
945178479Sjb	} else {
946178479Sjb		(void) dt_proc_error(dpr->dpr_hdl, dpr,
947178479Sjb		    "failed to create control thread for process-id %d: %s\n",
948178479Sjb		    (int)dpr->dpr_pid, strerror(err));
949178479Sjb	}
950178479Sjb
951178479Sjb	(void) pthread_mutex_unlock(&dpr->dpr_lock);
952178479Sjb	(void) pthread_attr_destroy(&a);
953178479Sjb
954178479Sjb	return (err);
955178479Sjb}
956178479Sjb
957178479Sjbstruct ps_prochandle *
958184696Srodrigcdt_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv,
959184696Srodrigc    proc_child_func *pcf, void *child_arg)
960178479Sjb{
961178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
962178479Sjb	dt_proc_t *dpr;
963178479Sjb	int err;
964178479Sjb
965178479Sjb	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
966178479Sjb		return (NULL); /* errno is set for us */
967178479Sjb
968178479Sjb	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
969178479Sjb	(void) pthread_cond_init(&dpr->dpr_cv, NULL);
970178479Sjb
971178565Sjb#if defined(sun)
972178479Sjb	if ((dpr->dpr_proc = Pcreate(file, argv, &err, NULL, 0)) == NULL) {
973178479Sjb		return (dt_proc_error(dtp, dpr,
974178479Sjb		    "failed to execute %s: %s\n", file, Pcreate_error(err)));
975178479Sjb	}
976178479Sjb
977178479Sjb	dpr->dpr_hdl = dtp;
978178479Sjb	dpr->dpr_pid = Pstatus(dpr->dpr_proc)->pr_pid;
979178479Sjb
980178479Sjb	(void) Punsetflags(dpr->dpr_proc, PR_RLC);
981178479Sjb	(void) Psetflags(dpr->dpr_proc, PR_KLC);
982178565Sjb#else
983178565Sjb	(void) proc_clearflags(dpr->dpr_proc, PR_RLC);
984178565Sjb	(void) proc_setflags(dpr->dpr_proc, PR_KLC);
985184696Srodrigc	if ((err = proc_create(file, argv, pcf, child_arg, &dpr->dpr_proc)) != 0)
986178565Sjb		return (dt_proc_error(dtp, dpr,
987178565Sjb		    "failed to execute %s: %s\n", file, strerror(err)));
988178565Sjb	dpr->dpr_hdl = dtp;
989178565Sjb	dpr->dpr_pid = proc_getpid(dpr->dpr_proc);
990178565Sjb#endif
991178479Sjb
992178565Sjb#if defined(sun)
993178479Sjb	if (dt_proc_create_thread(dtp, dpr, dtp->dt_prcmode) != 0)
994178565Sjb#else
995178565Sjb	if (dt_proc_create_thread(dtp, dpr, DT_PROC_STOP_IDLE) != 0)
996178565Sjb#endif
997178479Sjb		return (NULL); /* dt_proc_error() has been called for us */
998178479Sjb
999178479Sjb	dpr->dpr_hash = dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)];
1000178479Sjb	dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)] = dpr;
1001178479Sjb	dt_list_prepend(&dph->dph_lrulist, dpr);
1002178479Sjb
1003178479Sjb	dt_dprintf("created pid %d\n", (int)dpr->dpr_pid);
1004178479Sjb	dpr->dpr_refs++;
1005178479Sjb
1006178479Sjb	return (dpr->dpr_proc);
1007178479Sjb}
1008178479Sjb
1009178479Sjbstruct ps_prochandle *
1010178479Sjbdt_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags, int nomonitor)
1011178479Sjb{
1012178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
1013178479Sjb	uint_t h = pid & (dph->dph_hashlen - 1);
1014178479Sjb	dt_proc_t *dpr, *opr;
1015178479Sjb	int err;
1016178479Sjb
1017178479Sjb	/*
1018178479Sjb	 * Search the hash table for the pid.  If it is already grabbed or
1019178479Sjb	 * created, move the handle to the front of the lrulist, increment
1020178479Sjb	 * the reference count, and return the existing ps_prochandle.
1021178479Sjb	 */
1022178479Sjb	for (dpr = dph->dph_hash[h]; dpr != NULL; dpr = dpr->dpr_hash) {
1023178479Sjb		if (dpr->dpr_pid == pid && !dpr->dpr_stale) {
1024178479Sjb			/*
1025178479Sjb			 * If the cached handle was opened read-only and
1026178479Sjb			 * this request is for a writeable handle, mark
1027178479Sjb			 * the cached handle as stale and open a new handle.
1028178479Sjb			 * Since it's stale, unmark it as cacheable.
1029178479Sjb			 */
1030178479Sjb			if (dpr->dpr_rdonly && !(flags & PGRAB_RDONLY)) {
1031178479Sjb				dt_dprintf("upgrading pid %d\n", (int)pid);
1032178479Sjb				dpr->dpr_stale = B_TRUE;
1033178479Sjb				dpr->dpr_cacheable = B_FALSE;
1034178479Sjb				dph->dph_lrucnt--;
1035178479Sjb				break;
1036178479Sjb			}
1037178479Sjb
1038178479Sjb			dt_dprintf("grabbed pid %d (cached)\n", (int)pid);
1039178479Sjb			dt_list_delete(&dph->dph_lrulist, dpr);
1040178479Sjb			dt_list_prepend(&dph->dph_lrulist, dpr);
1041178479Sjb			dpr->dpr_refs++;
1042178479Sjb			return (dpr->dpr_proc);
1043178479Sjb		}
1044178479Sjb	}
1045178479Sjb
1046178479Sjb	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
1047178479Sjb		return (NULL); /* errno is set for us */
1048178479Sjb
1049178479Sjb	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
1050178479Sjb	(void) pthread_cond_init(&dpr->dpr_cv, NULL);
1051178479Sjb
1052178565Sjb#if defined(sun)
1053178479Sjb	if ((dpr->dpr_proc = Pgrab(pid, flags, &err)) == NULL) {
1054178479Sjb		return (dt_proc_error(dtp, dpr,
1055178479Sjb		    "failed to grab pid %d: %s\n", (int)pid, Pgrab_error(err)));
1056178479Sjb	}
1057178565Sjb#else
1058178565Sjb	if ((err = proc_attach(pid, flags, &dpr->dpr_proc)) != 0)
1059178565Sjb		return (dt_proc_error(dtp, dpr,
1060178565Sjb		    "failed to grab pid %d: %s\n", (int) pid, strerror(err)));
1061178565Sjb#endif
1062178479Sjb
1063178479Sjb	dpr->dpr_hdl = dtp;
1064178479Sjb	dpr->dpr_pid = pid;
1065178479Sjb
1066178565Sjb#if defined(sun)
1067178479Sjb	(void) Punsetflags(dpr->dpr_proc, PR_KLC);
1068178479Sjb	(void) Psetflags(dpr->dpr_proc, PR_RLC);
1069178565Sjb#else
1070178565Sjb	(void) proc_clearflags(dpr->dpr_proc, PR_KLC);
1071178565Sjb	(void) proc_setflags(dpr->dpr_proc, PR_RLC);
1072178565Sjb#endif
1073178479Sjb
1074178479Sjb	/*
1075178479Sjb	 * If we are attempting to grab the process without a monitor
1076178479Sjb	 * thread, then mark the process cacheable only if it's being
1077178479Sjb	 * grabbed read-only.  If we're currently caching more process
1078178479Sjb	 * handles than dph_lrulim permits, attempt to find the
1079178479Sjb	 * least-recently-used handle that is currently unreferenced and
1080178479Sjb	 * release it from the cache.  Otherwise we are grabbing the process
1081178479Sjb	 * for control: create a control thread for this process and store
1082178479Sjb	 * its ID in dpr->dpr_tid.
1083178479Sjb	 */
1084178479Sjb	if (nomonitor || (flags & PGRAB_RDONLY)) {
1085178479Sjb		if (dph->dph_lrucnt >= dph->dph_lrulim) {
1086178479Sjb			for (opr = dt_list_prev(&dph->dph_lrulist);
1087178479Sjb			    opr != NULL; opr = dt_list_prev(opr)) {
1088178479Sjb				if (opr->dpr_cacheable && opr->dpr_refs == 0) {
1089178479Sjb					dt_proc_destroy(dtp, opr->dpr_proc);
1090178479Sjb					break;
1091178479Sjb				}
1092178479Sjb			}
1093178479Sjb		}
1094178479Sjb
1095178479Sjb		if (flags & PGRAB_RDONLY) {
1096178479Sjb			dpr->dpr_cacheable = B_TRUE;
1097178479Sjb			dpr->dpr_rdonly = B_TRUE;
1098178479Sjb			dph->dph_lrucnt++;
1099178479Sjb		}
1100178479Sjb
1101178479Sjb	} else if (dt_proc_create_thread(dtp, dpr, DT_PROC_STOP_GRAB) != 0)
1102178479Sjb		return (NULL); /* dt_proc_error() has been called for us */
1103178479Sjb
1104178479Sjb	dpr->dpr_hash = dph->dph_hash[h];
1105178479Sjb	dph->dph_hash[h] = dpr;
1106178479Sjb	dt_list_prepend(&dph->dph_lrulist, dpr);
1107178479Sjb
1108178479Sjb	dt_dprintf("grabbed pid %d\n", (int)pid);
1109178479Sjb	dpr->dpr_refs++;
1110178479Sjb
1111178479Sjb	return (dpr->dpr_proc);
1112178479Sjb}
1113178479Sjb
1114178479Sjbvoid
1115178479Sjbdt_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1116178479Sjb{
1117178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1118178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
1119178479Sjb
1120178479Sjb	assert(dpr != NULL);
1121178479Sjb	assert(dpr->dpr_refs != 0);
1122178479Sjb
1123178479Sjb	if (--dpr->dpr_refs == 0 &&
1124178479Sjb	    (!dpr->dpr_cacheable || dph->dph_lrucnt > dph->dph_lrulim))
1125178479Sjb		dt_proc_destroy(dtp, P);
1126178479Sjb}
1127178479Sjb
1128178479Sjbvoid
1129178479Sjbdt_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1130178479Sjb{
1131178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1132178479Sjb
1133178479Sjb	(void) pthread_mutex_lock(&dpr->dpr_lock);
1134178479Sjb
1135178479Sjb	if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
1136178479Sjb		dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
1137178479Sjb		(void) pthread_cond_broadcast(&dpr->dpr_cv);
1138178479Sjb	}
1139178479Sjb
1140178479Sjb	(void) pthread_mutex_unlock(&dpr->dpr_lock);
1141178479Sjb}
1142178479Sjb
1143178479Sjbvoid
1144178479Sjbdt_proc_lock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1145178479Sjb{
1146178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1147178479Sjb	int err = pthread_mutex_lock(&dpr->dpr_lock);
1148178479Sjb	assert(err == 0); /* check for recursion */
1149178479Sjb}
1150178479Sjb
1151178479Sjbvoid
1152178479Sjbdt_proc_unlock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1153178479Sjb{
1154178479Sjb	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1155178479Sjb	int err = pthread_mutex_unlock(&dpr->dpr_lock);
1156178479Sjb	assert(err == 0); /* check for unheld lock */
1157178479Sjb}
1158178479Sjb
1159178479Sjbvoid
1160178479Sjbdt_proc_hash_create(dtrace_hdl_t *dtp)
1161178479Sjb{
1162178479Sjb	if ((dtp->dt_procs = dt_zalloc(dtp, sizeof (dt_proc_hash_t) +
1163178479Sjb	    sizeof (dt_proc_t *) * _dtrace_pidbuckets - 1)) != NULL) {
1164178479Sjb
1165178479Sjb		(void) pthread_mutex_init(&dtp->dt_procs->dph_lock, NULL);
1166178479Sjb		(void) pthread_cond_init(&dtp->dt_procs->dph_cv, NULL);
1167178479Sjb
1168178479Sjb		dtp->dt_procs->dph_hashlen = _dtrace_pidbuckets;
1169178479Sjb		dtp->dt_procs->dph_lrulim = _dtrace_pidlrulim;
1170178479Sjb	}
1171178479Sjb}
1172178479Sjb
1173178479Sjbvoid
1174178479Sjbdt_proc_hash_destroy(dtrace_hdl_t *dtp)
1175178479Sjb{
1176178479Sjb	dt_proc_hash_t *dph = dtp->dt_procs;
1177178479Sjb	dt_proc_t *dpr;
1178178479Sjb
1179178479Sjb	while ((dpr = dt_list_next(&dph->dph_lrulist)) != NULL)
1180178479Sjb		dt_proc_destroy(dtp, dpr->dpr_proc);
1181178479Sjb
1182178479Sjb	dtp->dt_procs = NULL;
1183178479Sjb	dt_free(dtp, dph);
1184178479Sjb}
1185178479Sjb
1186178479Sjbstruct ps_prochandle *
1187184696Srodrigcdtrace_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv,
1188184696Srodrigc    proc_child_func *pcf, void *child_arg)
1189178479Sjb{
1190178479Sjb	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
1191184696Srodrigc	struct ps_prochandle *P = dt_proc_create(dtp, file, argv, pcf, child_arg);
1192178479Sjb
1193178479Sjb	if (P != NULL && idp != NULL && idp->di_id == 0)
1194178565Sjb#if defined(sun)
1195178479Sjb		idp->di_id = Pstatus(P)->pr_pid; /* $target = created pid */
1196178565Sjb#else
1197178565Sjb		idp->di_id = proc_getpid(P); /* $target = created pid */
1198178565Sjb#endif
1199178479Sjb
1200178479Sjb	return (P);
1201178479Sjb}
1202178479Sjb
1203178479Sjbstruct ps_prochandle *
1204178479Sjbdtrace_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags)
1205178479Sjb{
1206178479Sjb	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
1207178479Sjb	struct ps_prochandle *P = dt_proc_grab(dtp, pid, flags, 0);
1208178479Sjb
1209178479Sjb	if (P != NULL && idp != NULL && idp->di_id == 0)
1210178479Sjb		idp->di_id = pid; /* $target = grabbed pid */
1211178479Sjb
1212178479Sjb	return (P);
1213178479Sjb}
1214178479Sjb
1215178479Sjbvoid
1216178479Sjbdtrace_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1217178479Sjb{
1218178479Sjb	dt_proc_release(dtp, P);
1219178479Sjb}
1220178479Sjb
1221178479Sjbvoid
1222178479Sjbdtrace_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1223178479Sjb{
1224178479Sjb	dt_proc_continue(dtp, P);
1225178479Sjb}
1226