dt_proc.c revision 178528
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * DTrace Process Control
31 *
32 * This file provides a set of routines that permit libdtrace and its clients
33 * to create and grab process handles using libproc, and to share these handles
34 * between library mechanisms that need libproc access, such as ustack(), and
35 * client mechanisms that need libproc access, such as dtrace(1M) -c and -p.
36 * The library provides several mechanisms in the libproc control layer:
37 *
38 * Reference Counting: The library code and client code can independently grab
39 * the same process handles without interfering with one another.  Only when
40 * the reference count drops to zero and the handle is not being cached (see
41 * below for more information on caching) will Prelease() be called on it.
42 *
43 * Handle Caching: If a handle is grabbed PGRAB_RDONLY (e.g. by ustack()) and
44 * the reference count drops to zero, the handle is not immediately released.
45 * Instead, libproc handles are maintained on dph_lrulist in order from most-
46 * recently accessed to least-recently accessed.  Idle handles are maintained
47 * until a pre-defined LRU cache limit is exceeded, permitting repeated calls
48 * to ustack() to avoid the overhead of releasing and re-grabbing processes.
49 *
50 * Process Control: For processes that are grabbed for control (~PGRAB_RDONLY)
51 * or created by dt_proc_create(), a control thread is created to provide
52 * callbacks on process exit and symbol table caching on dlopen()s.
53 *
54 * MT-Safety: Libproc is not MT-Safe, so dt_proc_lock() and dt_proc_unlock()
55 * are provided to synchronize access to the libproc handle between libdtrace
56 * code and client code and the control thread's use of the ps_prochandle.
57 *
58 * NOTE: MT-Safety is NOT provided for libdtrace itself, or for use of the
59 * dtrace_proc_grab/dtrace_proc_create mechanisms.  Like all exported libdtrace
60 * calls, these are assumed to be MT-Unsafe.  MT-Safety is ONLY provided for
61 * synchronization between libdtrace control threads and the client thread.
62 *
63 * The ps_prochandles themselves are maintained along with a dt_proc_t struct
64 * in a hash table indexed by PID.  This provides basic locking and reference
65 * counting.  The dt_proc_t is also maintained in LRU order on dph_lrulist.
66 * The dph_lrucnt and dph_lrulim count the number of cacheable processes and
67 * the current limit on the number of actively cached entries.
68 *
69 * The control thread for a process establishes breakpoints at the rtld_db
70 * locations of interest, updates mappings and symbol tables at these points,
71 * and handles exec and fork (by always following the parent).  The control
72 * thread automatically exits when the process dies or control is lost.
73 *
74 * A simple notification mechanism is provided for libdtrace clients using
75 * dtrace_handle_proc() for notification of PS_UNDEAD or PS_LOST events.  If
76 * such an event occurs, the dt_proc_t itself is enqueued on a notification
77 * list and the control thread broadcasts to dph_cv.  dtrace_sleep() will wake
78 * up using this condition and will then call the client handler as necessary.
79 */
80
81#include <sys/wait.h>
82#include <sys/lwp.h>
83#include <strings.h>
84#include <signal.h>
85#include <assert.h>
86#include <errno.h>
87
88#include <dt_proc.h>
89#include <dt_pid.h>
90#include <dt_impl.h>
91
/*
 * Classify a system call number reported by /proc (pr_what) as one of the
 * exec or fork variants traced by the control thread.  The argument is fully
 * parenthesized so that expressions containing lower-precedence operators
 * (e.g. "x & mask") are compared as a whole.  Note that 'w' is evaluated
 * more than once, so it must not have side effects.
 */
#define	IS_SYS_EXEC(w)	((w) == SYS_exec || (w) == SYS_execve)
#define	IS_SYS_FORK(w)	((w) == SYS_vfork || (w) == SYS_fork1 ||	\
			(w) == SYS_forkall || (w) == SYS_forksys)
95
96static dt_bkpt_t *
97dt_proc_bpcreate(dt_proc_t *dpr, uintptr_t addr, dt_bkpt_f *func, void *data)
98{
99	struct ps_prochandle *P = dpr->dpr_proc;
100	dt_bkpt_t *dbp;
101
102	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
103
104	if ((dbp = dt_zalloc(dpr->dpr_hdl, sizeof (dt_bkpt_t))) != NULL) {
105		dbp->dbp_func = func;
106		dbp->dbp_data = data;
107		dbp->dbp_addr = addr;
108
109		if (Psetbkpt(P, dbp->dbp_addr, &dbp->dbp_instr) == 0)
110			dbp->dbp_active = B_TRUE;
111
112		dt_list_append(&dpr->dpr_bps, dbp);
113	}
114
115	return (dbp);
116}
117
118static void
119dt_proc_bpdestroy(dt_proc_t *dpr, int delbkpts)
120{
121	int state = Pstate(dpr->dpr_proc);
122	dt_bkpt_t *dbp, *nbp;
123
124	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
125
126	for (dbp = dt_list_next(&dpr->dpr_bps); dbp != NULL; dbp = nbp) {
127		if (delbkpts && dbp->dbp_active &&
128		    state != PS_LOST && state != PS_UNDEAD) {
129			(void) Pdelbkpt(dpr->dpr_proc,
130			    dbp->dbp_addr, dbp->dbp_instr);
131		}
132		nbp = dt_list_next(dbp);
133		dt_list_delete(&dpr->dpr_bps, dbp);
134		dt_free(dpr->dpr_hdl, dbp);
135	}
136}
137
138static void
139dt_proc_bpmatch(dtrace_hdl_t *dtp, dt_proc_t *dpr)
140{
141	const lwpstatus_t *psp = &Pstatus(dpr->dpr_proc)->pr_lwp;
142	dt_bkpt_t *dbp;
143
144	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
145
146	for (dbp = dt_list_next(&dpr->dpr_bps);
147	    dbp != NULL; dbp = dt_list_next(dbp)) {
148		if (psp->pr_reg[R_PC] == dbp->dbp_addr)
149			break;
150	}
151
152	if (dbp == NULL) {
153		dt_dprintf("pid %d: spurious breakpoint wakeup for %lx\n",
154		    (int)dpr->dpr_pid, (ulong_t)psp->pr_reg[R_PC]);
155		return;
156	}
157
158	dt_dprintf("pid %d: hit breakpoint at %lx (%lu)\n",
159	    (int)dpr->dpr_pid, (ulong_t)dbp->dbp_addr, ++dbp->dbp_hits);
160
161	dbp->dbp_func(dtp, dpr, dbp->dbp_data);
162	(void) Pxecbkpt(dpr->dpr_proc, dbp->dbp_instr);
163}
164
165static void
166dt_proc_bpenable(dt_proc_t *dpr)
167{
168	dt_bkpt_t *dbp;
169
170	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
171
172	for (dbp = dt_list_next(&dpr->dpr_bps);
173	    dbp != NULL; dbp = dt_list_next(dbp)) {
174		if (!dbp->dbp_active && Psetbkpt(dpr->dpr_proc,
175		    dbp->dbp_addr, &dbp->dbp_instr) == 0)
176			dbp->dbp_active = B_TRUE;
177	}
178
179	dt_dprintf("breakpoints enabled\n");
180}
181
182static void
183dt_proc_bpdisable(dt_proc_t *dpr)
184{
185	dt_bkpt_t *dbp;
186
187	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
188
189	for (dbp = dt_list_next(&dpr->dpr_bps);
190	    dbp != NULL; dbp = dt_list_next(dbp)) {
191		if (dbp->dbp_active && Pdelbkpt(dpr->dpr_proc,
192		    dbp->dbp_addr, dbp->dbp_instr) == 0)
193			dbp->dbp_active = B_FALSE;
194	}
195
196	dt_dprintf("breakpoints disabled\n");
197}
198
199static void
200dt_proc_notify(dtrace_hdl_t *dtp, dt_proc_hash_t *dph, dt_proc_t *dpr,
201    const char *msg)
202{
203	dt_proc_notify_t *dprn = dt_alloc(dtp, sizeof (dt_proc_notify_t));
204
205	if (dprn == NULL) {
206		dt_dprintf("failed to allocate notification for %d %s\n",
207		    (int)dpr->dpr_pid, msg);
208	} else {
209		dprn->dprn_dpr = dpr;
210		if (msg == NULL)
211			dprn->dprn_errmsg[0] = '\0';
212		else
213			(void) strlcpy(dprn->dprn_errmsg, msg,
214			    sizeof (dprn->dprn_errmsg));
215
216		(void) pthread_mutex_lock(&dph->dph_lock);
217
218		dprn->dprn_next = dph->dph_notify;
219		dph->dph_notify = dprn;
220
221		(void) pthread_cond_broadcast(&dph->dph_cv);
222		(void) pthread_mutex_unlock(&dph->dph_lock);
223	}
224}
225
226/*
227 * Check to see if the control thread was requested to stop when the victim
228 * process reached a particular event (why) rather than continuing the victim.
229 * If 'why' is set in the stop mask, we wait on dpr_cv for dt_proc_continue().
230 * If 'why' is not set, this function returns immediately and does nothing.
231 */
232static void
233dt_proc_stop(dt_proc_t *dpr, uint8_t why)
234{
235	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
236	assert(why != DT_PROC_STOP_IDLE);
237
238	if (dpr->dpr_stop & why) {
239		dpr->dpr_stop |= DT_PROC_STOP_IDLE;
240		dpr->dpr_stop &= ~why;
241
242		(void) pthread_cond_broadcast(&dpr->dpr_cv);
243
244		/*
245		 * We disable breakpoints while stopped to preserve the
246		 * integrity of the program text for both our own disassembly
247		 * and that of the kernel.
248		 */
249		dt_proc_bpdisable(dpr);
250
251		while (dpr->dpr_stop & DT_PROC_STOP_IDLE)
252			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
253
254		dt_proc_bpenable(dpr);
255	}
256}
257
258/*ARGSUSED*/
259static void
260dt_proc_bpmain(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *fname)
261{
262	dt_dprintf("pid %d: breakpoint at %s()\n", (int)dpr->dpr_pid, fname);
263	dt_proc_stop(dpr, DT_PROC_STOP_MAIN);
264}
265
266static void
267dt_proc_rdevent(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *evname)
268{
269	rd_event_msg_t rdm;
270	rd_err_e err;
271
272	if ((err = rd_event_getmsg(dpr->dpr_rtld, &rdm)) != RD_OK) {
273		dt_dprintf("pid %d: failed to get %s event message: %s\n",
274		    (int)dpr->dpr_pid, evname, rd_errstr(err));
275		return;
276	}
277
278	dt_dprintf("pid %d: rtld event %s type=%d state %d\n",
279	    (int)dpr->dpr_pid, evname, rdm.type, rdm.u.state);
280
281	switch (rdm.type) {
282	case RD_DLACTIVITY:
283		if (rdm.u.state != RD_CONSISTENT)
284			break;
285
286		Pupdate_syms(dpr->dpr_proc);
287		if (dt_pid_create_probes_module(dtp, dpr) != 0)
288			dt_proc_notify(dtp, dtp->dt_procs, dpr,
289			    dpr->dpr_errmsg);
290
291		break;
292	case RD_PREINIT:
293		Pupdate_syms(dpr->dpr_proc);
294		dt_proc_stop(dpr, DT_PROC_STOP_PREINIT);
295		break;
296	case RD_POSTINIT:
297		Pupdate_syms(dpr->dpr_proc);
298		dt_proc_stop(dpr, DT_PROC_STOP_POSTINIT);
299		break;
300	}
301}
302
303static void
304dt_proc_rdwatch(dt_proc_t *dpr, rd_event_e event, const char *evname)
305{
306	rd_notify_t rdn;
307	rd_err_e err;
308
309	if ((err = rd_event_addr(dpr->dpr_rtld, event, &rdn)) != RD_OK) {
310		dt_dprintf("pid %d: failed to get event address for %s: %s\n",
311		    (int)dpr->dpr_pid, evname, rd_errstr(err));
312		return;
313	}
314
315	if (rdn.type != RD_NOTIFY_BPT) {
316		dt_dprintf("pid %d: event %s has unexpected type %d\n",
317		    (int)dpr->dpr_pid, evname, rdn.type);
318		return;
319	}
320
321	(void) dt_proc_bpcreate(dpr, rdn.u.bptaddr,
322	    (dt_bkpt_f *)dt_proc_rdevent, (void *)evname);
323}
324
325/*
326 * Common code for enabling events associated with the run-time linker after
327 * attaching to a process or after a victim process completes an exec(2).
328 */
329static void
330dt_proc_attach(dt_proc_t *dpr, int exec)
331{
332	const pstatus_t *psp = Pstatus(dpr->dpr_proc);
333	rd_err_e err;
334	GElf_Sym sym;
335
336	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
337
338	if (exec) {
339		if (psp->pr_lwp.pr_errno != 0)
340			return; /* exec failed: nothing needs to be done */
341
342		dt_proc_bpdestroy(dpr, B_FALSE);
343		Preset_maps(dpr->dpr_proc);
344	}
345
346	if ((dpr->dpr_rtld = Prd_agent(dpr->dpr_proc)) != NULL &&
347	    (err = rd_event_enable(dpr->dpr_rtld, B_TRUE)) == RD_OK) {
348		dt_proc_rdwatch(dpr, RD_PREINIT, "RD_PREINIT");
349		dt_proc_rdwatch(dpr, RD_POSTINIT, "RD_POSTINIT");
350		dt_proc_rdwatch(dpr, RD_DLACTIVITY, "RD_DLACTIVITY");
351	} else {
352		dt_dprintf("pid %d: failed to enable rtld events: %s\n",
353		    (int)dpr->dpr_pid, dpr->dpr_rtld ? rd_errstr(err) :
354		    "rtld_db agent initialization failed");
355	}
356
357	Pupdate_maps(dpr->dpr_proc);
358
359	if (Pxlookup_by_name(dpr->dpr_proc, LM_ID_BASE,
360	    "a.out", "main", &sym, NULL) == 0) {
361		(void) dt_proc_bpcreate(dpr, (uintptr_t)sym.st_value,
362		    (dt_bkpt_f *)dt_proc_bpmain, "a.out`main");
363	} else {
364		dt_dprintf("pid %d: failed to find a.out`main: %s\n",
365		    (int)dpr->dpr_pid, strerror(errno));
366	}
367}
368
369/*
370 * Wait for a stopped process to be set running again by some other debugger.
371 * This is typically not required by /proc-based debuggers, since the usual
372 * model is that one debugger controls one victim.  But DTrace, as usual, has
373 * its own needs: the stop() action assumes that prun(1) or some other tool
374 * will be applied to resume the victim process.  This could be solved by
375 * adding a PCWRUN directive to /proc, but that seems like overkill unless
376 * other debuggers end up needing this functionality, so we implement a cheap
377 * equivalent to PCWRUN using the set of existing kernel mechanisms.
378 *
379 * Our intent is really not just to wait for the victim to run, but rather to
380 * wait for it to run and then stop again for a reason other than the current
381 * PR_REQUESTED stop.  Since PCWSTOP/Pstopstatus() can be applied repeatedly
382 * to a stopped process and will return the same result without affecting the
383 * victim, we can just perform these operations repeatedly until Pstate()
384 * changes, the representative LWP ID changes, or the stop timestamp advances.
385 * dt_proc_control() will then rediscover the new state and continue as usual.
386 * When the process is still stopped in the same exact state, we sleep for a
387 * brief interval before waiting again so as not to spin consuming CPU cycles.
388 */
389static void
390dt_proc_waitrun(dt_proc_t *dpr)
391{
392	struct ps_prochandle *P = dpr->dpr_proc;
393	const lwpstatus_t *psp = &Pstatus(P)->pr_lwp;
394
395	int krflag = psp->pr_flags & (PR_KLC | PR_RLC);
396	timestruc_t tstamp = psp->pr_tstamp;
397	lwpid_t lwpid = psp->pr_lwpid;
398
399	const long wstop = PCWSTOP;
400	int pfd = Pctlfd(P);
401
402	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
403	assert(psp->pr_flags & PR_STOPPED);
404	assert(Pstate(P) == PS_STOP);
405
406	/*
407	 * While we are waiting for the victim to run, clear PR_KLC and PR_RLC
408	 * so that if the libdtrace client is killed, the victim stays stopped.
409	 * dt_proc_destroy() will also observe this and perform PRELEASE_HANG.
410	 */
411	(void) Punsetflags(P, krflag);
412	Psync(P);
413
414	(void) pthread_mutex_unlock(&dpr->dpr_lock);
415
416	while (!dpr->dpr_quit) {
417		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
418			continue; /* check dpr_quit and continue waiting */
419
420		(void) pthread_mutex_lock(&dpr->dpr_lock);
421		(void) Pstopstatus(P, PCNULL, 0);
422		psp = &Pstatus(P)->pr_lwp;
423
424		/*
425		 * If we've reached a new state, found a new representative, or
426		 * the stop timestamp has changed, restore PR_KLC/PR_RLC to its
427		 * original setting and then return with dpr_lock held.
428		 */
429		if (Pstate(P) != PS_STOP || psp->pr_lwpid != lwpid ||
430		    bcmp(&psp->pr_tstamp, &tstamp, sizeof (tstamp)) != 0) {
431			(void) Psetflags(P, krflag);
432			Psync(P);
433			return;
434		}
435
436		(void) pthread_mutex_unlock(&dpr->dpr_lock);
437		(void) poll(NULL, 0, MILLISEC / 2);
438	}
439
440	(void) pthread_mutex_lock(&dpr->dpr_lock);
441}
442
443typedef struct dt_proc_control_data {
444	dtrace_hdl_t *dpcd_hdl;			/* DTrace handle */
445	dt_proc_t *dpcd_proc;			/* proccess to control */
446} dt_proc_control_data_t;
447
448/*
449 * Main loop for all victim process control threads.  We initialize all the
450 * appropriate /proc control mechanisms, and then enter a loop waiting for
451 * the process to stop on an event or die.  We process any events by calling
452 * appropriate subroutines, and exit when the victim dies or we lose control.
453 *
454 * The control thread synchronizes the use of dpr_proc with other libdtrace
455 * threads using dpr_lock.  We hold the lock for all of our operations except
456 * waiting while the process is running: this is accomplished by writing a
457 * PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.  If the
458 * libdtrace client wishes to exit or abort our wait, SIGCANCEL can be used.
459 */
460static void *
461dt_proc_control(void *arg)
462{
463	dt_proc_control_data_t *datap = arg;
464	dtrace_hdl_t *dtp = datap->dpcd_hdl;
465	dt_proc_t *dpr = datap->dpcd_proc;
466	dt_proc_hash_t *dph = dpr->dpr_hdl->dt_procs;
467	struct ps_prochandle *P = dpr->dpr_proc;
468
469	int pfd = Pctlfd(P);
470	int pid = dpr->dpr_pid;
471
472	const long wstop = PCWSTOP;
473	int notify = B_FALSE;
474
475	/*
476	 * We disable the POSIX thread cancellation mechanism so that the
477	 * client program using libdtrace can't accidentally cancel our thread.
478	 * dt_proc_destroy() uses SIGCANCEL explicitly to simply poke us out
479	 * of PCWSTOP with EINTR, at which point we will see dpr_quit and exit.
480	 */
481	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
482
483	/*
484	 * Set up the corresponding process for tracing by libdtrace.  We want
485	 * to be able to catch breakpoints and efficiently single-step over
486	 * them, and we need to enable librtld_db to watch libdl activity.
487	 */
488	(void) pthread_mutex_lock(&dpr->dpr_lock);
489
490	(void) Punsetflags(P, PR_ASYNC);	/* require synchronous mode */
491	(void) Psetflags(P, PR_BPTADJ);		/* always adjust eip on x86 */
492	(void) Punsetflags(P, PR_FORK);		/* do not inherit on fork */
493
494	(void) Pfault(P, FLTBPT, B_TRUE);	/* always trace breakpoints */
495	(void) Pfault(P, FLTTRACE, B_TRUE);	/* always trace single-step */
496
497	/*
498	 * We must trace exit from exec() system calls so that if the exec is
499	 * successful, we can reset our breakpoints and re-initialize libproc.
500	 */
501	(void) Psysexit(P, SYS_exec, B_TRUE);
502	(void) Psysexit(P, SYS_execve, B_TRUE);
503
504	/*
505	 * We must trace entry and exit for fork() system calls in order to
506	 * disable our breakpoints temporarily during the fork.  We do not set
507	 * the PR_FORK flag, so if fork succeeds the child begins executing and
508	 * does not inherit any other tracing behaviors or a control thread.
509	 */
510	(void) Psysentry(P, SYS_vfork, B_TRUE);
511	(void) Psysexit(P, SYS_vfork, B_TRUE);
512	(void) Psysentry(P, SYS_fork1, B_TRUE);
513	(void) Psysexit(P, SYS_fork1, B_TRUE);
514	(void) Psysentry(P, SYS_forkall, B_TRUE);
515	(void) Psysexit(P, SYS_forkall, B_TRUE);
516	(void) Psysentry(P, SYS_forksys, B_TRUE);
517	(void) Psysexit(P, SYS_forksys, B_TRUE);
518
519	Psync(P);				/* enable all /proc changes */
520	dt_proc_attach(dpr, B_FALSE);		/* enable rtld breakpoints */
521
522	/*
523	 * If PR_KLC is set, we created the process; otherwise we grabbed it.
524	 * Check for an appropriate stop request and wait for dt_proc_continue.
525	 */
526	if (Pstatus(P)->pr_flags & PR_KLC)
527		dt_proc_stop(dpr, DT_PROC_STOP_CREATE);
528	else
529		dt_proc_stop(dpr, DT_PROC_STOP_GRAB);
530
531	if (Psetrun(P, 0, 0) == -1) {
532		dt_dprintf("pid %d: failed to set running: %s\n",
533		    (int)dpr->dpr_pid, strerror(errno));
534	}
535
536	(void) pthread_mutex_unlock(&dpr->dpr_lock);
537
538	/*
539	 * Wait for the process corresponding to this control thread to stop,
540	 * process the event, and then set it running again.  We want to sleep
541	 * with dpr_lock *unheld* so that other parts of libdtrace can use the
542	 * ps_prochandle in the meantime (e.g. ustack()).  To do this, we write
543	 * a PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.
544	 * Once the process stops, we wake up, grab dpr_lock, and then call
545	 * Pwait() (which will return immediately) and do our processing.
546	 */
547	while (!dpr->dpr_quit) {
548		const lwpstatus_t *psp;
549
550		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
551			continue; /* check dpr_quit and continue waiting */
552
553		(void) pthread_mutex_lock(&dpr->dpr_lock);
554pwait_locked:
555		if (Pstopstatus(P, PCNULL, 0) == -1 && errno == EINTR) {
556			(void) pthread_mutex_unlock(&dpr->dpr_lock);
557			continue; /* check dpr_quit and continue waiting */
558		}
559
560		switch (Pstate(P)) {
561		case PS_STOP:
562			psp = &Pstatus(P)->pr_lwp;
563
564			dt_dprintf("pid %d: proc stopped showing %d/%d\n",
565			    pid, psp->pr_why, psp->pr_what);
566
567			/*
568			 * If the process stops showing PR_REQUESTED, then the
569			 * DTrace stop() action was applied to it or another
570			 * debugging utility (e.g. pstop(1)) asked it to stop.
571			 * In either case, the user's intention is for the
572			 * process to remain stopped until another external
573			 * mechanism (e.g. prun(1)) is applied.  So instead of
574			 * setting the process running ourself, we wait for
575			 * someone else to do so.  Once that happens, we return
576			 * to our normal loop waiting for an event of interest.
577			 */
578			if (psp->pr_why == PR_REQUESTED) {
579				dt_proc_waitrun(dpr);
580				(void) pthread_mutex_unlock(&dpr->dpr_lock);
581				continue;
582			}
583
584			/*
585			 * If the process stops showing one of the events that
586			 * we are tracing, perform the appropriate response.
587			 * Note that we ignore PR_SUSPENDED, PR_CHECKPOINT, and
588			 * PR_JOBCONTROL by design: if one of these conditions
589			 * occurs, we will fall through to Psetrun() but the
590			 * process will remain stopped in the kernel by the
591			 * corresponding mechanism (e.g. job control stop).
592			 */
593			if (psp->pr_why == PR_FAULTED && psp->pr_what == FLTBPT)
594				dt_proc_bpmatch(dtp, dpr);
595			else if (psp->pr_why == PR_SYSENTRY &&
596			    IS_SYS_FORK(psp->pr_what))
597				dt_proc_bpdisable(dpr);
598			else if (psp->pr_why == PR_SYSEXIT &&
599			    IS_SYS_FORK(psp->pr_what))
600				dt_proc_bpenable(dpr);
601			else if (psp->pr_why == PR_SYSEXIT &&
602			    IS_SYS_EXEC(psp->pr_what))
603				dt_proc_attach(dpr, B_TRUE);
604			break;
605
606		case PS_LOST:
607			if (Preopen(P) == 0)
608				goto pwait_locked;
609
610			dt_dprintf("pid %d: proc lost: %s\n",
611			    pid, strerror(errno));
612
613			dpr->dpr_quit = B_TRUE;
614			notify = B_TRUE;
615			break;
616
617		case PS_UNDEAD:
618			dt_dprintf("pid %d: proc died\n", pid);
619			dpr->dpr_quit = B_TRUE;
620			notify = B_TRUE;
621			break;
622		}
623
624		if (Pstate(P) != PS_UNDEAD && Psetrun(P, 0, 0) == -1) {
625			dt_dprintf("pid %d: failed to set running: %s\n",
626			    (int)dpr->dpr_pid, strerror(errno));
627		}
628
629		(void) pthread_mutex_unlock(&dpr->dpr_lock);
630	}
631
632	/*
633	 * If the control thread detected PS_UNDEAD or PS_LOST, then enqueue
634	 * the dt_proc_t structure on the dt_proc_hash_t notification list.
635	 */
636	if (notify)
637		dt_proc_notify(dtp, dph, dpr, NULL);
638
639	/*
640	 * Destroy and remove any remaining breakpoints, set dpr_done and clear
641	 * dpr_tid to indicate the control thread has exited, and notify any
642	 * waiting thread in dt_proc_destroy() that we have succesfully exited.
643	 */
644	(void) pthread_mutex_lock(&dpr->dpr_lock);
645
646	dt_proc_bpdestroy(dpr, B_TRUE);
647	dpr->dpr_done = B_TRUE;
648	dpr->dpr_tid = 0;
649
650	(void) pthread_cond_broadcast(&dpr->dpr_cv);
651	(void) pthread_mutex_unlock(&dpr->dpr_lock);
652
653	return (NULL);
654}
655
656/*PRINTFLIKE3*/
657static struct ps_prochandle *
658dt_proc_error(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *format, ...)
659{
660	va_list ap;
661
662	va_start(ap, format);
663	dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap);
664	va_end(ap);
665
666	if (dpr->dpr_proc != NULL)
667		Prelease(dpr->dpr_proc, 0);
668
669	dt_free(dtp, dpr);
670	(void) dt_set_errno(dtp, EDT_COMPILER);
671	return (NULL);
672}
673
674dt_proc_t *
675dt_proc_lookup(dtrace_hdl_t *dtp, struct ps_prochandle *P, int remove)
676{
677	dt_proc_hash_t *dph = dtp->dt_procs;
678	pid_t pid = Pstatus(P)->pr_pid;
679	dt_proc_t *dpr, **dpp = &dph->dph_hash[pid & (dph->dph_hashlen - 1)];
680
681	for (dpr = *dpp; dpr != NULL; dpr = dpr->dpr_hash) {
682		if (dpr->dpr_pid == pid)
683			break;
684		else
685			dpp = &dpr->dpr_hash;
686	}
687
688	assert(dpr != NULL);
689	assert(dpr->dpr_proc == P);
690
691	if (remove)
692		*dpp = dpr->dpr_hash; /* remove from pid hash chain */
693
694	return (dpr);
695}
696
697static void
698dt_proc_destroy(dtrace_hdl_t *dtp, struct ps_prochandle *P)
699{
700	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
701	dt_proc_hash_t *dph = dtp->dt_procs;
702	dt_proc_notify_t *npr, **npp;
703	int rflag;
704
705	assert(dpr != NULL);
706
707	/*
708	 * If neither PR_KLC nor PR_RLC is set, then the process is stopped by
709	 * an external debugger and we were waiting in dt_proc_waitrun().
710	 * Leave the process in this condition using PRELEASE_HANG.
711	 */
712	if (!(Pstatus(dpr->dpr_proc)->pr_flags & (PR_KLC | PR_RLC))) {
713		dt_dprintf("abandoning pid %d\n", (int)dpr->dpr_pid);
714		rflag = PRELEASE_HANG;
715	} else {
716		dt_dprintf("releasing pid %d\n", (int)dpr->dpr_pid);
717		rflag = 0; /* apply kill or run-on-last-close */
718	}
719
720	if (dpr->dpr_tid) {
721		/*
722		 * Set the dpr_quit flag to tell the daemon thread to exit.  We
723		 * send it a SIGCANCEL to poke it out of PCWSTOP or any other
724		 * long-term /proc system call.  Our daemon threads have POSIX
725		 * cancellation disabled, so EINTR will be the only effect.  We
726		 * then wait for dpr_done to indicate the thread has exited.
727		 *
728		 * We can't use pthread_kill() to send SIGCANCEL because the
729		 * interface forbids it and we can't use pthread_cancel()
730		 * because with cancellation disabled it won't actually
731		 * send SIGCANCEL to the target thread, so we use _lwp_kill()
732		 * to do the job.  This is all built on evil knowledge of
733		 * the details of the cancellation mechanism in libc.
734		 */
735		(void) pthread_mutex_lock(&dpr->dpr_lock);
736		dpr->dpr_quit = B_TRUE;
737		(void) _lwp_kill(dpr->dpr_tid, SIGCANCEL);
738
739		/*
740		 * If the process is currently idling in dt_proc_stop(), re-
741		 * enable breakpoints and poke it into running again.
742		 */
743		if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
744			dt_proc_bpenable(dpr);
745			dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
746			(void) pthread_cond_broadcast(&dpr->dpr_cv);
747		}
748
749		while (!dpr->dpr_done)
750			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
751
752		(void) pthread_mutex_unlock(&dpr->dpr_lock);
753	}
754
755	/*
756	 * Before we free the process structure, remove this dt_proc_t from the
757	 * lookup hash, and then walk the dt_proc_hash_t's notification list
758	 * and remove this dt_proc_t if it is enqueued.
759	 */
760	(void) pthread_mutex_lock(&dph->dph_lock);
761	(void) dt_proc_lookup(dtp, P, B_TRUE);
762	npp = &dph->dph_notify;
763
764	while ((npr = *npp) != NULL) {
765		if (npr->dprn_dpr == dpr) {
766			*npp = npr->dprn_next;
767			dt_free(dtp, npr);
768		} else {
769			npp = &npr->dprn_next;
770		}
771	}
772
773	(void) pthread_mutex_unlock(&dph->dph_lock);
774
775	/*
776	 * Remove the dt_proc_list from the LRU list, release the underlying
777	 * libproc handle, and free our dt_proc_t data structure.
778	 */
779	if (dpr->dpr_cacheable) {
780		assert(dph->dph_lrucnt != 0);
781		dph->dph_lrucnt--;
782	}
783
784	dt_list_delete(&dph->dph_lrulist, dpr);
785	Prelease(dpr->dpr_proc, rflag);
786	dt_free(dtp, dpr);
787}
788
789static int
790dt_proc_create_thread(dtrace_hdl_t *dtp, dt_proc_t *dpr, uint_t stop)
791{
792	dt_proc_control_data_t data;
793	sigset_t nset, oset;
794	pthread_attr_t a;
795	int err;
796
797	(void) pthread_mutex_lock(&dpr->dpr_lock);
798	dpr->dpr_stop |= stop; /* set bit for initial rendezvous */
799
800	(void) pthread_attr_init(&a);
801	(void) pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
802
803	(void) sigfillset(&nset);
804	(void) sigdelset(&nset, SIGABRT);	/* unblocked for assert() */
805	(void) sigdelset(&nset, SIGCANCEL);	/* see dt_proc_destroy() */
806
807	data.dpcd_hdl = dtp;
808	data.dpcd_proc = dpr;
809
810	(void) pthread_sigmask(SIG_SETMASK, &nset, &oset);
811	err = pthread_create(&dpr->dpr_tid, &a, dt_proc_control, &data);
812	(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);
813
814	/*
815	 * If the control thread was created, then wait on dpr_cv for either
816	 * dpr_done to be set (the victim died or the control thread failed)
817	 * or DT_PROC_STOP_IDLE to be set, indicating that the victim is now
818	 * stopped by /proc and the control thread is at the rendezvous event.
819	 * On success, we return with the process and control thread stopped:
820	 * the caller can then apply dt_proc_continue() to resume both.
821	 */
822	if (err == 0) {
823		while (!dpr->dpr_done && !(dpr->dpr_stop & DT_PROC_STOP_IDLE))
824			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
825
826		/*
827		 * If dpr_done is set, the control thread aborted before it
828		 * reached the rendezvous event.  This is either due to PS_LOST
829		 * or PS_UNDEAD (i.e. the process died).  We try to provide a
830		 * small amount of useful information to help figure it out.
831		 */
832		if (dpr->dpr_done) {
833			const psinfo_t *prp = Ppsinfo(dpr->dpr_proc);
834			int stat = prp ? prp->pr_wstat : 0;
835			int pid = dpr->dpr_pid;
836
837			if (Pstate(dpr->dpr_proc) == PS_LOST) {
838				(void) dt_proc_error(dpr->dpr_hdl, dpr,
839				    "failed to control pid %d: process exec'd "
840				    "set-id or unobservable program\n", pid);
841			} else if (WIFSIGNALED(stat)) {
842				(void) dt_proc_error(dpr->dpr_hdl, dpr,
843				    "failed to control pid %d: process died "
844				    "from signal %d\n", pid, WTERMSIG(stat));
845			} else {
846				(void) dt_proc_error(dpr->dpr_hdl, dpr,
847				    "failed to control pid %d: process exited "
848				    "with status %d\n", pid, WEXITSTATUS(stat));
849			}
850
851			err = ESRCH; /* cause grab() or create() to fail */
852		}
853	} else {
854		(void) dt_proc_error(dpr->dpr_hdl, dpr,
855		    "failed to create control thread for process-id %d: %s\n",
856		    (int)dpr->dpr_pid, strerror(err));
857	}
858
859	(void) pthread_mutex_unlock(&dpr->dpr_lock);
860	(void) pthread_attr_destroy(&a);
861
862	return (err);
863}
864
865struct ps_prochandle *
866dt_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv)
867{
868	dt_proc_hash_t *dph = dtp->dt_procs;
869	dt_proc_t *dpr;
870	int err;
871
872	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
873		return (NULL); /* errno is set for us */
874
875	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
876	(void) pthread_cond_init(&dpr->dpr_cv, NULL);
877
878	if ((dpr->dpr_proc = Pcreate(file, argv, &err, NULL, 0)) == NULL) {
879		return (dt_proc_error(dtp, dpr,
880		    "failed to execute %s: %s\n", file, Pcreate_error(err)));
881	}
882
883	dpr->dpr_hdl = dtp;
884	dpr->dpr_pid = Pstatus(dpr->dpr_proc)->pr_pid;
885
886	(void) Punsetflags(dpr->dpr_proc, PR_RLC);
887	(void) Psetflags(dpr->dpr_proc, PR_KLC);
888
889	if (dt_proc_create_thread(dtp, dpr, dtp->dt_prcmode) != 0)
890		return (NULL); /* dt_proc_error() has been called for us */
891
892	dpr->dpr_hash = dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)];
893	dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)] = dpr;
894	dt_list_prepend(&dph->dph_lrulist, dpr);
895
896	dt_dprintf("created pid %d\n", (int)dpr->dpr_pid);
897	dpr->dpr_refs++;
898
899	return (dpr->dpr_proc);
900}
901
902struct ps_prochandle *
903dt_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags, int nomonitor)
904{
905	dt_proc_hash_t *dph = dtp->dt_procs;
906	uint_t h = pid & (dph->dph_hashlen - 1);
907	dt_proc_t *dpr, *opr;
908	int err;
909
910	/*
911	 * Search the hash table for the pid.  If it is already grabbed or
912	 * created, move the handle to the front of the lrulist, increment
913	 * the reference count, and return the existing ps_prochandle.
914	 */
915	for (dpr = dph->dph_hash[h]; dpr != NULL; dpr = dpr->dpr_hash) {
916		if (dpr->dpr_pid == pid && !dpr->dpr_stale) {
917			/*
918			 * If the cached handle was opened read-only and
919			 * this request is for a writeable handle, mark
920			 * the cached handle as stale and open a new handle.
921			 * Since it's stale, unmark it as cacheable.
922			 */
923			if (dpr->dpr_rdonly && !(flags & PGRAB_RDONLY)) {
924				dt_dprintf("upgrading pid %d\n", (int)pid);
925				dpr->dpr_stale = B_TRUE;
926				dpr->dpr_cacheable = B_FALSE;
927				dph->dph_lrucnt--;
928				break;
929			}
930
931			dt_dprintf("grabbed pid %d (cached)\n", (int)pid);
932			dt_list_delete(&dph->dph_lrulist, dpr);
933			dt_list_prepend(&dph->dph_lrulist, dpr);
934			dpr->dpr_refs++;
935			return (dpr->dpr_proc);
936		}
937	}
938
939	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
940		return (NULL); /* errno is set for us */
941
942	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
943	(void) pthread_cond_init(&dpr->dpr_cv, NULL);
944
945	if ((dpr->dpr_proc = Pgrab(pid, flags, &err)) == NULL) {
946		return (dt_proc_error(dtp, dpr,
947		    "failed to grab pid %d: %s\n", (int)pid, Pgrab_error(err)));
948	}
949
950	dpr->dpr_hdl = dtp;
951	dpr->dpr_pid = pid;
952
953	(void) Punsetflags(dpr->dpr_proc, PR_KLC);
954	(void) Psetflags(dpr->dpr_proc, PR_RLC);
955
956	/*
957	 * If we are attempting to grab the process without a monitor
958	 * thread, then mark the process cacheable only if it's being
959	 * grabbed read-only.  If we're currently caching more process
960	 * handles than dph_lrulim permits, attempt to find the
961	 * least-recently-used handle that is currently unreferenced and
962	 * release it from the cache.  Otherwise we are grabbing the process
963	 * for control: create a control thread for this process and store
964	 * its ID in dpr->dpr_tid.
965	 */
966	if (nomonitor || (flags & PGRAB_RDONLY)) {
967		if (dph->dph_lrucnt >= dph->dph_lrulim) {
968			for (opr = dt_list_prev(&dph->dph_lrulist);
969			    opr != NULL; opr = dt_list_prev(opr)) {
970				if (opr->dpr_cacheable && opr->dpr_refs == 0) {
971					dt_proc_destroy(dtp, opr->dpr_proc);
972					break;
973				}
974			}
975		}
976
977		if (flags & PGRAB_RDONLY) {
978			dpr->dpr_cacheable = B_TRUE;
979			dpr->dpr_rdonly = B_TRUE;
980			dph->dph_lrucnt++;
981		}
982
983	} else if (dt_proc_create_thread(dtp, dpr, DT_PROC_STOP_GRAB) != 0)
984		return (NULL); /* dt_proc_error() has been called for us */
985
986	dpr->dpr_hash = dph->dph_hash[h];
987	dph->dph_hash[h] = dpr;
988	dt_list_prepend(&dph->dph_lrulist, dpr);
989
990	dt_dprintf("grabbed pid %d\n", (int)pid);
991	dpr->dpr_refs++;
992
993	return (dpr->dpr_proc);
994}
995
996void
997dt_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
998{
999	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1000	dt_proc_hash_t *dph = dtp->dt_procs;
1001
1002	assert(dpr != NULL);
1003	assert(dpr->dpr_refs != 0);
1004
1005	if (--dpr->dpr_refs == 0 &&
1006	    (!dpr->dpr_cacheable || dph->dph_lrucnt > dph->dph_lrulim))
1007		dt_proc_destroy(dtp, P);
1008}
1009
1010void
1011dt_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1012{
1013	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1014
1015	(void) pthread_mutex_lock(&dpr->dpr_lock);
1016
1017	if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
1018		dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
1019		(void) pthread_cond_broadcast(&dpr->dpr_cv);
1020	}
1021
1022	(void) pthread_mutex_unlock(&dpr->dpr_lock);
1023}
1024
1025void
1026dt_proc_lock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1027{
1028	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1029	int err = pthread_mutex_lock(&dpr->dpr_lock);
1030	assert(err == 0); /* check for recursion */
1031}
1032
1033void
1034dt_proc_unlock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1035{
1036	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1037	int err = pthread_mutex_unlock(&dpr->dpr_lock);
1038	assert(err == 0); /* check for unheld lock */
1039}
1040
1041void
1042dt_proc_hash_create(dtrace_hdl_t *dtp)
1043{
1044	if ((dtp->dt_procs = dt_zalloc(dtp, sizeof (dt_proc_hash_t) +
1045	    sizeof (dt_proc_t *) * _dtrace_pidbuckets - 1)) != NULL) {
1046
1047		(void) pthread_mutex_init(&dtp->dt_procs->dph_lock, NULL);
1048		(void) pthread_cond_init(&dtp->dt_procs->dph_cv, NULL);
1049
1050		dtp->dt_procs->dph_hashlen = _dtrace_pidbuckets;
1051		dtp->dt_procs->dph_lrulim = _dtrace_pidlrulim;
1052	}
1053}
1054
1055void
1056dt_proc_hash_destroy(dtrace_hdl_t *dtp)
1057{
1058	dt_proc_hash_t *dph = dtp->dt_procs;
1059	dt_proc_t *dpr;
1060
1061	while ((dpr = dt_list_next(&dph->dph_lrulist)) != NULL)
1062		dt_proc_destroy(dtp, dpr->dpr_proc);
1063
1064	dtp->dt_procs = NULL;
1065	dt_free(dtp, dph);
1066}
1067
1068struct ps_prochandle *
1069dtrace_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv)
1070{
1071	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
1072	struct ps_prochandle *P = dt_proc_create(dtp, file, argv);
1073
1074	if (P != NULL && idp != NULL && idp->di_id == 0)
1075		idp->di_id = Pstatus(P)->pr_pid; /* $target = created pid */
1076
1077	return (P);
1078}
1079
1080struct ps_prochandle *
1081dtrace_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags)
1082{
1083	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
1084	struct ps_prochandle *P = dt_proc_grab(dtp, pid, flags, 0);
1085
1086	if (P != NULL && idp != NULL && idp->di_id == 0)
1087		idp->di_id = pid; /* $target = grabbed pid */
1088
1089	return (P);
1090}
1091
1092void
1093dtrace_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1094{
1095	dt_proc_release(dtp, P);
1096}
1097
1098void
1099dtrace_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1100{
1101	dt_proc_continue(dtp, P);
1102}
1103