plockstat.c revision 297077
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#ifdef illumos
28#pragma ident	"%Z%%M%	%I%	%E% SMI"
29#endif
30
31#include <assert.h>
32#include <dtrace.h>
33#include <limits.h>
34#include <link.h>
35#include <priv.h>
36#include <signal.h>
37#include <stdlib.h>
38#include <stdarg.h>
39#include <stdio.h>
40#include <string.h>
41#include <strings.h>
42#include <errno.h>
43#include <sys/wait.h>
44#include <libgen.h>
45#include <libproc.h>
46#include <libproc_compat.h>
47
/* Program name; set from basename(argv[0]) in main() and used to prefix messages. */
static char *g_pname;
/* libdtrace handle; opened in main() with dtrace_open(). */
static dtrace_hdl_t *g_dtp;
/* Handle for the traced process; set in main() by grabbing a pid or creating a command. */
struct ps_prochandle *g_pr;

/* Process exit statuses: usage() exits with E_USAGE, fatal()/dfatal() with E_ERROR. */
#define	E_SUCCESS	0
#define	E_ERROR		1
#define	E_USAGE		2
55
/*
 * For hold times we use a global associative array for mutexes since, in
 * user-land, it's not invalid to release a synchronization primitive that
 * another thread acquired; rwlocks require a thread-local associative array
 * since multiple threads can hold the same lock for reading. Note that we
 * ignore recursive mutex acquisitions and releases as they don't truly
 * affect lock contention.
 *
 * g_hold_init is the D fragment that records the acquisition timestamp,
 * keyed by arg0, for both lock kinds.  The mutex clause only fires when
 * arg1 == 0.  It is added to the program by main() when hold-time
 * tracing is selected.
 */
static const char *g_hold_init =
"plockstat$target:::rw-acquire\n"
"{\n"
"	self->rwhold[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-acquire\n"
"/arg1 == 0/\n"
"{\n"
"	mtxhold[arg0] = timestamp;\n"
"}\n";
74
/*
 * D fragment used for hold times when -s is given (see main()).  On each
 * release probe with a recorded acquisition timestamp, the elapsed time is
 * fed to quantize() in an aggregation keyed by [arg0, ustack()], the saved
 * timestamp is cleared, and a *_found flag is set.  The first rw-release
 * clause (arg1 == 1) feeds @rw_w_hold; because it clears the timestamp, the
 * second rw-release clause only fires for the remaining releases and feeds
 * @rw_r_hold.  The END clauses trace a title string and printa() each
 * aggregation whose flag was set; chewrec() consumes those records.
 */
static const char *g_hold_histogram =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack()] = quantize(timestamp - mtxhold[arg0]);\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold);\n"
"}\n";
118
/*
 * D fragment used for hold times when -s is not given (see main()).  It
 * mirrors g_hold_histogram, but instead of a quantize() histogram each
 * release updates a pair of aggregations keyed by [arg0, ustack(5)]: a
 * sum() of the elapsed time and a count() of events.  The END clauses
 * printa() each sum/count pair together, which is what lets chewrec() and
 * process_aggregate() walk them joined and derive an average.
 */
static const char *g_hold_times =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_w_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_r_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack(5)] = sum(timestamp - mtxhold[arg0]);\n"
"	@mtx_hold_count[arg0, ustack(5)] = count();\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold, @mtx_hold_count);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold, @rw_r_hold_count);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold, @rw_w_hold_count);\n"
"}\n";
163
164
/*
 * For contention, we use thread-local associative arrays since we're tracing
 * a single thread's activity in libc and multiple threads can be blocking or
 * spinning on the same synchronization primitive.
 *
 * g_ctnd_init is the D fragment that records, per thread and keyed by arg0,
 * the timestamp at which the thread started blocking on an rwlock, blocking
 * on a mutex, or spinning on a mutex.  It is added to the program by main()
 * when contention tracing is selected.
 */
static const char *g_ctnd_init =
"plockstat$target:::rw-block\n"
"{\n"
"	self->rwblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-block\n"
"{\n"
"	self->mtxblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-spin\n"
"{\n"
"	self->mtxspin[arg0] = timestamp;\n"
"}\n";
183
/*
 * D fragment used for contention events when -s is given (see main()).
 * Each rw-blocked, mutex-spun and mutex-blocked clause fires only when the
 * matching start timestamp was recorded by g_ctnd_init, feeds the elapsed
 * time to quantize() in an aggregation keyed by [arg0, ustack()], clears
 * the timestamp and sets a *_found flag.
 *
 * Clause order matters: for each probe the more specific predicate comes
 * first and clears the timestamp, so later clauses for the same probe only
 * see the remaining cases.  The final rw-blocked and mutex-blocked clauses
 * aggregate nothing and just clear a leftover timestamp.  A mutex-spun
 * event with arg1 == 0 is accounted to @mtx_vain_spin, reported as
 * "Mutex unsuccessful spin".
 *
 * The END clauses trace a title string and printa() each aggregation whose
 * flag was set; chewrec() consumes those records.
 */
static const char *g_ctnd_histogram =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxblock[arg0]);\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block);\n"
"}\n";
266
267
/*
 * D fragment used for contention events when -s is not given (see main()).
 * It has the same clause structure and predicates as g_ctnd_histogram, but
 * every aggregating clause updates a sum() of the elapsed time and a
 * count() of events, both keyed by [arg0, ustack(5)], instead of a
 * quantize() histogram.  The END clauses printa() each sum/count pair
 * together so that chewrec() and process_aggregate() can walk them joined.
 */
static const char *g_ctnd_times =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_w_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_r_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_vain_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxblock[arg0]);\n"
"	@mtx_block_count[arg0, ustack(5)] = count();\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block, @mtx_block_count);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin, @mtx_spin_count);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin, @mtx_vain_spin_count);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block, @rw_r_block_count);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block, @rw_w_block_count);\n"
"}\n";
355
356static char g_prog[4096];
357static size_t g_proglen;
358static int g_opt_V, g_opt_s;
359static int g_intr;
360static int g_exited;
361static dtrace_optval_t g_nframes;
362static ulong_t g_nent = ULONG_MAX;
363
364#define	PLOCKSTAT_OPTSTR	"n:ps:e:vx:ACHV"
365
366static void
367usage(void)
368{
369	(void) fprintf(stderr, "Usage:\n"
370	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
371	    "\t    command [arg...]\n"
372	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
373	    "\t    -p pid\n", g_pname, g_pname);
374
375	exit(E_USAGE);
376}
377
378static void
379verror(const char *fmt, va_list ap)
380{
381	int error = errno;
382
383	(void) fprintf(stderr, "%s: ", g_pname);
384	(void) vfprintf(stderr, fmt, ap);
385
386	if (fmt[strlen(fmt) - 1] != '\n')
387		(void) fprintf(stderr, ": %s\n", strerror(error));
388}
389
390/*PRINTFLIKE1*/
391static void
392fatal(const char *fmt, ...)
393{
394	va_list ap;
395
396	va_start(ap, fmt);
397	verror(fmt, ap);
398	va_end(ap);
399
400	if (g_pr != NULL && g_dtp != NULL)
401		dtrace_proc_release(g_dtp, g_pr);
402
403	exit(E_ERROR);
404}
405
406/*PRINTFLIKE1*/
407static void
408dfatal(const char *fmt, ...)
409{
410	va_list ap;
411
412	va_start(ap, fmt);
413
414	(void) fprintf(stderr, "%s: ", g_pname);
415	if (fmt != NULL)
416		(void) vfprintf(stderr, fmt, ap);
417
418	va_end(ap);
419
420	if (fmt != NULL && fmt[strlen(fmt) - 1] != '\n') {
421		(void) fprintf(stderr, ": %s\n",
422		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
423	} else if (fmt == NULL) {
424		(void) fprintf(stderr, "%s\n",
425		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
426	}
427
428	if (g_pr != NULL) {
429		dtrace_proc_continue(g_dtp, g_pr);
430		dtrace_proc_release(g_dtp, g_pr);
431	}
432
433	exit(E_ERROR);
434}
435
/*
 * Print a non-fatal diagnostic on stderr via verror() and return to the
 * caller.  The message formatting rules are those of verror().
 */
/*PRINTFLIKE1*/
static void
notice(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror(fmt, args);
	va_end(args);
}
446
447static void
448dprog_add(const char *prog)
449{
450	size_t len = strlen(prog);
451	bcopy(prog, g_prog + g_proglen, len + 1);
452	g_proglen += len;
453	assert(g_proglen < sizeof (g_prog));
454}
455
456static void
457dprog_compile(void)
458{
459	dtrace_prog_t *prog;
460	dtrace_proginfo_t info;
461
462	if (g_opt_V) {
463		(void) fprintf(stderr, "%s: vvvv D program vvvv\n", g_pname);
464		(void) fputs(g_prog, stderr);
465		(void) fprintf(stderr, "%s: ^^^^ D program ^^^^\n", g_pname);
466	}
467
468	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
469	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
470		dfatal("failed to compile program");
471
472	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
473		dfatal("failed to enable probes");
474}
475
/*
 * Print the column headings of the result table (Count, nsec, Lock,
 * Caller) on stdout.
 */
void
print_legend(void)
{
	static const char *const headings[] = {
		"Count", "nsec", "Lock", "Caller"
	};

	(void) printf("%5s %8s %-28s %s\n",
	    headings[0], headings[1], headings[2], headings[3]);
}
481
/*
 * Print the horizontal rule that separates the table heading from its rows
 * on stdout.
 */
void
print_bar(void)
{
	static const char rule[] =
	    "---------------------------------------"
	    "----------------------------------------\n";

	(void) fputs(rule, stdout);
}
488
/*
 * Print the heading line of a per-lock time-distribution histogram on
 * stdout, preceded by a blank line.
 */
void
print_histogram_header(void)
{
	const char *unit = "nsec";
	const char *tally = "count";
	const char *trace = "Stack";

	(void) fprintf(stdout, "\n%10s ---- Time Distribution --- %5s %s\n",
	    unit, tally, trace);
}
495
496/*
497 * Convert an address to a symbolic string or a numeric string. If nolocks
498 * is set, we return an error code if this symbol appears to be a mutex- or
499 * rwlock-related symbol in libc so the caller has a chance to find a more
500 * helpful symbol.
501 */
502static int
503getsym(struct ps_prochandle *P, uintptr_t addr, char *buf, size_t size,
504    int nolocks)
505{
506	char name[256];
507	GElf_Sym sym;
508#ifdef illumos
509	prsyminfo_t info;
510#else
511	prmap_t *map;
512	int info; /* XXX unused */
513#endif
514	size_t len;
515
516	if (P == NULL || Pxlookup_by_addr(P, addr, name, sizeof (name),
517	    &sym, &info) != 0) {
518		(void) snprintf(buf, size, "%#lx", addr);
519		return (0);
520	}
521#ifdef illumos
522	if (info.prs_object == NULL)
523		info.prs_object = "<unknown>";
524
525	if (info.prs_lmid != LM_ID_BASE) {
526		len = snprintf(buf, size, "LM%lu`", info.prs_lmid);
527		buf += len;
528		size -= len;
529	}
530
531	len = snprintf(buf, size, "%s`%s", info.prs_object, info.prs_name);
532#else
533	map = proc_addr2map(P, addr);
534	len = snprintf(buf, size, "%s`%s", map->pr_mapname, name);
535#endif
536	buf += len;
537	size -= len;
538
539	if (sym.st_value != addr)
540		len = snprintf(buf, size, "+%#lx", addr - sym.st_value);
541
542	if (nolocks && strcmp("libc.so.1", map->pr_mapname) == 0 &&
543	    (strstr("mutex", name) == 0 ||
544	    strstr("rw", name) == 0))
545		return (-1);
546
547	return (0);
548}
549
550/*ARGSUSED*/
551static int
552process_aggregate(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
553{
554	const dtrace_recdesc_t *rec;
555	uintptr_t lock;
556	uint64_t *stack;
557	caddr_t data;
558	pid_t pid;
559	struct ps_prochandle *P;
560	char buf[256];
561	int i, j;
562	uint64_t sum, count, avg;
563
564	if ((*(uint_t *)arg)++ >= g_nent)
565		return (DTRACE_AGGWALK_NEXT);
566
567	rec = aggsdata[0]->dtada_desc->dtagd_rec;
568	data = aggsdata[0]->dtada_data;
569
570	/*LINTED - alignment*/
571	lock = (uintptr_t)*(uint64_t *)(data + rec[1].dtrd_offset);
572	/*LINTED - alignment*/
573	stack = (uint64_t *)(data + rec[2].dtrd_offset);
574
575	if (!g_opt_s) {
576		/*LINTED - alignment*/
577		sum = *(uint64_t *)(aggsdata[1]->dtada_data +
578		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
579		/*LINTED - alignment*/
580		count = *(uint64_t *)(aggsdata[2]->dtada_data +
581		    aggsdata[2]->dtada_desc->dtagd_rec[3].dtrd_offset);
582	} else {
583		uint64_t *a;
584
585		/*LINTED - alignment*/
586		a = (uint64_t *)(aggsdata[1]->dtada_data +
587		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
588
589		print_bar();
590		print_legend();
591
592		for (count = sum = 0, i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
593		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++) {
594			count += a[i];
595			sum += a[i] << (j - 64);
596		}
597	}
598
599	avg = sum / count;
600	(void) printf("%5llu %8llu ", (u_longlong_t)count, (u_longlong_t)avg);
601
602	pid = stack[0];
603	P = dtrace_proc_grab(g_dtp, pid, PGRAB_RDONLY);
604
605	(void) getsym(P, lock, buf, sizeof (buf), 0);
606	(void) printf("%-28s ", buf);
607
608	for (i = 2; i <= 5; i++) {
609		if (getsym(P, stack[i], buf, sizeof (buf), 1) == 0)
610			break;
611	}
612	(void) printf("%s\n", buf);
613
614	if (g_opt_s) {
615		int stack_done = 0;
616		int quant_done = 0;
617		int first_bin, last_bin;
618		uint64_t bin_size, *a;
619
620		/*LINTED - alignment*/
621		a = (uint64_t *)(aggsdata[1]->dtada_data +
622		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
623
624		print_histogram_header();
625
626		for (first_bin = DTRACE_QUANTIZE_ZEROBUCKET;
627		    a[first_bin] == 0; first_bin++)
628			continue;
629		for (last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 63;
630		    a[last_bin] == 0; last_bin--)
631			continue;
632
633		for (i = 0; !stack_done || !quant_done; i++) {
634			if (!stack_done) {
635				(void) getsym(P, stack[i + 2], buf,
636				    sizeof (buf), 0);
637			} else {
638				buf[0] = '\0';
639			}
640
641			if (!quant_done) {
642				bin_size = a[first_bin];
643
644				(void) printf("%10llu |%-24.*s| %5llu %s\n",
645				    1ULL <<
646				    (first_bin - DTRACE_QUANTIZE_ZEROBUCKET),
647				    (int)(24.0 * bin_size / count),
648				    "@@@@@@@@@@@@@@@@@@@@@@@@@@",
649				    (u_longlong_t)bin_size, buf);
650			} else {
651				(void) printf("%43s %s\n", "", buf);
652			}
653
654			if (i + 1 >= g_nframes || stack[i + 3] == 0)
655				stack_done = 1;
656
657			if (first_bin++ == last_bin)
658				quant_done = 1;
659		}
660	}
661
662	dtrace_proc_release(g_dtp, P);
663
664	return (DTRACE_AGGWALK_NEXT);
665}
666
667/*ARGSUSED*/
668static void
669prochandler(struct ps_prochandle *P, const char *msg, void *arg)
670{
671#ifdef illumos
672	const psinfo_t *prp = Ppsinfo(P);
673	int pid = Pstatus(P)->pr_pid;
674#else
675	int pid = proc_getpid(P);
676	int wstat = proc_getwstat(P);
677#endif
678	char name[SIG2STR_MAX];
679
680	if (msg != NULL) {
681		notice("pid %d: %s\n", pid, msg);
682		return;
683	}
684
685	switch (Pstate(P)) {
686	case PS_UNDEAD:
687		/*
688		 * Ideally we would like to always report pr_wstat here, but it
689		 * isn't possible given current /proc semantics.  If we grabbed
690		 * the process, Ppsinfo() will either fail or return a zeroed
691		 * psinfo_t depending on how far the parent is in reaping it.
692		 * When /proc provides a stable pr_wstat in the status file,
693		 * this code can be improved by examining this new pr_wstat.
694		 */
695		if (WIFSIGNALED(wstat)) {
696			notice("pid %d terminated by %s\n", pid,
697			    proc_signame(WTERMSIG(wstat),
698			    name, sizeof (name)));
699		} else if (WEXITSTATUS(wstat) != 0) {
700			notice("pid %d exited with status %d\n",
701			    pid, WEXITSTATUS(wstat));
702		} else {
703			notice("pid %d has exited\n", pid);
704		}
705		g_exited = 1;
706		break;
707
708	case PS_LOST:
709		notice("pid %d exec'd a set-id or unobservable program\n", pid);
710		g_exited = 1;
711		break;
712	}
713}
714
715/*ARGSUSED*/
716static int
717chewrec(const dtrace_probedata_t *data, const dtrace_recdesc_t *rec, void *arg)
718{
719	dtrace_eprobedesc_t *epd = data->dtpda_edesc;
720	dtrace_aggvarid_t aggvars[2];
721	const void *buf;
722	int i, nagv;
723
724	/*
725	 * A NULL rec indicates that we've processed the last record.
726	 */
727	if (rec == NULL)
728		return (DTRACE_CONSUME_NEXT);
729
730	buf = data->dtpda_data - rec->dtrd_offset;
731
732	switch (rec->dtrd_action) {
733	case DTRACEACT_DIFEXPR:
734		(void) printf("\n%s\n\n", (char *)buf + rec->dtrd_offset);
735		if (!g_opt_s) {
736			print_legend();
737			print_bar();
738		}
739		return (DTRACE_CONSUME_NEXT);
740
741	case DTRACEACT_PRINTA:
742		for (nagv = 0, i = 0; i < epd->dtepd_nrecs - 1; i++) {
743			const dtrace_recdesc_t *nrec = &rec[i];
744
745			if (nrec->dtrd_uarg != rec->dtrd_uarg)
746				break;
747
748			/*LINTED - alignment*/
749			aggvars[nagv++] = *(dtrace_aggvarid_t *)((caddr_t)buf +
750			    nrec->dtrd_offset);
751		}
752
753		if (nagv == (g_opt_s ? 1 : 2)) {
754			uint_t nent = 0;
755			if (dtrace_aggregate_walk_joined(g_dtp, aggvars, nagv,
756			    process_aggregate, &nent) != 0)
757				dfatal("failed to walk aggregate");
758		}
759
760		return (DTRACE_CONSUME_NEXT);
761	}
762
763	return (DTRACE_CONSUME_THIS);
764}
765
766/*ARGSUSED*/
767static void
768intr(int signo)
769{
770	g_intr = 1;
771}
772
/*
 * Program entry point.  In order:
 *   1. Parse the options that need no DTrace handle (-n -p -v -A -C -H -V).
 *   2. Open libdtrace and set default options.
 *   3. Re-parse the command line for options that act on the open handle
 *      or on the D program (-s -x -e).
 *   4. Assemble the D program from the hold and/or contention fragments.
 *   5. Grab the pid (-p) or create the command, compile the program, and
 *      install the process and signal handlers.
 *   6. Start tracing, resume the target, and process DTrace output until
 *      the work is done, the target exits, or a signal arrives.
 * Returns 0 on normal completion; error paths exit through usage(),
 * fatal() or dfatal().
 */
int
main(int argc, char **argv)
{
#ifdef illumos
	ucred_t *ucp;
#endif
	int err;
	int opt_C = 0, opt_H = 0, opt_p = 0, opt_v = 0;
	int c;
	char *p, *end;
	struct sigaction act;
	int done = 0;

	g_pname = basename(argv[0]);
	argv[0] = g_pname; /* rewrite argv[0] for getopt errors */
#ifdef illumos
	/*
	 * Make sure we have the required dtrace_proc privilege.
	 */
	if ((ucp = ucred_get(getpid())) != NULL) {
		const priv_set_t *psp;
		if ((psp = ucred_getprivset(ucp, PRIV_EFFECTIVE)) != NULL &&
		    !priv_ismember(psp, PRIV_DTRACE_PROC)) {
			fatal("dtrace_proc privilege required\n");
		}

		ucred_free(ucp);
	}
#endif

	/*
	 * First pass: options that do not need an open DTrace handle.
	 * Options handled only in the second pass (-s, -x, -e) reach the
	 * default case, where they are accepted because they appear in
	 * PLOCKSTAT_OPTSTR; anything else is a usage error.
	 */
	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
		switch (c) {
		case 'n':
			errno = 0;
			g_nent = strtoul(optarg, &end, 10);
			if (*end != '\0' || errno != 0) {
				(void) fprintf(stderr, "%s: invalid count "
				    "'%s'\n", g_pname, optarg);
				usage();
			}
			break;

		case 'p':
			opt_p = 1;
			break;

		case 'v':
			opt_v = 1;
			break;

		case 'A':
			opt_C = opt_H = 1;
			break;

		case 'C':
			opt_C = 1;
			break;

		case 'H':
			opt_H = 1;
			break;

		case 'V':
			g_opt_V = 1;
			break;

		default:
			if (strchr(PLOCKSTAT_OPTSTR, c) == NULL)
				usage();
		}
	}

	/*
	 * We need a command or at least one pid.
	 */
	if (argc == optind)
		usage();

	/* With neither -C nor -H (nor -A), trace contention only. */
	if (opt_C == 0 && opt_H == 0)
		opt_C = 1;

	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL)
		fatal("failed to initialize dtrace: %s\n",
		    dtrace_errmsg(NULL, err));

	/*
	 * The longest string we trace is 23 bytes long -- so 32 is plenty.
	 */
	if (dtrace_setopt(g_dtp, "strsize", "32") == -1)
		dfatal("failed to set 'strsize'");

	/*
	 * 1k should be more than enough for all trace() and printa() actions.
	 */
	if (dtrace_setopt(g_dtp, "bufsize", "1k") == -1)
		dfatal("failed to set 'bufsize'");

	/*
	 * The table we produce has the hottest locks at the top.
	 */
	if (dtrace_setopt(g_dtp, "aggsortrev", NULL) == -1)
		dfatal("failed to set 'aggsortrev'");

	/*
	 * These are two reasonable defaults which should suffice.
	 */
	if (dtrace_setopt(g_dtp, "aggsize", "256k") == -1)
		dfatal("failed to set 'aggsize'");
	if (dtrace_setopt(g_dtp, "aggrate", "1sec") == -1)
		dfatal("failed to set 'aggrate'");

	/*
	 * Take a second pass through to look for options that set options now
	 * that we have an open dtrace handle.
	 */
	optind = 1;
	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
		switch (c) {
		case 's':
			g_opt_s = 1;
			if (dtrace_setopt(g_dtp, "ustackframes", optarg) == -1)
				dfatal("failed to set 'ustackframes'");
			break;

		case 'x':
			/* Split "opt=val" in place; p stays NULL for a bare "opt". */
			if ((p = strchr(optarg, '=')) != NULL)
				*p++ = '\0';

			if (dtrace_setopt(g_dtp, optarg, p) != 0)
				dfatal("failed to set -x %s", optarg);
			break;

		case 'e':
			/*
			 * The value is only validated here; the original
			 * text of optarg is what gets pasted into the D
			 * program below.
			 */
			errno = 0;
			(void) strtoul(optarg, &end, 10);
			if (*optarg == '-' || *end != '\0' || errno != 0) {
				(void) fprintf(stderr, "%s: invalid timeout "
				    "'%s'\n", g_pname, optarg);
				usage();
			}

			/*
			 * Construct a DTrace enabling that will exit after
			 * the specified number of seconds.
			 */
			dprog_add("BEGIN\n{\n\tend = timestamp + ");
			dprog_add(optarg);
			dprog_add(" * 1000000000;\n}\n");
			dprog_add("tick-10hz\n/timestamp >= end/\n");
			dprog_add("{\n\texit(0);\n}\n");
			break;
		}
	}

	/* Skip past the options; what remains is the pid or the command. */
	argc -= optind;
	argv += optind;

	/*
	 * Add the selected probe clauses: an init fragment, then either the
	 * sum/count ("times") or the histogram variant, depending on -s.
	 */
	if (opt_H) {
		dprog_add(g_hold_init);
		if (!g_opt_s)
			dprog_add(g_hold_times);
		else
			dprog_add(g_hold_histogram);
	}

	if (opt_C) {
		dprog_add(g_ctnd_init);
		if (!g_opt_s)
			dprog_add(g_ctnd_times);
		else
			dprog_add(g_ctnd_histogram);
	}

	if (opt_p) {
		ulong_t pid;

		if (argc > 1) {
			(void) fprintf(stderr, "%s: only one pid is allowed\n",
			    g_pname);
			usage();
		}

		/* Reject non-numeric input and values that do not fit in a pid_t. */
		errno = 0;
		pid = strtoul(argv[0], &end, 10);
		if (*end != '\0' || errno != 0 || (pid_t)pid != pid) {
			(void) fprintf(stderr, "%s: invalid pid '%s'\n",
			    g_pname, argv[0]);
			usage();
		}

		if ((g_pr = dtrace_proc_grab(g_dtp, (pid_t)pid, 0)) == NULL)
			dfatal(NULL);
	} else {
		if ((g_pr = dtrace_proc_create(g_dtp, argv[0], argv, NULL, NULL)) == NULL)
			dfatal(NULL);
	}

	/* Compiled only after g_pr is set up by the grab/create above. */
	dprog_compile();

	if (dtrace_handle_proc(g_dtp, &prochandler, NULL) == -1)
		dfatal("failed to establish proc handler");

	/* SIGINT and SIGTERM set g_intr so that the loop below can wind down. */
	(void) sigemptyset(&act.sa_mask);
	act.sa_flags = 0;
	act.sa_handler = intr;
	(void) sigaction(SIGINT, &act, NULL);
	(void) sigaction(SIGTERM, &act, NULL);

	if (dtrace_go(g_dtp) != 0)
		dfatal("dtrace_go()");

	/* Read back the effective stack depth; process_aggregate() uses it. */
	if (dtrace_getopt(g_dtp, "ustackframes", &g_nframes) != 0)
		dfatal("failed to get 'ustackframes'");

	/* Tracing is now enabled; continue the target process. */
	dtrace_proc_continue(g_dtp, g_pr);

	if (opt_v)
		(void) printf("%s: tracing enabled for pid %d\n", g_pname,
#ifdef illumos
		    (int)Pstatus(g_pr)->pr_pid);
#else
		    (int)proc_getpid(g_pr));
#endif

	/*
	 * Main loop: sleep until there is work, stop tracing once we were
	 * interrupted or the target exited, and let dtrace_work() feed the
	 * records to chewrec().  The loop ends once 'done' is set, either
	 * here or because dtrace_work() reported DTRACE_WORKSTATUS_DONE.
	 */
	do {
		if (!g_intr && !done)
			dtrace_sleep(g_dtp);

		if (done || g_intr || g_exited) {
			done = 1;
			if (dtrace_stop(g_dtp) == -1)
				dfatal("couldn't stop tracing");
		}

		switch (dtrace_work(g_dtp, stdout, NULL, chewrec, NULL)) {
		case DTRACE_WORKSTATUS_DONE:
			done = 1;
			break;
		case DTRACE_WORKSTATUS_OKAY:
			break;
		default:
			dfatal("processing aborted");
		}

	} while (!done);

	dtrace_close(g_dtp);

	return (0);
}
1023