1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#if defined(sun)
28#pragma ident	"%Z%%M%	%I%	%E% SMI"
29#endif
30
31#include <assert.h>
32#include <dtrace.h>
33#include <limits.h>
34#include <link.h>
35#include <priv.h>
36#include <signal.h>
37#include <stdlib.h>
38#include <stdarg.h>
39#include <stdio.h>
40#include <string.h>
41#include <strings.h>
42#include <errno.h>
43#include <sys/wait.h>
44#include <libgen.h>
45#include <libproc.h>
46#include <libproc_compat.h>
47
/* Program name (basename of argv[0]); prefixes every message we print. */
static char *g_pname;
/* libdtrace handle, set from dtrace_open() in main(). */
static dtrace_hdl_t *g_dtp;
/* Handle for the traced process, created or grabbed in main(). */
struct ps_prochandle *g_pr;

/* Process exit statuses. */
#define	E_SUCCESS	0
#define	E_ERROR		1
#define	E_USAGE		2
55
/*
 * For hold times we use a global associative array since for mutexes, in
 * user-land, it's not invalid to release a synchronization primitive that
 * another thread acquired; rwlocks require a thread-local associative array
 * since multiple threads can hold the same lock for reading. Note that we
 * ignore recursive mutex acquisitions and releases as they don't truly
 * affect lock contention.
 *
 * g_hold_init holds the D clauses that record the acquisition timestamp,
 * keyed by arg0: per-thread (self->rwhold) for rwlocks and globally
 * (mtxhold) for mutexes. The mutex clause only fires when arg1 is 0
 * (presumably the non-recursive case mentioned above -- confirm against the
 * plockstat provider documentation).
 */
static const char *g_hold_init =
"plockstat$target:::rw-acquire\n"
"{\n"
"	self->rwhold[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-acquire\n"
"/arg1 == 0/\n"
"{\n"
"	mtxhold[arg0] = timestamp;\n"
"}\n";
74
/*
 * Hold-time clauses that main() appends after g_hold_init when -s is in
 * effect. Each release that has a recorded acquisition timestamp adds the
 * elapsed time to a quantize() aggregation keyed by lock address (arg0) and
 * ustack(), clears the saved timestamp, and sets a *_found flag so that the
 * matching END clause prints its title and table only when data exists.
 * An rw-release with arg1 == 1 is accounted as a writer hold; because that
 * clause zeroes self->rwhold[arg0], the following rw-release clause only
 * fires for the remaining (reader) releases.
 */
static const char *g_hold_histogram =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack()] = quantize(timestamp - mtxhold[arg0]);\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold);\n"
"}\n";
118
/*
 * Hold-time clauses that main() appends after g_hold_init when -s is NOT
 * given. Same structure as g_hold_histogram, but each event feeds two
 * aggregations with identical keys (lock address and a five-frame
 * ustack(5)): a sum() of elapsed time and a count() of events. The END
 * clauses print both aggregations with a single printa() so the consumer
 * receives them joined.
 */
static const char *g_hold_times =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_w_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_r_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack(5)] = sum(timestamp - mtxhold[arg0]);\n"
"	@mtx_hold_count[arg0, ustack(5)] = count();\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold, @mtx_hold_count);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold, @rw_r_hold_count);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold, @rw_w_hold_count);\n"
"}\n";
163
164
/*
 * For contention, we use thread-local associative arrays since we're tracing
 * a single thread's activity in libc and multiple threads can be blocking or
 * spinning on the same synchronization primitive.
 *
 * g_ctnd_init holds the D clauses that record, per thread and keyed by arg0,
 * the time at which the thread started blocking on an rwlock, blocking on a
 * mutex, or spinning on a mutex.
 */
static const char *g_ctnd_init =
"plockstat$target:::rw-block\n"
"{\n"
"	self->rwblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-block\n"
"{\n"
"	self->mtxblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-spin\n"
"{\n"
"	self->mtxspin[arg0] = timestamp;\n"
"}\n";
183
/*
 * Contention clauses that main() appends after g_ctnd_init when -s is in
 * effect. Each completion probe with a recorded start time adds the elapsed
 * time to a quantize() aggregation keyed by lock address (arg0) and
 * ustack(), clears the saved timestamp, and sets a *_found flag consulted
 * by the END clauses.
 *
 * rw-blocked: recorded only when arg2 != 0, as a writer block when
 * arg1 == 1 and as a reader block otherwise; a third clause discards the
 * timestamp when arg2 == 0.
 * mutex-spun: arg1 != 0 is recorded as a spin, anything else as an
 * "unsuccessful spin".
 * mutex-blocked: recorded when arg1 != 0, otherwise the timestamp is
 * simply discarded.
 */
static const char *g_ctnd_histogram =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxblock[arg0]);\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block);\n"
"}\n";
266
267
/*
 * Contention clauses that main() appends after g_ctnd_init when -s is NOT
 * given. The predicates are the same as in g_ctnd_histogram, but each
 * recorded event feeds a sum() of elapsed time and a count() of events,
 * both keyed by lock address (arg0) and a five-frame ustack(5). The END
 * clauses print each sum/count pair with a single printa().
 */
static const char *g_ctnd_times =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_w_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_r_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_vain_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxblock[arg0]);\n"
"	@mtx_block_count[arg0, ustack(5)] = count();\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block, @mtx_block_count);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin, @mtx_spin_count);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin, @mtx_vain_spin_count);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block, @rw_r_block_count);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block, @rw_w_block_count);\n"
"}\n";
355
356static char g_prog[4096];
357static size_t g_proglen;
358static int g_opt_V, g_opt_s;
359static int g_intr;
360static int g_exited;
361static dtrace_optval_t g_nframes;
362static ulong_t g_nent = ULONG_MAX;
363
364#define	PLOCKSTAT_OPTSTR	"n:ps:e:vx:ACHV"
365
366static void
367usage(void)
368{
369	(void) fprintf(stderr, "Usage:\n"
370	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
371	    "\t    command [arg...]\n"
372	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
373	    "\t    -p pid\n", g_pname, g_pname);
374
375	exit(E_USAGE);
376}
377
378static void
379verror(const char *fmt, va_list ap)
380{
381	int error = errno;
382
383	(void) fprintf(stderr, "%s: ", g_pname);
384	(void) vfprintf(stderr, fmt, ap);
385
386	if (fmt[strlen(fmt) - 1] != '\n')
387		(void) fprintf(stderr, ": %s\n", strerror(error));
388}
389
390/*PRINTFLIKE1*/
391static void
392fatal(const char *fmt, ...)
393{
394	va_list ap;
395
396	va_start(ap, fmt);
397	verror(fmt, ap);
398	va_end(ap);
399
400	if (g_pr != NULL && g_dtp != NULL)
401		dtrace_proc_release(g_dtp, g_pr);
402
403	exit(E_ERROR);
404}
405
406/*PRINTFLIKE1*/
407static void
408dfatal(const char *fmt, ...)
409{
410	va_list ap;
411
412	va_start(ap, fmt);
413
414	(void) fprintf(stderr, "%s: ", g_pname);
415	if (fmt != NULL)
416		(void) vfprintf(stderr, fmt, ap);
417
418	va_end(ap);
419
420	if (fmt != NULL && fmt[strlen(fmt) - 1] != '\n') {
421		(void) fprintf(stderr, ": %s\n",
422		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
423	} else if (fmt == NULL) {
424		(void) fprintf(stderr, "%s\n",
425		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
426	}
427
428	if (g_pr != NULL) {
429		dtrace_proc_continue(g_dtp, g_pr);
430		dtrace_proc_release(g_dtp, g_pr);
431	}
432
433	exit(E_ERROR);
434}
435
/*
 * Print a non-fatal diagnostic through verror(); unlike fatal(), control
 * returns to the caller.
 */
/*PRINTFLIKE1*/
static void
notice(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror(fmt, args);
	va_end(args);
}
446
447static void
448dprog_add(const char *prog)
449{
450	size_t len = strlen(prog);
451	bcopy(prog, g_prog + g_proglen, len + 1);
452	g_proglen += len;
453	assert(g_proglen < sizeof (g_prog));
454}
455
456static void
457dprog_compile(void)
458{
459	dtrace_prog_t *prog;
460	dtrace_proginfo_t info;
461
462	if (g_opt_V) {
463		(void) fprintf(stderr, "%s: vvvv D program vvvv\n", g_pname);
464		(void) fputs(g_prog, stderr);
465		(void) fprintf(stderr, "%s: ^^^^ D program ^^^^\n", g_pname);
466	}
467
468	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
469	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
470		dfatal("failed to compile program");
471
472	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
473		dfatal("failed to enable probes");
474}
475
/*
 * Print the column titles of the per-lock table to stdout.
 */
void
print_legend(void)
{
	const char *col_count = "Count";
	const char *col_time = "nsec";
	const char *col_lock = "Lock";
	const char *col_caller = "Caller";

	(void) printf("%5s %8s %-28s %s\n", col_count, col_time, col_lock,
	    col_caller);
}
481
/*
 * Print the horizontal rule that separates the table legend from its rows.
 */
void
print_bar(void)
{
	/* No conversions are involved, so the rule is written verbatim. */
	(void) fputs("---------------------------------------"
	    "----------------------------------------\n", stdout);
}
488
/*
 * Print the heading of the per-entry time distribution used in -s mode,
 * preceded by a blank line.
 */
void
print_histogram_header(void)
{
	const char *col_time = "nsec";
	const char *col_count = "count";
	const char *col_stack = "Stack";

	(void) printf("\n%10s ---- Time Distribution --- %5s %s\n",
	    col_time, col_count, col_stack);
}
495
496/*
497 * Convert an address to a symbolic string or a numeric string. If nolocks
498 * is set, we return an error code if this symbol appears to be a mutex- or
499 * rwlock-related symbol in libc so the caller has a chance to find a more
500 * helpful symbol.
501 */
502static int
503getsym(struct ps_prochandle *P, uintptr_t addr, char *buf, size_t size,
504    int nolocks)
505{
506	char name[256];
507	GElf_Sym sym;
508#if defined(sun)
509	prsyminfo_t info;
510#else
511	prmap_t *map;
512	int info; /* XXX unused */
513#endif
514	size_t len;
515
516	if (P == NULL || Pxlookup_by_addr(P, addr, name, sizeof (name),
517	    &sym, &info) != 0) {
518		(void) snprintf(buf, size, "%#lx", addr);
519		return (0);
520	}
521#if defined(sun)
522	if (info.prs_object == NULL)
523		info.prs_object = "<unknown>";
524
525	if (info.prs_lmid != LM_ID_BASE) {
526		len = snprintf(buf, size, "LM%lu`", info.prs_lmid);
527		buf += len;
528		size -= len;
529	}
530
531	len = snprintf(buf, size, "%s`%s", info.prs_object, info.prs_name);
532#else
533	map = proc_addr2map(P, addr);
534	len = snprintf(buf, size, "%s`%s", map->pr_mapname, name);
535#endif
536	buf += len;
537	size -= len;
538
539	if (sym.st_value != addr)
540		len = snprintf(buf, size, "+%#lx", addr - sym.st_value);
541
542	if (nolocks && strcmp("libc.so.1", map->pr_mapname) == 0 &&
543	    (strstr("mutex", name) == 0 ||
544	    strstr("rw", name) == 0))
545		return (-1);
546
547	return (0);
548}
549
550/*ARGSUSED*/
551static int
552process_aggregate(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
553{
554	const dtrace_recdesc_t *rec;
555	uintptr_t lock;
556	uint64_t *stack;
557	caddr_t data;
558	pid_t pid;
559	struct ps_prochandle *P;
560	char buf[256];
561	int i, j;
562	uint64_t sum, count, avg;
563
564	if ((*(uint_t *)arg)++ >= g_nent)
565		return (DTRACE_AGGWALK_NEXT);
566
567	rec = aggsdata[0]->dtada_desc->dtagd_rec;
568	data = aggsdata[0]->dtada_data;
569
570	/*LINTED - alignment*/
571	lock = (uintptr_t)*(uint64_t *)(data + rec[1].dtrd_offset);
572	/*LINTED - alignment*/
573	stack = (uint64_t *)(data + rec[2].dtrd_offset);
574
575	if (!g_opt_s) {
576		/*LINTED - alignment*/
577		sum = *(uint64_t *)(aggsdata[1]->dtada_data +
578		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
579		/*LINTED - alignment*/
580		count = *(uint64_t *)(aggsdata[2]->dtada_data +
581		    aggsdata[2]->dtada_desc->dtagd_rec[3].dtrd_offset);
582	} else {
583		uint64_t *a;
584
585		/*LINTED - alignment*/
586		a = (uint64_t *)(aggsdata[1]->dtada_data +
587		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
588
589		print_bar();
590		print_legend();
591
592		for (count = sum = 0, i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
593		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++) {
594			count += a[i];
595			sum += a[i] << (j - 64);
596		}
597	}
598
599	avg = sum / count;
600	(void) printf("%5llu %8llu ", (u_longlong_t)count, (u_longlong_t)avg);
601
602	pid = stack[0];
603	P = dtrace_proc_grab(g_dtp, pid, PGRAB_RDONLY);
604
605	(void) getsym(P, lock, buf, sizeof (buf), 0);
606	(void) printf("%-28s ", buf);
607
608	for (i = 2; i <= 5; i++) {
609		if (getsym(P, stack[i], buf, sizeof (buf), 1) == 0)
610			break;
611	}
612	(void) printf("%s\n", buf);
613
614	if (g_opt_s) {
615		int stack_done = 0;
616		int quant_done = 0;
617		int first_bin, last_bin;
618		uint64_t bin_size, *a;
619
620		/*LINTED - alignment*/
621		a = (uint64_t *)(aggsdata[1]->dtada_data +
622		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
623
624		print_histogram_header();
625
626		for (first_bin = DTRACE_QUANTIZE_ZEROBUCKET;
627		    a[first_bin] == 0; first_bin++)
628			continue;
629		for (last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 63;
630		    a[last_bin] == 0; last_bin--)
631			continue;
632
633		for (i = 0; !stack_done || !quant_done; i++) {
634			if (!stack_done) {
635				(void) getsym(P, stack[i + 2], buf,
636				    sizeof (buf), 0);
637			} else {
638				buf[0] = '\0';
639			}
640
641			if (!quant_done) {
642				bin_size = a[first_bin];
643
644				(void) printf("%10llu |%-24.*s| %5llu %s\n",
645				    1ULL <<
646				    (first_bin - DTRACE_QUANTIZE_ZEROBUCKET),
647				    (int)(24.0 * bin_size / count),
648				    "@@@@@@@@@@@@@@@@@@@@@@@@@@",
649				    (u_longlong_t)bin_size, buf);
650			} else {
651				(void) printf("%43s %s\n", "", buf);
652			}
653
654			if (i + 1 >= g_nframes || stack[i + 3] == 0)
655				stack_done = 1;
656
657			if (first_bin++ == last_bin)
658				quant_done = 1;
659		}
660	}
661
662	dtrace_proc_release(g_dtp, P);
663
664	return (DTRACE_AGGWALK_NEXT);
665}
666
667/*ARGSUSED*/
668static void
669prochandler(struct ps_prochandle *P, const char *msg, void *arg)
670{
671#if defined(sun)
672	const psinfo_t *prp = Ppsinfo(P);
673	int pid = Pstatus(P)->pr_pid;
674#else
675	int pid = proc_getpid(P);
676	int wstat = proc_getwstat(P);
677#endif
678	char name[SIG2STR_MAX];
679
680	if (msg != NULL) {
681		notice("pid %d: %s\n", pid, msg);
682		return;
683	}
684
685	switch (Pstate(P)) {
686	case PS_UNDEAD:
687		/*
688		 * Ideally we would like to always report pr_wstat here, but it
689		 * isn't possible given current /proc semantics.  If we grabbed
690		 * the process, Ppsinfo() will either fail or return a zeroed
691		 * psinfo_t depending on how far the parent is in reaping it.
692		 * When /proc provides a stable pr_wstat in the status file,
693		 * this code can be improved by examining this new pr_wstat.
694		 */
695		if (WIFSIGNALED(wstat)) {
696			notice("pid %d terminated by %s\n", pid,
697			    proc_signame(WTERMSIG(wstat),
698			    name, sizeof (name)));
699		} else if (WEXITSTATUS(wstat) != 0) {
700			notice("pid %d exited with status %d\n",
701			    pid, WEXITSTATUS(wstat));
702		} else {
703			notice("pid %d has exited\n", pid);
704		}
705		g_exited = 1;
706		break;
707
708	case PS_LOST:
709		notice("pid %d exec'd a set-id or unobservable program\n", pid);
710		g_exited = 1;
711		break;
712	}
713}
714
715/*ARGSUSED*/
716static int
717chewrec(const dtrace_probedata_t *data, const dtrace_recdesc_t *rec, void *arg)
718{
719	dtrace_eprobedesc_t *epd = data->dtpda_edesc;
720	dtrace_aggvarid_t aggvars[2];
721	const void *buf;
722	int i, nagv;
723
724	/*
725	 * A NULL rec indicates that we've processed the last record.
726	 */
727	if (rec == NULL)
728		return (DTRACE_CONSUME_NEXT);
729
730	buf = data->dtpda_data - rec->dtrd_offset;
731
732	switch (rec->dtrd_action) {
733	case DTRACEACT_DIFEXPR:
734		(void) printf("\n%s\n\n", (char *)buf + rec->dtrd_offset);
735		if (!g_opt_s) {
736			print_legend();
737			print_bar();
738		}
739		return (DTRACE_CONSUME_NEXT);
740
741	case DTRACEACT_PRINTA:
742		for (nagv = 0, i = 0; i < epd->dtepd_nrecs - 1; i++) {
743			const dtrace_recdesc_t *nrec = &rec[i];
744
745			if (nrec->dtrd_uarg != rec->dtrd_uarg)
746				break;
747
748			/*LINTED - alignment*/
749			aggvars[nagv++] = *(dtrace_aggvarid_t *)((caddr_t)buf +
750			    nrec->dtrd_offset);
751		}
752
753		if (nagv == (g_opt_s ? 1 : 2)) {
754			uint_t nent = 0;
755			if (dtrace_aggregate_walk_joined(g_dtp, aggvars, nagv,
756			    process_aggregate, &nent) != 0)
757				dfatal("failed to walk aggregate");
758		}
759
760		return (DTRACE_CONSUME_NEXT);
761	}
762
763	return (DTRACE_CONSUME_THIS);
764}
765
766/*ARGSUSED*/
767static void
768intr(int signo)
769{
770	g_intr = 1;
771}
772
773int
774main(int argc, char **argv)
775{
776#if defined(sun)
777	ucred_t *ucp;
778#endif
779	int err;
780	int opt_C = 0, opt_H = 0, opt_p = 0, opt_v = 0;
781	char c, *p, *end;
782	struct sigaction act;
783	int done = 0;
784
785	g_pname = basename(argv[0]);
786	argv[0] = g_pname; /* rewrite argv[0] for getopt errors */
787#if defined(sun)
788	/*
789	 * Make sure we have the required dtrace_proc privilege.
790	 */
791	if ((ucp = ucred_get(getpid())) != NULL) {
792		const priv_set_t *psp;
793		if ((psp = ucred_getprivset(ucp, PRIV_EFFECTIVE)) != NULL &&
794		    !priv_ismember(psp, PRIV_DTRACE_PROC)) {
795			fatal("dtrace_proc privilege required\n");
796		}
797
798		ucred_free(ucp);
799	}
800#endif
801
802	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
803		switch (c) {
804		case 'n':
805			errno = 0;
806			g_nent = strtoul(optarg, &end, 10);
807			if (*end != '\0' || errno != 0) {
808				(void) fprintf(stderr, "%s: invalid count "
809				    "'%s'\n", g_pname, optarg);
810				usage();
811			}
812			break;
813
814		case 'p':
815			opt_p = 1;
816			break;
817
818		case 'v':
819			opt_v = 1;
820			break;
821
822		case 'A':
823			opt_C = opt_H = 1;
824			break;
825
826		case 'C':
827			opt_C = 1;
828			break;
829
830		case 'H':
831			opt_H = 1;
832			break;
833
834		case 'V':
835			g_opt_V = 1;
836			break;
837
838		default:
839			if (strchr(PLOCKSTAT_OPTSTR, c) == NULL)
840				usage();
841		}
842	}
843
844	/*
845	 * We need a command or at least one pid.
846	 */
847	if (argc == optind)
848		usage();
849
850	if (opt_C == 0 && opt_H == 0)
851		opt_C = 1;
852
853	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL)
854		fatal("failed to initialize dtrace: %s\n",
855		    dtrace_errmsg(NULL, err));
856
857	/*
858	 * The longest string we trace is 23 bytes long -- so 32 is plenty.
859	 */
860	if (dtrace_setopt(g_dtp, "strsize", "32") == -1)
861		dfatal("failed to set 'strsize'");
862
863	/*
864	 * 1k should be more than enough for all trace() and printa() actions.
865	 */
866	if (dtrace_setopt(g_dtp, "bufsize", "1k") == -1)
867		dfatal("failed to set 'bufsize'");
868
869	/*
870	 * The table we produce has the hottest locks at the top.
871	 */
872	if (dtrace_setopt(g_dtp, "aggsortrev", NULL) == -1)
873		dfatal("failed to set 'aggsortrev'");
874
875	/*
876	 * These are two reasonable defaults which should suffice.
877	 */
878	if (dtrace_setopt(g_dtp, "aggsize", "256k") == -1)
879		dfatal("failed to set 'aggsize'");
880	if (dtrace_setopt(g_dtp, "aggrate", "1sec") == -1)
881		dfatal("failed to set 'aggrate'");
882
883	/*
884	 * Take a second pass through to look for options that set options now
885	 * that we have an open dtrace handle.
886	 */
887	optind = 1;
888	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
889		switch (c) {
890		case 's':
891			g_opt_s = 1;
892			if (dtrace_setopt(g_dtp, "ustackframes", optarg) == -1)
893				dfatal("failed to set 'ustackframes'");
894			break;
895
896		case 'x':
897			if ((p = strchr(optarg, '=')) != NULL)
898				*p++ = '\0';
899
900			if (dtrace_setopt(g_dtp, optarg, p) != 0)
901				dfatal("failed to set -x %s", optarg);
902			break;
903
904		case 'e':
905			errno = 0;
906			(void) strtoul(optarg, &end, 10);
907			if (*optarg == '-' || *end != '\0' || errno != 0) {
908				(void) fprintf(stderr, "%s: invalid timeout "
909				    "'%s'\n", g_pname, optarg);
910				usage();
911			}
912
913			/*
914			 * Construct a DTrace enabling that will exit after
915			 * the specified number of seconds.
916			 */
917			dprog_add("BEGIN\n{\n\tend = timestamp + ");
918			dprog_add(optarg);
919			dprog_add(" * 1000000000;\n}\n");
920			dprog_add("tick-10hz\n/timestamp >= end/\n");
921			dprog_add("{\n\texit(0);\n}\n");
922			break;
923		}
924	}
925
926	argc -= optind;
927	argv += optind;
928
929	if (opt_H) {
930		dprog_add(g_hold_init);
931		if (!g_opt_s)
932			dprog_add(g_hold_times);
933		else
934			dprog_add(g_hold_histogram);
935	}
936
937	if (opt_C) {
938		dprog_add(g_ctnd_init);
939		if (!g_opt_s)
940			dprog_add(g_ctnd_times);
941		else
942			dprog_add(g_ctnd_histogram);
943	}
944
945	if (opt_p) {
946		ulong_t pid;
947
948		if (argc > 1) {
949			(void) fprintf(stderr, "%s: only one pid is allowed\n",
950			    g_pname);
951			usage();
952		}
953
954		errno = 0;
955		pid = strtoul(argv[0], &end, 10);
956		if (*end != '\0' || errno != 0 || (pid_t)pid != pid) {
957			(void) fprintf(stderr, "%s: invalid pid '%s'\n",
958			    g_pname, argv[0]);
959			usage();
960		}
961
962		if ((g_pr = dtrace_proc_grab(g_dtp, (pid_t)pid, 0)) == NULL)
963			dfatal(NULL);
964	} else {
965		if ((g_pr = dtrace_proc_create(g_dtp, argv[0], argv, NULL, NULL)) == NULL)
966			dfatal(NULL);
967	}
968
969	dprog_compile();
970
971	if (dtrace_handle_proc(g_dtp, &prochandler, NULL) == -1)
972		dfatal("failed to establish proc handler");
973
974	(void) sigemptyset(&act.sa_mask);
975	act.sa_flags = 0;
976	act.sa_handler = intr;
977	(void) sigaction(SIGINT, &act, NULL);
978	(void) sigaction(SIGTERM, &act, NULL);
979
980	if (dtrace_go(g_dtp) != 0)
981		dfatal("dtrace_go()");
982
983	if (dtrace_getopt(g_dtp, "ustackframes", &g_nframes) != 0)
984		dfatal("failed to get 'ustackframes'");
985
986	dtrace_proc_continue(g_dtp, g_pr);
987
988	if (opt_v)
989		(void) printf("%s: tracing enabled for pid %d\n", g_pname,
990#if defined(sun)
991		    (int)Pstatus(g_pr)->pr_pid);
992#else
993		    (int)proc_getpid(g_pr));
994#endif
995
996	do {
997		if (!g_intr && !done)
998			dtrace_sleep(g_dtp);
999
1000		if (done || g_intr || g_exited) {
1001			done = 1;
1002			if (dtrace_stop(g_dtp) == -1)
1003				dfatal("couldn't stop tracing");
1004		}
1005
1006		switch (dtrace_work(g_dtp, stdout, NULL, chewrec, NULL)) {
1007		case DTRACE_WORKSTATUS_DONE:
1008			done = 1;
1009			break;
1010		case DTRACE_WORKSTATUS_OKAY:
1011			break;
1012		default:
1013			dfatal("processing aborted");
1014		}
1015
1016	} while (!done);
1017
1018	dtrace_close(g_dtp);
1019
1020	return (0);
1021}
1022