1/* vim: set noexpandtab tabstop=4 shiftwidth=4 : */
2/*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23/*
24 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25 * Use is subject to license terms.
26 */
27
28#pragma ident	"%Z%%M%	%I%	%E% SMI"
29
30#include <assert.h>
31#include <dtrace.h>
32#include <limits.h>
33#include <signal.h>
34#include <stdlib.h>
35#include <stdarg.h>
36#include <stdio.h>
37#include <string.h>
38#include <strings.h>
39#include <errno.h>
40#include <sys/wait.h>
41#include <libgen.h>
42#include <libproc.h>
43#include <getopt.h>
44
/* Program name (basename of argv[0]); prefixes every diagnostic message. */
static char *g_pname;
/* libdtrace handle opened in main(). */
static dtrace_hdl_t *g_dtp;
/* The traced process: created from the command line or grabbed with -p. */
struct ps_prochandle *g_pr;

/* Process exit codes. */
#define	E_SUCCESS	0
#define	E_ERROR		1
#define	E_USAGE		2
52
/*
 * D clauses that record the time at which a lock was acquired, for hold-time
 * tracing (-H).
 *
 * For hold times we use a global associative array for mutexes since, in
 * user-land, it's not invalid to release a synchronization primitive that
 * another thread acquired; rwlocks require a thread-local associative array
 * since multiple threads can hold the same lock for reading. Note that we
 * ignore recursive mutex acquisitions and releases (the arg1 == 0 predicate)
 * as they don't truly affect lock contention.
 */
static const char *g_hold_init =
"plockstat$target:::rw-acquire\n"
"{\n"
"	self->rwhold[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-acquire\n"
"/arg1 == 0/\n"
"{\n"
"	mtxhold[arg0] = timestamp;\n"
"}\n";
71
/*
 * Hold-time clauses for histogram output; main() selects this program when
 * -s was given. On release, the elapsed time since the matching acquire is
 * fed to quantize(), keyed by lock address (arg0) and the full ustack().
 *
 * The two rw-release clauses run in order: the first handles arg1 == 1 (the
 * "writer" aggregation) and clears self->rwhold[arg0], so the second only
 * fires for releases the first did not consume (the "reader" aggregation).
 *
 * Each *_found flag gates the END clause that trace()s a section title and
 * printa()s the corresponding aggregation.
 */
static const char *g_hold_histogram =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwhold[arg0]);\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack()] = quantize(timestamp - mtxhold[arg0]);\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold);\n"
"}\n";
115
/*
 * Hold-time clauses for the summary table; main() selects this program when
 * -s was not given. Each release adds the elapsed hold time to a sum()
 * aggregation and bumps a parallel count() aggregation, both keyed by lock
 * address (arg0) and a five-frame ustack(5).
 *
 * As in the histogram variant, the first rw-release clause (arg1 == 1) clears
 * self->rwhold[arg0], so the second clause only sees the remaining releases.
 *
 * Each END clause trace()s a section title and printa()s the sum/count pair
 * together, so the consumer receives two aggregations per printa().
 */
static const char *g_hold_times =
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0] && arg1 == 1/\n"
"{\n"
"	@rw_w_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_w_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_w_hold_found = 1;\n"
"}\n"
"plockstat$target:::rw-release\n"
"/self->rwhold[arg0]/\n"
"{\n"
"	@rw_r_hold[arg0, ustack(5)] = sum(timestamp - self->rwhold[arg0]);\n"
"	@rw_r_hold_count[arg0, ustack(5)] = count();\n"
"	self->rwhold[arg0] = 0;\n"
"	rw_r_hold_found = 1;\n"
"}\n"
"plockstat$target:::mutex-release\n"
"/mtxhold[arg0] && arg1 == 0/\n"
"{\n"
"	@mtx_hold[arg0, ustack(5)] = sum(timestamp - mtxhold[arg0]);\n"
"	@mtx_hold_count[arg0, ustack(5)] = count();\n"
"	mtxhold[arg0] = 0;\n"
"	mtx_hold_found = 1;\n"
"}\n"
"\n"
"END\n"
"/mtx_hold_found/\n"
"{\n"
"	trace(\"Mutex hold\");\n"
"	printa(@mtx_hold, @mtx_hold_count);\n"
"}\n"
"END\n"
"/rw_r_hold_found/\n"
"{\n"
"	trace(\"R/W reader hold\");\n"
"	printa(@rw_r_hold, @rw_r_hold_count);\n"
"}\n"
"END\n"
"/rw_w_hold_found/\n"
"{\n"
"	trace(\"R/W writer hold\");\n"
"	printa(@rw_w_hold, @rw_w_hold_count);\n"
"}\n";
160
161
/*
 * D clauses that record the time at which a thread starts blocking or
 * spinning on a lock, for contention tracing (-C).
 *
 * For contention, we use thread-local associative arrays since we're tracing
 * a single thread's activity in libc and multiple threads can be blocking or
 * spinning on the same synchronization primitive.
 */
static const char *g_ctnd_init =
"plockstat$target:::rw-block\n"
"{\n"
"	self->rwblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-block\n"
"{\n"
"	self->mtxblock[arg0] = timestamp;\n"
"}\n"
"plockstat$target:::mutex-spin\n"
"{\n"
"	self->mtxspin[arg0] = timestamp;\n"
"}\n";
180
/*
 * Contention clauses for histogram output; main() selects this program when
 * -s was given. Elapsed block/spin time is fed to quantize(), keyed by lock
 * address (arg0) and the full ustack().
 *
 * Clauses for the same probe run in order and each clears the thread-local
 * start time, so they behave like an if / else-if chain:
 *   rw-blocked:    arg1 == 1 && arg2 != 0 -> writer block;
 *                  arg2 != 0              -> reader block;
 *                  otherwise the start time is discarded.
 *   mutex-spun:    arg1 != 0 -> spin; otherwise "unsuccessful spin".
 *   mutex-blocked: arg1 != 0 -> block; otherwise the start time is discarded.
 * NOTE(review): the discarded cases are presumably blocks that ended without
 * acquiring the lock -- confirm against the plockstat provider documentation.
 *
 * Each *_found flag gates the END clause that trace()s a section title and
 * printa()s the corresponding aggregation.
 */
static const char *g_ctnd_histogram =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->rwblock[arg0]);\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxspin[arg0]);\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack()] =\n"
"	    quantize(timestamp - self->mtxblock[arg0]);\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block);\n"
"}\n";
263
264
/*
 * Contention clauses for the summary table; main() selects this program when
 * -s was not given. Elapsed block/spin time is added to a sum() aggregation
 * and a parallel count() aggregation is bumped, both keyed by lock address
 * (arg0) and a five-frame ustack(5).
 *
 * The per-probe clause ordering is the same as in the histogram variant:
 * each clause clears the thread-local start time, so a later clause for the
 * same probe only fires when no earlier clause matched.
 *
 * Each END clause trace()s a section title and printa()s the sum/count pair
 * together, so the consumer receives two aggregations per printa().
 */
static const char *g_ctnd_times =
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg1 == 1 && arg2 != 0/\n"
"{\n"
"	@rw_w_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_w_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_w_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0] && arg2 != 0/\n"
"{\n"
"	@rw_r_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->rwblock[arg0]);\n"
"	@rw_r_block_count[arg0, ustack(5)] = count();\n"
"	self->rwblock[arg0] = 0;\n"
"	rw_r_block_found = 1;\n"
"}\n"
"plockstat$target:::rw-blocked\n"
"/self->rwblock[arg0]/\n"
"{\n"
"	self->rwblock[arg0] = 0;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-spun\n"
"/self->mtxspin[arg0]/\n"
"{\n"
"	@mtx_vain_spin[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxspin[arg0]);\n"
"	@mtx_vain_spin_count[arg0, ustack(5)] = count();\n"
"	self->mtxspin[arg0] = 0;\n"
"	mtx_vain_spin_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0] && arg1 != 0/\n"
"{\n"
"	@mtx_block[arg0, ustack(5)] =\n"
"	    sum(timestamp - self->mtxblock[arg0]);\n"
"	@mtx_block_count[arg0, ustack(5)] = count();\n"
"	self->mtxblock[arg0] = 0;\n"
"	mtx_block_found = 1;\n"
"}\n"
"plockstat$target:::mutex-blocked\n"
"/self->mtxblock[arg0]/\n"
"{\n"
"	self->mtxblock[arg0] = 0;\n"
"}\n"
"\n"
"END\n"
"/mtx_block_found/\n"
"{\n"
"	trace(\"Mutex block\");\n"
"	printa(@mtx_block, @mtx_block_count);\n"
"}\n"
"END\n"
"/mtx_spin_found/\n"
"{\n"
"	trace(\"Mutex spin\");\n"
"	printa(@mtx_spin, @mtx_spin_count);\n"
"}\n"
"END\n"
"/mtx_vain_spin_found/\n"
"{\n"
"	trace(\"Mutex unsuccessful spin\");\n"
"	printa(@mtx_vain_spin, @mtx_vain_spin_count);\n"
"}\n"
"END\n"
"/rw_r_block_found/\n"
"{\n"
"	trace(\"R/W reader block\");\n"
"	printa(@rw_r_block, @rw_r_block_count);\n"
"}\n"
"END\n"
"/rw_w_block_found/\n"
"{\n"
"	trace(\"R/W writer block\");\n"
"	printa(@rw_w_block, @rw_w_block_count);\n"
"}\n";
352
353static char g_prog[4096];
354static size_t g_proglen;
355static int g_opt_V, g_opt_s;
356static int g_intr;
357static int g_exited;
358static dtrace_optval_t g_nframes;
359static ulong_t g_nent = ULONG_MAX;
360
361#define	PLOCKSTAT_OPTSTR	"n:ps:e:vx:ACHV"
362
363static void
364usage(void)
365{
366	(void) fprintf(stderr, "Usage:\n"
367	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
368	    "\t    command [arg...]\n"
369	    "\t%s [-vACHV] [-n count] [-s depth] [-e secs] [-x opt[=val]]\n"
370	    "\t    -p pid\n", g_pname, g_pname);
371
372	(void) fprintf(stderr, "\n");
373	(void) fprintf(stderr, "\t-v\t\tprint a message when tracing starts\n");
374	(void) fprintf(stderr, "\t-A\t\ttrace contention and hold events (same as -CH)\n");
375	(void) fprintf(stderr, "\t-C\t\ttrace contention events for mutexes and rwlocks\n");
376	(void) fprintf(stderr, "\t-H\t\ttrace hold events for mutexes and rwlocks\n");
377	(void) fprintf(stderr, "\t-V\t\tprint the dtrace script to run\n");
378	(void) fprintf(stderr, "\t-n count\tdisplay only \'count\' entries for each event type\n");
379	(void) fprintf(stderr, "\t-s depth\tshow stack trace upto \'depth\' entries\n");
380	(void) fprintf(stderr, "\t-e secs\t\texit after specified seconds\n");
381	(void) fprintf(stderr, "\t-x arg[=val]\tenable a DTrace runtime option or a D compiler option\n");
382	(void) fprintf(stderr, "\t-p pid\t\tattach and trace the specified process id\n");
383
384	exit(E_USAGE);
385}
386
387static void
388verror(const char *fmt, va_list ap)
389{
390	int error = errno;
391
392	(void) fprintf(stderr, "%s: ", g_pname);
393	(void) vfprintf(stderr, fmt, ap);
394
395	if (fmt[strlen(fmt) - 1] != '\n')
396		(void) fprintf(stderr, ": %s\n", strerror(error));
397}
398
399/*PRINTFLIKE1*/
400static void
401fatal(const char *fmt, ...)
402{
403	va_list ap;
404
405	va_start(ap, fmt);
406	verror(fmt, ap);
407	va_end(ap);
408
409	if (g_pr != NULL && g_dtp != NULL)
410		dtrace_proc_release(g_dtp, g_pr);
411
412	exit(E_ERROR);
413}
414
415/*PRINTFLIKE1*/
416static void
417dfatal(const char *fmt, ...)
418{
419	va_list ap;
420
421	va_start(ap, fmt);
422
423	(void) fprintf(stderr, "%s: ", g_pname);
424	if (fmt != NULL)
425		(void) vfprintf(stderr, fmt, ap);
426
427	va_end(ap);
428
429	if (fmt != NULL && fmt[strlen(fmt) - 1] != '\n') {
430		(void) fprintf(stderr, ": %s\n",
431		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
432	} else if (fmt == NULL) {
433		(void) fprintf(stderr, "%s\n",
434		    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
435	}
436
437	if (g_pr != NULL) {
438		dtrace_proc_continue(g_dtp, g_pr);
439		dtrace_proc_release(g_dtp, g_pr);
440	}
441
442	exit(E_ERROR);
443}
444
/*
 * Emit a non-fatal diagnostic: the arguments are handed to verror() and
 * control returns to the caller.
 */
/*PRINTFLIKE1*/
static void
notice(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	verror(fmt, args);
	va_end(args);
}
455
456static void
457dprog_add(const char *prog)
458{
459	size_t len = strlen(prog);
460	bcopy(prog, g_prog + g_proglen, len + 1);
461	g_proglen += len;
462	assert(g_proglen < sizeof (g_prog));
463}
464
465static void
466dprog_compile(void)
467{
468	dtrace_prog_t *prog;
469	dtrace_proginfo_t info;
470
471	if (g_opt_V) {
472		(void) fprintf(stderr, "%s: vvvv D program vvvv\n", g_pname);
473		(void) fputs(g_prog, stderr);
474		(void) fprintf(stderr, "%s: ^^^^ D program ^^^^\n", g_pname);
475	}
476
477	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
478	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
479		dfatal("failed to compile program");
480
481	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
482		dfatal("failed to enable probes");
483}
484
/*
 * Print the column headings of the summary table to stdout.
 */
void
print_legend(void)
{
	static const char *const heading[] = {
		"Count", "nsec", "Lock", "Caller"
	};

	(void) printf("%5s %8s %-28s %s\n",
	    heading[0], heading[1], heading[2], heading[3]);
}
490
/*
 * Print a horizontal rule (a line of dashes) to stdout.
 */
void
print_bar(void)
{
	static const char rule[] =
	    "---------------------------------------"
	    "----------------------------------------\n";

	(void) fputs(rule, stdout);
}
497
/*
 * Print the heading line that precedes a time-distribution histogram,
 * preceded by a blank line, to stdout.
 */
void
print_histogram_header(void)
{
	const char *unit = "nsec";
	const char *tally = "count";
	const char *where = "Stack";

	(void) printf("\n%10s ---- Time Distribution --- %5s %s\n",
	    unit, tally, where);
}
504
505/*
506 * Convert an address to a symbolic string or a numeric string. If nolocks
507 * is set, we return an error code if this symbol appears to be a mutex- or
508 * rwlock-related symbol in libc so the caller has a chance to find a more
509 * helpful symbol.
510 */
511static int
512getsym(struct ps_prochandle *P, uintptr_t addr, char *buf, size_t size,
513    int nolocks)
514{
515	char name[256];
516	GElf_Sym sym;
517	prsyminfo_t info;
518	size_t len;
519
520	if (P == NULL || Pxlookup_by_addr(P, addr, name, sizeof (name),
521	    &sym, &info) != 0) {
522		(void) snprintf(buf, size, "%#lx", addr);
523		return (0);
524	}
525	if (info.prs_object == NULL)
526		info.prs_object = "<unknown>";
527
528	if (info.prs_lmid != LM_ID_BASE) {
529		len = snprintf(buf, size, "LM%lu`", info.prs_lmid);
530		buf += len;
531		size -= len;
532	}
533
534	len = snprintf(buf, size, "%s`%s", info.prs_object, info.prs_name);
535	buf += len;
536	size -= len;
537
538	if (sym.st_value != addr)
539		len = snprintf(buf, size, "+%#lx", addr - sym.st_value);
540
541	if (nolocks && strcmp("libc.so.1", info.prs_object) == 0 &&
542	    (strstr("mutex", info.prs_name) == 0 ||
543	    strstr("rw", info.prs_name) == 0))
544		return (-1);
545
546	return (0);
547}
548
549/*ARGSUSED*/
550static int
551process_aggregate(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
552{
553	const dtrace_recdesc_t *rec;
554	uintptr_t lock;
555	uint64_t *stack;
556	caddr_t data;
557	pid_t pid;
558	struct ps_prochandle *P;
559	char buf[256];
560	int i, j;
561	uint64_t sum, count, avg;
562
563	if ((*(uint_t *)arg)++ >= g_nent)
564		return (DTRACE_AGGWALK_NEXT);
565
566	rec = aggsdata[0]->dtada_desc->dtagd_rec;
567	data = aggsdata[0]->dtada_data;
568
569	/*LINTED - alignment*/
570	lock = (uintptr_t)*(uint64_t *)(data + rec[1].dtrd_offset);
571	/*LINTED - alignment*/
572	stack = (uint64_t *)(data + rec[2].dtrd_offset);
573
574	if (!g_opt_s) {
575		/*LINTED - alignment*/
576		sum = *(uint64_t *)(aggsdata[1]->dtada_data +
577		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
578		/*LINTED - alignment*/
579		count = *(uint64_t *)(aggsdata[2]->dtada_data +
580		    aggsdata[2]->dtada_desc->dtagd_rec[3].dtrd_offset);
581	} else {
582		uint64_t *a;
583
584		/*LINTED - alignment*/
585		a = (uint64_t *)(aggsdata[1]->dtada_data +
586		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
587
588		print_bar();
589		print_legend();
590
591		for (count = sum = 0, i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
592		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++) {
593			count += a[i];
594			sum += a[i] << (j - 64);
595		}
596	}
597
598	avg = sum / count;
599	(void) printf("%5llu %8llu ", (u_longlong_t)count, (u_longlong_t)avg);
600
601	pid = stack[0];
602	P = dtrace_proc_grab(g_dtp, pid, PGRAB_RDONLY);
603
604	(void) getsym(P, lock, buf, sizeof (buf), 0);
605	(void) printf("%-28s ", buf);
606
607	for (i = 2; i <= 5; i++) {
608		if (getsym(P, stack[i], buf, sizeof (buf), 1) == 0)
609			break;
610	}
611	(void) printf("%s\n", buf);
612
613	if (g_opt_s) {
614		int stack_done = 0;
615		int quant_done = 0;
616		int first_bin, last_bin;
617		uint64_t bin_size, *a;
618
619		/*LINTED - alignment*/
620		a = (uint64_t *)(aggsdata[1]->dtada_data +
621		    aggsdata[1]->dtada_desc->dtagd_rec[3].dtrd_offset);
622
623		print_histogram_header();
624
625		for (first_bin = DTRACE_QUANTIZE_ZEROBUCKET;
626		    a[first_bin] == 0; first_bin++)
627			continue;
628		for (last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 63;
629		    a[last_bin] == 0; last_bin--)
630			continue;
631
632		for (i = 0; !stack_done || !quant_done; i++) {
633			if (!stack_done) {
634				(void) getsym(P, stack[i + 2], buf,
635				    sizeof (buf), 0);
636			} else {
637				buf[0] = '\0';
638			}
639
640			if (!quant_done) {
641				bin_size = a[first_bin];
642
643				(void) printf("%10llu |%-24.*s| %5llu %s\n",
644				    1ULL <<
645				    (first_bin - DTRACE_QUANTIZE_ZEROBUCKET),
646				    (int)(24.0 * bin_size / count),
647				    "@@@@@@@@@@@@@@@@@@@@@@@@@@",
648				    (u_longlong_t)bin_size, buf);
649			} else {
650				(void) printf("%43s %s\n", "", buf);
651			}
652
653			if (i + 1 >= g_nframes || stack[i + 3] == 0)
654				stack_done = 1;
655
656			if (first_bin++ == last_bin)
657				quant_done = 1;
658		}
659	}
660
661	dtrace_proc_release(g_dtp, P);
662
663	return (DTRACE_AGGWALK_NEXT);
664}
665
666/*ARGSUSED*/
667static void
668prochandler(struct ps_prochandle *P, const char *msg, void *arg)
669{
670#define SIG2STR_MAX 32 /* Not referenced so long as prp just below is NULL. */
671#define proc_signame(x,y,z) "Unknown" /* Not referenced so long as prp just below is NULL. */
672	typedef struct psinfo { int pr_wstat; } psinfo_t;
673	const psinfo_t *prp = NULL;
674	int pid = Pstatus(P)->pr_pid;
675
676	if (msg != NULL) {
677		notice("pid %d: %s\n", pid, msg);
678		return;
679	}
680
681	switch (Pstate(P)) {
682	case PS_UNDEAD:
683		/*
684		 * Ideally we would like to always report pr_wstat here, but it
685		 * isn't possible given current /proc semantics.  If we grabbed
686		 * the process, Ppsinfo() will either fail or return a zeroed
687		 * psinfo_t depending on how far the parent is in reaping it.
688		 * When /proc provides a stable pr_wstat in the status file,
689		 * this code can be improved by examining this new pr_wstat.
690		 */
691		if (prp != NULL && WIFSIGNALED(prp->pr_wstat)) {
692			notice("pid %d terminated by %s\n", pid,
693			    proc_signame(WTERMSIG(prp->pr_wstat),
694			    name, sizeof (name)));
695		} else if (prp != NULL && WEXITSTATUS(prp->pr_wstat) != 0) {
696			notice("pid %d exited with status %d\n",
697			    pid, WEXITSTATUS(prp->pr_wstat));
698		} else {
699			notice("pid %d has exited\n", pid);
700		}
701		g_exited = 1;
702		break;
703
704	case PS_LOST:
705		notice("pid %d has exited\n", pid);
706		g_exited = 1;
707		break;
708	}
709}
710
711/*ARGSUSED*/
712static int
713chewrec(const dtrace_probedata_t *data, const dtrace_recdesc_t *rec, void *arg)
714{
715	dtrace_eprobedesc_t *epd = data->dtpda_edesc;
716	dtrace_aggvarid_t aggvars[2];
717	const void *buf;
718	int i, nagv;
719
720	/*
721	 * A NULL rec indicates that we've processed the last record.
722	 */
723	if (rec == NULL)
724		return (DTRACE_CONSUME_NEXT);
725
726	buf = data->dtpda_data - rec->dtrd_offset;
727
728	switch (rec->dtrd_action) {
729	case DTRACEACT_DIFEXPR:
730		(void) printf("\n%s\n\n", (char *)buf + rec->dtrd_offset);
731		if (!g_opt_s) {
732			print_legend();
733			print_bar();
734		}
735		return (DTRACE_CONSUME_NEXT);
736
737	case DTRACEACT_PRINTA:
738		for (nagv = 0, i = 0; i < epd->dtepd_nrecs - 1; i++) {
739			const dtrace_recdesc_t *nrec = &rec[i];
740
741			if (nrec->dtrd_uarg != rec->dtrd_uarg)
742				break;
743
744			/*LINTED - alignment*/
745			aggvars[nagv++] = *(dtrace_aggvarid_t *)((caddr_t)buf +
746			    nrec->dtrd_offset);
747		}
748
749		if (nagv == (g_opt_s ? 1 : 2)) {
750			uint_t nent = 0;
751			if (dtrace_aggregate_walk_joined(g_dtp, aggvars, nagv,
752			    process_aggregate, &nent) != 0)
753				dfatal("failed to walk aggregate");
754		}
755
756		return (DTRACE_CONSUME_NEXT);
757	}
758
759	return (DTRACE_CONSUME_THIS);
760}
761
762/*ARGSUSED*/
763static void
764intr(int signo)
765{
766	g_intr = 1;
767}
768
769int
770main(int argc, char **argv)
771{
772	int err;
773	int opt_C = 0, opt_H = 0, opt_p = 0, opt_v = 0;
774	char c, *p, *end;
775	struct sigaction act;
776	int done = 0;
777
778	g_pname = basename(argv[0]);
779	argv[0] = g_pname; /* rewrite argv[0] for getopt errors */
780
781	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
782		switch (c) {
783		case 'n':
784			errno = 0;
785			g_nent = strtoul(optarg, &end, 10);
786			if (*end != '\0' || errno != 0) {
787				(void) fprintf(stderr, "%s: invalid count "
788				    "'%s'\n", g_pname, optarg);
789				usage();
790			}
791			break;
792
793		case 'p':
794			opt_p = 1;
795			break;
796
797		case 'v':
798			opt_v = 1;
799			break;
800
801		case 'A':
802			opt_C = opt_H = 1;
803			break;
804
805		case 'C':
806			opt_C = 1;
807			break;
808
809		case 'H':
810			opt_H = 1;
811			break;
812
813		case 'V':
814			g_opt_V = 1;
815			break;
816
817		default:
818			if (strchr(PLOCKSTAT_OPTSTR, c) == NULL)
819				usage();
820		}
821	}
822
823	/*
824	 * We need a command or at least one pid.
825	 */
826	if (argc == optind)
827		usage();
828
829	if (opt_C == 0 && opt_H == 0)
830		opt_C = 1;
831
832	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL)
833		fatal("failed to initialize dtrace: %s\n",
834		    dtrace_errmsg(NULL, err));
835
836	/*
837	 * The longest string we trace is 23 bytes long -- so 32 is plenty.
838	 */
839	if (dtrace_setopt(g_dtp, "strsize", "32") == -1)
840		dfatal("failed to set 'strsize'");
841
842	/*
843	 * 1k should be more than enough for all trace() and printa() actions.
844	 */
845	if (dtrace_setopt(g_dtp, "bufsize", "1k") == -1)
846		dfatal("failed to set 'bufsize'");
847
848	/*
849	 * The table we produce has the hottest locks at the top.
850	 */
851	if (dtrace_setopt(g_dtp, "aggsortrev", NULL) == -1)
852		dfatal("failed to set 'aggsortrev'");
853
854	/*
855	 * These are two reasonable defaults which should suffice.
856	 */
857	if (dtrace_setopt(g_dtp, "aggsize", "256k") == -1)
858		dfatal("failed to set 'aggsize'");
859	if (dtrace_setopt(g_dtp, "aggrate", "1sec") == -1)
860		dfatal("failed to set 'aggrate'");
861
862	/*
863	 * Take a second pass through to look for options that set options now
864	 * that we have an open dtrace handle.
865	 */
866	optind = 1;
867	while ((c = getopt(argc, argv, PLOCKSTAT_OPTSTR)) != EOF) {
868		switch (c) {
869		case 's':
870			g_opt_s = 1;
871			if (dtrace_setopt(g_dtp, "ustackframes", optarg) == -1)
872				dfatal("failed to set 'ustackframes'");
873			break;
874
875		case 'x':
876			if ((p = strchr(optarg, '=')) != NULL)
877				*p++ = '\0';
878
879			if (dtrace_setopt(g_dtp, optarg, p) != 0)
880				dfatal("failed to set -x %s", optarg);
881			break;
882
883		case 'e':
884			errno = 0;
885			(void) strtoul(optarg, &end, 10);
886			if (*optarg == '-' || *end != '\0' || errno != 0) {
887				(void) fprintf(stderr, "%s: invalid timeout "
888				    "'%s'\n", g_pname, optarg);
889				usage();
890			}
891
892			/*
893			 * Construct a DTrace enabling that will exit after
894			 * the specified number of seconds.
895			 */
896			dprog_add("BEGIN\n{\n\tend = timestamp + ");
897			dprog_add(optarg);
898			dprog_add(" * 1000000000;\n}\n");
899			dprog_add("tick-10hz\n/timestamp >= end/\n");
900			dprog_add("{\n\texit(0);\n}\n");
901			break;
902		}
903	}
904
905	argc -= optind;
906	argv += optind;
907
908	if (opt_H) {
909		dprog_add(g_hold_init);
910		if (g_opt_s == NULL)
911			dprog_add(g_hold_times);
912		else
913			dprog_add(g_hold_histogram);
914	}
915
916	if (opt_C) {
917		dprog_add(g_ctnd_init);
918		if (g_opt_s == NULL)
919			dprog_add(g_ctnd_times);
920		else
921			dprog_add(g_ctnd_histogram);
922	}
923
924	if (opt_p) {
925		ulong_t pid;
926
927		if (argc > 1) {
928			(void) fprintf(stderr, "%s: only one pid is allowed\n",
929			    g_pname);
930			usage();
931		}
932
933		errno = 0;
934		pid = strtoul(argv[0], &end, 10);
935		if (*end != '\0' || errno != 0 || (pid_t)pid != pid) {
936			(void) fprintf(stderr, "%s: invalid pid '%s'\n",
937			    g_pname, argv[0]);
938			usage();
939		}
940
941		if ((g_pr = dtrace_proc_grab(g_dtp, (pid_t)pid, 0)) == NULL)
942			dfatal(NULL);
943	} else {
944		if ((g_pr = dtrace_proc_create(g_dtp, argv[0], argv)) == NULL)
945			dfatal(NULL);
946	}
947
948	dprog_compile();
949
950	if (dtrace_handle_proc(g_dtp, &prochandler, NULL) == -1)
951		dfatal("failed to establish proc handler");
952
953	(void) sigemptyset(&act.sa_mask);
954	act.sa_flags = 0;
955	act.sa_handler = intr;
956	(void) sigaction(SIGINT, &act, NULL);
957	(void) sigaction(SIGTERM, &act, NULL);
958
959	if (dtrace_go(g_dtp) != 0)
960		dfatal("dtrace_go()");
961
962	if (dtrace_getopt(g_dtp, "ustackframes", &g_nframes) != 0)
963		dfatal("failed to get 'ustackframes'");
964
965	dtrace_proc_continue(g_dtp, g_pr);
966
967	if (opt_v)
968		(void) printf("%s: tracing enabled for pid %d\n", g_pname,
969		    (int)Pstatus(g_pr)->pr_pid);
970
971	do {
972		if (!g_intr && !done)
973			dtrace_sleep(g_dtp);
974
975		if (done || g_intr || g_exited) {
976			done = 1;
977			if (dtrace_stop(g_dtp) == -1)
978				dfatal("couldn't stop tracing");
979		}
980
981		switch (dtrace_work(g_dtp, stdout, NULL, chewrec, NULL)) {
982		case DTRACE_WORKSTATUS_DONE:
983			done = 1;
984			break;
985		case DTRACE_WORKSTATUS_OKAY:
986			break;
987		default:
988			dfatal("processing aborted");
989		}
990
991	} while (!done);
992
993	dtrace_close(g_dtp);
994
995	return (0);
996}
997