pmcpl_calltree.c revision 299826
1178476Sjb/*-
2178476Sjb * Copyright (c) 2012, Fabien Thomas
3178476Sjb * All rights reserved.
4178476Sjb *
5178476Sjb * Redistribution and use in source and binary forms, with or without
6178476Sjb * modification, are permitted provided that the following conditions
7178476Sjb * are met:
8178476Sjb * 1. Redistributions of source code must retain the above copyright
9178476Sjb *    notice, this list of conditions and the following disclaimer.
10178476Sjb * 2. Redistributions in binary form must reproduce the above copyright
11178476Sjb *    notice, this list of conditions and the following disclaimer in the
12178476Sjb *    documentation and/or other materials provided with the distribution.
13178476Sjb *
14178476Sjb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15178476Sjb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16178476Sjb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17178476Sjb * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18178476Sjb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19178476Sjb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20178476Sjb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21178476Sjb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22178476Sjb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23178476Sjb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24178476Sjb * SUCH DAMAGE.
25178476Sjb */
26178476Sjb
27178476Sjb/*
28178476Sjb * Process hwpmc(4) samples as calltree.
29178476Sjb *
30178476Sjb * Output file format compatible with Kcachegrind (kdesdk).
31178476Sjb * Handle top mode with a sorted tree display.
32178476Sjb */
33178476Sjb
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/usr.sbin/pmcstat/pmcpl_calltree.c 299826 2016-05-15 03:15:36Z pfg $");
36
37#include <sys/param.h>
38#include <sys/endian.h>
39#include <sys/queue.h>
40
41#include <assert.h>
42#include <curses.h>
43#include <ctype.h>
44#include <err.h>
45#include <errno.h>
46#include <fcntl.h>
47#include <pmc.h>
48#include <pmclog.h>
49#include <stdint.h>
50#include <stdio.h>
51#include <stdlib.h>
52#include <string.h>
53#include <unistd.h>
54#include <sysexits.h>
55
56#include "pmcstat.h"
57#include "pmcstat_log.h"
58#include "pmcstat_top.h"
59#include "pmcpl_calltree.h"
60
61#define	PMCPL_CT_GROWSIZE	4
62
63static int pmcstat_skiplink = 0;
64
65struct pmcpl_ct_node;
66
67/* Get the sample value for PMC a. */
68#define	PMCPL_CT_SAMPLE(a, b) \
69	((a) < (b)->npmcs ? (b)->sb[a] : 0)
70
71/* Get the sample value in percent related to rsamples. */
72#define	PMCPL_CT_SAMPLEP(a, b) \
73	(PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a])
74
75struct pmcpl_ct_sample {
76	int		npmcs;		/* Max pmc index available. */
77	unsigned	*sb;		/* Sample buffer for 0..npmcs. */
78};
79
80struct pmcpl_ct_arc {
81	struct pmcpl_ct_sample	pcta_samples;
82	struct pmcpl_ct_sample	pcta_callid;
83	unsigned		pcta_call;
84	struct pmcpl_ct_node	*pcta_child;
85};
86
87struct pmcpl_ct_instr {
88	uintfptr_t		pctf_func;
89	struct pmcpl_ct_sample	pctf_samples;
90};
91
92/*
93 * Each calltree node is tracked by a pmcpl_ct_node struct.
94 */
95struct pmcpl_ct_node {
96	struct pmcstat_image	*pct_image;
97	uintfptr_t		pct_func;
98
99	struct pmcstat_symbol	*pct_sym;
100	pmcstat_interned_string	pct_ifl;
101	pmcstat_interned_string	pct_ifn;
102
103	struct pmcpl_ct_sample	pct_samples;
104
105	int			pct_narc;
106	int			pct_arc_c;
107	struct pmcpl_ct_arc 	*pct_arc;
108
109	/* TODO: optimize for large number of items. */
110	int			pct_ninstr;
111	int			pct_instr_c;
112	struct pmcpl_ct_instr	*pct_instr;
113
114#define PMCPL_PCT_ADDR	0
115#define PMCPL_PCT_NAME	1
116	char			pct_type;
117#define	PMCPL_PCT_WHITE	0
118#define	PMCPL_PCT_GREY	1
119#define	PMCPL_PCT_BLACK	2
120	char			pct_color;
121};
122
123struct pmcpl_ct_node_hash {
124	struct pmcpl_ct_node  *pch_ctnode;
125	STAILQ_ENTRY(pmcpl_ct_node_hash) pch_next;
126};
127
128static struct pmcpl_ct_sample pmcpl_ct_callid;
129
130#define	PMCPL_CT_MAXCOL		PMC_CALLCHAIN_DEPTH_MAX
131#define	PMCPL_CT_MAXLINE	1024	/* TODO: dynamic. */
132
133struct pmcpl_ct_line {
134	unsigned	ln_sum;
135	unsigned	ln_index;
136};
137
138static struct pmcpl_ct_line	pmcpl_ct_topmax[PMCPL_CT_MAXLINE+1];
139static struct pmcpl_ct_node
140    *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL+1][PMCPL_CT_MAXLINE+1];
141
142/*
143 * All nodes indexed by function/image name are placed in a hash table.
144 */
145static STAILQ_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH];
146
147/*
148 * Root node for the graph.
149 */
150static struct pmcpl_ct_node *pmcpl_ct_root;
151
152/*
153 * Prototypes
154 */
155
156/*
157 * Initialize a samples.
158 */
159
160static void
161pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples)
162{
163
164	samples->npmcs = 0;
165	samples->sb = NULL;
166}
167
168/*
169 * Free a samples.
170 */
171
172static void
173pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples)
174{
175
176	samples->npmcs = 0;
177	free(samples->sb);
178	samples->sb = NULL;
179}
180
181/*
182 * Grow a sample block to store pmcstat_npmcs PMCs.
183 */
184
185static void
186pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples)
187{
188	int npmcs;
189
190	/* Enough storage. */
191	if (pmcstat_npmcs <= samples->npmcs)
192                return;
193
194	npmcs = samples->npmcs +
195	    max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE);
196	samples->sb = realloc(samples->sb, npmcs * sizeof(unsigned));
197	if (samples->sb == NULL)
198		errx(EX_SOFTWARE, "ERROR: out of memory");
199	bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned),
200	    (npmcs - samples->npmcs) * sizeof(unsigned));
201	samples->npmcs = npmcs;
202}
203
204/*
205 * Compute the sum of all root arcs.
206 */
207
208static void
209pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples)
210{
211	int i, pmcin;
212
213	pmcpl_ct_samples_init(samples);
214	pmcpl_ct_samples_grow(samples);
215
216	for (i = 0; i < pmcpl_ct_root->pct_narc; i++)
217		for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++)
218			samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin,
219			    &pmcpl_ct_root->pct_arc[i].pcta_samples);
220}
221
222/*
223 * Grow the arc table.
224 */
225
226static void
227pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items)
228{
229	int nmaxsize;
230
231	if (cursize < *maxsize)
232		return;
233
234	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
235	*items = realloc(*items, nmaxsize * sizeof(struct pmcpl_ct_arc));
236	if (*items == NULL)
237		errx(EX_SOFTWARE, "ERROR: out of memory");
238	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc),
239	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc));
240	*maxsize = nmaxsize;
241}
242
243/*
244 * Grow the instr table.
245 */
246
247static void
248pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items)
249{
250	int nmaxsize;
251
252	if (cursize < *maxsize)
253		return;
254
255	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
256	*items = realloc(*items, nmaxsize * sizeof(struct pmcpl_ct_instr));
257	if (*items == NULL)
258		errx(EX_SOFTWARE, "ERROR: out of memory");
259	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr),
260	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr));
261	*maxsize = nmaxsize;
262}
263
264/*
265 * Add a new instruction sample to given node.
266 */
267
268static void
269pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin,
270    uintfptr_t pc, unsigned v)
271{
272	int i;
273	struct pmcpl_ct_instr *in;
274
275	for (i = 0; i<ct->pct_ninstr; i++) {
276		if (ct->pct_instr[i].pctf_func == pc) {
277			in = &ct->pct_instr[i];
278			pmcpl_ct_samples_grow(&in->pctf_samples);
279			in->pctf_samples.sb[pmcin] += v;
280			return;
281		}
282	}
283
284	pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr);
285	in = &ct->pct_instr[ct->pct_ninstr];
286	in->pctf_func = pc;
287	pmcpl_ct_samples_init(&in->pctf_samples);
288	pmcpl_ct_samples_grow(&in->pctf_samples);
289	in->pctf_samples.sb[pmcin] = v;
290	ct->pct_ninstr++;
291}
292
293/*
294 * Allocate a new node.
295 */
296
297static struct pmcpl_ct_node *
298pmcpl_ct_node_allocate(void)
299{
300	struct pmcpl_ct_node *ct;
301
302	if ((ct = malloc(sizeof(*ct))) == NULL)
303		err(EX_OSERR, "ERROR: Cannot allocate callgraph node");
304
305	pmcpl_ct_samples_init(&ct->pct_samples);
306
307	ct->pct_sym	= NULL;
308	ct->pct_image	= NULL;
309	ct->pct_func	= 0;
310
311	ct->pct_narc	= 0;
312	ct->pct_arc_c	= 0;
313	ct->pct_arc	= NULL;
314
315	ct->pct_ninstr	= 0;
316	ct->pct_instr_c	= 0;
317	ct->pct_instr	= NULL;
318
319	ct->pct_color   = PMCPL_PCT_WHITE;
320
321	return (ct);
322}
323
324/*
325 * Free a node.
326 */
327
328static void
329pmcpl_ct_node_free(struct pmcpl_ct_node *ct)
330{
331	int i;
332
333	for (i = 0; i < ct->pct_narc; i++) {
334		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples);
335		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid);
336	}
337
338	pmcpl_ct_samples_free(&ct->pct_samples);
339	free(ct->pct_arc);
340	free(ct->pct_instr);
341	free(ct);
342}
343
344/*
345 * Clear the graph tag on each node.
346 */
347static void
348pmcpl_ct_node_cleartag(void)
349{
350	int i;
351	struct pmcpl_ct_node_hash *pch;
352
353	for (i = 0; i < PMCSTAT_NHASH; i++)
354		STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
355			pch->pch_ctnode->pct_color = PMCPL_PCT_WHITE;
356
357	pmcpl_ct_root->pct_color = PMCPL_PCT_WHITE;
358}
359
360/*
361 * Print the callchain line by line with maximum cost at top.
362 */
363
364static int
365pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct,
366    struct pmcpl_ct_sample *rsamples, int x, int *y)
367{
368	int i, terminal;
369	struct pmcpl_ct_arc *arc;
370
371	if (ct->pct_color == PMCPL_PCT_GREY)
372		return 0;
373
374	if (x >= PMCPL_CT_MAXCOL) {
375		pmcpl_ct_topscreen[x][*y] = NULL;
376		return 1;
377	}
378	pmcpl_ct_topscreen[x][*y] = ct;
379
380	/*
381	 * Check if this is a terminal node.
382	 * We need to check that some samples exist
383	 * for at least one arc for that PMC.
384	 */
385	terminal = 1;
386	for (i = 0; i < ct->pct_narc; i++) {
387		arc = &ct->pct_arc[i];
388		if (arc->pcta_child->pct_color != PMCPL_PCT_GREY &&
389		    PMCPL_CT_SAMPLE(pmcin,
390		    &arc->pcta_samples) != 0 &&
391		    PMCPL_CT_SAMPLEP(pmcin,
392		    &arc->pcta_samples) > pmcstat_threshold) {
393			terminal = 0;
394			break;
395		}
396	}
397
398	if (ct->pct_narc == 0 || terminal) {
399		pmcpl_ct_topscreen[x+1][*y] = NULL;
400		if (*y >= PMCPL_CT_MAXLINE)
401			return 1;
402		*y = *y + 1;
403		for (i=0; i < x; i++)
404			pmcpl_ct_topscreen[i][*y] =
405			    pmcpl_ct_topscreen[i][*y - 1];
406		return 0;
407	}
408
409	ct->pct_color = PMCPL_PCT_GREY;
410	for (i = 0; i < ct->pct_narc; i++) {
411		if (PMCPL_CT_SAMPLE(pmcin,
412		    &ct->pct_arc[i].pcta_samples) == 0)
413			continue;
414		if (PMCPL_CT_SAMPLEP(pmcin,
415		    &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) {
416			if (pmcpl_ct_node_dumptop(pmcin,
417			        ct->pct_arc[i].pcta_child,
418			        rsamples, x+1, y)) {
419				ct->pct_color = PMCPL_PCT_BLACK;
420				return 1;
421			}
422		}
423	}
424	ct->pct_color = PMCPL_PCT_BLACK;
425
426	return 0;
427}
428
429/*
430 * Compare two top line by sum.
431 */
432static int
433pmcpl_ct_line_compare(const void *a, const void *b)
434{
435	const struct pmcpl_ct_line *ct1, *ct2;
436
437	ct1 = (const struct pmcpl_ct_line *) a;
438	ct2 = (const struct pmcpl_ct_line *) b;
439
440	/* Sort in reverse order */
441	if (ct1->ln_sum < ct2->ln_sum)
442		return (1);
443	if (ct1->ln_sum > ct2->ln_sum)
444		return (-1);
445	return (0);
446}
447
448/*
449 * Format and display given PMC index.
450 */
451
452static void
453pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy)
454{
455#undef	TS
456#undef	TSI
457#define	TS(x, y)	(pmcpl_ct_topscreen[x][y])
458#define	TSI(x, y)	(pmcpl_ct_topscreen[x][pmcpl_ct_topmax[y].ln_index])
459
460	int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y;
461	float v;
462	char ns[30], vs[10], is[20];
463	struct pmcpl_ct_node *ct;
464	const char *space = " ";
465
466	/*
467	 * Sort by line cost.
468	 */
469	for (y = 0; ; y++) {
470		ct = TS(1, y);
471		if (ct == NULL)
472			break;
473
474		pmcpl_ct_topmax[y].ln_sum = 0;
475		pmcpl_ct_topmax[y].ln_index = y;
476		for (x = 1; TS(x, y) != NULL; x++) {
477			pmcpl_ct_topmax[y].ln_sum +=
478			    PMCPL_CT_SAMPLE(pmcin, &TS(x, y)->pct_samples);
479		}
480	}
481	qsort(pmcpl_ct_topmax, y, sizeof(pmcpl_ct_topmax[0]),
482	    pmcpl_ct_line_compare);
483	pmcpl_ct_topmax[y].ln_index = y;
484
485	for (y = 0; y < maxy; y++) {
486		ct = TSI(1, y);
487		if (ct == NULL)
488			break;
489
490		if (y > 0)
491			PMCSTAT_PRINTW("\n");
492
493		/* Output sum. */
494		v = pmcpl_ct_topmax[y].ln_sum * 100.0 /
495		    rsamples->sb[pmcin];
496		snprintf(vs, sizeof(vs), "%.1f", v);
497		v_attrs = PMCSTAT_ATTRPERCENT(v);
498		PMCSTAT_ATTRON(v_attrs);
499		PMCSTAT_PRINTW("%5.5s ", vs);
500		PMCSTAT_ATTROFF(v_attrs);
501
502		width = indentwidth = 5 + 1;
503
504		for (x = 1; (ct = TSI(x, y)) != NULL; x++) {
505
506			vs[0] = '\0'; vs_len = 0;
507			is[0] = '\0'; is_len = 0;
508
509			/* Format value. */
510			v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples);
511			if (v > pmcstat_threshold)
512				vs_len  = snprintf(vs, sizeof(vs),
513				    "(%.1f%%)", v);
514			v_attrs = PMCSTAT_ATTRPERCENT(v);
515
516			if (pmcstat_skiplink && v <= pmcstat_threshold) {
517				strlcpy(ns, ".", sizeof(ns));
518				ns_len = 1;
519			} else {
520			if (ct->pct_sym != NULL) {
521				ns_len = snprintf(ns, sizeof(ns), "%s",
522				    pmcstat_string_unintern(ct->pct_sym->ps_name));
523			} else
524				ns_len = snprintf(ns, sizeof(ns), "%p",
525				    (void *)ct->pct_func);
526
527			/* Format image. */
528			if (x == 1 ||
529			    TSI(x-1, y)->pct_image != ct->pct_image)
530				is_len = snprintf(is, sizeof(is), "@%s",
531				    pmcstat_string_unintern(ct->pct_image->pi_name));
532
533			/* Check for line wrap. */
534			width += ns_len + is_len + vs_len + 1;
535			}
536			if (width >= pmcstat_displaywidth) {
537				maxy--;
538				if (y >= maxy)
539					break;
540				PMCSTAT_PRINTW("\n%*s", indentwidth, space);
541				width = indentwidth + ns_len + is_len + vs_len;
542			}
543
544			PMCSTAT_ATTRON(v_attrs);
545			PMCSTAT_PRINTW("%s%s%s ", ns, is, vs);
546			PMCSTAT_ATTROFF(v_attrs);
547		}
548	}
549}
550
551/*
552 * Output top mode snapshot.
553 */
554
555void
556pmcpl_ct_topdisplay(void)
557{
558	int y;
559	struct pmcpl_ct_sample r, *rsamples;
560
561	rsamples = &r;
562	pmcpl_ct_samples_root(rsamples);
563	pmcpl_ct_node_cleartag();
564
565	PMCSTAT_PRINTW("%5.5s %s\n", "%SAMP", "CALLTREE");
566
567	y = 0;
568	if (pmcpl_ct_node_dumptop(pmcstat_pmcinfilter,
569	    pmcpl_ct_root, rsamples, 0, &y))
570		PMCSTAT_PRINTW("...\n");
571	pmcpl_ct_topscreen[1][y] = NULL;
572
573	pmcpl_ct_node_printtop(rsamples,
574	    pmcstat_pmcinfilter, pmcstat_displayheight - 2);
575
576	pmcpl_ct_samples_free(rsamples);
577}
578
579/*
580 * Handle top mode keypress.
581 */
582
583int
584pmcpl_ct_topkeypress(int c, WINDOW *w)
585{
586
587	switch (c) {
588	case 'f':
589		pmcstat_skiplink = !pmcstat_skiplink;
590		wprintw(w, "skip empty link %s",
591		    pmcstat_skiplink ? "on" : "off");
592		break;
593	}
594
595	return 0;
596}
597
598/*
599 * Look for a callgraph node associated with pmc `pmcid' in the global
600 * hash table that corresponds to the given `pc' value in the process map
601 * `ppm'.
602 */
603
604static void
605pmcpl_ct_node_update(struct pmcpl_ct_node *parent,
606    struct pmcpl_ct_node *child, int pmcin, unsigned v, int cd)
607{
608	struct pmcpl_ct_arc *arc;
609	int i;
610
611	assert(parent != NULL);
612
613	/*
614	 * Find related arc in parent node and
615	 * increment the sample count.
616	 */
617	for (i = 0; i < parent->pct_narc; i++) {
618		if (parent->pct_arc[i].pcta_child == child) {
619			arc = &parent->pct_arc[i];
620			pmcpl_ct_samples_grow(&arc->pcta_samples);
621			arc->pcta_samples.sb[pmcin] += v;
622			/* Estimate call count. */
623			if (cd) {
624			pmcpl_ct_samples_grow(&arc->pcta_callid);
625			if (pmcpl_ct_callid.sb[pmcin] -
626			    arc->pcta_callid.sb[pmcin] > 1)
627				arc->pcta_call++;
628			arc->pcta_callid.sb[pmcin] =
629			    pmcpl_ct_callid.sb[pmcin];
630			}
631			return;
632		}
633	}
634
635	/*
636	 * No arc found for us, add ourself to the parent.
637	 */
638	pmcpl_ct_arc_grow(parent->pct_narc,
639	    &parent->pct_arc_c, &parent->pct_arc);
640	arc = &parent->pct_arc[parent->pct_narc];
641	pmcpl_ct_samples_grow(&arc->pcta_samples);
642	arc->pcta_samples.sb[pmcin] = v;
643	arc->pcta_call = 1;
644	if (cd) {
645		pmcpl_ct_samples_grow(&arc->pcta_callid);
646		arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin];
647	}
648	arc->pcta_child = child;
649	parent->pct_narc++;
650}
651
652/*
653 * Lookup by image/pc.
654 */
655
656static struct pmcpl_ct_node *
657pmcpl_ct_node_hash_lookup(struct pmcstat_image *image, uintfptr_t pc,
658    struct pmcstat_symbol *sym, char *fl, char *fn)
659{
660	int i;
661	unsigned int hash;
662	struct pmcpl_ct_node *ct;
663	struct pmcpl_ct_node_hash *h;
664	pmcstat_interned_string	ifl, ifn;
665
666	if (fn != NULL) {
667		ifl = pmcstat_string_intern(fl);
668		ifn = pmcstat_string_intern(fn);
669	} else {
670		ifl = 0;
671		ifn = 0;
672	}
673
674	for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++)
675		hash += (pc >> i) & 0xFF;
676
677	hash &= PMCSTAT_HASH_MASK;
678
679	STAILQ_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) {
680		ct = h->pch_ctnode;
681
682		assert(ct != NULL);
683
684		if (ct->pct_image == image && ct->pct_func == pc) {
685			if (fn == NULL)
686				return (ct);
687			if (ct->pct_type == PMCPL_PCT_NAME &&
688			    ct->pct_ifl == ifl && ct->pct_ifn == ifn)
689				return (ct);
690		}
691	}
692
693	/*
694	 * We haven't seen this (pmcid, pc) tuple yet, so allocate a
695	 * new callgraph node and a new hash table entry for it.
696	 */
697	ct = pmcpl_ct_node_allocate();
698	if ((h = malloc(sizeof(*h))) == NULL)
699		err(EX_OSERR, "ERROR: Could not allocate callgraph node");
700
701	if (fn != NULL) {
702		ct->pct_type = PMCPL_PCT_NAME;
703		ct->pct_ifl = ifl;
704		ct->pct_ifn = ifn;
705	} else
706		ct->pct_type = PMCPL_PCT_ADDR;
707	ct->pct_image = image;
708	ct->pct_func = pc;
709	ct->pct_sym = sym;
710
711	h->pch_ctnode = ct;
712	STAILQ_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next);
713	return (ct);
714}
715
716/*
717 * Record a callchain.
718 */
719
720void
721pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
722    uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu)
723{
724	int i, n, pmcin;
725	uintfptr_t pc, loadaddress;
726	struct pmcstat_image *image;
727	struct pmcstat_symbol *sym;
728	struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX];
729	struct pmcstat_process *km;
730	struct pmcpl_ct_node *ct;
731	struct pmcpl_ct_node *ctl[PMC_CALLCHAIN_DEPTH_MAX+1];
732
733	(void) cpu;
734
735	assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX);
736
737	/* Get the PMC index. */
738	pmcin = pmcr->pr_pmcin;
739
740	/*
741	 * Validate mapping for the callchain.
742	 * Go from bottom to first invalid entry.
743	 */
744	km = pmcstat_kernproc;
745	for (n = 0; n < (int)nsamples; n++) {
746		ppm[n] = pmcstat_process_find_map(usermode ?
747		    pp : km, cc[n]);
748		if (ppm[n] == NULL) {
749			/* Detect full frame capture (kernel + user). */
750			if (!usermode) {
751				ppm[n] = pmcstat_process_find_map(pp, cc[n]);
752				if (ppm[n] != NULL)
753					km = pp;
754			}
755		}
756		if (ppm[n] == NULL)
757			break;
758	}
759	if (n-- == 0) {
760		pmcstat_stats.ps_callchain_dubious_frames++;
761		pmcr->pr_dubious_frames++;
762		return;
763	}
764
765	/* Increase the call generation counter. */
766	pmcpl_ct_samples_grow(&pmcpl_ct_callid);
767	pmcpl_ct_callid.sb[pmcin]++;
768
769	/*
770	 * Build node list.
771	 */
772	ctl[0] = pmcpl_ct_root;
773	for (i = 1; n >= 0; n--) {
774		image = ppm[n]->ppm_image;
775		loadaddress = ppm[n]->ppm_lowpc +
776		    image->pi_vaddr - image->pi_start;
777		/* Convert to an offset in the image. */
778		pc = cc[n] - loadaddress;
779		/*
780		 * Try determine the function at this offset.  If we can't
781		 * find a function round leave the `pc' value alone.
782		 */
783		if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
784			pc = sym->ps_start;
785		else
786			pmcstat_stats.ps_samples_unknown_function++;
787
788		ct = pmcpl_ct_node_hash_lookup(image, pc, sym, NULL, NULL);
789		if (ct == NULL) {
790			pmcstat_stats.ps_callchain_dubious_frames++;
791			continue;
792		}
793		ctl[i++] = ct;
794	}
795	/* No valid node found. */
796	if (i == 1)
797		return;
798	n = i;
799
800	ct = ctl[0];
801	for (i = 1; i < n; i++)
802		pmcpl_ct_node_update(ctl[i-1], ctl[i], pmcin, 1, 1);
803
804	/*
805	 * Increment the sample count for this PMC.
806	 */
807	pmcpl_ct_samples_grow(&ctl[n-1]->pct_samples);
808	ctl[n-1]->pct_samples.sb[pmcin]++;
809
810	/* Update per instruction sample if required. */
811	if (args.pa_ctdumpinstr)
812		pmcpl_ct_instr_add(ctl[n-1], pmcin, cc[0] -
813		    (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr -
814		     ppm[0]->ppm_image->pi_start), 1);
815}
816
817/*
818 * Print node child cost.
819 */
820
821static void
822pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct, uintfptr_t paddr,
823    int pline)
824{
825	int i, j, line;
826	uintfptr_t addr;
827	struct pmcpl_ct_node *child;
828	char sourcefile[PATH_MAX];
829	char funcname[PATH_MAX];
830
831	/*
832	 * Child cost.
833	 * TODO: attach child cost to the real position in the function.
834	 * TODO: cfn=<fn> / call <ncall> addr(<fn>) / addr(call <fn>) <arccost>
835	 */
836	for (i=0 ; i<ct->pct_narc; i++) {
837		child = ct->pct_arc[i].pcta_child;
838		/* Object binary. */
839		fprintf(args.pa_graphfile, "cob=%s\n",
840		    pmcstat_string_unintern(child->pct_image->pi_fullpath));
841		/* Child function name. */
842		addr = child->pct_image->pi_vaddr + child->pct_func;
843		line = 0;
844		/* Child function source file. */
845		if (child->pct_type == PMCPL_PCT_NAME) {
846			fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n",
847			    pmcstat_string_unintern(child->pct_ifl),
848			    pmcstat_string_unintern(child->pct_ifn));
849		} else if (pmcstat_image_addr2line(child->pct_image, addr,
850		    sourcefile, sizeof(sourcefile), &line,
851		    funcname, sizeof(funcname))) {
852			fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n",
853				sourcefile, funcname);
854		} else {
855			if (child->pct_sym != NULL)
856				fprintf(args.pa_graphfile,
857				    "cfi=???\ncfn=%s\n",
858				    pmcstat_string_unintern(
859				        child->pct_sym->ps_name));
860			else
861				fprintf(args.pa_graphfile,
862				    "cfi=???\ncfn=%p\n", (void *)addr);
863		}
864
865		/* Child function address, line and call count. */
866		fprintf(args.pa_graphfile, "calls=%u %p %u\n",
867		    ct->pct_arc[i].pcta_call, (void *)addr, line);
868
869		/*
870		 * Call address, line, sample.
871		 * TODO: Associate call address to the right location.
872		 */
873		fprintf(args.pa_graphfile, "%p %u", (void *)paddr, pline);
874		for (j = 0; j<pmcstat_npmcs; j++)
875			fprintf(args.pa_graphfile, " %u",
876			    PMCPL_CT_SAMPLE(j, &ct->pct_arc[i].pcta_samples));
877		fprintf(args.pa_graphfile, "\n");
878	}
879}
880
881/*
882 * Print node self cost.
883 */
884
885static void
886pmcpl_ct_node_printself(struct pmcpl_ct_node *ct)
887{
888	int i, j, fline, line;
889	uintfptr_t faddr, addr;
890	char sourcefile[PATH_MAX];
891	char funcname[PATH_MAX];
892
893	/*
894	 * Object binary.
895	 */
896	fprintf(args.pa_graphfile, "ob=%s\n",
897	    pmcstat_string_unintern(ct->pct_image->pi_fullpath));
898
899	/*
900	 * Function name.
901	 */
902	faddr = ct->pct_image->pi_vaddr + ct->pct_func;
903	fline = 0;
904	if (ct->pct_type == PMCPL_PCT_NAME) {
905		fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n",
906		    pmcstat_string_unintern(ct->pct_ifl),
907		    pmcstat_string_unintern(ct->pct_ifn));
908	} else if (pmcstat_image_addr2line(ct->pct_image, faddr,
909	    sourcefile, sizeof(sourcefile), &fline,
910	    funcname, sizeof(funcname))) {
911		fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n",
912		    sourcefile, funcname);
913	} else {
914		if (ct->pct_sym != NULL)
915			fprintf(args.pa_graphfile, "fl=???\nfn=%s\n",
916			    pmcstat_string_unintern(ct->pct_sym->ps_name));
917		else
918			fprintf(args.pa_graphfile, "fl=???\nfn=%p\n",
919			    (void *)(ct->pct_image->pi_vaddr + ct->pct_func));
920	}
921
922	/*
923	 * Self cost.
924	 */
925	if (ct->pct_ninstr > 0) {
926		/*
927		 * Per location cost.
928		 */
929		for (i = 0; i < ct->pct_ninstr; i++) {
930			addr = ct->pct_image->pi_vaddr +
931			    ct->pct_instr[i].pctf_func;
932			line = 0;
933			pmcstat_image_addr2line(ct->pct_image, addr,
934			    sourcefile, sizeof(sourcefile), &line,
935			    funcname, sizeof(funcname));
936			fprintf(args.pa_graphfile, "%p %u",
937			    (void *)addr, line);
938			for (j = 0; j<pmcstat_npmcs; j++)
939				fprintf(args.pa_graphfile, " %u",
940				    PMCPL_CT_SAMPLE(j,
941				    &ct->pct_instr[i].pctf_samples));
942			fprintf(args.pa_graphfile, "\n");
943		}
944	} else {
945		/* Global cost function cost. */
946		fprintf(args.pa_graphfile, "%p %u", (void *)faddr, fline);
947		for (i = 0; i<pmcstat_npmcs ; i++)
948			fprintf(args.pa_graphfile, " %u",
949			    PMCPL_CT_SAMPLE(i, &ct->pct_samples));
950		fprintf(args.pa_graphfile, "\n");
951	}
952
953	pmcpl_ct_node_printchild(ct, faddr, fline);
954}
955
956static void
957pmcpl_ct_printnode(struct pmcpl_ct_node *ct)
958{
959	int i;
960
961	if (ct == pmcpl_ct_root) {
962		fprintf(args.pa_graphfile, "fn=root\n");
963		fprintf(args.pa_graphfile, "0x0 1");
964		for (i = 0; i<pmcstat_npmcs ; i++)
965			fprintf(args.pa_graphfile, " 0");
966		fprintf(args.pa_graphfile, "\n");
967		pmcpl_ct_node_printchild(ct, 0, 0);
968	} else
969		pmcpl_ct_node_printself(ct);
970}
971
972/*
973 * Breadth first traversal.
974 */
975
976static void
977pmcpl_ct_bfs(struct pmcpl_ct_node *ct)
978{
979	int i;
980	struct pmcpl_ct_node_hash *pch, *pchc;
981	struct pmcpl_ct_node *child;
982	STAILQ_HEAD(,pmcpl_ct_node_hash) q;
983
984	STAILQ_INIT(&q);
985	if ((pch = malloc(sizeof(*pch))) == NULL)
986		err(EX_OSERR, "ERROR: Cannot allocate queue");
987	pch->pch_ctnode = ct;
988	STAILQ_INSERT_TAIL(&q, pch, pch_next);
989	ct->pct_color = PMCPL_PCT_BLACK;
990
991	while (!STAILQ_EMPTY(&q)) {
992		pch = STAILQ_FIRST(&q);
993		STAILQ_REMOVE_HEAD(&q, pch_next);
994		pmcpl_ct_printnode(pch->pch_ctnode);
995		for (i = 0; i<pch->pch_ctnode->pct_narc; i++) {
996			child = pch->pch_ctnode->pct_arc[i].pcta_child;
997			if (child->pct_color == PMCPL_PCT_WHITE) {
998				child->pct_color = PMCPL_PCT_BLACK;
999				if ((pchc = malloc(sizeof(*pchc))) == NULL)
1000					err(EX_OSERR,
1001					    "ERROR: Cannot allocate queue");
1002				pchc->pch_ctnode = child;
1003				STAILQ_INSERT_TAIL(&q, pchc, pch_next);
1004			}
1005		}
1006		free(pch);
1007	}
1008}
1009
1010/*
1011 * Detect and fix inlined location.
1012 */
1013
1014static void
1015_pmcpl_ct_expand_inline(struct pmcpl_ct_node *ct)
1016{
1017	int i, j;
1018	unsigned fline, line, v;
1019	uintfptr_t faddr, addr, pc;
1020	char sourcefile[PATH_MAX];
1021	char ffuncname[PATH_MAX], funcname[PATH_MAX];
1022	char buffer[PATH_MAX];
1023	struct pmcpl_ct_node *child;
1024
1025	/*
1026	 * Resolve parent and compare to each instr location.
1027	 */
1028	faddr = ct->pct_image->pi_vaddr + ct->pct_func;
1029	fline = 0;
1030	if (!pmcstat_image_addr2line(ct->pct_image, faddr,
1031	    sourcefile, sizeof(sourcefile), &fline,
1032	    ffuncname, sizeof(ffuncname)))
1033		return;
1034
1035	for (i = 0; i < ct->pct_ninstr; i++) {
1036		addr = ct->pct_image->pi_vaddr +
1037		    ct->pct_instr[i].pctf_func;
1038		line = 0;
1039		if (!pmcstat_image_addr2line(ct->pct_image, addr,
1040		    sourcefile, sizeof(sourcefile), &line,
1041		    funcname, sizeof(funcname)))
1042			continue;
1043
1044		if (strcmp(funcname, ffuncname) == 0)
1045			continue;
1046
1047		/*
1048		 * - Lookup/create inline node by function name.
1049		 * - Move instr PMCs to the inline node.
1050		 * - Link nodes.
1051		 * The lookup create a specific node per image/pc.
1052		 */
1053		if (args.pa_verbosity >= 2)
1054			fprintf(args.pa_printfile,
1055			    "WARNING: inlined function at %p %s in %s\n",
1056			    (void *)addr, funcname, ffuncname);
1057
1058		snprintf(buffer, sizeof(buffer), "%s@%s",
1059			funcname, ffuncname);
1060		child = pmcpl_ct_node_hash_lookup(ct->pct_image,
1061		    ct->pct_func, ct->pct_sym, sourcefile, buffer);
1062		assert(child != NULL);
1063		pc = ct->pct_instr[i].pctf_func;
1064		for (j = 0; j<pmcstat_npmcs; j++) {
1065			v = PMCPL_CT_SAMPLE(j,
1066			    &ct->pct_instr[i].pctf_samples);
1067			if (v == 0)
1068				continue;
1069			pmcpl_ct_instr_add(child, j, pc, v);
1070			pmcpl_ct_node_update(ct, child, j, v, 0);
1071			if (j < ct->pct_samples.npmcs)
1072				ct->pct_samples.sb[j] -=
1073				    ct->pct_instr[i].pctf_samples.sb[j];
1074			ct->pct_instr[i].pctf_samples.sb[j] = 0;
1075		}
1076	}
1077}
1078
1079static void
1080pmcpl_ct_expand_inline(void)
1081{
1082	int i;
1083	struct pmcpl_ct_node_hash *pch;
1084
1085	if (!args.pa_ctdumpinstr)
1086		return;
1087
1088	for (i = 0; i < PMCSTAT_NHASH; i++)
1089		STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
1090			if (pch->pch_ctnode->pct_type == PMCPL_PCT_ADDR)
1091				_pmcpl_ct_expand_inline(pch->pch_ctnode);
1092}
1093
1094/*
1095 * Clean the PMC name for Kcachegrind formula
1096 */
1097
1098static void
1099pmcpl_ct_fixup_pmcname(char *s)
1100{
1101	char *p;
1102
1103	for (p = s; *p; p++)
1104		if (!isalnum(*p))
1105			*p = '_';
1106}
1107
1108/*
1109 * Print a calltree (KCachegrind) for all PMCs.
1110 */
1111
1112static void
1113pmcpl_ct_print(void)
1114{
1115	int i;
1116	char name[40];
1117	struct pmcpl_ct_sample rsamples;
1118
1119	pmcpl_ct_samples_root(&rsamples);
1120	pmcpl_ct_expand_inline();
1121
1122	fprintf(args.pa_graphfile,
1123		"version: 1\n"
1124		"creator: pmcstat\n"
1125		"positions: instr line\n"
1126		"events:");
1127	for (i=0; i<pmcstat_npmcs; i++) {
1128		snprintf(name, sizeof(name), "%s_%d",
1129		    pmcstat_pmcindex_to_name(i), i);
1130		pmcpl_ct_fixup_pmcname(name);
1131		fprintf(args.pa_graphfile, " %s", name);
1132	}
1133	fprintf(args.pa_graphfile, "\nsummary:");
1134	for (i=0; i<pmcstat_npmcs ; i++)
1135		fprintf(args.pa_graphfile, " %u",
1136		    PMCPL_CT_SAMPLE(i, &rsamples));
1137	fprintf(args.pa_graphfile, "\n");
1138	pmcpl_ct_bfs(pmcpl_ct_root);
1139	pmcpl_ct_samples_free(&rsamples);
1140}
1141
1142int
1143pmcpl_ct_configure(char *opt)
1144{
1145
1146	if (strncmp(opt, "skiplink=", 9) == 0) {
1147		pmcstat_skiplink = atoi(opt+9);
1148	} else
1149		return (0);
1150
1151	return (1);
1152}
1153
1154int
1155pmcpl_ct_init(void)
1156{
1157	int i;
1158
1159	pmcpl_ct_root = pmcpl_ct_node_allocate();
1160
1161	for (i = 0; i < PMCSTAT_NHASH; i++)
1162		STAILQ_INIT(&pmcpl_ct_node_hash[i]);
1163
1164	pmcpl_ct_samples_init(&pmcpl_ct_callid);
1165
1166	return (0);
1167}
1168
1169void
1170pmcpl_ct_shutdown(FILE *mf)
1171{
1172	int i;
1173	struct pmcpl_ct_node_hash *pch, *pchtmp;
1174
1175	(void) mf;
1176
1177	if (args.pa_flags & FLAG_DO_CALLGRAPHS)
1178		pmcpl_ct_print();
1179
1180	/*
1181	 * Free memory.
1182	 */
1183
1184	for (i = 0; i < PMCSTAT_NHASH; i++) {
1185		STAILQ_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next,
1186		    pchtmp) {
1187			pmcpl_ct_node_free(pch->pch_ctnode);
1188			free(pch);
1189		}
1190	}
1191
1192	pmcpl_ct_node_free(pmcpl_ct_root);
1193	pmcpl_ct_root = NULL;
1194
1195	pmcpl_ct_samples_free(&pmcpl_ct_callid);
1196}
1197
1198