pmcpl_calltree.c revision 203790
1/*-
2 * Copyright (c) 2009, Fabien Thomas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 * Process hwpmc(4) samples as calltree.
29 *
30 * Output file format compatible with Kcachegrind (kdesdk).
31 * Handle top mode with a sorted tree display.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/usr.sbin/pmcstat/pmcpl_calltree.c 203790 2010-02-11 22:51:44Z fabient $");
36
37#include <sys/param.h>
38#include <sys/endian.h>
39#include <sys/queue.h>
40
41#include <assert.h>
42#include <curses.h>
43#include <ctype.h>
44#include <err.h>
45#include <errno.h>
46#include <fcntl.h>
47#include <pmc.h>
48#include <pmclog.h>
49#include <sysexits.h>
50#include <stdint.h>
51#include <stdio.h>
52#include <stdlib.h>
53#include <string.h>
54#include <unistd.h>
55#include <sysexits.h>
56
57#include "pmcstat.h"
58#include "pmcstat_log.h"
59#include "pmcstat_top.h"
60#include "pmcpl_calltree.h"
61
62#define PMCPL_CT_GROWSIZE	4
63
64static pmcstat_interned_string pmcpl_ct_prevfn;
65
66static int pmcstat_skiplink = 0;
67
68struct pmcpl_ct_node;
69
70/* Get the sample value for PMC a. */
71#define PMCPL_CT_SAMPLE(a, b) \
72	((a) < (b)->npmcs ? (b)->sb[a] : 0)
73
74/* Get the sample value in percent related to rsamples. */
75#define PMCPL_CT_SAMPLEP(a, b) \
76	(PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a])
77
78struct pmcpl_ct_sample {
79	int		npmcs;		/* Max pmc index available. */
80	unsigned	*sb;		/* Sample buffer for 0..npmcs. */
81};
82
83struct pmcpl_ct_arc {
84	struct pmcpl_ct_sample	pcta_samples;
85	struct pmcpl_ct_sample	pcta_callid;
86	unsigned		pcta_call;
87	struct pmcpl_ct_node	*pcta_child;
88};
89
90struct pmcpl_ct_instr {
91	uintfptr_t		pctf_func;
92	struct pmcpl_ct_sample	pctf_samples;
93};
94
95/*
96 * Each calltree node is tracked by a pmcpl_ct_node struct.
97 */
98struct pmcpl_ct_node {
99#define PMCPL_PCT_TAG	0x00000001	/* Loop detection. */
100	uint32_t		pct_flags;
101	struct pmcstat_image	*pct_image;
102	uintfptr_t		pct_func;
103	struct pmcpl_ct_sample	pct_samples;
104
105	int			pct_narc;
106	int			pct_arc_c;
107	struct pmcpl_ct_arc 	*pct_arc;
108
109	/* TODO: optimize for large number of items. */
110	int			pct_ninstr;
111	int			pct_instr_c;
112	struct pmcpl_ct_instr	*pct_instr;
113};
114
115struct pmcpl_ct_node_hash {
116	struct pmcpl_ct_node  *pch_ctnode;
117	LIST_ENTRY(pmcpl_ct_node_hash) pch_next;
118};
119
120struct pmcpl_ct_sample pmcpl_ct_callid;
121
122#define PMCPL_CT_MAXCOL		PMC_CALLCHAIN_DEPTH_MAX
123#define PMCPL_CT_MAXLINE	256
124struct pmcpl_ct_node  *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL][PMCPL_CT_MAXLINE];
125
126/*
127 * All nodes indexed by function/image name are placed in a hash table.
128 */
129static LIST_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH];
130
131/*
132 * Root node for the graph.
133 */
134static struct pmcpl_ct_node *pmcpl_ct_root;
135
136/*
137 * Prototypes
138 */
139
140/*
141 * Initialize a samples.
142 */
143
144static void
145pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples)
146{
147
148	samples->npmcs = 0;
149	samples->sb = NULL;
150}
151
152/*
153 * Free a samples.
154 */
155
156static void
157pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples)
158{
159
160	samples->npmcs = 0;
161	free(samples->sb);
162	samples->sb = NULL;
163}
164
165/*
166 * Grow a sample block to store pmcstat_npmcs PMCs.
167 */
168
169static void
170pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples)
171{
172	int npmcs;
173
174	/* Enough storage. */
175	if (pmcstat_npmcs <= samples->npmcs)
176                return;
177
178	npmcs = samples->npmcs +
179	    max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE);
180	samples->sb = realloc(samples->sb, npmcs * sizeof(unsigned));
181	if (samples->sb == NULL)
182		errx(EX_SOFTWARE, "ERROR: out of memory");
183	bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned),
184	    (npmcs - samples->npmcs) * sizeof(unsigned));
185	samples->npmcs = npmcs;
186}
187
188/*
189 * Compute the sum of all root arcs.
190 */
191
192static void
193pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples)
194{
195	int i, pmcin;
196
197	pmcpl_ct_samples_init(samples);
198	pmcpl_ct_samples_grow(samples);
199
200	for (i = 0; i < pmcpl_ct_root->pct_narc; i++)
201		for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++)
202			samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin,
203			    &pmcpl_ct_root->pct_arc[i].pcta_samples);
204}
205
206/*
207 * Grow the arc table.
208 */
209
210static void
211pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items)
212{
213	int nmaxsize;
214
215	if (cursize < *maxsize)
216		return;
217
218	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
219	*items = realloc(*items, nmaxsize * sizeof(struct pmcpl_ct_arc));
220	if (*items == NULL)
221		errx(EX_SOFTWARE, "ERROR: out of memory");
222	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc),
223	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc));
224	*maxsize = nmaxsize;
225}
226
227/*
228 * Compare two arc by samples value.
229 */
230static int
231pmcpl_ct_arc_compare(void *thunk, const void *a, const void *b)
232{
233	const struct pmcpl_ct_arc *ct1, *ct2;
234	int pmcin = *(int *)thunk;
235
236	ct1 = (const struct pmcpl_ct_arc *) a;
237	ct2 = (const struct pmcpl_ct_arc *) b;
238
239	/* Sort in reverse order */
240	if (PMCPL_CT_SAMPLE(pmcin, &ct1->pcta_samples) <
241	    PMCPL_CT_SAMPLE(pmcin, &ct2->pcta_samples))
242		return (1);
243	if (PMCPL_CT_SAMPLE(pmcin, &ct1->pcta_samples) >
244	    PMCPL_CT_SAMPLE(pmcin, &ct2->pcta_samples))
245		return (-1);
246	return (0);
247}
248
249/*
250 * Grow the instr table.
251 */
252
253static void
254pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items)
255{
256	int nmaxsize;
257
258	if (cursize < *maxsize)
259		return;
260
261	nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
262	*items = realloc(*items, nmaxsize * sizeof(struct pmcpl_ct_instr));
263	if (*items == NULL)
264		errx(EX_SOFTWARE, "ERROR: out of memory");
265	bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr),
266	    (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr));
267	*maxsize = nmaxsize;
268}
269
270/*
271 * Add a new instruction sample to given node.
272 */
273
274static void
275pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin, uintfptr_t pc)
276{
277	int i;
278	struct pmcpl_ct_instr *in;
279
280	for (i = 0; i<ct->pct_ninstr; i++) {
281		if (ct->pct_instr[i].pctf_func == pc) {
282			in = &ct->pct_instr[i];
283			pmcpl_ct_samples_grow(&in->pctf_samples);
284			in->pctf_samples.sb[pmcin]++;
285			return;
286		}
287	}
288
289	pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr);
290	in = &ct->pct_instr[ct->pct_ninstr];
291	in->pctf_func = pc;
292	pmcpl_ct_samples_init(&in->pctf_samples);
293	pmcpl_ct_samples_grow(&in->pctf_samples);
294	in->pctf_samples.sb[pmcin] = 1;
295	ct->pct_ninstr++;
296}
297
298/*
299 * Allocate a new node.
300 */
301
302static struct pmcpl_ct_node *
303pmcpl_ct_node_allocate(struct pmcstat_image *image, uintfptr_t pc)
304{
305	struct pmcpl_ct_node *ct;
306
307	if ((ct = malloc(sizeof(*ct))) == NULL)
308		err(EX_OSERR, "ERROR: Cannot allocate callgraph node");
309
310	ct->pct_flags	= 0;
311	ct->pct_image 	= image;
312	ct->pct_func	= pc;
313
314	pmcpl_ct_samples_init(&ct->pct_samples);
315
316	ct->pct_narc	= 0;
317	ct->pct_arc_c	= 0;
318	ct->pct_arc	= NULL;
319
320	ct->pct_ninstr	= 0;
321	ct->pct_instr_c	= 0;
322	ct->pct_instr	= NULL;
323
324	return (ct);
325}
326
327/*
328 * Free a node.
329 */
330
331static void
332pmcpl_ct_node_free(struct pmcpl_ct_node *ct)
333{
334	int i;
335
336	for (i = 0; i < ct->pct_narc; i++) {
337		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples);
338		pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid);
339	}
340
341	pmcpl_ct_samples_free(&ct->pct_samples);
342	free(ct->pct_arc);
343	free(ct->pct_instr);
344	free(ct);
345}
346
347/*
348 * Clear the graph tag on each node.
349 */
350static void
351pmcpl_ct_node_cleartag(void)
352{
353	int i;
354	struct pmcpl_ct_node_hash *pch;
355
356	for (i = 0; i < PMCSTAT_NHASH; i++)
357		LIST_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
358			pch->pch_ctnode->pct_flags &= ~PMCPL_PCT_TAG;
359
360	pmcpl_ct_root->pct_flags &= ~PMCPL_PCT_TAG;
361}
362
363/*
364 * Print the callchain line by line with maximum cost at top.
365 */
366
367static int
368pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct,
369    struct pmcpl_ct_sample *rsamples, int x, int *y)
370{
371	int i;
372
373	if (ct->pct_flags & PMCPL_PCT_TAG)
374		return 0;
375
376	ct->pct_flags |= PMCPL_PCT_TAG;
377
378	if (x >= PMCPL_CT_MAXCOL) {
379		pmcpl_ct_topscreen[x][*y] = NULL;
380		return 1;
381	}
382	pmcpl_ct_topscreen[x][*y] = ct;
383
384	/*
385	 * This is a terminal node
386	 */
387	if (ct->pct_narc == 0) {
388		pmcpl_ct_topscreen[x+1][*y] = NULL;
389		if (*y >= PMCPL_CT_MAXLINE ||
390		    *y >= pmcstat_displaywidth)
391			return 1;
392		*y = *y + 1;
393		for (i=0; i < x; i++)
394			pmcpl_ct_topscreen[i][*y] =
395			    pmcpl_ct_topscreen[i][*y - 1];
396		return 0;
397	}
398
399	/*
400	 * Quicksort the arcs.
401	 */
402	qsort_r(ct->pct_arc, ct->pct_narc, sizeof(struct pmcpl_ct_arc),
403	    &pmcin, pmcpl_ct_arc_compare);
404
405	for (i = 0; i < ct->pct_narc; i++) {
406		if (PMCPL_CT_SAMPLEP(pmcin,
407		    &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) {
408			if (pmcpl_ct_node_dumptop(pmcin,
409			        ct->pct_arc[i].pcta_child,
410			        rsamples, x+1, y))
411				return 1;
412		}
413	}
414
415	return 0;
416}
417
418/*
419 * Format and display given PMC index.
420 */
421
422static void
423pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy)
424{
425	int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y;
426	float v;
427	char ns[30], vs[10], is[20];
428	struct pmcpl_ct_node *ct;
429	struct pmcstat_symbol *sym;
430	const char *space = " ";
431
432	for (y = 0; y < maxy; y++) {
433		/* Output image. */
434		ct = pmcpl_ct_topscreen[0][y];
435		snprintf(is, sizeof(is), "%-10.10s",
436		    pmcstat_string_unintern(ct->pct_image->pi_name));
437		PMCSTAT_PRINTW("%s ", is);
438		width = indentwidth = 11;
439
440		for (x = 0; pmcpl_ct_topscreen[x][y] !=NULL; x++) {
441
442			ct = pmcpl_ct_topscreen[x][y];
443
444			ns[0] = '\0'; ns_len = 0;
445			vs[0] = '\0'; vs_len = 0;
446			is[0] = '\0'; is_len = 0;
447
448			/* Format value. */
449			v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples);
450			if (v > pmcstat_threshold)
451				vs_len  = snprintf(vs, sizeof(vs), "(%.1f%%)", v);
452			v_attrs = PMCSTAT_ATTRPERCENT(v);
453
454			if (pmcstat_skiplink && v <= pmcstat_threshold) {
455				PMCSTAT_PRINTW(". ");
456				width += 2;
457				continue;
458			}
459			sym = pmcstat_symbol_search(ct->pct_image, ct->pct_func);
460			if (sym != NULL) {
461				ns_len = snprintf(ns, sizeof(ns), "%s",
462				    pmcstat_string_unintern(sym->ps_name));
463			} else
464				ns_len = snprintf(ns, sizeof(ns), "%p",
465				    (void *)ct->pct_func);
466
467			/* Format image. */
468			if (x > 0 && pmcpl_ct_topscreen[x-1][y]->pct_image != ct->pct_image)
469				is_len = snprintf(is, sizeof(is), "@%s",
470				    pmcstat_string_unintern(ct->pct_image->pi_name));
471
472			/* Check for line wrap. */
473			width += ns_len + is_len + vs_len + 1;
474			if (width >= pmcstat_displaywidth) {
475				PMCSTAT_PRINTW("\n%*s", indentwidth, space);
476				width = indentwidth + ns_len + is_len + vs_len;
477			}
478
479			PMCSTAT_ATTRON(v_attrs);
480			PMCSTAT_PRINTW("%s%s%s ", ns, is, vs);
481			PMCSTAT_ATTROFF(v_attrs);
482		}
483		PMCSTAT_PRINTW("\n");
484	}
485}
486
487/*
488 * Output top mode snapshot.
489 */
490
491void
492pmcpl_ct_topdisplay(void)
493{
494	int i, x, y, pmcin;
495	struct pmcpl_ct_sample rsamples;
496
497	pmcpl_ct_samples_root(&rsamples);
498
499	PMCSTAT_PRINTW("%-10.10s %s\n", "IMAGE", "CALLTREE");
500
501	for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++) {
502		/* Filter PMCs. */
503		if (pmcstat_pmcinfilter != pmcin)
504			continue;
505
506		pmcpl_ct_node_cleartag();
507
508		/* Quicksort the arcs. */
509		qsort_r(pmcpl_ct_root->pct_arc,
510		    pmcpl_ct_root->pct_narc,
511		    sizeof(struct pmcpl_ct_arc),
512		    &pmcin, pmcpl_ct_arc_compare);
513
514		x = y = 0;
515		for (i = 0; i < pmcpl_ct_root->pct_narc; i++) {
516			if (pmcpl_ct_node_dumptop(pmcin,
517			        pmcpl_ct_root->pct_arc[i].pcta_child,
518			        &rsamples, x, &y)) {
519				break;
520			}
521		}
522
523		pmcpl_ct_node_printtop(&rsamples, pmcin, y);
524	}
525	pmcpl_ct_samples_free(&rsamples);
526}
527
528/*
529 * Handle top mode keypress.
530 */
531
532int
533pmcpl_ct_topkeypress(int c, WINDOW *w)
534{
535
536	switch (c) {
537	case 'f':
538		pmcstat_skiplink = !pmcstat_skiplink;
539		wprintw(w, "skip empty link %s", pmcstat_skiplink ? "on" : "off");
540		break;
541	}
542
543	return 0;
544}
545
546/*
547 * Look for a callgraph node associated with pmc `pmcid' in the global
548 * hash table that corresponds to the given `pc' value in the process map
549 * `ppm'.
550 */
551
552static struct pmcpl_ct_node *
553pmcpl_ct_node_hash_lookup_pc(struct pmcpl_ct_node *parent,
554    struct pmcstat_pcmap *ppm, uintfptr_t pc, int pmcin)
555{
556	struct pmcstat_symbol *sym;
557	struct pmcstat_image *image;
558	struct pmcpl_ct_node *ct;
559	struct pmcpl_ct_node_hash *h;
560	struct pmcpl_ct_arc *arc;
561	uintfptr_t loadaddress;
562	int i;
563	unsigned int hash;
564
565	assert(parent != NULL);
566
567	image = ppm->ppm_image;
568
569	loadaddress = ppm->ppm_lowpc + image->pi_vaddr - image->pi_start;
570	pc -= loadaddress;	/* Convert to an offset in the image. */
571
572	/*
573	 * Try determine the function at this offset.  If we can't
574	 * find a function round leave the `pc' value alone.
575	 */
576	if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
577		pc = sym->ps_start;
578
579	for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++)
580		hash += (pc >> i) & 0xFF;
581
582	hash &= PMCSTAT_HASH_MASK;
583
584	ct = NULL;
585	LIST_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) {
586		ct = h->pch_ctnode;
587
588		assert(ct != NULL);
589
590		if (ct->pct_image == image && ct->pct_func == pc) {
591			/*
592			 * Find related arc in parent node and
593			 * increment the sample count.
594			 */
595			for (i = 0; i < parent->pct_narc; i++) {
596				if (parent->pct_arc[i].pcta_child == ct) {
597					arc = &parent->pct_arc[i];
598					pmcpl_ct_samples_grow(&arc->pcta_samples);
599					arc->pcta_samples.sb[pmcin]++;
600					/* Estimate call count. */
601					pmcpl_ct_samples_grow(&arc->pcta_callid);
602					if (pmcpl_ct_callid.sb[pmcin] -
603					    arc->pcta_callid.sb[pmcin] > 1)
604						arc->pcta_call++;
605					arc->pcta_callid.sb[pmcin] =
606					    pmcpl_ct_callid.sb[pmcin];
607					return (ct);
608				}
609			}
610
611			/*
612			 * No arc found for us, add ourself to the parent.
613			 */
614			pmcpl_ct_arc_grow(parent->pct_narc,
615			    &parent->pct_arc_c, &parent->pct_arc);
616			arc = &parent->pct_arc[parent->pct_narc];
617			pmcpl_ct_samples_grow(&arc->pcta_samples);
618			arc->pcta_samples.sb[pmcin] = 1;
619			arc->pcta_call = 1;
620			pmcpl_ct_samples_grow(&arc->pcta_callid);
621			arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin];
622			arc->pcta_child = ct;
623			parent->pct_narc++;
624			return (ct);
625		}
626	}
627
628	/*
629	 * We haven't seen this (pmcid, pc) tuple yet, so allocate a
630	 * new callgraph node and a new hash table entry for it.
631	 */
632	ct = pmcpl_ct_node_allocate(image, pc);
633	if ((h = malloc(sizeof(*h))) == NULL)
634		err(EX_OSERR, "ERROR: Could not allocate callgraph node");
635
636	h->pch_ctnode = ct;
637	LIST_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next);
638
639	pmcpl_ct_arc_grow(parent->pct_narc,
640	    &parent->pct_arc_c, &parent->pct_arc);
641	arc = &parent->pct_arc[parent->pct_narc];
642	pmcpl_ct_samples_grow(&arc->pcta_samples);
643	arc->pcta_samples.sb[pmcin] = 1;
644	arc->pcta_call = 1;
645	pmcpl_ct_samples_grow(&arc->pcta_callid);
646	arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin];
647	arc->pcta_child = ct;
648	parent->pct_narc++;
649	return (ct);
650}
651
652/*
653 * Record a callchain.
654 */
655
656void
657pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
658    uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu)
659{
660	int n, pmcin;
661	struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX];
662	struct pmcstat_process *km;
663	struct pmcpl_ct_node *parent, *child;
664
665	(void) cpu;
666
667	assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX);
668
669	/* Get the PMC index. */
670	pmcin = pmcr->pr_pmcin;
671
672	/*
673	 * Validate mapping for the callchain.
674	 * Go from bottom to first invalid entry.
675	 */
676	km = pmcstat_kernproc;
677	for (n = 0; n < (int)nsamples; n++) {
678		ppm[n] = pmcstat_process_find_map(usermode ?
679		    pp : km, cc[n]);
680		if (ppm[n] == NULL) {
681			/* Detect full frame capture (kernel + user). */
682			if (!usermode) {
683				ppm[n] = pmcstat_process_find_map(pp, cc[n]);
684				if (ppm[n] != NULL)
685					km = pp;
686			}
687		}
688		if (ppm[n] == NULL)
689			break;
690	}
691	if (n-- == 0) {
692		pmcstat_stats.ps_callchain_dubious_frames++;
693		return;
694	}
695
696	/* Increase the call generation counter. */
697	pmcpl_ct_samples_grow(&pmcpl_ct_callid);
698	pmcpl_ct_callid.sb[pmcin]++;
699
700	/*
701	 * Iterate remaining addresses.
702	 */
703	for (parent = pmcpl_ct_root, child = NULL; n >= 0; n--) {
704		child = pmcpl_ct_node_hash_lookup_pc(parent, ppm[n], cc[n],
705		    pmcin);
706		if (child == NULL) {
707			pmcstat_stats.ps_callchain_dubious_frames++;
708			continue;
709		}
710		parent = child;
711	}
712
713	/*
714	 * Increment the sample count for this PMC.
715	 */
716	if (child != NULL) {
717		pmcpl_ct_samples_grow(&child->pct_samples);
718		child->pct_samples.sb[pmcin]++;
719
720		/* Update per instruction sample if required. */
721		if (args.pa_ctdumpinstr)
722			pmcpl_ct_instr_add(child, pmcin, cc[0] -
723			    (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr -
724			     ppm[0]->ppm_image->pi_start));
725	}
726}
727
728/*
729 * Print node self cost.
730 */
731
732static void
733pmcpl_ct_node_printself(struct pmcpl_ct_node *ct)
734{
735	int i, j, line;
736	uintptr_t addr;
737	struct pmcstat_symbol *sym;
738	char sourcefile[PATH_MAX];
739	char funcname[PATH_MAX];
740
741	/*
742	 * Object binary.
743	 */
744#ifdef PMCPL_CT_OPTIMIZEFN
745	if (pmcpl_ct_prevfn != ct->pct_image->pi_fullpath) {
746#endif
747		pmcpl_ct_prevfn = ct->pct_image->pi_fullpath;
748		fprintf(args.pa_graphfile, "ob=%s\n",
749		    pmcstat_string_unintern(pmcpl_ct_prevfn));
750#ifdef PMCPL_CT_OPTIMIZEFN
751	}
752#endif
753
754	/*
755	 * Function name.
756	 */
757	if (pmcstat_image_addr2line(ct->pct_image, ct->pct_func,
758	    sourcefile, sizeof(sourcefile), &line,
759	    funcname, sizeof(funcname))) {
760		fprintf(args.pa_graphfile, "fn=%s\n",
761		    funcname);
762	} else {
763		sym = pmcstat_symbol_search(ct->pct_image, ct->pct_func);
764		if (sym != NULL)
765			fprintf(args.pa_graphfile, "fn=%s\n",
766			    pmcstat_string_unintern(sym->ps_name));
767		else
768			fprintf(args.pa_graphfile, "fn=%p\n",
769			    (void *)(ct->pct_image->pi_vaddr + ct->pct_func));
770	}
771
772	/*
773	 * Self cost.
774	 */
775	if (ct->pct_ninstr > 0) {
776		for (i = 0; i < ct->pct_ninstr; i++) {
777			addr = ct->pct_image->pi_vaddr +
778			    ct->pct_instr[i].pctf_func;
779			line = 0;
780			if (pmcstat_image_addr2line(ct->pct_image, addr,
781			    sourcefile, sizeof(sourcefile), &line,
782			    funcname, sizeof(funcname)))
783				fprintf(args.pa_graphfile, "fl=%s\n", sourcefile);
784			fprintf(args.pa_graphfile, "%p %u", (void *)addr, line);
785			for (j = 0; j<pmcstat_npmcs; j++)
786				fprintf(args.pa_graphfile, " %u",
787				    PMCPL_CT_SAMPLE(j,
788				    &ct->pct_instr[i].pctf_samples));
789			fprintf(args.pa_graphfile, "\n");
790		}
791	} else {
792		addr = ct->pct_image->pi_vaddr + ct->pct_func;
793		line = 0;
794		if (pmcstat_image_addr2line(ct->pct_image, addr,
795		    sourcefile, sizeof(sourcefile), &line,
796		    funcname, sizeof(funcname)))
797			fprintf(args.pa_graphfile, "fl=%s\n", sourcefile);
798		fprintf(args.pa_graphfile, "* *");
799		for (i = 0; i<pmcstat_npmcs ; i++)
800			fprintf(args.pa_graphfile, " %u",
801			    PMCPL_CT_SAMPLE(i, &ct->pct_samples));
802		fprintf(args.pa_graphfile, "\n");
803	}
804}
805
806/*
807 * Print node child cost.
808 */
809
810static void
811pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct)
812{
813	int i, j, line;
814	uintptr_t addr;
815	struct pmcstat_symbol *sym;
816	struct pmcpl_ct_node *child;
817	char sourcefile[PATH_MAX];
818	char funcname[PATH_MAX];
819
820	/*
821	 * Child cost.
822	 * TODO: attach child cost to the real position in the funtion.
823	 * TODO: cfn=<fn> / call <ncall> addr(<fn>) / addr(call <fn>) <arccost>
824	 */
825	for (i=0 ; i<ct->pct_narc; i++) {
826		child = ct->pct_arc[i].pcta_child;
827
828		/* Object binary. */
829#ifdef PMCPL_CT_OPTIMIZEFN
830		if (pmcpl_ct_prevfn != child->pct_image->pi_fullpath) {
831#endif
832			pmcpl_ct_prevfn = child->pct_image->pi_fullpath;
833			fprintf(args.pa_graphfile, "cob=%s\n",
834			    pmcstat_string_unintern(pmcpl_ct_prevfn));
835#if PMCPL_CT_OPTIMIZEFN
836		}
837#endif
838		/* Child function name. */
839		addr = child->pct_image->pi_vaddr + child->pct_func;
840		/* Child function source file. */
841		if (pmcstat_image_addr2line(child->pct_image, addr,
842		    sourcefile, sizeof(sourcefile), &line,
843		    funcname, sizeof(funcname))) {
844			fprintf(args.pa_graphfile, "cfn=%s\n", funcname);
845			fprintf(args.pa_graphfile, "cfl=%s\n", sourcefile);
846		} else {
847			sym = pmcstat_symbol_search(child->pct_image,
848			    child->pct_func);
849			if (sym != NULL)
850				fprintf(args.pa_graphfile, "cfn=%s\n",
851				    pmcstat_string_unintern(sym->ps_name));
852			else
853				fprintf(args.pa_graphfile, "cfn=%p\n", (void *)addr);
854		}
855
856		/* Child function address, line and call count. */
857		fprintf(args.pa_graphfile, "calls=%u %p %u\n",
858		    ct->pct_arc[i].pcta_call, (void *)addr, line);
859
860		if (ct->pct_image != NULL) {
861			/* Call address, line, sample. */
862			addr = ct->pct_image->pi_vaddr + ct->pct_func;
863			line = 0;
864			pmcstat_image_addr2line(ct->pct_image, addr, sourcefile,
865			    sizeof(sourcefile), &line,
866			    funcname, sizeof(funcname));
867			fprintf(args.pa_graphfile, "%p %u", (void *)addr, line);
868		}
869		else
870			fprintf(args.pa_graphfile, "* *");
871		for (j = 0; j<pmcstat_npmcs; j++)
872			fprintf(args.pa_graphfile, " %u",
873			    PMCPL_CT_SAMPLE(j, &ct->pct_arc[i].pcta_samples));
874		fprintf(args.pa_graphfile, "\n");
875	}
876}
877
878/*
879 * Clean the PMC name for Kcachegrind formula
880 */
881
882static void
883pmcpl_ct_fixup_pmcname(char *s)
884{
885	char *p;
886
887	for (p = s; *p; p++)
888		if (!isalnum(*p))
889			*p = '_';
890}
891
892/*
893 * Print a calltree (KCachegrind) for all PMCs.
894 */
895
896static void
897pmcpl_ct_print(void)
898{
899	int n, i;
900	struct pmcpl_ct_node_hash *pch;
901	struct pmcpl_ct_sample rsamples;
902	char name[40];
903
904	pmcpl_ct_samples_root(&rsamples);
905	pmcpl_ct_prevfn = NULL;
906
907	fprintf(args.pa_graphfile,
908		"version: 1\n"
909		"creator: pmcstat\n"
910		"positions: instr line\n"
911		"events:");
912	for (i=0; i<pmcstat_npmcs; i++) {
913		snprintf(name, sizeof(name), "%s_%d",
914		    pmcstat_pmcindex_to_name(i), i);
915		pmcpl_ct_fixup_pmcname(name);
916		fprintf(args.pa_graphfile, " %s", name);
917	}
918	fprintf(args.pa_graphfile, "\nsummary:");
919	for (i=0; i<pmcstat_npmcs ; i++)
920		fprintf(args.pa_graphfile, " %u",
921		    PMCPL_CT_SAMPLE(i, &rsamples));
922	fprintf(args.pa_graphfile, "\n\n");
923
924	/*
925	 * Fake root node
926	 */
927	fprintf(args.pa_graphfile, "ob=FreeBSD\n");
928	fprintf(args.pa_graphfile, "fn=ROOT\n");
929	fprintf(args.pa_graphfile, "* *");
930	for (i = 0; i<pmcstat_npmcs ; i++)
931		fprintf(args.pa_graphfile, " 0");
932	fprintf(args.pa_graphfile, "\n");
933	pmcpl_ct_node_printchild(pmcpl_ct_root);
934
935	for (n = 0; n < PMCSTAT_NHASH; n++)
936		LIST_FOREACH(pch, &pmcpl_ct_node_hash[n], pch_next) {
937			pmcpl_ct_node_printself(pch->pch_ctnode);
938			pmcpl_ct_node_printchild(pch->pch_ctnode);
939	}
940
941	pmcpl_ct_samples_free(&rsamples);
942}
943
944int
945pmcpl_ct_configure(char *opt)
946{
947
948	if (strncmp(opt, "skiplink=", 9) == 0) {
949		pmcstat_skiplink = atoi(opt+9);
950	} else
951		return (0);
952
953	return (1);
954}
955
956int
957pmcpl_ct_init(void)
958{
959	int i;
960
961	pmcpl_ct_prevfn = NULL;
962	pmcpl_ct_root = pmcpl_ct_node_allocate(NULL, 0);
963
964	for (i = 0; i < PMCSTAT_NHASH; i++)
965		LIST_INIT(&pmcpl_ct_node_hash[i]);
966
967	pmcpl_ct_samples_init(&pmcpl_ct_callid);
968
969	return (0);
970}
971
972void
973pmcpl_ct_shutdown(FILE *mf)
974{
975	int i;
976	struct pmcpl_ct_node_hash *pch, *pchtmp;
977
978	(void) mf;
979
980	if (args.pa_flags & FLAG_DO_CALLGRAPHS)
981		pmcpl_ct_print();
982
983	/*
984	 * Free memory.
985	 */
986
987	for (i = 0; i < PMCSTAT_NHASH; i++) {
988		LIST_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next,
989		    pchtmp) {
990			pmcpl_ct_node_free(pch->pch_ctnode);
991			free(pch);
992		}
993	}
994
995	pmcpl_ct_node_free(pmcpl_ct_root);
996	pmcpl_ct_root = NULL;
997
998	pmcpl_ct_samples_free(&pmcpl_ct_callid);
999}
1000
1001