pmcstat_log.c revision 157144
1/*-
2 * Copyright (c) 2005-2006, Joseph Koshy
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 * Transform a hwpmc(4) log into human readable form, and into
29 * gprof(1) compatible profiles.
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/usr.sbin/pmcstat/pmcstat_log.c 157144 2006-03-26 12:20:54Z jkoshy $");
34
35#include <sys/param.h>
36#include <sys/endian.h>
37#include <sys/gmon.h>
38#include <sys/imgact_aout.h>
39#include <sys/imgact_elf.h>
40#include <sys/mman.h>
41#include <sys/pmc.h>
42#include <sys/queue.h>
43#include <sys/stat.h>
44#include <sys/wait.h>
45
46#include <netinet/in.h>
47
48#include <assert.h>
49#include <err.h>
50#include <fcntl.h>
51#include <libgen.h>
52#include <limits.h>
53#include <pmc.h>
54#include <pmclog.h>
55#include <sysexits.h>
56#include <stdint.h>
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#include <unistd.h>
61
62#include "pmcstat.h"
63
/*
 * Two-argument minimum and maximum.  These are plain macros, so an
 * argument may be evaluated more than once; pass only expressions
 * without side effects.
 */
#define	min(x, y)		(((x) < (y)) ? (x) : (y))
#define	max(x, y)		(((x) > (y)) ? (x) : (y))
66
67/*
68 * PUBLIC INTERFACES
69 *
70 * pmcstat_initialize_logging()	initialize this module, called first
71 * pmcstat_shutdown_logging()		orderly shutdown, called last
72 * pmcstat_open_log()			open an eventlog for processing
73 * pmcstat_process_log()		print/convert an event log
74 * pmcstat_close_log()			finish processing an event log
75 *
76 * IMPLEMENTATION OF GMON OUTPUT
77 *
78 * We correlate each 'sample' seen in the event log back to an
79 * executable object in the system. Executable objects include:
80 * 	- program executables,
81 *	- shared libraries loaded by the runtime loader,
82 *	- dlopen()'ed objects loaded by the program,
83 *	- the runtime loader itself,
84 *	- the kernel and kernel modules.
85 *
86 * Each such executable object gets one 'gmon.out' profile, per PMC in
87 * use.  Creation of 'gmon.out' profiles is done lazily.  The
88 * 'gmon.out' profiles generated for a given sampling PMC are
89 * aggregates of all the samples for that particular executable
90 * object.
91 *
92 * Each process that we know about is treated as a set of regions that
93 * map to executable objects.  Processes are described by
94 * 'pmcstat_process' structures.  Executable objects are tracked by
95 * 'pmcstat_image' structures.  The kernel and kernel modules are
96 * common to all processes (they reside at the same virtual addresses
97 * for all processes).  Individual processes can have their text
98 * segments and shared libraries loaded at process-specific locations.
99 *
100 * A given executable object can be in use by multiple processes
101 * (e.g., libc.so) and loaded at a different address in each.
102 * pmcstat_pcmap structures track per-image mappings.
103 *
104 * The sample log could have samples from multiple PMCs; we
105 * generate one 'gmon.out' profile per PMC.
106 */
107
/*
 * Opaque handle for an interned string.  Handles are produced by
 * pmcstat_string_intern() and are converted back to C strings by
 * pmcstat_string_unintern().
 */
typedef const void *pmcstat_interned_string;
109
110/*
111 * 'pmcstat_pmcrecord' is a mapping from PMC ids to human-readable
112 * names.
113 */
114
115struct pmcstat_pmcrecord {
116	LIST_ENTRY(pmcstat_pmcrecord)	pr_next;
117	pmc_id_t			pr_pmcid;
118	pmcstat_interned_string	pr_pmcname;
119};
120
/*
 * The list of all known {PMC id, name} records.  The initializer is
 * handed the list head itself (not its address), which is how the
 * queue(3) *_HEAD_INITIALIZER() macros are meant to be used.
 */
static LIST_HEAD(,pmcstat_pmcrecord)	pmcstat_pmcs =
	LIST_HEAD_INITIALIZER(pmcstat_pmcs);
123
124
125/*
126 * struct pmcstat_gmonfile tracks a given 'gmon.out' file.  These
127 * files are mmap()'ed in as needed.
128 */
129
130struct pmcstat_gmonfile {
131	LIST_ENTRY(pmcstat_gmonfile)	pgf_next; /* list of entries */
132	int		pgf_overflow;	/* whether a count overflowed */
133	pmc_id_t	pgf_pmcid;	/* id of the associated pmc */
134	size_t		pgf_nbuckets;	/* #buckets in this gmon.out */
135	pmcstat_interned_string pgf_name;	/* pathname of gmon.out file */
136	size_t		pgf_ndatabytes;	/* number of bytes mapped */
137	void		*pgf_gmondata;	/* pointer to mmap'ed data */
138};
139
/*
 * A 'pmcstat_image' structure describes an executable program on
 * disk.  'pi_execpath' is a cookie representing the pathname of
 * the executable.  'pi_start' and 'pi_end' are the least and greatest
 * virtual addresses for the text segments in the executable.
 * 'pi_gmlist' contains a linked list of gmon.out files associated
 * with this image.
 */
148
/* The file formats an image can be classified as. */
enum pmcstat_image_type {
	PMCSTAT_IMAGE_UNKNOWN = 0,	/* image not examined yet */
	PMCSTAT_IMAGE_INDETERMINABLE,	/* examined, format not recognized */
	PMCSTAT_IMAGE_ELF32,		/* 32 bit ELF object */
	PMCSTAT_IMAGE_ELF64,		/* 64 bit ELF object */
	PMCSTAT_IMAGE_AOUT		/* a.out object */
};
156
157struct pmcstat_image {
158	LIST_ENTRY(pmcstat_image) pi_next;	/* hash link */
159	TAILQ_ENTRY(pmcstat_image) pi_lru;	/* LRU list */
160	pmcstat_interned_string	pi_execpath;/* cookie */
161	pmcstat_interned_string pi_samplename;  /* sample path name */
162
163	enum pmcstat_image_type pi_type;	/* executable type */
164
165	/*
166	 * Executables have pi_start and pi_end; these are zero
167	 * for shared libraries.
168	 */
169	uintfptr_t	pi_start;		/* start address (inclusive) */
170	uintfptr_t	pi_end;			/* end address (exclusive) */
171	uintfptr_t	pi_entry;		/* entry address */
172	uintfptr_t	pi_vaddr;		/* virtual address where loaded */
173	int		pi_isdynamic;		/* whether a dynamic
174						 * object */
175	int		pi_iskernelmodule;
176	pmcstat_interned_string pi_dynlinkerpath; /* path in .interp */
177
178	/*
179	 * An image can be associated with one or more gmon.out files;
180	 * one per PMC.
181	 */
182	LIST_HEAD(,pmcstat_gmonfile) pi_gmlist;
183};
184
185/*
186 * All image descriptors are kept in a hash table.
187 */
188static LIST_HEAD(,pmcstat_image)	pmcstat_image_hash[PMCSTAT_NHASH];
189static TAILQ_HEAD(,pmcstat_image)	pmcstat_image_lru =
190	TAILQ_HEAD_INITIALIZER(pmcstat_image_lru);
191
192/*
193 * A 'pmcstat_pcmap' structure maps a virtual address range to an
194 * underlying 'pmcstat_image' descriptor.
195 */
196struct pmcstat_pcmap {
197	TAILQ_ENTRY(pmcstat_pcmap) ppm_next;
198	uintfptr_t	ppm_lowpc;
199	uintfptr_t	ppm_highpc;
200	struct pmcstat_image *ppm_image;
201};
202
203/*
204 * A 'pmcstat_process' structure models processes.  Each process is
205 * associated with a set of pmcstat_pcmap structures that map
206 * addresses inside it to executable objects.  This set is implemented
207 * as a list, kept sorted in ascending order of mapped addresses.
208 *
209 * 'pp_pid' holds the pid of the process.  When a process exits, the
210 * 'pp_isactive' field is set to zero, but the process structure is
211 * not immediately reclaimed because there may still be samples in the
212 * log for this process.
213 */
214
215struct pmcstat_process {
216	LIST_ENTRY(pmcstat_process) pp_next;	/* hash-next */
217	pid_t			pp_pid;		/* associated pid */
218	int			pp_isactive;	/* whether active */
219	uintfptr_t		pp_entryaddr;	/* entry address */
220	TAILQ_HEAD(,pmcstat_pcmap) pp_map;	/* address range map */
221};
222
/*
 * Flag value; presumably passed as the '_allocate' argument of
 * pmcstat_process_lookup() -- confirm against its callers.
 */
#define	PMCSTAT_ALLOCATE		1
224
/*
 * Process descriptors are kept in a hash table.  The kernel is
 * represented by a process descriptor of its own.
 */
static LIST_HEAD(,pmcstat_process) pmcstat_process_hash[PMCSTAT_NHASH];

static struct pmcstat_process *pmcstat_kernproc; /* the kernel 'process' */
231
/* Counters kept while a log is being processed. */
static struct pmcstat_stats {
	int ps_exec_aout;		/* a.out executables seen */
	int ps_exec_elf;		/* ELF executables seen */
	int ps_exec_errors;		/* errors while processing executables */
	int ps_exec_indeterminable;	/* executables of unknown format seen */
	int ps_samples_total;		/* samples processed in total */
	int ps_samples_unknown_offset;	/* samples outside every map */
	int ps_samples_indeterminable;	/* samples in indeterminable images */
} pmcstat_stats;
242
243/*
244 * Prototypes
245 */
246
247static void	pmcstat_gmon_create_file(struct pmcstat_gmonfile *_pgf,
248    struct pmcstat_image *_image);
249static pmcstat_interned_string pmcstat_gmon_create_name(const char *_sd,
250    struct pmcstat_image *_img, pmc_id_t _pmcid);
251static void	pmcstat_gmon_map_file(struct pmcstat_gmonfile *_pgf);
252static void	pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *_pgf);
253
254static void pmcstat_image_determine_type(struct pmcstat_image *_image,
255    struct pmcstat_args *_a);
256static struct pmcstat_image *pmcstat_image_from_path(pmcstat_interned_string
257    _path, int _iskernelmodule);
258static void pmcstat_image_get_aout_params(struct pmcstat_image *_image,
259    struct pmcstat_args *_a);
260static void pmcstat_image_get_elf_params(struct pmcstat_image *_image,
261    struct pmcstat_args *_a);
262static void	pmcstat_image_increment_bucket(struct pmcstat_pcmap *_pcm,
263    uintfptr_t _pc, pmc_id_t _pmcid, struct pmcstat_args *_a);
264static void	pmcstat_image_link(struct pmcstat_process *_pp,
265    struct pmcstat_image *_i, uintfptr_t _lpc);
266
267static void	pmcstat_pmcid_add(pmc_id_t _pmcid,
268    pmcstat_interned_string _name, struct pmcstat_args *_a);
269static const char *pmcstat_pmcid_to_name(pmc_id_t _pmcid);
270
271static void	pmcstat_process_aout_exec(struct pmcstat_process *_pp,
272    struct pmcstat_image *_image, uintfptr_t _entryaddr,
273    struct pmcstat_args *_a);
274static void	pmcstat_process_elf_exec(struct pmcstat_process *_pp,
275    struct pmcstat_image *_image, uintfptr_t _entryaddr,
276    struct pmcstat_args *_a);
277static void	pmcstat_process_exec(struct pmcstat_process *_pp,
278    pmcstat_interned_string _path, uintfptr_t _entryaddr,
279    struct pmcstat_args *_ao);
280static struct pmcstat_process *pmcstat_process_lookup(pid_t _pid,
281    int _allocate);
282static struct pmcstat_pcmap *pmcstat_process_find_map(
283    struct pmcstat_process *_p, uintfptr_t _pc);
284
285static int	pmcstat_string_compute_hash(const char *_string);
286static void pmcstat_string_initialize(void);
287static pmcstat_interned_string pmcstat_string_intern(const char *_s);
288static pmcstat_interned_string pmcstat_string_lookup(const char *_s);
289static int	pmcstat_string_lookup_hash(pmcstat_interned_string _is);
290static void pmcstat_string_shutdown(void);
291static const char *pmcstat_string_unintern(pmcstat_interned_string _is);
292
293
/*
 * A simple implementation of interned strings.  Every distinct string
 * is stored once and is given a unique address, so two interned
 * strings can be compared by comparing pointers instead of calling
 * strcmp().  This speeds up hash table lookups, and saves memory when
 * duplicate strings are common.
 */
struct pmcstat_string {
	LIST_ENTRY(pmcstat_string)	ps_next;	/* hash chain linkage */
	int		ps_len;		/* strlen() of ps_string */
	int		ps_hash;	/* hash value of ps_string */
	char		*ps_string;	/* private copy of the string */
};
307
/* The table of interned strings, indexed by string hash value. */
static LIST_HEAD(,pmcstat_string)	pmcstat_string_hash[PMCSTAT_NHASH];
309
310/*
311 * Compute a 'hash' value for a string.
312 */
313
314static int
315pmcstat_string_compute_hash(const char *s)
316{
317	int hash;
318
319	for (hash = 0; *s; s++)
320		hash ^= *s;
321
322	return (hash & PMCSTAT_HASH_MASK);
323}
324
325/*
326 * Intern a copy of string 's', and return a pointer to the
327 * interned structure.
328 */
329
330static pmcstat_interned_string
331pmcstat_string_intern(const char *s)
332{
333	struct pmcstat_string *ps;
334	const struct pmcstat_string *cps;
335	int hash, len;
336
337	if ((cps = pmcstat_string_lookup(s)) != NULL)
338		return (cps);
339
340	hash = pmcstat_string_compute_hash(s);
341	len  = strlen(s);
342
343	if ((ps = malloc(sizeof(*ps))) == NULL)
344		err(EX_OSERR, "ERROR: Could not intern string");
345	ps->ps_len = len;
346	ps->ps_hash = hash;
347	ps->ps_string = strdup(s);
348	LIST_INSERT_HEAD(&pmcstat_string_hash[hash], ps, ps_next);
349	return ((pmcstat_interned_string) ps);
350}
351
352static const char *
353pmcstat_string_unintern(pmcstat_interned_string str)
354{
355	const char *s;
356
357	s = ((const struct pmcstat_string *) str)->ps_string;
358	return (s);
359}
360
361static pmcstat_interned_string
362pmcstat_string_lookup(const char *s)
363{
364	struct pmcstat_string *ps;
365	int hash, len;
366
367	hash = pmcstat_string_compute_hash(s);
368	len = strlen(s);
369
370	LIST_FOREACH(ps, &pmcstat_string_hash[hash], ps_next)
371	    if (ps->ps_len == len && ps->ps_hash == hash &&
372		strcmp(ps->ps_string, s) == 0)
373		    return (ps);
374	return (NULL);
375}
376
377static int
378pmcstat_string_lookup_hash(pmcstat_interned_string s)
379{
380	const struct pmcstat_string *ps;
381
382	ps = (const struct pmcstat_string *) s;
383	return (ps->ps_hash);
384}
385
386/*
387 * Initialize the string interning facility.
388 */
389
390static void
391pmcstat_string_initialize(void)
392{
393	int i;
394
395	for (i = 0; i < PMCSTAT_NHASH; i++)
396		LIST_INIT(&pmcstat_string_hash[i]);
397}
398
399/*
400 * Destroy the string table, free'ing up space.
401 */
402
403static void
404pmcstat_string_shutdown(void)
405{
406	int i;
407	struct pmcstat_string *ps, *pstmp;
408
409	for (i = 0; i < PMCSTAT_NHASH; i++)
410		LIST_FOREACH_SAFE(ps, &pmcstat_string_hash[i], ps_next,
411		    pstmp) {
412			LIST_REMOVE(ps, ps_next);
413			free(ps->ps_string);
414			free(ps);
415		}
416}
417
418/*
419 * Create a gmon.out file and size it.
420 */
421
422static void
423pmcstat_gmon_create_file(struct pmcstat_gmonfile *pgf,
424    struct pmcstat_image *image)
425{
426	int fd;
427	size_t count;
428	struct gmonhdr gm;
429	const char *pathname;
430	char buffer[DEFAULT_BUFFER_SIZE];
431
432	pathname = pmcstat_string_unintern(pgf->pgf_name);
433	if ((fd = open(pathname, O_RDWR|O_NOFOLLOW|O_CREAT,
434		 S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
435		err(EX_OSERR, "ERROR: Cannot open \"%s\"", pathname);
436
437	gm.lpc = image->pi_start;
438	gm.hpc = image->pi_end;
439	gm.ncnt = (pgf->pgf_nbuckets * sizeof(HISTCOUNTER)) +
440	    sizeof(struct gmonhdr);
441	gm.version = GMONVERSION;
442	gm.profrate = 0;		/* use ticks */
443	gm.histcounter_type = 0;	/* compatibility with moncontrol() */
444	gm.spare[0] = gm.spare[1] = 0;
445
446	/* Write out the gmon header */
447	if (write(fd, &gm, sizeof(gm)) < 0)
448		goto error;
449
450	/* Zero fill the samples[] array */
451	(void) memset(buffer, 0, sizeof(buffer));
452
453	count = pgf->pgf_ndatabytes - sizeof(struct gmonhdr);
454	while (count > sizeof(buffer)) {
455		if (write(fd, &buffer, sizeof(buffer)) < 0)
456			goto error;
457		count -= sizeof(buffer);
458	}
459
460	if (write(fd, &buffer, count) < 0)
461		goto error;
462
463	/* TODO size the arc table */
464
465	(void) close(fd);
466
467	return;
468
469 error:
470	err(EX_OSERR, "ERROR: Cannot write \"%s\"", pathname);
471}
472
473/*
474 * Determine the full pathname of a gmon.out file for a given
475 * (image,pmcid) combination.  Return the interned string.
476 */
477
478pmcstat_interned_string
479pmcstat_gmon_create_name(const char *samplesdir, struct pmcstat_image *image,
480    pmc_id_t pmcid)
481{
482	const char *pmcname;
483	char fullpath[PATH_MAX];
484
485	pmcname = pmcstat_pmcid_to_name(pmcid);
486
487	(void) snprintf(fullpath, sizeof(fullpath),
488	    "%s/%s/%s", samplesdir, pmcname,
489	    pmcstat_string_unintern(image->pi_samplename));
490
491	return (pmcstat_string_intern(fullpath));
492}
493
494
495/*
496 * Mmap in a gmon.out file for processing.
497 */
498
499static void
500pmcstat_gmon_map_file(struct pmcstat_gmonfile *pgf)
501{
502	int fd;
503	const char *pathname;
504
505	pathname = pmcstat_string_unintern(pgf->pgf_name);
506
507	/* the gmon.out file must already exist */
508	if ((fd = open(pathname, O_RDWR | O_NOFOLLOW, 0)) < 0)
509		err(EX_OSERR, "ERROR: cannot open \"%s\"", pathname);
510
511	pgf->pgf_gmondata = mmap(NULL, pgf->pgf_ndatabytes,
512	    PROT_READ|PROT_WRITE, MAP_NOSYNC|MAP_SHARED, fd, 0);
513
514	if (pgf->pgf_gmondata == MAP_FAILED)
515		err(EX_OSERR, "ERROR: cannot map \"%s\"", pathname);
516
517	(void) close(fd);
518}
519
520/*
521 * Unmap a gmon.out file after sync'ing its data to disk.
522 */
523
524static void
525pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *pgf)
526{
527	(void) msync(pgf->pgf_gmondata, pgf->pgf_ndatabytes,
528	    MS_SYNC);
529	(void) munmap(pgf->pgf_gmondata, pgf->pgf_ndatabytes);
530	pgf->pgf_gmondata = NULL;
531}
532
533/*
534 * Determine whether a given executable image is an A.OUT object, and
535 * if so, fill in its parameters from the text file.
536 * Sets image->pi_type.
537 */
538
539static void
540pmcstat_image_get_aout_params(struct pmcstat_image *image,
541    struct pmcstat_args *a)
542{
543	int fd;
544	ssize_t nbytes;
545	struct exec ex;
546	const char *path;
547	char buffer[PATH_MAX];
548
549	path = pmcstat_string_unintern(image->pi_execpath);
550	assert(path != NULL);
551
552	if (image->pi_iskernelmodule)
553		errx(EX_SOFTWARE, "ERROR: a.out kernel modules are "
554		    "unsupported \"%s\"", path);
555
556	(void) snprintf(buffer, sizeof(buffer), "%s%s",
557	    a->pa_fsroot, path);
558
559	if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
560	    (nbytes = read(fd, &ex, sizeof(ex))) < 0) {
561		warn("WARNING: Cannot determine type of \"%s\"", path);
562		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
563		if (fd != -1)
564			(void) close(fd);
565		return;
566	}
567
568	(void) close(fd);
569
570	if ((unsigned) nbytes != sizeof(ex) ||
571	    N_BADMAG(ex))
572		return;
573
574	image->pi_type = PMCSTAT_IMAGE_AOUT;
575
576	/* TODO: the rest of a.out processing */
577
578	return;
579}
580
581/*
582 * Examine an ELF file to determine the size of its text segment.
583 * Sets image->pi_type if anything conclusive can be determined about
584 * this image.
585 */
586
587static void
588pmcstat_image_get_elf_params(struct pmcstat_image *image,
589    struct pmcstat_args *a)
590{
591	int fd, i;
592	const char *path;
593	void *mapbase;
594	uintfptr_t minva, maxva;
595	const Elf_Ehdr *h;
596	const Elf_Phdr *ph;
597	const Elf_Shdr *sh;
598#if	defined(__amd64__)
599	const Elf32_Ehdr *h32;
600	const Elf32_Phdr *ph32;
601	const Elf32_Shdr *sh32;
602#endif
603	enum pmcstat_image_type image_type;
604	struct stat st;
605	char buffer[PATH_MAX];
606
607	assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
608
609	minva = ~(uintfptr_t) 0;
610	maxva = (uintfptr_t) 0;
611	path = pmcstat_string_unintern(image->pi_execpath);
612
613	assert(path != NULL);
614
615	/*
616	 * Look for kernel modules under FSROOT/KERNELPATH/NAME,
617	 * and user mode executable objects under FSROOT/PATHNAME.
618	 */
619	if (image->pi_iskernelmodule)
620		(void) snprintf(buffer, sizeof(buffer), "%s%s/%s",
621		    a->pa_fsroot, a->pa_kernel, path);
622	else
623		(void) snprintf(buffer, sizeof(buffer), "%s%s",
624		    a->pa_fsroot, path);
625
626	if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
627	    fstat(fd, &st) < 0 ||
628	    (mapbase = mmap(0, st.st_size, PROT_READ, MAP_SHARED,
629		fd, 0)) == MAP_FAILED) {
630		warn("WARNING: Cannot determine type of \"%s\"", buffer);
631		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
632		if (fd != -1)
633			(void) close(fd);
634		return;
635	}
636
637	(void) close(fd);
638
639	/* Punt on non-ELF objects */
640	h = (const Elf_Ehdr *) mapbase;
641	if (!IS_ELF(*h))
642		return;
643
644	/*
645	 * We only handle executable ELF objects and kernel
646	 * modules.
647	 */
648	if (h->e_type != ET_EXEC && h->e_type != ET_DYN &&
649	    !(image->pi_iskernelmodule && h->e_type == ET_REL))
650		return;
651
652	image->pi_isdynamic = 0;
653	image->pi_dynlinkerpath = NULL;
654	image->pi_vaddr = 0;
655
656#define	GET_VA(H, SH, MINVA, MAXVA) do {				\
657		for (i = 0; i < (H)->e_shnum; i++)			\
658			if ((SH)[i].sh_flags & SHF_EXECINSTR) {		\
659				(MINVA) = min((MINVA),(SH)[i].sh_addr);	\
660				(MAXVA) = max((MAXVA),(SH)[i].sh_addr +	\
661				    (SH)[i].sh_size);			\
662			}						\
663	} while (0)
664
665
666#define	GET_PHDR_INFO(H, PH, IMAGE) do {				\
667		for (i = 0; i < (H)->e_phnum; i++) {			\
668			switch ((PH)[i].p_type) {			\
669			case PT_DYNAMIC:				\
670				image->pi_isdynamic = 1;		\
671				break;					\
672			case PT_INTERP:					\
673				image->pi_dynlinkerpath =		\
674				    pmcstat_string_intern(		\
675					(char *) mapbase +		\
676					(PH)[i].p_offset);		\
677				break;					\
678			case PT_LOAD:					\
679				if ((PH)[i].p_offset == 0)		\
680				    image->pi_vaddr = 			\
681					(PH)[i].p_vaddr;		\
682				break;					\
683			}						\
684		}							\
685	} while (0)
686
687	switch (h->e_machine) {
688	case EM_386:
689	case EM_486:
690#if	defined(__amd64__)
691		/* a 32 bit executable */
692		h32 = (const Elf32_Ehdr *) h;
693		sh32 = (const Elf32_Shdr *)((uintptr_t) mapbase + h32->e_shoff);
694
695		GET_VA(h32, sh32, minva, maxva);
696
697		image->pi_entry = h32->e_entry;
698
699		if (h32->e_type == ET_EXEC) {
700			ph32 = (const Elf32_Phdr *)((uintptr_t) mapbase +
701			    h32->e_phoff);
702			GET_PHDR_INFO(h32, ph32, image);
703		}
704		image_type = PMCSTAT_IMAGE_ELF32;
705		break;
706#endif
707	default:
708		sh = (const Elf_Shdr *)((uintptr_t) mapbase + h->e_shoff);
709
710		GET_VA(h, sh, minva, maxva);
711
712		image->pi_entry = h->e_entry;
713
714		if (h->e_type == ET_EXEC) {
715			ph = (const Elf_Phdr *)((uintptr_t) mapbase +
716			    h->e_phoff);
717			GET_PHDR_INFO(h, ph, image);
718		}
719		image_type = PMCSTAT_IMAGE_ELF64;
720		break;
721	}
722
723#undef	GET_PHDR_INFO
724#undef	GET_VA
725
726	image->pi_start = minva;
727	image->pi_end   = maxva;
728	image->pi_type  = image_type;
729
730	if (munmap(mapbase, st.st_size) < 0)
731		err(EX_OSERR, "ERROR: Cannot unmap \"%s\"", path);
732	return;
733}
734
735/*
736 * Given an image descriptor, determine whether it is an ELF, or AOUT.
737 * If no handler claims the image, set its type to 'INDETERMINABLE'.
738 */
739
740static void
741pmcstat_image_determine_type(struct pmcstat_image *image,
742    struct pmcstat_args *a)
743{
744	assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
745
746	/* Try each kind of handler in turn */
747	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
748		pmcstat_image_get_elf_params(image, a);
749	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
750		pmcstat_image_get_aout_params(image, a);
751
752	/*
753	 * Otherwise, remember that we tried to determine
754	 * the object's type and had failed.
755	 */
756	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
757		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
758}
759
760/*
761 * Locate an image descriptor given an interned path, adding a fresh
762 * descriptor to the cache if necessary.  This function also finds a
763 * suitable name for this image's sample file.
764 *
765 * We defer filling in the file format specific parts of the image
766 * structure till the time we actually see a sample that would fall
767 * into this image.
768 */
769
770static struct pmcstat_image *
771pmcstat_image_from_path(pmcstat_interned_string internedpath,
772    int iskernelmodule)
773{
774	int count, hash, nlen;
775	struct pmcstat_image *pi;
776	char *sn;
777	char name[NAME_MAX];
778
779	hash = pmcstat_string_lookup_hash(internedpath);
780
781	/* First, look for an existing entry. */
782	LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next)
783	    if (pi->pi_execpath == internedpath &&
784		  pi->pi_iskernelmodule == iskernelmodule) {
785		    /* move descriptor to the head of the lru list */
786		    TAILQ_REMOVE(&pmcstat_image_lru, pi, pi_lru);
787		    TAILQ_INSERT_HEAD(&pmcstat_image_lru, pi, pi_lru);
788		    return (pi);
789	    }
790
791	/*
792	 * Allocate a new entry and place at the head of the hash and
793	 * LRU lists.
794	 */
795	pi = malloc(sizeof(*pi));
796	if (pi == NULL)
797		return (NULL);
798
799	pi->pi_type = PMCSTAT_IMAGE_UNKNOWN;
800	pi->pi_execpath = internedpath;
801	pi->pi_start = ~0;
802	pi->pi_entry = ~0;
803	pi->pi_end = 0;
804	pi->pi_iskernelmodule = iskernelmodule;
805
806	/*
807	 * Look for a suitable name for the sample files associated
808	 * with this image: if `basename(path)`+".gmon" is available,
809	 * we use that, otherwise we try iterating through
810	 * `basename(path)`+ "~" + NNN + ".gmon" till we get a free
811	 * entry.
812	 */
813	if ((sn = basename(pmcstat_string_unintern(internedpath))) == NULL)
814		err(EX_OSERR, "ERROR: Cannot process \"%s\"",
815		    pmcstat_string_unintern(internedpath));
816
817	nlen = strlen(sn);
818	nlen = min(nlen, (int) (sizeof(name) - sizeof(".gmon")));
819
820	snprintf(name, sizeof(name), "%.*s.gmon", nlen, sn);
821
822	/* try use the unabridged name first */
823	if (pmcstat_string_lookup(name) == NULL)
824		pi->pi_samplename = pmcstat_string_intern(name);
825	else {
826		/*
827		 * Otherwise use a prefix from the original name and
828		 * upto 3 digits.
829		 */
830		nlen = strlen(sn);
831		nlen = min(nlen, (int) (sizeof(name)-sizeof("~NNN.gmon")));
832		count = 0;
833		do {
834			if (++count > 999)
835				errx(EX_CANTCREAT, "ERROR: cannot create a gmon "
836				    "file for \"%s\"", name);
837			snprintf(name, sizeof(name), "%.*s~%3.3d.gmon",
838			    nlen, sn, count);
839			if (pmcstat_string_lookup(name) == NULL) {
840				pi->pi_samplename = pmcstat_string_intern(name);
841				count = 0;
842			}
843		} while (count > 0);
844	}
845
846
847	LIST_INIT(&pi->pi_gmlist);
848
849	LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next);
850	TAILQ_INSERT_HEAD(&pmcstat_image_lru, pi, pi_lru);
851
852	return (pi);
853}
854
855/*
856 * Increment the bucket in the gmon.out file corresponding to 'pmcid'
857 * and 'pc'.
858 */
859
860static void
861pmcstat_image_increment_bucket(struct pmcstat_pcmap *map, uintfptr_t pc,
862    pmc_id_t pmcid, struct pmcstat_args *a)
863{
864	struct pmcstat_image *image;
865	struct pmcstat_gmonfile *pgf;
866	uintfptr_t bucket;
867	HISTCOUNTER *hc;
868
869	assert(pc >= map->ppm_lowpc && pc < map->ppm_highpc);
870
871	image = map->ppm_image;
872
873	/*
874	 * If this is the first time we are seeing a sample for
875	 * this executable image, try determine its parameters.
876	 */
877	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
878		pmcstat_image_determine_type(image, a);
879
880	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN);
881
882	/* Ignore samples in images that we know nothing about. */
883	if (image->pi_type == PMCSTAT_IMAGE_INDETERMINABLE) {
884		pmcstat_stats.ps_samples_indeterminable++;
885		return;
886	}
887
888	/*
889	 * Find the gmon file corresponding to 'pmcid', creating it if
890	 * needed.
891	 */
892	LIST_FOREACH(pgf, &image->pi_gmlist, pgf_next)
893	    if (pgf->pgf_pmcid == pmcid)
894		    break;
895
896	/* If we don't have a gmon.out file for this PMCid, create one */
897	if (pgf == NULL) {
898		if ((pgf = calloc(1, sizeof(*pgf))) == NULL)
899			err(EX_OSERR, "ERROR:");
900
901		pgf->pgf_gmondata = NULL;	/* mark as unmapped */
902		pgf->pgf_name = pmcstat_gmon_create_name(a->pa_samplesdir,
903		    image, pmcid);
904		pgf->pgf_pmcid = pmcid;
905		assert(image->pi_end > image->pi_start);
906		pgf->pgf_nbuckets = (image->pi_end - image->pi_start) /
907		    FUNCTION_ALIGNMENT;	/* see <machine/profile.h> */
908		pgf->pgf_ndatabytes = sizeof(struct gmonhdr) +
909		    pgf->pgf_nbuckets * sizeof(HISTCOUNTER);
910
911		pmcstat_gmon_create_file(pgf, image);
912
913		LIST_INSERT_HEAD(&image->pi_gmlist, pgf, pgf_next);
914	}
915
916	/*
917	 * Map the gmon file in if needed.  It may have been mapped
918	 * out under memory pressure.
919	 */
920	if (pgf->pgf_gmondata == NULL)
921		pmcstat_gmon_map_file(pgf);
922
923	assert(pgf->pgf_gmondata != NULL);
924
925	/*
926	 *
927	 */
928
929	bucket = (pc - map->ppm_lowpc) / FUNCTION_ALIGNMENT;
930
931	assert(bucket < pgf->pgf_nbuckets);
932
933	hc = (HISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata +
934	    sizeof(struct gmonhdr));
935
936	/* saturating add */
937	if (hc[bucket] < 0xFFFFU)  /* XXX tie this to sizeof(HISTCOUNTER) */
938		hc[bucket]++;
939	else /* mark that an overflow occurred */
940		pgf->pgf_overflow = 1;
941}
942
943/*
944 * Record the fact that PC values from 'start' to 'end' come from
945 * image 'image'.
946 */
947
948static void
949pmcstat_image_link(struct pmcstat_process *pp, struct pmcstat_image *image,
950    uintfptr_t start)
951{
952	struct pmcstat_pcmap *pcm, *pcmnew;
953	uintfptr_t offset;
954
955	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN &&
956	    image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE);
957
958	if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
959		err(EX_OSERR, "ERROR: Cannot create a map entry");
960
961	/*
962	 * Adjust the map entry to only cover the text portion
963	 * of the object.
964	 */
965
966	offset = start - image->pi_vaddr;
967	pcmnew->ppm_lowpc  = image->pi_start + offset;
968	pcmnew->ppm_highpc = image->pi_end + offset;
969	pcmnew->ppm_image  = image;
970
971	assert(pcmnew->ppm_lowpc < pcmnew->ppm_highpc);
972
973	/* Overlapped mmap()'s are assumed to never occur. */
974	TAILQ_FOREACH(pcm, &pp->pp_map, ppm_next)
975	    if (pcm->ppm_lowpc >= pcmnew->ppm_highpc)
976		    break;
977
978	if (pcm == NULL)
979		TAILQ_INSERT_TAIL(&pp->pp_map, pcmnew, ppm_next);
980	else
981		TAILQ_INSERT_BEFORE(pcm, pcmnew, ppm_next);
982}
983
984/*
985 * Unmap images in the range [start..end) associated with process
986 * 'pp'.
987 */
988
989static void
990pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start,
991    uintfptr_t end)
992{
993	struct pmcstat_pcmap *pcm, *pcmtmp, *pcmnew;
994
995	assert(pp != NULL);
996	assert(start < end);
997
998	/*
999	 * Cases:
1000	 * - we could have the range completely in the middle of an
1001	 *   existing pcmap; in this case we have to split the pcmap
1002	 *   structure into two (i.e., generate a 'hole').
1003	 * - we could have the range covering multiple pcmaps; these
1004	 *   will have to be removed.
1005	 * - we could have either 'start' or 'end' falling in the
1006	 *   middle of a pcmap; in this case shorten the entry.
1007	 */
1008
1009	TAILQ_FOREACH_SAFE(pcm, &pp->pp_map, ppm_next, pcmtmp) {
1010		assert(pcm->ppm_lowpc < pcm->ppm_highpc);
1011		if (pcm->ppm_highpc <= start)
1012			continue;
1013		if (pcm->ppm_lowpc > end)
1014			return;
1015		if (pcm->ppm_lowpc >= start && pcm->ppm_highpc <= end) {
1016			/*
1017			 * The current pcmap is completely inside the
1018			 * unmapped range: remove it entirely.
1019			 */
1020			TAILQ_REMOVE(&pp->pp_map, pcm, ppm_next);
1021			free(pcm);
1022		} else if (pcm->ppm_lowpc < start && pcm->ppm_highpc > end) {
1023			/*
1024			 * Split this pcmap into two; curtail the
1025			 * current map to end at [start-1], and start
1026			 * the new one at [end].
1027			 */
1028			if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
1029				err(EX_OSERR, "ERROR: Cannot split a map "
1030				    "entry");
1031
1032			pcmnew->ppm_image = pcm->ppm_image;
1033
1034			pcmnew->ppm_lowpc = end;
1035			pcmnew->ppm_highpc = pcm->ppm_highpc;
1036
1037			pcm->ppm_highpc = start;
1038
1039			TAILQ_INSERT_AFTER(&pp->pp_map, pcm, pcmnew, ppm_next);
1040
1041			return;
1042		} else if (pcm->ppm_lowpc < start)
1043			pcm->ppm_lowpc = start;
1044		else if (pcm->ppm_highpc > end)
1045			pcm->ppm_highpc = end;
1046		else
1047			assert(0);
1048	}
1049}
1050
1051/*
1052 * Add a {pmcid,name} mapping.
1053 */
1054
1055static void
1056pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps,
1057    struct pmcstat_args *a)
1058{
1059	struct pmcstat_pmcrecord *pr;
1060	struct stat st;
1061	char fullpath[PATH_MAX];
1062
1063	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1064	    if (pr->pr_pmcid == pmcid) {
1065		    pr->pr_pmcname = ps;
1066		    return;
1067	    }
1068
1069	if ((pr = malloc(sizeof(*pr))) == NULL)
1070		err(EX_OSERR, "ERROR: Cannot allocate pmc record");
1071
1072	pr->pr_pmcid = pmcid;
1073	pr->pr_pmcname = ps;
1074	LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
1075
1076	(void) snprintf(fullpath, sizeof(fullpath), "%s/%s", a->pa_samplesdir,
1077	    pmcstat_string_unintern(ps));
1078
1079	/* If the path name exists, it should be a directory */
1080	if (stat(fullpath, &st) == 0 && S_ISDIR(st.st_mode))
1081		return;
1082
1083	if (mkdir(fullpath, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) < 0)
1084		err(EX_OSERR, "ERROR: Cannot create directory \"%s\"",
1085		    fullpath);
1086}
1087
1088/*
1089 * Given a pmcid in use, find its human-readable name.
1090 */
1091
1092static const char *
1093pmcstat_pmcid_to_name(pmc_id_t pmcid)
1094{
1095	struct pmcstat_pmcrecord *pr;
1096	char fullpath[PATH_MAX];
1097
1098	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1099	    if (pr->pr_pmcid == pmcid)
1100		    return (pmcstat_string_unintern(pr->pr_pmcname));
1101
1102	/* create a default name and add this entry */
1103	if ((pr = malloc(sizeof(*pr))) == NULL)
1104		err(EX_OSERR, "ERROR: ");
1105	pr->pr_pmcid = pmcid;
1106
1107	(void) snprintf(fullpath, sizeof(fullpath), "%X", (unsigned int) pmcid);
1108	pr->pr_pmcname = pmcstat_string_intern(fullpath);
1109
1110	LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
1111
1112	return (pmcstat_string_unintern(pr->pr_pmcname));
1113}
1114
1115/*
1116 * Associate an AOUT image with a process.
1117 */
1118
1119static void
1120pmcstat_process_aout_exec(struct pmcstat_process *pp,
1121    struct pmcstat_image *image, uintfptr_t entryaddr,
1122    struct pmcstat_args *a)
1123{
1124	(void) pp;
1125	(void) image;
1126	(void) entryaddr;
1127	(void) a;
1128	/* TODO Implement a.out handling */
1129}
1130
1131/*
1132 * Associate an ELF image with a process.
1133 */
1134
1135static void
1136pmcstat_process_elf_exec(struct pmcstat_process *pp,
1137    struct pmcstat_image *image, uintfptr_t entryaddr,
1138    struct pmcstat_args *a)
1139{
1140	uintmax_t libstart;
1141	struct pmcstat_image *rtldimage;
1142
1143	assert(image->pi_type == PMCSTAT_IMAGE_ELF32 ||
1144	    image->pi_type == PMCSTAT_IMAGE_ELF64);
1145
1146	/* Create a map entry for the base executable. */
1147	pmcstat_image_link(pp, image, image->pi_vaddr);
1148
1149	/*
1150	 * For dynamically linked executables we need to:
1151	 * (a) find where the dynamic linker was mapped to for this
1152	 *     process,
1153	 * (b) find all the executable objects that the dynamic linker
1154	 *     brought in.
1155	 */
1156
1157	if (image->pi_isdynamic) {
1158
1159		/*
1160		 * The runtime loader gets loaded just after the maximum
1161		 * possible heap address.  Like so:
1162		 *
1163		 * [  TEXT DATA BSS HEAP -->*RTLD  SHLIBS   <--STACK]
1164		 * ^					            ^
1165		 * 0				   VM_MAXUSER_ADDRESS
1166
1167		 *
1168		 * The exact address where the loader gets mapped in
1169		 * will vary according to the size of the executable
1170		 * and the limits on the size of the process'es data
1171		 * segment at the time of exec().  The entry address
1172		 * recorded at process exec time corresponds to the
1173		 * 'start' address inside the dynamic linker.  From
1174		 * this we can figure out the address where the
1175		 * runtime loader's file object had been mapped to.
1176		 */
1177		rtldimage = pmcstat_image_from_path(image->pi_dynlinkerpath,
1178		    0);
1179		if (rtldimage == NULL) {
1180			warnx("WARNING: Cannot find image for \"%s\".",
1181			    pmcstat_string_unintern(image->pi_dynlinkerpath));
1182			pmcstat_stats.ps_exec_errors++;
1183			return;
1184		}
1185
1186		if (rtldimage->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1187			pmcstat_image_get_elf_params(rtldimage, a);
1188
1189		if (rtldimage->pi_type != PMCSTAT_IMAGE_ELF32 &&
1190		    rtldimage->pi_type != PMCSTAT_IMAGE_ELF64) {
1191			warnx("WARNING: rtld not an ELF object \"%s\".",
1192			    pmcstat_string_unintern(image->pi_dynlinkerpath));
1193			return;
1194		}
1195
1196		libstart = entryaddr - rtldimage->pi_entry;
1197		pmcstat_image_link(pp, rtldimage, libstart);
1198	}
1199}
1200
1201/*
1202 * Find the process descriptor corresponding to a PID.  If 'allocate'
1203 * is zero, we return a NULL if a pid descriptor could not be found or
1204 * a process descriptor process.  If 'allocate' is non-zero, then we
1205 * will attempt to allocate a fresh process descriptor.  Zombie
1206 * process descriptors are only removed if a fresh allocation for the
1207 * same PID is requested.
1208 */
1209
1210static struct pmcstat_process *
1211pmcstat_process_lookup(pid_t pid, int allocate)
1212{
1213	uint32_t hash;
1214	struct pmcstat_pcmap *ppm, *ppmtmp;
1215	struct pmcstat_process *pp, *pptmp;
1216
1217	hash = (uint32_t) pid & PMCSTAT_HASH_MASK;	/* simplicity wins */
1218
1219	LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[hash], pp_next, pptmp)
1220	    if (pp->pp_pid == pid) {
1221		    /* Found a descriptor, check and process zombies */
1222		    if (allocate && pp->pp_isactive == 0) {
1223			    /* remove maps */
1224			    TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next,
1225				ppmtmp) {
1226				    TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
1227				    free(ppm);
1228			    }
1229			    /* remove process entry */
1230			    LIST_REMOVE(pp, pp_next);
1231			    free(pp);
1232			    break;
1233		    }
1234		    return (pp);
1235	    }
1236
1237	if (!allocate)
1238		return (NULL);
1239
1240	if ((pp = malloc(sizeof(*pp))) == NULL)
1241		err(EX_OSERR, "ERROR: Cannot allocate pid descriptor");
1242
1243	pp->pp_pid = pid;
1244	pp->pp_isactive = 1;
1245
1246	TAILQ_INIT(&pp->pp_map);
1247
1248	LIST_INSERT_HEAD(&pmcstat_process_hash[hash], pp, pp_next);
1249	return (pp);
1250}
1251
1252/*
1253 * Associate an image and a process.
1254 */
1255
1256static void
1257pmcstat_process_exec(struct pmcstat_process *pp,
1258    pmcstat_interned_string path, uintfptr_t entryaddr,
1259    struct pmcstat_args *a)
1260{
1261	struct pmcstat_image *image;
1262
1263	if ((image = pmcstat_image_from_path(path, 0)) == NULL) {
1264		pmcstat_stats.ps_exec_errors++;
1265		return;
1266	}
1267
1268	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1269		pmcstat_image_determine_type(image, a);
1270
1271	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN);
1272
1273	switch (image->pi_type) {
1274	case PMCSTAT_IMAGE_ELF32:
1275	case PMCSTAT_IMAGE_ELF64:
1276		pmcstat_stats.ps_exec_elf++;
1277		pmcstat_process_elf_exec(pp, image, entryaddr, a);
1278		break;
1279
1280	case PMCSTAT_IMAGE_AOUT:
1281		pmcstat_stats.ps_exec_aout++;
1282		pmcstat_process_aout_exec(pp, image, entryaddr, a);
1283		break;
1284
1285	case PMCSTAT_IMAGE_INDETERMINABLE:
1286		pmcstat_stats.ps_exec_indeterminable++;
1287		break;
1288
1289	default:
1290		err(EX_SOFTWARE, "ERROR: Unsupported executable type for "
1291		    "\"%s\"", pmcstat_string_unintern(path));
1292	}
1293}
1294
1295
1296/*
1297 * Find the map entry associated with process 'p' at PC value 'pc'.
1298 */
1299
1300static struct pmcstat_pcmap *
1301pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc)
1302{
1303	struct pmcstat_pcmap *ppm;
1304
1305	TAILQ_FOREACH(ppm, &p->pp_map, ppm_next) {
1306		if (pc >= ppm->ppm_lowpc && pc < ppm->ppm_highpc)
1307			return (ppm);
1308		if (pc < ppm->ppm_lowpc)
1309			return (NULL);
1310	}
1311
1312	return (NULL);
1313}
1314
1315
1316
1317static int
1318pmcstat_convert_log(struct pmcstat_args *a)
1319{
1320	uintfptr_t pc;
1321	pid_t pid;
1322	struct pmcstat_image *image;
1323	struct pmcstat_process *pp, *ppnew;
1324	struct pmcstat_pcmap *ppm, *ppmtmp;
1325	struct pmclog_ev ev;
1326	pmcstat_interned_string image_path;
1327
1328	while (pmclog_read(a->pa_logparser, &ev) == 0) {
1329		assert(ev.pl_state == PMCLOG_OK);
1330
1331		switch (ev.pl_type) {
1332		case PMCLOG_TYPE_INITIALIZE:
1333			if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
1334			    PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0)
1335				warnx("WARNING: Log version 0x%x does not "
1336				    "match compiled version 0x%x.",
1337				    ev.pl_u.pl_i.pl_version,
1338				    PMC_VERSION_MAJOR);
1339			break;
1340		case PMCLOG_TYPE_MAP_IN:
1341			/*
1342			 * Introduce an address range mapping for a
1343			 * userland process or the kernel (pid == -1).
1344			 *
1345			 * We always allocate a process descriptor so
1346			 * that subsequent samples seen for this
1347			 * address range are mapped to the current
1348			 * object being mapped in.
1349			 */
1350			pid = ev.pl_u.pl_mi.pl_pid;
1351			if (pid == -1)
1352				pp = pmcstat_kernproc;
1353			else
1354				pp = pmcstat_process_lookup(pid,
1355				    PMCSTAT_ALLOCATE);
1356
1357			assert(pp != NULL);
1358
1359			image_path = pmcstat_string_intern(ev.pl_u.pl_mi.
1360			    pl_pathname);
1361			image = pmcstat_image_from_path(image_path, pid == -1);
1362			if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1363				pmcstat_image_determine_type(image, a);
1364			if (image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE)
1365				pmcstat_image_link(pp, image,
1366				    ev.pl_u.pl_mi.pl_start);
1367			break;
1368
1369		case PMCLOG_TYPE_MAP_OUT:
1370			/*
1371			 * Remove an address map.
1372			 */
1373			pid = ev.pl_u.pl_mo.pl_pid;
1374			if (pid == -1)
1375				pp = pmcstat_kernproc;
1376			else
1377				pp = pmcstat_process_lookup(pid, 0);
1378
1379			if (pp == NULL)	/* unknown process */
1380				break;
1381
1382			pmcstat_image_unmap(pp, ev.pl_u.pl_mo.pl_start,
1383			    ev.pl_u.pl_mo.pl_end);
1384			break;
1385
1386		case PMCLOG_TYPE_PCSAMPLE:
1387
1388			/*
1389			 * We bring in the gmon file for the image
1390			 * currently associated with the PMC & pid
1391			 * pair and increment the appropriate entry
1392			 * bin inside this.
1393			 */
1394			pmcstat_stats.ps_samples_total++;
1395
1396			pc = ev.pl_u.pl_s.pl_pc;
1397			pp = pmcstat_process_lookup(ev.pl_u.pl_s.pl_pid,
1398			    PMCSTAT_ALLOCATE);
1399			if ((ppm = pmcstat_process_find_map(pp, pc)) == NULL &&
1400			    (ppm = pmcstat_process_find_map(pmcstat_kernproc,
1401				pc)) == NULL) {	/* unknown process,offset pair */
1402				pmcstat_stats.ps_samples_unknown_offset++;
1403				break;
1404			}
1405
1406			pmcstat_image_increment_bucket(ppm, pc,
1407			    ev.pl_u.pl_s.pl_pmcid, a);
1408
1409			break;
1410
1411		case PMCLOG_TYPE_PMCALLOCATE:
1412			/*
1413			 * Record the association pmc id between this
1414			 * PMC and its name.
1415			 */
1416			pmcstat_pmcid_add(ev.pl_u.pl_a.pl_pmcid,
1417			    pmcstat_string_intern(ev.pl_u.pl_a.pl_evname), a);
1418			break;
1419
1420		case PMCLOG_TYPE_PROCEXEC:
1421
1422			/*
1423			 * Change the executable image associated with
1424			 * a process.
1425			 */
1426			pp = pmcstat_process_lookup(ev.pl_u.pl_x.pl_pid,
1427			    PMCSTAT_ALLOCATE);
1428
1429			/* delete the current process map */
1430			TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) {
1431				TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
1432				free(ppm);
1433			}
1434
1435			/* associate this process  image */
1436			image_path = pmcstat_string_intern(
1437				ev.pl_u.pl_x.pl_pathname);
1438			assert(image_path != NULL);
1439			pmcstat_process_exec(pp, image_path,
1440			    ev.pl_u.pl_x.pl_entryaddr, a);
1441			break;
1442
1443		case PMCLOG_TYPE_PROCEXIT:
1444
1445			/*
1446			 * Due to the way the log is generated, the
1447			 * last few samples corresponding to a process
1448			 * may appear in the log after the process
1449			 * exit event is recorded.  Thus we keep the
1450			 * process' descriptor and associated data
1451			 * structures around, but mark the process as
1452			 * having exited.
1453			 */
1454			pp = pmcstat_process_lookup(ev.pl_u.pl_e.pl_pid, 0);
1455			if (pp == NULL)
1456				break;
1457			pp->pp_isactive = 0;	/* mark as a zombie */
1458			break;
1459
1460		case PMCLOG_TYPE_SYSEXIT:
1461			pp = pmcstat_process_lookup(ev.pl_u.pl_se.pl_pid, 0);
1462			if (pp == NULL)
1463				break;
1464			pp->pp_isactive = 0;	/* make a zombie */
1465			break;
1466
1467		case PMCLOG_TYPE_PROCFORK:
1468
1469			/*
1470			 * Allocate a process descriptor for the new
1471			 * (child) process.
1472			 */
1473			ppnew =
1474			    pmcstat_process_lookup(ev.pl_u.pl_f.pl_newpid,
1475				PMCSTAT_ALLOCATE);
1476
1477			/*
1478			 * If we had been tracking the parent, clone
1479			 * its address maps.
1480			 */
1481			pp = pmcstat_process_lookup(ev.pl_u.pl_f.pl_oldpid, 0);
1482			if (pp == NULL)
1483				break;
1484			TAILQ_FOREACH(ppm, &pp->pp_map, ppm_next)
1485			    pmcstat_image_link(ppnew, ppm->ppm_image,
1486				ppm->ppm_lowpc);
1487			break;
1488
1489		default:	/* other types of entries are not relevant */
1490			break;
1491		}
1492	}
1493
1494	if (ev.pl_state == PMCLOG_EOF)
1495		return (PMCSTAT_FINISHED);
1496	else if (ev.pl_state == PMCLOG_REQUIRE_DATA)
1497		return (PMCSTAT_RUNNING);
1498
1499	err(EX_DATAERR, "ERROR: event parsing failed (record %jd, "
1500	    "offset 0x%jx)", (uintmax_t) ev.pl_count + 1, ev.pl_offset);
1501}
1502
1503/*
1504 * Print log entries as text.
1505 */
1506
1507static int
1508pmcstat_print_log(struct pmcstat_args *a)
1509{
1510	struct pmclog_ev ev;
1511
1512	while (pmclog_read(a->pa_logparser, &ev) == 0) {
1513		assert(ev.pl_state == PMCLOG_OK);
1514		switch (ev.pl_type) {
1515		case PMCLOG_TYPE_CLOSELOG:
1516			PMCSTAT_PRINT_ENTRY(a,"closelog",);
1517			break;
1518		case PMCLOG_TYPE_DROPNOTIFY:
1519			PMCSTAT_PRINT_ENTRY(a,"drop",);
1520			break;
1521		case PMCLOG_TYPE_INITIALIZE:
1522			PMCSTAT_PRINT_ENTRY(a,"initlog","0x%x \"%s\"",
1523			    ev.pl_u.pl_i.pl_version,
1524			    pmc_name_of_cputype(ev.pl_u.pl_i.pl_arch));
1525			break;
1526		case PMCLOG_TYPE_MAP_IN:
1527			PMCSTAT_PRINT_ENTRY(a,"map-in","%d %p \"%s\"",
1528			    ev.pl_u.pl_mi.pl_pid,
1529			    (void *) ev.pl_u.pl_mi.pl_start,
1530			    ev.pl_u.pl_mi.pl_pathname);
1531			break;
1532		case PMCLOG_TYPE_MAP_OUT:
1533			PMCSTAT_PRINT_ENTRY(a,"map-out","%d %p %p",
1534			    ev.pl_u.pl_mo.pl_pid,
1535			    (void *) ev.pl_u.pl_mo.pl_start,
1536			    (void *) ev.pl_u.pl_mo.pl_end);
1537			break;
1538		case PMCLOG_TYPE_PCSAMPLE:
1539			PMCSTAT_PRINT_ENTRY(a,"sample","0x%x %d %p %c",
1540			    ev.pl_u.pl_s.pl_pmcid,
1541			    ev.pl_u.pl_s.pl_pid,
1542			    (void *) ev.pl_u.pl_s.pl_pc,
1543			    ev.pl_u.pl_s.pl_usermode ? 'u' : 's');
1544			break;
1545		case PMCLOG_TYPE_PMCALLOCATE:
1546			PMCSTAT_PRINT_ENTRY(a,"allocate","0x%x \"%s\" 0x%x",
1547			    ev.pl_u.pl_a.pl_pmcid,
1548			    ev.pl_u.pl_a.pl_evname,
1549			    ev.pl_u.pl_a.pl_flags);
1550			break;
1551		case PMCLOG_TYPE_PMCATTACH:
1552			PMCSTAT_PRINT_ENTRY(a,"attach","0x%x %d \"%s\"",
1553			    ev.pl_u.pl_t.pl_pmcid,
1554			    ev.pl_u.pl_t.pl_pid,
1555			    ev.pl_u.pl_t.pl_pathname);
1556			break;
1557		case PMCLOG_TYPE_PMCDETACH:
1558			PMCSTAT_PRINT_ENTRY(a,"detach","0x%x %d",
1559			    ev.pl_u.pl_d.pl_pmcid,
1560			    ev.pl_u.pl_d.pl_pid);
1561			break;
1562		case PMCLOG_TYPE_PROCCSW:
1563			PMCSTAT_PRINT_ENTRY(a,"cswval","0x%x %d %jd",
1564			    ev.pl_u.pl_c.pl_pmcid,
1565			    ev.pl_u.pl_c.pl_pid,
1566			    ev.pl_u.pl_c.pl_value);
1567			break;
1568		case PMCLOG_TYPE_PROCEXEC:
1569			PMCSTAT_PRINT_ENTRY(a,"exec","0x%x %d %p \"%s\"",
1570			    ev.pl_u.pl_x.pl_pmcid,
1571			    ev.pl_u.pl_x.pl_pid,
1572			    (void *) ev.pl_u.pl_x.pl_entryaddr,
1573			    ev.pl_u.pl_x.pl_pathname);
1574			break;
1575		case PMCLOG_TYPE_PROCEXIT:
1576			PMCSTAT_PRINT_ENTRY(a,"exitval","0x%x %d %jd",
1577			    ev.pl_u.pl_e.pl_pmcid,
1578			    ev.pl_u.pl_e.pl_pid,
1579			    ev.pl_u.pl_e.pl_value);
1580			break;
1581		case PMCLOG_TYPE_PROCFORK:
1582			PMCSTAT_PRINT_ENTRY(a,"fork","%d %d",
1583			    ev.pl_u.pl_f.pl_oldpid,
1584			    ev.pl_u.pl_f.pl_newpid);
1585			break;
1586		case PMCLOG_TYPE_USERDATA:
1587			PMCSTAT_PRINT_ENTRY(a,"userdata","0x%x",
1588			    ev.pl_u.pl_u.pl_userdata);
1589			break;
1590		case PMCLOG_TYPE_SYSEXIT:
1591			PMCSTAT_PRINT_ENTRY(a,"exit","%d",
1592			    ev.pl_u.pl_se.pl_pid);
1593			break;
1594		default:
1595			fprintf(a->pa_printfile, "unknown %d",
1596			    ev.pl_type);
1597		}
1598	}
1599
1600	if (ev.pl_state == PMCLOG_EOF)
1601		return (PMCSTAT_FINISHED);
1602	else if (ev.pl_state ==  PMCLOG_REQUIRE_DATA)
1603		return (PMCSTAT_RUNNING);
1604
1605	err(EX_DATAERR, "ERROR: event parsing failed "
1606	    "(record %jd, offset 0x%jx)",
1607	    (uintmax_t) ev.pl_count + 1, ev.pl_offset);
1608	/*NOTREACHED*/
1609}
1610
1611/*
1612 * Public Interfaces.
1613 */
1614
1615/*
1616 * Close a logfile, after first flushing all in-module queued data.
1617 */
1618
1619int
1620pmcstat_close_log(struct pmcstat_args *a)
1621{
1622	if (pmc_flush_logfile() < 0 ||
1623	    pmc_configure_logfile(-1) < 0)
1624		err(EX_OSERR, "ERROR: logging failed");
1625	a->pa_flags &= ~(FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE);
1626	return (a->pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING :
1627	    PMCSTAT_FINISHED);
1628}
1629
1630
1631
1632/*
1633 * Open a log file, for reading or writing.
1634 *
1635 * The function returns the fd of a successfully opened log or -1 in
1636 * case of failure.
1637 */
1638
1639int
1640pmcstat_open_log(const char *path, int mode)
1641{
1642	int fd;
1643
1644	/*
1645	 * If 'path' is "-" then open one of stdin or stdout depending
1646	 * on the value of 'mode'.  Otherwise, treat 'path' as a file
1647	 * name and open that.
1648	 */
1649	if (path[0] == '-' && path[1] == '\0')
1650		fd = (mode == PMCSTAT_OPEN_FOR_READ) ? 0 : 1;
1651	else
1652		fd = open(path, mode == PMCSTAT_OPEN_FOR_READ ?
1653		    O_RDONLY : (O_WRONLY|O_CREAT|O_TRUNC),
1654		    S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
1655
1656	return (fd);
1657}
1658
1659/*
1660 * Process a log file in offline analysis mode.
1661 */
1662
1663int
1664pmcstat_process_log(struct pmcstat_args *a)
1665{
1666
1667	/*
1668	 * If gprof style profiles haven't been asked for, just print the
1669	 * log to the current output file.
1670	 */
1671	if (a->pa_flags & FLAG_DO_PRINT)
1672		return (pmcstat_print_log(a));
1673	else
1674		/* convert the log to gprof compatible profiles */
1675		return (pmcstat_convert_log(a));
1676}
1677
1678/*
1679 * Initialize module.
1680 */
1681
1682void
1683pmcstat_initialize_logging(struct pmcstat_args *a)
1684{
1685	int i;
1686
1687	(void) a;
1688
1689	/* use a convenient format for 'ldd' output */
1690	if (setenv("LD_TRACE_LOADED_OBJECTS_FMT1","%o \"%p\" %x\n",1) != 0)
1691		err(EX_OSERR, "ERROR: Cannot setenv");
1692
1693	/* Initialize hash tables */
1694	pmcstat_string_initialize();
1695	for (i = 0; i < PMCSTAT_NHASH; i++) {
1696		LIST_INIT(&pmcstat_image_hash[i]);
1697		LIST_INIT(&pmcstat_process_hash[i]);
1698	}
1699
1700	/*
1701	 * Create a fake 'process' entry for the kernel with pid -1.
1702	 * hwpmc(4) will subsequently inform us about where the kernel
1703	 * and any loaded kernel modules are mapped.
1704	 */
1705	if ((pmcstat_kernproc = pmcstat_process_lookup((pid_t) -1,
1706		 PMCSTAT_ALLOCATE)) == NULL)
1707		err(EX_OSERR, "ERROR: Cannot initialize logging");
1708}
1709
1710/*
1711 * Shutdown module.
1712 */
1713
1714void
1715pmcstat_shutdown_logging(struct pmcstat_args *a)
1716{
1717	int i;
1718	FILE *mf;
1719	struct pmcstat_gmonfile *pgf, *pgftmp;
1720	struct pmcstat_image *pi, *pitmp;
1721	struct pmcstat_process *pp, *pptmp;
1722
1723	/* determine where to send the map file */
1724	mf = NULL;
1725	if (a->pa_mapfilename != NULL)
1726		mf = (strcmp(a->pa_mapfilename, "-") == 0) ?
1727		    a->pa_printfile : fopen(a->pa_mapfilename, "w");
1728
1729	if (mf == NULL && a->pa_flags & FLAG_DO_GPROF &&
1730	    a->pa_verbosity >= 2)
1731		mf = a->pa_printfile;
1732
1733	if (mf)
1734		(void) fprintf(mf, "MAP:\n");
1735
1736	for (i = 0; i < PMCSTAT_NHASH; i++) {
1737		LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, pitmp) {
1738			/* flush gmon.out data to disk */
1739			LIST_FOREACH_SAFE(pgf, &pi->pi_gmlist, pgf_next,
1740			    pgftmp) {
1741				pmcstat_gmon_unmap_file(pgf);
1742			    	LIST_REMOVE(pgf, pgf_next);
1743
1744				if (pgf->pgf_overflow && a->pa_verbosity >= 1)
1745					warnx("WARNING: profile \"%s\" "
1746					    "overflowed.",
1747					    pmcstat_string_unintern(pgf->pgf_name));
1748			    	free(pgf);
1749			}
1750			if (mf)
1751				(void) fprintf(mf, " \"%s\" -> \"%s\"\n",
1752				    pmcstat_string_unintern(pi->pi_execpath),
1753				    pmcstat_string_unintern(pi->pi_samplename));
1754
1755			LIST_REMOVE(pi, pi_next);
1756			free(pi);
1757		}
1758		LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next,
1759		    pptmp) {
1760			LIST_REMOVE(pp, pp_next);
1761			free(pp);
1762		}
1763	}
1764
1765	pmcstat_string_shutdown();
1766
1767	/*
1768	 * Print errors unless -q was specified.  Print all statistics
1769	 * if verbosity > 1.
1770	 */
1771#define	PRINT(N,V,A) do {						\
1772		if (pmcstat_stats.ps_##V || (A)->pa_verbosity >= 2)	\
1773			(void) fprintf((A)->pa_printfile, " %-40s %d\n",\
1774			    N, pmcstat_stats.ps_##V);			\
1775	} while (0)
1776
1777	if (a->pa_verbosity >= 1 && a->pa_flags & FLAG_DO_GPROF) {
1778		(void) fprintf(a->pa_printfile, "CONVERSION STATISTICS:\n");
1779		PRINT("#exec/a.out", exec_aout, a);
1780		PRINT("#exec/elf", exec_elf, a);
1781		PRINT("#exec/unknown", exec_indeterminable, a);
1782		PRINT("#exec handling errors", exec_errors, a);
1783		PRINT("#samples/total", samples_total, a);
1784		PRINT("#samples/unclaimed", samples_unknown_offset, a);
1785		PRINT("#samples/unknown-object", samples_indeterminable, a);
1786	}
1787
1788	if (mf)
1789		(void) fclose(mf);
1790}
1791