pmcstat_log.c revision 157536
1/*-
2 * Copyright (c) 2005-2006, Joseph Koshy
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 * Transform a hwpmc(4) log into human readable form, and into
29 * gprof(1) compatible profiles.
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/usr.sbin/pmcstat/pmcstat_log.c 157536 2006-04-05 15:12:25Z jkoshy $");
34
35#include <sys/param.h>
36#include <sys/endian.h>
37#include <sys/gmon.h>
38#include <sys/imgact_aout.h>
39#include <sys/imgact_elf.h>
40#include <sys/mman.h>
41#include <sys/pmc.h>
42#include <sys/queue.h>
43#include <sys/socket.h>
44#include <sys/stat.h>
45#include <sys/wait.h>
46
47#include <netinet/in.h>
48
49#include <assert.h>
50#include <err.h>
51#include <errno.h>
52#include <fcntl.h>
53#include <libgen.h>
54#include <limits.h>
55#include <netdb.h>
56#include <pmc.h>
57#include <pmclog.h>
58#include <sysexits.h>
59#include <stdint.h>
60#include <stdio.h>
61#include <stdlib.h>
62#include <string.h>
63#include <unistd.h>
64
65#include "pmcstat.h"
66
/*
 * Minimum and maximum of two values.  These are plain macros: the
 * selected argument is evaluated twice, so do not pass expressions
 * that have side effects.
 */
#define	min(A,B)		((A) < (B) ? (A) : (B))
#define	max(A,B)		((A) > (B) ? (A) : (B))
69
70/*
71 * PUBLIC INTERFACES
72 *
73 * pmcstat_initialize_logging()	initialize this module, called first
74 * pmcstat_shutdown_logging()		orderly shutdown, called last
75 * pmcstat_open_log()			open an eventlog for processing
76 * pmcstat_process_log()		print/convert an event log
77 * pmcstat_close_log()			finish processing an event log
78 *
79 * IMPLEMENTATION OF GMON OUTPUT
80 *
81 * We correlate each 'sample' seen in the event log back to an
82 * executable object in the system. Executable objects include:
83 * 	- program executables,
84 *	- shared libraries loaded by the runtime loader,
85 *	- dlopen()'ed objects loaded by the program,
86 *	- the runtime loader itself,
87 *	- the kernel and kernel modules.
88 *
89 * Each such executable object gets one 'gmon.out' profile, per PMC in
90 * use.  Creation of 'gmon.out' profiles is done lazily.  The
91 * 'gmon.out' profiles generated for a given sampling PMC are
92 * aggregates of all the samples for that particular executable
93 * object.
94 *
95 * Each process that we know about is treated as a set of regions that
96 * map to executable objects.  Processes are described by
97 * 'pmcstat_process' structures.  Executable objects are tracked by
98 * 'pmcstat_image' structures.  The kernel and kernel modules are
99 * common to all processes (they reside at the same virtual addresses
100 * for all processes).  Individual processes can have their text
101 * segments and shared libraries loaded at process-specific locations.
102 *
103 * A given executable object can be in use by multiple processes
104 * (e.g., libc.so) and loaded at a different address in each.
105 * pmcstat_pcmap structures track per-image mappings.
106 *
107 * The sample log could have samples from multiple PMCs; we
108 * generate one 'gmon.out' profile per PMC.
109 */
110
/*
 * Opaque handle for an interned string.  The string helpers in this
 * file cast it to and from 'struct pmcstat_string *'.
 */
typedef const void *pmcstat_interned_string;
112
113/*
114 * 'pmcstat_pmcrecord' is a mapping from PMC ids to human-readable
115 * names.
116 */
117
118struct pmcstat_pmcrecord {
119	LIST_ENTRY(pmcstat_pmcrecord)	pr_next;
120	pmc_id_t			pr_pmcid;
121	pmcstat_interned_string	pr_pmcname;
122};
123
124static LIST_HEAD(,pmcstat_pmcrecord)	pmcstat_pmcs =
125	LIST_HEAD_INITIALIZER(&pmcstat_pmcs);
126
127
/*
 * struct pmcstat_gmonfile tracks a given 'gmon.out' file.  These
 * files are mmap()'ed in as needed.  A NULL 'pgf_gmondata' marks a
 * file that is not currently mapped.
 */

struct pmcstat_gmonfile {
	LIST_ENTRY(pmcstat_gmonfile)	pgf_next; /* list of entries */
	int		pgf_overflow;	/* whether a count overflowed */
	pmc_id_t	pgf_pmcid;	/* id of the associated pmc */
	size_t		pgf_nbuckets;	/* #buckets in this gmon.out */
	unsigned int	pgf_nsamples;	/* #samples in this gmon.out */
	pmcstat_interned_string pgf_name;	/* pathname of gmon.out file */
	size_t		pgf_ndatabytes;	/* number of bytes mapped */
	void		*pgf_gmondata;	/* pointer to mmap'ed data */
};
143
144/*
145 * A 'pmcstat_image' structure describes an executable program on
146 * disk.  'pi_execpath' is a cookie representing the pathname of
147 * the executable.  'pi_start' and 'pi_end' are the least and greatest
148 * virtual addresses for the text segments in the executable.
 * 'pi_gmlist' contains a linked list of gmon.out files associated
150 * with this image.
151 */
152
/*
 * Classification of an executable image.  Images start out as
 * 'UNKNOWN' and are classified lazily by
 * pmcstat_image_determine_type().
 */
enum pmcstat_image_type {
	PMCSTAT_IMAGE_UNKNOWN = 0,	/* never looked at the image */
	PMCSTAT_IMAGE_INDETERMINABLE,	/* can't tell what the image is */
	PMCSTAT_IMAGE_ELF32,		/* ELF 32 bit object */
	PMCSTAT_IMAGE_ELF64,		/* ELF 64 bit object */
	PMCSTAT_IMAGE_AOUT		/* AOUT object */
};
160
struct pmcstat_image {
	LIST_ENTRY(pmcstat_image) pi_next;	/* hash link */
	TAILQ_ENTRY(pmcstat_image) pi_lru;	/* LRU list */
	pmcstat_interned_string	pi_execpath;/* cookie */
	pmcstat_interned_string pi_samplename;  /* sample path name */

	enum pmcstat_image_type pi_type;	/* executable type */

	/*
	 * Executables have pi_start and pi_end; these are zero
	 * for shared libraries.
	 */
	uintfptr_t	pi_start;		/* start address (inclusive) */
	uintfptr_t	pi_end;			/* end address (exclusive) */
	uintfptr_t	pi_entry;		/* entry address */
	uintfptr_t	pi_vaddr;		/* virtual address where loaded */
	int		pi_isdynamic;		/* whether a dynamic
						 * object */
	int		pi_iskernelmodule;	/* whether the image is
						 * looked up under the
						 * kernel directory */
	pmcstat_interned_string pi_dynlinkerpath; /* path in .interp */

	/*
	 * An image can be associated with one or more gmon.out files;
	 * one per PMC.
	 */
	LIST_HEAD(,pmcstat_gmonfile) pi_gmlist;
};
188
/*
 * All image descriptors are kept in a hash table, indexed by the hash
 * of their interned path.  They are also linked on an LRU list, with
 * the most recently looked-up descriptor at its head.
 */
static LIST_HEAD(,pmcstat_image)	pmcstat_image_hash[PMCSTAT_NHASH];
static TAILQ_HEAD(,pmcstat_image)	pmcstat_image_lru =
	TAILQ_HEAD_INITIALIZER(pmcstat_image_lru);
195
/*
 * A 'pmcstat_pcmap' structure maps a virtual address range to an
 * underlying 'pmcstat_image' descriptor.  The range is half-open:
 * it covers addresses from 'ppm_lowpc' up to, but not including,
 * 'ppm_highpc'.
 */
struct pmcstat_pcmap {
	TAILQ_ENTRY(pmcstat_pcmap) ppm_next;	/* per-process map link */
	uintfptr_t	ppm_lowpc;		/* lowest address (inclusive) */
	uintfptr_t	ppm_highpc;		/* highest address (exclusive) */
	struct pmcstat_image *ppm_image;	/* image backing the range */
};
206
207/*
208 * A 'pmcstat_process' structure models processes.  Each process is
209 * associated with a set of pmcstat_pcmap structures that map
210 * addresses inside it to executable objects.  This set is implemented
211 * as a list, kept sorted in ascending order of mapped addresses.
212 *
213 * 'pp_pid' holds the pid of the process.  When a process exits, the
214 * 'pp_isactive' field is set to zero, but the process structure is
215 * not immediately reclaimed because there may still be samples in the
216 * log for this process.
217 */
218
struct pmcstat_process {
	LIST_ENTRY(pmcstat_process) pp_next;	/* hash-next */
	pid_t			pp_pid;		/* associated pid */
	int			pp_isactive;	/* whether active */
	uintfptr_t		pp_entryaddr;	/* entry address */
	TAILQ_HEAD(,pmcstat_pcmap) pp_map;	/* address range map, in
						 * ascending address order */
};
226
/*
 * NOTE(review): presumably the '_allocate' argument value for
 * pmcstat_process_lookup() that asks for a missing descriptor to be
 * created -- confirm against that function.
 */
#define	PMCSTAT_ALLOCATE		1

/*
 * All process descriptors are kept in a hash table.
 */
static LIST_HEAD(,pmcstat_process) pmcstat_process_hash[PMCSTAT_NHASH];

static struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */
235
/* Misc. statistics, accumulated while a log is being processed. */
static struct pmcstat_stats {
	int ps_exec_aout;	/* # a.out executables seen */
	int ps_exec_elf;	/* # elf executables seen */
	int ps_exec_errors;	/* # errors processing executables */
	int ps_exec_indeterminable; /* # unknown executables seen */
	int ps_samples_total;	/* total number of samples processed */
	int ps_samples_unknown_offset;	/* #samples not in any map */
	int ps_samples_indeterminable;	/* #samples in indeterminable images */
} pmcstat_stats;
246
247/*
248 * Prototypes
249 */
250
251static void	pmcstat_gmon_create_file(struct pmcstat_gmonfile *_pgf,
252    struct pmcstat_image *_image);
253static pmcstat_interned_string pmcstat_gmon_create_name(const char *_sd,
254    struct pmcstat_image *_img, pmc_id_t _pmcid);
255static void	pmcstat_gmon_map_file(struct pmcstat_gmonfile *_pgf);
256static void	pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *_pgf);
257
258static void pmcstat_image_determine_type(struct pmcstat_image *_image,
259    struct pmcstat_args *_a);
260static struct pmcstat_image *pmcstat_image_from_path(pmcstat_interned_string
261    _path, int _iskernelmodule);
262static void pmcstat_image_get_aout_params(struct pmcstat_image *_image,
263    struct pmcstat_args *_a);
264static void pmcstat_image_get_elf_params(struct pmcstat_image *_image,
265    struct pmcstat_args *_a);
266static void	pmcstat_image_increment_bucket(struct pmcstat_pcmap *_pcm,
267    uintfptr_t _pc, pmc_id_t _pmcid, struct pmcstat_args *_a);
268static void	pmcstat_image_link(struct pmcstat_process *_pp,
269    struct pmcstat_image *_i, uintfptr_t _lpc);
270
271static void	pmcstat_pmcid_add(pmc_id_t _pmcid,
272    pmcstat_interned_string _name, struct pmcstat_args *_a);
273static const char *pmcstat_pmcid_to_name(pmc_id_t _pmcid);
274
275static void	pmcstat_process_aout_exec(struct pmcstat_process *_pp,
276    struct pmcstat_image *_image, uintfptr_t _entryaddr,
277    struct pmcstat_args *_a);
278static void	pmcstat_process_elf_exec(struct pmcstat_process *_pp,
279    struct pmcstat_image *_image, uintfptr_t _entryaddr,
280    struct pmcstat_args *_a);
281static void	pmcstat_process_exec(struct pmcstat_process *_pp,
282    pmcstat_interned_string _path, uintfptr_t _entryaddr,
283    struct pmcstat_args *_ao);
284static struct pmcstat_process *pmcstat_process_lookup(pid_t _pid,
285    int _allocate);
286static struct pmcstat_pcmap *pmcstat_process_find_map(
287    struct pmcstat_process *_p, uintfptr_t _pc);
288
289static int	pmcstat_string_compute_hash(const char *_string);
290static void pmcstat_string_initialize(void);
291static pmcstat_interned_string pmcstat_string_intern(const char *_s);
292static pmcstat_interned_string pmcstat_string_lookup(const char *_s);
293static int	pmcstat_string_lookup_hash(pmcstat_interned_string _is);
294static void pmcstat_string_shutdown(void);
295static const char *pmcstat_string_unintern(pmcstat_interned_string _is);
296
297
/*
 * A simple implementation of interned strings.  Each interned string
 * is assigned a unique address, so that subsequent string compares
 * can be done by a simple pointer comparison instead of using
 * strcmp().  This speeds up hash table lookups and saves memory if
 * duplicate strings are the norm.
 */
struct pmcstat_string {
	LIST_ENTRY(pmcstat_string)	ps_next;	/* hash link */
	int		ps_len;		/* strlen() of ps_string */
	int		ps_hash;	/* hash bucket of ps_string */
	char		*ps_string;	/* private copy of the string */
};

/* Hash table holding every interned string. */
static LIST_HEAD(,pmcstat_string)	pmcstat_string_hash[PMCSTAT_NHASH];
313
314/*
315 * Compute a 'hash' value for a string.
316 */
317
318static int
319pmcstat_string_compute_hash(const char *s)
320{
321	int hash;
322
323	for (hash = 0; *s; s++)
324		hash ^= *s;
325
326	return (hash & PMCSTAT_HASH_MASK);
327}
328
329/*
330 * Intern a copy of string 's', and return a pointer to the
331 * interned structure.
332 */
333
334static pmcstat_interned_string
335pmcstat_string_intern(const char *s)
336{
337	struct pmcstat_string *ps;
338	const struct pmcstat_string *cps;
339	int hash, len;
340
341	if ((cps = pmcstat_string_lookup(s)) != NULL)
342		return (cps);
343
344	hash = pmcstat_string_compute_hash(s);
345	len  = strlen(s);
346
347	if ((ps = malloc(sizeof(*ps))) == NULL)
348		err(EX_OSERR, "ERROR: Could not intern string");
349	ps->ps_len = len;
350	ps->ps_hash = hash;
351	ps->ps_string = strdup(s);
352	LIST_INSERT_HEAD(&pmcstat_string_hash[hash], ps, ps_next);
353	return ((pmcstat_interned_string) ps);
354}
355
356static const char *
357pmcstat_string_unintern(pmcstat_interned_string str)
358{
359	const char *s;
360
361	s = ((const struct pmcstat_string *) str)->ps_string;
362	return (s);
363}
364
365static pmcstat_interned_string
366pmcstat_string_lookup(const char *s)
367{
368	struct pmcstat_string *ps;
369	int hash, len;
370
371	hash = pmcstat_string_compute_hash(s);
372	len = strlen(s);
373
374	LIST_FOREACH(ps, &pmcstat_string_hash[hash], ps_next)
375	    if (ps->ps_len == len && ps->ps_hash == hash &&
376		strcmp(ps->ps_string, s) == 0)
377		    return (ps);
378	return (NULL);
379}
380
381static int
382pmcstat_string_lookup_hash(pmcstat_interned_string s)
383{
384	const struct pmcstat_string *ps;
385
386	ps = (const struct pmcstat_string *) s;
387	return (ps->ps_hash);
388}
389
390/*
391 * Initialize the string interning facility.
392 */
393
394static void
395pmcstat_string_initialize(void)
396{
397	int i;
398
399	for (i = 0; i < PMCSTAT_NHASH; i++)
400		LIST_INIT(&pmcstat_string_hash[i]);
401}
402
403/*
404 * Destroy the string table, free'ing up space.
405 */
406
407static void
408pmcstat_string_shutdown(void)
409{
410	int i;
411	struct pmcstat_string *ps, *pstmp;
412
413	for (i = 0; i < PMCSTAT_NHASH; i++)
414		LIST_FOREACH_SAFE(ps, &pmcstat_string_hash[i], ps_next,
415		    pstmp) {
416			LIST_REMOVE(ps, ps_next);
417			free(ps->ps_string);
418			free(ps);
419		}
420}
421
422/*
423 * Create a gmon.out file and size it.
424 */
425
426static void
427pmcstat_gmon_create_file(struct pmcstat_gmonfile *pgf,
428    struct pmcstat_image *image)
429{
430	int fd;
431	size_t count;
432	struct gmonhdr gm;
433	const char *pathname;
434	char buffer[DEFAULT_BUFFER_SIZE];
435
436	pathname = pmcstat_string_unintern(pgf->pgf_name);
437	if ((fd = open(pathname, O_RDWR|O_NOFOLLOW|O_CREAT,
438		 S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
439		err(EX_OSERR, "ERROR: Cannot open \"%s\"", pathname);
440
441	gm.lpc = image->pi_start;
442	gm.hpc = image->pi_end;
443	gm.ncnt = (pgf->pgf_nbuckets * sizeof(HISTCOUNTER)) +
444	    sizeof(struct gmonhdr);
445	gm.version = GMONVERSION;
446	gm.profrate = 0;		/* use ticks */
447	gm.histcounter_type = 0;	/* compatibility with moncontrol() */
448	gm.spare[0] = gm.spare[1] = 0;
449
450	/* Write out the gmon header */
451	if (write(fd, &gm, sizeof(gm)) < 0)
452		goto error;
453
454	/* Zero fill the samples[] array */
455	(void) memset(buffer, 0, sizeof(buffer));
456
457	count = pgf->pgf_ndatabytes - sizeof(struct gmonhdr);
458	while (count > sizeof(buffer)) {
459		if (write(fd, &buffer, sizeof(buffer)) < 0)
460			goto error;
461		count -= sizeof(buffer);
462	}
463
464	if (write(fd, &buffer, count) < 0)
465		goto error;
466
467	/* TODO size the arc table */
468
469	(void) close(fd);
470
471	return;
472
473 error:
474	err(EX_OSERR, "ERROR: Cannot write \"%s\"", pathname);
475}
476
477/*
478 * Determine the full pathname of a gmon.out file for a given
479 * (image,pmcid) combination.  Return the interned string.
480 */
481
482pmcstat_interned_string
483pmcstat_gmon_create_name(const char *samplesdir, struct pmcstat_image *image,
484    pmc_id_t pmcid)
485{
486	const char *pmcname;
487	char fullpath[PATH_MAX];
488
489	pmcname = pmcstat_pmcid_to_name(pmcid);
490
491	(void) snprintf(fullpath, sizeof(fullpath),
492	    "%s/%s/%s", samplesdir, pmcname,
493	    pmcstat_string_unintern(image->pi_samplename));
494
495	return (pmcstat_string_intern(fullpath));
496}
497
498
499/*
500 * Mmap in a gmon.out file for processing.
501 */
502
503static void
504pmcstat_gmon_map_file(struct pmcstat_gmonfile *pgf)
505{
506	int fd;
507	const char *pathname;
508
509	pathname = pmcstat_string_unintern(pgf->pgf_name);
510
511	/* the gmon.out file must already exist */
512	if ((fd = open(pathname, O_RDWR | O_NOFOLLOW, 0)) < 0)
513		err(EX_OSERR, "ERROR: cannot open \"%s\"", pathname);
514
515	pgf->pgf_gmondata = mmap(NULL, pgf->pgf_ndatabytes,
516	    PROT_READ|PROT_WRITE, MAP_NOSYNC|MAP_SHARED, fd, 0);
517
518	if (pgf->pgf_gmondata == MAP_FAILED)
519		err(EX_OSERR, "ERROR: cannot map \"%s\"", pathname);
520
521	(void) close(fd);
522}
523
524/*
525 * Unmap a gmon.out file after sync'ing its data to disk.
526 */
527
528static void
529pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *pgf)
530{
531	(void) msync(pgf->pgf_gmondata, pgf->pgf_ndatabytes,
532	    MS_SYNC);
533	(void) munmap(pgf->pgf_gmondata, pgf->pgf_ndatabytes);
534	pgf->pgf_gmondata = NULL;
535}
536
537/*
538 * Determine whether a given executable image is an A.OUT object, and
539 * if so, fill in its parameters from the text file.
540 * Sets image->pi_type.
541 */
542
543static void
544pmcstat_image_get_aout_params(struct pmcstat_image *image,
545    struct pmcstat_args *a)
546{
547	int fd;
548	ssize_t nbytes;
549	struct exec ex;
550	const char *path;
551	char buffer[PATH_MAX];
552
553	path = pmcstat_string_unintern(image->pi_execpath);
554	assert(path != NULL);
555
556	if (image->pi_iskernelmodule)
557		errx(EX_SOFTWARE, "ERROR: a.out kernel modules are "
558		    "unsupported \"%s\"", path);
559
560	(void) snprintf(buffer, sizeof(buffer), "%s%s",
561	    a->pa_fsroot, path);
562
563	if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
564	    (nbytes = read(fd, &ex, sizeof(ex))) < 0) {
565		warn("WARNING: Cannot determine type of \"%s\"", path);
566		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
567		if (fd != -1)
568			(void) close(fd);
569		return;
570	}
571
572	(void) close(fd);
573
574	if ((unsigned) nbytes != sizeof(ex) ||
575	    N_BADMAG(ex))
576		return;
577
578	image->pi_type = PMCSTAT_IMAGE_AOUT;
579
580	/* TODO: the rest of a.out processing */
581
582	return;
583}
584
585/*
586 * Examine an ELF file to determine the size of its text segment.
587 * Sets image->pi_type if anything conclusive can be determined about
588 * this image.
589 */
590
591static void
592pmcstat_image_get_elf_params(struct pmcstat_image *image,
593    struct pmcstat_args *a)
594{
595	int fd, i;
596	const char *path;
597	void *mapbase;
598	uintfptr_t minva, maxva;
599	const Elf_Ehdr *h;
600	const Elf_Phdr *ph;
601	const Elf_Shdr *sh;
602#if	defined(__amd64__)
603	const Elf32_Ehdr *h32;
604	const Elf32_Phdr *ph32;
605	const Elf32_Shdr *sh32;
606#endif
607	enum pmcstat_image_type image_type;
608	struct stat st;
609	char buffer[PATH_MAX];
610
611	assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
612
613	minva = ~(uintfptr_t) 0;
614	maxva = (uintfptr_t) 0;
615	path = pmcstat_string_unintern(image->pi_execpath);
616
617	assert(path != NULL);
618
619	/*
620	 * Look for kernel modules under FSROOT/KERNELPATH/NAME,
621	 * and user mode executable objects under FSROOT/PATHNAME.
622	 */
623	if (image->pi_iskernelmodule)
624		(void) snprintf(buffer, sizeof(buffer), "%s%s/%s",
625		    a->pa_fsroot, a->pa_kernel, path);
626	else
627		(void) snprintf(buffer, sizeof(buffer), "%s%s",
628		    a->pa_fsroot, path);
629
630	if ((fd = open(buffer, O_RDONLY, 0)) < 0 ||
631	    fstat(fd, &st) < 0 ||
632	    (mapbase = mmap(0, st.st_size, PROT_READ, MAP_SHARED,
633		fd, 0)) == MAP_FAILED) {
634		warn("WARNING: Cannot determine type of \"%s\"", buffer);
635		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
636		if (fd != -1)
637			(void) close(fd);
638		return;
639	}
640
641	(void) close(fd);
642
643	/* Punt on non-ELF objects */
644	h = (const Elf_Ehdr *) mapbase;
645	if (!IS_ELF(*h))
646		return;
647
648	/*
649	 * We only handle executable ELF objects and kernel
650	 * modules.
651	 */
652	if (h->e_type != ET_EXEC && h->e_type != ET_DYN &&
653	    !(image->pi_iskernelmodule && h->e_type == ET_REL))
654		return;
655
656	image->pi_isdynamic = 0;
657	image->pi_dynlinkerpath = NULL;
658	image->pi_vaddr = 0;
659
660#define	GET_VA(H, SH, MINVA, MAXVA) do {				\
661		for (i = 0; i < (H)->e_shnum; i++)			\
662			if ((SH)[i].sh_flags & SHF_EXECINSTR) {		\
663				(MINVA) = min((MINVA),(SH)[i].sh_addr);	\
664				(MAXVA) = max((MAXVA),(SH)[i].sh_addr +	\
665				    (SH)[i].sh_size);			\
666			}						\
667	} while (0)
668
669
670#define	GET_PHDR_INFO(H, PH, IMAGE) do {				\
671		for (i = 0; i < (H)->e_phnum; i++) {			\
672			switch ((PH)[i].p_type) {			\
673			case PT_DYNAMIC:				\
674				image->pi_isdynamic = 1;		\
675				break;					\
676			case PT_INTERP:					\
677				image->pi_dynlinkerpath =		\
678				    pmcstat_string_intern(		\
679					(char *) mapbase +		\
680					(PH)[i].p_offset);		\
681				break;					\
682			case PT_LOAD:					\
683				if ((PH)[i].p_offset == 0)		\
684				    image->pi_vaddr = 			\
685					(PH)[i].p_vaddr;		\
686				break;					\
687			}						\
688		}							\
689	} while (0)
690
691	switch (h->e_machine) {
692	case EM_386:
693	case EM_486:
694#if	defined(__amd64__)
695		/* a 32 bit executable */
696		h32 = (const Elf32_Ehdr *) h;
697		sh32 = (const Elf32_Shdr *)((uintptr_t) mapbase + h32->e_shoff);
698
699		GET_VA(h32, sh32, minva, maxva);
700
701		image->pi_entry = h32->e_entry;
702
703		if (h32->e_type == ET_EXEC) {
704			ph32 = (const Elf32_Phdr *)((uintptr_t) mapbase +
705			    h32->e_phoff);
706			GET_PHDR_INFO(h32, ph32, image);
707		}
708		image_type = PMCSTAT_IMAGE_ELF32;
709		break;
710#endif
711	default:
712		sh = (const Elf_Shdr *)((uintptr_t) mapbase + h->e_shoff);
713
714		GET_VA(h, sh, minva, maxva);
715
716		image->pi_entry = h->e_entry;
717
718		if (h->e_type == ET_EXEC) {
719			ph = (const Elf_Phdr *)((uintptr_t) mapbase +
720			    h->e_phoff);
721			GET_PHDR_INFO(h, ph, image);
722		}
723		image_type = PMCSTAT_IMAGE_ELF64;
724		break;
725	}
726
727#undef	GET_PHDR_INFO
728#undef	GET_VA
729
730	image->pi_start = minva;
731	image->pi_end   = maxva;
732	image->pi_type  = image_type;
733
734	if (munmap(mapbase, st.st_size) < 0)
735		err(EX_OSERR, "ERROR: Cannot unmap \"%s\"", path);
736	return;
737}
738
739/*
740 * Given an image descriptor, determine whether it is an ELF, or AOUT.
741 * If no handler claims the image, set its type to 'INDETERMINABLE'.
742 */
743
744static void
745pmcstat_image_determine_type(struct pmcstat_image *image,
746    struct pmcstat_args *a)
747{
748	assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN);
749
750	/* Try each kind of handler in turn */
751	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
752		pmcstat_image_get_elf_params(image, a);
753	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
754		pmcstat_image_get_aout_params(image, a);
755
756	/*
757	 * Otherwise, remember that we tried to determine
758	 * the object's type and had failed.
759	 */
760	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
761		image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE;
762}
763
764/*
765 * Locate an image descriptor given an interned path, adding a fresh
766 * descriptor to the cache if necessary.  This function also finds a
767 * suitable name for this image's sample file.
768 *
769 * We defer filling in the file format specific parts of the image
770 * structure till the time we actually see a sample that would fall
771 * into this image.
772 */
773
774static struct pmcstat_image *
775pmcstat_image_from_path(pmcstat_interned_string internedpath,
776    int iskernelmodule)
777{
778	int count, hash, nlen;
779	struct pmcstat_image *pi;
780	char *sn;
781	char name[NAME_MAX];
782
783	hash = pmcstat_string_lookup_hash(internedpath);
784
785	/* First, look for an existing entry. */
786	LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next)
787	    if (pi->pi_execpath == internedpath &&
788		  pi->pi_iskernelmodule == iskernelmodule) {
789		    /* move descriptor to the head of the lru list */
790		    TAILQ_REMOVE(&pmcstat_image_lru, pi, pi_lru);
791		    TAILQ_INSERT_HEAD(&pmcstat_image_lru, pi, pi_lru);
792		    return (pi);
793	    }
794
795	/*
796	 * Allocate a new entry and place at the head of the hash and
797	 * LRU lists.
798	 */
799	pi = malloc(sizeof(*pi));
800	if (pi == NULL)
801		return (NULL);
802
803	pi->pi_type = PMCSTAT_IMAGE_UNKNOWN;
804	pi->pi_execpath = internedpath;
805	pi->pi_start = ~0;
806	pi->pi_entry = ~0;
807	pi->pi_end = 0;
808	pi->pi_iskernelmodule = iskernelmodule;
809
810	/*
811	 * Look for a suitable name for the sample files associated
812	 * with this image: if `basename(path)`+".gmon" is available,
813	 * we use that, otherwise we try iterating through
814	 * `basename(path)`+ "~" + NNN + ".gmon" till we get a free
815	 * entry.
816	 */
817	if ((sn = basename(pmcstat_string_unintern(internedpath))) == NULL)
818		err(EX_OSERR, "ERROR: Cannot process \"%s\"",
819		    pmcstat_string_unintern(internedpath));
820
821	nlen = strlen(sn);
822	nlen = min(nlen, (int) (sizeof(name) - sizeof(".gmon")));
823
824	snprintf(name, sizeof(name), "%.*s.gmon", nlen, sn);
825
826	/* try use the unabridged name first */
827	if (pmcstat_string_lookup(name) == NULL)
828		pi->pi_samplename = pmcstat_string_intern(name);
829	else {
830		/*
831		 * Otherwise use a prefix from the original name and
832		 * upto 3 digits.
833		 */
834		nlen = strlen(sn);
835		nlen = min(nlen, (int) (sizeof(name)-sizeof("~NNN.gmon")));
836		count = 0;
837		do {
838			if (++count > 999)
839				errx(EX_CANTCREAT, "ERROR: cannot create a gmon "
840				    "file for \"%s\"", name);
841			snprintf(name, sizeof(name), "%.*s~%3.3d.gmon",
842			    nlen, sn, count);
843			if (pmcstat_string_lookup(name) == NULL) {
844				pi->pi_samplename = pmcstat_string_intern(name);
845				count = 0;
846			}
847		} while (count > 0);
848	}
849
850
851	LIST_INIT(&pi->pi_gmlist);
852
853	LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next);
854	TAILQ_INSERT_HEAD(&pmcstat_image_lru, pi, pi_lru);
855
856	return (pi);
857}
858
859/*
860 * Increment the bucket in the gmon.out file corresponding to 'pmcid'
861 * and 'pc'.
862 */
863
864static void
865pmcstat_image_increment_bucket(struct pmcstat_pcmap *map, uintfptr_t pc,
866    pmc_id_t pmcid, struct pmcstat_args *a)
867{
868	struct pmcstat_image *image;
869	struct pmcstat_gmonfile *pgf;
870	uintfptr_t bucket;
871	HISTCOUNTER *hc;
872
873	assert(pc >= map->ppm_lowpc && pc < map->ppm_highpc);
874
875	image = map->ppm_image;
876
877	/*
878	 * If this is the first time we are seeing a sample for
879	 * this executable image, try determine its parameters.
880	 */
881	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
882		pmcstat_image_determine_type(image, a);
883
884	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN);
885
886	/* Ignore samples in images that we know nothing about. */
887	if (image->pi_type == PMCSTAT_IMAGE_INDETERMINABLE) {
888		pmcstat_stats.ps_samples_indeterminable++;
889		return;
890	}
891
892	/*
893	 * Find the gmon file corresponding to 'pmcid', creating it if
894	 * needed.
895	 */
896	LIST_FOREACH(pgf, &image->pi_gmlist, pgf_next)
897	    if (pgf->pgf_pmcid == pmcid)
898		    break;
899
900	/* If we don't have a gmon.out file for this PMCid, create one */
901	if (pgf == NULL) {
902		if ((pgf = calloc(1, sizeof(*pgf))) == NULL)
903			err(EX_OSERR, "ERROR:");
904
905		pgf->pgf_gmondata = NULL;	/* mark as unmapped */
906		pgf->pgf_name = pmcstat_gmon_create_name(a->pa_samplesdir,
907		    image, pmcid);
908		pgf->pgf_pmcid = pmcid;
909		assert(image->pi_end > image->pi_start);
910		pgf->pgf_nbuckets = (image->pi_end - image->pi_start) /
911		    FUNCTION_ALIGNMENT;	/* see <machine/profile.h> */
912		pgf->pgf_ndatabytes = sizeof(struct gmonhdr) +
913		    pgf->pgf_nbuckets * sizeof(HISTCOUNTER);
914		pgf->pgf_nsamples = 0;
915
916		pmcstat_gmon_create_file(pgf, image);
917
918		LIST_INSERT_HEAD(&image->pi_gmlist, pgf, pgf_next);
919	}
920
921	/*
922	 * Map the gmon file in if needed.  It may have been mapped
923	 * out under memory pressure.
924	 */
925	if (pgf->pgf_gmondata == NULL)
926		pmcstat_gmon_map_file(pgf);
927
928	assert(pgf->pgf_gmondata != NULL);
929
930	/*
931	 *
932	 */
933
934	bucket = (pc - map->ppm_lowpc) / FUNCTION_ALIGNMENT;
935
936	assert(bucket < pgf->pgf_nbuckets);
937
938	hc = (HISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata +
939	    sizeof(struct gmonhdr));
940
941	/* saturating add */
942	if (hc[bucket] < 0xFFFFU)  /* XXX tie this to sizeof(HISTCOUNTER) */
943		hc[bucket]++;
944	else /* mark that an overflow occurred */
945		pgf->pgf_overflow = 1;
946
947	pgf->pgf_nsamples++;
948}
949
950/*
951 * Record the fact that PC values from 'start' to 'end' come from
952 * image 'image'.
953 */
954
955static void
956pmcstat_image_link(struct pmcstat_process *pp, struct pmcstat_image *image,
957    uintfptr_t start)
958{
959	struct pmcstat_pcmap *pcm, *pcmnew;
960	uintfptr_t offset;
961
962	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN &&
963	    image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE);
964
965	if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
966		err(EX_OSERR, "ERROR: Cannot create a map entry");
967
968	/*
969	 * Adjust the map entry to only cover the text portion
970	 * of the object.
971	 */
972
973	offset = start - image->pi_vaddr;
974	pcmnew->ppm_lowpc  = image->pi_start + offset;
975	pcmnew->ppm_highpc = image->pi_end + offset;
976	pcmnew->ppm_image  = image;
977
978	assert(pcmnew->ppm_lowpc < pcmnew->ppm_highpc);
979
980	/* Overlapped mmap()'s are assumed to never occur. */
981	TAILQ_FOREACH(pcm, &pp->pp_map, ppm_next)
982	    if (pcm->ppm_lowpc >= pcmnew->ppm_highpc)
983		    break;
984
985	if (pcm == NULL)
986		TAILQ_INSERT_TAIL(&pp->pp_map, pcmnew, ppm_next);
987	else
988		TAILQ_INSERT_BEFORE(pcm, pcmnew, ppm_next);
989}
990
991/*
992 * Unmap images in the range [start..end) associated with process
993 * 'pp'.
994 */
995
996static void
997pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start,
998    uintfptr_t end)
999{
1000	struct pmcstat_pcmap *pcm, *pcmtmp, *pcmnew;
1001
1002	assert(pp != NULL);
1003	assert(start < end);
1004
1005	/*
1006	 * Cases:
1007	 * - we could have the range completely in the middle of an
1008	 *   existing pcmap; in this case we have to split the pcmap
1009	 *   structure into two (i.e., generate a 'hole').
1010	 * - we could have the range covering multiple pcmaps; these
1011	 *   will have to be removed.
1012	 * - we could have either 'start' or 'end' falling in the
1013	 *   middle of a pcmap; in this case shorten the entry.
1014	 */
1015
1016	TAILQ_FOREACH_SAFE(pcm, &pp->pp_map, ppm_next, pcmtmp) {
1017		assert(pcm->ppm_lowpc < pcm->ppm_highpc);
1018		if (pcm->ppm_highpc <= start)
1019			continue;
1020		if (pcm->ppm_lowpc > end)
1021			return;
1022		if (pcm->ppm_lowpc >= start && pcm->ppm_highpc <= end) {
1023			/*
1024			 * The current pcmap is completely inside the
1025			 * unmapped range: remove it entirely.
1026			 */
1027			TAILQ_REMOVE(&pp->pp_map, pcm, ppm_next);
1028			free(pcm);
1029		} else if (pcm->ppm_lowpc < start && pcm->ppm_highpc > end) {
1030			/*
1031			 * Split this pcmap into two; curtail the
1032			 * current map to end at [start-1], and start
1033			 * the new one at [end].
1034			 */
1035			if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
1036				err(EX_OSERR, "ERROR: Cannot split a map "
1037				    "entry");
1038
1039			pcmnew->ppm_image = pcm->ppm_image;
1040
1041			pcmnew->ppm_lowpc = end;
1042			pcmnew->ppm_highpc = pcm->ppm_highpc;
1043
1044			pcm->ppm_highpc = start;
1045
1046			TAILQ_INSERT_AFTER(&pp->pp_map, pcm, pcmnew, ppm_next);
1047
1048			return;
1049		} else if (pcm->ppm_lowpc < start)
1050			pcm->ppm_lowpc = start;
1051		else if (pcm->ppm_highpc > end)
1052			pcm->ppm_highpc = end;
1053		else
1054			assert(0);
1055	}
1056}
1057
1058/*
1059 * Add a {pmcid,name} mapping.
1060 */
1061
1062static void
1063pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps,
1064    struct pmcstat_args *a)
1065{
1066	struct pmcstat_pmcrecord *pr;
1067	struct stat st;
1068	char fullpath[PATH_MAX];
1069
1070	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1071	    if (pr->pr_pmcid == pmcid) {
1072		    pr->pr_pmcname = ps;
1073		    return;
1074	    }
1075
1076	if ((pr = malloc(sizeof(*pr))) == NULL)
1077		err(EX_OSERR, "ERROR: Cannot allocate pmc record");
1078
1079	pr->pr_pmcid = pmcid;
1080	pr->pr_pmcname = ps;
1081	LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
1082
1083	(void) snprintf(fullpath, sizeof(fullpath), "%s/%s", a->pa_samplesdir,
1084	    pmcstat_string_unintern(ps));
1085
1086	/* If the path name exists, it should be a directory */
1087	if (stat(fullpath, &st) == 0 && S_ISDIR(st.st_mode))
1088		return;
1089
1090	if (mkdir(fullpath, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) < 0)
1091		err(EX_OSERR, "ERROR: Cannot create directory \"%s\"",
1092		    fullpath);
1093}
1094
1095/*
1096 * Given a pmcid in use, find its human-readable name.
1097 */
1098
1099static const char *
1100pmcstat_pmcid_to_name(pmc_id_t pmcid)
1101{
1102	struct pmcstat_pmcrecord *pr;
1103	char fullpath[PATH_MAX];
1104
1105	LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
1106	    if (pr->pr_pmcid == pmcid)
1107		    return (pmcstat_string_unintern(pr->pr_pmcname));
1108
1109	/* create a default name and add this entry */
1110	if ((pr = malloc(sizeof(*pr))) == NULL)
1111		err(EX_OSERR, "ERROR: ");
1112	pr->pr_pmcid = pmcid;
1113
1114	(void) snprintf(fullpath, sizeof(fullpath), "%X", (unsigned int) pmcid);
1115	pr->pr_pmcname = pmcstat_string_intern(fullpath);
1116
1117	LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
1118
1119	return (pmcstat_string_unintern(pr->pr_pmcname));
1120}
1121
1122/*
1123 * Associate an AOUT image with a process.
1124 */
1125
1126static void
1127pmcstat_process_aout_exec(struct pmcstat_process *pp,
1128    struct pmcstat_image *image, uintfptr_t entryaddr,
1129    struct pmcstat_args *a)
1130{
1131	(void) pp;
1132	(void) image;
1133	(void) entryaddr;
1134	(void) a;
1135	/* TODO Implement a.out handling */
1136}
1137
1138/*
1139 * Associate an ELF image with a process.
1140 */
1141
1142static void
1143pmcstat_process_elf_exec(struct pmcstat_process *pp,
1144    struct pmcstat_image *image, uintfptr_t entryaddr,
1145    struct pmcstat_args *a)
1146{
1147	uintmax_t libstart;
1148	struct pmcstat_image *rtldimage;
1149
1150	assert(image->pi_type == PMCSTAT_IMAGE_ELF32 ||
1151	    image->pi_type == PMCSTAT_IMAGE_ELF64);
1152
1153	/* Create a map entry for the base executable. */
1154	pmcstat_image_link(pp, image, image->pi_vaddr);
1155
1156	/*
1157	 * For dynamically linked executables we need to:
1158	 * (a) find where the dynamic linker was mapped to for this
1159	 *     process,
1160	 * (b) find all the executable objects that the dynamic linker
1161	 *     brought in.
1162	 */
1163
1164	if (image->pi_isdynamic) {
1165
1166		/*
1167		 * The runtime loader gets loaded just after the maximum
1168		 * possible heap address.  Like so:
1169		 *
1170		 * [  TEXT DATA BSS HEAP -->*RTLD  SHLIBS   <--STACK]
1171		 * ^					            ^
1172		 * 0				   VM_MAXUSER_ADDRESS
1173
1174		 *
1175		 * The exact address where the loader gets mapped in
1176		 * will vary according to the size of the executable
1177		 * and the limits on the size of the process'es data
1178		 * segment at the time of exec().  The entry address
1179		 * recorded at process exec time corresponds to the
1180		 * 'start' address inside the dynamic linker.  From
1181		 * this we can figure out the address where the
1182		 * runtime loader's file object had been mapped to.
1183		 */
1184		rtldimage = pmcstat_image_from_path(image->pi_dynlinkerpath,
1185		    0);
1186		if (rtldimage == NULL) {
1187			warnx("WARNING: Cannot find image for \"%s\".",
1188			    pmcstat_string_unintern(image->pi_dynlinkerpath));
1189			pmcstat_stats.ps_exec_errors++;
1190			return;
1191		}
1192
1193		if (rtldimage->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1194			pmcstat_image_get_elf_params(rtldimage, a);
1195
1196		if (rtldimage->pi_type != PMCSTAT_IMAGE_ELF32 &&
1197		    rtldimage->pi_type != PMCSTAT_IMAGE_ELF64) {
1198			warnx("WARNING: rtld not an ELF object \"%s\".",
1199			    pmcstat_string_unintern(image->pi_dynlinkerpath));
1200			return;
1201		}
1202
1203		libstart = entryaddr - rtldimage->pi_entry;
1204		pmcstat_image_link(pp, rtldimage, libstart);
1205	}
1206}
1207
1208/*
1209 * Find the process descriptor corresponding to a PID.  If 'allocate'
1210 * is zero, we return a NULL if a pid descriptor could not be found or
1211 * a process descriptor process.  If 'allocate' is non-zero, then we
1212 * will attempt to allocate a fresh process descriptor.  Zombie
1213 * process descriptors are only removed if a fresh allocation for the
1214 * same PID is requested.
1215 */
1216
1217static struct pmcstat_process *
1218pmcstat_process_lookup(pid_t pid, int allocate)
1219{
1220	uint32_t hash;
1221	struct pmcstat_pcmap *ppm, *ppmtmp;
1222	struct pmcstat_process *pp, *pptmp;
1223
1224	hash = (uint32_t) pid & PMCSTAT_HASH_MASK;	/* simplicity wins */
1225
1226	LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[hash], pp_next, pptmp)
1227	    if (pp->pp_pid == pid) {
1228		    /* Found a descriptor, check and process zombies */
1229		    if (allocate && pp->pp_isactive == 0) {
1230			    /* remove maps */
1231			    TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next,
1232				ppmtmp) {
1233				    TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
1234				    free(ppm);
1235			    }
1236			    /* remove process entry */
1237			    LIST_REMOVE(pp, pp_next);
1238			    free(pp);
1239			    break;
1240		    }
1241		    return (pp);
1242	    }
1243
1244	if (!allocate)
1245		return (NULL);
1246
1247	if ((pp = malloc(sizeof(*pp))) == NULL)
1248		err(EX_OSERR, "ERROR: Cannot allocate pid descriptor");
1249
1250	pp->pp_pid = pid;
1251	pp->pp_isactive = 1;
1252
1253	TAILQ_INIT(&pp->pp_map);
1254
1255	LIST_INSERT_HEAD(&pmcstat_process_hash[hash], pp, pp_next);
1256	return (pp);
1257}
1258
1259/*
1260 * Associate an image and a process.
1261 */
1262
1263static void
1264pmcstat_process_exec(struct pmcstat_process *pp,
1265    pmcstat_interned_string path, uintfptr_t entryaddr,
1266    struct pmcstat_args *a)
1267{
1268	struct pmcstat_image *image;
1269
1270	if ((image = pmcstat_image_from_path(path, 0)) == NULL) {
1271		pmcstat_stats.ps_exec_errors++;
1272		return;
1273	}
1274
1275	if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1276		pmcstat_image_determine_type(image, a);
1277
1278	assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN);
1279
1280	switch (image->pi_type) {
1281	case PMCSTAT_IMAGE_ELF32:
1282	case PMCSTAT_IMAGE_ELF64:
1283		pmcstat_stats.ps_exec_elf++;
1284		pmcstat_process_elf_exec(pp, image, entryaddr, a);
1285		break;
1286
1287	case PMCSTAT_IMAGE_AOUT:
1288		pmcstat_stats.ps_exec_aout++;
1289		pmcstat_process_aout_exec(pp, image, entryaddr, a);
1290		break;
1291
1292	case PMCSTAT_IMAGE_INDETERMINABLE:
1293		pmcstat_stats.ps_exec_indeterminable++;
1294		break;
1295
1296	default:
1297		err(EX_SOFTWARE, "ERROR: Unsupported executable type for "
1298		    "\"%s\"", pmcstat_string_unintern(path));
1299	}
1300}
1301
1302
1303/*
1304 * Find the map entry associated with process 'p' at PC value 'pc'.
1305 */
1306
1307static struct pmcstat_pcmap *
1308pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc)
1309{
1310	struct pmcstat_pcmap *ppm;
1311
1312	TAILQ_FOREACH(ppm, &p->pp_map, ppm_next) {
1313		if (pc >= ppm->ppm_lowpc && pc < ppm->ppm_highpc)
1314			return (ppm);
1315		if (pc < ppm->ppm_lowpc)
1316			return (NULL);
1317	}
1318
1319	return (NULL);
1320}
1321
1322
1323
1324static int
1325pmcstat_convert_log(struct pmcstat_args *a)
1326{
1327	uintfptr_t pc;
1328	pid_t pid;
1329	struct pmcstat_image *image;
1330	struct pmcstat_process *pp, *ppnew;
1331	struct pmcstat_pcmap *ppm, *ppmtmp;
1332	struct pmclog_ev ev;
1333	pmcstat_interned_string image_path;
1334
1335	while (pmclog_read(a->pa_logparser, &ev) == 0) {
1336		assert(ev.pl_state == PMCLOG_OK);
1337
1338		switch (ev.pl_type) {
1339		case PMCLOG_TYPE_INITIALIZE:
1340			if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
1341			    PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0)
1342				warnx("WARNING: Log version 0x%x does not "
1343				    "match compiled version 0x%x.",
1344				    ev.pl_u.pl_i.pl_version,
1345				    PMC_VERSION_MAJOR);
1346			break;
1347		case PMCLOG_TYPE_MAP_IN:
1348			/*
1349			 * Introduce an address range mapping for a
1350			 * userland process or the kernel (pid == -1).
1351			 *
1352			 * We always allocate a process descriptor so
1353			 * that subsequent samples seen for this
1354			 * address range are mapped to the current
1355			 * object being mapped in.
1356			 */
1357			pid = ev.pl_u.pl_mi.pl_pid;
1358			if (pid == -1)
1359				pp = pmcstat_kernproc;
1360			else
1361				pp = pmcstat_process_lookup(pid,
1362				    PMCSTAT_ALLOCATE);
1363
1364			assert(pp != NULL);
1365
1366			image_path = pmcstat_string_intern(ev.pl_u.pl_mi.
1367			    pl_pathname);
1368			image = pmcstat_image_from_path(image_path, pid == -1);
1369			if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
1370				pmcstat_image_determine_type(image, a);
1371			if (image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE)
1372				pmcstat_image_link(pp, image,
1373				    ev.pl_u.pl_mi.pl_start);
1374			break;
1375
1376		case PMCLOG_TYPE_MAP_OUT:
1377			/*
1378			 * Remove an address map.
1379			 */
1380			pid = ev.pl_u.pl_mo.pl_pid;
1381			if (pid == -1)
1382				pp = pmcstat_kernproc;
1383			else
1384				pp = pmcstat_process_lookup(pid, 0);
1385
1386			if (pp == NULL)	/* unknown process */
1387				break;
1388
1389			pmcstat_image_unmap(pp, ev.pl_u.pl_mo.pl_start,
1390			    ev.pl_u.pl_mo.pl_end);
1391			break;
1392
1393		case PMCLOG_TYPE_PCSAMPLE:
1394
1395			/*
1396			 * We bring in the gmon file for the image
1397			 * currently associated with the PMC & pid
1398			 * pair and increment the appropriate entry
1399			 * bin inside this.
1400			 */
1401			pmcstat_stats.ps_samples_total++;
1402
1403			pc = ev.pl_u.pl_s.pl_pc;
1404			pp = pmcstat_process_lookup(ev.pl_u.pl_s.pl_pid,
1405			    PMCSTAT_ALLOCATE);
1406			if ((ppm = pmcstat_process_find_map(pp, pc)) == NULL &&
1407			    (ppm = pmcstat_process_find_map(pmcstat_kernproc,
1408				pc)) == NULL) {	/* unknown process,offset pair */
1409				pmcstat_stats.ps_samples_unknown_offset++;
1410				break;
1411			}
1412
1413			pmcstat_image_increment_bucket(ppm, pc,
1414			    ev.pl_u.pl_s.pl_pmcid, a);
1415
1416			break;
1417
1418		case PMCLOG_TYPE_PMCALLOCATE:
1419			/*
1420			 * Record the association pmc id between this
1421			 * PMC and its name.
1422			 */
1423			pmcstat_pmcid_add(ev.pl_u.pl_a.pl_pmcid,
1424			    pmcstat_string_intern(ev.pl_u.pl_a.pl_evname), a);
1425			break;
1426
1427		case PMCLOG_TYPE_PROCEXEC:
1428
1429			/*
1430			 * Change the executable image associated with
1431			 * a process.
1432			 */
1433			pp = pmcstat_process_lookup(ev.pl_u.pl_x.pl_pid,
1434			    PMCSTAT_ALLOCATE);
1435
1436			/* delete the current process map */
1437			TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) {
1438				TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
1439				free(ppm);
1440			}
1441
1442			/* associate this process  image */
1443			image_path = pmcstat_string_intern(
1444				ev.pl_u.pl_x.pl_pathname);
1445			assert(image_path != NULL);
1446			pmcstat_process_exec(pp, image_path,
1447			    ev.pl_u.pl_x.pl_entryaddr, a);
1448			break;
1449
1450		case PMCLOG_TYPE_PROCEXIT:
1451
1452			/*
1453			 * Due to the way the log is generated, the
1454			 * last few samples corresponding to a process
1455			 * may appear in the log after the process
1456			 * exit event is recorded.  Thus we keep the
1457			 * process' descriptor and associated data
1458			 * structures around, but mark the process as
1459			 * having exited.
1460			 */
1461			pp = pmcstat_process_lookup(ev.pl_u.pl_e.pl_pid, 0);
1462			if (pp == NULL)
1463				break;
1464			pp->pp_isactive = 0;	/* mark as a zombie */
1465			break;
1466
1467		case PMCLOG_TYPE_SYSEXIT:
1468			pp = pmcstat_process_lookup(ev.pl_u.pl_se.pl_pid, 0);
1469			if (pp == NULL)
1470				break;
1471			pp->pp_isactive = 0;	/* make a zombie */
1472			break;
1473
1474		case PMCLOG_TYPE_PROCFORK:
1475
1476			/*
1477			 * Allocate a process descriptor for the new
1478			 * (child) process.
1479			 */
1480			ppnew =
1481			    pmcstat_process_lookup(ev.pl_u.pl_f.pl_newpid,
1482				PMCSTAT_ALLOCATE);
1483
1484			/*
1485			 * If we had been tracking the parent, clone
1486			 * its address maps.
1487			 */
1488			pp = pmcstat_process_lookup(ev.pl_u.pl_f.pl_oldpid, 0);
1489			if (pp == NULL)
1490				break;
1491			TAILQ_FOREACH(ppm, &pp->pp_map, ppm_next)
1492			    pmcstat_image_link(ppnew, ppm->ppm_image,
1493				ppm->ppm_lowpc);
1494			break;
1495
1496		default:	/* other types of entries are not relevant */
1497			break;
1498		}
1499	}
1500
1501	if (ev.pl_state == PMCLOG_EOF)
1502		return (PMCSTAT_FINISHED);
1503	else if (ev.pl_state == PMCLOG_REQUIRE_DATA)
1504		return (PMCSTAT_RUNNING);
1505
1506	err(EX_DATAERR, "ERROR: event parsing failed (record %jd, "
1507	    "offset 0x%jx)", (uintmax_t) ev.pl_count + 1, ev.pl_offset);
1508}
1509
1510/*
1511 * Print log entries as text.
1512 */
1513
1514static int
1515pmcstat_print_log(struct pmcstat_args *a)
1516{
1517	struct pmclog_ev ev;
1518
1519	while (pmclog_read(a->pa_logparser, &ev) == 0) {
1520		assert(ev.pl_state == PMCLOG_OK);
1521		switch (ev.pl_type) {
1522		case PMCLOG_TYPE_CLOSELOG:
1523			PMCSTAT_PRINT_ENTRY(a,"closelog",);
1524			break;
1525		case PMCLOG_TYPE_DROPNOTIFY:
1526			PMCSTAT_PRINT_ENTRY(a,"drop",);
1527			break;
1528		case PMCLOG_TYPE_INITIALIZE:
1529			PMCSTAT_PRINT_ENTRY(a,"initlog","0x%x \"%s\"",
1530			    ev.pl_u.pl_i.pl_version,
1531			    pmc_name_of_cputype(ev.pl_u.pl_i.pl_arch));
1532			break;
1533		case PMCLOG_TYPE_MAP_IN:
1534			PMCSTAT_PRINT_ENTRY(a,"map-in","%d %p \"%s\"",
1535			    ev.pl_u.pl_mi.pl_pid,
1536			    (void *) ev.pl_u.pl_mi.pl_start,
1537			    ev.pl_u.pl_mi.pl_pathname);
1538			break;
1539		case PMCLOG_TYPE_MAP_OUT:
1540			PMCSTAT_PRINT_ENTRY(a,"map-out","%d %p %p",
1541			    ev.pl_u.pl_mo.pl_pid,
1542			    (void *) ev.pl_u.pl_mo.pl_start,
1543			    (void *) ev.pl_u.pl_mo.pl_end);
1544			break;
1545		case PMCLOG_TYPE_PCSAMPLE:
1546			PMCSTAT_PRINT_ENTRY(a,"sample","0x%x %d %p %c",
1547			    ev.pl_u.pl_s.pl_pmcid,
1548			    ev.pl_u.pl_s.pl_pid,
1549			    (void *) ev.pl_u.pl_s.pl_pc,
1550			    ev.pl_u.pl_s.pl_usermode ? 'u' : 's');
1551			break;
1552		case PMCLOG_TYPE_PMCALLOCATE:
1553			PMCSTAT_PRINT_ENTRY(a,"allocate","0x%x \"%s\" 0x%x",
1554			    ev.pl_u.pl_a.pl_pmcid,
1555			    ev.pl_u.pl_a.pl_evname,
1556			    ev.pl_u.pl_a.pl_flags);
1557			break;
1558		case PMCLOG_TYPE_PMCATTACH:
1559			PMCSTAT_PRINT_ENTRY(a,"attach","0x%x %d \"%s\"",
1560			    ev.pl_u.pl_t.pl_pmcid,
1561			    ev.pl_u.pl_t.pl_pid,
1562			    ev.pl_u.pl_t.pl_pathname);
1563			break;
1564		case PMCLOG_TYPE_PMCDETACH:
1565			PMCSTAT_PRINT_ENTRY(a,"detach","0x%x %d",
1566			    ev.pl_u.pl_d.pl_pmcid,
1567			    ev.pl_u.pl_d.pl_pid);
1568			break;
1569		case PMCLOG_TYPE_PROCCSW:
1570			PMCSTAT_PRINT_ENTRY(a,"cswval","0x%x %d %jd",
1571			    ev.pl_u.pl_c.pl_pmcid,
1572			    ev.pl_u.pl_c.pl_pid,
1573			    ev.pl_u.pl_c.pl_value);
1574			break;
1575		case PMCLOG_TYPE_PROCEXEC:
1576			PMCSTAT_PRINT_ENTRY(a,"exec","0x%x %d %p \"%s\"",
1577			    ev.pl_u.pl_x.pl_pmcid,
1578			    ev.pl_u.pl_x.pl_pid,
1579			    (void *) ev.pl_u.pl_x.pl_entryaddr,
1580			    ev.pl_u.pl_x.pl_pathname);
1581			break;
1582		case PMCLOG_TYPE_PROCEXIT:
1583			PMCSTAT_PRINT_ENTRY(a,"exitval","0x%x %d %jd",
1584			    ev.pl_u.pl_e.pl_pmcid,
1585			    ev.pl_u.pl_e.pl_pid,
1586			    ev.pl_u.pl_e.pl_value);
1587			break;
1588		case PMCLOG_TYPE_PROCFORK:
1589			PMCSTAT_PRINT_ENTRY(a,"fork","%d %d",
1590			    ev.pl_u.pl_f.pl_oldpid,
1591			    ev.pl_u.pl_f.pl_newpid);
1592			break;
1593		case PMCLOG_TYPE_USERDATA:
1594			PMCSTAT_PRINT_ENTRY(a,"userdata","0x%x",
1595			    ev.pl_u.pl_u.pl_userdata);
1596			break;
1597		case PMCLOG_TYPE_SYSEXIT:
1598			PMCSTAT_PRINT_ENTRY(a,"exit","%d",
1599			    ev.pl_u.pl_se.pl_pid);
1600			break;
1601		default:
1602			fprintf(a->pa_printfile, "unknown %d",
1603			    ev.pl_type);
1604		}
1605	}
1606
1607	if (ev.pl_state == PMCLOG_EOF)
1608		return (PMCSTAT_FINISHED);
1609	else if (ev.pl_state ==  PMCLOG_REQUIRE_DATA)
1610		return (PMCSTAT_RUNNING);
1611
1612	err(EX_DATAERR, "ERROR: event parsing failed "
1613	    "(record %jd, offset 0x%jx)",
1614	    (uintmax_t) ev.pl_count + 1, ev.pl_offset);
1615	/*NOTREACHED*/
1616}
1617
1618/*
1619 * Public Interfaces.
1620 */
1621
1622/*
1623 * Close a logfile, after first flushing all in-module queued data.
1624 */
1625
1626int
1627pmcstat_close_log(struct pmcstat_args *a)
1628{
1629	if (pmc_flush_logfile() < 0 ||
1630	    pmc_configure_logfile(-1) < 0)
1631		err(EX_OSERR, "ERROR: logging failed");
1632	a->pa_flags &= ~(FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE);
1633	return (a->pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING :
1634	    PMCSTAT_FINISHED);
1635}
1636
1637
1638
/*
 * Open a log file, for reading or writing.
 *
 * 'path' is interpreted as follows:
 * - "-" selects stdin (when reading) or stdout (otherwise);
 * - when opening for writing, a path that contains a ':' and does not
 *   begin with '/' or '.' is taken to be a "host:port" specification
 *   and a stream socket is connected to it;
 * - anything else is opened as a regular file name.
 *
 * Returns the file descriptor of the opened log.  Failures are
 * reported through errx() and terminate the program.
 */

int
pmcstat_open_log(const char *path, int mode)
{
	struct addrinfo hints, *ai, *ailist;
	char hostname[MAXHOSTNAMELEN];
	const char *colon, *reason;
	size_t hostlen;
	int fd, gaierr, oflags;

	reason = NULL;
	fd = -1;

	if (path[0] == '-' && path[1] == '\0') {
		/* Use one of the standard streams. */
		if (mode == PMCSTAT_OPEN_FOR_READ)
			fd = 0;
		else
			fd = 1;
	} else if (mode == PMCSTAT_OPEN_FOR_WRITE && path[0] != '/' &&
	    path[0] != '.' && (colon = strrchr(path, ':')) != NULL) {

		/* The host part is everything before the last ':'. */
		hostlen = colon - path;
		if (colon == path || hostlen >= sizeof(hostname)) {
			reason = strerror(EINVAL);
			goto out;
		}

		(void) strncpy(hostname, path, hostlen);
		hostname[hostlen] = '\0';

		(void) memset(&hints, 0, sizeof(hints));
		hints.ai_socktype = SOCK_STREAM;
		hints.ai_family = AF_UNSPEC;

		gaierr = getaddrinfo(hostname, colon + 1, &hints, &ailist);
		if (gaierr != 0) {
			reason = gai_strerror(gaierr);
			goto out;
		}

		/*
		 * Try each returned address in turn, remembering the
		 * most recent failure in case none of them works.
		 */
		for (ai = ailist; ai != NULL; ai = ai->ai_next) {
			fd = socket(ai->ai_family, ai->ai_socktype,
			    ai->ai_protocol);
			if (fd < 0) {
				reason = strerror(errno);
				continue;
			}
			if (connect(fd, ai->ai_addr, ai->ai_addrlen) >= 0) {
				reason = NULL;
				break;
			}
			reason = strerror(errno);
			(void) close(fd);
			fd = -1;
		}
		freeaddrinfo(ailist);

	} else {
		if (mode == PMCSTAT_OPEN_FOR_READ)
			oflags = O_RDONLY;
		else
			oflags = O_WRONLY|O_CREAT|O_TRUNC;

		fd = open(path, oflags, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
		if (fd < 0)
			reason = strerror(errno);
	}

  out:
	if (reason != NULL)
		errx(EX_OSERR, "ERROR: Cannot open \"%s\" for %s: %s.", path,
		    (mode == PMCSTAT_OPEN_FOR_READ ? "reading" : "writing"),
		    reason);

	return (fd);
}
1723
1724/*
1725 * Process a log file in offline analysis mode.
1726 */
1727
1728int
1729pmcstat_process_log(struct pmcstat_args *a)
1730{
1731
1732	/*
1733	 * If gprof style profiles haven't been asked for, just print the
1734	 * log to the current output file.
1735	 */
1736	if (a->pa_flags & FLAG_DO_PRINT)
1737		return (pmcstat_print_log(a));
1738	else
1739		/* convert the log to gprof compatible profiles */
1740		return (pmcstat_convert_log(a));
1741}
1742
1743/*
1744 * Initialize module.
1745 */
1746
1747void
1748pmcstat_initialize_logging(struct pmcstat_args *a)
1749{
1750	int i;
1751
1752	(void) a;
1753
1754	/* use a convenient format for 'ldd' output */
1755	if (setenv("LD_TRACE_LOADED_OBJECTS_FMT1","%o \"%p\" %x\n",1) != 0)
1756		err(EX_OSERR, "ERROR: Cannot setenv");
1757
1758	/* Initialize hash tables */
1759	pmcstat_string_initialize();
1760	for (i = 0; i < PMCSTAT_NHASH; i++) {
1761		LIST_INIT(&pmcstat_image_hash[i]);
1762		LIST_INIT(&pmcstat_process_hash[i]);
1763	}
1764
1765	/*
1766	 * Create a fake 'process' entry for the kernel with pid -1.
1767	 * hwpmc(4) will subsequently inform us about where the kernel
1768	 * and any loaded kernel modules are mapped.
1769	 */
1770	if ((pmcstat_kernproc = pmcstat_process_lookup((pid_t) -1,
1771		 PMCSTAT_ALLOCATE)) == NULL)
1772		err(EX_OSERR, "ERROR: Cannot initialize logging");
1773}
1774
1775/*
1776 * Shutdown module.
1777 */
1778
1779void
1780pmcstat_shutdown_logging(struct pmcstat_args *a)
1781{
1782	int i;
1783	FILE *mf;
1784	struct pmcstat_gmonfile *pgf, *pgftmp;
1785	struct pmcstat_image *pi, *pitmp;
1786	struct pmcstat_process *pp, *pptmp;
1787
1788	/* determine where to send the map file */
1789	mf = NULL;
1790	if (a->pa_mapfilename != NULL)
1791		mf = (strcmp(a->pa_mapfilename, "-") == 0) ?
1792		    a->pa_printfile : fopen(a->pa_mapfilename, "w");
1793
1794	if (mf == NULL && a->pa_flags & FLAG_DO_GPROF &&
1795	    a->pa_verbosity >= 2)
1796		mf = a->pa_printfile;
1797
1798	if (mf)
1799		(void) fprintf(mf, "MAP:\n");
1800
1801	for (i = 0; i < PMCSTAT_NHASH; i++) {
1802		LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, pitmp) {
1803
1804			if (mf)
1805				(void) fprintf(mf, " \"%s\" => \"%s\"",
1806				    pmcstat_string_unintern(pi->pi_execpath),
1807				    pmcstat_string_unintern(pi->pi_samplename));
1808
1809			/* flush gmon.out data to disk */
1810			LIST_FOREACH_SAFE(pgf, &pi->pi_gmlist, pgf_next,
1811			    pgftmp) {
1812				pmcstat_gmon_unmap_file(pgf);
1813			    	LIST_REMOVE(pgf, pgf_next);
1814				if (mf)
1815					(void) fprintf(mf, " %s/%d",
1816					    pmcstat_pmcid_to_name(pgf->pgf_pmcid),
1817					    pgf->pgf_nsamples);
1818				if (pgf->pgf_overflow && a->pa_verbosity >= 1)
1819					warnx("WARNING: profile \"%s\" "
1820					    "overflowed.",
1821					    pmcstat_string_unintern(
1822					        pgf->pgf_name));
1823			    	free(pgf);
1824			}
1825
1826			if (mf)
1827				(void) fprintf(mf, "\n");
1828
1829			LIST_REMOVE(pi, pi_next);
1830			free(pi);
1831		}
1832		LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next,
1833		    pptmp) {
1834			LIST_REMOVE(pp, pp_next);
1835			free(pp);
1836		}
1837	}
1838
1839	pmcstat_string_shutdown();
1840
1841	/*
1842	 * Print errors unless -q was specified.  Print all statistics
1843	 * if verbosity > 1.
1844	 */
1845#define	PRINT(N,V,A) do {						\
1846		if (pmcstat_stats.ps_##V || (A)->pa_verbosity >= 2)	\
1847			(void) fprintf((A)->pa_printfile, " %-40s %d\n",\
1848			    N, pmcstat_stats.ps_##V);			\
1849	} while (0)
1850
1851	if (a->pa_verbosity >= 1 && a->pa_flags & FLAG_DO_GPROF) {
1852		(void) fprintf(a->pa_printfile, "CONVERSION STATISTICS:\n");
1853		PRINT("#exec/a.out", exec_aout, a);
1854		PRINT("#exec/elf", exec_elf, a);
1855		PRINT("#exec/unknown", exec_indeterminable, a);
1856		PRINT("#exec handling errors", exec_errors, a);
1857		PRINT("#samples/total", samples_total, a);
1858		PRINT("#samples/unclaimed", samples_unknown_offset, a);
1859		PRINT("#samples/unknown-object", samples_indeterminable, a);
1860	}
1861
1862	if (mf)
1863		(void) fclose(mf);
1864}
1865