1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2018, Matthew Macy
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/cpuset.h>
34#include <sys/event.h>
35#include <sys/queue.h>
36#include <sys/socket.h>
37#include <sys/stat.h>
38#include <sys/sysctl.h>
39#include <sys/time.h>
40#include <sys/ttycom.h>
41#include <sys/user.h>
42#include <sys/wait.h>
43
44#include <assert.h>
45#include <curses.h>
46#include <err.h>
47#include <errno.h>
48#include <fcntl.h>
49#include <getopt.h>
50#include <kvm.h>
51#include <libgen.h>
52#include <limits.h>
53#include <locale.h>
54#include <math.h>
55#include <pmc.h>
56#include <pmclog.h>
57#include <regex.h>
58#include <signal.h>
59#include <stdarg.h>
60#include <stdint.h>
61#include <stdio.h>
62#include <stdlib.h>
63#include <string.h>
64#include <sysexits.h>
65#include <unistd.h>
66
67#include <libpmcstat.h>
68#include "cmd_pmc.h"
69
/*
 * Query the kern.clockrate sysctl and hand back the stathz member of the
 * clockinfo it returns.  Terminates the program through err(3) when the
 * sysctl call fails.
 */
static int
getstathz(void)
{
	int name[2] = { CTL_KERN, KERN_CLOCKRATE };
	struct clockinfo ci;
	size_t len = sizeof(ci);

	if (sysctl(name, 2, &ci, &len, NULL, 0) == -1)
		err(1, "sysctl kern.clockrate");

	return (ci.stathz);
}
87
/* Number of slots in the stat-mode counter tables. */
#define STAT_MODE_NPMCS		6
/* Used by pmc_stat_setup_stat() when placing user-supplied events. */
#define FIXED_MODE_NPMCS	2

/* Slot indices into the stat-mode counter tables. */
#define CYCLES			0
#define INST			1
#define BR			2
#define BR_MISS			3
#define CACHE			4
#define CACHE_MISS		5
/* Lowest slot that pmc_stat_setup_stat() may assign to a -j event. */
#define IAP_START		BR

/* CLOCK_MONOTONIC reading stored at the end of pmc_stat_setup_stat(). */
static struct timespec before_ts;

/* Default display label for every slot, indexed by the constants above. */
static const char *pmc_stat_mode_names[STAT_MODE_NPMCS] = {
	[CYCLES]	= "cycles",
	[INST]		= "instructions",
	[BR]		= "branches",
	[BR_MISS]	= "branch-misses",
	[CACHE]		= "cache-references",
	[CACHE_MISS]	= "cache-misses",
};
106
/*
 * Descriptor array handed to pmcstat_create_process() and
 * pmcstat_start_process() by pmc_stat_internal().
 * NOTE(review): presumably the channel used to hold the forked target
 * until the PMCs are started -- confirm against libpmcstat.
 */
static int pmcstat_sockpair[NSOCKPAIRFD];
108
109static void __dead2
110usage(void)
111{
112	errx(EX_USAGE,
113	    "\t get basic stats from command line program\n"
114	    "\t -j <eventlist>, --events <eventlist> comma-delimited list of event specifiers\n"
115	    );
116}
117
/*
 * Print the elapsed wall-clock time and the user and system CPU time of
 * the measured command, each followed by its share of wall-clock time.
 *
 * out    - stream the three lines are written to
 * before - timestamp taken when measurement began
 * after  - timestamp taken when measurement ended; it is overwritten in
 *          place with the elapsed interval (after - before)
 * ru     - resource usage supplying ru_utime and ru_stime
 *
 * Side effect: calls setlocale(LC_NUMERIC, "") so that the decimal point
 * of the environment's locale is used.
 */
static void
showtime(FILE *out, struct timespec *before, struct timespec *after,
    struct rusage *ru)
{
	char decimal_point;
	uint64_t real, user, sys;
	double denom;

	(void)setlocale(LC_NUMERIC, "");
	decimal_point = localeconv()->decimal_point[0];

	/* Reduce *after to the elapsed interval, borrowing a second if needed. */
	after->tv_sec -= before->tv_sec;
	after->tv_nsec -= before->tv_nsec;
	if (after->tv_nsec < 0) {
		after->tv_sec--;
		after->tv_nsec += 1000000000;
	}

	/*
	 * All three totals are in microseconds.  Widen to 64 bits before
	 * multiplying so that a 32-bit time_t cannot overflow.
	 */
	real = ((uint64_t)after->tv_sec * 1000000000ULL +
	    (uint64_t)after->tv_nsec) / 1000;
	user = (uint64_t)ru->ru_utime.tv_sec * 1000000ULL +
	    (uint64_t)ru->ru_utime.tv_usec;
	sys = (uint64_t)ru->ru_stime.tv_sec * 1000000ULL +
	    (uint64_t)ru->ru_stime.tv_usec;

	/* The +1 on every term keeps the percentage defined for a zero interval. */
	denom = (double)(real + 1);

	fprintf(out, "%13jd%c%02ld  real\t\t\t#\t%2.02f%% cpu\n",
	    (intmax_t)after->tv_sec, decimal_point,
	    (long)(after->tv_nsec / 10000000),
	    100 * (double)(sys + user + 1) / denom);
	fprintf(out, "%13jd%c%02ld  user\t\t\t#\t%2.2f%% cpu\n",
	    (intmax_t)ru->ru_utime.tv_sec, decimal_point,
	    (long)(ru->ru_utime.tv_usec / 10000),
	    100 * (double)(user + 1) / denom);
	fprintf(out, "%13jd%c%02ld  sys\t\t\t#\t%2.02f%% cpu\n",
	    (intmax_t)ru->ru_stime.tv_sec, decimal_point,
	    (long)(ru->ru_stime.tv_usec / 10000),
	    100 * (double)(sys + 1) / denom);
}
148
/*
 * Event specification and display label currently assigned to each
 * stat-mode slot.  Both tables are filled in by pmc_stat_setup_stat().
 */
static const char *stat_mode_cntrs[STAT_MODE_NPMCS];
static const char *stat_mode_names[STAT_MODE_NPMCS];

/*
 * Prepare pmc_args for a "stat" run: choose the event for every slot,
 * set the pmcstat flags, open /dev/null as the log, queue one counting
 * event descriptor per slot on pmc_args.pa_events and record the start
 * time in before_ts.
 *
 * system_mode - non-zero requests system-scope counters (PMC_MODE_SC),
 *               which requires an effective uid of 0; zero requests
 *               process-scope counters (PMC_MODE_TC)
 * arg         - comma-separated event list from -j, or NULL to use the
 *               host defaults.  At most STAT_MODE_NPMCS - IAP_START
 *               events are taken; they replace the trailing slots.
 *
 * Every failure terminates the program through err(3)/errx(3).
 */
static void
pmc_stat_setup_stat(int system_mode, const char *arg)
{
	const char *new_cntrs[STAT_MODE_NPMCS];
	static const char **pmc_stat_mode_cntrs;
	struct pmcstat_ev *ev;
	char *counters, *counter;
	size_t namelen;
	int i, start, newcnt;
	cpuset_t cpumask, rootmask;

	if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
	    sizeof(rootmask), &rootmask) == -1)
		err(EX_OSERR, "ERROR: Cannot determine the root set of CPUs");
	CPU_COPY(&rootmask, &cpumask);

	if (pmc_pmu_stat_mode(&pmc_stat_mode_cntrs) != 0)
		errx(EX_USAGE, "ERROR: hwpmc.ko not loaded or stat not supported on host.");
	if (system_mode && geteuid() != 0)
		errx(EX_USAGE, "ERROR: system mode counters can only be used as root");

	/* Begin with the host's default event and label in every slot. */
	for (i = 0; i < STAT_MODE_NPMCS; i++) {
		stat_mode_cntrs[i] = pmc_stat_mode_cntrs[i];
		stat_mode_names[i] = pmc_stat_mode_names[i];
	}

	if (arg != NULL) {
		/*
		 * The tokens produced by strsep() are stored in the
		 * file-scope tables, so this copy is intentionally never
		 * freed.
		 */
		if ((counters = strdup(arg)) == NULL)
			errx(EX_SOFTWARE, "ERROR: Out of memory.");
		newcnt = 0;
		while ((counter = strsep(&counters, ",")) != NULL &&
		    newcnt < STAT_MODE_NPMCS - IAP_START) {
			new_cntrs[newcnt++] = counter;
			if (pmc_pmu_sample_rate_get(counter) == DEFAULT_SAMPLE_COUNT)
				errx(EX_USAGE, "ERROR: %s not recognized on host", counter);
		}
		/* User events occupy the last newcnt slots of the tables. */
		start = IAP_START + STAT_MODE_NPMCS - FIXED_MODE_NPMCS - newcnt;
		for (i = 0; i < newcnt; i++) {
			stat_mode_cntrs[start + i] = new_cntrs[i];
			stat_mode_names[start + i] = new_cntrs[i];
		}
	}

	if (system_mode)
		pmc_args.pa_flags |= FLAG_HAS_SYSTEM_PMCS;
	else
		pmc_args.pa_flags |= FLAG_HAS_PROCESS_PMCS;
	pmc_args.pa_flags |= FLAG_HAS_COUNTING_PMCS;
	pmc_args.pa_flags |= FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET;
	pmc_args.pa_flags |= FLAG_HAS_PIPE;
	pmc_args.pa_required |= FLAG_HAS_COMMANDLINE | FLAG_HAS_TARGET | FLAG_HAS_OUTPUT_LOGFILE;

	if ((pmc_args.pa_outputpath = strdup("/dev/null")) == NULL)
		errx(EX_SOFTWARE, "ERROR: Out of memory.");
	pmc_args.pa_logfd = pmcstat_open_log(pmc_args.pa_outputpath,
	    PMCSTAT_OPEN_FOR_WRITE);

	for (i = 0; i < STAT_MODE_NPMCS; i++) {
		if ((ev = malloc(sizeof(*ev))) == NULL)
			errx(EX_SOFTWARE, "ERROR: Out of memory.");
		if (system_mode)
			ev->ev_mode = PMC_MODE_SC;
		else
			ev->ev_mode = PMC_MODE_TC;
		ev->ev_spec = strdup(stat_mode_cntrs[i]);
		if (ev->ev_spec == NULL)
			errx(EX_SOFTWARE, "ERROR: Out of memory.");

		/* The name is the spec up to its first ',', ' ' or tab. */
		namelen = strcspn(stat_mode_cntrs[i], ", \t");
		ev->ev_name = malloc(namelen + 1);
		if (ev->ev_name == NULL)
			errx(EX_SOFTWARE, "ERROR: Out of memory.");
		memcpy(ev->ev_name, stat_mode_cntrs[i], namelen);
		ev->ev_name[namelen] = '\0';

		ev->ev_count = -1;
		ev->ev_flags = 0;
		ev->ev_flags |= PMC_F_DESCENDANTS;
		ev->ev_cumulative = 1;

		ev->ev_saved = 0LL;
		ev->ev_pmcid = PMC_ID_INVALID;
		STAILQ_INSERT_TAIL(&pmc_args.pa_events, ev, ev_next);
		if (system_mode) {
			/*
			 * Bind this descriptor to the first CPU in the mask
			 * and pass the mask without that CPU to the clone
			 * helper; the CPU is put back afterwards so the next
			 * slot starts from the full mask again.
			 * NOTE(review): presumably the helper queues one
			 * copy per remaining CPU -- confirm in libpmcstat.
			 */
			ev->ev_cpu = CPU_FFS(&cpumask) - 1;
			CPU_CLR(ev->ev_cpu, &cpumask);
			pmcstat_clone_event_descriptor(ev, &cpumask, &pmc_args);
			CPU_SET(ev->ev_cpu, &cpumask);
		} else
			ev->ev_cpu = PMC_CPU_ANY;
	}

	if (clock_gettime(CLOCK_MONOTONIC, &before_ts))
		err(1, "clock_gettime");
}
239
240static void
241pmc_stat_print_stat(struct rusage *ru)
242{
243	struct pmcstat_ev *ev;
244	struct timespec after;
245	uint64_t cvals[STAT_MODE_NPMCS];
246	uint64_t ticks, value;
247	int hz, i;
248
249	if (ru) {
250		hz = getstathz();
251		ticks = hz * (ru->ru_utime.tv_sec + ru->ru_stime.tv_sec) +
252			hz * (ru->ru_utime.tv_usec + ru->ru_stime.tv_usec) / 1000000;
253		if (clock_gettime(CLOCK_MONOTONIC, &after))
254			err(1, "clock_gettime");
255		/*
256		 * If our round-off on the tick calculation still puts us at 0,
257		 * then always assume at least one tick.
258		 */
259		if (ticks == 0)
260			ticks = 1;
261		fprintf(pmc_args.pa_printfile, "%16ld  %s\t\t#\t%02.03f M/sec\n",
262			ru->ru_minflt, "page faults", ((double)ru->ru_minflt / (double)ticks) / hz);
263		fprintf(pmc_args.pa_printfile, "%16ld  %s\t\t#\t%02.03f M/sec\n",
264			ru->ru_nvcsw, "voluntary csw", ((double)ru->ru_nvcsw / (double)ticks) / hz);
265		fprintf(pmc_args.pa_printfile, "%16ld  %s\t#\t%02.03f M/sec\n",
266			ru->ru_nivcsw, "involuntary csw", ((double)ru->ru_nivcsw / (double)ticks) / hz);
267	}
268
269	bzero(&cvals, sizeof(cvals));
270	STAILQ_FOREACH(ev, &pmc_args.pa_events, ev_next) {
271		if (pmc_read(ev->ev_pmcid, &value) < 0)
272			err(EX_OSERR, "ERROR: Cannot read pmc \"%s\"",
273			    ev->ev_name);
274		for (i = 0; i < STAT_MODE_NPMCS; i++)
275			if (strcmp(ev->ev_name, stat_mode_cntrs[i]) == 0)
276				cvals[i] += value;
277	}
278
279	fprintf(pmc_args.pa_printfile, "%16jd  %s\n", (uintmax_t)cvals[CYCLES], stat_mode_names[CYCLES]);
280	fprintf(pmc_args.pa_printfile, "%16jd  %s\t\t#\t%01.03f inst/cycle\n", (uintmax_t)cvals[INST], stat_mode_names[INST],
281	    (double)cvals[INST] / cvals[CYCLES]);
282	fprintf(pmc_args.pa_printfile, "%16jd  %s\n", (uintmax_t)cvals[BR], stat_mode_names[BR]);
283	if (stat_mode_names[BR_MISS] == pmc_stat_mode_names[BR_MISS])
284		fprintf(pmc_args.pa_printfile, "%16jd  %s\t\t#\t%.03f%%\n",
285		    (uintmax_t)cvals[BR_MISS], stat_mode_names[BR_MISS],
286		    100 * ((double)cvals[BR_MISS] / cvals[BR]));
287	else
288		fprintf(pmc_args.pa_printfile, "%16jd  %s\n",
289		    (uintmax_t)cvals[BR_MISS], stat_mode_names[BR_MISS]);
290	fprintf(pmc_args.pa_printfile, "%16jd  %s%s", (uintmax_t)cvals[CACHE], stat_mode_names[CACHE],
291	    stat_mode_names[CACHE] != pmc_stat_mode_names[CACHE] ? "\n" : "");
292	if (stat_mode_names[CACHE] == pmc_stat_mode_names[CACHE])
293		fprintf(pmc_args.pa_printfile, "\t#\t%.03f refs/inst\n",
294		    ((double)cvals[CACHE] / cvals[INST]));
295	fprintf(pmc_args.pa_printfile, "%16jd  %s%s", (uintmax_t)cvals[CACHE_MISS], stat_mode_names[CACHE_MISS],
296	    stat_mode_names[CACHE_MISS] != pmc_stat_mode_names[CACHE_MISS] ? "\n" : "");
297	if (stat_mode_names[CACHE_MISS] == pmc_stat_mode_names[CACHE_MISS])
298		fprintf(pmc_args.pa_printfile, "\t\t#\t%.03f%%\n",
299		    100 * ((double)cvals[CACHE_MISS] / cvals[CACHE]));
300
301	if (ru)
302		showtime(pmc_args.pa_printfile, &before_ts, &after, ru);
303}
304
/* getopt_long() table: --events takes an argument and maps to -j. */
static struct option longopts[] = {
	{ .name = "events", .has_arg = required_argument, .flag = NULL, .val = 'j' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
309
/*
 * Shared body of the stat subcommands: parse the options, set up and
 * allocate the counters, launch the target command, service the kqueue
 * until the run is finished and print the results.
 *
 * argc, argv  - subcommand arguments.  Recognised options are
 *               -j/--events <eventlist> and -d; whatever follows them is
 *               the command line to run and must not be empty.
 * system_mode - forwarded to pmc_stat_setup_stat(); non-zero selects
 *               system-scope counters
 *
 * With -d the counters are also printed on every timer event (the timer
 * is registered with a period value of 1000).
 *
 * Returns 0.  Failures terminate the program through err(3), errx(3),
 * errc(3) or usage().
 */
static int
pmc_stat_internal(int argc, char **argv, int system_mode)
{
	char *event;
	struct sigaction sa;
	struct kevent kev;
	struct rusage ru;
	struct winsize ws;
	struct pmcstat_ev *ev;
	int c, option, runstate;
	int waitstatus, ru_valid, do_debug;

	do_debug = ru_valid = 0;
	event = NULL;
	while ((option = getopt_long(argc, argv, "dj:", longopts, NULL)) != -1) {
		switch (option) {
		case 'j':
			/* A later -j replaces an earlier one. */
			free(event);
			if ((event = strdup(optarg)) == NULL)
				errx(EX_SOFTWARE, "ERROR: Out of memory.");
			break;
		case 'd':
			do_debug = 1;
			break;
		case '?':
		default:
			usage();
		}
	}
	pmc_args.pa_argc = (argc -= optind);
	pmc_args.pa_argv = (argv += optind);
	if (argc == 0)
		usage();
	pmc_args.pa_flags |= FLAG_HAS_COMMANDLINE;
	/* pmc_stat_setup_stat() works on its own copy of the event list. */
	pmc_stat_setup_stat(system_mode, event);
	free(event);
	bzero(&ru, sizeof(ru));
	EV_SET(&kev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
	if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0)
		err(EX_OSERR, "ERROR: Cannot register kevent for SIGINT");

	EV_SET(&kev, SIGIO, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
	if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0)
		err(EX_OSERR, "ERROR: Cannot register kevent for SIGIO");
	EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0, 1000, NULL);
	if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0)
		err(EX_OSERR,
			"ERROR: Cannot register kevent for timer");

	STAILQ_FOREACH(ev, &pmc_args.pa_events, ev_next) {
		if (pmc_allocate(ev->ev_spec, ev->ev_mode,
		    ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid, ev->ev_count) < 0)
			err(EX_OSERR,
			    "ERROR: Cannot allocate %s-mode pmc with specification \"%s\"",
			    PMC_IS_SYSTEM_MODE(ev->ev_mode) ?
			    "system" : "process", ev->ev_spec);

		if (PMC_IS_SAMPLING_MODE(ev->ev_mode) &&
		    pmc_set(ev->ev_pmcid, ev->ev_count) < 0)
			err(EX_OSERR,
			    "ERROR: Cannot set sampling count for PMC \"%s\"",
			    ev->ev_name);
	}

	/*
	 * An exec() failure of a forked child is signalled by the
	 * child sending the parent a SIGCHLD.  We don't register an
	 * actual signal handler for SIGCHLD, but instead use our
	 * kqueue to pick up the signal.
	 */
	EV_SET(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
	if (kevent(pmc_kq, &kev, 1, NULL, 0, NULL) < 0)
		err(EX_OSERR, "ERROR: Cannot register kevent for SIGCHLD");

	pmcstat_create_process(pmcstat_sockpair, &pmc_args, pmc_kq);

	if (SLIST_EMPTY(&pmc_args.pa_targets))
		errx(EX_DATAERR,
		    "ERROR: No matching target processes.");
	if (pmc_args.pa_flags & FLAG_HAS_PROCESS_PMCS)
		pmcstat_attach_pmcs(&pmc_args);

	/* start the pmcs */
	pmc_util_start_pmcs(&pmc_args);

	/* start the (commandline) process if needed */
	pmcstat_start_process(pmcstat_sockpair);

	/* Handle SIGINT using the kqueue loop */
	sa.sa_handler = SIG_IGN;
	sa.sa_flags = 0;
	(void)sigemptyset(&sa.sa_mask);

	if (sigaction(SIGINT, &sa, NULL) < 0)
		err(EX_OSERR, "ERROR: Cannot install signal handler");

	/*
	 * Loop until either the target process (if any) exits, or we
	 * are killed by a SIGINT.
	 */
	runstate = PMCSTAT_RUNNING;
	do {
		if ((c = kevent(pmc_kq, NULL, 0, &kev, 1, NULL)) <= 0) {
			if (errno != EINTR)
				err(EX_OSERR, "ERROR: kevent failed");
			else
				continue;
		}
		if (kev.flags & EV_ERROR)
			errc(EX_OSERR, kev.data, "ERROR: kevent failed");

		switch (kev.filter) {
		case EVFILT_PROC:	/* target has exited */
			/* Reap the target, then record the children's usage. */
			if (wait4(pmc_util_get_pid(&pmc_args), &waitstatus, 0, &ru) > 0) {
				getrusage(RUSAGE_CHILDREN, &ru);
				ru_valid = 1;
			}
			break;

		case EVFILT_READ:	/* log file data is present */
			break;
		case EVFILT_TIMER:
			/* Periodic counter dump, only with -d. */
			if (do_debug)
				pmc_stat_print_stat(NULL);
			break;
		case EVFILT_SIGNAL:
			if (kev.ident == SIGCHLD) {
				/*
				 * The child process sends us a
				 * SIGCHLD if its exec() failed.  We
				 * wait for it to exit and then exit
				 * ourselves.
				 */
				(void)wait(&c);
				runstate = PMCSTAT_FINISHED;
			} else if (kev.ident == SIGIO) {
				/*
				 * We get a SIGIO if a PMC loses all
				 * of its targets, or if logfile
				 * writes encounter an error.
				 */
				if (wait4(pmc_util_get_pid(&pmc_args), &waitstatus, 0, &ru) > 0) {
					getrusage(RUSAGE_CHILDREN, &ru);
					ru_valid = 1;
				}
				runstate = pmcstat_close_log(&pmc_args);
			} else if (kev.ident == SIGINT) {
				/* Kill the child process if we started it */
				if (pmc_args.pa_flags & FLAG_HAS_COMMANDLINE)
					pmc_util_kill_process(&pmc_args);
				runstate = pmcstat_close_log(&pmc_args);
			} else if (kev.ident == SIGWINCH) {
				if (ioctl(fileno(pmc_args.pa_printfile),
				    TIOCGWINSZ, &ws) < 0)
					err(EX_OSERR,
					    "ERROR: Cannot determine window size");
				pmc_displayheight = ws.ws_row - 1;
				pmc_displaywidth = ws.ws_col - 1;
			} else
				assert(0);

			break;
		default:
			/* Events from other filters are ignored. */
			break;
		}
	} while (runstate != PMCSTAT_FINISHED);
	if (!ru_valid)
		warnx("couldn't get rusage");
	pmc_stat_print_stat(&ru);
	pmc_util_cleanup(&pmc_args);
	return (0);
}
478
/*
 * Entry point of the process-scope stat subcommand: runs
 * pmc_stat_internal() with system mode switched off.
 */
int
cmd_pmc_stat(int argc, char **argv)
{
	const int system_mode = 0;

	return (pmc_stat_internal(argc, argv, system_mode));
}
484
/*
 * Entry point of the system-scope stat subcommand: runs
 * pmc_stat_internal() with system mode switched on.
 */
int
cmd_pmc_stat_system(int argc, char **argv)
{
	const int system_mode = 1;

	return (pmc_stat_internal(argc, argv, system_mode));
}
490