1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * builtin-ftrace.c
4 *
5 * Copyright (c) 2013  LG Electronics,  Namhyung Kim <namhyung@kernel.org>
6 * Copyright (c) 2020  Changbin Du <changbin.du@gmail.com>, significant enhancement.
7 */
8
9#include "builtin.h"
10
11#include <errno.h>
12#include <unistd.h>
13#include <signal.h>
14#include <stdlib.h>
15#include <fcntl.h>
16#include <math.h>
17#include <poll.h>
18#include <ctype.h>
19#include <linux/capability.h>
20#include <linux/string.h>
21
22#include "debug.h"
23#include <subcmd/pager.h>
24#include <subcmd/parse-options.h>
25#include <api/fs/tracing_path.h>
26#include "evlist.h"
27#include "target.h"
28#include "cpumap.h"
29#include "thread_map.h"
30#include "strfilter.h"
31#include "util/cap.h"
32#include "util/config.h"
33#include "util/ftrace.h"
34#include "util/units.h"
35#include "util/parse-sublevel-options.h"
36
37#define DEFAULT_TRACER  "function_graph"
38
39static volatile sig_atomic_t workload_exec_errno;
40static volatile sig_atomic_t done;
41
42static void sig_handler(int sig __maybe_unused)
43{
44	done = true;
45}
46
47/*
48 * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
49 * we asked by setting its exec_error to the function below,
50 * ftrace__workload_exec_failed_signal.
51 *
52 * XXX We need to handle this more appropriately, emitting an error, etc.
53 */
54static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
55						siginfo_t *info __maybe_unused,
56						void *ucontext __maybe_unused)
57{
58	workload_exec_errno = info->si_value.sival_int;
59	done = true;
60}
61
62static int __write_tracing_file(const char *name, const char *val, bool append)
63{
64	char *file;
65	int fd, ret = -1;
66	ssize_t size = strlen(val);
67	int flags = O_WRONLY;
68	char errbuf[512];
69	char *val_copy;
70
71	file = get_tracing_file(name);
72	if (!file) {
73		pr_debug("cannot get tracing file: %s\n", name);
74		return -1;
75	}
76
77	if (append)
78		flags |= O_APPEND;
79	else
80		flags |= O_TRUNC;
81
82	fd = open(file, flags);
83	if (fd < 0) {
84		pr_debug("cannot open tracing file: %s: %s\n",
85			 name, str_error_r(errno, errbuf, sizeof(errbuf)));
86		goto out;
87	}
88
89	/*
90	 * Copy the original value and append a '\n'. Without this,
91	 * the kernel can hide possible errors.
92	 */
93	val_copy = strdup(val);
94	if (!val_copy)
95		goto out_close;
96	val_copy[size] = '\n';
97
98	if (write(fd, val_copy, size + 1) == size + 1)
99		ret = 0;
100	else
101		pr_debug("write '%s' to tracing/%s failed: %s\n",
102			 val, name, str_error_r(errno, errbuf, sizeof(errbuf)));
103
104	free(val_copy);
105out_close:
106	close(fd);
107out:
108	put_tracing_file(file);
109	return ret;
110}
111
112static int write_tracing_file(const char *name, const char *val)
113{
114	return __write_tracing_file(name, val, false);
115}
116
117static int append_tracing_file(const char *name, const char *val)
118{
119	return __write_tracing_file(name, val, true);
120}
121
122static int read_tracing_file_to_stdout(const char *name)
123{
124	char buf[4096];
125	char *file;
126	int fd;
127	int ret = -1;
128
129	file = get_tracing_file(name);
130	if (!file) {
131		pr_debug("cannot get tracing file: %s\n", name);
132		return -1;
133	}
134
135	fd = open(file, O_RDONLY);
136	if (fd < 0) {
137		pr_debug("cannot open tracing file: %s: %s\n",
138			 name, str_error_r(errno, buf, sizeof(buf)));
139		goto out;
140	}
141
142	/* read contents to stdout */
143	while (true) {
144		int n = read(fd, buf, sizeof(buf));
145		if (n == 0)
146			break;
147		else if (n < 0)
148			goto out_close;
149
150		if (fwrite(buf, n, 1, stdout) != 1)
151			goto out_close;
152	}
153	ret = 0;
154
155out_close:
156	close(fd);
157out:
158	put_tracing_file(file);
159	return ret;
160}
161
162static int read_tracing_file_by_line(const char *name,
163				     void (*cb)(char *str, void *arg),
164				     void *cb_arg)
165{
166	char *line = NULL;
167	size_t len = 0;
168	char *file;
169	FILE *fp;
170
171	file = get_tracing_file(name);
172	if (!file) {
173		pr_debug("cannot get tracing file: %s\n", name);
174		return -1;
175	}
176
177	fp = fopen(file, "r");
178	if (fp == NULL) {
179		pr_debug("cannot open tracing file: %s\n", name);
180		put_tracing_file(file);
181		return -1;
182	}
183
184	while (getline(&line, &len, fp) != -1) {
185		cb(line, cb_arg);
186	}
187
188	if (line)
189		free(line);
190
191	fclose(fp);
192	put_tracing_file(file);
193	return 0;
194}
195
196static int write_tracing_file_int(const char *name, int value)
197{
198	char buf[16];
199
200	snprintf(buf, sizeof(buf), "%d", value);
201	if (write_tracing_file(name, buf) < 0)
202		return -1;
203
204	return 0;
205}
206
207static int write_tracing_option_file(const char *name, const char *val)
208{
209	char *file;
210	int ret;
211
212	if (asprintf(&file, "options/%s", name) < 0)
213		return -1;
214
215	ret = __write_tracing_file(file, val, false);
216	free(file);
217	return ret;
218}
219
220static int reset_tracing_cpu(void);
221static void reset_tracing_filters(void);
222
223static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused)
224{
225	write_tracing_option_file("function-fork", "0");
226	write_tracing_option_file("func_stack_trace", "0");
227	write_tracing_option_file("sleep-time", "1");
228	write_tracing_option_file("funcgraph-irqs", "1");
229	write_tracing_option_file("funcgraph-proc", "0");
230	write_tracing_option_file("funcgraph-abstime", "0");
231	write_tracing_option_file("latency-format", "0");
232	write_tracing_option_file("irq-info", "0");
233}
234
235static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
236{
237	if (write_tracing_file("tracing_on", "0") < 0)
238		return -1;
239
240	if (write_tracing_file("current_tracer", "nop") < 0)
241		return -1;
242
243	if (write_tracing_file("set_ftrace_pid", " ") < 0)
244		return -1;
245
246	if (reset_tracing_cpu() < 0)
247		return -1;
248
249	if (write_tracing_file("max_graph_depth", "0") < 0)
250		return -1;
251
252	if (write_tracing_file("tracing_thresh", "0") < 0)
253		return -1;
254
255	reset_tracing_filters();
256	reset_tracing_options(ftrace);
257	return 0;
258}
259
260static int set_tracing_pid(struct perf_ftrace *ftrace)
261{
262	int i;
263	char buf[16];
264
265	if (target__has_cpu(&ftrace->target))
266		return 0;
267
268	for (i = 0; i < perf_thread_map__nr(ftrace->evlist->core.threads); i++) {
269		scnprintf(buf, sizeof(buf), "%d",
270			  perf_thread_map__pid(ftrace->evlist->core.threads, i));
271		if (append_tracing_file("set_ftrace_pid", buf) < 0)
272			return -1;
273	}
274	return 0;
275}
276
277static int set_tracing_cpumask(struct perf_cpu_map *cpumap)
278{
279	char *cpumask;
280	size_t mask_size;
281	int ret;
282	int last_cpu;
283
284	last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu;
285	mask_size = last_cpu / 4 + 2; /* one more byte for EOS */
286	mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */
287
288	cpumask = malloc(mask_size);
289	if (cpumask == NULL) {
290		pr_debug("failed to allocate cpu mask\n");
291		return -1;
292	}
293
294	cpu_map__snprint_mask(cpumap, cpumask, mask_size);
295
296	ret = write_tracing_file("tracing_cpumask", cpumask);
297
298	free(cpumask);
299	return ret;
300}
301
302static int set_tracing_cpu(struct perf_ftrace *ftrace)
303{
304	struct perf_cpu_map *cpumap = ftrace->evlist->core.user_requested_cpus;
305
306	if (!target__has_cpu(&ftrace->target))
307		return 0;
308
309	return set_tracing_cpumask(cpumap);
310}
311
312static int set_tracing_func_stack_trace(struct perf_ftrace *ftrace)
313{
314	if (!ftrace->func_stack_trace)
315		return 0;
316
317	if (write_tracing_option_file("func_stack_trace", "1") < 0)
318		return -1;
319
320	return 0;
321}
322
323static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace)
324{
325	if (!ftrace->func_irq_info)
326		return 0;
327
328	if (write_tracing_option_file("irq-info", "1") < 0)
329		return -1;
330
331	return 0;
332}
333
334static int reset_tracing_cpu(void)
335{
336	struct perf_cpu_map *cpumap = perf_cpu_map__new_online_cpus();
337	int ret;
338
339	ret = set_tracing_cpumask(cpumap);
340	perf_cpu_map__put(cpumap);
341	return ret;
342}
343
344static int __set_tracing_filter(const char *filter_file, struct list_head *funcs)
345{
346	struct filter_entry *pos;
347
348	list_for_each_entry(pos, funcs, list) {
349		if (append_tracing_file(filter_file, pos->name) < 0)
350			return -1;
351	}
352
353	return 0;
354}
355
356static int set_tracing_filters(struct perf_ftrace *ftrace)
357{
358	int ret;
359
360	ret = __set_tracing_filter("set_ftrace_filter", &ftrace->filters);
361	if (ret < 0)
362		return ret;
363
364	ret = __set_tracing_filter("set_ftrace_notrace", &ftrace->notrace);
365	if (ret < 0)
366		return ret;
367
368	ret = __set_tracing_filter("set_graph_function", &ftrace->graph_funcs);
369	if (ret < 0)
370		return ret;
371
372	/* old kernels do not have this filter */
373	__set_tracing_filter("set_graph_notrace", &ftrace->nograph_funcs);
374
375	return ret;
376}
377
378static void reset_tracing_filters(void)
379{
380	write_tracing_file("set_ftrace_filter", " ");
381	write_tracing_file("set_ftrace_notrace", " ");
382	write_tracing_file("set_graph_function", " ");
383	write_tracing_file("set_graph_notrace", " ");
384}
385
386static int set_tracing_depth(struct perf_ftrace *ftrace)
387{
388	if (ftrace->graph_depth == 0)
389		return 0;
390
391	if (ftrace->graph_depth < 0) {
392		pr_err("invalid graph depth: %d\n", ftrace->graph_depth);
393		return -1;
394	}
395
396	if (write_tracing_file_int("max_graph_depth", ftrace->graph_depth) < 0)
397		return -1;
398
399	return 0;
400}
401
402static int set_tracing_percpu_buffer_size(struct perf_ftrace *ftrace)
403{
404	int ret;
405
406	if (ftrace->percpu_buffer_size == 0)
407		return 0;
408
409	ret = write_tracing_file_int("buffer_size_kb",
410				     ftrace->percpu_buffer_size / 1024);
411	if (ret < 0)
412		return ret;
413
414	return 0;
415}
416
417static int set_tracing_trace_inherit(struct perf_ftrace *ftrace)
418{
419	if (!ftrace->inherit)
420		return 0;
421
422	if (write_tracing_option_file("function-fork", "1") < 0)
423		return -1;
424
425	return 0;
426}
427
428static int set_tracing_sleep_time(struct perf_ftrace *ftrace)
429{
430	if (!ftrace->graph_nosleep_time)
431		return 0;
432
433	if (write_tracing_option_file("sleep-time", "0") < 0)
434		return -1;
435
436	return 0;
437}
438
439static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace)
440{
441	if (!ftrace->graph_noirqs)
442		return 0;
443
444	if (write_tracing_option_file("funcgraph-irqs", "0") < 0)
445		return -1;
446
447	return 0;
448}
449
450static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace)
451{
452	if (!ftrace->graph_verbose)
453		return 0;
454
455	if (write_tracing_option_file("funcgraph-proc", "1") < 0)
456		return -1;
457
458	if (write_tracing_option_file("funcgraph-abstime", "1") < 0)
459		return -1;
460
461	if (write_tracing_option_file("latency-format", "1") < 0)
462		return -1;
463
464	return 0;
465}
466
467static int set_tracing_thresh(struct perf_ftrace *ftrace)
468{
469	int ret;
470
471	if (ftrace->graph_thresh == 0)
472		return 0;
473
474	ret = write_tracing_file_int("tracing_thresh", ftrace->graph_thresh);
475	if (ret < 0)
476		return ret;
477
478	return 0;
479}
480
481static int set_tracing_options(struct perf_ftrace *ftrace)
482{
483	if (set_tracing_pid(ftrace) < 0) {
484		pr_err("failed to set ftrace pid\n");
485		return -1;
486	}
487
488	if (set_tracing_cpu(ftrace) < 0) {
489		pr_err("failed to set tracing cpumask\n");
490		return -1;
491	}
492
493	if (set_tracing_func_stack_trace(ftrace) < 0) {
494		pr_err("failed to set tracing option func_stack_trace\n");
495		return -1;
496	}
497
498	if (set_tracing_func_irqinfo(ftrace) < 0) {
499		pr_err("failed to set tracing option irq-info\n");
500		return -1;
501	}
502
503	if (set_tracing_filters(ftrace) < 0) {
504		pr_err("failed to set tracing filters\n");
505		return -1;
506	}
507
508	if (set_tracing_depth(ftrace) < 0) {
509		pr_err("failed to set graph depth\n");
510		return -1;
511	}
512
513	if (set_tracing_percpu_buffer_size(ftrace) < 0) {
514		pr_err("failed to set tracing per-cpu buffer size\n");
515		return -1;
516	}
517
518	if (set_tracing_trace_inherit(ftrace) < 0) {
519		pr_err("failed to set tracing option function-fork\n");
520		return -1;
521	}
522
523	if (set_tracing_sleep_time(ftrace) < 0) {
524		pr_err("failed to set tracing option sleep-time\n");
525		return -1;
526	}
527
528	if (set_tracing_funcgraph_irqs(ftrace) < 0) {
529		pr_err("failed to set tracing option funcgraph-irqs\n");
530		return -1;
531	}
532
533	if (set_tracing_funcgraph_verbose(ftrace) < 0) {
534		pr_err("failed to set tracing option funcgraph-proc/funcgraph-abstime\n");
535		return -1;
536	}
537
538	if (set_tracing_thresh(ftrace) < 0) {
539		pr_err("failed to set tracing thresh\n");
540		return -1;
541	}
542
543	return 0;
544}
545
546static void select_tracer(struct perf_ftrace *ftrace)
547{
548	bool graph = !list_empty(&ftrace->graph_funcs) ||
549		     !list_empty(&ftrace->nograph_funcs);
550	bool func = !list_empty(&ftrace->filters) ||
551		    !list_empty(&ftrace->notrace);
552
553	/* The function_graph has priority over function tracer. */
554	if (graph)
555		ftrace->tracer = "function_graph";
556	else if (func)
557		ftrace->tracer = "function";
558	/* Otherwise, the default tracer is used. */
559
560	pr_debug("%s tracer is used\n", ftrace->tracer);
561}
562
563static int __cmd_ftrace(struct perf_ftrace *ftrace)
564{
565	char *trace_file;
566	int trace_fd;
567	char buf[4096];
568	struct pollfd pollfd = {
569		.events = POLLIN,
570	};
571
572	if (!(perf_cap__capable(CAP_PERFMON) ||
573	      perf_cap__capable(CAP_SYS_ADMIN))) {
574		pr_err("ftrace only works for %s!\n",
575#ifdef HAVE_LIBCAP_SUPPORT
576		"users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
577#else
578		"root"
579#endif
580		);
581		return -1;
582	}
583
584	select_tracer(ftrace);
585
586	if (reset_tracing_files(ftrace) < 0) {
587		pr_err("failed to reset ftrace\n");
588		goto out;
589	}
590
591	/* reset ftrace buffer */
592	if (write_tracing_file("trace", "0") < 0)
593		goto out;
594
595	if (set_tracing_options(ftrace) < 0)
596		goto out_reset;
597
598	if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
599		pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
600		goto out_reset;
601	}
602
603	setup_pager();
604
605	trace_file = get_tracing_file("trace_pipe");
606	if (!trace_file) {
607		pr_err("failed to open trace_pipe\n");
608		goto out_reset;
609	}
610
611	trace_fd = open(trace_file, O_RDONLY);
612
613	put_tracing_file(trace_file);
614
615	if (trace_fd < 0) {
616		pr_err("failed to open trace_pipe\n");
617		goto out_reset;
618	}
619
620	fcntl(trace_fd, F_SETFL, O_NONBLOCK);
621	pollfd.fd = trace_fd;
622
623	/* display column headers */
624	read_tracing_file_to_stdout("trace");
625
626	if (!ftrace->target.initial_delay) {
627		if (write_tracing_file("tracing_on", "1") < 0) {
628			pr_err("can't enable tracing\n");
629			goto out_close_fd;
630		}
631	}
632
633	evlist__start_workload(ftrace->evlist);
634
635	if (ftrace->target.initial_delay > 0) {
636		usleep(ftrace->target.initial_delay * 1000);
637		if (write_tracing_file("tracing_on", "1") < 0) {
638			pr_err("can't enable tracing\n");
639			goto out_close_fd;
640		}
641	}
642
643	while (!done) {
644		if (poll(&pollfd, 1, -1) < 0)
645			break;
646
647		if (pollfd.revents & POLLIN) {
648			int n = read(trace_fd, buf, sizeof(buf));
649			if (n < 0)
650				break;
651			if (fwrite(buf, n, 1, stdout) != 1)
652				break;
653			/* flush output since stdout is in full buffering mode due to pager */
654			fflush(stdout);
655		}
656	}
657
658	write_tracing_file("tracing_on", "0");
659
660	if (workload_exec_errno) {
661		const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
662		/* flush stdout first so below error msg appears at the end. */
663		fflush(stdout);
664		pr_err("workload failed: %s\n", emsg);
665		goto out_close_fd;
666	}
667
668	/* read remaining buffer contents */
669	while (true) {
670		int n = read(trace_fd, buf, sizeof(buf));
671		if (n <= 0)
672			break;
673		if (fwrite(buf, n, 1, stdout) != 1)
674			break;
675	}
676
677out_close_fd:
678	close(trace_fd);
679out_reset:
680	reset_tracing_files(ftrace);
681out:
682	return (done && !workload_exec_errno) ? 0 : -1;
683}
684
685static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
686			   bool use_nsec)
687{
688	char *p, *q;
689	char *unit;
690	double num;
691	int i;
692
693	/* ensure NUL termination */
694	buf[len] = '\0';
695
696	/* handle data line by line */
697	for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
698		*q = '\0';
699		/* move it to the line buffer */
700		strcat(linebuf, p);
701
702		/*
703		 * parse trace output to get function duration like in
704		 *
705		 * # tracer: function_graph
706		 * #
707		 * # CPU  DURATION                  FUNCTION CALLS
708		 * # |     |   |                     |   |   |   |
709		 *  1) + 10.291 us   |  do_filp_open();
710		 *  1)   4.889 us    |  do_filp_open();
711		 *  1)   6.086 us    |  do_filp_open();
712		 *
713		 */
714		if (linebuf[0] == '#')
715			goto next;
716
717		/* ignore CPU */
718		p = strchr(linebuf, ')');
719		if (p == NULL)
720			p = linebuf;
721
722		while (*p && !isdigit(*p) && (*p != '|'))
723			p++;
724
725		/* no duration */
726		if (*p == '\0' || *p == '|')
727			goto next;
728
729		num = strtod(p, &unit);
730		if (!unit || strncmp(unit, " us", 3))
731			goto next;
732
733		if (use_nsec)
734			num *= 1000;
735
736		i = log2(num);
737		if (i < 0)
738			i = 0;
739		if (i >= NUM_BUCKET)
740			i = NUM_BUCKET - 1;
741
742		buckets[i]++;
743
744next:
745		/* empty the line buffer for the next output  */
746		linebuf[0] = '\0';
747	}
748
749	/* preserve any remaining output (before newline) */
750	strcat(linebuf, p);
751}
752
753static void display_histogram(int buckets[], bool use_nsec)
754{
755	int i;
756	int total = 0;
757	int bar_total = 46;  /* to fit in 80 column */
758	char bar[] = "###############################################";
759	int bar_len;
760
761	for (i = 0; i < NUM_BUCKET; i++)
762		total += buckets[i];
763
764	if (total == 0) {
765		printf("No data found\n");
766		return;
767	}
768
769	printf("# %14s | %10s | %-*s |\n",
770	       "  DURATION    ", "COUNT", bar_total, "GRAPH");
771
772	bar_len = buckets[0] * bar_total / total;
773	printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
774	       0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
775
776	for (i = 1; i < NUM_BUCKET - 1; i++) {
777		int start = (1 << (i - 1));
778		int stop = 1 << i;
779		const char *unit = use_nsec ? "ns" : "us";
780
781		if (start >= 1024) {
782			start >>= 10;
783			stop >>= 10;
784			unit = use_nsec ? "us" : "ms";
785		}
786		bar_len = buckets[i] * bar_total / total;
787		printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
788		       start, stop, unit, buckets[i], bar_len, bar,
789		       bar_total - bar_len, "");
790	}
791
792	bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
793	printf("  %4d - %-4s %s | %10d | %.*s%*s |\n",
794	       1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
795	       bar_len, bar, bar_total - bar_len, "");
796
797}
798
799static int prepare_func_latency(struct perf_ftrace *ftrace)
800{
801	char *trace_file;
802	int fd;
803
804	if (ftrace->target.use_bpf)
805		return perf_ftrace__latency_prepare_bpf(ftrace);
806
807	if (reset_tracing_files(ftrace) < 0) {
808		pr_err("failed to reset ftrace\n");
809		return -1;
810	}
811
812	/* reset ftrace buffer */
813	if (write_tracing_file("trace", "0") < 0)
814		return -1;
815
816	if (set_tracing_options(ftrace) < 0)
817		return -1;
818
819	/* force to use the function_graph tracer to track duration */
820	if (write_tracing_file("current_tracer", "function_graph") < 0) {
821		pr_err("failed to set current_tracer to function_graph\n");
822		return -1;
823	}
824
825	trace_file = get_tracing_file("trace_pipe");
826	if (!trace_file) {
827		pr_err("failed to open trace_pipe\n");
828		return -1;
829	}
830
831	fd = open(trace_file, O_RDONLY);
832	if (fd < 0)
833		pr_err("failed to open trace_pipe\n");
834
835	put_tracing_file(trace_file);
836	return fd;
837}
838
839static int start_func_latency(struct perf_ftrace *ftrace)
840{
841	if (ftrace->target.use_bpf)
842		return perf_ftrace__latency_start_bpf(ftrace);
843
844	if (write_tracing_file("tracing_on", "1") < 0) {
845		pr_err("can't enable tracing\n");
846		return -1;
847	}
848
849	return 0;
850}
851
852static int stop_func_latency(struct perf_ftrace *ftrace)
853{
854	if (ftrace->target.use_bpf)
855		return perf_ftrace__latency_stop_bpf(ftrace);
856
857	write_tracing_file("tracing_on", "0");
858	return 0;
859}
860
861static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
862{
863	if (ftrace->target.use_bpf)
864		return perf_ftrace__latency_read_bpf(ftrace, buckets);
865
866	return 0;
867}
868
869static int cleanup_func_latency(struct perf_ftrace *ftrace)
870{
871	if (ftrace->target.use_bpf)
872		return perf_ftrace__latency_cleanup_bpf(ftrace);
873
874	reset_tracing_files(ftrace);
875	return 0;
876}
877
878static int __cmd_latency(struct perf_ftrace *ftrace)
879{
880	int trace_fd;
881	char buf[4096];
882	char line[256];
883	struct pollfd pollfd = {
884		.events = POLLIN,
885	};
886	int buckets[NUM_BUCKET] = { };
887
888	if (!(perf_cap__capable(CAP_PERFMON) ||
889	      perf_cap__capable(CAP_SYS_ADMIN))) {
890		pr_err("ftrace only works for %s!\n",
891#ifdef HAVE_LIBCAP_SUPPORT
892		"users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
893#else
894		"root"
895#endif
896		);
897		return -1;
898	}
899
900	trace_fd = prepare_func_latency(ftrace);
901	if (trace_fd < 0)
902		goto out;
903
904	fcntl(trace_fd, F_SETFL, O_NONBLOCK);
905	pollfd.fd = trace_fd;
906
907	if (start_func_latency(ftrace) < 0)
908		goto out;
909
910	evlist__start_workload(ftrace->evlist);
911
912	line[0] = '\0';
913	while (!done) {
914		if (poll(&pollfd, 1, -1) < 0)
915			break;
916
917		if (pollfd.revents & POLLIN) {
918			int n = read(trace_fd, buf, sizeof(buf) - 1);
919			if (n < 0)
920				break;
921
922			make_histogram(buckets, buf, n, line, ftrace->use_nsec);
923		}
924	}
925
926	stop_func_latency(ftrace);
927
928	if (workload_exec_errno) {
929		const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
930		pr_err("workload failed: %s\n", emsg);
931		goto out;
932	}
933
934	/* read remaining buffer contents */
935	while (!ftrace->target.use_bpf) {
936		int n = read(trace_fd, buf, sizeof(buf) - 1);
937		if (n <= 0)
938			break;
939		make_histogram(buckets, buf, n, line, ftrace->use_nsec);
940	}
941
942	read_func_latency(ftrace, buckets);
943
944	display_histogram(buckets, ftrace->use_nsec);
945
946out:
947	close(trace_fd);
948	cleanup_func_latency(ftrace);
949
950	return (done && !workload_exec_errno) ? 0 : -1;
951}
952
953static int perf_ftrace_config(const char *var, const char *value, void *cb)
954{
955	struct perf_ftrace *ftrace = cb;
956
957	if (!strstarts(var, "ftrace."))
958		return 0;
959
960	if (strcmp(var, "ftrace.tracer"))
961		return -1;
962
963	if (!strcmp(value, "function_graph") ||
964	    !strcmp(value, "function")) {
965		ftrace->tracer = value;
966		return 0;
967	}
968
969	pr_err("Please select \"function_graph\" (default) or \"function\"\n");
970	return -1;
971}
972
973static void list_function_cb(char *str, void *arg)
974{
975	struct strfilter *filter = (struct strfilter *)arg;
976
977	if (strfilter__compare(filter, str))
978		printf("%s", str);
979}
980
981static int opt_list_avail_functions(const struct option *opt __maybe_unused,
982				    const char *str, int unset)
983{
984	struct strfilter *filter;
985	const char *err = NULL;
986	int ret;
987
988	if (unset || !str)
989		return -1;
990
991	filter = strfilter__new(str, &err);
992	if (!filter)
993		return err ? -EINVAL : -ENOMEM;
994
995	ret = strfilter__or(filter, str, &err);
996	if (ret == -EINVAL) {
997		pr_err("Filter parse error at %td.\n", err - str + 1);
998		pr_err("Source: \"%s\"\n", str);
999		pr_err("         %*c\n", (int)(err - str + 1), '^');
1000		strfilter__delete(filter);
1001		return ret;
1002	}
1003
1004	ret = read_tracing_file_by_line("available_filter_functions",
1005					list_function_cb, filter);
1006	strfilter__delete(filter);
1007	if (ret < 0)
1008		return ret;
1009
1010	exit(0);
1011}
1012
1013static int parse_filter_func(const struct option *opt, const char *str,
1014			     int unset __maybe_unused)
1015{
1016	struct list_head *head = opt->value;
1017	struct filter_entry *entry;
1018
1019	entry = malloc(sizeof(*entry) + strlen(str) + 1);
1020	if (entry == NULL)
1021		return -ENOMEM;
1022
1023	strcpy(entry->name, str);
1024	list_add_tail(&entry->list, head);
1025
1026	return 0;
1027}
1028
1029static void delete_filter_func(struct list_head *head)
1030{
1031	struct filter_entry *pos, *tmp;
1032
1033	list_for_each_entry_safe(pos, tmp, head, list) {
1034		list_del_init(&pos->list);
1035		free(pos);
1036	}
1037}
1038
1039static int parse_buffer_size(const struct option *opt,
1040			     const char *str, int unset)
1041{
1042	unsigned long *s = (unsigned long *)opt->value;
1043	static struct parse_tag tags_size[] = {
1044		{ .tag  = 'B', .mult = 1       },
1045		{ .tag  = 'K', .mult = 1 << 10 },
1046		{ .tag  = 'M', .mult = 1 << 20 },
1047		{ .tag  = 'G', .mult = 1 << 30 },
1048		{ .tag  = 0 },
1049	};
1050	unsigned long val;
1051
1052	if (unset) {
1053		*s = 0;
1054		return 0;
1055	}
1056
1057	val = parse_tag_value(str, tags_size);
1058	if (val != (unsigned long) -1) {
1059		if (val < 1024) {
1060			pr_err("buffer size too small, must larger than 1KB.");
1061			return -1;
1062		}
1063		*s = val;
1064		return 0;
1065	}
1066
1067	return -1;
1068}
1069
1070static int parse_func_tracer_opts(const struct option *opt,
1071				  const char *str, int unset)
1072{
1073	int ret;
1074	struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
1075	struct sublevel_option func_tracer_opts[] = {
1076		{ .name = "call-graph",	.value_ptr = &ftrace->func_stack_trace },
1077		{ .name = "irq-info",	.value_ptr = &ftrace->func_irq_info },
1078		{ .name = NULL, }
1079	};
1080
1081	if (unset)
1082		return 0;
1083
1084	ret = perf_parse_sublevel_options(str, func_tracer_opts);
1085	if (ret)
1086		return ret;
1087
1088	return 0;
1089}
1090
1091static int parse_graph_tracer_opts(const struct option *opt,
1092				  const char *str, int unset)
1093{
1094	int ret;
1095	struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
1096	struct sublevel_option graph_tracer_opts[] = {
1097		{ .name = "nosleep-time",	.value_ptr = &ftrace->graph_nosleep_time },
1098		{ .name = "noirqs",		.value_ptr = &ftrace->graph_noirqs },
1099		{ .name = "verbose",		.value_ptr = &ftrace->graph_verbose },
1100		{ .name = "thresh",		.value_ptr = &ftrace->graph_thresh },
1101		{ .name = "depth",		.value_ptr = &ftrace->graph_depth },
1102		{ .name = NULL, }
1103	};
1104
1105	if (unset)
1106		return 0;
1107
1108	ret = perf_parse_sublevel_options(str, graph_tracer_opts);
1109	if (ret)
1110		return ret;
1111
1112	return 0;
1113}
1114
1115enum perf_ftrace_subcommand {
1116	PERF_FTRACE_NONE,
1117	PERF_FTRACE_TRACE,
1118	PERF_FTRACE_LATENCY,
1119};
1120
1121int cmd_ftrace(int argc, const char **argv)
1122{
1123	int ret;
1124	int (*cmd_func)(struct perf_ftrace *) = NULL;
1125	struct perf_ftrace ftrace = {
1126		.tracer = DEFAULT_TRACER,
1127		.target = { .uid = UINT_MAX, },
1128	};
1129	const struct option common_options[] = {
1130	OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
1131		   "Trace on existing process id"),
1132	/* TODO: Add short option -t after -t/--tracer can be removed. */
1133	OPT_STRING(0, "tid", &ftrace.target.tid, "tid",
1134		   "Trace on existing thread id (exclusive to --pid)"),
1135	OPT_INCR('v', "verbose", &verbose,
1136		 "Be more verbose"),
1137	OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide,
1138		    "System-wide collection from all CPUs"),
1139	OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
1140		    "List of cpus to monitor"),
1141	OPT_END()
1142	};
1143	const struct option ftrace_options[] = {
1144	OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
1145		   "Tracer to use: function_graph(default) or function"),
1146	OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
1147			     "Show available functions to filter",
1148			     opt_list_avail_functions, "*"),
1149	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
1150		     "Trace given functions using function tracer",
1151		     parse_filter_func),
1152	OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
1153		     "Do not trace given functions", parse_filter_func),
1154	OPT_CALLBACK(0, "func-opts", &ftrace, "options",
1155		     "Function tracer options, available options: call-graph,irq-info",
1156		     parse_func_tracer_opts),
1157	OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
1158		     "Trace given functions using function_graph tracer",
1159		     parse_filter_func),
1160	OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
1161		     "Set nograph filter on given functions", parse_filter_func),
1162	OPT_CALLBACK(0, "graph-opts", &ftrace, "options",
1163		     "Graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=<n>,depth=<n>",
1164		     parse_graph_tracer_opts),
1165	OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
1166		     "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
1167	OPT_BOOLEAN(0, "inherit", &ftrace.inherit,
1168		    "Trace children processes"),
1169	OPT_INTEGER('D', "delay", &ftrace.target.initial_delay,
1170		    "Number of milliseconds to wait before starting tracing after program start"),
1171	OPT_PARENT(common_options),
1172	};
1173	const struct option latency_options[] = {
1174	OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
1175		     "Show latency of given function", parse_filter_func),
1176#ifdef HAVE_BPF_SKEL
1177	OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
1178		    "Use BPF to measure function latency"),
1179#endif
1180	OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
1181		    "Use nano-second histogram"),
1182	OPT_PARENT(common_options),
1183	};
1184	const struct option *options = ftrace_options;
1185
1186	const char * const ftrace_usage[] = {
1187		"perf ftrace [<options>] [<command>]",
1188		"perf ftrace [<options>] -- [<command>] [<options>]",
1189		"perf ftrace {trace|latency} [<options>] [<command>]",
1190		"perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
1191		NULL
1192	};
1193	enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
1194
1195	INIT_LIST_HEAD(&ftrace.filters);
1196	INIT_LIST_HEAD(&ftrace.notrace);
1197	INIT_LIST_HEAD(&ftrace.graph_funcs);
1198	INIT_LIST_HEAD(&ftrace.nograph_funcs);
1199
1200	signal(SIGINT, sig_handler);
1201	signal(SIGUSR1, sig_handler);
1202	signal(SIGCHLD, sig_handler);
1203	signal(SIGPIPE, sig_handler);
1204
1205	ret = perf_config(perf_ftrace_config, &ftrace);
1206	if (ret < 0)
1207		return -1;
1208
1209	if (argc > 1) {
1210		if (!strcmp(argv[1], "trace")) {
1211			subcmd = PERF_FTRACE_TRACE;
1212		} else if (!strcmp(argv[1], "latency")) {
1213			subcmd = PERF_FTRACE_LATENCY;
1214			options = latency_options;
1215		}
1216
1217		if (subcmd != PERF_FTRACE_NONE) {
1218			argc--;
1219			argv++;
1220		}
1221	}
1222	/* for backward compatibility */
1223	if (subcmd == PERF_FTRACE_NONE)
1224		subcmd = PERF_FTRACE_TRACE;
1225
1226	argc = parse_options(argc, argv, options, ftrace_usage,
1227			    PARSE_OPT_STOP_AT_NON_OPTION);
1228	if (argc < 0) {
1229		ret = -EINVAL;
1230		goto out_delete_filters;
1231	}
1232
1233	/* Make system wide (-a) the default target. */
1234	if (!argc && target__none(&ftrace.target))
1235		ftrace.target.system_wide = true;
1236
1237	switch (subcmd) {
1238	case PERF_FTRACE_TRACE:
1239		cmd_func = __cmd_ftrace;
1240		break;
1241	case PERF_FTRACE_LATENCY:
1242		if (list_empty(&ftrace.filters)) {
1243			pr_err("Should provide a function to measure\n");
1244			parse_options_usage(ftrace_usage, options, "T", 1);
1245			ret = -EINVAL;
1246			goto out_delete_filters;
1247		}
1248		cmd_func = __cmd_latency;
1249		break;
1250	case PERF_FTRACE_NONE:
1251	default:
1252		pr_err("Invalid subcommand\n");
1253		ret = -EINVAL;
1254		goto out_delete_filters;
1255	}
1256
1257	ret = target__validate(&ftrace.target);
1258	if (ret) {
1259		char errbuf[512];
1260
1261		target__strerror(&ftrace.target, ret, errbuf, 512);
1262		pr_err("%s\n", errbuf);
1263		goto out_delete_filters;
1264	}
1265
1266	ftrace.evlist = evlist__new();
1267	if (ftrace.evlist == NULL) {
1268		ret = -ENOMEM;
1269		goto out_delete_filters;
1270	}
1271
1272	ret = evlist__create_maps(ftrace.evlist, &ftrace.target);
1273	if (ret < 0)
1274		goto out_delete_evlist;
1275
1276	if (argc) {
1277		ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target,
1278					       argv, false,
1279					       ftrace__workload_exec_failed_signal);
1280		if (ret < 0)
1281			goto out_delete_evlist;
1282	}
1283
1284	ret = cmd_func(&ftrace);
1285
1286out_delete_evlist:
1287	evlist__delete(ftrace.evlist);
1288
1289out_delete_filters:
1290	delete_filter_func(&ftrace.filters);
1291	delete_filter_func(&ftrace.notrace);
1292	delete_filter_func(&ftrace.graph_funcs);
1293	delete_filter_func(&ftrace.nograph_funcs);
1294
1295	return ret;
1296}
1297