1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6#define _GNU_SOURCE
7#include <getopt.h>
8#include <stdlib.h>
9#include <string.h>
10#include <signal.h>
11#include <unistd.h>
12#include <stdio.h>
13#include <time.h>
14#include <sched.h>
15#include <pthread.h>
16
17#include "utils.h"
18#include "osnoise.h"
19#include "timerlat.h"
20#include "timerlat_aa.h"
21#include "timerlat_u.h"
22
23struct timerlat_hist_params {
24	char			*cpus;
25	cpu_set_t		monitored_cpus;
26	char			*trace_output;
27	char			*cgroup_name;
28	unsigned long long	runtime;
29	long long		stop_us;
30	long long		stop_total_us;
31	long long		timerlat_period_us;
32	long long		print_stack;
33	int			sleep_time;
34	int			output_divisor;
35	int			duration;
36	int			set_sched;
37	int			dma_latency;
38	int			cgroup;
39	int			hk_cpus;
40	int			no_aa;
41	int			dump_tasks;
42	int			user_workload;
43	int			user_hist;
44	cpu_set_t		hk_cpu_set;
45	struct sched_attr	sched_param;
46	struct trace_events	*events;
47	char			no_irq;
48	char			no_thread;
49	char			no_header;
50	char			no_summary;
51	char			no_index;
52	char			with_zeros;
53	int			bucket_size;
54	int			entries;
55};
56
/*
 * timerlat_hist_cpu - histograms and statistics of a single CPU
 *
 * Each histogram holds "entries + 1" counters: one per bucket plus a last
 * slot that accounts the samples that do not fit in any bucket.
 */
struct timerlat_hist_cpu {
	/* bucket counters for the IRQ, thread and user-space latencies */
	int			*irq;
	int			*thread;
	int			*user;

	/* number of samples accounted in each histogram */
	int			irq_count;
	int			thread_count;
	int			user_count;

	/* IRQ latency: smallest, accumulated and largest sample */
	unsigned long long	min_irq;
	unsigned long long	sum_irq;
	unsigned long long	max_irq;

	/* thread latency: smallest, accumulated and largest sample */
	unsigned long long	min_thread;
	unsigned long long	sum_thread;
	unsigned long long	max_thread;

	/* user-space latency: smallest, accumulated and largest sample */
	unsigned long long	min_user;
	unsigned long long	sum_user;
	unsigned long long	max_user;
};
78
/*
 * timerlat_hist_data - runtime data of the histogram tool
 */
struct timerlat_hist_data {
	/* array with nr_cpus elements, indexed by cpu */
	struct timerlat_hist_cpu	*hist;
	/* number of buckets of each histogram */
	int				entries;
	/* width of each bucket */
	int				bucket_size;
	/* number of elements in hist */
	int				nr_cpus;
};
85
86/*
87 * timerlat_free_histogram - free runtime data
88 */
89static void
90timerlat_free_histogram(struct timerlat_hist_data *data)
91{
92	int cpu;
93
94	/* one histogram for IRQ and one for thread, per CPU */
95	for (cpu = 0; cpu < data->nr_cpus; cpu++) {
96		if (data->hist[cpu].irq)
97			free(data->hist[cpu].irq);
98
99		if (data->hist[cpu].thread)
100			free(data->hist[cpu].thread);
101
102		if (data->hist[cpu].user)
103			free(data->hist[cpu].user);
104
105	}
106
107	/* one set of histograms per CPU */
108	if (data->hist)
109		free(data->hist);
110
111	free(data);
112}
113
114/*
115 * timerlat_alloc_histogram - alloc runtime data
116 */
117static struct timerlat_hist_data
118*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size)
119{
120	struct timerlat_hist_data *data;
121	int cpu;
122
123	data = calloc(1, sizeof(*data));
124	if (!data)
125		return NULL;
126
127	data->entries = entries;
128	data->bucket_size = bucket_size;
129	data->nr_cpus = nr_cpus;
130
131	/* one set of histograms per CPU */
132	data->hist = calloc(1, sizeof(*data->hist) * nr_cpus);
133	if (!data->hist)
134		goto cleanup;
135
136	/* one histogram for IRQ and one for thread, per cpu */
137	for (cpu = 0; cpu < nr_cpus; cpu++) {
138		data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1));
139		if (!data->hist[cpu].irq)
140			goto cleanup;
141
142		data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1));
143		if (!data->hist[cpu].thread)
144			goto cleanup;
145
146		data->hist[cpu].user = calloc(1, sizeof(*data->hist->user) * (entries + 1));
147		if (!data->hist[cpu].user)
148			goto cleanup;
149	}
150
151	/* set the min to max */
152	for (cpu = 0; cpu < nr_cpus; cpu++) {
153		data->hist[cpu].min_irq = ~0;
154		data->hist[cpu].min_thread = ~0;
155		data->hist[cpu].min_user = ~0;
156	}
157
158	return data;
159
160cleanup:
161	timerlat_free_histogram(data);
162	return NULL;
163}
164
165/*
166 * timerlat_hist_update - record a new timerlat occurent on cpu, updating data
167 */
168static void
169timerlat_hist_update(struct osnoise_tool *tool, int cpu,
170		     unsigned long long context,
171		     unsigned long long latency)
172{
173	struct timerlat_hist_params *params = tool->params;
174	struct timerlat_hist_data *data = tool->data;
175	int entries = data->entries;
176	int bucket;
177	int *hist;
178
179	if (params->output_divisor)
180		latency = latency / params->output_divisor;
181
182	bucket = latency / data->bucket_size;
183
184	if (!context) {
185		hist = data->hist[cpu].irq;
186		data->hist[cpu].irq_count++;
187		update_min(&data->hist[cpu].min_irq, &latency);
188		update_sum(&data->hist[cpu].sum_irq, &latency);
189		update_max(&data->hist[cpu].max_irq, &latency);
190	} else if (context == 1) {
191		hist = data->hist[cpu].thread;
192		data->hist[cpu].thread_count++;
193		update_min(&data->hist[cpu].min_thread, &latency);
194		update_sum(&data->hist[cpu].sum_thread, &latency);
195		update_max(&data->hist[cpu].max_thread, &latency);
196	} else { /* user */
197		hist = data->hist[cpu].user;
198		data->hist[cpu].user_count++;
199		update_min(&data->hist[cpu].min_user, &latency);
200		update_sum(&data->hist[cpu].sum_user, &latency);
201		update_max(&data->hist[cpu].max_user, &latency);
202	}
203
204	if (bucket < entries)
205		hist[bucket]++;
206	else
207		hist[entries]++;
208}
209
210/*
211 * timerlat_hist_handler - this is the handler for timerlat tracer events
212 */
213static int
214timerlat_hist_handler(struct trace_seq *s, struct tep_record *record,
215		     struct tep_event *event, void *data)
216{
217	struct trace_instance *trace = data;
218	unsigned long long context, latency;
219	struct osnoise_tool *tool;
220	int cpu = record->cpu;
221
222	tool = container_of(trace, struct osnoise_tool, trace);
223
224	tep_get_field_val(s, event, "context", record, &context, 1);
225	tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
226
227	timerlat_hist_update(tool, cpu, context, latency);
228
229	return 0;
230}
231
232/*
233 * timerlat_hist_header - print the header of the tracer to the output
234 */
235static void timerlat_hist_header(struct osnoise_tool *tool)
236{
237	struct timerlat_hist_params *params = tool->params;
238	struct timerlat_hist_data *data = tool->data;
239	struct trace_seq *s = tool->trace.seq;
240	char duration[26];
241	int cpu;
242
243	if (params->no_header)
244		return;
245
246	get_duration(tool->start_time, duration, sizeof(duration));
247	trace_seq_printf(s, "# RTLA timerlat histogram\n");
248	trace_seq_printf(s, "# Time unit is %s (%s)\n",
249			params->output_divisor == 1 ? "nanoseconds" : "microseconds",
250			params->output_divisor == 1 ? "ns" : "us");
251
252	trace_seq_printf(s, "# Duration: %s\n", duration);
253
254	if (!params->no_index)
255		trace_seq_printf(s, "Index");
256
257	for (cpu = 0; cpu < data->nr_cpus; cpu++) {
258		if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
259			continue;
260
261		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
262			continue;
263
264		if (!params->no_irq)
265			trace_seq_printf(s, "   IRQ-%03d", cpu);
266
267		if (!params->no_thread)
268			trace_seq_printf(s, "   Thr-%03d", cpu);
269
270		if (params->user_hist)
271			trace_seq_printf(s, "   Usr-%03d", cpu);
272	}
273	trace_seq_printf(s, "\n");
274
275
276	trace_seq_do_printf(s);
277	trace_seq_reset(s);
278}
279
280/*
281 * timerlat_print_summary - print the summary of the hist data to the output
282 */
283static void
284timerlat_print_summary(struct timerlat_hist_params *params,
285		       struct trace_instance *trace,
286		       struct timerlat_hist_data *data)
287{
288	int cpu;
289
290	if (params->no_summary)
291		return;
292
293	if (!params->no_index)
294		trace_seq_printf(trace->seq, "count:");
295
296	for (cpu = 0; cpu < data->nr_cpus; cpu++) {
297		if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
298			continue;
299
300		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
301			continue;
302
303		if (!params->no_irq)
304			trace_seq_printf(trace->seq, "%9d ",
305					data->hist[cpu].irq_count);
306
307		if (!params->no_thread)
308			trace_seq_printf(trace->seq, "%9d ",
309					data->hist[cpu].thread_count);
310
311		if (params->user_hist)
312			trace_seq_printf(trace->seq, "%9d ",
313					 data->hist[cpu].user_count);
314	}
315	trace_seq_printf(trace->seq, "\n");
316
317	if (!params->no_index)
318		trace_seq_printf(trace->seq, "min:  ");
319
320	for (cpu = 0; cpu < data->nr_cpus; cpu++) {
321		if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
322			continue;
323
324		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
325			continue;
326
327		if (!params->no_irq)
328			trace_seq_printf(trace->seq, "%9llu ",
329					data->hist[cpu].min_irq);
330
331		if (!params->no_thread)
332			trace_seq_printf(trace->seq, "%9llu ",
333					data->hist[cpu].min_thread);
334
335		if (params->user_hist)
336			trace_seq_printf(trace->seq, "%9llu ",
337					data->hist[cpu].min_user);
338	}
339	trace_seq_printf(trace->seq, "\n");
340
341	if (!params->no_index)
342		trace_seq_printf(trace->seq, "avg:  ");
343
344	for (cpu = 0; cpu < data->nr_cpus; cpu++) {
345		if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
346			continue;
347
348		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
349			continue;
350
351		if (!params->no_irq) {
352			if (data->hist[cpu].irq_count)
353				trace_seq_printf(trace->seq, "%9llu ",
354						 data->hist[cpu].sum_irq / data->hist[cpu].irq_count);
355			else
356				trace_seq_printf(trace->seq, "        - ");
357		}
358
359		if (!params->no_thread) {
360			if (data->hist[cpu].thread_count)
361				trace_seq_printf(trace->seq, "%9llu ",
362						 data->hist[cpu].sum_thread / data->hist[cpu].thread_count);
363			else
364				trace_seq_printf(trace->seq, "        - ");
365		}
366
367		if (params->user_hist) {
368			if (data->hist[cpu].user_count)
369				trace_seq_printf(trace->seq, "%9llu ",
370						 data->hist[cpu].sum_user / data->hist[cpu].user_count);
371			else
372				trace_seq_printf(trace->seq, "        - ");
373		}
374	}
375	trace_seq_printf(trace->seq, "\n");
376
377	if (!params->no_index)
378		trace_seq_printf(trace->seq, "max:  ");
379
380	for (cpu = 0; cpu < data->nr_cpus; cpu++) {
381		if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
382			continue;
383
384		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
385			continue;
386
387		if (!params->no_irq)
388			trace_seq_printf(trace->seq, "%9llu ",
389					data->hist[cpu].max_irq);
390
391		if (!params->no_thread)
392			trace_seq_printf(trace->seq, "%9llu ",
393					data->hist[cpu].max_thread);
394
395		if (params->user_hist)
396			trace_seq_printf(trace->seq, "%9llu ",
397					data->hist[cpu].max_user);
398	}
399	trace_seq_printf(trace->seq, "\n");
400	trace_seq_do_printf(trace->seq);
401	trace_seq_reset(trace->seq);
402}
403
404/*
405 * timerlat_print_stats - print data for all CPUs
406 */
407static void
408timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool)
409{
410	struct timerlat_hist_data *data = tool->data;
411	struct trace_instance *trace = &tool->trace;
412	int bucket, cpu;
413	int total;
414
415	timerlat_hist_header(tool);
416
417	for (bucket = 0; bucket < data->entries; bucket++) {
418		total = 0;
419
420		if (!params->no_index)
421			trace_seq_printf(trace->seq, "%-6d",
422					 bucket * data->bucket_size);
423
424		for (cpu = 0; cpu < data->nr_cpus; cpu++) {
425			if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
426				continue;
427
428			if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
429				continue;
430
431			if (!params->no_irq) {
432				total += data->hist[cpu].irq[bucket];
433				trace_seq_printf(trace->seq, "%9d ",
434						data->hist[cpu].irq[bucket]);
435			}
436
437			if (!params->no_thread) {
438				total += data->hist[cpu].thread[bucket];
439				trace_seq_printf(trace->seq, "%9d ",
440						data->hist[cpu].thread[bucket]);
441			}
442
443			if (params->user_hist) {
444				total += data->hist[cpu].user[bucket];
445				trace_seq_printf(trace->seq, "%9d ",
446						data->hist[cpu].user[bucket]);
447			}
448
449		}
450
451		if (total == 0 && !params->with_zeros) {
452			trace_seq_reset(trace->seq);
453			continue;
454		}
455
456		trace_seq_printf(trace->seq, "\n");
457		trace_seq_do_printf(trace->seq);
458		trace_seq_reset(trace->seq);
459	}
460
461	if (!params->no_index)
462		trace_seq_printf(trace->seq, "over: ");
463
464	for (cpu = 0; cpu < data->nr_cpus; cpu++) {
465		if (params->cpus && !CPU_ISSET(cpu, &params->monitored_cpus))
466			continue;
467
468		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
469			continue;
470
471		if (!params->no_irq)
472			trace_seq_printf(trace->seq, "%9d ",
473					 data->hist[cpu].irq[data->entries]);
474
475		if (!params->no_thread)
476			trace_seq_printf(trace->seq, "%9d ",
477					 data->hist[cpu].thread[data->entries]);
478
479		if (params->user_hist)
480			trace_seq_printf(trace->seq, "%9d ",
481					 data->hist[cpu].user[data->entries]);
482	}
483	trace_seq_printf(trace->seq, "\n");
484	trace_seq_do_printf(trace->seq);
485	trace_seq_reset(trace->seq);
486
487	timerlat_print_summary(params, trace, data);
488}
489
490/*
491 * timerlat_hist_usage - prints timerlat top usage message
492 */
493static void timerlat_hist_usage(char *usage)
494{
495	int i;
496
497	char *msg[] = {
498		"",
499		"  usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\",
500		"         [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\",
501		"	  [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
502		"	  [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u]",
503		"",
504		"	  -h/--help: print this menu",
505		"	  -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
506		"	  -p/--period us: timerlat period in us",
507		"	  -i/--irq us: stop trace if the irq latency is higher than the argument in us",
508		"	  -T/--thread us: stop trace if the thread latency is higher than the argument in us",
509		"	  -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
510		"	  -c/--cpus cpus: run the tracer only on the given cpus",
511		"	  -H/--house-keeping cpus: run rtla control threads only on the given cpus",
512		"	  -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited",
513		"	  -d/--duration time[m|h|d]: duration of the session in seconds",
514		"	     --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
515		"	  -D/--debug: print debug info",
516		"	  -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
517		"	  -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
518		"	     --filter <filter>: enable a trace event filter to the previous -e event",
519		"	     --trigger <trigger>: enable a trace event trigger to the previous -e event",
520		"	  -n/--nano: display data in nanoseconds",
521		"	     --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage",
522		"	  -b/--bucket-size N: set the histogram bucket size (default 1)",
523		"	  -E/--entries N: set the number of entries of the histogram (default 256)",
524		"	     --no-irq: ignore IRQ latencies",
525		"	     --no-thread: ignore thread latencies",
526		"	     --no-header: do not print header",
527		"	     --no-summary: do not print summary",
528		"	     --no-index: do not print index",
529		"	     --with-zeros: print zero only entries",
530		"	     --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency",
531		"	  -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
532		"		o:prio - use SCHED_OTHER with prio",
533		"		r:prio - use SCHED_RR with prio",
534		"		f:prio - use SCHED_FIFO with prio",
535		"		d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
536		"						       in nanoseconds",
537		"	  -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads",
538		"	  -U/--user-load: enable timerlat for user-defined user-space workload",
539		NULL,
540	};
541
542	if (usage)
543		fprintf(stderr, "%s\n", usage);
544
545	fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n",
546			VERSION);
547
548	for (i = 0; msg[i]; i++)
549		fprintf(stderr, "%s\n", msg[i]);
550
551	if (usage)
552		exit(EXIT_FAILURE);
553
554	exit(EXIT_SUCCESS);
555}
556
557/*
558 * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters
559 */
560static struct timerlat_hist_params
561*timerlat_hist_parse_args(int argc, char *argv[])
562{
563	struct timerlat_hist_params *params;
564	struct trace_events *tevent;
565	int auto_thresh;
566	int retval;
567	int c;
568
569	params = calloc(1, sizeof(*params));
570	if (!params)
571		exit(1);
572
573	/* disabled by default */
574	params->dma_latency = -1;
575
576	/* display data in microseconds */
577	params->output_divisor = 1000;
578	params->bucket_size = 1;
579	params->entries = 256;
580
581	while (1) {
582		static struct option long_options[] = {
583			{"auto",		required_argument,	0, 'a'},
584			{"cpus",		required_argument,	0, 'c'},
585			{"cgroup",		optional_argument,	0, 'C'},
586			{"bucket-size",		required_argument,	0, 'b'},
587			{"debug",		no_argument,		0, 'D'},
588			{"entries",		required_argument,	0, 'E'},
589			{"duration",		required_argument,	0, 'd'},
590			{"house-keeping",	required_argument,	0, 'H'},
591			{"help",		no_argument,		0, 'h'},
592			{"irq",			required_argument,	0, 'i'},
593			{"nano",		no_argument,		0, 'n'},
594			{"period",		required_argument,	0, 'p'},
595			{"priority",		required_argument,	0, 'P'},
596			{"stack",		required_argument,	0, 's'},
597			{"thread",		required_argument,	0, 'T'},
598			{"trace",		optional_argument,	0, 't'},
599			{"user-threads",	no_argument,		0, 'u'},
600			{"user-load",		no_argument,		0, 'U'},
601			{"event",		required_argument,	0, 'e'},
602			{"no-irq",		no_argument,		0, '0'},
603			{"no-thread",		no_argument,		0, '1'},
604			{"no-header",		no_argument,		0, '2'},
605			{"no-summary",		no_argument,		0, '3'},
606			{"no-index",		no_argument,		0, '4'},
607			{"with-zeros",		no_argument,		0, '5'},
608			{"trigger",		required_argument,	0, '6'},
609			{"filter",		required_argument,	0, '7'},
610			{"dma-latency",		required_argument,	0, '8'},
611			{"no-aa",		no_argument,		0, '9'},
612			{"dump-task",		no_argument,		0, '\1'},
613			{0, 0, 0, 0}
614		};
615
616		/* getopt_long stores the option index here. */
617		int option_index = 0;
618
619		c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:uU0123456:7:8:9\1",
620				 long_options, &option_index);
621
622		/* detect the end of the options. */
623		if (c == -1)
624			break;
625
626		switch (c) {
627		case 'a':
628			auto_thresh = get_llong_from_str(optarg);
629
630			/* set thread stop to auto_thresh */
631			params->stop_total_us = auto_thresh;
632			params->stop_us = auto_thresh;
633
634			/* get stack trace */
635			params->print_stack = auto_thresh;
636
637			/* set trace */
638			params->trace_output = "timerlat_trace.txt";
639
640			break;
641		case 'c':
642			retval = parse_cpu_set(optarg, &params->monitored_cpus);
643			if (retval)
644				timerlat_hist_usage("\nInvalid -c cpu list\n");
645			params->cpus = optarg;
646			break;
647		case 'C':
648			params->cgroup = 1;
649			if (!optarg) {
650				/* will inherit this cgroup */
651				params->cgroup_name = NULL;
652			} else if (*optarg == '=') {
653				/* skip the = */
654				params->cgroup_name = ++optarg;
655			}
656			break;
657		case 'b':
658			params->bucket_size = get_llong_from_str(optarg);
659			if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
660				timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n");
661			break;
662		case 'D':
663			config_debug = 1;
664			break;
665		case 'd':
666			params->duration = parse_seconds_duration(optarg);
667			if (!params->duration)
668				timerlat_hist_usage("Invalid -D duration\n");
669			break;
670		case 'e':
671			tevent = trace_event_alloc(optarg);
672			if (!tevent) {
673				err_msg("Error alloc trace event");
674				exit(EXIT_FAILURE);
675			}
676
677			if (params->events)
678				tevent->next = params->events;
679
680			params->events = tevent;
681			break;
682		case 'E':
683			params->entries = get_llong_from_str(optarg);
684			if ((params->entries < 10) || (params->entries > 9999999))
685					timerlat_hist_usage("Entries must be > 10 and < 9999999\n");
686			break;
687		case 'h':
688		case '?':
689			timerlat_hist_usage(NULL);
690			break;
691		case 'H':
692			params->hk_cpus = 1;
693			retval = parse_cpu_set(optarg, &params->hk_cpu_set);
694			if (retval) {
695				err_msg("Error parsing house keeping CPUs\n");
696				exit(EXIT_FAILURE);
697			}
698			break;
699		case 'i':
700			params->stop_us = get_llong_from_str(optarg);
701			break;
702		case 'n':
703			params->output_divisor = 1;
704			break;
705		case 'p':
706			params->timerlat_period_us = get_llong_from_str(optarg);
707			if (params->timerlat_period_us > 1000000)
708				timerlat_hist_usage("Period longer than 1 s\n");
709			break;
710		case 'P':
711			retval = parse_prio(optarg, &params->sched_param);
712			if (retval == -1)
713				timerlat_hist_usage("Invalid -P priority");
714			params->set_sched = 1;
715			break;
716		case 's':
717			params->print_stack = get_llong_from_str(optarg);
718			break;
719		case 'T':
720			params->stop_total_us = get_llong_from_str(optarg);
721			break;
722		case 't':
723			if (optarg)
724				/* skip = */
725				params->trace_output = &optarg[1];
726			else
727				params->trace_output = "timerlat_trace.txt";
728			break;
729		case 'u':
730			params->user_workload = 1;
731			/* fallback: -u implies in -U */
732		case 'U':
733			params->user_hist = 1;
734			break;
735		case '0': /* no irq */
736			params->no_irq = 1;
737			break;
738		case '1': /* no thread */
739			params->no_thread = 1;
740			break;
741		case '2': /* no header */
742			params->no_header = 1;
743			break;
744		case '3': /* no summary */
745			params->no_summary = 1;
746			break;
747		case '4': /* no index */
748			params->no_index = 1;
749			break;
750		case '5': /* with zeros */
751			params->with_zeros = 1;
752			break;
753		case '6': /* trigger */
754			if (params->events) {
755				retval = trace_event_add_trigger(params->events, optarg);
756				if (retval) {
757					err_msg("Error adding trigger %s\n", optarg);
758					exit(EXIT_FAILURE);
759				}
760			} else {
761				timerlat_hist_usage("--trigger requires a previous -e\n");
762			}
763			break;
764		case '7': /* filter */
765			if (params->events) {
766				retval = trace_event_add_filter(params->events, optarg);
767				if (retval) {
768					err_msg("Error adding filter %s\n", optarg);
769					exit(EXIT_FAILURE);
770				}
771			} else {
772				timerlat_hist_usage("--filter requires a previous -e\n");
773			}
774			break;
775		case '8':
776			params->dma_latency = get_llong_from_str(optarg);
777			if (params->dma_latency < 0 || params->dma_latency > 10000) {
778				err_msg("--dma-latency needs to be >= 0 and < 10000");
779				exit(EXIT_FAILURE);
780			}
781			break;
782		case '9':
783			params->no_aa = 1;
784			break;
785		case '\1':
786			params->dump_tasks = 1;
787			break;
788		default:
789			timerlat_hist_usage("Invalid option");
790		}
791	}
792
793	if (geteuid()) {
794		err_msg("rtla needs root permission\n");
795		exit(EXIT_FAILURE);
796	}
797
798	if (params->no_irq && params->no_thread)
799		timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here");
800
801	if (params->no_index && !params->with_zeros)
802		timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense");
803
804	/*
805	 * Auto analysis only happens if stop tracing, thus:
806	 */
807	if (!params->stop_us && !params->stop_total_us)
808		params->no_aa = 1;
809
810	return params;
811}
812
813/*
814 * timerlat_hist_apply_config - apply the hist configs to the initialized tool
815 */
816static int
817timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params)
818{
819	int retval, i;
820
821	if (!params->sleep_time)
822		params->sleep_time = 1;
823
824	if (params->cpus) {
825		retval = osnoise_set_cpus(tool->context, params->cpus);
826		if (retval) {
827			err_msg("Failed to apply CPUs config\n");
828			goto out_err;
829		}
830	} else {
831		for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++)
832			CPU_SET(i, &params->monitored_cpus);
833	}
834
835	if (params->stop_us) {
836		retval = osnoise_set_stop_us(tool->context, params->stop_us);
837		if (retval) {
838			err_msg("Failed to set stop us\n");
839			goto out_err;
840		}
841	}
842
843	if (params->stop_total_us) {
844		retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
845		if (retval) {
846			err_msg("Failed to set stop total us\n");
847			goto out_err;
848		}
849	}
850
851	if (params->timerlat_period_us) {
852		retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us);
853		if (retval) {
854			err_msg("Failed to set timerlat period\n");
855			goto out_err;
856		}
857	}
858
859	if (params->print_stack) {
860		retval = osnoise_set_print_stack(tool->context, params->print_stack);
861		if (retval) {
862			err_msg("Failed to set print stack\n");
863			goto out_err;
864		}
865	}
866
867	if (params->hk_cpus) {
868		retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set),
869					   &params->hk_cpu_set);
870		if (retval == -1) {
871			err_msg("Failed to set rtla to the house keeping CPUs\n");
872			goto out_err;
873		}
874	} else if (params->cpus) {
875		/*
876		 * Even if the user do not set a house-keeping CPU, try to
877		 * move rtla to a CPU set different to the one where the user
878		 * set the workload to run.
879		 *
880		 * No need to check results as this is an automatic attempt.
881		 */
882		auto_house_keeping(&params->monitored_cpus);
883	}
884
885	if (params->user_hist) {
886		retval = osnoise_set_workload(tool->context, 0);
887		if (retval) {
888			err_msg("Failed to set OSNOISE_WORKLOAD option\n");
889			goto out_err;
890		}
891	}
892
893	return 0;
894
895out_err:
896	return -1;
897}
898
899/*
900 * timerlat_init_hist - initialize a timerlat hist tool with parameters
901 */
902static struct osnoise_tool
903*timerlat_init_hist(struct timerlat_hist_params *params)
904{
905	struct osnoise_tool *tool;
906	int nr_cpus;
907
908	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
909
910	tool = osnoise_init_tool("timerlat_hist");
911	if (!tool)
912		return NULL;
913
914	tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size);
915	if (!tool->data)
916		goto out_err;
917
918	tool->params = params;
919
920	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
921				   timerlat_hist_handler, tool);
922
923	return tool;
924
925out_err:
926	osnoise_destroy_tool(tool);
927	return NULL;
928}
929
/*
 * Written by the signal handler and read by the regular code, so it has
 * to be a volatile sig_atomic_t: this is the only kind of object a
 * signal handler can portably write to.
 */
static volatile sig_atomic_t stop_tracing;

/*
 * stop_hist - signal handler that requests the end of the session
 *
 * It only raises the stop_tracing flag; @sig is not used.
 */
static void stop_hist(int sig)
{
	(void)sig;
	stop_tracing = 1;
}
935
936/*
937 * timerlat_hist_set_signals - handles the signal to stop the tool
938 */
939static void
940timerlat_hist_set_signals(struct timerlat_hist_params *params)
941{
942	signal(SIGINT, stop_hist);
943	if (params->duration) {
944		signal(SIGALRM, stop_hist);
945		alarm(params->duration);
946	}
947}
948
949int timerlat_hist_main(int argc, char *argv[])
950{
951	struct timerlat_hist_params *params;
952	struct osnoise_tool *record = NULL;
953	struct timerlat_u_params params_u;
954	struct osnoise_tool *tool = NULL;
955	struct osnoise_tool *aa = NULL;
956	struct trace_instance *trace;
957	int dma_latency_fd = -1;
958	int return_value = 1;
959	pthread_t timerlat_u;
960	int retval;
961
962	params = timerlat_hist_parse_args(argc, argv);
963	if (!params)
964		exit(1);
965
966	tool = timerlat_init_hist(params);
967	if (!tool) {
968		err_msg("Could not init osnoise hist\n");
969		goto out_exit;
970	}
971
972	retval = timerlat_hist_apply_config(tool, params);
973	if (retval) {
974		err_msg("Could not apply config\n");
975		goto out_free;
976	}
977
978	trace = &tool->trace;
979
980	retval = enable_timerlat(trace);
981	if (retval) {
982		err_msg("Failed to enable timerlat tracer\n");
983		goto out_free;
984	}
985
986	if (params->set_sched) {
987		retval = set_comm_sched_attr("timerlat/", &params->sched_param);
988		if (retval) {
989			err_msg("Failed to set sched parameters\n");
990			goto out_free;
991		}
992	}
993
994	if (params->cgroup && !params->user_workload) {
995		retval = set_comm_cgroup("timerlat/", params->cgroup_name);
996		if (!retval) {
997			err_msg("Failed to move threads to cgroup\n");
998			goto out_free;
999		}
1000	}
1001
1002	if (params->dma_latency >= 0) {
1003		dma_latency_fd = set_cpu_dma_latency(params->dma_latency);
1004		if (dma_latency_fd < 0) {
1005			err_msg("Could not set /dev/cpu_dma_latency.\n");
1006			goto out_free;
1007		}
1008	}
1009
1010	if (params->trace_output) {
1011		record = osnoise_init_trace_tool("timerlat");
1012		if (!record) {
1013			err_msg("Failed to enable the trace instance\n");
1014			goto out_free;
1015		}
1016
1017		if (params->events) {
1018			retval = trace_events_enable(&record->trace, params->events);
1019			if (retval)
1020				goto out_hist;
1021		}
1022	}
1023
1024	if (!params->no_aa) {
1025		aa = osnoise_init_tool("timerlat_aa");
1026		if (!aa)
1027			goto out_hist;
1028
1029		retval = timerlat_aa_init(aa, params->dump_tasks);
1030		if (retval) {
1031			err_msg("Failed to enable the auto analysis instance\n");
1032			goto out_hist;
1033		}
1034
1035		retval = enable_timerlat(&aa->trace);
1036		if (retval) {
1037			err_msg("Failed to enable timerlat tracer\n");
1038			goto out_hist;
1039		}
1040	}
1041
1042	/*
1043	 * Start the tracers here, after having set all instances.
1044	 *
1045	 * Let the trace instance start first for the case of hitting a stop
1046	 * tracing while enabling other instances. The trace instance is the
1047	 * one with most valuable information.
1048	 */
1049	if (params->trace_output)
1050		trace_instance_start(&record->trace);
1051	if (!params->no_aa)
1052		trace_instance_start(&aa->trace);
1053	trace_instance_start(trace);
1054
1055	tool->start_time = time(NULL);
1056	timerlat_hist_set_signals(params);
1057
1058	if (params->user_workload) {
1059		/* rtla asked to stop */
1060		params_u.should_run = 1;
1061		/* all threads left */
1062		params_u.stopped_running = 0;
1063
1064		params_u.set = &params->monitored_cpus;
1065		if (params->set_sched)
1066			params_u.sched_param = &params->sched_param;
1067		else
1068			params_u.sched_param = NULL;
1069
1070		params_u.cgroup_name = params->cgroup_name;
1071
1072		retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, &params_u);
1073		if (retval)
1074			err_msg("Error creating timerlat user-space threads\n");
1075	}
1076
1077	while (!stop_tracing) {
1078		sleep(params->sleep_time);
1079
1080		retval = tracefs_iterate_raw_events(trace->tep,
1081						    trace->inst,
1082						    NULL,
1083						    0,
1084						    collect_registered_events,
1085						    trace);
1086		if (retval < 0) {
1087			err_msg("Error iterating on events\n");
1088			goto out_hist;
1089		}
1090
1091		if (trace_is_off(&tool->trace, &record->trace))
1092			break;
1093
1094		/* is there still any user-threads ? */
1095		if (params->user_workload) {
1096			if (params_u.stopped_running) {
1097				debug_msg("timerlat user-space threads stopped!\n");
1098				break;
1099			}
1100		}
1101	}
1102	if (params->user_workload && !params_u.stopped_running) {
1103		params_u.should_run = 0;
1104		sleep(1);
1105	}
1106
1107	timerlat_print_stats(params, tool);
1108
1109	return_value = 0;
1110
1111	if (trace_is_off(&tool->trace, &record->trace)) {
1112		printf("rtla timerlat hit stop tracing\n");
1113
1114		if (!params->no_aa)
1115			timerlat_auto_analysis(params->stop_us, params->stop_total_us);
1116
1117		if (params->trace_output) {
1118			printf("  Saving trace to %s\n", params->trace_output);
1119			save_trace_to_file(record->trace.inst, params->trace_output);
1120		}
1121	}
1122
1123out_hist:
1124	timerlat_aa_destroy();
1125	if (dma_latency_fd >= 0)
1126		close(dma_latency_fd);
1127	trace_events_destroy(&record->trace, params->events);
1128	params->events = NULL;
1129out_free:
1130	timerlat_free_histogram(tool->data);
1131	osnoise_destroy_tool(aa);
1132	osnoise_destroy_tool(record);
1133	osnoise_destroy_tool(tool);
1134	free(params);
1135out_exit:
1136	exit(return_value);
1137}
1138