// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Function graph tracer.
 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
 * Mostly borrowed from function tracer which
 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/fs.h>

#include "trace.h"
#include "trace_output.h"

/* When set, irq functions will be ignored */
static int ftrace_graph_skip_irqs;

struct fgraph_cpu_data {
	pid_t		last_pid;
	int		depth;
	int		depth_irq;
	int		ignore;
	unsigned long	enter_funcs[FTRACE_RETFUNC_DEPTH];
};

struct fgraph_data {
	struct fgraph_cpu_data __percpu *cpu_data;

	/* Place to preserve last processed entry. */
	struct ftrace_graph_ent_entry	ent;
	struct ftrace_graph_ret_entry	ret;
	int				failed;
	int				cpu;
};

#define TRACE_GRAPH_INDENT	2

unsigned int fgraph_max_depth;

static struct tracer_opt trace_opts[] = {
	/* Display overruns? (for self-debug purposes) */
	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
	/* Display CPU ? */
	{ TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
	/* Display Overhead ? */
	{ TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
	/* Display proc name/pid */
	{ TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
	/* Display duration of execution */
	{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
	/* Display absolute time of an entry */
	{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
	/* Display interrupts */
	{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
	/* Display function name after trailing } */
	{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
	/* Display function return value ? */
	{ TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) },
	/* Display function return value in hexadecimal format ? */
	{ TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) },
#endif
	/* Include sleep time (scheduled out) between entry and return */
	{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },

#ifdef CONFIG_FUNCTION_PROFILER
	/* Include time within nested functions */
	{ TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
#endif

	{ } /* Empty entry */
};

static struct tracer_flags tracer_flags = {
	/* Don't display overruns, proc, or tail by default */
	.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
	       TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS |
	       TRACE_GRAPH_SLEEP_TIME | TRACE_GRAPH_GRAPH_TIME,
	.opts = trace_opts
};

static struct trace_array *graph_array;

/*
 * The DURATION column is also used to display IRQ signs; the following
 * values are used by print_graph_irq and others to fill in space in the
 * DURATION column.
 */
enum {
	FLAGS_FILL_FULL  = 1 << TRACE_GRAPH_PRINT_FILL_SHIFT,
	FLAGS_FILL_START = 2 << TRACE_GRAPH_PRINT_FILL_SHIFT,
	FLAGS_FILL_END   = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT,
};

static void
print_graph_duration(struct trace_array *tr, unsigned long long duration,
		     struct trace_seq *s, u32 flags);

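/* Reserve and commit a TRACE_GRAPH_ENT event for one function entry. */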
int __trace_graph_entry(struct trace_array *tr,
				struct ftrace_graph_ent *trace,
				unsigned int trace_ctx)
{
	struct trace_event_call *call = &event_funcgraph_entry;
	struct ring_buffer_event *event;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ftrace_graph_ent_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
					  sizeof(*entry), trace_ctx);
	if (!event)
		return 0;
	entry	= ring_buffer_event_data(event);
	entry->graph_ent			= *trace;
	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);

	return 1;
}

static inline int ftrace_graph_ignore_irqs(void)
{
	if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
		return 0;

	return in_hardirq();
}

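/*
 * Entry callback registered with the fgraph core: applies the various
 * filters and, when the call should be traced, records a function entry
 * event on the current CPU.
 */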
int trace_graph_entry(struct ftrace_graph_ent *trace)
{
	struct trace_array *tr = graph_array;
	struct trace_array_cpu *data;
	unsigned long flags;
	unsigned int trace_ctx;
	long disabled;
	int ret;
	int cpu;

	if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT))
		return 0;

	/*
	 * Do not trace a function if it's filtered by set_graph_notrace.
	 * Set the NOTRACE recursion bit so that functions called from here
	 * on are ignored as well; the bit is cleared again when the filtered
	 * function returns, so that tracing can resume afterwards.
	 */
	if (ftrace_graph_notrace_addr(trace->func)) {
		trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT);
		/*
		 * Need to return 1 so that the return handler is called,
		 * which will clear the NOTRACE bit.
		 */
		return 1;
	}

	if (!ftrace_trace_task(tr))
		return 0;

	if (ftrace_graph_ignore_func(trace))
		return 0;

	if (ftrace_graph_ignore_irqs())
		return 0;

	/*
	 * Stop here if tracing_thresh is set. We only write function return
	 * events to the ring buffer.
	 */
	if (tracing_thresh)
		return 1;

	local_irq_save(flags);
	cpu = raw_smp_processor_id();
	data = per_cpu_ptr(tr->array_buffer.data, cpu);
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1)) {
		trace_ctx = tracing_gen_ctx_flags(flags);
		ret = __trace_graph_entry(tr, trace, trace_ctx);
	} else {
		ret = 0;
	}

	atomic_dec(&data->disabled);
	local_irq_restore(flags);

	return ret;
}

static void
__trace_graph_function(struct trace_array *tr,
		unsigned long ip, unsigned int trace_ctx)
{
	u64 time = trace_clock_local();
	struct ftrace_graph_ent ent = {
		.func  = ip,
		.depth = 0,
	};
	struct ftrace_graph_ret ret = {
		.func     = ip,
		.depth    = 0,
		.calltime = time,
		.rettime  = time,
	};

	__trace_graph_entry(tr, &ent, trace_ctx);
	__trace_graph_return(tr, &ret, trace_ctx);
}

void
trace_graph_function(struct trace_array *tr,
		unsigned long ip, unsigned long parent_ip,
		unsigned int trace_ctx)
{
	__trace_graph_function(tr, ip, trace_ctx);
}

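/* Reserve and commit a TRACE_GRAPH_RET event for one function return. */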
void __trace_graph_return(struct trace_array *tr,
				struct ftrace_graph_ret *trace,
				unsigned int trace_ctx)
{
	struct trace_event_call *call = &event_funcgraph_exit;
	struct ring_buffer_event *event;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ftrace_graph_ret_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
					  sizeof(*entry), trace_ctx);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ret				= *trace;
	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

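/*
 * Return callback registered with the fgraph core: records a function
 * return event unless recording is disabled on the current CPU.
 */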
void trace_graph_return(struct ftrace_graph_ret *trace)
{
	struct trace_array *tr = graph_array;
	struct trace_array_cpu *data;
	unsigned long flags;
	unsigned int trace_ctx;
	long disabled;
	int cpu;

	ftrace_graph_addr_finish(trace);

	if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
		trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
		return;
	}

	local_irq_save(flags);
	cpu = raw_smp_processor_id();
	data = per_cpu_ptr(tr->array_buffer.data, cpu);
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1)) {
		trace_ctx = tracing_gen_ctx_flags(flags);
		__trace_graph_return(tr, trace, trace_ctx);
	}
	atomic_dec(&data->disabled);
	local_irq_restore(flags);
}

void set_graph_array(struct trace_array *tr)
{
	graph_array = tr;

	/* Make graph_array visible before we start tracing */

	smp_mb();
}

static void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
{
	ftrace_graph_addr_finish(trace);

	if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
		trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
		return;
	}

	if (tracing_thresh &&
	    (trace->rettime - trace->calltime < tracing_thresh))
		return;
	else
		trace_graph_return(trace);
}

static struct fgraph_ops funcgraph_thresh_ops = {
	.entryfunc = &trace_graph_entry,
	.retfunc = &trace_graph_thresh_return,
};

static struct fgraph_ops funcgraph_ops = {
	.entryfunc = &trace_graph_entry,
	.retfunc = &trace_graph_return,
};

static int graph_trace_init(struct trace_array *tr)
{
	int ret;

	set_graph_array(tr);
	if (tracing_thresh)
		ret = register_ftrace_graph(&funcgraph_thresh_ops);
	else
		ret = register_ftrace_graph(&funcgraph_ops);
	if (ret)
		return ret;
	tracing_start_cmdline_record();

	return 0;
}

static void graph_trace_reset(struct trace_array *tr)
{
	tracing_stop_cmdline_record();
	if (tracing_thresh)
		unregister_ftrace_graph(&funcgraph_thresh_ops);
	else
		unregister_ftrace_graph(&funcgraph_ops);
}

static int graph_trace_update_thresh(struct trace_array *tr)
{
	graph_trace_reset(tr);
	return graph_trace_init(tr);
}

static int max_bytes_for_cpu;

static void print_graph_cpu(struct trace_seq *s, int cpu)
{
	/*
	 * Start with a space character - to make it stand out
	 * to the right a bit when trace output is pasted into
	 * email:
	 */
	trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu);
}

#define TRACE_GRAPH_PROCINFO_LENGTH	14

static void print_graph_proc(struct trace_seq *s, pid_t pid)
{
	char comm[TASK_COMM_LEN];
	/* sign + log10(MAX_INT) + '\0' */
	char pid_str[11];
	int spaces = 0;
	int len;
	int i;

	trace_find_cmdline(pid, comm);
	comm[7] = '\0';
	sprintf(pid_str, "%d", pid);

	/* 1 stands for the "-" character */
	len = strlen(comm) + strlen(pid_str) + 1;

	if (len < TRACE_GRAPH_PROCINFO_LENGTH)
		spaces = TRACE_GRAPH_PROCINFO_LENGTH - len;

	/* First spaces to align center */
	for (i = 0; i < spaces / 2; i++)
		trace_seq_putc(s, ' ');

	trace_seq_printf(s, "%s-%s", comm, pid_str);

	/* Last spaces to align center */
	for (i = 0; i < spaces - (spaces / 2); i++)
		trace_seq_putc(s, ' ');
}


static void print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
{
	trace_seq_putc(s, ' ');
	trace_print_lat_fmt(s, entry);
	trace_seq_puts(s, " | ");
}

/* If the pid changed since the last trace, output this event */
static void
verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
{
	pid_t prev_pid;
	pid_t *last_pid;

	if (!data)
		return;

	last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);

	if (*last_pid == pid)
		return;

	prev_pid = *last_pid;
	*last_pid = pid;

	if (prev_pid == -1)
		return;
/*
 * Context-switch trace line:

 ------------------------------------------
 | 1)  migration/0--1  =>  sshd-1755
 ------------------------------------------

 */
	trace_seq_puts(s, " ------------------------------------------\n");
	print_graph_cpu(s, cpu);
	print_graph_proc(s, prev_pid);
	trace_seq_puts(s, " => ");
	print_graph_proc(s, pid);
	trace_seq_puts(s, "\n ------------------------------------------\n\n");
}

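/*
 * Look ahead in the ring buffer: if the very next event is the return of
 * @curr (same pid and same function), return it so that entry and return
 * can be printed as a single "leaf" line; otherwise return NULL.
 */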
static struct ftrace_graph_ret_entry *
get_return_for_leaf(struct trace_iterator *iter,
		struct ftrace_graph_ent_entry *curr)
{
	struct fgraph_data *data = iter->private;
	struct ring_buffer_iter *ring_iter = NULL;
	struct ring_buffer_event *event;
	struct ftrace_graph_ret_entry *next;

	/*
	 * If the previous output failed to write to the seq buffer,
	 * then we just reuse the data from before.
	 */
	if (data && data->failed) {
		curr = &data->ent;
		next = &data->ret;
	} else {

		ring_iter = trace_buffer_iter(iter, iter->cpu);

		/* First peek to compare current entry and the next one */
		if (ring_iter)
			event = ring_buffer_iter_peek(ring_iter, NULL);
		else {
			/*
			 * We need to consume the current entry to see
			 * the next one.
			 */
			ring_buffer_consume(iter->array_buffer->buffer, iter->cpu,
					    NULL, NULL);
			event = ring_buffer_peek(iter->array_buffer->buffer, iter->cpu,
						 NULL, NULL);
		}

		if (!event)
			return NULL;

		next = ring_buffer_event_data(event);

		if (data) {
			/*
			 * Save current and next entries for later reference
			 * if the output fails.
			 */
			data->ent = *curr;
			/*
			 * If the next event is not a return type, then
			 * we only care about what type it is. Otherwise we can
			 * safely copy the entire event.
			 */
			if (next->ent.type == TRACE_GRAPH_RET)
				data->ret = *next;
			else
				data->ret.ent.type = next->ent.type;
		}
	}

	if (next->ent.type != TRACE_GRAPH_RET)
		return NULL;

	if (curr->ent.pid != next->ent.pid ||
			curr->graph_ent.func != next->ret.func)
		return NULL;

	/* this is a leaf, now advance the iterator */
	if (ring_iter)
		ring_buffer_iter_advance(ring_iter);

	return next;
}

static void print_graph_abs_time(u64 t, struct trace_seq *s)
{
	unsigned long usecs_rem;

	usecs_rem = do_div(t, NSEC_PER_SEC);
	usecs_rem /= 1000;

	trace_seq_printf(s, "%5lu.%06lu |  ",
			 (unsigned long)t, usecs_rem);
}

static void
print_graph_rel_time(struct trace_iterator *iter, struct trace_seq *s)
{
	unsigned long long usecs;

	usecs = iter->ts - iter->array_buffer->time_start;
	do_div(usecs, NSEC_PER_USEC);

	trace_seq_printf(s, "%9llu us |  ", usecs);
}

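/*
 * When @addr lies within the irqentry text section, print an
 * "==========>" or "<==========" marker showing where interrupt
 * handling begins or ends.
 */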
static void
print_graph_irq(struct trace_iterator *iter, unsigned long addr,
		enum trace_type type, int cpu, pid_t pid, u32 flags)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;

	if (addr < (unsigned long)__irqentry_text_start ||
		addr >= (unsigned long)__irqentry_text_end)
		return;

	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
		/* Absolute time */
		if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
			print_graph_abs_time(iter->ts, s);

		/* Relative time */
		if (flags & TRACE_GRAPH_PRINT_REL_TIME)
			print_graph_rel_time(iter, s);

		/* Cpu */
		if (flags & TRACE_GRAPH_PRINT_CPU)
			print_graph_cpu(s, cpu);

		/* Proc */
		if (flags & TRACE_GRAPH_PRINT_PROC) {
			print_graph_proc(s, pid);
			trace_seq_puts(s, " | ");
		}

		/* Latency format */
		if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
			print_graph_lat_fmt(s, ent);
	}

	/* No overhead */
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_START);

	if (type == TRACE_GRAPH_ENT)
		trace_seq_puts(s, "==========>");
	else
		trace_seq_puts(s, "<==========");

	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_END);
	trace_seq_putc(s, '\n');
}

void
trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
{
	unsigned long nsecs_rem = do_div(duration, 1000);
	/* log10(ULONG_MAX) + '\0' */
	char usecs_str[21];
	char nsecs_str[5];
	int len;
	int i;

	sprintf(usecs_str, "%lu", (unsigned long) duration);

	/* Print usecs */
	trace_seq_printf(s, "%s", usecs_str);

	len = strlen(usecs_str);

	/* Print nsecs (we don't want to exceed 7 digits) */
	if (len < 7) {
		size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);

		snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
		trace_seq_printf(s, ".%s", nsecs_str);
		len += strlen(nsecs_str) + 1;
	}

	trace_seq_puts(s, " us ");

	/* Print remaining spaces to fit the row's width */
	for (i = len; i < 8; i++)
		trace_seq_putc(s, ' ');
}

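/*
 * Print the DURATION column: the overhead mark plus the duration in
 * microseconds, or only padding when one of the FLAGS_FILL_* variants
 * is requested.
 */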
static void
print_graph_duration(struct trace_array *tr, unsigned long long duration,
		     struct trace_seq *s, u32 flags)
{
	if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
	    !(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
		return;

	/* No real data, just filling the column with spaces */
	switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) {
	case FLAGS_FILL_FULL:
		trace_seq_puts(s, "              |  ");
		return;
	case FLAGS_FILL_START:
		trace_seq_puts(s, "  ");
		return;
	case FLAGS_FILL_END:
		trace_seq_puts(s, " |");
		return;
	}

	/* Signal an overhead of time execution to the output */
	if (flags & TRACE_GRAPH_PRINT_OVERHEAD)
		trace_seq_printf(s, "%c ", trace_find_mark(duration));
	else
		trace_seq_puts(s, "  ");

	trace_print_graph_duration(duration, s);
	trace_seq_puts(s, "|  ");
}

#ifdef CONFIG_FUNCTION_GRAPH_RETVAL

#define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL

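/*
 * Print the function name together with its return value, either in
 * hexadecimal or, when the value looks like an error code, as a signed
 * decimal number.
 */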
static void print_graph_retval(struct trace_seq *s, unsigned long retval,
				bool leaf, void *func, bool hex_format)
{
	unsigned long err_code = 0;

	if (retval == 0 || hex_format)
		goto done;

	/* Check if the return value matches the negative format */
	if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
		(((u64)retval) >> 32) == 0) {
		/* sign extension */
		err_code = (unsigned long)(s32)retval;
	} else {
		err_code = retval;
	}

	if (!IS_ERR_VALUE(err_code))
		err_code = 0;

done:
	if (leaf) {
		if (hex_format || (err_code == 0))
			trace_seq_printf(s, "%ps(); /* = 0x%lx */\n",
					func, retval);
		else
			trace_seq_printf(s, "%ps(); /* = %ld */\n",
					func, err_code);
	} else {
		if (hex_format || (err_code == 0))
			trace_seq_printf(s, "} /* %ps = 0x%lx */\n",
					func, retval);
		else
			trace_seq_printf(s, "} /* %ps = %ld */\n",
					func, err_code);
	}
}

#else

#define __TRACE_GRAPH_PRINT_RETVAL 0

#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0)

#endif

/* Case of a leaf function on its call entry */
static enum print_line_t
print_graph_entry_leaf(struct trace_iterator *iter,
		struct ftrace_graph_ent_entry *entry,
		struct ftrace_graph_ret_entry *ret_entry,
		struct trace_seq *s, u32 flags)
{
	struct fgraph_data *data = iter->private;
	struct trace_array *tr = iter->tr;
	struct ftrace_graph_ret *graph_ret;
	struct ftrace_graph_ent *call;
	unsigned long long duration;
	int cpu = iter->cpu;
	int i;

	graph_ret = &ret_entry->ret;
	call = &entry->graph_ent;
	duration = graph_ret->rettime - graph_ret->calltime;

	if (data) {
		struct fgraph_cpu_data *cpu_data;

		cpu_data = per_cpu_ptr(data->cpu_data, cpu);

		/*
		 * Comments display at + 1 to depth. Since
		 * this is a leaf function, keep the comments
		 * equal to this depth.
		 */
		cpu_data->depth = call->depth - 1;

		/* No need to keep this function around for this depth */
		if (call->depth < FTRACE_RETFUNC_DEPTH &&
		    !WARN_ON_ONCE(call->depth < 0))
			cpu_data->enter_funcs[call->depth] = 0;
	}

	/* Overhead and duration */
	print_graph_duration(tr, duration, s, flags);

	/* Function */
	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');

	/*
	 * Write out the function return value if the option funcgraph-retval
	 * is enabled.
	 */
	if (flags & __TRACE_GRAPH_PRINT_RETVAL)
		print_graph_retval(s, graph_ret->retval, true, (void *)call->func,
				!!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
	else
		trace_seq_printf(s, "%ps();\n", (void *)call->func);

	print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
			cpu, iter->ent->pid, flags);

	return trace_handle_return(s);
}

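/* Case of a non-leaf function: print "func() {" with an empty duration column. */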
static enum print_line_t
print_graph_entry_nested(struct trace_iterator *iter,
			 struct ftrace_graph_ent_entry *entry,
			 struct trace_seq *s, int cpu, u32 flags)
{
	struct ftrace_graph_ent *call = &entry->graph_ent;
	struct fgraph_data *data = iter->private;
	struct trace_array *tr = iter->tr;
	int i;

	if (data) {
		struct fgraph_cpu_data *cpu_data;
		int cpu = iter->cpu;

		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
		cpu_data->depth = call->depth;

		/* Save this function pointer to see if the exit matches */
		if (call->depth < FTRACE_RETFUNC_DEPTH &&
		    !WARN_ON_ONCE(call->depth < 0))
			cpu_data->enter_funcs[call->depth] = call->func;
	}

	/* No time */
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_FULL);

	/* Function */
	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');

	trace_seq_printf(s, "%ps() {\n", (void *)call->func);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	/*
	 * we already consumed the current entry to check the next one
	 * and see if this is a leaf.
	 */
	return TRACE_TYPE_NO_CONSUME;
}

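/*
 * Print the columns that precede the function column: pid changes, irq
 * markers and, depending on the flags, absolute/relative time, CPU,
 * task/pid and latency information.
 */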
static void
print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
		     int type, unsigned long addr, u32 flags)
{
	struct fgraph_data *data = iter->private;
	struct trace_entry *ent = iter->ent;
	struct trace_array *tr = iter->tr;
	int cpu = iter->cpu;

	/* Pid */
	verif_pid(s, ent->pid, cpu, data);

	if (type)
		/* Interrupt */
		print_graph_irq(iter, addr, type, cpu, ent->pid, flags);

	if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
		return;

	/* Absolute time */
	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
		print_graph_abs_time(iter->ts, s);

	/* Relative time */
	if (flags & TRACE_GRAPH_PRINT_REL_TIME)
		print_graph_rel_time(iter, s);

	/* Cpu */
	if (flags & TRACE_GRAPH_PRINT_CPU)
		print_graph_cpu(s, cpu);

	/* Proc */
	if (flags & TRACE_GRAPH_PRINT_PROC) {
		print_graph_proc(s, ent->pid);
		trace_seq_puts(s, " | ");
	}

	/* Latency format */
	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
		print_graph_lat_fmt(s, ent);

	return;
}

/*
 * Entry check for irq code
 *
 * returns 1 if
 *  - we are inside irq code
 *  - we just entered irq code
 *
 * returns 0 if
 *  - funcgraph-irqs option is set
 *  - we are not inside irq code
 */
static int
check_irq_entry(struct trace_iterator *iter, u32 flags,
		unsigned long addr, int depth)
{
	int cpu = iter->cpu;
	int *depth_irq;
	struct fgraph_data *data = iter->private;

	/*
	 * If we are either displaying irqs, or we got called as
	 * a graph event and private data does not exist,
	 * then we bypass the irq check.
	 */
	if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
	    (!data))
		return 0;

	depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);

	/*
	 * We are inside the irq code
	 */
	if (*depth_irq >= 0)
		return 1;

	if ((addr < (unsigned long)__irqentry_text_start) ||
	    (addr >= (unsigned long)__irqentry_text_end))
		return 0;

	/*
	 * We are entering irq code.
	 */
	*depth_irq = depth;
	return 1;
}

/*
 * Return check for irq code
 *
 * returns 1 if
 *  - we are inside irq code
 *  - we just left irq code
 *
 * returns 0 if
 *  - funcgraph-irqs option is set
 *  - we are not inside irq code
 */
static int
check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
{
	int cpu = iter->cpu;
	int *depth_irq;
	struct fgraph_data *data = iter->private;

	/*
	 * If we are either displaying irqs, or we got called as
	 * a graph event and private data does not exist,
	 * then we bypass the irq check.
	 */
	if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
	    (!data))
		return 0;

	depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);

	/*
	 * We are not inside the irq code.
	 */
	if (*depth_irq == -1)
		return 0;

	/*
	 * We are inside the irq code, and this is the return of the
	 * function that entered it. Let's not trace it and clear the
	 * entry depth, since we are now out of irq code.
	 *
	 * This condition ensures that we 'leave the irq code' once
	 * we are out of the entry depth. Thus protecting us from
	 * the RETURN entry loss.
	 */
	if (*depth_irq >= depth) {
		*depth_irq = -1;
		return 1;
	}

	/*
	 * We are inside the irq code, and this is not the entry.
	 */
	return 1;
}

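/*
 * Print a function entry, using the leaf format when the matching return
 * immediately follows and the nested "{" format otherwise.
 */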
static enum print_line_t
print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
			struct trace_iterator *iter, u32 flags)
{
	struct fgraph_data *data = iter->private;
	struct ftrace_graph_ent *call = &field->graph_ent;
	struct ftrace_graph_ret_entry *leaf_ret;
	static enum print_line_t ret;
	int cpu = iter->cpu;

	if (check_irq_entry(iter, flags, call->func, call->depth))
		return TRACE_TYPE_HANDLED;

	print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags);

	leaf_ret = get_return_for_leaf(iter, field);
	if (leaf_ret)
		ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags);
	else
		ret = print_graph_entry_nested(iter, field, s, cpu, flags);

	if (data) {
		/*
		 * If we failed to write our output, then we need to make
		 * a note of it, because we already consumed our entry.
		 */
		if (s->full) {
			data->failed = 1;
			data->cpu = cpu;
		} else
			data->failed = 0;
	}

	return ret;
}

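/*
 * Print the closing brace of a function along with its duration and,
 * depending on the flags, its name, return value and overrun count.
 */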
static enum print_line_t
print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
		   struct trace_entry *ent, struct trace_iterator *iter,
		   u32 flags)
{
	unsigned long long duration = trace->rettime - trace->calltime;
	struct fgraph_data *data = iter->private;
	struct trace_array *tr = iter->tr;
	pid_t pid = ent->pid;
	int cpu = iter->cpu;
	int func_match = 1;
	int i;

	if (check_irq_return(iter, flags, trace->depth))
		return TRACE_TYPE_HANDLED;

	if (data) {
		struct fgraph_cpu_data *cpu_data;
		int cpu = iter->cpu;

		cpu_data = per_cpu_ptr(data->cpu_data, cpu);

		/*
		 * Comments display at + 1 to depth. This is the
		 * return from a function, so we now want the comments
		 * to display at the same level as the bracket.
		 */
		cpu_data->depth = trace->depth - 1;

		if (trace->depth < FTRACE_RETFUNC_DEPTH &&
		    !WARN_ON_ONCE(trace->depth < 0)) {
			if (cpu_data->enter_funcs[trace->depth] != trace->func)
				func_match = 0;
			cpu_data->enter_funcs[trace->depth] = 0;
		}
	}

	print_graph_prologue(iter, s, 0, 0, flags);

	/* Overhead and duration */
	print_graph_duration(tr, duration, s, flags);

	/* Closing brace */
	for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');

	/*
	 * Always write out the function name and its return value if the
	 * funcgraph-retval option is enabled.
	 */
	if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
		print_graph_retval(s, trace->retval, false, (void *)trace->func,
			!!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
	} else {
		/*
		 * If the return function does not have a matching entry,
		 * then the entry was lost. Instead of just printing
		 * the '}' and letting the user guess what function this
		 * belongs to, write out the function name. Always do
		 * that if the funcgraph-tail option is enabled.
		 */
		if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL))
			trace_seq_puts(s, "}\n");
		else
			trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
	}

	/* Overrun */
	if (flags & TRACE_GRAPH_PRINT_OVERRUN)
		trace_seq_printf(s, " (Overruns: %u)\n",
				 trace->overrun);

	print_graph_irq(iter, trace->func, TRACE_GRAPH_RET,
			cpu, pid, flags);

	return trace_handle_return(s);
}

static enum print_line_t
print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
		    struct trace_iterator *iter, u32 flags)
{
	struct trace_array *tr = iter->tr;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct fgraph_data *data = iter->private;
	struct trace_event *event;
	int depth = 0;
	int ret;
	int i;

	if (data)
		depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;

	print_graph_prologue(iter, s, 0, 0, flags);

	/* No time */
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_FULL);

	/* Indentation */
	if (depth > 0)
		for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++)
			trace_seq_putc(s, ' ');

	/* The comment */
	trace_seq_puts(s, "/* ");

	switch (iter->ent->type) {
	case TRACE_BPUTS:
		ret = trace_print_bputs_msg_only(iter);
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
		break;
	case TRACE_BPRINT:
		ret = trace_print_bprintk_msg_only(iter);
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
		break;
	case TRACE_PRINT:
		ret = trace_print_printk_msg_only(iter);
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
		break;
	default:
		event = ftrace_find_event(ent->type);
		if (!event)
			return TRACE_TYPE_UNHANDLED;

		ret = event->funcs->trace(iter, sym_flags, event);
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}

	if (trace_seq_has_overflowed(s))
		goto out;

	/* Strip ending newline */
	if (s->buffer[s->seq.len - 1] == '\n') {
		s->buffer[s->seq.len - 1] = '\0';
		s->seq.len--;
	}

	trace_seq_puts(s, " */\n");
 out:
	return trace_handle_return(s);
}

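/*
 * Top-level output routine: dispatch the current event to the entry,
 * return or comment printer according to its type.
 */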
enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags)
{
	struct ftrace_graph_ent_entry *field;
	struct fgraph_data *data = iter->private;
	struct trace_entry *entry = iter->ent;
	struct trace_seq *s = &iter->seq;
	int cpu = iter->cpu;
	int ret;

	if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
		per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
		return TRACE_TYPE_HANDLED;
	}

	/*
	 * If the last output failed, there's a possibility we need
	 * to print out the missing entry which would never go out.
	 */
	if (data && data->failed) {
		field = &data->ent;
		iter->cpu = data->cpu;
		ret = print_graph_entry(field, s, iter, flags);
		if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
			per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
			ret = TRACE_TYPE_NO_CONSUME;
		}
		iter->cpu = cpu;
		return ret;
	}

	switch (entry->type) {
	case TRACE_GRAPH_ENT: {
		/*
		 * print_graph_entry() may consume the current event,
		 * thus @field may become invalid, so we need to save it.
		 * sizeof(struct ftrace_graph_ent_entry) is very small,
		 * so it can be safely saved on the stack.
		 */
		struct ftrace_graph_ent_entry saved;
		trace_assign_type(field, entry);
		saved = *field;
		return print_graph_entry(&saved, s, iter, flags);
	}
	case TRACE_GRAPH_RET: {
		struct ftrace_graph_ret_entry *field;
		trace_assign_type(field, entry);
		return print_graph_return(&field->ret, s, entry, iter, flags);
	}
	case TRACE_STACK:
	case TRACE_FN:
		/* don't trace stack and functions as comments */
		return TRACE_TYPE_UNHANDLED;

	default:
		return print_graph_comment(s, entry, iter, flags);
	}

	return TRACE_TYPE_HANDLED;
}

static enum print_line_t
print_graph_function(struct trace_iterator *iter)
{
	return print_graph_function_flags(iter, tracer_flags.val);
}

static enum print_line_t
print_graph_function_event(struct trace_iterator *iter, int flags,
			   struct trace_event *event)
{
	return print_graph_function(iter);
}

static void print_lat_header(struct seq_file *s, u32 flags)
{
	static const char spaces[] = "                "	/* 16 spaces */
		"    "					/* 4 spaces */
		"                 ";			/* 17 spaces */
	int size = 0;

	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
		size += 16;
	if (flags & TRACE_GRAPH_PRINT_REL_TIME)
		size += 16;
	if (flags & TRACE_GRAPH_PRINT_CPU)
		size += 4;
	if (flags & TRACE_GRAPH_PRINT_PROC)
		size += 17;

	seq_printf(s, "#%.*s  _-----=> irqs-off        \n", size, spaces);
	seq_printf(s, "#%.*s / _----=> need-resched    \n", size, spaces);
	seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);
	seq_printf(s, "#%.*s|| / _--=> preempt-depth   \n", size, spaces);
	seq_printf(s, "#%.*s||| /                      \n", size, spaces);
}

static void __print_graph_headers_flags(struct trace_array *tr,
					struct seq_file *s, u32 flags)
{
	int lat = tr->trace_flags & TRACE_ITER_LATENCY_FMT;

	if (lat)
		print_lat_header(s, flags);

	/* 1st line */
	seq_putc(s, '#');
	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
		seq_puts(s, "     TIME       ");
	if (flags & TRACE_GRAPH_PRINT_REL_TIME)
		seq_puts(s, "   REL TIME     ");
	if (flags & TRACE_GRAPH_PRINT_CPU)
		seq_puts(s, " CPU");
	if (flags & TRACE_GRAPH_PRINT_PROC)
		seq_puts(s, "  TASK/PID       ");
	if (lat)
		seq_puts(s, "||||   ");
	if (flags & TRACE_GRAPH_PRINT_DURATION)
		seq_puts(s, "  DURATION   ");
	seq_puts(s, "               FUNCTION CALLS\n");

	/* 2nd line */
	seq_putc(s, '#');
	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
		seq_puts(s, "      |         ");
	if (flags & TRACE_GRAPH_PRINT_REL_TIME)
		seq_puts(s, "      |         ");
	if (flags & TRACE_GRAPH_PRINT_CPU)
		seq_puts(s, " |  ");
	if (flags & TRACE_GRAPH_PRINT_PROC)
		seq_puts(s, "   |    |        ");
	if (lat)
		seq_puts(s, "||||   ");
	if (flags & TRACE_GRAPH_PRINT_DURATION)
		seq_puts(s, "   |   |      ");
	seq_puts(s, "               |   |   |   |\n");
}

static void print_graph_headers(struct seq_file *s)
{
	print_graph_headers_flags(s, tracer_flags.val);
}

void print_graph_headers_flags(struct seq_file *s, u32 flags)
{
	struct trace_iterator *iter = s->private;
	struct trace_array *tr = iter->tr;

	if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
		return;

	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) {
		/* print nothing if the buffers are empty */
		if (trace_empty(iter))
			return;

		print_trace_header(s, iter);
	}

	__print_graph_headers_flags(tr, s, flags);
}

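/* Allocate the per-CPU iterator state used while printing the trace. */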
void graph_trace_open(struct trace_iterator *iter)
{
	/* pid and depth on the last trace processed */
	struct fgraph_data *data;
	gfp_t gfpflags;
	int cpu;

	iter->private = NULL;

	/* We can be called in atomic context via ftrace_dump() */
	gfpflags = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;

	data = kzalloc(sizeof(*data), gfpflags);
	if (!data)
		goto out_err;

	data->cpu_data = alloc_percpu_gfp(struct fgraph_cpu_data, gfpflags);
	if (!data->cpu_data)
		goto out_err_free;

	for_each_possible_cpu(cpu) {
		pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
		int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
		int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);

		*pid = -1;
		*depth = 0;
		*ignore = 0;
		*depth_irq = -1;
	}

	iter->private = data;

	return;

 out_err_free:
	kfree(data);
 out_err:
	pr_warn("function graph tracer: not enough memory\n");
}

void graph_trace_close(struct trace_iterator *iter)
{
	struct fgraph_data *data = iter->private;

	if (data) {
		free_percpu(data->cpu_data);
		kfree(data);
	}
}

static int
func_graph_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	if (bit == TRACE_GRAPH_PRINT_IRQS)
		ftrace_graph_skip_irqs = !set;

	if (bit == TRACE_GRAPH_SLEEP_TIME)
		ftrace_graph_sleep_time_control(set);

	if (bit == TRACE_GRAPH_GRAPH_TIME)
		ftrace_graph_graph_time_control(set);

	return 0;
}

static struct trace_event_functions graph_functions = {
	.trace		= print_graph_function_event,
};

static struct trace_event graph_trace_entry_event = {
	.type		= TRACE_GRAPH_ENT,
	.funcs		= &graph_functions,
};

static struct trace_event graph_trace_ret_event = {
	.type		= TRACE_GRAPH_RET,
	.funcs		= &graph_functions
};

static struct tracer graph_trace __tracer_data = {
	.name		= "function_graph",
	.update_thresh	= graph_trace_update_thresh,
	.open		= graph_trace_open,
	.pipe_open	= graph_trace_open,
	.close		= graph_trace_close,
	.pipe_close	= graph_trace_close,
	.init		= graph_trace_init,
	.reset		= graph_trace_reset,
	.print_line	= print_graph_function,
	.print_header	= print_graph_headers,
	.flags		= &tracer_flags,
	.set_flag	= func_graph_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest	= trace_selftest_startup_function_graph,
#endif
};

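/* "max_graph_depth" tracefs file: limit how deep traced calls may nest. */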
static ssize_t
graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
		  loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	fgraph_max_depth = val;

	*ppos += cnt;

	return cnt;
}

static ssize_t
graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
		 loff_t *ppos)
{
	char buf[15]; /* More than enough to hold UINT_MAX + "\n" */
	int n;

	n = sprintf(buf, "%d\n", fgraph_max_depth);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
}

static const struct file_operations graph_depth_fops = {
	.open		= tracing_open_generic,
	.write		= graph_depth_write,
	.read		= graph_depth_read,
	.llseek		= generic_file_llseek,
};

static __init int init_graph_tracefs(void)
{
	int ret;

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	trace_create_file("max_graph_depth", TRACE_MODE_WRITE, NULL,
			  NULL, &graph_depth_fops);

	return 0;
}
fs_initcall(init_graph_tracefs);

static __init int init_graph_trace(void)
{
	max_bytes_for_cpu = snprintf(NULL, 0, "%u", nr_cpu_ids - 1);

	if (!register_trace_event(&graph_trace_entry_event)) {
		pr_warn("Warning: could not register graph trace events\n");
		return 1;
	}

	if (!register_trace_event(&graph_trace_ret_event)) {
		pr_warn("Warning: could not register graph trace events\n");
		return 1;
	}

	return register_tracer(&graph_trace);
}

core_initcall(init_graph_trace);