1/* SPDX-License-Identifier: GPL-2.0-only */
2#ifndef _LINUX_TRACEPOINT_H
3#define _LINUX_TRACEPOINT_H
4
5/*
6 * Kernel Tracepoint API.
7 *
8 * See Documentation/trace/tracepoints.rst.
9 *
10 * Copyright (C) 2008-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
11 *
12 * Heavily inspired from the Linux Kernel Markers.
13 */
14
15#include <linux/smp.h>
16#include <linux/srcu.h>
17#include <linux/errno.h>
18#include <linux/types.h>
19#include <linux/cpumask.h>
20#include <linux/rcupdate.h>
21#include <linux/tracepoint-defs.h>
22#include <linux/static_call.h>
23
24struct module;
25struct tracepoint;
26struct notifier_block;
27
/*
 * Associates a string with a numeric value, tagged with a system name.
 * NOTE(review): presumably one enum/sizeof symbol exported so that its textual
 * name can be resolved to a value -- confirm against the users of this type.
 */
struct trace_eval_map {
	const char		*system;	/* system the entry belongs to */
	const char		*eval_string;	/* string form of the entry */
	unsigned long		eval_value;	/* value paired with eval_string */
};
33
/* Priority used by tracepoint_probe_register_may_exist(). */
#define TRACEPOINT_DEFAULT_PRIO	10

/*
 * SRCU domain entered by rcuidle tracepoint callers and waited on by
 * tracepoint_synchronize_unregister().
 */
extern struct srcu_struct tracepoint_srcu;

/*
 * Probe (un)registration entry points, defined outside this header.
 *
 * @tp:    tracepoint to attach to / detach from
 * @probe: callback, passed as an untyped pointer
 * @data:  opaque pointer handed over together with @probe
 * @prio:  priority of the probe (the *_prio variants only)
 *
 * NOTE(review): the return value is presumably 0 on success or a negative
 * errno, and the *_may_exist variant presumably tolerates a probe that is
 * already registered -- neither is visible from this header; confirm against
 * the implementation.
 */
extern int
tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
extern int
tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data,
			       int prio);
extern int
tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data,
					 int prio);
extern int
tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);
48static inline int
49tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe,
50				    void *data)
51{
52	return tracepoint_probe_register_prio_may_exist(tp, probe, data,
53							TRACEPOINT_DEFAULT_PRIO);
54}
/*
 * Declared here, defined elsewhere.
 * NOTE(review): going by the name, presumably invokes @fct once per built-in
 * tracepoint, passing @priv through unchanged -- confirm.
 */
extern void
for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
		void *priv);
58
#ifdef CONFIG_MODULES
/*
 * List node pairing a list_head with a module pointer.
 * NOTE(review): presumably one node per module carrying tracepoints -- confirm.
 */
struct tp_module {
	struct list_head list;
	struct module *mod;
};

/* With module support these are implemented outside this header. */
bool trace_module_has_bad_taint(struct module *mod);
extern int register_tracepoint_module_notifier(struct notifier_block *nb);
extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
#else
/* Stub for builds without module support: always reports false. */
static inline bool trace_module_has_bad_taint(struct module *mod)
{
	return false;
}
/* Stub for builds without module support: does nothing and returns 0. */
static inline
int register_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
/* Stub for builds without module support: does nothing and returns 0. */
static inline
int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	return 0;
}
#endif /* CONFIG_MODULES */
84
/*
 * tracepoint_synchronize_unregister must be called between the last tracepoint
 * probe unregistration and the end of module exit to make sure there is no
 * caller executing a probe when it is freed.
 *
 * With CONFIG_TRACEPOINTS it waits for a grace period of the tracepoint_srcu
 * domain (entered by rcuidle callers in __DO_TRACE()) and then for a regular
 * RCU grace period.  Without CONFIG_TRACEPOINTS it is an empty stub.
 */
#ifdef CONFIG_TRACEPOINTS
static inline void tracepoint_synchronize_unregister(void)
{
	synchronize_srcu(&tracepoint_srcu);
	synchronize_rcu();
}
#else
static inline void tracepoint_synchronize_unregister(void)
{ }
#endif
100
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
/*
 * Declared only when the architecture has syscall tracepoints.
 * NOTE(review): presumably meant to be passed as the reg/unreg callbacks of
 * DEFINE_TRACE_FN() for the syscall tracepoints -- confirm at the call sites.
 */
extern int syscall_regfunc(void);
extern void syscall_unregfunc(void);
#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
105
/*
 * PARAMS() hands its argument list back untouched.  It lets a comma-separated
 * list travel through several macro layers as a single macro argument.
 */
#ifndef PARAMS
#define PARAMS(...) __VA_ARGS__
#endif

/* Both expand to nothing in this header. */
#define TRACE_DEFINE_ENUM(sym)
#define TRACE_DEFINE_SIZEOF(sym)
112
#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
/*
 * Entries of the pointer table are 32-bit offsets relative to the entry's own
 * address (see __TRACEPOINT_ENTRY() below); offset_to_ptr() converts an entry
 * back into a tracepoint pointer.
 */
static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
{
	return offset_to_ptr(p);
}

/*
 * Emit one 4-byte aligned entry holding "__tracepoint_<name> - ." into the
 * "__tracepoints_ptrs" section, then switch back to the previous section.
 */
#define __TRACEPOINT_ENTRY(name)					\
	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
	    "	.balign 4					\n"	\
	    "	.long 	__tracepoint_" #name " - .		\n"	\
	    "	.previous					\n")
#else
/* Entries of the pointer table are read directly: just load the entry. */
static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
{
	return *p;
}

/*
 * Define a static table entry initialized with the address of
 * __tracepoint_<name>, placed in the "__tracepoints_ptrs" section.
 */
#define __TRACEPOINT_ENTRY(name)					 \
	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
	__section("__tracepoints_ptrs") = &__tracepoint_##name
#endif
134
135#endif /* _LINUX_TRACEPOINT_H */
136
137/*
138 * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
139 *  file ifdef protection.
140 *  This is due to the way trace events work. If a file includes two
141 *  trace event headers under one "CREATE_TRACE_POINTS" the first include
142 *  will override the TRACE_EVENT and break the second include.
143 */
144
145#ifndef DECLARE_TRACE
146
/*
 * Grouping helpers for the tracepoint declaration macros: each one hands its
 * argument list back untouched, so a prototype, an argument list or a
 * condition containing commas can be passed as a single macro argument.
 */
#define TP_PROTO(...)		__VA_ARGS__
#define TP_ARGS(...)		__VA_ARGS__
#define TP_CONDITION(...)	__VA_ARGS__
150
151/*
 * Individual subsystems may have a separate configuration to
153 * enable their tracepoints. By default, this file will create
154 * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem
155 * wants to be able to disable its tracepoints from being created
156 * it can define NOTRACE before including the tracepoint headers.
157 */
158#if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE)
159#define TRACEPOINTS_ENABLED
160#endif
161
162#ifdef TRACEPOINTS_ENABLED
163
/*
 * __DO_TRACE_CALL(): invoke the probes of tracepoint @name with @args.
 *
 * With static calls: load the probe array pointer of the tracepoint; when it
 * is non-NULL, call through the tp_func_<name> static call, handing it the
 * data pointer of the first array element.
 *
 * Without static calls: call the __traceiter_<name>() iterator directly with
 * NULL data; the iterator loads the data pointer of each probe itself.
 */
#ifdef CONFIG_HAVE_STATIC_CALL
#define __DO_TRACE_CALL(name, args)					\
	do {								\
		struct tracepoint_func *it_func_ptr;			\
		void *__data;						\
		it_func_ptr =						\
			rcu_dereference_raw((&__tracepoint_##name)->funcs); \
		if (it_func_ptr) {					\
			__data = (it_func_ptr)->data;			\
			static_call(tp_func_##name)(__data, args);	\
		}							\
	} while (0)
#else
#define __DO_TRACE_CALL(name, args)	__traceiter_##name(NULL, args)
#endif /* CONFIG_HAVE_STATIC_CALL */
179
/*
 * ARCH_WANTS_NO_INSTR archs are expected to have sanitized entry and idle
 * code that disallow any/all tracing/instrumentation when RCU isn't watching.
 *
 * RCUIDLE_COND(rcuidle) is the "this tracepoint call must not proceed" test
 * that __DO_TRACE() feeds to WARN_ONCE():
 *  - on ARCH_WANTS_NO_INSTR archs it is true for every rcuidle caller;
 *  - elsewhere it is true only for an rcuidle caller running in NMI context.
 *
 * The argument is parenthesized so that an arbitrary expression is evaluated
 * as a unit before being combined with in_nmi().
 */
#ifdef CONFIG_ARCH_WANTS_NO_INSTR
#define RCUIDLE_COND(rcuidle)	(rcuidle)
#else
/* srcu can't be used from NMI */
#define RCUIDLE_COND(rcuidle)	((rcuidle) && in_nmi())
#endif
190
/*
 * it_func[0] is never NULL because there is at least one element in the array
 * when the array itself is non NULL.
 *
 * __DO_TRACE() is the body run by trace_<name>() / trace_<name>_rcuidle()
 * once the tracepoint's static key is on.  It expands inside a void function
 * and leaves that function with "return":
 *  - when @cond is false;
 *  - when RCUIDLE_COND(@rcuidle) is true, after warning once.
 * Otherwise it calls the probes through __DO_TRACE_CALL() with preemption
 * disabled.  For @rcuidle callers the call is additionally bracketed by an
 * SRCU read-side section on tracepoint_srcu and by ct_irq_enter_irqson() /
 * ct_irq_exit_irqson(), released in reverse order of acquisition.
 */
#define __DO_TRACE(name, args, cond, rcuidle)				\
	do {								\
		int __maybe_unused __idx = 0;				\
									\
		if (!(cond))						\
			return;						\
									\
		if (WARN_ONCE(RCUIDLE_COND(rcuidle),			\
			      "Bad RCU usage for tracepoint"))		\
			return;						\
									\
		/* keep srcu and sched-rcu usage consistent */		\
		preempt_disable_notrace();				\
									\
		/*							\
		 * For rcuidle callers, use srcu since sched-rcu	\
		 * doesn't work from the idle path.			\
		 */							\
		if (rcuidle) {						\
			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
			ct_irq_enter_irqson();				\
		}							\
									\
		__DO_TRACE_CALL(name, TP_ARGS(args));			\
									\
		if (rcuidle) {						\
			ct_irq_exit_irqson();				\
			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
		}							\
									\
		preempt_enable_notrace();				\
	} while (0)
227
/*
 * __DECLARE_TRACE_RCU(): generate trace_<name>_rcuidle(), which checks the
 * tracepoint's static key and, when it is on, runs __DO_TRACE() with the
 * rcuidle argument set to 1.
 *
 * When MODULE is defined the macro expands to nothing, so no
 * trace_<name>_rcuidle() is generated there.
 */
#ifndef MODULE
#define __DECLARE_TRACE_RCU(name, proto, args, cond)			\
	static inline void trace_##name##_rcuidle(proto)		\
	{								\
		if (static_key_false(&__tracepoint_##name.key))		\
			__DO_TRACE(name,				\
				TP_ARGS(args),				\
				TP_CONDITION(cond), 1);			\
	}
#else
#define __DECLARE_TRACE_RCU(name, proto, args, cond)
#endif
240
/*
 * Make sure the alignment of the structure in the __tracepoints section will
 * not add unwanted padding between the beginning of the section and the
 * structure. Force alignment to the same alignment as the section start.
 *
 * When lockdep is enabled, we make sure to always test if RCU is
 * "watching" regardless if the tracepoint is enabled or not. Tracepoints
 * require RCU to be active, and it should always warn at the tracepoint
 * site if it is not watching, as it will need to be active when the
 * tracepoint is enabled.
 *
 * __DECLARE_TRACE() emits, for tracepoint @name:
 *  - declarations of __traceiter_<name>(), of the tp_func_<name> static call
 *    and of the __tracepoint_<name> structure;
 *  - trace_<name>(@proto): runs __DO_TRACE() (rcuidle = 0) when the static
 *    key is on, then performs the lockdep-only RCU check described above;
 *  - whatever __DECLARE_TRACE_RCU() generates (trace_<name>_rcuidle() unless
 *    MODULE is defined);
 *  - register_trace_<name>(), register_trace_prio_<name>() and
 *    unregister_trace_<name>(): typed wrappers that cast the probe to void *
 *    and return the result of the matching tracepoint_probe_*() function;
 *  - check_trace_callback_type_<name>(): empty body; passing a callback to it
 *    makes the compiler check the callback against @data_proto;
 *  - trace_<name>_enabled(): result of testing the static key.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
	extern int __traceiter_##name(data_proto);			\
	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);	\
	extern struct tracepoint __tracepoint_##name;			\
	static inline void trace_##name(proto)				\
	{								\
		if (static_key_false(&__tracepoint_##name.key))		\
			__DO_TRACE(name,				\
				TP_ARGS(args),				\
				TP_CONDITION(cond), 0);			\
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
			WARN_ONCE(!rcu_is_watching(),			\
				  "RCU not watching for tracepoint");	\
		}							\
	}								\
	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
			    PARAMS(cond))				\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_register(&__tracepoint_##name,	\
						(void *)probe, data);	\
	}								\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto), void *data,\
				   int prio)				\
	{								\
		return tracepoint_probe_register_prio(&__tracepoint_##name, \
					      (void *)probe, data, prio); \
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto), void *data)	\
	{								\
		return tracepoint_probe_unregister(&__tracepoint_##name,\
						(void *)probe, data);	\
	}								\
	static inline void						\
	check_trace_callback_type_##name(void (*cb)(data_proto))	\
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return static_key_false(&__tracepoint_##name.key);	\
	}
297
/*
 * We have no guarantee that gcc and the linker won't up-align the tracepoint
 * structures, so we create an array of pointers that will be used for iteration
 * on the tracepoints.
 *
 * DEFINE_TRACE_FN() defines, for tracepoint @_name:
 *  - its name string, placed in "__tracepoints_strings";
 *  - the __tracepoint_<_name> structure in "__tracepoints", with the key
 *    initialized to false, no probes (funcs = NULL) and @_reg / @_unreg as
 *    regfunc / unregfunc;
 *  - its entry in the pointer table, via __TRACEPOINT_ENTRY();
 *  - __traceiter_<_name>(): when the probe array is non-NULL, calls each
 *    probe in array order with that probe's own data pointer, stopping at the
 *    first entry whose func is NULL; always returns 0;
 *  - __probestub_<_name>(): a function with the probe signature and an empty
 *    body;
 *  - the tp_func_<_name> static call, defined with the iterator as target.
 */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)		\
	static const char __tpstrtab_##_name[]				\
	__section("__tracepoints_strings") = #_name;			\
	extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name);	\
	int __traceiter_##_name(void *__data, proto);			\
	void __probestub_##_name(void *__data, proto);			\
	struct tracepoint __tracepoint_##_name	__used			\
	__section("__tracepoints") = {					\
		.name = __tpstrtab_##_name,				\
		.key = STATIC_KEY_INIT_FALSE,				\
		.static_call_key = &STATIC_CALL_KEY(tp_func_##_name),	\
		.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
		.iterator = &__traceiter_##_name,			\
		.probestub = &__probestub_##_name,			\
		.regfunc = _reg,					\
		.unregfunc = _unreg,					\
		.funcs = NULL };					\
	__TRACEPOINT_ENTRY(_name);					\
	int __traceiter_##_name(void *__data, proto)			\
	{								\
		struct tracepoint_func *it_func_ptr;			\
		void *it_func;						\
									\
		it_func_ptr =						\
			rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
		if (it_func_ptr) {					\
			do {						\
				it_func = READ_ONCE((it_func_ptr)->func); \
				__data = (it_func_ptr)->data;		\
				((void(*)(void *, proto))(it_func))(__data, args); \
			} while ((++it_func_ptr)->func);		\
		}							\
		return 0;						\
	}								\
	void __probestub_##_name(void *__data, proto)			\
	{								\
	}								\
	DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
341
/* DEFINE_TRACE(): DEFINE_TRACE_FN() without reg/unreg callbacks. */
#define DEFINE_TRACE(name, proto, args)		\
	DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

/*
 * Export the three symbols a user of tracepoint @name needs: the tracepoint
 * structure, its iterator and its static call.  The _GPL variant uses the
 * GPL-only export macros for all three.
 */
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)				\
	EXPORT_SYMBOL_GPL(__tracepoint_##name);				\
	EXPORT_SYMBOL_GPL(__traceiter_##name);				\
	EXPORT_STATIC_CALL_GPL(tp_func_##name)
#define EXPORT_TRACEPOINT_SYMBOL(name)					\
	EXPORT_SYMBOL(__tracepoint_##name);				\
	EXPORT_SYMBOL(__traceiter_##name);				\
	EXPORT_STATIC_CALL(tp_func_##name)
353
354
355#else /* !TRACEPOINTS_ENABLED */
/*
 * Tracepoints compiled out: every generated helper is an inline stub.
 *
 *  - trace_<name>() and trace_<name>_rcuidle() do nothing;
 *  - register_trace_<name>(), register_trace_prio_<name>() and
 *    unregister_trace_<name>() return -ENOSYS.  The priority variant is
 *    provided so that code calling it still builds when tracepoints are
 *    disabled;
 *  - check_trace_callback_type_<name>() has an empty body; passing a callback
 *    to it makes the compiler check the callback against @data_proto;
 *  - trace_<name>_enabled() always returns false.
 *
 * @args and @cond are accepted for signature compatibility and are not used.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
	static inline void trace_##name(proto)				\
	{ }								\
	static inline void trace_##name##_rcuidle(proto)		\
	{ }								\
	static inline int						\
	register_trace_##name(void (*probe)(data_proto),		\
			      void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	register_trace_prio_##name(void (*probe)(data_proto),		\
				   void *data, int prio)		\
	{								\
		return -ENOSYS;						\
	}								\
	static inline int						\
	unregister_trace_##name(void (*probe)(data_proto),		\
				void *data)				\
	{								\
		return -ENOSYS;						\
	}								\
	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{								\
	}								\
	static inline bool						\
	trace_##name##_enabled(void)					\
	{								\
		return false;						\
	}
381
/* Tracepoints compiled out: definitions and exports expand to nothing. */
#define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
#define DEFINE_TRACE(name, proto, args)
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
#define EXPORT_TRACEPOINT_SYMBOL(name)
386
387#endif /* TRACEPOINTS_ENABLED */
388
389#ifdef CONFIG_TRACING
390/**
391 * tracepoint_string - register constant persistent string to trace system
 * @str: a constant persistent string that will be referenced in tracepoints
393 *
394 * If constant strings are being used in tracepoints, it is faster and
395 * more efficient to just save the pointer to the string and reference
396 * that with a printf "%s" instead of saving the string in the ring buffer
397 * and wasting space and time.
398 *
399 * The problem with the above approach is that userspace tools that read
400 * the binary output of the trace buffers do not have access to the string.
401 * Instead they just show the address of the string which is not very
402 * useful to users.
403 *
404 * With tracepoint_string(), the string will be registered to the tracing
 * system and exported to userspace via the /sys/kernel/tracing/printk_formats
406 * file that maps the string address to the string text. This way userspace
407 * tools that read the binary buffers have a way to map the pointers to
408 * the ASCII strings they represent.
409 *
410 * The @str used must be a constant string and persistent as it would not
411 * make sense to show a string that no longer exists. But it is still fine
412 * to be used with modules, because when modules are unloaded, if they
413 * had tracepoints, the ring buffers are cleared too. As long as the string
414 * does not change during the life of the module, it is fine to use
415 * tracepoint_string() within a module.
416 */
#define tracepoint_string(str)						\
	({								\
		/* one static pointer per use, placed in the section */ \
		static const char *___tp_str __tracepoint_string = str; \
		/* the expression evaluates to that pointer */		\
		___tp_str;						\
	})
/*
 * Attributes applied to the pointer defined by tracepoint_string(): marked
 * __used and placed in the "__tracepoint_str" section.
 */
#define __tracepoint_string	__used __section("__tracepoint_str")
423#else
424/*
425 * tracepoint_string() is used to save the string address for userspace
426 * tracing tools. When tracing isn't configured, there's no need to save
427 * anything.
428 */
429# define tracepoint_string(str) str
430# define __tracepoint_string
431#endif
432
/*
 * DECLARE_TRACE(): declare tracepoint @name through __DECLARE_TRACE().
 * Probes take a leading "void *__data" followed by @proto, and the condition
 * handed down is that the current CPU is online.
 */
#define DECLARE_TRACE(name, proto, args)				\
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
			cpu_online(raw_smp_processor_id()),		\
			PARAMS(void *__data, proto))

/*
 * DECLARE_TRACE_CONDITION(): as DECLARE_TRACE(), with @cond and'ed after the
 * CPU-online test.
 */
#define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
			PARAMS(void *__data, proto))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
446
447#endif /* DECLARE_TRACE */
448
449#ifndef TRACE_EVENT
450/*
451 * For use with the TRACE_EVENT macro:
452 *
453 * We define a tracepoint, its arguments, its printk format
454 * and its 'fast binary record' layout.
455 *
 * Firstly, name your tracepoint via TRACE_EVENT(name, ...): the
 * 'subsystem_event' notation is fine.
458 *
459 * Think about this whole construct as the
460 * 'trace_sched_switch() function' from now on.
461 *
462 *
463 *  TRACE_EVENT(sched_switch,
464 *
465 *	*
 *	* A function has a regular argument prototype;
 *	* declare it via TP_PROTO():
468 *	*
469 *
470 *	TP_PROTO(struct rq *rq, struct task_struct *prev,
471 *		 struct task_struct *next),
472 *
473 *	*
474 *	* Define the call signature of the 'function'.
475 *	* (Design sidenote: we use this instead of a
476 *	*  TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.)
477 *	*
478 *
479 *	TP_ARGS(rq, prev, next),
480 *
481 *	*
482 *	* Fast binary tracing: define the trace record via
483 *	* TP_STRUCT__entry(). You can think about it like a
484 *	* regular C structure local variable definition.
485 *	*
486 *	* This is how the trace record is structured and will
487 *	* be saved into the ring buffer. These are the fields
488 *	* that will be exposed to user-space in
489 *	* /sys/kernel/tracing/events/<*>/format.
490 *	*
491 *	* The declared 'local variable' is called '__entry'
492 *	*
493 *	* __field(pid_t, prev_pid) is equivalent to a standard declaration:
494 *	*
495 *	*	pid_t	prev_pid;
496 *	*
497 *	* __array(char, prev_comm, TASK_COMM_LEN) is equivalent to:
498 *	*
499 *	*	char	prev_comm[TASK_COMM_LEN];
500 *	*
501 *
502 *	TP_STRUCT__entry(
503 *		__array(	char,	prev_comm,	TASK_COMM_LEN	)
504 *		__field(	pid_t,	prev_pid			)
505 *		__field(	int,	prev_prio			)
506 *		__array(	char,	next_comm,	TASK_COMM_LEN	)
507 *		__field(	pid_t,	next_pid			)
508 *		__field(	int,	next_prio			)
509 *	),
510 *
511 *	*
512 *	* Assign the entry into the trace record, by embedding
513 *	* a full C statement block into TP_fast_assign(). You
514 *	* can refer to the trace record as '__entry' -
515 *	* otherwise you can put arbitrary C code in here.
516 *	*
517 *	* Note: this C code will execute every time a trace event
518 *	* happens, on an active tracepoint.
519 *	*
520 *
521 *	TP_fast_assign(
522 *		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
523 *		__entry->prev_pid	= prev->pid;
524 *		__entry->prev_prio	= prev->prio;
525 *		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
526 *		__entry->next_pid	= next->pid;
527 *		__entry->next_prio	= next->prio;
528 *	),
529 *
530 *	*
531 *	* Formatted output of a trace record via TP_printk().
532 *	* This is how the tracepoint will appear under ftrace
533 *	* plugins that make use of this tracepoint.
534 *	*
 *	* (raw-binary tracing won't actually perform this step.)
536 *	*
537 *
538 *	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
539 *		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
540 *		__entry->next_comm, __entry->next_pid, __entry->next_prio),
541 *
542 * );
543 *
544 * This macro construct is thus used for the regular printk format
545 * tracing setup, it is used to construct a function pointer based
546 * tracepoint callback (this is used by programmatic plugins and
 * can also be used by generic instrumentation like SystemTap), and
548 * it is also used to expose a structured trace record in
549 * /sys/kernel/tracing/events/.
550 *
551 * A set of (un)registration functions can be passed to the variant
552 * TRACE_EVENT_FN to perform any (un)registration work.
553 */
554
/*
 * In this header the event-definition macros only declare the tracepoint.
 * The record layout, assignment and print arguments, the class template name
 * and the reg/unreg callbacks are accepted and ignored.
 */

/* An event class by itself declares nothing here. */
#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print)

/* Unconditional events: forward to DECLARE_TRACE(). */
#define TRACE_EVENT(name, proto, args, tstruct, assign, print)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN(name, proto, args, tstruct, assign, print,	\
		       reg, unreg)					\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT(tmpl, name, proto, args)				\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_FN(tmpl, name, proto, args, reg, unreg)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_PRINT(tmpl, name, proto, args, print)		\
	DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

/* Conditional events: forward to DECLARE_TRACE_CONDITION(). */
#define TRACE_EVENT_CONDITION(name, proto, args, cond,			\
			      tstruct, assign, print)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))
#define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct,		\
			    assign, print, reg, unreg)			\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))
#define DEFINE_EVENT_CONDITION(tmpl, name, proto, args, cond)		\
	DECLARE_TRACE_CONDITION(name, PARAMS(proto),			\
				PARAMS(args), PARAMS(cond))

/* Both expand to nothing in this header. */
#define TRACE_EVENT_FLAGS(event, flag)

#define TRACE_EVENT_PERF_PERM(event, expr...)
584
/*
 * "NOP" events: trace_<name>() is an empty inline function and
 * trace_<name>_enabled() always returns false.  No tracepoint is declared and
 * @args is not used.
 */
#define DECLARE_EVENT_NOP(name, proto, args)				\
	static inline void trace_##name(proto)				\
	{ }								\
	static inline bool trace_##name##_enabled(void)			\
	{								\
		return false;						\
	}

/* A NOP event class declares nothing. */
#define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print)

/* NOP counterparts of TRACE_EVENT() and DEFINE_EVENT(). */
#define TRACE_EVENT_NOP(name, proto, args, tstruct, assign, print)	\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
#define DEFINE_EVENT_NOP(tmpl, name, proto, args)			\
	DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args))
599
600#endif /* ifdef TRACE_EVENT (see note above) */
601