1// SPDX-License-Identifier: GPL-2.0
2/*
3 * trace context switch
4 *
5 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
6 *
7 */
8#include <linux/module.h>
9#include <linux/kallsyms.h>
10#include <linux/uaccess.h>
11#include <linux/kmemleak.h>
12#include <linux/ftrace.h>
13#include <trace/events/sched.h>
14
15#include "trace.h"
16
/* Bits passed to tracing_record_taskinfo*() selecting what gets recorded */
#define RECORD_CMDLINE	1
#define RECORD_TGID	2

/* Per-record-type reference counts; probes stay registered while either is nonzero */
static int		sched_cmdline_ref;
static int		sched_tgid_ref;
/* Serializes the reference counts and tracepoint (un)registration */
static DEFINE_MUTEX(sched_register_mutex);
23
24static void
25probe_sched_switch(void *ignore, bool preempt,
26		   struct task_struct *prev, struct task_struct *next,
27		   unsigned int prev_state)
28{
29	int flags;
30
31	flags = (RECORD_TGID * !!sched_tgid_ref) +
32		(RECORD_CMDLINE * !!sched_cmdline_ref);
33
34	if (!flags)
35		return;
36	tracing_record_taskinfo_sched_switch(prev, next, flags);
37}
38
39static void
40probe_sched_wakeup(void *ignore, struct task_struct *wakee)
41{
42	int flags;
43
44	flags = (RECORD_TGID * !!sched_tgid_ref) +
45		(RECORD_CMDLINE * !!sched_cmdline_ref);
46
47	if (!flags)
48		return;
49	tracing_record_taskinfo_sched_switch(current, wakee, flags);
50}
51
/*
 * Attach the probes to the sched_wakeup, sched_wakeup_new and sched_switch
 * tracepoints.  On failure, unwind whatever was already registered and
 * return the error from the failing registration.
 */
static int tracing_sched_register(void)
{
	int ret;

	ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup\n");
		return ret;
	}

	ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup_new\n");
		goto fail_deprobe;
	}

	ret = register_trace_sched_switch(probe_sched_switch, NULL);
	if (ret) {
		pr_info("sched trace: Couldn't activate tracepoint"
			" probe to kernel_sched_switch\n");
		goto fail_deprobe_wake_new;
	}

	return ret;
	/* Error unwind: unregister in reverse order of registration */
fail_deprobe_wake_new:
	unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
fail_deprobe:
	unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
	return ret;
}
84
/* Detach all three probes, in reverse order of tracing_sched_register() */
static void tracing_sched_unregister(void)
{
	unregister_trace_sched_switch(probe_sched_switch, NULL);
	unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
	unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
}
91
/*
 * Take a reference on the given record type (RECORD_CMDLINE or RECORD_TGID)
 * and, when going from zero total references to nonzero, register the
 * sched tracepoint probes.
 */
static void tracing_start_sched_switch(int ops)
{
	bool sched_register;

	mutex_lock(&sched_register_mutex);
	/* true iff no record type currently holds a reference */
	sched_register = (!sched_cmdline_ref && !sched_tgid_ref);

	switch (ops) {
	case RECORD_CMDLINE:
		sched_cmdline_ref++;
		break;

	case RECORD_TGID:
		sched_tgid_ref++;
		break;
	}

	/*
	 * First user: hook up the probes.
	 * NOTE(review): a tracing_sched_register() failure is not propagated
	 * to the caller here — confirm this best-effort behavior is intended.
	 */
	if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
		tracing_sched_register();
	mutex_unlock(&sched_register_mutex);
}
113
/*
 * Drop a reference on the given record type and unregister the probes
 * once neither record type holds any references.
 */
static void tracing_stop_sched_switch(int ops)
{
	mutex_lock(&sched_register_mutex);

	switch (ops) {
	case RECORD_CMDLINE:
		sched_cmdline_ref--;
		break;

	case RECORD_TGID:
		sched_tgid_ref--;
		break;
	}

	/* Last user of either type gone: detach the probes */
	if (!sched_cmdline_ref && !sched_tgid_ref)
		tracing_sched_unregister();
	mutex_unlock(&sched_register_mutex);
}
132
/* Enable recording of task comm (cmdline) info at sched switch/wakeup */
void tracing_start_cmdline_record(void)
{
	tracing_start_sched_switch(RECORD_CMDLINE);
}

/* Disable recording of task comm (cmdline) info */
void tracing_stop_cmdline_record(void)
{
	tracing_stop_sched_switch(RECORD_CMDLINE);
}

/* Enable recording of task tgid info at sched switch/wakeup */
void tracing_start_tgid_record(void)
{
	tracing_start_sched_switch(RECORD_TGID);
}

/* Disable recording of task tgid info */
void tracing_stop_tgid_record(void)
{
	tracing_stop_sched_switch(RECORD_TGID);
}
152
153/*
154 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
155 * is the tgid last observed corresponding to pid=i.
156 */
157static int *tgid_map;
158
159/* The maximum valid index into tgid_map. */
160static size_t tgid_map_max;
161
162#define SAVED_CMDLINES_DEFAULT 128
163#define NO_CMDLINE_MAP UINT_MAX
164/*
165 * Preemption must be disabled before acquiring trace_cmdline_lock.
166 * The various trace_arrays' max_lock must be acquired in a context
167 * where interrupt is disabled.
168 */
169static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
170struct saved_cmdlines_buffer {
171	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
172	unsigned *map_cmdline_to_pid;
173	unsigned cmdline_num;
174	int cmdline_idx;
175	char saved_cmdlines[];
176};
177static struct saved_cmdlines_buffer *savedcmd;
178
179/* Holds the size of a cmdline and pid element */
180#define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)			\
181	(TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
182
183static inline char *get_saved_cmdlines(int idx)
184{
185	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
186}
187
188static inline void set_cmdline(int idx, const char *cmdline)
189{
190	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
191}
192
/*
 * Free a buffer obtained from allocate_cmdlines_buffer().
 *
 * NOTE(review): the order is recomputed from cmdline_num * TASK_COMM_LEN
 * only, without the map_cmdline_to_pid element part that the allocator
 * used.  This still yields the same page order because the allocator
 * rounded cmdline_num up to fill the allocation, keeping this size above
 * half of it — but it is subtle; re-verify if the sizing ever changes.
 */
static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);

	kmemleak_free(s);
	free_pages((unsigned long)s, order);
}
200
/*
 * Allocate a saved_cmdlines_buffer able to hold at least @val command lines.
 *
 * The struct header, the saved_cmdlines[] strings and the
 * map_cmdline_to_pid[] array all live in one page-order allocation; any
 * slack added by the page rounding is used for extra cmdline slots.
 * Returns NULL on allocation failure.
 */
static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
{
	struct saved_cmdlines_buffer *s;
	struct page *page;
	int orig_size, size;
	int order;

	/* Figure out how much is needed to hold the given number of cmdlines */
	orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
	order = get_order(orig_size);
	/* The page allocator hands out the full power-of-two size */
	size = 1 << (order + PAGE_SHIFT);
	page = alloc_pages(GFP_KERNEL, order);
	if (!page)
		return NULL;

	s = page_address(page);
	/* alloc_pages() memory is not tracked by kmemleak; register it manually */
	kmemleak_alloc(s, size, 1, GFP_KERNEL);
	memset(s, 0, sizeof(*s));

	/* Round up to actual allocation */
	val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
	s->cmdline_num = val;

	/* Place map_cmdline_to_pid array right after saved_cmdlines */
	s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];

	s->cmdline_idx = 0;
	/* NO_CMDLINE_MAP is UINT_MAX (all 0xff bytes), so memset() fills both maps */
	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
	       sizeof(s->map_pid_to_cmdline));
	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
	       val * sizeof(*s->map_cmdline_to_pid));

	return s;
}
235
236int trace_create_savedcmd(void)
237{
238	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
239
240	return savedcmd ? 0 : -ENOMEM;
241}
242
/*
 * Save @tsk's comm in the saved_cmdlines ring so later trace output can
 * resolve its pid back to a name.  Returns 1 on success (or when nothing
 * needs saving), 0 when the cmdline lock could not be taken.
 */
int trace_save_cmdline(struct task_struct *tsk)
{
	unsigned tpid, idx;

	/* treat recording of idle task as a success */
	if (!tsk->pid)
		return 1;

	/* Fold the pid into the map_pid_to_cmdline[] index range */
	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);

	/*
	 * It's not the end of the world if we don't get
	 * the lock, but we also don't want to spin
	 * nor do we want to disable interrupts,
	 * so if we miss here, then better luck next time.
	 *
	 * This is called within the scheduler and wake up, so interrupts
	 * had better been disabled and run queue lock been held.
	 */
	lockdep_assert_preemption_disabled();
	if (!arch_spin_trylock(&trace_cmdline_lock))
		return 0;

	idx = savedcmd->map_pid_to_cmdline[tpid];
	if (idx == NO_CMDLINE_MAP) {
		/* No slot for this pid yet: claim the next ring slot */
		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;

		savedcmd->map_pid_to_cmdline[tpid] = idx;
		savedcmd->cmdline_idx = idx;
	}

	/* Record which pid owns the slot so lookups can detect stale entries */
	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
	set_cmdline(idx, tsk->comm);

	arch_spin_unlock(&trace_cmdline_lock);

	return 1;
}
281
282static void __trace_find_cmdline(int pid, char comm[])
283{
284	unsigned map;
285	int tpid;
286
287	if (!pid) {
288		strcpy(comm, "<idle>");
289		return;
290	}
291
292	if (WARN_ON_ONCE(pid < 0)) {
293		strcpy(comm, "<XXX>");
294		return;
295	}
296
297	tpid = pid & (PID_MAX_DEFAULT - 1);
298	map = savedcmd->map_pid_to_cmdline[tpid];
299	if (map != NO_CMDLINE_MAP) {
300		tpid = savedcmd->map_cmdline_to_pid[map];
301		if (tpid == pid) {
302			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
303			return;
304		}
305	}
306	strcpy(comm, "<...>");
307}
308
/*
 * trace_find_cmdline - look up the last recorded comm for @pid
 * @pid:  pid to resolve
 * @comm: output buffer (at least TASK_COMM_LEN bytes)
 *
 * Wraps the unlocked lookup with trace_cmdline_lock; preemption must be
 * disabled while holding that lock, hence the preempt_disable() pair.
 */
void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
319
/*
 * Return a pointer to the tgid slot for @pid, or NULL when the map is
 * not allocated or @pid is out of range.
 */
static int *trace_find_tgid_ptr(int pid)
{
	/*
	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
	 * if we observe a non-NULL tgid_map then we also observe the correct
	 * tgid_map_max.
	 */
	int *map = smp_load_acquire(&tgid_map);

	/*
	 * Note: pid is converted to size_t for this comparison, so a
	 * negative pid also takes the NULL path.
	 */
	if (unlikely(!map || pid > tgid_map_max))
		return NULL;

	return &map[pid];
}
334
/* Look up the tgid last recorded for @pid; 0 if unknown or out of range */
int trace_find_tgid(int pid)
{
	int *entry = trace_find_tgid_ptr(pid);

	if (!entry)
		return 0;

	return *entry;
}
341
342static int trace_save_tgid(struct task_struct *tsk)
343{
344	int *ptr;
345
346	/* treat recording of idle task as a success */
347	if (!tsk->pid)
348		return 1;
349
350	ptr = trace_find_tgid_ptr(tsk->pid);
351	if (!ptr)
352		return 0;
353
354	*ptr = tsk->tgid;
355	return 1;
356}
357
358static bool tracing_record_taskinfo_skip(int flags)
359{
360	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
361		return true;
362	if (!__this_cpu_read(trace_taskinfo_save))
363		return true;
364	return false;
365}
366
367/**
368 * tracing_record_taskinfo - record the task info of a task
369 *
370 * @task:  task to record
371 * @flags: TRACE_RECORD_CMDLINE for recording comm
372 *         TRACE_RECORD_TGID for recording tgid
373 */
374void tracing_record_taskinfo(struct task_struct *task, int flags)
375{
376	bool done;
377
378	if (tracing_record_taskinfo_skip(flags))
379		return;
380
381	/*
382	 * Record as much task information as possible. If some fail, continue
383	 * to try to record the others.
384	 */
385	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
386	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
387
388	/* If recording any information failed, retry again soon. */
389	if (!done)
390		return;
391
392	__this_cpu_write(trace_taskinfo_save, false);
393}
394
395/**
396 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
397 *
398 * @prev: previous task during sched_switch
399 * @next: next task during sched_switch
400 * @flags: TRACE_RECORD_CMDLINE for recording comm
401 *         TRACE_RECORD_TGID for recording tgid
402 */
403void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
404					  struct task_struct *next, int flags)
405{
406	bool done;
407
408	if (tracing_record_taskinfo_skip(flags))
409		return;
410
411	/*
412	 * Record as much task information as possible. If some fail, continue
413	 * to try to record the others.
414	 */
415	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
416	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
417	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
418	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
419
420	/* If recording any information failed, retry again soon. */
421	if (!done)
422		return;
423
424	__this_cpu_write(trace_taskinfo_save, false);
425}
426
/* Helpers to record a specific task information */

/* Record only the comm (cmdline) of @task */
void tracing_record_cmdline(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}

/* Record only the tgid of @task */
void tracing_record_tgid(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
437
/*
 * Allocate the pid->tgid map, sized by the current pid_max, on first use.
 * Returns 0 on success (or when already allocated), -ENOMEM on failure.
 */
int trace_alloc_tgid_map(void)
{
	int *map;

	/* Already allocated: nothing to do */
	if (tgid_map)
		return 0;

	tgid_map_max = pid_max;
	map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
		       GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	/*
	 * Pairs with smp_load_acquire() in
	 * trace_find_tgid_ptr() to ensure that if it observes
	 * the tgid_map we just allocated then it also observes
	 * the corresponding tgid_map_max value.
	 */
	smp_store_release(&tgid_map, map);
	return 0;
}
460
461static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
462{
463	int pid = ++(*pos);
464
465	return trace_find_tgid_ptr(pid);
466}
467
468static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
469{
470	int pid = *pos;
471
472	return trace_find_tgid_ptr(pid);
473}
474
/* seq_file ->stop(): ->start() takes no lock here, so nothing to undo */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
478
479static int saved_tgids_show(struct seq_file *m, void *v)
480{
481	int *entry = (int *)v;
482	int pid = entry - tgid_map;
483	int tgid = *entry;
484
485	if (tgid == 0)
486		return SEQ_SKIP;
487
488	seq_printf(m, "%d %d\n", pid, tgid);
489	return 0;
490}
491
/* seq_file iterator over the recorded pid->tgid map */
static const struct seq_operations tracing_saved_tgids_seq_ops = {
	.start		= saved_tgids_start,
	.stop		= saved_tgids_stop,
	.next		= saved_tgids_next,
	.show		= saved_tgids_show,
};
498
499static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
500{
501	int ret;
502
503	ret = tracing_check_open_get_tr(NULL);
504	if (ret)
505		return ret;
506
507	return seq_open(filp, &tracing_saved_tgids_seq_ops);
508}
509
510
/* File operations for the saved_tgids file */
const struct file_operations tracing_saved_tgids_fops = {
	.open		= tracing_saved_tgids_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
517
518static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
519{
520	unsigned int *ptr = v;
521
522	if (*pos || m->count)
523		ptr++;
524
525	(*pos)++;
526
527	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
528	     ptr++) {
529		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
530			continue;
531
532		return ptr;
533	}
534
535	return NULL;
536}
537
/*
 * seq_file ->start(): take trace_cmdline_lock (released in ->stop(),
 * which the seq_file core calls even when NULL is returned here) and
 * fast-forward to position *pos.
 */
static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	void *v;
	loff_t l = 0;

	/* Preemption must be off while holding trace_cmdline_lock */
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	v = &savedcmd->map_cmdline_to_pid[0];
	while (l <= *pos) {
		v = saved_cmdlines_next(m, v, &l);
		if (!v)
			return NULL;
	}

	return v;
}
555
/* seq_file ->stop(): drop the lock taken in saved_cmdlines_start() */
static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
561
562static int saved_cmdlines_show(struct seq_file *m, void *v)
563{
564	char buf[TASK_COMM_LEN];
565	unsigned int *pid = v;
566
567	__trace_find_cmdline(*pid, buf);
568	seq_printf(m, "%d %s\n", *pid, buf);
569	return 0;
570}
571
/* seq_file iterator over the saved cmdlines ring */
static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
	.start		= saved_cmdlines_start,
	.next		= saved_cmdlines_next,
	.stop		= saved_cmdlines_stop,
	.show		= saved_cmdlines_show,
};
578
579static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
580{
581	int ret;
582
583	ret = tracing_check_open_get_tr(NULL);
584	if (ret)
585		return ret;
586
587	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
588}
589
/* File operations for the saved_cmdlines file */
const struct file_operations tracing_saved_cmdlines_fops = {
	.open		= tracing_saved_cmdlines_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
596
597static ssize_t
598tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
599				 size_t cnt, loff_t *ppos)
600{
601	char buf[64];
602	int r;
603
604	preempt_disable();
605	arch_spin_lock(&trace_cmdline_lock);
606	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
607	arch_spin_unlock(&trace_cmdline_lock);
608	preempt_enable();
609
610	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
611}
612
/* Free the current global savedcmd buffer */
void trace_free_saved_cmdlines_buffer(void)
{
	free_saved_cmdlines_buffer(savedcmd);
}
617
/*
 * Replace the global savedcmd buffer with a new one holding @val entries;
 * previously saved cmdlines are discarded.  The pointer swap happens under
 * trace_cmdline_lock, the old buffer is freed after the lock is dropped.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int tracing_resize_saved_cmdlines(unsigned int val)
{
	struct saved_cmdlines_buffer *s, *savedcmd_temp;

	s = allocate_cmdlines_buffer(val);
	if (!s)
		return -ENOMEM;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);
	savedcmd_temp = savedcmd;
	savedcmd = s;
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
	/* Free outside the lock: freeing pages is too heavy for a spinlock */
	free_saved_cmdlines_buffer(savedcmd_temp);

	return 0;
}
636
/*
 * write() handler: parse a decimal slot count from userspace and resize
 * the saved_cmdlines buffer accordingly.
 */
static ssize_t
tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* must have at least 1 entry and no more than PID_MAX_DEFAULT */
	if (!val || val > PID_MAX_DEFAULT)
		return -EINVAL;

	ret = tracing_resize_saved_cmdlines((unsigned int)val);
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
660
/* File operations for the saved_cmdlines_size file */
const struct file_operations tracing_saved_cmdlines_size_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_saved_cmdlines_size_read,
	.write		= tracing_saved_cmdlines_size_write,
};
666