/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
7#include <uapi/linux/bpf.h>
8#include <uapi/linux/ptrace.h>
9#include <uapi/linux/perf_event.h>
10#include <linux/version.h>
11#include <linux/sched.h>
12#include <bpf/bpf_helpers.h>
13#include <bpf/bpf_tracing.h>
14
/*
 * _(P) - copy the lvalue P out of kernel memory with
 * bpf_probe_read_kernel() and evaluate to that copy.
 *
 * P is only used as `typeof(P)` and `&(P)`, so it is never dereferenced
 * directly by the generated code.
 */
#define _(P)                                                                   \
	({                                                                     \
		typeof(P) probed_val_;                                         \
		bpf_probe_read_kernel(&probed_val_, sizeof(probed_val_),       \
				      &(P));                                   \
		probed_val_;                                                   \
	})
21
/*
 * Smallest blocked time, in microseconds, that oncpu() forwards to
 * update_counts(); shorter intervals are dropped.
 */
#define MINBLOCK_US	1
/* max_entries used by the counts, start, wokeby and stackmap maps below. */
#define MAX_ENTRIES	10000
24
/*
 * Key of the `counts` map, filled in by update_counts().
 * Field order and sizes are part of the map key layout.
 */
struct key_t {
	/* comm copied from the matching wokeby entry; all zero if none found */
	char waker[TASK_COMM_LEN];
	/* comm of the current task, from bpf_get_current_comm() */
	char target[TASK_COMM_LEN];
	/* stack id stored by waker(); 0 when no wokeby entry was found */
	u32 wret;
	/* stack id returned by bpf_get_stackid() in update_counts() */
	u32 tret;
};
31
/*
 * Hash map from struct key_t to a u64 total. update_counts() adds the
 * delta it is given (microseconds, as computed by oncpu()) to the entry.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct key_t);
	__type(value, u64);
	__uint(max_entries, MAX_ENTRIES);
} counts SEC(".maps");
38
/*
 * Hash map from pid (u32) to a bpf_ktime_get_ns() timestamp. oncpu()
 * writes the entry for the previous task's pid and later looks up and
 * deletes the entry for the current pid.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, MAX_ENTRIES);
} start SEC(".maps");
45
/*
 * Value of the `wokeby` map: what waker() records about the task that
 * was current when try_to_wake_up() was entered.
 */
struct wokeby_t {
	/* comm of that task, from bpf_get_current_comm() */
	char name[TASK_COMM_LEN];
	/* result of bpf_get_stackid() taken in waker() */
	u32 ret;
};
50
/*
 * Hash map from pid (u32) to struct wokeby_t. waker() stores an entry
 * under the pid read from try_to_wake_up()'s first argument;
 * update_counts() reads that entry and then deletes it.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, struct wokeby_t);
	__uint(max_entries, MAX_ENTRIES);
} wokeby SEC(".maps");
57
/*
 * Stack trace map handed to bpf_get_stackid() by waker() and
 * update_counts(). Keys are u32 ids; each value has room for
 * PERF_MAX_STACK_DEPTH u64 entries.
 */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
	__uint(max_entries, MAX_ENTRIES);
} stackmap SEC(".maps");
64
/*
 * Flags passed to every bpf_get_stackid() call in this file; only
 * BPF_F_FAST_STACK_CMP is set.
 */
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
66
67SEC("kprobe/try_to_wake_up")
68int waker(struct pt_regs *ctx)
69{
70	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
71	struct wokeby_t woke;
72	u32 pid;
73
74	pid = _(p->pid);
75
76	bpf_get_current_comm(&woke.name, sizeof(woke.name));
77	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
78
79	bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
80	return 0;
81}
82
83static inline int update_counts(void *ctx, u32 pid, u64 delta)
84{
85	struct wokeby_t *woke;
86	u64 zero = 0, *val;
87	struct key_t key;
88
89	__builtin_memset(&key.waker, 0, sizeof(key.waker));
90	bpf_get_current_comm(&key.target, sizeof(key.target));
91	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
92	key.wret = 0;
93
94	woke = bpf_map_lookup_elem(&wokeby, &pid);
95	if (woke) {
96		key.wret = woke->ret;
97		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
98		bpf_map_delete_elem(&wokeby, &pid);
99	}
100
101	val = bpf_map_lookup_elem(&counts, &key);
102	if (!val) {
103		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
104		val = bpf_map_lookup_elem(&counts, &key);
105		if (!val)
106			return 0;
107	}
108	(*val) += delta;
109	return 0;
110}
111
112#if 1
113/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
114struct sched_switch_args {
115	unsigned long long pad;
116	char prev_comm[TASK_COMM_LEN];
117	int prev_pid;
118	int prev_prio;
119	long long prev_state;
120	char next_comm[TASK_COMM_LEN];
121	int next_pid;
122	int next_prio;
123};
124SEC("tracepoint/sched/sched_switch")
125int oncpu(struct sched_switch_args *ctx)
126{
127	/* record previous thread sleep time */
128	u32 pid = ctx->prev_pid;
129#else
130SEC("kprobe/finish_task_switch")
131int oncpu(struct pt_regs *ctx)
132{
133	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
134	/* record previous thread sleep time */
135	u32 pid = _(p->pid);
136#endif
137	u64 delta, ts, *tsp;
138
139	ts = bpf_ktime_get_ns();
140	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);
141
142	/* calculate current thread's delta time */
143	pid = bpf_get_current_pid_tgid();
144	tsp = bpf_map_lookup_elem(&start, &pid);
145	if (!tsp)
146		/* missed start or filtered */
147		return 0;
148
149	delta = bpf_ktime_get_ns() - *tsp;
150	bpf_map_delete_elem(&start, &pid);
151	delta = delta / 1000;
152	if (delta < MINBLOCK_US)
153		return 0;
154
155	return update_counts(ctx, pid, delta);
156}
/* License string, emitted into the object's "license" section. */
char _license[] SEC("license") = "GPL";
/* LINUX_VERSION_CODE of the build headers, emitted into the "version" section. */
u32 _version SEC("version") = LINUX_VERSION_CODE;
159