1/* Copyright (c) 2016 Facebook
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include "vmlinux.h"
8#include <linux/version.h>
9#include <bpf/bpf_helpers.h>
10#include <bpf/bpf_tracing.h>
11#include <bpf/bpf_core_read.h>
12
#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH         127	/* max frames stored per stack trace */
#endif

#define MINBLOCK_US	1	/* ignore off-CPU intervals shorter than this (microseconds) */
#define MAX_ENTRIES	10000	/* capacity of every map below */
19
/* Aggregation key for the "counts" map: one entry per unique
 * (waker comm, target comm, waker stack, target stack) combination.
 * No implicit padding (16+16+4+4 bytes), so the whole key hashes cleanly.
 */
struct key_t {
	char waker[TASK_COMM_LEN];	/* comm of the task that woke the target */
	char target[TASK_COMM_LEN];	/* comm of the task that was blocked */
	u32 wret;			/* stackmap id of the waker's kernel stack */
	u32 tret;			/* stackmap id of the target's kernel stack */
};
26
/* key_t -> accumulated blocked time in microseconds; read by user space. */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct key_t);
	__type(value, u64);
	__uint(max_entries, MAX_ENTRIES);
} counts SEC(".maps");
33
/* pid -> timestamp (ns, bpf_ktime_get_ns) recorded when the task was
 * switched out; consumed and deleted when its off-CPU delta is computed.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, MAX_ENTRIES);
} start SEC(".maps");
40
/* Per-target record of the most recent waker, written by waker(). */
struct wokeby_t {
	char name[TASK_COMM_LEN];	/* waker's comm */
	u32 ret;			/* stackmap id of the waker's kernel stack */
};
45
/* target pid -> wokeby_t; written in waker(), consumed (and deleted)
 * in update_counts() when the target is switched out.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, struct wokeby_t);
	__uint(max_entries, MAX_ENTRIES);
} wokeby SEC(".maps");
52
/* Shared stack-trace storage; bpf_get_stackid() returns small integer
 * ids into this map, which user space resolves to symbolized stacks.
 */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
	__uint(max_entries, MAX_ENTRIES);
} stackmap SEC(".maps");
59
/* FAST_STACK_CMP: dedup stacks by hash only (cheaper, rare false sharing). */
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
61
/* Fires on every try_to_wake_up(): record who is waking the target task
 * (waker comm + waker kernel stack id), keyed by the target's pid.
 * Consumed later by update_counts() when the target itself blocks.
 */
SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
	/* arg1 of try_to_wake_up() is the task being woken */
	struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
	u32 pid = BPF_CORE_READ(p, pid);
	struct wokeby_t woke;

	/* helper zero-pads the buffer, so woke.name is fully initialized */
	bpf_get_current_comm(&woke.name, sizeof(woke.name));
	woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);

	/* BPF_ANY: only the most recent waker of this pid is kept */
	bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY);
	return 0;
}
75
/* Charge @delta (microseconds the current task spent off-CPU) to the
 * (waker, target, waker-stack, target-stack) key in the "counts" map.
 * @pid is the current task's pid, used to look up who woke it.
 * Always returns 0 (BPF program convention).
 */
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *woke;
	u64 zero = 0, *val;
	struct key_t key;

	/* zero waker explicitly: hash keys must have no uninitialized bytes */
	__builtin_memset(&key.waker, 0, sizeof(key.waker));
	bpf_get_current_comm(&key.target, sizeof(key.target));
	key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);
	key.wret = 0;

	/* if no waker was recorded, key keeps empty waker/wret == 0 */
	woke = bpf_map_lookup_elem(&wokeby, &pid);
	if (woke) {
		key.wret = woke->ret;
		__builtin_memcpy(&key.waker, woke->name, sizeof(key.waker));
		/* one-shot: each wakeup record is consumed exactly once */
		bpf_map_delete_elem(&wokeby, &pid);
	}

	/* lookup-or-create; the re-lookup handles a concurrent insert
	 * racing the BPF_NOEXIST update on another CPU
	 */
	val = bpf_map_lookup_elem(&counts, &key);
	if (!val) {
		bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&counts, &key);
		if (!val)
			return 0;
	}
	(*val) += delta;
	return 0;
}
104
#if 1
/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
/* Context-switch hook (tracepoint variant): on every sched_switch,
 * (1) stamp the outgoing task's switch-out time into "start", and
 * (2) compute how long the current task was off-CPU and charge it.
 *
 * NOTE(review): at the sched_switch tracepoint 'current' is generally
 * still the prev task, so bpf_get_current_pid_tgid() below may equal
 * prev_pid — the freshly stored timestamp would then be read back and
 * deleted with delta ~0 (filtered by MINBLOCK_US). The kprobe variant
 * in the #else runs after the switch, where 'current' is the incoming
 * task. Confirm the tracepoint variant measures what is intended.
 */
SEC("tracepoint/sched/sched_switch")
int oncpu(struct trace_event_raw_sched_switch *ctx)
{
	/* record previous thread sleep time */
	u32 pid = ctx->prev_pid;
#else
/* kprobe.multi on finish_task_switch*: runs after the switch completes,
 * so 'current' is the task that just got the CPU; arg1 is the prev task.
 */
SEC("kprobe.multi/finish_task_switch*")
int oncpu(struct pt_regs *ctx)
{
	struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx);
	/* record previous thread sleep time */
	u32 pid = BPF_CORE_READ(p, pid);
#endif
	u64 delta, ts, *tsp;

	/* stamp the outgoing task's switch-out time */
	ts = bpf_ktime_get_ns();
	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);

	/* calculate current thread's delta time */
	pid = bpf_get_current_pid_tgid();	/* low 32 bits = pid (tid) */
	tsp = bpf_map_lookup_elem(&start, &pid);
	if (!tsp)
		/* missed start or filtered */
		return 0;

	delta = bpf_ktime_get_ns() - *tsp;
	bpf_map_delete_elem(&start, &pid);
	delta = delta / 1000;	/* ns -> us */
	if (delta < MINBLOCK_US)
		return 0;

	return update_counts(ctx, pid, delta);
}
/* GPL license is required to use GPL-only helpers (e.g. bpf_get_stackid) */
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
142