1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Facebook
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <stdint.h>
6#include <stddef.h>
7#include <stdbool.h>
8#include <linux/bpf.h>
9#include <bpf/bpf_helpers.h>
10#include "bpf_misc.h"
11#include "bpf_compiler.h"
12
/* Size in bytes of the Symbol.name buffer. */
#define FUNCTION_NAME_LEN 64
/* Size in bytes of the Symbol.file buffer. */
#define FILE_NAME_LEN 128
/* Size in bytes of the Event.comm buffer. */
#define TASK_COMM_LEN 16
16
/*
 * Byte offsets that are added to user-space pointers before the program reads
 * a field from the traced process. Members are named <Object>_<field>.
 */
typedef struct {
	int PyThreadState_frame; /* thread state -> pointer to the first frame */
	int PyThreadState_thread; /* thread state -> value compared with pthread_self */
	int PyFrameObject_back; /* frame -> previous frame pointer */
	int PyFrameObject_code; /* frame -> code object pointer */
	int PyFrameObject_lineno; /* not referenced in the visible code */
	int PyCodeObject_filename; /* code object -> file name string pointer */
	int PyCodeObject_name; /* code object -> function name string pointer */
	int String_data; /* string object -> start of the character data */
	int String_size; /* not referenced in the visible code */
} OffsetConfig;
28
/* Per-process configuration; this is the value type of pidmap. */
typedef struct {
	/* User address from which __on_event() reads the "current" thread state pointer. */
	uintptr_t current_state_addr;
	/* User address from which get_thread_state() reads the int TLS key. */
	uintptr_t tls_key_addr;
	/* Field offsets applied when reading the traced process' objects. */
	OffsetConfig offsets;
	/* When true the thread state is found via get_thread_state(), otherwise
	 * the pointer read from current_state_addr is used directly.
	 */
	bool use_tls;
} PidData;
35
/* Counters kept in statsmap. */
typedef struct {
	/* Incremented once each time __on_event() reaches its output stage. */
	uint32_t success;
} Stats;
39
/*
 * Function/file name pair filled in by get_frame_data(); the whole struct is
 * used as the key of symbolmap.
 */
typedef struct {
	char name[FUNCTION_NAME_LEN]; /* string read via the code object's name pointer */
	char file[FILE_NAME_LEN]; /* string read via the code object's filename pointer */
} Symbol;
44
/*
 * Record assembled by __on_event() and pushed to perfmap. Only the bytes up
 * to (not including) `metadata` are passed to bpf_perf_event_output().
 * STACK_MAX_LEN is not defined in the visible part of this file and must be
 * provided before this point.
 */
typedef struct {
	uint32_t pid; /* upper 32 bits of bpf_get_current_pid_tgid() */
	uint32_t tid; /* lower 32 bits of bpf_get_current_pid_tgid() */
	char comm[TASK_COMM_LEN]; /* filled by bpf_get_current_comm() */
	int32_t kernel_stack_id; /* bpf_get_stackid() result with flags 0 */
	int32_t user_stack_id; /* bpf_get_stackid() result with BPF_F_USER_STACK */
	bool thread_current; /* selected thread state equals the "current" one */
	bool pthread_match; /* thread id stored in the thread state matched */
	bool stack_complete; /* frame pointer was NULL after the walk */
	int16_t stack_len; /* index of the last written stack slot, plus one */
	int32_t stack[STACK_MAX_LEN]; /* values read from symbolmap, one per frame */

	int has_meta; /* set to 0 by __on_event() */
	int metadata; /* first field excluded from the perf output */
	char dummy_safeguard;
} Event;
61
62
/* Local pid_t definition; used for the pid/tid values derived in __on_event(). */
typedef int pid_t;
64
/*
 * Raw user-space pointers collected by get_frame_data() for a single frame.
 * They are addresses in the traced process and are only ever passed to
 * bpf_probe_read_user*() in the visible code, never dereferenced directly.
 */
typedef struct {
	void* f_back; // PyFrameObject.f_back, previous frame
	void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
	void* co_filename; // PyCodeObject.co_filename
	void* co_name; // PyCodeObject.co_name
} FrameData;
71
72#ifdef SUBPROGS
73__noinline
74#else
75__always_inline
76#endif
77static void *get_thread_state(void *tls_base, PidData *pidData)
78{
79	void* thread_state;
80	int key;
81
82	bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
83	bpf_probe_read_user(&thread_state, sizeof(thread_state),
84			    tls_base + 0x310 + key * 0x10 + 0x08);
85	return thread_state;
86}
87
88static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
89					   FrameData *frame, Symbol *symbol)
90{
91	// read data from PyFrameObject
92	bpf_probe_read_user(&frame->f_back,
93			    sizeof(frame->f_back),
94			    frame_ptr + pidData->offsets.PyFrameObject_back);
95	bpf_probe_read_user(&frame->f_code,
96			    sizeof(frame->f_code),
97			    frame_ptr + pidData->offsets.PyFrameObject_code);
98
99	// read data from PyCodeObject
100	if (!frame->f_code)
101		return false;
102	bpf_probe_read_user(&frame->co_filename,
103			    sizeof(frame->co_filename),
104			    frame->f_code + pidData->offsets.PyCodeObject_filename);
105	bpf_probe_read_user(&frame->co_name,
106			    sizeof(frame->co_name),
107			    frame->f_code + pidData->offsets.PyCodeObject_name);
108	// read actual names into symbol
109	if (frame->co_filename)
110		bpf_probe_read_user_str(&symbol->file,
111					sizeof(symbol->file),
112					frame->co_filename +
113					pidData->offsets.String_data);
114	if (frame->co_name)
115		bpf_probe_read_user_str(&symbol->name,
116					sizeof(symbol->name),
117					frame->co_name +
118					pidData->offsets.String_data);
119	return true;
120}
121
/*
 * Hash map (one entry) from an int key to PidData. __on_event() looks it up
 * with the upper 32 bits of bpf_get_current_pid_tgid() and returns early when
 * there is no entry.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, PidData);
} pidmap SEC(".maps");
128
/*
 * Hash map (one entry) holding the Event that __on_event() fills in; it is
 * looked up with key 0. No code in the visible source inserts that entry.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Event);
} eventmap SEC(".maps");
135
/*
 * Hash map keyed by a whole Symbol (name + file) with an int value. The
 * stack walk stores the looked-up value in Event.stack[]; the entry for the
 * all-zero Symbol is used by __on_event() as the symbol counter.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, Symbol);
	__type(value, int);
} symbolmap SEC(".maps");
142
/* Single-slot array map holding the Stats counters; accessed with index 0. */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Stats);
} statsmap SEC(".maps");
149
/* Perf event array (32 entries) that __on_event() writes Event records to. */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 32);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perfmap SEC(".maps");
156
/*
 * Stack trace map passed to bpf_get_stackid() for both the user and the
 * kernel stack. Each value has room for 127 entries of sizeof(long long).
 */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1000);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
163
164#ifdef USE_BPF_LOOP
/* State handed to process_frame_callback() through bpf_loop(). */
struct process_frame_ctx {
	int cur_cpu; /* value of bpf_get_smp_processor_id() taken by the caller */
	int32_t *symbol_counter; /* symbolmap value for the all-zero Symbol */
	void *frame_ptr; /* user-space frame pointer the walk starts from */
	FrameData *frame; /* scratch storage for the frame being read */
	PidData *pidData; /* offsets/config of the traced process */
	Symbol *sym; /* scratch storage for the frame's symbol */
	Event *event; /* event whose stack[] / stack_len are filled in */
	bool done; /* set by the callback when a symbolmap lookup fails */
};
175
176static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
177{
178	int zero = 0;
179	void *frame_ptr = ctx->frame_ptr;
180	PidData *pidData = ctx->pidData;
181	FrameData *frame = ctx->frame;
182	int32_t *symbol_counter = ctx->symbol_counter;
183	int cur_cpu = ctx->cur_cpu;
184	Event *event = ctx->event;
185	Symbol *sym = ctx->sym;
186
187	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
188		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
189		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
190
191		if (!symbol_id) {
192			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
193			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
194			if (!symbol_id) {
195				ctx->done = true;
196				return 1;
197			}
198		}
199		if (*symbol_id == new_symbol_id)
200			(*symbol_counter)++;
201
202		barrier_var(i);
203		if (i >= STACK_MAX_LEN)
204			return 1;
205
206		event->stack[i] = *symbol_id;
207
208		event->stack_len = i + 1;
209		frame_ptr = frame->f_back;
210	}
211	return 0;
212}
213#endif /* USE_BPF_LOOP */
214
/*
 * Build one Event for the current task and emit it to perfmap.
 *
 * Steps: look up the PidData for the current pid (return if absent), fill in
 * pid/tid/comm and the kernel/user stack ids, determine the thread state
 * pointer, walk up to STACK_MAX_LEN frames recording a symbol id per frame,
 * bump the success counter and output the event.
 *
 * Linkage and inlining are selected by GLOBAL_FUNC / SUBPROGS; the loop form
 * is selected by USE_BPF_LOOP, USE_ITER, NO_UNROLL and UNROLL_COUNT.
 *
 * Always returns 0. Return values of the probe-read helpers are not checked.
 */
#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
static __noinline
#else
static __always_inline
#endif
int __on_event(struct bpf_raw_tracepoint_args *ctx)
{
	/* Upper 32 bits are used as the pid (pidmap key), lower 32 as the tid. */
	uint64_t pid_tgid = bpf_get_current_pid_tgid();
	pid_t pid = (pid_t)(pid_tgid >> 32);
	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
	if (!pidData)
		return 0;

	/* The event is assembled in the eventmap entry stored under key 0. */
	int zero = 0;
	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
	if (!event)
		return 0;

	event->pid = pid;

	event->tid = (pid_t)pid_tgid;
	bpf_get_current_comm(&event->comm, sizeof(event->comm));

	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);

	/* Thread state pointer stored at the configured user address. */
	void* thread_state_current = (void*)0;
	bpf_probe_read_user(&thread_state_current,
			    sizeof(thread_state_current),
			    (void*)(long)pidData->current_state_addr);

	/* NOTE(review): the task_struct pointer itself is used as the TLS base
	 * for the user-space reads below - confirm this is intended.
	 */
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	void* tls_base = (void*)task;

	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
		: thread_state_current;
	event->thread_current = thread_state == thread_state_current;

	if (pidData->use_tls) {
		/* Compare the value at tls_base + 0x10 with the thread id
		 * recorded in the thread state.
		 */
		uint64_t pthread_created;
		uint64_t pthread_self;
		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
				    tls_base + 0x10);

		bpf_probe_read_user(&pthread_created,
				    sizeof(pthread_created),
				    thread_state +
				    pidData->offsets.PyThreadState_thread);
		event->pthread_match = pthread_created == pthread_self;
	} else {
		event->pthread_match = 1;
	}

	/* pthread_match is already 1 whenever use_tls is false, so the second
	 * operand does not change the outcome.
	 */
	if (event->pthread_match || !pidData->use_tls) {
		void* frame_ptr;
		FrameData frame;
		Symbol sym = {};
		int cur_cpu = bpf_get_smp_processor_id();

		/* First frame of the thread. */
		bpf_probe_read_user(&frame_ptr,
				    sizeof(frame_ptr),
				    thread_state +
				    pidData->offsets.PyThreadState_frame);

		/* sym is still all-zero here: its symbolmap entry is the counter. */
		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
		if (symbol_counter == NULL)
			return 0;
#ifdef USE_BPF_LOOP
	/* NOTE(review): this local `ctx` shadows the function parameter until
	 * the end of the enclosing if-block; the bpf_perf_event_output() call
	 * further down is outside that block and uses the parameter again.
	 */
	struct process_frame_ctx ctx = {
		.cur_cpu = cur_cpu,
		.symbol_counter = symbol_counter,
		.frame_ptr = frame_ptr,
		.frame = &frame,
		.pidData = pidData,
		.sym = &sym,
		.event = event,
	};

	bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
	if (ctx.done)
		return 0;
#else
#if defined(USE_ITER)
/* no for loop, no unrolling */
#elif defined(NO_UNROLL)
	__pragma_loop_no_unroll
#elif defined(UNROLL_COUNT)
	__pragma_loop_unroll_count(UNROLL_COUNT)
#else
	__pragma_loop_unroll_full
#endif /* NO_UNROLL */
		/* Unwind python stack */
#ifdef USE_ITER
		int i;
		bpf_for(i, 0, STACK_MAX_LEN) {
#else /* !USE_ITER */
		for (int i = 0; i < STACK_MAX_LEN; ++i) {
#endif
			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
				if (!symbol_id) {
					/* Unknown symbol: insert it, then fetch the stored value. */
					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
					if (!symbol_id)
						return 0;
				}
				if (*symbol_id == new_symbol_id)
					(*symbol_counter)++;
				event->stack[i] = *symbol_id;
				event->stack_len = i + 1;
				/* Step to the previous frame. */
				frame_ptr = frame.f_back;
			}
		}
#endif /* USE_BPF_LOOP */
		/* NOTE(review): in the USE_BPF_LOOP build this local frame_ptr is
		 * never updated by the loop (only a copy was placed in the
		 * callback context), so this test sees the initial frame pointer.
		 */
		event->stack_complete = frame_ptr == NULL;
	} else {
		event->stack_complete = 1;
	}

	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
	if (stats)
		stats->success++;

	event->has_meta = 0;
	/* Only the bytes preceding Event.metadata are emitted. */
	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
	return 0;
}
345
346SEC("raw_tracepoint/kfree_skb")
347int on_event(struct bpf_raw_tracepoint_args* ctx)
348{
349	int ret = 0;
350	ret |= __on_event(ctx);
351	ret |= __on_event(ctx);
352	ret |= __on_event(ctx);
353	ret |= __on_event(ctx);
354	ret |= __on_event(ctx);
355	return ret;
356}
357
/* License string placed in the object's "license" section. */
char _license[] SEC("license") = "GPL";
359