// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_compiler.h"

/*
 * Walks a CPython process's PyThreadState/PyFrameObject chain in user
 * memory and emits the symbolized Python stack to a perf event buffer.
 * Used as a BPF verifier scale test; STACK_MAX_LEN is expected to be
 * #defined by the file that includes this one.
 */

#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
#define TASK_COMM_LEN 16

/* Object-layout offsets for the target CPython build, supplied by
 * userspace via pidmap so the program is not tied to one interpreter
 * version. */
typedef struct {
	int PyThreadState_frame;
	int PyThreadState_thread;
	int PyFrameObject_back;
	int PyFrameObject_code;
	int PyFrameObject_lineno;
	int PyCodeObject_filename;
	int PyCodeObject_name;
	int String_data;
	int String_size;
} OffsetConfig;

typedef struct {
	uintptr_t current_state_addr;
	uintptr_t tls_key_addr;
	OffsetConfig offsets;
	bool use_tls;
} PidData;

typedef struct {
	uint32_t success;
} Stats;

typedef struct {
	char name[FUNCTION_NAME_LEN];
	char file[FILE_NAME_LEN];
} Symbol;

typedef struct {
	uint32_t pid;
	uint32_t tid;
	char comm[TASK_COMM_LEN];
	int32_t kernel_stack_id;
	int32_t user_stack_id;
	bool thread_current;
	bool pthread_match;
	bool stack_complete;
	int16_t stack_len;
	int32_t stack[STACK_MAX_LEN];

	int has_meta;
	int metadata;
	char dummy_safeguard;
} Event;

typedef int pid_t;

typedef struct {
	void* f_back;      // PyFrameObject.f_back, previous frame
	void* f_code;      // PyFrameObject.f_code, pointer to PyCodeObject
	void* co_filename; // PyCodeObject.co_filename
	void* co_name;     // PyCodeObject.co_name
} FrameData;

#ifdef SUBPROGS
__noinline
#else
__always_inline
#endif
static void *get_thread_state(void *tls_base, PidData *pidData)
{
	void* thread_state;
	int key;

	bpf_probe_read_user(&key, sizeof(key),
			    (void*)(long)pidData->tls_key_addr);
	bpf_probe_read_user(&thread_state, sizeof(thread_state),
			    tls_base + 0x310 + key * 0x10 + 0x08);
	return thread_state;
}
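/*
 * The magic constants in get_thread_state() are an assumption about the
 * x86-64 glibc pthread layout, not something this file derives: keys set
 * with pthread_setspecific() live in the `specific_1stblock` array at
 * offset 0x310 of `struct pthread`, each slot is a 16-byte {seq, data}
 * pair, and the +0x08 selects the data pointer. Likewise, the
 * `tls_base + 0x10` read in __on_event() below targets the `self`
 * pointer of tcbhead_t. Both offsets can differ across glibc builds.
 */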
static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
					   FrameData *frame, Symbol *symbol)
{
	// read data from PyFrameObject
	bpf_probe_read_user(&frame->f_back,
			    sizeof(frame->f_back),
			    frame_ptr + pidData->offsets.PyFrameObject_back);
	bpf_probe_read_user(&frame->f_code,
			    sizeof(frame->f_code),
			    frame_ptr + pidData->offsets.PyFrameObject_code);

	// read data from PyCodeObject
	if (!frame->f_code)
		return false;
	bpf_probe_read_user(&frame->co_filename,
			    sizeof(frame->co_filename),
			    frame->f_code + pidData->offsets.PyCodeObject_filename);
	bpf_probe_read_user(&frame->co_name,
			    sizeof(frame->co_name),
			    frame->f_code + pidData->offsets.PyCodeObject_name);

	// read actual names into symbol
	if (frame->co_filename)
		bpf_probe_read_user_str(&symbol->file, sizeof(symbol->file),
					frame->co_filename +
					pidData->offsets.String_data);
	if (frame->co_name)
		bpf_probe_read_user_str(&symbol->name, sizeof(symbol->name),
					frame->co_name +
					pidData->offsets.String_data);
	return true;
}

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, PidData);
} pidmap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Event);
} eventmap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__type(key, Symbol);
	__type(value, int);
} symbolmap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, Stats);
} statsmap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(max_entries, 32);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} perfmap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, 1000);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");

#ifdef USE_BPF_LOOP
struct process_frame_ctx {
	int cur_cpu;
	int32_t *symbol_counter;
	void *frame_ptr;
	FrameData *frame;
	PidData *pidData;
	Symbol *sym;
	Event *event;
	bool done;
};

static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
	int zero = 0;
	void *frame_ptr = ctx->frame_ptr;
	PidData *pidData = ctx->pidData;
	FrameData *frame = ctx->frame;
	int32_t *symbol_counter = ctx->symbol_counter;
	int cur_cpu = ctx->cur_cpu;
	Event *event = ctx->event;
	Symbol *sym = ctx->sym;

	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

		if (!symbol_id) {
			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
			if (!symbol_id) {
				ctx->done = true;
				return 1;
			}
		}
		if (*symbol_id == new_symbol_id)
			(*symbol_counter)++;

		/* barrier_var() keeps the compiler from eliding this bounds
		 * check, so the verifier can prove the stack index is safe. */
		barrier_var(i);
		if (i >= STACK_MAX_LEN)
			return 1;

		event->stack[i] = *symbol_id;
		event->stack_len = i + 1;
		frame_ptr = frame->f_back;
	}
	return 0;
}
#endif /* USE_BPF_LOOP */
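/*
 * When built with USE_BPF_LOOP, bpf_loop() invokes process_frame_callback()
 * once per iteration: returning 0 continues the loop, returning 1 stops it.
 * Since the callback cannot make __on_event() itself bail out, a failed map
 * insertion is reported back through ctx->done instead.
 */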
#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
static __noinline
#else
static __always_inline
#endif
int __on_event(struct bpf_raw_tracepoint_args *ctx)
{
	uint64_t pid_tgid = bpf_get_current_pid_tgid();
	pid_t pid = (pid_t)(pid_tgid >> 32);
	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
	if (!pidData)
		return 0;

	int zero = 0;
	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
	if (!event)
		return 0;

	event->pid = pid;

	event->tid = (pid_t)pid_tgid;
	bpf_get_current_comm(&event->comm, sizeof(event->comm));

	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);

	void* thread_state_current = (void*)0;
	bpf_probe_read_user(&thread_state_current,
			    sizeof(thread_state_current),
			    (void*)(long)pidData->current_state_addr);

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	void* tls_base = (void*)task;

	void* thread_state = pidData->use_tls ?
		get_thread_state(tls_base, pidData) : thread_state_current;
	event->thread_current = thread_state == thread_state_current;

	if (pidData->use_tls) {
		uint64_t pthread_created;
		uint64_t pthread_self;

		bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
				    tls_base + 0x10);

		bpf_probe_read_user(&pthread_created,
				    sizeof(pthread_created),
				    thread_state +
				    pidData->offsets.PyThreadState_thread);
		event->pthread_match = pthread_created == pthread_self;
	} else {
		event->pthread_match = 1;
	}

	if (event->pthread_match || !pidData->use_tls) {
		void* frame_ptr;
		FrameData frame;
		Symbol sym = {};
		int cur_cpu = bpf_get_smp_processor_id();

		bpf_probe_read_user(&frame_ptr,
				    sizeof(frame_ptr),
				    thread_state +
				    pidData->offsets.PyThreadState_frame);

		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
		if (symbol_counter == NULL)
			return 0;
#ifdef USE_BPF_LOOP
	struct process_frame_ctx ctx = {
		.cur_cpu = cur_cpu,
		.symbol_counter = symbol_counter,
		.frame_ptr = frame_ptr,
		.frame = &frame,
		.pidData = pidData,
		.sym = &sym,
		.event = event,
	};

	bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
	if (ctx.done)
		return 0;
#else
#if defined(USE_ITER)
/* no for loop, no unrolling */
#elif defined(NO_UNROLL)
	__pragma_loop_no_unroll
#elif defined(UNROLL_COUNT)
	__pragma_loop_unroll_count(UNROLL_COUNT)
#else
	__pragma_loop_unroll_full
#endif /* NO_UNROLL */
		/* Unwind python stack */
#ifdef USE_ITER
		int i;
		bpf_for(i, 0, STACK_MAX_LEN) {
#else /* !USE_ITER */
		for (int i = 0; i < STACK_MAX_LEN; ++i) {
#endif
			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);

				if (!symbol_id) {
					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
					if (!symbol_id)
						return 0;
				}
				if (*symbol_id == new_symbol_id)
					(*symbol_counter)++;
				event->stack[i] = *symbol_id;
				event->stack_len = i + 1;
				frame_ptr = frame.f_back;
			}
		}
#endif /* USE_BPF_LOOP */
		event->stack_complete = frame_ptr == NULL;
	} else {
		event->stack_complete = 1;
	}

	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
	if (stats)
		stats->success++;

	event->has_meta = 0;
	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
	return 0;
}

SEC("raw_tracepoint/kfree_skb")
int on_event(struct bpf_raw_tracepoint_args* ctx)
{
	int ret = 0;

	/* Invoking the body five times multiplies the instruction count,
	 * which is the point of this verifier scale test. */
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	ret |= __on_event(ctx);
	return ret;
}

char _license[] SEC("license") = "GPL";
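/*
 * Userspace sketch (an assumption, not part of this test): a libbpf
 * loader would populate pidmap with the target PID's OffsetConfig and
 * attach on_event to the raw tracepoint, roughly:
 *
 *	struct bpf_object *obj = bpf_object__open_file("pyperf.bpf.o", NULL);
 *	struct bpf_program *prog;
 *
 *	bpf_object__load(obj);
 *	prog = bpf_object__find_program_by_name(obj, "on_event");
 *	bpf_program__attach_raw_tracepoint(prog, "kfree_skb");
 *
 * The object file name is a placeholder and error handling is omitted.
 */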