1// SPDX-License-Identifier: GPL-2.0 2/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ 3 4#include <argp.h> 5 6#include <sys/prctl.h> 7#include "local_storage_rcu_tasks_trace_bench.skel.h" 8#include "bench.h" 9 10#include <signal.h> 11 12static struct { 13 __u32 nr_procs; 14 __u32 kthread_pid; 15} args = { 16 .nr_procs = 1000, 17 .kthread_pid = 0, 18}; 19 20enum { 21 ARG_NR_PROCS = 7000, 22 ARG_KTHREAD_PID = 7001, 23}; 24 25static const struct argp_option opts[] = { 26 { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0, 27 "Set number of user processes to spin up"}, 28 { "kthread_pid", ARG_KTHREAD_PID, "PID", 0, 29 "Pid of rcu_tasks_trace kthread for ticks tracking"}, 30 {}, 31}; 32 33static error_t parse_arg(int key, char *arg, struct argp_state *state) 34{ 35 long ret; 36 37 switch (key) { 38 case ARG_NR_PROCS: 39 ret = strtol(arg, NULL, 10); 40 if (ret < 1 || ret > UINT_MAX) { 41 fprintf(stderr, "invalid nr_procs\n"); 42 argp_usage(state); 43 } 44 args.nr_procs = ret; 45 break; 46 case ARG_KTHREAD_PID: 47 ret = strtol(arg, NULL, 10); 48 if (ret < 1) { 49 fprintf(stderr, "invalid kthread_pid\n"); 50 argp_usage(state); 51 } 52 args.kthread_pid = ret; 53 break; 54break; 55 default: 56 return ARGP_ERR_UNKNOWN; 57 } 58 59 return 0; 60} 61 62const struct argp bench_local_storage_rcu_tasks_trace_argp = { 63 .options = opts, 64 .parser = parse_arg, 65}; 66 67#define MAX_SLEEP_PROCS 150000 68 69static void validate(void) 70{ 71 if (env.producer_cnt != 1) { 72 fprintf(stderr, "benchmark doesn't support multi-producer!\n"); 73 exit(1); 74 } 75 if (env.consumer_cnt != 0) { 76 fprintf(stderr, "benchmark doesn't support consumer!\n"); 77 exit(1); 78 } 79 80 if (args.nr_procs > MAX_SLEEP_PROCS) { 81 fprintf(stderr, "benchmark supports up to %u sleeper procs!\n", 82 MAX_SLEEP_PROCS); 83 exit(1); 84 } 85} 86 87static long kthread_pid_ticks(void) 88{ 89 char procfs_path[100]; 90 long stime; 91 FILE *f; 92 93 if (!args.kthread_pid) 94 return -1; 95 96 sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid); 97 f = fopen(procfs_path, "r"); 98 if (!f) { 99 fprintf(stderr, "couldn't open %s, exiting\n", procfs_path); 100 goto err_out; 101 } 102 if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) { 103 fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path); 104 goto err_out; 105 } 106 fclose(f); 107 return stime; 108 109err_out: 110 if (f) 111 fclose(f); 112 exit(1); 113 return 0; 114} 115 116static struct { 117 struct local_storage_rcu_tasks_trace_bench *skel; 118 long prev_kthread_stime; 119} ctx; 120 121static void sleep_and_loop(void) 122{ 123 while (true) { 124 sleep(rand() % 4); 125 syscall(__NR_getpgid); 126 } 127} 128 129static void local_storage_tasks_trace_setup(void) 130{ 131 int i, err, forkret, runner_pid; 132 133 runner_pid = getpid(); 134 135 for (i = 0; i < args.nr_procs; i++) { 136 forkret = fork(); 137 if (forkret < 0) { 138 fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i, 139 args.nr_procs); 140 goto err_out; 141 } 142 143 if (!forkret) { 144 err = prctl(PR_SET_PDEATHSIG, SIGKILL); 145 if (err < 0) { 146 fprintf(stderr, "prctl failed with err %d, exiting\n", errno); 147 goto err_out; 148 } 149 150 if (getppid() != runner_pid) { 151 fprintf(stderr, "Runner died while spinning up procs, exiting\n"); 152 goto err_out; 153 } 154 sleep_and_loop(); 155 } 156 } 157 printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid); 158 159 setup_libbpf(); 160 161 ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load(); 162 if (!ctx.skel) { 163 fprintf(stderr, "Error doing open_and_load, exiting\n"); 164 goto err_out; 165 } 166 167 ctx.prev_kthread_stime = kthread_pid_ticks(); 168 169 if (!bpf_program__attach(ctx.skel->progs.get_local)) { 170 fprintf(stderr, "Error attaching bpf program\n"); 171 goto err_out; 172 } 173 174 if (!bpf_program__attach(ctx.skel->progs.pregp_step)) { 175 fprintf(stderr, "Error attaching bpf program\n"); 176 goto err_out; 177 } 178 179 if (!bpf_program__attach(ctx.skel->progs.postgp)) { 180 fprintf(stderr, "Error attaching bpf program\n"); 181 goto err_out; 182 } 183 184 return; 185err_out: 186 exit(1); 187} 188 189static void measure(struct bench_res *res) 190{ 191 long ticks; 192 193 res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0); 194 res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0); 195 ticks = kthread_pid_ticks(); 196 res->stime = ticks - ctx.prev_kthread_stime; 197 ctx.prev_kthread_stime = ticks; 198} 199 200static void *producer(void *input) 201{ 202 while (true) 203 syscall(__NR_getpgid); 204 return NULL; 205} 206 207static void report_progress(int iter, struct bench_res *res, long delta_ns) 208{ 209 if (ctx.skel->bss->unexpected) { 210 fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp)."); 211 fprintf(stderr, "Data can't be trusted, exiting\n"); 212 exit(1); 213 } 214 215 if (env.quiet) 216 return; 217 218 printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n", 219 iter, res->gp_ns / (double)res->gp_ct); 220 printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n", 221 iter, res->stime / (double)res->gp_ct); 222} 223 224static void report_final(struct bench_res res[], int res_cnt) 225{ 226 struct basic_stats gp_stat; 227 228 grace_period_latency_basic_stats(res, res_cnt, &gp_stat); 229 printf("SUMMARY tasks_trace grace period latency"); 230 printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev); 231 grace_period_ticks_basic_stats(res, res_cnt, &gp_stat); 232 printf("SUMMARY ticks per tasks_trace grace period"); 233 printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev); 234} 235 236/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use 237 * of RCU Tasks-Trace. 238 * 239 * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside 240 * from sleep() loop, and creating/destroying BPF task-local storage on wakeup. 241 * The number of forked tasks is configurable. 242 * 243 * exercising code paths which call call_rcu_tasks_trace while there are many 244 * thousands of tasks on the system should result in RCU Tasks-Trace having to 245 * do a noticeable amount of work. 246 * 247 * This should be observable by measuring rcu_tasks_trace_kthread CPU usage 248 * after the grace period has ended, or by measuring grace period latency. 249 * 250 * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step 251 * and rcu_tasks_trace_postgp functions to measure grace period latency and 252 * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks 253 */ 254const struct bench bench_local_storage_tasks_trace = { 255 .name = "local-storage-tasks-trace", 256 .argp = &bench_local_storage_rcu_tasks_trace_argp, 257 .validate = validate, 258 .setup = local_storage_tasks_trace_setup, 259 .producer_thread = producer, 260 .measure = measure, 261 .report_progress = report_progress, 262 .report_final = report_final, 263}; 264