/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2023 Isovalent */
#ifndef __BPF_MPROG_H
#define __BPF_MPROG_H

#include <linux/bpf.h>

/* bpf_mprog framework:
 *
 * bpf_mprog is a generic layer for multi-program attachment. In-kernel users
 * of bpf_mprog don't need to care about the dependency resolution internals;
 * they can just consume it with a few API calls. Currently available
 * dependency directives are BPF_F_{BEFORE,AFTER}, which enable insertion of
 * a BPF program or BPF link relative to an existing BPF program or BPF link
 * inside the multi-program array, as well as prepend and append behavior if
 * no relative object was specified. See the corresponding selftests for
 * concrete examples (e.g. the tc_links and tc_opts test cases of test_progs).
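 *
 * For illustration (a sketch of the intended semantics, not literal code),
 * given an existing array [A, B]:
 *
 *   attach C with BPF_F_BEFORE relative to B  ->  [A, C, B]
 *   attach C with BPF_F_AFTER  relative to B  ->  [A, B, C]
 *   attach C with BPF_F_BEFORE, no relative   ->  [C, A, B] (prepend)
 *   attach C with BPF_F_AFTER,  no relative   ->  [A, B, C] (append)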
 *
 * Usage of bpf_mprog_{attach,detach,query}() core APIs with pseudo code:
 *
 *  Attach case:
 *
 *   struct bpf_mprog_entry *entry, *entry_new;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_attach(entry, &entry_new, [...]);
 *   if (!ret) {
 *       if (entry != entry_new) {
 *           // swap @entry to @entry_new at attach location
 *           // ensure there are no inflight users of @entry:
 *           synchronize_rcu();
 *       }
 *       bpf_mprog_commit(entry);
 *   } else {
 *       // error path, bail out, propagate @ret
 *   }
 *   // bpf_mprog user-side unlock
 *
 *  Detach case:
 *
 *   struct bpf_mprog_entry *entry, *entry_new;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_detach(entry, &entry_new, [...]);
 *   if (!ret) {
 *       // all lines marked (*) are optional; whether the bpf_mprog_bundle
 *       // should be freed depends on the use-case
 *       if (!bpf_mprog_total(entry_new))     (*)
 *           entry_new = NULL                 (*)
 *       // swap @entry to @entry_new at attach location
 *       // ensure there are no inflight users of @entry:
 *       synchronize_rcu();
 *       bpf_mprog_commit(entry);
 *       if (!entry_new)                      (*)
 *           // free bpf_mprog_bundle         (*)
 *   } else {
 *       // error path, bail out, propagate @ret
 *   }
 *   // bpf_mprog user-side unlock
 *
 *  Query case:
 *
 *   struct bpf_mprog_entry *entry;
 *   int ret;
 *
 *   // bpf_mprog user-side lock
 *   // fetch active @entry from attach location
 *   [...]
 *   ret = bpf_mprog_query(attr, uattr, entry);
 *   // bpf_mprog user-side unlock
 *
 *  Data/fast path:
 *
 *   struct bpf_mprog_entry *entry;
 *   struct bpf_mprog_fp *fp;
 *   struct bpf_prog *prog;
 *   int ret = [...];
 *
 *   rcu_read_lock();
 *   // fetch active @entry from attach location
 *   [...]
 *   bpf_mprog_foreach_prog(entry, fp, prog) {
 *       ret = bpf_prog_run(prog, [...]);
 *       // process @ret from program
 *   }
 *   [...]
 *   rcu_read_unlock();
 *
 * bpf_mprog locking considerations:
 *
 * bpf_mprog_{attach,detach,query}() must be protected by an external lock
 * (like RTNL in case of tcx).
 *
 * The bpf_mprog_entry pointer can be an __rcu-annotated pointer (in case of
 * tcx the netdevice has tcx_ingress and tcx_egress __rcu pointers) which gets
 * updated via rcu_assign_pointer(), pointing to the active bpf_mprog_entry of
 * the bpf_mprog_bundle.
 *
 * The fast path accesses the active bpf_mprog_entry within an RCU critical
 * section (in case of tcx it runs in NAPI, which provides RCU protection
 * there; other users might need an explicit rcu_read_lock()).
 * bpf_mprog_commit() assumes that there are no inflight users of the old
 * bpf_mprog_entry anymore.
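 *
 *   // Sketch: fast-path fetch of the active entry under RCU, assuming the
 *   // same hypothetical __rcu pointer as above:
 *   entry = rcu_dereference(example_entry);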
 *
 * The READ_ONCE()/WRITE_ONCE() pairing for bpf_mprog_fp's prog access is for
 * the replacement case where we don't swap the bpf_mprog_entry.
 */

#define bpf_mprog_foreach_tuple(entry, fp, cp, t)			\
	for (fp = &entry->fp_items[0], cp = &entry->parent->cp_items[0];\
	     ({								\
		t.prog = READ_ONCE(fp->prog);				\
		t.link = cp->link;					\
		t.prog;							\
	      });							\
	     fp++, cp++)
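
/* Example (sketch): walking programs and links together, e.g. for a
 * query-style dump; @entry, @fp, @cp and @tuple are caller-provided:
 *
 *	struct bpf_tuple tuple;
 *
 *	bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
 *		// tuple.prog and tuple.link describe one attachment slot;
 *		// tuple.link is NULL for non-link (prog-only) attachments
 *	}
 */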

#define bpf_mprog_foreach_prog(entry, fp, p)				\
	for (fp = &entry->fp_items[0];					\
	     (p = READ_ONCE(fp->prog));					\
	     fp++)

#define BPF_MPROG_MAX 64

struct bpf_mprog_fp {
	struct bpf_prog *prog;
};

struct bpf_mprog_cp {
	struct bpf_link *link;
};

struct bpf_mprog_entry {
	struct bpf_mprog_fp fp_items[BPF_MPROG_MAX];
	struct bpf_mprog_bundle *parent;
};

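/* A bundle holds two entries (a and b). Only one is active at a time; updates
 * are staged into the inactive peer (see bpf_mprog_peer()) and then published
 * by swapping the active entry pointer at the attach location.
 */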
struct bpf_mprog_bundle {
	struct bpf_mprog_entry a;
	struct bpf_mprog_entry b;
	struct bpf_mprog_cp cp_items[BPF_MPROG_MAX];
	struct bpf_prog *ref;
	atomic64_t revision;
	u32 count;
};

struct bpf_tuple {
	struct bpf_prog *prog;
	struct bpf_link *link;
};

static inline struct bpf_mprog_entry *
bpf_mprog_peer(const struct bpf_mprog_entry *entry)
{
	if (entry == &entry->parent->a)
		return &entry->parent->b;
	else
		return &entry->parent->a;
}

static inline void bpf_mprog_bundle_init(struct bpf_mprog_bundle *bundle)
{
	BUILD_BUG_ON(sizeof(bundle->a.fp_items[0]) > sizeof(u64));
	BUILD_BUG_ON(ARRAY_SIZE(bundle->a.fp_items) !=
		     ARRAY_SIZE(bundle->cp_items));

	memset(bundle, 0, sizeof(*bundle));
	atomic64_set(&bundle->revision, 1);
	bundle->a.parent = bundle;
	bundle->b.parent = bundle;
}
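
/* Example (sketch): a hypothetical user allocating a bundle and starting
 * out with &bundle->a as the initially active entry; the surrounding code
 * and naming are illustrative, not part of this API:
 *
 *	struct bpf_mprog_bundle *bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
 *
 *	if (!bundle)
 *		return -ENOMEM;
 *	bpf_mprog_bundle_init(bundle);
 *	// publish &bundle->a as the active entry at the attach location
 */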

static inline void bpf_mprog_inc(struct bpf_mprog_entry *entry)
{
	entry->parent->count++;
}

static inline void bpf_mprog_dec(struct bpf_mprog_entry *entry)
{
	entry->parent->count--;
}

static inline int bpf_mprog_max(void)
{
	return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1;
}

static inline int bpf_mprog_total(struct bpf_mprog_entry *entry)
{
	int total = entry->parent->count;

	WARN_ON_ONCE(total > bpf_mprog_max());
	return total;
}

static inline bool bpf_mprog_exists(struct bpf_mprog_entry *entry,
				    struct bpf_prog *prog)
{
	const struct bpf_mprog_fp *fp;
	const struct bpf_prog *tmp;

	bpf_mprog_foreach_prog(entry, fp, tmp) {
		if (tmp == prog)
			return true;
	}
	return false;
}

static inline void bpf_mprog_mark_for_release(struct bpf_mprog_entry *entry,
					      struct bpf_tuple *tuple)
{
	WARN_ON_ONCE(entry->parent->ref);
	if (!tuple->link)
		entry->parent->ref = tuple->prog;
}

static inline void bpf_mprog_complete_release(struct bpf_mprog_entry *entry)
{
	/* In the non-link case prog deletions can only drop the reference
	 * to the prog after the bpf_mprog_entry got swapped and the
	 * bpf_mprog ensured that there are no inflight users anymore.
	 *
	 * Paired with bpf_mprog_mark_for_release().
	 */
	if (entry->parent->ref) {
		bpf_prog_put(entry->parent->ref);
		entry->parent->ref = NULL;
	}
}

static inline void bpf_mprog_revision_new(struct bpf_mprog_entry *entry)
{
	atomic64_inc(&entry->parent->revision);
}

static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry)
{
	bpf_mprog_complete_release(entry);
	bpf_mprog_revision_new(entry);
}

static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry)
{
	return atomic64_read(&entry->parent->revision);
}
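
/* Example (sketch): how a caller-supplied expected revision could be checked
 * before requesting an update; the error code choice is illustrative:
 *
 *	if (revision && revision != bpf_mprog_revision(entry))
 *		return -ESTALE;
 */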

static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst,
					struct bpf_mprog_entry *src)
{
	memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items));
}

static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *dst)
{
	memset(dst->fp_items, 0, sizeof(dst->fp_items));
}

static inline void bpf_mprog_clear_all(struct bpf_mprog_entry *entry,
				       struct bpf_mprog_entry **entry_new)
{
	struct bpf_mprog_entry *peer;

	peer = bpf_mprog_peer(entry);
	bpf_mprog_entry_clear(peer);
	peer->parent->count = 0;
	*entry_new = peer;
}

static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx)
{
	int total = bpf_mprog_total(entry);

	memmove(entry->fp_items + idx + 1,
		entry->fp_items + idx,
		(total - idx) * sizeof(struct bpf_mprog_fp));

	memmove(entry->parent->cp_items + idx + 1,
		entry->parent->cp_items + idx,
		(total - idx) * sizeof(struct bpf_mprog_cp));
}

static inline void bpf_mprog_entry_shrink(struct bpf_mprog_entry *entry, int idx)
{
	/* Total array size is needed in this case to ensure the NULL
	 * entry is copied at the end.
	 */
	int total = ARRAY_SIZE(entry->fp_items);

	memmove(entry->fp_items + idx,
		entry->fp_items + idx + 1,
		(total - idx - 1) * sizeof(struct bpf_mprog_fp));

	memmove(entry->parent->cp_items + idx,
		entry->parent->cp_items + idx + 1,
		(total - idx - 1) * sizeof(struct bpf_mprog_cp));
}

static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 idx,
				  struct bpf_mprog_fp **fp,
				  struct bpf_mprog_cp **cp)
{
	*fp = &entry->fp_items[idx];
	*cp = &entry->parent->cp_items[idx];
}

static inline void bpf_mprog_write(struct bpf_mprog_fp *fp,
				   struct bpf_mprog_cp *cp,
				   struct bpf_tuple *tuple)
{
	WRITE_ONCE(fp->prog, tuple->prog);
	cp->link = tuple->link;
}
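
/* Example (sketch): how the helpers above could be combined to insert a new
 * tuple at position @idx into an inactive peer entry; illustrative only:
 *
 *	struct bpf_mprog_fp *fp;
 *	struct bpf_mprog_cp *cp;
 *
 *	bpf_mprog_entry_grow(peer, idx);
 *	bpf_mprog_read(peer, idx, &fp, &cp);
 *	bpf_mprog_write(fp, cp, &tuple);
 *	bpf_mprog_inc(peer);
 */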

int bpf_mprog_attach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog_new, struct bpf_link *link,
		     struct bpf_prog *prog_old,
		     u32 flags, u32 id_or_fd, u64 revision);
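
/* Example (sketch): a tcx-style caller mapping prog-attach attributes onto
 * bpf_mprog_attach(); the attribute field names are shown for illustration
 * and follow the multi-prog attach UAPI:
 *
 *	ret = bpf_mprog_attach(entry, &entry_new, prog, NULL, replace_prog,
 *			       attr->attach_flags, attr->relative_fd,
 *			       attr->expected_revision);
 */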

int bpf_mprog_detach(struct bpf_mprog_entry *entry,
		     struct bpf_mprog_entry **entry_new,
		     struct bpf_prog *prog, struct bpf_link *link,
		     u32 flags, u32 id_or_fd, u64 revision);

int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
		    struct bpf_mprog_entry *entry);

static inline bool bpf_mprog_supported(enum bpf_prog_type type)
{
	switch (type) {
	case BPF_PROG_TYPE_SCHED_CLS:
		return true;
	default:
		return false;
	}
}
#endif /* __BPF_MPROG_H */