1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
3#define _GNU_SOURCE
4#include <test_progs.h>
5#include <bpf/btf.h>
6#include "cap_helpers.h"
7#include <fcntl.h>
8#include <sched.h>
9#include <signal.h>
10#include <unistd.h>
11#include <linux/filter.h>
12#include <linux/unistd.h>
13#include <linux/mount.h>
14#include <sys/socket.h>
15#include <sys/stat.h>
16#include <sys/syscall.h>
17#include <sys/un.h>
18#include "priv_map.skel.h"
19#include "priv_prog.skel.h"
20#include "dummy_st_ops_success.skel.h"
21#include "token_lsm.skel.h"
22
23static inline int sys_mount(const char *dev_name, const char *dir_name,
24			    const char *type, unsigned long flags,
25			    const void *data)
26{
27	return syscall(__NR_mount, dev_name, dir_name, type, flags, data);
28}
29
30static inline int sys_fsopen(const char *fsname, unsigned flags)
31{
32	return syscall(__NR_fsopen, fsname, flags);
33}
34
35static inline int sys_fspick(int dfd, const char *path, unsigned flags)
36{
37	return syscall(__NR_fspick, dfd, path, flags);
38}
39
40static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
41{
42	return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
43}
44
45static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
46{
47	return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
48}
49
50static inline int sys_move_mount(int from_dfd, const char *from_path,
51				 int to_dfd, const char *to_path,
52				 unsigned flags)
53{
54	return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
55}
56
57static int drop_priv_caps(__u64 *old_caps)
58{
59	return cap_disable_effective((1ULL << CAP_BPF) |
60				     (1ULL << CAP_PERFMON) |
61				     (1ULL << CAP_NET_ADMIN) |
62				     (1ULL << CAP_SYS_ADMIN), old_caps);
63}
64
65static int restore_priv_caps(__u64 old_caps)
66{
67	return cap_enable_effective(old_caps, NULL);
68}
69
70static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
71{
72	char buf[32];
73	int err;
74
75	if (!mask_str) {
76		if (mask == ~0ULL) {
77			mask_str = "any";
78		} else {
79			snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
80			mask_str = buf;
81		}
82	}
83
84	err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key,
85			   mask_str, 0);
86	if (err < 0)
87		err = -errno;
88	return err;
89}
90
91#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0)
92
93struct bpffs_opts {
94	__u64 cmds;
95	__u64 maps;
96	__u64 progs;
97	__u64 attachs;
98	const char *cmds_str;
99	const char *maps_str;
100	const char *progs_str;
101	const char *attachs_str;
102};
103
104static int create_bpffs_fd(void)
105{
106	int fs_fd;
107
108	/* create VFS context */
109	fs_fd = sys_fsopen("bpf", 0);
110	ASSERT_GE(fs_fd, 0, "fs_fd");
111
112	return fs_fd;
113}
114
115static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
116{
117	int mnt_fd, err;
118
119	/* set up token delegation mount options */
120	err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
121	if (!ASSERT_OK(err, "fs_cfg_cmds"))
122		return err;
123	err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
124	if (!ASSERT_OK(err, "fs_cfg_maps"))
125		return err;
126	err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
127	if (!ASSERT_OK(err, "fs_cfg_progs"))
128		return err;
129	err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
130	if (!ASSERT_OK(err, "fs_cfg_attachs"))
131		return err;
132
133	/* instantiate FS object */
134	err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
135	if (err < 0)
136		return -errno;
137
138	/* create O_PATH fd for detached mount */
139	mnt_fd = sys_fsmount(fs_fd, 0, 0);
140	if (err < 0)
141		return -errno;
142
143	return mnt_fd;
144}
145
146/* send FD over Unix domain (AF_UNIX) socket */
147static int sendfd(int sockfd, int fd)
148{
149	struct msghdr msg = {};
150	struct cmsghdr *cmsg;
151	int fds[1] = { fd }, err;
152	char iobuf[1];
153	struct iovec io = {
154		.iov_base = iobuf,
155		.iov_len = sizeof(iobuf),
156	};
157	union {
158		char buf[CMSG_SPACE(sizeof(fds))];
159		struct cmsghdr align;
160	} u;
161
162	msg.msg_iov = &io;
163	msg.msg_iovlen = 1;
164	msg.msg_control = u.buf;
165	msg.msg_controllen = sizeof(u.buf);
166	cmsg = CMSG_FIRSTHDR(&msg);
167	cmsg->cmsg_level = SOL_SOCKET;
168	cmsg->cmsg_type = SCM_RIGHTS;
169	cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
170	memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
171
172	err = sendmsg(sockfd, &msg, 0);
173	if (err < 0)
174		err = -errno;
175	if (!ASSERT_EQ(err, 1, "sendmsg"))
176		return -EINVAL;
177
178	return 0;
179}
180
181/* receive FD over Unix domain (AF_UNIX) socket */
182static int recvfd(int sockfd, int *fd)
183{
184	struct msghdr msg = {};
185	struct cmsghdr *cmsg;
186	int fds[1], err;
187	char iobuf[1];
188	struct iovec io = {
189		.iov_base = iobuf,
190		.iov_len = sizeof(iobuf),
191	};
192	union {
193		char buf[CMSG_SPACE(sizeof(fds))];
194		struct cmsghdr align;
195	} u;
196
197	msg.msg_iov = &io;
198	msg.msg_iovlen = 1;
199	msg.msg_control = u.buf;
200	msg.msg_controllen = sizeof(u.buf);
201
202	err = recvmsg(sockfd, &msg, 0);
203	if (err < 0)
204		err = -errno;
205	if (!ASSERT_EQ(err, 1, "recvmsg"))
206		return -EINVAL;
207
208	cmsg = CMSG_FIRSTHDR(&msg);
209	if (!ASSERT_OK_PTR(cmsg, "cmsg_null") ||
210	    !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") ||
211	    !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") ||
212	    !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type"))
213		return -EINVAL;
214
215	memcpy(fds, CMSG_DATA(cmsg), sizeof(fds));
216	*fd = fds[0];
217
218	return 0;
219}
220
221static ssize_t write_nointr(int fd, const void *buf, size_t count)
222{
223	ssize_t ret;
224
225	do {
226		ret = write(fd, buf, count);
227	} while (ret < 0 && errno == EINTR);
228
229	return ret;
230}
231
232static int write_file(const char *path, const void *buf, size_t count)
233{
234	int fd;
235	ssize_t ret;
236
237	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
238	if (fd < 0)
239		return -1;
240
241	ret = write_nointr(fd, buf, count);
242	close(fd);
243	if (ret < 0 || (size_t)ret != count)
244		return -1;
245
246	return 0;
247}
248
249static int create_and_enter_userns(void)
250{
251	uid_t uid;
252	gid_t gid;
253	char map[100];
254
255	uid = getuid();
256	gid = getgid();
257
258	if (unshare(CLONE_NEWUSER))
259		return -1;
260
261	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
262	    errno != ENOENT)
263		return -1;
264
265	snprintf(map, sizeof(map), "0 %d 1", uid);
266	if (write_file("/proc/self/uid_map", map, strlen(map)))
267		return -1;
268
269
270	snprintf(map, sizeof(map), "0 %d 1", gid);
271	if (write_file("/proc/self/gid_map", map, strlen(map)))
272		return -1;
273
274	if (setgid(0))
275		return -1;
276
277	if (setuid(0))
278		return -1;
279
280	return 0;
281}
282
283typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel);
284
285static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
286{
287	int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
288	struct token_lsm *lsm_skel = NULL;
289
290	/* load and attach LSM "policy" before we go into unpriv userns */
291	lsm_skel = token_lsm__open_and_load();
292	if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) {
293		err = -EINVAL;
294		goto cleanup;
295	}
296	lsm_skel->bss->my_pid = getpid();
297	err = token_lsm__attach(lsm_skel);
298	if (!ASSERT_OK(err, "lsm_skel_attach"))
299		goto cleanup;
300
301	/* setup userns with root mappings */
302	err = create_and_enter_userns();
303	if (!ASSERT_OK(err, "create_and_enter_userns"))
304		goto cleanup;
305
306	/* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
307	err = unshare(CLONE_NEWNS);
308	if (!ASSERT_OK(err, "create_mountns"))
309		goto cleanup;
310
311	err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
312	if (!ASSERT_OK(err, "remount_root"))
313		goto cleanup;
314
315	fs_fd = create_bpffs_fd();
316	if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
317		err = -EINVAL;
318		goto cleanup;
319	}
320
321	/* ensure unprivileged child cannot set delegation options */
322	err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
323	ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
324	err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
325	ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
326	err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
327	ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
328	err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
329	ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");
330
331	/* pass BPF FS context object to parent */
332	err = sendfd(sock_fd, fs_fd);
333	if (!ASSERT_OK(err, "send_fs_fd"))
334		goto cleanup;
335	zclose(fs_fd);
336
337	/* avoid mucking around with mount namespaces and mounting at
338	 * well-known path, just get detach-mounted BPF FS fd back from parent
339	 */
340	err = recvfd(sock_fd, &mnt_fd);
341	if (!ASSERT_OK(err, "recv_mnt_fd"))
342		goto cleanup;
343
344	/* try to fspick() BPF FS and try to add some delegation options */
345	fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
346	if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
347		err = -EINVAL;
348		goto cleanup;
349	}
350
351	/* ensure unprivileged child cannot reconfigure to set delegation options */
352	err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
353	if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
354		err = -EINVAL;
355		goto cleanup;
356	}
357	err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
358	if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
359		err = -EINVAL;
360		goto cleanup;
361	}
362	err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
363	if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
364		err = -EINVAL;
365		goto cleanup;
366	}
367	err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
368	if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
369		err = -EINVAL;
370		goto cleanup;
371	}
372	zclose(fs_fd);
373
374	bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR);
375	if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
376		err = -EINVAL;
377		goto cleanup;
378	}
379
380	/* create BPF token FD and pass it to parent for some extra checks */
381	token_fd = bpf_token_create(bpffs_fd, NULL);
382	if (!ASSERT_GT(token_fd, 0, "child_token_create")) {
383		err = -EINVAL;
384		goto cleanup;
385	}
386	err = sendfd(sock_fd, token_fd);
387	if (!ASSERT_OK(err, "send_token_fd"))
388		goto cleanup;
389	zclose(token_fd);
390
391	/* do custom test logic with customly set up BPF FS instance */
392	err = callback(bpffs_fd, lsm_skel);
393	if (!ASSERT_OK(err, "test_callback"))
394		goto cleanup;
395
396	err = 0;
397cleanup:
398	zclose(sock_fd);
399	zclose(mnt_fd);
400	zclose(fs_fd);
401	zclose(bpffs_fd);
402	zclose(token_fd);
403
404	lsm_skel->bss->my_pid = 0;
405	token_lsm__destroy(lsm_skel);
406
407	exit(-err);
408}
409
410static int wait_for_pid(pid_t pid)
411{
412	int status, ret;
413
414again:
415	ret = waitpid(pid, &status, 0);
416	if (ret == -1) {
417		if (errno == EINTR)
418			goto again;
419
420		return -1;
421	}
422
423	if (!WIFEXITED(status))
424		return -1;
425
426	return WEXITSTATUS(status);
427}
428
429static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
430{
431	int fs_fd = -1, mnt_fd = -1, token_fd = -1, err;
432
433	err = recvfd(sock_fd, &fs_fd);
434	if (!ASSERT_OK(err, "recv_bpffs_fd"))
435		goto cleanup;
436
437	mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
438	if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
439		err = -EINVAL;
440		goto cleanup;
441	}
442	zclose(fs_fd);
443
444	/* pass BPF FS context object to parent */
445	err = sendfd(sock_fd, mnt_fd);
446	if (!ASSERT_OK(err, "send_mnt_fd"))
447		goto cleanup;
448	zclose(mnt_fd);
449
450	/* receive BPF token FD back from child for some extra tests */
451	err = recvfd(sock_fd, &token_fd);
452	if (!ASSERT_OK(err, "recv_token_fd"))
453		goto cleanup;
454
455	err = wait_for_pid(child_pid);
456	ASSERT_OK(err, "waitpid_child");
457
458cleanup:
459	zclose(sock_fd);
460	zclose(fs_fd);
461	zclose(mnt_fd);
462	zclose(token_fd);
463
464	if (child_pid > 0)
465		(void)kill(child_pid, SIGKILL);
466}
467
468static void subtest_userns(struct bpffs_opts *bpffs_opts,
469			   child_callback_fn child_cb)
470{
471	int sock_fds[2] = { -1, -1 };
472	int child_pid = 0, err;
473
474	err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
475	if (!ASSERT_OK(err, "socketpair"))
476		goto cleanup;
477
478	child_pid = fork();
479	if (!ASSERT_GE(child_pid, 0, "fork"))
480		goto cleanup;
481
482	if (child_pid == 0) {
483		zclose(sock_fds[0]);
484		return child(sock_fds[1], bpffs_opts, child_cb);
485
486	} else {
487		zclose(sock_fds[1]);
488		return parent(child_pid, bpffs_opts, sock_fds[0]);
489	}
490
491cleanup:
492	zclose(sock_fds[0]);
493	zclose(sock_fds[1]);
494	if (child_pid > 0)
495		(void)kill(child_pid, SIGKILL);
496}
497
498static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel)
499{
500	LIBBPF_OPTS(bpf_map_create_opts, map_opts);
501	int err, token_fd = -1, map_fd = -1;
502	__u64 old_caps = 0;
503
504	/* create BPF token from BPF FS mount */
505	token_fd = bpf_token_create(mnt_fd, NULL);
506	if (!ASSERT_GT(token_fd, 0, "token_create")) {
507		err = -EINVAL;
508		goto cleanup;
509	}
510
511	/* while inside non-init userns, we need both a BPF token *and*
512	 * CAP_BPF inside current userns to create privileged map; let's test
513	 * that neither BPF token alone nor namespaced CAP_BPF is sufficient
514	 */
515	err = drop_priv_caps(&old_caps);
516	if (!ASSERT_OK(err, "drop_caps"))
517		goto cleanup;
518
519	/* no token, no CAP_BPF -> fail */
520	map_opts.map_flags = 0;
521	map_opts.token_fd = 0;
522	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
523	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
524		err = -EINVAL;
525		goto cleanup;
526	}
527
528	/* token without CAP_BPF -> fail */
529	map_opts.map_flags = BPF_F_TOKEN_FD;
530	map_opts.token_fd = token_fd;
531	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
532	if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
533		err = -EINVAL;
534		goto cleanup;
535	}
536
537	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
538	err = restore_priv_caps(old_caps);
539	if (!ASSERT_OK(err, "restore_caps"))
540		goto cleanup;
541
542	/* CAP_BPF without token -> fail */
543	map_opts.map_flags = 0;
544	map_opts.token_fd = 0;
545	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
546	if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
547		err = -EINVAL;
548		goto cleanup;
549	}
550
551	/* finally, namespaced CAP_BPF + token -> success */
552	map_opts.map_flags = BPF_F_TOKEN_FD;
553	map_opts.token_fd = token_fd;
554	map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
555	if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
556		err = -EINVAL;
557		goto cleanup;
558	}
559
560cleanup:
561	zclose(token_fd);
562	zclose(map_fd);
563	return err;
564}
565
566static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel)
567{
568	LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
569	int err, token_fd = -1, btf_fd = -1;
570	const void *raw_btf_data;
571	struct btf *btf = NULL;
572	__u32 raw_btf_size;
573	__u64 old_caps = 0;
574
575	/* create BPF token from BPF FS mount */
576	token_fd = bpf_token_create(mnt_fd, NULL);
577	if (!ASSERT_GT(token_fd, 0, "token_create")) {
578		err = -EINVAL;
579		goto cleanup;
580	}
581
582	/* while inside non-init userns, we need both a BPF token *and*
583	 * CAP_BPF inside current userns to create privileged map; let's test
584	 * that neither BPF token alone nor namespaced CAP_BPF is sufficient
585	 */
586	err = drop_priv_caps(&old_caps);
587	if (!ASSERT_OK(err, "drop_caps"))
588		goto cleanup;
589
590	/* setup a trivial BTF data to load to the kernel */
591	btf = btf__new_empty();
592	if (!ASSERT_OK_PTR(btf, "empty_btf"))
593		goto cleanup;
594
595	ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
596
597	raw_btf_data = btf__raw_data(btf, &raw_btf_size);
598	if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data"))
599		goto cleanup;
600
601	/* no token + no CAP_BPF -> failure */
602	btf_opts.btf_flags = 0;
603	btf_opts.token_fd = 0;
604	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
605	if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail"))
606		goto cleanup;
607
608	/* token + no CAP_BPF -> failure */
609	btf_opts.btf_flags = BPF_F_TOKEN_FD;
610	btf_opts.token_fd = token_fd;
611	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
612	if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail"))
613		goto cleanup;
614
615	/* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
616	err = restore_priv_caps(old_caps);
617	if (!ASSERT_OK(err, "restore_caps"))
618		goto cleanup;
619
620	/* token + CAP_BPF -> success */
621	btf_opts.btf_flags = BPF_F_TOKEN_FD;
622	btf_opts.token_fd = token_fd;
623	btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
624	if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success"))
625		goto cleanup;
626
627	err = 0;
628cleanup:
629	btf__free(btf);
630	zclose(btf_fd);
631	zclose(token_fd);
632	return err;
633}
634
635static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel)
636{
637	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
638	int err, token_fd = -1, prog_fd = -1;
639	struct bpf_insn insns[] = {
640		/* bpf_jiffies64() requires CAP_BPF */
641		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
642		/* bpf_get_current_task() requires CAP_PERFMON */
643		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
644		/* r0 = 0; exit; */
645		BPF_MOV64_IMM(BPF_REG_0, 0),
646		BPF_EXIT_INSN(),
647	};
648	size_t insn_cnt = ARRAY_SIZE(insns);
649	__u64 old_caps = 0;
650
651	/* create BPF token from BPF FS mount */
652	token_fd = bpf_token_create(mnt_fd, NULL);
653	if (!ASSERT_GT(token_fd, 0, "token_create")) {
654		err = -EINVAL;
655		goto cleanup;
656	}
657
658	/* validate we can successfully load BPF program with token; this
659	 * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
660	 * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
661	 * BPF token wired properly in a bunch of places in the kernel
662	 */
663	prog_opts.prog_flags = BPF_F_TOKEN_FD;
664	prog_opts.token_fd = token_fd;
665	prog_opts.expected_attach_type = BPF_XDP;
666	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
667				insns, insn_cnt, &prog_opts);
668	if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
669		err = -EPERM;
670		goto cleanup;
671	}
672
673	/* no token + caps -> failure */
674	prog_opts.prog_flags = 0;
675	prog_opts.token_fd = 0;
676	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
677				insns, insn_cnt, &prog_opts);
678	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
679		err = -EPERM;
680		goto cleanup;
681	}
682
683	err = drop_priv_caps(&old_caps);
684	if (!ASSERT_OK(err, "drop_caps"))
685		goto cleanup;
686
687	/* no caps + token -> failure */
688	prog_opts.prog_flags = BPF_F_TOKEN_FD;
689	prog_opts.token_fd = token_fd;
690	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
691				insns, insn_cnt, &prog_opts);
692	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
693		err = -EPERM;
694		goto cleanup;
695	}
696
697	/* no caps + no token -> definitely a failure */
698	prog_opts.prog_flags = 0;
699	prog_opts.token_fd = 0;
700	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
701				insns, insn_cnt, &prog_opts);
702	if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
703		err = -EPERM;
704		goto cleanup;
705	}
706
707	err = 0;
708cleanup:
709	zclose(prog_fd);
710	zclose(token_fd);
711	return err;
712}
713
714static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel)
715{
716	LIBBPF_OPTS(bpf_object_open_opts, opts);
717	char buf[256];
718	struct priv_map *skel;
719	int err;
720
721	skel = priv_map__open_and_load();
722	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
723		priv_map__destroy(skel);
724		return -EINVAL;
725	}
726
727	/* use bpf_token_path to provide BPF FS path */
728	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
729	opts.bpf_token_path = buf;
730	skel = priv_map__open_opts(&opts);
731	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
732		return -EINVAL;
733
734	err = priv_map__load(skel);
735	priv_map__destroy(skel);
736	if (!ASSERT_OK(err, "obj_token_path_load"))
737		return -EINVAL;
738
739	return 0;
740}
741
742static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
743{
744	LIBBPF_OPTS(bpf_object_open_opts, opts);
745	char buf[256];
746	struct priv_prog *skel;
747	int err;
748
749	skel = priv_prog__open_and_load();
750	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
751		priv_prog__destroy(skel);
752		return -EINVAL;
753	}
754
755	/* use bpf_token_path to provide BPF FS path */
756	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
757	opts.bpf_token_path = buf;
758	skel = priv_prog__open_opts(&opts);
759	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
760		return -EINVAL;
761	err = priv_prog__load(skel);
762	priv_prog__destroy(skel);
763	if (!ASSERT_OK(err, "obj_token_path_load"))
764		return -EINVAL;
765
766	/* provide BPF token, but reject bpf_token_capable() with LSM */
767	lsm_skel->bss->reject_capable = true;
768	lsm_skel->bss->reject_cmd = false;
769	skel = priv_prog__open_opts(&opts);
770	if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open"))
771		return -EINVAL;
772	err = priv_prog__load(skel);
773	priv_prog__destroy(skel);
774	if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load"))
775		return -EINVAL;
776
777	/* provide BPF token, but reject bpf_token_cmd() with LSM */
778	lsm_skel->bss->reject_capable = false;
779	lsm_skel->bss->reject_cmd = true;
780	skel = priv_prog__open_opts(&opts);
781	if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open"))
782		return -EINVAL;
783	err = priv_prog__load(skel);
784	priv_prog__destroy(skel);
785	if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load"))
786		return -EINVAL;
787
788	return 0;
789}
790
791/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
792 * which should cause struct_ops application to fail, as BTF won't be uploaded
793 * into the kernel, even if STRUCT_OPS programs themselves are allowed
794 */
795static int validate_struct_ops_load(int mnt_fd, bool expect_success)
796{
797	LIBBPF_OPTS(bpf_object_open_opts, opts);
798	char buf[256];
799	struct dummy_st_ops_success *skel;
800	int err;
801
802	snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
803	opts.bpf_token_path = buf;
804	skel = dummy_st_ops_success__open_opts(&opts);
805	if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
806		return -EINVAL;
807
808	err = dummy_st_ops_success__load(skel);
809	dummy_st_ops_success__destroy(skel);
810	if (expect_success) {
811		if (!ASSERT_OK(err, "obj_token_path_load"))
812			return -EINVAL;
813	} else /* expect failure */ {
814		if (!ASSERT_ERR(err, "obj_token_path_load"))
815			return -EINVAL;
816	}
817
818	return 0;
819}
820
821static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel)
822{
823	return validate_struct_ops_load(mnt_fd, false /* should fail */);
824}
825
826static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel)
827{
828	return validate_struct_ops_load(mnt_fd, true /* should succeed */);
829}
830
831#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
832#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"
833
834static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel)
835{
836	LIBBPF_OPTS(bpf_object_open_opts, opts);
837	struct dummy_st_ops_success *skel;
838	int err;
839
840	/* before we mount BPF FS with token delegation, struct_ops skeleton
841	 * should fail to load
842	 */
843	skel = dummy_st_ops_success__open_and_load();
844	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
845		dummy_st_ops_success__destroy(skel);
846		return -EINVAL;
847	}
848
849	/* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
850	 * token automatically and implicitly
851	 */
852	err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
853	if (!ASSERT_OK(err, "move_mount_bpffs"))
854		return -EINVAL;
855
856	/* disable implicit BPF token creation by setting
857	 * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
858	 */
859	err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
860	if (!ASSERT_OK(err, "setenv_token_path"))
861		return -EINVAL;
862	skel = dummy_st_ops_success__open_and_load();
863	if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
864		unsetenv(TOKEN_ENVVAR);
865		dummy_st_ops_success__destroy(skel);
866		return -EINVAL;
867	}
868	unsetenv(TOKEN_ENVVAR);
869
870	/* now the same struct_ops skeleton should succeed thanks to libppf
871	 * creating BPF token from /sys/fs/bpf mount point
872	 */
873	skel = dummy_st_ops_success__open_and_load();
874	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
875		return -EINVAL;
876
877	dummy_st_ops_success__destroy(skel);
878
879	/* now disable implicit token through empty bpf_token_path, should fail */
880	opts.bpf_token_path = "";
881	skel = dummy_st_ops_success__open_opts(&opts);
882	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
883		return -EINVAL;
884
885	err = dummy_st_ops_success__load(skel);
886	dummy_st_ops_success__destroy(skel);
887	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
888		return -EINVAL;
889
890	return 0;
891}
892
893static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel)
894{
895	LIBBPF_OPTS(bpf_object_open_opts, opts);
896	struct dummy_st_ops_success *skel;
897	int err;
898
899	/* before we mount BPF FS with token delegation, struct_ops skeleton
900	 * should fail to load
901	 */
902	skel = dummy_st_ops_success__open_and_load();
903	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
904		dummy_st_ops_success__destroy(skel);
905		return -EINVAL;
906	}
907
908	/* mount custom BPF FS over custom location, so libbpf can't create
909	 * BPF token implicitly, unless pointed to it through
910	 * LIBBPF_BPF_TOKEN_PATH envvar
911	 */
912	rmdir(TOKEN_BPFFS_CUSTOM);
913	if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
914		goto err_out;
915	err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
916	if (!ASSERT_OK(err, "move_mount_bpffs"))
917		goto err_out;
918
919	/* even though we have BPF FS with delegation, it's not at default
920	 * /sys/fs/bpf location, so we still fail to load until envvar is set up
921	 */
922	skel = dummy_st_ops_success__open_and_load();
923	if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
924		dummy_st_ops_success__destroy(skel);
925		goto err_out;
926	}
927
928	err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
929	if (!ASSERT_OK(err, "setenv_token_path"))
930		goto err_out;
931
932	/* now the same struct_ops skeleton should succeed thanks to libppf
933	 * creating BPF token from custom mount point
934	 */
935	skel = dummy_st_ops_success__open_and_load();
936	if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
937		goto err_out;
938
939	dummy_st_ops_success__destroy(skel);
940
941	/* now disable implicit token through empty bpf_token_path, envvar
942	 * will be ignored, should fail
943	 */
944	opts.bpf_token_path = "";
945	skel = dummy_st_ops_success__open_opts(&opts);
946	if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
947		goto err_out;
948
949	err = dummy_st_ops_success__load(skel);
950	dummy_st_ops_success__destroy(skel);
951	if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
952		goto err_out;
953
954	rmdir(TOKEN_BPFFS_CUSTOM);
955	unsetenv(TOKEN_ENVVAR);
956	return 0;
957err_out:
958	rmdir(TOKEN_BPFFS_CUSTOM);
959	unsetenv(TOKEN_ENVVAR);
960	return -EINVAL;
961}
962
963#define bit(n) (1ULL << (n))
964
965void test_token(void)
966{
967	if (test__start_subtest("map_token")) {
968		struct bpffs_opts opts = {
969			.cmds_str = "map_create",
970			.maps_str = "stack",
971		};
972
973		subtest_userns(&opts, userns_map_create);
974	}
975	if (test__start_subtest("btf_token")) {
976		struct bpffs_opts opts = {
977			.cmds = 1ULL << BPF_BTF_LOAD,
978		};
979
980		subtest_userns(&opts, userns_btf_load);
981	}
982	if (test__start_subtest("prog_token")) {
983		struct bpffs_opts opts = {
984			.cmds_str = "PROG_LOAD",
985			.progs_str = "XDP",
986			.attachs_str = "xdp",
987		};
988
989		subtest_userns(&opts, userns_prog_load);
990	}
991	if (test__start_subtest("obj_priv_map")) {
992		struct bpffs_opts opts = {
993			.cmds = bit(BPF_MAP_CREATE),
994			.maps = bit(BPF_MAP_TYPE_QUEUE),
995		};
996
997		subtest_userns(&opts, userns_obj_priv_map);
998	}
999	if (test__start_subtest("obj_priv_prog")) {
1000		struct bpffs_opts opts = {
1001			.cmds = bit(BPF_PROG_LOAD),
1002			.progs = bit(BPF_PROG_TYPE_KPROBE),
1003			.attachs = ~0ULL,
1004		};
1005
1006		subtest_userns(&opts, userns_obj_priv_prog);
1007	}
1008	if (test__start_subtest("obj_priv_btf_fail")) {
1009		struct bpffs_opts opts = {
1010			/* disallow BTF loading */
1011			.cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
1012			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
1013			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
1014			.attachs = ~0ULL,
1015		};
1016
1017		subtest_userns(&opts, userns_obj_priv_btf_fail);
1018	}
1019	if (test__start_subtest("obj_priv_btf_success")) {
1020		struct bpffs_opts opts = {
1021			/* allow BTF loading */
1022			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
1023			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
1024			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
1025			.attachs = ~0ULL,
1026		};
1027
1028		subtest_userns(&opts, userns_obj_priv_btf_success);
1029	}
1030	if (test__start_subtest("obj_priv_implicit_token")) {
1031		struct bpffs_opts opts = {
1032			/* allow BTF loading */
1033			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
1034			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
1035			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
1036			.attachs = ~0ULL,
1037		};
1038
1039		subtest_userns(&opts, userns_obj_priv_implicit_token);
1040	}
1041	if (test__start_subtest("obj_priv_implicit_token_envvar")) {
1042		struct bpffs_opts opts = {
1043			/* allow BTF loading */
1044			.cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
1045			.maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
1046			.progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
1047			.attachs = ~0ULL,
1048		};
1049
1050		subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
1051	}
1052}
1053