1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * common eBPF ELF operations.
5 *
6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
8 * Copyright (C) 2015 Huawei Inc.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation;
13 * version 2.1 of the License (not later!)
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this program; if not,  see <http://www.gnu.org/licenses>
22 */
23
24#include <stdlib.h>
25#include <string.h>
26#include <memory.h>
27#include <unistd.h>
28#include <asm/unistd.h>
29#include <errno.h>
30#include <linux/bpf.h>
31#include <linux/filter.h>
32#include <linux/kernel.h>
33#include <limits.h>
34#include <sys/resource.h>
35#include "bpf.h"
36#include "libbpf.h"
37#include "libbpf_internal.h"
38
39/*
40 * When building perf, unistd.h is overridden. __NR_bpf is
41 * required to be defined explicitly.
42 */
/* Per-architecture fallback values for the bpf syscall number; only
 * consulted when no earlier header has defined __NR_bpf.
 */
#ifndef __NR_bpf
# if defined(__i386__)
#  define __NR_bpf 357
# elif defined(__x86_64__)
#  define __NR_bpf 321
# elif defined(__aarch64__)
#  define __NR_bpf 280
# elif defined(__sparc__)
#  define __NR_bpf 349
# elif defined(__s390__)
#  define __NR_bpf 351
# elif defined(__arc__)
#  define __NR_bpf 280
/* MIPS has a distinct number for each of the three ABI macros tested below */
# elif defined(__mips__) && defined(_ABIO32)
#  define __NR_bpf 4355
# elif defined(__mips__) && defined(_ABIN32)
#  define __NR_bpf 6319
# elif defined(__mips__) && defined(_ABI64)
#  define __NR_bpf 5315
# else
/* no known number for this target: fail the build instead of guessing */
#  error __NR_bpf not defined. libbpf does not support your arch.
# endif
#endif
66
/* Convert a user-space pointer into the 64-bit integer representation
 * stored in union bpf_attr fields. The value is first converted to
 * unsigned long and only then widened to __u64.
 */
static inline __u64 ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}
71
72static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
73			  unsigned int size)
74{
75	return syscall(__NR_bpf, cmd, attr, size);
76}
77
78static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr,
79			     unsigned int size)
80{
81	int fd;
82
83	fd = sys_bpf(cmd, attr, size);
84	return ensure_good_fd(fd);
85}
86
87#define PROG_LOAD_ATTEMPTS 5
88
89static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
90{
91	int fd;
92
93	do {
94		fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size);
95	} while (fd < 0 && errno == EAGAIN && --attempts > 0);
96
97	return fd;
98}
99
100/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
101 * memcg-based memory accounting for BPF maps and progs. This was done in [0].
102 * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in
103 * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF.
104 *
105 *   [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
106 *   [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
107 */
108int probe_memcg_account(void)
109{
110	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
111	struct bpf_insn insns[] = {
112		BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns),
113		BPF_EXIT_INSN(),
114	};
115	size_t insn_cnt = ARRAY_SIZE(insns);
116	union bpf_attr attr;
117	int prog_fd;
118
119	/* attempt loading freplace trying to use custom BTF */
120	memset(&attr, 0, prog_load_attr_sz);
121	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
122	attr.insns = ptr_to_u64(insns);
123	attr.insn_cnt = insn_cnt;
124	attr.license = ptr_to_u64("GPL");
125
126	prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
127	if (prog_fd >= 0) {
128		close(prog_fd);
129		return 1;
130	}
131	return 0;
132}
133
134static bool memlock_bumped;
135static rlim_t memlock_rlim = RLIM_INFINITY;
136
137int libbpf_set_memlock_rlim(size_t memlock_bytes)
138{
139	if (memlock_bumped)
140		return libbpf_err(-EBUSY);
141
142	memlock_rlim = memlock_bytes;
143	return 0;
144}
145
146int bump_rlimit_memlock(void)
147{
148	struct rlimit rlim;
149
150	/* this the default in libbpf 1.0, but for now user has to opt-in explicitly */
151	if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK))
152		return 0;
153
154	/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
155	if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
156		return 0;
157
158	memlock_bumped = true;
159
160	/* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */
161	if (memlock_rlim == 0)
162		return 0;
163
164	rlim.rlim_cur = rlim.rlim_max = memlock_rlim;
165	if (setrlimit(RLIMIT_MEMLOCK, &rlim))
166		return -errno;
167
168	return 0;
169}
170
171int bpf_map_create(enum bpf_map_type map_type,
172		   const char *map_name,
173		   __u32 key_size,
174		   __u32 value_size,
175		   __u32 max_entries,
176		   const struct bpf_map_create_opts *opts)
177{
178	const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
179	union bpf_attr attr;
180	int fd;
181
182	bump_rlimit_memlock();
183
184	memset(&attr, 0, attr_sz);
185
186	if (!OPTS_VALID(opts, bpf_map_create_opts))
187		return libbpf_err(-EINVAL);
188
189	attr.map_type = map_type;
190	if (map_name)
191		libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
192	attr.key_size = key_size;
193	attr.value_size = value_size;
194	attr.max_entries = max_entries;
195
196	attr.btf_fd = OPTS_GET(opts, btf_fd, 0);
197	attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0);
198	attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0);
199	attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0);
200
201	attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0);
202	attr.map_flags = OPTS_GET(opts, map_flags, 0);
203	attr.map_extra = OPTS_GET(opts, map_extra, 0);
204	attr.numa_node = OPTS_GET(opts, numa_node, 0);
205	attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
206
207	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
208	return libbpf_err_errno(fd);
209}
210
/* Build a copy of @cnt fixed-size records in which only the leading
 * @expected_rec_size bytes of each record are kept and the remaining
 * bytes, up to @actual_rec_size, are zeroed.
 *
 * @orecord:           source array of @cnt records, @actual_rec_size bytes each
 * @cnt:               number of records
 * @actual_rec_size:   record stride in both the source and the copy
 * @expected_rec_size: number of leading bytes preserved per record
 *
 * Returns a malloc()'ed buffer of actual_rec_size * cnt bytes that the
 * caller must free(). Returns NULL when allocation fails, when
 * @expected_rec_size exceeds @actual_rec_size, or when the total length
 * cannot be represented in size_t.
 */
static void *
alloc_zero_tailing_info(const void *orecord, __u32 cnt,
			__u32 actual_rec_size, __u32 expected_rec_size)
{
	__u64 info_len = (__u64)actual_rec_size * cnt;
	const char *src = orecord;
	char *info, *dst;
	__u32 i;

	/* a larger expected size would underflow the memset() length below */
	if (expected_rec_size > actual_rec_size)
		return NULL;

	/* malloc() takes size_t: refuse lengths that would be truncated */
	if ((__u64)(size_t)info_len != info_len)
		return NULL;

	info = malloc((size_t)info_len);
	if (!info)
		return NULL;

	/* zero out bytes kernel does not understand */
	dst = info;
	for (i = 0; i < cnt; i++) {
		memcpy(dst, src, expected_rec_size);
		memset(dst + expected_rec_size, 0,
		       actual_rec_size - expected_rec_size);
		src += actual_rec_size;
		dst += actual_rec_size;
	}

	return info;
}
235
/* Load a BPF program with the BPF_PROG_LOAD command.
 *
 * @prog_type: program type placed in the attribute
 * @prog_name: optional name, copied with libbpf_strlcpy() when non-NULL
 * @license:   license string, passed to the kernel by pointer
 * @insns:     instruction array, @insn_cnt entries long
 * @opts:      optional extra attributes; absent fields default to 0/NULL
 *
 * Returns the program fd on success. Argument errors caught here return
 * through libbpf_err(): -EINVAL for invalid @opts, negative attempts, both
 * attach_prog_fd and attach_btf_obj_fd set, or inconsistent log settings;
 * -E2BIG when @insn_cnt exceeds UINT_MAX. Load failures return through
 * libbpf_err_errno().
 */
DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0)
int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
		         const char *prog_name, const char *license,
		         const struct bpf_insn *insns, size_t insn_cnt,
		         const struct bpf_prog_load_opts *opts)
{
	/* zero-padded copies of func/line info, allocated only on E2BIG retry */
	void *finfo = NULL, *linfo = NULL;
	const char *func_info, *line_info;
	__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
	/* record sizes as supplied by the caller, kept for the retry path */
	__u32 func_info_rec_size, line_info_rec_size;
	int fd, attempts;
	union bpf_attr attr;
	char *log_buf;

	bump_rlimit_memlock();

	if (!OPTS_VALID(opts, bpf_prog_load_opts))
		return libbpf_err(-EINVAL);

	/* attempts == 0 means "use the default"; negative values are rejected */
	attempts = OPTS_GET(opts, attempts, 0);
	if (attempts < 0)
		return libbpf_err(-EINVAL);
	if (attempts == 0)
		attempts = PROG_LOAD_ATTEMPTS;

	memset(&attr, 0, sizeof(attr));

	attr.prog_type = prog_type;
	attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);

	attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0);
	attr.prog_flags = OPTS_GET(opts, prog_flags, 0);
	attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
	attr.kern_version = OPTS_GET(opts, kern_version, 0);

	if (prog_name)
		libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
	attr.license = ptr_to_u64(license);

	/* attr.insn_cnt is 32 bits wide; refuse counts that would be truncated */
	if (insn_cnt > UINT_MAX)
		return libbpf_err(-E2BIG);

	attr.insns = ptr_to_u64(insns);
	attr.insn_cnt = (__u32)insn_cnt;

	attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
	attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0);

	/* the two attach targets are mutually exclusive */
	if (attach_prog_fd && attach_btf_obj_fd)
		return libbpf_err(-EINVAL);

	attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0);
	if (attach_prog_fd)
		attr.attach_prog_fd = attach_prog_fd;
	else
		attr.attach_btf_obj_fd = attach_btf_obj_fd;

	log_buf = OPTS_GET(opts, log_buf, NULL);
	log_size = OPTS_GET(opts, log_size, 0);
	log_level = OPTS_GET(opts, log_level, 0);

	/* log_buf and log_size must be both set or both unset */
	if (!!log_buf != !!log_size)
		return libbpf_err(-EINVAL);
	/* only the low three log_level bits are accepted */
	if (log_level > (4 | 2 | 1))
		return libbpf_err(-EINVAL);
	/* a non-zero log level needs a buffer to write into */
	if (log_level && !log_buf)
		return libbpf_err(-EINVAL);

	func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0);
	func_info = OPTS_GET(opts, func_info, NULL);
	attr.func_info_rec_size = func_info_rec_size;
	attr.func_info = ptr_to_u64(func_info);
	attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0);

	line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0);
	line_info = OPTS_GET(opts, line_info, NULL);
	attr.line_info_rec_size = line_info_rec_size;
	attr.line_info = ptr_to_u64(line_info);
	attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0);

	attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL));

	/* with log_level == 0 the first try runs without a log buffer; the
	 * buffer is only handed over in the final retry near the end
	 */
	if (log_level) {
		attr.log_buf = ptr_to_u64(log_buf);
		attr.log_size = log_size;
		attr.log_level = log_level;
	}

	fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
	if (fd >= 0)
		return fd;

	/* After bpf_prog_load, the kernel may modify certain attributes
	 * to give user space a hint how to deal with loading failure.
	 * Check to see whether we can make some changes and load again.
	 */
	while (errno == E2BIG && (!finfo || !linfo)) {
		/* a rec_size in attr smaller than the caller's value triggers
		 * one retry per info kind, using a zero-padded copy
		 */
		if (!finfo && attr.func_info_cnt &&
		    attr.func_info_rec_size < func_info_rec_size) {
			/* try with corrected func info records */
			finfo = alloc_zero_tailing_info(func_info,
							attr.func_info_cnt,
							func_info_rec_size,
							attr.func_info_rec_size);
			if (!finfo) {
				/* report the original failure, not the allocation error */
				errno = E2BIG;
				goto done;
			}

			attr.func_info = ptr_to_u64(finfo);
			attr.func_info_rec_size = func_info_rec_size;
		} else if (!linfo && attr.line_info_cnt &&
			   attr.line_info_rec_size < line_info_rec_size) {
			/* same correction for line info records */
			linfo = alloc_zero_tailing_info(line_info,
							attr.line_info_cnt,
							line_info_rec_size,
							attr.line_info_rec_size);
			if (!linfo) {
				errno = E2BIG;
				goto done;
			}

			attr.line_info = ptr_to_u64(linfo);
			attr.line_info_rec_size = line_info_rec_size;
		} else {
			/* nothing left that we know how to fix up */
			break;
		}

		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
		if (fd >= 0)
			goto done;
	}

	if (log_level == 0 && log_buf) {
		/* log_level == 0 with non-NULL log_buf requires retrying on error
		 * with log_level == 1 and log_buf/log_buf_size set, to get details of
		 * failure
		 */
		attr.log_buf = ptr_to_u64(log_buf);
		attr.log_size = log_size;
		attr.log_level = 1;

		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
	}
done:
	/* free() doesn't affect errno, so we don't need to restore it */
	free(finfo);
	free(linfo);
	return libbpf_err_errno(fd);
}
386
387__attribute__((alias("bpf_load_program_xattr2")))
388int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
389			   char *log_buf, size_t log_buf_sz);
390
391static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr,
392				   char *log_buf, size_t log_buf_sz)
393{
394	LIBBPF_OPTS(bpf_prog_load_opts, p);
395
396	if (!load_attr || !log_buf != !log_buf_sz)
397		return libbpf_err(-EINVAL);
398
399	p.expected_attach_type = load_attr->expected_attach_type;
400	switch (load_attr->prog_type) {
401	case BPF_PROG_TYPE_STRUCT_OPS:
402	case BPF_PROG_TYPE_LSM:
403		p.attach_btf_id = load_attr->attach_btf_id;
404		break;
405	case BPF_PROG_TYPE_TRACING:
406	case BPF_PROG_TYPE_EXT:
407		p.attach_btf_id = load_attr->attach_btf_id;
408		p.attach_prog_fd = load_attr->attach_prog_fd;
409		break;
410	default:
411		p.prog_ifindex = load_attr->prog_ifindex;
412		p.kern_version = load_attr->kern_version;
413	}
414	p.log_level = load_attr->log_level;
415	p.log_buf = log_buf;
416	p.log_size = log_buf_sz;
417	p.prog_btf_fd = load_attr->prog_btf_fd;
418	p.func_info_rec_size = load_attr->func_info_rec_size;
419	p.func_info_cnt = load_attr->func_info_cnt;
420	p.func_info = load_attr->func_info;
421	p.line_info_rec_size = load_attr->line_info_rec_size;
422	p.line_info_cnt = load_attr->line_info_cnt;
423	p.line_info = load_attr->line_info;
424	p.prog_flags = load_attr->prog_flags;
425
426	return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license,
427			     load_attr->insns, load_attr->insns_cnt, &p);
428}
429
430int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
431		     size_t insns_cnt, const char *license,
432		     __u32 kern_version, char *log_buf,
433		     size_t log_buf_sz)
434{
435	struct bpf_load_program_attr load_attr;
436
437	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
438	load_attr.prog_type = type;
439	load_attr.expected_attach_type = 0;
440	load_attr.name = NULL;
441	load_attr.insns = insns;
442	load_attr.insns_cnt = insns_cnt;
443	load_attr.license = license;
444	load_attr.kern_version = kern_version;
445
446	return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz);
447}
448
449int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
450		       size_t insns_cnt, __u32 prog_flags, const char *license,
451		       __u32 kern_version, char *log_buf, size_t log_buf_sz,
452		       int log_level)
453{
454	union bpf_attr attr;
455	int fd;
456
457	bump_rlimit_memlock();
458
459	memset(&attr, 0, sizeof(attr));
460	attr.prog_type = type;
461	attr.insn_cnt = (__u32)insns_cnt;
462	attr.insns = ptr_to_u64(insns);
463	attr.license = ptr_to_u64(license);
464	attr.log_buf = ptr_to_u64(log_buf);
465	attr.log_size = log_buf_sz;
466	attr.log_level = log_level;
467	log_buf[0] = 0;
468	attr.kern_version = kern_version;
469	attr.prog_flags = prog_flags;
470
471	fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS);
472	return libbpf_err_errno(fd);
473}
474
475int bpf_map_update_elem(int fd, const void *key, const void *value,
476			__u64 flags)
477{
478	union bpf_attr attr;
479	int ret;
480
481	memset(&attr, 0, sizeof(attr));
482	attr.map_fd = fd;
483	attr.key = ptr_to_u64(key);
484	attr.value = ptr_to_u64(value);
485	attr.flags = flags;
486
487	ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
488	return libbpf_err_errno(ret);
489}
490
491int bpf_map_lookup_elem(int fd, const void *key, void *value)
492{
493	union bpf_attr attr;
494	int ret;
495
496	memset(&attr, 0, sizeof(attr));
497	attr.map_fd = fd;
498	attr.key = ptr_to_u64(key);
499	attr.value = ptr_to_u64(value);
500
501	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
502	return libbpf_err_errno(ret);
503}
504
505int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
506{
507	union bpf_attr attr;
508	int ret;
509
510	memset(&attr, 0, sizeof(attr));
511	attr.map_fd = fd;
512	attr.key = ptr_to_u64(key);
513	attr.value = ptr_to_u64(value);
514	attr.flags = flags;
515
516	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
517	return libbpf_err_errno(ret);
518}
519
520int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
521{
522	union bpf_attr attr;
523	int ret;
524
525	memset(&attr, 0, sizeof(attr));
526	attr.map_fd = fd;
527	attr.key = ptr_to_u64(key);
528	attr.value = ptr_to_u64(value);
529
530	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
531	return libbpf_err_errno(ret);
532}
533
534int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
535{
536	union bpf_attr attr;
537	int ret;
538
539	memset(&attr, 0, sizeof(attr));
540	attr.map_fd = fd;
541	attr.key = ptr_to_u64(key);
542	attr.value = ptr_to_u64(value);
543	attr.flags = flags;
544
545	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
546	return libbpf_err_errno(ret);
547}
548
549int bpf_map_delete_elem(int fd, const void *key)
550{
551	union bpf_attr attr;
552	int ret;
553
554	memset(&attr, 0, sizeof(attr));
555	attr.map_fd = fd;
556	attr.key = ptr_to_u64(key);
557
558	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
559	return libbpf_err_errno(ret);
560}
561
562int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags)
563{
564	union bpf_attr attr;
565	int ret;
566
567	memset(&attr, 0, sizeof(attr));
568	attr.map_fd = fd;
569	attr.key = ptr_to_u64(key);
570	attr.flags = flags;
571
572	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
573	return libbpf_err_errno(ret);
574}
575
576int bpf_map_get_next_key(int fd, const void *key, void *next_key)
577{
578	union bpf_attr attr;
579	int ret;
580
581	memset(&attr, 0, sizeof(attr));
582	attr.map_fd = fd;
583	attr.key = ptr_to_u64(key);
584	attr.next_key = ptr_to_u64(next_key);
585
586	ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
587	return libbpf_err_errno(ret);
588}
589
590int bpf_map_freeze(int fd)
591{
592	union bpf_attr attr;
593	int ret;
594
595	memset(&attr, 0, sizeof(attr));
596	attr.map_fd = fd;
597
598	ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
599	return libbpf_err_errno(ret);
600}
601
602static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
603				void *out_batch, void *keys, void *values,
604				__u32 *count,
605				const struct bpf_map_batch_opts *opts)
606{
607	union bpf_attr attr;
608	int ret;
609
610	if (!OPTS_VALID(opts, bpf_map_batch_opts))
611		return libbpf_err(-EINVAL);
612
613	memset(&attr, 0, sizeof(attr));
614	attr.batch.map_fd = fd;
615	attr.batch.in_batch = ptr_to_u64(in_batch);
616	attr.batch.out_batch = ptr_to_u64(out_batch);
617	attr.batch.keys = ptr_to_u64(keys);
618	attr.batch.values = ptr_to_u64(values);
619	attr.batch.count = *count;
620	attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
621	attr.batch.flags = OPTS_GET(opts, flags, 0);
622
623	ret = sys_bpf(cmd, &attr, sizeof(attr));
624	*count = attr.batch.count;
625
626	return libbpf_err_errno(ret);
627}
628
629int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,
630			 const struct bpf_map_batch_opts *opts)
631{
632	return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
633				    NULL, (void *)keys, NULL, count, opts);
634}
635
636int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
637			 void *values, __u32 *count,
638			 const struct bpf_map_batch_opts *opts)
639{
640	return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch,
641				    out_batch, keys, values, count, opts);
642}
643
644int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
645				    void *keys, void *values, __u32 *count,
646				    const struct bpf_map_batch_opts *opts)
647{
648	return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH,
649				    fd, in_batch, out_batch, keys, values,
650				    count, opts);
651}
652
653int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,
654			 const struct bpf_map_batch_opts *opts)
655{
656	return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
657				    (void *)keys, (void *)values, count, opts);
658}
659
660int bpf_obj_pin(int fd, const char *pathname)
661{
662	union bpf_attr attr;
663	int ret;
664
665	memset(&attr, 0, sizeof(attr));
666	attr.pathname = ptr_to_u64((void *)pathname);
667	attr.bpf_fd = fd;
668
669	ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
670	return libbpf_err_errno(ret);
671}
672
673int bpf_obj_get(const char *pathname)
674{
675	union bpf_attr attr;
676	int fd;
677
678	memset(&attr, 0, sizeof(attr));
679	attr.pathname = ptr_to_u64((void *)pathname);
680
681	fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr));
682	return libbpf_err_errno(fd);
683}
684
685int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
686		    unsigned int flags)
687{
688	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
689		.flags = flags,
690	);
691
692	return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
693}
694
695int bpf_prog_attach_opts(int prog_fd, int target_fd,
696			  enum bpf_attach_type type,
697			  const struct bpf_prog_attach_opts *opts)
698{
699	union bpf_attr attr;
700	int ret;
701
702	if (!OPTS_VALID(opts, bpf_prog_attach_opts))
703		return libbpf_err(-EINVAL);
704
705	memset(&attr, 0, sizeof(attr));
706	attr.target_fd	   = target_fd;
707	attr.attach_bpf_fd = prog_fd;
708	attr.attach_type   = type;
709	attr.attach_flags  = OPTS_GET(opts, flags, 0);
710	attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
711
712	ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
713	return libbpf_err_errno(ret);
714}
715
716__attribute__((alias("bpf_prog_attach_opts")))
717int bpf_prog_attach_xattr(int prog_fd, int target_fd,
718			  enum bpf_attach_type type,
719			  const struct bpf_prog_attach_opts *opts);
720
721int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
722{
723	union bpf_attr attr;
724	int ret;
725
726	memset(&attr, 0, sizeof(attr));
727	attr.target_fd	 = target_fd;
728	attr.attach_type = type;
729
730	ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
731	return libbpf_err_errno(ret);
732}
733
734int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
735{
736	union bpf_attr attr;
737	int ret;
738
739	memset(&attr, 0, sizeof(attr));
740	attr.target_fd	 = target_fd;
741	attr.attach_bpf_fd = prog_fd;
742	attr.attach_type = type;
743
744	ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
745	return libbpf_err_errno(ret);
746}
747
/* Create a BPF link with the BPF_LINK_CREATE command.
 *
 * @prog_fd:     program to attach
 * @target_fd:   attach target
 * @attach_type: selects which per-type option group of @opts is consumed
 * @opts:        optional settings; absent fields default to 0/NULL
 *
 * Returns the link fd on success. Returns -EINVAL (through libbpf_err())
 * for invalid @opts or option combinations. When the command fails with
 * EINVAL, no target or extra options were used, and @attach_type is one of
 * the types listed in the final switch, bpf_raw_tracepoint_open() is tried
 * as a fallback. Other failures return the negated errno through
 * libbpf_err().
 */
int bpf_link_create(int prog_fd, int target_fd,
		    enum bpf_attach_type attach_type,
		    const struct bpf_link_create_opts *opts)
{
	__u32 target_btf_id, iter_info_len;
	union bpf_attr attr;
	int fd, err;

	if (!OPTS_VALID(opts, bpf_link_create_opts))
		return libbpf_err(-EINVAL);

	iter_info_len = OPTS_GET(opts, iter_info_len, 0);
	target_btf_id = OPTS_GET(opts, target_btf_id, 0);

	/* validate we don't have unexpected combinations of non-zero fields */
	if (iter_info_len || target_btf_id) {
		if (iter_info_len && target_btf_id)
			return libbpf_err(-EINVAL);
		if (!OPTS_ZEROED(opts, target_btf_id))
			return libbpf_err(-EINVAL);
	}

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.target_fd = target_fd;
	attr.link_create.attach_type = attach_type;
	attr.link_create.flags = OPTS_GET(opts, flags, 0);

	/* an explicit target BTF id bypasses all per-attach-type handling */
	if (target_btf_id) {
		attr.link_create.target_btf_id = target_btf_id;
		goto proceed;
	}

	/* copy the option group for this attach type; each case then runs
	 * an OPTS_ZEROED() check and fails with -EINVAL if it does not hold
	 */
	switch (attach_type) {
	case BPF_TRACE_ITER:
		attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
		attr.link_create.iter_info_len = iter_info_len;
		break;
	case BPF_PERF_EVENT:
		attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0);
		if (!OPTS_ZEROED(opts, perf_event))
			return libbpf_err(-EINVAL);
		break;
	case BPF_TRACE_KPROBE_MULTI:
		attr.link_create.kprobe_multi.flags = OPTS_GET(opts, kprobe_multi.flags, 0);
		attr.link_create.kprobe_multi.cnt = OPTS_GET(opts, kprobe_multi.cnt, 0);
		attr.link_create.kprobe_multi.syms = ptr_to_u64(OPTS_GET(opts, kprobe_multi.syms, 0));
		attr.link_create.kprobe_multi.addrs = ptr_to_u64(OPTS_GET(opts, kprobe_multi.addrs, 0));
		attr.link_create.kprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, kprobe_multi.cookies, 0));
		if (!OPTS_ZEROED(opts, kprobe_multi))
			return libbpf_err(-EINVAL);
		break;
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
	case BPF_MODIFY_RETURN:
	case BPF_LSM_MAC:
		attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0);
		if (!OPTS_ZEROED(opts, tracing))
			return libbpf_err(-EINVAL);
		break;
	default:
		if (!OPTS_ZEROED(opts, flags))
			return libbpf_err(-EINVAL);
		break;
	}
proceed:
	fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr));
	if (fd >= 0)
		return fd;
	/* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry
	 * and other similar programs
	 */
	err = -errno;
	if (err != -EINVAL)
		return libbpf_err(err);

	/* if user used features not supported by
	 * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately
	 */
	if (attr.link_create.target_fd || attr.link_create.target_btf_id)
		return libbpf_err(err);
	if (!OPTS_ZEROED(opts, sz))
		return libbpf_err(err);

	/* otherwise, for few select kinds of programs that can be
	 * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as
	 * a fallback for older kernels
	 */
	switch (attach_type) {
	case BPF_TRACE_RAW_TP:
	case BPF_LSM_MAC:
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
	case BPF_MODIFY_RETURN:
		return bpf_raw_tracepoint_open(NULL, prog_fd);
	default:
		return libbpf_err(err);
	}
}
847
848int bpf_link_detach(int link_fd)
849{
850	union bpf_attr attr;
851	int ret;
852
853	memset(&attr, 0, sizeof(attr));
854	attr.link_detach.link_fd = link_fd;
855
856	ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
857	return libbpf_err_errno(ret);
858}
859
860int bpf_link_update(int link_fd, int new_prog_fd,
861		    const struct bpf_link_update_opts *opts)
862{
863	union bpf_attr attr;
864	int ret;
865
866	if (!OPTS_VALID(opts, bpf_link_update_opts))
867		return libbpf_err(-EINVAL);
868
869	memset(&attr, 0, sizeof(attr));
870	attr.link_update.link_fd = link_fd;
871	attr.link_update.new_prog_fd = new_prog_fd;
872	attr.link_update.flags = OPTS_GET(opts, flags, 0);
873	attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
874
875	ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
876	return libbpf_err_errno(ret);
877}
878
879int bpf_iter_create(int link_fd)
880{
881	union bpf_attr attr;
882	int fd;
883
884	memset(&attr, 0, sizeof(attr));
885	attr.iter_create.link_fd = link_fd;
886
887	fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr));
888	return libbpf_err_errno(fd);
889}
890
891int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
892		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
893{
894	union bpf_attr attr;
895	int ret;
896
897	memset(&attr, 0, sizeof(attr));
898	attr.query.target_fd	= target_fd;
899	attr.query.attach_type	= type;
900	attr.query.query_flags	= query_flags;
901	attr.query.prog_cnt	= *prog_cnt;
902	attr.query.prog_ids	= ptr_to_u64(prog_ids);
903
904	ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
905
906	if (attach_flags)
907		*attach_flags = attr.query.attach_flags;
908	*prog_cnt = attr.query.prog_cnt;
909
910	return libbpf_err_errno(ret);
911}
912
913int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
914		      void *data_out, __u32 *size_out, __u32 *retval,
915		      __u32 *duration)
916{
917	union bpf_attr attr;
918	int ret;
919
920	memset(&attr, 0, sizeof(attr));
921	attr.test.prog_fd = prog_fd;
922	attr.test.data_in = ptr_to_u64(data);
923	attr.test.data_out = ptr_to_u64(data_out);
924	attr.test.data_size_in = size;
925	attr.test.repeat = repeat;
926
927	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
928
929	if (size_out)
930		*size_out = attr.test.data_size_out;
931	if (retval)
932		*retval = attr.test.retval;
933	if (duration)
934		*duration = attr.test.duration;
935
936	return libbpf_err_errno(ret);
937}
938
939int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
940{
941	union bpf_attr attr;
942	int ret;
943
944	if (!test_attr->data_out && test_attr->data_size_out > 0)
945		return libbpf_err(-EINVAL);
946
947	memset(&attr, 0, sizeof(attr));
948	attr.test.prog_fd = test_attr->prog_fd;
949	attr.test.data_in = ptr_to_u64(test_attr->data_in);
950	attr.test.data_out = ptr_to_u64(test_attr->data_out);
951	attr.test.data_size_in = test_attr->data_size_in;
952	attr.test.data_size_out = test_attr->data_size_out;
953	attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
954	attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
955	attr.test.ctx_size_in = test_attr->ctx_size_in;
956	attr.test.ctx_size_out = test_attr->ctx_size_out;
957	attr.test.repeat = test_attr->repeat;
958
959	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
960
961	test_attr->data_size_out = attr.test.data_size_out;
962	test_attr->ctx_size_out = attr.test.ctx_size_out;
963	test_attr->retval = attr.test.retval;
964	test_attr->duration = attr.test.duration;
965
966	return libbpf_err_errno(ret);
967}
968
969int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
970{
971	union bpf_attr attr;
972	int ret;
973
974	if (!OPTS_VALID(opts, bpf_test_run_opts))
975		return libbpf_err(-EINVAL);
976
977	memset(&attr, 0, sizeof(attr));
978	attr.test.prog_fd = prog_fd;
979	attr.test.batch_size = OPTS_GET(opts, batch_size, 0);
980	attr.test.cpu = OPTS_GET(opts, cpu, 0);
981	attr.test.flags = OPTS_GET(opts, flags, 0);
982	attr.test.repeat = OPTS_GET(opts, repeat, 0);
983	attr.test.duration = OPTS_GET(opts, duration, 0);
984	attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0);
985	attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0);
986	attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0);
987	attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0);
988	attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL));
989	attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL));
990	attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
991	attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
992
993	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
994
995	OPTS_SET(opts, data_size_out, attr.test.data_size_out);
996	OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
997	OPTS_SET(opts, duration, attr.test.duration);
998	OPTS_SET(opts, retval, attr.test.retval);
999
1000	return libbpf_err_errno(ret);
1001}
1002
1003static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
1004{
1005	union bpf_attr attr;
1006	int err;
1007
1008	memset(&attr, 0, sizeof(attr));
1009	attr.start_id = start_id;
1010
1011	err = sys_bpf(cmd, &attr, sizeof(attr));
1012	if (!err)
1013		*next_id = attr.next_id;
1014
1015	return libbpf_err_errno(err);
1016}
1017
1018int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
1019{
1020	return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID);
1021}
1022
1023int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
1024{
1025	return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID);
1026}
1027
1028int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
1029{
1030	return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
1031}
1032
1033int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
1034{
1035	return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
1036}
1037
1038int bpf_prog_get_fd_by_id(__u32 id)
1039{
1040	union bpf_attr attr;
1041	int fd;
1042
1043	memset(&attr, 0, sizeof(attr));
1044	attr.prog_id = id;
1045
1046	fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
1047	return libbpf_err_errno(fd);
1048}
1049
1050int bpf_map_get_fd_by_id(__u32 id)
1051{
1052	union bpf_attr attr;
1053	int fd;
1054
1055	memset(&attr, 0, sizeof(attr));
1056	attr.map_id = id;
1057
1058	fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
1059	return libbpf_err_errno(fd);
1060}
1061
1062int bpf_btf_get_fd_by_id(__u32 id)
1063{
1064	union bpf_attr attr;
1065	int fd;
1066
1067	memset(&attr, 0, sizeof(attr));
1068	attr.btf_id = id;
1069
1070	fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
1071	return libbpf_err_errno(fd);
1072}
1073
1074int bpf_link_get_fd_by_id(__u32 id)
1075{
1076	union bpf_attr attr;
1077	int fd;
1078
1079	memset(&attr, 0, sizeof(attr));
1080	attr.link_id = id;
1081
1082	fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
1083	return libbpf_err_errno(fd);
1084}
1085
1086int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
1087{
1088	union bpf_attr attr;
1089	int err;
1090
1091	memset(&attr, 0, sizeof(attr));
1092	attr.info.bpf_fd = bpf_fd;
1093	attr.info.info_len = *info_len;
1094	attr.info.info = ptr_to_u64(info);
1095
1096	err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
1097
1098	if (!err)
1099		*info_len = attr.info.info_len;
1100
1101	return libbpf_err_errno(err);
1102}
1103
1104int bpf_raw_tracepoint_open(const char *name, int prog_fd)
1105{
1106	union bpf_attr attr;
1107	int fd;
1108
1109	memset(&attr, 0, sizeof(attr));
1110	attr.raw_tracepoint.name = ptr_to_u64(name);
1111	attr.raw_tracepoint.prog_fd = prog_fd;
1112
1113	fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
1114	return libbpf_err_errno(fd);
1115}
1116
1117int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts)
1118{
1119	const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level);
1120	union bpf_attr attr;
1121	char *log_buf;
1122	size_t log_size;
1123	__u32 log_level;
1124	int fd;
1125
1126	bump_rlimit_memlock();
1127
1128	memset(&attr, 0, attr_sz);
1129
1130	if (!OPTS_VALID(opts, bpf_btf_load_opts))
1131		return libbpf_err(-EINVAL);
1132
1133	log_buf = OPTS_GET(opts, log_buf, NULL);
1134	log_size = OPTS_GET(opts, log_size, 0);
1135	log_level = OPTS_GET(opts, log_level, 0);
1136
1137	if (log_size > UINT_MAX)
1138		return libbpf_err(-EINVAL);
1139	if (log_size && !log_buf)
1140		return libbpf_err(-EINVAL);
1141
1142	attr.btf = ptr_to_u64(btf_data);
1143	attr.btf_size = btf_size;
1144	/* log_level == 0 and log_buf != NULL means "try loading without
1145	 * log_buf, but retry with log_buf and log_level=1 on error", which is
1146	 * consistent across low-level and high-level BTF and program loading
1147	 * APIs within libbpf and provides a sensible behavior in practice
1148	 */
1149	if (log_level) {
1150		attr.btf_log_buf = ptr_to_u64(log_buf);
1151		attr.btf_log_size = (__u32)log_size;
1152		attr.btf_log_level = log_level;
1153	}
1154
1155	fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
1156	if (fd < 0 && log_buf && log_level == 0) {
1157		attr.btf_log_buf = ptr_to_u64(log_buf);
1158		attr.btf_log_size = (__u32)log_size;
1159		attr.btf_log_level = 1;
1160		fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
1161	}
1162	return libbpf_err_errno(fd);
1163}
1164
1165int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log)
1166{
1167	LIBBPF_OPTS(bpf_btf_load_opts, opts);
1168	int fd;
1169
1170retry:
1171	if (do_log && log_buf && log_buf_size) {
1172		opts.log_buf = log_buf;
1173		opts.log_size = log_buf_size;
1174		opts.log_level = 1;
1175	}
1176
1177	fd = bpf_btf_load(btf, btf_size, &opts);
1178	if (fd < 0 && !do_log && log_buf && log_buf_size) {
1179		do_log = true;
1180		goto retry;
1181	}
1182
1183	return libbpf_err_errno(fd);
1184}
1185
1186int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
1187		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
1188		      __u64 *probe_addr)
1189{
1190	union bpf_attr attr = {};
1191	int err;
1192
1193	attr.task_fd_query.pid = pid;
1194	attr.task_fd_query.fd = fd;
1195	attr.task_fd_query.flags = flags;
1196	attr.task_fd_query.buf = ptr_to_u64(buf);
1197	attr.task_fd_query.buf_len = *buf_len;
1198
1199	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
1200
1201	*buf_len = attr.task_fd_query.buf_len;
1202	*prog_id = attr.task_fd_query.prog_id;
1203	*fd_type = attr.task_fd_query.fd_type;
1204	*probe_offset = attr.task_fd_query.probe_offset;
1205	*probe_addr = attr.task_fd_query.probe_addr;
1206
1207	return libbpf_err_errno(err);
1208}
1209
1210int bpf_enable_stats(enum bpf_stats_type type)
1211{
1212	union bpf_attr attr;
1213	int fd;
1214
1215	memset(&attr, 0, sizeof(attr));
1216	attr.enable_stats.type = type;
1217
1218	fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr));
1219	return libbpf_err_errno(fd);
1220}
1221
1222int bpf_prog_bind_map(int prog_fd, int map_fd,
1223		      const struct bpf_prog_bind_opts *opts)
1224{
1225	union bpf_attr attr;
1226	int ret;
1227
1228	if (!OPTS_VALID(opts, bpf_prog_bind_opts))
1229		return libbpf_err(-EINVAL);
1230
1231	memset(&attr, 0, sizeof(attr));
1232	attr.prog_bind_map.prog_fd = prog_fd;
1233	attr.prog_bind_map.map_fd = map_fd;
1234	attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
1235
1236	ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
1237	return libbpf_err_errno(ret);
1238}
1239