1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 * Copyright (c) 2016 Facebook
4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5 */
6#include <uapi/linux/btf.h>
7#include <linux/bpf-cgroup.h>
8#include <linux/kernel.h>
9#include <linux/types.h>
10#include <linux/slab.h>
11#include <linux/bpf.h>
12#include <linux/btf.h>
13#include <linux/bpf_verifier.h>
14#include <linux/filter.h>
15#include <net/netlink.h>
16#include <linux/file.h>
17#include <linux/vmalloc.h>
18#include <linux/stringify.h>
19#include <linux/bsearch.h>
20#include <linux/sort.h>
21#include <linux/perf_event.h>
22#include <linux/ctype.h>
23#include <linux/error-injection.h>
24#include <linux/bpf_lsm.h>
25#include <linux/btf_ids.h>
26
27#include "disasm.h"
28
29static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
30#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
31	[_id] = & _name ## _verifier_ops,
32#define BPF_MAP_TYPE(_id, _ops)
33#define BPF_LINK_TYPE(_id, _name)
34#include <linux/bpf_types.h>
35#undef BPF_PROG_TYPE
36#undef BPF_MAP_TYPE
37#undef BPF_LINK_TYPE
38};
39
/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if the total number
 * of insns is less than 4K but there are too many branches that change
 * stack/regs. The number of 'branches to be analyzed' is limited to 1k.
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If the instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then the type of
 * R5 is copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * The verifier tracks arithmetic operations on pointers, e.g.:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * The 1st insn copies R10 (which has FRAME_PTR type) into R1
 * and the 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within the stack.
 * So after the 2nd insn, the register R1 has type PTR_TO_STACK
 * (and the -20 constant is saved for further stack bounds checking),
 * meaning that this reg is a pointer to the stack plus a known immediate
 * constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer
 * (e.g. pointer plus pointer becomes SCALAR_VALUE).
 *
 * When the verifier sees a load or store instruction, the type of the base
 * register can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET.
 * These are four pointer types recognized by the check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to a 'map element
 * value' and the range [ptr, ptr + map's value_size) is accessible.
 *
 * Registers used to pass values to function calls are checked against
 * the function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one such argument constraint.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'.
 *
 * For example, the argument constraints for bpf_map_lookup_elem() are:
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'.
 * The function expects the 1st argument to be a const pointer to 'struct
 * bpf_map' and the 2nd argument to be a pointer to the stack, which will be
 * used inside the helper function as a pointer to the map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here the kernel can access the 'key' and 'map' pointers safely, knowing
 *    that [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of the eBPF program.
 * }
 *
 * The corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * Here the verifier looks at the prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is
 * ok. Now the verifier knows that this map has a key of R1->map_ptr->key_size
 * bytes.
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so
 * far. Now the verifier checks that [R2, R2 + map's key_size) are within
 * stack limits and were initialized prior to this call.
 * If that's ok, the verifier allows this BPF_CALL insn and looks at
 * .ret_type, which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL, meaning that bpf_map_lookup_elem()
 * returns either a pointer to the map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through an 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to the return type of the function and registers
 * R1-R5 are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
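
/* As an illustration of the reference-tracking rules above, a minimal,
 * hypothetical fragment (a sketch only; argument setup for the lookup helper
 * and program-type specific return handling are omitted) could look like:
 *
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *                                           // R0 is PTR_TO_SOCKET_OR_NULL,
 *                                           // a reference id is recorded
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), // taken: R0 is NULL and the
 *                                           // verifier drops the reference
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),   // fallthrough: R0/R1 is PTR_TO_SOCKET
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *                                           // reference is released here
 *    BPF_EXIT_INSN(),
 *
 * Omitting either the NULL check or the bpf_sk_release() call would make the
 * verifier reject the program.
 */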
164
165/* verifier_state + insn_idx are pushed to stack when branch is encountered */
166struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
171	struct bpf_verifier_state st;
172	int insn_idx;
173	int prev_insn_idx;
174	struct bpf_verifier_stack_elem *next;
175	/* length of verifier log at the time this state was pushed on stack */
176	u32 log_pos;
177};
178
179#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
180#define BPF_COMPLEXITY_LIMIT_STATES	64
181
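/* A brief orientation note (added for clarity; record_func_map() and
 * record_func_key() hold the authoritative details): insn_aux_data keeps
 * per-call-instruction tracking state. map_ptr_state remembers which bpf_map
 * was seen at a call site, with BPF_MAP_PTR_POISON meaning that different
 * maps were seen on different paths and BPF_MAP_PTR_UNPRIV packed into the
 * low bit of the pointer. map_key_state likewise remembers a constant map
 * key, tagged with the BPF_MAP_KEY_SEEN/POISON flags below. Later
 * optimization passes only specialize a call when this state is unpoisoned.
 */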
182#define BPF_MAP_KEY_POISON	(1ULL << 63)
183#define BPF_MAP_KEY_SEEN	(1ULL << 62)
184
185#define BPF_MAP_PTR_UNPRIV	1UL
186#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
187					  POISON_POINTER_DELTA))
188#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
189
190static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
191static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
192
193static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
194{
195	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
196}
197
198static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
199{
200	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
201}
202
203static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
204			      const struct bpf_map *map, bool unpriv)
205{
206	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
207	unpriv |= bpf_map_ptr_unpriv(aux);
208	aux->map_ptr_state = (unsigned long)map |
209			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
210}
211
212static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
213{
214	return aux->map_key_state & BPF_MAP_KEY_POISON;
215}
216
217static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
218{
219	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
220}
221
222static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
223{
224	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
225}
226
227static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
228{
229	bool poisoned = bpf_map_key_poisoned(aux);
230
231	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
232			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
233}
234
235static bool bpf_pseudo_call(const struct bpf_insn *insn)
236{
237	return insn->code == (BPF_JMP | BPF_CALL) &&
238	       insn->src_reg == BPF_PSEUDO_CALL;
239}
240
241static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
242{
243	return insn->code == (BPF_JMP | BPF_CALL) &&
244	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
245}
246
247struct bpf_call_arg_meta {
248	struct bpf_map *map_ptr;
249	bool raw_mode;
250	bool pkt_access;
251	u8 release_regno;
252	int regno;
253	int access_size;
254	int mem_size;
255	u64 msize_max_value;
256	int ref_obj_id;
257	int map_uid;
258	int func_id;
259	struct btf *btf;
260	u32 btf_id;
261	struct btf *ret_btf;
262	u32 ret_btf_id;
263	u32 subprogno;
264	struct bpf_map_value_off_desc *kptr_off_desc;
265	u8 uninit_dynptr_regno;
266};
267
268struct btf *btf_vmlinux;
269
270static DEFINE_MUTEX(bpf_verifier_lock);
271
272static const struct bpf_line_info *
273find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
274{
275	const struct bpf_line_info *linfo;
276	const struct bpf_prog *prog;
277	u32 i, nr_linfo;
278
279	prog = env->prog;
280	nr_linfo = prog->aux->nr_linfo;
281
282	if (!nr_linfo || insn_off >= prog->len)
283		return NULL;
284
285	linfo = prog->aux->linfo;
286	for (i = 1; i < nr_linfo; i++)
287		if (insn_off < linfo[i].insn_off)
288			break;
289
290	return &linfo[i - 1];
291}
292
293void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
294		       va_list args)
295{
296	unsigned int n;
297
298	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
299
300	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
301		  "verifier log line truncated - local buffer too short\n");
302
303	if (log->level == BPF_LOG_KERNEL) {
304		bool newline = n > 0 && log->kbuf[n - 1] == '\n';
305
306		pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
307		return;
308	}
309
310	n = min(log->len_total - log->len_used - 1, n);
311	log->kbuf[n] = '\0';
312	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
313		log->len_used += n;
314	else
315		log->ubuf = NULL;
316}
317
318static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
319{
320	char zero = 0;
321
322	if (!bpf_verifier_log_needed(log))
323		return;
324
325	log->len_used = new_pos;
326	if (put_user(zero, log->ubuf + new_pos))
327		log->ubuf = NULL;
328}
329
330/* log_level controls verbosity level of eBPF verifier.
331 * bpf_verifier_log_write() is used to dump the verification trace to the log,
332 * so the user can figure out what's wrong with the program
333 */
334__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
335					   const char *fmt, ...)
336{
337	va_list args;
338
339	if (!bpf_verifier_log_needed(&env->log))
340		return;
341
342	va_start(args, fmt);
343	bpf_verifier_vlog(&env->log, fmt, args);
344	va_end(args);
345}
346EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
347
348__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
349{
350	struct bpf_verifier_env *env = private_data;
351	va_list args;
352
353	if (!bpf_verifier_log_needed(&env->log))
354		return;
355
356	va_start(args, fmt);
357	bpf_verifier_vlog(&env->log, fmt, args);
358	va_end(args);
359}
360
361__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
362			    const char *fmt, ...)
363{
364	va_list args;
365
366	if (!bpf_verifier_log_needed(log))
367		return;
368
369	va_start(args, fmt);
370	bpf_verifier_vlog(log, fmt, args);
371	va_end(args);
372}
373
374static const char *ltrim(const char *s)
375{
376	while (isspace(*s))
377		s++;
378
379	return s;
380}
381
382__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
383					 u32 insn_off,
384					 const char *prefix_fmt, ...)
385{
386	const struct bpf_line_info *linfo;
387
388	if (!bpf_verifier_log_needed(&env->log))
389		return;
390
391	linfo = find_linfo(env, insn_off);
392	if (!linfo || linfo == env->prev_linfo)
393		return;
394
395	if (prefix_fmt) {
396		va_list args;
397
398		va_start(args, prefix_fmt);
399		bpf_verifier_vlog(&env->log, prefix_fmt, args);
400		va_end(args);
401	}
402
403	verbose(env, "%s\n",
404		ltrim(btf_name_by_offset(env->prog->aux->btf,
405					 linfo->line_off)));
406
407	env->prev_linfo = linfo;
408}
409
410static void verbose_invalid_scalar(struct bpf_verifier_env *env,
411				   struct bpf_reg_state *reg,
412				   struct tnum *range, const char *ctx,
413				   const char *reg_name)
414{
415	char tn_buf[48];
416
417	verbose(env, "At %s the register %s ", ctx, reg_name);
418	if (!tnum_is_unknown(reg->var_off)) {
419		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
420		verbose(env, "has value %s", tn_buf);
421	} else {
422		verbose(env, "has unknown scalar value");
423	}
424	tnum_strn(tn_buf, sizeof(tn_buf), *range);
425	verbose(env, " should have been in %s\n", tn_buf);
426}
427
428static bool type_is_pkt_pointer(enum bpf_reg_type type)
429{
430	return type == PTR_TO_PACKET ||
431	       type == PTR_TO_PACKET_META;
432}
433
434static bool type_is_sk_pointer(enum bpf_reg_type type)
435{
436	return type == PTR_TO_SOCKET ||
437		type == PTR_TO_SOCK_COMMON ||
438		type == PTR_TO_TCP_SOCK ||
439		type == PTR_TO_XDP_SOCK;
440}
441
442static bool reg_type_not_null(enum bpf_reg_type type)
443{
444	return type == PTR_TO_SOCKET ||
445		type == PTR_TO_TCP_SOCK ||
446		type == PTR_TO_MAP_VALUE ||
447		type == PTR_TO_MAP_KEY ||
448		type == PTR_TO_SOCK_COMMON;
449}
450
451static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
452{
453	return reg->type == PTR_TO_MAP_VALUE &&
454		map_value_has_spin_lock(reg->map_ptr);
455}
456
457static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
458{
459	return base_type(type) == PTR_TO_SOCKET ||
460		base_type(type) == PTR_TO_TCP_SOCK ||
461		base_type(type) == PTR_TO_MEM ||
462		base_type(type) == PTR_TO_BTF_ID;
463}
464
465static bool type_is_rdonly_mem(u32 type)
466{
467	return type & MEM_RDONLY;
468}
469
470static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
471{
472	return type == ARG_PTR_TO_SOCK_COMMON;
473}
474
475static bool type_may_be_null(u32 type)
476{
477	return type & PTR_MAYBE_NULL;
478}
479
480static bool may_be_acquire_function(enum bpf_func_id func_id)
481{
482	return func_id == BPF_FUNC_sk_lookup_tcp ||
483		func_id == BPF_FUNC_sk_lookup_udp ||
484		func_id == BPF_FUNC_skc_lookup_tcp ||
485		func_id == BPF_FUNC_map_lookup_elem ||
486	        func_id == BPF_FUNC_ringbuf_reserve;
487}
488
489static bool is_acquire_function(enum bpf_func_id func_id,
490				const struct bpf_map *map)
491{
492	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
493
494	if (func_id == BPF_FUNC_sk_lookup_tcp ||
495	    func_id == BPF_FUNC_sk_lookup_udp ||
496	    func_id == BPF_FUNC_skc_lookup_tcp ||
497	    func_id == BPF_FUNC_ringbuf_reserve ||
498	    func_id == BPF_FUNC_kptr_xchg)
499		return true;
500
501	if (func_id == BPF_FUNC_map_lookup_elem &&
502	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
503	     map_type == BPF_MAP_TYPE_SOCKHASH))
504		return true;
505
506	return false;
507}
508
509static bool is_ptr_cast_function(enum bpf_func_id func_id)
510{
511	return func_id == BPF_FUNC_tcp_sock ||
512		func_id == BPF_FUNC_sk_fullsock ||
513		func_id == BPF_FUNC_skc_to_tcp_sock ||
514		func_id == BPF_FUNC_skc_to_tcp6_sock ||
515		func_id == BPF_FUNC_skc_to_udp6_sock ||
516		func_id == BPF_FUNC_skc_to_mptcp_sock ||
517		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
518		func_id == BPF_FUNC_skc_to_tcp_request_sock;
519}
520
521static bool is_cmpxchg_insn(const struct bpf_insn *insn)
522{
523	return BPF_CLASS(insn->code) == BPF_STX &&
524	       BPF_MODE(insn->code) == BPF_ATOMIC &&
525	       insn->imm == BPF_CMPXCHG;
526}
527
/* string representation of 'enum bpf_reg_type'
 *
 * Note that reg_type_str() cannot appear more than once in a single verbose()
 * statement: it formats into the single env->type_str_buf buffer, so a second
 * call would overwrite the result of the first before it is printed.
 */
533static const char *reg_type_str(struct bpf_verifier_env *env,
534				enum bpf_reg_type type)
535{
536	char postfix[16] = {0}, prefix[32] = {0};
537	static const char * const str[] = {
538		[NOT_INIT]		= "?",
539		[SCALAR_VALUE]		= "scalar",
540		[PTR_TO_CTX]		= "ctx",
541		[CONST_PTR_TO_MAP]	= "map_ptr",
542		[PTR_TO_MAP_VALUE]	= "map_value",
543		[PTR_TO_STACK]		= "fp",
544		[PTR_TO_PACKET]		= "pkt",
545		[PTR_TO_PACKET_META]	= "pkt_meta",
546		[PTR_TO_PACKET_END]	= "pkt_end",
547		[PTR_TO_FLOW_KEYS]	= "flow_keys",
548		[PTR_TO_SOCKET]		= "sock",
549		[PTR_TO_SOCK_COMMON]	= "sock_common",
550		[PTR_TO_TCP_SOCK]	= "tcp_sock",
551		[PTR_TO_TP_BUFFER]	= "tp_buffer",
552		[PTR_TO_XDP_SOCK]	= "xdp_sock",
553		[PTR_TO_BTF_ID]		= "ptr_",
554		[PTR_TO_MEM]		= "mem",
555		[PTR_TO_BUF]		= "buf",
556		[PTR_TO_FUNC]		= "func",
557		[PTR_TO_MAP_KEY]	= "map_key",
558	};
559
560	if (type & PTR_MAYBE_NULL) {
561		if (base_type(type) == PTR_TO_BTF_ID)
562			strncpy(postfix, "or_null_", 16);
563		else
564			strncpy(postfix, "_or_null", 16);
565	}
566
567	if (type & MEM_RDONLY)
568		strncpy(prefix, "rdonly_", 32);
569	if (type & MEM_ALLOC)
570		strncpy(prefix, "alloc_", 32);
571	if (type & MEM_USER)
572		strncpy(prefix, "user_", 32);
573	if (type & MEM_PERCPU)
574		strncpy(prefix, "percpu_", 32);
575	if (type & PTR_UNTRUSTED)
576		strncpy(prefix, "untrusted_", 32);
577
578	snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
579		 prefix, str[base_type(type)], postfix);
580	return env->type_str_buf;
581}
582
583static char slot_type_char[] = {
584	[STACK_INVALID]	= '?',
585	[STACK_SPILL]	= 'r',
586	[STACK_MISC]	= 'm',
587	[STACK_ZERO]	= '0',
588	[STACK_DYNPTR]	= 'd',
589};
590
591static void print_liveness(struct bpf_verifier_env *env,
592			   enum bpf_reg_liveness live)
593{
594	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
595	    verbose(env, "_");
596	if (live & REG_LIVE_READ)
597		verbose(env, "r");
598	if (live & REG_LIVE_WRITTEN)
599		verbose(env, "w");
600	if (live & REG_LIVE_DONE)
601		verbose(env, "D");
602}
603
604static int get_spi(s32 off)
605{
606	return (-off - 1) / BPF_REG_SIZE;
607}
608
609static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
610{
611	int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
612
	/* We need to check that slots between [spi - nr_slots + 1, spi] are
	 * within [0, allocated_slots), i.e. inside the currently allocated
	 * stack.
	 *
	 * Please note that the spi grows downwards. For example, a dynptr
	 * takes the size of two stack slots; the first slot will be at
	 * spi and the second slot will be at spi - 1.
	 */
620	return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
621}
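
/* Illustrative example (not from the original source): with off == -8,
 * get_spi() returns spi 0, the slot covering fp[-8 .. -1]; with off == -16 it
 * returns spi 1. A two-slot object such as a dynptr stored at fp-16 then
 * occupies spi 1 and spi 0, and is_spi_bounds_valid(state, 1, BPF_DYNPTR_NR_SLOTS)
 * verifies that both slots lie within the allocated stack.
 */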
622
623static struct bpf_func_state *func(struct bpf_verifier_env *env,
624				   const struct bpf_reg_state *reg)
625{
626	struct bpf_verifier_state *cur = env->cur_state;
627
628	return cur->frame[reg->frameno];
629}
630
631static const char *kernel_type_name(const struct btf* btf, u32 id)
632{
633	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
634}
635
636static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
637{
638	env->scratched_regs |= 1U << regno;
639}
640
641static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
642{
643	env->scratched_stack_slots |= 1ULL << spi;
644}
645
646static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
647{
648	return (env->scratched_regs >> regno) & 1;
649}
650
651static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
652{
653	return (env->scratched_stack_slots >> regno) & 1;
654}
655
656static bool verifier_state_scratched(const struct bpf_verifier_env *env)
657{
658	return env->scratched_regs || env->scratched_stack_slots;
659}
660
661static void mark_verifier_state_clean(struct bpf_verifier_env *env)
662{
663	env->scratched_regs = 0U;
664	env->scratched_stack_slots = 0ULL;
665}
666
667/* Used for printing the entire verifier state. */
668static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
669{
670	env->scratched_regs = ~0U;
671	env->scratched_stack_slots = ~0ULL;
672}
673
674static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
675{
676	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
677	case DYNPTR_TYPE_LOCAL:
678		return BPF_DYNPTR_TYPE_LOCAL;
679	case DYNPTR_TYPE_RINGBUF:
680		return BPF_DYNPTR_TYPE_RINGBUF;
681	default:
682		return BPF_DYNPTR_TYPE_INVALID;
683	}
684}
685
686static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
687{
688	return type == BPF_DYNPTR_TYPE_RINGBUF;
689}
690
691static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
692				   enum bpf_arg_type arg_type, int insn_idx)
693{
694	struct bpf_func_state *state = func(env, reg);
695	enum bpf_dynptr_type type;
696	int spi, i, id;
697
698	spi = get_spi(reg->off);
699
700	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
701		return -EINVAL;
702
703	for (i = 0; i < BPF_REG_SIZE; i++) {
704		state->stack[spi].slot_type[i] = STACK_DYNPTR;
705		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
706	}
707
708	type = arg_to_dynptr_type(arg_type);
709	if (type == BPF_DYNPTR_TYPE_INVALID)
710		return -EINVAL;
711
712	state->stack[spi].spilled_ptr.dynptr.first_slot = true;
713	state->stack[spi].spilled_ptr.dynptr.type = type;
714	state->stack[spi - 1].spilled_ptr.dynptr.type = type;
715
716	if (dynptr_type_refcounted(type)) {
717		/* The id is used to track proper releasing */
718		id = acquire_reference_state(env, insn_idx);
719		if (id < 0)
720			return id;
721
722		state->stack[spi].spilled_ptr.id = id;
723		state->stack[spi - 1].spilled_ptr.id = id;
724	}
725
726	return 0;
727}
728
729static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
730{
731	struct bpf_func_state *state = func(env, reg);
732	int spi, i;
733
734	spi = get_spi(reg->off);
735
736	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
737		return -EINVAL;
738
739	for (i = 0; i < BPF_REG_SIZE; i++) {
740		state->stack[spi].slot_type[i] = STACK_INVALID;
741		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
742	}
743
744	/* Invalidate any slices associated with this dynptr */
745	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
746		release_reference(env, state->stack[spi].spilled_ptr.id);
747		state->stack[spi].spilled_ptr.id = 0;
748		state->stack[spi - 1].spilled_ptr.id = 0;
749	}
750
751	state->stack[spi].spilled_ptr.dynptr.first_slot = false;
752	state->stack[spi].spilled_ptr.dynptr.type = 0;
753	state->stack[spi - 1].spilled_ptr.dynptr.type = 0;
754
755	return 0;
756}
757
758static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
759{
760	struct bpf_func_state *state = func(env, reg);
761	int spi = get_spi(reg->off);
762	int i;
763
764	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
765		return true;
766
767	for (i = 0; i < BPF_REG_SIZE; i++) {
768		if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
769		    state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
770			return false;
771	}
772
773	return true;
774}
775
776static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
777				     enum bpf_arg_type arg_type)
778{
779	struct bpf_func_state *state = func(env, reg);
780	int spi = get_spi(reg->off);
781	int i;
782
783	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
784	    !state->stack[spi].spilled_ptr.dynptr.first_slot)
785		return false;
786
787	for (i = 0; i < BPF_REG_SIZE; i++) {
788		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
789		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
790			return false;
791	}
792
793	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
794	if (arg_type == ARG_PTR_TO_DYNPTR)
795		return true;
796
797	return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type);
798}
799
800/* The reg state of a pointer or a bounded scalar was saved when
801 * it was spilled to the stack.
802 */
803static bool is_spilled_reg(const struct bpf_stack_state *stack)
804{
805	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
806}
807
808static void scrub_spilled_slot(u8 *stype)
809{
810	if (*stype != STACK_INVALID)
811		*stype = STACK_MISC;
812}
813
814static void print_verifier_state(struct bpf_verifier_env *env,
815				 const struct bpf_func_state *state,
816				 bool print_all)
817{
818	const struct bpf_reg_state *reg;
819	enum bpf_reg_type t;
820	int i;
821
822	if (state->frameno)
823		verbose(env, " frame%d:", state->frameno);
824	for (i = 0; i < MAX_BPF_REG; i++) {
825		reg = &state->regs[i];
826		t = reg->type;
827		if (t == NOT_INIT)
828			continue;
829		if (!print_all && !reg_scratched(env, i))
830			continue;
831		verbose(env, " R%d", i);
832		print_liveness(env, reg->live);
833		verbose(env, "=");
834		if (t == SCALAR_VALUE && reg->precise)
835			verbose(env, "P");
836		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
837		    tnum_is_const(reg->var_off)) {
838			/* reg->off should be 0 for SCALAR_VALUE */
839			verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
840			verbose(env, "%lld", reg->var_off.value + reg->off);
841		} else {
842			const char *sep = "";
843
844			verbose(env, "%s", reg_type_str(env, t));
845			if (base_type(t) == PTR_TO_BTF_ID)
846				verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
847			verbose(env, "(");
848/*
849 * _a stands for append, was shortened to avoid multiline statements below.
850 * This macro is used to output a comma separated list of attributes.
851 */
852#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
853
854			if (reg->id)
855				verbose_a("id=%d", reg->id);
856			if (reg_type_may_be_refcounted_or_null(t) && reg->ref_obj_id)
857				verbose_a("ref_obj_id=%d", reg->ref_obj_id);
858			if (t != SCALAR_VALUE)
859				verbose_a("off=%d", reg->off);
860			if (type_is_pkt_pointer(t))
861				verbose_a("r=%d", reg->range);
862			else if (base_type(t) == CONST_PTR_TO_MAP ||
863				 base_type(t) == PTR_TO_MAP_KEY ||
864				 base_type(t) == PTR_TO_MAP_VALUE)
865				verbose_a("ks=%d,vs=%d",
866					  reg->map_ptr->key_size,
867					  reg->map_ptr->value_size);
868			if (tnum_is_const(reg->var_off)) {
869				/* Typically an immediate SCALAR_VALUE, but
870				 * could be a pointer whose offset is too big
871				 * for reg->off
872				 */
873				verbose_a("imm=%llx", reg->var_off.value);
874			} else {
875				if (reg->smin_value != reg->umin_value &&
876				    reg->smin_value != S64_MIN)
877					verbose_a("smin=%lld", (long long)reg->smin_value);
878				if (reg->smax_value != reg->umax_value &&
879				    reg->smax_value != S64_MAX)
880					verbose_a("smax=%lld", (long long)reg->smax_value);
881				if (reg->umin_value != 0)
882					verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
883				if (reg->umax_value != U64_MAX)
884					verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
885				if (!tnum_is_unknown(reg->var_off)) {
886					char tn_buf[48];
887
888					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
889					verbose_a("var_off=%s", tn_buf);
890				}
891				if (reg->s32_min_value != reg->smin_value &&
892				    reg->s32_min_value != S32_MIN)
893					verbose_a("s32_min=%d", (int)(reg->s32_min_value));
894				if (reg->s32_max_value != reg->smax_value &&
895				    reg->s32_max_value != S32_MAX)
896					verbose_a("s32_max=%d", (int)(reg->s32_max_value));
897				if (reg->u32_min_value != reg->umin_value &&
898				    reg->u32_min_value != U32_MIN)
899					verbose_a("u32_min=%d", (int)(reg->u32_min_value));
900				if (reg->u32_max_value != reg->umax_value &&
901				    reg->u32_max_value != U32_MAX)
902					verbose_a("u32_max=%d", (int)(reg->u32_max_value));
903			}
904#undef verbose_a
905
906			verbose(env, ")");
907		}
908	}
909	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
910		char types_buf[BPF_REG_SIZE + 1];
911		bool valid = false;
912		int j;
913
914		for (j = 0; j < BPF_REG_SIZE; j++) {
915			if (state->stack[i].slot_type[j] != STACK_INVALID)
916				valid = true;
917			types_buf[j] = slot_type_char[
918					state->stack[i].slot_type[j]];
919		}
920		types_buf[BPF_REG_SIZE] = 0;
921		if (!valid)
922			continue;
923		if (!print_all && !stack_slot_scratched(env, i))
924			continue;
925		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
926		print_liveness(env, state->stack[i].spilled_ptr.live);
927		if (is_spilled_reg(&state->stack[i])) {
928			reg = &state->stack[i].spilled_ptr;
929			t = reg->type;
930			verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
931			if (t == SCALAR_VALUE && reg->precise)
932				verbose(env, "P");
933			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
934				verbose(env, "%lld", reg->var_off.value + reg->off);
935		} else {
936			verbose(env, "=%s", types_buf);
937		}
938	}
939	if (state->acquired_refs && state->refs[0].id) {
940		verbose(env, " refs=%d", state->refs[0].id);
941		for (i = 1; i < state->acquired_refs; i++)
942			if (state->refs[i].id)
943				verbose(env, ",%d", state->refs[i].id);
944	}
945	if (state->in_callback_fn)
946		verbose(env, " cb");
947	if (state->in_async_callback_fn)
948		verbose(env, " async_cb");
949	verbose(env, "\n");
950	mark_verifier_state_clean(env);
951}
952
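/* Descriptive note (added here): returns the number of padding characters
 * needed so that the ';' marker and the register state appended by
 * print_insn_state() line up at a roughly fixed, BPF_LOG_MIN_ALIGNMENT-aligned
 * column after the instruction text of length 'pos' (see the "%*c;" format in
 * print_insn_state() below).
 */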
953static inline u32 vlog_alignment(u32 pos)
954{
955	return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
956			BPF_LOG_MIN_ALIGNMENT) - pos - 1;
957}
958
959static void print_insn_state(struct bpf_verifier_env *env,
960			     const struct bpf_func_state *state)
961{
962	if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
963		/* remove new line character */
964		bpf_vlog_reset(&env->log, env->prev_log_len - 1);
965		verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
966	} else {
967		verbose(env, "%d:", env->insn_idx);
968	}
969	print_verifier_state(env, state, false);
970}
971
972/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
973 * small to hold src. This is different from krealloc since we don't want to preserve
974 * the contents of dst.
975 *
976 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
977 * not be allocated.
978 */
979static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
980{
981	size_t bytes;
982
983	if (ZERO_OR_NULL_PTR(src))
984		goto out;
985
986	if (unlikely(check_mul_overflow(n, size, &bytes)))
987		return NULL;
988
989	if (ksize(dst) < bytes) {
990		kfree(dst);
991		dst = kmalloc_track_caller(bytes, flags);
992		if (!dst)
993			return NULL;
994	}
995
996	memcpy(dst, src, bytes);
997out:
998	return dst ? dst : ZERO_SIZE_PTR;
999}
1000
1001/* resize an array from old_n items to new_n items. the array is reallocated if it's too
1002 * small to hold new_n items. new items are zeroed out if the array grows.
1003 *
1004 * Contrary to krealloc_array, does not free arr if new_n is zero.
1005 */
1006static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1007{
1008	if (!new_n || old_n == new_n)
1009		goto out;
1010
1011	arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
1012	if (!arr)
1013		return NULL;
1014
1015	if (new_n > old_n)
1016		memset(arr + old_n * size, 0, (new_n - old_n) * size);
1017
1018out:
1019	return arr ? arr : ZERO_SIZE_PTR;
1020}
1021
1022static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1023{
1024	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1025			       sizeof(struct bpf_reference_state), GFP_KERNEL);
1026	if (!dst->refs)
1027		return -ENOMEM;
1028
1029	dst->acquired_refs = src->acquired_refs;
1030	return 0;
1031}
1032
1033static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1034{
1035	size_t n = src->allocated_stack / BPF_REG_SIZE;
1036
1037	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1038				GFP_KERNEL);
1039	if (!dst->stack)
1040		return -ENOMEM;
1041
1042	dst->allocated_stack = src->allocated_stack;
1043	return 0;
1044}
1045
1046static int resize_reference_state(struct bpf_func_state *state, size_t n)
1047{
1048	state->refs = realloc_array(state->refs, state->acquired_refs, n,
1049				    sizeof(struct bpf_reference_state));
1050	if (!state->refs)
1051		return -ENOMEM;
1052
1053	state->acquired_refs = n;
1054	return 0;
1055}
1056
1057static int grow_stack_state(struct bpf_func_state *state, int size)
1058{
1059	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
1060
1061	if (old_n >= n)
1062		return 0;
1063
1064	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1065	if (!state->stack)
1066		return -ENOMEM;
1067
1068	state->allocated_stack = size;
1069	return 0;
1070}
1071
/* Acquire a pointer id from the env and update state->refs to include
 * this new pointer reference.
 * On success, returns a valid pointer id to associate with the register.
 * On failure, returns a negative errno.
 */
1077static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1078{
1079	struct bpf_func_state *state = cur_func(env);
1080	int new_ofs = state->acquired_refs;
1081	int id, err;
1082
1083	err = resize_reference_state(state, state->acquired_refs + 1);
1084	if (err)
1085		return err;
1086	id = ++env->id_gen;
1087	state->refs[new_ofs].id = id;
1088	state->refs[new_ofs].insn_idx = insn_idx;
1089
1090	return id;
1091}
1092
1093/* release function corresponding to acquire_reference_state(). Idempotent. */
1094static int release_reference_state(struct bpf_func_state *state, int ptr_id)
1095{
1096	int i, last_idx;
1097
1098	last_idx = state->acquired_refs - 1;
1099	for (i = 0; i < state->acquired_refs; i++) {
1100		if (state->refs[i].id == ptr_id) {
1101			if (last_idx && i != last_idx)
1102				memcpy(&state->refs[i], &state->refs[last_idx],
1103				       sizeof(*state->refs));
1104			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1105			state->acquired_refs--;
1106			return 0;
1107		}
1108	}
1109	return -EINVAL;
1110}
1111
1112static void free_func_state(struct bpf_func_state *state)
1113{
1114	if (!state)
1115		return;
1116	kfree(state->refs);
1117	kfree(state->stack);
1118	kfree(state);
1119}
1120
1121static void clear_jmp_history(struct bpf_verifier_state *state)
1122{
1123	kfree(state->jmp_history);
1124	state->jmp_history = NULL;
1125	state->jmp_history_cnt = 0;
1126}
1127
1128static void free_verifier_state(struct bpf_verifier_state *state,
1129				bool free_self)
1130{
1131	int i;
1132
1133	for (i = 0; i <= state->curframe; i++) {
1134		free_func_state(state->frame[i]);
1135		state->frame[i] = NULL;
1136	}
1137	clear_jmp_history(state);
1138	if (free_self)
1139		kfree(state);
1140}
1141
1142/* copy verifier state from src to dst growing dst stack space
1143 * when necessary to accommodate larger src stack
1144 */
1145static int copy_func_state(struct bpf_func_state *dst,
1146			   const struct bpf_func_state *src)
1147{
1148	int err;
1149
1150	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1151	err = copy_reference_state(dst, src);
1152	if (err)
1153		return err;
1154	return copy_stack_state(dst, src);
1155}
1156
1157static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1158			       const struct bpf_verifier_state *src)
1159{
1160	struct bpf_func_state *dst;
1161	int i, err;
1162
1163	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1164					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1165					    GFP_USER);
1166	if (!dst_state->jmp_history)
1167		return -ENOMEM;
1168	dst_state->jmp_history_cnt = src->jmp_history_cnt;
1169
	/* if dst has more stack frames than src, free them */
1171	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1172		free_func_state(dst_state->frame[i]);
1173		dst_state->frame[i] = NULL;
1174	}
1175	dst_state->speculative = src->speculative;
1176	dst_state->curframe = src->curframe;
1177	dst_state->active_spin_lock = src->active_spin_lock;
1178	dst_state->branches = src->branches;
1179	dst_state->parent = src->parent;
1180	dst_state->first_insn_idx = src->first_insn_idx;
1181	dst_state->last_insn_idx = src->last_insn_idx;
1182	for (i = 0; i <= src->curframe; i++) {
1183		dst = dst_state->frame[i];
1184		if (!dst) {
1185			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1186			if (!dst)
1187				return -ENOMEM;
1188			dst_state->frame[i] = dst;
1189		}
1190		err = copy_func_state(dst, src->frame[i]);
1191		if (err)
1192			return err;
1193	}
1194	return 0;
1195}
1196
1197static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1198{
1199	while (st) {
1200		u32 br = --st->branches;
1201
1202		/* WARN_ON(br > 1) technically makes sense here,
1203		 * but see comment in push_stack(), hence:
1204		 */
1205		WARN_ONCE((int)br < 0,
1206			  "BUG update_branch_counts:branches_to_explore=%d\n",
1207			  br);
1208		if (br)
1209			break;
1210		st = st->parent;
1211	}
1212}
1213
1214static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1215		     int *insn_idx, bool pop_log)
1216{
1217	struct bpf_verifier_state *cur = env->cur_state;
1218	struct bpf_verifier_stack_elem *elem, *head = env->head;
1219	int err;
1220
1221	if (env->head == NULL)
1222		return -ENOENT;
1223
1224	if (cur) {
1225		err = copy_verifier_state(cur, &head->st);
1226		if (err)
1227			return err;
1228	}
1229	if (pop_log)
1230		bpf_vlog_reset(&env->log, head->log_pos);
1231	if (insn_idx)
1232		*insn_idx = head->insn_idx;
1233	if (prev_insn_idx)
1234		*prev_insn_idx = head->prev_insn_idx;
1235	elem = head->next;
1236	free_verifier_state(&head->st, false);
1237	kfree(head);
1238	env->head = elem;
1239	env->stack_size--;
1240	return 0;
1241}
1242
1243static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1244					     int insn_idx, int prev_insn_idx,
1245					     bool speculative)
1246{
1247	struct bpf_verifier_state *cur = env->cur_state;
1248	struct bpf_verifier_stack_elem *elem;
1249	int err;
1250
1251	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1252	if (!elem)
1253		goto err;
1254
1255	elem->insn_idx = insn_idx;
1256	elem->prev_insn_idx = prev_insn_idx;
1257	elem->next = env->head;
1258	elem->log_pos = env->log.len_used;
1259	env->head = elem;
1260	env->stack_size++;
1261	err = copy_verifier_state(&elem->st, cur);
1262	if (err)
1263		goto err;
1264	elem->st.speculative |= speculative;
1265	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1266		verbose(env, "The sequence of %d jumps is too complex.\n",
1267			env->stack_size);
1268		goto err;
1269	}
1270	if (elem->st.parent) {
1271		++elem->st.parent->branches;
1272		/* WARN_ON(branches > 2) technically makes sense here,
1273		 * but
1274		 * 1. speculative states will bump 'branches' for non-branch
1275		 * instructions
1276		 * 2. is_state_visited() heuristics may decide not to create
1277		 * a new state for a sequence of branches and all such current
1278		 * and cloned states will be pointing to a single parent state
1279		 * which might have large 'branches' count.
1280		 */
1281	}
1282	return &elem->st;
1283err:
1284	free_verifier_state(env->cur_state, true);
1285	env->cur_state = NULL;
1286	/* pop all elements and return */
1287	while (!pop_stack(env, NULL, NULL, false));
1288	return NULL;
1289}
1290
1291#define CALLER_SAVED_REGS 6
1292static const int caller_saved[CALLER_SAVED_REGS] = {
1293	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1294};
1295
1296static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1297				struct bpf_reg_state *reg);
1298
1299/* This helper doesn't clear reg->id */
1300static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1301{
1302	reg->var_off = tnum_const(imm);
1303	reg->smin_value = (s64)imm;
1304	reg->smax_value = (s64)imm;
1305	reg->umin_value = imm;
1306	reg->umax_value = imm;
1307
1308	reg->s32_min_value = (s32)imm;
1309	reg->s32_max_value = (s32)imm;
1310	reg->u32_min_value = (u32)imm;
1311	reg->u32_max_value = (u32)imm;
1312}
1313
1314/* Mark the unknown part of a register (variable offset or scalar value) as
1315 * known to have the value @imm.
1316 */
1317static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1318{
1319	/* Clear id, off, and union(map_ptr, range) */
1320	memset(((u8 *)reg) + sizeof(reg->type), 0,
1321	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1322	___mark_reg_known(reg, imm);
1323}
1324
1325static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1326{
1327	reg->var_off = tnum_const_subreg(reg->var_off, imm);
1328	reg->s32_min_value = (s32)imm;
1329	reg->s32_max_value = (s32)imm;
1330	reg->u32_min_value = (u32)imm;
1331	reg->u32_max_value = (u32)imm;
1332}
1333
1334/* Mark the 'variable offset' part of a register as zero.  This should be
1335 * used only on registers holding a pointer type.
1336 */
1337static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1338{
1339	__mark_reg_known(reg, 0);
1340}
1341
1342static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1343{
1344	__mark_reg_known(reg, 0);
1345	reg->type = SCALAR_VALUE;
1346}
1347
1348static void mark_reg_known_zero(struct bpf_verifier_env *env,
1349				struct bpf_reg_state *regs, u32 regno)
1350{
1351	if (WARN_ON(regno >= MAX_BPF_REG)) {
1352		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1353		/* Something bad happened, let's kill all regs */
1354		for (regno = 0; regno < MAX_BPF_REG; regno++)
1355			__mark_reg_not_init(env, regs + regno);
1356		return;
1357	}
1358	__mark_reg_known_zero(regs + regno);
1359}
1360
1361static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1362{
1363	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1364		const struct bpf_map *map = reg->map_ptr;
1365
1366		if (map->inner_map_meta) {
1367			reg->type = CONST_PTR_TO_MAP;
1368			reg->map_ptr = map->inner_map_meta;
1369			/* transfer reg's id which is unique for every map_lookup_elem
1370			 * as UID of the inner map.
1371			 */
1372			if (map_value_has_timer(map->inner_map_meta))
1373				reg->map_uid = reg->id;
1374		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1375			reg->type = PTR_TO_XDP_SOCK;
1376		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1377			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1378			reg->type = PTR_TO_SOCKET;
1379		} else {
1380			reg->type = PTR_TO_MAP_VALUE;
1381		}
1382		return;
1383	}
1384
1385	reg->type &= ~PTR_MAYBE_NULL;
1386}
1387
1388static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1389{
1390	return type_is_pkt_pointer(reg->type);
1391}
1392
1393static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1394{
1395	return reg_is_pkt_pointer(reg) ||
1396	       reg->type == PTR_TO_PACKET_END;
1397}
1398
1399/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1400static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1401				    enum bpf_reg_type which)
1402{
1403	/* The register can already have a range from prior markings.
1404	 * This is fine as long as it hasn't been advanced from its
1405	 * origin.
1406	 */
1407	return reg->type == which &&
1408	       reg->id == 0 &&
1409	       reg->off == 0 &&
1410	       tnum_equals_const(reg->var_off, 0);
1411}
1412
1413/* Reset the min/max bounds of a register */
1414static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1415{
1416	reg->smin_value = S64_MIN;
1417	reg->smax_value = S64_MAX;
1418	reg->umin_value = 0;
1419	reg->umax_value = U64_MAX;
1420
1421	reg->s32_min_value = S32_MIN;
1422	reg->s32_max_value = S32_MAX;
1423	reg->u32_min_value = 0;
1424	reg->u32_max_value = U32_MAX;
1425}
1426
1427static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1428{
1429	reg->smin_value = S64_MIN;
1430	reg->smax_value = S64_MAX;
1431	reg->umin_value = 0;
1432	reg->umax_value = U64_MAX;
1433}
1434
1435static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1436{
1437	reg->s32_min_value = S32_MIN;
1438	reg->s32_max_value = S32_MAX;
1439	reg->u32_min_value = 0;
1440	reg->u32_max_value = U32_MAX;
1441}
1442
1443static void __update_reg32_bounds(struct bpf_reg_state *reg)
1444{
1445	struct tnum var32_off = tnum_subreg(reg->var_off);
1446
1447	/* min signed is max(sign bit) | min(other bits) */
1448	reg->s32_min_value = max_t(s32, reg->s32_min_value,
1449			var32_off.value | (var32_off.mask & S32_MIN));
1450	/* max signed is min(sign bit) | max(other bits) */
1451	reg->s32_max_value = min_t(s32, reg->s32_max_value,
1452			var32_off.value | (var32_off.mask & S32_MAX));
1453	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1454	reg->u32_max_value = min(reg->u32_max_value,
1455				 (u32)(var32_off.value | var32_off.mask));
1456}
1457
1458static void __update_reg64_bounds(struct bpf_reg_state *reg)
1459{
1460	/* min signed is max(sign bit) | min(other bits) */
1461	reg->smin_value = max_t(s64, reg->smin_value,
1462				reg->var_off.value | (reg->var_off.mask & S64_MIN));
1463	/* max signed is min(sign bit) | max(other bits) */
1464	reg->smax_value = min_t(s64, reg->smax_value,
1465				reg->var_off.value | (reg->var_off.mask & S64_MAX));
1466	reg->umin_value = max(reg->umin_value, reg->var_off.value);
1467	reg->umax_value = min(reg->umax_value,
1468			      reg->var_off.value | reg->var_off.mask);
1469}
1470
1471static void __update_reg_bounds(struct bpf_reg_state *reg)
1472{
1473	__update_reg32_bounds(reg);
1474	__update_reg64_bounds(reg);
1475}
1476
1477/* Uses signed min/max values to inform unsigned, and vice-versa */
1478static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1479{
1480	/* Learn sign from signed bounds.
1481	 * If we cannot cross the sign boundary, then signed and unsigned bounds
1482	 * are the same, so combine.  This works even in the negative case, e.g.
1483	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1484	 */
1485	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1486		reg->s32_min_value = reg->u32_min_value =
1487			max_t(u32, reg->s32_min_value, reg->u32_min_value);
1488		reg->s32_max_value = reg->u32_max_value =
1489			min_t(u32, reg->s32_max_value, reg->u32_max_value);
1490		return;
1491	}
1492	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
1493	 * boundary, so we must be careful.
1494	 */
1495	if ((s32)reg->u32_max_value >= 0) {
1496		/* Positive.  We can't learn anything from the smin, but smax
1497		 * is positive, hence safe.
1498		 */
1499		reg->s32_min_value = reg->u32_min_value;
1500		reg->s32_max_value = reg->u32_max_value =
1501			min_t(u32, reg->s32_max_value, reg->u32_max_value);
1502	} else if ((s32)reg->u32_min_value < 0) {
1503		/* Negative.  We can't learn anything from the smax, but smin
1504		 * is negative, hence safe.
1505		 */
1506		reg->s32_min_value = reg->u32_min_value =
1507			max_t(u32, reg->s32_min_value, reg->u32_min_value);
1508		reg->s32_max_value = reg->u32_max_value;
1509	}
1510}
1511
1512static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1513{
1514	/* Learn sign from signed bounds.
1515	 * If we cannot cross the sign boundary, then signed and unsigned bounds
1516	 * are the same, so combine.  This works even in the negative case, e.g.
1517	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1518	 */
1519	if (reg->smin_value >= 0 || reg->smax_value < 0) {
1520		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1521							  reg->umin_value);
1522		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1523							  reg->umax_value);
1524		return;
1525	}
1526	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
1527	 * boundary, so we must be careful.
1528	 */
1529	if ((s64)reg->umax_value >= 0) {
1530		/* Positive.  We can't learn anything from the smin, but smax
1531		 * is positive, hence safe.
1532		 */
1533		reg->smin_value = reg->umin_value;
1534		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1535							  reg->umax_value);
1536	} else if ((s64)reg->umin_value < 0) {
1537		/* Negative.  We can't learn anything from the smax, but smin
1538		 * is negative, hence safe.
1539		 */
1540		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1541							  reg->umin_value);
1542		reg->smax_value = reg->umax_value;
1543	}
1544}
1545
1546static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1547{
1548	__reg32_deduce_bounds(reg);
1549	__reg64_deduce_bounds(reg);
1550}
1551
1552/* Attempts to improve var_off based on unsigned min/max information */
1553static void __reg_bound_offset(struct bpf_reg_state *reg)
1554{
1555	struct tnum var64_off = tnum_intersect(reg->var_off,
1556					       tnum_range(reg->umin_value,
1557							  reg->umax_value));
1558	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1559						tnum_range(reg->u32_min_value,
1560							   reg->u32_max_value));
1561
1562	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1563}
1564
1565static void reg_bounds_sync(struct bpf_reg_state *reg)
1566{
1567	/* We might have learned new bounds from the var_off. */
1568	__update_reg_bounds(reg);
1569	/* We might have learned something about the sign bit. */
1570	__reg_deduce_bounds(reg);
1571	/* We might have learned some bits from the bounds. */
1572	__reg_bound_offset(reg);
1573	/* Intersecting with the old var_off might have improved our bounds
1574	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1575	 * then new var_off is (0; 0x7f...fc) which improves our umax.
1576	 */
1577	__update_reg_bounds(reg);
1578}
1579
1580static bool __reg32_bound_s64(s32 a)
1581{
1582	return a >= 0 && a <= S32_MAX;
1583}
1584
1585static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1586{
1587	reg->umin_value = reg->u32_min_value;
1588	reg->umax_value = reg->u32_max_value;
1589
	/* Attempt to pull 32-bit signed bounds into 64-bit bounds, but they
	 * must be positive; otherwise set worst-case bounds and refine later
	 * from the tnum.
	 */
1594	if (__reg32_bound_s64(reg->s32_min_value) &&
1595	    __reg32_bound_s64(reg->s32_max_value)) {
1596		reg->smin_value = reg->s32_min_value;
1597		reg->smax_value = reg->s32_max_value;
1598	} else {
1599		reg->smin_value = 0;
1600		reg->smax_value = U32_MAX;
1601	}
1602}
1603
1604static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1605{
	/* Special case when the 64-bit register has its upper 32 bits
	 * zeroed. This typically happens after a zext or a <<32, >>32
	 * sequence and allows us to use the 32-bit bounds directly.
	 */
1610	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1611		__reg_assign_32_into_64(reg);
1612	} else {
		/* Otherwise the best we can do is push the lower 32-bit known
		 * and unknown bits into the register (var_off set from jmp
		 * logic) and then learn as much as possible from the 64-bit
		 * tnum known and unknown bits. The previous smin/smax bounds
		 * are invalid here because of the jmp32 compare, so mark them
		 * unknown so they do not impact the tnum bounds calculation.
		 */
1620		__mark_reg64_unbounded(reg);
1621	}
1622	reg_bounds_sync(reg);
1623}
1624
1625static bool __reg64_bound_s32(s64 a)
1626{
1627	return a >= S32_MIN && a <= S32_MAX;
1628}
1629
1630static bool __reg64_bound_u32(u64 a)
1631{
1632	return a >= U32_MIN && a <= U32_MAX;
1633}
1634
1635static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1636{
1637	__mark_reg32_unbounded(reg);
1638	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1639		reg->s32_min_value = (s32)reg->smin_value;
1640		reg->s32_max_value = (s32)reg->smax_value;
1641	}
1642	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1643		reg->u32_min_value = (u32)reg->umin_value;
1644		reg->u32_max_value = (u32)reg->umax_value;
1645	}
1646	reg_bounds_sync(reg);
1647}
1648
1649/* Mark a register as having a completely unknown (scalar) value. */
1650static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1651			       struct bpf_reg_state *reg)
1652{
1653	/*
1654	 * Clear type, id, off, and union(map_ptr, range) and
1655	 * padding between 'type' and union
1656	 */
1657	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1658	reg->type = SCALAR_VALUE;
1659	reg->var_off = tnum_unknown;
1660	reg->frameno = 0;
1661	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1662	__mark_reg_unbounded(reg);
1663}
1664
1665static void mark_reg_unknown(struct bpf_verifier_env *env,
1666			     struct bpf_reg_state *regs, u32 regno)
1667{
1668	if (WARN_ON(regno >= MAX_BPF_REG)) {
1669		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1670		/* Something bad happened, let's kill all regs except FP */
1671		for (regno = 0; regno < BPF_REG_FP; regno++)
1672			__mark_reg_not_init(env, regs + regno);
1673		return;
1674	}
1675	__mark_reg_unknown(env, regs + regno);
1676}
1677
1678static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1679				struct bpf_reg_state *reg)
1680{
1681	__mark_reg_unknown(env, reg);
1682	reg->type = NOT_INIT;
1683}
1684
1685static void mark_reg_not_init(struct bpf_verifier_env *env,
1686			      struct bpf_reg_state *regs, u32 regno)
1687{
1688	if (WARN_ON(regno >= MAX_BPF_REG)) {
1689		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1690		/* Something bad happened, let's kill all regs except FP */
1691		for (regno = 0; regno < BPF_REG_FP; regno++)
1692			__mark_reg_not_init(env, regs + regno);
1693		return;
1694	}
1695	__mark_reg_not_init(env, regs + regno);
1696}
1697
1698static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1699			    struct bpf_reg_state *regs, u32 regno,
1700			    enum bpf_reg_type reg_type,
1701			    struct btf *btf, u32 btf_id,
1702			    enum bpf_type_flag flag)
1703{
1704	if (reg_type == SCALAR_VALUE) {
1705		mark_reg_unknown(env, regs, regno);
1706		return;
1707	}
1708	mark_reg_known_zero(env, regs, regno);
1709	regs[regno].type = PTR_TO_BTF_ID | flag;
1710	regs[regno].btf = btf;
1711	regs[regno].btf_id = btf_id;
1712}
1713
1714#define DEF_NOT_SUBREG	(0)
1715static void init_reg_state(struct bpf_verifier_env *env,
1716			   struct bpf_func_state *state)
1717{
1718	struct bpf_reg_state *regs = state->regs;
1719	int i;
1720
1721	for (i = 0; i < MAX_BPF_REG; i++) {
1722		mark_reg_not_init(env, regs, i);
1723		regs[i].live = REG_LIVE_NONE;
1724		regs[i].parent = NULL;
1725		regs[i].subreg_def = DEF_NOT_SUBREG;
1726	}
1727
1728	/* frame pointer */
1729	regs[BPF_REG_FP].type = PTR_TO_STACK;
1730	mark_reg_known_zero(env, regs, BPF_REG_FP);
1731	regs[BPF_REG_FP].frameno = state->frameno;
1732}
1733
1734#define BPF_MAIN_FUNC (-1)
1735static void init_func_state(struct bpf_verifier_env *env,
1736			    struct bpf_func_state *state,
1737			    int callsite, int frameno, int subprogno)
1738{
1739	state->callsite = callsite;
1740	state->frameno = frameno;
1741	state->subprogno = subprogno;
1742	init_reg_state(env, state);
1743	mark_verifier_state_scratched(env);
1744}
1745
1746/* Similar to push_stack(), but for async callbacks */
1747static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
1748						int insn_idx, int prev_insn_idx,
1749						int subprog)
1750{
1751	struct bpf_verifier_stack_elem *elem;
1752	struct bpf_func_state *frame;
1753
1754	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1755	if (!elem)
1756		goto err;
1757
1758	elem->insn_idx = insn_idx;
1759	elem->prev_insn_idx = prev_insn_idx;
1760	elem->next = env->head;
1761	elem->log_pos = env->log.len_used;
1762	env->head = elem;
1763	env->stack_size++;
1764	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1765		verbose(env,
1766			"The sequence of %d jumps is too complex for async cb.\n",
1767			env->stack_size);
1768		goto err;
1769	}
1770	/* Unlike push_stack() do not copy_verifier_state().
1771	 * The caller state doesn't matter.
1772	 * This is an async callback. It starts in a fresh stack.
1773	 * Initialize it similarly to do_check_common().
1774	 */
1775	elem->st.branches = 1;
1776	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1777	if (!frame)
1778		goto err;
1779	init_func_state(env, frame,
1780			BPF_MAIN_FUNC /* callsite */,
1781			0 /* frameno within this callchain */,
1782			subprog /* subprog number within this prog */);
1783	elem->st.frame[0] = frame;
1784	return &elem->st;
1785err:
1786	free_verifier_state(env->cur_state, true);
1787	env->cur_state = NULL;
1788	/* pop all elements and return */
1789	while (!pop_stack(env, NULL, NULL, false));
1790	return NULL;
1791}
1792
1793
1794enum reg_arg_type {
1795	SRC_OP,		/* register is used as source operand */
1796	DST_OP,		/* register is used as destination operand */
1797	DST_OP_NO_MARK	/* same as above, check only, don't mark */
1798};
1799
1800static int cmp_subprogs(const void *a, const void *b)
1801{
1802	return ((struct bpf_subprog_info *)a)->start -
1803	       ((struct bpf_subprog_info *)b)->start;
1804}
1805
1806static int find_subprog(struct bpf_verifier_env *env, int off)
1807{
1808	struct bpf_subprog_info *p;
1809
1810	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1811		    sizeof(env->subprog_info[0]), cmp_subprogs);
1812	if (!p)
1813		return -ENOENT;
1814	return p - env->subprog_info;
1815
1816}
1817
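/* Record 'off' as the start of a subprogram and return its index in
 * subprog_info[], reusing an existing entry if one already starts there.
 * The table is kept sorted by start offset so find_subprog() can bsearch it.
 */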
1818static int add_subprog(struct bpf_verifier_env *env, int off)
1819{
1820	int insn_cnt = env->prog->len;
1821	int ret;
1822
1823	if (off >= insn_cnt || off < 0) {
1824		verbose(env, "call to invalid destination\n");
1825		return -EINVAL;
1826	}
1827	ret = find_subprog(env, off);
1828	if (ret >= 0)
1829		return ret;
1830	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1831		verbose(env, "too many subprograms\n");
1832		return -E2BIG;
1833	}
1834	/* determine subprog starts. The end is one before the next starts */
1835	env->subprog_info[env->subprog_cnt++].start = off;
1836	sort(env->subprog_info, env->subprog_cnt,
1837	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1838	return env->subprog_cnt - 1;
1839}
1840
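/* Per-program tables for called kernel functions (kfuncs): bpf_kfunc_desc
 * pairs a BTF func_id and fd_array offset with the call immediate and the
 * distilled function model, while bpf_kfunc_btf caches module BTFs that
 * were resolved through fd_array.
 */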
1841#define MAX_KFUNC_DESCS 256
1842#define MAX_KFUNC_BTFS	256
1843
1844struct bpf_kfunc_desc {
1845	struct btf_func_model func_model;
1846	u32 func_id;
1847	s32 imm;
1848	u16 offset;
1849};
1850
1851struct bpf_kfunc_btf {
1852	struct btf *btf;
1853	struct module *module;
1854	u16 offset;
1855};
1856
1857struct bpf_kfunc_desc_tab {
1858	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
1859	u32 nr_descs;
1860};
1861
1862struct bpf_kfunc_btf_tab {
1863	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
1864	u32 nr_descs;
1865};
1866
1867static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
1868{
1869	const struct bpf_kfunc_desc *d0 = a;
1870	const struct bpf_kfunc_desc *d1 = b;
1871
1872	/* func_id is not greater than BTF_MAX_TYPE */
1873	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
1874}
1875
1876static int kfunc_btf_cmp_by_off(const void *a, const void *b)
1877{
1878	const struct bpf_kfunc_btf *d0 = a;
1879	const struct bpf_kfunc_btf *d1 = b;
1880
1881	return d0->offset - d1->offset;
1882}
1883
1884static const struct bpf_kfunc_desc *
1885find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
1886{
1887	struct bpf_kfunc_desc desc = {
1888		.func_id = func_id,
1889		.offset = offset,
1890	};
1891	struct bpf_kfunc_desc_tab *tab;
1892
1893	tab = prog->aux->kfunc_tab;
1894	return bsearch(&desc, tab->descs, tab->nr_descs,
1895		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
1896}
1897
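/* Resolve the module BTF referenced by a kfunc call's 'offset', which is an
 * index into the fd_array supplied at load time. The BTF and its module are
 * pinned and cached in kfunc_btf_tab so later lookups reuse them.
 */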
1898static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
1899					 s16 offset)
1900{
1901	struct bpf_kfunc_btf kf_btf = { .offset = offset };
1902	struct bpf_kfunc_btf_tab *tab;
1903	struct bpf_kfunc_btf *b;
1904	struct module *mod;
1905	struct btf *btf;
1906	int btf_fd;
1907
1908	tab = env->prog->aux->kfunc_btf_tab;
1909	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
1910		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
1911	if (!b) {
1912		if (tab->nr_descs == MAX_KFUNC_BTFS) {
1913			verbose(env, "too many different module BTFs\n");
1914			return ERR_PTR(-E2BIG);
1915		}
1916
1917		if (bpfptr_is_null(env->fd_array)) {
1918			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
1919			return ERR_PTR(-EPROTO);
1920		}
1921
1922		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
1923					    offset * sizeof(btf_fd),
1924					    sizeof(btf_fd)))
1925			return ERR_PTR(-EFAULT);
1926
1927		btf = btf_get_by_fd(btf_fd);
1928		if (IS_ERR(btf)) {
1929			verbose(env, "invalid module BTF fd specified\n");
1930			return btf;
1931		}
1932
1933		if (!btf_is_module(btf)) {
1934			verbose(env, "BTF fd for kfunc is not a module BTF\n");
1935			btf_put(btf);
1936			return ERR_PTR(-EINVAL);
1937		}
1938
1939		mod = btf_try_get_module(btf);
1940		if (!mod) {
1941			btf_put(btf);
1942			return ERR_PTR(-ENXIO);
1943		}
1944
1945		b = &tab->descs[tab->nr_descs++];
1946		b->btf = btf;
1947		b->module = mod;
1948		b->offset = offset;
1949
1950		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1951		     kfunc_btf_cmp_by_off, NULL);
1952	}
1953	return b->btf;
1954}
1955
1956void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
1957{
1958	if (!tab)
1959		return;
1960
1961	while (tab->nr_descs--) {
1962		module_put(tab->descs[tab->nr_descs].module);
1963		btf_put(tab->descs[tab->nr_descs].btf);
1964	}
1965	kfree(tab);
1966}
1967
1968static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
1969{
1970	if (offset) {
1971		if (offset < 0) {
1972			/* In the future, this can be allowed to increase the limit
1973			 * of the fd index into fd_array by interpreting it as a u16.
1974			 */
1975			verbose(env, "negative offset disallowed for kernel module function call\n");
1976			return ERR_PTR(-EINVAL);
1977		}
1978
1979		return __find_kfunc_desc_btf(env, offset);
1980	}
1981	return btf_vmlinux ?: ERR_PTR(-ENOENT);
1982}
1983
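/* Register a kfunc call target: check that kfunc calls are supported for
 * this program, resolve the BTF type and kallsyms address of 'func_id', and
 * record its call immediate and function model in kfunc_tab.
 */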
1984static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
1985{
1986	const struct btf_type *func, *func_proto;
1987	struct bpf_kfunc_btf_tab *btf_tab;
1988	struct bpf_kfunc_desc_tab *tab;
1989	struct bpf_prog_aux *prog_aux;
1990	struct bpf_kfunc_desc *desc;
1991	const char *func_name;
1992	struct btf *desc_btf;
1993	unsigned long call_imm;
1994	unsigned long addr;
1995	int err;
1996
1997	prog_aux = env->prog->aux;
1998	tab = prog_aux->kfunc_tab;
1999	btf_tab = prog_aux->kfunc_btf_tab;
2000	if (!tab) {
2001		if (!btf_vmlinux) {
2002			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2003			return -ENOTSUPP;
2004		}
2005
2006		if (!env->prog->jit_requested) {
2007			verbose(env, "JIT is required for calling kernel function\n");
2008			return -ENOTSUPP;
2009		}
2010
2011		if (!bpf_jit_supports_kfunc_call()) {
2012			verbose(env, "JIT does not support calling kernel function\n");
2013			return -ENOTSUPP;
2014		}
2015
2016		if (!env->prog->gpl_compatible) {
2017			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2018			return -EINVAL;
2019		}
2020
2021		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
2022		if (!tab)
2023			return -ENOMEM;
2024		prog_aux->kfunc_tab = tab;
2025	}
2026
2027	/* func_id == 0 is always invalid, but instead of returning an error, be
2028	 * conservative and wait until the dead code elimination pass before
2029	 * returning an error, so that invalid calls that get pruned out may still
2030	 * appear in BPF programs loaded from userspace.  It is also required that
2031	 * offset be untouched for such calls.
2032	 */
2033	if (!func_id && !offset)
2034		return 0;
2035
2036	if (!btf_tab && offset) {
2037		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
2038		if (!btf_tab)
2039			return -ENOMEM;
2040		prog_aux->kfunc_btf_tab = btf_tab;
2041	}
2042
2043	desc_btf = find_kfunc_desc_btf(env, offset);
2044	if (IS_ERR(desc_btf)) {
2045		verbose(env, "failed to find BTF for kernel function\n");
2046		return PTR_ERR(desc_btf);
2047	}
2048
2049	if (find_kfunc_desc(env->prog, func_id, offset))
2050		return 0;
2051
2052	if (tab->nr_descs == MAX_KFUNC_DESCS) {
2053		verbose(env, "too many different kernel function calls\n");
2054		return -E2BIG;
2055	}
2056
2057	func = btf_type_by_id(desc_btf, func_id);
2058	if (!func || !btf_type_is_func(func)) {
2059		verbose(env, "kernel btf_id %u is not a function\n",
2060			func_id);
2061		return -EINVAL;
2062	}
2063	func_proto = btf_type_by_id(desc_btf, func->type);
2064	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2065		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
2066			func_id);
2067		return -EINVAL;
2068	}
2069
2070	func_name = btf_name_by_offset(desc_btf, func->name_off);
2071	addr = kallsyms_lookup_name(func_name);
2072	if (!addr) {
2073		verbose(env, "cannot find address for kernel function %s\n",
2074			func_name);
2075		return -EINVAL;
2076	}
2077
2078	call_imm = BPF_CALL_IMM(addr);
2079	/* Check whether or not the relative offset overflows desc->imm */
2080	if ((unsigned long)(s32)call_imm != call_imm) {
2081		verbose(env, "address of kernel function %s is out of range\n",
2082			func_name);
2083		return -EINVAL;
2084	}
2085
2086	desc = &tab->descs[tab->nr_descs++];
2087	desc->func_id = func_id;
2088	desc->imm = call_imm;
2089	desc->offset = offset;
2090	err = btf_distill_func_proto(&env->log, desc_btf,
2091				     func_proto, func_name,
2092				     &desc->func_model);
2093	if (!err)
2094		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2095		     kfunc_desc_cmp_by_id_off, NULL);
2096	return err;
2097}
2098
2099static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
2100{
2101	const struct bpf_kfunc_desc *d0 = a;
2102	const struct bpf_kfunc_desc *d1 = b;
2103
2104	if (d0->imm > d1->imm)
2105		return 1;
2106	else if (d0->imm < d1->imm)
2107		return -1;
2108	return 0;
2109}
2110
2111static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
2112{
2113	struct bpf_kfunc_desc_tab *tab;
2114
2115	tab = prog->aux->kfunc_tab;
2116	if (!tab)
2117		return;
2118
2119	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2120	     kfunc_desc_cmp_by_imm, NULL);
2121}
2122
2123bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2124{
2125	return !!prog->aux->kfunc_tab;
2126}
2127
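/* Used by JITs to look up the func_model for a kfunc call insn by its imm.
 * The descriptors must already be sorted by imm (sort_kfunc_descs_by_imm())
 * for the bsearch below to find them.
 */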
2128const struct btf_func_model *
2129bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2130			 const struct bpf_insn *insn)
2131{
2132	const struct bpf_kfunc_desc desc = {
2133		.imm = insn->imm,
2134	};
2135	const struct bpf_kfunc_desc *res;
2136	struct bpf_kfunc_desc_tab *tab;
2137
2138	tab = prog->aux->kfunc_tab;
2139	res = bsearch(&desc, tab->descs, tab->nr_descs,
2140		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
2141
2142	return res ? &res->func_model : NULL;
2143}
2144
2145static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2146{
2147	struct bpf_subprog_info *subprog = env->subprog_info;
2148	struct bpf_insn *insn = env->prog->insnsi;
2149	int i, ret, insn_cnt = env->prog->len;
2150
2151	/* Add entry function. */
2152	ret = add_subprog(env, 0);
2153	if (ret)
2154		return ret;
2155
2156	for (i = 0; i < insn_cnt; i++, insn++) {
2157		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2158		    !bpf_pseudo_kfunc_call(insn))
2159			continue;
2160
2161		if (!env->bpf_capable) {
2162			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2163			return -EPERM;
2164		}
2165
2166		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2167			ret = add_subprog(env, i + insn->imm + 1);
2168		else
2169			ret = add_kfunc_call(env, insn->imm, insn->off);
2170
2171		if (ret < 0)
2172			return ret;
2173	}
2174
2175	/* Add a fake 'exit' subprog which could simplify subprog iteration
2176	 * logic. 'subprog_cnt' should not be increased.
2177	 */
2178	subprog[env->subprog_cnt].start = insn_cnt;
2179
2180	if (env->log.level & BPF_LOG_LEVEL2)
2181		for (i = 0; i < env->subprog_cnt; i++)
2182			verbose(env, "func#%d @%d\n", i, subprog[i].start);
2183
2184	return 0;
2185}
2186
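/* Check that every jump stays within its own subprogram and that each
 * subprogram ends with an exit or an unconditional jump, so control flow
 * can never fall through from one subprogram into the next.
 */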
2187static int check_subprogs(struct bpf_verifier_env *env)
2188{
2189	int i, subprog_start, subprog_end, off, cur_subprog = 0;
2190	struct bpf_subprog_info *subprog = env->subprog_info;
2191	struct bpf_insn *insn = env->prog->insnsi;
2192	int insn_cnt = env->prog->len;
2193
2194	/* now check that all jumps are within the same subprog */
2195	subprog_start = subprog[cur_subprog].start;
2196	subprog_end = subprog[cur_subprog + 1].start;
2197	for (i = 0; i < insn_cnt; i++) {
2198		u8 code = insn[i].code;
2199
2200		if (code == (BPF_JMP | BPF_CALL) &&
2201		    insn[i].imm == BPF_FUNC_tail_call &&
2202		    insn[i].src_reg != BPF_PSEUDO_CALL)
2203			subprog[cur_subprog].has_tail_call = true;
2204		if (BPF_CLASS(code) == BPF_LD &&
2205		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2206			subprog[cur_subprog].has_ld_abs = true;
2207		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2208			goto next;
2209		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2210			goto next;
2211		off = i + insn[i].off + 1;
2212		if (off < subprog_start || off >= subprog_end) {
2213			verbose(env, "jump out of range from insn %d to %d\n", i, off);
2214			return -EINVAL;
2215		}
2216next:
2217		if (i == subprog_end - 1) {
2218			/* to avoid fall-through from one subprog into another
2219			 * the last insn of the subprog should be either exit
2220			 * or unconditional jump back
2221			 */
2222			if (code != (BPF_JMP | BPF_EXIT) &&
2223			    code != (BPF_JMP | BPF_JA)) {
2224				verbose(env, "last insn is not an exit or jmp\n");
2225				return -EINVAL;
2226			}
2227			subprog_start = subprog_end;
2228			cur_subprog++;
2229			if (cur_subprog < env->subprog_cnt)
2230				subprog_end = subprog[cur_subprog + 1].start;
2231		}
2232	}
2233	return 0;
2234}
2235
2236/* Parentage chain of this register (or stack slot) should take care of all
2237 * issues like callee-saved registers, stack slot allocation time, etc.
2238 */
2239static int mark_reg_read(struct bpf_verifier_env *env,
2240			 const struct bpf_reg_state *state,
2241			 struct bpf_reg_state *parent, u8 flag)
2242{
2243	bool writes = parent == state->parent; /* Observe write marks */
2244	int cnt = 0;
2245
2246	while (parent) {
2247		/* if read wasn't screened by an earlier write ... */
2248		if (writes && state->live & REG_LIVE_WRITTEN)
2249			break;
2250		if (parent->live & REG_LIVE_DONE) {
2251			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2252				reg_type_str(env, parent->type),
2253				parent->var_off.value, parent->off);
2254			return -EFAULT;
2255		}
2256		/* The first condition is more likely to be true than the
2257		 * second, so check it first.
2258		 */
2259		if ((parent->live & REG_LIVE_READ) == flag ||
2260		    parent->live & REG_LIVE_READ64)
2261			/* The parentage chain never changes and
2262			 * this parent was already marked as LIVE_READ.
2263			 * There is no need to keep walking the chain again and
2264			 * keep re-marking all parents as LIVE_READ.
2265			 * This case happens when the same register is read
2266			 * multiple times without writes into it in-between.
2267			 * Also, if parent has the stronger REG_LIVE_READ64 set,
2268			 * then no need to set the weak REG_LIVE_READ32.
2269			 */
2270			break;
2271		/* ... then we depend on parent's value */
2272		parent->live |= flag;
2273		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2274		if (flag == REG_LIVE_READ64)
2275			parent->live &= ~REG_LIVE_READ32;
2276		state = parent;
2277		parent = state->parent;
2278		writes = true;
2279		cnt++;
2280	}
2281
2282	if (env->longest_mark_read_walk < cnt)
2283		env->longest_mark_read_walk = cnt;
2284	return 0;
2285}
2286
2287/* This function is supposed to be used by the following 32-bit optimization
2288 * code only. It returns TRUE if the source or destination register operates
2289 * on 64 bits, otherwise it returns FALSE.
2290 */
2291static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2292		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2293{
2294	u8 code, class, op;
2295
2296	code = insn->code;
2297	class = BPF_CLASS(code);
2298	op = BPF_OP(code);
2299	if (class == BPF_JMP) {
2300		/* BPF_EXIT for "main" will reach here. Return TRUE
2301		 * conservatively.
2302		 */
2303		if (op == BPF_EXIT)
2304			return true;
2305		if (op == BPF_CALL) {
2306			/* A BPF-to-BPF call will reach here because caller-saved
2307			 * clobbers are marked with DST_OP_NO_MARK; we don't care
2308			 * about the register def because those registers are
2309			 * already marked as NOT_INIT anyway.
2310			 */
2311			if (insn->src_reg == BPF_PSEUDO_CALL)
2312				return false;
2313			/* Helper call will reach here because of arg type
2314			 * check, conservatively return TRUE.
2315			 */
2316			if (t == SRC_OP)
2317				return true;
2318
2319			return false;
2320		}
2321	}
2322
2323	if (class == BPF_ALU64 || class == BPF_JMP ||
2324	    /* BPF_END always uses the BPF_ALU class. */
2325	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2326		return true;
2327
2328	if (class == BPF_ALU || class == BPF_JMP32)
2329		return false;
2330
2331	if (class == BPF_LDX) {
2332		if (t != SRC_OP)
2333			return BPF_SIZE(code) == BPF_DW;
2334		/* LDX source must be ptr. */
2335		return true;
2336	}
2337
2338	if (class == BPF_STX) {
2339		/* BPF_STX (including atomic variants) has multiple source
2340		 * operands, one of which is a ptr. Check whether the caller is
2341		 * asking about it.
2342		 */
2343		if (t == SRC_OP && reg->type != SCALAR_VALUE)
2344			return true;
2345		return BPF_SIZE(code) == BPF_DW;
2346	}
2347
2348	if (class == BPF_LD) {
2349		u8 mode = BPF_MODE(code);
2350
2351		/* LD_IMM64 */
2352		if (mode == BPF_IMM)
2353			return true;
2354
2355		/* Both LD_IND and LD_ABS return 32-bit data. */
2356		if (t != SRC_OP)
2357			return false;
2358
2359		/* Implicit ctx ptr. */
2360		if (regno == BPF_REG_6)
2361			return true;
2362
2363		/* Explicit source could be any width. */
2364		return true;
2365	}
2366
2367	if (class == BPF_ST)
2368		/* The only source register for BPF_ST is a ptr. */
2369		return true;
2370
2371	/* Conservatively return true at default. */
2372	return true;
2373}
2374
2375/* Return the regno defined by the insn, or -1. */
2376static int insn_def_regno(const struct bpf_insn *insn)
2377{
2378	switch (BPF_CLASS(insn->code)) {
2379	case BPF_JMP:
2380	case BPF_JMP32:
2381	case BPF_ST:
2382		return -1;
2383	case BPF_STX:
2384		if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2385		    (insn->imm & BPF_FETCH)) {
2386			if (insn->imm == BPF_CMPXCHG)
2387				return BPF_REG_0;
2388			else
2389				return insn->src_reg;
2390		} else {
2391			return -1;
2392		}
2393	default:
2394		return insn->dst_reg;
2395	}
2396}
2397
2398/* Return TRUE if INSN has defined any 32-bit value explicitly. */
2399static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2400{
2401	int dst_reg = insn_def_regno(insn);
2402
2403	if (dst_reg == -1)
2404		return false;
2405
2406	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2407}
2408
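/* The register was last defined by a 32-bit (sub-register) write but its full
 * 64-bit value is now needed. Flag the defining insn so the zero-extension
 * patching pass knows its upper 32 bits must really be cleared.
 */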
2409static void mark_insn_zext(struct bpf_verifier_env *env,
2410			   struct bpf_reg_state *reg)
2411{
2412	s32 def_idx = reg->subreg_def;
2413
2414	if (def_idx == DEF_NOT_SUBREG)
2415		return;
2416
2417	env->insn_aux_data[def_idx - 1].zext_dst = true;
2418	/* The dst will be zero extended, so won't be sub-register anymore. */
2419	reg->subreg_def = DEF_NOT_SUBREG;
2420}
2421
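/* Check that register 'regno' can be used as a source or destination operand
 * of the current insn and update liveness: reads propagate READ32/READ64
 * marks up the parentage chain, writes set REG_LIVE_WRITTEN and, for DST_OP,
 * mark the register as an unknown scalar.
 */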
2422static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
2423			 enum reg_arg_type t)
2424{
2425	struct bpf_verifier_state *vstate = env->cur_state;
2426	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2427	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
2428	struct bpf_reg_state *reg, *regs = state->regs;
2429	bool rw64;
2430
2431	if (regno >= MAX_BPF_REG) {
2432		verbose(env, "R%d is invalid\n", regno);
2433		return -EINVAL;
2434	}
2435
2436	mark_reg_scratched(env, regno);
2437
2438	reg = &regs[regno];
2439	rw64 = is_reg64(env, insn, regno, reg, t);
2440	if (t == SRC_OP) {
2441		/* check whether register used as source operand can be read */
2442		if (reg->type == NOT_INIT) {
2443			verbose(env, "R%d !read_ok\n", regno);
2444			return -EACCES;
2445		}
2446		/* We don't need to worry about FP liveness because it's read-only */
2447		if (regno == BPF_REG_FP)
2448			return 0;
2449
2450		if (rw64)
2451			mark_insn_zext(env, reg);
2452
2453		return mark_reg_read(env, reg, reg->parent,
2454				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
2455	} else {
2456		/* check whether register used as dest operand can be written to */
2457		if (regno == BPF_REG_FP) {
2458			verbose(env, "frame pointer is read only\n");
2459			return -EACCES;
2460		}
2461		reg->live |= REG_LIVE_WRITTEN;
2462		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
2463		if (t == DST_OP)
2464			mark_reg_unknown(env, regs, regno);
2465	}
2466	return 0;
2467}
2468
2469/* for any branch, call, exit record the history of jmps in the given state */
2470static int push_jmp_history(struct bpf_verifier_env *env,
2471			    struct bpf_verifier_state *cur)
2472{
2473	u32 cnt = cur->jmp_history_cnt;
2474	struct bpf_idx_pair *p;
2475
2476	cnt++;
2477	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
2478	if (!p)
2479		return -ENOMEM;
2480	p[cnt - 1].idx = env->insn_idx;
2481	p[cnt - 1].prev_idx = env->prev_insn_idx;
2482	cur->jmp_history = p;
2483	cur->jmp_history_cnt = cnt;
2484	return 0;
2485}
2486
2487/* Backtrack one insn at a time. If idx is not at the top of recorded
2488 * history then previous instruction came from straight line execution.
2489 */
2490static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2491			     u32 *history)
2492{
2493	u32 cnt = *history;
2494
2495	if (cnt && st->jmp_history[cnt - 1].idx == i) {
2496		i = st->jmp_history[cnt - 1].prev_idx;
2497		(*history)--;
2498	} else {
2499		i--;
2500	}
2501	return i;
2502}
2503
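/* Disassembler callback: return the BTF name of the kernel function targeted
 * by a BPF_PSEUDO_KFUNC_CALL insn, NULL for other calls, or "<error>" when
 * the BTF cannot be resolved.
 */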
2504static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2505{
2506	const struct btf_type *func;
2507	struct btf *desc_btf;
2508
2509	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2510		return NULL;
2511
2512	desc_btf = find_kfunc_desc_btf(data, insn->off);
2513	if (IS_ERR(desc_btf))
2514		return "<error>";
2515
2516	func = btf_type_by_id(desc_btf, insn->imm);
2517	return btf_name_by_offset(desc_btf, func->name_off);
2518}
2519
2520/* For a given verifier state, backtrack_insn() is called from the last insn to
2521 * the first insn. Its purpose is to compute a bitmask of registers and
2522 * stack slots that need precision in the parent verifier state.
2523 */
2524static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2525			  u32 *reg_mask, u64 *stack_mask)
2526{
2527	const struct bpf_insn_cbs cbs = {
2528		.cb_call	= disasm_kfunc_name,
2529		.cb_print	= verbose,
2530		.private_data	= env,
2531	};
2532	struct bpf_insn *insn = env->prog->insnsi + idx;
2533	u8 class = BPF_CLASS(insn->code);
2534	u8 opcode = BPF_OP(insn->code);
2535	u8 mode = BPF_MODE(insn->code);
2536	u32 dreg = 1u << insn->dst_reg;
2537	u32 sreg = 1u << insn->src_reg;
2538	u32 spi;
2539
2540	if (insn->code == 0)
2541		return 0;
2542	if (env->log.level & BPF_LOG_LEVEL2) {
2543		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2544		verbose(env, "%d: ", idx);
2545		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2546	}
2547
2548	if (class == BPF_ALU || class == BPF_ALU64) {
2549		if (!(*reg_mask & dreg))
2550			return 0;
2551		if (opcode == BPF_MOV) {
2552			if (BPF_SRC(insn->code) == BPF_X) {
2553				/* dreg = sreg
2554				 * dreg needs precision after this insn
2555				 * sreg needs precision before this insn
2556				 */
2557				*reg_mask &= ~dreg;
2558				*reg_mask |= sreg;
2559			} else {
2560				/* dreg = K
2561				 * dreg needs precision after this insn.
2562				 * Corresponding register is already marked
2563				 * as precise=true in this verifier state.
2564				 * No further markings in parent are necessary
2565				 */
2566				*reg_mask &= ~dreg;
2567			}
2568		} else {
2569			if (BPF_SRC(insn->code) == BPF_X) {
2570				/* dreg += sreg
2571				 * both dreg and sreg need precision
2572				 * before this insn
2573				 */
2574				*reg_mask |= sreg;
2575			} /* else dreg += K
2576			   * dreg still needs precision before this insn
2577			   */
2578		}
2579	} else if (class == BPF_LDX) {
2580		if (!(*reg_mask & dreg))
2581			return 0;
2582		*reg_mask &= ~dreg;
2583
2584		/* scalars can only be spilled into stack w/o losing precision.
2585		 * Load from any other memory can be zero extended.
2586		 * The desire to keep that precision is already indicated
2587		 * by 'precise' mark in corresponding register of this state.
2588		 * No further tracking necessary.
2589		 */
2590		if (insn->src_reg != BPF_REG_FP)
2591			return 0;
2592
2593		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
2594		 * That [fp - off] slot contains a scalar that needs to be
2595		 * tracked with precision.
2596		 */
2597		spi = (-insn->off - 1) / BPF_REG_SIZE;
2598		if (spi >= 64) {
2599			verbose(env, "BUG spi %d\n", spi);
2600			WARN_ONCE(1, "verifier backtracking bug");
2601			return -EFAULT;
2602		}
2603		*stack_mask |= 1ull << spi;
2604	} else if (class == BPF_STX || class == BPF_ST) {
2605		if (*reg_mask & dreg)
2606			/* stx & st shouldn't be using _scalar_ dst_reg
2607			 * to access memory. It means backtracking
2608			 * encountered a case of pointer subtraction.
2609			 */
2610			return -ENOTSUPP;
2611		/* scalars can only be spilled into stack */
2612		if (insn->dst_reg != BPF_REG_FP)
2613			return 0;
2614		spi = (-insn->off - 1) / BPF_REG_SIZE;
2615		if (spi >= 64) {
2616			verbose(env, "BUG spi %d\n", spi);
2617			WARN_ONCE(1, "verifier backtracking bug");
2618			return -EFAULT;
2619		}
2620		if (!(*stack_mask & (1ull << spi)))
2621			return 0;
2622		*stack_mask &= ~(1ull << spi);
2623		if (class == BPF_STX)
2624			*reg_mask |= sreg;
2625	} else if (class == BPF_JMP || class == BPF_JMP32) {
2626		if (opcode == BPF_CALL) {
2627			if (insn->src_reg == BPF_PSEUDO_CALL)
2628				return -ENOTSUPP;
2629			/* regular helper call sets R0 */
2630			*reg_mask &= ~1;
2631			if (*reg_mask & 0x3f) {
2632				/* if backtracing was looking for registers R1-R5
2633				 * they should have been found already.
2634				 */
2635				verbose(env, "BUG regs %x\n", *reg_mask);
2636				WARN_ONCE(1, "verifier backtracking bug");
2637				return -EFAULT;
2638			}
2639		} else if (opcode == BPF_EXIT) {
2640			return -ENOTSUPP;
2641		}
2642	} else if (class == BPF_LD) {
2643		if (!(*reg_mask & dreg))
2644			return 0;
2645		*reg_mask &= ~dreg;
2646		/* It's ld_imm64 or ld_abs or ld_ind.
2647		 * For ld_imm64 no further tracking of precision
2648		 * into parent is necessary
2649		 */
2650		if (mode == BPF_IND || mode == BPF_ABS)
2651			/* to be analyzed */
2652			return -ENOTSUPP;
2653	}
2654	return 0;
2655}
2656
2657/* the scalar precision tracking algorithm:
2658 * . at the start all registers have precise=false.
2659 * . scalar ranges are tracked as normal through alu and jmp insns.
2660 * . once precise value of the scalar register is used in:
2661 *   .  ptr + scalar alu
2662 *   . if (scalar cond K|scalar)
2663 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
2664 *   backtrack through the verifier states and mark as precise all
2665 *   registers and stack slots with spilled constants that these
2666 *   scalar registers were computed from.
2667 * . during state pruning two registers (or spilled stack slots)
2668 *   are equivalent if both are not precise.
2669 *
2670 * Note the verifier cannot simply walk register parentage chain,
2671 * since many different registers and stack slots could have been
2672 * used to compute single precise scalar.
2673 *
2674 * The approach of starting with precise=true for all registers and then
2675 * backtracking to mark a register as not precise when the verifier detects
2676 * that the program doesn't care about the specific value (e.g., when a helper
2677 * takes the register as an ARG_ANYTHING parameter) is not safe.
2678 *
2679 * It's ok to walk single parentage chain of the verifier states.
2680 * It's possible that this backtracking will go all the way till 1st insn.
2681 * All other branches will be explored for needing precision later.
2682 *
2683 * The backtracking needs to deal with cases like:
2684 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
2685 * r9 -= r8
2686 * r5 = r9
2687 * if r5 > 0x79f goto pc+7
2688 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
2689 * r5 += 1
2690 * ...
2691 * call bpf_perf_event_output#25
2692 *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
2693 *
2694 * and this case:
2695 * r6 = 1
2696 * call foo // uses callee's r6 inside to compute r0
2697 * r0 += r6
2698 * if r0 == 0 goto
2699 *
2700 * to track above reg_mask/stack_mask needs to be independent for each frame.
2701 *
2702 * Also if parent's curframe > frame where backtracking started,
2703 * the verifier needs to mark registers in both frames, otherwise callees
2704 * may incorrectly prune callers. This is similar to
2705 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
2706 *
2707 * For now backtracking falls back into conservative marking.
2708 */
2709static void mark_all_scalars_precise(struct bpf_verifier_env *env,
2710				     struct bpf_verifier_state *st)
2711{
2712	struct bpf_func_state *func;
2713	struct bpf_reg_state *reg;
2714	int i, j;
2715
2716	/* big hammer: mark all scalars precise in this path.
2717	 * pop_stack may still get !precise scalars.
2718	 */
2719	for (; st; st = st->parent)
2720		for (i = 0; i <= st->curframe; i++) {
2721			func = st->frame[i];
2722			for (j = 0; j < BPF_REG_FP; j++) {
2723				reg = &func->regs[j];
2724				if (reg->type != SCALAR_VALUE)
2725					continue;
2726				reg->precise = true;
2727			}
2728			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2729				if (!is_spilled_reg(&func->stack[j]))
2730					continue;
2731				reg = &func->stack[j].spilled_ptr;
2732				if (reg->type != SCALAR_VALUE)
2733					continue;
2734				reg->precise = true;
2735			}
2736		}
2737}
2738
2739static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2740				  int spi)
2741{
2742	struct bpf_verifier_state *st = env->cur_state;
2743	int first_idx = st->first_insn_idx;
2744	int last_idx = env->insn_idx;
2745	struct bpf_func_state *func;
2746	struct bpf_reg_state *reg;
2747	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2748	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2749	bool skip_first = true;
2750	bool new_marks = false;
2751	int i, err;
2752
2753	if (!env->bpf_capable)
2754		return 0;
2755
2756	func = st->frame[st->curframe];
2757	if (regno >= 0) {
2758		reg = &func->regs[regno];
2759		if (reg->type != SCALAR_VALUE) {
2760			WARN_ONCE(1, "backtracing misuse");
2761			return -EFAULT;
2762		}
2763		if (!reg->precise)
2764			new_marks = true;
2765		else
2766			reg_mask = 0;
2767		reg->precise = true;
2768	}
2769
2770	while (spi >= 0) {
2771		if (!is_spilled_reg(&func->stack[spi])) {
2772			stack_mask = 0;
2773			break;
2774		}
2775		reg = &func->stack[spi].spilled_ptr;
2776		if (reg->type != SCALAR_VALUE) {
2777			stack_mask = 0;
2778			break;
2779		}
2780		if (!reg->precise)
2781			new_marks = true;
2782		else
2783			stack_mask = 0;
2784		reg->precise = true;
2785		break;
2786	}
2787
2788	if (!new_marks)
2789		return 0;
2790	if (!reg_mask && !stack_mask)
2791		return 0;
2792	for (;;) {
2793		DECLARE_BITMAP(mask, 64);
2794		u32 history = st->jmp_history_cnt;
2795
2796		if (env->log.level & BPF_LOG_LEVEL2)
2797			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2798		for (i = last_idx;;) {
2799			if (skip_first) {
2800				err = 0;
2801				skip_first = false;
2802			} else {
2803				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2804			}
2805			if (err == -ENOTSUPP) {
2806				mark_all_scalars_precise(env, st);
2807				return 0;
2808			} else if (err) {
2809				return err;
2810			}
2811			if (!reg_mask && !stack_mask)
2812				/* Found assignment(s) into tracked register in this state.
2813				 * Since this state is already marked, just return.
2814				 * Nothing to be tracked further in the parent state.
2815				 */
2816				return 0;
2817			if (i == first_idx)
2818				break;
2819			i = get_prev_insn_idx(st, i, &history);
2820			if (i >= env->prog->len) {
2821				/* This can happen if backtracking reached insn 0
2822				 * and there are still reg_mask or stack_mask
2823				 * to backtrack.
2824				 * It means the backtracking missed the spot where
2825				 * particular register was initialized with a constant.
2826				 */
2827				verbose(env, "BUG backtracking idx %d\n", i);
2828				WARN_ONCE(1, "verifier backtracking bug");
2829				return -EFAULT;
2830			}
2831		}
2832		st = st->parent;
2833		if (!st)
2834			break;
2835
2836		new_marks = false;
2837		func = st->frame[st->curframe];
2838		bitmap_from_u64(mask, reg_mask);
2839		for_each_set_bit(i, mask, 32) {
2840			reg = &func->regs[i];
2841			if (reg->type != SCALAR_VALUE) {
2842				reg_mask &= ~(1u << i);
2843				continue;
2844			}
2845			if (!reg->precise)
2846				new_marks = true;
2847			reg->precise = true;
2848		}
2849
2850		bitmap_from_u64(mask, stack_mask);
2851		for_each_set_bit(i, mask, 64) {
2852			if (i >= func->allocated_stack / BPF_REG_SIZE) {
2853				/* the sequence of instructions:
2854				 * 2: (bf) r3 = r10
2855				 * 3: (7b) *(u64 *)(r3 -8) = r0
2856				 * 4: (79) r4 = *(u64 *)(r10 -8)
2857				 * doesn't contain jmps. It's backtracked
2858				 * as a single block.
2859				 * During backtracking insn 3 is not recognized as
2860				 * stack access, so at the end of backtracking
2861				 * stack slot fp-8 is still marked in stack_mask.
2862				 * However the parent state may not have accessed
2863				 * fp-8 and it's "unallocated" stack space.
2864				 * In such a case, fall back to conservative marking.
2865				 */
2866				mark_all_scalars_precise(env, st);
2867				return 0;
2868			}
2869
2870			if (!is_spilled_reg(&func->stack[i])) {
2871				stack_mask &= ~(1ull << i);
2872				continue;
2873			}
2874			reg = &func->stack[i].spilled_ptr;
2875			if (reg->type != SCALAR_VALUE) {
2876				stack_mask &= ~(1ull << i);
2877				continue;
2878			}
2879			if (!reg->precise)
2880				new_marks = true;
2881			reg->precise = true;
2882		}
2883		if (env->log.level & BPF_LOG_LEVEL2) {
2884			verbose(env, "parent %s regs=%x stack=%llx marks:",
2885				new_marks ? "didn't have" : "already had",
2886				reg_mask, stack_mask);
2887			print_verifier_state(env, func, true);
2888		}
2889
2890		if (!reg_mask && !stack_mask)
2891			break;
2892		if (!new_marks)
2893			break;
2894
2895		last_idx = st->last_insn_idx;
2896		first_idx = st->first_insn_idx;
2897	}
2898	return 0;
2899}
2900
2901static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2902{
2903	return __mark_chain_precision(env, regno, -1);
2904}
2905
2906static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2907{
2908	return __mark_chain_precision(env, -1, spi);
2909}
2910
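/* Pointer types that may be spilled to the stack and filled back later
 * without losing what they point to.
 */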
2911static bool is_spillable_regtype(enum bpf_reg_type type)
2912{
2913	switch (base_type(type)) {
2914	case PTR_TO_MAP_VALUE:
2915	case PTR_TO_STACK:
2916	case PTR_TO_CTX:
2917	case PTR_TO_PACKET:
2918	case PTR_TO_PACKET_META:
2919	case PTR_TO_PACKET_END:
2920	case PTR_TO_FLOW_KEYS:
2921	case CONST_PTR_TO_MAP:
2922	case PTR_TO_SOCKET:
2923	case PTR_TO_SOCK_COMMON:
2924	case PTR_TO_TCP_SOCK:
2925	case PTR_TO_XDP_SOCK:
2926	case PTR_TO_BTF_ID:
2927	case PTR_TO_BUF:
2928	case PTR_TO_MEM:
2929	case PTR_TO_FUNC:
2930	case PTR_TO_MAP_KEY:
2931		return true;
2932	default:
2933		return false;
2934	}
2935}
2936
2937/* Does this register contain a constant zero? */
2938static bool register_is_null(struct bpf_reg_state *reg)
2939{
2940	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2941}
2942
2943static bool register_is_const(struct bpf_reg_state *reg)
2944{
2945	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2946}
2947
2948static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2949{
2950	return tnum_is_unknown(reg->var_off) &&
2951	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2952	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2953	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2954	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2955}
2956
2957static bool register_is_bounded(struct bpf_reg_state *reg)
2958{
2959	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2960}
2961
2962static bool __is_pointer_value(bool allow_ptr_leaks,
2963			       const struct bpf_reg_state *reg)
2964{
2965	if (allow_ptr_leaks)
2966		return false;
2967
2968	return reg->type != SCALAR_VALUE;
2969}
2970
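/* Copy the spilled register into the stack slot's spilled_ptr and mark the
 * written bytes as STACK_SPILL, scrubbing the rest of the slot. Only a full
 * 8-byte spill marks the slot REG_LIVE_WRITTEN.
 */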
2971static void save_register_state(struct bpf_func_state *state,
2972				int spi, struct bpf_reg_state *reg,
2973				int size)
2974{
2975	int i;
2976
2977	state->stack[spi].spilled_ptr = *reg;
2978	if (size == BPF_REG_SIZE)
2979		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2980
2981	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
2982		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
2983
2984	/* size < 8 bytes spill */
2985	for (; i; i--)
2986		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
2987}
2988
2989/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2990 * stack boundary and alignment are checked in check_mem_access()
2991 */
2992static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2993				       /* stack frame we're writing to */
2994				       struct bpf_func_state *state,
2995				       int off, int size, int value_regno,
2996				       int insn_idx)
2997{
2998	struct bpf_func_state *cur; /* state of the current function */
2999	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
3000	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
3001	struct bpf_reg_state *reg = NULL;
3002
3003	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
3004	if (err)
3005		return err;
3006	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
3007	 * so it's aligned access and [off, off + size) are within stack limits
3008	 */
3009	if (!env->allow_ptr_leaks &&
3010	    state->stack[spi].slot_type[0] == STACK_SPILL &&
3011	    size != BPF_REG_SIZE) {
3012		verbose(env, "attempt to corrupt spilled pointer on stack\n");
3013		return -EACCES;
3014	}
3015
3016	cur = env->cur_state->frame[env->cur_state->curframe];
3017	if (value_regno >= 0)
3018		reg = &cur->regs[value_regno];
3019	if (!env->bypass_spec_v4) {
3020		bool sanitize = reg && is_spillable_regtype(reg->type);
3021
3022		for (i = 0; i < size; i++) {
3023			if (state->stack[spi].slot_type[i] == STACK_INVALID) {
3024				sanitize = true;
3025				break;
3026			}
3027		}
3028
3029		if (sanitize)
3030			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
3031	}
3032
3033	mark_stack_slot_scratched(env, spi);
3034	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
3035	    !register_is_null(reg) && env->bpf_capable) {
3036		if (dst_reg != BPF_REG_FP) {
3037			/* The backtracking logic can only recognize explicit
3038			 * stack slot addresses like [fp - 8]. A spill of a scalar
3039			 * via a different register has to be treated conservatively:
3040			 * backtrack from here and mark as precise all registers
3041			 * that contributed to 'reg' being a constant.
3042			 */
3043			err = mark_chain_precision(env, value_regno);
3044			if (err)
3045				return err;
3046		}
3047		save_register_state(state, spi, reg, size);
3048	} else if (reg && is_spillable_regtype(reg->type)) {
3049		/* register containing pointer is being spilled into stack */
3050		if (size != BPF_REG_SIZE) {
3051			verbose_linfo(env, insn_idx, "; ");
3052			verbose(env, "invalid size of register spill\n");
3053			return -EACCES;
3054		}
3055		if (state != cur && reg->type == PTR_TO_STACK) {
3056			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
3057			return -EINVAL;
3058		}
3059		save_register_state(state, spi, reg, size);
3060	} else {
3061		u8 type = STACK_MISC;
3062
3063		/* regular write of data into stack destroys any spilled ptr */
3064		state->stack[spi].spilled_ptr.type = NOT_INIT;
3065		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
3066		if (is_spilled_reg(&state->stack[spi]))
3067			for (i = 0; i < BPF_REG_SIZE; i++)
3068				scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3069
3070		/* only mark the slot as written if all 8 bytes were written,
3071		 * otherwise read propagation may incorrectly stop too soon
3072		 * when stack slots are partially written.
3073		 * This heuristic means that read propagation will be
3074		 * conservative, since it will add reg_live_read marks
3075		 * to stack slots all the way to the first state when a program
3076		 * writes+reads less than 8 bytes.
3077		 */
3078		if (size == BPF_REG_SIZE)
3079			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
3080
3081		/* when we zero initialize stack slots mark them as such */
3082		if (reg && register_is_null(reg)) {
3083			/* backtracking doesn't work for STACK_ZERO yet. */
3084			err = mark_chain_precision(env, value_regno);
3085			if (err)
3086				return err;
3087			type = STACK_ZERO;
3088		}
3089
3090		/* Mark slots affected by this stack write. */
3091		for (i = 0; i < size; i++)
3092			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
3093				type;
3094	}
3095	return 0;
3096}
3097
3098/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
3099 * known to contain a variable offset.
3100 * This function checks whether the write is permitted and conservatively
3101 * tracks the effects of the write, considering that each stack slot in the
3102 * dynamic range is potentially written to.
3103 *
3104 * 'off' includes 'regno->off'.
3105 * 'value_regno' can be -1, meaning that an unknown value is being written to
3106 * the stack.
3107 *
3108 * Spilled pointers in range are not marked as written because we don't know
3109 * what's going to be actually written. This means that read propagation for
3110 * future reads cannot be terminated by this write.
3111 *
3112 * For privileged programs, uninitialized stack slots are considered
3113 * initialized by this write (even though we don't know exactly what offsets
3114 * are going to be written to). The idea is that we don't want the verifier to
3115 * reject future reads that access slots written to through variable offsets.
3116 */
3117static int check_stack_write_var_off(struct bpf_verifier_env *env,
3118				     /* func where register points to */
3119				     struct bpf_func_state *state,
3120				     int ptr_regno, int off, int size,
3121				     int value_regno, int insn_idx)
3122{
3123	struct bpf_func_state *cur; /* state of the current function */
3124	int min_off, max_off;
3125	int i, err;
3126	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
3127	bool writing_zero = false;
3128	/* set if the fact that we're writing a zero is used to let any
3129	 * stack slots remain STACK_ZERO
3130	 */
3131	bool zero_used = false;
3132
3133	cur = env->cur_state->frame[env->cur_state->curframe];
3134	ptr_reg = &cur->regs[ptr_regno];
3135	min_off = ptr_reg->smin_value + off;
3136	max_off = ptr_reg->smax_value + off + size;
3137	if (value_regno >= 0)
3138		value_reg = &cur->regs[value_regno];
3139	if (value_reg && register_is_null(value_reg))
3140		writing_zero = true;
3141
3142	err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
3143	if (err)
3144		return err;
3145
3146
3147	/* Variable offset writes destroy any spilled pointers in range. */
3148	for (i = min_off; i < max_off; i++) {
3149		u8 new_type, *stype;
3150		int slot, spi;
3151
3152		slot = -i - 1;
3153		spi = slot / BPF_REG_SIZE;
3154		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3155		mark_stack_slot_scratched(env, spi);
3156
3157		if (!env->allow_ptr_leaks
3158				&& *stype != NOT_INIT
3159				&& *stype != SCALAR_VALUE) {
3160			/* Reject the write if there are spilled pointers in
3161			 * range. If we didn't reject here, the ptr status
3162			 * would be erased below (even though not all slots are
3163			 * actually overwritten), possibly opening the door to
3164			 * leaks.
3165			 */
3166			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3167				insn_idx, i);
3168			return -EINVAL;
3169		}
3170
3171		/* Erase all spilled pointers. */
3172		state->stack[spi].spilled_ptr.type = NOT_INIT;
3173
3174		/* Update the slot type. */
3175		new_type = STACK_MISC;
3176		if (writing_zero && *stype == STACK_ZERO) {
3177			new_type = STACK_ZERO;
3178			zero_used = true;
3179		}
3180		/* If the slot is STACK_INVALID, we check whether it's OK to
3181		 * pretend that it will be initialized by this write. The slot
3182		 * might not actually be written to, and so if we mark it as
3183		 * initialized future reads might leak uninitialized memory.
3184		 * For privileged programs, we accept such reads from slots
3185		 * that may or may not be written, because rejecting them
3186		 * would make the error too confusing.
3187		 */
3188		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
3189			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3190					insn_idx, i);
3191			return -EINVAL;
3192		}
3193		*stype = new_type;
3194	}
3195	if (zero_used) {
3196		/* backtracking doesn't work for STACK_ZERO yet. */
3197		err = mark_chain_precision(env, value_regno);
3198		if (err)
3199			return err;
3200	}
3201	return 0;
3202}
3203
3204/* When register 'dst_regno' is assigned some values from stack[min_off,
3205 * max_off), we set the register's type according to the types of the
3206 * respective stack slots. If all the stack values are known to be zeros, then
3207 * so is the destination reg. Otherwise, the register is considered to be
3208 * SCALAR. This function does not deal with register filling; the caller must
3209 * ensure that all spilled registers in the stack range have been marked as
3210 * read.
3211 */
3212static void mark_reg_stack_read(struct bpf_verifier_env *env,
3213				/* func where src register points to */
3214				struct bpf_func_state *ptr_state,
3215				int min_off, int max_off, int dst_regno)
3216{
3217	struct bpf_verifier_state *vstate = env->cur_state;
3218	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3219	int i, slot, spi;
3220	u8 *stype;
3221	int zeros = 0;
3222
3223	for (i = min_off; i < max_off; i++) {
3224		slot = -i - 1;
3225		spi = slot / BPF_REG_SIZE;
3226		stype = ptr_state->stack[spi].slot_type;
3227		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3228			break;
3229		zeros++;
3230	}
3231	if (zeros == max_off - min_off) {
3232		/* any access_size read into register is zero extended,
3233		 * so the whole register == const_zero
3234		 */
3235		__mark_reg_const_zero(&state->regs[dst_regno]);
3236		/* backtracking doesn't support STACK_ZERO yet,
3237		 * so mark it precise here, so that later
3238		 * backtracking can stop here.
3239		 * Backtracking may not need this if this register
3240		 * doesn't participate in pointer adjustment.
3241		 * Forward propagation of precise flag is not
3242		 * necessary either. This mark is only to stop
3243		 * backtracking. Any register that contributed
3244		 * to const 0 was marked precise before spill.
3245		 */
3246		state->regs[dst_regno].precise = true;
3247	} else {
3248		/* have read misc data from the stack */
3249		mark_reg_unknown(env, state->regs, dst_regno);
3250	}
3251	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3252}
3253
3254/* Read the stack at 'off' and put the results into the register indicated by
3255 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3256 * spilled reg.
3257 *
3258 * 'dst_regno' can be -1, meaning that the read value is not going to a
3259 * register.
3260 *
3261 * The access is assumed to be within the current stack bounds.
3262 */
3263static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3264				      /* func where src register points to */
3265				      struct bpf_func_state *reg_state,
3266				      int off, int size, int dst_regno)
3267{
3268	struct bpf_verifier_state *vstate = env->cur_state;
3269	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3270	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3271	struct bpf_reg_state *reg;
3272	u8 *stype, type;
3273
3274	stype = reg_state->stack[spi].slot_type;
3275	reg = &reg_state->stack[spi].spilled_ptr;
3276
3277	if (is_spilled_reg(&reg_state->stack[spi])) {
3278		u8 spill_size = 1;
3279
3280		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3281			spill_size++;
3282
3283		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3284			if (reg->type != SCALAR_VALUE) {
3285				verbose_linfo(env, env->insn_idx, "; ");
3286				verbose(env, "invalid size of register fill\n");
3287				return -EACCES;
3288			}
3289
3290			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3291			if (dst_regno < 0)
3292				return 0;
3293
3294			if (!(off % BPF_REG_SIZE) && size == spill_size) {
3295				/* The earlier check_reg_arg() has decided the
3296				 * subreg_def for this insn.  Save it first.
3297				 */
3298				s32 subreg_def = state->regs[dst_regno].subreg_def;
3299
3300				state->regs[dst_regno] = *reg;
3301				state->regs[dst_regno].subreg_def = subreg_def;
3302			} else {
3303				for (i = 0; i < size; i++) {
3304					type = stype[(slot - i) % BPF_REG_SIZE];
3305					if (type == STACK_SPILL)
3306						continue;
3307					if (type == STACK_MISC)
3308						continue;
3309					verbose(env, "invalid read from stack off %d+%d size %d\n",
3310						off, i, size);
3311					return -EACCES;
3312				}
3313				mark_reg_unknown(env, state->regs, dst_regno);
3314			}
3315			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3316			return 0;
3317		}
3318
3319		if (dst_regno >= 0) {
3320			/* restore register state from stack */
3321			state->regs[dst_regno] = *reg;
3322			/* mark reg as written since spilled pointer state likely
3323			 * has its liveness marks cleared by is_state_visited()
3324			 * which resets stack/reg liveness for state transitions
3325			 */
3326			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3327		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3328			/* If dst_regno==-1, the caller is asking us whether
3329			 * it is acceptable to use this value as a SCALAR_VALUE
3330			 * (e.g. for XADD).
3331			 * We must not allow unprivileged callers to do that
3332			 * with spilled pointers.
3333			 */
3334			verbose(env, "leaking pointer from stack off %d\n",
3335				off);
3336			return -EACCES;
3337		}
3338		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3339	} else {
3340		for (i = 0; i < size; i++) {
3341			type = stype[(slot - i) % BPF_REG_SIZE];
3342			if (type == STACK_MISC)
3343				continue;
3344			if (type == STACK_ZERO)
3345				continue;
3346			verbose(env, "invalid read from stack off %d+%d size %d\n",
3347				off, i, size);
3348			return -EACCES;
3349		}
3350		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3351		if (dst_regno >= 0)
3352			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3353	}
3354	return 0;
3355}
3356
3357enum bpf_access_src {
3358	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
3359	ACCESS_HELPER = 2,  /* the access is performed by a helper */
3360};
3361
3362static int check_stack_range_initialized(struct bpf_verifier_env *env,
3363					 int regno, int off, int access_size,
3364					 bool zero_size_allowed,
3365					 enum bpf_access_src type,
3366					 struct bpf_call_arg_meta *meta);
3367
3368static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3369{
3370	return cur_regs(env) + regno;
3371}
3372
3373/* Read the stack at 'ptr_regno + off' and put the result into the register
3374 * 'dst_regno'.
3375 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
3376 * but not its variable offset.
3377 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3378 *
3379 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3380 * filling registers (i.e. reads of spilled register cannot be detected when
3381 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3382 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3383 * offset; for a fixed offset check_stack_read_fixed_off should be used
3384 * instead.
3385 */
3386static int check_stack_read_var_off(struct bpf_verifier_env *env,
3387				    int ptr_regno, int off, int size, int dst_regno)
3388{
3389	/* The state of the source register. */
3390	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3391	struct bpf_func_state *ptr_state = func(env, reg);
3392	int err;
3393	int min_off, max_off;
3394
3395	/* Note that we pass a NULL meta, so raw access will not be permitted.
3396	 */
3397	err = check_stack_range_initialized(env, ptr_regno, off, size,
3398					    false, ACCESS_DIRECT, NULL);
3399	if (err)
3400		return err;
3401
3402	min_off = reg->smin_value + off;
3403	max_off = reg->smax_value + off;
3404	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3405	return 0;
3406}
3407
3408/* check_stack_read dispatches to check_stack_read_fixed_off or
3409 * check_stack_read_var_off.
3410 *
3411 * The caller must ensure that the offset falls within the allocated stack
3412 * bounds.
3413 *
3414 * 'dst_regno' is a register which will receive the value from the stack. It
3415 * can be -1, meaning that the read value is not going to a register.
3416 */
3417static int check_stack_read(struct bpf_verifier_env *env,
3418			    int ptr_regno, int off, int size,
3419			    int dst_regno)
3420{
3421	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3422	struct bpf_func_state *state = func(env, reg);
3423	int err;
3424	/* Some accesses are only permitted with a static offset. */
3425	bool var_off = !tnum_is_const(reg->var_off);
3426
3427	/* The offset is required to be static when reads don't go to a
3428	 * register, in order to not leak pointers (see
3429	 * check_stack_read_fixed_off).
3430	 */
3431	if (dst_regno < 0 && var_off) {
3432		char tn_buf[48];
3433
3434		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3435		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3436			tn_buf, off, size);
3437		return -EACCES;
3438	}
3439	/* Variable offset is prohibited for unprivileged mode for simplicity
3440	 * since it requires corresponding support in Spectre masking for stack
3441	 * ALU. See also retrieve_ptr_limit().
3442	 */
3443	if (!env->bypass_spec_v1 && var_off) {
3444		char tn_buf[48];
3445
3446		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3447		verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
3448				ptr_regno, tn_buf);
3449		return -EACCES;
3450	}
3451
3452	if (!var_off) {
3453		off += reg->var_off.value;
3454		err = check_stack_read_fixed_off(env, state, off, size,
3455						 dst_regno);
3456	} else {
3457		/* Variable offset stack reads need more conservative handling
3458		 * than fixed offset ones. Note that dst_regno >= 0 on this
3459		 * branch.
3460		 */
3461		err = check_stack_read_var_off(env, ptr_regno, off, size,
3462					       dst_regno);
3463	}
3464	return err;
3465}
3466
3467
3468/* check_stack_write dispatches to check_stack_write_fixed_off or
3469 * check_stack_write_var_off.
3470 *
3471 * 'ptr_regno' is the register used as a pointer into the stack.
3472 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
3473 * 'value_regno' is the register whose value we're writing to the stack. It can
3474 * be -1, meaning that we're not writing from a register.
3475 *
3476 * The caller must ensure that the offset falls within the maximum stack size.
3477 */
3478static int check_stack_write(struct bpf_verifier_env *env,
3479			     int ptr_regno, int off, int size,
3480			     int value_regno, int insn_idx)
3481{
3482	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3483	struct bpf_func_state *state = func(env, reg);
3484	int err;
3485
3486	if (tnum_is_const(reg->var_off)) {
3487		off += reg->var_off.value;
3488		err = check_stack_write_fixed_off(env, state, off, size,
3489						  value_regno, insn_idx);
3490	} else {
3491		/* Variable offset stack writes need more conservative handling
3492		 * than fixed offset ones.
3493		 */
3494		err = check_stack_write_var_off(env, state,
3495						ptr_regno, off, size,
3496						value_regno, insn_idx);
3497	}
3498	return err;
3499}
3500
3501static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
3502				 int off, int size, enum bpf_access_type type)
3503{
3504	struct bpf_reg_state *regs = cur_regs(env);
3505	struct bpf_map *map = regs[regno].map_ptr;
3506	u32 cap = bpf_map_flags_to_cap(map);
3507
3508	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
3509		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
3510			map->value_size, off, size);
3511		return -EACCES;
3512	}
3513
3514	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
3515		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
3516			map->value_size, off, size);
3517		return -EACCES;
3518	}
3519
3520	return 0;
3521}
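
/* Illustration (C-level sketch; the map here is hypothetical): a map created
 * with BPF_F_RDONLY_PROG in map_flags loses BPF_MAP_CAN_WRITE in
 * bpf_map_flags_to_cap(), so
 *
 *   val = bpf_map_lookup_elem(&ro_map, &key);
 *   if (val)
 *           *val = 1;     // rejected above: "write into map forbidden"
 *
 * fails verification, while plain reads of *val are still accepted.
 */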
3522
3523/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
3524static int __check_mem_access(struct bpf_verifier_env *env, int regno,
3525			      int off, int size, u32 mem_size,
3526			      bool zero_size_allowed)
3527{
3528	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
3529	struct bpf_reg_state *reg;
3530
3531	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
3532		return 0;
3533
3534	reg = &cur_regs(env)[regno];
3535	switch (reg->type) {
3536	case PTR_TO_MAP_KEY:
3537		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
3538			mem_size, off, size);
3539		break;
3540	case PTR_TO_MAP_VALUE:
3541		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
3542			mem_size, off, size);
3543		break;
3544	case PTR_TO_PACKET:
3545	case PTR_TO_PACKET_META:
3546	case PTR_TO_PACKET_END:
3547		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
3548			off, size, regno, reg->id, off, mem_size);
3549		break;
3550	case PTR_TO_MEM:
3551	default:
3552		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
3553			mem_size, off, size);
3554	}
3555
3556	return -EACCES;
3557}
3558
3559/* check read/write into a memory region with possible variable offset */
3560static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
3561				   int off, int size, u32 mem_size,
3562				   bool zero_size_allowed)
3563{
3564	struct bpf_verifier_state *vstate = env->cur_state;
3565	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3566	struct bpf_reg_state *reg = &state->regs[regno];
3567	int err;
3568
3569	/* We may have adjusted the register pointing to memory region, so we
3570	 * need to try adding each of min_value and max_value to off
3571	 * to make sure our theoretical access will be safe.
3572	 *
3573	 * The minimum value is only important with signed
3574	 * comparisons where we can't assume the floor of a
3575	 * value is 0.  If we are using signed variables for our
3576	 * indexes we need to make sure that whatever we use
3577	 * will have a set floor within our range.
3578	 */
3579	if (reg->smin_value < 0 &&
3580	    (reg->smin_value == S64_MIN ||
3581	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
3582	      reg->smin_value + off < 0)) {
3583		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3584			regno);
3585		return -EACCES;
3586	}
3587	err = __check_mem_access(env, regno, reg->smin_value + off, size,
3588				 mem_size, zero_size_allowed);
3589	if (err) {
3590		verbose(env, "R%d min value is outside of the allowed memory range\n",
3591			regno);
3592		return err;
3593	}
3594
3595	/* If we haven't set a max value then we need to bail since we can't be
3596	 * sure we won't do bad things.
3597	 * If reg->umax_value + off could overflow, treat that as unbounded too.
3598	 */
3599	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
3600		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
3601			regno);
3602		return -EACCES;
3603	}
3604	err = __check_mem_access(env, regno, reg->umax_value + off, size,
3605				 mem_size, zero_size_allowed);
3606	if (err) {
3607		verbose(env, "R%d max value is outside of the allowed memory range\n",
3608			regno);
3609		return err;
3610	}
3611
3612	return 0;
3613}
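
/* Worked example of why both bounds are probed above (numbers illustrative):
 * with mem_size == 64 and an index register known to be in [0, 60], an
 * 8-byte access at 'ptr + idx' passes the smin_value check (0 + 8 <= 64)
 * but fails the umax_value check (60 + 8 > 64), so the program has to clamp
 * idx first, e.g. with 'if (idx > 56) return 0;'.
 */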
3614
3615static int __check_ptr_off_reg(struct bpf_verifier_env *env,
3616			       const struct bpf_reg_state *reg, int regno,
3617			       bool fixed_off_ok)
3618{
3619	/* Access to this pointer-typed register or passing it to a helper
3620	 * is only allowed in its original, unmodified form.
3621	 */
3622
3623	if (reg->off < 0) {
3624		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
3625			reg_type_str(env, reg->type), regno, reg->off);
3626		return -EACCES;
3627	}
3628
3629	if (!fixed_off_ok && reg->off) {
3630		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
3631			reg_type_str(env, reg->type), regno, reg->off);
3632		return -EACCES;
3633	}
3634
3635	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3636		char tn_buf[48];
3637
3638		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3639		verbose(env, "variable %s access var_off=%s disallowed\n",
3640			reg_type_str(env, reg->type), tn_buf);
3641		return -EACCES;
3642	}
3643
3644	return 0;
3645}
3646
3647int check_ptr_off_reg(struct bpf_verifier_env *env,
3648		      const struct bpf_reg_state *reg, int regno)
3649{
3650	return __check_ptr_off_reg(env, reg, regno, false);
3651}
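
/* Example of what the fixed_off_ok == false case rejects, in the style of
 * the header comment (sketch):
 *
 *   BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),            // r6 = ctx
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),           // r6 = ctx + 8
 *   BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),    // "dereference of modified
 *                                                   //  ctx ptr ... disallowed"
 *
 * The load must instead use a constant displacement on the unmodified
 * pointer, i.e. BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 8).
 */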
3652
3653static int map_kptr_match_type(struct bpf_verifier_env *env,
3654			       struct bpf_map_value_off_desc *off_desc,
3655			       struct bpf_reg_state *reg, u32 regno)
3656{
3657	const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id);
3658	int perm_flags = PTR_MAYBE_NULL;
3659	const char *reg_name = "";
3660
3661	/* Only unreferenced case accepts untrusted pointers */
3662	if (off_desc->type == BPF_KPTR_UNREF)
3663		perm_flags |= PTR_UNTRUSTED;
3664
3665	if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
3666		goto bad_type;
3667
3668	if (!btf_is_kernel(reg->btf)) {
3669		verbose(env, "R%d must point to kernel BTF\n", regno);
3670		return -EINVAL;
3671	}
3672	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
3673	reg_name = kernel_type_name(reg->btf, reg->btf_id);
3674
3675	/* For ref_ptr case, release function check should ensure we get one
3676	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
3677	 * normal store of unreferenced kptr, we must ensure var_off is zero.
3678	 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
3679	 * reg->off and reg->ref_obj_id are not needed here.
3680	 */
3681	if (__check_ptr_off_reg(env, reg, regno, true))
3682		return -EACCES;
3683
3684	/* A full type match is needed, as BTF can be vmlinux or module BTF, and
3685	 * we also need to take into account the reg->off.
3686	 *
3687	 * We want to support cases like:
3688	 *
3689	 * struct foo {
3690	 *         struct bar br;
3691	 *         struct baz bz;
3692	 * };
3693	 *
3694	 * struct foo *v;
3695	 * v = func();	      // PTR_TO_BTF_ID
3696	 * val->foo = v;      // reg->off is zero, btf and btf_id match type
3697	 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
3698	 *                    // first member type of struct after comparison fails
3699	 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
3700	 *                    // to match type
3701	 *
3702	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
3703	 * is zero. We must also ensure that btf_struct_ids_match does not walk
3704	 * the struct to match type against first member of struct, i.e. reject
3705	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
3706	 * strict mode to true for type match.
3707	 */
3708	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
3709				  off_desc->kptr.btf, off_desc->kptr.btf_id,
3710				  off_desc->type == BPF_KPTR_REF))
3711		goto bad_type;
3712	return 0;
3713bad_type:
3714	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
3715		reg_type_str(env, reg->type), reg_name);
3716	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
3717	if (off_desc->type == BPF_KPTR_UNREF)
3718		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
3719			targ_name);
3720	else
3721		verbose(env, "\n");
3722	return -EINVAL;
3723}
3724
3725static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
3726				 int value_regno, int insn_idx,
3727				 struct bpf_map_value_off_desc *off_desc)
3728{
3729	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3730	int class = BPF_CLASS(insn->code);
3731	struct bpf_reg_state *val_reg;
3732
3733	/* Things we already checked for in check_map_access and caller:
3734	 *  - Reject cases where variable offset may touch kptr
3735	 *  - size of access (must be BPF_DW)
3736	 *  - tnum_is_const(reg->var_off)
3737	 *  - off_desc->offset == off + reg->var_off.value
3738	 */
3739	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
3740	if (BPF_MODE(insn->code) != BPF_MEM) {
3741		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
3742		return -EACCES;
3743	}
3744
3745	/* We only allow loading referenced kptr, since it will be marked as
3746	 * untrusted, similar to unreferenced kptr.
3747	 */
3748	if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) {
3749		verbose(env, "store to referenced kptr disallowed\n");
3750		return -EACCES;
3751	}
3752
3753	if (class == BPF_LDX) {
3754		val_reg = reg_state(env, value_regno);
3755		/* We can simply mark the value_regno receiving the pointer
3756		 * value from map as PTR_TO_BTF_ID, with the correct type.
3757		 */
3758		mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf,
3759				off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
3760		/* For mark_ptr_or_null_reg */
3761		val_reg->id = ++env->id_gen;
3762	} else if (class == BPF_STX) {
3763		val_reg = reg_state(env, value_regno);
3764		if (!register_is_null(val_reg) &&
3765		    map_kptr_match_type(env, off_desc, val_reg, value_regno))
3766			return -EACCES;
3767	} else if (class == BPF_ST) {
3768		if (insn->imm) {
3769			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
3770				off_desc->offset);
3771			return -EACCES;
3772		}
3773	} else {
3774		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
3775		return -EACCES;
3776	}
3777	return 0;
3778}
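
/* Access sketch (field and type names are illustrative, not from a real map):
 * for a map value containing a kptr field, a BPF_DW load of that field yields
 * an untrusted, possibly-NULL PTR_TO_BTF_ID that must be NULL-checked before
 * use:
 *
 *   p = v->ptr;           // BPF_LDX | BPF_MEM | BPF_DW ->
 *                         // PTR_TO_BTF_ID | PTR_MAYBE_NULL | PTR_UNTRUSTED
 *   if (p)
 *           ... read-only walks of p ...
 *
 * while 'v->ptr = p' (BPF_STX) on a field declared as a referenced kptr is
 * rejected above with "store to referenced kptr disallowed".
 */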
3779
3780/* check read/write into a map element with possible variable offset */
3781static int check_map_access(struct bpf_verifier_env *env, u32 regno,
3782			    int off, int size, bool zero_size_allowed,
3783			    enum bpf_access_src src)
3784{
3785	struct bpf_verifier_state *vstate = env->cur_state;
3786	struct bpf_func_state *state = vstate->frame[vstate->curframe];
3787	struct bpf_reg_state *reg = &state->regs[regno];
3788	struct bpf_map *map = reg->map_ptr;
3789	int err;
3790
3791	err = check_mem_region_access(env, regno, off, size, map->value_size,
3792				      zero_size_allowed);
3793	if (err)
3794		return err;
3795
3796	if (map_value_has_spin_lock(map)) {
3797		u32 lock = map->spin_lock_off;
3798
3799		/* if any part of struct bpf_spin_lock can be touched by
3800		 * load/store reject this program.
3801		 * To check that [x1, x2) overlaps with [y1, y2)
3802		 * it is sufficient to check x1 < y2 && y1 < x2.
3803		 */
3804		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
3805		     lock < reg->umax_value + off + size) {
3806			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
3807			return -EACCES;
3808		}
3809	}
3810	if (map_value_has_timer(map)) {
3811		u32 t = map->timer_off;
3812
3813		if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
3814		     t < reg->umax_value + off + size) {
3815			verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
3816			return -EACCES;
3817		}
3818	}
3819	if (map_value_has_kptrs(map)) {
3820		struct bpf_map_value_off *tab = map->kptr_off_tab;
3821		int i;
3822
3823		for (i = 0; i < tab->nr_off; i++) {
3824			u32 p = tab->off[i].offset;
3825
3826			if (reg->smin_value + off < p + sizeof(u64) &&
3827			    p < reg->umax_value + off + size) {
3828				if (src != ACCESS_DIRECT) {
3829					verbose(env, "kptr cannot be accessed indirectly by helper\n");
3830					return -EACCES;
3831				}
3832				if (!tnum_is_const(reg->var_off)) {
3833					verbose(env, "kptr access cannot have variable offset\n");
3834					return -EACCES;
3835				}
3836				if (p != off + reg->var_off.value) {
3837					verbose(env, "kptr access misaligned expected=%u off=%llu\n",
3838						p, off + reg->var_off.value);
3839					return -EACCES;
3840				}
3841				if (size != bpf_size_to_bytes(BPF_DW)) {
3842					verbose(env, "kptr access size must be BPF_DW\n");
3843					return -EACCES;
3844				}
3845				break;
3846			}
3847		}
3848	}
3849	return err;
3850}
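
/* Overlap illustration for the bpf_spin_lock check above (layout assumed):
 *
 *   struct val { int cnt; struct bpf_spin_lock lock; };
 *
 * A 4-byte access at offset 0 covers [0, 4) and does not overlap the lock at
 * [4, 8), but an 8-byte access at offset 0 covers [0, 8), satisfies
 * "x1 < y2 && y1 < x2" and is rejected. The bpf_timer and kptr checks in the
 * same function use the same overlap test.
 */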
3851
3852#define MAX_PACKET_OFF 0xffff
3853
3854static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3855				       const struct bpf_call_arg_meta *meta,
3856				       enum bpf_access_type t)
3857{
3858	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
3859
3860	switch (prog_type) {
3861	/* Program types with only direct read access go here! */
3862	case BPF_PROG_TYPE_LWT_IN:
3863	case BPF_PROG_TYPE_LWT_OUT:
3864	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
3865	case BPF_PROG_TYPE_SK_REUSEPORT:
3866	case BPF_PROG_TYPE_FLOW_DISSECTOR:
3867	case BPF_PROG_TYPE_CGROUP_SKB:
3868		if (t == BPF_WRITE)
3869			return false;
3870		fallthrough;
3871
3872	/* Program types with direct read + write access go here! */
3873	case BPF_PROG_TYPE_SCHED_CLS:
3874	case BPF_PROG_TYPE_SCHED_ACT:
3875	case BPF_PROG_TYPE_XDP:
3876	case BPF_PROG_TYPE_LWT_XMIT:
3877	case BPF_PROG_TYPE_SK_SKB:
3878	case BPF_PROG_TYPE_SK_MSG:
3879		if (meta)
3880			return meta->pkt_access;
3881
3882		env->seen_direct_write = true;
3883		return true;
3884
3885	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3886		if (t == BPF_WRITE)
3887			env->seen_direct_write = true;
3888
3889		return true;
3890
3891	default:
3892		return false;
3893	}
3894}
3895
3896static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
3897			       int size, bool zero_size_allowed)
3898{
3899	struct bpf_reg_state *regs = cur_regs(env);
3900	struct bpf_reg_state *reg = &regs[regno];
3901	int err;
3902
3903	/* We may have added a variable offset to the packet pointer; but any
3904	 * reg->range we have comes after that.  We are only checking the fixed
3905	 * offset.
3906	 */
3907
3908	/* We don't allow negative numbers, because we aren't tracking enough
3909	 * detail to prove they're safe.
3910	 */
3911	if (reg->smin_value < 0) {
3912		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3913			regno);
3914		return -EACCES;
3915	}
3916
3917	err = reg->range < 0 ? -EINVAL :
3918	      __check_mem_access(env, regno, off, size, reg->range,
3919				 zero_size_allowed);
3920	if (err) {
3921		verbose(env, "R%d offset is outside of the packet\n", regno);
3922		return err;
3923	}
3924
3925	/* __check_mem_access has made sure "off + size - 1" is within u16.
3926	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
3927	 * otherwise find_good_pkt_pointers would have refused to set range info,
3928	 * and __check_mem_access would have rejected this pkt access.
3929	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
3930	 */
3931	env->prog->aux->max_pkt_offset =
3932		max_t(u32, env->prog->aux->max_pkt_offset,
3933		      off + reg->umax_value + size - 1);
3934
3935	return err;
3936}
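
/* The reg->range consumed above comes from an earlier bounds check against
 * data_end, e.g. (C-level sketch for a program type with direct packet
 * access):
 *
 *   void *data     = (void *)(long)skb->data;
 *   void *data_end = (void *)(long)skb->data_end;
 *
 *   if (data + 14 > data_end)
 *           return TC_ACT_OK;
 *   // find_good_pkt_pointers() gives the packet pointer range = 14 here,
 *   // so loads with off + size <= 14 pass the check above.
 */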
3937
3938/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
3939static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
3940			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
3941			    struct btf **btf, u32 *btf_id)
3942{
3943	struct bpf_insn_access_aux info = {
3944		.reg_type = *reg_type,
3945		.log = &env->log,
3946	};
3947
3948	if (env->ops->is_valid_access &&
3949	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3950		/* A non-zero info.ctx_field_size indicates that this field is a
3951		 * candidate for a later verifier transformation to load the whole
3952		 * field and then apply a mask when it is accessed with a narrower
3953		 * access than the actual ctx field size. A zero info.ctx_field_size
3954		 * allows only whole-field access and rejects any other type of
3955		 * narrower access.
3956		 */
3957		*reg_type = info.reg_type;
3958
3959		if (base_type(*reg_type) == PTR_TO_BTF_ID) {
3960			*btf = info.btf;
3961			*btf_id = info.btf_id;
3962		} else {
3963			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3964		}
3965		/* remember the offset of last byte accessed in ctx */
3966		if (env->prog->aux->max_ctx_offset < off + size)
3967			env->prog->aux->max_ctx_offset = off + size;
3968		return 0;
3969	}
3970
3971	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3972	return -EACCES;
3973}
3974
3975static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3976				  int size)
3977{
3978	if (size < 0 || off < 0 ||
3979	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
3980		verbose(env, "invalid access to flow keys off=%d size=%d\n",
3981			off, size);
3982		return -EACCES;
3983	}
3984	return 0;
3985}
3986
3987static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3988			     u32 regno, int off, int size,
3989			     enum bpf_access_type t)
3990{
3991	struct bpf_reg_state *regs = cur_regs(env);
3992	struct bpf_reg_state *reg = &regs[regno];
3993	struct bpf_insn_access_aux info = {};
3994	bool valid;
3995
3996	if (reg->smin_value < 0) {
3997		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3998			regno);
3999		return -EACCES;
4000	}
4001
4002	switch (reg->type) {
4003	case PTR_TO_SOCK_COMMON:
4004		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4005		break;
4006	case PTR_TO_SOCKET:
4007		valid = bpf_sock_is_valid_access(off, size, t, &info);
4008		break;
4009	case PTR_TO_TCP_SOCK:
4010		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4011		break;
4012	case PTR_TO_XDP_SOCK:
4013		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4014		break;
4015	default:
4016		valid = false;
4017	}
4018
4019
4020	if (valid) {
4021		env->insn_aux_data[insn_idx].ctx_field_size =
4022			info.ctx_field_size;
4023		return 0;
4024	}
4025
4026	verbose(env, "R%d invalid %s access off=%d size=%d\n",
4027		regno, reg_type_str(env, reg->type), off, size);
4028
4029	return -EACCES;
4030}
4031
4032static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4033{
4034	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4035}
4036
4037static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4038{
4039	const struct bpf_reg_state *reg = reg_state(env, regno);
4040
4041	return reg->type == PTR_TO_CTX;
4042}
4043
4044static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4045{
4046	const struct bpf_reg_state *reg = reg_state(env, regno);
4047
4048	return type_is_sk_pointer(reg->type);
4049}
4050
4051static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4052{
4053	const struct bpf_reg_state *reg = reg_state(env, regno);
4054
4055	return type_is_pkt_pointer(reg->type);
4056}
4057
4058static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4059{
4060	const struct bpf_reg_state *reg = reg_state(env, regno);
4061
4062	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
4063	return reg->type == PTR_TO_FLOW_KEYS;
4064}
4065
4066static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4067				   const struct bpf_reg_state *reg,
4068				   int off, int size, bool strict)
4069{
4070	struct tnum reg_off;
4071	int ip_align;
4072
4073	/* Byte size accesses are always allowed. */
4074	if (!strict || size == 1)
4075		return 0;
4076
4077	/* For platforms that do not have a Kconfig enabling
4078	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4079	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
4080	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4081	 * to this code only in strict mode where we want to emulate
4082	 * the NET_IP_ALIGN==2 checking.  Therefore use an
4083	 * unconditional IP align value of '2'.
4084	 */
4085	ip_align = 2;
4086
4087	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
4088	if (!tnum_is_aligned(reg_off, size)) {
4089		char tn_buf[48];
4090
4091		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4092		verbose(env,
4093			"misaligned packet access off %d+%s+%d+%d size %d\n",
4094			ip_align, tn_buf, reg->off, off, size);
4095		return -EACCES;
4096	}
4097
4098	return 0;
4099}
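
/* Worked example for the strict NET_IP_ALIGN case above (offsets assume a
 * 14-byte Ethernet header): a 4-byte load of the IPv4 source address sits at
 * packet offset 14 + 12 = 26, and 2 + 26 = 28 is 4-byte aligned, so it
 * passes; a 4-byte load at packet offset 15 would yield 2 + 15 = 17 and be
 * rejected as a misaligned packet access.
 */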
4100
4101static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4102				       const struct bpf_reg_state *reg,
4103				       const char *pointer_desc,
4104				       int off, int size, bool strict)
4105{
4106	struct tnum reg_off;
4107
4108	/* Byte size accesses are always allowed. */
4109	if (!strict || size == 1)
4110		return 0;
4111
4112	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
4113	if (!tnum_is_aligned(reg_off, size)) {
4114		char tn_buf[48];
4115
4116		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4117		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
4118			pointer_desc, tn_buf, reg->off, off, size);
4119		return -EACCES;
4120	}
4121
4122	return 0;
4123}
4124
4125static int check_ptr_alignment(struct bpf_verifier_env *env,
4126			       const struct bpf_reg_state *reg, int off,
4127			       int size, bool strict_alignment_once)
4128{
4129	bool strict = env->strict_alignment || strict_alignment_once;
4130	const char *pointer_desc = "";
4131
4132	switch (reg->type) {
4133	case PTR_TO_PACKET:
4134	case PTR_TO_PACKET_META:
4135		/* Special case, because of NET_IP_ALIGN. Given metadata sits
4136		 * right in front, treat it the very same way.
4137		 */
4138		return check_pkt_ptr_alignment(env, reg, off, size, strict);
4139	case PTR_TO_FLOW_KEYS:
4140		pointer_desc = "flow keys ";
4141		break;
4142	case PTR_TO_MAP_KEY:
4143		pointer_desc = "key ";
4144		break;
4145	case PTR_TO_MAP_VALUE:
4146		pointer_desc = "value ";
4147		break;
4148	case PTR_TO_CTX:
4149		pointer_desc = "context ";
4150		break;
4151	case PTR_TO_STACK:
4152		pointer_desc = "stack ";
4153		/* The stack spill tracking logic in check_stack_write_fixed_off()
4154		 * and check_stack_read_fixed_off() relies on stack accesses being
4155		 * aligned.
4156		 */
4157		strict = true;
4158		break;
4159	case PTR_TO_SOCKET:
4160		pointer_desc = "sock ";
4161		break;
4162	case PTR_TO_SOCK_COMMON:
4163		pointer_desc = "sock_common ";
4164		break;
4165	case PTR_TO_TCP_SOCK:
4166		pointer_desc = "tcp_sock ";
4167		break;
4168	case PTR_TO_XDP_SOCK:
4169		pointer_desc = "xdp_sock ";
4170		break;
4171	default:
4172		break;
4173	}
4174	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
4175					   strict);
4176}
4177
4178static int update_stack_depth(struct bpf_verifier_env *env,
4179			      const struct bpf_func_state *func,
4180			      int off)
4181{
4182	u16 stack = env->subprog_info[func->subprogno].stack_depth;
4183
4184	if (stack >= -off)
4185		return 0;
4186
4187	/* update known max for given subprogram */
4188	env->subprog_info[func->subprogno].stack_depth = -off;
4189	return 0;
4190}
4191
4192/* Starting from the main bpf function, walk all instructions of the function
4193 * and recursively walk all callees that the given function can call.
4194 * Ignore jump and exit insns.
4195 * Since recursion is prevented by check_cfg(), this algorithm
4196 * only needs a local stack of MAX_CALL_FRAMES to remember callsites.
4197 */
4198static int check_max_stack_depth(struct bpf_verifier_env *env)
4199{
4200	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
4201	struct bpf_subprog_info *subprog = env->subprog_info;
4202	struct bpf_insn *insn = env->prog->insnsi;
4203	bool tail_call_reachable = false;
4204	int ret_insn[MAX_CALL_FRAMES];
4205	int ret_prog[MAX_CALL_FRAMES];
4206	int j;
4207
4208process_func:
4209	/* protect against potential stack overflow that might happen when
4210	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
4211	 * depth in such a case to 256 so that the worst case scenario
4212	 * would result in an 8k stack size (32, the tail call limit, times
4213	 * 256 = 8k).
4214	 *
4215	 * To get the idea what might happen, see an example:
4216	 * func1 -> sub rsp, 128
4217	 *  subfunc1 -> sub rsp, 256
4218	 *  tailcall1 -> add rsp, 256
4219	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
4220	 *   subfunc2 -> sub rsp, 64
4221	 *   subfunc22 -> sub rsp, 128
4222	 *   tailcall2 -> add rsp, 128
4223	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
4224	 *
4225	 * tailcall will unwind the current stack frame but it will not get rid
4226	 * of caller's stack as shown on the example above.
4227	 */
4228	if (idx && subprog[idx].has_tail_call && depth >= 256) {
4229		verbose(env,
4230			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
4231			depth);
4232		return -EACCES;
4233	}
4234	/* round up to 32 bytes, since this is the granularity
4235	 * of the interpreter stack size
4236	 */
4237	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
4238	if (depth > MAX_BPF_STACK) {
4239		verbose(env, "combined stack size of %d calls is %d. Too large\n",
4240			frame + 1, depth);
4241		return -EACCES;
4242	}
4243continue_func:
4244	subprog_end = subprog[idx + 1].start;
4245	for (; i < subprog_end; i++) {
4246		int next_insn;
4247
4248		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
4249			continue;
4250		/* remember insn and function to return to */
4251		ret_insn[frame] = i + 1;
4252		ret_prog[frame] = idx;
4253
4254		/* find the callee */
4255		next_insn = i + insn[i].imm + 1;
4256		idx = find_subprog(env, next_insn);
4257		if (idx < 0) {
4258			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4259				  next_insn);
4260			return -EFAULT;
4261		}
4262		if (subprog[idx].is_async_cb) {
4263			if (subprog[idx].has_tail_call) {
4264				verbose(env, "verifier bug. subprog has tail_call and async cb\n");
4265				return -EFAULT;
4266			}
4267			 /* async callbacks don't increase bpf prog stack size */
4268			continue;
4269		}
4270		i = next_insn;
4271
4272		if (subprog[idx].has_tail_call)
4273			tail_call_reachable = true;
4274
4275		frame++;
4276		if (frame >= MAX_CALL_FRAMES) {
4277			verbose(env, "the call stack of %d frames is too deep !\n",
4278				frame);
4279			return -E2BIG;
4280		}
4281		goto process_func;
4282	}
4283	/* if tail call got detected across bpf2bpf calls then mark each of the
4284	 * currently present subprog frames as tail call reachable subprogs;
4285	 * this info will be utilized by JIT so that we will be preserving the
4286	 * tail call counter throughout bpf2bpf calls combined with tailcalls
4287	 */
4288	if (tail_call_reachable)
4289		for (j = 0; j < frame; j++)
4290			subprog[ret_prog[j]].tail_call_reachable = true;
4291	if (subprog[0].tail_call_reachable)
4292		env->prog->aux->tail_call_reachable = true;
4293
4294	/* end of for() loop means the last insn of the 'subprog'
4295	 * was reached. Doesn't matter whether it was JA or EXIT
4296	 */
4297	if (frame == 0)
4298		return 0;
4299	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
4300	frame--;
4301	i = ret_insn[frame];
4302	idx = ret_prog[frame];
4303	goto continue_func;
4304}
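
/* Rounding illustration (numbers only, no real program): a subprog that
 * touches at most fp-1 still contributes 32 bytes of depth, and e.g. eight
 * frames of 64 bytes each sum to exactly MAX_BPF_STACK (512) and pass, while
 * growing any one of them past the next 32-byte boundary pushes the sum to
 * 544 and triggers "combined stack size ... Too large".
 */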
4305
4306#ifndef CONFIG_BPF_JIT_ALWAYS_ON
4307static int get_callee_stack_depth(struct bpf_verifier_env *env,
4308				  const struct bpf_insn *insn, int idx)
4309{
4310	int start = idx + insn->imm + 1, subprog;
4311
4312	subprog = find_subprog(env, start);
4313	if (subprog < 0) {
4314		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4315			  start);
4316		return -EFAULT;
4317	}
4318	return env->subprog_info[subprog].stack_depth;
4319}
4320#endif
4321
4322static int __check_buffer_access(struct bpf_verifier_env *env,
4323				 const char *buf_info,
4324				 const struct bpf_reg_state *reg,
4325				 int regno, int off, int size)
4326{
4327	if (off < 0) {
4328		verbose(env,
4329			"R%d invalid %s buffer access: off=%d, size=%d\n",
4330			regno, buf_info, off, size);
4331		return -EACCES;
4332	}
4333	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4334		char tn_buf[48];
4335
4336		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4337		verbose(env,
4338			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
4339			regno, off, tn_buf);
4340		return -EACCES;
4341	}
4342
4343	return 0;
4344}
4345
4346static int check_tp_buffer_access(struct bpf_verifier_env *env,
4347				  const struct bpf_reg_state *reg,
4348				  int regno, int off, int size)
4349{
4350	int err;
4351
4352	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
4353	if (err)
4354		return err;
4355
4356	if (off + size > env->prog->aux->max_tp_access)
4357		env->prog->aux->max_tp_access = off + size;
4358
4359	return 0;
4360}
4361
4362static int check_buffer_access(struct bpf_verifier_env *env,
4363			       const struct bpf_reg_state *reg,
4364			       int regno, int off, int size,
4365			       bool zero_size_allowed,
4366			       u32 *max_access)
4367{
4368	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
4369	int err;
4370
4371	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
4372	if (err)
4373		return err;
4374
4375	if (off + size > *max_access)
4376		*max_access = off + size;
4377
4378	return 0;
4379}
4380
4381/* BPF architecture zero extends alu32 ops into 64-bit registers */
4382static void zext_32_to_64(struct bpf_reg_state *reg)
4383{
4384	reg->var_off = tnum_subreg(reg->var_off);
4385	__reg_assign_32_into_64(reg);
4386}
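
/* Example: after a 32-bit ALU op such as BPF_MOV32_IMM(BPF_REG_1, -1) the
 * interpreter and JITs leave r1 == 0x00000000ffffffff; the helper above keeps
 * the verifier's model in sync by zero-extending the tracked subreg bounds
 * into the 64-bit bounds.
 */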
4387
4388/* truncate register to smaller size (in bytes)
4389 * must be called with size < BPF_REG_SIZE
4390 */
4391static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
4392{
4393	u64 mask;
4394
4395	/* clear high bits in bit representation */
4396	reg->var_off = tnum_cast(reg->var_off, size);
4397
4398	/* fix arithmetic bounds */
4399	mask = ((u64)1 << (size * 8)) - 1;
4400	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
4401		reg->umin_value &= mask;
4402		reg->umax_value &= mask;
4403	} else {
4404		reg->umin_value = 0;
4405		reg->umax_value = mask;
4406	}
4407	reg->smin_value = reg->umin_value;
4408	reg->smax_value = reg->umax_value;
4409
4410	/* If size is smaller than a 32-bit register, the 32-bit register
4411	 * values are also truncated, so we push the 64-bit bounds into the
4412	 * 32-bit bounds. Anything narrower than 32 bits was already truncated above.
4413	 */
4414	if (size >= 4)
4415		return;
4416	__reg_combine_64_into_32(reg);
4417}
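
/* Worked example for the bounds fixup above: truncating to size == 1 uses
 * mask == 0xff. Bounds of [0x120, 0x135] share the same high bits (0x100),
 * so they become [0x20, 0x35]; bounds of [0xf0, 0x110] do not, so they
 * collapse to the conservative [0x00, 0xff].
 */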
4418
4419static bool bpf_map_is_rdonly(const struct bpf_map *map)
4420{
4421	/* A map is considered read-only if the following conditions are true:
4422	 *
4423	 * 1) BPF program side cannot change any of the map content. The
4424	 *    BPF_F_RDONLY_PROG flag was set at map creation time and stays
4425	 *    in effect throughout the lifetime of the map.
4426	 * 2) The map value(s) have been initialized from user space by a
4427	 *    loader and then "frozen", such that no new map update/delete
4428	 *    operations from syscall side are possible for the rest of
4429	 *    the map's lifetime from that point onwards.
4430	 * 3) Any parallel/pending map update/delete operations from syscall
4431	 *    side have been completed. Only after that point, it's safe to
4432	 *    assume that map value(s) are immutable.
4433	 */
4434	return (map->map_flags & BPF_F_RDONLY_PROG) &&
4435	       READ_ONCE(map->frozen) &&
4436	       !bpf_map_write_active(map);
4437}
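
/* The usual way all three conditions become true (loader-side sketch): the
 * map is created with BPF_F_RDONLY_PROG in map_flags, populated via
 * BPF_MAP_UPDATE_ELEM, and then frozen with the BPF_MAP_FREEZE command, which
 * sets map->frozen and stops further syscall-side writes once any in-flight
 * ones have drained.
 */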
4438
4439static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
4440{
4441	void *ptr;
4442	u64 addr;
4443	int err;
4444
4445	err = map->ops->map_direct_value_addr(map, &addr, off);
4446	if (err)
4447		return err;
4448	ptr = (void *)(long)addr + off;
4449
4450	switch (size) {
4451	case sizeof(u8):
4452		*val = (u64)*(u8 *)ptr;
4453		break;
4454	case sizeof(u16):
4455		*val = (u64)*(u16 *)ptr;
4456		break;
4457	case sizeof(u32):
4458		*val = (u64)*(u32 *)ptr;
4459		break;
4460	case sizeof(u64):
4461		*val = *(u64 *)ptr;
4462		break;
4463	default:
4464		return -EINVAL;
4465	}
4466	return 0;
4467}
4468
4469static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
4470				   struct bpf_reg_state *regs,
4471				   int regno, int off, int size,
4472				   enum bpf_access_type atype,
4473				   int value_regno)
4474{
4475	struct bpf_reg_state *reg = regs + regno;
4476	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
4477	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
4478	enum bpf_type_flag flag = 0;
4479	u32 btf_id;
4480	int ret;
4481
4482	if (off < 0) {
4483		verbose(env,
4484			"R%d is ptr_%s invalid negative access: off=%d\n",
4485			regno, tname, off);
4486		return -EACCES;
4487	}
4488	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4489		char tn_buf[48];
4490
4491		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4492		verbose(env,
4493			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
4494			regno, tname, off, tn_buf);
4495		return -EACCES;
4496	}
4497
4498	if (reg->type & MEM_USER) {
4499		verbose(env,
4500			"R%d is ptr_%s access user memory: off=%d\n",
4501			regno, tname, off);
4502		return -EACCES;
4503	}
4504
4505	if (reg->type & MEM_PERCPU) {
4506		verbose(env,
4507			"R%d is ptr_%s access percpu memory: off=%d\n",
4508			regno, tname, off);
4509		return -EACCES;
4510	}
4511
4512	if (env->ops->btf_struct_access) {
4513		ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
4514						  off, size, atype, &btf_id, &flag);
4515	} else {
4516		if (atype != BPF_READ) {
4517			verbose(env, "only read is supported\n");
4518			return -EACCES;
4519		}
4520
4521		ret = btf_struct_access(&env->log, reg->btf, t, off, size,
4522					atype, &btf_id, &flag);
4523	}
4524
4525	if (ret < 0)
4526		return ret;
4527
4528	/* If this is an untrusted pointer, all pointers formed by walking it
4529	 * also inherit the untrusted flag.
4530	 */
4531	if (type_flag(reg->type) & PTR_UNTRUSTED)
4532		flag |= PTR_UNTRUSTED;
4533
4534	if (atype == BPF_READ && value_regno >= 0)
4535		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
4536
4537	return 0;
4538}
4539
4540static int check_ptr_to_map_access(struct bpf_verifier_env *env,
4541				   struct bpf_reg_state *regs,
4542				   int regno, int off, int size,
4543				   enum bpf_access_type atype,
4544				   int value_regno)
4545{
4546	struct bpf_reg_state *reg = regs + regno;
4547	struct bpf_map *map = reg->map_ptr;
4548	enum bpf_type_flag flag = 0;
4549	const struct btf_type *t;
4550	const char *tname;
4551	u32 btf_id;
4552	int ret;
4553
4554	if (!btf_vmlinux) {
4555		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
4556		return -ENOTSUPP;
4557	}
4558
4559	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
4560		verbose(env, "map_ptr access not supported for map type %d\n",
4561			map->map_type);
4562		return -ENOTSUPP;
4563	}
4564
4565	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
4566	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
4567
4568	if (!env->allow_ptr_to_map_access) {
4569		verbose(env,
4570			"%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4571			tname);
4572		return -EPERM;
4573	}
4574
4575	if (off < 0) {
4576		verbose(env, "R%d is %s invalid negative access: off=%d\n",
4577			regno, tname, off);
4578		return -EACCES;
4579	}
4580
4581	if (atype != BPF_READ) {
4582		verbose(env, "only read from %s is supported\n", tname);
4583		return -EACCES;
4584	}
4585
4586	ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag);
4587	if (ret < 0)
4588		return ret;
4589
4590	if (value_regno >= 0)
4591		mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
4592
4593	return 0;
4594}
4595
4596/* Check that the stack access at the given offset is within bounds. The
4597 * maximum valid offset is -1.
4598 *
4599 * The minimum valid offset is -MAX_BPF_STACK for writes, and
4600 * -state->allocated_stack for reads.
4601 */
4602static int check_stack_slot_within_bounds(int off,
4603					  struct bpf_func_state *state,
4604					  enum bpf_access_type t)
4605{
4606	int min_valid_off;
4607
4608	if (t == BPF_WRITE)
4609		min_valid_off = -MAX_BPF_STACK;
4610	else
4611		min_valid_off = -state->allocated_stack;
4612
4613	if (off < min_valid_off || off > -1)
4614		return -EACCES;
4615	return 0;
4616}
4617
4618/* Check that the stack access at 'regno + off' falls within the maximum stack
4619 * bounds.
4620 *
4621 * 'off' includes 'regno->off', but not its dynamic part (if any).
4622 */
4623static int check_stack_access_within_bounds(
4624		struct bpf_verifier_env *env,
4625		int regno, int off, int access_size,
4626		enum bpf_access_src src, enum bpf_access_type type)
4627{
4628	struct bpf_reg_state *regs = cur_regs(env);
4629	struct bpf_reg_state *reg = regs + regno;
4630	struct bpf_func_state *state = func(env, reg);
4631	int min_off, max_off;
4632	int err;
4633	char *err_extra;
4634
4635	if (src == ACCESS_HELPER)
4636		/* We don't know if helpers are reading or writing (or both). */
4637		err_extra = " indirect access to";
4638	else if (type == BPF_READ)
4639		err_extra = " read from";
4640	else
4641		err_extra = " write to";
4642
4643	if (tnum_is_const(reg->var_off)) {
4644		min_off = reg->var_off.value + off;
4645		if (access_size > 0)
4646			max_off = min_off + access_size - 1;
4647		else
4648			max_off = min_off;
4649	} else {
4650		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
4651		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
4652			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
4653				err_extra, regno);
4654			return -EACCES;
4655		}
4656		min_off = reg->smin_value + off;
4657		if (access_size > 0)
4658			max_off = reg->smax_value + off + access_size - 1;
4659		else
4660			max_off = min_off;
4661	}
4662
4663	err = check_stack_slot_within_bounds(min_off, state, type);
4664	if (!err)
4665		err = check_stack_slot_within_bounds(max_off, state, type);
4666
4667	if (err) {
4668		if (tnum_is_const(reg->var_off)) {
4669			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
4670				err_extra, regno, off, access_size);
4671		} else {
4672			char tn_buf[48];
4673
4674			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4675			verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
4676				err_extra, regno, tn_buf, access_size);
4677		}
4678	}
4679	return err;
4680}
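
/* Example rejections from the bounds check above (fixed-offset case): an
 * 8-byte write at fp-520 has min_off == -520 < -MAX_BPF_STACK and fails with
 * "invalid write to stack R10 off=-520 size=8", and a read at fp+0 fails
 * because the maximum valid offset is -1.
 */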
4681
4682/* check whether memory at (regno + off) is accessible for t = (read | write)
4683 * if t==write, value_regno is a register whose value is stored into memory
4684 * if t==read, value_regno is a register which will receive the value from memory
4685 * if t==write && value_regno==-1, some unknown value is stored into memory
4686 * if t==read && value_regno==-1, don't care what we read from memory
4687 */
4688static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
4689			    int off, int bpf_size, enum bpf_access_type t,
4690			    int value_regno, bool strict_alignment_once)
4691{
4692	struct bpf_reg_state *regs = cur_regs(env);
4693	struct bpf_reg_state *reg = regs + regno;
4694	struct bpf_func_state *state;
4695	int size, err = 0;
4696
4697	size = bpf_size_to_bytes(bpf_size);
4698	if (size < 0)
4699		return size;
4700
4701	/* alignment checks will add in reg->off themselves */
4702	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
4703	if (err)
4704		return err;
4705
4706	/* for access checks, reg->off is just part of off */
4707	off += reg->off;
4708
4709	if (reg->type == PTR_TO_MAP_KEY) {
4710		if (t == BPF_WRITE) {
4711			verbose(env, "write to change key R%d not allowed\n", regno);
4712			return -EACCES;
4713		}
4714
4715		err = check_mem_region_access(env, regno, off, size,
4716					      reg->map_ptr->key_size, false);
4717		if (err)
4718			return err;
4719		if (value_regno >= 0)
4720			mark_reg_unknown(env, regs, value_regno);
4721	} else if (reg->type == PTR_TO_MAP_VALUE) {
4722		struct bpf_map_value_off_desc *kptr_off_desc = NULL;
4723
4724		if (t == BPF_WRITE && value_regno >= 0 &&
4725		    is_pointer_value(env, value_regno)) {
4726			verbose(env, "R%d leaks addr into map\n", value_regno);
4727			return -EACCES;
4728		}
4729		err = check_map_access_type(env, regno, off, size, t);
4730		if (err)
4731			return err;
4732		err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
4733		if (err)
4734			return err;
4735		if (tnum_is_const(reg->var_off))
4736			kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr,
4737								  off + reg->var_off.value);
4738		if (kptr_off_desc) {
4739			err = check_map_kptr_access(env, regno, value_regno, insn_idx,
4740						    kptr_off_desc);
4741		} else if (t == BPF_READ && value_regno >= 0) {
4742			struct bpf_map *map = reg->map_ptr;
4743
4744			/* if map is read-only, track its contents as scalars */
4745			if (tnum_is_const(reg->var_off) &&
4746			    bpf_map_is_rdonly(map) &&
4747			    map->ops->map_direct_value_addr) {
4748				int map_off = off + reg->var_off.value;
4749				u64 val = 0;
4750
4751				err = bpf_map_direct_read(map, map_off, size,
4752							  &val);
4753				if (err)
4754					return err;
4755
4756				regs[value_regno].type = SCALAR_VALUE;
4757				__mark_reg_known(&regs[value_regno], val);
4758			} else {
4759				mark_reg_unknown(env, regs, value_regno);
4760			}
4761		}
4762	} else if (base_type(reg->type) == PTR_TO_MEM) {
4763		bool rdonly_mem = type_is_rdonly_mem(reg->type);
4764
4765		if (type_may_be_null(reg->type)) {
4766			verbose(env, "R%d invalid mem access '%s'\n", regno,
4767				reg_type_str(env, reg->type));
4768			return -EACCES;
4769		}
4770
4771		if (t == BPF_WRITE && rdonly_mem) {
4772			verbose(env, "R%d cannot write into %s\n",
4773				regno, reg_type_str(env, reg->type));
4774			return -EACCES;
4775		}
4776
4777		if (t == BPF_WRITE && value_regno >= 0 &&
4778		    is_pointer_value(env, value_regno)) {
4779			verbose(env, "R%d leaks addr into mem\n", value_regno);
4780			return -EACCES;
4781		}
4782
4783		err = check_mem_region_access(env, regno, off, size,
4784					      reg->mem_size, false);
4785		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
4786			mark_reg_unknown(env, regs, value_regno);
4787	} else if (reg->type == PTR_TO_CTX) {
4788		enum bpf_reg_type reg_type = SCALAR_VALUE;
4789		struct btf *btf = NULL;
4790		u32 btf_id = 0;
4791
4792		if (t == BPF_WRITE && value_regno >= 0 &&
4793		    is_pointer_value(env, value_regno)) {
4794			verbose(env, "R%d leaks addr into ctx\n", value_regno);
4795			return -EACCES;
4796		}
4797
4798		err = check_ptr_off_reg(env, reg, regno);
4799		if (err < 0)
4800			return err;
4801
4802		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
4803				       &btf_id);
4804		if (err)
4805			verbose_linfo(env, insn_idx, "; ");
4806		if (!err && t == BPF_READ && value_regno >= 0) {
4807			/* ctx access returns either a scalar, or a
4808			 * PTR_TO_PACKET[_META,_END]. In the latter
4809			 * case, we know the offset is zero.
4810			 */
4811			if (reg_type == SCALAR_VALUE) {
4812				mark_reg_unknown(env, regs, value_regno);
4813			} else {
4814				mark_reg_known_zero(env, regs,
4815						    value_regno);
4816				if (type_may_be_null(reg_type))
4817					regs[value_regno].id = ++env->id_gen;
4818				/* A load of ctx field could have different
4819				 * actual load size with the one encoded in the
4820				 * insn. When the dst is PTR, it is for sure not
4821				 * a sub-register.
4822				 */
4823				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
4824				if (base_type(reg_type) == PTR_TO_BTF_ID) {
4825					regs[value_regno].btf = btf;
4826					regs[value_regno].btf_id = btf_id;
4827				}
4828			}
4829			regs[value_regno].type = reg_type;
4830		}
4831
4832	} else if (reg->type == PTR_TO_STACK) {
4833		/* Basic bounds checks. */
4834		err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
4835		if (err)
4836			return err;
4837
4838		state = func(env, reg);
4839		err = update_stack_depth(env, state, off);
4840		if (err)
4841			return err;
4842
4843		if (t == BPF_READ)
4844			err = check_stack_read(env, regno, off, size,
4845					       value_regno);
4846		else
4847			err = check_stack_write(env, regno, off, size,
4848						value_regno, insn_idx);
4849	} else if (reg_is_pkt_pointer(reg)) {
4850		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
4851			verbose(env, "cannot write into packet\n");
4852			return -EACCES;
4853		}
4854		if (t == BPF_WRITE && value_regno >= 0 &&
4855		    is_pointer_value(env, value_regno)) {
4856			verbose(env, "R%d leaks addr into packet\n",
4857				value_regno);
4858			return -EACCES;
4859		}
4860		err = check_packet_access(env, regno, off, size, false);
4861		if (!err && t == BPF_READ && value_regno >= 0)
4862			mark_reg_unknown(env, regs, value_regno);
4863	} else if (reg->type == PTR_TO_FLOW_KEYS) {
4864		if (t == BPF_WRITE && value_regno >= 0 &&
4865		    is_pointer_value(env, value_regno)) {
4866			verbose(env, "R%d leaks addr into flow keys\n",
4867				value_regno);
4868			return -EACCES;
4869		}
4870
4871		err = check_flow_keys_access(env, off, size);
4872		if (!err && t == BPF_READ && value_regno >= 0)
4873			mark_reg_unknown(env, regs, value_regno);
4874	} else if (type_is_sk_pointer(reg->type)) {
4875		if (t == BPF_WRITE) {
4876			verbose(env, "R%d cannot write into %s\n",
4877				regno, reg_type_str(env, reg->type));
4878			return -EACCES;
4879		}
4880		err = check_sock_access(env, insn_idx, regno, off, size, t);
4881		if (!err && value_regno >= 0)
4882			mark_reg_unknown(env, regs, value_regno);
4883	} else if (reg->type == PTR_TO_TP_BUFFER) {
4884		err = check_tp_buffer_access(env, reg, regno, off, size);
4885		if (!err && t == BPF_READ && value_regno >= 0)
4886			mark_reg_unknown(env, regs, value_regno);
4887	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
4888		   !type_may_be_null(reg->type)) {
4889		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
4890					      value_regno);
4891	} else if (reg->type == CONST_PTR_TO_MAP) {
4892		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
4893					      value_regno);
4894	} else if (base_type(reg->type) == PTR_TO_BUF) {
4895		bool rdonly_mem = type_is_rdonly_mem(reg->type);
4896		u32 *max_access;
4897
4898		if (rdonly_mem) {
4899			if (t == BPF_WRITE) {
4900				verbose(env, "R%d cannot write into %s\n",
4901					regno, reg_type_str(env, reg->type));
4902				return -EACCES;
4903			}
4904			max_access = &env->prog->aux->max_rdonly_access;
4905		} else {
4906			max_access = &env->prog->aux->max_rdwr_access;
4907		}
4908
4909		err = check_buffer_access(env, reg, regno, off, size, false,
4910					  max_access);
4911
4912		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
4913			mark_reg_unknown(env, regs, value_regno);
4914	} else {
4915		verbose(env, "R%d invalid mem access '%s'\n", regno,
4916			reg_type_str(env, reg->type));
4917		return -EACCES;
4918	}
4919
4920	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
4921	    regs[value_regno].type == SCALAR_VALUE) {
4922		/* b/h/w load zero-extends, mark upper bits as known 0 */
4923		coerce_reg_to_size(&regs[value_regno], size);
4924	}
4925	return err;
4926}
4927
4928static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
4929{
4930	int load_reg;
4931	int err;
4932
4933	switch (insn->imm) {
4934	case BPF_ADD:
4935	case BPF_ADD | BPF_FETCH:
4936	case BPF_AND:
4937	case BPF_AND | BPF_FETCH:
4938	case BPF_OR:
4939	case BPF_OR | BPF_FETCH:
4940	case BPF_XOR:
4941	case BPF_XOR | BPF_FETCH:
4942	case BPF_XCHG:
4943	case BPF_CMPXCHG:
4944		break;
4945	default:
4946		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
4947		return -EINVAL;
4948	}
4949
4950	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
4951		verbose(env, "invalid atomic operand size\n");
4952		return -EINVAL;
4953	}
4954
4955	/* check src1 operand */
4956	err = check_reg_arg(env, insn->src_reg, SRC_OP);
4957	if (err)
4958		return err;
4959
4960	/* check src2 operand */
4961	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4962	if (err)
4963		return err;
4964
4965	if (insn->imm == BPF_CMPXCHG) {
4966		/* Check comparison of R0 with memory location */
4967		const u32 aux_reg = BPF_REG_0;
4968
4969		err = check_reg_arg(env, aux_reg, SRC_OP);
4970		if (err)
4971			return err;
4972
4973		if (is_pointer_value(env, aux_reg)) {
4974			verbose(env, "R%d leaks addr into mem\n", aux_reg);
4975			return -EACCES;
4976		}
4977	}
4978
4979	if (is_pointer_value(env, insn->src_reg)) {
4980		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
4981		return -EACCES;
4982	}
4983
4984	if (is_ctx_reg(env, insn->dst_reg) ||
4985	    is_pkt_reg(env, insn->dst_reg) ||
4986	    is_flow_key_reg(env, insn->dst_reg) ||
4987	    is_sk_reg(env, insn->dst_reg)) {
4988		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
4989			insn->dst_reg,
4990			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
4991		return -EACCES;
4992	}
4993
4994	if (insn->imm & BPF_FETCH) {
4995		if (insn->imm == BPF_CMPXCHG)
4996			load_reg = BPF_REG_0;
4997		else
4998			load_reg = insn->src_reg;
4999
5000		/* check and record load of old value */
5001		err = check_reg_arg(env, load_reg, DST_OP);
5002		if (err)
5003			return err;
5004	} else {
5005		/* This instruction accesses a memory location but doesn't
5006		 * actually load it into a register.
5007		 */
5008		load_reg = -1;
5009	}
5010
5011	/* Check whether we can read the memory, with second call for fetch
5012	 * case to simulate the register fill.
5013	 */
5014	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5015			       BPF_SIZE(insn->code), BPF_READ, -1, true);
5016	if (!err && load_reg >= 0)
5017		err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5018				       BPF_SIZE(insn->code), BPF_READ, load_reg,
5019				       true);
5020	if (err)
5021		return err;
5022
5023	/* Check whether we can write into the same memory. */
5024	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5025			       BPF_SIZE(insn->code), BPF_WRITE, -1, true);
5026	if (err)
5027		return err;
5028
5029	return 0;
5030}
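
/* Example of the BPF_FETCH handling above, using the BPF_ATOMIC_OP() macro
 * from include/linux/filter.h (operands illustrative, r2 pointing at the
 * stack or a map value):
 *
 *   BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_2, BPF_REG_1, -8),
 *
 * atomically adds r1 to *(u64 *)(r2 - 8) and loads the old value back into
 * r1, so r1 is checked both as a source and, via load_reg, as a destination;
 * the same insn without BPF_FETCH leaves r1 untouched.
 */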
5031
5032/* When register 'regno' is used to read the stack (either directly or through
5033 * a helper function), make sure that it's within the stack boundary and, depending
5034 * on the access type, that all elements of the stack are initialized.
5035 *
5036 * 'off' includes 'regno->off', but not its dynamic part (if any).
5037 *
5038 * All registers that have been spilled on the stack in the slots within the
5039 * read offsets are marked as read.
5040 */
5041static int check_stack_range_initialized(
5042		struct bpf_verifier_env *env, int regno, int off,
5043		int access_size, bool zero_size_allowed,
5044		enum bpf_access_src type, struct bpf_call_arg_meta *meta)
5045{
5046	struct bpf_reg_state *reg = reg_state(env, regno);
5047	struct bpf_func_state *state = func(env, reg);
5048	int err, min_off, max_off, i, j, slot, spi;
5049	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
5050	enum bpf_access_type bounds_check_type;
5051	/* Some accesses can write anything into the stack, others are
5052	 * read-only.
5053	 */
5054	bool clobber = false;
5055
5056	if (access_size == 0 && !zero_size_allowed) {
5057		verbose(env, "invalid zero-sized read\n");
5058		return -EACCES;
5059	}
5060
5061	if (type == ACCESS_HELPER) {
5062		/* The bounds checks for writes are more permissive than for
5063		 * reads. However, if raw_mode is not set, we'll do extra
5064		 * checks below.
5065		 */
5066		bounds_check_type = BPF_WRITE;
5067		clobber = true;
5068	} else {
5069		bounds_check_type = BPF_READ;
5070	}
5071	err = check_stack_access_within_bounds(env, regno, off, access_size,
5072					       type, bounds_check_type);
5073	if (err)
5074		return err;
5075
5076
5077	if (tnum_is_const(reg->var_off)) {
5078		min_off = max_off = reg->var_off.value + off;
5079	} else {
5080		/* Variable offset is prohibited for unprivileged mode for
5081		 * simplicity since it requires corresponding support in
5082		 * Spectre masking for stack ALU.
5083		 * See also retrieve_ptr_limit().
5084		 */
5085		if (!env->bypass_spec_v1) {
5086			char tn_buf[48];
5087
5088			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5089			verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
5090				regno, err_extra, tn_buf);
5091			return -EACCES;
5092		}
5093		/* Only an initialized buffer on the stack is allowed to be
5094		 * accessed with a variable offset. With an uninitialized buffer
5095		 * it's hard to guarantee that the whole memory is marked as
5096		 * initialized on helper return, since the specific bounds are
5097		 * unknown, which may cause uninitialized stack leaking.
5098		 */
5099		if (meta && meta->raw_mode)
5100			meta = NULL;
5101
5102		min_off = reg->smin_value + off;
5103		max_off = reg->smax_value + off;
5104	}
5105
5106	if (meta && meta->raw_mode) {
5107		meta->access_size = access_size;
5108		meta->regno = regno;
5109		return 0;
5110	}
5111
5112	for (i = min_off; i < max_off + access_size; i++) {
5113		u8 *stype;
5114
5115		slot = -i - 1;
5116		spi = slot / BPF_REG_SIZE;
5117		if (state->allocated_stack <= slot)
5118			goto err;
5119		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
5120		if (*stype == STACK_MISC)
5121			goto mark;
5122		if (*stype == STACK_ZERO) {
5123			if (clobber) {
5124				/* helper can write anything into the stack */
5125				*stype = STACK_MISC;
5126			}
5127			goto mark;
5128		}
5129
5130		if (is_spilled_reg(&state->stack[spi]) &&
5131		    base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID)
5132			goto mark;
5133
5134		if (is_spilled_reg(&state->stack[spi]) &&
5135		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
5136		     env->allow_ptr_leaks)) {
5137			if (clobber) {
5138				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
5139				for (j = 0; j < BPF_REG_SIZE; j++)
5140					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
5141			}
5142			goto mark;
5143		}
5144
5145err:
5146		if (tnum_is_const(reg->var_off)) {
5147			verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
5148				err_extra, regno, min_off, i - min_off, access_size);
5149		} else {
5150			char tn_buf[48];
5151
5152			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5153			verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
5154				err_extra, regno, tn_buf, i - min_off, access_size);
5155		}
5156		return -EACCES;
5157mark:
5158		/* reading any byte out of 8-byte 'spill_slot' will cause
5159		 * the whole slot to be marked as 'read'
5160		 */
5161		mark_reg_read(env, &state->stack[spi].spilled_ptr,
5162			      state->stack[spi].spilled_ptr.parent,
5163			      REG_LIVE_READ64);
5164	}
5165	return update_stack_depth(env, state, min_off);
5166}
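
/* Typical rejection from the loop above (sketch; the helper choice is
 * illustrative): passing a never-written stack buffer as a map key, e.g.
 *
 *   char key[4];                           // not initialized
 *   bpf_map_lookup_elem(&some_map, key);
 *
 * fails with "invalid indirect read from stack ..." because none of the
 * touched slots are STACK_MISC, STACK_ZERO or a spilled register.
 */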
5167
5168static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
5169				   int access_size, bool zero_size_allowed,
5170				   struct bpf_call_arg_meta *meta)
5171{
5172	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5173	u32 *max_access;
5174
5175	switch (base_type(reg->type)) {
5176	case PTR_TO_PACKET:
5177	case PTR_TO_PACKET_META:
5178		return check_packet_access(env, regno, reg->off, access_size,
5179					   zero_size_allowed);
5180	case PTR_TO_MAP_KEY:
5181		if (meta && meta->raw_mode) {
5182			verbose(env, "R%d cannot write into %s\n", regno,
5183				reg_type_str(env, reg->type));
5184			return -EACCES;
5185		}
5186		return check_mem_region_access(env, regno, reg->off, access_size,
5187					       reg->map_ptr->key_size, false);
5188	case PTR_TO_MAP_VALUE:
5189		if (check_map_access_type(env, regno, reg->off, access_size,
5190					  meta && meta->raw_mode ? BPF_WRITE :
5191					  BPF_READ))
5192			return -EACCES;
5193		return check_map_access(env, regno, reg->off, access_size,
5194					zero_size_allowed, ACCESS_HELPER);
5195	case PTR_TO_MEM:
5196		if (type_is_rdonly_mem(reg->type)) {
5197			if (meta && meta->raw_mode) {
5198				verbose(env, "R%d cannot write into %s\n", regno,
5199					reg_type_str(env, reg->type));
5200				return -EACCES;
5201			}
5202		}
5203		return check_mem_region_access(env, regno, reg->off,
5204					       access_size, reg->mem_size,
5205					       zero_size_allowed);
5206	case PTR_TO_BUF:
5207		if (type_is_rdonly_mem(reg->type)) {
5208			if (meta && meta->raw_mode) {
5209				verbose(env, "R%d cannot write into %s\n", regno,
5210					reg_type_str(env, reg->type));
5211				return -EACCES;
5212			}
5213
5214			max_access = &env->prog->aux->max_rdonly_access;
5215		} else {
5216			max_access = &env->prog->aux->max_rdwr_access;
5217		}
5218		return check_buffer_access(env, reg, regno, reg->off,
5219					   access_size, zero_size_allowed,
5220					   max_access);
5221	case PTR_TO_STACK:
5222		return check_stack_range_initialized(
5223				env,
5224				regno, reg->off, access_size,
5225				zero_size_allowed, ACCESS_HELPER, meta);
5226	default: /* scalar_value or invalid ptr */
5227		/* Allow zero-byte read from NULL, regardless of pointer type */
5228		if (zero_size_allowed && access_size == 0 &&
5229		    register_is_null(reg))
5230			return 0;
5231
5232		verbose(env, "R%d type=%s ", regno,
5233			reg_type_str(env, reg->type));
5234		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
5235		return -EACCES;
5236	}
5237}
5238
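/* Check a size argument held in register 'regno' (ARG_CONST_SIZE and
 * friends): the size must be a non-negative, bounded scalar, and the memory
 * pointed to by the preceding register (regno - 1) is validated against the
 * largest size the register may hold.
 */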
5239static int check_mem_size_reg(struct bpf_verifier_env *env,
5240			      struct bpf_reg_state *reg, u32 regno,
5241			      bool zero_size_allowed,
5242			      struct bpf_call_arg_meta *meta)
5243{
5244	int err;
5245
5246	/* This is used to refine r0 return value bounds for helpers
5247	 * that enforce this value as an upper bound on return values.
5248	 * See do_refine_retval_range() for helpers that can refine
5249	 * the return value. The C type of the size is u32, so the bound
5250	 * is pulled from umax_value; if it can be negative, the verifier
5251	 * errors out. Only upper bounds can be learned because retval is
5252	 * an int type and negative retvals are allowed.
5253	 */
5254	meta->msize_max_value = reg->umax_value;
5255
5256	/* The register is SCALAR_VALUE; the access check
5257	 * happens using its boundaries.
5258	 */
5259	if (!tnum_is_const(reg->var_off))
5260		/* For unprivileged variable accesses, disable raw
5261		 * mode so that the program is required to
5262		 * initialize all the memory that the helper could
5263		 * just partially fill up.
5264		 */
5265		meta = NULL;
5266
5267	if (reg->smin_value < 0) {
5268		verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
5269			regno);
5270		return -EACCES;
5271	}
5272
5273	if (reg->umin_value == 0) {
5274		err = check_helper_mem_access(env, regno - 1, 0,
5275					      zero_size_allowed,
5276					      meta);
5277		if (err)
5278			return err;
5279	}
5280
5281	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
5282		verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
5283			regno);
5284		return -EACCES;
5285	}
5286	err = check_helper_mem_access(env, regno - 1,
5287				      reg->umax_value,
5288				      zero_size_allowed, meta);
5289	if (!err)
5290		err = mark_chain_precision(env, regno);
5291	return err;
5292}
5293
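/* Check that register 'regno' points to a memory region of at least
 * 'mem_size' bytes that the program may both read and write. A NULL
 * register is accepted; a maybe-NULL pointer is temporarily marked
 * non-NULL for the check and then restored.
 */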
5294int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5295		   u32 regno, u32 mem_size)
5296{
5297	bool may_be_null = type_may_be_null(reg->type);
5298	struct bpf_reg_state saved_reg;
5299	struct bpf_call_arg_meta meta;
5300	int err;
5301
5302	if (register_is_null(reg))
5303		return 0;
5304
5305	memset(&meta, 0, sizeof(meta));
5306	/* Assuming that the register contains a value, check if the memory
5307	 * access is safe. Temporarily save and restore the register's state,
5308	 * as the conversion shouldn't be visible to the caller.
5309	 */
5310	if (may_be_null) {
5311		saved_reg = *reg;
5312		mark_ptr_not_null_reg(reg);
5313	}
5314
5315	err = check_helper_mem_access(env, regno, mem_size, true, &meta);
5316	/* Check access for BPF_WRITE */
5317	meta.raw_mode = true;
5318	err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
5319
5320	if (may_be_null)
5321		*reg = saved_reg;
5322
5323	return err;
5324}
5325
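/* Kfunc variant of the size check: register 'regno' holds the size and the
 * preceding register (regno - 1) holds the memory pointer. Both read and
 * write access to the pointed-to memory are validated.
 */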
5326int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5327			     u32 regno)
5328{
5329	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
5330	bool may_be_null = type_may_be_null(mem_reg->type);
5331	struct bpf_reg_state saved_reg;
5332	struct bpf_call_arg_meta meta;
5333	int err;
5334
5335	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
5336
5337	memset(&meta, 0, sizeof(meta));
5338
5339	if (may_be_null) {
5340		saved_reg = *mem_reg;
5341		mark_ptr_not_null_reg(mem_reg);
5342	}
5343
5344	err = check_mem_size_reg(env, reg, regno, true, &meta);
5345	/* Check access for BPF_WRITE */
5346	meta.raw_mode = true;
5347	err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
5348
5349	if (may_be_null)
5350		*mem_reg = saved_reg;
5351	return err;
5352}
5353
5354/* Implementation details:
5355 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
5356 * Two bpf_map_lookups (even with the same key) will have different reg->id.
5357 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
5358 * value_or_null->value transition, since the verifier only cares about
5359 * the range of access to valid map value pointer and doesn't care about actual
5360 * address of the map element.
5361 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
5362 * reg->id > 0 after value_or_null->value transition. By doing so
5363 * two bpf_map_lookups will be considered two different pointers that
5364 * point to different bpf_spin_locks.
5365 * The verifier allows taking only one bpf_spin_lock at a time to avoid
5366 * deadlocks.
5367 * Since only one bpf_spin_lock is allowed, the checks are simpler than
5368 * the reg_is_refcounted() logic. The verifier needs to remember only
5369 * one spin_lock instead of an array of acquired_refs.
5370 * cur_state->active_spin_lock remembers which map value element got locked
5371 * and clears it after bpf_spin_unlock.
5372 */
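/* A minimal sketch of the usage pattern this enforces, as seen from a BPF
 * program (the element type, map and key below are hypothetical):
 *
 *	struct hmap_elem {
 *		struct bpf_spin_lock lock;
 *		int cnt;
 *	};
 *
 *	struct hmap_elem *val = bpf_map_lookup_elem(&hmap, &key);
 *
 *	if (!val)
 *		return 0;
 *	bpf_spin_lock(&val->lock);
 *	val->cnt++;
 *	bpf_spin_unlock(&val->lock);
 *
 * process_spin_lock() checks that the register passed to bpf_spin_lock()/
 * bpf_spin_unlock() points exactly at the map value's spin_lock field and
 * tracks the single lock that may be held.
 */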
5373static int process_spin_lock(struct bpf_verifier_env *env, int regno,
5374			     bool is_lock)
5375{
5376	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5377	struct bpf_verifier_state *cur = env->cur_state;
5378	bool is_const = tnum_is_const(reg->var_off);
5379	struct bpf_map *map = reg->map_ptr;
5380	u64 val = reg->var_off.value;
5381
5382	if (!is_const) {
5383		verbose(env,
5384			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
5385			regno);
5386		return -EINVAL;
5387	}
5388	if (!map->btf) {
5389		verbose(env,
5390			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
5391			map->name);
5392		return -EINVAL;
5393	}
5394	if (!map_value_has_spin_lock(map)) {
5395		if (map->spin_lock_off == -E2BIG)
5396			verbose(env,
5397				"map '%s' has more than one 'struct bpf_spin_lock'\n",
5398				map->name);
5399		else if (map->spin_lock_off == -ENOENT)
5400			verbose(env,
5401				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
5402				map->name);
5403		else
5404			verbose(env,
5405				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
5406				map->name);
5407		return -EINVAL;
5408	}
5409	if (map->spin_lock_off != val + reg->off) {
5410		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
5411			val + reg->off);
5412		return -EINVAL;
5413	}
5414	if (is_lock) {
5415		if (cur->active_spin_lock) {
5416			verbose(env,
5417				"Locking two bpf_spin_locks is not allowed\n");
5418			return -EINVAL;
5419		}
5420		cur->active_spin_lock = reg->id;
5421	} else {
5422		if (!cur->active_spin_lock) {
5423			verbose(env, "bpf_spin_unlock without taking a lock\n");
5424			return -EINVAL;
5425		}
5426		if (cur->active_spin_lock != reg->id) {
5427			verbose(env, "bpf_spin_unlock of different lock\n");
5428			return -EINVAL;
5429		}
5430		cur->active_spin_lock = 0;
5431	}
5432	return 0;
5433}
5434
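/* Validate an ARG_PTR_TO_TIMER argument: the register must point, at a
 * constant offset, exactly at the 'struct bpf_timer' field of a
 * BTF-described map value. The map is remembered in 'meta' for use by the
 * timer helper.
 */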
5435static int process_timer_func(struct bpf_verifier_env *env, int regno,
5436			      struct bpf_call_arg_meta *meta)
5437{
5438	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5439	bool is_const = tnum_is_const(reg->var_off);
5440	struct bpf_map *map = reg->map_ptr;
5441	u64 val = reg->var_off.value;
5442
5443	if (!is_const) {
5444		verbose(env,
5445			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
5446			regno);
5447		return -EINVAL;
5448	}
5449	if (!map->btf) {
5450		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
5451			map->name);
5452		return -EINVAL;
5453	}
5454	if (!map_value_has_timer(map)) {
5455		if (map->timer_off == -E2BIG)
5456			verbose(env,
5457				"map '%s' has more than one 'struct bpf_timer'\n",
5458				map->name);
5459		else if (map->timer_off == -ENOENT)
5460			verbose(env,
5461				"map '%s' doesn't have 'struct bpf_timer'\n",
5462				map->name);
5463		else
5464			verbose(env,
5465				"map '%s' is not a struct type or bpf_timer is mangled\n",
5466				map->name);
5467		return -EINVAL;
5468	}
5469	if (map->timer_off != val + reg->off) {
5470		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
5471			val + reg->off, map->timer_off);
5472		return -EINVAL;
5473	}
5474	if (meta->map_ptr) {
5475		verbose(env, "verifier bug. Two map pointers in a timer helper\n");
5476		return -EFAULT;
5477	}
5478	meta->map_uid = reg->map_uid;
5479	meta->map_ptr = map;
5480	return 0;
5481}
5482
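/* Validate the ARG_PTR_TO_KPTR argument of bpf_kptr_xchg(): the register
 * must point, at a constant offset, at a referenced kptr field of a
 * BTF-described map value. The matching offset descriptor is saved in
 * 'meta' for the type check of the exchanged pointer.
 */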
5483static int process_kptr_func(struct bpf_verifier_env *env, int regno,
5484			     struct bpf_call_arg_meta *meta)
5485{
5486	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5487	struct bpf_map_value_off_desc *off_desc;
5488	struct bpf_map *map_ptr = reg->map_ptr;
5489	u32 kptr_off;
5490	int ret;
5491
5492	if (!tnum_is_const(reg->var_off)) {
5493		verbose(env,
5494			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
5495			regno);
5496		return -EINVAL;
5497	}
5498	if (!map_ptr->btf) {
5499		verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
5500			map_ptr->name);
5501		return -EINVAL;
5502	}
5503	if (!map_value_has_kptrs(map_ptr)) {
5504		ret = PTR_ERR_OR_ZERO(map_ptr->kptr_off_tab);
5505		if (ret == -E2BIG)
5506			verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name,
5507				BPF_MAP_VALUE_OFF_MAX);
5508		else if (ret == -EEXIST)
5509			verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name);
5510		else
5511			verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
5512		return -EINVAL;
5513	}
5514
5515	meta->map_ptr = map_ptr;
5516	kptr_off = reg->off + reg->var_off.value;
5517	off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off);
5518	if (!off_desc) {
5519		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
5520		return -EACCES;
5521	}
5522	if (off_desc->type != BPF_KPTR_REF) {
5523		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
5524		return -EACCES;
5525	}
5526	meta->kptr_off_desc = off_desc;
5527	return 0;
5528}
5529
5530static bool arg_type_is_mem_size(enum bpf_arg_type type)
5531{
5532	return type == ARG_CONST_SIZE ||
5533	       type == ARG_CONST_SIZE_OR_ZERO;
5534}
5535
5536static bool arg_type_is_release(enum bpf_arg_type type)
5537{
5538	return type & OBJ_RELEASE;
5539}
5540
5541static bool arg_type_is_dynptr(enum bpf_arg_type type)
5542{
5543	return base_type(type) == ARG_PTR_TO_DYNPTR;
5544}
5545
5546static int int_ptr_type_to_size(enum bpf_arg_type type)
5547{
5548	if (type == ARG_PTR_TO_INT)
5549		return sizeof(u32);
5550	else if (type == ARG_PTR_TO_LONG)
5551		return sizeof(u64);
5552
5553	return -EINVAL;
5554}
5555
5556static int resolve_map_arg_type(struct bpf_verifier_env *env,
5557				 const struct bpf_call_arg_meta *meta,
5558				 enum bpf_arg_type *arg_type)
5559{
5560	if (!meta->map_ptr) {
5561		/* kernel subsystem misconfigured verifier */
5562		verbose(env, "invalid map_ptr to access map->type\n");
5563		return -EACCES;
5564	}
5565
5566	switch (meta->map_ptr->map_type) {
5567	case BPF_MAP_TYPE_SOCKMAP:
5568	case BPF_MAP_TYPE_SOCKHASH:
5569		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
5570			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
5571		} else {
5572			verbose(env, "invalid arg_type for sockmap/sockhash\n");
5573			return -EINVAL;
5574		}
5575		break;
5576	case BPF_MAP_TYPE_BLOOM_FILTER:
5577		if (meta->func_id == BPF_FUNC_map_peek_elem)
5578			*arg_type = ARG_PTR_TO_MAP_VALUE;
5579		break;
5580	default:
5581		break;
5582	}
5583	return 0;
5584}
5585
5586struct bpf_reg_types {
5587	const enum bpf_reg_type types[10];
5588	u32 *btf_id;
5589};
5590
5591static const struct bpf_reg_types map_key_value_types = {
5592	.types = {
5593		PTR_TO_STACK,
5594		PTR_TO_PACKET,
5595		PTR_TO_PACKET_META,
5596		PTR_TO_MAP_KEY,
5597		PTR_TO_MAP_VALUE,
5598	},
5599};
5600
5601static const struct bpf_reg_types sock_types = {
5602	.types = {
5603		PTR_TO_SOCK_COMMON,
5604		PTR_TO_SOCKET,
5605		PTR_TO_TCP_SOCK,
5606		PTR_TO_XDP_SOCK,
5607	},
5608};
5609
5610#ifdef CONFIG_NET
5611static const struct bpf_reg_types btf_id_sock_common_types = {
5612	.types = {
5613		PTR_TO_SOCK_COMMON,
5614		PTR_TO_SOCKET,
5615		PTR_TO_TCP_SOCK,
5616		PTR_TO_XDP_SOCK,
5617		PTR_TO_BTF_ID,
5618	},
5619	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5620};
5621#endif
5622
5623static const struct bpf_reg_types mem_types = {
5624	.types = {
5625		PTR_TO_STACK,
5626		PTR_TO_PACKET,
5627		PTR_TO_PACKET_META,
5628		PTR_TO_MAP_KEY,
5629		PTR_TO_MAP_VALUE,
5630		PTR_TO_MEM,
5631		PTR_TO_MEM | MEM_ALLOC,
5632		PTR_TO_BUF,
5633	},
5634};
5635
5636static const struct bpf_reg_types int_ptr_types = {
5637	.types = {
5638		PTR_TO_STACK,
5639		PTR_TO_PACKET,
5640		PTR_TO_PACKET_META,
5641		PTR_TO_MAP_KEY,
5642		PTR_TO_MAP_VALUE,
5643	},
5644};
5645
5646static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
5647static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
5648static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
5649static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
5650static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
5651static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
5652static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
5653static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
5654static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
5655static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
5656static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
5657static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
5658static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
5659
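/* For each base argument type, the register types a helper will accept.
 * Lists shorter than the array are terminated by the zero-initialized
 * NOT_INIT entry (see the lookup loop in check_reg_type()).
 */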
5660static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
5661	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
5662	[ARG_PTR_TO_MAP_VALUE]		= &map_key_value_types,
5663	[ARG_CONST_SIZE]		= &scalar_types,
5664	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
5665	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
5666	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
5667	[ARG_PTR_TO_CTX]		= &context_types,
5668	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
5669#ifdef CONFIG_NET
5670	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
5671#endif
5672	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
5673	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
5674	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
5675	[ARG_PTR_TO_MEM]		= &mem_types,
5676	[ARG_PTR_TO_ALLOC_MEM]		= &alloc_mem_types,
5677	[ARG_PTR_TO_INT]		= &int_ptr_types,
5678	[ARG_PTR_TO_LONG]		= &int_ptr_types,
5679	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
5680	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
5681	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
5682	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
5683	[ARG_PTR_TO_TIMER]		= &timer_types,
5684	[ARG_PTR_TO_KPTR]		= &kptr_types,
5685	[ARG_PTR_TO_DYNPTR]		= &stack_ptr_types,
5686};
5687
5688static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
5689			  enum bpf_arg_type arg_type,
5690			  const u32 *arg_btf_id,
5691			  struct bpf_call_arg_meta *meta)
5692{
5693	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5694	enum bpf_reg_type expected, type = reg->type;
5695	const struct bpf_reg_types *compatible;
5696	int i, j;
5697
5698	compatible = compatible_reg_types[base_type(arg_type)];
5699	if (!compatible) {
5700		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
5701		return -EFAULT;
5702	}
5703
5704	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
5705	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
5706	 *
5707	 * Same for MAYBE_NULL:
5708	 *
5709	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
5710	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
5711	 *
5712	 * Therefore we fold these flags depending on the arg_type before comparison.
5713	 */
5714	if (arg_type & MEM_RDONLY)
5715		type &= ~MEM_RDONLY;
5716	if (arg_type & PTR_MAYBE_NULL)
5717		type &= ~PTR_MAYBE_NULL;
5718
5719	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
5720		expected = compatible->types[i];
5721		if (expected == NOT_INIT)
5722			break;
5723
5724		if (type == expected)
5725			goto found;
5726	}
5727
5728	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
5729	for (j = 0; j + 1 < i; j++)
5730		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
5731	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
5732	return -EACCES;
5733
5734found:
5735	if (reg->type == PTR_TO_BTF_ID) {
5736		/* For bpf_sk_release, it needs to match against first member
5737		 * 'struct sock_common', hence make an exception for it. This
5738		 * allows bpf_sk_release to work for multiple socket types.
5739		 */
5740		bool strict_type_match = arg_type_is_release(arg_type) &&
5741					 meta->func_id != BPF_FUNC_sk_release;
5742
5743		if (!arg_btf_id) {
5744			if (!compatible->btf_id) {
5745				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
5746				return -EFAULT;
5747			}
5748			arg_btf_id = compatible->btf_id;
5749		}
5750
5751		if (meta->func_id == BPF_FUNC_kptr_xchg) {
5752			if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno))
5753				return -EACCES;
5754		} else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5755						 btf_vmlinux, *arg_btf_id,
5756						 strict_type_match)) {
5757			verbose(env, "R%d is of type %s but %s is expected\n",
5758				regno, kernel_type_name(reg->btf, reg->btf_id),
5759				kernel_type_name(btf_vmlinux, *arg_btf_id));
5760			return -EACCES;
5761		}
5762	}
5763
5764	return 0;
5765}
5766
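/* Decide whether a non-zero register offset (fixed and/or variable) is
 * acceptable for the given argument type; __check_ptr_off_reg() performs
 * the actual offset validation for the cases that reach it.
 */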
5767int check_func_arg_reg_off(struct bpf_verifier_env *env,
5768			   const struct bpf_reg_state *reg, int regno,
5769			   enum bpf_arg_type arg_type)
5770{
5771	enum bpf_reg_type type = reg->type;
5772	bool fixed_off_ok = false;
5773
5774	switch ((u32)type) {
5775	/* Pointer types where reg offset is explicitly allowed: */
5776	case PTR_TO_STACK:
5777		if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
5778			verbose(env, "cannot pass in dynptr at an offset\n");
5779			return -EINVAL;
5780		}
5781		fallthrough;
5782	case PTR_TO_PACKET:
5783	case PTR_TO_PACKET_META:
5784	case PTR_TO_MAP_KEY:
5785	case PTR_TO_MAP_VALUE:
5786	case PTR_TO_MEM:
5787	case PTR_TO_MEM | MEM_RDONLY:
5788	case PTR_TO_MEM | MEM_ALLOC:
5789	case PTR_TO_BUF:
5790	case PTR_TO_BUF | MEM_RDONLY:
5791	case SCALAR_VALUE:
5792		/* Some of the argument types nevertheless require a
5793		 * zero register offset.
5794		 */
5795		if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM)
5796			return 0;
5797		break;
5798	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
5799	 * fixed offset.
5800	 */
5801	case PTR_TO_BTF_ID:
5802		/* When a referenced PTR_TO_BTF_ID is passed to a release function,
5803		 * its fixed offset must be 0. In the other cases, a fixed offset
5804		 * can be non-zero.
5805		 */
5806		if (arg_type_is_release(arg_type) && reg->off) {
5807			verbose(env, "R%d must have zero offset when passed to release func\n",
5808				regno);
5809			return -EINVAL;
5810		}
5811		/* For a release-pointer arg, fixed_off_ok must be false, but
5812		 * we already checked and rejected reg->off != 0 above, so set
5813		 * it to true to allow a fixed offset for all other cases.
5814		 */
5815		fixed_off_ok = true;
5816		break;
5817	default:
5818		break;
5819	}
5820	return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
5821}
5822
5823static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
5824{
5825	struct