// SPDX-License-Identifier: GPL-2.0
/*
 * queue_stack_maps.c: BPF queue and stack maps
 *
 * Copyright (c) 2018 Politecnico di Torino
 */
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"

#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

struct bpf_queue_stack {
	struct bpf_map map;
	raw_spinlock_t lock;
	u32 head, tail;
	u32 size; /* max_entries + 1 */

	char elements[] __aligned(8);
};
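
/*
 * elements[] is a ring buffer: head is the next slot to push into, tail
 * is the oldest element, and one slot is deliberately left unused so that
 * head == tail unambiguously means "empty". With max_entries = 2, for
 * example, size = 3 slots are allocated and at most two of them are ever
 * occupied at once.
 */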

static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
{
	return container_of(map, struct bpf_queue_stack, map);
}

static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
{
	return qs->head == qs->tail;
}

static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
{
	u32 head = qs->head + 1;

	if (unlikely(head >= qs->size))
		head = 0;

	return head == qs->tail;
}

/* Called from syscall */
static int queue_stack_map_alloc_check(union bpf_attr *attr)
{
	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 0 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_queue_stack *qs;
	u64 size, queue_size;

	/* One spare slot, so a full ring never makes head == tail */
	size = (u64) attr->max_entries + 1;
	queue_size = sizeof(*qs) + size * attr->value_size;

	qs = bpf_map_area_alloc(queue_size, numa_node);
	if (!qs)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&qs->map, attr);

	qs->size = size;

	raw_spin_lock_init(&qs->lock);

	return &qs->map;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void queue_stack_map_free(struct bpf_map *map)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);

	bpf_map_area_free(qs);
}

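/*
 * Queue (FIFO) read path: peek or pop the oldest element, which lives at
 * tail. In NMI context the lock is only trylocked: the NMI may have
 * interrupted a context on this CPU that already holds qs->lock, so
 * spinning could deadlock and the operation fails with -EBUSY instead.
 */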
static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, flags);
	}

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	ptr = &qs->elements[qs->tail * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete) {
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}

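/*
 * Stack (LIFO) read path: peek or pop the newest element, which sits just
 * below head at index head - 1.
 */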
static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;
	u32 index;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, flags);
	}

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	/* head == 0 makes the unsigned subtraction wrap, so the bounds
	 * check below folds the index back to the last slot of the ring.
	 */
	index = qs->head - 1;
	if (unlikely(index >= qs->size))
		index = qs->size - 1;

	ptr = &qs->elements[index * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete)
		qs->head = index;

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}

/* Called from syscall or from eBPF program */
static long queue_map_peek_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static long stack_map_peek_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static long queue_map_pop_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static long stack_map_pop_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
				      u64 flags)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long irq_flags;
	int err = 0;
	void *dst;

	/* BPF_EXIST is used to force making room for a new element in case the
	 * map is full
	 */
	bool replace = (flags & BPF_EXIST);

	/* Check supported flags for queue and stack maps */
	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
		return -EINVAL;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, irq_flags);
	}

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
			err = -E2BIG;
			goto out;
		}
		/* advance tail pointer to overwrite oldest element */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

	dst = &qs->elements[qs->head * qs->map.value_size];
	memcpy(dst, value, qs->map.value_size);

	if (unlikely(++qs->head >= qs->size))
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}
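
/*
 * Usage sketch (illustrative only, not part of this file): from a BPF
 * program, queue and stack maps are driven through the
 * bpf_map_push_elem(), bpf_map_pop_elem() and bpf_map_peek_elem()
 * helpers rather than the usual per-key lookup/update operations, e.g.
 * with a libbpf BTF map definition:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_QUEUE);
 *		__uint(max_entries, 16);
 *		__type(value, __u64);
 *	} q SEC(".maps");
 *
 *	__u64 v = 42;
 *	bpf_map_push_elem(&q, &v, BPF_ANY);	// enqueue at head
 *	bpf_map_pop_elem(&q, &v);		// dequeue oldest, from tail
 */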

/*
 * Queue and stack maps are keyless, so the generic per-key callbacks
 * below find nothing or reject the operation; all element access goes
 * through push, pop and peek.
 */

/* Called from syscall or from eBPF program */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* Called from syscall or from eBPF program */
static long queue_stack_map_update_elem(struct bpf_map *map, void *key,
					void *value, u64 flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static long queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called from syscall */
static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
					void *next_key)
{
	return -EINVAL;
}

static u64 queue_stack_map_mem_usage(const struct bpf_map *map)
{
	u64 usage = sizeof(struct bpf_queue_stack);

	/* the ring itself: (max_entries + 1) slots of value_size bytes */
	usage += ((u64)map->max_entries + 1) * map->value_size;
	return usage;
}

BTF_ID_LIST_SINGLE(queue_map_btf_ids, struct, bpf_queue_stack)
const struct bpf_map_ops queue_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = queue_map_pop_elem,
	.map_peek_elem = queue_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
	.map_mem_usage = queue_stack_map_mem_usage,
	.map_btf_id = &queue_map_btf_ids[0],
};

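/* Identical to queue_map_ops except for the LIFO pop and peek callbacks. */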
const struct bpf_map_ops stack_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = stack_map_pop_elem,
	.map_peek_elem = stack_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
	.map_mem_usage = queue_stack_map_mem_usage,
	.map_btf_id = &queue_map_btf_ids[0],
};