1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2020 Cloudflare
3/*
4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
5 * Covers:
 *  1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7 *  2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8 *  3. BPF reuseport helper - bpf_sk_select_reuseport
9 */
10
11#include <linux/compiler.h>
12#include <errno.h>
13#include <error.h>
14#include <limits.h>
15#include <netinet/in.h>
16#include <pthread.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/select.h>
20#include <unistd.h>
21#include <linux/vm_sockets.h>
22
23#include <bpf/bpf.h>
24#include <bpf/libbpf.h>
25
26#include "bpf_util.h"
27#include "test_progs.h"
28#include "test_sockmap_listen.skel.h"
29
30#include "sockmap_helpers.h"
31
32static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
33				int family, int sotype, int mapfd)
34{
35	u32 key = 0;
36	u64 value;
37	int err;
38
39	value = -1;
40	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
41	if (!err || errno != EINVAL)
42		FAIL_ERRNO("map_update: expected EINVAL");
43
44	value = INT_MAX;
45	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
46	if (!err || errno != EBADF)
47		FAIL_ERRNO("map_update: expected EBADF");
48}
49
50static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
51			       int family, int sotype, int mapfd)
52{
53	u32 key = 0;
54	u64 value;
55	int err, s;
56
57	s = xsocket(family, sotype, 0);
58	if (s == -1)
59		return;
60
61	errno = 0;
62	value = s;
63	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
64	if (sotype == SOCK_STREAM) {
65		if (!err || errno != EOPNOTSUPP)
66			FAIL_ERRNO("map_update: expected EOPNOTSUPP");
67	} else if (err)
68		FAIL_ERRNO("map_update: expected success");
69	xclose(s);
70}
71
72static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
73			      int family, int sotype, int mapfd)
74{
75	struct sockaddr_storage addr;
76	socklen_t len = 0;
77	u32 key = 0;
78	u64 value;
79	int err, s;
80
81	init_addr_loopback(family, &addr, &len);
82
83	s = xsocket(family, sotype, 0);
84	if (s == -1)
85		return;
86
87	err = xbind(s, sockaddr(&addr), len);
88	if (err)
89		goto close;
90
91	errno = 0;
92	value = s;
93	err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
94	if (!err || errno != EOPNOTSUPP)
95		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
96close:
97	xclose(s);
98}
99
100static void test_insert(struct test_sockmap_listen *skel __always_unused,
101			int family, int sotype, int mapfd)
102{
103	u64 value;
104	u32 key;
105	int s;
106
107	s = socket_loopback(family, sotype);
108	if (s < 0)
109		return;
110
111	key = 0;
112	value = s;
113	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
114	xclose(s);
115}
116
117static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
118				     int family, int sotype, int mapfd)
119{
120	u64 value;
121	u32 key;
122	int s;
123
124	s = socket_loopback(family, sotype);
125	if (s < 0)
126		return;
127
128	key = 0;
129	value = s;
130	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
131	xbpf_map_delete_elem(mapfd, &key);
132	xclose(s);
133}
134
135static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
136				    int family, int sotype, int mapfd)
137{
138	int err, s;
139	u64 value;
140	u32 key;
141
142	s = socket_loopback(family, sotype);
143	if (s < 0)
144		return;
145
146	key = 0;
147	value = s;
148	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
149
150	xclose(s);
151
152	errno = 0;
153	err = bpf_map_delete_elem(mapfd, &key);
154	if (!err || (errno != EINVAL && errno != ENOENT))
155		/* SOCKMAP and SOCKHASH return different error codes */
156		FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
157}
158
159static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
160				     int family, int sotype, int mapfd)
161{
162	u64 cookie, value;
163	socklen_t len;
164	u32 key;
165	int s;
166
167	s = socket_loopback(family, sotype);
168	if (s < 0)
169		return;
170
171	key = 0;
172	value = s;
173	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
174
175	len = sizeof(cookie);
176	xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
177
178	xbpf_map_lookup_elem(mapfd, &key, &value);
179
180	if (value != cookie) {
181		FAIL("map_lookup: have %#llx, want %#llx",
182		     (unsigned long long)value, (unsigned long long)cookie);
183	}
184
185	xclose(s);
186}
187
188static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
189				     int family, int sotype, int mapfd)
190{
191	int err, s;
192	u64 value;
193	u32 key;
194
195	s = socket_loopback(family, sotype);
196	if (s < 0)
197		return;
198
199	key = 0;
200	value = s;
201	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
202	xbpf_map_delete_elem(mapfd, &key);
203
204	errno = 0;
205	err = bpf_map_lookup_elem(mapfd, &key, &value);
206	if (!err || errno != ENOENT)
207		FAIL_ERRNO("map_lookup: expected ENOENT");
208
209	xclose(s);
210}
211
212static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
213				     int family, int sotype, int mapfd)
214{
215	u32 key, value32;
216	int err, s;
217
218	s = socket_loopback(family, sotype);
219	if (s < 0)
220		return;
221
222	mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key),
223			       sizeof(value32), 1, NULL);
224	if (mapfd < 0) {
225		FAIL_ERRNO("map_create");
226		goto close;
227	}
228
229	key = 0;
230	value32 = s;
231	xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
232
233	errno = 0;
234	err = bpf_map_lookup_elem(mapfd, &key, &value32);
235	if (!err || errno != ENOSPC)
236		FAIL_ERRNO("map_lookup: expected ENOSPC");
237
238	xclose(mapfd);
239close:
240	xclose(s);
241}
242
243static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
244				 int family, int sotype, int mapfd)
245{
246	int s1, s2;
247	u64 value;
248	u32 key;
249
250	s1 = socket_loopback(family, sotype);
251	if (s1 < 0)
252		return;
253
254	s2 = socket_loopback(family, sotype);
255	if (s2 < 0)
256		goto close_s1;
257
258	key = 0;
259	value = s1;
260	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
261
262	value = s2;
263	xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
264	xclose(s2);
265close_s1:
266	xclose(s1);
267}
268
269/* Exercise the code path where we destroy child sockets that never
270 * got accept()'ed, aka orphans, when parent socket gets closed.
271 */
272static void do_destroy_orphan_child(int family, int sotype, int mapfd)
273{
274	struct sockaddr_storage addr;
275	socklen_t len;
276	int err, s, c;
277	u64 value;
278	u32 key;
279
280	s = socket_loopback(family, sotype);
281	if (s < 0)
282		return;
283
284	len = sizeof(addr);
285	err = xgetsockname(s, sockaddr(&addr), &len);
286	if (err)
287		goto close_srv;
288
289	key = 0;
290	value = s;
291	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
292
293	c = xsocket(family, sotype, 0);
294	if (c == -1)
295		goto close_srv;
296
297	xconnect(c, sockaddr(&addr), len);
298	xclose(c);
299close_srv:
300	xclose(s);
301}
302
303static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
304				      int family, int sotype, int mapfd)
305{
306	int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
307	int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
308	const struct test {
309		int progfd;
310		enum bpf_attach_type atype;
311	} tests[] = {
312		{ -1, -1 },
313		{ msg_verdict, BPF_SK_MSG_VERDICT },
314		{ skb_verdict, BPF_SK_SKB_VERDICT },
315	};
316	const struct test *t;
317
318	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
319		if (t->progfd != -1 &&
320		    xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
321			return;
322
323		do_destroy_orphan_child(family, sotype, mapfd);
324
325		if (t->progfd != -1)
326			xbpf_prog_detach2(t->progfd, mapfd, t->atype);
327	}
328}
329
330/* Perform a passive open after removing listening socket from SOCKMAP
331 * to ensure that callbacks get restored properly.
332 */
333static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
334				    int family, int sotype, int mapfd)
335{
336	struct sockaddr_storage addr;
337	socklen_t len;
338	int err, s, c;
339	u64 value;
340	u32 key;
341
342	s = socket_loopback(family, sotype);
343	if (s < 0)
344		return;
345
346	len = sizeof(addr);
347	err = xgetsockname(s, sockaddr(&addr), &len);
348	if (err)
349		goto close_srv;
350
351	key = 0;
352	value = s;
353	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
354	xbpf_map_delete_elem(mapfd, &key);
355
356	c = xsocket(family, sotype, 0);
357	if (c < 0)
358		goto close_srv;
359
360	xconnect(c, sockaddr(&addr), len);
361	xclose(c);
362close_srv:
363	xclose(s);
364}
365
366/* Check that child socket that got created while parent was in a
367 * SOCKMAP, but got accept()'ed only after the parent has been removed
368 * from SOCKMAP, gets cloned without parent psock state or callbacks.
369 */
370static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
371				     int family, int sotype, int mapfd)
372{
373	struct sockaddr_storage addr;
374	const u32 zero = 0;
375	int err, s, c, p;
376	socklen_t len;
377	u64 value;
378
379	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
380	if (s == -1)
381		return;
382
383	len = sizeof(addr);
384	err = xgetsockname(s, sockaddr(&addr), &len);
385	if (err)
386		goto close_srv;
387
388	value = s;
389	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
390	if (err)
391		goto close_srv;
392
393	c = xsocket(family, sotype, 0);
394	if (c == -1)
395		goto close_srv;
396
397	/* Create child while parent is in sockmap */
398	err = xconnect(c, sockaddr(&addr), len);
399	if (err)
400		goto close_cli;
401
402	/* Remove parent from sockmap */
403	err = xbpf_map_delete_elem(mapfd, &zero);
404	if (err)
405		goto close_cli;
406
407	p = xaccept_nonblock(s, NULL, NULL);
408	if (p == -1)
409		goto close_cli;
410
411	/* Check that child sk_user_data is not set */
412	value = p;
413	xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
414
415	xclose(p);
416close_cli:
417	xclose(c);
418close_srv:
419	xclose(s);
420}
421
422/* Check that child socket that got created and accepted while parent
423 * was in a SOCKMAP is cloned without parent psock state or callbacks.
424 */
425static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
426				      int family, int sotype, int mapfd)
427{
428	struct sockaddr_storage addr;
429	const u32 zero = 0, one = 1;
430	int err, s, c, p;
431	socklen_t len;
432	u64 value;
433
434	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
435	if (s == -1)
436		return;
437
438	len = sizeof(addr);
439	err = xgetsockname(s, sockaddr(&addr), &len);
440	if (err)
441		goto close_srv;
442
443	value = s;
444	err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
445	if (err)
446		goto close_srv;
447
448	c = xsocket(family, sotype, 0);
449	if (c == -1)
450		goto close_srv;
451
452	/* Create & accept child while parent is in sockmap */
453	err = xconnect(c, sockaddr(&addr), len);
454	if (err)
455		goto close_cli;
456
457	p = xaccept_nonblock(s, NULL, NULL);
458	if (p == -1)
459		goto close_cli;
460
461	/* Check that child sk_user_data is not set */
462	value = p;
463	xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
464
465	xclose(p);
466close_cli:
467	xclose(c);
468close_srv:
469	xclose(s);
470}
471
/* State shared between a test and the helper thread it spawns. */
struct connect_accept_ctx {
	/* Socket FD the helper thread operates on */
	int sockfd;
	/* Written to 1 by the helper thread right before it returns */
	unsigned int done;
	/* Upper bound on the number of loop iterations the thread runs */
	unsigned int nr_iter;
};
477
478static bool is_thread_done(struct connect_accept_ctx *ctx)
479{
480	return READ_ONCE(ctx->done);
481}
482
483static void *connect_accept_thread(void *arg)
484{
485	struct connect_accept_ctx *ctx = arg;
486	struct sockaddr_storage addr;
487	int family, socktype;
488	socklen_t len;
489	int err, i, s;
490
491	s = ctx->sockfd;
492
493	len = sizeof(addr);
494	err = xgetsockname(s, sockaddr(&addr), &len);
495	if (err)
496		goto done;
497
498	len = sizeof(family);
499	err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
500	if (err)
501		goto done;
502
503	len = sizeof(socktype);
504	err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
505	if (err)
506		goto done;
507
508	for (i = 0; i < ctx->nr_iter; i++) {
509		int c, p;
510
511		c = xsocket(family, socktype, 0);
512		if (c < 0)
513			break;
514
515		err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
516		if (err) {
517			xclose(c);
518			break;
519		}
520
521		p = xaccept_nonblock(s, NULL, NULL);
522		if (p < 0) {
523			xclose(c);
524			break;
525		}
526
527		xclose(p);
528		xclose(c);
529	}
530done:
531	WRITE_ONCE(ctx->done, 1);
532	return NULL;
533}
534
535static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
536					int family, int sotype, int mapfd)
537{
538	struct connect_accept_ctx ctx = { 0 };
539	struct sockaddr_storage addr;
540	socklen_t len;
541	u32 zero = 0;
542	pthread_t t;
543	int err, s;
544	u64 value;
545
546	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
547	if (s < 0)
548		return;
549
550	len = sizeof(addr);
551	err = xgetsockname(s, sockaddr(&addr), &len);
552	if (err)
553		goto close;
554
555	ctx.sockfd = s;
556	ctx.nr_iter = 1000;
557
558	err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
559	if (err)
560		goto close;
561
562	value = s;
563	while (!is_thread_done(&ctx)) {
564		err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
565		if (err)
566			break;
567
568		err = xbpf_map_delete_elem(mapfd, &zero);
569		if (err)
570			break;
571	}
572
573	xpthread_join(t, NULL);
574close:
575	xclose(s);
576}
577
578static void *listen_thread(void *arg)
579{
580	struct sockaddr unspec = { AF_UNSPEC };
581	struct connect_accept_ctx *ctx = arg;
582	int err, i, s;
583
584	s = ctx->sockfd;
585
586	for (i = 0; i < ctx->nr_iter; i++) {
587		err = xlisten(s, 1);
588		if (err)
589			break;
590		err = xconnect(s, &unspec, sizeof(unspec));
591		if (err)
592			break;
593	}
594
595	WRITE_ONCE(ctx->done, 1);
596	return NULL;
597}
598
599static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
600				    int family, int socktype, int mapfd)
601{
602	struct connect_accept_ctx ctx = { 0 };
603	const u32 zero = 0;
604	const int one = 1;
605	pthread_t t;
606	int err, s;
607	u64 value;
608
609	s = xsocket(family, socktype, 0);
610	if (s < 0)
611		return;
612
613	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
614	if (err)
615		goto close;
616
617	ctx.sockfd = s;
618	ctx.nr_iter = 10000;
619
620	err = pthread_create(&t, NULL, listen_thread, &ctx);
621	if (err)
622		goto close;
623
624	value = s;
625	while (!is_thread_done(&ctx)) {
626		err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
627		/* Expecting EOPNOTSUPP before listen() */
628		if (err && errno != EOPNOTSUPP) {
629			FAIL_ERRNO("map_update");
630			break;
631		}
632
633		err = bpf_map_delete_elem(mapfd, &zero);
634		/* Expecting no entry after unhash on connect(AF_UNSPEC) */
635		if (err && errno != EINVAL && errno != ENOENT) {
636			FAIL_ERRNO("map_delete");
637			break;
638		}
639	}
640
641	xpthread_join(t, NULL);
642close:
643	xclose(s);
644}
645
646static void zero_verdict_count(int mapfd)
647{
648	unsigned int zero = 0;
649	int key;
650
651	key = SK_DROP;
652	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
653	key = SK_PASS;
654	xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
655}
656
/* Direction in which a redirect test sends its data. */
enum redir_mode {
	REDIR_INGRESS,
	REDIR_EGRESS,
};

/* Map a redirect mode to the prefix used in log messages; values outside
 * the enum yield "unknown".
 */
static const char *redir_mode_str(enum redir_mode mode)
{
	if (mode == REDIR_INGRESS)
		return "ingress";
	if (mode == REDIR_EGRESS)
		return "egress";
	return "unknown";
}
673
674static void redir_to_connected(int family, int sotype, int sock_mapfd,
675			       int verd_mapfd, enum redir_mode mode)
676{
677	const char *log_prefix = redir_mode_str(mode);
678	int s, c0, c1, p0, p1;
679	unsigned int pass;
680	int err, n;
681	u32 key;
682	char b;
683
684	zero_verdict_count(verd_mapfd);
685
686	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
687	if (s < 0)
688		return;
689
690	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
691	if (err)
692		goto close_srv;
693
694	err = add_to_sockmap(sock_mapfd, p0, p1);
695	if (err)
696		goto close;
697
698	n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
699	if (n < 0)
700		FAIL_ERRNO("%s: write", log_prefix);
701	if (n == 0)
702		FAIL("%s: incomplete write", log_prefix);
703	if (n < 1)
704		goto close;
705
706	key = SK_PASS;
707	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
708	if (err)
709		goto close;
710	if (pass != 1)
711		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
712	n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
713	if (n < 0)
714		FAIL_ERRNO("%s: recv_timeout", log_prefix);
715	if (n == 0)
716		FAIL("%s: incomplete recv", log_prefix);
717
718close:
719	xclose(p1);
720	xclose(c1);
721	xclose(p0);
722	xclose(c0);
723close_srv:
724	xclose(s);
725}
726
727static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
728					struct bpf_map *inner_map, int family,
729					int sotype)
730{
731	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
732	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
733	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
734	int sock_map = bpf_map__fd(inner_map);
735	int err;
736
737	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
738	if (err)
739		return;
740	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
741	if (err)
742		goto detach;
743
744	redir_to_connected(family, sotype, sock_map, verdict_map,
745			   REDIR_INGRESS);
746
747	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
748detach:
749	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
750}
751
752static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
753					struct bpf_map *inner_map, int family,
754					int sotype)
755{
756	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
757	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
758	int sock_map = bpf_map__fd(inner_map);
759	int err;
760
761	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
762	if (err)
763		return;
764
765	redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
766
767	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
768}
769
770static void redir_to_listening(int family, int sotype, int sock_mapfd,
771			       int verd_mapfd, enum redir_mode mode)
772{
773	const char *log_prefix = redir_mode_str(mode);
774	struct sockaddr_storage addr;
775	int s, c, p, err, n;
776	unsigned int drop;
777	socklen_t len;
778	u32 key;
779
780	zero_verdict_count(verd_mapfd);
781
782	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
783	if (s < 0)
784		return;
785
786	len = sizeof(addr);
787	err = xgetsockname(s, sockaddr(&addr), &len);
788	if (err)
789		goto close_srv;
790
791	c = xsocket(family, sotype, 0);
792	if (c < 0)
793		goto close_srv;
794	err = xconnect(c, sockaddr(&addr), len);
795	if (err)
796		goto close_cli;
797
798	p = xaccept_nonblock(s, NULL, NULL);
799	if (p < 0)
800		goto close_cli;
801
802	err = add_to_sockmap(sock_mapfd, s, p);
803	if (err)
804		goto close_peer;
805
806	n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
807	if (n < 0 && errno != EACCES)
808		FAIL_ERRNO("%s: write", log_prefix);
809	if (n == 0)
810		FAIL("%s: incomplete write", log_prefix);
811	if (n < 1)
812		goto close_peer;
813
814	key = SK_DROP;
815	err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
816	if (err)
817		goto close_peer;
818	if (drop != 1)
819		FAIL("%s: want drop count 1, have %d", log_prefix, drop);
820
821close_peer:
822	xclose(p);
823close_cli:
824	xclose(c);
825close_srv:
826	xclose(s);
827}
828
829static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
830					struct bpf_map *inner_map, int family,
831					int sotype)
832{
833	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
834	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
835	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
836	int sock_map = bpf_map__fd(inner_map);
837	int err;
838
839	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
840	if (err)
841		return;
842	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
843	if (err)
844		goto detach;
845
846	redir_to_listening(family, sotype, sock_map, verdict_map,
847			   REDIR_INGRESS);
848
849	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
850detach:
851	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
852}
853
854static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
855					struct bpf_map *inner_map, int family,
856					int sotype)
857{
858	int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
859	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
860	int sock_map = bpf_map__fd(inner_map);
861	int err;
862
863	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
864	if (err)
865		return;
866
867	redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
868
869	xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
870}
871
/* Store the value 3 (strlen("abc")) at key 0 of parser_map, send the
 * 4-byte buffer "abc\0" through client 1 and expect exactly 3 bytes to
 * arrive on client 0. Key 0 of parser_map is reset to 0 on the way out.
 *
 * NOTE(review): how the parser program consumes parser_map is not visible
 * here; presumably the value is the length it reports per message.
 */
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
{
	int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
	int err, n, key, value;
	char buf[] = "abc";
	/* Signed copies of the sizes, so that a negative return value is
	 * not promoted to a huge unsigned number in the comparisons below.
	 */
	const int full_len = sizeof(buf);
	const int want_len = sizeof(buf) - 1;

	key = 0;
	value = want_len;
	err = xbpf_map_update_elem(parser_map, &key, &value, 0);
	if (err)
		return;

	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
	if (s < 0)
		goto clean_parser_map;

	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
	if (err)
		goto close_srv;

	err = add_to_sockmap(sock_map, p0, p1);
	if (err)
		goto close;

	n = xsend(c1, buf, sizeof(buf), 0);
	if (n < full_len)
		FAIL("incomplete write");

	n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
	if (n != want_len)
		FAIL("expect %zu, received %d", sizeof(buf) - 1, n);

close:
	xclose(c0);
	xclose(p0);
	xclose(c1);
	xclose(p1);
close_srv:
	xclose(s);

clean_parser_map:
	key = 0;
	value = 0;
	xbpf_map_update_elem(parser_map, &key, &value, 0);
}
917
918static void test_skb_redir_partial(struct test_sockmap_listen *skel,
919				   struct bpf_map *inner_map, int family,
920				   int sotype)
921{
922	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
923	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
924	int parser_map = bpf_map__fd(skel->maps.parser_map);
925	int sock_map = bpf_map__fd(inner_map);
926	int err;
927
928	err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
929	if (err)
930		return;
931
932	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
933	if (err)
934		goto detach;
935
936	redir_partial(family, sotype, sock_map, parser_map);
937
938	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
939detach:
940	xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
941}
942
943static void test_reuseport_select_listening(int family, int sotype,
944					    int sock_map, int verd_map,
945					    int reuseport_prog)
946{
947	struct sockaddr_storage addr;
948	unsigned int pass;
949	int s, c, err;
950	socklen_t len;
951	u64 value;
952	u32 key;
953
954	zero_verdict_count(verd_map);
955
956	s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
957				      reuseport_prog);
958	if (s < 0)
959		return;
960
961	len = sizeof(addr);
962	err = xgetsockname(s, sockaddr(&addr), &len);
963	if (err)
964		goto close_srv;
965
966	key = 0;
967	value = s;
968	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
969	if (err)
970		goto close_srv;
971
972	c = xsocket(family, sotype, 0);
973	if (c < 0)
974		goto close_srv;
975	err = xconnect(c, sockaddr(&addr), len);
976	if (err)
977		goto close_cli;
978
979	if (sotype == SOCK_STREAM) {
980		int p;
981
982		p = xaccept_nonblock(s, NULL, NULL);
983		if (p < 0)
984			goto close_cli;
985		xclose(p);
986	} else {
987		char b = 'a';
988		ssize_t n;
989
990		n = xsend(c, &b, sizeof(b), 0);
991		if (n == -1)
992			goto close_cli;
993
994		n = xrecv_nonblock(s, &b, sizeof(b), 0);
995		if (n == -1)
996			goto close_cli;
997	}
998
999	key = SK_PASS;
1000	err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1001	if (err)
1002		goto close_cli;
1003	if (pass != 1)
1004		FAIL("want pass count 1, have %d", pass);
1005
1006close_cli:
1007	xclose(c);
1008close_srv:
1009	xclose(s);
1010}
1011
/* Point sock_map[0] at a connected socket and check that a new connection
 * attempt to the reuseport server is refused:
 *
 *  1. put the server socket at sock_map[0] and establish a first
 *     connection (c0 <-> p0);
 *  2. replace sock_map[0] with the connected socket p0;
 *  3. connect a second client c1 (for SOCK_DGRAM also send one byte and
 *     wait for a reply) and expect ECONNREFUSED;
 *  4. expect the SK_DROP counter in verd_map to read 1.
 *
 * Every socket opened here is closed on every exit path.
 */
static void test_reuseport_select_connected(int family, int sotype,
					    int sock_map, int verd_map,
					    int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s, c0, c1, p0, err;
	unsigned int drop;
	socklen_t len;
	u64 value;
	u32 key;

	zero_verdict_count(verd_map);

	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s < 0)
		return;

	/* Populate sock_map[0] to avoid ENOENT on first connection */
	key = 0;
	value = s;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	if (err)
		goto close_srv;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_srv;

	c0 = xsocket(family, sotype, 0);
	if (c0 < 0)
		goto close_srv;

	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	if (sotype == SOCK_STREAM) {
		p0 = xaccept_nonblock(s, NULL, NULL);
		if (p0 < 0)
			goto close_cli0;
	} else {
		p0 = xsocket(family, sotype, 0);
		if (p0 < 0)
			goto close_cli0;

		/* p0 is open from here on: failures must go through
		 * close_peer0 so it does not leak.
		 */
		len = sizeof(addr);
		err = xgetsockname(c0, sockaddr(&addr), &len);
		if (err)
			goto close_peer0;

		err = xconnect(p0, sockaddr(&addr), len);
		if (err)
			goto close_peer0;
	}

	/* Update sock_map[0] to redirect to a connected socket */
	key = 0;
	value = p0;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
	if (err)
		goto close_peer0;

	c1 = xsocket(family, sotype, 0);
	if (c1 < 0)
		goto close_peer0;

	/* addr may hold c0's address by now; fetch the server's again */
	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_cli1;

	errno = 0;
	err = connect(c1, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		n = xsend(c1, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli1;

		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED)
		FAIL_ERRNO("connect: expected ECONNREFUSED");

	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli1;
	if (drop != 1)
		FAIL("want drop count 1, have %d", drop);

close_cli1:
	xclose(c1);
close_peer0:
	xclose(p0);
close_cli0:
	xclose(c0);
close_srv:
	xclose(s);
}
1116
/* Check that redirecting across reuseport groups is not allowed.
 *
 * Two reuseport servers are created and both are added to sock_map. A
 * client then connects to the second one (for SOCK_DGRAM it also sends
 * one byte and waits for a reply). The attempt is expected to fail with
 * ECONNREFUSED and the SK_DROP counter in verd_map must read 1.
 */
static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
					int verd_map, int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s1, s2, c, err;
	unsigned int drop;
	socklen_t len;
	u32 key;

	zero_verdict_count(verd_map);

	/* Create two listeners, each in its own reuseport group */
	s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s1 < 0)
		return;

	s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s2 < 0)
		goto close_srv1;

	err = add_to_sockmap(sock_map, s1, s2);
	if (err)
		goto close_srv2;

	/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
	len = sizeof(addr);
	err = xgetsockname(s2, sockaddr(&addr), &len);
	if (err)
		goto close_srv2;

	c = xsocket(family, sotype, 0);
	if (c < 0)
		goto close_srv2;

	/* Clear errno so the report below cannot show a stale error left
	 * behind by an earlier call.
	 */
	errno = 0;
	err = connect(c, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		n = xsend(c, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli;

		n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED) {
		FAIL_ERRNO("connect: expected ECONNREFUSED");
		goto close_cli;
	}

	/* Expect drop, can't redirect outside of reuseport group */
	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli;
	if (drop != 1)
		FAIL("want drop count 1, have %d", drop);

close_cli:
	xclose(c);
close_srv2:
	xclose(s2);
close_srv1:
	xclose(s1);
}
1184
/* Expand to a braced initializer for one test-table entry: the function
 * itself, its name as a string literal (stringified from the identifier),
 * followed by whatever extra initializers are passed after it.
 */
#define TEST(fn, ...)                                                          \
	{                                                                      \
		fn, #fn, __VA_ARGS__                                           \
	}
1189
1190static void test_ops_cleanup(const struct bpf_map *map)
1191{
1192	int err, mapfd;
1193	u32 key;
1194
1195	mapfd = bpf_map__fd(map);
1196
1197	for (key = 0; key < bpf_map__max_entries(map); key++) {
1198		err = bpf_map_delete_elem(mapfd, &key);
1199		if (err && errno != EINVAL && errno != ENOENT)
1200			FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1201	}
1202}
1203
/* Return a short display name for an address family, or "unknown" for
 * families this test does not cover.
 */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";
	if (family == AF_UNIX)
		return "Unix";
	if (family == AF_VSOCK)
		return "VSOCK";
	return "unknown";
}
1219
1220static const char *map_type_str(const struct bpf_map *map)
1221{
1222	int type;
1223
1224	if (!map)
1225		return "invalid";
1226	type = bpf_map__type(map);
1227
1228	switch (type) {
1229	case BPF_MAP_TYPE_SOCKMAP:
1230		return "sockmap";
1231	case BPF_MAP_TYPE_SOCKHASH:
1232		return "sockhash";
1233	default:
1234		return "unknown";
1235	}
1236}
1237
/* Return the display name used for a socket type: "UDP" for SOCK_DGRAM,
 * "TCP" for SOCK_STREAM, "unknown" otherwise.
 */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_DGRAM)
		return "UDP";
	if (sotype == SOCK_STREAM)
		return "TCP";
	return "unknown";
}
1249
1250static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1251		     int family, int sotype)
1252{
1253	const struct op_test {
1254		void (*fn)(struct test_sockmap_listen *skel,
1255			   int family, int sotype, int mapfd);
1256		const char *name;
1257		int sotype;
1258	} tests[] = {
1259		/* insert */
1260		TEST(test_insert_invalid),
1261		TEST(test_insert_opened),
1262		TEST(test_insert_bound, SOCK_STREAM),
1263		TEST(test_insert),
1264		/* delete */
1265		TEST(test_delete_after_insert),
1266		TEST(test_delete_after_close),
1267		/* lookup */
1268		TEST(test_lookup_after_insert),
1269		TEST(test_lookup_after_delete),
1270		TEST(test_lookup_32_bit_value),
1271		/* update */
1272		TEST(test_update_existing),
1273		/* races with insert/delete */
1274		TEST(test_destroy_orphan_child, SOCK_STREAM),
1275		TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1276		TEST(test_race_insert_listen, SOCK_STREAM),
1277		/* child clone */
1278		TEST(test_clone_after_delete, SOCK_STREAM),
1279		TEST(test_accept_after_delete, SOCK_STREAM),
1280		TEST(test_accept_before_delete, SOCK_STREAM),
1281	};
1282	const char *family_name, *map_name, *sotype_name;
1283	const struct op_test *t;
1284	char s[MAX_TEST_NAME];
1285	int map_fd;
1286
1287	family_name = family_str(family);
1288	map_name = map_type_str(map);
1289	sotype_name = sotype_str(sotype);
1290	map_fd = bpf_map__fd(map);
1291
1292	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1293		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1294			 sotype_name, t->name);
1295
1296		if (t->sotype != 0 && t->sotype != sotype)
1297			continue;
1298
1299		if (!test__start_subtest(s))
1300			continue;
1301
1302		t->fn(skel, family, sotype, map_fd);
1303		test_ops_cleanup(map);
1304	}
1305}
1306
1307static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1308		       int family, int sotype)
1309{
1310	const struct redir_test {
1311		void (*fn)(struct test_sockmap_listen *skel,
1312			   struct bpf_map *map, int family, int sotype);
1313		const char *name;
1314	} tests[] = {
1315		TEST(test_skb_redir_to_connected),
1316		TEST(test_skb_redir_to_listening),
1317		TEST(test_skb_redir_partial),
1318		TEST(test_msg_redir_to_connected),
1319		TEST(test_msg_redir_to_listening),
1320	};
1321	const char *family_name, *map_name;
1322	const struct redir_test *t;
1323	char s[MAX_TEST_NAME];
1324
1325	family_name = family_str(family);
1326	map_name = map_type_str(map);
1327
1328	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1329		snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1330			 t->name);
1331
1332		if (!test__start_subtest(s))
1333			continue;
1334
1335		t->fn(skel, map, family, sotype);
1336	}
1337}
1338
1339static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
1340				     int sock_mapfd, int nop_mapfd,
1341				     int verd_mapfd, enum redir_mode mode)
1342{
1343	const char *log_prefix = redir_mode_str(mode);
1344	unsigned int pass;
1345	int err, n;
1346	u32 key;
1347	char b;
1348
1349	zero_verdict_count(verd_mapfd);
1350
1351	err = add_to_sockmap(sock_mapfd, peer0, peer1);
1352	if (err)
1353		return;
1354
1355	if (nop_mapfd >= 0) {
1356		err = add_to_sockmap(nop_mapfd, cli0, cli1);
1357		if (err)
1358			return;
1359	}
1360
1361	n = write(cli1, "a", 1);
1362	if (n < 0)
1363		FAIL_ERRNO("%s: write", log_prefix);
1364	if (n == 0)
1365		FAIL("%s: incomplete write", log_prefix);
1366	if (n < 1)
1367		return;
1368
1369	key = SK_PASS;
1370	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1371	if (err)
1372		return;
1373	if (pass != 1)
1374		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1375
1376	n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
1377	if (n < 0)
1378		FAIL_ERRNO("%s: recv_timeout", log_prefix);
1379	if (n == 0)
1380		FAIL("%s: incomplete recv", log_prefix);
1381}
1382
1383static void unix_redir_to_connected(int sotype, int sock_mapfd,
1384			       int verd_mapfd, enum redir_mode mode)
1385{
1386	int c0, c1, p0, p1;
1387	int sfd[2];
1388
1389	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1390		return;
1391	c0 = sfd[0], p0 = sfd[1];
1392
1393	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1394		goto close0;
1395	c1 = sfd[0], p1 = sfd[1];
1396
1397	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
1398
1399	xclose(c1);
1400	xclose(p1);
1401close0:
1402	xclose(c0);
1403	xclose(p0);
1404}
1405
1406static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1407					struct bpf_map *inner_map, int sotype)
1408{
1409	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1410	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1411	int sock_map = bpf_map__fd(inner_map);
1412	int err;
1413
1414	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1415	if (err)
1416		return;
1417
1418	skel->bss->test_ingress = false;
1419	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
1420	skel->bss->test_ingress = true;
1421	unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
1422
1423	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1424}
1425
1426static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1427			    int sotype)
1428{
1429	const char *family_name, *map_name;
1430	char s[MAX_TEST_NAME];
1431
1432	family_name = family_str(AF_UNIX);
1433	map_name = map_type_str(map);
1434	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1435	if (!test__start_subtest(s))
1436		return;
1437	unix_skb_redir_to_connected(skel, map, sotype);
1438}
1439
1440/* Returns two connected loopback vsock sockets */
1441static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
1442{
1443	struct sockaddr_storage addr;
1444	socklen_t len = sizeof(addr);
1445	int s, p, c;
1446
1447	s = socket_loopback(AF_VSOCK, sotype);
1448	if (s < 0)
1449		return -1;
1450
1451	c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
1452	if (c == -1)
1453		goto close_srv;
1454
1455	if (getsockname(s, sockaddr(&addr), &len) < 0)
1456		goto close_cli;
1457
1458	if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
1459		FAIL_ERRNO("connect");
1460		goto close_cli;
1461	}
1462
1463	len = sizeof(addr);
1464	p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
1465	if (p < 0)
1466		goto close_cli;
1467
1468	if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
1469		FAIL_ERRNO("poll_connect");
1470		goto close_acc;
1471	}
1472
1473	*v0 = p;
1474	*v1 = c;
1475
1476	return 0;
1477
1478close_acc:
1479	close(p);
1480close_cli:
1481	close(c);
1482close_srv:
1483	close(s);
1484
1485	return -1;
1486}
1487
1488static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
1489					 enum redir_mode mode, int sotype)
1490{
1491	const char *log_prefix = redir_mode_str(mode);
1492	char a = 'a', b = 'b';
1493	int u0, u1, v0, v1;
1494	int sfd[2];
1495	unsigned int pass;
1496	int err, n;
1497	u32 key;
1498
1499	zero_verdict_count(verd_mapfd);
1500
1501	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
1502		return;
1503
1504	u0 = sfd[0];
1505	u1 = sfd[1];
1506
1507	err = vsock_socketpair_connectible(sotype, &v0, &v1);
1508	if (err) {
1509		FAIL("vsock_socketpair_connectible() failed");
1510		goto close_uds;
1511	}
1512
1513	err = add_to_sockmap(sock_mapfd, u0, v0);
1514	if (err) {
1515		FAIL("add_to_sockmap failed");
1516		goto close_vsock;
1517	}
1518
1519	n = write(v1, &a, sizeof(a));
1520	if (n < 0)
1521		FAIL_ERRNO("%s: write", log_prefix);
1522	if (n == 0)
1523		FAIL("%s: incomplete write", log_prefix);
1524	if (n < 1)
1525		goto out;
1526
1527	n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
1528	if (n < 0)
1529		FAIL("%s: recv() err, errno=%d", log_prefix, errno);
1530	if (n == 0)
1531		FAIL("%s: incomplete recv", log_prefix);
1532	if (b != a)
1533		FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
1534
1535	key = SK_PASS;
1536	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1537	if (err)
1538		goto out;
1539	if (pass != 1)
1540		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1541out:
1542	key = 0;
1543	bpf_map_delete_elem(sock_mapfd, &key);
1544	key = 1;
1545	bpf_map_delete_elem(sock_mapfd, &key);
1546
1547close_vsock:
1548	close(v0);
1549	close(v1);
1550
1551close_uds:
1552	close(u0);
1553	close(u1);
1554}
1555
1556static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
1557					     struct bpf_map *inner_map,
1558					     int sotype)
1559{
1560	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1561	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1562	int sock_map = bpf_map__fd(inner_map);
1563	int err;
1564
1565	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1566	if (err)
1567		return;
1568
1569	skel->bss->test_ingress = false;
1570	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
1571	skel->bss->test_ingress = true;
1572	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
1573
1574	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1575}
1576
1577static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
1578{
1579	const char *family_name, *map_name;
1580	char s[MAX_TEST_NAME];
1581
1582	family_name = family_str(AF_VSOCK);
1583	map_name = map_type_str(map);
1584	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1585	if (!test__start_subtest(s))
1586		return;
1587
1588	vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
1589	vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
1590}
1591
1592static void test_reuseport(struct test_sockmap_listen *skel,
1593			   struct bpf_map *map, int family, int sotype)
1594{
1595	const struct reuseport_test {
1596		void (*fn)(int family, int sotype, int socket_map,
1597			   int verdict_map, int reuseport_prog);
1598		const char *name;
1599		int sotype;
1600	} tests[] = {
1601		TEST(test_reuseport_select_listening),
1602		TEST(test_reuseport_select_connected),
1603		TEST(test_reuseport_mixed_groups),
1604	};
1605	int socket_map, verdict_map, reuseport_prog;
1606	const char *family_name, *map_name, *sotype_name;
1607	const struct reuseport_test *t;
1608	char s[MAX_TEST_NAME];
1609
1610	family_name = family_str(family);
1611	map_name = map_type_str(map);
1612	sotype_name = sotype_str(sotype);
1613
1614	socket_map = bpf_map__fd(map);
1615	verdict_map = bpf_map__fd(skel->maps.verdict_map);
1616	reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1617
1618	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1619		snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1620			 sotype_name, t->name);
1621
1622		if (t->sotype != 0 && t->sotype != sotype)
1623			continue;
1624
1625		if (!test__start_subtest(s))
1626			continue;
1627
1628		t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1629	}
1630}
1631
/* Create two non-blocking sockets of @family/@type connected to each other.
 *
 * The first socket comes from socket_loopback(); the second is connected to
 * the first one's address, and the first is then connected back to the
 * second one's address.
 *
 * On success returns 0, storing the first socket in *@s and the second in
 * *@c. On failure returns a non-zero value after closing whatever was opened;
 * *@s and *@c are left untouched.
 */
static int inet_socketpair(int family, int type, int *s, int *c)
{
	struct sockaddr_storage addr;
	socklen_t len;
	int p0, c0;
	int err;

	p0 = socket_loopback(family, type | SOCK_NONBLOCK);
	if (p0 < 0)
		return p0;

	len = sizeof(addr);
	err = xgetsockname(p0, sockaddr(&addr), &len);
	if (err)
		goto close_peer0;

	c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
	if (c0 < 0) {
		err = c0;
		goto close_peer0;
	}
	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	/* len is value-result and now holds the size of p0's address; give
	 * getsockname() the full buffer again instead of relying on both
	 * addresses having the same length.
	 */
	len = sizeof(addr);
	err = xgetsockname(c0, sockaddr(&addr), &len);
	if (err)
		goto close_cli0;
	err = xconnect(p0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	*s = p0;
	*c = c0;
	return 0;

close_cli0:
	xclose(c0);
close_peer0:
	xclose(p0);
	return err;
}
1673
1674static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
1675				   enum redir_mode mode)
1676{
1677	int c0, c1, p0, p1;
1678	int err;
1679
1680	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1681	if (err)
1682		return;
1683	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1684	if (err)
1685		goto close_cli0;
1686
1687	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
1688
1689	xclose(c1);
1690	xclose(p1);
1691close_cli0:
1692	xclose(c0);
1693	xclose(p0);
1694}
1695
1696static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1697				       struct bpf_map *inner_map, int family)
1698{
1699	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1700	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1701	int sock_map = bpf_map__fd(inner_map);
1702	int err;
1703
1704	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1705	if (err)
1706		return;
1707
1708	skel->bss->test_ingress = false;
1709	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
1710	skel->bss->test_ingress = true;
1711	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
1712
1713	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1714}
1715
1716static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1717			   int family)
1718{
1719	const char *family_name, *map_name;
1720	char s[MAX_TEST_NAME];
1721
1722	family_name = family_str(family);
1723	map_name = map_type_str(map);
1724	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1725	if (!test__start_subtest(s))
1726		return;
1727	udp_skb_redir_to_connected(skel, map, family);
1728}
1729
1730static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
1731					int verd_mapfd, enum redir_mode mode)
1732{
1733	int c0, c1, p0, p1;
1734	int sfd[2];
1735	int err;
1736
1737	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1738		return;
1739	c0 = sfd[0], p0 = sfd[1];
1740
1741	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1742	if (err)
1743		goto close;
1744
1745	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
1746
1747	xclose(c1);
1748	xclose(p1);
1749close:
1750	xclose(c0);
1751	xclose(p0);
1752}
1753
1754static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1755					    struct bpf_map *inner_map, int family)
1756{
1757	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1758	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1759	int sock_map = bpf_map__fd(inner_map);
1760	int err;
1761
1762	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1763	if (err)
1764		return;
1765
1766	skel->bss->test_ingress = false;
1767	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1768				    REDIR_EGRESS);
1769	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1770				    REDIR_EGRESS);
1771	skel->bss->test_ingress = true;
1772	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1773				    REDIR_INGRESS);
1774	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1775				    REDIR_INGRESS);
1776
1777	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1778}
1779
1780static void unix_inet_redir_to_connected(int family, int type,
1781					int sock_mapfd, int nop_mapfd,
1782					int verd_mapfd,
1783					enum redir_mode mode)
1784{
1785	int c0, c1, p0, p1;
1786	int sfd[2];
1787	int err;
1788
1789	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1790	if (err)
1791		return;
1792
1793	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1794		goto close_cli0;
1795	c1 = sfd[0], p1 = sfd[1];
1796
1797	pairs_redir_to_connected(c0, p0, c1, p1,
1798				 sock_mapfd, nop_mapfd, verd_mapfd, mode);
1799
1800	xclose(c1);
1801	xclose(p1);
1802close_cli0:
1803	xclose(c0);
1804	xclose(p0);
1805
1806}
1807
1808static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
1809					    struct bpf_map *inner_map, int family)
1810{
1811	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1812	int nop_map = bpf_map__fd(skel->maps.nop_map);
1813	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1814	int sock_map = bpf_map__fd(inner_map);
1815	int err;
1816
1817	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1818	if (err)
1819		return;
1820
1821	skel->bss->test_ingress = false;
1822	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1823				     sock_map, -1, verdict_map,
1824				     REDIR_EGRESS);
1825	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1826				     sock_map, -1, verdict_map,
1827				     REDIR_EGRESS);
1828
1829	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1830				     sock_map, nop_map, verdict_map,
1831				     REDIR_EGRESS);
1832	unix_inet_redir_to_connected(family, SOCK_STREAM,
1833				     sock_map, nop_map, verdict_map,
1834				     REDIR_EGRESS);
1835	skel->bss->test_ingress = true;
1836	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1837				     sock_map, -1, verdict_map,
1838				     REDIR_INGRESS);
1839	unix_inet_redir_to_connected(family, SOCK_STREAM,
1840				     sock_map, -1, verdict_map,
1841				     REDIR_INGRESS);
1842
1843	unix_inet_redir_to_connected(family, SOCK_DGRAM,
1844				     sock_map, nop_map, verdict_map,
1845				     REDIR_INGRESS);
1846	unix_inet_redir_to_connected(family, SOCK_STREAM,
1847				     sock_map, nop_map, verdict_map,
1848				     REDIR_INGRESS);
1849
1850	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1851}
1852
1853static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1854				int family)
1855{
1856	const char *family_name, *map_name;
1857	char s[MAX_TEST_NAME];
1858
1859	family_name = family_str(family);
1860	map_name = map_type_str(map);
1861	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1862	if (!test__start_subtest(s))
1863		return;
1864	inet_unix_skb_redir_to_connected(skel, map, family);
1865	unix_inet_skb_redir_to_connected(skel, map, family);
1866}
1867
/* Run every inet test group against @map for one address family.
 *
 * Order: map operations (SOCK_STREAM, then SOCK_DGRAM), redirects
 * (SOCK_STREAM only), reuseport (SOCK_STREAM, then SOCK_DGRAM), UDP
 * redirect, and finally the mixed UDP/AF_UNIX redirect.
 */
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
		      int family)
{
	static const int sotypes[] = { SOCK_STREAM, SOCK_DGRAM };
	const unsigned int nr_sotypes = sizeof(sotypes) / sizeof(sotypes[0]);
	unsigned int i;

	for (i = 0; i < nr_sotypes; i++)
		test_ops(skel, map, family, sotypes[i]);

	test_redir(skel, map, family, SOCK_STREAM);

	for (i = 0; i < nr_sotypes; i++)
		test_reuseport(skel, map, family, sotypes[i]);

	test_udp_redir(skel, map, family);
	test_udp_unix_redir(skel, map, family);
}
1879
1880void serial_test_sockmap_listen(void)
1881{
1882	struct test_sockmap_listen *skel;
1883
1884	skel = test_sockmap_listen__open_and_load();
1885	if (!skel) {
1886		FAIL("skeleton open/load failed");
1887		return;
1888	}
1889
1890	skel->bss->test_sockmap = true;
1891	run_tests(skel, skel->maps.sock_map, AF_INET);
1892	run_tests(skel, skel->maps.sock_map, AF_INET6);
1893	test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
1894	test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
1895	test_vsock_redir(skel, skel->maps.sock_map);
1896
1897	skel->bss->test_sockmap = false;
1898	run_tests(skel, skel->maps.sock_hash, AF_INET);
1899	run_tests(skel, skel->maps.sock_hash, AF_INET6);
1900	test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
1901	test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
1902	test_vsock_redir(skel, skel->maps.sock_hash);
1903
1904	test_sockmap_listen__destroy(skel);
1905}
1906