1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3/*
4 * Test suite of lwt_xmit BPF programs that redirect packets
 *   The tests check not only that these programs work as expected under
 *   normal conditions, but also that they handle abnormal situations
 *   gracefully.
7 *
8 * WARNING
9 * -------
10 *  This test suite may crash the kernel, thus should be run in a VM.
11 *
12 * Setup:
13 * ---------
 *  All tests are performed in a single netns. Two lwt encap routes are set up
 *  for each subtest:
 *
 *    ip route add 10.0.0.0/24 dev link_err encap bpf xmit obj <obj> sec "<ingress_sec>"
 *    ip route add 20.0.0.0/24 dev link_err encap bpf xmit obj <obj> sec "<egress_sec>"
19 *
 *  Here <obj> is statically defined to test_lwt_redirect.bpf.o, and each section
 *  of this object holds a program entry to test. The BPF object is built from
 *  progs/test_lwt_redirect.c. We do not use a generated BPF skeleton since
 *  attaching lwt programs is not supported by libbpf yet.
24 *
25 *  For testing, ping commands are run in the test netns:
26 *
 *    ping 10.0.0.<ifindex> -c1 -W1 -s 100
 *    ping 20.0.0.<ifindex> -c1 -W1 -s 100
29 *
30 * Scenarios:
31 * --------------------------------
32 *  1. Redirect to a running tap/tun device
33 *  2. Redirect to a down tap/tun device
34 *  3. Redirect to a vlan device with lower layer down
35 *
36 *  Case 1, ping packets should be received by packet socket on target device
37 *  when redirected to ingress, and by tun/tap fd when redirected to egress.
38 *
39 *  Case 2,3 are considered successful as long as they do not crash the kernel
40 *  as a regression.
41 *
42 *  Case 1,2 use tap device to test redirect to device that requires MAC
43 *  header, and tun device to test the case with no MAC header added.
44 */
45#include <sys/socket.h>
46#include <net/if.h>
47#include <linux/if_ether.h>
48#include <linux/if_packet.h>
49#include <linux/if_tun.h>
50#include <linux/icmp.h>
51#include <arpa/inet.h>
52#include <unistd.h>
53#include <errno.h>
54#include <stdbool.h>
55#include <stdlib.h>
56
57#define NETNS "ns_lwt_redirect"
58#include "lwt_helpers.h"
59#include "test_progs.h"
60#include "network_helpers.h"
61
/* BPF object file passed to "ip route ... encap bpf xmit obj". */
#define BPF_OBJECT            "test_lwt_redirect.bpf.o"
/* Section holding the redirect-to-ingress program; the "_nomac" variant is
 * selected when the target device needs no MAC header.
 */
#define INGRESS_SEC(need_mac) (!(need_mac) ? "redir_ingress_nomac" : "redir_ingress")
/* Section holding the redirect-to-egress program, selected the same way. */
#define EGRESS_SEC(need_mac)  (!(need_mac) ? "redir_egress_nomac" : "redir_egress")
/* Address assigned to lo as the local source of the test pings. */
#define LOCAL_SRC             "10.0.0.1"
/* Destination prefix routed through the ingress-redirect program. */
#define CIDR_TO_INGRESS       "10.0.0.0/24"
/* Destination prefix routed through the egress-redirect program. */
#define CIDR_TO_EGRESS        "20.0.0.0/24"
68
69/* ping to redirect toward given dev, with last byte of dest IP being the target
70 * device index.
71 *
72 * Note: ping command inside BPF-CI is busybox version, so it does not have certain
73 * function, such like -m option to set packet mark.
74 */
75static void ping_dev(const char *dev, bool is_ingress)
76{
77	int link_index = if_nametoindex(dev);
78	char ip[256];
79
80	if (!ASSERT_GE(link_index, 0, "if_nametoindex"))
81		return;
82
83	if (is_ingress)
84		snprintf(ip, sizeof(ip), "10.0.0.%d", link_index);
85	else
86		snprintf(ip, sizeof(ip), "20.0.0.%d", link_index);
87
88	/* We won't get a reply. Don't fail here */
89	SYS_NOFAIL("ping %s -c1 -W1 -s %d",
90		   ip, ICMP_PAYLOAD_SIZE);
91}
92
/* Open a non-blocking AF_PACKET raw socket bound to @ifname for ETH_P_IP.
 *
 * Outgoing packets are ignored so that only ingress traffic on the device is
 * captured.
 *
 * Returns the socket fd on success, or -1 on failure (an assertion failure
 * has been recorded and the socket, if any, has been closed).
 */
static int new_packet_sock(const char *ifname)
{
	struct sockaddr_ll addr = {
		.sll_family = AF_PACKET,
		.sll_protocol = htons(ETH_P_IP),
	};
	int ignore_outgoing = 1;
	int ifindex = -1;
	int err = 0;
	int s = -1;

	s = socket(AF_PACKET, SOCK_RAW, 0);
	if (!ASSERT_GE(s, 0, "socket(AF_PACKET)"))
		return -1;

	ifindex = if_nametoindex(ifname);
	/* if_nametoindex() returns 0 on failure, so a valid index is >= 1. */
	if (!ASSERT_GE(ifindex, 1, "if_nametoindex"))
		goto fail;

	addr.sll_ifindex = ifindex;

	err = bind(s, (struct sockaddr *)&addr, sizeof(addr));
	if (!ASSERT_OK(err, "bind(AF_PACKET)"))
		goto fail;

	/* Use packet socket to capture only the ingress, so we can detect a
	 * regression that actually redirects the packet to the egress.
	 */
	err = setsockopt(s, SOL_PACKET, PACKET_IGNORE_OUTGOING,
			 &ignore_outgoing, sizeof(ignore_outgoing));
	if (!ASSERT_OK(err, "setsockopt(PACKET_IGNORE_OUTGOING)"))
		goto fail;

	err = fcntl(s, F_SETFL, O_NONBLOCK);
	if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)"))
		goto fail;

	return s;

fail:
	close(s);
	return -1;
}
141
/* Packet filter for captures that begin with an Ethernet header.
 *
 * Returns -1 when @buf is too short to hold an Ethernet header or when the
 * frame does not carry IPv4; otherwise returns the verdict of
 * __expect_icmp_ipv4() on the bytes that follow the Ethernet header.
 */
static int expect_icmp(char *buf, ssize_t len)
{
	const ssize_t mac_len = (ssize_t)sizeof(struct ethhdr);
	struct ethhdr *mac = (struct ethhdr *)buf;

	if (len < mac_len)
		return -1;

	if (mac->h_proto != htons(ETH_P_IP))
		return -1;

	return __expect_icmp_ipv4(buf + mac_len, len - mac_len);
}
154
/* Packet filter for captures that carry no MAC header: the whole buffer is
 * handed to __expect_icmp_ipv4() and its verdict is returned unchanged.
 */
static int expect_icmp_nomac(char *buf, ssize_t len)
{
	int verdict = __expect_icmp_ipv4(buf, len);

	return verdict;
}
159
160static void send_and_capture_test_packets(const char *test_name, int tap_fd,
161					  const char *target_dev, bool need_mac)
162{
163	int psock = -1;
164	struct timeval timeo = {
165		.tv_sec = 0,
166		.tv_usec = 250000,
167	};
168	int ret = -1;
169
170	filter_t filter = need_mac ? expect_icmp : expect_icmp_nomac;
171
172	ping_dev(target_dev, false);
173
174	ret = wait_for_packet(tap_fd, filter, &timeo);
175	if (!ASSERT_EQ(ret, 1, "wait_for_epacket")) {
176		log_err("%s egress test fails", test_name);
177		goto out;
178	}
179
180	psock = new_packet_sock(target_dev);
181	ping_dev(target_dev, true);
182
183	ret = wait_for_packet(psock, filter, &timeo);
184	if (!ASSERT_EQ(ret, 1, "wait_for_ipacket")) {
185		log_err("%s ingress test fails", test_name);
186		goto out;
187	}
188
189out:
190	if (psock >= 0)
191		close(psock);
192}
193
194static int setup_redirect_target(const char *target_dev, bool need_mac)
195{
196	int target_index = -1;
197	int tap_fd = -1;
198
199	tap_fd = open_tuntap(target_dev, need_mac);
200	if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
201		goto fail;
202
203	target_index = if_nametoindex(target_dev);
204	if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
205		goto fail;
206
207	SYS(fail, "sysctl -w net.ipv6.conf.all.disable_ipv6=1");
208	SYS(fail, "ip link add link_err type dummy");
209	SYS(fail, "ip link set lo up");
210	SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
211	SYS(fail, "ip link set link_err up");
212	SYS(fail, "ip link set %s up", target_dev);
213
214	SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
215	    CIDR_TO_INGRESS, BPF_OBJECT, INGRESS_SEC(need_mac));
216
217	SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
218	    CIDR_TO_EGRESS, BPF_OBJECT, EGRESS_SEC(need_mac));
219
220	return tap_fd;
221
222fail:
223	if (tap_fd >= 0)
224		close(tap_fd);
225	return -1;
226}
227
/* Scenario 1 with a MAC header: redirect toward a running "tap0" device and
 * check that the pings are captured on both the egress and ingress paths.
 */
static void test_lwt_redirect_normal(void)
{
	const bool need_mac = true;
	const char *target_dev = "tap0";
	int fd = setup_redirect_target(target_dev, need_mac);

	if (ASSERT_GE(fd, 0, "setup_redirect_target")) {
		send_and_capture_test_packets(__func__, fd, target_dev, need_mac);
		close(fd);
	}
}
241
/* Scenario 1 without a MAC header: redirect toward a running "tun0" device
 * and check that the pings are captured on both the egress and ingress paths.
 */
static void test_lwt_redirect_normal_nomac(void)
{
	const bool need_mac = false;
	const char *target_dev = "tun0";
	int fd = setup_redirect_target(target_dev, need_mac);

	if (ASSERT_GE(fd, 0, "setup_redirect_target")) {
		send_and_capture_test_packets(__func__, fd, target_dev, need_mac);
		close(fd);
	}
}
255
256/* This test aims to prevent regression of future. As long as the kernel does
257 * not panic, it is considered as success.
258 */
259static void __test_lwt_redirect_dev_down(bool need_mac)
260{
261	const char *target_dev = "tap0";
262	int tap_fd = -1;
263
264	tap_fd = setup_redirect_target(target_dev, need_mac);
265	if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
266		return;
267
268	SYS(out, "ip link set %s down", target_dev);
269	ping_dev(target_dev, true);
270	ping_dev(target_dev, false);
271
272out:
273	close(tap_fd);
274}
275
/* Scenario 2 for a target device that requires a MAC header. */
static void test_lwt_redirect_dev_down(void)
{
	const bool need_mac = true;

	__test_lwt_redirect_dev_down(need_mac);
}
280
/* Scenario 2 for a target device that takes no MAC header. */
static void test_lwt_redirect_dev_down_nomac(void)
{
	const bool need_mac = false;

	__test_lwt_redirect_dev_down(need_mac);
}
285
286/* This test aims to prevent regression of future. As long as the kernel does
287 * not panic, it is considered as success.
288 */
289static void test_lwt_redirect_dev_carrier_down(void)
290{
291	const char *lower_dev = "tap0";
292	const char *vlan_dev = "vlan100";
293	int tap_fd = -1;
294
295	tap_fd = setup_redirect_target(lower_dev, true);
296	if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
297		return;
298
299	SYS(out, "ip link add vlan100 link %s type vlan id 100", lower_dev);
300	SYS(out, "ip link set %s up", vlan_dev);
301	SYS(out, "ip link set %s down", lower_dev);
302	ping_dev(vlan_dev, true);
303	ping_dev(vlan_dev, false);
304
305out:
306	close(tap_fd);
307}
308
309static void *test_lwt_redirect_run(void *arg)
310{
311	netns_delete();
312	RUN_TEST(lwt_redirect_normal);
313	RUN_TEST(lwt_redirect_normal_nomac);
314	RUN_TEST(lwt_redirect_dev_down);
315	RUN_TEST(lwt_redirect_dev_down_nomac);
316	RUN_TEST(lwt_redirect_dev_carrier_down);
317	return NULL;
318}
319
320void test_lwt_redirect(void)
321{
322	pthread_t test_thread;
323	int err;
324
325	/* Run the tests in their own thread to isolate the namespace changes
326	 * so they do not affect the environment of other tests.
327	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
328	 */
329	err = pthread_create(&test_thread, NULL, &test_lwt_redirect_run, NULL);
330	if (ASSERT_OK(err, "pthread_create"))
331		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
332}
333