1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3/*
4 * Test suite of lwt BPF programs that reroutes packets
 *   These tests check not only that the programs work as expected in the
 *   normal case, but also that they handle abnormal situations gracefully.
 *   This test suite currently only covers the lwt_xmit hook. lwt_in tests
 *   have not been implemented.
9 *
10 * WARNING
11 * -------
12 *  This test suite can crash the kernel, thus should be run in a VM.
13 *
14 * Setup:
15 * ---------
 *  All tests are performed in a single netns. An lwt encap route is set up
 *  for each subtest:
18 *
19 *    ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err
20 *
21 *  Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains
22 *  a single test program entry. This program sets packet mark by last byte of
23 *  the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb
24 *  mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped
 *  to avoid a route loop. We don't use the generated BPF skeleton since
 *  attaching lwt programs is not supported by libbpf yet.
27 *
28 *  The test program will bring up a tun device, and sets up the following
29 *  routes:
30 *
31 *    ip rule add pref 100 from all fwmark <tun_index> lookup 100
32 *    ip route add table 100 default dev tun0
33 *
34 *  For normal testing, a ping command is running in the test netns:
35 *
 *    ping 10.0.0.<tun_index> -c1 -W1 -s <ICMP_PAYLOAD_SIZE>
37 *
38 *  For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP
39 *  socket will try to overflow the fq queue and trigger qdisc drop error.
40 *
41 * Scenarios:
42 * --------------------------------
43 *  1. Reroute to a running tun device
 *  2. Reroute to a device whose qdisc drops packets
45 *
46 *  For case 1, ping packets should be received by the tun device.
47 *
48 *  For case 2, force UDP packets to overflow fq limit. As long as kernel
49 *  is not crashed, it is considered successful.
50 */
51#define NETNS "ns_lwt_reroute"
52#include "lwt_helpers.h"
53#include "network_helpers.h"
54#include <linux/net_tstamp.h>
55
56#define BPF_OBJECT            "test_lwt_reroute.bpf.o"
57#define LOCAL_SRC             "10.0.0.1"
58#define TEST_CIDR             "10.0.0.0/24"
59#define XMIT_HOOK             "xmit"
60#define XMIT_SECTION          "lwt_xmit"
61#define NSEC_PER_SEC          1000000000ULL
62
63/* send a ping to be rerouted to the target device */
64static void ping_once(const char *ip)
65{
66	/* We won't get a reply. Don't fail here */
67	SYS_NOFAIL("ping %s -c1 -W1 -s %d",
68		   ip, ICMP_PAYLOAD_SIZE);
69}
70
71/* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop
72 * error. This is done via TX tstamp to force buffering delayed packets.
73 */
74static int overflow_fq(int snd_target, const char *target_ip)
75{
76	struct sockaddr_in addr = {
77		.sin_family = AF_INET,
78		.sin_port = htons(1234),
79	};
80
81	char data_buf[8]; /* only #pkts matter, so use a random small buffer */
82	char control_buf[CMSG_SPACE(sizeof(uint64_t))];
83	struct iovec iov = {
84		.iov_base = data_buf,
85		.iov_len = sizeof(data_buf),
86	};
87	int err = -1;
88	int s = -1;
89	struct sock_txtime txtime_on = {
90		.clockid = CLOCK_MONOTONIC,
91		.flags = 0,
92	};
93	struct msghdr msg = {
94		.msg_name = &addr,
95		.msg_namelen = sizeof(addr),
96		.msg_control = control_buf,
97		.msg_controllen = sizeof(control_buf),
98		.msg_iovlen = 1,
99		.msg_iov = &iov,
100	};
101	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
102
103	memset(data_buf, 0, sizeof(data_buf));
104
105	s = socket(AF_INET, SOCK_DGRAM, 0);
106	if (!ASSERT_GE(s, 0, "socket"))
107		goto out;
108
109	err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on));
110	if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)"))
111		goto out;
112
113	err = inet_pton(AF_INET, target_ip, &addr.sin_addr);
114	if (!ASSERT_EQ(err, 1, "inet_pton"))
115		goto out;
116
117	while (snd_target > 0) {
118		struct timespec now;
119
120		memset(control_buf, 0, sizeof(control_buf));
121		cmsg->cmsg_type = SCM_TXTIME;
122		cmsg->cmsg_level = SOL_SOCKET;
123		cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t));
124
125		err = clock_gettime(CLOCK_MONOTONIC, &now);
126		if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) {
127			err = -1;
128			goto out;
129		}
130
131		*(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC +
132					       now.tv_nsec;
133
134		/* we will intentionally send more than fq limit, so ignore
135		 * the error here.
136		 */
137		sendmsg(s, &msg, MSG_NOSIGNAL);
138		snd_target--;
139	}
140
141	/* no kernel crash so far is considered success */
142	err = 0;
143
144out:
145	if (s >= 0)
146		close(s);
147
148	return err;
149}
150
151static int setup(const char *tun_dev)
152{
153	int target_index = -1;
154	int tap_fd = -1;
155
156	tap_fd = open_tuntap(tun_dev, false);
157	if (!ASSERT_GE(tap_fd, 0, "open_tun"))
158		return -1;
159
160	target_index = if_nametoindex(tun_dev);
161	if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
162		return -1;
163
164	SYS(fail, "ip link add link_err type dummy");
165	SYS(fail, "ip link set lo up");
166	SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
167	SYS(fail, "ip link set link_err up");
168	SYS(fail, "ip link set %s up", tun_dev);
169
170	SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit",
171	    TEST_CIDR, BPF_OBJECT);
172
173	SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100",
174	    target_index);
175	SYS(fail, "ip route add t 100 default dev %s", tun_dev);
176
177	return tap_fd;
178
179fail:
180	if (tap_fd >= 0)
181		close(tap_fd);
182	return -1;
183}
184
185static void test_lwt_reroute_normal_xmit(void)
186{
187	const char *tun_dev = "tun0";
188	int tun_fd = -1;
189	int ifindex = -1;
190	char ip[256];
191	struct timeval timeo = {
192		.tv_sec = 0,
193		.tv_usec = 250000,
194	};
195
196	tun_fd = setup(tun_dev);
197	if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
198		return;
199
200	ifindex = if_nametoindex(tun_dev);
201	if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
202		return;
203
204	snprintf(ip, 256, "10.0.0.%d", ifindex);
205
206	/* ping packets should be received by the tun device */
207	ping_once(ip);
208
209	if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1,
210		       "wait_for_packet"))
211		log_err("%s xmit", __func__);
212}
213
214/*
215 * Test the failure case when the skb is dropped at the qdisc. This is a
216 * regression prevention at the xmit hook only.
217 */
218static void test_lwt_reroute_qdisc_dropped(void)
219{
220	const char *tun_dev = "tun0";
221	int tun_fd = -1;
222	int ifindex = -1;
223	char ip[256];
224
225	tun_fd = setup(tun_dev);
226	if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
227		goto fail;
228
229	SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev);
230
231	ifindex = if_nametoindex(tun_dev);
232	if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
233		return;
234
235	snprintf(ip, 256, "10.0.0.%d", ifindex);
236	ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq");
237
238fail:
239	if (tun_fd >= 0)
240		close(tun_fd);
241}
242
243static void *test_lwt_reroute_run(void *arg)
244{
245	netns_delete();
246	RUN_TEST(lwt_reroute_normal_xmit);
247	RUN_TEST(lwt_reroute_qdisc_dropped);
248	return NULL;
249}
250
251void test_lwt_reroute(void)
252{
253	pthread_t test_thread;
254	int err;
255
256	/* Run the tests in their own thread to isolate the namespace changes
257	 * so they do not affect the environment of other tests.
258	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
259	 */
260	err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL);
261	if (ASSERT_OK(err, "pthread_create"))
262		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
263}
264