1// SPDX-License-Identifier: GPL-2.0
2#include <test_progs.h>
3#include <network_helpers.h>
4#include <net/if.h>
5#include <linux/if_ether.h>
6#include <linux/if_packet.h>
7#include <linux/if_link.h>
8#include <linux/ipv6.h>
9#include <linux/in6.h>
10#include <linux/udp.h>
11#include <bpf/bpf_endian.h>
12#include <uapi/linux/netdev.h>
13#include "test_xdp_do_redirect.skel.h"
14
15struct udp_packet {
16	struct ethhdr eth;
17	struct ipv6hdr iph;
18	struct udphdr udp;
19	__u8 payload[64 - sizeof(struct udphdr)
20		     - sizeof(struct ethhdr) - sizeof(struct ipv6hdr)];
21} __packed;
22
23static struct udp_packet pkt_udp = {
24	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
25	.eth.h_dest = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
26	.eth.h_source = {0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb},
27	.iph.version = 6,
28	.iph.nexthdr = IPPROTO_UDP,
29	.iph.payload_len = bpf_htons(sizeof(struct udp_packet)
30				     - offsetof(struct udp_packet, udp)),
31	.iph.hop_limit = 2,
32	.iph.saddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(1)},
33	.iph.daddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(2)},
34	.udp.source = bpf_htons(1),
35	.udp.dest = bpf_htons(1),
36	.udp.len = bpf_htons(sizeof(struct udp_packet)
37			     - offsetof(struct udp_packet, udp)),
38	.payload = {0x42}, /* receiver XDP program matches on this */
39};
40
41static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
42{
43	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
44	int ret;
45
46	ret = bpf_tc_hook_create(hook);
47	if (!ASSERT_OK(ret, "create tc hook"))
48		return ret;
49
50	ret = bpf_tc_attach(hook, &opts);
51	if (!ASSERT_OK(ret, "bpf_tc_attach")) {
52		bpf_tc_hook_destroy(hook);
53		return ret;
54	}
55
56	return 0;
57}
58
59/* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) -
60 * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - XDP_PACKET_HEADROOM =
61 * 3408 bytes for 64-byte cacheline and 3216 for 256-byte one.
62 */
63#if defined(__s390x__)
64#define MAX_PKT_SIZE 3216
65#else
66#define MAX_PKT_SIZE 3408
67#endif
68static void test_max_pkt_size(int fd)
69{
70	char data[MAX_PKT_SIZE + 1] = {};
71	int err;
72	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
73			    .data_in = &data,
74			    .data_size_in = MAX_PKT_SIZE,
75			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
76			    .repeat = 1,
77		);
78	err = bpf_prog_test_run_opts(fd, &opts);
79	ASSERT_OK(err, "prog_run_max_size");
80
81	opts.data_size_in += 1;
82	err = bpf_prog_test_run_opts(fd, &opts);
83	ASSERT_EQ(err, -EINVAL, "prog_run_too_big");
84}
85
86#define NUM_PKTS 10000
87void test_xdp_do_redirect(void)
88{
89	int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
90	char data[sizeof(pkt_udp) + sizeof(__u64)];
91	struct test_xdp_do_redirect *skel = NULL;
92	struct nstoken *nstoken = NULL;
93	struct bpf_link *link;
94	LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
95	struct xdp_md ctx_in = { .data = sizeof(__u64),
96				 .data_end = sizeof(data) };
97	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
98			    .data_in = &data,
99			    .data_size_in = sizeof(data),
100			    .ctx_in = &ctx_in,
101			    .ctx_size_in = sizeof(ctx_in),
102			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
103			    .repeat = NUM_PKTS,
104			    .batch_size = 64,
105		);
106	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
107			    .attach_point = BPF_TC_INGRESS);
108
109	memcpy(&data[sizeof(__u64)], &pkt_udp, sizeof(pkt_udp));
110	*((__u32 *)data) = 0x42; /* metadata test value */
111	*((__u32 *)data + 4) = 0;
112
113	skel = test_xdp_do_redirect__open();
114	if (!ASSERT_OK_PTR(skel, "skel"))
115		return;
116
117	/* The XDP program we run with bpf_prog_run() will cycle through all
118	 * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
119	 * ending up with PASS, so we should end up with two packets on the dst
120	 * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
121	 * payload.
122	 */
123	SYS(out, "ip netns add testns");
124	nstoken = open_netns("testns");
125	if (!ASSERT_OK_PTR(nstoken, "setns"))
126		goto out;
127
128	SYS(out, "ip link add veth_src type veth peer name veth_dst");
129	SYS(out, "ip link set dev veth_src address 00:11:22:33:44:55");
130	SYS(out, "ip link set dev veth_dst address 66:77:88:99:aa:bb");
131	SYS(out, "ip link set dev veth_src up");
132	SYS(out, "ip link set dev veth_dst up");
133	SYS(out, "ip addr add dev veth_src fc00::1/64");
134	SYS(out, "ip addr add dev veth_dst fc00::2/64");
135	SYS(out, "ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
136
137	/* We enable forwarding in the test namespace because that will cause
138	 * the packets that go through the kernel stack (with XDP_PASS) to be
139	 * forwarded back out the same interface (because of the packet dst
140	 * combined with the interface addresses). When this happens, the
141	 * regular forwarding path will end up going through the same
142	 * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
143	 * deadlock if it happens on the same CPU. There's a local_bh_disable()
144	 * in the test_run code to prevent this, but an earlier version of the
145	 * code didn't have this, so we keep the test behaviour to make sure the
146	 * bug doesn't resurface.
147	 */
148	SYS(out, "sysctl -qw net.ipv6.conf.all.forwarding=1");
149
150	ifindex_src = if_nametoindex("veth_src");
151	ifindex_dst = if_nametoindex("veth_dst");
152	if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
153	    !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
154		goto out;
155
156	/* Check xdp features supported by veth driver */
157	err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
158	if (!ASSERT_OK(err, "veth_src bpf_xdp_query"))
159		goto out;
160
161	if (!ASSERT_EQ(query_opts.feature_flags,
162		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
163		       NETDEV_XDP_ACT_RX_SG,
164		       "veth_src query_opts.feature_flags"))
165		goto out;
166
167	err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
168	if (!ASSERT_OK(err, "veth_dst bpf_xdp_query"))
169		goto out;
170
171	if (!ASSERT_EQ(query_opts.feature_flags,
172		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
173		       NETDEV_XDP_ACT_RX_SG,
174		       "veth_dst query_opts.feature_flags"))
175		goto out;
176
177	/* Enable GRO */
178	SYS(out, "ethtool -K veth_src gro on");
179	SYS(out, "ethtool -K veth_dst gro on");
180
181	err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
182	if (!ASSERT_OK(err, "veth_src bpf_xdp_query gro on"))
183		goto out;
184
185	if (!ASSERT_EQ(query_opts.feature_flags,
186		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
187		       NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
188		       NETDEV_XDP_ACT_NDO_XMIT_SG,
189		       "veth_src query_opts.feature_flags gro on"))
190		goto out;
191
192	err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
193	if (!ASSERT_OK(err, "veth_dst bpf_xdp_query gro on"))
194		goto out;
195
196	if (!ASSERT_EQ(query_opts.feature_flags,
197		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
198		       NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
199		       NETDEV_XDP_ACT_NDO_XMIT_SG,
200		       "veth_dst query_opts.feature_flags gro on"))
201		goto out;
202
203	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
204	skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
205	skel->rodata->ifindex_in = ifindex_src;
206	ctx_in.ingress_ifindex = ifindex_src;
207	tc_hook.ifindex = ifindex_src;
208
209	if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
210		goto out;
211
212	link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
213	if (!ASSERT_OK_PTR(link, "prog_attach"))
214		goto out;
215	skel->links.xdp_count_pkts = link;
216
217	tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
218	if (attach_tc_prog(&tc_hook, tc_prog_fd))
219		goto out;
220
221	xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
222	err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
223	if (!ASSERT_OK(err, "prog_run"))
224		goto out_tc;
225
226	/* wait for the packets to be flushed */
227	kern_sync_rcu();
228
229	/* There will be one packet sent through XDP_REDIRECT and one through
230	 * XDP_TX; these will show up on the XDP counting program, while the
231	 * rest will be counted at the TC ingress hook (and the counting program
232	 * resets the packet payload so they don't get counted twice even though
233	 * they are re-xmited out the veth device
234	 */
235	ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
236	ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero");
237	ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
238
239	test_max_pkt_size(bpf_program__fd(skel->progs.xdp_count_pkts));
240
241out_tc:
242	bpf_tc_hook_destroy(&tc_hook);
243out:
244	if (nstoken)
245		close_netns(nstoken);
246	SYS_NOFAIL("ip netns del testns");
247	test_xdp_do_redirect__destroy(skel);
248}
249