1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3/* 4 * Test suite of lwt_xmit BPF programs that redirect packets 5 * The file tests focus not only if these programs work as expected normally, 6 * but also if they can handle abnormal situations gracefully. 7 * 8 * WARNING 9 * ------- 10 * This test suite may crash the kernel, thus should be run in a VM. 11 * 12 * Setup: 13 * --------- 14 * All tests are performed in a single netns. Two lwt encap routes are setup for 15 * each subtest: 16 * 17 * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<ingress_sec>" dev link_err 18 * ip route add 20.0.0.0/24 encap bpf xmit <obj> sec "<egress_sec>" dev link_err 19 * 20 * Here <obj> is statically defined to test_lwt_redirect.bpf.o, and each section 21 * of this object holds a program entry to test. The BPF object is built from 22 * progs/test_lwt_redirect.c. We didn't use generated BPF skeleton since the 23 * attachment for lwt programs are not supported by libbpf yet. 24 * 25 * For testing, ping commands are run in the test netns: 26 * 27 * ping 10.0.0.<ifindex> -c 1 -w 1 -s 100 28 * ping 20.0.0.<ifindex> -c 1 -w 1 -s 100 29 * 30 * Scenarios: 31 * -------------------------------- 32 * 1. Redirect to a running tap/tun device 33 * 2. Redirect to a down tap/tun device 34 * 3. Redirect to a vlan device with lower layer down 35 * 36 * Case 1, ping packets should be received by packet socket on target device 37 * when redirected to ingress, and by tun/tap fd when redirected to egress. 38 * 39 * Case 2,3 are considered successful as long as they do not crash the kernel 40 * as a regression. 41 * 42 * Case 1,2 use tap device to test redirect to device that requires MAC 43 * header, and tun device to test the case with no MAC header added. 44 */ 45#include <sys/socket.h> 46#include <net/if.h> 47#include <linux/if_ether.h> 48#include <linux/if_packet.h> 49#include <linux/if_tun.h> 50#include <linux/icmp.h> 51#include <arpa/inet.h> 52#include <unistd.h> 53#include <errno.h> 54#include <stdbool.h> 55#include <stdlib.h> 56 57#define NETNS "ns_lwt_redirect" 58#include "lwt_helpers.h" 59#include "test_progs.h" 60#include "network_helpers.h" 61 62#define BPF_OBJECT "test_lwt_redirect.bpf.o" 63#define INGRESS_SEC(need_mac) ((need_mac) ? "redir_ingress" : "redir_ingress_nomac") 64#define EGRESS_SEC(need_mac) ((need_mac) ? "redir_egress" : "redir_egress_nomac") 65#define LOCAL_SRC "10.0.0.1" 66#define CIDR_TO_INGRESS "10.0.0.0/24" 67#define CIDR_TO_EGRESS "20.0.0.0/24" 68 69/* ping to redirect toward given dev, with last byte of dest IP being the target 70 * device index. 71 * 72 * Note: ping command inside BPF-CI is busybox version, so it does not have certain 73 * function, such like -m option to set packet mark. 74 */ 75static void ping_dev(const char *dev, bool is_ingress) 76{ 77 int link_index = if_nametoindex(dev); 78 char ip[256]; 79 80 if (!ASSERT_GE(link_index, 0, "if_nametoindex")) 81 return; 82 83 if (is_ingress) 84 snprintf(ip, sizeof(ip), "10.0.0.%d", link_index); 85 else 86 snprintf(ip, sizeof(ip), "20.0.0.%d", link_index); 87 88 /* We won't get a reply. Don't fail here */ 89 SYS_NOFAIL("ping %s -c1 -W1 -s %d", 90 ip, ICMP_PAYLOAD_SIZE); 91} 92 93static int new_packet_sock(const char *ifname) 94{ 95 int err = 0; 96 int ignore_outgoing = 1; 97 int ifindex = -1; 98 int s = -1; 99 100 s = socket(AF_PACKET, SOCK_RAW, 0); 101 if (!ASSERT_GE(s, 0, "socket(AF_PACKET)")) 102 return -1; 103 104 ifindex = if_nametoindex(ifname); 105 if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) { 106 close(s); 107 return -1; 108 } 109 110 struct sockaddr_ll addr = { 111 .sll_family = AF_PACKET, 112 .sll_protocol = htons(ETH_P_IP), 113 .sll_ifindex = ifindex, 114 }; 115 116 err = bind(s, (struct sockaddr *)&addr, sizeof(addr)); 117 if (!ASSERT_OK(err, "bind(AF_PACKET)")) { 118 close(s); 119 return -1; 120 } 121 122 /* Use packet socket to capture only the ingress, so we can distinguish 123 * the case where a regression that actually redirects the packet to 124 * the egress. 125 */ 126 err = setsockopt(s, SOL_PACKET, PACKET_IGNORE_OUTGOING, 127 &ignore_outgoing, sizeof(ignore_outgoing)); 128 if (!ASSERT_OK(err, "setsockopt(PACKET_IGNORE_OUTGOING)")) { 129 close(s); 130 return -1; 131 } 132 133 err = fcntl(s, F_SETFL, O_NONBLOCK); 134 if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { 135 close(s); 136 return -1; 137 } 138 139 return s; 140} 141 142static int expect_icmp(char *buf, ssize_t len) 143{ 144 struct ethhdr *eth = (struct ethhdr *)buf; 145 146 if (len < (ssize_t)sizeof(*eth)) 147 return -1; 148 149 if (eth->h_proto == htons(ETH_P_IP)) 150 return __expect_icmp_ipv4((char *)(eth + 1), len - sizeof(*eth)); 151 152 return -1; 153} 154 155static int expect_icmp_nomac(char *buf, ssize_t len) 156{ 157 return __expect_icmp_ipv4(buf, len); 158} 159 160static void send_and_capture_test_packets(const char *test_name, int tap_fd, 161 const char *target_dev, bool need_mac) 162{ 163 int psock = -1; 164 struct timeval timeo = { 165 .tv_sec = 0, 166 .tv_usec = 250000, 167 }; 168 int ret = -1; 169 170 filter_t filter = need_mac ? expect_icmp : expect_icmp_nomac; 171 172 ping_dev(target_dev, false); 173 174 ret = wait_for_packet(tap_fd, filter, &timeo); 175 if (!ASSERT_EQ(ret, 1, "wait_for_epacket")) { 176 log_err("%s egress test fails", test_name); 177 goto out; 178 } 179 180 psock = new_packet_sock(target_dev); 181 ping_dev(target_dev, true); 182 183 ret = wait_for_packet(psock, filter, &timeo); 184 if (!ASSERT_EQ(ret, 1, "wait_for_ipacket")) { 185 log_err("%s ingress test fails", test_name); 186 goto out; 187 } 188 189out: 190 if (psock >= 0) 191 close(psock); 192} 193 194static int setup_redirect_target(const char *target_dev, bool need_mac) 195{ 196 int target_index = -1; 197 int tap_fd = -1; 198 199 tap_fd = open_tuntap(target_dev, need_mac); 200 if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) 201 goto fail; 202 203 target_index = if_nametoindex(target_dev); 204 if (!ASSERT_GE(target_index, 0, "if_nametoindex")) 205 goto fail; 206 207 SYS(fail, "sysctl -w net.ipv6.conf.all.disable_ipv6=1"); 208 SYS(fail, "ip link add link_err type dummy"); 209 SYS(fail, "ip link set lo up"); 210 SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); 211 SYS(fail, "ip link set link_err up"); 212 SYS(fail, "ip link set %s up", target_dev); 213 214 SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s", 215 CIDR_TO_INGRESS, BPF_OBJECT, INGRESS_SEC(need_mac)); 216 217 SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s", 218 CIDR_TO_EGRESS, BPF_OBJECT, EGRESS_SEC(need_mac)); 219 220 return tap_fd; 221 222fail: 223 if (tap_fd >= 0) 224 close(tap_fd); 225 return -1; 226} 227 228static void test_lwt_redirect_normal(void) 229{ 230 const char *target_dev = "tap0"; 231 int tap_fd = -1; 232 bool need_mac = true; 233 234 tap_fd = setup_redirect_target(target_dev, need_mac); 235 if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 236 return; 237 238 send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac); 239 close(tap_fd); 240} 241 242static void test_lwt_redirect_normal_nomac(void) 243{ 244 const char *target_dev = "tun0"; 245 int tap_fd = -1; 246 bool need_mac = false; 247 248 tap_fd = setup_redirect_target(target_dev, need_mac); 249 if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 250 return; 251 252 send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac); 253 close(tap_fd); 254} 255 256/* This test aims to prevent regression of future. As long as the kernel does 257 * not panic, it is considered as success. 258 */ 259static void __test_lwt_redirect_dev_down(bool need_mac) 260{ 261 const char *target_dev = "tap0"; 262 int tap_fd = -1; 263 264 tap_fd = setup_redirect_target(target_dev, need_mac); 265 if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 266 return; 267 268 SYS(out, "ip link set %s down", target_dev); 269 ping_dev(target_dev, true); 270 ping_dev(target_dev, false); 271 272out: 273 close(tap_fd); 274} 275 276static void test_lwt_redirect_dev_down(void) 277{ 278 __test_lwt_redirect_dev_down(true); 279} 280 281static void test_lwt_redirect_dev_down_nomac(void) 282{ 283 __test_lwt_redirect_dev_down(false); 284} 285 286/* This test aims to prevent regression of future. As long as the kernel does 287 * not panic, it is considered as success. 288 */ 289static void test_lwt_redirect_dev_carrier_down(void) 290{ 291 const char *lower_dev = "tap0"; 292 const char *vlan_dev = "vlan100"; 293 int tap_fd = -1; 294 295 tap_fd = setup_redirect_target(lower_dev, true); 296 if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 297 return; 298 299 SYS(out, "ip link add vlan100 link %s type vlan id 100", lower_dev); 300 SYS(out, "ip link set %s up", vlan_dev); 301 SYS(out, "ip link set %s down", lower_dev); 302 ping_dev(vlan_dev, true); 303 ping_dev(vlan_dev, false); 304 305out: 306 close(tap_fd); 307} 308 309static void *test_lwt_redirect_run(void *arg) 310{ 311 netns_delete(); 312 RUN_TEST(lwt_redirect_normal); 313 RUN_TEST(lwt_redirect_normal_nomac); 314 RUN_TEST(lwt_redirect_dev_down); 315 RUN_TEST(lwt_redirect_dev_down_nomac); 316 RUN_TEST(lwt_redirect_dev_carrier_down); 317 return NULL; 318} 319 320void test_lwt_redirect(void) 321{ 322 pthread_t test_thread; 323 int err; 324 325 /* Run the tests in their own thread to isolate the namespace changes 326 * so they do not affect the environment of other tests. 327 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) 328 */ 329 err = pthread_create(&test_thread, NULL, &test_lwt_redirect_run, NULL); 330 if (ASSERT_OK(err, "pthread_create")) 331 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); 332} 333