1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3/* 4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link 5 * between src and dst. The netns fwd has veth links to each src and dst. The 6 * client is in src and server in dst. The test installs a TC BPF program to each 7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the 8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace 9 * switch from ingress side; it also installs a checker prog on the egress side 10 * to drop unexpected traffic. 11 */ 12 13#include <arpa/inet.h> 14#include <linux/if_tun.h> 15#include <linux/limits.h> 16#include <linux/sysctl.h> 17#include <linux/time_types.h> 18#include <linux/net_tstamp.h> 19#include <net/if.h> 20#include <stdbool.h> 21#include <stdio.h> 22#include <sys/stat.h> 23#include <unistd.h> 24 25#include "test_progs.h" 26#include "network_helpers.h" 27#include "netlink_helpers.h" 28#include "test_tc_neigh_fib.skel.h" 29#include "test_tc_neigh.skel.h" 30#include "test_tc_peer.skel.h" 31#include "test_tc_dtime.skel.h" 32 33#ifndef TCP_TX_DELAY 34#define TCP_TX_DELAY 37 35#endif 36 37#define NS_SRC "ns_src" 38#define NS_FWD "ns_fwd" 39#define NS_DST "ns_dst" 40 41#define IP4_SRC "172.16.1.100" 42#define IP4_DST "172.16.2.100" 43#define IP4_TUN_SRC "172.17.1.100" 44#define IP4_TUN_FWD "172.17.1.200" 45#define IP4_PORT 9004 46 47#define IP6_SRC "0::1:dead:beef:cafe" 48#define IP6_DST "0::2:dead:beef:cafe" 49#define IP6_TUN_SRC "1::1:dead:beef:cafe" 50#define IP6_TUN_FWD "1::2:dead:beef:cafe" 51#define IP6_PORT 9006 52 53#define IP4_SLL "169.254.0.1" 54#define IP4_DLL "169.254.0.2" 55#define IP4_NET "169.254.0.0" 56 57#define MAC_DST_FWD "00:11:22:33:44:55" 58#define MAC_DST "00:22:33:44:55:66" 59 60#define IFADDR_STR_LEN 18 61#define PING_ARGS "-i 0.2 -c 3 -w 10 -q" 62 63#define TIMEOUT_MILLIS 10000 64#define NSEC_PER_SEC 1000000000ULL 65 66#define log_err(MSG, ...) 
\ 67 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 68 __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) 69 70static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL}; 71 72static int write_file(const char *path, const char *newval) 73{ 74 FILE *f; 75 76 f = fopen(path, "r+"); 77 if (!f) 78 return -1; 79 if (fwrite(newval, strlen(newval), 1, f) != 1) { 80 log_err("writing to %s failed", path); 81 fclose(f); 82 return -1; 83 } 84 fclose(f); 85 return 0; 86} 87 88static int netns_setup_namespaces(const char *verb) 89{ 90 const char * const *ns = namespaces; 91 char cmd[128]; 92 93 while (*ns) { 94 snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns); 95 if (!ASSERT_OK(system(cmd), cmd)) 96 return -1; 97 ns++; 98 } 99 return 0; 100} 101 102static void netns_setup_namespaces_nofail(const char *verb) 103{ 104 const char * const *ns = namespaces; 105 char cmd[128]; 106 107 while (*ns) { 108 snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns); 109 system(cmd); 110 ns++; 111 } 112} 113 114enum dev_mode { 115 MODE_VETH, 116 MODE_NETKIT, 117}; 118 119struct netns_setup_result { 120 enum dev_mode dev_mode; 121 int ifindex_src; 122 int ifindex_src_fwd; 123 int ifindex_dst; 124 int ifindex_dst_fwd; 125}; 126 127static int get_ifaddr(const char *name, char *ifaddr) 128{ 129 char path[PATH_MAX]; 130 FILE *f; 131 int ret; 132 133 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name); 134 f = fopen(path, "r"); 135 if (!ASSERT_OK_PTR(f, path)) 136 return -1; 137 138 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f); 139 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) { 140 fclose(f); 141 return -1; 142 } 143 fclose(f); 144 return 0; 145} 146 147static int create_netkit(int mode, char *prim, char *peer) 148{ 149 struct rtattr *linkinfo, *data, *peer_info; 150 struct rtnl_handle rth = { .fd = -1 }; 151 const char *type = "netkit"; 152 struct { 153 struct nlmsghdr n; 154 struct ifinfomsg i; 155 char buf[1024]; 156 } req = {}; 157 int err; 
158 159 err = rtnl_open(&rth, 0); 160 if (!ASSERT_OK(err, "open_rtnetlink")) 161 return err; 162 163 memset(&req, 0, sizeof(req)); 164 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 165 req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; 166 req.n.nlmsg_type = RTM_NEWLINK; 167 req.i.ifi_family = AF_UNSPEC; 168 169 addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim)); 170 linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO); 171 addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type)); 172 data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); 173 addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); 174 peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO); 175 req.n.nlmsg_len += sizeof(struct ifinfomsg); 176 addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer)); 177 addattr_nest_end(&req.n, peer_info); 178 addattr_nest_end(&req.n, data); 179 addattr_nest_end(&req.n, linkinfo); 180 181 err = rtnl_talk(&rth, &req.n, NULL); 182 ASSERT_OK(err, "talk_rtnetlink"); 183 rtnl_close(&rth); 184 return err; 185} 186 187static int netns_setup_links_and_routes(struct netns_setup_result *result) 188{ 189 struct nstoken *nstoken = NULL; 190 char src_fwd_addr[IFADDR_STR_LEN+1] = {}; 191 char src_addr[IFADDR_STR_LEN + 1] = {}; 192 int err; 193 194 if (result->dev_mode == MODE_VETH) { 195 SYS(fail, "ip link add src type veth peer name src_fwd"); 196 SYS(fail, "ip link add dst type veth peer name dst_fwd"); 197 198 SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD); 199 SYS(fail, "ip link set dst address " MAC_DST); 200 } else if (result->dev_mode == MODE_NETKIT) { 201 err = create_netkit(NETKIT_L3, "src", "src_fwd"); 202 if (!ASSERT_OK(err, "create_ifindex_src")) 203 goto fail; 204 err = create_netkit(NETKIT_L3, "dst", "dst_fwd"); 205 if (!ASSERT_OK(err, "create_ifindex_dst")) 206 goto fail; 207 } 208 209 if (get_ifaddr("src_fwd", src_fwd_addr)) 210 goto fail; 211 212 if (get_ifaddr("src", src_addr)) 
213 goto fail; 214 215 result->ifindex_src = if_nametoindex("src"); 216 if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src")) 217 goto fail; 218 219 result->ifindex_src_fwd = if_nametoindex("src_fwd"); 220 if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd")) 221 goto fail; 222 223 result->ifindex_dst = if_nametoindex("dst"); 224 if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst")) 225 goto fail; 226 227 result->ifindex_dst_fwd = if_nametoindex("dst_fwd"); 228 if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd")) 229 goto fail; 230 231 SYS(fail, "ip link set src netns " NS_SRC); 232 SYS(fail, "ip link set src_fwd netns " NS_FWD); 233 SYS(fail, "ip link set dst_fwd netns " NS_FWD); 234 SYS(fail, "ip link set dst netns " NS_DST); 235 236 /** setup in 'src' namespace */ 237 nstoken = open_netns(NS_SRC); 238 if (!ASSERT_OK_PTR(nstoken, "setns src")) 239 goto fail; 240 241 SYS(fail, "ip addr add " IP4_SRC "/32 dev src"); 242 SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad"); 243 SYS(fail, "ip link set dev src up"); 244 245 SYS(fail, "ip route add " IP4_DST "/32 dev src scope global"); 246 SYS(fail, "ip route add " IP4_NET "/16 dev src scope global"); 247 SYS(fail, "ip route add " IP6_DST "/128 dev src scope global"); 248 249 if (result->dev_mode == MODE_VETH) { 250 SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s", 251 src_fwd_addr); 252 SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s", 253 src_fwd_addr); 254 } 255 256 close_netns(nstoken); 257 258 /** setup in 'fwd' namespace */ 259 nstoken = open_netns(NS_FWD); 260 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 261 goto fail; 262 263 /* The fwd netns automatically gets a v6 LL address / routes, but also 264 * needs v4 one in order to start ARP probing. IP4_NET route is added 265 * to the endpoints so that the ARP processing will reply. 
266 */ 267 SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd"); 268 SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd"); 269 SYS(fail, "ip link set dev src_fwd up"); 270 SYS(fail, "ip link set dev dst_fwd up"); 271 272 SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global"); 273 SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global"); 274 SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global"); 275 SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global"); 276 277 if (result->dev_mode == MODE_VETH) { 278 SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr); 279 SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr); 280 SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST); 281 SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST); 282 } 283 284 close_netns(nstoken); 285 286 /** setup in 'dst' namespace */ 287 nstoken = open_netns(NS_DST); 288 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 289 goto fail; 290 291 SYS(fail, "ip addr add " IP4_DST "/32 dev dst"); 292 SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad"); 293 SYS(fail, "ip link set dev dst up"); 294 SYS(fail, "ip link set dev lo up"); 295 296 SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global"); 297 SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global"); 298 SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global"); 299 300 if (result->dev_mode == MODE_VETH) { 301 SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD); 302 SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD); 303 } 304 305 close_netns(nstoken); 306 307 return 0; 308fail: 309 if (nstoken) 310 close_netns(nstoken); 311 return -1; 312} 313 314static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex) 315{ 316 char err_str[128], ifname[16]; 317 int err; 318 319 qdisc_hook->ifindex = ifindex; 320 qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; 321 err = 
bpf_tc_hook_create(qdisc_hook); 322 snprintf(err_str, sizeof(err_str), 323 "qdisc add dev %s clsact", 324 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>"); 325 err_str[sizeof(err_str) - 1] = 0; 326 ASSERT_OK(err, err_str); 327 328 return err; 329} 330 331static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook, 332 enum bpf_tc_attach_point xgress, 333 const struct bpf_program *prog, int priority) 334{ 335 LIBBPF_OPTS(bpf_tc_opts, tc_attach); 336 char err_str[128], ifname[16]; 337 int err; 338 339 qdisc_hook->attach_point = xgress; 340 tc_attach.prog_fd = bpf_program__fd(prog); 341 tc_attach.priority = priority; 342 err = bpf_tc_attach(qdisc_hook, &tc_attach); 343 snprintf(err_str, sizeof(err_str), 344 "filter add dev %s %s prio %d bpf da %s", 345 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>", 346 xgress == BPF_TC_INGRESS ? "ingress" : "egress", 347 priority, bpf_program__name(prog)); 348 err_str[sizeof(err_str) - 1] = 0; 349 ASSERT_OK(err, err_str); 350 351 return err; 352} 353 354#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \ 355 if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \ 356 goto fail; \ 357}) 358 359#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \ 360 if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \ 361 goto fail; \ 362}) 363 364static int netns_load_bpf(const struct bpf_program *src_prog, 365 const struct bpf_program *dst_prog, 366 const struct bpf_program *chk_prog, 367 const struct netns_setup_result *setup_result) 368{ 369 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); 370 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); 371 int err; 372 373 /* tc qdisc add dev src_fwd clsact */ 374 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); 375 /* tc filter add dev src_fwd ingress bpf da src_prog */ 376 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0); 377 /* tc filter add dev src_fwd egress bpf da chk_prog */ 378 XGRESS_FILTER_ADD(&qdisc_src_fwd, 
BPF_TC_EGRESS, chk_prog, 0); 379 380 /* tc qdisc add dev dst_fwd clsact */ 381 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); 382 /* tc filter add dev dst_fwd ingress bpf da dst_prog */ 383 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); 384 /* tc filter add dev dst_fwd egress bpf da chk_prog */ 385 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); 386 387 return 0; 388fail: 389 return -1; 390} 391 392static void test_tcp(int family, const char *addr, __u16 port) 393{ 394 int listen_fd = -1, accept_fd = -1, client_fd = -1; 395 char buf[] = "testing testing"; 396 int n; 397 struct nstoken *nstoken; 398 399 nstoken = open_netns(NS_DST); 400 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 401 return; 402 403 listen_fd = start_server(family, SOCK_STREAM, addr, port, 0); 404 if (!ASSERT_GE(listen_fd, 0, "listen")) 405 goto done; 406 407 close_netns(nstoken); 408 nstoken = open_netns(NS_SRC); 409 if (!ASSERT_OK_PTR(nstoken, "setns src")) 410 goto done; 411 412 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); 413 if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) 414 goto done; 415 416 accept_fd = accept(listen_fd, NULL, NULL); 417 if (!ASSERT_GE(accept_fd, 0, "accept")) 418 goto done; 419 420 if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo")) 421 goto done; 422 423 n = write(client_fd, buf, sizeof(buf)); 424 if (!ASSERT_EQ(n, sizeof(buf), "send to server")) 425 goto done; 426 427 n = read(accept_fd, buf, sizeof(buf)); 428 ASSERT_EQ(n, sizeof(buf), "recv from server"); 429 430done: 431 if (nstoken) 432 close_netns(nstoken); 433 if (listen_fd >= 0) 434 close(listen_fd); 435 if (accept_fd >= 0) 436 close(accept_fd); 437 if (client_fd >= 0) 438 close(client_fd); 439} 440 441static int test_ping(int family, const char *addr) 442{ 443 SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); 444 return 0; 445fail: 446 return -1; 447} 448 449static void test_connectivity(void) 
450{ 451 test_tcp(AF_INET, IP4_DST, IP4_PORT); 452 test_ping(AF_INET, IP4_DST); 453 test_tcp(AF_INET6, IP6_DST, IP6_PORT); 454 test_ping(AF_INET6, IP6_DST); 455} 456 457static int set_forwarding(bool enable) 458{ 459 int err; 460 461 err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0"); 462 if (!ASSERT_OK(err, "set ipv4.ip_forward=0")) 463 return err; 464 465 err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0"); 466 if (!ASSERT_OK(err, "set ipv6.forwarding=0")) 467 return err; 468 469 return 0; 470} 471 472static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp) 473{ 474 struct __kernel_timespec pkt_ts = {}; 475 char ctl[CMSG_SPACE(sizeof(pkt_ts))]; 476 struct timespec now_ts; 477 struct msghdr msg = {}; 478 __u64 now_ns, pkt_ns; 479 struct cmsghdr *cmsg; 480 struct iovec iov; 481 char data[32]; 482 int ret; 483 484 iov.iov_base = data; 485 iov.iov_len = sizeof(data); 486 msg.msg_iov = &iov; 487 msg.msg_iovlen = 1; 488 msg.msg_control = &ctl; 489 msg.msg_controllen = sizeof(ctl); 490 491 ret = recvmsg(fd, &msg, 0); 492 if (!ASSERT_EQ(ret, s, "recvmsg")) 493 return -1; 494 ASSERT_STRNEQ(data, expected, s, "expected rcv data"); 495 496 cmsg = CMSG_FIRSTHDR(&msg); 497 if (cmsg && cmsg->cmsg_level == SOL_SOCKET && 498 cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) 499 memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts)); 500 501 pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec; 502 if (tstamp) { 503 /* caller will check the tstamp itself */ 504 *tstamp = pkt_ns; 505 return 0; 506 } 507 508 ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp"); 509 510 ret = clock_gettime(CLOCK_REALTIME, &now_ts); 511 ASSERT_OK(ret, "clock_gettime"); 512 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec; 513 514 if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp")) 515 ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC, 516 "check rcv tstamp"); 517 return 0; 518} 519 520static void rcv_tstamp(int fd, const char *expected, size_t s) 521{ 522 
__rcv_tstamp(fd, expected, s, NULL); 523} 524 525static int wait_netstamp_needed_key(void) 526{ 527 int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n; 528 char buf[] = "testing testing"; 529 struct nstoken *nstoken; 530 __u64 tstamp = 0; 531 532 nstoken = open_netns(NS_DST); 533 if (!nstoken) 534 return -1; 535 536 srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0); 537 if (!ASSERT_GE(srv_fd, 0, "start_server")) 538 goto done; 539 540 err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, 541 &opt, sizeof(opt)); 542 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)")) 543 goto done; 544 545 cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS); 546 if (!ASSERT_GE(cli_fd, 0, "connect_to_fd")) 547 goto done; 548 549again: 550 n = write(cli_fd, buf, sizeof(buf)); 551 if (!ASSERT_EQ(n, sizeof(buf), "send to server")) 552 goto done; 553 err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp); 554 if (!ASSERT_OK(err, "__rcv_tstamp")) 555 goto done; 556 if (!tstamp && nretries++ < 5) { 557 sleep(1); 558 printf("netstamp_needed_key retry#%d\n", nretries); 559 goto again; 560 } 561 562done: 563 if (!tstamp && srv_fd != -1) { 564 close(srv_fd); 565 srv_fd = -1; 566 } 567 if (cli_fd != -1) 568 close(cli_fd); 569 close_netns(nstoken); 570 return srv_fd; 571} 572 573static void snd_tstamp(int fd, char *b, size_t s) 574{ 575 struct sock_txtime opt = { .clockid = CLOCK_TAI }; 576 char ctl[CMSG_SPACE(sizeof(__u64))]; 577 struct timespec now_ts; 578 struct msghdr msg = {}; 579 struct cmsghdr *cmsg; 580 struct iovec iov; 581 __u64 now_ns; 582 int ret; 583 584 ret = clock_gettime(CLOCK_TAI, &now_ts); 585 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)"); 586 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec; 587 588 iov.iov_base = b; 589 iov.iov_len = s; 590 msg.msg_iov = &iov; 591 msg.msg_iovlen = 1; 592 msg.msg_control = &ctl; 593 msg.msg_controllen = sizeof(ctl); 594 595 cmsg = CMSG_FIRSTHDR(&msg); 596 cmsg->cmsg_level = SOL_SOCKET; 597 cmsg->cmsg_type = SCM_TXTIME; 598 
cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns)); 599 *(__u64 *)CMSG_DATA(cmsg) = now_ns; 600 601 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt)); 602 ASSERT_OK(ret, "setsockopt(SO_TXTIME)"); 603 604 ret = sendmsg(fd, &msg, 0); 605 ASSERT_EQ(ret, s, "sendmsg"); 606} 607 608static void test_inet_dtime(int family, int type, const char *addr, __u16 port) 609{ 610 int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err; 611 char buf[] = "testing testing"; 612 struct nstoken *nstoken; 613 614 nstoken = open_netns(NS_DST); 615 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 616 return; 617 listen_fd = start_server(family, type, addr, port, 0); 618 close_netns(nstoken); 619 620 if (!ASSERT_GE(listen_fd, 0, "listen")) 621 return; 622 623 /* Ensure the kernel puts the (rcv) timestamp for all skb */ 624 err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, 625 &opt, sizeof(opt)); 626 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)")) 627 goto done; 628 629 if (type == SOCK_STREAM) { 630 /* Ensure the kernel set EDT when sending out rst/ack 631 * from the kernel's ctl_sk. 
632 */ 633 err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt, 634 sizeof(opt)); 635 if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)")) 636 goto done; 637 } 638 639 nstoken = open_netns(NS_SRC); 640 if (!ASSERT_OK_PTR(nstoken, "setns src")) 641 goto done; 642 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); 643 close_netns(nstoken); 644 645 if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) 646 goto done; 647 648 if (type == SOCK_STREAM) { 649 int n; 650 651 accept_fd = accept(listen_fd, NULL, NULL); 652 if (!ASSERT_GE(accept_fd, 0, "accept")) 653 goto done; 654 655 n = write(client_fd, buf, sizeof(buf)); 656 if (!ASSERT_EQ(n, sizeof(buf), "send to server")) 657 goto done; 658 rcv_tstamp(accept_fd, buf, sizeof(buf)); 659 } else { 660 snd_tstamp(client_fd, buf, sizeof(buf)); 661 rcv_tstamp(listen_fd, buf, sizeof(buf)); 662 } 663 664done: 665 close(listen_fd); 666 if (accept_fd != -1) 667 close(accept_fd); 668 if (client_fd != -1) 669 close(client_fd); 670} 671 672static int netns_load_dtime_bpf(struct test_tc_dtime *skel, 673 const struct netns_setup_result *setup_result) 674{ 675 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); 676 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); 677 LIBBPF_OPTS(bpf_tc_hook, qdisc_src); 678 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst); 679 struct nstoken *nstoken; 680 int err; 681 682 /* setup ns_src tc progs */ 683 nstoken = open_netns(NS_SRC); 684 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) 685 return -1; 686 /* tc qdisc add dev src clsact */ 687 QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src); 688 /* tc filter add dev src ingress bpf da ingress_host */ 689 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); 690 /* tc filter add dev src egress bpf da egress_host */ 691 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); 692 close_netns(nstoken); 693 694 /* setup ns_dst tc progs */ 695 nstoken = open_netns(NS_DST); 696 if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) 697 return -1; 698 /* tc 
qdisc add dev dst clsact */ 699 QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst); 700 /* tc filter add dev dst ingress bpf da ingress_host */ 701 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); 702 /* tc filter add dev dst egress bpf da egress_host */ 703 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); 704 close_netns(nstoken); 705 706 /* setup ns_fwd tc progs */ 707 nstoken = open_netns(NS_FWD); 708 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) 709 return -1; 710 /* tc qdisc add dev dst_fwd clsact */ 711 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); 712 /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ 713 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, 714 skel->progs.ingress_fwdns_prio100, 100); 715 /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ 716 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, 717 skel->progs.ingress_fwdns_prio101, 101); 718 /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ 719 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, 720 skel->progs.egress_fwdns_prio100, 100); 721 /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ 722 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, 723 skel->progs.egress_fwdns_prio101, 101); 724 725 /* tc qdisc add dev src_fwd clsact */ 726 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); 727 /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ 728 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, 729 skel->progs.ingress_fwdns_prio100, 100); 730 /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ 731 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, 732 skel->progs.ingress_fwdns_prio101, 101); 733 /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ 734 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, 735 
/* Counter indexes; cnt_names[] below is indexed by these values. */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};

const char *cnt_names[] = {
	"ingress_fwdns_p100",
	"ingress_fwdns_p101",
	"egress_fwdns_p100",
	"egress_fwdns_p101",
	"ingress_endhost",
	"egress_endhost",
	"set_dtime",
};

/* Test case indexes; test_names[] below has no entry for UKN_TEST. */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};

const char *test_names[] = {
	"tcp ip4 clear dtime" + 0 == 0 ? "tcp ip6 clear dtime" : "tcp ip6 clear dtime",
	"tcp ip4",
	"tcp ip6",
	"udp ip4",
	"udp ip6",
	"tcp ip4 rt fwd",
	"tcp ip6 rt fwd",
	"udp ip4 rt fwd",
	"udp ip6 rt fwd",
};

/* Name of test case @test, or a placeholder if it has no test_names[] entry. */
static const char *dtime_test_name(int test)
{
	if (test < 0 ||
	    (size_t)test >= sizeof(test_names) / sizeof(test_names[0]))
		return "unknown test";
	return test_names[test];
}

/* Name of counter @cnt, or a placeholder if it has no cnt_names[] entry. */
static const char *dtime_cnt_name(int cnt)
{
	if (cnt < 0 ||
	    (size_t)cnt >= sizeof(cnt_names) / sizeof(cnt_names[0]))
		return "unknown counter";
	return cnt_names[cnt];
}

/*
 * Format "<test name> <counter name>" for use as an assertion message.
 *
 * The result lives in a static buffer that is overwritten by the next call.
 * Out-of-range indexes (e.g. UKN_TEST or __MAX_CNT) yield placeholder names
 * instead of reading past the end of the name tables.
 */
static const char *dtime_cnt_str(int test, int cnt)
{
	static char name[64];

	snprintf(name, sizeof(name), "%s %s", dtime_test_name(test),
		 dtime_cnt_name(cnt));

	return name;
}

/*
 * Same as dtime_cnt_str() but with an " errs" suffix; uses its own static
 * buffer, so one result of each function can be held at the same time.
 */
static const char *dtime_err_str(int test, int cnt)
{
	static char name[64];

	snprintf(name, sizeof(name), "%s %s errs", dtime_test_name(test),
		 dtime_cnt_name(cnt));

	return name;
}
ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0, 827 dtime_cnt_str(t, EGRESS_FWDNS_P100)); 828 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0, 829 dtime_cnt_str(t, EGRESS_FWDNS_P101)); 830 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0, 831 dtime_cnt_str(t, EGRESS_ENDHOST)); 832 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0, 833 dtime_cnt_str(t, INGRESS_ENDHOST)); 834 835 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) 836 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); 837} 838 839static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) 840{ 841 __u32 *dtimes, *errs; 842 const char *addr; 843 int i, t; 844 845 if (family == AF_INET) { 846 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD; 847 addr = IP4_DST; 848 } else { 849 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD; 850 addr = IP6_DST; 851 } 852 853 dtimes = skel->bss->dtimes[t]; 854 errs = skel->bss->errs[t]; 855 856 skel->bss->test = t; 857 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t); 858 859 /* fwdns_prio100 prog does not read delivery_time_type, so 860 * kernel puts the (rcv) timetamp in __sk_buff->tstamp 861 */ 862 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, 863 dtime_cnt_str(t, INGRESS_FWDNS_P100)); 864 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++) 865 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); 866 867 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) 868 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); 869} 870 871static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) 872{ 873 __u32 *dtimes, *errs; 874 const char *addr; 875 int i, t; 876 877 if (family == AF_INET) { 878 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD; 879 addr = IP4_DST; 880 } else { 881 t = bpf_fwd ? 
UDP_IP6 : UDP_IP6_RT_FWD; 882 addr = IP6_DST; 883 } 884 885 dtimes = skel->bss->dtimes[t]; 886 errs = skel->bss->errs[t]; 887 888 skel->bss->test = t; 889 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t); 890 891 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, 892 dtime_cnt_str(t, INGRESS_FWDNS_P100)); 893 /* non mono delivery time is not forwarded */ 894 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, 895 dtime_cnt_str(t, INGRESS_FWDNS_P101)); 896 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) 897 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); 898 899 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) 900 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); 901} 902 903static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) 904{ 905 struct test_tc_dtime *skel; 906 struct nstoken *nstoken; 907 int hold_tstamp_fd, err; 908 909 /* Hold a sk with the SOCK_TIMESTAMP set to ensure there 910 * is no delay in the kernel net_enable_timestamp(). 911 * This ensures the following tests must have 912 * non zero rcv tstamp in the recvmsg(). 
913 */ 914 hold_tstamp_fd = wait_netstamp_needed_key(); 915 if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key")) 916 return; 917 918 skel = test_tc_dtime__open(); 919 if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) 920 goto done; 921 922 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; 923 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; 924 925 err = test_tc_dtime__load(skel); 926 if (!ASSERT_OK(err, "test_tc_dtime__load")) 927 goto done; 928 929 if (netns_load_dtime_bpf(skel, setup_result)) 930 goto done; 931 932 nstoken = open_netns(NS_FWD); 933 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 934 goto done; 935 err = set_forwarding(false); 936 close_netns(nstoken); 937 if (!ASSERT_OK(err, "disable forwarding")) 938 goto done; 939 940 test_tcp_clear_dtime(skel); 941 942 test_tcp_dtime(skel, AF_INET, true); 943 test_tcp_dtime(skel, AF_INET6, true); 944 test_udp_dtime(skel, AF_INET, true); 945 test_udp_dtime(skel, AF_INET6, true); 946 947 /* Test the kernel ip[6]_forward path instead 948 * of bpf_redirect_neigh(). 
949 */ 950 nstoken = open_netns(NS_FWD); 951 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 952 goto done; 953 err = set_forwarding(true); 954 close_netns(nstoken); 955 if (!ASSERT_OK(err, "enable forwarding")) 956 goto done; 957 958 test_tcp_dtime(skel, AF_INET, false); 959 test_tcp_dtime(skel, AF_INET6, false); 960 test_udp_dtime(skel, AF_INET, false); 961 test_udp_dtime(skel, AF_INET6, false); 962 963done: 964 test_tc_dtime__destroy(skel); 965 close(hold_tstamp_fd); 966} 967 968static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) 969{ 970 struct nstoken *nstoken = NULL; 971 struct test_tc_neigh_fib *skel = NULL; 972 973 nstoken = open_netns(NS_FWD); 974 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 975 return; 976 977 skel = test_tc_neigh_fib__open(); 978 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open")) 979 goto done; 980 981 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) 982 goto done; 983 984 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, 985 skel->progs.tc_chk, setup_result)) 986 goto done; 987 988 /* bpf_fib_lookup() checks if forwarding is enabled */ 989 if (!ASSERT_OK(set_forwarding(true), "enable forwarding")) 990 goto done; 991 992 test_connectivity(); 993 994done: 995 if (skel) 996 test_tc_neigh_fib__destroy(skel); 997 close_netns(nstoken); 998} 999 1000static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) 1001{ 1002 struct nstoken *nstoken = NULL; 1003 struct test_tc_neigh *skel = NULL; 1004 int err; 1005 1006 nstoken = open_netns(NS_FWD); 1007 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 1008 return; 1009 1010 skel = test_tc_neigh__open(); 1011 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) 1012 goto done; 1013 1014 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; 1015 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; 1016 1017 err = test_tc_neigh__load(skel); 1018 if (!ASSERT_OK(err, "test_tc_neigh__load")) 1019 goto done; 1020 1021 if 
(netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, 1022 skel->progs.tc_chk, setup_result)) 1023 goto done; 1024 1025 if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) 1026 goto done; 1027 1028 test_connectivity(); 1029 1030done: 1031 if (skel) 1032 test_tc_neigh__destroy(skel); 1033 close_netns(nstoken); 1034} 1035 1036static void test_tc_redirect_peer(struct netns_setup_result *setup_result) 1037{ 1038 struct nstoken *nstoken; 1039 struct test_tc_peer *skel; 1040 int err; 1041 1042 nstoken = open_netns(NS_FWD); 1043 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 1044 return; 1045 1046 skel = test_tc_peer__open(); 1047 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) 1048 goto done; 1049 1050 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; 1051 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; 1052 1053 err = test_tc_peer__load(skel); 1054 if (!ASSERT_OK(err, "test_tc_peer__load")) 1055 goto done; 1056 1057 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, 1058 skel->progs.tc_chk, setup_result)) 1059 goto done; 1060 1061 if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) 1062 goto done; 1063 1064 test_connectivity(); 1065 1066done: 1067 if (skel) 1068 test_tc_peer__destroy(skel); 1069 close_netns(nstoken); 1070} 1071 1072static int tun_open(char *name) 1073{ 1074 struct ifreq ifr; 1075 int fd, err; 1076 1077 fd = open("/dev/net/tun", O_RDWR); 1078 if (!ASSERT_GE(fd, 0, "open /dev/net/tun")) 1079 return -1; 1080 1081 memset(&ifr, 0, sizeof(ifr)); 1082 1083 ifr.ifr_flags = IFF_TUN | IFF_NO_PI; 1084 if (*name) 1085 strncpy(ifr.ifr_name, name, IFNAMSIZ); 1086 1087 err = ioctl(fd, TUNSETIFF, &ifr); 1088 if (!ASSERT_OK(err, "ioctl TUNSETIFF")) 1089 goto fail; 1090 1091 SYS(fail, "ip link set dev %s up", name); 1092 1093 return fd; 1094fail: 1095 close(fd); 1096 return -1; 1097} 1098 1099enum { 1100 SRC_TO_TARGET = 0, 1101 TARGET_TO_SRC = 1, 1102}; 1103 1104static int tun_relay_loop(int src_fd, int target_fd) 1105{ 1106 
fd_set rfds, wfds; 1107 1108 FD_ZERO(&rfds); 1109 FD_ZERO(&wfds); 1110 1111 for (;;) { 1112 char buf[1500]; 1113 int direction, nread, nwrite; 1114 1115 FD_SET(src_fd, &rfds); 1116 FD_SET(target_fd, &rfds); 1117 1118 if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) { 1119 log_err("select failed"); 1120 return 1; 1121 } 1122 1123 direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC; 1124 1125 nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf)); 1126 if (nread < 0) { 1127 log_err("read failed"); 1128 return 1; 1129 } 1130 1131 nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread); 1132 if (nwrite != nread) { 1133 log_err("write failed"); 1134 return 1; 1135 } 1136 } 1137} 1138 1139static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) 1140{ 1141 LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); 1142 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); 1143 struct test_tc_peer *skel = NULL; 1144 struct nstoken *nstoken = NULL; 1145 int err; 1146 int tunnel_pid = -1; 1147 int src_fd, target_fd = -1; 1148 int ifindex; 1149 1150 /* Start a L3 TUN/TAP tunnel between the src and dst namespaces. 1151 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those 1152 * expose the L2 headers encapsulating the IP packet to BPF and hence 1153 * don't have skb in suitable state for this test. Alternative to TUN/TAP 1154 * would be e.g. Wireguard which would appear as a pure L3 device to BPF, 1155 * but that requires much more complicated setup. 
1156 */ 1157 nstoken = open_netns(NS_SRC); 1158 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) 1159 return; 1160 1161 src_fd = tun_open("tun_src"); 1162 if (!ASSERT_GE(src_fd, 0, "tun_open tun_src")) 1163 goto fail; 1164 1165 close_netns(nstoken); 1166 1167 nstoken = open_netns(NS_FWD); 1168 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) 1169 goto fail; 1170 1171 target_fd = tun_open("tun_fwd"); 1172 if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd")) 1173 goto fail; 1174 1175 tunnel_pid = fork(); 1176 if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop")) 1177 goto fail; 1178 1179 if (tunnel_pid == 0) 1180 exit(tun_relay_loop(src_fd, target_fd)); 1181 1182 skel = test_tc_peer__open(); 1183 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) 1184 goto fail; 1185 1186 ifindex = if_nametoindex("tun_fwd"); 1187 if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd")) 1188 goto fail; 1189 1190 skel->rodata->IFINDEX_SRC = ifindex; 1191 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; 1192 1193 err = test_tc_peer__load(skel); 1194 if (!ASSERT_OK(err, "test_tc_peer__load")) 1195 goto fail; 1196 1197 /* Load "tc_src_l3" to the tun_fwd interface to redirect packets 1198 * towards dst, and "tc_dst" to redirect packets 1199 * and "tc_chk" on dst_fwd to drop non-redirected packets. 
1200 */ 1201 /* tc qdisc add dev tun_fwd clsact */ 1202 QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); 1203 /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ 1204 XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); 1205 1206 /* tc qdisc add dev dst_fwd clsact */ 1207 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); 1208 /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */ 1209 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); 1210 /* tc filter add dev dst_fwd egress bpf da tc_chk */ 1211 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); 1212 1213 /* Setup route and neigh tables */ 1214 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); 1215 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); 1216 1217 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); 1218 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); 1219 1220 SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global"); 1221 SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD 1222 " dev tun_src scope global"); 1223 SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global"); 1224 SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global"); 1225 SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD 1226 " dev tun_src scope global"); 1227 SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global"); 1228 1229 SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD); 1230 SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD); 1231 1232 if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) 1233 goto fail; 1234 1235 test_connectivity(); 1236 1237fail: 1238 if (tunnel_pid > 0) { 1239 kill(tunnel_pid, 
SIGTERM); 1240 waitpid(tunnel_pid, NULL, 0); 1241 } 1242 if (src_fd >= 0) 1243 close(src_fd); 1244 if (target_fd >= 0) 1245 close(target_fd); 1246 if (skel) 1247 test_tc_peer__destroy(skel); 1248 if (nstoken) 1249 close_netns(nstoken); 1250} 1251 1252#define RUN_TEST(name, mode) \ 1253 ({ \ 1254 struct netns_setup_result setup_result = { .dev_mode = mode, }; \ 1255 if (test__start_subtest(#name)) \ 1256 if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ 1257 if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ 1258 "setup links and routes")) \ 1259 test_ ## name(&setup_result); \ 1260 netns_setup_namespaces("delete"); \ 1261 } \ 1262 }) 1263 1264static void *test_tc_redirect_run_tests(void *arg) 1265{ 1266 netns_setup_namespaces_nofail("delete"); 1267 1268 RUN_TEST(tc_redirect_peer, MODE_VETH); 1269 RUN_TEST(tc_redirect_peer, MODE_NETKIT); 1270 RUN_TEST(tc_redirect_peer_l3, MODE_VETH); 1271 RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT); 1272 RUN_TEST(tc_redirect_neigh, MODE_VETH); 1273 RUN_TEST(tc_redirect_neigh_fib, MODE_VETH); 1274 RUN_TEST(tc_redirect_dtime, MODE_VETH); 1275 return NULL; 1276} 1277 1278void test_tc_redirect(void) 1279{ 1280 pthread_t test_thread; 1281 int err; 1282 1283 /* Run the tests in their own thread to isolate the namespace changes 1284 * so they do not affect the environment of other tests. 1285 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) 1286 */ 1287 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL); 1288 if (ASSERT_OK(err, "pthread_create")) 1289 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); 1290} 1291