1// SPDX-License-Identifier: GPL-2.0 2// Copyright (c) 2022 Meta 3 4#include <stddef.h> 5#include <stdint.h> 6#include <stdbool.h> 7#include <linux/bpf.h> 8#include <linux/stddef.h> 9#include <linux/pkt_cls.h> 10#include <linux/if_ether.h> 11#include <linux/in.h> 12#include <linux/ip.h> 13#include <linux/ipv6.h> 14#include <linux/tcp.h> 15#include <linux/udp.h> 16#include <bpf/bpf_helpers.h> 17#include <bpf/bpf_endian.h> 18 19/* veth_src --- veth_src_fwd --- veth_det_fwd --- veth_dst 20 * | | 21 * ns_src | ns_fwd | ns_dst 22 * 23 * ns_src and ns_dst: ENDHOST namespace 24 * ns_fwd: Fowarding namespace 25 */ 26 27#define ctx_ptr(field) (void *)(long)(field) 28 29#define ip4_src __bpf_htonl(0xac100164) /* 172.16.1.100 */ 30#define ip4_dst __bpf_htonl(0xac100264) /* 172.16.2.100 */ 31 32#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 33 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } 34#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 35 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } 36 37#define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ 38 a.s6_addr32[1] == b.s6_addr32[1] && \ 39 a.s6_addr32[2] == b.s6_addr32[2] && \ 40 a.s6_addr32[3] == b.s6_addr32[3]) 41 42volatile const __u32 IFINDEX_SRC; 43volatile const __u32 IFINDEX_DST; 44 45#define EGRESS_ENDHOST_MAGIC 0x0b9fbeef 46#define INGRESS_FWDNS_MAGIC 0x1b9fbeef 47#define EGRESS_FWDNS_MAGIC 0x2b9fbeef 48 49enum { 50 INGRESS_FWDNS_P100, 51 INGRESS_FWDNS_P101, 52 EGRESS_FWDNS_P100, 53 EGRESS_FWDNS_P101, 54 INGRESS_ENDHOST, 55 EGRESS_ENDHOST, 56 SET_DTIME, 57 __MAX_CNT, 58}; 59 60enum { 61 TCP_IP6_CLEAR_DTIME, 62 TCP_IP4, 63 TCP_IP6, 64 UDP_IP4, 65 UDP_IP6, 66 TCP_IP4_RT_FWD, 67 TCP_IP6_RT_FWD, 68 UDP_IP4_RT_FWD, 69 UDP_IP6_RT_FWD, 70 UKN_TEST, 71 __NR_TESTS, 72}; 73 74enum { 75 SRC_NS = 1, 76 DST_NS, 77}; 78 79__u32 dtimes[__NR_TESTS][__MAX_CNT] = {}; 80__u32 errs[__NR_TESTS][__MAX_CNT] = {}; 81__u32 test = 0; 82 83static void inc_dtimes(__u32 idx) 84{ 85 if (test < __NR_TESTS) 86 dtimes[test][idx]++; 87 else 88 dtimes[UKN_TEST][idx]++; 89} 90 91static void inc_errs(__u32 idx) 92{ 93 if (test < __NR_TESTS) 94 errs[test][idx]++; 95 else 96 errs[UKN_TEST][idx]++; 97} 98 99static int skb_proto(int type) 100{ 101 return type & 0xff; 102} 103 104static int skb_ns(int type) 105{ 106 return (type >> 8) & 0xff; 107} 108 109static bool fwdns_clear_dtime(void) 110{ 111 return test == TCP_IP6_CLEAR_DTIME; 112} 113 114static bool bpf_fwd(void) 115{ 116 return test < TCP_IP4_RT_FWD; 117} 118 119static __u8 get_proto(void) 120{ 121 switch (test) { 122 case UDP_IP4: 123 case UDP_IP6: 124 case UDP_IP4_RT_FWD: 125 case UDP_IP6_RT_FWD: 126 return IPPROTO_UDP; 127 default: 128 return IPPROTO_TCP; 129 } 130} 131 132/* -1: parse error: TC_ACT_SHOT 133 * 0: not testing traffic: TC_ACT_OK 134 * >0: first byte is the inet_proto, second byte has the netns 135 * of the sender 136 */ 137static int skb_get_type(struct __sk_buff *skb) 138{ 139 __u16 dst_ns_port = __bpf_htons(50000 + test); 140 void *data_end = ctx_ptr(skb->data_end); 141 void *data = ctx_ptr(skb->data); 142 __u8 inet_proto = 0, ns = 0; 143 struct ipv6hdr *ip6h; 144 __u16 sport, dport; 145 struct iphdr *iph; 146 struct tcphdr *th; 147 struct udphdr *uh; 148 void *trans; 149 150 switch (skb->protocol) { 151 case __bpf_htons(ETH_P_IP): 152 iph = data + sizeof(struct ethhdr); 153 if (iph + 1 > data_end) 154 return -1; 155 if (iph->saddr == ip4_src) 156 ns = SRC_NS; 157 else if (iph->saddr == ip4_dst) 158 ns = DST_NS; 159 inet_proto = iph->protocol; 160 trans = iph + 1; 161 break; 162 case __bpf_htons(ETH_P_IPV6): 163 ip6h = data + sizeof(struct ethhdr); 164 if (ip6h + 1 > data_end) 165 return -1; 166 if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_src}})) 167 ns = SRC_NS; 168 else if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_dst}})) 169 ns = DST_NS; 170 inet_proto = ip6h->nexthdr; 171 trans = ip6h + 1; 172 break; 173 default: 174 return 0; 175 } 176 177 /* skb is not from src_ns or dst_ns. 178 * skb is not the testing IPPROTO. 179 */ 180 if (!ns || inet_proto != get_proto()) 181 return 0; 182 183 switch (inet_proto) { 184 case IPPROTO_TCP: 185 th = trans; 186 if (th + 1 > data_end) 187 return -1; 188 sport = th->source; 189 dport = th->dest; 190 break; 191 case IPPROTO_UDP: 192 uh = trans; 193 if (uh + 1 > data_end) 194 return -1; 195 sport = uh->source; 196 dport = uh->dest; 197 break; 198 default: 199 return 0; 200 } 201 202 /* The skb is the testing traffic */ 203 if ((ns == SRC_NS && dport == dst_ns_port) || 204 (ns == DST_NS && sport == dst_ns_port)) 205 return (ns << 8 | inet_proto); 206 207 return 0; 208} 209 210/* format: direction@iface@netns 211 * egress@veth_(src|dst)@ns_(src|dst) 212 */ 213SEC("tc") 214int egress_host(struct __sk_buff *skb) 215{ 216 int skb_type; 217 218 skb_type = skb_get_type(skb); 219 if (skb_type == -1) 220 return TC_ACT_SHOT; 221 if (!skb_type) 222 return TC_ACT_OK; 223 224 if (skb_proto(skb_type) == IPPROTO_TCP) { 225 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && 226 skb->tstamp) 227 inc_dtimes(EGRESS_ENDHOST); 228 else 229 inc_errs(EGRESS_ENDHOST); 230 } else { 231 if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC && 232 skb->tstamp) 233 inc_dtimes(EGRESS_ENDHOST); 234 else 235 inc_errs(EGRESS_ENDHOST); 236 } 237 238 skb->tstamp = EGRESS_ENDHOST_MAGIC; 239 240 return TC_ACT_OK; 241} 242 243/* ingress@veth_(src|dst)@ns_(src|dst) */ 244SEC("tc") 245int ingress_host(struct __sk_buff *skb) 246{ 247 int skb_type; 248 249 skb_type = skb_get_type(skb); 250 if (skb_type == -1) 251 return TC_ACT_SHOT; 252 if (!skb_type) 253 return TC_ACT_OK; 254 255 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && 256 skb->tstamp == EGRESS_FWDNS_MAGIC) 257 inc_dtimes(INGRESS_ENDHOST); 258 else 259 inc_errs(INGRESS_ENDHOST); 260 261 return TC_ACT_OK; 262} 263 264/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */ 265SEC("tc") 266int ingress_fwdns_prio100(struct __sk_buff *skb) 267{ 268 int skb_type; 269 270 skb_type = skb_get_type(skb); 271 if (skb_type == -1) 272 return TC_ACT_SHOT; 273 if (!skb_type) 274 return TC_ACT_OK; 275 276 /* delivery_time is only available to the ingress 277 * if the tc-bpf checks the skb->tstamp_type. 278 */ 279 if (skb->tstamp == EGRESS_ENDHOST_MAGIC) 280 inc_errs(INGRESS_FWDNS_P100); 281 282 if (fwdns_clear_dtime()) 283 skb->tstamp = 0; 284 285 return TC_ACT_UNSPEC; 286} 287 288/* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */ 289SEC("tc") 290int egress_fwdns_prio100(struct __sk_buff *skb) 291{ 292 int skb_type; 293 294 skb_type = skb_get_type(skb); 295 if (skb_type == -1) 296 return TC_ACT_SHOT; 297 if (!skb_type) 298 return TC_ACT_OK; 299 300 /* delivery_time is always available to egress even 301 * the tc-bpf did not use the tstamp_type. 302 */ 303 if (skb->tstamp == INGRESS_FWDNS_MAGIC) 304 inc_dtimes(EGRESS_FWDNS_P100); 305 else 306 inc_errs(EGRESS_FWDNS_P100); 307 308 if (fwdns_clear_dtime()) 309 skb->tstamp = 0; 310 311 return TC_ACT_UNSPEC; 312} 313 314/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */ 315SEC("tc") 316int ingress_fwdns_prio101(struct __sk_buff *skb) 317{ 318 __u64 expected_dtime = EGRESS_ENDHOST_MAGIC; 319 int skb_type; 320 321 skb_type = skb_get_type(skb); 322 if (skb_type == -1 || !skb_type) 323 /* Should have handled in prio100 */ 324 return TC_ACT_SHOT; 325 326 if (skb_proto(skb_type) == IPPROTO_UDP) 327 expected_dtime = 0; 328 329 if (skb->tstamp_type) { 330 if (fwdns_clear_dtime() || 331 skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || 332 skb->tstamp != expected_dtime) 333 inc_errs(INGRESS_FWDNS_P101); 334 else 335 inc_dtimes(INGRESS_FWDNS_P101); 336 } else { 337 if (!fwdns_clear_dtime() && expected_dtime) 338 inc_errs(INGRESS_FWDNS_P101); 339 } 340 341 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { 342 skb->tstamp = INGRESS_FWDNS_MAGIC; 343 } else { 344 if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 345 BPF_SKB_TSTAMP_DELIVERY_MONO)) 346 inc_errs(SET_DTIME); 347 if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 348 BPF_SKB_TSTAMP_UNSPEC)) 349 inc_errs(SET_DTIME); 350 } 351 352 if (skb_ns(skb_type) == SRC_NS) 353 return bpf_fwd() ? 354 bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK; 355 else 356 return bpf_fwd() ? 357 bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK; 358} 359 360/* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */ 361SEC("tc") 362int egress_fwdns_prio101(struct __sk_buff *skb) 363{ 364 int skb_type; 365 366 skb_type = skb_get_type(skb); 367 if (skb_type == -1 || !skb_type) 368 /* Should have handled in prio100 */ 369 return TC_ACT_SHOT; 370 371 if (skb->tstamp_type) { 372 if (fwdns_clear_dtime() || 373 skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || 374 skb->tstamp != INGRESS_FWDNS_MAGIC) 375 inc_errs(EGRESS_FWDNS_P101); 376 else 377 inc_dtimes(EGRESS_FWDNS_P101); 378 } else { 379 if (!fwdns_clear_dtime()) 380 inc_errs(EGRESS_FWDNS_P101); 381 } 382 383 if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { 384 skb->tstamp = EGRESS_FWDNS_MAGIC; 385 } else { 386 if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC, 387 BPF_SKB_TSTAMP_DELIVERY_MONO)) 388 inc_errs(SET_DTIME); 389 if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 390 BPF_SKB_TSTAMP_UNSPEC)) 391 inc_errs(SET_DTIME); 392 } 393 394 return TC_ACT_OK; 395} 396 397char __license[] SEC("license") = "GPL"; 398