1226031Sstas// SPDX-License-Identifier: GPL-2.0 2226031Sstas// Copyright (c) 2019 Cloudflare Ltd. 3226128Sstas// Copyright (c) 2020 Isovalent, Inc. 4226128Sstas 5226031Sstas#include <stddef.h> 6226031Sstas#include <stdbool.h> 7226128Sstas#include <string.h> 8226031Sstas#include <linux/bpf.h> 9226031Sstas#include <linux/if_ether.h> 10226031Sstas#include <linux/in.h> 11226031Sstas#include <linux/ip.h> 12226128Sstas#include <linux/ipv6.h> 13226031Sstas#include <linux/pkt_cls.h> 14226031Sstas#include <linux/tcp.h> 15226031Sstas#include <sys/socket.h> 16226031Sstas#include <bpf/bpf_helpers.h> 17226031Sstas#include <bpf/bpf_endian.h> 18226031Sstas#include "bpf_misc.h" 19226031Sstas 20226031Sstas#if defined(IPROUTE2_HAVE_LIBBPF) 21226031Sstas/* Use a new-style map definition. */ 22226031Sstasstruct { 23226031Sstas __uint(type, BPF_MAP_TYPE_SOCKMAP); 24226031Sstas __type(key, int); 25226031Sstas __type(value, __u64); 26226031Sstas __uint(pinning, LIBBPF_PIN_BY_NAME); 27226031Sstas __uint(max_entries, 1); 28226031Sstas} server_map SEC(".maps"); 29226031Sstas#else 30226031Sstas/* Pin map under /sys/fs/bpf/tc/globals/<map name> */ 31226031Sstas#define PIN_GLOBAL_NS 2 32226031Sstas 33226031Sstas/* Must match struct bpf_elf_map layout from iproute2 */ 34226031Sstasstruct { 35226031Sstas __u32 type; 36226031Sstas __u32 size_key; 37226031Sstas __u32 size_value; 38226031Sstas __u32 max_elem; 39226031Sstas __u32 flags; 40226031Sstas __u32 id; 41226031Sstas __u32 pinning; 42226031Sstas} server_map SEC("maps") = { 43226031Sstas .type = BPF_MAP_TYPE_SOCKMAP, 44226031Sstas .size_key = sizeof(int), 45226031Sstas .size_value = sizeof(__u64), 46226031Sstas .max_elem = 1, 47226031Sstas .pinning = PIN_GLOBAL_NS, 48226031Sstas}; 49226031Sstas#endif 50226031Sstas 51226031Sstaschar _license[] SEC("license") = "GPL"; 52226031Sstas 53226031Sstas/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ 54226031Sstasstatic inline struct bpf_sock_tuple * 55226031Sstasget_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) 56226031Sstas{ 57226031Sstas void *data_end = (void *)(long)skb->data_end; 58226031Sstas void *data = (void *)(long)skb->data; 59226031Sstas struct bpf_sock_tuple *result; 60226031Sstas struct ethhdr *eth; 61226031Sstas __u8 proto = 0; 62226031Sstas __u64 ihl_len; 63226031Sstas 64226031Sstas eth = (struct ethhdr *)(data); 65226031Sstas if (eth + 1 > data_end) 66226031Sstas return NULL; 67226031Sstas 68226031Sstas if (eth->h_proto == bpf_htons(ETH_P_IP)) { 69226031Sstas struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth)); 70226031Sstas 71226031Sstas if (iph + 1 > data_end) 72226031Sstas return NULL; 73226031Sstas if (iph->ihl != 5) 74226031Sstas /* Options are not supported */ 75226031Sstas return NULL; 76226031Sstas ihl_len = iph->ihl * 4; 77226031Sstas proto = iph->protocol; 78226031Sstas *ipv4 = true; 79226031Sstas result = (struct bpf_sock_tuple *)&iph->saddr; 80226031Sstas } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { 81226031Sstas struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth)); 82226031Sstas 83226031Sstas if (ip6h + 1 > data_end) 84226031Sstas return NULL; 85226031Sstas ihl_len = sizeof(*ip6h); 86226031Sstas proto = ip6h->nexthdr; 87226031Sstas *ipv4 = false; 88226031Sstas result = (struct bpf_sock_tuple *)&ip6h->saddr; 89226031Sstas } else { 90226031Sstas return (struct bpf_sock_tuple *)data; 91226031Sstas } 92226031Sstas 93226031Sstas if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) 94226128Sstas return NULL; 95226031Sstas 96226031Sstas *tcp = (proto == IPPROTO_TCP); 97226031Sstas __sink(ihl_len); 98226031Sstas return result; 99226031Sstas} 100226031Sstas 101226031Sstasstatic inline int 102226031Sstashandle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) 103226031Sstas{ 104226031Sstas struct bpf_sock *sk; 105226031Sstas const int zero = 0; 106226031Sstas size_t tuple_len; 107226031Sstas __be16 dport; 108226031Sstas int ret; 109226031Sstas 110226031Sstas tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); 111226031Sstas if ((void *)tuple + tuple_len > (void *)(long)skb->data_end) 112226031Sstas return TC_ACT_SHOT; 113226031Sstas 114226031Sstas sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); 115226031Sstas if (sk) 116226031Sstas goto assign; 117226031Sstas 118226031Sstas dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport; 119226031Sstas if (dport != bpf_htons(4321)) 120226031Sstas return TC_ACT_OK; 121226031Sstas 122226031Sstas sk = bpf_map_lookup_elem(&server_map, &zero); 123226031Sstas if (!sk) 124226031Sstas return TC_ACT_SHOT; 125226031Sstas 126226031Sstasassign: 127226031Sstas ret = bpf_sk_assign(skb, sk, 0); 128226031Sstas bpf_sk_release(sk); 129226031Sstas return ret; 130226031Sstas} 131226031Sstas 132226031Sstasstatic inline int 133226031Sstashandle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) 134226031Sstas{ 135226031Sstas struct bpf_sock *sk; 136226031Sstas const int zero = 0; 137226031Sstas size_t tuple_len; 138226031Sstas __be16 dport; 139226031Sstas int ret; 140226031Sstas 141226031Sstas tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); 142226128Sstas if ((void *)tuple + tuple_len > (void *)(long)skb->data_end) 143226128Sstas return TC_ACT_SHOT; 144226128Sstas 145226031Sstas sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); 146226031Sstas if (sk) { 147226031Sstas if (sk->state != BPF_TCP_LISTEN) 148226031Sstas goto assign; 149226031Sstas bpf_sk_release(sk); 150226031Sstas } 151226031Sstas 152226031Sstas dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport; 153226031Sstas if (dport != bpf_htons(4321)) 154226031Sstas return TC_ACT_OK; 155226031Sstas 156226031Sstas sk = bpf_map_lookup_elem(&server_map, &zero); 157226031Sstas if (!sk) 158226031Sstas return TC_ACT_SHOT; 159226031Sstas 160226031Sstas if (sk->state != BPF_TCP_LISTEN) { 161226031Sstas bpf_sk_release(sk); 162226031Sstas return TC_ACT_SHOT; 163226031Sstas } 164226031Sstas 165226031Sstasassign: 166226031Sstas ret = bpf_sk_assign(skb, sk, 0); 167226031Sstas bpf_sk_release(sk); 168226031Sstas return ret; 169226031Sstas} 170226031Sstas 171226031SstasSEC("tc") 172226031Sstasint bpf_sk_assign_test(struct __sk_buff *skb) 173226031Sstas{ 174226031Sstas struct bpf_sock_tuple *tuple; 175226031Sstas bool ipv4 = false; 176226031Sstas bool tcp = false; 177226031Sstas int ret = 0; 178226031Sstas 179226031Sstas tuple = get_tuple(skb, &ipv4, &tcp); 180226031Sstas if (!tuple) 181226031Sstas return TC_ACT_SHOT; 182226031Sstas 183226031Sstas /* Note that the verifier socket return type for bpf_skc_lookup_tcp() 184226031Sstas * differs from bpf_sk_lookup_udp(), so even though the C-level type is 185226031Sstas * the same here, if we try to share the implementations they will 186226031Sstas * fail to verify because we're crossing pointer types. 187226031Sstas */ 188226031Sstas if (tcp) 189226031Sstas ret = handle_tcp(skb, tuple, ipv4); 190226031Sstas else 191226031Sstas ret = handle_udp(skb, tuple, ipv4); 192226031Sstas 193226031Sstas return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT; 194226128Sstas} 195226128Sstas