1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Cloudflare Ltd.
3// Copyright (c) 2020 Isovalent, Inc.
4
5#include <stddef.h>
6#include <stdbool.h>
7#include <string.h>
8#include <linux/bpf.h>
9#include <linux/if_ether.h>
10#include <linux/in.h>
11#include <linux/ip.h>
12#include <linux/ipv6.h>
13#include <linux/pkt_cls.h>
14#include <linux/tcp.h>
15#include <sys/socket.h>
16#include <bpf/bpf_helpers.h>
17#include <bpf/bpf_endian.h>
18#include "bpf_misc.h"
19
/*
 * server_map: a single-entry BPF_MAP_TYPE_SOCKMAP with an int key and a
 * __u64 value.  The handlers in this file look up key 0 in it.
 *
 * Two declarations of the same map are provided; IPROUTE2_HAVE_LIBBPF
 * selects which one is compiled.
 * NOTE(review): the map is not populated anywhere in this file; presumably
 * the loader/test harness inserts the socket -- confirm against the caller.
 */
#if defined(IPROUTE2_HAVE_LIBBPF)
/* Use a new-style map definition (placed in the ".maps" section). */
struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__type(key, int);
	__type(value, __u64);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, 1);
} server_map SEC(".maps");
#else
/* Pin map under /sys/fs/bpf/tc/globals/<map name> */
#define PIN_GLOBAL_NS 2

/*
 * Old-style map definition (placed in the "maps" section).
 * Must match struct bpf_elf_map layout from iproute2, so do not reorder,
 * resize or remove fields.  Fields without an initializer below (flags, id)
 * are zero-initialized.
 */
struct {
	__u32 type;
	__u32 size_key;
	__u32 size_value;
	__u32 max_elem;
	__u32 flags;
	__u32 id;
	__u32 pinning;
} server_map SEC("maps") = {
	.type = BPF_MAP_TYPE_SOCKMAP,
	.size_key = sizeof(int),
	.size_value  = sizeof(__u64),
	.max_elem = 1,
	.pinning = PIN_GLOBAL_NS,
};
#endif
50
/* License string for this object, emitted into the "license" section. */
char _license[] SEC("license") = "GPL";
52
53/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
54static inline struct bpf_sock_tuple *
55get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
56{
57	void *data_end = (void *)(long)skb->data_end;
58	void *data = (void *)(long)skb->data;
59	struct bpf_sock_tuple *result;
60	struct ethhdr *eth;
61	__u8 proto = 0;
62	__u64 ihl_len;
63
64	eth = (struct ethhdr *)(data);
65	if (eth + 1 > data_end)
66		return NULL;
67
68	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
69		struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
70
71		if (iph + 1 > data_end)
72			return NULL;
73		if (iph->ihl != 5)
74			/* Options are not supported */
75			return NULL;
76		ihl_len = iph->ihl * 4;
77		proto = iph->protocol;
78		*ipv4 = true;
79		result = (struct bpf_sock_tuple *)&iph->saddr;
80	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
81		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
82
83		if (ip6h + 1 > data_end)
84			return NULL;
85		ihl_len = sizeof(*ip6h);
86		proto = ip6h->nexthdr;
87		*ipv4 = false;
88		result = (struct bpf_sock_tuple *)&ip6h->saddr;
89	} else {
90		return (struct bpf_sock_tuple *)data;
91	}
92
93	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
94		return NULL;
95
96	*tcp = (proto == IPPROTO_TCP);
97	__sink(ihl_len);
98	return result;
99}
100
101static inline int
102handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
103{
104	struct bpf_sock *sk;
105	const int zero = 0;
106	size_t tuple_len;
107	__be16 dport;
108	int ret;
109
110	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
111	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
112		return TC_ACT_SHOT;
113
114	sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
115	if (sk)
116		goto assign;
117
118	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
119	if (dport != bpf_htons(4321))
120		return TC_ACT_OK;
121
122	sk = bpf_map_lookup_elem(&server_map, &zero);
123	if (!sk)
124		return TC_ACT_SHOT;
125
126assign:
127	ret = bpf_sk_assign(skb, sk, 0);
128	bpf_sk_release(sk);
129	return ret;
130}
131
132static inline int
133handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
134{
135	struct bpf_sock *sk;
136	const int zero = 0;
137	size_t tuple_len;
138	__be16 dport;
139	int ret;
140
141	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
142	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
143		return TC_ACT_SHOT;
144
145	sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
146	if (sk) {
147		if (sk->state != BPF_TCP_LISTEN)
148			goto assign;
149		bpf_sk_release(sk);
150	}
151
152	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
153	if (dport != bpf_htons(4321))
154		return TC_ACT_OK;
155
156	sk = bpf_map_lookup_elem(&server_map, &zero);
157	if (!sk)
158		return TC_ACT_SHOT;
159
160	if (sk->state != BPF_TCP_LISTEN) {
161		bpf_sk_release(sk);
162		return TC_ACT_SHOT;
163	}
164
165assign:
166	ret = bpf_sk_assign(skb, sk, 0);
167	bpf_sk_release(sk);
168	return ret;
169}
170
171SEC("tc")
172int bpf_sk_assign_test(struct __sk_buff *skb)
173{
174	struct bpf_sock_tuple *tuple;
175	bool ipv4 = false;
176	bool tcp = false;
177	int ret = 0;
178
179	tuple = get_tuple(skb, &ipv4, &tcp);
180	if (!tuple)
181		return TC_ACT_SHOT;
182
183	/* Note that the verifier socket return type for bpf_skc_lookup_tcp()
184	 * differs from bpf_sk_lookup_udp(), so even though the C-level type is
185	 * the same here, if we try to share the implementations they will
186	 * fail to verify because we're crossing pointer types.
187	 */
188	if (tcp)
189		ret = handle_tcp(skb, tuple, ipv4);
190	else
191		ret = handle_udp(skb, tuple, ipv4);
192
193	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
194}
195