1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2020 Facebook */
3
4#include <stddef.h>
5#include <errno.h>
6#include <stdbool.h>
7#include <sys/types.h>
8#include <sys/socket.h>
9#include <linux/ipv6.h>
10#include <linux/tcp.h>
11#include <linux/socket.h>
12#include <linux/bpf.h>
13#include <linux/types.h>
14#include <bpf/bpf_helpers.h>
15#include <bpf/bpf_endian.h>
16#define BPF_PROG_TEST_TCP_HDR_OPTIONS
17#include "test_tcp_hdr_options.h"
18
/* Value (network byte order) that the last 16-bit word of both the source
 * and the destination IPv6 address of the saved SYN is compared against
 * in __check_active_hdr_in().
 */
__u16 last_addr16_n = __bpf_htons(1);
/* Local port of the active side, recorded by write_active_opt() when it
 * sees a SYN: _n is taken from the TCP header's source port, _h from
 * skops->local_port.
 */
__u16 active_lport_n = 0;
__u16 active_lport_h = 0;
/* Local port of the passive side, recorded on BPF_SOCK_OPS_TCP_LISTEN_CB:
 * _h is skops->local_port, _n is the same value passed through
 * __bpf_htons().
 */
__u16 passive_lport_n = 0;
__u16 passive_lport_h = 0;
24
/* Counters bumped while handling packets at the passive side:
 *
 * nr_pure_ack:  ACK without FIN whose TCP header length equals skb_len
 * nr_data:      TCP header length is smaller than skb_len
 * nr_syn:       saved SYN passed the checks in handle_passive_estab()
 * nr_fin:       FIN flag set
 * nr_hwtstamp:  skops->skb_hwtstamp was non-zero
 */
unsigned int nr_pure_ack = 0;
unsigned int nr_data = 0;
unsigned int nr_syn = 0;
unsigned int nr_fin = 0;
unsigned int nr_hwtstamp = 0;
31
/* Check the header received from the active side
 *
 * Looks up the two options that write_active_opt() stores (regular kind
 * 0xB9 carrying 0xfa 0xce, and experimental kind TCPOPT_EXP with magic
 * 0xeB9F) through bpf_load_hdr_opt(), including a few lookups that are
 * expected to fail with a specific error code.
 *
 * @skops:     sock_ops context of the current callback
 * @check_syn: when true, search with BPF_LOAD_HDR_OPT_TCP_SYN and also
 *             validate the SYN headers returned by bpf_getsockopt()
 *
 * Returns CG_OK when every expectation holds.  Any mismatch goes through
 * RET_CG_ERR() (defined outside this file); callers compare the result
 * against CG_ERR.
 *
 * The scratch union is reused from one step to the next, so the order of
 * the statements below matters.
 */
static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
{
	/* One scratch buffer viewed either as an option to search for or
	 * as the raw headers copied out by bpf_getsockopt().
	 */
	union {
		struct tcphdr th;
		struct ipv6hdr ip6;
		struct tcp_exprm_opt exprm_opt;
		struct tcp_opt reg_opt;
		__u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */
	} hdr = {};
	__u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
	struct tcphdr *pth;
	int ret;

	hdr.reg_opt.kind = 0xB9;

	/* The option is 4 bytes long instead of 2 bytes */
	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags);
	if (ret != -ENOSPC)
		RET_CG_ERR(ret);

	/* Test searching magic with regular kind */
	hdr.reg_opt.len = 4;
	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
			       load_flags);
	if (ret != -EINVAL)
		RET_CG_ERR(ret);

	/* Search by kind only (len == 0): the whole 4-byte option is
	 * expected back, with len and data filled in.
	 */
	hdr.reg_opt.len = 0;
	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
			       load_flags);
	if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 ||
	    hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce)
		RET_CG_ERR(ret);

	/* Test searching experimental option with invalid kind length */
	hdr.exprm_opt.kind = TCPOPT_EXP;
	hdr.exprm_opt.len = 5;
	hdr.exprm_opt.magic = 0;
	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
			       load_flags);
	if (ret != -EINVAL)
		RET_CG_ERR(ret);

	/* Test searching experimental option with 0 magic value */
	hdr.exprm_opt.len = 4;
	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
			       load_flags);
	if (ret != -ENOMSG)
		RET_CG_ERR(ret);

	/* Same search with the magic that write_active_opt() stores */
	hdr.exprm_opt.magic = __bpf_htons(0xeB9F);
	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
			       load_flags);
	if (ret != 4 || hdr.exprm_opt.len != 4 ||
	    hdr.exprm_opt.kind != TCPOPT_EXP ||
	    hdr.exprm_opt.magic != __bpf_htons(0xeB9F))
		RET_CG_ERR(ret);

	/* The remaining checks only apply to the SYN */
	if (!check_syn)
		return CG_OK;

	/* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV
	 *
	 * Test loading from tp->saved_syn for other sk_state.
	 */
	/* A buffer holding only the IPv6 header is expected to yield
	 * -ENOSPC; the address check below still reads hdr.ip6 afterwards.
	 */
	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6,
			     sizeof(hdr.ip6));
	if (ret != -ENOSPC)
		RET_CG_ERR(ret);

	if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n ||
	    hdr.ip6.daddr.s6_addr16[7] != last_addr16_n)
		RET_CG_ERR(0);

	/* Full-size buffer: the TCP header is read right after the IPv6 one */
	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr));
	if (ret < 0)
		RET_CG_ERR(ret);

	pth = (struct tcphdr *)(&hdr.ip6 + 1);
	if (pth->dest != passive_lport_n || pth->source != active_lport_n)
		RET_CG_ERR(0);

	/* TCP_BPF_SYN: the buffer is read as starting with the TCP header */
	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr));
	if (ret < 0)
		RET_CG_ERR(ret);

	if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n)
		RET_CG_ERR(0);

	return CG_OK;
}
124
/* Run the active-side header checks against the SYN: option searches are
 * done with the SYN flag and the SYN headers are validated as well.
 *
 * Returns whatever __check_active_hdr_in() returns.
 */
static int check_active_syn_in(struct bpf_sock_ops *skops)
{
	const bool check_syn = true;

	return __check_active_hdr_in(skops, check_syn);
}
129
130static int check_active_hdr_in(struct bpf_sock_ops *skops)
131{
132	struct tcphdr *th;
133
134	if (__check_active_hdr_in(skops, false) == CG_ERR)
135		return CG_ERR;
136
137	th = skops->skb_data;
138	if (th + 1 > skops->skb_data_end)
139		RET_CG_ERR(0);
140
141	if (tcp_hdrlen(th) < skops->skb_len)
142		nr_data++;
143
144	if (th->fin)
145		nr_fin++;
146
147	if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
148		nr_pure_ack++;
149
150	if (skops->skb_hwtstamp)
151		nr_hwtstamp++;
152
153	return CG_OK;
154}
155
156static int active_opt_len(struct bpf_sock_ops *skops)
157{
158	int err;
159
160	/* Reserve more than enough to allow the -EEXIST test in
161	 * the write_active_opt().
162	 */
163	err = bpf_reserve_hdr_opt(skops, 12, 0);
164	if (err)
165		RET_CG_ERR(err);
166
167	return CG_OK;
168}
169
/* Write the active side's header options and verify the store/load helpers.
 *
 * Stores one experimental option (TCPOPT_EXP, len 4, magic 0xeB9F) and one
 * regular option (kind 0xB9, len 4, data 0xfa 0xce), checks that storing
 * either of them a second time fails with -EEXIST, and reads both back
 * with bpf_load_hdr_opt().  On a SYN it additionally records the active
 * side's local port and checks the window scale option.
 *
 * Returns CG_OK when every expectation holds; any mismatch goes through
 * RET_CG_ERR() (defined outside this file).
 */
static int write_active_opt(struct bpf_sock_ops *skops)
{
	struct tcp_exprm_opt exprm_opt = {};
	struct tcp_opt win_scale_opt = {};
	struct tcp_opt reg_opt = {};
	struct tcphdr *th;
	int err, ret;

	exprm_opt.kind = TCPOPT_EXP;
	exprm_opt.len = 4;
	exprm_opt.magic = __bpf_htons(0xeB9F);

	reg_opt.kind = 0xB9;
	reg_opt.len = 4;
	reg_opt.data[0] = 0xfa;
	reg_opt.data[1] = 0xce;

	/* len stays 0: this one is only used for a search by kind */
	win_scale_opt.kind = TCPOPT_WINDOW;

	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
	if (err)
		RET_CG_ERR(err);

	/* Store the same exprm option */
	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
	if (err != -EEXIST)
		RET_CG_ERR(err);

	/* Same pair of stores for the regular option */
	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
	if (err)
		RET_CG_ERR(err);
	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
	if (err != -EEXIST)
		RET_CG_ERR(err);

	/* Check the option has been written and can be searched */
	ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
	if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP ||
	    exprm_opt.magic != __bpf_htons(0xeB9F))
		RET_CG_ERR(ret);

	/* len == 0: search by kind only and let the helper fill in the rest */
	reg_opt.len = 0;
	ret = bpf_load_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
	if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 ||
	    reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce)
		RET_CG_ERR(ret);

	/* NOTE(review): the packet pointer is fetched only after the
	 * bpf_store_hdr_opt() calls above; presumably deliberate, confirm
	 * before moving it.
	 */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (th->syn) {
		/* Remember the active side's port for the later checks */
		active_lport_h = skops->local_port;
		active_lport_n = th->source;

		/* Search the win scale option written by kernel
		 * in the SYN packet.
		 */
		ret = bpf_load_hdr_opt(skops, &win_scale_opt,
				       sizeof(win_scale_opt), 0);
		if (ret != 3 || win_scale_opt.len != 3 ||
		    win_scale_opt.kind != TCPOPT_WINDOW)
			RET_CG_ERR(ret);

		/* Write the win scale option that kernel
		 * has already written.
		 */
		err = bpf_store_hdr_opt(skops, &win_scale_opt,
					sizeof(win_scale_opt), 0);
		if (err != -EEXIST)
			RET_CG_ERR(err);
	}

	return CG_OK;
}
245
246static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
247{
248	__u8 tcp_flags = skops_tcp_flags(skops);
249
250	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
251		/* Check the SYN from bpf_sock_ops_kern->syn_skb */
252		return check_active_syn_in(skops);
253
254	/* Passive side should have cleared the write hdr cb by now */
255	if (skops->local_port == passive_lport_h)
256		RET_CG_ERR(0);
257
258	return active_opt_len(skops);
259}
260
261static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
262{
263	if (skops->local_port == passive_lport_h)
264		RET_CG_ERR(0);
265
266	return write_active_opt(skops);
267}
268
269static int handle_parse_hdr(struct bpf_sock_ops *skops)
270{
271	/* Passive side is not writing any non-standard/unknown
272	 * option, so the active side should never be called.
273	 */
274	if (skops->local_port == active_lport_h)
275		RET_CG_ERR(0);
276
277	return check_active_hdr_in(skops);
278}
279
280static int handle_passive_estab(struct bpf_sock_ops *skops)
281{
282	int err;
283
284	/* No more write hdr cb */
285	bpf_sock_ops_cb_flags_set(skops,
286				  skops->bpf_sock_ops_cb_flags &
287				  ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
288
289	/* Recheck the SYN but check the tp->saved_syn this time */
290	err = check_active_syn_in(skops);
291	if (err == CG_ERR)
292		return err;
293
294	nr_syn++;
295
296	/* The ack has header option written by the active side also */
297	return check_active_hdr_in(skops);
298}
299
300SEC("sockops")
301int misc_estab(struct bpf_sock_ops *skops)
302{
303	int true_val = 1;
304
305	switch (skops->op) {
306	case BPF_SOCK_OPS_TCP_LISTEN_CB:
307		passive_lport_h = skops->local_port;
308		passive_lport_n = __bpf_htons(passive_lport_h);
309		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
310			       &true_val, sizeof(true_val));
311		set_hdr_cb_flags(skops, 0);
312		break;
313	case BPF_SOCK_OPS_TCP_CONNECT_CB:
314		set_hdr_cb_flags(skops, 0);
315		break;
316	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
317		return handle_parse_hdr(skops);
318	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
319		return handle_hdr_opt_len(skops);
320	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
321		return handle_write_hdr_opt(skops);
322	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
323		return handle_passive_estab(skops);
324	}
325
326	return CG_OK;
327}
328
/* License declaration for this program, emitted through SEC("license") */
char _license[] SEC("license") = "GPL";
330