1// SPDX-License-Identifier: GPL-2.0
2#include <limits.h>
3#include <stddef.h>
4#include <stdbool.h>
5#include <string.h>
6#include <linux/pkt_cls.h>
7#include <linux/bpf.h>
8#include <linux/in.h>
9#include <linux/if_ether.h>
10#include <linux/icmp.h>
11#include <linux/ip.h>
12#include <linux/ipv6.h>
13#include <linux/tcp.h>
14#include <linux/udp.h>
15#include <linux/if_packet.h>
16#include <sys/socket.h>
17#include <linux/if_tunnel.h>
18#include <linux/mpls.h>
19#include <bpf/bpf_helpers.h>
20#include <bpf/bpf_endian.h>
21
/* PROG(F) opens the definition of the tail-call target for program id F: the
 * function is placed in the "flow_dissector" section and is named
 * flow_dissector_<F>. F is macro-expanded before it is pasted, so PROG(IP)
 * yields flow_dissector_0. The NAME argument of PROG_() is not used.
 */
#define PROG(F) PROG_(F, _##F)
#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM

/* IPv4 source address, in host byte order, that _dissect() looks for in
 * order to return BPF_FLOW_DISSECTOR_CONTINUE.
 */
#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
26
/* These are the identifiers of the BPF programs that will be used in tail
 * calls; each one is also the program's index in jmp_table. Program names are
 * limited to 16 characters including the terminating character, so after the
 * prefix added by PROG_() above only a few characters are left to work with;
 * anything after that will be cropped.
 */
#define IP		0 /* IPv4 header */
#define IPV6		1 /* IPv6 fixed header */
#define IPV6OP		2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
#define IPV6FR		3 /* Fragmentation IPv6 Extension Header */
#define MPLS		4 /* MPLS label */
#define VLAN		5 /* 802.1Q / 802.1AD VLAN tag */
#define MAX_PROG	6 /* Number of program identifiers above */
38
/* Masks for the IPv4 header frag_off field, in host byte order: the
 * "more fragments" flag and the fragment offset bits.
 */
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF
/* Masks for the IPv6 fragment header frag_off field, in host byte order:
 * the "more fragments" flag and the fragment offset bits.
 */
#define IP6_MF		0x0001
#define IP6_OFFSET	0xFFF8
43
44struct vlan_hdr {
45	__be16 h_vlan_TCI;
46	__be16 h_vlan_encapsulated_proto;
47};
48
49struct gre_hdr {
50	__be16 flags;
51	__be16 proto;
52};
53
54struct frag_hdr {
55	__u8 nexthdr;
56	__u8 reserved;
57	__be16 frag_off;
58	__be32 identification;
59};
60
61struct {
62	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
63	__uint(max_entries, MAX_PROG);
64	__uint(key_size, sizeof(__u32));
65	__uint(value_size, sizeof(__u32));
66} jmp_table SEC(".maps");
67
68struct {
69	__uint(type, BPF_MAP_TYPE_HASH);
70	__uint(max_entries, 1024);
71	__type(key, __u32);
72	__type(value, struct bpf_flow_keys);
73} last_dissection SEC(".maps");
74
75static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
76					    int ret)
77{
78	__u32 key = (__u32)(keys->sport) << 16 | keys->dport;
79	struct bpf_flow_keys val;
80
81	memcpy(&val, keys, sizeof(val));
82	bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
83	return ret;
84}
85
86#define IPV6_FLOWLABEL_MASK		__bpf_constant_htonl(0x000FFFFF)
87static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
88{
89	return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
90}
91
92static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
93							 __u16 hdr_size,
94							 void *buffer)
95{
96	void *data_end = (void *)(long)skb->data_end;
97	void *data = (void *)(long)skb->data;
98	__u16 thoff = skb->flow_keys->thoff;
99	__u8 *hdr;
100
101	/* Verifies this variable offset does not overflow */
102	if (thoff > (USHRT_MAX - hdr_size))
103		return NULL;
104
105	hdr = data + thoff;
106	if (hdr + hdr_size <= data_end)
107		return hdr;
108
109	if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
110		return NULL;
111
112	return buffer;
113}
114
115/* Dispatches on ETHERTYPE */
116static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
117{
118	struct bpf_flow_keys *keys = skb->flow_keys;
119
120	switch (proto) {
121	case bpf_htons(ETH_P_IP):
122		bpf_tail_call_static(skb, &jmp_table, IP);
123		break;
124	case bpf_htons(ETH_P_IPV6):
125		bpf_tail_call_static(skb, &jmp_table, IPV6);
126		break;
127	case bpf_htons(ETH_P_MPLS_MC):
128	case bpf_htons(ETH_P_MPLS_UC):
129		bpf_tail_call_static(skb, &jmp_table, MPLS);
130		break;
131	case bpf_htons(ETH_P_8021Q):
132	case bpf_htons(ETH_P_8021AD):
133		bpf_tail_call_static(skb, &jmp_table, VLAN);
134		break;
135	default:
136		/* Protocol not supported */
137		return export_flow_keys(keys, BPF_DROP);
138	}
139
140	return export_flow_keys(keys, BPF_DROP);
141}
142
143SEC("flow_dissector")
144int _dissect(struct __sk_buff *skb)
145{
146	struct bpf_flow_keys *keys = skb->flow_keys;
147
148	if (keys->n_proto == bpf_htons(ETH_P_IP)) {
149		/* IP traffic from FLOW_CONTINUE_SADDR falls-back to
150		 * standard dissector
151		 */
152		struct iphdr *iph, _iph;
153
154		iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
155		if (iph && iph->ihl == 5 &&
156		    iph->saddr == bpf_htonl(FLOW_CONTINUE_SADDR)) {
157			return BPF_FLOW_DISSECTOR_CONTINUE;
158		}
159	}
160
161	return parse_eth_proto(skb, keys->n_proto);
162}
163
164/* Parses on IPPROTO_* */
165static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
166{
167	struct bpf_flow_keys *keys = skb->flow_keys;
168	void *data_end = (void *)(long)skb->data_end;
169	struct icmphdr *icmp, _icmp;
170	struct gre_hdr *gre, _gre;
171	struct ethhdr *eth, _eth;
172	struct tcphdr *tcp, _tcp;
173	struct udphdr *udp, _udp;
174
175	switch (proto) {
176	case IPPROTO_ICMP:
177		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
178		if (!icmp)
179			return export_flow_keys(keys, BPF_DROP);
180		return export_flow_keys(keys, BPF_OK);
181	case IPPROTO_IPIP:
182		keys->is_encap = true;
183		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
184			return export_flow_keys(keys, BPF_OK);
185
186		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
187	case IPPROTO_IPV6:
188		keys->is_encap = true;
189		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
190			return export_flow_keys(keys, BPF_OK);
191
192		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
193	case IPPROTO_GRE:
194		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
195		if (!gre)
196			return export_flow_keys(keys, BPF_DROP);
197
198		if (bpf_htons(gre->flags & GRE_VERSION))
199			/* Only inspect standard GRE packets with version 0 */
200			return export_flow_keys(keys, BPF_OK);
201
202		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
203		if (GRE_IS_CSUM(gre->flags))
204			keys->thoff += 4; /* Step over chksum and Padding */
205		if (GRE_IS_KEY(gre->flags))
206			keys->thoff += 4; /* Step over key */
207		if (GRE_IS_SEQ(gre->flags))
208			keys->thoff += 4; /* Step over sequence number */
209
210		keys->is_encap = true;
211		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
212			return export_flow_keys(keys, BPF_OK);
213
214		if (gre->proto == bpf_htons(ETH_P_TEB)) {
215			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
216							  &_eth);
217			if (!eth)
218				return export_flow_keys(keys, BPF_DROP);
219
220			keys->thoff += sizeof(*eth);
221
222			return parse_eth_proto(skb, eth->h_proto);
223		} else {
224			return parse_eth_proto(skb, gre->proto);
225		}
226	case IPPROTO_TCP:
227		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
228		if (!tcp)
229			return export_flow_keys(keys, BPF_DROP);
230
231		if (tcp->doff < 5)
232			return export_flow_keys(keys, BPF_DROP);
233
234		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
235			return export_flow_keys(keys, BPF_DROP);
236
237		keys->sport = tcp->source;
238		keys->dport = tcp->dest;
239		return export_flow_keys(keys, BPF_OK);
240	case IPPROTO_UDP:
241	case IPPROTO_UDPLITE:
242		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
243		if (!udp)
244			return export_flow_keys(keys, BPF_DROP);
245
246		keys->sport = udp->source;
247		keys->dport = udp->dest;
248		return export_flow_keys(keys, BPF_OK);
249	default:
250		return export_flow_keys(keys, BPF_DROP);
251	}
252
253	return export_flow_keys(keys, BPF_DROP);
254}
255
256static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
257{
258	struct bpf_flow_keys *keys = skb->flow_keys;
259
260	switch (nexthdr) {
261	case IPPROTO_HOPOPTS:
262	case IPPROTO_DSTOPTS:
263		bpf_tail_call_static(skb, &jmp_table, IPV6OP);
264		break;
265	case IPPROTO_FRAGMENT:
266		bpf_tail_call_static(skb, &jmp_table, IPV6FR);
267		break;
268	default:
269		return parse_ip_proto(skb, nexthdr);
270	}
271
272	return export_flow_keys(keys, BPF_DROP);
273}
274
275PROG(IP)(struct __sk_buff *skb)
276{
277	void *data_end = (void *)(long)skb->data_end;
278	struct bpf_flow_keys *keys = skb->flow_keys;
279	void *data = (void *)(long)skb->data;
280	struct iphdr *iph, _iph;
281	bool done = false;
282
283	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
284	if (!iph)
285		return export_flow_keys(keys, BPF_DROP);
286
287	/* IP header cannot be smaller than 20 bytes */
288	if (iph->ihl < 5)
289		return export_flow_keys(keys, BPF_DROP);
290
291	keys->addr_proto = ETH_P_IP;
292	keys->ipv4_src = iph->saddr;
293	keys->ipv4_dst = iph->daddr;
294	keys->ip_proto = iph->protocol;
295
296	keys->thoff += iph->ihl << 2;
297	if (data + keys->thoff > data_end)
298		return export_flow_keys(keys, BPF_DROP);
299
300	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
301		keys->is_frag = true;
302		if (iph->frag_off & bpf_htons(IP_OFFSET)) {
303			/* From second fragment on, packets do not have headers
304			 * we can parse.
305			 */
306			done = true;
307		} else {
308			keys->is_first_frag = true;
309			/* No need to parse fragmented packet unless
310			 * explicitly asked for.
311			 */
312			if (!(keys->flags &
313			      BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
314				done = true;
315		}
316	}
317
318	if (done)
319		return export_flow_keys(keys, BPF_OK);
320
321	return parse_ip_proto(skb, iph->protocol);
322}
323
324PROG(IPV6)(struct __sk_buff *skb)
325{
326	struct bpf_flow_keys *keys = skb->flow_keys;
327	struct ipv6hdr *ip6h, _ip6h;
328
329	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
330	if (!ip6h)
331		return export_flow_keys(keys, BPF_DROP);
332
333	keys->addr_proto = ETH_P_IPV6;
334	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
335
336	keys->thoff += sizeof(struct ipv6hdr);
337	keys->ip_proto = ip6h->nexthdr;
338	keys->flow_label = ip6_flowlabel(ip6h);
339
340	if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
341		return export_flow_keys(keys, BPF_OK);
342
343	return parse_ipv6_proto(skb, ip6h->nexthdr);
344}
345
346PROG(IPV6OP)(struct __sk_buff *skb)
347{
348	struct bpf_flow_keys *keys = skb->flow_keys;
349	struct ipv6_opt_hdr *ip6h, _ip6h;
350
351	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
352	if (!ip6h)
353		return export_flow_keys(keys, BPF_DROP);
354
355	/* hlen is in 8-octets and does not include the first 8 bytes
356	 * of the header
357	 */
358	keys->thoff += (1 + ip6h->hdrlen) << 3;
359	keys->ip_proto = ip6h->nexthdr;
360
361	return parse_ipv6_proto(skb, ip6h->nexthdr);
362}
363
364PROG(IPV6FR)(struct __sk_buff *skb)
365{
366	struct bpf_flow_keys *keys = skb->flow_keys;
367	struct frag_hdr *fragh, _fragh;
368
369	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
370	if (!fragh)
371		return export_flow_keys(keys, BPF_DROP);
372
373	keys->thoff += sizeof(*fragh);
374	keys->is_frag = true;
375	keys->ip_proto = fragh->nexthdr;
376
377	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
378		keys->is_first_frag = true;
379
380		/* No need to parse fragmented packet unless
381		 * explicitly asked for.
382		 */
383		if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
384			return export_flow_keys(keys, BPF_OK);
385	} else {
386		return export_flow_keys(keys, BPF_OK);
387	}
388
389	return parse_ipv6_proto(skb, fragh->nexthdr);
390}
391
392PROG(MPLS)(struct __sk_buff *skb)
393{
394	struct bpf_flow_keys *keys = skb->flow_keys;
395	struct mpls_label *mpls, _mpls;
396
397	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
398	if (!mpls)
399		return export_flow_keys(keys, BPF_DROP);
400
401	return export_flow_keys(keys, BPF_OK);
402}
403
404PROG(VLAN)(struct __sk_buff *skb)
405{
406	struct bpf_flow_keys *keys = skb->flow_keys;
407	struct vlan_hdr *vlan, _vlan;
408
409	/* Account for double-tagging */
410	if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
411		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
412		if (!vlan)
413			return export_flow_keys(keys, BPF_DROP);
414
415		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
416			return export_flow_keys(keys, BPF_DROP);
417
418		keys->nhoff += sizeof(*vlan);
419		keys->thoff += sizeof(*vlan);
420	}
421
422	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
423	if (!vlan)
424		return export_flow_keys(keys, BPF_DROP);
425
426	keys->nhoff += sizeof(*vlan);
427	keys->thoff += sizeof(*vlan);
428	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
429	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
430	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
431		return export_flow_keys(keys, BPF_DROP);
432
433	keys->n_proto = vlan->h_vlan_encapsulated_proto;
434	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
435}
436
437char __license[] SEC("license") = "GPL";
438