1// SPDX-License-Identifier: GPL-2.0
2
3/* In-place tunneling */
4
5#include <stdbool.h>
6#include <string.h>
7
8#include <linux/stddef.h>
9#include <linux/bpf.h>
10#include <linux/if_ether.h>
11#include <linux/in.h>
12#include <linux/ip.h>
13#include <linux/ipv6.h>
14#include <linux/mpls.h>
15#include <linux/tcp.h>
16#include <linux/udp.h>
17#include <linux/pkt_cls.h>
18#include <linux/types.h>
19
20#include <bpf/bpf_endian.h>
21#include <bpf/bpf_helpers.h>
22#include "bpf_compiler.h"
23
24#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
25
/* TCP destination port used to select flows for encapsulation. */
static const int cfg_port = 8000;

/* UDP source port written into the outer UDP header. */
static const int cfg_udp_src = 20000;

/* Scratch area after the outer L3/L4 headers, large enough for an
 * optional vxlan header plus an inner ethernet header.
 */
#define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)

/* Outer UDP destination ports, one per encapsulated payload type. */
#define	UDP_PORT		5555
#define	MPLS_OVER_UDP_PORT	6635
#define	ETH_OVER_UDP_PORT	7777
#define	VXLAN_UDP_PORT		8472

/* ext_proto flag for __encap_ipv{4,6}: also insert a vxlan header. */
#define	EXTPROTO_VXLAN	0x1

/* vxlan constants used to build the vx_flags/vx_vni words below. */
#define	VXLAN_N_VID     (1u << 24)
#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
#define	VXLAN_FLAGS     0x8
#define	VXLAN_VNI       1

/* ipv6 destination options extension header type. */
#ifndef NEXTHDR_DEST
#define NEXTHDR_DEST	60
#endif

/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
						     MPLS_LS_S_MASK | 0xff);
51
/* Minimal vxlan header: flags word plus shifted 24-bit VNI. */
struct vxlanhdr {
	__be32 vx_flags;
	__be32 vx_vni;
} __attribute__((packed));

/* Basic GRE header, no optional checksum/key/sequence fields. */
struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));

/* Outer L4 header: UDP or GRE, selected by encap_proto. */
union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};

/* Outer IPv4 tunnel headers, built on the stack before being stored. */
struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));

/* Outer IPv6 tunnel headers, built on the stack before being stored. */
struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));
78
79static __always_inline void set_ipv4_csum(struct iphdr *iph)
80{
81	__u16 *iph16 = (__u16 *)iph;
82	__u32 csum;
83	int i;
84
85	iph->check = 0;
86
87	__pragma_loop_unroll_full
88	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
89		csum += *iph16++;
90
91	iph->check = ~((csum & 0xffff) + (csum >> 16));
92}
93
/* Encapsulate the inner packet in an outer IPv4 header, in place.
 *
 * @skb:         packet; ethernet header at offset 0
 * @encap_proto: outer encapsulation (IPPROTO_GRE, IPPROTO_UDP,
 *               IPPROTO_IPIP, or IPPROTO_IPV6 for the 6in4/sit case)
 * @l2_proto:    type of tunneled payload (ETH_P_IP, ETH_P_MPLS_UC, or
 *               ETH_P_TEB for an inner ethernet frame)
 * @ext_proto:   EXTPROTO_VXLAN to also insert a vxlan header
 *
 * Returns TC_ACT_OK for packets left alone (or encapsulated
 * successfully) and TC_ACT_SHOT when a helper call fails.
 */
static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct iphdr iph_inner;
	struct v4hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;	/* total outer hdr len / its L2 portion */
	__u8 *l2_hdr = NULL;
	int tcp_off;		/* offset of tcp header past ETH_HLEN */
	__u64 flags;

	/* Most tests encapsulate a packet into a tunnel with the same
	 * network protocol, and derive the outer header fields from
	 * the inner header.
	 *
	 * The 6in4 case tests different inner and outer protocols. As
	 * the inner is ipv6, but the outer expects an ipv4 header as
	 * input, manually build a struct iphdr based on the ipv6hdr.
	 */
	if (encap_proto == IPPROTO_IPV6) {
		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
		struct ipv6hdr iph6_inner;

		/* Read the IPv6 header */
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
				       sizeof(iph6_inner)) < 0)
			return TC_ACT_OK;

		/* Derive the IPv4 header fields from the IPv6 header */
		memset(&iph_inner, 0, sizeof(iph_inner));
		iph_inner.version = 4;
		iph_inner.ihl = 5;
		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
				    bpf_ntohs(iph6_inner.payload_len));
		/* decrement like a forwarding hop would */
		iph_inner.ttl = iph6_inner.hop_limit - 1;
		iph_inner.protocol = iph6_inner.nexthdr;
		iph_inner.saddr = __bpf_constant_htonl(saddr);
		iph_inner.daddr = __bpf_constant_htonl(daddr);

		tcp_off = sizeof(iph6_inner);
	} else {
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
				       sizeof(iph_inner)) < 0)
			return TC_ACT_OK;

		tcp_off = sizeof(iph_inner);
	}

	/* filter only packets we want: TCP, no ip options */
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	/* only encapsulate flows to the configured test port */
	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;

	/* pick the L2 encap length and outer UDP destination port */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* build the outer L4 header, if any */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		/* zero checksum: left for the stack/hw or unused */
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
						  sizeof(h_outer.l4hdr.udp) +
						  l2_len);
		break;
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified) */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);

			l2_hdr += sizeof(struct vxlanhdr);
		}

		/* copy the original ethernet header behind the encap */
		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;

		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	h_outer.ip = iph_inner;
	h_outer.ip.tot_len = bpf_htons(olen +
				       bpf_ntohs(h_outer.ip.tot_len));
	h_outer.ip.protocol = encap_proto;

	set_ipv4_csum((void *)&h_outer.ip);

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* if changing outer proto type, update eth->h_proto */
	if (encap_proto == IPPROTO_IPV6) {
		struct ethhdr eth;

		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
			return TC_ACT_SHOT;
		eth.h_proto = bpf_htons(ETH_P_IP);
		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
			return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}
255
256static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
257				      __u16 l2_proto)
258{
259	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
260}
261
/* Encapsulate the inner ipv6 packet in an outer IPv6 header, in place.
 *
 * Counterpart of __encap_ipv4: the outer header is copied from the
 * inner ipv6 header and then adjusted.
 *
 * @skb:         packet; ethernet header at offset 0
 * @encap_proto: outer encapsulation (IPPROTO_GRE, IPPROTO_UDP or
 *               IPPROTO_IPV6)
 * @l2_proto:    type of tunneled payload (ETH_P_IPV6, ETH_P_MPLS_UC
 *               or ETH_P_TEB)
 * @ext_proto:   EXTPROTO_VXLAN to also insert a vxlan header
 *
 * Returns TC_ACT_OK for packets left alone (or on success) and
 * TC_ACT_SHOT when a helper call fails.
 */
static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct ipv6hdr iph_inner;
	struct v6hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;	/* total outer hdr len / its L2 portion */
	__u8 *l2_hdr = NULL;
	__u16 tot_len;
	__u64 flags;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want */
	/* NOTE(review): tcp is read right after the fixed ipv6 header,
	 * i.e. this assumes no extension headers precede it.
	 */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	/* only encapsulate flows to the configured test port */
	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	/* pick the L2 encap length and outer UDP destination port */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* build the outer L4 header, if any */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
			  sizeof(h_outer.l4hdr.udp) + l2_len;
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
		break;
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified) */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);

			l2_hdr += sizeof(struct vxlanhdr);
		}

		/* copy the original ethernet header behind the encap */
		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;
		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	h_outer.ip = iph_inner;
	h_outer.ip.payload_len = bpf_htons(olen +
					   bpf_ntohs(h_outer.ip.payload_len));

	h_outer.ip.nexthdr = encap_proto;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
372
/* Encapsulate an inner ipv4 packet in an outer ipv6 header (4in6).
 *
 * Unlike __encap_ipv6 the outer header cannot be copied from the
 * inner one, so it is built from scratch with fixed test addresses
 * (only bytes 1 and 15 of saddr/daddr are set).
 *
 * Returns TC_ACT_OK for packets left alone (or on success) and
 * TC_ACT_SHOT when a helper call fails.
 */
static int encap_ipv6_ipip6(struct __sk_buff *skb)
{
	struct iphdr iph_inner;
	struct v6hdr h_outer;
	struct tcphdr tcph;
	struct ethhdr eth;
	__u64 flags;
	int olen;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want: tcp read at ihl-derived offset */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	/* only encapsulate flows to the configured test port */
	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
	h_outer.ip.version = 6;
	h_outer.ip.hop_limit = iph_inner.ttl;
	h_outer.ip.saddr.s6_addr[1] = 0xfd;
	h_outer.ip.saddr.s6_addr[15] = 1;
	h_outer.ip.daddr.s6_addr[1] = 0xfd;
	h_outer.ip.daddr.s6_addr[15] = 2;
	/* the entire inner ipv4 packet becomes the ipv6 payload */
	h_outer.ip.payload_len = iph_inner.tot_len;
	h_outer.ip.nexthdr = IPPROTO_IPIP;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* update eth->h_proto */
	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
		return TC_ACT_SHOT;
	eth.h_proto = bpf_htons(ETH_P_IPV6);
	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
427
428static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
429				      __u16 l2_proto)
430{
431	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
432}
433
434SEC("encap_ipip_none")
435int __encap_ipip_none(struct __sk_buff *skb)
436{
437	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
438		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
439	else
440		return TC_ACT_OK;
441}
442
443SEC("encap_gre_none")
444int __encap_gre_none(struct __sk_buff *skb)
445{
446	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
447		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
448	else
449		return TC_ACT_OK;
450}
451
452SEC("encap_gre_mpls")
453int __encap_gre_mpls(struct __sk_buff *skb)
454{
455	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
456		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
457	else
458		return TC_ACT_OK;
459}
460
461SEC("encap_gre_eth")
462int __encap_gre_eth(struct __sk_buff *skb)
463{
464	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
465		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
466	else
467		return TC_ACT_OK;
468}
469
470SEC("encap_udp_none")
471int __encap_udp_none(struct __sk_buff *skb)
472{
473	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
474		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
475	else
476		return TC_ACT_OK;
477}
478
479SEC("encap_udp_mpls")
480int __encap_udp_mpls(struct __sk_buff *skb)
481{
482	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
483		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
484	else
485		return TC_ACT_OK;
486}
487
488SEC("encap_udp_eth")
489int __encap_udp_eth(struct __sk_buff *skb)
490{
491	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
492		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
493	else
494		return TC_ACT_OK;
495}
496
497SEC("encap_vxlan_eth")
498int __encap_vxlan_eth(struct __sk_buff *skb)
499{
500	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
501		return __encap_ipv4(skb, IPPROTO_UDP,
502				    ETH_P_TEB,
503				    EXTPROTO_VXLAN);
504	else
505		return TC_ACT_OK;
506}
507
508SEC("encap_sit_none")
509int __encap_sit_none(struct __sk_buff *skb)
510{
511	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
512		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
513	else
514		return TC_ACT_OK;
515}
516
517SEC("encap_ip6tnl_none")
518int __encap_ip6tnl_none(struct __sk_buff *skb)
519{
520	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
521		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
522	else
523		return TC_ACT_OK;
524}
525
526SEC("encap_ipip6_none")
527int __encap_ipip6_none(struct __sk_buff *skb)
528{
529	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
530		return encap_ipv6_ipip6(skb);
531	else
532		return TC_ACT_OK;
533}
534
535SEC("encap_ip6gre_none")
536int __encap_ip6gre_none(struct __sk_buff *skb)
537{
538	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
539		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
540	else
541		return TC_ACT_OK;
542}
543
544SEC("encap_ip6gre_mpls")
545int __encap_ip6gre_mpls(struct __sk_buff *skb)
546{
547	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
548		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
549	else
550		return TC_ACT_OK;
551}
552
553SEC("encap_ip6gre_eth")
554int __encap_ip6gre_eth(struct __sk_buff *skb)
555{
556	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
557		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
558	else
559		return TC_ACT_OK;
560}
561
562SEC("encap_ip6udp_none")
563int __encap_ip6udp_none(struct __sk_buff *skb)
564{
565	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
566		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
567	else
568		return TC_ACT_OK;
569}
570
571SEC("encap_ip6udp_mpls")
572int __encap_ip6udp_mpls(struct __sk_buff *skb)
573{
574	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
575		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
576	else
577		return TC_ACT_OK;
578}
579
580SEC("encap_ip6udp_eth")
581int __encap_ip6udp_eth(struct __sk_buff *skb)
582{
583	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
584		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
585	else
586		return TC_ACT_OK;
587}
588
589SEC("encap_ip6vxlan_eth")
590int __encap_ip6vxlan_eth(struct __sk_buff *skb)
591{
592	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
593		return __encap_ipv6(skb, IPPROTO_UDP,
594				    ETH_P_TEB,
595				    EXTPROTO_VXLAN);
596	else
597		return TC_ACT_OK;
598}
599
/* Strip the tunnel headers in front of the inner packet.
 *
 * @off:   offset of the outer network header (callers pass ETH_HLEN)
 * @len:   length of the outer network header
 * @proto: outer protocol (iphdr.protocol or ipv6hdr.nexthdr)
 *
 * Computes the total encap length (outer L3 + optional L4 + optional
 * mpls/eth/vxlan headers, mirroring the encap side) and shrinks the
 * packet by that amount. Returns TC_ACT_OK for packets left alone
 * (or on success) and TC_ACT_SHOT when the room adjustment fails.
 */
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
	struct ipv6_opt_hdr ip6_opt_hdr;
	struct gre_hdr greh;
	struct udphdr udph;
	int olen = len;

	switch (proto) {
	case IPPROTO_IPIP:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
		break;
	case IPPROTO_IPV6:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
		break;
	case NEXTHDR_DEST:
		/* destination options header: peek at its nexthdr to
		 * learn the tunneled protocol
		 */
		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
				       sizeof(ip6_opt_hdr)) < 0)
			return TC_ACT_OK;
		switch (ip6_opt_hdr.nexthdr) {
		case IPPROTO_IPIP:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
			break;
		case IPPROTO_IPV6:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
			break;
		default:
			return TC_ACT_OK;
		}
		break;
	case IPPROTO_GRE:
		olen += sizeof(struct gre_hdr);
		/* the gre protocol field determines the L2 encap size */
		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
			return TC_ACT_OK;
		switch (bpf_ntohs(greh.protocol)) {
		case ETH_P_MPLS_UC:
			olen += sizeof(mpls_label);
			break;
		case ETH_P_TEB:
			olen += ETH_HLEN;
			break;
		}
		break;
	case IPPROTO_UDP:
		olen += sizeof(struct udphdr);
		/* the udp destination port selects the L2 encap,
		 * mirroring the port choices on the encap side
		 */
		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
			return TC_ACT_OK;
		switch (bpf_ntohs(udph.dest)) {
		case MPLS_OVER_UDP_PORT:
			olen += sizeof(mpls_label);
			break;
		case ETH_OVER_UDP_PORT:
			olen += ETH_HLEN;
			break;
		case VXLAN_UDP_PORT:
			olen += ETH_HLEN + sizeof(struct vxlanhdr);
			break;
		}
		break;
	default:
		return TC_ACT_OK;
	}

	/* shrink the packet by the full encap length */
	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
668
669static int decap_ipv4(struct __sk_buff *skb)
670{
671	struct iphdr iph_outer;
672
673	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
674			       sizeof(iph_outer)) < 0)
675		return TC_ACT_OK;
676
677	if (iph_outer.ihl != 5)
678		return TC_ACT_OK;
679
680	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
681			      iph_outer.protocol);
682}
683
684static int decap_ipv6(struct __sk_buff *skb)
685{
686	struct ipv6hdr iph_outer;
687
688	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
689			       sizeof(iph_outer)) < 0)
690		return TC_ACT_OK;
691
692	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
693			      iph_outer.nexthdr);
694}
695
696SEC("decap")
697int decap_f(struct __sk_buff *skb)
698{
699	switch (skb->protocol) {
700	case __bpf_constant_htons(ETH_P_IP):
701		return decap_ipv4(skb);
702	case __bpf_constant_htons(ETH_P_IPV6):
703		return decap_ipv6(skb);
704	default:
705		/* does not match, ignore */
706		return TC_ACT_OK;
707	}
708}
709
/* license string, read by the kernel when the program is loaded */
char __license[] SEC("license") = "GPL";
711