1#include <stddef.h>
2#include <inttypes.h>
3#include <errno.h>
4#include <linux/seg6_local.h>
5#include <linux/bpf.h>
6#include <bpf/bpf_helpers.h>
7#include <bpf/bpf_endian.h>
8
9#include "bpf_compiler.h"
10
/*
 * Packet parsing state machine helper.
 *
 * cursor_advance(cursor, len) evaluates to the value cursor had before the
 * call (as a void *) and then moves cursor forward by len, using pointer
 * arithmetic on cursor's own type. get_srh() passes a void * cursor and a
 * byte count.
 *
 * cursor must be a modifiable pointer lvalue. It is expanded twice, so it
 * must not have side effects.
 */
#define cursor_advance(_cursor, _len) \
	({ void *_tmp = (_cursor); (_cursor) += (_len); _tmp; })
14
/*
 * Bit 4 of the SRH flags byte. The add_egr_x program stores this value in
 * the flags field and the pop_egr program requires the field to equal it.
 */
#define SR6_FLAG_ALERT (1 << 4)
16
/*
 * Attribute appended to every on-the-wire header struct in this file so the
 * compiler inserts no padding between members.
 */
#define BPF_PACKET_HEADER __attribute__((packed))
18
/*
 * Fixed 40-byte IPv6 header as laid over the start of the packet.
 *
 * In this file only next_header is read through the struct; get_srh() takes
 * the IP version from the first packet byte instead of the ver bit-field.
 * NOTE(review): bit-field allocation order is implementation-defined, so
 * ver/priority/flow_label should not be relied on without checking the
 * target ABI.
 */
struct ip6_t {
	unsigned int ver:4;
	unsigned int priority:8;
	unsigned int flow_label:20;
	unsigned short payload_len;
	unsigned char next_header;	/* type of the header that follows */
	unsigned char hop_limit;
	unsigned long long src_hi;	/* source address, upper 64 bits */
	unsigned long long src_lo;	/* source address, lower 64 bits */
	unsigned long long dst_hi;	/* destination address, upper 64 bits */
	unsigned long long dst_lo;	/* destination address, lower 64 bits */
} BPF_PACKET_HEADER;
31
/*
 * 128-bit IPv6 address split into two 64-bit halves.
 *
 * The programs in this file keep both halves in big-endian byte order: they
 * are written with bpf_cpu_to_be64() and compared after bpf_be64_to_cpu().
 */
struct ip6_addr_t {
	unsigned long long hi;	/* first 8 bytes of the address */
	unsigned long long lo;	/* last 8 bytes of the address */
} BPF_PACKET_HEADER;
36
37struct ip6_srh_t {
38	unsigned char nexthdr;
39	unsigned char hdrlen;
40	unsigned char type;
41	unsigned char segments_left;
42	unsigned char first_segment;
43	unsigned char flags;
44	unsigned short tag;
45
46	struct ip6_addr_t segments[0];
47} BPF_PACKET_HEADER;
48
/*
 * Generic SRH TLV header.
 *
 * len counts the bytes of value only: the code in this file always uses
 * sizeof(struct sr6_tlv_t) + len as the full size of a TLV.
 */
struct sr6_tlv_t {
	unsigned char type;
	unsigned char len;
	/* C99 flexible array member (was a GNU zero-length array) */
	unsigned char value[];
} BPF_PACKET_HEADER;
54
55static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
56{
57	void *cursor, *data_end;
58	struct ip6_srh_t *srh;
59	struct ip6_t *ip;
60	uint8_t *ipver;
61
62	data_end = (void *)(long)skb->data_end;
63	cursor = (void *)(long)skb->data;
64	ipver = (uint8_t *)cursor;
65
66	if ((void *)ipver + sizeof(*ipver) > data_end)
67		return NULL;
68
69	if ((*ipver >> 4) != 6)
70		return NULL;
71
72	ip = cursor_advance(cursor, sizeof(*ip));
73	if ((void *)ip + sizeof(*ip) > data_end)
74		return NULL;
75
76	if (ip->next_header != 43)
77		return NULL;
78
79	srh = cursor_advance(cursor, sizeof(*srh));
80	if ((void *)srh + sizeof(*srh) > data_end)
81		return NULL;
82
83	if (srh->type != 4)
84		return NULL;
85
86	return srh;
87}
88
89static __always_inline
90int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
91		   uint32_t old_pad, uint32_t pad_off)
92{
93	int err;
94
95	if (new_pad != old_pad) {
96		err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
97					  (int) new_pad - (int) old_pad);
98		if (err)
99			return err;
100	}
101
102	if (new_pad > 0) {
103		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
104					0, 0, 0};
105		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
106
107		pad_tlv->type = SR6_TLV_PADDING;
108		pad_tlv->len = new_pad - 2;
109
110		err = bpf_lwt_seg6_store_bytes(skb, pad_off,
111					       (void *)pad_tlv_buf, new_pad);
112		if (err)
113			return err;
114	}
115
116	return 0;
117}
118
119static __always_inline
120int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
121			  uint32_t *tlv_off, uint32_t *pad_size,
122			  uint32_t *pad_off)
123{
124	uint32_t srh_off, cur_off;
125	int offset_valid = 0;
126	int err;
127
128	srh_off = (char *)srh - (char *)(long)skb->data;
129	// cur_off = end of segments, start of possible TLVs
130	cur_off = srh_off + sizeof(*srh) +
131		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
132
133	*pad_off = 0;
134
135	// we can only go as far as ~10 TLVs due to the BPF max stack size
136	__pragma_loop_unroll_full
137	for (int i = 0; i < 10; i++) {
138		struct sr6_tlv_t tlv;
139
140		if (cur_off == *tlv_off)
141			offset_valid = 1;
142
143		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
144			break;
145
146		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
147		if (err)
148			return err;
149
150		if (tlv.type == SR6_TLV_PADDING) {
151			*pad_size = tlv.len + sizeof(tlv);
152			*pad_off = cur_off;
153
154			if (*tlv_off == srh_off) {
155				*tlv_off = cur_off;
156				offset_valid = 1;
157			}
158			break;
159
160		} else if (tlv.type == SR6_TLV_HMAC) {
161			break;
162		}
163
164		cur_off += sizeof(tlv) + tlv.len;
165	} // we reached the padding or HMAC TLVs, or the end of the SRH
166
167	if (*pad_off == 0)
168		*pad_off = cur_off;
169
170	if (*tlv_off == -1)
171		*tlv_off = cur_off;
172	else if (!offset_valid)
173		return -EINVAL;
174
175	return 0;
176}
177
178static __always_inline
179int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
180	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
181{
182	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
183	uint8_t len_remaining, new_pad;
184	uint32_t pad_off = 0;
185	uint32_t pad_size = 0;
186	uint32_t partial_srh_len;
187	int err;
188
189	if (tlv_off != -1)
190		tlv_off += srh_off;
191
192	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
193		return -EINVAL;
194
195	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
196	if (err)
197		return err;
198
199	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
200	if (err)
201		return err;
202
203	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
204	if (err)
205		return err;
206
207	// the following can't be moved inside update_tlv_pad because the
208	// bpf verifier has some issues with it
209	pad_off += sizeof(*itlv) + itlv->len;
210	partial_srh_len = pad_off - srh_off;
211	len_remaining = partial_srh_len % 8;
212	new_pad = 8 - len_remaining;
213
214	if (new_pad == 1) // cannot pad for 1 byte only
215		new_pad = 9;
216	else if (new_pad == 8)
217		new_pad = 0;
218
219	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
220}
221
222static __always_inline
223int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
224	       uint32_t tlv_off)
225{
226	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
227	uint8_t len_remaining, new_pad;
228	uint32_t partial_srh_len;
229	uint32_t pad_off = 0;
230	uint32_t pad_size = 0;
231	struct sr6_tlv_t tlv;
232	int err;
233
234	tlv_off += srh_off;
235
236	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
237	if (err)
238		return err;
239
240	err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
241	if (err)
242		return err;
243
244	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
245	if (err)
246		return err;
247
248	pad_off -= sizeof(tlv) + tlv.len;
249	partial_srh_len = pad_off - srh_off;
250	len_remaining = partial_srh_len % 8;
251	new_pad = 8 - len_remaining;
252	if (new_pad == 1) // cannot pad for 1 byte only
253		new_pad = 9;
254	else if (new_pad == 8)
255		new_pad = 0;
256
257	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
258}
259
260static __always_inline
261int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
262{
263	int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
264		((srh->first_segment + 1) << 4);
265	struct sr6_tlv_t tlv;
266
267	if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
268		return 0;
269
270	if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
271		struct ip6_addr_t egr_addr;
272
273		if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
274			return 0;
275
276		// check if egress TLV value is correct
277		if (bpf_be64_to_cpu(egr_addr.hi) == 0xfd00000000000000 &&
278		    bpf_be64_to_cpu(egr_addr.lo) == 0x4)
279			return 1;
280	}
281
282	return 0;
283}
284
285// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
286// fd00::4
287SEC("encap_srh")
288int __encap_srh(struct __sk_buff *skb)
289{
290	unsigned long long hi = 0xfd00000000000000;
291	struct ip6_addr_t *seg;
292	struct ip6_srh_t *srh;
293	char srh_buf[72]; // room for 4 segments
294	int err;
295
296	srh = (struct ip6_srh_t *)srh_buf;
297	srh->nexthdr = 0;
298	srh->hdrlen = 8;
299	srh->type = 4;
300	srh->segments_left = 3;
301	srh->first_segment = 3;
302	srh->flags = 0;
303	srh->tag = 0;
304
305	seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
306
307	__pragma_loop_unroll_full
308	for (unsigned long long lo = 0; lo < 4; lo++) {
309		seg->lo = bpf_cpu_to_be64(4 - lo);
310		seg->hi = bpf_cpu_to_be64(hi);
311		seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
312	}
313
314	err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
315	if (err)
316		return BPF_DROP;
317
318	return BPF_REDIRECT;
319}
320
321// Add an Egress TLV fc00::4, add the flag A,
322// and apply End.X action to fc42::1
323SEC("add_egr_x")
324int __add_egr_x(struct __sk_buff *skb)
325{
326	unsigned long long hi = 0xfc42000000000000;
327	unsigned long long lo = 0x1;
328	struct ip6_srh_t *srh = get_srh(skb);
329	uint8_t new_flags = SR6_FLAG_ALERT;
330	struct ip6_addr_t addr;
331	int err, offset;
332
333	if (srh == NULL)
334		return BPF_DROP;
335
336	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
337			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
338
339	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
340		      (struct sr6_tlv_t *)&tlv, 20);
341	if (err)
342		return BPF_DROP;
343
344	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
345	err = bpf_lwt_seg6_store_bytes(skb, offset,
346				       (void *)&new_flags, sizeof(new_flags));
347	if (err)
348		return BPF_DROP;
349
350	addr.lo = bpf_cpu_to_be64(lo);
351	addr.hi = bpf_cpu_to_be64(hi);
352	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
353				  (void *)&addr, sizeof(addr));
354	if (err)
355		return BPF_DROP;
356	return BPF_REDIRECT;
357}
358
359// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a
360// simple End action
361SEC("pop_egr")
362int __pop_egr(struct __sk_buff *skb)
363{
364	struct ip6_srh_t *srh = get_srh(skb);
365	uint16_t new_tag = bpf_htons(2442);
366	uint8_t new_flags = 0;
367	int err, offset;
368
369	if (srh == NULL)
370		return BPF_DROP;
371
372	if (srh->flags != SR6_FLAG_ALERT)
373		return BPF_DROP;
374
375	if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
376		return BPF_DROP;
377
378	if (!has_egr_tlv(skb, srh))
379		return BPF_DROP;
380
381	err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
382	if (err)
383		return BPF_DROP;
384
385	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
386	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
387				     sizeof(new_flags)))
388		return BPF_DROP;
389
390	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
391	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
392				     sizeof(new_tag)))
393		return BPF_DROP;
394
395	return BPF_OK;
396}
397
398// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
399// then apply a End.T action to reach the last segment
400SEC("inspect_t")
401int __inspect_t(struct __sk_buff *skb)
402{
403	struct ip6_srh_t *srh = get_srh(skb);
404	int table = 117;
405	int err;
406
407	if (srh == NULL)
408		return BPF_DROP;
409
410	if (srh->flags != 0)
411		return BPF_DROP;
412
413	if (srh->tag != bpf_htons(2442))
414		return BPF_DROP;
415
416	if (srh->hdrlen != 8) // 4 segments
417		return BPF_DROP;
418
419	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
420				  (void *)&table, sizeof(table));
421
422	if (err)
423		return BPF_DROP;
424
425	return BPF_REDIRECT;
426}
427
428char __license[] SEC("license") = "GPL";
429