1/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <uapi/linux/bpf.h>
8#include <uapi/linux/in.h>
9#include <uapi/linux/if.h>
10#include <uapi/linux/if_ether.h>
11#include <uapi/linux/ip.h>
12#include <uapi/linux/ipv6.h>
13#include <uapi/linux/if_tunnel.h>
14#include <uapi/linux/mpls.h>
15#include <bpf/bpf_helpers.h>
16#include "bpf_legacy.h"
17#define IP_MF		0x2000
18#define IP_OFFSET	0x1FFF
19
20#define PARSE_VLAN 1
21#define PARSE_MPLS 2
22#define PARSE_IP 3
23#define PARSE_IPV6 4
24
25struct vlan_hdr {
26	__be16 h_vlan_TCI;
27	__be16 h_vlan_encapsulated_proto;
28};
29
30struct flow_key_record {
31	__be32 src;
32	__be32 dst;
33	union {
34		__be32 ports;
35		__be16 port16[2];
36	};
37	__u32 ip_proto;
38};
39
40static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto);
41
42static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
43{
44	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
45		& (IP_MF | IP_OFFSET);
46}
47
48static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
49{
50	__u64 w0 = load_word(ctx, off);
51	__u64 w1 = load_word(ctx, off + 4);
52	__u64 w2 = load_word(ctx, off + 8);
53	__u64 w3 = load_word(ctx, off + 12);
54
55	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
56}
57
58struct globals {
59	struct flow_key_record flow;
60};
61
62struct {
63	__uint(type, BPF_MAP_TYPE_ARRAY);
64	__type(key, __u32);
65	__type(value, struct globals);
66	__uint(max_entries, 32);
67} percpu_map SEC(".maps");
68
69/* user poor man's per_cpu until native support is ready */
70static struct globals *this_cpu_globals(void)
71{
72	u32 key = bpf_get_smp_processor_id();
73
74	return bpf_map_lookup_elem(&percpu_map, &key);
75}
76
77/* some simple stats for user space consumption */
78struct pair {
79	__u64 packets;
80	__u64 bytes;
81};
82
83struct {
84	__uint(type, BPF_MAP_TYPE_HASH);
85	__type(key, struct flow_key_record);
86	__type(value, struct pair);
87	__uint(max_entries, 1024);
88} hash_map SEC(".maps");
89
90static void update_stats(struct __sk_buff *skb, struct globals *g)
91{
92	struct flow_key_record key = g->flow;
93	struct pair *value;
94
95	value = bpf_map_lookup_elem(&hash_map, &key);
96	if (value) {
97		__sync_fetch_and_add(&value->packets, 1);
98		__sync_fetch_and_add(&value->bytes, skb->len);
99	} else {
100		struct pair val = {1, skb->len};
101
102		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
103	}
104}
105
106static __always_inline void parse_ip_proto(struct __sk_buff *skb,
107					   struct globals *g, __u32 ip_proto)
108{
109	__u32 nhoff = skb->cb[0];
110	int poff;
111
112	switch (ip_proto) {
113	case IPPROTO_GRE: {
114		struct gre_hdr {
115			__be16 flags;
116			__be16 proto;
117		};
118
119		__u32 gre_flags = load_half(skb,
120					    nhoff + offsetof(struct gre_hdr, flags));
121		__u32 gre_proto = load_half(skb,
122					    nhoff + offsetof(struct gre_hdr, proto));
123
124		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
125			break;
126
127		nhoff += 4;
128		if (gre_flags & GRE_CSUM)
129			nhoff += 4;
130		if (gre_flags & GRE_KEY)
131			nhoff += 4;
132		if (gre_flags & GRE_SEQ)
133			nhoff += 4;
134
135		skb->cb[0] = nhoff;
136		parse_eth_proto(skb, gre_proto);
137		break;
138	}
139	case IPPROTO_IPIP:
140		parse_eth_proto(skb, ETH_P_IP);
141		break;
142	case IPPROTO_IPV6:
143		parse_eth_proto(skb, ETH_P_IPV6);
144		break;
145	case IPPROTO_TCP:
146	case IPPROTO_UDP:
147		g->flow.ports = load_word(skb, nhoff);
148	case IPPROTO_ICMP:
149		g->flow.ip_proto = ip_proto;
150		update_stats(skb, g);
151		break;
152	default:
153		break;
154	}
155}
156
157SEC("socket")
158int bpf_func_ip(struct __sk_buff *skb)
159{
160	struct globals *g = this_cpu_globals();
161	__u32 nhoff, verlen, ip_proto;
162
163	if (!g)
164		return 0;
165
166	nhoff = skb->cb[0];
167
168	if (unlikely(ip_is_fragment(skb, nhoff)))
169		return 0;
170
171	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
172
173	if (ip_proto != IPPROTO_GRE) {
174		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
175		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
176	}
177
178	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
179	nhoff += (verlen & 0xF) << 2;
180
181	skb->cb[0] = nhoff;
182	parse_ip_proto(skb, g, ip_proto);
183	return 0;
184}
185
186SEC("socket")
187int bpf_func_ipv6(struct __sk_buff *skb)
188{
189	struct globals *g = this_cpu_globals();
190	__u32 nhoff, ip_proto;
191
192	if (!g)
193		return 0;
194
195	nhoff = skb->cb[0];
196
197	ip_proto = load_byte(skb,
198			     nhoff + offsetof(struct ipv6hdr, nexthdr));
199	g->flow.src = ipv6_addr_hash(skb,
200				     nhoff + offsetof(struct ipv6hdr, saddr));
201	g->flow.dst = ipv6_addr_hash(skb,
202				     nhoff + offsetof(struct ipv6hdr, daddr));
203	nhoff += sizeof(struct ipv6hdr);
204
205	skb->cb[0] = nhoff;
206	parse_ip_proto(skb, g, ip_proto);
207	return 0;
208}
209
210SEC("socket")
211int bpf_func_vlan(struct __sk_buff *skb)
212{
213	__u32 nhoff, proto;
214
215	nhoff = skb->cb[0];
216
217	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
218						h_vlan_encapsulated_proto));
219	nhoff += sizeof(struct vlan_hdr);
220	skb->cb[0] = nhoff;
221
222	parse_eth_proto(skb, proto);
223
224	return 0;
225}
226
227SEC("socket")
228int bpf_func_mpls(struct __sk_buff *skb)
229{
230	__u32 nhoff, label;
231
232	nhoff = skb->cb[0];
233
234	label = load_word(skb, nhoff);
235	nhoff += sizeof(struct mpls_label);
236	skb->cb[0] = nhoff;
237
238	if (label & MPLS_LS_S_MASK) {
239		__u8 verlen = load_byte(skb, nhoff);
240		if ((verlen & 0xF0) == 4)
241			parse_eth_proto(skb, ETH_P_IP);
242		else
243			parse_eth_proto(skb, ETH_P_IPV6);
244	} else {
245		parse_eth_proto(skb, ETH_P_MPLS_UC);
246	}
247
248	return 0;
249}
250
251struct {
252	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
253	__uint(key_size, sizeof(u32));
254	__uint(max_entries, 8);
255	__array(values, u32 (void *));
256} prog_array_init SEC(".maps") = {
257	.values = {
258		[PARSE_VLAN] = (void *)&bpf_func_vlan,
259		[PARSE_IP]   = (void *)&bpf_func_ip,
260		[PARSE_IPV6] = (void *)&bpf_func_ipv6,
261		[PARSE_MPLS] = (void *)&bpf_func_mpls,
262	},
263};
264
265/* Protocol dispatch routine. It tail-calls next BPF program depending
266 * on eth proto. Note, we could have used ...
267 *
268 *   bpf_tail_call(skb, &prog_array_init, proto);
269 *
270 * ... but it would need large prog_array and cannot be optimised given
271 * the map key is not static.
272 */
273static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
274{
275	switch (proto) {
276	case ETH_P_8021Q:
277	case ETH_P_8021AD:
278		bpf_tail_call(skb, &prog_array_init, PARSE_VLAN);
279		break;
280	case ETH_P_MPLS_UC:
281	case ETH_P_MPLS_MC:
282		bpf_tail_call(skb, &prog_array_init, PARSE_MPLS);
283		break;
284	case ETH_P_IP:
285		bpf_tail_call(skb, &prog_array_init, PARSE_IP);
286		break;
287	case ETH_P_IPV6:
288		bpf_tail_call(skb, &prog_array_init, PARSE_IPV6);
289		break;
290	}
291}
292
293SEC("socket")
294int main_prog(struct __sk_buff *skb)
295{
296	__u32 nhoff = ETH_HLEN;
297	__u32 proto = load_half(skb, 12);
298
299	skb->cb[0] = nhoff;
300	parse_eth_proto(skb, proto);
301	return 0;
302}
303
304char _license[] SEC("license") = "GPL";
305