162216Sdes/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com 2261233Sdes * 3253680Sdes * This program is free software; you can redistribute it and/or 462216Sdes * modify it under the terms of version 2 of the GNU General Public 562216Sdes * License as published by the Free Software Foundation. 662216Sdes */ 762216Sdes#include <uapi/linux/bpf.h> 862216Sdes#include <uapi/linux/in.h> 962216Sdes#include <uapi/linux/if.h> 1062216Sdes#include <uapi/linux/if_ether.h> 1162216Sdes#include <uapi/linux/ip.h> 1262216Sdes#include <uapi/linux/ipv6.h> 1362216Sdes#include <uapi/linux/if_tunnel.h> 1462216Sdes#include <uapi/linux/mpls.h> 1562216Sdes#include <bpf/bpf_helpers.h> 1662216Sdes#include "bpf_legacy.h" 1762216Sdes#define IP_MF 0x2000 1862216Sdes#define IP_OFFSET 0x1FFF 1962216Sdes 2062216Sdes#define PARSE_VLAN 1 2162216Sdes#define PARSE_MPLS 2 2262216Sdes#define PARSE_IP 3 2362216Sdes#define PARSE_IPV6 4 2462216Sdes 2562216Sdesstruct vlan_hdr { 2662216Sdes __be16 h_vlan_TCI; 2762216Sdes __be16 h_vlan_encapsulated_proto; 2862216Sdes}; 2962216Sdes 3093213Scharnierstruct flow_key_record { 3193213Scharnier __be32 src; 3293213Scharnier __be32 dst; 3362216Sdes union { 3491225Sbde __be32 ports; 3562216Sdes __be16 port16[2]; 3693257Sbde }; 3762216Sdes __u32 ip_proto; 38200462Sdelphij}; 3962216Sdes 4062216Sdesstatic inline void parse_eth_proto(struct __sk_buff *skb, u32 proto); 41253680Sdes 4263235Sdesstatic inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff) 43125976Sdes{ 4462216Sdes return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off)) 4562216Sdes & (IP_MF | IP_OFFSET); 4662216Sdes} 4777241Sdes 4862216Sdesstatic inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) 4962216Sdes{ 5062216Sdes __u64 w0 = load_word(ctx, off); 5162216Sdes __u64 w1 = load_word(ctx, off + 4); 52261234Sdes __u64 w2 = load_word(ctx, off + 8); 53187361Sdes __u64 w3 = load_word(ctx, off + 12); 5462216Sdes 5562216Sdes return (__u32)(w0 ^ w1 ^ w2 ^ w3); 56241737Sed} 57241737Sed 58241737Sedstruct globals { 59241737Sed struct flow_key_record flow; 60241737Sed}; 61241737Sed 62241737Sedstruct { 63241737Sed __uint(type, BPF_MAP_TYPE_ARRAY); 64241737Sed __type(key, __u32); 65241737Sed __type(value, struct globals); 66241737Sed __uint(max_entries, 32); 67241737Sed} percpu_map SEC(".maps"); 68241737Sed 69241737Sed/* user poor man's per_cpu until native support is ready */ 70241737Sedstatic struct globals *this_cpu_globals(void) 71241737Sed{ 72241737Sed u32 key = bpf_get_smp_processor_id(); 73241737Sed 74241737Sed return bpf_map_lookup_elem(&percpu_map, &key); 75241737Sed} 76241737Sed 77241737Sed/* some simple stats for user space consumption */ 78241737Sedstruct pair { 79241737Sed __u64 packets; 80241737Sed __u64 bytes; 81241737Sed}; 82241737Sed 83241737Sedstruct { 84241737Sed __uint(type, BPF_MAP_TYPE_HASH); 85241737Sed __type(key, struct flow_key_record); 86241737Sed __type(value, struct pair); 87241737Sed __uint(max_entries, 1024); 88241737Sed} hash_map SEC(".maps"); 8962216Sdes 90241737Sedstatic void update_stats(struct __sk_buff *skb, struct globals *g) 91241737Sed{ 92241737Sed struct flow_key_record key = g->flow; 9362216Sdes struct pair *value; 94241737Sed 95241737Sed value = bpf_map_lookup_elem(&hash_map, &key); 96241737Sed if (value) { 9762216Sdes __sync_fetch_and_add(&value->packets, 1); 98253680Sdes __sync_fetch_and_add(&value->bytes, skb->len); 99253680Sdes } else { 100253680Sdes struct pair val = {1, skb->len}; 101253680Sdes 102253680Sdes bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY); 103253680Sdes } 104253680Sdes} 105253680Sdes 106253680Sdesstatic __always_inline void parse_ip_proto(struct __sk_buff *skb, 107253680Sdes struct globals *g, __u32 ip_proto) 108253680Sdes{ 109253680Sdes __u32 nhoff = skb->cb[0]; 110253680Sdes int poff; 111261233Sdes 112253680Sdes switch (ip_proto) { 113253680Sdes case IPPROTO_GRE: { 114253680Sdes struct gre_hdr { 11562216Sdes __be16 flags; 116253680Sdes __be16 proto; 117253680Sdes }; 118253680Sdes 119253680Sdes __u32 gre_flags = load_half(skb, 120253680Sdes nhoff + offsetof(struct gre_hdr, flags)); 121253680Sdes __u32 gre_proto = load_half(skb, 122253680Sdes nhoff + offsetof(struct gre_hdr, proto)); 123253680Sdes 124253680Sdes if (gre_flags & (GRE_VERSION|GRE_ROUTING)) 125253680Sdes break; 126253680Sdes 127253680Sdes nhoff += 4; 128253680Sdes if (gre_flags & GRE_CSUM) 129253680Sdes nhoff += 4; 130253680Sdes if (gre_flags & GRE_KEY) 131253680Sdes nhoff += 4; 132253680Sdes if (gre_flags & GRE_SEQ) 133253680Sdes nhoff += 4; 134253680Sdes 135253680Sdes skb->cb[0] = nhoff; 136253680Sdes parse_eth_proto(skb, gre_proto); 137253680Sdes break; 138253680Sdes } 139253680Sdes case IPPROTO_IPIP: 140253680Sdes parse_eth_proto(skb, ETH_P_IP); 141253680Sdes break; 142253680Sdes case IPPROTO_IPV6: 143253680Sdes parse_eth_proto(skb, ETH_P_IPV6); 144253680Sdes break; 145253680Sdes case IPPROTO_TCP: 146253680Sdes case IPPROTO_UDP: 147253680Sdes g->flow.ports = load_word(skb, nhoff); 148253680Sdes case IPPROTO_ICMP: 149261233Sdes g->flow.ip_proto = ip_proto; 150253680Sdes update_stats(skb, g); 151253680Sdes break; 152253680Sdes default: 153253680Sdes break; 154253680Sdes } 155253680Sdes} 156253680Sdes 157253680SdesSEC("socket") 158253680Sdesint bpf_func_ip(struct __sk_buff *skb) 159253680Sdes{ 160253680Sdes struct globals *g = this_cpu_globals(); 161253680Sdes __u32 nhoff, verlen, ip_proto; 162253680Sdes 163253680Sdes if (!g) 164253680Sdes return 0; 165253680Sdes 166253680Sdes nhoff = skb->cb[0]; 167253680Sdes 168253680Sdes if (unlikely(ip_is_fragment(skb, nhoff))) 16981863Sdes return 0; 17081863Sdes 17181863Sdes ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); 17279837Sdes 17362216Sdes if (ip_proto != IPPROTO_GRE) { 17462216Sdes g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); 17579837Sdes g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); 17679837Sdes } 17779837Sdes 17879837Sdes verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); 17979837Sdes nhoff += (verlen & 0xF) << 2; 18079837Sdes 18179837Sdes skb->cb[0] = nhoff; 18279837Sdes parse_ip_proto(skb, g, ip_proto); 18379837Sdes return 0; 18479837Sdes} 18579837Sdes 18662216SdesSEC("socket") 18762216Sdesint bpf_func_ipv6(struct __sk_buff *skb) 18862216Sdes{ 189125965Sdes struct globals *g = this_cpu_globals(); 190243147Sandre __u32 nhoff, ip_proto; 191243147Sandre 192243147Sandre if (!g) 193243147Sandre return 0; 194243147Sandre 195243147Sandre nhoff = skb->cb[0]; 196243147Sandre 19762216Sdes ip_proto = load_byte(skb, 19862216Sdes nhoff + offsetof(struct ipv6hdr, nexthdr)); 19981863Sdes g->flow.src = ipv6_addr_hash(skb, 200109702Sdes nhoff + offsetof(struct ipv6hdr, saddr)); 201109702Sdes g->flow.dst = ipv6_addr_hash(skb, 202125965Sdes nhoff + offsetof(struct ipv6hdr, daddr)); 203109702Sdes nhoff += sizeof(struct ipv6hdr); 204109702Sdes 205125965Sdes skb->cb[0] = nhoff; 206125976Sdes parse_ip_proto(skb, g, ip_proto); 207125976Sdes return 0; 208109702Sdes} 209109702Sdes 210112083SdesSEC("socket") 211112083Sdesint bpf_func_vlan(struct __sk_buff *skb) 212112114Sdes{ 213125965Sdes __u32 nhoff, proto; 214125965Sdes 215125965Sdes nhoff = skb->cb[0]; 216243147Sandre 217243147Sandre proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, 218243147Sandre h_vlan_encapsulated_proto)); 219125965Sdes nhoff += sizeof(struct vlan_hdr); 220125965Sdes skb->cb[0] = nhoff; 221243147Sandre 222125965Sdes parse_eth_proto(skb, proto); 223125965Sdes 224125965Sdes return 0; 225125965Sdes} 226125965Sdes 227125965SdesSEC("socket") 228125965Sdesint bpf_func_mpls(struct __sk_buff *skb) 229125965Sdes{ 230129440Sle __u32 nhoff, label; 231125965Sdes 232125965Sdes nhoff = skb->cb[0]; 233125965Sdes 234125965Sdes label = load_word(skb, nhoff); 235125965Sdes nhoff += sizeof(struct mpls_label); 236125965Sdes skb->cb[0] = nhoff; 237125965Sdes 238109702Sdes if (label & MPLS_LS_S_MASK) { 239129440Sle __u8 verlen = load_byte(skb, nhoff); 240125965Sdes if ((verlen & 0xF0) == 4) 241109702Sdes parse_eth_proto(skb, ETH_P_IP); 242109702Sdes else 243109702Sdes parse_eth_proto(skb, ETH_P_IPV6); 244109702Sdes } else { 245109702Sdes parse_eth_proto(skb, ETH_P_MPLS_UC); 246125965Sdes } 247109702Sdes 248109702Sdes return 0; 249125965Sdes} 250109735Sdes 251109702Sdesstruct { 252109735Sdes __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 253243147Sandre __uint(key_size, sizeof(u32)); 254243147Sandre __uint(max_entries, 8); 255109735Sdes __array(values, u32 (void *)); 256125965Sdes} prog_array_init SEC(".maps") = { 257125965Sdes .values = { 258244058Sandre [PARSE_VLAN] = (void *)&bpf_func_vlan, 259129440Sle [PARSE_IP] = (void *)&bpf_func_ip, 260109735Sdes [PARSE_IPV6] = (void *)&bpf_func_ipv6, 261125965Sdes [PARSE_MPLS] = (void *)&bpf_func_mpls, 262109702Sdes }, 263109702Sdes}; 264109702Sdes 26581863Sdes/* Protocol dispatch routine. It tail-calls next BPF program depending 26681863Sdes * on eth proto. Note, we could have used ... 26779837Sdes * 26863046Sdes * bpf_tail_call(skb, &prog_array_init, proto); 26962216Sdes * 27079837Sdes * ... but it would need large prog_array and cannot be optimised given 27183863Sdes * the map key is not static. 27279837Sdes */ 27383863Sdesstatic inline void parse_eth_proto(struct __sk_buff *skb, u32 proto) 27483863Sdes{ 27583863Sdes switch (proto) { 27683863Sdes case ETH_P_8021Q: 277106043Sdes case ETH_P_8021AD: 27879837Sdes bpf_tail_call(skb, &prog_array_init, PARSE_VLAN); 27979837Sdes break; 28079837Sdes case ETH_P_MPLS_UC: 281243147Sandre case ETH_P_MPLS_MC: 28279837Sdes bpf_tail_call(skb, &prog_array_init, PARSE_MPLS); 28379837Sdes break; 284131615Sdes case ETH_P_IP: 285106041Sdes bpf_tail_call(skb, &prog_array_init, PARSE_IP); 286153894Sdes break; 287125965Sdes case ETH_P_IPV6: 288106041Sdes bpf_tail_call(skb, &prog_array_init, PARSE_IPV6); 289153894Sdes break; 290153894Sdes } 291153894Sdes} 292125965Sdes 293125965SdesSEC("socket") 294125965Sdesint main_prog(struct __sk_buff *skb) 295106041Sdes{ 296243147Sandre __u32 nhoff = ETH_HLEN; 297243147Sandre __u32 proto = load_half(skb, 12); 298243147Sandre 299243147Sandre skb->cb[0] = nhoff; 300125965Sdes parse_eth_proto(skb, proto); 301243147Sandre return 0; 302243147Sandre} 303243147Sandre 304125965Sdeschar _license[] SEC("license") = "GPL"; 305243147Sandre