/* npf_bpf_comp.c revision 1.9 */
1/* $NetBSD: npf_bpf_comp.c,v 1.9 2016/12/26 23:05:05 christos Exp $ */ 2 3/*- 4 * Copyright (c) 2010-2014 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This material is based upon work partially supported by The 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/* 33 * BPF byte-code generation for NPF rules. 
34 */ 35 36#include <sys/cdefs.h> 37__RCSID("$NetBSD: npf_bpf_comp.c,v 1.9 2016/12/26 23:05:05 christos Exp $"); 38 39#include <stdlib.h> 40#include <stdbool.h> 41#include <stddef.h> 42#include <string.h> 43#include <inttypes.h> 44#include <err.h> 45#include <assert.h> 46 47#include <netinet/in.h> 48#include <netinet/in_systm.h> 49#define __FAVOR_BSD 50#include <netinet/ip.h> 51#include <netinet/ip6.h> 52#include <netinet/udp.h> 53#include <netinet/tcp.h> 54#include <netinet/ip_icmp.h> 55#include <netinet/icmp6.h> 56 57#include <net/bpf.h> 58 59#include "npfctl.h" 60 61/* 62 * Note: clear X_EQ_L4OFF when register X is invalidated i.e. it stores 63 * something other than L4 header offset. Generally, when BPF_LDX is used. 64 */ 65#define FETCHED_L3 0x01 66#define CHECKED_L4 0x02 67#define X_EQ_L4OFF 0x04 68 69struct npf_bpf { 70 /* 71 * BPF program code, the allocated length (in bytes), the number 72 * of logical blocks and the flags. 73 */ 74 struct bpf_program prog; 75 size_t alen; 76 u_int nblocks; 77 sa_family_t af; 78 uint32_t flags; 79 80 /* The current group offset and block number. */ 81 bool ingroup; 82 u_int goff; 83 u_int gblock; 84 85 /* BPF marks, allocated length and the real length. */ 86 uint32_t * marks; 87 size_t malen; 88 size_t mlen; 89}; 90 91/* 92 * NPF success and failure values to be returned from BPF. 93 */ 94#define NPF_BPF_SUCCESS ((u_int)-1) 95#define NPF_BPF_FAILURE 0 96 97/* 98 * Magic value to indicate the failure path, which is fixed up on completion. 99 * Note: this is the longest jump offset in BPF, since the offset is one byte. 100 */ 101#define JUMP_MAGIC 0xff 102 103/* Reduce re-allocations by expanding in 64 byte blocks. 
*/ 104#define ALLOC_MASK (64 - 1) 105#define ALLOC_ROUND(x) (((x) + ALLOC_MASK) & ~ALLOC_MASK) 106 107#ifndef IPV6_VERSION 108#define IPV6_VERSION 0x60 109#endif 110 111npf_bpf_t * 112npfctl_bpf_create(void) 113{ 114 return ecalloc(1, sizeof(npf_bpf_t)); 115} 116 117static void 118fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap) 119{ 120 struct bpf_program *bp = &ctx->prog; 121 122 for (u_int i = start; i < end; i++) { 123 struct bpf_insn *insn = &bp->bf_insns[i]; 124 const u_int fail_off = end - i; 125 126 if (fail_off >= JUMP_MAGIC) { 127 errx(EXIT_FAILURE, "BPF generation error: " 128 "the number of instructions is over the limit"); 129 } 130 if (BPF_CLASS(insn->code) != BPF_JMP) { 131 continue; 132 } 133 if (swap) { 134 uint8_t jt = insn->jt; 135 insn->jt = insn->jf; 136 insn->jf = jt; 137 } 138 if (insn->jt == JUMP_MAGIC) 139 insn->jt = fail_off; 140 if (insn->jf == JUMP_MAGIC) 141 insn->jf = fail_off; 142 } 143} 144 145static void 146add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count) 147{ 148 struct bpf_program *bp = &ctx->prog; 149 size_t offset, len, reqlen; 150 151 /* Note: bf_len is the count of instructions. */ 152 offset = bp->bf_len * sizeof(struct bpf_insn); 153 len = count * sizeof(struct bpf_insn); 154 155 /* Ensure the memory buffer for the program. */ 156 reqlen = ALLOC_ROUND(offset + len); 157 if (reqlen > ctx->alen) { 158 bp->bf_insns = erealloc(bp->bf_insns, reqlen); 159 ctx->alen = reqlen; 160 } 161 162 /* Add the code block. 
*/ 163 memcpy((uint8_t *)bp->bf_insns + offset, insns, len); 164 bp->bf_len += count; 165} 166 167static void 168done_raw_block(npf_bpf_t *ctx, const uint32_t *m, size_t len) 169{ 170 size_t reqlen, nargs = m[1]; 171 172 if ((len / sizeof(uint32_t) - 2) != nargs) { 173 errx(EXIT_FAILURE, "invalid BPF block description"); 174 } 175 reqlen = ALLOC_ROUND(ctx->mlen + len); 176 if (reqlen > ctx->malen) { 177 ctx->marks = erealloc(ctx->marks, reqlen); 178 ctx->malen = reqlen; 179 } 180 memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len); 181 ctx->mlen += len; 182} 183 184static void 185done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len) 186{ 187 done_raw_block(ctx, m, len); 188 ctx->nblocks++; 189} 190 191struct bpf_program * 192npfctl_bpf_complete(npf_bpf_t *ctx) 193{ 194 struct bpf_program *bp = &ctx->prog; 195 const u_int retoff = bp->bf_len; 196 197 /* No instructions (optimised out). */ 198 if (!bp->bf_len) 199 return NULL; 200 201 /* Add the return fragment (success and failure paths). */ 202 struct bpf_insn insns_ret[] = { 203 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS), 204 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE), 205 }; 206 add_insns(ctx, insns_ret, __arraycount(insns_ret)); 207 208 /* Fixup all jumps to the main failure path. */ 209 fixup_jumps(ctx, 0, retoff, false); 210 211 return &ctx->prog; 212} 213 214const void * 215npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len) 216{ 217 *len = ctx->mlen; 218 return ctx->marks; 219} 220 221void 222npfctl_bpf_destroy(npf_bpf_t *ctx) 223{ 224 free(ctx->prog.bf_insns); 225 free(ctx->marks); 226 free(ctx); 227} 228 229/* 230 * npfctl_bpf_group: begin a logical group. It merely uses logical 231 * disjunction (OR) for compares within the group. 
232 */ 233void 234npfctl_bpf_group(npf_bpf_t *ctx) 235{ 236 struct bpf_program *bp = &ctx->prog; 237 238 assert(ctx->goff == 0); 239 assert(ctx->gblock == 0); 240 241 ctx->goff = bp->bf_len; 242 ctx->gblock = ctx->nblocks; 243 ctx->ingroup = true; 244} 245 246void 247npfctl_bpf_endgroup(npf_bpf_t *ctx) 248{ 249 struct bpf_program *bp = &ctx->prog; 250 const size_t curoff = bp->bf_len; 251 252 /* If there are no blocks or only one - nothing to do. */ 253 if ((ctx->nblocks - ctx->gblock) <= 1) { 254 ctx->goff = ctx->gblock = 0; 255 return; 256 } 257 258 /* 259 * Append a failure return as a fall-through i.e. if there is 260 * no match within the group. 261 */ 262 struct bpf_insn insns_ret[] = { 263 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE), 264 }; 265 add_insns(ctx, insns_ret, __arraycount(insns_ret)); 266 267 /* 268 * Adjust jump offsets: on match - jump outside the group i.e. 269 * to the current offset. Otherwise, jump to the next instruction 270 * which would lead to the fall-through code above if none matches. 271 */ 272 fixup_jumps(ctx, ctx->goff, curoff, true); 273 ctx->goff = ctx->gblock = 0; 274} 275 276static void 277fetch_l3(npf_bpf_t *ctx, sa_family_t af, u_int flags) 278{ 279 u_int ver; 280 281 switch (af) { 282 case AF_INET: 283 ver = IPVERSION; 284 break; 285 case AF_INET6: 286 ver = IPV6_VERSION >> 4; 287 break; 288 case AF_UNSPEC: 289 ver = 0; 290 break; 291 default: 292 abort(); 293 } 294 295 /* 296 * The memory store is populated with: 297 * - BPF_MW_IPVER: IP version (4 or 6). 298 * - BPF_MW_L4OFF: L4 header offset. 299 * - BPF_MW_L4PROTO: L4 protocol. 300 */ 301 if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) { 302 const uint8_t jt = ver ? 0 : JUMP_MAGIC; 303 const uint8_t jf = ver ? JUMP_MAGIC : 0; 304 bool ingroup = ctx->ingroup; 305 306 /* 307 * L3 block cannot be inserted in the middle of a group. 308 * In fact, it never is. Check and start the group after. 
309 */ 310 if (ingroup) { 311 assert(ctx->nblocks == ctx->gblock); 312 npfctl_bpf_endgroup(ctx); 313 } 314 315 /* 316 * A <- IP version; A == expected-version? 317 * If no particular version specified, check for non-zero. 318 */ 319 struct bpf_insn insns_af[] = { 320 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER), 321 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf), 322 }; 323 add_insns(ctx, insns_af, __arraycount(insns_af)); 324 ctx->flags |= FETCHED_L3; 325 ctx->af = af; 326 327 if (af) { 328 uint32_t mwords[] = { BM_IPVER, 1, af }; 329 done_raw_block(ctx, mwords, sizeof(mwords)); 330 } 331 if (ingroup) { 332 npfctl_bpf_group(ctx); 333 } 334 335 } else if (af && af != ctx->af) { 336 errx(EXIT_FAILURE, "address family mismatch"); 337 } 338 339 if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) { 340 /* X <- IP header length */ 341 struct bpf_insn insns_hlen[] = { 342 BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF), 343 }; 344 add_insns(ctx, insns_hlen, __arraycount(insns_hlen)); 345 ctx->flags |= X_EQ_L4OFF; 346 } 347} 348 349/* 350 * npfctl_bpf_proto: code block to match IP version and L4 protocol. 351 */ 352void 353npfctl_bpf_proto(npf_bpf_t *ctx, sa_family_t af, int proto) 354{ 355 assert(af != AF_UNSPEC || proto != -1); 356 357 /* Note: fails if IP version does not match. */ 358 fetch_l3(ctx, af, 0); 359 if (proto == -1) { 360 return; 361 } 362 363 struct bpf_insn insns_proto[] = { 364 /* A <- L4 protocol; A == expected-protocol? */ 365 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO), 366 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC), 367 }; 368 add_insns(ctx, insns_proto, __arraycount(insns_proto)); 369 370 uint32_t mwords[] = { BM_PROTO, 1, proto }; 371 done_block(ctx, mwords, sizeof(mwords)); 372 ctx->flags |= CHECKED_L4; 373} 374 375/* 376 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR. 377 * 378 * => IP address shall be in the network byte order. 
379 */ 380void 381npfctl_bpf_cidr(npf_bpf_t *ctx, u_int opts, sa_family_t af, 382 const npf_addr_t *addr, const npf_netmask_t mask) 383{ 384 const uint32_t *awords = (const uint32_t *)addr; 385 u_int nwords, length, maxmask, off; 386 387 assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0)); 388 assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK); 389 390 switch (af) { 391 case AF_INET: 392 maxmask = 32; 393 off = (opts & MATCH_SRC) ? 394 offsetof(struct ip, ip_src) : 395 offsetof(struct ip, ip_dst); 396 nwords = sizeof(struct in_addr) / sizeof(uint32_t); 397 break; 398 case AF_INET6: 399 maxmask = 128; 400 off = (opts & MATCH_SRC) ? 401 offsetof(struct ip6_hdr, ip6_src) : 402 offsetof(struct ip6_hdr, ip6_dst); 403 nwords = sizeof(struct in6_addr) / sizeof(uint32_t); 404 break; 405 default: 406 abort(); 407 } 408 409 /* Ensure address family. */ 410 fetch_l3(ctx, af, 0); 411 412 length = (mask == NPF_NO_NETMASK) ? maxmask : mask; 413 414 /* CAUTION: BPF operates in host byte-order. */ 415 for (u_int i = 0; i < nwords; i++) { 416 const u_int woff = i * sizeof(uint32_t); 417 uint32_t word = ntohl(awords[i]); 418 uint32_t wordmask; 419 420 if (length >= 32) { 421 /* The mask is a full word - do not apply it. */ 422 wordmask = 0; 423 length -= 32; 424 } else if (length) { 425 wordmask = 0xffffffff << (32 - length); 426 length = 0; 427 } else { 428 /* The mask became zero - skip the rest. */ 429 break; 430 } 431 432 /* A <- IP address (or one word of it) */ 433 struct bpf_insn insns_ip[] = { 434 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff), 435 }; 436 add_insns(ctx, insns_ip, __arraycount(insns_ip)); 437 438 /* A <- (A & MASK) */ 439 if (wordmask) { 440 struct bpf_insn insns_mask[] = { 441 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask), 442 }; 443 add_insns(ctx, insns_mask, __arraycount(insns_mask)); 444 } 445 446 /* A == expected-IP-word ? 
*/ 447 struct bpf_insn insns_cmp[] = { 448 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC), 449 }; 450 add_insns(ctx, insns_cmp, __arraycount(insns_cmp)); 451 } 452 453 uint32_t mwords[] = { 454 (opts & MATCH_SRC) ? BM_SRC_CIDR: BM_DST_CIDR, 6, 455 af, mask, awords[0], awords[1], awords[2], awords[3], 456 }; 457 done_block(ctx, mwords, sizeof(mwords)); 458} 459 460/* 461 * npfctl_bpf_ports: code block to match TCP/UDP port range. 462 * 463 * => Port numbers shall be in the network byte order. 464 */ 465void 466npfctl_bpf_ports(npf_bpf_t *ctx, u_int opts, in_port_t from, in_port_t to) 467{ 468 const u_int sport_off = offsetof(struct udphdr, uh_sport); 469 const u_int dport_off = offsetof(struct udphdr, uh_dport); 470 u_int off; 471 472 /* TCP and UDP port offsets are the same. */ 473 assert(sport_off == offsetof(struct tcphdr, th_sport)); 474 assert(dport_off == offsetof(struct tcphdr, th_dport)); 475 assert(ctx->flags & CHECKED_L4); 476 477 assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0)); 478 off = (opts & MATCH_SRC) ? sport_off : dport_off; 479 480 /* X <- IP header length */ 481 fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF); 482 483 struct bpf_insn insns_fetch[] = { 484 /* A <- port */ 485 BPF_STMT(BPF_LD+BPF_H+BPF_IND, off), 486 }; 487 add_insns(ctx, insns_fetch, __arraycount(insns_fetch)); 488 489 /* CAUTION: BPF operates in host byte-order. */ 490 from = ntohs(from); 491 to = ntohs(to); 492 493 if (from == to) { 494 /* Single port case. */ 495 struct bpf_insn insns_port[] = { 496 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC), 497 }; 498 add_insns(ctx, insns_port, __arraycount(insns_port)); 499 } else { 500 /* Port range case. */ 501 struct bpf_insn insns_range[] = { 502 BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, JUMP_MAGIC), 503 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, JUMP_MAGIC, 0), 504 }; 505 add_insns(ctx, insns_range, __arraycount(insns_range)); 506 } 507 508 uint32_t mwords[] = { 509 opts & MATCH_SRC ? 
BM_SRC_PORTS : BM_DST_PORTS, 2, from, to 510 }; 511 done_block(ctx, mwords, sizeof(mwords)); 512} 513 514/* 515 * npfctl_bpf_tcpfl: code block to match TCP flags. 516 */ 517void 518npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask, bool checktcp) 519{ 520 const u_int tcpfl_off = offsetof(struct tcphdr, th_flags); 521 const bool usingmask = tf_mask != tf; 522 523 /* X <- IP header length */ 524 fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF); 525 if (checktcp) { 526 const u_int jf = usingmask ? 3 : 2; 527 assert(ctx->ingroup == false); 528 529 /* A <- L4 protocol; A == TCP? If not, jump out. */ 530 struct bpf_insn insns_tcp[] = { 531 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO), 532 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf), 533 }; 534 add_insns(ctx, insns_tcp, __arraycount(insns_tcp)); 535 } else { 536 assert(ctx->flags & CHECKED_L4); 537 } 538 539 struct bpf_insn insns_tf[] = { 540 /* A <- TCP flags */ 541 BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off), 542 }; 543 add_insns(ctx, insns_tf, __arraycount(insns_tf)); 544 545 if (usingmask) { 546 /* A <- (A & mask) */ 547 struct bpf_insn insns_mask[] = { 548 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask), 549 }; 550 add_insns(ctx, insns_mask, __arraycount(insns_mask)); 551 } 552 553 struct bpf_insn insns_cmp[] = { 554 /* A == expected-TCP-flags? */ 555 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC), 556 }; 557 add_insns(ctx, insns_cmp, __arraycount(insns_cmp)); 558 559 if (!checktcp) { 560 uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask}; 561 done_block(ctx, mwords, sizeof(mwords)); 562 } 563} 564 565/* 566 * npfctl_bpf_icmp: code block to match ICMP type and/or code. 567 * Note: suitable both for the ICMPv4 and ICMPv6. 
568 */ 569void 570npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code) 571{ 572 const u_int type_off = offsetof(struct icmp, icmp_type); 573 const u_int code_off = offsetof(struct icmp, icmp_code); 574 575 assert(ctx->flags & CHECKED_L4); 576 assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off); 577 assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off); 578 assert(type != -1 || code != -1); 579 580 /* X <- IP header length */ 581 fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF); 582 583 if (type != -1) { 584 struct bpf_insn insns_type[] = { 585 BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off), 586 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC), 587 }; 588 add_insns(ctx, insns_type, __arraycount(insns_type)); 589 590 uint32_t mwords[] = { BM_ICMP_TYPE, 1, type }; 591 done_block(ctx, mwords, sizeof(mwords)); 592 } 593 594 if (code != -1) { 595 struct bpf_insn insns_code[] = { 596 BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off), 597 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC), 598 }; 599 add_insns(ctx, insns_code, __arraycount(insns_code)); 600 601 uint32_t mwords[] = { BM_ICMP_CODE, 1, code }; 602 done_block(ctx, mwords, sizeof(mwords)); 603 } 604} 605 606#define SRC_FLAG_BIT (1U << 31) 607 608/* 609 * npfctl_bpf_table: code block to match source/destination IP address 610 * against NPF table specified by ID. 611 */ 612void 613npfctl_bpf_table(npf_bpf_t *ctx, u_int opts, u_int tid) 614{ 615 const bool src = (opts & MATCH_SRC) != 0; 616 617 struct bpf_insn insns_table[] = { 618 BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid), 619 BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE), 620 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0), 621 }; 622 add_insns(ctx, insns_table, __arraycount(insns_table)); 623 624 uint32_t mwords[] = { src ? BM_SRC_TABLE: BM_DST_TABLE, 1, tid }; 625 done_block(ctx, mwords, sizeof(mwords)); 626} 627