npf_bpf_comp.c revision 1.8
/* $NetBSD: npf_bpf_comp.c,v 1.8 2015/06/08 01:00:43 rmind Exp $ */

/*-
 * Copyright (c) 2010-2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
34 */ 35 36#include <sys/cdefs.h> 37__RCSID("$NetBSD: npf_bpf_comp.c,v 1.8 2015/06/08 01:00:43 rmind Exp $"); 38 39#include <stdlib.h> 40#include <stdbool.h> 41#include <stddef.h> 42#include <string.h> 43#include <inttypes.h> 44#include <err.h> 45#include <assert.h> 46 47#include <netinet/in.h> 48#include <netinet/in_systm.h> 49#include <netinet/ip.h> 50#include <netinet/ip6.h> 51#include <netinet/udp.h> 52#include <netinet/tcp.h> 53#include <netinet/ip_icmp.h> 54#include <netinet/icmp6.h> 55 56#include <net/bpf.h> 57 58#include "npfctl.h" 59 60/* 61 * Note: clear X_EQ_L4OFF when register X is invalidated i.e. it stores 62 * something other than L4 header offset. Generally, when BPF_LDX is used. 63 */ 64#define FETCHED_L3 0x01 65#define CHECKED_L4 0x02 66#define X_EQ_L4OFF 0x04 67 68struct npf_bpf { 69 /* 70 * BPF program code, the allocated length (in bytes), the number 71 * of logical blocks and the flags. 72 */ 73 struct bpf_program prog; 74 size_t alen; 75 u_int nblocks; 76 sa_family_t af; 77 uint32_t flags; 78 79 /* The current group offset and block number. */ 80 bool ingroup; 81 u_int goff; 82 u_int gblock; 83 84 /* BPF marks, allocated length and the real length. */ 85 uint32_t * marks; 86 size_t malen; 87 size_t mlen; 88}; 89 90/* 91 * NPF success and failure values to be returned from BPF. 92 */ 93#define NPF_BPF_SUCCESS ((u_int)-1) 94#define NPF_BPF_FAILURE 0 95 96/* 97 * Magic value to indicate the failure path, which is fixed up on completion. 98 * Note: this is the longest jump offset in BPF, since the offset is one byte. 99 */ 100#define JUMP_MAGIC 0xff 101 102/* Reduce re-allocations by expanding in 64 byte blocks. 
*/ 103#define ALLOC_MASK (64 - 1) 104#define ALLOC_ROUND(x) (((x) + ALLOC_MASK) & ~ALLOC_MASK) 105 106npf_bpf_t * 107npfctl_bpf_create(void) 108{ 109 return ecalloc(1, sizeof(npf_bpf_t)); 110} 111 112static void 113fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap) 114{ 115 struct bpf_program *bp = &ctx->prog; 116 117 for (u_int i = start; i < end; i++) { 118 struct bpf_insn *insn = &bp->bf_insns[i]; 119 const u_int fail_off = end - i; 120 121 if (fail_off >= JUMP_MAGIC) { 122 errx(EXIT_FAILURE, "BPF generation error: " 123 "the number of instructions is over the limit"); 124 } 125 if (BPF_CLASS(insn->code) != BPF_JMP) { 126 continue; 127 } 128 if (swap) { 129 uint8_t jt = insn->jt; 130 insn->jt = insn->jf; 131 insn->jf = jt; 132 } 133 if (insn->jt == JUMP_MAGIC) 134 insn->jt = fail_off; 135 if (insn->jf == JUMP_MAGIC) 136 insn->jf = fail_off; 137 } 138} 139 140static void 141add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count) 142{ 143 struct bpf_program *bp = &ctx->prog; 144 size_t offset, len, reqlen; 145 146 /* Note: bf_len is the count of instructions. */ 147 offset = bp->bf_len * sizeof(struct bpf_insn); 148 len = count * sizeof(struct bpf_insn); 149 150 /* Ensure the memory buffer for the program. */ 151 reqlen = ALLOC_ROUND(offset + len); 152 if (reqlen > ctx->alen) { 153 bp->bf_insns = erealloc(bp->bf_insns, reqlen); 154 ctx->alen = reqlen; 155 } 156 157 /* Add the code block. 
*/ 158 memcpy((uint8_t *)bp->bf_insns + offset, insns, len); 159 bp->bf_len += count; 160} 161 162static void 163done_raw_block(npf_bpf_t *ctx, const uint32_t *m, size_t len) 164{ 165 size_t reqlen, nargs = m[1]; 166 167 if ((len / sizeof(uint32_t) - 2) != nargs) { 168 errx(EXIT_FAILURE, "invalid BPF block description"); 169 } 170 reqlen = ALLOC_ROUND(ctx->mlen + len); 171 if (reqlen > ctx->malen) { 172 ctx->marks = erealloc(ctx->marks, reqlen); 173 ctx->malen = reqlen; 174 } 175 memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len); 176 ctx->mlen += len; 177} 178 179static void 180done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len) 181{ 182 done_raw_block(ctx, m, len); 183 ctx->nblocks++; 184} 185 186struct bpf_program * 187npfctl_bpf_complete(npf_bpf_t *ctx) 188{ 189 struct bpf_program *bp = &ctx->prog; 190 const u_int retoff = bp->bf_len; 191 192 /* No instructions (optimised out). */ 193 if (!bp->bf_len) 194 return NULL; 195 196 /* Add the return fragment (success and failure paths). */ 197 struct bpf_insn insns_ret[] = { 198 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS), 199 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE), 200 }; 201 add_insns(ctx, insns_ret, __arraycount(insns_ret)); 202 203 /* Fixup all jumps to the main failure path. */ 204 fixup_jumps(ctx, 0, retoff, false); 205 206 return &ctx->prog; 207} 208 209const void * 210npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len) 211{ 212 *len = ctx->mlen; 213 return ctx->marks; 214} 215 216void 217npfctl_bpf_destroy(npf_bpf_t *ctx) 218{ 219 free(ctx->prog.bf_insns); 220 free(ctx->marks); 221 free(ctx); 222} 223 224/* 225 * npfctl_bpf_group: begin a logical group. It merely uses logical 226 * disjunction (OR) for compares within the group. 
227 */ 228void 229npfctl_bpf_group(npf_bpf_t *ctx) 230{ 231 struct bpf_program *bp = &ctx->prog; 232 233 assert(ctx->goff == 0); 234 assert(ctx->gblock == 0); 235 236 ctx->goff = bp->bf_len; 237 ctx->gblock = ctx->nblocks; 238 ctx->ingroup = true; 239} 240 241void 242npfctl_bpf_endgroup(npf_bpf_t *ctx) 243{ 244 struct bpf_program *bp = &ctx->prog; 245 const size_t curoff = bp->bf_len; 246 247 /* If there are no blocks or only one - nothing to do. */ 248 if ((ctx->nblocks - ctx->gblock) <= 1) { 249 ctx->goff = ctx->gblock = 0; 250 return; 251 } 252 253 /* 254 * Append a failure return as a fall-through i.e. if there is 255 * no match within the group. 256 */ 257 struct bpf_insn insns_ret[] = { 258 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE), 259 }; 260 add_insns(ctx, insns_ret, __arraycount(insns_ret)); 261 262 /* 263 * Adjust jump offsets: on match - jump outside the group i.e. 264 * to the current offset. Otherwise, jump to the next instruction 265 * which would lead to the fall-through code above if none matches. 266 */ 267 fixup_jumps(ctx, ctx->goff, curoff, true); 268 ctx->goff = ctx->gblock = 0; 269} 270 271static void 272fetch_l3(npf_bpf_t *ctx, sa_family_t af, u_int flags) 273{ 274 u_int ver; 275 276 switch (af) { 277 case AF_INET: 278 ver = IPVERSION; 279 break; 280 case AF_INET6: 281 ver = IPV6_VERSION >> 4; 282 break; 283 case AF_UNSPEC: 284 ver = 0; 285 break; 286 default: 287 abort(); 288 } 289 290 /* 291 * The memory store is populated with: 292 * - BPF_MW_IPVER: IP version (4 or 6). 293 * - BPF_MW_L4OFF: L4 header offset. 294 * - BPF_MW_L4PROTO: L4 protocol. 295 */ 296 if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) { 297 const uint8_t jt = ver ? 0 : JUMP_MAGIC; 298 const uint8_t jf = ver ? JUMP_MAGIC : 0; 299 bool ingroup = ctx->ingroup; 300 301 /* 302 * L3 block cannot be inserted in the middle of a group. 303 * In fact, it never is. Check and start the group after. 
304 */ 305 if (ingroup) { 306 assert(ctx->nblocks == ctx->gblock); 307 npfctl_bpf_endgroup(ctx); 308 } 309 310 /* 311 * A <- IP version; A == expected-version? 312 * If no particular version specified, check for non-zero. 313 */ 314 struct bpf_insn insns_af[] = { 315 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER), 316 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf), 317 }; 318 add_insns(ctx, insns_af, __arraycount(insns_af)); 319 ctx->flags |= FETCHED_L3; 320 ctx->af = af; 321 322 if (af) { 323 uint32_t mwords[] = { BM_IPVER, 1, af }; 324 done_raw_block(ctx, mwords, sizeof(mwords)); 325 } 326 if (ingroup) { 327 npfctl_bpf_group(ctx); 328 } 329 330 } else if (af && af != ctx->af) { 331 errx(EXIT_FAILURE, "address family mismatch"); 332 } 333 334 if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) { 335 /* X <- IP header length */ 336 struct bpf_insn insns_hlen[] = { 337 BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF), 338 }; 339 add_insns(ctx, insns_hlen, __arraycount(insns_hlen)); 340 ctx->flags |= X_EQ_L4OFF; 341 } 342} 343 344/* 345 * npfctl_bpf_proto: code block to match IP version and L4 protocol. 346 */ 347void 348npfctl_bpf_proto(npf_bpf_t *ctx, sa_family_t af, int proto) 349{ 350 assert(af != AF_UNSPEC || proto != -1); 351 352 /* Note: fails if IP version does not match. */ 353 fetch_l3(ctx, af, 0); 354 if (proto == -1) { 355 return; 356 } 357 358 struct bpf_insn insns_proto[] = { 359 /* A <- L4 protocol; A == expected-protocol? */ 360 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO), 361 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC), 362 }; 363 add_insns(ctx, insns_proto, __arraycount(insns_proto)); 364 365 uint32_t mwords[] = { BM_PROTO, 1, proto }; 366 done_block(ctx, mwords, sizeof(mwords)); 367 ctx->flags |= CHECKED_L4; 368} 369 370/* 371 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR. 372 * 373 * => IP address shall be in the network byte order. 
374 */ 375void 376npfctl_bpf_cidr(npf_bpf_t *ctx, u_int opts, sa_family_t af, 377 const npf_addr_t *addr, const npf_netmask_t mask) 378{ 379 const uint32_t *awords = (const uint32_t *)addr; 380 u_int nwords, length, maxmask, off; 381 382 assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0)); 383 assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK); 384 385 switch (af) { 386 case AF_INET: 387 maxmask = 32; 388 off = (opts & MATCH_SRC) ? 389 offsetof(struct ip, ip_src) : 390 offsetof(struct ip, ip_dst); 391 nwords = sizeof(struct in_addr) / sizeof(uint32_t); 392 break; 393 case AF_INET6: 394 maxmask = 128; 395 off = (opts & MATCH_SRC) ? 396 offsetof(struct ip6_hdr, ip6_src) : 397 offsetof(struct ip6_hdr, ip6_dst); 398 nwords = sizeof(struct in6_addr) / sizeof(uint32_t); 399 break; 400 default: 401 abort(); 402 } 403 404 /* Ensure address family. */ 405 fetch_l3(ctx, af, 0); 406 407 length = (mask == NPF_NO_NETMASK) ? maxmask : mask; 408 409 /* CAUTION: BPF operates in host byte-order. */ 410 for (u_int i = 0; i < nwords; i++) { 411 const u_int woff = i * sizeof(uint32_t); 412 uint32_t word = ntohl(awords[i]); 413 uint32_t wordmask; 414 415 if (length >= 32) { 416 /* The mask is a full word - do not apply it. */ 417 wordmask = 0; 418 length -= 32; 419 } else if (length) { 420 wordmask = 0xffffffff << (32 - length); 421 length = 0; 422 } else { 423 /* The mask became zero - skip the rest. */ 424 break; 425 } 426 427 /* A <- IP address (or one word of it) */ 428 struct bpf_insn insns_ip[] = { 429 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff), 430 }; 431 add_insns(ctx, insns_ip, __arraycount(insns_ip)); 432 433 /* A <- (A & MASK) */ 434 if (wordmask) { 435 struct bpf_insn insns_mask[] = { 436 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask), 437 }; 438 add_insns(ctx, insns_mask, __arraycount(insns_mask)); 439 } 440 441 /* A == expected-IP-word ? 
*/ 442 struct bpf_insn insns_cmp[] = { 443 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC), 444 }; 445 add_insns(ctx, insns_cmp, __arraycount(insns_cmp)); 446 } 447 448 uint32_t mwords[] = { 449 (opts & MATCH_SRC) ? BM_SRC_CIDR: BM_DST_CIDR, 6, 450 af, mask, awords[0], awords[1], awords[2], awords[3], 451 }; 452 done_block(ctx, mwords, sizeof(mwords)); 453} 454 455/* 456 * npfctl_bpf_ports: code block to match TCP/UDP port range. 457 * 458 * => Port numbers shall be in the network byte order. 459 */ 460void 461npfctl_bpf_ports(npf_bpf_t *ctx, u_int opts, in_port_t from, in_port_t to) 462{ 463 const u_int sport_off = offsetof(struct udphdr, uh_sport); 464 const u_int dport_off = offsetof(struct udphdr, uh_dport); 465 u_int off; 466 467 /* TCP and UDP port offsets are the same. */ 468 assert(sport_off == offsetof(struct tcphdr, th_sport)); 469 assert(dport_off == offsetof(struct tcphdr, th_dport)); 470 assert(ctx->flags & CHECKED_L4); 471 472 assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0)); 473 off = (opts & MATCH_SRC) ? sport_off : dport_off; 474 475 /* X <- IP header length */ 476 fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF); 477 478 struct bpf_insn insns_fetch[] = { 479 /* A <- port */ 480 BPF_STMT(BPF_LD+BPF_H+BPF_IND, off), 481 }; 482 add_insns(ctx, insns_fetch, __arraycount(insns_fetch)); 483 484 /* CAUTION: BPF operates in host byte-order. */ 485 from = ntohs(from); 486 to = ntohs(to); 487 488 if (from == to) { 489 /* Single port case. */ 490 struct bpf_insn insns_port[] = { 491 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC), 492 }; 493 add_insns(ctx, insns_port, __arraycount(insns_port)); 494 } else { 495 /* Port range case. */ 496 struct bpf_insn insns_range[] = { 497 BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, JUMP_MAGIC), 498 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, JUMP_MAGIC, 0), 499 }; 500 add_insns(ctx, insns_range, __arraycount(insns_range)); 501 } 502 503 uint32_t mwords[] = { 504 opts & MATCH_SRC ? 
BM_SRC_PORTS : BM_DST_PORTS, 2, from, to 505 }; 506 done_block(ctx, mwords, sizeof(mwords)); 507} 508 509/* 510 * npfctl_bpf_tcpfl: code block to match TCP flags. 511 */ 512void 513npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask, bool checktcp) 514{ 515 const u_int tcpfl_off = offsetof(struct tcphdr, th_flags); 516 const bool usingmask = tf_mask != tf; 517 518 /* X <- IP header length */ 519 fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF); 520 if (checktcp) { 521 const u_int jf = usingmask ? 3 : 2; 522 assert(ctx->ingroup == false); 523 524 /* A <- L4 protocol; A == TCP? If not, jump out. */ 525 struct bpf_insn insns_tcp[] = { 526 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO), 527 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf), 528 }; 529 add_insns(ctx, insns_tcp, __arraycount(insns_tcp)); 530 } else { 531 assert(ctx->flags & CHECKED_L4); 532 } 533 534 struct bpf_insn insns_tf[] = { 535 /* A <- TCP flags */ 536 BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off), 537 }; 538 add_insns(ctx, insns_tf, __arraycount(insns_tf)); 539 540 if (usingmask) { 541 /* A <- (A & mask) */ 542 struct bpf_insn insns_mask[] = { 543 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask), 544 }; 545 add_insns(ctx, insns_mask, __arraycount(insns_mask)); 546 } 547 548 struct bpf_insn insns_cmp[] = { 549 /* A == expected-TCP-flags? */ 550 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC), 551 }; 552 add_insns(ctx, insns_cmp, __arraycount(insns_cmp)); 553 554 if (!checktcp) { 555 uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask}; 556 done_block(ctx, mwords, sizeof(mwords)); 557 } 558} 559 560/* 561 * npfctl_bpf_icmp: code block to match ICMP type and/or code. 562 * Note: suitable both for the ICMPv4 and ICMPv6. 
563 */ 564void 565npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code) 566{ 567 const u_int type_off = offsetof(struct icmp, icmp_type); 568 const u_int code_off = offsetof(struct icmp, icmp_code); 569 570 assert(ctx->flags & CHECKED_L4); 571 assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off); 572 assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off); 573 assert(type != -1 || code != -1); 574 575 /* X <- IP header length */ 576 fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF); 577 578 if (type != -1) { 579 struct bpf_insn insns_type[] = { 580 BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off), 581 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC), 582 }; 583 add_insns(ctx, insns_type, __arraycount(insns_type)); 584 585 uint32_t mwords[] = { BM_ICMP_TYPE, 1, type }; 586 done_block(ctx, mwords, sizeof(mwords)); 587 } 588 589 if (code != -1) { 590 struct bpf_insn insns_code[] = { 591 BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off), 592 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC), 593 }; 594 add_insns(ctx, insns_code, __arraycount(insns_code)); 595 596 uint32_t mwords[] = { BM_ICMP_CODE, 1, code }; 597 done_block(ctx, mwords, sizeof(mwords)); 598 } 599} 600 601#define SRC_FLAG_BIT (1U << 31) 602 603/* 604 * npfctl_bpf_table: code block to match source/destination IP address 605 * against NPF table specified by ID. 606 */ 607void 608npfctl_bpf_table(npf_bpf_t *ctx, u_int opts, u_int tid) 609{ 610 const bool src = (opts & MATCH_SRC) != 0; 611 612 struct bpf_insn insns_table[] = { 613 BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid), 614 BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE), 615 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0), 616 }; 617 add_insns(ctx, insns_table, __arraycount(insns_table)); 618 619 uint32_t mwords[] = { src ? BM_SRC_TABLE: BM_DST_TABLE, 1, tid }; 620 done_block(ctx, mwords, sizeof(mwords)); 621} 622