npf_bpf_comp.c revision 1.1
1/* $NetBSD: npf_bpf_comp.c,v 1.1 2013/09/19 01:04:45 rmind Exp $ */ 2 3/*- 4 * Copyright (c) 2010-2013 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This material is based upon work partially supported by The 8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/* 33 * BPF byte-code generation for NPF rules. 34 */ 35 36#include <sys/cdefs.h> 37__RCSID("$NetBSD: npf_bpf_comp.c,v 1.1 2013/09/19 01:04:45 rmind Exp $"); 38 39#include <stdlib.h> 40#include <stdbool.h> 41#include <stddef.h> 42#include <string.h> 43#include <inttypes.h> 44#include <err.h> 45#include <assert.h> 46 47#include <netinet/in.h> 48#include <netinet/in_systm.h> 49#include <netinet/ip.h> 50#include <netinet/ip6.h> 51#include <netinet/udp.h> 52#include <netinet/tcp.h> 53#include <netinet/ip_icmp.h> 54#include <netinet/icmp6.h> 55 56#include <net/bpf.h> 57 58#include "npfctl.h" 59 60/* 61 * Note: clear X_EQ_L4OFF when register X is invalidated i.e. it stores 62 * something other than L4 header offset. Generally, when BPF_LDX is used. 63 */ 64#define FETCHED_L3 0x01 65#define X_EQ_L4OFF 0x02 66 67struct npf_bpf { 68 /* 69 * BPF program code, the allocated length (in bytes), the number 70 * of logical blocks and the flags. 71 */ 72 struct bpf_program prog; 73 size_t alen; 74 u_int nblocks; 75 sa_family_t af; 76 uint32_t flags; 77 78 /* The current group offset and block number. */ 79 bool ingroup; 80 u_int goff; 81 u_int gblock; 82 83 /* BPF marks, allocated length and the real length. */ 84 uint32_t * marks; 85 size_t malen; 86 size_t mlen; 87}; 88 89/* 90 * NPF success and failure values to be returned from BPF. 91 */ 92#define NPF_BPF_SUCCESS ((u_int)-1) 93#define NPF_BPF_FAILURE 0 94 95/* 96 * Magic value to indicate the failure path, which is fixed up on completion. 97 * Note: this is the longest jump offset in BPF, since the offset is one byte. 98 */ 99#define JUMP_MAGIC 0xff 100 101/* Reduce re-allocations by expanding in 64 byte blocks. */ 102#define ALLOC_MASK (64 - 1) 103#define ALLOC_ROUND(x) (((x) + ALLOC_MASK) & ~ALLOC_MASK) 104 105npf_bpf_t * 106npfctl_bpf_create(void) 107{ 108 return ecalloc(1, sizeof(npf_bpf_t)); 109} 110 111static void 112fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap) 113{ 114 struct bpf_program *bp = &ctx->prog; 115 116 for (u_int i = start; i < end; i++) { 117 struct bpf_insn *insn = &bp->bf_insns[i]; 118 const u_int fail_off = end - i; 119 120 if (fail_off >= JUMP_MAGIC) { 121 errx(EXIT_FAILURE, "BPF generation error: " 122 "the number of instructions is over the limit"); 123 } 124 if (BPF_CLASS(insn->code) != BPF_JMP) { 125 continue; 126 } 127 if (swap) { 128 uint8_t jt = insn->jt; 129 insn->jt = insn->jf; 130 insn->jf = jt; 131 } 132 if (insn->jt == JUMP_MAGIC) 133 insn->jt = fail_off; 134 if (insn->jf == JUMP_MAGIC) 135 insn->jf = fail_off; 136 } 137} 138 139static void 140add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count) 141{ 142 struct bpf_program *bp = &ctx->prog; 143 size_t offset, len, reqlen; 144 145 /* Note: bf_len is the count of instructions. */ 146 offset = bp->bf_len * sizeof(struct bpf_insn); 147 len = count * sizeof(struct bpf_insn); 148 149 /* Ensure the memory buffer for the program. */ 150 reqlen = ALLOC_ROUND(offset + len); 151 if (reqlen > ctx->alen) { 152 bp->bf_insns = erealloc(bp->bf_insns, reqlen); 153 ctx->alen = reqlen; 154 } 155 156 /* Add the code block. */ 157 memcpy((uint8_t *)bp->bf_insns + offset, insns, len); 158 bp->bf_len += count; 159} 160 161static void 162done_raw_block(npf_bpf_t *ctx, const uint32_t *m, size_t len) 163{ 164 size_t reqlen, nargs = m[1]; 165 166 if ((len / sizeof(uint32_t) - 2) != nargs) { 167 errx(EXIT_FAILURE, "invalid BPF block description"); 168 } 169 reqlen = ALLOC_ROUND(ctx->mlen + len); 170 if (reqlen > ctx->malen) { 171 ctx->marks = erealloc(ctx->marks, reqlen); 172 ctx->malen = reqlen; 173 } 174 memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len); 175 ctx->mlen += len; 176} 177 178static void 179done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len) 180{ 181 done_raw_block(ctx, m, len); 182 ctx->nblocks++; 183} 184 185struct bpf_program * 186npfctl_bpf_complete(npf_bpf_t *ctx) 187{ 188 struct bpf_program *bp = &ctx->prog; 189 const u_int retoff = bp->bf_len; 190 191 /* Add the return fragment (success and failure paths). */ 192 struct bpf_insn insns_ret[] = { 193 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS), 194 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE), 195 }; 196 add_insns(ctx, insns_ret, __arraycount(insns_ret)); 197 198 /* Fixup all jumps to the main failure path. */ 199 fixup_jumps(ctx, 0, retoff, false); 200 201 return &ctx->prog; 202} 203 204const void * 205npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len) 206{ 207 *len = ctx->mlen; 208 return ctx->marks; 209} 210 211void 212npfctl_bpf_destroy(npf_bpf_t *ctx) 213{ 214 free(ctx->prog.bf_insns); 215 free(ctx->marks); 216 free(ctx); 217} 218 219/* 220 * npfctl_bpf_group: begin a logical group. It merely uses logical 221 * disjunction (OR) for compares within the group. 222 */ 223void 224npfctl_bpf_group(npf_bpf_t *ctx) 225{ 226 struct bpf_program *bp = &ctx->prog; 227 228 assert(ctx->goff == 0); 229 assert(ctx->gblock == 0); 230 231 ctx->goff = bp->bf_len; 232 ctx->gblock = ctx->nblocks; 233 ctx->ingroup = true; 234} 235 236void 237npfctl_bpf_endgroup(npf_bpf_t *ctx) 238{ 239 struct bpf_program *bp = &ctx->prog; 240 const size_t curoff = bp->bf_len; 241 242 /* If there are no blocks or only one - nothing to do. */ 243 if ((ctx->nblocks - ctx->gblock) <= 1) { 244 ctx->goff = ctx->gblock = 0; 245 return; 246 } 247 248 /* 249 * Append a failure return as a fall-through i.e. if there is 250 * no match within the group. 251 */ 252 struct bpf_insn insns_ret[] = { 253 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE), 254 }; 255 add_insns(ctx, insns_ret, __arraycount(insns_ret)); 256 257 /* 258 * Adjust jump offsets: on match - jump outside the group i.e. 259 * to the current offset. Otherwise, jump to the next instruction 260 * which would lead to the fall-through code above if none matches. 261 */ 262 fixup_jumps(ctx, ctx->goff, curoff, true); 263 ctx->goff = ctx->gblock = 0; 264} 265 266static void 267fetch_l3(npf_bpf_t *ctx, sa_family_t af, u_int flags) 268{ 269 u_int ver; 270 271 switch (af) { 272 case AF_INET: 273 ver = IPVERSION; 274 break; 275 case AF_INET6: 276 ver = IPV6_VERSION >> 4; 277 break; 278 case AF_UNSPEC: 279 ver = 0; 280 break; 281 default: 282 abort(); 283 } 284 285 /* 286 * Fetch L3 information. The coprocessor populates the following 287 * words in the scratch memory store: 288 * - BPF_MW_IPVER: IP version (4 or 6). 289 * - BPF_MW_L4OFF: L4 header offset. 290 * - BPF_MW_L4PROTO: L4 protocol. 291 */ 292 if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) { 293 const uint8_t jt = ver ? 0 : JUMP_MAGIC; 294 const uint8_t jf = ver ? JUMP_MAGIC : 0; 295 bool ingroup = ctx->ingroup; 296 297 /* 298 * L3 block cannot be inserted in the middle of a group. 299 * In fact, it never is. Check and start the group after. 300 */ 301 if (ingroup) { 302 assert(ctx->nblocks == ctx->gblock); 303 npfctl_bpf_endgroup(ctx); 304 } 305 306 /* 307 * A <- IP version; A == expected-version? 308 * If no particular version specified, check for non-zero. 309 */ 310 struct bpf_insn insns_l3[] = { 311 BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_L3), 312 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf), 313 }; 314 add_insns(ctx, insns_l3, __arraycount(insns_l3)); 315 ctx->flags |= FETCHED_L3; 316 ctx->af = af; 317 318 if (af) { 319 uint32_t mwords[] = { BM_IPVER, 1, af }; 320 done_raw_block(ctx, mwords, sizeof(mwords)); 321 } 322 if (ingroup) { 323 npfctl_bpf_group(ctx); 324 } 325 326 } else if (af && af != ctx->af) { 327 errx(EXIT_FAILURE, "address family mismatch"); 328 } 329 330 if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) { 331 /* X <- IP header length */ 332 struct bpf_insn insns_hlen[] = { 333 BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF), 334 }; 335 add_insns(ctx, insns_hlen, __arraycount(insns_hlen)); 336 ctx->flags |= X_EQ_L4OFF; 337 } 338} 339 340/* 341 * npfctl_bpf_proto: code block to match IP version and L4 protocol. 342 */ 343void 344npfctl_bpf_proto(npf_bpf_t *ctx, sa_family_t af, int proto) 345{ 346 assert(af != AF_UNSPEC || proto != -1); 347 348 /* Note: fails if IP version does not match. */ 349 fetch_l3(ctx, af, 0); 350 if (proto == -1) { 351 return; 352 } 353 354 struct bpf_insn insns_proto[] = { 355 /* A <- L4 protocol; A == expected-protocol? */ 356 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO), 357 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC), 358 }; 359 add_insns(ctx, insns_proto, __arraycount(insns_proto)); 360 361 uint32_t mwords[] = { BM_PROTO, 1, proto }; 362 done_block(ctx, mwords, sizeof(mwords)); 363} 364 365/* 366 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR. 367 * 368 * => IP address shall be in the network byte order. 369 */ 370void 371npfctl_bpf_cidr(npf_bpf_t *ctx, u_int opts, sa_family_t af, 372 const npf_addr_t *addr, const npf_netmask_t mask) 373{ 374 const uint32_t *awords = (const uint32_t *)addr; 375 u_int nwords, length, maxmask, off; 376 377 assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0)); 378 assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK); 379 380 switch (af) { 381 case AF_INET: 382 maxmask = 32; 383 off = (opts & MATCH_SRC) ? 384 offsetof(struct ip, ip_src) : 385 offsetof(struct ip, ip_dst); 386 nwords = sizeof(struct in_addr) / sizeof(uint32_t); 387 break; 388 case AF_INET6: 389 maxmask = 128; 390 off = (opts & MATCH_SRC) ? 391 offsetof(struct ip6_hdr, ip6_src) : 392 offsetof(struct ip6_hdr, ip6_dst); 393 nwords = sizeof(struct in6_addr) / sizeof(uint32_t); 394 break; 395 default: 396 abort(); 397 } 398 399 /* Ensure address family. */ 400 fetch_l3(ctx, af, 0); 401 402 length = (mask == NPF_NO_NETMASK) ? maxmask : mask; 403 404 /* CAUTION: BPF operates in host byte-order. */ 405 for (u_int i = 0; i < nwords; i++) { 406 const u_int woff = i * sizeof(uint32_t); 407 uint32_t word = ntohl(awords[i]); 408 uint32_t wordmask; 409 410 if (length >= 32) { 411 /* The mask is a full word - do not apply it. */ 412 wordmask = 0; 413 length -= 32; 414 } else if (length) { 415 wordmask = 0xffffffff << (maxmask - length); 416 length = 0; 417 } else { 418 /* 419 * The mask is zero - just compare the word 420 * against zero. 421 */ 422 wordmask = 0; 423 word = 0; 424 } 425 426 /* A <- IP address (or one word of it) */ 427 struct bpf_insn insns_ip[] = { 428 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff), 429 }; 430 add_insns(ctx, insns_ip, __arraycount(insns_ip)); 431 432 /* A <- (A & MASK) */ 433 if (wordmask) { 434 struct bpf_insn insns_mask[] = { 435 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask), 436 }; 437 add_insns(ctx, insns_mask, __arraycount(insns_mask)); 438 } 439 440 /* A == expected-IP-word ? */ 441 struct bpf_insn insns_cmp[] = { 442 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC), 443 }; 444 add_insns(ctx, insns_cmp, __arraycount(insns_cmp)); 445 } 446 447 uint32_t mwords[] = { 448 (opts & MATCH_SRC) ? BM_SRC_CIDR: BM_DST_CIDR, 6, 449 af, mask, awords[0], awords[1], awords[2], awords[3], 450 }; 451 done_block(ctx, mwords, sizeof(mwords)); 452} 453 454/* 455 * npfctl_bpf_ports: code block to match TCP/UDP port range. 456 * 457 * => Port numbers shall be in the network byte order. 458 */ 459void 460npfctl_bpf_ports(npf_bpf_t *ctx, u_int opts, in_port_t from, in_port_t to) 461{ 462 const u_int sport_off = offsetof(struct udphdr, uh_sport); 463 const u_int dport_off = offsetof(struct udphdr, uh_dport); 464 u_int off; 465 466 /* TCP and UDP port offsets are the same. */ 467 assert(sport_off == offsetof(struct tcphdr, th_sport)); 468 assert(dport_off == offsetof(struct tcphdr, th_dport)); 469 470 assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0)); 471 off = (opts & MATCH_SRC) ? sport_off : dport_off; 472 473 /* X <- IP header length */ 474 fetch_l3(ctx, 0, X_EQ_L4OFF); 475 476 struct bpf_insn insns_fetch[] = { 477 /* A <- port */ 478 BPF_STMT(BPF_LD+BPF_H+BPF_IND, off), 479 }; 480 add_insns(ctx, insns_fetch, __arraycount(insns_fetch)); 481 482 /* CAUTION: BPF operates in host byte-order. */ 483 from = ntohs(from); 484 to = ntohs(to); 485 486 if (from == to) { 487 /* Single port case. */ 488 struct bpf_insn insns_port[] = { 489 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC), 490 }; 491 add_insns(ctx, insns_port, __arraycount(insns_port)); 492 } else { 493 /* Port range case. */ 494 struct bpf_insn insns_range[] = { 495 BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, JUMP_MAGIC), 496 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, JUMP_MAGIC, 0), 497 }; 498 add_insns(ctx, insns_range, __arraycount(insns_range)); 499 } 500 501 uint32_t mwords[] = { 502 opts & MATCH_SRC ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to 503 }; 504 done_block(ctx, mwords, sizeof(mwords)); 505} 506 507/* 508 * npfctl_bpf_tcpfl: code block to match TCP flags. 509 */ 510void 511npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask) 512{ 513 const u_int tcpfl_off = offsetof(struct tcphdr, th_flags); 514 515 /* X <- IP header length */ 516 fetch_l3(ctx, 0, X_EQ_L4OFF); 517 518 struct bpf_insn insns_tf[] = { 519 /* A <- TCP flags */ 520 BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off), 521 }; 522 add_insns(ctx, insns_tf, __arraycount(insns_tf)); 523 524 if (tf_mask != tf) { 525 /* A <- (A & mask) */ 526 struct bpf_insn insns_mask[] = { 527 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask), 528 }; 529 add_insns(ctx, insns_mask, __arraycount(insns_mask)); 530 } 531 532 struct bpf_insn insns_cmp[] = { 533 /* A == expected-TCP-flags? */ 534 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC), 535 }; 536 add_insns(ctx, insns_cmp, __arraycount(insns_cmp)); 537 538 uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask}; 539 done_block(ctx, mwords, sizeof(mwords)); 540} 541 542/* 543 * npfctl_bpf_icmp: code block to match ICMP type and/or code. 544 * Note: suitable both for the ICMPv4 and ICMPv6. 545 */ 546void 547npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code) 548{ 549 const u_int type_off = offsetof(struct icmp, icmp_type); 550 const u_int code_off = offsetof(struct icmp, icmp_code); 551 552 assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off); 553 assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off); 554 assert(type != -1 || code != -1); 555 556 /* X <- IP header length */ 557 fetch_l3(ctx, 0, X_EQ_L4OFF); 558 559 if (type != -1) { 560 struct bpf_insn insns_type[] = { 561 BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off), 562 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC), 563 }; 564 add_insns(ctx, insns_type, __arraycount(insns_type)); 565 566 uint32_t mwords[] = { BM_ICMP_TYPE, 1, type }; 567 done_block(ctx, mwords, sizeof(mwords)); 568 } 569 570 if (code != -1) { 571 struct bpf_insn insns_code[] = { 572 BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off), 573 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC), 574 }; 575 add_insns(ctx, insns_code, __arraycount(insns_code)); 576 577 uint32_t mwords[] = { BM_ICMP_CODE, 1, code }; 578 done_block(ctx, mwords, sizeof(mwords)); 579 } 580} 581 582#define SRC_FLAG_BIT (1U << 31) 583 584/* 585 * npfctl_bpf_table: code block to match source/destination IP address 586 * against NPF table specified by ID. 587 */ 588void 589npfctl_bpf_table(npf_bpf_t *ctx, u_int opts, u_int tid) 590{ 591 const bool src = (opts & MATCH_SRC) != 0; 592 593 struct bpf_insn insns_table[] = { 594 BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid), 595 BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE), 596 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0), 597 }; 598 add_insns(ctx, insns_table, __arraycount(insns_table)); 599 600 uint32_t mwords[] = { src ? BM_SRC_TABLE: BM_DST_TABLE, 1, tid }; 601 done_block(ctx, mwords, sizeof(mwords)); 602} 603