ip_fw2.c revision 145093
1/*- 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: head/sys/netinet/ip_fw2.c 145093 2005-04-15 00:47:44Z brooks $ 26 */ 27 28#define DEB(x) 29#define DDB(x) x 30 31/* 32 * Implement IP packet firewall (new version) 33 */ 34 35#if !defined(KLD_MODULE) 36#include "opt_ipfw.h" 37#include "opt_ipdn.h" 38#include "opt_inet.h" 39#include "opt_ipsec.h" 40#ifndef INET 41#error IPFIREWALL requires INET. 
42#endif /* INET */ 43#endif 44 45#define IPFW2 1 46#if IPFW2 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/condvar.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/kernel.h> 53#include <sys/jail.h> 54#include <sys/module.h> 55#include <sys/proc.h> 56#include <sys/socket.h> 57#include <sys/socketvar.h> 58#include <sys/sysctl.h> 59#include <sys/syslog.h> 60#include <sys/ucred.h> 61#include <net/if.h> 62#include <net/radix.h> 63#include <net/route.h> 64#include <netinet/in.h> 65#include <netinet/in_systm.h> 66#include <netinet/in_var.h> 67#include <netinet/in_pcb.h> 68#include <netinet/ip.h> 69#include <netinet/ip_var.h> 70#include <netinet/ip_icmp.h> 71#include <netinet/ip_fw.h> 72#include <netinet/ip_divert.h> 73#include <netinet/ip_dummynet.h> 74#include <netinet/tcp.h> 75#include <netinet/tcp_timer.h> 76#include <netinet/tcp_var.h> 77#include <netinet/tcpip.h> 78#include <netinet/udp.h> 79#include <netinet/udp_var.h> 80 81#include <netgraph/ng_ipfw.h> 82 83#include <altq/if_altq.h> 84 85#ifdef IPSEC 86#include <netinet6/ipsec.h> 87#endif 88 89#include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */ 90 91#include <machine/in_cksum.h> /* XXX for in_cksum */ 92 93/* 94 * set_disable contains one bit per set value (0..31). 95 * If the bit is set, all rules with the corresponding set 96 * are disabled. Set RESVD_SET(31) is reserved for the default rule 97 * and rules that are not deleted by the flush command, 98 * and CANNOT be disabled. 99 * Rules in set RESVD_SET can only be deleted explicitly. 100 */ 101static u_int32_t set_disable; 102 103static int fw_verbose; 104static int verbose_limit; 105 106static struct callout ipfw_timeout; 107static uma_zone_t ipfw_dyn_rule_zone; 108#define IPFW_DEFAULT_RULE 65535 109 110/* 111 * Data structure to cache our ucred related 112 * information. This structure only gets used if 113 * the user specified UID/GID based constraints in 114 * a firewall rule. 
 */
struct ip_fw_ugid {
	gid_t		fw_groups[NGROUPS];	/* group list of the cached cred */
	int		fw_ngroups;		/* number of valid entries above */
	uid_t		fw_uid;			/* uid of the cached cred */
	int		fw_prid;		/* NOTE(review): filled in by ugid lookup code outside this chunk -- confirm semantics there */
};

/*
 * The static rule list plus the bookkeeping for a poor-man's
 * reader/writer lock built from a mutex and a condvar: readers only
 * bump busy_count under the mutex, writers sleep on the condvar
 * until busy_count drains to zero.
 */
struct ip_fw_chain {
	struct ip_fw	*rules;		/* list of rules */
	struct ip_fw	*reap;		/* list of rules to reap */
	struct mtx	mtx;		/* lock guarding rule list */
	int		busy_count;	/* busy count for rw locks */
	int		want_write;	/* # of writers waiting or active */
	struct cv	cv;		/* writers sleep here */
};
#define	IPFW_LOCK_INIT(_chain) \
	mtx_init(&(_chain)->mtx, "IPFW static rules", NULL, \
		MTX_DEF | MTX_RECURSE)
#define	IPFW_LOCK_DESTROY(_chain)	mtx_destroy(&(_chain)->mtx)
/* Writers must hold the mutex for the whole update. */
#define	IPFW_WLOCK_ASSERT(_chain)	do {				\
	mtx_assert(&(_chain)->mtx, MA_OWNED);				\
	NET_ASSERT_GIANT();						\
} while (0)

/*
 * Enter a read section: the mutex is taken only long enough to bump
 * busy_count; the read section itself runs unlocked, relying on
 * writers waiting for busy_count to reach zero.
 */
static __inline void
IPFW_RLOCK(struct ip_fw_chain *chain)
{
	mtx_lock(&chain->mtx);
	chain->busy_count++;
	mtx_unlock(&chain->mtx);
}

/*
 * Leave a read section; wake a waiting writer when we are the last
 * reader out.
 */
static __inline void
IPFW_RUNLOCK(struct ip_fw_chain *chain)
{
	mtx_lock(&chain->mtx);
	chain->busy_count--;
	if (chain->busy_count == 0 && chain->want_write)
		cv_signal(&chain->cv);
	mtx_unlock(&chain->mtx);
}

/*
 * Acquire the chain for writing: announce intent, then sleep until
 * all readers have drained.  The mutex is held on return.
 * NOTE(review): cv_wait() drops the mutex, so new readers can enter
 * while a writer sleeps -- writers may be starved under heavy reads.
 */
static __inline void
IPFW_WLOCK(struct ip_fw_chain *chain)
{
	mtx_lock(&chain->mtx);
	chain->want_write++;
	while (chain->busy_count > 0)
		cv_wait(&chain->cv, &chain->mtx);
}

/* Release the write lock and wake the next waiting writer, if any. */
static __inline void
IPFW_WUNLOCK(struct ip_fw_chain *chain)
{
	chain->want_write--;
	cv_signal(&chain->cv);
	mtx_unlock(&chain->mtx);
}

/*
 * list of rules for layer 3
 */
static struct ip_fw_chain layer3_chain;

MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");

/* One entry of a lookup table: two radix nodes plus key, mask, value. */
struct table_entry {
	struct radix_node rn[2];
	struct sockaddr_in addr, mask;
	u_int32_t value;
};

#define	IPFW_TABLES_MAX		128
static struct {
	struct radix_node_head *rnh;	/* radix head, per-table */
	int modified;			/* NOTE(review): set on updates; the consumer is outside this chunk */
} ipfw_tables[IPFW_TABLES_MAX];

static int fw_debug = 1;
static int
autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ 197 198#ifdef SYSCTL_NODE 199SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); 200SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable, 201 CTLFLAG_RW | CTLFLAG_SECURE3, 202 &fw_enable, 0, "Enable ipfw"); 203SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW, 204 &autoinc_step, 0, "Rule number autincrement step"); 205SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, 206 CTLFLAG_RW | CTLFLAG_SECURE3, 207 &fw_one_pass, 0, 208 "Only do a single pass through ipfw when using dummynet(4)"); 209SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW, 210 &fw_debug, 0, "Enable printing of debug ip_fw statements"); 211SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, 212 CTLFLAG_RW | CTLFLAG_SECURE3, 213 &fw_verbose, 0, "Log matches to ipfw rules"); 214SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, 215 &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); 216 217/* 218 * Description of dynamic rules. 219 * 220 * Dynamic rules are stored in lists accessed through a hash table 221 * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can 222 * be modified through the sysctl variable dyn_buckets which is 223 * updated when the table becomes empty. 224 * 225 * XXX currently there is only one list, ipfw_dyn. 226 * 227 * When a packet is received, its address fields are first masked 228 * with the mask defined for the rule, then hashed, then matched 229 * against the entries in the corresponding list. 230 * Dynamic rules can be used for different purposes: 231 * + stateful rules; 232 * + enforcing limits on the number of sessions; 233 * + in-kernel NAT (not implemented yet) 234 * 235 * The lifetime of dynamic rules is regulated by dyn_*_lifetime, 236 * measured in seconds and depending on the flags. 237 * 238 * The total number of dynamic rules is stored in dyn_count. 239 * The max number of dynamic rules is dyn_max. 
When we reach 240 * the maximum number of rules we do not create anymore. This is 241 * done to avoid consuming too much memory, but also too much 242 * time when searching on each packet (ideally, we should try instead 243 * to put a limit on the length of the list on each bucket...). 244 * 245 * Each dynamic rule holds a pointer to the parent ipfw rule so 246 * we know what action to perform. Dynamic rules are removed when 247 * the parent rule is deleted. XXX we should make them survive. 248 * 249 * There are some limitations with dynamic rules -- we do not 250 * obey the 'randomized match', and we do not do multiple 251 * passes through the firewall. XXX check the latter!!! 252 */ 253static ipfw_dyn_rule **ipfw_dyn_v = NULL; 254static u_int32_t dyn_buckets = 256; /* must be power of 2 */ 255static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */ 256 257static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ 258#define IPFW_DYN_LOCK_INIT() \ 259 mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF) 260#define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx) 261#define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx) 262#define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) 263#define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) 264 265/* 266 * Timeouts for various events in handing dynamic rules. 267 */ 268static u_int32_t dyn_ack_lifetime = 300; 269static u_int32_t dyn_syn_lifetime = 20; 270static u_int32_t dyn_fin_lifetime = 1; 271static u_int32_t dyn_rst_lifetime = 1; 272static u_int32_t dyn_udp_lifetime = 10; 273static u_int32_t dyn_short_lifetime = 5; 274 275/* 276 * Keepalives are sent if dyn_keepalive is set. They are sent every 277 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 278 * seconds of lifetime of a rule. 279 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower 280 * than dyn_keepalive_period. 
281 */ 282 283static u_int32_t dyn_keepalive_interval = 20; 284static u_int32_t dyn_keepalive_period = 5; 285static u_int32_t dyn_keepalive = 1; /* do send keepalives */ 286 287static u_int32_t static_count; /* # of static rules */ 288static u_int32_t static_len; /* size in bytes of static rules */ 289static u_int32_t dyn_count; /* # of dynamic rules */ 290static u_int32_t dyn_max = 4096; /* max # of dynamic rules */ 291 292SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW, 293 &dyn_buckets, 0, "Number of dyn. buckets"); 294SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, 295 &curr_dyn_buckets, 0, "Current Number of dyn. buckets"); 296SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, 297 &dyn_count, 0, "Number of dyn. rules"); 298SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, 299 &dyn_max, 0, "Max number of dyn. rules"); 300SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, 301 &static_count, 0, "Number of static rules"); 302SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, 303 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); 304SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, 305 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); 306SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, 307 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); 308SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, 309 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); 310SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, 311 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); 312SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, 313 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); 314SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, 315 &dyn_keepalive, 0, "Enable keepalives for dyn. 
rules"); 316 317#endif /* SYSCTL_NODE */ 318 319 320/* 321 * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T 322 * Other macros just cast void * into the appropriate type 323 */ 324#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) 325#define TCP(p) ((struct tcphdr *)(p)) 326#define UDP(p) ((struct udphdr *)(p)) 327#define ICMP(p) ((struct icmp *)(p)) 328 329static __inline int 330icmptype_match(struct icmp *icmp, ipfw_insn_u32 *cmd) 331{ 332 int type = icmp->icmp_type; 333 334 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) ); 335} 336 337#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \ 338 (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) ) 339 340static int 341is_icmp_query(struct icmp *icmp) 342{ 343 int type = icmp->icmp_type; 344 345 return (type <= ICMP_MAXTYPE && (TT & (1<<type)) ); 346} 347#undef TT 348 349/* 350 * The following checks use two arrays of 8 or 16 bits to store the 351 * bits that we want set or clear, respectively. They are in the 352 * low and high half of cmd->arg1 or cmd->d[0]. 353 * 354 * We scan options and store the bits we find set. We succeed if 355 * 356 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 357 * 358 * The code is sometimes optimized not to store additional variables. 
 */

/*
 * flags_match() implements the check described above: the low byte of
 * cmd->arg1 holds the flags that must be set in 'bits', the high byte
 * the flags that must be clear.  Returns 1 on match, 0 otherwise.
 */
static int
flags_match(ipfw_insn *cmd, u_int8_t bits)
{
	u_char want_clear;
	bits = ~bits;	/* after this, a 1 in 'bits' means "flag was clear" */

	if ( ((cmd->arg1 & 0xff) & bits) != 0)
		return 0; /* some bits we want set were clear */
	want_clear = (cmd->arg1 >> 8) & 0xff;
	if ( (want_clear & bits) != want_clear)
		return 0; /* some bits we want clear were set */
	return 1;
}

/*
 * Walk the IP options of 'ip', collect the IP_FW_IPOPT_* bits for the
 * options present and match them against 'cmd' via flags_match().
 * An invalid or truncated option length makes the match fail.
 */
static int
ipopts_match(struct ip *ip, ipfw_insn *cmd)
{
	int optlen, bits = 0;
	u_char *cp = (u_char *)(ip + 1);	/* first option byte */
	int x = (ip->ip_hl << 2) - sizeof (struct ip); /* total option bytes */

	for (; x > 0; x -= optlen, cp += optlen) {
		int opt = cp[IPOPT_OPTVAL];

		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			optlen = cp[IPOPT_OLEN];
			if (optlen <= 0 || optlen > x)
				return 0; /* invalid or truncated */
		}
		switch (opt) {

		default:
			break;

		case IPOPT_LSRR:
			bits |= IP_FW_IPOPT_LSRR;
			break;

		case IPOPT_SSRR:
			bits |= IP_FW_IPOPT_SSRR;
			break;

		case IPOPT_RR:
			bits |= IP_FW_IPOPT_RR;
			break;

		case IPOPT_TS:
			bits |= IP_FW_IPOPT_TS;
			break;
		}
	}
	return (flags_match(cmd, bits));
}

/*
 * Same as ipopts_match() but for TCP options (IP_FW_TCPOPT_* bits).
 * NOTE(review): unlike ipopts_match(), a bad length only breaks out
 * of the loop and there is no 'optlen > x' upper bound, so a
 * malformed last option may be examined partially past 'x' -- confirm
 * the callers guarantee the full TCP header is contiguous in memory.
 */
static int
tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
{
	int optlen, bits = 0;
	u_char *cp = (u_char *)(tcp + 1);	/* first option byte */
	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);

	for (; x > 0; x -= optlen, cp += optlen) {
		int opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			optlen = cp[1];
			if (optlen <= 0)
				break;
		}

		switch (opt) {

		default:
			break;

		case TCPOPT_MAXSEG:
			bits |= IP_FW_TCPOPT_MSS;
			break;

		case TCPOPT_WINDOW:
			bits |= IP_FW_TCPOPT_WINDOW;
			break;

		case TCPOPT_SACK_PERMITTED:
		case TCPOPT_SACK:
			bits |= IP_FW_TCPOPT_SACK;
			break;

		case TCPOPT_TIMESTAMP:
			bits |= IP_FW_TCPOPT_TS;
			break;

		}
	}
	return (flags_match(cmd, bits));
}

/*
 * Match the packet's interface against an interface instruction:
 * by name (exact strncmp, or fnmatch() glob when cmd->p.glob is set)
 * when cmd->name is non-empty, otherwise by comparing cmd->p.ip
 * against every AF_INET address configured on the interface.
 * A packet without an interface never matches.
 */
static int
iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
{
	if (ifp == NULL)	/* no iface with this packet, match fails */
		return 0;
	/* Check by name or by IP address */
	if (cmd->name[0] != '\0') { /* match by name */
		/* Check name */
		if (cmd->p.glob) {
			if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
				return(1);
		} else {
			if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
				return(1);
		}
	} else {
		struct ifaddr *ia;

		/* XXX lock? */
		TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
			if (ia->ifa_addr == NULL)
				continue;
			if (ia->ifa_addr->sa_family != AF_INET)
				continue;
			if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
			    (ia->ifa_addr))->sin_addr.s_addr)
				return(1);	/* match */
		}
	}
	return(0);	/* no match, fail ... */
}

/*
 * The verify_path function checks if a route to the src exists and
 * if it is reachable via ifp (when provided).
 *
 * The 'verrevpath' option checks that the interface that an IP packet
 * arrives on is the same interface that traffic destined for the
 * packet's source address would be routed out of. The 'versrcreach'
 * option just checks that the source address is reachable via any route
 * (except default) in the routing table. These two are a measure to block
 * forged packets. This is also commonly known as "anti-spoofing" or Unicast
 * Reverse Path Forwarding (Unicast RFP) in Cisco-ese. The name of the knobs
 * is purposely reminiscent of the Cisco IOS command,
 *
 *   ip verify unicast reverse-path
 *   ip verify unicast source reachable-via any
 *
 * which implements the same functionality. But note that syntax is
 * misleading. The check may be performed on all IP packets whether unicast,
 * multicast, or broadcast.
516 */ 517static int 518verify_path(struct in_addr src, struct ifnet *ifp) 519{ 520 struct route ro; 521 struct sockaddr_in *dst; 522 523 bzero(&ro, sizeof(ro)); 524 525 dst = (struct sockaddr_in *)&(ro.ro_dst); 526 dst->sin_family = AF_INET; 527 dst->sin_len = sizeof(*dst); 528 dst->sin_addr = src; 529 rtalloc_ign(&ro, RTF_CLONING); 530 531 if (ro.ro_rt == NULL) 532 return 0; 533 534 /* if ifp is provided, check for equality with rtentry */ 535 if (ifp != NULL && ro.ro_rt->rt_ifp != ifp) { 536 RTFREE(ro.ro_rt); 537 return 0; 538 } 539 540 /* if no ifp provided, check if rtentry is not default route */ 541 if (ifp == NULL && 542 satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) { 543 RTFREE(ro.ro_rt); 544 return 0; 545 } 546 547 /* or if this is a blackhole/reject route */ 548 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 549 RTFREE(ro.ro_rt); 550 return 0; 551 } 552 553 /* found valid route */ 554 RTFREE(ro.ro_rt); 555 return 1; 556} 557 558 559static u_int64_t norule_counter; /* counter for ipfw_log(NULL...) */ 560 561#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 562#define SNP(buf) buf, sizeof(buf) 563 564/* 565 * We enter here when we have a rule with O_LOG. 566 * XXX this function alone takes about 2Kbytes of code! 
 */

/*
 * Log one packet via log(9) with LOG_SECURITY priority.
 * 'f' is the matching rule (NULL for a packet refused without a rule,
 * rate-limited through norule_counter/verbose_limit), 'hlen' is 0 for
 * non-IP packets, 'eh' is non-NULL for layer-2 packets (whose ip_off/
 * ip_len are still in network order), 'oif' is the output interface
 * or NULL for inbound packets.
 */
static void
ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh,
	struct mbuf *m, struct ifnet *oif)
{
	char *action;
	int limit_reached = 0;
	char action2[40], proto[48], fragment[28];

	fragment[0] = '\0';
	proto[0] = '\0';

	if (f == NULL) {	/* bogus pkt */
		if (verbose_limit != 0 && norule_counter >= verbose_limit)
			return;
		norule_counter++;
		if (norule_counter == verbose_limit)
			limit_reached = verbose_limit;
		action = "Refuse";
	} else {	/* O_LOG is the first action, find the real one */
		ipfw_insn *cmd = ACTION_PTR(f);
		ipfw_insn_log *l = (ipfw_insn_log *)cmd;

		/* per-rule log limit: stop once log_left is exhausted */
		if (l->max_log != 0 && l->log_left == 0)
			return;
		l->log_left--;
		if (l->log_left == 0)
			limit_reached = l->max_log;
		cmd += F_LEN(cmd);	/* point to first action */
		/* skip O_ALTQ and O_PROB prefixes to reach the real action */
		if (cmd->opcode == O_ALTQ) {
			ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;

			snprintf(SNPARGS(action2, 0), "Altq %d",
				altq->qid);
			cmd += F_LEN(cmd);
		}
		if (cmd->opcode == O_PROB)
			cmd += F_LEN(cmd);

		action = action2;
		switch (cmd->opcode) {
		case O_DENY:
			action = "Deny";
			break;

		case O_REJECT:
			if (cmd->arg1==ICMP_REJECT_RST)
				action = "Reset";
			else if (cmd->arg1==ICMP_UNREACH_HOST)
				action = "Reject";
			else
				snprintf(SNPARGS(action2, 0), "Unreach %d",
					cmd->arg1);
			break;

		case O_ACCEPT:
			action = "Accept";
			break;
		case O_COUNT:
			action = "Count";
			break;
		case O_DIVERT:
			snprintf(SNPARGS(action2, 0), "Divert %d",
				cmd->arg1);
			break;
		case O_TEE:
			snprintf(SNPARGS(action2, 0), "Tee %d",
				cmd->arg1);
			break;
		case O_SKIPTO:
			snprintf(SNPARGS(action2, 0), "SkipTo %d",
				cmd->arg1);
			break;
		case O_PIPE:
			snprintf(SNPARGS(action2, 0), "Pipe %d",
				cmd->arg1);
			break;
		case O_QUEUE:
			snprintf(SNPARGS(action2, 0), "Queue %d",
				cmd->arg1);
			break;
		case O_FORWARD_IP: {
			ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
			int len;

			len = snprintf(SNPARGS(action2, 0), "Forward to %s",
				inet_ntoa(sa->sa.sin_addr));
			if (sa->sa.sin_port)
				snprintf(SNPARGS(action2, len), ":%d",
					sa->sa.sin_port);
			}
			break;
		case O_NETGRAPH:
			snprintf(SNPARGS(action2, 0), "Netgraph %d",
				cmd->arg1);
			break;
		case O_NGTEE:
			snprintf(SNPARGS(action2, 0), "Ngtee %d",
				cmd->arg1);
			break;
		default:
			action = "UNKNOWN";
			break;
		}
	}

	if (hlen == 0) {	/* non-ip */
		snprintf(SNPARGS(proto, 0), "MAC");
	} else {
		struct ip *ip = mtod(m, struct ip *);
		/* these three are all aliases to the same thing */
		struct icmp *const icmp = L3HDR(struct icmp, ip);
		struct tcphdr *const tcp = (struct tcphdr *)icmp;
		struct udphdr *const udp = (struct udphdr *)icmp;

		int ip_off, offset, ip_len;

		int len;

		if (eh != NULL) { /* layer 2 packets are as on the wire */
			ip_off = ntohs(ip->ip_off);
			ip_len = ntohs(ip->ip_len);
		} else {
			/* layer 3 path already converted to host order */
			ip_off = ip->ip_off;
			ip_len = ip->ip_len;
		}
		offset = ip_off & IP_OFFMASK;
		/* ports are only printed for the first fragment (offset 0) */
		switch (ip->ip_p) {
		case IPPROTO_TCP:
			len = snprintf(SNPARGS(proto, 0), "TCP %s",
			    inet_ntoa(ip->ip_src));
			if (offset == 0)
				snprintf(SNPARGS(proto, len), ":%d %s:%d",
				    ntohs(tcp->th_sport),
				    inet_ntoa(ip->ip_dst),
				    ntohs(tcp->th_dport));
			else
				snprintf(SNPARGS(proto, len), " %s",
				    inet_ntoa(ip->ip_dst));
			break;

		case IPPROTO_UDP:
			len = snprintf(SNPARGS(proto, 0), "UDP %s",
			    inet_ntoa(ip->ip_src));
			if (offset == 0)
				snprintf(SNPARGS(proto, len), ":%d %s:%d",
				    ntohs(udp->uh_sport),
				    inet_ntoa(ip->ip_dst),
				    ntohs(udp->uh_dport));
			else
				snprintf(SNPARGS(proto, len), " %s",
				    inet_ntoa(ip->ip_dst));
			break;

		case IPPROTO_ICMP:
			if (offset == 0)
				len = snprintf(SNPARGS(proto, 0),
				    "ICMP:%u.%u ",
				    icmp->icmp_type, icmp->icmp_code);
			else
				len = snprintf(SNPARGS(proto, 0), "ICMP ");
			len += snprintf(SNPARGS(proto, len), "%s",
			    inet_ntoa(ip->ip_src));
			snprintf(SNPARGS(proto, len), " %s",
			    inet_ntoa(ip->ip_dst));
			break;

		default:
			len = snprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p,
			    inet_ntoa(ip->ip_src));
			snprintf(SNPARGS(proto, len), " %s",
			    inet_ntoa(ip->ip_dst));
			break;
		}

		if (ip_off & (IP_MF | IP_OFFMASK))
			snprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)",
			    ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
			    offset << 3,
			    (ip_off & IP_MF) ? "+" : "");
	}
	if (oif || m->m_pkthdr.rcvif)
		log(LOG_SECURITY | LOG_INFO,
		    "ipfw: %d %s %s %s via %s%s\n",
		    f ? f->rulenum : -1,
		    action, proto, oif ? "out" : "in",
		    oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
		    fragment);
	else
		log(LOG_SECURITY | LOG_INFO,
		    "ipfw: %d %s %s [no if info]%s\n",
		    f ? f->rulenum : -1,
		    action, proto, fragment);
	if (limit_reached)
		log(LOG_SECURITY | LOG_NOTICE,
		    "ipfw: limit %d reached on entry %d\n",
		    limit_reached, f ? f->rulenum : -1);
}

/*
 * IMPORTANT: the hash function for dynamic rules must be commutative
 * in source and destination (ip,port), because rules are bidirectional
 * and we want to find both in the same bucket.
 */
static __inline int
hash_packet(struct ipfw_flow_id *id)
{
	u_int32_t i;

	i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port);
	i &= (curr_dyn_buckets - 1);	/* curr_dyn_buckets is a power of 2 */
	return i;
}

/**
 * unlink a dynamic rule from a chain. prev is a pointer to
 * the previous one, q is a pointer to the rule to delete,
 * head is a pointer to the head of the queue.
 * Modifies q and potentially also head.
 */
/*
 * NOTE: this macro advances 'q' to the next entry as a side effect,
 * so callers continue the scan without touching q themselves.
 */
#define UNLINK_DYN_RULE(prev, head, q) {				\
	ipfw_dyn_rule *old_q = q;					\
									\
	/* remove a refcount to the parent */				\
	if (q->dyn_type == O_LIMIT)					\
		q->parent->count--;					\
	DEB(printf("ipfw: unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",\
		(q->id.src_ip), (q->id.src_port),			\
		(q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); )	\
	if (prev != NULL)						\
		prev->next = q = q->next;				\
	else								\
		head = q = q->next;					\
	dyn_count--;							\
	uma_zfree(ipfw_dyn_rule_zone, old_q); }

/* wrap-safe "a <= b" for 32-bit timestamps */
#define TIME_LEQ(a,b)       ((int)((a)-(b)) <= 0)

/**
 * Remove dynamic rules pointing to "rule", or all of them if rule == NULL.
 *
 * If keep_me == NULL, rules are deleted even if not expired,
 * otherwise only expired rules are removed.
 *
 * The value of the second parameter is also used to point to identify
 * a rule we absolutely do not want to remove (e.g. because we are
 * holding a reference to it -- this is the case with O_LIMIT_PARENT
 * rules). The pointer is only used for comparison, so any non-null
 * value will do.
 */
static void
remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me)
{
	static u_int32_t last_remove = 0;

#define FORCE (keep_me == NULL)

	ipfw_dyn_rule *prev, *q;
	int i, pass = 0, max_pass = 0;

	IPFW_DYN_LOCK_ASSERT();

	if (ipfw_dyn_v == NULL || dyn_count == 0)
		return;
	/* do not expire more than once per second, it is useless */
	if (!FORCE && last_remove == time_second)
		return;
	last_remove = time_second;

	/*
	 * because O_LIMIT refer to parent rules, during the first pass only
	 * remove child and mark any pending LIMIT_PARENT, and remove
	 * them in a second pass.
	 */
next_pass:
	for (i = 0 ; i < curr_dyn_buckets ; i++) {
		for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) {
			/*
			 * Logic can become complex here, so we split tests.
			 */
			if (q == keep_me)
				goto next;
			if (rule != NULL && rule != q->rule)
				goto next; /* not the one we are looking for */
			if (q->dyn_type == O_LIMIT_PARENT) {
				/*
				 * handle parent in the second pass,
				 * record we need one.
				 */
				max_pass = 1;
				if (pass == 0)
					goto next;
				if (FORCE && q->count != 0 ) {
					/* XXX should not happen! */
					printf("ipfw: OUCH! cannot remove rule,"
					    " count %d\n", q->count);
				}
			} else {
				if (!FORCE &&
				    !TIME_LEQ( q->expire, time_second ))
					goto next;
			}
			/* parents are only freed once their count drops to 0 */
			if (q->dyn_type != O_LIMIT_PARENT || !q->count) {
				UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
				continue;
			}
next:
			prev=q;
			q=q->next;
		}
	}
	if (pass++ < max_pass)
		goto next_pass;
}


/**
 * lookup a dynamic rule.
 * On a hit, the entry is moved to the front of its bucket, expired
 * entries encountered during the scan are reclaimed, and for TCP the
 * session state machine / expire time are updated from pkt->flags.
 */
static ipfw_dyn_rule *
lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction,
	struct tcphdr *tcp)
{
	/*
	 * stateful ipfw extensions.
	 * Lookup into dynamic session queue
	 */
#define MATCH_REVERSE	0
#define MATCH_FORWARD	1
#define MATCH_NONE	2
#define MATCH_UNKNOWN	3
	int i, dir = MATCH_NONE;
	ipfw_dyn_rule *prev, *q=NULL;

	IPFW_DYN_LOCK_ASSERT();

	if (ipfw_dyn_v == NULL)
		goto done;	/* not found */
	i = hash_packet( pkt );
	for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) {
		if (q->dyn_type == O_LIMIT_PARENT && q->count)
			goto next;
		if (TIME_LEQ( q->expire, time_second)) { /* expire entry */
			UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
			continue;
		}
		if (pkt->proto == q->id.proto &&
		    q->dyn_type != O_LIMIT_PARENT) {
			if (pkt->src_ip == q->id.src_ip &&
			    pkt->dst_ip == q->id.dst_ip &&
			    pkt->src_port == q->id.src_port &&
			    pkt->dst_port == q->id.dst_port ) {
				dir = MATCH_FORWARD;
				break;
			}
			if (pkt->src_ip == q->id.dst_ip &&
			    pkt->dst_ip == q->id.src_ip &&
			    pkt->src_port == q->id.dst_port &&
			    pkt->dst_port == q->id.src_port ) {
				dir = MATCH_REVERSE;
				break;
			}
		}
next:
		prev = q;
		q = q->next;
	}
	if (q == NULL)
		goto done;	/* q = NULL, not found */

	if ( prev != NULL) {	/* found and not in front */
		prev->next = q->next;
		q->next = ipfw_dyn_v[i];
		ipfw_dyn_v[i] = q;
	}
	if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
		u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST);

		/* low byte tracks forward flags, high byte reverse flags */
#define BOTH_SYN	(TH_SYN | (TH_SYN << 8))
#define BOTH_FIN	(TH_FIN | (TH_FIN << 8))
		q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8);
		switch (q->state) {
		case TH_SYN:				/* opening */
			q->expire = time_second + dyn_syn_lifetime;
			break;

		case BOTH_SYN:			/* move to established */
		case BOTH_SYN | TH_FIN :	/* one side tries to close */
		case BOTH_SYN | (TH_FIN << 8) :
			if (tcp) {
#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0)
				u_int32_t ack = ntohl(tcp->th_ack);
				if (dir == MATCH_FORWARD) {
					if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd))
						q->ack_fwd = ack;
					else { /* ignore out-of-sequence */
						break;
					}
				} else {
					if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev))
						q->ack_rev = ack;
					else { /* ignore out-of-sequence */
						break;
					}
				}
			}
			q->expire = time_second + dyn_ack_lifetime;
			break;

		case BOTH_SYN | BOTH_FIN:	/* both sides closed */
			if (dyn_fin_lifetime >= dyn_keepalive_period)
				dyn_fin_lifetime = dyn_keepalive_period - 1;
			q->expire = time_second + dyn_fin_lifetime;
			break;

		default:
#if 0
			/*
			 * reset or some invalid combination, but can also
			 * occur if we use keep-state the wrong way.
			 */
			if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0)
				printf("invalid state: 0x%x\n", q->state);
#endif
			if (dyn_rst_lifetime >= dyn_keepalive_period)
				dyn_rst_lifetime = dyn_keepalive_period - 1;
			q->expire = time_second + dyn_rst_lifetime;
			break;
		}
	} else if (pkt->proto == IPPROTO_UDP) {
		q->expire = time_second + dyn_udp_lifetime;
	} else {
		/* other protocols */
		q->expire = time_second + dyn_short_lifetime;
	}
done:
	if (match_direction)
		*match_direction = dir;
	return q;
}

/*
 * Locking wrapper around lookup_dyn_rule_locked().
 * NB: the dynamic-rule lock is RETURNED HELD when q is not NULL;
 * the caller must IPFW_DYN_UNLOCK() after using the entry.
 */
static ipfw_dyn_rule *
lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
	struct tcphdr *tcp)
{
	ipfw_dyn_rule *q;

	IPFW_DYN_LOCK();
	q = lookup_dyn_rule_locked(pkt, match_direction, tcp);
	if (q == NULL)
		IPFW_DYN_UNLOCK();
	/* NB: return table locked when q is not NULL */
	return q;
}

/*
 * (Re)allocate the dynamic-rule hash table to dyn_buckets entries.
 * Only called when the table is empty (or absent), so the old array
 * can be freed without leaking entries.
 */
static void
realloc_dynamic_table(void)
{
	IPFW_DYN_LOCK_ASSERT();

	/*
	 * Try reallocation, make sure we have a power of 2 and do
	 * not allow more than 64k entries. In case of overflow,
	 * default to 1024.
	 */

	if (dyn_buckets > 65536)
		dyn_buckets = 1024;
	if ((dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */
		dyn_buckets = curr_dyn_buckets; /* reset */
		return;
	}
	curr_dyn_buckets = dyn_buckets;
	if (ipfw_dyn_v != NULL)
		free(ipfw_dyn_v, M_IPFW);
	/* halve the request until the allocation succeeds */
	for (;;) {
		ipfw_dyn_v = malloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *),
		       M_IPFW, M_NOWAIT | M_ZERO);
		if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2)
			break;
		curr_dyn_buckets /= 2;
	}
}

/**
 * Install state of type 'type' for a dynamic session.
 * The hash table contains two type of rules:
 * - regular rules (O_KEEP_STATE)
 * - rules for sessions with limited number of sess per user
 *   (O_LIMIT).
 *   When they are created, the parent is
 * increased by 1, and decreased on delete. In this case,
 * the third parameter is the parent rule and not the chain.
 * - "parent" rules for the above (O_LIMIT_PARENT).
 */
static ipfw_dyn_rule *
add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
{
	ipfw_dyn_rule *r;
	int i;

	IPFW_DYN_LOCK_ASSERT();

	/* (re)size the hash table while it is guaranteed empty */
	if (ipfw_dyn_v == NULL ||
	    (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) {
		realloc_dynamic_table();
		if (ipfw_dyn_v == NULL)
			return NULL; /* failed ! */
	}
	i = hash_packet(id);

	r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO);
	if (r == NULL) {
		printf ("ipfw: sorry cannot allocate state\n");
		return NULL;
	}

	/* increase refcount on parent, and set pointer */
	if (dyn_type == O_LIMIT) {
		/* for O_LIMIT, 'rule' is really the O_LIMIT_PARENT entry */
		ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule;
		if ( parent->dyn_type != O_LIMIT_PARENT)
			panic("invalid parent");
		parent->count++;
		r->parent = parent;
		rule = parent->rule;
	}

	r->id = *id;
	r->expire = time_second + dyn_syn_lifetime;
	r->rule = rule;
	r->dyn_type = dyn_type;
	r->pcnt = r->bcnt = 0;
	r->count = 0;

	/* link at the front of the bucket */
	r->bucket = i;
	r->next = ipfw_dyn_v[i];
	ipfw_dyn_v[i] = r;
	dyn_count++;
	DEB(printf("ipfw: add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n",
	   dyn_type,
	   (r->id.src_ip), (r->id.src_port),
	   (r->id.dst_ip), (r->id.dst_port),
	   dyn_count ); )
	return r;
}

/**
 * lookup dynamic parent rule using pkt and rule as search keys.
 * If the lookup fails, then install one.
1115 */ 1116static ipfw_dyn_rule * 1117lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) 1118{ 1119 ipfw_dyn_rule *q; 1120 int i; 1121 1122 IPFW_DYN_LOCK_ASSERT(); 1123 1124 if (ipfw_dyn_v) { 1125 i = hash_packet( pkt ); 1126 for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next) 1127 if (q->dyn_type == O_LIMIT_PARENT && 1128 rule== q->rule && 1129 pkt->proto == q->id.proto && 1130 pkt->src_ip == q->id.src_ip && 1131 pkt->dst_ip == q->id.dst_ip && 1132 pkt->src_port == q->id.src_port && 1133 pkt->dst_port == q->id.dst_port) { 1134 q->expire = time_second + dyn_short_lifetime; 1135 DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) 1136 return q; 1137 } 1138 } 1139 return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); 1140} 1141 1142/** 1143 * Install dynamic state for rule type cmd->o.opcode 1144 * 1145 * Returns 1 (failure) if state is not installed because of errors or because 1146 * session limitations are enforced. 1147 */ 1148static int 1149install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, 1150 struct ip_fw_args *args) 1151{ 1152 static int last_log; 1153 1154 ipfw_dyn_rule *q; 1155 1156 DEB(printf("ipfw: install state type %d 0x%08x %u -> 0x%08x %u\n", 1157 cmd->o.opcode, 1158 (args->f_id.src_ip), (args->f_id.src_port), 1159 (args->f_id.dst_ip), (args->f_id.dst_port) );) 1160 1161 IPFW_DYN_LOCK(); 1162 1163 q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 1164 1165 if (q != NULL) { /* should never occur */ 1166 if (last_log != time_second) { 1167 last_log = time_second; 1168 printf("ipfw: install_state: entry already present, done\n"); 1169 } 1170 IPFW_DYN_UNLOCK(); 1171 return 0; 1172 } 1173 1174 if (dyn_count >= dyn_max) 1175 /* 1176 * Run out of slots, try to remove any expired rule. 
1177 */ 1178 remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); 1179 1180 if (dyn_count >= dyn_max) { 1181 if (last_log != time_second) { 1182 last_log = time_second; 1183 printf("ipfw: install_state: Too many dynamic rules\n"); 1184 } 1185 IPFW_DYN_UNLOCK(); 1186 return 1; /* cannot install, notify caller */ 1187 } 1188 1189 switch (cmd->o.opcode) { 1190 case O_KEEP_STATE: /* bidir rule */ 1191 add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); 1192 break; 1193 1194 case O_LIMIT: /* limit number of sessions */ 1195 { 1196 u_int16_t limit_mask = cmd->limit_mask; 1197 struct ipfw_flow_id id; 1198 ipfw_dyn_rule *parent; 1199 1200 DEB(printf("ipfw: installing dyn-limit rule %d\n", 1201 cmd->conn_limit);) 1202 1203 id.dst_ip = id.src_ip = 0; 1204 id.dst_port = id.src_port = 0; 1205 id.proto = args->f_id.proto; 1206 1207 if (limit_mask & DYN_SRC_ADDR) 1208 id.src_ip = args->f_id.src_ip; 1209 if (limit_mask & DYN_DST_ADDR) 1210 id.dst_ip = args->f_id.dst_ip; 1211 if (limit_mask & DYN_SRC_PORT) 1212 id.src_port = args->f_id.src_port; 1213 if (limit_mask & DYN_DST_PORT) 1214 id.dst_port = args->f_id.dst_port; 1215 parent = lookup_dyn_parent(&id, rule); 1216 if (parent == NULL) { 1217 printf("ipfw: add parent failed\n"); 1218 return 1; 1219 } 1220 if (parent->count >= cmd->conn_limit) { 1221 /* 1222 * See if we can remove some expired rule. 
1223 */ 1224 remove_dyn_rule(rule, parent); 1225 if (parent->count >= cmd->conn_limit) { 1226 if (fw_verbose && last_log != time_second) { 1227 last_log = time_second; 1228 log(LOG_SECURITY | LOG_DEBUG, 1229 "drop session, too many entries\n"); 1230 } 1231 IPFW_DYN_UNLOCK(); 1232 return 1; 1233 } 1234 } 1235 add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); 1236 } 1237 break; 1238 default: 1239 printf("ipfw: unknown dynamic rule type %u\n", cmd->o.opcode); 1240 IPFW_DYN_UNLOCK(); 1241 return 1; 1242 } 1243 lookup_dyn_rule_locked(&args->f_id, NULL, NULL); /* XXX just set lifetime */ 1244 IPFW_DYN_UNLOCK(); 1245 return 0; 1246} 1247 1248/* 1249 * Transmit a TCP packet, containing either a RST or a keepalive. 1250 * When flags & TH_RST, we are sending a RST packet, because of a 1251 * "reset" action matched the packet. 1252 * Otherwise we are sending a keepalive, and flags & TH_ 1253 */ 1254static void 1255send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags) 1256{ 1257 struct mbuf *m; 1258 struct ip *ip; 1259 struct tcphdr *tcp; 1260 1261 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1262 if (m == 0) 1263 return; 1264 m->m_pkthdr.rcvif = (struct ifnet *)0; 1265 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); 1266 m->m_data += max_linkhdr; 1267 1268 ip = mtod(m, struct ip *); 1269 bzero(ip, m->m_len); 1270 tcp = (struct tcphdr *)(ip + 1); /* no IP options */ 1271 ip->ip_p = IPPROTO_TCP; 1272 tcp->th_off = 5; 1273 /* 1274 * Assume we are sending a RST (or a keepalive in the reverse 1275 * direction), swap src and destination addresses and ports. 
1276 */ 1277 ip->ip_src.s_addr = htonl(id->dst_ip); 1278 ip->ip_dst.s_addr = htonl(id->src_ip); 1279 tcp->th_sport = htons(id->dst_port); 1280 tcp->th_dport = htons(id->src_port); 1281 if (flags & TH_RST) { /* we are sending a RST */ 1282 if (flags & TH_ACK) { 1283 tcp->th_seq = htonl(ack); 1284 tcp->th_ack = htonl(0); 1285 tcp->th_flags = TH_RST; 1286 } else { 1287 if (flags & TH_SYN) 1288 seq++; 1289 tcp->th_seq = htonl(0); 1290 tcp->th_ack = htonl(seq); 1291 tcp->th_flags = TH_RST | TH_ACK; 1292 } 1293 } else { 1294 /* 1295 * We are sending a keepalive. flags & TH_SYN determines 1296 * the direction, forward if set, reverse if clear. 1297 * NOTE: seq and ack are always assumed to be correct 1298 * as set by the caller. This may be confusing... 1299 */ 1300 if (flags & TH_SYN) { 1301 /* 1302 * we have to rewrite the correct addresses! 1303 */ 1304 ip->ip_dst.s_addr = htonl(id->dst_ip); 1305 ip->ip_src.s_addr = htonl(id->src_ip); 1306 tcp->th_dport = htons(id->dst_port); 1307 tcp->th_sport = htons(id->src_port); 1308 } 1309 tcp->th_seq = htonl(seq); 1310 tcp->th_ack = htonl(ack); 1311 tcp->th_flags = TH_ACK; 1312 } 1313 /* 1314 * set ip_len to the payload size so we can compute 1315 * the tcp checksum on the pseudoheader 1316 * XXX check this, could save a couple of words ? 1317 */ 1318 ip->ip_len = htons(sizeof(struct tcphdr)); 1319 tcp->th_sum = in_cksum(m, m->m_pkthdr.len); 1320 /* 1321 * now fill fields left out earlier 1322 */ 1323 ip->ip_ttl = ip_defttl; 1324 ip->ip_len = m->m_pkthdr.len; 1325 m->m_flags |= M_SKIP_FIREWALL; 1326 ip_output(m, NULL, NULL, 0, NULL, NULL); 1327} 1328 1329/* 1330 * sends a reject message, consuming the mbuf passed as an argument. 1331 */ 1332static void 1333send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) 1334{ 1335 1336 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 1337 /* We need the IP header in host order for icmp_error(). 
*/ 1338 if (args->eh != NULL) { 1339 struct ip *ip = mtod(args->m, struct ip *); 1340 ip->ip_len = ntohs(ip->ip_len); 1341 ip->ip_off = ntohs(ip->ip_off); 1342 } 1343 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 1344 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { 1345 struct tcphdr *const tcp = 1346 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 1347 if ( (tcp->th_flags & TH_RST) == 0) 1348 send_pkt(&(args->f_id), ntohl(tcp->th_seq), 1349 ntohl(tcp->th_ack), 1350 tcp->th_flags | TH_RST); 1351 m_freem(args->m); 1352 } else 1353 m_freem(args->m); 1354 args->m = NULL; 1355} 1356 1357/** 1358 * 1359 * Given an ip_fw *, lookup_next_rule will return a pointer 1360 * to the next rule, which can be either the jump 1361 * target (for skipto instructions) or the next one in the list (in 1362 * all other cases including a missing jump target). 1363 * The result is also written in the "next_rule" field of the rule. 1364 * Backward jumps are not allowed, so start looking from the next 1365 * rule... 1366 * 1367 * This never returns NULL -- in case we do not have an exact match, 1368 * the next rule is returned. When the ruleset is changed, 1369 * pointers are flushed so we are always correct. 
1370 */ 1371 1372static struct ip_fw * 1373lookup_next_rule(struct ip_fw *me) 1374{ 1375 struct ip_fw *rule = NULL; 1376 ipfw_insn *cmd; 1377 1378 /* look for action, in case it is a skipto */ 1379 cmd = ACTION_PTR(me); 1380 if (cmd->opcode == O_LOG) 1381 cmd += F_LEN(cmd); 1382 if (cmd->opcode == O_ALTQ) 1383 cmd += F_LEN(cmd); 1384 if ( cmd->opcode == O_SKIPTO ) 1385 for (rule = me->next; rule ; rule = rule->next) 1386 if (rule->rulenum >= cmd->arg1) 1387 break; 1388 if (rule == NULL) /* failure or not a skipto */ 1389 rule = me->next; 1390 me->next_rule = rule; 1391 return rule; 1392} 1393 1394static void 1395init_tables(void) 1396{ 1397 int i; 1398 1399 for (i = 0; i < IPFW_TABLES_MAX; i++) { 1400 rn_inithead((void **)&ipfw_tables[i].rnh, 32); 1401 ipfw_tables[i].modified = 1; 1402 } 1403} 1404 1405static int 1406add_table_entry(u_int16_t tbl, in_addr_t addr, u_int8_t mlen, u_int32_t value) 1407{ 1408 struct radix_node_head *rnh; 1409 struct table_entry *ent; 1410 1411 if (tbl >= IPFW_TABLES_MAX) 1412 return (EINVAL); 1413 rnh = ipfw_tables[tbl].rnh; 1414 ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO); 1415 if (ent == NULL) 1416 return (ENOMEM); 1417 ent->value = value; 1418 ent->addr.sin_len = ent->mask.sin_len = 8; 1419 ent->mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); 1420 ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; 1421 RADIX_NODE_HEAD_LOCK(rnh); 1422 if (rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent) == 1423 NULL) { 1424 RADIX_NODE_HEAD_UNLOCK(rnh); 1425 free(ent, M_IPFW_TBL); 1426 return (EEXIST); 1427 } 1428 ipfw_tables[tbl].modified = 1; 1429 RADIX_NODE_HEAD_UNLOCK(rnh); 1430 return (0); 1431} 1432 1433static int 1434del_table_entry(u_int16_t tbl, in_addr_t addr, u_int8_t mlen) 1435{ 1436 struct radix_node_head *rnh; 1437 struct table_entry *ent; 1438 struct sockaddr_in sa, mask; 1439 1440 if (tbl >= IPFW_TABLES_MAX) 1441 return (EINVAL); 1442 rnh = ipfw_tables[tbl].rnh; 1443 sa.sin_len = mask.sin_len = 8; 1444 mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); 1445 sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; 1446 RADIX_NODE_HEAD_LOCK(rnh); 1447 ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh); 1448 if (ent == NULL) { 1449 RADIX_NODE_HEAD_UNLOCK(rnh); 1450 return (ESRCH); 1451 } 1452 ipfw_tables[tbl].modified = 1; 1453 RADIX_NODE_HEAD_UNLOCK(rnh); 1454 free(ent, M_IPFW_TBL); 1455 return (0); 1456} 1457 1458static int 1459flush_table_entry(struct radix_node *rn, void *arg) 1460{ 1461 struct radix_node_head * const rnh = arg; 1462 struct table_entry *ent; 1463 1464 ent = (struct table_entry *) 1465 rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); 1466 if (ent != NULL) 1467 free(ent, M_IPFW_TBL); 1468 return (0); 1469} 1470 1471static int 1472flush_table(u_int16_t tbl) 1473{ 1474 struct radix_node_head *rnh; 1475 1476 if (tbl >= IPFW_TABLES_MAX) 1477 return (EINVAL); 1478 rnh = ipfw_tables[tbl].rnh; 1479 RADIX_NODE_HEAD_LOCK(rnh); 1480 rnh->rnh_walktree(rnh, flush_table_entry, rnh); 1481 ipfw_tables[tbl].modified = 1; 1482 RADIX_NODE_HEAD_UNLOCK(rnh); 1483 return (0); 1484} 1485 1486static void 1487flush_tables(void) 1488{ 1489 u_int16_t tbl; 1490 1491 for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) 1492 flush_table(tbl); 1493} 1494 
/*
 * Look up 'addr' in table 'tbl'; on a hit store the entry's value in
 * *val and return 1, otherwise return 0.
 *
 * A one-slot cache remembers the last (tbl, addr) result and is
 * invalidated via the per-table 'modified' flag.
 * NOTE(review): the cache statics are read/written outside the radix
 * lock — presumably serialized by the callers; verify before relying
 * on this in an SMP path.
 */
static int
lookup_table(u_int16_t tbl, in_addr_t addr, u_int32_t *val)
{
    struct radix_node_head *rnh;
    struct table_entry *ent;
    struct sockaddr_in sa;
    static in_addr_t last_addr;
    static int last_tbl;
    static int last_match;
    static u_int32_t last_value;

    if (tbl >= IPFW_TABLES_MAX)
        return (0);
    /* fast path: same query as last time and table unchanged */
    if (tbl == last_tbl && addr == last_addr &&
        !ipfw_tables[tbl].modified) {
        if (last_match)
            *val = last_value;
        return (last_match);
    }
    rnh = ipfw_tables[tbl].rnh;
    sa.sin_len = 8;
    sa.sin_addr.s_addr = addr;
    RADIX_NODE_HEAD_LOCK(rnh);
    ipfw_tables[tbl].modified = 0;    /* cache valid again from here */
    ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
    RADIX_NODE_HEAD_UNLOCK(rnh);
    last_addr = addr;
    last_tbl = tbl;
    if (ent != NULL) {
        last_value = *val = ent->value;
        last_match = 1;
        return (1);
    }
    last_match = 0;
    return (0);
}

/*
 * rnh_walktree() callback: bump the entry counter in *arg.
 */
static int
count_table_entry(struct radix_node *rn, void *arg)
{
    u_int32_t * const cnt = arg;

    (*cnt)++;
    return (0);
}

/*
 * Store the number of entries in table 'tbl' in *cnt.
 * Returns 0 or EINVAL on a bad table number.
 */
static int
count_table(u_int32_t tbl, u_int32_t *cnt)
{
    struct radix_node_head *rnh;

    if (tbl >= IPFW_TABLES_MAX)
        return (EINVAL);
    rnh = ipfw_tables[tbl].rnh;
    *cnt = 0;
    RADIX_NODE_HEAD_LOCK(rnh);
    rnh->rnh_walktree(rnh, count_table_entry, cnt);
    RADIX_NODE_HEAD_UNLOCK(rnh);
    return (0);
}

/*
 * rnh_walktree() callback: copy one entry into the userland dump
 * buffer in *arg; returns 1 (stops the walk) when the buffer is full.
 */
static int
dump_table_entry(struct radix_node *rn, void *arg)
{
    struct table_entry * const n = (struct table_entry *)rn;
    ipfw_table * const tbl = arg;
    ipfw_table_entry *ent;

    if (tbl->cnt == tbl->size)
        return (1);
    ent = &tbl->ent[tbl->cnt];
    ent->tbl = tbl->tbl;
    /* recover the prefix length from the contiguous netmask */
    if (in_nullhost(n->mask.sin_addr))
        ent->masklen = 0;
    else
        ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
    ent->addr = n->addr.sin_addr.s_addr;
    ent->value = n->value;
    tbl->cnt++;
    return (0);
}

/*
 * Fill the caller-supplied ipfw_table with up to tbl->size entries
 * from table tbl->tbl.  Returns 0 or EINVAL on a bad table number.
 */
static int
dump_table(ipfw_table *tbl)
{
    struct radix_node_head *rnh;

    if (tbl->tbl >= IPFW_TABLES_MAX)
        return (EINVAL);
    rnh = ipfw_tables[tbl->tbl].rnh;
    tbl->cnt = 0;
    RADIX_NODE_HEAD_LOCK(rnh);
    rnh->rnh_walktree(rnh, dump_table_entry, tbl);
    RADIX_NODE_HEAD_UNLOCK(rnh);
    return (0);
}

/*
 * Copy the credential information (uid, gids, prison id) of the
 * socket attached to 'inp' into the per-packet cache 'ugp'.
 * Caller must ensure the pcb lock is held (see check_uidgid()).
 */
static void
fill_ugid_cache(struct inpcb *inp, struct ip_fw_ugid *ugp)
{
    struct ucred *cr;

    if (inp->inp_socket != NULL) {
        cr = inp->inp_socket->so_cred;
        /* -1 marks "not jailed" */
        ugp->fw_prid = jailed(cr) ?
            cr->cr_prison->pr_id : -1;
        ugp->fw_uid = cr->cr_uid;
        ugp->fw_ngroups = cr->cr_ngroups;
        bcopy(cr->cr_groups, ugp->fw_groups,
            sizeof(ugp->fw_groups));
    }
}

/*
 * Match a uid/gid/jail constraint against the credentials of the
 * socket owning this flow.  The pcb lookup result is cached in
 * (*ugp, *lookup): *lookup is 0 before the first attempt, 1 after a
 * successful lookup, -1 after a failed one (so we never retry).
 * Returns 1 on match, 0 otherwise.
 */
static int
check_uidgid(ipfw_insn_u32 *insn,
    int proto, struct ifnet *oif,
    struct in_addr dst_ip, u_int16_t dst_port,
    struct in_addr src_ip, u_int16_t src_port,
    struct ip_fw_ugid *ugp, int *lookup, struct inpcb *inp)
{
    struct inpcbinfo *pi;
    int wildcard;
    struct inpcb *pcb;
    int match;
    gid_t *gp;

    /*
     * Check to see if the UDP or TCP stack supplied us with
     * the PCB. If so, rather then holding a lock and looking
     * up the PCB, we can use the one that was supplied.
     */
    if (inp && *lookup == 0) {
        INP_LOCK_ASSERT(inp);
        if (inp->inp_socket != NULL) {
            fill_ugid_cache(inp, ugp);
            *lookup = 1;
        }
    }
    /*
     * If we have already been here and the packet has no
     * PCB entry associated with it, then we can safely
     * assume that this is a no match.
     */
    if (*lookup == -1)
        return (0);
    if (proto == IPPROTO_TCP) {
        wildcard = 0;    /* TCP connections are fully specified */
        pi = &tcbinfo;
    } else if (proto == IPPROTO_UDP) {
        wildcard = 1;    /* UDP may be bound to a wildcard address */
        pi = &udbinfo;
    } else
        return 0;
    match = 0;
    if (*lookup == 0) {
        INP_INFO_RLOCK(pi);
        /* on output the local endpoint is the source, on input the dest */
        pcb = (oif) ?
            in_pcblookup_hash(pi,
                dst_ip, htons(dst_port),
                src_ip, htons(src_port),
                wildcard, oif) :
            in_pcblookup_hash(pi,
                src_ip, htons(src_port),
                dst_ip, htons(dst_port),
                wildcard, NULL);
        if (pcb != NULL) {
            INP_LOCK(pcb);
            if (pcb->inp_socket != NULL) {
                fill_ugid_cache(pcb, ugp);
                *lookup = 1;
            }
            INP_UNLOCK(pcb);
        }
        INP_INFO_RUNLOCK(pi);
        if (*lookup == 0) {
            /*
             * If the lookup did not yield any results, there
             * is no sense in coming back and trying again. So
             * we can set lookup to -1 and ensure that we wont
             * bother the pcb system again.
             */
            *lookup = -1;
            return (0);
        }
    }
    if (insn->o.opcode == O_UID)
        match = (ugp->fw_uid == (uid_t)insn->d[0]);
    else if (insn->o.opcode == O_GID) {
        /* match if any of the socket's groups equals the rule's gid */
        for (gp = ugp->fw_groups;
            gp < &ugp->fw_groups[ugp->fw_ngroups]; gp++)
            if (*gp == (gid_t)insn->d[0]) {
                match = 1;
                break;
            }
    } else if (insn->o.opcode == O_JAIL)
        match = (ugp->fw_prid == (int)insn->d[0]);
    return match;
}

/*
 * The main check routine for the firewall.
 *
 * All arguments are in args so we can modify them and return them
 * back to the caller.
 *
 * Parameters:
 *
 *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
 *	Starts with the IP header.
 *	args->eh (in)	Mac header if present, or NULL for layer3 packet.
 *	args->oif	Outgoing interface, or NULL if packet is incoming.
 *	The incoming interface is in the mbuf.
(in) 1707 * args->divert_rule (in/out) 1708 * Skip up to the first rule past this rule number; 1709 * upon return, non-zero port number for divert or tee. 1710 * 1711 * args->rule Pointer to the last matching rule (in/out) 1712 * args->next_hop Socket we are forwarding to (out). 1713 * args->f_id Addresses grabbed from the packet (out) 1714 * args->cookie a cookie depending on rule action 1715 * 1716 * Return value: 1717 * 1718 * IP_FW_PASS the packet must be accepted 1719 * IP_FW_DENY the packet must be dropped 1720 * IP_FW_DIVERT divert packet, port in m_tag 1721 * IP_FW_TEE tee packet, port in m_tag 1722 * IP_FW_DUMMYNET to dummynet, pipe in args->cookie 1723 * IP_FW_NETGRAPH into netgraph, cookie args->cookie 1724 * 1725 */ 1726 1727int 1728ipfw_chk(struct ip_fw_args *args) 1729{ 1730 /* 1731 * Local variables hold state during the processing of a packet. 1732 * 1733 * IMPORTANT NOTE: to speed up the processing of rules, there 1734 * are some assumption on the values of the variables, which 1735 * are documented here. Should you change them, please check 1736 * the implementation of the various instructions to make sure 1737 * that they still work. 1738 * 1739 * args->eh The MAC header. It is non-null for a layer2 1740 * packet, it is NULL for a layer-3 packet. 1741 * 1742 * m | args->m Pointer to the mbuf, as received from the caller. 1743 * It may change if ipfw_chk() does an m_pullup, or if it 1744 * consumes the packet because it calls send_reject(). 1745 * XXX This has to change, so that ipfw_chk() never modifies 1746 * or consumes the buffer. 1747 * ip is simply an alias of the value of m, and it is kept 1748 * in sync with it (the packet is supposed to start with 1749 * the ip header). 1750 */ 1751 struct mbuf *m = args->m; 1752 struct ip *ip = mtod(m, struct ip *); 1753 1754 /* 1755 * For rules which contain uid/gid or jail constraints, cache 1756 * a copy of the users credentials after the pcb lookup has been 1757 * executed. 
This will speed up the processing of rules with 1758 * these types of constraints, as well as decrease contention 1759 * on pcb related locks. 1760 */ 1761 struct ip_fw_ugid fw_ugid_cache; 1762 int ugid_lookup = 0; 1763 1764 /* 1765 * divinput_flags If non-zero, set to the IP_FW_DIVERT_*_FLAG 1766 * associated with a packet input on a divert socket. This 1767 * will allow to distinguish traffic and its direction when 1768 * it originates from a divert socket. 1769 */ 1770 u_int divinput_flags = 0; 1771 1772 /* 1773 * oif | args->oif If NULL, ipfw_chk has been called on the 1774 * inbound path (ether_input, bdg_forward, ip_input). 1775 * If non-NULL, ipfw_chk has been called on the outbound path 1776 * (ether_output, ip_output). 1777 */ 1778 struct ifnet *oif = args->oif; 1779 1780 struct ip_fw *f = NULL; /* matching rule */ 1781 int retval = 0; 1782 1783 /* 1784 * hlen The length of the IPv4 header. 1785 * hlen >0 means we have an IPv4 packet. 1786 */ 1787 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 1788 1789 /* 1790 * offset The offset of a fragment. offset != 0 means that 1791 * we have a fragment at this offset of an IPv4 packet. 1792 * offset == 0 means that (if this is an IPv4 packet) 1793 * this is the first or only fragment. 1794 */ 1795 u_short offset = 0; 1796 1797 /* 1798 * Local copies of addresses. They are only valid if we have 1799 * an IP packet. 1800 * 1801 * proto The protocol. Set to 0 for non-ip packets, 1802 * or to the protocol read from the packet otherwise. 1803 * proto != 0 means that we have an IPv4 packet. 1804 * 1805 * src_port, dst_port port numbers, in HOST format. Only 1806 * valid for TCP and UDP packets. 1807 * 1808 * src_ip, dst_ip ip addresses, in NETWORK format. 1809 * Only valid for IPv4 packets. 
1810 */ 1811 u_int8_t proto; 1812 u_int16_t src_port = 0, dst_port = 0; /* NOTE: host format */ 1813 struct in_addr src_ip, dst_ip; /* NOTE: network format */ 1814 u_int16_t ip_len=0; 1815 int pktlen; 1816 1817 /* 1818 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 1819 * MATCH_NONE when checked and not matched (q = NULL), 1820 * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) 1821 */ 1822 int dyn_dir = MATCH_UNKNOWN; 1823 ipfw_dyn_rule *q = NULL; 1824 struct ip_fw_chain *chain = &layer3_chain; 1825 struct m_tag *mtag; 1826 1827 /* 1828 * We store in ulp a pointer to the upper layer protocol header. 1829 * In the ipv4 case this is easy to determine from the header, 1830 * but for ipv6 we might have some additional headers in the middle. 1831 * ulp is NULL if not found. 1832 */ 1833 void *ulp = NULL; /* upper layer protocol pointer. */ 1834 1835 if (m->m_flags & M_SKIP_FIREWALL) 1836 return (IP_FW_PASS); /* accept */ 1837 1838 pktlen = m->m_pkthdr.len; 1839 proto = args->f_id.proto = 0; /* mark f_id invalid */ 1840 1841/* 1842 * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, 1843 * then it sets p to point at the offset "len" in the mbuf. WARNING: the 1844 * pointer might become stale after other pullups (but we never use it 1845 * this way). 1846 */ 1847#define PULLUP_TO(len, p, T) \ 1848do { \ 1849 int x = (len) + sizeof(T); \ 1850 if ((m)->m_len < x) { \ 1851 args->m = m = m_pullup(m, x); \ 1852 if (m == NULL) \ 1853 goto pullup_failed; \ 1854 } \ 1855 p = (mtod(m, char *) + (len)); \ 1856} while (0) 1857 1858 /* Identify IP packets and fill up veriables. */ 1859 if (pktlen >= sizeof(struct ip) && 1860 (args->eh == NULL || ntohs(args->eh->ether_type) == ETHERTYPE_IP) && 1861 mtod(m, struct ip *)->ip_v == 4) { 1862 ip = mtod(m, struct ip *); 1863 hlen = ip->ip_hl << 2; 1864#ifdef NOTYET 1865 args->f_id.addr_type = 4; 1866#endif 1867 1868 /* 1869 * Collect parameters into local variables for faster matching. 
1870 */ 1871 proto = ip->ip_p; 1872 src_ip = ip->ip_src; 1873 dst_ip = ip->ip_dst; 1874 if (args->eh != NULL) { /* layer 2 packets are as on the wire */ 1875 offset = ntohs(ip->ip_off) & IP_OFFMASK; 1876 ip_len = ntohs(ip->ip_len); 1877 } else { 1878 offset = ip->ip_off & IP_OFFMASK; 1879 ip_len = ip->ip_len; 1880 } 1881 pktlen = ip_len < pktlen ? ip_len : pktlen; 1882 1883 if (offset == 0) { 1884 switch (proto) { 1885 case IPPROTO_TCP: 1886 PULLUP_TO(hlen, ulp, struct tcphdr); 1887 dst_port = TCP(ulp)->th_dport; 1888 src_port = TCP(ulp)->th_sport; 1889 args->f_id.flags = TCP(ulp)->th_flags; 1890 break; 1891 1892 case IPPROTO_UDP: 1893 PULLUP_TO(hlen, ulp, struct udphdr); 1894 dst_port = UDP(ulp)->uh_dport; 1895 src_port = UDP(ulp)->uh_sport; 1896 break; 1897 1898 case IPPROTO_ICMP: 1899 /* 1900 * we only care for 4 bytes: type, code, 1901 * checksum 1902 */ 1903 PULLUP_TO(hlen, ulp, struct icmp); 1904 args->f_id.flags = ICMP(ulp)->icmp_type; 1905 break; 1906 1907 default: 1908 break; 1909 } 1910 } 1911 1912 args->f_id.src_ip = ntohl(src_ip.s_addr); 1913 args->f_id.dst_ip = ntohl(dst_ip.s_addr); 1914 } 1915#undef PULLUP_TO 1916 if (proto) { /* we may have port numbers, store them */ 1917 args->f_id.proto = proto; 1918 args->f_id.src_port = src_port = ntohs(src_port); 1919 args->f_id.dst_port = dst_port = ntohs(dst_port); 1920 } 1921 1922 IPFW_RLOCK(chain); 1923 mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); 1924 if (args->rule) { 1925 /* 1926 * Packet has already been tagged. Look for the next rule 1927 * to restart processing. 1928 * 1929 * If fw_one_pass != 0 then just accept it. 1930 * XXX should not happen here, but optimized out in 1931 * the caller. 1932 */ 1933 if (fw_one_pass) { 1934 IPFW_RUNLOCK(chain); 1935 return (IP_FW_PASS); 1936 } 1937 1938 f = args->rule->next_rule; 1939 if (f == NULL) 1940 f = lookup_next_rule(args->rule); 1941 } else { 1942 /* 1943 * Find the starting rule. 
It can be either the first 1944 * one, or the one after divert_rule if asked so. 1945 */ 1946 int skipto = mtag ? divert_cookie(mtag) : 0; 1947 1948 f = chain->rules; 1949 if (args->eh == NULL && skipto != 0) { 1950 if (skipto >= IPFW_DEFAULT_RULE) { 1951 IPFW_RUNLOCK(chain); 1952 return (IP_FW_DENY); /* invalid */ 1953 } 1954 while (f && f->rulenum <= skipto) 1955 f = f->next; 1956 if (f == NULL) { /* drop packet */ 1957 IPFW_RUNLOCK(chain); 1958 return (IP_FW_DENY); 1959 } 1960 } 1961 } 1962 /* reset divert rule to avoid confusion later */ 1963 if (mtag) { 1964 divinput_flags = divert_info(mtag) & 1965 (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG); 1966 m_tag_delete(m, mtag); 1967 } 1968 1969 /* 1970 * Now scan the rules, and parse microinstructions for each rule. 1971 */ 1972 for (; f; f = f->next) { 1973 int l, cmdlen; 1974 ipfw_insn *cmd; 1975 int skip_or; /* skip rest of OR block */ 1976 1977again: 1978 if (set_disable & (1 << f->set) ) 1979 continue; 1980 1981 skip_or = 0; 1982 for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; 1983 l -= cmdlen, cmd += cmdlen) { 1984 int match; 1985 1986 /* 1987 * check_body is a jump target used when we find a 1988 * CHECK_STATE, and need to jump to the body of 1989 * the target rule. 1990 */ 1991 1992check_body: 1993 cmdlen = F_LEN(cmd); 1994 /* 1995 * An OR block (insn_1 || .. || insn_n) has the 1996 * F_OR bit set in all but the last instruction. 1997 * The first match will set "skip_or", and cause 1998 * the following instructions to be skipped until 1999 * past the one with the F_OR bit clear. 2000 */ 2001 if (skip_or) { /* skip this instruction */ 2002 if ((cmd->len & F_OR) == 0) 2003 skip_or = 0; /* next one is good */ 2004 continue; 2005 } 2006 match = 0; /* set to 1 if we succeed */ 2007 2008 switch (cmd->opcode) { 2009 /* 2010 * The first set of opcodes compares the packet's 2011 * fields with some pattern, setting 'match' if a 2012 * match is found. 
At the end of the loop there is 2013 * logic to deal with F_NOT and F_OR flags associated 2014 * with the opcode. 2015 */ 2016 case O_NOP: 2017 match = 1; 2018 break; 2019 2020 case O_FORWARD_MAC: 2021 printf("ipfw: opcode %d unimplemented\n", 2022 cmd->opcode); 2023 break; 2024 2025 case O_GID: 2026 case O_UID: 2027 case O_JAIL: 2028 /* 2029 * We only check offset == 0 && proto != 0, 2030 * as this ensures that we have an IPv4 2031 * packet with the ports info. 2032 */ 2033 if (offset!=0) 2034 break; 2035 if (proto == IPPROTO_TCP || 2036 proto == IPPROTO_UDP) 2037 match = check_uidgid( 2038 (ipfw_insn_u32 *)cmd, 2039 proto, oif, 2040 dst_ip, dst_port, 2041 src_ip, src_port, &fw_ugid_cache, 2042 &ugid_lookup, args->inp); 2043 break; 2044 2045 case O_RECV: 2046 match = iface_match(m->m_pkthdr.rcvif, 2047 (ipfw_insn_if *)cmd); 2048 break; 2049 2050 case O_XMIT: 2051 match = iface_match(oif, (ipfw_insn_if *)cmd); 2052 break; 2053 2054 case O_VIA: 2055 match = iface_match(oif ? oif : 2056 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); 2057 break; 2058 2059 case O_MACADDR2: 2060 if (args->eh != NULL) { /* have MAC header */ 2061 u_int32_t *want = (u_int32_t *) 2062 ((ipfw_insn_mac *)cmd)->addr; 2063 u_int32_t *mask = (u_int32_t *) 2064 ((ipfw_insn_mac *)cmd)->mask; 2065 u_int32_t *hdr = (u_int32_t *)args->eh; 2066 2067 match = 2068 ( want[0] == (hdr[0] & mask[0]) && 2069 want[1] == (hdr[1] & mask[1]) && 2070 want[2] == (hdr[2] & mask[2]) ); 2071 } 2072 break; 2073 2074 case O_MAC_TYPE: 2075 if (args->eh != NULL) { 2076 u_int16_t t = 2077 ntohs(args->eh->ether_type); 2078 u_int16_t *p = 2079 ((ipfw_insn_u16 *)cmd)->ports; 2080 int i; 2081 2082 for (i = cmdlen - 1; !match && i>0; 2083 i--, p += 2) 2084 match = (t>=p[0] && t<=p[1]); 2085 } 2086 break; 2087 2088 case O_FRAG: 2089 match = (hlen > 0 && offset != 0); 2090 break; 2091 2092 case O_IN: /* "out" is "not in" */ 2093 match = (oif == NULL); 2094 break; 2095 2096 case O_LAYER2: 2097 match = (args->eh != NULL); 2098 break; 
2099 2100 case O_DIVERTED: 2101 match = (cmd->arg1 & 1 && divinput_flags & 2102 IP_FW_DIVERT_LOOPBACK_FLAG) || 2103 (cmd->arg1 & 2 && divinput_flags & 2104 IP_FW_DIVERT_OUTPUT_FLAG); 2105 break; 2106 2107 case O_PROTO: 2108 /* 2109 * We do not allow an arg of 0 so the 2110 * check of "proto" only suffices. 2111 */ 2112 match = (proto == cmd->arg1); 2113 break; 2114 2115 case O_IP_SRC: 2116 match = (hlen > 0 && 2117 ((ipfw_insn_ip *)cmd)->addr.s_addr == 2118 src_ip.s_addr); 2119 break; 2120 2121 case O_IP_SRC_LOOKUP: 2122 case O_IP_DST_LOOKUP: 2123 if (hlen > 0) { 2124 uint32_t a = 2125 (cmd->opcode == O_IP_DST_LOOKUP) ? 2126 dst_ip.s_addr : src_ip.s_addr; 2127 uint32_t v; 2128 2129 match = lookup_table(cmd->arg1, a, &v); 2130 if (!match) 2131 break; 2132 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) 2133 match = 2134 ((ipfw_insn_u32 *)cmd)->d[0] == v; 2135 } 2136 break; 2137 2138 case O_IP_SRC_MASK: 2139 case O_IP_DST_MASK: 2140 if (hlen > 0) { 2141 uint32_t a = 2142 (cmd->opcode == O_IP_DST_MASK) ? 2143 dst_ip.s_addr : src_ip.s_addr; 2144 uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; 2145 int i = cmdlen-1; 2146 2147 for (; !match && i>0; i-= 2, p+= 2) 2148 match = (p[0] == (a & p[1])); 2149 } 2150 break; 2151 2152 case O_IP_SRC_ME: 2153 if (hlen > 0) { 2154 struct ifnet *tif; 2155 2156 INADDR_TO_IFP(src_ip, tif); 2157 match = (tif != NULL); 2158 } 2159 break; 2160 2161 case O_IP_DST_SET: 2162 case O_IP_SRC_SET: 2163 if (hlen > 0) { 2164 u_int32_t *d = (u_int32_t *)(cmd+1); 2165 u_int32_t addr = 2166 cmd->opcode == O_IP_DST_SET ? 
2167 args->f_id.dst_ip : 2168 args->f_id.src_ip; 2169 2170 if (addr < d[0]) 2171 break; 2172 addr -= d[0]; /* subtract base */ 2173 match = (addr < cmd->arg1) && 2174 ( d[ 1 + (addr>>5)] & 2175 (1<<(addr & 0x1f)) ); 2176 } 2177 break; 2178 2179 case O_IP_DST: 2180 match = (hlen > 0 && 2181 ((ipfw_insn_ip *)cmd)->addr.s_addr == 2182 dst_ip.s_addr); 2183 break; 2184 2185 case O_IP_DST_ME: 2186 if (hlen > 0) { 2187 struct ifnet *tif; 2188 2189 INADDR_TO_IFP(dst_ip, tif); 2190 match = (tif != NULL); 2191 } 2192 break; 2193 2194 case O_IP_SRCPORT: 2195 case O_IP_DSTPORT: 2196 /* 2197 * offset == 0 && proto != 0 is enough 2198 * to guarantee that we have an IPv4 2199 * packet with port info. 2200 */ 2201 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) 2202 && offset == 0) { 2203 u_int16_t x = 2204 (cmd->opcode == O_IP_SRCPORT) ? 2205 src_port : dst_port ; 2206 u_int16_t *p = 2207 ((ipfw_insn_u16 *)cmd)->ports; 2208 int i; 2209 2210 for (i = cmdlen - 1; !match && i>0; 2211 i--, p += 2) 2212 match = (x>=p[0] && x<=p[1]); 2213 } 2214 break; 2215 2216 case O_ICMPTYPE: 2217 match = (offset == 0 && proto==IPPROTO_ICMP && 2218 icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); 2219 break; 2220 2221 case O_IPOPT: 2222 match = (hlen > 0 && ipopts_match(ip, cmd) ); 2223 break; 2224 2225 case O_IPVER: 2226 match = (hlen > 0 && cmd->arg1 == ip->ip_v); 2227 break; 2228 2229 case O_IPID: 2230 case O_IPLEN: 2231 case O_IPTTL: 2232 if (hlen > 0) { /* only for IP packets */ 2233 uint16_t x; 2234 uint16_t *p; 2235 int i; 2236 2237 if (cmd->opcode == O_IPLEN) 2238 x = ip_len; 2239 else if (cmd->opcode == O_IPTTL) 2240 x = ip->ip_ttl; 2241 else /* must be IPID */ 2242 x = ntohs(ip->ip_id); 2243 if (cmdlen == 1) { 2244 match = (cmd->arg1 == x); 2245 break; 2246 } 2247 /* otherwise we have ranges */ 2248 p = ((ipfw_insn_u16 *)cmd)->ports; 2249 i = cmdlen - 1; 2250 for (; !match && i>0; i--, p += 2) 2251 match = (x >= p[0] && x <= p[1]); 2252 } 2253 break; 2254 2255 case O_IPPRECEDENCE: 2256 
match = (hlen > 0 && 2257 (cmd->arg1 == (ip->ip_tos & 0xe0)) ); 2258 break; 2259 2260 case O_IPTOS: 2261 match = (hlen > 0 && 2262 flags_match(cmd, ip->ip_tos)); 2263 break; 2264 2265 case O_TCPDATALEN: 2266 if (proto == IPPROTO_TCP && offset == 0) { 2267 struct tcphdr *tcp; 2268 uint16_t x; 2269 uint16_t *p; 2270 int i; 2271 2272 tcp = TCP(ulp); 2273 x = ip_len - 2274 ((ip->ip_hl + tcp->th_off) << 2); 2275 if (cmdlen == 1) { 2276 match = (cmd->arg1 == x); 2277 break; 2278 } 2279 /* otherwise we have ranges */ 2280 p = ((ipfw_insn_u16 *)cmd)->ports; 2281 i = cmdlen - 1; 2282 for (; !match && i>0; i--, p += 2) 2283 match = (x >= p[0] && x <= p[1]); 2284 } 2285 break; 2286 2287 case O_TCPFLAGS: 2288 match = (proto == IPPROTO_TCP && offset == 0 && 2289 flags_match(cmd, TCP(ulp)->th_flags)); 2290 break; 2291 2292 case O_TCPOPTS: 2293 match = (proto == IPPROTO_TCP && offset == 0 && 2294 tcpopts_match(TCP(ulp), cmd)); 2295 break; 2296 2297 case O_TCPSEQ: 2298 match = (proto == IPPROTO_TCP && offset == 0 && 2299 ((ipfw_insn_u32 *)cmd)->d[0] == 2300 TCP(ulp)->th_seq); 2301 break; 2302 2303 case O_TCPACK: 2304 match = (proto == IPPROTO_TCP && offset == 0 && 2305 ((ipfw_insn_u32 *)cmd)->d[0] == 2306 TCP(ulp)->th_ack); 2307 break; 2308 2309 case O_TCPWIN: 2310 match = (proto == IPPROTO_TCP && offset == 0 && 2311 cmd->arg1 == TCP(ulp)->th_win); 2312 break; 2313 2314 case O_ESTAB: 2315 /* reject packets which have SYN only */ 2316 /* XXX should i also check for TH_ACK ? */ 2317 match = (proto == IPPROTO_TCP && offset == 0 && 2318 (TCP(ulp)->th_flags & 2319 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 2320 break; 2321 2322 case O_ALTQ: { 2323 struct altq_tag *at; 2324 ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; 2325 2326 match = 1; 2327 mtag = m_tag_get(PACKET_TAG_PF_QID, 2328 sizeof(struct altq_tag), 2329 M_NOWAIT); 2330 if (mtag == NULL) { 2331 /* 2332 * Let the packet fall back to the 2333 * default ALTQ. 
2334 */ 2335 break; 2336 } 2337 at = (struct altq_tag *)(mtag+1); 2338 at->qid = altq->qid; 2339 if (hlen != 0) 2340 at->af = AF_INET; 2341 else 2342 at->af = AF_LINK; 2343 at->hdr = ip; 2344 m_tag_prepend(m, mtag); 2345 break; 2346 } 2347 2348 case O_LOG: 2349 if (fw_verbose) 2350 ipfw_log(f, hlen, args->eh, m, oif); 2351 match = 1; 2352 break; 2353 2354 case O_PROB: 2355 match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); 2356 break; 2357 2358 case O_VERREVPATH: 2359 /* Outgoing packets automatically pass/match */ 2360 match = (hlen > 0 && ((oif != NULL) || 2361 (m->m_pkthdr.rcvif == NULL) || 2362 verify_path(src_ip, m->m_pkthdr.rcvif))); 2363 break; 2364 2365 case O_VERSRCREACH: 2366 /* Outgoing packets automatically pass/match */ 2367 match = (hlen > 0 && ((oif != NULL) || 2368 verify_path(src_ip, NULL))); 2369 break; 2370 2371 case O_ANTISPOOF: 2372 /* Outgoing packets automatically pass/match */ 2373 if (oif == NULL && hlen > 0 && 2374 in_localaddr(src_ip)) 2375 match = verify_path(src_ip, 2376 m->m_pkthdr.rcvif); 2377 else 2378 match = 1; 2379 break; 2380 2381 case O_IPSEC: 2382#ifdef FAST_IPSEC 2383 match = (m_tag_find(m, 2384 PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); 2385#endif 2386#ifdef IPSEC 2387 match = (ipsec_getnhist(m) != 0); 2388#endif 2389 /* otherwise no match */ 2390 break; 2391 2392 /* 2393 * The second set of opcodes represents 'actions', 2394 * i.e. the terminal part of a rule once the packet 2395 * matches all previous patterns. 2396 * Typically there is only one action for each rule, 2397 * and the opcode is stored at the end of the rule 2398 * (but there are exceptions -- see below). 2399 * 2400 * In general, here we set retval and terminate the 2401 * outer loop (would be a 'break 3' in some language, 2402 * but we need to do a 'goto done'). 
2403 * 2404 * Exceptions: 2405 * O_COUNT and O_SKIPTO actions: 2406 * instead of terminating, we jump to the next rule 2407 * ('goto next_rule', equivalent to a 'break 2'), 2408 * or to the SKIPTO target ('goto again' after 2409 * having set f, cmd and l), respectively. 2410 * 2411 * O_LOG and O_ALTQ action parameters: 2412 * perform some action and set match = 1; 2413 * 2414 * O_LIMIT and O_KEEP_STATE: these opcodes are 2415 * not real 'actions', and are stored right 2416 * before the 'action' part of the rule. 2417 * These opcodes try to install an entry in the 2418 * state tables; if successful, we continue with 2419 * the next opcode (match=1; break;), otherwise 2420 * the packet * must be dropped 2421 * ('goto done' after setting retval); 2422 * 2423 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 2424 * cause a lookup of the state table, and a jump 2425 * to the 'action' part of the parent rule 2426 * ('goto check_body') if an entry is found, or 2427 * (CHECK_STATE only) a jump to the next rule if 2428 * the entry is not found ('goto next_rule'). 2429 * The result of the lookup is cached to make 2430 * further instances of these opcodes are 2431 * effectively NOPs. 2432 */ 2433 case O_LIMIT: 2434 case O_KEEP_STATE: 2435 if (install_state(f, 2436 (ipfw_insn_limit *)cmd, args)) { 2437 retval = IP_FW_DENY; 2438 goto done; /* error/limit violation */ 2439 } 2440 match = 1; 2441 break; 2442 2443 case O_PROBE_STATE: 2444 case O_CHECK_STATE: 2445 /* 2446 * dynamic rules are checked at the first 2447 * keep-state or check-state occurrence, 2448 * with the result being stored in dyn_dir. 2449 * The compiler introduces a PROBE_STATE 2450 * instruction for us when we have a 2451 * KEEP_STATE (because PROBE_STATE needs 2452 * to be run first). 2453 */ 2454 if (dyn_dir == MATCH_UNKNOWN && 2455 (q = lookup_dyn_rule(&args->f_id, 2456 &dyn_dir, proto == IPPROTO_TCP ? 
2457 TCP(ulp) : NULL)) 2458 != NULL) { 2459 /* 2460 * Found dynamic entry, update stats 2461 * and jump to the 'action' part of 2462 * the parent rule. 2463 */ 2464 q->pcnt++; 2465 q->bcnt += pktlen; 2466 f = q->rule; 2467 cmd = ACTION_PTR(f); 2468 l = f->cmd_len - f->act_ofs; 2469 IPFW_DYN_UNLOCK(); 2470 goto check_body; 2471 } 2472 /* 2473 * Dynamic entry not found. If CHECK_STATE, 2474 * skip to next rule, if PROBE_STATE just 2475 * ignore and continue with next opcode. 2476 */ 2477 if (cmd->opcode == O_CHECK_STATE) 2478 goto next_rule; 2479 match = 1; 2480 break; 2481 2482 case O_ACCEPT: 2483 retval = 0; /* accept */ 2484 goto done; 2485 2486 case O_PIPE: 2487 case O_QUEUE: 2488 args->rule = f; /* report matching rule */ 2489 args->cookie = cmd->arg1; 2490 retval = IP_FW_DUMMYNET; 2491 goto done; 2492 2493 case O_DIVERT: 2494 case O_TEE: { 2495 struct divert_tag *dt; 2496 2497 if (args->eh) /* not on layer 2 */ 2498 break; 2499 mtag = m_tag_get(PACKET_TAG_DIVERT, 2500 sizeof(struct divert_tag), 2501 M_NOWAIT); 2502 if (mtag == NULL) { 2503 /* XXX statistic */ 2504 /* drop packet */ 2505 IPFW_RUNLOCK(chain); 2506 return (IP_FW_DENY); 2507 } 2508 dt = (struct divert_tag *)(mtag+1); 2509 dt->cookie = f->rulenum; 2510 dt->info = cmd->arg1; 2511 m_tag_prepend(m, mtag); 2512 retval = (cmd->opcode == O_DIVERT) ? 2513 IP_FW_DIVERT : IP_FW_TEE; 2514 goto done; 2515 } 2516 2517 case O_COUNT: 2518 case O_SKIPTO: 2519 f->pcnt++; /* update stats */ 2520 f->bcnt += pktlen; 2521 f->timestamp = time_second; 2522 if (cmd->opcode == O_COUNT) 2523 goto next_rule; 2524 /* handle skipto */ 2525 if (f->next_rule == NULL) 2526 lookup_next_rule(f); 2527 f = f->next_rule; 2528 goto again; 2529 2530 case O_REJECT: 2531 /* 2532 * Drop the packet and send a reject notice 2533 * if the packet is not ICMP (or is an ICMP 2534 * query), and it is not multicast/broadcast. 
2535 */ 2536 if (hlen > 0 && 2537 (proto != IPPROTO_ICMP || 2538 is_icmp_query(ICMP(ulp))) && 2539 !(m->m_flags & (M_BCAST|M_MCAST)) && 2540 !IN_MULTICAST(ntohl(dst_ip.s_addr))) { 2541 send_reject(args, cmd->arg1, 2542 offset,ip_len); 2543 m = args->m; 2544 } 2545 /* FALLTHROUGH */ 2546 case O_DENY: 2547 retval = IP_FW_DENY; 2548 goto done; 2549 2550 case O_FORWARD_IP: 2551 if (args->eh) /* not valid on layer2 pkts */ 2552 break; 2553 if (!q || dyn_dir == MATCH_FORWARD) 2554 args->next_hop = 2555 &((ipfw_insn_sa *)cmd)->sa; 2556 retval = IP_FW_PASS; 2557 goto done; 2558 2559 case O_NETGRAPH: 2560 case O_NGTEE: 2561 args->rule = f; /* report matching rule */ 2562 args->cookie = cmd->arg1; 2563 retval = (cmd->opcode == O_NETGRAPH) ? 2564 IP_FW_NETGRAPH : IP_FW_NGTEE; 2565 goto done; 2566 2567 default: 2568 panic("-- unknown opcode %d\n", cmd->opcode); 2569 } /* end of switch() on opcodes */ 2570 2571 if (cmd->len & F_NOT) 2572 match = !match; 2573 2574 if (match) { 2575 if (cmd->len & F_OR) 2576 skip_or = 1; 2577 } else { 2578 if (!(cmd->len & F_OR)) /* not an OR block, */ 2579 break; /* try next rule */ 2580 } 2581 2582 } /* end of inner for, scan opcodes */ 2583 2584next_rule:; /* try next rule */ 2585 2586 } /* end of outer for, scan rules */ 2587 printf("ipfw: ouch!, skip past end of rules, denying packet\n"); 2588 IPFW_RUNLOCK(chain); 2589 return (IP_FW_DENY); 2590 2591done: 2592 /* Update statistics */ 2593 f->pcnt++; 2594 f->bcnt += pktlen; 2595 f->timestamp = time_second; 2596 IPFW_RUNLOCK(chain); 2597 return (retval); 2598 2599pullup_failed: 2600 if (fw_verbose) 2601 printf("ipfw: pullup failed\n"); 2602 return (IP_FW_DENY); 2603} 2604 2605/* 2606 * When a rule is added/deleted, clear the next_rule pointers in all rules. 2607 * These will be reconstructed on the fly as packets are matched. 
2608 */ 2609static void 2610flush_rule_ptrs(struct ip_fw_chain *chain) 2611{ 2612 struct ip_fw *rule; 2613 2614 IPFW_WLOCK_ASSERT(chain); 2615 2616 for (rule = chain->rules; rule; rule = rule->next) 2617 rule->next_rule = NULL; 2618} 2619 2620/* 2621 * When pipes/queues are deleted, clear the "pipe_ptr" pointer to a given 2622 * pipe/queue, or to all of them (match == NULL). 2623 */ 2624void 2625flush_pipe_ptrs(struct dn_flow_set *match) 2626{ 2627 struct ip_fw *rule; 2628 2629 IPFW_WLOCK(&layer3_chain); 2630 for (rule = layer3_chain.rules; rule; rule = rule->next) { 2631 ipfw_insn_pipe *cmd = (ipfw_insn_pipe *)ACTION_PTR(rule); 2632 2633 if (cmd->o.opcode != O_PIPE && cmd->o.opcode != O_QUEUE) 2634 continue; 2635 /* 2636 * XXX Use bcmp/bzero to handle pipe_ptr to overcome 2637 * possible alignment problems on 64-bit architectures. 2638 * This code is seldom used so we do not worry too 2639 * much about efficiency. 2640 */ 2641 if (match == NULL || 2642 !bcmp(&cmd->pipe_ptr, &match, sizeof(match)) ) 2643 bzero(&cmd->pipe_ptr, sizeof(cmd->pipe_ptr)); 2644 } 2645 IPFW_WUNLOCK(&layer3_chain); 2646} 2647 2648/* 2649 * Add a new rule to the list. Copy the rule into a malloc'ed area, then 2650 * possibly create a rule number and add the rule to the list. 2651 * Update the rule_number in the input struct so the caller knows it as well. 
2652 */ 2653static int 2654add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) 2655{ 2656 struct ip_fw *rule, *f, *prev; 2657 int l = RULESIZE(input_rule); 2658 2659 if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE) 2660 return (EINVAL); 2661 2662 rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO); 2663 if (rule == NULL) 2664 return (ENOSPC); 2665 2666 bcopy(input_rule, rule, l); 2667 2668 rule->next = NULL; 2669 rule->next_rule = NULL; 2670 2671 rule->pcnt = 0; 2672 rule->bcnt = 0; 2673 rule->timestamp = 0; 2674 2675 IPFW_WLOCK(chain); 2676 2677 if (chain->rules == NULL) { /* default rule */ 2678 chain->rules = rule; 2679 goto done; 2680 } 2681 2682 /* 2683 * If rulenum is 0, find highest numbered rule before the 2684 * default rule, and add autoinc_step 2685 */ 2686 if (autoinc_step < 1) 2687 autoinc_step = 1; 2688 else if (autoinc_step > 1000) 2689 autoinc_step = 1000; 2690 if (rule->rulenum == 0) { 2691 /* 2692 * locate the highest numbered rule before default 2693 */ 2694 for (f = chain->rules; f; f = f->next) { 2695 if (f->rulenum == IPFW_DEFAULT_RULE) 2696 break; 2697 rule->rulenum = f->rulenum; 2698 } 2699 if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step) 2700 rule->rulenum += autoinc_step; 2701 input_rule->rulenum = rule->rulenum; 2702 } 2703 2704 /* 2705 * Now insert the new rule in the right place in the sorted list. 
2706 */ 2707 for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) { 2708 if (f->rulenum > rule->rulenum) { /* found the location */ 2709 if (prev) { 2710 rule->next = f; 2711 prev->next = rule; 2712 } else { /* head insert */ 2713 rule->next = chain->rules; 2714 chain->rules = rule; 2715 } 2716 break; 2717 } 2718 } 2719 flush_rule_ptrs(chain); 2720done: 2721 static_count++; 2722 static_len += l; 2723 IPFW_WUNLOCK(chain); 2724 DEB(printf("ipfw: installed rule %d, static count now %d\n", 2725 rule->rulenum, static_count);) 2726 return (0); 2727} 2728 2729/** 2730 * Remove a static rule (including derived * dynamic rules) 2731 * and place it on the ``reap list'' for later reclamation. 2732 * The caller is in charge of clearing rule pointers to avoid 2733 * dangling pointers. 2734 * @return a pointer to the next entry. 2735 * Arguments are not checked, so they better be correct. 2736 */ 2737static struct ip_fw * 2738remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, struct ip_fw *prev) 2739{ 2740 struct ip_fw *n; 2741 int l = RULESIZE(rule); 2742 2743 IPFW_WLOCK_ASSERT(chain); 2744 2745 n = rule->next; 2746 IPFW_DYN_LOCK(); 2747 remove_dyn_rule(rule, NULL /* force removal */); 2748 IPFW_DYN_UNLOCK(); 2749 if (prev == NULL) 2750 chain->rules = n; 2751 else 2752 prev->next = n; 2753 static_count--; 2754 static_len -= l; 2755 2756 rule->next = chain->reap; 2757 chain->reap = rule; 2758 2759 return n; 2760} 2761 2762/** 2763 * Reclaim storage associated with a list of rules. This is 2764 * typically the list created using remove_rule. 2765 */ 2766static void 2767reap_rules(struct ip_fw *head) 2768{ 2769 struct ip_fw *rule; 2770 2771 while ((rule = head) != NULL) { 2772 head = head->next; 2773 if (DUMMYNET_LOADED) 2774 ip_dn_ruledel_ptr(rule); 2775 free(rule, M_IPFW); 2776 } 2777} 2778 2779/* 2780 * Remove all rules from a chain (except rules in set RESVD_SET 2781 * unless kill_default = 1). 
The caller is responsible for 2782 * reclaiming storage for the rules left in chain->reap. 2783 */ 2784static void 2785free_chain(struct ip_fw_chain *chain, int kill_default) 2786{ 2787 struct ip_fw *prev, *rule; 2788 2789 IPFW_WLOCK_ASSERT(chain); 2790 2791 flush_rule_ptrs(chain); /* more efficient to do outside the loop */ 2792 for (prev = NULL, rule = chain->rules; rule ; ) 2793 if (kill_default || rule->set != RESVD_SET) 2794 rule = remove_rule(chain, rule, prev); 2795 else { 2796 prev = rule; 2797 rule = rule->next; 2798 } 2799} 2800 2801/** 2802 * Remove all rules with given number, and also do set manipulation. 2803 * Assumes chain != NULL && *chain != NULL. 2804 * 2805 * The argument is an u_int32_t. The low 16 bit are the rule or set number, 2806 * the next 8 bits are the new set, the top 8 bits are the command: 2807 * 2808 * 0 delete rules with given number 2809 * 1 delete rules with given set number 2810 * 2 move rules with given number to new set 2811 * 3 move rules with given set number to new set 2812 * 4 swap sets with given numbers 2813 */ 2814static int 2815del_entry(struct ip_fw_chain *chain, u_int32_t arg) 2816{ 2817 struct ip_fw *prev = NULL, *rule; 2818 u_int16_t rulenum; /* rule or old_set */ 2819 u_int8_t cmd, new_set; 2820 2821 rulenum = arg & 0xffff; 2822 cmd = (arg >> 24) & 0xff; 2823 new_set = (arg >> 16) & 0xff; 2824 2825 if (cmd > 4) 2826 return EINVAL; 2827 if (new_set > RESVD_SET) 2828 return EINVAL; 2829 if (cmd == 0 || cmd == 2) { 2830 if (rulenum >= IPFW_DEFAULT_RULE) 2831 return EINVAL; 2832 } else { 2833 if (rulenum > RESVD_SET) /* old_set */ 2834 return EINVAL; 2835 } 2836 2837 IPFW_WLOCK(chain); 2838 rule = chain->rules; 2839 chain->reap = NULL; 2840 switch (cmd) { 2841 case 0: /* delete rules with given number */ 2842 /* 2843 * locate first rule to delete 2844 */ 2845 for (; rule->rulenum < rulenum; prev = rule, rule = rule->next) 2846 ; 2847 if (rule->rulenum != rulenum) { 2848 IPFW_WUNLOCK(chain); 2849 return EINVAL; 2850 } 
2851 2852 /* 2853 * flush pointers outside the loop, then delete all matching 2854 * rules. prev remains the same throughout the cycle. 2855 */ 2856 flush_rule_ptrs(chain); 2857 while (rule->rulenum == rulenum) 2858 rule = remove_rule(chain, rule, prev); 2859 break; 2860 2861 case 1: /* delete all rules with given set number */ 2862 flush_rule_ptrs(chain); 2863 rule = chain->rules; 2864 while (rule->rulenum < IPFW_DEFAULT_RULE) 2865 if (rule->set == rulenum) 2866 rule = remove_rule(chain, rule, prev); 2867 else { 2868 prev = rule; 2869 rule = rule->next; 2870 } 2871 break; 2872 2873 case 2: /* move rules with given number to new set */ 2874 rule = chain->rules; 2875 for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) 2876 if (rule->rulenum == rulenum) 2877 rule->set = new_set; 2878 break; 2879 2880 case 3: /* move rules with given set number to new set */ 2881 for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) 2882 if (rule->set == rulenum) 2883 rule->set = new_set; 2884 break; 2885 2886 case 4: /* swap two sets */ 2887 for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) 2888 if (rule->set == rulenum) 2889 rule->set = new_set; 2890 else if (rule->set == new_set) 2891 rule->set = rulenum; 2892 break; 2893 } 2894 /* 2895 * Look for rules to reclaim. We grab the list before 2896 * releasing the lock then reclaim them w/o the lock to 2897 * avoid a LOR with dummynet. 2898 */ 2899 rule = chain->reap; 2900 chain->reap = NULL; 2901 IPFW_WUNLOCK(chain); 2902 if (rule) 2903 reap_rules(rule); 2904 return 0; 2905} 2906 2907/* 2908 * Clear counters for a specific rule. 2909 * The enclosing "table" is assumed locked. 
2910 */ 2911static void 2912clear_counters(struct ip_fw *rule, int log_only) 2913{ 2914 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); 2915 2916 if (log_only == 0) { 2917 rule->bcnt = rule->pcnt = 0; 2918 rule->timestamp = 0; 2919 } 2920 if (l->o.opcode == O_LOG) 2921 l->log_left = l->max_log; 2922} 2923 2924/** 2925 * Reset some or all counters on firewall rules. 2926 * @arg frwl is null to clear all entries, or contains a specific 2927 * rule number. 2928 * @arg log_only is 1 if we only want to reset logs, zero otherwise. 2929 */ 2930static int 2931zero_entry(struct ip_fw_chain *chain, int rulenum, int log_only) 2932{ 2933 struct ip_fw *rule; 2934 char *msg; 2935 2936 IPFW_WLOCK(chain); 2937 if (rulenum == 0) { 2938 norule_counter = 0; 2939 for (rule = chain->rules; rule; rule = rule->next) 2940 clear_counters(rule, log_only); 2941 msg = log_only ? "ipfw: All logging counts reset.\n" : 2942 "ipfw: Accounting cleared.\n"; 2943 } else { 2944 int cleared = 0; 2945 /* 2946 * We can have multiple rules with the same number, so we 2947 * need to clear them all. 2948 */ 2949 for (rule = chain->rules; rule; rule = rule->next) 2950 if (rule->rulenum == rulenum) { 2951 while (rule && rule->rulenum == rulenum) { 2952 clear_counters(rule, log_only); 2953 rule = rule->next; 2954 } 2955 cleared = 1; 2956 break; 2957 } 2958 if (!cleared) { /* we did not find any matching rules */ 2959 IPFW_WUNLOCK(chain); 2960 return (EINVAL); 2961 } 2962 msg = log_only ? "ipfw: Entry %d logging count reset.\n" : 2963 "ipfw: Entry %d cleared.\n"; 2964 } 2965 IPFW_WUNLOCK(chain); 2966 2967 if (fw_verbose) 2968 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum); 2969 return (0); 2970} 2971 2972/* 2973 * Check validity of the structure before insert. 2974 * Fortunately rules are simple, so this mostly need to check rule sizes. 
2975 */ 2976static int 2977check_ipfw_struct(struct ip_fw *rule, int size) 2978{ 2979 int l, cmdlen = 0; 2980 int have_action=0; 2981 ipfw_insn *cmd; 2982 2983 if (size < sizeof(*rule)) { 2984 printf("ipfw: rule too short\n"); 2985 return (EINVAL); 2986 } 2987 /* first, check for valid size */ 2988 l = RULESIZE(rule); 2989 if (l != size) { 2990 printf("ipfw: size mismatch (have %d want %d)\n", size, l); 2991 return (EINVAL); 2992 } 2993 if (rule->act_ofs >= rule->cmd_len) { 2994 printf("ipfw: bogus action offset (%u > %u)\n", 2995 rule->act_ofs, rule->cmd_len - 1); 2996 return (EINVAL); 2997 } 2998 /* 2999 * Now go for the individual checks. Very simple ones, basically only 3000 * instruction sizes. 3001 */ 3002 for (l = rule->cmd_len, cmd = rule->cmd ; 3003 l > 0 ; l -= cmdlen, cmd += cmdlen) { 3004 cmdlen = F_LEN(cmd); 3005 if (cmdlen > l) { 3006 printf("ipfw: opcode %d size truncated\n", 3007 cmd->opcode); 3008 return EINVAL; 3009 } 3010 DEB(printf("ipfw: opcode %d\n", cmd->opcode);) 3011 switch (cmd->opcode) { 3012 case O_PROBE_STATE: 3013 case O_KEEP_STATE: 3014 case O_PROTO: 3015 case O_IP_SRC_ME: 3016 case O_IP_DST_ME: 3017 case O_LAYER2: 3018 case O_IN: 3019 case O_FRAG: 3020 case O_DIVERTED: 3021 case O_IPOPT: 3022 case O_IPTOS: 3023 case O_IPPRECEDENCE: 3024 case O_IPVER: 3025 case O_TCPWIN: 3026 case O_TCPFLAGS: 3027 case O_TCPOPTS: 3028 case O_ESTAB: 3029 case O_VERREVPATH: 3030 case O_VERSRCREACH: 3031 case O_ANTISPOOF: 3032 case O_IPSEC: 3033 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 3034 goto bad_size; 3035 break; 3036 3037 case O_UID: 3038 case O_GID: 3039 case O_JAIL: 3040 case O_IP_SRC: 3041 case O_IP_DST: 3042 case O_TCPSEQ: 3043 case O_TCPACK: 3044 case O_PROB: 3045 case O_ICMPTYPE: 3046 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) 3047 goto bad_size; 3048 break; 3049 3050 case O_LIMIT: 3051 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) 3052 goto bad_size; 3053 break; 3054 3055 case O_LOG: 3056 if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) 3057 goto 
bad_size; 3058 3059 ((ipfw_insn_log *)cmd)->log_left = 3060 ((ipfw_insn_log *)cmd)->max_log; 3061 3062 break; 3063 3064 case O_IP_SRC_MASK: 3065 case O_IP_DST_MASK: 3066 /* only odd command lengths */ 3067 if ( !(cmdlen & 1) || cmdlen > 31) 3068 goto bad_size; 3069 break; 3070 3071 case O_IP_SRC_SET: 3072 case O_IP_DST_SET: 3073 if (cmd->arg1 == 0 || cmd->arg1 > 256) { 3074 printf("ipfw: invalid set size %d\n", 3075 cmd->arg1); 3076 return EINVAL; 3077 } 3078 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 3079 (cmd->arg1+31)/32 ) 3080 goto bad_size; 3081 break; 3082 3083 case O_IP_SRC_LOOKUP: 3084 case O_IP_DST_LOOKUP: 3085 if (cmd->arg1 >= IPFW_TABLES_MAX) { 3086 printf("ipfw: invalid table number %d\n", 3087 cmd->arg1); 3088 return (EINVAL); 3089 } 3090 if (cmdlen != F_INSN_SIZE(ipfw_insn) && 3091 cmdlen != F_INSN_SIZE(ipfw_insn_u32)) 3092 goto bad_size; 3093 break; 3094 3095 case O_MACADDR2: 3096 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) 3097 goto bad_size; 3098 break; 3099 3100 case O_NOP: 3101 case O_IPID: 3102 case O_IPTTL: 3103 case O_IPLEN: 3104 case O_TCPDATALEN: 3105 if (cmdlen < 1 || cmdlen > 31) 3106 goto bad_size; 3107 break; 3108 3109 case O_MAC_TYPE: 3110 case O_IP_SRCPORT: 3111 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ 3112 if (cmdlen < 2 || cmdlen > 31) 3113 goto bad_size; 3114 break; 3115 3116 case O_RECV: 3117 case O_XMIT: 3118 case O_VIA: 3119 if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) 3120 goto bad_size; 3121 break; 3122 3123 case O_ALTQ: 3124 if (cmdlen != F_INSN_SIZE(ipfw_insn_altq)) 3125 goto bad_size; 3126 break; 3127 3128 case O_PIPE: 3129 case O_QUEUE: 3130 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) 3131 goto bad_size; 3132 goto check_action; 3133 3134 case O_FORWARD_IP: 3135#ifdef IPFIREWALL_FORWARD 3136 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) 3137 goto bad_size; 3138 goto check_action; 3139#else 3140 return EINVAL; 3141#endif 3142 3143 case O_DIVERT: 3144 case O_TEE: 3145 if (ip_divert_ptr == NULL) 3146 return EINVAL; 3147 
else 3148 goto check_size; 3149 case O_NETGRAPH: 3150 case O_NGTEE: 3151 if (!NG_IPFW_LOADED) 3152 return EINVAL; 3153 else 3154 goto check_size; 3155 case O_FORWARD_MAC: /* XXX not implemented yet */ 3156 case O_CHECK_STATE: 3157 case O_COUNT: 3158 case O_ACCEPT: 3159 case O_DENY: 3160 case O_REJECT: 3161 case O_SKIPTO: 3162check_size: 3163 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 3164 goto bad_size; 3165check_action: 3166 if (have_action) { 3167 printf("ipfw: opcode %d, multiple actions" 3168 " not allowed\n", 3169 cmd->opcode); 3170 return EINVAL; 3171 } 3172 have_action = 1; 3173 if (l != cmdlen) { 3174 printf("ipfw: opcode %d, action must be" 3175 " last opcode\n", 3176 cmd->opcode); 3177 return EINVAL; 3178 } 3179 break; 3180 default: 3181 printf("ipfw: opcode %d, unknown opcode\n", 3182 cmd->opcode); 3183 return EINVAL; 3184 } 3185 } 3186 if (have_action == 0) { 3187 printf("ipfw: missing action\n"); 3188 return EINVAL; 3189 } 3190 return 0; 3191 3192bad_size: 3193 printf("ipfw: opcode %d size %d wrong\n", 3194 cmd->opcode, cmdlen); 3195 return EINVAL; 3196} 3197 3198/* 3199 * Copy the static and dynamic rules to the supplied buffer 3200 * and return the amount of space actually used. 3201 */ 3202static size_t 3203ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) 3204{ 3205 char *bp = buf; 3206 char *ep = bp + space; 3207 struct ip_fw *rule; 3208 int i; 3209 3210 /* XXX this can take a long time and locking will block packet flow */ 3211 IPFW_RLOCK(chain); 3212 for (rule = chain->rules; rule ; rule = rule->next) { 3213 /* 3214 * Verify the entry fits in the buffer in case the 3215 * rules changed between calculating buffer space and 3216 * now. This would be better done using a generation 3217 * number but should suffice for now. 
3218 */ 3219 i = RULESIZE(rule); 3220 if (bp + i <= ep) { 3221 bcopy(rule, bp, i); 3222 bcopy(&set_disable, &(((struct ip_fw *)bp)->next_rule), 3223 sizeof(set_disable)); 3224 bp += i; 3225 } 3226 } 3227 IPFW_RUNLOCK(chain); 3228 if (ipfw_dyn_v) { 3229 ipfw_dyn_rule *p, *last = NULL; 3230 3231 IPFW_DYN_LOCK(); 3232 for (i = 0 ; i < curr_dyn_buckets; i++) 3233 for (p = ipfw_dyn_v[i] ; p != NULL; p = p->next) { 3234 if (bp + sizeof *p <= ep) { 3235 ipfw_dyn_rule *dst = 3236 (ipfw_dyn_rule *)bp; 3237 bcopy(p, dst, sizeof *p); 3238 bcopy(&(p->rule->rulenum), &(dst->rule), 3239 sizeof(p->rule->rulenum)); 3240 /* 3241 * store a non-null value in "next". 3242 * The userland code will interpret a 3243 * NULL here as a marker 3244 * for the last dynamic rule. 3245 */ 3246 bcopy(&dst, &dst->next, sizeof(dst)); 3247 last = dst; 3248 dst->expire = 3249 TIME_LEQ(dst->expire, time_second) ? 3250 0 : dst->expire - time_second ; 3251 bp += sizeof(ipfw_dyn_rule); 3252 } 3253 } 3254 IPFW_DYN_UNLOCK(); 3255 if (last != NULL) /* mark last dynamic rule */ 3256 bzero(&last->next, sizeof(last)); 3257 } 3258 return (bp - (char *)buf); 3259} 3260 3261 3262/** 3263 * {set|get}sockopt parser. 3264 */ 3265static int 3266ipfw_ctl(struct sockopt *sopt) 3267{ 3268#define RULE_MAXSIZE (256*sizeof(u_int32_t)) 3269 int error, rule_num; 3270 size_t size; 3271 struct ip_fw *buf, *rule; 3272 u_int32_t rulenum[2]; 3273 3274 error = suser(sopt->sopt_td); 3275 if (error) 3276 return (error); 3277 3278 /* 3279 * Disallow modifications in really-really secure mode, but still allow 3280 * the logging counters to be reset. 
3281 */ 3282 if (sopt->sopt_name == IP_FW_ADD || 3283 (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) { 3284#if __FreeBSD_version >= 500034 3285 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); 3286 if (error) 3287 return (error); 3288#else /* FreeBSD 4.x */ 3289 if (securelevel >= 3) 3290 return (EPERM); 3291#endif 3292 } 3293 3294 error = 0; 3295 3296 switch (sopt->sopt_name) { 3297 case IP_FW_GET: 3298 /* 3299 * pass up a copy of the current rules. Static rules 3300 * come first (the last of which has number IPFW_DEFAULT_RULE), 3301 * followed by a possibly empty list of dynamic rule. 3302 * The last dynamic rule has NULL in the "next" field. 3303 * 3304 * Note that the calculated size is used to bound the 3305 * amount of data returned to the user. The rule set may 3306 * change between calculating the size and returning the 3307 * data in which case we'll just return what fits. 3308 */ 3309 size = static_len; /* size of static rules */ 3310 if (ipfw_dyn_v) /* add size of dyn.rules */ 3311 size += (dyn_count * sizeof(ipfw_dyn_rule)); 3312 3313 /* 3314 * XXX todo: if the user passes a short length just to know 3315 * how much room is needed, do not bother filling up the 3316 * buffer, just jump to the sooptcopyout. 3317 */ 3318 buf = malloc(size, M_TEMP, M_WAITOK); 3319 error = sooptcopyout(sopt, buf, 3320 ipfw_getrules(&layer3_chain, buf, size)); 3321 free(buf, M_TEMP); 3322 break; 3323 3324 case IP_FW_FLUSH: 3325 /* 3326 * Normally we cannot release the lock on each iteration. 3327 * We could do it here only because we start from the head all 3328 * the times so there is no risk of missing some entries. 3329 * On the other hand, the risk is that we end up with 3330 * a very inconsistent ruleset, so better keep the lock 3331 * around the whole cycle. 3332 * 3333 * XXX this code can be improved by resetting the head of 3334 * the list to point to the default rule, and then freeing 3335 * the old list without the need for a lock. 
3336 */ 3337 3338 IPFW_WLOCK(&layer3_chain); 3339 layer3_chain.reap = NULL; 3340 free_chain(&layer3_chain, 0 /* keep default rule */); 3341 rule = layer3_chain.reap, layer3_chain.reap = NULL; 3342 IPFW_WUNLOCK(&layer3_chain); 3343 if (layer3_chain.reap != NULL) 3344 reap_rules(rule); 3345 break; 3346 3347 case IP_FW_ADD: 3348 rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK); 3349 error = sooptcopyin(sopt, rule, RULE_MAXSIZE, 3350 sizeof(struct ip_fw) ); 3351 if (error == 0) 3352 error = check_ipfw_struct(rule, sopt->sopt_valsize); 3353 if (error == 0) { 3354 error = add_rule(&layer3_chain, rule); 3355 size = RULESIZE(rule); 3356 if (!error && sopt->sopt_dir == SOPT_GET) 3357 error = sooptcopyout(sopt, rule, size); 3358 } 3359 free(rule, M_TEMP); 3360 break; 3361 3362 case IP_FW_DEL: 3363 /* 3364 * IP_FW_DEL is used for deleting single rules or sets, 3365 * and (ab)used to atomically manipulate sets. Argument size 3366 * is used to distinguish between the two: 3367 * sizeof(u_int32_t) 3368 * delete single rule or set of rules, 3369 * or reassign rules (or sets) to a different set. 3370 * 2*sizeof(u_int32_t) 3371 * atomic disable/enable sets. 3372 * first u_int32_t contains sets to be disabled, 3373 * second u_int32_t contains sets to be enabled. 
3374 */ 3375 error = sooptcopyin(sopt, rulenum, 3376 2*sizeof(u_int32_t), sizeof(u_int32_t)); 3377 if (error) 3378 break; 3379 size = sopt->sopt_valsize; 3380 if (size == sizeof(u_int32_t)) /* delete or reassign */ 3381 error = del_entry(&layer3_chain, rulenum[0]); 3382 else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */ 3383 set_disable = 3384 (set_disable | rulenum[0]) & ~rulenum[1] & 3385 ~(1<<RESVD_SET); /* set RESVD_SET always enabled */ 3386 else 3387 error = EINVAL; 3388 break; 3389 3390 case IP_FW_ZERO: 3391 case IP_FW_RESETLOG: /* argument is an int, the rule number */ 3392 rule_num = 0; 3393 if (sopt->sopt_val != 0) { 3394 error = sooptcopyin(sopt, &rule_num, 3395 sizeof(int), sizeof(int)); 3396 if (error) 3397 break; 3398 } 3399 error = zero_entry(&layer3_chain, rule_num, 3400 sopt->sopt_name == IP_FW_RESETLOG); 3401 break; 3402 3403 case IP_FW_TABLE_ADD: 3404 { 3405 ipfw_table_entry ent; 3406 3407 error = sooptcopyin(sopt, &ent, 3408 sizeof(ent), sizeof(ent)); 3409 if (error) 3410 break; 3411 error = add_table_entry(ent.tbl, ent.addr, 3412 ent.masklen, ent.value); 3413 } 3414 break; 3415 3416 case IP_FW_TABLE_DEL: 3417 { 3418 ipfw_table_entry ent; 3419 3420 error = sooptcopyin(sopt, &ent, 3421 sizeof(ent), sizeof(ent)); 3422 if (error) 3423 break; 3424 error = del_table_entry(ent.tbl, ent.addr, ent.masklen); 3425 } 3426 break; 3427 3428 case IP_FW_TABLE_FLUSH: 3429 { 3430 u_int16_t tbl; 3431 3432 error = sooptcopyin(sopt, &tbl, 3433 sizeof(tbl), sizeof(tbl)); 3434 if (error) 3435 break; 3436 error = flush_table(tbl); 3437 } 3438 break; 3439 3440 case IP_FW_TABLE_GETSIZE: 3441 { 3442 u_int32_t tbl, cnt; 3443 3444 if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), 3445 sizeof(tbl)))) 3446 break; 3447 if ((error = count_table(tbl, &cnt))) 3448 break; 3449 error = sooptcopyout(sopt, &cnt, sizeof(cnt)); 3450 } 3451 break; 3452 3453 case IP_FW_TABLE_LIST: 3454 { 3455 ipfw_table *tbl; 3456 3457 if (sopt->sopt_valsize < sizeof(*tbl)) { 3458 error = 
EINVAL; 3459 break; 3460 } 3461 size = sopt->sopt_valsize; 3462 tbl = malloc(size, M_TEMP, M_WAITOK); 3463 if (tbl == NULL) { 3464 error = ENOMEM; 3465 break; 3466 } 3467 error = sooptcopyin(sopt, tbl, size, sizeof(*tbl)); 3468 if (error) { 3469 free(tbl, M_TEMP); 3470 break; 3471 } 3472 tbl->size = (size - sizeof(*tbl)) / 3473 sizeof(ipfw_table_entry); 3474 error = dump_table(tbl); 3475 if (error) { 3476 free(tbl, M_TEMP); 3477 break; 3478 } 3479 error = sooptcopyout(sopt, tbl, size); 3480 free(tbl, M_TEMP); 3481 } 3482 break; 3483 3484 default: 3485 printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name); 3486 error = EINVAL; 3487 } 3488 3489 return (error); 3490#undef RULE_MAXSIZE 3491} 3492 3493/** 3494 * dummynet needs a reference to the default rule, because rules can be 3495 * deleted while packets hold a reference to them. When this happens, 3496 * dummynet changes the reference to the default rule (it could well be a 3497 * NULL pointer, but this way we do not need to check for the special 3498 * case, plus here he have info on the default behaviour). 3499 */ 3500struct ip_fw *ip_fw_default_rule; 3501 3502/* 3503 * This procedure is only used to handle keepalives. 
It is invoked 3504 * every dyn_keepalive_period 3505 */ 3506static void 3507ipfw_tick(void * __unused unused) 3508{ 3509 int i; 3510 ipfw_dyn_rule *q; 3511 3512 if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0) 3513 goto done; 3514 3515 IPFW_DYN_LOCK(); 3516 for (i = 0 ; i < curr_dyn_buckets ; i++) { 3517 for (q = ipfw_dyn_v[i] ; q ; q = q->next ) { 3518 if (q->dyn_type == O_LIMIT_PARENT) 3519 continue; 3520 if (q->id.proto != IPPROTO_TCP) 3521 continue; 3522 if ( (q->state & BOTH_SYN) != BOTH_SYN) 3523 continue; 3524 if (TIME_LEQ( time_second+dyn_keepalive_interval, 3525 q->expire)) 3526 continue; /* too early */ 3527 if (TIME_LEQ(q->expire, time_second)) 3528 continue; /* too late, rule expired */ 3529 3530 send_pkt(&(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); 3531 send_pkt(&(q->id), q->ack_fwd - 1, q->ack_rev, 0); 3532 } 3533 } 3534 IPFW_DYN_UNLOCK(); 3535done: 3536 callout_reset(&ipfw_timeout, dyn_keepalive_period*hz, ipfw_tick, NULL); 3537} 3538 3539int 3540ipfw_init(void) 3541{ 3542 struct ip_fw default_rule; 3543 int error; 3544 3545 layer3_chain.rules = NULL; 3546 layer3_chain.want_write = 0; 3547 layer3_chain.busy_count = 0; 3548 cv_init(&layer3_chain.cv, "Condition variable for IPFW rw locks"); 3549 IPFW_LOCK_INIT(&layer3_chain); 3550 ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule zone", 3551 sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, 3552 UMA_ALIGN_PTR, 0); 3553 IPFW_DYN_LOCK_INIT(); 3554 callout_init(&ipfw_timeout, NET_CALLOUT_MPSAFE); 3555 3556 bzero(&default_rule, sizeof default_rule); 3557 3558 default_rule.act_ofs = 0; 3559 default_rule.rulenum = IPFW_DEFAULT_RULE; 3560 default_rule.cmd_len = 1; 3561 default_rule.set = RESVD_SET; 3562 3563 default_rule.cmd[0].len = 1; 3564 default_rule.cmd[0].opcode = 3565#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT 3566 1 ? 
O_ACCEPT : 3567#endif 3568 O_DENY; 3569 3570 error = add_rule(&layer3_chain, &default_rule); 3571 if (error != 0) { 3572 printf("ipfw2: error %u initializing default rule " 3573 "(support disabled)\n", error); 3574 IPFW_DYN_LOCK_DESTROY(); 3575 IPFW_LOCK_DESTROY(&layer3_chain); 3576 return (error); 3577 } 3578 3579 ip_fw_default_rule = layer3_chain.rules; 3580 printf("ipfw2 initialized, divert %s, " 3581 "rule-based forwarding " 3582#ifdef IPFIREWALL_FORWARD 3583 "enabled, " 3584#else 3585 "disabled, " 3586#endif 3587 "default to %s, logging ", 3588#ifdef IPDIVERT 3589 "enabled", 3590#else 3591 "loadable", 3592#endif 3593 default_rule.cmd[0].opcode == O_ACCEPT ? "accept" : "deny"); 3594 3595#ifdef IPFIREWALL_VERBOSE 3596 fw_verbose = 1; 3597#endif 3598#ifdef IPFIREWALL_VERBOSE_LIMIT 3599 verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 3600#endif 3601 if (fw_verbose == 0) 3602 printf("disabled\n"); 3603 else if (verbose_limit == 0) 3604 printf("unlimited\n"); 3605 else 3606 printf("limited to %d packets/entry by default\n", 3607 verbose_limit); 3608 3609 init_tables(); 3610 ip_fw_ctl_ptr = ipfw_ctl; 3611 ip_fw_chk_ptr = ipfw_chk; 3612 callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL); 3613 3614 return (0); 3615} 3616 3617void 3618ipfw_destroy(void) 3619{ 3620 struct ip_fw *reap; 3621 3622 ip_fw_chk_ptr = NULL; 3623 ip_fw_ctl_ptr = NULL; 3624 callout_drain(&ipfw_timeout); 3625 IPFW_WLOCK(&layer3_chain); 3626 layer3_chain.reap = NULL; 3627 free_chain(&layer3_chain, 1 /* kill default rule */); 3628 reap = layer3_chain.reap, layer3_chain.reap = NULL; 3629 IPFW_WUNLOCK(&layer3_chain); 3630 if (reap != NULL) 3631 reap_rules(reap); 3632 flush_tables(); 3633 IPFW_DYN_LOCK_DESTROY(); 3634 uma_zdestroy(ipfw_dyn_rule_zone); 3635 IPFW_LOCK_DESTROY(&layer3_chain); 3636 printf("IP firewall unloaded\n"); 3637} 3638 3639#endif /* IPFW2 */ 3640