/* ip_fw2.c — FreeBSD revision 147758 */
1/*- 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: head/sys/netinet/ip_fw2.c 147758 2005-07-03 15:42:22Z mlaier $ 26 */ 27 28#define DEB(x) 29#define DDB(x) x 30 31/* 32 * Implement IP packet firewall (new version) 33 */ 34 35#if !defined(KLD_MODULE) 36#include "opt_ipfw.h" 37#include "opt_ipdn.h" 38#include "opt_inet.h" 39#include "opt_inet6.h" 40#include "opt_ipsec.h" 41#ifndef INET 42#error IPFIREWALL requires INET. 
43#endif /* INET */ 44#endif 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/condvar.h> 49#include <sys/malloc.h> 50#include <sys/mbuf.h> 51#include <sys/kernel.h> 52#include <sys/jail.h> 53#include <sys/module.h> 54#include <sys/proc.h> 55#include <sys/socket.h> 56#include <sys/socketvar.h> 57#include <sys/sysctl.h> 58#include <sys/syslog.h> 59#include <sys/ucred.h> 60#include <net/if.h> 61#include <net/radix.h> 62#include <net/route.h> 63#include <netinet/in.h> 64#include <netinet/in_systm.h> 65#include <netinet/in_var.h> 66#include <netinet/in_pcb.h> 67#include <netinet/ip.h> 68#include <netinet/ip_var.h> 69#include <netinet/ip_icmp.h> 70#include <netinet/ip_fw.h> 71#include <netinet/ip_divert.h> 72#include <netinet/ip_dummynet.h> 73#include <netinet/tcp.h> 74#include <netinet/tcp_timer.h> 75#include <netinet/tcp_var.h> 76#include <netinet/tcpip.h> 77#include <netinet/udp.h> 78#include <netinet/udp_var.h> 79 80#include <netgraph/ng_ipfw.h> 81 82#include <altq/if_altq.h> 83 84#ifdef IPSEC 85#include <netinet6/ipsec.h> 86#endif 87 88#include <netinet/ip6.h> 89#include <netinet/icmp6.h> 90 91#include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */ 92 93#include <machine/in_cksum.h> /* XXX for in_cksum */ 94 95/* 96 * set_disable contains one bit per set value (0..31). 97 * If the bit is set, all rules with the corresponding set 98 * are disabled. Set RESVD_SET(31) is reserved for the default rule 99 * and rules that are not deleted by the flush command, 100 * and CANNOT be disabled. 101 * Rules in set RESVD_SET can only be deleted explicitly. 102 */ 103static u_int32_t set_disable; 104 105static int fw_verbose; 106static int verbose_limit; 107 108static struct callout ipfw_timeout; 109static uma_zone_t ipfw_dyn_rule_zone; 110#define IPFW_DEFAULT_RULE 65535 111 112/* 113 * Data structure to cache our ucred related 114 * information. This structure only gets used if 115 * the user specified UID/GID based constraints in 116 * a firewall rule. 
117 */ 118struct ip_fw_ugid { 119 gid_t fw_groups[NGROUPS]; 120 int fw_ngroups; 121 uid_t fw_uid; 122 int fw_prid; 123}; 124 125struct ip_fw_chain { 126 struct ip_fw *rules; /* list of rules */ 127 struct ip_fw *reap; /* list of rules to reap */ 128 struct mtx mtx; /* lock guarding rule list */ 129 int busy_count; /* busy count for rw locks */ 130 int want_write; 131 struct cv cv; 132}; 133#define IPFW_LOCK_INIT(_chain) \ 134 mtx_init(&(_chain)->mtx, "IPFW static rules", NULL, \ 135 MTX_DEF | MTX_RECURSE) 136#define IPFW_LOCK_DESTROY(_chain) mtx_destroy(&(_chain)->mtx) 137#define IPFW_WLOCK_ASSERT(_chain) do { \ 138 mtx_assert(&(_chain)->mtx, MA_OWNED); \ 139 NET_ASSERT_GIANT(); \ 140} while (0) 141 142static __inline void 143IPFW_RLOCK(struct ip_fw_chain *chain) 144{ 145 mtx_lock(&chain->mtx); 146 chain->busy_count++; 147 mtx_unlock(&chain->mtx); 148} 149 150static __inline void 151IPFW_RUNLOCK(struct ip_fw_chain *chain) 152{ 153 mtx_lock(&chain->mtx); 154 chain->busy_count--; 155 if (chain->busy_count == 0 && chain->want_write) 156 cv_signal(&chain->cv); 157 mtx_unlock(&chain->mtx); 158} 159 160static __inline void 161IPFW_WLOCK(struct ip_fw_chain *chain) 162{ 163 mtx_lock(&chain->mtx); 164 chain->want_write++; 165 while (chain->busy_count > 0) 166 cv_wait(&chain->cv, &chain->mtx); 167} 168 169static __inline void 170IPFW_WUNLOCK(struct ip_fw_chain *chain) 171{ 172 chain->want_write--; 173 cv_signal(&chain->cv); 174 mtx_unlock(&chain->mtx); 175} 176 177/* 178 * list of rules for layer 3 179 */ 180static struct ip_fw_chain layer3_chain; 181 182MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); 183MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); 184 185struct table_entry { 186 struct radix_node rn[2]; 187 struct sockaddr_in addr, mask; 188 u_int32_t value; 189}; 190 191#define IPFW_TABLES_MAX 128 192static struct ip_fw_table { 193 struct radix_node_head *rnh; 194 int modified; 195 in_addr_t last_addr; 196 int last_match; 197 u_int32_t last_value; 
198} ipfw_tables[IPFW_TABLES_MAX]; 199 200static int fw_debug = 1; 201static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ 202 203#ifdef SYSCTL_NODE 204SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); 205SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable, 206 CTLFLAG_RW | CTLFLAG_SECURE3, 207 &fw_enable, 0, "Enable ipfw"); 208SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW, 209 &autoinc_step, 0, "Rule number autincrement step"); 210SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, 211 CTLFLAG_RW | CTLFLAG_SECURE3, 212 &fw_one_pass, 0, 213 "Only do a single pass through ipfw when using dummynet(4)"); 214SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW, 215 &fw_debug, 0, "Enable printing of debug ip_fw statements"); 216SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, 217 CTLFLAG_RW | CTLFLAG_SECURE3, 218 &fw_verbose, 0, "Log matches to ipfw rules"); 219SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, 220 &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); 221 222/* 223 * Description of dynamic rules. 224 * 225 * Dynamic rules are stored in lists accessed through a hash table 226 * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can 227 * be modified through the sysctl variable dyn_buckets which is 228 * updated when the table becomes empty. 229 * 230 * XXX currently there is only one list, ipfw_dyn. 231 * 232 * When a packet is received, its address fields are first masked 233 * with the mask defined for the rule, then hashed, then matched 234 * against the entries in the corresponding list. 235 * Dynamic rules can be used for different purposes: 236 * + stateful rules; 237 * + enforcing limits on the number of sessions; 238 * + in-kernel NAT (not implemented yet) 239 * 240 * The lifetime of dynamic rules is regulated by dyn_*_lifetime, 241 * measured in seconds and depending on the flags. 242 * 243 * The total number of dynamic rules is stored in dyn_count. 
244 * The max number of dynamic rules is dyn_max. When we reach 245 * the maximum number of rules we do not create anymore. This is 246 * done to avoid consuming too much memory, but also too much 247 * time when searching on each packet (ideally, we should try instead 248 * to put a limit on the length of the list on each bucket...). 249 * 250 * Each dynamic rule holds a pointer to the parent ipfw rule so 251 * we know what action to perform. Dynamic rules are removed when 252 * the parent rule is deleted. XXX we should make them survive. 253 * 254 * There are some limitations with dynamic rules -- we do not 255 * obey the 'randomized match', and we do not do multiple 256 * passes through the firewall. XXX check the latter!!! 257 */ 258static ipfw_dyn_rule **ipfw_dyn_v = NULL; 259static u_int32_t dyn_buckets = 256; /* must be power of 2 */ 260static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */ 261 262static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ 263#define IPFW_DYN_LOCK_INIT() \ 264 mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF) 265#define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx) 266#define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx) 267#define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) 268#define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) 269 270/* 271 * Timeouts for various events in handing dynamic rules. 272 */ 273static u_int32_t dyn_ack_lifetime = 300; 274static u_int32_t dyn_syn_lifetime = 20; 275static u_int32_t dyn_fin_lifetime = 1; 276static u_int32_t dyn_rst_lifetime = 1; 277static u_int32_t dyn_udp_lifetime = 10; 278static u_int32_t dyn_short_lifetime = 5; 279 280/* 281 * Keepalives are sent if dyn_keepalive is set. They are sent every 282 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 283 * seconds of lifetime of a rule. 284 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower 285 * than dyn_keepalive_period. 
286 */ 287 288static u_int32_t dyn_keepalive_interval = 20; 289static u_int32_t dyn_keepalive_period = 5; 290static u_int32_t dyn_keepalive = 1; /* do send keepalives */ 291 292static u_int32_t static_count; /* # of static rules */ 293static u_int32_t static_len; /* size in bytes of static rules */ 294static u_int32_t dyn_count; /* # of dynamic rules */ 295static u_int32_t dyn_max = 4096; /* max # of dynamic rules */ 296 297SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW, 298 &dyn_buckets, 0, "Number of dyn. buckets"); 299SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, 300 &curr_dyn_buckets, 0, "Current Number of dyn. buckets"); 301SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, 302 &dyn_count, 0, "Number of dyn. rules"); 303SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, 304 &dyn_max, 0, "Max number of dyn. rules"); 305SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, 306 &static_count, 0, "Number of static rules"); 307SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, 308 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); 309SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, 310 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); 311SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, 312 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); 313SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, 314 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); 315SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, 316 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); 317SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, 318 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); 319SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, 320 &dyn_keepalive, 0, "Enable keepalives for dyn. 
rules"); 321 322#endif /* SYSCTL_NODE */ 323 324 325/* 326 * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T 327 * Other macros just cast void * into the appropriate type 328 */ 329#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) 330#define TCP(p) ((struct tcphdr *)(p)) 331#define UDP(p) ((struct udphdr *)(p)) 332#define ICMP(p) ((struct icmphdr *)(p)) 333#define ICMP6(p) ((struct icmp6_hdr *)(p)) 334 335static __inline int 336icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) 337{ 338 int type = icmp->icmp_type; 339 340 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) ); 341} 342 343#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \ 344 (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) ) 345 346static int 347is_icmp_query(struct icmphdr *icmp) 348{ 349 int type = icmp->icmp_type; 350 351 return (type <= ICMP_MAXTYPE && (TT & (1<<type)) ); 352} 353#undef TT 354 355/* 356 * The following checks use two arrays of 8 or 16 bits to store the 357 * bits that we want set or clear, respectively. They are in the 358 * low and high half of cmd->arg1 or cmd->d[0]. 359 * 360 * We scan options and store the bits we find set. We succeed if 361 * 362 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 363 * 364 * The code is sometimes optimized not to store additional variables. 
365 */ 366 367static int 368flags_match(ipfw_insn *cmd, u_int8_t bits) 369{ 370 u_char want_clear; 371 bits = ~bits; 372 373 if ( ((cmd->arg1 & 0xff) & bits) != 0) 374 return 0; /* some bits we want set were clear */ 375 want_clear = (cmd->arg1 >> 8) & 0xff; 376 if ( (want_clear & bits) != want_clear) 377 return 0; /* some bits we want clear were set */ 378 return 1; 379} 380 381static int 382ipopts_match(struct ip *ip, ipfw_insn *cmd) 383{ 384 int optlen, bits = 0; 385 u_char *cp = (u_char *)(ip + 1); 386 int x = (ip->ip_hl << 2) - sizeof (struct ip); 387 388 for (; x > 0; x -= optlen, cp += optlen) { 389 int opt = cp[IPOPT_OPTVAL]; 390 391 if (opt == IPOPT_EOL) 392 break; 393 if (opt == IPOPT_NOP) 394 optlen = 1; 395 else { 396 optlen = cp[IPOPT_OLEN]; 397 if (optlen <= 0 || optlen > x) 398 return 0; /* invalid or truncated */ 399 } 400 switch (opt) { 401 402 default: 403 break; 404 405 case IPOPT_LSRR: 406 bits |= IP_FW_IPOPT_LSRR; 407 break; 408 409 case IPOPT_SSRR: 410 bits |= IP_FW_IPOPT_SSRR; 411 break; 412 413 case IPOPT_RR: 414 bits |= IP_FW_IPOPT_RR; 415 break; 416 417 case IPOPT_TS: 418 bits |= IP_FW_IPOPT_TS; 419 break; 420 } 421 } 422 return (flags_match(cmd, bits)); 423} 424 425static int 426tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) 427{ 428 int optlen, bits = 0; 429 u_char *cp = (u_char *)(tcp + 1); 430 int x = (tcp->th_off << 2) - sizeof(struct tcphdr); 431 432 for (; x > 0; x -= optlen, cp += optlen) { 433 int opt = cp[0]; 434 if (opt == TCPOPT_EOL) 435 break; 436 if (opt == TCPOPT_NOP) 437 optlen = 1; 438 else { 439 optlen = cp[1]; 440 if (optlen <= 0) 441 break; 442 } 443 444 switch (opt) { 445 446 default: 447 break; 448 449 case TCPOPT_MAXSEG: 450 bits |= IP_FW_TCPOPT_MSS; 451 break; 452 453 case TCPOPT_WINDOW: 454 bits |= IP_FW_TCPOPT_WINDOW; 455 break; 456 457 case TCPOPT_SACK_PERMITTED: 458 case TCPOPT_SACK: 459 bits |= IP_FW_TCPOPT_SACK; 460 break; 461 462 case TCPOPT_TIMESTAMP: 463 bits |= IP_FW_TCPOPT_TS; 464 break; 465 466 } 467 
} 468 return (flags_match(cmd, bits)); 469} 470 471static int 472iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) 473{ 474 if (ifp == NULL) /* no iface with this packet, match fails */ 475 return 0; 476 /* Check by name or by IP address */ 477 if (cmd->name[0] != '\0') { /* match by name */ 478 /* Check name */ 479 if (cmd->p.glob) { 480 if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) 481 return(1); 482 } else { 483 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) 484 return(1); 485 } 486 } else { 487 struct ifaddr *ia; 488 489 /* XXX lock? */ 490 TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { 491 if (ia->ifa_addr == NULL) 492 continue; 493 if (ia->ifa_addr->sa_family != AF_INET) 494 continue; 495 if (cmd->p.ip.s_addr == ((struct sockaddr_in *) 496 (ia->ifa_addr))->sin_addr.s_addr) 497 return(1); /* match */ 498 } 499 } 500 return(0); /* no match, fail ... */ 501} 502 503/* 504 * The verify_path function checks if a route to the src exists and 505 * if it is reachable via ifp (when provided). 506 * 507 * The 'verrevpath' option checks that the interface that an IP packet 508 * arrives on is the same interface that traffic destined for the 509 * packet's source address would be routed out of. The 'versrcreach' 510 * option just checks that the source address is reachable via any route 511 * (except default) in the routing table. These two are a measure to block 512 * forged packets. This is also commonly known as "anti-spoofing" or Unicast 513 * Reverse Path Forwarding (Unicast RFP) in Cisco-ese. The name of the knobs 514 * is purposely reminiscent of the Cisco IOS command, 515 * 516 * ip verify unicast reverse-path 517 * ip verify unicast source reachable-via any 518 * 519 * which implements the same functionality. But note that syntax is 520 * misleading. The check may be performed on all IP packets whether unicast, 521 * multicast, or broadcast. 
522 */ 523static int 524verify_path(struct in_addr src, struct ifnet *ifp) 525{ 526 struct route ro; 527 struct sockaddr_in *dst; 528 529 bzero(&ro, sizeof(ro)); 530 531 dst = (struct sockaddr_in *)&(ro.ro_dst); 532 dst->sin_family = AF_INET; 533 dst->sin_len = sizeof(*dst); 534 dst->sin_addr = src; 535 rtalloc_ign(&ro, RTF_CLONING); 536 537 if (ro.ro_rt == NULL) 538 return 0; 539 540 /* if ifp is provided, check for equality with rtentry */ 541 if (ifp != NULL && ro.ro_rt->rt_ifp != ifp) { 542 RTFREE(ro.ro_rt); 543 return 0; 544 } 545 546 /* if no ifp provided, check if rtentry is not default route */ 547 if (ifp == NULL && 548 satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) { 549 RTFREE(ro.ro_rt); 550 return 0; 551 } 552 553 /* or if this is a blackhole/reject route */ 554 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 555 RTFREE(ro.ro_rt); 556 return 0; 557 } 558 559 /* found valid route */ 560 RTFREE(ro.ro_rt); 561 return 1; 562} 563 564#ifdef INET6 565/* 566 * ipv6 specific rules here... 
567 */ 568static __inline int 569icmp6type_match (int type, ipfw_insn_u32 *cmd) 570{ 571 return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); 572} 573 574static int 575flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) 576{ 577 int i; 578 for (i=0; i <= cmd->o.arg1; ++i ) 579 if (curr_flow == cmd->d[i] ) 580 return 1; 581 return 0; 582} 583 584/* support for IP6_*_ME opcodes */ 585static int 586search_ip6_addr_net (struct in6_addr * ip6_addr) 587{ 588 struct ifnet *mdc; 589 struct ifaddr *mdc2; 590 struct in6_ifaddr *fdm; 591 struct in6_addr copia; 592 593 TAILQ_FOREACH(mdc, &ifnet, if_link) 594 for (mdc2 = mdc->if_addrlist.tqh_first; mdc2; 595 mdc2 = mdc2->ifa_list.tqe_next) { 596 if (!mdc2->ifa_addr) 597 continue; 598 if (mdc2->ifa_addr->sa_family == AF_INET6) { 599 fdm = (struct in6_ifaddr *)mdc2; 600 copia = fdm->ia_addr.sin6_addr; 601 /* need for leaving scope_id in the sock_addr */ 602 in6_clearscope(&copia); 603 if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) 604 return 1; 605 } 606 } 607 return 0; 608} 609 610static int 611verify_path6(struct in6_addr *src, struct ifnet *ifp) 612{ 613 struct route_in6 ro; 614 struct sockaddr_in6 *dst; 615 616 bzero(&ro, sizeof(ro)); 617 618 dst = (struct sockaddr_in6 * )&(ro.ro_dst); 619 dst->sin6_family = AF_INET6; 620 dst->sin6_len = sizeof(*dst); 621 dst->sin6_addr = *src; 622 rtalloc_ign((struct route *)&ro, RTF_CLONING); 623 624 if (ro.ro_rt == NULL) 625 return 0; 626 627 /* if ifp is provided, check for equality with rtentry */ 628 if (ifp != NULL && ro.ro_rt->rt_ifp != ifp) { 629 RTFREE(ro.ro_rt); 630 return 0; 631 } 632 633 /* if no ifp provided, check if rtentry is not default route */ 634 if (ifp == NULL && 635 IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) { 636 RTFREE(ro.ro_rt); 637 return 0; 638 } 639 640 /* or if this is a blackhole/reject route */ 641 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 642 RTFREE(ro.ro_rt); 643 return 0; 644 } 645 646 /* found 
valid route */ 647 RTFREE(ro.ro_rt); 648 return 1; 649 650} 651static __inline int 652hash_packet6(struct ipfw_flow_id *id) 653{ 654 u_int32_t i; 655 i = (id->dst_ip6.__u6_addr.__u6_addr32[0]) ^ 656 (id->dst_ip6.__u6_addr.__u6_addr32[1]) ^ 657 (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ 658 (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ 659 (id->dst_port) ^ (id->src_port) ^ (id->flow_id6); 660 return i; 661} 662/* end of ipv6 opcodes */ 663 664#endif /* INET6 */ 665 666static u_int64_t norule_counter; /* counter for ipfw_log(NULL...) */ 667 668#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 669#define SNP(buf) buf, sizeof(buf) 670 671/* 672 * We enter here when we have a rule with O_LOG. 673 * XXX this function alone takes about 2Kbytes of code! 674 */ 675static void 676ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh, 677 struct mbuf *m, struct ifnet *oif) 678{ 679 char *action; 680 int limit_reached = 0; 681 char action2[40], proto[48], fragment[28]; 682 683 fragment[0] = '\0'; 684 proto[0] = '\0'; 685 686 if (f == NULL) { /* bogus pkt */ 687 if (verbose_limit != 0 && norule_counter >= verbose_limit) 688 return; 689 norule_counter++; 690 if (norule_counter == verbose_limit) 691 limit_reached = verbose_limit; 692 action = "Refuse"; 693 } else { /* O_LOG is the first action, find the real one */ 694 ipfw_insn *cmd = ACTION_PTR(f); 695 ipfw_insn_log *l = (ipfw_insn_log *)cmd; 696 697 if (l->max_log != 0 && l->log_left == 0) 698 return; 699 l->log_left--; 700 if (l->log_left == 0) 701 limit_reached = l->max_log; 702 cmd += F_LEN(cmd); /* point to first action */ 703 if (cmd->opcode == O_ALTQ) { 704 ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; 705 706 snprintf(SNPARGS(action2, 0), "Altq %d", 707 altq->qid); 708 cmd += F_LEN(cmd); 709 } 710 if (cmd->opcode == O_PROB) 711 cmd += F_LEN(cmd); 712 713 action = action2; 714 switch (cmd->opcode) { 715 case O_DENY: 716 action = "Deny"; 717 break; 718 719 case O_REJECT: 720 if 
(cmd->arg1==ICMP_REJECT_RST) 721 action = "Reset"; 722 else if (cmd->arg1==ICMP_UNREACH_HOST) 723 action = "Reject"; 724 else 725 snprintf(SNPARGS(action2, 0), "Unreach %d", 726 cmd->arg1); 727 break; 728 729 case O_ACCEPT: 730 action = "Accept"; 731 break; 732 case O_COUNT: 733 action = "Count"; 734 break; 735 case O_DIVERT: 736 snprintf(SNPARGS(action2, 0), "Divert %d", 737 cmd->arg1); 738 break; 739 case O_TEE: 740 snprintf(SNPARGS(action2, 0), "Tee %d", 741 cmd->arg1); 742 break; 743 case O_SKIPTO: 744 snprintf(SNPARGS(action2, 0), "SkipTo %d", 745 cmd->arg1); 746 break; 747 case O_PIPE: 748 snprintf(SNPARGS(action2, 0), "Pipe %d", 749 cmd->arg1); 750 break; 751 case O_QUEUE: 752 snprintf(SNPARGS(action2, 0), "Queue %d", 753 cmd->arg1); 754 break; 755 case O_FORWARD_IP: { 756 ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; 757 int len; 758 759 len = snprintf(SNPARGS(action2, 0), "Forward to %s", 760 inet_ntoa(sa->sa.sin_addr)); 761 if (sa->sa.sin_port) 762 snprintf(SNPARGS(action2, len), ":%d", 763 sa->sa.sin_port); 764 } 765 break; 766 case O_NETGRAPH: 767 snprintf(SNPARGS(action2, 0), "Netgraph %d", 768 cmd->arg1); 769 break; 770 case O_NGTEE: 771 snprintf(SNPARGS(action2, 0), "Ngtee %d", 772 cmd->arg1); 773 break; 774 default: 775 action = "UNKNOWN"; 776 break; 777 } 778 } 779 780 if (hlen == 0) { /* non-ip */ 781 snprintf(SNPARGS(proto, 0), "MAC"); 782 } else { 783 struct ip *ip = mtod(m, struct ip *); 784 /* these three are all aliases to the same thing */ 785 struct icmphdr *const icmp = L3HDR(struct icmphdr, ip); 786 struct tcphdr *const tcp = (struct tcphdr *)icmp; 787 struct udphdr *const udp = (struct udphdr *)icmp; 788 789 int ip_off, offset, ip_len; 790 791 int len; 792 793 if (eh != NULL) { /* layer 2 packets are as on the wire */ 794 ip_off = ntohs(ip->ip_off); 795 ip_len = ntohs(ip->ip_len); 796 } else { 797 ip_off = ip->ip_off; 798 ip_len = ip->ip_len; 799 } 800 offset = ip_off & IP_OFFMASK; 801 switch (ip->ip_p) { 802 case IPPROTO_TCP: 803 len = 
snprintf(SNPARGS(proto, 0), "TCP %s", 804 inet_ntoa(ip->ip_src)); 805 if (offset == 0) 806 snprintf(SNPARGS(proto, len), ":%d %s:%d", 807 ntohs(tcp->th_sport), 808 inet_ntoa(ip->ip_dst), 809 ntohs(tcp->th_dport)); 810 else 811 snprintf(SNPARGS(proto, len), " %s", 812 inet_ntoa(ip->ip_dst)); 813 break; 814 815 case IPPROTO_UDP: 816 len = snprintf(SNPARGS(proto, 0), "UDP %s", 817 inet_ntoa(ip->ip_src)); 818 if (offset == 0) 819 snprintf(SNPARGS(proto, len), ":%d %s:%d", 820 ntohs(udp->uh_sport), 821 inet_ntoa(ip->ip_dst), 822 ntohs(udp->uh_dport)); 823 else 824 snprintf(SNPARGS(proto, len), " %s", 825 inet_ntoa(ip->ip_dst)); 826 break; 827 828 case IPPROTO_ICMP: 829 if (offset == 0) 830 len = snprintf(SNPARGS(proto, 0), 831 "ICMP:%u.%u ", 832 icmp->icmp_type, icmp->icmp_code); 833 else 834 len = snprintf(SNPARGS(proto, 0), "ICMP "); 835 len += snprintf(SNPARGS(proto, len), "%s", 836 inet_ntoa(ip->ip_src)); 837 snprintf(SNPARGS(proto, len), " %s", 838 inet_ntoa(ip->ip_dst)); 839 break; 840 841 default: 842 len = snprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p, 843 inet_ntoa(ip->ip_src)); 844 snprintf(SNPARGS(proto, len), " %s", 845 inet_ntoa(ip->ip_dst)); 846 break; 847 } 848 849 if (ip_off & (IP_MF | IP_OFFMASK)) 850 snprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", 851 ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), 852 offset << 3, 853 (ip_off & IP_MF) ? "+" : ""); 854 } 855 if (oif || m->m_pkthdr.rcvif) 856 log(LOG_SECURITY | LOG_INFO, 857 "ipfw: %d %s %s %s via %s%s\n", 858 f ? f->rulenum : -1, 859 action, proto, oif ? "out" : "in", 860 oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, 861 fragment); 862 else 863 log(LOG_SECURITY | LOG_INFO, 864 "ipfw: %d %s %s [no if info]%s\n", 865 f ? f->rulenum : -1, 866 action, proto, fragment); 867 if (limit_reached) 868 log(LOG_SECURITY | LOG_NOTICE, 869 "ipfw: limit %d reached on entry %d\n", 870 limit_reached, f ? 
f->rulenum : -1); 871} 872 873/* 874 * IMPORTANT: the hash function for dynamic rules must be commutative 875 * in source and destination (ip,port), because rules are bidirectional 876 * and we want to find both in the same bucket. 877 */ 878static __inline int 879hash_packet(struct ipfw_flow_id *id) 880{ 881 u_int32_t i; 882 883#ifdef INET6 884 if (IS_IP6_FLOW_ID(id)) 885 i = hash_packet6(id); 886 else 887#endif /* INET6 */ 888 i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); 889 i &= (curr_dyn_buckets - 1); 890 return i; 891} 892 893/** 894 * unlink a dynamic rule from a chain. prev is a pointer to 895 * the previous one, q is a pointer to the rule to delete, 896 * head is a pointer to the head of the queue. 897 * Modifies q and potentially also head. 898 */ 899#define UNLINK_DYN_RULE(prev, head, q) { \ 900 ipfw_dyn_rule *old_q = q; \ 901 \ 902 /* remove a refcount to the parent */ \ 903 if (q->dyn_type == O_LIMIT) \ 904 q->parent->count--; \ 905 DEB(printf("ipfw: unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",\ 906 (q->id.src_ip), (q->id.src_port), \ 907 (q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); ) \ 908 if (prev != NULL) \ 909 prev->next = q = q->next; \ 910 else \ 911 head = q = q->next; \ 912 dyn_count--; \ 913 uma_zfree(ipfw_dyn_rule_zone, old_q); } 914 915#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) 916 917/** 918 * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. 919 * 920 * If keep_me == NULL, rules are deleted even if not expired, 921 * otherwise only expired rules are removed. 922 * 923 * The value of the second parameter is also used to point to identify 924 * a rule we absolutely do not want to remove (e.g. because we are 925 * holding a reference to it -- this is the case with O_LIMIT_PARENT 926 * rules). The pointer is only used for comparison, so any non-null 927 * value will do. 
928 */ 929static void 930remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) 931{ 932 static u_int32_t last_remove = 0; 933 934#define FORCE (keep_me == NULL) 935 936 ipfw_dyn_rule *prev, *q; 937 int i, pass = 0, max_pass = 0; 938 939 IPFW_DYN_LOCK_ASSERT(); 940 941 if (ipfw_dyn_v == NULL || dyn_count == 0) 942 return; 943 /* do not expire more than once per second, it is useless */ 944 if (!FORCE && last_remove == time_second) 945 return; 946 last_remove = time_second; 947 948 /* 949 * because O_LIMIT refer to parent rules, during the first pass only 950 * remove child and mark any pending LIMIT_PARENT, and remove 951 * them in a second pass. 952 */ 953next_pass: 954 for (i = 0 ; i < curr_dyn_buckets ; i++) { 955 for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) { 956 /* 957 * Logic can become complex here, so we split tests. 958 */ 959 if (q == keep_me) 960 goto next; 961 if (rule != NULL && rule != q->rule) 962 goto next; /* not the one we are looking for */ 963 if (q->dyn_type == O_LIMIT_PARENT) { 964 /* 965 * handle parent in the second pass, 966 * record we need one. 967 */ 968 max_pass = 1; 969 if (pass == 0) 970 goto next; 971 if (FORCE && q->count != 0 ) { 972 /* XXX should not happen! */ 973 printf("ipfw: OUCH! cannot remove rule," 974 " count %d\n", q->count); 975 } 976 } else { 977 if (!FORCE && 978 !TIME_LEQ( q->expire, time_second )) 979 goto next; 980 } 981 if (q->dyn_type != O_LIMIT_PARENT || !q->count) { 982 UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); 983 continue; 984 } 985next: 986 prev=q; 987 q=q->next; 988 } 989 } 990 if (pass++ < max_pass) 991 goto next_pass; 992} 993 994 995/** 996 * lookup a dynamic rule. 997 */ 998static ipfw_dyn_rule * 999lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction, 1000 struct tcphdr *tcp) 1001{ 1002 /* 1003 * stateful ipfw extensions. 
1004 * Lookup into dynamic session queue 1005 */ 1006#define MATCH_REVERSE 0 1007#define MATCH_FORWARD 1 1008#define MATCH_NONE 2 1009#define MATCH_UNKNOWN 3 1010 int i, dir = MATCH_NONE; 1011 ipfw_dyn_rule *prev, *q=NULL; 1012 1013 IPFW_DYN_LOCK_ASSERT(); 1014 1015 if (ipfw_dyn_v == NULL) 1016 goto done; /* not found */ 1017 i = hash_packet( pkt ); 1018 for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) { 1019 if (q->dyn_type == O_LIMIT_PARENT && q->count) 1020 goto next; 1021 if (TIME_LEQ( q->expire, time_second)) { /* expire entry */ 1022 UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); 1023 continue; 1024 } 1025 if (pkt->proto == q->id.proto && 1026 q->dyn_type != O_LIMIT_PARENT) { 1027 if (IS_IP6_FLOW_ID(pkt)) { 1028 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 1029 &(q->id.src_ip6)) && 1030 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 1031 &(q->id.dst_ip6)) && 1032 pkt->src_port == q->id.src_port && 1033 pkt->dst_port == q->id.dst_port ) { 1034 dir = MATCH_FORWARD; 1035 break; 1036 } 1037 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 1038 &(q->id.dst_ip6)) && 1039 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 1040 &(q->id.src_ip6)) && 1041 pkt->src_port == q->id.dst_port && 1042 pkt->dst_port == q->id.src_port ) { 1043 dir = MATCH_REVERSE; 1044 break; 1045 } 1046 } else { 1047 if (pkt->src_ip == q->id.src_ip && 1048 pkt->dst_ip == q->id.dst_ip && 1049 pkt->src_port == q->id.src_port && 1050 pkt->dst_port == q->id.dst_port ) { 1051 dir = MATCH_FORWARD; 1052 break; 1053 } 1054 if (pkt->src_ip == q->id.dst_ip && 1055 pkt->dst_ip == q->id.src_ip && 1056 pkt->src_port == q->id.dst_port && 1057 pkt->dst_port == q->id.src_port ) { 1058 dir = MATCH_REVERSE; 1059 break; 1060 } 1061 } 1062 } 1063next: 1064 prev = q; 1065 q = q->next; 1066 } 1067 if (q == NULL) 1068 goto done; /* q = NULL, not found */ 1069 1070 if ( prev != NULL) { /* found and not in front */ 1071 prev->next = q->next; 1072 q->next = ipfw_dyn_v[i]; 1073 ipfw_dyn_v[i] = q; 1074 } 1075 if (pkt->proto == IPPROTO_TCP) { /* update state 
according to flags */ 1076 u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); 1077 1078#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 1079#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 1080 q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); 1081 switch (q->state) { 1082 case TH_SYN: /* opening */ 1083 q->expire = time_second + dyn_syn_lifetime; 1084 break; 1085 1086 case BOTH_SYN: /* move to established */ 1087 case BOTH_SYN | TH_FIN : /* one side tries to close */ 1088 case BOTH_SYN | (TH_FIN << 8) : 1089 if (tcp) { 1090#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) 1091 u_int32_t ack = ntohl(tcp->th_ack); 1092 if (dir == MATCH_FORWARD) { 1093 if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) 1094 q->ack_fwd = ack; 1095 else { /* ignore out-of-sequence */ 1096 break; 1097 } 1098 } else { 1099 if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) 1100 q->ack_rev = ack; 1101 else { /* ignore out-of-sequence */ 1102 break; 1103 } 1104 } 1105 } 1106 q->expire = time_second + dyn_ack_lifetime; 1107 break; 1108 1109 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 1110 if (dyn_fin_lifetime >= dyn_keepalive_period) 1111 dyn_fin_lifetime = dyn_keepalive_period - 1; 1112 q->expire = time_second + dyn_fin_lifetime; 1113 break; 1114 1115 default: 1116#if 0 1117 /* 1118 * reset or some invalid combination, but can also 1119 * occur if we use keep-state the wrong way. 
1120 */ 1121 if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) 1122 printf("invalid state: 0x%x\n", q->state); 1123#endif 1124 if (dyn_rst_lifetime >= dyn_keepalive_period) 1125 dyn_rst_lifetime = dyn_keepalive_period - 1; 1126 q->expire = time_second + dyn_rst_lifetime; 1127 break; 1128 } 1129 } else if (pkt->proto == IPPROTO_UDP) { 1130 q->expire = time_second + dyn_udp_lifetime; 1131 } else { 1132 /* other protocols */ 1133 q->expire = time_second + dyn_short_lifetime; 1134 } 1135done: 1136 if (match_direction) 1137 *match_direction = dir; 1138 return q; 1139} 1140 1141static ipfw_dyn_rule * 1142lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, 1143 struct tcphdr *tcp) 1144{ 1145 ipfw_dyn_rule *q; 1146 1147 IPFW_DYN_LOCK(); 1148 q = lookup_dyn_rule_locked(pkt, match_direction, tcp); 1149 if (q == NULL) 1150 IPFW_DYN_UNLOCK(); 1151 /* NB: return table locked when q is not NULL */ 1152 return q; 1153} 1154 1155static void 1156realloc_dynamic_table(void) 1157{ 1158 IPFW_DYN_LOCK_ASSERT(); 1159 1160 /* 1161 * Try reallocation, make sure we have a power of 2 and do 1162 * not allow more than 64k entries. In case of overflow, 1163 * default to 1024. 1164 */ 1165 1166 if (dyn_buckets > 65536) 1167 dyn_buckets = 1024; 1168 if ((dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */ 1169 dyn_buckets = curr_dyn_buckets; /* reset */ 1170 return; 1171 } 1172 curr_dyn_buckets = dyn_buckets; 1173 if (ipfw_dyn_v != NULL) 1174 free(ipfw_dyn_v, M_IPFW); 1175 for (;;) { 1176 ipfw_dyn_v = malloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *), 1177 M_IPFW, M_NOWAIT | M_ZERO); 1178 if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2) 1179 break; 1180 curr_dyn_buckets /= 2; 1181 } 1182} 1183 1184/** 1185 * Install state of type 'type' for a dynamic session. 1186 * The hash table contains two type of rules: 1187 * - regular rules (O_KEEP_STATE) 1188 * - rules for sessions with limited number of sess per user 1189 * (O_LIMIT). 
When they are created, the parent is 1190 * increased by 1, and decreased on delete. In this case, 1191 * the third parameter is the parent rule and not the chain. 1192 * - "parent" rules for the above (O_LIMIT_PARENT). 1193 */ 1194static ipfw_dyn_rule * 1195add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) 1196{ 1197 ipfw_dyn_rule *r; 1198 int i; 1199 1200 IPFW_DYN_LOCK_ASSERT(); 1201 1202 if (ipfw_dyn_v == NULL || 1203 (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) { 1204 realloc_dynamic_table(); 1205 if (ipfw_dyn_v == NULL) 1206 return NULL; /* failed ! */ 1207 } 1208 i = hash_packet(id); 1209 1210 r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); 1211 if (r == NULL) { 1212 printf ("ipfw: sorry cannot allocate state\n"); 1213 return NULL; 1214 } 1215 1216 /* increase refcount on parent, and set pointer */ 1217 if (dyn_type == O_LIMIT) { 1218 ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; 1219 if ( parent->dyn_type != O_LIMIT_PARENT) 1220 panic("invalid parent"); 1221 parent->count++; 1222 r->parent = parent; 1223 rule = parent->rule; 1224 } 1225 1226 r->id = *id; 1227 r->expire = time_second + dyn_syn_lifetime; 1228 r->rule = rule; 1229 r->dyn_type = dyn_type; 1230 r->pcnt = r->bcnt = 0; 1231 r->count = 0; 1232 1233 r->bucket = i; 1234 r->next = ipfw_dyn_v[i]; 1235 ipfw_dyn_v[i] = r; 1236 dyn_count++; 1237 DEB(printf("ipfw: add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n", 1238 dyn_type, 1239 (r->id.src_ip), (r->id.src_port), 1240 (r->id.dst_ip), (r->id.dst_port), 1241 dyn_count ); ) 1242 return r; 1243} 1244 1245/** 1246 * lookup dynamic parent rule using pkt and rule as search keys. 1247 * If the lookup fails, then install one. 
1248 */ 1249static ipfw_dyn_rule * 1250lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) 1251{ 1252 ipfw_dyn_rule *q; 1253 int i; 1254 1255 IPFW_DYN_LOCK_ASSERT(); 1256 1257 if (ipfw_dyn_v) { 1258 int is_v6 = IS_IP6_FLOW_ID(pkt); 1259 i = hash_packet( pkt ); 1260 for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next) 1261 if (q->dyn_type == O_LIMIT_PARENT && 1262 rule== q->rule && 1263 pkt->proto == q->id.proto && 1264 pkt->src_port == q->id.src_port && 1265 pkt->dst_port == q->id.dst_port && 1266 ( 1267 (is_v6 && 1268 IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 1269 &(q->id.src_ip6)) && 1270 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 1271 &(q->id.dst_ip6))) || 1272 (!is_v6 && 1273 pkt->src_ip == q->id.src_ip && 1274 pkt->dst_ip == q->id.dst_ip) 1275 ) 1276 ) { 1277 q->expire = time_second + dyn_short_lifetime; 1278 DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) 1279 return q; 1280 } 1281 } 1282 return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); 1283} 1284 1285/** 1286 * Install dynamic state for rule type cmd->o.opcode 1287 * 1288 * Returns 1 (failure) if state is not installed because of errors or because 1289 * session limitations are enforced. 1290 */ 1291static int 1292install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, 1293 struct ip_fw_args *args) 1294{ 1295 static int last_log; 1296 1297 ipfw_dyn_rule *q; 1298 1299 DEB(printf("ipfw: install state type %d 0x%08x %u -> 0x%08x %u\n", 1300 cmd->o.opcode, 1301 (args->f_id.src_ip), (args->f_id.src_port), 1302 (args->f_id.dst_ip), (args->f_id.dst_port) );) 1303 1304 IPFW_DYN_LOCK(); 1305 1306 q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 1307 1308 if (q != NULL) { /* should never occur */ 1309 if (last_log != time_second) { 1310 last_log = time_second; 1311 printf("ipfw: install_state: entry already present, done\n"); 1312 } 1313 IPFW_DYN_UNLOCK(); 1314 return 0; 1315 } 1316 1317 if (dyn_count >= dyn_max) 1318 /* 1319 * Run out of slots, try to remove any expired rule. 
1320 */ 1321 remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); 1322 1323 if (dyn_count >= dyn_max) { 1324 if (last_log != time_second) { 1325 last_log = time_second; 1326 printf("ipfw: install_state: Too many dynamic rules\n"); 1327 } 1328 IPFW_DYN_UNLOCK(); 1329 return 1; /* cannot install, notify caller */ 1330 } 1331 1332 switch (cmd->o.opcode) { 1333 case O_KEEP_STATE: /* bidir rule */ 1334 add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); 1335 break; 1336 1337 case O_LIMIT: /* limit number of sessions */ 1338 { 1339 u_int16_t limit_mask = cmd->limit_mask; 1340 struct ipfw_flow_id id; 1341 ipfw_dyn_rule *parent; 1342 1343 DEB(printf("ipfw: installing dyn-limit rule %d\n", 1344 cmd->conn_limit);) 1345 1346 id.dst_ip = id.src_ip = 0; 1347 id.dst_port = id.src_port = 0; 1348 id.proto = args->f_id.proto; 1349 1350 if (IS_IP6_FLOW_ID (&(args->f_id))) { 1351 if (limit_mask & DYN_SRC_ADDR) 1352 id.src_ip6 = args->f_id.src_ip6; 1353 if (limit_mask & DYN_DST_ADDR) 1354 id.dst_ip6 = args->f_id.dst_ip6; 1355 } else { 1356 if (limit_mask & DYN_SRC_ADDR) 1357 id.src_ip = args->f_id.src_ip; 1358 if (limit_mask & DYN_DST_ADDR) 1359 id.dst_ip = args->f_id.dst_ip; 1360 } 1361 if (limit_mask & DYN_SRC_PORT) 1362 id.src_port = args->f_id.src_port; 1363 if (limit_mask & DYN_DST_PORT) 1364 id.dst_port = args->f_id.dst_port; 1365 parent = lookup_dyn_parent(&id, rule); 1366 if (parent == NULL) { 1367 printf("ipfw: add parent failed\n"); 1368 return 1; 1369 } 1370 if (parent->count >= cmd->conn_limit) { 1371 /* 1372 * See if we can remove some expired rule. 
1373 */ 1374 remove_dyn_rule(rule, parent); 1375 if (parent->count >= cmd->conn_limit) { 1376 if (fw_verbose && last_log != time_second) { 1377 last_log = time_second; 1378 log(LOG_SECURITY | LOG_DEBUG, 1379 "drop session, too many entries\n"); 1380 } 1381 IPFW_DYN_UNLOCK(); 1382 return 1; 1383 } 1384 } 1385 add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); 1386 } 1387 break; 1388 default: 1389 printf("ipfw: unknown dynamic rule type %u\n", cmd->o.opcode); 1390 IPFW_DYN_UNLOCK(); 1391 return 1; 1392 } 1393 lookup_dyn_rule_locked(&args->f_id, NULL, NULL); /* XXX just set lifetime */ 1394 IPFW_DYN_UNLOCK(); 1395 return 0; 1396} 1397 1398/* 1399 * Generate a TCP packet, containing either a RST or a keepalive. 1400 * When flags & TH_RST, we are sending a RST packet, because of a 1401 * "reset" action matched the packet. 1402 * Otherwise we are sending a keepalive, and flags & TH_ 1403 */ 1404static struct mbuf * 1405send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags) 1406{ 1407 struct mbuf *m; 1408 struct ip *ip; 1409 struct tcphdr *tcp; 1410 1411 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1412 if (m == 0) 1413 return (NULL); 1414 m->m_pkthdr.rcvif = (struct ifnet *)0; 1415 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); 1416 m->m_data += max_linkhdr; 1417 1418 ip = mtod(m, struct ip *); 1419 bzero(ip, m->m_len); 1420 tcp = (struct tcphdr *)(ip + 1); /* no IP options */ 1421 ip->ip_p = IPPROTO_TCP; 1422 tcp->th_off = 5; 1423 /* 1424 * Assume we are sending a RST (or a keepalive in the reverse 1425 * direction), swap src and destination addresses and ports. 
1426 */ 1427 ip->ip_src.s_addr = htonl(id->dst_ip); 1428 ip->ip_dst.s_addr = htonl(id->src_ip); 1429 tcp->th_sport = htons(id->dst_port); 1430 tcp->th_dport = htons(id->src_port); 1431 if (flags & TH_RST) { /* we are sending a RST */ 1432 if (flags & TH_ACK) { 1433 tcp->th_seq = htonl(ack); 1434 tcp->th_ack = htonl(0); 1435 tcp->th_flags = TH_RST; 1436 } else { 1437 if (flags & TH_SYN) 1438 seq++; 1439 tcp->th_seq = htonl(0); 1440 tcp->th_ack = htonl(seq); 1441 tcp->th_flags = TH_RST | TH_ACK; 1442 } 1443 } else { 1444 /* 1445 * We are sending a keepalive. flags & TH_SYN determines 1446 * the direction, forward if set, reverse if clear. 1447 * NOTE: seq and ack are always assumed to be correct 1448 * as set by the caller. This may be confusing... 1449 */ 1450 if (flags & TH_SYN) { 1451 /* 1452 * we have to rewrite the correct addresses! 1453 */ 1454 ip->ip_dst.s_addr = htonl(id->dst_ip); 1455 ip->ip_src.s_addr = htonl(id->src_ip); 1456 tcp->th_dport = htons(id->dst_port); 1457 tcp->th_sport = htons(id->src_port); 1458 } 1459 tcp->th_seq = htonl(seq); 1460 tcp->th_ack = htonl(ack); 1461 tcp->th_flags = TH_ACK; 1462 } 1463 /* 1464 * set ip_len to the payload size so we can compute 1465 * the tcp checksum on the pseudoheader 1466 * XXX check this, could save a couple of words ? 1467 */ 1468 ip->ip_len = htons(sizeof(struct tcphdr)); 1469 tcp->th_sum = in_cksum(m, m->m_pkthdr.len); 1470 /* 1471 * now fill fields left out earlier 1472 */ 1473 ip->ip_ttl = ip_defttl; 1474 ip->ip_len = m->m_pkthdr.len; 1475 m->m_flags |= M_SKIP_FIREWALL; 1476 return (m); 1477} 1478 1479/* 1480 * sends a reject message, consuming the mbuf passed as an argument. 1481 */ 1482static void 1483send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) 1484{ 1485 1486 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 1487 /* We need the IP header in host order for icmp_error(). 
*/ 1488 if (args->eh != NULL) { 1489 struct ip *ip = mtod(args->m, struct ip *); 1490 ip->ip_len = ntohs(ip->ip_len); 1491 ip->ip_off = ntohs(ip->ip_off); 1492 } 1493 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 1494 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { 1495 struct tcphdr *const tcp = 1496 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 1497 if ( (tcp->th_flags & TH_RST) == 0) { 1498 struct mbuf *m; 1499 m = send_pkt(&(args->f_id), ntohl(tcp->th_seq), 1500 ntohl(tcp->th_ack), 1501 tcp->th_flags | TH_RST); 1502 if (m != NULL) 1503 ip_output(m, NULL, NULL, 0, NULL, NULL); 1504 } 1505 m_freem(args->m); 1506 } else 1507 m_freem(args->m); 1508 args->m = NULL; 1509} 1510 1511/** 1512 * 1513 * Given an ip_fw *, lookup_next_rule will return a pointer 1514 * to the next rule, which can be either the jump 1515 * target (for skipto instructions) or the next one in the list (in 1516 * all other cases including a missing jump target). 1517 * The result is also written in the "next_rule" field of the rule. 1518 * Backward jumps are not allowed, so start looking from the next 1519 * rule... 1520 * 1521 * This never returns NULL -- in case we do not have an exact match, 1522 * the next rule is returned. When the ruleset is changed, 1523 * pointers are flushed so we are always correct. 
1524 */ 1525 1526static struct ip_fw * 1527lookup_next_rule(struct ip_fw *me) 1528{ 1529 struct ip_fw *rule = NULL; 1530 ipfw_insn *cmd; 1531 1532 /* look for action, in case it is a skipto */ 1533 cmd = ACTION_PTR(me); 1534 if (cmd->opcode == O_LOG) 1535 cmd += F_LEN(cmd); 1536 if (cmd->opcode == O_ALTQ) 1537 cmd += F_LEN(cmd); 1538 if ( cmd->opcode == O_SKIPTO ) 1539 for (rule = me->next; rule ; rule = rule->next) 1540 if (rule->rulenum >= cmd->arg1) 1541 break; 1542 if (rule == NULL) /* failure or not a skipto */ 1543 rule = me->next; 1544 me->next_rule = rule; 1545 return rule; 1546} 1547 1548static void 1549init_tables(void) 1550{ 1551 int i; 1552 1553 for (i = 0; i < IPFW_TABLES_MAX; i++) { 1554 rn_inithead((void **)&ipfw_tables[i].rnh, 32); 1555 ipfw_tables[i].modified = 1; 1556 } 1557} 1558 1559static int 1560add_table_entry(u_int16_t tbl, in_addr_t addr, u_int8_t mlen, u_int32_t value) 1561{ 1562 struct radix_node_head *rnh; 1563 struct table_entry *ent; 1564 1565 if (tbl >= IPFW_TABLES_MAX) 1566 return (EINVAL); 1567 rnh = ipfw_tables[tbl].rnh; 1568 ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO); 1569 if (ent == NULL) 1570 return (ENOMEM); 1571 ent->value = value; 1572 ent->addr.sin_len = ent->mask.sin_len = 8; 1573 ent->mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); 1574 ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; 1575 RADIX_NODE_HEAD_LOCK(rnh); 1576 if (rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent) == 1577 NULL) { 1578 RADIX_NODE_HEAD_UNLOCK(rnh); 1579 free(ent, M_IPFW_TBL); 1580 return (EEXIST); 1581 } 1582 ipfw_tables[tbl].modified = 1; 1583 RADIX_NODE_HEAD_UNLOCK(rnh); 1584 return (0); 1585} 1586 1587static int 1588del_table_entry(u_int16_t tbl, in_addr_t addr, u_int8_t mlen) 1589{ 1590 struct radix_node_head *rnh; 1591 struct table_entry *ent; 1592 struct sockaddr_in sa, mask; 1593 1594 if (tbl >= IPFW_TABLES_MAX) 1595 return (EINVAL); 1596 rnh = ipfw_tables[tbl].rnh; 1597 sa.sin_len = mask.sin_len = 8; 1598 mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); 1599 sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; 1600 RADIX_NODE_HEAD_LOCK(rnh); 1601 ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh); 1602 if (ent == NULL) { 1603 RADIX_NODE_HEAD_UNLOCK(rnh); 1604 return (ESRCH); 1605 } 1606 ipfw_tables[tbl].modified = 1; 1607 RADIX_NODE_HEAD_UNLOCK(rnh); 1608 free(ent, M_IPFW_TBL); 1609 return (0); 1610} 1611 1612static int 1613flush_table_entry(struct radix_node *rn, void *arg) 1614{ 1615 struct radix_node_head * const rnh = arg; 1616 struct table_entry *ent; 1617 1618 ent = (struct table_entry *) 1619 rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); 1620 if (ent != NULL) 1621 free(ent, M_IPFW_TBL); 1622 return (0); 1623} 1624 1625static int 1626flush_table(u_int16_t tbl) 1627{ 1628 struct radix_node_head *rnh; 1629 1630 if (tbl >= IPFW_TABLES_MAX) 1631 return (EINVAL); 1632 rnh = ipfw_tables[tbl].rnh; 1633 RADIX_NODE_HEAD_LOCK(rnh); 1634 rnh->rnh_walktree(rnh, flush_table_entry, rnh); 1635 ipfw_tables[tbl].modified = 1; 1636 RADIX_NODE_HEAD_UNLOCK(rnh); 1637 return (0); 1638} 1639 1640static void 1641flush_tables(void) 1642{ 1643 u_int16_t tbl; 1644 1645 for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) 1646 flush_table(tbl); 1647} 1648 
1649static int 1650lookup_table(u_int16_t tbl, in_addr_t addr, u_int32_t *val) 1651{ 1652 struct radix_node_head *rnh; 1653 struct ip_fw_table *table; 1654 struct table_entry *ent; 1655 struct sockaddr_in sa; 1656 int last_match; 1657 1658 if (tbl >= IPFW_TABLES_MAX) 1659 return (0); 1660 table = &ipfw_tables[tbl]; 1661 rnh = table->rnh; 1662 RADIX_NODE_HEAD_LOCK(rnh); 1663 if (addr == table->last_addr && !table->modified) { 1664 last_match = table->last_match; 1665 if (last_match) 1666 *val = table->last_value; 1667 RADIX_NODE_HEAD_UNLOCK(rnh); 1668 return (last_match); 1669 } 1670 table->modified = 0; 1671 sa.sin_len = 8; 1672 sa.sin_addr.s_addr = addr; 1673 ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh)); 1674 table->last_addr = addr; 1675 if (ent != NULL) { 1676 table->last_value = *val = ent->value; 1677 table->last_match = 1; 1678 RADIX_NODE_HEAD_UNLOCK(rnh); 1679 return (1); 1680 } 1681 table->last_match = 0; 1682 RADIX_NODE_HEAD_UNLOCK(rnh); 1683 return (0); 1684} 1685 1686static int 1687count_table_entry(struct radix_node *rn, void *arg) 1688{ 1689 u_int32_t * const cnt = arg; 1690 1691 (*cnt)++; 1692 return (0); 1693} 1694 1695static int 1696count_table(u_int32_t tbl, u_int32_t *cnt) 1697{ 1698 struct radix_node_head *rnh; 1699 1700 if (tbl >= IPFW_TABLES_MAX) 1701 return (EINVAL); 1702 rnh = ipfw_tables[tbl].rnh; 1703 *cnt = 0; 1704 RADIX_NODE_HEAD_LOCK(rnh); 1705 rnh->rnh_walktree(rnh, count_table_entry, cnt); 1706 RADIX_NODE_HEAD_UNLOCK(rnh); 1707 return (0); 1708} 1709 1710static int 1711dump_table_entry(struct radix_node *rn, void *arg) 1712{ 1713 struct table_entry * const n = (struct table_entry *)rn; 1714 ipfw_table * const tbl = arg; 1715 ipfw_table_entry *ent; 1716 1717 if (tbl->cnt == tbl->size) 1718 return (1); 1719 ent = &tbl->ent[tbl->cnt]; 1720 ent->tbl = tbl->tbl; 1721 if (in_nullhost(n->mask.sin_addr)) 1722 ent->masklen = 0; 1723 else 1724 ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); 1725 ent->addr = 
n->addr.sin_addr.s_addr; 1726 ent->value = n->value; 1727 tbl->cnt++; 1728 return (0); 1729} 1730 1731static int 1732dump_table(ipfw_table *tbl) 1733{ 1734 struct radix_node_head *rnh; 1735 1736 if (tbl->tbl >= IPFW_TABLES_MAX) 1737 return (EINVAL); 1738 rnh = ipfw_tables[tbl->tbl].rnh; 1739 tbl->cnt = 0; 1740 RADIX_NODE_HEAD_LOCK(rnh); 1741 rnh->rnh_walktree(rnh, dump_table_entry, tbl); 1742 RADIX_NODE_HEAD_UNLOCK(rnh); 1743 return (0); 1744} 1745 1746static void 1747fill_ugid_cache(struct inpcb *inp, struct ip_fw_ugid *ugp) 1748{ 1749 struct ucred *cr; 1750 1751 if (inp->inp_socket != NULL) { 1752 cr = inp->inp_socket->so_cred; 1753 ugp->fw_prid = jailed(cr) ? 1754 cr->cr_prison->pr_id : -1; 1755 ugp->fw_uid = cr->cr_uid; 1756 ugp->fw_ngroups = cr->cr_ngroups; 1757 bcopy(cr->cr_groups, ugp->fw_groups, 1758 sizeof(ugp->fw_groups)); 1759 } 1760} 1761 1762static int 1763check_uidgid(ipfw_insn_u32 *insn, 1764 int proto, struct ifnet *oif, 1765 struct in_addr dst_ip, u_int16_t dst_port, 1766 struct in_addr src_ip, u_int16_t src_port, 1767 struct ip_fw_ugid *ugp, int *lookup, struct inpcb *inp) 1768{ 1769 struct inpcbinfo *pi; 1770 int wildcard; 1771 struct inpcb *pcb; 1772 int match; 1773 gid_t *gp; 1774 1775 /* 1776 * Check to see if the UDP or TCP stack supplied us with 1777 * the PCB. If so, rather then holding a lock and looking 1778 * up the PCB, we can use the one that was supplied. 1779 */ 1780 if (inp && *lookup == 0) { 1781 INP_LOCK_ASSERT(inp); 1782 if (inp->inp_socket != NULL) { 1783 fill_ugid_cache(inp, ugp); 1784 *lookup = 1; 1785 } 1786 } 1787 /* 1788 * If we have already been here and the packet has no 1789 * PCB entry associated with it, then we can safely 1790 * assume that this is a no match. 
1791 */ 1792 if (*lookup == -1) 1793 return (0); 1794 if (proto == IPPROTO_TCP) { 1795 wildcard = 0; 1796 pi = &tcbinfo; 1797 } else if (proto == IPPROTO_UDP) { 1798 wildcard = 1; 1799 pi = &udbinfo; 1800 } else 1801 return 0; 1802 match = 0; 1803 if (*lookup == 0) { 1804 INP_INFO_RLOCK(pi); 1805 pcb = (oif) ? 1806 in_pcblookup_hash(pi, 1807 dst_ip, htons(dst_port), 1808 src_ip, htons(src_port), 1809 wildcard, oif) : 1810 in_pcblookup_hash(pi, 1811 src_ip, htons(src_port), 1812 dst_ip, htons(dst_port), 1813 wildcard, NULL); 1814 if (pcb != NULL) { 1815 INP_LOCK(pcb); 1816 if (pcb->inp_socket != NULL) { 1817 fill_ugid_cache(pcb, ugp); 1818 *lookup = 1; 1819 } 1820 INP_UNLOCK(pcb); 1821 } 1822 INP_INFO_RUNLOCK(pi); 1823 if (*lookup == 0) { 1824 /* 1825 * If the lookup did not yield any results, there 1826 * is no sense in coming back and trying again. So 1827 * we can set lookup to -1 and ensure that we wont 1828 * bother the pcb system again. 1829 */ 1830 *lookup = -1; 1831 return (0); 1832 } 1833 } 1834 if (insn->o.opcode == O_UID) 1835 match = (ugp->fw_uid == (uid_t)insn->d[0]); 1836 else if (insn->o.opcode == O_GID) { 1837 for (gp = ugp->fw_groups; 1838 gp < &ugp->fw_groups[ugp->fw_ngroups]; gp++) 1839 if (*gp == (gid_t)insn->d[0]) { 1840 match = 1; 1841 break; 1842 } 1843 } else if (insn->o.opcode == O_JAIL) 1844 match = (ugp->fw_prid == (int)insn->d[0]); 1845 return match; 1846} 1847 1848/* 1849 * The main check routine for the firewall. 1850 * 1851 * All arguments are in args so we can modify them and return them 1852 * back to the caller. 1853 * 1854 * Parameters: 1855 * 1856 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 1857 * Starts with the IP header. 1858 * args->eh (in) Mac header if present, or NULL for layer3 packet. 1859 * args->oif Outgoing interface, or NULL if packet is incoming. 1860 * The incoming interface is in the mbuf. 
(in) 1861 * args->divert_rule (in/out) 1862 * Skip up to the first rule past this rule number; 1863 * upon return, non-zero port number for divert or tee. 1864 * 1865 * args->rule Pointer to the last matching rule (in/out) 1866 * args->next_hop Socket we are forwarding to (out). 1867 * args->f_id Addresses grabbed from the packet (out) 1868 * args->cookie a cookie depending on rule action 1869 * 1870 * Return value: 1871 * 1872 * IP_FW_PASS the packet must be accepted 1873 * IP_FW_DENY the packet must be dropped 1874 * IP_FW_DIVERT divert packet, port in m_tag 1875 * IP_FW_TEE tee packet, port in m_tag 1876 * IP_FW_DUMMYNET to dummynet, pipe in args->cookie 1877 * IP_FW_NETGRAPH into netgraph, cookie args->cookie 1878 * 1879 */ 1880 1881int 1882ipfw_chk(struct ip_fw_args *args) 1883{ 1884 /* 1885 * Local variables hold state during the processing of a packet. 1886 * 1887 * IMPORTANT NOTE: to speed up the processing of rules, there 1888 * are some assumption on the values of the variables, which 1889 * are documented here. Should you change them, please check 1890 * the implementation of the various instructions to make sure 1891 * that they still work. 1892 * 1893 * args->eh The MAC header. It is non-null for a layer2 1894 * packet, it is NULL for a layer-3 packet. 1895 * 1896 * m | args->m Pointer to the mbuf, as received from the caller. 1897 * It may change if ipfw_chk() does an m_pullup, or if it 1898 * consumes the packet because it calls send_reject(). 1899 * XXX This has to change, so that ipfw_chk() never modifies 1900 * or consumes the buffer. 1901 * ip is simply an alias of the value of m, and it is kept 1902 * in sync with it (the packet is supposed to start with 1903 * the ip header). 1904 */ 1905 struct mbuf *m = args->m; 1906 struct ip *ip = mtod(m, struct ip *); 1907 1908 /* 1909 * For rules which contain uid/gid or jail constraints, cache 1910 * a copy of the users credentials after the pcb lookup has been 1911 * executed. 
This will speed up the processing of rules with 1912 * these types of constraints, as well as decrease contention 1913 * on pcb related locks. 1914 */ 1915 struct ip_fw_ugid fw_ugid_cache; 1916 int ugid_lookup = 0; 1917 1918 /* 1919 * divinput_flags If non-zero, set to the IP_FW_DIVERT_*_FLAG 1920 * associated with a packet input on a divert socket. This 1921 * will allow to distinguish traffic and its direction when 1922 * it originates from a divert socket. 1923 */ 1924 u_int divinput_flags = 0; 1925 1926 /* 1927 * oif | args->oif If NULL, ipfw_chk has been called on the 1928 * inbound path (ether_input, bdg_forward, ip_input). 1929 * If non-NULL, ipfw_chk has been called on the outbound path 1930 * (ether_output, ip_output). 1931 */ 1932 struct ifnet *oif = args->oif; 1933 1934 struct ip_fw *f = NULL; /* matching rule */ 1935 int retval = 0; 1936 1937 /* 1938 * hlen The length of the IP header. 1939 */ 1940 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 1941 1942 /* 1943 * offset The offset of a fragment. offset != 0 means that 1944 * we have a fragment at this offset of an IPv4 packet. 1945 * offset == 0 means that (if this is an IPv4 packet) 1946 * this is the first or only fragment. 1947 */ 1948 u_short offset = 0; 1949 1950 /* 1951 * Local copies of addresses. They are only valid if we have 1952 * an IP packet. 1953 * 1954 * proto The protocol. Set to 0 for non-ip packets, 1955 * or to the protocol read from the packet otherwise. 1956 * proto != 0 means that we have an IPv4 packet. 1957 * 1958 * src_port, dst_port port numbers, in HOST format. Only 1959 * valid for TCP and UDP packets. 1960 * 1961 * src_ip, dst_ip ip addresses, in NETWORK format. 1962 * Only valid for IPv4 packets. 
1963 */ 1964 u_int8_t proto; 1965 u_int16_t src_port = 0, dst_port = 0; /* NOTE: host format */ 1966 struct in_addr src_ip, dst_ip; /* NOTE: network format */ 1967 u_int16_t ip_len=0; 1968 int pktlen; 1969 1970 /* 1971 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 1972 * MATCH_NONE when checked and not matched (q = NULL), 1973 * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) 1974 */ 1975 int dyn_dir = MATCH_UNKNOWN; 1976 ipfw_dyn_rule *q = NULL; 1977 struct ip_fw_chain *chain = &layer3_chain; 1978 struct m_tag *mtag; 1979 1980 /* 1981 * We store in ulp a pointer to the upper layer protocol header. 1982 * In the ipv4 case this is easy to determine from the header, 1983 * but for ipv6 we might have some additional headers in the middle. 1984 * ulp is NULL if not found. 1985 */ 1986 void *ulp = NULL; /* upper layer protocol pointer. */ 1987 /* XXX ipv6 variables */ 1988 int is_ipv6 = 0; 1989 u_int16_t ext_hd = 0; /* bits vector for extension header filtering */ 1990 /* end of ipv6 variables */ 1991 int is_ipv4 = 0; 1992 1993 if (m->m_flags & M_SKIP_FIREWALL) 1994 return (IP_FW_PASS); /* accept */ 1995 1996 pktlen = m->m_pkthdr.len; 1997 proto = args->f_id.proto = 0; /* mark f_id invalid */ 1998 1999/* 2000 * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, 2001 * then it sets p to point at the offset "len" in the mbuf. WARNING: the 2002 * pointer might become stale after other pullups (but we never use it 2003 * this way). 2004 */ 2005#define PULLUP_TO(len, p, T) \ 2006do { \ 2007 int x = (len) + sizeof(T); \ 2008 if ((m)->m_len < x) { \ 2009 args->m = m = m_pullup(m, x); \ 2010 if (m == NULL) \ 2011 goto pullup_failed; \ 2012 } \ 2013 p = (mtod(m, char *) + (len)); \ 2014} while (0) 2015 2016 /* Identify IP packets and fill up variables. 
*/ 2017 if (pktlen >= sizeof(struct ip6_hdr) && 2018 (args->eh == NULL || ntohs(args->eh->ether_type)==ETHERTYPE_IPV6) && 2019 mtod(m, struct ip *)->ip_v == 6) { 2020 is_ipv6 = 1; 2021 args->f_id.addr_type = 6; 2022 hlen = sizeof(struct ip6_hdr); 2023 proto = mtod(m, struct ip6_hdr *)->ip6_nxt; 2024 2025 /* Search extension headers to find upper layer protocols */ 2026 while (ulp == NULL) { 2027 switch (proto) { 2028 case IPPROTO_ICMPV6: 2029 PULLUP_TO(hlen, ulp, struct icmp6_hdr); 2030 args->f_id.flags = ICMP6(ulp)->icmp6_type; 2031 break; 2032 2033 case IPPROTO_TCP: 2034 PULLUP_TO(hlen, ulp, struct tcphdr); 2035 dst_port = TCP(ulp)->th_dport; 2036 src_port = TCP(ulp)->th_sport; 2037 args->f_id.flags = TCP(ulp)->th_flags; 2038 break; 2039 2040 case IPPROTO_UDP: 2041 PULLUP_TO(hlen, ulp, struct udphdr); 2042 dst_port = UDP(ulp)->uh_dport; 2043 src_port = UDP(ulp)->uh_sport; 2044 break; 2045 2046 case IPPROTO_HOPOPTS: 2047 PULLUP_TO(hlen, ulp, struct ip6_hbh); 2048 ext_hd |= EXT_HOPOPTS; 2049 hlen += sizeof(struct ip6_hbh); 2050 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; 2051 ulp = NULL; 2052 break; 2053 2054 case IPPROTO_ROUTING: 2055 PULLUP_TO(hlen, ulp, struct ip6_rthdr); 2056 ext_hd |= EXT_ROUTING; 2057 hlen += sizeof(struct ip6_rthdr); 2058 proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt; 2059 ulp = NULL; 2060 break; 2061 2062 case IPPROTO_FRAGMENT: 2063 PULLUP_TO(hlen, ulp, struct ip6_frag); 2064 ext_hd |= EXT_FRAGMENT; 2065 hlen += sizeof (struct ip6_frag); 2066 proto = ((struct ip6_frag *)ulp)->ip6f_nxt; 2067 offset = 1; 2068 ulp = NULL; /* XXX is it correct ? 
*/ 2069 break; 2070 2071 case IPPROTO_AH: 2072 case IPPROTO_NONE: 2073 case IPPROTO_ESP: 2074 PULLUP_TO(hlen, ulp, struct ip6_ext); 2075 if (proto == IPPROTO_AH) 2076 ext_hd |= EXT_AH; 2077 else if (proto == IPPROTO_ESP) 2078 ext_hd |= EXT_ESP; 2079 hlen += ((struct ip6_ext *)ulp)->ip6e_len + 2080 sizeof (struct ip6_ext); 2081 proto = ((struct ip6_ext *)ulp)->ip6e_nxt; 2082 ulp = NULL; 2083 break; 2084 2085 case IPPROTO_OSPFIGP: 2086 /* XXX OSPF header check? */ 2087 PULLUP_TO(hlen, ulp, struct ip6_ext); 2088 break; 2089 2090 default: 2091 printf( "IPFW2: IPV6 - Unknown Extension Header (%d)\n", 2092 proto); 2093 return 0; /* deny */ 2094 break; 2095 } /*switch */ 2096 } 2097 args->f_id.src_ip6 = mtod(m,struct ip6_hdr *)->ip6_src; 2098 args->f_id.dst_ip6 = mtod(m,struct ip6_hdr *)->ip6_dst; 2099 args->f_id.src_ip = 0; 2100 args->f_id.dst_ip = 0; 2101 args->f_id.flow_id6 = ntohs(mtod(m, struct ip6_hdr *)->ip6_flow); 2102 } else if (pktlen >= sizeof(struct ip) && 2103 (args->eh == NULL || ntohs(args->eh->ether_type) == ETHERTYPE_IP) && 2104 mtod(m, struct ip *)->ip_v == 4) { 2105 is_ipv4 = 1; 2106 ip = mtod(m, struct ip *); 2107 hlen = ip->ip_hl << 2; 2108 args->f_id.addr_type = 4; 2109 2110 /* 2111 * Collect parameters into local variables for faster matching. 2112 */ 2113 proto = ip->ip_p; 2114 src_ip = ip->ip_src; 2115 dst_ip = ip->ip_dst; 2116 if (args->eh != NULL) { /* layer 2 packets are as on the wire */ 2117 offset = ntohs(ip->ip_off) & IP_OFFMASK; 2118 ip_len = ntohs(ip->ip_len); 2119 } else { 2120 offset = ip->ip_off & IP_OFFMASK; 2121 ip_len = ip->ip_len; 2122 } 2123 pktlen = ip_len < pktlen ? 
ip_len : pktlen; 2124 2125 if (offset == 0) { 2126 switch (proto) { 2127 case IPPROTO_TCP: 2128 PULLUP_TO(hlen, ulp, struct tcphdr); 2129 dst_port = TCP(ulp)->th_dport; 2130 src_port = TCP(ulp)->th_sport; 2131 args->f_id.flags = TCP(ulp)->th_flags; 2132 break; 2133 2134 case IPPROTO_UDP: 2135 PULLUP_TO(hlen, ulp, struct udphdr); 2136 dst_port = UDP(ulp)->uh_dport; 2137 src_port = UDP(ulp)->uh_sport; 2138 break; 2139 2140 case IPPROTO_ICMP: 2141 PULLUP_TO(hlen, ulp, struct icmphdr); 2142 args->f_id.flags = ICMP(ulp)->icmp_type; 2143 break; 2144 2145 default: 2146 break; 2147 } 2148 } 2149 2150 args->f_id.src_ip = ntohl(src_ip.s_addr); 2151 args->f_id.dst_ip = ntohl(dst_ip.s_addr); 2152 } 2153#undef PULLUP_TO 2154 if (proto) { /* we may have port numbers, store them */ 2155 args->f_id.proto = proto; 2156 args->f_id.src_port = src_port = ntohs(src_port); 2157 args->f_id.dst_port = dst_port = ntohs(dst_port); 2158 } 2159 2160 IPFW_RLOCK(chain); 2161 mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); 2162 if (args->rule) { 2163 /* 2164 * Packet has already been tagged. Look for the next rule 2165 * to restart processing. 2166 * 2167 * If fw_one_pass != 0 then just accept it. 2168 * XXX should not happen here, but optimized out in 2169 * the caller. 2170 */ 2171 if (fw_one_pass) { 2172 IPFW_RUNLOCK(chain); 2173 return (IP_FW_PASS); 2174 } 2175 2176 f = args->rule->next_rule; 2177 if (f == NULL) 2178 f = lookup_next_rule(args->rule); 2179 } else { 2180 /* 2181 * Find the starting rule. It can be either the first 2182 * one, or the one after divert_rule if asked so. 2183 */ 2184 int skipto = mtag ? 
divert_cookie(mtag) : 0; 2185 2186 f = chain->rules; 2187 if (args->eh == NULL && skipto != 0) { 2188 if (skipto >= IPFW_DEFAULT_RULE) { 2189 IPFW_RUNLOCK(chain); 2190 return (IP_FW_DENY); /* invalid */ 2191 } 2192 while (f && f->rulenum <= skipto) 2193 f = f->next; 2194 if (f == NULL) { /* drop packet */ 2195 IPFW_RUNLOCK(chain); 2196 return (IP_FW_DENY); 2197 } 2198 } 2199 } 2200 /* reset divert rule to avoid confusion later */ 2201 if (mtag) { 2202 divinput_flags = divert_info(mtag) & 2203 (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG); 2204 m_tag_delete(m, mtag); 2205 } 2206 2207 /* 2208 * Now scan the rules, and parse microinstructions for each rule. 2209 */ 2210 for (; f; f = f->next) { 2211 int l, cmdlen; 2212 ipfw_insn *cmd; 2213 int skip_or; /* skip rest of OR block */ 2214 2215again: 2216 if (set_disable & (1 << f->set) ) 2217 continue; 2218 2219 skip_or = 0; 2220 for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; 2221 l -= cmdlen, cmd += cmdlen) { 2222 int match; 2223 2224 /* 2225 * check_body is a jump target used when we find a 2226 * CHECK_STATE, and need to jump to the body of 2227 * the target rule. 2228 */ 2229 2230check_body: 2231 cmdlen = F_LEN(cmd); 2232 /* 2233 * An OR block (insn_1 || .. || insn_n) has the 2234 * F_OR bit set in all but the last instruction. 2235 * The first match will set "skip_or", and cause 2236 * the following instructions to be skipped until 2237 * past the one with the F_OR bit clear. 2238 */ 2239 if (skip_or) { /* skip this instruction */ 2240 if ((cmd->len & F_OR) == 0) 2241 skip_or = 0; /* next one is good */ 2242 continue; 2243 } 2244 match = 0; /* set to 1 if we succeed */ 2245 2246 switch (cmd->opcode) { 2247 /* 2248 * The first set of opcodes compares the packet's 2249 * fields with some pattern, setting 'match' if a 2250 * match is found. At the end of the loop there is 2251 * logic to deal with F_NOT and F_OR flags associated 2252 * with the opcode. 
2253 */ 2254 case O_NOP: 2255 match = 1; 2256 break; 2257 2258 case O_FORWARD_MAC: 2259 printf("ipfw: opcode %d unimplemented\n", 2260 cmd->opcode); 2261 break; 2262 2263 case O_GID: 2264 case O_UID: 2265 case O_JAIL: 2266 /* 2267 * We only check offset == 0 && proto != 0, 2268 * as this ensures that we have a 2269 * packet with the ports info. 2270 */ 2271 if (offset!=0) 2272 break; 2273 if (is_ipv6) /* XXX to be fixed later */ 2274 break; 2275 if (proto == IPPROTO_TCP || 2276 proto == IPPROTO_UDP) 2277 match = check_uidgid( 2278 (ipfw_insn_u32 *)cmd, 2279 proto, oif, 2280 dst_ip, dst_port, 2281 src_ip, src_port, &fw_ugid_cache, 2282 &ugid_lookup, args->inp); 2283 break; 2284 2285 case O_RECV: 2286 match = iface_match(m->m_pkthdr.rcvif, 2287 (ipfw_insn_if *)cmd); 2288 break; 2289 2290 case O_XMIT: 2291 match = iface_match(oif, (ipfw_insn_if *)cmd); 2292 break; 2293 2294 case O_VIA: 2295 match = iface_match(oif ? oif : 2296 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); 2297 break; 2298 2299 case O_MACADDR2: 2300 if (args->eh != NULL) { /* have MAC header */ 2301 u_int32_t *want = (u_int32_t *) 2302 ((ipfw_insn_mac *)cmd)->addr; 2303 u_int32_t *mask = (u_int32_t *) 2304 ((ipfw_insn_mac *)cmd)->mask; 2305 u_int32_t *hdr = (u_int32_t *)args->eh; 2306 2307 match = 2308 ( want[0] == (hdr[0] & mask[0]) && 2309 want[1] == (hdr[1] & mask[1]) && 2310 want[2] == (hdr[2] & mask[2]) ); 2311 } 2312 break; 2313 2314 case O_MAC_TYPE: 2315 if (args->eh != NULL) { 2316 u_int16_t t = 2317 ntohs(args->eh->ether_type); 2318 u_int16_t *p = 2319 ((ipfw_insn_u16 *)cmd)->ports; 2320 int i; 2321 2322 for (i = cmdlen - 1; !match && i>0; 2323 i--, p += 2) 2324 match = (t>=p[0] && t<=p[1]); 2325 } 2326 break; 2327 2328 case O_FRAG: 2329 match = (offset != 0); 2330 break; 2331 2332 case O_IN: /* "out" is "not in" */ 2333 match = (oif == NULL); 2334 break; 2335 2336 case O_LAYER2: 2337 match = (args->eh != NULL); 2338 break; 2339 2340 case O_DIVERTED: 2341 match = (cmd->arg1 & 1 && divinput_flags & 
2342 IP_FW_DIVERT_LOOPBACK_FLAG) || 2343 (cmd->arg1 & 2 && divinput_flags & 2344 IP_FW_DIVERT_OUTPUT_FLAG); 2345 break; 2346 2347 case O_PROTO: 2348 /* 2349 * We do not allow an arg of 0 so the 2350 * check of "proto" only suffices. 2351 */ 2352 match = (proto == cmd->arg1); 2353 break; 2354 2355 case O_IP_SRC: 2356 match = is_ipv4 && 2357 (((ipfw_insn_ip *)cmd)->addr.s_addr == 2358 src_ip.s_addr); 2359 break; 2360 2361 case O_IP_SRC_LOOKUP: 2362 case O_IP_DST_LOOKUP: 2363 if (is_ipv4) { 2364 uint32_t a = 2365 (cmd->opcode == O_IP_DST_LOOKUP) ? 2366 dst_ip.s_addr : src_ip.s_addr; 2367 uint32_t v; 2368 2369 match = lookup_table(cmd->arg1, a, &v); 2370 if (!match) 2371 break; 2372 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) 2373 match = 2374 ((ipfw_insn_u32 *)cmd)->d[0] == v; 2375 } 2376 break; 2377 2378 case O_IP_SRC_MASK: 2379 case O_IP_DST_MASK: 2380 if (is_ipv4) { 2381 uint32_t a = 2382 (cmd->opcode == O_IP_DST_MASK) ? 2383 dst_ip.s_addr : src_ip.s_addr; 2384 uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; 2385 int i = cmdlen-1; 2386 2387 for (; !match && i>0; i-= 2, p+= 2) 2388 match = (p[0] == (a & p[1])); 2389 } 2390 break; 2391 2392 case O_IP_SRC_ME: 2393 if (is_ipv4) { 2394 struct ifnet *tif; 2395 2396 INADDR_TO_IFP(src_ip, tif); 2397 match = (tif != NULL); 2398 } 2399 break; 2400 2401 case O_IP_DST_SET: 2402 case O_IP_SRC_SET: 2403 if (is_ipv4) { 2404 u_int32_t *d = (u_int32_t *)(cmd+1); 2405 u_int32_t addr = 2406 cmd->opcode == O_IP_DST_SET ? 
2407 args->f_id.dst_ip : 2408 args->f_id.src_ip; 2409 2410 if (addr < d[0]) 2411 break; 2412 addr -= d[0]; /* subtract base */ 2413 match = (addr < cmd->arg1) && 2414 ( d[ 1 + (addr>>5)] & 2415 (1<<(addr & 0x1f)) ); 2416 } 2417 break; 2418 2419 case O_IP_DST: 2420 match = is_ipv4 && 2421 (((ipfw_insn_ip *)cmd)->addr.s_addr == 2422 dst_ip.s_addr); 2423 break; 2424 2425 case O_IP_DST_ME: 2426 if (is_ipv4) { 2427 struct ifnet *tif; 2428 2429 INADDR_TO_IFP(dst_ip, tif); 2430 match = (tif != NULL); 2431 } 2432 break; 2433 2434 case O_IP_SRCPORT: 2435 case O_IP_DSTPORT: 2436 /* 2437 * offset == 0 && proto != 0 is enough 2438 * to guarantee that we have a 2439 * packet with port info. 2440 */ 2441 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) 2442 && offset == 0) { 2443 u_int16_t x = 2444 (cmd->opcode == O_IP_SRCPORT) ? 2445 src_port : dst_port ; 2446 u_int16_t *p = 2447 ((ipfw_insn_u16 *)cmd)->ports; 2448 int i; 2449 2450 for (i = cmdlen - 1; !match && i>0; 2451 i--, p += 2) 2452 match = (x>=p[0] && x<=p[1]); 2453 } 2454 break; 2455 2456 case O_ICMPTYPE: 2457 match = (offset == 0 && proto==IPPROTO_ICMP && 2458 icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); 2459 break; 2460 2461#ifdef INET6 2462 case O_ICMP6TYPE: 2463 match = is_ipv6 && offset == 0 && 2464 proto==IPPROTO_ICMPV6 && 2465 icmp6type_match( 2466 ICMP6(ulp)->icmp6_type, 2467 (ipfw_insn_u32 *)cmd); 2468 break; 2469#endif /* INET6 */ 2470 2471 case O_IPOPT: 2472 match = (is_ipv4 && 2473 ipopts_match(mtod(m, struct ip *), cmd) ); 2474 break; 2475 2476 case O_IPVER: 2477 match = (is_ipv4 && 2478 cmd->arg1 == mtod(m, struct ip *)->ip_v); 2479 break; 2480 2481 case O_IPID: 2482 case O_IPLEN: 2483 case O_IPTTL: 2484 if (is_ipv4) { /* only for IP packets */ 2485 uint16_t x; 2486 uint16_t *p; 2487 int i; 2488 2489 if (cmd->opcode == O_IPLEN) 2490 x = ip_len; 2491 else if (cmd->opcode == O_IPTTL) 2492 x = mtod(m, struct ip *)->ip_ttl; 2493 else /* must be IPID */ 2494 x = ntohs(mtod(m, struct ip *)->ip_id); 2495 if 
(cmdlen == 1) { 2496 match = (cmd->arg1 == x); 2497 break; 2498 } 2499 /* otherwise we have ranges */ 2500 p = ((ipfw_insn_u16 *)cmd)->ports; 2501 i = cmdlen - 1; 2502 for (; !match && i>0; i--, p += 2) 2503 match = (x >= p[0] && x <= p[1]); 2504 } 2505 break; 2506 2507 case O_IPPRECEDENCE: 2508 match = (is_ipv4 && 2509 (cmd->arg1 == (mtod(m, struct ip *)->ip_tos & 0xe0)) ); 2510 break; 2511 2512 case O_IPTOS: 2513 match = (is_ipv4 && 2514 flags_match(cmd, mtod(m, struct ip *)->ip_tos)); 2515 break; 2516 2517 case O_TCPDATALEN: 2518 if (proto == IPPROTO_TCP && offset == 0) { 2519 struct tcphdr *tcp; 2520 uint16_t x; 2521 uint16_t *p; 2522 int i; 2523 2524 tcp = TCP(ulp); 2525 x = ip_len - 2526 ((ip->ip_hl + tcp->th_off) << 2); 2527 if (cmdlen == 1) { 2528 match = (cmd->arg1 == x); 2529 break; 2530 } 2531 /* otherwise we have ranges */ 2532 p = ((ipfw_insn_u16 *)cmd)->ports; 2533 i = cmdlen - 1; 2534 for (; !match && i>0; i--, p += 2) 2535 match = (x >= p[0] && x <= p[1]); 2536 } 2537 break; 2538 2539 case O_TCPFLAGS: 2540 match = (proto == IPPROTO_TCP && offset == 0 && 2541 flags_match(cmd, TCP(ulp)->th_flags)); 2542 break; 2543 2544 case O_TCPOPTS: 2545 match = (proto == IPPROTO_TCP && offset == 0 && 2546 tcpopts_match(TCP(ulp), cmd)); 2547 break; 2548 2549 case O_TCPSEQ: 2550 match = (proto == IPPROTO_TCP && offset == 0 && 2551 ((ipfw_insn_u32 *)cmd)->d[0] == 2552 TCP(ulp)->th_seq); 2553 break; 2554 2555 case O_TCPACK: 2556 match = (proto == IPPROTO_TCP && offset == 0 && 2557 ((ipfw_insn_u32 *)cmd)->d[0] == 2558 TCP(ulp)->th_ack); 2559 break; 2560 2561 case O_TCPWIN: 2562 match = (proto == IPPROTO_TCP && offset == 0 && 2563 cmd->arg1 == TCP(ulp)->th_win); 2564 break; 2565 2566 case O_ESTAB: 2567 /* reject packets which have SYN only */ 2568 /* XXX should i also check for TH_ACK ? 
*/ 2569 match = (proto == IPPROTO_TCP && offset == 0 && 2570 (TCP(ulp)->th_flags & 2571 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 2572 break; 2573 2574 case O_ALTQ: { 2575 struct altq_tag *at; 2576 ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; 2577 2578 match = 1; 2579 mtag = m_tag_find(m, PACKET_TAG_PF_QID, NULL); 2580 if (mtag != NULL) 2581 break; 2582 mtag = m_tag_get(PACKET_TAG_PF_QID, 2583 sizeof(struct altq_tag), 2584 M_NOWAIT); 2585 if (mtag == NULL) { 2586 /* 2587 * Let the packet fall back to the 2588 * default ALTQ. 2589 */ 2590 break; 2591 } 2592 at = (struct altq_tag *)(mtag+1); 2593 at->qid = altq->qid; 2594 if (is_ipv4) 2595 at->af = AF_INET; 2596 else 2597 at->af = AF_LINK; 2598 at->hdr = ip; 2599 m_tag_prepend(m, mtag); 2600 break; 2601 } 2602 2603 case O_LOG: 2604 if (fw_verbose && !is_ipv6) 2605 ipfw_log(f, hlen, args->eh, m, oif); 2606 match = 1; 2607 break; 2608 2609 case O_PROB: 2610 match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); 2611 break; 2612 2613 case O_VERREVPATH: 2614 /* Outgoing packets automatically pass/match */ 2615 match = ((oif != NULL) || 2616 (m->m_pkthdr.rcvif == NULL) || 2617 ( 2618#ifdef INET6 2619 is_ipv6 ? 2620 verify_path6(&(args->f_id.src_ip6), 2621 m->m_pkthdr.rcvif) : 2622#endif 2623 verify_path(src_ip, m->m_pkthdr.rcvif))); 2624 break; 2625 2626 case O_VERSRCREACH: 2627 /* Outgoing packets automatically pass/match */ 2628 match = (hlen > 0 && ((oif != NULL) || 2629#ifdef INET6 2630 is_ipv6 ? 2631 verify_path6(&(args->f_id.src_ip6), 2632 NULL) : 2633#endif 2634 verify_path(src_ip, NULL))); 2635 break; 2636 2637 case O_ANTISPOOF: 2638 /* Outgoing packets automatically pass/match */ 2639 if (oif == NULL && hlen > 0 && 2640 ( (is_ipv4 && in_localaddr(src_ip)) 2641#ifdef INET6 2642 || (is_ipv6 && 2643 in6_localaddr(&(args->f_id.src_ip6))) 2644#endif 2645 )) 2646 match = 2647#ifdef INET6 2648 is_ipv6 ? 
verify_path6( 2649 &(args->f_id.src_ip6), 2650 m->m_pkthdr.rcvif) : 2651#endif 2652 verify_path(src_ip, 2653 m->m_pkthdr.rcvif); 2654 else 2655 match = 1; 2656 break; 2657 2658 case O_IPSEC: 2659#ifdef FAST_IPSEC 2660 match = (m_tag_find(m, 2661 PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); 2662#endif 2663#ifdef IPSEC 2664 match = (ipsec_getnhist(m) != 0); 2665#endif 2666 /* otherwise no match */ 2667 break; 2668 2669 case O_IP6_SRC: 2670 match = is_ipv6 && 2671 IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, 2672 &((ipfw_insn_ip6 *)cmd)->addr6); 2673 break; 2674 2675 case O_IP6_DST: 2676 match = is_ipv6 && 2677 IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, 2678 &((ipfw_insn_ip6 *)cmd)->addr6); 2679 break; 2680 case O_IP6_SRC_MASK: 2681 if (is_ipv6) { 2682 ipfw_insn_ip6 *te = (ipfw_insn_ip6 *)cmd; 2683 struct in6_addr p = args->f_id.src_ip6; 2684 2685 APPLY_MASK(&p, &te->mask6); 2686 match = IN6_ARE_ADDR_EQUAL(&te->addr6, &p); 2687 } 2688 break; 2689 2690 case O_IP6_DST_MASK: 2691 if (is_ipv6) { 2692 ipfw_insn_ip6 *te = (ipfw_insn_ip6 *)cmd; 2693 struct in6_addr p = args->f_id.dst_ip6; 2694 2695 APPLY_MASK(&p, &te->mask6); 2696 match = IN6_ARE_ADDR_EQUAL(&te->addr6, &p); 2697 } 2698 break; 2699 2700#ifdef INET6 2701 case O_IP6_SRC_ME: 2702 match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6); 2703 break; 2704 2705 case O_IP6_DST_ME: 2706 match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6); 2707 break; 2708 2709 case O_FLOW6ID: 2710 match = is_ipv6 && 2711 flow6id_match(args->f_id.flow_id6, 2712 (ipfw_insn_u32 *) cmd); 2713 break; 2714 2715 case O_EXT_HDR: 2716 match = is_ipv6 && 2717 (ext_hd & ((ipfw_insn *) cmd)->arg1); 2718 break; 2719 2720 case O_IP6: 2721 match = is_ipv6; 2722 break; 2723#endif 2724 2725 case O_IP4: 2726 match = is_ipv4; 2727 break; 2728 2729 /* 2730 * The second set of opcodes represents 'actions', 2731 * i.e. the terminal part of a rule once the packet 2732 * matches all previous patterns. 
2733 * Typically there is only one action for each rule, 2734 * and the opcode is stored at the end of the rule 2735 * (but there are exceptions -- see below). 2736 * 2737 * In general, here we set retval and terminate the 2738 * outer loop (would be a 'break 3' in some language, 2739 * but we need to do a 'goto done'). 2740 * 2741 * Exceptions: 2742 * O_COUNT and O_SKIPTO actions: 2743 * instead of terminating, we jump to the next rule 2744 * ('goto next_rule', equivalent to a 'break 2'), 2745 * or to the SKIPTO target ('goto again' after 2746 * having set f, cmd and l), respectively. 2747 * 2748 * O_LOG and O_ALTQ action parameters: 2749 * perform some action and set match = 1; 2750 * 2751 * O_LIMIT and O_KEEP_STATE: these opcodes are 2752 * not real 'actions', and are stored right 2753 * before the 'action' part of the rule. 2754 * These opcodes try to install an entry in the 2755 * state tables; if successful, we continue with 2756 * the next opcode (match=1; break;), otherwise 2757 * the packet * must be dropped 2758 * ('goto done' after setting retval); 2759 * 2760 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 2761 * cause a lookup of the state table, and a jump 2762 * to the 'action' part of the parent rule 2763 * ('goto check_body') if an entry is found, or 2764 * (CHECK_STATE only) a jump to the next rule if 2765 * the entry is not found ('goto next_rule'). 2766 * The result of the lookup is cached to make 2767 * further instances of these opcodes are 2768 * effectively NOPs. 2769 */ 2770 case O_LIMIT: 2771 case O_KEEP_STATE: 2772 if (install_state(f, 2773 (ipfw_insn_limit *)cmd, args)) { 2774 retval = IP_FW_DENY; 2775 goto done; /* error/limit violation */ 2776 } 2777 match = 1; 2778 break; 2779 2780 case O_PROBE_STATE: 2781 case O_CHECK_STATE: 2782 /* 2783 * dynamic rules are checked at the first 2784 * keep-state or check-state occurrence, 2785 * with the result being stored in dyn_dir. 
2786 * The compiler introduces a PROBE_STATE 2787 * instruction for us when we have a 2788 * KEEP_STATE (because PROBE_STATE needs 2789 * to be run first). 2790 */ 2791 if (dyn_dir == MATCH_UNKNOWN && 2792 (q = lookup_dyn_rule(&args->f_id, 2793 &dyn_dir, proto == IPPROTO_TCP ? 2794 TCP(ulp) : NULL)) 2795 != NULL) { 2796 /* 2797 * Found dynamic entry, update stats 2798 * and jump to the 'action' part of 2799 * the parent rule. 2800 */ 2801 q->pcnt++; 2802 q->bcnt += pktlen; 2803 f = q->rule; 2804 cmd = ACTION_PTR(f); 2805 l = f->cmd_len - f->act_ofs; 2806 IPFW_DYN_UNLOCK(); 2807 goto check_body; 2808 } 2809 /* 2810 * Dynamic entry not found. If CHECK_STATE, 2811 * skip to next rule, if PROBE_STATE just 2812 * ignore and continue with next opcode. 2813 */ 2814 if (cmd->opcode == O_CHECK_STATE) 2815 goto next_rule; 2816 match = 1; 2817 break; 2818 2819 case O_ACCEPT: 2820 retval = 0; /* accept */ 2821 goto done; 2822 2823 case O_PIPE: 2824 case O_QUEUE: 2825 args->rule = f; /* report matching rule */ 2826 args->cookie = cmd->arg1; 2827 retval = IP_FW_DUMMYNET; 2828 goto done; 2829 2830 case O_DIVERT: 2831 case O_TEE: { 2832 struct divert_tag *dt; 2833 2834 if (args->eh) /* not on layer 2 */ 2835 break; 2836 mtag = m_tag_get(PACKET_TAG_DIVERT, 2837 sizeof(struct divert_tag), 2838 M_NOWAIT); 2839 if (mtag == NULL) { 2840 /* XXX statistic */ 2841 /* drop packet */ 2842 IPFW_RUNLOCK(chain); 2843 return (IP_FW_DENY); 2844 } 2845 dt = (struct divert_tag *)(mtag+1); 2846 dt->cookie = f->rulenum; 2847 dt->info = cmd->arg1; 2848 m_tag_prepend(m, mtag); 2849 retval = (cmd->opcode == O_DIVERT) ? 
2850 IP_FW_DIVERT : IP_FW_TEE; 2851 goto done; 2852 } 2853 2854 case O_COUNT: 2855 case O_SKIPTO: 2856 f->pcnt++; /* update stats */ 2857 f->bcnt += pktlen; 2858 f->timestamp = time_second; 2859 if (cmd->opcode == O_COUNT) 2860 goto next_rule; 2861 /* handle skipto */ 2862 if (f->next_rule == NULL) 2863 lookup_next_rule(f); 2864 f = f->next_rule; 2865 goto again; 2866 2867 case O_REJECT: 2868 /* 2869 * Drop the packet and send a reject notice 2870 * if the packet is not ICMP (or is an ICMP 2871 * query), and it is not multicast/broadcast. 2872 */ 2873 /* XXX: IPv6 missing!?! */ 2874 if (hlen > 0 && is_ipv4 && 2875 (proto != IPPROTO_ICMP || 2876 is_icmp_query(ICMP(ulp))) && 2877 !(m->m_flags & (M_BCAST|M_MCAST)) && 2878 !IN_MULTICAST(ntohl(dst_ip.s_addr))) { 2879 send_reject(args, cmd->arg1, 2880 offset,ip_len); 2881 m = args->m; 2882 } 2883 /* FALLTHROUGH */ 2884 case O_DENY: 2885 retval = IP_FW_DENY; 2886 goto done; 2887 2888 case O_FORWARD_IP: 2889 if (args->eh) /* not valid on layer2 pkts */ 2890 break; 2891 if (!q || dyn_dir == MATCH_FORWARD) 2892 args->next_hop = 2893 &((ipfw_insn_sa *)cmd)->sa; 2894 retval = IP_FW_PASS; 2895 goto done; 2896 2897 case O_NETGRAPH: 2898 case O_NGTEE: 2899 args->rule = f; /* report matching rule */ 2900 args->cookie = cmd->arg1; 2901 retval = (cmd->opcode == O_NETGRAPH) ? 
							IP_FW_NETGRAPH : IP_FW_NGTEE;
				goto done;

			default:
				panic("-- unknown opcode %d\n", cmd->opcode);
			} /* end of switch() on opcodes */

			/* Apply the F_NOT / F_OR opcode modifiers. */
			if (cmd->len & F_NOT)
				match = !match;

			if (match) {
				if (cmd->len & F_OR)
					skip_or = 1;
			} else {
				if (!(cmd->len & F_OR)) /* not an OR block, */
					break; /* try next rule */
			}

		}	/* end of inner for, scan opcodes */

next_rule:;		/* try next rule */

	}		/* end of outer for, scan rules */
	/*
	 * The default rule is an unconditional action, so we should
	 * never fall off the end of the list; deny if we do.
	 */
	printf("ipfw: ouch!, skip past end of rules, denying packet\n");
	IPFW_RUNLOCK(chain);
	return (IP_FW_DENY);

done:
	/* Update statistics */
	f->pcnt++;
	f->bcnt += pktlen;
	f->timestamp = time_second;
	IPFW_RUNLOCK(chain);
	return (retval);

pullup_failed:
	if (fw_verbose)
		printf("ipfw: pullup failed\n");
	return (IP_FW_DENY);
}

/*
 * When a rule is added/deleted, clear the next_rule pointers in all rules.
 * These will be reconstructed on the fly as packets are matched.
 */
static void
flush_rule_ptrs(struct ip_fw_chain *chain)
{
	struct ip_fw *rule;

	IPFW_WLOCK_ASSERT(chain);

	for (rule = chain->rules; rule; rule = rule->next)
		rule->next_rule = NULL;
}

/*
 * When pipes/queues are deleted, clear the "pipe_ptr" pointer to a given
 * pipe/queue, or to all of them (match == NULL).
 */
void
flush_pipe_ptrs(struct dn_flow_set *match)
{
	struct ip_fw *rule;

	IPFW_WLOCK(&layer3_chain);
	for (rule = layer3_chain.rules; rule; rule = rule->next) {
		ipfw_insn_pipe *cmd = (ipfw_insn_pipe *)ACTION_PTR(rule);

		/* Only pipe/queue actions carry a pipe_ptr. */
		if (cmd->o.opcode != O_PIPE && cmd->o.opcode != O_QUEUE)
			continue;
		/*
		 * XXX Use bcmp/bzero to handle pipe_ptr to overcome
		 * possible alignment problems on 64-bit architectures.
		 * This code is seldom used so we do not worry too
		 * much about efficiency.
		 */
		if (match == NULL ||
		    !bcmp(&cmd->pipe_ptr, &match, sizeof(match)) )
			bzero(&cmd->pipe_ptr, sizeof(cmd->pipe_ptr));
	}
	IPFW_WUNLOCK(&layer3_chain);
}

/*
 * Add a new rule to the list. Copy the rule into a malloc'ed area, then
 * possibly create a rule number and add the rule to the list.
 * Update the rule_number in the input struct so the caller knows it as well.
 *
 * Returns 0 on success, EINVAL if the chain is empty and the rule is
 * not the default rule, ENOSPC if memory cannot be allocated.
 */
static int
add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
{
	struct ip_fw *rule, *f, *prev;
	int l = RULESIZE(input_rule);

	/* The very first rule installed must be the default rule. */
	if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
		return (EINVAL);

	rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO);
	if (rule == NULL)
		return (ENOSPC);

	bcopy(input_rule, rule, l);

	rule->next = NULL;
	rule->next_rule = NULL;

	/* The new copy starts with fresh statistics. */
	rule->pcnt = 0;
	rule->bcnt = 0;
	rule->timestamp = 0;

	IPFW_WLOCK(chain);

	if (chain->rules == NULL) {	/* default rule */
		chain->rules = rule;
		goto done;
	}

	/*
	 * If rulenum is 0, find highest numbered rule before the
	 * default rule, and add autoinc_step
	 */
	if (autoinc_step < 1)
		autoinc_step = 1;
	else if (autoinc_step > 1000)
		autoinc_step = 1000;
	if (rule->rulenum == 0) {
		/*
		 * locate the highest numbered rule before default
		 */
		for (f = chain->rules; f; f = f->next) {
			if (f->rulenum == IPFW_DEFAULT_RULE)
				break;
			rule->rulenum = f->rulenum;
		}
		if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step)
			rule->rulenum += autoinc_step;
		/* Report the assigned number back to the caller. */
		input_rule->rulenum = rule->rulenum;
	}

	/*
	 * Now insert the new rule in the right place in the sorted list.
	 */
	for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) {
		if (f->rulenum > rule->rulenum) { /* found the location */
			if (prev) {
				rule->next = f;
				prev->next = rule;
			} else { /* head insert */
				rule->next = chain->rules;
				chain->rules = rule;
			}
			break;
		}
	}
	flush_rule_ptrs(chain);
done:
	static_count++;
	static_len += l;
	IPFW_WUNLOCK(chain);
	DEB(printf("ipfw: installed rule %d, static count now %d\n",
		rule->rulenum, static_count);)
	return (0);
}

/**
 * Remove a static rule (including derived * dynamic rules)
 * and place it on the ``reap list'' for later reclamation.
 * The caller is in charge of clearing rule pointers to avoid
 * dangling pointers.
 * @return a pointer to the next entry.
 * Arguments are not checked, so they better be correct.
 */
static struct ip_fw *
remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, struct ip_fw *prev)
{
	struct ip_fw *n;
	int l = RULESIZE(rule);

	IPFW_WLOCK_ASSERT(chain);

	n = rule->next;
	/* First purge any dynamic rules derived from this one. */
	IPFW_DYN_LOCK();
	remove_dyn_rule(rule, NULL /* force removal */);
	IPFW_DYN_UNLOCK();
	if (prev == NULL)
		chain->rules = n;
	else
		prev->next = n;
	static_count--;
	static_len -= l;

	/* Defer the free: link the rule onto the chain's reap list. */
	rule->next = chain->reap;
	chain->reap = rule;

	return n;
}

/**
 * Reclaim storage associated with a list of rules. This is
 * typically the list created using remove_rule.
 */
static void
reap_rules(struct ip_fw *head)
{
	struct ip_fw *rule;

	while ((rule = head) != NULL) {
		head = head->next;
		/* Let dummynet drop its references to the rule first. */
		if (DUMMYNET_LOADED)
			ip_dn_ruledel_ptr(rule);
		free(rule, M_IPFW);
	}
}

/*
 * Remove all rules from a chain (except rules in set RESVD_SET
 * unless kill_default = 1).
 * The caller is responsible for
 * reclaiming storage for the rules left in chain->reap.
 */
static void
free_chain(struct ip_fw_chain *chain, int kill_default)
{
	struct ip_fw *prev, *rule;

	IPFW_WLOCK_ASSERT(chain);

	flush_rule_ptrs(chain); /* more efficient to do outside the loop */
	for (prev = NULL, rule = chain->rules; rule ; )
		if (kill_default || rule->set != RESVD_SET)
			rule = remove_rule(chain, rule, prev);
		else {
			prev = rule;
			rule = rule->next;
		}
}

/**
 * Remove all rules with given number, and also do set manipulation.
 * Assumes chain != NULL && *chain != NULL.
 *
 * The argument is an u_int32_t. The low 16 bit are the rule or set number,
 * the next 8 bits are the new set, the top 8 bits are the command:
 *
 *	0	delete rules with given number
 *	1	delete rules with given set number
 *	2	move rules with given number to new set
 *	3	move rules with given set number to new set
 *	4	swap sets with given numbers
 */
static int
del_entry(struct ip_fw_chain *chain, u_int32_t arg)
{
	struct ip_fw *prev = NULL, *rule;
	u_int16_t rulenum;	/* rule or old_set */
	u_int8_t cmd, new_set;

	rulenum = arg & 0xffff;
	cmd = (arg >> 24) & 0xff;
	new_set = (arg >> 16) & 0xff;

	/* Validate command and set/rule numbers before touching the chain. */
	if (cmd > 4)
		return EINVAL;
	if (new_set > RESVD_SET)
		return EINVAL;
	if (cmd == 0 || cmd == 2) {
		if (rulenum >= IPFW_DEFAULT_RULE)
			return EINVAL;
	} else {
		if (rulenum > RESVD_SET)	/* old_set */
			return EINVAL;
	}

	IPFW_WLOCK(chain);
	rule = chain->rules;
	chain->reap = NULL;
	switch (cmd) {
	case 0:	/* delete rules with given number */
		/*
		 * locate first rule to delete
		 */
		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
			;
		if (rule->rulenum != rulenum) {
			IPFW_WUNLOCK(chain);
			return EINVAL;
		}

		/*
		 * flush pointers outside the loop, then delete all matching
		 * rules. prev remains the same throughout the cycle.
		 */
		flush_rule_ptrs(chain);
		while (rule->rulenum == rulenum)
			rule = remove_rule(chain, rule, prev);
		break;

	case 1:	/* delete all rules with given set number */
		flush_rule_ptrs(chain);
		rule = chain->rules;
		/* The default rule (set RESVD_SET) is never removed here. */
		while (rule->rulenum < IPFW_DEFAULT_RULE)
			if (rule->set == rulenum)
				rule = remove_rule(chain, rule, prev);
			else {
				prev = rule;
				rule = rule->next;
			}
		break;

	case 2:	/* move rules with given number to new set */
		rule = chain->rules;
		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
			if (rule->rulenum == rulenum)
				rule->set = new_set;
		break;

	case 3: /* move rules with given set number to new set */
		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
			if (rule->set == rulenum)
				rule->set = new_set;
		break;

	case 4: /* swap two sets */
		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
			if (rule->set == rulenum)
				rule->set = new_set;
			else if (rule->set == new_set)
				rule->set = rulenum;
		break;
	}
	/*
	 * Look for rules to reclaim.  We grab the list before
	 * releasing the lock then reclaim them w/o the lock to
	 * avoid a LOR with dummynet.
	 */
	rule = chain->reap;
	chain->reap = NULL;
	IPFW_WUNLOCK(chain);
	if (rule)
		reap_rules(rule);
	return 0;
}

/*
 * Clear counters for a specific rule.
 * The enclosing "table" is assumed locked.
 */
static void
clear_counters(struct ip_fw *rule, int log_only)
{
	ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);

	if (log_only == 0) {
		rule->bcnt = rule->pcnt = 0;
		rule->timestamp = 0;
	}
	/* Replenish the logging budget if this rule has an O_LOG opcode. */
	if (l->o.opcode == O_LOG)
		l->log_left = l->max_log;
}

/**
 * Reset some or all counters on firewall rules.
 * @arg frwl is null to clear all entries, or contains a specific
 * rule number.
 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
 * Returns 0 on success, EINVAL if no rule matches rulenum.
 */
static int
zero_entry(struct ip_fw_chain *chain, int rulenum, int log_only)
{
	struct ip_fw *rule;
	char *msg;

	IPFW_WLOCK(chain);
	if (rulenum == 0) {
		/* Clear every rule (and the no-rule counter). */
		norule_counter = 0;
		for (rule = chain->rules; rule; rule = rule->next)
			clear_counters(rule, log_only);
		msg = log_only ? "ipfw: All logging counts reset.\n" :
				"ipfw: Accounting cleared.\n";
	} else {
		int cleared = 0;
		/*
		 * We can have multiple rules with the same number, so we
		 * need to clear them all.
		 */
		for (rule = chain->rules; rule; rule = rule->next)
			if (rule->rulenum == rulenum) {
				while (rule && rule->rulenum == rulenum) {
					clear_counters(rule, log_only);
					rule = rule->next;
				}
				cleared = 1;
				break;
			}
		if (!cleared) {	/* we did not find any matching rules */
			IPFW_WUNLOCK(chain);
			return (EINVAL);
		}
		msg = log_only ? "ipfw: Entry %d logging count reset.\n" :
				"ipfw: Entry %d cleared.\n";
	}
	IPFW_WUNLOCK(chain);

	if (fw_verbose)
		log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
	return (0);
}

/*
 * Check validity of the structure before insert.
 * Fortunately rules are simple, so this mostly need to check rule sizes.
 */
static int
check_ipfw_struct(struct ip_fw *rule, int size)
{
	int l, cmdlen = 0;
	int have_action=0;
	ipfw_insn *cmd;

	if (size < sizeof(*rule)) {
		printf("ipfw: rule too short\n");
		return (EINVAL);
	}
	/* first, check for valid size */
	l = RULESIZE(rule);
	if (l != size) {
		printf("ipfw: size mismatch (have %d want %d)\n", size, l);
		return (EINVAL);
	}
	/* The action offset must lie inside the instruction stream. */
	if (rule->act_ofs >= rule->cmd_len) {
		printf("ipfw: bogus action offset (%u > %u)\n",
		    rule->act_ofs, rule->cmd_len - 1);
		return (EINVAL);
	}
	/*
	 * Now go for the individual checks. Very simple ones, basically only
	 * instruction sizes.
	 */
	for (l = rule->cmd_len, cmd = rule->cmd ;
			l > 0 ; l -= cmdlen, cmd += cmdlen) {
		cmdlen = F_LEN(cmd);
		if (cmdlen > l) {
			printf("ipfw: opcode %d size truncated\n",
			    cmd->opcode);
			return EINVAL;
		}
		DEB(printf("ipfw: opcode %d\n", cmd->opcode);)
		switch (cmd->opcode) {
		/* Opcodes taking a bare ipfw_insn (no payload). */
		case O_PROBE_STATE:
		case O_KEEP_STATE:
		case O_PROTO:
		case O_IP_SRC_ME:
		case O_IP_DST_ME:
		case O_LAYER2:
		case O_IN:
		case O_FRAG:
		case O_DIVERTED:
		case O_IPOPT:
		case O_IPTOS:
		case O_IPPRECEDENCE:
		case O_IPVER:
		case O_TCPWIN:
		case O_TCPFLAGS:
		case O_TCPOPTS:
		case O_ESTAB:
		case O_VERREVPATH:
		case O_VERSRCREACH:
		case O_ANTISPOOF:
		case O_IPSEC:
		case O_IP6_SRC_ME:
		case O_IP6_DST_ME:
		case O_EXT_HDR:
		case O_IP6:
		case O_IP4:
			if (cmdlen != F_INSN_SIZE(ipfw_insn))
				goto bad_size;
			break;

		/* Opcodes carrying one 32-bit operand. */
		case O_UID:
		case O_GID:
		case O_JAIL:
		case O_IP_SRC:
		case O_IP_DST:
		case O_TCPSEQ:
		case O_TCPACK:
		case O_PROB:
		case O_ICMPTYPE:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
				goto bad_size;
			break;

		case O_LIMIT:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
				goto bad_size;
			break;

		case O_LOG:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
				goto bad_size;

			/* Start the new rule with a full logging budget. */
			((ipfw_insn_log *)cmd)->log_left =
			    ((ipfw_insn_log *)cmd)->max_log;

			break;

		case O_IP_SRC_MASK:
		case O_IP_DST_MASK:
			/* only odd command lengths */
			if ( !(cmdlen & 1) || cmdlen > 31)
				goto bad_size;
			break;

		case O_IP_SRC_SET:
		case O_IP_DST_SET:
			if (cmd->arg1 == 0 || cmd->arg1 > 256) {
				printf("ipfw: invalid set size %d\n",
					cmd->arg1);
				return EINVAL;
			}
			/* One u_int32_t bitmap word per 32 addresses. */
			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
			    (cmd->arg1+31)/32 )
				goto bad_size;
			break;

		case O_IP_SRC_LOOKUP:
		case O_IP_DST_LOOKUP:
			if (cmd->arg1 >= IPFW_TABLES_MAX) {
				printf("ipfw: invalid table number %d\n",
				    cmd->arg1);
				return (EINVAL);
			}
			if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
			    cmdlen != F_INSN_SIZE(ipfw_insn_u32))
				goto bad_size;
			break;

		case O_MACADDR2:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
				goto bad_size;
			break;

		case O_NOP:
		case O_IPID:
		case O_IPTTL:
		case O_IPLEN:
		case O_TCPDATALEN:
			if (cmdlen < 1 || cmdlen > 31)
				goto bad_size;
			break;

		case O_MAC_TYPE:
		case O_IP_SRCPORT:
		case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
			if (cmdlen < 2 || cmdlen > 31)
				goto bad_size;
			break;

		case O_RECV:
		case O_XMIT:
		case O_VIA:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
				goto bad_size;
			break;

		case O_ALTQ:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
				goto bad_size;
			break;

		case O_PIPE:
		case O_QUEUE:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
				goto bad_size;
			goto check_action;

		case O_FORWARD_IP:
#ifdef IPFIREWALL_FORWARD
			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
				goto bad_size;
			goto check_action;
#else
			return EINVAL;
#endif

		case O_DIVERT:
		case O_TEE:
			/* Reject if the divert module is not loaded. */
			if (ip_divert_ptr == NULL)
				return EINVAL;
			else
				goto check_size;
		case O_NETGRAPH:
		case O_NGTEE:
			/* Reject if ng_ipfw is not loaded. */
			if (!NG_IPFW_LOADED)
				return EINVAL;
			else
				goto check_size;
		case O_FORWARD_MAC: /* XXX not implemented yet */
		case O_CHECK_STATE:
		case O_COUNT:
		case O_ACCEPT:
		case O_DENY:
		case O_REJECT:
		case O_SKIPTO:
check_size:
			if (cmdlen != F_INSN_SIZE(ipfw_insn))
				goto bad_size;
check_action:
			/* Exactly one action, and it must be the last opcode. */
			if (have_action) {
				printf("ipfw: opcode %d, multiple actions"
					" not allowed\n",
					cmd->opcode);
				return EINVAL;
			}
			have_action = 1;
			if (l != cmdlen) {
				printf("ipfw: opcode %d, action must be"
					" last opcode\n",
					cmd->opcode);
				return EINVAL;
			}
			break;
		case O_IP6_SRC:
		case O_IP6_DST:
			if (cmdlen != F_INSN_SIZE(struct in6_addr) +
			    F_INSN_SIZE(ipfw_insn))
				goto bad_size;
			break;

		case O_FLOW6ID:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
			    ((ipfw_insn_u32 *)cmd)->o.arg1)
				goto bad_size;
			break;

		case O_IP6_SRC_MASK:
		case O_IP6_DST_MASK:
			if ( !(cmdlen & 1) || cmdlen > 127)
				goto bad_size;
			break;
		case O_ICMP6TYPE:
			if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
				goto bad_size;
			break;

		default:
			printf("ipfw: opcode %d, unknown opcode\n",
				cmd->opcode);
			return EINVAL;
		}
	}
	if (have_action == 0) {
		printf("ipfw: missing action\n");
		return EINVAL;
	}
	return 0;

bad_size:
	printf("ipfw: opcode %d size %d wrong\n",
		cmd->opcode, cmdlen);
	return EINVAL;
}

/*
 * Copy the static and dynamic rules to the supplied buffer
 * and return the amount of space actually used.
3567 */ 3568static size_t 3569ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) 3570{ 3571 char *bp = buf; 3572 char *ep = bp + space; 3573 struct ip_fw *rule; 3574 int i; 3575 3576 /* XXX this can take a long time and locking will block packet flow */ 3577 IPFW_RLOCK(chain); 3578 for (rule = chain->rules; rule ; rule = rule->next) { 3579 /* 3580 * Verify the entry fits in the buffer in case the 3581 * rules changed between calculating buffer space and 3582 * now. This would be better done using a generation 3583 * number but should suffice for now. 3584 */ 3585 i = RULESIZE(rule); 3586 if (bp + i <= ep) { 3587 bcopy(rule, bp, i); 3588 bcopy(&set_disable, &(((struct ip_fw *)bp)->next_rule), 3589 sizeof(set_disable)); 3590 bp += i; 3591 } 3592 } 3593 IPFW_RUNLOCK(chain); 3594 if (ipfw_dyn_v) { 3595 ipfw_dyn_rule *p, *last = NULL; 3596 3597 IPFW_DYN_LOCK(); 3598 for (i = 0 ; i < curr_dyn_buckets; i++) 3599 for (p = ipfw_dyn_v[i] ; p != NULL; p = p->next) { 3600 if (bp + sizeof *p <= ep) { 3601 ipfw_dyn_rule *dst = 3602 (ipfw_dyn_rule *)bp; 3603 bcopy(p, dst, sizeof *p); 3604 bcopy(&(p->rule->rulenum), &(dst->rule), 3605 sizeof(p->rule->rulenum)); 3606 /* 3607 * store a non-null value in "next". 3608 * The userland code will interpret a 3609 * NULL here as a marker 3610 * for the last dynamic rule. 3611 */ 3612 bcopy(&dst, &dst->next, sizeof(dst)); 3613 last = dst; 3614 dst->expire = 3615 TIME_LEQ(dst->expire, time_second) ? 3616 0 : dst->expire - time_second ; 3617 bp += sizeof(ipfw_dyn_rule); 3618 } 3619 } 3620 IPFW_DYN_UNLOCK(); 3621 if (last != NULL) /* mark last dynamic rule */ 3622 bzero(&last->next, sizeof(last)); 3623 } 3624 return (bp - (char *)buf); 3625} 3626 3627 3628/** 3629 * {set|get}sockopt parser. 
3630 */ 3631static int 3632ipfw_ctl(struct sockopt *sopt) 3633{ 3634#define RULE_MAXSIZE (256*sizeof(u_int32_t)) 3635 int error, rule_num; 3636 size_t size; 3637 struct ip_fw *buf, *rule; 3638 u_int32_t rulenum[2]; 3639 3640 error = suser(sopt->sopt_td); 3641 if (error) 3642 return (error); 3643 3644 /* 3645 * Disallow modifications in really-really secure mode, but still allow 3646 * the logging counters to be reset. 3647 */ 3648 if (sopt->sopt_name == IP_FW_ADD || 3649 (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) { 3650#if __FreeBSD_version >= 500034 3651 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); 3652 if (error) 3653 return (error); 3654#else /* FreeBSD 4.x */ 3655 if (securelevel >= 3) 3656 return (EPERM); 3657#endif 3658 } 3659 3660 error = 0; 3661 3662 switch (sopt->sopt_name) { 3663 case IP_FW_GET: 3664 /* 3665 * pass up a copy of the current rules. Static rules 3666 * come first (the last of which has number IPFW_DEFAULT_RULE), 3667 * followed by a possibly empty list of dynamic rule. 3668 * The last dynamic rule has NULL in the "next" field. 3669 * 3670 * Note that the calculated size is used to bound the 3671 * amount of data returned to the user. The rule set may 3672 * change between calculating the size and returning the 3673 * data in which case we'll just return what fits. 3674 */ 3675 size = static_len; /* size of static rules */ 3676 if (ipfw_dyn_v) /* add size of dyn.rules */ 3677 size += (dyn_count * sizeof(ipfw_dyn_rule)); 3678 3679 /* 3680 * XXX todo: if the user passes a short length just to know 3681 * how much room is needed, do not bother filling up the 3682 * buffer, just jump to the sooptcopyout. 3683 */ 3684 buf = malloc(size, M_TEMP, M_WAITOK); 3685 error = sooptcopyout(sopt, buf, 3686 ipfw_getrules(&layer3_chain, buf, size)); 3687 free(buf, M_TEMP); 3688 break; 3689 3690 case IP_FW_FLUSH: 3691 /* 3692 * Normally we cannot release the lock on each iteration. 
3693 * We could do it here only because we start from the head all 3694 * the times so there is no risk of missing some entries. 3695 * On the other hand, the risk is that we end up with 3696 * a very inconsistent ruleset, so better keep the lock 3697 * around the whole cycle. 3698 * 3699 * XXX this code can be improved by resetting the head of 3700 * the list to point to the default rule, and then freeing 3701 * the old list without the need for a lock. 3702 */ 3703 3704 IPFW_WLOCK(&layer3_chain); 3705 layer3_chain.reap = NULL; 3706 free_chain(&layer3_chain, 0 /* keep default rule */); 3707 rule = layer3_chain.reap, layer3_chain.reap = NULL; 3708 IPFW_WUNLOCK(&layer3_chain); 3709 if (layer3_chain.reap != NULL) 3710 reap_rules(rule); 3711 break; 3712 3713 case IP_FW_ADD: 3714 rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK); 3715 error = sooptcopyin(sopt, rule, RULE_MAXSIZE, 3716 sizeof(struct ip_fw) ); 3717 if (error == 0) 3718 error = check_ipfw_struct(rule, sopt->sopt_valsize); 3719 if (error == 0) { 3720 error = add_rule(&layer3_chain, rule); 3721 size = RULESIZE(rule); 3722 if (!error && sopt->sopt_dir == SOPT_GET) 3723 error = sooptcopyout(sopt, rule, size); 3724 } 3725 free(rule, M_TEMP); 3726 break; 3727 3728 case IP_FW_DEL: 3729 /* 3730 * IP_FW_DEL is used for deleting single rules or sets, 3731 * and (ab)used to atomically manipulate sets. Argument size 3732 * is used to distinguish between the two: 3733 * sizeof(u_int32_t) 3734 * delete single rule or set of rules, 3735 * or reassign rules (or sets) to a different set. 3736 * 2*sizeof(u_int32_t) 3737 * atomic disable/enable sets. 3738 * first u_int32_t contains sets to be disabled, 3739 * second u_int32_t contains sets to be enabled. 
3740 */ 3741 error = sooptcopyin(sopt, rulenum, 3742 2*sizeof(u_int32_t), sizeof(u_int32_t)); 3743 if (error) 3744 break; 3745 size = sopt->sopt_valsize; 3746 if (size == sizeof(u_int32_t)) /* delete or reassign */ 3747 error = del_entry(&layer3_chain, rulenum[0]); 3748 else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */ 3749 set_disable = 3750 (set_disable | rulenum[0]) & ~rulenum[1] & 3751 ~(1<<RESVD_SET); /* set RESVD_SET always enabled */ 3752 else 3753 error = EINVAL; 3754 break; 3755 3756 case IP_FW_ZERO: 3757 case IP_FW_RESETLOG: /* argument is an int, the rule number */ 3758 rule_num = 0; 3759 if (sopt->sopt_val != 0) { 3760 error = sooptcopyin(sopt, &rule_num, 3761 sizeof(int), sizeof(int)); 3762 if (error) 3763 break; 3764 } 3765 error = zero_entry(&layer3_chain, rule_num, 3766 sopt->sopt_name == IP_FW_RESETLOG); 3767 break; 3768 3769 case IP_FW_TABLE_ADD: 3770 { 3771 ipfw_table_entry ent; 3772 3773 error = sooptcopyin(sopt, &ent, 3774 sizeof(ent), sizeof(ent)); 3775 if (error) 3776 break; 3777 error = add_table_entry(ent.tbl, ent.addr, 3778 ent.masklen, ent.value); 3779 } 3780 break; 3781 3782 case IP_FW_TABLE_DEL: 3783 { 3784 ipfw_table_entry ent; 3785 3786 error = sooptcopyin(sopt, &ent, 3787 sizeof(ent), sizeof(ent)); 3788 if (error) 3789 break; 3790 error = del_table_entry(ent.tbl, ent.addr, ent.masklen); 3791 } 3792 break; 3793 3794 case IP_FW_TABLE_FLUSH: 3795 { 3796 u_int16_t tbl; 3797 3798 error = sooptcopyin(sopt, &tbl, 3799 sizeof(tbl), sizeof(tbl)); 3800 if (error) 3801 break; 3802 error = flush_table(tbl); 3803 } 3804 break; 3805 3806 case IP_FW_TABLE_GETSIZE: 3807 { 3808 u_int32_t tbl, cnt; 3809 3810 if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), 3811 sizeof(tbl)))) 3812 break; 3813 if ((error = count_table(tbl, &cnt))) 3814 break; 3815 error = sooptcopyout(sopt, &cnt, sizeof(cnt)); 3816 } 3817 break; 3818 3819 case IP_FW_TABLE_LIST: 3820 { 3821 ipfw_table *tbl; 3822 3823 if (sopt->sopt_valsize < sizeof(*tbl)) { 3824 error = 
EINVAL; 3825 break; 3826 } 3827 size = sopt->sopt_valsize; 3828 tbl = malloc(size, M_TEMP, M_WAITOK); 3829 if (tbl == NULL) { 3830 error = ENOMEM; 3831 break; 3832 } 3833 error = sooptcopyin(sopt, tbl, size, sizeof(*tbl)); 3834 if (error) { 3835 free(tbl, M_TEMP); 3836 break; 3837 } 3838 tbl->size = (size - sizeof(*tbl)) / 3839 sizeof(ipfw_table_entry); 3840 error = dump_table(tbl); 3841 if (error) { 3842 free(tbl, M_TEMP); 3843 break; 3844 } 3845 error = sooptcopyout(sopt, tbl, size); 3846 free(tbl, M_TEMP); 3847 } 3848 break; 3849 3850 default: 3851 printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name); 3852 error = EINVAL; 3853 } 3854 3855 return (error); 3856#undef RULE_MAXSIZE 3857} 3858 3859/** 3860 * dummynet needs a reference to the default rule, because rules can be 3861 * deleted while packets hold a reference to them. When this happens, 3862 * dummynet changes the reference to the default rule (it could well be a 3863 * NULL pointer, but this way we do not need to check for the special 3864 * case, plus here he have info on the default behaviour). 3865 */ 3866struct ip_fw *ip_fw_default_rule; 3867 3868/* 3869 * This procedure is only used to handle keepalives. It is invoked 3870 * every dyn_keepalive_period 3871 */ 3872static void 3873ipfw_tick(void * __unused unused) 3874{ 3875 struct mbuf *m0, *m, *mnext, **mtailp; 3876 int i; 3877 ipfw_dyn_rule *q; 3878 3879 if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0) 3880 goto done; 3881 3882 /* 3883 * We make a chain of packets to go out here -- not deferring 3884 * until after we drop the IPFW dynamic rule lock would result 3885 * in a lock order reversal with the normal packet input -> ipfw 3886 * call stack. 
3887 */ 3888 m0 = NULL; 3889 mtailp = &m0; 3890 IPFW_DYN_LOCK(); 3891 for (i = 0 ; i < curr_dyn_buckets ; i++) { 3892 for (q = ipfw_dyn_v[i] ; q ; q = q->next ) { 3893 if (q->dyn_type == O_LIMIT_PARENT) 3894 continue; 3895 if (q->id.proto != IPPROTO_TCP) 3896 continue; 3897 if ( (q->state & BOTH_SYN) != BOTH_SYN) 3898 continue; 3899 if (TIME_LEQ( time_second+dyn_keepalive_interval, 3900 q->expire)) 3901 continue; /* too early */ 3902 if (TIME_LEQ(q->expire, time_second)) 3903 continue; /* too late, rule expired */ 3904 3905 *mtailp = send_pkt(&(q->id), q->ack_rev - 1, 3906 q->ack_fwd, TH_SYN); 3907 if (*mtailp != NULL) 3908 mtailp = &(*mtailp)->m_nextpkt; 3909 *mtailp = send_pkt(&(q->id), q->ack_fwd - 1, 3910 q->ack_rev, 0); 3911 if (*mtailp != NULL) 3912 mtailp = &(*mtailp)->m_nextpkt; 3913 } 3914 } 3915 IPFW_DYN_UNLOCK(); 3916 for (m = mnext = m0; m != NULL; m = mnext) { 3917 mnext = m->m_nextpkt; 3918 m->m_nextpkt = NULL; 3919 ip_output(m, NULL, NULL, 0, NULL, NULL); 3920 } 3921done: 3922 callout_reset(&ipfw_timeout, dyn_keepalive_period*hz, ipfw_tick, NULL); 3923} 3924 3925int 3926ipfw_init(void) 3927{ 3928 struct ip_fw default_rule; 3929 int error; 3930 3931 layer3_chain.rules = NULL; 3932 layer3_chain.want_write = 0; 3933 layer3_chain.busy_count = 0; 3934 cv_init(&layer3_chain.cv, "Condition variable for IPFW rw locks"); 3935 IPFW_LOCK_INIT(&layer3_chain); 3936 ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule zone", 3937 sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, 3938 UMA_ALIGN_PTR, 0); 3939 IPFW_DYN_LOCK_INIT(); 3940 callout_init(&ipfw_timeout, NET_CALLOUT_MPSAFE); 3941 3942 bzero(&default_rule, sizeof default_rule); 3943 3944 default_rule.act_ofs = 0; 3945 default_rule.rulenum = IPFW_DEFAULT_RULE; 3946 default_rule.cmd_len = 1; 3947 default_rule.set = RESVD_SET; 3948 3949 default_rule.cmd[0].len = 1; 3950 default_rule.cmd[0].opcode = 3951#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT 3952 1 ? 
O_ACCEPT : 3953#endif 3954 O_DENY; 3955 3956 error = add_rule(&layer3_chain, &default_rule); 3957 if (error != 0) { 3958 printf("ipfw2: error %u initializing default rule " 3959 "(support disabled)\n", error); 3960 IPFW_DYN_LOCK_DESTROY(); 3961 IPFW_LOCK_DESTROY(&layer3_chain); 3962 return (error); 3963 } 3964 3965 ip_fw_default_rule = layer3_chain.rules; 3966 printf("ipfw2 (+ipv6) initialized, divert %s, " 3967 "rule-based forwarding " 3968#ifdef IPFIREWALL_FORWARD 3969 "enabled, " 3970#else 3971 "disabled, " 3972#endif 3973 "default to %s, logging ", 3974#ifdef IPDIVERT 3975 "enabled", 3976#else 3977 "loadable", 3978#endif 3979 default_rule.cmd[0].opcode == O_ACCEPT ? "accept" : "deny"); 3980 3981#ifdef IPFIREWALL_VERBOSE 3982 fw_verbose = 1; 3983#endif 3984#ifdef IPFIREWALL_VERBOSE_LIMIT 3985 verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 3986#endif 3987 if (fw_verbose == 0) 3988 printf("disabled\n"); 3989 else if (verbose_limit == 0) 3990 printf("unlimited\n"); 3991 else 3992 printf("limited to %d packets/entry by default\n", 3993 verbose_limit); 3994 3995 init_tables(); 3996 ip_fw_ctl_ptr = ipfw_ctl; 3997 ip_fw_chk_ptr = ipfw_chk; 3998 callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL); 3999 4000 return (0); 4001} 4002 4003void 4004ipfw_destroy(void) 4005{ 4006 struct ip_fw *reap; 4007 4008 ip_fw_chk_ptr = NULL; 4009 ip_fw_ctl_ptr = NULL; 4010 callout_drain(&ipfw_timeout); 4011 IPFW_WLOCK(&layer3_chain); 4012 layer3_chain.reap = NULL; 4013 free_chain(&layer3_chain, 1 /* kill default rule */); 4014 reap = layer3_chain.reap, layer3_chain.reap = NULL; 4015 IPFW_WUNLOCK(&layer3_chain); 4016 if (reap != NULL) 4017 reap_rules(reap); 4018 flush_tables(); 4019 IPFW_DYN_LOCK_DESTROY(); 4020 uma_zdestroy(ipfw_dyn_rule_zone); 4021 IPFW_LOCK_DESTROY(&layer3_chain); 4022 printf("IP firewall unloaded\n"); 4023} 4024