ip_fw2.c revision 234597
1/*- 2 * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: stable/9/sys/netinet/ipfw/ip_fw2.c 234597 2012-04-23 07:15:15Z melifaro $"); 28 29/* 30 * The FreeBSD IP packet firewall, main file 31 */ 32 33#include "opt_ipfw.h" 34#include "opt_ipdivert.h" 35#include "opt_inet.h" 36#ifndef INET 37#error IPFIREWALL requires INET. 38#endif /* INET */ 39#include "opt_inet6.h" 40#include "opt_ipsec.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/condvar.h> 45#include <sys/eventhandler.h> 46#include <sys/malloc.h> 47#include <sys/mbuf.h> 48#include <sys/kernel.h> 49#include <sys/lock.h> 50#include <sys/jail.h> 51#include <sys/module.h> 52#include <sys/priv.h> 53#include <sys/proc.h> 54#include <sys/rwlock.h> 55#include <sys/socket.h> 56#include <sys/socketvar.h> 57#include <sys/sysctl.h> 58#include <sys/syslog.h> 59#include <sys/ucred.h> 60#include <net/ethernet.h> /* for ETHERTYPE_IP */ 61#include <net/if.h> 62#include <net/route.h> 63#include <net/pf_mtag.h> 64#include <net/vnet.h> 65 66#include <netinet/in.h> 67#include <netinet/in_var.h> 68#include <netinet/in_pcb.h> 69#include <netinet/ip.h> 70#include <netinet/ip_var.h> 71#include <netinet/ip_icmp.h> 72#include <netinet/ip_fw.h> 73#include <netinet/ipfw/ip_fw_private.h> 74#include <netinet/ip_carp.h> 75#include <netinet/pim.h> 76#include <netinet/tcp_var.h> 77#include <netinet/udp.h> 78#include <netinet/udp_var.h> 79#include <netinet/sctp.h> 80 81#include <netinet/ip6.h> 82#include <netinet/icmp6.h> 83#ifdef INET6 84#include <netinet6/in6_pcb.h> 85#include <netinet6/scope6_var.h> 86#include <netinet6/ip6_var.h> 87#endif 88 89#include <machine/in_cksum.h> /* XXX for in_cksum */ 90 91#ifdef MAC 92#include <security/mac/mac_framework.h> 93#endif 94 95/* 96 * static variables followed by global ones. 97 * All ipfw global variables are here. 98 */ 99 100/* ipfw_vnet_ready controls when we are open for business */ 101static VNET_DEFINE(int, ipfw_vnet_ready) = 0; 102#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready) 103 104static VNET_DEFINE(int, fw_deny_unknown_exthdrs); 105#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs) 106 107static VNET_DEFINE(int, fw_permit_single_frag6) = 1; 108#define V_fw_permit_single_frag6 VNET(fw_permit_single_frag6) 109 110#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT 111static int default_to_accept = 1; 112#else 113static int default_to_accept; 114#endif 115 116VNET_DEFINE(int, autoinc_step); 117VNET_DEFINE(int, fw_one_pass) = 1; 118 119VNET_DEFINE(unsigned int, fw_tables_max); 120/* Use 128 tables by default */ 121static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT; 122 123/* 124 * Each rule belongs to one of 32 different sets (0..31). 125 * The variable set_disable contains one bit per set. 126 * If the bit is set, all rules in the corresponding set 127 * are disabled. Set RESVD_SET(31) is reserved for the default rule 128 * and rules that are not deleted by the flush command, 129 * and CANNOT be disabled. 130 * Rules in set RESVD_SET can only be deleted individually. 131 */ 132VNET_DEFINE(u_int32_t, set_disable); 133#define V_set_disable VNET(set_disable) 134 135VNET_DEFINE(int, fw_verbose); 136/* counter for ipfw_log(NULL...) */ 137VNET_DEFINE(u_int64_t, norule_counter); 138VNET_DEFINE(int, verbose_limit); 139 140/* layer3_chain contains the list of rules for layer 3 */ 141VNET_DEFINE(struct ip_fw_chain, layer3_chain); 142 143ipfw_nat_t *ipfw_nat_ptr = NULL; 144struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int); 145ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; 146ipfw_nat_cfg_t *ipfw_nat_del_ptr; 147ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; 148ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; 149 150#ifdef SYSCTL_NODE 151uint32_t dummy_def = IPFW_DEFAULT_RULE; 152static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS); 153 154SYSBEGIN(f3) 155 156SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); 157SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass, 158 CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, 159 "Only do a single pass through ipfw when using dummynet(4)"); 160SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, 161 CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, 162 "Rule number auto-increment step"); 163SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose, 164 CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0, 165 "Log matches to ipfw rules"); 166SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, 167 CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, 168 "Set upper limit of matches of ipfw rules logged"); 169SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, 170 &dummy_def, 0, 171 "The default/max possible rule number."); 172SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, tables_max, 173 CTLTYPE_UINT|CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU", 174 "Maximum number of tables"); 175SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, 176 &default_to_accept, 0, 177 "Make the default rule accept all packets."); 178TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept); 179TUNABLE_INT("net.inet.ip.fw.tables_max", &default_fw_tables); 180SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count, 181 CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0, 182 "Number of static rules"); 183 184#ifdef INET6 185SYSCTL_DECL(_net_inet6_ip6); 186SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); 187SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, 188 CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0, 189 "Deny packets with unknown IPv6 Extension Headers"); 190SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6, 191 CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_permit_single_frag6), 0, 192 "Permit single packet IPv6 fragments"); 193#endif /* INET6 */ 194 195SYSEND 196 197#endif /* SYSCTL_NODE */ 198 199 200/* 201 * Some macros used in the various matching options. 202 * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T 203 * Other macros just cast void * into the appropriate type 204 */ 205#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) 206#define TCP(p) ((struct tcphdr *)(p)) 207#define SCTP(p) ((struct sctphdr *)(p)) 208#define UDP(p) ((struct udphdr *)(p)) 209#define ICMP(p) ((struct icmphdr *)(p)) 210#define ICMP6(p) ((struct icmp6_hdr *)(p)) 211 212static __inline int 213icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) 214{ 215 int type = icmp->icmp_type; 216 217 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) ); 218} 219 220#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \ 221 (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) ) 222 223static int 224is_icmp_query(struct icmphdr *icmp) 225{ 226 int type = icmp->icmp_type; 227 228 return (type <= ICMP_MAXTYPE && (TT & (1<<type)) ); 229} 230#undef TT 231 232/* 233 * The following checks use two arrays of 8 or 16 bits to store the 234 * bits that we want set or clear, respectively. They are in the 235 * low and high half of cmd->arg1 or cmd->d[0]. 236 * 237 * We scan options and store the bits we find set. We succeed if 238 * 239 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 240 * 241 * The code is sometimes optimized not to store additional variables. 242 */ 243 244static int 245flags_match(ipfw_insn *cmd, u_int8_t bits) 246{ 247 u_char want_clear; 248 bits = ~bits; 249 250 if ( ((cmd->arg1 & 0xff) & bits) != 0) 251 return 0; /* some bits we want set were clear */ 252 want_clear = (cmd->arg1 >> 8) & 0xff; 253 if ( (want_clear & bits) != want_clear) 254 return 0; /* some bits we want clear were set */ 255 return 1; 256} 257 258static int 259ipopts_match(struct ip *ip, ipfw_insn *cmd) 260{ 261 int optlen, bits = 0; 262 u_char *cp = (u_char *)(ip + 1); 263 int x = (ip->ip_hl << 2) - sizeof (struct ip); 264 265 for (; x > 0; x -= optlen, cp += optlen) { 266 int opt = cp[IPOPT_OPTVAL]; 267 268 if (opt == IPOPT_EOL) 269 break; 270 if (opt == IPOPT_NOP) 271 optlen = 1; 272 else { 273 optlen = cp[IPOPT_OLEN]; 274 if (optlen <= 0 || optlen > x) 275 return 0; /* invalid or truncated */ 276 } 277 switch (opt) { 278 279 default: 280 break; 281 282 case IPOPT_LSRR: 283 bits |= IP_FW_IPOPT_LSRR; 284 break; 285 286 case IPOPT_SSRR: 287 bits |= IP_FW_IPOPT_SSRR; 288 break; 289 290 case IPOPT_RR: 291 bits |= IP_FW_IPOPT_RR; 292 break; 293 294 case IPOPT_TS: 295 bits |= IP_FW_IPOPT_TS; 296 break; 297 } 298 } 299 return (flags_match(cmd, bits)); 300} 301 302static int 303tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) 304{ 305 int optlen, bits = 0; 306 u_char *cp = (u_char *)(tcp + 1); 307 int x = (tcp->th_off << 2) - sizeof(struct tcphdr); 308 309 for (; x > 0; x -= optlen, cp += optlen) { 310 int opt = cp[0]; 311 if (opt == TCPOPT_EOL) 312 break; 313 if (opt == TCPOPT_NOP) 314 optlen = 1; 315 else { 316 optlen = cp[1]; 317 if (optlen <= 0) 318 break; 319 } 320 321 switch (opt) { 322 323 default: 324 break; 325 326 case TCPOPT_MAXSEG: 327 bits |= IP_FW_TCPOPT_MSS; 328 break; 329 330 case TCPOPT_WINDOW: 331 bits |= IP_FW_TCPOPT_WINDOW; 332 break; 333 334 case TCPOPT_SACK_PERMITTED: 335 case TCPOPT_SACK: 336 bits |= IP_FW_TCPOPT_SACK; 337 break; 338 339 case TCPOPT_TIMESTAMP: 340 bits |= IP_FW_TCPOPT_TS; 341 break; 342 343 } 344 } 345 return (flags_match(cmd, bits)); 346} 347 348static int 349iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg) 350{ 351 if (ifp == NULL) /* no iface with this packet, match fails */ 352 return 0; 353 /* Check by name or by IP address */ 354 if (cmd->name[0] != '\0') { /* match by name */ 355 if (cmd->name[0] == '\1') /* use tablearg to match */ 356 return ipfw_lookup_table_extended(chain, cmd->p.glob, 357 ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE); 358 /* Check name */ 359 if (cmd->p.glob) { 360 if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) 361 return(1); 362 } else { 363 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) 364 return(1); 365 } 366 } else { 367#ifdef __FreeBSD__ /* and OSX too ? */ 368 struct ifaddr *ia; 369 370 if_addr_rlock(ifp); 371 TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { 372 if (ia->ifa_addr->sa_family != AF_INET) 373 continue; 374 if (cmd->p.ip.s_addr == ((struct sockaddr_in *) 375 (ia->ifa_addr))->sin_addr.s_addr) { 376 if_addr_runlock(ifp); 377 return(1); /* match */ 378 } 379 } 380 if_addr_runlock(ifp); 381#endif /* __FreeBSD__ */ 382 } 383 return(0); /* no match, fail ... */ 384} 385 386/* 387 * The verify_path function checks if a route to the src exists and 388 * if it is reachable via ifp (when provided). 389 * 390 * The 'verrevpath' option checks that the interface that an IP packet 391 * arrives on is the same interface that traffic destined for the 392 * packet's source address would be routed out of. 393 * The 'versrcreach' option just checks that the source address is 394 * reachable via any route (except default) in the routing table. 395 * These two are a measure to block forged packets. This is also 396 * commonly known as "anti-spoofing" or Unicast Reverse Path 397 * Forwarding (Unicast RFP) in Cisco-ese. The name of the knobs 398 * is purposely reminiscent of the Cisco IOS command, 399 * 400 * ip verify unicast reverse-path 401 * ip verify unicast source reachable-via any 402 * 403 * which implements the same functionality. But note that the syntax 404 * is misleading, and the check may be performed on all IP packets 405 * whether unicast, multicast, or broadcast. 406 */ 407static int 408verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) 409{ 410#ifndef __FreeBSD__ 411 return 0; 412#else 413 struct route ro; 414 struct sockaddr_in *dst; 415 416 bzero(&ro, sizeof(ro)); 417 418 dst = (struct sockaddr_in *)&(ro.ro_dst); 419 dst->sin_family = AF_INET; 420 dst->sin_len = sizeof(*dst); 421 dst->sin_addr = src; 422 in_rtalloc_ign(&ro, 0, fib); 423 424 if (ro.ro_rt == NULL) 425 return 0; 426 427 /* 428 * If ifp is provided, check for equality with rtentry. 429 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, 430 * in order to pass packets injected back by if_simloop(): 431 * if useloopback == 1 routing entry (via lo0) for our own address 432 * may exist, so we need to handle routing assymetry. 433 */ 434 if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { 435 RTFREE(ro.ro_rt); 436 return 0; 437 } 438 439 /* if no ifp provided, check if rtentry is not default route */ 440 if (ifp == NULL && 441 satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) { 442 RTFREE(ro.ro_rt); 443 return 0; 444 } 445 446 /* or if this is a blackhole/reject route */ 447 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 448 RTFREE(ro.ro_rt); 449 return 0; 450 } 451 452 /* found valid route */ 453 RTFREE(ro.ro_rt); 454 return 1; 455#endif /* __FreeBSD__ */ 456} 457 458#ifdef INET6 459/* 460 * ipv6 specific rules here... 461 */ 462static __inline int 463icmp6type_match (int type, ipfw_insn_u32 *cmd) 464{ 465 return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); 466} 467 468static int 469flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) 470{ 471 int i; 472 for (i=0; i <= cmd->o.arg1; ++i ) 473 if (curr_flow == cmd->d[i] ) 474 return 1; 475 return 0; 476} 477 478/* support for IP6_*_ME opcodes */ 479static int 480search_ip6_addr_net (struct in6_addr * ip6_addr) 481{ 482 struct ifnet *mdc; 483 struct ifaddr *mdc2; 484 struct in6_ifaddr *fdm; 485 struct in6_addr copia; 486 487 TAILQ_FOREACH(mdc, &V_ifnet, if_link) { 488 if_addr_rlock(mdc); 489 TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) { 490 if (mdc2->ifa_addr->sa_family == AF_INET6) { 491 fdm = (struct in6_ifaddr *)mdc2; 492 copia = fdm->ia_addr.sin6_addr; 493 /* need for leaving scope_id in the sock_addr */ 494 in6_clearscope(&copia); 495 if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) { 496 if_addr_runlock(mdc); 497 return 1; 498 } 499 } 500 } 501 if_addr_runlock(mdc); 502 } 503 return 0; 504} 505 506static int 507verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) 508{ 509 struct route_in6 ro; 510 struct sockaddr_in6 *dst; 511 512 bzero(&ro, sizeof(ro)); 513 514 dst = (struct sockaddr_in6 * )&(ro.ro_dst); 515 dst->sin6_family = AF_INET6; 516 dst->sin6_len = sizeof(*dst); 517 dst->sin6_addr = *src; 518 519 in6_rtalloc_ign(&ro, 0, fib); 520 if (ro.ro_rt == NULL) 521 return 0; 522 523 /* 524 * if ifp is provided, check for equality with rtentry 525 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, 526 * to support the case of sending packets to an address of our own. 527 * (where the former interface is the first argument of if_simloop() 528 * (=ifp), the latter is lo0) 529 */ 530 if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { 531 RTFREE(ro.ro_rt); 532 return 0; 533 } 534 535 /* if no ifp provided, check if rtentry is not default route */ 536 if (ifp == NULL && 537 IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) { 538 RTFREE(ro.ro_rt); 539 return 0; 540 } 541 542 /* or if this is a blackhole/reject route */ 543 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 544 RTFREE(ro.ro_rt); 545 return 0; 546 } 547 548 /* found valid route */ 549 RTFREE(ro.ro_rt); 550 return 1; 551 552} 553 554static int 555is_icmp6_query(int icmp6_type) 556{ 557 if ((icmp6_type <= ICMP6_MAXTYPE) && 558 (icmp6_type == ICMP6_ECHO_REQUEST || 559 icmp6_type == ICMP6_MEMBERSHIP_QUERY || 560 icmp6_type == ICMP6_WRUREQUEST || 561 icmp6_type == ICMP6_FQDN_QUERY || 562 icmp6_type == ICMP6_NI_QUERY)) 563 return (1); 564 565 return (0); 566} 567 568static void 569send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) 570{ 571 struct mbuf *m; 572 573 m = args->m; 574 if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { 575 struct tcphdr *tcp; 576 tcp = (struct tcphdr *)((char *)ip6 + hlen); 577 578 if ((tcp->th_flags & TH_RST) == 0) { 579 struct mbuf *m0; 580 m0 = ipfw_send_pkt(args->m, &(args->f_id), 581 ntohl(tcp->th_seq), ntohl(tcp->th_ack), 582 tcp->th_flags | TH_RST); 583 if (m0 != NULL) 584 ip6_output(m0, NULL, NULL, 0, NULL, NULL, 585 NULL); 586 } 587 FREE_PKT(m); 588 } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */ 589#if 0 590 /* 591 * Unlike above, the mbufs need to line up with the ip6 hdr, 592 * as the contents are read. We need to m_adj() the 593 * needed amount. 594 * The mbuf will however be thrown away so we can adjust it. 595 * Remember we did an m_pullup on it already so we 596 * can make some assumptions about contiguousness. 597 */ 598 if (args->L3offset) 599 m_adj(m, args->L3offset); 600#endif 601 icmp6_error(m, ICMP6_DST_UNREACH, code, 0); 602 } else 603 FREE_PKT(m); 604 605 args->m = NULL; 606} 607 608#endif /* INET6 */ 609 610 611/* 612 * sends a reject message, consuming the mbuf passed as an argument. 613 */ 614static void 615send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip) 616{ 617 618#if 0 619 /* XXX When ip is not guaranteed to be at mtod() we will 620 * need to account for this */ 621 * The mbuf will however be thrown away so we can adjust it. 622 * Remember we did an m_pullup on it already so we 623 * can make some assumptions about contiguousness. 624 */ 625 if (args->L3offset) 626 m_adj(m, args->L3offset); 627#endif 628 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 629 /* We need the IP header in host order for icmp_error(). */ 630 SET_HOST_IPLEN(ip); 631 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 632 } else if (args->f_id.proto == IPPROTO_TCP) { 633 struct tcphdr *const tcp = 634 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 635 if ( (tcp->th_flags & TH_RST) == 0) { 636 struct mbuf *m; 637 m = ipfw_send_pkt(args->m, &(args->f_id), 638 ntohl(tcp->th_seq), ntohl(tcp->th_ack), 639 tcp->th_flags | TH_RST); 640 if (m != NULL) 641 ip_output(m, NULL, NULL, 0, NULL, NULL); 642 } 643 FREE_PKT(args->m); 644 } else 645 FREE_PKT(args->m); 646 args->m = NULL; 647} 648 649/* 650 * Support for uid/gid/jail lookup. These tests are expensive 651 * (because we may need to look into the list of active sockets) 652 * so we cache the results. ugid_lookupp is 0 if we have not 653 * yet done a lookup, 1 if we succeeded, and -1 if we tried 654 * and failed. The function always returns the match value. 655 * We could actually spare the variable and use *uc, setting 656 * it to '(void *)check_uidgid if we have no info, NULL if 657 * we tried and failed, or any other value if successful. 658 */ 659static int 660check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp, 661 struct ucred **uc) 662{ 663#ifndef __FreeBSD__ 664 /* XXX */ 665 return cred_check(insn, proto, oif, 666 dst_ip, dst_port, src_ip, src_port, 667 (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb); 668#else /* FreeBSD */ 669 struct in_addr src_ip, dst_ip; 670 struct inpcbinfo *pi; 671 struct ipfw_flow_id *id; 672 struct inpcb *pcb, *inp; 673 struct ifnet *oif; 674 int lookupflags; 675 int match; 676 677 id = &args->f_id; 678 inp = args->inp; 679 oif = args->oif; 680 681 /* 682 * Check to see if the UDP or TCP stack supplied us with 683 * the PCB. If so, rather then holding a lock and looking 684 * up the PCB, we can use the one that was supplied. 685 */ 686 if (inp && *ugid_lookupp == 0) { 687 INP_LOCK_ASSERT(inp); 688 if (inp->inp_socket != NULL) { 689 *uc = crhold(inp->inp_cred); 690 *ugid_lookupp = 1; 691 } else 692 *ugid_lookupp = -1; 693 } 694 /* 695 * If we have already been here and the packet has no 696 * PCB entry associated with it, then we can safely 697 * assume that this is a no match. 698 */ 699 if (*ugid_lookupp == -1) 700 return (0); 701 if (id->proto == IPPROTO_TCP) { 702 lookupflags = 0; 703 pi = &V_tcbinfo; 704 } else if (id->proto == IPPROTO_UDP) { 705 lookupflags = INPLOOKUP_WILDCARD; 706 pi = &V_udbinfo; 707 } else 708 return 0; 709 lookupflags |= INPLOOKUP_RLOCKPCB; 710 match = 0; 711 if (*ugid_lookupp == 0) { 712 if (id->addr_type == 6) { 713#ifdef INET6 714 if (oif == NULL) 715 pcb = in6_pcblookup_mbuf(pi, 716 &id->src_ip6, htons(id->src_port), 717 &id->dst_ip6, htons(id->dst_port), 718 lookupflags, oif, args->m); 719 else 720 pcb = in6_pcblookup_mbuf(pi, 721 &id->dst_ip6, htons(id->dst_port), 722 &id->src_ip6, htons(id->src_port), 723 lookupflags, oif, args->m); 724#else 725 *ugid_lookupp = -1; 726 return (0); 727#endif 728 } else { 729 src_ip.s_addr = htonl(id->src_ip); 730 dst_ip.s_addr = htonl(id->dst_ip); 731 if (oif == NULL) 732 pcb = in_pcblookup_mbuf(pi, 733 src_ip, htons(id->src_port), 734 dst_ip, htons(id->dst_port), 735 lookupflags, oif, args->m); 736 else 737 pcb = in_pcblookup_mbuf(pi, 738 dst_ip, htons(id->dst_port), 739 src_ip, htons(id->src_port), 740 lookupflags, oif, args->m); 741 } 742 if (pcb != NULL) { 743 INP_RLOCK_ASSERT(pcb); 744 *uc = crhold(pcb->inp_cred); 745 *ugid_lookupp = 1; 746 INP_RUNLOCK(pcb); 747 } 748 if (*ugid_lookupp == 0) { 749 /* 750 * We tried and failed, set the variable to -1 751 * so we will not try again on this packet. 752 */ 753 *ugid_lookupp = -1; 754 return (0); 755 } 756 } 757 if (insn->o.opcode == O_UID) 758 match = ((*uc)->cr_uid == (uid_t)insn->d[0]); 759 else if (insn->o.opcode == O_GID) 760 match = groupmember((gid_t)insn->d[0], *uc); 761 else if (insn->o.opcode == O_JAIL) 762 match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); 763 return (match); 764#endif /* __FreeBSD__ */ 765} 766 767/* 768 * Helper function to set args with info on the rule after the matching 769 * one. slot is precise, whereas we guess rule_id as they are 770 * assigned sequentially. 771 */ 772static inline void 773set_match(struct ip_fw_args *args, int slot, 774 struct ip_fw_chain *chain) 775{ 776 args->rule.chain_id = chain->id; 777 args->rule.slot = slot + 1; /* we use 0 as a marker */ 778 args->rule.rule_id = 1 + chain->map[slot]->id; 779 args->rule.rulenum = chain->map[slot]->rulenum; 780} 781 782/* 783 * The main check routine for the firewall. 784 * 785 * All arguments are in args so we can modify them and return them 786 * back to the caller. 787 * 788 * Parameters: 789 * 790 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 791 * Starts with the IP header. 792 * args->eh (in) Mac header if present, NULL for layer3 packet. 793 * args->L3offset Number of bytes bypassed if we came from L2. 794 * e.g. often sizeof(eh) ** NOTYET ** 795 * args->oif Outgoing interface, NULL if packet is incoming. 796 * The incoming interface is in the mbuf. (in) 797 * args->divert_rule (in/out) 798 * Skip up to the first rule past this rule number; 799 * upon return, non-zero port number for divert or tee. 800 * 801 * args->rule Pointer to the last matching rule (in/out) 802 * args->next_hop Socket we are forwarding to (out). 803 * args->next_hop6 IPv6 next hop we are forwarding to (out). 804 * args->f_id Addresses grabbed from the packet (out) 805 * args->rule.info a cookie depending on rule action 806 * 807 * Return value: 808 * 809 * IP_FW_PASS the packet must be accepted 810 * IP_FW_DENY the packet must be dropped 811 * IP_FW_DIVERT divert packet, port in m_tag 812 * IP_FW_TEE tee packet, port in m_tag 813 * IP_FW_DUMMYNET to dummynet, pipe in args->cookie 814 * IP_FW_NETGRAPH into netgraph, cookie args->cookie 815 * args->rule contains the matching rule, 816 * args->rule.info has additional information. 817 * 818 */ 819int 820ipfw_chk(struct ip_fw_args *args) 821{ 822 823 /* 824 * Local variables holding state while processing a packet: 825 * 826 * IMPORTANT NOTE: to speed up the processing of rules, there 827 * are some assumption on the values of the variables, which 828 * are documented here. Should you change them, please check 829 * the implementation of the various instructions to make sure 830 * that they still work. 831 * 832 * args->eh The MAC header. It is non-null for a layer2 833 * packet, it is NULL for a layer-3 packet. 834 * **notyet** 835 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. 836 * 837 * m | args->m Pointer to the mbuf, as received from the caller. 838 * It may change if ipfw_chk() does an m_pullup, or if it 839 * consumes the packet because it calls send_reject(). 840 * XXX This has to change, so that ipfw_chk() never modifies 841 * or consumes the buffer. 842 * ip is the beginning of the ip(4 or 6) header. 843 * Calculated by adding the L3offset to the start of data. 844 * (Until we start using L3offset, the packet is 845 * supposed to start with the ip header). 846 */ 847 struct mbuf *m = args->m; 848 struct ip *ip = mtod(m, struct ip *); 849 850 /* 851 * For rules which contain uid/gid or jail constraints, cache 852 * a copy of the users credentials after the pcb lookup has been 853 * executed. This will speed up the processing of rules with 854 * these types of constraints, as well as decrease contention 855 * on pcb related locks. 856 */ 857#ifndef __FreeBSD__ 858 struct bsd_ucred ucred_cache; 859#else 860 struct ucred *ucred_cache = NULL; 861#endif 862 int ucred_lookup = 0; 863 864 /* 865 * oif | args->oif If NULL, ipfw_chk has been called on the 866 * inbound path (ether_input, ip_input). 867 * If non-NULL, ipfw_chk has been called on the outbound path 868 * (ether_output, ip_output). 869 */ 870 struct ifnet *oif = args->oif; 871 872 int f_pos = 0; /* index of current rule in the array */ 873 int retval = 0; 874 875 /* 876 * hlen The length of the IP header. 877 */ 878 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 879 880 /* 881 * offset The offset of a fragment. offset != 0 means that 882 * we have a fragment at this offset of an IPv4 packet. 883 * offset == 0 means that (if this is an IPv4 packet) 884 * this is the first or only fragment. 885 * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header 886 * or there is a single packet fragement (fragement header added 887 * without needed). We will treat a single packet fragment as if 888 * there was no fragment header (or log/block depending on the 889 * V_fw_permit_single_frag6 sysctl setting). 890 */ 891 u_short offset = 0; 892 u_short ip6f_mf = 0; 893 894 /* 895 * Local copies of addresses. They are only valid if we have 896 * an IP packet. 897 * 898 * proto The protocol. Set to 0 for non-ip packets, 899 * or to the protocol read from the packet otherwise. 900 * proto != 0 means that we have an IPv4 packet. 901 * 902 * src_port, dst_port port numbers, in HOST format. Only 903 * valid for TCP and UDP packets. 904 * 905 * src_ip, dst_ip ip addresses, in NETWORK format. 906 * Only valid for IPv4 packets. 907 */ 908 uint8_t proto; 909 uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */ 910 struct in_addr src_ip, dst_ip; /* NOTE: network format */ 911 uint16_t iplen=0; 912 int pktlen; 913 uint16_t etype = 0; /* Host order stored ether type */ 914 915 /* 916 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 917 * MATCH_NONE when checked and not matched (q = NULL), 918 * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) 919 */ 920 int dyn_dir = MATCH_UNKNOWN; 921 ipfw_dyn_rule *q = NULL; 922 struct ip_fw_chain *chain = &V_layer3_chain; 923 924 /* 925 * We store in ulp a pointer to the upper layer protocol header. 926 * In the ipv4 case this is easy to determine from the header, 927 * but for ipv6 we might have some additional headers in the middle. 928 * ulp is NULL if not found. 929 */ 930 void *ulp = NULL; /* upper layer protocol pointer. */ 931 932 /* XXX ipv6 variables */ 933 int is_ipv6 = 0; 934 uint8_t icmp6_type = 0; 935 uint16_t ext_hd = 0; /* bits vector for extension header filtering */ 936 /* end of ipv6 variables */ 937 938 int is_ipv4 = 0; 939 940 int done = 0; /* flag to exit the outer loop */ 941 942 if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) 943 return (IP_FW_PASS); /* accept */ 944 945 dst_ip.s_addr = 0; /* make sure it is initialized */ 946 src_ip.s_addr = 0; /* make sure it is initialized */ 947 pktlen = m->m_pkthdr.len; 948 args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */ 949 proto = args->f_id.proto = 0; /* mark f_id invalid */ 950 /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */ 951 952/* 953 * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, 954 * then it sets p to point at the offset "len" in the mbuf. WARNING: the 955 * pointer might become stale after other pullups (but we never use it 956 * this way). 957 */ 958#define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) 959#define PULLUP_LEN(_len, p, T) \ 960do { \ 961 int x = (_len) + T; \ 962 if ((m)->m_len < x) { \ 963 args->m = m = m_pullup(m, x); \ 964 if (m == NULL) \ 965 goto pullup_failed; \ 966 } \ 967 p = (mtod(m, char *) + (_len)); \ 968} while (0) 969 970 /* 971 * if we have an ether header, 972 */ 973 if (args->eh) 974 etype = ntohs(args->eh->ether_type); 975 976 /* Identify IP packets and fill up variables. */ 977 if (pktlen >= sizeof(struct ip6_hdr) && 978 (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { 979 struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; 980 is_ipv6 = 1; 981 args->f_id.addr_type = 6; 982 hlen = sizeof(struct ip6_hdr); 983 proto = ip6->ip6_nxt; 984 985 /* Search extension headers to find upper layer protocols */ 986 while (ulp == NULL && offset == 0) { 987 switch (proto) { 988 case IPPROTO_ICMPV6: 989 PULLUP_TO(hlen, ulp, struct icmp6_hdr); 990 icmp6_type = ICMP6(ulp)->icmp6_type; 991 break; 992 993 case IPPROTO_TCP: 994 PULLUP_TO(hlen, ulp, struct tcphdr); 995 dst_port = TCP(ulp)->th_dport; 996 src_port = TCP(ulp)->th_sport; 997 /* save flags for dynamic rules */ 998 args->f_id._flags = TCP(ulp)->th_flags; 999 break; 1000 1001 case IPPROTO_SCTP: 1002 PULLUP_TO(hlen, ulp, struct sctphdr); 1003 src_port = SCTP(ulp)->src_port; 1004 dst_port = SCTP(ulp)->dest_port; 1005 break; 1006 1007 case IPPROTO_UDP: 1008 PULLUP_TO(hlen, ulp, struct udphdr); 1009 dst_port = UDP(ulp)->uh_dport; 1010 src_port = UDP(ulp)->uh_sport; 1011 break; 1012 1013 case IPPROTO_HOPOPTS: /* RFC 2460 */ 1014 PULLUP_TO(hlen, ulp, struct ip6_hbh); 1015 ext_hd |= EXT_HOPOPTS; 1016 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; 1017 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; 1018 ulp = NULL; 1019 break; 1020 1021 case IPPROTO_ROUTING: /* RFC 2460 */ 1022 PULLUP_TO(hlen, ulp, struct ip6_rthdr); 1023 switch (((struct ip6_rthdr *)ulp)->ip6r_type) { 1024 case 0: 1025 ext_hd |= EXT_RTHDR0; 1026 break; 1027 case 2: 1028 ext_hd |= EXT_RTHDR2; 1029 break; 1030 default: 1031 if (V_fw_verbose) 1032 printf("IPFW2: IPV6 - Unknown " 1033 "Routing Header type(%d)\n", 1034 ((struct ip6_rthdr *) 1035 ulp)->ip6r_type); 1036 if (V_fw_deny_unknown_exthdrs) 1037 return (IP_FW_DENY); 1038 break; 1039 } 1040 ext_hd |= EXT_ROUTING; 1041 hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; 1042 proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt; 1043 ulp = NULL; 1044 break; 1045 1046 case IPPROTO_FRAGMENT: /* RFC 2460 */ 1047 PULLUP_TO(hlen, ulp, struct ip6_frag); 1048 ext_hd |= EXT_FRAGMENT; 1049 hlen += sizeof (struct ip6_frag); 1050 proto = ((struct ip6_frag *)ulp)->ip6f_nxt; 1051 offset = ((struct ip6_frag *)ulp)->ip6f_offlg & 1052 IP6F_OFF_MASK; 1053 ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg & 1054 IP6F_MORE_FRAG; 1055 if (V_fw_permit_single_frag6 == 0 && 1056 offset == 0 && ip6f_mf == 0) { 1057 if (V_fw_verbose) 1058 printf("IPFW2: IPV6 - Invalid " 1059 "Fragment Header\n"); 1060 if (V_fw_deny_unknown_exthdrs) 1061 return (IP_FW_DENY); 1062 break; 1063 } 1064 args->f_id.extra = 1065 ntohl(((struct ip6_frag *)ulp)->ip6f_ident); 1066 ulp = NULL; 1067 break; 1068 1069 case IPPROTO_DSTOPTS: /* RFC 2460 */ 1070 PULLUP_TO(hlen, ulp, struct ip6_hbh); 1071 ext_hd |= EXT_DSTOPTS; 1072 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; 1073 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; 1074 ulp = NULL; 1075 break; 1076 1077 case IPPROTO_AH: /* RFC 2402 */ 1078 PULLUP_TO(hlen, ulp, struct ip6_ext); 1079 ext_hd |= EXT_AH; 1080 hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; 1081 proto = ((struct ip6_ext *)ulp)->ip6e_nxt; 1082 ulp = NULL; 1083 break; 1084 1085 case IPPROTO_ESP: /* RFC 2406 */ 1086 PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */ 1087 /* Anything past Seq# is variable length and 1088 * data past this ext. header is encrypted. */ 1089 ext_hd |= EXT_ESP; 1090 break; 1091 1092 case IPPROTO_NONE: /* RFC 2460 */ 1093 /* 1094 * Packet ends here, and IPv6 header has 1095 * already been pulled up. If ip6e_len!=0 1096 * then octets must be ignored. 1097 */ 1098 ulp = ip; /* non-NULL to get out of loop. */ 1099 break; 1100 1101 case IPPROTO_OSPFIGP: 1102 /* XXX OSPF header check? */ 1103 PULLUP_TO(hlen, ulp, struct ip6_ext); 1104 break; 1105 1106 case IPPROTO_PIM: 1107 /* XXX PIM header check? */ 1108 PULLUP_TO(hlen, ulp, struct pim); 1109 break; 1110 1111 case IPPROTO_CARP: 1112 PULLUP_TO(hlen, ulp, struct carp_header); 1113 if (((struct carp_header *)ulp)->carp_version != 1114 CARP_VERSION) 1115 return (IP_FW_DENY); 1116 if (((struct carp_header *)ulp)->carp_type != 1117 CARP_ADVERTISEMENT) 1118 return (IP_FW_DENY); 1119 break; 1120 1121 case IPPROTO_IPV6: /* RFC 2893 */ 1122 PULLUP_TO(hlen, ulp, struct ip6_hdr); 1123 break; 1124 1125 case IPPROTO_IPV4: /* RFC 2893 */ 1126 PULLUP_TO(hlen, ulp, struct ip); 1127 break; 1128 1129 default: 1130 if (V_fw_verbose) 1131 printf("IPFW2: IPV6 - Unknown " 1132 "Extension Header(%d), ext_hd=%x\n", 1133 proto, ext_hd); 1134 if (V_fw_deny_unknown_exthdrs) 1135 return (IP_FW_DENY); 1136 PULLUP_TO(hlen, ulp, struct ip6_ext); 1137 break; 1138 } /*switch */ 1139 } 1140 ip = mtod(m, struct ip *); 1141 ip6 = (struct ip6_hdr *)ip; 1142 args->f_id.src_ip6 = ip6->ip6_src; 1143 args->f_id.dst_ip6 = ip6->ip6_dst; 1144 args->f_id.src_ip = 0; 1145 args->f_id.dst_ip = 0; 1146 args->f_id.flow_id6 = ntohl(ip6->ip6_flow); 1147 } else if (pktlen >= sizeof(struct ip) && 1148 (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) { 1149 is_ipv4 = 1; 1150 hlen = ip->ip_hl << 2; 1151 args->f_id.addr_type = 4; 1152 1153 /* 1154 * Collect parameters into local variables for faster matching. 1155 */ 1156 proto = ip->ip_p; 1157 src_ip = ip->ip_src; 1158 dst_ip = ip->ip_dst; 1159 offset = ntohs(ip->ip_off) & IP_OFFMASK; 1160 iplen = ntohs(ip->ip_len); 1161 pktlen = iplen < pktlen ? iplen : pktlen; 1162 1163 if (offset == 0) { 1164 switch (proto) { 1165 case IPPROTO_TCP: 1166 PULLUP_TO(hlen, ulp, struct tcphdr); 1167 dst_port = TCP(ulp)->th_dport; 1168 src_port = TCP(ulp)->th_sport; 1169 /* save flags for dynamic rules */ 1170 args->f_id._flags = TCP(ulp)->th_flags; 1171 break; 1172 1173 case IPPROTO_SCTP: 1174 PULLUP_TO(hlen, ulp, struct sctphdr); 1175 src_port = SCTP(ulp)->src_port; 1176 dst_port = SCTP(ulp)->dest_port; 1177 break; 1178 1179 case IPPROTO_UDP: 1180 PULLUP_TO(hlen, ulp, struct udphdr); 1181 dst_port = UDP(ulp)->uh_dport; 1182 src_port = UDP(ulp)->uh_sport; 1183 break; 1184 1185 case IPPROTO_ICMP: 1186 PULLUP_TO(hlen, ulp, struct icmphdr); 1187 //args->f_id.flags = ICMP(ulp)->icmp_type; 1188 break; 1189 1190 default: 1191 break; 1192 } 1193 } 1194 1195 ip = mtod(m, struct ip *); 1196 args->f_id.src_ip = ntohl(src_ip.s_addr); 1197 args->f_id.dst_ip = ntohl(dst_ip.s_addr); 1198 } 1199#undef PULLUP_TO 1200 if (proto) { /* we may have port numbers, store them */ 1201 args->f_id.proto = proto; 1202 args->f_id.src_port = src_port = ntohs(src_port); 1203 args->f_id.dst_port = dst_port = ntohs(dst_port); 1204 } 1205 1206 IPFW_RLOCK(chain); 1207 if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ 1208 IPFW_RUNLOCK(chain); 1209 return (IP_FW_PASS); /* accept */ 1210 } 1211 if (args->rule.slot) { 1212 /* 1213 * Packet has already been tagged as a result of a previous 1214 * match on rule args->rule aka args->rule_id (PIPE, QUEUE, 1215 * REASS, NETGRAPH, DIVERT/TEE...) 1216 * Validate the slot and continue from the next one 1217 * if still present, otherwise do a lookup. 1218 */ 1219 f_pos = (args->rule.chain_id == chain->id) ? 1220 args->rule.slot : 1221 ipfw_find_rule(chain, args->rule.rulenum, 1222 args->rule.rule_id); 1223 } else { 1224 f_pos = 0; 1225 } 1226 1227 /* 1228 * Now scan the rules, and parse microinstructions for each rule. 1229 * We have two nested loops and an inner switch. Sometimes we 1230 * need to break out of one or both loops, or re-enter one of 1231 * the loops with updated variables. Loop variables are: 1232 * 1233 * f_pos (outer loop) points to the current rule. 1234 * On output it points to the matching rule. 1235 * done (outer loop) is used as a flag to break the loop. 1236 * l (inner loop) residual length of current rule. 1237 * cmd points to the current microinstruction. 1238 * 1239 * We break the inner loop by setting l=0 and possibly 1240 * cmdlen=0 if we don't want to advance cmd. 1241 * We break the outer loop by setting done=1 1242 * We can restart the inner loop by setting l>0 and f_pos, f, cmd 1243 * as needed. 1244 */ 1245 for (; f_pos < chain->n_rules; f_pos++) { 1246 ipfw_insn *cmd; 1247 uint32_t tablearg = 0; 1248 int l, cmdlen, skip_or; /* skip rest of OR block */ 1249 struct ip_fw *f; 1250 1251 f = chain->map[f_pos]; 1252 if (V_set_disable & (1 << f->set) ) 1253 continue; 1254 1255 skip_or = 0; 1256 for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; 1257 l -= cmdlen, cmd += cmdlen) { 1258 int match; 1259 1260 /* 1261 * check_body is a jump target used when we find a 1262 * CHECK_STATE, and need to jump to the body of 1263 * the target rule. 1264 */ 1265 1266/* check_body: */ 1267 cmdlen = F_LEN(cmd); 1268 /* 1269 * An OR block (insn_1 || .. || insn_n) has the 1270 * F_OR bit set in all but the last instruction. 1271 * The first match will set "skip_or", and cause 1272 * the following instructions to be skipped until 1273 * past the one with the F_OR bit clear. 1274 */ 1275 if (skip_or) { /* skip this instruction */ 1276 if ((cmd->len & F_OR) == 0) 1277 skip_or = 0; /* next one is good */ 1278 continue; 1279 } 1280 match = 0; /* set to 1 if we succeed */ 1281 1282 switch (cmd->opcode) { 1283 /* 1284 * The first set of opcodes compares the packet's 1285 * fields with some pattern, setting 'match' if a 1286 * match is found. At the end of the loop there is 1287 * logic to deal with F_NOT and F_OR flags associated 1288 * with the opcode. 1289 */ 1290 case O_NOP: 1291 match = 1; 1292 break; 1293 1294 case O_FORWARD_MAC: 1295 printf("ipfw: opcode %d unimplemented\n", 1296 cmd->opcode); 1297 break; 1298 1299 case O_GID: 1300 case O_UID: 1301 case O_JAIL: 1302 /* 1303 * We only check offset == 0 && proto != 0, 1304 * as this ensures that we have a 1305 * packet with the ports info. 1306 */ 1307 if (offset != 0) 1308 break; 1309 if (proto == IPPROTO_TCP || 1310 proto == IPPROTO_UDP) 1311 match = check_uidgid( 1312 (ipfw_insn_u32 *)cmd, 1313 args, &ucred_lookup, 1314#ifdef __FreeBSD__ 1315 &ucred_cache); 1316#else 1317 (void *)&ucred_cache); 1318#endif 1319 break; 1320 1321 case O_RECV: 1322 match = iface_match(m->m_pkthdr.rcvif, 1323 (ipfw_insn_if *)cmd, chain, &tablearg); 1324 break; 1325 1326 case O_XMIT: 1327 match = iface_match(oif, (ipfw_insn_if *)cmd, 1328 chain, &tablearg); 1329 break; 1330 1331 case O_VIA: 1332 match = iface_match(oif ? oif : 1333 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd, 1334 chain, &tablearg); 1335 break; 1336 1337 case O_MACADDR2: 1338 if (args->eh != NULL) { /* have MAC header */ 1339 u_int32_t *want = (u_int32_t *) 1340 ((ipfw_insn_mac *)cmd)->addr; 1341 u_int32_t *mask = (u_int32_t *) 1342 ((ipfw_insn_mac *)cmd)->mask; 1343 u_int32_t *hdr = (u_int32_t *)args->eh; 1344 1345 match = 1346 ( want[0] == (hdr[0] & mask[0]) && 1347 want[1] == (hdr[1] & mask[1]) && 1348 want[2] == (hdr[2] & mask[2]) ); 1349 } 1350 break; 1351 1352 case O_MAC_TYPE: 1353 if (args->eh != NULL) { 1354 u_int16_t *p = 1355 ((ipfw_insn_u16 *)cmd)->ports; 1356 int i; 1357 1358 for (i = cmdlen - 1; !match && i>0; 1359 i--, p += 2) 1360 match = (etype >= p[0] && 1361 etype <= p[1]); 1362 } 1363 break; 1364 1365 case O_FRAG: 1366 match = (offset != 0); 1367 break; 1368 1369 case O_IN: /* "out" is "not in" */ 1370 match = (oif == NULL); 1371 break; 1372 1373 case O_LAYER2: 1374 match = (args->eh != NULL); 1375 break; 1376 1377 case O_DIVERTED: 1378 { 1379 /* For diverted packets, args->rule.info 1380 * contains the divert port (in host format) 1381 * reason and direction. 1382 */ 1383 uint32_t i = args->rule.info; 1384 match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT && 1385 cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2); 1386 } 1387 break; 1388 1389 case O_PROTO: 1390 /* 1391 * We do not allow an arg of 0 so the 1392 * check of "proto" only suffices. 1393 */ 1394 match = (proto == cmd->arg1); 1395 break; 1396 1397 case O_IP_SRC: 1398 match = is_ipv4 && 1399 (((ipfw_insn_ip *)cmd)->addr.s_addr == 1400 src_ip.s_addr); 1401 break; 1402 1403 case O_IP_SRC_LOOKUP: 1404 case O_IP_DST_LOOKUP: 1405 if (is_ipv4) { 1406 uint32_t key = 1407 (cmd->opcode == O_IP_DST_LOOKUP) ? 1408 dst_ip.s_addr : src_ip.s_addr; 1409 uint32_t v = 0; 1410 1411 if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { 1412 /* generic lookup. The key must be 1413 * in 32bit big-endian format. 1414 */ 1415 v = ((ipfw_insn_u32 *)cmd)->d[1]; 1416 if (v == 0) 1417 key = dst_ip.s_addr; 1418 else if (v == 1) 1419 key = src_ip.s_addr; 1420 else if (v == 6) /* dscp */ 1421 key = (ip->ip_tos >> 2) & 0x3f; 1422 else if (offset != 0) 1423 break; 1424 else if (proto != IPPROTO_TCP && 1425 proto != IPPROTO_UDP) 1426 break; 1427 else if (v == 2) 1428 key = htonl(dst_port); 1429 else if (v == 3) 1430 key = htonl(src_port); 1431 else if (v == 4 || v == 5) { 1432 check_uidgid( 1433 (ipfw_insn_u32 *)cmd, 1434 args, &ucred_lookup, 1435#ifdef __FreeBSD__ 1436 &ucred_cache); 1437 if (v == 4 /* O_UID */) 1438 key = ucred_cache->cr_uid; 1439 else if (v == 5 /* O_JAIL */) 1440 key = ucred_cache->cr_prison->pr_id; 1441#else /* !__FreeBSD__ */ 1442 (void *)&ucred_cache); 1443 if (v ==4 /* O_UID */) 1444 key = ucred_cache.uid; 1445 else if (v == 5 /* O_JAIL */) 1446 key = ucred_cache.xid; 1447#endif /* !__FreeBSD__ */ 1448 key = htonl(key); 1449 } else 1450 break; 1451 } 1452 match = ipfw_lookup_table(chain, 1453 cmd->arg1, key, &v); 1454 if (!match) 1455 break; 1456 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) 1457 match = 1458 ((ipfw_insn_u32 *)cmd)->d[0] == v; 1459 else 1460 tablearg = v; 1461 } else if (is_ipv6) { 1462 uint32_t v = 0; 1463 void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ? 1464 &args->f_id.dst_ip6: &args->f_id.src_ip6; 1465 match = ipfw_lookup_table_extended(chain, 1466 cmd->arg1, pkey, &v, 1467 IPFW_TABLE_CIDR); 1468 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) 1469 match = ((ipfw_insn_u32 *)cmd)->d[0] == v; 1470 if (match) 1471 tablearg = v; 1472 } 1473 break; 1474 1475 case O_IP_SRC_MASK: 1476 case O_IP_DST_MASK: 1477 if (is_ipv4) { 1478 uint32_t a = 1479 (cmd->opcode == O_IP_DST_MASK) ? 1480 dst_ip.s_addr : src_ip.s_addr; 1481 uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; 1482 int i = cmdlen-1; 1483 1484 for (; !match && i>0; i-= 2, p+= 2) 1485 match = (p[0] == (a & p[1])); 1486 } 1487 break; 1488 1489 case O_IP_SRC_ME: 1490 if (is_ipv4) { 1491 struct ifnet *tif; 1492 1493 INADDR_TO_IFP(src_ip, tif); 1494 match = (tif != NULL); 1495 break; 1496 } 1497#ifdef INET6 1498 /* FALLTHROUGH */ 1499 case O_IP6_SRC_ME: 1500 match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6); 1501#endif 1502 break; 1503 1504 case O_IP_DST_SET: 1505 case O_IP_SRC_SET: 1506 if (is_ipv4) { 1507 u_int32_t *d = (u_int32_t *)(cmd+1); 1508 u_int32_t addr = 1509 cmd->opcode == O_IP_DST_SET ? 1510 args->f_id.dst_ip : 1511 args->f_id.src_ip; 1512 1513 if (addr < d[0]) 1514 break; 1515 addr -= d[0]; /* subtract base */ 1516 match = (addr < cmd->arg1) && 1517 ( d[ 1 + (addr>>5)] & 1518 (1<<(addr & 0x1f)) ); 1519 } 1520 break; 1521 1522 case O_IP_DST: 1523 match = is_ipv4 && 1524 (((ipfw_insn_ip *)cmd)->addr.s_addr == 1525 dst_ip.s_addr); 1526 break; 1527 1528 case O_IP_DST_ME: 1529 if (is_ipv4) { 1530 struct ifnet *tif; 1531 1532 INADDR_TO_IFP(dst_ip, tif); 1533 match = (tif != NULL); 1534 break; 1535 } 1536#ifdef INET6 1537 /* FALLTHROUGH */ 1538 case O_IP6_DST_ME: 1539 match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6); 1540#endif 1541 break; 1542 1543 1544 case O_IP_SRCPORT: 1545 case O_IP_DSTPORT: 1546 /* 1547 * offset == 0 && proto != 0 is enough 1548 * to guarantee that we have a 1549 * packet with port info. 1550 */ 1551 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) 1552 && offset == 0) { 1553 u_int16_t x = 1554 (cmd->opcode == O_IP_SRCPORT) ? 1555 src_port : dst_port ; 1556 u_int16_t *p = 1557 ((ipfw_insn_u16 *)cmd)->ports; 1558 int i; 1559 1560 for (i = cmdlen - 1; !match && i>0; 1561 i--, p += 2) 1562 match = (x>=p[0] && x<=p[1]); 1563 } 1564 break; 1565 1566 case O_ICMPTYPE: 1567 match = (offset == 0 && proto==IPPROTO_ICMP && 1568 icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); 1569 break; 1570 1571#ifdef INET6 1572 case O_ICMP6TYPE: 1573 match = is_ipv6 && offset == 0 && 1574 proto==IPPROTO_ICMPV6 && 1575 icmp6type_match( 1576 ICMP6(ulp)->icmp6_type, 1577 (ipfw_insn_u32 *)cmd); 1578 break; 1579#endif /* INET6 */ 1580 1581 case O_IPOPT: 1582 match = (is_ipv4 && 1583 ipopts_match(ip, cmd) ); 1584 break; 1585 1586 case O_IPVER: 1587 match = (is_ipv4 && 1588 cmd->arg1 == ip->ip_v); 1589 break; 1590 1591 case O_IPID: 1592 case O_IPLEN: 1593 case O_IPTTL: 1594 if (is_ipv4) { /* only for IP packets */ 1595 uint16_t x; 1596 uint16_t *p; 1597 int i; 1598 1599 if (cmd->opcode == O_IPLEN) 1600 x = iplen; 1601 else if (cmd->opcode == O_IPTTL) 1602 x = ip->ip_ttl; 1603 else /* must be IPID */ 1604 x = ntohs(ip->ip_id); 1605 if (cmdlen == 1) { 1606 match = (cmd->arg1 == x); 1607 break; 1608 } 1609 /* otherwise we have ranges */ 1610 p = ((ipfw_insn_u16 *)cmd)->ports; 1611 i = cmdlen - 1; 1612 for (; !match && i>0; i--, p += 2) 1613 match = (x >= p[0] && x <= p[1]); 1614 } 1615 break; 1616 1617 case O_IPPRECEDENCE: 1618 match = (is_ipv4 && 1619 (cmd->arg1 == (ip->ip_tos & 0xe0)) ); 1620 break; 1621 1622 case O_IPTOS: 1623 match = (is_ipv4 && 1624 flags_match(cmd, ip->ip_tos)); 1625 break; 1626 1627 case O_TCPDATALEN: 1628 if (proto == IPPROTO_TCP && offset == 0) { 1629 struct tcphdr *tcp; 1630 uint16_t x; 1631 uint16_t *p; 1632 int i; 1633 1634 tcp = TCP(ulp); 1635 x = iplen - 1636 ((ip->ip_hl + tcp->th_off) << 2); 1637 if (cmdlen == 1) { 1638 match = (cmd->arg1 == x); 1639 break; 1640 } 1641 /* otherwise we have ranges */ 1642 p = ((ipfw_insn_u16 *)cmd)->ports; 1643 i = cmdlen - 1; 1644 for (; !match && i>0; i--, p += 2) 1645 match = (x >= p[0] && x <= p[1]); 1646 } 1647 break; 1648 1649 case O_TCPFLAGS: 1650 match = (proto == IPPROTO_TCP && offset == 0 && 1651 flags_match(cmd, TCP(ulp)->th_flags)); 1652 break; 1653 1654 case O_TCPOPTS: 1655 PULLUP_LEN(hlen, ulp, (TCP(ulp)->th_off << 2)); 1656 match = (proto == IPPROTO_TCP && offset == 0 && 1657 tcpopts_match(TCP(ulp), cmd)); 1658 break; 1659 1660 case O_TCPSEQ: 1661 match = (proto == IPPROTO_TCP && offset == 0 && 1662 ((ipfw_insn_u32 *)cmd)->d[0] == 1663 TCP(ulp)->th_seq); 1664 break; 1665 1666 case O_TCPACK: 1667 match = (proto == IPPROTO_TCP && offset == 0 && 1668 ((ipfw_insn_u32 *)cmd)->d[0] == 1669 TCP(ulp)->th_ack); 1670 break; 1671 1672 case O_TCPWIN: 1673 if (proto == IPPROTO_TCP && offset == 0) { 1674 uint16_t x; 1675 uint16_t *p; 1676 int i; 1677 1678 x = ntohs(TCP(ulp)->th_win); 1679 if (cmdlen == 1) { 1680 match = (cmd->arg1 == x); 1681 break; 1682 } 1683 /* Otherwise we have ranges. */ 1684 p = ((ipfw_insn_u16 *)cmd)->ports; 1685 i = cmdlen - 1; 1686 for (; !match && i > 0; i--, p += 2) 1687 match = (x >= p[0] && x <= p[1]); 1688 } 1689 break; 1690 1691 case O_ESTAB: 1692 /* reject packets which have SYN only */ 1693 /* XXX should i also check for TH_ACK ? */ 1694 match = (proto == IPPROTO_TCP && offset == 0 && 1695 (TCP(ulp)->th_flags & 1696 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 1697 break; 1698 1699 case O_ALTQ: { 1700 struct pf_mtag *at; 1701 ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; 1702 1703 match = 1; 1704 at = pf_find_mtag(m); 1705 if (at != NULL && at->qid != 0) 1706 break; 1707 at = pf_get_mtag(m); 1708 if (at == NULL) { 1709 /* 1710 * Let the packet fall back to the 1711 * default ALTQ. 1712 */ 1713 break; 1714 } 1715 at->qid = altq->qid; 1716 at->hdr = ip; 1717 break; 1718 } 1719 1720 case O_LOG: 1721 ipfw_log(f, hlen, args, m, 1722 oif, offset | ip6f_mf, tablearg, ip); 1723 match = 1; 1724 break; 1725 1726 case O_PROB: 1727 match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); 1728 break; 1729 1730 case O_VERREVPATH: 1731 /* Outgoing packets automatically pass/match */ 1732 match = ((oif != NULL) || 1733 (m->m_pkthdr.rcvif == NULL) || 1734 ( 1735#ifdef INET6 1736 is_ipv6 ? 1737 verify_path6(&(args->f_id.src_ip6), 1738 m->m_pkthdr.rcvif, args->f_id.fib) : 1739#endif 1740 verify_path(src_ip, m->m_pkthdr.rcvif, 1741 args->f_id.fib))); 1742 break; 1743 1744 case O_VERSRCREACH: 1745 /* Outgoing packets automatically pass/match */ 1746 match = (hlen > 0 && ((oif != NULL) || 1747#ifdef INET6 1748 is_ipv6 ? 1749 verify_path6(&(args->f_id.src_ip6), 1750 NULL, args->f_id.fib) : 1751#endif 1752 verify_path(src_ip, NULL, args->f_id.fib))); 1753 break; 1754 1755 case O_ANTISPOOF: 1756 /* Outgoing packets automatically pass/match */ 1757 if (oif == NULL && hlen > 0 && 1758 ( (is_ipv4 && in_localaddr(src_ip)) 1759#ifdef INET6 1760 || (is_ipv6 && 1761 in6_localaddr(&(args->f_id.src_ip6))) 1762#endif 1763 )) 1764 match = 1765#ifdef INET6 1766 is_ipv6 ? verify_path6( 1767 &(args->f_id.src_ip6), 1768 m->m_pkthdr.rcvif, 1769 args->f_id.fib) : 1770#endif 1771 verify_path(src_ip, 1772 m->m_pkthdr.rcvif, 1773 args->f_id.fib); 1774 else 1775 match = 1; 1776 break; 1777 1778 case O_IPSEC: 1779#ifdef IPSEC 1780 match = (m_tag_find(m, 1781 PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); 1782#endif 1783 /* otherwise no match */ 1784 break; 1785 1786#ifdef INET6 1787 case O_IP6_SRC: 1788 match = is_ipv6 && 1789 IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, 1790 &((ipfw_insn_ip6 *)cmd)->addr6); 1791 break; 1792 1793 case O_IP6_DST: 1794 match = is_ipv6 && 1795 IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, 1796 &((ipfw_insn_ip6 *)cmd)->addr6); 1797 break; 1798 case O_IP6_SRC_MASK: 1799 case O_IP6_DST_MASK: 1800 if (is_ipv6) { 1801 int i = cmdlen - 1; 1802 struct in6_addr p; 1803 struct in6_addr *d = 1804 &((ipfw_insn_ip6 *)cmd)->addr6; 1805 1806 for (; !match && i > 0; d += 2, 1807 i -= F_INSN_SIZE(struct in6_addr) 1808 * 2) { 1809 p = (cmd->opcode == 1810 O_IP6_SRC_MASK) ? 1811 args->f_id.src_ip6: 1812 args->f_id.dst_ip6; 1813 APPLY_MASK(&p, &d[1]); 1814 match = 1815 IN6_ARE_ADDR_EQUAL(&d[0], 1816 &p); 1817 } 1818 } 1819 break; 1820 1821 case O_FLOW6ID: 1822 match = is_ipv6 && 1823 flow6id_match(args->f_id.flow_id6, 1824 (ipfw_insn_u32 *) cmd); 1825 break; 1826 1827 case O_EXT_HDR: 1828 match = is_ipv6 && 1829 (ext_hd & ((ipfw_insn *) cmd)->arg1); 1830 break; 1831 1832 case O_IP6: 1833 match = is_ipv6; 1834 break; 1835#endif 1836 1837 case O_IP4: 1838 match = is_ipv4; 1839 break; 1840 1841 case O_TAG: { 1842 struct m_tag *mtag; 1843 uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ? 1844 tablearg : cmd->arg1; 1845 1846 /* Packet is already tagged with this tag? */ 1847 mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); 1848 1849 /* We have `untag' action when F_NOT flag is 1850 * present. And we must remove this mtag from 1851 * mbuf and reset `match' to zero (`match' will 1852 * be inversed later). 1853 * Otherwise we should allocate new mtag and 1854 * push it into mbuf. 1855 */ 1856 if (cmd->len & F_NOT) { /* `untag' action */ 1857 if (mtag != NULL) 1858 m_tag_delete(m, mtag); 1859 match = 0; 1860 } else { 1861 if (mtag == NULL) { 1862 mtag = m_tag_alloc( MTAG_IPFW, 1863 tag, 0, M_NOWAIT); 1864 if (mtag != NULL) 1865 m_tag_prepend(m, mtag); 1866 } 1867 match = 1; 1868 } 1869 break; 1870 } 1871 1872 case O_FIB: /* try match the specified fib */ 1873 if (args->f_id.fib == cmd->arg1) 1874 match = 1; 1875 break; 1876 1877 case O_SOCKARG: { 1878 struct inpcb *inp = args->inp; 1879 struct inpcbinfo *pi; 1880 1881 if (is_ipv6) /* XXX can we remove this ? */ 1882 break; 1883 1884 if (proto == IPPROTO_TCP) 1885 pi = &V_tcbinfo; 1886 else if (proto == IPPROTO_UDP) 1887 pi = &V_udbinfo; 1888 else 1889 break; 1890 1891 /* 1892 * XXXRW: so_user_cookie should almost 1893 * certainly be inp_user_cookie? 1894 */ 1895 1896 /* For incomming packet, lookup up the 1897 inpcb using the src/dest ip/port tuple */ 1898 if (inp == NULL) { 1899 inp = in_pcblookup(pi, 1900 src_ip, htons(src_port), 1901 dst_ip, htons(dst_port), 1902 INPLOOKUP_RLOCKPCB, NULL); 1903 if (inp != NULL) { 1904 tablearg = 1905 inp->inp_socket->so_user_cookie; 1906 if (tablearg) 1907 match = 1; 1908 INP_RUNLOCK(inp); 1909 } 1910 } else { 1911 if (inp->inp_socket) { 1912 tablearg = 1913 inp->inp_socket->so_user_cookie; 1914 if (tablearg) 1915 match = 1; 1916 } 1917 } 1918 break; 1919 } 1920 1921 case O_TAGGED: { 1922 struct m_tag *mtag; 1923 uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ? 1924 tablearg : cmd->arg1; 1925 1926 if (cmdlen == 1) { 1927 match = m_tag_locate(m, MTAG_IPFW, 1928 tag, NULL) != NULL; 1929 break; 1930 } 1931 1932 /* we have ranges */ 1933 for (mtag = m_tag_first(m); 1934 mtag != NULL && !match; 1935 mtag = m_tag_next(m, mtag)) { 1936 uint16_t *p; 1937 int i; 1938 1939 if (mtag->m_tag_cookie != MTAG_IPFW) 1940 continue; 1941 1942 p = ((ipfw_insn_u16 *)cmd)->ports; 1943 i = cmdlen - 1; 1944 for(; !match && i > 0; i--, p += 2) 1945 match = 1946 mtag->m_tag_id >= p[0] && 1947 mtag->m_tag_id <= p[1]; 1948 } 1949 break; 1950 } 1951 1952 /* 1953 * The second set of opcodes represents 'actions', 1954 * i.e. the terminal part of a rule once the packet 1955 * matches all previous patterns. 1956 * Typically there is only one action for each rule, 1957 * and the opcode is stored at the end of the rule 1958 * (but there are exceptions -- see below). 1959 * 1960 * In general, here we set retval and terminate the 1961 * outer loop (would be a 'break 3' in some language, 1962 * but we need to set l=0, done=1) 1963 * 1964 * Exceptions: 1965 * O_COUNT and O_SKIPTO actions: 1966 * instead of terminating, we jump to the next rule 1967 * (setting l=0), or to the SKIPTO target (setting 1968 * f/f_len, cmd and l as needed), respectively. 1969 * 1970 * O_TAG, O_LOG and O_ALTQ action parameters: 1971 * perform some action and set match = 1; 1972 * 1973 * O_LIMIT and O_KEEP_STATE: these opcodes are 1974 * not real 'actions', and are stored right 1975 * before the 'action' part of the rule. 1976 * These opcodes try to install an entry in the 1977 * state tables; if successful, we continue with 1978 * the next opcode (match=1; break;), otherwise 1979 * the packet must be dropped (set retval, 1980 * break loops with l=0, done=1) 1981 * 1982 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 1983 * cause a lookup of the state table, and a jump 1984 * to the 'action' part of the parent rule 1985 * if an entry is found, or 1986 * (CHECK_STATE only) a jump to the next rule if 1987 * the entry is not found. 1988 * The result of the lookup is cached so that 1989 * further instances of these opcodes become NOPs. 1990 * The jump to the next rule is done by setting 1991 * l=0, cmdlen=0. 1992 */ 1993 case O_LIMIT: 1994 case O_KEEP_STATE: 1995 if (ipfw_install_state(f, 1996 (ipfw_insn_limit *)cmd, args, tablearg)) { 1997 /* error or limit violation */ 1998 retval = IP_FW_DENY; 1999 l = 0; /* exit inner loop */ 2000 done = 1; /* exit outer loop */ 2001 } 2002 match = 1; 2003 break; 2004 2005 case O_PROBE_STATE: 2006 case O_CHECK_STATE: 2007 /* 2008 * dynamic rules are checked at the first 2009 * keep-state or check-state occurrence, 2010 * with the result being stored in dyn_dir. 2011 * The compiler introduces a PROBE_STATE 2012 * instruction for us when we have a 2013 * KEEP_STATE (because PROBE_STATE needs 2014 * to be run first). 2015 */ 2016 if (dyn_dir == MATCH_UNKNOWN && 2017 (q = ipfw_lookup_dyn_rule(&args->f_id, 2018 &dyn_dir, proto == IPPROTO_TCP ? 2019 TCP(ulp) : NULL)) 2020 != NULL) { 2021 /* 2022 * Found dynamic entry, update stats 2023 * and jump to the 'action' part of 2024 * the parent rule by setting 2025 * f, cmd, l and clearing cmdlen. 2026 */ 2027 q->pcnt++; 2028 q->bcnt += pktlen; 2029 /* XXX we would like to have f_pos 2030 * readily accessible in the dynamic 2031 * rule, instead of having to 2032 * lookup q->rule. 2033 */ 2034 f = q->rule; 2035 f_pos = ipfw_find_rule(chain, 2036 f->rulenum, f->id); 2037 cmd = ACTION_PTR(f); 2038 l = f->cmd_len - f->act_ofs; 2039 ipfw_dyn_unlock(); 2040 cmdlen = 0; 2041 match = 1; 2042 break; 2043 } 2044 /* 2045 * Dynamic entry not found. If CHECK_STATE, 2046 * skip to next rule, if PROBE_STATE just 2047 * ignore and continue with next opcode. 2048 */ 2049 if (cmd->opcode == O_CHECK_STATE) 2050 l = 0; /* exit inner loop */ 2051 match = 1; 2052 break; 2053 2054 case O_ACCEPT: 2055 retval = 0; /* accept */ 2056 l = 0; /* exit inner loop */ 2057 done = 1; /* exit outer loop */ 2058 break; 2059 2060 case O_PIPE: 2061 case O_QUEUE: 2062 set_match(args, f_pos, chain); 2063 args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ? 2064 tablearg : cmd->arg1; 2065 if (cmd->opcode == O_PIPE) 2066 args->rule.info |= IPFW_IS_PIPE; 2067 if (V_fw_one_pass) 2068 args->rule.info |= IPFW_ONEPASS; 2069 retval = IP_FW_DUMMYNET; 2070 l = 0; /* exit inner loop */ 2071 done = 1; /* exit outer loop */ 2072 break; 2073 2074 case O_DIVERT: 2075 case O_TEE: 2076 if (args->eh) /* not on layer 2 */ 2077 break; 2078 /* otherwise this is terminal */ 2079 l = 0; /* exit inner loop */ 2080 done = 1; /* exit outer loop */ 2081 retval = (cmd->opcode == O_DIVERT) ? 2082 IP_FW_DIVERT : IP_FW_TEE; 2083 set_match(args, f_pos, chain); 2084 args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ? 2085 tablearg : cmd->arg1; 2086 break; 2087 2088 case O_COUNT: 2089 f->pcnt++; /* update stats */ 2090 f->bcnt += pktlen; 2091 f->timestamp = time_uptime; 2092 l = 0; /* exit inner loop */ 2093 break; 2094 2095 case O_SKIPTO: 2096 f->pcnt++; /* update stats */ 2097 f->bcnt += pktlen; 2098 f->timestamp = time_uptime; 2099 /* If possible use cached f_pos (in f->next_rule), 2100 * whose version is written in f->next_rule 2101 * (horrible hacks to avoid changing the ABI). 2102 */ 2103 if (cmd->arg1 != IP_FW_TABLEARG && 2104 (uintptr_t)f->x_next == chain->id) { 2105 f_pos = (uintptr_t)f->next_rule; 2106 } else { 2107 int i = (cmd->arg1 == IP_FW_TABLEARG) ? 2108 tablearg : cmd->arg1; 2109 /* make sure we do not jump backward */ 2110 if (i <= f->rulenum) 2111 i = f->rulenum + 1; 2112 f_pos = ipfw_find_rule(chain, i, 0); 2113 /* update the cache */ 2114 if (cmd->arg1 != IP_FW_TABLEARG) { 2115 f->next_rule = 2116 (void *)(uintptr_t)f_pos; 2117 f->x_next = 2118 (void *)(uintptr_t)chain->id; 2119 } 2120 } 2121 /* 2122 * Skip disabled rules, and re-enter 2123 * the inner loop with the correct 2124 * f_pos, f, l and cmd. 2125 * Also clear cmdlen and skip_or 2126 */ 2127 for (; f_pos < chain->n_rules - 1 && 2128 (V_set_disable & 2129 (1 << chain->map[f_pos]->set)); 2130 f_pos++) 2131 ; 2132 /* Re-enter the inner loop at the skipto rule. */ 2133 f = chain->map[f_pos]; 2134 l = f->cmd_len; 2135 cmd = f->cmd; 2136 match = 1; 2137 cmdlen = 0; 2138 skip_or = 0; 2139 continue; 2140 break; /* not reached */ 2141 2142 case O_CALLRETURN: { 2143 /* 2144 * Implementation of `subroutine' call/return, 2145 * in the stack carried in an mbuf tag. This 2146 * is different from `skipto' in that any call 2147 * address is possible (`skipto' must prevent 2148 * backward jumps to avoid endless loops). 2149 * We have `return' action when F_NOT flag is 2150 * present. The `m_tag_id' field is used as 2151 * stack pointer. 2152 */ 2153 struct m_tag *mtag; 2154 uint16_t jmpto, *stack; 2155 2156#define IS_CALL ((cmd->len & F_NOT) == 0) 2157#define IS_RETURN ((cmd->len & F_NOT) != 0) 2158 /* 2159 * Hand-rolled version of m_tag_locate() with 2160 * wildcard `type'. 2161 * If not already tagged, allocate new tag. 2162 */ 2163 mtag = m_tag_first(m); 2164 while (mtag != NULL) { 2165 if (mtag->m_tag_cookie == 2166 MTAG_IPFW_CALL) 2167 break; 2168 mtag = m_tag_next(m, mtag); 2169 } 2170 if (mtag == NULL && IS_CALL) { 2171 mtag = m_tag_alloc(MTAG_IPFW_CALL, 0, 2172 IPFW_CALLSTACK_SIZE * 2173 sizeof(uint16_t), M_NOWAIT); 2174 if (mtag != NULL) 2175 m_tag_prepend(m, mtag); 2176 } 2177 2178 /* 2179 * On error both `call' and `return' just 2180 * continue with next rule. 2181 */ 2182 if (IS_RETURN && (mtag == NULL || 2183 mtag->m_tag_id == 0)) { 2184 l = 0; /* exit inner loop */ 2185 break; 2186 } 2187 if (IS_CALL && (mtag == NULL || 2188 mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) { 2189 printf("ipfw: call stack error, " 2190 "go to next rule\n"); 2191 l = 0; /* exit inner loop */ 2192 break; 2193 } 2194 2195 f->pcnt++; /* update stats */ 2196 f->bcnt += pktlen; 2197 f->timestamp = time_uptime; 2198 stack = (uint16_t *)(mtag + 1); 2199 2200 /* 2201 * The `call' action may use cached f_pos 2202 * (in f->next_rule), whose version is written 2203 * in f->next_rule. 2204 * The `return' action, however, doesn't have 2205 * fixed jump address in cmd->arg1 and can't use 2206 * cache. 2207 */ 2208 if (IS_CALL) { 2209 stack[mtag->m_tag_id] = f->rulenum; 2210 mtag->m_tag_id++; 2211 if (cmd->arg1 != IP_FW_TABLEARG && 2212 (uintptr_t)f->x_next == chain->id) { 2213 f_pos = (uintptr_t)f->next_rule; 2214 } else { 2215 jmpto = (cmd->arg1 == 2216 IP_FW_TABLEARG) ? tablearg: 2217 cmd->arg1; 2218 f_pos = ipfw_find_rule(chain, 2219 jmpto, 0); 2220 /* update the cache */ 2221 if (cmd->arg1 != 2222 IP_FW_TABLEARG) { 2223 f->next_rule = 2224 (void *)(uintptr_t) 2225 f_pos; 2226 f->x_next = 2227 (void *)(uintptr_t) 2228 chain->id; 2229 } 2230 } 2231 } else { /* `return' action */ 2232 mtag->m_tag_id--; 2233 jmpto = stack[mtag->m_tag_id] + 1; 2234 f_pos = ipfw_find_rule(chain, jmpto, 0); 2235 } 2236 2237 /* 2238 * Skip disabled rules, and re-enter 2239 * the inner loop with the correct 2240 * f_pos, f, l and cmd. 2241 * Also clear cmdlen and skip_or 2242 */ 2243 for (; f_pos < chain->n_rules - 1 && 2244 (V_set_disable & 2245 (1 << chain->map[f_pos]->set)); f_pos++) 2246 ; 2247 /* Re-enter the inner loop at the dest rule. */ 2248 f = chain->map[f_pos]; 2249 l = f->cmd_len; 2250 cmd = f->cmd; 2251 cmdlen = 0; 2252 skip_or = 0; 2253 continue; 2254 break; /* NOTREACHED */ 2255 } 2256#undef IS_CALL 2257#undef IS_RETURN 2258 2259 case O_REJECT: 2260 /* 2261 * Drop the packet and send a reject notice 2262 * if the packet is not ICMP (or is an ICMP 2263 * query), and it is not multicast/broadcast. 2264 */ 2265 if (hlen > 0 && is_ipv4 && offset == 0 && 2266 (proto != IPPROTO_ICMP || 2267 is_icmp_query(ICMP(ulp))) && 2268 !(m->m_flags & (M_BCAST|M_MCAST)) && 2269 !IN_MULTICAST(ntohl(dst_ip.s_addr))) { 2270 send_reject(args, cmd->arg1, iplen, ip); 2271 m = args->m; 2272 } 2273 /* FALLTHROUGH */ 2274#ifdef INET6 2275 case O_UNREACH6: 2276 if (hlen > 0 && is_ipv6 && 2277 ((offset & IP6F_OFF_MASK) == 0) && 2278 (proto != IPPROTO_ICMPV6 || 2279 (is_icmp6_query(icmp6_type) == 1)) && 2280 !(m->m_flags & (M_BCAST|M_MCAST)) && 2281 !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) { 2282 send_reject6( 2283 args, cmd->arg1, hlen, 2284 (struct ip6_hdr *)ip); 2285 m = args->m; 2286 } 2287 /* FALLTHROUGH */ 2288#endif 2289 case O_DENY: 2290 retval = IP_FW_DENY; 2291 l = 0; /* exit inner loop */ 2292 done = 1; /* exit outer loop */ 2293 break; 2294 2295 case O_FORWARD_IP: 2296 if (args->eh) /* not valid on layer2 pkts */ 2297 break; 2298 if (q == NULL || q->rule != f || 2299 dyn_dir == MATCH_FORWARD) { 2300 struct sockaddr_in *sa; 2301 sa = &(((ipfw_insn_sa *)cmd)->sa); 2302 if (sa->sin_addr.s_addr == INADDR_ANY) { 2303 bcopy(sa, &args->hopstore, 2304 sizeof(*sa)); 2305 args->hopstore.sin_addr.s_addr = 2306 htonl(tablearg); 2307 args->next_hop = &args->hopstore; 2308 } else { 2309 args->next_hop = sa; 2310 } 2311 } 2312 retval = IP_FW_PASS; 2313 l = 0; /* exit inner loop */ 2314 done = 1; /* exit outer loop */ 2315 break; 2316 2317#ifdef INET6 2318 case O_FORWARD_IP6: 2319 if (args->eh) /* not valid on layer2 pkts */ 2320 break; 2321 if (q == NULL || q->rule != f || 2322 dyn_dir == MATCH_FORWARD) { 2323 struct sockaddr_in6 *sin6; 2324 2325 sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); 2326 args->next_hop6 = sin6; 2327 } 2328 retval = IP_FW_PASS; 2329 l = 0; /* exit inner loop */ 2330 done = 1; /* exit outer loop */ 2331 break; 2332#endif 2333 2334 case O_NETGRAPH: 2335 case O_NGTEE: 2336 set_match(args, f_pos, chain); 2337 args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ? 2338 tablearg : cmd->arg1; 2339 if (V_fw_one_pass) 2340 args->rule.info |= IPFW_ONEPASS; 2341 retval = (cmd->opcode == O_NETGRAPH) ? 2342 IP_FW_NETGRAPH : IP_FW_NGTEE; 2343 l = 0; /* exit inner loop */ 2344 done = 1; /* exit outer loop */ 2345 break; 2346 2347 case O_SETFIB: { 2348 uint32_t fib; 2349 2350 f->pcnt++; /* update stats */ 2351 f->bcnt += pktlen; 2352 f->timestamp = time_uptime; 2353 fib = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg: 2354 cmd->arg1; 2355 if (fib >= rt_numfibs) 2356 fib = 0; 2357 M_SETFIB(m, fib); 2358 args->f_id.fib = fib; 2359 l = 0; /* exit inner loop */ 2360 break; 2361 } 2362 2363 case O_NAT: 2364 if (!IPFW_NAT_LOADED) { 2365 retval = IP_FW_DENY; 2366 } else { 2367 struct cfg_nat *t; 2368 int nat_id; 2369 2370 set_match(args, f_pos, chain); 2371 /* Check if this is 'global' nat rule */ 2372 if (cmd->arg1 == 0) { 2373 retval = ipfw_nat_ptr(args, NULL, m); 2374 l = 0; 2375 done = 1; 2376 break; 2377 } 2378 t = ((ipfw_insn_nat *)cmd)->nat; 2379 if (t == NULL) { 2380 nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? 2381 tablearg : cmd->arg1; 2382 t = (*lookup_nat_ptr)(&chain->nat, nat_id); 2383 2384 if (t == NULL) { 2385 retval = IP_FW_DENY; 2386 l = 0; /* exit inner loop */ 2387 done = 1; /* exit outer loop */ 2388 break; 2389 } 2390 if (cmd->arg1 != IP_FW_TABLEARG) 2391 ((ipfw_insn_nat *)cmd)->nat = t; 2392 } 2393 retval = ipfw_nat_ptr(args, t, m); 2394 } 2395 l = 0; /* exit inner loop */ 2396 done = 1; /* exit outer loop */ 2397 break; 2398 2399 case O_REASS: { 2400 int ip_off; 2401 2402 f->pcnt++; 2403 f->bcnt += pktlen; 2404 l = 0; /* in any case exit inner loop */ 2405 ip_off = ntohs(ip->ip_off); 2406 2407 /* if not fragmented, go to next rule */ 2408 if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) 2409 break; 2410 /* 2411 * ip_reass() expects len & off in host 2412 * byte order. 2413 */ 2414 SET_HOST_IPLEN(ip); 2415 2416 args->m = m = ip_reass(m); 2417 2418 /* 2419 * do IP header checksum fixup. 2420 */ 2421 if (m == NULL) { /* fragment got swallowed */ 2422 retval = IP_FW_DENY; 2423 } else { /* good, packet complete */ 2424 int hlen; 2425 2426 ip = mtod(m, struct ip *); 2427 hlen = ip->ip_hl << 2; 2428 SET_NET_IPLEN(ip); 2429 ip->ip_sum = 0; 2430 if (hlen == sizeof(struct ip)) 2431 ip->ip_sum = in_cksum_hdr(ip); 2432 else 2433 ip->ip_sum = in_cksum(m, hlen); 2434 retval = IP_FW_REASS; 2435 set_match(args, f_pos, chain); 2436 } 2437 done = 1; /* exit outer loop */ 2438 break; 2439 } 2440 2441 default: 2442 panic("-- unknown opcode %d\n", cmd->opcode); 2443 } /* end of switch() on opcodes */ 2444 /* 2445 * if we get here with l=0, then match is irrelevant. 2446 */ 2447 2448 if (cmd->len & F_NOT) 2449 match = !match; 2450 2451 if (match) { 2452 if (cmd->len & F_OR) 2453 skip_or = 1; 2454 } else { 2455 if (!(cmd->len & F_OR)) /* not an OR block, */ 2456 break; /* try next rule */ 2457 } 2458 2459 } /* end of inner loop, scan opcodes */ 2460#undef PULLUP_LEN 2461 2462 if (done) 2463 break; 2464 2465/* next_rule:; */ /* try next rule */ 2466 2467 } /* end of outer for, scan rules */ 2468 2469 if (done) { 2470 struct ip_fw *rule = chain->map[f_pos]; 2471 /* Update statistics */ 2472 rule->pcnt++; 2473 rule->bcnt += pktlen; 2474 rule->timestamp = time_uptime; 2475 } else { 2476 retval = IP_FW_DENY; 2477 printf("ipfw: ouch!, skip past end of rules, denying packet\n"); 2478 } 2479 IPFW_RUNLOCK(chain); 2480#ifdef __FreeBSD__ 2481 if (ucred_cache != NULL) 2482 crfree(ucred_cache); 2483#endif 2484 return (retval); 2485 2486pullup_failed: 2487 if (V_fw_verbose) 2488 printf("ipfw: pullup failed\n"); 2489 return (IP_FW_DENY); 2490} 2491 2492/* 2493 * Set maximum number of tables that can be used in given VNET ipfw instance. 2494 */ 2495#ifdef SYSCTL_NODE 2496static int 2497sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) 2498{ 2499 int error; 2500 unsigned int ntables; 2501 2502 ntables = V_fw_tables_max; 2503 2504 error = sysctl_handle_int(oidp, &ntables, 0, req); 2505 /* Read operation or some error */ 2506 if ((error != 0) || (req->newptr == NULL)) 2507 return (error); 2508 2509 return (ipfw_resize_tables(&V_layer3_chain, ntables)); 2510} 2511#endif 2512/* 2513 * Module and VNET glue 2514 */ 2515 2516/* 2517 * Stuff that must be initialised only on boot or module load 2518 */ 2519static int 2520ipfw_init(void) 2521{ 2522 int error = 0; 2523 2524 ipfw_dyn_attach(); 2525 /* 2526 * Only print out this stuff the first time around, 2527 * when called from the sysinit code. 2528 */ 2529 printf("ipfw2 " 2530#ifdef INET6 2531 "(+ipv6) " 2532#endif 2533 "initialized, divert %s, nat %s, " 2534 "rule-based forwarding " 2535#ifdef IPFIREWALL_FORWARD 2536 "enabled, " 2537#else 2538 "disabled, " 2539#endif 2540 "default to %s, logging ", 2541#ifdef IPDIVERT 2542 "enabled", 2543#else 2544 "loadable", 2545#endif 2546#ifdef IPFIREWALL_NAT 2547 "enabled", 2548#else 2549 "loadable", 2550#endif 2551 default_to_accept ? "accept" : "deny"); 2552 2553 /* 2554 * Note: V_xxx variables can be accessed here but the vnet specific 2555 * initializer may not have been called yet for the VIMAGE case. 2556 * Tuneables will have been processed. We will print out values for 2557 * the default vnet. 2558 * XXX This should all be rationalized AFTER 8.0 2559 */ 2560 if (V_fw_verbose == 0) 2561 printf("disabled\n"); 2562 else if (V_verbose_limit == 0) 2563 printf("unlimited\n"); 2564 else 2565 printf("limited to %d packets/entry by default\n", 2566 V_verbose_limit); 2567 2568 /* Check user-supplied table count for validness */ 2569 if (default_fw_tables > IPFW_TABLES_MAX) 2570 default_fw_tables = IPFW_TABLES_MAX; 2571 2572 ipfw_log_bpf(1); /* init */ 2573 return (error); 2574} 2575 2576/* 2577 * Called for the removal of the last instance only on module unload. 2578 */ 2579static void 2580ipfw_destroy(void) 2581{ 2582 2583 ipfw_log_bpf(0); /* uninit */ 2584 ipfw_dyn_detach(); 2585 printf("IP firewall unloaded\n"); 2586} 2587 2588/* 2589 * Stuff that must be initialized for every instance 2590 * (including the first of course). 2591 */ 2592static int 2593vnet_ipfw_init(const void *unused) 2594{ 2595 int error; 2596 struct ip_fw *rule = NULL; 2597 struct ip_fw_chain *chain; 2598 2599 chain = &V_layer3_chain; 2600 2601 /* First set up some values that are compile time options */ 2602 V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ 2603 V_fw_deny_unknown_exthdrs = 1; 2604#ifdef IPFIREWALL_VERBOSE 2605 V_fw_verbose = 1; 2606#endif 2607#ifdef IPFIREWALL_VERBOSE_LIMIT 2608 V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 2609#endif 2610#ifdef IPFIREWALL_NAT 2611 LIST_INIT(&chain->nat); 2612#endif 2613 2614 /* insert the default rule and create the initial map */ 2615 chain->n_rules = 1; 2616 chain->static_len = sizeof(struct ip_fw); 2617 chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO); 2618 if (chain->map) 2619 rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO); 2620 2621 /* Set initial number of tables */ 2622 V_fw_tables_max = default_fw_tables; 2623 error = ipfw_init_tables(chain); 2624 if (error) { 2625 printf("ipfw2: setting up tables failed\n"); 2626 free(chain->map, M_IPFW); 2627 free(rule, M_IPFW); 2628 return (ENOSPC); 2629 } 2630 2631 /* fill and insert the default rule */ 2632 rule->act_ofs = 0; 2633 rule->rulenum = IPFW_DEFAULT_RULE; 2634 rule->cmd_len = 1; 2635 rule->set = RESVD_SET; 2636 rule->cmd[0].len = 1; 2637 rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; 2638 chain->rules = chain->default_rule = chain->map[0] = rule; 2639 chain->id = rule->id = 1; 2640 2641 IPFW_LOCK_INIT(chain); 2642 ipfw_dyn_init(); 2643 2644 /* First set up some values that are compile time options */ 2645 V_ipfw_vnet_ready = 1; /* Open for business */ 2646 2647 /* 2648 * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr) 2649 * and pfil hooks for ipv4 and ipv6. Even if the latter two fail 2650 * we still keep the module alive because the sockopt and 2651 * layer2 paths are still useful. 2652 * ipfw[6]_hook return 0 on success, ENOENT on failure, 2653 * so we can ignore the exact return value and just set a flag. 2654 * 2655 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so 2656 * changes in the underlying (per-vnet) variables trigger 2657 * immediate hook()/unhook() calls. 2658 * In layer2 we have the same behaviour, except that V_ether_ipfw 2659 * is checked on each packet because there are no pfil hooks. 2660 */ 2661 V_ip_fw_ctl_ptr = ipfw_ctl; 2662 V_ip_fw_chk_ptr = ipfw_chk; 2663 error = ipfw_attach_hooks(1); 2664 return (error); 2665} 2666 2667/* 2668 * Called for the removal of each instance. 2669 */ 2670static int 2671vnet_ipfw_uninit(const void *unused) 2672{ 2673 struct ip_fw *reap, *rule; 2674 struct ip_fw_chain *chain = &V_layer3_chain; 2675 int i; 2676 2677 V_ipfw_vnet_ready = 0; /* tell new callers to go away */ 2678 /* 2679 * disconnect from ipv4, ipv6, layer2 and sockopt. 2680 * Then grab, release and grab again the WLOCK so we make 2681 * sure the update is propagated and nobody will be in. 2682 */ 2683 (void)ipfw_attach_hooks(0 /* detach */); 2684 V_ip_fw_chk_ptr = NULL; 2685 V_ip_fw_ctl_ptr = NULL; 2686 IPFW_UH_WLOCK(chain); 2687 IPFW_UH_WUNLOCK(chain); 2688 IPFW_UH_WLOCK(chain); 2689 2690 IPFW_WLOCK(chain); 2691 ipfw_dyn_uninit(0); /* run the callout_drain */ 2692 IPFW_WUNLOCK(chain); 2693 2694 ipfw_destroy_tables(chain); 2695 reap = NULL; 2696 IPFW_WLOCK(chain); 2697 for (i = 0; i < chain->n_rules; i++) { 2698 rule = chain->map[i]; 2699 rule->x_next = reap; 2700 reap = rule; 2701 } 2702 if (chain->map) 2703 free(chain->map, M_IPFW); 2704 IPFW_WUNLOCK(chain); 2705 IPFW_UH_WUNLOCK(chain); 2706 if (reap != NULL) 2707 ipfw_reap_rules(reap); 2708 IPFW_LOCK_DESTROY(chain); 2709 ipfw_dyn_uninit(1); /* free the remaining parts */ 2710 return 0; 2711} 2712 2713/* 2714 * Module event handler. 2715 * In general we have the choice of handling most of these events by the 2716 * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to 2717 * use the SYSINIT handlers as they are more capable of expressing the 2718 * flow of control during module and vnet operations, so this is just 2719 * a skeleton. Note there is no SYSINIT equivalent of the module 2720 * SHUTDOWN handler, but we don't have anything to do in that case anyhow. 2721 */ 2722static int 2723ipfw_modevent(module_t mod, int type, void *unused) 2724{ 2725 int err = 0; 2726 2727 switch (type) { 2728 case MOD_LOAD: 2729 /* Called once at module load or 2730 * system boot if compiled in. */ 2731 break; 2732 case MOD_QUIESCE: 2733 /* Called before unload. May veto unloading. */ 2734 break; 2735 case MOD_UNLOAD: 2736 /* Called during unload. */ 2737 break; 2738 case MOD_SHUTDOWN: 2739 /* Called during system shutdown. */ 2740 break; 2741 default: 2742 err = EOPNOTSUPP; 2743 break; 2744 } 2745 return err; 2746} 2747 2748static moduledata_t ipfwmod = { 2749 "ipfw", 2750 ipfw_modevent, 2751 0 2752}; 2753 2754/* Define startup order. */ 2755#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN 2756#define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */ 2757#define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */ 2758#define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */ 2759 2760DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER); 2761MODULE_VERSION(ipfw, 2); 2762/* should declare some dependencies here */ 2763 2764/* 2765 * Starting up. Done in order after ipfwmod() has been called. 2766 * VNET_SYSINIT is also called for each existing vnet and each new vnet. 2767 */ 2768SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, 2769 ipfw_init, NULL); 2770VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, 2771 vnet_ipfw_init, NULL); 2772 2773/* 2774 * Closing up shop. These are done in REVERSE ORDER, but still 2775 * after ipfwmod() has been called. Not called on reboot. 2776 * VNET_SYSUNINIT is also called for each exiting vnet as it exits. 2777 * or when the module is unloaded. 2778 */ 2779SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, 2780 ipfw_destroy, NULL); 2781VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, 2782 vnet_ipfw_uninit, NULL); 2783/* end of file */ 2784