ip_fw_dynamic.c revision 332401
1/*- 2 * Copyright (c) 2017-2018 Yandex LLC 3 * Copyright (c) 2017-2018 Andrey V. Elsukov <ae@FreeBSD.org> 4 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/11/sys/netpfil/ipfw/ip_fw_dynamic.c 332401 2018-04-11 10:36:20Z ae $"); 30 31#include "opt_inet.h" 32#include "opt_inet6.h" 33#include "opt_ipfw.h" 34#ifndef INET 35#error IPFIREWALL requires INET. 
36#endif /* INET */ 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/hash.h> 41#include <sys/mbuf.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/pcpu.h> 45#include <sys/queue.h> 46#include <sys/rmlock.h> 47#include <sys/smp.h> 48#include <sys/socket.h> 49#include <sys/sysctl.h> 50#include <sys/syslog.h> 51#include <net/ethernet.h> 52#include <net/if.h> 53#include <net/if_var.h> 54#include <net/pfil.h> 55#include <net/vnet.h> 56 57#include <netinet/in.h> 58#include <netinet/ip.h> 59#include <netinet/ip_var.h> 60#include <netinet/ip_fw.h> 61#include <netinet/tcp_var.h> 62#include <netinet/udp.h> 63 64#include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */ 65#ifdef INET6 66#include <netinet6/in6_var.h> 67#include <netinet6/ip6_var.h> 68#include <netinet6/scope6_var.h> 69#endif 70 71#include <netpfil/ipfw/ip_fw_private.h> 72 73#include <machine/in_cksum.h> /* XXX for in_cksum */ 74 75#ifdef MAC 76#include <security/mac/mac_framework.h> 77#endif 78#include <ck_queue.h> 79 80/* 81 * Description of dynamic states. 82 * 83 * Dynamic states are stored in lists accessed through a hash tables 84 * whose size is curr_dyn_buckets. This value can be modified through 85 * the sysctl variable dyn_buckets. 86 * 87 * Currently there are four tables: dyn_ipv4, dyn_ipv6, dyn_ipv4_parent, 88 * and dyn_ipv6_parent. 89 * 90 * When a packet is received, its address fields hashed, then matched 91 * against the entries in the corresponding list by addr_type. 92 * Dynamic states can be used for different purposes: 93 * + stateful rules; 94 * + enforcing limits on the number of sessions; 95 * + in-kernel NAT (not implemented yet) 96 * 97 * The lifetime of dynamic states is regulated by dyn_*_lifetime, 98 * measured in seconds and depending on the flags. 99 * 100 * The total number of dynamic states is equal to UMA zone items count. 101 * The max number of dynamic states is dyn_max. When we reach 102 * the maximum number of rules we do not create anymore. 
This is done to avoid consuming too much memory, but also too
 * much time when searching on each packet (ideally, we should try instead
 * to put a limit on the length of the list on each bucket...).
 *
 * Each state holds a pointer to the parent ipfw rule so we know what
 * action to perform. Dynamic rules are removed when the parent rule is
 * deleted.
 *
 * There are some limitations with dynamic rules -- we do not
 * obey the 'randomized match', and we do not do multiple
 * passes through the firewall. XXX check the latter!!!
 */

/* By default use jenkins hash function */
#define	IPFIREWALL_JENKINSHASH

/* Bump packet/byte counters of a state for the given direction (fwd/rev). */
#define	DYN_COUNTER_INC(d, dir, pktlen) do {	\
	(d)->pcnt_ ## dir++;			\
	(d)->bcnt_ ## dir += pktlen;		\
	} while (0)

/*
 * Per-session payload of a keep-state entry. Fields shared with
 * concurrent lockless lookups (state, ack_fwd/ack_rev, expire and the
 * counters) are written with ck_pr atomics elsewhere in this file.
 */
struct dyn_data {
	void	*parent;		/* pointer to parent rule */
	uint32_t chain_id;		/* cached ruleset id */
	uint32_t f_pos;			/* cached rule index */

	uint32_t hashval;		/* hash value used for hash resize */
	uint16_t fibnum;		/* fib used to send keepalives */
	uint8_t	_pad[3];
	uint8_t	set;			/* parent rule set number */
	uint16_t rulenum;		/* parent rule number */
	uint32_t ruleid;		/* parent rule id */

	uint32_t state;			/* TCP session state and flags */
	uint32_t ack_fwd;		/* most recent ACKs in forward */
	uint32_t ack_rev;		/* and reverse direction (used */
					/* to generate keepalives) */
	uint32_t sync;			/* synchronization time */
	uint32_t expire;		/* expire time */

	uint64_t pcnt_fwd;		/* bytes counter in forward */
	uint64_t bcnt_fwd;		/* packets counter in forward */
	uint64_t pcnt_rev;		/* bytes counter in reverse */
	uint64_t bcnt_rev;		/* packets counter in reverse */
};

/* Child-state accounting of a parent (limit) state, via ck_pr atomics. */
#define	DPARENT_COUNT_DEC(p) do {		\
	MPASS(p->count > 0);			\
	ck_pr_dec_32(&(p)->count);		\
} while (0)
#define	DPARENT_COUNT_INC(p)	ck_pr_inc_32(&(p)->count)
#define	DPARENT_COUNT(p)	ck_pr_load_32(&(p)->count)

/* Payload of a parent state, used to enforce O_LIMIT session counts. */
struct dyn_parent {
	void	*parent;		/* pointer to parent rule */
	uint32_t count;			/* number of linked states */
	uint8_t	_pad;
	uint8_t	set;			/* parent rule set number */
	uint16_t rulenum;		/* parent rule number */
	uint32_t ruleid;		/* parent rule id */
	uint32_t hashval;		/* hash value used for hash resize */
	uint32_t expire;		/* expire time */
};

/*
 * IPv4 state: flow key plus a pointer to the per-type payload.
 * "entry" links the state into a CK hash bucket list; "expired" links
 * unlinked entries into the expired list until they can be freed.
 */
struct dyn_ipv4_state {
	uint8_t	type;			/* State type */
	uint8_t	proto;			/* UL Protocol */
	uint16_t kidx;			/* named object index */
	uint16_t sport, dport;		/* ULP source and destination ports */
	in_addr_t src, dst;		/* IPv4 source and destination */

	union {
		struct dyn_data	*data;
		struct dyn_parent *limit;
	};
	CK_SLIST_ENTRY(dyn_ipv4_state) entry;
	SLIST_ENTRY(dyn_ipv4_state) expired;
};
CK_SLIST_HEAD(dyn_ipv4ck_slist, dyn_ipv4_state);
static VNET_DEFINE(struct dyn_ipv4ck_slist *, dyn_ipv4);
static VNET_DEFINE(struct dyn_ipv4ck_slist *, dyn_ipv4_parent);

SLIST_HEAD(dyn_ipv4_slist, dyn_ipv4_state);
static VNET_DEFINE(struct dyn_ipv4_slist, dyn_expired_ipv4);
#define	V_dyn_ipv4		VNET(dyn_ipv4)
#define	V_dyn_ipv4_parent	VNET(dyn_ipv4_parent)
#define	V_dyn_expired_ipv4	VNET(dyn_expired_ipv4)

#ifdef INET6
/* IPv6 analog of dyn_ipv4_state, with a scope zone id in the key. */
struct dyn_ipv6_state {
	uint8_t	type;			/* State type */
	uint8_t	proto;			/* UL Protocol */
	uint16_t kidx;			/* named object index */
	uint16_t sport, dport;		/* ULP source and destination ports */
	struct in6_addr src, dst;	/* IPv6 source and destination */
	uint32_t zoneid;		/* IPv6 scope zone id */
	union {
		struct dyn_data	*data;
		struct dyn_parent *limit;
	};
	CK_SLIST_ENTRY(dyn_ipv6_state) entry;
	SLIST_ENTRY(dyn_ipv6_state) expired;
};
CK_SLIST_HEAD(dyn_ipv6ck_slist, dyn_ipv6_state);
static VNET_DEFINE(struct dyn_ipv6ck_slist *, dyn_ipv6);
static VNET_DEFINE(struct dyn_ipv6ck_slist *, dyn_ipv6_parent);

SLIST_HEAD(dyn_ipv6_slist, dyn_ipv6_state);
static VNET_DEFINE(struct dyn_ipv6_slist, dyn_expired_ipv6);
#define	V_dyn_ipv6		VNET(dyn_ipv6)
#define	V_dyn_ipv6_parent	VNET(dyn_ipv6_parent)
#define	V_dyn_expired_ipv6	VNET(dyn_expired_ipv6)
#endif /* INET6 */

/*
 * Per-CPU pointer indicates that specified state is currently in use
 * and must not be reclaimed by expiration callout.
 */
static void **dyn_hp_cache;
static DPCPU_DEFINE(void *, dyn_hp);
#define	DYNSTATE_GET(cpu)	ck_pr_load_ptr(DPCPU_ID_PTR((cpu), dyn_hp))
#define	DYNSTATE_PROTECT(v)	ck_pr_store_ptr(DPCPU_PTR(dyn_hp), (v))
#define	DYNSTATE_RELEASE()	DYNSTATE_PROTECT(NULL)
#define	DYNSTATE_CRITICAL_ENTER()	critical_enter()
#define	DYNSTATE_CRITICAL_EXIT() do {	\
	DYNSTATE_RELEASE();		\
	critical_exit();		\
} while (0);

/*
 * We keep two version numbers, one is updated when new entry added to
 * the list. Second is updated when an entry deleted from the list.
 * Versions are updated under bucket lock.
 *
 * Bucket "add" version number is used to know, that in the time between
 * state lookup (i.e. ipfw_dyn_lookup_state()) and the followed state
 * creation (i.e. ipfw_dyn_install_state()) another concurrent thread did
 * not install some state in this bucket. Using this info we can avoid
 * additional state lookup, because we are sure that we will not install
 * the state twice.
 *
 * Also doing the tracking of bucket "del" version during lookup we can
 * be sure, that state entry was not unlinked and freed in time between
 * we read the state pointer and protect it with hazard pointer.
 *
 * An entry unlinked from CK list keeps unchanged until it is freed.
 * Unlinked entries are linked into expired lists using "expired" field.
 */

/*
 * dyn_expire_lock is used to protect access to dyn_expired_xxx lists.
 * dyn_bucket_lock is used to get write access to lists in specific bucket.
 * Currently one dyn_bucket_lock is used for all ipv4, ipv4_parent, ipv6,
 * and ipv6_parent lists.
 */
static VNET_DEFINE(struct mtx, dyn_expire_lock);
static VNET_DEFINE(struct mtx *, dyn_bucket_lock);
#define	V_dyn_expire_lock	VNET(dyn_expire_lock)
#define	V_dyn_bucket_lock	VNET(dyn_bucket_lock)

/*
 * Bucket's add/delete generation versions.
 */
static VNET_DEFINE(uint32_t *, dyn_ipv4_add);
static VNET_DEFINE(uint32_t *, dyn_ipv4_del);
static VNET_DEFINE(uint32_t *, dyn_ipv4_parent_add);
static VNET_DEFINE(uint32_t *, dyn_ipv4_parent_del);
#define	V_dyn_ipv4_add		VNET(dyn_ipv4_add)
#define	V_dyn_ipv4_del		VNET(dyn_ipv4_del)
#define	V_dyn_ipv4_parent_add	VNET(dyn_ipv4_parent_add)
#define	V_dyn_ipv4_parent_del	VNET(dyn_ipv4_parent_del)

#ifdef INET6
static VNET_DEFINE(uint32_t *, dyn_ipv6_add);
static VNET_DEFINE(uint32_t *, dyn_ipv6_del);
static VNET_DEFINE(uint32_t *, dyn_ipv6_parent_add);
static VNET_DEFINE(uint32_t *, dyn_ipv6_parent_del);
#define	V_dyn_ipv6_add		VNET(dyn_ipv6_add)
#define	V_dyn_ipv6_del		VNET(dyn_ipv6_del)
#define	V_dyn_ipv6_parent_add	VNET(dyn_ipv6_parent_add)
#define	V_dyn_ipv6_parent_del	VNET(dyn_ipv6_parent_del)
#endif /* INET6 */

/* Bucket count is kept a power of two, so masking selects the bucket. */
#define	DYN_BUCKET(h, b)	((h) & (b - 1))
#define	DYN_BUCKET_VERSION(b, v)	ck_pr_load_32(&V_dyn_ ## v[(b)])
#define	DYN_BUCKET_VERSION_BUMP(b, v)	ck_pr_inc_32(&V_dyn_ ## v[(b)])

#define	DYN_BUCKET_LOCK_INIT(lock, b)		\
    mtx_init(&lock[(b)], "IPFW dynamic bucket", NULL, MTX_DEF)
#define	DYN_BUCKET_LOCK_DESTROY(lock, b)	mtx_destroy(&lock[(b)])
#define	DYN_BUCKET_LOCK(b)	mtx_lock(&V_dyn_bucket_lock[(b)])
#define	DYN_BUCKET_UNLOCK(b)	mtx_unlock(&V_dyn_bucket_lock[(b)])
#define	DYN_BUCKET_ASSERT(b)	mtx_assert(&V_dyn_bucket_lock[(b)], MA_OWNED)

#define	DYN_EXPIRED_LOCK_INIT()		\
    mtx_init(&V_dyn_expire_lock, "IPFW expired states list", NULL, MTX_DEF)
#define	DYN_EXPIRED_LOCK_DESTROY()	mtx_destroy(&V_dyn_expire_lock)
#define	DYN_EXPIRED_LOCK()	mtx_lock(&V_dyn_expire_lock)
#define	DYN_EXPIRED_UNLOCK()	mtx_unlock(&V_dyn_expire_lock)

static VNET_DEFINE(uint32_t, dyn_buckets_max);
static VNET_DEFINE(uint32_t, curr_dyn_buckets);
static VNET_DEFINE(struct callout, dyn_timeout);
#define	V_dyn_buckets_max	VNET(dyn_buckets_max)
#define	V_curr_dyn_buckets	VNET(curr_dyn_buckets)
#define	V_dyn_timeout		VNET(dyn_timeout)

/* Maximum length of states chain in a bucket */
static VNET_DEFINE(uint32_t, curr_max_length);
#define	V_curr_max_length	VNET(curr_max_length)

static VNET_DEFINE(uma_zone_t, dyn_data_zone);
static VNET_DEFINE(uma_zone_t, dyn_parent_zone);
static VNET_DEFINE(uma_zone_t, dyn_ipv4_zone);
#ifdef INET6
static VNET_DEFINE(uma_zone_t, dyn_ipv6_zone);
#define	V_dyn_ipv6_zone		VNET(dyn_ipv6_zone)
#endif /* INET6 */
#define	V_dyn_data_zone		VNET(dyn_data_zone)
#define	V_dyn_parent_zone	VNET(dyn_parent_zone)
#define	V_dyn_ipv4_zone		VNET(dyn_ipv4_zone)

/*
 * Timeouts for various events in handing dynamic rules.
 */
static VNET_DEFINE(uint32_t, dyn_ack_lifetime);
static VNET_DEFINE(uint32_t, dyn_syn_lifetime);
static VNET_DEFINE(uint32_t, dyn_fin_lifetime);
static VNET_DEFINE(uint32_t, dyn_rst_lifetime);
static VNET_DEFINE(uint32_t, dyn_udp_lifetime);
static VNET_DEFINE(uint32_t, dyn_short_lifetime);

#define	V_dyn_ack_lifetime	VNET(dyn_ack_lifetime)
#define	V_dyn_syn_lifetime	VNET(dyn_syn_lifetime)
#define	V_dyn_fin_lifetime	VNET(dyn_fin_lifetime)
#define	V_dyn_rst_lifetime	VNET(dyn_rst_lifetime)
#define	V_dyn_udp_lifetime	VNET(dyn_udp_lifetime)
#define	V_dyn_short_lifetime	VNET(dyn_short_lifetime)

/*
 * Keepalives are sent if dyn_keepalive is set. They are sent every
 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
 * seconds of lifetime of a rule.
 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
 * than dyn_keepalive_period.
 */
#define	DYN_KEEPALIVE_MAXQ	512
static VNET_DEFINE(uint32_t, dyn_keepalive_interval);
static VNET_DEFINE(uint32_t, dyn_keepalive_period);
static VNET_DEFINE(uint32_t, dyn_keepalive);
static VNET_DEFINE(time_t, dyn_keepalive_last);

#define	V_dyn_keepalive_interval	VNET(dyn_keepalive_interval)
#define	V_dyn_keepalive_period	VNET(dyn_keepalive_period)
#define	V_dyn_keepalive		VNET(dyn_keepalive)
#define	V_dyn_keepalive_last	VNET(dyn_keepalive_last)

static VNET_DEFINE(uint32_t, dyn_max);		/* max # of dynamic states */
static VNET_DEFINE(uint32_t, dyn_count);	/* number of states */
static VNET_DEFINE(uint32_t, dyn_parent_max);	/* max # of parent states */
static VNET_DEFINE(uint32_t, dyn_parent_count);	/* number of parent states */
#define	V_dyn_max		VNET(dyn_max)
#define	V_dyn_count		VNET(dyn_count)
#define	V_dyn_parent_max	VNET(dyn_parent_max)
#define	V_dyn_parent_count	VNET(dyn_parent_count)

/* Global state counters, updated with ck_pr atomics. */
#define	DYN_COUNT_DEC(name)	do {		\
	MPASS((V_ ## name) > 0);		\
	ck_pr_dec_32(&(V_ ## name));		\
} while (0)
#define	DYN_COUNT_INC(name)	ck_pr_inc_32(&(V_ ## name))
#define	DYN_COUNT(name)		ck_pr_load_32(&(V_ ## name))

static time_t last_log;	/* Log ratelimiting */

/*
 * Get/set maximum number of dynamic states in given VNET instance.
 */
static int
sysctl_dyn_max(SYSCTL_HANDLER_ARGS)
{
	uint32_t nstates;
	int error;

	nstates = V_dyn_max;
	error = sysctl_handle_32(oidp, &nstates, 0, req);
	/* Read operation or some error */
	if ((error != 0) || (req->newptr == NULL))
		return (error);

	/* New limit is applied to the UMA zone that backs state data. */
	V_dyn_max = nstates;
	uma_zone_set_max(V_dyn_data_zone, V_dyn_max);
	return (0);
}

/* Get/set maximum number of parent (limit) states. */
static int
sysctl_dyn_parent_max(SYSCTL_HANDLER_ARGS)
{
	uint32_t nstates;
	int error;

	nstates = V_dyn_parent_max;
	error = sysctl_handle_32(oidp, &nstates, 0, req);
	/* Read operation or some error */
	if ((error != 0) || (req->newptr == NULL))
		return (error);

	V_dyn_parent_max = nstates;
	uma_zone_set_max(V_dyn_parent_zone, V_dyn_parent_max);
	return (0);
}

/*
 * Get/set maximum number of hash table buckets.
 * A new value is rounded up to a power of two; values not greater
 * than 256 are rejected.
 */
static int
sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS)
{
	uint32_t nbuckets;
	int error;

	nbuckets = V_dyn_buckets_max;
	error = sysctl_handle_32(oidp, &nbuckets, 0, req);
	/* Read operation or some error */
	if ((error != 0) || (req->newptr == NULL))
		return (error);

	if (nbuckets > 256)
		V_dyn_buckets_max = 1 << fls(nbuckets - 1);
	else
		return (EINVAL);
	return (0);
}

SYSCTL_DECL(_net_inet_ip_fw);

SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_count,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_count), 0,
    "Current number of dynamic states.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_parent_count,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(dyn_parent_count), 0,
    "Current number of parent states.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0,
    "Current number of buckets for states hash table.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, curr_max_length,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_max_length), 0,
    "Current maximum length of states chains in hash buckets.");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_buckets,
    CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_buckets,
    "IU", "Max number of buckets for dynamic states hash table.");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max,
    CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_max,
    "IU", "Max number of dynamic states.");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_parent_max,
    CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RW, 0, 0, sysctl_dyn_parent_max,
    "IU", "Max number of parent dynamic states.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0,
    "Lifetime of dynamic states for TCP ACK.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0,
    "Lifetime of dynamic states for TCP SYN.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0,
    "Lifetime of dynamic states for TCP FIN.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0,
    "Lifetime of dynamic states for TCP RST.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0,
    "Lifetime of dynamic states for UDP.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0,
    "Lifetime of dynamic states for other situations.");
SYSCTL_U32(_net_inet_ip_fw, OID_AUTO, dyn_keepalive,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0,
    "Enable keepalives for dynamic states.");

#ifdef IPFIREWALL_DYNDEBUG
#define	DYN_DEBUG(fmt, ...)	do {			\
	printf("%s: " fmt "\n", __func__, __VA_ARGS__);	\
} while (0)
#else
#define	DYN_DEBUG(fmt, ...)
#endif /* !IPFIREWALL_DYNDEBUG */

#ifdef INET6
/* Functions to work with IPv6 states */
static struct dyn_ipv6_state *dyn_lookup_ipv6_state(
    const struct ipfw_flow_id *, uint32_t, const void *,
    struct ipfw_dyn_info *, int);
static int dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *,
    uint32_t, const void *, int, const void *, uint32_t, uint16_t, uint32_t,
    uint16_t);
static struct dyn_ipv6_state *dyn_alloc_ipv6_state(
    const struct ipfw_flow_id *, uint32_t, uint16_t, uint8_t);
static int dyn_add_ipv6_state(void *, uint32_t, uint16_t, uint8_t,
    const struct ipfw_flow_id *, uint32_t, const void *, int, uint32_t,
    struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t);
static void dyn_export_ipv6_state(const struct dyn_ipv6_state *,
    ipfw_dyn_rule *);

static uint32_t dyn_getscopeid(const struct ip_fw_args *);
static void dyn_make_keepalive_ipv6(struct mbuf *, const struct in6_addr *,
    const struct in6_addr *, uint32_t, uint32_t, uint32_t, uint16_t,
    uint16_t);
static void dyn_enqueue_keepalive_ipv6(struct mbufq *,
    const struct dyn_ipv6_state *);
static void dyn_send_keepalive_ipv6(struct ip_fw_chain *);

static struct dyn_ipv6_state *dyn_lookup_ipv6_parent(
    const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t,
    uint32_t);
static struct dyn_ipv6_state *dyn_lookup_ipv6_parent_locked(
    const struct ipfw_flow_id *, uint32_t, const void *, uint32_t, uint16_t,
    uint32_t);
static struct dyn_ipv6_state *dyn_add_ipv6_parent(void *, uint32_t, uint16_t,
    uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint32_t,
    uint16_t);
#endif /* INET6 */

/* Functions to work with limit states */
static void *dyn_get_parent_state(const struct ipfw_flow_id *, uint32_t,
    struct ip_fw *, uint32_t, uint32_t, uint16_t);
static struct dyn_ipv4_state *dyn_lookup_ipv4_parent(
    const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t);
static struct dyn_ipv4_state *dyn_lookup_ipv4_parent_locked(
    const struct ipfw_flow_id *, const void *, uint32_t, uint16_t, uint32_t);
static struct dyn_parent *dyn_alloc_parent(void *, uint32_t, uint16_t,
    uint8_t, uint32_t);
static struct dyn_ipv4_state *dyn_add_ipv4_parent(void *, uint32_t, uint16_t,
    uint8_t, const struct ipfw_flow_id *, uint32_t, uint32_t, uint16_t);

static void dyn_tick(void *);
static void dyn_expire_states(struct ip_fw_chain *, ipfw_range_tlv *);
static void dyn_free_states(struct ip_fw_chain *);
static void dyn_export_parent(const struct dyn_parent *, uint16_t,
    ipfw_dyn_rule *);
static void dyn_export_data(const struct dyn_data *, uint16_t, uint8_t,
    ipfw_dyn_rule *);
static uint32_t dyn_update_tcp_state(struct dyn_data *,
    const struct ipfw_flow_id *, const struct tcphdr *, int);
static void dyn_update_proto_state(struct dyn_data *,
    const struct ipfw_flow_id *, const void *, int, int);

/* Functions to work with IPv4 states */
struct dyn_ipv4_state *dyn_lookup_ipv4_state(const struct ipfw_flow_id *,
    const void *, struct ipfw_dyn_info *, int);
static int dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *,
    const void *, int, const void *, uint32_t, uint16_t, uint32_t, uint16_t);
static struct dyn_ipv4_state *dyn_alloc_ipv4_state(
    const struct ipfw_flow_id *, uint16_t, uint8_t);
static int dyn_add_ipv4_state(void *, uint32_t, uint16_t, uint8_t,
    const struct ipfw_flow_id *, const void *, int, uint32_t,
    struct ipfw_dyn_info *, uint16_t, uint16_t, uint8_t);
static void dyn_export_ipv4_state(const struct dyn_ipv4_state *,
    ipfw_dyn_rule *);

/*
 * Named states support.
 */
static char *default_state_name = "default";
struct dyn_state_obj {
	struct named_object no;
	char name[64];
};

#define	DYN_STATE_OBJ(ch, cmd)	\
    ((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1))
/*
 * Classifier callback.
 * Return 0 if opcode contains object that should be referenced
 * or rewritten.
 */
static int
dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
{

	DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
	/* Don't rewrite "check-state any" */
	if (cmd->arg1 == 0 &&
	    cmd->opcode == O_CHECK_STATE)
		return (1);

	*puidx = cmd->arg1;
	*ptype = 0;
	return (0);
}

/* Update callback: store the resolved kernel object index in the opcode. */
static void
dyn_update(ipfw_insn *cmd, uint16_t idx)
{

	cmd->arg1 = idx;
	DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1);
}

/*
 * Resolve a state name from user-supplied TLVs to a named object.
 * uidx == 0 selects the "default" state name.
 */
static int
dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
    struct named_object **pno)
{
	ipfw_obj_ntlv *ntlv;
	const char *name;

	DYN_DEBUG("uidx %d", ti->uidx);
	if (ti->uidx != 0) {
		if (ti->tlvs == NULL)
			return (EINVAL);
		/* Search ntlv in the buffer provided by user */
		ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
		    IPFW_TLV_STATE_NAME);
		if (ntlv == NULL)
			return (EINVAL);
		name = ntlv->name;
	} else
		name = default_state_name;
	/*
	 * Search named object with corresponding name.
	 * Since states objects are global - ignore the set value
	 * and use zero instead.
	 */
	*pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0,
	    IPFW_TLV_STATE_NAME, name);
	/*
	 * We always return success here.
	 * The caller will check *pno and mark object as unresolved,
	 * then it will automatically create "default" object.
	 */
	return (0);
}

/* Lookup a state named object by its kernel index. */
static struct named_object *
dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
{

	DYN_DEBUG("kidx %d", idx);
	return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx));
}

/*
 * Create a named state object for the given name (or "default") and
 * return its kernel index via *pkidx. If an object with this name
 * already exists, its refcount is bumped and its index is reused.
 */
static int
dyn_create(struct ip_fw_chain *ch, struct tid_info *ti,
    uint16_t *pkidx)
{
	struct namedobj_instance *ni;
	struct dyn_state_obj *obj;
	struct named_object *no;
	ipfw_obj_ntlv *ntlv;
	char *name;

	DYN_DEBUG("uidx %d", ti->uidx);
	if (ti->uidx != 0) {
		if (ti->tlvs == NULL)
			return (EINVAL);
		ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
		    IPFW_TLV_STATE_NAME);
		if (ntlv == NULL)
			return (EINVAL);
		name = ntlv->name;
	} else
		name = default_state_name;

	ni = CHAIN_TO_SRV(ch);
	/* Allocate before taking the lock; freed if the name exists. */
	obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO);
	obj->no.name = obj->name;
	obj->no.etlv = IPFW_TLV_STATE_NAME;
	strlcpy(obj->name, name, sizeof(obj->name));

	IPFW_UH_WLOCK(ch);
	no = ipfw_objhash_lookup_name_type(ni, 0,
	    IPFW_TLV_STATE_NAME, name);
	if (no != NULL) {
		/*
		 * Object is already created.
		 * Just return its kidx and bump refcount.
		 */
		*pkidx = no->kidx;
		no->refcnt++;
		IPFW_UH_WUNLOCK(ch);
		free(obj, M_IPFW);
		DYN_DEBUG("\tfound kidx %d", *pkidx);
		return (0);
	}
	if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) {
		DYN_DEBUG("\talloc_idx failed for %s", name);
		IPFW_UH_WUNLOCK(ch);
		free(obj, M_IPFW);
		return (ENOSPC);
	}
	ipfw_objhash_add(ni, &obj->no);
	SRV_OBJECT(ch, obj->no.kidx) = obj;
	obj->no.refcnt++;
	*pkidx = obj->no.kidx;
	IPFW_UH_WUNLOCK(ch);
	DYN_DEBUG("\tcreated kidx %d", *pkidx);
	return (0);
}

/* Destroy a state named object; caller must hold the last reference. */
static void
dyn_destroy(struct ip_fw_chain *ch, struct named_object *no)
{
	struct dyn_state_obj *obj;

	IPFW_UH_WLOCK_ASSERT(ch);

	KASSERT(no->refcnt == 1,
	    ("Destroying object '%s' (type %u, idx %u) with refcnt %u",
	    no->name, no->etlv, no->kidx, no->refcnt));
	DYN_DEBUG("kidx %d", no->kidx);
	obj = SRV_OBJECT(ch, no->kidx);
	SRV_OBJECT(ch, no->kidx) = NULL;
	ipfw_objhash_del(CHAIN_TO_SRV(ch), no);
	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), no->kidx);

	free(obj, M_IPFW);
}

/* Object rewrite handlers for all state-related opcodes. */
static struct opcode_obj_rewrite dyn_opcodes[] = {
	{
		O_KEEP_STATE, IPFW_TLV_STATE_NAME,
		dyn_classify, dyn_update,
		dyn_findbyname, dyn_findbykidx,
		dyn_create, dyn_destroy
	},
	{
		O_CHECK_STATE, IPFW_TLV_STATE_NAME,
		dyn_classify, dyn_update,
		dyn_findbyname, dyn_findbykidx,
		dyn_create, dyn_destroy
	},
	{
		O_PROBE_STATE, IPFW_TLV_STATE_NAME,
		dyn_classify, dyn_update,
		dyn_findbyname, dyn_findbykidx,
		dyn_create, dyn_destroy
	},
	{
		O_LIMIT, IPFW_TLV_STATE_NAME,
		dyn_classify, dyn_update,
		dyn_findbyname, dyn_findbykidx,
		dyn_create, dyn_destroy
	},
};

/*
 * IMPORTANT: the hash function for dynamic rules must be commutative
 * in source and destination (ip,port), because rules are bidirectional
 * and we want to find both in the same bucket.
 */
#ifndef IPFIREWALL_JENKINSHASH
/* Simple XOR hash; commutative because XOR is symmetric in src/dst. */
static __inline uint32_t
hash_packet(const struct ipfw_flow_id *id)
{
	uint32_t i;

#ifdef INET6
	if (IS_IP6_FLOW_ID(id))
		i = ntohl((id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
		    (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
		    (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
		    (id->src_ip6.__u6_addr.__u6_addr32[3]));
	else
#endif /* INET6 */
	i = (id->dst_ip) ^ (id->src_ip);
	i ^= (id->dst_port) ^ (id->src_port);
	return (i);
}

/* Mix the parent rule pointer into the flow hash for parent states. */
static __inline uint32_t
hash_parent(const struct ipfw_flow_id *id, const void *rule)
{

	return (hash_packet(id) ^ ((uintptr_t)rule));
}

#else /* IPFIREWALL_JENKINSHASH */

static VNET_DEFINE(uint32_t, dyn_hashseed);
#define	V_dyn_hashseed	VNET(dyn_hashseed)

/*
 * Canonical ordering of the IPv4 (addr, port) pairs, so both flow
 * directions hash identically: returns 0 when (src, sport) sorts
 * first, 1 otherwise.
 */
static __inline int
addrcmp4(const struct ipfw_flow_id *id)
{

	if (id->src_ip < id->dst_ip)
		return (0);
	if (id->src_ip > id->dst_ip)
		return (1);
	if (id->src_port <= id->dst_port)
		return (0);
	return (1);
}

#ifdef INET6
/* IPv6 analog of addrcmp4(). */
static __inline int
addrcmp6(const struct ipfw_flow_id *id)
{
	int ret;

	ret = memcmp(&id->src_ip6, &id->dst_ip6, sizeof(struct in6_addr));
	if (ret < 0)
		return (0);
	if (ret > 0)
		return (1);
	if (id->src_port <= id->dst_port)
		return (0);
	return (1);
}

/* Jenkins hash over the canonically ordered IPv6 (addr, port) tuple. */
static __inline uint32_t
hash_packet6(const struct ipfw_flow_id *id)
{
	struct tuple6 {
		struct in6_addr	addr[2];
		uint16_t	port[2];
	} t6;

	if (addrcmp6(id) == 0) {
		t6.addr[0] = id->src_ip6;
		t6.addr[1] = id->dst_ip6;
		t6.port[0] = id->src_port;
		t6.port[1] = id->dst_port;
	} else {
		t6.addr[0] = id->dst_ip6;
		t6.addr[1] = id->src_ip6;
		t6.port[0] = id->dst_port;
		t6.port[1] = id->src_port;
	}
	return (jenkins_hash32((const uint32_t *)&t6,
	    sizeof(t6) / sizeof(uint32_t), V_dyn_hashseed));
}
#endif

/* Jenkins hash over the canonically ordered IPv4 (addr, port) tuple. */
static __inline uint32_t
hash_packet(const struct ipfw_flow_id *id)
{
	struct tuple4 {
		in_addr_t	addr[2];
		uint16_t	port[2];
	} t4;

	if (IS_IP4_FLOW_ID(id)) {
		/* All fields are in host byte order */
		if (addrcmp4(id) == 0) {
			t4.addr[0] = id->src_ip;
			t4.addr[1] = id->dst_ip;
			t4.port[0] = id->src_port;
			t4.port[1] = id->dst_port;
		} else {
			t4.addr[0] = id->dst_ip;
			t4.addr[1] = id->src_ip;
			t4.port[0] = id->dst_port;
			t4.port[1] = id->src_port;
		}
		return (jenkins_hash32((const uint32_t *)&t4,
		    sizeof(t4) / sizeof(uint32_t), V_dyn_hashseed));
	} else
#ifdef INET6
	if (IS_IP6_FLOW_ID(id))
		return (hash_packet6(id));
#endif
	return (0);
}

/* Parent-state hash: seed the rule pointer hash with the flow hash. */
static __inline uint32_t
hash_parent(const struct ipfw_flow_id *id, const void *rule)
{

	return (jenkins_hash32((const uint32_t *)&rule,
	    sizeof(rule) / sizeof(uint32_t), hash_packet(id)));
}
#endif /* IPFIREWALL_JENKINSHASH */

/*
 * Print customizable flow id description via log(9) facility.
 */
static void
print_dyn_rule_flags(const struct ipfw_flow_id *id, int dyn_type,
    int log_flags, char *prefix, char *postfix)
{
	struct in_addr da;
#ifdef INET6
	char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
#else
	char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];
#endif

#ifdef INET6
	if (IS_IP6_FLOW_ID(id)) {
		ip6_sprintf(src, &id->src_ip6);
		ip6_sprintf(dst, &id->dst_ip6);
	} else
#endif
	{
		/* Flow id keeps IPv4 addresses in host byte order. */
		da.s_addr = htonl(id->src_ip);
		inet_ntop(AF_INET, &da, src, sizeof(src));
		da.s_addr = htonl(id->dst_ip);
		inet_ntop(AF_INET, &da, dst, sizeof(dst));
	}
	log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n",
	    prefix, dyn_type, src, id->src_port, dst,
	    id->dst_port, V_dyn_count, postfix);
}

#define	print_dyn_rule(id, dtype, prefix, postfix)	\
	print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix)

/* Wrap-safe time/sequence comparisons (signed difference trick). */
#define	TIME_LEQ(a,b)	((int)((a)-(b)) <= 0)
#define	TIME_LE(a,b)	((int)((a)-(b)) < 0)
#define	_SEQ_GE(a,b)	((int)((a)-(b)) >= 0)
/* Low byte: forward-direction flags; next byte: reverse direction. */
#define	BOTH_SYN	(TH_SYN | (TH_SYN << 8))
#define	BOTH_FIN	(TH_FIN | (TH_FIN << 8))
#define	TCP_FLAGS	(TH_FLAGS | (TH_FLAGS << 8))
#define	ACK_FWD		0x00010000	/* fwd ack seen */
#define	ACK_REV		0x00020000	/* rev ack seen */
#define	ACK_BOTH	(ACK_FWD | ACK_REV)

/*
 * Track the TCP session: accumulate per-direction SYN/FIN/RST flags and
 * the latest ACKs (used to generate keepalives), and compute the new
 * expire time. Runs without the bucket lock, so shared fields of *data
 * are updated with ck_pr atomics. Returns the new expire time.
 */
static uint32_t
dyn_update_tcp_state(struct dyn_data *data, const struct ipfw_flow_id *pkt,
    const struct tcphdr *tcp, int dir)
{
	uint32_t ack, expire;
	uint32_t state, old;
	uint8_t th_flags;

	expire = data->expire;
	old = state = data->state;
	th_flags = pkt->_flags & (TH_FIN | TH_SYN | TH_RST);
	state |= (dir == MATCH_FORWARD) ? th_flags: (th_flags << 8);
	switch (state & TCP_FLAGS) {
	case TH_SYN:			/* opening */
		expire = time_uptime + V_dyn_syn_lifetime;
		break;

	case BOTH_SYN:			/* move to established */
	case BOTH_SYN | TH_FIN:		/* one side tries to close */
	case BOTH_SYN | (TH_FIN << 8):
		if (tcp == NULL)
			break;
		ack = ntohl(tcp->th_ack);
		if (dir == MATCH_FORWARD) {
			if (data->ack_fwd == 0 ||
			    _SEQ_GE(ack, data->ack_fwd)) {
				state |= ACK_FWD;
				if (data->ack_fwd != ack)
					ck_pr_store_32(&data->ack_fwd, ack);
			}
		} else {
			if (data->ack_rev == 0 ||
			    _SEQ_GE(ack, data->ack_rev)) {
				state |= ACK_REV;
				if (data->ack_rev != ack)
					ck_pr_store_32(&data->ack_rev, ack);
			}
		}
		if ((state & ACK_BOTH) == ACK_BOTH) {
			/*
			 * Set expire time to V_dyn_ack_lifetime only if
			 * we got ACKs for both directions.
			 * We use XOR here to avoid possible state
			 * overwriting in concurrent thread.
			 */
			expire = time_uptime + V_dyn_ack_lifetime;
			ck_pr_xor_32(&data->state, ACK_BOTH);
		} else if ((data->state & ACK_BOTH) != (state & ACK_BOTH))
			ck_pr_or_32(&data->state, state & ACK_BOTH);
		break;

	case BOTH_SYN | BOTH_FIN:	/* both sides closed */
		if (V_dyn_fin_lifetime >= V_dyn_keepalive_period)
			V_dyn_fin_lifetime = V_dyn_keepalive_period - 1;
		expire = time_uptime + V_dyn_fin_lifetime;
		break;

	default:
		/* RST or unexpected flag combination. */
		if (V_dyn_rst_lifetime >= V_dyn_keepalive_period)
			V_dyn_rst_lifetime = V_dyn_keepalive_period - 1;
		expire = time_uptime + V_dyn_rst_lifetime;
	}
	/* Save TCP state if it was changed */
	if ((state & TCP_FLAGS) != (old & TCP_FLAGS))
		ck_pr_or_32(&data->state, state & TCP_FLAGS);
	return (expire);
}

/*
 * Update ULP specific state.
 * For TCP we keep sequence numbers and flags. For other protocols
 * currently we update only expire time. Packets and bytes counters
 * are also updated here.
 */
static void
dyn_update_proto_state(struct dyn_data *data, const struct ipfw_flow_id *pkt,
    const void *ulp, int pktlen, int dir)
{
	uint32_t expire;

	/* NOTE: we are in critical section here. */
	switch (pkt->proto) {
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		expire = time_uptime + V_dyn_udp_lifetime;
		break;
	case IPPROTO_TCP:
		/* ulp is expected to point to the TCP header here. */
		expire = dyn_update_tcp_state(data, pkt, ulp, dir);
		break;
	default:
		expire = time_uptime + V_dyn_short_lifetime;
	}
	/*
	 * Expiration timer has the per-second granularity, no need to update
	 * it every time when state is matched.
	 */
	if (data->expire != expire)
		ck_pr_store_32(&data->expire, expire);

	if (dir == MATCH_FORWARD)
		DYN_COUNTER_INC(data, fwd, pktlen);
	else
		DYN_COUNTER_INC(data, rev, pktlen);
}

/*
 * Lookup IPv4 state.
 * Must be called in critical section.
 * Matches the 5-tuple in either direction and reports which one in
 * info->direction; on match, counters and expiration are refreshed.
 * Lock-free: the bucket "del" version is re-checked after each hazard
 * pointer publication (DYNSTATE_PROTECT) and the scan restarts if a
 * concurrent deletion changed the list.
 */
struct dyn_ipv4_state *
dyn_lookup_ipv4_state(const struct ipfw_flow_id *pkt, const void *ulp,
    struct ipfw_dyn_info *info, int pktlen)
{
	struct dyn_ipv4_state *s;
	uint32_t version, bucket;

	bucket = DYN_BUCKET(info->hashval, V_curr_dyn_buckets);
	info->version = DYN_BUCKET_VERSION(bucket, ipv4_add);
restart:
	version = DYN_BUCKET_VERSION(bucket, ipv4_del);
	CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) {
		DYNSTATE_PROTECT(s);
		if (version != DYN_BUCKET_VERSION(bucket, ipv4_del))
			goto restart;
		if (s->proto != pkt->proto)
			continue;
		/* kidx == 0 means "any state object". */
		if (info->kidx != 0 && s->kidx != info->kidx)
			continue;
		if (s->sport == pkt->src_port && s->dport == pkt->dst_port &&
		    s->src == pkt->src_ip && s->dst == pkt->dst_ip) {
			info->direction = MATCH_FORWARD;
			break;
		}
		if (s->sport == pkt->dst_port && s->dport == pkt->src_port &&
		    s->src == pkt->dst_ip && s->dst == pkt->src_ip) {
			info->direction = MATCH_REVERSE;
			break;
		}
	}

	if (s != NULL)
		dyn_update_proto_state(s->data, pkt, ulp, pktlen,
		    info->direction);
	return (s);
}

/*
 * Lookup IPv4 state.
 * Simplifed version is used to check that matching state doesn't exist.
 * Caller holds the bucket lock, so no version/hazard-pointer dance is
 * needed. Returns non-zero when a matching state exists.
 */
static int
dyn_lookup_ipv4_state_locked(const struct ipfw_flow_id *pkt,
    const void *ulp, int pktlen, const void *parent, uint32_t ruleid,
    uint16_t rulenum, uint32_t bucket, uint16_t kidx)
{
	struct dyn_ipv4_state *s;
	int dir;

	dir = MATCH_NONE;
	DYN_BUCKET_ASSERT(bucket);
	CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) {
		if (s->proto != pkt->proto ||
		    s->kidx != kidx)
			continue;
		/*
		 * XXXAE: Install synchronized state only when there are
		 * no matching states.
		 */
		if (pktlen != 0 && (
		    s->data->parent != parent ||
		    s->data->ruleid != ruleid ||
		    s->data->rulenum != rulenum))
			continue;
		if (s->sport == pkt->src_port &&
		    s->dport == pkt->dst_port &&
		    s->src == pkt->src_ip && s->dst == pkt->dst_ip) {
			dir = MATCH_FORWARD;
			break;
		}
		if (s->sport == pkt->dst_port && s->dport == pkt->src_port &&
		    s->src == pkt->dst_ip && s->dst == pkt->src_ip) {
			dir = MATCH_REVERSE;
			break;
		}
	}
	if (s != NULL)
		dyn_update_proto_state(s->data, pkt, ulp, pktlen, dir);
	return (s != NULL);
}

/*
 * Lookup IPv4 O_LIMIT_PARENT state in the parent table.
 * Lock-free variant (same restart-on-version-change protocol as
 * dyn_lookup_ipv4_state). On match, refreshes the parent's expire
 * time so it outlives its children.
 */
struct dyn_ipv4_state *
dyn_lookup_ipv4_parent(const struct ipfw_flow_id *pkt, const void *rule,
    uint32_t ruleid, uint16_t rulenum, uint32_t hashval)
{
	struct dyn_ipv4_state *s;
	uint32_t version, bucket;

	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
restart:
	version = DYN_BUCKET_VERSION(bucket, ipv4_parent_del);
	CK_SLIST_FOREACH(s, &V_dyn_ipv4_parent[bucket], entry) {
		DYNSTATE_PROTECT(s);
		if (version != DYN_BUCKET_VERSION(bucket, ipv4_parent_del))
			goto restart;
		/*
		 * NOTE: we do not need to check kidx, because parent rule
		 * can not create states with different kidx.
		 * And parent rule always created for forward direction.
		 */
		if (s->limit->parent == rule &&
		    s->limit->ruleid == ruleid &&
		    s->limit->rulenum == rulenum &&
		    s->proto == pkt->proto &&
		    s->sport == pkt->src_port &&
		    s->dport == pkt->dst_port &&
		    s->src == pkt->src_ip && s->dst == pkt->dst_ip) {
			/* Avoid the atomic store when already current. */
			if (s->limit->expire != time_uptime +
			    V_dyn_short_lifetime)
				ck_pr_store_32(&s->limit->expire,
				    time_uptime + V_dyn_short_lifetime);
			break;
		}
	}
	return (s);
}

/*
 * Bucket-locked variant of dyn_lookup_ipv4_parent(); does not touch
 * the expire time.
 */
static struct dyn_ipv4_state *
dyn_lookup_ipv4_parent_locked(const struct ipfw_flow_id *pkt,
    const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket)
{
	struct dyn_ipv4_state *s;

	DYN_BUCKET_ASSERT(bucket);
	CK_SLIST_FOREACH(s, &V_dyn_ipv4_parent[bucket], entry) {
		if (s->limit->parent == rule &&
		    s->limit->ruleid == ruleid &&
		    s->limit->rulenum == rulenum &&
		    s->proto == pkt->proto &&
		    s->sport == pkt->src_port &&
		    s->dport == pkt->dst_port &&
		    s->src == pkt->src_ip && s->dst == pkt->dst_ip)
			break;
	}
	return (s);
}


#ifdef INET6
/*
 * Determine the IPv6 scope zone id for link-local flows, taken from
 * the outgoing interface when set, otherwise from the receive
 * interface of the mbuf.
 */
static uint32_t
dyn_getscopeid(const struct ip_fw_args *args)
{

	/*
	 * If source or destination address is an scopeid address, we need
	 * determine the scope zone id to resolve address scope ambiguity.
	 */
	if (IN6_IS_ADDR_LINKLOCAL(&args->f_id.src_ip6) ||
	    IN6_IS_ADDR_LINKLOCAL(&args->f_id.dst_ip6)) {
		MPASS(args->oif != NULL ||
		    args->m->m_pkthdr.rcvif != NULL);
		return (in6_getscopezone(args->oif != NULL ? args->oif:
		    args->m->m_pkthdr.rcvif, IPV6_ADDR_SCOPE_LINKLOCAL));
	}
	return (0);
}

/*
 * Lookup IPv6 state.
 * Must be called in critical section.
 */
static struct dyn_ipv6_state *
dyn_lookup_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
    const void *ulp, struct ipfw_dyn_info *info, int pktlen)
{
	struct dyn_ipv6_state *s;
	uint32_t version, bucket;

	bucket = DYN_BUCKET(info->hashval, V_curr_dyn_buckets);
	info->version = DYN_BUCKET_VERSION(bucket, ipv6_add);
restart:
	/*
	 * Lock-free scan: protect each entry with the hazard pointer,
	 * then re-check the bucket "del" version; restart if a
	 * concurrent deletion modified this bucket.
	 */
	version = DYN_BUCKET_VERSION(bucket, ipv6_del);
	CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) {
		DYNSTATE_PROTECT(s);
		if (version != DYN_BUCKET_VERSION(bucket, ipv6_del))
			goto restart;
		if (s->proto != pkt->proto || s->zoneid != zoneid)
			continue;
		/* kidx == 0 means "any state object". */
		if (info->kidx != 0 && s->kidx != info->kidx)
			continue;
		if (s->sport == pkt->src_port && s->dport == pkt->dst_port &&
		    IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) &&
		    IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) {
			info->direction = MATCH_FORWARD;
			break;
		}
		if (s->sport == pkt->dst_port && s->dport == pkt->src_port &&
		    IN6_ARE_ADDR_EQUAL(&s->src, &pkt->dst_ip6) &&
		    IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->src_ip6)) {
			info->direction = MATCH_REVERSE;
			break;
		}
	}
	if (s != NULL)
		dyn_update_proto_state(s->data, pkt, ulp, pktlen,
		    info->direction);
	return (s);
}

/*
 * Lookup IPv6 state.
 * Simplifed version is used to check that matching state doesn't exist.
 */
static int
dyn_lookup_ipv6_state_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid,
    const void *ulp, int pktlen, const void *parent, uint32_t ruleid,
    uint16_t rulenum, uint32_t bucket, uint16_t kidx)
{
	struct dyn_ipv6_state *s;
	int dir;

	dir = MATCH_NONE;
	DYN_BUCKET_ASSERT(bucket);
	CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) {
		if (s->proto != pkt->proto || s->kidx != kidx ||
		    s->zoneid != zoneid)
			continue;
		/*
		 * XXXAE: Install synchronized state only when there are
		 * no matching states.
		 */
		if (pktlen != 0 && (
		    s->data->parent != parent ||
		    s->data->ruleid != ruleid ||
		    s->data->rulenum != rulenum))
			continue;
		if (s->sport == pkt->src_port && s->dport == pkt->dst_port &&
		    IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) &&
		    IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) {
			dir = MATCH_FORWARD;
			break;
		}
		if (s->sport == pkt->dst_port && s->dport == pkt->src_port &&
		    IN6_ARE_ADDR_EQUAL(&s->src, &pkt->dst_ip6) &&
		    IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->src_ip6)) {
			dir = MATCH_REVERSE;
			break;
		}
	}
	if (s != NULL)
		dyn_update_proto_state(s->data, pkt, ulp, pktlen, dir);
	return (s != NULL);
}

/*
 * Lookup IPv6 O_LIMIT_PARENT state.
 * Lock-free scan with hazard pointer + bucket version restart, same
 * protocol as dyn_lookup_ipv6_state(). On match, refreshes the
 * parent's expire time so it outlives its children.
 */
static struct dyn_ipv6_state *
dyn_lookup_ipv6_parent(const struct ipfw_flow_id *pkt, uint32_t zoneid,
    const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t hashval)
{
	struct dyn_ipv6_state *s;
	uint32_t version, bucket;

	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
restart:
	version = DYN_BUCKET_VERSION(bucket, ipv6_parent_del);
	CK_SLIST_FOREACH(s, &V_dyn_ipv6_parent[bucket], entry) {
		DYNSTATE_PROTECT(s);
		if (version != DYN_BUCKET_VERSION(bucket, ipv6_parent_del))
			goto restart;
		/*
		 * NOTE: we do not need to check kidx, because parent rule
		 * can not create states with different kidx.
		 * Also parent rule always created for forward direction.
		 */
		if (s->limit->parent == rule &&
		    s->limit->ruleid == ruleid &&
		    s->limit->rulenum == rulenum &&
		    s->proto == pkt->proto &&
		    s->sport == pkt->src_port &&
		    s->dport == pkt->dst_port && s->zoneid == zoneid &&
		    IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) &&
		    IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6)) {
			/* Avoid the atomic store when already current. */
			if (s->limit->expire != time_uptime +
			    V_dyn_short_lifetime)
				ck_pr_store_32(&s->limit->expire,
				    time_uptime + V_dyn_short_lifetime);
			break;
		}
	}
	return (s);
}

/*
 * Bucket-locked variant of dyn_lookup_ipv6_parent(); does not touch
 * the expire time.
 */
static struct dyn_ipv6_state *
dyn_lookup_ipv6_parent_locked(const struct ipfw_flow_id *pkt, uint32_t zoneid,
    const void *rule, uint32_t ruleid, uint16_t rulenum, uint32_t bucket)
{
	struct dyn_ipv6_state *s;

	DYN_BUCKET_ASSERT(bucket);
	CK_SLIST_FOREACH(s, &V_dyn_ipv6_parent[bucket], entry) {
		if (s->limit->parent == rule &&
		    s->limit->ruleid == ruleid &&
		    s->limit->rulenum == rulenum &&
		    s->proto == pkt->proto &&
		    s->sport == pkt->src_port &&
		    s->dport == pkt->dst_port && s->zoneid == zoneid &&
		    IN6_ARE_ADDR_EQUAL(&s->src, &pkt->src_ip6) &&
		    IN6_ARE_ADDR_EQUAL(&s->dst, &pkt->dst_ip6))
			break;
	}
	return (s);
}

#endif /* INET6 */

/*
 * Lookup dynamic state.
 * pkt - filled by ipfw_chk() ipfw_flow_id;
 * ulp - determined by ipfw_chk() upper level protocol header;
 * dyn_info - info about matched state to return back;
 * Returns pointer to state's parent rule and dyn_info. If there is
 * no state, NULL is returned.
 * On match ipfw_dyn_lookup() updates state's counters.
 */
struct ip_fw *
ipfw_dyn_lookup_state(const struct ip_fw_args *args, const void *ulp,
    int pktlen, const ipfw_insn *cmd, struct ipfw_dyn_info *info)
{
	struct dyn_data *data;
	struct ip_fw *rule;

	IPFW_RLOCK_ASSERT(&V_layer3_chain);

	data = NULL;
	rule = NULL;
	info->kidx = cmd->arg1;
	info->direction = MATCH_NONE;
	info->hashval = hash_packet(&args->f_id);

	DYNSTATE_CRITICAL_ENTER();
	if (IS_IP4_FLOW_ID(&args->f_id)) {
		struct dyn_ipv4_state *s;

		s = dyn_lookup_ipv4_state(&args->f_id, ulp, info, pktlen);
		if (s != NULL) {
			/*
			 * Dynamic states are created using the same 5-tuple,
			 * so it is assumed, that parent rule for O_LIMIT
			 * state has the same address family.
			 */
			data = s->data;
			if (s->type == O_LIMIT) {
				/* Follow child -> parent state -> rule. */
				s = data->parent;
				rule = s->limit->parent;
			} else
				rule = data->parent;
		}
	}
#ifdef INET6
	else if (IS_IP6_FLOW_ID(&args->f_id)) {
		struct dyn_ipv6_state *s;

		s = dyn_lookup_ipv6_state(&args->f_id, dyn_getscopeid(args),
		    ulp, info, pktlen);
		if (s != NULL) {
			data = s->data;
			if (s->type == O_LIMIT) {
				s = data->parent;
				rule = s->limit->parent;
			} else
				rule = data->parent;
		}
	}
#endif
	if (data != NULL) {
		/*
		 * If cached chain id is the same, we can avoid rule index
		 * lookup. Otherwise do lookup and update chain_id and f_pos.
		 * It is safe even if there is concurrent thread that want
		 * update the same state, because chain->id can be changed
		 * only under IPFW_WLOCK().
		 */
		if (data->chain_id != V_layer3_chain.id) {
			data->f_pos = ipfw_find_rule(&V_layer3_chain,
			    data->rulenum, data->ruleid);
			/*
			 * Check that found state has not orphaned.
			 * When chain->id being changed the parent
			 * rule can be deleted. If found rule doesn't
			 * match the parent pointer, consider this
			 * result as MATCH_NONE and return NULL.
			 *
			 * This will lead to creation of new similar state
			 * that will be added into head of this bucket.
			 * And the state that we currently have matched
			 * should be deleted by dyn_expire_states().
			 */
			if (V_layer3_chain.map[data->f_pos] == rule)
				data->chain_id = V_layer3_chain.id;
			else {
				rule = NULL;
				info->direction = MATCH_NONE;
				DYN_DEBUG("rule %p [%u, %u] is considered "
				    "invalid in data %p", rule, data->ruleid,
				    data->rulenum, data);
			}
		}
		info->f_pos = data->f_pos;
	}
	DYNSTATE_CRITICAL_EXIT();
#if 0
	/*
	 * Return MATCH_NONE if parent rule is in disabled set.
	 * This will lead to creation of new similar state that
	 * will be added into head of this bucket.
	 *
	 * XXXAE: we need to be able update state's set when parent
	 * rule set is changed.
	 */
	if (rule != NULL && (V_set_disable & (1 << rule->set))) {
		rule = NULL;
		info->direction = MATCH_NONE;
	}
#endif
	return (rule);
}

/*
 * Allocate and initialize a dyn_parent (O_LIMIT_PARENT bookkeeping)
 * object. Returns NULL on allocation failure; the failure is logged
 * at most once per second (rate limited via last_log).
 */
static struct dyn_parent *
dyn_alloc_parent(void *parent, uint32_t ruleid, uint16_t rulenum,
    uint8_t set, uint32_t hashval)
{
	struct dyn_parent *limit;

	limit = uma_zalloc(V_dyn_parent_zone, M_NOWAIT | M_ZERO);
	if (limit == NULL) {
		if (last_log != time_uptime) {
			last_log = time_uptime;
			log(LOG_DEBUG,
			    "ipfw: Cannot allocate parent dynamic state, "
			    "consider increasing "
			    "net.inet.ip.fw.dyn_parent_max\n");
		}
		return (NULL);
	}

	limit->parent = parent;
	limit->ruleid = ruleid;
	limit->rulenum = rulenum;
	limit->set = set;
	limit->hashval = hashval;
	limit->expire = time_uptime + V_dyn_short_lifetime;
	return (limit);
}

/*
 * Allocate and initialize a dyn_data object shared by IPv4/IPv6
 * states, seed its protocol state and counters as a forward-direction
 * packet. Returns NULL on allocation failure (rate-limited log).
 */
static struct dyn_data *
dyn_alloc_dyndata(void *parent, uint32_t ruleid, uint16_t rulenum,
    uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen,
    uint32_t hashval, uint16_t fibnum)
{
	struct dyn_data *data;

	data = uma_zalloc(V_dyn_data_zone, M_NOWAIT | M_ZERO);
	if (data == NULL) {
		if (last_log != time_uptime) {
			last_log = time_uptime;
			log(LOG_DEBUG,
			    "ipfw: Cannot allocate dynamic state, "
			    "consider increasing net.inet.ip.fw.dyn_max\n");
		}
		return (NULL);
	}

	data->parent = parent;
	data->ruleid = ruleid;
	data->rulenum = rulenum;
	data->set = set;
	data->fibnum = fibnum;
	data->hashval = hashval;
	data->expire = time_uptime + V_dyn_syn_lifetime;
	dyn_update_proto_state(data, pkt, ulp, pktlen, MATCH_FORWARD);
	return (data);
}

/*
 * Allocate an IPv4 state keyed by the packet's 5-tuple. The data or
 * limit pointer is filled in by the caller.
 */
static struct dyn_ipv4_state *
dyn_alloc_ipv4_state(const struct ipfw_flow_id *pkt, uint16_t kidx,
    uint8_t type)
{
	struct dyn_ipv4_state *s;

	s = uma_zalloc(V_dyn_ipv4_zone, M_NOWAIT | M_ZERO);
	if (s == NULL)
		return (NULL);

	s->type = type;
	s->kidx = kidx;
	s->proto = pkt->proto;
	s->sport = pkt->src_port;
	s->dport = pkt->dst_port;
	s->src = pkt->src_ip;
	s->dst = pkt->dst_ip;
	return (s);
}

/*
 * Add IPv4 parent state.
 * Returns pointer to parent state. When it is not NULL we are in
 * critical section and pointer protected by hazard pointer.
 * When some error occurs, it returns NULL and exit from critical section
 * is not needed.
 */
static struct dyn_ipv4_state *
dyn_add_ipv4_parent(void *rule, uint32_t ruleid, uint16_t rulenum,
    uint8_t set, const struct ipfw_flow_id *pkt, uint32_t hashval,
    uint32_t version, uint16_t kidx)
{
	struct dyn_ipv4_state *s;
	struct dyn_parent *limit;
	uint32_t bucket;

	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
	DYN_BUCKET_LOCK(bucket);
	if (version != DYN_BUCKET_VERSION(bucket, ipv4_parent_add)) {
		/*
		 * Bucket version has been changed since last lookup,
		 * do lookup again to be sure that state does not exist.
		 */
		s = dyn_lookup_ipv4_parent_locked(pkt, rule, ruleid,
		    rulenum, bucket);
		if (s != NULL) {
			/*
			 * Simultaneous thread has already created this
			 * state. Just return it.
			 */
			DYNSTATE_CRITICAL_ENTER();
			DYNSTATE_PROTECT(s);
			DYN_BUCKET_UNLOCK(bucket);
			return (s);
		}
	}

	limit = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval);
	if (limit == NULL) {
		DYN_BUCKET_UNLOCK(bucket);
		return (NULL);
	}

	s = dyn_alloc_ipv4_state(pkt, kidx, O_LIMIT_PARENT);
	if (s == NULL) {
		DYN_BUCKET_UNLOCK(bucket);
		uma_zfree(V_dyn_parent_zone, limit);
		return (NULL);
	}

	s->limit = limit;
	CK_SLIST_INSERT_HEAD(&V_dyn_ipv4_parent[bucket], s, entry);
	DYN_COUNT_INC(dyn_parent_count);
	DYN_BUCKET_VERSION_BUMP(bucket, ipv4_parent_add);
	/* Publish the hazard pointer before dropping the bucket lock. */
	DYNSTATE_CRITICAL_ENTER();
	DYNSTATE_PROTECT(s);
	DYN_BUCKET_UNLOCK(bucket);
	return (s);
}

/*
 * Add an IPv4 child (O_KEEP_STATE/O_LIMIT) state.
 * Re-validates @info against the current bucket state and does a
 * locked re-lookup when stale; returns EEXIST if a concurrent thread
 * already installed an equivalent state, ENOMEM on allocation
 * failure, 0 on success.
 */
static int
dyn_add_ipv4_state(void *parent, uint32_t ruleid, uint16_t rulenum,
    uint8_t set, const struct ipfw_flow_id *pkt, const void *ulp, int pktlen,
    uint32_t hashval, struct ipfw_dyn_info *info, uint16_t fibnum,
    uint16_t kidx, uint8_t type)
{
	struct dyn_ipv4_state *s;
	void *data;
	uint32_t bucket;

	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
	DYN_BUCKET_LOCK(bucket);
	if (info->direction == MATCH_UNKNOWN ||
	    info->kidx != kidx ||
	    info->hashval != hashval ||
	    info->version != DYN_BUCKET_VERSION(bucket, ipv4_add)) {
		/*
		 * Bucket version has been changed since last lookup,
		 * do lookup again to be sure that state does not exist.
		 */
		if (dyn_lookup_ipv4_state_locked(pkt, ulp, pktlen, parent,
		    ruleid, rulenum, bucket, kidx) != 0) {
			DYN_BUCKET_UNLOCK(bucket);
			return (EEXIST);
		}
	}

	data = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp,
	    pktlen, hashval, fibnum);
	if (data == NULL) {
		DYN_BUCKET_UNLOCK(bucket);
		return (ENOMEM);
	}

	s = dyn_alloc_ipv4_state(pkt, kidx, type);
	if (s == NULL) {
		DYN_BUCKET_UNLOCK(bucket);
		uma_zfree(V_dyn_data_zone, data);
		return (ENOMEM);
	}

	s->data = data;
	CK_SLIST_INSERT_HEAD(&V_dyn_ipv4[bucket], s, entry);
	DYN_COUNT_INC(dyn_count);
	DYN_BUCKET_VERSION_BUMP(bucket, ipv4_add);
	DYN_BUCKET_UNLOCK(bucket);
	return (0);
}

#ifdef INET6
/*
 * Allocate an IPv6 state keyed by the packet's 5-tuple and scope
 * zone id. The data or limit pointer is filled in by the caller.
 */
static struct dyn_ipv6_state *
dyn_alloc_ipv6_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
    uint16_t kidx, uint8_t type)
{
	struct dyn_ipv6_state *s;

	s = uma_zalloc(V_dyn_ipv6_zone, M_NOWAIT | M_ZERO);
	if (s == NULL)
		return (NULL);

	s->type = type;
	s->kidx = kidx;
	s->zoneid = zoneid;
	s->proto = pkt->proto;
	s->sport = pkt->src_port;
	s->dport = pkt->dst_port;
	s->src = pkt->src_ip6;
	s->dst = pkt->dst_ip6;
	return (s);
}

/*
 * Add IPv6 parent state.
 * Returns pointer to parent state. When it is not NULL we are in
 * critical section and pointer protected by hazard pointer.
 * When some error occurs, it return NULL and exit from critical section
 * is not needed.
 */
static struct dyn_ipv6_state *
dyn_add_ipv6_parent(void *rule, uint32_t ruleid, uint16_t rulenum,
    uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid,
    uint32_t hashval, uint32_t version, uint16_t kidx)
{
	struct dyn_ipv6_state *s;
	struct dyn_parent *limit;
	uint32_t bucket;

	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
	DYN_BUCKET_LOCK(bucket);
	if (version != DYN_BUCKET_VERSION(bucket, ipv6_parent_add)) {
		/*
		 * Bucket version has been changed since last lookup,
		 * do lookup again to be sure that state does not exist.
		 */
		s = dyn_lookup_ipv6_parent_locked(pkt, zoneid, rule, ruleid,
		    rulenum, bucket);
		if (s != NULL) {
			/*
			 * Simultaneous thread has already created this
			 * state. Just return it.
			 */
			DYNSTATE_CRITICAL_ENTER();
			DYNSTATE_PROTECT(s);
			DYN_BUCKET_UNLOCK(bucket);
			return (s);
		}
	}

	limit = dyn_alloc_parent(rule, ruleid, rulenum, set, hashval);
	if (limit == NULL) {
		DYN_BUCKET_UNLOCK(bucket);
		return (NULL);
	}

	s = dyn_alloc_ipv6_state(pkt, zoneid, kidx, O_LIMIT_PARENT);
	if (s == NULL) {
		DYN_BUCKET_UNLOCK(bucket);
		uma_zfree(V_dyn_parent_zone, limit);
		return (NULL);
	}

	s->limit = limit;
	CK_SLIST_INSERT_HEAD(&V_dyn_ipv6_parent[bucket], s, entry);
	DYN_COUNT_INC(dyn_parent_count);
	DYN_BUCKET_VERSION_BUMP(bucket, ipv6_parent_add);
	/* Publish the hazard pointer before dropping the bucket lock. */
	DYNSTATE_CRITICAL_ENTER();
	DYNSTATE_PROTECT(s);
	DYN_BUCKET_UNLOCK(bucket);
	return (s);
}

/*
 * Add an IPv6 child (O_KEEP_STATE/O_LIMIT) state.
 * Same protocol as dyn_add_ipv4_state(): re-validate @info, locked
 * re-lookup when stale; EEXIST if a concurrent thread won the race,
 * ENOMEM on allocation failure, 0 on success.
 */
static int
dyn_add_ipv6_state(void *parent, uint32_t ruleid, uint16_t rulenum,
    uint8_t set, const struct ipfw_flow_id *pkt, uint32_t zoneid,
    const void *ulp, int pktlen, uint32_t hashval, struct ipfw_dyn_info *info,
    uint16_t fibnum, uint16_t kidx, uint8_t type)
{
	struct dyn_ipv6_state *s;
	struct dyn_data *data;
	uint32_t bucket;

	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
	DYN_BUCKET_LOCK(bucket);
	if (info->direction == MATCH_UNKNOWN ||
	    info->kidx != kidx ||
	    info->hashval != hashval ||
	    info->version != DYN_BUCKET_VERSION(bucket, ipv6_add)) {
		/*
		 * Bucket version has been changed since last lookup,
		 * do lookup again to be sure that state does not exist.
		 */
		if (dyn_lookup_ipv6_state_locked(pkt, zoneid, ulp, pktlen,
		    parent, ruleid, rulenum, bucket, kidx) != 0) {
			DYN_BUCKET_UNLOCK(bucket);
			return (EEXIST);
		}
	}

	data = dyn_alloc_dyndata(parent, ruleid, rulenum, set, pkt, ulp,
	    pktlen, hashval, fibnum);
	if (data == NULL) {
		DYN_BUCKET_UNLOCK(bucket);
		return (ENOMEM);
	}

	s = dyn_alloc_ipv6_state(pkt, zoneid, kidx, type);
	if (s == NULL) {
		DYN_BUCKET_UNLOCK(bucket);
		uma_zfree(V_dyn_data_zone, data);
		return (ENOMEM);
	}

	s->data = data;
	CK_SLIST_INSERT_HEAD(&V_dyn_ipv6[bucket], s, entry);
	DYN_COUNT_INC(dyn_count);
	DYN_BUCKET_VERSION_BUMP(bucket, ipv6_add);
	DYN_BUCKET_UNLOCK(bucket);
	return (0);
}
#endif /* INET6 */

/*
 * Find (or create) the O_LIMIT_PARENT state for @rule and the masked
 * flow id @pkt, enforce @limit against its session count and, when
 * below the limit, account the new session and return the parent
 * state pointer (to be stored as the child's parent). Returns NULL
 * when the limit is reached, allocation fails, or the address family
 * is unsupported.
 */
static void *
dyn_get_parent_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
    struct ip_fw *rule, uint32_t hashval, uint32_t limit, uint16_t kidx)
{
	char sbuf[24];
	struct dyn_parent *p;
	void *ret;
	uint32_t bucket, version;

	p = NULL;
	ret = NULL;
	bucket = DYN_BUCKET(hashval, V_curr_dyn_buckets);
	DYNSTATE_CRITICAL_ENTER();
	if (IS_IP4_FLOW_ID(pkt)) {
		struct dyn_ipv4_state *s;

		version = DYN_BUCKET_VERSION(bucket, ipv4_parent_add);
		/*
		 * NOTE(review): the already-computed bucket index is
		 * passed as the 'hashval' argument; this assumes
		 * DYN_BUCKET() is idempotent on a bucket index —
		 * confirm against the macro definition.
		 */
		s = dyn_lookup_ipv4_parent(pkt, rule, rule->id,
		    rule->rulenum, bucket);
		if (s == NULL) {
			/*
			 * Exit from critical section because dyn_add_parent()
			 * will acquire bucket lock.
			 */
			DYNSTATE_CRITICAL_EXIT();

			s = dyn_add_ipv4_parent(rule, rule->id,
			    rule->rulenum, rule->set, pkt, hashval,
			    version, kidx);
			if (s == NULL)
				return (NULL);
			/* Now we are in critical section again. */
		}
		ret = s;
		p = s->limit;
	}
#ifdef INET6
	else if (IS_IP6_FLOW_ID(pkt)) {
		struct dyn_ipv6_state *s;

		version = DYN_BUCKET_VERSION(bucket, ipv6_parent_add);
		s = dyn_lookup_ipv6_parent(pkt, zoneid, rule, rule->id,
		    rule->rulenum, bucket);
		if (s == NULL) {
			/*
			 * Exit from critical section because dyn_add_parent()
			 * can acquire bucket mutex.
			 */
			DYNSTATE_CRITICAL_EXIT();

			s = dyn_add_ipv6_parent(rule, rule->id,
			    rule->rulenum, rule->set, pkt, zoneid, hashval,
			    version, kidx);
			if (s == NULL)
				return (NULL);
			/* Now we are in critical section again. */
		}
		ret = s;
		p = s->limit;
	}
#endif
	else {
		DYNSTATE_CRITICAL_EXIT();
		return (NULL);
	}

	/* Check the limit */
	if (DPARENT_COUNT(p) >= limit) {
		DYNSTATE_CRITICAL_EXIT();
		if (V_fw_verbose && last_log != time_uptime) {
			last_log = time_uptime;
			snprintf(sbuf, sizeof(sbuf), "%u drop session",
			    rule->rulenum);
			print_dyn_rule_flags(pkt, O_LIMIT,
			    LOG_SECURITY | LOG_DEBUG, sbuf,
			    "too many entries");
		}
		return (NULL);
	}

	/* Take new session into account. */
	DPARENT_COUNT_INC(p);
	/*
	 * We must exit from critical section because the following code
	 * can acquire bucket mutex.
	 * We rely on the the 'count' field. The state will not expire
	 * until it has some child states, i.e. 'count' field is not zero.
	 * Return state pointer, it will be used by child states as parent.
	 */
	DYNSTATE_CRITICAL_EXIT();
	return (ret);
}

/*
 * Install a child state for O_KEEP_STATE or O_LIMIT. For O_LIMIT the
 * flow id is first masked by @limit_mask, the parent state is found
 * or created and the session limit enforced; on any later failure the
 * parent's session count is rolled back. EEXIST from the add path
 * (concurrent creation) is reported to the caller as success.
 */
static int
dyn_install_state(const struct ipfw_flow_id *pkt, uint32_t zoneid,
    uint16_t fibnum, const void *ulp, int pktlen, void *rule,
    uint32_t ruleid, uint16_t rulenum, uint8_t set,
    struct ipfw_dyn_info *info, uint32_t limit, uint16_t limit_mask,
    uint16_t kidx, uint8_t type)
{
	struct ipfw_flow_id id;
	uint32_t hashval, parent_hashval;
	int ret;

	MPASS(type == O_LIMIT || type == O_KEEP_STATE);

	if (type == O_LIMIT) {
		/* Create masked flow id and calculate bucket */
		id.addr_type = pkt->addr_type;
		id.proto = pkt->proto;
		id.fib = fibnum; /* unused */
		id.src_port = (limit_mask & DYN_SRC_PORT) ?
		    pkt->src_port: 0;
		id.dst_port = (limit_mask & DYN_DST_PORT) ?
		    pkt->dst_port: 0;
		if (IS_IP4_FLOW_ID(pkt)) {
			id.src_ip = (limit_mask & DYN_SRC_ADDR) ?
			    pkt->src_ip: 0;
			id.dst_ip = (limit_mask & DYN_DST_ADDR) ?
			    pkt->dst_ip: 0;
		}
#ifdef INET6
		else if (IS_IP6_FLOW_ID(pkt)) {
			if (limit_mask & DYN_SRC_ADDR)
				id.src_ip6 = pkt->src_ip6;
			else
				memset(&id.src_ip6, 0, sizeof(id.src_ip6));
			if (limit_mask & DYN_DST_ADDR)
				id.dst_ip6 = pkt->dst_ip6;
			else
				memset(&id.dst_ip6, 0, sizeof(id.dst_ip6));
		}
#endif
		else
			return (EAFNOSUPPORT);

		parent_hashval = hash_parent(&id, rule);
		rule = dyn_get_parent_state(&id, zoneid, rule, parent_hashval,
		    limit, kidx);
		if (rule == NULL) {
#if 0
			if (V_fw_verbose && last_log != time_uptime) {
				last_log = time_uptime;
				snprintf(sbuf, sizeof(sbuf),
				    "%u drop session", rule->rulenum);
				print_dyn_rule_flags(pkt, O_LIMIT,
				    LOG_SECURITY | LOG_DEBUG, sbuf,
				    "too many entries");
			}
#endif
			return (EACCES);
		}
		/*
		 * Limit is not reached, create new state.
		 * Now rule points to parent state.
		 */
	}

	hashval = hash_packet(pkt);
	if (IS_IP4_FLOW_ID(pkt))
		ret = dyn_add_ipv4_state(rule, ruleid, rulenum, set, pkt,
		    ulp, pktlen, hashval, info, fibnum, kidx, type);
#ifdef INET6
	else if (IS_IP6_FLOW_ID(pkt))
		ret = dyn_add_ipv6_state(rule, ruleid, rulenum, set, pkt,
		    zoneid, ulp, pktlen, hashval, info, fibnum, kidx, type);
#endif /* INET6 */
	else
		ret = EAFNOSUPPORT;

	if (type == O_LIMIT) {
		if (ret != 0) {
			/*
			 * We failed to create child state for O_LIMIT
			 * opcode. Since we already counted it in the parent,
			 * we must revert counter back. The 'rule' points to
			 * parent state, use it to get dyn_parent.
			 *
			 * XXXAE: it should be safe to use 'rule' pointer
			 * without extra lookup, parent state is referenced
			 * and should not be freed.
			 */
			if (IS_IP4_FLOW_ID(&id))
				DPARENT_COUNT_DEC(
				    ((struct dyn_ipv4_state *)rule)->limit);
#ifdef INET6
			else if (IS_IP6_FLOW_ID(&id))
				DPARENT_COUNT_DEC(
				    ((struct dyn_ipv6_state *)rule)->limit);
#endif
		}
	}
	/*
	 * EEXIST means that simultaneous thread has created this
	 * state. Consider this as success.
	 *
	 * XXXAE: should we invalidate 'info' content here?
	 */
	if (ret == EEXIST)
		return (0);
	return (ret);
}

/*
 * Install dynamic state.
 * chain - ipfw's instance;
 * rule - the parent rule that installs the state;
 * cmd - opcode that installs the state;
 * args - ipfw arguments;
 * ulp - upper level protocol header;
 * pktlen - packet length;
 * info - dynamic state lookup info;
 * tablearg - tablearg id.
 *
 * Returns non-zero value (failure) if state is not installed because
 * of errors or because session limitations are enforced.
 */
int
ipfw_dyn_install_state(struct ip_fw_chain *chain, struct ip_fw *rule,
    const ipfw_insn_limit *cmd, const struct ip_fw_args *args,
    const void *ulp, int pktlen, struct ipfw_dyn_info *info,
    uint32_t tablearg)
{
	uint32_t limit;
	uint16_t limit_mask;

	if (cmd->o.opcode == O_LIMIT) {
		/* Connection limit may be supplied indirectly via tablearg. */
		limit = IP_FW_ARG_TABLEARG(chain, cmd->conn_limit, limit);
		limit_mask = cmd->limit_mask;
	} else {
		/* O_KEEP_STATE and friends: no session limiting. */
		limit = 0;
		limit_mask = 0;
	}
	/* For IPv6 flows pass the scope zone id, zero otherwise. */
	return (dyn_install_state(&args->f_id,
#ifdef INET6
	    IS_IP6_FLOW_ID(&args->f_id) ? dyn_getscopeid(args):
#endif
	    0, M_GETFIB(args->m), ulp, pktlen, rule, rule->id, rule->rulenum,
	    rule->set, info, limit, limit_mask, cmd->o.arg1, cmd->o.opcode));
}

/*
 * Free safe to remove state entries from expired lists.
 */
static void
dyn_free_states(struct ip_fw_chain *chain)
{
	struct dyn_ipv4_state *s4, *s4n;
#ifdef INET6
	struct dyn_ipv6_state *s6, *s6n;
#endif
	int cached_count, i;

	/*
	 * We keep pointers to objects that are in use on each CPU
	 * in the per-cpu dyn_hp pointer. When object is going to be
	 * removed, first of it is unlinked from the corresponding
	 * list. This leads to changing of dyn_bucket_xxx_delver version.
	 * Unlinked objects are placed into corresponding dyn_expired_xxx
	 * list. Reader that is going to dereference object pointer checks
	 * dyn_bucket_xxx_delver version before and after storing pointer
	 * into dyn_hp. If version is the same, the object is protected
	 * from freeing and it is safe to dereference. Otherwise reader
	 * tries to iterate list again from the beginning, but this object
	 * now unlinked and thus will not be accessible.
	 *
	 * Copy dyn_hp pointers for each CPU into dyn_hp_cache array.
	 * It does not matter that some pointer can be changed in
	 * time while we are copying. We need to check, that objects
	 * removed in the previous pass are not in use. And if dyn_hp
	 * pointer does not contain it in the time when we are copying,
	 * it will not appear there, because it is already unlinked.
	 * And for new pointers we will not free objects that will be
	 * unlinked in this pass.
	 */
	cached_count = 0;
	CPU_FOREACH(i) {
		dyn_hp_cache[cached_count] = DYNSTATE_GET(i);
		if (dyn_hp_cache[cached_count] != NULL)
			cached_count++;
	}

	/*
	 * Free expired states that are safe to free.
	 * Check each entry from previous pass in the dyn_expired_xxx
	 * list, if pointer to the object is in the dyn_hp_cache array,
	 * keep it until next pass. Otherwise it is safe to free the
	 * object.
	 *
	 * XXXAE: optimize this to use SLIST_REMOVE_AFTER.
	 */
#define	DYN_FREE_STATES(s, next, name)		do {	\
	s = SLIST_FIRST(&V_dyn_expired_ ## name);	\
	while (s != NULL) {				\
		next = SLIST_NEXT(s, expired);		\
		for (i = 0; i < cached_count; i++)	\
			if (dyn_hp_cache[i] == s)	\
				break;			\
		if (i == cached_count) {		\
			if (s->type == O_LIMIT_PARENT &&	\
			    s->limit->count != 0) {	\
				s = next;		\
				continue;		\
			}				\
			SLIST_REMOVE(&V_dyn_expired_ ## name,	\
			    s, dyn_ ## name ## _state, expired);	\
			if (s->type == O_LIMIT_PARENT)	\
				uma_zfree(V_dyn_parent_zone, s->limit);	\
			else				\
				uma_zfree(V_dyn_data_zone, s->data);	\
			uma_zfree(V_dyn_ ## name ## _zone, s);	\
		}					\
		s = next;				\
	}						\
} while (0)

	/*
	 * Protect access to expired lists with DYN_EXPIRED_LOCK.
	 * Userland can invoke ipfw_expire_dyn_states() to delete
	 * specific states, this will lead to modification of expired
	 * lists.
	 *
	 * XXXAE: do we need DYN_EXPIRED_LOCK? We can just use
	 * IPFW_UH_WLOCK to protect access to these lists.
	 */
	DYN_EXPIRED_LOCK();
	DYN_FREE_STATES(s4, s4n, ipv4);
#ifdef INET6
	DYN_FREE_STATES(s6, s6n, ipv6);
#endif
	DYN_EXPIRED_UNLOCK();
#undef	DYN_FREE_STATES
}

/*
 * Returns 1 when state is matched by specified range, otherwise returns 0.
 */
static int
dyn_match_range(uint16_t rulenum, uint8_t set, const ipfw_range_tlv *rt)
{

	MPASS(rt != NULL);
	/* flush all states */
	if (rt->flags & IPFW_RCFLAG_ALL)
		return (1);
	if ((rt->flags & IPFW_RCFLAG_SET) != 0 && set != rt->set)
		return (0);
	if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 &&
	    (rulenum < rt->start_rule || rulenum > rt->end_rule))
		return (0);
	return (1);
}

/*
 * Returns 1 when IPv4 state @s is matched by range @rt.
 * Parent states carry rulenum/set in 'limit', others in 'data'.
 *
 * NOTE(review): the O_LIMIT branch and the final check perform the
 * same test on s->data; kept as-is for symmetry with the parent case.
 */
static int
dyn_match_ipv4_state(struct dyn_ipv4_state *s, const ipfw_range_tlv *rt)
{

	if (s->type == O_LIMIT_PARENT)
		return (dyn_match_range(s->limit->rulenum,
		    s->limit->set, rt));

	if (s->type == O_LIMIT)
		return (dyn_match_range(s->data->rulenum, s->data->set, rt));

	if (dyn_match_range(s->data->rulenum, s->data->set, rt))
		return (1);

	return (0);
}

#ifdef INET6
/* IPv6 twin of dyn_match_ipv4_state(). */
static int
dyn_match_ipv6_state(struct dyn_ipv6_state *s, const ipfw_range_tlv *rt)
{

	if (s->type == O_LIMIT_PARENT)
		return (dyn_match_range(s->limit->rulenum,
		    s->limit->set, rt));

	if (s->type == O_LIMIT)
		return (dyn_match_range(s->data->rulenum, s->data->set, rt));

	if (dyn_match_range(s->data->rulenum, s->data->set, rt))
		return (1);

	return (0);
}
#endif

/*
 * Unlink expired entries from states lists.
 * @rt can be used to specify the range of states for deletion.
 */
static void
dyn_expire_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
{
	struct dyn_ipv4_slist expired_ipv4;
#ifdef INET6
	struct dyn_ipv6_slist expired_ipv6;
	struct dyn_ipv6_state *s6, *s6n, *s6p;
#endif
	struct dyn_ipv4_state *s4, *s4n, *s4p;
	int bucket, removed, length, max_length;

	/*
	 * Unlink expired states from each bucket.
	 * With acquired bucket lock iterate entries of each lists:
	 * ipv4, ipv4_parent, ipv6, and ipv6_parent. Check expired time
	 * and unlink entry from the list, link entry into temporary
	 * expired_xxx lists then bump "del" bucket version.
	 *
	 * When an entry is removed, corresponding states counter is
	 * decremented. If entry has O_LIMIT type, parent's reference
	 * counter is decremented.
	 *
	 * NOTE: this function can be called from userspace context
	 * when user deletes rules. In this case all matched states
	 * will be forcibly unlinked. O_LIMIT_PARENT states will be kept
	 * in the expired lists until reference counter becomes zero.
	 */
#define	DYN_UNLINK_STATES(s, prev, next, exp, af, name, extra) do {	\
	length = 0;						\
	removed = 0;						\
	prev = NULL;						\
	s = CK_SLIST_FIRST(&V_dyn_ ## name [bucket]);		\
	while (s != NULL) {					\
		next = CK_SLIST_NEXT(s, entry);			\
		if ((TIME_LEQ((s)->exp, time_uptime) && extra) ||	\
		    (rt != NULL && dyn_match_ ## af ## _state(s, rt))) {\
			if (prev != NULL)			\
				CK_SLIST_REMOVE_AFTER(prev, entry);	\
			else					\
				CK_SLIST_REMOVE_HEAD(		\
				    &V_dyn_ ## name [bucket], entry);	\
			removed++;				\
			SLIST_INSERT_HEAD(&expired_ ## af, s, expired);	\
			if (s->type == O_LIMIT_PARENT)		\
				DYN_COUNT_DEC(dyn_parent_count);	\
			else {					\
				DYN_COUNT_DEC(dyn_count);	\
				if (s->type == O_LIMIT) {	\
					s = s->data->parent;	\
					DPARENT_COUNT_DEC(s->limit);	\
				}				\
			}					\
		} else {					\
			prev = s;				\
			length++;				\
		}						\
		s = next;					\
	}							\
	if (removed != 0)					\
		DYN_BUCKET_VERSION_BUMP(bucket, name ## _del);	\
	if (length > max_length)				\
		max_length = length;				\
} while (0)

	SLIST_INIT(&expired_ipv4);
#ifdef INET6
	SLIST_INIT(&expired_ipv6);
#endif
	max_length = 0;
	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
		DYN_BUCKET_LOCK(bucket);
		DYN_UNLINK_STATES(s4, s4p, s4n, data->expire, ipv4, ipv4, 1);
		DYN_UNLINK_STATES(s4, s4p, s4n, limit->expire, ipv4,
		    ipv4_parent, (s4->limit->count == 0));
#ifdef INET6
		DYN_UNLINK_STATES(s6, s6p, s6n, data->expire, ipv6, ipv6, 1);
		DYN_UNLINK_STATES(s6, s6p, s6n, limit->expire, ipv6,
		    ipv6_parent, (s6->limit->count == 0));
#endif
		DYN_BUCKET_UNLOCK(bucket);
	}
	/* Update curr_max_length for statistics. */
	V_curr_max_length = max_length;
	/*
	 * Concatenate temporary lists with global expired lists.
	 */
	DYN_EXPIRED_LOCK();
	SLIST_CONCAT(&V_dyn_expired_ipv4, &expired_ipv4,
	    dyn_ipv4_state, expired);
#ifdef INET6
	SLIST_CONCAT(&V_dyn_expired_ipv6, &expired_ipv6,
	    dyn_ipv6_state, expired);
#endif
	DYN_EXPIRED_UNLOCK();
#undef	DYN_UNLINK_STATES
/* NOTE(review): DYN_UNREF_STATES is never defined in this file;
 * the #undef below appears to be a leftover — confirm and remove. */
#undef	DYN_UNREF_STATES
}

/*
 * Allocate an mbuf for a keepalive packet: @len bytes, zeroed, marked
 * M_SKIP_FIREWALL so it is not re-inspected, bound to fib @fibnum.
 * Returns NULL on allocation failure (M_NOWAIT).
 */
static struct mbuf *
dyn_mgethdr(int len, uint16_t fibnum)
{
	struct mbuf *m;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
#ifdef MAC
	mac_netinet_firewall_send(m);
#endif
	M_SETFIB(m, fibnum);
	m->m_data += max_linkhdr;
	m->m_flags |= M_SKIP_FIREWALL;
	m->m_len = m->m_pkthdr.len = len;
	bzero(m->m_data, len);
	return (m);
}

/*
 * Fill an IPv4 TCP keepalive (bare ACK) into the pre-zeroed mbuf @m.
 * Checksum is left for hardware/soft csum via CSUM_TCP offload flags.
 */
static void
dyn_make_keepalive_ipv4(struct mbuf *m, in_addr_t src, in_addr_t dst,
    uint32_t seq, uint32_t ack, uint16_t sport, uint16_t dport)
{
	struct tcphdr *tcp;
	struct ip *ip;

	ip = mtod(m, struct ip *);
	ip->ip_v = 4;
	ip->ip_hl = sizeof(*ip) >> 2;
	ip->ip_tos = IPTOS_LOWDELAY;
	ip->ip_len = htons(m->m_len);
	ip->ip_off |= htons(IP_DF);
	ip->ip_ttl = V_ip_defttl;
	ip->ip_p = IPPROTO_TCP;
	ip->ip_src.s_addr = htonl(src);
	ip->ip_dst.s_addr = htonl(dst);

	tcp = mtodo(m, sizeof(struct ip));
	tcp->th_sport = htons(sport);
	tcp->th_dport = htons(dport);
	tcp->th_off = sizeof(struct tcphdr) >> 2;
	tcp->th_seq = htonl(seq);
	tcp->th_ack = htonl(ack);
	tcp->th_flags = TH_ACK;
	tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
	    htons(sizeof(struct tcphdr) + IPPROTO_TCP));

	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
	m->m_pkthdr.csum_flags = CSUM_TCP;
}

/*
 * Queue up to two keepalives (one per direction) for TCP state @s.
 * A direction is skipped while its ACK_FWD/ACK_REV flag is set or no
 * ACK has been seen yet for it.
 */
static void
dyn_enqueue_keepalive_ipv4(struct mbufq *q, const struct dyn_ipv4_state *s)
{
	struct mbuf *m;

	if ((s->data->state & ACK_FWD) == 0 && s->data->ack_fwd > 0) {
		m =
dyn_mgethdr(sizeof(struct ip) + sizeof(struct tcphdr),
		    s->data->fibnum);
		if (m != NULL) {
			dyn_make_keepalive_ipv4(m, s->dst, s->src,
			    s->data->ack_fwd - 1, s->data->ack_rev,
			    s->dport, s->sport);
			if (mbufq_enqueue(q, m)) {
				m_freem(m);
				log(LOG_DEBUG, "ipfw: limit for IPv4 "
				    "keepalive queue is reached.\n");
				return;
			}
		}
	}

	if ((s->data->state & ACK_REV) == 0 && s->data->ack_rev > 0) {
		m = dyn_mgethdr(sizeof(struct ip) + sizeof(struct tcphdr),
		    s->data->fibnum);
		if (m != NULL) {
			dyn_make_keepalive_ipv4(m, s->src, s->dst,
			    s->data->ack_rev - 1, s->data->ack_fwd,
			    s->sport, s->dport);
			if (mbufq_enqueue(q, m)) {
				m_freem(m);
				log(LOG_DEBUG, "ipfw: limit for IPv4 "
				    "keepalive queue is reached.\n");
				return;
			}
		}
	}
}

/*
 * Prepare and send keep-alive packets.
 */
static void
dyn_send_keepalive_ipv4(struct ip_fw_chain *chain)
{
	struct mbufq q;
	struct mbuf *m;
	struct dyn_ipv4_state *s;
	uint32_t bucket;

	mbufq_init(&q, DYN_KEEPALIVE_MAXQ);
	IPFW_UH_RLOCK(chain);
	/*
	 * It is safe to not use hazard pointer and just do lockless
	 * access to the lists, because states entries can not be deleted
	 * while we hold IPFW_UH_RLOCK.
	 */
	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
		CK_SLIST_FOREACH(s, &V_dyn_ipv4[bucket], entry) {
			/*
			 * Only established TCP connections that will
			 * become expired within dyn_keepalive_interval.
			 */
			if (s->proto != IPPROTO_TCP ||
			    (s->data->state & BOTH_SYN) != BOTH_SYN ||
			    TIME_LEQ(time_uptime + V_dyn_keepalive_interval,
			    s->data->expire))
				continue;
			dyn_enqueue_keepalive_ipv4(&q, s);
		}
	}
	IPFW_UH_RUNLOCK(chain);
	/* Transmit outside of any ipfw lock. */
	while ((m = mbufq_dequeue(&q)) != NULL)
		ip_output(m, NULL, NULL, 0, NULL, NULL);
}

#ifdef INET6
/*
 * IPv6 twin of dyn_make_keepalive_ipv4(); restores the embedded
 * scope zone id for link-local addresses.
 */
static void
dyn_make_keepalive_ipv6(struct mbuf *m, const struct in6_addr *src,
    const struct in6_addr *dst, uint32_t zoneid, uint32_t seq, uint32_t ack,
    uint16_t sport, uint16_t dport)
{
	struct tcphdr *tcp;
	struct ip6_hdr *ip6;

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_plen = htons(sizeof(struct tcphdr));
	ip6->ip6_nxt = IPPROTO_TCP;
	ip6->ip6_hlim = IPV6_DEFHLIM;
	ip6->ip6_src = *src;
	if (IN6_IS_ADDR_LINKLOCAL(src))
		ip6->ip6_src.s6_addr16[1] = htons(zoneid & 0xffff);
	ip6->ip6_dst = *dst;
	if (IN6_IS_ADDR_LINKLOCAL(dst))
		ip6->ip6_dst.s6_addr16[1] = htons(zoneid & 0xffff);

	tcp = mtodo(m, sizeof(struct ip6_hdr));
	tcp->th_sport = htons(sport);
	tcp->th_dport = htons(dport);
	tcp->th_off = sizeof(struct tcphdr) >> 2;
	tcp->th_seq = htonl(seq);
	tcp->th_ack = htonl(ack);
	tcp->th_flags = TH_ACK;
	tcp->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr),
	    IPPROTO_TCP, 0);

	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
	m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
}

/* IPv6 twin of dyn_enqueue_keepalive_ipv4(). */
static void
dyn_enqueue_keepalive_ipv6(struct mbufq *q, const struct dyn_ipv6_state *s)
{
	struct mbuf *m;

	if ((s->data->state & ACK_FWD) == 0 && s->data->ack_fwd > 0) {
		m = dyn_mgethdr(sizeof(struct ip6_hdr) +
		    sizeof(struct tcphdr), s->data->fibnum);
		if (m != NULL) {
			dyn_make_keepalive_ipv6(m, &s->dst, &s->src,
			    s->zoneid, s->data->ack_fwd - 1, s->data->ack_rev,
			    s->dport, s->sport);
			if
(mbufq_enqueue(q, m)) {
				m_freem(m);
				log(LOG_DEBUG, "ipfw: limit for IPv6 "
				    "keepalive queue is reached.\n");
				return;
			}
		}
	}

	if ((s->data->state & ACK_REV) == 0 && s->data->ack_rev > 0) {
		m = dyn_mgethdr(sizeof(struct ip6_hdr) +
		    sizeof(struct tcphdr), s->data->fibnum);
		if (m != NULL) {
			dyn_make_keepalive_ipv6(m, &s->src, &s->dst,
			    s->zoneid, s->data->ack_rev - 1, s->data->ack_fwd,
			    s->sport, s->dport);
			if (mbufq_enqueue(q, m)) {
				m_freem(m);
				log(LOG_DEBUG, "ipfw: limit for IPv6 "
				    "keepalive queue is reached.\n");
				return;
			}
		}
	}
}

/* IPv6 twin of dyn_send_keepalive_ipv4(). */
static void
dyn_send_keepalive_ipv6(struct ip_fw_chain *chain)
{
	struct mbufq q;
	struct mbuf *m;
	struct dyn_ipv6_state *s;
	uint32_t bucket;

	mbufq_init(&q, DYN_KEEPALIVE_MAXQ);
	IPFW_UH_RLOCK(chain);
	/*
	 * It is safe to not use hazard pointer and just do lockless
	 * access to the lists, because states entries can not be deleted
	 * while we hold IPFW_UH_RLOCK.
	 */
	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
		CK_SLIST_FOREACH(s, &V_dyn_ipv6[bucket], entry) {
			/*
			 * Only established TCP connections that will
			 * become expired within dyn_keepalive_interval.
			 */
			if (s->proto != IPPROTO_TCP ||
			    (s->data->state & BOTH_SYN) != BOTH_SYN ||
			    TIME_LEQ(time_uptime + V_dyn_keepalive_interval,
			    s->data->expire))
				continue;
			dyn_enqueue_keepalive_ipv6(&q, s);
		}
	}
	IPFW_UH_RUNLOCK(chain);
	/* Transmit outside of any ipfw lock. */
	while ((m = mbufq_dequeue(&q)) != NULL)
		ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
}
#endif /* INET6 */

/*
 * Resize the state hash tables to @new buckets (must be a power of 2).
 * Allocates new bucket arrays/locks, re-links every state under both
 * IPFW_UH_WLOCK and IPFW_WLOCK, then frees the old arrays.
 */
static void
dyn_grow_hashtable(struct ip_fw_chain *chain, uint32_t new)
{
#ifdef INET6
	struct dyn_ipv6ck_slist *ipv6, *ipv6_parent;
	uint32_t *ipv6_add, *ipv6_del, *ipv6_parent_add, *ipv6_parent_del;
	struct dyn_ipv6_state *s6;
#endif
	struct dyn_ipv4ck_slist *ipv4, *ipv4_parent;
	uint32_t *ipv4_add, *ipv4_del, *ipv4_parent_add, *ipv4_parent_del;
	struct dyn_ipv4_state *s4;
	struct mtx *bucket_lock;
	void *tmp;
	uint32_t bucket;

	MPASS(powerof2(new));
	DYN_DEBUG("grow hash size %u -> %u", V_curr_dyn_buckets, new);
	/*
	 * Allocate and initialize new lists.
	 * XXXAE: on memory pressure this can disable callout timer.
	 */
	bucket_lock = malloc(new * sizeof(struct mtx), M_IPFW,
	    M_WAITOK | M_ZERO);
	ipv4 = malloc(new * sizeof(struct dyn_ipv4ck_slist), M_IPFW,
	    M_WAITOK | M_ZERO);
	ipv4_parent = malloc(new * sizeof(struct dyn_ipv4ck_slist), M_IPFW,
	    M_WAITOK | M_ZERO);
	ipv4_add = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO);
	ipv4_del = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO);
	ipv4_parent_add = malloc(new * sizeof(uint32_t), M_IPFW,
	    M_WAITOK | M_ZERO);
	ipv4_parent_del = malloc(new * sizeof(uint32_t), M_IPFW,
	    M_WAITOK | M_ZERO);
#ifdef INET6
	ipv6 = malloc(new * sizeof(struct dyn_ipv6ck_slist), M_IPFW,
	    M_WAITOK | M_ZERO);
	ipv6_parent = malloc(new * sizeof(struct dyn_ipv6ck_slist), M_IPFW,
	    M_WAITOK | M_ZERO);
	ipv6_add = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO);
	ipv6_del = malloc(new * sizeof(uint32_t), M_IPFW, M_WAITOK | M_ZERO);
	ipv6_parent_add = malloc(new * sizeof(uint32_t), M_IPFW,
	    M_WAITOK | M_ZERO);
	ipv6_parent_del = malloc(new * sizeof(uint32_t), M_IPFW,
	    M_WAITOK | M_ZERO);
#endif
	for (bucket = 0; bucket < new; bucket++) {
		DYN_BUCKET_LOCK_INIT(bucket_lock, bucket);
		CK_SLIST_INIT(&ipv4[bucket]);
		CK_SLIST_INIT(&ipv4_parent[bucket]);
#ifdef INET6
		CK_SLIST_INIT(&ipv6[bucket]);
		CK_SLIST_INIT(&ipv6_parent[bucket]);
#endif
	}

/*
 * Move every state from old bucket i into the new table, rehashing
 * the stored hashval against the new bucket count.
 */
#define	DYN_RELINK_STATES(s, hval, i, head, ohead)	do {	\
	while ((s = CK_SLIST_FIRST(&V_dyn_ ## ohead[i])) != NULL) {	\
		CK_SLIST_REMOVE_HEAD(&V_dyn_ ## ohead[i], entry);	\
		CK_SLIST_INSERT_HEAD(&head[DYN_BUCKET(s->hval, new)],	\
		    s, entry);					\
	}							\
} while (0)
	/*
	 * Prevent rules changing from userland.
	 */
	IPFW_UH_WLOCK(chain);
	/*
	 * Hold traffic processing until we finish resize to
	 * prevent access to states lists.
	 */
	IPFW_WLOCK(chain);
	/* Re-link all dynamic states */
	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
		DYN_RELINK_STATES(s4, data->hashval, bucket, ipv4, ipv4);
		DYN_RELINK_STATES(s4, limit->hashval, bucket, ipv4_parent,
		    ipv4_parent);
#ifdef INET6
		DYN_RELINK_STATES(s6, data->hashval, bucket, ipv6, ipv6);
		DYN_RELINK_STATES(s6, limit->hashval, bucket, ipv6_parent,
		    ipv6_parent);
#endif
	}

#define	DYN_SWAP_PTR(old, new, tmp)	do {	\
	tmp = old;				\
	old = new;				\
	new = tmp;				\
} while (0)
	/* Swap pointers */
	DYN_SWAP_PTR(V_dyn_bucket_lock, bucket_lock, tmp);
	DYN_SWAP_PTR(V_dyn_ipv4, ipv4, tmp);
	DYN_SWAP_PTR(V_dyn_ipv4_parent, ipv4_parent, tmp);
	DYN_SWAP_PTR(V_dyn_ipv4_add, ipv4_add, tmp);
	DYN_SWAP_PTR(V_dyn_ipv4_parent_add, ipv4_parent_add, tmp);
	DYN_SWAP_PTR(V_dyn_ipv4_del, ipv4_del, tmp);
	DYN_SWAP_PTR(V_dyn_ipv4_parent_del, ipv4_parent_del, tmp);

#ifdef INET6
	DYN_SWAP_PTR(V_dyn_ipv6, ipv6, tmp);
	DYN_SWAP_PTR(V_dyn_ipv6_parent, ipv6_parent, tmp);
	DYN_SWAP_PTR(V_dyn_ipv6_add, ipv6_add, tmp);
	DYN_SWAP_PTR(V_dyn_ipv6_parent_add, ipv6_parent_add, tmp);
	DYN_SWAP_PTR(V_dyn_ipv6_del, ipv6_del, tmp);
	DYN_SWAP_PTR(V_dyn_ipv6_parent_del, ipv6_parent_del, tmp);
#endif
	bucket = V_curr_dyn_buckets;
	V_curr_dyn_buckets = new;

	IPFW_WUNLOCK(chain);
	IPFW_UH_WUNLOCK(chain);

	/* Release old resources */
	while (bucket-- != 0)
		DYN_BUCKET_LOCK_DESTROY(bucket_lock, bucket);
	free(bucket_lock, M_IPFW);
	free(ipv4, M_IPFW);
	free(ipv4_parent, M_IPFW);
	free(ipv4_add, M_IPFW);
	free(ipv4_parent_add, M_IPFW);
	free(ipv4_del, M_IPFW);
	free(ipv4_parent_del, M_IPFW);
#ifdef INET6
	free(ipv6, M_IPFW);
	free(ipv6_parent, M_IPFW);
	free(ipv6_add, M_IPFW);
	free(ipv6_parent_add, M_IPFW);
	free(ipv6_del, M_IPFW);
	free(ipv6_parent_del, M_IPFW);
#endif
}

/*
 * This function is used to perform various maintenance
 * on dynamic hash lists. Currently it is called every second.
 */
static void
dyn_tick(void *vnetx)
{
	uint32_t buckets;

	CURVNET_SET((struct vnet *)vnetx);
	/*
	 * First free states unlinked in previous passes.
	 */
	dyn_free_states(&V_layer3_chain);
	/*
	 * Now unlink others expired states.
	 * We use IPFW_UH_WLOCK to avoid concurrent call of
	 * dyn_expire_states(). It is the only function that does
	 * deletion of state entries from states lists.
	 */
	IPFW_UH_WLOCK(&V_layer3_chain);
	dyn_expire_states(&V_layer3_chain, NULL);
	IPFW_UH_WUNLOCK(&V_layer3_chain);
	/*
	 * Send keepalives if they are enabled and the time has come.
	 */
	if (V_dyn_keepalive != 0 &&
	    V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) {
		V_dyn_keepalive_last = time_uptime;
		dyn_send_keepalive_ipv4(&V_layer3_chain);
#ifdef INET6
		dyn_send_keepalive_ipv6(&V_layer3_chain);
#endif
	}
	/*
	 * Check if we need to resize the hash:
	 * if current number of states exceeds number of buckets in hash,
	 * and dyn_buckets_max permits to grow the number of buckets, then
	 * do it. Grow hash size to the minimum power of 2 which is bigger
	 * than current states count.
	 */
	if (V_curr_dyn_buckets < V_dyn_buckets_max &&
	    (V_curr_dyn_buckets < V_dyn_count / 2 || (
	    V_curr_dyn_buckets < V_dyn_count && V_curr_max_length > 8))) {
		buckets = 1 << fls(V_dyn_count);
		if (buckets > V_dyn_buckets_max)
			buckets = V_dyn_buckets_max;
		dyn_grow_hashtable(&V_layer3_chain, buckets);
	}

	/* Re-arm ourselves for the next second. */
	callout_reset_on(&V_dyn_timeout, hz, dyn_tick, vnetx, 0);
	CURVNET_RESTORE();
}

void
ipfw_expire_dyn_states(struct ip_fw_chain *chain, ipfw_range_tlv *rt)
{
	/*
	 * Do not perform any checks if we currently have no dynamic states
	 */
	if (V_dyn_count == 0)
		return;

	IPFW_UH_WLOCK_ASSERT(chain);
	dyn_expire_states(chain, rt);
}

/*
 * Returns size of dynamic states in legacy format
 */
int
ipfw_dyn_len(void)
{

	return ((V_dyn_count + V_dyn_parent_count) * sizeof(ipfw_dyn_rule));
}

/*
 * Returns number of dynamic states.
 * Used by dump format v1 (current).
 */
uint32_t
ipfw_dyn_get_count(void)
{

	return (V_dyn_count + V_dyn_parent_count);
}

/*
 * Check if rule contains at least one dynamic opcode.
 *
 * Returns 1 if such opcode is found, 0 otherwise.
 */
int
ipfw_is_dyn_rule(struct ip_fw *rule)
{
	int cmdlen, l;
	ipfw_insn *cmd;

	l = rule->cmd_len;
	cmd = rule->cmd;
	cmdlen = 0;
	for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
		cmdlen = F_LEN(cmd);

		switch (cmd->opcode) {
		case O_LIMIT:
		case O_KEEP_STATE:
		case O_PROBE_STATE:
		case O_CHECK_STATE:
			return (1);
		}
	}

	return (0);
}

/*
 * Export an O_LIMIT_PARENT state into the legacy ipfw_dyn_rule layout.
 */
static void
dyn_export_parent(const struct dyn_parent *p, uint16_t kidx,
    ipfw_dyn_rule *dst)
{

	dst->dyn_type = O_LIMIT_PARENT;
	dst->kidx = kidx;
	dst->count = (uint16_t)DPARENT_COUNT(p);
	dst->expire = TIME_LEQ(p->expire, time_uptime) ? 0:
	    p->expire - time_uptime;

	/* 'rule' is used to pass up the rule number and set */
	memcpy(&dst->rule, &p->rulenum, sizeof(p->rulenum));
	/* store set number into high word of dst->rule pointer. */
	memcpy((char *)&dst->rule + sizeof(p->rulenum), &p->set,
	    sizeof(p->set));

	/* unused fields */
	dst->pcnt = 0;
	dst->bcnt = 0;
	dst->parent = NULL;
	dst->state = 0;
	dst->ack_fwd = 0;
	dst->ack_rev = 0;
	dst->bucket = p->hashval;
	/*
	 * The legacy userland code will interpret a NULL here as a marker
	 * for the last dynamic rule, hence the deliberate non-NULL value.
	 */
	dst->next = (ipfw_dyn_rule *)1;
}

/*
 * Export a regular (keep-state/limit child) state into the legacy
 * ipfw_dyn_rule layout.
 */
static void
dyn_export_data(const struct dyn_data *data, uint16_t kidx, uint8_t type,
    ipfw_dyn_rule *dst)
{

	dst->dyn_type = type;
	dst->kidx = kidx;
	dst->pcnt = data->pcnt_fwd + data->pcnt_rev;
	dst->bcnt = data->bcnt_fwd + data->bcnt_rev;
	dst->expire = TIME_LEQ(data->expire, time_uptime) ? 0:
	    data->expire - time_uptime;

	/* 'rule' is used to pass up the rule number and set */
	memcpy(&dst->rule, &data->rulenum, sizeof(data->rulenum));
	/* store set number into high word of dst->rule pointer. */
	memcpy((char *)&dst->rule + sizeof(data->rulenum), &data->set,
	    sizeof(data->set));

	/* unused fields */
	dst->parent = NULL;
	dst->state = data->state;
	dst->ack_fwd = data->ack_fwd;
	dst->ack_rev = data->ack_rev;
	dst->count = 0;
	dst->bucket = data->hashval;
	/*
	 * The legacy userland code will interpret a NULL here as a marker
	 * for the last dynamic rule, hence the deliberate non-NULL value.
	 */
	dst->next = (ipfw_dyn_rule *)1;
}

/* Export one IPv4 state (parent or data) plus its flow id. */
static void
dyn_export_ipv4_state(const struct dyn_ipv4_state *s, ipfw_dyn_rule *dst)
{

	switch (s->type) {
	case O_LIMIT_PARENT:
		dyn_export_parent(s->limit, s->kidx, dst);
		break;
	default:
		dyn_export_data(s->data, s->kidx, s->type, dst);
	}

	dst->id.dst_ip = s->dst;
	dst->id.src_ip = s->src;
	dst->id.dst_port = s->dport;
	dst->id.src_port = s->sport;
	/*
	 * NOTE(review): fibnum is read via s->data even when s->type is
	 * O_LIMIT_PARENT (exported via s->limit above) — verify the
	 * dyn_data/dyn_parent layout makes this access safe.
	 */
	dst->id.fib = s->data->fibnum;
	dst->id.proto = s->proto;
	dst->id._flags = 0;
	dst->id.addr_type = 4;

	memset(&dst->id.dst_ip6, 0, sizeof(dst->id.dst_ip6));
	memset(&dst->id.src_ip6, 0, sizeof(dst->id.src_ip6));
	dst->id.flow_id6 = dst->id.extra = 0;
}

#ifdef INET6
/* IPv6 twin of dyn_export_ipv4_state(). */
static void
dyn_export_ipv6_state(const struct dyn_ipv6_state *s, ipfw_dyn_rule *dst)
{

	switch (s->type) {
	case O_LIMIT_PARENT:
		dyn_export_parent(s->limit, s->kidx, dst);
		break;
	default:
		dyn_export_data(s->data, s->kidx, s->type, dst);
	}

	dst->id.src_ip6 = s->src;
	dst->id.dst_ip6 = s->dst;
	dst->id.dst_port = s->dport;
	dst->id.src_port = s->sport;
	/* NOTE(review): same s->data->fibnum caveat as the IPv4 variant. */
	dst->id.fib = s->data->fibnum;
	dst->id.proto = s->proto;
	dst->id._flags = 0;
	dst->id.addr_type = 6;

	dst->id.dst_ip = dst->id.src_ip = 0;
	dst->id.flow_id6 = dst->id.extra = 0;
}
#endif /* INET6 */

/*
 * Fills the buffer given by @sd with dynamic states.
 * Used by dump format v1 (current).
 *
 * Returns 0 on success.
 */
int
ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd)
{
#ifdef INET6
	struct dyn_ipv6_state *s6;
#endif
	struct dyn_ipv4_state *s4;
	ipfw_obj_dyntlv *dst, *last;
	ipfw_obj_ctlv *ctlv;
	uint32_t bucket;

	if (V_dyn_count == 0)
		return (0);

	/*
	 * IPFW_UH_RLOCK guarantees that another userland request
	 * and callout thread will not delete entries from states
	 * lists.
	 */
	IPFW_UH_RLOCK_ASSERT(chain);

	ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv));
	if (ctlv == NULL)
		return (ENOMEM);
	ctlv->head.type = IPFW_TLV_DYNSTATE_LIST;
	ctlv->objsize = sizeof(ipfw_obj_dyntlv);
	last = NULL;

/* Emit one dyntlv per state in bucket @b of table @h. */
#define	DYN_EXPORT_STATES(s, af, h, b)				\
	CK_SLIST_FOREACH(s, &V_dyn_ ## h[b], entry) {			\
		dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd,	\
		    sizeof(ipfw_obj_dyntlv));				\
		if (dst == NULL)					\
			return (ENOMEM);				\
		dyn_export_ ## af ## _state(s, &dst->state);		\
		dst->head.length = sizeof(ipfw_obj_dyntlv);		\
		dst->head.type = IPFW_TLV_DYN_ENT;			\
		last = dst;						\
	}

	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
		DYN_EXPORT_STATES(s4, ipv4, ipv4_parent, bucket);
		DYN_EXPORT_STATES(s4, ipv4, ipv4, bucket);
#ifdef INET6
		DYN_EXPORT_STATES(s6, ipv6, ipv6_parent, bucket);
		DYN_EXPORT_STATES(s6, ipv6, ipv6, bucket);
#endif /* INET6 */
	}

	/* mark last dynamic rule */
	if (last != NULL)
		last->head.flags = IPFW_DF_LAST; /* XXX: unused */
	return (0);
#undef DYN_EXPORT_STATES
}

/*
 * Fill given buffer with dynamic states (legacy format).
 * IPFW_UH_RLOCK has to be held while calling.
 */
void
ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep)
{
#ifdef INET6
	struct dyn_ipv6_state *s6;
#endif
	struct dyn_ipv4_state *s4;
	ipfw_dyn_rule *p, *last = NULL;
	char *bp;
	uint32_t bucket;

	if (V_dyn_count == 0)
		return;
	bp = *pbp;

	IPFW_UH_RLOCK_ASSERT(chain);

/* Copy states from bucket @b of table @head while room remains in [bp,ep). */
#define	DYN_EXPORT_STATES(s, af, head, b)			\
	CK_SLIST_FOREACH(s, &V_dyn_ ## head[b], entry) {		\
		if (bp + sizeof(*p) > ep)				\
			break;						\
		p = (ipfw_dyn_rule *)bp;				\
		dyn_export_ ## af ## _state(s, p);			\
		last = p;						\
		bp += sizeof(*p);					\
	}

	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
		DYN_EXPORT_STATES(s4, ipv4, ipv4_parent, bucket);
		DYN_EXPORT_STATES(s4, ipv4, ipv4, bucket);
#ifdef INET6
		DYN_EXPORT_STATES(s6, ipv6, ipv6_parent, bucket);
		DYN_EXPORT_STATES(s6, ipv6, ipv6, bucket);
#endif /* INET6 */
	}

	if (last != NULL) /* mark last dynamic rule */
		last->next = NULL;
	*pbp = bp;
#undef DYN_EXPORT_STATES
}

/*
 * Per-VNET initialization: defaults, UMA zones, initial hash tables
 * (256 buckets), hazard-pointer cache (default vnet only) and the
 * one-second maintenance callout.
 */
void
ipfw_dyn_init(struct ip_fw_chain *chain)
{

#ifdef IPFIREWALL_JENKINSHASH
	V_dyn_hashseed = arc4random();
#endif
	V_dyn_max = 16384;		/* max # of states */
	V_dyn_parent_max = 4096;	/* max # of parent states */
	V_dyn_buckets_max = 8192;	/* must be power of 2 */

	V_dyn_ack_lifetime = 300;
	V_dyn_syn_lifetime = 20;
	V_dyn_fin_lifetime = 1;
	V_dyn_rst_lifetime = 1;
	V_dyn_udp_lifetime = 10;
	V_dyn_short_lifetime = 5;

	V_dyn_keepalive_interval = 20;
	V_dyn_keepalive_period = 5;
	V_dyn_keepalive = 1;		/* send keepalives */
	V_dyn_keepalive_last = time_uptime;

	V_dyn_data_zone = uma_zcreate("IPFW dynamic states data",
	    sizeof(struct dyn_data), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	uma_zone_set_max(V_dyn_data_zone, V_dyn_max);

	V_dyn_parent_zone =
uma_zcreate("IPFW parent dynamic states",
	    sizeof(struct dyn_parent), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	uma_zone_set_max(V_dyn_parent_zone, V_dyn_parent_max);

	SLIST_INIT(&V_dyn_expired_ipv4);
	V_dyn_ipv4 = NULL;
	V_dyn_ipv4_parent = NULL;
	V_dyn_ipv4_zone = uma_zcreate("IPFW IPv4 dynamic states",
	    sizeof(struct dyn_ipv4_state), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);

#ifdef INET6
	SLIST_INIT(&V_dyn_expired_ipv6);
	V_dyn_ipv6 = NULL;
	V_dyn_ipv6_parent = NULL;
	V_dyn_ipv6_zone = uma_zcreate("IPFW IPv6 dynamic states",
	    sizeof(struct dyn_ipv6_state), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
#endif

	/* Initialize buckets. */
	V_curr_dyn_buckets = 0;
	V_dyn_bucket_lock = NULL;
	dyn_grow_hashtable(chain, 256);

	/* Hazard-pointer cache is shared; allocated for the default vnet. */
	if (IS_DEFAULT_VNET(curvnet))
		dyn_hp_cache = malloc(mp_ncpus * sizeof(void *), M_IPFW,
		    M_WAITOK | M_ZERO);

	DYN_EXPIRED_LOCK_INIT();
	callout_init(&V_dyn_timeout, 1);
	callout_reset(&V_dyn_timeout, hz, dyn_tick, curvnet);
	IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes);
}

/*
 * Per-VNET teardown. Called twice: pass 0 only drains the callout,
 * pass 1 forcibly frees every state and releases all resources.
 */
void
ipfw_dyn_uninit(int pass)
{
#ifdef INET6
	struct dyn_ipv6_state *s6;
#endif
	struct dyn_ipv4_state *s4;
	int bucket;

	if (pass == 0) {
		callout_drain(&V_dyn_timeout);
		return;
	}
	IPFW_DEL_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes);
	DYN_EXPIRED_LOCK_DESTROY();

/*
 * Drain list V_dyn_<name> unconditionally; @CK selects CK_SLIST vs
 * plain SLIST operations, @en names the linkage field.
 */
#define	DYN_FREE_STATES_FORCED(CK, s, af, name, en)	do {	\
	while ((s = CK ## SLIST_FIRST(&V_dyn_ ## name)) != NULL) {	\
		CK ## SLIST_REMOVE_HEAD(&V_dyn_ ## name, en);	\
		if (s->type == O_LIMIT_PARENT)			\
			uma_zfree(V_dyn_parent_zone, s->limit);	\
		else						\
			uma_zfree(V_dyn_data_zone, s->data);	\
		uma_zfree(V_dyn_ ## af ## _zone, s);		\
	}							\
} while (0)
	for (bucket = 0; bucket < V_curr_dyn_buckets; bucket++) {
		DYN_BUCKET_LOCK_DESTROY(V_dyn_bucket_lock, bucket);

		DYN_FREE_STATES_FORCED(CK_, s4, ipv4, ipv4[bucket], entry);
		DYN_FREE_STATES_FORCED(CK_, s4, ipv4, ipv4_parent[bucket],
		    entry);
#ifdef INET6
		DYN_FREE_STATES_FORCED(CK_, s6, ipv6, ipv6[bucket], entry);
		DYN_FREE_STATES_FORCED(CK_, s6, ipv6, ipv6_parent[bucket],
		    entry);
#endif /* INET6 */
	}
	DYN_FREE_STATES_FORCED(, s4, ipv4, expired_ipv4, expired);
#ifdef INET6
	DYN_FREE_STATES_FORCED(, s6, ipv6, expired_ipv6, expired);
#endif
#undef DYN_FREE_STATES_FORCED

	uma_zdestroy(V_dyn_ipv4_zone);
	uma_zdestroy(V_dyn_data_zone);
	uma_zdestroy(V_dyn_parent_zone);
#ifdef INET6
	uma_zdestroy(V_dyn_ipv6_zone);
	free(V_dyn_ipv6, M_IPFW);
	free(V_dyn_ipv6_parent, M_IPFW);
	free(V_dyn_ipv6_add, M_IPFW);
	free(V_dyn_ipv6_parent_add, M_IPFW);
	free(V_dyn_ipv6_del, M_IPFW);
	free(V_dyn_ipv6_parent_del, M_IPFW);
#endif
	free(V_dyn_bucket_lock, M_IPFW);
	free(V_dyn_ipv4, M_IPFW);
	free(V_dyn_ipv4_parent, M_IPFW);
	free(V_dyn_ipv4_add, M_IPFW);
	free(V_dyn_ipv4_parent_add, M_IPFW);
	free(V_dyn_ipv4_del, M_IPFW);
	free(V_dyn_ipv4_parent_del, M_IPFW);
	if (IS_DEFAULT_VNET(curvnet))
		free(dyn_hp_cache, M_IPFW);
}