ip_fw_dynamic.c revision 217322
1/*- 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_dynamic.c 217322 2011-01-12 19:53:50Z mdf $"); 28 29#define DEB(x) 30#define DDB(x) x 31 32/* 33 * Dynamic rule support for ipfw 34 */ 35 36#if !defined(KLD_MODULE) 37#include "opt_ipfw.h" 38#include "opt_ipdivert.h" 39#include "opt_ipdn.h" 40#include "opt_inet.h" 41#ifndef INET 42#error IPFIREWALL requires INET. 43#endif /* INET */ 44#endif 45#include "opt_inet6.h" 46#include "opt_ipsec.h" 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/kernel.h> 53#include <sys/lock.h> 54#include <sys/socket.h> 55#include <sys/sysctl.h> 56#include <sys/syslog.h> 57#include <net/ethernet.h> /* for ETHERTYPE_IP */ 58#include <net/if.h> 59#include <net/vnet.h> 60 61#include <netinet/in.h> 62#include <netinet/ip.h> 63#include <netinet/ip_var.h> /* ip_defttl */ 64#include <netinet/ip_fw.h> 65#include <netinet/ipfw/ip_fw_private.h> 66#include <netinet/tcp_var.h> 67#include <netinet/udp.h> 68 69#include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */ 70#ifdef INET6 71#include <netinet6/in6_var.h> 72#include <netinet6/ip6_var.h> 73#endif 74 75#include <machine/in_cksum.h> /* XXX for in_cksum */ 76 77#ifdef MAC 78#include <security/mac/mac_framework.h> 79#endif 80 81/* 82 * Description of dynamic rules. 83 * 84 * Dynamic rules are stored in lists accessed through a hash table 85 * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can 86 * be modified through the sysctl variable dyn_buckets which is 87 * updated when the table becomes empty. 88 * 89 * XXX currently there is only one list, ipfw_dyn. 90 * 91 * When a packet is received, its address fields are first masked 92 * with the mask defined for the rule, then hashed, then matched 93 * against the entries in the corresponding list. 94 * Dynamic rules can be used for different purposes: 95 * + stateful rules; 96 * + enforcing limits on the number of sessions; 97 * + in-kernel NAT (not implemented yet) 98 * 99 * The lifetime of dynamic rules is regulated by dyn_*_lifetime, 100 * measured in seconds and depending on the flags. 101 * 102 * The total number of dynamic rules is stored in dyn_count. 103 * The max number of dynamic rules is dyn_max. When we reach 104 * the maximum number of rules we do not create anymore. This is 105 * done to avoid consuming too much memory, but also too much 106 * time when searching on each packet (ideally, we should try instead 107 * to put a limit on the length of the list on each bucket...). 108 * 109 * Each dynamic rule holds a pointer to the parent ipfw rule so 110 * we know what action to perform. Dynamic rules are removed when 111 * the parent rule is deleted. XXX we should make them survive. 112 * 113 * There are some limitations with dynamic rules -- we do not 114 * obey the 'randomized match', and we do not do multiple 115 * passes through the firewall. XXX check the latter!!! 116 */ 117 118/* 119 * Static variables followed by global ones 120 */ 121static VNET_DEFINE(ipfw_dyn_rule **, ipfw_dyn_v); 122static VNET_DEFINE(u_int32_t, dyn_buckets); 123static VNET_DEFINE(u_int32_t, curr_dyn_buckets); 124static VNET_DEFINE(struct callout, ipfw_timeout); 125#define V_ipfw_dyn_v VNET(ipfw_dyn_v) 126#define V_dyn_buckets VNET(dyn_buckets) 127#define V_curr_dyn_buckets VNET(curr_dyn_buckets) 128#define V_ipfw_timeout VNET(ipfw_timeout) 129 130static uma_zone_t ipfw_dyn_rule_zone; 131#ifndef __FreeBSD__ 132DEFINE_SPINLOCK(ipfw_dyn_mtx); 133#else 134static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ 135#endif 136 137#define IPFW_DYN_LOCK_INIT() \ 138 mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF) 139#define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx) 140#define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx) 141#define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) 142#define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) 143 144void 145ipfw_dyn_unlock(void) 146{ 147 IPFW_DYN_UNLOCK(); 148} 149 150/* 151 * Timeouts for various events in handing dynamic rules. 152 */ 153static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); 154static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); 155static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); 156static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); 157static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); 158static VNET_DEFINE(u_int32_t, dyn_short_lifetime); 159 160#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) 161#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) 162#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) 163#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) 164#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) 165#define V_dyn_short_lifetime VNET(dyn_short_lifetime) 166 167/* 168 * Keepalives are sent if dyn_keepalive is set. They are sent every 169 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 170 * seconds of lifetime of a rule. 171 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower 172 * than dyn_keepalive_period. 173 */ 174 175static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); 176static VNET_DEFINE(u_int32_t, dyn_keepalive_period); 177static VNET_DEFINE(u_int32_t, dyn_keepalive); 178 179#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) 180#define V_dyn_keepalive_period VNET(dyn_keepalive_period) 181#define V_dyn_keepalive VNET(dyn_keepalive) 182 183static VNET_DEFINE(u_int32_t, dyn_count); /* # of dynamic rules */ 184static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ 185 186#define V_dyn_count VNET(dyn_count) 187#define V_dyn_max VNET(dyn_max) 188 189#ifdef SYSCTL_NODE 190 191SYSBEGIN(f2) 192 193SYSCTL_DECL(_net_inet_ip_fw); 194SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, 195 CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0, 196 "Number of dyn. buckets"); 197SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, 198 CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, 199 "Current Number of dyn. buckets"); 200SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_count, 201 CTLFLAG_RD, &VNET_NAME(dyn_count), 0, 202 "Number of dyn. rules"); 203SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_max, 204 CTLFLAG_RW, &VNET_NAME(dyn_max), 0, 205 "Max number of dyn. rules"); 206SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, 207 CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, 208 "Lifetime of dyn. rules for acks"); 209SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, 210 CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, 211 "Lifetime of dyn. rules for syn"); 212SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, 213 CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, 214 "Lifetime of dyn. rules for fin"); 215SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, 216 CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, 217 "Lifetime of dyn. rules for rst"); 218SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, 219 CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, 220 "Lifetime of dyn. rules for UDP"); 221SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, 222 CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, 223 "Lifetime of dyn. rules for other situations"); 224SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, 225 CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, 226 "Enable keepalives for dyn. rules"); 227 228SYSEND 229 230#endif /* SYSCTL_NODE */ 231 232 233static __inline int 234hash_packet6(struct ipfw_flow_id *id) 235{ 236 u_int32_t i; 237 i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ 238 (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ 239 (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ 240 (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ 241 (id->dst_port) ^ (id->src_port); 242 return i; 243} 244 245/* 246 * IMPORTANT: the hash function for dynamic rules must be commutative 247 * in source and destination (ip,port), because rules are bidirectional 248 * and we want to find both in the same bucket. 249 */ 250static __inline int 251hash_packet(struct ipfw_flow_id *id) 252{ 253 u_int32_t i; 254 255#ifdef INET6 256 if (IS_IP6_FLOW_ID(id)) 257 i = hash_packet6(id); 258 else 259#endif /* INET6 */ 260 i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); 261 i &= (V_curr_dyn_buckets - 1); 262 return i; 263} 264 265static __inline void 266unlink_dyn_rule_print(struct ipfw_flow_id *id) 267{ 268 struct in_addr da; 269#ifdef INET6 270 char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; 271#else 272 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 273#endif 274 275#ifdef INET6 276 if (IS_IP6_FLOW_ID(id)) { 277 ip6_sprintf(src, &id->src_ip6); 278 ip6_sprintf(dst, &id->dst_ip6); 279 } else 280#endif 281 { 282 da.s_addr = htonl(id->src_ip); 283 inet_ntoa_r(da, src); 284 da.s_addr = htonl(id->dst_ip); 285 inet_ntoa_r(da, dst); 286 } 287 printf("ipfw: unlink entry %s %d -> %s %d, %d left\n", 288 src, id->src_port, dst, id->dst_port, V_dyn_count - 1); 289} 290 291/** 292 * unlink a dynamic rule from a chain. prev is a pointer to 293 * the previous one, q is a pointer to the rule to delete, 294 * head is a pointer to the head of the queue. 295 * Modifies q and potentially also head. 296 */ 297#define UNLINK_DYN_RULE(prev, head, q) { \ 298 ipfw_dyn_rule *old_q = q; \ 299 \ 300 /* remove a refcount to the parent */ \ 301 if (q->dyn_type == O_LIMIT) \ 302 q->parent->count--; \ 303 DEB(unlink_dyn_rule_print(&q->id);) \ 304 if (prev != NULL) \ 305 prev->next = q = q->next; \ 306 else \ 307 head = q = q->next; \ 308 V_dyn_count--; \ 309 uma_zfree(ipfw_dyn_rule_zone, old_q); } 310 311#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) 312 313/** 314 * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. 315 * 316 * If keep_me == NULL, rules are deleted even if not expired, 317 * otherwise only expired rules are removed. 318 * 319 * The value of the second parameter is also used to point to identify 320 * a rule we absolutely do not want to remove (e.g. because we are 321 * holding a reference to it -- this is the case with O_LIMIT_PARENT 322 * rules). The pointer is only used for comparison, so any non-null 323 * value will do. 324 */ 325static void 326remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) 327{ 328 static u_int32_t last_remove = 0; 329 330#define FORCE (keep_me == NULL) 331 332 ipfw_dyn_rule *prev, *q; 333 int i, pass = 0, max_pass = 0; 334 335 IPFW_DYN_LOCK_ASSERT(); 336 337 if (V_ipfw_dyn_v == NULL || V_dyn_count == 0) 338 return; 339 /* do not expire more than once per second, it is useless */ 340 if (!FORCE && last_remove == time_uptime) 341 return; 342 last_remove = time_uptime; 343 344 /* 345 * because O_LIMIT refer to parent rules, during the first pass only 346 * remove child and mark any pending LIMIT_PARENT, and remove 347 * them in a second pass. 348 */ 349next_pass: 350 for (i = 0 ; i < V_curr_dyn_buckets ; i++) { 351 for (prev=NULL, q = V_ipfw_dyn_v[i] ; q ; ) { 352 /* 353 * Logic can become complex here, so we split tests. 354 */ 355 if (q == keep_me) 356 goto next; 357 if (rule != NULL && rule != q->rule) 358 goto next; /* not the one we are looking for */ 359 if (q->dyn_type == O_LIMIT_PARENT) { 360 /* 361 * handle parent in the second pass, 362 * record we need one. 363 */ 364 max_pass = 1; 365 if (pass == 0) 366 goto next; 367 if (FORCE && q->count != 0 ) { 368 /* XXX should not happen! */ 369 printf("ipfw: OUCH! cannot remove rule," 370 " count %d\n", q->count); 371 } 372 } else { 373 if (!FORCE && 374 !TIME_LEQ( q->expire, time_uptime )) 375 goto next; 376 } 377 if (q->dyn_type != O_LIMIT_PARENT || !q->count) { 378 UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); 379 continue; 380 } 381next: 382 prev=q; 383 q=q->next; 384 } 385 } 386 if (pass++ < max_pass) 387 goto next_pass; 388} 389 390void 391ipfw_remove_dyn_children(struct ip_fw *rule) 392{ 393 IPFW_DYN_LOCK(); 394 remove_dyn_rule(rule, NULL /* force removal */); 395 IPFW_DYN_UNLOCK(); 396} 397 398/** 399 * lookup a dynamic rule, locked version 400 */ 401static ipfw_dyn_rule * 402lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction, 403 struct tcphdr *tcp) 404{ 405 /* 406 * stateful ipfw extensions. 407 * Lookup into dynamic session queue 408 */ 409#define MATCH_REVERSE 0 410#define MATCH_FORWARD 1 411#define MATCH_NONE 2 412#define MATCH_UNKNOWN 3 413 int i, dir = MATCH_NONE; 414 ipfw_dyn_rule *prev, *q=NULL; 415 416 IPFW_DYN_LOCK_ASSERT(); 417 418 if (V_ipfw_dyn_v == NULL) 419 goto done; /* not found */ 420 i = hash_packet( pkt ); 421 for (prev=NULL, q = V_ipfw_dyn_v[i] ; q != NULL ; ) { 422 if (q->dyn_type == O_LIMIT_PARENT && q->count) 423 goto next; 424 if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */ 425 UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); 426 continue; 427 } 428 if (pkt->proto == q->id.proto && 429 q->dyn_type != O_LIMIT_PARENT) { 430 if (IS_IP6_FLOW_ID(pkt)) { 431 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 432 &(q->id.src_ip6)) && 433 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 434 &(q->id.dst_ip6)) && 435 pkt->src_port == q->id.src_port && 436 pkt->dst_port == q->id.dst_port ) { 437 dir = MATCH_FORWARD; 438 break; 439 } 440 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 441 &(q->id.dst_ip6)) && 442 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 443 &(q->id.src_ip6)) && 444 pkt->src_port == q->id.dst_port && 445 pkt->dst_port == q->id.src_port ) { 446 dir = MATCH_REVERSE; 447 break; 448 } 449 } else { 450 if (pkt->src_ip == q->id.src_ip && 451 pkt->dst_ip == q->id.dst_ip && 452 pkt->src_port == q->id.src_port && 453 pkt->dst_port == q->id.dst_port ) { 454 dir = MATCH_FORWARD; 455 break; 456 } 457 if (pkt->src_ip == q->id.dst_ip && 458 pkt->dst_ip == q->id.src_ip && 459 pkt->src_port == q->id.dst_port && 460 pkt->dst_port == q->id.src_port ) { 461 dir = MATCH_REVERSE; 462 break; 463 } 464 } 465 } 466next: 467 prev = q; 468 q = q->next; 469 } 470 if (q == NULL) 471 goto done; /* q = NULL, not found */ 472 473 if ( prev != NULL) { /* found and not in front */ 474 prev->next = q->next; 475 q->next = V_ipfw_dyn_v[i]; 476 V_ipfw_dyn_v[i] = q; 477 } 478 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 479 u_char flags = pkt->_flags & (TH_FIN|TH_SYN|TH_RST); 480 481#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 482#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 483 q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); 484 switch (q->state) { 485 case TH_SYN: /* opening */ 486 q->expire = time_uptime + V_dyn_syn_lifetime; 487 break; 488 489 case BOTH_SYN: /* move to established */ 490 case BOTH_SYN | TH_FIN : /* one side tries to close */ 491 case BOTH_SYN | (TH_FIN << 8) : 492 if (tcp) { 493#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) 494 u_int32_t ack = ntohl(tcp->th_ack); 495 if (dir == MATCH_FORWARD) { 496 if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) 497 q->ack_fwd = ack; 498 else { /* ignore out-of-sequence */ 499 break; 500 } 501 } else { 502 if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) 503 q->ack_rev = ack; 504 else { /* ignore out-of-sequence */ 505 break; 506 } 507 } 508 } 509 q->expire = time_uptime + V_dyn_ack_lifetime; 510 break; 511 512 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 513 if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) 514 V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; 515 q->expire = time_uptime + V_dyn_fin_lifetime; 516 break; 517 518 default: 519#if 0 520 /* 521 * reset or some invalid combination, but can also 522 * occur if we use keep-state the wrong way. 523 */ 524 if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) 525 printf("invalid state: 0x%x\n", q->state); 526#endif 527 if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) 528 V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; 529 q->expire = time_uptime + V_dyn_rst_lifetime; 530 break; 531 } 532 } else if (pkt->proto == IPPROTO_UDP) { 533 q->expire = time_uptime + V_dyn_udp_lifetime; 534 } else { 535 /* other protocols */ 536 q->expire = time_uptime + V_dyn_short_lifetime; 537 } 538done: 539 if (match_direction) 540 *match_direction = dir; 541 return q; 542} 543 544ipfw_dyn_rule * 545ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, 546 struct tcphdr *tcp) 547{ 548 ipfw_dyn_rule *q; 549 550 IPFW_DYN_LOCK(); 551 q = lookup_dyn_rule_locked(pkt, match_direction, tcp); 552 if (q == NULL) 553 IPFW_DYN_UNLOCK(); 554 /* NB: return table locked when q is not NULL */ 555 return q; 556} 557 558static void 559realloc_dynamic_table(void) 560{ 561 IPFW_DYN_LOCK_ASSERT(); 562 563 /* 564 * Try reallocation, make sure we have a power of 2 and do 565 * not allow more than 64k entries. In case of overflow, 566 * default to 1024. 567 */ 568 569 if (V_dyn_buckets > 65536) 570 V_dyn_buckets = 1024; 571 if ((V_dyn_buckets & (V_dyn_buckets-1)) != 0) { /* not a power of 2 */ 572 V_dyn_buckets = V_curr_dyn_buckets; /* reset */ 573 return; 574 } 575 V_curr_dyn_buckets = V_dyn_buckets; 576 if (V_ipfw_dyn_v != NULL) 577 free(V_ipfw_dyn_v, M_IPFW); 578 for (;;) { 579 V_ipfw_dyn_v = malloc(V_curr_dyn_buckets * sizeof(ipfw_dyn_rule *), 580 M_IPFW, M_NOWAIT | M_ZERO); 581 if (V_ipfw_dyn_v != NULL || V_curr_dyn_buckets <= 2) 582 break; 583 V_curr_dyn_buckets /= 2; 584 } 585} 586 587/** 588 * Install state of type 'type' for a dynamic session. 589 * The hash table contains two type of rules: 590 * - regular rules (O_KEEP_STATE) 591 * - rules for sessions with limited number of sess per user 592 * (O_LIMIT). When they are created, the parent is 593 * increased by 1, and decreased on delete. In this case, 594 * the third parameter is the parent rule and not the chain. 595 * - "parent" rules for the above (O_LIMIT_PARENT). 596 */ 597static ipfw_dyn_rule * 598add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) 599{ 600 ipfw_dyn_rule *r; 601 int i; 602 603 IPFW_DYN_LOCK_ASSERT(); 604 605 if (V_ipfw_dyn_v == NULL || 606 (V_dyn_count == 0 && V_dyn_buckets != V_curr_dyn_buckets)) { 607 realloc_dynamic_table(); 608 if (V_ipfw_dyn_v == NULL) 609 return NULL; /* failed ! */ 610 } 611 i = hash_packet(id); 612 613 r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); 614 if (r == NULL) { 615 printf ("ipfw: sorry cannot allocate state\n"); 616 return NULL; 617 } 618 619 /* increase refcount on parent, and set pointer */ 620 if (dyn_type == O_LIMIT) { 621 ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; 622 if ( parent->dyn_type != O_LIMIT_PARENT) 623 panic("invalid parent"); 624 parent->count++; 625 r->parent = parent; 626 rule = parent->rule; 627 } 628 629 r->id = *id; 630 r->expire = time_uptime + V_dyn_syn_lifetime; 631 r->rule = rule; 632 r->dyn_type = dyn_type; 633 r->pcnt = r->bcnt = 0; 634 r->count = 0; 635 636 r->bucket = i; 637 r->next = V_ipfw_dyn_v[i]; 638 V_ipfw_dyn_v[i] = r; 639 V_dyn_count++; 640 DEB({ 641 struct in_addr da; 642#ifdef INET6 643 char src[INET6_ADDRSTRLEN]; 644 char dst[INET6_ADDRSTRLEN]; 645#else 646 char src[INET_ADDRSTRLEN]; 647 char dst[INET_ADDRSTRLEN]; 648#endif 649 650#ifdef INET6 651 if (IS_IP6_FLOW_ID(&(r->id))) { 652 ip6_sprintf(src, &r->id.src_ip6); 653 ip6_sprintf(dst, &r->id.dst_ip6); 654 } else 655#endif 656 { 657 da.s_addr = htonl(r->id.src_ip); 658 inet_ntoa_r(da, src); 659 da.s_addr = htonl(r->id.dst_ip); 660 inet_ntoa_r(da, dst); 661 } 662 printf("ipfw: add dyn entry ty %d %s %d -> %s %d, total %d\n", 663 dyn_type, src, r->id.src_port, dst, r->id.dst_port, 664 V_dyn_count); 665 }) 666 return r; 667} 668 669/** 670 * lookup dynamic parent rule using pkt and rule as search keys. 671 * If the lookup fails, then install one. 672 */ 673static ipfw_dyn_rule * 674lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) 675{ 676 ipfw_dyn_rule *q; 677 int i; 678 679 IPFW_DYN_LOCK_ASSERT(); 680 681 if (V_ipfw_dyn_v) { 682 int is_v6 = IS_IP6_FLOW_ID(pkt); 683 i = hash_packet( pkt ); 684 for (q = V_ipfw_dyn_v[i] ; q != NULL ; q=q->next) 685 if (q->dyn_type == O_LIMIT_PARENT && 686 rule== q->rule && 687 pkt->proto == q->id.proto && 688 pkt->src_port == q->id.src_port && 689 pkt->dst_port == q->id.dst_port && 690 ( 691 (is_v6 && 692 IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 693 &(q->id.src_ip6)) && 694 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 695 &(q->id.dst_ip6))) || 696 (!is_v6 && 697 pkt->src_ip == q->id.src_ip && 698 pkt->dst_ip == q->id.dst_ip) 699 ) 700 ) { 701 q->expire = time_uptime + V_dyn_short_lifetime; 702 DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) 703 return q; 704 } 705 } 706 return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); 707} 708 709/** 710 * Install dynamic state for rule type cmd->o.opcode 711 * 712 * Returns 1 (failure) if state is not installed because of errors or because 713 * session limitations are enforced. 714 */ 715int 716ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, 717 struct ip_fw_args *args, uint32_t tablearg) 718{ 719 static int last_log; 720 ipfw_dyn_rule *q; 721 struct in_addr da; 722#ifdef INET6 723 char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; 724#else 725 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 726#endif 727 728 src[0] = '\0'; 729 dst[0] = '\0'; 730 731 IPFW_DYN_LOCK(); 732 733 DEB( 734#ifdef INET6 735 if (IS_IP6_FLOW_ID(&(args->f_id))) { 736 ip6_sprintf(src, &args->f_id.src_ip6); 737 ip6_sprintf(dst, &args->f_id.dst_ip6); 738 } else 739#endif 740 { 741 da.s_addr = htonl(args->f_id.src_ip); 742 inet_ntoa_r(da, src); 743 da.s_addr = htonl(args->f_id.dst_ip); 744 inet_ntoa_r(da, dst); 745 } 746 printf("ipfw: %s: type %d %s %u -> %s %u\n", 747 __func__, cmd->o.opcode, src, args->f_id.src_port, 748 dst, args->f_id.dst_port); 749 src[0] = '\0'; 750 dst[0] = '\0'; 751 ) 752 753 q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 754 755 if (q != NULL) { /* should never occur */ 756 if (last_log != time_uptime) { 757 last_log = time_uptime; 758 printf("ipfw: %s: entry already present, done\n", 759 __func__); 760 } 761 IPFW_DYN_UNLOCK(); 762 return (0); 763 } 764 765 if (V_dyn_count >= V_dyn_max) 766 /* Run out of slots, try to remove any expired rule. */ 767 remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); 768 769 if (V_dyn_count >= V_dyn_max) { 770 if (last_log != time_uptime) { 771 last_log = time_uptime; 772 printf("ipfw: %s: Too many dynamic rules\n", __func__); 773 } 774 IPFW_DYN_UNLOCK(); 775 return (1); /* cannot install, notify caller */ 776 } 777 778 switch (cmd->o.opcode) { 779 case O_KEEP_STATE: /* bidir rule */ 780 add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); 781 break; 782 783 case O_LIMIT: { /* limit number of sessions */ 784 struct ipfw_flow_id id; 785 ipfw_dyn_rule *parent; 786 uint32_t conn_limit; 787 uint16_t limit_mask = cmd->limit_mask; 788 789 conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ? 790 tablearg : cmd->conn_limit; 791 792 DEB( 793 if (cmd->conn_limit == IP_FW_TABLEARG) 794 printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " 795 "(tablearg)\n", __func__, conn_limit); 796 else 797 printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", 798 __func__, conn_limit); 799 ) 800 801 id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; 802 id.proto = args->f_id.proto; 803 id.addr_type = args->f_id.addr_type; 804 id.fib = M_GETFIB(args->m); 805 806 if (IS_IP6_FLOW_ID (&(args->f_id))) { 807 if (limit_mask & DYN_SRC_ADDR) 808 id.src_ip6 = args->f_id.src_ip6; 809 if (limit_mask & DYN_DST_ADDR) 810 id.dst_ip6 = args->f_id.dst_ip6; 811 } else { 812 if (limit_mask & DYN_SRC_ADDR) 813 id.src_ip = args->f_id.src_ip; 814 if (limit_mask & DYN_DST_ADDR) 815 id.dst_ip = args->f_id.dst_ip; 816 } 817 if (limit_mask & DYN_SRC_PORT) 818 id.src_port = args->f_id.src_port; 819 if (limit_mask & DYN_DST_PORT) 820 id.dst_port = args->f_id.dst_port; 821 if ((parent = lookup_dyn_parent(&id, rule)) == NULL) { 822 printf("ipfw: %s: add parent failed\n", __func__); 823 IPFW_DYN_UNLOCK(); 824 return (1); 825 } 826 827 if (parent->count >= conn_limit) { 828 /* See if we can remove some expired rule. */ 829 remove_dyn_rule(rule, parent); 830 if (parent->count >= conn_limit) { 831 if (V_fw_verbose && last_log != time_uptime) { 832 last_log = time_uptime; 833#ifdef INET6 834 /* 835 * XXX IPv6 flows are not 836 * supported yet. 837 */ 838 if (IS_IP6_FLOW_ID(&(args->f_id))) { 839 char ip6buf[INET6_ADDRSTRLEN]; 840 snprintf(src, sizeof(src), 841 "[%s]", ip6_sprintf(ip6buf, 842 &args->f_id.src_ip6)); 843 snprintf(dst, sizeof(dst), 844 "[%s]", ip6_sprintf(ip6buf, 845 &args->f_id.dst_ip6)); 846 } else 847#endif 848 { 849 da.s_addr = 850 htonl(args->f_id.src_ip); 851 inet_ntoa_r(da, src); 852 da.s_addr = 853 htonl(args->f_id.dst_ip); 854 inet_ntoa_r(da, dst); 855 } 856 log(LOG_SECURITY | LOG_DEBUG, 857 "ipfw: %d %s %s:%u -> %s:%u, %s\n", 858 parent->rule->rulenum, 859 "drop session", 860 src, (args->f_id.src_port), 861 dst, (args->f_id.dst_port), 862 "too many entries"); 863 } 864 IPFW_DYN_UNLOCK(); 865 return (1); 866 } 867 } 868 add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); 869 break; 870 } 871 default: 872 printf("ipfw: %s: unknown dynamic rule type %u\n", 873 __func__, cmd->o.opcode); 874 IPFW_DYN_UNLOCK(); 875 return (1); 876 } 877 878 /* XXX just set lifetime */ 879 lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 880 881 IPFW_DYN_UNLOCK(); 882 return (0); 883} 884 885/* 886 * Generate a TCP packet, containing either a RST or a keepalive. 887 * When flags & TH_RST, we are sending a RST packet, because of a 888 * "reset" action matched the packet. 889 * Otherwise we are sending a keepalive, and flags & TH_ 890 * The 'replyto' mbuf is the mbuf being replied to, if any, and is required 891 * so that MAC can label the reply appropriately. 892 */ 893struct mbuf * 894ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, 895 u_int32_t ack, int flags) 896{ 897 struct mbuf *m = NULL; /* stupid compiler */ 898 int len, dir; 899 struct ip *h = NULL; /* stupid compiler */ 900#ifdef INET6 901 struct ip6_hdr *h6 = NULL; 902#endif 903 struct tcphdr *th = NULL; 904 905 MGETHDR(m, M_DONTWAIT, MT_DATA); 906 if (m == NULL) 907 return (NULL); 908 909 M_SETFIB(m, id->fib); 910#ifdef MAC 911 if (replyto != NULL) 912 mac_netinet_firewall_reply(replyto, m); 913 else 914 mac_netinet_firewall_send(m); 915#else 916 (void)replyto; /* don't warn about unused arg */ 917#endif 918 919 switch (id->addr_type) { 920 case 4: 921 len = sizeof(struct ip) + sizeof(struct tcphdr); 922 break; 923#ifdef INET6 924 case 6: 925 len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 926 break; 927#endif 928 default: 929 /* XXX: log me?!? */ 930 FREE_PKT(m); 931 return (NULL); 932 } 933 dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); 934 935 m->m_data += max_linkhdr; 936 m->m_flags |= M_SKIP_FIREWALL; 937 m->m_pkthdr.len = m->m_len = len; 938 m->m_pkthdr.rcvif = NULL; 939 bzero(m->m_data, len); 940 941 switch (id->addr_type) { 942 case 4: 943 h = mtod(m, struct ip *); 944 945 /* prepare for checksum */ 946 h->ip_p = IPPROTO_TCP; 947 h->ip_len = htons(sizeof(struct tcphdr)); 948 if (dir) { 949 h->ip_src.s_addr = htonl(id->src_ip); 950 h->ip_dst.s_addr = htonl(id->dst_ip); 951 } else { 952 h->ip_src.s_addr = htonl(id->dst_ip); 953 h->ip_dst.s_addr = htonl(id->src_ip); 954 } 955 956 th = (struct tcphdr *)(h + 1); 957 break; 958#ifdef INET6 959 case 6: 960 h6 = mtod(m, struct ip6_hdr *); 961 962 /* prepare for checksum */ 963 h6->ip6_nxt = IPPROTO_TCP; 964 h6->ip6_plen = htons(sizeof(struct tcphdr)); 965 if (dir) { 966 h6->ip6_src = id->src_ip6; 967 h6->ip6_dst = id->dst_ip6; 968 } else { 969 h6->ip6_src = id->dst_ip6; 970 h6->ip6_dst = id->src_ip6; 971 } 972 973 th = (struct tcphdr *)(h6 + 1); 974 break; 975#endif 976 } 977 978 if (dir) { 979 th->th_sport = htons(id->src_port); 980 th->th_dport = htons(id->dst_port); 981 } else { 982 th->th_sport = htons(id->dst_port); 983 th->th_dport = htons(id->src_port); 984 } 985 th->th_off = sizeof(struct tcphdr) >> 2; 986 987 if (flags & TH_RST) { 988 if (flags & TH_ACK) { 989 th->th_seq = htonl(ack); 990 th->th_flags = TH_RST; 991 } else { 992 if (flags & TH_SYN) 993 seq++; 994 th->th_ack = htonl(seq); 995 th->th_flags = TH_RST | TH_ACK; 996 } 997 } else { 998 /* 999 * Keepalive - use caller provided sequence numbers 1000 */ 1001 th->th_seq = htonl(seq); 1002 th->th_ack = htonl(ack); 1003 th->th_flags = TH_ACK; 1004 } 1005 1006 switch (id->addr_type) { 1007 case 4: 1008 th->th_sum = in_cksum(m, len); 1009 1010 /* finish the ip header */ 1011 h->ip_v = 4; 1012 h->ip_hl = sizeof(*h) >> 2; 1013 h->ip_tos = IPTOS_LOWDELAY; 1014 h->ip_off = 0; 1015 /* ip_len must be in host format for ip_output */ 1016 h->ip_len = len; 1017 h->ip_ttl = V_ip_defttl; 1018 h->ip_sum = 0; 1019 break; 1020#ifdef INET6 1021 case 6: 1022 th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), 1023 sizeof(struct tcphdr)); 1024 1025 /* finish the ip6 header */ 1026 h6->ip6_vfc |= IPV6_VERSION; 1027 h6->ip6_hlim = IPV6_DEFHLIM; 1028 break; 1029#endif 1030 } 1031 1032 return (m); 1033} 1034 1035/* 1036 * This procedure is only used to handle keepalives. It is invoked 1037 * every dyn_keepalive_period 1038 */ 1039static void 1040ipfw_tick(void * vnetx) 1041{ 1042 struct mbuf *m0, *m, *mnext, **mtailp; 1043#ifdef INET6 1044 struct mbuf *m6, **m6_tailp; 1045#endif 1046 int i; 1047 ipfw_dyn_rule *q; 1048#ifdef VIMAGE 1049 struct vnet *vp = vnetx; 1050#endif 1051 1052 CURVNET_SET(vp); 1053 if (V_dyn_keepalive == 0 || V_ipfw_dyn_v == NULL || V_dyn_count == 0) 1054 goto done; 1055 1056 /* 1057 * We make a chain of packets to go out here -- not deferring 1058 * until after we drop the IPFW dynamic rule lock would result 1059 * in a lock order reversal with the normal packet input -> ipfw 1060 * call stack. 1061 */ 1062 m0 = NULL; 1063 mtailp = &m0; 1064#ifdef INET6 1065 m6 = NULL; 1066 m6_tailp = &m6; 1067#endif 1068 IPFW_DYN_LOCK(); 1069 for (i = 0 ; i < V_curr_dyn_buckets ; i++) { 1070 for (q = V_ipfw_dyn_v[i] ; q ; q = q->next ) { 1071 if (q->dyn_type == O_LIMIT_PARENT) 1072 continue; 1073 if (q->id.proto != IPPROTO_TCP) 1074 continue; 1075 if ( (q->state & BOTH_SYN) != BOTH_SYN) 1076 continue; 1077 if (TIME_LEQ(time_uptime + V_dyn_keepalive_interval, 1078 q->expire)) 1079 continue; /* too early */ 1080 if (TIME_LEQ(q->expire, time_uptime)) 1081 continue; /* too late, rule expired */ 1082 1083 m = ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1, 1084 q->ack_fwd, TH_SYN); 1085 mnext = ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1, 1086 q->ack_rev, 0); 1087 1088 switch (q->id.addr_type) { 1089 case 4: 1090 if (m != NULL) { 1091 *mtailp = m; 1092 mtailp = &(*mtailp)->m_nextpkt; 1093 } 1094 if (mnext != NULL) { 1095 *mtailp = mnext; 1096 mtailp = &(*mtailp)->m_nextpkt; 1097 } 1098 break; 1099#ifdef INET6 1100 case 6: 1101 if (m != NULL) { 1102 *m6_tailp = m; 1103 m6_tailp = &(*m6_tailp)->m_nextpkt; 1104 } 1105 if (mnext != NULL) { 1106 *m6_tailp = mnext; 1107 m6_tailp = &(*m6_tailp)->m_nextpkt; 1108 } 1109 break; 1110#endif 1111 } 1112 1113 m = mnext = NULL; 1114 } 1115 } 1116 IPFW_DYN_UNLOCK(); 1117 for (m = mnext = m0; m != NULL; m = mnext) { 1118 mnext = m->m_nextpkt; 1119 m->m_nextpkt = NULL; 1120 ip_output(m, NULL, NULL, 0, NULL, NULL); 1121 } 1122#ifdef INET6 1123 for (m = mnext = m6; m != NULL; m = mnext) { 1124 mnext = m->m_nextpkt; 1125 m->m_nextpkt = NULL; 1126 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 1127 } 1128#endif 1129done: 1130 callout_reset_on(&V_ipfw_timeout, V_dyn_keepalive_period * hz, 1131 ipfw_tick, vnetx, 0); 1132 CURVNET_RESTORE(); 1133} 1134 1135void 1136ipfw_dyn_attach(void) 1137{ 1138 ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", 1139 sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, 1140 UMA_ALIGN_PTR, 0); 1141 1142 IPFW_DYN_LOCK_INIT(); 1143} 1144 1145void 1146ipfw_dyn_detach(void) 1147{ 1148 uma_zdestroy(ipfw_dyn_rule_zone); 1149 IPFW_DYN_LOCK_DESTROY(); 1150} 1151 1152void 1153ipfw_dyn_init(void) 1154{ 1155 V_ipfw_dyn_v = NULL; 1156 V_dyn_buckets = 256; /* must be power of 2 */ 1157 V_curr_dyn_buckets = 256; /* must be power of 2 */ 1158 1159 V_dyn_ack_lifetime = 300; 1160 V_dyn_syn_lifetime = 20; 1161 V_dyn_fin_lifetime = 1; 1162 V_dyn_rst_lifetime = 1; 1163 V_dyn_udp_lifetime = 10; 1164 V_dyn_short_lifetime = 5; 1165 1166 V_dyn_keepalive_interval = 20; 1167 V_dyn_keepalive_period = 5; 1168 V_dyn_keepalive = 1; /* do send keepalives */ 1169 1170 V_dyn_max = 4096; /* max # of dynamic rules */ 1171 callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); 1172 callout_reset_on(&V_ipfw_timeout, hz, ipfw_tick, curvnet, 0); 1173} 1174 1175void 1176ipfw_dyn_uninit(int pass) 1177{ 1178 if (pass == 0) 1179 callout_drain(&V_ipfw_timeout); 1180 else { 1181 if (V_ipfw_dyn_v != NULL) 1182 free(V_ipfw_dyn_v, M_IPFW); 1183 } 1184} 1185 1186int 1187ipfw_dyn_len(void) 1188{ 1189 return (V_ipfw_dyn_v == NULL) ? 0 : 1190 (V_dyn_count * sizeof(ipfw_dyn_rule)); 1191} 1192 1193void 1194ipfw_get_dynamic(char **pbp, const char *ep) 1195{ 1196 ipfw_dyn_rule *p, *last = NULL; 1197 char *bp; 1198 int i; 1199 1200 if (V_ipfw_dyn_v == NULL) 1201 return; 1202 bp = *pbp; 1203 1204 IPFW_DYN_LOCK(); 1205 for (i = 0 ; i < V_curr_dyn_buckets; i++) 1206 for (p = V_ipfw_dyn_v[i] ; p != NULL; p = p->next) { 1207 if (bp + sizeof *p <= ep) { 1208 ipfw_dyn_rule *dst = 1209 (ipfw_dyn_rule *)bp; 1210 bcopy(p, dst, sizeof *p); 1211 bcopy(&(p->rule->rulenum), &(dst->rule), 1212 sizeof(p->rule->rulenum)); 1213 /* 1214 * store set number into high word of 1215 * dst->rule pointer. 1216 */ 1217 bcopy(&(p->rule->set), 1218 (char *)&dst->rule + 1219 sizeof(p->rule->rulenum), 1220 sizeof(p->rule->set)); 1221 /* 1222 * store a non-null value in "next". 1223 * The userland code will interpret a 1224 * NULL here as a marker 1225 * for the last dynamic rule. 1226 */ 1227 bcopy(&dst, &dst->next, sizeof(dst)); 1228 last = dst; 1229 dst->expire = 1230 TIME_LEQ(dst->expire, time_uptime) ? 1231 0 : dst->expire - time_uptime ; 1232 bp += sizeof(ipfw_dyn_rule); 1233 } 1234 } 1235 IPFW_DYN_UNLOCK(); 1236 if (last != NULL) /* mark last dynamic rule */ 1237 bzero(&last->next, sizeof(last)); 1238 *pbp = bp; 1239} 1240/* end of file */ 1241