ip_fw_dynamic.c revision 225736
1/*- 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: stable/9/sys/netinet/ipfw/ip_fw_dynamic.c 225518 2011-09-12 21:09:56Z jhb $"); 28 29#define DEB(x) 30#define DDB(x) x 31 32/* 33 * Dynamic rule support for ipfw 34 */ 35 36#include "opt_ipfw.h" 37#if !defined(KLD_MODULE) 38#include "opt_ipdivert.h" 39#include "opt_ipdn.h" 40#include "opt_inet.h" 41#ifndef INET 42#error IPFIREWALL requires INET. 43#endif /* INET */ 44#endif 45#include "opt_inet6.h" 46#include "opt_ipsec.h" 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/kernel.h> 53#include <sys/lock.h> 54#include <sys/socket.h> 55#include <sys/sysctl.h> 56#include <sys/syslog.h> 57#include <net/ethernet.h> /* for ETHERTYPE_IP */ 58#include <net/if.h> 59#include <net/vnet.h> 60 61#include <netinet/in.h> 62#include <netinet/ip.h> 63#include <netinet/ip_var.h> /* ip_defttl */ 64#include <netinet/ip_fw.h> 65#include <netinet/ipfw/ip_fw_private.h> 66#include <netinet/tcp_var.h> 67#include <netinet/udp.h> 68 69#include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */ 70#ifdef INET6 71#include <netinet6/in6_var.h> 72#include <netinet6/ip6_var.h> 73#endif 74 75#include <machine/in_cksum.h> /* XXX for in_cksum */ 76 77#ifdef MAC 78#include <security/mac/mac_framework.h> 79#endif 80 81/* 82 * Description of dynamic rules. 83 * 84 * Dynamic rules are stored in lists accessed through a hash table 85 * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can 86 * be modified through the sysctl variable dyn_buckets which is 87 * updated when the table becomes empty. 88 * 89 * XXX currently there is only one list, ipfw_dyn. 90 * 91 * When a packet is received, its address fields are first masked 92 * with the mask defined for the rule, then hashed, then matched 93 * against the entries in the corresponding list. 94 * Dynamic rules can be used for different purposes: 95 * + stateful rules; 96 * + enforcing limits on the number of sessions; 97 * + in-kernel NAT (not implemented yet) 98 * 99 * The lifetime of dynamic rules is regulated by dyn_*_lifetime, 100 * measured in seconds and depending on the flags. 101 * 102 * The total number of dynamic rules is stored in dyn_count. 103 * The max number of dynamic rules is dyn_max. When we reach 104 * the maximum number of rules we do not create anymore. This is 105 * done to avoid consuming too much memory, but also too much 106 * time when searching on each packet (ideally, we should try instead 107 * to put a limit on the length of the list on each bucket...). 108 * 109 * Each dynamic rule holds a pointer to the parent ipfw rule so 110 * we know what action to perform. Dynamic rules are removed when 111 * the parent rule is deleted. XXX we should make them survive. 112 * 113 * There are some limitations with dynamic rules -- we do not 114 * obey the 'randomized match', and we do not do multiple 115 * passes through the firewall. XXX check the latter!!! 116 */ 117 118/* 119 * Static variables followed by global ones 120 */ 121static VNET_DEFINE(ipfw_dyn_rule **, ipfw_dyn_v); 122static VNET_DEFINE(u_int32_t, dyn_buckets); 123static VNET_DEFINE(u_int32_t, curr_dyn_buckets); 124static VNET_DEFINE(struct callout, ipfw_timeout); 125#define V_ipfw_dyn_v VNET(ipfw_dyn_v) 126#define V_dyn_buckets VNET(dyn_buckets) 127#define V_curr_dyn_buckets VNET(curr_dyn_buckets) 128#define V_ipfw_timeout VNET(ipfw_timeout) 129 130static uma_zone_t ipfw_dyn_rule_zone; 131#ifndef __FreeBSD__ 132DEFINE_SPINLOCK(ipfw_dyn_mtx); 133#else 134static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ 135#endif 136 137#define IPFW_DYN_LOCK_INIT() \ 138 mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF) 139#define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx) 140#define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx) 141#define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) 142#define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) 143 144void 145ipfw_dyn_unlock(void) 146{ 147 IPFW_DYN_UNLOCK(); 148} 149 150/* 151 * Timeouts for various events in handing dynamic rules. 152 */ 153static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); 154static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); 155static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); 156static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); 157static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); 158static VNET_DEFINE(u_int32_t, dyn_short_lifetime); 159 160#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) 161#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) 162#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) 163#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) 164#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) 165#define V_dyn_short_lifetime VNET(dyn_short_lifetime) 166 167/* 168 * Keepalives are sent if dyn_keepalive is set. They are sent every 169 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 170 * seconds of lifetime of a rule. 171 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower 172 * than dyn_keepalive_period. 173 */ 174 175static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); 176static VNET_DEFINE(u_int32_t, dyn_keepalive_period); 177static VNET_DEFINE(u_int32_t, dyn_keepalive); 178 179#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) 180#define V_dyn_keepalive_period VNET(dyn_keepalive_period) 181#define V_dyn_keepalive VNET(dyn_keepalive) 182 183static VNET_DEFINE(u_int32_t, dyn_count); /* # of dynamic rules */ 184static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ 185 186#define V_dyn_count VNET(dyn_count) 187#define V_dyn_max VNET(dyn_max) 188 189#ifdef SYSCTL_NODE 190 191SYSBEGIN(f2) 192 193SYSCTL_DECL(_net_inet_ip_fw); 194SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, 195 CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0, 196 "Number of dyn. buckets"); 197SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, 198 CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, 199 "Current Number of dyn. buckets"); 200SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_count, 201 CTLFLAG_RD, &VNET_NAME(dyn_count), 0, 202 "Number of dyn. rules"); 203SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_max, 204 CTLFLAG_RW, &VNET_NAME(dyn_max), 0, 205 "Max number of dyn. rules"); 206SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, 207 CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, 208 "Lifetime of dyn. rules for acks"); 209SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, 210 CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, 211 "Lifetime of dyn. rules for syn"); 212SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, 213 CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, 214 "Lifetime of dyn. rules for fin"); 215SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, 216 CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, 217 "Lifetime of dyn. rules for rst"); 218SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, 219 CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, 220 "Lifetime of dyn. rules for UDP"); 221SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, 222 CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, 223 "Lifetime of dyn. rules for other situations"); 224SYSCTL_VNET_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, 225 CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, 226 "Enable keepalives for dyn. rules"); 227 228SYSEND 229 230#endif /* SYSCTL_NODE */ 231 232 233static __inline int 234hash_packet6(struct ipfw_flow_id *id) 235{ 236 u_int32_t i; 237 i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ 238 (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ 239 (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ 240 (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ 241 (id->dst_port) ^ (id->src_port); 242 return i; 243} 244 245/* 246 * IMPORTANT: the hash function for dynamic rules must be commutative 247 * in source and destination (ip,port), because rules are bidirectional 248 * and we want to find both in the same bucket. 249 */ 250static __inline int 251hash_packet(struct ipfw_flow_id *id) 252{ 253 u_int32_t i; 254 255#ifdef INET6 256 if (IS_IP6_FLOW_ID(id)) 257 i = hash_packet6(id); 258 else 259#endif /* INET6 */ 260 i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); 261 i &= (V_curr_dyn_buckets - 1); 262 return i; 263} 264 265static __inline void 266unlink_dyn_rule_print(struct ipfw_flow_id *id) 267{ 268 struct in_addr da; 269#ifdef INET6 270 char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; 271#else 272 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 273#endif 274 275#ifdef INET6 276 if (IS_IP6_FLOW_ID(id)) { 277 ip6_sprintf(src, &id->src_ip6); 278 ip6_sprintf(dst, &id->dst_ip6); 279 } else 280#endif 281 { 282 da.s_addr = htonl(id->src_ip); 283 inet_ntoa_r(da, src); 284 da.s_addr = htonl(id->dst_ip); 285 inet_ntoa_r(da, dst); 286 } 287 printf("ipfw: unlink entry %s %d -> %s %d, %d left\n", 288 src, id->src_port, dst, id->dst_port, V_dyn_count - 1); 289} 290 291/** 292 * unlink a dynamic rule from a chain. prev is a pointer to 293 * the previous one, q is a pointer to the rule to delete, 294 * head is a pointer to the head of the queue. 295 * Modifies q and potentially also head. 296 */ 297#define UNLINK_DYN_RULE(prev, head, q) { \ 298 ipfw_dyn_rule *old_q = q; \ 299 \ 300 /* remove a refcount to the parent */ \ 301 if (q->dyn_type == O_LIMIT) \ 302 q->parent->count--; \ 303 DEB(unlink_dyn_rule_print(&q->id);) \ 304 if (prev != NULL) \ 305 prev->next = q = q->next; \ 306 else \ 307 head = q = q->next; \ 308 V_dyn_count--; \ 309 uma_zfree(ipfw_dyn_rule_zone, old_q); } 310 311#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) 312 313/** 314 * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. 315 * 316 * If keep_me == NULL, rules are deleted even if not expired, 317 * otherwise only expired rules are removed. 318 * 319 * The value of the second parameter is also used to point to identify 320 * a rule we absolutely do not want to remove (e.g. because we are 321 * holding a reference to it -- this is the case with O_LIMIT_PARENT 322 * rules). The pointer is only used for comparison, so any non-null 323 * value will do. 324 */ 325static void 326remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) 327{ 328 static u_int32_t last_remove = 0; 329 330#define FORCE (keep_me == NULL) 331 332 ipfw_dyn_rule *prev, *q; 333 int i, pass = 0, max_pass = 0; 334 335 IPFW_DYN_LOCK_ASSERT(); 336 337 if (V_ipfw_dyn_v == NULL || V_dyn_count == 0) 338 return; 339 /* do not expire more than once per second, it is useless */ 340 if (!FORCE && last_remove == time_uptime) 341 return; 342 last_remove = time_uptime; 343 344 /* 345 * because O_LIMIT refer to parent rules, during the first pass only 346 * remove child and mark any pending LIMIT_PARENT, and remove 347 * them in a second pass. 348 */ 349next_pass: 350 for (i = 0 ; i < V_curr_dyn_buckets ; i++) { 351 for (prev=NULL, q = V_ipfw_dyn_v[i] ; q ; ) { 352 /* 353 * Logic can become complex here, so we split tests. 354 */ 355 if (q == keep_me) 356 goto next; 357 if (rule != NULL && rule != q->rule) 358 goto next; /* not the one we are looking for */ 359 if (q->dyn_type == O_LIMIT_PARENT) { 360 /* 361 * handle parent in the second pass, 362 * record we need one. 363 */ 364 max_pass = 1; 365 if (pass == 0) 366 goto next; 367 if (FORCE && q->count != 0 ) { 368 /* XXX should not happen! */ 369 printf("ipfw: OUCH! cannot remove rule," 370 " count %d\n", q->count); 371 } 372 } else { 373 if (!FORCE && 374 !TIME_LEQ( q->expire, time_uptime )) 375 goto next; 376 } 377 if (q->dyn_type != O_LIMIT_PARENT || !q->count) { 378 UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); 379 continue; 380 } 381next: 382 prev=q; 383 q=q->next; 384 } 385 } 386 if (pass++ < max_pass) 387 goto next_pass; 388} 389 390void 391ipfw_remove_dyn_children(struct ip_fw *rule) 392{ 393 IPFW_DYN_LOCK(); 394 remove_dyn_rule(rule, NULL /* force removal */); 395 IPFW_DYN_UNLOCK(); 396} 397 398/** 399 * lookup a dynamic rule, locked version 400 */ 401static ipfw_dyn_rule * 402lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction, 403 struct tcphdr *tcp) 404{ 405 /* 406 * stateful ipfw extensions. 407 * Lookup into dynamic session queue 408 */ 409#define MATCH_REVERSE 0 410#define MATCH_FORWARD 1 411#define MATCH_NONE 2 412#define MATCH_UNKNOWN 3 413 int i, dir = MATCH_NONE; 414 ipfw_dyn_rule *prev, *q=NULL; 415 416 IPFW_DYN_LOCK_ASSERT(); 417 418 if (V_ipfw_dyn_v == NULL) 419 goto done; /* not found */ 420 i = hash_packet( pkt ); 421 for (prev=NULL, q = V_ipfw_dyn_v[i] ; q != NULL ; ) { 422 if (q->dyn_type == O_LIMIT_PARENT && q->count) 423 goto next; 424 if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */ 425 UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); 426 continue; 427 } 428 if (pkt->proto == q->id.proto && 429 q->dyn_type != O_LIMIT_PARENT) { 430 if (IS_IP6_FLOW_ID(pkt)) { 431 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 432 &(q->id.src_ip6)) && 433 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 434 &(q->id.dst_ip6)) && 435 pkt->src_port == q->id.src_port && 436 pkt->dst_port == q->id.dst_port ) { 437 dir = MATCH_FORWARD; 438 break; 439 } 440 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 441 &(q->id.dst_ip6)) && 442 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 443 &(q->id.src_ip6)) && 444 pkt->src_port == q->id.dst_port && 445 pkt->dst_port == q->id.src_port ) { 446 dir = MATCH_REVERSE; 447 break; 448 } 449 } else { 450 if (pkt->src_ip == q->id.src_ip && 451 pkt->dst_ip == q->id.dst_ip && 452 pkt->src_port == q->id.src_port && 453 pkt->dst_port == q->id.dst_port ) { 454 dir = MATCH_FORWARD; 455 break; 456 } 457 if (pkt->src_ip == q->id.dst_ip && 458 pkt->dst_ip == q->id.src_ip && 459 pkt->src_port == q->id.dst_port && 460 pkt->dst_port == q->id.src_port ) { 461 dir = MATCH_REVERSE; 462 break; 463 } 464 } 465 } 466next: 467 prev = q; 468 q = q->next; 469 } 470 if (q == NULL) 471 goto done; /* q = NULL, not found */ 472 473 if ( prev != NULL) { /* found and not in front */ 474 prev->next = q->next; 475 q->next = V_ipfw_dyn_v[i]; 476 V_ipfw_dyn_v[i] = q; 477 } 478 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 479 u_char flags = pkt->_flags & (TH_FIN|TH_SYN|TH_RST); 480 481#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 482#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 483 q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); 484 switch (q->state) { 485 case TH_SYN: /* opening */ 486 q->expire = time_uptime + V_dyn_syn_lifetime; 487 break; 488 489 case BOTH_SYN: /* move to established */ 490 case BOTH_SYN | TH_FIN : /* one side tries to close */ 491 case BOTH_SYN | (TH_FIN << 8) : 492 if (tcp) { 493#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) 494 u_int32_t ack = ntohl(tcp->th_ack); 495 if (dir == MATCH_FORWARD) { 496 if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) 497 q->ack_fwd = ack; 498 else { /* ignore out-of-sequence */ 499 break; 500 } 501 } else { 502 if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) 503 q->ack_rev = ack; 504 else { /* ignore out-of-sequence */ 505 break; 506 } 507 } 508 } 509 q->expire = time_uptime + V_dyn_ack_lifetime; 510 break; 511 512 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 513 if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) 514 V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; 515 q->expire = time_uptime + V_dyn_fin_lifetime; 516 break; 517 518 default: 519#if 0 520 /* 521 * reset or some invalid combination, but can also 522 * occur if we use keep-state the wrong way. 523 */ 524 if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) 525 printf("invalid state: 0x%x\n", q->state); 526#endif 527 if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) 528 V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; 529 q->expire = time_uptime + V_dyn_rst_lifetime; 530 break; 531 } 532 } else if (pkt->proto == IPPROTO_UDP) { 533 q->expire = time_uptime + V_dyn_udp_lifetime; 534 } else { 535 /* other protocols */ 536 q->expire = time_uptime + V_dyn_short_lifetime; 537 } 538done: 539 if (match_direction) 540 *match_direction = dir; 541 return q; 542} 543 544ipfw_dyn_rule * 545ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, 546 struct tcphdr *tcp) 547{ 548 ipfw_dyn_rule *q; 549 550 IPFW_DYN_LOCK(); 551 q = lookup_dyn_rule_locked(pkt, match_direction, tcp); 552 if (q == NULL) 553 IPFW_DYN_UNLOCK(); 554 /* NB: return table locked when q is not NULL */ 555 return q; 556} 557 558static void 559realloc_dynamic_table(void) 560{ 561 IPFW_DYN_LOCK_ASSERT(); 562 563 /* 564 * Try reallocation, make sure we have a power of 2 and do 565 * not allow more than 64k entries. In case of overflow, 566 * default to 1024. 567 */ 568 569 if (V_dyn_buckets > 65536) 570 V_dyn_buckets = 1024; 571 if ((V_dyn_buckets & (V_dyn_buckets-1)) != 0) { /* not a power of 2 */ 572 V_dyn_buckets = V_curr_dyn_buckets; /* reset */ 573 return; 574 } 575 V_curr_dyn_buckets = V_dyn_buckets; 576 if (V_ipfw_dyn_v != NULL) 577 free(V_ipfw_dyn_v, M_IPFW); 578 for (;;) { 579 V_ipfw_dyn_v = malloc(V_curr_dyn_buckets * sizeof(ipfw_dyn_rule *), 580 M_IPFW, M_NOWAIT | M_ZERO); 581 if (V_ipfw_dyn_v != NULL || V_curr_dyn_buckets <= 2) 582 break; 583 V_curr_dyn_buckets /= 2; 584 } 585} 586 587/** 588 * Install state of type 'type' for a dynamic session. 589 * The hash table contains two type of rules: 590 * - regular rules (O_KEEP_STATE) 591 * - rules for sessions with limited number of sess per user 592 * (O_LIMIT). When they are created, the parent is 593 * increased by 1, and decreased on delete. In this case, 594 * the third parameter is the parent rule and not the chain. 595 * - "parent" rules for the above (O_LIMIT_PARENT). 596 */ 597static ipfw_dyn_rule * 598add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) 599{ 600 ipfw_dyn_rule *r; 601 int i; 602 603 IPFW_DYN_LOCK_ASSERT(); 604 605 if (V_ipfw_dyn_v == NULL || 606 (V_dyn_count == 0 && V_dyn_buckets != V_curr_dyn_buckets)) { 607 realloc_dynamic_table(); 608 if (V_ipfw_dyn_v == NULL) 609 return NULL; /* failed ! */ 610 } 611 i = hash_packet(id); 612 613 r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); 614 if (r == NULL) { 615 printf ("ipfw: sorry cannot allocate state\n"); 616 return NULL; 617 } 618 619 /* increase refcount on parent, and set pointer */ 620 if (dyn_type == O_LIMIT) { 621 ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; 622 if ( parent->dyn_type != O_LIMIT_PARENT) 623 panic("invalid parent"); 624 parent->count++; 625 r->parent = parent; 626 rule = parent->rule; 627 } 628 629 r->id = *id; 630 r->expire = time_uptime + V_dyn_syn_lifetime; 631 r->rule = rule; 632 r->dyn_type = dyn_type; 633 r->pcnt = r->bcnt = 0; 634 r->count = 0; 635 636 r->bucket = i; 637 r->next = V_ipfw_dyn_v[i]; 638 V_ipfw_dyn_v[i] = r; 639 V_dyn_count++; 640 DEB({ 641 struct in_addr da; 642#ifdef INET6 643 char src[INET6_ADDRSTRLEN]; 644 char dst[INET6_ADDRSTRLEN]; 645#else 646 char src[INET_ADDRSTRLEN]; 647 char dst[INET_ADDRSTRLEN]; 648#endif 649 650#ifdef INET6 651 if (IS_IP6_FLOW_ID(&(r->id))) { 652 ip6_sprintf(src, &r->id.src_ip6); 653 ip6_sprintf(dst, &r->id.dst_ip6); 654 } else 655#endif 656 { 657 da.s_addr = htonl(r->id.src_ip); 658 inet_ntoa_r(da, src); 659 da.s_addr = htonl(r->id.dst_ip); 660 inet_ntoa_r(da, dst); 661 } 662 printf("ipfw: add dyn entry ty %d %s %d -> %s %d, total %d\n", 663 dyn_type, src, r->id.src_port, dst, r->id.dst_port, 664 V_dyn_count); 665 }) 666 return r; 667} 668 669/** 670 * lookup dynamic parent rule using pkt and rule as search keys. 671 * If the lookup fails, then install one. 672 */ 673static ipfw_dyn_rule * 674lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) 675{ 676 ipfw_dyn_rule *q; 677 int i; 678 679 IPFW_DYN_LOCK_ASSERT(); 680 681 if (V_ipfw_dyn_v) { 682 int is_v6 = IS_IP6_FLOW_ID(pkt); 683 i = hash_packet( pkt ); 684 for (q = V_ipfw_dyn_v[i] ; q != NULL ; q=q->next) 685 if (q->dyn_type == O_LIMIT_PARENT && 686 rule== q->rule && 687 pkt->proto == q->id.proto && 688 pkt->src_port == q->id.src_port && 689 pkt->dst_port == q->id.dst_port && 690 ( 691 (is_v6 && 692 IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 693 &(q->id.src_ip6)) && 694 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 695 &(q->id.dst_ip6))) || 696 (!is_v6 && 697 pkt->src_ip == q->id.src_ip && 698 pkt->dst_ip == q->id.dst_ip) 699 ) 700 ) { 701 q->expire = time_uptime + V_dyn_short_lifetime; 702 DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) 703 return q; 704 } 705 } 706 return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); 707} 708 709/** 710 * Install dynamic state for rule type cmd->o.opcode 711 * 712 * Returns 1 (failure) if state is not installed because of errors or because 713 * session limitations are enforced. 714 */ 715int 716ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, 717 struct ip_fw_args *args, uint32_t tablearg) 718{ 719 static int last_log; 720 ipfw_dyn_rule *q; 721 struct in_addr da; 722#ifdef INET6 723 char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; 724#else 725 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 726#endif 727 728 src[0] = '\0'; 729 dst[0] = '\0'; 730 731 IPFW_DYN_LOCK(); 732 733 DEB( 734#ifdef INET6 735 if (IS_IP6_FLOW_ID(&(args->f_id))) { 736 ip6_sprintf(src, &args->f_id.src_ip6); 737 ip6_sprintf(dst, &args->f_id.dst_ip6); 738 } else 739#endif 740 { 741 da.s_addr = htonl(args->f_id.src_ip); 742 inet_ntoa_r(da, src); 743 da.s_addr = htonl(args->f_id.dst_ip); 744 inet_ntoa_r(da, dst); 745 } 746 printf("ipfw: %s: type %d %s %u -> %s %u\n", 747 __func__, cmd->o.opcode, src, args->f_id.src_port, 748 dst, args->f_id.dst_port); 749 src[0] = '\0'; 750 dst[0] = '\0'; 751 ) 752 753 q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 754 755 if (q != NULL) { /* should never occur */ 756 DEB( 757 if (last_log != time_uptime) { 758 last_log = time_uptime; 759 printf("ipfw: %s: entry already present, done\n", 760 __func__); 761 }) 762 IPFW_DYN_UNLOCK(); 763 return (0); 764 } 765 766 if (V_dyn_count >= V_dyn_max) 767 /* Run out of slots, try to remove any expired rule. */ 768 remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); 769 770 if (V_dyn_count >= V_dyn_max) { 771 if (last_log != time_uptime) { 772 last_log = time_uptime; 773 printf("ipfw: %s: Too many dynamic rules\n", __func__); 774 } 775 IPFW_DYN_UNLOCK(); 776 return (1); /* cannot install, notify caller */ 777 } 778 779 switch (cmd->o.opcode) { 780 case O_KEEP_STATE: /* bidir rule */ 781 add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); 782 break; 783 784 case O_LIMIT: { /* limit number of sessions */ 785 struct ipfw_flow_id id; 786 ipfw_dyn_rule *parent; 787 uint32_t conn_limit; 788 uint16_t limit_mask = cmd->limit_mask; 789 790 conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ? 791 tablearg : cmd->conn_limit; 792 793 DEB( 794 if (cmd->conn_limit == IP_FW_TABLEARG) 795 printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " 796 "(tablearg)\n", __func__, conn_limit); 797 else 798 printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", 799 __func__, conn_limit); 800 ) 801 802 id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; 803 id.proto = args->f_id.proto; 804 id.addr_type = args->f_id.addr_type; 805 id.fib = M_GETFIB(args->m); 806 807 if (IS_IP6_FLOW_ID (&(args->f_id))) { 808 if (limit_mask & DYN_SRC_ADDR) 809 id.src_ip6 = args->f_id.src_ip6; 810 if (limit_mask & DYN_DST_ADDR) 811 id.dst_ip6 = args->f_id.dst_ip6; 812 } else { 813 if (limit_mask & DYN_SRC_ADDR) 814 id.src_ip = args->f_id.src_ip; 815 if (limit_mask & DYN_DST_ADDR) 816 id.dst_ip = args->f_id.dst_ip; 817 } 818 if (limit_mask & DYN_SRC_PORT) 819 id.src_port = args->f_id.src_port; 820 if (limit_mask & DYN_DST_PORT) 821 id.dst_port = args->f_id.dst_port; 822 if ((parent = lookup_dyn_parent(&id, rule)) == NULL) { 823 printf("ipfw: %s: add parent failed\n", __func__); 824 IPFW_DYN_UNLOCK(); 825 return (1); 826 } 827 828 if (parent->count >= conn_limit) { 829 /* See if we can remove some expired rule. */ 830 remove_dyn_rule(rule, parent); 831 if (parent->count >= conn_limit) { 832 if (V_fw_verbose && last_log != time_uptime) { 833 last_log = time_uptime; 834#ifdef INET6 835 /* 836 * XXX IPv6 flows are not 837 * supported yet. 838 */ 839 if (IS_IP6_FLOW_ID(&(args->f_id))) { 840 char ip6buf[INET6_ADDRSTRLEN]; 841 snprintf(src, sizeof(src), 842 "[%s]", ip6_sprintf(ip6buf, 843 &args->f_id.src_ip6)); 844 snprintf(dst, sizeof(dst), 845 "[%s]", ip6_sprintf(ip6buf, 846 &args->f_id.dst_ip6)); 847 } else 848#endif 849 { 850 da.s_addr = 851 htonl(args->f_id.src_ip); 852 inet_ntoa_r(da, src); 853 da.s_addr = 854 htonl(args->f_id.dst_ip); 855 inet_ntoa_r(da, dst); 856 } 857 log(LOG_SECURITY | LOG_DEBUG, 858 "ipfw: %d %s %s:%u -> %s:%u, %s\n", 859 parent->rule->rulenum, 860 "drop session", 861 src, (args->f_id.src_port), 862 dst, (args->f_id.dst_port), 863 "too many entries"); 864 } 865 IPFW_DYN_UNLOCK(); 866 return (1); 867 } 868 } 869 add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); 870 break; 871 } 872 default: 873 printf("ipfw: %s: unknown dynamic rule type %u\n", 874 __func__, cmd->o.opcode); 875 IPFW_DYN_UNLOCK(); 876 return (1); 877 } 878 879 /* XXX just set lifetime */ 880 lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 881 882 IPFW_DYN_UNLOCK(); 883 return (0); 884} 885 886/* 887 * Generate a TCP packet, containing either a RST or a keepalive. 888 * When flags & TH_RST, we are sending a RST packet, because of a 889 * "reset" action matched the packet. 890 * Otherwise we are sending a keepalive, and flags & TH_ 891 * The 'replyto' mbuf is the mbuf being replied to, if any, and is required 892 * so that MAC can label the reply appropriately. 893 */ 894struct mbuf * 895ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, 896 u_int32_t ack, int flags) 897{ 898 struct mbuf *m = NULL; /* stupid compiler */ 899 int len, dir; 900 struct ip *h = NULL; /* stupid compiler */ 901#ifdef INET6 902 struct ip6_hdr *h6 = NULL; 903#endif 904 struct tcphdr *th = NULL; 905 906 MGETHDR(m, M_DONTWAIT, MT_DATA); 907 if (m == NULL) 908 return (NULL); 909 910 M_SETFIB(m, id->fib); 911#ifdef MAC 912 if (replyto != NULL) 913 mac_netinet_firewall_reply(replyto, m); 914 else 915 mac_netinet_firewall_send(m); 916#else 917 (void)replyto; /* don't warn about unused arg */ 918#endif 919 920 switch (id->addr_type) { 921 case 4: 922 len = sizeof(struct ip) + sizeof(struct tcphdr); 923 break; 924#ifdef INET6 925 case 6: 926 len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 927 break; 928#endif 929 default: 930 /* XXX: log me?!? */ 931 FREE_PKT(m); 932 return (NULL); 933 } 934 dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); 935 936 m->m_data += max_linkhdr; 937 m->m_flags |= M_SKIP_FIREWALL; 938 m->m_pkthdr.len = m->m_len = len; 939 m->m_pkthdr.rcvif = NULL; 940 bzero(m->m_data, len); 941 942 switch (id->addr_type) { 943 case 4: 944 h = mtod(m, struct ip *); 945 946 /* prepare for checksum */ 947 h->ip_p = IPPROTO_TCP; 948 h->ip_len = htons(sizeof(struct tcphdr)); 949 if (dir) { 950 h->ip_src.s_addr = htonl(id->src_ip); 951 h->ip_dst.s_addr = htonl(id->dst_ip); 952 } else { 953 h->ip_src.s_addr = htonl(id->dst_ip); 954 h->ip_dst.s_addr = htonl(id->src_ip); 955 } 956 957 th = (struct tcphdr *)(h + 1); 958 break; 959#ifdef INET6 960 case 6: 961 h6 = mtod(m, struct ip6_hdr *); 962 963 /* prepare for checksum */ 964 h6->ip6_nxt = IPPROTO_TCP; 965 h6->ip6_plen = htons(sizeof(struct tcphdr)); 966 if (dir) { 967 h6->ip6_src = id->src_ip6; 968 h6->ip6_dst = id->dst_ip6; 969 } else { 970 h6->ip6_src = id->dst_ip6; 971 h6->ip6_dst = id->src_ip6; 972 } 973 974 th = (struct tcphdr *)(h6 + 1); 975 break; 976#endif 977 } 978 979 if (dir) { 980 th->th_sport = htons(id->src_port); 981 th->th_dport = htons(id->dst_port); 982 } else { 983 th->th_sport = htons(id->dst_port); 984 th->th_dport = htons(id->src_port); 985 } 986 th->th_off = sizeof(struct tcphdr) >> 2; 987 988 if (flags & TH_RST) { 989 if (flags & TH_ACK) { 990 th->th_seq = htonl(ack); 991 th->th_flags = TH_RST; 992 } else { 993 if (flags & TH_SYN) 994 seq++; 995 th->th_ack = htonl(seq); 996 th->th_flags = TH_RST | TH_ACK; 997 } 998 } else { 999 /* 1000 * Keepalive - use caller provided sequence numbers 1001 */ 1002 th->th_seq = htonl(seq); 1003 th->th_ack = htonl(ack); 1004 th->th_flags = TH_ACK; 1005 } 1006 1007 switch (id->addr_type) { 1008 case 4: 1009 th->th_sum = in_cksum(m, len); 1010 1011 /* finish the ip header */ 1012 h->ip_v = 4; 1013 h->ip_hl = sizeof(*h) >> 2; 1014 h->ip_tos = IPTOS_LOWDELAY; 1015 h->ip_off = 0; 1016 /* ip_len must be in host format for ip_output */ 1017 h->ip_len = len; 1018 h->ip_ttl = V_ip_defttl; 1019 h->ip_sum = 0; 1020 break; 1021#ifdef INET6 1022 case 6: 1023 th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), 1024 sizeof(struct tcphdr)); 1025 1026 /* finish the ip6 header */ 1027 h6->ip6_vfc |= IPV6_VERSION; 1028 h6->ip6_hlim = IPV6_DEFHLIM; 1029 break; 1030#endif 1031 } 1032 1033 return (m); 1034} 1035 1036/* 1037 * This procedure is only used to handle keepalives. It is invoked 1038 * every dyn_keepalive_period 1039 */ 1040static void 1041ipfw_tick(void * vnetx) 1042{ 1043 struct mbuf *m0, *m, *mnext, **mtailp; 1044#ifdef INET6 1045 struct mbuf *m6, **m6_tailp; 1046#endif 1047 int i; 1048 ipfw_dyn_rule *q; 1049#ifdef VIMAGE 1050 struct vnet *vp = vnetx; 1051#endif 1052 1053 CURVNET_SET(vp); 1054 if (V_dyn_keepalive == 0 || V_ipfw_dyn_v == NULL || V_dyn_count == 0) 1055 goto done; 1056 1057 /* 1058 * We make a chain of packets to go out here -- not deferring 1059 * until after we drop the IPFW dynamic rule lock would result 1060 * in a lock order reversal with the normal packet input -> ipfw 1061 * call stack. 1062 */ 1063 m0 = NULL; 1064 mtailp = &m0; 1065#ifdef INET6 1066 m6 = NULL; 1067 m6_tailp = &m6; 1068#endif 1069 IPFW_DYN_LOCK(); 1070 for (i = 0 ; i < V_curr_dyn_buckets ; i++) { 1071 for (q = V_ipfw_dyn_v[i] ; q ; q = q->next ) { 1072 if (q->dyn_type == O_LIMIT_PARENT) 1073 continue; 1074 if (q->id.proto != IPPROTO_TCP) 1075 continue; 1076 if ( (q->state & BOTH_SYN) != BOTH_SYN) 1077 continue; 1078 if (TIME_LEQ(time_uptime + V_dyn_keepalive_interval, 1079 q->expire)) 1080 continue; /* too early */ 1081 if (TIME_LEQ(q->expire, time_uptime)) 1082 continue; /* too late, rule expired */ 1083 1084 m = ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1, 1085 q->ack_fwd, TH_SYN); 1086 mnext = ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1, 1087 q->ack_rev, 0); 1088 1089 switch (q->id.addr_type) { 1090 case 4: 1091 if (m != NULL) { 1092 *mtailp = m; 1093 mtailp = &(*mtailp)->m_nextpkt; 1094 } 1095 if (mnext != NULL) { 1096 *mtailp = mnext; 1097 mtailp = &(*mtailp)->m_nextpkt; 1098 } 1099 break; 1100#ifdef INET6 1101 case 6: 1102 if (m != NULL) { 1103 *m6_tailp = m; 1104 m6_tailp = &(*m6_tailp)->m_nextpkt; 1105 } 1106 if (mnext != NULL) { 1107 *m6_tailp = mnext; 1108 m6_tailp = &(*m6_tailp)->m_nextpkt; 1109 } 1110 break; 1111#endif 1112 } 1113 1114 m = mnext = NULL; 1115 } 1116 } 1117 IPFW_DYN_UNLOCK(); 1118 for (m = mnext = m0; m != NULL; m = mnext) { 1119 mnext = m->m_nextpkt; 1120 m->m_nextpkt = NULL; 1121 ip_output(m, NULL, NULL, 0, NULL, NULL); 1122 } 1123#ifdef INET6 1124 for (m = mnext = m6; m != NULL; m = mnext) { 1125 mnext = m->m_nextpkt; 1126 m->m_nextpkt = NULL; 1127 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 1128 } 1129#endif 1130done: 1131 callout_reset_on(&V_ipfw_timeout, V_dyn_keepalive_period * hz, 1132 ipfw_tick, vnetx, 0); 1133 CURVNET_RESTORE(); 1134} 1135 1136void 1137ipfw_dyn_attach(void) 1138{ 1139 ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", 1140 sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, 1141 UMA_ALIGN_PTR, 0); 1142 1143 IPFW_DYN_LOCK_INIT(); 1144} 1145 1146void 1147ipfw_dyn_detach(void) 1148{ 1149 uma_zdestroy(ipfw_dyn_rule_zone); 1150 IPFW_DYN_LOCK_DESTROY(); 1151} 1152 1153void 1154ipfw_dyn_init(void) 1155{ 1156 V_ipfw_dyn_v = NULL; 1157 V_dyn_buckets = 256; /* must be power of 2 */ 1158 V_curr_dyn_buckets = 256; /* must be power of 2 */ 1159 1160 V_dyn_ack_lifetime = 300; 1161 V_dyn_syn_lifetime = 20; 1162 V_dyn_fin_lifetime = 1; 1163 V_dyn_rst_lifetime = 1; 1164 V_dyn_udp_lifetime = 10; 1165 V_dyn_short_lifetime = 5; 1166 1167 V_dyn_keepalive_interval = 20; 1168 V_dyn_keepalive_period = 5; 1169 V_dyn_keepalive = 1; /* do send keepalives */ 1170 1171 V_dyn_max = 4096; /* max # of dynamic rules */ 1172 callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); 1173 callout_reset_on(&V_ipfw_timeout, hz, ipfw_tick, curvnet, 0); 1174} 1175 1176void 1177ipfw_dyn_uninit(int pass) 1178{ 1179 if (pass == 0) 1180 callout_drain(&V_ipfw_timeout); 1181 else { 1182 if (V_ipfw_dyn_v != NULL) 1183 free(V_ipfw_dyn_v, M_IPFW); 1184 } 1185} 1186 1187int 1188ipfw_dyn_len(void) 1189{ 1190 return (V_ipfw_dyn_v == NULL) ? 0 : 1191 (V_dyn_count * sizeof(ipfw_dyn_rule)); 1192} 1193 1194void 1195ipfw_get_dynamic(char **pbp, const char *ep) 1196{ 1197 ipfw_dyn_rule *p, *last = NULL; 1198 char *bp; 1199 int i; 1200 1201 if (V_ipfw_dyn_v == NULL) 1202 return; 1203 bp = *pbp; 1204 1205 IPFW_DYN_LOCK(); 1206 for (i = 0 ; i < V_curr_dyn_buckets; i++) 1207 for (p = V_ipfw_dyn_v[i] ; p != NULL; p = p->next) { 1208 if (bp + sizeof *p <= ep) { 1209 ipfw_dyn_rule *dst = 1210 (ipfw_dyn_rule *)bp; 1211 bcopy(p, dst, sizeof *p); 1212 bcopy(&(p->rule->rulenum), &(dst->rule), 1213 sizeof(p->rule->rulenum)); 1214 /* 1215 * store set number into high word of 1216 * dst->rule pointer. 1217 */ 1218 bcopy(&(p->rule->set), 1219 (char *)&dst->rule + 1220 sizeof(p->rule->rulenum), 1221 sizeof(p->rule->set)); 1222 /* 1223 * store a non-null value in "next". 1224 * The userland code will interpret a 1225 * NULL here as a marker 1226 * for the last dynamic rule. 1227 */ 1228 bcopy(&dst, &dst->next, sizeof(dst)); 1229 last = dst; 1230 dst->expire = 1231 TIME_LEQ(dst->expire, time_uptime) ? 1232 0 : dst->expire - time_uptime ; 1233 bp += sizeof(ipfw_dyn_rule); 1234 } 1235 } 1236 IPFW_DYN_UNLOCK(); 1237 if (last != NULL) /* mark last dynamic rule */ 1238 bzero(&last->next, sizeof(last)); 1239 *pbp = bp; 1240} 1241/* end of file */ 1242