ip_fw_dynamic.c revision 200580
1/*- 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_dynamic.c 200580 2009-12-15 16:15:14Z luigi $"); 28 29#define DEB(x) 30#define DDB(x) x 31 32/* 33 * Dynamic rule support for ipfw 34 */ 35 36#if !defined(KLD_MODULE) 37#include "opt_ipfw.h" 38#include "opt_ipdivert.h" 39#include "opt_ipdn.h" 40#include "opt_inet.h" 41#ifndef INET 42#error IPFIREWALL requires INET. 43#endif /* INET */ 44#endif 45#include "opt_inet6.h" 46#include "opt_ipsec.h" 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/kernel.h> 53#include <sys/lock.h> 54#include <sys/socket.h> 55#include <sys/sysctl.h> 56#include <sys/syslog.h> 57#include <net/ethernet.h> /* for ETHERTYPE_IP */ 58#include <net/if.h> 59#include <net/vnet.h> 60 61#include <netinet/in.h> 62#include <netinet/ip.h> 63#include <netinet/ip_var.h> /* ip_defttl */ 64#include <netinet/ip_fw.h> 65#include <netinet/ipfw/ip_fw_private.h> 66#include <netinet/tcp_var.h> 67#include <netinet/udp.h> 68 69#ifdef INET6 70#include <netinet/ip6.h> 71#include <netinet6/in6_var.h> 72#include <netinet6/ip6_var.h> 73#endif 74 75#include <machine/in_cksum.h> /* XXX for in_cksum */ 76 77#ifdef MAC 78#include <security/mac/mac_framework.h> 79#endif 80 81/* 82 * Description of dynamic rules. 83 * 84 * Dynamic rules are stored in lists accessed through a hash table 85 * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can 86 * be modified through the sysctl variable dyn_buckets which is 87 * updated when the table becomes empty. 88 * 89 * XXX currently there is only one list, ipfw_dyn. 90 * 91 * When a packet is received, its address fields are first masked 92 * with the mask defined for the rule, then hashed, then matched 93 * against the entries in the corresponding list. 94 * Dynamic rules can be used for different purposes: 95 * + stateful rules; 96 * + enforcing limits on the number of sessions; 97 * + in-kernel NAT (not implemented yet) 98 * 99 * The lifetime of dynamic rules is regulated by dyn_*_lifetime, 100 * measured in seconds and depending on the flags. 101 * 102 * The total number of dynamic rules is stored in dyn_count. 103 * The max number of dynamic rules is dyn_max. When we reach 104 * the maximum number of rules we do not create anymore. This is 105 * done to avoid consuming too much memory, but also too much 106 * time when searching on each packet (ideally, we should try instead 107 * to put a limit on the length of the list on each bucket...). 108 * 109 * Each dynamic rule holds a pointer to the parent ipfw rule so 110 * we know what action to perform. Dynamic rules are removed when 111 * the parent rule is deleted. XXX we should make them survive. 112 * 113 * There are some limitations with dynamic rules -- we do not 114 * obey the 'randomized match', and we do not do multiple 115 * passes through the firewall. XXX check the latter!!! 116 */ 117static VNET_DEFINE(ipfw_dyn_rule **, ipfw_dyn_v); 118static VNET_DEFINE(u_int32_t, dyn_buckets); 119static VNET_DEFINE(u_int32_t, curr_dyn_buckets); 120static VNET_DEFINE(struct callout, ipfw_timeout); 121#define V_ipfw_dyn_v VNET(ipfw_dyn_v) 122#define V_dyn_buckets VNET(dyn_buckets) 123#define V_curr_dyn_buckets VNET(curr_dyn_buckets) 124#define V_ipfw_timeout VNET(ipfw_timeout) 125 126static uma_zone_t ipfw_dyn_rule_zone; 127static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ 128 129#define IPFW_DYN_LOCK_INIT() \ 130 mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF) 131#define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx) 132#define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx) 133#define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) 134#define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) 135 136void 137ipfw_dyn_unlock(void) 138{ 139 IPFW_DYN_UNLOCK(); 140} 141 142/* 143 * Timeouts for various events in handing dynamic rules. 144 */ 145static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); 146static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); 147static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); 148static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); 149static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); 150static VNET_DEFINE(u_int32_t, dyn_short_lifetime); 151 152#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) 153#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) 154#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) 155#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) 156#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) 157#define V_dyn_short_lifetime VNET(dyn_short_lifetime) 158 159/* 160 * Keepalives are sent if dyn_keepalive is set. They are sent every 161 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 162 * seconds of lifetime of a rule. 163 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower 164 * than dyn_keepalive_period. 165 */ 166 167static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); 168static VNET_DEFINE(u_int32_t, dyn_keepalive_period); 169static VNET_DEFINE(u_int32_t, dyn_keepalive); 170 171#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) 172#define V_dyn_keepalive_period VNET(dyn_keepalive_period) 173#define V_dyn_keepalive VNET(dyn_keepalive) 174 175static VNET_DEFINE(u_int32_t, dyn_count); /* # of dynamic rules */ 176static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ 177 178#define V_dyn_count VNET(dyn_count) 179#define V_dyn_max VNET(dyn_max) 180 181#ifdef SYSCTL_NODE 182SYSCTL_DECL(_net_inet_ip_fw); 183SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, 184 CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0, 185 "Number of dyn. buckets"); 186SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, 187 CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, 188 "Current Number of dyn. buckets"); 189SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, 190 CTLFLAG_RD, &VNET_NAME(dyn_count), 0, 191 "Number of dyn. rules"); 192SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, 193 CTLFLAG_RW, &VNET_NAME(dyn_max), 0, 194 "Max number of dyn. rules"); 195SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, 196 CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, 197 "Lifetime of dyn. rules for acks"); 198SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, 199 CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, 200 "Lifetime of dyn. rules for syn"); 201SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, 202 CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, 203 "Lifetime of dyn. rules for fin"); 204SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, 205 CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, 206 "Lifetime of dyn. rules for rst"); 207SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, 208 CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, 209 "Lifetime of dyn. rules for UDP"); 210SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, 211 CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, 212 "Lifetime of dyn. rules for other situations"); 213SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, 214 CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, 215 "Enable keepalives for dyn. rules"); 216#endif /* SYSCTL_NODE */ 217 218 219static __inline int 220hash_packet6(struct ipfw_flow_id *id) 221{ 222 u_int32_t i; 223 i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ 224 (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ 225 (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ 226 (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ 227 (id->dst_port) ^ (id->src_port); 228 return i; 229} 230 231/* 232 * IMPORTANT: the hash function for dynamic rules must be commutative 233 * in source and destination (ip,port), because rules are bidirectional 234 * and we want to find both in the same bucket. 235 */ 236static __inline int 237hash_packet(struct ipfw_flow_id *id) 238{ 239 u_int32_t i; 240 241#ifdef INET6 242 if (IS_IP6_FLOW_ID(id)) 243 i = hash_packet6(id); 244 else 245#endif /* INET6 */ 246 i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); 247 i &= (V_curr_dyn_buckets - 1); 248 return i; 249} 250 251static __inline void 252unlink_dyn_rule_print(struct ipfw_flow_id *id) 253{ 254 struct in_addr da; 255#ifdef INET6 256 char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; 257#else 258 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 259#endif 260 261#ifdef INET6 262 if (IS_IP6_FLOW_ID(id)) { 263 ip6_sprintf(src, &id->src_ip6); 264 ip6_sprintf(dst, &id->dst_ip6); 265 } else 266#endif 267 { 268 da.s_addr = htonl(id->src_ip); 269 inet_ntoa_r(da, src); 270 da.s_addr = htonl(id->dst_ip); 271 inet_ntoa_r(da, dst); 272 } 273 printf("ipfw: unlink entry %s %d -> %s %d, %d left\n", 274 src, id->src_port, dst, id->dst_port, V_dyn_count - 1); 275} 276 277/** 278 * unlink a dynamic rule from a chain. prev is a pointer to 279 * the previous one, q is a pointer to the rule to delete, 280 * head is a pointer to the head of the queue. 281 * Modifies q and potentially also head. 282 */ 283#define UNLINK_DYN_RULE(prev, head, q) { \ 284 ipfw_dyn_rule *old_q = q; \ 285 \ 286 /* remove a refcount to the parent */ \ 287 if (q->dyn_type == O_LIMIT) \ 288 q->parent->count--; \ 289 DEB(unlink_dyn_rule_print(&q->id);) \ 290 if (prev != NULL) \ 291 prev->next = q = q->next; \ 292 else \ 293 head = q = q->next; \ 294 V_dyn_count--; \ 295 uma_zfree(ipfw_dyn_rule_zone, old_q); } 296 297#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) 298 299/** 300 * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. 301 * 302 * If keep_me == NULL, rules are deleted even if not expired, 303 * otherwise only expired rules are removed. 304 * 305 * The value of the second parameter is also used to point to identify 306 * a rule we absolutely do not want to remove (e.g. because we are 307 * holding a reference to it -- this is the case with O_LIMIT_PARENT 308 * rules). The pointer is only used for comparison, so any non-null 309 * value will do. 310 */ 311static void 312remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) 313{ 314 static u_int32_t last_remove = 0; 315 316#define FORCE (keep_me == NULL) 317 318 ipfw_dyn_rule *prev, *q; 319 int i, pass = 0, max_pass = 0; 320 321 IPFW_DYN_LOCK_ASSERT(); 322 323 if (V_ipfw_dyn_v == NULL || V_dyn_count == 0) 324 return; 325 /* do not expire more than once per second, it is useless */ 326 if (!FORCE && last_remove == time_uptime) 327 return; 328 last_remove = time_uptime; 329 330 /* 331 * because O_LIMIT refer to parent rules, during the first pass only 332 * remove child and mark any pending LIMIT_PARENT, and remove 333 * them in a second pass. 334 */ 335next_pass: 336 for (i = 0 ; i < V_curr_dyn_buckets ; i++) { 337 for (prev=NULL, q = V_ipfw_dyn_v[i] ; q ; ) { 338 /* 339 * Logic can become complex here, so we split tests. 340 */ 341 if (q == keep_me) 342 goto next; 343 if (rule != NULL && rule != q->rule) 344 goto next; /* not the one we are looking for */ 345 if (q->dyn_type == O_LIMIT_PARENT) { 346 /* 347 * handle parent in the second pass, 348 * record we need one. 349 */ 350 max_pass = 1; 351 if (pass == 0) 352 goto next; 353 if (FORCE && q->count != 0 ) { 354 /* XXX should not happen! */ 355 printf("ipfw: OUCH! cannot remove rule," 356 " count %d\n", q->count); 357 } 358 } else { 359 if (!FORCE && 360 !TIME_LEQ( q->expire, time_uptime )) 361 goto next; 362 } 363 if (q->dyn_type != O_LIMIT_PARENT || !q->count) { 364 UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); 365 continue; 366 } 367next: 368 prev=q; 369 q=q->next; 370 } 371 } 372 if (pass++ < max_pass) 373 goto next_pass; 374} 375 376void 377remove_dyn_children(struct ip_fw *rule) 378{ 379 IPFW_DYN_LOCK(); 380 remove_dyn_rule(rule, NULL /* force removal */); 381 IPFW_DYN_UNLOCK(); 382} 383 384/** 385 * lookup a dynamic rule. 386 */ 387ipfw_dyn_rule * 388lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction, 389 struct tcphdr *tcp) 390{ 391 /* 392 * stateful ipfw extensions. 393 * Lookup into dynamic session queue 394 */ 395#define MATCH_REVERSE 0 396#define MATCH_FORWARD 1 397#define MATCH_NONE 2 398#define MATCH_UNKNOWN 3 399 int i, dir = MATCH_NONE; 400 ipfw_dyn_rule *prev, *q=NULL; 401 402 IPFW_DYN_LOCK_ASSERT(); 403 404 if (V_ipfw_dyn_v == NULL) 405 goto done; /* not found */ 406 i = hash_packet( pkt ); 407 for (prev=NULL, q = V_ipfw_dyn_v[i] ; q != NULL ; ) { 408 if (q->dyn_type == O_LIMIT_PARENT && q->count) 409 goto next; 410 if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */ 411 UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); 412 continue; 413 } 414 if (pkt->proto == q->id.proto && 415 q->dyn_type != O_LIMIT_PARENT) { 416 if (IS_IP6_FLOW_ID(pkt)) { 417 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 418 &(q->id.src_ip6)) && 419 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 420 &(q->id.dst_ip6)) && 421 pkt->src_port == q->id.src_port && 422 pkt->dst_port == q->id.dst_port ) { 423 dir = MATCH_FORWARD; 424 break; 425 } 426 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 427 &(q->id.dst_ip6)) && 428 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 429 &(q->id.src_ip6)) && 430 pkt->src_port == q->id.dst_port && 431 pkt->dst_port == q->id.src_port ) { 432 dir = MATCH_REVERSE; 433 break; 434 } 435 } else { 436 if (pkt->src_ip == q->id.src_ip && 437 pkt->dst_ip == q->id.dst_ip && 438 pkt->src_port == q->id.src_port && 439 pkt->dst_port == q->id.dst_port ) { 440 dir = MATCH_FORWARD; 441 break; 442 } 443 if (pkt->src_ip == q->id.dst_ip && 444 pkt->dst_ip == q->id.src_ip && 445 pkt->src_port == q->id.dst_port && 446 pkt->dst_port == q->id.src_port ) { 447 dir = MATCH_REVERSE; 448 break; 449 } 450 } 451 } 452next: 453 prev = q; 454 q = q->next; 455 } 456 if (q == NULL) 457 goto done; /* q = NULL, not found */ 458 459 if ( prev != NULL) { /* found and not in front */ 460 prev->next = q->next; 461 q->next = V_ipfw_dyn_v[i]; 462 V_ipfw_dyn_v[i] = q; 463 } 464 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 465 u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); 466 467#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 468#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 469 q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); 470 switch (q->state) { 471 case TH_SYN: /* opening */ 472 q->expire = time_uptime + V_dyn_syn_lifetime; 473 break; 474 475 case BOTH_SYN: /* move to established */ 476 case BOTH_SYN | TH_FIN : /* one side tries to close */ 477 case BOTH_SYN | (TH_FIN << 8) : 478 if (tcp) { 479#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) 480 u_int32_t ack = ntohl(tcp->th_ack); 481 if (dir == MATCH_FORWARD) { 482 if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) 483 q->ack_fwd = ack; 484 else { /* ignore out-of-sequence */ 485 break; 486 } 487 } else { 488 if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) 489 q->ack_rev = ack; 490 else { /* ignore out-of-sequence */ 491 break; 492 } 493 } 494 } 495 q->expire = time_uptime + V_dyn_ack_lifetime; 496 break; 497 498 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 499 if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) 500 V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; 501 q->expire = time_uptime + V_dyn_fin_lifetime; 502 break; 503 504 default: 505#if 0 506 /* 507 * reset or some invalid combination, but can also 508 * occur if we use keep-state the wrong way. 509 */ 510 if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) 511 printf("invalid state: 0x%x\n", q->state); 512#endif 513 if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) 514 V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; 515 q->expire = time_uptime + V_dyn_rst_lifetime; 516 break; 517 } 518 } else if (pkt->proto == IPPROTO_UDP) { 519 q->expire = time_uptime + V_dyn_udp_lifetime; 520 } else { 521 /* other protocols */ 522 q->expire = time_uptime + V_dyn_short_lifetime; 523 } 524done: 525 if (match_direction) 526 *match_direction = dir; 527 return q; 528} 529 530ipfw_dyn_rule * 531lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, 532 struct tcphdr *tcp) 533{ 534 ipfw_dyn_rule *q; 535 536 IPFW_DYN_LOCK(); 537 q = lookup_dyn_rule_locked(pkt, match_direction, tcp); 538 if (q == NULL) 539 IPFW_DYN_UNLOCK(); 540 /* NB: return table locked when q is not NULL */ 541 return q; 542} 543 544static void 545realloc_dynamic_table(void) 546{ 547 IPFW_DYN_LOCK_ASSERT(); 548 549 /* 550 * Try reallocation, make sure we have a power of 2 and do 551 * not allow more than 64k entries. In case of overflow, 552 * default to 1024. 553 */ 554 555 if (V_dyn_buckets > 65536) 556 V_dyn_buckets = 1024; 557 if ((V_dyn_buckets & (V_dyn_buckets-1)) != 0) { /* not a power of 2 */ 558 V_dyn_buckets = V_curr_dyn_buckets; /* reset */ 559 return; 560 } 561 V_curr_dyn_buckets = V_dyn_buckets; 562 if (V_ipfw_dyn_v != NULL) 563 free(V_ipfw_dyn_v, M_IPFW); 564 for (;;) { 565 V_ipfw_dyn_v = malloc(V_curr_dyn_buckets * sizeof(ipfw_dyn_rule *), 566 M_IPFW, M_NOWAIT | M_ZERO); 567 if (V_ipfw_dyn_v != NULL || V_curr_dyn_buckets <= 2) 568 break; 569 V_curr_dyn_buckets /= 2; 570 } 571} 572 573/** 574 * Install state of type 'type' for a dynamic session. 575 * The hash table contains two type of rules: 576 * - regular rules (O_KEEP_STATE) 577 * - rules for sessions with limited number of sess per user 578 * (O_LIMIT). When they are created, the parent is 579 * increased by 1, and decreased on delete. In this case, 580 * the third parameter is the parent rule and not the chain. 581 * - "parent" rules for the above (O_LIMIT_PARENT). 582 */ 583static ipfw_dyn_rule * 584add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) 585{ 586 ipfw_dyn_rule *r; 587 int i; 588 589 IPFW_DYN_LOCK_ASSERT(); 590 591 if (V_ipfw_dyn_v == NULL || 592 (V_dyn_count == 0 && V_dyn_buckets != V_curr_dyn_buckets)) { 593 realloc_dynamic_table(); 594 if (V_ipfw_dyn_v == NULL) 595 return NULL; /* failed ! */ 596 } 597 i = hash_packet(id); 598 599 r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); 600 if (r == NULL) { 601 printf ("ipfw: sorry cannot allocate state\n"); 602 return NULL; 603 } 604 605 /* increase refcount on parent, and set pointer */ 606 if (dyn_type == O_LIMIT) { 607 ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; 608 if ( parent->dyn_type != O_LIMIT_PARENT) 609 panic("invalid parent"); 610 parent->count++; 611 r->parent = parent; 612 rule = parent->rule; 613 } 614 615 r->id = *id; 616 r->expire = time_uptime + V_dyn_syn_lifetime; 617 r->rule = rule; 618 r->dyn_type = dyn_type; 619 r->pcnt = r->bcnt = 0; 620 r->count = 0; 621 622 r->bucket = i; 623 r->next = V_ipfw_dyn_v[i]; 624 V_ipfw_dyn_v[i] = r; 625 V_dyn_count++; 626 DEB({ 627 struct in_addr da; 628#ifdef INET6 629 char src[INET6_ADDRSTRLEN]; 630 char dst[INET6_ADDRSTRLEN]; 631#else 632 char src[INET_ADDRSTRLEN]; 633 char dst[INET_ADDRSTRLEN]; 634#endif 635 636#ifdef INET6 637 if (IS_IP6_FLOW_ID(&(r->id))) { 638 ip6_sprintf(src, &r->id.src_ip6); 639 ip6_sprintf(dst, &r->id.dst_ip6); 640 } else 641#endif 642 { 643 da.s_addr = htonl(r->id.src_ip); 644 inet_ntoa_r(da, src); 645 da.s_addr = htonl(r->id.dst_ip); 646 inet_ntoa_r(da, dst); 647 } 648 printf("ipfw: add dyn entry ty %d %s %d -> %s %d, total %d\n", 649 dyn_type, src, r->id.src_port, dst, r->id.dst_port, 650 V_dyn_count); 651 }) 652 return r; 653} 654 655/** 656 * lookup dynamic parent rule using pkt and rule as search keys. 657 * If the lookup fails, then install one. 658 */ 659static ipfw_dyn_rule * 660lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) 661{ 662 ipfw_dyn_rule *q; 663 int i; 664 665 IPFW_DYN_LOCK_ASSERT(); 666 667 if (V_ipfw_dyn_v) { 668 int is_v6 = IS_IP6_FLOW_ID(pkt); 669 i = hash_packet( pkt ); 670 for (q = V_ipfw_dyn_v[i] ; q != NULL ; q=q->next) 671 if (q->dyn_type == O_LIMIT_PARENT && 672 rule== q->rule && 673 pkt->proto == q->id.proto && 674 pkt->src_port == q->id.src_port && 675 pkt->dst_port == q->id.dst_port && 676 ( 677 (is_v6 && 678 IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 679 &(q->id.src_ip6)) && 680 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 681 &(q->id.dst_ip6))) || 682 (!is_v6 && 683 pkt->src_ip == q->id.src_ip && 684 pkt->dst_ip == q->id.dst_ip) 685 ) 686 ) { 687 q->expire = time_uptime + V_dyn_short_lifetime; 688 DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) 689 return q; 690 } 691 } 692 return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); 693} 694 695/** 696 * Install dynamic state for rule type cmd->o.opcode 697 * 698 * Returns 1 (failure) if state is not installed because of errors or because 699 * session limitations are enforced. 700 */ 701int 702install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, 703 struct ip_fw_args *args, uint32_t tablearg) 704{ 705 static int last_log; 706 ipfw_dyn_rule *q; 707 struct in_addr da; 708#ifdef INET6 709 char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; 710#else 711 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 712#endif 713 714 src[0] = '\0'; 715 dst[0] = '\0'; 716 717 IPFW_DYN_LOCK(); 718 719 DEB( 720#ifdef INET6 721 if (IS_IP6_FLOW_ID(&(args->f_id))) { 722 ip6_sprintf(src, &args->f_id.src_ip6); 723 ip6_sprintf(dst, &args->f_id.dst_ip6); 724 } else 725#endif 726 { 727 da.s_addr = htonl(args->f_id.src_ip); 728 inet_ntoa_r(da, src); 729 da.s_addr = htonl(args->f_id.dst_ip); 730 inet_ntoa_r(da, dst); 731 } 732 printf("ipfw: %s: type %d %s %u -> %s %u\n", 733 __func__, cmd->o.opcode, src, args->f_id.src_port, 734 dst, args->f_id.dst_port); 735 src[0] = '\0'; 736 dst[0] = '\0'; 737 ) 738 739 q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 740 741 if (q != NULL) { /* should never occur */ 742 if (last_log != time_uptime) { 743 last_log = time_uptime; 744 printf("ipfw: %s: entry already present, done\n", 745 __func__); 746 } 747 IPFW_DYN_UNLOCK(); 748 return (0); 749 } 750 751 if (V_dyn_count >= V_dyn_max) 752 /* Run out of slots, try to remove any expired rule. */ 753 remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); 754 755 if (V_dyn_count >= V_dyn_max) { 756 if (last_log != time_uptime) { 757 last_log = time_uptime; 758 printf("ipfw: %s: Too many dynamic rules\n", __func__); 759 } 760 IPFW_DYN_UNLOCK(); 761 return (1); /* cannot install, notify caller */ 762 } 763 764 switch (cmd->o.opcode) { 765 case O_KEEP_STATE: /* bidir rule */ 766 add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); 767 break; 768 769 case O_LIMIT: { /* limit number of sessions */ 770 struct ipfw_flow_id id; 771 ipfw_dyn_rule *parent; 772 uint32_t conn_limit; 773 uint16_t limit_mask = cmd->limit_mask; 774 775 conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ? 776 tablearg : cmd->conn_limit; 777 778 DEB( 779 if (cmd->conn_limit == IP_FW_TABLEARG) 780 printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " 781 "(tablearg)\n", __func__, conn_limit); 782 else 783 printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", 784 __func__, conn_limit); 785 ) 786 787 id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; 788 id.proto = args->f_id.proto; 789 id.addr_type = args->f_id.addr_type; 790 id.fib = M_GETFIB(args->m); 791 792 if (IS_IP6_FLOW_ID (&(args->f_id))) { 793 if (limit_mask & DYN_SRC_ADDR) 794 id.src_ip6 = args->f_id.src_ip6; 795 if (limit_mask & DYN_DST_ADDR) 796 id.dst_ip6 = args->f_id.dst_ip6; 797 } else { 798 if (limit_mask & DYN_SRC_ADDR) 799 id.src_ip = args->f_id.src_ip; 800 if (limit_mask & DYN_DST_ADDR) 801 id.dst_ip = args->f_id.dst_ip; 802 } 803 if (limit_mask & DYN_SRC_PORT) 804 id.src_port = args->f_id.src_port; 805 if (limit_mask & DYN_DST_PORT) 806 id.dst_port = args->f_id.dst_port; 807 if ((parent = lookup_dyn_parent(&id, rule)) == NULL) { 808 printf("ipfw: %s: add parent failed\n", __func__); 809 IPFW_DYN_UNLOCK(); 810 return (1); 811 } 812 813 if (parent->count >= conn_limit) { 814 /* See if we can remove some expired rule. */ 815 remove_dyn_rule(rule, parent); 816 if (parent->count >= conn_limit) { 817 if (V_fw_verbose && last_log != time_uptime) { 818 last_log = time_uptime; 819#ifdef INET6 820 /* 821 * XXX IPv6 flows are not 822 * supported yet. 823 */ 824 if (IS_IP6_FLOW_ID(&(args->f_id))) { 825 char ip6buf[INET6_ADDRSTRLEN]; 826 snprintf(src, sizeof(src), 827 "[%s]", ip6_sprintf(ip6buf, 828 &args->f_id.src_ip6)); 829 snprintf(dst, sizeof(dst), 830 "[%s]", ip6_sprintf(ip6buf, 831 &args->f_id.dst_ip6)); 832 } else 833#endif 834 { 835 da.s_addr = 836 htonl(args->f_id.src_ip); 837 inet_ntoa_r(da, src); 838 da.s_addr = 839 htonl(args->f_id.dst_ip); 840 inet_ntoa_r(da, dst); 841 } 842 log(LOG_SECURITY | LOG_DEBUG, 843 "ipfw: %d %s %s:%u -> %s:%u, %s\n", 844 parent->rule->rulenum, 845 "drop session", 846 src, (args->f_id.src_port), 847 dst, (args->f_id.dst_port), 848 "too many entries"); 849 } 850 IPFW_DYN_UNLOCK(); 851 return (1); 852 } 853 } 854 add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); 855 break; 856 } 857 default: 858 printf("ipfw: %s: unknown dynamic rule type %u\n", 859 __func__, cmd->o.opcode); 860 IPFW_DYN_UNLOCK(); 861 return (1); 862 } 863 864 /* XXX just set lifetime */ 865 lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 866 867 IPFW_DYN_UNLOCK(); 868 return (0); 869} 870 871/* 872 * Generate a TCP packet, containing either a RST or a keepalive. 873 * When flags & TH_RST, we are sending a RST packet, because of a 874 * "reset" action matched the packet. 875 * Otherwise we are sending a keepalive, and flags & TH_ 876 * The 'replyto' mbuf is the mbuf being replied to, if any, and is required 877 * so that MAC can label the reply appropriately. 878 */ 879struct mbuf * 880send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, 881 u_int32_t ack, int flags) 882{ 883 struct mbuf *m; 884 int len, dir; 885 struct ip *h = NULL; /* stupid compiler */ 886#ifdef INET6 887 struct ip6_hdr *h6 = NULL; 888#endif 889 struct tcphdr *th = NULL; 890 891 MGETHDR(m, M_DONTWAIT, MT_DATA); 892 if (m == NULL) 893 return (NULL); 894 895 M_SETFIB(m, id->fib); 896#ifdef MAC 897 if (replyto != NULL) 898 mac_netinet_firewall_reply(replyto, m); 899 else 900 mac_netinet_firewall_send(m); 901#else 902 (void)replyto; /* don't warn about unused arg */ 903#endif 904 905 switch (id->addr_type) { 906 case 4: 907 len = sizeof(struct ip) + sizeof(struct tcphdr); 908 break; 909#ifdef INET6 910 case 6: 911 len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 912 break; 913#endif 914 default: 915 /* XXX: log me?!? */ 916 m_freem(m); 917 return (NULL); 918 } 919 dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); 920 921 m->m_data += max_linkhdr; 922 m->m_flags |= M_SKIP_FIREWALL; 923 m->m_pkthdr.len = m->m_len = len; 924 m->m_pkthdr.rcvif = NULL; 925 bzero(m->m_data, len); 926 927 switch (id->addr_type) { 928 case 4: 929 h = mtod(m, struct ip *); 930 931 /* prepare for checksum */ 932 h->ip_p = IPPROTO_TCP; 933 h->ip_len = htons(sizeof(struct tcphdr)); 934 if (dir) { 935 h->ip_src.s_addr = htonl(id->src_ip); 936 h->ip_dst.s_addr = htonl(id->dst_ip); 937 } else { 938 h->ip_src.s_addr = htonl(id->dst_ip); 939 h->ip_dst.s_addr = htonl(id->src_ip); 940 } 941 942 th = (struct tcphdr *)(h + 1); 943 break; 944#ifdef INET6 945 case 6: 946 h6 = mtod(m, struct ip6_hdr *); 947 948 /* prepare for checksum */ 949 h6->ip6_nxt = IPPROTO_TCP; 950 h6->ip6_plen = htons(sizeof(struct tcphdr)); 951 if (dir) { 952 h6->ip6_src = id->src_ip6; 953 h6->ip6_dst = id->dst_ip6; 954 } else { 955 h6->ip6_src = id->dst_ip6; 956 h6->ip6_dst = id->src_ip6; 957 } 958 959 th = (struct tcphdr *)(h6 + 1); 960 break; 961#endif 962 } 963 964 if (dir) { 965 th->th_sport = htons(id->src_port); 966 th->th_dport = htons(id->dst_port); 967 } else { 968 th->th_sport = htons(id->dst_port); 969 th->th_dport = htons(id->src_port); 970 } 971 th->th_off = sizeof(struct tcphdr) >> 2; 972 973 if (flags & TH_RST) { 974 if (flags & TH_ACK) { 975 th->th_seq = htonl(ack); 976 th->th_flags = TH_RST; 977 } else { 978 if (flags & TH_SYN) 979 seq++; 980 th->th_ack = htonl(seq); 981 th->th_flags = TH_RST | TH_ACK; 982 } 983 } else { 984 /* 985 * Keepalive - use caller provided sequence numbers 986 */ 987 th->th_seq = htonl(seq); 988 th->th_ack = htonl(ack); 989 th->th_flags = TH_ACK; 990 } 991 992 switch (id->addr_type) { 993 case 4: 994 th->th_sum = in_cksum(m, len); 995 996 /* finish the ip header */ 997 h->ip_v = 4; 998 h->ip_hl = sizeof(*h) >> 2; 999 h->ip_tos = IPTOS_LOWDELAY; 1000 h->ip_off = 0; 1001 h->ip_len = len; 1002 h->ip_ttl = V_ip_defttl; 1003 h->ip_sum = 0; 1004 break; 1005#ifdef INET6 1006 case 6: 1007 th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), 1008 sizeof(struct tcphdr)); 1009 1010 /* finish the ip6 header */ 1011 h6->ip6_vfc |= IPV6_VERSION; 1012 h6->ip6_hlim = IPV6_DEFHLIM; 1013 break; 1014#endif 1015 } 1016 1017 return (m); 1018} 1019 1020/* 1021 * This procedure is only used to handle keepalives. It is invoked 1022 * every dyn_keepalive_period 1023 */ 1024static void 1025ipfw_tick(void * vnetx) 1026{ 1027 struct mbuf *m0, *m, *mnext, **mtailp; 1028#ifdef INET6 1029 struct mbuf *m6, **m6_tailp; 1030#endif 1031 int i; 1032 ipfw_dyn_rule *q; 1033#ifdef VIMAGE 1034 struct vnet *vp = vnetx; 1035#endif 1036 1037 CURVNET_SET(vp); 1038 if (V_dyn_keepalive == 0 || V_ipfw_dyn_v == NULL || V_dyn_count == 0) 1039 goto done; 1040 1041 /* 1042 * We make a chain of packets to go out here -- not deferring 1043 * until after we drop the IPFW dynamic rule lock would result 1044 * in a lock order reversal with the normal packet input -> ipfw 1045 * call stack. 1046 */ 1047 m0 = NULL; 1048 mtailp = &m0; 1049#ifdef INET6 1050 m6 = NULL; 1051 m6_tailp = &m6; 1052#endif 1053 IPFW_DYN_LOCK(); 1054 for (i = 0 ; i < V_curr_dyn_buckets ; i++) { 1055 for (q = V_ipfw_dyn_v[i] ; q ; q = q->next ) { 1056 if (q->dyn_type == O_LIMIT_PARENT) 1057 continue; 1058 if (q->id.proto != IPPROTO_TCP) 1059 continue; 1060 if ( (q->state & BOTH_SYN) != BOTH_SYN) 1061 continue; 1062 if (TIME_LEQ(time_uptime + V_dyn_keepalive_interval, 1063 q->expire)) 1064 continue; /* too early */ 1065 if (TIME_LEQ(q->expire, time_uptime)) 1066 continue; /* too late, rule expired */ 1067 1068 m = send_pkt(NULL, &(q->id), q->ack_rev - 1, 1069 q->ack_fwd, TH_SYN); 1070 mnext = send_pkt(NULL, &(q->id), q->ack_fwd - 1, 1071 q->ack_rev, 0); 1072 1073 switch (q->id.addr_type) { 1074 case 4: 1075 if (m != NULL) { 1076 *mtailp = m; 1077 mtailp = &(*mtailp)->m_nextpkt; 1078 } 1079 if (mnext != NULL) { 1080 *mtailp = mnext; 1081 mtailp = &(*mtailp)->m_nextpkt; 1082 } 1083 break; 1084#ifdef INET6 1085 case 6: 1086 if (m != NULL) { 1087 *m6_tailp = m; 1088 m6_tailp = &(*m6_tailp)->m_nextpkt; 1089 } 1090 if (mnext != NULL) { 1091 *m6_tailp = mnext; 1092 m6_tailp = &(*m6_tailp)->m_nextpkt; 1093 } 1094 break; 1095#endif 1096 } 1097 1098 m = mnext = NULL; 1099 } 1100 } 1101 IPFW_DYN_UNLOCK(); 1102 for (m = mnext = m0; m != NULL; m = mnext) { 1103 mnext = m->m_nextpkt; 1104 m->m_nextpkt = NULL; 1105 ip_output(m, NULL, NULL, 0, NULL, NULL); 1106 } 1107#ifdef INET6 1108 for (m = mnext = m6; m != NULL; m = mnext) { 1109 mnext = m->m_nextpkt; 1110 m->m_nextpkt = NULL; 1111 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 1112 } 1113#endif 1114done: 1115 callout_reset(&V_ipfw_timeout, V_dyn_keepalive_period * hz, 1116 ipfw_tick, vnetx); 1117 CURVNET_RESTORE(); 1118} 1119 1120void 1121ipfw_dyn_attach(void) 1122{ 1123 ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", 1124 sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, 1125 UMA_ALIGN_PTR, 0); 1126 1127 IPFW_DYN_LOCK_INIT(); 1128} 1129 1130void 1131ipfw_dyn_detach(void) 1132{ 1133 uma_zdestroy(ipfw_dyn_rule_zone); 1134 IPFW_DYN_LOCK_DESTROY(); 1135} 1136 1137void 1138ipfw_dyn_init(void) 1139{ 1140 V_ipfw_dyn_v = NULL; 1141 V_dyn_buckets = 256; /* must be power of 2 */ 1142 V_curr_dyn_buckets = 256; /* must be power of 2 */ 1143 1144 V_dyn_ack_lifetime = 300; 1145 V_dyn_syn_lifetime = 20; 1146 V_dyn_fin_lifetime = 1; 1147 V_dyn_rst_lifetime = 1; 1148 V_dyn_udp_lifetime = 10; 1149 V_dyn_short_lifetime = 5; 1150 1151 V_dyn_keepalive_interval = 20; 1152 V_dyn_keepalive_period = 5; 1153 V_dyn_keepalive = 1; /* do send keepalives */ 1154 1155 V_dyn_max = 4096; /* max # of dynamic rules */ 1156 callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); 1157 callout_reset(&V_ipfw_timeout, hz, ipfw_tick, curvnet); 1158} 1159 1160void 1161ipfw_dyn_uninit(int pass) 1162{ 1163 if (pass == 0) 1164 callout_drain(&V_ipfw_timeout); 1165 else { 1166 if (V_ipfw_dyn_v != NULL) 1167 free(V_ipfw_dyn_v, M_IPFW); 1168 } 1169} 1170 1171int 1172ipfw_dyn_len(void) 1173{ 1174 return (V_ipfw_dyn_v == NULL) ? 0 : 1175 (V_dyn_count * sizeof(ipfw_dyn_rule)); 1176} 1177 1178void 1179ipfw_get_dynamic(char **pbp, const char *ep) 1180{ 1181 ipfw_dyn_rule *p, *last = NULL; 1182 char *bp; 1183 int i; 1184 1185 if (V_ipfw_dyn_v == NULL) 1186 return; 1187 bp = *pbp; 1188 1189 IPFW_DYN_LOCK(); 1190 for (i = 0 ; i < V_curr_dyn_buckets; i++) 1191 for (p = V_ipfw_dyn_v[i] ; p != NULL; p = p->next) { 1192 if (bp + sizeof *p <= ep) { 1193 ipfw_dyn_rule *dst = 1194 (ipfw_dyn_rule *)bp; 1195 bcopy(p, dst, sizeof *p); 1196 bcopy(&(p->rule->rulenum), &(dst->rule), 1197 sizeof(p->rule->rulenum)); 1198 /* 1199 * store set number into high word of 1200 * dst->rule pointer. 1201 */ 1202 bcopy(&(p->rule->set), 1203 (char *)&dst->rule + 1204 sizeof(p->rule->rulenum), 1205 sizeof(p->rule->set)); 1206 /* 1207 * store a non-null value in "next". 1208 * The userland code will interpret a 1209 * NULL here as a marker 1210 * for the last dynamic rule. 1211 */ 1212 bcopy(&dst, &dst->next, sizeof(dst)); 1213 last = dst; 1214 dst->expire = 1215 TIME_LEQ(dst->expire, time_uptime) ? 1216 0 : dst->expire - time_uptime ; 1217 bp += sizeof(ipfw_dyn_rule); 1218 } 1219 } 1220 IPFW_DYN_UNLOCK(); 1221 if (last != NULL) /* mark last dynamic rule */ 1222 bzero(&last->next, sizeof(last)); 1223 *pbp = bp; 1224} 1225