/* ip_fw_dynamic.c revision 201122 */
1/*- 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_dynamic.c 201122 2009-12-28 10:47:04Z luigi $"); 28 29#define DEB(x) 30#define DDB(x) x 31 32/* 33 * Dynamic rule support for ipfw 34 */ 35 36#if !defined(KLD_MODULE) 37#include "opt_ipfw.h" 38#include "opt_ipdivert.h" 39#include "opt_ipdn.h" 40#include "opt_inet.h" 41#ifndef INET 42#error IPFIREWALL requires INET. 
43#endif /* INET */ 44#endif 45#include "opt_inet6.h" 46#include "opt_ipsec.h" 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/malloc.h> 51#include <sys/mbuf.h> 52#include <sys/kernel.h> 53#include <sys/lock.h> 54#include <sys/socket.h> 55#include <sys/sysctl.h> 56#include <sys/syslog.h> 57#include <net/ethernet.h> /* for ETHERTYPE_IP */ 58#include <net/if.h> 59#include <net/vnet.h> 60 61#include <netinet/in.h> 62#include <netinet/ip.h> 63#include <netinet/ip_var.h> /* ip_defttl */ 64#include <netinet/ip_fw.h> 65#include <netinet/ipfw/ip_fw_private.h> 66#include <netinet/tcp_var.h> 67#include <netinet/udp.h> 68 69#include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */ 70#ifdef INET6 71#include <netinet6/in6_var.h> 72#include <netinet6/ip6_var.h> 73#endif 74 75#include <machine/in_cksum.h> /* XXX for in_cksum */ 76 77#ifdef MAC 78#include <security/mac/mac_framework.h> 79#endif 80 81/* 82 * Description of dynamic rules. 83 * 84 * Dynamic rules are stored in lists accessed through a hash table 85 * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can 86 * be modified through the sysctl variable dyn_buckets which is 87 * updated when the table becomes empty. 88 * 89 * XXX currently there is only one list, ipfw_dyn. 90 * 91 * When a packet is received, its address fields are first masked 92 * with the mask defined for the rule, then hashed, then matched 93 * against the entries in the corresponding list. 94 * Dynamic rules can be used for different purposes: 95 * + stateful rules; 96 * + enforcing limits on the number of sessions; 97 * + in-kernel NAT (not implemented yet) 98 * 99 * The lifetime of dynamic rules is regulated by dyn_*_lifetime, 100 * measured in seconds and depending on the flags. 101 * 102 * The total number of dynamic rules is stored in dyn_count. 103 * The max number of dynamic rules is dyn_max. When we reach 104 * the maximum number of rules we do not create anymore. 
This is 105 * done to avoid consuming too much memory, but also too much 106 * time when searching on each packet (ideally, we should try instead 107 * to put a limit on the length of the list on each bucket...). 108 * 109 * Each dynamic rule holds a pointer to the parent ipfw rule so 110 * we know what action to perform. Dynamic rules are removed when 111 * the parent rule is deleted. XXX we should make them survive. 112 * 113 * There are some limitations with dynamic rules -- we do not 114 * obey the 'randomized match', and we do not do multiple 115 * passes through the firewall. XXX check the latter!!! 116 */ 117 118/* 119 * Static variables followed by global ones 120 */ 121static VNET_DEFINE(ipfw_dyn_rule **, ipfw_dyn_v); 122static VNET_DEFINE(u_int32_t, dyn_buckets); 123static VNET_DEFINE(u_int32_t, curr_dyn_buckets); 124static VNET_DEFINE(struct callout, ipfw_timeout); 125#define V_ipfw_dyn_v VNET(ipfw_dyn_v) 126#define V_dyn_buckets VNET(dyn_buckets) 127#define V_curr_dyn_buckets VNET(curr_dyn_buckets) 128#define V_ipfw_timeout VNET(ipfw_timeout) 129 130static uma_zone_t ipfw_dyn_rule_zone; 131static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ 132 133#define IPFW_DYN_LOCK_INIT() \ 134 mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF) 135#define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx) 136#define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx) 137#define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) 138#define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) 139 140void 141ipfw_dyn_unlock(void) 142{ 143 IPFW_DYN_UNLOCK(); 144} 145 146/* 147 * Timeouts for various events in handing dynamic rules. 
148 */ 149static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); 150static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); 151static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); 152static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); 153static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); 154static VNET_DEFINE(u_int32_t, dyn_short_lifetime); 155 156#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) 157#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) 158#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) 159#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) 160#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) 161#define V_dyn_short_lifetime VNET(dyn_short_lifetime) 162 163/* 164 * Keepalives are sent if dyn_keepalive is set. They are sent every 165 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 166 * seconds of lifetime of a rule. 167 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower 168 * than dyn_keepalive_period. 169 */ 170 171static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); 172static VNET_DEFINE(u_int32_t, dyn_keepalive_period); 173static VNET_DEFINE(u_int32_t, dyn_keepalive); 174 175#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) 176#define V_dyn_keepalive_period VNET(dyn_keepalive_period) 177#define V_dyn_keepalive VNET(dyn_keepalive) 178 179static VNET_DEFINE(u_int32_t, dyn_count); /* # of dynamic rules */ 180static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ 181 182#define V_dyn_count VNET(dyn_count) 183#define V_dyn_max VNET(dyn_max) 184 185#ifdef SYSCTL_NODE 186SYSCTL_DECL(_net_inet_ip_fw); 187SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, 188 CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0, 189 "Number of dyn. buckets"); 190SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, 191 CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, 192 "Current Number of dyn. buckets"); 193SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, 194 CTLFLAG_RD, &VNET_NAME(dyn_count), 0, 195 "Number of dyn. 
rules"); 196SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, 197 CTLFLAG_RW, &VNET_NAME(dyn_max), 0, 198 "Max number of dyn. rules"); 199SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, 200 CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, 201 "Lifetime of dyn. rules for acks"); 202SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, 203 CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, 204 "Lifetime of dyn. rules for syn"); 205SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, 206 CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, 207 "Lifetime of dyn. rules for fin"); 208SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, 209 CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, 210 "Lifetime of dyn. rules for rst"); 211SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, 212 CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, 213 "Lifetime of dyn. rules for UDP"); 214SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, 215 CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, 216 "Lifetime of dyn. rules for other situations"); 217SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, 218 CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, 219 "Enable keepalives for dyn. rules"); 220#endif /* SYSCTL_NODE */ 221 222 223static __inline int 224hash_packet6(struct ipfw_flow_id *id) 225{ 226 u_int32_t i; 227 i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ 228 (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ 229 (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ 230 (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ 231 (id->dst_port) ^ (id->src_port); 232 return i; 233} 234 235/* 236 * IMPORTANT: the hash function for dynamic rules must be commutative 237 * in source and destination (ip,port), because rules are bidirectional 238 * and we want to find both in the same bucket. 
239 */ 240static __inline int 241hash_packet(struct ipfw_flow_id *id) 242{ 243 u_int32_t i; 244 245#ifdef INET6 246 if (IS_IP6_FLOW_ID(id)) 247 i = hash_packet6(id); 248 else 249#endif /* INET6 */ 250 i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); 251 i &= (V_curr_dyn_buckets - 1); 252 return i; 253} 254 255static __inline void 256unlink_dyn_rule_print(struct ipfw_flow_id *id) 257{ 258 struct in_addr da; 259#ifdef INET6 260 char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; 261#else 262 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 263#endif 264 265#ifdef INET6 266 if (IS_IP6_FLOW_ID(id)) { 267 ip6_sprintf(src, &id->src_ip6); 268 ip6_sprintf(dst, &id->dst_ip6); 269 } else 270#endif 271 { 272 da.s_addr = htonl(id->src_ip); 273 inet_ntoa_r(da, src); 274 da.s_addr = htonl(id->dst_ip); 275 inet_ntoa_r(da, dst); 276 } 277 printf("ipfw: unlink entry %s %d -> %s %d, %d left\n", 278 src, id->src_port, dst, id->dst_port, V_dyn_count - 1); 279} 280 281/** 282 * unlink a dynamic rule from a chain. prev is a pointer to 283 * the previous one, q is a pointer to the rule to delete, 284 * head is a pointer to the head of the queue. 285 * Modifies q and potentially also head. 286 */ 287#define UNLINK_DYN_RULE(prev, head, q) { \ 288 ipfw_dyn_rule *old_q = q; \ 289 \ 290 /* remove a refcount to the parent */ \ 291 if (q->dyn_type == O_LIMIT) \ 292 q->parent->count--; \ 293 DEB(unlink_dyn_rule_print(&q->id);) \ 294 if (prev != NULL) \ 295 prev->next = q = q->next; \ 296 else \ 297 head = q = q->next; \ 298 V_dyn_count--; \ 299 uma_zfree(ipfw_dyn_rule_zone, old_q); } 300 301#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) 302 303/** 304 * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. 305 * 306 * If keep_me == NULL, rules are deleted even if not expired, 307 * otherwise only expired rules are removed. 308 * 309 * The value of the second parameter is also used to point to identify 310 * a rule we absolutely do not want to remove (e.g. 
because we are 311 * holding a reference to it -- this is the case with O_LIMIT_PARENT 312 * rules). The pointer is only used for comparison, so any non-null 313 * value will do. 314 */ 315static void 316remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) 317{ 318 static u_int32_t last_remove = 0; 319 320#define FORCE (keep_me == NULL) 321 322 ipfw_dyn_rule *prev, *q; 323 int i, pass = 0, max_pass = 0; 324 325 IPFW_DYN_LOCK_ASSERT(); 326 327 if (V_ipfw_dyn_v == NULL || V_dyn_count == 0) 328 return; 329 /* do not expire more than once per second, it is useless */ 330 if (!FORCE && last_remove == time_uptime) 331 return; 332 last_remove = time_uptime; 333 334 /* 335 * because O_LIMIT refer to parent rules, during the first pass only 336 * remove child and mark any pending LIMIT_PARENT, and remove 337 * them in a second pass. 338 */ 339next_pass: 340 for (i = 0 ; i < V_curr_dyn_buckets ; i++) { 341 for (prev=NULL, q = V_ipfw_dyn_v[i] ; q ; ) { 342 /* 343 * Logic can become complex here, so we split tests. 344 */ 345 if (q == keep_me) 346 goto next; 347 if (rule != NULL && rule != q->rule) 348 goto next; /* not the one we are looking for */ 349 if (q->dyn_type == O_LIMIT_PARENT) { 350 /* 351 * handle parent in the second pass, 352 * record we need one. 353 */ 354 max_pass = 1; 355 if (pass == 0) 356 goto next; 357 if (FORCE && q->count != 0 ) { 358 /* XXX should not happen! */ 359 printf("ipfw: OUCH! 
cannot remove rule," 360 " count %d\n", q->count); 361 } 362 } else { 363 if (!FORCE && 364 !TIME_LEQ( q->expire, time_uptime )) 365 goto next; 366 } 367 if (q->dyn_type != O_LIMIT_PARENT || !q->count) { 368 UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); 369 continue; 370 } 371next: 372 prev=q; 373 q=q->next; 374 } 375 } 376 if (pass++ < max_pass) 377 goto next_pass; 378} 379 380void 381ipfw_remove_dyn_children(struct ip_fw *rule) 382{ 383 IPFW_DYN_LOCK(); 384 remove_dyn_rule(rule, NULL /* force removal */); 385 IPFW_DYN_UNLOCK(); 386} 387 388/** 389 * lookup a dynamic rule, locked version 390 */ 391static ipfw_dyn_rule * 392lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction, 393 struct tcphdr *tcp) 394{ 395 /* 396 * stateful ipfw extensions. 397 * Lookup into dynamic session queue 398 */ 399#define MATCH_REVERSE 0 400#define MATCH_FORWARD 1 401#define MATCH_NONE 2 402#define MATCH_UNKNOWN 3 403 int i, dir = MATCH_NONE; 404 ipfw_dyn_rule *prev, *q=NULL; 405 406 IPFW_DYN_LOCK_ASSERT(); 407 408 if (V_ipfw_dyn_v == NULL) 409 goto done; /* not found */ 410 i = hash_packet( pkt ); 411 for (prev=NULL, q = V_ipfw_dyn_v[i] ; q != NULL ; ) { 412 if (q->dyn_type == O_LIMIT_PARENT && q->count) 413 goto next; 414 if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */ 415 UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); 416 continue; 417 } 418 if (pkt->proto == q->id.proto && 419 q->dyn_type != O_LIMIT_PARENT) { 420 if (IS_IP6_FLOW_ID(pkt)) { 421 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 422 &(q->id.src_ip6)) && 423 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 424 &(q->id.dst_ip6)) && 425 pkt->src_port == q->id.src_port && 426 pkt->dst_port == q->id.dst_port ) { 427 dir = MATCH_FORWARD; 428 break; 429 } 430 if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 431 &(q->id.dst_ip6)) && 432 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 433 &(q->id.src_ip6)) && 434 pkt->src_port == q->id.dst_port && 435 pkt->dst_port == q->id.src_port ) { 436 dir = MATCH_REVERSE; 437 break; 438 } 439 } else { 440 if 
(pkt->src_ip == q->id.src_ip && 441 pkt->dst_ip == q->id.dst_ip && 442 pkt->src_port == q->id.src_port && 443 pkt->dst_port == q->id.dst_port ) { 444 dir = MATCH_FORWARD; 445 break; 446 } 447 if (pkt->src_ip == q->id.dst_ip && 448 pkt->dst_ip == q->id.src_ip && 449 pkt->src_port == q->id.dst_port && 450 pkt->dst_port == q->id.src_port ) { 451 dir = MATCH_REVERSE; 452 break; 453 } 454 } 455 } 456next: 457 prev = q; 458 q = q->next; 459 } 460 if (q == NULL) 461 goto done; /* q = NULL, not found */ 462 463 if ( prev != NULL) { /* found and not in front */ 464 prev->next = q->next; 465 q->next = V_ipfw_dyn_v[i]; 466 V_ipfw_dyn_v[i] = q; 467 } 468 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 469 u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); 470 471#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 472#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 473 q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); 474 switch (q->state) { 475 case TH_SYN: /* opening */ 476 q->expire = time_uptime + V_dyn_syn_lifetime; 477 break; 478 479 case BOTH_SYN: /* move to established */ 480 case BOTH_SYN | TH_FIN : /* one side tries to close */ 481 case BOTH_SYN | (TH_FIN << 8) : 482 if (tcp) { 483#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) 484 u_int32_t ack = ntohl(tcp->th_ack); 485 if (dir == MATCH_FORWARD) { 486 if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) 487 q->ack_fwd = ack; 488 else { /* ignore out-of-sequence */ 489 break; 490 } 491 } else { 492 if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) 493 q->ack_rev = ack; 494 else { /* ignore out-of-sequence */ 495 break; 496 } 497 } 498 } 499 q->expire = time_uptime + V_dyn_ack_lifetime; 500 break; 501 502 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 503 if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) 504 V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; 505 q->expire = time_uptime + V_dyn_fin_lifetime; 506 break; 507 508 default: 509#if 0 510 /* 511 * reset or some invalid combination, but can also 
512 * occur if we use keep-state the wrong way. 513 */ 514 if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) 515 printf("invalid state: 0x%x\n", q->state); 516#endif 517 if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) 518 V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; 519 q->expire = time_uptime + V_dyn_rst_lifetime; 520 break; 521 } 522 } else if (pkt->proto == IPPROTO_UDP) { 523 q->expire = time_uptime + V_dyn_udp_lifetime; 524 } else { 525 /* other protocols */ 526 q->expire = time_uptime + V_dyn_short_lifetime; 527 } 528done: 529 if (match_direction) 530 *match_direction = dir; 531 return q; 532} 533 534ipfw_dyn_rule * 535ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, 536 struct tcphdr *tcp) 537{ 538 ipfw_dyn_rule *q; 539 540 IPFW_DYN_LOCK(); 541 q = lookup_dyn_rule_locked(pkt, match_direction, tcp); 542 if (q == NULL) 543 IPFW_DYN_UNLOCK(); 544 /* NB: return table locked when q is not NULL */ 545 return q; 546} 547 548static void 549realloc_dynamic_table(void) 550{ 551 IPFW_DYN_LOCK_ASSERT(); 552 553 /* 554 * Try reallocation, make sure we have a power of 2 and do 555 * not allow more than 64k entries. In case of overflow, 556 * default to 1024. 557 */ 558 559 if (V_dyn_buckets > 65536) 560 V_dyn_buckets = 1024; 561 if ((V_dyn_buckets & (V_dyn_buckets-1)) != 0) { /* not a power of 2 */ 562 V_dyn_buckets = V_curr_dyn_buckets; /* reset */ 563 return; 564 } 565 V_curr_dyn_buckets = V_dyn_buckets; 566 if (V_ipfw_dyn_v != NULL) 567 free(V_ipfw_dyn_v, M_IPFW); 568 for (;;) { 569 V_ipfw_dyn_v = malloc(V_curr_dyn_buckets * sizeof(ipfw_dyn_rule *), 570 M_IPFW, M_NOWAIT | M_ZERO); 571 if (V_ipfw_dyn_v != NULL || V_curr_dyn_buckets <= 2) 572 break; 573 V_curr_dyn_buckets /= 2; 574 } 575} 576 577/** 578 * Install state of type 'type' for a dynamic session. 579 * The hash table contains two type of rules: 580 * - regular rules (O_KEEP_STATE) 581 * - rules for sessions with limited number of sess per user 582 * (O_LIMIT). 
When they are created, the parent is 583 * increased by 1, and decreased on delete. In this case, 584 * the third parameter is the parent rule and not the chain. 585 * - "parent" rules for the above (O_LIMIT_PARENT). 586 */ 587static ipfw_dyn_rule * 588add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) 589{ 590 ipfw_dyn_rule *r; 591 int i; 592 593 IPFW_DYN_LOCK_ASSERT(); 594 595 if (V_ipfw_dyn_v == NULL || 596 (V_dyn_count == 0 && V_dyn_buckets != V_curr_dyn_buckets)) { 597 realloc_dynamic_table(); 598 if (V_ipfw_dyn_v == NULL) 599 return NULL; /* failed ! */ 600 } 601 i = hash_packet(id); 602 603 r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); 604 if (r == NULL) { 605 printf ("ipfw: sorry cannot allocate state\n"); 606 return NULL; 607 } 608 609 /* increase refcount on parent, and set pointer */ 610 if (dyn_type == O_LIMIT) { 611 ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; 612 if ( parent->dyn_type != O_LIMIT_PARENT) 613 panic("invalid parent"); 614 parent->count++; 615 r->parent = parent; 616 rule = parent->rule; 617 } 618 619 r->id = *id; 620 r->expire = time_uptime + V_dyn_syn_lifetime; 621 r->rule = rule; 622 r->dyn_type = dyn_type; 623 r->pcnt = r->bcnt = 0; 624 r->count = 0; 625 626 r->bucket = i; 627 r->next = V_ipfw_dyn_v[i]; 628 V_ipfw_dyn_v[i] = r; 629 V_dyn_count++; 630 DEB({ 631 struct in_addr da; 632#ifdef INET6 633 char src[INET6_ADDRSTRLEN]; 634 char dst[INET6_ADDRSTRLEN]; 635#else 636 char src[INET_ADDRSTRLEN]; 637 char dst[INET_ADDRSTRLEN]; 638#endif 639 640#ifdef INET6 641 if (IS_IP6_FLOW_ID(&(r->id))) { 642 ip6_sprintf(src, &r->id.src_ip6); 643 ip6_sprintf(dst, &r->id.dst_ip6); 644 } else 645#endif 646 { 647 da.s_addr = htonl(r->id.src_ip); 648 inet_ntoa_r(da, src); 649 da.s_addr = htonl(r->id.dst_ip); 650 inet_ntoa_r(da, dst); 651 } 652 printf("ipfw: add dyn entry ty %d %s %d -> %s %d, total %d\n", 653 dyn_type, src, r->id.src_port, dst, r->id.dst_port, 654 V_dyn_count); 655 }) 656 return r; 657} 658 659/** 
660 * lookup dynamic parent rule using pkt and rule as search keys. 661 * If the lookup fails, then install one. 662 */ 663static ipfw_dyn_rule * 664lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) 665{ 666 ipfw_dyn_rule *q; 667 int i; 668 669 IPFW_DYN_LOCK_ASSERT(); 670 671 if (V_ipfw_dyn_v) { 672 int is_v6 = IS_IP6_FLOW_ID(pkt); 673 i = hash_packet( pkt ); 674 for (q = V_ipfw_dyn_v[i] ; q != NULL ; q=q->next) 675 if (q->dyn_type == O_LIMIT_PARENT && 676 rule== q->rule && 677 pkt->proto == q->id.proto && 678 pkt->src_port == q->id.src_port && 679 pkt->dst_port == q->id.dst_port && 680 ( 681 (is_v6 && 682 IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), 683 &(q->id.src_ip6)) && 684 IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), 685 &(q->id.dst_ip6))) || 686 (!is_v6 && 687 pkt->src_ip == q->id.src_ip && 688 pkt->dst_ip == q->id.dst_ip) 689 ) 690 ) { 691 q->expire = time_uptime + V_dyn_short_lifetime; 692 DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) 693 return q; 694 } 695 } 696 return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); 697} 698 699/** 700 * Install dynamic state for rule type cmd->o.opcode 701 * 702 * Returns 1 (failure) if state is not installed because of errors or because 703 * session limitations are enforced. 
704 */ 705int 706ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, 707 struct ip_fw_args *args, uint32_t tablearg) 708{ 709 static int last_log; 710 ipfw_dyn_rule *q; 711 struct in_addr da; 712#ifdef INET6 713 char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; 714#else 715 char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 716#endif 717 718 src[0] = '\0'; 719 dst[0] = '\0'; 720 721 IPFW_DYN_LOCK(); 722 723 DEB( 724#ifdef INET6 725 if (IS_IP6_FLOW_ID(&(args->f_id))) { 726 ip6_sprintf(src, &args->f_id.src_ip6); 727 ip6_sprintf(dst, &args->f_id.dst_ip6); 728 } else 729#endif 730 { 731 da.s_addr = htonl(args->f_id.src_ip); 732 inet_ntoa_r(da, src); 733 da.s_addr = htonl(args->f_id.dst_ip); 734 inet_ntoa_r(da, dst); 735 } 736 printf("ipfw: %s: type %d %s %u -> %s %u\n", 737 __func__, cmd->o.opcode, src, args->f_id.src_port, 738 dst, args->f_id.dst_port); 739 src[0] = '\0'; 740 dst[0] = '\0'; 741 ) 742 743 q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 744 745 if (q != NULL) { /* should never occur */ 746 if (last_log != time_uptime) { 747 last_log = time_uptime; 748 printf("ipfw: %s: entry already present, done\n", 749 __func__); 750 } 751 IPFW_DYN_UNLOCK(); 752 return (0); 753 } 754 755 if (V_dyn_count >= V_dyn_max) 756 /* Run out of slots, try to remove any expired rule. */ 757 remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); 758 759 if (V_dyn_count >= V_dyn_max) { 760 if (last_log != time_uptime) { 761 last_log = time_uptime; 762 printf("ipfw: %s: Too many dynamic rules\n", __func__); 763 } 764 IPFW_DYN_UNLOCK(); 765 return (1); /* cannot install, notify caller */ 766 } 767 768 switch (cmd->o.opcode) { 769 case O_KEEP_STATE: /* bidir rule */ 770 add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); 771 break; 772 773 case O_LIMIT: { /* limit number of sessions */ 774 struct ipfw_flow_id id; 775 ipfw_dyn_rule *parent; 776 uint32_t conn_limit; 777 uint16_t limit_mask = cmd->limit_mask; 778 779 conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ? 
780 tablearg : cmd->conn_limit; 781 782 DEB( 783 if (cmd->conn_limit == IP_FW_TABLEARG) 784 printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " 785 "(tablearg)\n", __func__, conn_limit); 786 else 787 printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", 788 __func__, conn_limit); 789 ) 790 791 id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; 792 id.proto = args->f_id.proto; 793 id.addr_type = args->f_id.addr_type; 794 id.fib = M_GETFIB(args->m); 795 796 if (IS_IP6_FLOW_ID (&(args->f_id))) { 797 if (limit_mask & DYN_SRC_ADDR) 798 id.src_ip6 = args->f_id.src_ip6; 799 if (limit_mask & DYN_DST_ADDR) 800 id.dst_ip6 = args->f_id.dst_ip6; 801 } else { 802 if (limit_mask & DYN_SRC_ADDR) 803 id.src_ip = args->f_id.src_ip; 804 if (limit_mask & DYN_DST_ADDR) 805 id.dst_ip = args->f_id.dst_ip; 806 } 807 if (limit_mask & DYN_SRC_PORT) 808 id.src_port = args->f_id.src_port; 809 if (limit_mask & DYN_DST_PORT) 810 id.dst_port = args->f_id.dst_port; 811 if ((parent = lookup_dyn_parent(&id, rule)) == NULL) { 812 printf("ipfw: %s: add parent failed\n", __func__); 813 IPFW_DYN_UNLOCK(); 814 return (1); 815 } 816 817 if (parent->count >= conn_limit) { 818 /* See if we can remove some expired rule. */ 819 remove_dyn_rule(rule, parent); 820 if (parent->count >= conn_limit) { 821 if (V_fw_verbose && last_log != time_uptime) { 822 last_log = time_uptime; 823#ifdef INET6 824 /* 825 * XXX IPv6 flows are not 826 * supported yet. 
827 */ 828 if (IS_IP6_FLOW_ID(&(args->f_id))) { 829 char ip6buf[INET6_ADDRSTRLEN]; 830 snprintf(src, sizeof(src), 831 "[%s]", ip6_sprintf(ip6buf, 832 &args->f_id.src_ip6)); 833 snprintf(dst, sizeof(dst), 834 "[%s]", ip6_sprintf(ip6buf, 835 &args->f_id.dst_ip6)); 836 } else 837#endif 838 { 839 da.s_addr = 840 htonl(args->f_id.src_ip); 841 inet_ntoa_r(da, src); 842 da.s_addr = 843 htonl(args->f_id.dst_ip); 844 inet_ntoa_r(da, dst); 845 } 846 log(LOG_SECURITY | LOG_DEBUG, 847 "ipfw: %d %s %s:%u -> %s:%u, %s\n", 848 parent->rule->rulenum, 849 "drop session", 850 src, (args->f_id.src_port), 851 dst, (args->f_id.dst_port), 852 "too many entries"); 853 } 854 IPFW_DYN_UNLOCK(); 855 return (1); 856 } 857 } 858 add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); 859 break; 860 } 861 default: 862 printf("ipfw: %s: unknown dynamic rule type %u\n", 863 __func__, cmd->o.opcode); 864 IPFW_DYN_UNLOCK(); 865 return (1); 866 } 867 868 /* XXX just set lifetime */ 869 lookup_dyn_rule_locked(&args->f_id, NULL, NULL); 870 871 IPFW_DYN_UNLOCK(); 872 return (0); 873} 874 875/* 876 * Generate a TCP packet, containing either a RST or a keepalive. 877 * When flags & TH_RST, we are sending a RST packet, because of a 878 * "reset" action matched the packet. 879 * Otherwise we are sending a keepalive, and flags & TH_ 880 * The 'replyto' mbuf is the mbuf being replied to, if any, and is required 881 * so that MAC can label the reply appropriately. 
882 */ 883struct mbuf * 884ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, 885 u_int32_t ack, int flags) 886{ 887 struct mbuf *m; 888 int len, dir; 889 struct ip *h = NULL; /* stupid compiler */ 890#ifdef INET6 891 struct ip6_hdr *h6 = NULL; 892#endif 893 struct tcphdr *th = NULL; 894 895 MGETHDR(m, M_DONTWAIT, MT_DATA); 896 if (m == NULL) 897 return (NULL); 898 899 M_SETFIB(m, id->fib); 900#ifdef MAC 901 if (replyto != NULL) 902 mac_netinet_firewall_reply(replyto, m); 903 else 904 mac_netinet_firewall_send(m); 905#else 906 (void)replyto; /* don't warn about unused arg */ 907#endif 908 909 switch (id->addr_type) { 910 case 4: 911 len = sizeof(struct ip) + sizeof(struct tcphdr); 912 break; 913#ifdef INET6 914 case 6: 915 len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 916 break; 917#endif 918 default: 919 /* XXX: log me?!? */ 920 m_freem(m); 921 return (NULL); 922 } 923 dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); 924 925 m->m_data += max_linkhdr; 926 m->m_flags |= M_SKIP_FIREWALL; 927 m->m_pkthdr.len = m->m_len = len; 928 m->m_pkthdr.rcvif = NULL; 929 bzero(m->m_data, len); 930 931 switch (id->addr_type) { 932 case 4: 933 h = mtod(m, struct ip *); 934 935 /* prepare for checksum */ 936 h->ip_p = IPPROTO_TCP; 937 h->ip_len = htons(sizeof(struct tcphdr)); 938 if (dir) { 939 h->ip_src.s_addr = htonl(id->src_ip); 940 h->ip_dst.s_addr = htonl(id->dst_ip); 941 } else { 942 h->ip_src.s_addr = htonl(id->dst_ip); 943 h->ip_dst.s_addr = htonl(id->src_ip); 944 } 945 946 th = (struct tcphdr *)(h + 1); 947 break; 948#ifdef INET6 949 case 6: 950 h6 = mtod(m, struct ip6_hdr *); 951 952 /* prepare for checksum */ 953 h6->ip6_nxt = IPPROTO_TCP; 954 h6->ip6_plen = htons(sizeof(struct tcphdr)); 955 if (dir) { 956 h6->ip6_src = id->src_ip6; 957 h6->ip6_dst = id->dst_ip6; 958 } else { 959 h6->ip6_src = id->dst_ip6; 960 h6->ip6_dst = id->src_ip6; 961 } 962 963 th = (struct tcphdr *)(h6 + 1); 964 break; 965#endif 966 } 967 968 if (dir) { 969 
th->th_sport = htons(id->src_port); 970 th->th_dport = htons(id->dst_port); 971 } else { 972 th->th_sport = htons(id->dst_port); 973 th->th_dport = htons(id->src_port); 974 } 975 th->th_off = sizeof(struct tcphdr) >> 2; 976 977 if (flags & TH_RST) { 978 if (flags & TH_ACK) { 979 th->th_seq = htonl(ack); 980 th->th_flags = TH_RST; 981 } else { 982 if (flags & TH_SYN) 983 seq++; 984 th->th_ack = htonl(seq); 985 th->th_flags = TH_RST | TH_ACK; 986 } 987 } else { 988 /* 989 * Keepalive - use caller provided sequence numbers 990 */ 991 th->th_seq = htonl(seq); 992 th->th_ack = htonl(ack); 993 th->th_flags = TH_ACK; 994 } 995 996 switch (id->addr_type) { 997 case 4: 998 th->th_sum = in_cksum(m, len); 999 1000 /* finish the ip header */ 1001 h->ip_v = 4; 1002 h->ip_hl = sizeof(*h) >> 2; 1003 h->ip_tos = IPTOS_LOWDELAY; 1004 h->ip_off = 0; 1005#ifdef HAVE_NET_IPLEN /* XXX do we handle layer2 ? */ 1006 h->ip_len = htons(len); 1007#else 1008 h->ip_len = len; 1009#endif 1010 h->ip_ttl = V_ip_defttl; 1011 h->ip_sum = 0; 1012 break; 1013#ifdef INET6 1014 case 6: 1015 th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), 1016 sizeof(struct tcphdr)); 1017 1018 /* finish the ip6 header */ 1019 h6->ip6_vfc |= IPV6_VERSION; 1020 h6->ip6_hlim = IPV6_DEFHLIM; 1021 break; 1022#endif 1023 } 1024 1025 return (m); 1026} 1027 1028/* 1029 * This procedure is only used to handle keepalives. 
It is invoked 1030 * every dyn_keepalive_period 1031 */ 1032static void 1033ipfw_tick(void * vnetx) 1034{ 1035 struct mbuf *m0, *m, *mnext, **mtailp; 1036#ifdef INET6 1037 struct mbuf *m6, **m6_tailp; 1038#endif 1039 int i; 1040 ipfw_dyn_rule *q; 1041#ifdef VIMAGE 1042 struct vnet *vp = vnetx; 1043#endif 1044 1045 CURVNET_SET(vp); 1046 if (V_dyn_keepalive == 0 || V_ipfw_dyn_v == NULL || V_dyn_count == 0) 1047 goto done; 1048 1049 /* 1050 * We make a chain of packets to go out here -- not deferring 1051 * until after we drop the IPFW dynamic rule lock would result 1052 * in a lock order reversal with the normal packet input -> ipfw 1053 * call stack. 1054 */ 1055 m0 = NULL; 1056 mtailp = &m0; 1057#ifdef INET6 1058 m6 = NULL; 1059 m6_tailp = &m6; 1060#endif 1061 IPFW_DYN_LOCK(); 1062 for (i = 0 ; i < V_curr_dyn_buckets ; i++) { 1063 for (q = V_ipfw_dyn_v[i] ; q ; q = q->next ) { 1064 if (q->dyn_type == O_LIMIT_PARENT) 1065 continue; 1066 if (q->id.proto != IPPROTO_TCP) 1067 continue; 1068 if ( (q->state & BOTH_SYN) != BOTH_SYN) 1069 continue; 1070 if (TIME_LEQ(time_uptime + V_dyn_keepalive_interval, 1071 q->expire)) 1072 continue; /* too early */ 1073 if (TIME_LEQ(q->expire, time_uptime)) 1074 continue; /* too late, rule expired */ 1075 1076 m = ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1, 1077 q->ack_fwd, TH_SYN); 1078 mnext = ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1, 1079 q->ack_rev, 0); 1080 1081 switch (q->id.addr_type) { 1082 case 4: 1083 if (m != NULL) { 1084 *mtailp = m; 1085 mtailp = &(*mtailp)->m_nextpkt; 1086 } 1087 if (mnext != NULL) { 1088 *mtailp = mnext; 1089 mtailp = &(*mtailp)->m_nextpkt; 1090 } 1091 break; 1092#ifdef INET6 1093 case 6: 1094 if (m != NULL) { 1095 *m6_tailp = m; 1096 m6_tailp = &(*m6_tailp)->m_nextpkt; 1097 } 1098 if (mnext != NULL) { 1099 *m6_tailp = mnext; 1100 m6_tailp = &(*m6_tailp)->m_nextpkt; 1101 } 1102 break; 1103#endif 1104 } 1105 1106 m = mnext = NULL; 1107 } 1108 } 1109 IPFW_DYN_UNLOCK(); 1110 for (m = mnext = m0; m != 
NULL; m = mnext) { 1111 mnext = m->m_nextpkt; 1112 m->m_nextpkt = NULL; 1113 ip_output(m, NULL, NULL, 0, NULL, NULL); 1114 } 1115#ifdef INET6 1116 for (m = mnext = m6; m != NULL; m = mnext) { 1117 mnext = m->m_nextpkt; 1118 m->m_nextpkt = NULL; 1119 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 1120 } 1121#endif 1122done: 1123 callout_reset(&V_ipfw_timeout, V_dyn_keepalive_period * hz, 1124 ipfw_tick, vnetx); 1125 CURVNET_RESTORE(); 1126} 1127 1128void 1129ipfw_dyn_attach(void) 1130{ 1131 ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", 1132 sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, 1133 UMA_ALIGN_PTR, 0); 1134 1135 IPFW_DYN_LOCK_INIT(); 1136} 1137 1138void 1139ipfw_dyn_detach(void) 1140{ 1141 uma_zdestroy(ipfw_dyn_rule_zone); 1142 IPFW_DYN_LOCK_DESTROY(); 1143} 1144 1145void 1146ipfw_dyn_init(void) 1147{ 1148 V_ipfw_dyn_v = NULL; 1149 V_dyn_buckets = 256; /* must be power of 2 */ 1150 V_curr_dyn_buckets = 256; /* must be power of 2 */ 1151 1152 V_dyn_ack_lifetime = 300; 1153 V_dyn_syn_lifetime = 20; 1154 V_dyn_fin_lifetime = 1; 1155 V_dyn_rst_lifetime = 1; 1156 V_dyn_udp_lifetime = 10; 1157 V_dyn_short_lifetime = 5; 1158 1159 V_dyn_keepalive_interval = 20; 1160 V_dyn_keepalive_period = 5; 1161 V_dyn_keepalive = 1; /* do send keepalives */ 1162 1163 V_dyn_max = 4096; /* max # of dynamic rules */ 1164 callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); 1165 callout_reset(&V_ipfw_timeout, hz, ipfw_tick, curvnet); 1166} 1167 1168void 1169ipfw_dyn_uninit(int pass) 1170{ 1171 if (pass == 0) 1172 callout_drain(&V_ipfw_timeout); 1173 else { 1174 if (V_ipfw_dyn_v != NULL) 1175 free(V_ipfw_dyn_v, M_IPFW); 1176 } 1177} 1178 1179int 1180ipfw_dyn_len(void) 1181{ 1182 return (V_ipfw_dyn_v == NULL) ? 
0 : 1183 (V_dyn_count * sizeof(ipfw_dyn_rule)); 1184} 1185 1186void 1187ipfw_get_dynamic(char **pbp, const char *ep) 1188{ 1189 ipfw_dyn_rule *p, *last = NULL; 1190 char *bp; 1191 int i; 1192 1193 if (V_ipfw_dyn_v == NULL) 1194 return; 1195 bp = *pbp; 1196 1197 IPFW_DYN_LOCK(); 1198 for (i = 0 ; i < V_curr_dyn_buckets; i++) 1199 for (p = V_ipfw_dyn_v[i] ; p != NULL; p = p->next) { 1200 if (bp + sizeof *p <= ep) { 1201 ipfw_dyn_rule *dst = 1202 (ipfw_dyn_rule *)bp; 1203 bcopy(p, dst, sizeof *p); 1204 bcopy(&(p->rule->rulenum), &(dst->rule), 1205 sizeof(p->rule->rulenum)); 1206 /* 1207 * store set number into high word of 1208 * dst->rule pointer. 1209 */ 1210 bcopy(&(p->rule->set), 1211 (char *)&dst->rule + 1212 sizeof(p->rule->rulenum), 1213 sizeof(p->rule->set)); 1214 /* 1215 * store a non-null value in "next". 1216 * The userland code will interpret a 1217 * NULL here as a marker 1218 * for the last dynamic rule. 1219 */ 1220 bcopy(&dst, &dst->next, sizeof(dst)); 1221 last = dst; 1222 dst->expire = 1223 TIME_LEQ(dst->expire, time_uptime) ? 1224 0 : dst->expire - time_uptime ; 1225 bp += sizeof(ipfw_dyn_rule); 1226 } 1227 } 1228 IPFW_DYN_UNLOCK(); 1229 if (last != NULL) /* mark last dynamic rule */ 1230 bzero(&last->next, sizeof(last)); 1231 *pbp = bp; 1232} 1233/* end of file */ 1234