/* pf_lb.c revision 1.9 */
1/* $OpenBSD: pf_lb.c,v 1.9 2009/12/14 12:31:45 henning Exp $ */ 2 3/* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
35 * 36 */ 37 38#include "bpfilter.h" 39#include "pflog.h" 40#include "pfsync.h" 41#include "pflow.h" 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/mbuf.h> 46#include <sys/filio.h> 47#include <sys/socket.h> 48#include <sys/socketvar.h> 49#include <sys/kernel.h> 50#include <sys/time.h> 51#include <sys/pool.h> 52#include <sys/proc.h> 53#include <sys/rwlock.h> 54 55#include <crypto/md5.h> 56 57#include <net/if.h> 58#include <net/if_types.h> 59#include <net/bpf.h> 60#include <net/route.h> 61#include <net/radix_mpath.h> 62 63#include <netinet/in.h> 64#include <netinet/in_var.h> 65#include <netinet/in_systm.h> 66#include <netinet/ip.h> 67#include <netinet/ip_var.h> 68#include <netinet/tcp.h> 69#include <netinet/tcp_seq.h> 70#include <netinet/udp.h> 71#include <netinet/ip_icmp.h> 72#include <netinet/in_pcb.h> 73#include <netinet/tcp_timer.h> 74#include <netinet/tcp_var.h> 75#include <netinet/udp_var.h> 76#include <netinet/icmp_var.h> 77#include <netinet/if_ether.h> 78 79#include <dev/rndvar.h> 80#include <net/pfvar.h> 81#include <net/if_pflog.h> 82#include <net/if_pflow.h> 83 84#if NPFSYNC > 0 85#include <net/if_pfsync.h> 86#endif /* NPFSYNC > 0 */ 87 88#ifdef INET6 89#include <netinet/ip6.h> 90#include <netinet/in_pcb.h> 91#include <netinet/icmp6.h> 92#include <netinet6/nd6.h> 93#endif /* INET6 */ 94 95 96#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x 97 98/* 99 * Global variables 100 */ 101 102void pf_hash(struct pf_addr *, struct pf_addr *, 103 struct pf_poolhashkey *, sa_family_t); 104int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, 105 struct pf_addr *, struct pf_addr *, u_int16_t, 106 struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t, 107 struct pf_src_node **, int); 108 109#define mix(a,b,c) \ 110 do { \ 111 a -= b; a -= c; a ^= (c >> 13); \ 112 b -= c; b -= a; b ^= (a << 8); \ 113 c -= a; c -= b; c ^= (b >> 13); \ 114 a -= b; a -= c; a ^= (c >> 12); \ 115 b -= c; b -= a; b ^= (a << 16); \ 116 c -= a; c -= b; c ^= (b 
>> 5); \ 117 a -= b; a -= c; a ^= (c >> 3); \ 118 b -= c; b -= a; b ^= (a << 10); \ 119 c -= a; c -= b; c ^= (b >> 15); \ 120 } while (0) 121 122/* 123 * hash function based on bridge_hash in if_bridge.c 124 */ 125void 126pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 127 struct pf_poolhashkey *key, sa_family_t af) 128{ 129 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; 130 131 switch (af) { 132#ifdef INET 133 case AF_INET: 134 a += inaddr->addr32[0]; 135 b += key->key32[1]; 136 mix(a, b, c); 137 hash->addr32[0] = c + key->key32[2]; 138 break; 139#endif /* INET */ 140#ifdef INET6 141 case AF_INET6: 142 a += inaddr->addr32[0]; 143 b += inaddr->addr32[2]; 144 mix(a, b, c); 145 hash->addr32[0] = c; 146 a += inaddr->addr32[1]; 147 b += inaddr->addr32[3]; 148 c += key->key32[1]; 149 mix(a, b, c); 150 hash->addr32[1] = c; 151 a += inaddr->addr32[2]; 152 b += inaddr->addr32[1]; 153 c += key->key32[2]; 154 mix(a, b, c); 155 hash->addr32[2] = c; 156 a += inaddr->addr32[3]; 157 b += inaddr->addr32[0]; 158 c += key->key32[3]; 159 mix(a, b, c); 160 hash->addr32[3] = c; 161 break; 162#endif /* INET6 */ 163 } 164} 165 166int 167pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, 168 struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, 169 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, 170 struct pf_src_node **sn, int rdomain) 171{ 172 struct pf_state_key_cmp key; 173 struct pf_addr init_addr; 174 u_int16_t cut; 175 176 bzero(&init_addr, sizeof(init_addr)); 177 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, &r->nat, 178 PF_SN_NAT)) 179 return (1); 180 181 if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { 182 if (dport == htons(ICMP6_ECHO_REQUEST) || 183 dport == htons(ICMP_ECHO)) { 184 low = 1; 185 high = 65535; 186 } else 187 return (0); /* Don't try to modify non-echo ICMP */ 188 } 189 190 do { 191 key.af = af; 192 key.proto = proto; 193 key.rdomain = rdomain; 194 PF_ACPY(&key.addr[1], daddr, 
key.af); 195 PF_ACPY(&key.addr[0], naddr, key.af); 196 key.port[1] = dport; 197 198 /* 199 * port search; start random, step; 200 * similar 2 portloop in in_pcbbind 201 */ 202 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || 203 proto == IPPROTO_ICMP)) { 204 /* XXX bug icmp states dont use the id on both sides */ 205 key.port[0] = dport; 206 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) 207 return (0); 208 } else if (low == 0 && high == 0) { 209 key.port[0] = *nport; 210 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) 211 return (0); 212 } else if (low == high) { 213 key.port[0] = htons(low); 214 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 215 *nport = htons(low); 216 return (0); 217 } 218 } else { 219 u_int16_t tmp; 220 221 if (low > high) { 222 tmp = low; 223 low = high; 224 high = tmp; 225 } 226 /* low < high */ 227 cut = arc4random_uniform(1 + high - low) + low; 228 /* low <= cut <= high */ 229 for (tmp = cut; tmp <= high; ++(tmp)) { 230 key.port[0] = htons(tmp); 231 if (pf_find_state_all(&key, PF_IN, NULL) == 232 NULL && !in_baddynamic(tmp, proto)) { 233 *nport = htons(tmp); 234 return (0); 235 } 236 } 237 for (tmp = cut - 1; tmp >= low; --(tmp)) { 238 key.port[0] = htons(tmp); 239 if (pf_find_state_all(&key, PF_IN, NULL) == 240 NULL && !in_baddynamic(tmp, proto)) { 241 *nport = htons(tmp); 242 return (0); 243 } 244 } 245 } 246 247 switch (r->nat.opts & PF_POOL_TYPEMASK) { 248 case PF_POOL_RANDOM: 249 case PF_POOL_ROUNDROBIN: 250 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, 251 &r->nat, PF_SN_NAT)) 252 return (1); 253 break; 254 case PF_POOL_NONE: 255 case PF_POOL_SRCHASH: 256 case PF_POOL_BITMASK: 257 default: 258 return (1); 259 } 260 } while (! 
PF_AEQ(&init_addr, naddr, af) ); 261 return (1); /* none available */ 262} 263 264int 265pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 266 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns, 267 struct pf_pool *rpool, enum pf_sn_types type) 268{ 269 unsigned char hash[16]; 270 struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; 271 struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; 272 struct pf_pooladdr *acur = rpool->cur; 273 struct pf_src_node k; 274 275 if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR && 276 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 277 k.af = af; 278 k.type = type; 279 PF_ACPY(&k.addr, saddr, af); 280 k.rule.ptr = r; 281 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 282 sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 283 if (sns[type] != NULL) { 284 if (!PF_AZERO(&(sns[type])->raddr, af)) 285 PF_ACPY(naddr, &(sns[type])->raddr, af); 286 if (pf_status.debug >= PF_DEBUG_MISC) { 287 printf("pf_map_addr: src tracking (%u) maps ", 288 type); 289 pf_print_host(&k.addr, 0, af); 290 printf(" to "); 291 pf_print_host(naddr, 0, af); 292 printf("\n"); 293 } 294 return (0); 295 } 296 } 297 298 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) 299 return (1); 300 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 301 switch (af) { 302#ifdef INET 303 case AF_INET: 304 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && 305 (rpool->opts & PF_POOL_TYPEMASK) != 306 PF_POOL_ROUNDROBIN) 307 return (1); 308 raddr = &rpool->cur->addr.p.dyn->pfid_addr4; 309 rmask = &rpool->cur->addr.p.dyn->pfid_mask4; 310 break; 311#endif /* INET */ 312#ifdef INET6 313 case AF_INET6: 314 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && 315 (rpool->opts & PF_POOL_TYPEMASK) != 316 PF_POOL_ROUNDROBIN) 317 return (1); 318 raddr = &rpool->cur->addr.p.dyn->pfid_addr6; 319 rmask = &rpool->cur->addr.p.dyn->pfid_mask6; 320 break; 321#endif /* INET6 */ 322 } 323 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { 324 if 
((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) 325 return (1); /* unsupported */ 326 } else { 327 raddr = &rpool->cur->addr.v.a.addr; 328 rmask = &rpool->cur->addr.v.a.mask; 329 } 330 331 switch (rpool->opts & PF_POOL_TYPEMASK) { 332 case PF_POOL_NONE: 333 PF_ACPY(naddr, raddr, af); 334 break; 335 case PF_POOL_BITMASK: 336 PF_POOLMASK(naddr, raddr, rmask, saddr, af); 337 break; 338 case PF_POOL_RANDOM: 339 if (init_addr != NULL && PF_AZERO(init_addr, af)) { 340 switch (af) { 341#ifdef INET 342 case AF_INET: 343 rpool->counter.addr32[0] = htonl(arc4random()); 344 break; 345#endif /* INET */ 346#ifdef INET6 347 case AF_INET6: 348 if (rmask->addr32[3] != 0xffffffff) 349 rpool->counter.addr32[3] = 350 htonl(arc4random()); 351 else 352 break; 353 if (rmask->addr32[2] != 0xffffffff) 354 rpool->counter.addr32[2] = 355 htonl(arc4random()); 356 else 357 break; 358 if (rmask->addr32[1] != 0xffffffff) 359 rpool->counter.addr32[1] = 360 htonl(arc4random()); 361 else 362 break; 363 if (rmask->addr32[0] != 0xffffffff) 364 rpool->counter.addr32[0] = 365 htonl(arc4random()); 366 break; 367#endif /* INET6 */ 368 } 369 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 370 PF_ACPY(init_addr, naddr, af); 371 372 } else { 373 PF_AINC(&rpool->counter, af); 374 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 375 } 376 break; 377 case PF_POOL_SRCHASH: 378 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); 379 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); 380 break; 381 case PF_POOL_ROUNDROBIN: 382 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 383 if (!pfr_pool_get(rpool->cur->addr.p.tbl, 384 &rpool->tblidx, &rpool->counter, 385 &raddr, &rmask, af)) 386 goto get_addr; 387 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 388 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 389 &rpool->tblidx, &rpool->counter, 390 &raddr, &rmask, af)) 391 goto get_addr; 392 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 393 goto get_addr; 
394 395 try_next: 396 if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) 397 rpool->cur = TAILQ_FIRST(&rpool->list); 398 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 399 rpool->tblidx = -1; 400 if (pfr_pool_get(rpool->cur->addr.p.tbl, 401 &rpool->tblidx, &rpool->counter, 402 &raddr, &rmask, af)) { 403 /* table contains no address of type 'af' */ 404 if (rpool->cur != acur) 405 goto try_next; 406 return (1); 407 } 408 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 409 rpool->tblidx = -1; 410 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 411 &rpool->tblidx, &rpool->counter, 412 &raddr, &rmask, af)) { 413 /* table contains no address of type 'af' */ 414 if (rpool->cur != acur) 415 goto try_next; 416 return (1); 417 } 418 } else { 419 raddr = &rpool->cur->addr.v.a.addr; 420 rmask = &rpool->cur->addr.v.a.mask; 421 PF_ACPY(&rpool->counter, raddr, af); 422 } 423 424 get_addr: 425 PF_ACPY(naddr, &rpool->counter, af); 426 if (init_addr != NULL && PF_AZERO(init_addr, af)) 427 PF_ACPY(init_addr, naddr, af); 428 PF_AINC(&rpool->counter, af); 429 break; 430 } 431 432 if (rpool->opts & PF_POOL_STICKYADDR) { 433 if (sns[type] != NULL) { 434 pf_remove_src_node(sns[type]); 435 sns[type] = NULL; 436 } 437 if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr, 438 0)) 439 return (1); 440 } 441 442 if (pf_status.debug >= PF_DEBUG_NOISY && 443 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 444 printf("pf_map_addr: selected address "); 445 pf_print_host(naddr, 0, af); 446 printf("\n"); 447 } 448 449 return (0); 450} 451 452int 453pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, struct pf_addr *saddr, 454 u_int16_t *sport, struct pf_addr *daddr, u_int16_t *dport, 455 struct pf_src_node **sns) 456{ 457 struct pf_addr naddr; 458 u_int16_t nport = 0; 459 460 if (!TAILQ_EMPTY(&r->nat.list)) { 461 /* XXX is this right? what if rtable is changed at the same 462 * XXX time? where do I need to figure out the sport? 
*/ 463 if (pf_get_sport(pd->af, pd->proto, r, saddr, 464 daddr, *dport, &naddr, &nport, r->nat.proxy_port[0], 465 r->nat.proxy_port[1], sns, pd->rdomain)) { 466 DPFPRINTF(PF_DEBUG_MISC, 467 ("pf: NAT proxy port allocation " 468 "(%u-%u) failed\n", 469 r->nat.proxy_port[0], 470 r->nat.proxy_port[1])); 471 return (-1); 472 } 473 PF_ACPY(saddr, &naddr, pd->af); 474 if (nport) 475 *sport = nport; 476 } 477 if (!TAILQ_EMPTY(&r->rdr.list)) { 478 if (pf_map_addr(pd->af, r, saddr, &naddr, NULL, sns, &r->rdr, 479 PF_SN_RDR)) 480 return (-1); 481 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 482 PF_POOLMASK(&naddr, &naddr, &r->rdr.cur->addr.v.a.mask, 483 daddr, pd->af); 484 485 if (r->rdr.proxy_port[1]) { 486 u_int32_t tmp_nport; 487 488 tmp_nport = ((ntohs(*dport) - 489 ntohs(r->dst.port[0])) % 490 (r->rdr.proxy_port[1] - 491 r->rdr.proxy_port[0] + 1)) + 492 r->rdr.proxy_port[0]; 493 494 /* wrap around if necessary */ 495 if (tmp_nport > 65535) 496 tmp_nport -= 65535; 497 nport = htons((u_int16_t)tmp_nport); 498 } else if (r->rdr.proxy_port[0]) 499 nport = htons(r->rdr.proxy_port[0]); 500 501 PF_ACPY(daddr, &naddr, pd->af); 502 if (nport) 503 *dport = nport; 504 } 505 506 return (0); 507} 508