pf_lb.c revision 1.14
1/* $OpenBSD: pf_lb.c,v 1.14 2011/05/17 12:44:05 mikeb Exp $ */ 2 3/* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 35 * 36 */ 37 38#include "bpfilter.h" 39#include "pflog.h" 40#include "pfsync.h" 41#include "pflow.h" 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/mbuf.h> 46#include <sys/filio.h> 47#include <sys/socket.h> 48#include <sys/socketvar.h> 49#include <sys/kernel.h> 50#include <sys/time.h> 51#include <sys/pool.h> 52#include <sys/proc.h> 53#include <sys/rwlock.h> 54#include <sys/syslog.h> 55 56#include <crypto/md5.h> 57 58#include <net/if.h> 59#include <net/if_types.h> 60#include <net/bpf.h> 61#include <net/route.h> 62#include <net/radix_mpath.h> 63 64#include <netinet/in.h> 65#include <netinet/in_var.h> 66#include <netinet/in_systm.h> 67#include <netinet/ip.h> 68#include <netinet/ip_var.h> 69#include <netinet/tcp.h> 70#include <netinet/tcp_seq.h> 71#include <netinet/udp.h> 72#include <netinet/ip_icmp.h> 73#include <netinet/in_pcb.h> 74#include <netinet/tcp_timer.h> 75#include <netinet/tcp_var.h> 76#include <netinet/udp_var.h> 77#include <netinet/icmp_var.h> 78#include <netinet/if_ether.h> 79 80#include <dev/rndvar.h> 81#include <net/pfvar.h> 82#include <net/if_pflog.h> 83#include <net/if_pflow.h> 84 85#if NPFSYNC > 0 86#include <net/if_pfsync.h> 87#endif /* NPFSYNC > 0 */ 88 89#ifdef INET6 90#include <netinet/ip6.h> 91#include <netinet/in_pcb.h> 92#include <netinet/icmp6.h> 93#include <netinet6/nd6.h> 94#endif /* INET6 */ 95 96 97/* 98 * Global variables 99 */ 100 101void pf_hash(struct pf_addr *, struct pf_addr *, 102 struct pf_poolhashkey *, sa_family_t); 103int pf_get_sport(struct pf_pdesc *, struct pf_rule *, 104 struct pf_addr *, u_int16_t *, u_int16_t, 105 u_int16_t, struct pf_src_node **); 106int pf_islinklocal(sa_family_t, struct pf_addr *); 107 108#define mix(a,b,c) \ 109 do { \ 110 a -= b; a -= c; a ^= (c >> 13); \ 111 b -= c; b -= a; b ^= (a << 8); \ 112 c -= a; c -= b; c ^= (b >> 13); \ 113 a -= b; a -= c; a ^= (c >> 12); \ 114 b -= c; b -= a; b ^= (a << 16); \ 115 c -= a; c -= b; c ^= (b >> 5); \ 116 a -= b; a -= c; a ^= (c >> 3); \ 117 b -= c; b -= a; b ^= (a << 10); \ 118 c -= a; c -= b; c ^= (b >> 15); \ 119 } while (0) 120 121/* 122 * hash function based on bridge_hash in if_bridge.c 123 */ 124void 125pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 126 struct pf_poolhashkey *key, sa_family_t af) 127{ 128 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; 129 130 switch (af) { 131#ifdef INET 132 case AF_INET: 133 a += inaddr->addr32[0]; 134 b += key->key32[1]; 135 mix(a, b, c); 136 hash->addr32[0] = c + key->key32[2]; 137 break; 138#endif /* INET */ 139#ifdef INET6 140 case AF_INET6: 141 a += inaddr->addr32[0]; 142 b += inaddr->addr32[2]; 143 mix(a, b, c); 144 hash->addr32[0] = c; 145 a += inaddr->addr32[1]; 146 b += inaddr->addr32[3]; 147 c += key->key32[1]; 148 mix(a, b, c); 149 hash->addr32[1] = c; 150 a += inaddr->addr32[2]; 151 b += inaddr->addr32[1]; 152 c += key->key32[2]; 153 mix(a, b, c); 154 hash->addr32[2] = c; 155 a += inaddr->addr32[3]; 156 b += inaddr->addr32[0]; 157 c += key->key32[3]; 158 mix(a, b, c); 159 hash->addr32[3] = c; 160 break; 161#endif /* INET6 */ 162 } 163} 164 165int 166pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r, 167 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, 168 struct pf_src_node **sn) 169{ 170 struct pf_state_key_cmp key; 171 struct pf_addr init_addr; 172 u_int16_t cut; 173 174 bzero(&init_addr, sizeof(init_addr)); 175 if (pf_map_addr(pd->af, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat, 176 PF_SN_NAT)) 177 return (1); 178 179 if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) { 180 if (pd->ndport == htons(ICMP6_ECHO_REQUEST) || 181 pd->ndport == htons(ICMP_ECHO)) { 182 low = 1; 183 high = 65535; 184 } else 185 return (0); /* Don't try to modify non-echo ICMP */ 186 } 187 188 do { 189 key.af = pd->af; 190 key.proto = pd->proto; 191 key.rdomain = pd->rdomain; 192 PF_ACPY(&key.addr[0], &pd->ndaddr, key.af); 193 PF_ACPY(&key.addr[1], naddr, key.af); 194 key.port[0] = pd->ndport; 195 196 /* 197 * port search; start random, step; 198 * similar 2 portloop in in_pcbbind 199 */ 200 if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP || 201 pd->proto == IPPROTO_ICMP)) { 202 /* XXX bug: icmp states dont use the id on both 203 * XXX sides (traceroute -I through nat) */ 204 key.port[1] = pd->nsport; 205 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 206 *nport = pd->nsport; 207 return (0); 208 } 209 } else if (low == 0 && high == 0) { 210 key.port[1] = pd->nsport; 211 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 212 *nport = pd->nsport; 213 return (0); 214 } 215 } else if (low == high) { 216 key.port[1] = htons(low); 217 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 218 *nport = htons(low); 219 return (0); 220 } 221 } else { 222 u_int16_t tmp; 223 224 if (low > high) { 225 tmp = low; 226 low = high; 227 high = tmp; 228 } 229 /* low < high */ 230 cut = arc4random_uniform(1 + high - low) + low; 231 /* low <= cut <= high */ 232 for (tmp = cut; tmp <= high; ++(tmp)) { 233 key.port[1] = htons(tmp); 234 if (pf_find_state_all(&key, PF_IN, NULL) == 235 NULL && !in_baddynamic(tmp, pd->proto)) { 236 *nport = htons(tmp); 237 return (0); 238 } 239 } 240 for (tmp = cut - 1; tmp >= low; --(tmp)) { 241 key.port[1] = htons(tmp); 242 if (pf_find_state_all(&key, PF_IN, NULL) == 243 NULL && !in_baddynamic(tmp, pd->proto)) { 244 *nport = htons(tmp); 245 return (0); 246 } 247 } 248 } 249 250 switch (r->nat.opts & PF_POOL_TYPEMASK) { 251 case PF_POOL_RANDOM: 252 case PF_POOL_ROUNDROBIN: 253 if (pf_map_addr(pd->af, r, &pd->nsaddr, naddr, 254 &init_addr, sn, &r->nat, PF_SN_NAT)) 255 return (1); 256 break; 257 case PF_POOL_NONE: 258 case PF_POOL_SRCHASH: 259 case PF_POOL_BITMASK: 260 default: 261 return (1); 262 } 263 } while (! PF_AEQ(&init_addr, naddr, pd->af) ); 264 return (1); /* none available */ 265} 266 267int 268pf_islinklocal(sa_family_t af, struct pf_addr *addr) 269{ 270 if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6)) 271 return (1); 272 return (0); 273} 274 275int 276pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 277 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns, 278 struct pf_pool *rpool, enum pf_sn_types type) 279{ 280 unsigned char hash[16]; 281 struct pf_addr *raddr = &rpool->addr.v.a.addr; 282 struct pf_addr *rmask = &rpool->addr.v.a.mask; 283 struct pf_src_node k; 284 285 if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR && 286 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 287 k.af = af; 288 k.type = type; 289 PF_ACPY(&k.addr, saddr, af); 290 k.rule.ptr = r; 291 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 292 sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 293 if (sns[type] != NULL) { 294 if (!PF_AZERO(&(sns[type])->raddr, af)) 295 PF_ACPY(naddr, &(sns[type])->raddr, af); 296 if (pf_status.debug >= LOG_DEBUG) { 297 log(LOG_DEBUG, "pf: pf_map_addr: " 298 "src tracking (%u) maps ", type); 299 pf_print_host(&k.addr, 0, af); 300 addlog(" to "); 301 pf_print_host(naddr, 0, af); 302 addlog("\n"); 303 } 304 return (0); 305 } 306 } 307 308 if (rpool->addr.type == PF_ADDR_NOROUTE) 309 return (1); 310 if (rpool->addr.type == PF_ADDR_DYNIFTL) { 311 switch (af) { 312#ifdef INET 313 case AF_INET: 314 if (rpool->addr.p.dyn->pfid_acnt4 < 1 && 315 (rpool->opts & PF_POOL_TYPEMASK) != 316 PF_POOL_ROUNDROBIN) 317 return (1); 318 raddr = &rpool->addr.p.dyn->pfid_addr4; 319 rmask = &rpool->addr.p.dyn->pfid_mask4; 320 break; 321#endif /* INET */ 322#ifdef INET6 323 case AF_INET6: 324 if (rpool->addr.p.dyn->pfid_acnt6 < 1 && 325 (rpool->opts & PF_POOL_TYPEMASK) != 326 PF_POOL_ROUNDROBIN) 327 return (1); 328 raddr = &rpool->addr.p.dyn->pfid_addr6; 329 rmask = &rpool->addr.p.dyn->pfid_mask6; 330 break; 331#endif /* INET6 */ 332 } 333 } else if (rpool->addr.type == PF_ADDR_TABLE) { 334 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) 335 return (1); /* unsupported */ 336 } else { 337 raddr = &rpool->addr.v.a.addr; 338 rmask = &rpool->addr.v.a.mask; 339 } 340 341 switch (rpool->opts & PF_POOL_TYPEMASK) { 342 case PF_POOL_NONE: 343 PF_ACPY(naddr, raddr, af); 344 break; 345 case PF_POOL_BITMASK: 346 PF_POOLMASK(naddr, raddr, rmask, saddr, af); 347 break; 348 case PF_POOL_RANDOM: 349 if (init_addr != NULL && PF_AZERO(init_addr, af)) { 350 switch (af) { 351#ifdef INET 352 case AF_INET: 353 rpool->counter.addr32[0] = htonl(arc4random()); 354 break; 355#endif /* INET */ 356#ifdef INET6 357 case AF_INET6: 358 if (rmask->addr32[3] != 0xffffffff) 359 rpool->counter.addr32[3] = 360 htonl(arc4random()); 361 else 362 break; 363 if (rmask->addr32[2] != 0xffffffff) 364 rpool->counter.addr32[2] = 365 htonl(arc4random()); 366 else 367 break; 368 if (rmask->addr32[1] != 0xffffffff) 369 rpool->counter.addr32[1] = 370 htonl(arc4random()); 371 else 372 break; 373 if (rmask->addr32[0] != 0xffffffff) 374 rpool->counter.addr32[0] = 375 htonl(arc4random()); 376 break; 377#endif /* INET6 */ 378 } 379 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 380 PF_ACPY(init_addr, naddr, af); 381 382 } else { 383 PF_AINC(&rpool->counter, af); 384 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 385 } 386 break; 387 case PF_POOL_SRCHASH: 388 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); 389 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); 390 break; 391 case PF_POOL_ROUNDROBIN: 392 if (rpool->addr.type == PF_ADDR_TABLE) { 393 if (pfr_pool_get(rpool->addr.p.tbl, 394 &rpool->tblidx, &rpool->counter, 395 &raddr, &rmask, &rpool->kif, af, NULL)) 396 return (1); 397 } else if (rpool->addr.type == PF_ADDR_DYNIFTL) { 398 if (pfr_pool_get(rpool->addr.p.dyn->pfid_kt, 399 &rpool->tblidx, &rpool->counter, 400 &raddr, &rmask, &rpool->kif, af, pf_islinklocal)) 401 return (1); 402 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 403 return (1); 404 405 PF_ACPY(naddr, &rpool->counter, af); 406 if (init_addr != NULL && PF_AZERO(init_addr, af)) 407 PF_ACPY(init_addr, naddr, af); 408 PF_AINC(&rpool->counter, af); 409 break; 410 } 411 412 if (rpool->opts & PF_POOL_STICKYADDR) { 413 if (sns[type] != NULL) { 414 pf_remove_src_node(sns[type]); 415 sns[type] = NULL; 416 } 417 if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr, 418 0)) 419 return (1); 420 } 421 422 if (pf_status.debug >= LOG_NOTICE && 423 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 424 log(LOG_NOTICE, "pf: pf_map_addr: selected address "); 425 pf_print_host(naddr, 0, af); 426 addlog("\n"); 427 } 428 429 return (0); 430} 431 432int 433pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, 434 struct pf_src_node **sns) 435{ 436 struct pf_addr naddr; 437 u_int16_t nport = 0; 438 439 if (r->nat.addr.type != PF_ADDR_NONE) { 440 /* XXX is this right? what if rtable is changed at the same 441 * XXX time? where do I need to figure out the sport? */ 442 if (pf_get_sport(pd, r, &naddr, &nport, 443 r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) { 444 DPFPRINTF(LOG_NOTICE, 445 "pf: NAT proxy port allocation (%u-%u) failed", 446 r->nat.proxy_port[0], 447 r->nat.proxy_port[1]); 448 return (-1); 449 } 450 PF_ACPY(&pd->nsaddr, &naddr, pd->af); 451 pd->nsport = nport; 452 } 453 if (r->rdr.addr.type != PF_ADDR_NONE) { 454 if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns, 455 &r->rdr, PF_SN_RDR)) 456 return (-1); 457 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 458 PF_POOLMASK(&naddr, &naddr, &r->rdr.addr.v.a.mask, 459 &pd->ndaddr, pd->af); 460 461 if (r->rdr.proxy_port[1]) { 462 u_int32_t tmp_nport; 463 464 tmp_nport = ((ntohs(pd->ndport) - 465 ntohs(r->dst.port[0])) % 466 (r->rdr.proxy_port[1] - 467 r->rdr.proxy_port[0] + 1)) + 468 r->rdr.proxy_port[0]; 469 470 /* wrap around if necessary */ 471 if (tmp_nport > 65535) 472 tmp_nport -= 65535; 473 nport = htons((u_int16_t)tmp_nport); 474 } else if (r->rdr.proxy_port[0]) 475 nport = htons(r->rdr.proxy_port[0]); 476 477 PF_ACPY(&pd->ndaddr, &naddr, pd->af); 478 if (nport) 479 pd->ndport = nport; 480 } 481 482 return (0); 483} 484