pf_lb.c revision 1.9
1/*	$OpenBSD: pf_lb.c,v 1.9 2009/12/14 12:31:45 henning Exp $ */
2
3/*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#include "bpfilter.h"
39#include "pflog.h"
40#include "pfsync.h"
41#include "pflow.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/mbuf.h>
46#include <sys/filio.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/kernel.h>
50#include <sys/time.h>
51#include <sys/pool.h>
52#include <sys/proc.h>
53#include <sys/rwlock.h>
54
55#include <crypto/md5.h>
56
57#include <net/if.h>
58#include <net/if_types.h>
59#include <net/bpf.h>
60#include <net/route.h>
61#include <net/radix_mpath.h>
62
63#include <netinet/in.h>
64#include <netinet/in_var.h>
65#include <netinet/in_systm.h>
66#include <netinet/ip.h>
67#include <netinet/ip_var.h>
68#include <netinet/tcp.h>
69#include <netinet/tcp_seq.h>
70#include <netinet/udp.h>
71#include <netinet/ip_icmp.h>
72#include <netinet/in_pcb.h>
73#include <netinet/tcp_timer.h>
74#include <netinet/tcp_var.h>
75#include <netinet/udp_var.h>
76#include <netinet/icmp_var.h>
77#include <netinet/if_ether.h>
78
79#include <dev/rndvar.h>
80#include <net/pfvar.h>
81#include <net/if_pflog.h>
82#include <net/if_pflow.h>
83
84#if NPFSYNC > 0
85#include <net/if_pfsync.h>
86#endif /* NPFSYNC > 0 */
87
88#ifdef INET6
89#include <netinet/ip6.h>
90#include <netinet/in_pcb.h>
91#include <netinet/icmp6.h>
92#include <netinet6/nd6.h>
93#endif /* INET6 */
94
95
/*
 * Debug output helper: when pf_status.debug is at least `n', call printf
 * with the parenthesised argument list `x', e.g.
 *	DPFPRINTF(PF_DEBUG_MISC, ("fmt %d\n", val));
 * The do/while (0) wrapper makes the expansion a single statement, so the
 * macro is safe in an unbraced if/else and cannot capture a following `else'.
 */
#define DPFPRINTF(n, x)						\
	do {							\
		if (pf_status.debug >= (n))			\
			printf x;				\
	} while (0)
97
/*
 * Prototypes for functions defined in this file.
 */

void			 pf_hash(struct pf_addr *, struct pf_addr *,
			    struct pf_poolhashkey *, sa_family_t);
int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
			    struct pf_addr *, struct pf_addr *, u_int16_t,
			    struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t,
			    struct pf_src_node **, int);
108
/*
 * Three-word mixing step used by pf_hash().  All three arguments are
 * modified in place and each is evaluated many times, so they must be
 * plain lvalues without side effects.
 */
#define mix(a, b, c)							\
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
121
122/*
123 * hash function based on bridge_hash in if_bridge.c
124 */
125void
126pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
127    struct pf_poolhashkey *key, sa_family_t af)
128{
129	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
130
131	switch (af) {
132#ifdef INET
133	case AF_INET:
134		a += inaddr->addr32[0];
135		b += key->key32[1];
136		mix(a, b, c);
137		hash->addr32[0] = c + key->key32[2];
138		break;
139#endif /* INET */
140#ifdef INET6
141	case AF_INET6:
142		a += inaddr->addr32[0];
143		b += inaddr->addr32[2];
144		mix(a, b, c);
145		hash->addr32[0] = c;
146		a += inaddr->addr32[1];
147		b += inaddr->addr32[3];
148		c += key->key32[1];
149		mix(a, b, c);
150		hash->addr32[1] = c;
151		a += inaddr->addr32[2];
152		b += inaddr->addr32[1];
153		c += key->key32[2];
154		mix(a, b, c);
155		hash->addr32[2] = c;
156		a += inaddr->addr32[3];
157		b += inaddr->addr32[0];
158		c += key->key32[3];
159		mix(a, b, c);
160		hash->addr32[3] = c;
161		break;
162#endif /* INET6 */
163	}
164}
165
166int
167pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
168    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
169    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
170    struct pf_src_node **sn, int rdomain)
171{
172	struct pf_state_key_cmp	key;
173	struct pf_addr		init_addr;
174	u_int16_t		cut;
175
176	bzero(&init_addr, sizeof(init_addr));
177	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, &r->nat,
178	    PF_SN_NAT))
179		return (1);
180
181	if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
182		if (dport == htons(ICMP6_ECHO_REQUEST) ||
183		    dport == htons(ICMP_ECHO)) {
184			low = 1;
185			high = 65535;
186		} else
187			return (0);	/* Don't try to modify non-echo ICMP */
188	}
189
190	do {
191		key.af = af;
192		key.proto = proto;
193		key.rdomain = rdomain;
194		PF_ACPY(&key.addr[1], daddr, key.af);
195		PF_ACPY(&key.addr[0], naddr, key.af);
196		key.port[1] = dport;
197
198		/*
199		 * port search; start random, step;
200		 * similar 2 portloop in in_pcbbind
201		 */
202		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
203		    proto == IPPROTO_ICMP)) {
204			/* XXX bug icmp states dont use the id on both sides */
205			key.port[0] = dport;
206			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
207				return (0);
208		} else if (low == 0 && high == 0) {
209			key.port[0] = *nport;
210			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
211				return (0);
212		} else if (low == high) {
213			key.port[0] = htons(low);
214			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
215				*nport = htons(low);
216				return (0);
217			}
218		} else {
219			u_int16_t tmp;
220
221			if (low > high) {
222				tmp = low;
223				low = high;
224				high = tmp;
225			}
226			/* low < high */
227			cut = arc4random_uniform(1 + high - low) + low;
228			/* low <= cut <= high */
229			for (tmp = cut; tmp <= high; ++(tmp)) {
230				key.port[0] = htons(tmp);
231				if (pf_find_state_all(&key, PF_IN, NULL) ==
232				    NULL && !in_baddynamic(tmp, proto)) {
233					*nport = htons(tmp);
234					return (0);
235				}
236			}
237			for (tmp = cut - 1; tmp >= low; --(tmp)) {
238				key.port[0] = htons(tmp);
239				if (pf_find_state_all(&key, PF_IN, NULL) ==
240				    NULL && !in_baddynamic(tmp, proto)) {
241					*nport = htons(tmp);
242					return (0);
243				}
244			}
245		}
246
247		switch (r->nat.opts & PF_POOL_TYPEMASK) {
248		case PF_POOL_RANDOM:
249		case PF_POOL_ROUNDROBIN:
250			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn,
251			    &r->nat, PF_SN_NAT))
252				return (1);
253			break;
254		case PF_POOL_NONE:
255		case PF_POOL_SRCHASH:
256		case PF_POOL_BITMASK:
257		default:
258			return (1);
259		}
260	} while (! PF_AEQ(&init_addr, naddr, af) );
261	return (1);					/* none available */
262}
263
264int
265pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
266    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
267    struct pf_pool *rpool, enum pf_sn_types type)
268{
269	unsigned char		 hash[16];
270	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
271	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
272	struct pf_pooladdr	*acur = rpool->cur;
273	struct pf_src_node	 k;
274
275	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
276	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
277		k.af = af;
278		k.type = type;
279		PF_ACPY(&k.addr, saddr, af);
280		k.rule.ptr = r;
281		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
282		sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
283		if (sns[type] != NULL) {
284			if (!PF_AZERO(&(sns[type])->raddr, af))
285				PF_ACPY(naddr, &(sns[type])->raddr, af);
286			if (pf_status.debug >= PF_DEBUG_MISC) {
287				printf("pf_map_addr: src tracking (%u) maps ",
288				    type);
289				pf_print_host(&k.addr, 0, af);
290				printf(" to ");
291				pf_print_host(naddr, 0, af);
292				printf("\n");
293			}
294			return (0);
295		}
296	}
297
298	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
299		return (1);
300	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
301		switch (af) {
302#ifdef INET
303		case AF_INET:
304			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
305			    (rpool->opts & PF_POOL_TYPEMASK) !=
306			    PF_POOL_ROUNDROBIN)
307				return (1);
308			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
309			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
310			break;
311#endif /* INET */
312#ifdef INET6
313		case AF_INET6:
314			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
315			    (rpool->opts & PF_POOL_TYPEMASK) !=
316			    PF_POOL_ROUNDROBIN)
317				return (1);
318			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
319			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
320			break;
321#endif /* INET6 */
322		}
323	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
324		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
325			return (1); /* unsupported */
326	} else {
327		raddr = &rpool->cur->addr.v.a.addr;
328		rmask = &rpool->cur->addr.v.a.mask;
329	}
330
331	switch (rpool->opts & PF_POOL_TYPEMASK) {
332	case PF_POOL_NONE:
333		PF_ACPY(naddr, raddr, af);
334		break;
335	case PF_POOL_BITMASK:
336		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
337		break;
338	case PF_POOL_RANDOM:
339		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
340			switch (af) {
341#ifdef INET
342			case AF_INET:
343				rpool->counter.addr32[0] = htonl(arc4random());
344				break;
345#endif /* INET */
346#ifdef INET6
347			case AF_INET6:
348				if (rmask->addr32[3] != 0xffffffff)
349					rpool->counter.addr32[3] =
350					    htonl(arc4random());
351				else
352					break;
353				if (rmask->addr32[2] != 0xffffffff)
354					rpool->counter.addr32[2] =
355					    htonl(arc4random());
356				else
357					break;
358				if (rmask->addr32[1] != 0xffffffff)
359					rpool->counter.addr32[1] =
360					    htonl(arc4random());
361				else
362					break;
363				if (rmask->addr32[0] != 0xffffffff)
364					rpool->counter.addr32[0] =
365					    htonl(arc4random());
366				break;
367#endif /* INET6 */
368			}
369			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
370			PF_ACPY(init_addr, naddr, af);
371
372		} else {
373			PF_AINC(&rpool->counter, af);
374			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
375		}
376		break;
377	case PF_POOL_SRCHASH:
378		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
379		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
380		break;
381	case PF_POOL_ROUNDROBIN:
382		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
383			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
384			    &rpool->tblidx, &rpool->counter,
385			    &raddr, &rmask, af))
386				goto get_addr;
387		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
388			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
389			    &rpool->tblidx, &rpool->counter,
390			    &raddr, &rmask, af))
391				goto get_addr;
392		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
393			goto get_addr;
394
395	try_next:
396		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
397			rpool->cur = TAILQ_FIRST(&rpool->list);
398		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
399			rpool->tblidx = -1;
400			if (pfr_pool_get(rpool->cur->addr.p.tbl,
401			    &rpool->tblidx, &rpool->counter,
402			    &raddr, &rmask, af)) {
403				/* table contains no address of type 'af' */
404				if (rpool->cur != acur)
405					goto try_next;
406				return (1);
407			}
408		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
409			rpool->tblidx = -1;
410			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
411			    &rpool->tblidx, &rpool->counter,
412			    &raddr, &rmask, af)) {
413				/* table contains no address of type 'af' */
414				if (rpool->cur != acur)
415					goto try_next;
416				return (1);
417			}
418		} else {
419			raddr = &rpool->cur->addr.v.a.addr;
420			rmask = &rpool->cur->addr.v.a.mask;
421			PF_ACPY(&rpool->counter, raddr, af);
422		}
423
424	get_addr:
425		PF_ACPY(naddr, &rpool->counter, af);
426		if (init_addr != NULL && PF_AZERO(init_addr, af))
427			PF_ACPY(init_addr, naddr, af);
428		PF_AINC(&rpool->counter, af);
429		break;
430	}
431
432	if (rpool->opts & PF_POOL_STICKYADDR) {
433		if (sns[type] != NULL) {
434			pf_remove_src_node(sns[type]);
435			sns[type] = NULL;
436		}
437		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
438		    0))
439			return (1);
440	}
441
442	if (pf_status.debug >= PF_DEBUG_NOISY &&
443	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
444		printf("pf_map_addr: selected address ");
445		pf_print_host(naddr, 0, af);
446		printf("\n");
447	}
448
449	return (0);
450}
451
452int
453pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, struct pf_addr *saddr,
454    u_int16_t *sport, struct pf_addr *daddr, u_int16_t *dport,
455    struct pf_src_node **sns)
456{
457	struct pf_addr	naddr;
458	u_int16_t	nport = 0;
459
460	if (!TAILQ_EMPTY(&r->nat.list)) {
461		/* XXX is this right? what if rtable is changed at the same
462		 * XXX time? where do I need to figure out the sport? */
463		if (pf_get_sport(pd->af, pd->proto, r, saddr,
464		    daddr, *dport, &naddr, &nport, r->nat.proxy_port[0],
465		    r->nat.proxy_port[1], sns, pd->rdomain)) {
466			DPFPRINTF(PF_DEBUG_MISC,
467			    ("pf: NAT proxy port allocation "
468			    "(%u-%u) failed\n",
469			    r->nat.proxy_port[0],
470			    r->nat.proxy_port[1]));
471			return (-1);
472		}
473		PF_ACPY(saddr, &naddr, pd->af);
474		if (nport)
475			*sport = nport;
476	}
477	if (!TAILQ_EMPTY(&r->rdr.list)) {
478		if (pf_map_addr(pd->af, r, saddr, &naddr, NULL, sns, &r->rdr,
479		    PF_SN_RDR))
480			return (-1);
481		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
482			PF_POOLMASK(&naddr, &naddr,  &r->rdr.cur->addr.v.a.mask,
483			    daddr, pd->af);
484
485			if (r->rdr.proxy_port[1]) {
486				u_int32_t	tmp_nport;
487
488				tmp_nport = ((ntohs(*dport) -
489				    ntohs(r->dst.port[0])) %
490				    (r->rdr.proxy_port[1] -
491				    r->rdr.proxy_port[0] + 1)) +
492				    r->rdr.proxy_port[0];
493
494				/* wrap around if necessary */
495				if (tmp_nport > 65535)
496					tmp_nport -= 65535;
497				nport = htons((u_int16_t)tmp_nport);
498			} else if (r->rdr.proxy_port[0])
499				nport = htons(r->rdr.proxy_port[0]);
500
501		PF_ACPY(daddr, &naddr, pd->af);
502		if (nport)
503			*dport = nport;
504	}
505
506	return (0);
507}
508