pf_lb.c revision 1.6
1/*	$OpenBSD: pf_lb.c,v 1.6 2009/09/01 13:42:00 henning Exp $ */
2
3/*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#include "bpfilter.h"
39#include "pflog.h"
40#include "pfsync.h"
41#include "pflow.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/mbuf.h>
46#include <sys/filio.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/kernel.h>
50#include <sys/time.h>
51#include <sys/pool.h>
52#include <sys/proc.h>
53#include <sys/rwlock.h>
54
55#include <crypto/md5.h>
56
57#include <net/if.h>
58#include <net/if_types.h>
59#include <net/bpf.h>
60#include <net/route.h>
61#include <net/radix_mpath.h>
62
63#include <netinet/in.h>
64#include <netinet/in_var.h>
65#include <netinet/in_systm.h>
66#include <netinet/ip.h>
67#include <netinet/ip_var.h>
68#include <netinet/tcp.h>
69#include <netinet/tcp_seq.h>
70#include <netinet/udp.h>
71#include <netinet/ip_icmp.h>
72#include <netinet/in_pcb.h>
73#include <netinet/tcp_timer.h>
74#include <netinet/tcp_var.h>
75#include <netinet/udp_var.h>
76#include <netinet/icmp_var.h>
77#include <netinet/if_ether.h>
78
79#include <dev/rndvar.h>
80#include <net/pfvar.h>
81#include <net/if_pflog.h>
82#include <net/if_pflow.h>
83
84#if NPFSYNC > 0
85#include <net/if_pfsync.h>
86#endif /* NPFSYNC > 0 */
87
88#ifdef INET6
89#include <netinet/ip6.h>
90#include <netinet/in_pcb.h>
91#include <netinet/icmp6.h>
92#include <netinet6/nd6.h>
93#endif /* INET6 */
94
95
/*
 * Debug printf: emit the parenthesised printf argument list 'x' when
 * pf_status.debug is at least 'n'.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement; a bare 'if' here would silently capture an 'else'
 * that follows the macro invocation.
 */
#define DPFPRINTF(n, x)				\
	do {					\
		if (pf_status.debug >= (n))	\
			printf x;		\
	} while (0)
97
/*
 * Function prototypes
 *
 * Only pf_hash() and pf_get_sport() are declared here; both are defined
 * further down in this file.
 */

void			 pf_hash(struct pf_addr *, struct pf_addr *,
			    struct pf_poolhashkey *, sa_family_t);
int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
			    struct pf_addr *, struct pf_addr *, u_int16_t,
			    struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t,
			    struct pf_src_node **);
108
/*
 * mix(a, b, c): scramble three integer accumulators in place using nine
 * subtract / xor-shift steps.
 *
 * All three arguments are assigned to, so each must be a modifiable
 * lvalue.  Each argument is evaluated many times; do not pass expressions
 * with side effects.
 */
#define mix(a, b, c)						\
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
121
122/*
123 * hash function based on bridge_hash in if_bridge.c
124 */
125void
126pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
127    struct pf_poolhashkey *key, sa_family_t af)
128{
129	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
130
131	switch (af) {
132#ifdef INET
133	case AF_INET:
134		a += inaddr->addr32[0];
135		b += key->key32[1];
136		mix(a, b, c);
137		hash->addr32[0] = c + key->key32[2];
138		break;
139#endif /* INET */
140#ifdef INET6
141	case AF_INET6:
142		a += inaddr->addr32[0];
143		b += inaddr->addr32[2];
144		mix(a, b, c);
145		hash->addr32[0] = c;
146		a += inaddr->addr32[1];
147		b += inaddr->addr32[3];
148		c += key->key32[1];
149		mix(a, b, c);
150		hash->addr32[1] = c;
151		a += inaddr->addr32[2];
152		b += inaddr->addr32[1];
153		c += key->key32[2];
154		mix(a, b, c);
155		hash->addr32[2] = c;
156		a += inaddr->addr32[3];
157		b += inaddr->addr32[0];
158		c += key->key32[3];
159		mix(a, b, c);
160		hash->addr32[3] = c;
161		break;
162#endif /* INET6 */
163	}
164}
165
166int
167pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
168    struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
169    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
170    struct pf_src_node **sn)
171{
172	struct pf_state_key_cmp	key;
173	struct pf_addr		init_addr;
174	u_int16_t		cut;
175
176	bzero(&init_addr, sizeof(init_addr));
177	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, &r->nat))
178		return (1);
179
180	if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
181		if (dport == htons(ICMP6_ECHO_REQUEST) ||
182		    dport == htons(ICMP_ECHO)) {
183			low = 1;
184			high = 65535;
185		} else
186			return (0);	/* Don't try to modify non-echo ICMP */
187	}
188
189	do {
190		key.af = af;
191		key.proto = proto;
192		PF_ACPY(&key.addr[1], daddr, key.af);
193		PF_ACPY(&key.addr[0], naddr, key.af);
194		key.port[1] = dport;
195
196		/*
197		 * port search; start random, step;
198		 * similar 2 portloop in in_pcbbind
199		 */
200		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
201		    proto == IPPROTO_ICMP)) {
202			/* XXX bug icmp states dont use the id on both sides */
203			key.port[0] = dport;
204			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
205				return (0);
206		} else if (low == 0 && high == 0) {
207			key.port[0] = *nport;
208			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
209				return (0);
210		} else if (low == high) {
211			key.port[0] = htons(low);
212			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
213				*nport = htons(low);
214				return (0);
215			}
216		} else {
217			u_int16_t tmp;
218
219			if (low > high) {
220				tmp = low;
221				low = high;
222				high = tmp;
223			}
224			/* low < high */
225			cut = arc4random_uniform(1 + high - low) + low;
226			/* low <= cut <= high */
227			for (tmp = cut; tmp <= high; ++(tmp)) {
228				key.port[0] = htons(tmp);
229				if (pf_find_state_all(&key, PF_IN, NULL) ==
230				    NULL && !in_baddynamic(tmp, proto)) {
231					*nport = htons(tmp);
232					return (0);
233				}
234			}
235			for (tmp = cut - 1; tmp >= low; --(tmp)) {
236				key.port[0] = htons(tmp);
237				if (pf_find_state_all(&key, PF_IN, NULL) ==
238				    NULL && !in_baddynamic(tmp, proto)) {
239					*nport = htons(tmp);
240					return (0);
241				}
242			}
243		}
244
245		switch (r->nat.opts & PF_POOL_TYPEMASK) {
246		case PF_POOL_RANDOM:
247		case PF_POOL_ROUNDROBIN:
248			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn,
249			    &r->nat))
250				return (1);
251			break;
252		case PF_POOL_NONE:
253		case PF_POOL_SRCHASH:
254		case PF_POOL_BITMASK:
255		default:
256			return (1);
257		}
258	} while (! PF_AEQ(&init_addr, naddr, af) );
259	return (1);					/* none available */
260}
261
/*
 * Choose a translation address from pool 'rpool' and store it in 'naddr'.
 *
 *   af         address family to select an address for
 *   r          rule the pool belongs to (used as key for source tracking)
 *   saddr      source address of the connection
 *   naddr      receives the selected address
 *   init_addr  may be NULL; when non-NULL and all-zero, the random and
 *              round-robin strategies store the selected address in it
 *   sn         in/out source node pointer: when *sn is NULL and the pool
 *              has PF_POOL_STICKYADDR set, it is looked up in
 *              tree_src_tracking; when *sn is non-NULL on the way out,
 *              the selected address is recorded in (*sn)->raddr
 *   rpool      the address pool; its 'cur', 'counter' and 'tblidx' members
 *              may be advanced
 *
 * Returns 0 when 'naddr' has been filled in, 1 when no address could be
 * selected.
 */
int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn,
    struct pf_pool *rpool)
{
	unsigned char		 hash[16];
	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
	/* pool entry current on entry; used below to detect a full cycle */
	struct pf_pooladdr	*acur = rpool->cur;
	struct pf_src_node	 k;

	/*
	 * sticky-address: if a source node for this source already records a
	 * translation address, reuse it.
	 */
	if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		/*
		 * NOTE(review): PF_POOL_STICKYADDR is already known to be set
		 * by the enclosing condition, so this test is always true and
		 * the else branch is never taken.
		 */
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    rpool->opts & PF_POOL_STICKYADDR)
			k.rule.ptr = r;
		else
			k.rule.ptr = NULL;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
			PF_ACPY(naddr, &(*sn)->raddr, af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, af);
				printf("\n");
			}
			return (0);
		}
	}

	/* determine the address/mask pair (raddr/rmask) to select from */
	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
		return (1);
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		/*
		 * Dynamic interface address.  Fails when pfid_acnt4/6 is below
		 * 1 (presumably the per-family address count; confirm against
		 * pfvar.h) unless the pool is round-robin.
		 */
		switch (af) {
#ifdef INET
		case AF_INET:
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
#endif /* INET6 */
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
			return (1); /* unsupported */
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	/* apply the pool's selection strategy */
	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		/* use the pool address as is */
		PF_ACPY(naddr, raddr, af);
		break;
	case PF_POOL_BITMASK:
		/* combine raddr and saddr under rmask */
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
			/*
			 * First selection for this caller: seed the pool
			 * counter with random bits.
			 */
			switch (af) {
#ifdef INET
			case AF_INET:
				rpool->counter.addr32[0] = htonl(arc4random());
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				/*
				 * Walk the 32-bit words from addr32[3] down to
				 * addr32[0], randomizing each word whose mask
				 * is not all-ones; stop at the first word
				 * whose mask is all-ones.
				 */
				if (rmask->addr32[3] != 0xffffffff)
					rpool->counter.addr32[3] =
					    htonl(arc4random());
				else
					break;
				if (rmask->addr32[2] != 0xffffffff)
					rpool->counter.addr32[2] =
					    htonl(arc4random());
				else
					break;
				if (rmask->addr32[1] != 0xffffffff)
					rpool->counter.addr32[1] =
					    htonl(arc4random());
				else
					break;
				if (rmask->addr32[0] != 0xffffffff)
					rpool->counter.addr32[0] =
					    htonl(arc4random());
				break;
#endif /* INET6 */
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
			/* remember the first address handed out */
			PF_ACPY(init_addr, naddr, af);

		} else {
			/* later selections: step the counter by one */
			PF_AINC(&rpool->counter, af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
		}
		break;
	case PF_POOL_SRCHASH:
		/* derive the host bits from a keyed hash of the source */
		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/*
		 * First try to continue within the current pool entry.  A
		 * zero return from pfr_pool_get() is treated as "counter now
		 * holds a usable address".
		 */
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af))
				goto get_addr;
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
			/* presumably: counter still lies within raddr/rmask */
			goto get_addr;

	try_next:
		/* advance to the next pool entry, wrapping to the first */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
			rpool->cur = TAILQ_FIRST(&rpool->list);
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (rpool->cur != acur)
					goto try_next;
				/* back at the starting entry: give up */
				return (1);
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (rpool->cur != acur)
					goto try_next;
				/* back at the starting entry: give up */
				return (1);
			}
		} else {
			/* plain address entry: restart the counter at it */
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, af);
		}

	get_addr:
		/* hand out the counter value, then step it for the next call */
		PF_ACPY(naddr, &rpool->counter, af);
		if (init_addr != NULL && PF_AZERO(init_addr, af))
			PF_ACPY(init_addr, naddr, af);
		PF_AINC(&rpool->counter, af);
		break;
	}
	/* record the selection in the source node, if there is one */
	if (*sn != NULL)
		PF_ACPY(&(*sn)->raddr, naddr, af);

	if (pf_status.debug >= PF_DEBUG_NOISY &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, af);
		printf("\n");
	}

	return (0);
}
442
443int
444pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, struct pf_addr *saddr,
445    u_int16_t *sport, struct pf_addr *daddr, u_int16_t *dport)
446{
447	struct pf_addr	naddr;
448	u_int16_t	nport = 0;
449
450	struct pf_src_node srcnode, *sn = &srcnode;
451
452	if (!TAILQ_EMPTY(&r->nat.list)) {
453		if (pf_get_sport(pd->af, pd->proto, r, saddr,
454		    daddr, *dport, &naddr, &nport, r->nat.proxy_port[0],
455		    r->nat.proxy_port[1], &sn)) {
456			DPFPRINTF(PF_DEBUG_MISC,
457			    ("pf: NAT proxy port allocation "
458			    "(%u-%u) failed\n",
459			    r->nat.proxy_port[0],
460			    r->nat.proxy_port[1]));
461			return (-1);
462		}
463		PF_ACPY(saddr, &naddr, pd->af);
464		*sport = nport;
465	}
466	if (!TAILQ_EMPTY(&r->rdr.list)) {
467		if (pf_map_addr(pd->af, r, saddr, &naddr, NULL, &sn, &r->rdr))
468			return (-1);
469		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
470			PF_POOLMASK(&naddr, &naddr,  &r->rdr.cur->addr.v.a.mask,
471			    daddr, pd->af);
472
473			if (r->rdr.proxy_port[1]) {
474				u_int32_t	tmp_nport;
475
476				tmp_nport = ((ntohs(*dport) -
477				    ntohs(r->dst.port[0])) %
478				    (r->rdr.proxy_port[1] -
479				    r->rdr.proxy_port[0] + 1)) +
480				    r->rdr.proxy_port[0];
481
482				/* wrap around if necessary */
483				if (tmp_nport > 65535)
484					tmp_nport -= 65535;
485				nport = htons((u_int16_t)tmp_nport);
486			} else if (r->rdr.proxy_port[0])
487				nport = htons(r->rdr.proxy_port[0]);
488
489		PF_ACPY(daddr, &naddr, pd->af);
490		if (nport)
491			*dport = nport;
492	}
493
494	return (0);
495}
496
497