pf_lb.c revision 1.14
1/*	$OpenBSD: pf_lb.c,v 1.14 2011/05/17 12:44:05 mikeb Exp $ */
2
3/*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#include "bpfilter.h"
39#include "pflog.h"
40#include "pfsync.h"
41#include "pflow.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/mbuf.h>
46#include <sys/filio.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/kernel.h>
50#include <sys/time.h>
51#include <sys/pool.h>
52#include <sys/proc.h>
53#include <sys/rwlock.h>
54#include <sys/syslog.h>
55
56#include <crypto/md5.h>
57
58#include <net/if.h>
59#include <net/if_types.h>
60#include <net/bpf.h>
61#include <net/route.h>
62#include <net/radix_mpath.h>
63
64#include <netinet/in.h>
65#include <netinet/in_var.h>
66#include <netinet/in_systm.h>
67#include <netinet/ip.h>
68#include <netinet/ip_var.h>
69#include <netinet/tcp.h>
70#include <netinet/tcp_seq.h>
71#include <netinet/udp.h>
72#include <netinet/ip_icmp.h>
73#include <netinet/in_pcb.h>
74#include <netinet/tcp_timer.h>
75#include <netinet/tcp_var.h>
76#include <netinet/udp_var.h>
77#include <netinet/icmp_var.h>
78#include <netinet/if_ether.h>
79
80#include <dev/rndvar.h>
81#include <net/pfvar.h>
82#include <net/if_pflog.h>
83#include <net/if_pflow.h>
84
85#if NPFSYNC > 0
86#include <net/if_pfsync.h>
87#endif /* NPFSYNC > 0 */
88
89#ifdef INET6
90#include <netinet/ip6.h>
91#include <netinet/in_pcb.h>
92#include <netinet/icmp6.h>
93#include <netinet6/nd6.h>
94#endif /* INET6 */
95
96
/*
 * Prototypes for functions defined in this file.
 */

void	pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *,
	    sa_family_t);
int	pf_get_sport(struct pf_pdesc *, struct pf_rule *, struct pf_addr *,
	    u_int16_t *, u_int16_t, u_int16_t, struct pf_src_node **);
int	pf_islinklocal(sa_family_t, struct pf_addr *);
107
/*
 * One step of mix(): subtract the other two words from x, then xor in z
 * shifted right (mix_step_r) or left (mix_step_l) by n bits.
 * The arguments must be plain lvalues; they are modified in place.
 */
#define mix_step_r(x, y, z, n)	do { x -= y; x -= z; x ^= (z >> (n)); } while (0)
#define mix_step_l(x, y, z, n)	do { x -= y; x -= z; x ^= (z << (n)); } while (0)

/*
 * Scramble the three words a, b and c in place: three passes over
 * (a, b, c), each pass updating a, then b, then c.
 */
#define mix(a,b,c) \
	do {					\
		mix_step_r(a, b, c, 13);	\
		mix_step_l(b, c, a, 8);		\
		mix_step_r(c, a, b, 13);	\
		mix_step_r(a, b, c, 12);	\
		mix_step_l(b, c, a, 16);	\
		mix_step_r(c, a, b, 5);		\
		mix_step_r(a, b, c, 3);		\
		mix_step_l(b, c, a, 10);	\
		mix_step_r(c, a, b, 15);	\
	} while (0)
120
121/*
122 * hash function based on bridge_hash in if_bridge.c
123 */
124void
125pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
126    struct pf_poolhashkey *key, sa_family_t af)
127{
128	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
129
130	switch (af) {
131#ifdef INET
132	case AF_INET:
133		a += inaddr->addr32[0];
134		b += key->key32[1];
135		mix(a, b, c);
136		hash->addr32[0] = c + key->key32[2];
137		break;
138#endif /* INET */
139#ifdef INET6
140	case AF_INET6:
141		a += inaddr->addr32[0];
142		b += inaddr->addr32[2];
143		mix(a, b, c);
144		hash->addr32[0] = c;
145		a += inaddr->addr32[1];
146		b += inaddr->addr32[3];
147		c += key->key32[1];
148		mix(a, b, c);
149		hash->addr32[1] = c;
150		a += inaddr->addr32[2];
151		b += inaddr->addr32[1];
152		c += key->key32[2];
153		mix(a, b, c);
154		hash->addr32[2] = c;
155		a += inaddr->addr32[3];
156		b += inaddr->addr32[0];
157		c += key->key32[3];
158		mix(a, b, c);
159		hash->addr32[3] = c;
160		break;
161#endif /* INET6 */
162	}
163}
164
165int
166pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
167    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
168    struct pf_src_node **sn)
169{
170	struct pf_state_key_cmp	key;
171	struct pf_addr		init_addr;
172	u_int16_t		cut;
173
174	bzero(&init_addr, sizeof(init_addr));
175	if (pf_map_addr(pd->af, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
176	    PF_SN_NAT))
177		return (1);
178
179	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
180		if (pd->ndport == htons(ICMP6_ECHO_REQUEST) ||
181		    pd->ndport == htons(ICMP_ECHO)) {
182			low = 1;
183			high = 65535;
184		} else
185			return (0);	/* Don't try to modify non-echo ICMP */
186	}
187
188	do {
189		key.af = pd->af;
190		key.proto = pd->proto;
191		key.rdomain = pd->rdomain;
192		PF_ACPY(&key.addr[0], &pd->ndaddr, key.af);
193		PF_ACPY(&key.addr[1], naddr, key.af);
194		key.port[0] = pd->ndport;
195
196		/*
197		 * port search; start random, step;
198		 * similar 2 portloop in in_pcbbind
199		 */
200		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
201		    pd->proto == IPPROTO_ICMP)) {
202			/* XXX bug: icmp states dont use the id on both
203			 * XXX sides (traceroute -I through nat) */
204			key.port[1] = pd->nsport;
205			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
206				*nport = pd->nsport;
207				return (0);
208			}
209		} else if (low == 0 && high == 0) {
210			key.port[1] = pd->nsport;
211			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
212				*nport = pd->nsport;
213				return (0);
214			}
215		} else if (low == high) {
216			key.port[1] = htons(low);
217			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
218				*nport = htons(low);
219				return (0);
220			}
221		} else {
222			u_int16_t tmp;
223
224			if (low > high) {
225				tmp = low;
226				low = high;
227				high = tmp;
228			}
229			/* low < high */
230			cut = arc4random_uniform(1 + high - low) + low;
231			/* low <= cut <= high */
232			for (tmp = cut; tmp <= high; ++(tmp)) {
233				key.port[1] = htons(tmp);
234				if (pf_find_state_all(&key, PF_IN, NULL) ==
235				    NULL && !in_baddynamic(tmp, pd->proto)) {
236					*nport = htons(tmp);
237					return (0);
238				}
239			}
240			for (tmp = cut - 1; tmp >= low; --(tmp)) {
241				key.port[1] = htons(tmp);
242				if (pf_find_state_all(&key, PF_IN, NULL) ==
243				    NULL && !in_baddynamic(tmp, pd->proto)) {
244					*nport = htons(tmp);
245					return (0);
246				}
247			}
248		}
249
250		switch (r->nat.opts & PF_POOL_TYPEMASK) {
251		case PF_POOL_RANDOM:
252		case PF_POOL_ROUNDROBIN:
253			if (pf_map_addr(pd->af, r, &pd->nsaddr, naddr,
254			    &init_addr, sn, &r->nat, PF_SN_NAT))
255				return (1);
256			break;
257		case PF_POOL_NONE:
258		case PF_POOL_SRCHASH:
259		case PF_POOL_BITMASK:
260		default:
261			return (1);
262		}
263	} while (! PF_AEQ(&init_addr, naddr, pd->af) );
264	return (1);					/* none available */
265}
266
267int
268pf_islinklocal(sa_family_t af, struct pf_addr *addr)
269{
270	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
271		return (1);
272	return (0);
273}
274
275int
276pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
277    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
278    struct pf_pool *rpool, enum pf_sn_types type)
279{
280	unsigned char		 hash[16];
281	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
282	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
283	struct pf_src_node	 k;
284
285	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
286	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
287		k.af = af;
288		k.type = type;
289		PF_ACPY(&k.addr, saddr, af);
290		k.rule.ptr = r;
291		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
292		sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
293		if (sns[type] != NULL) {
294			if (!PF_AZERO(&(sns[type])->raddr, af))
295				PF_ACPY(naddr, &(sns[type])->raddr, af);
296			if (pf_status.debug >= LOG_DEBUG) {
297				log(LOG_DEBUG, "pf: pf_map_addr: "
298				    "src tracking (%u) maps ", type);
299				pf_print_host(&k.addr, 0, af);
300				addlog(" to ");
301				pf_print_host(naddr, 0, af);
302				addlog("\n");
303			}
304			return (0);
305		}
306	}
307
308	if (rpool->addr.type == PF_ADDR_NOROUTE)
309		return (1);
310	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
311		switch (af) {
312#ifdef INET
313		case AF_INET:
314			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
315			    (rpool->opts & PF_POOL_TYPEMASK) !=
316			    PF_POOL_ROUNDROBIN)
317				return (1);
318			 raddr = &rpool->addr.p.dyn->pfid_addr4;
319			 rmask = &rpool->addr.p.dyn->pfid_mask4;
320			break;
321#endif /* INET */
322#ifdef INET6
323		case AF_INET6:
324			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
325			    (rpool->opts & PF_POOL_TYPEMASK) !=
326			    PF_POOL_ROUNDROBIN)
327				return (1);
328			raddr = &rpool->addr.p.dyn->pfid_addr6;
329			rmask = &rpool->addr.p.dyn->pfid_mask6;
330			break;
331#endif /* INET6 */
332		}
333	} else if (rpool->addr.type == PF_ADDR_TABLE) {
334		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
335			return (1); /* unsupported */
336	} else {
337		raddr = &rpool->addr.v.a.addr;
338		rmask = &rpool->addr.v.a.mask;
339	}
340
341	switch (rpool->opts & PF_POOL_TYPEMASK) {
342	case PF_POOL_NONE:
343		PF_ACPY(naddr, raddr, af);
344		break;
345	case PF_POOL_BITMASK:
346		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
347		break;
348	case PF_POOL_RANDOM:
349		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
350			switch (af) {
351#ifdef INET
352			case AF_INET:
353				rpool->counter.addr32[0] = htonl(arc4random());
354				break;
355#endif /* INET */
356#ifdef INET6
357			case AF_INET6:
358				if (rmask->addr32[3] != 0xffffffff)
359					rpool->counter.addr32[3] =
360					    htonl(arc4random());
361				else
362					break;
363				if (rmask->addr32[2] != 0xffffffff)
364					rpool->counter.addr32[2] =
365					    htonl(arc4random());
366				else
367					break;
368				if (rmask->addr32[1] != 0xffffffff)
369					rpool->counter.addr32[1] =
370					    htonl(arc4random());
371				else
372					break;
373				if (rmask->addr32[0] != 0xffffffff)
374					rpool->counter.addr32[0] =
375					    htonl(arc4random());
376				break;
377#endif /* INET6 */
378			}
379			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
380			PF_ACPY(init_addr, naddr, af);
381
382		} else {
383			PF_AINC(&rpool->counter, af);
384			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
385		}
386		break;
387	case PF_POOL_SRCHASH:
388		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
389		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
390		break;
391	case PF_POOL_ROUNDROBIN:
392		if (rpool->addr.type == PF_ADDR_TABLE) {
393			if (pfr_pool_get(rpool->addr.p.tbl,
394			    &rpool->tblidx, &rpool->counter,
395			    &raddr, &rmask, &rpool->kif, af, NULL))
396				return (1);
397		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
398			if (pfr_pool_get(rpool->addr.p.dyn->pfid_kt,
399			    &rpool->tblidx, &rpool->counter,
400			    &raddr, &rmask, &rpool->kif, af, pf_islinklocal))
401				return (1);
402		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
403			return (1);
404
405		PF_ACPY(naddr, &rpool->counter, af);
406		if (init_addr != NULL && PF_AZERO(init_addr, af))
407			PF_ACPY(init_addr, naddr, af);
408		PF_AINC(&rpool->counter, af);
409		break;
410	}
411
412	if (rpool->opts & PF_POOL_STICKYADDR) {
413		if (sns[type] != NULL) {
414			pf_remove_src_node(sns[type]);
415			sns[type] = NULL;
416		}
417		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
418		    0))
419			return (1);
420	}
421
422	if (pf_status.debug >= LOG_NOTICE &&
423	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
424		log(LOG_NOTICE, "pf: pf_map_addr: selected address ");
425		pf_print_host(naddr, 0, af);
426		addlog("\n");
427	}
428
429	return (0);
430}
431
432int
433pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
434    struct pf_src_node **sns)
435{
436	struct pf_addr	naddr;
437	u_int16_t	nport = 0;
438
439	if (r->nat.addr.type != PF_ADDR_NONE) {
440		/* XXX is this right? what if rtable is changed at the same
441		 * XXX time? where do I need to figure out the sport? */
442		if (pf_get_sport(pd, r, &naddr, &nport,
443		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
444			DPFPRINTF(LOG_NOTICE,
445			    "pf: NAT proxy port allocation (%u-%u) failed",
446			    r->nat.proxy_port[0],
447			    r->nat.proxy_port[1]);
448			return (-1);
449		}
450		PF_ACPY(&pd->nsaddr, &naddr, pd->af);
451		pd->nsport = nport;
452	}
453	if (r->rdr.addr.type != PF_ADDR_NONE) {
454		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
455		    &r->rdr, PF_SN_RDR))
456			return (-1);
457		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
458			PF_POOLMASK(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
459			    &pd->ndaddr, pd->af);
460
461			if (r->rdr.proxy_port[1]) {
462				u_int32_t	tmp_nport;
463
464				tmp_nport = ((ntohs(pd->ndport) -
465				    ntohs(r->dst.port[0])) %
466				    (r->rdr.proxy_port[1] -
467				    r->rdr.proxy_port[0] + 1)) +
468				    r->rdr.proxy_port[0];
469
470				/* wrap around if necessary */
471				if (tmp_nport > 65535)
472					tmp_nport -= 65535;
473				nport = htons((u_int16_t)tmp_nport);
474			} else if (r->rdr.proxy_port[0])
475				nport = htons(r->rdr.proxy_port[0]);
476
477		PF_ACPY(&pd->ndaddr, &naddr, pd->af);
478		if (nport)
479			pd->ndport = nport;
480	}
481
482	return (0);
483}
484