pf_lb.c revision 1.24
1/*	$OpenBSD: pf_lb.c,v 1.24 2012/12/29 14:59:52 markus Exp $ */
2
3/*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#include "bpfilter.h"
39#include "pflog.h"
40#include "pfsync.h"
41#include "pflow.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/mbuf.h>
46#include <sys/filio.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/kernel.h>
50#include <sys/time.h>
51#include <sys/pool.h>
52#include <sys/proc.h>
53#include <sys/rwlock.h>
54#include <sys/syslog.h>
55#include <sys/stdint.h>
56
57#include <crypto/md5.h>
58
59#include <net/if.h>
60#include <net/if_types.h>
61#include <net/bpf.h>
62#include <net/route.h>
63#include <net/radix_mpath.h>
64
65#include <netinet/in.h>
66#include <netinet/in_var.h>
67#include <netinet/in_systm.h>
68#include <netinet/ip.h>
69#include <netinet/ip_var.h>
70#include <netinet/tcp.h>
71#include <netinet/tcp_seq.h>
72#include <netinet/udp.h>
73#include <netinet/ip_icmp.h>
74#include <netinet/in_pcb.h>
75#include <netinet/tcp_timer.h>
76#include <netinet/tcp_var.h>
77#include <netinet/udp_var.h>
78#include <netinet/icmp_var.h>
79#include <netinet/if_ether.h>
80
81#include <dev/rndvar.h>
82#include <net/pfvar.h>
83#include <net/if_pflog.h>
84#include <net/if_pflow.h>
85
86#if NPFSYNC > 0
87#include <net/if_pfsync.h>
88#endif /* NPFSYNC > 0 */
89
90#ifdef INET6
91#include <netinet/ip6.h>
92#include <netinet/in_pcb.h>
93#include <netinet/icmp6.h>
94#include <netinet6/nd6.h>
95#endif /* INET6 */
96
97
/*
 * Prototypes for functions defined below
 */
101
102void			 pf_hash(struct pf_addr *, struct pf_addr *,
103			    struct pf_poolhashkey *, sa_family_t);
104int			 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
105			    struct pf_addr *, u_int16_t *, u_int16_t,
106			    u_int16_t, struct pf_src_node **);
107int			 pf_get_transaddr_af(struct pf_rule *,
108			    struct pf_pdesc *, struct pf_src_node **);
109int			 pf_map_addr_sticky(sa_family_t, struct pf_rule *,
110			    struct pf_addr *, struct pf_addr *,
111			    struct pf_src_node **, struct pf_pool *,
112			    enum pf_sn_types);
113
/*
 * Scramble three words in place (used by pf_hash() with u_int32_t).
 * Every argument is both read and assigned, and is expanded several
 * times, so callers must pass plain modifiable variables.
 */
#define mix(a,b,c) \
	do {				\
		a -= b;			\
		a -= c;			\
		a ^= (c >> 13);		\
		b -= c;			\
		b -= a;			\
		b ^= (a << 8);		\
		c -= a;			\
		c -= b;			\
		c ^= (b >> 13);		\
		a -= b;			\
		a -= c;			\
		a ^= (c >> 12);		\
		b -= c;			\
		b -= a;			\
		b ^= (a << 16);		\
		c -= a;			\
		c -= b;			\
		c ^= (b >> 5);		\
		a -= b;			\
		a -= c;			\
		a ^= (c >> 3);		\
		b -= c;			\
		b -= a;			\
		b ^= (a << 10);		\
		c -= a;			\
		c -= b;			\
		c ^= (b >> 15);		\
	} while (0)
126
127/*
128 * hash function based on bridge_hash in if_bridge.c
129 */
130void
131pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
132    struct pf_poolhashkey *key, sa_family_t af)
133{
134	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
135
136	switch (af) {
137#ifdef INET
138	case AF_INET:
139		a += inaddr->addr32[0];
140		b += key->key32[1];
141		mix(a, b, c);
142		hash->addr32[0] = c + key->key32[2];
143		break;
144#endif /* INET */
145#ifdef INET6
146	case AF_INET6:
147		a += inaddr->addr32[0];
148		b += inaddr->addr32[2];
149		mix(a, b, c);
150		hash->addr32[0] = c;
151		a += inaddr->addr32[1];
152		b += inaddr->addr32[3];
153		c += key->key32[1];
154		mix(a, b, c);
155		hash->addr32[1] = c;
156		a += inaddr->addr32[2];
157		b += inaddr->addr32[1];
158		c += key->key32[2];
159		mix(a, b, c);
160		hash->addr32[2] = c;
161		a += inaddr->addr32[3];
162		b += inaddr->addr32[0];
163		c += key->key32[3];
164		mix(a, b, c);
165		hash->addr32[3] = c;
166		break;
167#endif /* INET6 */
168	}
169}
170
171int
172pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
173    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
174    struct pf_src_node **sn)
175{
176	struct pf_state_key_cmp	key;
177	struct pf_addr		init_addr;
178	u_int16_t		cut;
179
180	bzero(&init_addr, sizeof(init_addr));
181	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
182	    PF_SN_NAT))
183		return (1);
184
185	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
186		if (pd->ndport == htons(ICMP6_ECHO_REQUEST) ||
187		    pd->ndport == htons(ICMP_ECHO)) {
188			low = 1;
189			high = 65535;
190		} else
191			return (0);	/* Don't try to modify non-echo ICMP */
192	}
193
194	do {
195		key.af = pd->naf;
196		key.proto = pd->proto;
197		key.rdomain = pd->rdomain;
198		PF_ACPY(&key.addr[0], &pd->ndaddr, key.af);
199		PF_ACPY(&key.addr[1], naddr, key.af);
200		key.port[0] = pd->ndport;
201
202		/*
203		 * port search; start random, step;
204		 * similar 2 portloop in in_pcbbind
205		 */
206		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
207		    pd->proto == IPPROTO_ICMP)) {
208			/* XXX bug: icmp states dont use the id on both
209			 * XXX sides (traceroute -I through nat) */
210			key.port[1] = pd->nsport;
211			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
212				*nport = pd->nsport;
213				return (0);
214			}
215		} else if (low == 0 && high == 0) {
216			key.port[1] = pd->nsport;
217			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
218				*nport = pd->nsport;
219				return (0);
220			}
221		} else if (low == high) {
222			key.port[1] = htons(low);
223			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
224				*nport = htons(low);
225				return (0);
226			}
227		} else {
228			u_int16_t tmp;
229
230			if (low > high) {
231				tmp = low;
232				low = high;
233				high = tmp;
234			}
235			/* low < high */
236			cut = arc4random_uniform(1 + high - low) + low;
237			/* low <= cut <= high */
238			for (tmp = cut; tmp <= high; ++(tmp)) {
239				key.port[1] = htons(tmp);
240				if (pf_find_state_all(&key, PF_IN, NULL) ==
241				    NULL && !in_baddynamic(tmp, pd->proto)) {
242					*nport = htons(tmp);
243					return (0);
244				}
245			}
246			for (tmp = cut - 1; tmp >= low; --(tmp)) {
247				key.port[1] = htons(tmp);
248				if (pf_find_state_all(&key, PF_IN, NULL) ==
249				    NULL && !in_baddynamic(tmp, pd->proto)) {
250					*nport = htons(tmp);
251					return (0);
252				}
253			}
254		}
255
256		switch (r->nat.opts & PF_POOL_TYPEMASK) {
257		case PF_POOL_RANDOM:
258		case PF_POOL_ROUNDROBIN:
259		case PF_POOL_LEASTSTATES:
260			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
261			    &init_addr, sn, &r->nat, PF_SN_NAT))
262				return (1);
263			break;
264		case PF_POOL_NONE:
265		case PF_POOL_SRCHASH:
266		case PF_POOL_BITMASK:
267		default:
268			return (1);
269		}
270	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
271	return (1);					/* none available */
272}
273
274int
275pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
276    struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
277    enum pf_sn_types type)
278{
279	struct pf_addr		*raddr, *rmask, *cached;
280	struct pf_state		*s;
281	struct pf_src_node	 k;
282	int			 valid;
283
284	k.af = af;
285	k.type = type;
286	PF_ACPY(&k.addr, saddr, af);
287	k.rule.ptr = r;
288	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
289	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
290	if (sns[type] == NULL)
291		return (-1);
292
293	/* check if the cached entry is still valid */
294	cached = &(sns[type])->raddr;
295	valid = 0;
296	if (PF_AZERO(cached, af)) {
297		valid = 1;
298	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
299		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
300		    af, 0))
301			valid = 1;
302	} else if (rpool->addr.type == PF_ADDR_TABLE) {
303		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
304			valid = 1;
305	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
306		raddr = &rpool->addr.v.a.addr;
307		rmask = &rpool->addr.v.a.mask;
308		valid = pf_match_addr(0, raddr, rmask, cached, af);
309	}
310	if (!valid) {
311		if (pf_status.debug >= LOG_DEBUG) {
312			log(LOG_DEBUG, "pf: pf_map_addr: "
313			    "stale src tracking (%u) ", type);
314			pf_print_host(&k.addr, 0, af);
315			addlog(" to ");
316			pf_print_host(cached, 0, af);
317			addlog("\n");
318		}
319		if (sns[type]->states != 0) {
320			/* XXX expensive */
321			RB_FOREACH(s, pf_state_tree_id,
322			   &tree_id)
323				pf_state_rm_src_node(s,
324				    sns[type]);
325		}
326		sns[type]->expire = 1;
327		pf_remove_src_node(sns[type]);
328		sns[type] = NULL;
329		return (-1);
330	}
331	if (!PF_AZERO(cached, af))
332		PF_ACPY(naddr, cached, af);
333	if (pf_status.debug >= LOG_DEBUG) {
334		log(LOG_DEBUG, "pf: pf_map_addr: "
335		    "src tracking (%u) maps ", type);
336		pf_print_host(&k.addr, 0, af);
337		addlog(" to ");
338		pf_print_host(naddr, 0, af);
339		addlog("\n");
340	}
341	return (0);
342}
343
344int
345pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
346    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
347    struct pf_pool *rpool, enum pf_sn_types type)
348{
349	unsigned char		 hash[16];
350	struct pf_addr		 faddr;
351	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
352	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
353	u_int64_t		 states;
354	u_int16_t		 weight;
355	u_int64_t		 load;
356	u_int64_t		 cload;
357
358	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
359	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
360	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
361		return (0);
362
363	if (rpool->addr.type == PF_ADDR_NOROUTE)
364		return (1);
365	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
366		switch (af) {
367#ifdef INET
368		case AF_INET:
369			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
370			    ((rpool->opts & PF_POOL_TYPEMASK) !=
371			    PF_POOL_ROUNDROBIN) &&
372			    ((rpool->opts & PF_POOL_TYPEMASK) !=
373			    PF_POOL_LEASTSTATES))
374				return (1);
375			raddr = &rpool->addr.p.dyn->pfid_addr4;
376			rmask = &rpool->addr.p.dyn->pfid_mask4;
377			break;
378#endif /* INET */
379#ifdef INET6
380		case AF_INET6:
381			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
382			    ((rpool->opts & PF_POOL_TYPEMASK) !=
383			    PF_POOL_ROUNDROBIN) &&
384			    ((rpool->opts & PF_POOL_TYPEMASK) !=
385			    PF_POOL_LEASTSTATES))
386				return (1);
387			raddr = &rpool->addr.p.dyn->pfid_addr6;
388			rmask = &rpool->addr.p.dyn->pfid_mask6;
389			break;
390#endif /* INET6 */
391		}
392	} else if (rpool->addr.type == PF_ADDR_TABLE) {
393		if (((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) &&
394		    ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
395			return (1); /* unsupported */
396	} else {
397		raddr = &rpool->addr.v.a.addr;
398		rmask = &rpool->addr.v.a.mask;
399	}
400
401	switch (rpool->opts & PF_POOL_TYPEMASK) {
402	case PF_POOL_NONE:
403		PF_ACPY(naddr, raddr, af);
404		break;
405	case PF_POOL_BITMASK:
406		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
407		break;
408	case PF_POOL_RANDOM:
409		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
410			switch (af) {
411#ifdef INET
412			case AF_INET:
413				rpool->counter.addr32[0] = htonl(arc4random());
414				break;
415#endif /* INET */
416#ifdef INET6
417			case AF_INET6:
418				if (rmask->addr32[3] != 0xffffffff)
419					rpool->counter.addr32[3] =
420					    htonl(arc4random());
421				else
422					break;
423				if (rmask->addr32[2] != 0xffffffff)
424					rpool->counter.addr32[2] =
425					    htonl(arc4random());
426				else
427					break;
428				if (rmask->addr32[1] != 0xffffffff)
429					rpool->counter.addr32[1] =
430					    htonl(arc4random());
431				else
432					break;
433				if (rmask->addr32[0] != 0xffffffff)
434					rpool->counter.addr32[0] =
435					    htonl(arc4random());
436				break;
437#endif /* INET6 */
438			}
439			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
440			PF_ACPY(init_addr, naddr, af);
441
442		} else {
443			PF_AINC(&rpool->counter, af);
444			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
445		}
446		break;
447	case PF_POOL_SRCHASH:
448		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
449		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
450		break;
451	case PF_POOL_ROUNDROBIN:
452		if (rpool->addr.type == PF_ADDR_TABLE ||
453		    rpool->addr.type == PF_ADDR_DYNIFTL) {
454			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
455				/*
456				 * reset counter in case its value
457				 * has been removed from the pool.
458				 */
459				bzero(&rpool->counter, sizeof(rpool->counter));
460				if (pfr_pool_get(rpool, &raddr, &rmask, af))
461					return (1);
462			}
463		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
464			return (1);
465
466		/* iterate over table if it contains entries which are weighted */
467		if ((rpool->addr.type == PF_ADDR_TABLE &&
468		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
469		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
470		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
471			do {
472				if (rpool->addr.type == PF_ADDR_TABLE ||
473				    rpool->addr.type == PF_ADDR_DYNIFTL) {
474					if (pfr_pool_get(rpool,
475					    &raddr, &rmask, af))
476						return (1);
477				} else {
478					log(LOG_ERR, "pf: pf_map_addr: "
479					    "weighted RR failure");
480					return (1);
481				}
482				if (rpool->weight >= rpool->curweight)
483					break;
484				PF_AINC(&rpool->counter, af);
485			} while (1);
486
487			weight = rpool->weight;
488		}
489
490		PF_ACPY(naddr, &rpool->counter, af);
491		if (init_addr != NULL && PF_AZERO(init_addr, af))
492			PF_ACPY(init_addr, naddr, af);
493		PF_AINC(&rpool->counter, af);
494		break;
495	case PF_POOL_LEASTSTATES:
496		/* retrieve an address first */
497		if (rpool->addr.type == PF_ADDR_TABLE ||
498		    rpool->addr.type == PF_ADDR_DYNIFTL) {
499			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
500				/* see PF_POOL_ROUNDROBIN */
501				bzero(&rpool->counter, sizeof(rpool->counter));
502				if (pfr_pool_get(rpool, &raddr, &rmask, af))
503					return (1);
504			}
505		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
506			return (1);
507
508		states = rpool->states;
509		weight = rpool->weight;
510
511		if ((rpool->addr.type == PF_ADDR_TABLE &&
512		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
513		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
514		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
515			load = ((UINT16_MAX * rpool->states) / rpool->weight);
516		else
517			load = states;
518
519		PF_ACPY(&faddr, &rpool->counter, af);
520
521		PF_ACPY(naddr, &rpool->counter, af);
522		if (init_addr != NULL && PF_AZERO(init_addr, af))
523			PF_ACPY(init_addr, naddr, af);
524
525		/*
526		 * iterate *once* over whole table and find destination with
527		 * least connection
528		 */
529		do  {
530			PF_AINC(&rpool->counter, af);
531			if (rpool->addr.type == PF_ADDR_TABLE ||
532			    rpool->addr.type == PF_ADDR_DYNIFTL) {
533				if (pfr_pool_get(rpool, &raddr, &rmask, af))
534					return (1);
535			} else if (pf_match_addr(0, raddr, rmask,
536			    &rpool->counter, af))
537				return (1);
538
539			if ((rpool->addr.type == PF_ADDR_TABLE &&
540			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
541			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
542			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
543				cload = ((UINT16_MAX * rpool->states)
544					/ rpool->weight);
545			else
546				cload = rpool->states;
547
548			/* find lc minimum */
549			if (cload < load) {
550				states = rpool->states;
551				weight = rpool->weight;
552				load = cload;
553
554				PF_ACPY(naddr, &rpool->counter, af);
555				if (init_addr != NULL &&
556				    PF_AZERO(init_addr, af))
557				    PF_ACPY(init_addr, naddr, af);
558			}
559		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
560		    (states > 0));
561
562		if (rpool->addr.type == PF_ADDR_TABLE) {
563			if (pfr_states_increase(rpool->addr.p.tbl,
564			    naddr, af) == -1) {
565				if (pf_status.debug >= LOG_DEBUG) {
566					log(LOG_DEBUG,"pf: pf_map_addr: "
567					    "selected address ");
568					pf_print_host(naddr, 0, af);
569					addlog(". Failed to increase count!\n");
570				}
571				return (1);
572			}
573		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
574			if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
575			    naddr, af) == -1) {
576				if (pf_status.debug >= LOG_DEBUG) {
577					log(LOG_DEBUG, "pf: pf_map_addr: "
578					    "selected address ");
579					pf_print_host(naddr, 0, af);
580					addlog(". Failed to increase count!\n");
581				}
582				return (1);
583			}
584		}
585		break;
586	}
587
588	if (rpool->opts & PF_POOL_STICKYADDR) {
589		if (sns[type] != NULL) {
590			pf_remove_src_node(sns[type]);
591			sns[type] = NULL;
592		}
593		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
594		    0))
595			return (1);
596	}
597
598	if (pf_status.debug >= LOG_NOTICE &&
599	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
600		log(LOG_NOTICE, "pf: pf_map_addr: selected address ");
601		pf_print_host(naddr, 0, af);
602		if ((rpool->opts & PF_POOL_TYPEMASK) ==
603		    PF_POOL_LEASTSTATES)
604			addlog(" with state count %llu", states);
605		if ((rpool->addr.type == PF_ADDR_TABLE &&
606		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
607		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
608		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
609			addlog(" with weight %u", weight);
610		addlog("\n");
611	}
612
613	return (0);
614}
615
616int
617pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
618    struct pf_src_node **sns, struct pf_rule **nr)
619{
620	struct pf_addr	naddr;
621	u_int16_t	nport = 0;
622
623#ifdef INET6
624	if (pd->af != pd->naf)
625		return (pf_get_transaddr_af(r, pd, sns));
626#endif /* INET6 */
627
628	if (r->nat.addr.type != PF_ADDR_NONE) {
629		/* XXX is this right? what if rtable is changed at the same
630		 * XXX time? where do I need to figure out the sport? */
631		if (pf_get_sport(pd, r, &naddr, &nport,
632		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
633			DPFPRINTF(LOG_NOTICE,
634			    "pf: NAT proxy port allocation (%u-%u) failed",
635			    r->nat.proxy_port[0],
636			    r->nat.proxy_port[1]);
637			return (-1);
638		}
639		*nr = r;
640		PF_ACPY(&pd->nsaddr, &naddr, pd->af);
641		pd->nsport = nport;
642	}
643	if (r->rdr.addr.type != PF_ADDR_NONE) {
644		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
645		    &r->rdr, PF_SN_RDR))
646			return (-1);
647		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
648			PF_POOLMASK(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
649			    &pd->ndaddr, pd->af);
650
651			if (r->rdr.proxy_port[1]) {
652				u_int32_t	tmp_nport;
653
654				tmp_nport = ((ntohs(pd->ndport) -
655				    ntohs(r->dst.port[0])) %
656				    (r->rdr.proxy_port[1] -
657				    r->rdr.proxy_port[0] + 1)) +
658				    r->rdr.proxy_port[0];
659
660				/* wrap around if necessary */
661				if (tmp_nport > 65535)
662					tmp_nport -= 65535;
663				nport = htons((u_int16_t)tmp_nport);
664			} else if (r->rdr.proxy_port[0])
665				nport = htons(r->rdr.proxy_port[0]);
666		*nr = r;
667		PF_ACPY(&pd->ndaddr, &naddr, pd->af);
668		if (nport)
669			pd->ndport = nport;
670	}
671
672	return (0);
673}
674
675#ifdef INET6
676int
677pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
678    struct pf_src_node **sns)
679{
680	struct pf_addr	ndaddr, nsaddr, naddr;
681	u_int16_t	nport = 0;
682	int		prefixlen = 96;
683
684	if (pf_status.debug >= LOG_NOTICE) {
685		log(LOG_NOTICE, "pf: af-to %s %s, ",
686		    pd->naf == AF_INET ? "inet" : "inet6",
687		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
688		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
689		addlog(" -> ");
690		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
691		addlog("\n");
692	}
693
694	if (r->nat.addr.type == PF_ADDR_NONE)
695		panic("pf_get_transaddr_af: no nat pool for source address");
696
697	/* get source address and port */
698	if (pf_get_sport(pd, r, &nsaddr, &nport,
699	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
700		DPFPRINTF(LOG_NOTICE,
701		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
702		    r->nat.proxy_port[0],
703		    r->nat.proxy_port[1]);
704		return (-1);
705	}
706	pd->nsport = nport;
707
708	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
709		if (pd->dir == PF_IN) {
710			NTOHS(pd->ndport);
711			if (pd->ndport == ICMP6_ECHO_REQUEST)
712				pd->ndport = ICMP_ECHO;
713			else if (pd->ndport == ICMP6_ECHO_REPLY)
714				pd->ndport = ICMP_ECHOREPLY;
715			HTONS(pd->ndport);
716		} else {
717			NTOHS(pd->nsport);
718			if (pd->nsport == ICMP6_ECHO_REQUEST)
719				pd->nsport = ICMP_ECHO;
720			else if (pd->nsport == ICMP6_ECHO_REPLY)
721				pd->nsport = ICMP_ECHOREPLY;
722			HTONS(pd->nsport);
723		}
724	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
725		if (pd->dir == PF_IN) {
726			NTOHS(pd->ndport);
727			if (pd->ndport == ICMP_ECHO)
728				pd->ndport = ICMP6_ECHO_REQUEST;
729			else if (pd->ndport == ICMP_ECHOREPLY)
730				pd->ndport = ICMP6_ECHO_REPLY;
731			HTONS(pd->ndport);
732		} else {
733			NTOHS(pd->nsport);
734			if (pd->nsport == ICMP_ECHO)
735				pd->nsport = ICMP6_ECHO_REQUEST;
736			else if (pd->nsport == ICMP_ECHOREPLY)
737				pd->nsport = ICMP6_ECHO_REPLY;
738			HTONS(pd->nsport);
739		}
740	}
741
742	/* get the destination address and port */
743	if (r->rdr.addr.type != PF_ADDR_NONE) {
744		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
745		    &r->rdr, PF_SN_RDR))
746			return (-1);
747		if (r->rdr.proxy_port[0])
748			pd->ndport = htons(r->rdr.proxy_port[0]);
749
750		if (pd->naf == AF_INET) {
751			/* The prefix is the IPv4 rdr address */
752			prefixlen = in_mask2len((struct in_addr *)
753			    &r->rdr.addr.v.a.mask);
754			inet_nat46(pd->naf, &pd->ndaddr,
755			    &ndaddr, &naddr, prefixlen);
756		} else {
757			/* The prefix is the IPv6 rdr address */
758			prefixlen =
759			    in6_mask2len((struct in6_addr *)
760			    &r->rdr.addr.v.a.mask, NULL);
761			inet_nat64(pd->naf, &pd->ndaddr,
762			    &ndaddr, &naddr, prefixlen);
763		}
764	} else {
765		if (pd->naf == AF_INET) {
766			/* The prefix is the IPv6 dst address */
767			prefixlen =
768			    in6_mask2len((struct in6_addr *)
769			    &r->dst.addr.v.a.mask, NULL);
770			if (prefixlen < 32)
771				prefixlen = 96;
772			inet_nat64(pd->naf, &pd->ndaddr,
773			    &ndaddr, &pd->ndaddr, prefixlen);
774		} else {
775			/*
776			 * The prefix is the IPv6 nat address
777			 * (that was stored in pd->nsaddr)
778			 */
779			prefixlen = in6_mask2len((struct in6_addr *)
780			    &r->nat.addr.v.a.mask, NULL);
781			if (prefixlen > 96)
782				prefixlen = 96;
783			inet_nat64(pd->naf, &pd->ndaddr,
784			    &ndaddr, &nsaddr, prefixlen);
785		}
786	}
787
788	PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf);
789	PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf);
790
791	if (pf_status.debug >= LOG_NOTICE) {
792		log(LOG_NOTICE, "pf: af-to %s %s done, prefixlen %d, ",
793		    pd->naf == AF_INET ? "inet" : "inet6",
794		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
795		    prefixlen);
796		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
797		addlog(" -> ");
798		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
799		addlog("\n");
800	}
801
802	return (0);
803}
804#endif /* INET6 */
805
806int
807pf_postprocess_addr(struct pf_state *cur) {
808	struct pf_rule *nr;
809
810	nr = cur->natrule.ptr;
811
812	/* decrease counter */
813	if (nr != NULL) {
814		int			 slbcount;
815		struct pf_pool		 rpool;
816		struct pf_addr		 lookup_addr;
817		struct pf_state_key	*sks;
818
819		sks = cur ? cur->key[PF_SK_STACK] : NULL;
820
821		/* check for outgoing or ingoing balancing */
822		if (nr->rt == PF_ROUTETO)
823			lookup_addr = cur->rt_addr;
824		else if (sks != NULL)
825			lookup_addr = sks->addr[1];
826		else {
827			if (pf_status.debug >= LOG_DEBUG) {
828				log(LOG_DEBUG, "pf: pf_unlink_state: "
829				    "unable to optain address");
830			}
831			return (1);
832		}
833
834		/* check for appropriate pool */
835		if (nr->rdr.addr.type != PF_ADDR_NONE)
836			rpool = nr->rdr;
837		else if (nr->nat.addr.type != PF_ADDR_NONE)
838			rpool = nr->nat;
839		else if (nr->route.addr.type != PF_ADDR_NONE)
840			rpool = nr->route;
841
842		if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
843			return (0);
844
845		if (rpool.addr.type == PF_ADDR_TABLE) {
846			if ((slbcount = pfr_states_decrease(
847			    rpool.addr.p.tbl,
848			    &lookup_addr, sks->af)) == -1) {
849				if (pf_status.debug >= LOG_DEBUG) {
850					log(LOG_DEBUG, "pf: pf_unlink_state: "
851					    "selected address ");
852					pf_print_host(&lookup_addr,
853					    sks->port[0], sks->af);
854					addlog(". Failed to "
855					    "decrease count!\n");
856				}
857				return (1);
858			}
859		} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
860			if ((slbcount = pfr_states_decrease(
861			    rpool.addr.p.dyn->pfid_kt,
862			    &lookup_addr, sks->af)) == -1) {
863				if (pf_status.debug >= LOG_DEBUG) {
864					log(LOG_DEBUG,
865					    "pf: pf_unlink_state: "
866					    "selected address ");
867					pf_print_host(&lookup_addr,
868					    sks->port[0], sks->af);
869					addlog(". Failed to "
870					    "decrease count!\n");
871				}
872				return (1);
873			}
874		}
875		if (slbcount > -1) {
876			if (pf_status.debug >= LOG_NOTICE) {
877				log(LOG_NOTICE,
878				    "pf: pf_unlink_state: selected address ");
879				pf_print_host(&lookup_addr, sks->port[0],
880				    sks->af);
881				addlog(" decreased state count to %u\n",
882				    slbcount);
883			}
884		}
885	}
886
887	return (0);
888}
889