pf_lb.c revision 1.59
1/*	$OpenBSD: pf_lb.c,v 1.59 2017/02/08 01:20:40 jsg Exp $ */
2
3/*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 *    - Redistributions of source code must retain the above copyright
13 *      notice, this list of conditions and the following disclaimer.
14 *    - Redistributions in binary form must reproduce the above
15 *      copyright notice, this list of conditions and the following
16 *      disclaimer in the documentation and/or other materials provided
17 *      with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38#include "bpfilter.h"
39#include "pflog.h"
40#include "pfsync.h"
41#include "pflow.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/mbuf.h>
46#include <sys/filio.h>
47#include <sys/socket.h>
48#include <sys/socketvar.h>
49#include <sys/kernel.h>
50#include <sys/time.h>
51#include <sys/pool.h>
52#include <sys/rwlock.h>
53#include <sys/syslog.h>
54#include <sys/stdint.h>
55
56#include <crypto/siphash.h>
57
58#include <net/if.h>
59#include <net/bpf.h>
60#include <net/route.h>
61
62#include <netinet/in.h>
63#include <netinet/ip.h>
64#include <netinet/in_pcb.h>
65#include <netinet/ip_var.h>
66#include <netinet/ip_icmp.h>
67#include <netinet/icmp_var.h>
68#include <netinet/tcp.h>
69#include <netinet/tcp_seq.h>
70#include <netinet/tcp_timer.h>
71#include <netinet/udp.h>
72#include <netinet/udp_var.h>
73#include <netinet/if_ether.h>
74
75#ifdef INET6
76#include <netinet/ip6.h>
77#include <netinet/icmp6.h>
78#endif /* INET6 */
79
80#include <net/pfvar.h>
81#include <net/pfvar_priv.h>
82
83#if NPFLOG > 0
84#include <net/if_pflog.h>
85#endif	/* NPFLOG > 0 */
86
87#if NPFLOW > 0
88#include <net/if_pflow.h>
89#endif	/* NPFLOW > 0 */
90
91#if NPFSYNC > 0
92#include <net/if_pfsync.h>
93#endif /* NPFSYNC > 0 */
94
95u_int64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
96			    struct pf_poolhashkey *, sa_family_t);
97int			 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
98			    struct pf_addr *, u_int16_t *, u_int16_t,
99			    u_int16_t, struct pf_src_node **);
100int			 pf_get_transaddr_af(struct pf_rule *,
101			    struct pf_pdesc *, struct pf_src_node **);
102int			 pf_map_addr_sticky(sa_family_t, struct pf_rule *,
103			    struct pf_addr *, struct pf_addr *,
104			    struct pf_src_node **, struct pf_pool *,
105			    enum pf_sn_types);
106
107u_int64_t
108pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
109    struct pf_poolhashkey *key, sa_family_t af)
110{
111	uint64_t res = 0;
112#ifdef INET6
113	union {
114		uint64_t hash64;
115		uint32_t hash32[2];
116	} h;
117#endif	/* INET6 */
118
119	switch (af) {
120	case AF_INET:
121		res = SipHash24((SIPHASH_KEY *)key,
122		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
123		hash->addr32[0] = res;
124		break;
125#ifdef INET6
126	case AF_INET6:
127		res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
128		    4 * sizeof(inaddr->addr32[0]));
129		h.hash64 = res;
130		hash->addr32[0] = h.hash32[0];
131		hash->addr32[1] = h.hash32[1];
132		/*
133		 * siphash isn't big enough, but flipping it around is
134		 * good enough here.
135		 */
136		hash->addr32[2] = ~h.hash32[1];
137		hash->addr32[3] = ~h.hash32[0];
138		break;
139#endif /* INET6 */
140	default:
141		unhandled_af(af);
142	}
143	return (res);
144}
145
146int
147pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
148    struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
149    struct pf_src_node **sn)
150{
151	struct pf_state_key_cmp	key;
152	struct pf_addr		init_addr;
153	u_int16_t		cut;
154	int			dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
155	int			sidx = pd->sidx;
156	int			didx = pd->didx;
157
158	bzero(&init_addr, sizeof(init_addr));
159	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
160	    PF_SN_NAT))
161		return (1);
162
163	if (pd->proto == IPPROTO_ICMP) {
164		if (pd->ndport == htons(ICMP_ECHO)) {
165			low = 1;
166			high = 65535;
167		} else
168			return (0);	/* Don't try to modify non-echo ICMP */
169	}
170#ifdef INET6
171	if (pd->proto == IPPROTO_ICMPV6) {
172		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
173			low = 1;
174			high = 65535;
175		} else
176			return (0);	/* Don't try to modify non-echo ICMP */
177	}
178#endif /* INET6 */
179
180	do {
181		key.af = pd->naf;
182		key.proto = pd->proto;
183		key.rdomain = pd->rdomain;
184		PF_ACPY(&key.addr[didx], &pd->ndaddr, key.af);
185		PF_ACPY(&key.addr[sidx], naddr, key.af);
186		key.port[didx] = pd->ndport;
187
188		/*
189		 * port search; start random, step;
190		 * similar 2 portloop in in_pcbbind
191		 */
192		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
193		    pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
194			/* XXX bug: icmp states dont use the id on both
195			 * XXX sides (traceroute -I through nat) */
196			key.port[sidx] = pd->nsport;
197			if (pf_find_state_all(&key, dir, NULL) == NULL) {
198				*nport = pd->nsport;
199				return (0);
200			}
201		} else if (low == 0 && high == 0) {
202			key.port[sidx] = pd->nsport;
203			if (pf_find_state_all(&key, dir, NULL) == NULL) {
204				*nport = pd->nsport;
205				return (0);
206			}
207		} else if (low == high) {
208			key.port[sidx] = htons(low);
209			if (pf_find_state_all(&key, dir, NULL) == NULL) {
210				*nport = htons(low);
211				return (0);
212			}
213		} else {
214			u_int16_t tmp;
215
216			if (low > high) {
217				tmp = low;
218				low = high;
219				high = tmp;
220			}
221			/* low < high */
222			cut = arc4random_uniform(1 + high - low) + low;
223			/* low <= cut <= high */
224			for (tmp = cut; tmp <= high; ++(tmp)) {
225				key.port[sidx] = htons(tmp);
226				if (pf_find_state_all(&key, dir, NULL) ==
227				    NULL && !in_baddynamic(tmp, pd->proto)) {
228					*nport = htons(tmp);
229					return (0);
230				}
231			}
232			for (tmp = cut - 1; tmp >= low; --(tmp)) {
233				key.port[sidx] = htons(tmp);
234				if (pf_find_state_all(&key, dir, NULL) ==
235				    NULL && !in_baddynamic(tmp, pd->proto)) {
236					*nport = htons(tmp);
237					return (0);
238				}
239			}
240		}
241
242		switch (r->nat.opts & PF_POOL_TYPEMASK) {
243		case PF_POOL_RANDOM:
244		case PF_POOL_ROUNDROBIN:
245		case PF_POOL_LEASTSTATES:
246			/*
247			 * pick a different source address since we're out
248			 * of free port choices for the current one.
249			 */
250			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
251			    &init_addr, sn, &r->nat, PF_SN_NAT))
252				return (1);
253			break;
254		case PF_POOL_NONE:
255		case PF_POOL_SRCHASH:
256		case PF_POOL_BITMASK:
257		default:
258			return (1);
259		}
260	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
261	return (1);					/* none available */
262}
263
264int
265pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
266    struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
267    enum pf_sn_types type)
268{
269	struct pf_addr		*raddr, *rmask, *cached;
270	struct pf_state		*s;
271	struct pf_src_node	 k;
272	int			 valid;
273
274	k.af = af;
275	k.type = type;
276	PF_ACPY(&k.addr, saddr, af);
277	k.rule.ptr = r;
278	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
279	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
280	if (sns[type] == NULL)
281		return (-1);
282
283	/* check if the cached entry is still valid */
284	cached = &(sns[type])->raddr;
285	valid = 0;
286	if (PF_AZERO(cached, af)) {
287		valid = 1;
288	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
289		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
290		    af, 0))
291			valid = 1;
292	} else if (rpool->addr.type == PF_ADDR_TABLE) {
293		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
294			valid = 1;
295	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
296		raddr = &rpool->addr.v.a.addr;
297		rmask = &rpool->addr.v.a.mask;
298		valid = pf_match_addr(0, raddr, rmask, cached, af);
299	}
300	if (!valid) {
301		if (pf_status.debug >= LOG_DEBUG) {
302			log(LOG_DEBUG, "pf: pf_map_addr: "
303			    "stale src tracking (%u) ", type);
304			pf_print_host(&k.addr, 0, af);
305			addlog(" to ");
306			pf_print_host(cached, 0, af);
307			addlog("\n");
308		}
309		if (sns[type]->states != 0) {
310			/* XXX expensive */
311			RB_FOREACH(s, pf_state_tree_id,
312			   &tree_id)
313				pf_state_rm_src_node(s,
314				    sns[type]);
315		}
316		sns[type]->expire = 1;
317		pf_remove_src_node(sns[type]);
318		sns[type] = NULL;
319		return (-1);
320	}
321	if (!PF_AZERO(cached, af))
322		PF_ACPY(naddr, cached, af);
323	if (pf_status.debug >= LOG_DEBUG) {
324		log(LOG_DEBUG, "pf: pf_map_addr: "
325		    "src tracking (%u) maps ", type);
326		pf_print_host(&k.addr, 0, af);
327		addlog(" to ");
328		pf_print_host(naddr, 0, af);
329		addlog("\n");
330	}
331	return (0);
332}
333
334int
335pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
336    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
337    struct pf_pool *rpool, enum pf_sn_types type)
338{
339	unsigned char		 hash[16];
340	struct pf_addr		 faddr;
341	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
342	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
343	u_int64_t		 states;
344	u_int16_t		 weight;
345	u_int64_t		 load;
346	u_int64_t		 cload;
347	u_int64_t		 hashidx;
348	int			 cnt;
349
350	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
351	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
352	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
353		return (0);
354
355	if (rpool->addr.type == PF_ADDR_NOROUTE)
356		return (1);
357	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
358		switch (af) {
359		case AF_INET:
360			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
361			    !PF_POOL_DYNTYPE(rpool->opts))
362				return (1);
363			raddr = &rpool->addr.p.dyn->pfid_addr4;
364			rmask = &rpool->addr.p.dyn->pfid_mask4;
365			break;
366#ifdef INET6
367		case AF_INET6:
368			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
369			    !PF_POOL_DYNTYPE(rpool->opts))
370				return (1);
371			raddr = &rpool->addr.p.dyn->pfid_addr6;
372			rmask = &rpool->addr.p.dyn->pfid_mask6;
373			break;
374#endif /* INET6 */
375		default:
376			unhandled_af(af);
377		}
378	} else if (rpool->addr.type == PF_ADDR_TABLE) {
379		if (!PF_POOL_DYNTYPE(rpool->opts))
380			return (1); /* unsupported */
381	} else {
382		raddr = &rpool->addr.v.a.addr;
383		rmask = &rpool->addr.v.a.mask;
384	}
385
386	switch (rpool->opts & PF_POOL_TYPEMASK) {
387	case PF_POOL_NONE:
388		PF_ACPY(naddr, raddr, af);
389		break;
390	case PF_POOL_BITMASK:
391		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
392		break;
393	case PF_POOL_RANDOM:
394		if (rpool->addr.type == PF_ADDR_TABLE) {
395			cnt = rpool->addr.p.tbl->pfrkt_cnt;
396			if (cnt == 0)
397				rpool->tblidx = 0;
398			else
399				rpool->tblidx = (int)arc4random_uniform(cnt);
400			memset(&rpool->counter, 0, sizeof(rpool->counter));
401			if (pfr_pool_get(rpool, &raddr, &rmask, af))
402				return (1);
403			PF_ACPY(naddr, &rpool->counter, af);
404		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
405			cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
406			if (cnt == 0)
407				rpool->tblidx = 0;
408			else
409				rpool->tblidx = (int)arc4random_uniform(cnt);
410			memset(&rpool->counter, 0, sizeof(rpool->counter));
411			if (pfr_pool_get(rpool, &raddr, &rmask, af))
412				return (1);
413			PF_ACPY(naddr, &rpool->counter, af);
414		} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
415			switch (af) {
416			case AF_INET:
417				rpool->counter.addr32[0] = arc4random();
418				break;
419#ifdef INET6
420			case AF_INET6:
421				if (rmask->addr32[3] != 0xffffffff)
422					rpool->counter.addr32[3] = arc4random();
423				else
424					break;
425				if (rmask->addr32[2] != 0xffffffff)
426					rpool->counter.addr32[2] = arc4random();
427				else
428					break;
429				if (rmask->addr32[1] != 0xffffffff)
430					rpool->counter.addr32[1] = arc4random();
431				else
432					break;
433				if (rmask->addr32[0] != 0xffffffff)
434					rpool->counter.addr32[0] = arc4random();
435				break;
436#endif /* INET6 */
437			default:
438				unhandled_af(af);
439			}
440			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
441			PF_ACPY(init_addr, naddr, af);
442
443		} else {
444			PF_AINC(&rpool->counter, af);
445			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
446		}
447		break;
448	case PF_POOL_SRCHASH:
449		hashidx =
450		    pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
451		if (rpool->addr.type == PF_ADDR_TABLE) {
452			cnt = rpool->addr.p.tbl->pfrkt_cnt;
453			if (cnt == 0)
454				rpool->tblidx = 0;
455			else
456				rpool->tblidx = (int)(hashidx % cnt);
457			memset(&rpool->counter, 0, sizeof(rpool->counter));
458			if (pfr_pool_get(rpool, &raddr, &rmask, af))
459				return (1);
460			PF_ACPY(naddr, &rpool->counter, af);
461		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
462			cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
463			if (cnt == 0)
464				rpool->tblidx = 0;
465			else
466				rpool->tblidx = (int)(hashidx % cnt);
467			memset(&rpool->counter, 0, sizeof(rpool->counter));
468			if (pfr_pool_get(rpool, &raddr, &rmask, af))
469				return (1);
470			PF_ACPY(naddr, &rpool->counter, af);
471		} else {
472			PF_POOLMASK(naddr, raddr, rmask,
473			    (struct pf_addr *)&hash, af);
474		}
475		break;
476	case PF_POOL_ROUNDROBIN:
477		if (rpool->addr.type == PF_ADDR_TABLE ||
478		    rpool->addr.type == PF_ADDR_DYNIFTL) {
479			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
480				/*
481				 * reset counter in case its value
482				 * has been removed from the pool.
483				 */
484				bzero(&rpool->counter, sizeof(rpool->counter));
485				if (pfr_pool_get(rpool, &raddr, &rmask, af))
486					return (1);
487			}
488		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
489			return (1);
490
491		/* iterate over table if it contains entries which are weighted */
492		if ((rpool->addr.type == PF_ADDR_TABLE &&
493		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
494		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
495		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
496			do {
497				if (rpool->addr.type == PF_ADDR_TABLE ||
498				    rpool->addr.type == PF_ADDR_DYNIFTL) {
499					if (pfr_pool_get(rpool,
500					    &raddr, &rmask, af))
501						return (1);
502				} else {
503					log(LOG_ERR, "pf: pf_map_addr: "
504					    "weighted RR failure");
505					return (1);
506				}
507				if (rpool->weight >= rpool->curweight)
508					break;
509				PF_AINC(&rpool->counter, af);
510			} while (1);
511
512			weight = rpool->weight;
513		}
514
515		PF_ACPY(naddr, &rpool->counter, af);
516		if (init_addr != NULL && PF_AZERO(init_addr, af))
517			PF_ACPY(init_addr, naddr, af);
518		PF_AINC(&rpool->counter, af);
519		break;
520	case PF_POOL_LEASTSTATES:
521		/* retrieve an address first */
522		if (rpool->addr.type == PF_ADDR_TABLE ||
523		    rpool->addr.type == PF_ADDR_DYNIFTL) {
524			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
525				/* see PF_POOL_ROUNDROBIN */
526				bzero(&rpool->counter, sizeof(rpool->counter));
527				if (pfr_pool_get(rpool, &raddr, &rmask, af))
528					return (1);
529			}
530		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
531			return (1);
532
533		states = rpool->states;
534		weight = rpool->weight;
535
536		if ((rpool->addr.type == PF_ADDR_TABLE &&
537		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
538		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
539		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
540			load = ((UINT16_MAX * rpool->states) / rpool->weight);
541		else
542			load = states;
543
544		PF_ACPY(&faddr, &rpool->counter, af);
545
546		PF_ACPY(naddr, &rpool->counter, af);
547		if (init_addr != NULL && PF_AZERO(init_addr, af))
548			PF_ACPY(init_addr, naddr, af);
549
550		/*
551		 * iterate *once* over whole table and find destination with
552		 * least connection
553		 */
554		do  {
555			PF_AINC(&rpool->counter, af);
556			if (rpool->addr.type == PF_ADDR_TABLE ||
557			    rpool->addr.type == PF_ADDR_DYNIFTL) {
558				if (pfr_pool_get(rpool, &raddr, &rmask, af))
559					return (1);
560			} else if (pf_match_addr(0, raddr, rmask,
561			    &rpool->counter, af))
562				return (1);
563
564			if ((rpool->addr.type == PF_ADDR_TABLE &&
565			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
566			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
567			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
568				cload = ((UINT16_MAX * rpool->states)
569					/ rpool->weight);
570			else
571				cload = rpool->states;
572
573			/* find lc minimum */
574			if (cload < load) {
575				states = rpool->states;
576				weight = rpool->weight;
577				load = cload;
578
579				PF_ACPY(naddr, &rpool->counter, af);
580				if (init_addr != NULL &&
581				    PF_AZERO(init_addr, af))
582				    PF_ACPY(init_addr, naddr, af);
583			}
584		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
585		    (states > 0));
586
587		if (rpool->addr.type == PF_ADDR_TABLE) {
588			if (pfr_states_increase(rpool->addr.p.tbl,
589			    naddr, af) == -1) {
590				if (pf_status.debug >= LOG_DEBUG) {
591					log(LOG_DEBUG,"pf: pf_map_addr: "
592					    "selected address ");
593					pf_print_host(naddr, 0, af);
594					addlog(". Failed to increase count!\n");
595				}
596				return (1);
597			}
598		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
599			if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
600			    naddr, af) == -1) {
601				if (pf_status.debug >= LOG_DEBUG) {
602					log(LOG_DEBUG, "pf: pf_map_addr: "
603					    "selected address ");
604					pf_print_host(naddr, 0, af);
605					addlog(". Failed to increase count!\n");
606				}
607				return (1);
608			}
609		}
610		break;
611	}
612
613	if (rpool->opts & PF_POOL_STICKYADDR) {
614		if (sns[type] != NULL) {
615			pf_remove_src_node(sns[type]);
616			sns[type] = NULL;
617		}
618		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr))
619			return (1);
620	}
621
622	if (pf_status.debug >= LOG_NOTICE &&
623	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
624		log(LOG_NOTICE, "pf: pf_map_addr: selected address ");
625		pf_print_host(naddr, 0, af);
626		if ((rpool->opts & PF_POOL_TYPEMASK) ==
627		    PF_POOL_LEASTSTATES)
628			addlog(" with state count %llu", states);
629		if ((rpool->addr.type == PF_ADDR_TABLE &&
630		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
631		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
632		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
633			addlog(" with weight %u", weight);
634		addlog("\n");
635	}
636
637	return (0);
638}
639
640int
641pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
642    struct pf_src_node **sns, struct pf_rule **nr)
643{
644	struct pf_addr	naddr;
645	u_int16_t	nport;
646
647#ifdef INET6
648	if (pd->af != pd->naf)
649		return (pf_get_transaddr_af(r, pd, sns));
650#endif /* INET6 */
651
652	if (r->nat.addr.type != PF_ADDR_NONE) {
653		/* XXX is this right? what if rtable is changed at the same
654		 * XXX time? where do I need to figure out the sport? */
655		nport = 0;
656		if (pf_get_sport(pd, r, &naddr, &nport,
657		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
658			DPFPRINTF(LOG_NOTICE,
659			    "pf: NAT proxy port allocation (%u-%u) failed",
660			    r->nat.proxy_port[0],
661			    r->nat.proxy_port[1]);
662			return (-1);
663		}
664		*nr = r;
665		PF_ACPY(&pd->nsaddr, &naddr, pd->af);
666		pd->nsport = nport;
667	}
668	if (r->rdr.addr.type != PF_ADDR_NONE) {
669		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
670		    &r->rdr, PF_SN_RDR))
671			return (-1);
672		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
673			PF_POOLMASK(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
674			    &pd->ndaddr, pd->af);
675
676		nport = 0;
677		if (r->rdr.proxy_port[1]) {
678			u_int32_t	tmp_nport;
679
680			tmp_nport = ((ntohs(pd->ndport) -
681			    ntohs(r->dst.port[0])) %
682			    (r->rdr.proxy_port[1] -
683			    r->rdr.proxy_port[0] + 1)) +
684			    r->rdr.proxy_port[0];
685
686			/* wrap around if necessary */
687			if (tmp_nport > 65535)
688				tmp_nport -= 65535;
689			nport = htons((u_int16_t)tmp_nport);
690		} else if (r->rdr.proxy_port[0])
691			nport = htons(r->rdr.proxy_port[0]);
692		*nr = r;
693		PF_ACPY(&pd->ndaddr, &naddr, pd->af);
694		if (nport)
695			pd->ndport = nport;
696	}
697
698	return (0);
699}
700
701#ifdef INET6
702int
703pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
704    struct pf_src_node **sns)
705{
706	struct pf_addr	ndaddr, nsaddr, naddr;
707	u_int16_t	nport;
708	int		prefixlen = 96;
709
710	if (pf_status.debug >= LOG_NOTICE) {
711		log(LOG_NOTICE, "pf: af-to %s %s, ",
712		    pd->naf == AF_INET ? "inet" : "inet6",
713		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
714		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
715		addlog(" -> ");
716		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
717		addlog("\n");
718	}
719
720	if (r->nat.addr.type == PF_ADDR_NONE)
721		panic("pf_get_transaddr_af: no nat pool for source address");
722
723	/* get source address and port */
724	nport = 0;
725	if (pf_get_sport(pd, r, &nsaddr, &nport,
726	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
727		DPFPRINTF(LOG_NOTICE,
728		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
729		    r->nat.proxy_port[0],
730		    r->nat.proxy_port[1]);
731		return (-1);
732	}
733	pd->nsport = nport;
734
735	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
736		if (pd->dir == PF_IN) {
737			pd->ndport = ntohs(pd->ndport);
738			if (pd->ndport == ICMP6_ECHO_REQUEST)
739				pd->ndport = ICMP_ECHO;
740			else if (pd->ndport == ICMP6_ECHO_REPLY)
741				pd->ndport = ICMP_ECHOREPLY;
742			pd->ndport = htons(pd->ndport);
743		} else {
744			pd->nsport = ntohs(pd->nsport);
745			if (pd->nsport == ICMP6_ECHO_REQUEST)
746				pd->nsport = ICMP_ECHO;
747			else if (pd->nsport == ICMP6_ECHO_REPLY)
748				pd->nsport = ICMP_ECHOREPLY;
749			pd->nsport = htons(pd->nsport);
750		}
751	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
752		if (pd->dir == PF_IN) {
753			pd->ndport = ntohs(pd->ndport);
754			if (pd->ndport == ICMP_ECHO)
755				pd->ndport = ICMP6_ECHO_REQUEST;
756			else if (pd->ndport == ICMP_ECHOREPLY)
757				pd->ndport = ICMP6_ECHO_REPLY;
758			pd->ndport = htons(pd->ndport);
759		} else {
760			pd->nsport = ntohs(pd->nsport);
761			if (pd->nsport == ICMP_ECHO)
762				pd->nsport = ICMP6_ECHO_REQUEST;
763			else if (pd->nsport == ICMP_ECHOREPLY)
764				pd->nsport = ICMP6_ECHO_REPLY;
765			pd->nsport = htons(pd->nsport);
766		}
767	}
768
769	/* get the destination address and port */
770	if (r->rdr.addr.type != PF_ADDR_NONE) {
771		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
772		    &r->rdr, PF_SN_RDR))
773			return (-1);
774		if (r->rdr.proxy_port[0])
775			pd->ndport = htons(r->rdr.proxy_port[0]);
776
777		if (pd->naf == AF_INET) {
778			/* The prefix is the IPv4 rdr address */
779			prefixlen = in_mask2len((struct in_addr *)
780			    &r->rdr.addr.v.a.mask);
781			inet_nat46(pd->naf, &pd->ndaddr,
782			    &ndaddr, &naddr, prefixlen);
783		} else {
784			/* The prefix is the IPv6 rdr address */
785			prefixlen =
786			    in6_mask2len((struct in6_addr *)
787			    &r->rdr.addr.v.a.mask, NULL);
788			inet_nat64(pd->naf, &pd->ndaddr,
789			    &ndaddr, &naddr, prefixlen);
790		}
791	} else {
792		if (pd->naf == AF_INET) {
793			/* The prefix is the IPv6 dst address */
794			prefixlen =
795			    in6_mask2len((struct in6_addr *)
796			    &r->dst.addr.v.a.mask, NULL);
797			if (prefixlen < 32)
798				prefixlen = 96;
799			inet_nat64(pd->naf, &pd->ndaddr,
800			    &ndaddr, &pd->ndaddr, prefixlen);
801		} else {
802			/*
803			 * The prefix is the IPv6 nat address
804			 * (that was stored in pd->nsaddr)
805			 */
806			prefixlen = in6_mask2len((struct in6_addr *)
807			    &r->nat.addr.v.a.mask, NULL);
808			if (prefixlen > 96)
809				prefixlen = 96;
810			inet_nat64(pd->naf, &pd->ndaddr,
811			    &ndaddr, &nsaddr, prefixlen);
812		}
813	}
814
815	PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf);
816	PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf);
817
818	if (pf_status.debug >= LOG_NOTICE) {
819		log(LOG_NOTICE, "pf: af-to %s %s done, prefixlen %d, ",
820		    pd->naf == AF_INET ? "inet" : "inet6",
821		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
822		    prefixlen);
823		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
824		addlog(" -> ");
825		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
826		addlog("\n");
827	}
828
829	return (0);
830}
831#endif /* INET6 */
832
833int
834pf_postprocess_addr(struct pf_state *cur)
835{
836	struct pf_rule		*nr;
837	struct pf_state_key	*sks;
838	struct pf_pool		 rpool;
839	struct pf_addr		 lookup_addr;
840	int			 slbcount = -1;
841
842	nr = cur->natrule.ptr;
843
844	if (nr == NULL)
845		return (0);
846
847	/* decrease counter */
848
849	sks = cur->key[PF_SK_STACK];
850
851	/* check for outgoing or ingoing balancing */
852	if (nr->rt == PF_ROUTETO)
853		lookup_addr = cur->rt_addr;
854	else if (sks != NULL)
855		lookup_addr = sks->addr[1];
856	else {
857		if (pf_status.debug >= LOG_DEBUG) {
858			log(LOG_DEBUG, "pf: %s: unable to obtain address",
859			    __func__);
860		}
861		return (1);
862	}
863
864	/* check for appropriate pool */
865	if (nr->rdr.addr.type != PF_ADDR_NONE)
866		rpool = nr->rdr;
867	else if (nr->nat.addr.type != PF_ADDR_NONE)
868		rpool = nr->nat;
869	else if (nr->route.addr.type != PF_ADDR_NONE)
870		rpool = nr->route;
871	else
872		return (0);
873
874	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
875		return (0);
876
877	if (rpool.addr.type == PF_ADDR_TABLE) {
878		if ((slbcount = pfr_states_decrease(
879		    rpool.addr.p.tbl,
880		    &lookup_addr, sks->af)) == -1) {
881			if (pf_status.debug >= LOG_DEBUG) {
882				log(LOG_DEBUG, "pf: %s: selected address ",
883				    __func__);
884				pf_print_host(&lookup_addr,
885				    sks->port[0], sks->af);
886				addlog(". Failed to "
887				    "decrease count!\n");
888			}
889			return (1);
890		}
891	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
892		if ((slbcount = pfr_states_decrease(
893		    rpool.addr.p.dyn->pfid_kt,
894		    &lookup_addr, sks->af)) == -1) {
895			if (pf_status.debug >= LOG_DEBUG) {
896				log(LOG_DEBUG, "pf: %s: selected address ",
897				    __func__);
898				pf_print_host(&lookup_addr,
899				    sks->port[0], sks->af);
900				addlog(". Failed to "
901				    "decrease count!\n");
902			}
903			return (1);
904		}
905	}
906	if (slbcount > -1) {
907		if (pf_status.debug >= LOG_NOTICE) {
908			log(LOG_NOTICE, "pf: %s: selected address ", __func__);
909			pf_print_host(&lookup_addr, sks->port[0],
910			    sks->af);
911			addlog(" decreased state count to %u\n",
912			    slbcount);
913		}
914	}
915	return (0);
916}
917